User:DESiegel60/Namespacedups
Here is the namespaceDupes.php script, as copied from the MediaWiki download site.
- Start script
<?php
# Copyright (C) 2005 Brion Vibber <brion@pobox.com>
# http://www.mediawiki.org/
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# http://www.gnu.org/copyleft/gpl.html
# Option names this script understands: --fix and --suffix=<text>.
# NOTE(review): the copy of commandLine.inc reproduced further down this page
# re-initialises $options to an empty array, so this list appears to be
# informational only - confirm against the installed version.
$options = array( 'fix', 'suffix' );
/** */
# Shared command-line bootstrap; the driver code below reads $options after it runs.
require_once( 'commandLine.inc' );
#require_once( 'maintenance/userDupes.inc' );
/**
 * Finds pages stored in the main namespace (0) whose titles start with
 * "<namespace name>:" and optionally rewrites them into that namespace.
 * Progress and findings are echoed to stdout.
 */
class NamespaceConflictChecker {
	/** Database handle used for every query and update. */
	var $db;

	/**
	 * @param object $db database connection, kept by reference
	 */
	function __construct( &$db ) {
		$this->db =& $db;
	}

	/**
	 * Old-style constructor name, kept so the class still initialises on
	 * PHP 4 and so existing explicit calls keep working.
	 * @param object $db database connection, kept by reference
	 */
	function NamespaceConflictChecker( &$db ) {
		$this->__construct( $db );
	}

	/**
	 * Check every namespace reported by the content language.
	 * @param bool $fix pass true to actually rename conflicting pages
	 * @param string $suffix appended to the title when the target already exists
	 * @return bool true if every namespace was clean or fully resolvable
	 */
	function checkAll( $fix, $suffix = '' ) {
		global $wgContLang;
		$spaces = $wgContLang->getNamespaces();
		$ok = true;
		foreach( $spaces as $ns => $name ) {
			# Call first, then AND: every namespace is checked even after a failure.
			$ok = $this->checkNamespace( $ns, $name, $fix, $suffix ) && $ok;
		}
		return $ok;
	}

	/**
	 * Check (and optionally fix) one namespace.
	 * @param int $ns namespace index
	 * @param string $name namespace name as used in title prefixes
	 * @param bool $fix pass true to actually rename conflicting pages
	 * @param string $suffix appended to the title when the target already exists
	 * @return bool true if there were no conflicts, or all were resolvable
	 */
	function checkNamespace( $ns, $name, $fix, $suffix = '' ) {
		echo "Checking namespace $ns: \"$name\"\n";
		if( $name == '' ) {
			# The main namespace has no prefix, so nothing can collide with it.
			echo "... skipping article namespace\n";
			return true;
		}

		$conflicts = $this->getConflicts( $ns, $name );
		$count = count( $conflicts );
		if( $count == 0 ) {
			echo "... no conflicts detected!\n";
			return true;
		}

		echo "... $count conflicts detected:\n";
		$ok = true;
		foreach( $conflicts as $row ) {
			$resolvable = $this->reportConflict( $row, $suffix );
			$ok = $ok && $resolvable;
			# An unresolvable conflict is still fixed when a suffix was supplied.
			if( $fix && ( $resolvable || $suffix != '' ) ) {
				$ok = $this->resolveConflict( $row, $resolvable, $suffix ) && $ok;
			}
		}
		return $ok;
	}

	/**
	 * Fetch main-namespace rows whose title begins with "$name:".
	 * @param int $ns namespace index the rows should move to
	 * @param string $name namespace name used as the title prefix
	 * @return array row objects with id, oldtitle, namespace and title fields
	 * @access private
	 */
	function getConflicts( $ns, $name ) {
		$page = $this->newSchema() ? 'page' : 'cur';
		$table = $this->db->tableName( $page );

		$prefix = $this->db->strencode( $name );
		# Both LIKE wildcards must be escaped so the prefix matches literally.
		$likeprefix = str_replace(
			array( '%', '_' ),
			array( '\\%', '\\_' ),
			$prefix );

		# NOTE(review): MySQL's TRIM(LEADING ...) removes every repeated leading
		# occurrence of the prefix, so "Talk:Talk:X" would become "X" - confirm
		# that this is acceptable for the wikis this is run on.
		$sql = "SELECT {$page}_id AS id,
			{$page}_title AS oldtitle,
			$ns AS namespace,
			TRIM(LEADING '$prefix:' FROM {$page}_title) AS title
			FROM {$table}
			WHERE {$page}_namespace=0
			AND {$page}_title LIKE '$likeprefix:%'";

		$result = $this->db->query( $sql, 'NamespaceConflictChecker::getConflicts' );

		$set = array();
		while( $row = $this->db->fetchObject( $result ) ) {
			$set[] = $row;
		}
		$this->db->freeResult( $result );

		return $set;
	}

	/**
	 * Print one conflict and test whether the target title is free.
	 * @param object $row a row produced by getConflicts()
	 * @param string $suffix unused here; accepted for a uniform call shape
	 * @return bool true if no page already has the target title
	 * @access private
	 */
	function reportConflict( $row, $suffix ) {
		$newTitle = Title::makeTitle( $row->namespace, $row->title );
		printf( "... %d (0,\"%s\") -> (%d,\"%s\") %s\n",
			$row->id,
			$row->oldtitle,
			$row->namespace,
			$row->title,
			$newTitle->getPrefixedText() );

		$id = $newTitle->getArticleId();
		if( $id ) {
			echo "... *** cannot resolve automatically; page exists with ID $id ***\n";
			return false;
		} else {
			return true;
		}
	}

	/**
	 * Rename one conflicting page on every table that stores titles.
	 * @param object $row a row produced by getConflicts(); its title is
	 *   modified in place when the suffix has to be applied
	 * @param bool $resolvable result of reportConflict() for this row
	 * @param string $suffix appended to the title when not resolvable
	 * @return bool always true
	 * @access private
	 */
	function resolveConflict( $row, $resolvable, $suffix ) {
		if( !$resolvable ) {
			$row->title .= $suffix;
			$title = Title::makeTitle( $row->namespace, $row->title );
			echo "... *** using suffixed form [[" . $title->getPrefixedText() . "]] ***\n";
		}
		$tables = $this->newSchema()
			? array( 'page' )
			: array( 'cur', 'old' );
		foreach( $tables as $table ) {
			$this->resolveConflictOn( $row, $table );
		}
		return true;
	}

	/**
	 * Apply the namespace/title rewrite for one row on one table.
	 * @param object $row row with namespace, title and oldtitle fields
	 * @param string $table table name, also used as the column prefix
	 * @return bool always true
	 * @access private
	 */
	function resolveConflictOn( $row, $table ) {
		$fname = 'NamespaceConflictChecker::resolveConflictOn';
		echo "... resolving on $table... ";
		$this->db->update( $table,
			array(
				"{$table}_namespace" => $row->namespace,
				"{$table}_title" => $row->title,
			),
			array(
				"{$table}_namespace" => 0,
				"{$table}_title" => $row->oldtitle,
			),
			$fname );
		echo "ok.\n";
		return true;
	}

	/**
	 * @return bool true when a Revision class is loaded, which selects the
	 *   'page' table instead of 'cur'/'old'
	 * @access private
	 */
	function newSchema() {
		return class_exists( 'Revision' );
	}
}
# Driver: run the conflict check over all namespaces and report via exit status.
$wgTitle = Title::newFromText( 'Namespace title conflict cleanup script' );

# --fix applies the renames; without it the script only reports.
$fix = isset( $options['fix'] );
# --suffix=<text> is appended to titles whose target already exists;
# default to the empty string, meaning "do not touch those".
$suffix = isset( $options['suffix'] ) ? $options['suffix'] : '';

$dbw =& wfGetDB( DB_MASTER );
$duper = new NamespaceConflictChecker( $dbw );
$retval = $duper->checkAll( $fix, $suffix );

if( $retval ) {
	echo "\nLooks good!\n";
	exit( 0 );
} else {
	echo "\nOh noeees\n";
	exit( -1 );
}
?>
- End script***
Here is the file userDupes.inc
- Start File
<?php
# Copyright (C) 2005 Brion Vibber <brion@pobox.com>
# http://www.mediawiki.org/
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# http://www.gnu.org/copyleft/gpl.html
/**
* Look for duplicate user table entries and optionally prune them.
*/
class UserDupes {
	/** Database handle used for every query. */
	var $db;
	/** Number of duplicate accounts that had (or need) edits reassigned. */
	var $reassigned;
	/** Number of duplicate records deleted, or deletable in check-only mode. */
	var $trimmed;
	/** Number of duplicates that still had edits after reassignment. */
	var $failed;

	/**
	 * @param object $database database connection, kept by reference
	 */
	function __construct( &$database ) {
		$this->db =& $database;
	}

	/**
	 * Old-style constructor name, kept so the class still initialises on
	 * PHP 4 and so existing explicit calls keep working.
	 * @param object $database database connection, kept by reference
	 */
	function UserDupes( &$database ) {
		$this->__construct( $database );
	}

	/**
	 * Check if this database's user table has already had a unique
	 * user_name index applied.
	 * @return bool
	 */
	function hasUniqueIndex() {
		$fname = 'UserDupes::hasUniqueIndex';
		$info = $this->db->indexInfo( 'user', 'user_name', $fname );
		if( !$info ) {
			echo "WARNING: doesn't seem to have user_name index at all!\n";
			return false;
		}

		# Confusingly, 'Non_unique' is 0 for *unique* indexes,
		# and 1 for *non-unique* indexes.
		return ( $info->Non_unique == 0 );
	}

	/**
	 * Checks the database for duplicate user account records
	 * and remove them in preparation for application of a unique
	 * index on the user_name field. Returns true if the table is
	 * clean or if duplicates have been resolved automatically.
	 *
	 * May return false if there are unresolvable problems.
	 * Status information will be echo'd to stdout.
	 *
	 * @return bool
	 */
	function clearDupes() {
		return $this->checkDupes( true );
	}

	/**
	 * Checks the database for duplicate user account records
	 * in preparation for application of a unique index on the
	 * user_name field. Returns true if the table is clean or
	 * if duplicates can be resolved automatically.
	 *
	 * Returns false if there are duplicates and resolution was
	 * not requested. (If doing resolution, edits may be reassigned.)
	 * Status information will be echo'd to stdout.
	 *
	 * @param bool $doDelete pass true to actually remove things
	 *                       from the database; false to just check.
	 * @return bool
	 */
	function checkDupes( $doDelete = false ) {
		global $wgDBname;

		if( $this->hasUniqueIndex() ) {
			echo "$wgDBname already has a unique index on its user table.\n";
			return true;
		}

		# Hold write locks for the whole scan so counts and deletes stay consistent.
		$this->lock();

		echo "Checking for duplicate accounts...\n";
		$dupes = $this->getDupes();
		$count = count( $dupes );

		echo "Found $count accounts with duplicate records on $wgDBname.\n";
		$this->trimmed = 0;
		$this->reassigned = 0;
		$this->failed = 0;
		foreach( $dupes as $name ) {
			$this->examine( $name, $doDelete );
		}

		$this->unlock();

		echo "\n";

		if( $this->reassigned > 0 ) {
			if( $doDelete ) {
				echo "$this->reassigned duplicate accounts had edits reassigned to a canonical record id.\n";
			} else {
				echo "$this->reassigned duplicate accounts need to have edits reassigned.\n";
			}
		}

		if( $this->trimmed > 0 ) {
			if( $doDelete ) {
				echo "$this->trimmed duplicate user records were deleted from $wgDBname.\n";
			} else {
				echo "$this->trimmed duplicate user accounts were found on $wgDBname which can be removed safely.\n";
			}
		}

		if( $this->failed > 0 ) {
			echo "Something terribly awry; $this->failed duplicate accounts were not removed.\n";
			return false;
		}

		if( $this->trimmed == 0 || $doDelete ) {
			echo "It is now safe to apply the unique index on user_name.\n";
			return true;
		} else {
			echo "Run this script again with the --fix option to automatically delete them.\n";
			return false;
		}
	}

	/**
	 * We don't want anybody to mess with our stuff...
	 * Takes WRITE locks on the user table and the edit table(s).
	 * @access private
	 */
	function lock() {
		$fname = 'UserDupes::lock';
		if( $this->newSchema() ) {
			$set = array( 'user', 'revision' );
		} else {
			$set = array( 'user', 'cur', 'old' );
		}
		$names = array_map( array( $this, 'lockTable' ), $set );
		$tables = implode( ',', $names );

		$this->db->query( "LOCK TABLES $tables", $fname );
	}

	/**
	 * Build the "<table> WRITE" fragment for one table in LOCK TABLES.
	 * @param string $table unprefixed table name
	 * @return string
	 * @access private
	 */
	function lockTable( $table ) {
		return $this->db->tableName( $table ) . ' WRITE';
	}

	/**
	 * @return bool true when a Revision class is loaded, which selects the
	 *   'revision' table instead of 'cur'/'old'
	 * @access private
	 */
	function newSchema() {
		return class_exists( 'Revision' );
	}

	/**
	 * Release the locks taken by lock().
	 * @access private
	 */
	function unlock() {
		$fname = 'UserDupes::unlock';
		$this->db->query( "UNLOCK TABLES", $fname );
	}

	/**
	 * Grab usernames for which multiple records are present in the database.
	 * @return array
	 * @access private
	 */
	function getDupes() {
		$fname = 'UserDupes::getDupes';
		$user = $this->db->tableName( 'user' );
		$result = $this->db->query(
			"SELECT user_name,COUNT(*) AS n
			FROM $user
			GROUP BY user_name
			HAVING n > 1", $fname );

		$list = array();
		while( $row = $this->db->fetchObject( $result ) ) {
			$list[] = $row->user_name;
		}
		$this->db->freeResult( $result );

		return $list;
	}

	/**
	 * Examine user records for the given name. Try to see which record
	 * will be the one that actually gets used, then check remaining records
	 * for edits. If the dupes have no edits, we can safely remove them.
	 * @param string $name
	 * @param bool $doDelete
	 * @access private
	 */
	function examine( $name, $doDelete ) {
		$fname = 'UserDupes::examine';
		$result = $this->db->select( 'user',
			array( 'user_id' ),
			array( 'user_name' => $name ),
			$fname );

		$firstRow = $this->db->fetchObject( $result );
		if( !$firstRow ) {
			# Defensive: no record at all for this name, so nothing to compare.
			$this->db->freeResult( $result );
			return;
		}
		# The first row returned is treated as the canonical record.
		$firstId = $firstRow->user_id;
		echo "Record that will be used for '$name' is user_id=$firstId\n";

		while( $row = $this->db->fetchObject( $result ) ) {
			$dupeId = $row->user_id;
			echo "... dupe id $dupeId: ";
			$edits = $this->editCount( $dupeId );
			if( $edits > 0 ) {
				$this->reassigned++;
				echo "has $edits edits! ";
				if( $doDelete ) {
					$this->reassignEdits( $dupeId, $firstId );
					# Re-count to verify the reassignment before deleting anything.
					$newEdits = $this->editCount( $dupeId );
					if( $newEdits == 0 ) {
						echo "confirmed cleaned. ";
					} else {
						$this->failed++;
						echo "WARNING! $newEdits remaining edits for $dupeId; NOT deleting user.\n";
						continue;
					}
				} else {
					echo "(will need to reassign edits on fix)";
				}
			} else {
				echo "ok, no edits. ";
			}
			$this->trimmed++;
			if( $doDelete ) {
				$this->trimAccount( $dupeId );
			}
			echo "\n";
		}
		$this->db->freeResult( $result );
	}

	/**
	 * Count the number of edits attributed to this user.
	 * Does not currently check log table or other things
	 * where it might show up...
	 * @param int $userid
	 * @return int
	 * @access private
	 */
	function editCount( $userid ) {
		if( $this->newSchema() ) {
			return $this->editCountOn( 'revision', 'rev_user', $userid );
		} else {
			return $this->editCountOn( 'cur', 'cur_user', $userid ) +
				$this->editCountOn( 'old', 'old_user', $userid );
		}
	}

	/**
	 * Count the number of hits on a given table for this account.
	 * @param string $table
	 * @param string $field
	 * @param int $userid
	 * @return int
	 * @access private
	 */
	function editCountOn( $table, $field, $userid ) {
		$fname = 'UserDupes::editCountOn';
		return IntVal( $this->db->selectField(
			$table,
			'COUNT(*)',
			array( $field => $userid ),
			$fname ) );
	}

	/**
	 * Move all edits from one user id to another on every edit table.
	 * @param int $from
	 * @param int $to
	 * @access private
	 */
	function reassignEdits( $from, $to ) {
		$set = $this->newSchema()
			? array( 'revision' => 'rev_user' )
			: array( 'cur' => 'cur_user', 'old' => 'old_user' );
		foreach( $set as $table => $field ) {
			$this->reassignEditsOn( $table, $field, $from, $to );
		}
	}

	/**
	 * Move all edits from one user id to another on a single table.
	 * @param string $table
	 * @param string $field
	 * @param int $from
	 * @param int $to
	 * @access private
	 */
	function reassignEditsOn( $table, $field, $from, $to ) {
		$fname = 'UserDupes::reassignEditsOn';
		echo "reassigning on $table... ";
		$this->db->update( $table,
			array( $field => $to ),
			array( $field => $from ),
			$fname );
		echo "ok. ";
	}

	/**
	 * Remove a user account line.
	 * @param int $userid
	 * @access private
	 */
	function trimAccount( $userid ) {
		$fname = 'UserDupes::trimAccount';
		echo "deleting...";
		$this->db->delete( 'user', array( 'user_id' => $userid ), $fname );
		echo " ok";
	}
}
?>
- end file
Here is the file commandLine.inc
- Start file
<?php
/**
* @todo document
* @package MediaWiki
* @subpackage Maintenance
*/
/** */
# Abort if called from a web server
if ( isset( $_SERVER ) && array_key_exists( 'REQUEST_METHOD', $_SERVER ) ) {
	print "This script must be run from the command line\n";
	exit();
}

# NOTE(review): presumably tested by the included MediaWiki files as an
# entry-point marker - confirm against includes/.
define("MEDIAWIKI",true);
# Process command line arguments
# $options becomes an array with keys set to the option names
# $optionsWithArgs is an array of GNU-style options that take an argument. The arguments are returned
# in the values of $options.
# $args becomes a zero-based array containing the non-option arguments

if ( !isset( $optionsWithArgs ) ) {
	$optionsWithArgs = array();
}
$optionsWithArgs[] = 'conf'; # For specifying the location of LocalSettings.php

# array_shift() is called for its side effect of dropping the script name
# from $argv; the install path is derived from this file's own location.
$self = array_shift( $argv );
$self = __FILE__;
$IP = realpath( dirname( $self ) . "/.." );
chdir( $IP );

$options = array();
$args = array();

for( $arg = reset( $argv ); $arg !== false; $arg = next( $argv ) ) {
	if ( substr( $arg, 0, 2 ) == '--' ) {
		# Long options
		$option = substr( $arg, 2 );
		if ( in_array( $option, $optionsWithArgs ) ) {
			# "--name value": the value is the next argument
			$param = next( $argv );
			if ( $param === false ) {
				die( "$arg needs an value after it\n" );
			}
			$options[$option] = $param;
		} else {
			# "--name=value", or a bare "--name" flag which is stored as 1
			$bits = explode( '=', $option, 2 );
			if( count( $bits ) > 1 ) {
				$option = $bits[0];
				$param = $bits[1];
			} else {
				$param = 1;
			}
			$options[$option] = $param;
		}
	} elseif ( substr( $arg, 0, 1 ) == '-' ) {
		# Short options
		# (substr() rather than an offset, so an empty argument is safe)
		for ( $p=1; $p<strlen( $arg ); $p++ ) {
			$option = $arg[$p];
			if ( in_array( $option, $optionsWithArgs ) ) {
				$param = next( $argv );
				if ( $param === false ) {
					die( "$arg needs an value after it\n" );
				}
				$options[$option] = $param;
			} else {
				$options[$option] = 1;
			}
		}
	} else {
		$args[] = $arg;
	}
}
# General initialisation

$wgCommandLineMode = true;
# Turn off output buffering if it's on
@ob_end_flush();

# Use the platform's own include_path separator instead of guessing it from
# the current value, which picks ":" on Windows when the path has one entry.
$include_path = ini_get( "include_path" );
$sep = PATH_SEPARATOR;

if (!isset( $wgUseNormalUser ) ) {
	$wgUseNormalUser = false;
}
# Load site settings. The presence of the langlist file selects the wiki-farm
# configuration path; otherwise a single LocalSettings.php install is assumed.
if ( file_exists( '/home/wikipedia/common/langlist' ) ) {
	$wgWikiFarm = true;
	require_once( "$IP/includes/SiteConfiguration.php" );

	# Get $conf
	require( "$IP/InitialiseSettings.php" );

	if ( empty( $wgNoDBParam ) ) {
		# Check if we were passed a db name
		$db = array_shift( $args );
		list( $site, $lang ) = $wgConf->siteFromDB( $db );

		# If not, work out the language and site the old way
		if ( is_null( $site ) || is_null( $lang ) ) {
			if ( !$db ) {
				$lang = "aa";
			} else {
				$lang = $db;
			}
			if ( isset( $args[0] ) ) {
				$site = array_shift( $args );
			} else {
				$site = "wikipedia";
			}
		}
	} else {
		$lang = "aa";
		$site = "wikipedia";
	}

	# This is for the IRC scripts, which now run as the apache user
	# The apache user doesn't have access to the wikiadmin_pass command
	# (isset() guard: $_ENV may not contain USER at all)
	if ( isset( $_ENV['USER'] ) && $_ENV['USER'] == "apache" ) {
		$wgUseNormalUser = true;
	}

	putenv( "wikilang=$lang");

	$DP = $IP;
	ini_set( "include_path", ".:$IP:$IP/includes:$IP/languages:$IP/maintenance" );

	require_once( "$IP/includes/Defines.php" );
	require_once( "$IP/CommonSettings.php" );

	if ( !$wgUseNormalUser ) {
		# Admin password comes from the output of the wikiadmin_pass command
		$wgDBuser = $wgDBadminuser = "wikiadmin";
		$wgDBpassword = $wgDBadminpassword = trim(`wikiadmin_pass`);
	}
} else {
	$wgWikiFarm = false;
	# --conf overrides the default settings file location
	if ( isset( $options['conf'] ) ) {
		$settingsFile = $options['conf'];
	} else {
		$settingsFile = "$IP/LocalSettings.php";
	}

	if ( ! is_readable( $settingsFile ) ) {
		print "A copy of your installation's LocalSettings.php\n" .
			"must exist in the source directory.\n";
		exit();
	}
	$wgCommandLineMode = true;
	$DP = $IP;
	require_once( "$IP/includes/Defines.php" );
	require_once( $settingsFile );
	ini_set( "include_path", ".$sep$IP$sep$IP/includes$sep$IP/languages$sep$IP/maintenance" );

	# AdminSettings.php is optional
	if ( is_readable( "$IP/AdminSettings.php" ) ) {
		require_once( "$IP/AdminSettings.php" );
	}
}
# Turn off output buffering again, it might have been turned on in the settings files
@ob_end_flush();
# Same with these
$wgCommandLineMode = true;

# Run with the admin credentials when they are available. They are applied
# whether or not a server list is configured, so single-server installs
# also pick them up.
if ( empty( $wgUseNormalUser ) && isset( $wgDBadminuser ) ) {
	$wgDBuser = $wgDBadminuser;
	$wgDBpassword = $wgDBadminpassword;

	# Propagate the credentials to every configured server entry, if any.
	if ( !empty( $wgDBservers ) ) {
		foreach ( $wgDBservers as $i => $server ) {
			$wgDBservers[$i]['user'] = $wgDBuser;
			$wgDBservers[$i]['password'] = $wgDBpassword;
		}
	}
}

# Lift the memory limit before loading the rest of MediaWiki
ini_set( 'memory_limit', -1 );

require_once( "Setup.php" );
require_once( "install-utils.inc" );
$wgTitle = Title::newFromText( "Command line script" );
# No execution time limit for maintenance runs
set_time_limit(0);
// --------------------------------------------------------------------
// Functions
// --------------------------------------------------------------------
/**
 * Block until the most lagged server reported by the load balancer is no
 * more than $maxLag seconds behind. A falsy $maxLag disables the wait.
 * A status line is printed before each sleep.
 *
 * @param int $maxLag highest acceptable lag in seconds; also the sleep interval
 */
function wfWaitForSlaves( $maxLag ) {
	global $wgLoadBalancer;

	if ( !$maxLag ) {
		return;
	}

	for ( ;; ) {
		list( $host, $lag ) = $wgLoadBalancer->getMaxLag();
		if ( !( $lag > $maxLag ) ) {
			break;
		}

		# Show a host name in place of the address when the lookup yields one
		$resolved = @gethostbyaddr( $host );
		if ( $resolved !== false ) {
			$host = $resolved;
		}

		print "Waiting for $host (lagged $lag seconds)...\n";
		sleep($maxLag);
	}
}
?>
- EndFile