User:DESiegel60/Namespacedups

From ISFDB
Jump to navigation Jump to search

Here is the namespaceDupes.php script, as copied from the MediaWiki download site.

Start script

<?php

# Copyright (C) 2005 Brion Vibber <brion@pobox.com>
# http://www.mediawiki.org/
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# http://www.gnu.org/copyleft/gpl.html

# Switches this script reads. Note that the commandLine.inc shown on this
# page assigns a fresh array to $options while parsing the command line,
# so this list is informational only.
$options = array( 'fix', 'suffix' );

/** */
require_once( 'commandLine.inc' );

# Disabled include, kept as a comment:
# require_once( 'maintenance/userDupes.inc' );

/**
 * Finds pages stored in the main namespace (namespace 0) whose titles start
 * with "<namespace name>:" and, on request, rewrites them so that they live
 * in that namespace instead. Progress is echoed to stdout.
 */
class NamespaceConflictChecker {

   /** Database handle used for every query and update. */
   var $db;

   /**
    * @param object $db database handle; kept by reference
    */
   function NamespaceConflictChecker( &$db ) {
       $this->db =& $db;
   }
   
   /**
    * Run checkNamespace() for every namespace reported by $wgContLang.
    *
    * @param bool   $fix    pass true to rename conflicting pages
    * @param string $suffix text appended to a title when the target page
    *                       already exists; '' disables that fallback
    * @return bool true only if every namespace check returned true
    */
   function checkAll( $fix, $suffix = '' ) {
       global $wgContLang;
       $spaces = $wgContLang->getNamespaces();
       $ok = true;
       foreach( $spaces as $ns => $name ) {
           # Call first, then AND: every namespace is checked even after a failure.
           $ok = $this->checkNamespace( $ns, $name, $fix, $suffix ) && $ok;
       }
       return $ok;
   }
   
   /**
    * Report, and optionally resolve, the conflicts for a single namespace.
    *
    * A namespace with an empty name is skipped. A conflict whose target page
    * already exists makes the result false, even if it is subsequently
    * renamed using the suffix.
    *
    * @param int    $ns     namespace index
    * @param string $name   namespace name used as the title prefix
    * @param bool   $fix    pass true to rename conflicting pages
    * @param string $suffix see checkAll()
    * @return bool
    */
   function checkNamespace( $ns, $name, $fix, $suffix = '' ) {
       echo "Checking namespace $ns: \"$name\"\n";
       if( $name == '' ) {
           echo "... skipping article namespace\n";
           return true;
       }
       
       $conflicts = $this->getConflicts( $ns, $name );
       $count = count( $conflicts );
       if( $count == 0 ) {
           echo "... no conflicts detected!\n";
           return true;
       }
       
       echo "... $count conflicts detected:\n";
       $ok = true;
       foreach( $conflicts as $row ) {
           $resolvable = $this->reportConflict( $row, $suffix );
           $ok = $ok && $resolvable;
           # Only touch the database when asked to, and only when the row can
           # be moved as-is or a suffix is available to dodge the collision.
           if( $fix && ( $resolvable || $suffix != '' ) ) {
               $ok = $this->resolveConflict( $row, $resolvable, $suffix ) && $ok;
           }
       }
       return $ok;
   }
   
   /**
    * Fetch main-namespace rows whose title starts with "$name:".
    *
    * Each returned object has the fields id, oldtitle, namespace (always $ns)
    * and title (oldtitle with the prefix trimmed off).
    *
    * @param int    $ns
    * @param string $name
    * @return array list of row objects
    */
   function getConflicts( $ns, $name ) {
       $page  = $this->newSchema() ? 'page' : 'cur';
       $table = $this->db->tableName( $page );
       
       $prefix     = $this->db->strencode( $name );
       # '_' is a single-character wildcard in LIKE, so it is escaped.
       $likeprefix = str_replace( '_', '\\_', $prefix);
       
       # NOTE(review): MySQL documents TRIM(LEADING x FROM s) as removing
       # *all* leading occurrences of x, so a title such as "Talk:Talk:Foo"
       # would lose both prefixes - confirm whether that is intended.
       $sql = "SELECT {$page}_id                                  AS id,
                      {$page}_title                               AS oldtitle,
                      $ns                                         AS namespace,
                      TRIM(LEADING '$prefix:' FROM {$page}_title) AS title
                 FROM {$table}
                WHERE {$page}_namespace=0
                  AND {$page}_title LIKE '$likeprefix:%'";
       
       $result = $this->db->query( $sql, 'NamespaceConflictChecker::getConflicts' );
       
       $set = array();
       while( $row = $this->db->fetchObject( $result ) ) {
           $set[] = $row;
       }
       $this->db->freeResult( $result );
       
       return $set;
   }
   
   /**
    * Print one conflict and say whether it can be moved without a collision.
    *
    * @param object $row    row as returned by getConflicts()
    * @param string $suffix accepted but not used by this method
    * @return bool false if a page already exists at the target title
    */
   function reportConflict( $row, $suffix ) {
       $newTitle = Title::makeTitle( $row->namespace, $row->title );
       printf( "... %d (0,\"%s\") -> (%d,\"%s\") %s\n",
           $row->id,
           $row->oldtitle,
           $row->namespace,
           $row->title,
           $newTitle->getPrefixedText() );
       
       $id = $newTitle->getArticleId();
       if( $id ) {
           echo "...  *** cannot resolve automatically; page exists with ID $id ***\n";
           return false;
       } else {
           return true;
       }
   }
   
   /**
    * Rewrite namespace and title for one conflicting row.
    *
    * When the row is not directly resolvable, $suffix is appended to
    * $row->title first. The update is applied to 'page' on the new schema,
    * or to both 'cur' and 'old' otherwise.
    *
    * @param object $row        row as returned by getConflicts(); its title
    *                           field is modified when a suffix is applied
    * @param bool   $resolvable result of reportConflict() for this row
    * @param string $suffix
    * @return bool always true
    */
   function resolveConflict( $row, $resolvable, $suffix ) {
       if( !$resolvable ) {
           $row->title .= $suffix;
           $title = Title::makeTitle( $row->namespace, $row->title );
           echo "...  *** using suffixed form [[" . $title->getPrefixedText() . "]] ***\n";
       }
       $tables = $this->newSchema() 
           ? array( 'page' )
           : array( 'cur', 'old' );
       foreach( $tables as $table ) {
           $this->resolveConflictOn( $row, $table );
       }
       return true;
   }
   
   /**
    * Apply the rename on one table. Column names are derived from the table
    * name ("{$table}_namespace", "{$table}_title"); rows are matched on
    * namespace 0 and the old title.
    *
    * @param object $row
    * @param string $table
    * @return bool always true
    */
   function resolveConflictOn( $row, $table ) {
       $fname = 'NamespaceConflictChecker::resolveConflictOn';
       echo "... resolving on $table... ";
       $this->db->update( $table,
           array(
               "{$table}_namespace" => $row->namespace,
               "{$table}_title"     => $row->title,
           ),
           array(
               "{$table}_namespace" => 0,
               "{$table}_title"     => $row->oldtitle,
           ),
           $fname );
       echo "ok.\n";
       return true;
   }
   
   /**
    * @return bool true when a class named Revision is defined, which this
    *              script uses to choose 'page' over the 'cur'/'old' tables
    */
   function newSchema() {
       return class_exists( 'Revision' );
   }

}



$wgTitle = Title::newFromText( 'Namespace title conflict cleanup script' );

# 'fix' only needs to be present; 'suffix' carries a value and falls back to
# the empty string, which checkNamespace() treats as "no suffix available".
$fix    = isset( $options['fix'] );
$suffix = isset( $options['suffix'] ) ? $options['suffix'] : '';

$dbw =& wfGetDB( DB_MASTER );
$duper = new NamespaceConflictChecker( $dbw );
$retval = $duper->checkAll( $fix, $suffix );

# Exit status reports the overall result: 0 when every check passed.
if( $retval ) {
   echo "\nLooks good!\n";
   exit( 0 );
} else {
   echo "\nOh noeees\n";
   exit( -1 );
}

?>

End script***

Here is the file userDupes.inc

Start File

<?php

# Copyright (C) 2005 Brion Vibber <brion@pobox.com>
# http://www.mediawiki.org/
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# http://www.gnu.org/copyleft/gpl.html

/**
 * Look for duplicate user table entries and optionally prune them.
 *
 * Counters ($reassigned, $trimmed, $failed) are reset by checkDupes() and
 * updated by examine(). All status output goes to stdout.
 */
class UserDupes {

   var $db;
   var $reassigned;
   var $trimmed;
   var $failed;
   
   /**
    * @param object $database database handle; kept by reference
    */
   function UserDupes( &$database ) {
       $this->db =& $database;
   }
   
   /**
    * Check if this database's user table has already had a unique
    * user_name index applied.
    * @return bool
    */
   function hasUniqueIndex() {
       $fname = 'UserDupes::hasUniqueIndex';
       $info = $this->db->indexInfo( 'user', 'user_name', $fname );
       if( !$info ) {
           echo "WARNING: doesn't seem to have user_name index at all!\n";
           return false;
       }
       
       # Confusingly, 'Non_unique' is 0 for *unique* indexes,
       # and 1 for *non-unique* indexes. Pass the crack, MySQL,
       # it's obviously some good stuff!
       return ( $info->Non_unique == 0 );
   }
   
   /**
    * Checks the database for duplicate user account records
    * and remove them in preparation for application of a unique
    * index on the user_name field. Returns true if the table is
    * clean or if duplicates have been resolved automatically.
    *
    * May return false if there are unresolvable problems.
    * Status information will be echo'd to stdout.
    *
    * @return bool
    */
   function clearDupes() {
       return $this->checkDupes( true );
   }
   
   /**
    * Checks the database for duplicate user account records
    * in preparation for application of a unique index on the
    * user_name field. Returns true if the table is clean or
    * if duplicates can be resolved automatically.
    *
    * Returns false if there are duplicates and resolution was
    * not requested. (If doing resolution, edits may be reassigned.)
    * Status information will be echo'd to stdout.
    *
    * The affected tables are locked for the duration of the scan.
    *
    * @param bool $doDelete pass true to actually remove things
    *                       from the database; false to just check.
    * @return bool
    */
   function checkDupes( $doDelete = false ) {
       global $wgDBname;
       
       if( $this->hasUniqueIndex() ) {
           echo "$wgDBname already has a unique index on its user table.\n";
           return true;
       }
       
       $this->lock();
       
       echo "Checking for duplicate accounts...\n";
       $dupes = $this->getDupes();
       $count = count( $dupes );
       
       echo "Found $count accounts with duplicate records on $wgDBname.\n";
       $this->trimmed    = 0;
       $this->reassigned = 0;
       $this->failed     = 0;
       foreach( $dupes as $name ) {
           $this->examine( $name, $doDelete );
       }
       
       $this->unlock();
       
       echo "\n";
       
       if( $this->reassigned > 0 ) {
           if( $doDelete ) {
               echo "$this->reassigned duplicate accounts had edits reassigned to a canonical record id.\n";
           } else {
               echo "$this->reassigned duplicate accounts need to have edits reassigned.\n";
           }
       }
       
       if( $this->trimmed > 0 ) {
           if( $doDelete ) {
               echo "$this->trimmed duplicate user records were deleted from $wgDBname.\n";
           } else {
               echo "$this->trimmed duplicate user accounts were found on $wgDBname which can be removed safely.\n";
           }
       }
       
       if( $this->failed > 0 ) {
           echo "Something terribly awry; $this->failed duplicate accounts were not removed.\n";
           return false;
       }
       
       # Clean either because nothing needed trimming or because it was trimmed just now.
       if( $this->trimmed == 0 || $doDelete ) {
           echo "It is now safe to apply the unique index on user_name.\n";
           return true;
       } else {
           echo "Run this script again with the --fix option to automatically delete them.\n";
           return false;
       }
   }
   
   /**
    * We don't want anybody to mess with our stuff...
    * Issues LOCK TABLES ... WRITE on the user table plus the edit tables
    * for the detected schema.
    * @access private
    */
   function lock() {
       $fname = 'UserDupes::lock';
       if( $this->newSchema() ) {
           $set = array( 'user', 'revision' );
       } else {
           $set = array( 'user', 'cur', 'old' );
       }
       $names = array_map( array( $this, 'lockTable' ), $set );
       $tables = implode( ',', $names );
       
       $this->db->query( "LOCK TABLES $tables", $fname );
   }
   
   /**
    * Build one "<table> WRITE" fragment for the LOCK TABLES statement.
    * @param string $table unqualified table name
    * @return string
    */
   function lockTable( $table ) {
       return $this->db->tableName( $table ) . ' WRITE';
   }
   
   /**
    * @return bool true when a class named Revision is defined; selects the
    *              'revision' table over 'cur'/'old'
    * @access private
    */
   function newSchema() {
       return class_exists( 'Revision' );
   }
   
   /**
    * Release the locks taken by lock().
    * @access private
    */
   function unlock() {
       $fname = 'UserDupes::unlock';
       $this->db->query( "UNLOCK TABLES", $fname );
   }
   
   /**
    * Grab usernames for which multiple records are present in the database.
    * @return array
    * @access private
    */
   function getDupes() {
       $fname = 'UserDupes::getDupes';
       $user = $this->db->tableName( 'user' );
       $result = $this->db->query(
            "SELECT user_name,COUNT(*) AS n
               FROM $user
           GROUP BY user_name
             HAVING n > 1", $fname );
       
       $list = array();
       while( $row = $this->db->fetchObject( $result ) ) {
           $list[] = $row->user_name;
       }
       $this->db->freeResult( $result );
       
       return $list;
   }
   
   /**
    * Examine user records for the given name. Try to see which record
    * will be the one that actually gets used, then check remaining records
    * for edits. If the dupes have no edits, we can safely remove them.
    *
    * The first row returned by the select is treated as the record to keep;
    * every later row is a duplicate. Updates $reassigned, $trimmed and
    * $failed.
    *
    * @param string $name
    * @param bool $doDelete
    * @access private
    */
   function examine( $name, $doDelete ) {
       $fname = 'UserDupes::examine';
       $result = $this->db->select( 'user',
           array( 'user_id' ),
           array( 'user_name' => $name ),
           $fname );
       
       $firstRow = $this->db->fetchObject( $result );
       $firstId  = $firstRow->user_id;
       echo "Record that will be used for '$name' is user_id=$firstId\n";
       
       while( $row = $this->db->fetchObject( $result ) ) {
           $dupeId = $row->user_id;
           echo "... dupe id $dupeId: ";
           $edits = $this->editCount( $dupeId );
           if( $edits > 0 ) {
               $this->reassigned++;
               echo "has $edits edits! ";
               if( $doDelete ) {
                   $this->reassignEdits( $dupeId, $firstId );
                   # Re-count to confirm the reassignment actually emptied the dupe.
                   $newEdits = $this->editCount( $dupeId );
                   if( $newEdits == 0 ) {
                       echo "confirmed cleaned. ";
                   } else {
                       $this->failed++;
                       echo "WARNING! $newEdits remaining edits for $dupeId; NOT deleting user.\n";
                       continue;
                   }
               } else {
                   echo "(will need to reassign edits on fix)";
               }
           } else {
               echo "ok, no edits. ";
           }
           # Counted in check-only mode too, so the summary can report what would be removed.
           $this->trimmed++;
           if( $doDelete ) {
               $this->trimAccount( $dupeId );
           }
           echo "\n";
       }
       $this->db->freeResult( $result );
   }
   
   /**
    * Count the number of edits attributed to this user.
    * Does not currently check log table or other things
    * where it might show up...
    * @param int $userid
    * @return int
    * @access private
    */
   function editCount( $userid ) {
       if( $this->newSchema() ) {
           return $this->editCountOn( 'revision', 'rev_user', $userid );
       } else {
           return $this->editCountOn( 'cur', 'cur_user', $userid ) +
               $this->editCountOn( 'old', 'old_user', $userid );
       }
   }
   
   /**
    * Count the number of hits on a given table for this account.
    * @param string $table
    * @param string $field
    * @param int $userid
    * @return int
    * @access private
    */
   function editCountOn( $table, $field, $userid ) {
       $fname = 'UserDupes::editCountOn';
       return intval( $this->db->selectField(
           $table,
           'COUNT(*)',
           array( $field => $userid ),
           $fname ) );
   }
   
   /**
    * Move every edit attributed to one user id over to another, on each
    * edit table of the detected schema.
    * @param int $from
    * @param int $to
    * @access private
    */
   function reassignEdits( $from, $to ) {
       $set = $this->newSchema()
           ? array( 'revision' => 'rev_user' )
           : array( 'cur' => 'cur_user', 'old' => 'old_user' );
       foreach( $set as $table => $field ) {
           $this->reassignEditsOn( $table, $field, $from, $to );
       }
   }
   
   /**
    * Set $field to $to on every row of $table where it currently equals $from.
    * @param string $table
    * @param string $field
    * @param int $from
    * @param int $to
    * @access private
    */
   function reassignEditsOn( $table, $field, $from, $to ) {
       $fname = 'UserDupes::reassignEditsOn';
       echo "reassigning on $table... ";
       $this->db->update( $table,
           array( $field => $to ),
           array( $field => $from ),
           $fname );
       echo "ok. ";
   }
   
   /**
    * Remove a user account line.
    * @param int $userid
    * @access private
    */
   function trimAccount( $userid ) {
       $fname = 'UserDupes::trimAccount';
       echo "deleting...";
       $this->db->delete( 'user', array( 'user_id' => $userid ), $fname );
       echo " ok";
   }
   

}


?>


end file

Here is the file commandLine.inc

Start file

<?php /**

* @todo document
* @package MediaWiki
* @subpackage Maintenance
*/

/** */

# Abort if called from a web server

# A REQUEST_METHOD entry in $_SERVER is taken as the sign of a web request;
# in that case print a notice and stop.
if ( isset( $_SERVER ) && array_key_exists( 'REQUEST_METHOD', $_SERVER ) ) {
   echo "This script must be run from the command line\n";
   exit;
}

# Define the MEDIAWIKI constant for the files included afterwards.
define( 'MEDIAWIKI', true );

# Process command line arguments
# $options becomes an array with keys set to the option names
# $optionsWithArgs is an array of GNU-style options that take an argument. The arguments are returned
# in the values of $options.
# $args becomes a zero-based array containing the non-option arguments

# The including script may predefine $optionsWithArgs; 'conf' is always added.
if ( !isset( $optionsWithArgs ) ) {
   $optionsWithArgs = array();
}
$optionsWithArgs[] = 'conf'; # For specifying the location of LocalSettings.php

# Drop the script name from $argv. The install path ($IP) is the parent of
# the directory holding this file, and becomes the working directory.
$self = array_shift( $argv );
$self = __FILE__;
$IP = realpath( dirname( $self ) . "/.." );
chdir( $IP );

$options = array();
$args = array();

# Walk $argv with the internal array pointer so that an option taking a
# value can consume the following element via next().
for( $arg = reset( $argv ); $arg !== false; $arg = next( $argv ) ) {
   if ( substr( $arg, 0, 2 ) == '--' ) {
       # Long options
       $option = substr( $arg, 2 );
       if ( in_array( $option, $optionsWithArgs ) ) {
           $param = next( $argv );
           if ( $param === false ) {
               die( "$arg needs an value after it\n" );
           }
           $options[$option] = $param;
       } else {
           # "--name=value" carries its value inline; a bare "--name" is stored as 1
           $bits = explode( '=', $option, 2 );
           if( count( $bits ) > 1 ) {
               $option = $bits[0];
               $param = $bits[1];
           } else {
               $param = 1;
           }
           $options[$option] = $param;
       }
   } elseif ( substr( $arg, 0, 1 ) == '-' ) {
       # Short options: each character after the dash is its own option
       for ( $p=1; $p<strlen( $arg ); $p++ ) {
           $option = $arg[$p];
           if ( in_array( $option, $optionsWithArgs ) ) {
               $param = next( $argv );
               if ( $param === false ) {
                   die( "$arg needs an value after it\n" );
               }
               $options[$option] = $param;
           } else {
               $options[$option] = 1;
           }
       }
   } else {
       $args[] = $arg;
   }
}

# General initialisation

$wgCommandLineMode = true;

# Turn off output buffering if it's on
@ob_end_flush();
# Pick the include_path separator: ';' if the current include_path contains one, ':' otherwise
$sep = strchr( $include_path = ini_get( "include_path" ), ";" ) ? ";" : ":";

if (!isset( $wgUseNormalUser ) ) {
   $wgUseNormalUser = false;
}

# The presence of this file selects the wiki-farm configuration branch
if ( file_exists( '/home/wikipedia/common/langlist' ) ) {
   $wgWikiFarm = true;
   require_once( "$IP/includes/SiteConfiguration.php" );
   # Get $conf
   require( "$IP/InitialiseSettings.php" );
   if ( empty( $wgNoDBParam ) ) {
       # Check if we were passed a db name
       $db = array_shift( $args );
       list( $site, $lang ) = $wgConf->siteFromDB( $db );
       # If not, work out the language and site the old way
       if ( is_null( $site ) || is_null( $lang ) ) {
           if ( !$db ) {    
               $lang = "aa";
           } else {
               $lang = $db;
           }
           if ( isset( $args[0] ) ) {
               $site = array_shift( $args );
           } else {
               $site = "wikipedia";
           }
       }
   } else {
       $lang = "aa";
       $site = "wikipedia";
   }
   # This is for the IRC scripts, which now run as the apache user
   # The apache user doesn't have access to the wikiadmin_pass command
   # ($_ENV may not contain USER, so check before reading it)
   if ( isset( $_ENV['USER'] ) && $_ENV['USER'] == "apache" ) {
       $wgUseNormalUser = true;
   }
   putenv( "wikilang=$lang");
   $DP = $IP;
   ini_set( "include_path", ".:$IP:$IP/includes:$IP/languages:$IP/maintenance" );
   require_once( "$IP/includes/Defines.php" );
   require_once( "$IP/CommonSettings.php" );
   if ( !$wgUseNormalUser ) {
       $wgDBuser = $wgDBadminuser = "wikiadmin";
       # Password is the trimmed output of the wikiadmin_pass shell command
       $wgDBpassword = $wgDBadminpassword = trim(`wikiadmin_pass`);
   }
} else {
   $wgWikiFarm = false;
   # --conf overrides the default settings file location
   if ( isset( $options['conf'] ) ) {
       $settingsFile = $options['conf'];
   } else {
       $settingsFile = "$IP/LocalSettings.php";
   }
   if ( ! is_readable( $settingsFile ) ) {
       print "A copy of your installation's LocalSettings.php\n" .
         "must exist in the source directory.\n";
       exit();
   }
   $wgCommandLineMode = true;
   $DP = $IP;
   require_once( "$IP/includes/Defines.php" );
   require_once( $settingsFile );
   ini_set( "include_path", ".$sep$IP$sep$IP/includes$sep$IP/languages$sep$IP/maintenance" );
   
   # AdminSettings.php is optional
   if ( is_readable( "$IP/AdminSettings.php" ) ) {
       require_once( "$IP/AdminSettings.php" );
   }
}

# Turn off output buffering again, it might have been turned on in the settings files
@ob_end_flush();

# Same with these
$wgCommandLineMode = true;

# Unless the normal user was requested, use the admin credentials, both in
# the globals and in every configured server entry. $wgDBservers may be unset.
if ( empty( $wgUseNormalUser ) && isset( $wgDBadminuser ) && !empty( $wgDBservers ) ) {
   $wgDBuser = $wgDBadminuser;
   $wgDBpassword = $wgDBadminpassword;
   foreach ( $wgDBservers as $i => $server ) {
       $wgDBservers[$i]['user'] = $wgDBuser;
       $wgDBservers[$i]['password'] = $wgDBpassword;
   }
}

# Lift the memory limit
ini_set( 'memory_limit', -1 );

require_once( "Setup.php" );
require_once( "install-utils.inc" );
$wgTitle = Title::newFromText( "Command line script" );
# Lift the execution time limit
set_time_limit(0);

// -------------------------------------------------------------------- // Functions // --------------------------------------------------------------------

/**
 * Pause while the lag reported by $wgLoadBalancer->getMaxLag() exceeds
 * $maxLag, printing a status line and sleeping $maxLag seconds per round.
 * Does nothing when $maxLag is falsy.
 *
 * @param int $maxLag lag threshold; also used as the sleep interval in seconds
 */
function wfWaitForSlaves( $maxLag ) {
   global $wgLoadBalancer;

   if ( !$maxLag ) {
       return;
   }

   for ( ;; ) {
       list( $laggedHost, $lagSeconds ) = $wgLoadBalancer->getMaxLag();
       if ( !( $lagSeconds > $maxLag ) ) {
           break;
       }

       # Show the reverse-resolved name when the lookup returns one
       $resolved = @gethostbyaddr( $laggedHost );
       $label = ( $resolved !== false ) ? $resolved : $laggedHost;

       print "Waiting for $label (lagged $lagSeconds seconds)...\n";
       sleep( $maxLag );
   }
}


?>


EndFile