Manual:Chris G's botclasses/AllPagesBot.php

From Linux Web Expert

The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.

This is a bot that uses Chris G's botclasses to retrieve a list of all pages on the wiki and store that list in two text files: one for the File: namespace and another for all the other namespaces. Customize the API URL, login info, and namespace variables to suit your needs.

<?php
/* AllPagesBot
 * By Leucosticte, https://www.mediawiki.org/wiki/User:Leucosticte
 * GNU Public License 2.0
 *
 * This bot retrieves a list of all pages on the wiki and stores that list in two text files,
 * one for the File: namespace, and another for all the other namespaces.
 */

/* Setup my classes. */
// require_once: without botclasses.php nothing below can work, so stop right here
// with a clear error instead of failing later on the missing class.
require_once( 'botclasses.php' );
$wiki      = new wikipedia;
$wiki->url = "http://en.wikipedia.org/w/api.php";

/* All the login stuff. */
$user = 'REMOVED';
$pass = 'REMOVED';
$wiki->login( $user,$pass );

$namespaces = range( 0, 15 ); // Default namespaces
// Extra namespaces
#$namespaces[] = 500;
#$namespaces[] = 501;
$namespaces = array_filter( $namespaces, "notFile" ); // Filter out the File: namespace

/* Output files: one for the File: namespace (6), one for everything else. */
$pageTitlesFile = 'PageTitles.txt';
$pageTitlesNs6File = 'PageTitlesNs6.txt';

// Open both files before querying the wiki, and stop if either one cannot be
// opened, so that an unwritable directory is reported immediately rather than
// as a stream of fwrite() errors after the API has already been queried.
$pageTitles = fopen ( $pageTitlesFile, 'w' );
if ( $pageTitles === false ) {
    die ( "Could not open $pageTitlesFile for writing\n" );
}
$pageTitlesNs6 = fopen ( $pageTitlesNs6File, 'w' );
if ( $pageTitlesNs6 === false ) {
    die ( "Could not open $pageTitlesNs6File for writing\n" );
}

iterate ( $wiki, $namespaces, $pageTitles ); // Everything but File: namespace
iterate ( $wiki, array ( 6), $pageTitlesNs6 ); // Only the File: namespace

/**
 * array_filter() callback used to drop the File: namespace.
 *
 * @param mixed $var Namespace number being tested.
 * @return bool False for namespace 6 (File:), true for anything else.
 */
function notFile ( $var ) {
    // Loose comparison on purpose: a numeric string such as '6' is treated as 6.
    $isFileNamespace = ( $var == 6 );
    return !$isFileNamespace;
}

/**
 * Retrieve every page title in the given namespaces and write the titles to a
 * stream, one per line.
 *
 * Titles are requested through list=allpages in batches of up to 500. After
 * each batch the continuation values returned by the API are sent back with
 * the next request, until the API returns none. Both the current top-level
 * "continue" block and the legacy "query-continue" block are understood, and
 * whatever parameter names the server supplies (apcontinue, apfrom, continue)
 * are echoed back as-is.
 *
 * @param object   $wiki       Bot object; its query() method is called with a query
 *                             string and is expected to return the decoded API
 *                             response as an array.
 * @param array    $namespaces Namespace numbers to list.
 * @param resource $pageTitles Writable stream that receives the titles.
 */
function iterate ( $wiki, $namespaces, $pageTitles ) {
    foreach ( $namespaces as $namespace ) {
        // Continuation parameters (name => value) for the next request;
        // empty for the first request of each namespace.
        $continue = array();
        do {
            $query = "?action=query&format=php&list=allpages&aplimit=500&apnamespace=$namespace";
            foreach ( $continue as $name => $value ) {
                // Continuation values are derived from page titles and may contain
                // "&", "+", "#" or spaces, so they must be URL-encoded.
                $query .= '&' . rawurlencode ( (string) $name ) . '=' . rawurlencode ( (string) $value );
            }

            $ret = $wiki->query ( $query );
            if ( !is_array ( $ret ) ) {
                // The request failed or returned something unusable; skip the
                // rest of this namespace instead of looping on bad data.
                break;
            }

            if ( isset ( $ret['query']['allpages'] ) && is_array ( $ret['query']['allpages'] ) ) {
                foreach ( $ret['query']['allpages'] as $thisPage ) {
                    fwrite ( $pageTitles, $thisPage['title'] . "\n" );
                }
            }

            if ( isset ( $ret['continue'] ) && is_array ( $ret['continue'] ) ) {
                // Current continuation format.
                $next = $ret['continue'];
            } elseif ( isset ( $ret['query-continue']['allpages'] )
                && is_array ( $ret['query-continue']['allpages'] )
            ) {
                // Legacy continuation format.
                $next = $ret['query-continue']['allpages'];
            } else {
                $next = array();
            }

            // An unchanged continuation would repeat the identical request
            // forever, so treat it as the end of the list.
            if ( $next == $continue ) {
                $next = array();
            }
            $continue = $next;
        } while ( !empty ( $continue ) );
    }
}
// Close both output files; the File: namespace handle was previously left open.
fclose ( $pageTitles );
fclose ( $pageTitlesNs6 );