Usor:Amahoney/1000 Paginae singulis
With this utility it's possible to check the size of a single page (in language-weighted characters), even if it isn't one of the 1000 Pages. The routine is in Perl and requires the Encode package (standard) and the MediaWiki::Bot package (downloaded from CPAN).
Code
#!/usr/bin/perl
# Check the size of a single page by the 1000-Pages rules.
#
# Usage: pass the page title as the only command-line argument.
#
# The script fetches the page's wikitext from la.wikipedia.org (following a
# single redirect if the page is one), removes inter-wiki links and HTML
# comments, trims trailing white space, and prints the remaining character
# count multiplied by the language weight.
#
# Exit status is 0 on success and 1 when no title was supplied or the page
# could not be read.
use strict;
use warnings;
use Encode;
use MediaWiki::Bot;

# Language weight applied to the stripped character count.
my $WEIGHT = 1.1;

my $title = shift @ARGV;
if (!defined $title || $title eq '') {
    warn "must supply page title";
    exit 1;
}

my $vicibot = MediaWiki::Bot->new({assert=>'bot', host=>'la.wikipedia.org', debug=>1});
my $page = $vicibot->get_text($title);
if (defined $page) {
    # Measure the redirect target rather than the redirect stub itself.
    my $redir = redirect_target($page);
    $page = $vicibot->get_text($redir) if defined $redir;
}
$vicibot->logout();

if (!defined $page) {
    warn "could not read page $title";
    exit 1;
}

my $after = weighted_size($page, $WEIGHT);
print "page $title: size $after\n";

# Return the target title if $text is a redirect page, otherwise return
# nothing (undef in scalar context).
#
# A redirect is recognised only at the very start of the text, in any letter
# case, with optional white space around the keyword. The target stops at the
# first ']', '|' or '#', so a section anchor or any further link on the same
# line is not swallowed into the title.
sub redirect_target {
    my ($text) = @_;
    if ($text =~ m{ \A \s* \# REDIRECT \s* :? \s*
                    \[\[ \s* :? ([^\]|\#]+?) \s* (?: \] | \| | \# ) }xi) {
        return $1;
    }
    return;
}

# Return the weighted size of $text: the character count left after the
# strip steps below, multiplied by $weight.
sub weighted_size {
    my ($text, $weight) = @_;

    # Flatten to one line so the non-greedy patterns below can span what were
    # line breaks; each newline still counts as one character.
    $text =~ s|\n| |g;
    # strip inter-wiki links
    $text =~ s|\[\[[a-z][a-z\-]+?\:.*?\]\]||g;
    # strip comments
    $text =~ s|<\!--.*?-->||g;
    # trim trailing white space (newlines were already turned into spaces)
    $text =~ s|[ \t]+\z||;

    return length($text) * $weight;
}