ParseWiki

From Meta, a Wikimedia project coordination wiki

Not ready for the big time yet!

Comments welcomed. Please do not edit the program without testing it first!

# ParseWiki.pl - Parses the Wikipedia RecentChanges page.
# Copyright (C) 2001 Dave McKee
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

# This should not even be considered an alpha version. More a proof-of-theory.

use strict;
use warnings;
use LWP::Simple;

# Address of the RecentChanges listing that gets fetched and parsed.
my $RC_URL = 'http://www.wikipedia.com/wiki.cgi?action=rc&days=1';

# Sample of one list item as it appears in the fetched page. The page is
# split on "<li>", so each chunk handed to _parse_entry() starts right after it:
# <li><a href="/wiki.cgi?action=browse&diff=1&id=Frank">(diff)</a>  <a href="/wiki/Frank">Frank</a> 4:49 pm (2 <a href="/wiki.cgi?action=history&id=Frank">changes</a>)  . . . . . <a href="/wiki.cgi?MichaelTinkler" title="ID 4676 from 24.169.85.xxx">MichaelTinkler</a>

# _parse_entry($text)
#
# Extracts the fields of a single RecentChanges list item.
#
# Returns an empty list when $text does not start with the "(diff)" link
# (i.e. it is not a change entry). Otherwise returns ten values, in order:
#   user link, user name, time, number of changes, edit flag (1 or ''),
#   summary, user ID, IP, page link, page name.
# Fields that cannot be found are reported as 'n/a' (number of changes as 0).
sub _parse_entry {
    my ($text) = @_;

    # m{...} delimiters are used throughout so the many '/' characters in
    # the HTML do not need escaping.
    return unless $text =~ m{^<a href="/wiki\.cgi\?action=browse&diff=1&id=};

    # Page link, page title and the time of the change.
    my ($page_link, $page_name, $time) = ('n/a') x 3;
    if ($text =~ m{<a href="/wiki/([^"]*)">([^<]*)</a> (.{0,7}m)}) {
        ($page_link, $page_name, $time) = ($1, $2, $3);
    }

    # "(N changes)" follows the am/pm marker; absent means no count given.
    my $changes = 0;
    if ($text =~ m{ [ap]m \((\d+) <a href="/wiki\.cgi\?action=history}) {
        $changes = $1;
    }

    # 1 when the entry carries the "(edit)" marker, empty string otherwise.
    my $is_edit = ($text =~ m{<em>\(edit\)</em>}) ? 1 : q{};

    # Change summary, shown as "[...]" in bold.
    my $summary = 'n/a';
    if ($text =~ m{<strong>\[(.*)\]</strong>} and length $1) {
        $summary = $1;
    }

    # Author: either a bare masked IP at the end of the entry (no user
    # link), or a user link whose title carries the ID and the masked IP.
    # In the bare-IP case only the user fields are 'n/a'; the page fields
    # found above are kept.
    my ($user_link, $user_name, $user_id, $ip) = ('n/a') x 4;
    if ($text =~ m{ \. \. ([0-9.]*\.xxx)\s*\z}) {
        $ip = $1;
    }
    elsif ($text =~ m{\. <a href="/wiki\.cgi\?([^"]*)" title="ID (\d+) from ([^"]*\.xxx)">([^<]*)</a>}) {
        ($user_link, $user_id, $ip, $user_name) = ($1, $2, $3, $4);
    }

    return ($user_link, $user_name, $time, $changes, $is_edit,
            $summary, $user_id, $ip, $page_link, $page_name);
}

# Fetch the page; LWP::Simple's get() yields undef on failure.
my $html = get($RC_URL);
die "Could not fetch $RC_URL\n" unless defined $html;

# Flat list of all extracted fields, ten per change entry.
my @g;

for my $chunk (split /<li>/, $html) {
    my @fields = _parse_entry($chunk);
    if (@fields) {
        # Debug aid, one readable line per entry:
        # print "$fields[0]:$fields[1] at $fields[2] ($fields[3]) ($fields[4]) [$fields[5]] by $fields[6] $fields[7] $fields[8]:$fields[9]\n";
        push @g, @fields;
    }
    else {
        # Chunk is not a change entry (e.g. the page header before the first item).
        print 'nowiki';
    }
}
print @g;