ParseWiki
This page is kept for historical interest. Any policies mentioned may be obsolete. If you want to revive the topic, you can use the talk page or start a discussion on the community forum.
Not ready for the big time yet!
Comments welcomed. Please do not edit the program without testing it first!
# ParseWiki.pl - Parses the Wikipedia RecentChanges page.
# Copyright (C) 2001 Dave McKee
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# This should not even be considered an alpha version. More a proof-of-theory.

use strict;
use warnings;

# Recent-changes page that is fetched and split into <li> entries.
my $RC_URL = 'http://www.wikipedia.com/wiki.cgi?action=rc&days=1';

# A typical entry, as seen after the page has been split on <li>
# (a single line in the real page, wrapped here for readability):
#
#   <a href="/wiki.cgi?action=browse&diff=1&id=Frank">(diff)</a>
#   <a href="/wiki/Frank">Frank</a> 4:49 pm
#   (2 <a href="/wiki.cgi?action=history&id=Frank">changes</a>) . . . . .
#   <a href="/wiki.cgi?MichaelTinkler" title="ID 4676 from 24.169.85.xxx">MichaelTinkler</a>
#
# Entries written by anonymous users end in the masked IP address
# (e.g. "24.169.85.xxx") instead of a user link.

# parse_rc_entry($entry)
#
# Parses one chunk of the recent-changes page (the text that followed a
# <li> tag).
#
# Returns the empty list when the chunk is not a change entry, i.e. it
# does not start with the "(diff)" link.  Otherwise returns ten fields,
# in this order:
#
#   0  user link target   ('n/a' for anonymous edits)
#   1  user display name  ('n/a' for anonymous edits)
#   2  time of the change, e.g. "4:49 pm" ('n/a' if not found)
#   3  number of changes  (0 when the entry shows no change count)
#   4  minor-edit flag    (1 if "<em>(edit)</em>" is present, '' otherwise)
#   5  edit summary       ('n/a' when absent or empty)
#   6  user ID            ('n/a' for anonymous edits)
#   7  masked IP address  ('n/a' if not found)
#   8  page link target   ('n/a' if not found)
#   9  page title         ('n/a' if not found)
#
# Every field is initialised on each call, so nothing leaks from one
# entry into the next when a pattern fails to match.
sub parse_rc_entry {
    my ($entry) = @_;

    return if !defined $entry;
    return if $entry !~ m{\A<a href="/wiki\.cgi\?action=browse&diff=1&id=};

    # Page link, page title and time.  Negated character classes keep the
    # captures from running past the attribute / tag they belong to.
    my ($page_link, $page_title, $time) = ('n/a') x 3;
    if ($entry =~ m{<a href="/wiki/([^"]*)">([^<]*)</a> (\d\d?:\d\d [ap]m)}) {
        ($page_link, $page_title, $time) = ($1, $2, $3);
    }

    # "(N changes)" only appears when there is a change count to show.
    my $changes = 0;
    if ($entry =~ m{ [ap]m \((\d+) <a href="/wiki\.cgi\?action=history}) {
        $changes = $1;
    }

    # Literal "(edit)" marker; the parentheses are part of the page text.
    my $is_edit = $entry =~ m{<em>\(edit\)</em>} ? 1 : '';

    # Summary is shown as "<strong>[text]</strong>".  Test the length
    # rather than truth so that a summary of "0" is kept.
    my $summary = 'n/a';
    if ($entry =~ m{<strong>\[(.*?)\]</strong>} && length $1) {
        $summary = $1;
    }

    # Author: either a bare masked IP at the end of the entry (anonymous),
    # or a user link whose title carries the ID and masked IP.  The page
    # fields are left untouched in both cases.
    my ($user_link, $user_name, $user_id, $ip) = ('n/a') x 4;
    if ($entry =~ m{ \. \. ([0-9.]+\.xxx)\s*\z}) {
        $ip = $1;
    }
    elsif ($entry =~ m{\.\s+<a href="/wiki\.cgi\?([^"]*)" title="ID (\d+) from ([0-9.]+\.xxx)">([^<]*)</a>}) {
        ($user_link, $user_id, $ip, $user_name) = ($1, $2, $3, $4);
    }

    return (
        $user_link, $user_name, $time,      $changes, $is_edit,
        $summary,   $user_id,   $ip,        $page_link, $page_title,
    );
}

# main()
#
# Fetches the recent-changes page, parses every <li> chunk and prints
# one tab-separated line per change entry.  Prints "nowiki" for each
# chunk that is not a change entry.  Dies if the page cannot be fetched.
# Returns 0 for use as the process exit status.
sub main {
    # Loaded at run time so that parse_rc_entry() can be loaded on its own
    # without LWP being available.
    require LWP::Simple;

    my $html = LWP::Simple::get($RC_URL);
    die "ParseWiki: could not fetch $RC_URL\n" if !defined $html;

    my @records;
    for my $chunk (split /<li>/, $html) {
        my @fields = parse_rc_entry($chunk);
        if (@fields) {
            push @records, \@fields;
        }
        else {
            print "nowiki\n";
        }
    }

    for my $record (@records) {
        print join("\t", @{$record}), "\n";
    }

    return 0;
}

# Run only when executed as a script, not when loaded from another file.
exit main() unless caller;