User:Sylvain Schmitz~metawiki/Watchlist RSS feed in PHP

It's a hack, it's a kludge, it's localized for the French Wikipedia, it's an ugly tiny script written in PHP. But it does generate an RSS feed from the Special:Watchlist recent changes.

Feel free to modify and improve this GFDL-licensed code! Syntax highlighting is generated by serving the file with the .phps extension.

Installation

Copy this code into a .php file on a server; a cookie file named .htcookie.$wp_domain must be readable and writable by the server. Point your feed aggregator to the PHP script; you can append a different watchlist path to the script URL as an extra parameter, for instance to hide your own entries.

The Code

<?php
/*
 * Watchlist RSS feed.
 *
 * Logs in to the configured wiki (when the stored session cookie is
 * missing or about to expire), downloads the watchlist HTML page, picks
 * the page title, time and author out of every list item, and prints
 * the result as an RSS 1.0 (RDF) document.
 *
 * example call:
 *   fr.wikipedia.rss/w/index.php?title=Special:Watchlist&hideOwn=1
 */

/****************************************************************** Setup. */

// user name and password on the targeted wikipedia
$wp_name = 'login';
$wp_password = 'password';

// default domain and path
$wp_domain = 'fr.wikipedia.org';
$wp_watchlist = '/wiki/Special:Watchlist';

// maximum number of entries in the feed
$max_entries = 20;

// time zone on the server
$wp_tmz = "+01:00";

// localized array for month names
$months = array ("janvier" => "01", "février" => "02", "mars" => "03",
                 "avril" => "04", "mai" => "05", "juin" => "06",
                 "juillet" => "07", "août" => "08", "septembre" => "09",
                 "octobre" => "10", "novembre" => "11", "décembre" => "12");

// localized user pages prefix
$wp_userpage = "Utilisateur:";

// localized title
$wp_title = "Liste de suivi";

// localized description
$wp_description = $wp_title." de ".$wp_name;

/*********************************************************** End of setup. */

/**
 * Convert a localized day heading into the date part of an ISO 8601
 * timestamp.
 *
 * @param string $date_str heading text of the form "day month year",
 *                         the month being a key of the global $months
 * @return string "YYYY-MM-DDT", or "" when the heading does not have
 *                three fields or names an unknown month
 */
function to_iso8601 ($date_str)
{
  global $months;

  $date_fields = explode (" ", trim ($date_str));
  if (count ($date_fields) < 3 || !isset ($months[$date_fields[1]]))
    return "";

  // single-digit days are zero padded
  $day = str_pad ($date_fields[0], 2, "0", STR_PAD_LEFT);
  return $date_fields[2]."-".$months[$date_fields[1]]."-".$day."T";
}

/**
 * Return the text found between the first occurrence of $marker and the
 * next double quote.
 *
 * @param string $html   fragment to search
 * @param string $marker text that immediately precedes the wanted value
 * @return string|false  the value, or FALSE when the marker or the
 *                       closing quote is missing
 */
function wl_quoted_after ($html, $marker)
{
  $start = strpos ($html, $marker);
  if ($start === FALSE)
    return FALSE;
  $start += strlen ($marker);

  $end = strpos ($html, '"', $start);
  if ($end === FALSE)
    return FALSE;

  return (string) substr ($html, $start, $end - $start);
}

/**
 * Build the article URL used for rdf:li / rdf:about.
 *
 * The title is taken from an HTML attribute, so character references
 * are decoded before the URL encoding is applied; otherwise "&amp;"
 * would end up in the URL as "%26amp%3B".
 *
 * @param string $domain wiki host name
 * @param string $title  page title as found in the HTML
 * @return string
 */
function wl_page_url ($domain, $title)
{
  $plain = html_entity_decode ($title, ENT_QUOTES, 'UTF-8');
  return "http://$domain/wiki/".urlencode (str_replace (" ", "_", $plain));
}

// process the script request: whatever follows the script name in the
// request URI replaces the default watchlist path, provided it is a path
$ruri = (string) substr ($_SERVER['REQUEST_URI'],
                         strlen ($_SERVER['SCRIPT_NAME']));
if (strncmp ($ruri, '/', 1) == 0) {
  $wp_watchlist = $ruri;
}

// name of the cookie file
$cookie_file = ".htcookie.$wp_domain";

// get the expiration time from the cookie; the file does not exist
// before the first login, so test it first instead of letting fopen()
// emit a warning ahead of the HTTP headers
$time = 0;
if (is_readable ($cookie_file)) {
  $cookie_fp = fopen ($cookie_file, "r");
  if ($cookie_fp) {
    while (($cookie = fgets ($cookie_fp, 4096)) !== FALSE) {
      if (strpos ($cookie, "wikiUserID") !== FALSE) {
        // tab separated cookie line; the fifth field is read as the
        // expiration time
        $ce = explode ("\t", $cookie);
        if (isset ($ce[4]))
          $time = (int) $ce[4];
        break;
      }
    }
    fclose ($cookie_fp);
  }
}

// check whether a new login is needed
if (($time - 60) < time ()) {
  // login URL
  $wp_login = '/w/index.php?title=Special:Userlogin'
    .'&action=submitlogin&type=login';

  $postdata = array (
    'wpName'         => $wp_name,
    'wpPassword'     => $wp_password,
    'wpRemember'     => '1',
    'wpLoginattempt' => 'true',
  );

  // login connection
  $login = curl_init ();
  curl_setopt ($login, CURLOPT_POST, TRUE);
  // explicit '&' so that a php.ini arg_separator.output setting cannot
  // alter the request body
  curl_setopt ($login, CURLOPT_POSTFIELDS,
               http_build_query ($postdata, '', '&'));
  // keep the login response out of our own output: without this option
  // curl_exec() echoes it in front of the headers and the XML prolog
  curl_setopt ($login, CURLOPT_RETURNTRANSFER, TRUE);
  curl_setopt ($login, CURLOPT_COOKIEJAR, $cookie_file);
  curl_setopt ($login, CURLOPT_URL, $wp_domain.$wp_login);
  curl_exec ($login);
  curl_close ($login);
}

// grab the contents
$content = curl_init ();
curl_setopt ($content, CURLOPT_RETURNTRANSFER, TRUE);
curl_setopt ($content, CURLOPT_COOKIEFILE, $cookie_file);
curl_setopt ($content, CURLOPT_COOKIEJAR, $cookie_file);
curl_setopt ($content, CURLOPT_URL, $wp_domain.$wp_watchlist);
$watchlist = curl_exec ($content);
curl_close ($content);

// a failed transfer yields an empty (but still well-formed) feed
if (!is_string ($watchlist))
  $watchlist = "";

// explode the contents by days
$days = explode ("<h4>", $watchlist);
$entries = array ();
$times = array ();
$authors = array ();
$nentries = 0;

for ($i = 1; $i < sizeof ($days) && $nentries < $max_entries; $i++) {
  // the heading text runs up to the closing tag
  $heading_end = strpos ($days[$i], "</h4>");
  $the_date = ($heading_end === FALSE)
    ? ""
    : to_iso8601 (substr ($days[$i], 0, $heading_end));

  $tmp = explode ("<li>", $days[$i]);
  for ($j = 1; $j < sizeof ($tmp) && $nentries < $max_entries; $j++) {
    // the first title attribute is taken as the page title; an item
    // without one cannot be turned into a feed entry
    $title = wl_quoted_after ($tmp[$j], ' title="');
    if ($title === FALSE)
      continue;

    // the time is the HH:MM that follows "; "; leave the date empty
    // when either the day or the time could not be recognized
    $when = "";
    if ($the_date != "" && preg_match ('/; (\d\d:\d\d)/', $tmp[$j], $m))
      $when = $the_date.$m[1].$wp_tmz;

    // the author is read from the first link to a user page
    $author = wl_quoted_after ($tmp[$j], ' title="'.$wp_userpage);

    $entries[$nentries] = $title;
    $times[$nentries] = $when;
    $authors[$nentries] = ($author === FALSE) ? "" : $author;
    $nentries++;
  }
}

/********************************************************* RSS generation. */

// characters that must not appear raw in XML text or attribute values;
// "&" comes first so the entities produced for the others are not
// escaped a second time
$disallowed_xml = array ("&", "<", ">", "\"");
$replacements_xml = array ("&amp;", "&lt;", "&gt;", "&quot;");

$channel_url = "http://".$wp_domain
  .str_replace ($disallowed_xml, $replacements_xml, $wp_watchlist);

// header
header("Content-Type: application/xml; charset=utf-8");
print ("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n");
print ("<!DOCTYPE rdf:RDF [\n");
print ("<!ENTITY % HTMLlat1 PUBLIC\n");
print (" \"-//W3C//ENTITIES Latin 1 for XHTML//EN\"\n");
print (" \"http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent\">\n");
print ("]>\n");
print ("<rdf:RDF\n");
print (" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" \n");
print (" xmlns:sy=\"http://purl.org/rss/1.0/modules/syndication/\"\n");
print (" xmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n");
//print (" xmlns:content=\"http://purl.org/rss/1.0/modules/content/\"\n");
print (" xmlns=\"http://purl.org/rss/1.0/\"\n");
print (">\n");

// channel summary
print (" <channel rdf:about=\"".$channel_url."\">\n");
print (" <title>$wp_title</title>\n");
print (" <link>".$channel_url."</link>\n");
print (" <description>$wp_description</description>\n");
print (" <dc:source>".$channel_url."</dc:source>\n");
// "P" prints the offset with a colon (+01:00), like the item dates
print (" <dc:date>".date ('Y-m-d\TH:iP')."</dc:date>\n");
print (" <sy:updatePeriod>hourly</sy:updatePeriod>\n");
print (" <sy:updateFrequency>4</sy:updateFrequency>\n");
print (" <sy:updateBase>1970-01-01T00:00+00:00</sy:updateBase>\n");
print (" <items>\n");
print (" <rdf:Seq>\n");
for ($i = 0; $i < $nentries; $i++) {
  print (" <rdf:li resource=\""
         .wl_page_url ($wp_domain, $entries[$i])."\" />\n");
}
print (" </rdf:Seq>\n");
print (" </items>\n");
print ("\n");
print (" </channel>\n");

// items; titles and authors are copied as found in the HTML attributes
for ($i = 0; $i < $nentries; $i++) {
  print (" <item rdf:about=\""
         .wl_page_url ($wp_domain, $entries[$i])."\">\n");
  print (" <title>".$entries[$i]."</title>\n");
  print (" <dc:creator>".$authors[$i]."</dc:creator>\n");
  if ($times[$i] != "")
    print (" <dc:date>".$times[$i]."</dc:date>\n");
  print (" </item>\n\n");
}

// footer
print ("</rdf:RDF>\n");
?>