User:Millbot-Stats/genstats.py
From Meta, a Wikimedia project coordination wiki
Note that this program is licensed under the AGPL, not the GFDL!
#!/usr/bin/python # -*- coding: utf-8 -*- # # Millbot-Stats, v. 1.1. A bot for generating statistics at MediaWiki sites. # Copyright (C) 2008 Milos Rancic <millosh@gmail.com> # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as # published by the Free Software Foundation, either version 3 of the # License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. import os import sys import time import pickle import stats mydir = "./" sys.path.append(mydir) sys.path.append(mydir + "pywikipedia/") from os.path import * from wikipedia import * #languages['ar'] = { # 'name': "Arabic", # 'full name': "Arabic language", # } #projects['wikipedia'] = { # 'baseurl': "wikipedia.org", # 'lang addition type': "prefix", # 'suffix': "/wiki/", # 'langs': [ 'ar', '...', ], # 'template': 'Template:Wikipedia statistics', # } projects = pickle.load(open("projects.pickle")) languages = pickle.load(open("languages.pickle")) transfile = mydir + "translations.conf.py" execfile(transfile) for plang in languages: if plang not in translations: translations[plang] = {} for slang in languages: if slang not in translations[plang]: translations[plang][slang] = languages[slang]['full name'] if languages[slang]['name'] not in translations[plang]: translations[plang][languages[slang]['name']] = languages[slang]['name'] datadir = mydir + "data/" #wikipedia_project = projects['wikipedia'] ##projects = { 'wikiversity': projects['wikiversity'] } pr_dictwikis = {} pr_totals = {} pr_listwikis = {} truefalse = {} for fam in projects: if fam not in 
truefalse: truefalse[fam] = {} project = projects[fam] langs = project['langs'] listwikis = [] dictwikis = {} totalgood = 0 totaltotal = 0 totaledits = 0 totaladmins = 0 totalusers = 0 totalimages = 0 oyear = time.strftime("%Y") omont = time.strftime("%m") odate = time.strftime("%d") ohour = time.strftime("%H") ominu = time.strftime("%M") for lang in langs: if project['lang addition type'] == 'prefix': url = "http://" + lang + "." + project['baseurl'] + project['suffix'] + "Special:Statistics?action=raw" # else: ... # define your own types daydir = datadir + oyear + "/" + omont + "/" + odate + "/" odir = daydir + fam + "/" + lang + "/" if not isdir(odir): cmd = "mkdir -p " + odir os.system(cmd) fd = odir + "raw-stats-" + ohour + "-" + ominu + ".txt" command = "wget -O " + fd + " " + url os.system(command) row = file(fd).read()[:-1] if len(row) > 0: cols = re.split(";",row) good = re.sub("^.*?;good=([0-9]+);.*?$","\g<1>",row) total = re.sub("^total=([0-9]+);.*?$","\g<1>",row) edits = re.sub("^.*?;edits=([0-9]+);.*?$","\g<1>",row) admins = re.sub("^.*?;admins=([0-9]+);.*?$","\g<1>",row) users = re.sub("^.*?;users=([0-9]+);.*?$","\g<1>",row) images = re.sub("^.*?;images=([0-9]+);.*?$","\g<1>",row) index = float(good) while index in dictwikis: index -= 0.001 listwikis.append(index) dictwikis[index] = { 'true': 'true', 'code': lang, 'good': good, 'total': total, 'edits': edits, 'admins': admins, 'users': users, 'images': images, 'time': time.strftime("%Y-%m-%d %H:%M:%S"), } totalgood += int(good) totaltotal += int(total) totaledits += int(edits) totaladmins += int(admins) totalusers += int(users) totalimages += int(images) truefalse[fam][lang] = "true" else: truefalse[fam][lang] = "false" totals = { 'totalgood': totalgood, 'totaltotal': totaltotal, 'totaledits': totaledits, 'totaladmins': totaladmins, 'totalusers': totalusers, 'totalimages': totalimages, 'totaltime': time.strftime("%Y-%m-%d %H:%M:%S") } listwikis.sort() listwikis.reverse() pr_dictwikis[fam] = dictwikis 
pr_totals[fam] = totals pr_listwikis[fam] = listwikis stats_sites = {} stats_sites['wikipedia'] = {} stats_sites['wikipedia']['sr'] = { 'projects': [ 'wikipedia', 'wiktionary', 'wikibooks', 'wikinews', 'wikisource', 'wikiversity', 'wikiquote', ], } stats_sites['wiktionary'] = {} stats_sites['wikibooks'] = {} stats_sites['wikinews'] = {} stats_sites['wikisource'] = {} stats_sites['wikiversity'] = {} stats_sites['wikiquote'] = {} for st in stats_sites: sites = stats_sites[st] for s in sites: for fam in sites[s]['projects']: dictwikis = pr_dictwikis[fam] totals = pr_totals[fam] listwikis = pr_listwikis[fam] outfile = daydir + fam + "/stats-" + '-for-' + st + "-" + s + ".txt" stats.engine(projects[st]['baseurl'],mydir,outfile,s,listwikis,dictwikis,translations,totals,sites) #stats.engine(wikipedia_project['baseurl'],mydir,outfile,s,listwikis,dictwikis,translations,totals,sites) content = file(outfile).read().decode('utf-8') site = getSite(s,st) t = translations[s][projects[fam]['template']] c = translations[s]['Bot: Updating statistics'] print t page = Page(s,t.decode('utf-8')) page.put(content,comment=c.decode('utf-8'))