User:Millbot-Stats/genstats.py

From Meta, a Wikimedia project coordination wiki

Note that this program is not licensed under the GFDL, but under the AGPL!

#!/usr/bin/python
# -*- coding: utf-8 -*-
# 
# Millbot-Stats, v. 1.1. A bot for generating statistics at MediaWiki sites.
# Copyright (C) 2008 Milos Rancic <millosh@gmail.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import os
import sys
import time
import pickle
import stats
mydir = "./"
sys.path.append(mydir)
sys.path.append(mydir + "pywikipedia/")
from os.path import *
from wikipedia import *

#languages['ar'] = {
#	'name': "Arabic",
#	'full name': "Arabic language",
#	}
#projects['wikipedia'] = {
#	'baseurl': "wikipedia.org",
#	'lang addition type': "prefix",
#	'suffix': "/wiki/",
#	'langs': [ 'ar', '...', ],
#	'template': 'Template:Wikipedia statistics',  
#	}

def _load_pickle(path):
	"""Unpickle and return the object stored in the file at `path`.

	The file is opened in the default (text) mode, as before, and is
	always closed again, even when unpickling fails.  try/finally is used
	instead of a `with` block so the script keeps working on old Python 2
	interpreters.
	"""
	handle = open(path)
	try:
		return pickle.load(handle)
	finally:
		handle.close()

# NOTE(review): unpickling can execute arbitrary code, so these two files
# must only ever come from a trusted source.
# The commented-out samples above show the expected shape of both mappings.
projects = _load_pickle("projects.pickle")
languages = _load_pickle("languages.pickle")

# The translations configuration is ordinary Python source executed in this
# module's namespace; it is presumably what defines `translations`, which is
# used below -- confirm against translations.conf.py.
# NOTE(review): execfile runs whatever the file contains; keep it trusted.
transfile = mydir + "translations.conf.py"
execfile(transfile)

# Give every known language a translation table, and make sure each table can
# translate every language code and every short language name.  Entries that
# already exist are left untouched.
for plang in languages:
	table = translations.setdefault(plang, {})
	for slang in languages:
		language = languages[slang]
		# A language code falls back to the language's full name.
		if slang not in table:
			table[slang] = language['full name']
		# A short language name falls back to itself.
		short_name = language['name']
		if short_name not in table:
			table[short_name] = short_name

# Root directory for everything this run downloads and generates.
datadir = "%s%s" % (mydir, "data/")

#wikipedia_project = projects['wikipedia']
##projects = { 'wikiversity': projects['wikiversity'] }

# Results collected per project family by the download loop below.
truefalse = {}		# family -> language code -> "true" / "false"
pr_listwikis = {}	# family -> list of sort keys
pr_dictwikis = {}	# family -> sort key -> statistics of one wiki
pr_totals = {}		# family -> totals over all wikis of that family

# Counters taken from each wiki's raw statistics line.
_STAT_FIELDS = ('good', 'total', 'edits', 'admins', 'users', 'images')


def _fetch_raw_stats(url, path):
	"""Download `url` to `path` with wget and return the saved text.

	Only trailing line breaks are removed, so a response that does not end
	in a newline keeps its last character.  An empty string is returned
	when the download left no readable file behind.
	"""
	os.system("wget -O " + path + " " + url)
	try:
		handle = open(path)
	except IOError:
		return ""
	try:
		return handle.read().rstrip("\r\n")
	finally:
		handle.close()


def _parse_raw_stats(row):
	"""Extract the counters named in _STAT_FIELDS from a raw statistics line.

	`row` is expected to be a ';'-separated list of key=value pairs, e.g.
	"total=10;good=5;edits=99;...".  Returns a dict mapping each field name
	to its value as a digit string, or None when any field is missing or
	not purely numeric (for instance when an error page was downloaded).
	"""
	pairs = {}
	for item in row.split(";"):
		if "=" in item:
			key, value = item.split("=", 1)
			# Keep the first occurrence of a key.
			pairs.setdefault(key, value)
	parsed = {}
	for field in _STAT_FIELDS:
		value = pairs.get(field, "")
		if not value.isdigit():
			return None
		parsed[field] = value
	return parsed


# The dated output directory is fixed once per run, from a single clock
# reading, so every family lands in the same directory and `daydir` is always
# defined for the report generation further down, even if the run crosses
# midnight or there is nothing to download.
run_start = time.localtime()
oyear = time.strftime("%Y", run_start)
omont = time.strftime("%m", run_start)
odate = time.strftime("%d", run_start)
daydir = datadir + oyear + "/" + omont + "/" + odate + "/"

for fam in projects:
	if fam not in truefalse:
		truefalse[fam] = {}
	project = projects[fam]
	langs = project['langs']

	listwikis = []
	dictwikis = {}
	# Running sums, stored under 'totalgood', 'totaltotal', 'totaledits', ...
	totals = dict(('total' + field, 0) for field in _STAT_FIELDS)

	# The raw files of one family share the time at which the family started.
	fam_start = time.localtime()
	ohour = time.strftime("%H", fam_start)
	ominu = time.strftime("%M", fam_start)

	for lang in langs:
		if project['lang addition type'] != 'prefix':
			# No URL scheme is defined for other types yet (add your own
			# here).  Skip the wiki instead of failing on an unbound `url`
			# or silently re-downloading the previous wiki's URL.
			sys.stderr.write("Skipping %s/%s: unsupported 'lang addition type' %r\n"
				% (fam, lang, project['lang addition type']))
			truefalse[fam][lang] = "false"
			continue
		url = "http://" + lang + "." + project['baseurl'] + project['suffix'] + "Special:Statistics?action=raw"

		odir = daydir + fam + "/" + lang + "/"
		if not os.path.isdir(odir):
			os.makedirs(odir)
		fd = odir + "raw-stats-" + ohour + "-" + ominu + ".txt"

		parsed = _parse_raw_stats(_fetch_raw_stats(url, fd))
		if parsed is None:
			# Empty or unparsable answer: record the failure and carry on
			# with the remaining wikis.
			truefalse[fam][lang] = "false"
			continue

		# Wikis are keyed by their 'good' count; equal counts are nudged
		# down by 0.001 until the key is unique.
		index = float(parsed['good'])
		while index in dictwikis:
			index -= 0.001
		listwikis.append(index)

		entry = {
			'true': 'true',
			'code': lang,
			'time': time.strftime("%Y-%m-%d %H:%M:%S"),
			}
		# The counters are stored as the digit strings that were downloaded.
		entry.update(parsed)
		dictwikis[index] = entry

		for field in _STAT_FIELDS:
			totals['total' + field] += int(parsed[field])
		truefalse[fam][lang] = "true"

	totals['totaltime'] = time.strftime("%Y-%m-%d %H:%M:%S")
	# Largest 'good' count first.
	listwikis.sort(reverse=True)
	pr_dictwikis[fam] = dictwikis
	pr_totals[fam] = totals
	pr_listwikis[fam] = listwikis

# Where the generated statistics are published:
# hosting family -> hosting language code -> {'projects': families to report}.
# A family with an empty mapping hosts no statistics pages.
stats_sites = {
	'wikipedia': {
		'sr': {
			'projects': [
				'wikipedia', 'wiktionary', 'wikibooks', 'wikinews',
				'wikisource', 'wikiversity', 'wikiquote',
				],
			},
		},
	'wiktionary': {},
	'wikibooks': {},
	'wikinews': {},
	'wikisource': {},
	'wikiversity': {},
	'wikiquote': {},
	}

# For every hosting wiki (family `st`, language `s`) and every family `fam` it
# reports on: render the statistics page with stats.engine, then upload it.
for st in stats_sites:
	sites = stats_sites[st]
	for s in sites:
		for fam in sites[s]['projects']:
			dictwikis = pr_dictwikis[fam]
			totals = pr_totals[fam]
			listwikis = pr_listwikis[fam]
			# `daydir` is the dated directory set up by the download loop.
			outfile = daydir + fam + "/stats-" + '-for-' + st + "-" + s + ".txt"
			stats.engine(projects[st]['baseurl'],mydir,outfile,s,listwikis,dictwikis,translations,totals,sites)
			#stats.engine(wikipedia_project['baseurl'],mydir,outfile,s,listwikis,dictwikis,translations,totals,sites)

			# Read the rendered page back and close the file again.
			report = open(outfile)
			try:
				content = report.read().decode('utf-8')
			finally:
				report.close()

			site = getSite(s,st)
			# Page title and edit summary, translated into the hosting language.
			t = translations[s][projects[fam]['template']]
			c = translations[s]['Bot: Updating statistics']
			print(t)
			# Hand Page the resolved site object rather than the bare language
			# code, so the page is written to family `st` and not to whatever
			# family the bot is configured to use by default.
			page = Page(site,t.decode('utf-8'))
			page.put(content,comment=c.decode('utf-8'))