User:Millbot-Stats/genstats.py

From Meta, a Wikimedia project coordination wiki
Jump to: navigation, search

Note that this program is licensed under the AGPL, not the GFDL!

#!/usr/bin/python
# -*- coding: utf-8 -*-
# 
# Millbot-Stats, v. 1.1. A bot for generating statistics at MediaWiki sites.
# Copyright (C) 2008 Milos Rancic <millosh@gmail.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
import os
import pickle
import subprocess
import sys
import time

import stats

mydir = "./"
sys.path.append(mydir)
sys.path.append(mydir + "pywikipedia/")
from os.path import *
from wikipedia import *
 
#languages['ar'] = {
#       'name': "Arabic",
#       'full name': "Arabic language",
#       }
#projects['wikipedia'] = {
#       'baseurl': "wikipedia.org",
#       'lang addition type': "prefix",
#       'suffix': "/wiki/",
#       'langs': [ 'ar', '...', ],
#       'template': 'Template:Wikipedia statistics',  
#       }
 
def _load_pickle(path):
        """Return the object stored in the pickle file at *path*.

        The file is opened in binary mode and is closed again even when
        unpickling fails.
        """
        # NOTE(review): unpickling can execute arbitrary code; these files
        # must only ever come from a trusted source.
        fobj = open(path, "rb")
        try:
                return pickle.load(fobj)
        finally:
                fobj.close()


# Project and language definitions; the commented example entries earlier
# in this file show the expected shape of each.
projects = _load_pickle("projects.pickle")
languages = _load_pickle("languages.pickle")

# The translations file is executed as Python code in this module's
# namespace; presumably it defines the `translations` dict that the rest
# of the script reads -- confirm against translations.conf.py.
transfile = mydir + "translations.conf.py"
execfile(transfile)
 
# Give every language a complete set of fallback translations: for each
# page language, any language code without a translation maps to that
# language's 'full name', and any language name without a translation maps
# to itself.  Existing entries are never overwritten.
for page_lang in languages:
        fallback = translations.setdefault(page_lang, {})
        for code, info in languages.items():
                label = info['name']
                if code not in fallback:
                        fallback[code] = info['full name']
                fallback.setdefault(label, label)
 
datadir = mydir + "data/"

#wikipedia_project = projects['wikipedia']
##projects = { 'wikiversity': projects['wikiversity'] }

# Per-family results, all keyed by family name:
#   pr_dictwikis[fam]  sort index -> statistics record of one wiki
#   pr_listwikis[fam]  the sort indexes, largest first
#   pr_totals[fam]     counters summed over all wikis of the family
#   truefalse[fam]     language code -> "true"/"false" (usable stats or not)
pr_dictwikis = {}
pr_totals = {}
pr_listwikis = {}
truefalse = {}

# Counters that every raw statistics row has to provide.
STAT_FIELDS = ('good', 'total', 'edits', 'admins', 'users', 'images')


def _download(url, path):
        """Fetch *url* into the file *path* with wget."""
        # An argument list (no shell) keeps the "?" in the URL and any
        # unusual character in the path from being interpreted by a shell.
        subprocess.call(["wget", "-O", path, url])


def _parse_raw_stats(row):
        """Parse one "key=value;key=value;..." statistics row.

        Returns a dict mapping each name in STAT_FIELDS to its value as a
        string of digits, or None when any of those fields is missing or
        is not a plain non-negative integer (e.g. an error page was
        downloaded instead of the statistics).
        """
        fields = {}
        for pair in row.split(";"):
                key, sep, value = pair.partition("=")
                key = key.strip()
                # Keep the first occurrence of a key.
                if sep and key not in fields:
                        fields[key] = value.strip()
        parsed = {}
        for name in STAT_FIELDS:
                value = fields.get(name, "")
                if not value.isdigit():
                        return None
                parsed[name] = value
        return parsed


# Read the clock once, so that every directory and file name of this run
# carries the same date and time even when the run crosses a minute or
# midnight boundary.
now = time.localtime()
oyear = time.strftime("%Y", now)
omont = time.strftime("%m", now)
odate = time.strftime("%d", now)
ohour = time.strftime("%H", now)
ominu = time.strftime("%M", now)
# Defined before the loops because later parts of the script reuse it.
daydir = datadir + oyear + "/" + omont + "/" + odate + "/"

for fam in projects:
        if fam not in truefalse:
                truefalse[fam] = {}
        project = projects[fam]
        langs = project['langs']

        listwikis = []
        dictwikis = {}
        sums = dict.fromkeys(STAT_FIELDS, 0)

        for lang in langs:
                if project['lang addition type'] != 'prefix':
                        # Only the 'prefix' URL scheme is implemented; add
                        # further types here.  Record the wiki as failed
                        # instead of fetching a stale or undefined URL.
                        sys.stderr.write("Unsupported 'lang addition type' for "
                                         + fam + "/" + lang + "\n")
                        truefalse[fam][lang] = "false"
                        continue
                url = "http://" + lang + "." + project['baseurl'] + project['suffix'] + "Special:Statistics?action=raw"

                odir = daydir + fam + "/" + lang + "/"
                if not os.path.isdir(odir):
                        os.makedirs(odir)
                fd = odir + "raw-stats-" + ohour + "-" + ominu + ".txt"
                _download(url, fd)

                raw = open(fd)
                try:
                        # Strip surrounding whitespace only; the data itself
                        # must not lose its last character.
                        row = raw.read().strip()
                finally:
                        raw.close()

                counts = _parse_raw_stats(row)
                if counts is None:
                        # Empty or malformed response.
                        truefalse[fam][lang] = "false"
                        continue

                # Wikis are ranked by their 'good' counter.  The index doubles
                # as the dict key, so ties are nudged down slightly to keep
                # every key unique.
                index = float(counts['good'])
                while index in dictwikis:
                        index -= 0.001
                listwikis.append(index)

                record = {
                        'true': 'true',
                        'code': lang,
                        'time': time.strftime("%Y-%m-%d %H:%M:%S"),
                        }
                # The counters stay strings, exactly as downloaded.
                record.update(counts)
                dictwikis[index] = record

                for name in STAT_FIELDS:
                        sums[name] += int(counts[name])
                truefalse[fam][lang] = "true"

        totals = {
                'totalgood': sums['good'],
                'totaltotal': sums['total'],
                'totaledits': sums['edits'],
                'totaladmins': sums['admins'],
                'totalusers': sums['users'],
                'totalimages': sums['images'],
                'totaltime': time.strftime("%Y-%m-%d %H:%M:%S")
                }
        # Largest wiki first.
        listwikis.sort(reverse=True)
        pr_dictwikis[fam] = dictwikis
        pr_totals[fam] = totals
        pr_listwikis[fam] = listwikis
 
# Wikis on which statistics pages are maintained, keyed first by project
# family and then by language code.  Each wiki entry lists, under
# 'projects', the project families it is configured for.  A family with
# an empty dict has no wiki configured yet.
stats_sites = dict(
        (family, {}) for family in (
                'wikipedia', 'wiktionary', 'wikibooks', 'wikinews',
                'wikisource', 'wikiversity', 'wikiquote',
                )
        )
stats_sites['wikipedia']['sr'] = dict(
        projects=[
                'wikipedia',
                'wiktionary',
                'wikibooks',
                'wikinews',
                'wikisource',
                'wikiversity',
                'wikiquote',
                ],
        )
 
# For every configured target wiki (family st, language s) and every
# project family listed for it: run stats.engine for that family, read the
# resulting file and save its contents to the family's statistics template
# page on the target wiki.
for st, sites in stats_sites.items():
        for s in sites:
                for fam in sites[s]['projects']:
                        dictwikis = pr_dictwikis[fam]
                        totals = pr_totals[fam]
                        listwikis = pr_listwikis[fam]
                        outfile = daydir + fam + "/stats-" + '-for-' + st + "-" + s + ".txt"
                        # presumably stats.engine writes the page text to
                        # outfile, since the file is read back right below.
                        stats.engine(projects[st]['baseurl'],mydir,outfile,s,listwikis,dictwikis,translations,totals,sites)
                        #stats.engine(wikipedia_project['baseurl'],mydir,outfile,s,listwikis,dictwikis,translations,totals,sites)
                        rendered = open(outfile)
                        try:
                                content = rendered.read().decode('utf-8')
                        finally:
                                rendered.close()
                        site = getSite(s,st)
                        # Page title and edit summary, in the target language.
                        t = translations[s][projects[fam]['template']]
                        c = translations[s]['Bot: Updating statistics']
                        print(t)
                        # Use the Site object for (s, st): a bare language
                        # code would not carry the family st.
                        page = Page(site,t.decode('utf-8'))
                        page.put(content,comment=c.decode('utf-8'))
Personal tools
Namespaces

Variants
Actions
Navigation
Community
Beyond the Web
Print/export
Toolbox