List of Wikipedias by sample of articles/Source code

From Meta, a Wikimedia project coordination wiki

Jump to: navigation, search

Here is Smeira's original source code, but it is out of date. The most recent updates have used the source code below. There are three files. The first is the actual program. The second is a utility that generates the list of articles. The third is a utility that generates the list of previous scores used to calculate growth.

Contents

[edit] MakeScoreTable.py

# -*- coding: utf_8 -*-
import sys
sys.path.append('./pywikipedia')
 
import wikipedia
import pagegenerators
import catlib
import traceback
import os
from time import strftime
 
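#language information: maps each Wikipedia language code to a dict holding its
#English 'name' and native 'localname', plus an optional 'weight' (multiplier
#applied to measured article sizes) or an optional 'similar_lang' (code of the
#language whose 'weight' is used when this entry has no 'weight' of its own)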
lang_info ={'en': {'name':u'English',     'localname':u'English',  'weight': 1.0},
            'de': {'name':u'German',      'localname':u'Deutsch',  'weight':1.0},
            'fr': {'name':u'French',      'localname':u'Français', 'weight':1.0},
            'pl': {'name':u'Polish',      'localname':u'Polski',   'weight':1.1},
            'ja': {'name':u'Japanese',    'localname':u'日本語',    'weight':1.9},
            'it': {'name':u'Italian',     'localname':u'Italiano', 'weight':1.1},
            'nl': {'name':u'Dutch',       'localname':u'Nederlands', 'weight':0.9},
            'pt': {'name':u'Portuguese',  'localname':u'Português', 'weight':1.1},
            'es': {'name':u'Spanish',     'localname':u'Español',  'weight':1.1},
            'sv': {'name':u'Swedish',     'localname':u'Svenska',  'weight':1.1},
            'ru': {'name':u'Russian',     'localname':u'Русский',  'weight':1.4},
            'zh': {'name':u'Chinese',     'localname':u'中文',      'weight':3.7},
            'no': {'name':u'Norwegian (Bokmål)','localname':u'Norsk (Bokmål)', 'weight':1.2},
            'fi': {'name':u'Finnish',     'localname':u'Suomi', 'weight':1.1},
            'vo': {'name':u'Volapük',     'localname':u'Volapük'},
            'ca': {'name':u'Catalan',     'localname':u'Català', 'weight':1.1},
            'ro': {'name':u'Romanian',    'localname':u'Română', 'weight':1.1},
            'tr': {'name':u'Turkish',     'localname':u'Türkçe', 'weight':1.3},
            'uk': {'name':u'Ukrainian',   'localname':u'Українська', 'weight':1.3},
            'eo': {'name':u'Esperanto',   'localname':u'Esperanto', 'weight':1.1},
            'cs': {'name':u'Czech',       'localname':u'Čeština', 'weight':1.3},
            'hu': {'name':u'Hungarian',   'localname':u'Magyar', 'weight':1.1},
            'sk': {'name':u'Slovak',      'localname':u'Slovenčina', 'weight':1.3},
            'da': {'name':u'Danish',      'localname':u'Dansk', 'weight':1.2},
            'id': {'name':u'Indonesian',  'localname':u'Bahasa Indonesia'},
            'he': {'name':u'Hebrew',      'localname':u'עברית', 'weight':1.2},
            'lt': {'name':u'Lithuanian',  'localname':u'Lietuvių'},
            'sr': {'name':u'Serbian',     'localname':u'Српски / Srpski', 'weight':1.4},
            'sl': {'name':u'Slovenian',   'localname':u'Slovenščina', 'weight':1.2},
            'ko': {'name':u'Korean',      'localname':u'한국어', 'weight':2.5},
            'ar': {'name':u'Arabic',      'localname':u'العربية', 'weight':1.0},
            'bg': {'name':u'Bulgarian',   'localname':u'Български', 'weight':1.1},
            'et': {'name':u'Estonian',    'localname':u'Eesti'},
            'hr': {'name':u'Croatian',    'localname':u'Hrvatski', 'weight':1.3},
            'new':{'name':u'Newar / Nepal Bhasa','localname':u'नेपाल भाषा'},
            'te': {'name':u'Telugu',      'localname':u'తెలుగు'},
            'vi': {'name':u'Vietnamese',  'localname':u'Tiếng Việt', 'weight':1.1},
            'th': {'name':u'Thai',        'localname':u'ไทย', 'weight':1.0},
            'gl': {'name':u'Galician',    'localname':u'Galego'},
            'fa': {'name':u'Persian',     'localname':u'فارسی', 'weight':1.2},
            'nn': {'name':u'Norwegian (Nynorsk)','localname':u'Nynorsk', 'similar_lang':'no'},
            'ceb':{'name':u'Cebuano',     'localname':u'Sinugboanong Binisaya', 'weight':0.8},
            'el': {'name':u'Greek',       'localname':u'Ελληνικά', 'weight':1.1},
            'ms': {'name':u'Malay',       'localname':u'Bahasa Melayu'},
            'simple':{'name':u'Simple English','localname':u'Simple English'},
            'eu': {'name':u'Basque',      'localname':u'Euskara', 'weight':1.1},
            'bpy':{'name':u'Bishnupriya Manipuri','localname':u'ইমার ঠার/বিষ্ণুপ্রিয়া মণিপুরী'},
            'bs': {'name':u'Bosnian',     'localname':u'Bosanski', 'similar_lang':'hr'},
            'lb': {'name':u'Luxembourgish','localname':u'Lëtzebuergesch'},
            'is': {'name':u'Icelandic',   'localname':u'Íslenska'},
            'ka': {'name':u'Georgian',    'localname':u'ქართული'},
            'sq': {'name':u'Albanian',    'localname':u'Shqip'},
            'la': {'name':u'Latin',       'localname':u'Latina', 'weight':1.1},
            'br': {'name':u'Breton',      'localname':u'Brezhoneg'},
            'az': {'name':u'Azeri',       'localname':u'Azərbaycan'},
            'hi': {'name':u'Hindi',       'localname':u'हिन्दी'},
            'bn': {'name':u'Bengali',     'localname':u'বাংলা'},
            'ht': {'name':u'Haitian',     'localname':u'Krèyol ayisyen'},
            'mk': {'name':u'Macedonian',  'localname':u'Македонски'},
            'mr': {'name':u'Marathi',     'localname':u'मराठी'},
            'sh': {'name':u'Serbo-Croatian','localname':u'Srpskohrvatski / Српскохрватски', 'similar_lang':'hr'},
            'tl': {'name':u'Tagalog',     'localname':u'Tagalog'},
            'io': {'name':u'Ido',         'localname':u'Ido'},
            'cy': {'name':u'Welsh',       'localname':u'Cymraeg', 'weight':1.2},
            'pms':{'name':u'Piedmontese', 'localname':u'Piemontèis'},
            'lv': {'name':u'Latvian',     'localname':u'Latviešu'},
            'su': {'name':u'Sundanese',   'localname':u'Basa Sunda'},
            'ta': {'name':u'Tamil',       'localname':u'தமிழ்', 'weight':0.9},
            'jv': {'name':u'Javanese',    'localname':u'Basa Jawa'},
            'nap':{'name':u'Neapolitan',  'localname':u'Nnapulitano'},
            'oc': {'name':u'Occitan',     'localname':u'Occitan'},
            'nds':{'name':u'Low Saxon',   'localname':u'Plattdüütsch'},
            'scn':{'name':u'Sicilian',    'localname':u'Sicilianu'},
            'ast':{'name':u'Asturian',    'localname':u'Asturianu'},
            'ku': {'name':u'Kurdish',     'localname':u'Kurdî / كوردی'},
            'be': {'name':u'Belarusian',  'localname':u'Беларуская'},
            'wa': {'name':u'Walloon',     'localname':u'Walon'},
            'af': {'name':u'Afrikaans',   'localname':u'Afrikaans'},
            'be-x-old':{'name':u'Belarusian (Tarashkevitsa)','localname':u'Беларуская (тарашкевіца)'},
            'tg': {'name':u'Tajik',       'localname':u'Тоҷикӣ'},
            'an': {'name':u'Aragonese',   'localname':u'Aragonés'},
            'fy': {'name':u'West Frisian','localname':u'Frysk'},
            'vec':{'name':u'Venetian',    'localname':u'Vèneto'},
            'roa-tara':{'name':u'Tarantino',   'localname':u'Tarandíne'},
            'cv': {'name':u'Chuvash',     'localname':u'Чăваш'},
            'zh-yue':{'name':u'Cantonese',   'localname':u'粵語', 'similar_lang':'zh'},
            'ur': {'name':u'Urdu',        'localname':u'اردو'},
            'ksh':{'name':u'Ripuarian',   'localname':u'Ripoarisch'},
            'sw': {'name':u'Swahili',     'localname':u'Kiswahili'},
            'qu': {'name':u'Quechua',     'localname':u'Runa Simi'},
            'uz': {'name':u'Uzbek',       'localname':u'O‘zbek'},
            'mi': {'name':u'Maori',       'localname':u'Māori'},
            'ga': {'name':u'Irish',       'localname':u'Gaeilge'},
            'bat-smg':{'name':u'Samogitian',  'localname':u'Žemaitėška'},
            'ml': {'name':u'Malayalam',   'localname':u'മലയാളം', 'weight':1.1},
            'gd': {'name':u'Scottish Gaelic','localname':u'Gàidhlig'},
            'yo': {'name':u'Yoruba',      'localname':u'Yorùbá'},
            'co': {'name':u'Corsican',    'localname':u'Corsu'},
            'kn': {'name':u'Kannada',     'localname':u'ಕನ್ನಡ'},
            'pam':{'name':u'Kapampangan', 'localname':u'Kapampangan'},
            'yi': {'name':u'Yiddish',     'localname':u'ייִדיש'},
            'hsb':{'name':u'Upper Sorbian','localname':u'Hornjoserbsce'},
            'nah':{'name':u'Nahuatl',     'localname':u'Nāhuatl'},
            'ia': {'name':u'Interlingua', 'localname':u'Interlingua', 'weight':1.0},
            'li': {'name':u'Limburgian',  'localname':u'Limburgs'},
            'sa': {'name':u'Sanskrit',    'localname':u'संस्कृतम्'},
            'hy': {'name':u'Armenian',    'localname':u'Հայերեն', 'weight':1.2},
            'tt': {'name':u'Tatar',       'localname':u'Tatarça / Татарча'},
            'als':{'name':u'Alemannic',   'localname':u'Alemannisch'},
            'roa-rup':{'name':u'Aromanian',   'localname':u'Armãneashce'},
            'lmo':{'name':u'Lombard',     'localname':u'Lumbaart'},
            'map-bms':{'name':u'Banyumasan',  'localname':u'Basa Banyumasan'},
            'am': {'name':u'Amharic',     'localname':u'አማርኛ'},
            'nrm':{'name':u'Norman',      'localname':u'Nouormand/Normaund'},
            'zh-min-nan':{'name':u'Min Nan',     'localname':u'Bân-lâm-gú', 'weight':1.2},
            'pag':{'name':u'Pangasinan',  'localname':u'Pangasinan'},
            'wuu':{'name':u'Wu',          'localname':u'吴语', 'similar_lang':'zh'},
            'fo': {'name':u'Faroese',     'localname':u'Føroyskt'},
            'vls':{'name':u'West Flemish','localname':u'West-Vlams'},
            'nds-nl':{'name':u'Dutch Low Saxon','localname':u'Nedersaksisch'},
            'se': {'name':u'Northern Sami','localname':u'Sámegiella'},
            'rm': {'name':u'Romansh',     'localname':u'Rumantsch'},
            'ne': {'name':u'Nepali',      'localname':u'नेपाली'},
            'war':{'name':u'Waray-Waray', 'localname':u'Winaray'},
            'fur':{'name':u'Friulian',    'localname':u'Furlan'},
            'lij':{'name':u'Ligurian',    'localname':u'Líguru'},
            'nov':{'name':u'Novial',      'localname':u'Novial'},
            'bh': {'name':u'Bihari',      'localname':u'भोजपुरी'},
            'sco':{'name':u'Scots',       'localname':u'Scots'},
            'dv': {'name':u'Divehi',      'localname':u'ދިވެހިބަސް'},
            'pi': {'name':u'Pali',        'localname':u'पाऴि'},
            'diq':{'name':u'Zazaki',      'localname':u'Zazaki'},
            'ilo':{'name':u'Ilokano',     'localname':u'Ilokano'},
            'kk': {'name':u'Kazakh',      'localname':u'Қазақша'},
            'os': {'name':u'Ossetian',    'localname':u'Иронау'},
            'frp':{'name':u'Franco-Provençal/Arpitan','localname':u'Arpitan'},
            'zh-classical':{'name':u'Classical Chinese','localname':u'古文 / 文言文', 'similar_lang':'zh'},
            'mt': {'name':u'Maltese',     'localname':u'Malti'},
            'lad':{'name':u'Ladino',      'localname':u'Dzhudezmo'},
            'fiu-vro':{'name':u'Võro',        'localname':u'Võro'},
            'pdc':{'name':u'Pennsylvania German','localname':u'Deitsch'},
            'csb':{'name':u'Kashubian',   'localname':u'Kaszëbsczi'},
            'kw': {'name':u'Cornish',     'localname':u'Kernewek'},
            'bar':{'name':u'Bavarian',    'localname':u'Boarisch'},
            'to': {'name':u'Tongan',      'localname':u'faka Tonga'},
            'haw':{'name':u'Hawaiian',    'localname':u'Hawai`i'},
            'mn': {'name':u'Mongolian',   'localname':u'Монгол'},
            'ps': {'name':u'Pashto',      'localname':u'پښتو'},
            'ang':{'name':u'Anglo-Saxon', 'localname':u'Englisc'},
            'km': {'name':u'Khmer',       'localname':u'ភាសាខ្មែរ'},
            'gv': {'name':u'Manx',        'localname':u'Gaelg'},
            'tk': {'name':u'Turkmen',     'localname':u'تركمن / Туркмен'},
            'ln': {'name':u'Lingala',     'localname':u'Lingala'},
            'ie': {'name':u'Interlingue', 'localname':u'Interlingue'},
            'tpi':{'name':u'Tok Pisin',   'localname':u'Tok Pisin'},
            'crh':{'name':u'Crimean Tatar','localname':u'Qırımtatarca'},
            'jbo':{'name':u'Lojban',      'localname':u'Lojban', 'weight':1.2},
            'wo': {'name':u'Wolof',       'localname':u'Wolof'},
            'ay': {'name':u'Aymara',      'localname':u'Aymar'},
            'zea':{'name':u'Zealandic',   'localname':u'Zeêuws'},
            'eml':{'name':u'Emilian-Romagnol','localname':u'Emiliàn e rumagnòl'},
            'si': {'name':u'Sinhalese',   'localname':u'සිංහල'},
            'sc': {'name':u'Sardinian',   'localname':u'Sardu'},
            'or': {'name':u'Oriya',       'localname':u'ଓଡ଼ିଆ'},
            'ig': {'name':u'Igbo',        'localname':u'Igbo'},
            'mg': {'name':u'Malagasy',    'localname':u'Malagasy'},
            'cbk-zam':{'name':u'Zamboanga Chavacano','localname':u'Chavacano de Zamboanga'},
            'gu': {'name':u'Gujarati',    'localname':u'ગુજરાતી'},
            'ky': {'name':u'Kirghiz',     'localname':u'Кыргызча'},
            'kg': {'name':u'Kongo',       'localname':u'KiKongo'},
            'ty': {'name':u'Tahitian',    'localname':u'Reo Mā`ohi'},
            'glk':{'name':u'Gilaki',      'localname':u'گیلکی'},
            'arc':{'name':u'Assyrian Neo-Aramaic','localname':u'ܐܪܡܝܐ'},
            'mo': {'name':u'Moldovan',    'localname':u'Молдовеняскэ'},
            'gn': {'name':u'Guarani',     'localname':u'Avañe\'ẽ'},
            'kab':{'name':u'Kabyle',      'localname':u'Taqbaylit'},
            'so': {'name':u'Somali',      'localname':u'Soomaaliga'},
            'ks': {'name':u'Kashmiri',    'localname':u'कश्मीरी / كشميري'},
            'stq':{'name':u'Saterland Frisian','localname':u'Seeltersk'},
            'mzn':{'name':u'Mazandarani', 'localname':u'مَزِروني'},
            'cu': {'name':u'Old Church Slavonic','localname':u'Словѣньскъ'},
            'ce': {'name':u'Chechen',     'localname':u'Нохчийн'},
            'udm':{'name':u'Udmurt',      'localname':u'Удмурт кыл'},
            'tet':{'name':u'Tetum',       'localname':u'Tetun'},
            'sd': {'name':u'Sindhi',      'localname':u'سنڌي، سندھی ، सिन्ध'},
            'pap':{'name':u'Papiamentu',  'localname':u'Papiamentu'},
            'ba': {'name':u'Bashkir',     'localname':u'Башҡорт'},
            'pa': {'name':u'Punjabi',     'localname':u'ਪੰਜਾਬੀ'},
            'rmy':{'name':u'Romani',      'localname':u'romani - रोमानी'},
            'lo': {'name':u'Lao',         'localname':u'ລາວ'},
            'na': {'name':u'Nauruan',     'localname':u'dorerin Naoero'},
            'bcl':{'name':u'Central Bicolano','localname':u'Bikol'},
            'kaa':{'name':u'Karakalpak',  'localname':u'Qaraqalpaq tili'},
            'gan':{'name':u'Gan',         'localname':u'贛語', 'similar_lang':'zh'},
            'iu': {'name':u'Inuktitut',   'localname':u'ᐃᓄᒃᑎᑐᑦ'},
            'myv':{'name':u'Erzya',       'localname':u'Эрзянь (Erzjanj Kelj)'},
            'szl':{'name':u'Silesian',    'localname':u'Ślůnski'},
            'sah':{'name':u'Sakha',       'localname':u'Саха тыла (Saxa Tyla)'},
            'my': {'name':u'Burmese',     'localname':u'Burmese'},
            'ext':{'name':u'Extremaduran','localname':u'Estremeñu'},
            'hif':{'name':u'Fiji Hindi',  'localname':u'Fiji Hindi'},
            'bo': {'name':u'Tibetan',     'localname':u'བོད་སྐད་'},
            'srn':{'name':u'Sranan',      'localname':u'Sranantongo'},
            'got':{'name':u'Gothic',      'localname':u'ðミフᄇðミフ﾿ðミヘトðミフᄚðミヘツðミフᄚðミフᄊðミフᄈðミフᄚ'},
            'dsb':{'name':u'Lower Sorbian','localname':u'Dolnoserbšćina'},
            'bm': {'name':u'Bambara',     'localname':u'Bamanankan'},
            'sm': {'name':u'Samoan',      'localname':u'Gagana Samoa'},
            'cdo':{'name':u'Min Dong',    'localname':u'Mìng-dĕ̤ng-ngṳ̄'},
            'chr':{'name':u'Cherokee',    'localname':u'ᏣᎳᎩ ᎧᏬᏂᎯᏍᏗ'},
            'mdf':{'name':u'Moksha',      'localname':u'Мокшень (Mokshanj Kälj)'},
            'om': {'name':u'Oromo',       'localname':u'Oromoo'},
            'ee': {'name':u'Ewe',         'localname':u'Eʋegbe'},
            'as': {'name':u'Assamese',    'localname':u'অসমীয়া ভাষা আৰু লিপি'},
            'ti': {'name':u'Tigrinya',    'localname':u'ትግርኛ_ፊደል'},
            'ug': {'name':u'Uyghur',      'localname':u'Oyghurque'},
            'kv': {'name':u'Komi',        'localname':u'Коми'},
            'zu': {'name':u'Zulu',        'localname':u'IsiZulu'},
            'av': {'name':u'Avar',        'localname':u'Авар'},
            'nv': {'name':u'Navajo',      'localname':u'Diné bizaad'},
            'ss': {'name':u'Swati',       'localname':u'SiSwati'},
            'pih':{'name':u'Norfolk',     'localname':u'Norfuk'},
            'ts': {'name':u'Tsonga',      'localname':u'Xitsonga'},
            'cr': {'name':u'Cree',        'localname':u'Nehiyaw'},
            've': {'name':u'Venda',       'localname':u'TshiVenda'},
            'ch': {'name':u'Chamorro',    'localname':u'Chamoru'},
            'bi': {'name':u'Bislama',     'localname':u'Bislama'},
            'xh': {'name':u'Xhosa',       'localname':u'IsiXhosa'},
            'rw': {'name':u'Kinyarwanda', 'localname':u'Kinyarwanda'},
            'dz': {'name':u'Dzongkha',    'localname':u'རྫོང་ཁ་'},
            'tn': {'name':u'Tswana',      'localname':u'Setswana'},
            'kl': {'name':u'Greenlandic', 'localname':u'Kalaallisut'},
            'bug':{'name':u'Buginese',    'localname':u'Basa Ugi'},
            'ik': {'name':u'Inupiak',     'localname':u'Iñupiak uqautchit'},
            'bxr':{'name':u'Buryat (Russia)','localname':u'Буряад'},
            'st': {'name':u'Sesotho',     'localname':u'Sesotho'},
            'xal':{'name':u'Kalmyk',      'localname':u'Хальмг_келн'},
            'ny': {'name':u'Chichewa',    'localname':u'Chicheŵa'},
            'ak': {'name':u'Akan',        'localname':u'Akana'},
            'ab': {'name':u'Abkhazian',   'localname':u'Аҧсуа бызшәа'},
            'fj': {'name':u'Fijian',      'localname':u'Na Vosa Vakaviti'},
            'lg': {'name':u'Luganda',     'localname':u'Luganda'},
            'tw': {'name':u'Twi',         'localname':u'Twi'},
            'ha': {'name':u'Hausa',       'localname':u'هَوُسَ'},
            'za': {'name':u'Zhuang',      'localname':u'Sawcuengh'},
            'ff': {'name':u'Fula',        'localname':u'Fulfulde'},
            'lbe':{'name':u'Lak',         'localname':u'Лакку маз'},
            'ki': {'name':u'Kikuyu',      'localname':u'Gĩgĩkũyũ'},
            'sn': {'name':u'Shona',       'localname':u'ChiShona'},
            'tum':{'name':u'Tumbuka',     'localname':u'ChiTumbuka'},
            'sg': {'name':u'Sango',       'localname':u'Sängö'},
            'ii': {'name':u'Sichuan Yi',  'localname':u'ꆇꉙ'},
            'chy':{'name':u'Cheyenne',    'localname':u'Tsetsêhestâhese'},
            'rn': {'name':u'Kirundi',     'localname':u'Kirundi'},
            'cho':{'name':u'Choctaw',     'localname':u'Chahta Anumpa'},
            'mh': {'name':u'Marshallese', 'localname':u'Kajin M̧ajeļ'},
            'aa': {'name':u'Afar',        'localname':u'Afar'},
            'ng': {'name':u'Ndonga',      'localname':u'Oshiwambo'},
            'kj': {'name':u'Kuanyama',    'localname':u'Kuanyama'},
            'ho': {'name':u'Hiri Motu',   'localname':u'Hiri Motu'},
            'mus':{'name':u'Muscogee',    'localname':u'Muskogee'},
            'kr': {'name':u'Kanuri',      'localname':u' Kanuri'},
            'hz': {'name':u'Herero',      'localname':u'Otsiherero'},
            'tokipona':{'name':u'Tokipona',    'localname':u'Tokipona'},       
            'arz':{'name':u'Egyptian Arabic','localname':u'مصرى (Maṣrī)', 'similar_lang':'ar'},
            'pnt':{'name':u'Pontic','localname':u'Ποντιακά', 'similar_lang':'el'},
            'mhr':{'name':u'Meadow Mari','localname':u'Олык Марий'},
            'ace':{'name':u'Acehnese','localname':u'Acèh'},
            'ckb':{'name':u'Soranî','localname':u'Soranî / کوردی'},
            'mwl':{'name':u'Mirandese','localname':u'Mirandés'},
            'pnb':{'name':u'Western Panjabi','localname':u'پنجابی'},
            'hak':{'name':u'Hakka',       'localname':u'Hak-kâ-fa / 客家話'}}
 
 
#languages to process, in alphabetical order of language code
#(for a quick test run, swap in a short explicit list such as the one below)
#lang_keys = ['en', 'hr', 'zh', 'ru','eo','ca','simple','es','ar','fi','sl','sv']
lang_keys = sorted(lang_info.keys())

#in-memory cache of English article wikitext, keyed by English title
page_cache = {}

#debug: when > 0, stop after this many entries of the article list
max_words = -1

#scores from the previous run, keyed by language code (see GetPreviousScores)
prev_score = {}

#score colors (hex RGB); the numeric suffix is part of each constant's name
color10000 = 'BF5FFF'
color4000 = 'FF7F00'
color2000 = 'FFBE00'
color1000 = 'FFFF00'
color500 = 'BEFF00'
color250 = '40FF00'
color100 = '00FF7D'
color0 = 'EFEFEF'
 
 
#format with comma thousands separators
def FormatNumber(s):
    """Return str(s) with a comma between each group of three integer digits.

    Intended for plain decimal numbers.  A leading sign is kept in front of
    the first group (so -123 gives '-123', not '-,123') and anything after a
    decimal point is appended unchanged instead of being grouped.
    """
    text = str(s)

    # Peel off the sign so it is not counted as a digit when grouping.
    sign = ''
    if text[:1] in ('-', '+'):
        sign = text[0]
        text = text[1:]

    # Only the integer part is grouped; the fraction is passed through.
    whole, dot, fraction = text.partition('.')

    groups = []
    while len(whole) > 3:
        groups.insert(0, whole[-3:])
        whole = whole[:-3]
    groups.insert(0, whole)

    return sign + ','.join(groups) + dot + fraction
 
 
def GetPreviousScores():
    """Load the scores of the previous run into the global prev_score dict.

    Reads "PreviousScores.txt" from the current directory if it exists.  Each
    line holds whitespace-separated tokens: the first is used as the key and
    the second is converted to float as the score.  Lines with fewer than two
    tokens (for example blank lines) are skipped.  A second token that is not
    a number still raises ValueError.  Does nothing when the file is missing.
    """
    temp_path = "PreviousScores.txt"
    if not os.path.isfile(temp_path):
        return

    temp_file = open(temp_path)
    try:
        for line in temp_file:
            tokens = line.split()
            if len(tokens) < 2:
                # blank or incomplete line: nothing to record
                continue
            prev_score[tokens[0]] = float(tokens[1])
    finally:
        # close the file even if a line fails to parse
        temp_file.close()
 
 
def GetArticle(en_wiki, en_word, wiki, lang):
    """Return the wikitext of the `lang` counterpart of English article en_word.

    en_wiki -- site object handed to wikipedia.Page for English pages
    en_word -- title of the English article
    wiki    -- site object handed to wikipedia.Page for pages in `lang`
    lang    -- language code; 'en' returns the English wikitext itself

    The English wikitext is taken from page_cache, else from the file
    "~en_article_<en_word>.txt" (UTF-8), else fetched with wikipedia.Page and
    then written to that file.  For other languages the title is read from
    the first "[[<lang>:...]]" interwiki link in the English wikitext and that
    page is fetched from `wiki`.  If there is no such link, '' is returned.
    On both wikis a page containing '#REDIRECT' is followed once, using the
    first [[...]] link in its text as the target.
    Exceptions raised by wikipedia.Page / Page.get are not caught here.
    """
    if en_word in page_cache:
        en_article = page_cache[en_word]
    else:
        temp_path = "~en_article_%s.txt" % (en_word)
        if os.path.isfile(temp_path):
            temp_file = open(temp_path)
            try:
                en_article = temp_file.read().decode('utf_8')
            finally:
                temp_file.close()
        else:
            en_page = wikipedia.Page(en_wiki, en_word)
            en_article = en_page.get(get_redirect=True)

            if en_article.upper().find(u'#REDIRECT') > -1:
                # Follow the redirect, but keep en_word untouched so that the
                # cache below stays keyed by the title the caller asked for.
                text_start = en_article.find('[[')
                text_end = en_article.find(']]', text_start)
                redirect_target = en_article[text_start+2:text_end]
                en_page = wikipedia.Page(en_wiki, redirect_target)
                en_article = en_page.get()

            temp_file = open(temp_path, 'w')
            try:
                temp_file.write(en_article.encode('utf_8'))
            finally:
                temp_file.close()

        page_cache[en_word] = en_article

    if lang == 'en':
        return en_article

    # find() returns -1 when absent; 0 is a valid position (link at the
    # very start of the text), so test against 0 inclusively.
    link_start = en_article.find(u'[[' + lang + u':')
    if link_start < 0:
        return ''

    name_start = en_article.find(':', link_start)
    name_end = en_article.find(']]', name_start)
    word = en_article[name_start+1:name_end]

    page = wikipedia.Page(wiki, word)
    article = page.get(get_redirect=True)

    if article.upper().find(u'#REDIRECT') > -1:
        text_start = article.find('[[')
        text_end = article.find(']]', text_start)
        word = article[text_start+2:text_end]
        page = wikipedia.Page(wiki, word)
        article = page.get()

    return article
 
 
def GetArticleInterwikiName(en_word, lang):
    """Return the title that English article en_word links to in `lang`.

    The English wikitext comes from page_cache or, failing that, from the
    file "~en_article_<en_word>.txt" (UTF-8).  Nothing is downloaded: when
    the file is missing a 'where is ...' message is printed and the article
    is treated as empty (and cached as such).

    Returns the text between "[[<lang>:" and the following "]]", or '' when
    lang is 'en' or the wikitext has no such interwiki link.
    """
    if en_word in page_cache:
        en_article = page_cache[en_word]
    else:
        temp_path = "~en_article_%s.txt" % (en_word)
        if os.path.isfile(temp_path):
            temp_file = open(temp_path)
            try:
                en_article = temp_file.read().decode('utf_8')
            finally:
                temp_file.close()
        else:
            print('where is '+temp_path)
            en_article = ''

        page_cache[en_word] = en_article

    if lang == 'en':
        return ''

    # find() returns -1 when absent; 0 is a valid position (link at the
    # very start of the text), so test against 0 inclusively.
    link_start = en_article.find(u'[[' + lang + u':')
    if link_start < 0:
        return ''

    name_start = en_article.find(':', link_start)
    name_end = en_article.find(']]', name_start)
    return en_article[name_start+1:name_end]
 
 
def GetInterwikiLength(article):
    """Return the number of characters of `article` taken up by interwiki links.

    Every "[[prefix:...]]" link whose prefix (the text between "[[" and the
    first following ':') is a key of lang_info counts as an interwiki link.
    If a newline follows the link, everything from the link's "[[" through
    that newline is counted; otherwise the link itself, including its closing
    "]]", is counted.
    """
    interwiki_len = 0
    link_start = article.find(u'[[')
    while link_start > -1:
        link_end = article.find(u']]', link_start)
        colon = article.find(u':', link_start)
        # the colon must belong to this link, i.e. sit before its "]]"
        if -1 < colon < link_end:
            prefix = article[link_start+2:colon]
            if prefix in lang_info:
                line_end = article.find(u'\n', link_end)
                if line_end > -1:
                    interwiki_len = interwiki_len + (line_end - link_start) + 1
                else:
                    interwiki_len = interwiki_len + (link_end - link_start) + 2
        # When "]]" was not found link_end is -1, so this searches only the
        # last character, finds nothing, and ends the loop.
        link_start = article.find(u'[[', link_end)

    return interwiki_len
 
 
def GetCommentLength(article):
    """Return the total length of the text inside HTML comments in `article`.

    Only the characters between "<!--" and "-->" are counted; the delimiters
    themselves are not.  A comment that is never closed contributes 0.

    The closing "-->" is searched for after the 4-character opener, so the
    dashes of "<!--" cannot double as the start of the closer (previously
    "<!-->" matched itself and produced a negative length).
    """
    opener = u'<!--'
    closer = u'-->'

    comment_len = 0
    comment_first = article.find(opener)
    while comment_first > -1:
        body_start = comment_first + len(opener)
        comment_last = article.find(closer, body_start)
        if comment_last == -1:
            # unterminated comment: count nothing, resume after the opener
            comment_last = body_start

        comment_len = comment_len + (comment_last - body_start)
        comment_first = article.find(opener, comment_last)

    return comment_len
 
def GetArticleType(wt_article_size):
    """Classify a weighted article size into one of the four counter names.

    Returns 'absent' for a size of 0 or less, 'stubs' below 10000,
    'articles' from 10000 up to (not including) 30000, and 'longarticles'
    from 30000 upwards.  A negative size previously matched no branch and
    returned None; it is now treated like an empty article.
    """
    if wt_article_size <= 0:
        return 'absent'
    if wt_article_size < 10000:
        return 'stubs'
    if wt_article_size < 30000:
        return 'articles'
    return 'longarticles'
 
def GetScoreForLang(lang):
    """Return the score of `lang`, computed from its four counters in lang_info."""
    counters = lang_info[lang]
    return GetScore(counters['absent'],
                    counters['stubs'],
                    counters['articles'],
                    counters['longarticles'])
 
def GetScore(absent, stubs, articles, longarticles):
    """Return the score (0-100) for the given article counts.

    A stub earns 1 point, an article 4 and a long article 9; the result is
    the earned points as a percentage of 9 points for every counted entry
    (absent ones included).  Returns 0 when there are no entries at all.
    """
    entry_total = absent + stubs + articles + longarticles
    best_possible = 9 * entry_total
    if best_possible <= 0:
        return 0
    earned = stubs + (articles*4) + (longarticles*9)
    return 100.0 * earned / best_possible
 
def GetLink(subtable,lang,value):
    """Return a wiki link to subpage `subtable`, anchored at "<lang> <localname>", shown as `value`."""
    local_name = lang_info[lang]['localname']
    pieces = ['[[/', subtable, '#', lang, ' ', local_name, '|', value, ']]']
    return ''.join(pieces)
 
def GetTableNumber(count, min_subtable_count, max_subtable_count, subtable, lang):
    """Return `count` formatted for a table cell, linked to its subtable when in range.

    The formatted number becomes a link (via GetLink) when count is at least
    min_subtable_count and below max_subtable_count; a max_subtable_count of
    -1 means there is no upper limit.  Otherwise the plain formatted number
    is returned.
    """
    value = FormatNumber(count)

    too_few = count < min_subtable_count
    too_many = max_subtable_count != -1 and count >= max_subtable_count
    if too_few or too_many:
        return value

    return GetLink(subtable, lang, value)
 
 
# number of languages handled so far; CalculateStatisticsForLang increments it
# and shows it in its "[lang n/total]" progress banner
num_lang = 0
 
def CalculateStatistics():
    """Run CalculateStatisticsForLang for every language code in lang_keys, in list order."""
    for language_code in lang_keys:
        CalculateStatisticsForLang(language_code)
 
 
def _GetLangWeight(lang):
    """Return the size multiplier for `lang`.

    Uses the entry's own 'weight' if present, otherwise the 'weight' of the
    language named by its 'similar_lang', otherwise 1.0.
    """
    info = lang_info[lang]
    if 'weight' in info:
        return info['weight']
    if 'similar_lang' in info:
        # fall back to 1.0 instead of failing if that entry has no weight
        return lang_info[info['similar_lang']].get('weight', 1.0)
    return 1.0


def _LoadCachedStatistics(lang, temp_path):
    """Fill lang_info[lang] from a result file written by _SaveStatistics."""
    info = lang_info[lang]
    temp_file = open(temp_path)
    try:
        info['total_size']   = float(temp_file.readline())
        info['absent']       = int(temp_file.readline())
        info['stubs']        = int(temp_file.readline())
        info['articles']     = int(temp_file.readline())
        info['longarticles'] = int(temp_file.readline())

        art_count = int(temp_file.readline())
        info['art_count'] = art_count
        for index in range(art_count):
            # three lines per article: name, size, error text
            entry = {}
            entry['name']  = temp_file.readline().decode('utf_8').strip()
            entry['size']  = int(float(temp_file.readline()))
            entry['error'] = temp_file.readline().decode('utf_8').strip()
            info['art_'+str(index)] = entry
    finally:
        temp_file.close()


def _SaveStatistics(lang, temp_path):
    """Write lang_info[lang] to temp_path, one value per line.

    Layout: total_size, absent, stubs, articles, longarticles, art_count,
    then name / size / error for each article (text encoded as UTF-8).
    """
    info = lang_info[lang]
    temp_file = open(temp_path, 'w')
    try:
        temp_file.write(str(info['total_size'])+'\n')
        temp_file.write(str(info['absent'])+'\n')
        temp_file.write(str(info['stubs'])+'\n')
        temp_file.write(str(info['articles'])+'\n')
        temp_file.write(str(info['longarticles'])+'\n')

        temp_file.write(str(info['art_count'])+'\n')
        for index in range(info['art_count']):
            entry = info['art_'+str(index)]
            temp_file.write(entry['name'].encode('utf_8')+'\n')
            temp_file.write(str(entry['size'])+'\n')
            temp_file.write(entry['error'].encode('utf_8')+'\n')
    finally:
        temp_file.close()


def _CollectStatistics(lang, en_wiki):
    """Measure every article listed in ArticleList.txt for `lang`.

    Resets the counters in lang_info[lang], then for each title fetches the
    article with GetArticle, subtracts interwiki-link and comment lengths,
    multiplies by the language weight, and records the result both in the
    per-type counters and in an 'art_<n>' entry (name, size, error).
    A failure on one article is printed and stored in that entry's 'error';
    processing continues with the next title.
    """
    info = lang_info[lang]
    info['total_size']   = 0
    info['absent']       = 0
    info['stubs']        = 0
    info['articles']     = 0
    info['longarticles'] = 0
    info['art_count']    = 0

    if lang == 'en':
        wiki = en_wiki
    else:
        wiki = wikipedia.Site(lang, 'wikipedia')

    lang_weight = _GetLangWeight(lang)

    word_file = open("ArticleList.txt")
    try:
        word_count = 0
        for line in word_file:
            # Strip only the line terminator: slicing off the last character
            # truncated the final title when the file lacked a trailing newline.
            en_word = line.rstrip('\r\n').decode('utf_8')
            if not en_word:
                # blank line: not an article title
                continue

            word_count = word_count + 1
            if word_count > max_words and max_words > 0:
                break

            wt_article_size = 0
            error = ''

            try:
                article         = GetArticle(en_wiki, en_word, wiki, lang)
                article_size    = len(article)

                interwiki_len   = GetInterwikiLength(article)
                comment_len     = GetCommentLength(article)
                wt_article_size = (article_size - interwiki_len - comment_len) * lang_weight

                article_type    = GetArticleType(wt_article_size)

                info[article_type] = info[article_type] + 1
                info['total_size'] = info['total_size'] + wt_article_size

                # one space-separated progress line per article
                print(' '.join([str(lang).ljust(3), str(word_count).rjust(3), en_word.ljust(30),
                                str(article_size).rjust(11), str(lang_weight).rjust(5),
                                str(interwiki_len).rjust(9), str(comment_len).rjust(9)]))

            except Exception:
                # Exception (not a bare except) so Ctrl-C / SystemExit still
                # abort the run instead of being logged as an article error.
                sys.stderr.write('\n')
                traceback.print_exc()
                sys.stderr.write('\n')
                try:
                    err_type  = sys.exc_info()[0]
                    err_value = sys.exc_info()[1]
                    # CookString is not defined in this block; if it is
                    # unavailable or fails, the generic text below is used.
                    error = CookString(unicode(err_type) + ': ' + unicode(err_value))
                except Exception:
                    error = "Error."

            art_index = info['art_count']
            entry = {}
            entry['name']  = en_word
            entry['size']  = wt_article_size
            entry['error'] = error
            info['art_'+str(art_index)] = entry
            info['art_count'] = art_index + 1
    finally:
        word_file.close()


def CalculateStatisticsForLang(lang):
    """Fill lang_info[lang] with article statistics for one language.

    Prints a "[lang n/total]" banner (advancing the global num_lang), then
    either reloads the results stored in "~<lang>_output.txt" or, when that
    file does not exist, measures every article and writes the file.
    Errors are printed to stderr with a traceback and not re-raised, so the
    remaining languages can still be processed.
    """
    global num_lang

    en_wiki = wikipedia.Site('en', 'wikipedia')

    num_lang = num_lang + 1

    print(('=['+lang+' '+str(num_lang)+ '/' + str(len(lang_keys)) + ']').ljust(76,'='))

    try:
        temp_path = "~%s_output.txt" % (lang)
        if os.path.isfile(temp_path):
            _LoadCachedStatistics(lang, temp_path)
            print('..using previous %s result...' % (lang))
        else:
            _CollectStatistics(lang, en_wiki)
            _SaveStatistics(lang, temp_path)

    except Exception:
        sys.stderr.write('\n')
        traceback.print_exc()
        sys.stderr.write('\n')
 
 
def CompareScore(lang1, lang2):
    """cmp-style comparison that orders languages from highest score to lowest."""
    # lang2's score is the left operand, which reverses the natural order
    score_of_second = GetScoreForLang(lang2)
    score_of_first = GetScoreForLang(lang1)
    return cmp(score_of_second, score_of_first)
 
 
def GetGrowthNumber(lang, score):
    """Return score minus the previous score recorded for lang.

    Returns None when prev_score holds no entry for lang.
    """
    if lang not in prev_score:
        return None
    return score - prev_score[lang]
 
def GetGrowth(lang, score):
    """Return the growth of lang as a signed two-decimal string.

    Languages without an entry in prev_score yield "n/a".  A result that
    rounds to negative zero is reported as '+0.00'.
    """
    if lang not in prev_score:
        return "n/a"

    formatted = "%+2.2f" % round(GetGrowthNumber(lang, score), 2)
    if formatted == '-0.00':
        return '+0.00'
    return formatted
 
 
def GetAverageSize(lang, article_count):
    """Return total_size of lang divided by article_count, rounded to an int.

    Returns 0 when article_count is not positive.
    """
    if article_count <= 0:
        return 0
    return int(round(lang_info[lang]['total_size'] / article_count))
 
 
 
def GetMedianSize(lang):
    """Return the median size of the articles of lang that have size > 0.

    Articles with a size of 0 or less are ignored.  Returns 0 when no
    article qualifies.
    """
    info = lang_info[lang]
    all_sizes = [info['art_' + str(index)]['size']
                 for index in range(info['art_count'])]
    sizes = sorted([size for size in all_sizes if size > 0])

    if not sizes:
        return 0

    mid = len(sizes) // 2
    if len(sizes) % 2:
        # odd number of entries: the middle one is the median
        return sizes[mid]
    # even number of entries: mean of the two middle ones
    return (sizes[mid-1] + sizes[mid]) / 2
 
def PrintResults():
 
    lang_keys.sort(CompareScore)
 
    print '\n'
    print 'RESULTS\n----------------------------------------------------------------------'
    print u'Lang:',' AvgSize','Median','Absent',' <10k ','10-30k',' >30k ', 'Score', 'Growth'
    for lang in lang_keys:
 
        absent        = lang_info[lang]['absent']
        stubs         = lang_info[lang]['stubs']
        articles      = lang_info[lang]['articles']
        longarticles  = lang_info[lang]['longarticles']
 
        article_count = stubs + articles + longarticles
        score         = GetScore(absent, stubs, articles, longarticles)
        growth        = GetGrowth(lang, score)
        avg_size      = GetAverageSize(lang, article_count)
        med_size      = GetMedianSize(lang)
 
        print lang.ljust(6),
        print str(avg_size).rjust(7),
        print str(med_size).rjust(7),
        print str(absent).rjust(5),
        print str(stubs).rjust(6),
        print str(articles).rjust(6),
        print str(longarticles).rjust(6),
        print ("%6.2f" % score).rjust(6),
        print growth.rjust(6)
 
def GetWikiTableResults(awards):
    """Build the wikitext results table, one row per language, best score first.

    awards is passed to HasAwards; languages that have an award get their
    growth cell wrapped by GetLink.  Sorts the global lang_keys in place
    with CompareScore as a side effect.  Returns the table as a string.
    """
    lang_keys.sort(CompareScore)

    table = '{|class="wikitable sortable" border="1" cellpadding="2" cellspacing="0" style="width:100%; background: #f9f9f9; border: 1px solid #aaaaaa; border-collapse: collapse; white-space: nowrap; text-align: center"'
    table += '\n|-\n'
    table += u'!width = 45 | № !! width = 55 | Wiki !! width = 220 | Language !! width = 55 | [[Talk:List of Wikipedias by sample of articles/Archives/2008#Proposed weighting of characters for formula_.28Option.232_using_Babel_text.29|Weight]] !! width = 120 | Mean Article<br>Size !! width = 120 | [[Talk:List_of_Wikipedias_by_sample_of_articles#average_or_median.3F|Median Article<br>Size]] !! width = 80 | [[/Absent Articles|Absent]]<br>(0k) !! width=80| Stubs<br>(< 10k)!! width = 80 | Articles<br>(10-30k) !! width = 80 | Long Art.<br>(> 30k) !! width = 80 | [[Talk:List of Wikipedias by sample of articles/Archives/2008#Other possibility of maximum score|Score]]'
    table += '!! width = 50 | [[Talk:List of Wikipedias by sample of articles/Archives/2008#Script_extension|Growth]]'
    table += '\n|-\n'

    for position, lang in enumerate(lang_keys):
        rank = position + 1
        info = lang_info[lang]

        absent = info['absent']
        stubs = info['stubs']
        articles = info['articles']
        longarticles = info['longarticles']
        article_count = stubs + articles + longarticles

        # A language with no counts at all is marked with a dagger and shown
        # as if every article of the English list were absent.
        if absent + article_count == 0:
            lang_footnote = u'†'
            absent = lang_info['en']['art_count']
        else:
            lang_footnote = ''

        table += '|' + str(rank) + '\n'
        table += '| [[:' + lang + ':|' + lang + ']]' + lang_footnote + '\n'
        table += '| style = "text-align: left" | [[:w:' + info['name'] + ' language|' + info['localname'] + ']]\n'

        # Own weight, else the weight of a similar language ('**'),
        # else a default of 1.0 ('*').
        if 'weight' in info:
            weight = str(info['weight'])
        elif 'similar_lang' in info:
            weight = str(lang_info[info['similar_lang']]['weight']) + '**'
        else:
            weight = '1.0*'

        score = GetScore(absent, stubs, articles, longarticles)
        growth = GetGrowth(lang, score)
        avg_size = GetAverageSize(lang, article_count)
        med_size = GetMedianSize(lang)

        if HasAwards(awards, lang):
            growth = GetLink('Growth', lang, growth)

        cells = [
            '| ' + weight + '\n',
            '| ' + GetTableNumber(avg_size, 1, -1, 'Neglected', lang) + '\n',
            '| ' + FormatNumber(med_size) + '\n',
            '| ' + GetTableNumber(absent, 1, 250, 'Absent Articles', lang) + '\n',
            '| ' + GetTableNumber(stubs, 1, 100, 'Stubs', lang) + '\n',
            '| ' + GetTableNumber(articles, 1, 100, 'Articles', lang) + '\n',
            '| ' + GetTableNumber(longarticles, 1, 100, 'Long Articles', lang) + '\n',
        ]
        for cell in cells:
            table += cell

        # color code score: pick the background of the highest band reached
        if score >= 100.00:
            background = color10000
        elif score >= 40.00:
            background = color4000
        elif score >= 20.00:
            background = color2000
        elif score >= 10.00:
            background = color1000
        elif score >= 5.00:
            background = color500
        elif score >= 2.50:
            background = color250
        elif score >= 1.00:
            background = color100
        else:
            background = color0
        color = '|style = "background: ' + u'\u0023' + background + '"'

        table += color + '| ' + ("%.2f" % score) + '\n'
        table += '| ' + growth + '\n'
        table += '|-\n'

    # turn the trailing row separator '|-\n' into the table terminator '|}'
    return table[:-2] + '}'
 
def GetWikiTableArticles(article_type, min_articles, max_articles):
    """List, per language, the articles whose GetArticleType equals article_type.

    A language section is included only when its number of matching
    articles is at least min_articles and below max_articles.  Sorts the
    global lang_keys alphabetically in place.  Returns wikitext.
    """
    lang_keys.sort()
    table = u''
    for lang in lang_keys:
        info = lang_info[lang]
        section = u'===' + lang + ' [[:w:' + info['name'] + ' language|' + info['localname'] + ']]===\n'
        matches = 0
        for index in range(info['art_count']):
            entry = info['art_' + str(index)]
            if GetArticleType(entry['size']) == article_type:
                section += '#[[en:' + entry['name'] + ']] ' + entry['error'] + '\n'
                matches += 1
        if min_articles <= matches < max_articles:
            table += section

    return table
 
 
def GetArticleName(lang, artKey):
    """Return the stored name of article artKey for lang, or 0 if lang has no such entry."""
    entries = lang_info[lang]
    if artKey not in entries:
        return 0
    return entries[artKey]['name']
 
 
def GetArticleSize(lang, artKey):
    """Return the stored size of article artKey for lang, or 0 if lang has no such entry."""
    entries = lang_info[lang]
    if artKey not in entries:
        return 0
    return entries[artKey]['size']
 
def GetEdgeFactor(lang, artKey):
    """Return a bonus for articles that are missing or just below a size threshold.

    The size comes from GetArticleSize(lang, artKey):
      * size 0                  -> 1
      * 7000 < size < 10000     -> (size - 7000) / 1000
      * 24000 < size < 30000    -> (size - 24000) / 1000
      * anything else           -> 0
    """
    size = GetArticleSize(lang, artKey)

    if size==0:
        return 1
    # The upper bound used to read "size < 1000", which can never be true
    # together with "size > 7000", so this branch was unreachable.
    if size > 7000 and size < 10000:
        return (size - 7000) / 1000
    if size > 24000 and size < 30000:
        return (size - 24000) / 1000
    return 0
 
def GetRuntFactor(lang, artKey):
    """Return 4 if artKey is the smallest existing article of lang, else 0.

    An article "exists" when its size is greater than 0.  Every article
    index of the English list is checked; an article with size 0 or less
    is never the runt.
    """
    size = GetArticleSize(lang, artKey)
    if size <= 0:
        return 0

    for index in range(lang_info['en']['art_count']):
        otherArtKey = 'art_' + str(index)
        if otherArtKey == artKey:
            continue
        otherSize = GetArticleSize(lang, otherArtKey)
        if 0 < otherSize < size:
            return 0 #you are not the runt
    return 4
 
 
def GetArticlePoints(lang, artKey):
    """Return the points for one article of lang based on its size.

      * size <= 0                -> 0
      * 0 < size < 10000         -> 1
      * 10000 <= size <= 30000   -> 4
      * size > 30000             -> 9

    Sizes of exactly 10000 or 30000 used to match no branch and scored 0,
    the same as a missing article.  They are now placed in the middle
    band, following the "< 10k" / "10-30k" / "> 30k" column headings used
    by the result tables.
    NOTE(review): confirm the boundaries agree with GetArticleType, which
    is defined outside this section.
    """
    size = GetArticleSize(lang, artKey)
    if size <= 0:
        return 0
    if size < 10000:
        return 1
    if size <= 30000:
        return 4
    return 9
 
 
def GetAverageArticlePoints(artKey):
    """Return the mean of GetArticlePoints for artKey over all languages in lang_keys, as a float."""
    total = sum(GetArticlePoints(lang, artKey) for lang in lang_keys)
    return float(total) / len(lang_keys)
 
 
def GetAverageArticleSize(artKey):
    """Return the mean of GetArticleSize for artKey over all languages in lang_keys, truncated to an int."""
    total = sum(GetArticleSize(lang, artKey) for lang in lang_keys)
    return int(float(total) / len(lang_keys))
 
 
def GetNeglectForArticle(lang, artInfo):
    """Return the neglect value of the article artInfo['artKey'] for lang.

    It is the all-language average of the article's points minus the
    points lang earns for it, plus the edge and runt factors for lang.
    """
    key = artInfo['artKey']
    average_points = GetAverageArticlePoints(key)
    own_points = GetArticlePoints(lang, key)
    edge = GetEdgeFactor(lang, key)
    runt = GetRuntFactor(lang, key)   # 0 or 4
    return average_points - own_points + edge + runt
 
def CompareNeglect(artInfo1, artInfo2):
    """cmp-style comparator: descending by 'neglect', ties broken by descending 'popularity'."""
    by_neglect = cmp(artInfo2['neglect'], artInfo1['neglect'])
    # a zero result means equal neglect, so popularity decides
    return by_neglect or cmp(artInfo2['popularity'], artInfo1['popularity'])
 
def ComparePopularity(artInfo1, artInfo2):
    """cmp-style comparator that orders article infos by descending 'popularity'."""
    second = artInfo2['popularity']
    first = artInfo1['popularity']
    return cmp(second, first)
 
 
def GetArticlesSortedByNeglect(lang):
    """Return one info dict per article of the English list, most neglected first.

    Each dict has the keys 'artKey', 'popularity' (average size over all
    languages) and 'neglect' (GetNeglectForArticle for lang).  The list is
    empty when lang_info['en'] has no 'art_count'.
    """
    artInfos = []

    if 'art_count' in lang_info['en']:
        for index in range(lang_info['en']['art_count']):
            artKey = 'art_' + str(index)
            info = {'artKey': artKey,
                    'popularity': GetAverageArticleSize(artKey)}
            info['neglect'] = GetNeglectForArticle(lang, info)
            artInfos.append(info)

    artInfos.sort(CompareNeglect)
    return artInfos
 
def GetNeglectedArticles(lang, max_articles):
    """Return a wikitext numbered list of the most neglected articles of lang.

    At most max_articles entries of the neglect ranking are examined (but
    always at least one).  Entries lang has no record for are skipped yet
    still count towards that limit.  Existing articles (size > 0) get
    their size appended, linked to the local article when
    GetArticleInterwikiName returns a non-empty name.
    """
    table = u''
    for position, artInfo in enumerate(GetArticlesSortedByNeglect(lang)):
        artKey = artInfo['artKey']

        if artKey in lang_info[lang]:
            en_word = lang_info[lang][artKey]['name']
            line = '#[[en:' + en_word + ']]'

            size = int(GetArticleSize(lang, artKey))
            if size > 0:
                iw_name = GetArticleInterwikiName(en_word, lang)
                if iw_name == '':
                    line += ' (' + str(size) + ')'
                else:
                    line += ' ([[' + lang + ':' + iw_name + '|' + str(size) + ']])'

            table += line + '\n'

        if position + 1 >= max_articles:
            break

    return table
 
 
def GetPopularArticles(max_articles):
    """Return a wikitext numbered list of articles by descending popularity.

    Popularity is the average article size over all languages.  The list
    is cut after max_articles entries when max_articles is positive;
    otherwise every article is listed.
    """
    ranked = GetArticlesSortedByNeglect('en')
    ranked.sort(ComparePopularity)

    table = u''
    for position, artInfo in enumerate(ranked):
        name = lang_info['en'][artInfo['artKey']]['name']
        table += '# ' + str(artInfo['popularity']) + ' [[en:' + name + ']]\n'
        if max_articles > 0 and position + 1 >= max_articles:
            break

    return table
 
 
def GetWikiNeglectedArticles():
    lang_keys.sort()
    table = u''
 
    print 'writing Popular Articles...'
    table = table + u'==Popular Articles==\n'
    table = table + GetPopularArticles(-1)
 
    print 'writing Neglected Articles...'
    table = table + u'==Neglected Articles==\n'
    for lang in lang_keys:
        print ' '+lang
 
        if lang_info[lang]['art_count'] > 0:
            table = table + u'==='+lang+' [[:w:' + lang_info[lang]['name'] + ' language|' + lang_info[lang]['localname'] + ']]===\n'
            table = table + GetNeglectedArticles(lang, 10)
 
    return table
 
def SaveWikiTableResults(awards):
 
    print 'writing Results...'
    f = open('results.txt', 'w')
    f.write(GetWikiTableResults(awards).encode("utf_8"))
    f.close();
 
    print 'writing Absent...'
    f = open('_absent.txt', 'w')
    f.write(GetWikiTableArticles('absent',1, 250).encode("utf_8"))
    f.close();
 
    print 'writing Stubs...'
    f = open('_stub.txt', 'w')
    f.write(GetWikiTableArticles('stubs',1, 100).encode("utf_8"))
    f.close();
 
    print 'writing Articles...'
    f = open('_articles.txt', 'w')
    f.write(GetWikiTableArticles('articles',1, 100).encode("utf_8"))
    f.close();
 
    print 'writing Long Articles...'
    f = open('_longarticles.txt', 'w')
    f.write(GetWikiTableArticles('longarticles',1,100).encode("utf_8"))
    f.close();
 
    print 'writing Suggestions...'
    f = open('_neglectedarticles.txt', 'w')
    f.write(GetWikiNeglectedArticles().encode("utf_8"))
    f.close();
 
    print 'writing Awards...'
    f = open('_growth.txt', 'w')
    f.write(GetWikiAwards(awards).encode("utf_8"))
    f.close();
 
 
def CookString(rawString):
    """Return rawString with the backslash escapes in it interpreted.

    rawString is echoed to stdout first.  It is then cut into pieces at
    every "'" and "|": each non-empty piece is evaluated as the body of a
    u'...' literal, and each empty piece becomes a single "'".

    Consequences of the splitting that are visible in the code:
      * literal '|' characters in rawString are dropped from the result;
      * a "'" at the very start or end, or two adjacent "'", yields more
        than one empty piece and therefore extra "'" characters.

    NOTE(review): eval() is applied to text that the caller builds from an
    exception's type and message.  A piece that is not a valid literal body
    (e.g. one ending in a backslash) makes eval() raise, and the text is
    not guaranteed to be trustworthy -- consider a non-eval decoder.
    """
 
    print rawString
    cookString = ''
    # every "'" becomes "||", so splitting on "|" leaves an empty piece
    # where a quote used to be
    for part in rawString.replace("'","||").split("|"):
        if len(part)==0:
            cookString = cookString + "'"
        else:
            # interpret escapes such as \xNN or \uNNNN inside the piece
            cookString = cookString + eval("u'"+part+"'")
    return cookString        
 
def GetGrowths(article):
    """Extract the growth value of every language row from results-table wikitext.

    For each '[[:xx:|' link the following 'style = "background:' score
    cell is located; the growth is read from the line after it.  A growth
    cell containing a '|' is treated as a '[[target|value]]' link and only
    the value is kept.  Text that is not a number (e.g. 'n/a') counts as 0.

    Returns a dict mapping language code to growth as a float (or 0).
    Scanning stops at the first row that is incomplete.
    """
    growths = {}
    lang_first = article.find(u'[[:')
    while lang_first > -1:
        lang_last = article.find(u'|', lang_first)
        if lang_last == -1:
            break
        # '[[:xx:|' -> skip the 3-character '[[:' and drop the ':' before the pipe
        lang = article[lang_first+3:lang_last-1]

        score_first = article.find(u'style = "background:', lang_last)
        if score_first == -1:
            break
        # start past the score cell's own '|' so that the next pipe found is
        # the one opening the growth line
        score_last = article.find(u'|', score_first+32)
        if score_last == -1:
            break

        growth_end = article.find(u'\n', score_last)
        if growth_end == -1:
            # The growth line is the last line and has no trailing newline.
            # Slicing up to -1 would silently drop its final character.
            growth_end = len(article)
        growth_str = article[score_last+2:growth_end]

        growth_pipe = growth_str.find(u'|')
        if growth_pipe > -1:
            # '[[target|value]]' -> 'value'
            growth_str = growth_str[growth_pipe+1:-2]
        try:
            growth = float(growth_str)
        except ValueError:
            growth = 0
        growths[lang] = growth

        lang_first = article.find(u'[[:', score_last)
    return growths
 
def GetLastUpdated(article):
    """Return the date following 'Last Update:' as 'DD Mon YYYY'-style text.

    The date is the text between 'Last Update:' (or, if a '-' occurs
    before the terminating '<br/>', that '-') and the '<br/>'.  A
    one-character first word is zero-padded and the second word is cut to
    three characters.

    Returns None when either 'Last Update:' or a following '<br/>' is
    missing.
    """
    date_first = article.find(u'Last Update:')
    if date_first == -1:
        return None

    # Look for the terminator after the marker.  Searching from the start of
    # the page picked up any earlier '<br/>' and produced an empty date.
    date_last = article.find(u'<br/>', date_first)
    if date_last == -1:
        return None

    hyphen = article.find(u'-', date_first, date_last)
    if hyphen > -1:
        date_first = hyphen + 1
    else:
        date_first = date_first + 12   # len('Last Update:')

    parts = article[date_first:date_last].strip().split(' ')
    if len(parts[0]) == 1:
        parts[0] = '0' + parts[0]
    if len(parts) > 1:
        # guard: a single-word date has no second word to abbreviate
        parts[1] = parts[1][0:3]
    return ' '.join(parts)
 
# Growth per language used by CompareGrowth; replaced by CalculatePlacing
# before it sorts.
growthsG = {}
def CompareGrowth(lang1, lang2):
    """cmp-style comparator that orders languages by descending growth in growthsG."""
    growth_of_second = growthsG[lang2]
    growth_of_first = growthsG[lang1]
    return cmp(growth_of_second, growth_of_first)
 
 
def CalculatePlacing(growths,oldid,update):
    global growthsG;
    growthsG = growths
    lang_keys = growths.keys()
    lang_keys.sort(CompareGrowth)
    placeNo=0
 
    print update
 
    placing = []
    for lang in lang_keys:
        if placeNo < 3 or growths[lang] > 1:
           placeNo = placeNo + 1
           if placeNo==1:
              placestr = '1st Place'
              ribbonimg = 'Article blue.svg'
           if placeNo==2:
              placestr = '2nd Place'
              ribbonimg = 'Article red.svg'
           if placeNo==3:
              placestr = '3rd Place'
              ribbonimg = 'Article yellow.svg'
           if placeNo>3:
              placestr = 'Honorable Mention'
              ribbonimg = 'Article green.svg'
           print " %d  %-3s %+2.2f" % (placeNo, lang, growths[lang])
           place = {'lang':lang,'growth':growths[lang],'oldid':oldid,'update':update,'placestr':placestr,'ribbonimg':ribbonimg}
           placing.append(place) 
    return placing
 
 
def GetPreviousAwards():
    """Rebuild past award placings from the revision history of the Meta page.

    Only revisions whose timestamp contains '2009' and whose edit comment
    contains 'updat' or 'correct' (case-insensitive) are read.  A revision
    is used when its table has an 'en' row and both its 'Last Update' date
    and its 'en' growth differ from the last revision that was used.

    Returns a dict mapping the update date to the list produced by
    CalculatePlacing.
    """
    article_name = 'List of Wikipedias by sample of articles'

    meta_wiki = wikipedia.Site('meta', 'meta')
    meta_page = wikipedia.Page(meta_wiki, article_name)

    awards = {}
    prevUpdate = ''
    prevGrowth = 0
    for (oldid,datetime,username,comments) in meta_page.getVersionHistory():
        if '2009' not in datetime:
            continue
        summary = comments.lower()
        if 'updat' not in summary and 'correct' not in summary:
            continue

        article = meta_page.getOldVersion(get_redirect=False,oldid=oldid)
        growths = GetGrowths(article)
        if 'en' not in growths:
            continue

        update = GetLastUpdated(article)
        growth = growths['en']
        if update != prevUpdate and prevGrowth != growth:
            prevUpdate = update
            prevGrowth = growth
            awards[update] = CalculatePlacing(growths,oldid,update)
    return awards
 
def HasAwards(awards, lang):
    """Return True if any placing in any update of awards belongs to lang.

    awards maps an update label to a list of placing dicts, each with a
    'lang' key.
    """
    for placings in awards.values():
        if any(lang == place['lang'] for place in placings):
            return True
    return False
 
def CompareRows(rowA,rowB):
    """cmp-style comparator for award rows: ascending 'placestr', ties by descending 'growth'."""
    placeA = rowA['place']
    placeB = rowB['place']
    by_place = cmp(placeA['placestr'], placeB['placestr'])
    # a zero result means the same kind of award, so the larger growth goes first
    return by_place or cmp(placeB['growth'], placeA['growth'])
 
def GetWikiAwards(awards):
    """Return the wikitext of the awards page: one table per language that has placings.

    awards maps an update label to a list of placing dicts.  For every
    language in lang_keys its placings are collected, sorted with
    CompareRows and rendered as table rows.  A placing whose 'oldid' is -1
    links to the current page instead of an old revision.  Languages
    without placings are omitted.
    """
    global lang_info
    global lang_keys

    table = u'==2009 Improvement Awards==\n'
    for lang in lang_keys:
        info = lang_info[lang]
        heading = u'===' + lang + ' [[:w:' + info['name'] + ' language|' + info['localname'] + ']]===\n'

        rows = []
        for update, placings in awards.items():
            for place in placings:
                if lang != place['lang']:
                    continue
                ribbon_cell = '|width = 150 | [[Image:%s|20px]] %s\n' % (place['ribbonimg'],place['placestr'])
                if place['oldid'] == -1:
                    update_cell = '|width = 120 align=center| [[:m:List of Wikipedias by sample of articles|%s]]\n' % (place['update'])
                else:
                    update_cell = '|width = 120 align=center| <span class="plainlinks">[http://meta.wikimedia.org/w/index.php?title=List_of_Wikipedias_by_sample_of_articles&oldid=%s %s]</span>\n' % (place['oldid'],place['update'])
                growth_cell = '|width = 80 align=center| %+2.2f\n' % round(place['growth'],2)
                rows.append({'place': place,
                             'mid_section': '|-\n' + ribbon_cell + update_cell + growth_cell})

        if not rows:
            continue

        rows.sort(CompareRows)
        if len(rows) > 1:
            # several rows: sortable table with an (empty) header row
            opening = '{|class="wikitable sortable" cellpadding="6" cellspacing="0"\n' + '! !! !!\n'
        else:
            opening = '{|class="wikitable" cellpadding="6" cellspacing="0"\n'

        section = heading + opening
        for row in rows:
            section += row['mid_section']
        table += section + '|}\n'
    return table
 
def CalculateAwards():
 
    print "calculating awards..."
 
    todays = {}
    for lang in lang_keys:
        absent        = lang_info[lang]['absent']
        stubs         = lang_info[lang]['stubs']
        articles      = lang_info[lang]['articles']
        longarticles  = lang_info[lang]['longarticles']
        score         = GetScore(absent, stubs, articles, longarticles)
        growth        = GetGrowthNumber(lang, score)
        todays[lang] = growth
 
    update = strftime("%d %b %Y")
    placing = CalculatePlacing(todays,-1,update)
 
    awards = GetPreviousAwards()
    awards[update] = placing
    return awards
 
#support dividing up work: "script.py PART NUMPARTS" keeps only the
#languages whose position in lang_keys falls into the given 1-based part
if len(sys.argv) == 3:
    part      = int(sys.argv[1])-1
    numparts  = int(sys.argv[2])
    lang_keys = list(lang for lang in lang_keys
                     if lang_keys.index(lang) % numparts == part)


#run the whole pipeline in order
GetPreviousScores()
CalculateStatistics()
awards = CalculateAwards()
PrintResults()
SaveWikiTableResults(awards)

[edit] GetArticleList.py

# -*- coding: utf_8 -*-
import sys
sys.path.append('./pywikipedia')
 
import wikipedia
import pagegenerators
import catlib
import traceback
import os
 
article_name = 'List of articles every Wikipedia should have'
 
meta_wiki = wikipedia.Site('meta', 'meta')
meta_page = wikipedia.Page(meta_wiki, article_name)
article   = meta_page.get(get_redirect=False)
 
f = open('ArticleList.txt', 'w')
count = 0
grand_total = 0
 
name_last  = 0
name_first = article.find(u'[[en:', name_last)
while name_first > -1:
    name_mid  = article.find(u'|',  name_first)
 
 
    cat_start =article.rfind(u'\n== ', name_last, name_first)
    if cat_start > -1:
        cat_end   = article.find(u'==',cat_start+3, name_first)
        if cat_end > -1: 
            cat   = article[cat_start+3:cat_end]
            print ''
            print cat
            print str('').center(len(cat),'-')
            count = 0
 
    name_last = article.find(u']]', name_first)
    if name_last > name_mid:
      name_last = name_mid
    article_item = article[name_first+5:name_last]
    f.write(article_item.encode("utf_8"))
    f.write('\n')
    count = count + 1
    grand_total = grand_total + 1
    print count, article_item
    name_first = article.find(u'[[en:', name_last)
 
f.close();
 
print ''
print 'GRAND TOTAL'
print '-----------'
print  grand_total, 'articles'

[edit] GetPreviousScores.py

# -*- coding: utf_8 -*-
import sys
sys.path.append('./pywikipedia/')
 
import wikipedia
import pagegenerators
import catlib
import traceback
import os
 
article_name = 'List of Wikipedias by sample of articles'
 
meta_wiki = wikipedia.Site('meta', 'meta')
meta_page = wikipedia.Page(meta_wiki, article_name)
article   = meta_page.get(get_redirect=False)
 
f = open('PreviousScores.txt', 'w')
count = 0
lang_last  = 0
lang_first = article.find(u'[[:', lang_last)
while lang_first > -1:
    lang_last  = article.find(u'|',  lang_first)
 
    lang = article[lang_first+3:lang_last-1]
 
    score_first = article.find(u'style = "background:',lang_last);
    score_last  = article.find(u'|', score_first+32);
 
    score = article[score_first+31:score_last-1]
 
    f.write(lang + ' ' + score + '\n')
 
    count = count + 1
    print count, lang, score
    lang_first = article.find(u'[[:', score_last)
 
f.close();