User:Faebot/code/advanced permissions

From Meta, a Wikimedia project coordination wiki
#!/usr/bin/python
# -*- coding: utf-8 -*-
'''
# batchGoogleSpreadsheet.py
#
# Grab a Google Spreadsheet from the API in JSON format and play with it to create a wikitable.
# This can be improved in many ways, but it works for the moment.
#
# Date: May 2014
# Author: Fae - https://meta.wikimedia.org/wiki/User:Fae
# Permissions: CC-BY-SA
'''

import wikipedia, sys, config, urllib2, re, string, time, os, pprint
import json

site = wikipedia.getSite('meta','meta')  # target wiki: Meta-Wiki (the report page lives there)

def urltry(u, headers = { 'User-Agent' : 'Mozilla/5.0' } ):
  countErr=0
  x=''
  while x=='':
      try:
          req = urllib2.Request(u,None,headers)
          x = urllib2.urlopen(req)
          time.sleep(1)
      except:
          x=''
          countErr+=1
          if countErr>300: countErr=300  #  5 minutes between read attempts, eventually
          print '** ERROR',countErr,'\n ** Failed to read from '+u+'\n ** Pause for '+str(countErr*1)+' seconds and try again ['+time.strftime("%H:%M:%S")+']'
          time.sleep(1*countErr)
  return x

def htmltry(x, u):
    '''
    Read and return the body of response object x, retrying on failure.

    On a read error the url u is re-fetched with urltry() and the read is
    attempted again, pausing 2*countErr seconds (capped at 300 once the
    error count exceeds 200).  The original had an unreachable
    try/except/else branch (the try always returns), removed here.

    x -- response object from urltry()
    u -- url to re-fetch if the read fails
    '''
    countErr = 0
    while True:
        try:
            return x.read()
        except Exception:  # narrowed from bare except: so Ctrl-C still aborts
            x = urltry(u)
            countErr += 1
            if countErr > 200:
                p = 300
            else:
                p = countErr * 2
            print '** ERROR',countErr,'\n ** Failed to read xml'
            if countErr == 1:
                # Dump diagnostics on the first failure only.
                print 'xml ='+str(x)
                print 'url ='+u
            print ' ** Pause for '+str(p)+' seconds and try again'
            time.sleep(p)

def gurl(key, gid):
    '''
    Build the public JSON-in-script feed url for one sheet of a Google
    Spreadsheet identified by its document key and sheet gid.
    '''
    base = "https://spreadsheets.google.com/feeds/list/%s/%s/public/values"
    query = "?alt=json-in-script&callback=x"
    return (base % (key, gid)) + query

key = "0AvhjkTJIpW2zdDl1bVBuOU1jQUJwOHd5YmhmSzFaZHc"  # Google Spreadsheet document key
gid = "3"  # Sheet number; this is trial and error, no automatic lookup!

url = gurl(key, gid)

# The feed arrives wrapped as JSON-in-script: 'API callback\nx( <json> )'.
# Strip everything up to the callback's opening parenthesis, then drop the
# closing one; re-joining on ')' preserves any ')' inside the JSON itself.
html = htmltry(urltry(url),url)
html = ')'.join(html.split('API callback\nx(')[1].split(')')[0:-1])
data = json.loads(html)['feed']['entry']  # list of row objects from the sheet

# Map the feed's gsx$ column keys (derived from the spreadsheet's column
# titles) to the header labels used in the wikitable, in display order.
mapping = [ # Warning, if the spreadsheet titles change, these will too!
        [u'gsx$staff', 'Username'],
        [u'gsx$usecasereceived', 'Received'],
        [u'gsx$usecasestatus', 'Status'],
        [u'gsx$usecase', 'Usecase'],
        [u'gsx$rightsapproved', 'Rights'],
        [u'gsx$rightsapprovaldate','Approval'],
        ]

# Date part of the first entry's atom 'updated' timestamp (YYYY-MM-DD);
# presumably the spreadsheet's last-edit date — shown as "amended" below.
lastUpdated = data[0]['updated']['$t'].split('T')[0]
runDate = time.strftime("%Y-%m-%d")  # today, shown as "synchronized" date

#pprint.pprint(data)

# Build the wikitable: an explanatory preamble, a header row from the
# mapping, then one table row per spreadsheet entry.
# (Typo fix in the emitted text: "maintainance" -> "maintenance".)
report = u"This table is a mirror of the Google spreadsheet [https://docs.google.com/spreadsheet/pub?key="+key+" here]. The table was last <abbr title='The data in this wikitable being identical to the Google spreadsheet data at that time, apart from formatting.'>synchronized</abbr> on "+ runDate +" and the data here last <abbr title='This being the most recent edit date of the Google spreadsheet when last checked by Faebot.'>amended</abbr> by the WMF on "+lastUpdated+ ". For any maintenance issues, please leave a note for [[User talk:Fae|the bot operator]].\n\n{| class='wikitable sortable'\n!"
report += "!!".join([t[1] for t in mapping])

for row in data:  # iterate entries directly instead of range(len(data))
    # Skip placeholder/example rows present in the spreadsheet.
    if re.search("Example", row['gsx$staff']['$t']):
        continue
    report += "\n|-"
    for col, title in mapping:
        d = row[col]['$t'].encode('utf-8')
        if title == 'Username':
            # Link the username to its CentralAuth page.
            d = '[[Special:CentralAuth/'+d+'|'+d+']]'
        elif re.search(r'Received|Approval', title) and re.search(r"\d{1,2}\D\d{1,2}\D\d{4}", d):
            # Reorder m?d?yyyy dates to yyyy-m-d (NOTE(review): digits are not
            # zero-padded, so lexical sorting of single-digit months/days is
            # imperfect — preserved as original behavior).
            d = re.sub(r"(\d{1,2})\D(\d{1,2})\D(\d{4})", r"\3-\1-\2", d)
        # Centre-align the short date/status columns.
        ctext = ''
        if re.search('Received|Approval|Status', title):
            ctext = "align='center'|"
        report += "\n|" + ctext + d
report += "\n|}"

# Publish the rebuilt table, overwriting the target page on Meta.
log = wikipedia.Page(site, "WMF Advanced Permissions")
wikipedia.setAction("Update")  # edit summary used for the bot's save
log.put(report)