User:PiRSquared17/iwm.py.js

From Meta, a Wikimedia project coordination wiki

Note: After publishing, you may have to bypass your browser's cache to see the changes.

  • Firefox / Safari: Hold Shift while clicking Reload, or press either Ctrl-F5 or Ctrl-R (⌘-R on a Mac)
  • Google Chrome: Press Ctrl-Shift-R (⌘-Shift-R on a Mac)
  • Internet Explorer / Edge: Hold Ctrl while clicking Refresh, or press Ctrl-F5
  • Opera: Press Ctrl-F5.
/* Warning: this takes a very long time to finish!

import json, requests, re, sys, collections, time

# Query-string parameters of the MediaWiki API request: revisions of the
# "Interwiki_map" page, oldest first, 50 per batch, returning timestamp,
# revision id and slot content as formatversion-2 JSON.
_API_PARAMS = (
    ('action', 'query'),
    ('prop', 'revisions'),
    ('titles', 'Interwiki_map'),
    ('rvlimit', '50'),
    ('rvprop', 'timestamp|content|ids'),
    ('rvslots', '*'),
    ('rvdir', 'newer'),
    ('formatversion', '2'),
    ('format', 'json'),
)

# Base request URL. The pairs are joined by hand (no URL-encoding) so that
# the "|" and "*" characters appear literally in the string.
api = 'https://meta.wikimedia.org/w/api.php?' + '&'.join(
    '%s=%s' % pair for pair in _API_PARAMS
)

# Continuation suffix appended to `api`; empty for the very first request.
offset = ''

# Revision ids that the history walk skips outright.
# NOTE(review): presumably vandalised or otherwise broken revisions - confirm.
BAD_IDS = [
    42872, 45038, 94698, 97639, 112493,
    112541, 349899, 349903, 349907, 351587,
]

# prefix -> chronological list of (marker, revision id, timestamp) events.
iw_map = collections.defaultdict(list)

# Prefix -> URL map of the most recently accepted revision (none yet).
prev = dict()

# Walk the page history batch by batch and record, per prefix, every
# revision in which the prefix was added ('+'), removed (U+2212 minus sign)
# or had its target changed ('#'). Each accepted revision is compared with
# the previously accepted one, which is kept in `prev`.
while True:
    # A timeout keeps a stalled connection from hanging the run forever.
    response = requests.get(api + offset, timeout=60)
    # Fail on HTTP errors instead of trying to decode an error page as JSON.
    response.raise_for_status()
    results = response.json()

    for rev in results['query']['pages'][0]['revisions']:
        # Test the blacklist first so known-bad revisions are never parsed.
        if rev['revid'] in BAD_IDS:
            continue

        # NOTE(review): a revision whose text is hidden presumably comes back
        # without a 'content' key; skip it rather than die with a KeyError.
        content = rev['slots']['main'].get('content')
        if content is None:
            continue

        # Table rows look like "| prefix || url"; prefixes are compared
        # case-insensitively, so the keys are lower-cased.
        cur = dict(re.findall(r'\n\|\s*(.*?)\s*\|\|\s*(.*?)\r?\n', content))
        cur = {k.lower(): v for k, v in cur.items()}
        if not cur:
            # No rows could be parsed from this revision: ignore it so that
            # every prefix is not reported as removed and then re-added.
            continue

        stamp = (rev['revid'], rev['timestamp'])
        for key in set(cur) - set(prev):
            iw_map[key].append(('+',) + stamp)
        for key in set(prev) - set(cur):
            iw_map[key].append((u'\u2212',) + stamp)
        for key in set(prev) & set(cur):
            if prev[key] != cur[key]:
                iw_map[key].append(('#',) + stamp)
        prev = cur

    if 'continue' not in results:
        break
    offset = '&rvcontinue=%s' % results['continue']['rvcontinue']
    # Uncomment to throttle the requests: time.sleep(1)

# Code after the loop reads `cur` as "the current map". Rebind it to the last
# ACCEPTED revision: a skipped final revision must not masquerade as the
# current state.
cur = prev

# Emit the wikitext table of prefixes that are present in the current map
# (`cur`). For every prefix the link-count tool is queried over HTTP, which
# is what makes this part of the script slow.
print('''== Current interwikis ==
{| class="wikitable sortable"
|-
! Prefix
! First added
! Last added
! Last modified
! Modifications
! Total links
! Unique pages
! Unique targets''')

# Wikitext link to a revision diff; filled with (revision id, label).
DIFF_LINK = '[[Special:Diff/%d|%s]]'

# Raw string: "\d" inside a plain literal is an invalid escape sequence,
# which newer Python versions warn about.
TOTAL_ROW = re.compile(
    r'<td>TOTAL</td><td>(\d+)</td><td>(\d+)</td><td>(\d+)</td>')

# (connect, read) timeout in seconds. The read limit is generous because the
# counting query can be slow, but a dead connection must not hang forever.
TOOL_TIMEOUT = (10, 600)

for prefix in sorted(cur):
    tool_url = 'https://tools.wmflabs.org/pirsquared/iw.php?iw=%s' % prefix
    response = requests.get(tool_url + '&count=on', timeout=TOOL_TIMEOUT)
    response.raise_for_status()
    match = TOTAL_ROW.search(response.text)
    if match is None:
        # Name the offending prefix instead of failing with a bare IndexError.
        raise RuntimeError(
            'no TOTAL row in tool output for prefix %r' % prefix)
    total, unique, targets = (int(group) for group in match.groups())

    # Chronological (marker, revision id, timestamp) events of this prefix.
    events = iw_map[prefix]

    first = events[0]
    added = DIFF_LINK % (first[1], first[2].split('T')[0])

    # Most recent '+' event, computed afresh for every prefix so that a value
    # from the previous row can never leak into this one.
    additions = [event for event in events if event[0] == '+']
    if additions:
        last_added = DIFF_LINK % (additions[-1][1],
                                  additions[-1][2].split('T')[0])
    else:
        last_added = ''

    latest = events[-1]
    modified = DIFF_LINK % (latest[1], latest[2].split('T')[0])

    # Full history: one diff link per event, labelled with its marker.
    history = ', '.join(DIFF_LINK % (event[1], event[0]) for event in events)

    fmt = (tool_url, prefix, added, last_added, modified, history,
           total, unique, targets)
    print('|-\n| [%s %s]\n| %s\n| %s\n| %s\n| %s\n| %d\n| %d\n| %d' % fmt)
    # Flush after every row so progress is visible during the long run.
    sys.stdout.flush()

print('|}')

def _former_row(prefix, events):
    """Return the wikitext table row for a prefix that is no longer mapped.

    `events` is the prefix's chronological list of
    (marker, revision id, timestamp) tuples. The row holds the prefix, a
    dated diff link for the first and for the last event, and a
    comma-separated list of diff links labelled with each event's marker.
    """
    def link(event, label):
        return '[[Special:Diff/%d|%s]]' % (event[1], label)

    def day(event):
        # Keep only the date part of the ISO timestamp.
        return event[2].split('T')[0]

    oldest, newest = events[0], events[-1]
    cells = [
        prefix,
        link(oldest, day(oldest)),
        link(newest, day(newest)),
        ', '.join(link(event, event[0]) for event in events),
    ]
    return '|-\n| ' + '\n| '.join(cells)


# Table of prefixes that appear in the recorded history but not in the
# current map.
print('\n'.join([
    '== Former interwikis ==',
    '{| class="wikitable sortable"',
    '|-',
    '! Prefix',
    '! Added',
    '! Removed',
    '! Modifications',
]))

for prefix in sorted(name for name in iw_map if name not in cur):
    print(_former_row(prefix, iw_map[prefix]))
    # Flush row by row, matching the behaviour of the rest of the script.
    sys.stdout.flush()

print('|}')

*/