User:TolBot/Task 1

TolBot: Task 1
Status  Approved (inactive)
Wiki w:en:
Summary Update COVID-19 vaccination data templates
Page(s) Template:COVID-19 vaccination data
Period daily
Language Python
Supervision supervised
Exclusion compliant? No

Source

Version 1.1.1, updated 14 May 2021.
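### Configuration (run = False only prints the updated wikitext; set run = True and fill in bot_username/bot_password to save the edit)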
run = False
page = 'Template:COVID-19 vaccination data'
edit_summary = ''
bot_username = ''
bot_password = ''

### Variables

skipped_locations = ('Northern Ireland', 'Scotland', 'Wales', 'England', 'Europe', 'Africa', 'Oceania', 'North America', 'South America', 'Asia', 'High income', 'Upper middle income', 'Lower middle income', 'Low income')
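# Aggregate and subnational rows in the OWID data; generate() skips these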

#regex_vac_prev = '<!-- DATA BELOW THIS LINE UPDATED AUTOMATICALLY -->\n'
#regex_vac_post = '\n<!-- DATA ABOVE THIS LINE UPDATED AUTOMATICALLY -->'
regex_vac_prev = '<!-- PASTE UPDATED DATA BELOW THIS LINE -->\n'
regex_vac_post = '\n<!-- UPDATED DATA ABOVE THIS LINE -->'
regex_vac_str = regex_vac_prev + r'(.*?)' + regex_vac_post
regex_date_prev_re = r'\(as of <!--DATE OF LATEST AVAILABLE DATA \(USUALLY ONE DAY PRIOR TO THE REFRESH DATE\), DO NOT CHANGE THIS DATE UNLESS TABLE DATA HAS BEEN UPDATED-->' # Parentheses escaped (regex metacharacters)
regex_date_post_re = r'\{\{efn\|' # Curly brackets and pipe escaped (regex metacharacters)
regex_date_str = regex_date_prev_re + r'(.*?)' + regex_date_post_re
regex_date_prev = '(as of <!--DATE OF LATEST AVAILABLE DATA (USUALLY ONE DAY PRIOR TO THE REFRESH DATE), DO NOT CHANGE THIS DATE UNLESS TABLE DATA HAS BEEN UPDATED-->'
regex_date_post = '{{efn|'
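# Example: re.search(regex_vac_str, page_text, flags=re.DOTALL).group(1) would return
# whatever currently sits between the two marker comments in page_text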

api = 'https://en.wikipedia.org/w/api.php'
api_par_token = '?action=query&meta=tokens&type=login&format=json'
api_par_login = '?action=login&format=json'
api_par_parse = '?action=parse&prop=wikitext&format=json'
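# meta=tokens without an explicit type defaults to a CSRF token, which is what action=edit expects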
api_par_edit_token = '?action=query&meta=tokens&format=json'
api_par_edit = '?action=edit&format=json'

### Functions

# Import
import pandas as pd
from datetime import datetime as dt
import requests
import re
import json
import numpy as np
import subprocess

# Login
def login():
  session = requests.Session()
  token_obj = session.get(api + api_par_token)
  token_json = token_obj.json()
  token = token_json['query']['tokens']['logintoken']
  params = {
      'lgname': bot_username,
      'lgpassword': bot_password,
      'lgtoken': token
      }
  login_obj = session.post(api + api_par_login, data=params)
  login_json = login_obj.json()
  if login_json['login']['result'] != 'Success':
    raise ConnectionError('Login failed.')
  userid = login_json['login']['lguserid']
  username = login_json['login']['lgusername']
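  # userid and username are extracted but currently unused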
  return session

# Download & load CSV file
def load():
  try:
    subprocess.run(['wget', '-q', '-O', 'vaccinations.csv', 'https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/vaccinations.csv'], check=True) # check=True makes a failed download raise
  except (subprocess.CalledProcessError, OSError): # OSError covers wget not being installed
    raise ConnectionError('Could not get vaccination data')
  return pd.read_csv('vaccinations.csv')

# Sort and remove duplicates
def clean(vac):
  vac.sort_values('date', ascending=False, inplace=True) # Sort by date
  vac.drop_duplicates('location', inplace=True) # Drop duplicate locations (so only the most recent remains)
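  # e.g. two rows for the same location dated 2021-05-13 and 2021-05-14 reduce to just the 2021-05-14 row, since the newest date sorts first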
  vac['sort'] = vac['total_vaccinations']
  vac['sort'] = np.where(vac['sort'].isnull(), vac['people_vaccinated'], vac['sort']) # Sort by people vaccinated if no total vaccinations
  vac['sort'] = np.where(vac['sort'].isnull(), vac['people_fully_vaccinated'], vac['sort']) # Sort by people fully vaccinated if no total vaccinations or people vaccinated
  vac.sort_values('sort', ascending=False, inplace=True) # Sort
  if len(vac.index) < 100 or len(vac.index) > 1000: # Sanity check
    raise ValueError('Vaccination data abnormally sized')
  return vac

# Generate wikitext
def generate(vac):
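  # Fallback order per location: people_vaccinated, then total_vaccinations (flagged dark red), then people_fully_vaccinated (flagged dark orange); locations with none of these are skipped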
  text = ''
  for index, row in vac.iterrows():
    if row['location'] == 'World':
      row_text = '|{{pad|0.1em}}[[File:Emojione 1F310.svg|23x15px|alt=|link=]]{{pad|0.4em}}World{{efn|name=world-total}}'
    elif row['location'] in skipped_locations:
      continue
    else:
      row_text = '|{{flag+link|COVID-19 pandemic in|' + row['location'] + '}}'
    
    if not pd.isna(row['people_vaccinated']): # Use people vaccinated if available
      num = int(row['people_vaccinated'])
      row_text += '||' + '{:,}'.format(num) + '||' + str(round(row['people_vaccinated_per_hundred'], 1)) + '%'
      #row_text += '||' + f'{num:,}' + '||' + str(round(row['people_vaccinated_per_hundred'], 1)) + '%' # Py ≥ 3.6
    elif not pd.isna(row['total_vaccinations']): # Use total vaccinations if available
      num = int(row['total_vaccinations'])
      row_text += '{{efn|name=incorrect-total}}||{{font color|darkred|' + '{:,}'.format(num) + '}}||--'
      #row_text += '{{efn|name=incorrect-total}}||{{font color|darkred|' + f'{num:,}' + '}}||--' # Py ≥ 3.6
    elif not pd.isna(row['people_fully_vaccinated']): # Use people fully vaccinated if available
      num = int(row['people_fully_vaccinated'])
      row_text += '{{efn|This country\'s data reflects {{font color|darkorange|people fully vaccinated}}, not people vaccinated at least once.}}||{{font color|darkorange|' + '{:,}'.format(num) + '}}||--'
      #row_text += '{{efn|This country\'s data reflects {{font color|darkorange|people fully vaccinated}}, not people vaccinated at least once.}}||{{font color|darkorange|' + f'{num:,}' + '}}||--' # Py ≥ 3.6
    else: # Skip if no data
      continue
    
    row_text += '<tr>'
    text += row_text + '\n'
  return text

# Get current date
def get_date():
  date_obj = dt.utcnow()
  date_year = str(date_obj.year)
  date_month = date_obj.strftime('%B')
  date_day = str(date_obj.day)
  date = date_day + ' ' + date_month + ' ' + date_year
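  #date = date_obj.strftime('%-d %B %Y') # Equivalent on glibc platforms; %-d is not portable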
  return date

# Get wikitext from page
def parse(session):
  page_obj = session.get(api + api_par_parse + '&page=' + page)
  page_json = page_obj.json()
  page_wikitext = page_json['parse']['wikitext']['*']
  return page_wikitext

# Find and replace data with regex
def change(wikitext_old, vac_wikitext, date):
  repl_vac = regex_vac_prev + vac_wikitext + regex_vac_post
  repl_date = regex_date_prev + date + regex_date_post
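  # Note: re.sub treats backslashes in a string replacement specially; if the generated
  # wikitext could ever contain a backslash, pass a callable instead, e.g. lambda m: repl_vac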
  wikitext_vac = re.sub(regex_vac_str, repl_vac, wikitext_old, flags=re.DOTALL)
  wikitext_date = re.sub(regex_date_str, repl_date, wikitext_vac)
  return wikitext_date

# Edit the page with new data
def edit(session, wikitext_new):
  token_obj = session.get(api + api_par_edit_token)
  token_json = token_obj.json()
  token = token_json['query']['tokens']['csrftoken']
  params = {
      'title': page,
      'text': wikitext_new,
      'summary': edit_summary,
      'token': token
  }
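  # A 'bot': '1' entry could be added to params to mark this as a bot edit (requires the bot flag)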
  edit_obj = session.post(api + api_par_edit, data=params)
  edit_json = edit_obj.json()
  return edit_json

# Run

vac_wikitext = generate(clean(load()))
session = login()
wikitext_old = parse(session)
date = get_date()
wikitext_new = change(wikitext_old, vac_wikitext, date)
if run: result = edit(session, wikitext_new)
else: print(wikitext_new)

Licensing

This work (all source code in this level 2 section) is licensed under:

Version history

  • 1.0.0-pre: Initial release. 17 April 2021; Python 3.7.
  • 1.0.0: Fix sorting bug (report: w:en: diff #1018455365); add exception handling for initial variables not defined. 18 April 2021; Python 3.7.
  • 1.0.1: Use .format() instead of f-strings for compatibility with Python <3.6. 21 April 2021; Python 3.5–3.7.
  • 1.1.0: Improve generate() by adding fallback to people fully vaccinated. 7 May 2021; Python 3.5–3.7.
  • 1.1.1: Add 'High income', 'Upper middle income', 'Lower middle income', 'Low income' to the list of locations to skip. 14 May 2021; Python 3.5–3.7.