Jump to content

User:TolBot/Task 4

From Meta, a Wikimedia project coordination wiki
TolBot: Task 4
Status  Approved
Wiki w:en:
Summary Remove w:en:Template:Draft categories from mainspace articles
Page(s) Category: w:en:Category:Articles using draft categories
Period weekly
Language Python
Supervision automatic
Excl. compl.? No

Source

[edit]
Version 1.1.1, updated 7 Sep 2021.
import requests
import json
import re
import datetime as dt

# Live/dry-run switch read by main(): when True pages are edited and the log
# page is appended to; when False the rewritten wikitext is only printed.
run = True

# Edit summaries: one for article edits, one (a prefix; main() appends the
# page count) for the log-page edit.
edit_summary = '[[m:User:TolBot/Task 4|Task 4]]: Remove [[Template:Draft categories]] from mainspace article'
edit_summary_log = '[[m:User:TolBot/Task 4|Task 4]]: Log'

# Page that main() appends one section to per run.
log_page = 'User:TolBot/Task 4/Log'

# Tracking category whose members are processed.
cat_title = 'Category:Articles using draft categories'

# Matches a newline followed by "{{Draft categories|" / "{{draft cats|" (with
# an optional "1="), then lazily captures everything up to a newline that is
# directly followed by "}}". DOTALL lets the captured group span several lines.
regex_main_p = r'\n\{\{[Dd]raft (?:categories|cats)\|(?:1\=)?(.*?)\n\}\}'
regex_main = re.compile(regex_main_p, flags=re.DOTALL)
# Matches the opening of a colon-prefixed category link, "[[:Category:" ...
regex_cat_p = r'\[\[:[Cc]ategory:'
regex_cat = re.compile(regex_cat_p)
# ... and this is the text it is replaced with (no leading colon).
regex_cat_r = r'[[Category:'
# Matches a string that is empty or consists only of whitespace (no MULTILINE
# flag, so the anchors apply to the string as a whole).
regex_blank_p = r'^\s*?$'
regex_blank = re.compile(regex_blank_p)
# Bundle handed to replace(): 'cat' is a (pattern, replacement) pair.
regexes = {
    'main': regex_main,
    'cat': (regex_cat, regex_cat_r),
    'blank': regex_blank
}

# Bot account credentials; both are empty strings in this published source.
bot_username = ''
bot_password = ''
bot_credentials = (bot_username, bot_password)

# API endpoint; the query string requests JSON output in format version 2
# for every call made through this URL.
api = 'https://en.wikipedia.org/w/api.php?format=json&formatversion=2'

# Login
def login(api, credentials):
  """Open a requests session and log it in with action=login.

  Args:
    api: URL of the MediaWiki API endpoint.
    credentials: (username, password) pair.

  Returns:
    The logged-in requests.Session, to be reused for all later calls.

  Raises:
    RuntimeError: if the API does not report the login as successful.
      Without this check a failed login would hand back an unauthenticated
      session and every later edit would be attempted logged out.
  """
  session = requests.Session()
  # A login token has to be fetched first and sent back with the credentials.
  token_params = {
      'action': 'query',
      'meta': 'tokens',
      'type': 'login'
  }
  token_q = session.post(api, data=token_params).json()
  token = token_q['query']['tokens']['logintoken']
  username, password = credentials
  login_params = {
      'action': 'login',
      'lgname': username,
      'lgpassword': password,
      'lgtoken': token
  }
  login_q = session.post(api, data=login_params).json()
  outcome = login_q.get('login', {})
  if outcome.get('result') != 'Success':
    # Prefer the API's own explanation; fall back to the raw response.
    raise RuntimeError('Login failed: ' + str(outcome.get('reason', login_q)))
  return session

# Get and parse current data on page
def parse(api, session, page):
  """Return the wikitext of `page` as reported by the API's parse action.

  Args:
    api: URL of the MediaWiki API endpoint.
    session: object with a requests-style post() method.
    page: title of the page to read.

  Returns:
    The value found under ['parse']['wikitext'] in the JSON response.
  """
  request = dict(action='parse', prop='wikitext', page=page)
  reply = session.post(api, data=request)
  return reply.json()['parse']['wikitext']

# Edit page
def edit(api, session, page, text, summary, **kwargs):
  """Save an edit to `page` and return the decoded API response.

  A fresh CSRF token is requested before every edit.

  Args:
    api: URL of the MediaWiki API endpoint.
    session: object with a requests-style post() method.
    page: title of the page to edit.
    text: full replacement text; not sent when `appendtext` is given.
    summary: edit summary.
    **kwargs: only `appendtext` is recognised; when present its value is
      appended to the page instead of replacing the page with `text`.

  Returns:
    The JSON-decoded response of the edit request.
  """
  csrf_reply = session.post(api, data={'action': 'query', 'meta': 'tokens'}).json()
  csrf = csrf_reply['query']['tokens']['csrftoken']
  if 'appendtext' in kwargs:
    # Append mode: the full-text field is left out of the request entirely.
    payload = {
        'action': 'edit',
        'title': page,
        'summary': summary,
        'token': csrf,
        'appendtext': kwargs['appendtext'],
    }
  else:
    payload = {
        'action': 'edit',
        'title': page,
        'text': text,
        'summary': summary,
        'token': csrf,
    }
  return session.post(api, data=payload).json()

# Get pages in category
def category_members(api, session, category):
  """Return the titles of all pages in `category`.

  The API returns category members in batches; when more remain it adds a
  'continue' object whose values must be sent back with the next request.
  All batches are followed so that no member is dropped when the category
  holds more pages than a single response can carry.

  Args:
    api: URL of the MediaWiki API endpoint.
    session: object with a requests-style post() method.
    category: full category title, including the "Category:" prefix.

  Returns:
    List of page titles, in the order the API returned them.
  """
  params = {
      'action': 'query',
      'list': 'categorymembers',
      'cmtitle': category,
      'cmlimit': 'max'
  }
  titles = []
  while True:
    q = session.post(api, data=params).json()
    titles.extend(page['title'] for page in q['query']['categorymembers'])
    if 'continue' not in q:
      break
    # Merge the continuation values into the next request as-is.
    params.update(q['continue'])
  return titles

# Regex replacement
def replace(text, regexes):
  """Unwrap every draft-categories template in `text`.

  Each template matched by regexes['main'] is replaced by its own captured
  contents, with colon-prefixed category links turned into real category
  links. A template whose contents are blank is replaced by one newline.

  A replacement function is used rather than a replacement string so that
  (a) each template is replaced by its own contents, not by those of the
  first template found, and (b) backslashes in the page text are inserted
  literally instead of being read as escapes or group references.

  Args:
    text: wikitext of the page.
    regexes: dict with 'main' (compiled pattern, group 1 = template
      contents), 'cat' ((compiled pattern, replacement string) pair) and
      'blank' (compiled pattern recognising blank contents).

  Returns:
    The rewritten wikitext; `text` unchanged when no template matches.
  """
  regex_main = regexes['main']
  regex_cat, repl_cat = regexes['cat']
  regex_blank = regexes['blank']

  def _unwrap(match):
    # Contents of this particular template, with links de-coloned.
    text_cats = regex_cat.sub(repl_cat, match.group(1))
    if regex_blank.search(text_cats):
      return '\n'
    return text_cats

  return regex_main.sub(_unwrap, text)

# Main function
def main(api, creds, summary, cat, regexes, log_page):
  """Run one pass of the task: rewrite every category member and log it.

  Reads the module-level `run` flag: when it is true each page is saved and
  a log section is appended to `log_page`; otherwise the rewritten wikitext
  is printed and nothing is saved.

  Args:
    api: URL of the MediaWiki API endpoint.
    creds: (username, password) pair passed to login().
    summary: edit summary for the page edits.
    cat: title of the category whose members are processed.
    regexes: pattern bundle passed to replace().
    log_page: title of the page the run log is appended to.
  """
  timestamp = str(dt.datetime.utcnow())
  session = login(api, creds)
  titles = category_members(api, session, cat)
  # Keep each response together with the title it belongs to: an error
  # response carries no 'edit' object and therefore no title of its own.
  results = []
  for title in titles:
    wikitext = parse(api, session, title)
    wikitext = replace(wikitext, regexes)
    if run:
      results.append((title, edit(api, session, title, wikitext, summary)))
    else:
      print('\n-----\n\n', wikitext)
  lines = []
  for title, result in results:
    out = '* '
    # A failed request must not abort the run before the log is written,
    # so a missing 'edit' object is treated as "nothing was saved".
    ed = result.get('edit', {})
    if 'newrevid' in ed:
      out += '[[Special:Diff/' + str(ed['newrevid']) + '|'
      out += 'diff #' + str(ed['newrevid']) + ']] to '
    else:
      out += 'Could not process '
    out += '[[' + ed.get('title', title) + ']] '
    if 'newtimestamp' in ed:
      out += 'at ' + ed['newtimestamp']
    error = result.get('error')
    if isinstance(error, dict) and 'code' in error:
      # Record why the API refused the edit.
      out += '(API error: ' + str(error['code']) + ')'
    lines.append(out)
  log = '\n'.join(lines)
  log = '\n== ' + timestamp + ' ==\n' + log
  log_summary = edit_summary_log + ' ' + str(len(lines)) + ' page(s)'
  if run:
    edit(api, session, log_page, None, log_summary, appendtext=log)

# Only run the bot when this file is executed as a script, so that importing
# it (for example to reuse replace()) does not log in and edit pages.
if __name__ == '__main__':
  main(api, bot_credentials, edit_summary, cat_title, regexes, log_page)

Licensing

[edit]

This work (all source code in this level 2 section) is licensed under:

Version history

[edit]
  • 1.0.0: Initial release. 27 Jul 2021.
  • 1.1.0: Add logging, split replacement into function, process draft categories contents (fix category links, trim whitespace). 24 Aug 2021.
  • 1.1.1: Regex bug fix (use match.group() instead of match[]). 7 Sep 2021.