User:TolBot/Task 4
Appearance
| Status | |
|---|---|
| Wiki | w:en: |
| Summary | Remove w:en:Template:Draft categories from mainspace articles |
| Page(s) | Category: w:en:Category:Articles using draft categories |
| Period | weekly |
| Language | Python |
| Supervision | automatic |
| Excl. compl.? | No |
Source
| Version 1.1.1, updated 7 Sep 2021. |
|---|
import requests
import json
import re
import datetime as dt
# --- Run mode ---
# main() saves edits and appends to the log page only when this is true;
# otherwise it prints the rewritten wikitext instead of saving it.
run = True

# --- Edit summaries and page titles ---
edit_summary = '[[m:User:TolBot/Task 4|Task 4]]: Remove [[Template:Draft categories]] from mainspace article'
edit_summary_log = '[[m:User:TolBot/Task 4|Task 4]]: Log'
log_page = 'User:TolBot/Task 4/Log'
cat_title = 'Category:Articles using draft categories'

# --- Pattern sources ---
# A "{{Draft categories|...}}" / "{{Draft cats|...}}" call that begins right
# after a newline and whose closing "}}" starts a line; group 1 captures the
# parameter text (after an optional "1=").
regex_main_p = r'\n\{\{[Dd]raft (?:categories|cats)\|(?:1\=)?(.*?)\n\}\}'
# Opening of a colon-prefixed category link, "[[:Category:".
regex_cat_p = r'\[\[:[Cc]ategory:'
# Replacement for the above: the same opening without the leading colon.
regex_cat_r = r'[[Category:'
# Text made up of whitespace only (or nothing at all).
regex_blank_p = r'^\s*?$'

# --- Compiled patterns ---
# DOTALL lets group 1 of the main pattern span several lines.
regex_main = re.compile(regex_main_p, re.DOTALL)
regex_cat = re.compile(regex_cat_p)
regex_blank = re.compile(regex_blank_p)

# Bundle handed to replace(): 'cat' pairs a pattern with its replacement.
regexes = dict(
    main=regex_main,
    cat=(regex_cat, regex_cat_r),
    blank=regex_blank,
)

# --- Account and endpoint ---
bot_username = ''
bot_password = ''
bot_credentials = bot_username, bot_password
api = 'https://en.wikipedia.org/w/api.php?format=json&formatversion=2'
# Login
def login(api, credentials):
    """Open an HTTP session and log it in through the MediaWiki API.

    Args:
        api: URL of the wiki's api.php endpoint.
        credentials: ``(username, password)`` pair.

    Returns:
        The ``requests.Session`` used for the login requests.

    Raises:
        RuntimeError: if the API does not report the login as ``Success``.
    """
    session = requests.Session()
    token_params = {
        'action': 'query',
        'meta': 'tokens',
        'type': 'login'
    }
    token_q = session.post(api, data=token_params).json()
    token = token_q['query']['tokens']['logintoken']
    username, password = credentials
    login_params = {
        'action': 'login',
        'lgname': username,
        'lgpassword': password,
        'lgtoken': token
    }
    login_q = session.post(api, data=login_params).json()
    # Refuse to continue with a session that is not logged in: every later
    # edit would otherwise be attempted without the bot account.
    outcome = login_q.get('login', {})
    if outcome.get('result') != 'Success':
        raise RuntimeError(
            'Login failed: result={!r} reason={!r}'.format(
                outcome.get('result'), outcome.get('reason')
            )
        )
    return session
# Get and parse current data on page
def parse(api, session, page):
    """Return the wikitext of *page* as reported by ``action=parse``.

    Args:
        api: URL of the wiki's api.php endpoint.
        session: object whose ``post`` method sends the request.
        page: title of the page to read.

    Returns:
        The value stored under ``parse.wikitext`` in the JSON reply.
    """
    reply = session.post(
        api,
        data={'action': 'parse', 'prop': 'wikitext', 'page': page},
    )
    return reply.json()['parse']['wikitext']
# Edit page
def edit(api, session, page, text, summary, **kwargs):
    """Save *page* through ``action=edit`` and return the decoded JSON reply.

    Args:
        api: URL of the wiki's api.php endpoint.
        session: object whose ``post`` method sends the requests.
        page: title of the page to edit.
        text: new page text; not sent when ``appendtext`` is given.
        summary: edit summary.
        **kwargs: only ``appendtext`` is read. When present, its value is
            sent as ``appendtext`` and the ``text`` field is left out.

    Returns:
        The parsed JSON body of the edit request.
    """
    # A CSRF token is requested for each call.
    csrf_reply = session.post(api, data={'action': 'query', 'meta': 'tokens'}).json()
    payload = {
        'action': 'edit',
        'title': page,
        'text': text,
        'summary': summary,
        'token': csrf_reply['query']['tokens']['csrftoken'],
    }
    if 'appendtext' in kwargs:
        # Appending and replacing are alternatives: swap the field over.
        payload.pop('text')
        payload['appendtext'] = kwargs['appendtext']
    return session.post(api, data=payload).json()
# Get pages in category
def category_members(api, session, category):
    """List the titles of every page in *category*.

    The API returns members in batches; when a reply carries a ``continue``
    block, its values are merged into the next request until no such block
    is returned, so large categories are not cut off after the first batch.

    Args:
        api: URL of the wiki's api.php endpoint.
        session: object whose ``post`` method sends the requests.
        category: full category title, passed as ``cmtitle``.

    Returns:
        A list of page titles in the order the API returned them.
    """
    base_params = {
        'action': 'query',
        'list': 'categorymembers',
        'cmtitle': category,
        'cmlimit': 'max'
    }
    titles = []
    continuation = {}
    while True:
        # Build a fresh dict per request so the base parameters stay intact.
        params = dict(base_params)
        params.update(continuation)
        q = session.post(api, data=params).json()
        titles.extend(page['title'] for page in q['query']['categorymembers'])
        if 'continue' not in q:
            break
        continuation = q['continue']
    return titles
# Regex replacement
def replace(text, regexes):
    """Replace each draft-categories template call with its own contents.

    For every match of the main pattern, group 1 is taken, the category
    pattern is substituted with its replacement inside it, and the result
    replaces the whole match. Contents that are blank according to the blank
    pattern are replaced by a single newline.

    Each match is processed on its own, and the replacement is inserted
    literally, so backslashes in the captured text are neither rejected nor
    interpreted as group references.

    Args:
        text: page wikitext.
        regexes: mapping with ``'main'`` (compiled pattern with one group),
            ``'cat'`` (``(compiled pattern, replacement)`` pair) and
            ``'blank'`` (compiled pattern).

    Returns:
        The rewritten text; unchanged when the main pattern does not match.
    """
    regex_main = regexes['main']
    regex_cat, repl_cat = regexes['cat']
    regex_blank = regexes['blank']

    def unwrap(match):
        # Returning a string from a replacement function bypasses template
        # escape processing, unlike passing the string to sub() directly.
        text_cats = regex_cat.sub(repl_cat, match.group(1))
        if regex_blank.search(text_cats):
            return '\n'
        return text_cats

    return regex_main.sub(unwrap, text)
# Main function
def main(api, creds, summary, cat, regexes, log_page):
    """Process every page in *cat* and append a report to *log_page*.

    Each member page is read, passed through ``replace`` and, when the
    module-level ``run`` flag is true, saved with *summary*; otherwise the
    rewritten wikitext is printed. When ``run`` is true, one log section with
    a bullet per saved page is appended to *log_page*.

    Args:
        api: URL of the wiki's api.php endpoint.
        creds: ``(username, password)`` pair handed to ``login``.
        summary: edit summary for the page edits.
        cat: category title whose members are processed.
        regexes: pattern bundle handed to ``replace``.
        log_page: title of the page that receives the log section.
    """
    # NOTE(review): datetime.utcnow() is deprecated as of Python 3.12; it is
    # kept so the log heading format stays exactly as before.
    timestamp = str(dt.datetime.utcnow())
    session = login(api, creds)
    titles = category_members(api, session, cat)
    # Keep the requested title next to each response so a log line can still
    # name the page when the response carries no 'edit' data.
    results = []
    for title in titles:
        wikitext = parse(api, session, title)
        wikitext = replace(wikitext, regexes)
        if run:
            results.append((title, edit(api, session, title, wikitext, summary)))
        else:
            print('\n-----\n\n', wikitext)
    lines = [_log_line(title, response) for title, response in results]
    log = '\n'.join(lines)
    log = '\n== ' + timestamp + ' ==\n' + log
    log_summary = edit_summary_log + ' ' + str(len(lines)) + ' page(s)'
    if run:
        # text is None here: the log section is sent as appendtext instead.
        edit(api, session, log_page, None, log_summary, appendtext=log)


def _log_line(title, response):
    """Format one wikitext bullet describing the outcome of an edit request.

    Falls back to *title* and to the "Could not process" wording when
    *response* has no ``edit`` entry or that entry lacks the expected keys.
    """
    ed = response.get('edit', {})
    out = '* '
    if 'newrevid' in ed:
        revid = str(ed['newrevid'])
        out += '[[Special:Diff/' + revid + '|'
        out += 'diff #' + revid + ']] to '
    else:
        out += 'Could not process '
    out += '[[' + ed.get('title', title) + ']] '
    if 'newtimestamp' in ed:
        out += 'at ' + ed['newtimestamp']
    return out
# Start a run only when executed as a script, so that importing this module
# does not log in and edit pages as a side effect.
if __name__ == '__main__':
    main(api, bot_credentials, edit_summary, cat_title, regexes, log_page)
|
Licensing
This work (all source code in this level 2 section) is licensed under:
- The Creative Commons Attribution-ShareAlike 3.0 (CC BY-SA 3.0) license
- The Creative Commons Attribution-ShareAlike 4.0 (CC BY-SA 4.0) license
- The GNU Lesser General Public License (LGPL), version 3 or any later version
- The GNU Free Documentation License (GFDL), version 1.3 or any later version, with no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts
Version history
- 1.0.0: Initial release. 27 Jul 2021.
- 1.1.0: Add logging, split replacement into function, process draft categories contents (fix category links, trim whitespace). 24 Aug 2021.
- 1.1.1: Regex bug fix (use match.group() instead of match[]). 7 Sep 2021.