User:Sakura emad/cat.js
Note: After publishing, you may have to bypass your browser's cache to see the changes.
- Firefox / Safari: Hold Shift while clicking Reload, or press either Ctrl-F5 or Ctrl-R (⌘-R on a Mac)
- Google Chrome: Press Ctrl-Shift-R (⌘-Shift-R on a Mac)
- Internet Explorer / Edge: Hold Ctrl while clicking Refresh, or press Ctrl-F5
- Opera: Press Ctrl-F5.
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
This bot finds the English Wikipedia counterpart of a non-English Wikipedia
page and fetches its categories. If any of those categories has a counterpart
in the origin Wikipedia, the bot then adds the page to those categories.
"""
#
# (C) User:Huji, 2021
#
# Distributed under the terms of the MIT license.
#
import pywikibot
from pywikibot import pagegenerators
#import fa_cosmetic_changes_core as fccc
from functools import lru_cache
from pywikibot.bot import (
SingleSiteBot,
ExistingPageBot,
NoRedirectPageBot,
AutomaticTWSummaryBot,
)
import re
# Show help with the parameter -help.
# pywikibot replaces the "&params;" placeholder, wherever it appears in the
# module docstring, with the page generator help text. The key had been
# mangled into "¶ms;" by HTML-entity decoding ("&para" -> "¶").
docuReplacements = {"&params;": pagegenerators.parameterHelp}
class CategorizeBot(
    SingleSiteBot,
    ExistingPageBot,
    NoRedirectPageBot,
    AutomaticTWSummaryBot,
):
    """Copy categories from a page's English Wikipedia counterpart.

    For every page the bot looks up the interwiki link to enwiki, walks the
    categories of the English page, maps each of them to its "ckb" language
    link and appends the eligible ones to the local page text.
    """

    update_options = {
        "cosmetic": False,  # Whether to run cosmetic changes script
    }

    def __init__(self, generator, **kwargs):
        """
        @param generator: the page generator that determines which pages
            to work on
        @type generator: generator
        """
        super(CategorizeBot, self).__init__(site=True, **kwargs)
        self.generator = generator
        # Pages that are members of any of these categories are left alone.
        self.skip_categories = [
            "ئەو پەڕانەی ھەمان پۆل پەسەند ناکەن",
        ]
        self.summary = (
            "[[ویکیپیدیا:پۆلێنکردنی وتارە ھاوسەنگەکان|بۆت]]: زیادکردنی پۆلە ھاوسەنگەکان"
        )
        self.allowednamespaces = [0, 4, 6, 10, 12, 14, 16]
        # .get() so that building the bot without the option does not raise
        # KeyError; the default mirrors update_options.
        self.cosmetic_changes = kwargs.get("cosmetic", False)
        self.site_fa = pywikibot.Site("ckb")
        self.site_en = pywikibot.Site("en")
        # When True, parents of a newly added category are stripped from the
        # page text.
        self.remove_parent = False

    def list_intersection(self, list1, list2):
        """Return the items of list1 that also occur in list2, in list1 order."""
        return [value for value in list1 if value in list2]

    @lru_cache(maxsize=None)
    def get_existing_cats(self, page):
        """Get a list() of category titles (without namespace) the page is in.

        The result is memoized per page for the lifetime of the process.
        """
        return [c.title(with_ns=False) for c in page.categories()]

    @lru_cache(maxsize=None)
    def check_eligibility(self, candidate):
        """Determine if the local category is addable.

        @param candidate: local category title without the namespace prefix
        @return: False when the category page does not exist or sits in one
            of the ineligible parent categories, True otherwise
        """
        cat = pywikibot.Page(self.site_fa, "پۆل:%s" % candidate)
        if not cat.exists():
            return False
        cat_cats = self.get_existing_cats(cat)
        ineligible_parents = [
            "پۆلە شاردراوەکان",
            "پۆلەکانی شوێنکەوتن",
            "پۆلەکانی کۆلکە",
            "پۆلە ڕەوانەکراوە نەرمەکان"
        ]
        return not self.list_intersection(ineligible_parents, cat_cats)

    @lru_cache(maxsize=None)
    def check_eligibility_en(self, candidate):
        """Determine if the English category is worth mapping.

        @param candidate: enwiki category title without the namespace prefix
        @return: False when the category sits in one of the ineligible
            parent categories, True otherwise
        """
        cat = pywikibot.Page(self.site_en, "Category:%s" % candidate)
        cat_cats = self.get_existing_cats(cat)
        ineligible_parents = [
            "Hidden categories",
            "Tracking categories",
            "Stub categories"
        ]
        return not self.list_intersection(ineligible_parents, cat_cats)

    @lru_cache(maxsize=None)
    def is_child_category_of(self, child, parent):
        """Return True if local category `child` is directly inside `parent`.

        Both arguments are category titles without the namespace prefix.
        Only direct membership is checked, not deeper ancestry.
        """
        child_cat = pywikibot.Page(self.site_fa, "پۆل:%s" % child)
        return parent in self.get_existing_cats(child_cat)

    def treat_page(self):
        """Process the current page that the bot is working on.

        @return: False when the page is skipped, None otherwise
        """
        page = self.current_page
        if page.namespace() not in self.allowednamespaces:
            pywikibot.output("Namespace not allowed!")
            return False

        # Locate the English counterpart through the page's language links.
        remote_page = None
        for ll in page.langlinks():
            if ll.site.code == "en":
                remote_page = pywikibot.Page(ll)
                break
        if remote_page is None:
            pywikibot.output("No interwiki link to enwiki; skipped.")
            return False
        if remote_page.isRedirectPage():
            pywikibot.output("Target page is a redirect; skipped.")
            return False

        current_categories = self.get_existing_cats(page)
        if set(self.skip_categories) & set(current_categories):
            pywikibot.output("Page disallows this bot; skipped.")
            # The original only printed the message and carried on, which
            # meant opted-out pages were still edited.
            return False

        added_categories = []
        removed_categories = []
        for rc in remote_page.categories():
            if self.check_eligibility_en(rc.title(with_ns=False)) is False:
                continue

            # Map the English category to its local counterpart, if any.
            candidate = None
            for ll in rc.langlinks():
                if ll.site.code == "ckb":
                    candidate = ll.title
            if candidate is None:
                continue
            # Already on the page, or already queued during this run.
            if candidate in current_categories or candidate in added_categories:
                continue
            if not self.check_eligibility(candidate):
                continue

            # If a child of this category is already used, don't add it
            skip_less_specific = False
            for cc in current_categories:
                if self.is_child_category_of(cc, candidate):
                    skip_less_specific = True
                    pywikibot.output(
                        "More specific category already used."
                    )
                    # One match is enough; no need to query further.
                    break
            if skip_less_specific:
                continue

            # Otherwise add this category
            added_categories.append(candidate)

            # If a parent of what you just added is used, remove it
            # NOTE(review): the source indentation was lost; this branch is
            # assumed to belong to the "category was added" path, as its
            # comment suggests - confirm against the upstream script.
            if self.remove_parent is True:
                candidate_page = pywikibot.Page(
                    self.site_fa,
                    "پۆل:%s" % candidate
                )
                candidate_parents = self.get_existing_cats(candidate_page)
                intersection = self.list_intersection(
                    candidate_parents,
                    current_categories)
                if intersection:
                    pywikibot.output("Removing less specific parent.")
                    removed_categories.extend(intersection)

        if added_categories:
            text = page.text
            for ac in added_categories:
                text += "\n[[پۆل:%s]]" % ac
            for parent in removed_categories:
                # Escape the title: category names routinely contain regex
                # metacharacters such as parentheses.
                parent_pattern = (
                    r"\n\[\[پۆل:" + re.escape(parent) + r"(\|[^\]]*)?\]\]"
                )
                text = re.sub(parent_pattern, "", text)
            if self.cosmetic_changes is True:
                # The import of the cosmetic-changes module (fccc) is
                # commented out at the top of the file; degrade to a warning
                # instead of crashing with NameError after all the work above.
                try:
                    cosmetic = fccc.fa_cosmetic_changes
                except NameError:
                    pywikibot.warning(
                        "Cosmetic changes module is not imported; "
                        "skipping cosmetic changes."
                    )
                else:
                    text, _ver, _msg = cosmetic(text, page)
            self.put_current(text, summary=self.summary)
def main(*args):
    """
    Process command line arguments and invoke bot.

    @param args: command line arguments
    @type args: list of unicode
    @return: True if the bot was run, False if no page generator was given
    """
    options = {}
    # Default value for "cosmetic" option
    options["cosmetic"] = False
    # Process global arguments to determine desired site
    local_args = pywikibot.handle_args(args)
    # Process pagegenerators arguments
    gen_factory = pagegenerators.GeneratorFactory()
    local_args = gen_factory.handle_args(local_args)
    # Parse command line arguments
    for arg in local_args:
        arg, _sep, value = arg.partition(":")
        option = arg[1:]  # drop the leading "-"
        if option in ("summary", "text"):
            if not value:
                # Keep the answer: the original discarded the return value,
                # so the option was always stored as an empty string.
                value = pywikibot.input("Please enter a value for " + arg)
            options[option] = value
        # Take the remaining options as booleans.
        else:
            options[option] = True
    gen = gen_factory.getCombinedGenerator(preload=True)
    if gen:
        bot = CategorizeBot(gen, **options)
        bot.run()
        return True
    else:
        pywikibot.bot.suggest_help(missing_generator=True)
        return False


if __name__ == "__main__":
    main()