User:John Vandenberg/pywikipedia-unusedfiles.diff
Appearance
Index: wikipedia.py
===================================================================
--- wikipedia.py (revision 4195)
+++ wikipedia.py (working copy)
@@ -3732,21 +3732,44 @@
             if not repeat:
                 break
 
-    def unusedfiles(self, number = 10, repeat = False):
+    def unusedfiles(self, number = 10, repeat = False, extension = None):
+        """Yield ImagePage objects for files on the unused-files list.
+
+        number    -- passed to unusedfiles_address() as n
+        repeat    -- if true, fetch the list again after each pass; a
+                     title is never yielded twice
+        extension -- if given, only yield files whose name has exactly
+                     this text after its last dot (e.g. 'jpg', 'djvu')
+
+        A file is skipped when a page named 'Page:' + file name exists,
+        because the media is then used in a Proofread page.
+        """
         throttle = True
         seen = set()
         while True:
             path = self.unusedfiles_address(n=number)
             get_throttle()
             html = self.getUrl(path)
-            entryR = re.compile('<li>\(<a href=".+?" title="(?P<title>.+?)">.+?</a>\) ')
+            entryR = re.compile('<a href=".+?" title="(?P<title>Image:.+?)">.+?</a>')
             for m in entryR.finditer(html):
+                fileext = None
                 title = m.group('title')
+                if extension:
+                    # Use everything after the last dot instead of a fixed
+                    # three characters, so extensions of any length
+                    # (e.g. 'djvu', 'jpeg') can be matched.
+                    fileext = title.rsplit('.', 1)[-1]
 
-                if title not in seen:
-                    seen.add(title)
-                    page = ImagePage(self, title)
-                    yield page
+                if title not in seen and fileext == extension:
+                    # Check whether the media is used in a Proofread page
+                    # (the regex guarantees the 6-character 'Image:' prefix)
+                    basename = title[6:]
+                    page = Page(self, 'Page:' + basename)
+
+                    if not page.exists():
+                        seen.add(title)
+                        image = ImagePage(self, title)
+                        yield image
             if not repeat:
                 break
 
Index: pagegenerators.py
===================================================================
--- pagegenerators.py (revision 4195)
+++ pagegenerators.py (working copy)
@@ -118,10 +118,15 @@
     for page in pageWithImages.imagelinks(followRedirects = False, loose = True):
         yield page
 
-def UnusedFilesGenerator(number = 100, repeat = False, site = None):
+def UnusedFilesGenerator(number = 100, repeat = False, site = None, extension = None):
+    """Yield an ImagePage for every page from site.unusedfiles().
+
+    site defaults to wikipedia.getSite(). number, repeat and extension
+    are passed through to site.unusedfiles() unchanged.
+    """
     if site is None:
         site = wikipedia.getSite()
-    for page in site.unusedfiles(number=number, repeat=repeat):
+    for page in site.unusedfiles(number=number, repeat=repeat, extension=extension):
         yield wikipedia.ImagePage(page.site(), page.title())
 
 def WithoutInterwikiPageGenerator(number = 100, repeat = False, site = None):