Index: wikipedia.py
===================================================================
--- wikipedia.py (revision 4195)
+++ wikipedia.py (working copy)
@@ -3732,21 +3732,29 @@
if not repeat:
break
- def unusedfiles(self, number = 10, repeat = False):
+ def unusedfiles(self, number = 10, repeat = False, extension = None):
throttle = True
seen = set()
while True:
path = self.unusedfiles_address(n=number)
get_throttle()
html = self.getUrl(path)
- entryR = re.compile('<li>\(<a href=".+?" title="(?P<title>.+?)">.+?</a>\) ')
+ entryR = re.compile('<a href=".+?" title="(?P<title>Image:.+?)">.+?</a>')
for m in entryR.finditer(html):
+ fileext = None
title = m.group('title')
+ if extension:
+ fileext = title.rpartition('.')[2]
- if title not in seen:
- seen.add(title)
- page = ImagePage(self, title)
- yield page
+ if title not in seen and (extension is None or fileext.lower() == extension.lower()):
+ # Check whether the media is used in a Proofread page
+ basename = title[6:]
+ page = Page(self, 'Page:' + basename)
+
+ if not page.exists():
+ seen.add(title)
+ image = ImagePage(self, title)
+ yield image
if not repeat:
break
Index: pagegenerators.py
===================================================================
--- pagegenerators.py (revision 4195)
+++ pagegenerators.py (working copy)
@@ -118,10 +118,10 @@
for page in pageWithImages.imagelinks(followRedirects = False, loose = True):
yield page
-def UnusedFilesGenerator(number = 100, repeat = False, site = None):
+def UnusedFilesGenerator(number = 100, repeat = False, site = None, extension = None):
if site is None:
site = wikipedia.getSite()
- for page in site.unusedfiles(number=number, repeat=repeat):
+ for page in site.unusedfiles(number=number, repeat=repeat, extension=extension):
yield wikipedia.ImagePage(page.site(), page.title())
def WithoutInterwikiPageGenerator(number = 100, repeat = False, site = None):