In [1]:
import pywikibot
import mwparserfromhell


VERBOSE:pywiki:Starting 1 threads...

In [2]:
# Site handle for the 'en' language code of the 'wikipedia' family; the
# template page below is looked up on this site.
enwp = pywikibot.Site('en', 'wikipedia')

In [3]:
# Page object for 'Template:Cite doi' on enwp; its referencing pages are
# collected in the following cells.
doi_template = pywikibot.Page(enwp, 'Template:Cite doi')

In [10]:
# Pages referring to the template: redirects are followed, only template
# inclusions are counted, and results are restricted to namespace 0.
doi_pages = doi_template.getReferences(
    follow_redirects=True,
    onlyTemplateInclusion=True,
    namespaces=0,
)

In [11]:
# Materialise the references into a list so they can be counted and reused.
# NOTE(review): doi_pages looks like a one-shot iterator, in which case it is
# exhausted after this line and later cells must use page_objs -- confirm.
page_objs = list(doi_pages)

In [12]:
# Number of referencing pages collected above.
len(page_objs)


Out[12]:
14060

In [15]:
# Title of every referencing page, in the same order as page_objs.
# Built with a list comprehension so the result is always a concrete list:
# on Python 3 map() returns a lazy iterator, which json.dump() cannot
# serialise in the cell that writes these titles to disk.
page_titles = [page_obj.title() for page_obj in page_objs]

In [20]:
import json

# Persist the collected titles as JSON in the working directory.
# The `with` block closes the file as soon as the dump finishes, so the data
# is flushed to disk even if a later cell fails or the kernel is interrupted.
# json_output stays bound afterwards (as a closed file object).
with open('doi_page_titles.json', 'w') as json_output:
    json.dump(page_titles, json_output)

In [19]:
# List the working directory to check that doi_page_titles.json was written.
ls


doi_page_titles.json  Finding DOIs example.ipynb  LICENSE  README.md

In [11]:
for page in doi_pages:
    print 'Working on: ',  page
    page_text = page.get()
    wiki_code = mwparserfromhell.parse(page_text)
    templates = wiki_code.filter_templates()
    for template in templates:
        if template.name.lower().replace('_',' ') == 'cite doi':
            for param in template.params:
                print 'param name or position: ', param.name, ' param value: ',param.value


VERBOSE:pywiki:Found 1 wikipedia:en processes running, including this one.
Working on:  [[en:Altruism]]
param name or position:  1  param value:  10.1093/acprof:oso/9780199586073.001.0001
param name or position:  2  param value:  noedit
param name or position:  1  param value:  10.1093/acprof:oso/9780199586073.001.0001
param name or position:  2  param value:  noedit
param name or position:  1  param value:  10.1126/science.1133755
param name or position:  1  param value:  10.1038/nature02043
param name or position:  1  param value:  10.1016/S1053-5357(99)00045-1
param name or position:  1  param value:  10.1348/000712606X129213
param name or position:  1  param value:  10.1007/s12147-002-0024-2
Working on:  [[en:Anthropology]]
param name or position:  1  param value:  10.1525/aa.1998.100.3.716
Working on:  [[en:Analysis of variance]]
param name or position:  1  param value:  10.1093/biomet/6.1.1
param name or position:  2  param value:  noedit
param name or position:  1  param value:  10.1214/aoms/1177728717
param name or position:  2  param value:  noedit
Working on:  [[en:Albert Einstein]]
param name or position: 
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-11-3b627f697fae> in <module>()
      1 for page in doi_pages:
      2     print 'Working on: ',  page
----> 3     page_text = page.get()
      4     wiki_code = mwparserfromhell.parse(page_text)
      5     templates = wiki_code.filter_templates()

/home/notconfusing/workspace/pwb-core/pywikibot/__init__.pyc in wrapper(*__args, **__kw)
    348                         % locals(), _logger)
    349                 del __kw[old_arg]
--> 350             return method(*__args, **__kw)
    351         wrapper.__doc__ = method.__doc__
    352         wrapper.__name__ = method.__name__

/home/notconfusing/workspace/pwb-core/pywikibot/__init__.pyc in wrapper(*__args, **__kw)
    348                         % locals(), _logger)
    349                 del __kw[old_arg]
--> 350             return method(*__args, **__kw)
    351         wrapper.__doc__ = method.__doc__
    352         wrapper.__name__ = method.__name__

/home/notconfusing/workspace/pwb-core/pywikibot/page.pyc in get(self, force, get_redirect, sysop)
    289                     delattr(self, attr)
    290         try:
--> 291             self._getInternals(sysop)
    292         except pywikibot.IsRedirectPage:
    293             if not get_redirect:

/home/notconfusing/workspace/pwb-core/pywikibot/page.pyc in _getInternals(self, sysop)
    313                 or self._revisions[self._revid].text is None:
    314             try:
--> 315                 self.site.loadrevisions(self, getText=True, sysop=sysop)
    316             except (pywikibot.NoPage, pywikibot.SectionError) as e:
    317                 self._getexception = e

/home/notconfusing/workspace/pwb-core/pywikibot/site.pyc in loadrevisions(self, page, getText, revids, startid, endid, starttime, endtime, rvdir, user, excludeuser, section, sysop, step, total)
   1888         # TODO if sysop: something
   1889         rvgen.continuekey = "revisions"
-> 1890         for pagedata in rvgen:
   1891             if page is not None:
   1892                 if not self.sametitle(pagedata['title'],

/home/notconfusing/workspace/pwb-core/pywikibot/data/api.pyc in __iter__(self)
    658             if not hasattr(self, "data"):
    659                 try:
--> 660                     self.data = self.request.submit()
    661                 except Server504Error:
    662                     # server timeout, usually caused by request with high limit

/home/notconfusing/workspace/pwb-core/pywikibot/data/api.pyc in submit(self)
    289                     rawdata = http.request(self.site, uri, ssl, method="POST",
    290                                            headers={'Content-Type': 'application/x-www-form-urlencoded'},
--> 291                                            body=paramstring)
    292 ##                import traceback
    293 ##                traceback.print_stack()

/home/notconfusing/workspace/pwb-core/pywikibot/comms/http.pyc in request(site, uri, ssl, *args, **kwargs)
    130     request = threadedhttp.HttpRequest(baseuri, *args, **kwargs)
    131     http_queue.put(request)
--> 132     request.lock.acquire()
    133 
    134     #TODO: do some error correcting stuff

/usr/lib/python2.7/threading.pyc in acquire(self, blocking)
    465                     self._note("%s.acquire(%s): blocked waiting, value=%s",
    466                             self, blocking, self.__value)
--> 467                 self.__cond.wait()
    468             else:
    469                 self.__value = self.__value - 1

/usr/lib/python2.7/threading.pyc in wait(self, timeout)
    337         try:    # restore state no matter what (e.g., KeyboardInterrupt)
    338             if timeout is None:
--> 339                 waiter.acquire()
    340                 if __debug__:
    341                     self._note("%s.wait(): got it", self)

KeyboardInterrupt: 
 1  param value:  10.1098/rsbm.1955.0005
param name or position:  2  param value:  noedit
Working on:  [[en:Abortion]]