In [30]:
!wget https://quarry.wmflabs.org/run/160592/output/0/tsv?download=true -O enwiki-redirects-endash-20170308.tsv


--2017-03-08 18:18:21--  https://quarry.wmflabs.org/run/160592/output/0/tsv?download=true
Resolving quarry.wmflabs.org (quarry.wmflabs.org)... 10.68.21.68
Connecting to quarry.wmflabs.org (quarry.wmflabs.org)|10.68.21.68|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/csv]
Saving to: ‘enwiki-redirects-endash-20170308.tsv’

enwiki-redirects-en     [               <=>    ]  26.40M  1.81MB/s   in 12s    

2017-03-08 18:18:33 (2.12 MB/s) - ‘enwiki-redirects-endash-20170308.tsv’ saved [27684056]


In [1]:
import pywikibot
site = pywikibot.Site('en', 'wikipedia')

In [86]:
site.log

In [79]:
import pandas as pd
import numpy as np
import seaborn as sns
%matplotlib inline

import pickle

In [31]:
redirects_df = pd.read_csv("enwiki-redirects-endash-20170308.tsv", sep="\t")
len(redirects_df)


Out[31]:
330470

In [82]:
error_count = 0
total_df = pd.DataFrame(columns=["revid", "timestamp", "user", "comment", "is_redirect", "page_title", "page_namespace", "page_text"])
count = 0 
errors = []
for r in redirects_df.iterrows():
    revs = []
    redirect_page = r[1].page_title
    try:
        page = pywikibot.Page(site, redirect_page)
        oldest_rev = page.oldest_revision
        revs.append((oldest_rev.revid, oldest_rev.timestamp.isoformat(), oldest_rev.user, oldest_rev.comment, page.isRedirectPage(), page.title(), page.namespace().id, page.text))
    except Exception as e:
        errors.append(r[1].page_title)
        error_count = error_count + 1

    rev_df = pd.DataFrame(revs, columns=["revid", "timestamp", "user", "comment", "is_redirect", "page_title", "page_namespace", "page_text"]) 
    total_df = pd.concat([total_df, rev_df])
    
    count = count + 1
    if count % 100 == 0:
        
        total_df.to_csv("enwiki-redirects-endash-processed.tsv", sep="\t")
        total_df.to_pickle("enwiki-redirects-endash-processed.pickle")
        
        with open('enwiki-redirects-endash-errors.pickle', 'wb') as fp:
            pickle.dump(errors, fp)
        
        with open('enwiki-redirects-endash-errors.tsv', 'wb') as ft:
            for item in errors:
                ft.write(bytes(item + "\n", 'UTF-8'))
        
        print(count, error_count)


100 0
200 1
300 2
400 2
500 2
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-82-2f8f626ee732> in <module>()
      8     try:
      9         page = pywikibot.Page(site, redirect_page)
---> 10         oldest_rev = page.oldest_revision
     11         revs.append((oldest_rev.revid, oldest_rev.timestamp.isoformat(), oldest_rev.user, oldest_rev.comment, page.isRedirectPage(), page.title(), page.text))
     12     except Exception as e:

/srv/paws/pwb/pywikibot/page.py in oldest_revision(self)
    765         @rtype: L{Revision}
    766         """
--> 767         return next(self.revisions(reverse=True, total=1))
    768 
    769     def isRedirectPage(self):

/srv/paws/pwb/pywikibot/tools/__init__.py in wrapper(*__args, **__kw)
   1446                              cls, depth)
   1447                     del __kw[old_arg]
-> 1448             return obj(*__args, **__kw)
   1449 
   1450         if not __debug__:

/srv/paws/pwb/pywikibot/page.py in revisions(self, reverse, total, content, rollback, starttime, endtime)
   1632         self.site.loadrevisions(self, getText=content, rvdir=reverse,
   1633                                 starttime=starttime, endtime=endtime,
-> 1634                                 total=total, rollback=rollback)
   1635         return (self._revisions[rev] for rev in
   1636                 sorted(self._revisions, reverse=not reverse)[:total])

/srv/paws/pwb/pywikibot/site.py in loadrevisions(self, page, getText, revids, startid, endid, starttime, endtime, rvdir, user, excludeuser, section, sysop, step, total, rollback)
   3938             rvgen.set_maximum_items(-1)  # suppress use of rvlimit parameter
   3939 
-> 3940         for pagedata in rvgen:
   3941             if not self.sametitle(pagedata['title'],
   3942                                   page.title(withSection=False)):

/srv/paws/pwb/pywikibot/data/api.py in __iter__(self)
   2751                         _logger)
   2752             if not hasattr(self, "data"):
-> 2753                 self.data = self.request.submit()
   2754             if not self.data or not isinstance(self.data, dict):
   2755                 pywikibot.debug(

/srv/paws/pwb/pywikibot/data/api.py in submit(self)
   1952                 rawdata = http.request(
   1953                     site=self.site, uri=uri, method='GET' if use_get else 'POST',
-> 1954                     body=body, headers=headers)
   1955             except Server504Error:
   1956                 pywikibot.log(u"Caught HTTP 504 error; retrying")

/srv/paws/pwb/pywikibot/tools/__init__.py in wrapper(*__args, **__kw)
   1446                              cls, depth)
   1447                     del __kw[old_arg]
-> 1448             return obj(*__args, **__kw)
   1449 
   1450         if not __debug__:

/srv/paws/pwb/pywikibot/comms/http.py in request(site, uri, method, params, body, headers, data, **kwargs)
    326     headers['user-agent'] = user_agent(site, format_string)
    327 
--> 328     r = fetch(baseuri, method, params, body, headers, **kwargs)
    329     return r.content
    330 

/srv/paws/pwb/pywikibot/comms/http.py in fetch(uri, method, params, body, headers, default_error_handling, use_fake_user_agent, data, **kwargs)
    508             headers['user-agent'] = fake_user_agent()
    509 
--> 510     request = _enqueue(uri, method, params, body, headers, **kwargs)
    511     assert(request._data is not None)  # if there's no data in the answer we're in trouble
    512     # Run the error handling callback in the callers thread so exceptions

/srv/paws/pwb/pywikibot/comms/http.py in _enqueue(uri, method, params, body, headers, data, **kwargs)
    465     request = threadedhttp.HttpRequest(
    466         uri, method, params, body, all_headers, callbacks, **kwargs)
--> 467     _http_process(session, request)
    468     return request
    469 

/srv/paws/pwb/pywikibot/comms/http.py in _http_process(session, http_request)
    379         response = session.request(method, uri, params=params, data=body,
    380                                    headers=headers, auth=auth, timeout=timeout,
--> 381                                    verify=not ignore_validation)
    382     except Exception as e:
    383         http_request.data = e

/srv/paws/lib/python3.4/site-packages/requests/sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
    472             hooks = hooks,
    473         )
--> 474         prep = self.prepare_request(req)
    475 
    476         proxies = proxies or {}

/srv/paws/lib/python3.4/site-packages/requests/sessions.py in prepare_request(self, request)
    405             auth=merge_setting(auth, self.auth),
    406             cookies=merged_cookies,
--> 407             hooks=merge_hooks(request.hooks, self.hooks),
    408         )
    409         return p

/srv/paws/lib/python3.4/site-packages/requests/models.py in prepare(self, method, url, headers, files, data, params, auth, cookies, hooks, json)
    304         self.prepare_cookies(cookies)
    305         self.prepare_body(data, files, json)
--> 306         self.prepare_auth(auth, url)
    307 
    308         # Note that prepare_auth must be last to enable authentication schemes

/srv/paws/lib/python3.4/site-packages/requests/models.py in prepare_auth(self, auth, url)
    541 
    542             # Allow auth to make its changes.
--> 543             r = auth(self)
    544 
    545             # Update self to reflect the auth changes.

/srv/paws/lib/python3.4/site-packages/requests_oauthlib/oauth1_auth.py in __call__(self, r)
     78             r.headers['Content-Type'] = CONTENT_TYPE_FORM_URLENCODED
     79             r.url, headers, r.body = self.client.sign(
---> 80                 unicode(r.url), unicode(r.method), r.body or '', r.headers)
     81         elif self.force_include_body:
     82             # To allow custom clients to work on non form encoded bodies.

/srv/paws/lib/python3.4/site-packages/oauthlib/oauth1/rfc5849/__init__.py in sign(self, uri, http_method, body, headers, realm)
    311         # generate the signature
    312         request.oauth_params.append(
--> 313             ('oauth_signature', self.get_oauth_signature(request)))
    314 
    315         # render the signed request and return it

/srv/paws/lib/python3.4/site-packages/oauthlib/oauth1/rfc5849/__init__.py in get_oauth_signature(self, request)
    126                                             self.resource_owner_secret)
    127 
--> 128         uri, headers, body = self._render(request)
    129 
    130         collected_params = signature.collect_parameters(

/srv/paws/lib/python3.4/site-packages/oauthlib/oauth1/rfc5849/__init__.py in _render(self, request, formencode, realm)
    208         if self.signature_type == SIGNATURE_TYPE_AUTH_HEADER:
    209             headers = parameters.prepare_headers(
--> 210                 request.oauth_params, request.headers, realm=realm)
    211         elif self.signature_type == SIGNATURE_TYPE_BODY and request.decoded_body is not None:
    212             body = parameters.prepare_form_encoded_body(

/srv/paws/lib/python3.4/site-packages/oauthlib/oauth1/rfc5849/utils.py in wrapper(params, *args, **kwargs)
     29     def wrapper(params, *args, **kwargs):
     30         params = filter_oauth_params(params)
---> 31         return target(params, *args, **kwargs)
     32 
     33     wrapper.__doc__ = target.__doc__

/srv/paws/lib/python3.4/site-packages/oauthlib/oauth1/rfc5849/parameters.py in prepare_headers(oauth_params, headers, realm)
     49     # field as follows:
     50     authorization_header_parameters_parts = []
---> 51     for oauth_parameter_name, value in oauth_params:
     52         # 1.  Parameter names and values are encoded per Parameter Encoding
     53         #     (`Section 3.6`_)

KeyboardInterrupt: 

In [76]:
rev_df


Out[76]:
revid timestamp user comment is_redirect page_title page_text
0 545870185 2013-03-21T03:45:13Z The Emperor's New Spy The Emperor's New Spy moved page [[1550–1600 i... True 1550–1600 in fashion #REDIRECT [[1550–1600 in Western European fash...

In [85]:
page.namespace().id


Out[85]:
0

In [ ]:


In [71]:
revs_df


Out[71]:
[(334052434,
  '2009-12-26T04:55:57Z',
  'DASHBot',
  'moved [["FF.SS." - Cioè: "...che mi hai portato a fare sopra a Posillipo se non mi vuoi più bene?"]] to [["FF.SS." – Cioè: "...che mi hai portato a fare sopra a Posillipo se non mi vuoi più bene?"]]: Robot moving pages:per [[WP:HYPHEN]] ([[User talk',
  True,
  '"FF.SS." - Cioè: "...che mi hai portato a fare sopra a Posillipo se non mi vuoi più bene?"',
  '#REDIRECT [["FF.SS." – Cioè: "...che mi hai portato a fare sopra a Posillipo se non mi vuoi più bene?"]]\n\n{{R from modification}}\n{{R from move}}'),
 (415186497,
  '2011-02-21T19:55:28Z',
  'DerBorg',
  '[[WP:AES|←]]Redirected page to [["FF.SS." – Cioè: "...che mi hai portato a fare sopra a Posillipo se non mi vuoi più bene?"]]',
  True,
  'FF.SS.',
  '#REDIRECT [["FF.SS." – Cioè: "...che mi hai portato a fare sopra a Posillipo se non mi vuoi più bene?"]]'),
 (475846929,
  '2012-02-08T23:25:55Z',
  'RjwilmsiBot',
  'Create redirect for title with diacritics using [[Project:AWB|AWB]] (7952)',
  True,
  '"FF.SS." – Cioe: "...che mi hai portato a fare sopra a Posillipo se non mi vuoi piu bene?"',
  '#REDIRECT [["FF.SS." – Cioè: "...che mi hai portato a fare sopra a Posillipo se non mi vuoi più bene?"]] {{R from title without diacritics}}'),
 (586979837,
  '2013-12-20T17:31:33Z',
  'Cavarrone',
  '[[WP:AES|←]]Redirected page to [["FF.SS." – Cioè: "...che mi hai portato a fare sopra a Posillipo se non mi vuoi più bene?"]]',
  True,
  'F.F.S.S., cioè.. che mi hai portato a fare sopra a Posillipo se non mi vuoi più bene?',
  '#REDIRECT [["FF.SS." – Cioè: "...che mi hai portato a fare sopra a Posillipo se non mi vuoi più bene?"]]'),
 (708965298,
  '2016-03-08T12:52:52Z',
  'AnomieBOT',
  'Redirecting to [[:"FF.SS." – Cioe: "...che mi hai portato a fare sopra a Posillipo se non mi vuoi piu bene?"]] because titles with en-dashes are hard to type (and resolving the double redirect to [[:"FF.SS." – Cioè: "...che mi hai portato a fare s...',
  True,
  '"FF.SS." - Cioe: "...che mi hai portato a fare sopra a Posillipo se non mi vuoi piu bene?"',
  '#REDIRECT [[:"FF.SS." – Cioè: "...che mi hai portato a fare sopra a Posillipo se non mi vuoi più bene?"]]\n{{Redr|from modification|p1={{-r|"FF.SS." – Cioe: "...che mi hai portato a fare sopra a Posillipo se non mi vuoi piu bene?"}}}}'),
 (20470716,
  '2005-08-07T12:03:02Z',
  'Adz',
  'create redirect',
  True,
  'Port lotniczy Szczecin-Goleniów',
  '#REDIRECT [["Solidarity" Szczecin–Goleniów Airport]]'),
 (22077099,
  '2005-08-29T04:52:37Z',
  'Adz',
  'create redirect',
  True,
  'Szczecin International Airport',
  '#REDIRECT [["Solidarity" Szczecin–Goleniów Airport]]'),
 (43444571,
  '2006-03-12T15:52:25Z',
  'Balcer',
  'redir',
  True,
  'Szczecin-Goleniów Airport',
  '#REDIRECT [["Solidarity" Szczecin–Goleniów Airport]]'),
 (66093939,
  '2006-07-27T03:56:58Z',
  'Mareklug',
  '{{R from title without diacritics}}',
  True,
  'Szczecin-Goleniow "Solidarnosc" Airport',
  '#REDIRECT [["Solidarity" Szczecin–Goleniów Airport]] {{R from title without diacritics}}'),
 (66094262,
  '2006-07-27T03:59:40Z',
  'Mareklug',
  'redirect',
  True,
  'Szczecin-Goleniow Solidarity Airport',
  '#REDIRECT [["Solidarity" Szczecin–Goleniów Airport]]'),
 (147052256,
  '2007-07-25T19:09:04Z',
  'Targeman',
  'redirect',
  True,
  'Szz',
  '#REDIRECT [["Solidarity" Szczecin–Goleniów Airport]]'),
 (178066046,
  '2007-12-15T12:34:08Z',
  'Zyxw',
  '[[WP:AES|←]]Redirected page to [[Szczecin-Goleniów "Solidarność" Airport]]',
  True,
  'Szczecin-Goleniów "Solidarnosc" Airport',
  '#REDIRECT [["Solidarity" Szczecin–Goleniów Airport]]'),
 (199229641,
  '2008-03-19T00:42:13Z',
  'Eubot',
  'Redirected page to [[Szczecin-Goleniów "Solidarność" Airport]]. ("Port lotniczy Szczecin-Goleniów").',
  True,
  'Port lotniczy Szczecin-Goleniow',
  '#REDIRECT [["Solidarity" Szczecin–Goleniów Airport]] {{R from title without diacritics}}'),
 (199721147,
  '2008-03-21T00:38:48Z',
  'Eubot',
  'Redirected page to [[Szczecin-Goleniów "Solidarność" Airport]]. ("Szczecin-Goleniów Airport").',
  True,
  'Szczecin-Goleniow Airport',
  '#REDIRECT [["Solidarity" Szczecin–Goleniów Airport]] {{R from title without diacritics}}'),
 (261295911,
  '2009-01-01T18:30:17Z',
  'Uzdzislaw',
  '[[WP:AES|←]]Redirected page to [[Szczecin-Goleniów "Solidarność" Airport]]',
  True,
  'SZZ',
  '#REDIRECT [["Solidarity" Szczecin–Goleniów Airport]]\n{{R from airport code}}'),
 (301278464,
  '2009-07-09T23:26:40Z',
  'Nappyrootslistener',
  '[[WP:AES|←]]Redirected page to [[Szczecin-Goleniów "Solidarność" Airport]]',
  True,
  'Solidarność Airport',
  '#REDIRECT [["Solidarity" Szczecin–Goleniów Airport]]'),
 (301278560,
  '2009-07-09T23:27:17Z',
  'Nappyrootslistener',
  '[[WP:AES|←]]Redirected page to [[Szczecin-Goleniów "Solidarność" Airport]]',
  True,
  'Solidarnosc Airport',
  '#REDIRECT [["Solidarity" Szczecin–Goleniów Airport]]'),
 (478076640,
  '2012-02-21T15:08:41Z',
  'Igor alexandrov',
  'moved [[Szczecin-Goleniów "Solidarność" Airport]] to [["Solidarity" Szczecin-Goleniów Airport]]: proper English name',
  True,
  'Szczecin-Goleniów "Solidarność" Airport',
  '#REDIRECT [["Solidarity" Szczecin–Goleniów Airport]]'),
 (495325659,
  '2012-05-31T17:26:12Z',
  'RjwilmsiBot',
  'Create redirect for title with diacritics using [[Project:AWB|AWB]] (8073)',
  True,
  '"Solidarity" Szczecin-Goleniow Airport',
  '#REDIRECT [["Solidarity" Szczecin–Goleniów Airport]] {{R from title without diacritics}}'),
 (555161539,
  '2013-05-15T03:40:18Z',
  'Cherkash',
  'Cherkash moved page [["Solidarity" Szczecin-Goleniów Airport]] to [["Solidarity" Szczecin–Goleniów Airport]]: fixed a dash',
  True,
  '"Solidarity" Szczecin-Goleniów Airport',
  '#REDIRECT [["Solidarity" Szczecin–Goleniów Airport]]'),
 (626062909,
  '2014-09-18T09:48:35Z',
  'RjwilmsiBot',
  'Create redirect for title with diacritics using [[Project:AWB|AWB]] (10447)',
  True,
  '"Solidarity" Szczecin–Goleniow Airport',
  '#REDIRECT [["Solidarity" Szczecin–Goleniów Airport]] {{R from title without diacritics}}'),
 (714079955,
  '2016-04-07T14:23:12Z',
  'Feminist',
  '/* top */Redirecting to [["Solidarity" Szczecin–Goleniów Airport]] using [[Project:AWB|AWB]]',
  True,
  '"Solidarity" Szczecin–Goleniów (airport)',
  '#REDIRECT [["Solidarity" Szczecin–Goleniów Airport]]\n{{R from alternative disambiguation}}'),
 (714250580,
  '2016-04-08T16:07:20Z',
  'AnomieBOT',
  'Redirecting to [[:"Solidarity" Szczecin–Goleniów (airport)]] because titles with en-dashes are hard to type (and resolving the double redirect to [[:"Solidarity" Szczecin–Goleniów Airport]]).  Errors? [[User:AnomieBOT/shutoff/EnDashRedirectCreator]]',
  True,
  '"Solidarity" Szczecin-Goleniów (airport)',
  '#REDIRECT [[:"Solidarity" Szczecin–Goleniów Airport]]\n{{Redr|from modification|p1={{-r|"Solidarity" Szczecin–Goleniów (airport)}}}}'),
 (199768772,
  '2008-03-21T05:19:58Z',
  'Eubot',
  'Redirected page to [[38 – Auch das war Wien]].',
  True,
  '38 - Auch das war Wien',
  "#REDIRECT [['38 – Vienna Before the Fall]] {{R from title without diacritics}}\n{{R from modification}}"),
 (628359230,
  '2014-10-05T17:43:11Z',
  'Lugnuts',
  "Lugnuts moved page [[38 – Auch das war Wien]] to [['38 – Vienna Before the Fall]]: [[WP:NCF]], [[WP:UE]]",
  True,
  '38 – Auch das war Wien',
  "#REDIRECT [['38 – Vienna Before the Fall]]\n\n{{R from move}}"),
 (628360066,
  '2014-10-05T17:51:01Z',
  'Lugnuts',
  "[[WP:AES|←]]Redirected page to [['38 – Vienna Before the Fall]]",
  True,
  "'38 - Vienna Before the Fall",
  "#REDIRECT[['38 – Vienna Before the Fall]]"),
 (628360093,
  '2014-10-05T17:51:18Z',
  'Lugnuts',
  "[[WP:AES|←]]Redirected page to [['38 – Vienna Before the Fall]]",
  True,
  '38 – Vienna Before the Fall',
  "#REDIRECT[['38 – Vienna Before the Fall]]"),
 (628360127,
  '2014-10-05T17:51:34Z',
  'Lugnuts',
  "[[WP:AES|←]]Redirected page to [['38 – Vienna Before the Fall]]",
  True,
  '38 - Vienna Before the Fall',
  "#REDIRECT[['38 – Vienna Before the Fall]]"),
 (9096674,
  '2004-12-23T19:07:20Z',
  '81.154.8.38',
  '',
  True,
  "'64-'95",
  "#REDIRECT [['64–'95]]\n{{R from modification}}"),
 (236143794,
  '2008-09-04T01:07:26Z',
  'Koavf',
  "moved [['64 - '95]] to [['64–'95]]:&#32;–",
  True,
  "'64 - '95",
  "#REDIRECT [['64–'95]]"),
 (172462904,
  '2007-11-19T10:42:51Z',
  'Bensin',
  "#redirect [['74 - '75]]",
  True,
  "'74-'75",
  "#REDIRECT [['74–'75]]\n{{R from modification}}"),
 (334047672,
  '2009-12-26T04:06:22Z',
  'DASHBot',
  "moved [['74 - '75]] to [['74 – '75]]: Robot moving pages: per [[WP:HYPHEN]] ([[User talk:Tim1357|error?]])",
  True,
  "'74 - '75",
  "#REDIRECT [['74–'75]]\n\n{{R from modification}}\n{{R from move}}"),
 (343067910,
  '2010-02-10T02:15:01Z',
  'Koavf',
  "moved [['74 – '75]] to [['74–'75]]: [[WP:DASH]]",
  True,
  "'74 – '75",
  "#REDIRECT [['74–'75]]\n\n{{R from modification}}\n{{R from move}}"),
 (502025050,
  '2012-07-13T08:33:43Z',
  'Omegangel',
  "[[WP:AES|←]]Redirected page to [['74–'75]]",
  True,
  '74 75',
  "#REDIRECT [['74–'75]]"),
 (704840462,
  '2016-02-13T22:32:11Z',
  'Smylers',
  'Find the song even without the apostrophes',
  True,
  '74-75',
  "#REDIRECT [['74–'75]]"),
 (760435201,
  '2017-01-16T23:40:04Z',
  'Pigsonthewing',
  "#REDIRECT[['74–'75]]",
  True,
  '74–75',
  "#REDIRECT[['74–'75]]"),
 (760435355,
  '2017-01-16T23:41:32Z',
  'Pigsonthewing',
  '+',
  True,
  '74—75',
  "#REDIRECT[['74–'75]]"),
 (297057866,
  '2009-06-18T00:02:04Z',
  'Koavf',
  "moved [['99-'00 Demos]] to [['99–'00 Demos]]: –",
  True,
  "'99-'00 Demos",
  "#REDIRECT [['99–'00 Demos]]\n\n{{R from modification}}\n\n{{R unprintworthy}}"),
 (17058946,
  '2004-12-25T00:00:57Z',
  'Lumidek',
  '',
  True,
  'Hooft-Polyakov monopole',
  "#REDIRECT [['t Hooft–Polyakov monopole]]"),
 (273794069,
  '2009-02-28T02:41:54Z',
  'Legoktm',
  "moved [['t Hooft-Polyakov monopole]] to [['t Hooft–Polyakov monopole]]:&#32;Moving page per [[WP:ENDASH]]",
  True,
  "'t Hooft-Polyakov monopole",
  "#REDIRECT [['t Hooft–Polyakov monopole]]\n{{R from modification}}"),
 (365570887,
  '2010-06-02T04:02:55Z',
  'Anthony Appleyard',
  'moved [[(1952-19??)]] to [[(1952–19??)]]: Requested at [[Wikipedia:Requested moves]] as uncontroversial (http://en.wikipedia.org/w/index.php?title=Wikipedia:Requested_moves&oldid=365563522#movereq-.281952-19.3F.3F.29)&wpMovetalk=1',
  True,
  '(1952-19??)',
  '#REDIRECT [[(1952–19??)]]\n{{R from modification}}'),
 (690277281,
  '2015-11-12T11:26:09Z',
  'Faceless Enemy',
  'Faceless Enemy moved page [[.45 Remington-Thompson]] to [[.45 Remington–Thompson]]: Per MOS mdash',
  True,
  '.45 Remington-Thompson',
  '#REDIRECT [[.45 Remington–Thompson]]\n\n{{This is a redirect|from move|from incorrect punctuation|up}}'),
 (395470118,
  '2010-11-08T02:31:34Z',
  'Octane',
  '[[WP:AES|←]]Redirected page to [[.577/450 Martini-Henry]]',
  True,
  '.577/450',
  '#REDIRECT [[.577/450 Martini–Henry]]'),
 (662397894,
  '2015-05-15T03:47:22Z',
  'Faceless Enemy',
  '[[WP:AES|←]]Redirected page to [[.577/450 Martini-Henry]]',
  True,
  '.450/577',
  '#REDIRECT [[.577/450 Martini–Henry]]'),
 (662397929,
  '2015-05-15T03:47:45Z',
  'Faceless Enemy',
  'Faceless Enemy moved page [[.577/450 Martini-Henry]] to [[.577/450 Martini–Henry]]: ndash per [[Wikipedia:Manual_of_Style#Dashes]]',
  True,
  '.577/450 Martini-Henry',
  '#REDIRECT [[.577/450 Martini–Henry]]\n{{R from move}}'),
 (297057849,
  '2009-06-18T00:02:01Z',
  'Koavf',
  'moved [[0079-0088]] to [[0079–0088]]',
  True,
  '0079-0088',
  '#REDIRECT [[0079–0088]]\n\n{{R from modification}}\n\n{{R unprintworthy}}'),
 (475231316,
  '2012-02-05T15:17:54Z',
  'RjwilmsiBot',
  'creating redirects for pages with endashes in title using [[Project:AWB|AWB]] (7940)',
  True,
  '00 Schneider - Jagd auf Nihil Baxter',
  '#REDIRECT [[00 Schneider – Jagd auf Nihil Baxter]] {{R from modification}}'),
 (463616950,
  '2011-12-02T05:03:11Z',
  'DASHBot',
  'moved [[0110111 - Quantum Physics & A Horseshoe]] to [[0110111 – Quantum Physics & A Horseshoe]]: [[WP:BOT|BOT]]: Moving page per [[WP:HYPHEN]].',
  True,
  '0110111 - Quantum Physics & A Horseshoe',
  '#REDIRECT [[0110111 – Quantum Physics & A Horseshoe]] {{R from modification}}'),
 (297057875,
  '2009-06-18T00:02:06Z',
  'Koavf',
  'moved [[03/07-09/07]] to [[03/07–09/07]]: –',
  True,
  '03/07-09/07',
  '#REDIRECT [[03/07–09/07]]\n\n{{R from modification}}\n\n{{R unprintworthy}}'),
 (475231375,
  '2012-02-05T15:18:21Z',
  'RjwilmsiBot',
  'creating redirects for pages with endashes in title using [[Project:AWB|AWB]] (7940)',
  True,
  '1. Mai - Helden bei der Arbeit',
  '#REDIRECT [[1. Mai – Helden bei der Arbeit]] {{R from modification}}'),
 (383597886,
  '2010-09-08T06:56:48Z',
  'David Eppstein',
  'redirect with hyphen in place of en-dash',
  True,
  '1/3-2/3 conjecture',
  '#REDIRECT [[1/3–2/3 conjecture]]\n{{R from modification}}'),
 (384115936,
  '2010-09-11T00:30:40Z',
  'David Eppstein',
  'redlink redirect',
  True,
  'The 1/3 − 2/3 conjecture',
  '#REDIRECT [[1/3–2/3 conjecture]]'),
 (120866901,
  '2007-04-07T00:43:32Z',
  '-5-',
  'moved [[10/22/00 - Las Vegas Nevada]] to [[10/22/00 - Las Vegas, Nevada]]: Punctuation',
  True,
  '10/22/00 - Las Vegas Nevada',
  '#REDIRECT [[10/22/00 – Las Vegas, Nevada]]\n{{R from modification}}'),
 (317587888,
  '2009-10-03T04:24:00Z',
  'Koavf',
  'moved [[10/22/00 - Las Vegas, Nevada]] to [[10/22/00 – Las Vegas, Nevada]]: ndash',
  True,
  '10/22/00 - Las Vegas, Nevada',
  '#REDIRECT [[10/22/00 – Las Vegas, Nevada]]\n\n{{R from modification}}\n\n{{R unprintworthy}}'),
 (120867146,
  '2007-04-07T00:45:03Z',
  '-5-',
  'moved [[10/25/00 - San Diego California]] to [[10/25/00 - San Diego, California]]: Punctuation',
  True,
  '10/25/00 - San Diego California',
  '#REDIRECT [[10/25/00 – San Diego, California]]\n{{R from modification}}'),
 (317588002,
  '2009-10-03T04:25:07Z',
  'Koavf',
  'moved [[10/25/00 - San Diego, California]] to [[10/25/00 – San Diego, California]]: ndash',
  True,
  '10/25/00 - San Diego, California',
  '#REDIRECT [[10/25/00 – San Diego, California]]\n\n{{R from modification}}\n\n{{R unprintworthy}}'),
 (120866749,
  '2007-04-07T00:42:49Z',
  '-5-',
  'moved [[10/7/00 - Detroit Michigan]] to [[10/7/00 - Detroit, Michigan]]: Punctuation',
  True,
  '10/7/00 - Detroit Michigan',
  '#REDIRECT [[10/7/00 – Detroit, Michigan]]\n{{R from modification}}'),
 (317588012,
  '2009-10-03T04:25:10Z',
  'Koavf',
  'moved [[10/7/00 - Detroit, Michigan]] to [[10/7/00 – Detroit, Michigan]]: ndash',
  True,
  '10/7/00 - Detroit, Michigan',
  '#REDIRECT [[10/7/00 – Detroit, Michigan]]\n\n{{R from modification}}\n\n{{R unprintworthy}}'),
 (120866816,
  '2007-04-07T00:43:10Z',
  '-5-',
  'moved [[10/9/00 - Chicago Illinois]] to [[10/9/00 - Chicago, Illinois]]: Punctuation',
  True,
  '10/9/00 - Chicago Illinois',
  '#REDIRECT [[10/9/00 – Chicago, Illinois]]\n{{R from modification}}'),
 (317588022,
  '2009-10-03T04:25:13Z',
  'Koavf',
  'moved [[10/9/00 - Chicago, Illinois]] to [[10/9/00 – Chicago, Illinois]]: ndash',
  True,
  '10/9/00 - Chicago, Illinois',
  '#REDIRECT [[10/9/00 – Chicago, Illinois]]\n\n{{R from modification}}\n\n{{R unprintworthy}}'),
 (764459710,
  '2017-02-09T01:17:42Z',
  'Editor 2050',
  '[[WP:AES|←]]Redirected page to [[1000 – Oru Note Paranja Katha]]',
  True,
  '1000: Oru Note Paranja Katha',
  '#REDIRECT [[1000 – Oru Note Paranja Katha]]'),
 (764481180,
  '2017-02-09T04:24:19Z',
  'AnomieBOT',
  'Redirecting to [[:1000 – Oru Note Paranja Katha]] because titles with en-dashes are hard to type.  Errors? [[User:AnomieBOT/shutoff/EnDashRedirectCreator]]',
  True,
  '1000 - Oru Note Paranja Katha',
  '#REDIRECT [[:1000 – Oru Note Paranja Katha]]\n{{Redirect shell|{{R from modification|1={{-r|1000 – Oru Note Paranja Katha}}}}}}\n{{User:AnomieBOT/Auto-G8|target=1000 – Oru Note Paranja Katha}}'),
 (475231399,
  '2012-02-05T15:18:32Z',
  'RjwilmsiBot',
  'creating redirects for pages with endashes in title using [[Project:AWB|AWB]] (7940)',
  True,
  '100 Jahre - Der Countdown',
  '#REDIRECT [[100 Jahre – Der Countdown]] {{R from modification}}'),
 (596328967,
  '2014-02-20T12:42:19Z',
  'DadaNeem',
  '#REDIRECT [[100 Jahre – Der Countdown]]',
  True,
  '100 Years - The Countdown',
  '#REDIRECT [[100 Jahre – Der Countdown]]'),
 (40669679,
  '2006-02-22T04:37:47Z',
  'Larryv',
  'create redirect',
  True,
  '103rd Street (IRT Broadway-Seventh Avenue Line station)',
  '#REDIRECT [[103rd Street (IRT Broadway–Seventh Avenue Line)]]'),
 (140830143,
  '2007-06-26T22:25:15Z',
  'Marc Shepherd',
  'moved [[103rd Street (IRT Broadway-Seventh Avenue Line)]] to [[103rd Street (IRT Broadway–Seventh Avenue Line)]] over redirect: undo recent move per [[WP:DASH]]',
  True,
  '103rd Street (IRT Broadway-Seventh Avenue Line)',
  '#REDIRECT [[103rd Street (IRT Broadway–Seventh Avenue Line)]]'),
 (336884360,
  '2010-01-09T23:04:33Z',
  'Tinlinkin',
  'redirect to [[103rd Street (IRT Broadway – Seventh Avenue Line)]]',
  True,
  '103rd Street (IRT Broadway - Seventh Avenue Line)',
  '#REDIRECT [[103rd Street (IRT Broadway–Seventh Avenue Line)]] {{R from title without diacritics}}'),
 (732171705,
  '2016-07-30T03:28:57Z',
  'Dicklyon',
  'Dicklyon moved page [[103rd Street (IRT Broadway – Seventh Avenue Line)]] to [[103rd Street (IRT Broadway–Seventh Avenue Line)]] over redirect: unspace the en dash',
  True,
  '103rd Street (IRT Broadway – Seventh Avenue Line)',
  '#REDIRECT [[103rd Street (IRT Broadway–Seventh Avenue Line)]]\n{{R from move}}'),
 (185737150,
  '2008-01-20T23:30:04Z',
  'DanTD',
  'moved [[103rd Sreet - Beverly Hills (Metra)]] to [[103rd Street - Beverly Hills (Metra)]]: title misspelled',
  True,
  '103rd Sreet - Beverly Hills (Metra)',
  '#REDIRECT [[103rd Street–Beverly Hills (Metra station)]]'),
 (334054074,
  '2009-12-26T05:13:22Z',
  'DASHBot',
  'moved [[103rd Street - Beverly Hills (Metra)]] to [[103rd Street – Beverly Hills (Metra)]]: Robot moving pages:per [[WP:HYPHEN]] ([[User talk:Tim1357|error?]])',
  True,
  '103rd Street - Beverly Hills (Metra)',
  '#REDIRECT [[103rd Street–Beverly Hills (Metra station)]]\n\n{{R from modification}}\n{{R from move}}'),
 (595645871,
  '2014-02-15T22:20:31Z',
  'DanTD',
  'DanTD moved page [[103rd Street – Beverly Hills (Metra)]] to [[103rd Street – Beverly Hills (Metra station)]]: conform to station naming conventions',
  True,
  '103rd Street – Beverly Hills (Metra)',
  '#REDIRECT [[103rd Street–Beverly Hills (Metra station)]]\n{{R from move}}'),
 (708983602,
  '2016-03-08T15:08:12Z',
  'AnomieBOT',
  'Redirecting to [[:103rd Street – Beverly Hills (Metra station)]] because titles with en-dashes are hard to type.  Errors? [[User:AnomieBOT/shutoff/EnDashRedirectCreator]]',
  True,
  '103rd Street - Beverly Hills (Metra station)',
  '#REDIRECT [[103rd Street–Beverly Hills (Metra station)]]\n{{Redr|from modification|p1={{-r|103rd Street – Beverly Hills (Metra station)}}}}'),
 (741675832,
  '2016-09-29T00:43:31Z',
  'Kew Gardens 613',
  'Kew Gardens 613 moved page [[103rd Street – Beverly Hills (Metra station)]] to [[103rd Street–Beverly Hills (Metra station)]]',
  True,
  '103rd Street – Beverly Hills (Metra station)',
  '#REDIRECT [[103rd Street–Beverly Hills (Metra station)]]\n{{R from move}}'),
 (741689905,
  '2016-09-29T02:26:58Z',
  'AnomieBOT',
  'Redirecting to [[:103rd Street–Beverly Hills (Metra station)]] because titles with en-dashes are hard to type.  Errors? [[User:AnomieBOT/shutoff/EnDashRedirectCreator]]',
  True,
  '103rd Street-Beverly Hills (Metra station)',
  '#REDIRECT [[:103rd Street–Beverly Hills (Metra station)]]\n{{Redr|from modification|p1={{-r|103rd Street–Beverly Hills (Metra station)}}}}'),
 (5338631,
  '2004-06-16T20:17:11Z',
  'PZFUN',
  '',
  True,
  '103rd Street-Corona Plaza (New York Subway)',
  '#REDIRECT [[103rd Street–Corona Plaza (IRT Flushing Line)]]'),
 (12866330,
  '2005-01-31T23:40:32Z',
  'SPUI',
  '103rd Street-Corona Plaza (7-Flushing) moved to 103rd Street-Corona Plaza (IRT Flushing Line station)',
  True,
  '103rd Street-Corona Plaza (7-Flushing)',
  '#REDIRECT [[103rd Street–Corona Plaza (IRT Flushing Line)]]'),
 (37808383,
  '2006-02-02T06:29:43Z',
  'Larryv',
  "moved [[103rd Street-Corona Plaza (IRT Flushing Line station)]] to [[103rd Street-Corona Plaza (IRT Flushing Line)]]: 'station' unnecessary",
  True,
  '103rd Street-Corona Plaza (IRT Flushing Line station)',
  '#REDIRECT [[103rd Street–Corona Plaza (IRT Flushing Line)]]'),
 (101911738,
  '2007-01-20T01:16:34Z',
  'NE2',
  '[[WP:AES|←]]Redirected page to [[103rd Street-Corona Plaza (IRT Flushing Line)]]',
  True,
  '103rd Street (IRT Flushing Line)',
  '#REDIRECT [[103rd Street–Corona Plaza (IRT Flushing Line)]]'),
 (119997520,
  '2007-04-03T16:00:31Z',
  'NE2',
  '[[WP:AES|←]]Redirected page to [[103rd Street–Corona Plaza (IRT Flushing Line)]]',
  True,
  '104th Street (IRT Flushing Line)',
  '#REDIRECT [[103rd Street–Corona Plaza (IRT Flushing Line)]]'),
 (141377844,
  '2007-06-29T12:15:07Z',
  'Marc Shepherd',
  'moved [[103rd Street-Corona Plaza (IRT Flushing Line)]] to [[103rd Street–Corona Plaza (IRT Flushing Line)]] over redirect: undo recent move per standard at [[WP:DASH]]',
  True,
  '103rd Street-Corona Plaza (IRT Flushing Line)',
  '#REDIRECT [[103rd Street–Corona Plaza (IRT Flushing Line)]]'),
 (306180860,
  '2009-08-05T09:58:34Z',
  'Tinlinkin',
  'redirect to [[103rd Street – Corona Plaza (IRT Flushing Line)]]',
  True,
  '103rd Street - Corona Plaza (IRT Flushing Line)',
  '#REDIRECT [[103rd Street–Corona Plaza (IRT Flushing Line)]] {{R from title without diacritics}}\n{{R from modification}}'),
 (731861522,
  '2016-07-28T00:12:19Z',
  'Epicgenius',
  'Epicgenius moved page [[103rd Street – Corona Plaza (IRT Flushing Line)]] to [[103rd Street–Corona Plaza (IRT Flushing Line)]] over redirect: unspace endash',
  True,
  '103rd Street – Corona Plaza (IRT Flushing Line)',
  '#REDIRECT [[103rd Street–Corona Plaza (IRT Flushing Line)]]\n{{R from move}}'),
 (612302785,
  '2014-06-10T03:24:02Z',
  'Spyder Monkey',
  '[[WP:AES|←]]Redirected page to [[104–128 South Side Square]]',
  True,
  'Buildings at 104-128 S. Side Sq.',
  '#REDIRECT [[104–128 South Side Square]]'),
 (708973935,
  '2016-03-08T13:58:37Z',
  'AnomieBOT',
  'Redirecting to [[:104–128 South Side Square]] because titles with en-dashes are hard to type.  Errors? [[User:AnomieBOT/shutoff/EnDashRedirectCreator]]',
  True,
  '104-128 South Side Square',
  '#REDIRECT [[:104–128 South Side Square]]\n{{Redr|from modification|p1={{-r|104–128 South Side Square}}}}'),
 (334054162,
  '2009-12-26T05:14:09Z',
  'DASHBot',
  'moved [[107th Street - Beverly Hills (Metra)]] to [[107th Street – Beverly Hills (Metra)]]: Robot moving pages:per [[WP:HYPHEN]] ([[User talk:Tim1357|error?]])',
  True,
  '107th Street - Beverly Hills (Metra)',
  '#REDIRECT [[107th Street–Beverly Hills (Metra station)]]\n\n{{R from modification}}\n{{R from move}}'),
 (595645951,
  '2014-02-15T22:21:07Z',
  'DanTD',
  'DanTD moved page [[107th Street – Beverly Hills (Metra)]] to [[107th Street – Beverly Hills (Metra station)]]: conform to station naming conventions',
  True,
  '107th Street – Beverly Hills (Metra)',
  '#REDIRECT [[107th Street–Beverly Hills (Metra station)]]\n{{R from move}}'),
 (708956917,
  '2016-03-08T11:40:00Z',
  'AnomieBOT',
  'Redirecting to [[:107th Street – Beverly Hills (Metra station)]] because titles with en-dashes are hard to type.  Errors? [[User:AnomieBOT/shutoff/EnDashRedirectCreator]]',
  True,
  '107th Street - Beverly Hills (Metra station)',
  '#REDIRECT [[107th Street–Beverly Hills (Metra station)]]\n{{Redr|from modification|p1={{-r|107th Street – Beverly Hills (Metra station)}}}}'),
 (741676092,
  '2016-09-29T00:45:47Z',
  'Kew Gardens 613',
  'Kew Gardens 613 moved page [[107th Street – Beverly Hills (Metra station)]] to [[107th Street–Beverly Hills (Metra station)]]',
  True,
  '107th Street – Beverly Hills (Metra station)',
  '#REDIRECT [[107th Street–Beverly Hills (Metra station)]]\n{{R from move}}'),
 (741689784,
  '2016-09-29T02:26:08Z',
  'AnomieBOT',
  'Redirecting to [[:107th Street–Beverly Hills (Metra station)]] because titles with en-dashes are hard to type.  Errors? [[User:AnomieBOT/shutoff/EnDashRedirectCreator]]',
  True,
  '107th Street-Beverly Hills (Metra station)',
  '#REDIRECT [[:107th Street–Beverly Hills (Metra station)]]\n{{Redr|from modification|p1={{-r|107th Street–Beverly Hills (Metra station)}}}}'),
 (739682478,
  '2016-09-16T08:36:22Z',
  'Ham II',
  'Ham II moved page [[107-123 Muswell Hill Road]] to [[107–123 Muswell Hill Road]]: [[MOS:DASH]]',
  True,
  '107-123 Muswell Hill Road',
  '#REDIRECT [[107–123 Muswell Hill Road]]\n{{R from move}}'),
 (297057880,
  '2009-06-18T00:02:08Z',
  'Koavf',
  'moved [[10: 1993-2003 - Ten Years Of]] to [[10: 1993–2003 – Ten Years Of]]: –',
  True,
  '10: 1993-2003 - Ten Years Of',
  '#REDIRECT [[10: 1993–2003 – Ten Years Of]]\n\n{{R from modification}}\n\n{{R unprintworthy}}'),
 (334053726,
  '2009-12-26T05:09:29Z',
  'DASHBot',
  'moved [[10 Jahre - Best Of]] to [[10 Jahre – Best Of]]: Robot moving pages:per [[WP:HYPHEN]] ([[User talk:Tim1357|error?]])',
  True,
  '10 Jahre - Best Of',
  '#REDIRECT [[10 Jahre – Best Of]]\n\n{{R from modification}}\n{{R from move}}'),
 (742542349,
  '2016-10-04T09:22:20Z',
  'Caldorwards4',
  'Caldorwards4 moved page [[10 Years of Greatest Hits]] to [[10 Years of Hits – Newly Recorded]]: correct title',
  True,
  '10 Years of Greatest Hits',
  '#REDIRECT [[10 Years of Greatest Hits – Newly Recorded]]'),
 (742542415,
  '2016-10-04T09:22:59Z',
  'Caldorwards4',
  'Caldorwards4 moved page [[10 Years of Hits – Newly Recorded]] to [[10 Years of Greatest Hits – Newly Recorded]]: oops',
  True,
  '10 Years of Hits – Newly Recorded',
  '#REDIRECT [[10 Years of Greatest Hits – Newly Recorded]]\n{{R from move}}'),
 (742549520,
  '2016-10-04T10:32:23Z',
  'AnomieBOT',
  'Redirecting to [[:10 Years of Hits – Newly Recorded]] because titles with en-dashes are hard to type (and resolving the double redirect to [[:10 Years of Greatest Hits – Newly Recorded]]).  Errors? [[User:AnomieBOT/shutoff/EnDashRedirectCreator]]',
  True,
  '10 Years of Hits - Newly Recorded',
  '#REDIRECT [[:10 Years of Greatest Hits – Newly Recorded]]\n{{Redr|from modification|p1={{-r|10 Years of Hits – Newly Recorded}}}}'),
 (742549542,
  '2016-10-04T10:32:33Z',
  'AnomieBOT',
  'Redirecting to [[:10 Years of Greatest Hits – Newly Recorded]] because titles with en-dashes are hard to type.  Errors? [[User:AnomieBOT/shutoff/EnDashRedirectCreator]]',
  True,
  '10 Years of Greatest Hits - Newly Recorded',
  '#REDIRECT [[:10 Years of Greatest Hits – Newly Recorded]]\n{{Redr|from modification|p1={{-r|10 Years of Greatest Hits – Newly Recorded}}}}'),
 (708964044,
  '2016-03-08T12:43:12Z',
  'AnomieBOT',
  'Redirecting to [[:10 år – En snäll mans bekännelser]] because titles with en-dashes are hard to type.  Errors? [[User:AnomieBOT/shutoff/EnDashRedirectCreator]]',
  True,
  '10 år - En snäll mans bekännelser',
  '#REDIRECT [[:10 år – En snäll mans bekännelser]]\n{{Redr|from modification|p1={{-r|10 år – En snäll mans bekännelser}}}}'),
 (725567194,
  '2016-06-16T13:32:26Z',
  'RjwilmsiBot',
  '/* top */Create redirect for title with diacritics using [[Project:AWB|AWB]] (12022)',
  True,
  '10 ar – En snall mans bekannelser',
  '#REDIRECT [[10 år – En snäll mans bekännelser]] {{R from title without diacritics}}'),
 (725618421,
  '2016-06-16T20:21:04Z',
  'AnomieBOT',
  'Redirecting to [[:10 ar – En snall mans bekannelser]] because titles with en-dashes are hard to type (and resolving the double redirect to [[:10 år – En snäll mans bekännelser]]).  Errors? [[User:AnomieBOT/shutoff/EnDashRedirectCreator]]',
  True,
  '10 ar - En snall mans bekannelser',
  '#REDIRECT [[:10 år – En snäll mans bekännelser]]\n{{Redr|from modification|p1={{-r|10 ar – En snall mans bekannelser}}}}'),
 (334054590,
  '2009-12-26T05:18:04Z',
  'DASHBot',
  'moved [[10th MMC - Kyustendil]] to [[10th MMC – Kyustendil]]: Robot moving pages:per [[WP:HYPHEN]] ([[User talk:Tim1357|error?]])',
  True,
  '10th MMC - Kyustendil',
  '#REDIRECT [[10th MMC – Kyustendil]]\n\n{{R from modification}}\n{{R from move}}')]

In [32]:
subset_df = redirects_df[0:100]
subset_df


Out[32]:
page_id page_title rd_title
0 25560554 "FF.SS."_-_Cioè:_"...che_mi_hai_portato_a_fare... "FF.SS."_–_Cioè:_"...che_mi_hai_portato_a_fare...
1 30962649 FF.SS. "FF.SS."_–_Cioè:_"...che_mi_hai_portato_a_fare...
2 34657348 "FF.SS."_–_Cioe:_"...che_mi_hai_portato_a_fare... "FF.SS."_–_Cioè:_"...che_mi_hai_portato_a_fare...
3 41418201 F.F.S.S.,_cioè.._che_mi_hai_portato_a_fare_sop... "FF.SS."_–_Cioè:_"...che_mi_hai_portato_a_fare...
4 49679430 "FF.SS."_-_Cioe:_"...che_mi_hai_portato_a_fare... "FF.SS."_–_Cioè:_"...che_mi_hai_portato_a_fare...
5 2398718 Port_lotniczy_Szczecin-Goleniów "Solidarity"_Szczecin–Goleniów_Airport
6 2565845 Szczecin_International_Airport "Solidarity"_Szczecin–Goleniów_Airport
7 4367474 Szczecin-Goleniów_Airport "Solidarity"_Szczecin–Goleniów_Airport
8 6135639 Szczecin-Goleniow_"Solidarnosc"_Airport "Solidarity"_Szczecin–Goleniów_Airport
9 6135674 Szczecin-Goleniow_Solidarity_Airport "Solidarity"_Szczecin–Goleniów_Airport
10 12446084 Szz "Solidarity"_Szczecin–Goleniów_Airport
11 14715365 Szczecin-Goleniów_"Solidarnosc"_Airport "Solidarity"_Szczecin–Goleniów_Airport
12 16385404 Port_lotniczy_Szczecin-Goleniow "Solidarity"_Szczecin–Goleniów_Airport
13 16444660 Szczecin-Goleniow_Airport "Solidarity"_Szczecin–Goleniów_Airport
14 20915643 SZZ "Solidarity"_Szczecin–Goleniów_Airport
15 23555019 Solidarność_Airport "Solidarity"_Szczecin–Goleniów_Airport
16 23555025 Solidarnosc_Airport "Solidarity"_Szczecin–Goleniów_Airport
17 34827897 Szczecin-Goleniów_"Solidarność"_Airport "Solidarity"_Szczecin–Goleniów_Airport
18 35994818 "Solidarity"_Szczecin-Goleniow_Airport "Solidarity"_Szczecin–Goleniów_Airport
19 39391434 "Solidarity"_Szczecin-Goleniów_Airport "Solidarity"_Szczecin–Goleniów_Airport
20 43855356 "Solidarity"_Szczecin–Goleniow_Airport "Solidarity"_Szczecin–Goleniów_Airport
21 50073414 "Solidarity"_Szczecin–Goleniów_(airport) "Solidarity"_Szczecin–Goleniów_Airport
22 50084473 "Solidarity"_Szczecin-Goleniów_(airport) "Solidarity"_Szczecin–Goleniów_Airport
23 16451346 38_-_Auch_das_war_Wien '38_–_Vienna_Before_the_Fall
24 44031004 38_–_Auch_das_war_Wien '38_–_Vienna_Before_the_Fall
25 44031050 '38_-_Vienna_Before_the_Fall '38_–_Vienna_Before_the_Fall
26 44031053 38_–_Vienna_Before_the_Fall '38_–_Vienna_Before_the_Fall
27 44031058 38_-_Vienna_Before_the_Fall '38_–_Vienna_Before_the_Fall
28 1317664 '64-'95 '64–'95
29 19166400 '64_-_'95 '64–'95
... ... ... ...
70 41954081 103rd_Street_–_Beverly_Hills_(Metra) 103rd_Street–Beverly_Hills_(Metra_station)
71 49681032 103rd_Street_-_Beverly_Hills_(Metra_station) 103rd_Street–Beverly_Hills_(Metra_station)
72 51784682 103rd_Street_–_Beverly_Hills_(Metra_station) 103rd_Street–Beverly_Hills_(Metra_station)
73 51786492 103rd_Street-Beverly_Hills_(Metra_station) 103rd_Street–Beverly_Hills_(Metra_station)
74 730973 103rd_Street-Corona_Plaza_(New_York_Subway) 103rd_Street–Corona_Plaza_(IRT_Flushing_Line)
75 1453582 103rd_Street-Corona_Plaza_(7-Flushing) 103rd_Street–Corona_Plaza_(IRT_Flushing_Line)
76 3930501 103rd_Street-Corona_Plaza_(IRT_Flushing_Line_s... 103rd_Street–Corona_Plaza_(IRT_Flushing_Line)
77 9011853 103rd_Street_(IRT_Flushing_Line) 103rd_Street–Corona_Plaza_(IRT_Flushing_Line)
78 10440364 104th_Street_(IRT_Flushing_Line) 103rd_Street–Corona_Plaza_(IRT_Flushing_Line)
79 12005273 103rd_Street-Corona_Plaza_(IRT_Flushing_Line) 103rd_Street–Corona_Plaza_(IRT_Flushing_Line)
80 23872564 103rd_Street_-_Corona_Plaza_(IRT_Flushing_Line) 103rd_Street–Corona_Plaza_(IRT_Flushing_Line)
81 51182156 103rd_Street_–_Corona_Plaza_(IRT_Flushing_Line) 103rd_Street–Corona_Plaza_(IRT_Flushing_Line)
82 43013681 Buildings_at_104-128_S._Side_Sq. 104–128_South_Side_Square
83 49680236 104-128_South_Side_Square 104–128_South_Side_Square
84 25560634 107th_Street_-_Beverly_Hills_(Metra) 107th_Street–Beverly_Hills_(Metra_station)
85 41954085 107th_Street_–_Beverly_Hills_(Metra) 107th_Street–Beverly_Hills_(Metra_station)
86 49678424 107th_Street_-_Beverly_Hills_(Metra_station) 107th_Street–Beverly_Hills_(Metra_station)
87 51784708 107th_Street_–_Beverly_Hills_(Metra_station) 107th_Street–Beverly_Hills_(Metra_station)
88 51786482 107th_Street-Beverly_Hills_(Metra_station) 107th_Street–Beverly_Hills_(Metra_station)
89 51622846 107-123_Muswell_Hill_Road 107–123_Muswell_Hill_Road
90 23259687 10:_1993-2003_-_Ten_Years_Of 10:_1993–2003_–_Ten_Years_Of
91 25560608 10_Jahre_-_Best_Of 10_Jahre_–_Best_Of
92 51859414 10_Years_of_Greatest_Hits 10_Years_of_Greatest_Hits_–_Newly_Recorded
93 51859418 10_Years_of_Hits_–_Newly_Recorded 10_Years_of_Greatest_Hits_–_Newly_Recorded
94 51859856 10_Years_of_Hits_-_Newly_Recorded 10_Years_of_Greatest_Hits_–_Newly_Recorded
95 51859859 10_Years_of_Greatest_Hits_-_Newly_Recorded 10_Years_of_Greatest_Hits_–_Newly_Recorded
96 49679325 10_år_-_En_snäll_mans_bekännelser 10_år_–_En_snäll_mans_bekännelser
97 50825971 10_ar_–_En_snall_mans_bekannelser 10_år_–_En_snäll_mans_bekännelser
98 50831282 10_ar_-_En_snall_mans_bekannelser 10_år_–_En_snäll_mans_bekännelser
99 25560653 10th_MMC_-_Kyustendil 10th_MMC_–_Kyustendil

100 rows × 3 columns


In [59]:
revs = []
for r in subset_df.iterrows():
    redirect_page = r[1].page_title
    try:
        page = pywikibot.Page(site, redirect_page)
        oldest_rev = page.oldest_revision
        revs.append((oldest_rev.revid, oldest_rev.timestamp.isoformat(), oldest_rev.user, oldest_rev.comment, page.isRedirectPage(), page.title(), page.text))
    except Exception as e:
        pass

In [60]:
pd.DataFrame(revs, columns=["revid", "timestamp", "user", "comment", "is_redirect", "page_title", "page_text"])


Out[60]:
revid timestamp user comment is_redirect page_title page_text
0 334052434 2009-12-26T04:55:57Z DASHBot moved [["FF.SS." - Cioè: "...che mi hai portat... True "FF.SS." - Cioè: "...che mi hai portato a fare... #REDIRECT [["FF.SS." – Cioè: "...che mi hai po...
1 415186497 2011-02-21T19:55:28Z DerBorg [[WP:AES|←]]Redirected page to [["FF.SS." – Ci... True FF.SS. #REDIRECT [["FF.SS." – Cioè: "...che mi hai po...
2 475846929 2012-02-08T23:25:55Z RjwilmsiBot Create redirect for title with diacritics usin... True "FF.SS." – Cioe: "...che mi hai portato a fare... #REDIRECT [["FF.SS." – Cioè: "...che mi hai po...
3 586979837 2013-12-20T17:31:33Z Cavarrone [[WP:AES|←]]Redirected page to [["FF.SS." – Ci... True F.F.S.S., cioè.. che mi hai portato a fare sop... #REDIRECT [["FF.SS." – Cioè: "...che mi hai po...
4 708965298 2016-03-08T12:52:52Z AnomieBOT Redirecting to [[:"FF.SS." – Cioe: "...che mi ... True "FF.SS." - Cioe: "...che mi hai portato a fare... #REDIRECT [[:"FF.SS." – Cioè: "...che mi hai p...
5 20470716 2005-08-07T12:03:02Z Adz create redirect True Port lotniczy Szczecin-Goleniów #REDIRECT [["Solidarity" Szczecin–Goleniów Air...
6 22077099 2005-08-29T04:52:37Z Adz create redirect True Szczecin International Airport #REDIRECT [["Solidarity" Szczecin–Goleniów Air...
7 43444571 2006-03-12T15:52:25Z Balcer redir True Szczecin-Goleniów Airport #REDIRECT [["Solidarity" Szczecin–Goleniów Air...
8 66093939 2006-07-27T03:56:58Z Mareklug {{R from title without diacritics}} True Szczecin-Goleniow "Solidarnosc" Airport #REDIRECT [["Solidarity" Szczecin–Goleniów Air...
9 66094262 2006-07-27T03:59:40Z Mareklug redirect True Szczecin-Goleniow Solidarity Airport #REDIRECT [["Solidarity" Szczecin–Goleniów Air...
10 147052256 2007-07-25T19:09:04Z Targeman redirect True Szz #REDIRECT [["Solidarity" Szczecin–Goleniów Air...
11 178066046 2007-12-15T12:34:08Z Zyxw [[WP:AES|←]]Redirected page to [[Szczecin-Gole... True Szczecin-Goleniów "Solidarnosc" Airport #REDIRECT [["Solidarity" Szczecin–Goleniów Air...
12 199229641 2008-03-19T00:42:13Z Eubot Redirected page to [[Szczecin-Goleniów "Solida... True Port lotniczy Szczecin-Goleniow #REDIRECT [["Solidarity" Szczecin–Goleniów Air...
13 199721147 2008-03-21T00:38:48Z Eubot Redirected page to [[Szczecin-Goleniów "Solida... True Szczecin-Goleniow Airport #REDIRECT [["Solidarity" Szczecin–Goleniów Air...
14 261295911 2009-01-01T18:30:17Z Uzdzislaw [[WP:AES|←]]Redirected page to [[Szczecin-Gole... True SZZ #REDIRECT [["Solidarity" Szczecin–Goleniów Air...
15 301278464 2009-07-09T23:26:40Z Nappyrootslistener [[WP:AES|←]]Redirected page to [[Szczecin-Gole... True Solidarność Airport #REDIRECT [["Solidarity" Szczecin–Goleniów Air...
16 301278560 2009-07-09T23:27:17Z Nappyrootslistener [[WP:AES|←]]Redirected page to [[Szczecin-Gole... True Solidarnosc Airport #REDIRECT [["Solidarity" Szczecin–Goleniów Air...
17 478076640 2012-02-21T15:08:41Z Igor alexandrov moved [[Szczecin-Goleniów "Solidarność" Airpor... True Szczecin-Goleniów "Solidarność" Airport #REDIRECT [["Solidarity" Szczecin–Goleniów Air...
18 495325659 2012-05-31T17:26:12Z RjwilmsiBot Create redirect for title with diacritics usin... True "Solidarity" Szczecin-Goleniow Airport #REDIRECT [["Solidarity" Szczecin–Goleniów Air...
19 555161539 2013-05-15T03:40:18Z Cherkash Cherkash moved page [["Solidarity" Szczecin-Go... True "Solidarity" Szczecin-Goleniów Airport #REDIRECT [["Solidarity" Szczecin–Goleniów Air...
20 626062909 2014-09-18T09:48:35Z RjwilmsiBot Create redirect for title with diacritics usin... True "Solidarity" Szczecin–Goleniow Airport #REDIRECT [["Solidarity" Szczecin–Goleniów Air...
21 714079955 2016-04-07T14:23:12Z Feminist /* top */Redirecting to [["Solidarity" Szczeci... True "Solidarity" Szczecin–Goleniów (airport) #REDIRECT [["Solidarity" Szczecin–Goleniów Air...
22 714250580 2016-04-08T16:07:20Z AnomieBOT Redirecting to [[:"Solidarity" Szczecin–Goleni... True "Solidarity" Szczecin-Goleniów (airport) #REDIRECT [[:"Solidarity" Szczecin–Goleniów Ai...
23 199768772 2008-03-21T05:19:58Z Eubot Redirected page to [[38 – Auch das war Wien]]. True 38 - Auch das war Wien #REDIRECT [['38 – Vienna Before the Fall]] {{R...
24 628359230 2014-10-05T17:43:11Z Lugnuts Lugnuts moved page [[38 – Auch das war Wien]] ... True 38 – Auch das war Wien #REDIRECT [['38 – Vienna Before the Fall]]\n\n...
25 628360066 2014-10-05T17:51:01Z Lugnuts [[WP:AES|←]]Redirected page to [['38 – Vienna ... True '38 - Vienna Before the Fall #REDIRECT[['38 – Vienna Before the Fall]]
26 628360093 2014-10-05T17:51:18Z Lugnuts [[WP:AES|←]]Redirected page to [['38 – Vienna ... True 38 – Vienna Before the Fall #REDIRECT[['38 – Vienna Before the Fall]]
27 628360127 2014-10-05T17:51:34Z Lugnuts [[WP:AES|←]]Redirected page to [['38 – Vienna ... True 38 - Vienna Before the Fall #REDIRECT[['38 – Vienna Before the Fall]]
28 9096674 2004-12-23T19:07:20Z 81.154.8.38 True '64-'95 #REDIRECT [['64–'95]]\n{{R from modification}}
29 236143794 2008-09-04T01:07:26Z Koavf moved [['64 - '95]] to [['64–'95]]:&#32;– True '64 - '95 #REDIRECT [['64–'95]]
... ... ... ... ... ... ... ...
70 595645871 2014-02-15T22:20:31Z DanTD DanTD moved page [[103rd Street – Beverly Hill... True 103rd Street – Beverly Hills (Metra) #REDIRECT [[103rd Street–Beverly Hills (Metra ...
71 708983602 2016-03-08T15:08:12Z AnomieBOT Redirecting to [[:103rd Street – Beverly Hills... True 103rd Street - Beverly Hills (Metra station) #REDIRECT [[103rd Street–Beverly Hills (Metra ...
72 741675832 2016-09-29T00:43:31Z Kew Gardens 613 Kew Gardens 613 moved page [[103rd Street – Be... True 103rd Street – Beverly Hills (Metra station) #REDIRECT [[103rd Street–Beverly Hills (Metra ...
73 741689905 2016-09-29T02:26:58Z AnomieBOT Redirecting to [[:103rd Street–Beverly Hills (... True 103rd Street-Beverly Hills (Metra station) #REDIRECT [[:103rd Street–Beverly Hills (Metra...
74 5338631 2004-06-16T20:17:11Z PZFUN True 103rd Street-Corona Plaza (New York Subway) #REDIRECT [[103rd Street–Corona Plaza (IRT Flu...
75 12866330 2005-01-31T23:40:32Z SPUI 103rd Street-Corona Plaza (7-Flushing) moved t... True 103rd Street-Corona Plaza (7-Flushing) #REDIRECT [[103rd Street–Corona Plaza (IRT Flu...
76 37808383 2006-02-02T06:29:43Z Larryv moved [[103rd Street-Corona Plaza (IRT Flushin... True 103rd Street-Corona Plaza (IRT Flushing Line s... #REDIRECT [[103rd Street–Corona Plaza (IRT Flu...
77 101911738 2007-01-20T01:16:34Z NE2 [[WP:AES|←]]Redirected page to [[103rd Street-... True 103rd Street (IRT Flushing Line) #REDIRECT [[103rd Street–Corona Plaza (IRT Flu...
78 119997520 2007-04-03T16:00:31Z NE2 [[WP:AES|←]]Redirected page to [[103rd Street–... True 104th Street (IRT Flushing Line) #REDIRECT [[103rd Street–Corona Plaza (IRT Flu...
79 141377844 2007-06-29T12:15:07Z Marc Shepherd moved [[103rd Street-Corona Plaza (IRT Flushin... True 103rd Street-Corona Plaza (IRT Flushing Line) #REDIRECT [[103rd Street–Corona Plaza (IRT Flu...
80 306180860 2009-08-05T09:58:34Z Tinlinkin redirect to [[103rd Street – Corona Plaza (IRT... True 103rd Street - Corona Plaza (IRT Flushing Line) #REDIRECT [[103rd Street–Corona Plaza (IRT Flu...
81 731861522 2016-07-28T00:12:19Z Epicgenius Epicgenius moved page [[103rd Street – Corona ... True 103rd Street – Corona Plaza (IRT Flushing Line) #REDIRECT [[103rd Street–Corona Plaza (IRT Flu...
82 612302785 2014-06-10T03:24:02Z Spyder Monkey [[WP:AES|←]]Redirected page to [[104–128 South... True Buildings at 104-128 S. Side Sq. #REDIRECT [[104–128 South Side Square]]
83 708973935 2016-03-08T13:58:37Z AnomieBOT Redirecting to [[:104–128 South Side Square]] ... True 104-128 South Side Square #REDIRECT [[:104–128 South Side Square]]\n{{Re...
84 334054162 2009-12-26T05:14:09Z DASHBot moved [[107th Street - Beverly Hills (Metra)]]... True 107th Street - Beverly Hills (Metra) #REDIRECT [[107th Street–Beverly Hills (Metra ...
85 595645951 2014-02-15T22:21:07Z DanTD DanTD moved page [[107th Street – Beverly Hill... True 107th Street – Beverly Hills (Metra) #REDIRECT [[107th Street–Beverly Hills (Metra ...
86 708956917 2016-03-08T11:40:00Z AnomieBOT Redirecting to [[:107th Street – Beverly Hills... True 107th Street - Beverly Hills (Metra station) #REDIRECT [[107th Street–Beverly Hills (Metra ...
87 741676092 2016-09-29T00:45:47Z Kew Gardens 613 Kew Gardens 613 moved page [[107th Street – Be... True 107th Street – Beverly Hills (Metra station) #REDIRECT [[107th Street–Beverly Hills (Metra ...
88 741689784 2016-09-29T02:26:08Z AnomieBOT Redirecting to [[:107th Street–Beverly Hills (... True 107th Street-Beverly Hills (Metra station) #REDIRECT [[:107th Street–Beverly Hills (Metra...
89 739682478 2016-09-16T08:36:22Z Ham II Ham II moved page [[107-123 Muswell Hill Road]... True 107-123 Muswell Hill Road #REDIRECT [[107–123 Muswell Hill Road]]\n{{R f...
90 297057880 2009-06-18T00:02:08Z Koavf moved [[10: 1993-2003 - Ten Years Of]] to [[10... True 10: 1993-2003 - Ten Years Of #REDIRECT [[10: 1993–2003 – Ten Years Of]]\n\n...
91 334053726 2009-12-26T05:09:29Z DASHBot moved [[10 Jahre - Best Of]] to [[10 Jahre – B... True 10 Jahre - Best Of #REDIRECT [[10 Jahre – Best Of]]\n\n{{R from m...
92 742542349 2016-10-04T09:22:20Z Caldorwards4 Caldorwards4 moved page [[10 Years of Greatest... True 10 Years of Greatest Hits #REDIRECT [[10 Years of Greatest Hits – Newly ...
93 742542415 2016-10-04T09:22:59Z Caldorwards4 Caldorwards4 moved page [[10 Years of Hits – N... True 10 Years of Hits – Newly Recorded #REDIRECT [[10 Years of Greatest Hits – Newly ...
94 742549520 2016-10-04T10:32:23Z AnomieBOT Redirecting to [[:10 Years of Hits – Newly Rec... True 10 Years of Hits - Newly Recorded #REDIRECT [[:10 Years of Greatest Hits – Newly...
95 742549542 2016-10-04T10:32:33Z AnomieBOT Redirecting to [[:10 Years of Greatest Hits – ... True 10 Years of Greatest Hits - Newly Recorded #REDIRECT [[:10 Years of Greatest Hits – Newly...
96 708964044 2016-03-08T12:43:12Z AnomieBOT Redirecting to [[:10 år – En snäll mans bekänn... True 10 år - En snäll mans bekännelser #REDIRECT [[:10 år – En snäll mans bekännelser...
97 725567194 2016-06-16T13:32:26Z RjwilmsiBot /* top */Create redirect for title with diacri... True 10 ar – En snall mans bekannelser #REDIRECT [[10 år – En snäll mans bekännelser]...
98 725618421 2016-06-16T20:21:04Z AnomieBOT Redirecting to [[:10 ar – En snall mans bekann... True 10 ar - En snall mans bekannelser #REDIRECT [[:10 år – En snäll mans bekännelser...
99 334054590 2009-12-26T05:18:04Z DASHBot moved [[10th MMC - Kyustendil]] to [[10th MMC ... True 10th MMC - Kyustendil #REDIRECT [[10th MMC – Kyustendil]]\n\n{{R fro...

100 rows × 7 columns


In [52]:
oldest_rev


Out[52]:
{'_content_model': 'wikitext', 'revid': 334054590, 'text': None, 'timestamp': Timestamp(2009, 12, 26, 5, 18, 4), 'rollbacktoken': None, 'minor': False, 'comment': 'moved [[10th MMC - Kyustendil]] to [[10th MMC – Kyustendil]]: Robot moving pages:per [[WP:HYPHEN]] ([[User talk:Tim1357|error?]])', 'anon': False, '_parent_id': 0, '_sha1': '833116308709346a871892a2e15435cb19163bcc', 'user': 'DASHBot'}

In [58]:
page.isRedirectPage


Out[58]:
<bound method Page.isRedirectPage of Page('10th MMC - Kyustendil')>

In [ ]:


In [72]:
def get_subset_data(subset_df):
    revs = []
    error_count = 0
    for r in subset_df.iterrows():
        redirect_page = r[1].page_title
        try:
            page = pywikibot.Page(site, redirect_page)
            oldest_rev = page.oldest_revision
            revs.append((oldest_rev.revid, oldest_rev.timestamp.isoformat(), oldest_rev.user, oldest_rev.comment, page.isRedirectPage(), page.title(), page.text))
        except Exception as e:
            error_count = error_count + 1
  
    revs_df = pd.DataFrame(revs, columns=["revid", "timestamp", "user", "comment", "is_redirect", "page_title", "page_text"])
    print(type(revs_df))
    
    return revs_df, error_count

In [73]:
errors = 0
total_df = pd.DataFrame(columns=["revid", "timestamp", "user", "comment", "is_redirect", "page_title", "page_text"])
total_pages = round(len(redirects_df)/100) + 1

for count in range(0,total_pages):
    print("Starting ", count)
    subset_df = redirects_df[count:count+100]
    
    revs_df, error_count = get_subset_data(subset_df)
    total_df = pd.concat([total_df, revs_df])
    
    total_df.to_csv("enwiki-redirects-endash-processed.tsv", sep="\t")
    total_df.to_pickle("enwiki-redirects-endash-processed.pickle")
        
    errors = errors + error_count
    
    print(count, errors)


Starting  0
<class 'pandas.core.frame.DataFrame'>
0 0
Starting  1
<class 'pandas.core.frame.DataFrame'>
1 0
Starting  2
<class 'pandas.core.frame.DataFrame'>
2 0
Starting  3
<class 'pandas.core.frame.DataFrame'>
3 0
Starting  4
<class 'pandas.core.frame.DataFrame'>
4 0
Starting  5
<class 'pandas.core.frame.DataFrame'>
5 0
Starting  6
<class 'pandas.core.frame.DataFrame'>
6 0
Starting  7
<class 'pandas.core.frame.DataFrame'>
7 0
Starting  8
<class 'pandas.core.frame.DataFrame'>
8 0
Starting  9
<class 'pandas.core.frame.DataFrame'>
9 0
Starting  10
<class 'pandas.core.frame.DataFrame'>
10 0
Starting  11
<class 'pandas.core.frame.DataFrame'>
11 0
Starting  12
<class 'pandas.core.frame.DataFrame'>
12 0
Starting  13
<class 'pandas.core.frame.DataFrame'>
13 0
Starting  14
<class 'pandas.core.frame.DataFrame'>
14 0
Starting  15
<class 'pandas.core.frame.DataFrame'>
15 0
Starting  16
<class 'pandas.core.frame.DataFrame'>
16 0
Starting  17
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-73-a7e9a5b24d4e> in <module>()
      7     subset_df = redirects_df[count:count+100]
      8 
----> 9     revs_df, error_count = get_subset_data(subset_df)
     10     total_df = pd.concat([total_df, revs_df])
     11 

<ipython-input-72-b142a5c982b1> in get_subset_data(subset_df)
      7             page = pywikibot.Page(site, redirect_page)
      8             oldest_rev = page.oldest_revision
----> 9             revs.append((oldest_rev.revid, oldest_rev.timestamp.isoformat(), oldest_rev.user, oldest_rev.comment, page.isRedirectPage(), page.title(), page.text))
     10         except Exception as e:
     11             error_count = error_count + 1

/srv/paws/pwb/pywikibot/page.py in text(self)
    583         if not hasattr(self, '_text') or self._text is None:
    584             try:
--> 585                 self._text = self.get(get_redirect=True)
    586             except pywikibot.NoPage:
    587                 # TODO: what other exceptions might be returned?

/srv/paws/pwb/pywikibot/tools/__init__.py in wrapper(*__args, **__kw)
   1446                              cls, depth)
   1447                     del __kw[old_arg]
-> 1448             return obj(*__args, **__kw)
   1449 
   1450         if not __debug__:

/srv/paws/pwb/pywikibot/page.py in get(self, force, get_redirect, sysop)
    443             del self.latest_revision_id
    444         try:
--> 445             self._getInternals(sysop)
    446         except pywikibot.IsRedirectPage:
    447             if not get_redirect:

/srv/paws/pwb/pywikibot/page.py in _getInternals(self, sysop)
    473         if self._latest_cached_revision() is None:
    474             try:
--> 475                 self.site.loadrevisions(self, getText=True, sysop=sysop)
    476             except (pywikibot.NoPage, pywikibot.SectionError) as e:
    477                 self._getexception = e

/srv/paws/pwb/pywikibot/site.py in loadrevisions(self, page, getText, revids, startid, endid, starttime, endtime, rvdir, user, excludeuser, section, sysop, step, total, rollback)
   3938             rvgen.set_maximum_items(-1)  # suppress use of rvlimit parameter
   3939 
-> 3940         for pagedata in rvgen:
   3941             if not self.sametitle(pagedata['title'],
   3942                                   page.title(withSection=False)):

/srv/paws/pwb/pywikibot/data/api.py in __iter__(self)
   2751                         _logger)
   2752             if not hasattr(self, "data"):
-> 2753                 self.data = self.request.submit()
   2754             if not self.data or not isinstance(self.data, dict):
   2755                 pywikibot.debug(

/srv/paws/pwb/pywikibot/data/api.py in submit(self)
   1952                 rawdata = http.request(
   1953                     site=self.site, uri=uri, method='GET' if use_get else 'POST',
-> 1954                     body=body, headers=headers)
   1955             except Server504Error:
   1956                 pywikibot.log(u"Caught HTTP 504 error; retrying")

/srv/paws/pwb/pywikibot/tools/__init__.py in wrapper(*__args, **__kw)
   1446                              cls, depth)
   1447                     del __kw[old_arg]
-> 1448             return obj(*__args, **__kw)
   1449 
   1450         if not __debug__:

/srv/paws/pwb/pywikibot/comms/http.py in request(site, uri, method, params, body, headers, data, **kwargs)
    326     headers['user-agent'] = user_agent(site, format_string)
    327 
--> 328     r = fetch(baseuri, method, params, body, headers, **kwargs)
    329     return r.content
    330 

/srv/paws/pwb/pywikibot/comms/http.py in fetch(uri, method, params, body, headers, default_error_handling, use_fake_user_agent, data, **kwargs)
    508             headers['user-agent'] = fake_user_agent()
    509 
--> 510     request = _enqueue(uri, method, params, body, headers, **kwargs)
    511     assert(request._data is not None)  # if there's no data in the answer we're in trouble
    512     # Run the error handling callback in the callers thread so exceptions

/srv/paws/pwb/pywikibot/comms/http.py in _enqueue(uri, method, params, body, headers, data, **kwargs)
    465     request = threadedhttp.HttpRequest(
    466         uri, method, params, body, all_headers, callbacks, **kwargs)
--> 467     _http_process(session, request)
    468     return request
    469 

/srv/paws/pwb/pywikibot/comms/http.py in _http_process(session, http_request)
    379         response = session.request(method, uri, params=params, data=body,
    380                                    headers=headers, auth=auth, timeout=timeout,
--> 381                                    verify=not ignore_validation)
    382     except Exception as e:
    383         http_request.data = e

/srv/paws/lib/python3.4/site-packages/requests/sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
    472             hooks = hooks,
    473         )
--> 474         prep = self.prepare_request(req)
    475 
    476         proxies = proxies or {}

/srv/paws/lib/python3.4/site-packages/requests/sessions.py in prepare_request(self, request)
    405             auth=merge_setting(auth, self.auth),
    406             cookies=merged_cookies,
--> 407             hooks=merge_hooks(request.hooks, self.hooks),
    408         )
    409         return p

/srv/paws/lib/python3.4/site-packages/requests/models.py in prepare(self, method, url, headers, files, data, params, auth, cookies, hooks, json)
    304         self.prepare_cookies(cookies)
    305         self.prepare_body(data, files, json)
--> 306         self.prepare_auth(auth, url)
    307 
    308         # Note that prepare_auth must be last to enable authentication schemes

/srv/paws/lib/python3.4/site-packages/requests/models.py in prepare_auth(self, auth, url)
    541 
    542             # Allow auth to make its changes.
--> 543             r = auth(self)
    544 
    545             # Update self to reflect the auth changes.

/srv/paws/lib/python3.4/site-packages/requests_oauthlib/oauth1_auth.py in __call__(self, r)
     78             r.headers['Content-Type'] = CONTENT_TYPE_FORM_URLENCODED
     79             r.url, headers, r.body = self.client.sign(
---> 80                 unicode(r.url), unicode(r.method), r.body or '', r.headers)
     81         elif self.force_include_body:
     82             # To allow custom clients to work on non form encoded bodies.

/srv/paws/lib/python3.4/site-packages/oauthlib/oauth1/rfc5849/__init__.py in sign(self, uri, http_method, body, headers, realm)
    311         # generate the signature
    312         request.oauth_params.append(
--> 313             ('oauth_signature', self.get_oauth_signature(request)))
    314 
    315         # render the signed request and return it

/srv/paws/lib/python3.4/site-packages/oauthlib/oauth1/rfc5849/__init__.py in get_oauth_signature(self, request)
    131             uri_query=urlparse.urlparse(uri).query,
    132             body=body,
--> 133             headers=headers)
    134         log.debug("Collected params: {0}".format(collected_params))
    135 

/srv/paws/lib/python3.4/site-packages/oauthlib/oauth1/rfc5849/signature.py in collect_parameters(uri_query, body, headers, exclude_oauth_signature, with_realm)
    292 
    293     # TODO: enforce header param inclusion conditions
--> 294     bodyparams = extract_params(body) or []
    295     params.extend(bodyparams)
    296 

/srv/paws/lib/python3.4/site-packages/oauthlib/common.py in extract_params(raw)
    172     if isinstance(raw, bytes_type) or isinstance(raw, unicode_type):
    173         try:
--> 174             params = urldecode(raw)
    175         except ValueError:
    176             params = None

/srv/paws/lib/python3.4/site-packages/oauthlib/common.py in urldecode(query)
    159 
    160     # unicode all the things
--> 161     return decode_params_utf8(params)
    162 
    163 

/srv/paws/lib/python3.4/site-packages/oauthlib/common.py in decode_params_utf8(params)
    106         decoded.append((
    107             k.decode('utf-8') if isinstance(k, bytes_type) else k,
--> 108             v.decode('utf-8') if isinstance(v, bytes_type) else v))
    109     return decoded
    110 

KeyboardInterrupt: