In [30]:
!wget https://quarry.wmflabs.org/run/160592/output/0/tsv?download=true -O enwiki-redirects-endash-20170308.tsv
--2017-03-08 18:18:21-- https://quarry.wmflabs.org/run/160592/output/0/tsv?download=true
Resolving quarry.wmflabs.org (quarry.wmflabs.org)... 10.68.21.68
Connecting to quarry.wmflabs.org (quarry.wmflabs.org)|10.68.21.68|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/csv]
Saving to: ‘enwiki-redirects-endash-20170308.tsv’
enwiki-redirects-en [ <=> ] 26.40M 1.81MB/s in 12s
2017-03-08 18:18:33 (2.12 MB/s) - ‘enwiki-redirects-endash-20170308.tsv’ saved [27684056]
In [1]:
import pywikibot
site = pywikibot.Site('en', 'wikipedia')
In [86]:
site.log
In [79]:
import pandas as pd
import numpy as np
import seaborn as sns
%matplotlib inline
import pickle
In [31]:
redirects_df = pd.read_csv("enwiki-redirects-endash-20170308.tsv", sep="\t")
len(redirects_df)
Out[31]:
330470
In [82]:
error_count = 0
total_df = pd.DataFrame(columns=["revid", "timestamp", "user", "comment", "is_redirect", "page_title", "page_namespace", "page_text"])
count = 0
errors = []
for r in redirects_df.iterrows():
revs = []
redirect_page = r[1].page_title
try:
page = pywikibot.Page(site, redirect_page)
oldest_rev = page.oldest_revision
revs.append((oldest_rev.revid, oldest_rev.timestamp.isoformat(), oldest_rev.user, oldest_rev.comment, page.isRedirectPage(), page.title(), page.namespace().id, page.text))
except Exception as e:
errors.append(r[1].page_title)
error_count = error_count + 1
rev_df = pd.DataFrame(revs, columns=["revid", "timestamp", "user", "comment", "is_redirect", "page_title", "page_namespace", "page_text"])
total_df = pd.concat([total_df, rev_df])
count = count + 1
if count % 100 == 0:
total_df.to_csv("enwiki-redirects-endash-processed.tsv", sep="\t")
total_df.to_pickle("enwiki-redirects-endash-processed.pickle")
with open('enwiki-redirects-endash-errors.pickle', 'wb') as fp:
pickle.dump(errors, fp)
with open('enwiki-redirects-endash-errors.tsv', 'wb') as ft:
for item in errors:
ft.write(bytes(item + "\n", 'UTF-8'))
print(count, error_count)
100 0
200 1
300 2
400 2
500 2
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
<ipython-input-82-2f8f626ee732> in <module>()
8 try:
9 page = pywikibot.Page(site, redirect_page)
---> 10 oldest_rev = page.oldest_revision
11 revs.append((oldest_rev.revid, oldest_rev.timestamp.isoformat(), oldest_rev.user, oldest_rev.comment, page.isRedirectPage(), page.title(), page.text))
12 except Exception as e:
/srv/paws/pwb/pywikibot/page.py in oldest_revision(self)
765 @rtype: L{Revision}
766 """
--> 767 return next(self.revisions(reverse=True, total=1))
768
769 def isRedirectPage(self):
/srv/paws/pwb/pywikibot/tools/__init__.py in wrapper(*__args, **__kw)
1446 cls, depth)
1447 del __kw[old_arg]
-> 1448 return obj(*__args, **__kw)
1449
1450 if not __debug__:
/srv/paws/pwb/pywikibot/page.py in revisions(self, reverse, total, content, rollback, starttime, endtime)
1632 self.site.loadrevisions(self, getText=content, rvdir=reverse,
1633 starttime=starttime, endtime=endtime,
-> 1634 total=total, rollback=rollback)
1635 return (self._revisions[rev] for rev in
1636 sorted(self._revisions, reverse=not reverse)[:total])
/srv/paws/pwb/pywikibot/site.py in loadrevisions(self, page, getText, revids, startid, endid, starttime, endtime, rvdir, user, excludeuser, section, sysop, step, total, rollback)
3938 rvgen.set_maximum_items(-1) # suppress use of rvlimit parameter
3939
-> 3940 for pagedata in rvgen:
3941 if not self.sametitle(pagedata['title'],
3942 page.title(withSection=False)):
/srv/paws/pwb/pywikibot/data/api.py in __iter__(self)
2751 _logger)
2752 if not hasattr(self, "data"):
-> 2753 self.data = self.request.submit()
2754 if not self.data or not isinstance(self.data, dict):
2755 pywikibot.debug(
/srv/paws/pwb/pywikibot/data/api.py in submit(self)
1952 rawdata = http.request(
1953 site=self.site, uri=uri, method='GET' if use_get else 'POST',
-> 1954 body=body, headers=headers)
1955 except Server504Error:
1956 pywikibot.log(u"Caught HTTP 504 error; retrying")
/srv/paws/pwb/pywikibot/tools/__init__.py in wrapper(*__args, **__kw)
1446 cls, depth)
1447 del __kw[old_arg]
-> 1448 return obj(*__args, **__kw)
1449
1450 if not __debug__:
/srv/paws/pwb/pywikibot/comms/http.py in request(site, uri, method, params, body, headers, data, **kwargs)
326 headers['user-agent'] = user_agent(site, format_string)
327
--> 328 r = fetch(baseuri, method, params, body, headers, **kwargs)
329 return r.content
330
/srv/paws/pwb/pywikibot/comms/http.py in fetch(uri, method, params, body, headers, default_error_handling, use_fake_user_agent, data, **kwargs)
508 headers['user-agent'] = fake_user_agent()
509
--> 510 request = _enqueue(uri, method, params, body, headers, **kwargs)
511 assert(request._data is not None) # if there's no data in the answer we're in trouble
512 # Run the error handling callback in the callers thread so exceptions
/srv/paws/pwb/pywikibot/comms/http.py in _enqueue(uri, method, params, body, headers, data, **kwargs)
465 request = threadedhttp.HttpRequest(
466 uri, method, params, body, all_headers, callbacks, **kwargs)
--> 467 _http_process(session, request)
468 return request
469
/srv/paws/pwb/pywikibot/comms/http.py in _http_process(session, http_request)
379 response = session.request(method, uri, params=params, data=body,
380 headers=headers, auth=auth, timeout=timeout,
--> 381 verify=not ignore_validation)
382 except Exception as e:
383 http_request.data = e
/srv/paws/lib/python3.4/site-packages/requests/sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
472 hooks = hooks,
473 )
--> 474 prep = self.prepare_request(req)
475
476 proxies = proxies or {}
/srv/paws/lib/python3.4/site-packages/requests/sessions.py in prepare_request(self, request)
405 auth=merge_setting(auth, self.auth),
406 cookies=merged_cookies,
--> 407 hooks=merge_hooks(request.hooks, self.hooks),
408 )
409 return p
/srv/paws/lib/python3.4/site-packages/requests/models.py in prepare(self, method, url, headers, files, data, params, auth, cookies, hooks, json)
304 self.prepare_cookies(cookies)
305 self.prepare_body(data, files, json)
--> 306 self.prepare_auth(auth, url)
307
308 # Note that prepare_auth must be last to enable authentication schemes
/srv/paws/lib/python3.4/site-packages/requests/models.py in prepare_auth(self, auth, url)
541
542 # Allow auth to make its changes.
--> 543 r = auth(self)
544
545 # Update self to reflect the auth changes.
/srv/paws/lib/python3.4/site-packages/requests_oauthlib/oauth1_auth.py in __call__(self, r)
78 r.headers['Content-Type'] = CONTENT_TYPE_FORM_URLENCODED
79 r.url, headers, r.body = self.client.sign(
---> 80 unicode(r.url), unicode(r.method), r.body or '', r.headers)
81 elif self.force_include_body:
82 # To allow custom clients to work on non form encoded bodies.
/srv/paws/lib/python3.4/site-packages/oauthlib/oauth1/rfc5849/__init__.py in sign(self, uri, http_method, body, headers, realm)
311 # generate the signature
312 request.oauth_params.append(
--> 313 ('oauth_signature', self.get_oauth_signature(request)))
314
315 # render the signed request and return it
/srv/paws/lib/python3.4/site-packages/oauthlib/oauth1/rfc5849/__init__.py in get_oauth_signature(self, request)
126 self.resource_owner_secret)
127
--> 128 uri, headers, body = self._render(request)
129
130 collected_params = signature.collect_parameters(
/srv/paws/lib/python3.4/site-packages/oauthlib/oauth1/rfc5849/__init__.py in _render(self, request, formencode, realm)
208 if self.signature_type == SIGNATURE_TYPE_AUTH_HEADER:
209 headers = parameters.prepare_headers(
--> 210 request.oauth_params, request.headers, realm=realm)
211 elif self.signature_type == SIGNATURE_TYPE_BODY and request.decoded_body is not None:
212 body = parameters.prepare_form_encoded_body(
/srv/paws/lib/python3.4/site-packages/oauthlib/oauth1/rfc5849/utils.py in wrapper(params, *args, **kwargs)
29 def wrapper(params, *args, **kwargs):
30 params = filter_oauth_params(params)
---> 31 return target(params, *args, **kwargs)
32
33 wrapper.__doc__ = target.__doc__
/srv/paws/lib/python3.4/site-packages/oauthlib/oauth1/rfc5849/parameters.py in prepare_headers(oauth_params, headers, realm)
49 # field as follows:
50 authorization_header_parameters_parts = []
---> 51 for oauth_parameter_name, value in oauth_params:
52 # 1. Parameter names and values are encoded per Parameter Encoding
53 # (`Section 3.6`_)
KeyboardInterrupt:
In [76]:
rev_df
Out[76]:
revid
timestamp
user
comment
is_redirect
page_title
page_text
0
545870185
2013-03-21T03:45:13Z
The Emperor's New Spy
The Emperor's New Spy moved page [[1550–1600 i...
True
1550–1600 in fashion
#REDIRECT [[1550–1600 in Western European fash...
In [85]:
page.namespace().id
Out[85]:
0
In [ ]:
In [71]:
revs_df
Out[71]:
[(334052434,
'2009-12-26T04:55:57Z',
'DASHBot',
'moved [["FF.SS." - Cioè: "...che mi hai portato a fare sopra a Posillipo se non mi vuoi più bene?"]] to [["FF.SS." – Cioè: "...che mi hai portato a fare sopra a Posillipo se non mi vuoi più bene?"]]: Robot moving pages:per [[WP:HYPHEN]] ([[User talk',
True,
'"FF.SS." - Cioè: "...che mi hai portato a fare sopra a Posillipo se non mi vuoi più bene?"',
'#REDIRECT [["FF.SS." – Cioè: "...che mi hai portato a fare sopra a Posillipo se non mi vuoi più bene?"]]\n\n{{R from modification}}\n{{R from move}}'),
(415186497,
'2011-02-21T19:55:28Z',
'DerBorg',
'[[WP:AES|←]]Redirected page to [["FF.SS." – Cioè: "...che mi hai portato a fare sopra a Posillipo se non mi vuoi più bene?"]]',
True,
'FF.SS.',
'#REDIRECT [["FF.SS." – Cioè: "...che mi hai portato a fare sopra a Posillipo se non mi vuoi più bene?"]]'),
(475846929,
'2012-02-08T23:25:55Z',
'RjwilmsiBot',
'Create redirect for title with diacritics using [[Project:AWB|AWB]] (7952)',
True,
'"FF.SS." – Cioe: "...che mi hai portato a fare sopra a Posillipo se non mi vuoi piu bene?"',
'#REDIRECT [["FF.SS." – Cioè: "...che mi hai portato a fare sopra a Posillipo se non mi vuoi più bene?"]] {{R from title without diacritics}}'),
(586979837,
'2013-12-20T17:31:33Z',
'Cavarrone',
'[[WP:AES|←]]Redirected page to [["FF.SS." – Cioè: "...che mi hai portato a fare sopra a Posillipo se non mi vuoi più bene?"]]',
True,
'F.F.S.S., cioè.. che mi hai portato a fare sopra a Posillipo se non mi vuoi più bene?',
'#REDIRECT [["FF.SS." – Cioè: "...che mi hai portato a fare sopra a Posillipo se non mi vuoi più bene?"]]'),
(708965298,
'2016-03-08T12:52:52Z',
'AnomieBOT',
'Redirecting to [[:"FF.SS." – Cioe: "...che mi hai portato a fare sopra a Posillipo se non mi vuoi piu bene?"]] because titles with en-dashes are hard to type (and resolving the double redirect to [[:"FF.SS." – Cioè: "...che mi hai portato a fare s...',
True,
'"FF.SS." - Cioe: "...che mi hai portato a fare sopra a Posillipo se non mi vuoi piu bene?"',
'#REDIRECT [[:"FF.SS." – Cioè: "...che mi hai portato a fare sopra a Posillipo se non mi vuoi più bene?"]]\n{{Redr|from modification|p1={{-r|"FF.SS." – Cioe: "...che mi hai portato a fare sopra a Posillipo se non mi vuoi piu bene?"}}}}'),
(20470716,
'2005-08-07T12:03:02Z',
'Adz',
'create redirect',
True,
'Port lotniczy Szczecin-Goleniów',
'#REDIRECT [["Solidarity" Szczecin–Goleniów Airport]]'),
(22077099,
'2005-08-29T04:52:37Z',
'Adz',
'create redirect',
True,
'Szczecin International Airport',
'#REDIRECT [["Solidarity" Szczecin–Goleniów Airport]]'),
(43444571,
'2006-03-12T15:52:25Z',
'Balcer',
'redir',
True,
'Szczecin-Goleniów Airport',
'#REDIRECT [["Solidarity" Szczecin–Goleniów Airport]]'),
(66093939,
'2006-07-27T03:56:58Z',
'Mareklug',
'{{R from title without diacritics}}',
True,
'Szczecin-Goleniow "Solidarnosc" Airport',
'#REDIRECT [["Solidarity" Szczecin–Goleniów Airport]] {{R from title without diacritics}}'),
(66094262,
'2006-07-27T03:59:40Z',
'Mareklug',
'redirect',
True,
'Szczecin-Goleniow Solidarity Airport',
'#REDIRECT [["Solidarity" Szczecin–Goleniów Airport]]'),
(147052256,
'2007-07-25T19:09:04Z',
'Targeman',
'redirect',
True,
'Szz',
'#REDIRECT [["Solidarity" Szczecin–Goleniów Airport]]'),
(178066046,
'2007-12-15T12:34:08Z',
'Zyxw',
'[[WP:AES|←]]Redirected page to [[Szczecin-Goleniów "Solidarność" Airport]]',
True,
'Szczecin-Goleniów "Solidarnosc" Airport',
'#REDIRECT [["Solidarity" Szczecin–Goleniów Airport]]'),
(199229641,
'2008-03-19T00:42:13Z',
'Eubot',
'Redirected page to [[Szczecin-Goleniów "Solidarność" Airport]]. ("Port lotniczy Szczecin-Goleniów").',
True,
'Port lotniczy Szczecin-Goleniow',
'#REDIRECT [["Solidarity" Szczecin–Goleniów Airport]] {{R from title without diacritics}}'),
(199721147,
'2008-03-21T00:38:48Z',
'Eubot',
'Redirected page to [[Szczecin-Goleniów "Solidarność" Airport]]. ("Szczecin-Goleniów Airport").',
True,
'Szczecin-Goleniow Airport',
'#REDIRECT [["Solidarity" Szczecin–Goleniów Airport]] {{R from title without diacritics}}'),
(261295911,
'2009-01-01T18:30:17Z',
'Uzdzislaw',
'[[WP:AES|←]]Redirected page to [[Szczecin-Goleniów "Solidarność" Airport]]',
True,
'SZZ',
'#REDIRECT [["Solidarity" Szczecin–Goleniów Airport]]\n{{R from airport code}}'),
(301278464,
'2009-07-09T23:26:40Z',
'Nappyrootslistener',
'[[WP:AES|←]]Redirected page to [[Szczecin-Goleniów "Solidarność" Airport]]',
True,
'Solidarność Airport',
'#REDIRECT [["Solidarity" Szczecin–Goleniów Airport]]'),
(301278560,
'2009-07-09T23:27:17Z',
'Nappyrootslistener',
'[[WP:AES|←]]Redirected page to [[Szczecin-Goleniów "Solidarność" Airport]]',
True,
'Solidarnosc Airport',
'#REDIRECT [["Solidarity" Szczecin–Goleniów Airport]]'),
(478076640,
'2012-02-21T15:08:41Z',
'Igor alexandrov',
'moved [[Szczecin-Goleniów "Solidarność" Airport]] to [["Solidarity" Szczecin-Goleniów Airport]]: proper English name',
True,
'Szczecin-Goleniów "Solidarność" Airport',
'#REDIRECT [["Solidarity" Szczecin–Goleniów Airport]]'),
(495325659,
'2012-05-31T17:26:12Z',
'RjwilmsiBot',
'Create redirect for title with diacritics using [[Project:AWB|AWB]] (8073)',
True,
'"Solidarity" Szczecin-Goleniow Airport',
'#REDIRECT [["Solidarity" Szczecin–Goleniów Airport]] {{R from title without diacritics}}'),
(555161539,
'2013-05-15T03:40:18Z',
'Cherkash',
'Cherkash moved page [["Solidarity" Szczecin-Goleniów Airport]] to [["Solidarity" Szczecin–Goleniów Airport]]: fixed a dash',
True,
'"Solidarity" Szczecin-Goleniów Airport',
'#REDIRECT [["Solidarity" Szczecin–Goleniów Airport]]'),
(626062909,
'2014-09-18T09:48:35Z',
'RjwilmsiBot',
'Create redirect for title with diacritics using [[Project:AWB|AWB]] (10447)',
True,
'"Solidarity" Szczecin–Goleniow Airport',
'#REDIRECT [["Solidarity" Szczecin–Goleniów Airport]] {{R from title without diacritics}}'),
(714079955,
'2016-04-07T14:23:12Z',
'Feminist',
'/* top */Redirecting to [["Solidarity" Szczecin–Goleniów Airport]] using [[Project:AWB|AWB]]',
True,
'"Solidarity" Szczecin–Goleniów (airport)',
'#REDIRECT [["Solidarity" Szczecin–Goleniów Airport]]\n{{R from alternative disambiguation}}'),
(714250580,
'2016-04-08T16:07:20Z',
'AnomieBOT',
'Redirecting to [[:"Solidarity" Szczecin–Goleniów (airport)]] because titles with en-dashes are hard to type (and resolving the double redirect to [[:"Solidarity" Szczecin–Goleniów Airport]]). Errors? [[User:AnomieBOT/shutoff/EnDashRedirectCreator]]',
True,
'"Solidarity" Szczecin-Goleniów (airport)',
'#REDIRECT [[:"Solidarity" Szczecin–Goleniów Airport]]\n{{Redr|from modification|p1={{-r|"Solidarity" Szczecin–Goleniów (airport)}}}}'),
(199768772,
'2008-03-21T05:19:58Z',
'Eubot',
'Redirected page to [[38 – Auch das war Wien]].',
True,
'38 - Auch das war Wien',
"#REDIRECT [['38 – Vienna Before the Fall]] {{R from title without diacritics}}\n{{R from modification}}"),
(628359230,
'2014-10-05T17:43:11Z',
'Lugnuts',
"Lugnuts moved page [[38 – Auch das war Wien]] to [['38 – Vienna Before the Fall]]: [[WP:NCF]], [[WP:UE]]",
True,
'38 – Auch das war Wien',
"#REDIRECT [['38 – Vienna Before the Fall]]\n\n{{R from move}}"),
(628360066,
'2014-10-05T17:51:01Z',
'Lugnuts',
"[[WP:AES|←]]Redirected page to [['38 – Vienna Before the Fall]]",
True,
"'38 - Vienna Before the Fall",
"#REDIRECT[['38 – Vienna Before the Fall]]"),
(628360093,
'2014-10-05T17:51:18Z',
'Lugnuts',
"[[WP:AES|←]]Redirected page to [['38 – Vienna Before the Fall]]",
True,
'38 – Vienna Before the Fall',
"#REDIRECT[['38 – Vienna Before the Fall]]"),
(628360127,
'2014-10-05T17:51:34Z',
'Lugnuts',
"[[WP:AES|←]]Redirected page to [['38 – Vienna Before the Fall]]",
True,
'38 - Vienna Before the Fall',
"#REDIRECT[['38 – Vienna Before the Fall]]"),
(9096674,
'2004-12-23T19:07:20Z',
'81.154.8.38',
'',
True,
"'64-'95",
"#REDIRECT [['64–'95]]\n{{R from modification}}"),
(236143794,
'2008-09-04T01:07:26Z',
'Koavf',
"moved [['64 - '95]] to [['64–'95]]: –",
True,
"'64 - '95",
"#REDIRECT [['64–'95]]"),
(172462904,
'2007-11-19T10:42:51Z',
'Bensin',
"#redirect [['74 - '75]]",
True,
"'74-'75",
"#REDIRECT [['74–'75]]\n{{R from modification}}"),
(334047672,
'2009-12-26T04:06:22Z',
'DASHBot',
"moved [['74 - '75]] to [['74 – '75]]: Robot moving pages: per [[WP:HYPHEN]] ([[User talk:Tim1357|error?]])",
True,
"'74 - '75",
"#REDIRECT [['74–'75]]\n\n{{R from modification}}\n{{R from move}}"),
(343067910,
'2010-02-10T02:15:01Z',
'Koavf',
"moved [['74 – '75]] to [['74–'75]]: [[WP:DASH]]",
True,
"'74 – '75",
"#REDIRECT [['74–'75]]\n\n{{R from modification}}\n{{R from move}}"),
(502025050,
'2012-07-13T08:33:43Z',
'Omegangel',
"[[WP:AES|←]]Redirected page to [['74–'75]]",
True,
'74 75',
"#REDIRECT [['74–'75]]"),
(704840462,
'2016-02-13T22:32:11Z',
'Smylers',
'Find the song even without the apostrophes',
True,
'74-75',
"#REDIRECT [['74–'75]]"),
(760435201,
'2017-01-16T23:40:04Z',
'Pigsonthewing',
"#REDIRECT[['74–'75]]",
True,
'74–75',
"#REDIRECT[['74–'75]]"),
(760435355,
'2017-01-16T23:41:32Z',
'Pigsonthewing',
'+',
True,
'74—75',
"#REDIRECT[['74–'75]]"),
(297057866,
'2009-06-18T00:02:04Z',
'Koavf',
"moved [['99-'00 Demos]] to [['99–'00 Demos]]: –",
True,
"'99-'00 Demos",
"#REDIRECT [['99–'00 Demos]]\n\n{{R from modification}}\n\n{{R unprintworthy}}"),
(17058946,
'2004-12-25T00:00:57Z',
'Lumidek',
'',
True,
'Hooft-Polyakov monopole',
"#REDIRECT [['t Hooft–Polyakov monopole]]"),
(273794069,
'2009-02-28T02:41:54Z',
'Legoktm',
"moved [['t Hooft-Polyakov monopole]] to [['t Hooft–Polyakov monopole]]: Moving page per [[WP:ENDASH]]",
True,
"'t Hooft-Polyakov monopole",
"#REDIRECT [['t Hooft–Polyakov monopole]]\n{{R from modification}}"),
(365570887,
'2010-06-02T04:02:55Z',
'Anthony Appleyard',
'moved [[(1952-19??)]] to [[(1952–19??)]]: Requested at [[Wikipedia:Requested moves]] as uncontroversial (http://en.wikipedia.org/w/index.php?title=Wikipedia:Requested_moves&oldid=365563522#movereq-.281952-19.3F.3F.29)&wpMovetalk=1',
True,
'(1952-19??)',
'#REDIRECT [[(1952–19??)]]\n{{R from modification}}'),
(690277281,
'2015-11-12T11:26:09Z',
'Faceless Enemy',
'Faceless Enemy moved page [[.45 Remington-Thompson]] to [[.45 Remington–Thompson]]: Per MOS mdash',
True,
'.45 Remington-Thompson',
'#REDIRECT [[.45 Remington–Thompson]]\n\n{{This is a redirect|from move|from incorrect punctuation|up}}'),
(395470118,
'2010-11-08T02:31:34Z',
'Octane',
'[[WP:AES|←]]Redirected page to [[.577/450 Martini-Henry]]',
True,
'.577/450',
'#REDIRECT [[.577/450 Martini–Henry]]'),
(662397894,
'2015-05-15T03:47:22Z',
'Faceless Enemy',
'[[WP:AES|←]]Redirected page to [[.577/450 Martini-Henry]]',
True,
'.450/577',
'#REDIRECT [[.577/450 Martini–Henry]]'),
(662397929,
'2015-05-15T03:47:45Z',
'Faceless Enemy',
'Faceless Enemy moved page [[.577/450 Martini-Henry]] to [[.577/450 Martini–Henry]]: ndash per [[Wikipedia:Manual_of_Style#Dashes]]',
True,
'.577/450 Martini-Henry',
'#REDIRECT [[.577/450 Martini–Henry]]\n{{R from move}}'),
(297057849,
'2009-06-18T00:02:01Z',
'Koavf',
'moved [[0079-0088]] to [[0079–0088]]',
True,
'0079-0088',
'#REDIRECT [[0079–0088]]\n\n{{R from modification}}\n\n{{R unprintworthy}}'),
(475231316,
'2012-02-05T15:17:54Z',
'RjwilmsiBot',
'creating redirects for pages with endashes in title using [[Project:AWB|AWB]] (7940)',
True,
'00 Schneider - Jagd auf Nihil Baxter',
'#REDIRECT [[00 Schneider – Jagd auf Nihil Baxter]] {{R from modification}}'),
(463616950,
'2011-12-02T05:03:11Z',
'DASHBot',
'moved [[0110111 - Quantum Physics & A Horseshoe]] to [[0110111 – Quantum Physics & A Horseshoe]]: [[WP:BOT|BOT]]: Moving page per [[WP:HYPHEN]].',
True,
'0110111 - Quantum Physics & A Horseshoe',
'#REDIRECT [[0110111 – Quantum Physics & A Horseshoe]] {{R from modification}}'),
(297057875,
'2009-06-18T00:02:06Z',
'Koavf',
'moved [[03/07-09/07]] to [[03/07–09/07]]: –',
True,
'03/07-09/07',
'#REDIRECT [[03/07–09/07]]\n\n{{R from modification}}\n\n{{R unprintworthy}}'),
(475231375,
'2012-02-05T15:18:21Z',
'RjwilmsiBot',
'creating redirects for pages with endashes in title using [[Project:AWB|AWB]] (7940)',
True,
'1. Mai - Helden bei der Arbeit',
'#REDIRECT [[1. Mai – Helden bei der Arbeit]] {{R from modification}}'),
(383597886,
'2010-09-08T06:56:48Z',
'David Eppstein',
'redirect with hyphen in place of en-dash',
True,
'1/3-2/3 conjecture',
'#REDIRECT [[1/3–2/3 conjecture]]\n{{R from modification}}'),
(384115936,
'2010-09-11T00:30:40Z',
'David Eppstein',
'redlink redirect',
True,
'The 1/3 − 2/3 conjecture',
'#REDIRECT [[1/3–2/3 conjecture]]'),
(120866901,
'2007-04-07T00:43:32Z',
'-5-',
'moved [[10/22/00 - Las Vegas Nevada]] to [[10/22/00 - Las Vegas, Nevada]]: Punctuation',
True,
'10/22/00 - Las Vegas Nevada',
'#REDIRECT [[10/22/00 – Las Vegas, Nevada]]\n{{R from modification}}'),
(317587888,
'2009-10-03T04:24:00Z',
'Koavf',
'moved [[10/22/00 - Las Vegas, Nevada]] to [[10/22/00 – Las Vegas, Nevada]]: ndash',
True,
'10/22/00 - Las Vegas, Nevada',
'#REDIRECT [[10/22/00 – Las Vegas, Nevada]]\n\n{{R from modification}}\n\n{{R unprintworthy}}'),
(120867146,
'2007-04-07T00:45:03Z',
'-5-',
'moved [[10/25/00 - San Diego California]] to [[10/25/00 - San Diego, California]]: Punctuation',
True,
'10/25/00 - San Diego California',
'#REDIRECT [[10/25/00 – San Diego, California]]\n{{R from modification}}'),
(317588002,
'2009-10-03T04:25:07Z',
'Koavf',
'moved [[10/25/00 - San Diego, California]] to [[10/25/00 – San Diego, California]]: ndash',
True,
'10/25/00 - San Diego, California',
'#REDIRECT [[10/25/00 – San Diego, California]]\n\n{{R from modification}}\n\n{{R unprintworthy}}'),
(120866749,
'2007-04-07T00:42:49Z',
'-5-',
'moved [[10/7/00 - Detroit Michigan]] to [[10/7/00 - Detroit, Michigan]]: Punctuation',
True,
'10/7/00 - Detroit Michigan',
'#REDIRECT [[10/7/00 – Detroit, Michigan]]\n{{R from modification}}'),
(317588012,
'2009-10-03T04:25:10Z',
'Koavf',
'moved [[10/7/00 - Detroit, Michigan]] to [[10/7/00 – Detroit, Michigan]]: ndash',
True,
'10/7/00 - Detroit, Michigan',
'#REDIRECT [[10/7/00 – Detroit, Michigan]]\n\n{{R from modification}}\n\n{{R unprintworthy}}'),
(120866816,
'2007-04-07T00:43:10Z',
'-5-',
'moved [[10/9/00 - Chicago Illinois]] to [[10/9/00 - Chicago, Illinois]]: Punctuation',
True,
'10/9/00 - Chicago Illinois',
'#REDIRECT [[10/9/00 – Chicago, Illinois]]\n{{R from modification}}'),
(317588022,
'2009-10-03T04:25:13Z',
'Koavf',
'moved [[10/9/00 - Chicago, Illinois]] to [[10/9/00 – Chicago, Illinois]]: ndash',
True,
'10/9/00 - Chicago, Illinois',
'#REDIRECT [[10/9/00 – Chicago, Illinois]]\n\n{{R from modification}}\n\n{{R unprintworthy}}'),
(764459710,
'2017-02-09T01:17:42Z',
'Editor 2050',
'[[WP:AES|←]]Redirected page to [[1000 – Oru Note Paranja Katha]]',
True,
'1000: Oru Note Paranja Katha',
'#REDIRECT [[1000 – Oru Note Paranja Katha]]'),
(764481180,
'2017-02-09T04:24:19Z',
'AnomieBOT',
'Redirecting to [[:1000 – Oru Note Paranja Katha]] because titles with en-dashes are hard to type. Errors? [[User:AnomieBOT/shutoff/EnDashRedirectCreator]]',
True,
'1000 - Oru Note Paranja Katha',
'#REDIRECT [[:1000 – Oru Note Paranja Katha]]\n{{Redirect shell|{{R from modification|1={{-r|1000 – Oru Note Paranja Katha}}}}}}\n{{User:AnomieBOT/Auto-G8|target=1000 – Oru Note Paranja Katha}}'),
(475231399,
'2012-02-05T15:18:32Z',
'RjwilmsiBot',
'creating redirects for pages with endashes in title using [[Project:AWB|AWB]] (7940)',
True,
'100 Jahre - Der Countdown',
'#REDIRECT [[100 Jahre – Der Countdown]] {{R from modification}}'),
(596328967,
'2014-02-20T12:42:19Z',
'DadaNeem',
'#REDIRECT [[100 Jahre – Der Countdown]]',
True,
'100 Years - The Countdown',
'#REDIRECT [[100 Jahre – Der Countdown]]'),
(40669679,
'2006-02-22T04:37:47Z',
'Larryv',
'create redirect',
True,
'103rd Street (IRT Broadway-Seventh Avenue Line station)',
'#REDIRECT [[103rd Street (IRT Broadway–Seventh Avenue Line)]]'),
(140830143,
'2007-06-26T22:25:15Z',
'Marc Shepherd',
'moved [[103rd Street (IRT Broadway-Seventh Avenue Line)]] to [[103rd Street (IRT Broadway–Seventh Avenue Line)]] over redirect: undo recent move per [[WP:DASH]]',
True,
'103rd Street (IRT Broadway-Seventh Avenue Line)',
'#REDIRECT [[103rd Street (IRT Broadway–Seventh Avenue Line)]]'),
(336884360,
'2010-01-09T23:04:33Z',
'Tinlinkin',
'redirect to [[103rd Street (IRT Broadway – Seventh Avenue Line)]]',
True,
'103rd Street (IRT Broadway - Seventh Avenue Line)',
'#REDIRECT [[103rd Street (IRT Broadway–Seventh Avenue Line)]] {{R from title without diacritics}}'),
(732171705,
'2016-07-30T03:28:57Z',
'Dicklyon',
'Dicklyon moved page [[103rd Street (IRT Broadway – Seventh Avenue Line)]] to [[103rd Street (IRT Broadway–Seventh Avenue Line)]] over redirect: unspace the en dash',
True,
'103rd Street (IRT Broadway – Seventh Avenue Line)',
'#REDIRECT [[103rd Street (IRT Broadway–Seventh Avenue Line)]]\n{{R from move}}'),
(185737150,
'2008-01-20T23:30:04Z',
'DanTD',
'moved [[103rd Sreet - Beverly Hills (Metra)]] to [[103rd Street - Beverly Hills (Metra)]]: title misspelled',
True,
'103rd Sreet - Beverly Hills (Metra)',
'#REDIRECT [[103rd Street–Beverly Hills (Metra station)]]'),
(334054074,
'2009-12-26T05:13:22Z',
'DASHBot',
'moved [[103rd Street - Beverly Hills (Metra)]] to [[103rd Street – Beverly Hills (Metra)]]: Robot moving pages:per [[WP:HYPHEN]] ([[User talk:Tim1357|error?]])',
True,
'103rd Street - Beverly Hills (Metra)',
'#REDIRECT [[103rd Street–Beverly Hills (Metra station)]]\n\n{{R from modification}}\n{{R from move}}'),
(595645871,
'2014-02-15T22:20:31Z',
'DanTD',
'DanTD moved page [[103rd Street – Beverly Hills (Metra)]] to [[103rd Street – Beverly Hills (Metra station)]]: conform to station naming conventions',
True,
'103rd Street – Beverly Hills (Metra)',
'#REDIRECT [[103rd Street–Beverly Hills (Metra station)]]\n{{R from move}}'),
(708983602,
'2016-03-08T15:08:12Z',
'AnomieBOT',
'Redirecting to [[:103rd Street – Beverly Hills (Metra station)]] because titles with en-dashes are hard to type. Errors? [[User:AnomieBOT/shutoff/EnDashRedirectCreator]]',
True,
'103rd Street - Beverly Hills (Metra station)',
'#REDIRECT [[103rd Street–Beverly Hills (Metra station)]]\n{{Redr|from modification|p1={{-r|103rd Street – Beverly Hills (Metra station)}}}}'),
(741675832,
'2016-09-29T00:43:31Z',
'Kew Gardens 613',
'Kew Gardens 613 moved page [[103rd Street – Beverly Hills (Metra station)]] to [[103rd Street–Beverly Hills (Metra station)]]',
True,
'103rd Street – Beverly Hills (Metra station)',
'#REDIRECT [[103rd Street–Beverly Hills (Metra station)]]\n{{R from move}}'),
(741689905,
'2016-09-29T02:26:58Z',
'AnomieBOT',
'Redirecting to [[:103rd Street–Beverly Hills (Metra station)]] because titles with en-dashes are hard to type. Errors? [[User:AnomieBOT/shutoff/EnDashRedirectCreator]]',
True,
'103rd Street-Beverly Hills (Metra station)',
'#REDIRECT [[:103rd Street–Beverly Hills (Metra station)]]\n{{Redr|from modification|p1={{-r|103rd Street–Beverly Hills (Metra station)}}}}'),
(5338631,
'2004-06-16T20:17:11Z',
'PZFUN',
'',
True,
'103rd Street-Corona Plaza (New York Subway)',
'#REDIRECT [[103rd Street–Corona Plaza (IRT Flushing Line)]]'),
(12866330,
'2005-01-31T23:40:32Z',
'SPUI',
'103rd Street-Corona Plaza (7-Flushing) moved to 103rd Street-Corona Plaza (IRT Flushing Line station)',
True,
'103rd Street-Corona Plaza (7-Flushing)',
'#REDIRECT [[103rd Street–Corona Plaza (IRT Flushing Line)]]'),
(37808383,
'2006-02-02T06:29:43Z',
'Larryv',
"moved [[103rd Street-Corona Plaza (IRT Flushing Line station)]] to [[103rd Street-Corona Plaza (IRT Flushing Line)]]: 'station' unnecessary",
True,
'103rd Street-Corona Plaza (IRT Flushing Line station)',
'#REDIRECT [[103rd Street–Corona Plaza (IRT Flushing Line)]]'),
(101911738,
'2007-01-20T01:16:34Z',
'NE2',
'[[WP:AES|←]]Redirected page to [[103rd Street-Corona Plaza (IRT Flushing Line)]]',
True,
'103rd Street (IRT Flushing Line)',
'#REDIRECT [[103rd Street–Corona Plaza (IRT Flushing Line)]]'),
(119997520,
'2007-04-03T16:00:31Z',
'NE2',
'[[WP:AES|←]]Redirected page to [[103rd Street–Corona Plaza (IRT Flushing Line)]]',
True,
'104th Street (IRT Flushing Line)',
'#REDIRECT [[103rd Street–Corona Plaza (IRT Flushing Line)]]'),
(141377844,
'2007-06-29T12:15:07Z',
'Marc Shepherd',
'moved [[103rd Street-Corona Plaza (IRT Flushing Line)]] to [[103rd Street–Corona Plaza (IRT Flushing Line)]] over redirect: undo recent move per standard at [[WP:DASH]]',
True,
'103rd Street-Corona Plaza (IRT Flushing Line)',
'#REDIRECT [[103rd Street–Corona Plaza (IRT Flushing Line)]]'),
(306180860,
'2009-08-05T09:58:34Z',
'Tinlinkin',
'redirect to [[103rd Street – Corona Plaza (IRT Flushing Line)]]',
True,
'103rd Street - Corona Plaza (IRT Flushing Line)',
'#REDIRECT [[103rd Street–Corona Plaza (IRT Flushing Line)]] {{R from title without diacritics}}\n{{R from modification}}'),
(731861522,
'2016-07-28T00:12:19Z',
'Epicgenius',
'Epicgenius moved page [[103rd Street – Corona Plaza (IRT Flushing Line)]] to [[103rd Street–Corona Plaza (IRT Flushing Line)]] over redirect: unspace endash',
True,
'103rd Street – Corona Plaza (IRT Flushing Line)',
'#REDIRECT [[103rd Street–Corona Plaza (IRT Flushing Line)]]\n{{R from move}}'),
(612302785,
'2014-06-10T03:24:02Z',
'Spyder Monkey',
'[[WP:AES|←]]Redirected page to [[104–128 South Side Square]]',
True,
'Buildings at 104-128 S. Side Sq.',
'#REDIRECT [[104–128 South Side Square]]'),
(708973935,
'2016-03-08T13:58:37Z',
'AnomieBOT',
'Redirecting to [[:104–128 South Side Square]] because titles with en-dashes are hard to type. Errors? [[User:AnomieBOT/shutoff/EnDashRedirectCreator]]',
True,
'104-128 South Side Square',
'#REDIRECT [[:104–128 South Side Square]]\n{{Redr|from modification|p1={{-r|104–128 South Side Square}}}}'),
(334054162,
'2009-12-26T05:14:09Z',
'DASHBot',
'moved [[107th Street - Beverly Hills (Metra)]] to [[107th Street – Beverly Hills (Metra)]]: Robot moving pages:per [[WP:HYPHEN]] ([[User talk:Tim1357|error?]])',
True,
'107th Street - Beverly Hills (Metra)',
'#REDIRECT [[107th Street–Beverly Hills (Metra station)]]\n\n{{R from modification}}\n{{R from move}}'),
(595645951,
'2014-02-15T22:21:07Z',
'DanTD',
'DanTD moved page [[107th Street – Beverly Hills (Metra)]] to [[107th Street – Beverly Hills (Metra station)]]: conform to station naming conventions',
True,
'107th Street – Beverly Hills (Metra)',
'#REDIRECT [[107th Street–Beverly Hills (Metra station)]]\n{{R from move}}'),
(708956917,
'2016-03-08T11:40:00Z',
'AnomieBOT',
'Redirecting to [[:107th Street – Beverly Hills (Metra station)]] because titles with en-dashes are hard to type. Errors? [[User:AnomieBOT/shutoff/EnDashRedirectCreator]]',
True,
'107th Street - Beverly Hills (Metra station)',
'#REDIRECT [[107th Street–Beverly Hills (Metra station)]]\n{{Redr|from modification|p1={{-r|107th Street – Beverly Hills (Metra station)}}}}'),
(741676092,
'2016-09-29T00:45:47Z',
'Kew Gardens 613',
'Kew Gardens 613 moved page [[107th Street – Beverly Hills (Metra station)]] to [[107th Street–Beverly Hills (Metra station)]]',
True,
'107th Street – Beverly Hills (Metra station)',
'#REDIRECT [[107th Street–Beverly Hills (Metra station)]]\n{{R from move}}'),
(741689784,
'2016-09-29T02:26:08Z',
'AnomieBOT',
'Redirecting to [[:107th Street–Beverly Hills (Metra station)]] because titles with en-dashes are hard to type. Errors? [[User:AnomieBOT/shutoff/EnDashRedirectCreator]]',
True,
'107th Street-Beverly Hills (Metra station)',
'#REDIRECT [[:107th Street–Beverly Hills (Metra station)]]\n{{Redr|from modification|p1={{-r|107th Street–Beverly Hills (Metra station)}}}}'),
(739682478,
'2016-09-16T08:36:22Z',
'Ham II',
'Ham II moved page [[107-123 Muswell Hill Road]] to [[107–123 Muswell Hill Road]]: [[MOS:DASH]]',
True,
'107-123 Muswell Hill Road',
'#REDIRECT [[107–123 Muswell Hill Road]]\n{{R from move}}'),
(297057880,
'2009-06-18T00:02:08Z',
'Koavf',
'moved [[10: 1993-2003 - Ten Years Of]] to [[10: 1993–2003 – Ten Years Of]]: –',
True,
'10: 1993-2003 - Ten Years Of',
'#REDIRECT [[10: 1993–2003 – Ten Years Of]]\n\n{{R from modification}}\n\n{{R unprintworthy}}'),
(334053726,
'2009-12-26T05:09:29Z',
'DASHBot',
'moved [[10 Jahre - Best Of]] to [[10 Jahre – Best Of]]: Robot moving pages:per [[WP:HYPHEN]] ([[User talk:Tim1357|error?]])',
True,
'10 Jahre - Best Of',
'#REDIRECT [[10 Jahre – Best Of]]\n\n{{R from modification}}\n{{R from move}}'),
(742542349,
'2016-10-04T09:22:20Z',
'Caldorwards4',
'Caldorwards4 moved page [[10 Years of Greatest Hits]] to [[10 Years of Hits – Newly Recorded]]: correct title',
True,
'10 Years of Greatest Hits',
'#REDIRECT [[10 Years of Greatest Hits – Newly Recorded]]'),
(742542415,
'2016-10-04T09:22:59Z',
'Caldorwards4',
'Caldorwards4 moved page [[10 Years of Hits – Newly Recorded]] to [[10 Years of Greatest Hits – Newly Recorded]]: oops',
True,
'10 Years of Hits – Newly Recorded',
'#REDIRECT [[10 Years of Greatest Hits – Newly Recorded]]\n{{R from move}}'),
(742549520,
'2016-10-04T10:32:23Z',
'AnomieBOT',
'Redirecting to [[:10 Years of Hits – Newly Recorded]] because titles with en-dashes are hard to type (and resolving the double redirect to [[:10 Years of Greatest Hits – Newly Recorded]]). Errors? [[User:AnomieBOT/shutoff/EnDashRedirectCreator]]',
True,
'10 Years of Hits - Newly Recorded',
'#REDIRECT [[:10 Years of Greatest Hits – Newly Recorded]]\n{{Redr|from modification|p1={{-r|10 Years of Hits – Newly Recorded}}}}'),
(742549542,
'2016-10-04T10:32:33Z',
'AnomieBOT',
'Redirecting to [[:10 Years of Greatest Hits – Newly Recorded]] because titles with en-dashes are hard to type. Errors? [[User:AnomieBOT/shutoff/EnDashRedirectCreator]]',
True,
'10 Years of Greatest Hits - Newly Recorded',
'#REDIRECT [[:10 Years of Greatest Hits – Newly Recorded]]\n{{Redr|from modification|p1={{-r|10 Years of Greatest Hits – Newly Recorded}}}}'),
(708964044,
'2016-03-08T12:43:12Z',
'AnomieBOT',
'Redirecting to [[:10 år – En snäll mans bekännelser]] because titles with en-dashes are hard to type. Errors? [[User:AnomieBOT/shutoff/EnDashRedirectCreator]]',
True,
'10 år - En snäll mans bekännelser',
'#REDIRECT [[:10 år – En snäll mans bekännelser]]\n{{Redr|from modification|p1={{-r|10 år – En snäll mans bekännelser}}}}'),
(725567194,
'2016-06-16T13:32:26Z',
'RjwilmsiBot',
'/* top */Create redirect for title with diacritics using [[Project:AWB|AWB]] (12022)',
True,
'10 ar – En snall mans bekannelser',
'#REDIRECT [[10 år – En snäll mans bekännelser]] {{R from title without diacritics}}'),
(725618421,
'2016-06-16T20:21:04Z',
'AnomieBOT',
'Redirecting to [[:10 ar – En snall mans bekannelser]] because titles with en-dashes are hard to type (and resolving the double redirect to [[:10 år – En snäll mans bekännelser]]). Errors? [[User:AnomieBOT/shutoff/EnDashRedirectCreator]]',
True,
'10 ar - En snall mans bekannelser',
'#REDIRECT [[:10 år – En snäll mans bekännelser]]\n{{Redr|from modification|p1={{-r|10 ar – En snall mans bekannelser}}}}'),
(334054590,
'2009-12-26T05:18:04Z',
'DASHBot',
'moved [[10th MMC - Kyustendil]] to [[10th MMC – Kyustendil]]: Robot moving pages:per [[WP:HYPHEN]] ([[User talk:Tim1357|error?]])',
True,
'10th MMC - Kyustendil',
'#REDIRECT [[10th MMC – Kyustendil]]\n\n{{R from modification}}\n{{R from move}}')]
In [32]:
subset_df = redirects_df[0:100]
subset_df
Out[32]:
page_id
page_title
rd_title
0
25560554
"FF.SS."_-_Cioè:_"...che_mi_hai_portato_a_fare...
"FF.SS."_–_Cioè:_"...che_mi_hai_portato_a_fare...
1
30962649
FF.SS.
"FF.SS."_–_Cioè:_"...che_mi_hai_portato_a_fare...
2
34657348
"FF.SS."_–_Cioe:_"...che_mi_hai_portato_a_fare...
"FF.SS."_–_Cioè:_"...che_mi_hai_portato_a_fare...
3
41418201
F.F.S.S.,_cioè.._che_mi_hai_portato_a_fare_sop...
"FF.SS."_–_Cioè:_"...che_mi_hai_portato_a_fare...
4
49679430
"FF.SS."_-_Cioe:_"...che_mi_hai_portato_a_fare...
"FF.SS."_–_Cioè:_"...che_mi_hai_portato_a_fare...
5
2398718
Port_lotniczy_Szczecin-Goleniów
"Solidarity"_Szczecin–Goleniów_Airport
6
2565845
Szczecin_International_Airport
"Solidarity"_Szczecin–Goleniów_Airport
7
4367474
Szczecin-Goleniów_Airport
"Solidarity"_Szczecin–Goleniów_Airport
8
6135639
Szczecin-Goleniow_"Solidarnosc"_Airport
"Solidarity"_Szczecin–Goleniów_Airport
9
6135674
Szczecin-Goleniow_Solidarity_Airport
"Solidarity"_Szczecin–Goleniów_Airport
10
12446084
Szz
"Solidarity"_Szczecin–Goleniów_Airport
11
14715365
Szczecin-Goleniów_"Solidarnosc"_Airport
"Solidarity"_Szczecin–Goleniów_Airport
12
16385404
Port_lotniczy_Szczecin-Goleniow
"Solidarity"_Szczecin–Goleniów_Airport
13
16444660
Szczecin-Goleniow_Airport
"Solidarity"_Szczecin–Goleniów_Airport
14
20915643
SZZ
"Solidarity"_Szczecin–Goleniów_Airport
15
23555019
Solidarność_Airport
"Solidarity"_Szczecin–Goleniów_Airport
16
23555025
Solidarnosc_Airport
"Solidarity"_Szczecin–Goleniów_Airport
17
34827897
Szczecin-Goleniów_"Solidarność"_Airport
"Solidarity"_Szczecin–Goleniów_Airport
18
35994818
"Solidarity"_Szczecin-Goleniow_Airport
"Solidarity"_Szczecin–Goleniów_Airport
19
39391434
"Solidarity"_Szczecin-Goleniów_Airport
"Solidarity"_Szczecin–Goleniów_Airport
20
43855356
"Solidarity"_Szczecin–Goleniow_Airport
"Solidarity"_Szczecin–Goleniów_Airport
21
50073414
"Solidarity"_Szczecin–Goleniów_(airport)
"Solidarity"_Szczecin–Goleniów_Airport
22
50084473
"Solidarity"_Szczecin-Goleniów_(airport)
"Solidarity"_Szczecin–Goleniów_Airport
23
16451346
38_-_Auch_das_war_Wien
'38_–_Vienna_Before_the_Fall
24
44031004
38_–_Auch_das_war_Wien
'38_–_Vienna_Before_the_Fall
25
44031050
'38_-_Vienna_Before_the_Fall
'38_–_Vienna_Before_the_Fall
26
44031053
38_–_Vienna_Before_the_Fall
'38_–_Vienna_Before_the_Fall
27
44031058
38_-_Vienna_Before_the_Fall
'38_–_Vienna_Before_the_Fall
28
1317664
'64-'95
'64–'95
29
19166400
'64_-_'95
'64–'95
...
...
...
...
70
41954081
103rd_Street_–_Beverly_Hills_(Metra)
103rd_Street–Beverly_Hills_(Metra_station)
71
49681032
103rd_Street_-_Beverly_Hills_(Metra_station)
103rd_Street–Beverly_Hills_(Metra_station)
72
51784682
103rd_Street_–_Beverly_Hills_(Metra_station)
103rd_Street–Beverly_Hills_(Metra_station)
73
51786492
103rd_Street-Beverly_Hills_(Metra_station)
103rd_Street–Beverly_Hills_(Metra_station)
74
730973
103rd_Street-Corona_Plaza_(New_York_Subway)
103rd_Street–Corona_Plaza_(IRT_Flushing_Line)
75
1453582
103rd_Street-Corona_Plaza_(7-Flushing)
103rd_Street–Corona_Plaza_(IRT_Flushing_Line)
76
3930501
103rd_Street-Corona_Plaza_(IRT_Flushing_Line_s...
103rd_Street–Corona_Plaza_(IRT_Flushing_Line)
77
9011853
103rd_Street_(IRT_Flushing_Line)
103rd_Street–Corona_Plaza_(IRT_Flushing_Line)
78
10440364
104th_Street_(IRT_Flushing_Line)
103rd_Street–Corona_Plaza_(IRT_Flushing_Line)
79
12005273
103rd_Street-Corona_Plaza_(IRT_Flushing_Line)
103rd_Street–Corona_Plaza_(IRT_Flushing_Line)
80
23872564
103rd_Street_-_Corona_Plaza_(IRT_Flushing_Line)
103rd_Street–Corona_Plaza_(IRT_Flushing_Line)
81
51182156
103rd_Street_–_Corona_Plaza_(IRT_Flushing_Line)
103rd_Street–Corona_Plaza_(IRT_Flushing_Line)
82
43013681
Buildings_at_104-128_S._Side_Sq.
104–128_South_Side_Square
83
49680236
104-128_South_Side_Square
104–128_South_Side_Square
84
25560634
107th_Street_-_Beverly_Hills_(Metra)
107th_Street–Beverly_Hills_(Metra_station)
85
41954085
107th_Street_–_Beverly_Hills_(Metra)
107th_Street–Beverly_Hills_(Metra_station)
86
49678424
107th_Street_-_Beverly_Hills_(Metra_station)
107th_Street–Beverly_Hills_(Metra_station)
87
51784708
107th_Street_–_Beverly_Hills_(Metra_station)
107th_Street–Beverly_Hills_(Metra_station)
88
51786482
107th_Street-Beverly_Hills_(Metra_station)
107th_Street–Beverly_Hills_(Metra_station)
89
51622846
107-123_Muswell_Hill_Road
107–123_Muswell_Hill_Road
90
23259687
10:_1993-2003_-_Ten_Years_Of
10:_1993–2003_–_Ten_Years_Of
91
25560608
10_Jahre_-_Best_Of
10_Jahre_–_Best_Of
92
51859414
10_Years_of_Greatest_Hits
10_Years_of_Greatest_Hits_–_Newly_Recorded
93
51859418
10_Years_of_Hits_–_Newly_Recorded
10_Years_of_Greatest_Hits_–_Newly_Recorded
94
51859856
10_Years_of_Hits_-_Newly_Recorded
10_Years_of_Greatest_Hits_–_Newly_Recorded
95
51859859
10_Years_of_Greatest_Hits_-_Newly_Recorded
10_Years_of_Greatest_Hits_–_Newly_Recorded
96
49679325
10_år_-_En_snäll_mans_bekännelser
10_år_–_En_snäll_mans_bekännelser
97
50825971
10_ar_–_En_snall_mans_bekannelser
10_år_–_En_snäll_mans_bekännelser
98
50831282
10_ar_-_En_snall_mans_bekannelser
10_år_–_En_snäll_mans_bekännelser
99
25560653
10th_MMC_-_Kyustendil
10th_MMC_–_Kyustendil
100 rows × 3 columns
In [59]:
revs = []
for r in subset_df.iterrows():
redirect_page = r[1].page_title
try:
page = pywikibot.Page(site, redirect_page)
oldest_rev = page.oldest_revision
revs.append((oldest_rev.revid, oldest_rev.timestamp.isoformat(), oldest_rev.user, oldest_rev.comment, page.isRedirectPage(), page.title(), page.text))
except Exception as e:
pass
In [60]:
pd.DataFrame(revs, columns=["revid", "timestamp", "user", "comment", "is_redirect", "page_title", "page_text"])
Out[60]:
revid
timestamp
user
comment
is_redirect
page_title
page_text
0
334052434
2009-12-26T04:55:57Z
DASHBot
moved [["FF.SS." - Cioè: "...che mi hai portat...
True
"FF.SS." - Cioè: "...che mi hai portato a fare...
#REDIRECT [["FF.SS." – Cioè: "...che mi hai po...
1
415186497
2011-02-21T19:55:28Z
DerBorg
[[WP:AES|←]]Redirected page to [["FF.SS." – Ci...
True
FF.SS.
#REDIRECT [["FF.SS." – Cioè: "...che mi hai po...
2
475846929
2012-02-08T23:25:55Z
RjwilmsiBot
Create redirect for title with diacritics usin...
True
"FF.SS." – Cioe: "...che mi hai portato a fare...
#REDIRECT [["FF.SS." – Cioè: "...che mi hai po...
3
586979837
2013-12-20T17:31:33Z
Cavarrone
[[WP:AES|←]]Redirected page to [["FF.SS." – Ci...
True
F.F.S.S., cioè.. che mi hai portato a fare sop...
#REDIRECT [["FF.SS." – Cioè: "...che mi hai po...
4
708965298
2016-03-08T12:52:52Z
AnomieBOT
Redirecting to [[:"FF.SS." – Cioe: "...che mi ...
True
"FF.SS." - Cioe: "...che mi hai portato a fare...
#REDIRECT [[:"FF.SS." – Cioè: "...che mi hai p...
5
20470716
2005-08-07T12:03:02Z
Adz
create redirect
True
Port lotniczy Szczecin-Goleniów
#REDIRECT [["Solidarity" Szczecin–Goleniów Air...
6
22077099
2005-08-29T04:52:37Z
Adz
create redirect
True
Szczecin International Airport
#REDIRECT [["Solidarity" Szczecin–Goleniów Air...
7
43444571
2006-03-12T15:52:25Z
Balcer
redir
True
Szczecin-Goleniów Airport
#REDIRECT [["Solidarity" Szczecin–Goleniów Air...
8
66093939
2006-07-27T03:56:58Z
Mareklug
{{R from title without diacritics}}
True
Szczecin-Goleniow "Solidarnosc" Airport
#REDIRECT [["Solidarity" Szczecin–Goleniów Air...
9
66094262
2006-07-27T03:59:40Z
Mareklug
redirect
True
Szczecin-Goleniow Solidarity Airport
#REDIRECT [["Solidarity" Szczecin–Goleniów Air...
10
147052256
2007-07-25T19:09:04Z
Targeman
redirect
True
Szz
#REDIRECT [["Solidarity" Szczecin–Goleniów Air...
11
178066046
2007-12-15T12:34:08Z
Zyxw
[[WP:AES|←]]Redirected page to [[Szczecin-Gole...
True
Szczecin-Goleniów "Solidarnosc" Airport
#REDIRECT [["Solidarity" Szczecin–Goleniów Air...
12
199229641
2008-03-19T00:42:13Z
Eubot
Redirected page to [[Szczecin-Goleniów "Solida...
True
Port lotniczy Szczecin-Goleniow
#REDIRECT [["Solidarity" Szczecin–Goleniów Air...
13
199721147
2008-03-21T00:38:48Z
Eubot
Redirected page to [[Szczecin-Goleniów "Solida...
True
Szczecin-Goleniow Airport
#REDIRECT [["Solidarity" Szczecin–Goleniów Air...
14
261295911
2009-01-01T18:30:17Z
Uzdzislaw
[[WP:AES|←]]Redirected page to [[Szczecin-Gole...
True
SZZ
#REDIRECT [["Solidarity" Szczecin–Goleniów Air...
15
301278464
2009-07-09T23:26:40Z
Nappyrootslistener
[[WP:AES|←]]Redirected page to [[Szczecin-Gole...
True
Solidarność Airport
#REDIRECT [["Solidarity" Szczecin–Goleniów Air...
16
301278560
2009-07-09T23:27:17Z
Nappyrootslistener
[[WP:AES|←]]Redirected page to [[Szczecin-Gole...
True
Solidarnosc Airport
#REDIRECT [["Solidarity" Szczecin–Goleniów Air...
17
478076640
2012-02-21T15:08:41Z
Igor alexandrov
moved [[Szczecin-Goleniów "Solidarność" Airpor...
True
Szczecin-Goleniów "Solidarność" Airport
#REDIRECT [["Solidarity" Szczecin–Goleniów Air...
18
495325659
2012-05-31T17:26:12Z
RjwilmsiBot
Create redirect for title with diacritics usin...
True
"Solidarity" Szczecin-Goleniow Airport
#REDIRECT [["Solidarity" Szczecin–Goleniów Air...
19
555161539
2013-05-15T03:40:18Z
Cherkash
Cherkash moved page [["Solidarity" Szczecin-Go...
True
"Solidarity" Szczecin-Goleniów Airport
#REDIRECT [["Solidarity" Szczecin–Goleniów Air...
20
626062909
2014-09-18T09:48:35Z
RjwilmsiBot
Create redirect for title with diacritics usin...
True
"Solidarity" Szczecin–Goleniow Airport
#REDIRECT [["Solidarity" Szczecin–Goleniów Air...
21
714079955
2016-04-07T14:23:12Z
Feminist
/* top */Redirecting to [["Solidarity" Szczeci...
True
"Solidarity" Szczecin–Goleniów (airport)
#REDIRECT [["Solidarity" Szczecin–Goleniów Air...
22
714250580
2016-04-08T16:07:20Z
AnomieBOT
Redirecting to [[:"Solidarity" Szczecin–Goleni...
True
"Solidarity" Szczecin-Goleniów (airport)
#REDIRECT [[:"Solidarity" Szczecin–Goleniów Ai...
23
199768772
2008-03-21T05:19:58Z
Eubot
Redirected page to [[38 – Auch das war Wien]].
True
38 - Auch das war Wien
#REDIRECT [['38 – Vienna Before the Fall]] {{R...
24
628359230
2014-10-05T17:43:11Z
Lugnuts
Lugnuts moved page [[38 – Auch das war Wien]] ...
True
38 – Auch das war Wien
#REDIRECT [['38 – Vienna Before the Fall]]\n\n...
25
628360066
2014-10-05T17:51:01Z
Lugnuts
[[WP:AES|←]]Redirected page to [['38 – Vienna ...
True
'38 - Vienna Before the Fall
#REDIRECT[['38 – Vienna Before the Fall]]
26
628360093
2014-10-05T17:51:18Z
Lugnuts
[[WP:AES|←]]Redirected page to [['38 – Vienna ...
True
38 – Vienna Before the Fall
#REDIRECT[['38 – Vienna Before the Fall]]
27
628360127
2014-10-05T17:51:34Z
Lugnuts
[[WP:AES|←]]Redirected page to [['38 – Vienna ...
True
38 - Vienna Before the Fall
#REDIRECT[['38 – Vienna Before the Fall]]
28
9096674
2004-12-23T19:07:20Z
81.154.8.38
True
'64-'95
#REDIRECT [['64–'95]]\n{{R from modification}}
29
236143794
2008-09-04T01:07:26Z
Koavf
moved [['64 - '95]] to [['64–'95]]: –
True
'64 - '95
#REDIRECT [['64–'95]]
...
...
...
...
...
...
...
...
70
595645871
2014-02-15T22:20:31Z
DanTD
DanTD moved page [[103rd Street – Beverly Hill...
True
103rd Street – Beverly Hills (Metra)
#REDIRECT [[103rd Street–Beverly Hills (Metra ...
71
708983602
2016-03-08T15:08:12Z
AnomieBOT
Redirecting to [[:103rd Street – Beverly Hills...
True
103rd Street - Beverly Hills (Metra station)
#REDIRECT [[103rd Street–Beverly Hills (Metra ...
72
741675832
2016-09-29T00:43:31Z
Kew Gardens 613
Kew Gardens 613 moved page [[103rd Street – Be...
True
103rd Street – Beverly Hills (Metra station)
#REDIRECT [[103rd Street–Beverly Hills (Metra ...
73
741689905
2016-09-29T02:26:58Z
AnomieBOT
Redirecting to [[:103rd Street–Beverly Hills (...
True
103rd Street-Beverly Hills (Metra station)
#REDIRECT [[:103rd Street–Beverly Hills (Metra...
74
5338631
2004-06-16T20:17:11Z
PZFUN
True
103rd Street-Corona Plaza (New York Subway)
#REDIRECT [[103rd Street–Corona Plaza (IRT Flu...
75
12866330
2005-01-31T23:40:32Z
SPUI
103rd Street-Corona Plaza (7-Flushing) moved t...
True
103rd Street-Corona Plaza (7-Flushing)
#REDIRECT [[103rd Street–Corona Plaza (IRT Flu...
76
37808383
2006-02-02T06:29:43Z
Larryv
moved [[103rd Street-Corona Plaza (IRT Flushin...
True
103rd Street-Corona Plaza (IRT Flushing Line s...
#REDIRECT [[103rd Street–Corona Plaza (IRT Flu...
77
101911738
2007-01-20T01:16:34Z
NE2
[[WP:AES|←]]Redirected page to [[103rd Street-...
True
103rd Street (IRT Flushing Line)
#REDIRECT [[103rd Street–Corona Plaza (IRT Flu...
78
119997520
2007-04-03T16:00:31Z
NE2
[[WP:AES|←]]Redirected page to [[103rd Street–...
True
104th Street (IRT Flushing Line)
#REDIRECT [[103rd Street–Corona Plaza (IRT Flu...
79
141377844
2007-06-29T12:15:07Z
Marc Shepherd
moved [[103rd Street-Corona Plaza (IRT Flushin...
True
103rd Street-Corona Plaza (IRT Flushing Line)
#REDIRECT [[103rd Street–Corona Plaza (IRT Flu...
80
306180860
2009-08-05T09:58:34Z
Tinlinkin
redirect to [[103rd Street – Corona Plaza (IRT...
True
103rd Street - Corona Plaza (IRT Flushing Line)
#REDIRECT [[103rd Street–Corona Plaza (IRT Flu...
81
731861522
2016-07-28T00:12:19Z
Epicgenius
Epicgenius moved page [[103rd Street – Corona ...
True
103rd Street – Corona Plaza (IRT Flushing Line)
#REDIRECT [[103rd Street–Corona Plaza (IRT Flu...
82
612302785
2014-06-10T03:24:02Z
Spyder Monkey
[[WP:AES|←]]Redirected page to [[104–128 South...
True
Buildings at 104-128 S. Side Sq.
#REDIRECT [[104–128 South Side Square]]
83
708973935
2016-03-08T13:58:37Z
AnomieBOT
Redirecting to [[:104–128 South Side Square]] ...
True
104-128 South Side Square
#REDIRECT [[:104–128 South Side Square]]\n{{Re...
84
334054162
2009-12-26T05:14:09Z
DASHBot
moved [[107th Street - Beverly Hills (Metra)]]...
True
107th Street - Beverly Hills (Metra)
#REDIRECT [[107th Street–Beverly Hills (Metra ...
85
595645951
2014-02-15T22:21:07Z
DanTD
DanTD moved page [[107th Street – Beverly Hill...
True
107th Street – Beverly Hills (Metra)
#REDIRECT [[107th Street–Beverly Hills (Metra ...
86
708956917
2016-03-08T11:40:00Z
AnomieBOT
Redirecting to [[:107th Street – Beverly Hills...
True
107th Street - Beverly Hills (Metra station)
#REDIRECT [[107th Street–Beverly Hills (Metra ...
87
741676092
2016-09-29T00:45:47Z
Kew Gardens 613
Kew Gardens 613 moved page [[107th Street – Be...
True
107th Street – Beverly Hills (Metra station)
#REDIRECT [[107th Street–Beverly Hills (Metra ...
88
741689784
2016-09-29T02:26:08Z
AnomieBOT
Redirecting to [[:107th Street–Beverly Hills (...
True
107th Street-Beverly Hills (Metra station)
#REDIRECT [[:107th Street–Beverly Hills (Metra...
89
739682478
2016-09-16T08:36:22Z
Ham II
Ham II moved page [[107-123 Muswell Hill Road]...
True
107-123 Muswell Hill Road
#REDIRECT [[107–123 Muswell Hill Road]]\n{{R f...
90
297057880
2009-06-18T00:02:08Z
Koavf
moved [[10: 1993-2003 - Ten Years Of]] to [[10...
True
10: 1993-2003 - Ten Years Of
#REDIRECT [[10: 1993–2003 – Ten Years Of]]\n\n...
91
334053726
2009-12-26T05:09:29Z
DASHBot
moved [[10 Jahre - Best Of]] to [[10 Jahre – B...
True
10 Jahre - Best Of
#REDIRECT [[10 Jahre – Best Of]]\n\n{{R from m...
92
742542349
2016-10-04T09:22:20Z
Caldorwards4
Caldorwards4 moved page [[10 Years of Greatest...
True
10 Years of Greatest Hits
#REDIRECT [[10 Years of Greatest Hits – Newly ...
93
742542415
2016-10-04T09:22:59Z
Caldorwards4
Caldorwards4 moved page [[10 Years of Hits – N...
True
10 Years of Hits – Newly Recorded
#REDIRECT [[10 Years of Greatest Hits – Newly ...
94
742549520
2016-10-04T10:32:23Z
AnomieBOT
Redirecting to [[:10 Years of Hits – Newly Rec...
True
10 Years of Hits - Newly Recorded
#REDIRECT [[:10 Years of Greatest Hits – Newly...
95
742549542
2016-10-04T10:32:33Z
AnomieBOT
Redirecting to [[:10 Years of Greatest Hits – ...
True
10 Years of Greatest Hits - Newly Recorded
#REDIRECT [[:10 Years of Greatest Hits – Newly...
96
708964044
2016-03-08T12:43:12Z
AnomieBOT
Redirecting to [[:10 år – En snäll mans bekänn...
True
10 år - En snäll mans bekännelser
#REDIRECT [[:10 år – En snäll mans bekännelser...
97
725567194
2016-06-16T13:32:26Z
RjwilmsiBot
/* top */Create redirect for title with diacri...
True
10 ar – En snall mans bekannelser
#REDIRECT [[10 år – En snäll mans bekännelser]...
98
725618421
2016-06-16T20:21:04Z
AnomieBOT
Redirecting to [[:10 ar – En snall mans bekann...
True
10 ar - En snall mans bekannelser
#REDIRECT [[:10 år – En snäll mans bekännelser...
99
334054590
2009-12-26T05:18:04Z
DASHBot
moved [[10th MMC - Kyustendil]] to [[10th MMC ...
True
10th MMC - Kyustendil
#REDIRECT [[10th MMC – Kyustendil]]\n\n{{R fro...
100 rows × 7 columns
In [52]:
oldest_rev
Out[52]:
{'_content_model': 'wikitext', 'revid': 334054590, 'text': None, 'timestamp': Timestamp(2009, 12, 26, 5, 18, 4), 'rollbacktoken': None, 'minor': False, 'comment': 'moved [[10th MMC - Kyustendil]] to [[10th MMC – Kyustendil]]: Robot moving pages:per [[WP:HYPHEN]] ([[User talk:Tim1357|error?]])', 'anon': False, '_parent_id': 0, '_sha1': '833116308709346a871892a2e15435cb19163bcc', 'user': 'DASHBot'}
In [58]:
page.isRedirectPage
Out[58]:
<bound method Page.isRedirectPage of Page('10th MMC - Kyustendil')>
In [ ]:
In [72]:
def get_subset_data(subset_df):
revs = []
error_count = 0
for r in subset_df.iterrows():
redirect_page = r[1].page_title
try:
page = pywikibot.Page(site, redirect_page)
oldest_rev = page.oldest_revision
revs.append((oldest_rev.revid, oldest_rev.timestamp.isoformat(), oldest_rev.user, oldest_rev.comment, page.isRedirectPage(), page.title(), page.text))
except Exception as e:
error_count = error_count + 1
revs_df = pd.DataFrame(revs, columns=["revid", "timestamp", "user", "comment", "is_redirect", "page_title", "page_text"])
print(type(revs_df))
return revs_df, error_count
In [73]:
errors = 0
total_df = pd.DataFrame(columns=["revid", "timestamp", "user", "comment", "is_redirect", "page_title", "page_text"])
total_pages = round(len(redirects_df)/100) + 1
for count in range(0,total_pages):
print("Starting ", count)
subset_df = redirects_df[count:count+100]
revs_df, error_count = get_subset_data(subset_df)
total_df = pd.concat([total_df, revs_df])
total_df.to_csv("enwiki-redirects-endash-processed.tsv", sep="\t")
total_df.to_pickle("enwiki-redirects-endash-processed.pickle")
errors = errors + error_count
print(count, errors)
Starting 0
<class 'pandas.core.frame.DataFrame'>
0 0
Starting 1
<class 'pandas.core.frame.DataFrame'>
1 0
Starting 2
<class 'pandas.core.frame.DataFrame'>
2 0
Starting 3
<class 'pandas.core.frame.DataFrame'>
3 0
Starting 4
<class 'pandas.core.frame.DataFrame'>
4 0
Starting 5
<class 'pandas.core.frame.DataFrame'>
5 0
Starting 6
<class 'pandas.core.frame.DataFrame'>
6 0
Starting 7
<class 'pandas.core.frame.DataFrame'>
7 0
Starting 8
<class 'pandas.core.frame.DataFrame'>
8 0
Starting 9
<class 'pandas.core.frame.DataFrame'>
9 0
Starting 10
<class 'pandas.core.frame.DataFrame'>
10 0
Starting 11
<class 'pandas.core.frame.DataFrame'>
11 0
Starting 12
<class 'pandas.core.frame.DataFrame'>
12 0
Starting 13
<class 'pandas.core.frame.DataFrame'>
13 0
Starting 14
<class 'pandas.core.frame.DataFrame'>
14 0
Starting 15
<class 'pandas.core.frame.DataFrame'>
15 0
Starting 16
<class 'pandas.core.frame.DataFrame'>
16 0
Starting 17
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
<ipython-input-73-a7e9a5b24d4e> in <module>()
7 subset_df = redirects_df[count:count+100]
8
----> 9 revs_df, error_count = get_subset_data(subset_df)
10 total_df = pd.concat([total_df, revs_df])
11
<ipython-input-72-b142a5c982b1> in get_subset_data(subset_df)
7 page = pywikibot.Page(site, redirect_page)
8 oldest_rev = page.oldest_revision
----> 9 revs.append((oldest_rev.revid, oldest_rev.timestamp.isoformat(), oldest_rev.user, oldest_rev.comment, page.isRedirectPage(), page.title(), page.text))
10 except Exception as e:
11 error_count = error_count + 1
/srv/paws/pwb/pywikibot/page.py in text(self)
583 if not hasattr(self, '_text') or self._text is None:
584 try:
--> 585 self._text = self.get(get_redirect=True)
586 except pywikibot.NoPage:
587 # TODO: what other exceptions might be returned?
/srv/paws/pwb/pywikibot/tools/__init__.py in wrapper(*__args, **__kw)
1446 cls, depth)
1447 del __kw[old_arg]
-> 1448 return obj(*__args, **__kw)
1449
1450 if not __debug__:
/srv/paws/pwb/pywikibot/page.py in get(self, force, get_redirect, sysop)
443 del self.latest_revision_id
444 try:
--> 445 self._getInternals(sysop)
446 except pywikibot.IsRedirectPage:
447 if not get_redirect:
/srv/paws/pwb/pywikibot/page.py in _getInternals(self, sysop)
473 if self._latest_cached_revision() is None:
474 try:
--> 475 self.site.loadrevisions(self, getText=True, sysop=sysop)
476 except (pywikibot.NoPage, pywikibot.SectionError) as e:
477 self._getexception = e
/srv/paws/pwb/pywikibot/site.py in loadrevisions(self, page, getText, revids, startid, endid, starttime, endtime, rvdir, user, excludeuser, section, sysop, step, total, rollback)
3938 rvgen.set_maximum_items(-1) # suppress use of rvlimit parameter
3939
-> 3940 for pagedata in rvgen:
3941 if not self.sametitle(pagedata['title'],
3942 page.title(withSection=False)):
/srv/paws/pwb/pywikibot/data/api.py in __iter__(self)
2751 _logger)
2752 if not hasattr(self, "data"):
-> 2753 self.data = self.request.submit()
2754 if not self.data or not isinstance(self.data, dict):
2755 pywikibot.debug(
/srv/paws/pwb/pywikibot/data/api.py in submit(self)
1952 rawdata = http.request(
1953 site=self.site, uri=uri, method='GET' if use_get else 'POST',
-> 1954 body=body, headers=headers)
1955 except Server504Error:
1956 pywikibot.log(u"Caught HTTP 504 error; retrying")
/srv/paws/pwb/pywikibot/tools/__init__.py in wrapper(*__args, **__kw)
1446 cls, depth)
1447 del __kw[old_arg]
-> 1448 return obj(*__args, **__kw)
1449
1450 if not __debug__:
/srv/paws/pwb/pywikibot/comms/http.py in request(site, uri, method, params, body, headers, data, **kwargs)
326 headers['user-agent'] = user_agent(site, format_string)
327
--> 328 r = fetch(baseuri, method, params, body, headers, **kwargs)
329 return r.content
330
/srv/paws/pwb/pywikibot/comms/http.py in fetch(uri, method, params, body, headers, default_error_handling, use_fake_user_agent, data, **kwargs)
508 headers['user-agent'] = fake_user_agent()
509
--> 510 request = _enqueue(uri, method, params, body, headers, **kwargs)
511 assert(request._data is not None) # if there's no data in the answer we're in trouble
512 # Run the error handling callback in the callers thread so exceptions
/srv/paws/pwb/pywikibot/comms/http.py in _enqueue(uri, method, params, body, headers, data, **kwargs)
465 request = threadedhttp.HttpRequest(
466 uri, method, params, body, all_headers, callbacks, **kwargs)
--> 467 _http_process(session, request)
468 return request
469
/srv/paws/pwb/pywikibot/comms/http.py in _http_process(session, http_request)
379 response = session.request(method, uri, params=params, data=body,
380 headers=headers, auth=auth, timeout=timeout,
--> 381 verify=not ignore_validation)
382 except Exception as e:
383 http_request.data = e
/srv/paws/lib/python3.4/site-packages/requests/sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
472 hooks = hooks,
473 )
--> 474 prep = self.prepare_request(req)
475
476 proxies = proxies or {}
/srv/paws/lib/python3.4/site-packages/requests/sessions.py in prepare_request(self, request)
405 auth=merge_setting(auth, self.auth),
406 cookies=merged_cookies,
--> 407 hooks=merge_hooks(request.hooks, self.hooks),
408 )
409 return p
/srv/paws/lib/python3.4/site-packages/requests/models.py in prepare(self, method, url, headers, files, data, params, auth, cookies, hooks, json)
304 self.prepare_cookies(cookies)
305 self.prepare_body(data, files, json)
--> 306 self.prepare_auth(auth, url)
307
308 # Note that prepare_auth must be last to enable authentication schemes
/srv/paws/lib/python3.4/site-packages/requests/models.py in prepare_auth(self, auth, url)
541
542 # Allow auth to make its changes.
--> 543 r = auth(self)
544
545 # Update self to reflect the auth changes.
/srv/paws/lib/python3.4/site-packages/requests_oauthlib/oauth1_auth.py in __call__(self, r)
78 r.headers['Content-Type'] = CONTENT_TYPE_FORM_URLENCODED
79 r.url, headers, r.body = self.client.sign(
---> 80 unicode(r.url), unicode(r.method), r.body or '', r.headers)
81 elif self.force_include_body:
82 # To allow custom clients to work on non form encoded bodies.
/srv/paws/lib/python3.4/site-packages/oauthlib/oauth1/rfc5849/__init__.py in sign(self, uri, http_method, body, headers, realm)
311 # generate the signature
312 request.oauth_params.append(
--> 313 ('oauth_signature', self.get_oauth_signature(request)))
314
315 # render the signed request and return it
/srv/paws/lib/python3.4/site-packages/oauthlib/oauth1/rfc5849/__init__.py in get_oauth_signature(self, request)
131 uri_query=urlparse.urlparse(uri).query,
132 body=body,
--> 133 headers=headers)
134 log.debug("Collected params: {0}".format(collected_params))
135
/srv/paws/lib/python3.4/site-packages/oauthlib/oauth1/rfc5849/signature.py in collect_parameters(uri_query, body, headers, exclude_oauth_signature, with_realm)
292
293 # TODO: enforce header param inclusion conditions
--> 294 bodyparams = extract_params(body) or []
295 params.extend(bodyparams)
296
/srv/paws/lib/python3.4/site-packages/oauthlib/common.py in extract_params(raw)
172 if isinstance(raw, bytes_type) or isinstance(raw, unicode_type):
173 try:
--> 174 params = urldecode(raw)
175 except ValueError:
176 params = None
/srv/paws/lib/python3.4/site-packages/oauthlib/common.py in urldecode(query)
159
160 # unicode all the things
--> 161 return decode_params_utf8(params)
162
163
/srv/paws/lib/python3.4/site-packages/oauthlib/common.py in decode_params_utf8(params)
106 decoded.append((
107 k.decode('utf-8') if isinstance(k, bytes_type) else k,
--> 108 v.decode('utf-8') if isinstance(v, bytes_type) else v))
109 return decoded
110
KeyboardInterrupt:
Content source: staeiou/wiki-stat-notebooks
Similar notebooks: