In [42]:
import pandas as pd

In [43]:
#configurations that need to be done
file_name = 'politician-data-0-100' #make sure that the file is parallel to this program

In [44]:
pol = pd.read_csv(file_name,sep='\t')
pol.head()


Out[44]:
Unnamed: 0 #DBpURL ID WikiURL gender name birthDate deathDate occupation nationality party
0 0 http://dbpedia.org/resource/Quincy_Timberlake 11 http://en.wikipedia.org/wiki/Quincy_Timberlake male [' quincy timberlake ', ' timberlake quincy '] [' 1980-04-22 '] NaN ['politician'] ['kenyan'] NaN
1 1 http://dbpedia.org/resource/Fanny_Edelman 29 http://en.wikipedia.org/wiki/Fanny_Edelman female [' fanny edelman ', ' edelman fanny '] [' 1911-02-27 '] [' 2011-11-01 '] ['politician'] ['argentine'] NaN
2 2 http://dbpedia.org/resource/Stojan_Novaković 65 http://en.wikipedia.org/wiki/Stojan_Novaković NaN [' stojan novaković ', ' (стојан новаковић) ',... [' 1842-11-01 '] [' 1915-02-18 '] ['historian', 'philology', 'politician', 'dipl... ['serbian'] [' serbian progressive party (historical) ']
3 3 http://dbpedia.org/resource/Paul_Scheffer 102 http://en.wikipedia.org/wiki/Paul_Scheffer male [' scheffer paul '] [' 1954-09-03 '] NaN ['politician'] NaN NaN
4 4 http://dbpedia.org/resource/Ed_Robb 127 http://en.wikipedia.org/wiki/Ed_Robb male NaN NaN NaN ['politician'] ['american'] NaN

In [45]:
handle_list = [x.split('/')[-1] for x in pol['WikiURL']]
handle_list


Out[45]:
['Quincy_Timberlake',
 'Fanny_Edelman',
 'Stojan_Novaković',
 'Paul_Scheffer',
 'Ed_Robb',
 'Nizamettin_Erkmen',
 'Claudio_Scajola',
 'Lawrence_Lual_Lual',
 'Thomas_Clausen_(Louisiana)',
 'Yang_Ti-liang',
 'Georges_Colombier',
 'Gottlieb_Duttweiler',
 'Mohammad_Natsir',
 'Ján_Ševčík',
 'Georges_Othily',
 'Athanasios_N._Miaoulis',
 'Kristalina_Georgieva',
 'Otto_Hoetzsch',
 'Rafiq_Ahmed_Jamali',
 'Carolyn_Maloney',
 'Sir_Gilbert_Pickering,_1st_Baronet',
 'Karel_Rüütli',
 'Todd_Rokita',
 'Salmir_Kaplan',
 'Ramón_Jiménez_Fuentes',
 'Krasen_Kralev',
 'Denver_Butler',
 'Massimo_Giorgetti',
 'Nancy_Shukri',
 'Julie_Green',
 'Elżbieta_Pierzchała',
 'Brice_Lalonde',
 'Chandrasiri_Gajadeera',
 'Slavica_Đukić_Dejanović',
 'Ho_Dam',
 'José_Piñera_Carvallo',
 'Bruce_Jesson',
 'Thomas_Ravenel',
 'Akhilesh_Yadav',
 'Luc_Recordon',
 'Benjamin_D._Dwinnell',
 'José_Francisco_Rábago',
 'Gustaw_Przeczek',
 'Jacques_Lavoie',
 'Nessa_Childers',
 'S._Nijamudeen',
 'María_García_Quiroz',
 'Tunji_Olurin',
 'Jalagam_Vengala_Rao',
 'Jim_McGovern_(British_politician)',
 'Mohammad_Reza_Aref',
 'Alexander_Hermann,_Count_of_Wartensleben',
 'Ali_Ahsan_Mohammad_Mojaheed',
 'Faustino_Félix_Chávez',
 'Saadaldeen_Talib',
 'Laxmikant_Parsekar',
 'Tulsidas_Jadhav',
 'Rigoberta_Menchú',
 'Lucie_Leblanc',
 'David_Milwyn_Duggan',
 'John_Wilson_(Mid_Durham_MP)',
 'Yōsuke_Tsuruho',
 'Mark_Souder',
 'Benjamin_Boyd_(South_Carolina)',
 'Joaquín_Abril_Martorell',
 'Martin_Lidegaard',
 'Paul_Janson',
 'Iain_Smith_(Scottish_politician)',
 'Jacques_Germeaux',
 'R._Avudaiyappan',
 'Al_Smith',
 'William_Gupton',
 'Bill_Malarky',
 'Jeanette_Dousdebes_Rubio',
 'Buzz_Thomas',
 'Sachin_Ahir',
 'Maria_Lohela',
 'Carlos_Morales_Vázquez',
 'Daniel_Spagnou',
 'Antoine_Wright_(politician)',
 'Pawan_Kumar_Tinu',
 'Sardar_Tufail_Ahmad_Khan_Mayo',
 'Mohammad_Khan_Qajar',
 'Tom_Tauke',
 'Wojciech_Olejniczak',
 'Ayisha_Osori',
 'Samuel_Miller_Quincy',
 'Arsalan_Fathipour',
 'Claus_Nissen_Riiber_Berg',
 'Alun_Davies_(politician)',
 'Muhammad_Khan_Achakzai',
 'Dragan_Đilas',
 'Elżbieta_Bieńkowska',
 'Sándor_Rónai',
 'V._J._Sukselainen',
 'Peter_Bossman',
 'Muhammad_Fazal_Karim',
 'Hortensia_Aragón_Castillo',
 'Walter_E._Fauntroy',
 'Leopold_De_Wael']

In [46]:
handle_frame = pd.DataFrame(data=handle_list, columns=['handle'])
handle_frame.head()


Out[46]:
handle
0 Quincy_Timberlake
1 Fanny_Edelman
2 Stojan_Novaković
3 Paul_Scheffer
4 Ed_Robb

In [47]:
handle_frame['finished_reading'] = False
handle_frame['time_taken_in_mins'] = 0.0
handle_frame.head()


Out[47]:
handle finished_reading time_taken_in_mins
0 Quincy_Timberlake False 0.0
1 Fanny_Edelman False 0.0
2 Stojan_Novaković False 0.0
3 Paul_Scheffer False 0.0
4 Ed_Robb False 0.0

In [48]:
handle_frame.to_csv(file_name+'-tracker.csv')

In [ ]: