In [11]:
import pandas as pd
In [12]:
mpdata = pd.read_json("data.json")
In [13]:
mpdata
Out[13]:
<class 'pandas.core.frame.DataFrame'>
Int64Index: 650 entries, 0 to 649
Data columns (total 6 columns):
interests 645 non-null values
name 650 non-null values
twfy_dob 396 non-null values
twfy_id 650 non-null values
wiki_dob 573 non-null values
wiki_url 629 non-null values
dtypes: int64(1), object(5)
In [14]:
mpdata.wiki_url
Out[14]:
0 http://en.wikipedia.org/wiki/Bridget_Phillipson
1 http://en.wikipedia.org/wiki/Sharon_Hodgson
2 http://en.wikipedia.org/wiki/Julie_Elliott
3 http://en.wikipedia.org/wiki/Naomi_Long
4 http://en.wikipedia.org/wiki/Ian_Paisley_Jr
5 http://en.wikipedia.org/wiki/Pat_Doherty
6 http://en.wikipedia.org/wiki/Kevan_Jones
7 http://en.wikipedia.org/wiki/Steve_Webb
8 http://en.wikipedia.org/wiki/Jenny_Chapman
9 http://en.wikipedia.org/wiki/Jeffrey_Donaldson
10 http://en.wikipedia.org/wiki/Hywel_Williams
11 http://en.wikipedia.org/wiki/Chris_Skidmore
12 http://en.wikipedia.org/wiki/Charles_Walker_(B...
13 http://en.wikipedia.org/wiki/Tom_Greatrex
14 http://en.wikipedia.org/wiki/David_Simpson_(UK...
...
635 None
636 None
637 http://en.wikipedia.org/wiki/Paul_Maskey
638 None
639 None
640 http://en.wikipedia.org/wiki/George_Galloway
641 None
642 None
643 None
644 None
645 None
646 None
647 None
648 http://en.wikipedia.org/wiki/Francie_Molloy
649 None
Name: wiki_url, Length: 650, dtype: object
In [15]:
a_dob = mpdata.twfy_dob.notnull() | mpdata.wiki_dob.notnull()
In [16]:
a_dob.value_counts()
Out[16]:
True 612
False 38
dtype: int64
In [17]:
ls
LICENSE constants.py.in-git looking-at-data.ipynb
MPofTheDay.iml constants.pyc resources/
MPofTheDay.sketch/ cron.py scripts/
README.md data.json
constants.py exploring-data/
In [18]:
import datetime as dt
import json
import re
import sys
import time
import urllib
import urllib2
In [19]:
import constants
In [20]:
url = urllib2.urlopen("http://www.theyworkforyou.com/api/getMPs?" + urllib.urlencode({
"key": constants.TWFY_API_KEY,
}))
all_members = json.loads(url.read().decode("latin-1").encode("utf-8"))
# make a list of all the MP IDs
all_ids = [x['person_id'] for x in all_members]
# make a comma separated list of all the MP IDs
all_ids_str = ','.join(all_ids)
# get a big json object of all the extra MP info!
url = "http://www.theyworkforyou.com/api/getMPsInfo?" + urllib.urlencode({
"key": constants.TWFY_API_KEY,
"id": all_ids_str,
})
url_data = urllib2.urlopen(url)
all_mp_extra_info = json.loads(url_data.read().decode("latin-1").encode("utf-8"))
In [21]:
all_mp_extra_info[all_ids[4]].keys()
Out[21]:
[u'public_whip_dreammp1109_absent',
u'public_whip_dreammp811_both_voted',
u'public_whip_dreammp826_absent',
u'public_whip_dreammp1110_absent',
u'public_whip_dreammp826_distance',
u'comments_on_speeches',
u'three_word_alliterations_rank_outof',
u'register_member_interests_date',
u'debate_sectionsspoken_inlastyear_rank_joint',
u'by_member_id',
u'reading_ease_rank_outof',
u'select_committees',
u'reading_ease',
u'debate_sectionsspoken_inlastyear_quintile',
u'wrans_departments',
u'public_whip_dreammp1109_both_voted',
u'wrans_asked_inlastyear_rank_outof',
u'public_whip_dreammp1132_distance',
u'wrans_asked_inlastyear_rank_joint',
u'public_whip_dreammp1084_absent',
u'public_whip_dreammp1109_distance',
u'public_whip_dreammp1132_absent',
u'public_whip_dreammp1084_both_voted',
u'public_whip_dreammp1053_distance',
u'public_whip_dreammp1110_both_voted',
u'public_whip_dreammp1074_distance',
u'comments_on_speeches_rank_joint',
u'debate_sectionsspoken_inlastyear_rank',
u'public_whip_dreammp826_both_voted',
u'public_whip_dreammp1065_both_voted',
u'public_whip_dreammp1074_absent',
u'comments_on_speeches_quintile',
u'wikipedia_url',
u'comments_on_speeches_rank',
u'public_whip_dreammp1132_both_voted',
u'public_whip_dreammp984_absent',
u'reading_year_rank_outof',
u'debate_sectionsspoken_inlastyear',
u'reading_ease_quintile',
u'public_whip_dreammp1065_distance',
u'public_whip_dreammp1110_distance',
u'public_whip_dreammp1084_distance',
u'three_word_alliterations',
u'three_word_alliterations_quintile',
u'reading_year_quintile',
u'public_whip_dreammp1065_absent',
u'wrans_asked_inlastyear',
u'public_whip_dreammp811_absent',
u'three_word_alliterations_rank_joint',
u'reading_ease_rank',
u'reading_year_rank',
u'public_whip_dreammp984_both_voted',
u'public_whip_dreammp811_distance',
u'public_whip_dreammp1052_distance',
u'public_whip_dreammp1124_absent',
u'wrans_asked_inlastyear_rank',
u'public_whip_dreammp1124_both_voted',
u'wrans_asked_inlastyear_quintile',
u'three_word_alliteration_content',
u'public_whip_dreammp1074_both_voted',
u'public_whip_dreammp1053_absent',
u'wrans_answered_inlastyear',
u'reading_year',
u'public_whip_dreammp1052_absent',
u'comments_on_speeches_rank_outof',
u'register_member_interests_html',
u'public_whip_dreammp1124_distance',
u'wrans_subjects',
u'public_whip_dreammp1052_both_voted',
u'debate_sectionsspoken_inlastyear_rank_outof',
u'select_committees_chair',
u'public_whip_dreammp984_distance',
u'public_whip_dreammp1053_both_voted',
u'three_word_alliterations_rank']
In [22]:
all_keys = set()
for info in all_mp_extra_info.values():
for k in info.keys():
all_keys.add(k)
In [24]:
all_keys
Out[24]:
set([u'expenses2008_colfamily_travel_a_rank',
u'expenses2007_col5c_quintile',
u'expenses2008_col6_rank_outof',
u'expenses2009_colmp_other_travel_d',
u'expenses2009_colmp_other_travel_b',
u'expenses2004_col8_rank_joint',
u'expenses2009_colmp_other_travel_a',
u'expenses2008_col5_quintile',
u'expenses2004_col7a_rank_joint',
u'expenses2004_col1',
u'expenses2009_colmp_other_travel_a_rank_joint',
u'expenses2009_colmp_other_travel_b_rank_outof',
u'expenses2007_col7_rank_outof',
u'expenses2005_col9_rank_joint',
u'expenses2002_col2_rank_joint',
u'expenses2009_colmp_reg_travel_b_rank',
u'expenses2004_col2_quintile',
u'expenses2008_colmp_other_travel_d_rank',
u'expenses2005_col7_rank',
u'expenses2008_colmp_reg_travel_b_rank',
u'writetothem_responsiveness_responded_outof_2007',
u'writetothem_responsiveness_responded_outof_2006',
u'writetothem_responsiveness_responded_outof_2005',
u'expenses2008_colmp_other_travel_d',
u'expenses2008_col6_quintile',
u'expenses2008_coltotal_travel',
u'expenses2008_colmp_other_travel_a',
u'expenses2008_colmp_other_travel_c',
u'expenses2008_colmp_other_travel_b',
u'expenses2009_colmp_reg_travel_d_quintile',
u'three_word_alliterations_quintile',
u'expenses2008_colfamily_travel_a_rank_joint',
u'expenses2009_colmp_other_travel_c',
u'expenses2005_col4_rank_joint',
u'expenses2005_col4_rank_outof',
u'expenses2008_col7a_quintile',
u'writetothem_sent_2006',
u'expenses2007_col9_rank',
u'three_word_alliteration_content',
u'expenses2002_col4_quintile',
u'expenses2007_col7_rank',
u'expenses2002_col6_rank',
u'expenses2004_col5_rank_outof',
u'expenses2008_col6_rank',
u'expenses2002_col8_rank_outof',
u'expenses2008_colmp_reg_travel_d_rank_joint',
u'expenses2008_coltotal_exc_travel_quintile',
u'expenses2002_col2_quintile',
u'expenses2009_total_rank_outof',
u'expenses2002_col8_quintile',
u'expenses2006_total',
u'public_whip_dreammp1077_distance',
u'expenses2008_colmp_reg_travel_d_rank',
u'wrans_asked_inlastyear_rank_joint',
u'writetothem_responsiveness_mean_2005_rank_outof',
u'expenses2008_col8_rank',
u'expenses2008_colfamily_travel_a_quintile',
u'expenses2004_col7_rank',
u'expenses2008_colfamily_travel_b',
u'expenses2008_colfamily_travel_a',
u'expenses2009_coltotal_travel',
u'expenses2002_col1_rank_joint',
u'expenses2008_colmp_reg_travel_a_rank_outof',
u'expenses2005_col2_rank_outof',
u'expenses2004_total_rank_joint',
u'expenses2008_col3_rank_outof',
u'expenses2009_col5_quintile',
u'expenses2009_colmp_reg_travel_a_rank_joint',
u'expenses2005_col5_rank_outof',
u'reading_ease_quintile',
u'expenses2007_col5f_rank',
u'expenses2007_col1_rank_outof',
u'expenses2005_total',
u'expenses2008_col9_rank_joint',
u'expenses2002_col6_rank_joint',
u'expenses2009_colfamily_travel_a_rank_outof',
u'expenses2003_col3_quintile',
u'public_whip_dreammp826_absent',
u'constituency',
u'expenses2004_col5_rank_joint',
u'expenses2009_colemployee_travel_b',
u'expenses2009_colemployee_travel_a',
u'expenses2009_colmp_other_travel_c_rank',
u'expenses2005_total_quintile',
u'expenses2007_total_quintile',
u'expenses2007_col5b_rank_joint',
u'expenses2004_col2_rank_joint',
u'expenses2007_col4_quintile',
u'expenses2005_col7a_rank_joint',
u'expenses2008_coltotal_exc_travel_rank',
u'expenses2005_col9_rank_outof',
u'writetothem_responsiveness_mean_2005_quintile',
u'expenses2003_col4_rank_outof',
u'expenses2002_col5_rank_joint',
u'expenses2009_col4',
u'expenses2009_col5',
u'expenses2008_colcomms_allowance_quintile',
u'expenses2009_col1',
u'expenses2005_col9_quintile',
u'expenses2009_col3',
u'expenses2009_colspouse_travel_a_quintile',
u'expenses2003_col4_rank_joint',
u'expenses2003_total_rank',
u'expenses2009_col5_rank_joint',
u'expenses2005_col1_rank_outof',
u'expenses2009_colcomms_allowance_rank',
u'public_whip_dreammp1074_distance',
u'expenses2003_col5_rank_outof',
u'expenses2005_col8_rank_joint',
u'public_whip_dreammp1110_distance',
u'public_whip_dreammp1084_distance',
u'public_whip_dreammp363_both_voted',
u'photo_attribution_text',
u'expenses2008_coltotal_exc_travel_rank_joint',
u'expenses2007_col5_rank_joint',
u'expenses2004_col9',
u'expenses2004_col5_quintile',
u'expenses2002_col9_quintile',
u'writetothem_responsiveness_responded_outof_2008',
u'reading_year_rank',
u'expenses2003_col7_rank_joint',
u'public_whip_dreammp856_distance',
u'public_whip_dreammp1124_distance',
u'public_whip_dreammp1052_both_voted',
u'three_word_alliterations_rank',
u'public_whip_dreammp1079_absent',
u'reading_year',
u'expenses2008_col4_quintile',
u'public_whip_dreammp1049_both_voted',
u'expenses2004_col7',
u'public_whip_dreammp975_absent',
u'expenses2009_colcomms_allowance',
u'expenses2005_col1_rank',
u'is_speaker_candidate',
u'expenses2007_col3_rank_joint',
u'expenses2008_colmp_other_travel_b_quintile',
u'expenses2008_total_quintile',
u'expenses2009_colstationery_rank_joint',
u'expenses2007_col5d_rank',
u'public_whip_dreammp1077_absent',
u'expenses2003_col8_rank',
u'comments_on_speeches_quintile',
u'journa_list_link',
u'expenses2009_colspouse_travel_a_rank',
u'expenses2002_col3_rank',
u'expenses2009_col9_rank_outof',
u'reading_year_rank_outof',
u'expenses2009_colmp_other_travel_b_rank',
u'expenses2005_col5_rank',
u'expenses2004_col7a',
u'expenses2007_col5_rank_outof',
u'expenses2005_col8_rank',
u'expenses2005_col7_quintile',
u'expenses2006_col7a',
u'public_whip_dreammp984_distance',
u'expenses2006_col1',
u'expenses2006_col3',
u'expenses2009_colmp_other_travel_b_rank_joint',
u'expenses2006_col5',
u'expenses2006_col4',
u'expenses2006_col7',
u'expenses2008_col1_quintile',
u'expenses2006_col9',
u'expenses2006_col8',
u'expenses2005_total_rank',
u'expenses2008_col2_rank',
u'comments_on_speeches_rank_joint',
u'expenses2009_colmp_reg_travel_c_rank_outof',
u'expenses2009_colmp_reg_travel_c_rank_joint',
u'expenses2009_col5_rank_outof',
u'public_whip_dreammp856_absent',
u'expenses2009_col6_quintile',
u'expenses2008_colfamily_travel_a_rank_outof',
u'expenses2007_col5e_rank_outof',
u'expenses2007_col7a_rank_outof',
u'expenses2007_col8_rank_outof',
u'expenses2002_col5_rank_outof',
u'writetothem_responsiveness_data_quality_category_2005',
u'writetothem_responsiveness_data_quality_category_2006',
u'writetothem_responsiveness_data_quality_category_2007',
u'expenses2007_col8_quintile',
u'public_whip_dreammp1084_both_voted',
u'expenses2002_col4_rank_outof',
u'writetothem_responsiveness_data_quality_category_2008',
u'date_of_birth',
u'expenses2009_col2_rank',
u'expenses2007_col1_quintile',
u'expenses2007_col5b_rank',
u'expenses2009_total_rank',
u'three_word_alliterations',
u'expenses2008_colmp_other_travel_b_rank',
u'writetothem_responsiveness_responded_2007',
u'writetothem_responsiveness_responded_2006',
u'writetothem_responsiveness_responded_2005',
u'expenses2009_colspouse_travel_a_rank_joint',
u'comments_on_speeches_rank',
u'expenses2004_col7_quintile',
u'expenses2007_col5a_quintile',
u'writetothem_responsiveness_responded_2008',
u'expenses2003_col2_quintile',
u'public_whip_dreammp996_distance',
u'expenses2008_colspouse_travel_a_rank_outof',
u'name',
u'expenses2007_col4_rank_outof',
u'expenses2008_colmp_reg_travel_b_rank_outof',
u'expenses2002_col8_rank_joint',
u'expenses2003_col7_rank',
u'expenses2007_col3_quintile',
u'public_whip_dreammp1079_both_voted',
u'expenses2009_colmp_reg_travel_b_quintile',
u'public_whip_dreammp1080_both_voted',
u'expenses2003_col5_rank',
u'expenses2009_col6_rank_joint',
u'public_whip_dreammp1053_both_voted',
u'expenses2004_col6_quintile',
u'expenses2005_col5_quintile',
u'expenses2008_col2_rank_outof',
u'maiden_speech',
u'expenses2005_col5_rank_joint',
u'expenses2008_colspouse_travel_a_rank',
u'expenses2008_col3_rank',
u'expenses2004_col9_rank_joint',
u'expenses2002_total_rank_outof',
u'expenses2008_colmp_reg_travel_d_quintile',
u'expenses2009_total_quintile',
u'expenses2008_col4_rank_joint',
u'expenses2009_colfamily_travel_a_rank',
u'expenses2008_total_rank_outof',
u'expenses2009_col9_rank',
u'expenses2009_col4_rank',
u'expenses2008_coltotal_travel_quintile',
u'expenses2003_col9_quintile',
u'expenses2003_total_rank_outof',
u'expenses2004_col9_rank',
u'expenses2002_col2_rank',
u'expenses2007_col8_rank',
u'expenses2009_col1_rank_outof',
u'expenses2007_col5b_rank_outof',
u'expenses2004_col4_rank_joint',
u'debate_sectionsspoken_inlastyear_rank_outof',
u'expenses2009_colmp_reg_travel_d_rank_outof',
u'expenses2007_col5c_rank',
u'expenses2002_total',
u'expenses2009_colmp_reg_travel_d_rank',
u'expenses2008_colmp_other_travel_c_quintile',
u'expenses2008_colcomms_allowance_rank_joint',
u'public_whip_dreammp856_both_voted',
u'expenses2009_coltotal_travel_quintile',
u'three_word_alliterations_rank_outof',
u'writetothem_responsiveness_fuzzy_response_description_2008',
u'reading_ease_rank_outof',
u'select_committees',
u'expenses2008_colmp_other_travel_c_rank_joint',
u'expenses2009_colmp_other_travel_c_quintile',
u'expenses2009_col5_rank',
u'writetothem_responsiveness_fuzzy_response_description_2005',
u'writetothem_responsiveness_fuzzy_response_description_2007',
u'writetothem_responsiveness_fuzzy_response_description_2006',
u'expenses2004_col7a_rank',
u'public_whip_dreammp1053_distance',
u'expenses2007_total_rank',
u'expenses2008_col3_quintile',
u'expenses2007_col7a',
u'expenses2005_col4_quintile',
u'expenses2007_col3_rank',
u'expenses2007_col2_quintile',
u'expenses2008_colmp_reg_travel_c',
u'expenses2008_colmp_reg_travel_b',
u'expenses2004_col9_rank_outof',
u'expenses2005_col7a_quintile',
u'expenses2002_col8_rank',
u'public_whip_dreammp1030_absent',
u'expenses2007_col7_quintile',
u'expenses2003_col6_quintile',
u'expenses2004_col8',
u'expenses2002_col5_rank',
u'expenses2006_col2',
u'expenses2008_col8_quintile',
u'expenses2009_colmp_other_travel_c_rank_joint',
u'expenses2009_col1_quintile',
u'expenses2008_colmp_reg_travel_a',
u'expenses2009_colmp_other_travel_d_rank_outof',
u'expenses2004_col4_quintile',
u'wrans_asked_inlastyear_rank',
u'expenses2007_col7a_quintile',
u'expenses2006_col6',
u'expenses2002_col1_rank',
u'expenses2007_col5a_rank_outof',
u'expenses2005_col3_rank',
u'expenses2007_col9_quintile',
u'expenses2005_total_rank_joint',
u'expenses2005_col7_rank_joint',
u'expenses2009_colmp_reg_travel_d_rank_joint',
u'wrans_subjects',
u'public_whip_dreammp1065_absent',
u'expenses2009_colmp_reg_travel_c_quintile',
u'expenses2008_colmp_reg_travel_d',
u'expenses2003_col1_rank_outof',
u'photo_attribution_link',
u'expenses2003_col5_quintile',
u'expenses2002_col7_rank',
u'expenses2009_colcomms_allowance_quintile',
u'expenses2003_col3_rank_joint',
u'public_whip_dreammp1132_distance',
u'expenses2005_col8_rank_outof',
u'expenses2009_coltotal_inc_travel',
u'expenses2008_col1_rank',
u'expenses2003_col2_rank_outof',
u'expenses2008_total',
u'expenses2009_colmp_reg_travel_a_quintile',
u'expenses2005_col7a',
u'expenses2008_colmp_other_travel_a_rank_joint',
u'expenses2007_col3_rank_outof',
u'expenses2009_col6_rank',
u'public_whip_dreammp837_both_voted',
u'expenses2007_col7a_rank',
u'expenses2008_colspouse_travel_a_rank_joint',
u'expenses2003_col6',
u'expenses2003_col7',
u'expenses2003_col4',
u'expenses2003_col5',
u'expenses2003_col2',
u'expenses2003_col3',
u'expenses2003_col1',
u'expenses2008_colmp_reg_travel_c_quintile',
u'expenses2003_col8',
u'expenses2003_col9',
u'public_whip_dreammp363_absent',
u'expenses2009_colspouse_travel_b',
u'expenses2009_colspouse_travel_a',
u'speaker_candidate_response_summary',
u'public_whip_dreammp837_absent',
u'expenses2007_col2_rank_joint',
u'expenses2004_col8_rank',
u'writetothem_responsiveness_notes_2008',
u'expenses2003_col2_rank_joint',
u'expenses2005_col1',
u'expenses2008_coltotal_inc_travel',
u'writetothem_responsiveness_notes_2006',
u'expenses2003_col8_rank_outof',
u'writetothem_responsiveness_notes_2005',
u'expenses2004_col5_rank',
u'speaker_candidate_elected',
u'expenses2009_col9_rank_joint',
u'public_whip_dreammp984_both_voted',
u'expenses2003_col1_rank_joint',
u'expenses2008_col8',
u'expenses2005_col6',
u'expenses2008_col3',
u'expenses2008_col2',
u'expenses2008_col1',
u'speaker_candidate_response',
u'expenses2008_col7',
u'expenses2008_col6',
u'expenses2008_col5',
u'expenses2008_col4',
u'expenses2008_colmp_reg_travel_b_rank_joint',
u'expenses2008_col1_rank_joint',
u'public_whip_dreammp1050_absent',
u'expenses2009_colmp_reg_travel_b_rank_outof',
u'expenses2002_total_quintile',
u'expenses2005_col4_rank',
u'expenses2005_col6_rank_outof',
u'expenses2007_col1_rank_joint',
u'writetothem_responsiveness_mean_2005',
u'debate_sectionsspoken_inlastyear',
u'writetothem_responsiveness_mean_2007',
u'writetothem_responsiveness_mean_2006',
u'expenses2007_col5d_quintile',
u'expenses2005_col6_rank_joint',
u'writetothem_responsiveness_mean_2008',
u'bbc_profile_url',
u'expenses2009_colstationery_quintile',
u'expenses2009_colmp_other_travel_a_rank_outof',
u'expenses2008_col4_rank',
u'public_whip_dreammp837_distance',
u'expenses2008_colspouse_travel_a',
u'expenses2008_colspouse_travel_b',
u'expenses2003_col7_quintile',
u'expenses2009_colmp_other_travel_d_quintile',
u'writetothem_responsiveness_notes_2007',
u'expenses2008_colcomms_allowance',
u'has_endorsed_speaker_principles',
u'expenses2008_col9_rank',
u'expenses2007_col7_rank_joint',
u'expenses2007_col9',
u'expenses2003_col1_quintile',
u'expenses2009_colmp_reg_travel_b_rank_joint',
u'expenses2003_col2_rank',
u'expenses2007_col5e_quintile',
u'expenses2007_col5e_rank_joint',
u'expenses2007_col5f_quintile',
u'reading_ease',
u'expenses2009_col4_rank_outof',
u'expenses2004_col3_quintile',
u'expenses2007_col2_rank_outof',
u'expenses2007_col7a_rank_joint',
u'expenses2005_col9_rank',
u'public_whip_dreammp1132_absent',
u'public_whip_dreammp1084_absent',
u'debate_sectionsspoken_inlastyear_rank',
u'expenses2009_col3_quintile',
u'expenses2007_col4_rank',
u'wikipedia_url',
u'expenses2003_col3_rank',
u'expenses2005_col1_rank_joint',
u'expenses2002_col7_quintile',
u'expenses2008_col2_rank_joint',
u'sp_url',
u'expenses2008_colmp_other_travel_c_rank_outof',
u'expenses2009_col1_rank_joint',
u'expenses2008_colmp_other_travel_b_rank_outof',
u'expenses2004_total_quintile',
u'expenses2008_col5_rank_joint',
u'expenses2003_col9_rank_joint',
u'expenses2008_colmp_other_travel_a_rank_outof',
u'expenses2003_col5_rank_joint',
u'expenses2007_col6_quintile',
u'public_whip_dreammp1049_distance',
u'public_whip_dreammp1079_distance',
u'expenses2004_col2_rank_outof',
u'expenses2007_col5a_rank_joint',
u'expenses2008_coltotal_exc_travel',
u'public_whip_dreammp1087_absent',
u'expenses2008_total_rank',
u'expenses2005_col7_rank_outof',
u'public_whip_dreammp1052_absent',
u'expenses2009_colmp_reg_travel_a_rank_outof',
u'expenses2007_col5_quintile',
u'expenses2003_col9_rank',
u'expenses2004_total',
u'expenses_url',
u'public_whip_dreammp1087_distance',
u'expenses2008_col8_rank_joint',
u'expenses2003_col7_rank_outof',
u'expenses2003_col9_rank_outof',
u'expenses2002_col7_rank_outof',
u'expenses2003_col6_rank_joint',
u'public_whip_dreammp363_distance',
u'expenses2005_total_rank_outof',
u'expenses2008_col7_rank_outof',
u'expenses2002_col7_rank_joint',
u'expenses2008_colmp_other_travel_b_rank_joint',
u'expenses2007_total',
u'public_whip_dreammp1071_both_voted',
u'expenses2003_total_quintile',
u'expenses2007_col6_rank',
u'public_whip_dreammp811_both_voted',
u'expenses2004_col3_rank_outof',
u'public_whip_dreammp826_both_voted',
u'expenses2002_col6_quintile',
u'expenses2002_col6_rank_outof',
u'expenses2009_colcomms_allowance_rank_outof',
u'expenses2008_colmp_reg_travel_d_rank_outof',
u'expenses2004_total_rank',
u'expenses2005_col8_quintile',
u'expenses2007_col5f_rank_outof',
u'twitter_username',
u'expenses2007_col5_rank',
u'expenses2002_col9_rank',
u'three_word_alliterations_rank_joint',
u'reading_ease_rank',
u'expenses2002_col3_rank_outof',
u'speaker_candidate_replied_on',
u'register_member_interests_date',
u'expenses2007_col5a',
u'expenses2002_col4_rank_joint',
u'expenses2007_col5b',
u'expenses2004_col7_rank_joint',
u'public_whip_dreammp996_both_voted',
u'public_whip_dreammp975_both_voted',
u'expenses2009_col3_rank',
u'expenses2009_colfamily_travel_a_quintile',
u'expenses2009_colstationery',
u'public_whip_dreammp996_absent',
u'expenses2008_col7a',
u'expenses2009_colcomms_allowance_rank_joint',
u'expenses2008_coltotal_travel_rank_joint',
u'public_whip_dreammp1074_both_voted',
u'expenses2009_col1_rank',
u'expenses2005_col7a_rank',
u'debate_sectionsspoken_inlastyear_quintile',
u'expenses2005_col3_quintile',
u'expenses2007_col6_rank_joint',
u'expenses2002_total_rank',
u'expenses2009_coltotal_travel_rank',
u'expenses2009_coltotal_travel_rank_joint',
u'public_whip_dreammp1080_distance',
u'public_whip_dreammp811_absent',
u'expenses2005_col2_rank_joint',
u'expenses2008_colemployee_travel_a',
u'expenses2004_col6_rank_outof',
u'expenses2007_col6_rank_outof',
u'public_whip_dreammp1049_absent',
u'party',
u'writetothem_responsiveness_mean_2005_rank_joint',
u'public_whip_dreammp1071_absent',
u'expenses2004_col4_rank_outof',
u'expenses2004_col7_rank_outof',
u'speaker_candidate_contacted_on',
u'expenses2009_col9_quintile',
u'expenses2007_col5f_rank_joint',
u'expenses2008_colmp_other_travel_a_rank',
u'expenses2004_col1_rank_joint',
u'expenses2004_col8_quintile',
u'expenses2004_col2_rank',
u'expenses2007_col5c_rank_outof',
u'expenses2007_col5d_rank_joint',
u'expenses2005_col7a_rank_outof',
u'expenses2008_coltotal_travel_rank',
u'expenses2008_col2_quintile',
u'expenses2008_colcomms_allowance_rank',
u'expenses2008_col7a_rank_joint',
u'public_whip_dreammp984_absent',
u'expenses2009_colstationery_rank_outof',
u'expenses2008_coltotal_travel_rank_outof',
u'expenses2005_col2',
u'expenses2005_col3',
u'expenses2005_col4',
u'expenses2005_col5',
u'expenses2007_col9_rank_joint',
u'expenses2005_col7',
u'expenses2005_col8',
u'expenses2005_col9',
u'public_whip_dreammp826_distance',
u'expenses2008_colmp_other_travel_d_rank_joint',
u'public_whip_dreammp1110_both_voted',
u'expenses2008_col4_rank_outof',
u'expenses2009_total',
u'expenses2003_col8_rank_joint',
u'expenses2009_col9',
u'expenses2003_col6_rank_outof',
u'public_whip_dreammp1065_both_voted',
u'expenses2009_col2_rank_joint',
u'mp_website',
u'expenses2004_col8_rank_outof',
u'expenses2007_total_rank_outof',
u'expenses2009_col2_quintile',
u'public_whip_dreammp1132_both_voted',
u'expenses2007_col5d_rank_outof',
u'expenses2009_col6',
u'expenses2009_col6_rank_outof',
u'reading_year_quintile',
u'expenses2002_col9_rank_outof',
u'expenses2009_col4_quintile',
u'expenses2009_col2',
u'public_whip_dreammp1052_distance',
u'expenses2002_col1_quintile',
u'expenses2003_total_rank_joint',
u'expenses2003_col8_quintile',
u'wrans_asked_inlastyear_quintile',
u'expenses2007_col2_rank',
u'expenses2008_col7_quintile',
u'expenses2002_col3_rank_joint',
u'public_whip_dreammp1065_distance',
u'public_whip_dreammp975_distance',
u'expenses2005_col2_quintile',
u'expenses2007_col9_rank_outof',
u'expenses2008_col9',
u'expenses2009_colfamily_travel_a_rank_joint',
u'public_whip_dreammp1080_absent',
u'expenses2004_total_rank_outof',
u'expenses2002_col4_rank',
u'expenses2008_colmp_reg_travel_c_rank_joint',
u'public_whip_dreammp1110_absent',
u'expenses2008_total_rank_joint',
u'expenses2003_col3_rank_outof',
u'debate_sectionsspoken_inlastyear_rank_joint',
u'expenses2009_colmp_reg_travel_a',
u'expenses2009_colmp_reg_travel_b',
u'expenses2009_colmp_reg_travel_c',
u'expenses2009_colmp_reg_travel_d',
u'public_whip_dreammp1074_absent',
u'expenses2008_col3_rank_joint',
u'public_whip_dreammp1109_distance',
u'expenses2002_total_rank_joint',
u'public_whip_dreammp1030_both_voted',
u'expenses2008_col7a_rank',
u'writetothem_sent_2008',
u'writetothem_sent_2007',
u'expenses2008_colmp_other_travel_a_quintile',
u'writetothem_sent_2005',
u'expenses2007_col4_rank_joint',
u'expenses2008_col7_rank',
u'expenses2007_col5c_rank_joint',
u'expenses2002_col3_quintile',
u'expenses2008_col9_quintile',
u'expenses2004_col7a_rank_outof',
u'expenses2004_col3_rank_joint',
u'public_whip_dreammp1051_distance',
u'expenses2007_col5f',
u'expenses2009_colmp_reg_travel_a_rank',
u'expenses2007_col1_rank',
u'public_whip_dreammp1124_absent',
u'expenses2003_col1_rank',
u'expenses2002_col4',
u'expenses2009_colspouse_travel_a_rank_outof',
u'expenses2004_col6_rank',
u'expenses2008_colmp_reg_travel_c_rank',
u'wrans_asked_inlastyear',
u'expenses2004_col3_rank',
u'select_committees_chair',
u'expenses2008_colspouse_travel_a_quintile',
u'expenses2009_colmp_other_travel_d_rank_joint',
u'public_whip_dreammp1109_absent',
u'expenses2004_col1_quintile',
u'expenses2002_col3',
u'expenses2008_colmp_other_travel_c_rank',
u'expenses2008_colmp_reg_travel_c_rank_outof',
u'expenses2004_col7a_quintile',
u'expenses2008_col5_rank',
u'expenses2005_col2_rank',
u'edm_ais_url',
u'expenses2009_colmp_other_travel_d_rank',
u'expenses2007_col5b_quintile',
u'writetothem_responsiveness_mean_2005_rank',
u'public_whip_dreammp1050_distance',
u'expenses2008_colmp_other_travel_d_rank_outof',
u'wrans_departments',
u'expenses2004_col4_rank',
u'expenses2009_coltotal_travel_rank_outof',
u'expenses2008_col8_rank_outof',
u'expenses2003_col6_rank',
u'expenses2004_col6',
u'expenses2004_col5',
u'expenses2004_col4',
u'expenses2004_col3',
u'expenses2004_col2',
u'expenses2008_colmp_other_travel_d_quintile',
u'expenses2002_col5_quintile',
u'expenses2004_col1_rank',
u'expenses2007_col5a_rank',
u'expenses2009_colmp_other_travel_a_quintile',
u'expenses2004_col1_rank_outof',
u'public_whip_dreammp1051_both_voted',
u'expenses2005_col3_rank_joint',
u'public_whip_dreammp1077_both_voted',
u'expenses2007_col2',
u'expenses2007_col3',
u'expenses2007_col1',
u'expenses2007_col6',
u'expenses2007_col7',
u'expenses2007_col4',
u'expenses2007_col5',
u'expenses2007_col8',
u'wrans_answered_inlastyear',
u'expenses2008_col7a_rank_outof',
u'comments_on_speeches_rank_outof',
u'expenses2007_col8_rank_joint',
u'expenses2005_col1_quintile',
u'by_member_id',
u'expenses2002_col2_rank_outof',
u'expenses2007_col5e_rank',
u'public_whip_dreammp1109_both_voted',
u'public_whip_dreammp1050_both_voted',
u'expenses2003_col4_rank',
u'comments_on_speeches',
u'expenses2005_col6_rank',
u'public_whip_dreammp1124_both_voted',
u'expenses2009_colstationery_rank',
u'expenses2008_col5_rank_outof',
u'expenses2007_col5c',
u'expenses2007_col5d',
u'expenses2007_col5e',
u'expenses2008_colmp_reg_travel_b_quintile',
u'public_whip_dreammp1087_both_voted',
u'wrans_asked_inlastyear_rank_outof',
u'expenses2009_colfamily_travel_a',
u'expenses2009_colfamily_travel_b',
u'expenses2003_col4_quintile',
u'expenses2008_col7_rank_joint',
u'expenses2002_col1_rank_outof',
u'expenses2009_colmp_other_travel_b_quintile',
u'register_member_interests_html',
u'public_whip_dreammp1071_distance',
u'expenses2004_col6_rank_joint',
u'expenses2008_colmp_reg_travel_a_quintile',
u'public_whip_dreammp811_distance',
u'expenses2007_total_rank_joint',
u'expenses2008_colmp_reg_travel_a_rank_joint',
u'expenses2008_colmp_reg_travel_a_rank',
u'guardian_mp_summary',
u'expenses2005_col3_rank_outof',
u'expenses2009_colmp_reg_travel_c_rank',
u'expenses2002_col5',
u'public_whip_dreammp1030_distance',
u'expenses2002_col7',
u'expenses2002_col6',
u'expenses2002_col1',
u'expenses2008_col9_rank_outof',
u'expenses2002_col2',
u'guardian_aristotle_id',
u'as_endorsed_speaker_principles',
u'expenses2005_col6_quintile',
u'expenses2002_col9',
u'expenses2002_col8',
u'expenses2008_col1_rank_outof',
u'expenses2008_coltotal_exc_travel_rank_outof',
u'expenses2008_col6_rank_joint',
u'expenses2009_colmp_other_travel_c_rank_outof',
u'public_whip_dreammp1053_absent',
u'expenses2004_col9_quintile',
u'expenses2008_colemployee_travel_b',
u'expenses2003_total',
u'expenses2009_col2_rank_outof',
u'expenses2002_col9_rank_joint',
u'expenses2009_col3_rank_outof',
u'public_whip_dreammp1051_absent',
u'expenses2009_colmp_other_travel_a_rank',
u'expenses2008_colcomms_allowance_rank_outof'])
In [25]:
mpdata.wiki_url
Out[25]:
0 http://en.wikipedia.org/wiki/Bridget_Phillipson
1 http://en.wikipedia.org/wiki/Sharon_Hodgson
2 http://en.wikipedia.org/wiki/Julie_Elliott
3 http://en.wikipedia.org/wiki/Naomi_Long
4 http://en.wikipedia.org/wiki/Ian_Paisley_Jr
5 http://en.wikipedia.org/wiki/Pat_Doherty
6 http://en.wikipedia.org/wiki/Kevan_Jones
7 http://en.wikipedia.org/wiki/Steve_Webb
8 http://en.wikipedia.org/wiki/Jenny_Chapman
9 http://en.wikipedia.org/wiki/Jeffrey_Donaldson
10 http://en.wikipedia.org/wiki/Hywel_Williams
11 http://en.wikipedia.org/wiki/Chris_Skidmore
12 http://en.wikipedia.org/wiki/Charles_Walker_(B...
13 http://en.wikipedia.org/wiki/Tom_Greatrex
14 http://en.wikipedia.org/wiki/David_Simpson_(UK...
...
635 None
636 None
637 http://en.wikipedia.org/wiki/Paul_Maskey
638 None
639 None
640 http://en.wikipedia.org/wiki/George_Galloway
641 None
642 None
643 None
644 None
645 None
646 None
647 None
648 http://en.wikipedia.org/wiki/Francie_Molloy
649 None
Name: wiki_url, Length: 650, dtype: object
In [29]:
## May need to call:
#import nltk
#nltk.download()
import summarize as sz
In [31]:
example_sum = sz.summarize_page(mpdata.wiki_url[0])
In [32]:
example_sum.summaries
Out[32]:
[u'Bridget Maeve Phillipson[2] (born 19 December 1983) is a British Labour Party politician who was elected at the 2010 general election as the Member of Parliament (MP) for Houghton and Sunderland South.',
u'[4] and in 2005 graduated with a BA (Hons) degree in Modern History.',
u"Between 2007-2010[5] she was a manager for 'Wearside Women in Need', a charity refuge for women affected by domestic violence.",
u'She is an aide to Jim Murphy, the Shadow Defence Secretary.',
u'At at September 2013, she was a member of the House of Commons Public Bill Committee for the Defence Reform Bill.',
u'She is married to Lawrence and enjoys reading, music and dog-walking,[5] as well as films.']
In [33]:
import re
In [70]:
MAX_LEN = 1000
def mp_sz(wiki_url):
def strip_wiki_extras(sent):
return re.sub("(\s\(born.*?\d{4}\))|(\[.*?\])|(^\s)", "", sent)
def abridge(sent):
i = MAX_LEN
while i < len(sent):
if sent[i] == " ":
return sent[:(i + 1)] + "..."
i += 1
return sent
sum_arr = sz.summarize_page(wiki_url).summaries
sum_clean_arr = [strip_wiki_extras(s) for s in sum_arr]
summary = " ".join(sum_clean_arr)
if len(summary) > MAX_LEN:
summary = abridge(summary)
return summary
In [71]:
for name, url in zip(mpdata.name, mpdata.wiki_url):
if url:
print name, "\n\n", mp_sz(url), "\n\n\n\n"
Bridget Phillipson
Bridget Maeve Phillipson is a British Labour Party politician who was elected at the 2010 general election as the Member of Parliament (MP) for Houghton and Sunderland South. and in 2005 graduated with a BA (Hons) degree in Modern History. Between 2007-2010 she was a manager for 'Wearside Women in Need', a charity refuge for women affected by domestic violence. She is an aide to Jim Murphy, the Shadow Defence Secretary. At at September 2013, she was a member of the House of Commons Public Bill Committee for the Defence Reform Bill. She is married to Lawrence and enjoys reading, music and dog-walking, as well as films.
Sharon Hodgson
I term_start = 7 October 2013 Sharon Hodgson is a British Labour Party politician, who has been the Member of Parliament (MP) for Washington and Sunderland West since 2010. After school she worked as an accounts clerk in the Team Valley, then attended Newcastle College and the Trades Union Congress Academy in London. Before becoming an MP herself, Hodgson worked as Labour Link Officer for UNISON. She was elected for two years as the women's officer within the Tyne Bridge Constituency Labour Party (CLP) in 1998. In 2004 Hodgson was chosen as the Labour candidate for the 2005 General Election for Gateshead East and Washington West. David Clelland, the current MP for Tyne Bridge was chosen in December 2006 by Labour Party members to fight the new seat of Gateshead at the next election. She won the seat in the 2010 General Election. In June 2009 Hodgson was promoted to the position of assistant Government Whip. Back in opposition, the new Labour Leader, Ed Miliband, appointed her Shadow Children ...
Julie Elliott
Julie Elliott is a British Labour Party politician, who has been the Member of Parliament (MP) for Sunderland Central since 2010. She was educated at Seaham Northlea Comprehensive, and gained a degree in Government and Public Policy at Newcastle Polytechnic. She became a regional organiser for the GMB Trade Union in 1999.
Naomi Long
Naomi Long MP is a Northern Irish politician. Born Naomi Rachel Johnston in East Belfast, she attended Mersey Street Primary and Bloomfield Collegiate School. She became an Assembly member for East Belfast in the 2003 election and within three years became deputy leader of the party. In 2003, Long was elected to the Northern Ireland Assembly for Belfast East, succeeding her fellow party member John Alderdice. She became the second woman to hold the post, after Grace Bannister (1981–82). On 6 May 2010, she defeated Peter Robinson, First Minister of Northern Ireland and leader of the Democratic Unionist Party, to become Member of Parliament (MP) for Belfast East in the House of Commons. Long became the first liberal-affiliated MP elected to Westminster in Northern Ireland since James Brown Dougherty in Londonderry City in 1914. On 10 December 2012 Long received a number of death threats along with petrol bomb being thrown inside an unmarked police car guarding her constituency office.
Ian Paisley Jnr
Ian Richard Kyle Paisley, Jr, MP is the Member of Parliament (MP) for North Antrim and member of the Northern Ireland Assembly for the Democratic Unionist Party (DUP) and an author. He is a member of the Free Presbyterian Church of Ulster. He regularly attended the Free Presbyterian Church where his father preached since he was a small child of 2 or 3 years. At university, he read Modern History and Irish Politics, and gained a BA (Hons) and MSSc respectively. In 1996, Paisley was elected to the Northern Ireland Forum for North Antrim. Paisley successfully ran to succeed his father as the Westminster MP for North Antrim in the 2010 UK general election, winning 46.4% of the vote share. I've never had any ambition to get anywhere beyond where I am today. " However, the DUP denied that Paisley's comments were discriminatory. " Although he cited growing up, he still strongly opposed homosexuality. There was a series of public blunders and further controversy in February 2008 following scrutiny ...
Pat Doherty
Patrick "Pat" Doherty is an Irish republican politician and abstentionist Member of Parliament for West Tyrone. He has been an abstentionist Sinn Féin Member of Parliament of the British parliament for West Tyrone since 2001, as well as a member of the Northern Ireland Assembly since the 1998 elections. In May 2002, using parliamentary privilege, Ulster Unionist Party MP David Burnside named Doherty as a member of the IRA army council. According to The Times Guide to the House of Commons, Doherty is married with three daughters and two sons, was educated at St Joseph's College, Lochwinnoch, and is a site engineer who likes building stone walls. In 2 1/2 years, Pat Doherty spent £16,000 on printer cartridges, an amount that he admitted was "probably excessive". In 2012 to some surprise Doherty supported funding for a loyalist flute band in Castlederg.
Kevan Jones
Kevan David Jones is a British Labour Party politician, who has been the Member of Parliament (MP) for North Durham since 2001. He went to the comprehensive Portland School on Sparken Hill in Worksop. His Private Member's Bill, the Christmas Day (Trading) Act 2004, successfully passed Parliament, and came into force in December 2004. He polled 64.1% of the vote. As at September 2013, he was a member of the House of Commons Public Bill Committee for the Defence Reform Bill. In October 2008 he was appointed Parliamentary Under Secretary of State and Minister for Veterans at the Ministry of Defence. Ms Lumley refused to comment. Kevan Jones has many links with local community, such as when he presented certificates to school children from the Ouston and Pelton Brass Band for successfully completing their exams.
Steve Webb
Steven John Webb, better known as Steve Webb, is an English Liberal Democrat politician. Webb was born in Birmingham to Brian and Patricia Webb, and attended the local comprehensive school (Dartmouth High School), before going on to study Philosophy, Politics and Economics at Hertford College, Oxford. At the 1997 General Election Webb was elected as MP for Northavon, just north of Bristol, overturning a Conservative majority of over 11,000. On 8 January 2009 Nick Clegg announced his "General Election Team" and an "economic recovery group" with Webb appointed as Work and Pensions spokesman. Webb is also a member of the cross-party Christians in Parliament and vice-president of the Liberal Democrat Christian Forum. He has also recognised the emerging potential of online social utility networks by joining MySpace and Facebook, two of the biggest. At the 2010 general election, the constituency of Northavon was split into two new constituencies. I have confirmed that I regard accrued index-linked ...
Jenny Chapman
Jennifer Chapman (born 25 September 1973) is a British Labour Party politician, who has been the Member of Parliament (MP) for Darlington since the 2010 general election. She has worked as a prison psychologist.. Chapman worked as senior parliamentary researcher for Darlington Labour MP Alan Milburn. " She was elected Darlington MP in the 2010 general election with a majority of 3,388. Chapman is also a Vice Chair of the Blairite organisation Progress http://www.progressonline.org.uk/about-progress/chair-patrons/
Jeffrey M Donaldson
Jeffrey Mark Donaldson, MP is a Northern Irish politician and Member of Parliament for Lagan Valley belonging to the Democratic Unionist Party. He attended the Kilkeel High School then Castlereagh College. Two of Donaldson's cousins were killed by the Provisional Irish Republican Army while serving in the Royal Ulster Constabulary: Sam Donaldson was killed in 1970 and Alex Donaldson, a Chief Inspector, died in a mortar attack on a Newry police station in 1985. But I also wanted to be involved politically in opposing that campaign as well". Donaldson attained the rank of corporal in the Ulster Defence Regiment. In 1998 Donaldson was in the Ulster Unionists' negotiating team for the Good Friday Agreement. Disagreements over the Good Friday Agreement negotiations planted the seeds of discontent between the figurehead of the anti-agreement faction of the UUP (Donaldson) and the pro-agreement party leader (Trimble). Donaldson engineered several party council meetings in protest against David ...
Hywel Williams
He previously represented Caernarfon. He joined Bangor University's social work practice centre in 1985. He contested the Clwyd South seat for the Welsh Assembly and polled 25% of the vote, almost 5 times as many votes as secured by Plaid Cymru in the 1997 general election. His parliamentary responsibilities within Plaid Cymru are work and pensions, disabilities and health. He has three daughters, named Gwenno, Elin and Angharad and also a newborn son, named Owain. On 1 September 2007 Williams visited the Kurdish Cultural Festival in Gelsenkirchen, Germany, where he spoke of the importance of the maintenance of the Kurdish language. In a 2007 interview with Raz Jabary for Kurdish Aspect, Williams expressed his support for an independent Kurdistan and claimed that it would be no threat to Turkey. Moses Gruffydd
Chris Skidmore
Bristol, UK He serves as the Conservative Member of Parliament (MP) for Kingswood, England. Christopher Skidmore was born on 17 May 1981 at Longwell Green in South Gloucestershire (then in the county of Avon), in the West of England. Skidmore was educated at Bristol Grammar School, a mixed independent school in the City of Bristol, Avon, in south west England, before going up to Christ Church in the University of Oxford, graduating in 2002 with a first class degree in Modern History (BA) and a MSt. Chris Skidmore worked for David Willetts and Michael Gove as an advisor, before being selected to contest his home seat of Kingswood in 2009. He is a member of the Commons Select Committee on Health, specialising in health care reform and social care. Skidmore is also a member of the Free Enterprise Group of MPs, and along with colleagues co-authored After the Coalition (2011) and Britannia Unchained (2012).
Charles Walker
Charles Ashley Rupert Walker is a Conservative Party politician in the United Kingdom. Walker was educated at the American School in London, followed by the University of Oregon in the United States, receiving a BSc in Political Science in 1990. Walker belongs to the trade union Amicus. In the 2005 general election he was elected Member of Parliament for Broxbourne, succeeding Dame Marion Roe. Walker shouted "police state", a comment to which Tony Blair responded furiously. He has lived with Obsessive Compulsive Disorder for more than 30 years.
Tom Greatrex
He is presently Shadow Energy Minister. Greatrex was born in Kent. He currently lives in Cambuslang and is married, with twin daughters. He latterly worked as a policy adviser for Scottish Secretaries Douglas Alexander, Des Browne and Jim Murphy. Selected as the Scottish Labour Party candidate for Rutherglen and Hamilton West, he was elected with a majority of 21,002 at the United Kingdom general election, 2010. He was the Shadow Under-Secretary of State for Scotland supporting Ann McKechin, Shadow Secretary State for Scotland until October 2011, when he was made Shadow Energy Minister.
David Simpson
Thomas David Simpson is a Democratic Unionist (DUP) politician in Northern Ireland. In the 2001 general election Simpson stood unsuccessfully against Ulster Unionist Party leader David Trimble in the parliamentary constituency of Upper Bann. Simpson won the parliamentary seat from Trimble in the 2005 general election and retained his council seat. Simpson used parliamentary privilege in 2007 to accuse Sinn Féin MLA Francie Molloy of involvement in the 1979 killing of Simpson's cousin, a former police officer. and the Transport Select Committee (2007-2009). He has been DUP Spokesperson on Trade and Industry (2005-2007), Transport (2007-2009), Young People (2007-2010), International Development (2007-2010), and Business, Innovation and Skills (2009 to present), Communities and Local Government (2010 to present) and Education (2012 to present). He is a proponent of creationism, and his former election agent and constituency assistant, David McConaghie, who had played a key role in Simpson's ...
Grahame Morris
Grahame Mark Morris is a British Labour Party politician, who was elected at the 2010 general election as the Member of Parliament for Easington, replacing the previous Labour MP John Cummings, who decided to step down. He had worked as a researcher for previous MP John Cummings since 2003, and was also Secretary of Easington Constituency Labour Party. He was Parliamentary Private Secretary to Shadow Secretary of State for Energy and Climate Change, Meg Hillier.
Adrian Sanders
Adrian Mark Sanders (born 25 April 1959, Paignton) is a Liberal Democrat politician in the United Kingdom. He went to primary schools in Paignton and Torquay then Torquay Boys' Grammar School. He has had Type 1 diabetes since 1990 and campaigns on issues relating to diabetes. Sanders is a fan of rock music and occasionally presents a Rock Show on local radio station Palm 105.5. From 1986-89 he lived in Hebden Bridge, West Yorkshire, working for the Association of Liberal Democrat Councillors before moving back to Paignton in 1990. He then moved to become a policy officer at the National Council for Voluntary Organisations (1993–94) and then the Southern Association of Voluntary Action Groups for Europe. The case prompted a change in the law, banning potentially confusing party descriptions. In the 1997 general election Sanders stood again in Torbay, this time successfully, defeating the Conservative incumbent Rupert Allason by a majority of 12 votes. ] holds. In the 2006 Leadership election ...
Justine Greening
Greening was appointed Secretary of State for International Development in September 2012. Before entering Parliament, she trained and qualified as an accountant, before working as an accountant/finance manager for, amongst others, Price Waterhouse Coopers, GlaxoSmithKline and Centrica. She unseated Tony Colman, who had held the seat for Labour since defeating David Mellor in 1997. In January 2009 following a further Shadow Ministerial reshuffle, Greening was promoted to Shadow Minister for London, within the Communities and Local Government Team with responsibility for Local Government Finance. In October 2011 she became Secretary of State for Transport and was appointed a Privy Councillor. Greening represents the London constituency of Putney and had always campaigned against a third runway at Heathrow. A move attacked by Boris Johnson. On 4 September 2012 she was replaced by Patrick McLoughlin at the Department of Transport and became Secretary of State for International Development ...
Lady Hermon
Sylvia Eileen Hermon, Lady Hermon (née Paisley; born 11 August 1955) is a Northern Irish politician. On 25 March 2010, Lady Hermon announced that she was resigning from the Ulster Unionist Party, and would fight the next general election as an Independent candidate. Born Sylvia Eileen Paisley in Galbally, Dungannon, County Tyrone. She rose to a place on the party Executive by the following year. On 6 November 2008 her husband Jack Hermon died. She is a longstanding supporter of the Alzheimer's Research Trust and helped launch its Northern Ireland network centre. Hermon was chosen as UUP candidate for the North Down constituency to contest the 2001 general election and went on to defeat the incumbent Robert McCartney by over 7,000 votes. She subsequently lost the Trade and Industry portfolio and took responsibility for Culture, Media and Sport in 2002. Initially considered to be amongst the frontrunners, Hermon eventually declined the opportunity, feeling that she could not combine it with ...
Jack Lopresti
Giacomo (Jack) Lopresti (23 August 1969) is a British Conservative Party politician. Lopresti previously represented the Stockwood Ward on Bristol City Council from 1999 to 2007. On leaving school, Lopresti worked in his family ice cream and catering business locally for over ten years. Lopresti was a member of the A-List. Lopresti is currently[when? In 2011, he was a member of the special Select Committee set up to scrutinise the Bill that became the Armed Forces Act 2011.
Frank Roy
Frank Roy is a British Labour Party politician, who has been the Member of Parliament (MP) for Motherwell and Wishaw since 1997. He is the first MP born locally to represent Motherwell and Wishaw. In 2001 Roy resigned as parliamentary private secretary to Helen Liddell in the wake of the cancellation of a visit to Carfin Grotto by Irish Taoiseach Bertie Ahern.
Phil Wilson
The by-election was held following the resignation of former Prime Minister and member for Sedgefield, Tony Blair. Wilson has also worked as a gambling lobbyist. Wilson is known for being one of the "Famous Five", a group of local Labour Party members who selected a young Tony Blair as the Labour candidate for Sedgefield for the 1983 election. Wilson has been an Assistant Whip for the Labour Party since the 2010 general election. In 2012 he was elected to the Progress strategy board.
Albert Owen
Albert Owen is a Welsh Labour Party politician and Member of Parliament (MP) for Ynys Môn. In 1999, he unsuccessfully stood for the Labour party in the Welsh Assembly elections. Albert has rebelled against the Labour Party's political whip on certain occasions. However he has also voted for the Iraq war and against motions opposing post office closures He married Angela Margaret Magee.
Chris Ruane
Christopher Shaun Ruane is a Welsh Labour Party politician who has been the Member of Parliament (MP) for the Vale of Clwyd since 1997. Ruane attended Ysgol Mair Roman Catholic primary school in Rhyl. He was a primary school teacher from 1982–97, and a deputy head from 1991-7. He contested Clwyd North West in 1992. He was the Parliamentary Private Secretary to Peter Hain from 2003 until his resignation in March 2007 in protest against the decision to replace Trident. The study showed that 27 MPS, including Chris Ruane receive rental income from their homes in London while simultaneously claiming rental income from the taxpayer to live at another residence. Mr Ruane said: “I have acted completely within the rules.” They have two daughters.
Michael McCann
Michael McCann is a British Labour Party politician who has been the Member of Parliament (MP) for East Kilbride, Strathaven and Lesmahagow since 2010, succeeding Adam Ingram. In 2013, he became one of the few Labour MPs to vote against the Marriage (Same Sex Couples) Bill, which eventually was passed with cross-party support.
Menzies Campbell
Sir Walter Menzies Campbell CH CBE QC MP (/ˈmɪŋɨs/; born 22 May 1941), often known as Ming Campbell, is a British Liberal Democrat politician and advocate, and a retired sprinter. Campbell held the British record for the 100 metres sprint from 1967 to 1974, having run the distance in 10.2 s. He captained the Great Britain athletics team in 1965 and 1966. He was involved in debating at the Union and with the Glasgow University Dialectic Society, where his contemporaries included Derry Irvine, Donald Dewar and John Smith, who attempted to recruit him for the Labour Party. He specialised in planning and licensing law. Campbell married Elspeth, Lady Grant-Suttie, daughter of Major General Roy Urquhart, in June 1970. He also captained the Great Britain athletics team in 1965 and 1966, and held the British 100 metres record from 1967 to 1974. Campbell became chairman of the Scottish Liberals in 1975, and was a candidate at various general elections between 1974 and 1983. He was made the Liberal ...
Roberta Blackman-Woods
Professor Roberta Carol Blackman-Woods (née Woods; born 16 August 1957) is a British Labour Party politician who has been the Member of Parliament (MP) for the City of Durham since 2005. Blackman-Woods is from Northern Ireland and was educated at the University of Ulster, graduating with a BSc degree and later a PhD in Social Science. She has also served as a Councillor on Oxford and Newcastle City Councils. She added her husband's surname, Blackman, after selection by the constituency Labour Party, to avoid confusion with the existing Liberal Democrat candidate Carol Woods. She also quoted Bill Bryson's Notes from a Small Island: "Why, it's wonderful - a perfect little city... Following Des Brown's retirement to the backbenches she served as PPS to David Lammy MP as Minister of State for Higher Education. In 2005, she became Chair of the All Party Afghanistan Group and in 2007 she also became Chair of the All Party Balanced and Sustainable Communities Group. In the October 2011 shadow ...
Gordon Brown
Brown has been a Member of Parliament (MP) since 1983, first for Dunfermline East and currently for Kirkcaldy and Cowdenbeath. He entered Parliament in 1983 as the MP for Dunfermline East. In 2007, Tony Blair resigned as Prime Minister and Labour Leader and Brown was chosen to replace him in an uncontested election. On 11 May 2010, Brown officially resigned as Prime Minister and Leader of the Labour Party. She was the daughter of John Souter, a timber merchant. At age sixteen he wrote that he loathed and resented this "ludicrous" experiment on young lives. He served as Rector until 1975, and also edited the document The Red Paper on Scotland. His first Westminster office mate was a newly elected MP from the Sedgefield constituency by the name of Tony Blair. After the sudden death of Labour leader John Smith in May 1994, Brown did not contest the leadership after Tony Blair became favourite, deciding to make way for Blair to avoiding splitting the pro-modernising vote in the leadership ...
Jane Ellison
Bradford Jane Elizabeth Ellison is a British Conservative Party politician, who was elected at the 2010 general election as Member of Parliament (MP) for Battersea. Ellison was born in Bradford and studied at the University of Oxford and was seminal in organising various social musical groups, in particular, a close harmony octet called Mixed Revues. A former Barnet London Borough Councillor, she was selected as Conservative Parliamentary Candidate for Battersea in September 2006 following an Open Primary held at the Battersea Arts Centre, and having unsuccessfully contested Pendle in 2005. Ellison also maintains a season ticket at Tottenham Hotspur F.C.
Susan Elan Jones
Susan Elan Jones is a British Labour Party politician, who was elected at the 2010 general election as the Member of Parliament for Clwyd South, replacing the previous Labour MP Martyn Jones on his retirement. Jones comes from Ponciau near Rhosllannerchrugog and studied at Bristol University and Cardiff University. In the 1997 general election, Jones stood for Labour in Surrey Heath, coming third. She has actively supported Welsh language issues, writing in one article: "I would contend that for a Labour supporter being actively pro-Welsh language is a natural part of our DNA, as much as supporting public services, tackling low pay or taking on any other equalities cause". David Cameron reversed his decision two weeks later and announced that the service would not be scrapped. She has frequently backed various local business interests in parliamentary debates, including solar panel businesses concerned about cuts to subsidies for domestic solar panels, and wood panelling businesses which ...
Nia Griffith
Nia Rhiannon Griffith is a British Labour Party politician, who has been the Member of Parliament (MP) for Llanelli since 2005. She is currently Shadow Minister for Wales, deputy to Shadow Secretary of State for Wales, Owen Smith. Her family comes from mining villages near Neath, in South Wales. Griffith became a teacher at Queen Elizabeth Cambria School in Carmarthen then Gowerton Comprehensive School in Swansea. She was elected as a councillor to the Carmarthen Town Council in 1987, serving as the sheriff in 1997 and deputy mayor in 1998. She also chairs the All-Party Steel and Metals Group. Since October 2010, she has been a Shadow Minister for Business Innovation and Skills. Her main political interests are tackling climate change and issues affecting industry. Following a review of expenses Sir Thomas Legg ordered Griffith to repay £4,099.77 in mortgage interest claims. In June 2010 Griffith had to repay the cost of sending 71 letters having been found to have breached Parliamentary ...
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
<ipython-input-71-95b3e60b7346> in <module>()
1 for name, url in zip(mpdata.name, mpdata.wiki_url):
2 if url:
----> 3 print name, "\n\n", mp_sz(url), "\n\n\n\n"
<ipython-input-70-ca7cc4c11061> in mp_sz(wiki_url)
13 return sent
14
---> 15 sum_arr = sz.summarize_page(wiki_url).summaries
16 sum_clean_arr = [strip_wiki_extras(s) for s in sum_arr]
17 summary = " ".join(sum_clean_arr)
/Users/james/Projects/Hacks/MPofTheDay/summarize.pyc in summarize_page(url)
84 import requests
85
---> 86 html = bs4.BeautifulSoup(requests.get(url).text)
87 b = find_likely_body(html)
88 summaries = map(lambda p: re.sub('\s+', ' ', summarize_block(p.text)).strip(), b.find_all('p'))
/Users/james/anaconda/lib/python2.7/site-packages/bs4/__init__.pyc in __init__(self, markup, features, builder, parse_only, from_encoding, **kwargs)
194 self.reset()
195 try:
--> 196 self._feed()
197 break
198 except ParserRejectedMarkup:
/Users/james/anaconda/lib/python2.7/site-packages/bs4/__init__.pyc in _feed(self)
208 self.builder.reset()
209
--> 210 self.builder.feed(self.markup)
211 # Close out any unfinished strings and close all the open tags.
212 self.endData()
/Users/james/anaconda/lib/python2.7/site-packages/bs4/builder/_lxml.pyc in feed(self, markup)
224 self.parser = self.parser_for(encoding)
225 self.parser.feed(markup)
--> 226 self.parser.close()
227 except (UnicodeDecodeError, LookupError, etree.ParserError), e:
228 raise ParserRejectedMarkup(str(e))
/Users/james/anaconda/lib/python2.7/site-packages/lxml/etree.so in lxml.etree._FeedParser.close (src/lxml/lxml.etree.c:89871)()
/Users/james/anaconda/lib/python2.7/site-packages/lxml/etree.so in lxml.etree._TargetParserContext._handleParseResult (src/lxml/lxml.etree.c:99222)()
/Users/james/anaconda/lib/python2.7/site-packages/lxml/etree.so in lxml.etree._TargetParserContext._handleParseResult (src/lxml/lxml.etree.c:99046)()
/Users/james/anaconda/lib/python2.7/site-packages/lxml/etree.so in lxml.etree._ExceptionContext._raise_if_stored (src/lxml/lxml.etree.c:9359)()
/Users/james/anaconda/lib/python2.7/site-packages/lxml/etree.so in lxml.etree._handleSaxStartNoNs (src/lxml/lxml.etree.c:94582)()
/Users/james/anaconda/lib/python2.7/site-packages/lxml/etree.so in lxml.etree._PythonSaxParserTarget._handleSaxStart (src/lxml/lxml.etree.c:98399)()
/Users/james/anaconda/lib/python2.7/site-packages/bs4/builder/_lxml.pyc in start(self, name, attrs, nsmap)
162 namespace, name = self._getNsTag(name)
163 nsprefix = self._prefix_for_namespace(namespace)
--> 164 self.soup.handle_starttag(name, namespace, nsprefix, attrs)
165
166 def _prefix_for_namespace(self, namespace):
/Users/james/anaconda/lib/python2.7/site-packages/bs4/__init__.pyc in handle_starttag(self, name, namespace, nsprefix, attrs)
332
333 # print "Start tag %s: %s" % (name, attrs)
--> 334 self.endData()
335
336 if (self.parse_only and len(self.tagStack) <= 1
/Users/james/anaconda/lib/python2.7/site-packages/bs4/__init__.pyc in endData(self, containerClass)
286
287 o = containerClass(current_data)
--> 288 self.object_was_parsed(o)
289
290 def object_was_parsed(self, o, parent=None, most_recent_element=None):
/Users/james/anaconda/lib/python2.7/site-packages/bs4/__init__.pyc in object_was_parsed(self, o, parent, most_recent_element)
296 if most_recent_element is not None:
297 most_recent_element.next_element = o
--> 298 self._most_recent_element = o
299 parent.contents.append(o)
300
KeyboardInterrupt:
David Laws
In [47]:
re.sub("\s\(born.*?\d{4}\)", "", re.sub("\[\d+\]", "", "fdasf (in 1945) sonethfjd"))
Out[47]:
'fdasf (in 1945) sonethfjd'
In [ ]:
Content source: jamesporter/MPofTheDay
Similar notebooks: