In [22]:
import datetime
from dateutil import parser
import requests
import json
import pandas
import matplotlib
%matplotlib inline
matplotlib.style.use('ggplot')
In [23]:
def age(str, today=None):
    """Return the age in whole years for a birth date stored in Popit.

    Parameters
    ----------
    str : date string (parsed with dateutil's ``parser.parse``) or an
        object that is already a ``datetime.date`` / ``datetime.datetime``.
        The name shadows the builtin; it is kept so existing callers that
        pass it by keyword keep working.
    today : optional ``datetime.date`` to measure against. Defaults to
        ``datetime.date.today()``; pass a fixed date for reproducible output.

    Returns
    -------
    int : number of completed years between the birth date and ``today``.
    """
    # Date objects are used as-is; anything else goes through dateutil.
    if isinstance(str, datetime.date):
        born = str
    else:
        born = parser.parse(str)
    if today is None:
        today = datetime.date.today()
    # One year is taken off while this year's birthday is still ahead.
    birthday_pending = (today.month, today.day) < (born.month, born.day)
    return today.year - born.year - int(birthday_pending)
In [24]:
# Ask the API how many result pages the persons listing has.
persons_url = 'http://api.openhluttaw.org/en/persons'
page_request = requests.get(persons_url)
page_request.raise_for_status()  # fail loudly instead of parsing an error body
pages = page_request.json()['num_pages']

# Fetch every page and build one flat list of all representatives.
persons = []
for page in range(1, pages + 1):
    req_representatives = requests.get(persons_url, params={'page': page})
    req_representatives.raise_for_status()
    persons.extend(req_representatives.json()['results'])

# Add a derived 'age' to every record that carries a birth date.
# .get() keeps records without a 'birth_date' key from raising KeyError.
for person in persons:
    if person.get('birth_date'):
        person['age'] = age(person['birth_date'])
In [25]:
import pandas
# Only records for which an age was derived take part in the statistics.
# `in` replaces dict.has_key(), which exists on Python 2 only.
persons_with_age = [entry for entry in persons if 'age' in entry]

# One row per person, indexed by name, with the age and the raw birth date.
persons_age_df = pandas.DataFrame(
    {
        "age": [entry["age"] for entry in persons_with_age],
        "birth_date": [entry["birth_date"] for entry in persons_with_age],
    },
    index=[entry["name"] for entry in persons_with_age],
)

# sort_values() returns a new frame; the result must be kept to take effect.
persons_age_df = persons_age_df.sort_values(by="age")

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("Median:" + " " + str(persons_age_df['age'].median()))
print("Youngest:" + " " + str(persons_age_df['age'].min()))
print("Oldest:" + " " + str(persons_age_df['age'].max()))
In [26]:
# Histogram of representatives' ages on a large square canvas.
age_column = persons_age_df["age"]
age_column.plot(kind="hist", figsize=(15, 15))
Out[26]:
In [27]:
# Representatives recorded as female. The comparison is case-insensitive
# (the membership cells below lowercase gender as well) and tolerates a
# missing or empty gender field.
women_reps = [
    person for person in persons
    if (person.get('gender') or '').lower() == 'female'
]

# Share of women among all fetched persons, computed from the data rather
# than hard-coded; guarded so an empty result set does not divide by zero.
if persons:
    women_share = 100.0 * len(women_reps) / len(persons)
else:
    women_share = 0.0

print("Number of Women reps " + str(len(women_reps)))
print("Percentage of Women representatives " + "{0:.2f}".format(women_share) + "%")
In [28]:
# Some values are mixed with Unicode Myanmar-language text; those are not
# counted for now, so callers need a way to recognise pure-ASCII strings.
def is_ascii(s):
    """Return True when every character of ``s`` has a code point below 128."""
    for ch in s:
        if ord(ch) >= 128:
            return False
    return True
# Keep only persons whose national identity is present and pure ASCII.
ascii_entries = [
    entry for entry in persons
    if entry['national_identity'] and is_ascii(entry['national_identity'])
]
data = {
    "name": [entry["name"] for entry in ascii_entries],
    "national_identity": [entry["national_identity"] for entry in ascii_entries],
}
national_identity_df = pandas.DataFrame(data)

# Count how often each national identity occurs and draw horizontal bars.
series = national_identity_df['national_identity']
bar = series.value_counts()
bar.plot(kind='barh', figsize=(20, 10))
Out[28]:
In [29]:
# Fetch the organization record whose memberships are analysed below.
amyotha_url = 'http://api.openhluttaw.org/en/organizations/897739b2831e41109713ac9d8a96c845'
amyotha_req = requests.get(amyotha_url)
In [30]:
# Decode the response body and pull out the organization's membership list.
amyotha_payload = json.loads(amyotha_req.content)
memberships = amyotha_payload['result']['memberships']
In [31]:
# Build one record per membership: constituency label, party name, gender.
amyotha = []
# organization id -> party name, so each party is requested only once
# instead of once per member.
party_names = {}
for member in memberships:
    party_id = member['on_behalf_of_id']
    if party_id not in party_names:
        r = requests.get('http://api.openhluttaw.org/en/organizations/' + party_id)
        r.raise_for_status()
        party_names[party_id] = r.json()['result']['name']
    party = party_names[party_id]
    if not party:
        # No party name available: skip the member rather than reuse the
        # `party` value left over from the previous iteration.
        continue
    # NOTE: the 'consituency' spelling is kept because later cells drop the
    # column by exactly this name.
    amyotha.append({'consituency': member['post']['label'],
                    'party': party,
                    'gender': member['person']['gender'].lower()})
In [32]:
# Tabulate the membership records, then count genders within each party.
amyotha_df = pandas.DataFrame(amyotha)
amyotha_df_gender = amyotha_df.drop('consituency', axis=1)
gender_by_party = amyotha_df_gender.groupby('party')['gender']
gender_counts = gender_by_party.value_counts()
gender_counts
Out[32]:
In [33]:
# Turn the (party, gender) counts into one row per party with 'male' and
# 'female' columns, then draw them as stacked horizontal bars.
matplotlib.style.use('ggplot')

# unstack() moves the innermost index level (gender) into columns.
# reindex() guarantees both columns exist and fixes their order; a party
# with no members of one gender gets 0 instead of raising on a missing key.
gender_df = (
    gender_counts.unstack()
    .reindex(columns=['male', 'female'])
    .fillna(0)
    .astype(int)
)

# Kept for any later code that still reads the intermediate lists.
index_party = list(gender_df.index)
gender_values = gender_df.values.tolist()

gender_df.plot.barh(stacked=True, figsize=(12, 5))
Out[33]:
In [34]:
# Number of members per party, shown as a pie chart.
parties = amyotha_df.loc[:, 'party']
pie = parties.value_counts()
pie.plot(kind='pie', figsize=(20, 20))
Out[34]:
In [35]:
# Same membership breakdown as above, read from the API's '/my/' endpoints.
# NOTE(review): presumably the Myanmar-language variant of the data - confirm.
amyotha_req = requests.get('http://api.openhluttaw.org/my/organizations/897739b2831e41109713ac9d8a96c845')
amyotha_req.raise_for_status()
memberships = amyotha_req.json()['result']['memberships']

amyotha_my = []
# organization id -> party name, so each party is requested only once
# instead of once per member.
party_names_my = {}
for member in memberships:
    party_id = member['on_behalf_of_id']
    if party_id not in party_names_my:
        r = requests.get('http://api.openhluttaw.org/my/organizations/' + party_id)
        r.raise_for_status()
        party_names_my[party_id] = r.json()['result']['name']
    party = party_names_my[party_id]
    if not party:
        # No party name available: skip the member rather than reuse the
        # `party` value left over from the previous iteration.
        continue
    amyotha_my.append({'consituency': member['post']['label'],
                       'party': party,
                       'gender': member['person']['gender'].lower()})

# Tabulate the records and count genders within each party.
amyotha_my_df = pandas.DataFrame(amyotha_my)
amyotha_df_gender = amyotha_my_df.drop('consituency', axis=1)
gender_counts = amyotha_df_gender.groupby('party')['gender'].value_counts()
gender_counts
Out[35]:
In [36]:
# Turn the (party, gender) counts into one row per party with 'male' and
# 'female' columns, then draw them as stacked horizontal bars.

# unstack() moves the innermost index level (gender) into columns.
# reindex() guarantees both columns exist and fixes their order; a party
# with no members of one gender gets 0 instead of raising on a missing key.
gender_df = (
    gender_counts.unstack()
    .reindex(columns=['male', 'female'])
    .fillna(0)
    .astype(int)
)

# Kept for any later code that still reads the intermediate lists.
index_party = list(gender_df.index)
gender_values = gender_df.values.tolist()

# matplotlib is already imported and set to inline mode in the notebook's
# first cell, so neither is repeated here.
matplotlib.rc('font', family='Padauk') #Needed for proper rendering of characters
gender_df.plot.barh(stacked=True, figsize=(12, 5))
Out[36]:
In [37]:
# Fetch the organization record and list each membership's constituency
# label together with the name of the party the member sits for.
pyithu_req = requests.get('http://api.openhluttaw.org/en/organizations/7f162ebef80e4a4aba12361ea1151fce')
pyithu_req.raise_for_status()
memberships = pyithu_req.json()['result']['memberships']

pyithu = []
# organization id -> party name, so each party is requested only once
# instead of once per member.
pyithu_party_names = {}
for member in memberships:
    party_id = member['on_behalf_of_id']
    if party_id not in pyithu_party_names:
        r = requests.get('http://api.openhluttaw.org/en/organizations/' + party_id)
        r.raise_for_status()
        pyithu_party_names[party_id] = r.json()['result']['name']
    party = pyithu_party_names[party_id]
    if not party:
        # No party name available: skip the member rather than reuse the
        # `party` value left over from the previous iteration.
        continue
    pyithu.append({'consituency': member['post']['label'],
                   'party': party})
In [38]:
%matplotlib inline
pyithu_df = pandas.DataFrame(pyithu)
parties = pyithu_df['party']
pie = parties.value_counts()
pie.plot.pie(figsize=(20,20))
Out[38]:
In [ ]: