In [37]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import seaborn as sns
%matplotlib inline
sns.set_context("notebook")
sns.set(style="whitegrid")
# 2014 expenditure the US spent on foreign aid.
# Found here: https://explorer.usaid.gov/query
long_aid = pd.read_excel('data/2000_2014_foreign_aid.xlsx')
aid = long_aid[long_aid.fiscal_year == 2014]
for attribute in aid:
print attribute
In [38]:
# Let's look at the development assistance committee categories
categories = aid['dac_category_name'].unique()
In [39]:
def total_costs(dataframe, region_label, cost_label):
costs = {}
for index,row in dataframe.iterrows():
region = row[region_label]
cost = row[cost_label]
if cost >= 0:
if region in costs:
costs[region] += cost
else:
costs[region] = cost
return costs
def costs_of(df, category, region_label, cost_label):
aid_category = df[df.dac_category_name == category]
return total_costs(aid_category, region_label, cost_label)
def dict_to_df(dictionary, cols):
return pd.DataFrame(dictionary.items(), columns=cols)
totals = total_costs(aid, 'dac_category_name', 'constant_amount')
totals_df = dict_to_df(totals, ['Category', 'Cost'])
totals_df
Out[39]:
In [40]:
totals_df.to_csv('data/totals.csv', index=False)
In [41]:
years = range(2000,2015)
year_totals = {}
for year in years:
year_totals[year] = total_costs(long_aid[long_aid.fiscal_year == year], 'dac_category_name', 'constant_amount')
In [42]:
year_totals_df = pd.DataFrame.from_dict(year_totals)
year_totals_dfs = {}
for year in year_totals:
year_totals_dfs[year] = dict_to_df(year_totals[year], ['Expenditure', 'Cost'])
year_totals_df = pd.concat(year_totals_dfs)
year_totals_df['Year'] = year_totals_df.index.get_level_values(0)
year_totals_df.to_csv('data/year_totals.csv', index=False)
print(year_totals_df.columns)
In [43]:
country_costs = {}
for category in categories:
country_costs[category] = costs_of(aid, category, 'country_name', 'constant_amount')
country_costs_df = pd.DataFrame.from_dict(country_costs)
country_costs_df.index.name = 'Country'
country_costs_df = country_costs_df.reset_index(level=0)
regions = {}
for index,row in aid.iterrows():
region = row['region_name']
country = row['country_name']
regions[country] = region
country_costs_df['Region'] = country_costs_df['Country']
country_costs_df['Region'] = country_costs_df['Region'].replace(regions)
country_costs_df
Out[43]:
In [44]:
country_costs_df.columns = ['2014 ' + col for col in country_costs_df.columns]
country_costs_df = country_costs_df.rename(columns={'2014 Country': 'Country', '2014 Region': 'Region'})
country_costs_df
Out[44]:
In [53]:
span_country_costs = {}
for category in categories:
span_country_costs[category] = costs_of(long_aid, category, 'country_name', 'constant_amount')
span_country_costs_df = pd.DataFrame.from_dict(span_country_costs)
span_country_costs_df.index.name = 'Country'
span_country_costs_df = span_country_costs_df.reset_index(level=0)
span_regions = {}
for index,row in long_aid.iterrows():
span_region = row['region_name']
span_country = row['country_name']
span_regions[span_country] = span_region
span_country_costs_df['Region'] = span_country_costs_df['Country']
span_country_costs_df['Region'] = span_country_costs_df['Region'].replace(regions)
In [55]:
def to_int(x):
if np.isnan(x):
return np.nan
else:
return int(x)
span_country_costs_df[categories] = span_country_costs_df[categories].applymap(to_int)
span_country_costs_df
Out[55]:
In [72]:
cats = ['Agriculture',
'Commodity Assistance',
'Economic Growth',
'Education',
'Governance',
'Health and Population',
'Humanitarian',
'Infrastructure',
'Other']
def totals_for_cats(df):
spending = {}
for index, row in df.iterrows():
cat = row['dac_category_name']
code = row['country_code']
if cat in cats and code != 'WLD' and code != '':
name = row['country_name']
amount = row['constant_amount']
if amount >= 0:
if code in spending:
spending[code]['2014 ' + cat] += amount
else:
spending[code] = {'name': name}
for c in cats:
spending[code]['2014 ' + c] = 0
spending[code]['2014 ' + cat] = amount
return spending
In [73]:
aid = aid.dropna(axis=0, subset=['country_code'])
totalsEachCat = totals_for_cats(aid)
In [75]:
totalsdf = pd.DataFrame.from_dict(totalsEachCat, orient='index')
totalsdf.to_csv('data/aid_FY2014_condensed.csv', encoding='utf-8')
In [47]:
span_country_costs_df.columns = ['2000-2014 ' + col for col in span_country_costs_df.columns]
In [48]:
span_country_costs_df = span_country_costs_df.drop('2000-2014 Region', 1)
span_country_costs_df = span_country_costs_df.rename( columns={'2000-2014 Country': 'Country'})
In [49]:
span_country_costs_df
Out[49]:
In [24]:
def plot_costs(costs, x, y, xlabel, ylabel):
plt.figure(figsize=(12, 26))
cost_plot = sns.barplot(data=costs.sort(x, ascending=False), x=x, y=y)
cost_plot.set(xlabel=xlabel, ylabel=ylabel)
sns.despine(left=True, bottom=True)
plot_costs(totals_df, 'Cost', 'Category', 'Total Money Spent in 2014', 'US Foreign Expenditure Category')
In [25]:
hdr = pd.read_excel('data/HDR.xlsx')
hdr
Out[25]:
In [26]:
populations = pd.read_csv('data/2014_populations.csv')
foreign_aid = pd.merge(hdr,country_costs_df, on='Country', how='outer')
foreign_aid = pd.merge(foreign_aid,span_country_costs_df, on='Country', how='outer')
foreign_aid = foreign_aid.rename(columns={'Region_y': 'Region'})
for category in ['2014 ' + cat for cat in categories]:
foreign_aid[category] = foreign_aid[category].fillna(0)
for category in ['2000-2014 ' + cat for cat in categories]:
foreign_aid[category] = foreign_aid[category].fillna(0)
foreign_aid = pd.merge(foreign_aid, populations, on='Country', how='outer')
foreign_aid = foreign_aid.dropna(subset=['2013 Life Expectancy'])
foreign_aid.ix[foreign_aid.Country==u"C\xf4te d'Ivoire", 'Country'] = "Ivory Coast"
In [27]:
europe = 'Europe and Eurasia'
asia = 'East Asia and Oceania'
n_america = 'North America'
s_america = 'South and Central America'
middle_east = 'Middle East and North Africa'
s_africa = 'Sub-Saharan Africa'
regions = dict(zip(foreign_aid.Country, foreign_aid.Region))
In [28]:
for country in regions:
if regions[country] == 'Europe and Eurasia':
regions[country] = europe
if regions[country] == 'East Asia and Oceania':
regions[country] = asia
if regions[country] == 'Western Hemisphere':
regions[country] = s_america
regions['Australia'] = asia
regions['New Zealand'] = asia
regions['Korea'] = asia
regions['China'] = asia
regions['Austria'] = europe
regions['Belgium'] = europe
regions['Bolivia'] = s_america
regions['Burma'] = asia
regions['Ceylon'] = asia
regions['Czechoslovakia'] = europe
regions['Dominican Rep'] = s_america
regions['France'] = europe
regions['Iran'] = middle_east
regions['Italy'] = europe
regions['Laos'] = asia
regions['Malaya'] = asia
regions['Portugal'] = europe
regions['Saudi Arabia'] = middle_east
regions['Spain'] = europe
regions['Syria'] = middle_east
regions['South Africa'] = s_africa
regions['United states'] = n_america
regions['Canada'] = n_america
regions['Venezuala'] = s_america
regions['Germany'] = europe
regions['Yugoslavia'] = europe
regions['Andorra'] = europe
regions['Antigua and Barbuda'] = s_america
regions['Bahamas'] = s_america
regions['Bolivia (Plurinational State of)'] = s_america
regions['Brunei Darussalam'] = asia
regions['Cape Verde'] = s_africa
regions['Congo'] = s_africa
regions['Congo (Democratic Republic of the)'] = s_africa
regions['Ivory Coast'] = s_africa
regions['Denmark'] = europe
regions['Dominica'] = s_america
regions['Equatorial Guinea'] = s_africa
regions['Eritrea'] = middle_east
regions['Finland'] = europe
regions['Grenada'] = s_america
regions['Guinea-Bissau'] = s_africa
regions['Hong Kong, China (SAR)'] = asia
regions['Iceland'] = europe
regions['Iran (Islamic Republic of)'] = middle_east
regions['Kiribati'] = asia
regions['Korea (Republic of)'] = asia
regions['Kuwait'] = middle_east
regions['Lao People\'s Democratic Republic'] = asia
regions['Latvia'] = europe
regions['Liechtenstein'] = europe
regions['Lithuania'] = europe
regions['Luxembourg'] = europe
regions['Micronesia (Federated States of)'] = asia
regions['Moldova (Republic of)'] = europe
regions['Myanmar'] = middle_east
regions['Netherlands'] = europe
regions['Norway'] = europe
regions['Palestine, State of'] = middle_east
regions['Poland'] = europe
regions['Qatar'] = middle_east
regions['Russian Federation'] = europe
regions['Saint Kitts and Nevis'] = s_america
regions['Saint Lucia'] = s_america
regions['Saint Vincent and the Grenadines'] = s_america
regions['Singapore'] = asia
regions['Slovakia'] = europe
regions['Slovenia'] = europe
regions['Suriname'] = s_america
regions['Sweden'] = europe
regions['Syrian Arab Republic'] = middle_east
regions['Tanzania (United Republic of)'] = s_africa
regions['The former Yugoslav Republic of Macedonia'] = europe
regions['United Arab Emirates'] = middle_east
regions['United Kingdom'] = europe
regions['United States'] = n_america
regions['Canada'] = n_america
regions['Venezuela (Bolivarian Republic of)'] = s_america
regions['Viet Nam'] = asia
def map_getter(someMap, column):
def getter(x):
key = x[column]
if key in someMap:
return (someMap[key])
else:
return None
return getter
foreign_aid = foreign_aid
foreign_aid['Region'] = foreign_aid.apply(map_getter(regions, 0), axis=1)
In [29]:
countries = foreign_aid[pd.isnull(foreign_aid['Region'])]['Country'].unique()
for country in countries:
print country
In [30]:
foreign_aid['Region'].unique()
Out[30]:
In [31]:
foreign_aid.to_csv('data/merged_aid.csv', index=False)
In [122]:
#countries of the world with coordinates data
cowcoords = pd.read_csv('data/cow_cleaned.csv', encoding='utf-8')
In [129]:
def trimStr(x):
if type(x) is unicode:
return x.strip()
else:
return x
cowcoords = cowcoords.applymap(trimStr)
cowcoords.iloc[0]['Country']
Out[129]:
In [131]:
newmerged = foreign_aid.merge(cowcoords, how='outer', on='Country')
Out[131]:
In [134]:
newmerged.to_csv('data/merged_with_coordinates.csv', encoding='utf-8')
In [114]:
raw_data = {
'subject_id': ['1', '2', '3', '4', '5'],
'first_name': ['Alex ', 'Amy ', 'Allen ', 'Alice ', 'Ayoung '],
'last_name': ['Anderson', 'Ackerman', 'Ali', 'Aoni', 'Atiches']}
df_a = pd.DataFrame(raw_data, columns = ['subject_id', 'first_name', 'last_name'])
df_a
Out[114]:
In [117]:
def addStar(x):
if type(x) is str:
return x.strip()+'*'
else:
return x
df_a.applymap(addStar)
Out[117]:
In [96]:
raw_data = {
'subject_id': ['4', '5', '6', '7', '8'],
'first_name': ['Billy', 'Brian', 'Bran', 'Bryce', 'Betty'],
'last_name': ['Bonder', 'Black', 'Balwner', 'Brice', 'Btisan']}
df_b = pd.DataFrame(raw_data, columns = ['subject_id', 'first_name', 'last_name'])
df_b
Out[96]:
In [98]:
raw_data = {
'subject_id': ['1', '2', '3', '4', '5', '7', '8', '9', '10', '11'],
'test_id': [51, 15, 15, 61, 16, 14, 15, 1, 61, 16]}
df_n = pd.DataFrame(raw_data, columns = ['subject_id','test_id'])
df_new = pd.concat([df_a, df_b])
pd.merge(df_new, df_n, on='subject_id')
Out[98]:
In [ ]:
sns.jointplot(kind='reg',x="2014 Economic Growth", y="2013 GNI per Capita", data=foreign_aid)
In [ ]:
sns.jointplot(kind='reg', x="2014 Education", y="2013 Mean Years of Schooling", data=foreign_aid)
In [ ]:
sns.jointplot(kind='reg',x="2014 Health and Population", y="2013 Life Expectancy", data=foreign_aid)