In [6]:
# datasource: https://data.humdata.org/dataset/unhcr-refugee-pop-stats/resource/fbacbba3-1b20-4331-931b-6a21a4cb80f5
# dataset is labeled "Group of concern to UNHCR"
In [1]:
import pandas as pd
import matplotlib.pyplot as plt
% matplotlib inline
In [2]:
# Load the UNHCR population table; the CSV path is relative, so the file must
# sit in the notebook's working directory.
df = pd.read_csv('refugee_data.csv')
In [3]:
df.head(20)
Out[3]:
In [4]:
df['Population type'].unique()
Out[4]:
In [5]:
df.columns
Out[5]:
In [6]:
# Narrow the full table to the identifying columns plus the 2013 figures.
recent = df.loc[:, ['Country', 'Origin_Returned_from', 'Population type', '2013']]
def population_type_count(a, data=None, group_col='Country', value_col='2013', top_n=20):
    """Rank groups by their summed population for one population type.

    Parameters
    ----------
    a : str
        Value of the 'Population type' column to keep (e.g. 'Refugees').
    data : pandas.DataFrame, optional
        Frame to aggregate. When omitted, the notebook-level ``recent`` frame
        is used, which matches the original behaviour.
    group_col : str, default 'Country'
        Column whose values define the groups.
    value_col : str, default '2013'
        Column that is summed within each group.
    top_n : int, default 20
        Number of highest-ranked groups to return.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``group_col`` with a single ``value_col`` column, sorted in
        descending order and truncated to ``top_n`` rows.
    """
    source = recent if data is None else data
    # Keep the filtered rows under their own name instead of rebinding `a`.
    subset = source[source['Population type'] == a]
    # Selecting with a list ([[value_col]]) makes the aggregate a DataFrame directly.
    totals = subset.groupby(group_col)[[value_col]].sum()
    return totals.sort_values(by=value_col, ascending=False).head(top_n)
In [7]:
population_type_count('Refugees')
Out[7]:
In [8]:
population_type_count('Asylum seekers')
Out[8]:
In [9]:
population_type_count('Internally displaced')
Out[9]:
In [10]:
population_type_count('Stateless')
Out[10]:
In [11]:
population_type_count('Returned IDPs')
Out[11]:
In [12]:
population_type_count('Returned refugees')
Out[12]:
In [13]:
recent.columns
Out[13]:
In [14]:
# 2013 'Returned refugees' totals grouped by the 'Origin_Returned_from' column;
# the ten largest groups are displayed.
returned_refugees = recent[recent['Population type'] == 'Returned refugees']
# The earlier standalone groupby was dropped: its result was neither stored nor shown.
returned_table = returned_refugees.groupby('Origin_Returned_from')[['2013']].sum()
returned_table.sort_values(by='2013', ascending=False).head(10)
Out[14]:
In [15]:
# 2013 'Asylum seekers' totals grouped by the 'Origin_Returned_from' column;
# the ten largest groups are displayed.
asylum_seekers = recent.loc[recent['Population type'] == 'Asylum seekers']
seekers_table = asylum_seekers.groupby('Origin_Returned_from')[['2013']].sum()
seekers_table.sort_values(by='2013', ascending=False).head(10)
Out[15]:
In [16]:
df.columns
Out[16]:
In [33]:
# Yearly totals (2000-2013) for three population types.
# totals_dict_list holds one {year: total} dict per entry of pop_types, in the same order.
years_available = [str(year) for year in range(2000, 2014)]
pop_types = ['Asylum seekers', 'Refugees', 'Internally displaced']

totals_dict_list = []
for poptype in pop_types:
    # Filter once per population type rather than once per (type, year) pair.
    poptype_only = df[df['Population type'] == poptype]
    poptype_dictionary = {}
    for year in years_available:
        # sum() skips NaN by default, so no explicit notnull() mask is needed.
        poptype_per_year = poptype_only[year].sum()
        print(year, 'there were in total', poptype_per_year, poptype)
        poptype_dictionary[year] = poptype_per_year
    totals_dict_list.append(poptype_dictionary)
totals_dict_list
Out[33]:
In [34]:
# One-row table of yearly totals for 'Asylum seekers' (entry 0 of totals_dict_list).
asylum_over_time = totals_dict_list[0]
asylums_table = pd.DataFrame(
    data=asylum_over_time,
    index=['Total asylum seekers per year'],
)
asylums_table
#asylums_table.plot(kind='bar')
Out[34]:
In [35]:
# One-row table of yearly totals for 'Refugees' (entry 1 of totals_dict_list).
refugees_over_time = totals_dict_list[1]
refugees_table = pd.DataFrame(
    data=refugees_over_time,
    index=['Total refugees per year'],
)
refugees_table
Out[35]:
In [36]:
# One-row table of yearly totals for 'Internally displaced' (entry 2 of totals_dict_list).
idp_over_time = totals_dict_list[2]
idps_table = pd.DataFrame(
    data=idp_over_time,
    index=['Total IDPs per year'],
)
idps_table
Out[36]:
In [65]:
#CONCATENATE DID NOT WORK (pd.concat NEEDS A LIST OF DATAFRAMES, NOT THE RAW DICT IN totals_dict_list[0]), BUT KEEPING THESE NOTES FOR REFERENCE
#asylums_table.plot(kind='bar')
#asylum_over_time = totals_dict_list[0]
#asylums_table = pd.DataFrame(asylum_over_time, index=['Total asylum seekers per year'])
#asylums_table
#pd.concat(totals_dict_list[0], axis=0, join='outer', join_axes=None, ignore_index=False, keys=None, levels=None, names=None, verify_integrity=False, copy=True)
#http://stackoverflow.com/questions/31974548/take-a-row-from-one-dataframe-and-insert-into-first-row-of-another-dataframe-in
#A-> dataframe with (v,w,x,y,z) columns ( Some values)
#b -> dataframe with (v,w,x,y,z) columns ( All values)
#b = pd.concat([A[A.v==1],b])
#asylums_table = pd.concat([idps_table[idps_table.v==1],asylums_table])
In [50]:
# Stack the asylum-seeker and refugee rows into one table.
# pd.concat replaces DataFrame.append, which no longer exists in pandas 2.x.
two_table = pd.concat([asylums_table, refugees_table])
two_table
Out[50]:
In [52]:
# Add the IDP row to get one table with a row per population type.
# pd.concat replaces DataFrame.append, which no longer exists in pandas 2.x.
totals_table = pd.concat([two_table, idps_table])
totals_table
# https://pandas.pydata.org/docs/reference/api/pandas.concat.html
Out[52]:
In [71]:
#totals_table.plot()
plt.style.use("ggplot")
# Transpose so the years run along the x-axis and each population type is one line.
totals_table2 = totals_table.T
# A stray positional `y` after the keyword arguments made the original call a
# SyntaxError; it has been removed. The y-axis is capped at 25 million.
ax = totals_table2.plot(figsize=(10,7), ylim=(0,25000000), linewidth=3)
ax.set_xlabel('Year')
ax.set_ylabel('Number of people');
#http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.plot
Out[71]:
In [ ]: