In [12]:
import pandas as pd
import matplotlib.pyplot as plt
% matplotlib inline
In [13]:
# Read the water data set from the CSV file next to this notebook.
# The file is decoded as latin-1.
water_csv_path = 'water_data_class.csv'
df = pd.read_csv(water_csv_path, encoding='latin-1')
In [14]:
# Preview the first rows only instead of dumping the whole frame into the
# notebook output; use df.shape / df.info() for the overall size and dtypes.
df.head()
Out[14]:
In [15]:
# Restrict the analysis to countries with more than 25 million inhabitants
# that report a non-missing, non-zero 2002 withdrawal rate.
# Author's note: 2002 is the column with the most missing values, so only
# that column is checked here (other columns are not filtered for gaps).
population_mask = df['population'] > 25000000
reported_2002_mask = df['annual freshwater withdrawl [%] 2002'].notnull()
nonzero_2002_mask = df['annual freshwater withdrawl [%] 2002'] != 0

# Intermediate views kept for inspection: each applies one criterion alone.
big_ones = df[population_mask]
notnull = df[reported_2002_mask]

# All three criteria combined.
selection = df[population_mask & reported_2002_mask & nonzero_2002_mask]
selection
Out[15]:
In [16]:
# Summary statistics of the 2014 total renewable freshwater resources
# (billion cubic meters) for the selected countries.
resources_2014 = selection['total renewable freshwater resources [billion cubic meters] 2014']
resources_2014.describe()
Out[16]:
In [17]:
# Summary statistics of the 2014 annual freshwater withdrawal rate [%].
# Note for the TA: values above 100% indicate that, on top of the renewable
# resources, water from non-renewable sources is withdrawn, as described in
# the World Bank metadata for the original data sets.
withdrawl_2014 = selection['annual freshwater withdrawl [%] 2014']
withdrawl_2014.describe()
Out[17]:
In [18]:
# Five selected countries with the smallest total renewable freshwater
# resources in 2014 (ascending sort is the sort_values default).
resources_col = 'total renewable freshwater resources [billion cubic meters] 2014'
selection.sort_values(by=resources_col).head(5)
Out[18]:
In [19]:
# Five selected countries with the largest total renewable freshwater
# resources in 2014.
resources_col = 'total renewable freshwater resources [billion cubic meters] 2014'
selection.sort_values(by=resources_col, ascending=False).head(5)
Out[19]:
In [20]:
# Add a column with the population expressed in units of 100,000 inhabitants.
population_hundret = selection['population'] / 100000

# DataFrame.insert raises ValueError when the column already exists, which
# made this cell fail whenever it was run a second time. Only insert when the
# column is missing so the cell can be re-run safely; the column still lands
# at position 3.
if 'population in 100,000 inhabitants' not in selection.columns:
    selection.insert(3, "population in 100,000 inhabitants", population_hundret)
selection
Out[20]:
In [60]:
# Relate the 2014 renewable freshwater resources to the population:
# (billion cubic meters / population in 100,000s) * 1000
# gives million cubic meters per 100,000 inhabitants.
# `population_hundret` comes from the cell that adds the
# 'population in 100,000 inhabitants' column.
water_per_hundret = (selection['total renewable freshwater resources [billion cubic meters] 2014']/population_hundret)*1000

# DataFrame.insert raises ValueError if the column already exists, so guard
# the insertion to keep this cell re-runnable; the column still lands at
# position 6.
if "Renewable freshwater resource per 100,000 inhabitants [million cubic meters]" not in selection.columns:
    selection.insert(6, "Renewable freshwater resource per 100,000 inhabitants [million cubic meters]", water_per_hundret)
In [61]:
# Five selected countries with the most renewable freshwater per
# 100,000 inhabitants.
per_capita_col = 'Renewable freshwater resource per 100,000 inhabitants [million cubic meters]'
ranked_by_water = selection.sort_values(by=per_capita_col, ascending=False)
ranked_by_water.head(5)
Out[61]:
In [62]:
# Plot the five selected countries with the most renewable freshwater per
# 100,000 inhabitants.
plt.style.use('ggplot')

per_capita_col = 'Renewable freshwater resource per 100,000 inhabitants [million cubic meters]'

# The earlier (commented-out) attempt sorted by the per-100,000 column but
# plotted the total-resources column, so the bars did not appear sorted.
# Plot the same column the rows are ranked by.
top_five = selection.sort_values(by=per_capita_col, ascending=False).head(5)

# barh draws the first row at the bottom of the axis; reverse the order so
# the country with the most water ends up on top.
top_five.iloc[::-1].plot(kind='barh', x='country', y=per_capita_col, legend=False)
In [63]:
# Five selected countries with the least renewable freshwater per
# 100,000 inhabitants (ascending sort is the default).
per_capita_col = 'Renewable freshwater resource per 100,000 inhabitants [million cubic meters]'
ranked_by_scarcity = selection.sort_values(by=per_capita_col)
ranked_by_scarcity.head(5)
Out[63]:
In [64]:
# Horizontal bar chart of the five selected countries with the least
# renewable freshwater per 100,000 inhabitants, one bar per country.
per_capita_col = "Renewable freshwater resource per 100,000 inhabitants [million cubic meters]"
bottom_five = selection.sort_values(by=per_capita_col).head(5)
bottom_five.plot.barh(x="country", y=per_capita_col, legend=False)
Out[64]:
In [65]:
# Show the column labels of `selection`; useful for copying the exact
# (long) column names referenced by the surrounding cells.
selection.columns
Out[65]:
In [67]:
# Build a compact table of the withdrawal rates for 2002 and 2014 plus
# their difference (2014 minus 2002) per country.
rate_2002_col = 'annual freshwater withdrawl [%] 2002'
rate_2014_col = 'annual freshwater withdrawl [%] 2014'

withdrawl_diff = selection[rate_2014_col] - selection[rate_2002_col]

# assign() appends the new column after the three selected ones, i.e. at
# position 3, and returns a new frame.
withdrawl = selection[['country', rate_2002_col, rate_2014_col]].assign(
    **{'Change in withdrawl rate': withdrawl_diff}
)
withdrawl.head()
Out[67]:
In [70]:
# Five countries with the lowest change in withdrawal rate
# (first rows of the ascending ranking).
change_ranked = withdrawl.sort_values(by='Change in withdrawl rate')
change_ranked.head(5)
Out[70]:
In [71]:
# Last five rows of the ascending ranking, i.e. the highest changes in
# withdrawal rate.
# NOTE(review): sort_values places missing values last, so rows without a
# computable change would show up here -- confirm there are none.
change_ranked = withdrawl.sort_values(by='Change in withdrawl rate')
change_ranked.tail(5)
Out[71]:
In [92]:
# Scatter plot of population vs. 2014 withdrawal rate for the ten most
# populous selected countries, with each dot labelled by its country name.
population_col = 'population in 100,000 inhabitants'
rate_2014_col = 'annual freshwater withdrawl [%] 2014'

most_populous = selection.sort_values(by=population_col, ascending=False).head(10)
ax = most_populous.plot(kind='scatter', x=population_col, y=rate_2014_col)

# Label every dot: Axes.annotate places text at a data point; the small
# offset (in points) keeps the text from overlapping the marker. Rows with a
# missing coordinate have no dot, so they are skipped.
labelled = most_populous.dropna(subset=[population_col, rate_2014_col])
for name, x_val, y_val in zip(labelled['country'], labelled[population_col], labelled[rate_2014_col]):
    ax.annotate(str(name), (x_val, y_val), xytext=(4, 4), textcoords='offset points')
Out[92]:
In [93]:
# Scatter plot of population vs. 2014 withdrawal rate for the ten least
# populous selected countries, with each dot labelled by its country name.
population_col = 'population in 100,000 inhabitants'
rate_2014_col = 'annual freshwater withdrawl [%] 2014'

least_populous = selection.sort_values(by=population_col).head(10)
ax = least_populous.plot(kind='scatter', x=population_col, y=rate_2014_col)

# Label every dot: Axes.annotate places text at a data point; the small
# offset (in points) keeps the text from overlapping the marker. Rows with a
# missing coordinate have no dot, so they are skipped.
labelled = least_populous.dropna(subset=[population_col, rate_2014_col])
for name, x_val, y_val in zip(labelled['country'], labelled[population_col], labelled[rate_2014_col]):
    ax.annotate(str(name), (x_val, y_val), xytext=(4, 4), textcoords='offset points')
Out[93]:
In [99]:
# Population vs. 2014 withdrawal rate for the ten selected countries with
# the largest total renewable freshwater resources in 2014.
resources_col = 'total renewable freshwater resources [billion cubic meters] 2014'
water_richest = selection.sort_values(by=resources_col, ascending=False).head(10)
water_richest.plot.scatter(x='population in 100,000 inhabitants', y='annual freshwater withdrawl [%] 2014')
Out[99]:
In [101]:
# Population vs. 2014 withdrawal rate for the ten selected countries with
# the smallest total renewable freshwater resources in 2014.
resources_col = 'total renewable freshwater resources [billion cubic meters] 2014'
water_poorest = selection.sort_values(by=resources_col).head(10)
water_poorest.plot.scatter(x='population in 100,000 inhabitants', y='annual freshwater withdrawl [%] 2014')
Out[101]:
In [108]:
# 2014 withdrawal rate vs. renewable freshwater per 100,000 inhabitants for
# all selected countries. The axes are clipped to x in [0, 80] and
# y in [0, 3500], so points outside that window are not shown.
selection.plot.scatter(
    x='annual freshwater withdrawl [%] 2014',
    y='Renewable freshwater resource per 100,000 inhabitants [million cubic meters]',
    xlim=(0, 80),
    ylim=(0, 3500),
)
Out[108]:
In [ ]: