In [26]:
%matplotlib notebook
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import difflib
import re
#import seaborn as sns
Import Local Authority to NUTS key // Source: http://ec.europa.eu/eurostat/web/nuts/local-administrative-units
In [3]:
key = pd.read_excel('key.xlsx',sheetname='UK', usecols=['NUTS3_13','LAU1_NAT_CODE_NEW'])
key = key.drop_duplicates()
key.columns = ['NUTS3','Area Code']
key = key.set_index('Area Code')
Area Code for South Oxfordshire (Contains Chargrove)
In [4]:
ac = 'E07000179'
Import Gross Value Added // Source: https://www.ons.gov.uk/economy/grossvalueaddedgva/datasets/regionalgrossvalueaddedincomeapproach
In [22]:
gva = pd.read_excel('gva.xls',sheetname='Table 1', skiprows=1)
gva = gva[gva['NUTS level'].str.contains("NUTS3") == True]
gva = gva.set_index('NUTS code')
gva = key.merge(gva, left_on='NUTS3', right_index=True, how='left')
gva.drop(gva.columns[[0,1,2,3,4,5,6,7,8,9,10,11,21]], axis=1, inplace=True)
gva = gva[gva.index.str.contains(ac) == True]
gva = gva.transpose()
gva.columns = ['Gross Value Added']
bs = gva.iloc[0]['Gross Value Added']
gva['GVA % Change'] = ((gva['Gross Value Added'] - bs)/bs) * 100
xi = [2006,2007,2008,2009,2010,2011,2012,2013,2014]
gva = gva.set_index([xi],drop=True)
gva.head()
Out[22]:
In [6]:
healthd = pd.read_excel('health.xlsx',sheetname='District & UA')
healthi = pd.read_excel('health.xlsx',sheetname='Indicator Metadata')
Selected Indicator
In [7]:
Id = '1.01ii'
ind = healthi[healthi['Indicator'].str.contains('{} '.format(Id)) == True]
nm = ind.iloc[0]['Indicator']
unit = ind.iloc[0]['Unit']
#Print out information about selected indicator.
ind
Out[7]:
In [23]:
# Select indicator from main dataset
data = healthd[healthd['Indicator'].str.contains('{} '.format(Id)) == True]
data = data[[1,4,6]]
# Get list of time periods
xx = data[[0]]
xx = xx.drop_duplicates()
xl = xx['Time Period'].tolist()
# Loop to create column for each year
for x in range(len(xl)):
inputs = data[data['Time Period'].str.contains(xl[x]) == True]
inputs = inputs.drop('Time Period', 1)
# Average male and female values
inputs = inputs.groupby(inputs['Area Code']).mean()
inputs.columns = [xl[x]]
if x == 0:
datar = inputs
else:
datar = datar.merge(inputs, left_index=True, right_index=True, how='left')
# Select data for South Oxfordshire
datar = datar[datar.index.str.contains(ac) == True]
datar = datar.transpose()
datar.columns = ['Children LIF']
bl = datar.iloc[0]['Children LIF']
datar['Child LIF % Change'] = ((datar['Children LIF'] - bl)/bl) * -100
datar = datar.set_index([xi],drop=True)
datar
Out[23]:
Merge Data Sets
In [24]:
df = gva.merge(datar, left_index=True, right_index=True, how='outer')
df
Out[24]:
Plot Graph
In [25]:
plt.figure()
plt.plot(df['GVA % Change'], '-o', df['Child LIF % Change'], '-o');
plt.gca().set_title('Percentage Change in GVA & Children in Low Income Families (South Oxfordshire)');
plt.gca().set_ylabel('% Change from 2006 Baseline');
plt.gca().legend(['Gross Value Added','Reduction in Children in Low Income Families']);
In [ ]: