In [32]:
# Use case for NLGIS2 data analysis using data service and pandas library
# (C) Vyacheslav Tykhonov vty@iisg.nl
# International Institute of Social History
# http://socialhistory.org
%matplotlib inline
import urllib2
import simplejson
import json
import sys
import pandas as pd
import random
import vincent
from vincent import Axis, AxisProperties, PropertySet, ValueRef
from pandas.io.json import json_normalize
# Global settings
# Base URL of the data service; load_api_data() appends the ?code=...&year=... query to it.
apiurl = "http://node-128.dev.socialhistoryservices.org/api/data"
# Record keys read by data2frame(): the location code and the year of each item.
amscodecolumn = 'amsterdam_code'
yearcolumn = 'year'
# Default values
# Indicator code and year requested when the notebook first calls load_api_data().
# The year is kept as a string because it ends up in the query string.
varcode = "TXVV"
varyear = "1982"
# Colour names looked up by position when the final cell prints a colour per value.
colors = ['red', 'green', 'orange', 'brown', 'purple', 'blue', 'cyan']
def load_api_data(apiurl, code, year):
    """Request one code/year slice from the data API and return the decoded JSON.

    Parameters
    ----------
    apiurl : str
        Base URL of the data endpoint, without a query string.
    code : str
        Value sent as the ``code`` query parameter.
    year : str or int
        Value sent as the ``year`` query parameter.

    Returns
    -------
    object
        Whatever ``simplejson.load`` decodes from the response body.
        NOTE(review): the rest of the notebook treats it as a mapping of
        records -- confirm against the service.

    Errors raised by ``urllib2`` (request failure) or ``simplejson``
    (malformed body) are not caught here and propagate to the caller.
    """
    # str() on both parts so the year may be passed as an int as well as a string.
    jsondataurl = apiurl + "?code=" + str(code) + '&year=' + str(year)
    response = urllib2.build_opener().open(urllib2.Request(jsondataurl))
    try:
        return simplejson.load(response)
    finally:
        # Release the connection even when decoding the body fails.
        response.close()
def data2frame(dataframe, debug=False):
    """Index the records of a decoded API response by year.

    Parameters
    ----------
    dataframe : mapping
        Decoded response; its ``'data'`` entry is iterated as a sequence of
        records. Each record is indexed with the module-level ``yearcolumn``
        key (and with ``amscodecolumn`` when ``debug`` is on).
    debug : bool, optional
        When true, print every record while indexing and, afterwards, every
        field of every record that was kept. Defaults to False, which matches
        the previous hard-coded behaviour (no output).

    Returns
    -------
    dict
        ``year -> record``. When several records share a year, the last one
        in the input wins.
    """
    datavalues = {}
    for item in dataframe['data']:
        year = item[yearcolumn]
        # A later record silently replaces an earlier one for the same year.
        datavalues[year] = item
        if debug:
            print(str(item[amscodecolumn]) + ' ' + str(year))
            print(item)
    if debug:
        # Dump the fields of the records that survived the indexing above.
        for year in datavalues:
            values = datavalues[year]
            for name in values:
                print(name + ' ' + str(values[name]))
    return datavalues
# Fetch the default indicator/year slice from the data service.
data = load_api_data(apiurl, varcode, varyear)

# Each entry of `data` is one record, for example:
# 'indicator': 'TK', 'code': 'TXCU', 'naam': 'ADORP', 'amsterdam_code': '10996',
# 'value': 89.0, 'year': 1937, 'id': 1, 'cbsnr': '1'

# Create the DataFrame with one row per record. Building it from rows (instead
# of transposing four parallel lists) lets pandas infer a dtype per column, so
# 'value' stays numeric rather than being stored as generic objects.
record_columns = ['year', 'amsterdam_code', 'naam', 'value']
record_rows = [[record[column] for column in record_columns]
               for record in data.values()]
df = pd.DataFrame(record_rows, columns=record_columns)

# Exploring dataset to see columns and data
print(df.head())

# Working frame holding the four columns of interest
newframe = df[['year', 'amsterdam_code', 'naam', 'value']]
Now let's calculate the total values for each city and show the first 20 locations
In [27]:
# Keep only the location code and its value, limited to the first 20 rows.
newframe = df[['amsterdam_code', 'value']].head(20)
print(newframe)
We need some basic color map limits calculated from our data
In [28]:
def colormapslimits(dataframe):
    """Compute the five break points used to split values into colour classes.

    The break points are, in order: the minimum, the median of the lower
    half, the overall median, the median of the upper half, and the maximum.
    The lower half is every value less than or equal to the overall median;
    the upper half is everything above it.

    Parameters
    ----------
    dataframe : iterable of numbers
        The values to classify. Despite the name, a pandas Series or a plain
        list is expected (iterating it must yield the numbers themselves).
        Missing values are ignored.

    Returns
    -------
    tuple
        ``(min, int(lower_median), int(median), int(upper_median), max)``.
        The three medians are truncated to int; min and max are returned as
        found in the data.

    Raises
    ------
    ValueError
        If there is no non-missing value to work with.
    """
    items = list(dataframe)
    # Only the argument is used; no module-level state is read here.
    series = pd.Series(items).dropna() if items else None
    if series is None or len(series) == 0:
        raise ValueError("colormapslimits() needs at least one value")

    avg = series.median()
    lower = series[series <= avg]
    upper = series[series > avg]

    avg1 = lower.median()
    # The upper half is empty when every value equals the median; reuse the
    # overall median so the limits stay well defined.
    avg2 = upper.median() if len(upper) else avg
    return (series.min(), int(avg1), int(avg), int(avg2), series.max())
In [29]:
# Restrict every series used below to the first 20 locations.
values = newframe['value'].iloc[:20]
dfnames = df['naam'].iloc[:20]
codes = [df['amsterdam_code'].iloc[:20]]

# Plain Python lists of the values and of the matching location names.
list_data = list(values)
names = list(dfnames)

# New dataframe to make chart: one row per location, indexed by its name.
thisDF = pd.DataFrame(list_data, index=names)
colormap = colormapslimits(values)

print(names)
print(list_data)
print('Limits to build color map: ' + str(colormap))

bar = vincent.Bar(thisDF)
# Tilt the labels of the first axis by 45 degrees and left-align them
# (presumably the x axis carrying the location names -- TODO confirm).
bar.axes[0].properties = AxisProperties(
    labels=PropertySet(
        angle=ValueRef(value=45),
        align=ValueRef(value='left'),
    ),
)
vincent.core.initialize_notebook()
bar.axis_titles(x='', y='Value')
bar.display()
The same data as a pie chart
In [30]:
# Build a vincent pie chart from the same per-location frame (thisDF).
pie = vincent.Pie(thisDF)
# Select the palette by name; 'Set3' is presumably a ColorBrewer scheme -- TODO confirm in the vincent docs.
pie.colors(brew='Set3')
# Last expression of the cell: whatever legend() returns is what the notebook shows as the cell output.
pie.legend('Locations')
Out[30]:
Create final dataset with amsterdam codes, values and colors
In [31]:
print("Colors for visualization of locations on map")
# colormap holds increasing limits, e.g. 42.0, 75, 221, 321, 2331.0; each pair
# of neighbouring limits is one class. Class i spans colormap[i-1]..colormap[i]
# and is shown with colors[i].
last_class = len(colormap) - 1
for value in list_data:
    # Start at 1 so the first pair is (colormap[0], colormap[1]); starting at 0
    # would pair the last limit with the first one.
    for i in range(1, len(colormap)):
        # Local names chosen so the builtins min() and max() stay usable afterwards.
        lower, upper = colormap[i - 1], colormap[i]
        in_class = lower <= value < upper
        # The top class also includes its upper limit, otherwise the largest
        # value in the data would never be assigned a colour.
        if not in_class and i == last_class:
            in_class = value == upper
        if in_class:
            print(str(value) + ' ' + colors[i])
            break
To do: visualize the dataset on a map of the Netherlands