In [3]:
import pandas as pd, json, numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
In [4]:
# Load the two JSON inputs used to build the city list.
# `with` closes each file deterministically instead of leaving the handle
# open until the garbage collector gets to it.
with open('../json/cluster.json', 'r') as f:
    cluster = json.load(f)
with open('../json/citysave.json', 'r') as f:
    citysave = json.load(f)
In [5]:
import wolframalpha
import os

# Wolfram|Alpha application id. The WOLFRAMALPHA_APP_ID environment variable
# takes precedence so the credential does not have to live in the notebook;
# the literal is kept only as a fallback so existing runs keep working.
# NOTE(review): a key committed in a notebook should be treated as leaked --
# rotate it and drop the fallback once the environment variable is in place.
app_id = os.environ.get('WOLFRAMALPHA_APP_ID', 'A4TU9P-E3QAGQJL39')
#dcsala@masdar.ac.ae
client = wolframalpha.Client(app_id)
In [6]:
# Build `unicities`: one entry per distinct value of `cluster`, mapped to the
# 'country' field of the `citysave` record for the first key that produced
# that value. Later keys with the same value are ignored.
unicities = {}
for key, name in cluster.items():
    if name in unicities:
        continue
    unicities[name] = citysave[key]['country']
In [7]:
# Accumulators filled by the query loop:
#   G     -- results keyed by city name
#   error -- city names whose lookup failed
G = dict()
error = list()
In [8]:
uk=unicities.keys()
for h in range(900,1800):
c=uk[h]
if c not in G.keys()+error:
print h,
ys={"pop":0,"nearby":{}}
q='population of '+c+', '+unicities[c]
try:
res = client.query(q)
for i in range(len(res['pod'])):
if res['pod'][i]['@title']=="Result":
x=res['pod'][i]['subpod']['plaintext']
popul=x[:x.find('people')-1]
if 'mill' in popul:
popul=float(popul[:popul.find('mill')-1])*1000000.0
ys['pop']=int(popul)
if res['pod'][i]['@title']=="Nearby cities":
x=res['pod'][i]['subpod']['plaintext'].split('\n')
for y in x[:-1]:
people=y[y.rfind('|')+2:y.find('people')-1]
if 'mill' in people:
people=float(people[:people.find('mill')-1])*1000000.0
km=float(y[y.find('|')+2:y.find('km')-1])
ys['nearby'][y.split('|')[0].split(',')[0].strip()]={"km":km,"people":int(people)}
G[c]=ys
except: error.append(c)
In [13]:
# Persist the first-pass results and the list of failed cities.
# `with` guarantees the data is flushed and the files are closed before the
# next cell reads them back.
with open("../json/pop2.json", 'w') as f:
    f.write(json.dumps(G))
with open("../json/pop2e.json", 'w') as f:
    f.write(json.dumps(error))
Postprocessing
In [ ]:
# Reload the first-pass results and failures from disk so post-processing
# can start without repeating the queries. Files are closed on leaving `with`.
with open('../json/pop2.json', 'r') as f:
    G = json.load(f)
with open('../json/pop2e.json', 'r') as f:
    error = json.load(f)
In [14]:
# Cities that still cannot be resolved after the retry pass.
error2 = list()
In [15]:
import unicodedata
def strip_accents(s):
    """Return `s` with combining marks removed.

    The text is decomposed with NFD and every character whose Unicode
    category is 'Mn' is dropped; all other characters are kept in order.
    """
    decomposed = unicodedata.normalize('NFD', s)
    kept = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    return ''.join(kept)
def remove_accents(input_str):
    """Return `input_str` encoded as ASCII after NFKD decomposition.

    Characters that have no ASCII representation once decomposed are
    discarded (encoding errors are ignored).
    """
    return unicodedata.normalize('NFKD', input_str).encode('ASCII', 'ignore')
In [ ]:
for c in error:
if c not in G.keys()+error2:
ys={"pop":0,"nearby":{}}
q=remove_accents(strip_accents('population of '+c.split('/')[0]+', '+unicities[c]))
res = client.query(q)
if 'pod' in res:
for i in range(len(res['pod'])):
if res['pod'][i]['@title']=="Result":
x=res['pod'][i]['subpod']['plaintext']
if 'available' not in x:
popul=x[:x.find('people')-1]
if 'mill' in popul:
popul=popul[:popul.find('mill')-1]
if '|' in popul:popul=popul.split('|')[1].strip()
ys['pop']=int(float(popul)*1000000.0)
if res['pod'][i]['@title']=="Nearby cities":
x=res['pod'][i]['subpod']['plaintext'].split('\n')
if 'available' not in x:
for y in x[:-1]:
people=y[y.rfind('|')+2:y.find('people')-1]
if 'mill' in people:
people=float(people[:people.find('mill')-1])*1000000.0
km=float(y[y.find('|')+2:y.find(' km ')])
ys['nearby'][y.split('|')[0].split(',')[0].strip()]={"km":km,"people":int(people)}
G[c]=ys
print 'success',c
else:
print 'error',c
error2.append(c)
In [19]:
file("../json/pop2b.json",'w').write(json.dumps(G))
file("../json/pop2eb.json",'w').write(json.dumps(error2))
print len(G),len(error),len(error2)
In [25]:
# Spot-check a single entry: display the country stored for Cotonou.
unicities[u'Cotonou']
Out[25]:
In [21]:
uk=unicities.keys()
for h in range(900,1800):
c=uk[h]
if c not in G:
print c