In [1]:
#szekelyland workbook - extrapolating from romanian values using mortality statistics from INSSE
import pandas as pd, numpy as np
In [2]:
df=pd.read_csv('trexportPivot_POP206C.csv')
de=pd.read_csv('trexportPivot_POP206E.csv')
In [3]:
de.columns
Out[3]:
In [4]:
df=df.drop(u' UM: Numar persoane',axis=1).set_index([u' Ani',u'Clasificarea internationala a maladiilor - Revizia a X a 1994',u' Macroregiuni regiuni de dezvoltare si judete'])
de=de.drop(u' UM: Numar persoane',axis=1).set_index([u' Ani',u'Sexe',u' Grupe de varsta ',u' Macroregiuni regiuni de dezvoltare si judete'])
In [5]:
de.head()
Out[5]:
In [6]:
for i in df.index.levels[0].unique():
print i
In [7]:
for i in df.index.levels[1].unique():
print i
In [8]:
d={
"Alte cauze":1094,
"Boli ale aparatului circulator":1064,
"Boli ale aparatului digestiv":1078,
"Boli ale aparatului genito-urinar":1084,
"Boli ale aparatului respirator":1072,
"Boli ale sistemului nervos boli ale ochiului si anexele sale boli ale urechii si apofizei mastoide":9,
"Boli endocrine de nutritie si metabolism":1051,
"Boli infectioase si parazitare":1001,
"Leziuni traumatice otraviri si alte consecinte ale cauzelor externe":1095,
"Malformatii congenitale deformatii si anomalii cromozomiale":1093,
"Sarcina nastere si lauzie":1087,
"Total":1000,
"Tulburari mentale si de comportament":1055,
"Tumori":1026,
"Unele afectiuni a caror origine se situeaza in perioada perinatala":1092,
"din care: Boala ischemica a inimii":0,
"din care: Boli cerebro-vasculare":0,
"din care: Diabet zaharat":0,
"din care: Tuberculoza":0}
n0=[1048,1082,1083]
n9=[1058,1062,1063]
In [9]:
#Transylvanian counties
#['HARGHITA','MURES','COVASNA','CLUJ','BRASOV','ARAD','ALBA','SALAJ','TIMIS','BISTRITA-NASAUD','MARAMURES','BIHOR','HUNEDOARA','CARAS-SEVERIN','SATU MARE','SIBIU']
In [10]:
df.index.levels[1].unique()
Out[10]:
In [11]:
#calculate county share in romania total
pop={}
for j in df.index.levels[0].unique():
if j[6:] not in pop:pop[j[6:]]={}
for k in df.index.levels[1].unique():
try:
if d[k]!=0:
if d[k] not in pop[j[6:]]:pop[j[6:]][str(d[k])]={}
pop[j[6:]][str(d[k])]["ro"]=df.loc[j].loc[k].loc[' TOTAL'][0]
try: a=df.loc[j].loc[k].loc[' Regiunea CENTRU'][0]
except: a=0
try: b=df.loc[j].loc[k].loc[' Regiunea VEST'][0]
except: b=0
try: c=df.loc[j].loc[k].loc[' Regiunea NORD-VEST'][0]
except: c=0
pop[j[6:]][str(d[k])]["szf"]=(a+b+c)*1.0/df.loc[j].loc[k].loc[' TOTAL'][0]
except: pass
In [12]:
for y in pop:
for c in n9:
if str(c) not in pop[y]:pop[y][str(c)]={}
for m in ["szf","ro"]:
try: pop[y][str(c)][m]=pop[y]['9'][m]/3.0
except: pass
pop[y].pop('9');
for c in n0:
if str(c) not in pop[y]:pop[y][str(c)]={}
for m in ["szf","ro"]:
try: pop[y][str(c)][m]=pop[y]['1000'][m]
except: pass
In [134]:
#calculate age deviation from national average
e={"Feminin":"f","Masculin":"m","Total":"s"}
pop2={}
pop3={}
for j in de.index.levels[0].unique():
if j[6:] not in pop2:pop2[j[6:]]={}
if j[6:] not in pop3:pop3[j[6:]]={}
for k in de.index.levels[1].unique():
if e[k] not in pop2[j[6:]]:pop2[j[6:]][e[k]]={}
if e[k] not in pop3[j[6:]]:pop3[j[6:]][e[k]]={}
for l in de.index.levels[2].unique():
age=l[:3].strip().strip('-')
try: a=de.loc[j].loc[k].loc[l].loc[' Regiunea CENTRU'][0]
except: a=0
try: b=de.loc[j].loc[k].loc[l].loc[' Regiunea VEST'][0]
except: b=0
try: c=de.loc[j].loc[k].loc[l].loc[' Regiunea NORD-VEST'][0]
except: c=0
try: d=de.loc[j].loc[k].loc[l].loc[' TOTAL'][0]
except: d=0
if age!='To':
if age!='0':
if age!='85':
pop2[j[6:]][e[k]][age]=(a+b+c)*1.0
pop3[j[6:]][e[k]][age]=(d)*1.0
else:
pop2[j[6:]][e[k]]['90']=(a+b+c)*3.0/10
pop2[j[6:]][e[k]]['95']=(a+b+c)*1.0/10
pop2[j[6:]][e[k]]['85']=(a+b+c)*6.0/10
pop3[j[6:]][e[k]]['90']=(d)*3.0/10
pop3[j[6:]][e[k]]['95']=(d)*1.0/10
pop3[j[6:]][e[k]]['85']=(d)*6.0/10
else:
pop2[j[6:]][e[k]]['0']=(a+b+c)*1.0/5
pop2[j[6:]][e[k]]['1']=(a+b+c)*1.0/5
pop2[j[6:]][e[k]]['2']=(a+b+c)*1.0/5
pop2[j[6:]][e[k]]['3']=(a+b+c)*1.0/5
pop2[j[6:]][e[k]]['4']=(a+b+c)*1.0/5
pop3[j[6:]][e[k]]['0']=(d)*1.0/5
pop3[j[6:]][e[k]]['1']=(d)*1.0/5
pop3[j[6:]][e[k]]['2']=(d)*1.0/5
pop3[j[6:]][e[k]]['3']=(d)*1.0/5
pop3[j[6:]][e[k]]['4']=(d)*1.0/5
In [135]:
for i in pop2:
for j in ["f","m"]:
for a in pop2[i][j]:
try:
pop2[i][j][a]=pop2[i][j][a]*1.0/sum(pop2[i]["s"].values())
pop3[i][j][a]=pop3[i][j][a]*1.0/sum(pop3[i]["s"].values())
except:pass
In [136]:
import zipfile,json
#read RO data
z = zipfile.ZipFile('db2/642.zip')
ro = json.loads(z.open('data.json').read())
In [137]:
h=json.loads(file('hierarchy2.json').read())
In [138]:
szf=[]
for i in ro:
szf.append({"a":i["a"],
"c":i["c"],
"g":i["g"],
"t":i["t"],
"s":i["s"]*1.0*float(pop[str(i["t"])][h[i['c']]["group"]]["szf"])})
In [139]:
szf2=[]
for i in ro:
szf2.append({"a":i["a"],
"c":i["c"],
"g":i["g"],
"t":i["t"],
"s":i["s"]*1.0*float(pop[str(i["t"])][h[i['c']]["group"]]["szf"])
*float(pop2[str(i["t"])][i["g"]][str(i["a"])])
/float(pop3[str(i["t"])][i["g"]][str(i["a"])])
})
In [140]:
#save files
try:
import zlib
compression = zipfile.ZIP_DEFLATED
except:
compression = zipfile.ZIP_STORED
file('db2/data.json','w').write(json.dumps(szf))
zf = zipfile.ZipFile('db2/9997.zip', mode='w')
zf.write('db2/data.json','data.json',compress_type=compression)
zf.close()
file('db2/data.json','w').write(json.dumps(szf2))
zf = zipfile.ZipFile('db2/9996.zip', mode='w')
zf.write('db2/data.json','data.json',compress_type=compression)
zf.close()
In [18]:
#update dictionaries
In [111]:
#run only once
c=json.loads(file('countries.json').read())
c=[u'9996']+c
file('countries.json','w').write(json.dumps(c))
In [63]:
c=json.loads(file('cnames.json').read())
c[u'9996']=u'Transylvania'
file('cnames.json','w').write(json.dumps(c))
In [64]:
c=json.loads(file('hnames.json').read())
c[u'Transylvania']=u'Erdély'
file('hnames.json','w').write(json.dumps(c))
In [22]:
#update population
In [82]:
p=json.loads(file('pop.json').read())
In [83]:
x=pd.read_csv('trexportPivot_POP107A.csv')
In [84]:
x=x.drop([u' Medii de rezidenta',u' UM: Numar persoane'],axis=1).set_index([u'Varste si grupe de varsta',u' Sexe',u' Ani',u' Macroregiuni regiuni de dezvoltare si judete'])
In [85]:
x=x.unstack(u' Macroregiuni regiuni de dezvoltare si judete')
In [86]:
x=pd.DataFrame(x.T.sum())
In [87]:
indice=[str(i)+' ani' for i in range(5)]+[str(i*5)+'-'+str((i+1)*5-1)+' ani' for i in range(1,17)]
indice[5]='5- 9 ani'
In [101]:
p['9997']={}
p['9996']={}
gg={"f":u" Feminin","m":u" Masculin"}
for y in range(1999,2013):
if str(y) not in p['9997']:p['9997'][str(y)]={}
for g in ["f","m"]:
if g not in p['9997'][str(y)]:p['9997'][str(y)][g]={}
for i in indice:
p['9997'][str(y)][g][str(i[:2].strip().strip('-'))]=str(float(x.loc[i].loc[gg[g]].loc[' Anul '+str(y)][0]))
p['9997'][str(y)][g][str(85)]=str(float(x.loc['85 ani si peste'].loc[gg[g]].loc[' Anul '+str(y)][0])*6.0/10)
p['9997'][str(y)][g][str(90)]=str(float(x.loc['85 ani si peste'].loc[gg[g]].loc[' Anul '+str(y)][0])*3.0/10)
p['9997'][str(y)][g][str(95)]=str(float(x.loc['85 ani si peste'].loc[gg[g]].loc[' Anul '+str(y)][0])*1.0/10)
In [102]:
p['9996']=p['9997']
In [103]:
#save updated population file
file('pop.json','w').write(json.dumps(p))
In [107]:
p['9996']['2005']['f']
Out[107]: