In [1]:
import pandas as pd
import pysal as ps
import numpy as np
from jenks import jenks
import pandas as pd
import geopandas as gpd
#taken from https://github.com/perrygeo/jenks
'''
Arguments
---------
dbfile : DBF file - Input to be imported
upper : Condition - If true, make column heads upper case
'''
def dbf2DF(dbfile, upper=True): #Reads in DBF files and returns Pandas DF
db = ps.open(dbfile) #Pysal to open DBF
d = {col: db.by_col(col) for col in db.header} #Convert dbf to dictionary
pandasDF = pd.DataFrame(db[:]) #Convert to Pandas DF
pandasDF.columns=db.header
pandasDF.dropna(inplace=True)
return pandasDF
def getPercent(dataset,string):
'''
Arguments
---------
dataset : pandas datafra- Input to produce the % column on
string : String or list of strings - variable or variables to convert to %
'''
#remove rows with zeros in total column
mask = (dataset.iloc[:,dataset.shape[1]-1] == 0)
dataset = dataset.loc[~mask,:]
variable = np.zeros(dataset.shape[0])
#if me need to aggregate more columns in the same category:
if len(string) > 1:
variable = dataset.loc[:,string].sum(axis=1)
else:
variable = dataset[string].iloc[:,0]
newVariableName = string[0].split('_')[0]
dataset[newVariableName + '_p'] = variable/dataset.iloc[:,dataset.shape[1]-1] * 100
dataset[newVariableName + '_p'] = dataset[newVariableName + '_p'].map(lambda x: round(x,1))
dataset = dataset.loc[:,['REDCODE',newVariableName + '_p']]
dataset.dropna(inplace=True)
try:
dataset[newVariableName + '_q'] = pd.qcut(dataset[newVariableName + '_p'], 10,labels=False)
dataset[newVariableName + '_q'] = dataset[newVariableName + '_q'].map(lambda x: int(x))
except ValueError:
print 'Can not be cut in 10 quantiles. All values are set to 0'
dataset[newVariableName + '_q'] = 0
bins = jenks(list(dataset[newVariableName + '_p']), 10)
dataset[newVariableName + '_j'] = pd.cut(dataset[newVariableName + '_p'],bins,labels = range(10))
dataset[newVariableName + '_j'].fillna(0,inplace=True) #NaNs are because 0 percent gets out of jenks. This belong to the first jenks group
return dataset
In [2]:
#radios de caba y pba sacados de cartografia oficial de indec
mapa = gpd.GeoDataFrame.from_file('data/shapes/agba/agba.shp')
print mapa.shape
mapa.head()
Out[2]:
In [3]:
educ = dbf2DF('data/redatam/MNI.DBF')
educ.head()
Out[3]:
In [4]:
#8 es universitario completo
educ = getPercent(dataset = educ,string = ['MNI_8'])
educ.head()
Out[4]:
In [5]:
print educ.REDCODE.isin(mapa.REDCODE).sum()*1./educ.shape[0]
print mapa.REDCODE.isin(educ.REDCODE).sum()*1./mapa.shape[0]
mapa = mapa.merge(educ,on='REDCODE')
In [6]:
cell = dbf2DF('data/redatam/CELULAR.DBF')
cell.head()
Out[6]:
In [7]:
cell = getPercent(dataset = cell,string = ['H2819C_1'])
cell.head()
Out[7]:
In [8]:
print cell.REDCODE.isin(mapa.REDCODE).sum()*1./cell.shape[0]
print mapa.REDCODE.isin(cell.REDCODE).sum()*1./mapa.shape[0]
mapa = mapa.merge(cell,on='REDCODE')
In [9]:
comp = dbf2DF('data/redatam/COMPUTER.DBF')
comp.head()
Out[9]:
In [10]:
comp = getPercent(dataset = comp,string = ['H2819B_1'])
comp.head()
Out[10]:
In [11]:
print comp.REDCODE.isin(mapa.REDCODE).sum()*1./comp.shape[0]
print mapa.REDCODE.isin(comp.REDCODE).sum()*1./mapa.shape[0]
mapa = mapa.merge(comp,on='REDCODE')
In [12]:
empty = dbf2DF('data/redatam/EMPTY.DBF')
empty.head()
Out[12]:
In [13]:
empty = getPercent(dataset = empty,string = ['V02_3','V02_4','V02_5','V02_6','V02_7'])
empty.head()
Out[13]:
In [14]:
print empty.REDCODE.isin(mapa.REDCODE).sum()*1./empty.shape[0]
print mapa.REDCODE.isin(empty.REDCODE).sum()*1./mapa.shape[0]
mapa = mapa.merge(empty,on='REDCODE')
In [15]:
inmi = dbf2DF('data/redatam/INMIGRATION.DBF')
inmi.head()
Out[15]:
In [16]:
inmi = getPercent(dataset = inmi,string = ['P05_1'])
inmi.head()
Out[16]:
In [17]:
print inmi.REDCODE.isin(mapa.REDCODE).sum()*1./inmi.shape[0]
print mapa.REDCODE.isin(inmi.REDCODE).sum()*1./mapa.shape[0]
mapa = mapa.merge(inmi,on='REDCODE')
In [18]:
legalOrig = dbf2DF('data/redatam/LEGAL.DBF')
legalOrig.head()
Out[18]:
In [19]:
legal = getPercent(dataset = legalOrig,string = ['PROP_1','PROP_3'])
legal.head()
Out[19]:
In [20]:
legal.columns = ['REDCODE','TENREG_p','TENREG_q','TENREG_j']
In [21]:
print legal.REDCODE.isin(mapa.REDCODE).sum()*1./legal.shape[0]
print mapa.REDCODE.isin(legal.REDCODE).sum()*1./mapa.shape[0]
mapa = mapa.merge(legal,on='REDCODE')
In [22]:
owner = getPercent(dataset = legalOrig,string = ['PROP_1'])
owner.head()
Out[22]:
In [23]:
print owner.REDCODE.isin(mapa.REDCODE).sum()*1./owner.shape[0]
print mapa.REDCODE.isin(owner.REDCODE).sum()*1./mapa.shape[0]
mapa = mapa.merge(owner,on='REDCODE')
In [24]:
rent = getPercent(dataset = legalOrig,string = ['PROP_3'])
rent.head()
Out[24]:
In [25]:
rent.columns = ['REDCODE','INQ_p','INQ_q','INQ_j']
In [26]:
print rent.REDCODE.isin(mapa.REDCODE).sum()*1./rent.shape[0]
print mapa.REDCODE.isin(rent.REDCODE).sum()*1./mapa.shape[0]
mapa = mapa.merge(rent,on='REDCODE')
In [27]:
mapa.head()
Out[27]:
In [30]:
mapa.to_file('data/shapes/agbaData')
In [ ]: