In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pysal
from pysal.weights.util import get_points_array_from_shapefile
In [ ]:
# Build the census-tract dataframe by reading a CSV exported from the shapefile.
# NOTE(review): hardcoded home-relative path — confirm it resolves on this machine.
datac = "~/Git/Resolution/SC2010_CEM_RMSP_Income_Race.csv"
dfc = pd.read_csv(datac)
dfc.head()
In [ ]:
# Select the household-income frequency columns (DR_005 … DR_014)
# and total each column across all census tracts.
income = dfc.loc[:, "DR_005":"DR_014"]
income.sum(axis=0)
In [ ]:
# Bar chart of summed income frequencies per interval.
# pyplot is stateful: figure/label/title calls must precede the plot call.
% matplotlib inline
sns.set_style("darkgrid")
sns.set_context("talk", font_scale=1, rc={"lines.linewidth": 1.5})
plt.figure(facecolor="white", figsize=(12, 8), dpi=300)
plt.xlabel('Income')
plt.ylabel('Frequency sum')
plt.title('Income intervals')
income.sum().plot(kind='bar')
|DR_005|Up to 1/8 minimum wage
|DR_006|1/8 to 1/4 minimum wage
|DR_007|1/4 to 1/2 minimum wage
|DR_008|1/2 to 1 minimum wage
|DR_009|1 to 2 minimum wages
|DR_010|2 to 3 minimum wages
|DR_011|3 to 5 minimum wages
|DR_012|5 to 10 minimum wages
|DR_013|More than 10 minimum wages
|DR_014|No income
Groups to be defined. Probably: up to ¼ m.w.; ¼ to ½; ½ to 1; 1 to 3, 3 to 5, more than 5.
In [ ]:
# Select the race/colour frequency columns (P3_001 … P3_006)
# and total each column across all census tracts.
race = dfc.loc[:, "P3_001":"P3_006"]
race.sum(axis=0)
In [ ]:
# Bar chart of summed race/colour frequencies per category.
% matplotlib inline
sns.set_style("darkgrid")
sns.set_context("talk", font_scale=1, rc={"lines.linewidth": 1.5})
plt.figure(facecolor="white", figsize=(10, 6), dpi=300)
plt.xlabel('Race and Color')
plt.ylabel('Frequency sum')
plt.title('Race and Color intervals')
race.sum().plot(kind='bar')
In [ ]:
# Build the weighting-area dataframe by reading a CSV exported from the shapefile.
# NOTE(review): hardcoded home-relative path — confirm it resolves on this machine.
dataw = "~/Git/Resolution/AP2010_CEM_RMSP_EGP_EDU.csv"
dfw = pd.read_csv(dataw)
dfw.head()
In [ ]:
# Select the education-level columns (EDU1 … EDU5)
# and total each column across all weighting areas.
education = dfw.loc[:, "EDU1":"EDU5"]
education.sum(axis=0)
In [ ]:
# Bar chart of summed education-level frequencies.
% matplotlib inline
sns.set_style("darkgrid")
sns.set_context("talk", font_scale=1, rc={"lines.linewidth": 1.5})
plt.figure(facecolor="white", figsize=(10, 6), dpi=300)
plt.xlabel('Education')
plt.ylabel('Frequency sum')
plt.title('Education intervals')
education.sum().plot(kind='bar')
|EDU1| No education and incomplete elementary school - Sem instrução ou fundamental incompleto
|EDU2| Complete elementary school and incomplete high school - Fundamental completo e médio incompleto
|EDU3| Complete high school and incomplete college - Médio completo e superior incompleto
|EDU4| Complete College/University - Superior Completo
|EDU5| Not determined - Não determinado
In [ ]:
# Select the occupation (EGP class) columns (EGP1 … EGP11_)
# and total each column across all weighting areas.
ocupation = dfw.loc[:, "EGP1":"EGP11_"]
ocupation.sum(axis=0)
In [ ]:
# Bar chart of summed occupation (EGP) frequencies.
% matplotlib inline
sns.set_style("darkgrid")
sns.set_context("talk", font_scale=1, rc={"lines.linewidth": 1.5})
plt.figure(facecolor="white", figsize=(12, 8), dpi=300)
plt.xlabel('Ocupation')
plt.ylabel('Frequency sum')
plt.title('Ocupation intervals')
ocupation.sum().plot(kind='bar')
|EGP1| I. Higher professionals
|EGP2| II. Lower professionals
|EGP3| IIIa. Routine non-manuals, higher degree
|EGP4| IIIb. Routine non-manual, lower degree
|EGP5| IVa2. Proprietors and employers
|EGP6| IVc1. Rural employers
|EGP7| IVc2. Self-employed farmers and subsistence agriculture workers
|EGP8| V. Technicians and Supervisors of manual workers
|EGP9| VI. Skilled workers
|EGP10| VIIa. Semi- and unskilled workers
|EGP11| VIIb. Agricultural workers
[SS] Classes EGP6, EGP7 and EGP11 could be grouped in Agricultural workers
In [6]:
# Build the census occupation dataframe from the micro-data CSV.
# V6461 (occupation code) is read as a string so codes are not coerced to numbers.
# FIX: np.str was removed in NumPy 1.24 — it was only an alias for the
# built-in str, which is used directly here.
dataoccup = "~/Dropbox/Resolution - SP London/Data/Census/Original Data/Sao Paulo/occupation_micro_rmsp_2010.csv"
dfoccup = pd.read_csv(dataoccup, na_filter=True, dtype={'V6461': str})
dfoccup.head()
Out[6]:
In [7]:
# Remove rows with military occupation codes.
# Counts per code: '110':23, '210':112, '299':122, '411':76, '412':113,
# '511':21, '512':9, '599':486, '999':219 -> 1181 rows removed in total.
# ('0' — 51086 rows — appears in the tally above but is deliberately absent
# from `codes`, so those rows are kept; a 'v0' column emerges from the pivot.)
codes = ['110', '210', '299', '411', '412', '511', '512', '599', '999']
# Single vectorized mask replaces the original per-code drop loop
# (same surviving rows and index, one pass instead of nine).
dfoccup = dfoccup[~dfoccup['V6461'].isin(codes)]
In [8]:
# Keep only the weighting-area code, sample weight and occupation code.
# .copy() makes dfocc an independent frame, avoiding SettingWithCopyWarning
# on the column assignment below.
dfocc = dfoccup.loc[:, ("AREAP", "PESO", "V6461")].copy()
# Truncate the occupation code to its first three digits (level-3 group).
# astype(str) mirrors the original str(x) call (NaN -> 'nan'), and the
# vectorised .str[:3] replaces the per-element map(lambda x: str(x)[0:3]).
dfocc["V6461"] = dfocc["V6461"].astype(str).str[:3]
dfocc.tail()
Out[8]:
In [9]:
# Sanity checks after the military-code filter.
# dfocc.groupby(["AREAP", "V6461"])["PESO"].sum()
print("Valid area: ", dfoccup["AREAP"].nunique())
print("Valid occup: ", dfoccup["V6461"].nunique())
# Code '0' (kept on purpose) should still have rows ...
print(len(dfoccup.V6461[dfoccup.V6461 == '0']))
# ... while dropped military code '110' should yield an empty selection.
print(dfoccup.V6461[dfoccup.V6461 == '110'])
In [10]:
# Pivot the micro-data to one row per weighting area (AREAP), with the summed
# sample weights (PESO) per 3-digit occupation code; absent combinations -> 0.
# FIX: aggfunc='sum' replaces aggfunc=np.sum — identical result, and passing
# the NumPy function directly is deprecated in recent pandas.
table = pd.pivot_table(dfocc, values='PESO', index=['AREAP'], columns=['V6461'], aggfunc='sum', fill_value=0)
# pivot_table already returns a DataFrame; copy so `table` stays pristine.
dfocc_group = table.copy()
# Prefix codes with 'v' to get uniform, attribute-safe column names ('111' -> 'v111').
dfocc_group.columns = ['v' + str(col) for col in dfocc_group.columns]
dfocc_group.head()
Out[10]:
In [11]:
# Compute occupation columns for the second- and first-level hierarchy.
# Level-3 codes are 'v' + 3 digits (e.g. v111). A level-2 total sums all
# level-3 codes sharing the first two digits (v11 = v111 + v112), and a
# level-1 total sums its level-2 totals (v1 = v11 + ... + v14).
# Deriving the groups from the column names replaces ~45 hand-written sums,
# cannot silently miss a code present in the data, and preserves the column
# insertion order (sorted prefixes: v11..v96, then v1..v9) that the later
# .loc label slices depend on.
level3 = [c for c in dfocc_group.columns if len(c) == 4 and c[1:].isdigit()]

# Second-level hierarchy: aggregate level-3 codes by their first two digits.
level2 = sorted({c[:3] for c in level3})
for group in level2:
    members = [c for c in level3 if c.startswith(group)]
    dfocc_group[group] = dfocc_group[members].sum(axis=1)

# First-level hierarchy: aggregate level-2 totals by their leading digit.
for group in sorted({c[:2] for c in level2}):
    members = [c for c in level2 if c.startswith(group)]
    dfocc_group[group] = dfocc_group[members].sum(axis=1)
In [14]:
# Save the grouped occupation data (all hierarchy levels) to a scratch CSV.
# NOTE(review): the slice starts at 'v0' — presumably the column produced by
# occupation code '0', which the earlier filter keeps. Confirm 'v0' exists:
# a missing label in a .loc slice over unsorted columns raises KeyError.
csv_temp = "~/Downloads/occupation_grouped_rmsp.csv"
dfocc_group.loc[:,'v0':'v9'].to_csv(csv_temp) #save to csv
In [8]:
# Level-1 axis labels (Portuguese occupation-group names, suffixed with the
# matching column code). Order must match dfocc_group.loc[:, "v1":"v9"].
labelsl1 = [
'Diretores Gerentes - v1',
'Prof. Das Ciências/Intelec - v2',
'Técnicos/Prof. Nív. méd - v3',
'Trab De Apoio Administrativo - v4',
'Trab Serv./Vend. Com Mercados - v5',
'Trab quali Agro/Florestais/Caça - v6',
'Trab quali Oper/Art Const/Mecâ - v7',
'Oper Instal/Máquinas/Montadores - v8',
'ocup Elementares - v9']
In [9]:
# Bar chart of level-1 occupation totals (comment said "income" — it is occupation).
# NOTE(review): plt.xlabel is called both before and after plotting; the
# second call merely repeats the first and could be removed.
% matplotlib inline
sns.set_style("darkgrid")
sns.set_context("talk", font_scale=0.7, rc={"lines.linewidth": 1.5})
plt.figure(facecolor="white", figsize=(10, 4), dpi=300)
plt.xlabel('Ocupation groups')
plt.title('Ocupation intervals - First level hierarchy ')
plt.ticklabel_format(style='plain', axis='y')
fig1 = dfocc_group.loc[:,"v1":"v9"].sum().plot(kind='bar')
lab1 = fig1.set_xticklabels(labelsl1)
plt.xlabel('Ocupation groups')
Out[9]:
In [10]:
# Level-2 axis labels (Portuguese sub-group names, suffixed with the column
# code). Order must match dfocc_group.loc[:, "v11":"v96"].
labelsl2 = [
'Dir Exe/Adm Púb./Execut Legisla - v11',
'Dirig Administrativos/Comerciais - v12',
'Dirig Geren Produção/Operação - v13',
'Ger Hotéis/Rest/Comércios - v14',
'Prof Ciências/Engenharia - v21',
'Prof Saúde - v22',
'Prof Ensino - v23',
'Esp Org/Adm Pública/Empresa - v24',
'Prof Tec Informação/Comunic - v25',
'Prof Direito/Ciên Socia/Cultura - v26',
'Prof nv méd Ciênc/Engenharia - v31',
'Prof nv méd Saúde/Afins - v32',
'Prof nv méd Oper Financ/Adm - v33',
'Prof nv méd Jurídico/Soc/Cultura - v34',
'Téc nv méd Tec Informaç/Comun - v35',
'Escriturários - v41',
'Trab Atend Direto ao Público - v42',
'Trab cálc numéric/Encar materia - v43',
'Out Trab Apoio Administrativo - v44',
'Trab Serviços Pessoais - v51',
'Vendedores - v52',
'Trab Cuidados Pessoais - v53',
'Trab Serv Proteção/Segurança - v54',
'Agricultores/Trab Agropecuária - v61',
'Trab Florestais qual/Pesca/Caça - v62',
'Trab qual/Oper Const/Eletricistas - v71',
'Trab qual/Oper Metal/Const/Mec - v72',
'Artesãos/Oper Artes Gráficas - v73',
'Trab Esp Eletricidade/Eletrônica - v74',
'Oper Proc Aliment/Madeir/Confec - v75',
'Operador Instal Fixas/Máquinas - v81',
'Montadores - v82',
'Condut Veíc/Opera equip pesado - v83',
'Trab Doméstic/Limpeza Edifício - v91',
'Trab Elem Agro/Pesca/Florest - v92',
'Trab elem Miner/Const/Transp - v93',
'Ajudantes Preparação Alimentos - v94',
'Trab Ambulantes Serviços Afins - v95',
'Coletor Lixo/Out ocup Elemen - v96']
In [11]:
# Bar chart of level-2 occupation totals (comment said "income" — it is occupation).
# NOTE(review): plt.xlabel appears twice here as well; one call is redundant.
% matplotlib inline
sns.set_style("darkgrid")
sns.set_context("talk", font_scale=0.7, rc={"lines.linewidth": 1.5})
plt.figure(facecolor="white", figsize=(12, 6), dpi=300)
plt.xlabel('Ocupation groups')
plt.title('Ocupation intervals - Second level hierarchy ')
plt.ticklabel_format(style='plain', axis='y')
fig1 = dfocc_group.loc[:,"v11":"v96"].sum().plot(kind='bar')
plt.xlabel('Ocupation groups')
lab2 = fig1.set_xticklabels(labelsl2)
In [14]:
# Level-3 axis labels (Portuguese unit-group names, suffixed with the column
# code). Order must match dfocc_group.loc[:, "v111":"v962"].
labelsl3 = [
'Super Poder Exec/Legislativo - v111',
'Diretores Gerais/Ger Gerais - v112',
'Dir Administração/Serviços - v121',
'Dir Venda/Comerc/Desenvolv - v122',
'Dir Prod Agro/Silvicult/Aq/Pesc - v131',
'Dir Ind Transf/Miner/Const/Dist - v132',
'Dirg Serv Tec Inform/Comunic - v133',
'Dirg Ger Serviços Prof - v134',
'Ger Hotéis/Restaurantes - v141',
'Ger Com Atacadista/Varejistas - v142',
'Outros Ger Serviços - v143',
'Físicos/Químicos/Afins - v211',
'Matemátic/Atuário/Estatístico - v212',
'Prof Ciências Biológicas - v213',
'Eng (Exc Eletrotecnólogos) - v214',
'Eng Eletrotécnicos - v215',
'Arquitet/Urban/Agrimens/Desen - v216',
'Médicos - v221',
'Prof Enfermagem/Partos - v222',
'Prof Medicina Tradic/Alternativa - v223',
'Paramédicos - v224',
'Veterinários - v225',
'Outros Prof Saúde - v226',
'Profe Universidades/Ensino Sup - v231',
'Profe Formação Profissional - v232',
'Profe Ensino méd - v233',
'Profe Ensino Fundam/Pré-Escol - v234',
'Outros Prof Ensino - v235',
'Especialistas Finanças - v241',
'Especialistas Organ Administ - v242',
'Prof Vendas/Comerc/Rel Púb - v243',
'Des Analist Prog Software/Multi - v251',
'Espec Base Dados/Rede Comp - v252',
'Prof Direito - v261',
'Arquivolog/Curad Museu/Biblio - v262',
'Esp Ciências Sociais/Teologia - v263',
'Escrit/Jornalistas/Linguistas - v264',
'Artistas Criativos/Interpretativos - v265',
'Téc Ciências Físic/Engenharia - v311',
'Superv Eng Minas/Transf/Const - v312',
'Téc Controle Processos - v313',
'Téc Prof nv méd Ciência Biológ - v314',
'Téc Control Nav Marítima/Aero - v315',
'Téc Médicos Farmacêuticos - v321',
'Prof nv méd Enfermag/Parto - v322',
'Prof nv méd Medicina Trad/Alte - v323',
'Téc Assistentes Veterinários - v324',
'Outros Prof nv méd Saúde - v325',
'Prof nv méd Finança/Matemát - v331',
'Agentes Corretores Comerciais - v332',
'Agentes Serviços Comerciais - v333',
'Secretários Adm/Especializados - v334',
'Agentes Adm Pública Aplic Lei - v335',
'Prof nv méd Juríd/Socia/Relig - v341',
'Trab Esporte/Condicion Físico - v342',
'Prof nv med Cultur/Artíst/Culiná - v343',
'Téc Oper Tec Info/Comunic/Usr - v351',
'Téc Telecomunic/Radiodifusão - v352',
'Escriturários Gerais - v411',
'Secretários (Geral) - v412',
'Oper Máquinas Escritório - v413',
'Caixas Banco/Cobrad/Pagador - v421',
'Trab Serv Informação Cliente - v422',
'Auxiliares Contáb/Financeiro - v431',
'Trab Enc Reg Mater/Transport - v432',
'Outros Trab Apoio Administrat - v441',
'Trab ServDireto Passageiros - v511',
'Cozinheiros - v512',
'Garçons/Atendentes bar - v513',
'Cabeleir/Trat Beleza Afins - v514',
'Sup Manuten/Limpeza Edifícios - v515',
'Outros Trab Serviços Pessoais - v516',
'Vendedores Rua/Post Mercado - v521',
'Comerciant/Vendedores Lojas - v522',
'Caixas Expedidores Bilhetes - v523',
'Outros Vendedores - v524',
'Cuidad Criança/Ajudantes Prof - v531',
'Trab Cuid Pessoais Serv Saúde - v532',
'Trab Serv Proteção/Segurança - v541',
'Agricult/Trab qual Agricultura - v611',
'Criadores/Trab qual Pecuária - v612',
'ProdutTrab qual Agropec Mista - v613',
'Trab Florestais qual/ Afins - v621',
'Pescadores/Caçadores - v622',
'Trab Const Civil Obra Estrutur - v711',
'Trab qual Const (Acabamento) - v712',
'Pintores/Limpad Fachada Afins - v713',
'Moldador/Sold/Chapist/Caldeir - v721',
'Ferreiros/Ferramenteiros Afins - v722',
'Mecânic/Reparad Máquinas - v723',
'Artesãos - v731',
'Trab qual/Oper Artes Gráficas - v732',
'Instal/Reparador Equip Elétrico - v741',
'Instal/Rep Equip Eletrô/Teleco - v742',
'Trab qual Proces Alimentos - v751',
'Trab qual Madeira/Marceneiros - v752',
'Trab ql Op Confec/Calçad/Aces - v753',
'Out Trab ql/Op Indúst/Artesan - v754',
'Oper Minera/Extr Proc Minerais - v811',
'Oper Proc Recobridoras Metais - v812',
'Oper Inst Máq Químic/Fotográf - v813',
'Op Máq Borrach/Papel/Plástico - v814',
'Oper Máq Têxteis/Art Couro - v815',
'Oper Máq Aliment/Prod Afins - v816',
'Op Inst Papel/Proc Madeira - v817',
'Out Op Instal Fixa/Máquinas - v818',
'Montadores - v821',
'Maquinist Locomotivas Afins - v831',
'Condut Automóv/Caminho/Moto - v832',
'Condut Cam Pesados/Ônibus - v833',
'Oper Equip Móveis Pesados - v834',
'Marinheiros Coberta E Afins - v835',
'Trab Domést/Limpeza Edifícios - v911',
'Lavador Veíc/Janel/Roupa/Limp - v912',
'Trab elem Agrop/Pesc/Florest - v921',
'Trab elem Mineraç/Construção - v931',
'Trab elem Ind Transformação - v932',
'Trab elem Transpor/Armazen - v933',
'Ajudantes Preparação Aliment - v941',
'Trab Ambulant Serviços Afins - v951',
'Vend Ambulant (Exc Aliment) - v952',
'Coletores Lixo - v961',
'Outras Ocupações elem - v962']
In [15]:
# Bar chart of level-3 occupation totals (comment said "income" — it is occupation).
% matplotlib inline
sns.set_style("darkgrid")
sns.set_context("talk", font_scale=0.7, rc={"lines.linewidth": 1.5})
plt.figure(facecolor="white", figsize=(18, 6), dpi=300)
plt.ylabel('Frequency sum')
plt.title('Ocupation intervals - São Paulo Census microdata FULL')
plt.ticklabel_format(style='plain', axis='y')
fig2 = dfocc_group.loc[:,"v111":"v962"].sum().plot(kind='bar')
plt.xlabel('Ocupation groups')
lab3 = fig2.set_xticklabels(labelsl3)
In [16]:
# Pearson correlation matrix across the nine level-1 occupation totals
# ("pearson" is pandas' default; made explicit for readability).
cor_occup_hier1 = dfocc_group.loc[:, "v1":"v9"].corr(method="pearson")
cor_occup_hier1
Out[16]:
In [17]:
# Heatmap of the level-1 Pearson correlation matrix.
% matplotlib inline
sns.set(context="notebook")
plt.subplots(figsize=(12, 9), dpi=300)
plt.title('Pearson correlation matrix - Occupation groups first level hierarchy')
f2 = sns.heatmap(cor_occup_hier1, square=True, xticklabels=labelsl1, yticklabels=labelsl1)
plt.tight_layout()
# plt.savefig("/Users/sandrofsousa/Dropbox/Resolution - SP London/Documents/Histograms/Occupation_pearson_.png")
In [18]:
# Pearson correlation matrix across the 39 level-2 occupation totals.
cor_occup_hier2 = dfocc_group.loc[:, "v11":"v96"].corr(method="pearson")
cor_occup_hier2.head()
Out[18]:
In [19]:
# Heatmap of the level-2 Pearson correlation matrix.
% matplotlib inline
sns.set(context="notebook")
plt.subplots(figsize=(12, 9), dpi=300)
plt.title('Pearson correlation matrix - Occupation groups second level hierarchy')
f2 = sns.heatmap(cor_occup_hier2, square=True, xticklabels=labelsl2, yticklabels=labelsl2)
plt.tight_layout()
# plt.savefig("/Users/sandrofsousa/Dropbox/Resolution - SP London/Documents/Histograms/Occupation_pearson_hierl2.png")
In [21]:
# Pearson correlation matrix across the level-3 occupation totals.
cor_occup_hier3 = dfocc_group.loc[:, "v111":"v962"].corr(method="pearson")
cor_occup_hier3.head()
Out[21]:
In [22]:
# Heatmap of the level-3 Pearson correlation matrix.
% matplotlib inline
sns.set(context="notebook")
plt.subplots(figsize=(26, 21), dpi=300)
plt.title('Pearson correlation matrix - Occupation groups third level hierarchy', fontsize=20)
f3 = sns.heatmap(cor_occup_hier3, square=True, xticklabels=labelsl3, yticklabels=labelsl3)
plt.tight_layout()
# plt.savefig("/Users/sandrofsousa/Dropbox/Resolution - SP London/Documents/Histograms/Occupation_pearson_hierl3.png")
In [23]:
# Moran helpers operate on the dbf attribute table of the same shapefile.
# Obs: QGIS field length limit (10) changed column names during the join.
def calc_moran(arrayi, arrayj, weight):
    """Bivariate Moran's I between two variables under a spatial weight.

    Uses 999 random permutations and returns the tuple
    (I statistic, pseudo p-value based on the simulated z distribution).
    """
    mbv = pysal.Moran_BV(arrayi, arrayj, weight, permutations=999)
    return mbv.I, mbv.p_z_sim
def block_processing(i, weight, varnames, file):
    """One row of the Moran matrix: bivariate Moran's I of `i` vs each variable.

    Reads every variable in `varnames` from the dbf `file` and returns the
    pair (morans, pvalues) — parallel lists ordered like `varnames`.
    """
    morans, pvalues = [], []
    for name in varnames:
        j = np.array(file.by_col[name])
        moran_i, pvalue = calc_moran(i, j, weight)
        morans.append(moran_i)
        pvalues.append(pvalue)
    return morans, pvalues
def calc_moran_matrix(varnames, weight, file):
    """Pairwise bivariate Moran's I for all variables in `varnames`.

    Builds one row per variable via block_processing and returns the pair
    (moran_matrix, pvalue_matrix), each a list of row lists.
    """
    moran_matrix, pvalue_matrix = [], []
    for name in varnames:
        i = np.array(file.by_col[name])
        row_moran, row_pvalue = block_processing(i, weight, varnames, file)
        moran_matrix.append(row_moran)
        pvalue_matrix.append(row_pvalue)
    return moran_matrix, pvalue_matrix
In [27]:
# Save grouped occupation data (with the new hierarchy columns) to CSV.
# NOTE(review): 'v111':'v9' presumably spans from the first pivot column to
# the last appended level-1 column — the label slice depends on column
# insertion order, so verify it captures everything intended.
csvoccup = "~/Dropbox/Resolution - SP London/Data/Census/São Paulo/occupation_grouped_rmsp_2010.csv"
dfocc_group.loc[:,'v111':'v9'].to_csv(csvoccup) #save to csv
Based on Queen Contiguity Weight
Prepares the data by reading the shapefile of the São Paulo weighting areas and computes contiguity-based spatial weights using the Queen method.
In [28]:
# Build Queen-contiguity spatial weights from the weighting-area shapefile.
# NOTE(review): absolute local path — parameterize before sharing the notebook.
shp_path1 = "/Users/sandrofsousa/Dropbox/Resolution - SP London/Data/Shape files/" \
"São Paulo/WeightingAreas_Shapefile/AP2010_CEM_RMSP_EGP_EDU_OCC.shp"
weight_cont_occup = pysal.queen_from_shapefile(shp_path1)
Histogram showing the distribution of the number of neighbors in the Queen weights matrix
In [32]:
# Histogram of how many neighbours each area has under the Queen weights.
% matplotlib inline
sns.set(context="notebook")
plt.figure(facecolor="white", figsize=(6, 3), dpi=300)
plt.xlabel('Neighbors')
plt.ylabel('Frequency')
plt.title('Queen weights hist. - neighbors relations for occupation SP')
# weight.histogram is a sequence of (neighbour_count, frequency) pairs;
# zip(*...) unpacks them into the x and height arguments of plt.bar.
plt.bar(*zip(*weight_cont_occup.histogram))
Out[32]:
Compute bivariate Moran's I for all elements of the occupation groups.
Variables are compared pairwise, resulting in a matrix; 999 random permutations are used to calculate the pseudo p-values.
In [33]:
# Open the dbf attribute table of the shapefile; calc_moran_matrix reads the
# occupation variables from it column by column.
# (Original comment mentioned "ethnic groups" — this notebook uses occupation.)
f1 = pysal.open("/Users/sandrofsousa/Dropbox/Resolution - SP London/Data/Shape files/" \
"São Paulo/WeightingAreas_Shapefile/AP2010_CEM_RMSP_EGP_EDU_OCC_CENT.dbf")
In [34]:
# The level-1 correlation matrix's column names double as the variable list
# (iterating a DataFrame yields its column labels).
var_occup_hier1 = list(cor_occup_hier1.columns)
matrices_occupl1 = calc_moran_matrix(var_occup_hier1, weight_cont_occup, f1)
In [35]:
# Moran's I values (element 0 of the result pair) as a labelled square matrix.
dfmoran_occup_hier1 = pd.DataFrame(matrices_occupl1[0], columns=var_occup_hier1, index=var_occup_hier1)
dfmoran_occup_hier1.head()
Out[35]:
In [36]:
# Heatmap of the level-1 bivariate Moran's I matrix (Queen weights).
% matplotlib inline
sns.set(context="notebook")
plt.subplots(figsize=(12,9), dpi=300)
plt.title('Moran I correlation matrix - Ocupation groups Queen weight Hierarchy level 1')
h1 = sns.heatmap(dfmoran_occup_hier1, square=True, xticklabels=labelsl1, yticklabels=labelsl1)
plt.tight_layout()
# plt.savefig("/Users/sandrofsousa/Dropbox/Resolution - SP London/Documents/Histograms/Occupation_moran_hierl1.png")
Moran Bivariate Queen weight for second hierarchy level
In [37]:
# Level-2 variable names come from the level-2 correlation matrix's columns.
var_occup_hier2 = list(cor_occup_hier2.columns)
matrices_occupl2 = calc_moran_matrix(var_occup_hier2, weight_cont_occup, f1)
In [38]:
# Level-2 Moran's I values as a labelled square matrix.
dfmoran_occup_hier2 = pd.DataFrame(matrices_occupl2[0], columns=var_occup_hier2, index=var_occup_hier2)
dfmoran_occup_hier2.head()
Out[38]:
In [39]:
# Heatmap of the level-2 bivariate Moran's I matrix (Queen weights).
% matplotlib inline
sns.set(context="notebook")
plt.subplots(figsize=(16,12), dpi=300)
plt.title('Moran I correlation matrix - Ocupation groups Queen weight Hierarchy level 2')
h2 = sns.heatmap(dfmoran_occup_hier2, square=True, xticklabels=labelsl2, yticklabels=labelsl2)
plt.tight_layout()
# plt.savefig("/Users/sandrofsousa/Dropbox/Resolution - SP London/Documents/Histograms/Occupation_moran_hierl2.png")
Moran Bivariate Queen weight for third hierarchy level
In [40]:
# Level-3 variable names come from the level-3 correlation matrix's columns.
var_occup_hier3 = list(cor_occup_hier3.columns)
matrices_occupl3 = calc_moran_matrix(var_occup_hier3, weight_cont_occup, f1)
In [41]:
# Level-3 Moran's I values as a labelled square matrix.
dfmoran_occup_hier3 = pd.DataFrame(matrices_occupl3[0], columns=var_occup_hier3, index=var_occup_hier3)
dfmoran_occup_hier3.head()
Out[41]:
In [42]:
# Heatmap of the level-3 bivariate Moran's I matrix (Queen weights).
% matplotlib inline
sns.set(context="notebook")
plt.subplots(figsize=(26, 21), dpi=300)
plt.title('Moran I correlation matrix - Ocupation groups Queen weight Hierarchy level 3', fontsize=25)
h3 = sns.heatmap(dfmoran_occup_hier3, square=True, xticklabels=labelsl3, yticklabels=labelsl3)
plt.tight_layout()
# plt.savefig("/Users/sandrofsousa/Dropbox/Resolution - SP London/Documents/Histograms/Occupation_moran_hierl3.png")