In [1]:
import pandas as pd
In [12]:
columns = {"Dte." : np.int64,
"Nom Dte." : str,
"Barri" : str,
"AEB" : int,
"Any" : int,
"Població" : int,
"Naixements Total" : int,
"Naixements Nens" : int,
"naixements Nenes" : int,
"Defuncions Total" : int,
"Defuncions Homes" : int,
"Defuncions Dones" : int,
}
In [18]:
df = pd.DataFrame()
#######################
# add population #
#######################
add_df = pd.read_csv("evolucio_poblacio_barris.txt", sep=';', decimal=',')
for col in add_df.columns[2:]:
year = float(col)
aux_df = add_df[["Dte.", "Barri", col]].copy()
aux_df.columns = ["Dte.", "Barri", "Població"]
aux_df.insert(2, "AEB", -1.0)
aux_df.insert(3, "Any", year)
df = aux_df.append(df, ignore_index=True)
def correctDecimalPlaces(item):
if item > 100.0:
return item
else:
return item*1000
df["Població"] = df["Població"].astype(float).apply(correctDecimalPlaces)
#######################
# add natality #
#######################
df2 = pd.DataFrame()
add_df = pd.read_csv("evolucio_natalitat_bar.txt", sep=';', decimal=',')
for col in add_df.columns[2:]:
if (col.split()[1] != "Total"):
continue
year = col.split()[0]
aux_df = add_df[["Dte.", "Barri", col, "{} Nens".format(year), "{} Nenes".format(year)]].copy()
aux_df.columns = ["Dte.", "Barri", "Naixements Total", "Naixements Nens", "Naixements Nenes"]
aux_df["Any"] = float(year)
aux_df["AEB"] = -1.0
df2 = aux_df.append(df2, ignore_index=True)
df = pd.merge(df, df2, how='outer')
#######################
# add mortality #
#######################
df2 = pd.DataFrame()
add_df = pd.read_csv("evolucio_defuncions_bar.txt", sep=';', decimal=',')
for col in add_df.columns[2:]:
if (col.split()[1] != "Total"):
continue
year = col.split()[0]
aux_df = add_df[["Dte.", "Barri", col, "{} Homes".format(year), "{} Dones".format(year)]].copy()
aux_df.columns = ["Dte.", "Barri", "Defuncions Total", "Defuncions Homes", "Defuncions Dones"]
aux_df["Any"] = float(year)
aux_df["AEB"] = -1.0
df2 = aux_df.append(df2, ignore_index=True)
df = pd.merge(df, df2, how='outer')
#######################
# add studies #
#######################
df2 = pd.DataFrame()
add_df = pd.read_csv("nivell_estudis_barri.csv", sep=";", skiprows=4, decimal=',')
# fix encoding
barris = {barri.split(".")[0] : barri for barri in np.unique(df["Barri"])}
def FixNeighbourhoodNames(item, barris):
return barris.get(item.split(".")[0], np.nan)
add_df["Territori"] = add_df["Territori"].apply(FixNeighbourhoodNames, args=(barris,))
for col in add_df.columns[3:-1]:
if col.split()[-2] != "estudis":
continue
year = col.split()[-1]
aux_df = add_df[["Territori", col, "{} amb estudis obligatoris {} ".format(col[:13], year),
"{} amb batxillerat superior, CFGM {} ".format(col[:13], year),
"{} amb estudis universitaris i CFGS {} ".format(col[:13], year)]].copy()
aux_df.columns = ["Barri", "% Població sense estudis", "% Població amb estudis obligatoris",
"% Població amb batxillerat superior o CFGM", "% Població amb estudis universitaris o CFGS"]
aux_df["Any"] = float(year)
aux_df["AEB"] = -1.0
df2 = aux_df.append(df2, ignore_index=True)
df = pd.merge(df, df2, how='outer')
########################
# add District name #
########################
districtes = {1 : "Ciutat Vella",
2 : "Eixample",
3 : "Sants-Montjuïc",
4 : "Les Corts",
5 : "Sarrià-Sant Gervasi",
6 : "Gràcia",
7 : "Horta-Guinardó",
8 : "Nou Barris",
9 : "Sant Andreu",
10 : "Sant Martí"}
def findDistricName(item, districtes):
dist_number = item
return districtes.get(dist_number, np.nan)
df.insert(1, "Nom Dte.", df["Dte."].apply(findDistricName, args=(districtes,)))
#######################
# add rents #
#######################
df2 = pd.DataFrame()
add_df = pd.read_csv("../alquiler_incasol/anual_bcn_lloguer_barris.txt", sep=';', decimal=',')
add_df["Barri"] = df["Barri"]
for col in add_df.columns[2:]:
aux_df = add_df[["Dte.", "Barri", col]].copy()
aux_df.columns = ["Dte.", "Barri", "Lloguer promig"]
aux_df["Any"] = float(col)
aux_df["AEB"] = -1.0
df2 = aux_df.append(df2, ignore_index=True)
df = pd.merge(df, df2, how='outer')
#######################
# add rents/m^2 #
#######################
df2 = pd.DataFrame()
add_df = pd.read_csv("../alquiler_incasol/anual_bcn_lloguer_m2_barris.txt", sep=';', decimal=',')
add_df["Barri"] = df["Barri"]
for col in add_df.columns[2:]:
aux_df = add_df[["Dte.", "Barri", col]].copy()
aux_df.columns = ["Dte.", "Barri", "Lloguer promig/m^2"]
aux_df["Any"] = float(col)
aux_df["AEB"] = -1.0
df2 = aux_df.append(df2, ignore_index=True)
df = pd.merge(df, df2, how='outer')
#######################
# format coulmns #
#######################
df["AEB"] = df["AEB"].astype(int)
df["Dte."] = df["Dte."].astype(int)
df["Any"] = df["Any"].astype(int)
df["Població"] = df["Població"].astype(float)
#######################
# save formatted data #
#######################
df.to_csv("ajuntament_bcn_formatted_data.csv", sep=';', encoding='utf-8', index=False, decimal=',')
In [4]:
df
Out[4]:
In [38]:
df.dtypes
Out[38]:
In [24]:
add_df = pd.read_csv("nivell_estudis_barri.csv", sep=";", skiprows=4, decimal=',')
In [21]:
df.columns
Out[21]:
In [38]:
import locale
from locale import atof
locale.setlocale(locale.LC_NUMERIC, '')
add_df = pd.read_csv("evolucio_poblacio_barris.txt", sep=';')
add_df["2010"] = add_df["2010"].astype(float)
add_df[add_df["Dte."] == 7]
Out[38]:
In [9]:
Out[9]:
In [ ]: