In [1]:
import os
import pandas as pd
import pandas_profiling as pd_profiling
import altair as alt
In [2]:
def read_field_type(x):
    """Translate a data-dictionary field-type label into a Python type.

    Helper that makes it easier to read the dataframes with the column types
    declared in the dictionary file.

    Parameters
    ----------
    x
        Field-type label, e.g. ``'String'``, ``'Integer'`` or ``'Long'``.

    Returns
    -------
    type
        ``int`` for ``'Integer'`` and ``'Long'``; ``str`` for ``'String'`` and
        for every other (unrecognised) label.
    """
    integer_labels = ('Integer', 'Long')
    # Anything that is not an integer label, including 'String', is read as text.
    return int if x in integer_labels else str
In [3]:
# Load the data dictionary: a semicolon-separated, ISO-8859-1 encoded CSV whose
# 'Field name' and 'Field type' columns are used later to type the main dataset.
spend_gob_names = pd.read_csv(
    os.path.join(
        'data',
        'dataset_3_Revenue_and_ pending_(Central_Government)_Real_DICTIONARY.csv',
    ),
    encoding='iso-8859-1',
    sep=';',
)
In [4]:
# Display the data dictionary loaded above (one row per field of the dataset).
spend_gob_names
Out[4]:
In [5]:
# Load the main dataset, typing every column from the data dictionary: each
# 'Field type' label is mapped to a Python type by read_field_type.
spend_gob = pd.read_csv(
    os.path.join(
        'data',
        'dataset_3_Revenue_and_ pending_(Central_Government)_1990-2017 (real).csv',
    ),
    sep=';',
    encoding='iso-8859-1',
    dtype={
        field: read_field_type(kind)
        for field, kind in zip(spend_gob_names['Field name'],
                               spend_gob_names['Field type'])
    },
)
In [6]:
# Normalise the column labels to lowercase so later cells can reference them uniformly.
spend_gob.columns = spend_gob.columns.map(str.lower)
In [7]:
# `real_amount` is loaded as text that uses a decimal comma, so the comma is swapped
# for a dot before casting to float.
# The dtype guard makes this cell safe to re-run: once the column is numeric, the
# `.str` accessor would raise AttributeError.
if not pd.api.types.is_numeric_dtype(spend_gob['real_amount']):
    spend_gob['real_amount'] = (
        spend_gob['real_amount']
        .str.replace(',', '.', regex=False)  # literal replacement, not a regex
        .astype(float)
    )
In [8]:
# Preview the first five rows (the default for head) after the clean-up steps.
spend_gob.head()
Out[8]:
In [14]:
# Inspect the rows whose real_amount is negative.
spend_gob.loc[spend_gob['real_amount'].lt(0)]
Out[14]:
In [13]:
# Show the nivel2 column on its own.
spend_gob.loc[:, 'nivel2']
Out[13]:
In [ ]:
# TODO(review): unfinished cell. The original statement `for col in s` has no
# complete iterable and no body, so it raises a SyntaxError when executed.
# It is kept as a comment so the notebook can run top to bottom; either finish
# the loop or delete this cell.
Ejemplito
In [ ]:
In [9]:
# Keep only the rows whose nivel3 value is exactly 'Cobre bruto'.
cobre = spend_gob.loc[spend_gob['nivel3'].eq('Cobre bruto')]
In [28]:
# Bar chart of the summed real_amount per periodo for the 'Cobre bruto' subset,
# coloured by nivel3. Channels are spelled out as explicit Altair channel objects.
alt.Chart(cobre).mark_bar().encode(
    x=alt.X('periodo:O'),
    y=alt.Y('sum(real_amount):Q'),
    color=alt.Color('nivel3'),
)
Out[28]:
In [23]:
# Area chart of the summed real_amount per periodo over the full dataset,
# coloured by nivel3 and split into one column facet per nivel2 value.
alt.Chart(spend_gob).mark_area().encode(
    x=alt.X('periodo:O'),
    y=alt.Y('sum(real_amount):Q'),
    color=alt.Color('nivel3'),
    column=alt.Column('nivel2'),
)
Out[23]: