Datathon Mayo 2018 "Haciendo el gasto fiscal visible para el público"

Gasto y presupuesto anual de las Instituciones Públicas

Lectura de Datos



In [1]:

    
import os
import pandas as pd
import pandas_profiling as pd_profiling
import altair as alt
import matplotlib.pyplot as plt
#%matplotlib ipympl



In [2]:

    
def read_field_type(x):
    """Translate a data-dictionary type label into the Python type used as a column dtype.

    Parameters
    ----------
    x : object
        Type label, e.g. a value taken from the dictionary's 'Field type' column.

    Returns
    -------
    type
        ``int`` for the labels 'Integer' and 'Long'; ``str`` for 'String' and
        for every other value.
    """
    integer_labels = ('Integer', 'Long')
    # Anything that is not an integer label is read as text.
    return int if x in integer_labels else str



In [3]:

    
# Load the data dictionary (one row per field: name, type, size, description, ...)
# that documents the revenue/spend dataset. The file is ';'-separated and is
# decoded as latin-1.
pub_revenue_spend_names = pd.read_csv(
    os.path.join('data', 'Anual_revenue and spend_2009-2017 by Programs (DICCIONARY).csv'),
    encoding='latin-1',
    sep=';',
)



In [4]:

    
# Display the data dictionary: one row per field with its declared type, size,
# range, description and an example value.
pub_revenue_spend_names









    Out[4]:







  
    
      
      Field name
      Field type
      Field size
      Field format
      Range
      Description
      Example
    
  
  
    
      0
      Periodo
      Integer
      8
      #
      2009 to 2017
      Year of the spend or revenue
      2009
    
    
      1
      Nombre Partida
      String
      65
      NaN
      29 categories
      Ministeries
      CONGRESO NACIONAL
    
    
      2
      Nombre Capitulo
      String
      80
      NaN
      NaN
      Public Services
      SENADO
    
    
      3
      Nombre Programa
      String
      80
      NaN
      NaN
      Programs
      SENADO
    
    
      4
      SUBTTULO
      String
      82
      NaN
      NaN
      First level of the budget classification
      ADQUISICION DE ACTIVOS NO FINANCIEROS         ...
    
    
      5
      ITEM
      String
      81
      NaN
      NaN
      Second level of the budget classification
      Del Sector Privado                            ...
    
    
      6
      ASIGNACIN
      String
      258
      NaN
      NaN
      Third level of the budget classification
      Administradora del Fondo para Bonificacin por...
    
    
      7
      TIPO
      String
      8
      NaN
      1355 categories
      Revenue or spend account
      INGRESOS
    
    
      8
      Ppto_inicial_Real
      Long
      12
      #
      (-56028405 to 61544202)
      Real Amount in millions (chilean pesos) of Bud...
      61544202
    
    
      9
      Ejec_Acum_Real
      Long
      12
      #
      (-44789758 to 51048556)
      Real Amount in millions (chilean pesos) of Spe...
      51048556



In [ ]:

    
# Display the data dictionary (same expression as the earlier display cell;
# this cell has no recorded output).
pub_revenue_spend_names



In [5]:

    
# Load the yearly revenue/spend records (';'-separated, UTF-8).
# Column dtypes come from the data dictionary: every 'Field name' is paired with
# the Python type that read_field_type returns for its 'Field type'.
# NOTE(review): some dictionary names (e.g. 'Nombre Partida', 'Ppto_inicial_Real')
# differ from the headers displayed for this frame (e.g. 'Partida',
# 'Ppto_inicial_Real (millones)') -- confirm the mapping reaches the intended columns.
pub_revenue_spend = pd.read_csv(
    os.path.join('data', 'Anual_revenue and spend_2009-2017 by Programs (millions) v3.csv'),
    sep=';',
    encoding='utf-8',
    dtype={
        field: read_field_type(kind)
        for field, kind in zip(pub_revenue_spend_names['Field name'],
                               pub_revenue_spend_names['Field type'])
    },
)



In [12]:

    
# Draft of simplified, accent-free column names, one per column of
# pub_revenue_spend in display order. The list is only shown here; it is not
# assigned or applied to the frame in this cell.
# Fixes: the first entry was a duplicated 'partida' although the first column is
# 'Periodo', and the list had 9 names for 10 columns.
# NOTE(review): 'ejecucion' (for 'Ejec_Acum_Real (millones)') is inferred by
# analogy with 'presupuesto' -- confirm the intended name.
['periodo', 'partida', 'capitulo', 'programa', 'subtitulo', 'item', 'asignacion', 'tipo',
 'presupuesto', 'ejecucion']









    Out[12]:





Index(['periodo', 'partida', 'capítulo', 'programa', 'subtítulo', 'item',
       'asignación', 'tipo', 'ppto_inicial_real (millones)',
       'ejec_acum_real (millones)'],
      dtype='object')



In [6]:

    
# Preview the first five records to check that the file was parsed as expected.
pub_revenue_spend.head(5)









    Out[6]:







  
    
      
      Periodo
      Partida
      Capítulo
      Programa
      SUBTÍTULO
      ITEM
      ASIGNACIÓN
      TIPO
      Ppto_inicial_Real (millones)
      Ejec_Acum_Real (millones)
    
  
  
    
      0
      2009
      CONGRESO NACIONAL
      BIBLIOTECA DEL CONGRESO
      BIBLIOTECA DEL CONGRESO
      ADQUISICION DE ACTIVOS NO FINANCIEROS         ...
      Edificios                                     ...
      NaN
      GASTOS
      0
      2398
    
    
      1
      2009
      CONGRESO NACIONAL
      BIBLIOTECA DEL CONGRESO
      BIBLIOTECA DEL CONGRESO
      ADQUISICION DE ACTIVOS NO FINANCIEROS         ...
      Equipos Informticos                          ...
      NaN
      GASTOS
      171
      143
    
    
      2
      2009
      CONGRESO NACIONAL
      BIBLIOTECA DEL CONGRESO
      BIBLIOTECA DEL CONGRESO
      ADQUISICION DE ACTIVOS NO FINANCIEROS         ...
      Mobiliario y Otros                            ...
      NaN
      GASTOS
      55
      11
    
    
      3
      2009
      CONGRESO NACIONAL
      BIBLIOTECA DEL CONGRESO
      BIBLIOTECA DEL CONGRESO
      ADQUISICION DE ACTIVOS NO FINANCIEROS         ...
      Mquinas y Equipos                            ...
      NaN
      GASTOS
      48
      23
    
    
      4
      2009
      CONGRESO NACIONAL
      BIBLIOTECA DEL CONGRESO
      BIBLIOTECA DEL CONGRESO
      ADQUISICION DE ACTIVOS NO FINANCIEROS         ...
      Programas Informticos                        ...
      NaN
      GASTOS
      88
      87



In [8]:

    
# Summary statistics for every column: count/unique/top/freq for the text
# columns and mean/std/quantiles for the numeric ones (include='all').
pub_revenue_spend.describe(include='all')









    Out[8]:







  
    
      
      Periodo
      Partida
      Capítulo
      Programa
      SUBTÍTULO
      ITEM
      ASIGNACIÓN
      TIPO
      Ppto_inicial_Real (millones)
      Ejec_Acum_Real (millones)
    
  
  
    
      count
      72508.000000
      72508
      72508
      72508
      72507
      62247
      33105
      72166
      7.250800e+04
      7.250800e+04
    
    
      unique
      NaN
      29
      214
      370
      27
      123
      7330
      2
      NaN
      NaN
    
    
      top
      NaN
      MINISTERIO DEL INTERIOR Y SEGURIDAD PÚBLICA
      FISCO
      APORTE FISCAL LIBRE
      TRANSFERENCIAS CORRIENTES                     ...
      A Otras Entidades Pblicas                    ...
      Subsecretara de Redes Asistenciales          ...
      GASTOS
      NaN
      NaN
    
    
      freq
      NaN
      16761
      3792
      1430
      15274
      9822
      409
      49625
      NaN
      NaN
    
    
      mean
      2013.144591
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      1.935500e+04
      2.148433e+04
    
    
      std
      2.585293
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      1.015925e+06
      9.664764e+05
    
    
      min
      2009.000000
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      -5.602840e+07
      -4.478976e+07
    
    
      25%
      2011.000000
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      0.000000e+00
      1.200000e+01
    
    
      50%
      2013.000000
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      5.100000e+01
      1.750000e+02
    
    
      75%
      2015.000000
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      1.682000e+03
      2.403000e+03
    
    
      max
      2017.000000
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      6.154420e+07
      5.104856e+07



In [7]:

    
# Distinct values of TIPO; unique() also reports missing values (nan) when present.
pub_revenue_spend['TIPO'].unique()









    Out[7]:





array(['GASTOS', 'INGRESOS', nan], dtype=object)



In [8]:

    
# Number of distinct SUBTÍTULO values (nunique() ignores missing values by default).
pub_revenue_spend['SUBTÍTULO'].nunique()









    Out[8]:





27



In [9]:

    
# Distinct SUBTÍTULO count within each TIPO. The per-TIPO counts can add up to
# more than the overall distinct count because a SUBTÍTULO label may occur under
# both TIPO values (e.g. 'TRANSFERENCIAS CORRIENTES').
pub_revenue_spend.groupby(['TIPO'])['SUBTÍTULO'].nunique()









    Out[9]:





TIPO
GASTOS      15
INGRESOS    13
Name: SUBTÍTULO, dtype: int64



In [10]:

    
# List, for each TIPO, the distinct SUBTÍTULO values that occur under it.
# The three pieces are emitted by one print call; sep='\n' together with the
# default line ending reproduces the blank lines between sections.
for name, group in pub_revenue_spend.groupby('TIPO'):
    print(
        'tipo: {}\n'.format(name),
        '\t{}'.format(group['SUBTÍTULO'].unique()),
        '\n\n',
        sep='\n',
    )









    



tipo: GASTOS

	['ADQUISICION DE ACTIVOS NO FINANCIEROS                                           '
 'BIENES Y SERVICIOS DE CONSUMO                                                   '
 'GASTOS EN PERSONAL                                                              '
 'INICIATIVAS DE INVERSION                                                        '
 'SALDO FINAL DE CAJA                                                             '
 'SERVICIO DE LA DEUDA                                                            '
 'PRESTACIONES DE SEGURIDAD SOCIAL                                                '
 'TRANSFERENCIAS CORRIENTES                                                       '
 'INTEGROS AL FISCO                                                               '
 'TRANSFERENCIAS DE CAPITAL                                                       '
 'OTROS GASTOS CORRIENTES                                                         '
 'ADQUISICION DE ACTIVOS FINANCIEROS                                              '
 'PRESTAMOS                                                                       '
 'APORTE FISCAL LIBRE                                                             '
 'APORTE FISCAL PARA SERVICIO DE LA DEUDA                                         '
 nan]



tipo: INGRESOS

	['APORTE FISCAL                                                                   '
 'ENDEUDAMIENTO                                                                   '
 'INGRESOS DE OPERACION                                                           '
 'OTROS INGRESOS CORRIENTES                                                       '
 'RECUPERACION DE PRESTAMOS                                                       '
 'SALDO INICIAL DE CAJA                                                           '
 'TRANSFERENCIAS CORRIENTES                                                       '
 'VENTA DE ACTIVOS NO FINANCIEROS                                                 '
 'RENTAS DE LA PROPIEDAD                                                          '
 'TRANSFERENCIAS PARA GASTOS DE CAPITAL                                           '
 'VENTA DE ACTIVOS FINANCIEROS                                                    '
 'IMPOSICIONES PREVISIONALES                                                      '
 'IMPUESTOS                                                                       ']



In [11]:

    
# Show the records whose SUBTÍTULO is missing (boolean-mask selection on isnull()).
pub_revenue_spend[pub_revenue_spend['SUBTÍTULO'].isnull()]









    Out[11]:







  
    
      
      Periodo
      Partida
      Capítulo
      Programa
      SUBTÍTULO
      ITEM
      ASIGNACIÓN
      TIPO
      Ppto_inicial_Real (millones)
      Ejec_Acum_Real (millones)
    
  
  
    
      72215
      2017
      TESORO PÚBLICO
      FISCO
      APORTE FISCAL LIBRE
      NaN
      NaN
      NaN
      GASTOS
      55183
      52132



In [ ]:

    
# Inspect records whose TIPO is missing (TIPO contains nan alongside 'GASTOS'
# and 'INGRESOS'). The previous content of this cell, `pub_revenue_spend.query)`,
# had an unbalanced parenthesis and could not be parsed.
# engine='python' is used so the .isnull() method call is evaluated by pandas itself.
# NOTE(review): the original query expression was never written -- confirm this
# is the filter that was intended.
pub_revenue_spend.query('TIPO.isnull()', engine='python').head()



In [ ]:

	Field name	Field type	Field size	Field format	Range	Description	Example
0	Periodo	Integer	8	#	2009 to 2017	Year of the spend or revenue	2009
1	Nombre Partida	String	65	NaN	29 categories	Ministeries	CONGRESO NACIONAL
2	Nombre Capitulo	String	80	NaN	NaN	Public Services	SENADO
3	Nombre Programa	String	80	NaN	NaN	Programs	SENADO
4	SUBTTULO	String	82	NaN	NaN	First level of the budget classification	ADQUISICION DE ACTIVOS NO FINANCIEROS ...
5	ITEM	String	81	NaN	NaN	Second level of the budget classification	Del Sector Privado ...
6	ASIGNACIN	String	258	NaN	NaN	Third level of the budget classification	Administradora del Fondo para Bonificacin por...
7	TIPO	String	8	NaN	1355 categories	Revenue or spend account	INGRESOS
8	Ppto_inicial_Real	Long	12	#	(-56028405 to 61544202)	Real Amount in millions (chilean pesos) of Bud...	61544202
9	Ejec_Acum_Real	Long	12	#	(-44789758 to 51048556)	Real Amount in millions (chilean pesos) of Spe...	51048556

	Periodo	Partida	Capítulo	Programa	SUBTÍTULO	ITEM	ASIGNACIÓN	TIPO	Ppto_inicial_Real (millones)	Ejec_Acum_Real (millones)
0	2009	CONGRESO NACIONAL	BIBLIOTECA DEL CONGRESO	BIBLIOTECA DEL CONGRESO	ADQUISICION DE ACTIVOS NO FINANCIEROS ...	Edificios ...	NaN	GASTOS	0	2398
1	2009	CONGRESO NACIONAL	BIBLIOTECA DEL CONGRESO	BIBLIOTECA DEL CONGRESO	ADQUISICION DE ACTIVOS NO FINANCIEROS ...	Equipos Informticos ...	NaN	GASTOS	171	143
2	2009	CONGRESO NACIONAL	BIBLIOTECA DEL CONGRESO	BIBLIOTECA DEL CONGRESO	ADQUISICION DE ACTIVOS NO FINANCIEROS ...	Mobiliario y Otros ...	NaN	GASTOS	55	11
3	2009	CONGRESO NACIONAL	BIBLIOTECA DEL CONGRESO	BIBLIOTECA DEL CONGRESO	ADQUISICION DE ACTIVOS NO FINANCIEROS ...	Mquinas y Equipos ...	NaN	GASTOS	48	23
4	2009	CONGRESO NACIONAL	BIBLIOTECA DEL CONGRESO	BIBLIOTECA DEL CONGRESO	ADQUISICION DE ACTIVOS NO FINANCIEROS ...	Programas Informticos ...	NaN	GASTOS	88	87

	Periodo	Partida	Capítulo	Programa	SUBTÍTULO	ITEM	ASIGNACIÓN	TIPO	Ppto_inicial_Real (millones)	Ejec_Acum_Real (millones)
count	72508.000000	72508	72508	72508	72507	62247	33105	72166	7.250800e+04	7.250800e+04
unique	NaN	29	214	370	27	123	7330	2	NaN	NaN
top	NaN	MINISTERIO DEL INTERIOR Y SEGURIDAD PÚBLICA	FISCO	APORTE FISCAL LIBRE	TRANSFERENCIAS CORRIENTES ...	A Otras Entidades Pblicas ...	Subsecretara de Redes Asistenciales ...	GASTOS	NaN	NaN
freq	NaN	16761	3792	1430	15274	9822	409	49625	NaN	NaN
mean	2013.144591	NaN	NaN	NaN	NaN	NaN	NaN	NaN	1.935500e+04	2.148433e+04
std	2.585293	NaN	NaN	NaN	NaN	NaN	NaN	NaN	1.015925e+06	9.664764e+05
min	2009.000000	NaN	NaN	NaN	NaN	NaN	NaN	NaN	-5.602840e+07	-4.478976e+07
25%	2011.000000	NaN	NaN	NaN	NaN	NaN	NaN	NaN	0.000000e+00	1.200000e+01
50%	2013.000000	NaN	NaN	NaN	NaN	NaN	NaN	NaN	5.100000e+01	1.750000e+02
75%	2015.000000	NaN	NaN	NaN	NaN	NaN	NaN	NaN	1.682000e+03	2.403000e+03
max	2017.000000	NaN	NaN	NaN	NaN	NaN	NaN	NaN	6.154420e+07	5.104856e+07