In [139]:
import pandas as pd
import numpy as np
In [140]:
df_qt_adt_con = pd.read_csv('../data/qtde_aditivos_contrato.csv', sep=';', index_col=0,dtype={'nu_CPFCNPJ': str})
df_qt_adt_cnpj = pd.read_csv('../data/qtde_aditivos_cpfcnpj.csv', sep=';', index_col=0, dtype={'nu_CPFCNPJ': str})
df_qt_con_cnpj = pd.read_csv('../data/qtde_contratos_empresa.csv', sep=';', index_col=0, dtype={'nu_CPFCNPJ': str})
df_qt_lic_tip = pd.read_csv('../data/qtde_licitacoes_por_tipo_por_cnpj.csv', sep=';', dtype={'nu_CPFCNPJ': str})
df_qt_par_tip = pd.read_csv('../data/qtde_participacoes_por_tipo_por_cnpj.csv', sep=';', dtype={'nu_CPFCNPJ': str})
df_val_con = pd.read_csv('../data/valor_contratos_por_cnpj.csv', sep=';', index_col=0, dtype={'nu_CPFCNPJ': str})
df_val_tip = pd.read_csv('../data/valor_por_licitacao_por_cnpj.csv', sep=';',index_col=0, dtype={'nu_CPFCNPJ': str})
In [141]:
df_qt_adt_con.head()
Out[141]:
In [142]:
df_qt_adt_cnpj.head()
Out[142]:
In [143]:
df_qt_con_cnpj.head()
Out[143]:
In [144]:
#df_qt_con_gan.head()
In [145]:
df_qt_lic_tip.head()
Out[145]:
In [146]:
df_qt_par_tip.head()
Out[146]:
In [147]:
df_val_con.head()
Out[147]:
In [148]:
df_val_tip.head()
Out[148]:
In [149]:
df_merge = df_qt_adt_cnpj.merge(df_qt_con_cnpj, on='nu_CPFCNPJ')
df_merge.head()
Out[149]:
In [150]:
df_merge = df_merge.merge(df_val_con, on='nu_CPFCNPJ')
df_merge.head()
Out[150]:
In [151]:
df_val_tip = df_val_tip.groupby(["nu_CPFCNPJ", "tp_Licitacao"])["vl_Contrato"].sum().reset_index()
df_merge_2 = df_val_tip.merge(df_qt_par_tip,on=["nu_CPFCNPJ", "tp_Licitacao"])
df_merge_2 = df_merge_2[(df_merge_2.tp_Licitacao == 0) | (df_merge_2.tp_Licitacao == 2) | (df_merge_2.tp_Licitacao == 3)]
df_merge_2.head()
Out[151]:
In [152]:
df_merge_2["valor_total_pregao"] = df_merge_2[df_merge_2.tp_Licitacao == 0]["vl_Contrato"]
df_merge_2["valor_total_tomada"] = df_merge_2[df_merge_2.tp_Licitacao == 2]["vl_Contrato"]
df_merge_2["valor_total_convite"] = df_merge_2[df_merge_2.tp_Licitacao == 3]["vl_Contrato"]
df_merge_2.head()
Out[152]:
In [153]:
df_merge_2["participacoes_pregao"] = df_merge_2[df_merge_2.tp_Licitacao == 0]["participacoes"]
df_merge_2["participacoes_tomada"] = df_merge_2[df_merge_2.tp_Licitacao == 2]["participacoes"]
df_merge_2["participacoes_convite"] = df_merge_2[df_merge_2.tp_Licitacao == 3]["participacoes"]
df_merge_2 = df_merge_2.fillna(value=0)
df_merge_2 = df_merge_2.groupby(["nu_CPFCNPJ"],as_index=False).sum()
df_merge_2 = df_merge_2.drop("tp_Licitacao", axis=1)
df_merge_2.head(100)
Out[153]:
In [155]:
df_merge = df_merge.merge(df_merge_2, on="nu_CPFCNPJ")
df_merge.head()
Out[155]:
In [ ]:
df_merge.to_csv(path_or_buf="../data/features.csv", sep=';')