Merge Boladão


In [139]:
import pandas as pd
import numpy as np

In [140]:
df_qt_adt_con = pd.read_csv('../data/qtde_aditivos_contrato.csv', sep=';', index_col=0,dtype={'nu_CPFCNPJ': str})
df_qt_adt_cnpj = pd.read_csv('../data/qtde_aditivos_cpfcnpj.csv', sep=';', index_col=0, dtype={'nu_CPFCNPJ': str})
df_qt_con_cnpj = pd.read_csv('../data/qtde_contratos_empresa.csv', sep=';', index_col=0, dtype={'nu_CPFCNPJ': str})
df_qt_lic_tip = pd.read_csv('../data/qtde_licitacoes_por_tipo_por_cnpj.csv', sep=';', dtype={'nu_CPFCNPJ': str})
df_qt_par_tip = pd.read_csv('../data/qtde_participacoes_por_tipo_por_cnpj.csv', sep=';', dtype={'nu_CPFCNPJ': str})
df_val_con = pd.read_csv('../data/valor_contratos_por_cnpj.csv', sep=';', index_col=0, dtype={'nu_CPFCNPJ': str})
df_val_tip = pd.read_csv('../data/valor_por_licitacao_por_cnpj.csv', sep=';',index_col=0, dtype={'nu_CPFCNPJ': str})

In [141]:
df_qt_adt_con.head()


Out[141]:
nu_Contrato qtdContratosAditivos
0 000000209 1
1 000000239 2
2 000012002 8
3 000012003 6
4 000012004 3

In [142]:
df_qt_adt_cnpj.head()


Out[142]:
nu_CPFCNPJ qtdAditivosPorCPFCNPJ
0 191.0 37
1 16438.0 11
2 52159.0 12
3 75957.0 4
4 135950.0 11

In [143]:
df_qt_con_cnpj.head()


Out[143]:
nu_CPFCNPJ qtdContratos
0 191.0 10
1 16438.0 1
2 52159.0 1
3 75957.0 1
4 135950.0 1

In [144]:
#df_qt_con_gan.head()

In [145]:
df_qt_lic_tip.head()


Out[145]:
cd_Credor tp_Licitacao total
0 8129436 3 8
1 8147418 3 5
2 131523430 3 3
3 740546430 7 1
4 740580450 3 26

In [146]:
df_qt_par_tip.head()


Out[146]:
nu_CPFCNPJ tp_Licitacao participacoes
0 0 3 1
1 191 3 1
2 191 6 3
3 191 7 13
4 191 9 1

In [147]:
df_val_con.head()


Out[147]:
nu_CPFCNPJ vl_TotalContrato
0 191.0 4129353.8
1 16438.0 160000.0
2 52159.0 1.0
3 75957.0 4000.0
4 135950.0 1.0

In [148]:
df_val_tip.head()


Out[148]:
nu_CPFCNPJ tp_Licitacao vl_Contrato
0 191.0 3.0 14000.0
1 191.0 6.0 2825024.8
2 191.0 7.0 1290329.0
3 16438.0 7.0 160000.0
4 52159.0 7.0 1.0

In [149]:
df_merge = df_qt_adt_cnpj.merge(df_qt_con_cnpj, on='nu_CPFCNPJ')
df_merge.head()


Out[149]:
nu_CPFCNPJ qtdAditivosPorCPFCNPJ qtdContratos
0 191.0 37 10
1 16438.0 11 1
2 52159.0 12 1
3 75957.0 4 1
4 135950.0 11 1

In [150]:
df_merge = df_merge.merge(df_val_con, on='nu_CPFCNPJ')
df_merge.head()


Out[150]:
nu_CPFCNPJ qtdAditivosPorCPFCNPJ qtdContratos vl_TotalContrato
0 191.0 37 10 4129353.8
1 16438.0 11 1 160000.0
2 52159.0 12 1 1.0
3 75957.0 4 1 4000.0
4 135950.0 11 1 1.0

NÃO ALTERE AS PRÓXIMAS LINHAS


In [151]:
df_val_tip = df_val_tip.groupby(["nu_CPFCNPJ", "tp_Licitacao"])["vl_Contrato"].sum().reset_index()

df_merge_2 = df_val_tip.merge(df_qt_par_tip,on=["nu_CPFCNPJ", "tp_Licitacao"])
df_merge_2 = df_merge_2[(df_merge_2.tp_Licitacao == 0) | (df_merge_2.tp_Licitacao == 2) | (df_merge_2.tp_Licitacao == 3)]
df_merge_2.head()


Out[151]:
nu_CPFCNPJ tp_Licitacao vl_Contrato participacoes
1 10015275000152 3 79575.0 1
2 10102168000161 3 109500.0 2
4 10140642000140 3 71257.0 5
6 10142026000128 3 64587.2 2
14 10158142000135 3 80900.0 1

In [152]:
df_merge_2["valor_total_pregao"] = df_merge_2[df_merge_2.tp_Licitacao == 0]["vl_Contrato"]
df_merge_2["valor_total_tomada"] = df_merge_2[df_merge_2.tp_Licitacao == 2]["vl_Contrato"]
df_merge_2["valor_total_convite"] = df_merge_2[df_merge_2.tp_Licitacao == 3]["vl_Contrato"]
df_merge_2.head()


Out[152]:
nu_CPFCNPJ tp_Licitacao vl_Contrato participacoes valor_total_pregao valor_total_tomada valor_total_convite
1 10015275000152 3 79575.0 1 NaN NaN 79575.0
2 10102168000161 3 109500.0 2 NaN NaN 109500.0
4 10140642000140 3 71257.0 5 NaN NaN 71257.0
6 10142026000128 3 64587.2 2 NaN NaN 64587.2
14 10158142000135 3 80900.0 1 NaN NaN 80900.0

In [153]:
df_merge_2["participacoes_pregao"] = df_merge_2[df_merge_2.tp_Licitacao == 0]["participacoes"]
df_merge_2["participacoes_tomada"] = df_merge_2[df_merge_2.tp_Licitacao == 2]["participacoes"]
df_merge_2["participacoes_convite"] = df_merge_2[df_merge_2.tp_Licitacao == 3]["participacoes"]
df_merge_2 = df_merge_2.fillna(value=0)
df_merge_2 = df_merge_2.groupby(["nu_CPFCNPJ"],as_index=False).sum()
df_merge_2 = df_merge_2.drop("tp_Licitacao", axis=1)
df_merge_2.head(100)


Out[153]:
nu_CPFCNPJ vl_Contrato participacoes valor_total_pregao valor_total_tomada valor_total_convite participacoes_pregao participacoes_tomada participacoes_convite
0 10015275000152 79575.00 1 0.00 0.00 79575.00 0.0 0.0 1.0
1 10102168000161 109500.00 2 0.00 0.00 109500.00 0.0 0.0 2.0
2 10140642000140 71257.00 5 0.00 0.00 71257.00 0.0 0.0 5.0
3 10142026000128 64587.20 2 0.00 0.00 64587.20 0.0 0.0 2.0
4 10158142000135 80900.00 1 0.00 0.00 80900.00 0.0 0.0 1.0
5 10171194000141 45285.00 1 0.00 0.00 45285.00 0.0 0.0 1.0
6 10175041000172 133450.00 5 31050.00 0.00 102400.00 1.0 0.0 4.0
7 10175806000174 98798.22 3 0.00 0.00 98798.22 0.0 0.0 3.0
8 10190291000181 84009.98 1 0.00 84009.98 0.00 0.0 1.0 0.0
9 10194075000104 43534.00 9 21984.00 0.00 21550.00 5.0 0.0 4.0
10 10197914000148 496289.25 6 408189.00 0.00 88100.25 4.0 0.0 2.0
11 10199574000194 95683.44 2 0.00 0.00 95683.44 0.0 0.0 2.0
12 10202212000105 79700.00 1 0.00 0.00 79700.00 0.0 0.0 1.0
13 10204697000176 494852.46 2 0.00 494852.46 0.00 0.0 2.0 0.0
14 10212999000196 55200.00 2 0.00 0.00 55200.00 0.0 0.0 2.0
15 10213388000162 132593.09 2 0.00 0.00 132593.09 0.0 0.0 2.0
16 10214137000100 43850.00 4 0.00 0.00 43850.00 0.0 0.0 4.0
17 10218588000108 176776.77 10 32338.77 0.00 144438.00 2.0 0.0 8.0
18 10220390000169 1365.00 1 0.00 0.00 1365.00 0.0 0.0 1.0
19 10226284000192 470153.35 34 0.00 179270.00 290883.35 0.0 5.0 29.0
20 10234481000153 4500029.75 3 0.00 0.00 4500029.75 0.0 0.0 3.0
21 10242282000197 1263284.00 22 0.00 0.00 1263284.00 0.0 0.0 22.0
22 10245250000145 1394.00 1 0.00 0.00 1394.00 0.0 0.0 1.0
23 10247764000130 853921.35 36 0.00 0.00 853921.35 0.0 0.0 36.0
24 10254819000139 51237.73 1 0.00 51237.73 0.00 0.0 1.0 0.0
25 10254948000127 86550.00 2 86550.00 0.00 0.00 2.0 0.0 0.0
26 10258562000193 955726.34 14 0.00 0.00 955726.34 0.0 0.0 14.0
27 10258613000187 19725.00 2 0.00 0.00 19725.00 0.0 0.0 2.0
28 10261810000155 44698.98 2 0.00 33054.44 11644.54 0.0 1.0 1.0
29 10261825000113 176030.00 4 0.00 0.00 176030.00 0.0 0.0 4.0
... ... ... ... ... ... ... ... ... ...
70 10374173000123 42500.00 4 0.00 0.00 42500.00 0.0 0.0 4.0
71 10377336000121 93218.26 3 0.00 0.00 93218.26 0.0 0.0 3.0
72 10382660000138 14400.00 4 0.00 0.00 14400.00 0.0 0.0 4.0
73 10391965000106 45000.00 1 0.00 0.00 45000.00 0.0 0.0 1.0
74 10399218000114 42906.75 1 0.00 0.00 42906.75 0.0 0.0 1.0
75 10402165000143 553025.85 12 0.00 0.00 553025.85 0.0 0.0 12.0
76 10403325000179 28000.00 1 0.00 0.00 28000.00 0.0 0.0 1.0
77 10405018000127 98340.00 6 75240.00 0.00 23100.00 3.0 0.0 3.0
78 10408838000172 1247544.00 38 366868.64 175401.20 705274.16 6.0 3.0 29.0
79 10409223000160 5942793.05 59 0.00 2546912.59 3395880.46 0.0 9.0 50.0
80 10414278000169 866301.40 63 0.00 56000.40 810301.00 0.0 2.0 61.0
81 10414854000178 8650.00 1 8650.00 0.00 0.00 1.0 0.0 0.0
82 10418144000116 71375.00 3 0.00 0.00 71375.00 0.0 0.0 3.0
83 10420157000120 150350.00 1 0.00 150350.00 0.00 0.0 1.0 0.0
84 10425286000100 1625011.92 55 540548.00 0.00 1084463.92 4.0 0.0 51.0
85 10426216000177 7153855.00 1 0.00 0.00 7153855.00 0.0 0.0 1.0
86 10427090000155 231700.00 17 0.00 0.00 231700.00 0.0 0.0 17.0
87 10427951000103 134072.50 2 134072.50 0.00 0.00 2.0 0.0 0.0
88 10432612000107 79595.00 2 0.00 0.00 79595.00 0.0 0.0 2.0
89 10433358000161 12000.00 1 0.00 0.00 12000.00 0.0 0.0 1.0
90 10436658000102 60000.00 1 60000.00 0.00 0.00 1.0 0.0 0.0
91 10443640000120 77628.15 1 0.00 0.00 77628.15 0.0 0.0 1.0
92 10445253000122 760488.47 19 293840.76 0.00 466647.71 4.0 0.0 15.0
93 10446347000116 101429.90 3 0.00 0.00 101429.90 0.0 0.0 3.0
94 10451464000178 584100.75 6 524100.75 0.00 60000.00 3.0 0.0 3.0
95 10451754000111 1588631.12 24 0.00 1442333.53 146297.59 0.0 16.0 8.0
96 10454756000164 353198.00 29 0.00 0.00 353198.00 0.0 0.0 29.0
97 10459145000109 770507.67 3 0.00 770507.67 0.00 0.0 3.0 0.0
98 10460168000133 105222.50 3 0.00 0.00 105222.50 0.0 0.0 3.0
99 10461975000170 150600.00 2 0.00 0.00 150600.00 0.0 0.0 2.0

100 rows × 9 columns


In [155]:
df_merge = df_merge.merge(df_merge_2, on="nu_CPFCNPJ")
df_merge.head()


Out[155]:
nu_CPFCNPJ qtdAditivosPorCPFCNPJ qtdContratos vl_TotalContrato vl_Contrato_x participacoes_x valor_total_pregao_x valor_total_tomada_x valor_total_convite_x participacoes_pregao_x participacoes_tomada_x participacoes_convite_x vl_Contrato_y participacoes_y valor_total_pregao_y valor_total_tomada_y valor_total_convite_y participacoes_pregao_y participacoes_tomada_y participacoes_convite_y
0 10140642000140 26 8 415668.77 71257.0 5 0.0 0.0 71257.0 0.0 0.0 5.0 71257.0 5 0.0 0.0 71257.0 0.0 0.0 5.0
1 10142026000128 19 3 67887.20 64587.2 2 0.0 0.0 64587.2 0.0 0.0 2.0 64587.2 2 0.0 0.0 64587.2 0.0 0.0 2.0
2 10158142000135 11 1 80900.00 80900.0 1 0.0 0.0 80900.0 0.0 0.0 1.0 80900.0 1 0.0 0.0 80900.0 0.0 0.0 1.0
3 10171194000141 4 1 45285.00 45285.0 1 0.0 0.0 45285.0 0.0 0.0 1.0 45285.0 1 0.0 0.0 45285.0 0.0 0.0 1.0
4 10175041000172 45 4 144250.00 133450.0 5 31050.0 0.0 102400.0 1.0 0.0 4.0 133450.0 5 31050.0 0.0 102400.0 1.0 0.0 4.0

In [ ]:
df_merge.to_csv(path_or_buf="../data/features.csv", sep=';')