In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [2]:
data = pd.read_csv('../data/2016-08-08-last-year.xz',
                   parse_dates=[16],
                   dtype={'document_id': np.str,
                          'congressperson_id': np.str,
                          'congressperson_document': np.str,
                          'term_id': np.str,
                          'cnpj_cpf': np.str,
                          'reimbursement_number': np.str})

There are 374,484 expenses reimbursed in the past year.


In [3]:
print(data.shape)


(374484, 29)

In [4]:
data.head()


Out[4]:
document_id congressperson_name congressperson_id congressperson_document term state party term_id subquota_number subquota_description ... net_value month year installment passenger leg_of_the_trip batch_number reimbursement_number reimbursement_value applicant_id
0 5886345 ABEL MESQUITA JR. 178957 1 2015 RR DEM 55 1 Maintenance of office supporting parliamentary... ... 165.65 11 2015 0 NaN NaN 1255355 5294 NaN 3074
1 5886361 ABEL MESQUITA JR. 178957 1 2015 RR DEM 55 1 Maintenance of office supporting parliamentary... ... 59.48 12 2015 0 NaN NaN 1255361 5294 NaN 3074
2 5886341 ABEL MESQUITA JR. 178957 1 2015 RR DEM 55 1 Maintenance of office supporting parliamentary... ... 130.95 11 2015 0 NaN NaN 1255355 5294 NaN 3074
3 5928783 ABEL MESQUITA JR. 178957 1 2015 RR DEM 55 1 Maintenance of office supporting parliamentary... ... 193.06 12 2015 0 NaN NaN 1268867 5370 NaN 3074
4 5608486 ABEL MESQUITA JR. 178957 1 2015 RR DEM 55 1 Maintenance of office supporting parliamentary... ... 310.25 2 2015 0 NaN NaN 1168538 4966 NaN 3074

5 rows × 29 columns


In [5]:
data.iloc[0]


Out[5]:
document_id                                                             5886345
congressperson_name                                           ABEL MESQUITA JR.
congressperson_id                                                        178957
congressperson_document                                                       1
term                                                                       2015
state                                                                        RR
party                                                                       DEM
term_id                                                                      55
subquota_number                                                               1
subquota_description          Maintenance of office supporting parliamentary...
subquota_group_id                                                             0
subquota_group_description                                                  NaN
supplier                                COMPANHIA DE AGUAS E ESGOTOS DE RORAIMA
cnpj_cpf                                                         05939467000115
document_number                                                0010100910378000
document_type                                                                 0
issue_date                                                  2015-11-14 00:00:00
document_value                                                           165.65
remark_value                                                                  0
net_value                                                                165.65
month                                                                        11
year                                                                       2015
installment                                                                   0
passenger                                                                   NaN
leg_of_the_trip                                                             NaN
batch_number                                                            1255355
reimbursement_number                                                       5294
reimbursement_value                                                         NaN
applicant_id                                                               3074
Name: 0, dtype: object

In [6]:
def change_type_to_category(column):
    data[column] = data[column].astype('category')

category_columns = ['congressperson_id',
       'state', 'party', 'term_id',
       'subquota_number', 'subquota_group_id',
       'document_type', 'applicant_id', 'congressperson_name']

[change_type_to_category(column) for column in category_columns]; None

Last year, R$213,668,049.56 were spent without public bidding. On average, each of the 374,484 expenses had a value of 570 Reais.


In [7]:
data['net_value'].describe()


Out[7]:
count    374484.000000
mean        570.566565
std        1993.167639
min       -9240.770000
25%          45.000000
50%         134.310000
75%         481.000000
max      189600.000000
Name: net_value, dtype: float64

In [8]:
data['net_value'].sum()


Out[8]:
213668049.56999993

Data seems to contain outliers. Negative net values and other records in the range of hundreds of thousands of Reais.


In [9]:
sns.distplot(data['net_value'])


Out[9]:
<matplotlib.axes._subplots.AxesSubplot at 0x114f98128>

Most expensive document reimbursed by the government: R$189,600.00


In [10]:
most_expensive_reimbursement = \
    data[data['net_value'] == data['net_value'].max()].iloc[0]
most_expensive_reimbursement


Out[10]:
document_id                                               5884288
congressperson_name                           ARNALDO FARIA DE SÁ
congressperson_id                                           73434
congressperson_document                                       337
term                                                         2015
state                                                          SP
party                                                         PTB
term_id                                                        55
subquota_number                                                 5
subquota_description          Publicity of parliamentary activity
subquota_group_id                                               0
subquota_group_description                                    NaN
supplier                                      GRÁFICA TRÊS PONTAS
cnpj_cpf                                           54450705000130
document_number                                          00012480
document_type                                                   0
issue_date                                    2015-12-18 00:00:00
document_value                                             189600
remark_value                                                    0
net_value                                                  189600
month                                                          12
year                                                         2015
installment                                                     0
passenger                                                     NaN
leg_of_the_trip                                               NaN
batch_number                                              1254689
reimbursement_number                                         5266
reimbursement_value                                           NaN
applicant_id                                                  292
Name: 32353, dtype: object

Negative net values

Talking about negative values...


In [11]:
negative_net_values = data[data['net_value'] < 0]
print(len(negative_net_values), len(negative_net_values) / len(data))


17646 0.047120838273464284

In [12]:
sns.distplot(negative_net_values['net_value'])


Out[12]:
<matplotlib.axes._subplots.AxesSubplot at 0x118567550>

Not really sure what they mean.


In [13]:
negative_net_values.sample(random_state=0).iloc[0]


Out[13]:
document_id                                   NaN
congressperson_name               AFONSO FLORENCE
congressperson_id                          160508
congressperson_document                       182
term                                         2015
state                                          BA
party                                          PT
term_id                                        55
subquota_number                               999
subquota_description          Flight ticket issue
subquota_group_id                               0
subquota_group_description                    NaN
supplier                          Cia Aérea - GOL
cnpj_cpf                           07575651000159
document_number                   Bilhete: MYJH2Y
document_type                                   0
issue_date                    2015-09-18 00:00:00
document_value                            -356.73
remark_value                                    0
net_value                                 -356.73
month                                           9
year                                         2015
installment                                     0
passenger                         AFONSO FLORENCE
leg_of_the_trip                           BSB/BSB
batch_number                                    0
reimbursement_number                            0
reimbursement_value                           NaN
applicant_id                                 2406
Name: 5398, dtype: object

According to Eduardo Cuducos on a pull request and the official documentation, negative values comprehend flight tickets issued but not used.


In [14]:
negative_net_values['subquota_description'].unique()


Out[14]:
array(['Flight ticket issue'], dtype=object)

In this specific case, it seems that Afonso Florence purchased a flight ticket of R\$ 556,73. When canceled it, the returned amount was just R\$ 356.73, generating a cancellation cost of R\$200 (or 64%).


In [15]:
flight_tickets = data[data['document_number'] == 'Bilhete: MYJH2Y']
flight_tickets.iloc[0]


Out[15]:
document_id                                   NaN
congressperson_name               AFONSO FLORENCE
congressperson_id                          160508
congressperson_document                       182
term                                         2015
state                                          BA
party                                          PT
term_id                                        55
subquota_number                               999
subquota_description          Flight ticket issue
subquota_group_id                               0
subquota_group_description                    NaN
supplier                          Cia Aérea - GOL
cnpj_cpf                           07575651000159
document_number                   Bilhete: MYJH2Y
document_type                                   0
issue_date                    2015-09-10 00:00:00
document_value                             556.73
remark_value                                    0
net_value                                  556.73
month                                           9
year                                         2015
installment                                     0
passenger                         AFONSO FLORENCE
leg_of_the_trip                           BSB/SSA
batch_number                                    0
reimbursement_number                            0
reimbursement_value                           NaN
applicant_id                                 2406
Name: 5397, dtype: object

In [16]:
flight_tickets.iloc[1]


Out[16]:
document_id                                   NaN
congressperson_name               AFONSO FLORENCE
congressperson_id                          160508
congressperson_document                       182
term                                         2015
state                                          BA
party                                          PT
term_id                                        55
subquota_number                               999
subquota_description          Flight ticket issue
subquota_group_id                               0
subquota_group_description                    NaN
supplier                          Cia Aérea - GOL
cnpj_cpf                           07575651000159
document_number                   Bilhete: MYJH2Y
document_type                                   0
issue_date                    2015-09-18 00:00:00
document_value                            -356.73
remark_value                                    0
net_value                                 -356.73
month                                           9
year                                         2015
installment                                     0
passenger                         AFONSO FLORENCE
leg_of_the_trip                           BSB/BSB
batch_number                                    0
reimbursement_number                            0
reimbursement_value                           NaN
applicant_id                                 2406
Name: 5398, dtype: object

Long (right) tail

Let's try to remove outliers.


In [17]:
dist_range = data['net_value'].mean() + data['net_value'].std() * 3 * np.r_[-1, 1]
wo_outliers = \
    (data['net_value'] >= dist_range[0]) & (data['net_value'] <= dist_range[1])
data_wo_outliers = data[wo_outliers]
sns.distplot(data_wo_outliers['net_value'])


Out[17]:
<matplotlib.axes._subplots.AxesSubplot at 0x11472cd30>

Top 45%

45% of the dataset have net values larger than 3 standard deviations from the mean. Meaning: tail does not contain just a few outliers, but a good portion of the dataset. Let's study what is contained in this long tail (greater than 3 stds).


In [18]:
outliers = data[~data.isin(data_wo_outliers)['document_id']]
print(len(outliers), len(outliers) / len(data))


170204 0.4545027290885592

In [19]:
outliers.head()


Out[19]:
document_id congressperson_name congressperson_id congressperson_document term state party term_id subquota_number subquota_description ... net_value month year installment passenger leg_of_the_trip batch_number reimbursement_number reimbursement_value applicant_id
76 5851959 ABEL MESQUITA JR. 178957 1 2015 RR DEM 55 4 Consultancy, research and technical work ... 18000.0 11 2015 0 NaN NaN 1244585 5242 NaN 3074
77 5887522 ABEL MESQUITA JR. 178957 1 2015 RR DEM 55 4 Consultancy, research and technical work ... 18000.0 12 2015 0 NaN NaN 1255689 5298 NaN 3074
78 5920934 ABEL MESQUITA JR. 178957 1 2015 RR DEM 55 4 Consultancy, research and technical work ... 18000.0 12 2015 0 NaN NaN 1266231 5422 NaN 3074
79 5761747 ABEL MESQUITA JR. 178957 1 2015 RR DEM 55 4 Consultancy, research and technical work ... 20000.0 7 2015 0 NaN NaN 1217319 5124 NaN 3074
80 5793677 ABEL MESQUITA JR. 178957 1 2015 RR DEM 55 4 Consultancy, research and technical work ... 20000.0 8 2015 0 NaN NaN 1226779 5170 NaN 3074

5 rows × 29 columns


In [20]:
outliers['subquota_description'].describe()


Out[20]:
count                  170204
unique                     14
top       Flight ticket issue
freq                   116993
Name: subquota_description, dtype: object

Let's build a ranking of expenses with higher values.


In [21]:
from functools import partial

subquota_number_ranking = outliers. \
    groupby('subquota_number', as_index=False). \
    agg({'net_value': np.nansum})
subquotas = data[['subquota_number', 'subquota_description']]. \
    drop_duplicates('subquota_number', keep='first')
subquota_number_ranking = \
    pd.merge(subquota_number_ranking,
             subquotas,
             how='left',
             on='subquota_number'). \
    sort_values('net_value', ascending=False)

In [22]:
subquota_number_ranking.head()


Out[22]:
subquota_number net_value subquota_description
17 999 48229356.25 Flight ticket issue
3 5 34730760.81 Publicity of parliamentary activity
2 4 16952282.42 Consultancy, research and technical work
12 120 8362929.39 Automotive vehicle renting or charter
6 10 5103956.53 Telecommunication

In [23]:
sns.barplot(x='subquota_description',
            y='net_value',
            data=subquota_number_ranking.head())
locs, labels = plt.xticks()
plt.setp(labels, rotation=90); None


Top 1%

How the top 1% look like?


In [24]:
top_1_percent_num = int(.01 * len(data))
top_1_percent = data. \
    sort_values('net_value', ascending=False). \
    iloc[0:top_1_percent_num + 1]

top_1_percent_subquota_ranking = top_1_percent. \
    groupby('subquota_number', as_index=False). \
    agg({'net_value': np.nansum})
top_1_percent_subquota_ranking = \
    pd.merge(top_1_percent_subquota_ranking,
             subquotas,
             how='left',
             on='subquota_number'). \
    sort_values('net_value', ascending=False)

In [25]:
top_1_percent_subquota_ranking.head()


Out[25]:
subquota_number net_value subquota_description
3 5 31241274.99 Publicity of parliamentary activity
2 4 14737527.82 Consultancy, research and technical work
12 120 5121283.86 Automotive vehicle renting or charter
11 119 1754436.23 Aircraft renting or charter of aircraft
0 1 1728364.32 Maintenance of office supporting parliamentary...

In [26]:
sns.barplot(x='subquota_description',
            y='net_value',
            data=top_1_percent_subquota_ranking.head())
locs, labels = plt.xticks()
plt.setp(labels, rotation=90); None


This is the most expensive reimbursement from last year: R$189,600 for printing 120,000 units of something about the Elderly Statute.


In [27]:
most_expensive_reimbursement


Out[27]:
document_id                                               5884288
congressperson_name                           ARNALDO FARIA DE SÁ
congressperson_id                                           73434
congressperson_document                                       337
term                                                         2015
state                                                          SP
party                                                         PTB
term_id                                                        55
subquota_number                                                 5
subquota_description          Publicity of parliamentary activity
subquota_group_id                                               0
subquota_group_description                                    NaN
supplier                                      GRÁFICA TRÊS PONTAS
cnpj_cpf                                           54450705000130
document_number                                          00012480
document_type                                                   0
issue_date                                    2015-12-18 00:00:00
document_value                                             189600
remark_value                                                    0
net_value                                                  189600
month                                                          12
year                                                         2015
installment                                                     0
passenger                                                     NaN
leg_of_the_trip                                               NaN
batch_number                                              1254689
reimbursement_number                                         5266
reimbursement_value                                           NaN
applicant_id                                                  292
Name: 32353, dtype: object

Found at camara.gov.br, the URL of this expense receipt, in PDF: http://www.camara.gov.br/cota-parlamentar/documentos/publ/292/2015/5884288.pdf

Description of each expense

Let's try to match a document (PDF file) found at http://www.camara.gov.br/cota-parlamentar/index.jsp with this dataset. Can we generate URLs for the documents received for review?

Taking the following PDF as an example: http://www.camara.gov.br/cota-parlamentar/documentos/publ/3016/2015/5651163.pdf


In [28]:
records = \
    (data['applicant_id'] == 3016) & \
    (data['month'] == 4) & \
    (data['subquota_number'] == 3)
data[records].iloc[0]


Out[28]:
document_id                                    5651163
congressperson_name                     ADAIL CARNEIRO
congressperson_id                               178864
congressperson_document                             92
term                                              2015
state                                               CE
party                                               PP
term_id                                             55
subquota_number                                      3
subquota_description              Fuels and lubricants
subquota_group_id                                    1
subquota_group_description        Veículos Automotores
supplier                      BRASAL COMBUSTíVEIS LTDA
cnpj_cpf                                00097626000400
document_number                                 572808
document_type                                        0
issue_date                         2015-04-08 00:00:00
document_value                                     100
remark_value                                         0
net_value                                          100
month                                                4
year                                              2015
installment                                          0
passenger                                          NaN
leg_of_the_trip                                    NaN
batch_number                                   1182395
reimbursement_number                              4997
reimbursement_value                                NaN
applicant_id                                      3016
Name: 414, dtype: object

It works!

From the document PDF, we could extract new features such as names of the products/services purchased, name of the seller, address of the business among other things.


In [29]:
def document_url(record):
    return 'http://www.camara.gov.br/cota-parlamentar/documentos/publ/%s/%s/%s.pdf' % \
        (record['applicant_id'], record['year'], record['document_id'])

record = data[data['document_number'] == '632604'].iloc[0]
record


Out[29]:
document_id                                    5637835
congressperson_name                      ARNALDO JORDY
congressperson_id                               160602
congressperson_document                             18
term                                              2015
state                                               PA
party                                              PPS
term_id                                             55
subquota_number                                      3
subquota_description              Fuels and lubricants
subquota_group_id                                    1
subquota_group_description        Veículos Automotores
supplier                      POSTO SÃO DOMINGOS LTDA.
cnpj_cpf                                63814339000141
document_number                                 632604
document_type                                        0
issue_date                         2015-03-16 00:00:00
document_value                                      20
remark_value                                         0
net_value                                           20
month                                                3
year                                              2015
installment                                          0
passenger                                          NaN
leg_of_the_trip                                    NaN
batch_number                                   1177939
reimbursement_number                              4974
reimbursement_value                                NaN
applicant_id                                      2341
Name: 33266, dtype: object

In [30]:
print(document_url(record))


http://www.camara.gov.br/cota-parlamentar/documentos/publ/2341/2015/5637835.pdf

How about a random record? Is its document_url valid? YES!


In [31]:
record = data.sample(random_state=0).iloc[0]
print(document_url(record))
record


http://www.camara.gov.br/cota-parlamentar/documentos/publ/2398/2015/5635048.pdf
Out[31]:
document_id                                  5635048
congressperson_name                     SIBÁ MACHADO
congressperson_id                             160613
congressperson_document                           58
term                                            2015
state                                             AC
party                                             PT
term_id                                           55
subquota_number                                   13
subquota_description             Congressperson meal
subquota_group_id                                  0
subquota_group_description                       NaN
supplier                      J. BEZERRA DA CUNHA ME
cnpj_cpf                              04036977000110
document_number                                15283
document_type                                      0
issue_date                       2015-02-28 00:00:00
document_value                                   180
remark_value                                       0
net_value                                        180
month                                              2
year                                            2015
installment                                        0
passenger                                        NaN
leg_of_the_trip                                  NaN
batch_number                                 1176917
reimbursement_number                            4973
reimbursement_value                              NaN
applicant_id                                    2398
Name: 331877, dtype: object

Who are these applicants?

There were 803 different people receiving reimbursements last year.


In [32]:
len(data['applicant_id'].unique())


Out[32]:
803

In [33]:
len(data['congressperson_name'].cat.categories)


Out[33]:
803

In [34]:
applicants_by_net_value = \
    pd.DataFrame(data.groupby(['applicant_id'], as_index=False).sum()[['applicant_id', 'net_value']])
applicants_by_net_value.head()


Out[34]:
applicant_id net_value
0 19 416089.93
1 74 396689.18
2 80 477709.44
3 84 22762.99
4 98 395691.19

In [35]:
congressperson_list = data[
    ['applicant_id', 'congressperson_name', 'party', 'state']]
congressperson_list = congressperson_list. \
    drop_duplicates('applicant_id', keep='first')
ranking = pd.merge(applicants_by_net_value,
                   congressperson_list,
                   how='left',
                   on='applicant_id').sort_values('net_value', ascending=False)
ranking.head(10)


Out[35]:
applicant_id net_value congressperson_name party state
450 2333 542411.99 JHONATAN DE JESUS PRB RR
250 1818 537204.88 EDIO LOPES PR RR
216 1703 520499.10 WELLINGTON ROBERTO PR PB
404 2280 516710.27 VINICIUS GURGEL PR AP
117 1289 513407.27 NILTON CAPIXABA PTB RO
545 2473 510808.79 MARCOS ROGÉRIO DEM RO
456 2341 508400.84 ARNALDO JORDY PPS PA
508 2398 506268.40 SIBÁ MACHADO PT AC
666 2969 504966.44 SHÉRIDAN PSDB RR
286 1881 501329.70 MANOEL JUNIOR PMDB PB

In [36]:
ranking['net_value'].describe()


Out[36]:
count       803.000000
mean     266087.234832
std      189000.038242
min       -1791.570000
25%       33621.015000
50%      362625.870000
75%      429826.050000
max      542411.990000
Name: net_value, dtype: float64

In [37]:
graph = sns.barplot(x='congressperson_name',
                    y='net_value',
                    data=ranking)
graph.axes.get_xaxis().set_ticks([]); None



In [38]:
def x_label_generator(record):
    return '%s (%s - %s)' % (record['congressperson_name'],
                             record['party'],
                             record['state'])

ranking['x_label'] = ranking.apply(x_label_generator, axis=1)

Apparently, politicians from states further away from Distrito Federal expent more. We could perform an analysis on distance to the capital and the home state from the politician.


In [39]:
sns.barplot(x='x_label',
            y='net_value',
            data=ranking.head(30))
locs, labels = plt.xticks()
plt.setp(labels, rotation=90); None



In [40]:
list(congressperson_list['congressperson_name'].cat.categories)


Out[40]:
['ABEL MESQUITA JR.',
 'ABELARDO CAMARINHA',
 'ABELARDO LUPION',
 'ACELINO POPÓ',
 'ADAIL CARNEIRO',
 'ADALBERTO CAVALCANTI',
 'ADELMO CARNEIRO LEÃO',
 'ADELSON BARRETO',
 'ADEMIR CAMILO',
 'ADILTON SACHETTI',
 'ADRIAN',
 'AELTON FREITAS',
 'AFONSO FLORENCE',
 'AFONSO HAMM',
 'AFONSO MOTTA',
 'AGUINALDO RIBEIRO',
 'AKIRA OTSUBO',
 'ALAN RICK',
 'ALBERTO FILHO',
 'ALBERTO FRAGA',
 'ALCEU MOREIRA',
 'ALESSANDRO MOLON',
 'ALEX CANZIANI',
 'ALEX MANENTE',
 'ALEXANDRE BALDY',
 'ALEXANDRE LEITE',
 'ALEXANDRE ROSO',
 'ALEXANDRE SANTOS',
 'ALEXANDRE SERFIOTIS',
 'ALEXANDRE SILVEIRA',
 'ALEXANDRE TOLEDO',
 'ALEXANDRE VALLE',
 'ALFREDO KAEFER',
 'ALFREDO NASCIMENTO',
 'ALFREDO SIRKIS',
 'ALICE PORTUGAL',
 'ALIEL MACHADO',
 'ALINE CORRÊA',
 'ALMEIDA LIMA',
 'ALTINEU CÔRTES',
 'ALUISIO MENDES',
 'AMAURI TEIXEIRA',
 'AMIR LANDO',
 'ANA PERUGINI',
 'ANDERSON FERREIRA',
 'ANDRE MOURA',
 'ANDREIA ZITO',
 'ANDRES SANCHEZ',
 'ANDRÉ ABDON',
 'ANDRÉ DE PAULA',
 'ANDRÉ FIGUEIREDO',
 'ANDRÉ FUFUCA',
 'ANDRÉ ZACHAROW',
 'ANGELA ALBINO',
 'ANGELIM',
 'ANGELO VANHONI',
 'ANSELMO DE JESUS',
 'ANTHONY GAROTINHO',
 'ANTONIO BALHMANN',
 'ANTONIO BRITO',
 'ANTONIO BULHÕES',
 'ANTONIO CARLOS MENDES THAME',
 'ANTONIO IMBASSAHY',
 'ANTÔNIA LÚCIA',
 'ANTÔNIO ANDRADE',
 'ANTÔNIO JÁCOME',
 'ANÍBAL GOMES',
 'ARIOSTO HOLANDA',
 'ARLINDO CHINAGLIA',
 'ARMANDO VERGÍLIO',
 'ARNALDO FARIA DE SÁ',
 'ARNALDO JARDIM',
 'ARNALDO JORDY',
 'ARNON BEZERRA',
 'AROLDE DE OLIVEIRA',
 'ARTHUR LIRA',
 'ARTHUR OLIVEIRA MAIA',
 'ARTHUR VIRGÍLIO BISNETO',
 'ARTUR BRUNO',
 'ASSIS CARVALHO',
 'ASSIS DO COUTO',
 'ASSIS MELO',
 'AUGUSTO CARVALHO',
 'AUGUSTO COUTINHO ',
 'AUREO',
 'BACELAR',
 'BALEIA ROSSI',
 'BEBETO',
 'BENEDITA DA SILVA',
 'BENITO GAMA',
 'BENJAMIN MARANHÃO',
 'BERINHO BANTIM',
 'BERNARDO SANTANA DE VASCONCELLOS',
 'BETINHO GOMES',
 'BETINHO ROSADO',
 'BETO ALBUQUERQUE',
 'BETO FARO',
 'BETO MANSUR',
 'BETO ROSADO',
 'BETO SALAME',
 'BIFFI',
 'BILAC PINTO',
 'BOHN GASS',
 'BONIFÁCIO DE ANDRADA',
 'BRUNA FURLAN',
 'BRUNNY',
 'BRUNO ARAÚJO',
 'BRUNO COVAS',
 'BRUNO RODRIGUES',
 'CABO DACIOLO',
 'CABO SABINO',
 'CABUÇU BORGES',
 'CACÁ LEÃO',
 'CAETANO',
 'CAIO NARCIO',
 'CAMILO COLA',
 'CAPITÃO AUGUSTO',
 'CAPITÃO FÁBIO ABREU',
 'CARLOS ALBERTO LERÉIA',
 'CARLOS ANDRADE',
 'CARLOS BEZERRA',
 'CARLOS EDUARDO CADOCA',
 'CARLOS GOMES',
 'CARLOS HENRIQUE GAGUIM',
 'CARLOS MAGNO',
 'CARLOS MANATO',
 'CARLOS MARTINS',
 'CARLOS MARUN',
 'CARLOS MELLES',
 'CARLOS SAMPAIO',
 'CARLOS ZARATTINI',
 'CARMEN ZANOTTO',
 'CELSO JACOB',
 'CELSO MALDANER',
 'CELSO PANSERA',
 'CELSO RUSSOMANNO',
 'CESAR SOUZA',
 'CEZAR SILVESTRI',
 'CHICO ALENCAR',
 "CHICO D'ANGELO",
 'CHICO DAS VERDURAS',
 'CHICO LOPES',
 'CHRISTIANE DE SOUZA YARED',
 'CLARISSA GAROTINHO',
 'CLAUDIO CAJADO',
 'CLEBER VERDE',
 'CLÁUDIO PUTY',
 'CONCEIÇÃO SAMPAIO',
 'COVATTI FILHO',
 'CRISTIANE BRASIL',
 'CÂNDIDO VACCAREZZA',
 'CÉLIO SILVEIRA',
 'CÉSAR HALUM',
 'CÉSAR MESSIAS',
 'CÍCERO ALMEIDA',
 'DAGOBERTO',
 'DALVA FIGUEIREDO',
 'DAMIÃO FELICIANO',
 'DANIEL ALMEIDA',
 'DANIEL COELHO',
 'DANIEL VILELA',
 'DANILO CABRAL',
 'DANILO FORTE',
 'DANRLEI DE DEUS HINTERHOLZ',
 'DARCÍSIO PERONDI',
 'DAVI ALCOLUMBRE',
 'DAVI ALVES SILVA JÚNIOR',
 'DAVIDSON MAGALHÃES',
 'DELEGADO EDSON MOREIRA',
 'DELEGADO PROTÓGENES',
 'DELEGADO WALDIR',
 'DELEGADO ÉDER MAURO',
 'DELEY',
 'DENILSON TEIXEIRA',
 'DEOCLIDES MACEDO',
 'DEVANIR RIBEIRO',
 'DIEGO ANDRADE',
 'DIEGO GARCIA',
 'DILCEU SPERAFICO',
 'DIMAS FABIANO',
 'DOMINGOS DUTRA',
 'DOMINGOS NETO',
 'DOMINGOS SÁVIO',
 'DR. ADILSON SOARES',
 'DR. CARLOS ALBERTO',
 'DR. GRILO',
 'DR. JORGE SILVA',
 'DR. JOÃO',
 'DR. LUIZ FERNANDO',
 'DR. PAULO CÉSAR',
 'DR. ROSINHA',
 'DR. SINVAL MALHEIROS',
 'DR. UBIALI',
 'DRA. ELAINE ABISSAMRA',
 'DUARTE NOGUEIRA',
 'DUDIMAR PAXIUBA',
 'DULCE MIRANDA',
 'DÂMINA PEREIRA',
 'DÉCIO LIMA',
 'EDINHO ARAÚJO',
 'EDINHO BEZ',
 'EDIO LOPES',
 'EDMAR ARRUDA',
 'EDMILSON RODRIGUES',
 'EDSON EZEQUIEL',
 'EDSON PIMENTA',
 'EDSON SANTOS',
 'EDSON SILVA',
 'EDUARDO BARBOSA',
 'EDUARDO BOLSONARO',
 'EDUARDO CUNHA',
 'EDUARDO CURY',
 'EDUARDO DA FONTE',
 'EDUARDO GOMES',
 'EDUARDO SCIARRA',
 'EFRAIM FILHO',
 'ELCIONE BARBALHO',
 'ELEUSES PAIVA',
 'ELI CORRÊA FILHO',
 'ELIENE LIMA',
 'ELISEU PADILHA',
 'ELIZEU DIONIZIO',
 'ELIZIANE GAMA',
 'ELMAR NASCIMENTO',
 'EMANUEL FERNANDES',
 'EMILIANO JOSÉ',
 'ENIO BACCI',
 'ENIO VERRI',
 'ERIKA KOKAY',
 'ERIVELTON SANTANA',
 'EROS BIONDINI',
 'ESPERIDIÃO AMIN',
 'EUDES XAVIER',
 'EURICO JÚNIOR',
 'EVAIR VIEIRA DE MELO',
 'EVANDRO GUSSI',
 'EVANDRO MILHOMEN',
 'EVANDRO ROMAN',
 'EXPEDITO NETTO',
 'EZEQUIEL FONSECA',
 'EZEQUIEL TEIXEIRA',
 'FABIANO HORTA',
 'FABIO GARCIA',
 'FABIO REIS',
 'FABRICIO OLIVEIRA',
 'FAUSTO PINATO',
 'FELIPE BORNIER',
 'FELIPE MAIA',
 'FERNANDO COELHO FILHO',
 'FERNANDO FERRO',
 'FERNANDO FRANCISCHINI',
 'FERNANDO JORDÃO',
 'FERNANDO MARRONI',
 'FERNANDO MONTEIRO',
 'FERNANDO TORRES',
 'FERNANDO ZACHIA',
 'FILIPE PEREIRA',
 'FLAVIANO MELO',
 'FLAVINHO',
 'FLAVIO NOGUEIRA',
 'FLORIANO PESARO',
 'FLÁVIA MORAIS',
 'FRANCISCO CHAGAS',
 'FRANCISCO CHAPADINHA',
 'FRANCISCO FLORIANO',
 'FRANCISCO PRACIANO',
 'FRANCISCO TENÓRIO',
 'FRANKLIN LIMA',
 'FÁBIO FARIA',
 'FÁBIO MITIDIERI',
 'FÁBIO RAMALHO',
 'FÁBIO SOUSA',
 'FÁBIO SOUTO',
 'FÁBIO TRAD',
 'FÁTIMA BEZERRA',
 'FÁTIMA PELAES',
 'FÉLIX MENDONÇA JÚNIOR',
 'GABRIEL CHALITA',
 'GABRIEL GUIMARÃES',
 'GASTÃO VIEIRA',
 'GENECIAS NORONHA',
 'GEORGE HILTON',
 'GEOVANIA DE SÁ',
 'GERALDO RESENDE',
 'GERALDO SIMÕES',
 'GERALDO THADEU',
 'GIACOBO',
 'GILBERTO COUTINHO',
 'GILBERTO NASCIMENTO',
 'GIOVANI CHERINI',
 'GIOVANI FELTES',
 'GIOVANNI QUEIROZ',
 'GIROTO',
 'GIUSEPPE VECCI',
 'GIVALDO CARIMBÃO',
 'GIVALDO VIEIRA',
 'GLADSON CAMELI',
 'GLAUBER BRAGA',
 'GONZAGA PATRIOTA',
 'GORETE PEREIRA',
 'GOULART',
 'GUILHERME CAMPOS',
 'GUILHERME MUSSI',
 'GUSTAVO PETTA',
 'HEITOR SCHUCH',
 'HELCIO SILVA',
 'HELDER SALOMÃO',
 'HENRIQUE AFONSO',
 'HENRIQUE EDUARDO ALVES',
 'HENRIQUE FONTANA',
 'HENRIQUE OLIVEIRA',
 'HERCULANO PASSOS',
 'HERMES PARCIANELLO',
 'HERÁCLITO FORTES',
 'HEULER CRUVINEL',
 'HILDO ROCHA',
 'HIRAN GONÇALVES',
 'HISSA ABRAHÃO',
 'HUGO LEAL',
 'HUGO MOTTA',
 'HUGO NAPOLEÃO',
 'HUMBERTO MICHILES',
 'HÉLIO LEITE',
 'HÉLIO SANTOS',
 'IARA BERNARDI',
 'INDIO DA COSTA',
 'INOCÊNCIO OLIVEIRA',
 'IRACEMA PORTELLA',
 'IRAJÁ ABREU',
 'IRINY LOPES',
 'IRMÃO LAZARO',
 'IVAN VALENTE',
 'IZALCI',
 'JAIME MARTINS',
 'JAIR BOLSONARO',
 'JAIRO ATAÍDE',
 'JANDIRA FEGHALI',
 'JANETE CAPIBERIBE',
 'JANETE ROCHA PIETÁ',
 'JAQUELINE RORIZ',
 'JARBAS VASCONCELOS',
 'JEAN WYLLYS',
 'JEFFERSON CAMPOS',
 'JERÔNIMO GOERGEN',
 'JESUS RODRIGUES',
 'JHC',
 'JHONATAN DE JESUS',
 'JOAQUIM PASSARINHO',
 'JONY MARCOS',
 'JORGE BITTAR',
 'JORGE BOEIRA',
 'JORGE CÔRTE REAL',
 'JORGE SOLLA',
 'JORGE TADEU MUDALEN',
 'JORGINHO MELLO',
 'JOSE STÉDILE',
 'JOSEPH BANDEIRA',
 'JOSI NUNES',
 'JOSIAS GOMES',
 'JOSUÉ BENGTSON',
 'JOSÉ AIRTON CIRILO',
 'JOSÉ ANÍBAL',
 'JOSÉ AUGUSTO MAIA',
 'JOSÉ CARLOS ALELUIA',
 'JOSÉ CARLOS ARAÚJO',
 'JOSÉ CHAVES',
 'JOSÉ FOGAÇA ',
 'JOSÉ GUIMARÃES',
 'JOSÉ HUMBERTO',
 'JOSÉ LINHARES',
 'JOSÉ MENTOR',
 'JOSÉ NUNES',
 'JOSÉ OTÁVIO GERMANO',
 'JOSÉ PRIANTE',
 'JOSÉ REINALDO',
 'JOSÉ ROCHA',
 'JOVAIR ARANTES',
 'JOZI ARAÚJO',
 'JOÃO ANANIAS',
 'JOÃO ARRUDA',
 'JOÃO BITTAR',
 'JOÃO CALDAS',
 'JOÃO CAMPOS',
 'JOÃO CARLOS BACELAR',
 'JOÃO CASTELO',
 'JOÃO DADO',
 'JOÃO DANIEL',
 'JOÃO DERLY',
 'JOÃO FERNANDO COUTINHO',
 'JOÃO GUALBERTO',
 'JOÃO LYRA',
 'JOÃO MAGALHÃES',
 'JOÃO MAIA',
 'JOÃO MARCELO SOUZA',
 'JOÃO PAULO KLEINÜBING',
 'JOÃO PAULO LIMA',
 'JOÃO PAULO PAPA',
 'JOÃO PIZZOLATTI',
 'JOÃO RODRIGUES',
 'JULIO LOPES',
 'JULIÃO AMIN',
 'JUNIOR MARRECA',
 'JUNJI ABE',
 'JUSCELINO FILHO',
 'JUTAHY JUNIOR',
 'JÂNIO NATAL',
 'JÉSSICA SALES',
 'JÔ MORAES',
 'JÚLIA MARINHO',
 'JÚLIO CAMPOS',
 'JÚLIO CESAR',
 'JÚLIO DELGADO',
 'JÚNIOR COIMBRA',
 'KAIO MANIÇOBA',
 'KEIKO OTA',
 'LAEL VARELLA',
 'LAERCIO OLIVEIRA',
 'LAERTE BESSA',
 'LAUDIVIO CARVALHO',
 'LAURA CARNEIRO',
 'LAURIETE',
 'LEANDRE',
 'LEANDRO VILELA',
 'LELO COIMBRA',
 'LEO DE BRITO',
 'LEONARDO MONTEIRO',
 'LEONARDO PICCIANI',
 'LEONARDO QUINTÃO',
 'LEOPOLDO MEYER',
 'LEÔNIDAS CRISTINO',
 'LIDERANÇA DO PSDB',
 'LIDERANÇA DO PT',
 'LILIAM SÁ',
 'LINCOLN PORTELA',
 'LINDOMAR GARÇON',
 'LIRA MAIA',
 'LOBBE NETO',
 'LOURIVAL MENDES',
 'LUCAS VERGILIO',
 'LUCI CHOINACKI',
 'LUCIANA SANTOS',
 'LUCIANO CASTRO',
 'LUCIANO DUCCI',
 'LUCIANO PIZZATTO',
 'LUCIO MOSQUINI',
 'LUCIO VIEIRA LIMA',
 'LUIS CARLOS HEINZE',
 'LUIS TIBÉ',
 'LUIZ ALBERTO',
 'LUIZ ARGÔLO',
 'LUIZ CARLOS',
 'LUIZ CARLOS BUSATO',
 'LUIZ CARLOS HAULY',
 'LUIZ CARLOS RAMOS ',
 'LUIZ CLÁUDIO',
 'LUIZ COUTO',
 'LUIZ DE DEUS',
 'LUIZ FERNANDO FARIA',
 'LUIZ FERNANDO MACHADO',
 'LUIZ LAURO FILHO',
 'LUIZ NISHIMORI',
 'LUIZ OTAVIO',
 'LUIZ PITIMAN',
 'LUIZ SÉRGIO',
 'LUIZA ERUNDINA',
 'LUIZIANNE LINS',
 'LÁZARO BOTELHO',
 'LÉO OLIVEIRA',
 'LÚCIO VALE',
 'MACEDO',
 'MAGDA MOFATTO',
 'MAGELA',
 'MAIA FILHO',
 'MAJOR FÁBIO',
 'MAJOR OLIMPIO',
 'MANDETTA',
 'MANOEL ANTUNES',
 'MANOEL JUNIOR',
 'MANOEL SALVIANO',
 'MANUEL ROSA NECA',
 "MANUELA D'ÁVILA",
 'MARA GABRILLI',
 'MARCELINHO CARIOCA',
 'MARCELO AGUIAR',
 'MARCELO ALMEIDA',
 'MARCELO ARO',
 'MARCELO BELINATI',
 'MARCELO CASTRO',
 'MARCELO MATOS',
 'MARCELO SQUASSONI',
 'MARCELO ÁLVARO ANTÔNIO',
 'MARCIO ALVINO',
 'MARCIO BITTAR',
 'MARCIO JUNQUEIRA',
 'MARCIO MONTEIRO',
 'MARCO ANTÔNIO CABRAL',
 'MARCO MAIA',
 'MARCO TEBALDI',
 'MARCON',
 'MARCONDES GADELHA',
 'MARCOS ABRÃO',
 'MARCOS LIMA',
 'MARCOS MEDRADO',
 'MARCOS MONTES',
 'MARCOS REATEGUI',
 'MARCOS ROGÉRIO',
 'MARCOS ROTTA',
 'MARCOS SOARES',
 'MARCUS PESTANA',
 'MARCUS VICENTE',
 'MARGARIDA SALOMÃO',
 'MARIA DO ROSÁRIO',
 'MARIA HELENA',
 'MARIA LUCIA PRANDI ',
 'MARIANA CARVALHO',
 'MARINALDO ROSENDO',
 'MARINHA RAUPP',
 'MARLLOS SAMPAIO',
 'MARQUINHO MENDES',
 'MARX BELTRÃO',
 'MARÇAL FILHO',
 'MASSAMI MIKI',
 'MAURO BENEVIDES',
 'MAURO LOPES',
 'MAURO MARIANI',
 'MAURO PEREIRA',
 'MAURÍCIO ALMEIDA ',
 'MAURÍCIO QUINTELLA LESSA',
 'MAURÍCIO TRINDADE',
 'MAX FILHO',
 'MENDONÇA FILHO',
 'MENDONÇA PRADO',
 'MERLONG SOLANO',
 'MIGUEL CORRÊA',
 'MIGUEL HADDAD',
 'MIGUEL LOMBARDI',
 'MILTON MONTI',
 'MIRIQUINHO BATISTA',
 'MIRO TEIXEIRA',
 'MISAEL VARELLA',
 'MISSIONÁRIO JOSÉ OLIMPIO',
 'MOEMA GRAMACHO',
 'MOREIRA MENDES',
 'MORONI TORGAN',
 'MOSES RODRIGUES',
 'MÁRCIO BIOLCHI',
 'MÁRCIO FRANÇA',
 'MÁRCIO MACÊDO',
 'MÁRCIO MARINHO',
 'MÁRIO FEITOZA',
 'MÁRIO HERINGER',
 'MÁRIO NEGROMONTE JR.',
 'NARCIO RODRIGUES',
 'NELSON MARCHEZAN JUNIOR',
 'NELSON MARQUEZELLI',
 'NELSON MEURER',
 'NELSON NAHIM ',
 'NELSON PADOVANI',
 'NELSON PELLEGRINO',
 'NEWTON CARDOSO',
 'NEWTON CARDOSO JR',
 'NEWTON LIMA',
 'NICE LOBÃO',
 'NILDA GONDIM',
 'NILMÁRIO MIRANDA',
 'NILSON LEITÃO',
 'NILSON PINTO',
 'NILTO TATTO',
 'NILTON CAPIXABA',
 'ODAIR CUNHA',
 'ODELMO LEÃO',
 'ODORICO MONTEIRO',
 'ODÍLIO BALBINOTTI',
 'OLIVEIRA FILHO',
 'ONOFRE SANTO AGOSTINI',
 'ONYX LORENZONI',
 'ORLANDO SILVA',
 'OSMAR BERTOLDI',
 'OSMAR JÚNIOR',
 'OSMAR SERRAGLIO',
 'OSMAR TERRA',
 'OSVALDO COELHO',
 'OSVALDO REIS',
 'OTAVIO LEITE',
 'OTONIEL LIMA',
 'OZIEL OLIVEIRA',
 'PADRE JOÃO',
 'PADRE TON',
 'PAES LANDIM',
 'PASTOR EURICO',
 'PAUDERNEY AVELINO',
 'PAULO ABI-ACKEL',
 'PAULO AZI',
 'PAULO BORNHAUSEN',
 'PAULO FEIJÓ',
 'PAULO FOLETTO',
 'PAULO FREIRE',
 'PAULO HENRIQUE LUSTOSA',
 'PAULO MAGALHÃES',
 'PAULO MALUF',
 'PAULO PEREIRA DA SILVA',
 'PAULO PIMENTA',
 'PAULO RUBEM SANTIAGO',
 'PAULO TEIXEIRA',
 'PAULÃO',
 'PDT',
 'PEDRO CHAVES',
 'PEDRO CUNHA LIMA',
 'PEDRO EUGÊNIO',
 'PEDRO FERNANDES',
 'PEDRO NOVAIS',
 'PEDRO PAULO',
 'PEDRO UCZAI',
 'PEDRO VILELA',
 'PENNA',
 'PEPE VARGAS',
 'PERPÉTUA ALMEIDA',
 'PINTO DE LUNA',
 'PINTO ITAMARATY',
 'POLICARPO',
 'POMPEO DE MATTOS',
 'PR. MARCO FELICIANO',
 'PROFESSOR SETIMO',
 'PROFESSOR SÉRGIO DE OLIVEIRA',
 'PROFESSOR VICTÓRIO GALLI',
 'PROFESSORA DORINHA SEABRA REZENDE',
 'PROFESSORA MARCIVANIA',
 'PROS',
 'RAFAEL MOTTA',
 'RAIMUNDO GOMES DE MATOS',
 'RAQUEL MUNIZ',
 'RATINHO JUNIOR',
 'RAUL JUNGMANN',
 'RAUL LIMA',
 'REBECCA GARCIA',
 'REGINALDO LOPES',
 'REGUFFE',
 'REINHOLD STEPHANES',
 'REJANE DIAS',
 'REMÍDIO MONAI',
 'RENAN FILHO',
 'RENATA ABREU',
 'RENATO MOLLING',
 'RENATO SIMÕES',
 'RENZO BRAZ',
 'RICARDO BARROS',
 'RICARDO IZAR',
 'RICARDO TEOBALDO ',
 'RICARDO TRIPOLI',
 'ROBERTO ALVES',
 'ROBERTO BALESTRA',
 'ROBERTO BRITTO',
 'ROBERTO DE LUCENA',
 'ROBERTO DORNER',
 'ROBERTO FREIRE',
 'ROBERTO GÓES',
 'ROBERTO SALES',
 'ROBERTO SANTIAGO',
 'ROBERTO TEIXEIRA',
 'ROCHA',
 'RODRIGO BETHLEM',
 'RODRIGO DE CASTRO',
 'RODRIGO GARCIA',
 'RODRIGO MAIA',
 'RODRIGO MARTINS',
 'RODRIGO PACHECO',
 'ROGÉRIO CARVALHO',
 'ROGÉRIO MARINHO',
 'ROGÉRIO PENINHA MENDONÇA',
 'ROGÉRIO ROSSO',
 'ROMÁRIO',
 'RONALDO BENEDET',
 'RONALDO CAIADO',
 'RONALDO CARLETTO',
 'RONALDO FONSECA',
 'RONALDO LESSA',
 'RONALDO MARTINS',
 'RONALDO NOGUEIRA',
 'RONALDO ZULKE',
 'ROSANE FERREIRA',
 'ROSANGELA GOMES',
 'ROSE DE FREITAS',
 'ROSSONI',
 'ROSY DE SOUSA',
 'ROSÂNGELA CURADO',
 'RUBENS BUENO',
 'RUBENS OTONI',
 'RUBENS PEREIRA JÚNIOR',
 'RUI COSTA',
 'RUY CARNEIRO',
 'RÔMULO GOUVEIA',
 'RÔNEY NEMER',
 'SABINO CASTELO BRANCO',
 'SALVADOR ZIMBALDI',
 'SAMUEL MOREIRA',
 'SANDES JÚNIOR',
 'SANDRA ROSADO',
 'SANDRO ALEX',
 'SANDRO MABEL',
 'SARAIVA FELIPE',
 'SARNEY FILHO',
 'SDD',
 'SEBASTIÃO BALA ROCHA',
 'SEBASTIÃO OLIVEIRA',
 'SERGIO SOUZA ',
 'SERGIO VIDIGAL',
 'SERGIO ZVEITER',
 'SEVERINO NINHO',
 'SHÉRIDAN',
 'SIBÁ MACHADO',
 'SILAS BRASILEIRO',
 'SILAS CÂMARA',
 'SILAS FREIRE',
 'SILVIO COSTA',
 'SILVIO TORRES',
 'SIMONE MORGADO',
 'SIMPLÍCIO ARAÚJO',
 'SIMÃO SESSIM',
 'SORAYA SANTOS',
 'STEFANO AGUIAR',
 'STEPAN NERCESSIAN',
 'SUBTENENTE GONZAGA',
 'SUELI VIDIGAL',
 'SÁGUAS MORAES',
 'SÉRGIO BARRADAS CARNEIRO',
 'SÉRGIO BRITO',
 'SÉRGIO MORAES',
 'SÉRGIO REIS',
 'SÓSTENES CAVALCANTE',
 'TADEU ALENCAR',
 'TAKAYAMA',
 'TAUMATURGO LIMA',
 'TELMA PINHEIRO',
 'TENENTE LÚCIO',
 'TEREZA CRISTINA',
 'THIAGO PEIXOTO',
 'TIA ERON',
 'TIRIRICA',
 'TONINHO PINHEIRO',
 'TONINHO WANDSCHEER',
 'ULDURICO JUNIOR',
 'URZENI ROCHA',
 'VALADARES FILHO',
 'VALDIR COLATTO',
 'VALMIR ASSUNÇÃO',
 'VALMIR PRASCIDELLI',
 'VALTENIR PEREIRA',
 'VANDER LOUBET',
 'VANDERLEI MACRIS',
 'VANDERLEI SIRAQUE',
 'VAZ DE LIMA',
 'VENEZIANO VITAL DO RÊGO',
 'VICENTE ARRUDA',
 'VICENTE CANDIDO',
 'VICENTINHO',
 'VICENTINHO JÚNIOR',
 'VICTOR MENDES',
 'VIEIRA DA CUNHA',
 'VILALBA',
 'VILMAR ROCHA',
 'VILSON COVATTI',
 'VINICIUS CARVALHO',
 'VINICIUS GURGEL',
 'VITOR LIPPI',
 'VITOR PAULO',
 'VITOR PENIDO',
 'VITOR VALIM',
 'WADIH DAMOUS',
 'WADSON RIBEIRO',
 'WALDENOR PEREIRA',
 'WALDIR MARANHÃO',
 'WALNEY ROCHA',
 'WALTER ALVES',
 'WALTER IHOSHI',
 'WALTER TOSTA',
 'WANDENKOLK GONÇALVES',
 'WASHINGTON REIS',
 'WELITON PRADO',
 'WELLINGTON FAGUNDES',
 'WELLINGTON ROBERTO',
 'WELLINGTON SALGADO',
 'WEVERTON ROCHA',
 'WILLIAM DIB',
 'WILLIAM WOO',
 'WILSON BESERRA',
 'WILSON FILHO',
 'WLADIMIR COSTA',
 'WOLNEY QUEIROZ',
 'ZE CARLOS DA PESCA',
 'ZECA CAVALCANTI',
 'ZECA DIRCEU',
 'ZECA DO PT',
 'ZENAIDE MAIA',
 'ZOINHO',
 'ZÉ AUGUSTO NALIN',
 'ZÉ CARLOS',
 'ZÉ GERALDO',
 'ZÉ SILVA',
 'ZÉ VIEIRA',
 'ÁTILA LINS',
 'ÁTILA LIRA',
 'ÂNGELO AGNOLIN',
 'ÍRIS DE ARAÚJO']

A few congressperson_names I can't properly explain yet:


In [41]:
sdd = data[data['congressperson_name'] == 'SDD'].sample(random_state=0).iloc[0]
print(document_url(sdd))


http://www.camara.gov.br/cota-parlamentar/documentos/publ/2864/2015/5700427.pdf

721 expenses reimbursed to parties.


In [42]:
parties = congressperson_list[congressperson_list['party'].isnull()]
parties


Out[42]:
applicant_id congressperson_name party state
206216 2442 LIDERANÇA DO PSDB NaN NaN
206345 2439 LIDERANÇA DO PT NaN NaN
286699 2715 PDT NaN NaN
294136 2865 PROS NaN NaN
326358 2864 SDD NaN NaN

In [43]:
party_expenses = data[data['applicant_id'].isin(parties['applicant_id'])]
len(party_expenses)


Out[43]:
721

In [44]:
party_expenses.head()


Out[44]:
document_id congressperson_name congressperson_id congressperson_document term state party term_id subquota_number subquota_description ... net_value month year installment passenger leg_of_the_trip batch_number reimbursement_number reimbursement_value applicant_id
206216 5623603 LIDERANÇA DO PSDB NaN NaN 0 NaN NaN NaN 3 Fuels and lubricants ... 106.20 3 2015 0 NaN NaN 1173314 4958 NaN 2442
206217 5711868 LIDERANÇA DO PSDB NaN NaN 0 NaN NaN NaN 3 Fuels and lubricants ... 137.20 6 2015 0 NaN NaN 1201648 5066 NaN 2442
206218 5623622 LIDERANÇA DO PSDB NaN NaN 0 NaN NaN NaN 3 Fuels and lubricants ... 115.88 2 2015 0 NaN NaN 1173310 4964 NaN 2442
206219 5673529 LIDERANÇA DO PSDB NaN NaN 0 NaN NaN NaN 3 Fuels and lubricants ... 96.90 5 2015 0 NaN NaN 1188923 5025 NaN 2442
206220 5719383 LIDERANÇA DO PSDB NaN NaN 0 NaN NaN NaN 3 Fuels and lubricants ... 118.61 6 2015 0 NaN NaN 1203914 5073 NaN 2442

5 rows × 29 columns

Expenses abroad

Are the expenses made outside of Brazil easily identifiable?


In [45]:
wo_cnpj_cpf = data[data['cnpj_cpf'].isnull()]
len(wo_cnpj_cpf)


Out[45]:
48268

In [46]:
wo_cnpj_cpf.head()


Out[46]:
document_id congressperson_name congressperson_id congressperson_document term state party term_id subquota_number subquota_description ... net_value month year installment passenger leg_of_the_trip batch_number reimbursement_number reimbursement_value applicant_id
92 NaN ABEL MESQUITA JR. 178957 1 2015 RR DEM 55 10 Telecommunication ... 99.44 6 2015 0 NaN NaN 0 0 NaN 3074
93 NaN ABEL MESQUITA JR. 178957 1 2015 RR DEM 55 10 Telecommunication ... 49.62 7 2015 0 NaN NaN 0 0 NaN 3074
94 NaN ABEL MESQUITA JR. 178957 1 2015 RR DEM 55 10 Telecommunication ... 16.26 3 2015 0 NaN NaN 0 0 NaN 3074
95 NaN ABEL MESQUITA JR. 178957 1 2015 RR DEM 55 10 Telecommunication ... 177.05 4 2015 0 NaN NaN 0 0 NaN 3074
96 NaN ABEL MESQUITA JR. 178957 1 2015 RR DEM 55 10 Telecommunication ... 115.54 3 2015 0 NaN NaN 0 0 NaN 3074

5 rows × 29 columns


In [47]:
wo_cnpj_cpf.sample(random_state=10).iloc[0]


Out[47]:
document_id                                             NaN
congressperson_name                             MAURO LOPES
congressperson_id                                     74749
congressperson_document                                 252
term                                                   2015
state                                                    MG
party                                                  PMDB
term_id                                                  55
subquota_number                                          11
subquota_description                        Postal services
subquota_group_id                                         0
subquota_group_description                              NaN
supplier                      CORREIOS - SEDEX CONVENCIONAL
cnpj_cpf                                                NaN
document_number                               RSP : 2106004
document_type                                             1
issue_date                       2015-01-19 12:39:05.247000
document_value                                        30.04
remark_value                                              0
net_value                                             30.04
month                                                     1
year                                                   2015
installment                                               0
passenger                                               NaN
leg_of_the_trip                                         NaN
batch_number                                              0
reimbursement_number                                      0
reimbursement_value                                     NaN
applicant_id                                           1125
Name: 251309, dtype: object

We could match politicians' location (from oficial agenda and social networks GPS info) with their expenses in a future analysis.


In [48]:
wo_cnpj_cpf['supplier'].unique()


Out[48]:
array(['CELULAR FUNCIONAL', 'RAMAL', 'CORREIOS - SEDEX CONVENCIONAL',
       'IMÓVEL FUNCIONAL', 'CORREIOS - CARTA COMERCIAL',
       'CORREIOS - ENCOMENDA PAC', 'CORREIOS - SEDEX 10',
       'CORREIOS - SEDEX 10 COM AR',
       'CORREIOS - SEDEX CONVENCIONAL COM AR',
       'CORREIOS - CARTA REGISTRADA', 'CORREIOS - CARTA REGISTRADA COM AR',
       'CORREIOS - ENCOMENDA PAC COM AR', 'CORREIOS - SEDEX 12',
       'CORREIOS - TELEGRAMA COM CONFIRMAÇÃO', 'CORREIOS - EMS MERCADORIA',
       'CORREIOS - EMS MERCADORIA COM AR', 'CORREIOS - TELEGRAMA',
       'CORREIOS - MALA DIRETA POSTAL BÁSICA',
       'CORREIOS - SEDEX 12 COM AR', 'CORREIOS - EMS DOCUMENTO',
       'CORREIOS - SEDEX MUNDI', 'CopaAirlines', 'Bellagio',
       'COLOSSEUM BAR', 'DGI', "GORDON RAMSAY'S", "JOHNNY ROCKET'S",
       'MANDALAY BAY SHOWROOM', 'SHAKE SHACK', 'STARBUCKS STORE',
       'Bellagio Las Vegas', 'AceCab', 'Anlv Cab', 'Cash Selected',
       'VeriFone', 'CORREIOS - MALA DIRETA POSTAL DOMICILIÁRIA',
       'Le pain Quat idien', 'Pollo Cabana', 'Union Station', "Willy K's",
       'HOLYDAY INN', 'CORREIOS - TELEGRAMA COM CÓPIA',
       'CORREIOS - TELEGRAMA COM CÓPIA E CONFIRMAÇÃO',
       'CORREIOS - DOCUMENTO PRIORITÁRIO REGISTRADO COM AR',
       'El Viejo Y El Mar', 'Hotelera San Francisco S.A',
       'Intercity Premiun', 'CORREIOS - SEDEX HOJE',
       'CORREIOS - SEDEX HOJE COM AR', 'GOL LINHAS AÉREAS',
       'TAM LINHAS AÉREAS S.A.', 'DAZZLER HOTEL MONTEVIDEO',
       'ACADEMIC TRAVEL TURISMO LTDA', 'DA PENTELLA', 'RESTAURANTE PATRIA',
       'CORREIOS - DOCUMENTO PRIORITÁRIO', 'CORREIOS - MALOTE',
       'PRESIDENT AND FELLOWS OF HARVARD COLLEGE',
       'PRESIDENT AND FELLOWS OF HAVARD', 'AMERICAN AIRLINES',
       'DELTA AIR LINES', 'PARK LANE HOTEL', 'PARK LANE HOTEL NEW YORK',
       'CORREIOS - EMS DOCUMENTO COM AR', 'BIROME S.A LA PERDIZ',
       'PARRILHA EL FOGON S.A', 'CORREIOS - MERCADORIA ECONÔMICA', 'EL AL',
       'TAP AIR', 'TAP PORTUGAL', 'Despesa alimentação Israel',
       'PADIGLIONE GIAPPONE', 'AIRPORT HOTEL & CONFERENCE CENTRE',
       'Citadines Apart Hotel',
       'DAN PANORAMA TEL AVIV HOTEL & CONVENTION CENTER',
       'Hotel Trip Lisboa', "STARHOTELS L'ITÁLIA NEL CUORE",
       'TRYP BY WYNDHAM LISBOA AEROPORTO',
       'CASA TUA CUCINA DELLE PROVINCE TOSCANE',
       'Jo cintra Tailor Made Tours', 'decolar.com',
       'CORREIOS - DOCUMENTO PRIORITÁRIO REGISTRADO', 'LUMIAR TURISMO',
       'AMUSH S.A', 'LA CHACRA DEL PUERTO',
       'LA PARRILLA DE WILIMAN GUAVIARE S.A', 'Dazzler Hotel Montevideo',
       'Renata Teixeira Taiana ME', 'TAM LINHAS AÉREAS S/A',
       'Arecenter S.A.', 'DAZZLER Hotel Montevideo',
       'Uno Buenos Aires Suites', 'Cabana Veronica', 'Cafe Patria',
       'Camelia', 'Don Peperone', 'La Compania del Oriente', 'La Criolla',
       'BOUDIN BAKERY - San Francisco', 'Cafe Bellini - San Francisco',
       'California Pizza Kitchen',
       'DGI - Internacional Meal Company Panama S.A.',
       'DGI - Internacional Meal Company Panamá S.A.', 'IL CARPACCIO',
       'MAPISA S.A.', 'The Academy Cafe - San Francisco  CA',
       'The Cheesecake Factory Restaurants Natiowide',
       'Tomokazu Japanese Cusine',
       "Whole Foods Market - America's Healthiest Grocery Store",
       'Crowne Polaza San Francisco International Airport - Burlingame, CA - EUA',
       'INTERCONTINETAL MARK HOPKINS SAN FRANCISCO - ONE NOB HILL',
       'Pacific Inn of Redwood City - CA - EUA',
       'SFMTA - San Francisco  - CA', 'UBER TAXI  - San Francisco  CA',
       'UBER Taxi - San Francisco  CA', 'UBER Taxi - SanFrancisco  CA',
       'UNITED BRASIL', "Fishbeck's Shell", 'Houston St. Bistrô',
       'Oceanaire Seafood Room', 'Restaurant Las Canarias', 'STARBUCKS',
       'Wildcat Express', 'ZORI BISTRO', 'FRTOPERADORA DE TURISMO LTDA',
       'JW MARRIOT HOTEL', 'CAFE PATRIA',
       'CENTER ON THE DEVELOPING CHILD - NCPI', 'EXPOUY S.A',
       'El Palenque', 'Frog Maxi Shop', 'Pablo Andrés Paleo Inzaurralde',
       'AK design hotel', 'Mercat Taxi', 'cutcsa', 'KLM CIA REAL',
       'JET HOTEL', 'KLIMA HOTEL MILANO FIERE', 'NH EMBASCIATORI',
       'NH TORINO EMBASCIATORI', 'CONSORZIO TORINESE AUTOSERVIZI',
       'IT TAXI', 'PRONTO TAXIS5737.IT', 'RADIO TAXI', 'RADIO TAXI 6969',
       'SCARICA L´APP TAXI', 'TAXIBLU', 'TRENITALIA', 'GARCIA',
       'DAZZLER MONTEVIDEO', 'kempinsk hotel bristol',
       'A & S TURISMO E EXCURSÕES LTDA', 'A&S TURISMO E EXCURSÕES LTDA',
       'ASCANIO TOUR', 'Hotel Nacional de Cuba', 'HAPPENING STGO',
       'PARK PLAZA HOTEL - SANTIAGO/CHILE', 'Café Patria',
       'dazzler hotel montevideo', '7 ELEVEN',
       'CORREIOS - LEVE INTERNACIONAL',
       'KLEVITZ E KLAS VIAGENS E TURISMO LTDA EPP', 'ELIOR ROISSY SARL',
       "SARL L' EQUIPE VOUS REMERCIE", 'NICKS LIGHTHOUSE', 'NOLA',
       'TACOLICIOUS', 'THE CHEESECAKE FACTORY', 'THE VOYA RESTAURANT',
       'PACIFIC EURO HOTEL OF REDWOOD CITY', 'Gol Linhas Aereas',
       'TAM LINHAS AÉREAS.', 'NÚCLEO CIÊNCIA PELA INFÂNCIA',
       'CENTER ON THE  DEVELOPING CHILD', 'CENTER ON THE DEVELOPING CHILD',
       'SHERATON COMMANDER HOTEL-CAMBRIDGE',
       'COPA – COMPANIA PANAMERICA DE AVIAÇÃO S.A (COPA AIRLINES)',
       'BARCELONA', 'COMMITTEE', 'CROWNE PLAZA-MANHATTAN', 'GIULIA',
       'LEGAL SEA FOODS', 'MET BACK BAY', 'RUSSELL HOUSE',
       "SARDI'S RESTAURANT", 'TOP OF THE HUB', 'VINO VOLO',
       'DAMUS LIMOUSINE SERVICE', 'AIRFRANCE', 'AIRFRRANCE',
       'PARRILLADA EL FOGON S.A', 'Delta Air Lines', 'HOTEL NACIONAL',
       'CORREIOS - (EM DESUSO) MALA DIRETA POSTAL ESPECIAL', 'El Fogón',
       'EL FOGÓN', 'ESTANCIA DEL PUERTO', 'La Chacra Del Puerto',
       'PATRIA EXPRESS', 'THE FAMOUS GROUSE', 'Radio Taxi Comunitario',
       'Táxi', 'Taxi', 'TAXI', 'VOYENTAXI', 'Voyentaxi', 'HOLIDAY INN',
       'DAKOTA', 'DAKOTA ESTRELLA BRILHANTE SA', 'EL PALENQUE', 'HRU S.A',
       'Restaurant 1921', 'SOFITEL', 'SOFTEL LUXURY HOTELS', 'DAZZLER',
       'BORGATTA', 'EL VIEJO Y EL MAR', 'PUNTA CUORE',
       'UNO BUENOS AIRS SUITES', 'DAKOTA ESTRELLA BRILHANTE S.A.',
       'Center on the Developing Child Harvard University',
       'ARCOS DORADOS URUGUAY S.A.', 'arcos dorados uruguay sa',
       'BIARRITZ SRL', 'DAKOTA ESTRELA BRILHANTE S.A.',
       'dakota estrella brillante sa', 'Burguer Redwood City',
       "Marina's Cafe", 'The Cheesecake Factory SS', 'The Voya Restaurant',
       'THE EURO PACIFIC HOTEL', 'THE PACIFIC EURO HOTEL',
       'A Yellow TaxiCabs', 'SF TOWN TAXI', 'Taxicab Fare',
       'Yellow CAB Cooperative', 'YELLOW CARD SRVS'], dtype=object)

Let's see how one that we know for sure being from another country, try to find specificities. Aparently, nothing special about it.


In [49]:
montevideo_expense = wo_cnpj_cpf[wo_cnpj_cpf['supplier'] == 'Dazzler Hotel Montevideo'].iloc[0]
montevideo_expense


Out[49]:
document_id                                                             5773449
congressperson_name                                               DOMINGOS NETO
congressperson_id                                                        143632
congressperson_document                                                      97
term                                                                       2015
state                                                                        CE
party                                                                       PSD
term_id                                                                      55
subquota_number                                                              14
subquota_description          Lodging, except for congressperson from Distri...
subquota_group_id                                                             0
subquota_group_description                                                  NaN
supplier                                               Dazzler Hotel Montevideo
cnpj_cpf                                                                    NaN
document_number                                                           37264
document_type                                                                 3
issue_date                                                  2015-08-18 00:00:00
document_value                                                           706.54
remark_value                                                                  0
net_value                                                                706.54
month                                                                         8
year                                                                       2015
installment                                                                   0
passenger                                                                   NaN
leg_of_the_trip                                                             NaN
batch_number                                                            1220672
reimbursement_number                                                       5137
reimbursement_value                                                         NaN
applicant_id                                                               2329
Name: 94608, dtype: object

In [50]:
print(document_url(montevideo_expense))


http://www.camara.gov.br/cota-parlamentar/documentos/publ/2329/2015/5773449.pdf

In [51]:
wo_cnpj_cpf['supplier'] = wo_cnpj_cpf['supplier'].str.lower()
ranking_suppliers_wo_cnpj = wo_cnpj_cpf. \
    groupby('supplier', as_index=False). \
    count()[['supplier', 'applicant_id']]. \
    sort_values('applicant_id', ascending=False)
ranking_suppliers_wo_cnpj.head()


/Users/irio/anaconda3/envs/serenata_de_amor/lib/python3.5/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
Out[51]:
supplier applicant_id
33 celular funcional 12244
66 correios - sedex convencional 10163
169 ramal 6238
45 correios - carta comercial 5870
55 correios - encomenda pac 3207

In [52]:
expenses_in_brazil = ranking_suppliers_wo_cnpj['supplier'].str.contains('correios') | \
    ranking_suppliers_wo_cnpj['supplier'].isin([
            'celular funcional',
            'imóvel funcional',
            'ramal'])
ranking_suppliers_wo_cnpj[~expenses_in_brazil]


Out[52]:
supplier applicant_id
87 dazzler hotel montevideo 27
216 verifone 11
174 russell house 7
124 it taxi 5
193 taxi 4
18 bellagio 4
98 el palenque 4
36 center on the developing child - ncpi 3
37 center on the developing child harvard university 3
99 el viejo y el mar 3
148 nola 3
27 cafe patria 3
149 núcleo ciência pela infância 3
109 gol linhas aéreas 3
154 padiglione giappone 2
130 klevitz e klas viagens e turismo ltda epp 2
133 la chacra del puerto 2
88 dazzler montevideo 2
155 park lane hotel 2
163 president and fellows of havard 2
77 cutcsa 2
167 radio taxi 6969 2
97 el fogón 2
173 restaurante patria 2
90 delta air lines 2
187 tacolicious 2
165 punta cuore 2
112 holiday inn 2
214 uno buenos aires suites 2
203 the voya restaurant 2
... ... ...
127 johnny rocket's 1
128 jw marriot hotel 1
129 kempinsk hotel bristol 1
131 klima hotel milano fiere 1
132 klm cia real 1
134 la compania del oriente 1
135 la criolla 1
113 holyday inn 1
111 happening stgo 1
84 damus limousine service 1
96 el al 1
85 dan panorama tel aviv hotel & convention center 1
86 dazzler 1
89 decolar.com 1
91 despesa alimentação israel 1
92 dgi 1
93 dgi - internacional meal company panama s.a. 1
94 dgi - internacional meal company panamá s.a. 1
95 don peperone 1
100 elior roissy sarl 1
110 gordon ramsay's 1
101 estancia del puerto 1
102 expouy s.a 1
103 fishbeck's shell 1
104 frog maxi shop 1
105 frtoperadora de turismo ltda 1
106 garcia 1
107 giulia 1
108 gol linhas aereas 1
224 zori bistro 1

191 rows × 2 columns


In [53]:
expense = data[data['supplier'].str.lower() == 'gordon ramsay\'s'].iloc[0]
expense


Out[53]:
document_id                               5660757
congressperson_name                  AFONSO MOTTA
congressperson_id                          178835
congressperson_document                       490
term                                         2015
state                                          RS
party                                         PDT
term_id                                        55
subquota_number                                13
subquota_description          Congressperson meal
subquota_group_id                               0
subquota_group_description                    NaN
supplier                          GORDON RAMSAY'S
cnpj_cpf                                      NaN
document_number                           8881096
document_type                                   2
issue_date                    2015-04-14 00:00:00
document_value                              83.45
remark_value                                    0
net_value                                   83.45
month                                           4
year                                         2015
installment                                     0
passenger                                     NaN
leg_of_the_trip                               NaN
batch_number                              1184874
reimbursement_number                         5009
reimbursement_value                           NaN
applicant_id                                 2880
Name: 7363, dtype: object

In [54]:
print(document_url(expense))


http://www.camara.gov.br/cota-parlamentar/documentos/publ/2880/2015/5660757.pdf

In [ ]: