notebook.community

Edit and run



In [ ]:



In [1]:

    
import pandas as pd
import glob
import os
import numpy as np
import matplotlib.pyplot as plt
import sklearn
import sklearn.ensemble
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, train_test_split, cross_val_predict, learning_curve
import sklearn.metrics

%matplotlib inline
%load_ext autoreload
%autoreload 2

# The DataFrame has a lot of columns, so raise pandas' display limit
# to let more of them show up when a frame is rendered.
pd.options.display.max_columns = 100

Load the NLP results



In [2]:

    
# Folder holding the output of the NLP step, relative to this notebook.
path = '../../datas/nlp_results/'
voting_df = pd.read_csv(os.path.join(path, 'voting_with_topics_unique_sentiment.csv'))
print('Entries in the DataFrame', voting_df.shape)

# NOTE(review): the commented-out lines below are the only place where a `voting`
# frame with a 'Name' column is built. They are kept for reference because a later
# cell calls split_df(voting, 'Name') — TODO confirm which CSV they were written for.
#
# Dropping the useless column:
#voting_df = voting_df.drop('Unnamed: 0',1)
#
# Converting the columns that should be numeric:
#num_cols = ['BillTitle', 'BusinessTitle','text','text_eng','FirstName','LastName']
#voting = voting_df.drop(num_cols,axis=1).apply(pd.to_numeric)
#voting['text'] = voting_df.text
# Inserting the full name at the second position:
#voting.insert(1,'Name', voting_df['FirstName'] + ' ' + voting_df['LastName'])

voting_df.head(3)









    



Entries in the DataFrame (3470, 17)






    Out[2]:






  
    
      
      armée
      asile / immigration
      assurances
      budget
      dunno
      entreprise/ finance
      environnement
      famille / enfants
      imposition
      politique internationale
      retraite
      text
      text_eng
      positive
      negative
      neutral
      compound
    
  
  
    
      0
      0.006995
      0.930066
      0.006993
      0.006993
      0.006993
      0.006994
      0.006993
      0.006993
      0.006993
      0.006993
      0.006993
      Arrêté fédéral concernant la contribution de l...
      Federal decree on Switzerland's contribution i...
      0.075
      0.000
      0.925
      0.4404
    
    
      1
      0.018182
      0.018185
      0.018183
      0.271557
      0.564795
      0.018183
      0.018182
      0.018183
      0.018183
      0.018183
      0.018184
      Renforcement du Traité sur la non-proliférati...
      Strengthening of the Treaty on the non-prolif...
      0.227
      0.206
      0.567
      0.0772
    
    
      2
      0.015152
      0.389858
      0.015152
      0.232520
      0.256405
      0.015152
      0.015152
      0.015152
      0.015152
      0.015154
      0.015152
      Une zone exempte d'armes nucléaires au coeur ...
      A nuclear weapon free zone in the heart of Eu...
      0.264
      0.176
      0.560
      0.2732

Split the votes by deputy



In [ ]:

    
def split_df(df, field):
    """
        Partitions `df` according to the values found in the column `field`.

        Returns a dictionary mapping every distinct value of df[field] to the
        sub-DataFrame made of the rows where the column equals that value.
        The number of distinct values is printed as a side effect.
    """
    # Distinct values of the column, in order of first appearance
    distinct_values = df[field].unique()
    print('Number of unique entries in', field, ':', len(distinct_values))
    # One filtered DataFrame per distinct value
    return {value: df.loc[df[field] == value] for value in distinct_values}

# NOTE(review): `voting` is not assigned in any cell visible above (the only lines
# building it are commented out in the loading cell), so on a fresh kernel this call
# presumably fails with NameError — TODO confirm where `voting` (with a 'Name' column)
# is supposed to come from.
voting_dict = split_df(voting, 'Name')



In [ ]:

    
# Per-deputy tally of decisions: maps each key of voting_dict to a Series indexed
# by the 'Decision' value and holding the number of rows carrying that value.
# The per-deputy plt.show() was removed: the plotting line was commented out, so it
# displayed nothing. To inspect one deputy, run
# depute_dict[name].plot(kind='bar', title=name).
depute_dict = {
    deputee: df_deputee.groupby('Decision')['Decision'].count()
    for deputee, df_deputee in voting_dict.items()
}

Statistics on each deputy's decisions (yes/no/abstention) with regard to the topics



In [ ]:

    
# For every decision code, find the deputy with the largest number of such votes
# and the deputy for whom that decision makes up the largest share of their votes.
# Codes run from 1 to len(Decisions); Decisions[code - 1] is the label printed for
# a code (code 4 has an empty label).
Decisions = ['Yes','No','Abstention','','Away','Excused','President']
max_ = [0]*len(Decisions)          # highest count seen so far, per decision code
frequency = [0]*len(Decisions)     # highest share seen so far, per decision code
winner_max = ['']*len(Decisions)   # deputy holding max_[code - 1]
winner_freq = ['']*len(Decisions)  # deputy holding frequency[code - 1]

for deputee_frame, counts in depute_dict.items():
    total = counts.sum()
    for i in range(1, len(Decisions) + 1):
        if i not in counts.index:
            continue
        # .loc makes the lookup explicitly label-based (the label is the decision code)
        count = counts.loc[i]
        share = count / total
        # Strict comparisons: on a tie the first deputy encountered stays the winner
        if share > frequency[i-1]:
            frequency[i-1] = share
            winner_freq[i-1] = deputee_frame
        if count > max_[i-1]:
            max_[i-1] = count
            winner_max[i-1] = deputee_frame

for i, label in enumerate(Decisions):
    # Decisions that nobody ever took are not reported
    if max_[i] != 0:
        print("for {0} :".format(label))
        print("{0} has the highest frequency:{1}".format(winner_freq[i], frequency[i]))
        print("{0} has the highest value:{1}".format(winner_max[i], max_[i]))
        print()



In [ ]:

    
# Read every per-session voting CSV and stack them into a single DataFrame.
dataset_tmp = []
path = '../../datas/scrap/Voting'
# glob returns matches in arbitrary order; sorting keeps the row order reproducible.
allFiles = sorted(glob.glob(os.path.join(path, 'Session*.csv')))
if not allFiles:
    # Fail with an explicit message instead of pd.concat's "No objects to concatenate"
    raise FileNotFoundError('No Session*.csv file found in ' + path)

for file_ in allFiles:
    print(file_)
    data_tmp = pd.read_csv(file_)
    print(data_tmp.shape)
    dataset_tmp.append(data_tmp)
data_frame = pd.concat(dataset_tmp)
print(data_frame.shape)
# Drop the leftover index column from the combined frame. Dropping it from
# `data_tmp` after the concat only affects the last file's frame and leaves
# data_frame untouched. `columns=` is used because the positional axis argument
# of DataFrame.drop was removed in pandas 2.0.
data_frame = data_frame.drop(columns='Unnamed: 0', errors='ignore')



In [ ]:

    
# Keep only the columns needed downstream and build the text used as merge key.
columns = ['BillTitle','BusinessTitle','Canton','Decision','FirstName','LastName','ParlGroupName','VoteEnd']
# .copy() makes treated_data an independent frame, so adding the 'text' column
# below does not raise SettingWithCopyWarning on a selection of data_frame.
treated_data = data_frame[columns].copy()
treated_data['text'] = treated_data['BillTitle'] + ' ' + treated_data['BusinessTitle']
treated_data.head()



In [ ]:

    
# Columns of voting_df that are not candidates for the 'subject' label
# (raw texts and sentiment scores).
columns= ['text','text_eng','positive','negative','neutral','compound']
# `columns=` keyword: the positional axis argument of DataFrame.drop was removed in pandas 2.0.
vote = voting_df.drop(columns=columns)
# For each row, 'subject' is the name of the remaining column with the highest value
# (presumably the per-topic weights — TODO confirm no other numeric column remains).
# .assign returns a new frame, avoiding SettingWithCopyWarning on the voting_df selection.
to_merge = voting_df[['text']].assign(subject=vote.idxmax(axis=1))



In [ ]:

    
# Preview the first rows of to_merge.
to_merge.head()



In [ ]:

    
# Inner join of the vote records with the labelled texts; no key is given, so pandas
# joins on the columns the two frames have in common.
data_for_viz = treated_data.merge(to_merge, how='inner')



In [ ]:

    
# Keep only the first 7 characters of each VoteEnd string
# (presumably the 'YYYY-MM' part of a timestamp — TODO confirm the format).
# The vectorised .str accessor leaves missing values as NaN instead of raising
# TypeError like a Python-level slice on a float NaN would.
data_for_viz['VoteEnd'] = data_for_viz['VoteEnd'].str[:7]



In [ ]:

    
# Preview the first rows of data_for_viz.
data_for_viz.head()



In [ ]:

    
# Write the VoteEnd column alone (as a one-column frame) to a JSON file
# in the current working directory.
data_for_viz.loc[:, ['VoteEnd']].to_json('viz_data_vote_month.json')



In [ ]:

    
# Display the rows ordered by VoteEnd (the sorted frame is not stored).
# `ascending` must be a boolean (or a list of booleans, one per sort key);
# a list holding the string 'True' is not a valid value.
data_for_viz.sort_values('VoteEnd', ascending=True)



In [ ]:

	armée	asile / immigration	assurances	budget	dunno	entreprise/ finance	environnement	famille / enfants	imposition	politique internationale	retraite	text	text_eng	positive	negative	neutral	compound
0	0.006995	0.930066	0.006993	0.006993	0.006993	0.006994	0.006993	0.006993	0.006993	0.006993	0.006993	Arrêté fédéral concernant la contribution de l...	Federal decree on Switzerland's contribution i...	0.075	0.000	0.925	0.4404
1	0.018182	0.018185	0.018183	0.271557	0.564795	0.018183	0.018182	0.018183	0.018183	0.018183	0.018184	Renforcement du Traité sur la non-proliférati...	Strengthening of the Treaty on the non-prolif...	0.227	0.206	0.567	0.0772
2	0.015152	0.389858	0.015152	0.232520	0.256405	0.015152	0.015152	0.015152	0.015152	0.015154	0.015152	Une zone exempte d'armes nucléaires au coeur ...	A nuclear weapon free zone in the heart of Eu...	0.264	0.176	0.560	0.2732