In [ ]:
import pandas as pd
import glob
import os
import numpy as np
from time import time
import logging
import gensim
import bz2
import re
from stop_words import get_stop_words
from TwitterSearch import *
import time

In [ ]:
# Load the voting dataset with one row per unique bill text (the commented
# path is the non-deduplicated variant of the same NLP output).
# NOTE(review): relative path assumes the notebook runs from its own folder.
#df = pd.read_csv('../datas/nlp_results/voting_with_topics.csv')
df = pd.read_csv('../datas/nlp_results/voting_with_topics_unique.csv')

In [ ]:
df.head()

In [ ]:
# Tokenize each English bill text: lowercase, split on whitespace and simple
# punctuation, keep tokens longer than one character.
# BUG FIX: the original comprehension filtered tokens with `word in words`,
# but `words` is never defined in this notebook, so the cell raised a
# NameError on a fresh kernel. The vocabulary filter is now applied only when
# a `words` collection actually exists, preserving the original behavior in
# the session where it was defined.
def _tokenize(document, vocabulary=None):
    """Split `document` into lowercase tokens of length > 1.

    :param document: text to tokenize.
    :param vocabulary: optional collection of allowed tokens; when given,
        only tokens contained in it are kept (the original filter).
    :return: list of token strings.
    """
    tokens = [word for word in re.split(r" |'|\n|\(|\)|,|;|:|-", document.lower())
              if len(word) > 1]
    if vocabulary is not None:
        tokens = [word for word in tokens if word in vocabulary]
    return tokens

try:
    _vocab = words  # keep original filtering when `words` exists in the session
except NameError:
    _vocab = None  # NOTE(review): `words` was undefined anywhere in this notebook

texts = [_tokenize(document, _vocab) for document in df.text_eng]

In [ ]:
def demo_vader_instance(text):
    """Return a VADER SentimentIntensityAnalyzer instance.

    Despite the name, this function does not score `text` itself: callers
    invoke ``.polarity_scores(...)`` on the returned analyzer (see the
    scoring loop below). The analyzer is built once and cached on the
    function object, because constructing it loads the VADER lexicon and the
    original version rebuilt it on every call — once per dataset row.

    :param text: unused; parameter kept for backward compatibility with
        existing call sites.
    :return: a cached ``nltk.sentiment.SentimentIntensityAnalyzer``.
    """
    # Local import preserved: nltk is only needed when this path is exercised.
    from nltk.sentiment import SentimentIntensityAnalyzer
    if not hasattr(demo_vader_instance, '_analyzer'):
        demo_vader_instance._analyzer = SentimentIntensityAnalyzer()
    return demo_vader_instance._analyzer

In [ ]:
# Score every English bill text with VADER and collect the four polarity
# components in parallel lists (same order as df.text_eng).
# Fixes vs. original: the duplicated `opinion_lexicon` import was removed,
# the analyzer is obtained once instead of once per row (the dominant cost),
# the manual counter was replaced with enumerate, and dead commented-out
# code was dropped.
from collections import defaultdict  # NOTE(review): appears unused below — kept in case other cells rely on it
from nltk.corpus import opinion_lexicon  # NOTE(review): appears unused below
from nltk.tokenize import treebank  # NOTE(review): appears unused below

pos = []
neg = []
neu = []
compound = []
analyzer = demo_vader_instance('')  # returns a SentimentIntensityAnalyzer
for i, text in enumerate(df.text_eng):
    if i % 500 == 0:
        # Coarse progress indicator (percentage of rows processed).
        print('{0}%'.format((i * 100) / len(df.text_eng)))
    polarity = analyzer.polarity_scores(text)
    pos.append(polarity['pos'])
    neg.append(polarity['neg'])
    neu.append(polarity['neu'])
    compound.append(polarity['compound'])

In [ ]:
# Attach the four VADER polarity components to the frame, one column per
# score list (lists are row-aligned with df from the scoring loop above).
sentiment_columns = {
    'positive': pos,
    'negative': neg,
    'neutral': neu,
    'compound': compound,
}
for column_name, scores in sentiment_columns.items():
    df[column_name] = scores

In [ ]:
# Inspect the frame after adding the sentiment columns.
#df = df.drop('sentiment',1)
df.head()

In [ ]:
# Persist the sentiment-enriched unique-vote table for downstream notebooks
# (the commented path is the non-deduplicated variant).
output_path = '../datas/nlp_results/voting_with_topics_unique_sentiment.csv'
df.to_csv(output_path, index=False)
#df.to_csv('../datas/nlp_results/voting_with_topics.csv',index=False)

In [ ]:
# Concatenate every scraped per-session voting CSV into a single frame.
# `data_frame` is the name later cells depend on.
session_dir = '../datas/scrap/Voting'
session_files = glob.glob(os.path.join(session_dir, 'Session*.csv'))

session_frames = []
for session_file in session_files:
    print(session_file)  # progress: show which file is being loaded
    session_frames.append(pd.read_csv(session_file))
data_frame = pd.concat(session_frames)

In [ ]:
# Replace each parliamentary-group label with its integer position in the
# list of unique labels (label-to-index encoding, in place).
parl = data_frame.ParlGroupCode.unique().tolist()
for group_index, group_label in enumerate(parl):
    group_mask = data_frame.ParlGroupCode == group_label
    data_frame.loc[group_mask, 'ParlGroupCode'] = group_index

In [ ]:
# Reduce to the columns relevant for matching votes to texts, blank out
# missing values, and build the text key used to join with the NLP results.
vote_columns = ['BillTitle', 'BusinessTitle', 'FirstName', 'LastName',
                'Decision', 'ParlGroupCode']
votation_frame = data_frame[vote_columns].fillna(value='')
votation_frame['text'] = votation_frame.BillTitle + ' ' + votation_frame.BusinessTitle

In [ ]:
votation_frame.head()

In [ ]:
(pd.merge(votation_frame,df)).to_csv('../datas/nlp_results/voting_with_topics_sentiment.csv',index=False)