In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import datetime
plt.style.use('seaborn-whitegrid')
In [22]:
sejm = pd.read_csv('./data/sejm/wystapienia.csv')
slowa = pd.read_csv('./data/sejm/slowka.csv')
slowa_l = slowa.to_dict('records')
slowa_d = {}
# For speed, keep the word records in a plain dict: repeated lookups there are
# much faster than selecting rows from the DataFrame.
for sl in slowa_l:
    slowa_d[sl['word']] = sl
# @TODO: refactor with index
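The @TODO above can probably be closed with a one-liner: index the frame by word and dump it straight to a dict. A minimal sketch, assuming the word column holds unique stems (the inner dicts then no longer carry the word key itself):

slowa_d = slowa.set_index('word').to_dict('index')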
In [3]:
slowa.head().T
Out[3]:
In [4]:
labs = {
    "1": "Platforma Obywatelska",
    "10": "Koło poselskie Bezpieczeństwo i Gospodarka",
    "11": "Zjednoczona Prawica",
    "12": "Biało-Czerwoni",
    "13": "Kukiz'15",
    "14": "Nowoczesna",
    "2": "Prawo i Sprawiedliwość",
    "3": "Polskie Stronnictwo Ludowe",
    "4": "Sojusz Lewicy Demokratycznej",
    "5": "Ruch Palikota",
    "6": "Solidarna Polska",
    "7": "Niezrzeszeni",
    "8": "Koło Poselskie Inicjatywa Dialogu",
    "9": "Klub Parlamentarny Sprawiedliwa Polska",
    "0": "None"
}
In [5]:
sc = sejm.copy()
sc.set_index('id')  # note: without inplace=True (or reassignment) this has no effect; the index is set properly in In [9]
sc['slowa'] = sc.stem.str.split(' ')
sc['klub_nazwa'] = sc.klub_id.apply(lambda x: labs[str(x)])
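A small alternative to the lambda, same result, assuming klub_id holds plain integers:

sc['klub_nazwa'] = sc['klub_id'].astype(str).map(labs)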
In [6]:
def get_param_calc_func(param):
    def func(x):
        sre = []
        try:
            for slowo in x.loc['slowa']:
                ary = slowa_d[slowo] if slowo in slowa_d else dict()
                if param in ary:
                    sre.append(ary[param])
                elif param.startswith('impact_') and slowo in slowa_d and param.endswith(ary['category']):
                    sre.append(calc_kolaps_factor(ary, param))
        except Exception:
            # rows with a missing or malformed word list are silently skipped
            pass
        # average over the recognised words, scaled by their share of the whole
        # utterance; 1 is the neutral default when nothing was recognised
        return (np.mean(sre) * len(sre)) / len(x.loc['slowa']) if len(sre) > 0 else 1
    return func

def calc_kolaps_factor(ar, typ):
    pre = ar['AR'] + ar['IM']
    if typ.endswith('H'):
        return pre * 2 * ar['mean_H'] + 1.5 * np.mean([ar['dist_A'], ar['dist_S'], ar['dist_F'], ar['dist_D'], ar['dist_N']])
    elif typ.endswith('A'):
        return pre * 2 * ar['mean_A'] + 1.5 * np.mean([ar['dist_H'], ar['dist_S'], ar['dist_F'], ar['dist_D'], ar['dist_N']])
    elif typ.endswith('S'):
        return pre * 2 * ar['mean_S'] + 1.5 * np.mean([ar['dist_H'], ar['dist_A'], ar['dist_F'], ar['dist_D'], ar['dist_N']])
    elif typ.endswith('F'):
        return pre * 2 * ar['mean_F'] + 1.5 * np.mean([ar['dist_H'], ar['dist_A'], ar['dist_S'], ar['dist_D'], ar['dist_N']])
    elif typ.endswith('D'):
        return pre * 2 * ar['mean_D'] + 1.5 * np.mean([ar['dist_H'], ar['dist_A'], ar['dist_S'], ar['dist_F'], ar['dist_N']])
    return 1
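The fallback score for an impact_* parameter sums the word's AR and IM values, scales that by twice the mean intensity of the target emotion, and adds 1.5 times the mean distance to the remaining categories. A quick check of calc_kolaps_factor with made-up numbers (a hypothetical record, not taken from the dataset):

rec = {'AR': 0.4, 'IM': 0.3, 'mean_H': 0.8,
       'dist_A': 0.9, 'dist_S': 0.7, 'dist_F': 0.8, 'dist_D': 0.9, 'dist_N': 0.5}
calc_kolaps_factor(rec, 'impact_H')
# (0.4 + 0.3) * 2 * 0.8 + 1.5 * mean(0.9, 0.7, 0.8, 0.9, 0.5) = 1.12 + 1.14 = 2.26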
In [7]:
%%time
par = [
'impact_H',
'impact_A',
'impact_S',
'impact_F',
'impact_D',
'f',
'VA',
'AR',
'IM',
'mean_H',
'mean_A',
'mean_S',
'mean_F',
'mean_D',
'dist_H',
'dist_A',
'dist_S',
'dist_F',
'dist_D',
'dist_N',
'badness'
]
for a in par:
    print("Calculating {}...".format(a))
    sc[a] = sc.apply(get_param_calc_func(a), axis=1)
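Applying a separate function per parameter walks every word list len(par) times. If this cell becomes a bottleneck, one pass per utterance could collect all parameters at once. A rough sketch, not tested against the real data and assuming every row has a non-empty slowa list:

def calc_all_params(slowa_lista):
    vals = {a: [] for a in par}
    for slowo in slowa_lista:
        ary = slowa_d.get(slowo)
        if ary is None:
            continue
        for a in par:
            if a in ary:
                vals[a].append(ary[a])
            elif a.startswith('impact_') and a.endswith(ary['category']):
                vals[a].append(calc_kolaps_factor(ary, a))
    n = len(slowa_lista)
    return pd.Series({a: (np.mean(v) * len(v)) / n if v else 1 for a, v in vals.items()})

# would replace the loop above:
# sc = sc.join(sc['slowa'].apply(calc_all_params))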
In [8]:
sc['Happiness'] = sc['impact_H']
sc['Anger'] = sc['impact_A']
sc['Sadness'] = sc['impact_S']
sc['Fear'] = sc['impact_F']
sc['Disgust'] = sc['impact_D']
sc['Neutrality Distance'] = sc['dist_N']
In [9]:
sc.set_index(sc.id, inplace=True)
In [10]:
sc.head().T
Out[10]:
In [11]:
def plot_emotion(emotion):
    sc \
        .groupby('klub_nazwa') \
        .agg(np.mean) \
        .sort_values(by=emotion)[emotion] \
        .T \
        .plot(kind='bar', title=emotion)
In [12]:
plot_emotion('Happiness')
In [13]:
plot_emotion('Anger')
In [14]:
plot_emotion('Sadness')
In [15]:
plot_emotion('Fear')
In [16]:
plot_emotion('Disgust')
In [17]:
plot_emotion('Neutrality Distance')
In [18]:
sc.groupby('klub_nazwa') \
    .agg(np.std) \
    .sort_values(by='dist_N')['dist_N'] \
    .plot(kind='bar', title='Neutrality distance (stddev)')
Out[18]:
In [19]:
sc['lk'] = sc['ludzie_nazwa'] + ' (' + sc['klub_nazwa'] + ')'
In [20]:
sc.groupby('lk') \
    .agg(np.mean) \
    .sort_values(by='Happiness', ascending=False) \
    .head(10)['Happiness'] \
    .plot(kind='bar', title='Top 10 speakers by mean Happiness')
Out[20]:
In [21]:
sc.groupby(['data']) \
    .agg(np.mean)[['Anger', 'Fear', 'Sadness', 'Disgust']] \
    .plot(figsize=(8, 8))
Out[21]:
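The per-day means are fairly noisy; if the data column parses as dates, a monthly resample gives a smoother trend. A sketch, assuming pd.to_datetime can handle the date format used in the dump:

ts = sc.copy()
ts['data'] = pd.to_datetime(ts['data'])
ts.set_index('data') \
    .resample('M')[['Anger', 'Fear', 'Sadness', 'Disgust']] \
    .mean() \
    .plot(figsize=(8, 8))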