In [1]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np
In [2]:
# Parameters
plt.style.use('ggplot')
# Competitions to draw; the list order is also the left-to-right panel order
competitions_to_plot = [
    'League',
    'CL',
    'Cup',
]
The sentiment match index ($I$) is:
$$ I = {{N_\text{pos} - N_\text{neg}} \over {N_\text{pos} + N_\text{neg}}} $$
where $N_\text{pos}$ and $N_\text{neg}$ are the number of positive and the number of negative comments, respectively.
This means that $I$ is positive when there are more positive than negative comments, and negative when there are more negative than positive comments.
In [3]:
def pos_neg_match_index(df):
    """Return the sentiment match index I for a frame of scored rows.

    I = (N_pos - N_neg) / (N_pos + N_neg), where N_pos and N_neg are the
    numbers of rows in ``df`` whose 'sentiment' value is > 0 and < 0.
    Rows whose sentiment is exactly 0 (or NaN) count towards neither.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a numeric 'sentiment' column.

    Returns
    -------
    float
        A value in [-1, 1]. NaN is returned when there are no positive and
        no negative rows, because the index is 0 / 0 (undefined) then.
    """
    sentiment = df['sentiment']
    # Summing a boolean mask counts the hits without copying filtered frames
    n_pos = int((sentiment > 0).sum())
    n_neg = int((sentiment < 0).sum())
    n_polar = n_pos + n_neg
    if n_polar == 0:
        # Avoid ZeroDivisionError on empty or all-neutral input
        return float('nan')
    return (n_pos - n_neg) / n_polar
In [4]:
# Read the match metadata table; its first column is used as the row index
match_info = pd.read_csv('./data/matchinfo.csv', index_col=0)

# For every row of the metadata table, load the file named in its
# 'filenames' column and reduce it to a single sentiment index
match_sentiment_index = [
    pos_neg_match_index(pd.read_csv('./data/' + file_name))
    for file_name in match_info['filenames']
]

# Create a multiindex dataframe with competition and match index as the indexes
# NOTE(review): 'compeition' looks like a misspelling, but it has to match the
# column header in matchinfo.csv -- confirm against the file before renaming.
multi_ind = pd.MultiIndex.from_arrays(
    [match_info['compeition'], match_info['matchnr']],
    names=['competition', 'match_index'],
)
mi = pd.DataFrame(
    data={
        'opponent': match_info['opponent'].values,
        'result': match_info['result'].values,
        'sentiment_index': match_sentiment_index,
    },
    index=multi_ind,
)
Plot figure
In [5]:
# Draw one panel per competition (matches top to bottom, sentiment index on
# the x axis, marker colour = result), a small legend panel underneath the
# first column, and save the figure as PNG and PDF.
fig = plt.figure(figsize=(8, 6))

# Select and sort each plotted competition once, so that positions, colours
# and tick labels are all derived from the same ordering.
plotted = [(c, mi.loc[c].sort_index()) for c in competitions_to_plot]

# Grid height = number of matches in the largest *plotted* competition.
# Counting rows (rather than non-null values over every competition) keeps
# `grid_rows - n` non-negative and avoids blank rows for unplotted groups.
grid_rows = int(max(len(matches) for _, matches in plotted))
grid_cols = len(competitions_to_plot)

# Loop through competitions and plot sentiment index for each match
cols = {'win': 'red', 'draw': 'gray', 'loss': 'black'}
for i, (c, matches) in enumerate(plotted):
    n = len(matches)
    rows = np.arange(0, n)
    colors = [cols[v] for v in matches['result']]
    # Panels are bottom-aligned: a competition with fewer matches starts lower
    ax = plt.subplot2grid((grid_rows + 1, grid_cols), (grid_rows - n, i),
                          rowspan=n, fig=fig)
    ax.scatter(matches['sentiment_index'], rows, c=colors)
    ax.invert_yaxis()
    ax.set_yticks(rows)
    ax.set_yticklabels(matches['opponent'])
    ax.set_title(c)
    # This may need to change if comments get very negative or very positive
    ax.set_xlim([-.3, .3])
    ax.set_xticks([0])
    ax.set_xticklabels(['<< Negative | | Positive >> '])

# Legend panel in the extra bottom row of the first column
ax = plt.subplot2grid((grid_rows + 1, grid_cols), (grid_rows, 0),
                      colspan=1, fig=fig)
for x, (label, result) in enumerate([('Win', 'win'), ('Loss', 'loss'), ('Draw', 'draw')]):
    ax.scatter(x, 1, c=cols[result], s=30)
    ax.text(x + 0.15, 0.8, label)
ax.set_xlim(-0.5, 2.5)
ax.set_ylim(0.3, 1.5)
ax.axis('off')
fig.tight_layout()

# Add title to entire figure
suptitle = fig.suptitle('Overall sentiment in match threads')
fig.subplots_adjust(top=0.85)
suptitle.set_y(0.95)

# save figure - add the number of data points in the figure
os.makedirs('./figures', exist_ok=True)  # savefig does not create directories
out_stem = './figures/analysis-over-matches_n-' + str(len(mi))
fig.savefig(out_stem + '.png', dpi=600)
# `r=600` was not a savefig keyword; `dpi` is the resolution argument
fig.savefig(out_stem + '.pdf', dpi=600)