Ongoing analysis of sentiment timelines.

This analysis will expand over time as more matches are played.


In [1]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Parameters
plt.style.use('ggplot')

# Competitions to include in the figure.
# The order of this list is also the order in which they are plotted.
competitions_to_plot = [
    'League',
    'CL',
    'Cup',
]

sentiment index per match index

The sentiment match index ($I$) is:

$$ I = {{N_\text{pos} - N_\text{neg}} \over {N_\text{pos} + N_\text{neg}}} $$

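Where $N_\text{pos}$ and $N_\text{neg}$ are the number of positive and negative comments in a match, respectively. Neutral comments (sentiment of exactly zero) are not counted.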

This means the index is positive when a match has more positive than negative comments, and negative when it has more negative than positive comments.

Pos Neg match index


In [3]:
def pos_neg_match_index(df):
    """Compute the positive/negative sentiment index for one match.

    The index is ``(Npos - Nneg) / (Npos + Nneg)``, where ``Npos`` and
    ``Nneg`` are the numbers of rows in ``df`` whose ``'sentiment'`` value is
    above and below zero. Rows with a sentiment of exactly zero (or NaN) fall
    in neither group and are ignored.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a numeric ``'sentiment'`` column.

    Returns
    -------
    float
        A value between -1 (only negative rows) and 1 (only positive rows).
        NaN is returned when there are no positive and no negative rows,
        because the index is undefined there; previously this case raised
        ZeroDivisionError.
    """
    sentiment = df['sentiment']
    # Summing the boolean masks counts rows without building filtered frames.
    Npos = int((sentiment > 0).sum())
    Nneg = int((sentiment < 0).sum())
    total = Npos + Nneg
    if total == 0:
        return float('nan')
    return (Npos - Nneg) / total

Get overall sentiment per game


In [4]:
# Read the match overview table; the first CSV column becomes the index.
match_info = pd.read_csv('./data/matchinfo.csv', index_col=0)

# Compute one sentiment index per match, in the same row order as match_info.
match_sentiment_index = []
for _, match_row in match_info.iterrows():
    # Each row names the CSV file holding that match's data.
    df = pd.read_csv('./data/' + match_row.filenames)
    match_sentiment_index.append(pos_neg_match_index(df))


# Build a frame indexed by (competition, match_index).
# NOTE(review): 'compeition' is presumably the (misspelled) column header in
# matchinfo.csv - confirm against the data before renaming it here.
multi_ind = pd.MultiIndex.from_arrays(
    [match_info['compeition'], match_info['matchnr']],
    names=['competition', 'match_index'],
)
mi = pd.DataFrame(
    data={
        'opponent': match_info['opponent'].values,
        'result': match_info['result'].values,
        'sentiment_index': match_sentiment_index,
    },
    index=multi_ind,
)

Plot figure


In [5]:
fig = plt.figure(figsize=(8,6))

# Grid layout: one column per competition. The number of rows is the largest
# number of matches in any competition, plus one extra bottom row for the legend.
# size() counts every match, so it always agrees with the len() used below.
grid_rows = int(mi.groupby(level='competition').size().max())
grid_cols = len(competitions_to_plot)
grid_shape = (grid_rows + 1, grid_cols)

# Marker colour for each match result
cols = {'win':'red','draw':'gray','loss':'black'}

# Loop through competitions and plot sentiment index for each match
for i, c in enumerate(competitions_to_plot):
    # Sort once so that points, colours and opponent labels stay aligned.
    matches = mi.xs(c, level='competition').sort_index()
    n = len(matches)
    colors = [cols[v] for v in matches['result']]
    # Panels are bottom-aligned: shorter competitions start further down the grid.
    ax = plt.subplot2grid(grid_shape, (grid_rows - n, i), rowspan=n)
    ax.scatter(matches['sentiment_index'], np.arange(n), c=colors)
    # First match at the top
    ax.invert_yaxis()
    ax.set_yticks(np.arange(n))
    ax.set_yticklabels(matches['opponent'])
    ax.set_title(c)
    # This may need to change if comments get very negative or very positive
    ax.set_xlim([-.3,.3])
    ax.set_xticks([0])
    ax.set_xticklabels(['<< Negative | | Positive >>  '])

# Hand-built legend in the bottom-left grid cell
ax = plt.subplot2grid(grid_shape, (grid_rows, 0), colspan=1)
for x, (result, label) in enumerate([('win', 'Win'), ('loss', 'Loss'), ('draw', 'Draw')]):
    ax.scatter(x, 1, c=cols[result], s=30)
    ax.text(x + 0.15, 0.8, label)
ax.set_xlim(-0.5,2.5)
ax.set_ylim(0.3,1.5)
ax.axis('off')
fig.tight_layout()

# Add title to entire figure
suptitle = fig.suptitle('Overall sentiment in match threads')
fig.subplots_adjust(top=0.85)
suptitle.set_y(0.95)

# save figure - add the number of data points in the figure
# Create the output directory first so a fresh checkout can run end to end.
os.makedirs('./figures', exist_ok=True)
figure_stem = './figures/analysis-over-matches_n-' + str(len(mi))
fig.savefig(figure_stem + '.png', dpi=600)
# The keyword is `dpi`; the previous `r=600` is not a savefig option.
fig.savefig(figure_stem + '.pdf', dpi=600)