In [5]:
import charts_function_list
import os
In [6]:
base_path,data,outputs = charts_function_list.folder_setup()
In [7]:
os.chdir(data)
In [8]:
board_games = pd.read_csv('bgg_db_2017_04.csv',encoding='latin-1')
In [9]:
board_games = board_games[~board_games['designer'].isin(['(Uncredited)','none'])].reset_index(drop=True)
#board_games = board_games[board_games['num_votes']>=board_games['num_votes'].median()] #filter to only include games rated > than median
rating_group_ = board_games.groupby('designer')
agg_funcs = {'geek_rating':np.mean,'names':np.count_nonzero}
rating_frame = rating_group_.agg(agg_funcs)
rating_frame.sort_values(by='names',ascending=False,inplace=True)
def top_ratings(df_ranks,df_full,values=5):
top_list = df_ranks.head(values)
top_df = df_full[df_full['designer'].isin(top_list.index)].reset_index(drop=True)
design_map = {v:i for i,v in enumerate(top_df['designer'].unique())}
top_df.loc[:,'cmap'] = top_df['designer'].map(design_map)
return top_df
In [10]:
top_5_ratings = top_ratings(rating_frame,board_games,values=5)
top_10_ratings = top_ratings(rating_frame,board_games,values=10)
In [11]:
max_scores = top_10_ratings.groupby('designer').max()['geek_rating']
top_max_scores = top_10_ratings[(top_10_ratings['designer'].isin(max_scores.index))&top_10_ratings['geek_rating'].isin(max_scores)][['designer','geek_rating','names']]
top_max_scores = top_max_scores.merge(pd.DataFrame(top_10_ratings.groupby('designer').agg({'median','count'})['geek_rating']),
left_on='designer',right_index=True).sort_values(by='median').reset_index(drop=True)
top_max_scores['designer_']=top_max_scores['designer'].replace({'Michael Kiesling, Wolfgang Kramer':'Michael Kiesling,\nWolfgang Kramer'})
top_max_scores['names_']= top_max_scores['names'].replace({'Thunderbolt Apache Leader':'Thunderbolt\nApache Leader',
'Tigris & Euphrates':'Tigris &\nEuphrates',
'Brass: Lancashire':'Brass:\nLancashire',
'Caverna: The Cave Farmers':'Caverna:\nThe Cave Farmers',
'The Castles of Burgundy':'The Castles\nof Burgundy'})
In [12]:
#empty list
os.chdir(outputs)
data = []
#set up objects
box_plots_ = charts_function_list.chart_maker('Box Plots of the Top 10 Most Prolific Board Game Designers',title_size=20)
fig = box_plots_.initial_fig_axis(figsize=(11,8))
fig.suptitle(box_plots_.title,fontname='Oswald',size=box_plots_.title_size)
ax = box_plots_.axes_set_up(fig)
#set up data for box plot
for i in top_max_scores['designer'].values:
data.append(top_10_ratings[top_10_ratings['designer']==i]['geek_rating'])
#boxplot
plt.boxplot(data,patch_artist=True,boxprops=dict(facecolor='#d9d9d9', color='#d9d9d9',linewidth=.7),notch=True,
medianprops=dict(color='red'),whiskerprops=dict(color='#737373'),capprops=dict(color='black',alpha=.8),
flierprops = dict(markeredgecolor='#fb6a4a'))
#setting up ticks
box_plots_.tick_params_(ax,labelcolor='#737373')
#labelling x with designer and games
ax.set_xticklabels([top_max_scores['designer_'].values[x]+'\n('+str(top_max_scores['count'].values[x])+' games)' for x in range(len(top_max_scores))],rotation=90,fontname='Oswald',weight='normal',fontsize=14)
#setting up y label
ax.set_ylabel('BoardGameGeek \'Geek Rating\'',size=16,color='#525252',labelpad=20,fontname='Oswald')
#labelling max points and scatters
for item,v in enumerate(top_max_scores.index):
a = top_10_ratings[top_10_ratings['designer']==top_max_scores['designer'][item]]
plt.scatter(np.linspace(item+1,item+1,len(a)),a['geek_rating'],s=1,facecolor='black',alpha=.6,zorder=3)
if item != 9:
y_adjust = .05
else:
y_adjust = .12
ax.text(top_max_scores.index[item]+1,top_max_scores['geek_rating'][item]+y_adjust,top_max_scores['names_'][item],
ha='center',fontname='Futura Bk BT',alpha=.9)
ax.annotate('Each designer\'s\nhighest rated game',xy=(3.6,7.7),xytext=(3,7.58),arrowprops=dict(arrowstyle="-",
connectionstyle="arc3",color='#fb6a4a',alpha=.7),fontname='Futura Bk BT',fontsize=10,alpha=.8,color='#fb6a4a',ha='right',va='center')
#add citations
box_plots_.citations(ax,source='Source: kaggle.com/mrpantherson/board-game-data',chart_tag_y=-.44)
charts_function_list.chart_save('Boardgames',dpi=200)
plt.show()
In [229]:
board_games.corr()[board_games.corr()>.5].fillna('')
Out[229]:
In [14]:
#playing with scatters
scatter_top = charts_function_list.chart_maker('The Top 10 Most Prolific Games Designers')
fig = scatter_top.initial_fig_axis(figsize=(8,5))
ax = scatter_top.axes_set_up(fig)
def top_scatters(df,cmap='Dark2'):
value_counts_ = df['designer'].value_counts().sort_values(ascending=True)
for i,v in enumerate(value_counts_):
a = df[df['designer']==value_counts_.index[i]]
plt.scatter(a['year'],
np.linspace(i,i,len(a)),
s=(a['geek_rating']**2)*2,facecolor='none',edgecolor='black',linewidth=.8)
return value_counts_
abc = top_scatters(top_10_ratings)
ax.set_yticks([i for i,v in enumerate(abc.index)])
ax.set_yticklabels([v for i,v in enumerate(abc.index)])
plt.show()
#box plots ..
In [11]:
#assess via kmeans
top_designers = board_games['designer'].value_counts()[board_games['designer'].value_counts()>=5]
board_games_top_design=board_games[board_games['designer'].isin(top_designers.index)]
rating_group_top = board_games_top_design.groupby('designer')
rating_frame_top = rating_group_top.agg(agg_funcs)
from sklearn.cluster import KMeans
f1 = rating_frame_top['geek_rating'].values
f2 = rating_frame_top['names'].values
X=np.matrix(list(zip(f1,f2)))
kmeans = KMeans(n_clusters=8,algorithm='auto',n_init=50).fit(X)
chart_object = charts_function_list.chart_maker('Games vs Rating Clusters')
fig = chart_object.initial_fig_axis(figsize=(8,5))
ax = chart_object.axes_set_up(fig)
ax.scatter(f2,f1,alpha=.8,c=kmeans.labels_,zorder=1,cmap='Dark2')
for x,y in kmeans.cluster_centers_ :
ax.scatter(y,x,marker='*',facecolors='none',s=300,zorder=2,alpha=.5,edgecolor='black')
plt.show()