In [3]:
# Execute the shared config notebook before anything else in this notebook.
# NOTE(review): `pd`, `plt` and `list_types` are used in later cells but never
# defined in them — presumably they are provided by this config notebook; confirm.
%run "0. config.ipynb"
In [101]:
# Load the per-player statistics table.
# `DataFrame.from_csv` was deprecated in pandas 0.21 and removed in 1.0;
# `read_csv` with `index_col=0, parse_dates=True` reproduces its defaults
# (first column becomes the index, and the index is parsed as dates when possible).
players_stats = pd.read_csv(
    "data/players_stats.csv",
    index_col=0,
    parse_dates=True,
    encoding="utf8",
)
In [102]:
# Preview the first rows of the loaded table as a quick sanity check of the parse.
players_stats.head()
Out[102]:
In [103]:
from sklearn.decomposition import PCA
In [104]:
# Per-column means of the table (displayed as the cell output).
players_stats.mean()
Out[104]:
In [94]:
# Project every player onto the first two principal components and draw a
# single scatter plot of the whole population.

# `as_matrix()` was removed in pandas 1.0; `.values` yields the same ndarray.
X = players_stats.values
n = X.shape[0]
pca = PCA(n_components=2)
X_r = pca.fit_transform(X)
X_vr = pca.explained_variance_ratio_
cmap = plt.get_cmap("YlOrRd")

# Fraction of total variance captured by the two retained components.
# Call form of print so the cell runs under both Python 2 and Python 3.
print(sum(X_vr))

# Marker area per player: 10 plus 40 per standard deviation of play duration.
# Later cells reuse `X_std` (indexed by row label) and `cmap`.
X_std = 10 + 40 * players_stats["duration (seconds)"] / players_stats["duration (seconds)"].std()

# Marker colour: "section" divided by 8 to land in the colormap's [0, 1] input
# range. NOTE(review): assumes section runs 0..8, as the checkpoint loop's
# range suggests — confirm. The float divisor avoids any integer truncation.
X_color = cmap(players_stats["section"] / 8.0)

plt.figure()
plt.scatter(X_r[:, 0], X_r[:, 1], s=X_std, c=X_color, alpha=.6, lw=0)
plt.title("all players (n = %s)" % (n))
# No plt.legend(): the scatter carries no label, so the call only produced a
# "no handles with labels" warning and an empty legend.
In [96]:
# One PCA scatter per entry of `list_types`, restricted to the players whose
# value in that column is strictly positive.
# NOTE(review): `list_types` is not defined in this notebook's visible cells —
# presumably it comes from the config notebook; its entries are used as
# column names of `players_stats`.
for title in list_types:
    df = players_stats[players_stats[title] > 0]
    # `as_matrix()` was removed in pandas 1.0; `.values` yields the same ndarray.
    X = df.values
    n = X.shape[0]
    # A 2-component PCA needs at least two samples.
    if n > 1:
        pca = PCA(n_components=2)
        X_r = pca.fit_transform(X)
        # Reuse the marker sizes and colormap computed for the full population,
        # selecting this subset's rows by index label.
        d = X_std.loc[df.index]
        c = cmap(df["section"] / 8.0)
        plt.figure()
        plt.scatter(X_r[:, 0], X_r[:, 1], s=d, c=c, alpha=.6, lw=0)
        plt.title("%s > 0 (n = %s)" % (title, n))
        # No plt.legend(): nothing in the figure is labelled, so the call only
        # emitted a warning.
    else:
        # Call form of print so the cell runs under both Python 2 and Python 3.
        print("we skipped %s because there is no enough data" % title)

# Render every figure created by the loop.
plt.show()
In [17]:
# Distribution of the "reach" column over all rows, in 20 equal-width bins.
plt.hist(players_stats["reach"], bins=20)
plt.title("reach (n = %s)" % len(players_stats))
plt.xlabel("reach")
plt.ylabel("number of players")
# plt.show() renders the figure without echoing hist()'s (counts, edges, patches)
# return value as the cell output.
plt.show()
Out[17]:
In [92]:
# One PCA scatter per checkpoint: players whose "section" equals 0, 1, ..., 8.
for checkpoint in range(9):
    df = players_stats[players_stats["section"] == checkpoint]
    # `as_matrix()` was removed in pandas 1.0; `.values` yields the same ndarray.
    X = df.values
    n = X.shape[0]
    # A 2-component PCA needs at least two samples.
    if n > 1:
        pca = PCA(n_components=2)
        X_r = pca.fit_transform(X)
        # Marker sizes come from the full-population `X_std`, selected by index label.
        d = X_std.loc[df.index]
        plt.figure()
        # No per-point colour here: every row in this subset has the same
        # "section", so the previously computed (and never used) colour array
        # was dead code and has been dropped.
        plt.scatter(X_r[:, 0], X_r[:, 1], s=d, alpha=.6, lw=0)
        plt.title("checkpoint = %s (n = %s)" % (checkpoint, n))
        # No plt.legend(): nothing in the figure is labelled, so the call only
        # emitted a warning.
    else:
        # Call form of print so the cell runs under both Python 2 and Python 3.
        print("we skipped %s because there is no enough data" % checkpoint)

# Render every figure created by the loop.
plt.show()