In [2]:
# Run the shared configuration notebook; %run loads the names it defines into this session.
# NOTE(review): `pd` and `plt` are used in later cells but never imported in the
# visible cells -- presumably they are provided by this config notebook; confirm.
%run "0. config.ipynb"
In [27]:
# Load the players statistics table, using the first CSV column as the index.
# `DataFrame.from_csv` is deprecated (and removed in pandas 1.0); `read_csv`
# with index_col=0 and parse_dates=True reproduces its defaults.
players_stats = pd.read_csv(
    "data/players_stats.csv",
    index_col=0,
    parse_dates=True,
    encoding="utf8",
)
In [28]:
# Column means over the rows where complete == 1 and adventure == True.
is_complete = players_stats["complete"] == 1
is_adventure = players_stats["adventure"] == True
players_stats[is_complete & is_adventure].mean()
Out[28]:
In [3]:
from sklearn import tree
In [5]:
# Preview the first five rows (five is the default for head()).
players_stats.head()
Out[5]:
In [6]:
# Report the row count before and after dropping exact duplicate rows,
# keeping the de-duplicated frame under the same name.
print(len(players_stats))
players_stats = players_stats.drop_duplicates()
print(len(players_stats))
In [17]:
from sklearn import tree

# Every column except the "complete" label is used as a feature.
feature_columns = [c for c in players_stats.columns if c != "complete"]

# `.values` replaces `as_matrix()`, which is deprecated and removed in pandas 1.0.
# Features are cast to int before fitting, as in the original cell.
X = players_stats[feature_columns].values.astype(int)
Y = players_stats["complete"].values

# scikit-learn permutes features randomly at each split, so without a fixed
# random_state the fitted tree can differ between runs on identical data.
clf = tree.DecisionTreeClassifier(random_state=0)
clf = clf.fit(X, Y)
In [18]:
from IPython.display import Image
import pydot

# Python 2 ships StringIO as its own module; Python 3 moved it into io.
try:
    from StringIO import StringIO
except ImportError:
    from io import StringIO

# Export the fitted tree to Graphviz DOT text held in memory.
# Feature names are every column except the "complete" label.
dot_data = StringIO()
tree.export_graphviz(
    clf,
    out_file=dot_data,
    feature_names=[c for c in players_stats.columns if c != "complete"],
    class_names=[str(c) for c in range(0, 2)],
    filled=True,
    rounded=True,
    special_characters=True,
)

# pydot >= 1.2 returns a list of graphs here, while older releases return a
# single graph object; accept both so create_png() is called on a graph.
graphs = pydot.graph_from_dot_data(dot_data.getvalue())
graph = graphs[0] if isinstance(graphs, (list, tuple)) else graphs

# Last expression of the cell: display the rendered PNG inline.
Image(graph.create_png())
Out[18]:
In [26]:
# Select the rows whose "complete" label is 0, drop exact duplicates and
# report how many remain.
X_uncompleted = players_stats[players_stats["complete"] == 0]
X_uncompleted = X_uncompleted.drop_duplicates()
print(len(X_uncompleted))

# Keep only the feature columns (everything except the label).
# `.values` replaces `as_matrix()`, which is deprecated and removed in pandas 1.0.
X_uncompleted = X_uncompleted[
    [c for c in players_stats.columns if c != "complete"]
].values

# NOTE(review): the prediction below runs on X (the full matrix the tree was
# fitted on), so X_uncompleted built above is never used. If the intent was to
# score only the uncompleted players this should be clf.predict(X_uncompleted),
# but later cells index by position into `p`, so confirm before changing it.
p = clf.predict(X)
print(sum(p))
In [20]:
# Load the players coordinates table, using the first CSV column as the index.
# `DataFrame.from_csv` is deprecated (and removed in pandas 1.0); `read_csv`
# with index_col=0 and parse_dates=True reproduces its defaults.
players_coordinates = pd.read_csv(
    "data/players_coordinates.csv",
    index_col=0,
    parse_dates=True,
)
In [21]:
# Collect the positions in `p` whose predicted label is 1, then report the count.
l = []
for position, label in enumerate(p):
    if label == 1:
        l.append(position)
print(len(l))
In [23]:
# Take the coordinate rows at the positions listed in `l` and keep those with y < 1500.
df = players_coordinates.iloc[l, :]
df = df[df["y"] < 1500]

# Distinct index values of the selection; one panel is drawn per value.
players_list = list(df.index.unique())
print(len(players_list))

# Grid of panels, five per row. `//` keeps the row count an integer on both
# Python 2 and 3, and squeeze=False keeps `axarr` two-dimensional even when
# there is a single row, so the [row, col] indexing below always works.
n_cols = 5
n_rows = len(players_list) // n_cols + 1
f, axarr = plt.subplots(n_rows, n_cols, figsize=(20, 10), squeeze=False)

# The loop variable is named `player` so it does not overwrite the prediction
# array `p` that the cell building `l` reads.
for i, player in enumerate(players_list):
    select = df.loc[player, :]
    ax = axarr[i // n_cols, i % n_cols]
    x = select["x"]
    y = select["y"]
    ax.plot(x, y, "-o")
    ax.axis('equal')
    # Same fixed window on every panel so the paths are visually comparable.
    ax.set_xlim([-300, 700])
    ax.set_ylim([0, 1200])
plt.show();
Create data with progression within games instead of just finished games