Dota 2

Here is an example of using datapot on Dota 2 game logs.


In [8]:
import datapot as dp

In [9]:
# Start from an empty DataPot: nothing has been detected yet, so the summary
# displayed below reports 0 features and lists no fields to transform.
data = dp.DataPot()
data


Out[9]:
DataPot class instance
 - number of features without transformation: 0
 - number of new features: Unknown
features to transform: 

In [10]:
# The sample match log is stored bz2-compressed, so it is opened through bz2.
# An uncompressed copy could presumably be opened directly instead (TODO confirm path):
# f = open('data/matches_test.jsonlines', 'r')
import bz2
# NOTE(review): this single handle is passed to detect/fit/transform in the
# cells below; presumably datapot rewinds it between passes — confirm.
f = bz2.BZ2File('../data/matches_test.jsonlines.bz2')

Detect transformers


In [11]:
# Scan the log and let datapot propose transformers for each field. The summary
# displayed below lists what was chosen; note that the ComplexTransformer for
# 'times' still has average_len_of_array=None at this stage.
data.detect(f)


Out[11]:
DataPot class instance
 - number of features without transformation: 7
 - number of new features: 4
features to transform: 
	('match_id', [NumericTransformer])
	('start_time', [NumericTransformer])
	('times', [ComplexTransformer(average_len_of_array=None)])
	('lobby_type', [SVDOneHotTransformer, NumericTransformer])

In [12]:
# Fit the detected transformers on the same file. In the summary below the
# ComplexTransformer for 'times' now carries a concrete average_len_of_array,
# and the reported number of new features changes from 4 to 7.
data.fit(f)
data


Out[12]:
DataPot class instance
 - number of features without transformation: 7
 - number of new features: 7
features to transform: 
	('match_id', [NumericTransformer])
	('start_time', [NumericTransformer])
	('times', [ComplexTransformer(average_len_of_array=5.99400360947779)])
	('lobby_type', [SVDOneHotTransformer, NumericTransformer])

In [13]:
# Apply the fitted transformers. The result is displayed as a table of
# 17177 rows × 7 columns (the notebook truncates the middle rows).
# NOTE(review): the FutureWarning printed below originates inside datapot
# itself (its use of convert_objects), not from this cell's code.
data.transform(f)


/usr/local/lib/python3.6/site-packages/datapot/__init__.py:137: FutureWarning: convert_objects is deprecated.  Use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.
  return pd.DataFrame(data=np.hstack(columns), columns=names).convert_objects(convert_numeric=True)
Out[13]:
match_id start_time times_sum lobby_type_0 lobby_type_7 lobby_type_1 lobby_type
0 18.0 1.430334e+09 150.15006 1.0 0.0 0.0 0.0
1 55.0 1.430393e+09 150.15006 0.0 1.0 0.0 7.0
2 89.0 1.430424e+09 150.15006 1.0 0.0 0.0 0.0
3 42.0 1.430380e+09 150.15006 0.0 1.0 0.0 7.0
4 6.0 1.430288e+09 150.15006 1.0 0.0 0.0 0.0
5 92.0 1.430427e+09 150.15006 0.0 1.0 0.0 7.0
6 111.0 1.430443e+09 150.15006 0.0 0.0 1.0 1.0
7 33.0 1.430366e+09 150.15006 0.0 0.0 1.0 1.0
8 141.0 1.430467e+09 150.15006 0.0 0.0 1.0 1.0
9 7.0 1.430293e+09 150.15006 0.0 0.0 1.0 1.0
10 153.0 1.430472e+09 150.15006 0.0 0.0 1.0 1.0
11 304.0 1.430537e+09 150.15006 0.0 0.0 1.0 1.0
12 41.0 1.430378e+09 150.15006 0.0 0.0 1.0 1.0
13 245.0 1.430514e+09 150.15006 0.0 0.0 1.0 1.0
14 16.0 1.430331e+09 150.15006 0.0 0.0 1.0 1.0
15 178.0 1.430487e+09 150.15006 0.0 0.0 1.0 1.0
16 37.0 1.430375e+09 150.15006 0.0 1.0 0.0 7.0
17 83.0 1.430421e+09 150.15006 0.0 0.0 1.0 1.0
18 140.0 1.430467e+09 150.15006 0.0 0.0 1.0 1.0
19 317.0 1.430541e+09 150.15006 0.0 1.0 0.0 7.0
20 19.0 1.430335e+09 150.15006 0.0 0.0 1.0 1.0
21 130.0 1.430459e+09 150.15006 0.0 0.0 1.0 1.0
22 160.0 1.430477e+09 150.15006 0.0 0.0 1.0 1.0
23 10.0 1.430302e+09 150.15006 0.0 0.0 1.0 1.0
24 352.0 1.430555e+09 150.15006 0.0 0.0 1.0 1.0
25 24.0 1.430349e+09 150.15006 0.0 0.0 1.0 1.0
26 316.0 1.430541e+09 150.15006 0.0 0.0 1.0 1.0
27 72.0 1.430409e+09 150.15006 0.0 0.0 1.0 1.0
28 13.0 1.430324e+09 150.15006 0.0 0.0 1.0 1.0
29 296.0 1.430537e+09 150.15006 0.0 0.0 1.0 1.0
... ... ... ... ... ... ... ...
17147 114217.0 1.450144e+09 150.15006 0.0 0.0 1.0 1.0
17148 114171.0 1.450133e+09 150.15006 0.0 0.0 1.0 1.0
17149 110541.0 1.449924e+09 150.15006 0.0 0.0 1.0 1.0
17150 113482.0 1.450062e+09 150.15006 0.0 0.0 1.0 1.0
17151 113799.0 1.450085e+09 150.15006 0.0 0.0 1.0 1.0
17152 112509.0 1.450005e+09 150.15006 0.0 0.0 1.0 1.0
17153 114236.0 1.450149e+09 150.15006 1.0 0.0 0.0 0.0
17154 113006.0 1.450030e+09 150.15006 0.0 1.0 0.0 7.0
17155 114072.0 1.450116e+09 150.15006 0.0 0.0 1.0 1.0
17156 113779.0 1.450084e+09 150.15006 0.0 0.0 1.0 1.0
17157 113749.0 1.450082e+09 150.15006 0.0 0.0 1.0 1.0
17158 113577.0 1.450069e+09 150.15006 0.0 0.0 1.0 1.0
17159 111934.0 1.449979e+09 150.15006 1.0 0.0 0.0 0.0
17160 111422.0 1.449958e+09 150.15006 0.0 0.0 1.0 1.0
17161 112220.0 1.449992e+09 150.15006 0.0 0.0 1.0 1.0
17162 113917.0 1.450097e+09 150.15006 0.0 0.0 1.0 1.0
17163 113637.0 1.450073e+09 150.15006 1.0 0.0 0.0 0.0
17164 113781.0 1.450084e+09 150.15006 0.0 0.0 1.0 1.0
17165 112232.0 1.449992e+09 150.15006 0.0 0.0 1.0 1.0
17166 112497.0 1.450004e+09 150.15006 0.0 1.0 0.0 7.0
17167 113945.0 1.450100e+09 150.15006 0.0 0.0 1.0 1.0
17168 114096.0 1.450120e+09 150.15006 0.0 0.0 1.0 1.0
17169 112811.0 1.450019e+09 150.15006 0.0 1.0 0.0 7.0
17170 113453.0 1.450059e+09 150.15006 0.0 0.0 1.0 1.0
17171 112301.0 1.449996e+09 150.15006 0.0 0.0 1.0 1.0
17172 112647.0 1.450011e+09 150.15006 0.0 0.0 1.0 1.0
17173 110929.0 1.449939e+09 150.15006 0.0 1.0 0.0 7.0
17174 114377.0 1.450223e+09 150.15006 0.0 0.0 1.0 1.0
17175 113994.0 1.450105e+09 150.15006 0.0 0.0 1.0 1.0
17176 112916.0 1.450024e+09 150.15006 0.0 0.0 1.0 1.0

17177 rows × 7 columns

We can manually add a new transformer to a specific field


In [14]:
# TimestampTransformer was not among the transformers detected for 'start_time',
# so it is imported and attached by hand. The summary below shows it appended
# after the field's existing NumericTransformer.
from datapot.transformer.timestamp_transformer import TimestampTransformer

data.add_transformer('start_time', TimestampTransformer())


Out[14]:
DataPot class instance
 - number of features without transformation: 7
 - number of new features: 7
features to transform: 
	('match_id', [NumericTransformer])
	('start_time', [NumericTransformer, TimestampTransformer])
	('times', [ComplexTransformer(average_len_of_array=5.99400360947779)])
	('lobby_type', [SVDOneHotTransformer, NumericTransformer])

We can also remove a transformer from a field


In [15]:
# The second argument appears to be the zero-based position in the field's
# transformer list: 'lobby_type' had [SVDOneHotTransformer, NumericTransformer],
# and after this call only SVDOneHotTransformer remains (see the summary below).
data.remove_transformer("lobby_type", 1)
data


Out[15]:
DataPot class instance
 - number of features without transformation: 7
 - number of new features: 7
features to transform: 
	('match_id', [NumericTransformer])
	('start_time', [NumericTransformer, TimestampTransformer])
	('times', [ComplexTransformer(average_len_of_array=5.99400360947779)])
	('lobby_type', [SVDOneHotTransformer])

In [17]:
# Re-run the transformation with the edited transformer lists and keep the
# result. Compared with the earlier table, 'start_time' now also yields
# start_time_timestamp_* columns and the raw numeric 'lobby_type' column is gone.
df = data.transform(f)
df.head()


/usr/local/lib/python3.6/site-packages/datapot/__init__.py:137: FutureWarning: convert_objects is deprecated.  Use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.
  return pd.DataFrame(data=np.hstack(columns), columns=names).convert_objects(convert_numeric=True)
Out[17]:
match_id start_time start_time_timestamp_unixtime start_time_timestamp_week_day start_time_timestamp_month_day start_time_timestamp_hour start_time_timestamp_minute times_sum lobby_type_0 lobby_type_7 lobby_type_1
0 18.0 1.430334e+09 1.430334e+09 3.0 29.0 22.0 4.0 150.15006 1.0 0.0 0.0
1 55.0 1.430393e+09 1.430393e+09 3.0 30.0 14.0 31.0 150.15006 0.0 1.0 0.0
2 89.0 1.430424e+09 1.430424e+09 3.0 30.0 23.0 4.0 150.15006 1.0 0.0 0.0
3 42.0 1.430380e+09 1.430380e+09 3.0 30.0 10.0 39.0 150.15006 0.0 1.0 0.0
4 6.0 1.430288e+09 1.430288e+09 3.0 29.0 9.0 12.0 150.15006 1.0 0.0 0.0

df['start_time_timestamp_unixtime'].head()  # and one more: a column produced by the TimestampTransformer added to 'start_time'


In [ ]: