In [2]:
import pandas as pd
In [3]:
train_raw = pd.read_csv('../train.raw.csv')
/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py:2717: DtypeWarning: Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.
interactivity=interactivity, compiler=compiler, result=result)
In [ ]:
train_raw.head
In [7]:
s = train_raw['device_model']
In [12]:
len(s.unique())
Out[12]:
8251
In [13]:
len(train_raw['device_type'].unique())
Out[13]:
5
In [15]:
train_raw['device_type'].unique()
Out[15]:
array([1, 0, 4, 5, 2])
In [19]:
len(train_raw.columns)
Out[19]:
24
In [20]:
train_raw['banner_pos'].unique()
Out[20]:
array([0, 1, 4, 5, 2, 7, 3])
In [23]:
train_raw['app_id' == 'ecad2386']
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-23-3477250eed87> in <module>()
----> 1 train_raw['app_id' == 'ecad2386']
/usr/local/lib/python2.7/dist-packages/pandas/core/frame.pyc in __getitem__(self, key)
2057 return self._getitem_multilevel(key)
2058 else:
-> 2059 return self._getitem_column(key)
2060
2061 def _getitem_column(self, key):
/usr/local/lib/python2.7/dist-packages/pandas/core/frame.pyc in _getitem_column(self, key)
2064 # get column
2065 if self.columns.is_unique:
-> 2066 return self._get_item_cache(key)
2067
2068 # duplicate columns & possible reduce dimensionality
/usr/local/lib/python2.7/dist-packages/pandas/core/generic.pyc in _get_item_cache(self, item)
1384 res = cache.get(item)
1385 if res is None:
-> 1386 values = self._data.get(item)
1387 res = self._box_item_values(item, values)
1388 cache[item] = res
/usr/local/lib/python2.7/dist-packages/pandas/core/internals.pyc in get(self, item, fastpath)
3541
3542 if not isnull(item):
-> 3543 loc = self.items.get_loc(item)
3544 else:
3545 indexer = np.arange(len(self.items))[isnull(self.items)]
/usr/local/lib/python2.7/dist-packages/pandas/indexes/base.pyc in get_loc(self, key, method, tolerance)
2134 return self._engine.get_loc(key)
2135 except KeyError:
-> 2136 return self._engine.get_loc(self._maybe_cast_indexer(key))
2137
2138 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4433)()
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4279)()
pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13742)()
pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13696)()
KeyError: False
In [25]:
train_raw
Out[25]:
id
click
hour
C1
banner_pos
site_id
site_domain
site_category
app_id
app_domain
...
device_type
device_conn_type
C14
C15
C16
C17
C18
C19
C20
C21
0
1000009418151094273
0
14102100
1005
0
1fbe01fe
f3845767
28905ebd
ecad2386
7801e8d9
...
1
2
15706
320
50
1722
0
35
-1
79
1
10000169349117863715
0
14102100
1005
0
1fbe01fe
f3845767
28905ebd
ecad2386
7801e8d9
...
1
0
15704
320
50
1722
0
35
100084
79
2
10000371904215119486
0
14102100
1005
0
1fbe01fe
f3845767
28905ebd
ecad2386
7801e8d9
...
1
0
15704
320
50
1722
0
35
100084
79
3
10000640724480838376
0
14102100
1005
0
1fbe01fe
f3845767
28905ebd
ecad2386
7801e8d9
...
1
0
15706
320
50
1722
0
35
100084
79
4
10000679056417042096
0
14102100
1005
1
fe8cc448
9166c161
0569f928
ecad2386
7801e8d9
...
1
0
18993
320
50
2161
0
35
-1
157
5
10000720757801103869
0
14102100
1005
0
d6137915
bb1ef334
f028772b
ecad2386
7801e8d9
...
1
0
16920
320
50
1899
0
431
100077
117
6
10000724729988544911
0
14102100
1005
0
8fda644b
25d4cfcd
f028772b
ecad2386
7801e8d9
...
1
0
20362
320
50
2333
0
39
-1
157
7
10000918755742328737
0
14102100
1005
1
e151e245
7e091613
f028772b
ecad2386
7801e8d9
...
1
0
20632
320
50
2374
3
39
-1
23
8
10000949271186029916
1
14102100
1005
0
1fbe01fe
f3845767
28905ebd
ecad2386
7801e8d9
...
1
2
15707
320
50
1722
0
35
-1
79
9
10001264480619467364
0
14102100
1002
0
84c7ba46
c4e18dd6
50e219e0
ecad2386
7801e8d9
...
0
0
21689
320
50
2496
3
167
100191
23
10
10001868339616595934
0
14102100
1005
1
e151e245
7e091613
f028772b
ecad2386
7801e8d9
...
1
0
17747
320
50
1974
2
39
100019
33
11
10001966791793526909
0
14102100
1005
0
1fbe01fe
f3845767
28905ebd
ecad2386
7801e8d9
...
1
0
15701
320
50
1722
0
35
-1
79
12
10002028568167339219
0
14102100
1005
0
9e8cf15d
0d3cb7be
f028772b
ecad2386
7801e8d9
...
1
2
20596
320
50
2161
0
35
100148
157
13
10002044883120869786
0
14102100
1005
0
d6137915
bb1ef334
f028772b
ecad2386
7801e8d9
...
1
0
19771
320
50
2227
0
687
100077
48
14
10002518649031436658
0
14102100
1005
0
85f751fd
c4e18dd6
50e219e0
98fed791
d9b5648e
...
1
0
20984
320
50
2371
0
551
-1
46
15
10003539039235338011
0
14102100
1005
0
1fbe01fe
f3845767
28905ebd
ecad2386
7801e8d9
...
1
0
15699
320
50
1722
0
35
100084
79
16
10003585669470236873
0
14102100
1005
0
d9750ee7
98572c79
f028772b
ecad2386
7801e8d9
...
1
0
17914
320
50
2043
2
39
-1
32
17
10004105575081229495
0
14102100
1005
0
1fbe01fe
f3845767
28905ebd
ecad2386
7801e8d9
...
1
2
15708
320
50
1722
0
35
100084
79
18
10004181428767727519
0
14102100
1005
1
0c2fe9d6
27e3c518
28905ebd
ecad2386
7801e8d9
...
1
0
6558
320
50
571
2
39
-1
32
19
10004482643316086592
0
14102100
1005
0
85f751fd
c4e18dd6
50e219e0
66a5f0f3
d9b5648e
...
1
0
21234
320
50
2434
3
163
100088
61
20
10004510652136496837
0
14102100
1005
0
543a539e
c7ca3108
3e814130
ecad2386
7801e8d9
...
1
0
20352
320
50
2333
0
39
-1
157
21
10004574413841529209
0
14102100
1005
0
1fbe01fe
f3845767
28905ebd
ecad2386
7801e8d9
...
1
0
15706
320
50
1722
0
35
-1
79
22
10004670021948955159
0
14102100
1005
0
543a539e
c7ca3108
3e814130
ecad2386
7801e8d9
...
1
0
20366
320
50
2333
0
39
-1
157
23
10004765361151096125
1
14102100
1005
0
1fbe01fe
f3845767
28905ebd
ecad2386
7801e8d9
...
1
0
15701
320
50
1722
0
35
-1
79
24
10005249248600843539
0
14102100
1005
0
1fbe01fe
f3845767
28905ebd
ecad2386
7801e8d9
...
1
0
15706
320
50
1722
0
35
100083
79
25
10005334911727438633
0
14102100
1010
1
85f751fd
c4e18dd6
50e219e0
ffc6ffd0
7801e8d9
...
4
0
21665
320
50
2493
3
35
-1
117
26
10005541670676403131
0
14102100
1005
1
e151e245
7e091613
f028772b
ecad2386
7801e8d9
...
1
0
20984
320
50
2371
0
551
100217
46
27
10005609489911213467
1
14102100
1005
0
85f751fd
c4e18dd6
50e219e0
54c5d545
2347f47a
...
1
0
21611
320
50
2480
3
297
100111
61
28
10005649443863261125
0
14102100
1005
0
543a539e
c7ca3108
3e814130
ecad2386
7801e8d9
...
1
0
20366
320
50
2333
0
39
-1
157
29
10005951398749600249
0
14102100
1005
0
1fbe01fe
f3845767
28905ebd
ecad2386
7801e8d9
...
1
0
15706
320
50
1722
0
35
-1
79
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
40428937
9994363992766759561
0
14103023
1005
1
0eb72673
d2f72222
f028772b
ecad2386
7801e8d9
...
1
0
19772
320
50
2227
0
935
100077
48
40428938
9994637981423829789
0
14103023
1005
0
1fbe01fe
f3845767
28905ebd
ecad2386
7801e8d9
...
1
0
22257
320
50
2545
0
431
100084
221
40428939
9994670492261359346
1
14103023
1005
1
d9750ee7
98572c79
f028772b
ecad2386
7801e8d9
...
1
0
17753
320
50
1993
2
1063
-1
33
40428940
9995064718229733761
0
14103023
1002
0
c135a32f
b8393312
50e219e0
ecad2386
7801e8d9
...
0
0
17894
320
50
2039
2
39
100077
32
40428941
9995422670224714350
0
14103023
1005
0
85f751fd
c4e18dd6
50e219e0
d36838b1
0e8616ad
...
1
0
23866
320
50
2736
0
33
-1
246
40428942
9995585359240422336
1
14103023
1005
0
85f751fd
c4e18dd6
50e219e0
3c4b944d
2347f47a
...
1
0
16859
320
50
1887
3
39
-1
23
40428943
9995700942528439110
0
14103023
1005
0
85f751fd
c4e18dd6
50e219e0
9c13b419
2347f47a
...
1
0
23725
320
50
2716
3
47
100217
23
40428944
9995851231658276345
1
14103023
1005
1
b7e9786d
b12b9f85
f028772b
ecad2386
7801e8d9
...
1
0
16858
320
50
1887
3
39
100199
23
40428945
9996037780338178315
0
14103023
1005
0
85f751fd
c4e18dd6
50e219e0
e2fcccd2
5c5a694b
...
1
0
20633
320
50
2374
3
39
-1
23
40428946
9996342298084120766
0
14103023
1005
0
1fbe01fe
f3845767
28905ebd
ecad2386
7801e8d9
...
1
0
22676
320
50
2616
0
35
-1
51
40428947
9996680829496062830
1
14103023
1005
1
16c73019
8025317b
28905ebd
ecad2386
7801e8d9
...
1
0
22193
320
50
2556
3
167
100194
23
40428948
9996821883297875226
1
14103023
1005
1
d9750ee7
98572c79
f028772b
ecad2386
7801e8d9
...
1
0
17614
320
50
1993
2
1063
100084
33
40428949
9997352145588717924
1
14103023
1005
0
7294ea0f
863fa89d
3e814130
ecad2386
7801e8d9
...
1
0
17239
320
50
1973
3
39
100148
23
40428950
9997366151542576761
0
14103023
1005
1
e151e245
7e091613
f028772b
ecad2386
7801e8d9
...
1
0
22815
320
50
2647
2
39
100148
23
40428951
9997481344885640671
0
14103023
1005
0
85f751fd
c4e18dd6
50e219e0
685d1c4c
2347f47a
...
1
3
23222
320
50
2676
0
299
100176
221
40428952
9997781251272087830
0
14103023
1005
0
85f751fd
c4e18dd6
50e219e0
7e7baafa
2347f47a
...
1
0
23866
320
50
2736
0
33
100170
246
40428953
9997782484512570704
0
14103023
1005
1
85f751fd
c4e18dd6
50e219e0
cf0327f9
2347f47a
...
1
0
23644
300
50
2709
3
39
100013
23
40428954
9997850534923982041
0
14103023
1005
0
85f751fd
c4e18dd6
50e219e0
e2fcccd2
5c5a694b
...
1
0
20632
320
50
2374
3
39
-1
23
40428955
9998205295831446187
0
14103023
1005
0
85f751fd
c4e18dd6
50e219e0
febd1138
82e27996
...
1
0
18648
320
50
1092
3
809
100156
61
40428956
9998265546800238489
0
14103023
1005
0
85f751fd
c4e18dd6
50e219e0
f0d41ff1
2347f47a
...
1
0
22592
320
50
2603
3
171
100161
61
40428957
9998354075836702668
0
14103023
1005
0
85f751fd
c4e18dd6
50e219e0
d36838b1
0e8616ad
...
1
2
23866
320
50
2736
0
33
100170
246
40428958
9998487258543214200
1
14103023
1005
0
83a0ad1a
5c9ae867
f028772b
ecad2386
7801e8d9
...
1
0
19772
320
50
2227
0
935
-1
48
40428959
9998515968748286661
0
14103023
1005
1
856e6d3f
58a89a43
f028772b
ecad2386
7801e8d9
...
1
0
23997
320
50
2748
0
35
-1
79
40428960
9998613662398752368
0
14103023
1005
0
85f751fd
c4e18dd6
50e219e0
685d1c4c
2347f47a
...
1
3
23735
320
50
2676
0
299
100176
221
40428961
9998654904628431953
0
14103023
1005
0
85f751fd
c4e18dd6
50e219e0
396df801
2347f47a
...
1
0
23866
320
50
2736
0
33
-1
246
40428962
9998752756639797808
1
14103023
1005
1
e151e245
7e091613
f028772b
ecad2386
7801e8d9
...
1
0
17262
320
50
1872
3
39
100173
23
40428963
9999037534674210613
0
14103023
1005
0
85f751fd
c4e18dd6
50e219e0
9c13b419
2347f47a
...
1
2
23160
320
50
2667
0
47
-1
221
40428964
9999585120349625051
0
14103023
1005
1
f61eaaae
6b59f079
f028772b
ecad2386
7801e8d9
...
1
0
20969
320
50
2372
0
813
-1
46
40428965
9999636335882369227
1
14103023
1005
0
85f751fd
c4e18dd6
50e219e0
3c4b944d
2347f47a
...
1
0
16859
320
50
1887
3
39
100194
23
40428966
9999746639881208566
0
14103023
1005
0
1fbe01fe
f3845767
28905ebd
ecad2386
7801e8d9
...
1
0
22257
320
50
2545
0
431
100084
221
40428967 rows × 24 columns
In [33]:
train_raw[train_raw.click == 0]
Out[33]:
id
click
hour
C1
banner_pos
site_id
site_domain
site_category
app_id
app_domain
...
device_type
device_conn_type
C14
C15
C16
C17
C18
C19
C20
C21
0
1000009418151094273
0
14102100
1005
0
1fbe01fe
f3845767
28905ebd
ecad2386
7801e8d9
...
1
2
15706
320
50
1722
0
35
-1
79
1
10000169349117863715
0
14102100
1005
0
1fbe01fe
f3845767
28905ebd
ecad2386
7801e8d9
...
1
0
15704
320
50
1722
0
35
100084
79
2
10000371904215119486
0
14102100
1005
0
1fbe01fe
f3845767
28905ebd
ecad2386
7801e8d9
...
1
0
15704
320
50
1722
0
35
100084
79
3
10000640724480838376
0
14102100
1005
0
1fbe01fe
f3845767
28905ebd
ecad2386
7801e8d9
...
1
0
15706
320
50
1722
0
35
100084
79
4
10000679056417042096
0
14102100
1005
1
fe8cc448
9166c161
0569f928
ecad2386
7801e8d9
...
1
0
18993
320
50
2161
0
35
-1
157
5
10000720757801103869
0
14102100
1005
0
d6137915
bb1ef334
f028772b
ecad2386
7801e8d9
...
1
0
16920
320
50
1899
0
431
100077
117
6
10000724729988544911
0
14102100
1005
0
8fda644b
25d4cfcd
f028772b
ecad2386
7801e8d9
...
1
0
20362
320
50
2333
0
39
-1
157
7
10000918755742328737
0
14102100
1005
1
e151e245
7e091613
f028772b
ecad2386
7801e8d9
...
1
0
20632
320
50
2374
3
39
-1
23
9
10001264480619467364
0
14102100
1002
0
84c7ba46
c4e18dd6
50e219e0
ecad2386
7801e8d9
...
0
0
21689
320
50
2496
3
167
100191
23
10
10001868339616595934
0
14102100
1005
1
e151e245
7e091613
f028772b
ecad2386
7801e8d9
...
1
0
17747
320
50
1974
2
39
100019
33
11
10001966791793526909
0
14102100
1005
0
1fbe01fe
f3845767
28905ebd
ecad2386
7801e8d9
...
1
0
15701
320
50
1722
0
35
-1
79
12
10002028568167339219
0
14102100
1005
0
9e8cf15d
0d3cb7be
f028772b
ecad2386
7801e8d9
...
1
2
20596
320
50
2161
0
35
100148
157
13
10002044883120869786
0
14102100
1005
0
d6137915
bb1ef334
f028772b
ecad2386
7801e8d9
...
1
0
19771
320
50
2227
0
687
100077
48
14
10002518649031436658
0
14102100
1005
0
85f751fd
c4e18dd6
50e219e0
98fed791
d9b5648e
...
1
0
20984
320
50
2371
0
551
-1
46
15
10003539039235338011
0
14102100
1005
0
1fbe01fe
f3845767
28905ebd
ecad2386
7801e8d9
...
1
0
15699
320
50
1722
0
35
100084
79
16
10003585669470236873
0
14102100
1005
0
d9750ee7
98572c79
f028772b
ecad2386
7801e8d9
...
1
0
17914
320
50
2043
2
39
-1
32
17
10004105575081229495
0
14102100
1005
0
1fbe01fe
f3845767
28905ebd
ecad2386
7801e8d9
...
1
2
15708
320
50
1722
0
35
100084
79
18
10004181428767727519
0
14102100
1005
1
0c2fe9d6
27e3c518
28905ebd
ecad2386
7801e8d9
...
1
0
6558
320
50
571
2
39
-1
32
19
10004482643316086592
0
14102100
1005
0
85f751fd
c4e18dd6
50e219e0
66a5f0f3
d9b5648e
...
1
0
21234
320
50
2434
3
163
100088
61
20
10004510652136496837
0
14102100
1005
0
543a539e
c7ca3108
3e814130
ecad2386
7801e8d9
...
1
0
20352
320
50
2333
0
39
-1
157
21
10004574413841529209
0
14102100
1005
0
1fbe01fe
f3845767
28905ebd
ecad2386
7801e8d9
...
1
0
15706
320
50
1722
0
35
-1
79
22
10004670021948955159
0
14102100
1005
0
543a539e
c7ca3108
3e814130
ecad2386
7801e8d9
...
1
0
20366
320
50
2333
0
39
-1
157
24
10005249248600843539
0
14102100
1005
0
1fbe01fe
f3845767
28905ebd
ecad2386
7801e8d9
...
1
0
15706
320
50
1722
0
35
100083
79
25
10005334911727438633
0
14102100
1010
1
85f751fd
c4e18dd6
50e219e0
ffc6ffd0
7801e8d9
...
4
0
21665
320
50
2493
3
35
-1
117
26
10005541670676403131
0
14102100
1005
1
e151e245
7e091613
f028772b
ecad2386
7801e8d9
...
1
0
20984
320
50
2371
0
551
100217
46
28
10005649443863261125
0
14102100
1005
0
543a539e
c7ca3108
3e814130
ecad2386
7801e8d9
...
1
0
20366
320
50
2333
0
39
-1
157
29
10005951398749600249
0
14102100
1005
0
1fbe01fe
f3845767
28905ebd
ecad2386
7801e8d9
...
1
0
15706
320
50
1722
0
35
-1
79
30
10006192453619779489
0
14102100
1005
0
85f751fd
c4e18dd6
50e219e0
685d1c4c
2347f47a
...
1
3
15708
320
50
1722
0
35
-1
79
31
10006415976094813740
0
14102100
1005
0
f84e52b6
d7e2f29b
28905ebd
ecad2386
7801e8d9
...
1
0
16838
320
50
1882
3
35
-1
13
33
10006557235872316145
0
14102100
1005
0
1fbe01fe
f3845767
28905ebd
ecad2386
7801e8d9
...
1
0
15699
320
50
1722
0
35
-1
79
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
40428920
9991636647469923807
0
14103023
1005
1
d9750ee7
98572c79
f028772b
ecad2386
7801e8d9
...
1
0
17753
320
50
1993
2
1063
-1
33
40428924
9992306685826528392
0
14103023
1005
0
85f751fd
c4e18dd6
50e219e0
685d1c4c
2347f47a
...
1
3
24001
320
50
2749
0
43
100177
221
40428925
9992327029368026061
0
14103023
1005
0
1fbe01fe
f3845767
28905ebd
ecad2386
7801e8d9
...
1
0
22676
320
50
2616
0
35
100083
51
40428926
9992484712417106617
0
14103023
1005
0
d6137915
bb1ef334
f028772b
ecad2386
7801e8d9
...
1
0
19771
320
50
2227
0
935
-1
48
40428927
9992868128976521374
0
14103023
1005
0
85f751fd
c4e18dd6
50e219e0
98fed791
d9b5648e
...
1
0
19743
320
50
2264
3
425
100000
61
40428928
9993169168198214540
0
14103023
1005
0
1fbe01fe
f3845767
28905ebd
ecad2386
7801e8d9
...
1
0
22676
320
50
2616
0
35
100084
51
40428932
9993490976166478268
0
14103023
1005
0
85f751fd
c4e18dd6
50e219e0
d36838b1
0e8616ad
...
1
0
23866
320
50
2736
0
33
100170
246
40428934
9993728571358213414
0
14103023
1005
0
1fbe01fe
f3845767
28905ebd
ecad2386
7801e8d9
...
1
0
22676
320
50
2616
0
35
100084
51
40428936
99941876396800446
0
14103023
1005
1
0eb72673
d2f72222
f028772b
ecad2386
7801e8d9
...
1
0
23015
320
50
2658
3
35
100148
23
40428937
9994363992766759561
0
14103023
1005
1
0eb72673
d2f72222
f028772b
ecad2386
7801e8d9
...
1
0
19772
320
50
2227
0
935
100077
48
40428938
9994637981423829789
0
14103023
1005
0
1fbe01fe
f3845767
28905ebd
ecad2386
7801e8d9
...
1
0
22257
320
50
2545
0
431
100084
221
40428940
9995064718229733761
0
14103023
1002
0
c135a32f
b8393312
50e219e0
ecad2386
7801e8d9
...
0
0
17894
320
50
2039
2
39
100077
32
40428941
9995422670224714350
0
14103023
1005
0
85f751fd
c4e18dd6
50e219e0
d36838b1
0e8616ad
...
1
0
23866
320
50
2736
0
33
-1
246
40428943
9995700942528439110
0
14103023
1005
0
85f751fd
c4e18dd6
50e219e0
9c13b419
2347f47a
...
1
0
23725
320
50
2716
3
47
100217
23
40428945
9996037780338178315
0
14103023
1005
0
85f751fd
c4e18dd6
50e219e0
e2fcccd2
5c5a694b
...
1
0
20633
320
50
2374
3
39
-1
23
40428946
9996342298084120766
0
14103023
1005
0
1fbe01fe
f3845767
28905ebd
ecad2386
7801e8d9
...
1
0
22676
320
50
2616
0
35
-1
51
40428950
9997366151542576761
0
14103023
1005
1
e151e245
7e091613
f028772b
ecad2386
7801e8d9
...
1
0
22815
320
50
2647
2
39
100148
23
40428951
9997481344885640671
0
14103023
1005
0
85f751fd
c4e18dd6
50e219e0
685d1c4c
2347f47a
...
1
3
23222
320
50
2676
0
299
100176
221
40428952
9997781251272087830
0
14103023
1005
0
85f751fd
c4e18dd6
50e219e0
7e7baafa
2347f47a
...
1
0
23866
320
50
2736
0
33
100170
246
40428953
9997782484512570704
0
14103023
1005
1
85f751fd
c4e18dd6
50e219e0
cf0327f9
2347f47a
...
1
0
23644
300
50
2709
3
39
100013
23
40428954
9997850534923982041
0
14103023
1005
0
85f751fd
c4e18dd6
50e219e0
e2fcccd2
5c5a694b
...
1
0
20632
320
50
2374
3
39
-1
23
40428955
9998205295831446187
0
14103023
1005
0
85f751fd
c4e18dd6
50e219e0
febd1138
82e27996
...
1
0
18648
320
50
1092
3
809
100156
61
40428956
9998265546800238489
0
14103023
1005
0
85f751fd
c4e18dd6
50e219e0
f0d41ff1
2347f47a
...
1
0
22592
320
50
2603
3
171
100161
61
40428957
9998354075836702668
0
14103023
1005
0
85f751fd
c4e18dd6
50e219e0
d36838b1
0e8616ad
...
1
2
23866
320
50
2736
0
33
100170
246
40428959
9998515968748286661
0
14103023
1005
1
856e6d3f
58a89a43
f028772b
ecad2386
7801e8d9
...
1
0
23997
320
50
2748
0
35
-1
79
40428960
9998613662398752368
0
14103023
1005
0
85f751fd
c4e18dd6
50e219e0
685d1c4c
2347f47a
...
1
3
23735
320
50
2676
0
299
100176
221
40428961
9998654904628431953
0
14103023
1005
0
85f751fd
c4e18dd6
50e219e0
396df801
2347f47a
...
1
0
23866
320
50
2736
0
33
-1
246
40428963
9999037534674210613
0
14103023
1005
0
85f751fd
c4e18dd6
50e219e0
9c13b419
2347f47a
...
1
2
23160
320
50
2667
0
47
-1
221
40428964
9999585120349625051
0
14103023
1005
1
f61eaaae
6b59f079
f028772b
ecad2386
7801e8d9
...
1
0
20969
320
50
2372
0
813
-1
46
40428966
9999746639881208566
0
14103023
1005
0
1fbe01fe
f3845767
28905ebd
ecad2386
7801e8d9
...
1
0
22257
320
50
2545
0
431
100084
221
33563901 rows × 24 columns
In [34]:
len(train_raw[train_raw.click == 0])
Out[34]:
33563901
In [35]:
len(train_raw[train_raw.click == 1])
Out[35]:
6865066
In [38]:
6865066 / (33563901.0 + 6865066)
Out[38]:
0.16980562476404604
In [42]:
train_raw['date'] = train_raw['hour'] / 100
In [51]:
first_day = train_raw[train_raw.date == 141021]
In [54]:
g = first_day.groupby(['C1'])
In [55]:
g.count()
Out[55]:
id
click
hour
banner_pos
site_id
site_domain
site_category
app_id
app_domain
app_category
...
device_conn_type
C14
C15
C16
C17
C18
C19
C20
C21
date
C1
1001
38
38
38
38
38
38
38
38
38
38
...
38
38
38
38
38
38
38
38
38
38
1002
5028
5028
5028
5028
5028
5028
5028
5028
5028
5028
...
5028
5028
5028
5028
5028
5028
5028
5028
5028
5028
1005
109997
109997
109997
109997
109997
109997
109997
109997
109997
109997
...
109997
109997
109997
109997
109997
109997
109997
109997
109997
109997
1007
113
113
113
113
113
113
113
113
113
113
...
113
113
113
113
113
113
113
113
113
113
1008
11
11
11
11
11
11
11
11
11
11
...
11
11
11
11
11
11
11
11
11
11
1010
3819
3819
3819
3819
3819
3819
3819
3819
3819
3819
...
3819
3819
3819
3819
3819
3819
3819
3819
3819
3819
6 rows × 24 columns
In [56]:
first_day_click = first_day[first_day.click == 1]
In [57]:
g1 = first_day_click.groupby(['C1'])
In [65]:
g1.count()['id']
Out[65]:
C1
1001 4
1002 1155
1005 19349
1008 5
1010 279
Name: id, dtype: int64
In [59]:
s = train_raw['banner_pos']
In [60]:
s.unique()
Out[60]:
array([0, 1, 4, 5, 2, 7, 3])
In [69]:
m = {}
for key, value in zip(_65.index, _65):
m[key] = value
In [70]:
m
Out[70]:
{1001: 4, 1002: 1155, 1005: 19349, 1008: 5, 1010: 279}
In [71]:
g = train_raw.groupby('device_id')
In [72]:
g.count()
Out[72]:
id
click
hour
C1
banner_pos
site_id
site_domain
site_category
app_id
app_domain
...
device_conn_type
C14
C15
C16
C17
C18
C19
C20
C21
date
device_id
00000414
1
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
1
1
1
00000715
1
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
1
1
1
00000919
1
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
1
1
1
00000b7c
1
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
1
1
1
00001237
6
6
6
6
6
6
6
6
6
6
...
6
6
6
6
6
6
6
6
6
6
0000194a
6
6
6
6
6
6
6
6
6
6
...
6
6
6
6
6
6
6
6
6
6
000022f3
1
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
1
1
1
00002c39
1
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
1
1
1
00003255
1
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
1
1
1
000032d7
9
9
9
9
9
9
9
9
9
9
...
9
9
9
9
9
9
9
9
9
9
00003e42
10
10
10
10
10
10
10
10
10
10
...
10
10
10
10
10
10
10
10
10
10
00004686
1
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
1
1
1
000048d5
2
2
2
2
2
2
2
2
2
2
...
2
2
2
2
2
2
2
2
2
2
000050fc
1
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
1
1
1
00005365
1
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
1
1
1
0000552b
12
12
12
12
12
12
12
12
12
12
...
12
12
12
12
12
12
12
12
12
12
00006524
2
2
2
2
2
2
2
2
2
2
...
2
2
2
2
2
2
2
2
2
2
00006911
1
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
1
1
1
00006ee3
1
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
1
1
1
000070cc
5
5
5
5
5
5
5
5
5
5
...
5
5
5
5
5
5
5
5
5
5
000071a8
2
2
2
2
2
2
2
2
2
2
...
2
2
2
2
2
2
2
2
2
2
00007707
1
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
1
1
1
00007d32
1
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
1
1
1
00007ee0
1
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
1
1
1
00007faa
1
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
1
1
1
0000822f
5
5
5
5
5
5
5
5
5
5
...
5
5
5
5
5
5
5
5
5
5
0000893a
1
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
1
1
1
00008be9
6
6
6
6
6
6
6
6
6
6
...
6
6
6
6
6
6
6
6
6
6
00008c5c
1
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
1
1
1
00008ed4
1
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
1
1
1
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
ffff2b1f
1
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
1
1
1
ffff2c9d
29
29
29
29
29
29
29
29
29
29
...
29
29
29
29
29
29
29
29
29
29
ffff391f
1
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
1
1
1
ffff3fbb
3
3
3
3
3
3
3
3
3
3
...
3
3
3
3
3
3
3
3
3
3
ffff4106
2
2
2
2
2
2
2
2
2
2
...
2
2
2
2
2
2
2
2
2
2
ffff430b
6
6
6
6
6
6
6
6
6
6
...
6
6
6
6
6
6
6
6
6
6
ffff4f9a
2
2
2
2
2
2
2
2
2
2
...
2
2
2
2
2
2
2
2
2
2
ffff5822
2
2
2
2
2
2
2
2
2
2
...
2
2
2
2
2
2
2
2
2
2
ffff59da
1
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
1
1
1
ffff60f9
1
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
1
1
1
ffff6186
1
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
1
1
1
ffff6ae3
2
2
2
2
2
2
2
2
2
2
...
2
2
2
2
2
2
2
2
2
2
ffff70ae
2
2
2
2
2
2
2
2
2
2
...
2
2
2
2
2
2
2
2
2
2
ffff7735
1
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
1
1
1
ffff9201
1
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
1
1
1
ffff9249
28
28
28
28
28
28
28
28
28
28
...
28
28
28
28
28
28
28
28
28
28
ffffa2c2
1
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
1
1
1
ffffa5a3
1
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
1
1
1
ffffabfb
3
3
3
3
3
3
3
3
3
3
...
3
3
3
3
3
3
3
3
3
3
ffffb0fc
2
2
2
2
2
2
2
2
2
2
...
2
2
2
2
2
2
2
2
2
2
ffffb18a
3
3
3
3
3
3
3
3
3
3
...
3
3
3
3
3
3
3
3
3
3
ffffb919
1
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
1
1
1
ffffbe39
2
2
2
2
2
2
2
2
2
2
...
2
2
2
2
2
2
2
2
2
2
ffffd2eb
3
3
3
3
3
3
3
3
3
3
...
3
3
3
3
3
3
3
3
3
3
ffffd382
3
3
3
3
3
3
3
3
3
3
...
3
3
3
3
3
3
3
3
3
3
ffffd970
1
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
1
1
1
ffffd98b
8
8
8
8
8
8
8
8
8
8
...
8
8
8
8
8
8
8
8
8
8
ffffde2c
2
2
2
2
2
2
2
2
2
2
...
2
2
2
2
2
2
2
2
2
2
ffffe321
4
4
4
4
4
4
4
4
4
4
...
4
4
4
4
4
4
4
4
4
4
ffffe5da
1
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
1
1
1
2686408 rows × 24 columns
In [74]:
len(_72[_72.id > 10])
Out[74]:
87981
In [75]:
len(_72)
Out[75]:
2686408
In [76]:
2686408 - 87981
Out[76]:
2598427
In [ ]:
Content source: JasonWayne/avazu-essay
Similar notebooks: