In [2]:
import pandas as pd

In [3]:
train_raw = pd.read_csv('../train.raw.csv')


/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py:2717: DtypeWarning: Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)

In [ ]:
train_raw.head

In [7]:
s = train_raw['device_model']

In [12]:
len(s.unique())


Out[12]:
8251

In [13]:
len(train_raw['device_type'].unique())


Out[13]:
5

In [15]:
train_raw['device_type'].unique()


Out[15]:
array([1, 0, 4, 5, 2])

In [19]:
len(train_raw.columns)


Out[19]:
24

In [20]:
train_raw['banner_pos'].unique()


Out[20]:
array([0, 1, 4, 5, 2, 7, 3])

In [23]:
train_raw['app_id' == 'ecad2386']


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-23-3477250eed87> in <module>()
----> 1 train_raw['app_id' == 'ecad2386']

/usr/local/lib/python2.7/dist-packages/pandas/core/frame.pyc in __getitem__(self, key)
   2057             return self._getitem_multilevel(key)
   2058         else:
-> 2059             return self._getitem_column(key)
   2060 
   2061     def _getitem_column(self, key):

/usr/local/lib/python2.7/dist-packages/pandas/core/frame.pyc in _getitem_column(self, key)
   2064         # get column
   2065         if self.columns.is_unique:
-> 2066             return self._get_item_cache(key)
   2067 
   2068         # duplicate columns & possible reduce dimensionality

/usr/local/lib/python2.7/dist-packages/pandas/core/generic.pyc in _get_item_cache(self, item)
   1384         res = cache.get(item)
   1385         if res is None:
-> 1386             values = self._data.get(item)
   1387             res = self._box_item_values(item, values)
   1388             cache[item] = res

/usr/local/lib/python2.7/dist-packages/pandas/core/internals.pyc in get(self, item, fastpath)
   3541 
   3542             if not isnull(item):
-> 3543                 loc = self.items.get_loc(item)
   3544             else:
   3545                 indexer = np.arange(len(self.items))[isnull(self.items)]

/usr/local/lib/python2.7/dist-packages/pandas/indexes/base.pyc in get_loc(self, key, method, tolerance)
   2134                 return self._engine.get_loc(key)
   2135             except KeyError:
-> 2136                 return self._engine.get_loc(self._maybe_cast_indexer(key))
   2137 
   2138         indexer = self.get_indexer([key], method=method, tolerance=tolerance)

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4433)()

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4279)()

pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13742)()

pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13696)()

KeyError: False

In [25]:
train_raw


Out[25]:
id click hour C1 banner_pos site_id site_domain site_category app_id app_domain ... device_type device_conn_type C14 C15 C16 C17 C18 C19 C20 C21
0 1000009418151094273 0 14102100 1005 0 1fbe01fe f3845767 28905ebd ecad2386 7801e8d9 ... 1 2 15706 320 50 1722 0 35 -1 79
1 10000169349117863715 0 14102100 1005 0 1fbe01fe f3845767 28905ebd ecad2386 7801e8d9 ... 1 0 15704 320 50 1722 0 35 100084 79
2 10000371904215119486 0 14102100 1005 0 1fbe01fe f3845767 28905ebd ecad2386 7801e8d9 ... 1 0 15704 320 50 1722 0 35 100084 79
3 10000640724480838376 0 14102100 1005 0 1fbe01fe f3845767 28905ebd ecad2386 7801e8d9 ... 1 0 15706 320 50 1722 0 35 100084 79
4 10000679056417042096 0 14102100 1005 1 fe8cc448 9166c161 0569f928 ecad2386 7801e8d9 ... 1 0 18993 320 50 2161 0 35 -1 157
5 10000720757801103869 0 14102100 1005 0 d6137915 bb1ef334 f028772b ecad2386 7801e8d9 ... 1 0 16920 320 50 1899 0 431 100077 117
6 10000724729988544911 0 14102100 1005 0 8fda644b 25d4cfcd f028772b ecad2386 7801e8d9 ... 1 0 20362 320 50 2333 0 39 -1 157
7 10000918755742328737 0 14102100 1005 1 e151e245 7e091613 f028772b ecad2386 7801e8d9 ... 1 0 20632 320 50 2374 3 39 -1 23
8 10000949271186029916 1 14102100 1005 0 1fbe01fe f3845767 28905ebd ecad2386 7801e8d9 ... 1 2 15707 320 50 1722 0 35 -1 79
9 10001264480619467364 0 14102100 1002 0 84c7ba46 c4e18dd6 50e219e0 ecad2386 7801e8d9 ... 0 0 21689 320 50 2496 3 167 100191 23
10 10001868339616595934 0 14102100 1005 1 e151e245 7e091613 f028772b ecad2386 7801e8d9 ... 1 0 17747 320 50 1974 2 39 100019 33
11 10001966791793526909 0 14102100 1005 0 1fbe01fe f3845767 28905ebd ecad2386 7801e8d9 ... 1 0 15701 320 50 1722 0 35 -1 79
12 10002028568167339219 0 14102100 1005 0 9e8cf15d 0d3cb7be f028772b ecad2386 7801e8d9 ... 1 2 20596 320 50 2161 0 35 100148 157
13 10002044883120869786 0 14102100 1005 0 d6137915 bb1ef334 f028772b ecad2386 7801e8d9 ... 1 0 19771 320 50 2227 0 687 100077 48
14 10002518649031436658 0 14102100 1005 0 85f751fd c4e18dd6 50e219e0 98fed791 d9b5648e ... 1 0 20984 320 50 2371 0 551 -1 46
15 10003539039235338011 0 14102100 1005 0 1fbe01fe f3845767 28905ebd ecad2386 7801e8d9 ... 1 0 15699 320 50 1722 0 35 100084 79
16 10003585669470236873 0 14102100 1005 0 d9750ee7 98572c79 f028772b ecad2386 7801e8d9 ... 1 0 17914 320 50 2043 2 39 -1 32
17 10004105575081229495 0 14102100 1005 0 1fbe01fe f3845767 28905ebd ecad2386 7801e8d9 ... 1 2 15708 320 50 1722 0 35 100084 79
18 10004181428767727519 0 14102100 1005 1 0c2fe9d6 27e3c518 28905ebd ecad2386 7801e8d9 ... 1 0 6558 320 50 571 2 39 -1 32
19 10004482643316086592 0 14102100 1005 0 85f751fd c4e18dd6 50e219e0 66a5f0f3 d9b5648e ... 1 0 21234 320 50 2434 3 163 100088 61
20 10004510652136496837 0 14102100 1005 0 543a539e c7ca3108 3e814130 ecad2386 7801e8d9 ... 1 0 20352 320 50 2333 0 39 -1 157
21 10004574413841529209 0 14102100 1005 0 1fbe01fe f3845767 28905ebd ecad2386 7801e8d9 ... 1 0 15706 320 50 1722 0 35 -1 79
22 10004670021948955159 0 14102100 1005 0 543a539e c7ca3108 3e814130 ecad2386 7801e8d9 ... 1 0 20366 320 50 2333 0 39 -1 157
23 10004765361151096125 1 14102100 1005 0 1fbe01fe f3845767 28905ebd ecad2386 7801e8d9 ... 1 0 15701 320 50 1722 0 35 -1 79
24 10005249248600843539 0 14102100 1005 0 1fbe01fe f3845767 28905ebd ecad2386 7801e8d9 ... 1 0 15706 320 50 1722 0 35 100083 79
25 10005334911727438633 0 14102100 1010 1 85f751fd c4e18dd6 50e219e0 ffc6ffd0 7801e8d9 ... 4 0 21665 320 50 2493 3 35 -1 117
26 10005541670676403131 0 14102100 1005 1 e151e245 7e091613 f028772b ecad2386 7801e8d9 ... 1 0 20984 320 50 2371 0 551 100217 46
27 10005609489911213467 1 14102100 1005 0 85f751fd c4e18dd6 50e219e0 54c5d545 2347f47a ... 1 0 21611 320 50 2480 3 297 100111 61
28 10005649443863261125 0 14102100 1005 0 543a539e c7ca3108 3e814130 ecad2386 7801e8d9 ... 1 0 20366 320 50 2333 0 39 -1 157
29 10005951398749600249 0 14102100 1005 0 1fbe01fe f3845767 28905ebd ecad2386 7801e8d9 ... 1 0 15706 320 50 1722 0 35 -1 79
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
40428937 9994363992766759561 0 14103023 1005 1 0eb72673 d2f72222 f028772b ecad2386 7801e8d9 ... 1 0 19772 320 50 2227 0 935 100077 48
40428938 9994637981423829789 0 14103023 1005 0 1fbe01fe f3845767 28905ebd ecad2386 7801e8d9 ... 1 0 22257 320 50 2545 0 431 100084 221
40428939 9994670492261359346 1 14103023 1005 1 d9750ee7 98572c79 f028772b ecad2386 7801e8d9 ... 1 0 17753 320 50 1993 2 1063 -1 33
40428940 9995064718229733761 0 14103023 1002 0 c135a32f b8393312 50e219e0 ecad2386 7801e8d9 ... 0 0 17894 320 50 2039 2 39 100077 32
40428941 9995422670224714350 0 14103023 1005 0 85f751fd c4e18dd6 50e219e0 d36838b1 0e8616ad ... 1 0 23866 320 50 2736 0 33 -1 246
40428942 9995585359240422336 1 14103023 1005 0 85f751fd c4e18dd6 50e219e0 3c4b944d 2347f47a ... 1 0 16859 320 50 1887 3 39 -1 23
40428943 9995700942528439110 0 14103023 1005 0 85f751fd c4e18dd6 50e219e0 9c13b419 2347f47a ... 1 0 23725 320 50 2716 3 47 100217 23
40428944 9995851231658276345 1 14103023 1005 1 b7e9786d b12b9f85 f028772b ecad2386 7801e8d9 ... 1 0 16858 320 50 1887 3 39 100199 23
40428945 9996037780338178315 0 14103023 1005 0 85f751fd c4e18dd6 50e219e0 e2fcccd2 5c5a694b ... 1 0 20633 320 50 2374 3 39 -1 23
40428946 9996342298084120766 0 14103023 1005 0 1fbe01fe f3845767 28905ebd ecad2386 7801e8d9 ... 1 0 22676 320 50 2616 0 35 -1 51
40428947 9996680829496062830 1 14103023 1005 1 16c73019 8025317b 28905ebd ecad2386 7801e8d9 ... 1 0 22193 320 50 2556 3 167 100194 23
40428948 9996821883297875226 1 14103023 1005 1 d9750ee7 98572c79 f028772b ecad2386 7801e8d9 ... 1 0 17614 320 50 1993 2 1063 100084 33
40428949 9997352145588717924 1 14103023 1005 0 7294ea0f 863fa89d 3e814130 ecad2386 7801e8d9 ... 1 0 17239 320 50 1973 3 39 100148 23
40428950 9997366151542576761 0 14103023 1005 1 e151e245 7e091613 f028772b ecad2386 7801e8d9 ... 1 0 22815 320 50 2647 2 39 100148 23
40428951 9997481344885640671 0 14103023 1005 0 85f751fd c4e18dd6 50e219e0 685d1c4c 2347f47a ... 1 3 23222 320 50 2676 0 299 100176 221
40428952 9997781251272087830 0 14103023 1005 0 85f751fd c4e18dd6 50e219e0 7e7baafa 2347f47a ... 1 0 23866 320 50 2736 0 33 100170 246
40428953 9997782484512570704 0 14103023 1005 1 85f751fd c4e18dd6 50e219e0 cf0327f9 2347f47a ... 1 0 23644 300 50 2709 3 39 100013 23
40428954 9997850534923982041 0 14103023 1005 0 85f751fd c4e18dd6 50e219e0 e2fcccd2 5c5a694b ... 1 0 20632 320 50 2374 3 39 -1 23
40428955 9998205295831446187 0 14103023 1005 0 85f751fd c4e18dd6 50e219e0 febd1138 82e27996 ... 1 0 18648 320 50 1092 3 809 100156 61
40428956 9998265546800238489 0 14103023 1005 0 85f751fd c4e18dd6 50e219e0 f0d41ff1 2347f47a ... 1 0 22592 320 50 2603 3 171 100161 61
40428957 9998354075836702668 0 14103023 1005 0 85f751fd c4e18dd6 50e219e0 d36838b1 0e8616ad ... 1 2 23866 320 50 2736 0 33 100170 246
40428958 9998487258543214200 1 14103023 1005 0 83a0ad1a 5c9ae867 f028772b ecad2386 7801e8d9 ... 1 0 19772 320 50 2227 0 935 -1 48
40428959 9998515968748286661 0 14103023 1005 1 856e6d3f 58a89a43 f028772b ecad2386 7801e8d9 ... 1 0 23997 320 50 2748 0 35 -1 79
40428960 9998613662398752368 0 14103023 1005 0 85f751fd c4e18dd6 50e219e0 685d1c4c 2347f47a ... 1 3 23735 320 50 2676 0 299 100176 221
40428961 9998654904628431953 0 14103023 1005 0 85f751fd c4e18dd6 50e219e0 396df801 2347f47a ... 1 0 23866 320 50 2736 0 33 -1 246
40428962 9998752756639797808 1 14103023 1005 1 e151e245 7e091613 f028772b ecad2386 7801e8d9 ... 1 0 17262 320 50 1872 3 39 100173 23
40428963 9999037534674210613 0 14103023 1005 0 85f751fd c4e18dd6 50e219e0 9c13b419 2347f47a ... 1 2 23160 320 50 2667 0 47 -1 221
40428964 9999585120349625051 0 14103023 1005 1 f61eaaae 6b59f079 f028772b ecad2386 7801e8d9 ... 1 0 20969 320 50 2372 0 813 -1 46
40428965 9999636335882369227 1 14103023 1005 0 85f751fd c4e18dd6 50e219e0 3c4b944d 2347f47a ... 1 0 16859 320 50 1887 3 39 100194 23
40428966 9999746639881208566 0 14103023 1005 0 1fbe01fe f3845767 28905ebd ecad2386 7801e8d9 ... 1 0 22257 320 50 2545 0 431 100084 221

40428967 rows × 24 columns


In [33]:
train_raw[train_raw.click == 0]


Out[33]:
id click hour C1 banner_pos site_id site_domain site_category app_id app_domain ... device_type device_conn_type C14 C15 C16 C17 C18 C19 C20 C21
0 1000009418151094273 0 14102100 1005 0 1fbe01fe f3845767 28905ebd ecad2386 7801e8d9 ... 1 2 15706 320 50 1722 0 35 -1 79
1 10000169349117863715 0 14102100 1005 0 1fbe01fe f3845767 28905ebd ecad2386 7801e8d9 ... 1 0 15704 320 50 1722 0 35 100084 79
2 10000371904215119486 0 14102100 1005 0 1fbe01fe f3845767 28905ebd ecad2386 7801e8d9 ... 1 0 15704 320 50 1722 0 35 100084 79
3 10000640724480838376 0 14102100 1005 0 1fbe01fe f3845767 28905ebd ecad2386 7801e8d9 ... 1 0 15706 320 50 1722 0 35 100084 79
4 10000679056417042096 0 14102100 1005 1 fe8cc448 9166c161 0569f928 ecad2386 7801e8d9 ... 1 0 18993 320 50 2161 0 35 -1 157
5 10000720757801103869 0 14102100 1005 0 d6137915 bb1ef334 f028772b ecad2386 7801e8d9 ... 1 0 16920 320 50 1899 0 431 100077 117
6 10000724729988544911 0 14102100 1005 0 8fda644b 25d4cfcd f028772b ecad2386 7801e8d9 ... 1 0 20362 320 50 2333 0 39 -1 157
7 10000918755742328737 0 14102100 1005 1 e151e245 7e091613 f028772b ecad2386 7801e8d9 ... 1 0 20632 320 50 2374 3 39 -1 23
9 10001264480619467364 0 14102100 1002 0 84c7ba46 c4e18dd6 50e219e0 ecad2386 7801e8d9 ... 0 0 21689 320 50 2496 3 167 100191 23
10 10001868339616595934 0 14102100 1005 1 e151e245 7e091613 f028772b ecad2386 7801e8d9 ... 1 0 17747 320 50 1974 2 39 100019 33
11 10001966791793526909 0 14102100 1005 0 1fbe01fe f3845767 28905ebd ecad2386 7801e8d9 ... 1 0 15701 320 50 1722 0 35 -1 79
12 10002028568167339219 0 14102100 1005 0 9e8cf15d 0d3cb7be f028772b ecad2386 7801e8d9 ... 1 2 20596 320 50 2161 0 35 100148 157
13 10002044883120869786 0 14102100 1005 0 d6137915 bb1ef334 f028772b ecad2386 7801e8d9 ... 1 0 19771 320 50 2227 0 687 100077 48
14 10002518649031436658 0 14102100 1005 0 85f751fd c4e18dd6 50e219e0 98fed791 d9b5648e ... 1 0 20984 320 50 2371 0 551 -1 46
15 10003539039235338011 0 14102100 1005 0 1fbe01fe f3845767 28905ebd ecad2386 7801e8d9 ... 1 0 15699 320 50 1722 0 35 100084 79
16 10003585669470236873 0 14102100 1005 0 d9750ee7 98572c79 f028772b ecad2386 7801e8d9 ... 1 0 17914 320 50 2043 2 39 -1 32
17 10004105575081229495 0 14102100 1005 0 1fbe01fe f3845767 28905ebd ecad2386 7801e8d9 ... 1 2 15708 320 50 1722 0 35 100084 79
18 10004181428767727519 0 14102100 1005 1 0c2fe9d6 27e3c518 28905ebd ecad2386 7801e8d9 ... 1 0 6558 320 50 571 2 39 -1 32
19 10004482643316086592 0 14102100 1005 0 85f751fd c4e18dd6 50e219e0 66a5f0f3 d9b5648e ... 1 0 21234 320 50 2434 3 163 100088 61
20 10004510652136496837 0 14102100 1005 0 543a539e c7ca3108 3e814130 ecad2386 7801e8d9 ... 1 0 20352 320 50 2333 0 39 -1 157
21 10004574413841529209 0 14102100 1005 0 1fbe01fe f3845767 28905ebd ecad2386 7801e8d9 ... 1 0 15706 320 50 1722 0 35 -1 79
22 10004670021948955159 0 14102100 1005 0 543a539e c7ca3108 3e814130 ecad2386 7801e8d9 ... 1 0 20366 320 50 2333 0 39 -1 157
24 10005249248600843539 0 14102100 1005 0 1fbe01fe f3845767 28905ebd ecad2386 7801e8d9 ... 1 0 15706 320 50 1722 0 35 100083 79
25 10005334911727438633 0 14102100 1010 1 85f751fd c4e18dd6 50e219e0 ffc6ffd0 7801e8d9 ... 4 0 21665 320 50 2493 3 35 -1 117
26 10005541670676403131 0 14102100 1005 1 e151e245 7e091613 f028772b ecad2386 7801e8d9 ... 1 0 20984 320 50 2371 0 551 100217 46
28 10005649443863261125 0 14102100 1005 0 543a539e c7ca3108 3e814130 ecad2386 7801e8d9 ... 1 0 20366 320 50 2333 0 39 -1 157
29 10005951398749600249 0 14102100 1005 0 1fbe01fe f3845767 28905ebd ecad2386 7801e8d9 ... 1 0 15706 320 50 1722 0 35 -1 79
30 10006192453619779489 0 14102100 1005 0 85f751fd c4e18dd6 50e219e0 685d1c4c 2347f47a ... 1 3 15708 320 50 1722 0 35 -1 79
31 10006415976094813740 0 14102100 1005 0 f84e52b6 d7e2f29b 28905ebd ecad2386 7801e8d9 ... 1 0 16838 320 50 1882 3 35 -1 13
33 10006557235872316145 0 14102100 1005 0 1fbe01fe f3845767 28905ebd ecad2386 7801e8d9 ... 1 0 15699 320 50 1722 0 35 -1 79
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
40428920 9991636647469923807 0 14103023 1005 1 d9750ee7 98572c79 f028772b ecad2386 7801e8d9 ... 1 0 17753 320 50 1993 2 1063 -1 33
40428924 9992306685826528392 0 14103023 1005 0 85f751fd c4e18dd6 50e219e0 685d1c4c 2347f47a ... 1 3 24001 320 50 2749 0 43 100177 221
40428925 9992327029368026061 0 14103023 1005 0 1fbe01fe f3845767 28905ebd ecad2386 7801e8d9 ... 1 0 22676 320 50 2616 0 35 100083 51
40428926 9992484712417106617 0 14103023 1005 0 d6137915 bb1ef334 f028772b ecad2386 7801e8d9 ... 1 0 19771 320 50 2227 0 935 -1 48
40428927 9992868128976521374 0 14103023 1005 0 85f751fd c4e18dd6 50e219e0 98fed791 d9b5648e ... 1 0 19743 320 50 2264 3 425 100000 61
40428928 9993169168198214540 0 14103023 1005 0 1fbe01fe f3845767 28905ebd ecad2386 7801e8d9 ... 1 0 22676 320 50 2616 0 35 100084 51
40428932 9993490976166478268 0 14103023 1005 0 85f751fd c4e18dd6 50e219e0 d36838b1 0e8616ad ... 1 0 23866 320 50 2736 0 33 100170 246
40428934 9993728571358213414 0 14103023 1005 0 1fbe01fe f3845767 28905ebd ecad2386 7801e8d9 ... 1 0 22676 320 50 2616 0 35 100084 51
40428936 99941876396800446 0 14103023 1005 1 0eb72673 d2f72222 f028772b ecad2386 7801e8d9 ... 1 0 23015 320 50 2658 3 35 100148 23
40428937 9994363992766759561 0 14103023 1005 1 0eb72673 d2f72222 f028772b ecad2386 7801e8d9 ... 1 0 19772 320 50 2227 0 935 100077 48
40428938 9994637981423829789 0 14103023 1005 0 1fbe01fe f3845767 28905ebd ecad2386 7801e8d9 ... 1 0 22257 320 50 2545 0 431 100084 221
40428940 9995064718229733761 0 14103023 1002 0 c135a32f b8393312 50e219e0 ecad2386 7801e8d9 ... 0 0 17894 320 50 2039 2 39 100077 32
40428941 9995422670224714350 0 14103023 1005 0 85f751fd c4e18dd6 50e219e0 d36838b1 0e8616ad ... 1 0 23866 320 50 2736 0 33 -1 246
40428943 9995700942528439110 0 14103023 1005 0 85f751fd c4e18dd6 50e219e0 9c13b419 2347f47a ... 1 0 23725 320 50 2716 3 47 100217 23
40428945 9996037780338178315 0 14103023 1005 0 85f751fd c4e18dd6 50e219e0 e2fcccd2 5c5a694b ... 1 0 20633 320 50 2374 3 39 -1 23
40428946 9996342298084120766 0 14103023 1005 0 1fbe01fe f3845767 28905ebd ecad2386 7801e8d9 ... 1 0 22676 320 50 2616 0 35 -1 51
40428950 9997366151542576761 0 14103023 1005 1 e151e245 7e091613 f028772b ecad2386 7801e8d9 ... 1 0 22815 320 50 2647 2 39 100148 23
40428951 9997481344885640671 0 14103023 1005 0 85f751fd c4e18dd6 50e219e0 685d1c4c 2347f47a ... 1 3 23222 320 50 2676 0 299 100176 221
40428952 9997781251272087830 0 14103023 1005 0 85f751fd c4e18dd6 50e219e0 7e7baafa 2347f47a ... 1 0 23866 320 50 2736 0 33 100170 246
40428953 9997782484512570704 0 14103023 1005 1 85f751fd c4e18dd6 50e219e0 cf0327f9 2347f47a ... 1 0 23644 300 50 2709 3 39 100013 23
40428954 9997850534923982041 0 14103023 1005 0 85f751fd c4e18dd6 50e219e0 e2fcccd2 5c5a694b ... 1 0 20632 320 50 2374 3 39 -1 23
40428955 9998205295831446187 0 14103023 1005 0 85f751fd c4e18dd6 50e219e0 febd1138 82e27996 ... 1 0 18648 320 50 1092 3 809 100156 61
40428956 9998265546800238489 0 14103023 1005 0 85f751fd c4e18dd6 50e219e0 f0d41ff1 2347f47a ... 1 0 22592 320 50 2603 3 171 100161 61
40428957 9998354075836702668 0 14103023 1005 0 85f751fd c4e18dd6 50e219e0 d36838b1 0e8616ad ... 1 2 23866 320 50 2736 0 33 100170 246
40428959 9998515968748286661 0 14103023 1005 1 856e6d3f 58a89a43 f028772b ecad2386 7801e8d9 ... 1 0 23997 320 50 2748 0 35 -1 79
40428960 9998613662398752368 0 14103023 1005 0 85f751fd c4e18dd6 50e219e0 685d1c4c 2347f47a ... 1 3 23735 320 50 2676 0 299 100176 221
40428961 9998654904628431953 0 14103023 1005 0 85f751fd c4e18dd6 50e219e0 396df801 2347f47a ... 1 0 23866 320 50 2736 0 33 -1 246
40428963 9999037534674210613 0 14103023 1005 0 85f751fd c4e18dd6 50e219e0 9c13b419 2347f47a ... 1 2 23160 320 50 2667 0 47 -1 221
40428964 9999585120349625051 0 14103023 1005 1 f61eaaae 6b59f079 f028772b ecad2386 7801e8d9 ... 1 0 20969 320 50 2372 0 813 -1 46
40428966 9999746639881208566 0 14103023 1005 0 1fbe01fe f3845767 28905ebd ecad2386 7801e8d9 ... 1 0 22257 320 50 2545 0 431 100084 221

33563901 rows × 24 columns


In [34]:
len(train_raw[train_raw.click == 0])


Out[34]:
33563901

In [35]:
len(train_raw[train_raw.click == 1])


Out[35]:
6865066

In [38]:
6865066 / (33563901.0 + 6865066)


Out[38]:
0.16980562476404604

In [42]:
train_raw['date'] = train_raw['hour'] / 100

In [51]:
first_day = train_raw[train_raw.date == 141021]

In [54]:
g = first_day.groupby(['C1'])

In [55]:
g.count()


Out[55]:
id click hour banner_pos site_id site_domain site_category app_id app_domain app_category ... device_conn_type C14 C15 C16 C17 C18 C19 C20 C21 date
C1
1001 38 38 38 38 38 38 38 38 38 38 ... 38 38 38 38 38 38 38 38 38 38
1002 5028 5028 5028 5028 5028 5028 5028 5028 5028 5028 ... 5028 5028 5028 5028 5028 5028 5028 5028 5028 5028
1005 109997 109997 109997 109997 109997 109997 109997 109997 109997 109997 ... 109997 109997 109997 109997 109997 109997 109997 109997 109997 109997
1007 113 113 113 113 113 113 113 113 113 113 ... 113 113 113 113 113 113 113 113 113 113
1008 11 11 11 11 11 11 11 11 11 11 ... 11 11 11 11 11 11 11 11 11 11
1010 3819 3819 3819 3819 3819 3819 3819 3819 3819 3819 ... 3819 3819 3819 3819 3819 3819 3819 3819 3819 3819

6 rows × 24 columns


In [56]:
first_day_click = first_day[first_day.click == 1]

In [57]:
g1 = first_day_click.groupby(['C1'])

In [65]:
g1.count()['id']


Out[65]:
C1
1001        4
1002     1155
1005    19349
1008        5
1010      279
Name: id, dtype: int64

In [59]:
s = train_raw['banner_pos']

In [60]:
s.unique()


Out[60]:
array([0, 1, 4, 5, 2, 7, 3])

In [69]:
m = {}
for key, value in zip(_65.index, _65):
    m[key] = value

In [70]:
m


Out[70]:
{1001: 4, 1002: 1155, 1005: 19349, 1008: 5, 1010: 279}

In [71]:
g = train_raw.groupby('device_id')

In [72]:
g.count()


Out[72]:
id click hour C1 banner_pos site_id site_domain site_category app_id app_domain ... device_conn_type C14 C15 C16 C17 C18 C19 C20 C21 date
device_id
00000414 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
00000715 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
00000919 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
00000b7c 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
00001237 6 6 6 6 6 6 6 6 6 6 ... 6 6 6 6 6 6 6 6 6 6
0000194a 6 6 6 6 6 6 6 6 6 6 ... 6 6 6 6 6 6 6 6 6 6
000022f3 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
00002c39 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
00003255 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
000032d7 9 9 9 9 9 9 9 9 9 9 ... 9 9 9 9 9 9 9 9 9 9
00003e42 10 10 10 10 10 10 10 10 10 10 ... 10 10 10 10 10 10 10 10 10 10
00004686 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
000048d5 2 2 2 2 2 2 2 2 2 2 ... 2 2 2 2 2 2 2 2 2 2
000050fc 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
00005365 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
0000552b 12 12 12 12 12 12 12 12 12 12 ... 12 12 12 12 12 12 12 12 12 12
00006524 2 2 2 2 2 2 2 2 2 2 ... 2 2 2 2 2 2 2 2 2 2
00006911 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
00006ee3 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
000070cc 5 5 5 5 5 5 5 5 5 5 ... 5 5 5 5 5 5 5 5 5 5
000071a8 2 2 2 2 2 2 2 2 2 2 ... 2 2 2 2 2 2 2 2 2 2
00007707 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
00007d32 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
00007ee0 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
00007faa 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
0000822f 5 5 5 5 5 5 5 5 5 5 ... 5 5 5 5 5 5 5 5 5 5
0000893a 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
00008be9 6 6 6 6 6 6 6 6 6 6 ... 6 6 6 6 6 6 6 6 6 6
00008c5c 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
00008ed4 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
ffff2b1f 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
ffff2c9d 29 29 29 29 29 29 29 29 29 29 ... 29 29 29 29 29 29 29 29 29 29
ffff391f 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
ffff3fbb 3 3 3 3 3 3 3 3 3 3 ... 3 3 3 3 3 3 3 3 3 3
ffff4106 2 2 2 2 2 2 2 2 2 2 ... 2 2 2 2 2 2 2 2 2 2
ffff430b 6 6 6 6 6 6 6 6 6 6 ... 6 6 6 6 6 6 6 6 6 6
ffff4f9a 2 2 2 2 2 2 2 2 2 2 ... 2 2 2 2 2 2 2 2 2 2
ffff5822 2 2 2 2 2 2 2 2 2 2 ... 2 2 2 2 2 2 2 2 2 2
ffff59da 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
ffff60f9 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
ffff6186 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
ffff6ae3 2 2 2 2 2 2 2 2 2 2 ... 2 2 2 2 2 2 2 2 2 2
ffff70ae 2 2 2 2 2 2 2 2 2 2 ... 2 2 2 2 2 2 2 2 2 2
ffff7735 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
ffff9201 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
ffff9249 28 28 28 28 28 28 28 28 28 28 ... 28 28 28 28 28 28 28 28 28 28
ffffa2c2 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
ffffa5a3 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
ffffabfb 3 3 3 3 3 3 3 3 3 3 ... 3 3 3 3 3 3 3 3 3 3
ffffb0fc 2 2 2 2 2 2 2 2 2 2 ... 2 2 2 2 2 2 2 2 2 2
ffffb18a 3 3 3 3 3 3 3 3 3 3 ... 3 3 3 3 3 3 3 3 3 3
ffffb919 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
ffffbe39 2 2 2 2 2 2 2 2 2 2 ... 2 2 2 2 2 2 2 2 2 2
ffffd2eb 3 3 3 3 3 3 3 3 3 3 ... 3 3 3 3 3 3 3 3 3 3
ffffd382 3 3 3 3 3 3 3 3 3 3 ... 3 3 3 3 3 3 3 3 3 3
ffffd970 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
ffffd98b 8 8 8 8 8 8 8 8 8 8 ... 8 8 8 8 8 8 8 8 8 8
ffffde2c 2 2 2 2 2 2 2 2 2 2 ... 2 2 2 2 2 2 2 2 2 2
ffffe321 4 4 4 4 4 4 4 4 4 4 ... 4 4 4 4 4 4 4 4 4 4
ffffe5da 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1

2686408 rows × 24 columns


In [74]:
len(_72[_72.id > 10])


Out[74]:
87981

In [75]:
len(_72)


Out[75]:
2686408

In [76]:
2686408 - 87981


Out[76]:
2598427

In [ ]: