In [2]:
import pandas as pd
import numpy as np

In [3]:
# Read the two prediction files; each is expected to load as a two-column frame
# (see the shape checks in the following cells).
log, moe = map(pd.read_csv, ('log.csv', 'moe.csv'))

In [4]:
# Sanity check: (rows, columns) of the frame read from moe.csv.
moe.shape


Out[4]:
(700640, 2)

In [5]:
# Sanity check: (rows, columns) of the frame read from log.csv; should match moe.shape.
log.shape


Out[5]:
(700640, 2)

In [6]:
# Per-model scores used to derive the blending weights
# (presumably validation GAP of each model -- TODO confirm the metric).
gap_log = 0.707
gap_moe = 0.742

# Weight of a model is exp(g / (1 - g)). Derive the weights from the score
# variables above instead of repeating the literals, so that changing a score
# cannot leave its weight stale.
w_moe = np.exp(gap_moe / (1 - gap_moe))
w_log = np.exp(gap_log / (1 - gap_log))

In [7]:
def _pairs_to_tensor(pair_strings, pairs_per_row=20):
    """Parse space-separated "label confidence" strings into a float tensor.

    Parameters
    ----------
    pair_strings : sequence of str
        One string per row holding ``2 * pairs_per_row`` space-separated
        numbers (label, confidence, label, confidence, ...).
    pairs_per_row : int
        Number of (label, confidence) pairs in every row.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(pair_strings), pairs_per_row, 2)``; ``[..., 0]``
        is the label and ``[..., 1]`` the confidence.

    Raises
    ------
    ValueError
        If a row does not contain exactly ``2 * pairs_per_row`` numeric tokens.
    """
    # Size the tensor from the data rather than a hard-coded row count, and
    # fill it row by row so only one row of strings is alive at a time.
    tensor = np.zeros((len(pair_strings), pairs_per_row, 2))
    for i, text in enumerate(pair_strings):
        tensor[i] = np.array(text.split(" "), dtype=np.float64).reshape((pairs_per_row, 2))
    return tensor


#transfer moe data into tensor
arr_moe = np.array(moe.values[:,1])
mat_moe = _pairs_to_tensor(arr_moe)

#do the same thing to log data
arr_log = np.array(log.values[:,1])
mat_log = _pairs_to_tensor(arr_log)

In [8]:
#split label and pred
# Flatten (rows, 20, 2) to (rows*20, 2); -1 lets numpy infer the length so the
# cell does not depend on a hard-coded row count.
temp_moe = mat_moe.reshape((-1, 2))
temp_log = mat_log.reshape((-1, 2))
# Column 0 holds labels. Use the builtin int: np.int was only an alias for it
# and has been removed from recent NumPy releases.
label_moe = temp_moe[:, 0].astype(int)
label_log = temp_log[:, 0].astype(int)
# Column 1 holds confidences. NOTE(review): float16 keeps only ~3 significant
# decimal digits; kept as-is to preserve the existing output.
pred_moe = temp_moe[:, 1].astype(np.float16)
pred_log = temp_log[:, 1].astype(np.float16)

In [9]:
#merge with weights
# Blend the two confidence vectors element-wise with the model weights.
# NOTE(review): this is a purely positional average and label_log is never
# compared with label_moe -- it is only meaningful if both files list the same
# labels in the same order for every row. Confirm against the input files.
weighted_sum = w_moe * pred_moe + w_log * pred_log
weight_total = w_moe + w_log
pred = weighted_sum / weight_total

In [10]:
# Pair every label (taken from the moe file) with its blended confidence as
# strings, giving an (n, 2) array. reshape(-1, 1) infers the length instead of
# hard-coding 14012800, and the builtin str replaces the removed np.str alias.
new = np.concatenate(
    (label_moe.reshape((-1, 1)).astype(str),
     pred.reshape((-1, 1)).astype(str)),
    axis=1)

In [11]:
# Regroup the (label, confidence) strings into one list of 40 tokens per row
# (20 pairs x 2 values); -1 infers the row count instead of hard-coding it.
new_ = new.reshape((-1, 40)).tolist()
# Accumulator for the joined per-row strings, filled in the next cell.
col = []

In [12]:
# Join each row's 40 tokens into a single space-separated string. Rebuilding
# `col` from scratch keeps this cell idempotent: re-running it no longer
# appends a second copy of every row, and no row count is hard-coded.
col = [" ".join(tokens) for tokens in new_]

In [29]:
# Assemble the output table: ids come from the first column of the moe frame,
# paired with the merged label/confidence strings; column order is fixed explicitly.
submission_columns = ["VideoId", "LabelConfidencePairs"]
saver = pd.DataFrame(
    {"VideoId": list(moe.values[:, 0]), "LabelConfidencePairs": col},
    columns=submission_columns)


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-29-c716cd518527> in <module>()
----> 1 saver = pd.DataFrame({"VideoId":list(moe.values[:,0]),"LabelConfidencePairs":col}, columns=["VideoId","LabelConfidencePairs"], index=False)

/home/jhz/anaconda2/lib/python2.7/site-packages/pandas/core/frame.pyc in __init__(self, data, index, columns, dtype, copy)
    264                                  dtype=dtype, copy=copy)
    265         elif isinstance(data, dict):
--> 266             mgr = self._init_dict(data, index, columns, dtype=dtype)
    267         elif isinstance(data, ma.MaskedArray):
    268             import numpy.ma.mrecords as mrecords

/home/jhz/anaconda2/lib/python2.7/site-packages/pandas/core/frame.pyc in _init_dict(self, data, index, columns, dtype)
    368 
    369             else:
--> 370                 index = _ensure_index(index)
    371 
    372             arrays = []

/home/jhz/anaconda2/lib/python2.7/site-packages/pandas/indexes/base.pyc in _ensure_index(index_like, copy)
   3663             index_like = copy(index_like)
   3664 
-> 3665     return Index(index_like)
   3666 
   3667 

/home/jhz/anaconda2/lib/python2.7/site-packages/pandas/indexes/base.pyc in __new__(cls, data, dtype, copy, name, fastpath, tupleize_cols, **kwargs)
    291                          **kwargs)
    292         elif data is None or is_scalar(data):
--> 293             cls._scalar_data_error(data)
    294         else:
    295             if (tupleize_cols and isinstance(data, list) and data and

/home/jhz/anaconda2/lib/python2.7/site-packages/pandas/indexes/base.pyc in _scalar_data_error(cls, data)
    581         raise TypeError('{0}(...) must be called with a collection of some '
    582                         'kind, {1} was passed'.format(cls.__name__,
--> 583                                                       repr(data)))
    584 
    585     @classmethod

TypeError: Index(...) must be called with a collection of some kind, False was passed

In [31]:
# Persist the merged table to merge.csv, omitting the row-index column.
saver.to_csv(path_or_buf="merge.csv", index=False)

In [30]:
# Display the merged frame (ids from moe.csv, confidences blended from both
# models); pandas truncates the repr to the first and last rows.
saver


Out[30]:
VideoId LabelConfidencePairs
0 100011194 1 0.95458984375 2292 0.91455078125 4 0.8818359...
1 100253546 112 0.95458984375 77 0.93408203125 142 0.91308...
2 100347260 46 0.8564453125 7 0.77392578125 3 0.4858398437...
3 100436527 1 0.46044921875 85 0.27197265625 62 0.09295654...
4 100573509 14 0.798828125 7 0.72412109375 24 0.2658691406...
5 100122848 387 0.98779296875 20 0.94140625 89 0.46875 424...
6 100306012 2 0.9716796875 0 0.923828125 237 0.50390625 19...
7 100404872 102 0.564453125 209 0.235107421875 142 0.19921...
8 100049134 142 0.98681640625 112 0.84375 59 0.7294921875 ...
9 100311706 8 0.99169921875 0 0.77587890625 65 0.666503906...
10 100007070 56 0.7021484375 106 0.56640625 162 0.120666503...
11 100445434 8 0.943359375 0 0.46923828125 21 0.07818603515...
12 100085312 5 0.77294921875 3 0.288818359375 38 0.10174560...
13 100319764 616 0.322265625 260 0.132934570312 6 0.0857543...
14 100041402 1 0.99462890625 4 0.86181640625 360 0.30908203...
15 100301065 8 0.51611328125 18 0.39794921875 81 0.21704101...
16 100176684 78 0.990234375 190 0.83349609375 9 0.663574218...
17 100046077 61 0.53857421875 148 0.235961914062 9 0.106872...
18 100303689 1 1.0 124 0.99609375 156 0.99365234375 174 0.7...
19 100019665 3 0.97119140625 13 0.87451171875 7 0.81640625 ...
20 100234130 1503 0.99560546875 0 0.98583984375 19 0.433593...
21 100460839 0 0.99560546875 159 0.57080078125 2 0.47045898...
22 100388528 35 1.0 0 0.9990234375 19 0.50146484375 2 0.353...
23 100032532 773 0.99658203125 638 0.99365234375 2 0.978515...
24 100647289 31 0.0982055664062 5 0.0768432617188 110 0.071...
25 100182650 2 0.849609375 0 0.76611328125 4311 0.654296875...
26 100407676 7 1.0 13 0.9990234375 38 0.99560546875 3 0.988...
27 100257529 6 0.73876953125 0 0.69677734375 2 0.4243164062...
28 100194970 2 0.71728515625 55 0.68310546875 0 0.351318359...
29 100155245 10 1.0 132 0.99267578125 22 0.8076171875 2573 ...
... ... ...
700610 100563419 56 0.67578125 106 0.5068359375 45 0.2236328125...
700611 100232467 1 0.78662109375 392 0.66015625 160 0.594238281...
700612 100182196 62 0.99560546875 1 0.9072265625 64 0.786621093...
700613 100375816 0 0.131469726562 272 0.0791625976562 72 0.0671...
700614 100190958 59 0.6953125 112 0.53076171875 5 0.43481445312...
700615 100130494 394 0.327392578125 110 0.0413513183594 2 0.040...
700616 100656289 12 0.88427734375 86 0.385498046875 867 0.18017...
700617 100329783 15 0.92431640625 6 0.87353515625 61 0.03460693...
700618 100523663 0 0.98681640625 2176 0.9697265625 2 0.96386718...
700619 100667210 3 1.0 7 0.73095703125 30 0.59130859375 16 0.56...
700620 100466760 0 0.67138671875 724 0.494140625 2 0.3999023437...
700621 100682168 3 0.9990234375 16 0.416259765625 68 0.26953125...
700622 100530157 59 1.0 789 0.80419921875 177 0.52978515625 183...
700623 100358427 4 1.0 1 0.9990234375 70 0.98291015625 526 0.19...
700624 100473836 603 0.99658203125 51 0.93115234375 732 0.83447...
700625 100352657 75 0.998046875 977 0.94384765625 1619 0.769042...
700626 100444298 90 0.99951171875 0 0.99072265625 8 0.978515625...
700627 100668832 15 0.388916015625 6 0.30615234375 343 0.083801...
700628 100327011 3 0.998046875 16 0.56640625 95 0.368408203125 ...
700629 100267863 456 0.998046875 1 0.99169921875 11 0.890625 4 ...
700630 100434129 159 0.9677734375 0 0.728515625 19 0.3627929687...
700631 100179511 323 0.78271484375 99 0.215209960938 25 0.19079...
700632 100461097 0 0.970703125 35 0.9296875 2 0.26123046875 19 ...
700633 100085382 176 0.50830078125 0 0.419677734375 55 0.365722...
700634 100299158 12 1.0 52 0.99560546875 26 0.99560546875 32 0....
700635 100410121 1866 0.9814453125 6 0.880859375 15 0.821777343...
700636 100403404 12 0.200927734375 23 0.0332336425781 29 0.0266...
700637 100002891 410 0.281982421875 49 0.176513671875 3 0.10882...
700638 100676681 0 0.27685546875 2 0.106506347656 31 0.08178710...
700639 100144838 3 1.0 16 0.13916015625 5 0.0278167724609 38 0....

700640 rows × 2 columns


In [ ]: