In [1]:
print(__doc__)

import numpy as np
import matplotlib.pyplot as plt

import pandas as pd

from sklearn.preprocessing import label_binarize
from sklearn.metrics import confusion_matrix

%matplotlib inline

dir = '/Users/chanjinpark/GitHub/NRFAnalysis/'
df = pd.read_csv(dir + 'data/predlabel.txt')
df.columns = ['docid', 'prediction', 'label']
df.info


Out[1]:
<bound method DataFrame.info of                     docid  prediction  label
0        2013R1A1A1005147        22.0   22.0
1        2013R1A1A1005486        15.0   15.0
2        2013R1A1A1005731        23.0   23.0
3        2013R1A1A1006062         6.0   17.0
4        2013R1A1A1006606         3.0    4.0
5        2013R1A1A1006701         6.0    6.0
6        2013R1A1A1007068         6.0    6.0
7        2013R1A1A1007084         6.0    6.0
8        2013R1A1A1007475        10.0   17.0
9        2013R1A1A1007541         1.0    1.0
10       2013R1A1A1007615         5.0   15.0
11       2013R1A1A1007658         2.0   15.0
12       2013R1A1A1007706         5.0   15.0
13       2013R1A1A1008098        23.0   23.0
14       2013R1A1A1008154        12.0   15.0
15       2013R1A1A1008228        26.0    4.0
16       2013R1A1A1008431        15.0   24.0
17       2013R1A1A1008686         0.0    0.0
18       2013R1A1A1008910         3.0    4.0
19       2013R1A1A1009085         8.0    8.0
20       2013R1A1A1009259        17.0   17.0
21       2013R1A1A1009437         3.0   15.0
22       2013R1A1A1009571         7.0    7.0
23       2013R1A1A1009630        22.0   22.0
24       2013R1A1A1009816         2.0    9.0
25       2013R1A1A1009840        10.0   10.0
26       2013R1A1A1010004         1.0    1.0
27       2013R1A1A1010171        11.0   11.0
28       2013R1A1A1010185         8.0    8.0
29       2013R1A1A1010268        15.0   15.0
...                   ...         ...    ...
2369  2015R1D1A3A01020444        10.0   10.0
2370  2015R1D1A3A01020450         3.0    3.0
2371  2015R1D1A3A01020539        22.0   22.0
2372  2015R1D1A3A01020625        15.0    5.0
2373  2015R1D1A3A01020635         8.0    5.0
2374  2015R1D1A3A01020651         3.0   12.0
2375  2015R1D1A3A01020679         5.0    4.0
2376  2015R1D1A3A01020750         1.0    9.0
2377  2015R1D1A3A01020817        22.0   17.0
2378  2015R1D1A3A03015653        15.0   14.0
2379  2015R1D1A3A03020569        10.0   17.0
2380  2015R1D1A4A01016293         8.0    4.0
2381  2015R1D1A4A01016406         5.0    5.0
2382  2015R1D1A4A01016640        15.0    5.0
2383  2015R1D1A4A01016662         5.0    8.0
2384  2015R1D1A4A01017672         5.0    3.0
2385  2015R1D1A4A01018668        22.0   18.0
2386  2015R1D1A4A01019028        15.0    5.0
2387  2015R1D1A4A01019211         3.0    3.0
2388  2015R1D1A4A01019610         7.0    7.0
2389  2015R1D1A4A01019646         8.0   12.0
2390  2015R1D1A4A01019759         5.0    8.0
2391  2015R1D1A4A01019835         1.0    1.0
2392  2015R1D1A4A01019937        20.0   20.0
2393  2015R1D1A4A01019960         6.0    6.0
2394  2015R1D1A4A01020104        15.0    8.0
2395  2015R1D1A4A01020317        26.0   26.0
2396  2015R1D1A4A01020352        10.0   10.0
2397  2015R1D1A4A01020665        22.0   22.0
2398  2015R1D1A4A01020961         6.0    6.0

[2399 rows x 3 columns]>

In [ ]: