In [3]:
import numpy as np
import random 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
%matplotlib inline

In [4]:
path = '../../data/gridsearch_basemodel2_on_cross_val.csv'
df = pd.read_csv(path)

In [5]:
def build_roc(df):
    
    df['TPR'] = df['recall']
    df['FPR'] = df['FP']/(df['FP'] + df['TN'])
    
#     plt.plot([0,1],[0,1],'k',linewidth=0.5)
    plt.figure()
    plt.plot(df.FPR.values,df.TPR.values,'r',linewidth=2)
    plt.xlabel('False Positive Rate')
    plt.xlim([0,1])
    plt.ylabel('True Positive Rate')
    plt.ylim([0,1])
    titlestr = "Doc2Vec ROC Curve".format(np.trapz(df.TPR.values[::-1],x=df.FPR.values[::-1]),int(df.k.unique()))
    plt.title(titlestr)

In [6]:
build_roc(df[df['k']==11])
plt.savefig('doc2vecROC.jpg')


/home/mgupta/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()
/home/mgupta/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:4: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

In [5]:
for k in df.k.unique():
    build_roc(df[df['k']==k])


/home/mgupta/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()
/home/mgupta/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:4: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

In [9]:
df[df['k']==11]


Out[9]:
Unnamed: 0 k threshold accuracy recall precision TP TN FN FP
100 100 11.0 0.1 0.5037 1.000000 0.503551 5034.0 3.0 0.0 4963.0
101 101 11.0 0.2 0.5073 0.999801 0.505372 5033.0 40.0 1.0 4926.0
102 102 11.0 0.3 0.5283 0.993047 0.516372 4999.0 284.0 35.0 4682.0
103 103 11.0 0.4 0.6077 0.964640 0.564586 4856.0 1221.0 178.0 3745.0
104 104 11.0 0.5 0.7206 0.888955 0.666915 4475.0 2731.0 559.0 2235.0
105 105 11.0 0.6 0.7843 0.752880 0.805869 3790.0 4053.0 1244.0 913.0
106 106 11.0 0.7 0.7588 0.571315 0.918850 2876.0 4712.0 2158.0 254.0
107 107 11.0 0.8 0.6656 0.347835 0.966336 1751.0 4905.0 3283.0 61.0
108 108 11.0 0.9 0.5529 0.113031 0.989565 569.0 4960.0 4465.0 6.0
109 109 11.0 1.0 0.5000 0.006754 1.000000 34.0 4966.0 5000.0 0.0

In [6]:
df


Out[6]:
Unnamed: 0 k threshold accuracy recall precision TP TN FN FP
0 0 1.0 0.1 0.5309 0.945769 0.518684 4761.0 548.0 273.0 4418.0
1 1 1.0 0.2 0.5279 0.942988 0.517046 4747.0 532.0 287.0 4434.0
2 2 1.0 0.3 0.5315 0.946166 0.519015 4763.0 552.0 271.0 4414.0
3 3 1.0 0.4 0.5308 0.944180 0.518660 4753.0 555.0 281.0 4411.0
4 4 1.0 0.5 0.5295 0.942988 0.517949 4747.0 548.0 287.0 4418.0
5 5 1.0 0.6 0.5305 0.944378 0.518486 4754.0 551.0 280.0 4415.0
6 6 1.0 0.7 0.5302 0.943981 0.518325 4752.0 550.0 282.0 4416.0
7 7 1.0 0.8 0.5310 0.944577 0.518765 4755.0 555.0 279.0 4411.0
8 8 1.0 0.9 0.5328 0.946166 0.519751 4763.0 565.0 271.0 4401.0
9 9 1.0 1.0 0.5309 0.945371 0.518692 4759.0 550.0 275.0 4416.0
10 10 2.0 0.1 0.5117 0.980532 0.507767 4936.0 181.0 98.0 4785.0
11 11 2.0 0.2 0.5115 0.980532 0.507662 4936.0 179.0 98.0 4787.0
12 12 2.0 0.3 0.5111 0.979340 0.507463 4930.0 181.0 104.0 4785.0
13 13 2.0 0.4 0.5113 0.979738 0.507564 4932.0 181.0 102.0 4785.0
14 14 2.0 0.5 0.5130 0.980930 0.508443 4938.0 192.0 96.0 4774.0
15 15 2.0 0.6 0.6524 0.841081 0.612735 4234.0 2290.0 800.0 2676.0
16 16 2.0 0.7 0.6526 0.839293 0.613208 4225.0 2301.0 809.0 2665.0
17 17 2.0 0.8 0.6502 0.841279 0.610759 4235.0 2267.0 799.0 2699.0
18 18 2.0 0.9 0.6529 0.843067 0.612852 4244.0 2285.0 790.0 2681.0
19 19 2.0 1.0 0.6556 0.839690 0.615822 4227.0 2329.0 807.0 2637.0
20 20 3.0 0.1 0.5054 0.993246 0.504439 5000.0 54.0 34.0 4912.0
21 21 3.0 0.2 0.5058 0.993842 0.504640 5003.0 55.0 31.0 4911.0
22 22 3.0 0.3 0.5041 0.992451 0.503781 4996.0 45.0 38.0 4921.0
23 23 3.0 0.4 0.5923 0.932459 0.556755 4694.0 1229.0 340.0 3737.0
24 24 3.0 0.5 0.5910 0.932062 0.555924 4692.0 1218.0 342.0 3748.0
25 25 3.0 0.6 0.5906 0.932062 0.555661 4692.0 1214.0 342.0 3752.0
26 26 3.0 0.7 0.7296 0.705205 0.744235 3550.0 3746.0 1484.0 1220.0
27 27 3.0 0.8 0.7255 0.701232 0.739887 3530.0 3725.0 1504.0 1241.0
28 28 3.0 0.9 0.7287 0.706794 0.742023 3558.0 3729.0 1476.0 1237.0
29 29 3.0 1.0 0.7283 0.703615 0.743025 3542.0 3741.0 1492.0 1225.0
... ... ... ... ... ... ... ... ... ... ...
90 90 10.0 0.1 0.5034 1.000000 0.503400 5034.0 0.0 0.0 4966.0
91 91 10.0 0.2 0.5038 1.000000 0.503601 5034.0 4.0 0.0 4962.0
92 92 10.0 0.3 0.5116 0.999007 0.507570 5029.0 87.0 5.0 4879.0
93 93 10.0 0.4 0.5532 0.987485 0.530183 4971.0 561.0 63.0 4405.0
94 94 10.0 0.5 0.6599 0.941597 0.604053 4740.0 1859.0 294.0 3107.0
95 95 10.0 0.6 0.7507 0.833135 0.717291 4194.0 3313.0 840.0 1653.0
96 96 10.0 0.7 0.7797 0.672626 0.859173 3386.0 4411.0 1648.0 555.0
97 97 10.0 0.8 0.7140 0.460270 0.941870 2317.0 4823.0 2717.0 143.0
98 98 10.0 0.9 0.6001 0.210171 0.978723 1058.0 4943.0 3976.0 23.0
99 99 10.0 1.0 0.5145 0.035757 0.994475 180.0 4965.0 4854.0 1.0
100 100 11.0 0.1 0.5037 1.000000 0.503551 5034.0 3.0 0.0 4963.0
101 101 11.0 0.2 0.5073 0.999801 0.505372 5033.0 40.0 1.0 4926.0
102 102 11.0 0.3 0.5283 0.993047 0.516372 4999.0 284.0 35.0 4682.0
103 103 11.0 0.4 0.6077 0.964640 0.564586 4856.0 1221.0 178.0 3745.0
104 104 11.0 0.5 0.7206 0.888955 0.666915 4475.0 2731.0 559.0 2235.0
105 105 11.0 0.6 0.7843 0.752880 0.805869 3790.0 4053.0 1244.0 913.0
106 106 11.0 0.7 0.7588 0.571315 0.918850 2876.0 4712.0 2158.0 254.0
107 107 11.0 0.8 0.6656 0.347835 0.966336 1751.0 4905.0 3283.0 61.0
108 108 11.0 0.9 0.5529 0.113031 0.989565 569.0 4960.0 4465.0 6.0
109 109 11.0 1.0 0.5000 0.006754 1.000000 34.0 4966.0 5000.0 0.0
110 110 12.0 0.1 0.5035 1.000000 0.503450 5034.0 1.0 0.0 4965.0
111 111 12.0 0.2 0.5050 1.000000 0.504207 5034.0 16.0 0.0 4950.0
112 112 12.0 0.3 0.5161 0.996822 0.509908 5018.0 143.0 16.0 4823.0
113 113 12.0 0.4 0.5679 0.980930 0.538906 4938.0 741.0 96.0 4225.0
114 114 12.0 0.5 0.6785 0.932459 0.620161 4694.0 2091.0 340.0 2875.0
115 115 12.0 0.6 0.7806 0.654350 0.878869 3294.0 4512.0 1740.0 454.0
116 116 12.0 0.7 0.7163 0.458880 0.953364 2310.0 4853.0 2724.0 113.0
117 117 12.0 0.8 0.6118 0.232817 0.983221 1172.0 4946.0 3862.0 20.0
118 118 12.0 0.9 0.5113 0.029201 1.000000 147.0 4966.0 4887.0 0.0
119 119 12.0 1.0 0.4966 0.000000 NaN 0.0 4966.0 5034.0 0.0

120 rows × 10 columns


In [1]:
path2 = '../../data/gridsearch_basemodel2_on_cross_val.csv'
df2 = pd.read_csv(path2)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-abef0e62fb3e> in <module>()
      1 path2 = '../../data/gridsearch_basemodel2_on_cross_val.csv'
----> 2 df2 = pd.read_csv(path2)

NameError: name 'pd' is not defined

In [31]:
for k in df2.k.unique():
    build_roc(df2[df2['k']==k])


/home/mgupta/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()
/home/mgupta/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:4: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

In [ ]: