In [3]:
import numpy as np
import random
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
%matplotlib inline
In [4]:
path = '../../data/gridsearch_basemodel2_on_cross_val.csv'
df = pd.read_csv(path)
In [5]:
def build_roc(df):
df['TPR'] = df['recall']
df['FPR'] = df['FP']/(df['FP'] + df['TN'])
# plt.plot([0,1],[0,1],'k',linewidth=0.5)
plt.figure()
plt.plot(df.FPR.values,df.TPR.values,'r',linewidth=2)
plt.xlabel('False Positive Rate')
plt.xlim([0,1])
plt.ylabel('True Positive Rate')
plt.ylim([0,1])
titlestr = "Doc2Vec ROC Curve".format(np.trapz(df.TPR.values[::-1],x=df.FPR.values[::-1]),int(df.k.unique()))
plt.title(titlestr)
In [6]:
build_roc(df[df['k']==11])
plt.savefig('doc2vecROC.jpg')
/home/mgupta/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:3: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
app.launch_new_instance()
/home/mgupta/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:4: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
In [5]:
for k in df.k.unique():
build_roc(df[df['k']==k])
/home/mgupta/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:3: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
app.launch_new_instance()
/home/mgupta/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:4: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
In [9]:
df[df['k']==11]
Out[9]:
Unnamed: 0
k
threshold
accuracy
recall
precision
TP
TN
FN
FP
100
100
11.0
0.1
0.5037
1.000000
0.503551
5034.0
3.0
0.0
4963.0
101
101
11.0
0.2
0.5073
0.999801
0.505372
5033.0
40.0
1.0
4926.0
102
102
11.0
0.3
0.5283
0.993047
0.516372
4999.0
284.0
35.0
4682.0
103
103
11.0
0.4
0.6077
0.964640
0.564586
4856.0
1221.0
178.0
3745.0
104
104
11.0
0.5
0.7206
0.888955
0.666915
4475.0
2731.0
559.0
2235.0
105
105
11.0
0.6
0.7843
0.752880
0.805869
3790.0
4053.0
1244.0
913.0
106
106
11.0
0.7
0.7588
0.571315
0.918850
2876.0
4712.0
2158.0
254.0
107
107
11.0
0.8
0.6656
0.347835
0.966336
1751.0
4905.0
3283.0
61.0
108
108
11.0
0.9
0.5529
0.113031
0.989565
569.0
4960.0
4465.0
6.0
109
109
11.0
1.0
0.5000
0.006754
1.000000
34.0
4966.0
5000.0
0.0
In [6]:
df
Out[6]:
Unnamed: 0
k
threshold
accuracy
recall
precision
TP
TN
FN
FP
0
0
1.0
0.1
0.5309
0.945769
0.518684
4761.0
548.0
273.0
4418.0
1
1
1.0
0.2
0.5279
0.942988
0.517046
4747.0
532.0
287.0
4434.0
2
2
1.0
0.3
0.5315
0.946166
0.519015
4763.0
552.0
271.0
4414.0
3
3
1.0
0.4
0.5308
0.944180
0.518660
4753.0
555.0
281.0
4411.0
4
4
1.0
0.5
0.5295
0.942988
0.517949
4747.0
548.0
287.0
4418.0
5
5
1.0
0.6
0.5305
0.944378
0.518486
4754.0
551.0
280.0
4415.0
6
6
1.0
0.7
0.5302
0.943981
0.518325
4752.0
550.0
282.0
4416.0
7
7
1.0
0.8
0.5310
0.944577
0.518765
4755.0
555.0
279.0
4411.0
8
8
1.0
0.9
0.5328
0.946166
0.519751
4763.0
565.0
271.0
4401.0
9
9
1.0
1.0
0.5309
0.945371
0.518692
4759.0
550.0
275.0
4416.0
10
10
2.0
0.1
0.5117
0.980532
0.507767
4936.0
181.0
98.0
4785.0
11
11
2.0
0.2
0.5115
0.980532
0.507662
4936.0
179.0
98.0
4787.0
12
12
2.0
0.3
0.5111
0.979340
0.507463
4930.0
181.0
104.0
4785.0
13
13
2.0
0.4
0.5113
0.979738
0.507564
4932.0
181.0
102.0
4785.0
14
14
2.0
0.5
0.5130
0.980930
0.508443
4938.0
192.0
96.0
4774.0
15
15
2.0
0.6
0.6524
0.841081
0.612735
4234.0
2290.0
800.0
2676.0
16
16
2.0
0.7
0.6526
0.839293
0.613208
4225.0
2301.0
809.0
2665.0
17
17
2.0
0.8
0.6502
0.841279
0.610759
4235.0
2267.0
799.0
2699.0
18
18
2.0
0.9
0.6529
0.843067
0.612852
4244.0
2285.0
790.0
2681.0
19
19
2.0
1.0
0.6556
0.839690
0.615822
4227.0
2329.0
807.0
2637.0
20
20
3.0
0.1
0.5054
0.993246
0.504439
5000.0
54.0
34.0
4912.0
21
21
3.0
0.2
0.5058
0.993842
0.504640
5003.0
55.0
31.0
4911.0
22
22
3.0
0.3
0.5041
0.992451
0.503781
4996.0
45.0
38.0
4921.0
23
23
3.0
0.4
0.5923
0.932459
0.556755
4694.0
1229.0
340.0
3737.0
24
24
3.0
0.5
0.5910
0.932062
0.555924
4692.0
1218.0
342.0
3748.0
25
25
3.0
0.6
0.5906
0.932062
0.555661
4692.0
1214.0
342.0
3752.0
26
26
3.0
0.7
0.7296
0.705205
0.744235
3550.0
3746.0
1484.0
1220.0
27
27
3.0
0.8
0.7255
0.701232
0.739887
3530.0
3725.0
1504.0
1241.0
28
28
3.0
0.9
0.7287
0.706794
0.742023
3558.0
3729.0
1476.0
1237.0
29
29
3.0
1.0
0.7283
0.703615
0.743025
3542.0
3741.0
1492.0
1225.0
...
...
...
...
...
...
...
...
...
...
...
90
90
10.0
0.1
0.5034
1.000000
0.503400
5034.0
0.0
0.0
4966.0
91
91
10.0
0.2
0.5038
1.000000
0.503601
5034.0
4.0
0.0
4962.0
92
92
10.0
0.3
0.5116
0.999007
0.507570
5029.0
87.0
5.0
4879.0
93
93
10.0
0.4
0.5532
0.987485
0.530183
4971.0
561.0
63.0
4405.0
94
94
10.0
0.5
0.6599
0.941597
0.604053
4740.0
1859.0
294.0
3107.0
95
95
10.0
0.6
0.7507
0.833135
0.717291
4194.0
3313.0
840.0
1653.0
96
96
10.0
0.7
0.7797
0.672626
0.859173
3386.0
4411.0
1648.0
555.0
97
97
10.0
0.8
0.7140
0.460270
0.941870
2317.0
4823.0
2717.0
143.0
98
98
10.0
0.9
0.6001
0.210171
0.978723
1058.0
4943.0
3976.0
23.0
99
99
10.0
1.0
0.5145
0.035757
0.994475
180.0
4965.0
4854.0
1.0
100
100
11.0
0.1
0.5037
1.000000
0.503551
5034.0
3.0
0.0
4963.0
101
101
11.0
0.2
0.5073
0.999801
0.505372
5033.0
40.0
1.0
4926.0
102
102
11.0
0.3
0.5283
0.993047
0.516372
4999.0
284.0
35.0
4682.0
103
103
11.0
0.4
0.6077
0.964640
0.564586
4856.0
1221.0
178.0
3745.0
104
104
11.0
0.5
0.7206
0.888955
0.666915
4475.0
2731.0
559.0
2235.0
105
105
11.0
0.6
0.7843
0.752880
0.805869
3790.0
4053.0
1244.0
913.0
106
106
11.0
0.7
0.7588
0.571315
0.918850
2876.0
4712.0
2158.0
254.0
107
107
11.0
0.8
0.6656
0.347835
0.966336
1751.0
4905.0
3283.0
61.0
108
108
11.0
0.9
0.5529
0.113031
0.989565
569.0
4960.0
4465.0
6.0
109
109
11.0
1.0
0.5000
0.006754
1.000000
34.0
4966.0
5000.0
0.0
110
110
12.0
0.1
0.5035
1.000000
0.503450
5034.0
1.0
0.0
4965.0
111
111
12.0
0.2
0.5050
1.000000
0.504207
5034.0
16.0
0.0
4950.0
112
112
12.0
0.3
0.5161
0.996822
0.509908
5018.0
143.0
16.0
4823.0
113
113
12.0
0.4
0.5679
0.980930
0.538906
4938.0
741.0
96.0
4225.0
114
114
12.0
0.5
0.6785
0.932459
0.620161
4694.0
2091.0
340.0
2875.0
115
115
12.0
0.6
0.7806
0.654350
0.878869
3294.0
4512.0
1740.0
454.0
116
116
12.0
0.7
0.7163
0.458880
0.953364
2310.0
4853.0
2724.0
113.0
117
117
12.0
0.8
0.6118
0.232817
0.983221
1172.0
4946.0
3862.0
20.0
118
118
12.0
0.9
0.5113
0.029201
1.000000
147.0
4966.0
4887.0
0.0
119
119
12.0
1.0
0.4966
0.000000
NaN
0.0
4966.0
5034.0
0.0
120 rows × 10 columns
In [1]:
path2 = '../../data/gridsearch_basemodel2_on_cross_val.csv'
df2 = pd.read_csv(path2)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-1-abef0e62fb3e> in <module>()
1 path2 = '../../data/gridsearch_basemodel2_on_cross_val.csv'
----> 2 df2 = pd.read_csv(path2)
NameError: name 'pd' is not defined
In [31]:
for k in df2.k.unique():
build_roc(df2[df2['k']==k])
/home/mgupta/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:3: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
app.launch_new_instance()
/home/mgupta/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:4: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
In [ ]:
Content source: mgupta011235/TweetSafe
Similar notebooks: