In [3]:
import numpy as np
import random
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
%matplotlib inline
In [4]:
# Ten random integers drawn from [0, 100); no seed is set, so values differ per run.
randnums = np.random.randint(100, size=(10,))
# print() with a single argument behaves the same on Python 2 and Python 3.
print(randnums)
In [5]:
# Indices that would put randnums in ascending order.
sortind = np.argsort(randnums)
# print() with a single argument behaves the same on Python 2 and Python 3.
print(sortind)
In [6]:
# Last three entries of the ascending argsort, i.e. the positions in randnums
# of its three largest values.
sortind[-3:]
Out[6]:
In [7]:
# The three largest values of randnums, listed in ascending order.
randnums[sortind[-3:]]
Out[7]:
In [8]:
# One hand-written sample row: column names first, then the value for each
# column in the same position.
labels = ['k', 'accuracy', 'recall', 'precision', 'TP', 'TN', 'FN', 'FP']
data = [1, 0.5, 0.5, 0.5, 100, 200, 300, 400]
In [9]:
# Build a single column indexed by the metric names, then flip it so the
# metric names become the columns of a one-row frame.
ser1 = pd.DataFrame(data=data, index=labels).transpose()
In [10]:
# print() with a single argument behaves the same on Python 2 and Python 3.
print(ser1)
In [11]:
# Stack five copies of the one-row frame `ser1` into a five-row frame.
# DataFrame.add is aligned element-wise addition, not an append: adding `ser1`
# to an empty frame only produces NaN, so the rows are concatenated instead.
df = pd.concat([ser1] * 5, ignore_index=True)
print(df)
In [12]:
# Five identical rows of `data` under the `labels` column names, built in one
# constructor call instead of enlarging an empty frame one .loc row at a time.
df = pd.DataFrame([data] * 5, columns=labels)
print(df)
In [13]:
# Read the results table; the path is relative to the notebook's working
# directory. (Presumably cross-validation metrics per k, judging by the
# file name - confirm against the file.)
path = '../../data/varying_K_on_cross_val.csv'
df = pd.read_csv(path)
In [14]:
# Show the column names that were read from the CSV.
df.columns
Out[14]:
In [15]:
# Remove the 'Unnamed: 0' column (presumably an index column written out with
# the CSV - confirm). Rebinding with errors='ignore' keeps this cell safe to
# re-run: a second run is a no-op instead of raising on the missing column.
df = df.drop(labels='Unnamed: 0', axis=1, errors='ignore')
In [16]:
# Display the whole table.
df
Out[16]:
In [66]:
# x-axis: the `k` column, positions 1 through 11 (position 0 is skipped).
x = df['k'].values[1:12]

# One star-marker series per column at positions 1, 2 and 3 of the frame,
# using the same 1:12 row window as x.
for col in df.columns[1:4]:
    plt.plot(
        x,
        df[col].values[1:12],
        '*',
        label=col,
        markersize=8,
    )

plt.legend()
plt.xlabel('k')
plt.ylabel('%')
Out[66]:
In [67]:
# x-axis: the `k` column, positions 1 through 11. Recomputed here so the cell
# does not rely on `x` left behind by another cell.
x = df['k'].values[1:12]

# One star-marker series per column from position 4 onward.
for col in df.columns[4:]:
    plt.plot(x, df[col].values[1:12], '*', label=col, markersize=8)

plt.legend()
plt.xlabel('k')
# NOTE(review): assumes the columns from position 4 on are the raw TP/TN/FN/FP
# tallies, so the axis is labelled as a count rather than '%' - confirm
# against the CSV.
plt.ylabel('count')
Out[67]:
In [67]:
def build_roc(df):
    """Plot ROC operating points and report the area under them.

    Each row of ``df`` is drawn as one red star at (FPR, TPR), where TPR is
    the ``recall`` column and FPR is ``FP / (FP + TN)``. A thin black
    diagonal from (0, 0) to (1, 1) is drawn for reference, both axes are
    limited to [0, 1], and the trapezoidal area under the points is written
    into the plot title. Drawing goes through ``plt`` onto the current axes.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``recall``, ``FP`` and ``TN``. The frame is
        only read; no columns are added to it, so a slice such as
        ``df.loc[1:, :]`` can be passed without a SettingWithCopyWarning.

    Returns
    -------
    None

    Notes
    -----
    The area only spans the FPR values present in ``df``; the curve is not
    extended to (0, 0) or (1, 1).
    """
    tpr = df['recall'].values
    fpr = (df['FP'] / (df['FP'] + df['TN'])).values

    plt.plot([0, 1], [0, 1], 'k', linewidth=0.5)
    plt.plot(fpr, tpr, 'r*', markersize=7)
    plt.xlabel('FPR')
    plt.xlim([0, 1])
    plt.ylabel('TPR')
    plt.ylim([0, 1])

    # Integrate along increasing FPR. The rows are first reversed and then
    # stably sorted, so input whose reversed order is already ascending is
    # integrated in exactly that order, while any other row order still gives
    # a properly oriented area.
    fpr_rev = fpr[::-1]
    tpr_rev = tpr[::-1]
    order = np.argsort(fpr_rev, kind='mergesort')

    # NumPy 2 renamed trapz to trapezoid; use whichever this install provides.
    trapezoid = np.trapezoid if hasattr(np, 'trapezoid') else np.trapz
    auc = trapezoid(tpr_rev[order], x=fpr_rev[order])

    titlestr = "AUC: {}".format(auc)
    plt.title(titlestr)
In [73]:
# Plot every row from index label 1 onward. An independent copy is passed so
# that anything build_roc writes to its argument can never touch (or warn
# about) a slice of `df`.
build_roc(df.loc[1:, :].copy())
In [51]:
# TPR is defined as the `recall` column (that is how build_roc computes it),
# so read it directly: `df` itself is never given a TPR column when the
# notebook runs top to bottom.
df['recall'].values[::-1]
In [71]:
def seperatePunct(incomingString):
    """Return ``incomingString`` with a space inserted on both sides of every
    punctuation mark listed in ``marks``.

    Characters not in the table (for example ``-``, ``,`` and ``.``) are
    left untouched. Adjacent marks each get their own padding, so ``'""'``
    becomes ``' "  " '`` and splitting the result on a single space yields
    empty strings between them.

    Parameters
    ----------
    incomingString : str
        Text to process.

    Returns
    -------
    str
        The padded text; an empty input gives an empty output.
    """
    # Table of marks, in the order they are substituted. The padding is made
    # only of spaces, so one substitution can never affect another.
    marks = (
        "!", "@", "#", "$", "%", "^", "&", "*", "(", ")",
        "+", "=", "?", "'", "\"", "{", "}", "[", "]", "<",
        ">", "~", "`", ":", ";", "|", "\\", "/",
    )
    newstring = incomingString
    for mark in marks:
        newstring = newstring.replace(mark, " " + mark + " ")
    return newstring
In [72]:
def seperatePunct2(incomingString):
    """Return ``incomingString`` with a space inserted on both sides of every
    character found in ``characters``, scanning the input once.

    Characters outside the set are copied through unchanged. Adjacent marks
    each get their own padding, so ``'""'`` becomes ``' "  " '``.

    Parameters
    ----------
    incomingString : str
        Text to process.

    Returns
    -------
    str
        The padded text; an empty input gives an empty output.
    """
    characters = set(['!', '@', '#', '$', "%", "^", "&", "*", ":", "\\",
                      "(", ")", "+", "=", "?", "\'", "\"", ";", "/",
                      "{", "}", "[", "]", "<", ">", "~", "`", "|"])
    # Collect the pieces and join once, instead of rebuilding the output
    # string on every character.
    pieces = []
    for char in incomingString:
        if char in characters:
            pieces.append(' ' + char + ' ')
        else:
            pieces.append(char)
    return ''.join(pieces)
In [76]:
# Inputs with a double quote at the start, middle and end of a token, doubled
# quotes, and one sentence combining them all.
cases = ['dsfj"kfl', '"lsfkd', 'fldsjfk"', 'lsfjdksf""fjdslf', '"lsfkd', 'fldsjfk"',
         'dsfj"kfl "lsfkd fldsjfk" lsfjdksf""fjdslf "lsfkd fldsjfk"', ]
for case in cases:
    # Show the tokens obtained by splitting the padded string on single
    # spaces, followed by a blank line. print() with one argument behaves
    # the same on Python 2 and Python 3.
    print(seperatePunct2(case).split(' '))
    print("")
In [ ]: