In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb
from operator import itemgetter
%matplotlib inline

In [3]:
results = {'eval': {'auc': [0.499306, 0.494721, 0.488865, 0.478342, 0.5674]},
 'train': {'auc': [0.557143, 0.632473, 0.715275, 0.74478, 0.780549]}}

labels = ['max_depth','eta','num_rounds','eval_reslts']
data = [[3,0.1,100,results],[3,0.1,100,results],[3,0.1,100,results]]
df = pd.DataFrame(data=data,columns=labels)

In [4]:
df


Out[4]:
max_depth eta num_rounds eval_reslts
0 3 0.1 100 {u'train': {u'auc': [0.557143, 0.632473, 0.715...
1 3 0.1 100 {u'train': {u'auc': [0.557143, 0.632473, 0.715...
2 3 0.1 100 {u'train': {u'auc': [0.557143, 0.632473, 0.715...

In [2]:
def myfunc(x):
    
    val = np.array(x['eval']['auc'])
    maxInd = np.argmax(val)
    maxVal = val[maxInd]
    
    return (maxInd,maxVal)

In [21]:
test = df['eval_reslts'].loc[0]
print test['eval'].values


<built-in method values of dict object at 0x7fc0bbb6d168>

In [22]:
test = df['eval_reslts'].loc[0]
myfunc(test)


Out[22]:
(4, 0.56740000000000002)

In [24]:
df['eval_auc'] = df['eval_reslts'].map(lambda x: myfunc(x))

In [25]:
df


Out[25]:
max_depth eta num_rounds eval_reslts eval_auc
0 3 0.1 100 {u'train': {u'auc': [0.557143, 0.632473, 0.715... (4, 0.5674)
1 3 0.1 100 {u'train': {u'auc': [0.557143, 0.632473, 0.715... (4, 0.5674)
2 3 0.1 100 {u'train': {u'auc': [0.557143, 0.632473, 0.715... (4, 0.5674)

In [2]:
def build_roc(df):
    
    df['TPR'] = df['recall']
    df['FPR'] = df['FP']/(df['FP'] + df['TN'])
    
#     plt.plot([0,1],[0,1],'k',linewidth=0.5)
    plt.figure()
    plt.plot(df.FPR.values,df.TPR.values,'r*',markersize=7)
    plt.xlabel('FPR')
    plt.xlim([0,1])
    plt.ylabel('TPR')
    plt.ylim([0,1])
    titlestr = "AUC: {}  k = {}".format(np.trapz(df.TPR.values[::-1],x=df.FPR.values[::-1]),int(df.k.unique()))
    plt.title(titlestr)

In [ ]:
for k in df.k.unique():
    build_roc(df[df['k']==k])

In [3]:
path = '../../data/gridsearch_xgb.csv'

In [4]:
df = pd.read_csv(path)

In [5]:
df.head()


Out[5]:
Unnamed: 0 num_rounds max_depth eta eval_results
0 0 100 3 0.03 {'train': {'auc': ['0.552844', '0.552849', '0....
1 1 300 3 0.03 {'train': {'auc': ['0.552844', '0.552849', '0....
2 2 600 3 0.03 {'train': {'auc': ['0.552844', '0.552849', '0....
3 3 900 3 0.03 {'train': {'auc': ['0.552844', '0.552849', '0....
4 4 100 4 0.03 {'train': {'auc': ['0.564531', '0.564540', '0....

In [23]:
df['eval_results'].ix[0]


Out[23]:
"{'train': {'auc': ['0.552844', '0.552849', '0.552853', '0.552854', '0.564506', '0.564508', '0.564585', '0.570304', '0.570369', '0.570379', '0.574745', '0.574746', '0.574748', '0.574748', '0.574755', '0.574814', '0.577942', '0.577889', '0.577944', '0.591977', '0.591976', '0.591979', '0.595014', '0.605616', '0.609023', '0.609023', '0.609026', '0.609065', '0.615975', '0.616123', '0.616305', '0.616303', '0.616452', '0.616523', '0.616298', '0.616303', '0.648427', '0.648950', '0.648568', '0.652931', '0.652968', '0.652983', '0.653288', '0.653320', '0.657873', '0.657791', '0.657966', '0.657924', '0.658021', '0.658081', '0.658085', '0.658091', '0.663862', '0.663871', '0.663874', '0.663901', '0.665859', '0.668477', '0.668617', '0.668639', '0.668658', '0.668621', '0.668623', '0.670125', '0.670147', '0.670208', '0.669043', '0.671257', '0.671260', '0.677831', '0.680031', '0.680049', '0.680268', '0.680302', '0.680307', '0.680937', '0.680934', '0.682168', '0.682185', '0.682234', '0.682249', '0.683827', '0.685565', '0.686696', '0.686442', '0.687807', '0.687826', '0.687395', '0.686994', '0.687051', '0.687506', '0.690304', '0.690345', '0.690778', '0.690929', '0.691567', '0.691241', '0.696854', '0.696816', '0.697191']}, 'eval': {'auc': ['0.58177', '0.58177', '0.58177', '0.58177', '0.57943', '0.57943', '0.57943', '0.57936', '0.57936', '0.57936', '0.57965', '0.57965', '0.57965', '0.57965', '0.57965', '0.57966', '0.57966', '0.57965', '0.57966', '0.67961', '0.67961', '0.67961', '0.67961', '0.68028', '0.68028', '0.68027', '0.68027', '0.68028', '0.68180', '0.68180', '0.68180', '0.68180', '0.68220', '0.68220', '0.68180', '0.68180', '0.70203', '0.70203', '0.70203', '0.70193', '0.70204', '0.70204', '0.70204', '0.70204', '0.70215', '0.70214', '0.70215', '0.70203', '0.70193', '0.70204', '0.70204', '0.70204', '0.69895', '0.69895', '0.69894', '0.69894', '0.69894', '0.69910', '0.69943', '0.69944', '0.69944', '0.69944', '0.69944', '0.69927', '0.69928', '0.69929', '0.69931', '0.69916', '0.69916', '0.69586', '0.69586', '0.69588', '0.69584', '0.69584', '0.69584', '0.69512', '0.69513', '0.69506', '0.69509', '0.69509', '0.69509', '0.69585', '0.69584', '0.69583', '0.69561', '0.69560', '0.69560', '0.69548', '0.69547', '0.69547', '0.69557', '0.70430', '0.70409', '0.70395', '0.70395', '0.70396', '0.70396', '0.70401', '0.70427', '0.70424']}}"

In [6]:
def findtrain(x):
    badset = {"{","}","'",",","t","r","a","i","n","u","c",":","[","]"}
    outstr = ""
    results = x.split("eval")
    
    for char in results[0]:
        if char not in badset:
            outstr = outstr + char
            
    numbers = outstr.split(" ")
    aucVals = np.array([float(number) for number in numbers[2:-1]])
    maxInd = np.argmax(aucVals)
    maxVal = aucVals[maxInd]
    
    return (maxInd,maxVal)
  
def findbest(x):
    numvals = len(x)
    bestrow = None
    bestVal = None
    
    for row in xrange(numvals):
        if x[row][1] > bestVal:
            bestrow = row
            bestVal = x[row][1]
            
    return bestrow, x[bestrow]


def findeval(x):
    badset = {"{","}","'",",","t","r","a","i","n","u","c",":","[","]"}
    outstr = ""
    results = x.split("eval")
    
    for char in results[1]:
        if char not in badset:
            outstr = outstr + char
            
    numbers = outstr.split(" ")
    aucVals = np.array([float(number) for number in numbers[2:]])
    maxInd = np.argmax(aucVals)
    maxVal = aucVals[maxInd]
    
    return (maxInd,maxVal)

In [7]:
df['eval_auc'] = df['eval_results'].map(lambda x: findeval(x))

In [8]:
df['train_auc'] = df['eval_results'].map(lambda x: findtrain(x))

In [9]:
df


Out[9]:
Unnamed: 0 num_rounds max_depth eta eval_results eval_auc train_auc
0 0 100 3 0.03 {'train': {'auc': ['0.552844', '0.552849', '0.... (91, 0.7043) (99, 0.697191)
1 1 300 3 0.03 {'train': {'auc': ['0.552844', '0.552849', '0.... (284, 0.74588) (299, 0.743419)
2 2 600 3 0.03 {'train': {'auc': ['0.552844', '0.552849', '0.... (596, 0.80778) (599, 0.767899)
3 3 900 3 0.03 {'train': {'auc': ['0.552844', '0.552849', '0.... (661, 0.80968) (897, 0.782626)
4 4 100 4 0.03 {'train': {'auc': ['0.564531', '0.564540', '0.... (97, 0.70845) (98, 0.708373)
5 5 300 4 0.03 {'train': {'auc': ['0.564531', '0.564540', '0.... (292, 0.77796) (299, 0.753988)
6 6 600 4 0.03 {'train': {'auc': ['0.564531', '0.564540', '0.... (522, 0.81082) (599, 0.77964)
7 7 900 4 0.03 {'train': {'auc': ['0.564531', '0.564540', '0.... (899, 0.81823) (899, 0.792982)
8 8 100 5 0.03 {'train': {'auc': ['0.570415', '0.570409', '0.... (91, 0.71407) (99, 0.718837)
9 9 300 5 0.03 {'train': {'auc': ['0.570415', '0.570409', '0.... (280, 0.79814) (299, 0.76341)
10 10 600 5 0.03 {'train': {'auc': ['0.570415', '0.570409', '0.... (388, 0.81023) (599, 0.788445)
11 11 900 5 0.03 {'train': {'auc': ['0.570415', '0.570409', '0.... (881, 0.82387) (899, 0.802094)
12 12 100 6 0.03 {'train': {'auc': ['0.574951', '0.574936', '0.... (99, 0.72097) (97, 0.727374)
13 13 300 6 0.03 {'train': {'auc': ['0.574951', '0.574936', '0.... (290, 0.80857) (299, 0.771393)
14 14 600 6 0.03 {'train': {'auc': ['0.574951', '0.574936', '0.... (596, 0.81735) (599, 0.795562)
15 15 900 6 0.03 {'train': {'auc': ['0.574951', '0.574936', '0.... (837, 0.82755) (899, 0.809408)
16 16 100 3 0.06 {'train': {'auc': ['0.552844', '0.552849', '0.... (98, 0.73037) (99, 0.725216)
17 17 300 3 0.06 {'train': {'auc': ['0.552844', '0.552849', '0.... (286, 0.80978) (299, 0.76855)
18 18 600 3 0.06 {'train': {'auc': ['0.552844', '0.552849', '0.... (583, 0.81891) (599, 0.792096)
19 19 900 3 0.06 {'train': {'auc': ['0.552844', '0.552849', '0.... (886, 0.82937) (899, 0.805351)
20 20 100 4 0.06 {'train': {'auc': ['0.564531', '0.570346', '0.... (95, 0.74017) (99, 0.739443)
21 21 300 4 0.06 {'train': {'auc': ['0.564531', '0.570346', '0.... (257, 0.81035) (299, 0.780313)
22 22 600 4 0.06 {'train': {'auc': ['0.564531', '0.570346', '0.... (534, 0.8261) (599, 0.803442)
23 23 900 4 0.06 {'train': {'auc': ['0.564531', '0.570346', '0.... (839, 0.84644) (899, 0.815612)
24 24 100 5 0.06 {'train': {'auc': ['0.570415', '0.570412', '0.... (92, 0.74551) (99, 0.748918)
25 25 300 5 0.06 {'train': {'auc': ['0.570415', '0.570412', '0.... (205, 0.81014) (299, 0.789367)
26 26 600 5 0.06 {'train': {'auc': ['0.570415', '0.570412', '0.... (599, 0.8387) (599, 0.812053)
27 27 900 5 0.06 {'train': {'auc': ['0.570415', '0.570412', '0.... (892, 0.84947) (899, 0.823539)
28 28 100 6 0.06 {'train': {'auc': ['0.574951', '0.613902', '0.... (78, 0.74694) (99, 0.755981)
29 29 300 6 0.06 {'train': {'auc': ['0.574951', '0.613902', '0.... (299, 0.82282) (299, 0.795567)
... ... ... ... ... ... ... ...
66 66 600 3 0.60 {'train': {'auc': ['0.552844', '0.570280', '0.... (152, 0.85972) (599, 0.851604)
67 67 900 3 0.60 {'train': {'auc': ['0.552844', '0.570280', '0.... (152, 0.85972) (899, 0.86043)
68 68 100 4 0.60 {'train': {'auc': ['0.564531', '0.613752', '0.... (71, 0.83969) (99, 0.81354)
69 69 300 4 0.60 {'train': {'auc': ['0.564531', '0.613752', '0.... (235, 0.85638) (299, 0.844328)
70 70 600 4 0.60 {'train': {'auc': ['0.564531', '0.613752', '0.... (235, 0.85638) (599, 0.86115)
71 71 900 4 0.60 {'train': {'auc': ['0.564531', '0.613752', '0.... (235, 0.85638) (899, 0.870383)
72 72 100 5 0.60 {'train': {'auc': ['0.570415', '0.603725', '0.... (93, 0.85893) (99, 0.820764)
73 73 300 5 0.60 {'train': {'auc': ['0.570415', '0.603725', '0.... (93, 0.85893) (299, 0.852038)
74 74 600 5 0.60 {'train': {'auc': ['0.570415', '0.603725', '0.... (93, 0.85893) (599, 0.869036)
75 75 900 5 0.60 {'train': {'auc': ['0.570415', '0.603725', '0.... (93, 0.85893) (899, 0.879071)
76 76 100 6 0.60 {'train': {'auc': ['0.574951', '0.644546', '0.... (74, 0.84766) (99, 0.828738)
77 77 300 6 0.60 {'train': {'auc': ['0.574951', '0.644546', '0.... (131, 0.85167) (299, 0.859913)
78 78 600 6 0.60 {'train': {'auc': ['0.574951', '0.644546', '0.... (131, 0.85167) (599, 0.877316)
79 79 900 6 0.60 {'train': {'auc': ['0.574951', '0.644546', '0.... (131, 0.85167) (899, 0.88754)
80 80 100 3 0.90 {'train': {'auc': ['0.552844', '0.574873', '0.... (94, 0.8327) (99, 0.805588)
81 81 300 3 0.90 {'train': {'auc': ['0.552844', '0.574873', '0.... (138, 0.85149) (299, 0.839067)
82 82 600 3 0.90 {'train': {'auc': ['0.552844', '0.574873', '0.... (138, 0.85149) (599, 0.856192)
83 83 900 3 0.90 {'train': {'auc': ['0.552844', '0.574873', '0.... (138, 0.85149) (899, 0.865057)
84 84 100 4 0.90 {'train': {'auc': ['0.564531', '0.626056', '0.... (95, 0.83787) (99, 0.816703)
85 85 300 4 0.90 {'train': {'auc': ['0.564531', '0.626056', '0.... (134, 0.83875) (299, 0.849048)
86 86 600 4 0.90 {'train': {'auc': ['0.564531', '0.626056', '0.... (134, 0.83875) (599, 0.866013)
87 87 900 4 0.90 {'train': {'auc': ['0.564531', '0.626056', '0.... (134, 0.83875) (899, 0.875472)
88 88 100 5 0.90 {'train': {'auc': ['0.570415', '0.637539', '0.... (74, 0.84632) (99, 0.825646)
89 89 300 5 0.90 {'train': {'auc': ['0.570415', '0.637539', '0.... (113, 0.84751) (299, 0.857447)
90 90 600 5 0.90 {'train': {'auc': ['0.570415', '0.637539', '0.... (113, 0.84751) (599, 0.874347)
91 91 900 5 0.90 {'train': {'auc': ['0.570415', '0.637539', '0.... (113, 0.84751) (899, 0.884148)
92 92 100 6 0.90 {'train': {'auc': ['0.574951', '0.645773', '0.... (85, 0.84649) (99, 0.833498)
93 93 300 6 0.90 {'train': {'auc': ['0.574951', '0.645773', '0.... (85, 0.84649) (299, 0.864634)
94 94 600 6 0.90 {'train': {'auc': ['0.574951', '0.645773', '0.... (85, 0.84649) (599, 0.882257)
95 95 900 6 0.90 {'train': {'auc': ['0.574951', '0.645773', '0.... (85, 0.84649) (899, 0.892832)

96 rows × 7 columns


In [63]:
max(df['eval_auc'].values,key=itemgetter(1))


Out[63]:
(163, 0.86024999999999996)

In [67]:
findbest(df['eval_auc'].values)


Out[67]:
(53, (163, 0.86024999999999996))

In [68]:
df.ix[53]


Out[68]:
Unnamed: 0                                                     53
num_rounds                                                    300
max_depth                                                       4
eta                                                           0.3
eval_results    {'train': {'auc': ['0.564531', '0.574791', '0....
eval_auc                                           (163, 0.86025)
train_auc                                         (299, 0.829813)
Name: 53, dtype: object

In [ ]:


In [34]:
results = x.split("eval")

In [51]:
results[0]


Out[51]:
"{'train': {'auc': ['0.552844', '0.552849', '0.552853', '0.552854', '0.564506', '0.564508', '0.564585', '0.570304', '0.570369', '0.570379', '0.574745', '0.574746', '0.574748', '0.574748', '0.574755', '0.574814', '0.577942', '0.577889', '0.577944', '0.591977', '0.591976', '0.591979', '0.595014', '0.605616', '0.609023', '0.609023', '0.609026', '0.609065', '0.615975', '0.616123', '0.616305', '0.616303', '0.616452', '0.616523', '0.616298', '0.616303', '0.648427', '0.648950', '0.648568', '0.652931', '0.652968', '0.652983', '0.653288', '0.653320', '0.657873', '0.657791', '0.657966', '0.657924', '0.658021', '0.658081', '0.658085', '0.658091', '0.663862', '0.663871', '0.663874', '0.663901', '0.665859', '0.668477', '0.668617', '0.668639', '0.668658', '0.668621', '0.668623', '0.670125', '0.670147', '0.670208', '0.669043', '0.671257', '0.671260', '0.677831', '0.680031', '0.680049', '0.680268', '0.680302', '0.680307', '0.680937', '0.680934', '0.682168', '0.682185', '0.682234', '0.682249', '0.683827', '0.685565', '0.686696', '0.686442', '0.687807', '0.687826', '0.687395', '0.686994', '0.687051', '0.687506', '0.690304', '0.690345', '0.690778', '0.690929', '0.691567', '0.691241', '0.696854', '0.696816', '0.697191']}, '"

In [54]:
outstr = ""
badset = {"{","}","'",",","t","r","a","i","n","u","c",":","[","]"}
for char in results[0]:
        if char not in badset:
            outstr = outstr + char

In [55]:
outstr.split(" ")


Out[55]:
['',
 '',
 '0.552844',
 '0.552849',
 '0.552853',
 '0.552854',
 '0.564506',
 '0.564508',
 '0.564585',
 '0.570304',
 '0.570369',
 '0.570379',
 '0.574745',
 '0.574746',
 '0.574748',
 '0.574748',
 '0.574755',
 '0.574814',
 '0.577942',
 '0.577889',
 '0.577944',
 '0.591977',
 '0.591976',
 '0.591979',
 '0.595014',
 '0.605616',
 '0.609023',
 '0.609023',
 '0.609026',
 '0.609065',
 '0.615975',
 '0.616123',
 '0.616305',
 '0.616303',
 '0.616452',
 '0.616523',
 '0.616298',
 '0.616303',
 '0.648427',
 '0.648950',
 '0.648568',
 '0.652931',
 '0.652968',
 '0.652983',
 '0.653288',
 '0.653320',
 '0.657873',
 '0.657791',
 '0.657966',
 '0.657924',
 '0.658021',
 '0.658081',
 '0.658085',
 '0.658091',
 '0.663862',
 '0.663871',
 '0.663874',
 '0.663901',
 '0.665859',
 '0.668477',
 '0.668617',
 '0.668639',
 '0.668658',
 '0.668621',
 '0.668623',
 '0.670125',
 '0.670147',
 '0.670208',
 '0.669043',
 '0.671257',
 '0.671260',
 '0.677831',
 '0.680031',
 '0.680049',
 '0.680268',
 '0.680302',
 '0.680307',
 '0.680937',
 '0.680934',
 '0.682168',
 '0.682185',
 '0.682234',
 '0.682249',
 '0.683827',
 '0.685565',
 '0.686696',
 '0.686442',
 '0.687807',
 '0.687826',
 '0.687395',
 '0.686994',
 '0.687051',
 '0.687506',
 '0.690304',
 '0.690345',
 '0.690778',
 '0.690929',
 '0.691567',
 '0.691241',
 '0.696854',
 '0.696816',
 '0.697191',
 '']

In [10]:
s = "ljklj{}".format(5)

In [11]:
s


Out[11]:
'ljklj5'

In [ ]: