In [5]:
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.base import clone
from sklearn.cross_validation import StratifiedKFold
from sklearn.cross_validation import StratifiedShuffleSplit
from sklearn.cross_validation import cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

In [3]:
def replaceSeuil(DF , col , seuil) :
    """Merge the rare values of ``DF[col]`` into one existing value, in place.

    Every value whose frequency in ``col`` is ``<= seuil`` is overwritten with
    the least frequent value of the column (the first one met in
    ``value_counts()`` order when several share the minimum count).

    Parameters
    ----------
    DF : pandas.DataFrame
        Frame to modify. It is mutated in place.
    col : column label
        Column whose rare values are merged.
    seuil : number
        Frequency threshold (inclusive).

    Returns
    -------
    pandas.DataFrame
        The same ``DF`` object, returned for call chaining.
    """
    counts = DF[col].value_counts()
    if counts.empty:
        # Nothing to merge; min() on an empty list would raise ValueError.
        return DF
    labels = list(counts.index)
    freqs = list(counts)
    Replace = labels[freqs.index(min(freqs))]
    rare = [label for label, freq in zip(labels, freqs) if freq <= seuil]
    if rare:
        # The mask is built from DF itself, not from a module-level frame,
        # so the function works on whatever frame it is given.
        DF.loc[DF[col].isin(rare) , col] = Replace
    return DF
def getChanges(df , Y, column , clf , seuil ):
    """Return a copy of ``df`` in which the rare values of ``column`` are merged.

    Every value whose frequency is ``<= seuil`` is replaced by the least
    frequent value of the column (first one in ``value_counts()`` order on
    ties). The input frame is left untouched.

    ``Y`` and ``clf`` are accepted for signature compatibility with the
    scoring helpers but are not used here.

    Returns
    -------
    pandas.DataFrame
        The modified copy. The previous implementation returned the
        unmodified input, which discarded all the work done on the copy.
    """
    DF = df.copy()
    counts = DF[column].value_counts()
    if counts.empty:
        # No value to merge; avoids min() on an empty list.
        return DF
    labels = list(counts.index)
    freqs = list(counts)
    Replace = labels[freqs.index(min(freqs))]
    rare = [label for label, freq in zip(labels, freqs) if freq <= seuil]
    if rare:
        DF.loc[DF[column].isin(rare) , column] = Replace
    return DF
def getScoreOfSeuil(df , Y, column , clf , seuil ):
    """Score ``clf`` on a copy of ``df`` whose rare ``column`` values are merged.

    Values of ``column`` with a frequency ``<= seuil`` are replaced, on a copy,
    by the least frequent value of that column; the copy is then scored with
    ``getScoreClf``.

    Returns
    -------
    tuple
        ``(score, df)`` where ``score`` is the value returned by
        ``getScoreClf`` for the merged copy.
        NOTE(review): the second element is the *unmodified* input ``df``,
        not the merged copy that was scored -- confirm this is intended.
    """
    merged = df.copy()
    counts = merged[column].value_counts()
    labels = list(counts.index)
    freqs = list(counts)
    fallback = labels[freqs.index(min(freqs))]
    for label, freq in zip(labels, freqs):
        if freq <= seuil:
            merged.loc[df[column] == label , column] = fallback
    score = getScoreClf(merged , Y , clf)
    return score , df
def getBestSeuil(df , Y , clf , score  , List):
    """Greedily pick, column by column, the rare-value threshold that scores best.

    For every column not already listed in ``List``, up to ten candidate
    thresholds are tried (the ten smallest distinct value frequencies of the
    column). The (column, threshold) pair with the highest score that is at
    least ``score`` is applied with ``replaceSeuil`` and the search restarts
    recursively on the remaining columns, until no candidate reaches the
    current best score.

    Parameters
    ----------
    df : pandas.DataFrame
        Feature frame. It is modified in place each time a threshold is
        applied, because ``replaceSeuil`` mutates its argument.
    Y : array-like
        Target passed through to the scorer.
    clf : estimator
        Classifier passed through to the scorer.
    score : float
        Score to beat or equal.
    List : list
        Columns already processed; the chosen column is appended to it.

    Returns
    -------
    pandas.DataFrame
        A copy of the frame as it stands when no further candidate qualifies.
    """
    maxS = score
    found = False
    DR = df.copy()
    Seuilx = 0
    Choosen = ""
    for col in DR.columns.values :
        if col in List :
            continue
        print(col)
        for s in np.unique(np.array(DR[col].value_counts()))[:10]:
            result = getScoreOfSeuil(DR , Y, col , clf , s )
            # getScoreOfSeuil returns (score, frame); only the score must be
            # compared with maxS. Comparing the whole tuple with a float is
            # always true on Python 2 and a TypeError on Python 3.
            sc = result[0] if isinstance(result, tuple) else result
            print("Column :" , col , "Seuil : " , s  , "ROC :" , sc )
            if sc >= maxS :
                found = True
                maxS = sc
                Seuilx = s
                Choosen = col
    if not found :
        return DR
    print("Choose :" , Choosen , "Roc :" ,  maxS)
    List.append(Choosen)
    Xr = replaceSeuil(df , Choosen , Seuilx)
    return getBestSeuil(Xr , Y , clf , maxS , List)
def getScoreClf(DF , Y , clf ):
    """Return the highest per-fold ROC AUC of a fresh clone of ``clf``.

    The clone is evaluated on ``DF``/``Y`` with 5-fold cross-validation
    using all available cores (``n_jobs=-1``). The result is the maximum of
    the fold scores, not their mean.
    """
    estimator = clone(clf)
    fold_scores = cross_val_score(estimator , DF , Y , scoring='roc_auc' ,  cv = 5 , n_jobs=-1)
    return max(fold_scores)
def getScoreForSeuil(df , Y, column , clf , seuil , score ) :
    """Raise the rare-value threshold of ``column`` while the score keeps improving.

    Each round merges, on a copy of the current frame, every value of
    ``column`` whose frequency is ``<= seuil`` into the least frequent value,
    then scores the copy with ``getScoreClf``. When the score is strictly
    better than the best so far, the copy becomes the current frame and the
    threshold is incremented by one; otherwise the search stops.

    Returns
    -------
    tuple
        ``(seuil, frame)``: the first threshold that did not improve the
        score, and the frame that was the input of that last round.
    """
    current = df
    best = score
    while True:
        trial = current.copy()
        counts = trial[column].value_counts()
        labels = list(counts.index)
        freqs = list(counts)
        fallback = labels[freqs.index(min(freqs))]
        for label, freq in zip(labels, freqs):
            if freq <= seuil:
                trial.loc[current[column] == label , column] = fallback
        trial_score = getScoreClf(trial , Y , clf)
        if not trial_score > best:
            return seuil , current
        # Improvement: keep the merged frame and try the next threshold.
        seuil = seuil + 1
        current = trial
        best = trial_score
def getBestRelationROC(df , Y , realtions , clf , ScoreM ):
    """Find the relation whose handling gives the best score, starting from ``ScoreM``.

    For each pair in ``realtions``:

    * both columns present in ``df``: ``getScoreRelation`` picks the best
      variant of the frame for that pair;
    * only one of them present: ``getDelete`` checks whether dropping that
      column helps, and the result is considered only when it says so.

    A candidate replaces the current best when its score is ``>=`` the best
    score so far, so later ties win.

    Returns
    -------
    tuple
        ``(index, maxScore, What)``: position of the winning relation in
        ``realtions`` (``-1`` when none qualified), its score, and the
        matching frame (a copy of ``df`` when none qualified).
    """
    What = df.copy()
    maxScore = ScoreM
    index = -1
    present = list(df.columns.values)
    for position, relation in enumerate(realtions):
        print("Doing :" , relation )
        has_first = relation[0] in present
        has_second = relation[1] in present
        if has_first and has_second:
            Score , Df = getScoreRelation( df  , Y ,  relation[0] , relation[1] , clf )
            if Score >= maxScore :
                print("Choose :" , relation )
                maxScore = Score
                What = Df
                index = position
        elif has_first or has_second:
            # Only one column of the pair survives: test whether to drop it.
            survivor = relation[0] if has_first else relation[1]
            Dec , Sc , DC = getDelete(df , Y , survivor , clf)
            if Dec == True and Sc >= maxScore :
                print("Delete :" , survivor )
                maxScore = Sc
                What = DC
                index = position
    return index , maxScore , What
def getScoreDf(DF , Y , clf):
    """Return the highest per-fold ROC AUC of ``clf`` on ``DF``/``Y``.

    Uses 2-fold cross-validation on two workers. ``clf`` is passed to
    ``cross_val_score`` as given (no explicit clone is made here).
    """
    fold_scores = cross_val_score(clf , DF , Y , scoring='roc_auc' ,  cv = 2 , n_jobs=2)
    return max(fold_scores)
def getScoreRelation(DF,Y,coli , colj , clf):
    """Score four variants of ``DF`` for a column pair and return the best one.

    The variants, in order, are:

    1. ``DF`` unchanged;
    2. ``DF`` without ``coli``;
    3. ``DF`` without ``colj``;
    4. ``DF`` with both columns replaced by a single combined column named
       ``"<coli>:<colj>"``.

    Each variant is scored with ``getScoreDf``; on ties the earliest variant
    in the list above wins.

    The previous implementation computed the combined values but never stored
    them, so variant 4 was just variant 3 with a renamed column. The combined
    column is now actually written. It holds integer codes (one per distinct
    value pair, from ``pd.factorize``) so that it stays numeric for the
    estimator; a ``":"`` separator keeps pairs such as ("1", "23") and
    ("12", "3") distinct.

    Returns
    -------
    tuple
        ``(best_score, best_frame)``.
    """
    without_i = DF.drop([coli] , axis = 1)
    without_j = DF.drop([colj] , axis = 1)

    newName = coli+":"+colj
    pair_values = DF[coli].astype(str) + ":" + DF[colj].astype(str)
    # Keep coli's position in the frame, then overwrite it with the pair codes.
    merged = DF.drop([colj] , axis = 1).rename(columns = {coli:newName})
    merged[newName] = pd.factorize(pair_values)[0]

    DataFrames = [DF , without_i , without_j , merged]
    Scores = [getScoreDf(frame , Y , clf) for frame in DataFrames]
    best = Scores.index(max(Scores))
    return Scores[best] , DataFrames[best]

def getRelationOneToN( df , seuil ) :
    """List column pairs ``[i, j]`` where each value of ``i`` maps to one value of ``j``.

    Parameters
    ----------
    df : pandas.DataFrame
        The data frame to inspect.
    seuil : float
        Quantile used to detect 1->n relations even in the presence of
        noise: the pair is kept when the ``seuil`` quantile of the number of
        distinct ``j`` values per ``i`` group equals 1.

    Returns
    -------
    list
        ``[i, j]`` pairs in discovery order. Once a pair is accepted, neither
        it nor its reverse is examined again.
    """
    relations = []
    accepted = set()
    columns = df.columns.values
    for i in columns :
        for j in columns :
            if i == j :
                continue
            pair = frozenset((i , j))
            if pair in accepted :
                continue
            distinct_per_group = df.groupby(i)[j].apply(lambda f : len(np.unique(f)))
            if distinct_per_group.quantile(seuil)*1.0 == 1 :
                relations.append([i , j ])
                accepted.add(pair)
    return relations
def getDelete(DF , Y , ColumnI , clf ):
    """Check on one hold-out split whether dropping ``ColumnI`` does not hurt ROC AUC.

    Two clones of ``clf`` are trained on the same stratified 80/20 split
    (``random_state=0``): one on all columns, one without ``ColumnI``. Their
    ROC AUC on the held-out part are compared.

    Parameters
    ----------
    DF : pandas.DataFrame
        Feature frame; it is copied, not modified.
    Y : array-like
        Binary target, indexed positionally like the rows of ``DF``.
    ColumnI : column label
        Candidate column to drop.
    clf : estimator
        Classifier exposing ``fit`` and ``predict_proba``.

    Returns
    -------
    tuple
        ``(True, auc_without, frame_without)`` when the reduced frame scores
        at least as well, else ``(False, auc_with, frame_with)``.
    """
    DF = DF.copy()
    X1 = DF.drop([ColumnI] , axis =1 )
    Y = np.asarray(Y)
    clf_full = clone(clf)
    clf_reduced = clone(clf)

    # Single stratified split; requires StratifiedShuffleSplit to be imported
    # from sklearn.cross_validation at the top of the notebook.
    sss = StratifiedShuffleSplit(Y, 1, test_size=0.2, random_state=0)
    Train , Test = next(iter(sss))

    # The split yields row positions, so use positional indexing (.iloc)
    # rather than the label-based, deprecated .ix.
    clf_full.fit(DF.iloc[Train].values , Y[Train])
    clf_reduced.fit(X1.iloc[Train].values , Y[Train])

    Score = clf_full.predict_proba(DF.iloc[Test].values)[:,1]
    Score1 = clf_reduced.predict_proba(X1.iloc[Test].values)[:,1]
    roc0 = metrics.roc_auc_score(Y[Test] , Score )
    roc1 = metrics.roc_auc_score(Y[Test] , Score1 )
    if roc0 <= roc1 :
        return True ,roc1 , X1
    return False ,roc0 , DF

In [6]:
# Target vector: the ACTION column of the training file.
Y = pd.read_csv('tr.csv').ACTION.values
# Feature frame used by every later cell.
# NOTE(review): presumably Fin.csv has the same row order as tr.csv -- confirm.
df = pd.read_csv('Fin.csv')

In [7]:
# Random forest used as the scoring model by the search cells below;
# random_state is fixed so repeated runs build the same forest.
clf4 = RandomForestClassifier(
    n_estimators=97,
    max_depth=22,
    min_samples_split=19,
    min_samples_leaf=9,
    random_state=5,
)

In [18]:
# Merge rare values with hand-picked thresholds, one (column, threshold) pair at a time.
for column, threshold in (('ROLE_FAMILY_DESC', 8), ('ROLE_CODE', 4)):
    df = replaceSeuil(df, column, threshold)

In [48]:
# Columns already handled by the greedy search (filled in by getBestSeuil).
Le = []
# `roc1` was not defined by any earlier cell, so this cell failed on a fresh
# kernel. NOTE(review): it is assumed to be the baseline score of the current
# frame, computed with the same scorer the search uses -- confirm.
roc1 = getScoreClf(df, Y, clf4)
df = getBestSeuil(df, Y, clf4, roc1, Le)


RESOURCE
('Column :', 'RESOURCE', 'Seuil : ', 1, 'ROC :', 0.86563044617268947)
('Column :', 'RESOURCE', 'Seuil : ', 2, 'ROC :', 0.86437272484065886)
('Column :', 'RESOURCE', 'Seuil : ', 3, 'ROC :', 0.86260238484135954)
('Column :', 'RESOURCE', 'Seuil : ', 4, 'ROC :', 0.86663046925014509)
('Column :', 'RESOURCE', 'Seuil : ', 5, 'ROC :', 0.86632367355636042)
('Column :', 'RESOURCE', 'Seuil : ', 6, 'ROC :', 0.8629485466758634)
('Column :', 'RESOURCE', 'Seuil : ', 7, 'ROC :', 0.86586256126145322)
('Column :', 'RESOURCE', 'Seuil : ', 8, 'ROC :', 0.86567334327722134)
('Column :', 'RESOURCE', 'Seuil : ', 9, 'ROC :', 0.86222160664819947)
('Column :', 'RESOURCE', 'Seuil : ', 10, 'ROC :', 0.86153483883816129)
MGR_ID
('Column :', 'MGR_ID', 'Seuil : ', 1, 'ROC :', 0.86760014974704558)
('Column :', 'MGR_ID', 'Seuil : ', 2, 'ROC :', 0.86569732805799793)
('Column :', 'MGR_ID', 'Seuil : ', 3, 'ROC :', 0.86786532680668693)
('Column :', 'MGR_ID', 'Seuil : ', 4, 'ROC :', 0.86889932502715861)
('Column :', 'MGR_ID', 'Seuil : ', 5, 'ROC :', 0.86942561922369155)
('Column :', 'MGR_ID', 'Seuil : ', 6, 'ROC :', 0.86750249792089207)
('Column :', 'MGR_ID', 'Seuil : ', 7, 'ROC :', 0.868088195197667)
('Column :', 'MGR_ID', 'Seuil : ', 8, 'ROC :', 0.86571063285744732)
('Column :', 'MGR_ID', 'Seuil : ', 9, 'ROC :', 0.86496910612467126)
('Column :', 'MGR_ID', 'Seuil : ', 10, 'ROC :', 0.86272512252290645)
ROLE_ROLLUP_2
('Column :', 'ROLE_ROLLUP_2', 'Seuil : ', 1, 'ROC :', 0.86716060968928355)
('Column :', 'ROLE_ROLLUP_2', 'Seuil : ', 2, 'ROC :', 0.86783306110482894)
('Column :', 'ROLE_ROLLUP_2', 'Seuil : ', 3, 'ROC :', 0.8683681161872967)
('Column :', 'ROLE_ROLLUP_2', 'Seuil : ', 4, 'ROC :', 0.86773733239997863)
('Column :', 'ROLE_ROLLUP_2', 'Seuil : ', 5, 'ROC :', 0.86822730097190282)
('Column :', 'ROLE_ROLLUP_2', 'Seuil : ', 6, 'ROC :', 0.86692684361092087)
('Column :', 'ROLE_ROLLUP_2', 'Seuil : ', 7, 'ROC :', 0.86658922898220714)
('Column :', 'ROLE_ROLLUP_2', 'Seuil : ', 8, 'ROC :', 0.86705633377864277)
('Column :', 'ROLE_ROLLUP_2', 'Seuil : ', 9, 'ROC :', 0.8667858147153823)
('Column :', 'ROLE_ROLLUP_2', 'Seuil : ', 10, 'ROC :', 0.8681527266013831)
ROLE_DEPTNAME
('Column :', 'ROLE_DEPTNAME', 'Seuil : ', 1, 'ROC :', 0.86741766690342437)
('Column :', 'ROLE_DEPTNAME', 'Seuil : ', 2, 'ROC :', 0.86690611663687966)
('Column :', 'ROLE_DEPTNAME', 'Seuil : ', 3, 'ROC :', 0.86703856808011937)
('Column :', 'ROLE_DEPTNAME', 'Seuil : ', 4, 'ROC :', 0.86584231834647352)
('Column :', 'ROLE_DEPTNAME', 'Seuil : ', 5, 'ROC :', 0.86626422331131459)
('Column :', 'ROLE_DEPTNAME', 'Seuil : ', 6, 'ROC :', 0.86570019177498403)
('Column :', 'ROLE_DEPTNAME', 'Seuil : ', 7, 'ROC :', 0.86570758470494624)
('Column :', 'ROLE_DEPTNAME', 'Seuil : ', 8, 'ROC :', 0.86599573833368848)
('Column :', 'ROLE_DEPTNAME', 'Seuil : ', 9, 'ROC :', 0.86716002556999805)
('Column :', 'ROLE_DEPTNAME', 'Seuil : ', 10, 'ROC :', 0.8655163409753901)
ROLE_FAMILY_DESC
('Column :', 'ROLE_FAMILY_DESC', 'Seuil : ', 1, 'ROC :', 0.86692492048961811)
('Column :', 'ROLE_FAMILY_DESC', 'Seuil : ', 2, 'ROC :', 0.86675782261641932)
('Column :', 'ROLE_FAMILY_DESC', 'Seuil : ', 3, 'ROC :', 0.8654090735427229)
('Column :', 'ROLE_FAMILY_DESC', 'Seuil : ', 4, 'ROC :', 0.86781553933295896)
('Column :', 'ROLE_FAMILY_DESC', 'Seuil : ', 5, 'ROC :', 0.86686209852705998)
('Column :', 'ROLE_FAMILY_DESC', 'Seuil : ', 6, 'ROC :', 0.86594861590823036)
('Column :', 'ROLE_FAMILY_DESC', 'Seuil : ', 7, 'ROC :', 0.86502786816447907)
('Column :', 'ROLE_FAMILY_DESC', 'Seuil : ', 8, 'ROC :', 0.86672534323441663)
('Column :', 'ROLE_FAMILY_DESC', 'Seuil : ', 9, 'ROC :', 0.86978737970876241)
('Column :', 'ROLE_FAMILY_DESC', 'Seuil : ', 10, 'ROC :', 0.86502722712404478)
ROLE_FAMILY
('Column :', 'ROLE_FAMILY', 'Seuil : ', 2, 'ROC :', 0.86844162215709253)
('Column :', 'ROLE_FAMILY', 'Seuil : ', 3, 'ROC :', 0.86843905799535559)
('Column :', 'ROLE_FAMILY', 'Seuil : ', 4, 'ROC :', 0.86844504103940867)
('Column :', 'ROLE_FAMILY', 'Seuil : ', 5, 'ROC :', 0.86845487032606727)
('Column :', 'ROLE_FAMILY', 'Seuil : ', 6, 'ROC :', 0.86845487032606727)
('Column :', 'ROLE_FAMILY', 'Seuil : ', 7, 'ROC :', 0.86844974200259317)
('Column :', 'ROLE_FAMILY', 'Seuil : ', 9, 'ROC :', 0.86771254550318666)
('Column :', 'ROLE_FAMILY', 'Seuil : ', 15, 'ROC :', 0.86775186264982196)
('Column :', 'ROLE_FAMILY', 'Seuil : ', 16, 'ROC :', 0.86804588652900527)
('Column :', 'ROLE_FAMILY', 'Seuil : ', 19, 'ROC :', 0.86639221588874271)
ROLE_CODE
('Column :', 'ROLE_CODE', 'Seuil : ', 1, 'ROC :', 0.86902796047430153)
('Column :', 'ROLE_CODE', 'Seuil : ', 2, 'ROC :', 0.869163861046366)
('Column :', 'ROLE_CODE', 'Seuil : ', 3, 'ROC :', 0.86855166743164158)
('Column :', 'ROLE_CODE', 'Seuil : ', 4, 'ROC :', 0.86891834256004208)
('Column :', 'ROLE_CODE', 'Seuil : ', 5, 'ROC :', 0.87141583609194395)
('Column :', 'ROLE_CODE', 'Seuil : ', 6, 'ROC :', 0.87096924458940517)
('Column :', 'ROLE_CODE', 'Seuil : ', 7, 'ROC :', 0.8671950121925891)
('Column :', 'ROLE_CODE', 'Seuil : ', 8, 'ROC :', 0.86718731970737783)
('Column :', 'ROLE_CODE', 'Seuil : ', 9, 'ROC :', 0.86680119968580471)
('Column :', 'ROLE_CODE', 'Seuil : ', 10, 'ROC :', 0.86696124611422654)
('Choose :', 'ROLE_CODE', 'Roc :', 0.87141583609194395)
RESOURCE
('Column :', 'RESOURCE', 'Seuil : ', 1, 'ROC :', 0.86735783646289266)
('Column :', 'RESOURCE', 'Seuil : ', 2, 'ROC :', 0.86449366780259029)
('Column :', 'RESOURCE', 'Seuil : ', 3, 'ROC :', 0.86287811634349021)
('Column :', 'RESOURCE', 'Seuil : ', 4, 'ROC :', 0.86638418921798421)
('Column :', 'RESOURCE', 'Seuil : ', 5, 'ROC :', 0.86717259748561693)
('Column :', 'RESOURCE', 'Seuil : ', 6, 'ROC :', 0.8636079266993395)
('Column :', 'RESOURCE', 'Seuil : ', 7, 'ROC :', 0.86552248028979339)
('Column :', 'RESOURCE', 'Seuil : ', 8, 'ROC :', 0.86344150676981424)
('Column :', 'RESOURCE', 'Seuil : ', 9, 'ROC :', 0.86594587683784363)
('Column :', 'RESOURCE', 'Seuil : ', 10, 'ROC :', 0.86087050726811642)
MGR_ID
('Column :', 'MGR_ID', 'Seuil : ', 1, 'ROC :', 0.86783904414888213)
('Column :', 'MGR_ID', 'Seuil : ', 2, 'ROC :', 0.86785421543915975)
('Column :', 'MGR_ID', 'Seuil : ', 3, 'ROC :', 0.86739864937054101)
('Column :', 'MGR_ID', 'Seuil : ', 4, 'ROC :', 0.87023931321492032)
('Column :', 'MGR_ID', 'Seuil : ', 5, 'ROC :', 0.86850914508283505)
('Column :', 'MGR_ID', 'Seuil : ', 6, 'ROC :', 0.86946344060931313)
('Column :', 'MGR_ID', 'Seuil : ', 7, 'ROC :', 0.86801575762859484)
('Column :', 'MGR_ID', 'Seuil : ', 8, 'ROC :', 0.86519752823353946)
('Column :', 'MGR_ID', 'Seuil : ', 9, 'ROC :', 0.86270089138809192)
('Column :', 'MGR_ID', 'Seuil : ', 10, 'ROC :', 0.86432090347325807)
ROLE_ROLLUP_2
('Column :', 'ROLE_ROLLUP_2', 'Seuil : ', 1, 'ROC :', 0.87182011892582123)
('Column :', 'ROLE_ROLLUP_2', 'Seuil : ', 2, 'ROC :', 0.87144404187105173)
('Column :', 'ROLE_ROLLUP_2', 'Seuil : ', 3, 'ROC :', 0.87066838294558946)
('Column :', 'ROLE_ROLLUP_2', 'Seuil : ', 4, 'ROC :', 0.87049487466804787)
('Column :', 'ROLE_ROLLUP_2', 'Seuil : ', 5, 'ROC :', 0.87056474807538298)
('Column :', 'ROLE_ROLLUP_2', 'Seuil : ', 6, 'ROC :', 0.86849546955357093)
('Column :', 'ROLE_ROLLUP_2', 'Seuil : ', 7, 'ROC :', 0.86830486686444897)
('Column :', 'ROLE_ROLLUP_2', 'Seuil : ', 8, 'ROC :', 0.86849034123009683)
('Column :', 'ROLE_ROLLUP_2', 'Seuil : ', 9, 'ROC :', 0.86785229231785699)
('Column :', 'ROLE_ROLLUP_2', 'Seuil : ', 10, 'ROC :', 0.8685988907436325)
ROLE_DEPTNAME
('Column :', 'ROLE_DEPTNAME', 'Seuil : ', 1, 'ROC :', 0.86931664234986605)
('Column :', 'ROLE_DEPTNAME', 'Seuil : ', 2, 'ROC :', 0.86832345703704283)
('Column :', 'ROLE_DEPTNAME', 'Seuil : ', 3, 'ROC :', 0.8663496697208608)
('Column :', 'ROLE_DEPTNAME', 'Seuil : ', 4, 'ROC :', 0.86499325198102861)
('Column :', 'ROLE_DEPTNAME', 'Seuil : ', 5, 'ROC :', 0.86665267419561054)
('Column :', 'ROLE_DEPTNAME', 'Seuil : ', 6, 'ROC :', 0.86819667590027694)
('Column :', 'ROLE_DEPTNAME', 'Seuil : ', 7, 'ROC :', 0.86538813288853667)
('Column :', 'ROLE_DEPTNAME', 'Seuil : ', 8, 'ROC :', 0.86639926733351968)
('Column :', 'ROLE_DEPTNAME', 'Seuil : ', 9, 'ROC :', 0.8659377796718517)
('Column :', 'ROLE_DEPTNAME', 'Seuil : ', 10, 'ROC :', 0.86567254116120629)
ROLE_FAMILY_DESC
('Column :', 'ROLE_FAMILY_DESC', 'Seuil : ', 1, 'ROC :', 0.86645365437886201)
('Column :', 'ROLE_FAMILY_DESC', 'Seuil : ', 2, 'ROC :', 0.86903052463603858)
('Column :', 'ROLE_FAMILY_DESC', 'Seuil : ', 3, 'ROC :', 0.86535757662783674)
('Column :', 'ROLE_FAMILY_DESC', 'Seuil : ', 4, 'ROC :', 0.8700519157279698)
('Column :', 'ROLE_FAMILY_DESC', 'Seuil : ', 5, 'ROC :', 0.86437678476340907)
('Column :', 'ROLE_FAMILY_DESC', 'Seuil : ', 6, 'ROC :', 0.86697556268392528)
('Column :', 'ROLE_FAMILY_DESC', 'Seuil : ', 7, 'ROC :', 0.86414280500490182)
('Column :', 'ROLE_FAMILY_DESC', 'Seuil : ', 8, 'ROC :', 0.86512487895019807)
('Column :', 'ROLE_FAMILY_DESC', 'Seuil : ', 9, 'ROC :', 0.8659573767941654)
('Column :', 'ROLE_FAMILY_DESC', 'Seuil : ', 10, 'ROC :', 0.86436375027457901)
ROLE_FAMILY
('Column :', 'ROLE_FAMILY', 'Seuil : ', 2, 'ROC :', 0.87141754553310202)
('Column :', 'ROLE_FAMILY', 'Seuil : ', 3, 'ROC :', 0.87144703339307827)
('Column :', 'ROLE_FAMILY', 'Seuil : ', 4, 'ROC :', 0.87141241720962803)
('Column :', 'ROLE_FAMILY', 'Seuil : ', 5, 'ROC :', 0.87114916327128911)
('Column :', 'ROLE_FAMILY', 'Seuil : ', 6, 'ROC :', 0.87114617174926257)
('Column :', 'ROLE_FAMILY', 'Seuil : ', 7, 'ROC :', 0.87113847926405152)
('Column :', 'ROLE_FAMILY', 'Seuil : ', 9, 'ROC :', 0.87040918893000097)
('Column :', 'ROLE_FAMILY', 'Seuil : ', 15, 'ROC :', 0.87040363324623737)
('Column :', 'ROLE_FAMILY', 'Seuil : ', 16, 'ROC :', 0.87065257061487733)
('Column :', 'ROLE_FAMILY', 'Seuil : ', 19, 'ROC :', 0.86772985359491206)
('Choose :', 'ROLE_ROLLUP_2', 'Roc :', 0.87182011892582123)
RESOURCE
('Column :', 'RESOURCE', 'Seuil : ', 1, 'ROC :', 0.86822003584698115)
('Column :', 'RESOURCE', 'Seuil : ', 2, 'ROC :', 0.86354920156277115)
('Column :', 'RESOURCE', 'Seuil : ', 3, 'ROC :', 0.86202407841466022)
('Column :', 'RESOURCE', 'Seuil : ', 4, 'ROC :', 0.8673635201363733)
('Column :', 'RESOURCE', 'Seuil : ', 5, 'ROC :', 0.8676298742808437)
('Column :', 'RESOURCE', 'Seuil : ', 6, 'ROC :', 0.86371979544001709)
('Column :', 'RESOURCE', 'Seuil : ', 7, 'ROC :', 0.8655099083741743)
('Column :', 'RESOURCE', 'Seuil : ', 8, 'ROC :', 0.8630988706584275)
('Column :', 'RESOURCE', 'Seuil : ', 9, 'ROC :', 0.86514340507138299)
('Column :', 'RESOURCE', 'Seuil : ', 10, 'ROC :', 0.86180140634988267)
MGR_ID
('Column :', 'MGR_ID', 'Seuil : ', 1, 'ROC :', 0.86867581559574447)
('Column :', 'MGR_ID', 'Seuil : ', 2, 'ROC :', 0.86568066100670693)
('Column :', 'MGR_ID', 'Seuil : ', 3, 'ROC :', 0.86795571350791856)
('Column :', 'MGR_ID', 'Seuil : ', 4, 'ROC :', 0.87056154287321164)
('Column :', 'MGR_ID', 'Seuil : ', 5, 'ROC :', 0.86832174759588465)
('Column :', 'MGR_ID', 'Seuil : ', 6, 'ROC :', 0.86918736586228917)
('Column :', 'MGR_ID', 'Seuil : ', 7, 'ROC :', 0.8681582822851468)
('Column :', 'MGR_ID', 'Seuil : ', 8, 'ROC :', 0.86543618154698487)
('Column :', 'MGR_ID', 'Seuil : ', 9, 'ROC :', 0.86433832233735308)
('Column :', 'MGR_ID', 'Seuil : ', 10, 'ROC :', 0.8647240571063286)
ROLE_DEPTNAME
('Column :', 'ROLE_DEPTNAME', 'Seuil : ', 1, 'ROC :', 0.86785293335829117)
('Column :', 'ROLE_DEPTNAME', 'Seuil : ', 2, 'ROC :', 0.86768839964682953)
('Column :', 'ROLE_DEPTNAME', 'Seuil : ', 3, 'ROC :', 0.86552248028979339)
('Column :', 'ROLE_DEPTNAME', 'Seuil : ', 4, 'ROC :', 0.86631507735648605)
('Column :', 'ROLE_DEPTNAME', 'Seuil : ', 5, 'ROC :', 0.86576241210313232)
('Column :', 'ROLE_DEPTNAME', 'Seuil : ', 6, 'ROC :', 0.86679693160025584)
('Column :', 'ROLE_DEPTNAME', 'Seuil : ', 7, 'ROC :', 0.86481931634319753)
('Column :', 'ROLE_DEPTNAME', 'Seuil : ', 8, 'ROC :', 0.86505821074503431)
('Column :', 'ROLE_DEPTNAME', 'Seuil : ', 9, 'ROC :', 0.86381461751544864)
('Column :', 'ROLE_DEPTNAME', 'Seuil : ', 10, 'ROC :', 0.866302042867656)
ROLE_FAMILY_DESC
('Column :', 'ROLE_FAMILY_DESC', 'Seuil : ', 1, 'ROC :', 0.86555998295333492)
('Column :', 'ROLE_FAMILY_DESC', 'Seuil : ', 2, 'ROC :', 0.86715804552754627)
('Column :', 'ROLE_FAMILY_DESC', 'Seuil : ', 3, 'ROC :', 0.86446631674406171)
('Column :', 'ROLE_FAMILY_DESC', 'Seuil : ', 4, 'ROC :', 0.87085898563471109)
('Column :', 'ROLE_FAMILY_DESC', 'Seuil : ', 5, 'ROC :', 0.86432806569040488)
('Column :', 'ROLE_FAMILY_DESC', 'Seuil : ', 6, 'ROC :', 0.86705612009849797)
('Column :', 'ROLE_FAMILY_DESC', 'Seuil : ', 7, 'ROC :', 0.86286649350027733)
('Column :', 'ROLE_FAMILY_DESC', 'Seuil : ', 8, 'ROC :', 0.86655529512039209)
('Column :', 'ROLE_FAMILY_DESC', 'Seuil : ', 9, 'ROC :', 0.86610182457202012)
('Column :', 'ROLE_FAMILY_DESC', 'Seuil : ', 10, 'ROC :', 0.86519453867738827)
ROLE_FAMILY
('Column :', 'ROLE_FAMILY', 'Seuil : ', 2, 'ROC :', 0.87182225572726879)
('Column :', 'ROLE_FAMILY', 'Seuil : ', 3, 'ROC :', 0.87181328116118917)
('Column :', 'ROLE_FAMILY', 'Seuil : ', 4, 'ROC :', 0.87187140216056269)
('Column :', 'ROLE_FAMILY', 'Seuil : ', 5, 'ROC :', 0.87158720756803787)
('Column :', 'ROLE_FAMILY', 'Seuil : ', 6, 'ROC :', 0.87127523455669498)
('Column :', 'ROLE_FAMILY', 'Seuil : ', 7, 'ROC :', 0.87127523455669498)
('Column :', 'ROLE_FAMILY', 'Seuil : ', 9, 'ROC :', 0.87061539026968993)
('Column :', 'ROLE_FAMILY', 'Seuil : ', 15, 'ROC :', 0.87054701262336831)
('Column :', 'ROLE_FAMILY', 'Seuil : ', 16, 'ROC :', 0.87100257869198683)
('Column :', 'ROLE_FAMILY', 'Seuil : ', 19, 'ROC :', 0.86704094880822025)
('Choose :', 'ROLE_FAMILY', 'Roc :', 0.87187140216056269)
RESOURCE
('Column :', 'RESOURCE', 'Seuil : ', 1, 'ROC :', 0.86818285550179364)
('Column :', 'RESOURCE', 'Seuil : ', 2, 'ROC :', 0.86293123858413834)
('Column :', 'RESOURCE', 'Seuil : ', 3, 'ROC :', 0.86202450458129132)
('Column :', 'RESOURCE', 'Seuil : ', 4, 'ROC :', 0.86746580012784991)
('Column :', 'RESOURCE', 'Seuil : ', 5, 'ROC :', 0.86767590027700825)
('Column :', 'RESOURCE', 'Seuil : ', 6, 'ROC :', 0.86371936927338588)
('Column :', 'RESOURCE', 'Seuil : ', 7, 'ROC :', 0.86550905604091199)
('Column :', 'RESOURCE', 'Seuil : ', 8, 'ROC :', 0.86309801832516508)
('Column :', 'RESOURCE', 'Seuil : ', 9, 'ROC :', 0.8653673556360536)
('Column :', 'RESOURCE', 'Seuil : ', 10, 'ROC :', 0.86130364372469626)
MGR_ID
('Column :', 'MGR_ID', 'Seuil : ', 1, 'ROC :', 0.8686642768679278)
('Column :', 'MGR_ID', 'Seuil : ', 2, 'ROC :', 0.86585929760772251)
('Column :', 'MGR_ID', 'Seuil : ', 3, 'ROC :', 0.86808648575650893)
('Column :', 'MGR_ID', 'Seuil : ', 4, 'ROC :', 0.87051496060165479)
('Column :', 'MGR_ID', 'Seuil : ', 5, 'ROC :', 0.86832388439733221)
('Column :', 'MGR_ID', 'Seuil : ', 6, 'ROC :', 0.86907368802527929)
('Column :', 'MGR_ID', 'Seuil : ', 7, 'ROC :', 0.86816084644688385)
('Column :', 'MGR_ID', 'Seuil : ', 8, 'ROC :', 0.86545578521201794)
('Column :', 'MGR_ID', 'Seuil : ', 9, 'ROC :', 0.86477038359004865)
('Column :', 'MGR_ID', 'Seuil : ', 10, 'ROC :', 0.86472448327295981)
ROLE_DEPTNAME
('Column :', 'ROLE_DEPTNAME', 'Seuil : ', 1, 'ROC :', 0.86784823239510667)
('Column :', 'ROLE_DEPTNAME', 'Seuil : ', 2, 'ROC :', 0.86760463703008539)
('Column :', 'ROLE_DEPTNAME', 'Seuil : ', 3, 'ROC :', 0.86596484125292994)
('Column :', 'ROLE_DEPTNAME', 'Seuil : ', 4, 'ROC :', 0.86631507735648605)
('Column :', 'ROLE_DEPTNAME', 'Seuil : ', 5, 'ROC :', 0.86576241210313232)
('Column :', 'ROLE_DEPTNAME', 'Seuil : ', 6, 'ROC :', 0.86686256126145333)
('Column :', 'ROLE_DEPTNAME', 'Seuil : ', 7, 'ROC :', 0.86463426933783927)
('Column :', 'ROLE_DEPTNAME', 'Seuil : ', 8, 'ROC :', 0.86535351670508642)
('Column :', 'ROLE_DEPTNAME', 'Seuil : ', 9, 'ROC :', 0.86387826898569453)
('Column :', 'ROLE_DEPTNAME', 'Seuil : ', 10, 'ROC :', 0.86605930222321392)
ROLE_FAMILY_DESC
('Column :', 'ROLE_FAMILY_DESC', 'Seuil : ', 1, 'ROC :', 0.86555998295333492)
('Column :', 'ROLE_FAMILY_DESC', 'Seuil : ', 2, 'ROC :', 0.86755292643505444)
('Column :', 'ROLE_FAMILY_DESC', 'Seuil : ', 3, 'ROC :', 0.86464773118695915)
('Column :', 'ROLE_FAMILY_DESC', 'Seuil : ', 4, 'ROC :', 0.87085129314950005)
('Column :', 'ROLE_FAMILY_DESC', 'Seuil : ', 5, 'ROC :', 0.86452700190517207)
('Column :', 'ROLE_FAMILY_DESC', 'Seuil : ', 6, 'ROC :', 0.86704970969415529)
('Column :', 'ROLE_FAMILY_DESC', 'Seuil : ', 7, 'ROC :', 0.86279256017019201)
('Column :', 'ROLE_FAMILY_DESC', 'Seuil : ', 8, 'ROC :', 0.86655486895376088)
('Column :', 'ROLE_FAMILY_DESC', 'Seuil : ', 9, 'ROC :', 0.86609584152796693)
('Column :', 'ROLE_FAMILY_DESC', 'Seuil : ', 10, 'ROC :', 0.86528449801833029)

In [8]:
# [column, threshold] pairs; they match the "Choose" lines and the winning
# "Seuil" values printed by the search cell's recorded output.
Regle = [
    ['ROLE_CODE', 5],
    ['ROLE_ROLLUP_2', 1],
    ['ROLE_FAMILY', 4],
]

In [58]:
# Persist the transformed feature frame, without the index column.
df.to_csv(path_or_buf='De.csv', index=False)