Comparison of various Sibling Detection Approaches, including Beverly, Hand-Tuned, and Machine-Learning

For questions and comments, please contact me under scheitle@net.in.tum.de



In [814]:

    
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import precision_score
from sklearn.metrics import f1_score
from IPython.display import Image  
import pandas
import seaborn
import matplotlib.pyplot as plt
%matplotlib inline

Versions:

* Python: 3.5.3
* numpy: 1.12.1
* pandas: 0.19.2
* sklearn: 0.18.1
* matplotlib 2.0.0
* jupyter notebook 5.0.0



In [815]:

    
def get_pd_files(folder):
    sibf = folder + "hosts.csvcapture.pcap.ts.siblingresult.csv"
    nonsibf = folder + "hosts.csv__nonsiblings_seed1_n*capture.pcap.ts.siblingresult.csv"
    import glob
    for filename in glob.glob(nonsibf):
        nonsibf = filename
    import os.path
    if os.path.isfile(sibf) and os.path.isfile(nonsibf):
        print("Loading from filenames {} and {}".format(sibf, nonsibf))
    else:
        print("Files not found {} and {}".format(sibf, nonsibf))
        
    sib = pd.read_csv(sibf, index_col=0)
    sib['label'] = 1
    nonsib = pd.read_csv(nonsibf, index_col=0)
    nonsib['label'] = 0
    print("Read {} siblings and {} non-siblings from files.".format(len(sib), len(nonsib)))
    return sib, nonsib
       
def dec2prd_ours(df):
    df.loc[df["decision"].str.contains("^sibling"), "dec_prd"] =  1
    df.loc[df["decision"].str.contains("^non-sibling"), "dec_prd"] =  0
    return  # df is changed in place so no returning necessary

def dec2prd_bev(df):
    df.loc[df["dec_bev"].str.contains("^sibling"), "dec_bev_prd"] =  1
    df.loc[df["dec_bev"].str.contains("^non-sibling"), "dec_bev_prd"] =  0
    return  # df is changed in place so no returning necessary

def mix_sib_nonsib(sib, nonsib, mode, rs=42):
    if mode == "equal":
        nonsibint = nonsib.sample(n=len(sib), replace=True, weights=None, random_state=rs)
    else:
        nonsibint = nonsib
    datain = pd.concat([sib,nonsibint])
    return datain


def get_ouralgo_stats(sib, nonsib):
    #print("Our algo stats:")
    df = mix_sib_nonsib(sib, nonsib, "full", 42)
    df_ours = df[["label", "decision"]].copy()
    dec2prd_ours(df_ours)
    undec = len(df_ours[df_ours.isnull().any(axis=1)])
    print("Our algo: Not deciding on {} pairs for unknown/error reasons.".format(undec))
    df_ours = df_ours.dropna()
    weights = get_sample_weight_one_input(df_ours)
    mcc = matthews_corrcoef(df_ours["label"], df_ours["dec_prd"], sample_weight=None)
    f1 = f1_score(df_ours["label"], df_ours["dec_prd"], sample_weight=None)
    print("Our algo stats: ({}) undecided, mcc: {}, f1: {}".format(undec, mcc, f1))
    statsv = list(stats(df_ours["label"], df_ours["dec_prd"]))
    statsv.append(mcc)
    return statsv

def get_bev_stats(sib, nonsib):
    df = mix_sib_nonsib(sib, nonsib, "full", 42)
    df_tmp = df[["label", "dec_bev"]].copy()
    dec_nan = len(df_tmp[df_tmp["dec_bev"].isnull() == True])
    df_tmp = df_tmp[df_tmp["dec_bev"].isnull() == False]
    dec2prd_bev(df_tmp)
    undec = len(df_tmp[df_tmp.isnull().any(axis=1)])
    df_tmp = df_tmp.dropna()
    weights = get_sample_weight_one_input(df_tmp)
    mcc = matthews_corrcoef(df_tmp["label"], df_tmp["dec_bev_prd"], sample_weight=None)
    f1 = f1_score(df_tmp["label"], df_tmp["dec_bev_prd"], sample_weight=None)
    print("Beverly algo: Not deciding on {} pairs for NaN and {} pairs for unknown/error reasons.".format(dec_nan, undec))
    print("Beverly algo stats: ({}) undecided, mcc: {}, f1: {}".format(undec, mcc, f1))
    statsv =  list(stats(df_tmp["label"], df_tmp["dec_bev_prd"]))
    statsv.append(mcc)
    return statsv
    

def match_nonsibs_slow(sib, nonsib, rs=42):
    ctr = 0 
    for i, ii in sib.iterrows():
        for j, jj in sib.iterrows():
            if ii[1] != jj[1]:
                nscand = ii[1] + "_+_" +  jj[1]
                ctr += 1 
    return
                
def match_nonsibs(sib, nonsib, rs=42):
    ctr = 0 
    a = []
    sd = dict() # siblings dict
    nsd = dict()  # non siblings dict
    for i in sib.itertuples():
        sd[i[0]] = 0
    for i in nonsib.itertuples():
        nsd[i[0]] = 0
    nscand = dict()
    #nstmp = pandas.DataFrame()
    for i in sd.keys():
        for j in sd.keys():
            if i != j:
                nscandstr = i + "_+_" +  j
                nscand[nscandstr] = 1
    print("Generated {} non-sibling candidates from {} siblings.".format(len(nscand), len(sib)))
    fails = []
    for k1 in nsd.keys():
        if k1 not in nscand.keys():
            fails.append(k1)
            #print("fail! {} ".format(i))    
    nsfiltered = nonsib.copy()
    nsfiltered.drop(fails, inplace=True)
    return nsfiltered

def assign_groups_old(datain):
    datain["group"] = "servers"
    datain.loc[datain["domain"].str.contains("nlnog.net"), "group"] = "nlnog"
    datain.loc[datain["domain"].str.contains("RA_"), "group"] = "RA"
    datain.loc[datain["domain"].str.extract("RA_([0-9]{4})") < 6019, "group"] = "RAv1"
    datain.loc[datain["domain"].str.extract("RA_([0-9]{4})") > 6018, "group"] = "RAv2"
    return

def assign_groups(datain):
    datain["group"] = "servers"
    #sib.loc[sib.index.str.contains("nlnog.net"), "group"] = "nlnog"
    datain.loc[datain.index.str.contains("nlnog.net"), "group"] = "nlnog"
    datain.loc[datain.index.str.contains("RA_"), "group"] = "RA"
    datain["ra_id"] = datain.index.str.extract("RA_([0-9]{4})", expand=False).astype(float).fillna(0).astype(int) 
    datain.loc[(datain.ra_id > 5999) & (datain.ra_id < 6019), "group"] = "RAv1"
    datain.loc[datain.ra_id > 6018, "group"] = "RAv2"    
    groups = datain["group"].as_matrix()
    return groups
    
    
def prune_datain(datain):
    errorc = len(datain[datain["decision"].str.contains("ERROR|error") == True])
    print("Removing {} errors values from datain.".format(errorc))
    datain = datain[datain["decision"].str.contains("ERROR|error") == False]

    hzdiffc = len(datain[datain["hzdiff"] != 0])
    print("Deciding {} hzdiff hosts as non-sib, stats:".format(hzdiffc))
    lbl = datain[datain["hzdiff"] != 0]["label"]
    prd = lbl.copy()
    prd[:] = 0
    stats(lbl,prd)
    dataout = datain[datain["hzdiff"] == 0]
    return dataout, lbl, prd

def prune_data_for_ml(datain):
    # problem: NaNs might be in non-feature columns such as RA_ID
    erridx = datain[datain.decision.str.contains("ERROR|error") == True].index
    labels, features = make_labels_features(datain)
    naidx = datain[features.isnull().any(axis=1) == True].index
    bothidx = erridx | naidx
    dataout = datain.drop(bothidx)
    lbl = datain.loc[bothidx, "label"]
    prd = lbl.copy()
    prd[:] = 0
    stats(lbl,prd)    
    print("Removing {} rows with error results and {} rows with NaNs (typically hz different) from a \
    total of {} entries, resulting in {} entries.".format(
            len(erridx), len(naidx), len(datain), len(dataout)))
    return dataout, lbl, prd


def stats(lbl, prd):
        tp = np.sum((lbl == 1) & (prd == 1)) 
        fp = np.sum(lbl < prd ) 
        tn = np.sum((lbl == 0) & (prd == 0)) 
        fn = np.sum(lbl > prd ) 
        try:
            prec =  round(100*tp/(tp+fp),2) # TPR?
            recall = round(100*tp/(tp+fn),2) 
            spec= round(100*tn/(tn+fp),2) # TNR?
            acc = round(100*(tn+tp)/(tn+fn+fp+tp),2)
        except ZeroDivisionError as e:
            print("Catching ZeroDivisionError at stats!")
            prec = 0
            recall = 0
            spec = 0
            acc = 0
        print("Correct: {}, incorrect {}, TP {}, FP {}, TN {}, FN{}, Prec. {}, Rec. {}, Spec. {}, Acc. {}%".format(
        np.sum(lbl == prd),
        np.sum(lbl != prd),
        tp, fp, tn, fn, 
        prec, recall, spec, acc
        ))
        return prec, recall, spec, acc
        
def make_labels_features(dfin):
    labels = dfin["label"]
    features = dfin[["hzdiff", "hzr2diff", "timestamps_diff", "adiff", 
                        "theta", "r2diff", "ott_rng_diff_rel", "optsdiff",
                       "perc_85_val"]].copy()
    features["hzr2mean"] = (dfin["hz4r2"] + dfin["hz6r2"])  / 2.0
    features["r2mean"] = (dfin["r4_sqr"] + dfin["r6_sqr"]) / 2.0     
    features["ott_rng_mean"] = (dfin["ott4_rng"] + dfin["ott6_rng"]) / 2.0
    features["splinediff_scaled"] = dfin["perc_85_val"] / features["ott_rng_mean"]
    return labels, features   

def get_sample_weight(sib, nonsib):
    sl = len(sib)
    nsl = len(nonsib)
    tl = sl + nsl
    nsw = sl / tl
    sw = nsl / tl
    print("Found {} sibs and {} nonsibs, weights: {} and {}".format(sl, nsl, sw, nsw))
    weight = np.zeros(len(datain))
    weight = np.float32(datain["label"].as_matrix())
    weight[weight == 1] = sw
    weight[weight == 0] = nsw
    
    
def get_sample_weight_one_input(dfin):
    sl = len(dfin[dfin["label"] == 1])
    nsl = len(dfin[dfin["label"] == 0])
    tl = sl + nsl
    nsw = sl / tl
    sw = nsl / tl
    weight = np.zeros(len(dfin))
    weight = np.float32(dfin["label"].as_matrix())
    weight[weight == 1] = sw
    weight[weight == 0] = nsw
    print("Found {} sibs and {} nonsibs, weights: {} and {}, #weights: {}".format(
        sl, nsl, round(sw,4), round(nsw,4), len(weight)))
    return weight


# functions for ML with proprtional group sampling
def split_stratified_groups(sib, splits, nr):
    from sklearn.model_selection import KFold # non-overlapping!
    groups = assign_groups(sib)
    groupset = set(groups)
    gsibdf_train = pd.DataFrame(columns=sib.columns)
    gsibdf_test = pd.DataFrame(columns=sib.columns)
    for i in groupset:
        groupsib = sib[sib["group"] == i].copy()
        if len(groupsib ) < splits:
            # can not split into more folds than files...
            print("ERROR: more splits ({}) than samples ({}), reducing to sample nr".format(splits, len(groupsib)))
            splits = len(groupsib)
        #print("## GROUP: {} with {} elements.".format(i, len(groupsib)))
        ks = KFold(n_splits=splits, random_state=42, shuffle=True)
        labels, features = make_labels_features(groupsib)
        ctr = -1
        for train_index, test_index in ks.split(groupsib):
            ctr += 1                
            if (ctr == nr):
            #print("TRAIN:", train_index, "TEST:", test_index)
                gsibdf_train = gsibdf_train.append(groupsib.iloc[train_index])
                gsibdf_test = gsibdf_test.append(groupsib.iloc[test_index])
                break
    return [gsibdf_train, gsibdf_test]


def dt_train(labels, features, weight, rs=42):
    estimator = DecisionTreeClassifier(max_depth=30, min_samples_leaf=5, random_state=42)
    est = estimator.fit(features, labels, sample_weight=weight)
    return est

def kfold_train_test(sib, nonsib):
    kfolds = 10
    stats_train_error = np.empty((10,5), dtype=float)
    stats_test_error = np.empty((10,5), dtype=float)
    graphs = []
    for i in range(10):
        print("Round {}".format(i))
        # pick proportionally from each group
        train_sib, test_sib = split_stratified_groups(sib, 10, i)
        # create, select, and mix matching nonsibs
        train_nonsib = match_nonsibs(train_sib, nonsib)
        test_nonsib = match_nonsibs(test_sib, nonsib)
        train = mix_sib_nonsib(train_sib,train_nonsib, "all")
        # prune NaNs out
        train, train_prune_lbl, train_prune_prd = prune_data_for_ml(train)
        test = mix_sib_nonsib(test_sib,test_nonsib, "all")
        test, test_prune_lbl, test_prune_prd = prune_data_for_ml(test)
        # split out features, labels, and weights
        train_lbl, train_ftr = make_labels_features(train)
        test_lbl, test_ftr = make_labels_features(test)
        train_weight = get_sample_weight_one_input(train)
        test_weight = get_sample_weight_one_input(test)
        # train estimator
        est = dt_train(train_lbl, train_ftr, train_weight)   
        mcc = matthews_corrcoef(train_lbl, est.predict(train_ftr), sample_weight=train_weight)
        statsv = list(stats(train_lbl, est.predict(train_ftr)))
        statsv.append(mcc)
        stats_train_error[i] = statsv
        #print("test error: mcc of {}".format(mcc))
        mcc = matthews_corrcoef(test_lbl, est.predict(test_ftr), sample_weight=test_weight)
        statsv = list(stats(test_lbl, est.predict(test_ftr)))
        statsv.append(mcc)
        stats_test_error[i] = statsv
        #stats_test_error[i]  =  stats(test_lbl, est.predict(test_ftr))
        graph = dt_plot(est, train_ftr)
        graphs.append(graph)
        #Image(graph.create_png())  
    return stats_train_error, stats_test_error, graphs

One function to evaluate them all



In [867]:

    
runs = 8
hlostats = np.zeros((runs-1,5), dtype=float)
hlbstats = np.zeros((runs-1,5), dtype=float)
gsdo =  dict() # group stats dict
gsdb =  dict() # group stats dict
mlstatsd_tee = dict()
mlstatsd_tre = dict()
graphs = []
for i in range(1,runs):
    print("############# Round {} ##############".format(i))
    sib, nonsib = get_pd_files("../../../gt{}/".format(i))
    # high level
    hlostats[i - 1] = tuple(get_ouralgo_stats(sib, nonsib))
    hlbstats[i - 1] = get_bev_stats(sib, nonsib)
    
    # group-level
    groups = assign_groups(sib)
    groupset = set(groups)
    for j in groupset:
        if j not in gsdo:
            gsdo[j] = np.zeros((runs,5), dtype=float)
            gsdb[j] = np.zeros((runs,5), dtype=float)
        print("## GROUP: {}".format(j))
        groupsib = sib[sib["group"] == j].copy()
        groupnonsib = match_nonsibs(groupsib, nonsib)
        gsdo[j][i-1] = get_ouralgo_stats(groupsib, groupnonsib)
        gsdb[j][i-1] = get_bev_stats(groupsib, groupnonsib)
    # decision-tree
    mlstatsd_tre[str(i)+"_tre"], mlstatsd_tee[str(i)+"_tee"], graph = kfold_train_test(sib, nonsib) # returns 2 sets of 10x4 arrays
    graphs.append(graph)









    



############# Round 1 ##############
Loading from filenames ../../../gt1/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../gt1/hosts.csv__nonsiblings_seed1_n679capture.pcap.ts.siblingresult.csv
Read 279 siblings and 82026 non-siblings from files.
Our algo: Not deciding on 70 pairs for unknown/error reasons.
Found 261 sibs and 81974 nonsibs, weights: 0.9968 and 0.0032, #weights: 82235
Our algo stats: (70) undecided, mcc: 0.988402745932266, f1: 0.9883720930232558
Correct: 82229, incorrect 6, TP 255, FP 0, TN 81974, FN6, Prec. 100.0, Rec. 97.7, Spec. 100.0, Acc. 99.99%
Found 279 sibs and 81974 nonsibs, weights: 0.9966 and 0.0034, #weights: 82253
Beverly algo: Not deciding on 52 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.10670720121610743, f1: 0.029152684563758385
Correct: 63737, incorrect 18516, TP 278, FP 18515, TN 63459, FN1, Prec. 1.48, Rec. 99.64, Spec. 77.41, Acc. 77.49%
## GROUP: nlnog
Generated 72630 non-sibling candidates from 270 siblings.
Our algo: Not deciding on 66 pairs for unknown/error reasons.
Found 252 sibs and 72582 nonsibs, weights: 0.9965 and 0.0035, #weights: 72834
Our algo stats: (66) undecided, mcc: 0.9940092551257894, f1: 0.9940119760479043
Correct: 72831, incorrect 3, TP 249, FP 0, TN 72582, FN3, Prec. 100.0, Rec. 98.81, Spec. 100.0, Acc. 100.0%
Found 270 sibs and 72582 nonsibs, weights: 0.9963 and 0.0037, #weights: 72852
Beverly algo: Not deciding on 48 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.11249292431428837, f1: 0.03221942747634447
Correct: 56692, incorrect 16160, TP 269, FP 16159, TN 56423, FN1, Prec. 1.64, Rec. 99.63, Spec. 77.74, Acc. 77.82%
## GROUP: servers
Generated 72 non-sibling candidates from 9 siblings.
Our algo: Not deciding on 0 pairs for unknown/error reasons.
Found 9 sibs and 72 nonsibs, weights: 0.8889 and 0.1111, #weights: 81
Our algo stats: (0) undecided, mcc: 0.7999999999999992, f1: 0.8
Correct: 78, incorrect 3, TP 6, FP 0, TN 72, FN3, Prec. 100.0, Rec. 66.67, Spec. 100.0, Acc. 96.3%
Found 9 sibs and 72 nonsibs, weights: 0.8889 and 0.1111, #weights: 81
Beverly algo: Not deciding on 0 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.349215147884789, f1: 0.36
Correct: 49, incorrect 32, TP 9, FP 32, TN 40, FN0, Prec. 21.95, Rec. 100.0, Spec. 55.56, Acc. 60.49%
Round 0
ERROR: more splits (10) than samples (9), reducing to sample nr
Generated 62750 non-sibling candidates from 251 siblings.
Generated 756 non-sibling candidates from 28 siblings.






    



/usr/local/lib/python3.5/dist-packages/ipykernel/__main__.py:184: RuntimeWarning: invalid value encountered in long_scalars
/usr/local/lib/python3.5/dist-packages/ipykernel/__main__.py:185: RuntimeWarning: invalid value encountered in long_scalars






    



Correct: 4900, incorrect 0, TP 0, FP 0, TN 4900, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 48 rows with error results and 4900 rows with NaNs (typically hz different) from a     total of 63001 entries, resulting in 58101 entries.
Catching ZeroDivisionError at stats!
Correct: 0, incorrect 0, TP 0, FP 0, TN 0, FN0, Prec. 0, Rec. 0, Spec. 0, Acc. 0%
Removing 0 rows with error results and 0 rows with NaNs (typically hz different) from a     total of 784 entries, resulting in 784 entries.
Found 251 sibs and 57850 nonsibs, weights: 0.9957 and 0.0043, #weights: 58101
Found 28 sibs and 756 nonsibs, weights: 0.9643 and 0.0357, #weights: 784
Correct: 58101, incorrect 0, TP 251, FP 0, TN 57850, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 784, incorrect 0, TP 28, FP 0, TN 756, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 1
ERROR: more splits (10) than samples (9), reducing to sample nr
Generated 62750 non-sibling candidates from 251 siblings.
Generated 756 non-sibling candidates from 28 siblings.
Correct: 4428, incorrect 0, TP 0, FP 0, TN 4428, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 44 rows with error results and 4428 rows with NaNs (typically hz different) from a     total of 63001 entries, resulting in 58573 entries.
Correct: 54, incorrect 0, TP 0, FP 0, TN 54, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 54 rows with NaNs (typically hz different) from a     total of 784 entries, resulting in 730 entries.
Found 251 sibs and 58322 nonsibs, weights: 0.9957 and 0.0043, #weights: 58573
Found 28 sibs and 702 nonsibs, weights: 0.9616 and 0.0384, #weights: 730
Correct: 58573, incorrect 0, TP 251, FP 0, TN 58322, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 730, incorrect 0, TP 28, FP 0, TN 702, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 2
ERROR: more splits (10) than samples (9), reducing to sample nr
Generated 62750 non-sibling candidates from 251 siblings.
Generated 756 non-sibling candidates from 28 siblings.
Correct: 4898, incorrect 0, TP 0, FP 0, TN 4898, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 46 rows with error results and 4898 rows with NaNs (typically hz different) from a     total of 63001 entries, resulting in 58103 entries.
Catching ZeroDivisionError at stats!
Correct: 0, incorrect 0, TP 0, FP 0, TN 0, FN0, Prec. 0, Rec. 0, Spec. 0, Acc. 0%
Removing 0 rows with error results and 0 rows with NaNs (typically hz different) from a     total of 784 entries, resulting in 784 entries.
Found 251 sibs and 57852 nonsibs, weights: 0.9957 and 0.0043, #weights: 58103
Found 28 sibs and 756 nonsibs, weights: 0.9643 and 0.0357, #weights: 784
Correct: 58103, incorrect 0, TP 251, FP 0, TN 57852, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 784, incorrect 0, TP 28, FP 0, TN 756, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 3
ERROR: more splits (10) than samples (9), reducing to sample nr
Generated 62750 non-sibling candidates from 251 siblings.
Generated 756 non-sibling candidates from 28 siblings.
Correct: 4414, incorrect 0, TP 0, FP 0, TN 4414, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 42 rows with error results and 4414 rows with NaNs (typically hz different) from a     total of 63001 entries, resulting in 58587 entries.
Correct: 54, incorrect 0, TP 0, FP 0, TN 54, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 54 rows with NaNs (typically hz different) from a     total of 784 entries, resulting in 730 entries.
Found 251 sibs and 58336 nonsibs, weights: 0.9957 and 0.0043, #weights: 58587
Found 28 sibs and 702 nonsibs, weights: 0.9616 and 0.0384, #weights: 730
Correct: 58587, incorrect 0, TP 251, FP 0, TN 58336, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 730, incorrect 0, TP 28, FP 0, TN 702, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 4
ERROR: more splits (10) than samples (9), reducing to sample nr
Generated 62750 non-sibling candidates from 251 siblings.
Generated 756 non-sibling candidates from 28 siblings.
Correct: 4428, incorrect 0, TP 0, FP 0, TN 4428, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 44 rows with error results and 4428 rows with NaNs (typically hz different) from a     total of 63001 entries, resulting in 58573 entries.
Correct: 54, incorrect 0, TP 0, FP 0, TN 54, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 54 rows with NaNs (typically hz different) from a     total of 784 entries, resulting in 730 entries.
Found 251 sibs and 58322 nonsibs, weights: 0.9957 and 0.0043, #weights: 58573
Found 28 sibs and 702 nonsibs, weights: 0.9616 and 0.0384, #weights: 730
Correct: 58573, incorrect 0, TP 251, FP 0, TN 58322, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 730, incorrect 0, TP 28, FP 0, TN 702, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 5
ERROR: more splits (10) than samples (9), reducing to sample nr
Generated 62750 non-sibling candidates from 251 siblings.
Generated 756 non-sibling candidates from 28 siblings.
Correct: 3958, incorrect 0, TP 0, FP 0, TN 3958, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 46 rows with error results and 3958 rows with NaNs (typically hz different) from a     total of 63001 entries, resulting in 59043 entries.
Correct: 104, incorrect 0, TP 0, FP 0, TN 104, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 104 rows with NaNs (typically hz different) from a     total of 784 entries, resulting in 680 entries.
Found 251 sibs and 58792 nonsibs, weights: 0.9957 and 0.0043, #weights: 59043
Found 28 sibs and 652 nonsibs, weights: 0.9588 and 0.0412, #weights: 680
Correct: 59043, incorrect 0, TP 251, FP 0, TN 58792, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 680, incorrect 0, TP 28, FP 0, TN 652, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 6
ERROR: more splits (10) than samples (9), reducing to sample nr
Generated 62750 non-sibling candidates from 251 siblings.
Generated 756 non-sibling candidates from 28 siblings.
Correct: 4418, incorrect 0, TP 0, FP 0, TN 4418, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 46 rows with error results and 4418 rows with NaNs (typically hz different) from a     total of 63001 entries, resulting in 58583 entries.
Correct: 54, incorrect 0, TP 0, FP 0, TN 54, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 54 rows with NaNs (typically hz different) from a     total of 784 entries, resulting in 730 entries.
Found 251 sibs and 58332 nonsibs, weights: 0.9957 and 0.0043, #weights: 58583
Found 28 sibs and 702 nonsibs, weights: 0.9616 and 0.0384, #weights: 730
Correct: 58583, incorrect 0, TP 251, FP 0, TN 58332, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 730, incorrect 0, TP 28, FP 0, TN 702, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 7
ERROR: more splits (10) than samples (9), reducing to sample nr
Generated 62750 non-sibling candidates from 251 siblings.
Generated 756 non-sibling candidates from 28 siblings.
Correct: 4426, incorrect 0, TP 0, FP 0, TN 4426, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 42 rows with error results and 4426 rows with NaNs (typically hz different) from a     total of 63001 entries, resulting in 58575 entries.
Correct: 54, incorrect 0, TP 0, FP 0, TN 54, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 54 rows with NaNs (typically hz different) from a     total of 784 entries, resulting in 730 entries.
Found 251 sibs and 58324 nonsibs, weights: 0.9957 and 0.0043, #weights: 58575
Found 28 sibs and 702 nonsibs, weights: 0.9616 and 0.0384, #weights: 730
Correct: 58575, incorrect 0, TP 251, FP 0, TN 58324, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 730, incorrect 0, TP 28, FP 0, TN 702, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 8
ERROR: more splits (10) than samples (9), reducing to sample nr
Generated 62750 non-sibling candidates from 251 siblings.
Generated 756 non-sibling candidates from 28 siblings.
Correct: 3959, incorrect 0, TP 0, FP 0, TN 3959, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 47 rows with error results and 3959 rows with NaNs (typically hz different) from a     total of 63001 entries, resulting in 59042 entries.
Correct: 104, incorrect 0, TP 0, FP 0, TN 104, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 104 rows with NaNs (typically hz different) from a     total of 784 entries, resulting in 680 entries.
Found 251 sibs and 58791 nonsibs, weights: 0.9957 and 0.0043, #weights: 59042
Found 28 sibs and 652 nonsibs, weights: 0.9588 and 0.0412, #weights: 680
Correct: 59042, incorrect 0, TP 251, FP 0, TN 58791, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 680, incorrect 0, TP 28, FP 0, TN 652, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 9
ERROR: more splits (10) than samples (9), reducing to sample nr
Generated 58806 non-sibling candidates from 243 siblings.
Generated 702 non-sibling candidates from 27 siblings.
Correct: 3304, incorrect 0, TP 0, FP 0, TN 3304, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 3304 rows with NaNs (typically hz different) from a     total of 59049 entries, resulting in 55745 entries.
Correct: 57, incorrect 0, TP 0, FP 0, TN 57, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 5 rows with error results and 57 rows with NaNs (typically hz different) from a     total of 729 entries, resulting in 672 entries.
Found 243 sibs and 55502 nonsibs, weights: 0.9956 and 0.0044, #weights: 55745
Found 27 sibs and 645 nonsibs, weights: 0.9598 and 0.0402, #weights: 672
Correct: 55745, incorrect 0, TP 243, FP 0, TN 55502, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 672, incorrect 0, TP 27, FP 0, TN 645, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
############# Round 2 ##############
Loading from filenames ../../../gt2/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../gt2/hosts.csv__nonsiblings_seed1_n679capture.pcap.ts.siblingresult.csv
Read 624 siblings and 410626 non-siblings from files.
Our algo: Not deciding on 124 pairs for unknown/error reasons.
Found 552 sibs and 410574 nonsibs, weights: 0.9987 and 0.0013, #weights: 411126
Our algo stats: (124) undecided, mcc: 0.980775474614777, f1: 0.9806807727690893
Correct: 411105, incorrect 21, TP 533, FP 2, TN 410572, FN19, Prec. 99.63, Rec. 96.56, Spec. 100.0, Acc. 99.99%
Found 622 sibs and 410575 nonsibs, weights: 0.9985 and 0.0015, #weights: 411197
Beverly algo: Not deciding on 53 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.08042930763160049, f1: 0.015917988872862148
Correct: 334785, incorrect 76412, TP 618, FP 76408, TN 334167, FN4, Prec. 0.8, Rec. 99.36, Spec. 81.39, Acc. 81.42%
## GROUP: RAv1
Generated 132 non-sibling candidates from 12 siblings.
Our algo: Not deciding on 0 pairs for unknown/error reasons.
Found 12 sibs and 132 nonsibs, weights: 0.9167 and 0.0833, #weights: 144
Our algo stats: (0) undecided, mcc: 1.0, f1: 1.0
Correct: 144, incorrect 0, TP 12, FP 0, TN 132, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Found 12 sibs and 132 nonsibs, weights: 0.9167 and 0.0833, #weights: 144
Beverly algo: Not deciding on 0 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.0, f1: 0.15384615384615385
Correct: 12, incorrect 132, TP 12, FP 132, TN 0, FN0, Prec. 8.33, Rec. 100.0, Spec. 0.0, Acc. 8.33%
## GROUP: RAv2






    



/usr/local/lib/python3.5/dist-packages/sklearn/metrics/classification.py:516: RuntimeWarning: invalid value encountered in double_scalars
  mcc = cov_ytyp / np.sqrt(var_yt * var_yp)






    



Generated 42642 non-sibling candidates from 207 siblings.
Our algo: Not deciding on 4 pairs for unknown/error reasons.
Found 205 sibs and 42640 nonsibs, weights: 0.9952 and 0.0048, #weights: 42845
Our algo stats: (4) undecided, mcc: 0.9802498011436569, f1: 0.9802955665024631
Correct: 42837, incorrect 8, TP 199, FP 2, TN 42638, FN6, Prec. 99.0, Rec. 97.07, Spec. 100.0, Acc. 99.98%
Found 205 sibs and 42640 nonsibs, weights: 0.9952 and 0.0048, #weights: 42845
Beverly algo: Not deciding on 4 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.05440088590079272, f1: 0.015414840562188304
Correct: 16785, incorrect 26060, TP 204, FP 26059, TN 16581, FN1, Prec. 0.78, Rec. 99.51, Spec. 38.89, Acc. 39.18%
## GROUP: nlnog
Generated 145542 non-sibling candidates from 382 siblings.
Our algo: Not deciding on 114 pairs for unknown/error reasons.
Found 312 sibs and 145498 nonsibs, weights: 0.9979 and 0.0021, #weights: 145810
Our algo stats: (114) undecided, mcc: 0.9838100405673874, f1: 0.9837133550488599
Correct: 145800, incorrect 10, TP 302, FP 0, TN 145498, FN10, Prec. 100.0, Rec. 96.79, Spec. 100.0, Acc. 99.99%
Found 382 sibs and 145498 nonsibs, weights: 0.9974 and 0.0026, #weights: 145880
Beverly algo: Not deciding on 44 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.08510041061435372, f1: 0.019642394402694996
Correct: 108048, incorrect 37832, TP 379, FP 37829, TN 107669, FN3, Prec. 0.99, Rec. 99.21, Spec. 74.0, Acc. 74.07%
## GROUP: servers
Generated 506 non-sibling candidates from 23 siblings.
Our algo: Not deciding on 0 pairs for unknown/error reasons.
Found 23 sibs and 506 nonsibs, weights: 0.9565 and 0.0435, #weights: 529
Our algo stats: (0) undecided, mcc: 0.9297526975413737, f1: 0.9302325581395349
Correct: 526, incorrect 3, TP 20, FP 0, TN 506, FN3, Prec. 100.0, Rec. 86.96, Spec. 100.0, Acc. 99.43%
Found 23 sibs and 506 nonsibs, weights: 0.9565 and 0.0435, #weights: 529
Beverly algo: Not deciding on 0 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.1403897717142205, f1: 0.1173469387755102
Correct: 183, incorrect 346, TP 23, FP 346, TN 160, FN0, Prec. 6.23, Rec. 100.0, Spec. 31.62, Acc. 34.59%
Round 0
Generated 311922 non-sibling candidates from 559 siblings.
Generated 4160 non-sibling candidates from 65 siblings.
Correct: 166135, incorrect 3, TP 0, FP 0, TN 166135, FN3, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 45 rows with error results and 166138 rows with NaNs (typically hz different) from a     total of 312481 entries, resulting in 146343 entries.
Correct: 2392, incorrect 0, TP 0, FP 0, TN 2392, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 2392 rows with NaNs (typically hz different) from a     total of 4225 entries, resulting in 1833 entries.
Found 556 sibs and 145787 nonsibs, weights: 0.9962 and 0.0038, #weights: 146343
Found 65 sibs and 1768 nonsibs, weights: 0.9645 and 0.0355, #weights: 1833
Correct: 146339, incorrect 4, TP 556, FP 4, TN 145783, FN0, Prec. 99.29, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1833, incorrect 0, TP 65, FP 0, TN 1768, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 1
Generated 311922 non-sibling candidates from 559 siblings.
Generated 4160 non-sibling candidates from 65 siblings.
Correct: 166309, incorrect 2, TP 0, FP 0, TN 166309, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 43 rows with error results and 166311 rows with NaNs (typically hz different) from a     total of 312481 entries, resulting in 146170 entries.
Correct: 2378, incorrect 1, TP 0, FP 0, TN 2378, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 99.96%
Removing 0 rows with error results and 2379 rows with NaNs (typically hz different) from a     total of 4225 entries, resulting in 1846 entries.
Found 557 sibs and 145613 nonsibs, weights: 0.9962 and 0.0038, #weights: 146170
Found 64 sibs and 1782 nonsibs, weights: 0.9653 and 0.0347, #weights: 1846
Correct: 146166, incorrect 4, TP 557, FP 4, TN 145609, FN0, Prec. 99.29, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1845, incorrect 1, TP 63, FP 0, TN 1782, FN1, Prec. 100.0, Rec. 98.44, Spec. 100.0, Acc. 99.95%
Round 2
Generated 314160 non-sibling candidates from 561 siblings.
Generated 3906 non-sibling candidates from 63 siblings.
Correct: 169518, incorrect 3, TP 0, FP 0, TN 169518, FN3, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 47 rows with error results and 169521 rows with NaNs (typically hz different) from a     total of 314721 entries, resulting in 145200 entries.
Correct: 2014, incorrect 0, TP 0, FP 0, TN 2014, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 2014 rows with NaNs (typically hz different) from a     total of 3969 entries, resulting in 1955 entries.
Found 558 sibs and 144642 nonsibs, weights: 0.9962 and 0.0038, #weights: 145200
Found 63 sibs and 1892 nonsibs, weights: 0.9678 and 0.0322, #weights: 1955
Correct: 145194, incorrect 6, TP 558, FP 6, TN 144636, FN0, Prec. 98.94, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1955, incorrect 0, TP 63, FP 0, TN 1892, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 3
Generated 315282 non-sibling candidates from 562 siblings.
Generated 3782 non-sibling candidates from 62 siblings.
Correct: 168783, incorrect 3, TP 0, FP 0, TN 168783, FN3, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 37 rows with error results and 168786 rows with NaNs (typically hz different) from a     total of 315844 entries, resulting in 147058 entries.
Correct: 2079, incorrect 0, TP 0, FP 0, TN 2079, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 1 rows with error results and 2079 rows with NaNs (typically hz different) from a     total of 3844 entries, resulting in 1765 entries.
Found 559 sibs and 146499 nonsibs, weights: 0.9962 and 0.0038, #weights: 147058
Found 62 sibs and 1703 nonsibs, weights: 0.9649 and 0.0351, #weights: 1765
Correct: 147054, incorrect 4, TP 559, FP 4, TN 146495, FN0, Prec. 99.29, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1765, incorrect 0, TP 62, FP 0, TN 1703, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 4
Generated 315282 non-sibling candidates from 562 siblings.
Generated 3782 non-sibling candidates from 62 siblings.
Correct: 169939, incorrect 3, TP 0, FP 0, TN 169939, FN3, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 44 rows with error results and 169942 rows with NaNs (typically hz different) from a     total of 315844 entries, resulting in 145902 entries.
Correct: 1970, incorrect 0, TP 0, FP 0, TN 1970, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 1970 rows with NaNs (typically hz different) from a     total of 3844 entries, resulting in 1874 entries.
Found 559 sibs and 145343 nonsibs, weights: 0.9962 and 0.0038, #weights: 145902
Found 62 sibs and 1812 nonsibs, weights: 0.9669 and 0.0331, #weights: 1874
Correct: 145898, incorrect 4, TP 559, FP 4, TN 145339, FN0, Prec. 99.29, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1874, incorrect 0, TP 62, FP 0, TN 1812, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 5
Generated 315282 non-sibling candidates from 562 siblings.
Generated 3782 non-sibling candidates from 62 siblings.
Correct: 168832, incorrect 3, TP 0, FP 0, TN 168832, FN3, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 43 rows with error results and 168835 rows with NaNs (typically hz different) from a     total of 315844 entries, resulting in 147009 entries.
Correct: 2086, incorrect 0, TP 0, FP 0, TN 2086, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 2 rows with error results and 2086 rows with NaNs (typically hz different) from a     total of 3844 entries, resulting in 1758 entries.
Found 559 sibs and 146450 nonsibs, weights: 0.9962 and 0.0038, #weights: 147009
Found 62 sibs and 1696 nonsibs, weights: 0.9647 and 0.0353, #weights: 1758
Correct: 147005, incorrect 4, TP 559, FP 4, TN 146446, FN0, Prec. 99.29, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1758, incorrect 0, TP 62, FP 0, TN 1696, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 6
Generated 315282 non-sibling candidates from 562 siblings.
Generated 3782 non-sibling candidates from 62 siblings.
Correct: 168811, incorrect 3, TP 0, FP 0, TN 168811, FN3, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 42 rows with error results and 168814 rows with NaNs (typically hz different) from a     total of 315844 entries, resulting in 147030 entries.
Correct: 2095, incorrect 0, TP 0, FP 0, TN 2095, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 1 rows with error results and 2095 rows with NaNs (typically hz different) from a     total of 3844 entries, resulting in 1749 entries.
Found 559 sibs and 146471 nonsibs, weights: 0.9962 and 0.0038, #weights: 147030
Found 62 sibs and 1687 nonsibs, weights: 0.9646 and 0.0354, #weights: 1749
Correct: 147024, incorrect 6, TP 559, FP 6, TN 146465, FN0, Prec. 98.94, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1749, incorrect 0, TP 62, FP 0, TN 1687, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 7
Generated 316406 non-sibling candidates from 563 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 170243, incorrect 3, TP 0, FP 0, TN 170243, FN3, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 44 rows with error results and 170246 rows with NaNs (typically hz different) from a     total of 316969 entries, resulting in 146723 entries.
Correct: 1938, incorrect 0, TP 0, FP 0, TN 1938, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 1938 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1783 entries.
Found 560 sibs and 146163 nonsibs, weights: 0.9962 and 0.0038, #weights: 146723
Found 61 sibs and 1722 nonsibs, weights: 0.9658 and 0.0342, #weights: 1783
Correct: 146719, incorrect 4, TP 560, FP 4, TN 146159, FN0, Prec. 99.29, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1783, incorrect 0, TP 61, FP 0, TN 1722, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 8
Generated 316406 non-sibling candidates from 563 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 170484, incorrect 2, TP 0, FP 0, TN 170484, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 19 rows with error results and 170486 rows with NaNs (typically hz different) from a     total of 316969 entries, resulting in 146483 entries.
Correct: 1909, incorrect 1, TP 0, FP 0, TN 1909, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 99.95%
Removing 2 rows with error results and 1910 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1811 entries.
Found 561 sibs and 145922 nonsibs, weights: 0.9962 and 0.0038, #weights: 146483
Found 60 sibs and 1751 nonsibs, weights: 0.9669 and 0.0331, #weights: 1811
Correct: 146477, incorrect 6, TP 561, FP 6, TN 145916, FN0, Prec. 98.94, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1811, incorrect 0, TP 60, FP 0, TN 1751, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 9
Generated 316406 non-sibling candidates from 563 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 170062, incorrect 2, TP 0, FP 0, TN 170062, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 44 rows with error results and 170064 rows with NaNs (typically hz different) from a     total of 316969 entries, resulting in 146905 entries.
Correct: 1951, incorrect 1, TP 0, FP 0, TN 1951, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 99.95%
Removing 2 rows with error results and 1952 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1769 entries.
Found 561 sibs and 146344 nonsibs, weights: 0.9962 and 0.0038, #weights: 146905
Found 60 sibs and 1709 nonsibs, weights: 0.9661 and 0.0339, #weights: 1769
Correct: 146901, incorrect 4, TP 561, FP 4, TN 146340, FN0, Prec. 99.29, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1769, incorrect 0, TP 60, FP 0, TN 1709, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
############# Round 3 ##############
Loading from filenames ../../../gt3/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../gt3/hosts.csv__nonsiblings_seed1_n679capture.pcap.ts.siblingresult.csv
Read 625 siblings and 411909 non-siblings from files.
Our algo: Not deciding on 1412 pairs for unknown/error reasons.
Found 529 sibs and 410593 nonsibs, weights: 0.9987 and 0.0013, #weights: 411122
Our algo stats: (1412) undecided, mcc: 0.9741213413846989, f1: 0.9739130434782609
Correct: 411095, incorrect 27, TP 504, FP 2, TN 410591, FN25, Prec. 99.6, Rec. 95.27, Spec. 100.0, Acc. 99.99%
Found 624 sibs and 410593 nonsibs, weights: 0.9985 and 0.0015, #weights: 411217
Beverly algo: Not deciding on 1317 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.0801639276960036, f1: 0.01584279855370581
Correct: 334188, incorrect 77029, TP 620, FP 77025, TN 333568, FN4, Prec. 0.8, Rec. 99.36, Spec. 81.24, Acc. 81.27%
## GROUP: RAv1
Generated 132 non-sibling candidates from 12 siblings.
Our algo: Not deciding on 0 pairs for unknown/error reasons.
Found 12 sibs and 132 nonsibs, weights: 0.9167 and 0.0833, #weights: 144
Our algo stats: (0) undecided, mcc: 1.0, f1: 1.0
Correct: 144, incorrect 0, TP 12, FP 0, TN 132, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Found 12 sibs and 132 nonsibs, weights: 0.9167 and 0.0833, #weights: 144
Beverly algo: Not deciding on 0 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.0, f1: 0.15384615384615385
Correct: 12, incorrect 132, TP 12, FP 132, TN 0, FN0, Prec. 8.33, Rec. 100.0, Spec. 0.0, Acc. 8.33%
## GROUP: RAv2
Generated 42642 non-sibling candidates from 207 siblings.
Our algo: Not deciding on 3 pairs for unknown/error reasons.
Found 207 sibs and 42639 nonsibs, weights: 0.9952 and 0.0048, #weights: 42846
Our algo stats: (3) undecided, mcc: 0.9729893273796464, f1: 0.972972972972973
Correct: 42835, incorrect 11, TP 198, FP 2, TN 42637, FN9, Prec. 99.0, Rec. 95.65, Spec. 100.0, Acc. 99.97%
Found 207 sibs and 42639 nonsibs, weights: 0.9952 and 0.0048, #weights: 42846
Beverly algo: Not deciding on 3 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.054671938147755984, f1: 0.015564202334630352
Correct: 16787, incorrect 26059, TP 206, FP 26058, TN 16581, FN1, Prec. 0.78, Rec. 99.52, Spec. 38.89, Acc. 39.18%
## GROUP: nlnog
Generated 145542 non-sibling candidates from 382 siblings.
Our algo: Not deciding on 873 pairs for unknown/error reasons.
Found 288 sibs and 144763 nonsibs, weights: 0.998 and 0.002, #weights: 145051
Our algo stats: (873) undecided, mcc: 0.9771260770335519, f1: 0.9769094138543517
Correct: 145038, incorrect 13, TP 275, FP 0, TN 144763, FN13, Prec. 100.0, Rec. 95.49, Spec. 100.0, Acc. 99.99%
Found 381 sibs and 144763 nonsibs, weights: 0.9974 and 0.0026, #weights: 145144
Beverly algo: Not deciding on 780 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.08461691742211067, f1: 0.019493579495642305
Correct: 107118, incorrect 38026, TP 378, FP 38023, TN 106740, FN3, Prec. 0.98, Rec. 99.21, Spec. 73.73, Acc. 73.8%
## GROUP: servers
Generated 552 non-sibling candidates from 24 siblings.
Our algo: Not deciding on 2 pairs for unknown/error reasons.
Found 22 sibs and 552 nonsibs, weights: 0.9617 and 0.0383, #weights: 574
Our algo stats: (2) undecided, mcc: 0.9268052972270169, f1: 0.9268292682926829
Correct: 571, incorrect 3, TP 19, FP 0, TN 552, FN3, Prec. 100.0, Rec. 86.36, Spec. 100.0, Acc. 99.48%
Found 24 sibs and 552 nonsibs, weights: 0.9583 and 0.0417, #weights: 576
Beverly algo: Not deciding on 0 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.13380139498238697, f1: 0.1111111111111111
Correct: 192, incorrect 384, TP 24, FP 384, TN 168, FN0, Prec. 5.88, Rec. 100.0, Spec. 30.43, Acc. 33.33%
Round 0
Generated 313040 non-sibling candidates from 560 siblings.
Generated 4160 non-sibling candidates from 65 siblings.
Correct: 167384, incorrect 2, TP 0, FP 0, TN 167384, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1135 rows with error results and 167386 rows with NaNs (typically hz different) from a     total of 313600 entries, resulting in 146214 entries.
Correct: 2304, incorrect 0, TP 0, FP 0, TN 2304, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 2304 rows with NaNs (typically hz different) from a     total of 4225 entries, resulting in 1921 entries.
Found 558 sibs and 145656 nonsibs, weights: 0.9962 and 0.0038, #weights: 146214
Found 65 sibs and 1856 nonsibs, weights: 0.9662 and 0.0338, #weights: 1921
Correct: 146210, incorrect 4, TP 558, FP 4, TN 145652, FN0, Prec. 99.29, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1920, incorrect 1, TP 64, FP 0, TN 1856, FN1, Prec. 100.0, Rec. 98.46, Spec. 100.0, Acc. 99.95%
Round 1
Generated 313040 non-sibling candidates from 560 siblings.
Generated 4160 non-sibling candidates from 65 siblings.
Correct: 167409, incorrect 2, TP 0, FP 0, TN 167409, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1137 rows with error results and 167411 rows with NaNs (typically hz different) from a     total of 313600 entries, resulting in 146189 entries.
Correct: 2296, incorrect 0, TP 0, FP 0, TN 2296, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 2296 rows with NaNs (typically hz different) from a     total of 4225 entries, resulting in 1929 entries.
Found 558 sibs and 145631 nonsibs, weights: 0.9962 and 0.0038, #weights: 146189
Found 65 sibs and 1864 nonsibs, weights: 0.9663 and 0.0337, #weights: 1929
Correct: 146181, incorrect 8, TP 558, FP 8, TN 145623, FN0, Prec. 98.59, Rec. 100.0, Spec. 99.99, Acc. 99.99%
Correct: 1929, incorrect 0, TP 65, FP 0, TN 1864, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 2
Generated 315282 non-sibling candidates from 562 siblings.
Generated 3906 non-sibling candidates from 63 siblings.
Correct: 167624, incorrect 1, TP 0, FP 0, TN 167624, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1141 rows with error results and 167625 rows with NaNs (typically hz different) from a     total of 315844 entries, resulting in 148219 entries.
Correct: 2246, incorrect 1, TP 0, FP 0, TN 2246, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 99.96%
Removing 0 rows with error results and 2247 rows with NaNs (typically hz different) from a     total of 3969 entries, resulting in 1722 entries.
Found 561 sibs and 147658 nonsibs, weights: 0.9962 and 0.0038, #weights: 148219
Found 62 sibs and 1660 nonsibs, weights: 0.964 and 0.036, #weights: 1722
Correct: 148213, incorrect 6, TP 561, FP 6, TN 147652, FN0, Prec. 98.94, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1722, incorrect 0, TP 62, FP 0, TN 1660, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 3
Generated 315282 non-sibling candidates from 562 siblings.
Generated 3906 non-sibling candidates from 63 siblings.
Correct: 168797, incorrect 2, TP 0, FP 0, TN 168797, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1140 rows with error results and 168799 rows with NaNs (typically hz different) from a     total of 315844 entries, resulting in 147045 entries.
Correct: 2142, incorrect 0, TP 0, FP 0, TN 2142, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 2142 rows with NaNs (typically hz different) from a     total of 3969 entries, resulting in 1827 entries.
Found 560 sibs and 146485 nonsibs, weights: 0.9962 and 0.0038, #weights: 147045
Found 63 sibs and 1764 nonsibs, weights: 0.9655 and 0.0345, #weights: 1827
Correct: 147041, incorrect 4, TP 560, FP 4, TN 146481, FN0, Prec. 99.29, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1825, incorrect 2, TP 63, FP 2, TN 1762, FN0, Prec. 96.92, Rec. 100.0, Spec. 99.89, Acc. 99.89%
Round 4
Generated 316406 non-sibling candidates from 563 siblings.
Generated 3782 non-sibling candidates from 62 siblings.
Correct: 169050, incorrect 2, TP 0, FP 0, TN 169050, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1131 rows with error results and 169052 rows with NaNs (typically hz different) from a     total of 316969 entries, resulting in 147917 entries.
Correct: 2111, incorrect 0, TP 0, FP 0, TN 2111, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 1 rows with error results and 2111 rows with NaNs (typically hz different) from a     total of 3844 entries, resulting in 1733 entries.
Found 561 sibs and 147356 nonsibs, weights: 0.9962 and 0.0038, #weights: 147917
Found 62 sibs and 1671 nonsibs, weights: 0.9642 and 0.0358, #weights: 1733
Correct: 147912, incorrect 5, TP 561, FP 5, TN 147351, FN0, Prec. 99.12, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1733, incorrect 0, TP 62, FP 0, TN 1671, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 5
Generated 316406 non-sibling candidates from 563 siblings.
Generated 3782 non-sibling candidates from 62 siblings.
Correct: 170188, incorrect 2, TP 0, FP 0, TN 170188, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1141 rows with error results and 170190 rows with NaNs (typically hz different) from a     total of 316969 entries, resulting in 146779 entries.
Correct: 1985, incorrect 0, TP 0, FP 0, TN 1985, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 1 rows with error results and 1985 rows with NaNs (typically hz different) from a     total of 3844 entries, resulting in 1859 entries.
Found 561 sibs and 146218 nonsibs, weights: 0.9962 and 0.0038, #weights: 146779
Found 62 sibs and 1797 nonsibs, weights: 0.9666 and 0.0334, #weights: 1859
Correct: 146774, incorrect 5, TP 561, FP 5, TN 146213, FN0, Prec. 99.12, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1859, incorrect 0, TP 62, FP 0, TN 1797, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 6
Generated 316406 non-sibling candidates from 563 siblings.
Generated 3782 non-sibling candidates from 62 siblings.
Correct: 170037, incorrect 2, TP 0, FP 0, TN 170037, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1143 rows with error results and 170039 rows with NaNs (typically hz different) from a     total of 316969 entries, resulting in 146930 entries.
Correct: 1994, incorrect 0, TP 0, FP 0, TN 1994, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 1994 rows with NaNs (typically hz different) from a     total of 3844 entries, resulting in 1850 entries.
Found 561 sibs and 146369 nonsibs, weights: 0.9962 and 0.0038, #weights: 146930
Found 62 sibs and 1788 nonsibs, weights: 0.9665 and 0.0335, #weights: 1850
Correct: 146925, incorrect 5, TP 561, FP 5, TN 146364, FN0, Prec. 99.12, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1850, incorrect 0, TP 62, FP 0, TN 1788, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 7
Generated 317532 non-sibling candidates from 564 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 170342, incorrect 1, TP 0, FP 0, TN 170342, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 15 rows with error results and 170343 rows with NaNs (typically hz different) from a     total of 318096 entries, resulting in 147753 entries.
Correct: 1962, incorrect 1, TP 0, FP 0, TN 1962, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 99.95%
Removing 121 rows with error results and 1963 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1758 entries.
Found 563 sibs and 147190 nonsibs, weights: 0.9962 and 0.0038, #weights: 147753
Found 60 sibs and 1698 nonsibs, weights: 0.9659 and 0.0341, #weights: 1758
Correct: 147745, incorrect 8, TP 563, FP 8, TN 147182, FN0, Prec. 98.6, Rec. 100.0, Spec. 99.99, Acc. 99.99%
Correct: 1758, incorrect 0, TP 60, FP 0, TN 1698, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 8
Generated 317532 non-sibling candidates from 564 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 171619, incorrect 2, TP 0, FP 0, TN 171619, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1147 rows with error results and 171621 rows with NaNs (typically hz different) from a     total of 318096 entries, resulting in 146475 entries.
Correct: 1816, incorrect 0, TP 0, FP 0, TN 1816, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 1816 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1905 entries.
Found 562 sibs and 145913 nonsibs, weights: 0.9962 and 0.0038, #weights: 146475
Found 61 sibs and 1844 nonsibs, weights: 0.968 and 0.032, #weights: 1905
Correct: 146467, incorrect 8, TP 562, FP 8, TN 145905, FN0, Prec. 98.6, Rec. 100.0, Spec. 99.99, Acc. 99.99%
Correct: 1905, incorrect 0, TP 61, FP 0, TN 1844, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 9
Generated 317532 non-sibling candidates from 564 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 170214, incorrect 2, TP 0, FP 0, TN 170214, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1145 rows with error results and 170216 rows with NaNs (typically hz different) from a     total of 318096 entries, resulting in 147880 entries.
Correct: 1984, incorrect 0, TP 0, FP 0, TN 1984, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 1984 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1737 entries.
Found 562 sibs and 147318 nonsibs, weights: 0.9962 and 0.0038, #weights: 147880
Found 61 sibs and 1676 nonsibs, weights: 0.9649 and 0.0351, #weights: 1737
Correct: 147875, incorrect 5, TP 562, FP 5, TN 147313, FN0, Prec. 99.12, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1737, incorrect 0, TP 61, FP 0, TN 1676, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
############# Round 4 ##############
Loading from filenames ../../../gt4/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../gt4/hosts.csv__nonsiblings_seed1_n679capture.pcap.ts.siblingresult.csv
Read 620 siblings and 408034 non-siblings from files.
Our algo: Not deciding on 1367 pairs for unknown/error reasons.
Found 591 sibs and 406696 nonsibs, weights: 0.9985 and 0.0015, #weights: 407287
Our algo stats: (1367) undecided, mcc: 0.9759962072315428, f1: 0.9758203799654578
Correct: 407259, incorrect 28, TP 565, FP 2, TN 406694, FN26, Prec. 99.65, Rec. 95.6, Spec. 100.0, Acc. 99.99%
Found 619 sibs and 406698 nonsibs, weights: 0.9985 and 0.0015, #weights: 407317
Beverly algo: Not deciding on 1337 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.08018782271325639, f1: 0.01585562359007412
Correct: 330972, incorrect 76345, TP 615, FP 76341, TN 330357, FN4, Prec. 0.8, Rec. 99.35, Spec. 81.23, Acc. 81.26%
## GROUP: RAv1
Generated 132 non-sibling candidates from 12 siblings.
Our algo: Not deciding on 0 pairs for unknown/error reasons.
Found 12 sibs and 132 nonsibs, weights: 0.9167 and 0.0833, #weights: 144
Our algo stats: (0) undecided, mcc: 1.0, f1: 1.0
Correct: 144, incorrect 0, TP 12, FP 0, TN 132, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Found 12 sibs and 132 nonsibs, weights: 0.9167 and 0.0833, #weights: 144
Beverly algo: Not deciding on 0 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.0, f1: 0.15384615384615385
Correct: 12, incorrect 132, TP 12, FP 132, TN 0, FN0, Prec. 8.33, Rec. 100.0, Spec. 0.0, Acc. 8.33%
## GROUP: RAv2
Generated 41006 non-sibling candidates from 203 siblings.
Our algo: Not deciding on 0 pairs for unknown/error reasons.
Found 203 sibs and 41006 nonsibs, weights: 0.9951 and 0.0049, #weights: 41209
Our algo stats: (0) undecided, mcc: 0.9647865310847772, f1: 0.9646464646464645
Correct: 41195, incorrect 14, TP 191, FP 2, TN 41004, FN12, Prec. 98.96, Rec. 94.09, Spec. 100.0, Acc. 99.97%
Found 203 sibs and 41006 nonsibs, weights: 0.9951 and 0.0049, #weights: 41209
Beverly algo: Not deciding on 0 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.05464444412875073, f1: 0.015746803866541943
Correct: 15957, incorrect 25252, TP 202, FP 25251, TN 15755, FN1, Prec. 0.79, Rec. 99.51, Spec. 38.42, Acc. 38.72%
## GROUP: nlnog
Generated 144780 non-sibling candidates from 381 siblings.
Our algo: Not deciding on 839 pairs for unknown/error reasons.
Found 353 sibs and 143969 nonsibs, weights: 0.9976 and 0.0024, #weights: 144322
Our algo stats: (839) undecided, mcc: 0.9842583543629252, f1: 0.9841726618705036
Correct: 144311, incorrect 11, TP 342, FP 0, TN 143969, FN11, Prec. 100.0, Rec. 96.88, Spec. 100.0, Acc. 99.99%
Found 380 sibs and 143970 nonsibs, weights: 0.9974 and 0.0026, #weights: 144350
Beverly algo: Not deciding on 811 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.08472052371957776, f1: 0.01954329851481299
Correct: 106523, incorrect 37827, TP 377, FP 37824, TN 106146, FN3, Prec. 0.99, Rec. 99.21, Spec. 73.73, Acc. 73.79%
## GROUP: servers
Generated 552 non-sibling candidates from 24 siblings.
Our algo: Not deciding on 1 pairs for unknown/error reasons.
Found 23 sibs and 552 nonsibs, weights: 0.96 and 0.04, #weights: 575
Our algo stats: (1) undecided, mcc: 0.9299811099505543, f1: 0.9302325581395349
Correct: 572, incorrect 3, TP 20, FP 0, TN 552, FN3, Prec. 100.0, Rec. 86.96, Spec. 100.0, Acc. 99.48%
Found 24 sibs and 552 nonsibs, weights: 0.9583 and 0.0417, #weights: 576
Beverly algo: Not deciding on 0 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.13380139498238694, f1: 0.1111111111111111
Correct: 192, incorrect 384, TP 24, FP 384, TN 168, FN0, Prec. 5.88, Rec. 100.0, Spec. 30.43, Acc. 33.33%
Round 0
Generated 307470 non-sibling candidates from 555 siblings.
Generated 4160 non-sibling candidates from 65 siblings.
Correct: 164587, incorrect 2, TP 0, FP 0, TN 164587, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1144 rows with error results and 164589 rows with NaNs (typically hz different) from a     total of 308025 entries, resulting in 143436 entries.
Correct: 2158, incorrect 0, TP 0, FP 0, TN 2158, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 2 rows with error results and 2158 rows with NaNs (typically hz different) from a     total of 4225 entries, resulting in 2067 entries.
Found 553 sibs and 142883 nonsibs, weights: 0.9961 and 0.0039, #weights: 143436
Found 65 sibs and 2002 nonsibs, weights: 0.9686 and 0.0314, #weights: 2067
Correct: 143429, incorrect 7, TP 553, FP 7, TN 142876, FN0, Prec. 98.75, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 2067, incorrect 0, TP 65, FP 0, TN 2002, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 1
Generated 308580 non-sibling candidates from 556 siblings.
Generated 4032 non-sibling candidates from 64 siblings.
Correct: 163503, incorrect 1, TP 0, FP 0, TN 163503, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1162 rows with error results and 163504 rows with NaNs (typically hz different) from a     total of 309136 entries, resulting in 145632 entries.
Correct: 2281, incorrect 1, TP 0, FP 0, TN 2281, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 99.96%
Removing 0 rows with error results and 2282 rows with NaNs (typically hz different) from a     total of 4096 entries, resulting in 1814 entries.
Found 555 sibs and 145077 nonsibs, weights: 0.9962 and 0.0038, #weights: 145632
Found 63 sibs and 1751 nonsibs, weights: 0.9653 and 0.0347, #weights: 1814
Correct: 145625, incorrect 7, TP 555, FP 7, TN 145070, FN0, Prec. 98.75, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1814, incorrect 0, TP 63, FP 0, TN 1751, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 2
Generated 309692 non-sibling candidates from 557 siblings.
Generated 3906 non-sibling candidates from 63 siblings.
Correct: 165435, incorrect 2, TP 0, FP 0, TN 165435, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1143 rows with error results and 165437 rows with NaNs (typically hz different) from a     total of 310249 entries, resulting in 144812 entries.
Correct: 2059, incorrect 0, TP 0, FP 0, TN 2059, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 5 rows with error results and 2059 rows with NaNs (typically hz different) from a     total of 3969 entries, resulting in 1910 entries.
Found 555 sibs and 144257 nonsibs, weights: 0.9962 and 0.0038, #weights: 144812
Found 63 sibs and 1847 nonsibs, weights: 0.967 and 0.033, #weights: 1910
Correct: 144808, incorrect 4, TP 555, FP 4, TN 144253, FN0, Prec. 99.28, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1910, incorrect 0, TP 63, FP 0, TN 1847, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 3
Generated 310806 non-sibling candidates from 558 siblings.
Generated 3782 non-sibling candidates from 62 siblings.
Correct: 163988, incorrect 1, TP 0, FP 0, TN 163988, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 43 rows with error results and 163989 rows with NaNs (typically hz different) from a     total of 311364 entries, resulting in 147375 entries.
Correct: 2205, incorrect 1, TP 0, FP 0, TN 2205, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 99.95%
Removing 124 rows with error results and 2206 rows with NaNs (typically hz different) from a     total of 3844 entries, resulting in 1638 entries.
Found 557 sibs and 146818 nonsibs, weights: 0.9962 and 0.0038, #weights: 147375
Found 61 sibs and 1577 nonsibs, weights: 0.9628 and 0.0372, #weights: 1638
Correct: 147373, incorrect 2, TP 557, FP 2, TN 146816, FN0, Prec. 99.64, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1637, incorrect 1, TP 60, FP 0, TN 1577, FN1, Prec. 100.0, Rec. 98.36, Spec. 100.0, Acc. 99.94%
Round 4
Generated 311922 non-sibling candidates from 559 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 165964, incorrect 2, TP 0, FP 0, TN 165964, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1171 rows with error results and 165966 rows with NaNs (typically hz different) from a     total of 312481 entries, resulting in 146515 entries.
Correct: 2000, incorrect 0, TP 0, FP 0, TN 2000, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 2000 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1721 entries.
Found 557 sibs and 145958 nonsibs, weights: 0.9962 and 0.0038, #weights: 146515
Found 61 sibs and 1660 nonsibs, weights: 0.9646 and 0.0354, #weights: 1721
Correct: 146510, incorrect 5, TP 557, FP 5, TN 145953, FN0, Prec. 99.11, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1721, incorrect 0, TP 61, FP 0, TN 1660, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 5
Generated 311922 non-sibling candidates from 559 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 166416, incorrect 2, TP 0, FP 0, TN 166416, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1166 rows with error results and 166418 rows with NaNs (typically hz different) from a     total of 312481 entries, resulting in 146063 entries.
Correct: 1954, incorrect 0, TP 0, FP 0, TN 1954, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 1954 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1767 entries.
Found 557 sibs and 145506 nonsibs, weights: 0.9962 and 0.0038, #weights: 146063
Found 61 sibs and 1706 nonsibs, weights: 0.9655 and 0.0345, #weights: 1767
Correct: 146058, incorrect 5, TP 557, FP 5, TN 145501, FN0, Prec. 99.11, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1767, incorrect 0, TP 61, FP 0, TN 1706, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 6
Generated 311922 non-sibling candidates from 559 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 166714, incorrect 2, TP 0, FP 0, TN 166714, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1165 rows with error results and 166716 rows with NaNs (typically hz different) from a     total of 312481 entries, resulting in 145765 entries.
Correct: 1918, incorrect 0, TP 0, FP 0, TN 1918, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 1918 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1803 entries.
Found 557 sibs and 145208 nonsibs, weights: 0.9962 and 0.0038, #weights: 145765
Found 61 sibs and 1742 nonsibs, weights: 0.9662 and 0.0338, #weights: 1803
Correct: 145759, incorrect 6, TP 557, FP 6, TN 145202, FN0, Prec. 98.93, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1803, incorrect 0, TP 61, FP 0, TN 1742, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 7
Generated 311922 non-sibling candidates from 559 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 166719, incorrect 2, TP 0, FP 0, TN 166719, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1170 rows with error results and 166721 rows with NaNs (typically hz different) from a     total of 312481 entries, resulting in 145760 entries.
Correct: 1918, incorrect 0, TP 0, FP 0, TN 1918, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 1918 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1803 entries.
Found 557 sibs and 145203 nonsibs, weights: 0.9962 and 0.0038, #weights: 145760
Found 61 sibs and 1742 nonsibs, weights: 0.9662 and 0.0338, #weights: 1803
Correct: 145753, incorrect 7, TP 557, FP 7, TN 145196, FN0, Prec. 98.76, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1803, incorrect 0, TP 61, FP 0, TN 1742, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 8
Generated 311922 non-sibling candidates from 559 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 166540, incorrect 2, TP 0, FP 0, TN 166540, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1167 rows with error results and 166542 rows with NaNs (typically hz different) from a     total of 312481 entries, resulting in 145939 entries.
Correct: 1940, incorrect 0, TP 0, FP 0, TN 1940, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 1940 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1781 entries.
Found 557 sibs and 145382 nonsibs, weights: 0.9962 and 0.0038, #weights: 145939
Found 61 sibs and 1720 nonsibs, weights: 0.9657 and 0.0343, #weights: 1781
Correct: 145934, incorrect 5, TP 557, FP 5, TN 145377, FN0, Prec. 99.11, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1779, incorrect 2, TP 61, FP 2, TN 1718, FN0, Prec. 96.83, Rec. 100.0, Spec. 99.88, Acc. 99.89%
Round 9
Generated 311922 non-sibling candidates from 559 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 166259, incorrect 2, TP 0, FP 0, TN 166259, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1168 rows with error results and 166261 rows with NaNs (typically hz different) from a     total of 312481 entries, resulting in 146220 entries.
Correct: 1972, incorrect 0, TP 0, FP 0, TN 1972, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 1972 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1749 entries.
Found 557 sibs and 145663 nonsibs, weights: 0.9962 and 0.0038, #weights: 146220
Found 61 sibs and 1688 nonsibs, weights: 0.9651 and 0.0349, #weights: 1749
Correct: 146213, incorrect 7, TP 557, FP 7, TN 145656, FN0, Prec. 98.76, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1749, incorrect 0, TP 61, FP 0, TN 1688, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
############# Round 5 ##############
Loading from filenames ../../../gt5/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../gt5/hosts.csv__nonsiblings_seed1_n679capture.pcap.ts.siblingresult.csv
Read 620 siblings and 408034 non-siblings from files.
Our algo: Not deciding on 727 pairs for unknown/error reasons.
Found 563 sibs and 407364 nonsibs, weights: 0.9986 and 0.0014, #weights: 407927
Our algo stats: (727) undecided, mcc: 0.9856686349153984, f1: 0.9855855855855855
Correct: 407911, incorrect 16, TP 547, FP 0, TN 407364, FN16, Prec. 100.0, Rec. 97.16, Spec. 100.0, Acc. 100.0%
Found 619 sibs and 407366 nonsibs, weights: 0.9985 and 0.0015, #weights: 407985
Beverly algo: Not deciding on 669 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.08042295308866737, f1: 0.015925422412118858
Correct: 331980, incorrect 76005, TP 615, FP 76001, TN 331365, FN4, Prec. 0.8, Rec. 99.35, Spec. 81.34, Acc. 81.37%
## GROUP: RAv1
Generated 132 non-sibling candidates from 12 siblings.
Our algo: Not deciding on 10 pairs for unknown/error reasons.
Found 2 sibs and 132 nonsibs, weights: 0.9851 and 0.0149, #weights: 134
Our algo stats: (10) undecided, mcc: 1.0, f1: 1.0
Correct: 134, incorrect 0, TP 2, FP 0, TN 132, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Found 12 sibs and 132 nonsibs, weights: 0.9167 and 0.0833, #weights: 144
Beverly algo: Not deciding on 0 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.0, f1: 0.15384615384615385
Correct: 12, incorrect 132, TP 12, FP 132, TN 0, FN0, Prec. 8.33, Rec. 100.0, Spec. 0.0, Acc. 8.33%
## GROUP: RAv2
Generated 41006 non-sibling candidates from 203 siblings.
Our algo: Not deciding on 0 pairs for unknown/error reasons.
Found 203 sibs and 41006 nonsibs, weights: 0.9951 and 0.0049, #weights: 41209
Our algo stats: (0) undecided, mcc: 0.9825235107738732, f1: 0.9824561403508771
Correct: 41202, incorrect 7, TP 196, FP 0, TN 41006, FN7, Prec. 100.0, Rec. 96.55, Spec. 100.0, Acc. 99.98%
Found 203 sibs and 41006 nonsibs, weights: 0.9951 and 0.0049, #weights: 41209
Beverly algo: Not deciding on 0 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.0546444441287507, f1: 0.015746803866541943
Correct: 15957, incorrect 25252, TP 202, FP 25251, TN 15755, FN1, Prec. 0.79, Rec. 99.51, Spec. 38.42, Acc. 38.72%
## GROUP: nlnog
Generated 144780 non-sibling candidates from 381 siblings.
Our algo: Not deciding on 434 pairs for unknown/error reasons.
Found 336 sibs and 144391 nonsibs, weights: 0.9977 and 0.0023, #weights: 144727
Our algo stats: (434) undecided, mcc: 0.9910106190324149, f1: 0.9909909909909909
Correct: 144721, incorrect 6, TP 330, FP 0, TN 144391, FN6, Prec. 100.0, Rec. 98.21, Spec. 100.0, Acc. 100.0%
Found 380 sibs and 144393 nonsibs, weights: 0.9974 and 0.0026, #weights: 144773
Beverly algo: Not deciding on 388 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.08520748226387029, f1: 0.019691311274189757
Correct: 107236, incorrect 37537, TP 377, FP 37534, TN 106859, FN3, Prec. 0.99, Rec. 99.21, Spec. 74.01, Acc. 74.07%
## GROUP: servers
Generated 552 non-sibling candidates from 24 siblings.
Our algo: Not deciding on 2 pairs for unknown/error reasons.
Found 22 sibs and 552 nonsibs, weights: 0.9617 and 0.0383, #weights: 574
Our algo stats: (2) undecided, mcc: 0.9268052972270169, f1: 0.9268292682926829
Correct: 571, incorrect 3, TP 19, FP 0, TN 552, FN3, Prec. 100.0, Rec. 86.36, Spec. 100.0, Acc. 99.48%
Found 24 sibs and 552 nonsibs, weights: 0.9583 and 0.0417, #weights: 576
Beverly algo: Not deciding on 0 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.13380139498238694, f1: 0.1111111111111111
Correct: 192, incorrect 384, TP 24, FP 384, TN 168, FN0, Prec. 5.88, Rec. 100.0, Spec. 30.43, Acc. 33.33%
Round 0
Generated 307470 non-sibling candidates from 555 siblings.
Generated 4160 non-sibling candidates from 65 siblings.
Correct: 162646, incorrect 2, TP 0, FP 0, TN 162646, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 561 rows with error results and 162648 rows with NaNs (typically hz different) from a     total of 308025 entries, resulting in 145377 entries.
Correct: 2329, incorrect 0, TP 0, FP 0, TN 2329, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 1 rows with error results and 2329 rows with NaNs (typically hz different) from a     total of 4225 entries, resulting in 1896 entries.
Found 553 sibs and 144824 nonsibs, weights: 0.9962 and 0.0038, #weights: 145377
Found 65 sibs and 1831 nonsibs, weights: 0.9657 and 0.0343, #weights: 1896
Correct: 145375, incorrect 2, TP 553, FP 2, TN 144822, FN0, Prec. 99.64, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1896, incorrect 0, TP 65, FP 0, TN 1831, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 1
Generated 308580 non-sibling candidates from 556 siblings.
Generated 4032 non-sibling candidates from 64 siblings.
Correct: 163936, incorrect 2, TP 0, FP 0, TN 163936, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 564 rows with error results and 163938 rows with NaNs (typically hz different) from a     total of 309136 entries, resulting in 145198 entries.
Correct: 2191, incorrect 0, TP 0, FP 0, TN 2191, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 1 rows with error results and 2191 rows with NaNs (typically hz different) from a     total of 4096 entries, resulting in 1905 entries.
Found 554 sibs and 144644 nonsibs, weights: 0.9962 and 0.0038, #weights: 145198
Found 64 sibs and 1841 nonsibs, weights: 0.9664 and 0.0336, #weights: 1905
Correct: 145194, incorrect 4, TP 554, FP 4, TN 144640, FN0, Prec. 99.28, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1905, incorrect 0, TP 64, FP 0, TN 1841, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 2
Generated 309692 non-sibling candidates from 557 siblings.
Generated 3906 non-sibling candidates from 63 siblings.
Correct: 164780, incorrect 2, TP 0, FP 0, TN 164780, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 568 rows with error results and 164782 rows with NaNs (typically hz different) from a     total of 310249 entries, resulting in 145467 entries.
Correct: 2094, incorrect 0, TP 0, FP 0, TN 2094, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 2094 rows with NaNs (typically hz different) from a     total of 3969 entries, resulting in 1875 entries.
Found 555 sibs and 144912 nonsibs, weights: 0.9962 and 0.0038, #weights: 145467
Found 63 sibs and 1812 nonsibs, weights: 0.9664 and 0.0336, #weights: 1875
Correct: 145463, incorrect 4, TP 555, FP 4, TN 144908, FN0, Prec. 99.28, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1875, incorrect 0, TP 63, FP 0, TN 1812, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 3
Generated 310806 non-sibling candidates from 558 siblings.
Generated 3782 non-sibling candidates from 62 siblings.
Correct: 164464, incorrect 2, TP 0, FP 0, TN 164464, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 570 rows with error results and 164466 rows with NaNs (typically hz different) from a     total of 311364 entries, resulting in 146898 entries.
Correct: 2098, incorrect 0, TP 0, FP 0, TN 2098, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 2098 rows with NaNs (typically hz different) from a     total of 3844 entries, resulting in 1746 entries.
Found 556 sibs and 146342 nonsibs, weights: 0.9962 and 0.0038, #weights: 146898
Found 62 sibs and 1684 nonsibs, weights: 0.9645 and 0.0355, #weights: 1746
Correct: 146894, incorrect 4, TP 556, FP 4, TN 146338, FN0, Prec. 99.29, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1746, incorrect 0, TP 62, FP 0, TN 1684, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 4
Generated 311922 non-sibling candidates from 559 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 165570, incorrect 2, TP 0, FP 0, TN 165570, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 571 rows with error results and 165572 rows with NaNs (typically hz different) from a     total of 312481 entries, resulting in 146909 entries.
Correct: 2000, incorrect 0, TP 0, FP 0, TN 2000, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 2000 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1721 entries.
Found 557 sibs and 146352 nonsibs, weights: 0.9962 and 0.0038, #weights: 146909
Found 61 sibs and 1660 nonsibs, weights: 0.9646 and 0.0354, #weights: 1721
Correct: 146907, incorrect 2, TP 557, FP 2, TN 146350, FN0, Prec. 99.64, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1721, incorrect 0, TP 61, FP 0, TN 1660, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 5
Generated 311922 non-sibling candidates from 559 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 165624, incorrect 2, TP 0, FP 0, TN 165624, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 570 rows with error results and 165626 rows with NaNs (typically hz different) from a     total of 312481 entries, resulting in 146855 entries.
Correct: 1994, incorrect 0, TP 0, FP 0, TN 1994, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 1994 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1727 entries.
Found 557 sibs and 146298 nonsibs, weights: 0.9962 and 0.0038, #weights: 146855
Found 61 sibs and 1666 nonsibs, weights: 0.9647 and 0.0353, #weights: 1727
Correct: 146851, incorrect 4, TP 557, FP 4, TN 146294, FN0, Prec. 99.29, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1727, incorrect 0, TP 61, FP 0, TN 1666, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 6
Generated 311922 non-sibling candidates from 559 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 165134, incorrect 2, TP 0, FP 0, TN 165134, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 567 rows with error results and 165136 rows with NaNs (typically hz different) from a     total of 312481 entries, resulting in 147345 entries.
Correct: 2046, incorrect 0, TP 0, FP 0, TN 2046, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 2046 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1675 entries.
Found 557 sibs and 146788 nonsibs, weights: 0.9962 and 0.0038, #weights: 147345
Found 61 sibs and 1614 nonsibs, weights: 0.9636 and 0.0364, #weights: 1675
Correct: 147341, incorrect 4, TP 557, FP 4, TN 146784, FN0, Prec. 99.29, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1675, incorrect 0, TP 61, FP 0, TN 1614, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 7
Generated 311922 non-sibling candidates from 559 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 166590, incorrect 1, TP 0, FP 0, TN 166590, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 569 rows with error results and 166591 rows with NaNs (typically hz different) from a     total of 312481 entries, resulting in 145890 entries.
Correct: 1889, incorrect 1, TP 0, FP 0, TN 1889, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 99.95%
Removing 0 rows with error results and 1890 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1831 entries.
Found 558 sibs and 145332 nonsibs, weights: 0.9962 and 0.0038, #weights: 145890
Found 60 sibs and 1771 nonsibs, weights: 0.9672 and 0.0328, #weights: 1831
Correct: 145888, incorrect 2, TP 558, FP 2, TN 145330, FN0, Prec. 99.64, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1831, incorrect 0, TP 60, FP 0, TN 1771, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 8
Generated 311922 non-sibling candidates from 559 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 166721, incorrect 2, TP 0, FP 0, TN 166721, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 569 rows with error results and 166723 rows with NaNs (typically hz different) from a     total of 312481 entries, resulting in 145758 entries.
Correct: 1860, incorrect 0, TP 0, FP 0, TN 1860, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 1860 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1861 entries.
Found 557 sibs and 145201 nonsibs, weights: 0.9962 and 0.0038, #weights: 145758
Found 61 sibs and 1800 nonsibs, weights: 0.9672 and 0.0328, #weights: 1861
Correct: 145756, incorrect 2, TP 557, FP 2, TN 145199, FN0, Prec. 99.64, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1861, incorrect 0, TP 61, FP 0, TN 1800, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 9
Generated 311922 non-sibling candidates from 559 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 167071, incorrect 1, TP 0, FP 0, TN 167071, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 10 rows with error results and 167072 rows with NaNs (typically hz different) from a     total of 312481 entries, resulting in 145409 entries.
Correct: 1795, incorrect 1, TP 0, FP 0, TN 1795, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 99.94%
Removing 61 rows with error results and 1796 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1925 entries.
Found 558 sibs and 144851 nonsibs, weights: 0.9962 and 0.0038, #weights: 145409
Found 60 sibs and 1865 nonsibs, weights: 0.9688 and 0.0312, #weights: 1925
Correct: 145405, incorrect 4, TP 558, FP 4, TN 144847, FN0, Prec. 99.29, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1925, incorrect 0, TP 60, FP 0, TN 1865, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
############# Round 6 ##############
Loading from filenames ../../../gt6/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../gt6/hosts.csv__nonsiblings_seed1_n679capture.pcap.ts.siblingresult.csv
Read 620 siblings and 408034 non-siblings from files.
Our algo: Not deciding on 1357 pairs for unknown/error reasons.
Found 563 sibs and 406734 nonsibs, weights: 0.9986 and 0.0014, #weights: 407297
Our algo stats: (1357) undecided, mcc: 0.9829842551468645, f1: 0.9829596412556053
Correct: 407278, incorrect 19, TP 548, FP 4, TN 406730, FN15, Prec. 99.28, Rec. 97.34, Spec. 100.0, Acc. 100.0%
Found 619 sibs and 406734 nonsibs, weights: 0.9985 and 0.0015, #weights: 407353
Beverly algo: Not deciding on 1301 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.08067887540424964, f1: 0.015986090025562175
Correct: 331518, incorrect 75835, TP 616, FP 75832, TN 330902, FN3, Prec. 0.81, Rec. 99.52, Spec. 81.36, Acc. 81.38%
## GROUP: RAv1
Generated 132 non-sibling candidates from 12 siblings.
Our algo: Not deciding on 11 pairs for unknown/error reasons.
Found 1 sibs and 132 nonsibs, weights: 0.9925 and 0.0075, #weights: 133
Our algo stats: (11) undecided, mcc: 1.0, f1: 1.0
Correct: 133, incorrect 0, TP 1, FP 0, TN 132, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Found 12 sibs and 132 nonsibs, weights: 0.9167 and 0.0833, #weights: 144
Beverly algo: Not deciding on 0 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.0, f1: 0.15384615384615385
Correct: 12, incorrect 132, TP 12, FP 132, TN 0, FN0, Prec. 8.33, Rec. 100.0, Spec. 0.0, Acc. 8.33%
## GROUP: RAv2
Generated 41006 non-sibling candidates from 203 siblings.
Our algo: Not deciding on 2 pairs for unknown/error reasons.
Found 203 sibs and 41004 nonsibs, weights: 0.9951 and 0.0049, #weights: 41207
Our algo stats: (2) undecided, mcc: 0.9724481217690122, f1: 0.9724310776942356
Correct: 41196, incorrect 11, TP 194, FP 2, TN 41002, FN9, Prec. 98.98, Rec. 95.57, Spec. 100.0, Acc. 99.97%
Found 203 sibs and 41004 nonsibs, weights: 0.9951 and 0.0049, #weights: 41207
Beverly algo: Not deciding on 2 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.0546479584463799, f1: 0.015748031496062992
Correct: 15957, incorrect 25250, TP 202, FP 25249, TN 15755, FN1, Prec. 0.79, Rec. 99.51, Spec. 38.42, Acc. 38.72%
## GROUP: nlnog
Generated 144780 non-sibling candidates from 381 siblings.
Our algo: Not deciding on 824 pairs for unknown/error reasons.
Found 337 sibs and 144000 nonsibs, weights: 0.9977 and 0.0023, #weights: 144337
Our algo stats: (824) undecided, mcc: 0.9925543193161604, f1: 0.9925705794947994
Correct: 144332, incorrect 5, TP 334, FP 2, TN 143998, FN3, Prec. 99.4, Rec. 99.11, Spec. 100.0, Acc. 100.0%
Found 380 sibs and 144000 nonsibs, weights: 0.9974 and 0.0026, #weights: 144380
Beverly algo: Not deciding on 781 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.08568368231929237, f1: 0.01981443623211197
Correct: 106982, incorrect 37398, TP 378, FP 37396, TN 106604, FN2, Prec. 1.0, Rec. 99.47, Spec. 74.03, Acc. 74.1%
## GROUP: servers
Generated 552 non-sibling candidates from 24 siblings.
Our algo: Not deciding on 2 pairs for unknown/error reasons.
Found 22 sibs and 552 nonsibs, weights: 0.9617 and 0.0383, #weights: 574
Our algo stats: (2) undecided, mcc: 0.9268052972270169, f1: 0.9268292682926829
Correct: 571, incorrect 3, TP 19, FP 0, TN 552, FN3, Prec. 100.0, Rec. 86.36, Spec. 100.0, Acc. 99.48%
Found 24 sibs and 552 nonsibs, weights: 0.9583 and 0.0417, #weights: 576
Beverly algo: Not deciding on 0 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.13380139498238694, f1: 0.1111111111111111
Correct: 192, incorrect 384, TP 24, FP 384, TN 168, FN0, Prec. 5.88, Rec. 100.0, Spec. 30.43, Acc. 33.33%
Round 0
Generated 307470 non-sibling candidates from 555 siblings.
Generated 4160 non-sibling candidates from 65 siblings.
Correct: 163734, incorrect 2, TP 0, FP 0, TN 163734, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1124 rows with error results and 163736 rows with NaNs (typically hz different) from a     total of 308025 entries, resulting in 144289 entries.
Correct: 2254, incorrect 0, TP 0, FP 0, TN 2254, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 2254 rows with NaNs (typically hz different) from a     total of 4225 entries, resulting in 1971 entries.
Found 553 sibs and 143736 nonsibs, weights: 0.9962 and 0.0038, #weights: 144289
Found 65 sibs and 1906 nonsibs, weights: 0.967 and 0.033, #weights: 1971
Correct: 144281, incorrect 8, TP 553, FP 8, TN 143728, FN0, Prec. 98.57, Rec. 100.0, Spec. 99.99, Acc. 99.99%
Correct: 1971, incorrect 0, TP 65, FP 0, TN 1906, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 1
Generated 308580 non-sibling candidates from 556 siblings.
Generated 4032 non-sibling candidates from 64 siblings.
Correct: 165404, incorrect 2, TP 0, FP 0, TN 165404, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1132 rows with error results and 165406 rows with NaNs (typically hz different) from a     total of 309136 entries, resulting in 143730 entries.
Correct: 2038, incorrect 0, TP 0, FP 0, TN 2038, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 2038 rows with NaNs (typically hz different) from a     total of 4096 entries, resulting in 2058 entries.
Found 554 sibs and 143176 nonsibs, weights: 0.9961 and 0.0039, #weights: 143730
Found 64 sibs and 1994 nonsibs, weights: 0.9689 and 0.0311, #weights: 2058
Correct: 143725, incorrect 5, TP 554, FP 5, TN 143171, FN0, Prec. 99.11, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 2058, incorrect 0, TP 64, FP 0, TN 1994, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 2
Generated 309692 non-sibling candidates from 557 siblings.
Generated 3906 non-sibling candidates from 63 siblings.
Correct: 163998, incorrect 1, TP 0, FP 0, TN 163998, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 10 rows with error results and 163999 rows with NaNs (typically hz different) from a     total of 310249 entries, resulting in 146250 entries.
Correct: 2218, incorrect 1, TP 0, FP 0, TN 2218, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 99.95%
Removing 125 rows with error results and 2219 rows with NaNs (typically hz different) from a     total of 3969 entries, resulting in 1750 entries.
Found 556 sibs and 145694 nonsibs, weights: 0.9962 and 0.0038, #weights: 146250
Found 62 sibs and 1688 nonsibs, weights: 0.9646 and 0.0354, #weights: 1750
Correct: 146246, incorrect 4, TP 556, FP 4, TN 145690, FN0, Prec. 99.29, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1749, incorrect 1, TP 61, FP 0, TN 1688, FN1, Prec. 100.0, Rec. 98.39, Spec. 100.0, Acc. 99.94%
Round 3
Generated 310806 non-sibling candidates from 558 siblings.
Generated 3782 non-sibling candidates from 62 siblings.
Correct: 165623, incorrect 2, TP 0, FP 0, TN 165623, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1135 rows with error results and 165625 rows with NaNs (typically hz different) from a     total of 311364 entries, resulting in 145739 entries.
Correct: 2002, incorrect 0, TP 0, FP 0, TN 2002, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 2002 rows with NaNs (typically hz different) from a     total of 3844 entries, resulting in 1842 entries.
Found 556 sibs and 145183 nonsibs, weights: 0.9962 and 0.0038, #weights: 145739
Found 62 sibs and 1780 nonsibs, weights: 0.9663 and 0.0337, #weights: 1842
Correct: 145733, incorrect 6, TP 556, FP 6, TN 145177, FN0, Prec. 98.93, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1842, incorrect 0, TP 62, FP 0, TN 1780, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 4
Generated 311922 non-sibling candidates from 559 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 166817, incorrect 2, TP 0, FP 0, TN 166817, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1138 rows with error results and 166819 rows with NaNs (typically hz different) from a     total of 312481 entries, resulting in 145662 entries.
Correct: 1894, incorrect 0, TP 0, FP 0, TN 1894, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 1894 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1827 entries.
Found 557 sibs and 145105 nonsibs, weights: 0.9962 and 0.0038, #weights: 145662
Found 61 sibs and 1766 nonsibs, weights: 0.9666 and 0.0334, #weights: 1827
Correct: 145652, incorrect 10, TP 557, FP 10, TN 145095, FN0, Prec. 98.24, Rec. 100.0, Spec. 99.99, Acc. 99.99%
Correct: 1827, incorrect 0, TP 61, FP 0, TN 1766, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 5
Generated 311922 non-sibling candidates from 559 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 165646, incorrect 2, TP 0, FP 0, TN 165646, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1135 rows with error results and 165648 rows with NaNs (typically hz different) from a     total of 312481 entries, resulting in 146833 entries.
Correct: 2032, incorrect 0, TP 0, FP 0, TN 2032, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 2032 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1689 entries.
Found 557 sibs and 146276 nonsibs, weights: 0.9962 and 0.0038, #weights: 146833
Found 61 sibs and 1628 nonsibs, weights: 0.9639 and 0.0361, #weights: 1689
Correct: 146827, incorrect 6, TP 557, FP 6, TN 146270, FN0, Prec. 98.93, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1689, incorrect 0, TP 61, FP 0, TN 1628, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 6
Generated 311922 non-sibling candidates from 559 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 166544, incorrect 2, TP 0, FP 0, TN 166544, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1138 rows with error results and 166546 rows with NaNs (typically hz different) from a     total of 312481 entries, resulting in 145935 entries.
Correct: 1938, incorrect 0, TP 0, FP 0, TN 1938, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 1938 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1783 entries.
Found 557 sibs and 145378 nonsibs, weights: 0.9962 and 0.0038, #weights: 145935
Found 61 sibs and 1722 nonsibs, weights: 0.9658 and 0.0342, #weights: 1783
Correct: 145929, incorrect 6, TP 557, FP 6, TN 145372, FN0, Prec. 98.93, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1783, incorrect 0, TP 61, FP 0, TN 1722, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 7
Generated 311922 non-sibling candidates from 559 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 165966, incorrect 2, TP 0, FP 0, TN 165966, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1136 rows with error results and 165968 rows with NaNs (typically hz different) from a     total of 312481 entries, resulting in 146513 entries.
Correct: 2002, incorrect 0, TP 0, FP 0, TN 2002, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 2002 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1719 entries.
Found 557 sibs and 145956 nonsibs, weights: 0.9962 and 0.0038, #weights: 146513
Found 61 sibs and 1658 nonsibs, weights: 0.9645 and 0.0355, #weights: 1719
Correct: 146503, incorrect 10, TP 557, FP 10, TN 145946, FN0, Prec. 98.24, Rec. 100.0, Spec. 99.99, Acc. 99.99%
Correct: 1719, incorrect 0, TP 61, FP 0, TN 1658, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 8
Generated 311922 non-sibling candidates from 559 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 166507, incorrect 2, TP 0, FP 0, TN 166507, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1134 rows with error results and 166509 rows with NaNs (typically hz different) from a     total of 312481 entries, resulting in 145972 entries.
Correct: 1940, incorrect 0, TP 0, FP 0, TN 1940, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 1940 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1781 entries.
Found 557 sibs and 145415 nonsibs, weights: 0.9962 and 0.0038, #weights: 145972
Found 61 sibs and 1720 nonsibs, weights: 0.9657 and 0.0343, #weights: 1781
Correct: 145968, incorrect 4, TP 557, FP 4, TN 145411, FN0, Prec. 99.29, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1781, incorrect 0, TP 61, FP 0, TN 1720, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 9
Generated 311922 non-sibling candidates from 559 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 165568, incorrect 1, TP 0, FP 0, TN 165568, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1139 rows with error results and 165569 rows with NaNs (typically hz different) from a     total of 312481 entries, resulting in 146912 entries.
Correct: 2041, incorrect 1, TP 0, FP 0, TN 2041, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 99.95%
Removing 0 rows with error results and 2042 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1679 entries.
Found 558 sibs and 146354 nonsibs, weights: 0.9962 and 0.0038, #weights: 146912
Found 60 sibs and 1619 nonsibs, weights: 0.9643 and 0.0357, #weights: 1679
Correct: 146906, incorrect 6, TP 558, FP 6, TN 146348, FN0, Prec. 98.94, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1679, incorrect 0, TP 60, FP 0, TN 1619, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
############# Round 7 ##############
Loading from filenames ../../../gt7/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../gt7/hosts.csv__nonsiblings_seed1_n681capture.pcap.ts.siblingresult.csv
Read 618 siblings and 406792 non-siblings from files.
Our algo: Not deciding on 1417 pairs for unknown/error reasons.
Found 515 sibs and 405478 nonsibs, weights: 0.9987 and 0.0013, #weights: 405993
Our algo stats: (1417) undecided, mcc: 0.9763999694141065, f1: 0.9762376237623762
Correct: 405969, incorrect 24, TP 493, FP 2, TN 405476, FN22, Prec. 99.6, Rec. 95.73, Spec. 100.0, Acc. 99.99%
Found 617 sibs and 405478 nonsibs, weights: 0.9985 and 0.0015, #weights: 406095
Beverly algo: Not deciding on 1315 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.08111991226804503, f1: 0.0161526198600809
Correct: 331420, incorrect 74675, TP 613, FP 74671, TN 330807, FN4, Prec. 0.81, Rec. 99.35, Spec. 81.58, Acc. 81.61%
## GROUP: RAv1
Generated 132 non-sibling candidates from 12 siblings.
Our algo: Not deciding on 2 pairs for unknown/error reasons.
Found 12 sibs and 130 nonsibs, weights: 0.9155 and 0.0845, #weights: 142
Our algo stats: (2) undecided, mcc: 1.0, f1: 1.0
Correct: 142, incorrect 0, TP 12, FP 0, TN 130, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Found 12 sibs and 130 nonsibs, weights: 0.9155 and 0.0845, #weights: 142
Beverly algo: Not deciding on 2 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.0, f1: 0.15584415584415587
Correct: 12, incorrect 130, TP 12, FP 130, TN 0, FN0, Prec. 8.45, Rec. 100.0, Spec. 0.0, Acc. 8.45%
## GROUP: RAv2
Generated 41006 non-sibling candidates from 203 siblings.
Our algo: Not deciding on 5 pairs for unknown/error reasons.
Found 203 sibs and 41001 nonsibs, weights: 0.9951 and 0.0049, #weights: 41204
Our algo stats: (5) undecided, mcc: 0.9875963883026432, f1: 0.9876543209876543
Correct: 41199, incorrect 5, TP 200, FP 2, TN 40999, FN3, Prec. 99.01, Rec. 98.52, Spec. 100.0, Acc. 99.99%
Found 203 sibs and 41001 nonsibs, weights: 0.9951 and 0.0049, #weights: 41204
Beverly algo: Not deciding on 5 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.05678349344226616, f1: 0.01621708413615928
Correct: 16696, incorrect 24508, TP 202, FP 24507, TN 16494, FN1, Prec. 0.82, Rec. 99.51, Spec. 40.23, Acc. 40.52%
## GROUP: nlnog
Generated 141000 non-sibling candidates from 376 siblings.
Our algo: Not deciding on 881 pairs for unknown/error reasons.
Found 274 sibs and 140221 nonsibs, weights: 0.998 and 0.002, #weights: 140495
Our algo stats: (881) undecided, mcc: 0.9721904996026752, f1: 0.9718574108818011
Correct: 140480, incorrect 15, TP 259, FP 0, TN 140221, FN15, Prec. 100.0, Rec. 94.53, Spec. 100.0, Acc. 99.99%
Found 375 sibs and 140221 nonsibs, weights: 0.9973 and 0.0027, #weights: 140596
Beverly algo: Not deciding on 780 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.0858877314624804, f1: 0.020005915727768962
Correct: 104151, incorrect 36445, TP 372, FP 36442, TN 103779, FN3, Prec. 1.01, Rec. 99.2, Spec. 74.01, Acc. 74.08%
## GROUP: servers
Generated 702 non-sibling candidates from 27 siblings.
Our algo: Not deciding on 2 pairs for unknown/error reasons.
Found 26 sibs and 701 nonsibs, weights: 0.9642 and 0.0358, #weights: 727
Our algo stats: (2) undecided, mcc: 0.9172529494462625, f1: 0.9166666666666666
Correct: 723, incorrect 4, TP 22, FP 0, TN 701, FN4, Prec. 100.0, Rec. 84.62, Spec. 100.0, Acc. 99.45%
Found 27 sibs and 701 nonsibs, weights: 0.9629 and 0.0371, #weights: 728
Beverly algo: Not deciding on 1 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.17284009833634095, f1: 0.12356979405034325
Correct: 345, incorrect 383, TP 27, FP 383, TN 318, FN0, Prec. 6.59, Rec. 100.0, Spec. 45.36, Acc. 47.39%
Round 0
Generated 306362 non-sibling candidates from 554 siblings.
Generated 4032 non-sibling candidates from 64 siblings.
Correct: 163759, incorrect 1, TP 0, FP 0, TN 163759, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1134 rows with error results and 163760 rows with NaNs (typically hz different) from a     total of 306916 entries, resulting in 143156 entries.
Correct: 2342, incorrect 1, TP 0, FP 0, TN 2342, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 99.96%
Removing 2 rows with error results and 2343 rows with NaNs (typically hz different) from a     total of 4096 entries, resulting in 1753 entries.
Found 553 sibs and 142603 nonsibs, weights: 0.9961 and 0.0039, #weights: 143156
Found 63 sibs and 1690 nonsibs, weights: 0.9641 and 0.0359, #weights: 1753
Correct: 143156, incorrect 0, TP 553, FP 0, TN 142603, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1753, incorrect 0, TP 63, FP 0, TN 1690, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 1
Generated 306362 non-sibling candidates from 554 siblings.
Generated 4032 non-sibling candidates from 64 siblings.
Correct: 165010, incorrect 2, TP 0, FP 0, TN 165010, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1134 rows with error results and 165012 rows with NaNs (typically hz different) from a     total of 306916 entries, resulting in 141904 entries.
Correct: 2206, incorrect 0, TP 0, FP 0, TN 2206, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 2206 rows with NaNs (typically hz different) from a     total of 4096 entries, resulting in 1890 entries.
Found 552 sibs and 141352 nonsibs, weights: 0.9961 and 0.0039, #weights: 141904
Found 64 sibs and 1826 nonsibs, weights: 0.9661 and 0.0339, #weights: 1890
Correct: 141904, incorrect 0, TP 552, FP 0, TN 141352, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1890, incorrect 0, TP 64, FP 0, TN 1826, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 2
Generated 307470 non-sibling candidates from 555 siblings.
Generated 3906 non-sibling candidates from 63 siblings.
Correct: 165828, incorrect 2, TP 0, FP 0, TN 165828, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1143 rows with error results and 165830 rows with NaNs (typically hz different) from a     total of 308025 entries, resulting in 142195 entries.
Correct: 2098, incorrect 0, TP 0, FP 0, TN 2098, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 2098 rows with NaNs (typically hz different) from a     total of 3969 entries, resulting in 1871 entries.
Found 553 sibs and 141642 nonsibs, weights: 0.9961 and 0.0039, #weights: 142195
Found 63 sibs and 1808 nonsibs, weights: 0.9663 and 0.0337, #weights: 1871
Correct: 142195, incorrect 0, TP 553, FP 0, TN 141642, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1871, incorrect 0, TP 63, FP 0, TN 1808, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 3
Generated 308580 non-sibling candidates from 556 siblings.
Generated 3782 non-sibling candidates from 62 siblings.
Correct: 165299, incorrect 1, TP 0, FP 0, TN 165299, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 25 rows with error results and 165300 rows with NaNs (typically hz different) from a     total of 309136 entries, resulting in 143836 entries.
Correct: 2154, incorrect 1, TP 0, FP 0, TN 2154, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 99.95%
Removing 123 rows with error results and 2155 rows with NaNs (typically hz different) from a     total of 3844 entries, resulting in 1689 entries.
Found 555 sibs and 143281 nonsibs, weights: 0.9961 and 0.0039, #weights: 143836
Found 61 sibs and 1628 nonsibs, weights: 0.9639 and 0.0361, #weights: 1689
Correct: 143836, incorrect 0, TP 555, FP 0, TN 143281, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1689, incorrect 0, TP 61, FP 0, TN 1628, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 4
Generated 308580 non-sibling candidates from 556 siblings.
Generated 3782 non-sibling candidates from 62 siblings.
Correct: 165037, incorrect 2, TP 0, FP 0, TN 165037, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1142 rows with error results and 165039 rows with NaNs (typically hz different) from a     total of 309136 entries, resulting in 144097 entries.
Correct: 2196, incorrect 0, TP 0, FP 0, TN 2196, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 2196 rows with NaNs (typically hz different) from a     total of 3844 entries, resulting in 1648 entries.
Found 554 sibs and 143543 nonsibs, weights: 0.9962 and 0.0038, #weights: 144097
Found 62 sibs and 1586 nonsibs, weights: 0.9624 and 0.0376, #weights: 1648
Correct: 144097, incorrect 0, TP 554, FP 0, TN 143543, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1648, incorrect 0, TP 62, FP 0, TN 1586, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 5
Generated 308580 non-sibling candidates from 556 siblings.
Generated 3782 non-sibling candidates from 62 siblings.
Correct: 167231, incorrect 2, TP 0, FP 0, TN 167231, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1144 rows with error results and 167233 rows with NaNs (typically hz different) from a     total of 309136 entries, resulting in 141903 entries.
Correct: 1952, incorrect 0, TP 0, FP 0, TN 1952, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 1952 rows with NaNs (typically hz different) from a     total of 3844 entries, resulting in 1892 entries.
Found 554 sibs and 141349 nonsibs, weights: 0.9961 and 0.0039, #weights: 141903
Found 62 sibs and 1830 nonsibs, weights: 0.9672 and 0.0328, #weights: 1892
Correct: 141903, incorrect 0, TP 554, FP 0, TN 141349, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1891, incorrect 1, TP 62, FP 1, TN 1829, FN0, Prec. 98.41, Rec. 100.0, Spec. 99.95, Acc. 99.95%
Round 6
Generated 309692 non-sibling candidates from 557 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 168076, incorrect 2, TP 0, FP 0, TN 168076, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1144 rows with error results and 168078 rows with NaNs (typically hz different) from a     total of 310249 entries, resulting in 142171 entries.
Correct: 1861, incorrect 0, TP 0, FP 0, TN 1861, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 1 rows with error results and 1861 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1860 entries.
Found 555 sibs and 141616 nonsibs, weights: 0.9961 and 0.0039, #weights: 142171
Found 61 sibs and 1799 nonsibs, weights: 0.9672 and 0.0328, #weights: 1860
Correct: 142171, incorrect 0, TP 555, FP 0, TN 141616, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1860, incorrect 0, TP 61, FP 0, TN 1799, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 7
Generated 310806 non-sibling candidates from 558 siblings.
Generated 3540 non-sibling candidates from 60 siblings.
Correct: 168361, incorrect 2, TP 0, FP 0, TN 168361, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1143 rows with error results and 168363 rows with NaNs (typically hz different) from a     total of 311364 entries, resulting in 143001 entries.
Correct: 1835, incorrect 0, TP 0, FP 0, TN 1835, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 1 rows with error results and 1835 rows with NaNs (typically hz different) from a     total of 3600 entries, resulting in 1765 entries.
Found 556 sibs and 142445 nonsibs, weights: 0.9961 and 0.0039, #weights: 143001
Found 60 sibs and 1705 nonsibs, weights: 0.966 and 0.034, #weights: 1765
Correct: 143001, incorrect 0, TP 556, FP 0, TN 142445, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1765, incorrect 0, TP 60, FP 0, TN 1705, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 8
Generated 310806 non-sibling candidates from 558 siblings.
Generated 3540 non-sibling candidates from 60 siblings.
Correct: 167940, incorrect 2, TP 0, FP 0, TN 167940, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1148 rows with error results and 167942 rows with NaNs (typically hz different) from a     total of 311364 entries, resulting in 143422 entries.
Correct: 1882, incorrect 0, TP 0, FP 0, TN 1882, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 1882 rows with NaNs (typically hz different) from a     total of 3600 entries, resulting in 1718 entries.
Found 556 sibs and 142866 nonsibs, weights: 0.9961 and 0.0039, #weights: 143422
Found 60 sibs and 1658 nonsibs, weights: 0.9651 and 0.0349, #weights: 1718
Correct: 143422, incorrect 0, TP 556, FP 0, TN 142866, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1718, incorrect 0, TP 60, FP 0, TN 1658, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 9
Generated 310806 non-sibling candidates from 558 siblings.
Generated 3540 non-sibling candidates from 60 siblings.
Correct: 167115, incorrect 2, TP 0, FP 0, TN 167115, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1146 rows with error results and 167117 rows with NaNs (typically hz different) from a     total of 311364 entries, resulting in 144247 entries.
Correct: 1970, incorrect 0, TP 0, FP 0, TN 1970, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 1970 rows with NaNs (typically hz different) from a     total of 3600 entries, resulting in 1630 entries.
Found 556 sibs and 143691 nonsibs, weights: 0.9961 and 0.0039, #weights: 144247
Found 60 sibs and 1570 nonsibs, weights: 0.9632 and 0.0368, #weights: 1630
Correct: 144247, incorrect 0, TP 556, FP 0, TN 143691, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1630, incorrect 0, TP 60, FP 0, TN 1570, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%

High-Level Stats for Beverly and Our Algorithm



In [871]:

    
# cleanse rows with all zeros (something went wrong in those)
hlbstats = hlbstats[~np.all(hlbstats == 0, axis=1)]
mean_prec = round(np.mean(hlbstats[:,0]),2)
mean_mcc = round(np.mean(hlbstats[:,4]),2)
print("High-Level Beverly stats against 2016 gt, mean across all measurements: Precision {}%, MCC {}".format(
    mean_prec, mean_mcc))
hlostats = hlostats[~np.all(hlostats == 0, axis=1)]
mean_prec = round(np.mean(hlostats[:,0]),2)
mean_mcc = round(np.mean(hlostats[:,4]),2)
print("High-Level Algo stats against 2016 gt, mean across all measurements: Precision {}%, MCC {}".format(
    mean_prec, mean_mcc))









    



High-Level Beverly stats against 2016 gt, mean across all measurements: Precision 0.9%, MCC 0.08
High-Level Algo stats against 2016 gt, mean across all measurements: Precision 99.68%, MCC 0.98

These numbers are for the upper part of Table III in the paper



In [872]:

    
for group in set(gsdb.keys()):
    x = gsdb[group]
    x = x[~np.all(x == 0, axis=1)]
    mean_prec = round(np.mean(x[:,0]),2)
    mean_mcc = round(np.mean(x[:,4]),2)
    print("Beverly group stats against 2016 gt, mean across all measurements: {}, Precision {}%, MCC {}".format(
    group, mean_prec, mean_mcc))









    



Beverly group stats against 2016 gt, mean across all measurements: servers, Precision 8.33%, MCC 0.17
Beverly group stats against 2016 gt, mean across all measurements: RAv2, Precision 0.79%, MCC 0.05
Beverly group stats against 2016 gt, mean across all measurements: nlnog, Precision 1.09%, MCC 0.09
Beverly group stats against 2016 gt, mean across all measurements: RAv1, Precision 8.35%, MCC 0.0

The numbers in the cell below are not used in the paper, also-eval_used_version is used



In [873]:

    
for group in set(gsdo.keys()):
    x = gsdo[group]
    x = x[~np.all(x == 0, axis=1)]
    mean_prec = round(np.mean(x[0:,0]),2)
    mean_mcc = round(np.mean(x[0:,4]),2)
    print("Algo group stats against 2016 gt, mean across all measurements: {}, Precision {}%, MCC {}".format(
    group, mean_prec, mean_mcc))









    



Algo group stats against 2016 gt, mean across all measurements: servers, Precision 100.0%, MCC 0.91
Algo group stats against 2016 gt, mean across all measurements: RAv2, Precision 99.16%, MCC 0.98
Algo group stats against 2016 gt, mean across all measurements: nlnog, Precision 99.91%, MCC 0.98
Algo group stats against 2016 gt, mean across all measurements: RAv1, Precision 100.0%, MCC 1.0

The 2 cells below compute the values for rows 5+6 of Table II



In [874]:

    
mp = []
mc = []
for group in set(mlstatsd_tre.keys()):
    x = mlstatsd_tre[group]
    x = x[~np.all(x == 0, axis=1)]
    mean_prec = round(np.mean(x[0:,0]),2)
    mean_mcc = round(np.mean(x[0:,4]),2)
    print("ML1 train stats against 2016 gt, mean across cross-vals: {} , Precision {}%, MCC {}".format(
    group, mean_prec, mean_mcc))
    mp.append(mean_prec)
    mc.append(mean_mcc)
    
print("ML1 train stats against 2016 gt, mean across all groups and cross-vals: Precision {}%, MCC {}".format(
    np.mean(mp), np.mean(mc)))









    



ML1 train stats against 2016 gt, mean across cross-vals: 4_tre , Precision 99.02%, MCC 1.0
ML1 train stats against 2016 gt, mean across cross-vals: 5_tre , Precision 99.43%, MCC 1.0
ML1 train stats against 2016 gt, mean across cross-vals: 1_tre , Precision 100.0%, MCC 1.0
ML1 train stats against 2016 gt, mean across cross-vals: 6_tre , Precision 98.85%, MCC 1.0
ML1 train stats against 2016 gt, mean across cross-vals: 7_tre , Precision 100.0%, MCC 1.0
ML1 train stats against 2016 gt, mean across cross-vals: 3_tre , Precision 98.98%, MCC 1.0
ML1 train stats against 2016 gt, mean across cross-vals: 2_tre , Precision 99.19%, MCC 1.0
ML1 train stats against 2016 gt, mean across all groups and cross-vals: Precision 99.35285714285715%, MCC 1.0



In [866]:

    
mp = []
mc = []
for group in set(mlstatsd_tee.keys()):
    x = mlstatsd_tee[group]
    x = x[~np.all(x == 0, axis=1)]
    mean_prec = round(np.mean(x[0:,0]),2)
    mean_mcc = round(np.mean(x[0:,4]),2)
    print("ML1 test stats against 2016 gt, mean across cross-vals: {} , Precision {}%, MCC {}".format(
    group, mean_prec, mean_mcc))
    mp.append(mean_prec)
    mc.append(mean_mcc)
    
print("ML1 test stats against 2016 gt, mean across all groups and cross-vals: Precision {}%, MCC {}".format(
    np.mean(mp), np.mean(mc)))









    



ML1 test stats against 2016 gt, mean across cross-vals: 3_tee , Precision 99.69%, MCC 1.0
ML1 test stats against 2016 gt, mean across cross-vals: 6_tee , Precision 100.0%, MCC 1.0
ML1 test stats against 2016 gt, mean across cross-vals: 1_tee , Precision 100.0%, MCC 1.0
ML1 test stats against 2016 gt, mean across cross-vals: 4_tee , Precision 99.68%, MCC 1.0
ML1 test stats against 2016 gt, mean across cross-vals: 5_tee , Precision 100.0%, MCC 1.0
ML1 test stats against 2016 gt, mean across cross-vals: 7_tee , Precision 99.84%, MCC 1.0
ML1 test stats against 2016 gt, mean across cross-vals: 2_tee , Precision 100.0%, MCC 1.0
ML1 test stats against 2016 gt, mean across all groups and cross-vals: Precision 99.88714285714286%, MCC 1.0



In [ ]:



In [ ]:

Investigations of Details and Individual Cases

Investigation of ground truth false negatives



In [898]:

    
falsecalls = pd.DataFrame()
for i in range(1,runs):
    print("############# Round {} ##############".format(i))
    sib, nonsib = get_pd_files("../../../gt{}/".format(i))
    falsecalls = falsecalls.append(sib[sib.decision.str.contains("ERROR|error|optsdiff|hz") == True])









    



############# Round 1 ##############
Loading from filenames ../../../gt1/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../gt1/hosts.csv__nonsiblings_seed1_n679capture.pcap.ts.siblingresult.csv
Read 279 siblings and 82026 non-siblings from files.
############# Round 2 ##############
Loading from filenames ../../../gt2/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../gt2/hosts.csv__nonsiblings_seed1_n679capture.pcap.ts.siblingresult.csv
Read 624 siblings and 410626 non-siblings from files.
############# Round 3 ##############
Loading from filenames ../../../gt3/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../gt3/hosts.csv__nonsiblings_seed1_n679capture.pcap.ts.siblingresult.csv
Read 625 siblings and 411909 non-siblings from files.
############# Round 4 ##############
Loading from filenames ../../../gt4/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../gt4/hosts.csv__nonsiblings_seed1_n679capture.pcap.ts.siblingresult.csv
Read 620 siblings and 408034 non-siblings from files.
############# Round 5 ##############
Loading from filenames ../../../gt5/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../gt5/hosts.csv__nonsiblings_seed1_n679capture.pcap.ts.siblingresult.csv
Read 620 siblings and 408034 non-siblings from files.
############# Round 6 ##############
Loading from filenames ../../../gt6/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../gt6/hosts.csv__nonsiblings_seed1_n679capture.pcap.ts.siblingresult.csv
Read 620 siblings and 408034 non-siblings from files.
############# Round 7 ##############
Loading from filenames ../../../gt7/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../gt7/hosts.csv__nonsiblings_seed1_n681capture.pcap.ts.siblingresult.csv
Read 618 siblings and 406792 non-siblings from files.






    Out[898]:






  
    
      
      ip4
      ip6
      hz4
      hz6
      hzdiff
      hz4r2
      hz6r2
      hzr2diff
      tcp_t_offset4
      tcp_t_offset6
      ...
      ott6_rng
      ott_rng_diff
      ott_rng_diff_rel
      opts4
      opts6
      optsdiff
      perc_85_val
      dec_bev
      decision
      label
    
    
      domain
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      ovh04.ring.nlnog.net
      192.99.153.129
      2607:5300:101::599
      250.0
      250.0
      0.0
      1.000000
      1.000000
      4.107825e-14
      2.168821e+08
      2.168670e+08
      ...
      52.681
      2.641
      0.048906
      MSS-
      MSS-SACK-TS-N-WS07-
      1
      2.025465
      non-sibling(optsdiff)
      non-sibling(optsdiff)
      1
    
    
      ovh03.ring.nlnog.net
      5.196.13.221
      2001:41d0:52:600::671
      250.0
      250.0
      0.0
      1.000000
      1.000000
      4.440892e-16
      1.130031e+09
      1.129895e+09
      ...
      38.706
      0.215
      0.005539
      MSS-
      MSS-SACK-TS-N-WS07-
      1
      0.227048
      non-sibling(optsdiff)
      non-sibling(optsdiff)
      1
    
    
      ovh02.ring.nlnog.net
      37.187.50.166
      2001:41d0:52:400::53b
      250.0
      250.0
      0.0
      1.000000
      1.000000
      1.154632e-14
      1.130830e+09
      1.130722e+09
      ...
      2143.810
      0.062
      0.000029
      MSS-
      MSS-SACK-TS-N-WS07-
      1
      0.532364
      non-sibling(optsdiff)
      non-sibling(optsdiff)
      1
    
    
      ovh04.ring.nlnog.net
      192.99.153.129
      2607:5300:101::599
      250.0
      250.0
      0.0
      1.000000
      1.000000
      2.542411e-13
      3.747511e+08
      3.746817e+08
      ...
      1492.405
      5.014
      0.003365
      MSS-
      MSS-SACK-TS-N-WS07-
      1
      5.104061
      non-sibling(optsdiff)
      non-sibling(optsdiff)
      1
    
    
      RA_6088
      5.57.17.65
      2a01:5040:20:30::1
      1001.0
      1001.0
      0.0
      1.000000
      1.000000
      4.884981e-15
      1.156965e+09
      1.156945e+09
      ...
      11797.220
      0.803
      0.000068
      MSS-SACK-TS-N-WS07-
      MSS-SACK-TS-N-WS07-
      0
      NaN
      NaN
      ERROR: spline calculation failed!
      1
    
    
      RA_6131
      217.196.147.89
      2a02:16a8:dc:200::1
      1001.0
      1001.0
      0.0
      1.000000
      1.000000
      1.776357e-15
      1.067010e+08
      1.066939e+08
      ...
      11345.467
      0.672
      0.000059
      MSS-SACK-TS-N-WS07-
      MSS-SACK-TS-N-WS07-
      0
      NaN
      NaN
      ERROR: spline calculation failed!
      1
    
    
      RA_6220
      217.196.33.252
      2a02:310:0:2958::16
      100.0
      1001.0
      901.0
      0.999904
      1.000000
      NaN
      1.161751e+08
      3.607975e+08
      ...
      NaN
      NaN
      NaN
      MSS-SACK-TS-N-WS07-
      MSS-SACK-TS-N-WS07-
      0
      NaN
      non-sibling (hz different)
      non-sibling (hz different)
      1
    
    
      ovh03.ring.nlnog.net
      5.196.13.221
      2001:41d0:52:600::671
      250.0
      250.0
      0.0
      1.000000
      1.000000
      1.567901e-11
      1.175528e+09
      1.174713e+09
      ...
      263.147
      2.446
      0.009339
      MSS-
      MSS-SACK-TS-N-WS07-
      1
      0.527214
      non-sibling(optsdiff)
      non-sibling(optsdiff)
      1
    
    
      ovh02.ring.nlnog.net
      37.187.50.166
      2001:41d0:52:400::53b
      250.0
      250.0
      0.0
      1.000000
      1.000000
      2.220446e-16
      1.175640e+09
      1.175541e+09
      ...
      2148.670
      0.043
      0.000020
      MSS-
      MSS-SACK-TS-N-WS07-
      1
      0.581357
      non-sibling(optsdiff)
      non-sibling(optsdiff)
      1
    
    
      tilaa01.ring.nlnog.net
      46.19.36.12
      2a02:2770::21a:4aff:feac:4576
      29381.0
      29508.0
      NaN
      0.128195
      0.128773
      NaN
      1.382994e+08
      1.382964e+08
      ...
      NaN
      NaN
      NaN
      MSS-SACK-TS-N-WS04-
      MSS-SACK-TS-N-WS04-
      0
      NaN
      NaN
      ERROR: too small clock hertz r-squares
      1
    
    
      ovh04.ring.nlnog.net
      192.99.153.129
      2607:5300:101::599
      250.0
      250.0
      0.0
      1.000000
      1.000000
      8.881784e-16
      4.204438e+08
      4.195066e+08
      ...
      1965.143
      0.023
      0.000012
      MSS-
      MSS-SACK-TS-N-WS07-
      1
      0.718889
      non-sibling(optsdiff)
      non-sibling(optsdiff)
      1
    
    
      RA_6220
      217.196.33.252
      2a02:310:0:2958::16
      100.0
      1001.0
      901.0
      0.998920
      1.000000
      NaN
      8.694358e+07
      5.403645e+08
      ...
      NaN
      NaN
      NaN
      MSS-SACK-TS-N-WS07-
      MSS-SACK-TS-N-WS07-
      0
      NaN
      non-sibling (hz different)
      non-sibling (hz different)
      1
    
    
      ovh03.ring.nlnog.net
      5.196.13.221
      2001:41d0:52:600::671
      250.0
      250.0
      0.0
      1.000000
      1.000000
      1.833814e-09
      1.281100e+09
      1.274004e+09
      ...
      64.741
      1.645
      0.025736
      MSS-
      MSS-SACK-TS-N-WS07-
      1
      41.154072
      non-sibling(optsdiff)
      non-sibling(optsdiff)
      1
    
    
      ovh02.ring.nlnog.net
      37.187.50.166
      2001:41d0:52:400::53b
      250.0
      250.0
      0.0
      1.000000
      1.000000
      7.549517e-15
      1.274883e+09
      1.274828e+09
      ...
      2152.142
      0.069
      0.000032
      MSS-
      MSS-SACK-TS-N-WS07-
      1
      0.461552
      non-sibling(optsdiff)
      non-sibling(optsdiff)
      1
    
    
      trueinternet01.ring.nlnog.net
      203.144.167.57
      2001:fb0:100:ffff:211:25ff:fe40:9468
      159389.0
      159706.0
      NaN
      0.687427
      0.695245
      NaN
      1.010581e+08
      1.010674e+08
      ...
      NaN
      NaN
      NaN
      MSS-SACK-TS-N-WS11-
      MSS-SACK-TS-N-WS11-
      0
      NaN
      NaN
      ERROR: too small clock hertz r-squares
      1
    
    
      ovh04.ring.nlnog.net
      192.99.153.129
      2607:5300:101::599
      250.0
      250.0
      0.0
      1.000000
      1.000000
      9.727774e-13
      5.188355e+08
      5.188099e+08
      ...
      1171.543
      0.535
      0.000457
      MSS-
      MSS-SACK-TS-N-WS07-
      1
      2.562532
      non-sibling(optsdiff)
      non-sibling(optsdiff)
      1
    
    
      RA_6220
      217.196.33.252
      2a02:310:0:2958::16
      100.0
      1001.0
      901.0
      0.999394
      1.000000
      NaN
      1.266629e+08
      9.380604e+08
      ...
      NaN
      NaN
      NaN
      MSS-SACK-TS-N-WS07-
      MSS-SACK-TS-N-WS07-
      0
      NaN
      non-sibling (hz different)
      non-sibling (hz different)
      1
    
    
      ovh03.ring.nlnog.net
      5.196.13.221
      2001:41d0:52:600::671
      250.0
      250.0
      0.0
      1.000000
      1.000000
      1.791678e-12
      1.283146e+09
      1.283025e+09
      ...
      272.415
      3.689
      0.013451
      MSS-
      MSS-SACK-TS-N-WS07-
      1
      4.235207
      non-sibling(optsdiff)
      non-sibling(optsdiff)
      1
    
    
      ovh02.ring.nlnog.net
      37.187.50.166
      2001:41d0:52:400::53b
      250.0
      250.0
      0.0
      1.000000
      1.000000
      3.996803e-15
      1.283867e+09
      1.283837e+09
      ...
      2142.631
      4.127
      0.001924
      MSS-
      MSS-SACK-TS-N-WS07-
      1
      0.421635
      non-sibling(optsdiff)
      non-sibling(optsdiff)
      1
    
    
      ovh04.ring.nlnog.net
      192.99.153.129
      2607:5300:101::599
      250.0
      250.0
      0.0
      1.000000
      1.000000
      4.440892e-16
      5.279198e+08
      5.278201e+08
      ...
      1529.123
      2.322
      0.001520
      MSS-
      MSS-SACK-TS-N-WS07-
      1
      0.412238
      non-sibling(optsdiff)
      non-sibling(optsdiff)
      1
    
    
      gossamerthreads01.ring.nlnog.net
      208.70.247.50
      2607:fcc0:2:1:208:70:247:50
      250.0
      1607.0
      NaN
      1.000000
      0.008749
      NaN
      4.294911e+09
      2.475138e+08
      ...
      NaN
      NaN
      NaN
      MSS-SACK-TS-N-WS06-
      MSS-SACK-TS-N-WS07-
      1
      NaN
      NaN
      ERROR: too small clock hertz r-squares
      1
    
    
      RA_6220
      217.196.33.252
      2a02:310:0:2958::16
      100.0
      1001.0
      901.0
      0.999994
      1.000000
      NaN
      1.302641e+08
      9.741713e+08
      ...
      NaN
      NaN
      NaN
      MSS-SACK-TS-N-WS07-
      MSS-SACK-TS-N-WS07-
      0
      NaN
      non-sibling (hz different)
      non-sibling (hz different)
      1
    
    
      pix01.ring.nlnog.net
      185.90.125.134
      2a03:87a0:125:134::1
      149497.0
      150262.0
      NaN
      0.728900
      0.733638
      NaN
      2.544141e+08
      2.544131e+08
      ...
      NaN
      NaN
      NaN
      MSS-SACK-TS-N-WS07-
      MSS-SACK-TS-N-WS07-
      0
      NaN
      NaN
      ERROR: too small clock hertz r-squares
      1
    
    
      ovh03.ring.nlnog.net
      5.196.13.221
      2001:41d0:52:600::671
      250.0
      250.0
      0.0
      1.000000
      1.000000
      4.174439e-14
      1.296654e+09
      1.292033e+09
      ...
      32.893
      2.376
      0.069716
      MSS-
      MSS-SACK-TS-N-WS07-
      1
      0.643468
      non-sibling(optsdiff)
      non-sibling(optsdiff)
      1
    
    
      ovh02.ring.nlnog.net
      37.187.50.166
      2001:41d0:52:400::53b
      250.0
      250.0
      0.0
      1.000000
      1.000000
      3.197442e-14
      1.297270e+09
      1.292859e+09
      ...
      1102.110
      2.223
      0.002019
      MSS-
      MSS-SACK-TS-N-WS07-
      1
      0.683236
      non-sibling(optsdiff)
      non-sibling(optsdiff)
      1
    
    
      RA_6220
      217.196.33.252
      2a02:310:0:2958::16
      100.0
      1001.0
      901.0
      0.999994
      1.000000
      NaN
      1.338738e+08
      1.010255e+09
      ...
      NaN
      NaN
      NaN
      MSS-SACK-TS-N-WS07-
      MSS-SACK-TS-N-WS07-
      0
      NaN
      non-sibling (hz different)
      non-sibling (hz different)
      1
    
    
      ovh03.ring.nlnog.net
      5.196.13.221
      2001:41d0:52:600::671
      250.0
      250.0
      0.0
      1.000000
      1.000000
      3.010037e-12
      1.628076e+09
      1.627960e+09
      ...
      739.895
      6.856
      0.009309
      MSS-
      MSS-SACK-TS-N-WS07-
      1
      3.770015
      non-sibling(optsdiff)
      non-sibling(optsdiff)
      1
    
    
      ovh02.ring.nlnog.net
      37.187.50.166
      2001:41d0:52:400::53b
      250.0
      250.0
      0.0
      1.000000
      1.000000
      4.440892e-16
      1.628848e+09
      1.628782e+09
      ...
      2184.444
      0.479
      0.000219
      MSS-
      MSS-SACK-TS-N-WS07-
      1
      4.116483
      non-sibling(optsdiff)
      non-sibling(optsdiff)
      1
    
    
      trueinternet01.ring.nlnog.net
      203.144.167.57
      2001:fb0:100:ffff:211:25ff:fe40:9468
      107919.0
      106731.0
      NaN
      0.457027
      0.450581
      NaN
      2.100992e+07
      2.100415e+07
      ...
      NaN
      NaN
      NaN
      MSS-SACK-TS-N-WS11-
      MSS-SACK-TS-N-WS11-
      0
      NaN
      NaN
      ERROR: too small clock hertz r-squares
      1
    
    
      ovh04.ring.nlnog.net
      192.99.153.129
      2607:5300:101::599
      250.0
      250.0
      0.0
      1.000000
      1.000000
      8.224532e-13
      8.728248e+08
      8.727867e+08
      ...
      588.936
      0.055
      0.000093
      MSS-
      MSS-SACK-TS-N-WS07-
      1
      1.657471
      non-sibling(optsdiff)
      non-sibling(optsdiff)
      1
    
    
      RA_6220
      217.196.33.252
      2a02:310:0:2958::16
      100.0
      1001.0
      901.0
      0.999937
      1.000000
      NaN
      2.682442e+08
      2.355818e+09
      ...
      NaN
      NaN
      NaN
      MSS-SACK-TS-N-WS07-
      MSS-SACK-TS-N-WS07-
      0
      NaN
      non-sibling (hz different)
      non-sibling (hz different)
      1
    
  

31 rows × 29 columns



In [900]:

    
from collections import Counter
Counter(falsecalls.decision.as_matrix())









    Out[900]:





Counter({'ERROR: spline calculation failed!': 2,
         'ERROR: too small clock hertz r-squares': 5,
         'non-sibling (hz different)': 6,
         'non-sibling(optsdiff)': 18})



In [903]:

    
set(falsecalls.index)









    Out[903]:





{'RA_6088',
 'RA_6131',
 'RA_6220',
 'gossamerthreads01.ring.nlnog.net',
 'ovh02.ring.nlnog.net',
 'ovh03.ring.nlnog.net',
 'ovh04.ring.nlnog.net',
 'pix01.ring.nlnog.net',
 'tilaa01.ring.nlnog.net',
 'trueinternet01.ring.nlnog.net'}



In [907]:

    
falsecalls.to_csv("falsecalls.csv")



In [908]:

    
falsecalls









    Out[908]:






  
    
      
      ip4
      ip6
      hz4
      hz6
      hzdiff
      hz4r2
      hz6r2
      hzr2diff
      tcp_t_offset4
      tcp_t_offset6
      ...
      ott6_rng
      ott_rng_diff
      ott_rng_diff_rel
      opts4
      opts6
      optsdiff
      perc_85_val
      dec_bev
      decision
      label
    
    
      domain
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      RA_6088
      5.57.17.65
      2a01:5040:20:30::1
      1001.0
      1001.0
      0.0
      1.000000
      1.000000
      4.884981e-15
      1.156965e+09
      1.156945e+09
      ...
      11797.220
      0.803
      0.000068
      MSS-SACK-TS-N-WS07-
      MSS-SACK-TS-N-WS07-
      0
      NaN
      NaN
      ERROR: spline calculation failed!
      1
    
    
      RA_6131
      217.196.147.89
      2a02:16a8:dc:200::1
      1001.0
      1001.0
      0.0
      1.000000
      1.000000
      1.776357e-15
      1.067010e+08
      1.066939e+08
      ...
      11345.467
      0.672
      0.000059
      MSS-SACK-TS-N-WS07-
      MSS-SACK-TS-N-WS07-
      0
      NaN
      NaN
      ERROR: spline calculation failed!
      1
    
    
      RA_6220
      217.196.33.252
      2a02:310:0:2958::16
      100.0
      1001.0
      901.0
      0.999937
      1.000000
      NaN
      2.682442e+08
      2.355818e+09
      ...
      NaN
      NaN
      NaN
      MSS-SACK-TS-N-WS07-
      MSS-SACK-TS-N-WS07-
      0
      NaN
      non-sibling (hz different)
      non-sibling (hz different)
      1
    
    
      RA_6220
      217.196.33.252
      2a02:310:0:2958::16
      100.0
      1001.0
      901.0
      0.999394
      1.000000
      NaN
      1.266629e+08
      9.380604e+08
      ...
      NaN
      NaN
      NaN
      MSS-SACK-TS-N-WS07-
      MSS-SACK-TS-N-WS07-
      0
      NaN
      non-sibling (hz different)
      non-sibling (hz different)
      1
    
    
      RA_6220
      217.196.33.252
      2a02:310:0:2958::16
      100.0
      1001.0
      901.0
      0.998920
      1.000000
      NaN
      8.694358e+07
      5.403645e+08
      ...
      NaN
      NaN
      NaN
      MSS-SACK-TS-N-WS07-
      MSS-SACK-TS-N-WS07-
      0
      NaN
      non-sibling (hz different)
      non-sibling (hz different)
      1
    
    
      RA_6220
      217.196.33.252
      2a02:310:0:2958::16
      100.0
      1001.0
      901.0
      0.999994
      1.000000
      NaN
      1.302641e+08
      9.741713e+08
      ...
      NaN
      NaN
      NaN
      MSS-SACK-TS-N-WS07-
      MSS-SACK-TS-N-WS07-
      0
      NaN
      non-sibling (hz different)
      non-sibling (hz different)
      1
    
    
      RA_6220
      217.196.33.252
      2a02:310:0:2958::16
      100.0
      1001.0
      901.0
      0.999994
      1.000000
      NaN
      1.338738e+08
      1.010255e+09
      ...
      NaN
      NaN
      NaN
      MSS-SACK-TS-N-WS07-
      MSS-SACK-TS-N-WS07-
      0
      NaN
      non-sibling (hz different)
      non-sibling (hz different)
      1
    
    
      RA_6220
      217.196.33.252
      2a02:310:0:2958::16
      100.0
      1001.0
      901.0
      0.999904
      1.000000
      NaN
      1.161751e+08
      3.607975e+08
      ...
      NaN
      NaN
      NaN
      MSS-SACK-TS-N-WS07-
      MSS-SACK-TS-N-WS07-
      0
      NaN
      non-sibling (hz different)
      non-sibling (hz different)
      1
    
    
      gossamerthreads01.ring.nlnog.net
      208.70.247.50
      2607:fcc0:2:1:208:70:247:50
      250.0
      1607.0
      NaN
      1.000000
      0.008749
      NaN
      4.294911e+09
      2.475138e+08
      ...
      NaN
      NaN
      NaN
      MSS-SACK-TS-N-WS06-
      MSS-SACK-TS-N-WS07-
      1
      NaN
      NaN
      ERROR: too small clock hertz r-squares
      1
    
    
      ovh02.ring.nlnog.net
      37.187.50.166
      2001:41d0:52:400::53b
      250.0
      250.0
      0.0
      1.000000
      1.000000
      2.220446e-16
      1.175640e+09
      1.175541e+09
      ...
      2148.670
      0.043
      0.000020
      MSS-
      MSS-SACK-TS-N-WS07-
      1
      0.581357
      non-sibling(optsdiff)
      non-sibling(optsdiff)
      1
    
    
      ovh02.ring.nlnog.net
      37.187.50.166
      2001:41d0:52:400::53b
      250.0
      250.0
      0.0
      1.000000
      1.000000
      4.440892e-16
      1.628848e+09
      1.628782e+09
      ...
      2184.444
      0.479
      0.000219
      MSS-
      MSS-SACK-TS-N-WS07-
      1
      4.116483
      non-sibling(optsdiff)
      non-sibling(optsdiff)
      1
    
    
      ovh02.ring.nlnog.net
      37.187.50.166
      2001:41d0:52:400::53b
      250.0
      250.0
      0.0
      1.000000
      1.000000
      1.154632e-14
      1.130830e+09
      1.130722e+09
      ...
      2143.810
      0.062
      0.000029
      MSS-
      MSS-SACK-TS-N-WS07-
      1
      0.532364
      non-sibling(optsdiff)
      non-sibling(optsdiff)
      1
    
    
      ovh02.ring.nlnog.net
      37.187.50.166
      2001:41d0:52:400::53b
      250.0
      250.0
      0.0
      1.000000
      1.000000
      7.549517e-15
      1.274883e+09
      1.274828e+09
      ...
      2152.142
      0.069
      0.000032
      MSS-
      MSS-SACK-TS-N-WS07-
      1
      0.461552
      non-sibling(optsdiff)
      non-sibling(optsdiff)
      1
    
    
      ovh02.ring.nlnog.net
      37.187.50.166
      2001:41d0:52:400::53b
      250.0
      250.0
      0.0
      1.000000
      1.000000
      3.197442e-14
      1.297270e+09
      1.292859e+09
      ...
      1102.110
      2.223
      0.002019
      MSS-
      MSS-SACK-TS-N-WS07-
      1
      0.683236
      non-sibling(optsdiff)
      non-sibling(optsdiff)
      1
    
    
      ovh02.ring.nlnog.net
      37.187.50.166
      2001:41d0:52:400::53b
      250.0
      250.0
      0.0
      1.000000
      1.000000
      3.996803e-15
      1.283867e+09
      1.283837e+09
      ...
      2142.631
      4.127
      0.001924
      MSS-
      MSS-SACK-TS-N-WS07-
      1
      0.421635
      non-sibling(optsdiff)
      non-sibling(optsdiff)
      1
    
    
      ovh03.ring.nlnog.net
      5.196.13.221
      2001:41d0:52:600::671
      250.0
      250.0
      0.0
      1.000000
      1.000000
      1.567901e-11
      1.175528e+09
      1.174713e+09
      ...
      263.147
      2.446
      0.009339
      MSS-
      MSS-SACK-TS-N-WS07-
      1
      0.527214
      non-sibling(optsdiff)
      non-sibling(optsdiff)
      1
    
    
      ovh03.ring.nlnog.net
      5.196.13.221
      2001:41d0:52:600::671
      250.0
      250.0
      0.0
      1.000000
      1.000000
      3.010037e-12
      1.628076e+09
      1.627960e+09
      ...
      739.895
      6.856
      0.009309
      MSS-
      MSS-SACK-TS-N-WS07-
      1
      3.770015
      non-sibling(optsdiff)
      non-sibling(optsdiff)
      1
    
    
      ovh03.ring.nlnog.net
      5.196.13.221
      2001:41d0:52:600::671
      250.0
      250.0
      0.0
      1.000000
      1.000000
      1.833814e-09
      1.281100e+09
      1.274004e+09
      ...
      64.741
      1.645
      0.025736
      MSS-
      MSS-SACK-TS-N-WS07-
      1
      41.154072
      non-sibling(optsdiff)
      non-sibling(optsdiff)
      1
    
    
      ovh03.ring.nlnog.net
      5.196.13.221
      2001:41d0:52:600::671
      250.0
      250.0
      0.0
      1.000000
      1.000000
      4.174439e-14
      1.296654e+09
      1.292033e+09
      ...
      32.893
      2.376
      0.069716
      MSS-
      MSS-SACK-TS-N-WS07-
      1
      0.643468
      non-sibling(optsdiff)
      non-sibling(optsdiff)
      1
    
    
      ovh03.ring.nlnog.net
      5.196.13.221
      2001:41d0:52:600::671
      250.0
      250.0
      0.0
      1.000000
      1.000000
      4.440892e-16
      1.130031e+09
      1.129895e+09
      ...
      38.706
      0.215
      0.005539
      MSS-
      MSS-SACK-TS-N-WS07-
      1
      0.227048
      non-sibling(optsdiff)
      non-sibling(optsdiff)
      1
    
    
      ovh03.ring.nlnog.net
      5.196.13.221
      2001:41d0:52:600::671
      250.0
      250.0
      0.0
      1.000000
      1.000000
      1.791678e-12
      1.283146e+09
      1.283025e+09
      ...
      272.415
      3.689
      0.013451
      MSS-
      MSS-SACK-TS-N-WS07-
      1
      4.235207
      non-sibling(optsdiff)
      non-sibling(optsdiff)
      1
    
    
      ovh04.ring.nlnog.net
      192.99.153.129
      2607:5300:101::599
      250.0
      250.0
      0.0
      1.000000
      1.000000
      4.107825e-14
      2.168821e+08
      2.168670e+08
      ...
      52.681
      2.641
      0.048906
      MSS-
      MSS-SACK-TS-N-WS07-
      1
      2.025465
      non-sibling(optsdiff)
      non-sibling(optsdiff)
      1
    
    
      ovh04.ring.nlnog.net
      192.99.153.129
      2607:5300:101::599
      250.0
      250.0
      0.0
      1.000000
      1.000000
      8.224532e-13
      8.728248e+08
      8.727867e+08
      ...
      588.936
      0.055
      0.000093
      MSS-
      MSS-SACK-TS-N-WS07-
      1
      1.657471
      non-sibling(optsdiff)
      non-sibling(optsdiff)
      1
    
    
      ovh04.ring.nlnog.net
      192.99.153.129
      2607:5300:101::599
      250.0
      250.0
      0.0
      1.000000
      1.000000
      8.881784e-16
      4.204438e+08
      4.195066e+08
      ...
      1965.143
      0.023
      0.000012
      MSS-
      MSS-SACK-TS-N-WS07-
      1
      0.718889
      non-sibling(optsdiff)
      non-sibling(optsdiff)
      1
    
    
      ovh04.ring.nlnog.net
      192.99.153.129
      2607:5300:101::599
      250.0
      250.0
      0.0
      1.000000
      1.000000
      2.542411e-13
      3.747511e+08
      3.746817e+08
      ...
      1492.405
      5.014
      0.003365
      MSS-
      MSS-SACK-TS-N-WS07-
      1
      5.104061
      non-sibling(optsdiff)
      non-sibling(optsdiff)
      1
    
    
      ovh04.ring.nlnog.net
      192.99.153.129
      2607:5300:101::599
      250.0
      250.0
      0.0
      1.000000
      1.000000
      4.440892e-16
      5.279198e+08
      5.278201e+08
      ...
      1529.123
      2.322
      0.001520
      MSS-
      MSS-SACK-TS-N-WS07-
      1
      0.412238
      non-sibling(optsdiff)
      non-sibling(optsdiff)
      1
    
    
      ovh04.ring.nlnog.net
      192.99.153.129
      2607:5300:101::599
      250.0
      250.0
      0.0
      1.000000
      1.000000
      9.727774e-13
      5.188355e+08
      5.188099e+08
      ...
      1171.543
      0.535
      0.000457
      MSS-
      MSS-SACK-TS-N-WS07-
      1
      2.562532
      non-sibling(optsdiff)
      non-sibling(optsdiff)
      1
    
    
      pix01.ring.nlnog.net
      185.90.125.134
      2a03:87a0:125:134::1
      149497.0
      150262.0
      NaN
      0.728900
      0.733638
      NaN
      2.544141e+08
      2.544131e+08
      ...
      NaN
      NaN
      NaN
      MSS-SACK-TS-N-WS07-
      MSS-SACK-TS-N-WS07-
      0
      NaN
      NaN
      ERROR: too small clock hertz r-squares
      1
    
    
      tilaa01.ring.nlnog.net
      46.19.36.12
      2a02:2770::21a:4aff:feac:4576
      29381.0
      29508.0
      NaN
      0.128195
      0.128773
      NaN
      1.382994e+08
      1.382964e+08
      ...
      NaN
      NaN
      NaN
      MSS-SACK-TS-N-WS04-
      MSS-SACK-TS-N-WS04-
      0
      NaN
      NaN
      ERROR: too small clock hertz r-squares
      1
    
    
      trueinternet01.ring.nlnog.net
      203.144.167.57
      2001:fb0:100:ffff:211:25ff:fe40:9468
      159389.0
      159706.0
      NaN
      0.687427
      0.695245
      NaN
      1.010581e+08
      1.010674e+08
      ...
      NaN
      NaN
      NaN
      MSS-SACK-TS-N-WS11-
      MSS-SACK-TS-N-WS11-
      0
      NaN
      NaN
      ERROR: too small clock hertz r-squares
      1
    
    
      trueinternet01.ring.nlnog.net
      203.144.167.57
      2001:fb0:100:ffff:211:25ff:fe40:9468
      107919.0
      106731.0
      NaN
      0.457027
      0.450581
      NaN
      2.100992e+07
      2.100415e+07
      ...
      NaN
      NaN
      NaN
      MSS-SACK-TS-N-WS11-
      MSS-SACK-TS-N-WS11-
      0
      NaN
      NaN
      ERROR: too small clock hertz r-squares
      1
    
  

31 rows × 29 columns



In [ ]:



In [ ]:



In [ ]:



In [798]:

    
hlostats[:,0]









    Out[798]:





array([  0. ,   0. ,   0. ,   0. ,   0. ,   0. ,   0. ,  99.6])



In [757]:

    
hlbstats









    Out[757]:





array([[  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
          0.00000000e+00,   0.00000000e+00],
       [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
          0.00000000e+00,   0.00000000e+00],
       [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
          0.00000000e+00,   0.00000000e+00],
       [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
          0.00000000e+00,   0.00000000e+00],
       [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
          0.00000000e+00,   0.00000000e+00],
       [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
          0.00000000e+00,   0.00000000e+00],
       [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
          0.00000000e+00,   0.00000000e+00],
       [  8.10000000e-01,   9.93500000e+01,   8.15800000e+01,
          8.16100000e+01,   8.11199123e-02]])



In [802]:

    
set(gsdb.keys())









    Out[802]:





{'RAv1', 'RAv2', 'nlnog', 'servers'}



In [771]:

    
gsdo









    Out[771]:





{'RAv1': array([[   0.,    0.,    0.,    0.,    0.],
        [   0.,    0.,    0.,    0.,    0.],
        [   0.,    0.,    0.,    0.,    0.],
        [   0.,    0.,    0.,    0.,    0.],
        [   0.,    0.,    0.,    0.,    0.],
        [   0.,    0.,    0.,    0.,    0.],
        [   0.,    0.,    0.,    0.,    0.],
        [ 100.,  100.,  100.,  100.,    1.]]),
 'RAv2': array([[   0.        ,    0.        ,    0.        ,    0.        ,    0.        ],
        [   0.        ,    0.        ,    0.        ,    0.        ,    0.        ],
        [   0.        ,    0.        ,    0.        ,    0.        ,    0.        ],
        [   0.        ,    0.        ,    0.        ,    0.        ,    0.        ],
        [   0.        ,    0.        ,    0.        ,    0.        ,    0.        ],
        [   0.        ,    0.        ,    0.        ,    0.        ,    0.        ],
        [   0.        ,    0.        ,    0.        ,    0.        ,    0.        ],
        [  99.01      ,   98.52      ,  100.        ,   99.99      ,
            0.98759639]]),
 'nlnog': array([[   0.       ,    0.       ,    0.       ,    0.       ,    0.       ],
        [   0.       ,    0.       ,    0.       ,    0.       ,    0.       ],
        [   0.       ,    0.       ,    0.       ,    0.       ,    0.       ],
        [   0.       ,    0.       ,    0.       ,    0.       ,    0.       ],
        [   0.       ,    0.       ,    0.       ,    0.       ,    0.       ],
        [   0.       ,    0.       ,    0.       ,    0.       ,    0.       ],
        [   0.       ,    0.       ,    0.       ,    0.       ,    0.       ],
        [ 100.       ,   94.53     ,  100.       ,   99.99     ,
            0.9721905]]),
 'servers': array([[   0.        ,    0.        ,    0.        ,    0.        ,    0.        ],
        [   0.        ,    0.        ,    0.        ,    0.        ,    0.        ],
        [   0.        ,    0.        ,    0.        ,    0.        ,    0.        ],
        [   0.        ,    0.        ,    0.        ,    0.        ,    0.        ],
        [   0.        ,    0.        ,    0.        ,    0.        ,    0.        ],
        [   0.        ,    0.        ,    0.        ,    0.        ,    0.        ],
        [   0.        ,    0.        ,    0.        ,    0.        ,    0.        ],
        [ 100.        ,   84.62      ,  100.        ,   99.45      ,
            0.91725295]])}



In [808]:

    
mlstatsd_tre









    Out[808]:





{'7_tre': array([[ 100.,  100.,  100.,  100.,    1.],
        [ 100.,  100.,  100.,  100.,    1.],
        [ 100.,  100.,  100.,  100.,    1.],
        [ 100.,  100.,  100.,  100.,    1.],
        [ 100.,  100.,  100.,  100.,    1.],
        [ 100.,  100.,  100.,  100.,    1.],
        [ 100.,  100.,  100.,  100.,    1.],
        [ 100.,  100.,  100.,  100.,    1.],
        [ 100.,  100.,  100.,  100.,    1.],
        [ 100.,  100.,  100.,  100.,    1.]])}



In [690]:

    
Image(graphs[-1][-1].create_png())









    Out[690]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [731]:

    
df = mix_sib_nonsib(sib, nonsib, "full", 42)
df_ours = df[["label", "decision"]].copy()
dec2prd_ours(df_ours)
undec = len(df_ours[df_ours.isnull().any(axis=1)])
print("Our algo: Not deciding on {} pairs for unknown/error reasons.".format(undec))
df_ours = df_ours.dropna()
print("Our algo stats: ({}) undecided".format(undec))
a = stats(df_ours["label"], df_ours["dec_prd"])









    



Our algo: Not deciding on 1417 pairs for unknown/error reasons.
Our algo stats: (1417) undecided
Correct: 405969, incorrect 24, TP 493, FP 2, TN 405476, FN22, Prec. 99.6, Rec. 95.73, Spec. 100.0, Acc. 99.99%



In [737]:

    
list(a)
a = list(a)
a.append(5)
a









    Out[737]:





[99.599999999999994, 95.730000000000004, 100.0, 99.989999999999995, 5, 5]



In [693]:

    
from sklearn.metrics import f1_score
f1_score(df_ours["label"], df_ours["dec_prd"])









    Out[693]:





0.97623762376237622



In [695]:

    
from sklearn.metrics import matthews_corrcoef
matthews_corrcoef(df_ours["label"], df_ours["dec_prd"])









    Out[695]:





0.97639996941410645



In [ ]:



In [ ]:

Evaluate Hand-Tuned Algo For Overfitting

Calculate Training Error
Evaluate only new hosts to get Test error



In [109]:

    
for i in range(1,2):
    print("############# Round {} ##############".format(i))
    sib, nonsib = get_pd_files("../../../gt{}/".format(i))
    #print("Columns: {}".format(list(sib.columns.values)))
    get_ouralgo_stats(sib, nonsib)









    



############# Round 1 ##############
Loading from filenames ../../../gt1/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../gt1/hosts.csv__nonsiblings_seed1_n679capture.pcap.ts.siblingresult.csv
Read 279 siblings and 82026 non-siblings from files.
Our algo: Not deciding on 70 pairs for unknown/error reasons.
Our algo stats: (70) undecided
Correct: 82229, incorrect 6, TP 255, FP 0, TN 81974, FN6, Prec. 100.0, Rec. 97.7, Spec. 100.0, Acc. 99.99%



In [136]:

    
for i in range(2,8):
    print("############# Round {} ##############".format(i))
    sib, nonsib = get_pd_files("../../../algo-eval/gt{}/".format(i))
    #print("Columns: {}".format(list(sib.columns.values)))
    get_ouralgo_stats(sib, nonsib)









    



############# Round 2 ##############
Loading from filenames ../../../algo-eval/gt2/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../algo-eval/gt2/hosts.csv__nonsiblings_seed1_n407_capture.pcap.ts.siblingresult.csv
Read 369 siblings and 145041 non-siblings from files.
Our algo: Not deciding on 32 pairs for unknown/error reasons.
Our algo stats: (32) undecided
Correct: 145365, incorrect 13, TP 328, FP 2, TN 145037, FN11, Prec. 99.39, Rec. 96.76, Spec. 100.0, Acc. 99.99%
############# Round 3 ##############
Loading from filenames ../../../algo-eval/gt3/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../algo-eval/gt3/hosts.csv__nonsiblings_seed1_n407_capture.pcap.ts.siblingresult.csv
Read 370 siblings and 145804 non-siblings from files.
Our algo: Not deciding on 44 pairs for unknown/error reasons.
Our algo stats: (44) undecided
Correct: 146113, incorrect 17, TP 317, FP 2, TN 145796, FN15, Prec. 99.37, Rec. 95.48, Spec. 100.0, Acc. 99.99%
############# Round 4 ##############
Loading from filenames ../../../algo-eval/gt4/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../algo-eval/gt4/hosts.csv__nonsiblings_seed1_n407_capture.pcap.ts.siblingresult.csv
Read 366 siblings and 144258 non-siblings from files.
Our algo: Not deciding on 779 pairs for unknown/error reasons.
Our algo stats: (779) undecided
Correct: 143827, incorrect 18, TP 335, FP 2, TN 143492, FN16, Prec. 99.41, Rec. 95.44, Spec. 100.0, Acc. 99.99%
############# Round 5 ##############
Loading from filenames ../../../algo-eval/gt5/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../algo-eval/gt5/hosts.csv__nonsiblings_seed1_n407_capture.pcap.ts.siblingresult.csv
Read 366 siblings and 144258 non-siblings from files.
Our algo: Not deciding on 23 pairs for unknown/error reasons.
Our algo stats: (23) undecided
Correct: 144590, incorrect 11, TP 334, FP 0, TN 144256, FN11, Prec. 100.0, Rec. 96.81, Spec. 100.0, Acc. 99.99%
############# Round 6 ##############
Loading from filenames ../../../algo-eval/gt6/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../algo-eval/gt6/hosts.csv__nonsiblings_seed1_n407_capture.pcap.ts.siblingresult.csv
Read 366 siblings and 144258 non-siblings from files.
Our algo: Not deciding on 792 pairs for unknown/error reasons.
Our algo stats: (792) undecided
Correct: 143819, incorrect 13, TP 324, FP 2, TN 143495, FN11, Prec. 99.39, Rec. 96.72, Spec. 100.0, Acc. 99.99%
############# Round 7 ##############
Loading from filenames ../../../algo-eval/gt7/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../algo-eval/gt7/hosts.csv__nonsiblings_seed1_n407_capture.pcap.ts.siblingresult.csv
Read 369 siblings and 146571 non-siblings from files.
Our algo: Not deciding on 822 pairs for unknown/error reasons.
Our algo stats: (822) undecided
Correct: 146105, incorrect 13, TP 314, FP 2, TN 145791, FN11, Prec. 99.37, Rec. 96.62, Spec. 100.0, Acc. 99.99%



In [137]:

    
for i in range(2,8):
    print("############# Round {} ##############".format(i))
    #sib, nonsib = get_pd_files("../../../eval-algo/gt{}/".format(i))
    groups = assign_groups(sib)
    groupset = set(groups)
    for i in groupset:
        print("## GROUP: {}".format(i))
        groupsib = sib[sib["group"] == i].copy()
        groupnonsib = match_nonsibs(groupsib, nonsib)
        get_ouralgo_stats(groupsib, groupnonsib)
        #get_bev_stats(groupsib, groupnonsib)









    



############# Round 2 ##############
## GROUP: RAv1
Generated 132 non-sibling candidates from 12 siblings.
Our algo: Not deciding on 2 pairs for unknown/error reasons.
Our algo stats: (2) undecided
Correct: 142, incorrect 0, TP 12, FP 0, TN 130, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
## GROUP: RAv2
Generated 41006 non-sibling candidates from 203 siblings.
Our algo: Not deciding on 5 pairs for unknown/error reasons.
Our algo stats: (5) undecided
Correct: 41199, incorrect 5, TP 200, FP 2, TN 40999, FN3, Prec. 99.01, Rec. 98.52, Spec. 100.0, Acc. 99.99%
## GROUP: nlnog
Generated 18360 non-sibling candidates from 136 siblings.
Our algo: Not deciding on 316 pairs for unknown/error reasons.
Our algo stats: (316) undecided
Correct: 18173, incorrect 7, TP 86, FP 0, TN 18087, FN7, Prec. 100.0, Rec. 92.47, Spec. 100.0, Acc. 99.96%
## GROUP: servers
Generated 306 non-sibling candidates from 18 siblings.
Our algo: Not deciding on 2 pairs for unknown/error reasons.
Our algo stats: (2) undecided
Correct: 321, incorrect 1, TP 16, FP 0, TN 305, FN1, Prec. 100.0, Rec. 94.12, Spec. 100.0, Acc. 99.69%
############# Round 3 ##############
## GROUP: RAv1
Generated 132 non-sibling candidates from 12 siblings.
Our algo: Not deciding on 2 pairs for unknown/error reasons.
Our algo stats: (2) undecided
Correct: 142, incorrect 0, TP 12, FP 0, TN 130, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
## GROUP: RAv2
Generated 41006 non-sibling candidates from 203 siblings.
Our algo: Not deciding on 5 pairs for unknown/error reasons.
Our algo stats: (5) undecided
Correct: 41199, incorrect 5, TP 200, FP 2, TN 40999, FN3, Prec. 99.01, Rec. 98.52, Spec. 100.0, Acc. 99.99%
## GROUP: nlnog
Generated 18360 non-sibling candidates from 136 siblings.
Our algo: Not deciding on 316 pairs for unknown/error reasons.
Our algo stats: (316) undecided
Correct: 18173, incorrect 7, TP 86, FP 0, TN 18087, FN7, Prec. 100.0, Rec. 92.47, Spec. 100.0, Acc. 99.96%
## GROUP: servers
Generated 306 non-sibling candidates from 18 siblings.
Our algo: Not deciding on 2 pairs for unknown/error reasons.
Our algo stats: (2) undecided
Correct: 321, incorrect 1, TP 16, FP 0, TN 305, FN1, Prec. 100.0, Rec. 94.12, Spec. 100.0, Acc. 99.69%
############# Round 4 ##############
## GROUP: RAv1
Generated 132 non-sibling candidates from 12 siblings.
Our algo: Not deciding on 2 pairs for unknown/error reasons.
Our algo stats: (2) undecided
Correct: 142, incorrect 0, TP 12, FP 0, TN 130, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
## GROUP: RAv2
Generated 41006 non-sibling candidates from 203 siblings.
Our algo: Not deciding on 5 pairs for unknown/error reasons.
Our algo stats: (5) undecided
Correct: 41199, incorrect 5, TP 200, FP 2, TN 40999, FN3, Prec. 99.01, Rec. 98.52, Spec. 100.0, Acc. 99.99%
## GROUP: nlnog
Generated 18360 non-sibling candidates from 136 siblings.
Our algo: Not deciding on 316 pairs for unknown/error reasons.
Our algo stats: (316) undecided
Correct: 18173, incorrect 7, TP 86, FP 0, TN 18087, FN7, Prec. 100.0, Rec. 92.47, Spec. 100.0, Acc. 99.96%
## GROUP: servers
Generated 306 non-sibling candidates from 18 siblings.
Our algo: Not deciding on 2 pairs for unknown/error reasons.
Our algo stats: (2) undecided
Correct: 321, incorrect 1, TP 16, FP 0, TN 305, FN1, Prec. 100.0, Rec. 94.12, Spec. 100.0, Acc. 99.69%
############# Round 5 ##############
## GROUP: RAv1
Generated 132 non-sibling candidates from 12 siblings.
Our algo: Not deciding on 2 pairs for unknown/error reasons.
Our algo stats: (2) undecided
Correct: 142, incorrect 0, TP 12, FP 0, TN 130, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
## GROUP: RAv2
Generated 41006 non-sibling candidates from 203 siblings.
Our algo: Not deciding on 5 pairs for unknown/error reasons.
Our algo stats: (5) undecided
Correct: 41199, incorrect 5, TP 200, FP 2, TN 40999, FN3, Prec. 99.01, Rec. 98.52, Spec. 100.0, Acc. 99.99%
## GROUP: nlnog
Generated 18360 non-sibling candidates from 136 siblings.
Our algo: Not deciding on 316 pairs for unknown/error reasons.
Our algo stats: (316) undecided
Correct: 18173, incorrect 7, TP 86, FP 0, TN 18087, FN7, Prec. 100.0, Rec. 92.47, Spec. 100.0, Acc. 99.96%
## GROUP: servers
Generated 306 non-sibling candidates from 18 siblings.
Our algo: Not deciding on 2 pairs for unknown/error reasons.
Our algo stats: (2) undecided
Correct: 321, incorrect 1, TP 16, FP 0, TN 305, FN1, Prec. 100.0, Rec. 94.12, Spec. 100.0, Acc. 99.69%
############# Round 6 ##############
## GROUP: RAv1
Generated 132 non-sibling candidates from 12 siblings.
Our algo: Not deciding on 2 pairs for unknown/error reasons.
Our algo stats: (2) undecided
Correct: 142, incorrect 0, TP 12, FP 0, TN 130, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
## GROUP: RAv2
Generated 41006 non-sibling candidates from 203 siblings.
Our algo: Not deciding on 5 pairs for unknown/error reasons.
Our algo stats: (5) undecided
Correct: 41199, incorrect 5, TP 200, FP 2, TN 40999, FN3, Prec. 99.01, Rec. 98.52, Spec. 100.0, Acc. 99.99%
## GROUP: nlnog
Generated 18360 non-sibling candidates from 136 siblings.
Our algo: Not deciding on 316 pairs for unknown/error reasons.
Our algo stats: (316) undecided
Correct: 18173, incorrect 7, TP 86, FP 0, TN 18087, FN7, Prec. 100.0, Rec. 92.47, Spec. 100.0, Acc. 99.96%
## GROUP: servers
Generated 306 non-sibling candidates from 18 siblings.
Our algo: Not deciding on 2 pairs for unknown/error reasons.
Our algo stats: (2) undecided
Correct: 321, incorrect 1, TP 16, FP 0, TN 305, FN1, Prec. 100.0, Rec. 94.12, Spec. 100.0, Acc. 99.69%
############# Round 7 ##############
## GROUP: RAv1
Generated 132 non-sibling candidates from 12 siblings.
Our algo: Not deciding on 2 pairs for unknown/error reasons.
Our algo stats: (2) undecided
Correct: 142, incorrect 0, TP 12, FP 0, TN 130, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
## GROUP: RAv2
Generated 41006 non-sibling candidates from 203 siblings.
Our algo: Not deciding on 5 pairs for unknown/error reasons.
Our algo stats: (5) undecided
Correct: 41199, incorrect 5, TP 200, FP 2, TN 40999, FN3, Prec. 99.01, Rec. 98.52, Spec. 100.0, Acc. 99.99%
## GROUP: nlnog
Generated 18360 non-sibling candidates from 136 siblings.
Our algo: Not deciding on 316 pairs for unknown/error reasons.
Our algo stats: (316) undecided
Correct: 18173, incorrect 7, TP 86, FP 0, TN 18087, FN7, Prec. 100.0, Rec. 92.47, Spec. 100.0, Acc. 99.96%
## GROUP: servers
Generated 306 non-sibling candidates from 18 siblings.
Our algo: Not deciding on 2 pairs for unknown/error reasons.
Our algo stats: (2) undecided
Correct: 321, incorrect 1, TP 16, FP 0, TN 305, FN1, Prec. 100.0, Rec. 94.12, Spec. 100.0, Acc. 99.69%

High-Level Comparison of Results



In [9]:

    
for i in range(7,8):
    print("############# Round {} ##############".format(i))
    sib, nonsib = get_pd_files("../../../gt{}/".format(i))
    #print("Columns: {}".format(list(sib.columns.values)))
    get_ouralgo_stats(sib, nonsib)
    get_bev_stats(sib, nonsib)
    nonsibfil = match_nonsibs(sib, nonsib)









    



############# Round 7 ##############
Loading from filenames ../../../gt7/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../gt7/hosts.csv__nonsiblings_seed1_n681capture.pcap.ts.siblingresult.csv
Read 618 siblings and 406792 non-siblings from files.
Our algo: Not deciding on 1417 pairs for unknown/error reasons.
Our algo stats: (1417) undecided
Correct: 405969, incorrect 24, TP 493, FP 2, TN 405476, FN22, Prec. 99.6, Rec. 95.73, Spec. 100.0, Acc. 99.99%
Beverly algo stats:
Beverly algo: Not deciding on 1315 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided
Correct: 331420, incorrect 74675, TP 613, FP 74671, TN 330807, FN4, Prec. 0.81, Rec. 99.35, Spec. 81.58, Acc. 81.61%
Generated 381306 non-sibling candidates from 618 siblings.

Comparison of results at group level



In [84]:

    
for i in range(2,3):
    print("############# Round {} ##############".format(i))
    #sib, nonsib = get_pd_files("../../../gt{}/".format(i))
    groups = assign_groups(sib)
    groupset = set(groups)
    for i in groupset:
        print("## GROUP: {}".format(i))
        groupsib = sib[sib["group"] == i].copy()
        groupnonsib = match_nonsibs(groupsib, nonsib)
        get_ouralgo_stats(groupsib, groupnonsib)
        get_bev_stats(groupsib, groupnonsib)
    #print("Columns: {}".format(list(sib.columns.values)))
    #get_ouralgo_stats(sib, nonsib)
    #get_bev_stats(sib, nonsib)
    #nonsibfil = match_nonsibs(sib, nonsib)









    



############# Round 2 ##############
## GROUP: RAv1
Generated 132 non-sibling candidates from 12 siblings.
Our algo: Not deciding on 0 pairs for unknown/error reasons.
Our algo stats: (0) undecided
Correct: 144, incorrect 0, TP 12, FP 0, TN 132, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Beverly algo stats:
Beverly algo: Not deciding on 0 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided
Correct: 12, incorrect 132, TP 12, FP 132, TN 0, FN0, Prec. 8.33, Rec. 100.0, Spec. 0.0, Acc. 8.33%
## GROUP: RAv2
Generated 42642 non-sibling candidates from 207 siblings.
Our algo: Not deciding on 4 pairs for unknown/error reasons.
Our algo stats: (4) undecided
Correct: 42837, incorrect 8, TP 199, FP 2, TN 42638, FN6, Prec. 99.0, Rec. 97.07, Spec. 100.0, Acc. 99.98%
Beverly algo stats:
Beverly algo: Not deciding on 4 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided
Correct: 16785, incorrect 26060, TP 204, FP 26059, TN 16581, FN1, Prec. 0.78, Rec. 99.51, Spec. 38.89, Acc. 39.18%
## GROUP: nlnog
Generated 145542 non-sibling candidates from 382 siblings.
Our algo: Not deciding on 114 pairs for unknown/error reasons.
Our algo stats: (114) undecided
Correct: 145800, incorrect 10, TP 302, FP 0, TN 145498, FN10, Prec. 100.0, Rec. 96.79, Spec. 100.0, Acc. 99.99%
Beverly algo stats:
Beverly algo: Not deciding on 44 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided
Correct: 108048, incorrect 37832, TP 379, FP 37829, TN 107669, FN3, Prec. 0.99, Rec. 99.21, Spec. 74.0, Acc. 74.07%
## GROUP: servers
Generated 506 non-sibling candidates from 23 siblings.
Our algo: Not deciding on 0 pairs for unknown/error reasons.
Our algo stats: (0) undecided
Correct: 526, incorrect 3, TP 20, FP 0, TN 506, FN3, Prec. 100.0, Rec. 86.96, Spec. 100.0, Acc. 99.43%
Beverly algo stats:
Beverly algo: Not deciding on 0 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided
Correct: 183, incorrect 346, TP 23, FP 346, TN 160, FN0, Prec. 6.23, Rec. 100.0, Spec. 31.62, Acc. 34.59%



In [79]:









    Out[79]:





{'RAv1', 'RAv2', 'nlnog', 'servers'}

ML with proportional group sampling

Strategy:

take siblings from each group and generate 10 ShuffleSplit iterations
create matching nonsibs and mix all
adjust weights



In [621]:

    
# functions for ML with proprtional group sampling
def split_stratified_groups(sib, splits, nr):
    from sklearn.model_selection import KFold # non-overlapping!
    groups = assign_groups(sib)
    groupset = set(groups)
    gsibdf_train = pd.DataFrame(columns=sib.columns)
    gsibdf_test = pd.DataFrame(columns=sib.columns)
    for i in groupset:
        groupsib = sib[sib["group"] == i].copy()
        #print("## GROUP: {} with {} elements.".format(i, len(groupsib)))
        ks = KFold(n_splits=splits, random_state=42, shuffle=True)
        labels, features = make_labels_features(groupsib)
        ctr = -1
        for train_index, test_index in ks.split(groupsib):
            ctr += 1                
            if (ctr == nr):
            #print("TRAIN:", train_index, "TEST:", test_index)
                gsibdf_train = gsibdf_train.append(groupsib.iloc[train_index])
                gsibdf_test = gsibdf_test.append(groupsib.iloc[test_index])
                break
    return [gsibdf_train, gsibdf_test]


def dt_train(labels, features, weight, rs=42):
    estimator = DecisionTreeClassifier(max_depth=30, min_samples_leaf=5, random_state=42)
    est = estimator.fit(features, labels, sample_weight=weight)
    return est

def kfold_train_test(sib, nonsib):
    kfolds = 10
    stats_train_error = np.empty((10,4), dtype=float)
    stats_test_error = np.empty((10,4), dtype=float)
    graphs = []
    for i in range(10):
        print("Round {}".format(i))
        # pick proportionally from each group
        train_sib, test_sib = split_stratified_groups(sib, 10, i)
        # create, select, and mix matching nonsibs
        train_nonsib = match_nonsibs(train_sib, nonsib)
        test_nonsib = match_nonsibs(test_sib, nonsib)
        train = mix_sib_nonsib(train_sib,train_nonsib, "all")
        # prune NaNs out
        train, train_prune_lbl, train_prune_prd = prune_data_for_ml(train)
        test = mix_sib_nonsib(test_sib,test_nonsib, "all")
        test, test_prune_lbl, test_prune_prd = prune_data_for_ml(test)
        # split out features, labels, and weights
        train_lbl, train_ftr = make_labels_features(train)
        test_lbl, test_ftr = make_labels_features(test)
        train_weight = get_sample_weight_one_input(train)
        test_weight = get_sample_weight_one_input(test)
        # train estimator
        est = dt_train(train_lbl, train_ftr, train_weight)   
        stats_train_error[i] = stats(train_lbl, est.predict(train_ftr))
        stats_test_error[i]  =  stats(test_lbl, est.predict(test_ftr))
        graph = dt_plot(est, train_ftr)
        graphs.append(graph)
        #Image(graph.create_png())  
    return stats_train_error, stats_test_error



In [618]:

    
tre, tee = kfold_train_test(sib, nonsib)









    



Round 0
Generated 108570 non-sibling candidates from 330 siblings.
Generated 1482 non-sibling candidates from 39 siblings.






    



/usr/local/lib/python3.5/dist-packages/ipykernel/__main__.py:172: RuntimeWarning: invalid value encountered in long_scalars






    



Correct: 70828, incorrect 1, TP 0, FP 0, TN 70828, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 667 rows with error results and 70829 rows with NaNs (typically hz different) from a     total of 108900 entries, resulting in 38071 entries.
Correct: 991, incorrect 1, TP 0, FP 0, TN 991, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 99.9%
Removing 1 rows with error results and 992 rows with NaNs (typically hz different) from a     total of 1521 entries, resulting in 529 entries.
Found 329 sibs and 37742 nonsibs, weights: 0.9914 and 0.0086, #weights: 38071
Found 38 sibs and 491 nonsibs, weights: 0.9282 and 0.0718, #weights: 529
Correct: 38071, incorrect 0, TP 329, FP 0, TN 37742, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 529, incorrect 0, TP 38, FP 0, TN 491, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 1
Generated 108570 non-sibling candidates from 330 siblings.
Generated 1482 non-sibling candidates from 39 siblings.
Correct: 70452, incorrect 1, TP 0, FP 0, TN 70452, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 10 rows with error results and 70453 rows with NaNs (typically hz different) from a     total of 108900 entries, resulting in 38447 entries.
Correct: 1034, incorrect 1, TP 0, FP 0, TN 1034, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 99.9%
Removing 77 rows with error results and 1035 rows with NaNs (typically hz different) from a     total of 1521 entries, resulting in 486 entries.
Found 329 sibs and 38118 nonsibs, weights: 0.9914 and 0.0086, #weights: 38447
Found 38 sibs and 448 nonsibs, weights: 0.9218 and 0.0782, #weights: 486
Correct: 38447, incorrect 0, TP 329, FP 0, TN 38118, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 486, incorrect 0, TP 38, FP 0, TN 448, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 2
Generated 109230 non-sibling candidates from 331 siblings.
Generated 1406 non-sibling candidates from 38 siblings.
Correct: 71525, incorrect 2, TP 0, FP 0, TN 71525, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 669 rows with error results and 71527 rows with NaNs (typically hz different) from a     total of 109561 entries, resulting in 38034 entries.
Correct: 898, incorrect 0, TP 0, FP 0, TN 898, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 898 rows with NaNs (typically hz different) from a     total of 1444 entries, resulting in 546 entries.
Found 329 sibs and 37705 nonsibs, weights: 0.9913 and 0.0087, #weights: 38034
Found 38 sibs and 508 nonsibs, weights: 0.9304 and 0.0696, #weights: 546
Correct: 38034, incorrect 0, TP 329, FP 0, TN 37705, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 546, incorrect 0, TP 38, FP 0, TN 508, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%






    



/usr/local/lib/python3.5/dist-packages/ipykernel/__main__.py:173: RuntimeWarning: invalid value encountered in long_scalars






    



Round 3
Generated 109892 non-sibling candidates from 332 siblings.
Generated 1332 non-sibling candidates from 37 siblings.
Correct: 71593, incorrect 2, TP 0, FP 0, TN 71593, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 670 rows with error results and 71595 rows with NaNs (typically hz different) from a     total of 110224 entries, resulting in 38629 entries.
Correct: 890, incorrect 0, TP 0, FP 0, TN 890, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 2 rows with error results and 890 rows with NaNs (typically hz different) from a     total of 1369 entries, resulting in 479 entries.
Found 330 sibs and 38299 nonsibs, weights: 0.9915 and 0.0085, #weights: 38629
Found 37 sibs and 442 nonsibs, weights: 0.9228 and 0.0772, #weights: 479
Correct: 38629, incorrect 0, TP 330, FP 0, TN 38299, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 479, incorrect 0, TP 37, FP 0, TN 442, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 4
Generated 109892 non-sibling candidates from 332 siblings.
Generated 1332 non-sibling candidates from 37 siblings.
Correct: 72037, incorrect 2, TP 0, FP 0, TN 72037, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 673 rows with error results and 72039 rows with NaNs (typically hz different) from a     total of 110224 entries, resulting in 38185 entries.
Correct: 852, incorrect 0, TP 0, FP 0, TN 852, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 852 rows with NaNs (typically hz different) from a     total of 1369 entries, resulting in 517 entries.
Found 330 sibs and 37855 nonsibs, weights: 0.9914 and 0.0086, #weights: 38185
Found 37 sibs and 480 nonsibs, weights: 0.9284 and 0.0716, #weights: 517
Correct: 38185, incorrect 0, TP 330, FP 0, TN 37855, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 517, incorrect 0, TP 37, FP 0, TN 480, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 5
Generated 109892 non-sibling candidates from 332 siblings.
Generated 1332 non-sibling candidates from 37 siblings.
Correct: 71539, incorrect 2, TP 0, FP 0, TN 71539, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 674 rows with error results and 71541 rows with NaNs (typically hz different) from a     total of 110224 entries, resulting in 38683 entries.
Correct: 904, incorrect 0, TP 0, FP 0, TN 904, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 904 rows with NaNs (typically hz different) from a     total of 1369 entries, resulting in 465 entries.
Found 330 sibs and 38353 nonsibs, weights: 0.9915 and 0.0085, #weights: 38683
Found 37 sibs and 428 nonsibs, weights: 0.9204 and 0.0796, #weights: 465
Correct: 38683, incorrect 0, TP 330, FP 0, TN 38353, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 463, incorrect 2, TP 37, FP 2, TN 426, FN0, Prec. 94.87, Rec. 100.0, Spec. 99.53, Acc. 99.57%
Round 6
Generated 110556 non-sibling candidates from 333 siblings.
Generated 1260 non-sibling candidates from 36 siblings.
Correct: 72442, incorrect 2, TP 0, FP 0, TN 72442, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 672 rows with error results and 72444 rows with NaNs (typically hz different) from a     total of 110889 entries, resulting in 38445 entries.
Correct: 811, incorrect 0, TP 0, FP 0, TN 811, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 1 rows with error results and 811 rows with NaNs (typically hz different) from a     total of 1296 entries, resulting in 485 entries.
Found 331 sibs and 38114 nonsibs, weights: 0.9914 and 0.0086, #weights: 38445
Found 36 sibs and 449 nonsibs, weights: 0.9258 and 0.0742, #weights: 485
Correct: 38445, incorrect 0, TP 331, FP 0, TN 38114, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 485, incorrect 0, TP 36, FP 0, TN 449, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 7
Generated 110556 non-sibling candidates from 333 siblings.
Generated 1260 non-sibling candidates from 36 siblings.
Correct: 71653, incorrect 2, TP 0, FP 0, TN 71653, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 676 rows with error results and 71655 rows with NaNs (typically hz different) from a     total of 110889 entries, resulting in 39234 entries.
Correct: 890, incorrect 0, TP 0, FP 0, TN 890, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 890 rows with NaNs (typically hz different) from a     total of 1296 entries, resulting in 406 entries.
Found 331 sibs and 38903 nonsibs, weights: 0.9916 and 0.0084, #weights: 39234
Found 36 sibs and 370 nonsibs, weights: 0.9113 and 0.0887, #weights: 406
Correct: 39234, incorrect 0, TP 331, FP 0, TN 38903, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 406, incorrect 0, TP 36, FP 0, TN 370, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 8
Generated 111222 non-sibling candidates from 334 siblings.
Generated 1190 non-sibling candidates from 35 siblings.
Correct: 72854, incorrect 2, TP 0, FP 0, TN 72854, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 678 rows with error results and 72856 rows with NaNs (typically hz different) from a     total of 111556 entries, resulting in 38700 entries.
Correct: 768, incorrect 0, TP 0, FP 0, TN 768, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 768 rows with NaNs (typically hz different) from a     total of 1225 entries, resulting in 457 entries.
Found 332 sibs and 38368 nonsibs, weights: 0.9914 and 0.0086, #weights: 38700
Found 35 sibs and 422 nonsibs, weights: 0.9234 and 0.0766, #weights: 457
Correct: 38700, incorrect 0, TP 332, FP 0, TN 38368, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 457, incorrect 0, TP 35, FP 0, TN 422, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 9
Generated 111222 non-sibling candidates from 334 siblings.
Generated 1190 non-sibling candidates from 35 siblings.
Correct: 72707, incorrect 2, TP 0, FP 0, TN 72707, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 676 rows with error results and 72709 rows with NaNs (typically hz different) from a     total of 111556 entries, resulting in 38847 entries.
Correct: 784, incorrect 0, TP 0, FP 0, TN 784, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 784 rows with NaNs (typically hz different) from a     total of 1225 entries, resulting in 441 entries.
Found 332 sibs and 38515 nonsibs, weights: 0.9915 and 0.0085, #weights: 38847
Found 35 sibs and 406 nonsibs, weights: 0.9206 and 0.0794, #weights: 441
Correct: 38847, incorrect 0, TP 332, FP 0, TN 38515, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 441, incorrect 0, TP 35, FP 0, TN 406, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%



In [619]:

    
tre









    Out[619]:





array([[ 100.,  100.,  100.,  100.],
       [ 100.,  100.,  100.,  100.],
       [ 100.,  100.,  100.,  100.],
       [ 100.,  100.,  100.,  100.],
       [ 100.,  100.,  100.,  100.],
       [ 100.,  100.,  100.,  100.],
       [ 100.,  100.,  100.,  100.],
       [ 100.,  100.,  100.,  100.],
       [ 100.,  100.,  100.,  100.],
       [ 100.,  100.,  100.,  100.]])



In [620]:

    
tee









    Out[620]:





array([[ 100.  ,  100.  ,  100.  ,  100.  ],
       [ 100.  ,  100.  ,  100.  ,  100.  ],
       [ 100.  ,  100.  ,  100.  ,  100.  ],
       [ 100.  ,  100.  ,  100.  ,  100.  ],
       [ 100.  ,  100.  ,  100.  ,  100.  ],
       [  94.87,  100.  ,   99.53,   99.57],
       [ 100.  ,  100.  ,  100.  ,  100.  ],
       [ 100.  ,  100.  ,  100.  ,  100.  ],
       [ 100.  ,  100.  ,  100.  ,  100.  ],
       [ 100.  ,  100.  ,  100.  ,  100.  ]])



In [683]:

    
Image(graph.create_png())









    Out[683]:



In [608]:

    
for i in graphs:
    Image(i.create_png())



In [615]:

    
Image(graphs[5].create_png())









    Out[615]:



In [598]:

    
train_lbl.as_matrix()









    Out[598]:





array([ 1.,  1.,  1., ...,  0.,  0.,  0.])

Old and unused approaches

ML test with shuffled, pseudo-stratified group sampling

Strategy:

take siblings from each group and generate 10 ShuffleSplit iterations
create matching nonsibs and mix all
adjust weights



In [407]:

    
#groups = assign_groups(sib)
#from sklearn.model_selection import ShuffleSplit # shufflesplit does not generate non-overlapping splits!
#from sklearn.model_selection import KFold # non-overlapping!



In [408]:

    
def split_stratified_groups(sib, splits, nr):
    from sklearn.model_selection import KFold # non-overlapping!
    groups = assign_groups(sib)
    groupset = set(groups)
    gsibdf_train = pd.DataFrame(columns=sib.columns)
    gsibdf_test = pd.DataFrame(columns=sib.columns)
    for i in groupset:
        groupsib = sib[sib["group"] == i].copy()
        print("## GROUP: {} with {} elements.".format(i, len(groupsib)))
        ks = KFold(n_splits=splits, random_state=42, shuffle=True)
        labels, features = make_labels_features(groupsib)
        ctr = 0
        for train_index, test_index in ks.split(groupsib):
            ctr += 1
            if (ctr == nr):
            #print("TRAIN:", train_index, "TEST:", test_index)
                gsibdf_train = gsibdf_train.append(groupsib.iloc[train_index])
                gsibdf_test = gsibdf_test.append(groupsib.iloc[test_index])
                break
    return [gsibdf_train, gsibdf_test]



In [409]:

    
train_sib, test_sib = split_stratified_groups(sib, 10, 1)









    



## GROUP: RAv1 with 12 elements.
## GROUP: RAv2 with 203 elements.
## GROUP: nlnog with 136 elements.
## GROUP: servers with 18 elements.



In [410]:

    
train_nonsib = match_nonsibs(train_sib, nonsib)
test_nonsib = match_nonsibs(test_sib, nonsib)









    



Generated 108570 non-sibling candidates from 330 siblings.
Generated 1482 non-sibling candidates from 39 siblings.



In [531]:

    
train = mix_sib_nonsib(train_sib,train_nonsib, "all")
train, train_prune_lbl, train_prune_prd = prune_data_for_ml(train)
test = mix_sib_nonsib(test_sib,test_nonsib, "all")
test, test_prune_lbl, test_prune_prd = prune_data_for_ml(test)









    



Correct: 70828, incorrect 1, TP 0, FP 0, TN 70828, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 667 rows with error results and 70829 rows with NaNs (typically hz different) from a     total of 108900 entries, resulting in 38071 entries.
Correct: 991, incorrect 1, TP 0, FP 0, TN 991, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 99.9%
Removing 1 rows with error results and 992 rows with NaNs (typically hz different) from a     total of 1521 entries, resulting in 529 entries.






    



/usr/local/lib/python3.5/dist-packages/ipykernel/__main__.py:172: RuntimeWarning: invalid value encountered in long_scalars



In [532]:

    
train_lbl, train_ftr = make_labels_features(train)
test_lbl, test_ftr = make_labels_features(test)



In [533]:

    
train_weight = get_sample_weight_one_input(train)
test_weight = get_sample_weight_one_input(test)









    



Found 329 sibs and 37742 nonsibs, weights: 0.9914 and 0.0086, #weights: 38071
Found 38 sibs and 491 nonsibs, weights: 0.9282 and 0.0718, #weights: 529



In [549]:

    
def dt_train(labels, features, weight, rs=42):
    estimator = DecisionTreeClassifier(max_depth=30, min_samples_leaf=5, random_state=42)
    est = estimator.fit(features, labels, sample_weight=weight)
    return est



In [550]:

    
est = dt_train(train_lbl, train_ftr, train_weight)



In [551]:

    
## WIP TODO now build scoring function



In [552]:

    
prd = est.predict(train_ftr)



In [553]:

    
print("score: {}".format(est.score(train_ftr, train_lbl, sample_weight=train_weight)))









    



score: 1.0



In [554]:

    
stats(train_lbl, prd)









    



Correct: 38071, incorrect 0, TP 329, FP 0, TN 37742, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%



In [555]:

    
stats(test_lbl, est.predict(test_ftr))









    



Correct: 529, incorrect 0, TP 38, FP 0, TN 491, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%



In [547]:

    
def dt_plot(estimator, features):
    from sklearn import tree
    dot_data = tree.export_graphviz(estimator, out_file=None, 
                         feature_names=list(features.columns.values),  
                         class_names=["non-sibling", "sibling"],  
                         filled=True, rounded=True,  
                         special_characters=True) 
    import pydotplus
    graph = pydotplus.graph_from_dot_data(dot_data) 
    
    return graph
    #print("score: {}".format(est.score(features_test, labels_test, sample_weight=weights_test)))
    # return prd, lbl, graph



In [556]:

    
graph = dt_plot(est, train_ftr)
Image(graph.create_png())









    Out[556]:



In [ ]:



In [ ]:

	ip4	ip6	hz4	hz6	hzdiff	hz4r2	hz6r2	hzr2diff	tcp_t_offset4	tcp_t_offset6	...	ott6_rng	ott_rng_diff	ott_rng_diff_rel	opts4	opts6	optsdiff	perc_85_val	dec_bev	decision	label
domain
ovh04.ring.nlnog.net	192.99.153.129	2607:5300:101::599	250.0	250.0	0.0	1.000000	1.000000	4.107825e-14	2.168821e+08	2.168670e+08	...	52.681	2.641	0.048906	MSS-	MSS-SACK-TS-N-WS07-	1	2.025465	non-sibling(optsdiff)	non-sibling(optsdiff)	1
ovh03.ring.nlnog.net	5.196.13.221	2001:41d0:52:600::671	250.0	250.0	0.0	1.000000	1.000000	4.440892e-16	1.130031e+09	1.129895e+09	...	38.706	0.215	0.005539	MSS-	MSS-SACK-TS-N-WS07-	1	0.227048	non-sibling(optsdiff)	non-sibling(optsdiff)	1
ovh02.ring.nlnog.net	37.187.50.166	2001:41d0:52:400::53b	250.0	250.0	0.0	1.000000	1.000000	1.154632e-14	1.130830e+09	1.130722e+09	...	2143.810	0.062	0.000029	MSS-	MSS-SACK-TS-N-WS07-	1	0.532364	non-sibling(optsdiff)	non-sibling(optsdiff)	1
ovh04.ring.nlnog.net	192.99.153.129	2607:5300:101::599	250.0	250.0	0.0	1.000000	1.000000	2.542411e-13	3.747511e+08	3.746817e+08	...	1492.405	5.014	0.003365	MSS-	MSS-SACK-TS-N-WS07-	1	5.104061	non-sibling(optsdiff)	non-sibling(optsdiff)	1
RA_6088	5.57.17.65	2a01:5040:20:30::1	1001.0	1001.0	0.0	1.000000	1.000000	4.884981e-15	1.156965e+09	1.156945e+09	...	11797.220	0.803	0.000068	MSS-SACK-TS-N-WS07-	MSS-SACK-TS-N-WS07-	0	NaN	NaN	ERROR: spline calculation failed!	1
RA_6131	217.196.147.89	2a02:16a8:dc:200::1	1001.0	1001.0	0.0	1.000000	1.000000	1.776357e-15	1.067010e+08	1.066939e+08	...	11345.467	0.672	0.000059	MSS-SACK-TS-N-WS07-	MSS-SACK-TS-N-WS07-	0	NaN	NaN	ERROR: spline calculation failed!	1
RA_6220	217.196.33.252	2a02:310:0:2958::16	100.0	1001.0	901.0	0.999904	1.000000	NaN	1.161751e+08	3.607975e+08	...	NaN	NaN	NaN	MSS-SACK-TS-N-WS07-	MSS-SACK-TS-N-WS07-	0	NaN	non-sibling (hz different)	non-sibling (hz different)	1
ovh03.ring.nlnog.net	5.196.13.221	2001:41d0:52:600::671	250.0	250.0	0.0	1.000000	1.000000	1.567901e-11	1.175528e+09	1.174713e+09	...	263.147	2.446	0.009339	MSS-	MSS-SACK-TS-N-WS07-	1	0.527214	non-sibling(optsdiff)	non-sibling(optsdiff)	1
ovh02.ring.nlnog.net	37.187.50.166	2001:41d0:52:400::53b	250.0	250.0	0.0	1.000000	1.000000	2.220446e-16	1.175640e+09	1.175541e+09	...	2148.670	0.043	0.000020	MSS-	MSS-SACK-TS-N-WS07-	1	0.581357	non-sibling(optsdiff)	non-sibling(optsdiff)	1
tilaa01.ring.nlnog.net	46.19.36.12	2a02:2770::21a:4aff:feac:4576	29381.0	29508.0	NaN	0.128195	0.128773	NaN	1.382994e+08	1.382964e+08	...	NaN	NaN	NaN	MSS-SACK-TS-N-WS04-	MSS-SACK-TS-N-WS04-	0	NaN	NaN	ERROR: too small clock hertz r-squares	1
ovh04.ring.nlnog.net	192.99.153.129	2607:5300:101::599	250.0	250.0	0.0	1.000000	1.000000	8.881784e-16	4.204438e+08	4.195066e+08	...	1965.143	0.023	0.000012	MSS-	MSS-SACK-TS-N-WS07-	1	0.718889	non-sibling(optsdiff)	non-sibling(optsdiff)	1
RA_6220	217.196.33.252	2a02:310:0:2958::16	100.0	1001.0	901.0	0.998920	1.000000	NaN	8.694358e+07	5.403645e+08	...	NaN	NaN	NaN	MSS-SACK-TS-N-WS07-	MSS-SACK-TS-N-WS07-	0	NaN	non-sibling (hz different)	non-sibling (hz different)	1
ovh03.ring.nlnog.net	5.196.13.221	2001:41d0:52:600::671	250.0	250.0	0.0	1.000000	1.000000	1.833814e-09	1.281100e+09	1.274004e+09	...	64.741	1.645	0.025736	MSS-	MSS-SACK-TS-N-WS07-	1	41.154072	non-sibling(optsdiff)	non-sibling(optsdiff)	1
ovh02.ring.nlnog.net	37.187.50.166	2001:41d0:52:400::53b	250.0	250.0	0.0	1.000000	1.000000	7.549517e-15	1.274883e+09	1.274828e+09	...	2152.142	0.069	0.000032	MSS-	MSS-SACK-TS-N-WS07-	1	0.461552	non-sibling(optsdiff)	non-sibling(optsdiff)	1
trueinternet01.ring.nlnog.net	203.144.167.57	2001:fb0:100:ffff:211:25ff:fe40:9468	159389.0	159706.0	NaN	0.687427	0.695245	NaN	1.010581e+08	1.010674e+08	...	NaN	NaN	NaN	MSS-SACK-TS-N-WS11-	MSS-SACK-TS-N-WS11-	0	NaN	NaN	ERROR: too small clock hertz r-squares	1
ovh04.ring.nlnog.net	192.99.153.129	2607:5300:101::599	250.0	250.0	0.0	1.000000	1.000000	9.727774e-13	5.188355e+08	5.188099e+08	...	1171.543	0.535	0.000457	MSS-	MSS-SACK-TS-N-WS07-	1	2.562532	non-sibling(optsdiff)	non-sibling(optsdiff)	1
RA_6220	217.196.33.252	2a02:310:0:2958::16	100.0	1001.0	901.0	0.999394	1.000000	NaN	1.266629e+08	9.380604e+08	...	NaN	NaN	NaN	MSS-SACK-TS-N-WS07-	MSS-SACK-TS-N-WS07-	0	NaN	non-sibling (hz different)	non-sibling (hz different)	1
ovh03.ring.nlnog.net	5.196.13.221	2001:41d0:52:600::671	250.0	250.0	0.0	1.000000	1.000000	1.791678e-12	1.283146e+09	1.283025e+09	...	272.415	3.689	0.013451	MSS-	MSS-SACK-TS-N-WS07-	1	4.235207	non-sibling(optsdiff)	non-sibling(optsdiff)	1
ovh02.ring.nlnog.net	37.187.50.166	2001:41d0:52:400::53b	250.0	250.0	0.0	1.000000	1.000000	3.996803e-15	1.283867e+09	1.283837e+09	...	2142.631	4.127	0.001924	MSS-	MSS-SACK-TS-N-WS07-	1	0.421635	non-sibling(optsdiff)	non-sibling(optsdiff)	1
ovh04.ring.nlnog.net	192.99.153.129	2607:5300:101::599	250.0	250.0	0.0	1.000000	1.000000	4.440892e-16	5.279198e+08	5.278201e+08	...	1529.123	2.322	0.001520	MSS-	MSS-SACK-TS-N-WS07-	1	0.412238	non-sibling(optsdiff)	non-sibling(optsdiff)	1
gossamerthreads01.ring.nlnog.net	208.70.247.50	2607:fcc0:2:1:208:70:247:50	250.0	1607.0	NaN	1.000000	0.008749	NaN	4.294911e+09	2.475138e+08	...	NaN	NaN	NaN	MSS-SACK-TS-N-WS06-	MSS-SACK-TS-N-WS07-	1	NaN	NaN	ERROR: too small clock hertz r-squares	1
RA_6220	217.196.33.252	2a02:310:0:2958::16	100.0	1001.0	901.0	0.999994	1.000000	NaN	1.302641e+08	9.741713e+08	...	NaN	NaN	NaN	MSS-SACK-TS-N-WS07-	MSS-SACK-TS-N-WS07-	0	NaN	non-sibling (hz different)	non-sibling (hz different)	1
pix01.ring.nlnog.net	185.90.125.134	2a03:87a0:125:134::1	149497.0	150262.0	NaN	0.728900	0.733638	NaN	2.544141e+08	2.544131e+08	...	NaN	NaN	NaN	MSS-SACK-TS-N-WS07-	MSS-SACK-TS-N-WS07-	0	NaN	NaN	ERROR: too small clock hertz r-squares	1
ovh03.ring.nlnog.net	5.196.13.221	2001:41d0:52:600::671	250.0	250.0	0.0	1.000000	1.000000	4.174439e-14	1.296654e+09	1.292033e+09	...	32.893	2.376	0.069716	MSS-	MSS-SACK-TS-N-WS07-	1	0.643468	non-sibling(optsdiff)	non-sibling(optsdiff)	1
ovh02.ring.nlnog.net	37.187.50.166	2001:41d0:52:400::53b	250.0	250.0	0.0	1.000000	1.000000	3.197442e-14	1.297270e+09	1.292859e+09	...	1102.110	2.223	0.002019	MSS-	MSS-SACK-TS-N-WS07-	1	0.683236	non-sibling(optsdiff)	non-sibling(optsdiff)	1
RA_6220	217.196.33.252	2a02:310:0:2958::16	100.0	1001.0	901.0	0.999994	1.000000	NaN	1.338738e+08	1.010255e+09	...	NaN	NaN	NaN	MSS-SACK-TS-N-WS07-	MSS-SACK-TS-N-WS07-	0	NaN	non-sibling (hz different)	non-sibling (hz different)	1
ovh03.ring.nlnog.net	5.196.13.221	2001:41d0:52:600::671	250.0	250.0	0.0	1.000000	1.000000	3.010037e-12	1.628076e+09	1.627960e+09	...	739.895	6.856	0.009309	MSS-	MSS-SACK-TS-N-WS07-	1	3.770015	non-sibling(optsdiff)	non-sibling(optsdiff)	1
ovh02.ring.nlnog.net	37.187.50.166	2001:41d0:52:400::53b	250.0	250.0	0.0	1.000000	1.000000	4.440892e-16	1.628848e+09	1.628782e+09	...	2184.444	0.479	0.000219	MSS-	MSS-SACK-TS-N-WS07-	1	4.116483	non-sibling(optsdiff)	non-sibling(optsdiff)	1
trueinternet01.ring.nlnog.net	203.144.167.57	2001:fb0:100:ffff:211:25ff:fe40:9468	107919.0	106731.0	NaN	0.457027	0.450581	NaN	2.100992e+07	2.100415e+07	...	NaN	NaN	NaN	MSS-SACK-TS-N-WS11-	MSS-SACK-TS-N-WS11-	0	NaN	NaN	ERROR: too small clock hertz r-squares	1
ovh04.ring.nlnog.net	192.99.153.129	2607:5300:101::599	250.0	250.0	0.0	1.000000	1.000000	8.224532e-13	8.728248e+08	8.727867e+08	...	588.936	0.055	0.000093	MSS-	MSS-SACK-TS-N-WS07-	1	1.657471	non-sibling(optsdiff)	non-sibling(optsdiff)	1
RA_6220	217.196.33.252	2a02:310:0:2958::16	100.0	1001.0	901.0	0.999937	1.000000	NaN	2.682442e+08	2.355818e+09	...	NaN	NaN	NaN	MSS-SACK-TS-N-WS07-	MSS-SACK-TS-N-WS07-	0	NaN	non-sibling (hz different)	non-sibling (hz different)	1