Comparison of various Sibling Detection Approaches, including Beverly, Hand-Tuned, and Machine-Learning

For questions and comments, please contact me under scheitle@net.in.tum.de


In [814]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import precision_score
from sklearn.metrics import f1_score
from IPython.display import Image  
import pandas
import seaborn
import matplotlib.pyplot as plt
%matplotlib inline

Versions:

* Python: 3.5.3
* numpy: 1.12.1
* pandas: 0.19.2
* sklearn: 0.18.1
* matplotlib 2.0.0
* jupyter notebook 5.0.0

In [815]:
def get_pd_files(folder):
    sibf = folder + "hosts.csvcapture.pcap.ts.siblingresult.csv"
    nonsibf = folder + "hosts.csv__nonsiblings_seed1_n*capture.pcap.ts.siblingresult.csv"
    import glob
    for filename in glob.glob(nonsibf):
        nonsibf = filename
    import os.path
    if os.path.isfile(sibf) and os.path.isfile(nonsibf):
        print("Loading from filenames {} and {}".format(sibf, nonsibf))
    else:
        print("Files not found {} and {}".format(sibf, nonsibf))
        
    sib = pd.read_csv(sibf, index_col=0)
    sib['label'] = 1
    nonsib = pd.read_csv(nonsibf, index_col=0)
    nonsib['label'] = 0
    print("Read {} siblings and {} non-siblings from files.".format(len(sib), len(nonsib)))
    return sib, nonsib
       
def dec2prd_ours(df):
    df.loc[df["decision"].str.contains("^sibling"), "dec_prd"] =  1
    df.loc[df["decision"].str.contains("^non-sibling"), "dec_prd"] =  0
    return  # df is changed in place so no returning necessary

def dec2prd_bev(df):
    df.loc[df["dec_bev"].str.contains("^sibling"), "dec_bev_prd"] =  1
    df.loc[df["dec_bev"].str.contains("^non-sibling"), "dec_bev_prd"] =  0
    return  # df is changed in place so no returning necessary

def mix_sib_nonsib(sib, nonsib, mode, rs=42):
    if mode == "equal":
        nonsibint = nonsib.sample(n=len(sib), replace=True, weights=None, random_state=rs)
    else:
        nonsibint = nonsib
    datain = pd.concat([sib,nonsibint])
    return datain


def get_ouralgo_stats(sib, nonsib):
    #print("Our algo stats:")
    df = mix_sib_nonsib(sib, nonsib, "full", 42)
    df_ours = df[["label", "decision"]].copy()
    dec2prd_ours(df_ours)
    undec = len(df_ours[df_ours.isnull().any(axis=1)])
    print("Our algo: Not deciding on {} pairs for unknown/error reasons.".format(undec))
    df_ours = df_ours.dropna()
    weights = get_sample_weight_one_input(df_ours)
    mcc = matthews_corrcoef(df_ours["label"], df_ours["dec_prd"], sample_weight=None)
    f1 = f1_score(df_ours["label"], df_ours["dec_prd"], sample_weight=None)
    print("Our algo stats: ({}) undecided, mcc: {}, f1: {}".format(undec, mcc, f1))
    statsv = list(stats(df_ours["label"], df_ours["dec_prd"]))
    statsv.append(mcc)
    return statsv

def get_bev_stats(sib, nonsib):
    df = mix_sib_nonsib(sib, nonsib, "full", 42)
    df_tmp = df[["label", "dec_bev"]].copy()
    dec_nan = len(df_tmp[df_tmp["dec_bev"].isnull() == True])
    df_tmp = df_tmp[df_tmp["dec_bev"].isnull() == False]
    dec2prd_bev(df_tmp)
    undec = len(df_tmp[df_tmp.isnull().any(axis=1)])
    df_tmp = df_tmp.dropna()
    weights = get_sample_weight_one_input(df_tmp)
    mcc = matthews_corrcoef(df_tmp["label"], df_tmp["dec_bev_prd"], sample_weight=None)
    f1 = f1_score(df_tmp["label"], df_tmp["dec_bev_prd"], sample_weight=None)
    print("Beverly algo: Not deciding on {} pairs for NaN and {} pairs for unknown/error reasons.".format(dec_nan, undec))
    print("Beverly algo stats: ({}) undecided, mcc: {}, f1: {}".format(undec, mcc, f1))
    statsv =  list(stats(df_tmp["label"], df_tmp["dec_bev_prd"]))
    statsv.append(mcc)
    return statsv
    

def match_nonsibs_slow(sib, nonsib, rs=42):
    ctr = 0 
    for i, ii in sib.iterrows():
        for j, jj in sib.iterrows():
            if ii[1] != jj[1]:
                nscand = ii[1] + "_+_" +  jj[1]
                ctr += 1 
    return
                
def match_nonsibs(sib, nonsib, rs=42):
    ctr = 0 
    a = []
    sd = dict() # siblings dict
    nsd = dict()  # non siblings dict
    for i in sib.itertuples():
        sd[i[0]] = 0
    for i in nonsib.itertuples():
        nsd[i[0]] = 0
    nscand = dict()
    #nstmp = pandas.DataFrame()
    for i in sd.keys():
        for j in sd.keys():
            if i != j:
                nscandstr = i + "_+_" +  j
                nscand[nscandstr] = 1
    print("Generated {} non-sibling candidates from {} siblings.".format(len(nscand), len(sib)))
    fails = []
    for k1 in nsd.keys():
        if k1 not in nscand.keys():
            fails.append(k1)
            #print("fail! {} ".format(i))    
    nsfiltered = nonsib.copy()
    nsfiltered.drop(fails, inplace=True)
    return nsfiltered

def assign_groups_old(datain):
    datain["group"] = "servers"
    datain.loc[datain["domain"].str.contains("nlnog.net"), "group"] = "nlnog"
    datain.loc[datain["domain"].str.contains("RA_"), "group"] = "RA"
    datain.loc[datain["domain"].str.extract("RA_([0-9]{4})") < 6019, "group"] = "RAv1"
    datain.loc[datain["domain"].str.extract("RA_([0-9]{4})") > 6018, "group"] = "RAv2"
    return

def assign_groups(datain):
    datain["group"] = "servers"
    #sib.loc[sib.index.str.contains("nlnog.net"), "group"] = "nlnog"
    datain.loc[datain.index.str.contains("nlnog.net"), "group"] = "nlnog"
    datain.loc[datain.index.str.contains("RA_"), "group"] = "RA"
    datain["ra_id"] = datain.index.str.extract("RA_([0-9]{4})", expand=False).astype(float).fillna(0).astype(int) 
    datain.loc[(datain.ra_id > 5999) & (datain.ra_id < 6019), "group"] = "RAv1"
    datain.loc[datain.ra_id > 6018, "group"] = "RAv2"    
    groups = datain["group"].as_matrix()
    return groups
    
    
def prune_datain(datain):
    errorc = len(datain[datain["decision"].str.contains("ERROR|error") == True])
    print("Removing {} errors values from datain.".format(errorc))
    datain = datain[datain["decision"].str.contains("ERROR|error") == False]

    hzdiffc = len(datain[datain["hzdiff"] != 0])
    print("Deciding {} hzdiff hosts as non-sib, stats:".format(hzdiffc))
    lbl = datain[datain["hzdiff"] != 0]["label"]
    prd = lbl.copy()
    prd[:] = 0
    stats(lbl,prd)
    dataout = datain[datain["hzdiff"] == 0]
    return dataout, lbl, prd

def prune_data_for_ml(datain):
    # problem: NaNs might be in non-feature columns such as RA_ID
    erridx = datain[datain.decision.str.contains("ERROR|error") == True].index
    labels, features = make_labels_features(datain)
    naidx = datain[features.isnull().any(axis=1) == True].index
    bothidx = erridx | naidx
    dataout = datain.drop(bothidx)
    lbl = datain.loc[bothidx, "label"]
    prd = lbl.copy()
    prd[:] = 0
    stats(lbl,prd)    
    print("Removing {} rows with error results and {} rows with NaNs (typically hz different) from a \
    total of {} entries, resulting in {} entries.".format(
            len(erridx), len(naidx), len(datain), len(dataout)))
    return dataout, lbl, prd


def stats(lbl, prd):
        tp = np.sum((lbl == 1) & (prd == 1)) 
        fp = np.sum(lbl < prd ) 
        tn = np.sum((lbl == 0) & (prd == 0)) 
        fn = np.sum(lbl > prd ) 
        try:
            prec =  round(100*tp/(tp+fp),2) # TPR?
            recall = round(100*tp/(tp+fn),2) 
            spec= round(100*tn/(tn+fp),2) # TNR?
            acc = round(100*(tn+tp)/(tn+fn+fp+tp),2)
        except ZeroDivisionError as e:
            print("Catching ZeroDivisionError at stats!")
            prec = 0
            recall = 0
            spec = 0
            acc = 0
        print("Correct: {}, incorrect {}, TP {}, FP {}, TN {}, FN{}, Prec. {}, Rec. {}, Spec. {}, Acc. {}%".format(
        np.sum(lbl == prd),
        np.sum(lbl != prd),
        tp, fp, tn, fn, 
        prec, recall, spec, acc
        ))
        return prec, recall, spec, acc
        
def make_labels_features(dfin):
    labels = dfin["label"]
    features = dfin[["hzdiff", "hzr2diff", "timestamps_diff", "adiff", 
                        "theta", "r2diff", "ott_rng_diff_rel", "optsdiff",
                       "perc_85_val"]].copy()
    features["hzr2mean"] = (dfin["hz4r2"] + dfin["hz6r2"])  / 2.0
    features["r2mean"] = (dfin["r4_sqr"] + dfin["r6_sqr"]) / 2.0     
    features["ott_rng_mean"] = (dfin["ott4_rng"] + dfin["ott6_rng"]) / 2.0
    features["splinediff_scaled"] = dfin["perc_85_val"] / features["ott_rng_mean"]
    return labels, features   

def get_sample_weight(sib, nonsib):
    sl = len(sib)
    nsl = len(nonsib)
    tl = sl + nsl
    nsw = sl / tl
    sw = nsl / tl
    print("Found {} sibs and {} nonsibs, weights: {} and {}".format(sl, nsl, sw, nsw))
    weight = np.zeros(len(datain))
    weight = np.float32(datain["label"].as_matrix())
    weight[weight == 1] = sw
    weight[weight == 0] = nsw
    
    
def get_sample_weight_one_input(dfin):
    sl = len(dfin[dfin["label"] == 1])
    nsl = len(dfin[dfin["label"] == 0])
    tl = sl + nsl
    nsw = sl / tl
    sw = nsl / tl
    weight = np.zeros(len(dfin))
    weight = np.float32(dfin["label"].as_matrix())
    weight[weight == 1] = sw
    weight[weight == 0] = nsw
    print("Found {} sibs and {} nonsibs, weights: {} and {}, #weights: {}".format(
        sl, nsl, round(sw,4), round(nsw,4), len(weight)))
    return weight


# functions for ML with proprtional group sampling
def split_stratified_groups(sib, splits, nr):
    from sklearn.model_selection import KFold # non-overlapping!
    groups = assign_groups(sib)
    groupset = set(groups)
    gsibdf_train = pd.DataFrame(columns=sib.columns)
    gsibdf_test = pd.DataFrame(columns=sib.columns)
    for i in groupset:
        groupsib = sib[sib["group"] == i].copy()
        if len(groupsib ) < splits:
            # can not split into more folds than files...
            print("ERROR: more splits ({}) than samples ({}), reducing to sample nr".format(splits, len(groupsib)))
            splits = len(groupsib)
        #print("## GROUP: {} with {} elements.".format(i, len(groupsib)))
        ks = KFold(n_splits=splits, random_state=42, shuffle=True)
        labels, features = make_labels_features(groupsib)
        ctr = -1
        for train_index, test_index in ks.split(groupsib):
            ctr += 1                
            if (ctr == nr):
            #print("TRAIN:", train_index, "TEST:", test_index)
                gsibdf_train = gsibdf_train.append(groupsib.iloc[train_index])
                gsibdf_test = gsibdf_test.append(groupsib.iloc[test_index])
                break
    return [gsibdf_train, gsibdf_test]


def dt_train(labels, features, weight, rs=42):
    estimator = DecisionTreeClassifier(max_depth=30, min_samples_leaf=5, random_state=42)
    est = estimator.fit(features, labels, sample_weight=weight)
    return est

def kfold_train_test(sib, nonsib):
    kfolds = 10
    stats_train_error = np.empty((10,5), dtype=float)
    stats_test_error = np.empty((10,5), dtype=float)
    graphs = []
    for i in range(10):
        print("Round {}".format(i))
        # pick proportionally from each group
        train_sib, test_sib = split_stratified_groups(sib, 10, i)
        # create, select, and mix matching nonsibs
        train_nonsib = match_nonsibs(train_sib, nonsib)
        test_nonsib = match_nonsibs(test_sib, nonsib)
        train = mix_sib_nonsib(train_sib,train_nonsib, "all")
        # prune NaNs out
        train, train_prune_lbl, train_prune_prd = prune_data_for_ml(train)
        test = mix_sib_nonsib(test_sib,test_nonsib, "all")
        test, test_prune_lbl, test_prune_prd = prune_data_for_ml(test)
        # split out features, labels, and weights
        train_lbl, train_ftr = make_labels_features(train)
        test_lbl, test_ftr = make_labels_features(test)
        train_weight = get_sample_weight_one_input(train)
        test_weight = get_sample_weight_one_input(test)
        # train estimator
        est = dt_train(train_lbl, train_ftr, train_weight)   
        mcc = matthews_corrcoef(train_lbl, est.predict(train_ftr), sample_weight=train_weight)
        statsv = list(stats(train_lbl, est.predict(train_ftr)))
        statsv.append(mcc)
        stats_train_error[i] = statsv
        #print("test error: mcc of {}".format(mcc))
        mcc = matthews_corrcoef(test_lbl, est.predict(test_ftr), sample_weight=test_weight)
        statsv = list(stats(test_lbl, est.predict(test_ftr)))
        statsv.append(mcc)
        stats_test_error[i] = statsv
        #stats_test_error[i]  =  stats(test_lbl, est.predict(test_ftr))
        graph = dt_plot(est, train_ftr)
        graphs.append(graph)
        #Image(graph.create_png())  
    return stats_train_error, stats_test_error, graphs

One function to evaluate them all


In [867]:
runs = 8
hlostats = np.zeros((runs-1,5), dtype=float)
hlbstats = np.zeros((runs-1,5), dtype=float)
gsdo =  dict() # group stats dict
gsdb =  dict() # group stats dict
mlstatsd_tee = dict()
mlstatsd_tre = dict()
graphs = []
for i in range(1,runs):
    print("############# Round {} ##############".format(i))
    sib, nonsib = get_pd_files("../../../gt{}/".format(i))
    # high level
    hlostats[i - 1] = tuple(get_ouralgo_stats(sib, nonsib))
    hlbstats[i - 1] = get_bev_stats(sib, nonsib)
    
    # group-level
    groups = assign_groups(sib)
    groupset = set(groups)
    for j in groupset:
        if j not in gsdo:
            gsdo[j] = np.zeros((runs,5), dtype=float)
            gsdb[j] = np.zeros((runs,5), dtype=float)
        print("## GROUP: {}".format(j))
        groupsib = sib[sib["group"] == j].copy()
        groupnonsib = match_nonsibs(groupsib, nonsib)
        gsdo[j][i-1] = get_ouralgo_stats(groupsib, groupnonsib)
        gsdb[j][i-1] = get_bev_stats(groupsib, groupnonsib)
    # decision-tree
    mlstatsd_tre[str(i)+"_tre"], mlstatsd_tee[str(i)+"_tee"], graph = kfold_train_test(sib, nonsib) # returns 2 sets of 10x4 arrays
    graphs.append(graph)


############# Round 1 ##############
Loading from filenames ../../../gt1/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../gt1/hosts.csv__nonsiblings_seed1_n679capture.pcap.ts.siblingresult.csv
Read 279 siblings and 82026 non-siblings from files.
Our algo: Not deciding on 70 pairs for unknown/error reasons.
Found 261 sibs and 81974 nonsibs, weights: 0.9968 and 0.0032, #weights: 82235
Our algo stats: (70) undecided, mcc: 0.988402745932266, f1: 0.9883720930232558
Correct: 82229, incorrect 6, TP 255, FP 0, TN 81974, FN6, Prec. 100.0, Rec. 97.7, Spec. 100.0, Acc. 99.99%
Found 279 sibs and 81974 nonsibs, weights: 0.9966 and 0.0034, #weights: 82253
Beverly algo: Not deciding on 52 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.10670720121610743, f1: 0.029152684563758385
Correct: 63737, incorrect 18516, TP 278, FP 18515, TN 63459, FN1, Prec. 1.48, Rec. 99.64, Spec. 77.41, Acc. 77.49%
## GROUP: nlnog
Generated 72630 non-sibling candidates from 270 siblings.
Our algo: Not deciding on 66 pairs for unknown/error reasons.
Found 252 sibs and 72582 nonsibs, weights: 0.9965 and 0.0035, #weights: 72834
Our algo stats: (66) undecided, mcc: 0.9940092551257894, f1: 0.9940119760479043
Correct: 72831, incorrect 3, TP 249, FP 0, TN 72582, FN3, Prec. 100.0, Rec. 98.81, Spec. 100.0, Acc. 100.0%
Found 270 sibs and 72582 nonsibs, weights: 0.9963 and 0.0037, #weights: 72852
Beverly algo: Not deciding on 48 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.11249292431428837, f1: 0.03221942747634447
Correct: 56692, incorrect 16160, TP 269, FP 16159, TN 56423, FN1, Prec. 1.64, Rec. 99.63, Spec. 77.74, Acc. 77.82%
## GROUP: servers
Generated 72 non-sibling candidates from 9 siblings.
Our algo: Not deciding on 0 pairs for unknown/error reasons.
Found 9 sibs and 72 nonsibs, weights: 0.8889 and 0.1111, #weights: 81
Our algo stats: (0) undecided, mcc: 0.7999999999999992, f1: 0.8
Correct: 78, incorrect 3, TP 6, FP 0, TN 72, FN3, Prec. 100.0, Rec. 66.67, Spec. 100.0, Acc. 96.3%
Found 9 sibs and 72 nonsibs, weights: 0.8889 and 0.1111, #weights: 81
Beverly algo: Not deciding on 0 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.349215147884789, f1: 0.36
Correct: 49, incorrect 32, TP 9, FP 32, TN 40, FN0, Prec. 21.95, Rec. 100.0, Spec. 55.56, Acc. 60.49%
Round 0
ERROR: more splits (10) than samples (9), reducing to sample nr
Generated 62750 non-sibling candidates from 251 siblings.
Generated 756 non-sibling candidates from 28 siblings.
/usr/local/lib/python3.5/dist-packages/ipykernel/__main__.py:184: RuntimeWarning: invalid value encountered in long_scalars
/usr/local/lib/python3.5/dist-packages/ipykernel/__main__.py:185: RuntimeWarning: invalid value encountered in long_scalars
Correct: 4900, incorrect 0, TP 0, FP 0, TN 4900, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 48 rows with error results and 4900 rows with NaNs (typically hz different) from a     total of 63001 entries, resulting in 58101 entries.
Catching ZeroDivisionError at stats!
Correct: 0, incorrect 0, TP 0, FP 0, TN 0, FN0, Prec. 0, Rec. 0, Spec. 0, Acc. 0%
Removing 0 rows with error results and 0 rows with NaNs (typically hz different) from a     total of 784 entries, resulting in 784 entries.
Found 251 sibs and 57850 nonsibs, weights: 0.9957 and 0.0043, #weights: 58101
Found 28 sibs and 756 nonsibs, weights: 0.9643 and 0.0357, #weights: 784
Correct: 58101, incorrect 0, TP 251, FP 0, TN 57850, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 784, incorrect 0, TP 28, FP 0, TN 756, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 1
ERROR: more splits (10) than samples (9), reducing to sample nr
Generated 62750 non-sibling candidates from 251 siblings.
Generated 756 non-sibling candidates from 28 siblings.
Correct: 4428, incorrect 0, TP 0, FP 0, TN 4428, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 44 rows with error results and 4428 rows with NaNs (typically hz different) from a     total of 63001 entries, resulting in 58573 entries.
Correct: 54, incorrect 0, TP 0, FP 0, TN 54, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 54 rows with NaNs (typically hz different) from a     total of 784 entries, resulting in 730 entries.
Found 251 sibs and 58322 nonsibs, weights: 0.9957 and 0.0043, #weights: 58573
Found 28 sibs and 702 nonsibs, weights: 0.9616 and 0.0384, #weights: 730
Correct: 58573, incorrect 0, TP 251, FP 0, TN 58322, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 730, incorrect 0, TP 28, FP 0, TN 702, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 2
ERROR: more splits (10) than samples (9), reducing to sample nr
Generated 62750 non-sibling candidates from 251 siblings.
Generated 756 non-sibling candidates from 28 siblings.
Correct: 4898, incorrect 0, TP 0, FP 0, TN 4898, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 46 rows with error results and 4898 rows with NaNs (typically hz different) from a     total of 63001 entries, resulting in 58103 entries.
Catching ZeroDivisionError at stats!
Correct: 0, incorrect 0, TP 0, FP 0, TN 0, FN0, Prec. 0, Rec. 0, Spec. 0, Acc. 0%
Removing 0 rows with error results and 0 rows with NaNs (typically hz different) from a     total of 784 entries, resulting in 784 entries.
Found 251 sibs and 57852 nonsibs, weights: 0.9957 and 0.0043, #weights: 58103
Found 28 sibs and 756 nonsibs, weights: 0.9643 and 0.0357, #weights: 784
Correct: 58103, incorrect 0, TP 251, FP 0, TN 57852, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 784, incorrect 0, TP 28, FP 0, TN 756, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 3
ERROR: more splits (10) than samples (9), reducing to sample nr
Generated 62750 non-sibling candidates from 251 siblings.
Generated 756 non-sibling candidates from 28 siblings.
Correct: 4414, incorrect 0, TP 0, FP 0, TN 4414, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 42 rows with error results and 4414 rows with NaNs (typically hz different) from a     total of 63001 entries, resulting in 58587 entries.
Correct: 54, incorrect 0, TP 0, FP 0, TN 54, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 54 rows with NaNs (typically hz different) from a     total of 784 entries, resulting in 730 entries.
Found 251 sibs and 58336 nonsibs, weights: 0.9957 and 0.0043, #weights: 58587
Found 28 sibs and 702 nonsibs, weights: 0.9616 and 0.0384, #weights: 730
Correct: 58587, incorrect 0, TP 251, FP 0, TN 58336, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 730, incorrect 0, TP 28, FP 0, TN 702, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 4
ERROR: more splits (10) than samples (9), reducing to sample nr
Generated 62750 non-sibling candidates from 251 siblings.
Generated 756 non-sibling candidates from 28 siblings.
Correct: 4428, incorrect 0, TP 0, FP 0, TN 4428, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 44 rows with error results and 4428 rows with NaNs (typically hz different) from a     total of 63001 entries, resulting in 58573 entries.
Correct: 54, incorrect 0, TP 0, FP 0, TN 54, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 54 rows with NaNs (typically hz different) from a     total of 784 entries, resulting in 730 entries.
Found 251 sibs and 58322 nonsibs, weights: 0.9957 and 0.0043, #weights: 58573
Found 28 sibs and 702 nonsibs, weights: 0.9616 and 0.0384, #weights: 730
Correct: 58573, incorrect 0, TP 251, FP 0, TN 58322, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 730, incorrect 0, TP 28, FP 0, TN 702, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 5
ERROR: more splits (10) than samples (9), reducing to sample nr
Generated 62750 non-sibling candidates from 251 siblings.
Generated 756 non-sibling candidates from 28 siblings.
Correct: 3958, incorrect 0, TP 0, FP 0, TN 3958, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 46 rows with error results and 3958 rows with NaNs (typically hz different) from a     total of 63001 entries, resulting in 59043 entries.
Correct: 104, incorrect 0, TP 0, FP 0, TN 104, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 104 rows with NaNs (typically hz different) from a     total of 784 entries, resulting in 680 entries.
Found 251 sibs and 58792 nonsibs, weights: 0.9957 and 0.0043, #weights: 59043
Found 28 sibs and 652 nonsibs, weights: 0.9588 and 0.0412, #weights: 680
Correct: 59043, incorrect 0, TP 251, FP 0, TN 58792, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 680, incorrect 0, TP 28, FP 0, TN 652, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 6
ERROR: more splits (10) than samples (9), reducing to sample nr
Generated 62750 non-sibling candidates from 251 siblings.
Generated 756 non-sibling candidates from 28 siblings.
Correct: 4418, incorrect 0, TP 0, FP 0, TN 4418, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 46 rows with error results and 4418 rows with NaNs (typically hz different) from a     total of 63001 entries, resulting in 58583 entries.
Correct: 54, incorrect 0, TP 0, FP 0, TN 54, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 54 rows with NaNs (typically hz different) from a     total of 784 entries, resulting in 730 entries.
Found 251 sibs and 58332 nonsibs, weights: 0.9957 and 0.0043, #weights: 58583
Found 28 sibs and 702 nonsibs, weights: 0.9616 and 0.0384, #weights: 730
Correct: 58583, incorrect 0, TP 251, FP 0, TN 58332, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 730, incorrect 0, TP 28, FP 0, TN 702, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 7
ERROR: more splits (10) than samples (9), reducing to sample nr
Generated 62750 non-sibling candidates from 251 siblings.
Generated 756 non-sibling candidates from 28 siblings.
Correct: 4426, incorrect 0, TP 0, FP 0, TN 4426, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 42 rows with error results and 4426 rows with NaNs (typically hz different) from a     total of 63001 entries, resulting in 58575 entries.
Correct: 54, incorrect 0, TP 0, FP 0, TN 54, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 54 rows with NaNs (typically hz different) from a     total of 784 entries, resulting in 730 entries.
Found 251 sibs and 58324 nonsibs, weights: 0.9957 and 0.0043, #weights: 58575
Found 28 sibs and 702 nonsibs, weights: 0.9616 and 0.0384, #weights: 730
Correct: 58575, incorrect 0, TP 251, FP 0, TN 58324, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 730, incorrect 0, TP 28, FP 0, TN 702, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 8
ERROR: more splits (10) than samples (9), reducing to sample nr
Generated 62750 non-sibling candidates from 251 siblings.
Generated 756 non-sibling candidates from 28 siblings.
Correct: 3959, incorrect 0, TP 0, FP 0, TN 3959, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 47 rows with error results and 3959 rows with NaNs (typically hz different) from a     total of 63001 entries, resulting in 59042 entries.
Correct: 104, incorrect 0, TP 0, FP 0, TN 104, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 104 rows with NaNs (typically hz different) from a     total of 784 entries, resulting in 680 entries.
Found 251 sibs and 58791 nonsibs, weights: 0.9957 and 0.0043, #weights: 59042
Found 28 sibs and 652 nonsibs, weights: 0.9588 and 0.0412, #weights: 680
Correct: 59042, incorrect 0, TP 251, FP 0, TN 58791, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 680, incorrect 0, TP 28, FP 0, TN 652, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 9
ERROR: more splits (10) than samples (9), reducing to sample nr
Generated 58806 non-sibling candidates from 243 siblings.
Generated 702 non-sibling candidates from 27 siblings.
Correct: 3304, incorrect 0, TP 0, FP 0, TN 3304, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 3304 rows with NaNs (typically hz different) from a     total of 59049 entries, resulting in 55745 entries.
Correct: 57, incorrect 0, TP 0, FP 0, TN 57, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 5 rows with error results and 57 rows with NaNs (typically hz different) from a     total of 729 entries, resulting in 672 entries.
Found 243 sibs and 55502 nonsibs, weights: 0.9956 and 0.0044, #weights: 55745
Found 27 sibs and 645 nonsibs, weights: 0.9598 and 0.0402, #weights: 672
Correct: 55745, incorrect 0, TP 243, FP 0, TN 55502, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 672, incorrect 0, TP 27, FP 0, TN 645, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
############# Round 2 ##############
Loading from filenames ../../../gt2/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../gt2/hosts.csv__nonsiblings_seed1_n679capture.pcap.ts.siblingresult.csv
Read 624 siblings and 410626 non-siblings from files.
Our algo: Not deciding on 124 pairs for unknown/error reasons.
Found 552 sibs and 410574 nonsibs, weights: 0.9987 and 0.0013, #weights: 411126
Our algo stats: (124) undecided, mcc: 0.980775474614777, f1: 0.9806807727690893
Correct: 411105, incorrect 21, TP 533, FP 2, TN 410572, FN19, Prec. 99.63, Rec. 96.56, Spec. 100.0, Acc. 99.99%
Found 622 sibs and 410575 nonsibs, weights: 0.9985 and 0.0015, #weights: 411197
Beverly algo: Not deciding on 53 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.08042930763160049, f1: 0.015917988872862148
Correct: 334785, incorrect 76412, TP 618, FP 76408, TN 334167, FN4, Prec. 0.8, Rec. 99.36, Spec. 81.39, Acc. 81.42%
## GROUP: RAv1
Generated 132 non-sibling candidates from 12 siblings.
Our algo: Not deciding on 0 pairs for unknown/error reasons.
Found 12 sibs and 132 nonsibs, weights: 0.9167 and 0.0833, #weights: 144
Our algo stats: (0) undecided, mcc: 1.0, f1: 1.0
Correct: 144, incorrect 0, TP 12, FP 0, TN 132, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Found 12 sibs and 132 nonsibs, weights: 0.9167 and 0.0833, #weights: 144
Beverly algo: Not deciding on 0 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.0, f1: 0.15384615384615385
Correct: 12, incorrect 132, TP 12, FP 132, TN 0, FN0, Prec. 8.33, Rec. 100.0, Spec. 0.0, Acc. 8.33%
## GROUP: RAv2
/usr/local/lib/python3.5/dist-packages/sklearn/metrics/classification.py:516: RuntimeWarning: invalid value encountered in double_scalars
  mcc = cov_ytyp / np.sqrt(var_yt * var_yp)
Generated 42642 non-sibling candidates from 207 siblings.
Our algo: Not deciding on 4 pairs for unknown/error reasons.
Found 205 sibs and 42640 nonsibs, weights: 0.9952 and 0.0048, #weights: 42845
Our algo stats: (4) undecided, mcc: 0.9802498011436569, f1: 0.9802955665024631
Correct: 42837, incorrect 8, TP 199, FP 2, TN 42638, FN6, Prec. 99.0, Rec. 97.07, Spec. 100.0, Acc. 99.98%
Found 205 sibs and 42640 nonsibs, weights: 0.9952 and 0.0048, #weights: 42845
Beverly algo: Not deciding on 4 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.05440088590079272, f1: 0.015414840562188304
Correct: 16785, incorrect 26060, TP 204, FP 26059, TN 16581, FN1, Prec. 0.78, Rec. 99.51, Spec. 38.89, Acc. 39.18%
## GROUP: nlnog
Generated 145542 non-sibling candidates from 382 siblings.
Our algo: Not deciding on 114 pairs for unknown/error reasons.
Found 312 sibs and 145498 nonsibs, weights: 0.9979 and 0.0021, #weights: 145810
Our algo stats: (114) undecided, mcc: 0.9838100405673874, f1: 0.9837133550488599
Correct: 145800, incorrect 10, TP 302, FP 0, TN 145498, FN10, Prec. 100.0, Rec. 96.79, Spec. 100.0, Acc. 99.99%
Found 382 sibs and 145498 nonsibs, weights: 0.9974 and 0.0026, #weights: 145880
Beverly algo: Not deciding on 44 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.08510041061435372, f1: 0.019642394402694996
Correct: 108048, incorrect 37832, TP 379, FP 37829, TN 107669, FN3, Prec. 0.99, Rec. 99.21, Spec. 74.0, Acc. 74.07%
## GROUP: servers
Generated 506 non-sibling candidates from 23 siblings.
Our algo: Not deciding on 0 pairs for unknown/error reasons.
Found 23 sibs and 506 nonsibs, weights: 0.9565 and 0.0435, #weights: 529
Our algo stats: (0) undecided, mcc: 0.9297526975413737, f1: 0.9302325581395349
Correct: 526, incorrect 3, TP 20, FP 0, TN 506, FN3, Prec. 100.0, Rec. 86.96, Spec. 100.0, Acc. 99.43%
Found 23 sibs and 506 nonsibs, weights: 0.9565 and 0.0435, #weights: 529
Beverly algo: Not deciding on 0 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.1403897717142205, f1: 0.1173469387755102
Correct: 183, incorrect 346, TP 23, FP 346, TN 160, FN0, Prec. 6.23, Rec. 100.0, Spec. 31.62, Acc. 34.59%
Round 0
Generated 311922 non-sibling candidates from 559 siblings.
Generated 4160 non-sibling candidates from 65 siblings.
Correct: 166135, incorrect 3, TP 0, FP 0, TN 166135, FN3, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 45 rows with error results and 166138 rows with NaNs (typically hz different) from a     total of 312481 entries, resulting in 146343 entries.
Correct: 2392, incorrect 0, TP 0, FP 0, TN 2392, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 2392 rows with NaNs (typically hz different) from a     total of 4225 entries, resulting in 1833 entries.
Found 556 sibs and 145787 nonsibs, weights: 0.9962 and 0.0038, #weights: 146343
Found 65 sibs and 1768 nonsibs, weights: 0.9645 and 0.0355, #weights: 1833
Correct: 146339, incorrect 4, TP 556, FP 4, TN 145783, FN0, Prec. 99.29, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1833, incorrect 0, TP 65, FP 0, TN 1768, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 1
Generated 311922 non-sibling candidates from 559 siblings.
Generated 4160 non-sibling candidates from 65 siblings.
Correct: 166309, incorrect 2, TP 0, FP 0, TN 166309, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 43 rows with error results and 166311 rows with NaNs (typically hz different) from a     total of 312481 entries, resulting in 146170 entries.
Correct: 2378, incorrect 1, TP 0, FP 0, TN 2378, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 99.96%
Removing 0 rows with error results and 2379 rows with NaNs (typically hz different) from a     total of 4225 entries, resulting in 1846 entries.
Found 557 sibs and 145613 nonsibs, weights: 0.9962 and 0.0038, #weights: 146170
Found 64 sibs and 1782 nonsibs, weights: 0.9653 and 0.0347, #weights: 1846
Correct: 146166, incorrect 4, TP 557, FP 4, TN 145609, FN0, Prec. 99.29, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1845, incorrect 1, TP 63, FP 0, TN 1782, FN1, Prec. 100.0, Rec. 98.44, Spec. 100.0, Acc. 99.95%
Round 2
Generated 314160 non-sibling candidates from 561 siblings.
Generated 3906 non-sibling candidates from 63 siblings.
Correct: 169518, incorrect 3, TP 0, FP 0, TN 169518, FN3, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 47 rows with error results and 169521 rows with NaNs (typically hz different) from a     total of 314721 entries, resulting in 145200 entries.
Correct: 2014, incorrect 0, TP 0, FP 0, TN 2014, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 2014 rows with NaNs (typically hz different) from a     total of 3969 entries, resulting in 1955 entries.
Found 558 sibs and 144642 nonsibs, weights: 0.9962 and 0.0038, #weights: 145200
Found 63 sibs and 1892 nonsibs, weights: 0.9678 and 0.0322, #weights: 1955
Correct: 145194, incorrect 6, TP 558, FP 6, TN 144636, FN0, Prec. 98.94, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1955, incorrect 0, TP 63, FP 0, TN 1892, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 3
Generated 315282 non-sibling candidates from 562 siblings.
Generated 3782 non-sibling candidates from 62 siblings.
Correct: 168783, incorrect 3, TP 0, FP 0, TN 168783, FN3, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 37 rows with error results and 168786 rows with NaNs (typically hz different) from a     total of 315844 entries, resulting in 147058 entries.
Correct: 2079, incorrect 0, TP 0, FP 0, TN 2079, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 1 rows with error results and 2079 rows with NaNs (typically hz different) from a     total of 3844 entries, resulting in 1765 entries.
Found 559 sibs and 146499 nonsibs, weights: 0.9962 and 0.0038, #weights: 147058
Found 62 sibs and 1703 nonsibs, weights: 0.9649 and 0.0351, #weights: 1765
Correct: 147054, incorrect 4, TP 559, FP 4, TN 146495, FN0, Prec. 99.29, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1765, incorrect 0, TP 62, FP 0, TN 1703, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 4
Generated 315282 non-sibling candidates from 562 siblings.
Generated 3782 non-sibling candidates from 62 siblings.
Correct: 169939, incorrect 3, TP 0, FP 0, TN 169939, FN3, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 44 rows with error results and 169942 rows with NaNs (typically hz different) from a     total of 315844 entries, resulting in 145902 entries.
Correct: 1970, incorrect 0, TP 0, FP 0, TN 1970, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 1970 rows with NaNs (typically hz different) from a     total of 3844 entries, resulting in 1874 entries.
Found 559 sibs and 145343 nonsibs, weights: 0.9962 and 0.0038, #weights: 145902
Found 62 sibs and 1812 nonsibs, weights: 0.9669 and 0.0331, #weights: 1874
Correct: 145898, incorrect 4, TP 559, FP 4, TN 145339, FN0, Prec. 99.29, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1874, incorrect 0, TP 62, FP 0, TN 1812, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 5
Generated 315282 non-sibling candidates from 562 siblings.
Generated 3782 non-sibling candidates from 62 siblings.
Correct: 168832, incorrect 3, TP 0, FP 0, TN 168832, FN3, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 43 rows with error results and 168835 rows with NaNs (typically hz different) from a     total of 315844 entries, resulting in 147009 entries.
Correct: 2086, incorrect 0, TP 0, FP 0, TN 2086, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 2 rows with error results and 2086 rows with NaNs (typically hz different) from a     total of 3844 entries, resulting in 1758 entries.
Found 559 sibs and 146450 nonsibs, weights: 0.9962 and 0.0038, #weights: 147009
Found 62 sibs and 1696 nonsibs, weights: 0.9647 and 0.0353, #weights: 1758
Correct: 147005, incorrect 4, TP 559, FP 4, TN 146446, FN0, Prec. 99.29, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1758, incorrect 0, TP 62, FP 0, TN 1696, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 6
Generated 315282 non-sibling candidates from 562 siblings.
Generated 3782 non-sibling candidates from 62 siblings.
Correct: 168811, incorrect 3, TP 0, FP 0, TN 168811, FN3, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 42 rows with error results and 168814 rows with NaNs (typically hz different) from a     total of 315844 entries, resulting in 147030 entries.
Correct: 2095, incorrect 0, TP 0, FP 0, TN 2095, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 1 rows with error results and 2095 rows with NaNs (typically hz different) from a     total of 3844 entries, resulting in 1749 entries.
Found 559 sibs and 146471 nonsibs, weights: 0.9962 and 0.0038, #weights: 147030
Found 62 sibs and 1687 nonsibs, weights: 0.9646 and 0.0354, #weights: 1749
Correct: 147024, incorrect 6, TP 559, FP 6, TN 146465, FN0, Prec. 98.94, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1749, incorrect 0, TP 62, FP 0, TN 1687, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 7
Generated 316406 non-sibling candidates from 563 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 170243, incorrect 3, TP 0, FP 0, TN 170243, FN3, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 44 rows with error results and 170246 rows with NaNs (typically hz different) from a     total of 316969 entries, resulting in 146723 entries.
Correct: 1938, incorrect 0, TP 0, FP 0, TN 1938, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 1938 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1783 entries.
Found 560 sibs and 146163 nonsibs, weights: 0.9962 and 0.0038, #weights: 146723
Found 61 sibs and 1722 nonsibs, weights: 0.9658 and 0.0342, #weights: 1783
Correct: 146719, incorrect 4, TP 560, FP 4, TN 146159, FN0, Prec. 99.29, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1783, incorrect 0, TP 61, FP 0, TN 1722, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 8
Generated 316406 non-sibling candidates from 563 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 170484, incorrect 2, TP 0, FP 0, TN 170484, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 19 rows with error results and 170486 rows with NaNs (typically hz different) from a     total of 316969 entries, resulting in 146483 entries.
Correct: 1909, incorrect 1, TP 0, FP 0, TN 1909, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 99.95%
Removing 2 rows with error results and 1910 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1811 entries.
Found 561 sibs and 145922 nonsibs, weights: 0.9962 and 0.0038, #weights: 146483
Found 60 sibs and 1751 nonsibs, weights: 0.9669 and 0.0331, #weights: 1811
Correct: 146477, incorrect 6, TP 561, FP 6, TN 145916, FN0, Prec. 98.94, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1811, incorrect 0, TP 60, FP 0, TN 1751, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 9
Generated 316406 non-sibling candidates from 563 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 170062, incorrect 2, TP 0, FP 0, TN 170062, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 44 rows with error results and 170064 rows with NaNs (typically hz different) from a     total of 316969 entries, resulting in 146905 entries.
Correct: 1951, incorrect 1, TP 0, FP 0, TN 1951, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 99.95%
Removing 2 rows with error results and 1952 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1769 entries.
Found 561 sibs and 146344 nonsibs, weights: 0.9962 and 0.0038, #weights: 146905
Found 60 sibs and 1709 nonsibs, weights: 0.9661 and 0.0339, #weights: 1769
Correct: 146901, incorrect 4, TP 561, FP 4, TN 146340, FN0, Prec. 99.29, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1769, incorrect 0, TP 60, FP 0, TN 1709, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
############# Round 3 ##############
Loading from filenames ../../../gt3/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../gt3/hosts.csv__nonsiblings_seed1_n679capture.pcap.ts.siblingresult.csv
Read 625 siblings and 411909 non-siblings from files.
Our algo: Not deciding on 1412 pairs for unknown/error reasons.
Found 529 sibs and 410593 nonsibs, weights: 0.9987 and 0.0013, #weights: 411122
Our algo stats: (1412) undecided, mcc: 0.9741213413846989, f1: 0.9739130434782609
Correct: 411095, incorrect 27, TP 504, FP 2, TN 410591, FN25, Prec. 99.6, Rec. 95.27, Spec. 100.0, Acc. 99.99%
Found 624 sibs and 410593 nonsibs, weights: 0.9985 and 0.0015, #weights: 411217
Beverly algo: Not deciding on 1317 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.0801639276960036, f1: 0.01584279855370581
Correct: 334188, incorrect 77029, TP 620, FP 77025, TN 333568, FN4, Prec. 0.8, Rec. 99.36, Spec. 81.24, Acc. 81.27%
## GROUP: RAv1
Generated 132 non-sibling candidates from 12 siblings.
Our algo: Not deciding on 0 pairs for unknown/error reasons.
Found 12 sibs and 132 nonsibs, weights: 0.9167 and 0.0833, #weights: 144
Our algo stats: (0) undecided, mcc: 1.0, f1: 1.0
Correct: 144, incorrect 0, TP 12, FP 0, TN 132, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Found 12 sibs and 132 nonsibs, weights: 0.9167 and 0.0833, #weights: 144
Beverly algo: Not deciding on 0 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.0, f1: 0.15384615384615385
Correct: 12, incorrect 132, TP 12, FP 132, TN 0, FN0, Prec. 8.33, Rec. 100.0, Spec. 0.0, Acc. 8.33%
## GROUP: RAv2
Generated 42642 non-sibling candidates from 207 siblings.
Our algo: Not deciding on 3 pairs for unknown/error reasons.
Found 207 sibs and 42639 nonsibs, weights: 0.9952 and 0.0048, #weights: 42846
Our algo stats: (3) undecided, mcc: 0.9729893273796464, f1: 0.972972972972973
Correct: 42835, incorrect 11, TP 198, FP 2, TN 42637, FN9, Prec. 99.0, Rec. 95.65, Spec. 100.0, Acc. 99.97%
Found 207 sibs and 42639 nonsibs, weights: 0.9952 and 0.0048, #weights: 42846
Beverly algo: Not deciding on 3 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.054671938147755984, f1: 0.015564202334630352
Correct: 16787, incorrect 26059, TP 206, FP 26058, TN 16581, FN1, Prec. 0.78, Rec. 99.52, Spec. 38.89, Acc. 39.18%
## GROUP: nlnog
Generated 145542 non-sibling candidates from 382 siblings.
Our algo: Not deciding on 873 pairs for unknown/error reasons.
Found 288 sibs and 144763 nonsibs, weights: 0.998 and 0.002, #weights: 145051
Our algo stats: (873) undecided, mcc: 0.9771260770335519, f1: 0.9769094138543517
Correct: 145038, incorrect 13, TP 275, FP 0, TN 144763, FN13, Prec. 100.0, Rec. 95.49, Spec. 100.0, Acc. 99.99%
Found 381 sibs and 144763 nonsibs, weights: 0.9974 and 0.0026, #weights: 145144
Beverly algo: Not deciding on 780 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.08461691742211067, f1: 0.019493579495642305
Correct: 107118, incorrect 38026, TP 378, FP 38023, TN 106740, FN3, Prec. 0.98, Rec. 99.21, Spec. 73.73, Acc. 73.8%
## GROUP: servers
Generated 552 non-sibling candidates from 24 siblings.
Our algo: Not deciding on 2 pairs for unknown/error reasons.
Found 22 sibs and 552 nonsibs, weights: 0.9617 and 0.0383, #weights: 574
Our algo stats: (2) undecided, mcc: 0.9268052972270169, f1: 0.9268292682926829
Correct: 571, incorrect 3, TP 19, FP 0, TN 552, FN3, Prec. 100.0, Rec. 86.36, Spec. 100.0, Acc. 99.48%
Found 24 sibs and 552 nonsibs, weights: 0.9583 and 0.0417, #weights: 576
Beverly algo: Not deciding on 0 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.13380139498238697, f1: 0.1111111111111111
Correct: 192, incorrect 384, TP 24, FP 384, TN 168, FN0, Prec. 5.88, Rec. 100.0, Spec. 30.43, Acc. 33.33%
Round 0
Generated 313040 non-sibling candidates from 560 siblings.
Generated 4160 non-sibling candidates from 65 siblings.
Correct: 167384, incorrect 2, TP 0, FP 0, TN 167384, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1135 rows with error results and 167386 rows with NaNs (typically hz different) from a     total of 313600 entries, resulting in 146214 entries.
Correct: 2304, incorrect 0, TP 0, FP 0, TN 2304, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 2304 rows with NaNs (typically hz different) from a     total of 4225 entries, resulting in 1921 entries.
Found 558 sibs and 145656 nonsibs, weights: 0.9962 and 0.0038, #weights: 146214
Found 65 sibs and 1856 nonsibs, weights: 0.9662 and 0.0338, #weights: 1921
Correct: 146210, incorrect 4, TP 558, FP 4, TN 145652, FN0, Prec. 99.29, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1920, incorrect 1, TP 64, FP 0, TN 1856, FN1, Prec. 100.0, Rec. 98.46, Spec. 100.0, Acc. 99.95%
Round 1
Generated 313040 non-sibling candidates from 560 siblings.
Generated 4160 non-sibling candidates from 65 siblings.
Correct: 167409, incorrect 2, TP 0, FP 0, TN 167409, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1137 rows with error results and 167411 rows with NaNs (typically hz different) from a     total of 313600 entries, resulting in 146189 entries.
Correct: 2296, incorrect 0, TP 0, FP 0, TN 2296, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 2296 rows with NaNs (typically hz different) from a     total of 4225 entries, resulting in 1929 entries.
Found 558 sibs and 145631 nonsibs, weights: 0.9962 and 0.0038, #weights: 146189
Found 65 sibs and 1864 nonsibs, weights: 0.9663 and 0.0337, #weights: 1929
Correct: 146181, incorrect 8, TP 558, FP 8, TN 145623, FN0, Prec. 98.59, Rec. 100.0, Spec. 99.99, Acc. 99.99%
Correct: 1929, incorrect 0, TP 65, FP 0, TN 1864, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 2
Generated 315282 non-sibling candidates from 562 siblings.
Generated 3906 non-sibling candidates from 63 siblings.
Correct: 167624, incorrect 1, TP 0, FP 0, TN 167624, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1141 rows with error results and 167625 rows with NaNs (typically hz different) from a     total of 315844 entries, resulting in 148219 entries.
Correct: 2246, incorrect 1, TP 0, FP 0, TN 2246, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 99.96%
Removing 0 rows with error results and 2247 rows with NaNs (typically hz different) from a     total of 3969 entries, resulting in 1722 entries.
Found 561 sibs and 147658 nonsibs, weights: 0.9962 and 0.0038, #weights: 148219
Found 62 sibs and 1660 nonsibs, weights: 0.964 and 0.036, #weights: 1722
Correct: 148213, incorrect 6, TP 561, FP 6, TN 147652, FN0, Prec. 98.94, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1722, incorrect 0, TP 62, FP 0, TN 1660, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 3
Generated 315282 non-sibling candidates from 562 siblings.
Generated 3906 non-sibling candidates from 63 siblings.
Correct: 168797, incorrect 2, TP 0, FP 0, TN 168797, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1140 rows with error results and 168799 rows with NaNs (typically hz different) from a     total of 315844 entries, resulting in 147045 entries.
Correct: 2142, incorrect 0, TP 0, FP 0, TN 2142, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 2142 rows with NaNs (typically hz different) from a     total of 3969 entries, resulting in 1827 entries.
Found 560 sibs and 146485 nonsibs, weights: 0.9962 and 0.0038, #weights: 147045
Found 63 sibs and 1764 nonsibs, weights: 0.9655 and 0.0345, #weights: 1827
Correct: 147041, incorrect 4, TP 560, FP 4, TN 146481, FN0, Prec. 99.29, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1825, incorrect 2, TP 63, FP 2, TN 1762, FN0, Prec. 96.92, Rec. 100.0, Spec. 99.89, Acc. 99.89%
Round 4
Generated 316406 non-sibling candidates from 563 siblings.
Generated 3782 non-sibling candidates from 62 siblings.
Correct: 169050, incorrect 2, TP 0, FP 0, TN 169050, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1131 rows with error results and 169052 rows with NaNs (typically hz different) from a     total of 316969 entries, resulting in 147917 entries.
Correct: 2111, incorrect 0, TP 0, FP 0, TN 2111, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 1 rows with error results and 2111 rows with NaNs (typically hz different) from a     total of 3844 entries, resulting in 1733 entries.
Found 561 sibs and 147356 nonsibs, weights: 0.9962 and 0.0038, #weights: 147917
Found 62 sibs and 1671 nonsibs, weights: 0.9642 and 0.0358, #weights: 1733
Correct: 147912, incorrect 5, TP 561, FP 5, TN 147351, FN0, Prec. 99.12, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1733, incorrect 0, TP 62, FP 0, TN 1671, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 5
Generated 316406 non-sibling candidates from 563 siblings.
Generated 3782 non-sibling candidates from 62 siblings.
Correct: 170188, incorrect 2, TP 0, FP 0, TN 170188, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1141 rows with error results and 170190 rows with NaNs (typically hz different) from a     total of 316969 entries, resulting in 146779 entries.
Correct: 1985, incorrect 0, TP 0, FP 0, TN 1985, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 1 rows with error results and 1985 rows with NaNs (typically hz different) from a     total of 3844 entries, resulting in 1859 entries.
Found 561 sibs and 146218 nonsibs, weights: 0.9962 and 0.0038, #weights: 146779
Found 62 sibs and 1797 nonsibs, weights: 0.9666 and 0.0334, #weights: 1859
Correct: 146774, incorrect 5, TP 561, FP 5, TN 146213, FN0, Prec. 99.12, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1859, incorrect 0, TP 62, FP 0, TN 1797, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 6
Generated 316406 non-sibling candidates from 563 siblings.
Generated 3782 non-sibling candidates from 62 siblings.
Correct: 170037, incorrect 2, TP 0, FP 0, TN 170037, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1143 rows with error results and 170039 rows with NaNs (typically hz different) from a     total of 316969 entries, resulting in 146930 entries.
Correct: 1994, incorrect 0, TP 0, FP 0, TN 1994, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 1994 rows with NaNs (typically hz different) from a     total of 3844 entries, resulting in 1850 entries.
Found 561 sibs and 146369 nonsibs, weights: 0.9962 and 0.0038, #weights: 146930
Found 62 sibs and 1788 nonsibs, weights: 0.9665 and 0.0335, #weights: 1850
Correct: 146925, incorrect 5, TP 561, FP 5, TN 146364, FN0, Prec. 99.12, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1850, incorrect 0, TP 62, FP 0, TN 1788, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 7
Generated 317532 non-sibling candidates from 564 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 170342, incorrect 1, TP 0, FP 0, TN 170342, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 15 rows with error results and 170343 rows with NaNs (typically hz different) from a     total of 318096 entries, resulting in 147753 entries.
Correct: 1962, incorrect 1, TP 0, FP 0, TN 1962, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 99.95%
Removing 121 rows with error results and 1963 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1758 entries.
Found 563 sibs and 147190 nonsibs, weights: 0.9962 and 0.0038, #weights: 147753
Found 60 sibs and 1698 nonsibs, weights: 0.9659 and 0.0341, #weights: 1758
Correct: 147745, incorrect 8, TP 563, FP 8, TN 147182, FN0, Prec. 98.6, Rec. 100.0, Spec. 99.99, Acc. 99.99%
Correct: 1758, incorrect 0, TP 60, FP 0, TN 1698, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 8
Generated 317532 non-sibling candidates from 564 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 171619, incorrect 2, TP 0, FP 0, TN 171619, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1147 rows with error results and 171621 rows with NaNs (typically hz different) from a     total of 318096 entries, resulting in 146475 entries.
Correct: 1816, incorrect 0, TP 0, FP 0, TN 1816, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 1816 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1905 entries.
Found 562 sibs and 145913 nonsibs, weights: 0.9962 and 0.0038, #weights: 146475
Found 61 sibs and 1844 nonsibs, weights: 0.968 and 0.032, #weights: 1905
Correct: 146467, incorrect 8, TP 562, FP 8, TN 145905, FN0, Prec. 98.6, Rec. 100.0, Spec. 99.99, Acc. 99.99%
Correct: 1905, incorrect 0, TP 61, FP 0, TN 1844, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 9
Generated 317532 non-sibling candidates from 564 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 170214, incorrect 2, TP 0, FP 0, TN 170214, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1145 rows with error results and 170216 rows with NaNs (typically hz different) from a     total of 318096 entries, resulting in 147880 entries.
Correct: 1984, incorrect 0, TP 0, FP 0, TN 1984, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 1984 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1737 entries.
Found 562 sibs and 147318 nonsibs, weights: 0.9962 and 0.0038, #weights: 147880
Found 61 sibs and 1676 nonsibs, weights: 0.9649 and 0.0351, #weights: 1737
Correct: 147875, incorrect 5, TP 562, FP 5, TN 147313, FN0, Prec. 99.12, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1737, incorrect 0, TP 61, FP 0, TN 1676, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
############# Round 4 ##############
Loading from filenames ../../../gt4/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../gt4/hosts.csv__nonsiblings_seed1_n679capture.pcap.ts.siblingresult.csv
Read 620 siblings and 408034 non-siblings from files.
Our algo: Not deciding on 1367 pairs for unknown/error reasons.
Found 591 sibs and 406696 nonsibs, weights: 0.9985 and 0.0015, #weights: 407287
Our algo stats: (1367) undecided, mcc: 0.9759962072315428, f1: 0.9758203799654578
Correct: 407259, incorrect 28, TP 565, FP 2, TN 406694, FN26, Prec. 99.65, Rec. 95.6, Spec. 100.0, Acc. 99.99%
Found 619 sibs and 406698 nonsibs, weights: 0.9985 and 0.0015, #weights: 407317
Beverly algo: Not deciding on 1337 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.08018782271325639, f1: 0.01585562359007412
Correct: 330972, incorrect 76345, TP 615, FP 76341, TN 330357, FN4, Prec. 0.8, Rec. 99.35, Spec. 81.23, Acc. 81.26%
## GROUP: RAv1
Generated 132 non-sibling candidates from 12 siblings.
Our algo: Not deciding on 0 pairs for unknown/error reasons.
Found 12 sibs and 132 nonsibs, weights: 0.9167 and 0.0833, #weights: 144
Our algo stats: (0) undecided, mcc: 1.0, f1: 1.0
Correct: 144, incorrect 0, TP 12, FP 0, TN 132, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Found 12 sibs and 132 nonsibs, weights: 0.9167 and 0.0833, #weights: 144
Beverly algo: Not deciding on 0 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.0, f1: 0.15384615384615385
Correct: 12, incorrect 132, TP 12, FP 132, TN 0, FN0, Prec. 8.33, Rec. 100.0, Spec. 0.0, Acc. 8.33%
## GROUP: RAv2
Generated 41006 non-sibling candidates from 203 siblings.
Our algo: Not deciding on 0 pairs for unknown/error reasons.
Found 203 sibs and 41006 nonsibs, weights: 0.9951 and 0.0049, #weights: 41209
Our algo stats: (0) undecided, mcc: 0.9647865310847772, f1: 0.9646464646464645
Correct: 41195, incorrect 14, TP 191, FP 2, TN 41004, FN12, Prec. 98.96, Rec. 94.09, Spec. 100.0, Acc. 99.97%
Found 203 sibs and 41006 nonsibs, weights: 0.9951 and 0.0049, #weights: 41209
Beverly algo: Not deciding on 0 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.05464444412875073, f1: 0.015746803866541943
Correct: 15957, incorrect 25252, TP 202, FP 25251, TN 15755, FN1, Prec. 0.79, Rec. 99.51, Spec. 38.42, Acc. 38.72%
## GROUP: nlnog
Generated 144780 non-sibling candidates from 381 siblings.
Our algo: Not deciding on 839 pairs for unknown/error reasons.
Found 353 sibs and 143969 nonsibs, weights: 0.9976 and 0.0024, #weights: 144322
Our algo stats: (839) undecided, mcc: 0.9842583543629252, f1: 0.9841726618705036
Correct: 144311, incorrect 11, TP 342, FP 0, TN 143969, FN11, Prec. 100.0, Rec. 96.88, Spec. 100.0, Acc. 99.99%
Found 380 sibs and 143970 nonsibs, weights: 0.9974 and 0.0026, #weights: 144350
Beverly algo: Not deciding on 811 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.08472052371957776, f1: 0.01954329851481299
Correct: 106523, incorrect 37827, TP 377, FP 37824, TN 106146, FN3, Prec. 0.99, Rec. 99.21, Spec. 73.73, Acc. 73.79%
## GROUP: servers
Generated 552 non-sibling candidates from 24 siblings.
Our algo: Not deciding on 1 pairs for unknown/error reasons.
Found 23 sibs and 552 nonsibs, weights: 0.96 and 0.04, #weights: 575
Our algo stats: (1) undecided, mcc: 0.9299811099505543, f1: 0.9302325581395349
Correct: 572, incorrect 3, TP 20, FP 0, TN 552, FN3, Prec. 100.0, Rec. 86.96, Spec. 100.0, Acc. 99.48%
Found 24 sibs and 552 nonsibs, weights: 0.9583 and 0.0417, #weights: 576
Beverly algo: Not deciding on 0 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.13380139498238694, f1: 0.1111111111111111
Correct: 192, incorrect 384, TP 24, FP 384, TN 168, FN0, Prec. 5.88, Rec. 100.0, Spec. 30.43, Acc. 33.33%
Round 0
Generated 307470 non-sibling candidates from 555 siblings.
Generated 4160 non-sibling candidates from 65 siblings.
Correct: 164587, incorrect 2, TP 0, FP 0, TN 164587, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1144 rows with error results and 164589 rows with NaNs (typically hz different) from a     total of 308025 entries, resulting in 143436 entries.
Correct: 2158, incorrect 0, TP 0, FP 0, TN 2158, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 2 rows with error results and 2158 rows with NaNs (typically hz different) from a     total of 4225 entries, resulting in 2067 entries.
Found 553 sibs and 142883 nonsibs, weights: 0.9961 and 0.0039, #weights: 143436
Found 65 sibs and 2002 nonsibs, weights: 0.9686 and 0.0314, #weights: 2067
Correct: 143429, incorrect 7, TP 553, FP 7, TN 142876, FN0, Prec. 98.75, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 2067, incorrect 0, TP 65, FP 0, TN 2002, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 1
Generated 308580 non-sibling candidates from 556 siblings.
Generated 4032 non-sibling candidates from 64 siblings.
Correct: 163503, incorrect 1, TP 0, FP 0, TN 163503, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1162 rows with error results and 163504 rows with NaNs (typically hz different) from a     total of 309136 entries, resulting in 145632 entries.
Correct: 2281, incorrect 1, TP 0, FP 0, TN 2281, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 99.96%
Removing 0 rows with error results and 2282 rows with NaNs (typically hz different) from a     total of 4096 entries, resulting in 1814 entries.
Found 555 sibs and 145077 nonsibs, weights: 0.9962 and 0.0038, #weights: 145632
Found 63 sibs and 1751 nonsibs, weights: 0.9653 and 0.0347, #weights: 1814
Correct: 145625, incorrect 7, TP 555, FP 7, TN 145070, FN0, Prec. 98.75, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1814, incorrect 0, TP 63, FP 0, TN 1751, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 2
Generated 309692 non-sibling candidates from 557 siblings.
Generated 3906 non-sibling candidates from 63 siblings.
Correct: 165435, incorrect 2, TP 0, FP 0, TN 165435, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1143 rows with error results and 165437 rows with NaNs (typically hz different) from a     total of 310249 entries, resulting in 144812 entries.
Correct: 2059, incorrect 0, TP 0, FP 0, TN 2059, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 5 rows with error results and 2059 rows with NaNs (typically hz different) from a     total of 3969 entries, resulting in 1910 entries.
Found 555 sibs and 144257 nonsibs, weights: 0.9962 and 0.0038, #weights: 144812
Found 63 sibs and 1847 nonsibs, weights: 0.967 and 0.033, #weights: 1910
Correct: 144808, incorrect 4, TP 555, FP 4, TN 144253, FN0, Prec. 99.28, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1910, incorrect 0, TP 63, FP 0, TN 1847, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 3
Generated 310806 non-sibling candidates from 558 siblings.
Generated 3782 non-sibling candidates from 62 siblings.
Correct: 163988, incorrect 1, TP 0, FP 0, TN 163988, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 43 rows with error results and 163989 rows with NaNs (typically hz different) from a     total of 311364 entries, resulting in 147375 entries.
Correct: 2205, incorrect 1, TP 0, FP 0, TN 2205, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 99.95%
Removing 124 rows with error results and 2206 rows with NaNs (typically hz different) from a     total of 3844 entries, resulting in 1638 entries.
Found 557 sibs and 146818 nonsibs, weights: 0.9962 and 0.0038, #weights: 147375
Found 61 sibs and 1577 nonsibs, weights: 0.9628 and 0.0372, #weights: 1638
Correct: 147373, incorrect 2, TP 557, FP 2, TN 146816, FN0, Prec. 99.64, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1637, incorrect 1, TP 60, FP 0, TN 1577, FN1, Prec. 100.0, Rec. 98.36, Spec. 100.0, Acc. 99.94%
Round 4
Generated 311922 non-sibling candidates from 559 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 165964, incorrect 2, TP 0, FP 0, TN 165964, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1171 rows with error results and 165966 rows with NaNs (typically hz different) from a     total of 312481 entries, resulting in 146515 entries.
Correct: 2000, incorrect 0, TP 0, FP 0, TN 2000, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 2000 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1721 entries.
Found 557 sibs and 145958 nonsibs, weights: 0.9962 and 0.0038, #weights: 146515
Found 61 sibs and 1660 nonsibs, weights: 0.9646 and 0.0354, #weights: 1721
Correct: 146510, incorrect 5, TP 557, FP 5, TN 145953, FN0, Prec. 99.11, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1721, incorrect 0, TP 61, FP 0, TN 1660, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 5
Generated 311922 non-sibling candidates from 559 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 166416, incorrect 2, TP 0, FP 0, TN 166416, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1166 rows with error results and 166418 rows with NaNs (typically hz different) from a     total of 312481 entries, resulting in 146063 entries.
Correct: 1954, incorrect 0, TP 0, FP 0, TN 1954, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 1954 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1767 entries.
Found 557 sibs and 145506 nonsibs, weights: 0.9962 and 0.0038, #weights: 146063
Found 61 sibs and 1706 nonsibs, weights: 0.9655 and 0.0345, #weights: 1767
Correct: 146058, incorrect 5, TP 557, FP 5, TN 145501, FN0, Prec. 99.11, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1767, incorrect 0, TP 61, FP 0, TN 1706, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 6
Generated 311922 non-sibling candidates from 559 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 166714, incorrect 2, TP 0, FP 0, TN 166714, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1165 rows with error results and 166716 rows with NaNs (typically hz different) from a     total of 312481 entries, resulting in 145765 entries.
Correct: 1918, incorrect 0, TP 0, FP 0, TN 1918, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 1918 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1803 entries.
Found 557 sibs and 145208 nonsibs, weights: 0.9962 and 0.0038, #weights: 145765
Found 61 sibs and 1742 nonsibs, weights: 0.9662 and 0.0338, #weights: 1803
Correct: 145759, incorrect 6, TP 557, FP 6, TN 145202, FN0, Prec. 98.93, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1803, incorrect 0, TP 61, FP 0, TN 1742, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 7
Generated 311922 non-sibling candidates from 559 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 166719, incorrect 2, TP 0, FP 0, TN 166719, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1170 rows with error results and 166721 rows with NaNs (typically hz different) from a     total of 312481 entries, resulting in 145760 entries.
Correct: 1918, incorrect 0, TP 0, FP 0, TN 1918, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 1918 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1803 entries.
Found 557 sibs and 145203 nonsibs, weights: 0.9962 and 0.0038, #weights: 145760
Found 61 sibs and 1742 nonsibs, weights: 0.9662 and 0.0338, #weights: 1803
Correct: 145753, incorrect 7, TP 557, FP 7, TN 145196, FN0, Prec. 98.76, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1803, incorrect 0, TP 61, FP 0, TN 1742, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 8
Generated 311922 non-sibling candidates from 559 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 166540, incorrect 2, TP 0, FP 0, TN 166540, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1167 rows with error results and 166542 rows with NaNs (typically hz different) from a     total of 312481 entries, resulting in 145939 entries.
Correct: 1940, incorrect 0, TP 0, FP 0, TN 1940, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 1940 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1781 entries.
Found 557 sibs and 145382 nonsibs, weights: 0.9962 and 0.0038, #weights: 145939
Found 61 sibs and 1720 nonsibs, weights: 0.9657 and 0.0343, #weights: 1781
Correct: 145934, incorrect 5, TP 557, FP 5, TN 145377, FN0, Prec. 99.11, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1779, incorrect 2, TP 61, FP 2, TN 1718, FN0, Prec. 96.83, Rec. 100.0, Spec. 99.88, Acc. 99.89%
Round 9
Generated 311922 non-sibling candidates from 559 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 166259, incorrect 2, TP 0, FP 0, TN 166259, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1168 rows with error results and 166261 rows with NaNs (typically hz different) from a     total of 312481 entries, resulting in 146220 entries.
Correct: 1972, incorrect 0, TP 0, FP 0, TN 1972, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 1972 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1749 entries.
Found 557 sibs and 145663 nonsibs, weights: 0.9962 and 0.0038, #weights: 146220
Found 61 sibs and 1688 nonsibs, weights: 0.9651 and 0.0349, #weights: 1749
Correct: 146213, incorrect 7, TP 557, FP 7, TN 145656, FN0, Prec. 98.76, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1749, incorrect 0, TP 61, FP 0, TN 1688, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
############# Round 5 ##############
Loading from filenames ../../../gt5/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../gt5/hosts.csv__nonsiblings_seed1_n679capture.pcap.ts.siblingresult.csv
Read 620 siblings and 408034 non-siblings from files.
Our algo: Not deciding on 727 pairs for unknown/error reasons.
Found 563 sibs and 407364 nonsibs, weights: 0.9986 and 0.0014, #weights: 407927
Our algo stats: (727) undecided, mcc: 0.9856686349153984, f1: 0.9855855855855855
Correct: 407911, incorrect 16, TP 547, FP 0, TN 407364, FN16, Prec. 100.0, Rec. 97.16, Spec. 100.0, Acc. 100.0%
Found 619 sibs and 407366 nonsibs, weights: 0.9985 and 0.0015, #weights: 407985
Beverly algo: Not deciding on 669 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.08042295308866737, f1: 0.015925422412118858
Correct: 331980, incorrect 76005, TP 615, FP 76001, TN 331365, FN4, Prec. 0.8, Rec. 99.35, Spec. 81.34, Acc. 81.37%
## GROUP: RAv1
Generated 132 non-sibling candidates from 12 siblings.
Our algo: Not deciding on 10 pairs for unknown/error reasons.
Found 2 sibs and 132 nonsibs, weights: 0.9851 and 0.0149, #weights: 134
Our algo stats: (10) undecided, mcc: 1.0, f1: 1.0
Correct: 134, incorrect 0, TP 2, FP 0, TN 132, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Found 12 sibs and 132 nonsibs, weights: 0.9167 and 0.0833, #weights: 144
Beverly algo: Not deciding on 0 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.0, f1: 0.15384615384615385
Correct: 12, incorrect 132, TP 12, FP 132, TN 0, FN0, Prec. 8.33, Rec. 100.0, Spec. 0.0, Acc. 8.33%
## GROUP: RAv2
Generated 41006 non-sibling candidates from 203 siblings.
Our algo: Not deciding on 0 pairs for unknown/error reasons.
Found 203 sibs and 41006 nonsibs, weights: 0.9951 and 0.0049, #weights: 41209
Our algo stats: (0) undecided, mcc: 0.9825235107738732, f1: 0.9824561403508771
Correct: 41202, incorrect 7, TP 196, FP 0, TN 41006, FN7, Prec. 100.0, Rec. 96.55, Spec. 100.0, Acc. 99.98%
Found 203 sibs and 41006 nonsibs, weights: 0.9951 and 0.0049, #weights: 41209
Beverly algo: Not deciding on 0 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.0546444441287507, f1: 0.015746803866541943
Correct: 15957, incorrect 25252, TP 202, FP 25251, TN 15755, FN1, Prec. 0.79, Rec. 99.51, Spec. 38.42, Acc. 38.72%
## GROUP: nlnog
Generated 144780 non-sibling candidates from 381 siblings.
Our algo: Not deciding on 434 pairs for unknown/error reasons.
Found 336 sibs and 144391 nonsibs, weights: 0.9977 and 0.0023, #weights: 144727
Our algo stats: (434) undecided, mcc: 0.9910106190324149, f1: 0.9909909909909909
Correct: 144721, incorrect 6, TP 330, FP 0, TN 144391, FN6, Prec. 100.0, Rec. 98.21, Spec. 100.0, Acc. 100.0%
Found 380 sibs and 144393 nonsibs, weights: 0.9974 and 0.0026, #weights: 144773
Beverly algo: Not deciding on 388 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.08520748226387029, f1: 0.019691311274189757
Correct: 107236, incorrect 37537, TP 377, FP 37534, TN 106859, FN3, Prec. 0.99, Rec. 99.21, Spec. 74.01, Acc. 74.07%
## GROUP: servers
Generated 552 non-sibling candidates from 24 siblings.
Our algo: Not deciding on 2 pairs for unknown/error reasons.
Found 22 sibs and 552 nonsibs, weights: 0.9617 and 0.0383, #weights: 574
Our algo stats: (2) undecided, mcc: 0.9268052972270169, f1: 0.9268292682926829
Correct: 571, incorrect 3, TP 19, FP 0, TN 552, FN3, Prec. 100.0, Rec. 86.36, Spec. 100.0, Acc. 99.48%
Found 24 sibs and 552 nonsibs, weights: 0.9583 and 0.0417, #weights: 576
Beverly algo: Not deciding on 0 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.13380139498238694, f1: 0.1111111111111111
Correct: 192, incorrect 384, TP 24, FP 384, TN 168, FN0, Prec. 5.88, Rec. 100.0, Spec. 30.43, Acc. 33.33%
Round 0
Generated 307470 non-sibling candidates from 555 siblings.
Generated 4160 non-sibling candidates from 65 siblings.
Correct: 162646, incorrect 2, TP 0, FP 0, TN 162646, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 561 rows with error results and 162648 rows with NaNs (typically hz different) from a     total of 308025 entries, resulting in 145377 entries.
Correct: 2329, incorrect 0, TP 0, FP 0, TN 2329, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 1 rows with error results and 2329 rows with NaNs (typically hz different) from a     total of 4225 entries, resulting in 1896 entries.
Found 553 sibs and 144824 nonsibs, weights: 0.9962 and 0.0038, #weights: 145377
Found 65 sibs and 1831 nonsibs, weights: 0.9657 and 0.0343, #weights: 1896
Correct: 145375, incorrect 2, TP 553, FP 2, TN 144822, FN0, Prec. 99.64, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1896, incorrect 0, TP 65, FP 0, TN 1831, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 1
Generated 308580 non-sibling candidates from 556 siblings.
Generated 4032 non-sibling candidates from 64 siblings.
Correct: 163936, incorrect 2, TP 0, FP 0, TN 163936, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 564 rows with error results and 163938 rows with NaNs (typically hz different) from a     total of 309136 entries, resulting in 145198 entries.
Correct: 2191, incorrect 0, TP 0, FP 0, TN 2191, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 1 rows with error results and 2191 rows with NaNs (typically hz different) from a     total of 4096 entries, resulting in 1905 entries.
Found 554 sibs and 144644 nonsibs, weights: 0.9962 and 0.0038, #weights: 145198
Found 64 sibs and 1841 nonsibs, weights: 0.9664 and 0.0336, #weights: 1905
Correct: 145194, incorrect 4, TP 554, FP 4, TN 144640, FN0, Prec. 99.28, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1905, incorrect 0, TP 64, FP 0, TN 1841, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 2
Generated 309692 non-sibling candidates from 557 siblings.
Generated 3906 non-sibling candidates from 63 siblings.
Correct: 164780, incorrect 2, TP 0, FP 0, TN 164780, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 568 rows with error results and 164782 rows with NaNs (typically hz different) from a     total of 310249 entries, resulting in 145467 entries.
Correct: 2094, incorrect 0, TP 0, FP 0, TN 2094, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 2094 rows with NaNs (typically hz different) from a     total of 3969 entries, resulting in 1875 entries.
Found 555 sibs and 144912 nonsibs, weights: 0.9962 and 0.0038, #weights: 145467
Found 63 sibs and 1812 nonsibs, weights: 0.9664 and 0.0336, #weights: 1875
Correct: 145463, incorrect 4, TP 555, FP 4, TN 144908, FN0, Prec. 99.28, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1875, incorrect 0, TP 63, FP 0, TN 1812, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 3
Generated 310806 non-sibling candidates from 558 siblings.
Generated 3782 non-sibling candidates from 62 siblings.
Correct: 164464, incorrect 2, TP 0, FP 0, TN 164464, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 570 rows with error results and 164466 rows with NaNs (typically hz different) from a     total of 311364 entries, resulting in 146898 entries.
Correct: 2098, incorrect 0, TP 0, FP 0, TN 2098, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 2098 rows with NaNs (typically hz different) from a     total of 3844 entries, resulting in 1746 entries.
Found 556 sibs and 146342 nonsibs, weights: 0.9962 and 0.0038, #weights: 146898
Found 62 sibs and 1684 nonsibs, weights: 0.9645 and 0.0355, #weights: 1746
Correct: 146894, incorrect 4, TP 556, FP 4, TN 146338, FN0, Prec. 99.29, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1746, incorrect 0, TP 62, FP 0, TN 1684, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 4
Generated 311922 non-sibling candidates from 559 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 165570, incorrect 2, TP 0, FP 0, TN 165570, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 571 rows with error results and 165572 rows with NaNs (typically hz different) from a     total of 312481 entries, resulting in 146909 entries.
Correct: 2000, incorrect 0, TP 0, FP 0, TN 2000, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 2000 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1721 entries.
Found 557 sibs and 146352 nonsibs, weights: 0.9962 and 0.0038, #weights: 146909
Found 61 sibs and 1660 nonsibs, weights: 0.9646 and 0.0354, #weights: 1721
Correct: 146907, incorrect 2, TP 557, FP 2, TN 146350, FN0, Prec. 99.64, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1721, incorrect 0, TP 61, FP 0, TN 1660, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 5
Generated 311922 non-sibling candidates from 559 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 165624, incorrect 2, TP 0, FP 0, TN 165624, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 570 rows with error results and 165626 rows with NaNs (typically hz different) from a     total of 312481 entries, resulting in 146855 entries.
Correct: 1994, incorrect 0, TP 0, FP 0, TN 1994, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 1994 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1727 entries.
Found 557 sibs and 146298 nonsibs, weights: 0.9962 and 0.0038, #weights: 146855
Found 61 sibs and 1666 nonsibs, weights: 0.9647 and 0.0353, #weights: 1727
Correct: 146851, incorrect 4, TP 557, FP 4, TN 146294, FN0, Prec. 99.29, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1727, incorrect 0, TP 61, FP 0, TN 1666, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 6
Generated 311922 non-sibling candidates from 559 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 165134, incorrect 2, TP 0, FP 0, TN 165134, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 567 rows with error results and 165136 rows with NaNs (typically hz different) from a     total of 312481 entries, resulting in 147345 entries.
Correct: 2046, incorrect 0, TP 0, FP 0, TN 2046, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 2046 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1675 entries.
Found 557 sibs and 146788 nonsibs, weights: 0.9962 and 0.0038, #weights: 147345
Found 61 sibs and 1614 nonsibs, weights: 0.9636 and 0.0364, #weights: 1675
Correct: 147341, incorrect 4, TP 557, FP 4, TN 146784, FN0, Prec. 99.29, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1675, incorrect 0, TP 61, FP 0, TN 1614, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 7
Generated 311922 non-sibling candidates from 559 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 166590, incorrect 1, TP 0, FP 0, TN 166590, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 569 rows with error results and 166591 rows with NaNs (typically hz different) from a     total of 312481 entries, resulting in 145890 entries.
Correct: 1889, incorrect 1, TP 0, FP 0, TN 1889, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 99.95%
Removing 0 rows with error results and 1890 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1831 entries.
Found 558 sibs and 145332 nonsibs, weights: 0.9962 and 0.0038, #weights: 145890
Found 60 sibs and 1771 nonsibs, weights: 0.9672 and 0.0328, #weights: 1831
Correct: 145888, incorrect 2, TP 558, FP 2, TN 145330, FN0, Prec. 99.64, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1831, incorrect 0, TP 60, FP 0, TN 1771, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 8
Generated 311922 non-sibling candidates from 559 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 166721, incorrect 2, TP 0, FP 0, TN 166721, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 569 rows with error results and 166723 rows with NaNs (typically hz different) from a     total of 312481 entries, resulting in 145758 entries.
Correct: 1860, incorrect 0, TP 0, FP 0, TN 1860, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 1860 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1861 entries.
Found 557 sibs and 145201 nonsibs, weights: 0.9962 and 0.0038, #weights: 145758
Found 61 sibs and 1800 nonsibs, weights: 0.9672 and 0.0328, #weights: 1861
Correct: 145756, incorrect 2, TP 557, FP 2, TN 145199, FN0, Prec. 99.64, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1861, incorrect 0, TP 61, FP 0, TN 1800, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 9
Generated 311922 non-sibling candidates from 559 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 167071, incorrect 1, TP 0, FP 0, TN 167071, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 10 rows with error results and 167072 rows with NaNs (typically hz different) from a     total of 312481 entries, resulting in 145409 entries.
Correct: 1795, incorrect 1, TP 0, FP 0, TN 1795, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 99.94%
Removing 61 rows with error results and 1796 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1925 entries.
Found 558 sibs and 144851 nonsibs, weights: 0.9962 and 0.0038, #weights: 145409
Found 60 sibs and 1865 nonsibs, weights: 0.9688 and 0.0312, #weights: 1925
Correct: 145405, incorrect 4, TP 558, FP 4, TN 144847, FN0, Prec. 99.29, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1925, incorrect 0, TP 60, FP 0, TN 1865, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
############# Round 6 ##############
Loading from filenames ../../../gt6/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../gt6/hosts.csv__nonsiblings_seed1_n679capture.pcap.ts.siblingresult.csv
Read 620 siblings and 408034 non-siblings from files.
Our algo: Not deciding on 1357 pairs for unknown/error reasons.
Found 563 sibs and 406734 nonsibs, weights: 0.9986 and 0.0014, #weights: 407297
Our algo stats: (1357) undecided, mcc: 0.9829842551468645, f1: 0.9829596412556053
Correct: 407278, incorrect 19, TP 548, FP 4, TN 406730, FN15, Prec. 99.28, Rec. 97.34, Spec. 100.0, Acc. 100.0%
Found 619 sibs and 406734 nonsibs, weights: 0.9985 and 0.0015, #weights: 407353
Beverly algo: Not deciding on 1301 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.08067887540424964, f1: 0.015986090025562175
Correct: 331518, incorrect 75835, TP 616, FP 75832, TN 330902, FN3, Prec. 0.81, Rec. 99.52, Spec. 81.36, Acc. 81.38%
## GROUP: RAv1
Generated 132 non-sibling candidates from 12 siblings.
Our algo: Not deciding on 11 pairs for unknown/error reasons.
Found 1 sibs and 132 nonsibs, weights: 0.9925 and 0.0075, #weights: 133
Our algo stats: (11) undecided, mcc: 1.0, f1: 1.0
Correct: 133, incorrect 0, TP 1, FP 0, TN 132, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Found 12 sibs and 132 nonsibs, weights: 0.9167 and 0.0833, #weights: 144
Beverly algo: Not deciding on 0 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.0, f1: 0.15384615384615385
Correct: 12, incorrect 132, TP 12, FP 132, TN 0, FN0, Prec. 8.33, Rec. 100.0, Spec. 0.0, Acc. 8.33%
## GROUP: RAv2
Generated 41006 non-sibling candidates from 203 siblings.
Our algo: Not deciding on 2 pairs for unknown/error reasons.
Found 203 sibs and 41004 nonsibs, weights: 0.9951 and 0.0049, #weights: 41207
Our algo stats: (2) undecided, mcc: 0.9724481217690122, f1: 0.9724310776942356
Correct: 41196, incorrect 11, TP 194, FP 2, TN 41002, FN9, Prec. 98.98, Rec. 95.57, Spec. 100.0, Acc. 99.97%
Found 203 sibs and 41004 nonsibs, weights: 0.9951 and 0.0049, #weights: 41207
Beverly algo: Not deciding on 2 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.0546479584463799, f1: 0.015748031496062992
Correct: 15957, incorrect 25250, TP 202, FP 25249, TN 15755, FN1, Prec. 0.79, Rec. 99.51, Spec. 38.42, Acc. 38.72%
## GROUP: nlnog
Generated 144780 non-sibling candidates from 381 siblings.
Our algo: Not deciding on 824 pairs for unknown/error reasons.
Found 337 sibs and 144000 nonsibs, weights: 0.9977 and 0.0023, #weights: 144337
Our algo stats: (824) undecided, mcc: 0.9925543193161604, f1: 0.9925705794947994
Correct: 144332, incorrect 5, TP 334, FP 2, TN 143998, FN3, Prec. 99.4, Rec. 99.11, Spec. 100.0, Acc. 100.0%
Found 380 sibs and 144000 nonsibs, weights: 0.9974 and 0.0026, #weights: 144380
Beverly algo: Not deciding on 781 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.08568368231929237, f1: 0.01981443623211197
Correct: 106982, incorrect 37398, TP 378, FP 37396, TN 106604, FN2, Prec. 1.0, Rec. 99.47, Spec. 74.03, Acc. 74.1%
## GROUP: servers
Generated 552 non-sibling candidates from 24 siblings.
Our algo: Not deciding on 2 pairs for unknown/error reasons.
Found 22 sibs and 552 nonsibs, weights: 0.9617 and 0.0383, #weights: 574
Our algo stats: (2) undecided, mcc: 0.9268052972270169, f1: 0.9268292682926829
Correct: 571, incorrect 3, TP 19, FP 0, TN 552, FN3, Prec. 100.0, Rec. 86.36, Spec. 100.0, Acc. 99.48%
Found 24 sibs and 552 nonsibs, weights: 0.9583 and 0.0417, #weights: 576
Beverly algo: Not deciding on 0 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.13380139498238694, f1: 0.1111111111111111
Correct: 192, incorrect 384, TP 24, FP 384, TN 168, FN0, Prec. 5.88, Rec. 100.0, Spec. 30.43, Acc. 33.33%
Round 0
Generated 307470 non-sibling candidates from 555 siblings.
Generated 4160 non-sibling candidates from 65 siblings.
Correct: 163734, incorrect 2, TP 0, FP 0, TN 163734, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1124 rows with error results and 163736 rows with NaNs (typically hz different) from a     total of 308025 entries, resulting in 144289 entries.
Correct: 2254, incorrect 0, TP 0, FP 0, TN 2254, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 2254 rows with NaNs (typically hz different) from a     total of 4225 entries, resulting in 1971 entries.
Found 553 sibs and 143736 nonsibs, weights: 0.9962 and 0.0038, #weights: 144289
Found 65 sibs and 1906 nonsibs, weights: 0.967 and 0.033, #weights: 1971
Correct: 144281, incorrect 8, TP 553, FP 8, TN 143728, FN0, Prec. 98.57, Rec. 100.0, Spec. 99.99, Acc. 99.99%
Correct: 1971, incorrect 0, TP 65, FP 0, TN 1906, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 1
Generated 308580 non-sibling candidates from 556 siblings.
Generated 4032 non-sibling candidates from 64 siblings.
Correct: 165404, incorrect 2, TP 0, FP 0, TN 165404, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1132 rows with error results and 165406 rows with NaNs (typically hz different) from a     total of 309136 entries, resulting in 143730 entries.
Correct: 2038, incorrect 0, TP 0, FP 0, TN 2038, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 2038 rows with NaNs (typically hz different) from a     total of 4096 entries, resulting in 2058 entries.
Found 554 sibs and 143176 nonsibs, weights: 0.9961 and 0.0039, #weights: 143730
Found 64 sibs and 1994 nonsibs, weights: 0.9689 and 0.0311, #weights: 2058
Correct: 143725, incorrect 5, TP 554, FP 5, TN 143171, FN0, Prec. 99.11, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 2058, incorrect 0, TP 64, FP 0, TN 1994, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 2
Generated 309692 non-sibling candidates from 557 siblings.
Generated 3906 non-sibling candidates from 63 siblings.
Correct: 163998, incorrect 1, TP 0, FP 0, TN 163998, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 10 rows with error results and 163999 rows with NaNs (typically hz different) from a     total of 310249 entries, resulting in 146250 entries.
Correct: 2218, incorrect 1, TP 0, FP 0, TN 2218, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 99.95%
Removing 125 rows with error results and 2219 rows with NaNs (typically hz different) from a     total of 3969 entries, resulting in 1750 entries.
Found 556 sibs and 145694 nonsibs, weights: 0.9962 and 0.0038, #weights: 146250
Found 62 sibs and 1688 nonsibs, weights: 0.9646 and 0.0354, #weights: 1750
Correct: 146246, incorrect 4, TP 556, FP 4, TN 145690, FN0, Prec. 99.29, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1749, incorrect 1, TP 61, FP 0, TN 1688, FN1, Prec. 100.0, Rec. 98.39, Spec. 100.0, Acc. 99.94%
Round 3
Generated 310806 non-sibling candidates from 558 siblings.
Generated 3782 non-sibling candidates from 62 siblings.
Correct: 165623, incorrect 2, TP 0, FP 0, TN 165623, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1135 rows with error results and 165625 rows with NaNs (typically hz different) from a     total of 311364 entries, resulting in 145739 entries.
Correct: 2002, incorrect 0, TP 0, FP 0, TN 2002, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 2002 rows with NaNs (typically hz different) from a     total of 3844 entries, resulting in 1842 entries.
Found 556 sibs and 145183 nonsibs, weights: 0.9962 and 0.0038, #weights: 145739
Found 62 sibs and 1780 nonsibs, weights: 0.9663 and 0.0337, #weights: 1842
Correct: 145733, incorrect 6, TP 556, FP 6, TN 145177, FN0, Prec. 98.93, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1842, incorrect 0, TP 62, FP 0, TN 1780, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 4
Generated 311922 non-sibling candidates from 559 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 166817, incorrect 2, TP 0, FP 0, TN 166817, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1138 rows with error results and 166819 rows with NaNs (typically hz different) from a     total of 312481 entries, resulting in 145662 entries.
Correct: 1894, incorrect 0, TP 0, FP 0, TN 1894, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 1894 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1827 entries.
Found 557 sibs and 145105 nonsibs, weights: 0.9962 and 0.0038, #weights: 145662
Found 61 sibs and 1766 nonsibs, weights: 0.9666 and 0.0334, #weights: 1827
Correct: 145652, incorrect 10, TP 557, FP 10, TN 145095, FN0, Prec. 98.24, Rec. 100.0, Spec. 99.99, Acc. 99.99%
Correct: 1827, incorrect 0, TP 61, FP 0, TN 1766, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 5
Generated 311922 non-sibling candidates from 559 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 165646, incorrect 2, TP 0, FP 0, TN 165646, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1135 rows with error results and 165648 rows with NaNs (typically hz different) from a     total of 312481 entries, resulting in 146833 entries.
Correct: 2032, incorrect 0, TP 0, FP 0, TN 2032, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 2032 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1689 entries.
Found 557 sibs and 146276 nonsibs, weights: 0.9962 and 0.0038, #weights: 146833
Found 61 sibs and 1628 nonsibs, weights: 0.9639 and 0.0361, #weights: 1689
Correct: 146827, incorrect 6, TP 557, FP 6, TN 146270, FN0, Prec. 98.93, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1689, incorrect 0, TP 61, FP 0, TN 1628, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 6
Generated 311922 non-sibling candidates from 559 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 166544, incorrect 2, TP 0, FP 0, TN 166544, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1138 rows with error results and 166546 rows with NaNs (typically hz different) from a     total of 312481 entries, resulting in 145935 entries.
Correct: 1938, incorrect 0, TP 0, FP 0, TN 1938, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 1938 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1783 entries.
Found 557 sibs and 145378 nonsibs, weights: 0.9962 and 0.0038, #weights: 145935
Found 61 sibs and 1722 nonsibs, weights: 0.9658 and 0.0342, #weights: 1783
Correct: 145929, incorrect 6, TP 557, FP 6, TN 145372, FN0, Prec. 98.93, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1783, incorrect 0, TP 61, FP 0, TN 1722, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 7
Generated 311922 non-sibling candidates from 559 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 165966, incorrect 2, TP 0, FP 0, TN 165966, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1136 rows with error results and 165968 rows with NaNs (typically hz different) from a     total of 312481 entries, resulting in 146513 entries.
Correct: 2002, incorrect 0, TP 0, FP 0, TN 2002, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 2002 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1719 entries.
Found 557 sibs and 145956 nonsibs, weights: 0.9962 and 0.0038, #weights: 146513
Found 61 sibs and 1658 nonsibs, weights: 0.9645 and 0.0355, #weights: 1719
Correct: 146503, incorrect 10, TP 557, FP 10, TN 145946, FN0, Prec. 98.24, Rec. 100.0, Spec. 99.99, Acc. 99.99%
Correct: 1719, incorrect 0, TP 61, FP 0, TN 1658, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 8
Generated 311922 non-sibling candidates from 559 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 166507, incorrect 2, TP 0, FP 0, TN 166507, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1134 rows with error results and 166509 rows with NaNs (typically hz different) from a     total of 312481 entries, resulting in 145972 entries.
Correct: 1940, incorrect 0, TP 0, FP 0, TN 1940, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 1940 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1781 entries.
Found 557 sibs and 145415 nonsibs, weights: 0.9962 and 0.0038, #weights: 145972
Found 61 sibs and 1720 nonsibs, weights: 0.9657 and 0.0343, #weights: 1781
Correct: 145968, incorrect 4, TP 557, FP 4, TN 145411, FN0, Prec. 99.29, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1781, incorrect 0, TP 61, FP 0, TN 1720, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 9
Generated 311922 non-sibling candidates from 559 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 165568, incorrect 1, TP 0, FP 0, TN 165568, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1139 rows with error results and 165569 rows with NaNs (typically hz different) from a     total of 312481 entries, resulting in 146912 entries.
Correct: 2041, incorrect 1, TP 0, FP 0, TN 2041, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 99.95%
Removing 0 rows with error results and 2042 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1679 entries.
Found 558 sibs and 146354 nonsibs, weights: 0.9962 and 0.0038, #weights: 146912
Found 60 sibs and 1619 nonsibs, weights: 0.9643 and 0.0357, #weights: 1679
Correct: 146906, incorrect 6, TP 558, FP 6, TN 146348, FN0, Prec. 98.94, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1679, incorrect 0, TP 60, FP 0, TN 1619, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
############# Round 7 ##############
Loading from filenames ../../../gt7/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../gt7/hosts.csv__nonsiblings_seed1_n681capture.pcap.ts.siblingresult.csv
Read 618 siblings and 406792 non-siblings from files.
Our algo: Not deciding on 1417 pairs for unknown/error reasons.
Found 515 sibs and 405478 nonsibs, weights: 0.9987 and 0.0013, #weights: 405993
Our algo stats: (1417) undecided, mcc: 0.9763999694141065, f1: 0.9762376237623762
Correct: 405969, incorrect 24, TP 493, FP 2, TN 405476, FN22, Prec. 99.6, Rec. 95.73, Spec. 100.0, Acc. 99.99%
Found 617 sibs and 405478 nonsibs, weights: 0.9985 and 0.0015, #weights: 406095
Beverly algo: Not deciding on 1315 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.08111991226804503, f1: 0.0161526198600809
Correct: 331420, incorrect 74675, TP 613, FP 74671, TN 330807, FN4, Prec. 0.81, Rec. 99.35, Spec. 81.58, Acc. 81.61%
## GROUP: RAv1
Generated 132 non-sibling candidates from 12 siblings.
Our algo: Not deciding on 2 pairs for unknown/error reasons.
Found 12 sibs and 130 nonsibs, weights: 0.9155 and 0.0845, #weights: 142
Our algo stats: (2) undecided, mcc: 1.0, f1: 1.0
Correct: 142, incorrect 0, TP 12, FP 0, TN 130, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Found 12 sibs and 130 nonsibs, weights: 0.9155 and 0.0845, #weights: 142
Beverly algo: Not deciding on 2 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.0, f1: 0.15584415584415587
Correct: 12, incorrect 130, TP 12, FP 130, TN 0, FN0, Prec. 8.45, Rec. 100.0, Spec. 0.0, Acc. 8.45%
## GROUP: RAv2
Generated 41006 non-sibling candidates from 203 siblings.
Our algo: Not deciding on 5 pairs for unknown/error reasons.
Found 203 sibs and 41001 nonsibs, weights: 0.9951 and 0.0049, #weights: 41204
Our algo stats: (5) undecided, mcc: 0.9875963883026432, f1: 0.9876543209876543
Correct: 41199, incorrect 5, TP 200, FP 2, TN 40999, FN3, Prec. 99.01, Rec. 98.52, Spec. 100.0, Acc. 99.99%
Found 203 sibs and 41001 nonsibs, weights: 0.9951 and 0.0049, #weights: 41204
Beverly algo: Not deciding on 5 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.05678349344226616, f1: 0.01621708413615928
Correct: 16696, incorrect 24508, TP 202, FP 24507, TN 16494, FN1, Prec. 0.82, Rec. 99.51, Spec. 40.23, Acc. 40.52%
## GROUP: nlnog
Generated 141000 non-sibling candidates from 376 siblings.
Our algo: Not deciding on 881 pairs for unknown/error reasons.
Found 274 sibs and 140221 nonsibs, weights: 0.998 and 0.002, #weights: 140495
Our algo stats: (881) undecided, mcc: 0.9721904996026752, f1: 0.9718574108818011
Correct: 140480, incorrect 15, TP 259, FP 0, TN 140221, FN15, Prec. 100.0, Rec. 94.53, Spec. 100.0, Acc. 99.99%
Found 375 sibs and 140221 nonsibs, weights: 0.9973 and 0.0027, #weights: 140596
Beverly algo: Not deciding on 780 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.0858877314624804, f1: 0.020005915727768962
Correct: 104151, incorrect 36445, TP 372, FP 36442, TN 103779, FN3, Prec. 1.01, Rec. 99.2, Spec. 74.01, Acc. 74.08%
## GROUP: servers
Generated 702 non-sibling candidates from 27 siblings.
Our algo: Not deciding on 2 pairs for unknown/error reasons.
Found 26 sibs and 701 nonsibs, weights: 0.9642 and 0.0358, #weights: 727
Our algo stats: (2) undecided, mcc: 0.9172529494462625, f1: 0.9166666666666666
Correct: 723, incorrect 4, TP 22, FP 0, TN 701, FN4, Prec. 100.0, Rec. 84.62, Spec. 100.0, Acc. 99.45%
Found 27 sibs and 701 nonsibs, weights: 0.9629 and 0.0371, #weights: 728
Beverly algo: Not deciding on 1 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided, mcc: 0.17284009833634095, f1: 0.12356979405034325
Correct: 345, incorrect 383, TP 27, FP 383, TN 318, FN0, Prec. 6.59, Rec. 100.0, Spec. 45.36, Acc. 47.39%
Round 0
Generated 306362 non-sibling candidates from 554 siblings.
Generated 4032 non-sibling candidates from 64 siblings.
Correct: 163759, incorrect 1, TP 0, FP 0, TN 163759, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1134 rows with error results and 163760 rows with NaNs (typically hz different) from a     total of 306916 entries, resulting in 143156 entries.
Correct: 2342, incorrect 1, TP 0, FP 0, TN 2342, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 99.96%
Removing 2 rows with error results and 2343 rows with NaNs (typically hz different) from a     total of 4096 entries, resulting in 1753 entries.
Found 553 sibs and 142603 nonsibs, weights: 0.9961 and 0.0039, #weights: 143156
Found 63 sibs and 1690 nonsibs, weights: 0.9641 and 0.0359, #weights: 1753
Correct: 143156, incorrect 0, TP 553, FP 0, TN 142603, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1753, incorrect 0, TP 63, FP 0, TN 1690, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 1
Generated 306362 non-sibling candidates from 554 siblings.
Generated 4032 non-sibling candidates from 64 siblings.
Correct: 165010, incorrect 2, TP 0, FP 0, TN 165010, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1134 rows with error results and 165012 rows with NaNs (typically hz different) from a     total of 306916 entries, resulting in 141904 entries.
Correct: 2206, incorrect 0, TP 0, FP 0, TN 2206, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 2206 rows with NaNs (typically hz different) from a     total of 4096 entries, resulting in 1890 entries.
Found 552 sibs and 141352 nonsibs, weights: 0.9961 and 0.0039, #weights: 141904
Found 64 sibs and 1826 nonsibs, weights: 0.9661 and 0.0339, #weights: 1890
Correct: 141904, incorrect 0, TP 552, FP 0, TN 141352, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1890, incorrect 0, TP 64, FP 0, TN 1826, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 2
Generated 307470 non-sibling candidates from 555 siblings.
Generated 3906 non-sibling candidates from 63 siblings.
Correct: 165828, incorrect 2, TP 0, FP 0, TN 165828, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1143 rows with error results and 165830 rows with NaNs (typically hz different) from a     total of 308025 entries, resulting in 142195 entries.
Correct: 2098, incorrect 0, TP 0, FP 0, TN 2098, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 2098 rows with NaNs (typically hz different) from a     total of 3969 entries, resulting in 1871 entries.
Found 553 sibs and 141642 nonsibs, weights: 0.9961 and 0.0039, #weights: 142195
Found 63 sibs and 1808 nonsibs, weights: 0.9663 and 0.0337, #weights: 1871
Correct: 142195, incorrect 0, TP 553, FP 0, TN 141642, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1871, incorrect 0, TP 63, FP 0, TN 1808, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 3
Generated 308580 non-sibling candidates from 556 siblings.
Generated 3782 non-sibling candidates from 62 siblings.
Correct: 165299, incorrect 1, TP 0, FP 0, TN 165299, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 25 rows with error results and 165300 rows with NaNs (typically hz different) from a     total of 309136 entries, resulting in 143836 entries.
Correct: 2154, incorrect 1, TP 0, FP 0, TN 2154, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 99.95%
Removing 123 rows with error results and 2155 rows with NaNs (typically hz different) from a     total of 3844 entries, resulting in 1689 entries.
Found 555 sibs and 143281 nonsibs, weights: 0.9961 and 0.0039, #weights: 143836
Found 61 sibs and 1628 nonsibs, weights: 0.9639 and 0.0361, #weights: 1689
Correct: 143836, incorrect 0, TP 555, FP 0, TN 143281, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1689, incorrect 0, TP 61, FP 0, TN 1628, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 4
Generated 308580 non-sibling candidates from 556 siblings.
Generated 3782 non-sibling candidates from 62 siblings.
Correct: 165037, incorrect 2, TP 0, FP 0, TN 165037, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1142 rows with error results and 165039 rows with NaNs (typically hz different) from a     total of 309136 entries, resulting in 144097 entries.
Correct: 2196, incorrect 0, TP 0, FP 0, TN 2196, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 2196 rows with NaNs (typically hz different) from a     total of 3844 entries, resulting in 1648 entries.
Found 554 sibs and 143543 nonsibs, weights: 0.9962 and 0.0038, #weights: 144097
Found 62 sibs and 1586 nonsibs, weights: 0.9624 and 0.0376, #weights: 1648
Correct: 144097, incorrect 0, TP 554, FP 0, TN 143543, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1648, incorrect 0, TP 62, FP 0, TN 1586, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 5
Generated 308580 non-sibling candidates from 556 siblings.
Generated 3782 non-sibling candidates from 62 siblings.
Correct: 167231, incorrect 2, TP 0, FP 0, TN 167231, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1144 rows with error results and 167233 rows with NaNs (typically hz different) from a     total of 309136 entries, resulting in 141903 entries.
Correct: 1952, incorrect 0, TP 0, FP 0, TN 1952, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 1952 rows with NaNs (typically hz different) from a     total of 3844 entries, resulting in 1892 entries.
Found 554 sibs and 141349 nonsibs, weights: 0.9961 and 0.0039, #weights: 141903
Found 62 sibs and 1830 nonsibs, weights: 0.9672 and 0.0328, #weights: 1892
Correct: 141903, incorrect 0, TP 554, FP 0, TN 141349, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1891, incorrect 1, TP 62, FP 1, TN 1829, FN0, Prec. 98.41, Rec. 100.0, Spec. 99.95, Acc. 99.95%
Round 6
Generated 309692 non-sibling candidates from 557 siblings.
Generated 3660 non-sibling candidates from 61 siblings.
Correct: 168076, incorrect 2, TP 0, FP 0, TN 168076, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1144 rows with error results and 168078 rows with NaNs (typically hz different) from a     total of 310249 entries, resulting in 142171 entries.
Correct: 1861, incorrect 0, TP 0, FP 0, TN 1861, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 1 rows with error results and 1861 rows with NaNs (typically hz different) from a     total of 3721 entries, resulting in 1860 entries.
Found 555 sibs and 141616 nonsibs, weights: 0.9961 and 0.0039, #weights: 142171
Found 61 sibs and 1799 nonsibs, weights: 0.9672 and 0.0328, #weights: 1860
Correct: 142171, incorrect 0, TP 555, FP 0, TN 141616, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1860, incorrect 0, TP 61, FP 0, TN 1799, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 7
Generated 310806 non-sibling candidates from 558 siblings.
Generated 3540 non-sibling candidates from 60 siblings.
Correct: 168361, incorrect 2, TP 0, FP 0, TN 168361, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1143 rows with error results and 168363 rows with NaNs (typically hz different) from a     total of 311364 entries, resulting in 143001 entries.
Correct: 1835, incorrect 0, TP 0, FP 0, TN 1835, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 1 rows with error results and 1835 rows with NaNs (typically hz different) from a     total of 3600 entries, resulting in 1765 entries.
Found 556 sibs and 142445 nonsibs, weights: 0.9961 and 0.0039, #weights: 143001
Found 60 sibs and 1705 nonsibs, weights: 0.966 and 0.034, #weights: 1765
Correct: 143001, incorrect 0, TP 556, FP 0, TN 142445, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1765, incorrect 0, TP 60, FP 0, TN 1705, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 8
Generated 310806 non-sibling candidates from 558 siblings.
Generated 3540 non-sibling candidates from 60 siblings.
Correct: 167940, incorrect 2, TP 0, FP 0, TN 167940, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1148 rows with error results and 167942 rows with NaNs (typically hz different) from a     total of 311364 entries, resulting in 143422 entries.
Correct: 1882, incorrect 0, TP 0, FP 0, TN 1882, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 1882 rows with NaNs (typically hz different) from a     total of 3600 entries, resulting in 1718 entries.
Found 556 sibs and 142866 nonsibs, weights: 0.9961 and 0.0039, #weights: 143422
Found 60 sibs and 1658 nonsibs, weights: 0.9651 and 0.0349, #weights: 1718
Correct: 143422, incorrect 0, TP 556, FP 0, TN 142866, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1718, incorrect 0, TP 60, FP 0, TN 1658, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 9
Generated 310806 non-sibling candidates from 558 siblings.
Generated 3540 non-sibling candidates from 60 siblings.
Correct: 167115, incorrect 2, TP 0, FP 0, TN 167115, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 1146 rows with error results and 167117 rows with NaNs (typically hz different) from a     total of 311364 entries, resulting in 144247 entries.
Correct: 1970, incorrect 0, TP 0, FP 0, TN 1970, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 1970 rows with NaNs (typically hz different) from a     total of 3600 entries, resulting in 1630 entries.
Found 556 sibs and 143691 nonsibs, weights: 0.9961 and 0.0039, #weights: 144247
Found 60 sibs and 1570 nonsibs, weights: 0.9632 and 0.0368, #weights: 1630
Correct: 144247, incorrect 0, TP 556, FP 0, TN 143691, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 1630, incorrect 0, TP 60, FP 0, TN 1570, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%

High-Level Stats for Beverly and Our Algorithm


In [871]:
# cleanse rows with all zeros (something went wrong in those)
hlbstats = hlbstats[~np.all(hlbstats == 0, axis=1)]
mean_prec = round(np.mean(hlbstats[:,0]),2)
mean_mcc = round(np.mean(hlbstats[:,4]),2)
print("High-Level Beverly stats against 2016 gt, mean across all measurements: Precision {}%, MCC {}".format(
    mean_prec, mean_mcc))
hlostats = hlostats[~np.all(hlostats == 0, axis=1)]
mean_prec = round(np.mean(hlostats[:,0]),2)
mean_mcc = round(np.mean(hlostats[:,4]),2)
print("High-Level Algo stats against 2016 gt, mean across all measurements: Precision {}%, MCC {}".format(
    mean_prec, mean_mcc))


High-Level Beverly stats against 2016 gt, mean across all measurements: Precision 0.9%, MCC 0.08
High-Level Algo stats against 2016 gt, mean across all measurements: Precision 99.68%, MCC 0.98

These numbers are for the upper part of Table III in the paper


In [872]:
for group in set(gsdb.keys()):
    x = gsdb[group]
    x = x[~np.all(x == 0, axis=1)]
    mean_prec = round(np.mean(x[:,0]),2)
    mean_mcc = round(np.mean(x[:,4]),2)
    print("Beverly group stats against 2016 gt, mean across all measurements: {}, Precision {}%, MCC {}".format(
    group, mean_prec, mean_mcc))


Beverly group stats against 2016 gt, mean across all measurements: servers, Precision 8.33%, MCC 0.17
Beverly group stats against 2016 gt, mean across all measurements: RAv2, Precision 0.79%, MCC 0.05
Beverly group stats against 2016 gt, mean across all measurements: nlnog, Precision 1.09%, MCC 0.09
Beverly group stats against 2016 gt, mean across all measurements: RAv1, Precision 8.35%, MCC 0.0

The numbers in the cell below are not used in the paper, also-eval_used_version is used


In [873]:
for group in set(gsdo.keys()):
    x = gsdo[group]
    x = x[~np.all(x == 0, axis=1)]
    mean_prec = round(np.mean(x[0:,0]),2)
    mean_mcc = round(np.mean(x[0:,4]),2)
    print("Algo group stats against 2016 gt, mean across all measurements: {}, Precision {}%, MCC {}".format(
    group, mean_prec, mean_mcc))


Algo group stats against 2016 gt, mean across all measurements: servers, Precision 100.0%, MCC 0.91
Algo group stats against 2016 gt, mean across all measurements: RAv2, Precision 99.16%, MCC 0.98
Algo group stats against 2016 gt, mean across all measurements: nlnog, Precision 99.91%, MCC 0.98
Algo group stats against 2016 gt, mean across all measurements: RAv1, Precision 100.0%, MCC 1.0

The 2 cells below compute the values for rows 5+6 of Table II


In [874]:
mp = []
mc = []
for group in set(mlstatsd_tre.keys()):
    x = mlstatsd_tre[group]
    x = x[~np.all(x == 0, axis=1)]
    mean_prec = round(np.mean(x[0:,0]),2)
    mean_mcc = round(np.mean(x[0:,4]),2)
    print("ML1 train stats against 2016 gt, mean across cross-vals: {} , Precision {}%, MCC {}".format(
    group, mean_prec, mean_mcc))
    mp.append(mean_prec)
    mc.append(mean_mcc)
    
print("ML1 train stats against 2016 gt, mean across all groups and cross-vals: Precision {}%, MCC {}".format(
    np.mean(mp), np.mean(mc)))


ML1 train stats against 2016 gt, mean across cross-vals: 4_tre , Precision 99.02%, MCC 1.0
ML1 train stats against 2016 gt, mean across cross-vals: 5_tre , Precision 99.43%, MCC 1.0
ML1 train stats against 2016 gt, mean across cross-vals: 1_tre , Precision 100.0%, MCC 1.0
ML1 train stats against 2016 gt, mean across cross-vals: 6_tre , Precision 98.85%, MCC 1.0
ML1 train stats against 2016 gt, mean across cross-vals: 7_tre , Precision 100.0%, MCC 1.0
ML1 train stats against 2016 gt, mean across cross-vals: 3_tre , Precision 98.98%, MCC 1.0
ML1 train stats against 2016 gt, mean across cross-vals: 2_tre , Precision 99.19%, MCC 1.0
ML1 train stats against 2016 gt, mean across all groups and cross-vals: Precision 99.35285714285715%, MCC 1.0

In [866]:
mp = []
mc = []
for group in set(mlstatsd_tee.keys()):
    x = mlstatsd_tee[group]
    x = x[~np.all(x == 0, axis=1)]
    mean_prec = round(np.mean(x[0:,0]),2)
    mean_mcc = round(np.mean(x[0:,4]),2)
    print("ML1 test stats against 2016 gt, mean across cross-vals: {} , Precision {}%, MCC {}".format(
    group, mean_prec, mean_mcc))
    mp.append(mean_prec)
    mc.append(mean_mcc)
    
print("ML1 test stats against 2016 gt, mean across all groups and cross-vals: Precision {}%, MCC {}".format(
    np.mean(mp), np.mean(mc)))


ML1 test stats against 2016 gt, mean across cross-vals: 3_tee , Precision 99.69%, MCC 1.0
ML1 test stats against 2016 gt, mean across cross-vals: 6_tee , Precision 100.0%, MCC 1.0
ML1 test stats against 2016 gt, mean across cross-vals: 1_tee , Precision 100.0%, MCC 1.0
ML1 test stats against 2016 gt, mean across cross-vals: 4_tee , Precision 99.68%, MCC 1.0
ML1 test stats against 2016 gt, mean across cross-vals: 5_tee , Precision 100.0%, MCC 1.0
ML1 test stats against 2016 gt, mean across cross-vals: 7_tee , Precision 99.84%, MCC 1.0
ML1 test stats against 2016 gt, mean across cross-vals: 2_tee , Precision 100.0%, MCC 1.0
ML1 test stats against 2016 gt, mean across all groups and cross-vals: Precision 99.88714285714286%, MCC 1.0

In [ ]:


In [ ]:

Investigations of Details and Individual Cases

Investigation of ground truth false negatives


In [898]:
falsecalls = pd.DataFrame()
for i in range(1,runs):
    print("############# Round {} ##############".format(i))
    sib, nonsib = get_pd_files("../../../gt{}/".format(i))
    falsecalls = falsecalls.append(sib[sib.decision.str.contains("ERROR|error|optsdiff|hz") == True])


############# Round 1 ##############
Loading from filenames ../../../gt1/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../gt1/hosts.csv__nonsiblings_seed1_n679capture.pcap.ts.siblingresult.csv
Read 279 siblings and 82026 non-siblings from files.
############# Round 2 ##############
Loading from filenames ../../../gt2/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../gt2/hosts.csv__nonsiblings_seed1_n679capture.pcap.ts.siblingresult.csv
Read 624 siblings and 410626 non-siblings from files.
############# Round 3 ##############
Loading from filenames ../../../gt3/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../gt3/hosts.csv__nonsiblings_seed1_n679capture.pcap.ts.siblingresult.csv
Read 625 siblings and 411909 non-siblings from files.
############# Round 4 ##############
Loading from filenames ../../../gt4/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../gt4/hosts.csv__nonsiblings_seed1_n679capture.pcap.ts.siblingresult.csv
Read 620 siblings and 408034 non-siblings from files.
############# Round 5 ##############
Loading from filenames ../../../gt5/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../gt5/hosts.csv__nonsiblings_seed1_n679capture.pcap.ts.siblingresult.csv
Read 620 siblings and 408034 non-siblings from files.
############# Round 6 ##############
Loading from filenames ../../../gt6/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../gt6/hosts.csv__nonsiblings_seed1_n679capture.pcap.ts.siblingresult.csv
Read 620 siblings and 408034 non-siblings from files.
############# Round 7 ##############
Loading from filenames ../../../gt7/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../gt7/hosts.csv__nonsiblings_seed1_n681capture.pcap.ts.siblingresult.csv
Read 618 siblings and 406792 non-siblings from files.
Out[898]:
ip4 ip6 hz4 hz6 hzdiff hz4r2 hz6r2 hzr2diff tcp_t_offset4 tcp_t_offset6 ... ott6_rng ott_rng_diff ott_rng_diff_rel opts4 opts6 optsdiff perc_85_val dec_bev decision label
domain
ovh04.ring.nlnog.net 192.99.153.129 2607:5300:101::599 250.0 250.0 0.0 1.000000 1.000000 4.107825e-14 2.168821e+08 2.168670e+08 ... 52.681 2.641 0.048906 MSS- MSS-SACK-TS-N-WS07- 1 2.025465 non-sibling(optsdiff) non-sibling(optsdiff) 1
ovh03.ring.nlnog.net 5.196.13.221 2001:41d0:52:600::671 250.0 250.0 0.0 1.000000 1.000000 4.440892e-16 1.130031e+09 1.129895e+09 ... 38.706 0.215 0.005539 MSS- MSS-SACK-TS-N-WS07- 1 0.227048 non-sibling(optsdiff) non-sibling(optsdiff) 1
ovh02.ring.nlnog.net 37.187.50.166 2001:41d0:52:400::53b 250.0 250.0 0.0 1.000000 1.000000 1.154632e-14 1.130830e+09 1.130722e+09 ... 2143.810 0.062 0.000029 MSS- MSS-SACK-TS-N-WS07- 1 0.532364 non-sibling(optsdiff) non-sibling(optsdiff) 1
ovh04.ring.nlnog.net 192.99.153.129 2607:5300:101::599 250.0 250.0 0.0 1.000000 1.000000 2.542411e-13 3.747511e+08 3.746817e+08 ... 1492.405 5.014 0.003365 MSS- MSS-SACK-TS-N-WS07- 1 5.104061 non-sibling(optsdiff) non-sibling(optsdiff) 1
RA_6088 5.57.17.65 2a01:5040:20:30::1 1001.0 1001.0 0.0 1.000000 1.000000 4.884981e-15 1.156965e+09 1.156945e+09 ... 11797.220 0.803 0.000068 MSS-SACK-TS-N-WS07- MSS-SACK-TS-N-WS07- 0 NaN NaN ERROR: spline calculation failed! 1
RA_6131 217.196.147.89 2a02:16a8:dc:200::1 1001.0 1001.0 0.0 1.000000 1.000000 1.776357e-15 1.067010e+08 1.066939e+08 ... 11345.467 0.672 0.000059 MSS-SACK-TS-N-WS07- MSS-SACK-TS-N-WS07- 0 NaN NaN ERROR: spline calculation failed! 1
RA_6220 217.196.33.252 2a02:310:0:2958::16 100.0 1001.0 901.0 0.999904 1.000000 NaN 1.161751e+08 3.607975e+08 ... NaN NaN NaN MSS-SACK-TS-N-WS07- MSS-SACK-TS-N-WS07- 0 NaN non-sibling (hz different) non-sibling (hz different) 1
ovh03.ring.nlnog.net 5.196.13.221 2001:41d0:52:600::671 250.0 250.0 0.0 1.000000 1.000000 1.567901e-11 1.175528e+09 1.174713e+09 ... 263.147 2.446 0.009339 MSS- MSS-SACK-TS-N-WS07- 1 0.527214 non-sibling(optsdiff) non-sibling(optsdiff) 1
ovh02.ring.nlnog.net 37.187.50.166 2001:41d0:52:400::53b 250.0 250.0 0.0 1.000000 1.000000 2.220446e-16 1.175640e+09 1.175541e+09 ... 2148.670 0.043 0.000020 MSS- MSS-SACK-TS-N-WS07- 1 0.581357 non-sibling(optsdiff) non-sibling(optsdiff) 1
tilaa01.ring.nlnog.net 46.19.36.12 2a02:2770::21a:4aff:feac:4576 29381.0 29508.0 NaN 0.128195 0.128773 NaN 1.382994e+08 1.382964e+08 ... NaN NaN NaN MSS-SACK-TS-N-WS04- MSS-SACK-TS-N-WS04- 0 NaN NaN ERROR: too small clock hertz r-squares 1
ovh04.ring.nlnog.net 192.99.153.129 2607:5300:101::599 250.0 250.0 0.0 1.000000 1.000000 8.881784e-16 4.204438e+08 4.195066e+08 ... 1965.143 0.023 0.000012 MSS- MSS-SACK-TS-N-WS07- 1 0.718889 non-sibling(optsdiff) non-sibling(optsdiff) 1
RA_6220 217.196.33.252 2a02:310:0:2958::16 100.0 1001.0 901.0 0.998920 1.000000 NaN 8.694358e+07 5.403645e+08 ... NaN NaN NaN MSS-SACK-TS-N-WS07- MSS-SACK-TS-N-WS07- 0 NaN non-sibling (hz different) non-sibling (hz different) 1
ovh03.ring.nlnog.net 5.196.13.221 2001:41d0:52:600::671 250.0 250.0 0.0 1.000000 1.000000 1.833814e-09 1.281100e+09 1.274004e+09 ... 64.741 1.645 0.025736 MSS- MSS-SACK-TS-N-WS07- 1 41.154072 non-sibling(optsdiff) non-sibling(optsdiff) 1
ovh02.ring.nlnog.net 37.187.50.166 2001:41d0:52:400::53b 250.0 250.0 0.0 1.000000 1.000000 7.549517e-15 1.274883e+09 1.274828e+09 ... 2152.142 0.069 0.000032 MSS- MSS-SACK-TS-N-WS07- 1 0.461552 non-sibling(optsdiff) non-sibling(optsdiff) 1
trueinternet01.ring.nlnog.net 203.144.167.57 2001:fb0:100:ffff:211:25ff:fe40:9468 159389.0 159706.0 NaN 0.687427 0.695245 NaN 1.010581e+08 1.010674e+08 ... NaN NaN NaN MSS-SACK-TS-N-WS11- MSS-SACK-TS-N-WS11- 0 NaN NaN ERROR: too small clock hertz r-squares 1
ovh04.ring.nlnog.net 192.99.153.129 2607:5300:101::599 250.0 250.0 0.0 1.000000 1.000000 9.727774e-13 5.188355e+08 5.188099e+08 ... 1171.543 0.535 0.000457 MSS- MSS-SACK-TS-N-WS07- 1 2.562532 non-sibling(optsdiff) non-sibling(optsdiff) 1
RA_6220 217.196.33.252 2a02:310:0:2958::16 100.0 1001.0 901.0 0.999394 1.000000 NaN 1.266629e+08 9.380604e+08 ... NaN NaN NaN MSS-SACK-TS-N-WS07- MSS-SACK-TS-N-WS07- 0 NaN non-sibling (hz different) non-sibling (hz different) 1
ovh03.ring.nlnog.net 5.196.13.221 2001:41d0:52:600::671 250.0 250.0 0.0 1.000000 1.000000 1.791678e-12 1.283146e+09 1.283025e+09 ... 272.415 3.689 0.013451 MSS- MSS-SACK-TS-N-WS07- 1 4.235207 non-sibling(optsdiff) non-sibling(optsdiff) 1
ovh02.ring.nlnog.net 37.187.50.166 2001:41d0:52:400::53b 250.0 250.0 0.0 1.000000 1.000000 3.996803e-15 1.283867e+09 1.283837e+09 ... 2142.631 4.127 0.001924 MSS- MSS-SACK-TS-N-WS07- 1 0.421635 non-sibling(optsdiff) non-sibling(optsdiff) 1
ovh04.ring.nlnog.net 192.99.153.129 2607:5300:101::599 250.0 250.0 0.0 1.000000 1.000000 4.440892e-16 5.279198e+08 5.278201e+08 ... 1529.123 2.322 0.001520 MSS- MSS-SACK-TS-N-WS07- 1 0.412238 non-sibling(optsdiff) non-sibling(optsdiff) 1
gossamerthreads01.ring.nlnog.net 208.70.247.50 2607:fcc0:2:1:208:70:247:50 250.0 1607.0 NaN 1.000000 0.008749 NaN 4.294911e+09 2.475138e+08 ... NaN NaN NaN MSS-SACK-TS-N-WS06- MSS-SACK-TS-N-WS07- 1 NaN NaN ERROR: too small clock hertz r-squares 1
RA_6220 217.196.33.252 2a02:310:0:2958::16 100.0 1001.0 901.0 0.999994 1.000000 NaN 1.302641e+08 9.741713e+08 ... NaN NaN NaN MSS-SACK-TS-N-WS07- MSS-SACK-TS-N-WS07- 0 NaN non-sibling (hz different) non-sibling (hz different) 1
pix01.ring.nlnog.net 185.90.125.134 2a03:87a0:125:134::1 149497.0 150262.0 NaN 0.728900 0.733638 NaN 2.544141e+08 2.544131e+08 ... NaN NaN NaN MSS-SACK-TS-N-WS07- MSS-SACK-TS-N-WS07- 0 NaN NaN ERROR: too small clock hertz r-squares 1
ovh03.ring.nlnog.net 5.196.13.221 2001:41d0:52:600::671 250.0 250.0 0.0 1.000000 1.000000 4.174439e-14 1.296654e+09 1.292033e+09 ... 32.893 2.376 0.069716 MSS- MSS-SACK-TS-N-WS07- 1 0.643468 non-sibling(optsdiff) non-sibling(optsdiff) 1
ovh02.ring.nlnog.net 37.187.50.166 2001:41d0:52:400::53b 250.0 250.0 0.0 1.000000 1.000000 3.197442e-14 1.297270e+09 1.292859e+09 ... 1102.110 2.223 0.002019 MSS- MSS-SACK-TS-N-WS07- 1 0.683236 non-sibling(optsdiff) non-sibling(optsdiff) 1
RA_6220 217.196.33.252 2a02:310:0:2958::16 100.0 1001.0 901.0 0.999994 1.000000 NaN 1.338738e+08 1.010255e+09 ... NaN NaN NaN MSS-SACK-TS-N-WS07- MSS-SACK-TS-N-WS07- 0 NaN non-sibling (hz different) non-sibling (hz different) 1
ovh03.ring.nlnog.net 5.196.13.221 2001:41d0:52:600::671 250.0 250.0 0.0 1.000000 1.000000 3.010037e-12 1.628076e+09 1.627960e+09 ... 739.895 6.856 0.009309 MSS- MSS-SACK-TS-N-WS07- 1 3.770015 non-sibling(optsdiff) non-sibling(optsdiff) 1
ovh02.ring.nlnog.net 37.187.50.166 2001:41d0:52:400::53b 250.0 250.0 0.0 1.000000 1.000000 4.440892e-16 1.628848e+09 1.628782e+09 ... 2184.444 0.479 0.000219 MSS- MSS-SACK-TS-N-WS07- 1 4.116483 non-sibling(optsdiff) non-sibling(optsdiff) 1
trueinternet01.ring.nlnog.net 203.144.167.57 2001:fb0:100:ffff:211:25ff:fe40:9468 107919.0 106731.0 NaN 0.457027 0.450581 NaN 2.100992e+07 2.100415e+07 ... NaN NaN NaN MSS-SACK-TS-N-WS11- MSS-SACK-TS-N-WS11- 0 NaN NaN ERROR: too small clock hertz r-squares 1
ovh04.ring.nlnog.net 192.99.153.129 2607:5300:101::599 250.0 250.0 0.0 1.000000 1.000000 8.224532e-13 8.728248e+08 8.727867e+08 ... 588.936 0.055 0.000093 MSS- MSS-SACK-TS-N-WS07- 1 1.657471 non-sibling(optsdiff) non-sibling(optsdiff) 1
RA_6220 217.196.33.252 2a02:310:0:2958::16 100.0 1001.0 901.0 0.999937 1.000000 NaN 2.682442e+08 2.355818e+09 ... NaN NaN NaN MSS-SACK-TS-N-WS07- MSS-SACK-TS-N-WS07- 0 NaN non-sibling (hz different) non-sibling (hz different) 1

31 rows × 29 columns


In [900]:
from collections import Counter
Counter(falsecalls.decision.as_matrix())


Out[900]:
Counter({'ERROR: spline calculation failed!': 2,
         'ERROR: too small clock hertz r-squares': 5,
         'non-sibling (hz different)': 6,
         'non-sibling(optsdiff)': 18})

In [903]:
set(falsecalls.index)


Out[903]:
{'RA_6088',
 'RA_6131',
 'RA_6220',
 'gossamerthreads01.ring.nlnog.net',
 'ovh02.ring.nlnog.net',
 'ovh03.ring.nlnog.net',
 'ovh04.ring.nlnog.net',
 'pix01.ring.nlnog.net',
 'tilaa01.ring.nlnog.net',
 'trueinternet01.ring.nlnog.net'}

In [907]:
falsecalls.to_csv("falsecalls.csv")

In [908]:
falsecalls


Out[908]:
ip4 ip6 hz4 hz6 hzdiff hz4r2 hz6r2 hzr2diff tcp_t_offset4 tcp_t_offset6 ... ott6_rng ott_rng_diff ott_rng_diff_rel opts4 opts6 optsdiff perc_85_val dec_bev decision label
domain
RA_6088 5.57.17.65 2a01:5040:20:30::1 1001.0 1001.0 0.0 1.000000 1.000000 4.884981e-15 1.156965e+09 1.156945e+09 ... 11797.220 0.803 0.000068 MSS-SACK-TS-N-WS07- MSS-SACK-TS-N-WS07- 0 NaN NaN ERROR: spline calculation failed! 1
RA_6131 217.196.147.89 2a02:16a8:dc:200::1 1001.0 1001.0 0.0 1.000000 1.000000 1.776357e-15 1.067010e+08 1.066939e+08 ... 11345.467 0.672 0.000059 MSS-SACK-TS-N-WS07- MSS-SACK-TS-N-WS07- 0 NaN NaN ERROR: spline calculation failed! 1
RA_6220 217.196.33.252 2a02:310:0:2958::16 100.0 1001.0 901.0 0.999937 1.000000 NaN 2.682442e+08 2.355818e+09 ... NaN NaN NaN MSS-SACK-TS-N-WS07- MSS-SACK-TS-N-WS07- 0 NaN non-sibling (hz different) non-sibling (hz different) 1
RA_6220 217.196.33.252 2a02:310:0:2958::16 100.0 1001.0 901.0 0.999394 1.000000 NaN 1.266629e+08 9.380604e+08 ... NaN NaN NaN MSS-SACK-TS-N-WS07- MSS-SACK-TS-N-WS07- 0 NaN non-sibling (hz different) non-sibling (hz different) 1
RA_6220 217.196.33.252 2a02:310:0:2958::16 100.0 1001.0 901.0 0.998920 1.000000 NaN 8.694358e+07 5.403645e+08 ... NaN NaN NaN MSS-SACK-TS-N-WS07- MSS-SACK-TS-N-WS07- 0 NaN non-sibling (hz different) non-sibling (hz different) 1
RA_6220 217.196.33.252 2a02:310:0:2958::16 100.0 1001.0 901.0 0.999994 1.000000 NaN 1.302641e+08 9.741713e+08 ... NaN NaN NaN MSS-SACK-TS-N-WS07- MSS-SACK-TS-N-WS07- 0 NaN non-sibling (hz different) non-sibling (hz different) 1
RA_6220 217.196.33.252 2a02:310:0:2958::16 100.0 1001.0 901.0 0.999994 1.000000 NaN 1.338738e+08 1.010255e+09 ... NaN NaN NaN MSS-SACK-TS-N-WS07- MSS-SACK-TS-N-WS07- 0 NaN non-sibling (hz different) non-sibling (hz different) 1
RA_6220 217.196.33.252 2a02:310:0:2958::16 100.0 1001.0 901.0 0.999904 1.000000 NaN 1.161751e+08 3.607975e+08 ... NaN NaN NaN MSS-SACK-TS-N-WS07- MSS-SACK-TS-N-WS07- 0 NaN non-sibling (hz different) non-sibling (hz different) 1
gossamerthreads01.ring.nlnog.net 208.70.247.50 2607:fcc0:2:1:208:70:247:50 250.0 1607.0 NaN 1.000000 0.008749 NaN 4.294911e+09 2.475138e+08 ... NaN NaN NaN MSS-SACK-TS-N-WS06- MSS-SACK-TS-N-WS07- 1 NaN NaN ERROR: too small clock hertz r-squares 1
ovh02.ring.nlnog.net 37.187.50.166 2001:41d0:52:400::53b 250.0 250.0 0.0 1.000000 1.000000 2.220446e-16 1.175640e+09 1.175541e+09 ... 2148.670 0.043 0.000020 MSS- MSS-SACK-TS-N-WS07- 1 0.581357 non-sibling(optsdiff) non-sibling(optsdiff) 1
ovh02.ring.nlnog.net 37.187.50.166 2001:41d0:52:400::53b 250.0 250.0 0.0 1.000000 1.000000 4.440892e-16 1.628848e+09 1.628782e+09 ... 2184.444 0.479 0.000219 MSS- MSS-SACK-TS-N-WS07- 1 4.116483 non-sibling(optsdiff) non-sibling(optsdiff) 1
ovh02.ring.nlnog.net 37.187.50.166 2001:41d0:52:400::53b 250.0 250.0 0.0 1.000000 1.000000 1.154632e-14 1.130830e+09 1.130722e+09 ... 2143.810 0.062 0.000029 MSS- MSS-SACK-TS-N-WS07- 1 0.532364 non-sibling(optsdiff) non-sibling(optsdiff) 1
ovh02.ring.nlnog.net 37.187.50.166 2001:41d0:52:400::53b 250.0 250.0 0.0 1.000000 1.000000 7.549517e-15 1.274883e+09 1.274828e+09 ... 2152.142 0.069 0.000032 MSS- MSS-SACK-TS-N-WS07- 1 0.461552 non-sibling(optsdiff) non-sibling(optsdiff) 1
ovh02.ring.nlnog.net 37.187.50.166 2001:41d0:52:400::53b 250.0 250.0 0.0 1.000000 1.000000 3.197442e-14 1.297270e+09 1.292859e+09 ... 1102.110 2.223 0.002019 MSS- MSS-SACK-TS-N-WS07- 1 0.683236 non-sibling(optsdiff) non-sibling(optsdiff) 1
ovh02.ring.nlnog.net 37.187.50.166 2001:41d0:52:400::53b 250.0 250.0 0.0 1.000000 1.000000 3.996803e-15 1.283867e+09 1.283837e+09 ... 2142.631 4.127 0.001924 MSS- MSS-SACK-TS-N-WS07- 1 0.421635 non-sibling(optsdiff) non-sibling(optsdiff) 1
ovh03.ring.nlnog.net 5.196.13.221 2001:41d0:52:600::671 250.0 250.0 0.0 1.000000 1.000000 1.567901e-11 1.175528e+09 1.174713e+09 ... 263.147 2.446 0.009339 MSS- MSS-SACK-TS-N-WS07- 1 0.527214 non-sibling(optsdiff) non-sibling(optsdiff) 1
ovh03.ring.nlnog.net 5.196.13.221 2001:41d0:52:600::671 250.0 250.0 0.0 1.000000 1.000000 3.010037e-12 1.628076e+09 1.627960e+09 ... 739.895 6.856 0.009309 MSS- MSS-SACK-TS-N-WS07- 1 3.770015 non-sibling(optsdiff) non-sibling(optsdiff) 1
ovh03.ring.nlnog.net 5.196.13.221 2001:41d0:52:600::671 250.0 250.0 0.0 1.000000 1.000000 1.833814e-09 1.281100e+09 1.274004e+09 ... 64.741 1.645 0.025736 MSS- MSS-SACK-TS-N-WS07- 1 41.154072 non-sibling(optsdiff) non-sibling(optsdiff) 1
ovh03.ring.nlnog.net 5.196.13.221 2001:41d0:52:600::671 250.0 250.0 0.0 1.000000 1.000000 4.174439e-14 1.296654e+09 1.292033e+09 ... 32.893 2.376 0.069716 MSS- MSS-SACK-TS-N-WS07- 1 0.643468 non-sibling(optsdiff) non-sibling(optsdiff) 1
ovh03.ring.nlnog.net 5.196.13.221 2001:41d0:52:600::671 250.0 250.0 0.0 1.000000 1.000000 4.440892e-16 1.130031e+09 1.129895e+09 ... 38.706 0.215 0.005539 MSS- MSS-SACK-TS-N-WS07- 1 0.227048 non-sibling(optsdiff) non-sibling(optsdiff) 1
ovh03.ring.nlnog.net 5.196.13.221 2001:41d0:52:600::671 250.0 250.0 0.0 1.000000 1.000000 1.791678e-12 1.283146e+09 1.283025e+09 ... 272.415 3.689 0.013451 MSS- MSS-SACK-TS-N-WS07- 1 4.235207 non-sibling(optsdiff) non-sibling(optsdiff) 1
ovh04.ring.nlnog.net 192.99.153.129 2607:5300:101::599 250.0 250.0 0.0 1.000000 1.000000 4.107825e-14 2.168821e+08 2.168670e+08 ... 52.681 2.641 0.048906 MSS- MSS-SACK-TS-N-WS07- 1 2.025465 non-sibling(optsdiff) non-sibling(optsdiff) 1
ovh04.ring.nlnog.net 192.99.153.129 2607:5300:101::599 250.0 250.0 0.0 1.000000 1.000000 8.224532e-13 8.728248e+08 8.727867e+08 ... 588.936 0.055 0.000093 MSS- MSS-SACK-TS-N-WS07- 1 1.657471 non-sibling(optsdiff) non-sibling(optsdiff) 1
ovh04.ring.nlnog.net 192.99.153.129 2607:5300:101::599 250.0 250.0 0.0 1.000000 1.000000 8.881784e-16 4.204438e+08 4.195066e+08 ... 1965.143 0.023 0.000012 MSS- MSS-SACK-TS-N-WS07- 1 0.718889 non-sibling(optsdiff) non-sibling(optsdiff) 1
ovh04.ring.nlnog.net 192.99.153.129 2607:5300:101::599 250.0 250.0 0.0 1.000000 1.000000 2.542411e-13 3.747511e+08 3.746817e+08 ... 1492.405 5.014 0.003365 MSS- MSS-SACK-TS-N-WS07- 1 5.104061 non-sibling(optsdiff) non-sibling(optsdiff) 1
ovh04.ring.nlnog.net 192.99.153.129 2607:5300:101::599 250.0 250.0 0.0 1.000000 1.000000 4.440892e-16 5.279198e+08 5.278201e+08 ... 1529.123 2.322 0.001520 MSS- MSS-SACK-TS-N-WS07- 1 0.412238 non-sibling(optsdiff) non-sibling(optsdiff) 1
ovh04.ring.nlnog.net 192.99.153.129 2607:5300:101::599 250.0 250.0 0.0 1.000000 1.000000 9.727774e-13 5.188355e+08 5.188099e+08 ... 1171.543 0.535 0.000457 MSS- MSS-SACK-TS-N-WS07- 1 2.562532 non-sibling(optsdiff) non-sibling(optsdiff) 1
pix01.ring.nlnog.net 185.90.125.134 2a03:87a0:125:134::1 149497.0 150262.0 NaN 0.728900 0.733638 NaN 2.544141e+08 2.544131e+08 ... NaN NaN NaN MSS-SACK-TS-N-WS07- MSS-SACK-TS-N-WS07- 0 NaN NaN ERROR: too small clock hertz r-squares 1
tilaa01.ring.nlnog.net 46.19.36.12 2a02:2770::21a:4aff:feac:4576 29381.0 29508.0 NaN 0.128195 0.128773 NaN 1.382994e+08 1.382964e+08 ... NaN NaN NaN MSS-SACK-TS-N-WS04- MSS-SACK-TS-N-WS04- 0 NaN NaN ERROR: too small clock hertz r-squares 1
trueinternet01.ring.nlnog.net 203.144.167.57 2001:fb0:100:ffff:211:25ff:fe40:9468 159389.0 159706.0 NaN 0.687427 0.695245 NaN 1.010581e+08 1.010674e+08 ... NaN NaN NaN MSS-SACK-TS-N-WS11- MSS-SACK-TS-N-WS11- 0 NaN NaN ERROR: too small clock hertz r-squares 1
trueinternet01.ring.nlnog.net 203.144.167.57 2001:fb0:100:ffff:211:25ff:fe40:9468 107919.0 106731.0 NaN 0.457027 0.450581 NaN 2.100992e+07 2.100415e+07 ... NaN NaN NaN MSS-SACK-TS-N-WS11- MSS-SACK-TS-N-WS11- 0 NaN NaN ERROR: too small clock hertz r-squares 1

31 rows × 29 columns


In [ ]:


In [ ]:


In [ ]:


In [798]:
hlostats[:,0]


Out[798]:
array([  0. ,   0. ,   0. ,   0. ,   0. ,   0. ,   0. ,  99.6])

In [757]:
hlbstats


Out[757]:
array([[  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
          0.00000000e+00,   0.00000000e+00],
       [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
          0.00000000e+00,   0.00000000e+00],
       [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
          0.00000000e+00,   0.00000000e+00],
       [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
          0.00000000e+00,   0.00000000e+00],
       [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
          0.00000000e+00,   0.00000000e+00],
       [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
          0.00000000e+00,   0.00000000e+00],
       [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
          0.00000000e+00,   0.00000000e+00],
       [  8.10000000e-01,   9.93500000e+01,   8.15800000e+01,
          8.16100000e+01,   8.11199123e-02]])

In [802]:
set(gsdb.keys())


Out[802]:
{'RAv1', 'RAv2', 'nlnog', 'servers'}

In [771]:
gsdo


Out[771]:
{'RAv1': array([[   0.,    0.,    0.,    0.,    0.],
        [   0.,    0.,    0.,    0.,    0.],
        [   0.,    0.,    0.,    0.,    0.],
        [   0.,    0.,    0.,    0.,    0.],
        [   0.,    0.,    0.,    0.,    0.],
        [   0.,    0.,    0.,    0.,    0.],
        [   0.,    0.,    0.,    0.,    0.],
        [ 100.,  100.,  100.,  100.,    1.]]),
 'RAv2': array([[   0.        ,    0.        ,    0.        ,    0.        ,    0.        ],
        [   0.        ,    0.        ,    0.        ,    0.        ,    0.        ],
        [   0.        ,    0.        ,    0.        ,    0.        ,    0.        ],
        [   0.        ,    0.        ,    0.        ,    0.        ,    0.        ],
        [   0.        ,    0.        ,    0.        ,    0.        ,    0.        ],
        [   0.        ,    0.        ,    0.        ,    0.        ,    0.        ],
        [   0.        ,    0.        ,    0.        ,    0.        ,    0.        ],
        [  99.01      ,   98.52      ,  100.        ,   99.99      ,
            0.98759639]]),
 'nlnog': array([[   0.       ,    0.       ,    0.       ,    0.       ,    0.       ],
        [   0.       ,    0.       ,    0.       ,    0.       ,    0.       ],
        [   0.       ,    0.       ,    0.       ,    0.       ,    0.       ],
        [   0.       ,    0.       ,    0.       ,    0.       ,    0.       ],
        [   0.       ,    0.       ,    0.       ,    0.       ,    0.       ],
        [   0.       ,    0.       ,    0.       ,    0.       ,    0.       ],
        [   0.       ,    0.       ,    0.       ,    0.       ,    0.       ],
        [ 100.       ,   94.53     ,  100.       ,   99.99     ,
            0.9721905]]),
 'servers': array([[   0.        ,    0.        ,    0.        ,    0.        ,    0.        ],
        [   0.        ,    0.        ,    0.        ,    0.        ,    0.        ],
        [   0.        ,    0.        ,    0.        ,    0.        ,    0.        ],
        [   0.        ,    0.        ,    0.        ,    0.        ,    0.        ],
        [   0.        ,    0.        ,    0.        ,    0.        ,    0.        ],
        [   0.        ,    0.        ,    0.        ,    0.        ,    0.        ],
        [   0.        ,    0.        ,    0.        ,    0.        ,    0.        ],
        [ 100.        ,   84.62      ,  100.        ,   99.45      ,
            0.91725295]])}

In [808]:
mlstatsd_tre


Out[808]:
{'7_tre': array([[ 100.,  100.,  100.,  100.,    1.],
        [ 100.,  100.,  100.,  100.,    1.],
        [ 100.,  100.,  100.,  100.,    1.],
        [ 100.,  100.,  100.,  100.,    1.],
        [ 100.,  100.,  100.,  100.,    1.],
        [ 100.,  100.,  100.,  100.,    1.],
        [ 100.,  100.,  100.,  100.,    1.],
        [ 100.,  100.,  100.,  100.,    1.],
        [ 100.,  100.,  100.,  100.,    1.],
        [ 100.,  100.,  100.,  100.,    1.]])}

In [690]:
Image(graphs[-1][-1].create_png())


Out[690]:

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [731]:
df = mix_sib_nonsib(sib, nonsib, "full", 42)
df_ours = df[["label", "decision"]].copy()
dec2prd_ours(df_ours)
undec = len(df_ours[df_ours.isnull().any(axis=1)])
print("Our algo: Not deciding on {} pairs for unknown/error reasons.".format(undec))
df_ours = df_ours.dropna()
print("Our algo stats: ({}) undecided".format(undec))
a = stats(df_ours["label"], df_ours["dec_prd"])


Our algo: Not deciding on 1417 pairs for unknown/error reasons.
Our algo stats: (1417) undecided
Correct: 405969, incorrect 24, TP 493, FP 2, TN 405476, FN22, Prec. 99.6, Rec. 95.73, Spec. 100.0, Acc. 99.99%

In [737]:
list(a)
a = list(a)
a.append(5)
a


Out[737]:
[99.599999999999994, 95.730000000000004, 100.0, 99.989999999999995, 5, 5]

In [693]:
from sklearn.metrics import f1_score
f1_score(df_ours["label"], df_ours["dec_prd"])


Out[693]:
0.97623762376237622

In [695]:
from sklearn.metrics import matthews_corrcoef
matthews_corrcoef(df_ours["label"], df_ours["dec_prd"])


Out[695]:
0.97639996941410645

In [ ]:


In [ ]:

Evaluate Hand-Tuned Algo For Overfitting

  1. Calculate Training Error
  2. Evaluate only new hosts to get Test error

In [109]:
for i in range(1,2):
    print("############# Round {} ##############".format(i))
    sib, nonsib = get_pd_files("../../../gt{}/".format(i))
    #print("Columns: {}".format(list(sib.columns.values)))
    get_ouralgo_stats(sib, nonsib)


############# Round 1 ##############
Loading from filenames ../../../gt1/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../gt1/hosts.csv__nonsiblings_seed1_n679capture.pcap.ts.siblingresult.csv
Read 279 siblings and 82026 non-siblings from files.
Our algo: Not deciding on 70 pairs for unknown/error reasons.
Our algo stats: (70) undecided
Correct: 82229, incorrect 6, TP 255, FP 0, TN 81974, FN6, Prec. 100.0, Rec. 97.7, Spec. 100.0, Acc. 99.99%

In [136]:
for i in range(2,8):
    print("############# Round {} ##############".format(i))
    sib, nonsib = get_pd_files("../../../algo-eval/gt{}/".format(i))
    #print("Columns: {}".format(list(sib.columns.values)))
    get_ouralgo_stats(sib, nonsib)


############# Round 2 ##############
Loading from filenames ../../../algo-eval/gt2/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../algo-eval/gt2/hosts.csv__nonsiblings_seed1_n407_capture.pcap.ts.siblingresult.csv
Read 369 siblings and 145041 non-siblings from files.
Our algo: Not deciding on 32 pairs for unknown/error reasons.
Our algo stats: (32) undecided
Correct: 145365, incorrect 13, TP 328, FP 2, TN 145037, FN11, Prec. 99.39, Rec. 96.76, Spec. 100.0, Acc. 99.99%
############# Round 3 ##############
Loading from filenames ../../../algo-eval/gt3/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../algo-eval/gt3/hosts.csv__nonsiblings_seed1_n407_capture.pcap.ts.siblingresult.csv
Read 370 siblings and 145804 non-siblings from files.
Our algo: Not deciding on 44 pairs for unknown/error reasons.
Our algo stats: (44) undecided
Correct: 146113, incorrect 17, TP 317, FP 2, TN 145796, FN15, Prec. 99.37, Rec. 95.48, Spec. 100.0, Acc. 99.99%
############# Round 4 ##############
Loading from filenames ../../../algo-eval/gt4/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../algo-eval/gt4/hosts.csv__nonsiblings_seed1_n407_capture.pcap.ts.siblingresult.csv
Read 366 siblings and 144258 non-siblings from files.
Our algo: Not deciding on 779 pairs for unknown/error reasons.
Our algo stats: (779) undecided
Correct: 143827, incorrect 18, TP 335, FP 2, TN 143492, FN16, Prec. 99.41, Rec. 95.44, Spec. 100.0, Acc. 99.99%
############# Round 5 ##############
Loading from filenames ../../../algo-eval/gt5/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../algo-eval/gt5/hosts.csv__nonsiblings_seed1_n407_capture.pcap.ts.siblingresult.csv
Read 366 siblings and 144258 non-siblings from files.
Our algo: Not deciding on 23 pairs for unknown/error reasons.
Our algo stats: (23) undecided
Correct: 144590, incorrect 11, TP 334, FP 0, TN 144256, FN11, Prec. 100.0, Rec. 96.81, Spec. 100.0, Acc. 99.99%
############# Round 6 ##############
Loading from filenames ../../../algo-eval/gt6/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../algo-eval/gt6/hosts.csv__nonsiblings_seed1_n407_capture.pcap.ts.siblingresult.csv
Read 366 siblings and 144258 non-siblings from files.
Our algo: Not deciding on 792 pairs for unknown/error reasons.
Our algo stats: (792) undecided
Correct: 143819, incorrect 13, TP 324, FP 2, TN 143495, FN11, Prec. 99.39, Rec. 96.72, Spec. 100.0, Acc. 99.99%
############# Round 7 ##############
Loading from filenames ../../../algo-eval/gt7/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../algo-eval/gt7/hosts.csv__nonsiblings_seed1_n407_capture.pcap.ts.siblingresult.csv
Read 369 siblings and 146571 non-siblings from files.
Our algo: Not deciding on 822 pairs for unknown/error reasons.
Our algo stats: (822) undecided
Correct: 146105, incorrect 13, TP 314, FP 2, TN 145791, FN11, Prec. 99.37, Rec. 96.62, Spec. 100.0, Acc. 99.99%

In [137]:
for i in range(2,8):
    print("############# Round {} ##############".format(i))
    #sib, nonsib = get_pd_files("../../../eval-algo/gt{}/".format(i))
    groups = assign_groups(sib)
    groupset = set(groups)
    for i in groupset:
        print("## GROUP: {}".format(i))
        groupsib = sib[sib["group"] == i].copy()
        groupnonsib = match_nonsibs(groupsib, nonsib)
        get_ouralgo_stats(groupsib, groupnonsib)
        #get_bev_stats(groupsib, groupnonsib)


############# Round 2 ##############
## GROUP: RAv1
Generated 132 non-sibling candidates from 12 siblings.
Our algo: Not deciding on 2 pairs for unknown/error reasons.
Our algo stats: (2) undecided
Correct: 142, incorrect 0, TP 12, FP 0, TN 130, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
## GROUP: RAv2
Generated 41006 non-sibling candidates from 203 siblings.
Our algo: Not deciding on 5 pairs for unknown/error reasons.
Our algo stats: (5) undecided
Correct: 41199, incorrect 5, TP 200, FP 2, TN 40999, FN3, Prec. 99.01, Rec. 98.52, Spec. 100.0, Acc. 99.99%
## GROUP: nlnog
Generated 18360 non-sibling candidates from 136 siblings.
Our algo: Not deciding on 316 pairs for unknown/error reasons.
Our algo stats: (316) undecided
Correct: 18173, incorrect 7, TP 86, FP 0, TN 18087, FN7, Prec. 100.0, Rec. 92.47, Spec. 100.0, Acc. 99.96%
## GROUP: servers
Generated 306 non-sibling candidates from 18 siblings.
Our algo: Not deciding on 2 pairs for unknown/error reasons.
Our algo stats: (2) undecided
Correct: 321, incorrect 1, TP 16, FP 0, TN 305, FN1, Prec. 100.0, Rec. 94.12, Spec. 100.0, Acc. 99.69%
############# Round 3 ##############
## GROUP: RAv1
Generated 132 non-sibling candidates from 12 siblings.
Our algo: Not deciding on 2 pairs for unknown/error reasons.
Our algo stats: (2) undecided
Correct: 142, incorrect 0, TP 12, FP 0, TN 130, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
## GROUP: RAv2
Generated 41006 non-sibling candidates from 203 siblings.
Our algo: Not deciding on 5 pairs for unknown/error reasons.
Our algo stats: (5) undecided
Correct: 41199, incorrect 5, TP 200, FP 2, TN 40999, FN3, Prec. 99.01, Rec. 98.52, Spec. 100.0, Acc. 99.99%
## GROUP: nlnog
Generated 18360 non-sibling candidates from 136 siblings.
Our algo: Not deciding on 316 pairs for unknown/error reasons.
Our algo stats: (316) undecided
Correct: 18173, incorrect 7, TP 86, FP 0, TN 18087, FN7, Prec. 100.0, Rec. 92.47, Spec. 100.0, Acc. 99.96%
## GROUP: servers
Generated 306 non-sibling candidates from 18 siblings.
Our algo: Not deciding on 2 pairs for unknown/error reasons.
Our algo stats: (2) undecided
Correct: 321, incorrect 1, TP 16, FP 0, TN 305, FN1, Prec. 100.0, Rec. 94.12, Spec. 100.0, Acc. 99.69%
############# Round 4 ##############
## GROUP: RAv1
Generated 132 non-sibling candidates from 12 siblings.
Our algo: Not deciding on 2 pairs for unknown/error reasons.
Our algo stats: (2) undecided
Correct: 142, incorrect 0, TP 12, FP 0, TN 130, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
## GROUP: RAv2
Generated 41006 non-sibling candidates from 203 siblings.
Our algo: Not deciding on 5 pairs for unknown/error reasons.
Our algo stats: (5) undecided
Correct: 41199, incorrect 5, TP 200, FP 2, TN 40999, FN3, Prec. 99.01, Rec. 98.52, Spec. 100.0, Acc. 99.99%
## GROUP: nlnog
Generated 18360 non-sibling candidates from 136 siblings.
Our algo: Not deciding on 316 pairs for unknown/error reasons.
Our algo stats: (316) undecided
Correct: 18173, incorrect 7, TP 86, FP 0, TN 18087, FN7, Prec. 100.0, Rec. 92.47, Spec. 100.0, Acc. 99.96%
## GROUP: servers
Generated 306 non-sibling candidates from 18 siblings.
Our algo: Not deciding on 2 pairs for unknown/error reasons.
Our algo stats: (2) undecided
Correct: 321, incorrect 1, TP 16, FP 0, TN 305, FN1, Prec. 100.0, Rec. 94.12, Spec. 100.0, Acc. 99.69%
############# Round 5 ##############
## GROUP: RAv1
Generated 132 non-sibling candidates from 12 siblings.
Our algo: Not deciding on 2 pairs for unknown/error reasons.
Our algo stats: (2) undecided
Correct: 142, incorrect 0, TP 12, FP 0, TN 130, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
## GROUP: RAv2
Generated 41006 non-sibling candidates from 203 siblings.
Our algo: Not deciding on 5 pairs for unknown/error reasons.
Our algo stats: (5) undecided
Correct: 41199, incorrect 5, TP 200, FP 2, TN 40999, FN3, Prec. 99.01, Rec. 98.52, Spec. 100.0, Acc. 99.99%
## GROUP: nlnog
Generated 18360 non-sibling candidates from 136 siblings.
Our algo: Not deciding on 316 pairs for unknown/error reasons.
Our algo stats: (316) undecided
Correct: 18173, incorrect 7, TP 86, FP 0, TN 18087, FN7, Prec. 100.0, Rec. 92.47, Spec. 100.0, Acc. 99.96%
## GROUP: servers
Generated 306 non-sibling candidates from 18 siblings.
Our algo: Not deciding on 2 pairs for unknown/error reasons.
Our algo stats: (2) undecided
Correct: 321, incorrect 1, TP 16, FP 0, TN 305, FN1, Prec. 100.0, Rec. 94.12, Spec. 100.0, Acc. 99.69%
############# Round 6 ##############
## GROUP: RAv1
Generated 132 non-sibling candidates from 12 siblings.
Our algo: Not deciding on 2 pairs for unknown/error reasons.
Our algo stats: (2) undecided
Correct: 142, incorrect 0, TP 12, FP 0, TN 130, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
## GROUP: RAv2
Generated 41006 non-sibling candidates from 203 siblings.
Our algo: Not deciding on 5 pairs for unknown/error reasons.
Our algo stats: (5) undecided
Correct: 41199, incorrect 5, TP 200, FP 2, TN 40999, FN3, Prec. 99.01, Rec. 98.52, Spec. 100.0, Acc. 99.99%
## GROUP: nlnog
Generated 18360 non-sibling candidates from 136 siblings.
Our algo: Not deciding on 316 pairs for unknown/error reasons.
Our algo stats: (316) undecided
Correct: 18173, incorrect 7, TP 86, FP 0, TN 18087, FN7, Prec. 100.0, Rec. 92.47, Spec. 100.0, Acc. 99.96%
## GROUP: servers
Generated 306 non-sibling candidates from 18 siblings.
Our algo: Not deciding on 2 pairs for unknown/error reasons.
Our algo stats: (2) undecided
Correct: 321, incorrect 1, TP 16, FP 0, TN 305, FN1, Prec. 100.0, Rec. 94.12, Spec. 100.0, Acc. 99.69%
############# Round 7 ##############
## GROUP: RAv1
Generated 132 non-sibling candidates from 12 siblings.
Our algo: Not deciding on 2 pairs for unknown/error reasons.
Our algo stats: (2) undecided
Correct: 142, incorrect 0, TP 12, FP 0, TN 130, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
## GROUP: RAv2
Generated 41006 non-sibling candidates from 203 siblings.
Our algo: Not deciding on 5 pairs for unknown/error reasons.
Our algo stats: (5) undecided
Correct: 41199, incorrect 5, TP 200, FP 2, TN 40999, FN3, Prec. 99.01, Rec. 98.52, Spec. 100.0, Acc. 99.99%
## GROUP: nlnog
Generated 18360 non-sibling candidates from 136 siblings.
Our algo: Not deciding on 316 pairs for unknown/error reasons.
Our algo stats: (316) undecided
Correct: 18173, incorrect 7, TP 86, FP 0, TN 18087, FN7, Prec. 100.0, Rec. 92.47, Spec. 100.0, Acc. 99.96%
## GROUP: servers
Generated 306 non-sibling candidates from 18 siblings.
Our algo: Not deciding on 2 pairs for unknown/error reasons.
Our algo stats: (2) undecided
Correct: 321, incorrect 1, TP 16, FP 0, TN 305, FN1, Prec. 100.0, Rec. 94.12, Spec. 100.0, Acc. 99.69%

High-Level Comparison of Results


In [9]:
for i in range(7,8):
    print("############# Round {} ##############".format(i))
    sib, nonsib = get_pd_files("../../../gt{}/".format(i))
    #print("Columns: {}".format(list(sib.columns.values)))
    get_ouralgo_stats(sib, nonsib)
    get_bev_stats(sib, nonsib)
    nonsibfil = match_nonsibs(sib, nonsib)


############# Round 7 ##############
Loading from filenames ../../../gt7/hosts.csvcapture.pcap.ts.siblingresult.csv and ../../../gt7/hosts.csv__nonsiblings_seed1_n681capture.pcap.ts.siblingresult.csv
Read 618 siblings and 406792 non-siblings from files.
Our algo: Not deciding on 1417 pairs for unknown/error reasons.
Our algo stats: (1417) undecided
Correct: 405969, incorrect 24, TP 493, FP 2, TN 405476, FN22, Prec. 99.6, Rec. 95.73, Spec. 100.0, Acc. 99.99%
Beverly algo stats:
Beverly algo: Not deciding on 1315 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided
Correct: 331420, incorrect 74675, TP 613, FP 74671, TN 330807, FN4, Prec. 0.81, Rec. 99.35, Spec. 81.58, Acc. 81.61%
Generated 381306 non-sibling candidates from 618 siblings.

Comparison of results at group level


In [84]:
for i in range(2,3):
    print("############# Round {} ##############".format(i))
    #sib, nonsib = get_pd_files("../../../gt{}/".format(i))
    groups = assign_groups(sib)
    groupset = set(groups)
    for i in groupset:
        print("## GROUP: {}".format(i))
        groupsib = sib[sib["group"] == i].copy()
        groupnonsib = match_nonsibs(groupsib, nonsib)
        get_ouralgo_stats(groupsib, groupnonsib)
        get_bev_stats(groupsib, groupnonsib)
    #print("Columns: {}".format(list(sib.columns.values)))
    #get_ouralgo_stats(sib, nonsib)
    #get_bev_stats(sib, nonsib)
    #nonsibfil = match_nonsibs(sib, nonsib)


############# Round 2 ##############
## GROUP: RAv1
Generated 132 non-sibling candidates from 12 siblings.
Our algo: Not deciding on 0 pairs for unknown/error reasons.
Our algo stats: (0) undecided
Correct: 144, incorrect 0, TP 12, FP 0, TN 132, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Beverly algo stats:
Beverly algo: Not deciding on 0 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided
Correct: 12, incorrect 132, TP 12, FP 132, TN 0, FN0, Prec. 8.33, Rec. 100.0, Spec. 0.0, Acc. 8.33%
## GROUP: RAv2
Generated 42642 non-sibling candidates from 207 siblings.
Our algo: Not deciding on 4 pairs for unknown/error reasons.
Our algo stats: (4) undecided
Correct: 42837, incorrect 8, TP 199, FP 2, TN 42638, FN6, Prec. 99.0, Rec. 97.07, Spec. 100.0, Acc. 99.98%
Beverly algo stats:
Beverly algo: Not deciding on 4 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided
Correct: 16785, incorrect 26060, TP 204, FP 26059, TN 16581, FN1, Prec. 0.78, Rec. 99.51, Spec. 38.89, Acc. 39.18%
## GROUP: nlnog
Generated 145542 non-sibling candidates from 382 siblings.
Our algo: Not deciding on 114 pairs for unknown/error reasons.
Our algo stats: (114) undecided
Correct: 145800, incorrect 10, TP 302, FP 0, TN 145498, FN10, Prec. 100.0, Rec. 96.79, Spec. 100.0, Acc. 99.99%
Beverly algo stats:
Beverly algo: Not deciding on 44 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided
Correct: 108048, incorrect 37832, TP 379, FP 37829, TN 107669, FN3, Prec. 0.99, Rec. 99.21, Spec. 74.0, Acc. 74.07%
## GROUP: servers
Generated 506 non-sibling candidates from 23 siblings.
Our algo: Not deciding on 0 pairs for unknown/error reasons.
Our algo stats: (0) undecided
Correct: 526, incorrect 3, TP 20, FP 0, TN 506, FN3, Prec. 100.0, Rec. 86.96, Spec. 100.0, Acc. 99.43%
Beverly algo stats:
Beverly algo: Not deciding on 0 pairs for NaN and 0 pairs for unknown/error reasons.
Beverly algo stats: (0) undecided
Correct: 183, incorrect 346, TP 23, FP 346, TN 160, FN0, Prec. 6.23, Rec. 100.0, Spec. 31.62, Acc. 34.59%

In [79]:



Out[79]:
{'RAv1', 'RAv2', 'nlnog', 'servers'}

ML with proportional group sampling

Strategy:

  1. take siblings from each group and generate 10 ShuffleSplit iterations
  2. create matching nonsibs and mix all
  3. adjust weights

In [621]:
# functions for ML with proprtional group sampling
def split_stratified_groups(sib, splits, nr):
    from sklearn.model_selection import KFold # non-overlapping!
    groups = assign_groups(sib)
    groupset = set(groups)
    gsibdf_train = pd.DataFrame(columns=sib.columns)
    gsibdf_test = pd.DataFrame(columns=sib.columns)
    for i in groupset:
        groupsib = sib[sib["group"] == i].copy()
        #print("## GROUP: {} with {} elements.".format(i, len(groupsib)))
        ks = KFold(n_splits=splits, random_state=42, shuffle=True)
        labels, features = make_labels_features(groupsib)
        ctr = -1
        for train_index, test_index in ks.split(groupsib):
            ctr += 1                
            if (ctr == nr):
            #print("TRAIN:", train_index, "TEST:", test_index)
                gsibdf_train = gsibdf_train.append(groupsib.iloc[train_index])
                gsibdf_test = gsibdf_test.append(groupsib.iloc[test_index])
                break
    return [gsibdf_train, gsibdf_test]


def dt_train(labels, features, weight, rs=42):
    estimator = DecisionTreeClassifier(max_depth=30, min_samples_leaf=5, random_state=42)
    est = estimator.fit(features, labels, sample_weight=weight)
    return est

def kfold_train_test(sib, nonsib):
    kfolds = 10
    stats_train_error = np.empty((10,4), dtype=float)
    stats_test_error = np.empty((10,4), dtype=float)
    graphs = []
    for i in range(10):
        print("Round {}".format(i))
        # pick proportionally from each group
        train_sib, test_sib = split_stratified_groups(sib, 10, i)
        # create, select, and mix matching nonsibs
        train_nonsib = match_nonsibs(train_sib, nonsib)
        test_nonsib = match_nonsibs(test_sib, nonsib)
        train = mix_sib_nonsib(train_sib,train_nonsib, "all")
        # prune NaNs out
        train, train_prune_lbl, train_prune_prd = prune_data_for_ml(train)
        test = mix_sib_nonsib(test_sib,test_nonsib, "all")
        test, test_prune_lbl, test_prune_prd = prune_data_for_ml(test)
        # split out features, labels, and weights
        train_lbl, train_ftr = make_labels_features(train)
        test_lbl, test_ftr = make_labels_features(test)
        train_weight = get_sample_weight_one_input(train)
        test_weight = get_sample_weight_one_input(test)
        # train estimator
        est = dt_train(train_lbl, train_ftr, train_weight)   
        stats_train_error[i] = stats(train_lbl, est.predict(train_ftr))
        stats_test_error[i]  =  stats(test_lbl, est.predict(test_ftr))
        graph = dt_plot(est, train_ftr)
        graphs.append(graph)
        #Image(graph.create_png())  
    return stats_train_error, stats_test_error

In [618]:
tre, tee = kfold_train_test(sib, nonsib)


Round 0
Generated 108570 non-sibling candidates from 330 siblings.
Generated 1482 non-sibling candidates from 39 siblings.
/usr/local/lib/python3.5/dist-packages/ipykernel/__main__.py:172: RuntimeWarning: invalid value encountered in long_scalars
Correct: 70828, incorrect 1, TP 0, FP 0, TN 70828, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 667 rows with error results and 70829 rows with NaNs (typically hz different) from a     total of 108900 entries, resulting in 38071 entries.
Correct: 991, incorrect 1, TP 0, FP 0, TN 991, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 99.9%
Removing 1 rows with error results and 992 rows with NaNs (typically hz different) from a     total of 1521 entries, resulting in 529 entries.
Found 329 sibs and 37742 nonsibs, weights: 0.9914 and 0.0086, #weights: 38071
Found 38 sibs and 491 nonsibs, weights: 0.9282 and 0.0718, #weights: 529
Correct: 38071, incorrect 0, TP 329, FP 0, TN 37742, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 529, incorrect 0, TP 38, FP 0, TN 491, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 1
Generated 108570 non-sibling candidates from 330 siblings.
Generated 1482 non-sibling candidates from 39 siblings.
Correct: 70452, incorrect 1, TP 0, FP 0, TN 70452, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 10 rows with error results and 70453 rows with NaNs (typically hz different) from a     total of 108900 entries, resulting in 38447 entries.
Correct: 1034, incorrect 1, TP 0, FP 0, TN 1034, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 99.9%
Removing 77 rows with error results and 1035 rows with NaNs (typically hz different) from a     total of 1521 entries, resulting in 486 entries.
Found 329 sibs and 38118 nonsibs, weights: 0.9914 and 0.0086, #weights: 38447
Found 38 sibs and 448 nonsibs, weights: 0.9218 and 0.0782, #weights: 486
Correct: 38447, incorrect 0, TP 329, FP 0, TN 38118, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 486, incorrect 0, TP 38, FP 0, TN 448, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 2
Generated 109230 non-sibling candidates from 331 siblings.
Generated 1406 non-sibling candidates from 38 siblings.
Correct: 71525, incorrect 2, TP 0, FP 0, TN 71525, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 669 rows with error results and 71527 rows with NaNs (typically hz different) from a     total of 109561 entries, resulting in 38034 entries.
Correct: 898, incorrect 0, TP 0, FP 0, TN 898, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 898 rows with NaNs (typically hz different) from a     total of 1444 entries, resulting in 546 entries.
Found 329 sibs and 37705 nonsibs, weights: 0.9913 and 0.0087, #weights: 38034
Found 38 sibs and 508 nonsibs, weights: 0.9304 and 0.0696, #weights: 546
Correct: 38034, incorrect 0, TP 329, FP 0, TN 37705, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 546, incorrect 0, TP 38, FP 0, TN 508, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
/usr/local/lib/python3.5/dist-packages/ipykernel/__main__.py:173: RuntimeWarning: invalid value encountered in long_scalars
Round 3
Generated 109892 non-sibling candidates from 332 siblings.
Generated 1332 non-sibling candidates from 37 siblings.
Correct: 71593, incorrect 2, TP 0, FP 0, TN 71593, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 670 rows with error results and 71595 rows with NaNs (typically hz different) from a     total of 110224 entries, resulting in 38629 entries.
Correct: 890, incorrect 0, TP 0, FP 0, TN 890, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 2 rows with error results and 890 rows with NaNs (typically hz different) from a     total of 1369 entries, resulting in 479 entries.
Found 330 sibs and 38299 nonsibs, weights: 0.9915 and 0.0085, #weights: 38629
Found 37 sibs and 442 nonsibs, weights: 0.9228 and 0.0772, #weights: 479
Correct: 38629, incorrect 0, TP 330, FP 0, TN 38299, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 479, incorrect 0, TP 37, FP 0, TN 442, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 4
Generated 109892 non-sibling candidates from 332 siblings.
Generated 1332 non-sibling candidates from 37 siblings.
Correct: 72037, incorrect 2, TP 0, FP 0, TN 72037, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 673 rows with error results and 72039 rows with NaNs (typically hz different) from a     total of 110224 entries, resulting in 38185 entries.
Correct: 852, incorrect 0, TP 0, FP 0, TN 852, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 852 rows with NaNs (typically hz different) from a     total of 1369 entries, resulting in 517 entries.
Found 330 sibs and 37855 nonsibs, weights: 0.9914 and 0.0086, #weights: 38185
Found 37 sibs and 480 nonsibs, weights: 0.9284 and 0.0716, #weights: 517
Correct: 38185, incorrect 0, TP 330, FP 0, TN 37855, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 517, incorrect 0, TP 37, FP 0, TN 480, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 5
Generated 109892 non-sibling candidates from 332 siblings.
Generated 1332 non-sibling candidates from 37 siblings.
Correct: 71539, incorrect 2, TP 0, FP 0, TN 71539, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 674 rows with error results and 71541 rows with NaNs (typically hz different) from a     total of 110224 entries, resulting in 38683 entries.
Correct: 904, incorrect 0, TP 0, FP 0, TN 904, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 904 rows with NaNs (typically hz different) from a     total of 1369 entries, resulting in 465 entries.
Found 330 sibs and 38353 nonsibs, weights: 0.9915 and 0.0085, #weights: 38683
Found 37 sibs and 428 nonsibs, weights: 0.9204 and 0.0796, #weights: 465
Correct: 38683, incorrect 0, TP 330, FP 0, TN 38353, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 463, incorrect 2, TP 37, FP 2, TN 426, FN0, Prec. 94.87, Rec. 100.0, Spec. 99.53, Acc. 99.57%
Round 6
Generated 110556 non-sibling candidates from 333 siblings.
Generated 1260 non-sibling candidates from 36 siblings.
Correct: 72442, incorrect 2, TP 0, FP 0, TN 72442, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 672 rows with error results and 72444 rows with NaNs (typically hz different) from a     total of 110889 entries, resulting in 38445 entries.
Correct: 811, incorrect 0, TP 0, FP 0, TN 811, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 1 rows with error results and 811 rows with NaNs (typically hz different) from a     total of 1296 entries, resulting in 485 entries.
Found 331 sibs and 38114 nonsibs, weights: 0.9914 and 0.0086, #weights: 38445
Found 36 sibs and 449 nonsibs, weights: 0.9258 and 0.0742, #weights: 485
Correct: 38445, incorrect 0, TP 331, FP 0, TN 38114, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 485, incorrect 0, TP 36, FP 0, TN 449, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 7
Generated 110556 non-sibling candidates from 333 siblings.
Generated 1260 non-sibling candidates from 36 siblings.
Correct: 71653, incorrect 2, TP 0, FP 0, TN 71653, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 676 rows with error results and 71655 rows with NaNs (typically hz different) from a     total of 110889 entries, resulting in 39234 entries.
Correct: 890, incorrect 0, TP 0, FP 0, TN 890, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 890 rows with NaNs (typically hz different) from a     total of 1296 entries, resulting in 406 entries.
Found 331 sibs and 38903 nonsibs, weights: 0.9916 and 0.0084, #weights: 39234
Found 36 sibs and 370 nonsibs, weights: 0.9113 and 0.0887, #weights: 406
Correct: 39234, incorrect 0, TP 331, FP 0, TN 38903, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 406, incorrect 0, TP 36, FP 0, TN 370, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 8
Generated 111222 non-sibling candidates from 334 siblings.
Generated 1190 non-sibling candidates from 35 siblings.
Correct: 72854, incorrect 2, TP 0, FP 0, TN 72854, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 678 rows with error results and 72856 rows with NaNs (typically hz different) from a     total of 111556 entries, resulting in 38700 entries.
Correct: 768, incorrect 0, TP 0, FP 0, TN 768, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 768 rows with NaNs (typically hz different) from a     total of 1225 entries, resulting in 457 entries.
Found 332 sibs and 38368 nonsibs, weights: 0.9914 and 0.0086, #weights: 38700
Found 35 sibs and 422 nonsibs, weights: 0.9234 and 0.0766, #weights: 457
Correct: 38700, incorrect 0, TP 332, FP 0, TN 38368, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 457, incorrect 0, TP 35, FP 0, TN 422, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Round 9
Generated 111222 non-sibling candidates from 334 siblings.
Generated 1190 non-sibling candidates from 35 siblings.
Correct: 72707, incorrect 2, TP 0, FP 0, TN 72707, FN2, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 676 rows with error results and 72709 rows with NaNs (typically hz different) from a     total of 111556 entries, resulting in 38847 entries.
Correct: 784, incorrect 0, TP 0, FP 0, TN 784, FN0, Prec. nan, Rec. nan, Spec. 100.0, Acc. 100.0%
Removing 0 rows with error results and 784 rows with NaNs (typically hz different) from a     total of 1225 entries, resulting in 441 entries.
Found 332 sibs and 38515 nonsibs, weights: 0.9915 and 0.0085, #weights: 38847
Found 35 sibs and 406 nonsibs, weights: 0.9206 and 0.0794, #weights: 441
Correct: 38847, incorrect 0, TP 332, FP 0, TN 38515, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%
Correct: 441, incorrect 0, TP 35, FP 0, TN 406, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%

In [619]:
tre


Out[619]:
array([[ 100.,  100.,  100.,  100.],
       [ 100.,  100.,  100.,  100.],
       [ 100.,  100.,  100.,  100.],
       [ 100.,  100.,  100.,  100.],
       [ 100.,  100.,  100.,  100.],
       [ 100.,  100.,  100.,  100.],
       [ 100.,  100.,  100.,  100.],
       [ 100.,  100.,  100.,  100.],
       [ 100.,  100.,  100.,  100.],
       [ 100.,  100.,  100.,  100.]])

In [620]:
tee


Out[620]:
array([[ 100.  ,  100.  ,  100.  ,  100.  ],
       [ 100.  ,  100.  ,  100.  ,  100.  ],
       [ 100.  ,  100.  ,  100.  ,  100.  ],
       [ 100.  ,  100.  ,  100.  ,  100.  ],
       [ 100.  ,  100.  ,  100.  ,  100.  ],
       [  94.87,  100.  ,   99.53,   99.57],
       [ 100.  ,  100.  ,  100.  ,  100.  ],
       [ 100.  ,  100.  ,  100.  ,  100.  ],
       [ 100.  ,  100.  ,  100.  ,  100.  ],
       [ 100.  ,  100.  ,  100.  ,  100.  ]])

In [683]:
Image(graph.create_png())


Out[683]:

In [608]:
for i in graphs:
    Image(i.create_png())

In [615]:
Image(graphs[5].create_png())


Out[615]:

In [598]:
train_lbl.as_matrix()


Out[598]:
array([ 1.,  1.,  1., ...,  0.,  0.,  0.])

Old and unused approaches

ML test with shuffled, pseudo-stratified group sampling

Strategy:

  1. take siblings from each group and generate 10 ShuffleSplit iterations
  2. create matching nonsibs and mix all
  3. adjust weights

In [407]:
#groups = assign_groups(sib)
#from sklearn.model_selection import ShuffleSplit # shufflesplit does not generate non-overlapping splits!
#from sklearn.model_selection import KFold # non-overlapping!

In [408]:
def split_stratified_groups(sib, splits, nr):
    from sklearn.model_selection import KFold # non-overlapping!
    groups = assign_groups(sib)
    groupset = set(groups)
    gsibdf_train = pd.DataFrame(columns=sib.columns)
    gsibdf_test = pd.DataFrame(columns=sib.columns)
    for i in groupset:
        groupsib = sib[sib["group"] == i].copy()
        print("## GROUP: {} with {} elements.".format(i, len(groupsib)))
        ks = KFold(n_splits=splits, random_state=42, shuffle=True)
        labels, features = make_labels_features(groupsib)
        ctr = 0
        for train_index, test_index in ks.split(groupsib):
            ctr += 1
            if (ctr == nr):
            #print("TRAIN:", train_index, "TEST:", test_index)
                gsibdf_train = gsibdf_train.append(groupsib.iloc[train_index])
                gsibdf_test = gsibdf_test.append(groupsib.iloc[test_index])
                break
    return [gsibdf_train, gsibdf_test]

In [409]:
train_sib, test_sib = split_stratified_groups(sib, 10, 1)


## GROUP: RAv1 with 12 elements.
## GROUP: RAv2 with 203 elements.
## GROUP: nlnog with 136 elements.
## GROUP: servers with 18 elements.

In [410]:
train_nonsib = match_nonsibs(train_sib, nonsib)
test_nonsib = match_nonsibs(test_sib, nonsib)


Generated 108570 non-sibling candidates from 330 siblings.
Generated 1482 non-sibling candidates from 39 siblings.

In [531]:
train = mix_sib_nonsib(train_sib,train_nonsib, "all")
train, train_prune_lbl, train_prune_prd = prune_data_for_ml(train)
test = mix_sib_nonsib(test_sib,test_nonsib, "all")
test, test_prune_lbl, test_prune_prd = prune_data_for_ml(test)


Correct: 70828, incorrect 1, TP 0, FP 0, TN 70828, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 100.0%
Removing 667 rows with error results and 70829 rows with NaNs (typically hz different) from a     total of 108900 entries, resulting in 38071 entries.
Correct: 991, incorrect 1, TP 0, FP 0, TN 991, FN1, Prec. nan, Rec. 0.0, Spec. 100.0, Acc. 99.9%
Removing 1 rows with error results and 992 rows with NaNs (typically hz different) from a     total of 1521 entries, resulting in 529 entries.
/usr/local/lib/python3.5/dist-packages/ipykernel/__main__.py:172: RuntimeWarning: invalid value encountered in long_scalars

In [532]:
train_lbl, train_ftr = make_labels_features(train)
test_lbl, test_ftr = make_labels_features(test)

In [533]:
train_weight = get_sample_weight_one_input(train)
test_weight = get_sample_weight_one_input(test)


Found 329 sibs and 37742 nonsibs, weights: 0.9914 and 0.0086, #weights: 38071
Found 38 sibs and 491 nonsibs, weights: 0.9282 and 0.0718, #weights: 529

In [549]:
def dt_train(labels, features, weight, rs=42):
    estimator = DecisionTreeClassifier(max_depth=30, min_samples_leaf=5, random_state=42)
    est = estimator.fit(features, labels, sample_weight=weight)
    return est

In [550]:
est = dt_train(train_lbl, train_ftr, train_weight)

In [551]:
## WIP TODO now build scoring function

In [552]:
prd = est.predict(train_ftr)

In [553]:
print("score: {}".format(est.score(train_ftr, train_lbl, sample_weight=train_weight)))


score: 1.0

In [554]:
stats(train_lbl, prd)


Correct: 38071, incorrect 0, TP 329, FP 0, TN 37742, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%

In [555]:
stats(test_lbl, est.predict(test_ftr))


Correct: 529, incorrect 0, TP 38, FP 0, TN 491, FN0, Prec. 100.0, Rec. 100.0, Spec. 100.0, Acc. 100.0%

In [547]:
def dt_plot(estimator, features):
    from sklearn import tree
    dot_data = tree.export_graphviz(estimator, out_file=None, 
                         feature_names=list(features.columns.values),  
                         class_names=["non-sibling", "sibling"],  
                         filled=True, rounded=True,  
                         special_characters=True) 
    import pydotplus
    graph = pydotplus.graph_from_dot_data(dot_data) 
    
    return graph
    #print("score: {}".format(est.score(features_test, labels_test, sample_weight=weights_test)))
    # return prd, lbl, graph

In [556]:
graph = dt_plot(est, train_ftr)
Image(graph.create_png())


Out[556]:

In [ ]:


In [ ]: