This notebook is a work in progress, but it should give you an idea of how to analyze TCP options in IPython notebooks.

To analyze a QoF-generated IPFIX file for presence of TCP options, set the filename in the following code block, then run it:


In [ ]:
# Path of the QoF-generated IPFIX file to analyze; edit this to point at your own capture.
ipfix_file = "../test/tsdag-nonat-191800.ipfix"

Run the following code to initialize required modules and define some useful functions:


In [ ]:
import ipfix
import qof
import pandas as pd

# Register the Information Elements (IEs) used when reading the file.
ipfix.ie.use_iana_default() # loads IANA default IEs from module core definitions
ipfix.ie.use_5103_default() # loads reverse IEs for RFC5103 biflows
# NOTE(review): "qof.iespec" is a relative path -- presumably it must sit in the working directory; confirm.
ipfix.ie.use_specfile("qof.iespec") # loads enterprise-specific IEs for QoF

# Speeds up dataframe processing of per-IP data (presumably by handling IPv4
# addresses as plain integers -- confirm against the ipfix module).
ipfix.types.use_integer_ipv4()

def characteristic_present(df, flag):
    """Return a boolean Series: True where a flow shows `flag` in either direction.

    A direction matches when every bit of `flag` is set in its
    characteristics column ("qofTcpCharacteristics" for forward,
    "reverseQofTcpCharacteristics" for reverse).
    """
    def all_bits_set(column):
        # Masking and comparing against the flag requires ALL of its bits.
        return (df[column] & flag) == flag

    forward_hit = all_bits_set("qofTcpCharacteristics")
    reverse_hit = all_bits_set("reverseQofTcpCharacteristics")
    return forward_hit | reverse_hit

def ecn_negotiated(df):
    """Return a boolean Series marking flows whose handshake negotiated ECN.

    Following RFC 3168 section 6.1.1, a flow counts as negotiated when
      * the first forward packet is an ECN-setup SYN
        (SYN, ECE and CWR set, ACK clear), and
      * the first reverse packet is an ECN-setup SYN-ACK
        (SYN, ACK and ECE set, CWR clear).

    df: DataFrame with "initialTCPFlags" and "reverseInitialTCPFlags" columns.
    """
    ecn_syn = qof.TCP_SYN | qof.TCP_ECE | qof.TCP_CWR
    ecn_ack = qof.TCP_SYN | qof.TCP_ACK | qof.TCP_ECE
    # All four handshake-relevant bits; masking with this lets each comparison
    # also require that the bit NOT in the expected pattern is clear.
    ecn_mask = ecn_syn | ecn_ack
    # The reverse side is masked with ecn_mask (not just ecn_ack) so that a
    # SYN-ACK echoing both ECE and CWR is not counted as an ECN-setup SYN-ACK.
    return ((df["initialTCPFlags"] & ecn_mask) == ecn_syn) & \
           ((df["reverseInitialTCPFlags"] & ecn_mask) == ecn_ack)
    
def print_proportion(feature, feature_s):
    """Print how many flows exhibit a feature and return that count.

    feature:   label printed in the first column of the report line.
    feature_s: boolean Series with one entry per flow, True where the
               feature was observed.

    Returns the number of True entries. When there are none (including an
    empty Series) prints "<feature> not observed" and returns None.
    Errors unrelated to a missing True entry (e.g. a non-Series argument)
    propagate to the caller instead of being reported as "not observed".
    """
    observed = int(feature_s.sum())
    if observed == 0:
        # Also covers the empty Series, so the division below never sees len 0.
        print("%-10s not observed" % feature)
        return None
    print("%-10s observed on %8u flows (%8.5f%%)" %
          (feature, observed, observed * 100 / len(feature_s)))
    return observed

def ip4_sources(df):
    """Return the distinct IPv4 addresses appearing in df.

    Both the "sourceIPv4Address" and the "destinationIPv4Address" column are
    pooled, so every endpoint of every flow is included.
    """
    endpoint_columns = [df[name] for name in ("sourceIPv4Address", "destinationIPv4Address")]
    pooled = pd.concat(endpoint_columns, ignore_index=True)
    return pooled.unique()

def ip4_sources_given(df, fcond_s, rcond_s):
    """Return the distinct IPv4 addresses that sent traffic matching a condition.

    fcond_s: boolean mask over df rows; where True, the flow's
             "sourceIPv4Address" is collected.
    rcond_s: boolean mask over df rows; where True, the flow's
             "destinationIPv4Address" is collected (the sender of the
             reverse direction).
    """
    forward_senders = df.loc[fcond_s, "sourceIPv4Address"]
    reverse_senders = df.loc[rcond_s, "destinationIPv4Address"]
    senders = pd.concat([forward_senders, reverse_senders], ignore_index=True)
    return senders.unique()

def ip4_sources_characteristic(df, flag):
    """Return the distinct IPv4 addresses that sent traffic showing `flag`.

    The forward mask tests "qofTcpCharacteristics" and the reverse mask tests
    "reverseQofTcpCharacteristics"; in both, all bits of `flag` must be set.
    """
    forward_match = (df["qofTcpCharacteristics"] & flag) == flag
    reverse_match = (df["reverseQofTcpCharacteristics"] & flag) == flag
    return ip4_sources_given(df, forward_match, reverse_match)

Then run this to load the selected flow fields from the file into a dataframe:


In [ ]:
# Fields requested from the IPFIX file; later cells index the dataframe by these names.
flow_fields = (
    "initialTCPFlags", "reverseInitialTCPFlags",
    "unionTCPFlags", "reverseUnionTCPFlags",
    "qofTcpCharacteristics", "reverseQofTcpCharacteristics",
    "tcpSequenceLossCount", "reverseTcpSequenceLossCount",
    "packetDeltaCount", "reversePacketDeltaCount",
    "sourceIPv4Address", "destinationIPv4Address",
)
df = qof.dataframe_from_ipfix(ipfix_file, flow_fields)

Drop all flows with observation loss and all incomplete flows (i.e., those that don't have SYN set on the first packet):


In [ ]:
# Report the flow count before filtering, then after each filter stage.
# NOTE: df is narrowed in place, so re-running this cell starts from the
# already-filtered frame; re-run the load cell first for fresh totals.
print("Total flows:         " + str(len(df)))
filter_stages = (
    ("  of which lossless: ", qof.drop_lossy),
    ("  of which complete: ", qof.drop_incomplete),
)
for stage_label, stage_filter in filter_stages:
    df = stage_filter(df)
    print(stage_label + str(len(df)))

The following snippet reports on ECN usage per flow:


In [ ]:
# Per-flow boolean masks: ECN negotiated in the handshake, and each ECN
# characteristic flag seen in either direction.
ecn_nego_s = ecn_negotiated(df)
ecn_ect0_s, ecn_ect1_s, ecn_ce_s = (
    characteristic_present(df, ecn_flag)
    for ecn_flag in (qof.QOF_ECT0, qof.QOF_ECT1, qof.QOF_CE)
)

# First report group: individual features and negotiation combined with ECT marks.
ecn_single_rows = (
    ("ECN nego", ecn_nego_s),
    ("ECT0", ecn_ect0_s),
    ("ECT1", ecn_ect1_s),
    ("nego->ECT0", ecn_nego_s & ecn_ect0_s),
    ("nego->ECT1", ecn_nego_s & ecn_ect1_s),
    ("CE", ecn_ce_s),
)
# Second report group: flows on which several codepoints co-occur.
ecn_combined_rows = (
    ("ECT0+ECT1", ecn_ect0_s & ecn_ect1_s),
    ("ECT0+CE", ecn_ce_s & ecn_ect0_s),
    ("ECT1+CE", ecn_ce_s & ecn_ect1_s),
    ("all ECx", ecn_ce_s & ecn_ect0_s & ecn_ect1_s),
)

for report_rows in (ecn_single_rows, ecn_combined_rows):
    for row_label, row_mask in report_rows:
        print_proportion(row_label, row_mask)
    print()  # blank line after each group

The following snippet reports on other TCP options per flow:


In [ ]:
# Per-flow boolean masks for the WS, TS and SACK characteristic flags.
tcp_ws_s, tcp_ts_s, tcp_sack_s = (
    characteristic_present(df, option_flag)
    for option_flag in (qof.QOF_WS, qof.QOF_TS, qof.QOF_SACK)
)

for option_label, option_mask in (("WS", tcp_ws_s), ("TS", tcp_ts_s), ("SACK", tcp_sack_s)):
    print_proportion(option_label, option_mask)
print()

The following snippet reports on other TCP options per source:


In [ ]:
# Distinct addresses overall, and those that sent traffic showing each flag.
all_sources = ip4_sources(df)
ws_sources = ip4_sources_characteristic(df, qof.QOF_WS)
ts_sources = ip4_sources_characteristic(df, qof.QOF_TS)
sack_sources = ip4_sources_characteristic(df, qof.QOF_SACK)

# One report line per flag: absolute count and share of all addresses.
source_report = (
    ("WS   observed from %8u sources (%8.5f%%)", ws_sources),
    ("TS   observed from %8u sources (%8.5f%%)", ts_sources),
    ("SACK observed from %8u sources (%8.5f%%)", sack_sources),
)
for line_template, flag_sources in source_report:
    print(line_template % (len(flag_sources), len(flag_sources) * 100 / len(all_sources)))

In [ ]:
# NOTE(review): presumably derives human-readable strings for the TCP flag
# columns of df -- confirm against the qof module whether df is modified in
# place or a new object is returned (the cell displays whatever is returned).
qof.derive_flag_strings(df)

In [ ]:
# NOTE(review): presumably derives human-readable strings for the
# qofTcpCharacteristics columns of df -- confirm against the qof module whether
# df is modified in place or a new object is returned.
qof.derive_tcpchar_strings(df)

In [ ]:
# `negotiated` was never defined anywhere in the notebook, so this cell failed
# with NameError on a fresh kernel. Derive it here from the ECN handshake check.
# NOTE(review): assumes "negotiated" meant the flows that negotiated ECN -- confirm.
negotiated = df[ecn_negotiated(df)]
# Distribution of reverse-direction packet counts across those flows.
negotiated["reversePacketDeltaCount"].hist(bins=100)

In [ ]: