This notebook is a work in progress, but it should give you an idea of how to analyze TCP options in IPython notebooks.
To analyze a QoF-generated IPFIX file for the presence of TCP options, set the filename in the following code block, then run it:
In [ ]:
# Path of the QoF-generated IPFIX file to analyze; edit this before running
# the cells below, which read it when loading the dataframe.
ipfix_file = "../test/tsdag-nonat-191800.ipfix"
Run the following code to initialize required modules and define some useful functions:
In [ ]:
import ipfix
import qof
import pandas as pd
# One-time setup of the ipfix module: register the information elements (IEs)
# this notebook needs and pick the IPv4 address representation.
# NOTE(review): the reverse IEs are presumably derived from the IEs already
# loaded, so keep this call order — confirm against the ipfix module.
ipfix.ie.use_iana_default() # loads IANA default IEs from module core definitions
ipfix.ie.use_5103_default() # loads reverse IEs for RFC5103 biflows
# The spec file is given as a relative path, so "qof.iespec" has to be
# reachable from the notebook's working directory.
ipfix.ie.use_specfile("qof.iespec") # loads enterprise-specific IEs for QoF
ipfix.types.use_integer_ipv4() # accelerate dataframe processing of per-IP stuff
def characteristic_present(df, flag):
    """Return a boolean mask of flows showing `flag` in either direction.

    A flow matches when every bit of `flag` is set in its
    "qofTcpCharacteristics" column or in its
    "reverseQofTcpCharacteristics" column.
    """
    seen_forward = (df["qofTcpCharacteristics"] & flag) == flag
    seen_reverse = (df["reverseQofTcpCharacteristics"] & flag) == flag
    return seen_forward | seen_reverse
def ecn_negotiated(df):
    """Return a boolean mask of flows whose handshake negotiated ECN.

    A flow counts as negotiated when, looking only at the SYN, ACK, ECE and
    CWR bits:

    * "initialTCPFlags" is exactly SYN+ECE+CWR (the ECN-setup SYN), and
    * "reverseInitialTCPFlags" is exactly SYN+ACK+ECE (the ECN-setup
      SYN-ACK), i.e. CWR must be clear.
    """
    ecn_syn = qof.TCP_SYN | qof.TCP_ECE | qof.TCP_CWR
    ecn_ack = qof.TCP_SYN | qof.TCP_ACK | qof.TCP_ECE
    ecn_mask = ecn_syn | ecn_ack

    syn_ok = (df["initialTCPFlags"] & ecn_mask) == ecn_syn
    # Compare under the full mask, as for the SYN: a SYN-ACK that also has
    # CWR set merely reflects the SYN's flags and is not an ECN-setup
    # SYN-ACK, so it must not be counted as a successful negotiation.
    synack_ok = (df["reverseInitialTCPFlags"] & ecn_mask) == ecn_ack
    return syn_ok & synack_ok
def print_proportion(feature, feature_s):
    """Print how many flows show a feature and return that count.

    Parameters:
        feature: label printed at the start of the report line.
        feature_s: boolean Series with one entry per flow, True where the
            feature was observed.

    Returns:
        The number of True entries, or None when there are none (in which
        case a "not observed" line is printed instead of a proportion).
    """
    # Summing a boolean Series counts its True entries. Unlike indexing
    # value_counts() with True, this cannot raise when no entry is True, so
    # no blanket exception handler is needed and real errors stay visible.
    observed = int(feature_s.sum())
    if not observed:
        print("%-10s not observed" % feature)
        return None

    print("%-10s observed on %8u flows (%8.5f%%)" %
          (feature, observed, observed * 100 / len(feature_s)))
    return observed
def ip4_sources(df):
    """Return the distinct IPv4 addresses seen at either end of any flow.

    Despite the name, both the "sourceIPv4Address" and the
    "destinationIPv4Address" columns are pooled before de-duplication.
    The result is whatever ``Series.unique()`` yields for the pooled column.
    """
    endpoints = pd.concat(
        [df["sourceIPv4Address"], df["destinationIPv4Address"]],
        ignore_index=True,  # documented as a bool; the old code passed 1
    )
    return endpoints.unique()
def ip4_sources_given(df, fcond_s, rcond_s):
    """Return the distinct addresses that sent traffic matching a condition.

    Parameters:
        df: flow dataframe with "sourceIPv4Address" and
            "destinationIPv4Address" columns.
        fcond_s: row selector for flows matching in the forward direction;
            their source address is taken.
        rcond_s: row selector for flows matching in the reverse direction;
            their destination address is taken.

    Returns:
        The result of ``Series.unique()`` over the pooled addresses.
    """
    matched_forward = df[fcond_s]["sourceIPv4Address"]
    matched_reverse = df[rcond_s]["destinationIPv4Address"]
    pooled = pd.concat(
        [matched_forward, matched_reverse],
        ignore_index=True,  # documented as a bool; the old code passed 1
    )
    return pooled.unique()
def ip4_sources_characteristic(df, flag):
    """Return the distinct addresses whose traffic showed `flag`.

    Builds one mask per direction (all bits of `flag` set in
    "qofTcpCharacteristics" or "reverseQofTcpCharacteristics" respectively)
    and hands both to ip4_sources_given.
    """
    forward_has_flag = (df["qofTcpCharacteristics"] & flag) == flag
    reverse_has_flag = (df["reverseQofTcpCharacteristics"] & flag) == flag
    return ip4_sources_given(df, forward_has_flag, reverse_has_flag)
Then run this to load the file into a dataframe:
In [ ]:
# Load the flows from ipfix_file into a dataframe. The tuple lists the
# information elements handed to qof.dataframe_from_ipfix: initial and union
# TCP flags, QoF TCP characteristics, sequence loss counts and packet counts
# (each in forward and reverse direction), plus the IPv4 endpoints.
# NOTE(review): presumably these become the dataframe's columns, since the
# cells below index df by these names — confirm against the qof module.
df = qof.dataframe_from_ipfix(ipfix_file, ("initialTCPFlags","reverseInitialTCPFlags",
"unionTCPFlags","reverseUnionTCPFlags",
"qofTcpCharacteristics", "reverseQofTcpCharacteristics",
"tcpSequenceLossCount", "reverseTcpSequenceLossCount",
"packetDeltaCount", "reversePacketDeltaCount",
"sourceIPv4Address", "destinationIPv4Address"))
Drop all incomplete flows (i.e., those that don't have SYN set on the first packet) and all flows with observation loss:
In [ ]:
# Narrow df step by step and report how many flows survive each filter.
print("Total flows: ", len(df), sep="")

df = qof.drop_lossy(df)
print(" of which lossless: ", len(df), sep="")

df = qof.drop_incomplete(df)
print(" of which complete: ", len(df), sep="")
The following snippet reports on ECN usage per flow:
In [ ]:
# Per-flow boolean masks: ECN negotiated in the handshake, and each ECN
# codepoint characteristic observed in either direction.
ecn_nego_s = ecn_negotiated(df)
ecn_ect0_s, ecn_ect1_s, ecn_ce_s = (
    characteristic_present(df, codepoint)
    for codepoint in (qof.QOF_ECT0, qof.QOF_ECT1, qof.QOF_CE)
)

# Individual features, and codepoints seen on flows that negotiated ECN.
for label, mask in (
    ("ECN nego", ecn_nego_s),
    ("ECT0", ecn_ect0_s),
    ("ECT1", ecn_ect1_s),
    ("nego->ECT0", ecn_nego_s & ecn_ect0_s),
    ("nego->ECT1", ecn_nego_s & ecn_ect1_s),
    ("CE", ecn_ce_s),
):
    print_proportion(label, mask)
print()

# Flows on which several codepoints were observed together.
for label, mask in (
    ("ECT0+ECT1", ecn_ect0_s & ecn_ect1_s),
    ("ECT0+CE", ecn_ce_s & ecn_ect0_s),
    ("ECT1+CE", ecn_ce_s & ecn_ect1_s),
    ("all ECx", ecn_ce_s & ecn_ect0_s & ecn_ect1_s),
):
    print_proportion(label, mask)
print()
The following snippet reports on other TCP options per flow:
In [ ]:
# Per-flow boolean masks for the WS, TS and SACK characteristics, observed
# in either direction of the flow.
tcp_ws_s, tcp_ts_s, tcp_sack_s = (
    characteristic_present(df, option)
    for option in (qof.QOF_WS, qof.QOF_TS, qof.QOF_SACK)
)

for label, mask in (("WS", tcp_ws_s), ("TS", tcp_ts_s), ("SACK", tcp_sack_s)):
    print_proportion(label, mask)
print()
The following snippet reports on other TCP options per source:
In [ ]:
# Denominator for the percentages: every distinct address returned by
# ip4_sources for the filtered dataframe.
all_sources = ip4_sources(df)
source_total = len(all_sources)

ws_sources = ip4_sources_characteristic(df, qof.QOF_WS)
ws_share = 100 * len(ws_sources) / source_total
print("WS observed from %8u sources (%8.5f%%)" % (len(ws_sources), ws_share))

ts_sources = ip4_sources_characteristic(df, qof.QOF_TS)
ts_share = 100 * len(ts_sources) / source_total
print("TS observed from %8u sources (%8.5f%%)" % (len(ts_sources), ts_share))

sack_sources = ip4_sources_characteristic(df, qof.QOF_SACK)
sack_share = 100 * len(sack_sources) / source_total
print("SACK observed from %8u sources (%8.5f%%)" % (len(sack_sources), sack_share))
In [ ]:
# NOTE(review): judging by the name, this presumably derives human-readable
# TCP flag strings for df; whether it modifies df in place or returns a new
# frame is not visible here — confirm against the qof module.
qof.derive_flag_strings(df)
In [ ]:
# NOTE(review): judging by the name, this presumably derives human-readable
# strings for the QoF TCP characteristics in df; whether it modifies df in
# place or returns a new frame is not visible here — confirm against qof.
qof.derive_tcpchar_strings(df)
In [ ]:
# Histogram of reverse-direction packet counts.
# NOTE(review): `negotiated` was not defined in any cell of this notebook, so
# this cell raised NameError. It is assumed to mean the flows that negotiated
# ECN; confirm that this is the intended subset.
negotiated = df[ecn_negotiated(df)]
negotiated["reversePacketDeltaCount"].hist(bins=100)
In [ ]: