To analyze a QoF-generated IPFIX file for timestamp frequencies, set the filename in the following code block, then run it:
In [ ]:
    
# Path to the QoF-generated IPFIX file to analyze; edit this before running the cells below.
ipfix_file = "../test/mawi-0330-3hours.ipfix"
    
Run the following code to import the required modules and register the information elements (IANA defaults, RFC 5103 reverse IEs, and QoF-specific IEs) used below:
In [ ]:
    
import ipfix
import qof
import pandas as pd
import numpy as np
# Register the information elements (IEs) and type handling before any file is read.
# NOTE(review): the call order is kept as-is; the reverse IEs are presumably derived
# from the IEs already registered — confirm before reordering.
ipfix.ie.use_iana_default()  # loads IANA default IEs from module core definitions
ipfix.ie.use_5103_default()  # loads reverse IEs for RFC5103 biflows
ipfix.ie.use_specfile("qof.iespec")  # loads enterprise-specific IEs for QoF (relative path; presumably resolved against the working directory — TODO confirm)
ipfix.types.use_integer_ipv4()  # accelerate dataframe processing of per-IP stuff
    
and run this to load the file into the dataframe (for biflow data):
In [ ]:
    
# Biflow load: request each IE together with its reverse-direction counterpart.
df = qof.dataframe_from_ipfix(
    ipfix_file,
    (
        "tcpTimestampFrequency",
        "reverseTcpTimestampFrequency",
        "meanTcpChirpMilliseconds",
        "reverseMeanTcpChirpMilliseconds",
        "packetDeltaCount",
        "reversePacketDeltaCount",
        "tcpSequenceLossCount",
        "reverseTcpSequenceLossCount",
    ),
)
    
or, for uniflow data, run this one instead (it rebinds `df` with forward-direction columns only):
In [ ]:
    
# Uniflow load: forward-direction IEs only. Rebinds `df`, so any frame loaded
# by an earlier cell is replaced.
df = qof.dataframe_from_ipfix(
    ipfix_file,
    (
        "tcpTimestampFrequency",
        "meanTcpChirpMilliseconds",
        "packetDeltaCount",
        "tcpSequenceLossCount",
    ),
)
    
Classify timestamp distribution by probable frequency:
In [ ]:
    
# Flag each flow whose forward timestamp frequency lies inside one of the
# accepted windows (bounds inclusive) around 10, 100, 250 and 1000.
df["ts10"] = df["tcpTimestampFrequency"].between(8, 12)
df["ts100"] = df["tcpTimestampFrequency"].between(80, 120)
df["ts250"] = df["tcpTimestampFrequency"].between(200, 300)
df["ts1000"] = df["tcpTimestampFrequency"].between(800, 1200)

# A flow is "valid" when it matched any of the windows.
df["tsvalid"] = df[["ts10", "ts100", "ts250", "ts1000"]].any(axis=1)

# Relative deviation from the matched window's nominal value; 0 when no window
# matched. np.select takes the first true condition, like the nested np.where.
df["tserror"] = np.select(
    [df["ts10"], df["ts100"], df["ts250"], df["ts1000"]],
    [
        (10 - df["tcpTimestampFrequency"]).abs() / 10,
        (100 - df["tcpTimestampFrequency"]).abs() / 100,
        (250 - df["tcpTimestampFrequency"]).abs() / 250,
        (1000 - df["tcpTimestampFrequency"]).abs() / 1000,
    ],
    default=0,
)
    
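(and the same classification for the reverse direction; this requires the biflow load above, since the uniflow load does not include `reverseTcpTimestampFrequency`):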
In [ ]:
    
# Reverse-direction classification. Needs the reverseTcpTimestampFrequency
# column, which only the biflow load requests.
#
# Both bounds of every window are tested against the reverse frequency. The
# previous version checked the lower bound against the forward column
# (tcpTimestampFrequency), so the rts* flags mixed the two directions.
df["rts10"] = df["reverseTcpTimestampFrequency"].between(8, 12)
df["rts100"] = df["reverseTcpTimestampFrequency"].between(80, 120)
df["rts250"] = df["reverseTcpTimestampFrequency"].between(200, 300)
df["rts1000"] = df["reverseTcpTimestampFrequency"].between(800, 1200)

# A reverse flow is "valid" when it matched any of the windows.
df["rtsvalid"] = df["rts10"] | df["rts100"] | df["rts250"] | df["rts1000"]

# Relative deviation from the matched window's nominal value; 0 when no window
# matched. np.select takes the first true condition in list order.
df["rtserror"] = np.select(
    [df["rts10"], df["rts100"], df["rts250"], df["rts1000"]],
    [
        (10 - df["reverseTcpTimestampFrequency"]).abs() / 10,
        (100 - df["reverseTcpTimestampFrequency"]).abs() / 100,
        (250 - df["reverseTcpTimestampFrequency"]).abs() / 250,
        (1000 - df["reverseTcpTimestampFrequency"]).abs() / 1000,
    ],
    default=0,
)
    
A graphical display of the timestamp frequency spectrum:
In [ ]:
    
# Packet-weighted histogram of the forward frequency, restricted to the
# 900-1050 range with 150 bins.
df["tcpTimestampFrequency"].hist(
    bins=150,
    range=(900, 1050),
    weights=df["packetDeltaCount"],
)
    
and the error spectrum:
In [ ]:
    
# Packet-weighted histogram of the relative error, over valid flows only.
# The same boolean mask selects both the values and their weights so the two
# stay aligned.
df.loc[df["tsvalid"], "tserror"].hist(
    bins=100,
    weights=df.loc[df["tsvalid"], "packetDeltaCount"],
)
    
In [ ]:
    
# Summary statistics of the relative error, over valid flows only (unweighted).
df.loc[df["tsvalid"], "tserror"].describe()
    
and the proportion of flows whose timestamp frequency was classified as valid:
In [ ]:
    
# Fraction of flows flagged valid. The mean of a boolean column is the share
# of True values; unlike value_counts()[True] / len(df), it does not raise
# KeyError when no flow is valid.
df["tsvalid"].mean()
    
In [ ]: