In [ ]:
# Path to the bzip2-compressed IPFIX flow file that the cells below load via bz2.open().
ipfix_file = "../test/auckland-midday.ipfix.bz2"

In [ ]:
import ipfix
import qof
import bz2
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np

# Configure the ipfix module before any IPFIX data is read.
# NOTE(review): these calls presumably mutate module-global registries, so they
# should run once per kernel and before the data-loading cell -- confirm.
ipfix.ie.use_iana_default() # loads IANA default IEs from module core definitions
ipfix.ie.use_5103_default() # loads reverse IEs for RFC5103 biflows
# NOTE(review): "qof.iespec" is a relative path; presumably resolved against the
# notebook's working directory -- confirm.
ipfix.ie.use_specfile("qof.iespec") # loads enterprise-specific IEs for QoF

ipfix.types.use_integer_ipv4() # accelerate dataframe processing of per-IP stuff

In [ ]:
def plot_rate_duration(df, filename=None):
    """Plot a hexbin of bidirectional data rate against flow duration.

    Each flow is placed at (duration, rate), where rate is the sum of forward
    and reverse octets, times 8, divided by the duration. Every bin is
    coloured by the sum of the forward and reverse packet counts of the flows
    falling into it, on a log scale.

    Flows whose duration or total octet count is not strictly positive are
    left out: their rate would be infinite, NaN or zero, and none of those
    can be placed on the log-scaled rate axis. Flows with a missing packet
    count are left out as well so they cannot turn a bin's sum into NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Flow table providing the columns ``durationSeconds``,
        ``octetDeltaCount``, ``reverseOctetDeltaCount``,
        ``packetDeltaCount`` and ``reversePacketDeltaCount``.
    filename : str, optional
        When given (and non-empty), the figure is also written to this path
        with ``plt.savefig``.
    """
    duration = df["durationSeconds"]
    total_octets = df["octetDeltaCount"] + df["reverseOctetDeltaCount"]
    total_packets = df["packetDeltaCount"] + df["reversePacketDeltaCount"]

    # Comparisons against NaN are False, so this mask also drops rows with
    # missing durations or octet counts.
    plottable = (duration > 0) & (total_octets > 0) & total_packets.notna()
    duration = duration[plottable]
    rate_bps = total_octets[plottable] * 8 / duration
    total_packets = total_packets[plottable]

    plt.figure(figsize=(5,4))
    # hexbin needs at least one point to derive its extent; with nothing to
    # plot (e.g. a trace without any matching flows) draw empty, labelled axes.
    if len(duration) > 0:
        plt.hexbin(x = duration,
                   y = rate_bps,
                   C = total_packets,
                   reduce_C_function = np.sum,
                   yscale='log',
                   bins='log',
                   cmap = plt.cm.binary)
        cb = plt.colorbar()
        cb.set_label("log10(packets)")
    plt.xlabel("duration (s)")
    plt.ylabel("data rate (bps)")
    if filename:
        plt.savefig(filename)

In [ ]:
# Information elements requested for every flow: timestamps, packet/octet
# counters (plain and transport-level) and TCP sequence / sequence-loss
# counters, each as a forward/reverse pair.
flow_ies = (
    "flowStartMilliseconds", "flowEndMilliseconds",
    "packetDeltaCount", "reversePacketDeltaCount",
    "transportPacketDeltaCount", "reverseTransportPacketDeltaCount",
    "octetDeltaCount", "reverseOctetDeltaCount",
    "transportOctetDeltaCount", "reverseTransportOctetDeltaCount",
    "tcpSequenceCount", "reverseTcpSequenceCount",
    "tcpSequenceLossCount", "reverseTcpSequenceLossCount",
)

# Decompress the capture on the fly and read the flows into a dataframe.
with bz2.open(ipfix_file) as stream:
    df = qof.dataframe_from_ipfix_stream(stream, flow_ies)

# Post-process df once the stream is closed.
# NOTE(review): derive_duration presumably adds the "durationSeconds" column
# that the plotting cells read -- confirm against the qof module.
qof.coerce_timestamps(df)
qof.derive_duration(df)

In [ ]:
# Boolean mask: True for flows where either direction has a positive
# TCP sequence loss count.
lossy = df["tcpSequenceLossCount"].gt(0) | df["reverseTcpSequenceLossCount"].gt(0)

# Partition the flows by that mask.
lossydf = df.loc[lossy]
notlossydf = df.loc[~lossy]

In [ ]:
# Overview of all flows: total octets (forward + reverse) against duration,
# with each bin coloured by the summed packet count of its flows.
plt.figure(figsize=(9,8))
plt.hexbin(x = df["durationSeconds"],
           y = df["octetDeltaCount"] + df["reverseOctetDeltaCount"],
           C = df["packetDeltaCount"] + df["reversePacketDeltaCount"],
           reduce_C_function = np.sum,
           yscale='log', bins='log')
cb = plt.colorbar()
cb.set_label("log10(packets)")
# Label the axes so the figure can be read on its own.
plt.xlabel("duration (s)")
plt.ylabel("octets (both directions)")

In [ ]:
# Rate vs. duration for the flows flagged as lossy; also saved as a PDF.
plot_rate_duration(lossydf, filename="auckland_lossy_flows.pdf")

In [ ]:
# Rate vs. duration for the flows not flagged as lossy; also saved as a PDF.
plot_rate_duration(notlossydf, filename="auckland_not_lossy_flows.pdf")

In [ ]: