In [2]:
import pandas as pd
# Read the "^"-separated capacity export; header=None because the first line
# of the file is treated as data, not as column names.
capadf = pd.read_csv("data/bifcapab.csv", sep="^", header=None)

# Scala code listing the fields of each record, in order (presumably the
# writer that produced the CSV -- confirm). Kept as a no-op string literal.
"""p.airlineCode,
p.flightNumber.toString,
p.departureDate.toString,
p.cabinCode,
p.daysToDeparture.toString,
"%.2f".format(p.loadFactor),
p.capacity.toString"""

# Name the positional columns following the field order listed above.
capadf.columns = [
    "airline",
    "flightNumber",
    "departureDate",
    "cabinCode",
    "daysToDeparture",
    "loadFactor",
    "capacity",
]

# Fix the dtypes of the columns used downstream in a single pass.
capadf = capadf.astype(
    {
        "loadFactor": float,
        "daysToDeparture": int,
        "flightNumber": str,
    }
)

In [3]:
import numpy as np
# For each combination of flightNumber and departureDate, we need to extract the max load factor.
def extractCurve(df, horizon=365, min_points=120, agg="first"):
    """Build the load-factor curve of one flight, ordered towards departure.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to summarise; must contain the ``daysToDeparture`` and
        ``loadFactor`` columns.
    horizon : int, default 365
        Length of the curve: one slot for each day ``0 .. horizon - 1``.
        Days outside that range are discarded from the curve.
    min_points : int, default 120
        The curve is returned only when ``df`` holds strictly more than this
        many distinct ``daysToDeparture`` values.
    agg : str or callable, default "first"
        Aggregation applied to ``loadFactor`` when several rows share the same
        ``daysToDeparture``.
        NOTE(review): the notebook describes the goal as the *max* load
        factor, yet the historical behaviour is "first"; the default keeps
        that behaviour -- pass ``agg="max"`` if the maximum is what is wanted.

    Returns
    -------
    list of float or float
        A list of ``horizon`` values running from day ``horizon - 1`` down to
        day 0 (missing days are filled with 0), or ``numpy.nan`` when there
        are too few distinct days.
    """
    # One value per distinct daysToDeparture; groupby already indexes the
    # result by that key, so no extra reset/sort/re-index step is required.
    per_day = df.groupby("daysToDeparture")["loadFactor"].agg(agg)

    # The threshold is checked on the number of distinct days *before* the
    # series is padded to the full horizon, and before any curve is built.
    if len(per_day) <= min_points:
        return np.nan

    # Pad to days 0..horizon-1, zero-fill the gaps, then reverse so the curve
    # ends at the departure day.
    return per_day.reindex(range(horizon)).fillna(0).tolist()[::-1]

# Build one curve per (flightNumber, departureDate) pair and shape the result
# for plotting. Written as a single chain so that re-running the cell always
# starts again from capadf.
lpf = (
    capadf.groupby(["flightNumber", "departureDate"])
    .apply(extractCurve)
    # The unnamed apply() result becomes column 0 next to the two group keys.
    .reset_index()
    # extractCurve yields NaN for groups it rejects; those rows are removed.
    .dropna()
    .assign(
        # Vectorised "<departureDate>:<flightNumber>" label instead of a
        # row-by-row apply; assumes both columns hold strings (flightNumber is
        # cast to str at load time; departureDate presumably is read as text).
        title=lambda flights: flights["departureDate"] + ":" + flights["flightNumber"],
        # Column 0 is kept as well, so existing references to lpf[0] still work.
        values=lambda flights: flights[0],
    )
    .sort_values(by="departureDate")
)

In [4]:
# Turn the two plotting columns into a list of {"values": ..., "title": ...}
# dictionaries, one per row of lpf.
data = lpf.loc[:, ["values", "title"]].to_dict(orient="records")

In [5]:
import jupyterviz as jz

In [6]:
# Number of records held in `data` (shown as the cell output).
len(data)


Out[6]:
990

In [7]:
# Pass the records to jupyterviz using its "horizon" chart type
# (presumably one chart per record -- confirm against jupyterviz).
jz.viz(type="horizon",data=data)


Out[7]:

In [8]:
# Same records, rendered with the jupyterviz "sparkline" chart type
# (presumably one chart per record -- confirm against jupyterviz).
jz.viz(type="sparkline",data=data)


Out[8]:

In [ ]: