In [2]:
import pandas as pd
# Load the "^"-separated capacity export; the file has no header row.
capadf = pd.read_csv("data/bifcapab.csv", sep="^", header=None)

# Scala Code
# (kept verbatim as an inert string: it lists the fields in the order the
# columns are named below)
"""p.airlineCode,
p.flightNumber.toString,
p.departureDate.toString,
p.cabinCode,
p.daysToDeparture.toString,
"%.2f".format(p.loadFactor),
p.capacity.toString"""

# Name the positional columns in that same order.
capadf.columns = [
    "airline",
    "flightNumber",
    "departureDate",
    "cabinCode",
    "daysToDeparture",
    "loadFactor",
    "capacity",
]

# Cast the three columns whose dtype the later cells depend on:
# numeric load factor / day offset, and flight number as text.
capadf = capadf.astype(
    {
        "loadFactor": float,
        "daysToDeparture": int,
        "flightNumber": str,
    }
)
In [3]:
import numpy as np
# For each (flightNumber, departureDate) group we build one load-factor curve
# indexed by days-to-departure.
# NOTE(review): the original comment here spoke of the "max loadfactor", but the
# code keeps the *first* non-null reading per day — confirm which is intended.
def extractCurve(df, horizon=365, min_points=120):
    """Turn one flight's observations into a fixed-length load-factor curve.

    Parameters
    ----------
    df : pandas.DataFrame
        Observations for a single flight; must contain the columns
        ``daysToDeparture`` and ``loadFactor``.
    horizon : int, default 365
        Length of the returned curve. Days ``0 .. horizon - 1`` are kept;
        observations outside that range are dropped from the curve (they
        still count towards ``min_points``).
    min_points : int, default 120
        The flight needs strictly more than this many distinct
        ``daysToDeparture`` values, otherwise it is rejected.

    Returns
    -------
    list of float or float
        A list of ``horizon`` load factors ordered from the furthest day
        (``horizon - 1``) down to day 0, with 0 for days that have no
        reading; ``numpy.nan`` when the flight has too few distinct days.
    """
    # One value per day: the first non-null loadFactor seen for that day.
    # groupby already returns the days in ascending order.
    per_day = df.groupby("daysToDeparture")["loadFactor"].first()

    # Reject sparse flights before doing any further work.
    if len(per_day) <= min_points:
        return np.nan

    # Align on the full day range, treat missing days as 0, then flip so the
    # curve runs towards the departure date.
    aligned = per_day.reindex(range(horizon)).fillna(0)
    return aligned.tolist()[::-1]
# Build one row per (flightNumber, departureDate) holding that flight's curve.
# extractCurve returns NaN for flights it rejects, so dropna() removes them.
lpf = (
    capadf.groupby(["flightNumber", "departureDate"])
    .apply(extractCurve)
    .reset_index()  # the unnamed apply result becomes column 0
    .dropna()
    .assign(
        # Vectorised string concatenation instead of a row-wise apply: faster,
        # and still well-defined when no flight survives the filter.
        title=lambda frame: frame["departureDate"] + ":" + frame["flightNumber"],
        # Column 0 is kept as well, so existing references to it keep working.
        values=lambda frame: frame[0],
    )
    .sort_values(by="departureDate")
)
In [4]:
# Keep only the two fields passed on to the charts and emit one
# {"values": ..., "title": ...} dict per row of lpf.
data = lpf.loc[:, ["values", "title"]].to_dict(orient="records")
In [5]:
import jupyterviz as jz
In [6]:
# Show how many records were produced for plotting.
len(data)
Out[6]:
In [7]:
# Hand the records to jupyterviz with type="horizon".
# NOTE(review): presumably renders one horizon chart per record — confirm against jupyterviz.
jz.viz(type="horizon",data=data)
Out[7]:
In [8]:
# Same records, this time with type="sparkline".
# NOTE(review): presumably renders one sparkline per record — confirm against jupyterviz.
jz.viz(type="sparkline",data=data)
Out[8]:
In [ ]: