In [135]:
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
# Make the graphs a bit prettier.
# 'display.mpl_style' is only known to older pandas releases; where the option
# is missing, pandas raises OptionError (a KeyError subclass). In that case
# fall back to matplotlib's own 'ggplot' style sheet instead of aborting the
# very first cell of the notebook.
try:
    pd.set_option('display.mpl_style', 'default')
except KeyError:
    plt.style.use('ggplot')
# Set the figure size last so that no style setting can override it.
plt.rcParams['figure.figsize'] = (15, 5)
In [15]:
# Load the semicolon-separated count file into one DataFrame.
raw_data = pd.read_csv(
    '../data/2014_BAB_S.txt',
    sep=";",
)
In [20]:
# Work on a copy: a plain `data = raw_data` only creates a second name for the
# same frame, so the column assignments below would silently modify the
# freshly loaded raw data as well.
data = raw_data.copy()
# Road class and road number as strings, so that they can be concatenated.
data[['Strklas', 'Strnum']] = data[['Strklas', 'Strnum']].astype(str)
# "Str" = road class followed by road number (string concatenation).
data["Str"] = data['Strklas'] + data['Strnum']
In [21]:
# Peek at the first row to check the columns after the preparation step.
data[:1]
Out[21]:
In [27]:
# Counting stations ("Zst") used throughout this notebook.
# Constraint noted by the author: axle data ("Achsendaten") should also be
# available at the chosen stations.
zsts_num = [9627, 9629]
# One row per station: road class/number, location, and whether the station
# sits on the short ("kurz") or the long ("lang") route.
zsts = pd.DataFrame(dict(
    zst=zsts_num,
    Strklas=['A', 'A'],
    Strnum=[3, 93],
    Ort=['Schlüsselfeld', 'Fischbach am Inn'],
    Route=['kurz', 'lang'],
))
zsts
Out[27]:
In [145]:
# Default column selection: station, road, date, hour and the truck counts
# for both directions.
standard_mask = [
    "Zst",
    "Str",
    "Datum",
    "Stunde",
    "Lkw_R1",
    "Lkw_R2",
]
# A single day of interest, written in the same integer form that the
# "Datum" column is compared against elsewhere in this notebook.
engins_birthday = 140221
def slice_between(low, high, data=None):
    """Return the rows whose "Datum" lies in the half-open range [low, high).

    Parameters
    ----------
    low : lower bound, inclusive; compared against the "Datum" column.
    high : upper bound, exclusive.
    data : DataFrame with a "Datum" column. When omitted, the notebook-level
        ``data`` frame is used.

    Returns
    -------
    DataFrame with the matching rows of ``data``, in their original order.
    The input frame is not modified.
    """
    if data is None:
        # Resolve the default at call time. A `data=data` default would be
        # frozen when the def cell runs and would keep pointing at an old
        # frame after the data-preparation cell is executed again.
        data = globals()["data"]
    datum = data["Datum"]
    # One combined boolean mask instead of two chained filters.
    return data[(datum >= low) & (datum < high)]
def _has_filter(value):
    """Tell whether an optional filter argument was actually supplied.

    ``None``/``False`` and empty containers mean "no filter"; scalars keep
    their ordinary truthiness. Containers are judged by ``len`` because
    pandas/numpy objects refuse a plain truth test.
    """
    if value is None or value is False:
        return False
    try:
        return len(value) > 0
    except TypeError:  # scalars have no len()
        return bool(value)


def get_data_slice(data=None, zst_set=None, mask=False, date=False):
    """Select rows by counting station and date, optionally keeping only some columns.

    Parameters
    ----------
    data : DataFrame with at least a "Zst" column (plus "Datum" when ``date``
        is given). When omitted, the notebook-level ``data`` frame is used.
    zst_set : a single station number or a collection of station numbers to
        keep. When omitted, all stations listed in ``zsts_num`` are used.
    mask : column label or list of column labels to return; ``False`` (the
        default) returns all columns.
    date : a single "Datum" value or a collection of them; ``False`` (the
        default) applies no date filter.

    Returns
    -------
    The filtered frame, or ``data[mask]`` of the filtered frame when a mask
    is given.
    """
    import numbers  # local import: the block stays self-contained

    # Resolve both defaults at call time. Defaults written as `data=data` /
    # `set(zsts_num)` are evaluated once when the def cell runs and go stale
    # as soon as the cells defining those names are executed again.
    if data is None:
        data = globals()["data"]
    if zst_set is None:
        zst_set = set(zsts_num)
    elif isinstance(zst_set, numbers.Integral):
        # numbers.Integral also covers numpy integers (e.g. an id taken from
        # a DataFrame column), which an exact `type(...) is int` test rejects.
        zst_set = set([zst_set])
    data = data[data["Zst"].isin(zst_set)]

    if _has_filter(date):
        if isinstance(date, numbers.Integral):
            data = data[data["Datum"] == date]
        else:
            data = data[data["Datum"].isin(date)]

    if _has_filter(mask):
        return data[mask]
    return data
In [218]:
# One frame per month; each range is half-open, i.e. it runs from the first
# day of the month up to (but excluding) the first day of the next month.
feb = slice_between(low=140201, high=140301)
mar = slice_between(low=140301, high=140401)
apr = slice_between(low=140401, high=140501)
In [254]:
# For every weekday of March: median truck count per hour at both stations,
# plotted for direction R1 and written to one PNG per weekday.
mask = ["Wotag", "Stunde", "Lkw_R1", "Lkw_R2"]
agg_kurz = get_data_slice(data=mar, zst_set=9627, mask=mask)
agg_lang = get_data_slice(data=mar, zst_set=9629, mask=mask)
days = ["Montag", "Dienstag", "Mittwoch", "Donnerstag", "Freitag", "Samstag", "Sonntag"]
for i, day_name in enumerate(days):
    # "Wotag" is matched against 1..7, in the order of the `days` list.
    wotag = i + 1
    day_kurz = agg_kurz[agg_kurz["Wotag"] == wotag].groupby("Stunde").median()[["Lkw_R1", "Lkw_R2"]]
    day_lang = agg_lang[agg_lang["Wotag"] == wotag].groupby("Stunde").median()[["Lkw_R1", "Lkw_R2"]]
    # Side by side: both directions of 9627 first, then both directions of 9629.
    day = pd.concat([day_kurz, day_lang], axis=1)
    day.columns = ["A3 R1", "A3 R2", "A93 R1", "A93 R2"]
    fig = day[["A3 R1", "A93 R1"]].plot(title=day_name + " R1").get_figure()
    # The file name carries the zero-based position of the weekday.
    fig.savefig("vm_maerz_tag" + str(i) + "s_lkws_pro_stunde_R1.png")
    # Direction R2 is deliberately not plotted; the author's note says it
    # shows no interesting switch.
In [255]:
# Aggregate: sum of trucks per direction and day, for each of the two stations.
mask = ["Datum", "Lkw_R1", "Lkw_R2"]
agg_kurz = get_data_slice(zst_set=9627, mask=mask).groupby("Datum").sum()
# print(...) with a single argument and label-based .loc behave the same on
# Python 2 and 3; the print statement and the .ix indexer used before do not
# exist in current Python / pandas versions.
print(agg_kurz.loc[engins_birthday])
agg_lang = get_data_slice(zst_set=9629, mask=mask).groupby("Datum").sum()
print(agg_lang.loc[engins_birthday])

# Both stations side by side, one row per day.
r_both = pd.concat(
    [agg_kurz["Lkw_R1"], agg_kurz["Lkw_R2"], agg_lang["Lkw_R1"], agg_lang["Lkw_R2"]],
    axis=1,
)
r_both.columns = ["kurz R1", "kurz R2", "lang R1", "lang R2"]
# slice_between filters on a "Datum" column, so expose the index as one.
r_both["Datum"] = r_both.index

# March only, without the helper "Datum" column, labelled by motorway.
mask = ["kurz R1", "kurz R2", "lang R1", "lang R2"]
df = slice_between(140301, 140401, r_both)[mask]
df.columns = ["A3 R1", "A3 R2", "A93 R1", "A93 R2"]
fig = df.plot(title="Maerz").get_figure()
fig.savefig('vm_maerz_lkws_pro_tag.png')
In [253]:
# 15.03.2014 (a Saturday): trucks per hour in direction R1 at both stations.
mask = ["Stunde", "Lkw_R1"]
kurz = get_data_slice(data=mar, zst_set=9627, date=140315, mask=mask)
kurz = kurz.set_index("Stunde")
lang = get_data_slice(data=mar, zst_set=9629, date=140315, mask=mask)
lang = lang.set_index("Stunde")
# Align the two hourly series on "Stunde" and label them by motorway.
both = pd.concat([kurz["Lkw_R1"], lang["Lkw_R1"]], axis=1)
both.columns = ['A3', 'A93']
both.plot(title="15.03.14")
Out[253]:
In [336]:
# Holidays ("ferien"): average count per row for each "Fahrtzw" category,
# over both stations in March.
month = get_data_slice(zst_set=set([9627, 9629]), data=mar)
m_sum = month[["Fahrtzw", "KFZ_R1", "KFZ_R2", "Lkw_R1", "Lkw_R2"]].groupby("Fahrtzw").sum()
m_size = month[["Fahrtzw", "KFZ_R1", "KFZ_R2", "Lkw_R1", "Lkw_R2"]].groupby("Fahrtzw").size()
# Column sums divided by the number of rows in each group.
ferien = (m_sum.T / m_size).T
# rename() returns a new frame; without assigning the result the readable
# labels are lost and the chart keeps the raw category codes.
ferien = ferien.rename(index={"u": 'Urlaub', "s": "Feiertag", "w": "Werktag"})
ferien.plot(kind='barh')
Out[336]:
In [ ]: