In [1]:
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
pd.set_option('display.mpl_style', 'default') # Make the graphs a bit prettier
plt.rcParams['figure.figsize'] = (15, 5)
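Note: pandas' display.mpl_style option only exists in older pandas releases. On newer pandas/matplotlib versions the same "prettier" look can be approximated with matplotlib's bundled style sheets; a minimal sketch, assuming matplotlib >= 1.4 (which ships the 'ggplot' style):
# alternative styling for newer pandas/matplotlib versions
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = (15, 5)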
In [2]:
raw_data = pd.read_csv('../data/2014_BAB_S.txt', sep=";")
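The cells below rely on the columns Strklas, Strnum, Zst, Datum, Stunde, Lkw_R1 and Lkw_R2, so a quick sanity check that the semicolon-separated file parsed as expected can save debugging later; a small inspection sketch:
# confirm the expected columns arrived and peek at a few rows
print raw_data.columns.tolist()
print raw_data.head()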
In [14]:
# work on a copy so raw_data stays untouched
data = raw_data.copy()
data[['Strklas', 'Strnum', 'Zst']] = data[['Strklas', 'Strnum', 'Zst']].astype(str)
data["Str"] = data['Strklas'] + data['Strnum']
In [5]:
def get_data_slice(data=data,
                   zst_set=False,
                   mask=False,
                   date=False):
    # check if one or more zsts are given
    if zst_set:
        # Zst was cast to str above, so normalize a single zst to a set of strings
        if type(zst_set) in (int, str):
            zst_set = set([str(zst_set)])
        data = data[data["Zst"].isin(zst_set)]
    # check if one or more dates are given
    if date:
        if type(date) is int:
            data = data[data["Datum"] == date]
        else:
            data = data[data["Datum"].isin(date)]
    # check if a mask for columns is given
    if mask:
        return data[mask]
    else:
        return data
def get_hourly_normalized_lkw_numbers(date, hour, zst, direction=False):
    df = get_data_slice(date=date)
    df = df[df["Stunde"] == hour]
    d = df.set_index("Zst")
    # sum over both directions unless a single direction ("R1"/"R2") is requested
    if not direction:
        d["result"] = d[["Lkw_R1", "Lkw_R2"]].sum(axis=1)
    else:
        d["result"] = d["Lkw_" + direction]
    r = d["result"]
    r_max = r.max()
    r_min = r.min()
    # min-max normalize across all counting stations for this date and hour
    n = (r - r_min) / (r_max - r_min)
    return n[zst]
# usage: date, hour, zst, [direction]
print get_hourly_normalized_lkw_numbers(140315, 2, "9008")  # aggregated over both directions
print get_hourly_normalized_lkw_numbers(140315, 2, "9008", "R1")
print get_hourly_normalized_lkw_numbers(140315, 2, "9008", "R2")
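get_hourly_normalized_lkw_numbers min-max normalizes one station's truck count against all stations for the same date and hour, so the result is 0 for the quietest and 1 for the busiest station in that hour. A toy illustration of the same normalization on made-up numbers:
# min-max normalization on a made-up Series: quietest -> 0.0, busiest -> 1.0
s = pd.Series({"a": 10, "b": 40, "c": 100})
print (s - s.min()) / (s.max() - s.min())   # a: 0.0, b: 0.333..., c: 1.0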
In [154]:
zsts_dict = {
"71: TH - Werntal": "9167",
"7: HE - Schweinfurt": "9152",
"70: Werntal - Schweinfurt": "9203",
"7: Schweinfurt - Biebelried": "9020",
"3: Seligenstädt - Würzburg-West": "9510",
"7: Biebelried - Feuchtwangen": "9534",
"7: Feuchtwangen - BW": "9568",
"6: Feuchtwangen - Nürnberg Süd": "9525",
"3: Biebelried - Fürth": "9041",
"70: Bamberg - Werntal": "9545",
"73: TH - Bamberg": "9306",
"73: Bamberg - Fürth": "9585",
"70: Bayreuth - Bamberg": "9068",
"9: SN - Vogtland": "9903",
"72: SN - Vogtland": "9094",
"9: Vogtland - Bayreuth": "9620",
"93: Hochfranken - Oberpfälzer Wald": "9099",
"9: Bayreuth - Nürnberg": "9536",
"6: Oberpfälzer Wald - CZ": "9008",
"6: Nürnberg Ost - Altdorf": "9018",
"73: Fürth - Nürnberg": "9111",
"3: Fürth - Nürnberg": "9520",
"3: Altdorf - Regensburg": "9060",
"9: Nürnberg - Nürnberg-Ost": "9040",
"9: Nürnberg Ost - Nürnberg Feucht": "9754",
"73: Nürnberg-Hafen - Nürnberg-Süd": "9004",
"9: Nürnberg Feucht - Holledau": "9539",
"3: Nürnberg - Altdorf": "9081",
"93: Oberpfälzer Wald - Regensburg": "9902",
"93: Regensburg - Holledau": "9030",
"3: Regensburg - Deggendorf": "9077",
"92: Deggendorf - Neufahrn": "9017",
"Holledau - Neufarn": "9058",
"3: Deggendorf - AT": "9628",
"94: Heldenstein - Burghausen": "9984",
"8: Inntal - AT": "9028",
"8: München-Süd - Inntal": "9171",
"93: Inntal - AT": "9629",
"95: Starnberg - Eschenlohe": "9161",
"96: München-Süd-West - Memmingen": "9136",
"8: BW - Eschenried": "9965",
"7: Ulm - Allgäu": "9016",
"94: München-Ost - Pastetten": "9213"
}
print "zst_json = {"
for zst in zsts_dict.keys():
print ' "'+zst+'": "'+zsts_dict[zst]+'"'
print "};"
In [155]:
# usage for zsts at 2pm
print "zst_usage = {"
for route in zsts_dict.keys():
    zst = zsts_dict[route]
    usage_percent = get_hourly_normalized_lkw_numbers(140315, 14, zst)
    print ' "' + route + '": "' + str(usage_percent) + '",'
print "};"
In [157]:
# kurz ("short") and lang ("long"): two counting stations compared at 2pm (hour 14)
kurz = "9627"
lang = "9629"
print "kurz", get_hourly_normalized_lkw_numbers(140315, 14, kurz)
print "lang", get_hourly_normalized_lkw_numbers(140315, 14, lang)
In [159]:
df = get_data_slice(date=140315)
df = df[df["Stunde"] == 14]
d = df.set_index("Zst")
direction = False
# sum over both directions unless a single direction ("R1"/"R2") is requested
if not direction:
    d["result"] = d[["Lkw_R1", "Lkw_R2"]].sum(axis=1)
else:
    d["result"] = d["Lkw_" + direction]
r = d["result"]
r_max = r.max()
r_min = r.min()
print 'max', r_max
print 'min', r_min
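For a fuller picture than max/min alone, Series.describe() summarizes the per-station counts that feed the normalization; a possible follow-up:
# quartiles, mean and spread of the hourly per-station truck counts
print r.describe()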