In [1]:
%matplotlib inline
import math
import pytz
import logging
import traceback
import time
import pandas as pd
import numpy as np
import seaborn as sns
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import cPickle as pickle
In [2]:
%run src/data/helper.py
In [3]:
start_time = time.time()
with open('data/parsed/stations_dataset_final.p', 'rb') as f:
    stations = pickle.load(f)
with open('data/parsed/readings_clean.p', 'rb') as f:
    readings = pickle.load(f)
end_time = time.time()
print 'Loading the stations and readings datasets took %.2f seconds' % (end_time - start_time)
In [4]:
split_training = lambda df: df[datetime(2016,5,15,0,0,0,0):datetime(2016,6,12,23,59,59,999999)]
split_validation = lambda df: df[datetime(2016,6,13,0,0,0,0):datetime(2016,6,19,23,59,59,999999)]
split_test = lambda df: df[datetime(2016,6,20,0,0,0,0):datetime(2016,6,26,23,59,59,999999)]
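These lambdas rely on label-based slicing over each station's DatetimeIndex: roughly four weeks for training (15 May - 12 June), one week for validation (13 - 19 June) and one week for testing (20 - 26 June). A minimal sketch of the same idea on synthetic data (the 6-hour frequency is illustrative, not the readings' actual sampling rate):

import pandas as pd
# one toy row every 6 hours across the whole study period
idx = pd.date_range('2016-05-15', '2016-06-26 23:00', freq='6H', name='Timestamp')
toy = pd.DataFrame({'NbBikes': range(len(idx))}, index=idx)
# the three windows are contiguous and keep both endpoints, so they partition the period
assert len(split_training(toy)) + len(split_validation(toy)) + len(split_test(toy)) == len(toy)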
In [5]:
def split_datasets(df, station_id):
station_df = df.loc[station_id]
training = split_training(station_df)
validation = split_validation(station_df)
test = split_test(station_df)
return training, validation, test
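For illustration, splitting a single station (the id below is one that appears later in this notebook):

training, validation, test = split_datasets(readings, 'BikePoints_148')
print 'train/validation/test rows: %d / %d / %d' % (len(training), len(validation), len(test))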
In [6]:
def clip_and_round(arr):
    # counts of bicycles cannot be negative or fractional
    arr = np.clip(arr, 0, None)
    return np.round(arr)
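clip_and_round maps raw regression outputs onto physically meaningful values. For example:

print clip_and_round(np.array([-1.2, 0.4, 7.6]))  # [ 0.  0.  8.]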
In [7]:
import inspect
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.metrics import mean_squared_error
from rpy2 import robjects
from rpy2.robjects import pandas2ri
from rpy2.robjects.packages import importr
from rpy2.robjects import IntVector, Formula
pandas2ri.activate()
r = robjects.r
base = importr('base')
stats = importr('stats')
mgcv = importr('mgcv')
class GAMRegressor(BaseEstimator, RegressorMixin):
    last_data = None  # most recent frame passed to fit(), kept for inspection
def __init__(self, features=None, formula_str=None,
FogTMinus2_parametric=None, RainTMinus2_parametric=None,
DistNbBikes_parametric=None, CollNbBikes_parametric=None,
TempTMinus2AndHumidityTMinus2_sp=None,
NbBikesTMinus2_sp=None, NbBikesTMinus3_sp=None,
NbBikesTMinus12_sp=None, NbBikesTMinus18_sp=None,
TimeOfDay_1_sp=None, TimeOfDay_2_sp=None, TimeOfDay_3_sp=None,
TimeOfDay_1_by=None, TimeOfDay_2_by=None, TimeOfDay_3_by=None):
        args, _, _, values = inspect.getargvalues(inspect.currentframe())
        values.pop("self")
        self.args_to_set = []
        for arg, val in values.items():
            # remember which arguments carry per-feature smoothing settings
            if arg != 'features' and arg != 'formula_str':
                self.args_to_set.append(arg)
            setattr(self, arg, val)
def fit(self, X, y=None):
if self.formula_str is None:
features_dicts = self.build_features_dicts()
self.formula_str = self.build_formula_str(features_dicts)
        GAMRegressor.last_data = X
frm = Formula(self.formula_str)
self.gam = mgcv.gam(frm, data=X)
return self
    def predict(self, X):
        assert getattr(self, 'gam', None) is not None, "fit must be called before predict"
        p_val = clip_and_round(stats.predict(self.gam, newdata=X))
        return p_val
    def score(self, X):
        # negated RMSE, so that higher scores are better for sklearn model selection
        p_val = self.predict(X)
        y_val = X.NbBikes
        rmse = mean_squared_error(y_val, p_val)**0.5
        return rmse * (-1)
def build_features_dicts(self):
assert (self.features is not None), "features must be set"
# initialize the dictionaries
features_dicts = {}
for feature in self.features:
features_dicts[feature] = {
'name': feature,
'bs': 'tp',
'sp': None,
'by': None,
'k': None,
'parametric': False
}
# set parameter values
for arg in self.args_to_set:
val = getattr(self, arg)
if val is None:
continue
feature, parameter = arg.rsplit('_',1)
features_dicts[feature][parameter] = val
return features_dicts
def build_formula_str(self, features_dicts):
formula = 'NbBikes ~ '
for feature, feature_dict in features_dicts.iteritems():
if feature_dict['parametric']:
formula += '%(name)s+' % feature_dict
continue
            tokens = feature_dict['name'].split('_')
            name = tokens[0]  # drop any trailing index, e.g. TimeOfDay_1 -> TimeOfDay
            formula += "s(%s" % name.replace('And', ',')
if feature_dict['bs'] is not None:
formula += ", bs='%s'" % feature_dict['bs']
if feature_dict['sp'] is not None:
formula += ", sp=%f" % feature_dict['sp']
if feature_dict['by'] is not None:
formula += ", by=%s" % feature_dict['by']
if feature_dict['k'] is not None:
formula += ", k=%s" % feature_dict['k']
formula += ")+" % feature_dict
return formula[:-1]
class LRegressor(BaseEstimator, RegressorMixin):
def __init__(self, formula_str):
self.formula_str = formula_str
def fit(self, X, y=None):
self.lr = stats.lm(Formula(self.formula_str), data=X)
return self
    def predict(self, X):
        assert getattr(self, 'lr', None) is not None, "fit must be called before predict"
        p_val = clip_and_round(stats.predict(self.lr, newdata=X))
        return p_val
def score(self, X):
p_val = self.predict(X)
y_val = X.NbBikes
rmse = mean_squared_error(y_val, p_val)**0.5
return rmse * (-1)
In [8]:
def fit_and_predict_lm(training, validation, test, formula):
    lm = LRegressor(formula_str=formula)
    lm.fit(training)
    # predict already clips and rounds, so no further post-processing is needed
    return lm, lm.predict(training), lm.predict(validation), lm.predict(test)
In [9]:
def fit_and_predict_gam(training, validation, test, formula):
    gam = GAMRegressor(formula_str=formula)
    gam.fit(training)
    # predict already clips and rounds, so no further post-processing is needed
    return gam, gam.predict(training), gam.predict(validation), gam.predict(test)
In [10]:
def fit_and_predict_hist_avg(training, validation, test):
return None, training.HistAvg, validation.HistAvg, test.HistAvg
In [11]:
def model(df, station_ids, gam_formula, lr_formula, pred_col):
results = []
for station_id in station_ids:
print 'Fitting %s' % station_id
training, validation, test = split_datasets(df, station_id)
t_train = training[pred_col]
t_validation = validation[pred_col]
t_test = test[pred_col]
try:
# Linear Model
lm_fit = fit_and_predict_lm(training, validation, test, lr_formula)
lm_rmse_train = mean_squared_error(t_train, lm_fit[1])**0.5
lm_rmse_val = mean_squared_error(t_validation, lm_fit[2])**0.5
lm_rmse_test = mean_squared_error(t_test, lm_fit[3])**0.5
# GAM Model
gam_fit = fit_and_predict_gam(training, validation, test, gam_formula)
gam_rmse_train = mean_squared_error(t_train, gam_fit[1])**0.5
gam_rmse_val = mean_squared_error(t_validation, gam_fit[2])**0.5
gam_rmse_test = mean_squared_error(t_test, gam_fit[3])**0.5
# Hist Avg
havg_fit = fit_and_predict_hist_avg(training, validation, test)
havg_rmse_train = mean_squared_error(t_train, havg_fit[1])**0.5
havg_rmse_val = mean_squared_error(t_validation, havg_fit[2])**0.5
            havg_rmse_test = mean_squared_error(t_test, havg_fit[3])**0.5
        except Exception:
            logging.error(traceback.format_exc())
            continue  # skip stations whose models failed to fit
        results.append({'Id': station_id,
                        'LR-TRAIN-ERR': lm_rmse_train, 'LR-VAL-ERR': lm_rmse_val, 'LR-TEST-ERR': lm_rmse_test,
                        'GAM-TRAIN-ERR': gam_rmse_train, 'GAM-VAL-ERR': gam_rmse_val, 'GAM-TEST-ERR': gam_rmse_test,
                        'HAVG-TRAIN-ERR': havg_rmse_train, 'HAVG-VAL-ERR': havg_rmse_val, 'HAVG-TEST-ERR': havg_rmse_test,
                        'LR-MOD': lm_fit[0], 'GAM-MOD': gam_fit[0]})
return results
In [12]:
def convert_results_to_df(results, name):
dfs = [pd.DataFrame(result).set_index('Id') for result in results]
for i,df in enumerate(dfs):
df.columns = ['%s-EXP%d-%s' % (name, i,col) for col in df.columns]
return pd.concat(dfs, axis=1)
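The renaming step is what produces column names such as 'SHORT-EXP0-GAM-TEST-ERR' used throughout the rest of this notebook: scenario name, experiment number, then the original metric. A toy illustration (the id and value are made up):

toy_results = [[{'Id': 'BikePoints_1', 'GAM-TEST-ERR': 1.5}]]
print convert_results_to_df(toy_results, 'DEMO').columns.tolist()
# ['DEMO-EXP0-GAM-TEST-ERR']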
In [13]:
stations_to_use = readings.index.get_level_values(0).unique().tolist()
In [14]:
gam_formula_short = "NbBikes ~ s(TempTMinus2, HumidityTMinus2, bs='tp', sp=30.0) + s(TimeOfDay, by=Weekday, bs='tp', sp=1.1) "
gam_formula_short += "+ s(TimeOfDay, by=Weekend, bs='tp', sp=50.0) + s(TimeOfDay, by=Holiday, bs='tp', sp=0.2) + s(NbBikesTMinus2, bs='tp', sp=8.0) "
gam_formula_short += "+ s(NbBikesTMinus3, bs='tp', sp=11.0) + RainTMinus2 + FogTMinus2 "
In [15]:
lr_formula_short = "NbBikes ~ TempTMinus2 + HumidityTMinus2 + TimeOfDay + Weekday "
lr_formula_short += "+ NbBikesTMinus2 + NbBikesTMinus3 + RainTMinus2 + FogTMinus2 "
In [16]:
# choose the columns to use in the model
boolean_cols_short = ['Weekday', 'Weekend', 'Holiday', 'RainTMinus2', 'FogTMinus2']
numeric_cols_short = ['HumidityTMinus2', 'TempTMinus2', 'TimeOfDay',
'NbBikesTMinus2', 'NbBikesTMinus3', 'HistAvg']
pred_col_short = 'NbBikes'
feature_cols_short = numeric_cols_short + boolean_cols_short
cols_short = [pred_col_short] + feature_cols_short
# select the chosen columns
readings_short = readings.loc[stations_to_use][cols_short]
# remove na
readings_short.dropna(inplace=True)
In [17]:
baseline_short = [model(readings_short, stations_to_use, gam_formula_short, lr_formula_short, 'NbBikes')]
In [18]:
baseline_short_df = convert_results_to_df(baseline_short, 'SHORT')
In [19]:
baseline_short_df
Out[19]:
In [20]:
with open("data/parsed/models_short.p", "wb") as f:
pickle.dump(baseline_short, f)
In [103]:
baseline_short_df.mean()
Out[103]:
In [14]:
lr_formula_mid = "NbBikes ~ TempTMinus12 + HumidityTMinus12 + TimeOfDay + Weekday + DistNbBikes + CollNbBikes "
lr_formula_mid += "+ NbBikesTMinus12 + NbBikesTMinus18 + Near1TMinus12 + Near2TMinus12 + Near1TMinus18 + Near2TMinus18 "
In [15]:
gam_formula_mid = "NbBikes ~ s(TempTMinus12, HumidityTMinus12, bs='tp') + s(TimeOfDay, by=Weekday, bs='tp') "
gam_formula_mid += "+ s(TimeOfDay, by=Weekend, bs='tp') + s(TimeOfDay, by=Holiday, bs='tp') + s(NbBikesTMinus12, bs='tp') "
gam_formula_mid += "+ s(NbBikesTMinus18, bs='tp') "
In [16]:
# choose the columns to use in the model
boolean_cols_mid = ['Weekday', 'Weekend', 'Holiday']
numeric_cols_mid = ['HumidityTMinus12', 'TempTMinus12', 'TimeOfDay', 'NbBikesTMinus12', 'NbBikesTMinus18', 'HistAvg',
'Near1TMinus12', 'Near2TMinus12', 'Near1TMinus18', 'Near2TMinus18', 'DistNbBikes', 'CollNbBikes']
pred_col_mid = 'NbBikes'
feature_cols_mid = numeric_cols_mid + boolean_cols_mid
cols_mid = [pred_col_mid] + feature_cols_mid
# select the chosen columns
readings_mid = readings.loc[stations_to_use][cols_mid]
# remove na
readings_mid.dropna(inplace=True)
In [17]:
baseline_mid = [model(readings_mid, stations_to_use, gam_formula_mid, lr_formula_mid, 'NbBikes')]
In [21]:
baseline_mid_df = convert_results_to_df(baseline_mid, 'MID')
In [ ]:
with open("data/parsed/models_mid.p", "wb") as f:
pickle.dump(baseline_mid, f)
In [157]:
# choose the columns to use in the model
redistribution_cols = ['CollNbBikesCum6', 'DistNbBikesCum6']
boolean_cols_long = ['Weekday', 'Weekend', 'Holiday']
numeric_cols_long = ['TimeOfDay', 'HistAvg']
pred_col_long = 'NbBikes'
feature_cols_long = numeric_cols_long + boolean_cols_long + redistribution_cols
cols_long = [pred_col_long] + feature_cols_long
# select the chosen columns
readings_long = readings.loc[stations_to_use][cols_long]
# remove na
readings_long.dropna(inplace=True)
In [158]:
gam_formula_long = "NbBikes ~ s(TimeOfDay, by=Weekday, bs='tp', sp=35.0) + s(TimeOfDay, by=Weekend, bs='tp', sp=0.7) "
gam_formula_long += "+ s(TimeOfDay, by=Holiday, bs='tp', sp=3.0) + s(HistAvg, bs='tp', k=5) + CollNbBikesCum6 + DistNbBikesCum6 "
In [159]:
lr_formula_long = "NbBikes ~ TimeOfDay + Weekday + Holiday + Weekend + HistAvg + CollNbBikesCum6 + DistNbBikesCum6"
In [160]:
all_long = [model(readings_long, stations_to_use, gam_formula_long, lr_formula_long, 'NbBikes')]
In [162]:
all_long_df = convert_results_to_df(all_long, 'LONG')
In [86]:
with open("data/parsed/models_long.p", "wb") as f:
pickle.dump(all_long, f)
In [21]:
with open("data/parsed/models_short.p", "rb") as f:
models_short = pickle.load(f)
with open("data/parsed/models_mid.p", "rb") as f:
models_mid = pickle.load(f)
In [163]:
short_df = convert_results_to_df(models_short, 'SHORT')
mid_df = convert_results_to_df(models_mid, 'MID')
long_df = convert_results_to_df(all_long, 'LONG')
In [164]:
long_df.describe()
Out[164]:
In [165]:
short_df['Scenario'] = 'Short-Term'
short_df.set_index('Scenario', append=True, inplace=True)
short_df = short_df.reorder_levels(['Scenario', 'Id'])[['SHORT-EXP0-HAVG-TEST-ERR', 'SHORT-EXP0-LR-TEST-ERR', 'SHORT-EXP0-GAM-TEST-ERR']]
short_df.columns=['HAVG-TEST-ERR', 'LR-TEST-ERR', 'GAM-TEST-ERR']
mid_df['Scenario'] = 'Mid-Term'
mid_df.set_index('Scenario', append=True, inplace=True)
mid_df = mid_df.reorder_levels(['Scenario', 'Id'])[['MID-EXP0-HAVG-TEST-ERR', 'MID-EXP0-LR-TEST-ERR', 'MID-EXP0-GAM-TEST-ERR']]
mid_df.columns=['HAVG-TEST-ERR', 'LR-TEST-ERR', 'GAM-TEST-ERR']
long_df['Scenario'] = 'Long-Term'
long_df.set_index('Scenario', append=True, inplace=True)
long_df = long_df.reorder_levels(['Scenario', 'Id'])[['LONG-EXP0-HAVG-TEST-ERR', 'LONG-EXP0-LR-TEST-ERR', 'LONG-EXP0-GAM-TEST-ERR']]
long_df.columns=['HAVG-TEST-ERR', 'LR-TEST-ERR', 'GAM-TEST-ERR']
In [166]:
aggregations = {
'HAVG-TEST-ERR': {
'mean': 'mean',
'std': 'std'
},
'LR-TEST-ERR': {
'mean': 'mean',
'std': 'std'
},
'GAM-TEST-ERR': {
'mean': 'mean',
'std': 'std'
}
}
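Nested renaming dictionaries like the one above were deprecated in later pandas releases. An equivalent formulation that yields the same ('metric', 'mean'/'std') column pairs consumed below would be the following sketch (not what was run here):

aggregations = {col: ['mean', 'std']
                for col in ['HAVG-TEST-ERR', 'LR-TEST-ERR', 'GAM-TEST-ERR']}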
In [167]:
rmse_results = pd.concat([short_df, mid_df, long_df])
rmse_results['Metric'] = 'RMSE'
In [168]:
wrmse_results = pd.concat([short_df, mid_df, long_df]).merge(readings.groupby(level='Id')['NbDocks'].mean().to_frame(), how='inner', right_index=True, left_index=True)
wrmse_results['HAVG-TEST-ERR'] = wrmse_results['HAVG-TEST-ERR'] / wrmse_results.NbDocks
wrmse_results['LR-TEST-ERR'] = wrmse_results['LR-TEST-ERR'] / wrmse_results.NbDocks
wrmse_results['GAM-TEST-ERR'] = wrmse_results['GAM-TEST-ERR'] / wrmse_results.NbDocks
wrmse_results['Metric'] = 'WRMSE'
wrmse_results.drop('NbDocks', axis=1, inplace=True)
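WRMSE normalizes each station's RMSE by its dock count, so errors become comparable across stations of very different capacities: WRMSE = RMSE / NbDocks. For a single hypothetical station:

rmse, nb_docks = 2.4, 24.0
print 'WRMSE = %.2f' % (rmse / nb_docks)  # 0.10, i.e. the error averages 10% of capacity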
In [169]:
results = pd.concat([rmse_results, wrmse_results]).reset_index().groupby(['Metric', 'Scenario']).agg(aggregations)
results['HAVG-TEST-ERR', 'HAVG-TEST-ERR'] = results['HAVG-TEST-ERR', 'mean'].map('{:.2f}'.format) + " (" + results['HAVG-TEST-ERR', 'std'].map('{:.2f}'.format) + ")"
results['LR-TEST-ERR', 'LR-TEST-ERR'] = results['LR-TEST-ERR', 'mean'].map('{:.2f}'.format) + " (" + results['LR-TEST-ERR', 'std'].map('{:.2f}'.format) + ")"
results['GAM-TEST-ERR', 'GAM-TEST-ERR'] = results['GAM-TEST-ERR', 'mean'].map('{:.2f}'.format) + " (" + results['GAM-TEST-ERR', 'std'].map('{:.2f}'.format) + ")"
results.columns = results.columns.droplevel()
results = results[['HAVG-TEST-ERR', 'LR-TEST-ERR', 'GAM-TEST-ERR']]
results.index.name = None
In [170]:
results
Out[170]:
In [121]:
print results.to_latex()
In [81]:
# pick the station with the median GAM test error (765 stations, integer division)
baseline_short_df['SHORT-EXP0-GAM-TEST-ERR'].sort_values()[765/2:765/2 + 1]
Out[81]:
In [82]:
training, validation, test = split_datasets(readings_short, 'BikePoints_148')
with open("data/parsed/r_eval_short.p", "wb") as f:
pickle.dump(training, f)
In [99]:
with open("data/parsed/r_eval_short.p", "rb") as f:
r_eval_short = pickle.load(f)
In [100]:
%load_ext rpy2.ipython
robjects.globalenv['data'] = r_eval_short
In [101]:
lr_formula_short
Out[101]:
In [102]:
%%R
lrModel <- lm(NbBikes ~ TempTMinus2 + HumidityTMinus2 + TimeOfDay + Weekday + NbBikesTMinus2 + NbBikesTMinus3 + RainTMinus2 + FogTMinus2 , data=data)
summary(lrModel)
In [85]:
%%R
library(mgcv)
gamModel <- gam(NbBikes ~ s(TempTMinus2, HumidityTMinus2, bs='tp', sp=30.0) + s(TimeOfDay, by=Weekday, bs='tp', sp=1.1)
+ s(TimeOfDay, by=Weekend, bs='tp', sp=50.0) + s(TimeOfDay, by=Holiday, bs='tp', sp=0.2)
+ s(NbBikesTMinus2, bs='tp', sp=8.0) + s(NbBikesTMinus3, bs='tp', sp=11.0)
+ RainTMinus2 + FogTMinus2, data=data)
summary(gamModel)
In [86]:
%%R
postscript("reports/images/short-check.eps",horizontal=FALSE, paper="special",height=6,width=10, onefile=TRUE)
gam.check(gamModel)
dev.off()
In [87]:
%%R
postscript("reports/images/short-splines.eps",horizontal=FALSE, paper="special",height=6,width=10, onefile=TRUE)
layout(matrix(1:2, ncol = 2))
plot(gamModel, scale = 0)
layout(1)
dev.off()
In [88]:
%%R
postscript("reports/images/short-correlation.eps",horizontal=FALSE, paper="special",height=6,width=10, onefile=TRUE)
layout(matrix(1:2, ncol = 2))
acf(resid(gamModel), lag.max = 36, main = "ACF")
pacf(resid(gamModel), lag.max = 36, main = "pACF")
layout(1)
dev.off()
In [89]:
baseline_mid_df['MID-EXP0-GAM-TEST-ERR'].sort_values()[765/2:765/2 + 1]
Out[89]:
In [90]:
training, validation, test = split_datasets(readings_mid, 'BikePoints_231')
with open("data/parsed/r_eval_mid.p", "wb") as f:
pickle.dump(training, f)
In [91]:
with open("data/parsed/r_eval_mid.p", "rb") as f:
r_eval_mid = pickle.load(f)
In [92]:
%load_ext rpy2.ipython
robjects.globalenv['data'] = r_eval_mid
In [93]:
gam_formula_mid
Out[93]:
In [94]:
%%R
library(mgcv)
gamModel <- gam(NbBikes ~ s(TempTMinus12, HumidityTMinus12, bs='tp') + s(TimeOfDay, by=Weekday, bs='tp')
+ s(TimeOfDay, by=Weekend, bs='tp') + s(TimeOfDay, by=Holiday, bs='tp')
+ s(NbBikesTMinus12, bs='tp') + s(NbBikesTMinus18, bs='tp'), data=data)
summary(gamModel)
In [95]:
%%R
postscript("reports/images/mid-check.eps",horizontal=FALSE, paper="special",height=6,width=10, onefile=TRUE)
gam.check(gamModel)
dev.off()
In [96]:
%%R
postscript("reports/images/mid-splines.eps",horizontal=FALSE, paper="special",height=6,width=10, onefile=TRUE)
layout(matrix(1:2, ncol = 2))
plot(gamModel, scale = 0)
layout(1)
dev.off()
In [97]:
%%R
postscript("reports/images/mid-correlation.eps",horizontal=FALSE, paper="special",height=6,width=10, onefile=TRUE)
layout(matrix(1:2, ncol = 2))
acf(resid(gamModel), lag.max = 36, main = "ACF")
pacf(resid(gamModel), lag.max = 36, main = "pACF")
layout(1)
dev.off()
In [171]:
all_long_df['LONG-EXP0-GAM-TEST-ERR'].sort_values()[765/2:765/2 + 1]
Out[171]:
In [40]:
gam_formula_long
Out[40]:
In [172]:
training, validation, test = split_datasets(readings_long, 'BikePoints_610')
with open("data/parsed/r_eval_long.p", "wb") as f:
pickle.dump(training, f)
In [173]:
with open("data/parsed/r_eval_long.p", "rb") as f:
r_eval_long = pickle.load(f)
In [174]:
%load_ext rpy2.ipython
robjects.globalenv['data'] = r_eval_long
In [175]:
%%R
library(mgcv)
gamModel <- gam(NbBikes ~ s(TimeOfDay, by=Weekday, bs='tp', sp=35.0) + s(TimeOfDay, by=Weekend, bs='tp', sp=0.7)
+ s(TimeOfDay, by=Holiday, bs='tp', sp=3.0) + s(HistAvg, bs='tp', k=5)
+ CollNbBikesCum6 + DistNbBikesCum6 , data=data)
summary(gamModel)
In [177]:
%%R
postscript("reports/images/long-check.eps",horizontal=FALSE, paper="special",height=6,width=10, onefile=TRUE)
gam.check(gamModel)
dev.off()
In [178]:
%%R
postscript("reports/images/long-splines.eps",horizontal=FALSE, paper="special",height=6,width=10, onefile=TRUE)
layout(matrix(1:2, ncol = 2))
plot(gamModel, scale = 0)
layout(1)
dev.off()
In [179]:
%%R
postscript("reports/images/long-correlation.eps",horizontal=FALSE, paper="special",height=6,width=10, onefile=TRUE)
layout(matrix(1:2, ncol = 2))
acf(resid(gamModel), lag.max = 36, main = "ACF")
pacf(resid(gamModel), lag.max = 36, main = "pACF")
layout(1)
dev.off()
In [48]:
%run src/data/visualization.py
In [93]:
now = datetime(year=2016, month=6, day=22, hour=9, minute=0, second=0)
single_reading = readings.xs(now, level='Timestamp')
In [102]:
predictions = []
for idx, row in single_reading.iterrows():
gam = baseline_mid_df.loc[idx]['MID-EXP0-GAM-MOD']
prediction = gam.predict(row.to_frame().transpose())[0]
predictions.append({'Prediction': prediction, 'Id': idx,
'NbDocks': row.NbDocks,
'HistAvg': row.HistAvg,
'Current': row.NbBikesTMinus12})
predictions = pd.DataFrame(predictions).set_index('Id')
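stats.predict expects a data frame rather than a single row, hence the to_frame().transpose() idiom above: it promotes each Series back into a one-row DataFrame, and [0] unwraps the single-element prediction array. The same promotion in isolation:

row = single_reading.iloc[0]
one_row_df = row.to_frame().transpose()  # Series -> 1 x N DataFrame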
In [103]:
predictions_df = add_station_info(predictions, stations.set_index('Id'), use_indexes=True)
predictions_df['NormalizedPrediction'] = predictions_df.Prediction.astype(float) / predictions_df.NbDocks.astype(float)
In [168]:
from palettable.colorbrewer.diverging import RdYlBu_10
def create_prediction_marker(station):
font_color_now = 'red' if station.Current < 5 else 'black'
font_color_mid = 'red' if station.Prediction < 5 else 'black'
font_color_long = 'red' if station.HistAvg < 5 else 'black'
html="""
<h1>%s</h1>
<p><strong>Id</strong>: %s</p>
<p>
<strong>Bicycle Availability:</strong>
<ul>
<font color="%s"><li><strong>%s:</strong> %d bicycles</li></font>
<font color="%s"><li><strong>%s: </strong> %d bicycles</li></font>
<font color="%s"><li><strong>%s: </strong> %d bicycles</li></font>
</ul>
</p>
<p>
<strong>Availability Trend:</strong><br/><br/>
<img src="http://localhost:8888/files/reports/images/trend.png" width=210 height=210 hspace=16/>
</p>
""" % (station.Name, station.name,
font_color_now, now.strftime('%d/%m - %H:%M'), station.Current,
font_color_mid, (now + timedelta(hours=1)).strftime('%d/%m - %H:%M'), station.Prediction,
font_color_long, (now + timedelta(hours=3)).strftime('%d/%m - %H:%M'), station.HistAvg)
iframe = folium.element.IFrame(html=html, width=350, height=530)
popup = folium.Popup(iframe, max_width=2650)
fill_color = cmap_to_hex(RdYlBu_10.mpl_colormap, station.NormalizedPrediction)
return folium.CircleMarker(location=[station['Latitude'], station['Longitude']], radius=100,
popup=popup, fill_color=fill_color, color=fill_color, fill_opacity=0.99)
predictions_map = draw_stations_map(predictions_df, create_prediction_marker)
In [169]:
folium.TileLayer('stamentoner').add_to(predictions_map)
folium.Map.save(predictions_map, 'reports/maps/predicted_availability_map.html')
predictions_map
Out[169]: