notebook.community

Edit and run



In [2]:

    
import pandas as pd
%matplotlib inline
from fbprophet import Prophet
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt



In [ ]:

    
df = pd.read_csv('weather.csv', sep = '\t',parse_dates= True)



In [16]:

    
df['REPORT_DATE'] = pd.to_datetime(df['REPORT_DATE'], format='%d.%m.%Y', dayfirst= True)



In [166]:

    
df.columns = ['ds','y']
df.loc[(df['y'] < 40000) | (df['y'] > 80000), 'y'] = None
m = Prophet(  mcmc_samples = 300       )
m.fit(df)









    



INFO:fbprophet.forecaster:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
C:\Users\Insaf.Ashrapov\AppData\Local\Continuum\miniconda3\lib\site-packages\pystan\misc.py:399: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  elif np.issubdtype(np.asarray(v).dtype, float):
C:\Users\Insaf.Ashrapov\AppData\Local\Continuum\miniconda3\lib\site-packages\pystan\misc.py:456: DeprecationWarning: inspect.getargspec() is deprecated, use inspect.signature() or inspect.getfullargspec()
  if "chain_id" in inspect.getargspec(init).args:






    Out[166]:





<fbprophet.forecaster.Prophet at 0x1c9add6cb70>



In [ ]:

    
future = m.make_future_dataframe(periods=12, freq = 'M')
forecast = m.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()



In [ ]:

    
fig1 = m.plot(forecast)



In [169]:

    
fig2 = m.plot_components(forecast)



In [ ]:

    
from fbprophet.diagnostics import cross_validation
df_cv = cross_validation(m, initial='360 days', period='25 days', horizon = '61 days')
df_cv.head()



In [159]:

    
def mean_absolute_percentage_error(y_true, y_pred):  
    return np.mean(np.abs((y_true - y_pred) / y_true))



In [160]:

    
df_m = df_cv.copy()
df_m['horizon'] = df_m['ds'] - df_m['cutoff']
df_m.sort_values('horizon', inplace=True)



In [161]:

    
#df_m = df_m[df_m.horizon > '30 days']



In [ ]:

    
total = df_m.groupby('cutoff', as_index=False).sum()
mapes = []
for bgn_date in total.cutoff:
    val = mean_absolute_percentage_error(total[total.cutoff == bgn_date].y, total[total.cutoff == bgn_date].yhat)
    mapes.append(val)



In [163]:

    
print('колво предиктов ', len(mapes))
num_more5 = 0
for val in mapes:
    if val > 0.05:
        num_more5 += 1
print('Кол-во предиктов больше 5%: ', num_more5)









    



колво предиктов  24
Кол-во предиктов больше 5%:  9



In [164]:

    
np.mean(mapes)









    Out[164]:





0.0526174800018338



In [65]:

    
from fbprophet.diagnostics import performance_metrics
df_p = performance_metrics(df_cv)
df_p.head()









    Out[65]:







  
    
      
      horizon
      mse
      rmse
      mae
      mape
      coverage
    
  
  
    
      254
      7 days
      1.471472e+07
      3835.976497
      3162.424837
      0.053222
      0.813953
    
    
      68
      7 days
      1.420103e+07
      3768.425406
      3080.341268
      0.051694
      0.837209
    
    
      192
      7 days
      1.399253e+07
      3740.659582
      3021.770266
      0.050748
      0.837209
    
    
      130
      7 days
      1.378017e+07
      3712.164720
      2981.706536
      0.049877
      0.837209
    
    
      316
      7 days
      1.302994e+07
      3609.701498
      2867.694888
      0.048264
      0.860465



In [60]:

    
import seaborn as sn
def plot_cross_validation_metricv2(df_cv, metric, rolling_window=0.1, ax=None):
    if ax is None:
        fig = plt.figure(facecolor='w', figsize=(10, 6))
        ax = fig.add_subplot(111)
    else:
        fig = ax.get_figure()
    # Get the metric at the level of individual predictions, and with the rolling window.
    df_none = performance_metrics(df_cv, metrics=[metric], rolling_window=0)
    df_h = performance_metrics(df_cv, metrics=[metric], rolling_window=rolling_window)

    # Some work because matplotlib does not handle timedelta
    # Target ~10 ticks.
    tick_w = max(df_none['horizon'].astype('timedelta64[ns]')) / 10.
    # Find the largest time resolution that has <1 unit per bin.
    dts = ['D', 'h', 'm', 's', 'ms', 'us', 'ns']
    dt_names = [
        'days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds',
        'nanoseconds'
    ]
    dt_conversions = [
        24 * 60 * 60 * 10 ** 9,
        60 * 60 * 10 ** 9,
        60 * 10 ** 9,
        10 ** 9,
        10 ** 6,
        10 ** 3,
        1.,
    ]
    for i, dt in enumerate(dts):
        if np.timedelta64(1, dt) < np.timedelta64(tick_w, 'ns'):
            break

    x_plt = df_none['horizon'].astype('timedelta64[ns]').astype(np.int64) / float(dt_conversions[i])
    x_plt_h = df_h['horizon'].astype('timedelta64[ns]').astype(np.int64) / float(dt_conversions[i])

    ax.plot(x_plt, df_none[metric], '.', alpha=0.5, c='gray')
    ax.plot(x_plt_h, df_h[metric], '-', c='b')
    ax.grid(True)

    ax.set_xlabel('Horizon ({})'.format(dt_names[i]))
    ax.set_ylabel(metric)
    return fig



In [61]:

    
from fbprophet.plot import plot_cross_validation_metric
fig = plot_cross_validation_metricv2(df_cv, metric='mape')



In [ ]:

	horizon	mse	rmse	mae	mape	coverage
254	7 days	1.471472e+07	3835.976497	3162.424837	0.053222	0.813953
68	7 days	1.420103e+07	3768.425406	3080.341268	0.051694	0.837209
192	7 days	1.399253e+07	3740.659582	3021.770266	0.050748	0.837209
130	7 days	1.378017e+07	3712.164720	2981.706536	0.049877	0.837209
316	7 days	1.302994e+07	3609.701498	2867.694888	0.048264	0.860465