In [1]:
import numpy as np
import pandas as pd

In [8]:
# Load the raw series from CSV; the frame displayed below has the columns
# date, number and info.
df = pd.read_csv('issue_19_data_1.csv')

In [9]:
# Display the whole raw frame: it shows missing values in every column and
# date cells that are not plain years (e.g. '1976-03-17', '2000-02-xx').
df


Out[9]:
date number info
0 1890 328.0 prince de Galles futur Edouard VII
1 1900 6043.0 grand-duc Vladimir de Russie
2 NaN 33642.0 Theodore Roosevelt
3 1914 40362.0 Alphonse XII
4 1921 51327.0 Thomas Rockefeller
5 1921 53211.0 l'empereur Hirohito.
6 1929 112151.0 Franklin D. Roosevelt
7 1938 147844.0 le duc de Windsor
8 1948 185397.0 Princesse Elisabeth d'Angleterre
9 NaN 203728.0 Marlene Dietrich
10 NaN 248903.0 Jean Cocteau
11 NaN 253652.0 Charlie Chaplin
12 1950 285793.0
13 NaN 448212.0 Charlie Chaplin
14 1955 NaN Charlie Chaplin
15 1973 NaN Charlie Chaplin
16 1976-03-17 500000.0
17 1976 536814.0 Gainsbourg
18 1978 531147.0 Mick Jagger
19 1979 554711.0 Roman Polanski
20 NaN 662614.0 Coluche
21 NaN 821208.0 Catherine Deneuve
22 2000-02-xx 908683.0 Ronaldo
23 NaN 921785.0 Vladimir Poutine
24 NaN 954466.0 Woody Allen
25 2003-04-29 1000000.0
26 NaN 1042624.0 Barbara Carlotti.
27 NaN 1079006.0 Bill Gates
28 2010 1089170.0 Buzz
29 2012 1113000.0
30 NaN NaN Lionel Jospin
31 NaN NaN Lionel Jospin
32 NaN NaN Jacques Chirac

In [11]:
# Select the inline matplotlib backend so figures render in the cell output.
%matplotlib inline

In [13]:
# Quick look at the raw series: `number` against the still-unparsed `date` column.
df.plot(x='date', y='number')


Out[13]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f485fd73a90>

In [25]:
import datetime

def convert_date(x, formats=("%Y",)):
    """Parse a raw ``date`` cell into a ``datetime.datetime``.

    Parameters
    ----------
    x : object
        Raw cell value. It is converted with ``str(x)`` before parsing, so
        both ``"1914"`` and ``1914`` are accepted. A value that is already a
        ``datetime.datetime`` is returned unchanged.
    formats : sequence of str, optional
        ``strptime`` format strings tried in order; the first one that
        matches wins. The default, ``("%Y",)``, accepts a bare year only.

    Returns
    -------
    datetime.datetime or float
        The parsed datetime, or ``np.nan`` when no format matches (missing
        values, full dates such as ``"1976-03-17"`` under the default
        formats, malformed strings such as ``"2000-02-xx"``).
    """
    # Pass already-parsed values through so that applying this function to a
    # converted column a second time does not wipe it out.
    if isinstance(x, datetime.datetime):
        return x

    text = str(x)
    for fmt in formats:
        try:
            return datetime.datetime.strptime(text, fmt)
        except ValueError:
            # strptime signals "does not match this format" with ValueError;
            # anything else is a real error and must not be swallowed.
            continue
    return np.nan


# Parse the raw date cells; values convert_date cannot parse become missing.
df['date'] = df['date'].apply(convert_date)

# Keep only rows that have both a parsed date and a number. The drop is
# restricted to the two columns the model uses, so a missing `info` value
# alone can never discard an otherwise valid observation.
df_train = df.dropna(subset=['date', 'number'])

In [26]:
# Inspect the rows that survived date parsing and the missing-value drop.
df_train


Out[26]:
date number info
0 1890-01-01 328.0 prince de Galles futur Edouard VII
1 1900-01-01 6043.0 grand-duc Vladimir de Russie
3 1914-01-01 40362.0 Alphonse XII
4 1921-01-01 51327.0 Thomas Rockefeller
5 1921-01-01 53211.0 l'empereur Hirohito.
6 1929-01-01 112151.0 Franklin D. Roosevelt
7 1938-01-01 147844.0 le duc de Windsor
8 1948-01-01 185397.0 Princesse Elisabeth d'Angleterre
12 1950-01-01 285793.0
17 1976-01-01 536814.0 Gainsbourg
18 1978-01-01 531147.0 Mick Jagger
19 1979-01-01 554711.0 Roman Polanski
28 2010-01-01 1089170.0 Buzz
29 2012-01-01 1113000.0

In [27]:
# Fit a pyaf forecast engine on the cleaned frame: `date` is the time column,
# `number` the signal, and the model is trained for a 7-step horizon.
import pyaf.ForecastEngine as autof

lEngine = autof.cForecastEngine()
lEngine.train(
    iInputDS=df_train,
    iTime='date',
    iSignal='number',
    iHorizon=7,
)

# Log the details of the selected model (see the INFO lines in the output).
lEngine.getModelInfo()


INFO:pyaf.std:START_TRAINING 'number'
INFO:pyaf.std:END_TRAINING_TIME_IN_SECONDS 'number' 2.8104207515716553
INFO:pyaf.std:TIME_DETAIL TimeVariable='date' TimeMin=1890-01-01T00:00:00.000000 TimeMax=2012-01-01T00:00:00.000000 TimeDelta=3427 days 14:46:09.230769 Estimation = (0 , 14) Validation = (0 , 14) Test = (0 , 14) Horizon=7
INFO:pyaf.std:SIGNAL_DETAIL SignalVariable='CumSum_number' Min=328.0 Max=4707298.0  Mean=1188070.07143 StdDev=1434737.29059
INFO:pyaf.std:BEST_TRANSOFORMATION_TYPE 'CumSum_'
INFO:pyaf.std:BEST_DECOMPOSITION  'CumSum_number_Lag1Trend_residue_zeroCycle_residue_AR(3)' [Lag1Trend + NoCycle + AR(3)]
INFO:pyaf.std:TREND_DETAIL 'CumSum_number_Lag1Trend' [Lag1Trend]
INFO:pyaf.std:CYCLE_DETAIL 'CumSum_number_Lag1Trend_residue_zeroCycle' [NoCycle]
INFO:pyaf.std:AUTOREG_DETAIL 'CumSum_number_Lag1Trend_residue_zeroCycle_residue_AR(3)' [AR(3)]
INFO:pyaf.std:MODEL_MAPE MAPE_Fit=0.2965 MAPE_Forecast=0.2965 MAPE_Test=0.2965
INFO:pyaf.std:MODEL_SMAPE SMAPE_Fit=0.4822 SMAPE_Forecast=0.4822 SMAPE_Test=0.4822
INFO:pyaf.std:MODEL_MASE MASE_Fit=0.5357 MASE_Forecast=0.5357 MASE_Test=0.5357
INFO:pyaf.std:MODEL_L1 L1_Fit=46318.1679123 L1_Forecast=46318.1679123 L1_Test=46318.1679123
INFO:pyaf.std:MODEL_L2 L2_Fit=70082.7714655 L2_Forecast=70082.7714655 L2_Test=70082.7714655
INFO:pyaf.std:MODEL_COMPLEXITY 67
INFO:pyaf.std:AR_MODEL_DETAIL_START
INFO:pyaf.std:AR_MODEL_COEFF 1 CumSum_number_Lag1Trend_residue_zeroCycle_residue_Lag3 1.83751069759
INFO:pyaf.std:AR_MODEL_COEFF 2 CumSum_number_Lag1Trend_residue_zeroCycle_residue_Lag1 0.225997870452
INFO:pyaf.std:AR_MODEL_COEFF 3 CumSum_number_Lag1Trend_residue_zeroCycle_residue_Lag2 -0.17684817679
INFO:pyaf.std:AR_MODEL_DETAIL_END

In [28]:
# NOTE(review): in the recorded run below this call fails with
# "ValueError: cannot reindex from a duplicate axis", raised from pyaf's
# prediction_interval_plot. The root cause is not established in this
# notebook — TODO investigate (e.g. check whether the duplicated 1921 date
# in df_train is involved) before relying on this cell.
lEngine.standardPlots()


INFO:pyaf.std:START_PLOTTING
/home/antoine/dev/python/packages/pyaf/TS/Plots.py:31: UserWarning: 
This call to matplotlib.use() has no effect because the backend has already
been chosen; matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
or matplotlib.backends is imported for the first time.

The backend was *originally* set to 'module://ipykernel.pylab.backend_inline' by the following code:
  File "/usr/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/antoine/.local/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "/home/antoine/.local/lib/python3.6/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/home/antoine/.local/lib/python3.6/site-packages/tornado/ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "/home/antoine/.local/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/antoine/.local/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/home/antoine/.local/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/antoine/.local/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/home/antoine/.local/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/usr/local/lib/python3.6/dist-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/usr/local/lib/python3.6/dist-packages/ipykernel/zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/usr/lib/python3/dist-packages/IPython/core/interactiveshell.py", line 2718, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/usr/lib/python3/dist-packages/IPython/core/interactiveshell.py", line 2828, in run_ast_nodes
    if self.run_code(code, result):
  File "/usr/lib/python3/dist-packages/IPython/core/interactiveshell.py", line 2882, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-11-e63dad7f4786>", line 1, in <module>
    get_ipython().magic('matplotlib inline')
  File "/usr/lib/python3/dist-packages/IPython/core/interactiveshell.py", line 2160, in magic
    return self.run_line_magic(magic_name, magic_arg_s)
  File "/usr/lib/python3/dist-packages/IPython/core/interactiveshell.py", line 2081, in run_line_magic
    result = fn(*args,**kwargs)
  File "<decorator-gen-105>", line 2, in matplotlib
  File "/usr/lib/python3/dist-packages/IPython/core/magic.py", line 188, in <lambda>
    call = lambda f, *a, **k: f(*a, **k)
  File "/usr/lib/python3/dist-packages/IPython/core/magics/pylab.py", line 100, in matplotlib
    gui, backend = self.shell.enable_matplotlib(args.gui)
  File "/usr/lib/python3/dist-packages/IPython/core/interactiveshell.py", line 2950, in enable_matplotlib
    pt.activate_matplotlib(backend)
  File "/usr/lib/python3/dist-packages/IPython/core/pylabtools.py", line 309, in activate_matplotlib
    matplotlib.pyplot.switch_backend(backend)
  File "/home/antoine/.local/lib/python3.6/site-packages/matplotlib/pyplot.py", line 229, in switch_backend
    matplotlib.use(newbackend, warn=False, force=True)
  File "/home/antoine/.local/lib/python3.6/site-packages/matplotlib/__init__.py", line 1305, in use
    reload(sys.modules['matplotlib.backends'])
  File "/usr/lib/python3.6/importlib/__init__.py", line 166, in reload
    _bootstrap._exec(spec, module)
  File "/home/antoine/.local/lib/python3.6/site-packages/matplotlib/backends/__init__.py", line 14, in <module>
    line for line in traceback.format_stack()


  matplotlib.use('Agg')
/home/antoine/.local/lib/python3.6/site-packages/pandas/plotting/_core.py:1714: UserWarning: Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access
  series.name = label
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-28-f0a4d8ebc9c6> in <module>()
----> 1 lEngine.standardPlots()

/home/antoine/dev/python/packages/pyaf/ForecastEngine.py in standardPlots(self, name, format)
     46 
     47     def standardPlots(self , name = None, format = 'png'):
---> 48         self.mSignalDecomposition.standardPlots(name, format);
     49 
     50     def getPlotsAsDict(self):

/home/antoine/dev/python/packages/pyaf/TS/SignalDecomposition.py in standardPlots(self, name, format)
    623         logger.info("START_PLOTTING")
    624         start_time = time.time()
--> 625         self.mBestModel.standardPlots(name, format);
    626         lPlotTime = time.time() - start_time;
    627         logger.info("END_PLOTTING_TIME_IN_SECONDS " + str(lPlotTime))

/home/antoine/dev/python/packages/pyaf/TS/TimeSeriesModel.py in standardPlots(self, name, format)
    309                                         lForecastColumn + '_Upper_Bound',
    310                                         name = name,
--> 311                                         format= format);
    312         #lOutput.plot()
    313 

/home/antoine/dev/python/packages/pyaf/TS/Plots.py in prediction_interval_plot(df, time, signal, estimator, lower, upper, name, format, max_length)
    100     # last value of the signal
    101     lLastSignalPos = df1[signal].dropna().tail(1).index;
--> 102     lEstimtorValue = df1[estimator][lLastSignalPos];
    103     df1.loc[lLastSignalPos , lower] = lEstimtorValue;
    104     df1.loc[lLastSignalPos , upper] = lEstimtorValue;

/home/antoine/.local/lib/python3.6/site-packages/pandas/core/series.py in __getitem__(self, key)
    662             key = check_bool_indexer(self.index, key)
    663 
--> 664         return self._get_with(key)
    665 
    666     def _get_with(self, key):

/home/antoine/.local/lib/python3.6/site-packages/pandas/core/series.py in _get_with(self, key)
    705                         return self.loc[key]
    706 
--> 707                     return self.reindex(key)
    708                 except Exception:
    709                     # [slice(0, 5, None)] will break if you convert to ndarray,

/home/antoine/.local/lib/python3.6/site-packages/pandas/core/series.py in reindex(self, index, **kwargs)
   2638     @Appender(generic._shared_docs['reindex'] % _shared_doc_kwargs)
   2639     def reindex(self, index=None, **kwargs):
-> 2640         return super(Series, self).reindex(index=index, **kwargs)
   2641 
   2642     @Appender(generic._shared_docs['fillna'] % _shared_doc_kwargs)

/home/antoine/.local/lib/python3.6/site-packages/pandas/core/generic.py in reindex(self, *args, **kwargs)
   3021         # perform the reindex on the axes
   3022         return self._reindex_axes(axes, level, limit, tolerance, method,
-> 3023                                   fill_value, copy).__finalize__(self)
   3024 
   3025     def _reindex_axes(self, axes, level, limit, tolerance, method, fill_value,

/home/antoine/.local/lib/python3.6/site-packages/pandas/core/generic.py in _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
   3039             obj = obj._reindex_with_indexers({axis: [new_index, indexer]},
   3040                                              fill_value=fill_value,
-> 3041                                              copy=copy, allow_dups=False)
   3042 
   3043         return obj

/home/antoine/.local/lib/python3.6/site-packages/pandas/core/generic.py in _reindex_with_indexers(self, reindexers, fill_value, copy, allow_dups)
   3143                                                 fill_value=fill_value,
   3144                                                 allow_dups=allow_dups,
-> 3145                                                 copy=copy)
   3146 
   3147         if copy and new_data is self._data:

/home/antoine/.local/lib/python3.6/site-packages/pandas/core/internals.py in reindex_indexer(self, new_axis, indexer, axis, fill_value, allow_dups, copy)
   4132         # some axes don't allow reindexing with dups
   4133         if not allow_dups:
-> 4134             self.axes[axis]._can_reindex(indexer)
   4135 
   4136         if axis >= self.ndim:

/home/antoine/.local/lib/python3.6/site-packages/pandas/core/indexes/base.py in _can_reindex(self, indexer)
   2939         # trying to reindex on an axis with duplicates
   2940         if not self.is_unique and len(indexer):
-> 2941             raise ValueError("cannot reindex from a duplicate axis")
   2942 
   2943     def reindex(self, target, method=None, level=None, limit=None,

ValueError: cannot reindex from a duplicate axis

In [32]:
# Run the trained engine 7 steps ahead and list every column it produced.
df_forecast = lEngine.forecast(iInputDS=df_train, iHorizon=7)
print(df_forecast.columns)

# Show the last 7 rows, restricted to the point forecast and its bounds.
df_forecast.tail(7)[[
    'date',
    'number_Forecast',
    'number_Forecast_Lower_Bound',
    'number_Forecast_Upper_Bound',
]]


INFO:pyaf.std:START_FORECASTING
INFO:pyaf.std:END_FORECAST_TIME_IN_SECONDS 0.20051145553588867
Index(['CumSum_number', 'date', 'number', 'row_number', 'date_Normalized',
       'CumSum_number_Lag1Trend', 'CumSum_number_Lag1Trend_residue',
       'CumSum_number_Lag1Trend_residue_zeroCycle',
       'CumSum_number_Lag1Trend_residue_zeroCycle_residue',
       'CumSum_number_Lag1Trend_residue_zeroCycle_residue_AR(3)',
       'CumSum_number_Lag1Trend_residue_zeroCycle_residue_AR(3)_residue',
       'CumSum_number_Trend', 'CumSum_number_Trend_residue',
       'CumSum_number_Cycle', 'CumSum_number_Cycle_residue',
       'CumSum_number_AR', 'CumSum_number_AR_residue',
       'CumSum_number_TransformedForecast', 'number_Forecast',
       'CumSum_number_TransformedResidue', 'number_Residue',
       'number_Forecast_Lower_Bound', 'number_Forecast_Upper_Bound'],
      dtype='object')
Out[32]:
date number_Forecast number_Forecast_Lower_Bound number_Forecast_Upper_Bound
14 2021-05-20 14:46:09.230769216 1.067167e+06 9.298043e+05 1.204529e+06
15 2030-10-08 05:32:18.461538432 2.034668e+06 1.634727e+06 2.434609e+06
16 2040-02-25 20:18:27.692307648 2.305215e+06 1.951239e+06 2.659191e+06
17 2049-07-15 11:04:36.923076864 2.111037e+06 1.613392e+06 2.608682e+06
18 2058-12-03 01:50:46.153846080 3.797103e+06 1.929540e+06 5.664665e+06
19 2068-04-21 16:36:55.384615296 4.709622e+06 1.929136e+06 7.490109e+06
20 2077-09-09 07:23:04.615384512 4.260869e+06 1.424380e+06 7.097359e+06

In [ ]: