In [1]:
import numpy as np
import pandas as pd
In [8]:
# Load the raw table; the path is relative, so it is resolved against the
# kernel's current working directory.
df = pd.read_csv('issue_19_data_1.csv')
In [9]:
# Display the whole raw frame (columns: date, number, info) to inspect the
# mixed date formats and the missing values before any cleaning.
df
Out[9]:
date
number
info
0
1890
328.0
prince de Galles futur Edouard VII
1
1900
6043.0
grand-duc Vladimir de Russie
2
NaN
33642.0
Theodore Roosevelt
3
1914
40362.0
Alphonse XII
4
1921
51327.0
Thomas Rockefeller
5
1921
53211.0
l'empereur Hirohito.
6
1929
112151.0
Franklin D. Roosevelt
7
1938
147844.0
le duc de Windsor
8
1948
185397.0
Princesse Elisabeth d'Angleterre
9
NaN
203728.0
Marlene Dietrich
10
NaN
248903.0
Jean Cocteau
11
NaN
253652.0
Charlie Chaplin
12
1950
285793.0
13
NaN
448212.0
Charlie Chaplin
14
1955
NaN
Charlie Chaplin
15
1973
NaN
Charlie Chaplin
16
1976-03-17
500000.0
17
1976
536814.0
Gainsbourg
18
1978
531147.0
Mick Jagger
19
1979
554711.0
Roman Polanski
20
NaN
662614.0
Coluche
21
NaN
821208.0
Catherine Deneuve
22
2000-02-xx
908683.0
Ronaldo
23
NaN
921785.0
Vladimir Poutine
24
NaN
954466.0
Woody Allen
25
2003-04-29
1000000.0
26
NaN
1042624.0
Barbara Carlotti.
27
NaN
1079006.0
Bill Gates
28
2010
1089170.0
Buzz
29
2012
1113000.0
30
NaN
NaN
Lionel Jospin
31
NaN
NaN
Lionel Jospin
32
NaN
NaN
Jacques Chirac
In [11]:
# Render matplotlib figures inside the notebook output cells.
%matplotlib inline
In [13]:
# Quick look at the raw series: 'date' on the x axis, 'number' on the y axis.
# NOTE(review): at this point 'date' still holds the raw CSV values (it is only
# parsed in a later cell), so the x axis may not be a true time axis - confirm.
df.plot('date' , 'number')
Out[13]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f485fd73a90>
In [25]:
import datetime
def convert_date(x):
    """Parse a year-only value such as ``'1890'`` into a ``datetime``.

    Parameters
    ----------
    x : object
        Raw cell value. It is converted with ``str()`` and matched against
        the ``"%Y"`` format. A value that is already a ``datetime.datetime``
        is returned unchanged, so applying the conversion a second time to
        an already-converted column (e.g. when the cell is re-run) does not
        wipe the dates out.

    Returns
    -------
    datetime.datetime or float
        Midnight of January 1st of the parsed year, or ``np.nan`` when the
        value is not a bare year (missing values, full dates such as
        ``'1976-03-17'``, partial dates such as ``'2000-02-xx'``).
    """
    if isinstance(x, datetime.datetime):
        # Already parsed: str(x) would no longer match "%Y" and the value
        # would be lost, so pass it through untouched.
        return x
    try:
        return datetime.datetime.strptime(str(x), "%Y")
    except ValueError:
        # strptime reports a format mismatch with ValueError; only that case
        # is treated as "no usable date". Anything else is a real error and
        # is allowed to propagate instead of being silently swallowed.
        return np.nan
# Parse the 'date' column in place; values convert_date cannot parse become
# missing.
df['date'] = df['date'].apply(convert_date)
# The forecast model is trained on 'date' and 'number' only, so only those two
# columns are required to be present: a row with a usable observation must not
# be discarded just because its 'info' label is missing.
df_train = df.dropna(subset=['date', 'number'])
In [26]:
# Display the training subset: the rows left after date parsing and dropna.
df_train
Out[26]:
date
number
info
0
1890-01-01
328.0
prince de Galles futur Edouard VII
1
1900-01-01
6043.0
grand-duc Vladimir de Russie
3
1914-01-01
40362.0
Alphonse XII
4
1921-01-01
51327.0
Thomas Rockefeller
5
1921-01-01
53211.0
l'empereur Hirohito.
6
1929-01-01
112151.0
Franklin D. Roosevelt
7
1938-01-01
147844.0
le duc de Windsor
8
1948-01-01
185397.0
Princesse Elisabeth d'Angleterre
12
1950-01-01
285793.0
17
1976-01-01
536814.0
Gainsbourg
18
1978-01-01
531147.0
Mick Jagger
19
1979-01-01
554711.0
Roman Polanski
28
2010-01-01
1089170.0
Buzz
29
2012-01-01
1113000.0
In [27]:
import pyaf.ForecastEngine as autof

# Create a pyaf forecast engine and fit it on the cleaned frame, using 'date'
# as the time column and 'number' as the signal, for a horizon of 7 steps.
lEngine = autof.cForecastEngine()
lEngine.train(
    iInputDS=df_train,
    iTime='date',
    iSignal='number',
    iHorizon=7,
)
# Called for its report on the trained model (see the INFO lines in this
# cell's output).
lEngine.getModelInfo()
INFO:pyaf.std:START_TRAINING 'number'
INFO:pyaf.std:END_TRAINING_TIME_IN_SECONDS 'number' 2.8104207515716553
INFO:pyaf.std:TIME_DETAIL TimeVariable='date' TimeMin=1890-01-01T00:00:00.000000 TimeMax=2012-01-01T00:00:00.000000 TimeDelta=3427 days 14:46:09.230769 Estimation = (0 , 14) Validation = (0 , 14) Test = (0 , 14) Horizon=7
INFO:pyaf.std:SIGNAL_DETAIL SignalVariable='CumSum_number' Min=328.0 Max=4707298.0 Mean=1188070.07143 StdDev=1434737.29059
INFO:pyaf.std:BEST_TRANSOFORMATION_TYPE 'CumSum_'
INFO:pyaf.std:BEST_DECOMPOSITION 'CumSum_number_Lag1Trend_residue_zeroCycle_residue_AR(3)' [Lag1Trend + NoCycle + AR(3)]
INFO:pyaf.std:TREND_DETAIL 'CumSum_number_Lag1Trend' [Lag1Trend]
INFO:pyaf.std:CYCLE_DETAIL 'CumSum_number_Lag1Trend_residue_zeroCycle' [NoCycle]
INFO:pyaf.std:AUTOREG_DETAIL 'CumSum_number_Lag1Trend_residue_zeroCycle_residue_AR(3)' [AR(3)]
INFO:pyaf.std:MODEL_MAPE MAPE_Fit=0.2965 MAPE_Forecast=0.2965 MAPE_Test=0.2965
INFO:pyaf.std:MODEL_SMAPE SMAPE_Fit=0.4822 SMAPE_Forecast=0.4822 SMAPE_Test=0.4822
INFO:pyaf.std:MODEL_MASE MASE_Fit=0.5357 MASE_Forecast=0.5357 MASE_Test=0.5357
INFO:pyaf.std:MODEL_L1 L1_Fit=46318.1679123 L1_Forecast=46318.1679123 L1_Test=46318.1679123
INFO:pyaf.std:MODEL_L2 L2_Fit=70082.7714655 L2_Forecast=70082.7714655 L2_Test=70082.7714655
INFO:pyaf.std:MODEL_COMPLEXITY 67
INFO:pyaf.std:AR_MODEL_DETAIL_START
INFO:pyaf.std:AR_MODEL_COEFF 1 CumSum_number_Lag1Trend_residue_zeroCycle_residue_Lag3 1.83751069759
INFO:pyaf.std:AR_MODEL_COEFF 2 CumSum_number_Lag1Trend_residue_zeroCycle_residue_Lag1 0.225997870452
INFO:pyaf.std:AR_MODEL_COEFF 3 CumSum_number_Lag1Trend_residue_zeroCycle_residue_Lag2 -0.17684817679
INFO:pyaf.std:AR_MODEL_DETAIL_END
In [28]:
# Ask the engine to draw its standard plots for the trained model.
# NOTE(review): the recorded output of this cell ends with
# "ValueError: cannot reindex from a duplicate axis", raised inside pyaf's
# prediction_interval_plot. The training data contains two rows with the same
# date (1921-01-01), which may be related - confirm before relying on this cell.
lEngine.standardPlots()
INFO:pyaf.std:START_PLOTTING
/home/antoine/dev/python/packages/pyaf/TS/Plots.py:31: UserWarning:
This call to matplotlib.use() has no effect because the backend has already
been chosen; matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
or matplotlib.backends is imported for the first time.
The backend was *originally* set to 'module://ipykernel.pylab.backend_inline' by the following code:
File "/usr/lib/python3.6/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/usr/lib/python3.6/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py", line 16, in <module>
app.launch_new_instance()
File "/home/antoine/.local/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
app.start()
File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelapp.py", line 477, in start
ioloop.IOLoop.instance().start()
File "/home/antoine/.local/lib/python3.6/site-packages/zmq/eventloop/ioloop.py", line 177, in start
super(ZMQIOLoop, self).start()
File "/home/antoine/.local/lib/python3.6/site-packages/tornado/ioloop.py", line 888, in start
handler_func(fd_obj, events)
File "/home/antoine/.local/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
return fn(*args, **kwargs)
File "/home/antoine/.local/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
self._handle_recv()
File "/home/antoine/.local/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
self._run_callback(callback, msg)
File "/home/antoine/.local/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
callback(*args, **kwargs)
File "/home/antoine/.local/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelbase.py", line 283, in dispatcher
return self.dispatch_shell(stream, msg)
File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelbase.py", line 235, in dispatch_shell
handler(stream, idents, msg)
File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelbase.py", line 399, in execute_request
user_expressions, allow_stdin)
File "/usr/local/lib/python3.6/dist-packages/ipykernel/ipkernel.py", line 196, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "/usr/local/lib/python3.6/dist-packages/ipykernel/zmqshell.py", line 533, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "/usr/lib/python3/dist-packages/IPython/core/interactiveshell.py", line 2718, in run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "/usr/lib/python3/dist-packages/IPython/core/interactiveshell.py", line 2828, in run_ast_nodes
if self.run_code(code, result):
File "/usr/lib/python3/dist-packages/IPython/core/interactiveshell.py", line 2882, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-11-e63dad7f4786>", line 1, in <module>
get_ipython().magic('matplotlib inline')
File "/usr/lib/python3/dist-packages/IPython/core/interactiveshell.py", line 2160, in magic
return self.run_line_magic(magic_name, magic_arg_s)
File "/usr/lib/python3/dist-packages/IPython/core/interactiveshell.py", line 2081, in run_line_magic
result = fn(*args,**kwargs)
File "<decorator-gen-105>", line 2, in matplotlib
File "/usr/lib/python3/dist-packages/IPython/core/magic.py", line 188, in <lambda>
call = lambda f, *a, **k: f(*a, **k)
File "/usr/lib/python3/dist-packages/IPython/core/magics/pylab.py", line 100, in matplotlib
gui, backend = self.shell.enable_matplotlib(args.gui)
File "/usr/lib/python3/dist-packages/IPython/core/interactiveshell.py", line 2950, in enable_matplotlib
pt.activate_matplotlib(backend)
File "/usr/lib/python3/dist-packages/IPython/core/pylabtools.py", line 309, in activate_matplotlib
matplotlib.pyplot.switch_backend(backend)
File "/home/antoine/.local/lib/python3.6/site-packages/matplotlib/pyplot.py", line 229, in switch_backend
matplotlib.use(newbackend, warn=False, force=True)
File "/home/antoine/.local/lib/python3.6/site-packages/matplotlib/__init__.py", line 1305, in use
reload(sys.modules['matplotlib.backends'])
File "/usr/lib/python3.6/importlib/__init__.py", line 166, in reload
_bootstrap._exec(spec, module)
File "/home/antoine/.local/lib/python3.6/site-packages/matplotlib/backends/__init__.py", line 14, in <module>
line for line in traceback.format_stack()
matplotlib.use('Agg')
/home/antoine/.local/lib/python3.6/site-packages/pandas/plotting/_core.py:1714: UserWarning: Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access
series.name = label
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-28-f0a4d8ebc9c6> in <module>()
----> 1 lEngine.standardPlots()
/home/antoine/dev/python/packages/pyaf/ForecastEngine.py in standardPlots(self, name, format)
46
47 def standardPlots(self , name = None, format = 'png'):
---> 48 self.mSignalDecomposition.standardPlots(name, format);
49
50 def getPlotsAsDict(self):
/home/antoine/dev/python/packages/pyaf/TS/SignalDecomposition.py in standardPlots(self, name, format)
623 logger.info("START_PLOTTING")
624 start_time = time.time()
--> 625 self.mBestModel.standardPlots(name, format);
626 lPlotTime = time.time() - start_time;
627 logger.info("END_PLOTTING_TIME_IN_SECONDS " + str(lPlotTime))
/home/antoine/dev/python/packages/pyaf/TS/TimeSeriesModel.py in standardPlots(self, name, format)
309 lForecastColumn + '_Upper_Bound',
310 name = name,
--> 311 format= format);
312 #lOutput.plot()
313
/home/antoine/dev/python/packages/pyaf/TS/Plots.py in prediction_interval_plot(df, time, signal, estimator, lower, upper, name, format, max_length)
100 # last value of the signal
101 lLastSignalPos = df1[signal].dropna().tail(1).index;
--> 102 lEstimtorValue = df1[estimator][lLastSignalPos];
103 df1.loc[lLastSignalPos , lower] = lEstimtorValue;
104 df1.loc[lLastSignalPos , upper] = lEstimtorValue;
/home/antoine/.local/lib/python3.6/site-packages/pandas/core/series.py in __getitem__(self, key)
662 key = check_bool_indexer(self.index, key)
663
--> 664 return self._get_with(key)
665
666 def _get_with(self, key):
/home/antoine/.local/lib/python3.6/site-packages/pandas/core/series.py in _get_with(self, key)
705 return self.loc[key]
706
--> 707 return self.reindex(key)
708 except Exception:
709 # [slice(0, 5, None)] will break if you convert to ndarray,
/home/antoine/.local/lib/python3.6/site-packages/pandas/core/series.py in reindex(self, index, **kwargs)
2638 @Appender(generic._shared_docs['reindex'] % _shared_doc_kwargs)
2639 def reindex(self, index=None, **kwargs):
-> 2640 return super(Series, self).reindex(index=index, **kwargs)
2641
2642 @Appender(generic._shared_docs['fillna'] % _shared_doc_kwargs)
/home/antoine/.local/lib/python3.6/site-packages/pandas/core/generic.py in reindex(self, *args, **kwargs)
3021 # perform the reindex on the axes
3022 return self._reindex_axes(axes, level, limit, tolerance, method,
-> 3023 fill_value, copy).__finalize__(self)
3024
3025 def _reindex_axes(self, axes, level, limit, tolerance, method, fill_value,
/home/antoine/.local/lib/python3.6/site-packages/pandas/core/generic.py in _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
3039 obj = obj._reindex_with_indexers({axis: [new_index, indexer]},
3040 fill_value=fill_value,
-> 3041 copy=copy, allow_dups=False)
3042
3043 return obj
/home/antoine/.local/lib/python3.6/site-packages/pandas/core/generic.py in _reindex_with_indexers(self, reindexers, fill_value, copy, allow_dups)
3143 fill_value=fill_value,
3144 allow_dups=allow_dups,
-> 3145 copy=copy)
3146
3147 if copy and new_data is self._data:
/home/antoine/.local/lib/python3.6/site-packages/pandas/core/internals.py in reindex_indexer(self, new_axis, indexer, axis, fill_value, allow_dups, copy)
4132 # some axes don't allow reindexing with dups
4133 if not allow_dups:
-> 4134 self.axes[axis]._can_reindex(indexer)
4135
4136 if axis >= self.ndim:
/home/antoine/.local/lib/python3.6/site-packages/pandas/core/indexes/base.py in _can_reindex(self, indexer)
2939 # trying to reindex on an axis with duplicates
2940 if not self.is_unique and len(indexer):
-> 2941 raise ValueError("cannot reindex from a duplicate axis")
2942
2943 def reindex(self, target, method=None, level=None, limit=None,
ValueError: cannot reindex from a duplicate axis
In [32]:
# Number of future steps to forecast; also the number of trailing rows shown.
lHorizon = 7
df_forecast = lEngine.forecast(iInputDS=df_train, iHorizon=lHorizon)
# List every column pyaf added to the forecast frame.
print(df_forecast.columns)
# Keep only the time stamp, the point forecast and its bounds.
lForecastColumns = [
    'date',
    'number_Forecast',
    'number_Forecast_Lower_Bound',
    'number_Forecast_Upper_Bound',
]
df_forecast[lForecastColumns].tail(lHorizon)
INFO:pyaf.std:START_FORECASTING
INFO:pyaf.std:END_FORECAST_TIME_IN_SECONDS 0.20051145553588867
Index(['CumSum_number', 'date', 'number', 'row_number', 'date_Normalized',
'CumSum_number_Lag1Trend', 'CumSum_number_Lag1Trend_residue',
'CumSum_number_Lag1Trend_residue_zeroCycle',
'CumSum_number_Lag1Trend_residue_zeroCycle_residue',
'CumSum_number_Lag1Trend_residue_zeroCycle_residue_AR(3)',
'CumSum_number_Lag1Trend_residue_zeroCycle_residue_AR(3)_residue',
'CumSum_number_Trend', 'CumSum_number_Trend_residue',
'CumSum_number_Cycle', 'CumSum_number_Cycle_residue',
'CumSum_number_AR', 'CumSum_number_AR_residue',
'CumSum_number_TransformedForecast', 'number_Forecast',
'CumSum_number_TransformedResidue', 'number_Residue',
'number_Forecast_Lower_Bound', 'number_Forecast_Upper_Bound'],
dtype='object')
Out[32]:
date
number_Forecast
number_Forecast_Lower_Bound
number_Forecast_Upper_Bound
14
2021-05-20 14:46:09.230769216
1.067167e+06
9.298043e+05
1.204529e+06
15
2030-10-08 05:32:18.461538432
2.034668e+06
1.634727e+06
2.434609e+06
16
2040-02-25 20:18:27.692307648
2.305215e+06
1.951239e+06
2.659191e+06
17
2049-07-15 11:04:36.923076864
2.111037e+06
1.613392e+06
2.608682e+06
18
2058-12-03 01:50:46.153846080
3.797103e+06
1.929540e+06
5.664665e+06
19
2068-04-21 16:36:55.384615296
4.709622e+06
1.929136e+06
7.490109e+06
20
2077-09-09 07:23:04.615384512
4.260869e+06
1.424380e+06
7.097359e+06
In [ ]:
Content source: antoinecarme/pyaf
Similar notebooks: