notebook.community

Edit and run



In [1]:

    
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline



In [2]:

    
import requests
from pandas.io.json import json_normalize
import pandas as pd
import numpy as np



In [3]:

    
# Download the data series for one sensor from the GIOS REST API (`getData` endpoint).
sensorId = 4727
r = requests.get(
    'http://api.gios.gov.pl/pjp-api/rest/data/getData/' + str(sensorId),
    timeout=30,  # without a timeout an unresponsive server would hang the kernel indefinitely
)
# Fail right here on an HTTP error status instead of later, when the error body
# would be parsed as if it were measurement JSON.
r.raise_for_status()



In [4]:

    
# Turn the raw API response into a time-ordered frame with an hourly timestamp column
# and a numeric "hours since first row" column.
concentration = json_normalize(r.json())
# The normalised frame keeps the whole list of readings in a single "values" cell;
# extract it once instead of once per column.
readings = concentration["values"].values.item()
if len(readings) < 2:
    # One row is dropped further down, so at least two readings are needed to have data left.
    raise ValueError("expected at least 2 readings in the API response, got %d" % len(readings))

concentrationFrame = pd.DataFrame({
    "dates": [reading[u'date'] for reading in readings],
    "values": [reading[u'value'] for reading in readings],
})
# Reverse the row order (presumably the API returns newest-first - TODO confirm).
# .copy() makes the result an independent frame, so the column assignments below
# do not raise SettingWithCopyWarning.
concentrationFrame = concentrationFrame.iloc[::-1].copy()
# NOTE(review): the timestamps are regenerated as a gap-free hourly range starting at the
# first row's date, so only that one date from the API is actually used. This assumes
# the series has no missing hours - confirm against the API.
rng = pd.date_range(start=concentrationFrame["dates"].iloc[0], periods=len(concentrationFrame), freq='H')
concentrationFrame["dates"] = rng
concentrationFrame = concentrationFrame.iloc[:-1].copy()  # removing last line
firstDatetime = concentrationFrame["dates"].iloc[0]
# Hours elapsed since the first row's timestamp, as a float.
concentrationFrame["relativeTime"] = (concentrationFrame["dates"] - firstDatetime) / np.timedelta64(1, 'h')



In [5]:

    
# Display the prepared frame (dates, values, relativeTime) for a visual check.
concentrationFrame









    Out[5]:






  
    
      
      dates
      values
      relativeTime
    
  
  
    
      60
      2017-04-21 01:00:00
      19.14730
      0.0
    
    
      59
      2017-04-21 02:00:00
      1.65206
      1.0
    
    
      58
      2017-04-21 03:00:00
      12.53800
      2.0
    
    
      57
      2017-04-21 04:00:00
      1.00000
      3.0
    
    
      56
      2017-04-21 05:00:00
      20.45770
      4.0
    
    
      55
      2017-04-21 06:00:00
      22.90880
      5.0
    
    
      54
      2017-04-21 07:00:00
      13.48710
      6.0
    
    
      53
      2017-04-21 08:00:00
      32.13200
      7.0
    
    
      52
      2017-04-21 09:00:00
      17.73870
      8.0
    
    
      51
      2017-04-21 10:00:00
      12.90010
      9.0
    
    
      50
      2017-04-21 11:00:00
      4.74283
      10.0
    
    
      49
      2017-04-21 12:00:00
      10.70450
      11.0
    
    
      48
      2017-04-21 13:00:00
      1.00000
      12.0
    
    
      47
      2017-04-21 14:00:00
      2.95744
      13.0
    
    
      46
      2017-04-21 15:00:00
      27.01130
      14.0
    
    
      45
      2017-04-21 16:00:00
      22.96910
      15.0
    
    
      44
      2017-04-21 17:00:00
      21.15800
      16.0
    
    
      43
      2017-04-21 18:00:00
      1.00000
      17.0
    
    
      42
      2017-04-21 19:00:00
      5.51700
      18.0
    
    
      41
      2017-04-21 20:00:00
      4.58017
      19.0
    
    
      40
      2017-04-21 21:00:00
      6.83833
      20.0
    
    
      39
      2017-04-21 22:00:00
      1.00000
      21.0
    
    
      38
      2017-04-21 23:00:00
      1.00000
      22.0
    
    
      37
      2017-04-22 00:00:00
      1.00000
      23.0
    
    
      36
      2017-04-22 01:00:00
      6.34769
      24.0
    
    
      35
      2017-04-22 02:00:00
      1.90883
      25.0
    
    
      34
      2017-04-22 03:00:00
      12.55590
      26.0
    
    
      33
      2017-04-22 04:00:00
      16.61140
      27.0
    
    
      32
      2017-04-22 05:00:00
      18.76860
      28.0
    
    
      31
      2017-04-22 06:00:00
      18.08410
      29.0
    
    
      30
      2017-04-22 07:00:00
      13.14770
      30.0
    
    
      29
      2017-04-22 08:00:00
      16.37040
      31.0
    
    
      28
      2017-04-22 09:00:00
      7.80183
      32.0
    
    
      27
      2017-04-22 10:00:00
      4.57194
      33.0
    
    
      26
      2017-04-22 11:00:00
      8.06875
      34.0
    
    
      25
      2017-04-22 12:00:00
      5.24153
      35.0
    
    
      24
      2017-04-22 13:00:00
      6.09681
      36.0
    
    
      23
      2017-04-22 14:00:00
      20.31990
      37.0
    
    
      22
      2017-04-22 15:00:00
      14.82320
      38.0
    
    
      21
      2017-04-22 16:00:00
      9.87542
      39.0
    
    
      20
      2017-04-22 17:00:00
      2.72983
      40.0
    
    
      19
      2017-04-22 18:00:00
      3.98144
      41.0
    
    
      18
      2017-04-22 19:00:00
      6.63619
      42.0
    
    
      17
      2017-04-22 20:00:00
      1.95631
      43.0
    
    
      16
      2017-04-22 21:00:00
      16.38690
      44.0
    
    
      15
      2017-04-22 22:00:00
      18.20030
      45.0
    
    
      14
      2017-04-22 23:00:00
      15.53640
      46.0
    
    
      13
      2017-04-23 00:00:00
      17.38690
      47.0
    
    
      12
      2017-04-23 01:00:00
      22.25380
      48.0
    
    
      11
      2017-04-23 02:00:00
      9.44294
      49.0
    
    
      10
      2017-04-23 03:00:00
      10.27420
      50.0
    
    
      9
      2017-04-23 04:00:00
      14.60180
      51.0
    
    
      8
      2017-04-23 05:00:00
      1.00000
      52.0
    
    
      7
      2017-04-23 06:00:00
      6.04558
      53.0
    
    
      6
      2017-04-23 07:00:00
      14.71620
      54.0
    
    
      5
      2017-04-23 08:00:00
      34.49780
      55.0
    
    
      4
      2017-04-23 09:00:00
      5.20961
      56.0
    
    
      3
      2017-04-23 10:00:00
      1.00000
      57.0
    
    
      2
      2017-04-23 11:00:00
      5.86842
      58.0
    
    
      1
      2017-04-23 12:00:00
      9.66539
      59.0



In [6]:

    
from tpot import TPOTRegressor
from sklearn.model_selection import train_test_split

# Fixed seed so that both the random train/test split and TPOT's stochastic pipeline
# search produce the same result on every run of the notebook.
SEED = 42

# scikit-learn expects a 2-D feature matrix; selecting with a list of one column
# yields shape (n_samples, 1) without a manual reshape.
X = concentrationFrame[["relativeTime"]].values
y = concentrationFrame["values"].values

# NOTE(review): the split is shuffled, not chronological, so the held-out score reflects
# interpolation between known hours rather than forecasting past the last one.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.75, test_size=0.25, random_state=SEED)

tpot = TPOTRegressor(generations=5, population_size=50, verbosity=2, random_state=SEED)
tpot.fit(X_train, y_train)
print(tpot.score(X_test, y_test))
# Write the best pipeline found as a standalone Python script.
tpot.export('tpot_concentration_pipeline.py')









    



Version 0.7.1 of tpot is outdated. Version 0.7.2 was released 2 days ago.






    



Optimization Progress:  33%|███▎      | 100/300 [02:35<02:49,  1.18pipeline/s]





    



Generation 1 - Current best internal CV score: 59.2572857379






    



Optimization Progress:  50%|█████     | 150/300 [03:49<03:28,  1.39s/pipeline]





    



Generation 2 - Current best internal CV score: 59.2178981236






    



Optimization Progress:  67%|██████▋   | 200/300 [05:49<01:54,  1.14s/pipeline]





    



Generation 3 - Current best internal CV score: 59.2178981236






    



Optimization Progress:  83%|████████▎ | 250/300 [08:42<01:29,  1.78s/pipeline]





    



Generation 4 - Current best internal CV score: 56.3100117396






    









    



Generation 5 - Current best internal CV score: 56.3100117396

Best pipeline: RandomForestRegressor(RBFSampler(input_matrix, RBFSampler__gamma=0.75), RandomForestRegressor__bootstrap=DEFAULT, RandomForestRegressor__max_features=0.05, RandomForestRegressor__min_samples_leaf=DEFAULT, RandomForestRegressor__min_samples_split=3, RandomForestRegressor__n_estimators=100)
82.4052705079



In [7]:

    
# Predict the 24 hours that follow the last row of concentrationFrame and append
# them to the observed data in one frame indexed by timestamp.
lastDatetime = concentrationFrame["dates"].iloc[-1]
# Generate 25 hourly stamps starting at the last observed timestamp and drop the first,
# which is that observation itself. This gives the same 24 stamps as closed='right'
# but does not depend on that keyword, which newer pandas releases no longer accept.
ftr = pd.date_range(start=lastDatetime, periods=25, freq='H')[1:]
predctionFrame = pd.DataFrame({"dates": ftr})
predctionFrame["relativeTime"] = (predctionFrame["dates"] - firstDatetime) / np.timedelta64(1, 'h')
# Predict every row in a single call with a 2-D (n_samples, 1) array. The previous
# per-row apply passed a bare scalar, which is not the input shape scikit-learn expects.
predctionFrame["predictedValues"] = tpot.predict(predctionFrame[["relativeTime"]].values)
# Observed rows have no "predictedValues" and predicted rows have no "values";
# concat leaves those cells as NaN.
wholeFrame = pd.concat([concentrationFrame, predctionFrame]).set_index("dates")



In [8]:

    
# Draw the predicted and the observed series on one set of axes, with a grid.
wholeFrame.loc[:, ["predictedValues", "values"]].plot(grid=True, figsize=(15, 5))









    Out[8]:





<matplotlib.axes._subplots.AxesSubplot at 0x53c1050>



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:

	dates	values	relativeTime
60	2017-04-21 01:00:00	19.14730	0.0
59	2017-04-21 02:00:00	1.65206	1.0
58	2017-04-21 03:00:00	12.53800	2.0
57	2017-04-21 04:00:00	1.00000	3.0
56	2017-04-21 05:00:00	20.45770	4.0
55	2017-04-21 06:00:00	22.90880	5.0
54	2017-04-21 07:00:00	13.48710	6.0
53	2017-04-21 08:00:00	32.13200	7.0
52	2017-04-21 09:00:00	17.73870	8.0
51	2017-04-21 10:00:00	12.90010	9.0
50	2017-04-21 11:00:00	4.74283	10.0
49	2017-04-21 12:00:00	10.70450	11.0
48	2017-04-21 13:00:00	1.00000	12.0
47	2017-04-21 14:00:00	2.95744	13.0
46	2017-04-21 15:00:00	27.01130	14.0
45	2017-04-21 16:00:00	22.96910	15.0
44	2017-04-21 17:00:00	21.15800	16.0
43	2017-04-21 18:00:00	1.00000	17.0
42	2017-04-21 19:00:00	5.51700	18.0
41	2017-04-21 20:00:00	4.58017	19.0
40	2017-04-21 21:00:00	6.83833	20.0
39	2017-04-21 22:00:00	1.00000	21.0
38	2017-04-21 23:00:00	1.00000	22.0
37	2017-04-22 00:00:00	1.00000	23.0
36	2017-04-22 01:00:00	6.34769	24.0
35	2017-04-22 02:00:00	1.90883	25.0
34	2017-04-22 03:00:00	12.55590	26.0
33	2017-04-22 04:00:00	16.61140	27.0
32	2017-04-22 05:00:00	18.76860	28.0
31	2017-04-22 06:00:00	18.08410	29.0
30	2017-04-22 07:00:00	13.14770	30.0
29	2017-04-22 08:00:00	16.37040	31.0
28	2017-04-22 09:00:00	7.80183	32.0
27	2017-04-22 10:00:00	4.57194	33.0
26	2017-04-22 11:00:00	8.06875	34.0
25	2017-04-22 12:00:00	5.24153	35.0
24	2017-04-22 13:00:00	6.09681	36.0
23	2017-04-22 14:00:00	20.31990	37.0
22	2017-04-22 15:00:00	14.82320	38.0
21	2017-04-22 16:00:00	9.87542	39.0
20	2017-04-22 17:00:00	2.72983	40.0
19	2017-04-22 18:00:00	3.98144	41.0
18	2017-04-22 19:00:00	6.63619	42.0
17	2017-04-22 20:00:00	1.95631	43.0
16	2017-04-22 21:00:00	16.38690	44.0
15	2017-04-22 22:00:00	18.20030	45.0
14	2017-04-22 23:00:00	15.53640	46.0
13	2017-04-23 00:00:00	17.38690	47.0
12	2017-04-23 01:00:00	22.25380	48.0
11	2017-04-23 02:00:00	9.44294	49.0
10	2017-04-23 03:00:00	10.27420	50.0
9	2017-04-23 04:00:00	14.60180	51.0
8	2017-04-23 05:00:00	1.00000	52.0
7	2017-04-23 06:00:00	6.04558	53.0
6	2017-04-23 07:00:00	14.71620	54.0
5	2017-04-23 08:00:00	34.49780	55.0
4	2017-04-23 09:00:00	5.20961	56.0
3	2017-04-23 10:00:00	1.00000	57.0
2	2017-04-23 11:00:00	5.86842	58.0
1	2017-04-23 12:00:00	9.66539	59.0