# Задача: Спрогнозировать цену закрытия последней свечи торгового дня



In [2]:

    
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt



In [3]:

    
data = pd.read_csv('SBERP_prepared.csv')



In [4]:

    
print(data[:10])
print(len(data))









    



       DATE   TIME1  OPEN1  HIGH1   LOW1  CLOSE1     VOL1   TIME2  OPEN2  \
0  20120301  110000  75.38  75.45  74.91   74.95  2334800  120000  74.95   
1  20120302  110000  76.01  76.35  75.79   76.13  3527600  120000  76.14   
2  20120305  110000  77.09  78.59  77.09   78.39  5464300  120000  78.38   
3  20120306  110000  77.15  77.20  76.71   77.03  2307800  120000  77.03   
4  20120307  110000  74.00  74.20  73.23   73.86  3703100  120000  73.86   
5  20120311  110000  76.00  76.19  75.81   75.93  3123500  120000  75.89   
6  20120312  110000  75.78  75.79  75.30   75.78  2782200  120000  75.79   
7  20120313  110000  77.39  77.92  77.33   77.61  1430000  120000  77.62   
8  20120314  110000  80.05  80.60  80.00   80.25  5095200  120000  80.25   
9  20120315  110000  83.37  83.38  81.92   82.40  6368600  120000  82.40   

   HIGH2    ...      HIGH8   LOW8  CLOSE8     VOL8     TIME9  OPEN9  HIGH9  \
0  75.21    ...      75.30  75.12   75.16   754600  190000.0  75.14  75.61   
1  76.30    ...      76.72  76.03   76.72  4305900  190000.0  76.72  77.48   
2  78.40    ...      78.11  77.72   78.10  1385100  190000.0  78.10  78.11   
3  77.11    ...      75.83  75.03   75.07  1457600  190000.0  75.07  75.11   
4  74.37    ...      74.56  73.90   74.40  2827700  190000.0  74.40  74.78   
5  76.04    ...      75.94  75.90   75.94   318400  190000.0  75.94  76.25   
6  76.05    ...      77.19  76.91   77.00  1332100  190000.0  76.99  77.10   
7  77.88    ...      78.84  78.15   78.77  3240000  190000.0  78.77  79.45   
8  80.30    ...      83.19  82.60   83.09  3638200  190000.0  83.09  83.60   
9  82.56    ...      81.90  80.11   80.72  9559500  190000.0  80.75  81.08   

    LOW9  CLOSE9       VOL9  
0  75.12   75.58  2269200.0  
1  76.68   77.09  6021500.0  
2  77.53   77.59  2659900.0  
3  73.80   73.81  5285200.0  
4  74.21   74.53  5004100.0  
5  75.92   76.13  2409400.0  
6  76.90   77.08  1374200.0  
7  78.72   79.22  5444000.0  
8  83.01   83.24  8129900.0  
9  80.50   80.99  3224700.0  

[10 rows x 55 columns]
1590



In [5]:

    
train_data = data[:1000].dropna()
test_data = data[1000:].dropna()



In [7]:

    
train_features = train_data.drop(["DATE", "CLOSE9", "LOW9", "HIGH9", "VOL9","TIME1", "TIME2", "TIME3", "TIME4", 
                                  "TIME5", "TIME6", "TIME7", "TIME8", "TIME9"], axis=1)
train_target = train_data["CLOSE9"]

test_features = test_data.drop(["DATE", "CLOSE9", "LOW9", "HIGH9", "VOL9","TIME1", "TIME2", "TIME3", "TIME4", 
                                  "TIME5", "TIME6", "TIME7", "TIME8", "TIME9"], axis=1)
test_target = test_data["CLOSE9"]



In [8]:

    
train_features.head()









    Out[8]:







  
    
      
      OPEN1
      HIGH1
      LOW1
      CLOSE1
      VOL1
      OPEN2
      HIGH2
      LOW2
      CLOSE2
      VOL2
      ...
      HIGH7
      LOW7
      CLOSE7
      VOL7
      OPEN8
      HIGH8
      LOW8
      CLOSE8
      VOL8
      OPEN9
    
  
  
    
      0
      75.38
      75.45
      74.91
      74.95
      2334800
      74.95
      75.21
      74.83
      74.90
      1617800
      ...
      75.45
      75.09
      75.19
      1137900
      75.20
      75.30
      75.12
      75.16
      754600
      75.14
    
    
      1
      76.01
      76.35
      75.79
      76.13
      3527600
      76.14
      76.30
      75.95
      76.22
      2527000
      ...
      76.20
      75.65
      76.15
      4247300
      76.06
      76.72
      76.03
      76.72
      4305900
      76.72
    
    
      2
      77.09
      78.59
      77.09
      78.39
      5464300
      78.38
      78.40
      77.49
      78.00
      3856900
      ...
      77.75
      77.42
      77.73
      287600
      77.72
      78.11
      77.72
      78.10
      1385100
      78.10
    
    
      3
      77.15
      77.20
      76.71
      77.03
      2307800
      77.03
      77.11
      76.70
      76.72
      1415800
      ...
      75.90
      75.34
      75.71
      2142600
      75.71
      75.83
      75.03
      75.07
      1457600
      75.07
    
    
      4
      74.00
      74.20
      73.23
      73.86
      3703100
      73.86
      74.37
      73.86
      74.15
      1540400
      ...
      74.12
      73.69
      73.94
      867300
      73.91
      74.56
      73.90
      74.40
      2827700
      74.40
    
  

5 rows × 41 columns



In [14]:

    
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
train_features_scaled = scaler.fit_transform(train_features)
test_features_scaled = scaler.transform(test_features.dropna())



In [23]:

    
test_features_scaled[:10]









    Out[23]:





array([[ 0.68690351,  0.67092634,  0.63475935,  0.64903916, -0.73922116,
         0.64458445,  0.64056894,  0.62933942,  0.65490576, -0.7694075 ,
         0.65498757,  0.74830473,  0.67590819,  0.77136266,  0.07125648,
         0.77507511,  0.79106297,  0.7964571 ,  0.79594853,  0.89827193,
         0.79797789,  0.78361049,  0.79361029,  0.79056248, -0.33122932,
         0.79131609,  0.7720053 ,  0.77653772,  0.75879514, -0.65599519,
         0.75958877,  0.76165937,  0.77479855,  0.77911745, -0.92728655,
         0.77804419,  0.77221618,  0.75552748,  0.75389648, -0.73844219,
         0.74724916],
       [ 0.83643222,  0.85223209,  0.85737366,  0.87025839, -0.38284844,
         0.86673926,  0.88830257,  0.88885508,  0.90193789,  0.67112094,
         0.90017134,  0.92673952,  0.92753246,  0.95083171, -0.2916796 ,
         0.95081369,  0.94447161,  0.96096849,  0.96150053, -0.40409452,
         0.9616756 ,  0.9474978 ,  0.94980842,  0.94328789, -0.56637054,
         0.94123265,  0.91906104,  0.93186017,  0.93356215, -0.75090049,
         0.93433992,  0.94288998,  0.95396134,  0.9415165 , -0.62251011,
         0.94322007,  0.92741442,  0.93370737,  0.92219218, -0.82252115,
         0.91739687],
       [ 0.87172472,  0.85688096,  0.87128706,  0.88791875, -0.77702898,
         0.88625914,  0.91344846,  0.90461139,  0.9130822 , -0.57579492,
         0.91038734,  0.91001126,  0.9080339 ,  0.93595345, -0.80391277,
         0.93500652,  0.9556286 ,  0.95632127,  0.96708094, -0.50752941,
         0.96725621,  0.95215369,  0.95631667,  0.95446292, -0.90231808,
         0.95054424,  0.93209129,  0.93651055,  0.9214772 , -0.7555354 ,
         0.92411512,  0.94381937,  0.93261033,  0.95079644, -0.78132559,
         0.95342757,  0.95622368,  0.96061995,  0.96775289, -0.67383782,
         0.96667463],
       [ 1.08812341,  1.10884948,  1.09575649,  1.10913799, -0.33999285,
         1.10934347,  1.14628082,  1.1140776 ,  1.14154048, -0.15017111,
         1.14442639,  1.12097322,  1.09094896,  1.09589478, -0.37115379,
         1.09586775,  1.11926449,  1.10874991,  1.11217146, -0.51673526,
         1.11514221,  1.10672923,  1.09763879,  1.08949454, -0.80540872,
         1.09394269,  1.09683094,  1.07509166,  1.10368111, -0.60677685,
         1.09793675,  1.13434387,  1.11084481,  1.13175538, -0.59528065,
         1.13159481,  1.11420992,  1.10075101,  1.11745238, -0.63727922,
         1.11636743],
       [ 1.19678713,  1.16835496,  1.1736715 ,  1.14817668, -0.79939061,
         1.14094709,  1.12579157,  1.12334602,  1.11646579, -0.80277502,
         1.11192097,  1.08937539,  1.05659436,  1.05590945, -0.31521206,
         1.05216557,  1.05232253,  1.04554779,  1.06101775, -0.73483168,
         1.06119637,  1.05085856,  1.05394051,  1.06341947, -0.84018727,
         1.05855865,  1.06797824,  1.06486084,  1.0637078 , -0.82807542,
         1.07190998,  1.04976958,  1.0477201 ,  1.02874798, -0.70634091,
         1.02859188,  1.01477152,  1.00052482,  1.02075209, -0.43629434,
         1.01595238],
       [ 0.95716969,  0.96380487,  0.98166665,  0.97993852, -0.83361451,
         0.98385854,  1.0000621 ,  1.00749081,  0.98552019, -0.76067527,
         0.98189927,  0.99365256,  1.0055267 ,  1.01406433, -0.84484138,
         1.01404238,  1.01885156,  1.02974726,  1.03869613, -0.57923029,
         1.03887395,  1.01919851,  1.02604798,  1.01685684, -0.88187741,
         1.01572535,  1.00561916,  1.00905636,  1.01722721, -0.7739279 ,
         1.01706786,  1.02653488,  0.95303303,  0.93409254, -0.12567612,
         0.93208462,  0.93949572,  0.92349914,  0.92777104, -0.49285231,
         0.92948387],
       [ 1.00082093,  0.99541716,  0.99650761,  0.97622055, -0.66673515,
         0.97549287,  0.99819944,  0.99544187,  1.01245226, -0.89749945,
         1.01254724,  0.99272321,  1.0045982 ,  0.99453661, -0.91329403,
         0.99079654,  1.01048381,  1.01022896,  1.03032552, -0.74770491,
         1.02957295,  1.02757911,  1.03534549,  1.04106941, -0.54262113,
         1.04645359,  1.04843285,  1.05649017,  1.04976363, -0.54541947,
         1.05424896,  1.03954631,  1.04029366,  1.025964  , -0.8471186 ,
         1.02673597,  1.03057014,  1.04506979,  1.03655887, -0.64894065,
         1.03826684],
       [ 1.12248715,  1.1116388 ,  1.13100375,  1.11564444, -0.97157012,
         1.11492058,  1.13417354,  1.12890707,  1.13875441, -0.88159639,
         1.13885404,  1.13212539,  1.14573063,  1.13123066, -0.94749057,
         1.1302716 ,  1.13785947,  1.15057483,  1.15588463, -0.66410378,
         1.15792684,  1.14025164,  1.14784533,  1.14257593, -0.85415751,
         1.14236295,  1.13964464,  1.15321792,  1.14644325, -0.86890678,
         1.15184934,  1.13527326,  1.15447513,  1.13732335, -0.86080244,
         1.13716254,  1.12907921,  1.15457618,  1.13511878, -0.50381406,
         1.13589258],
       [ 1.1308459 ,  1.10420061,  1.09575649,  1.06173387, -0.71302323,
         1.06100854,  1.05314788,  1.0538329 ,  1.0561008 , -0.80832663,
         1.05898356,  1.06707104,  1.06866489,  1.04661053, -0.67326293,
         1.0503059 ,  1.03372755,  1.04740668,  1.03590593, -0.32550796,
         1.03608365,  1.02664793,  1.0502215 ,  1.04758817, -0.61144295,
         1.04459127,  1.03726406,  1.04532927,  1.02466411, -0.05595824,
         1.02450408,  1.06278101,  1.04400688,  1.02874798, -0.27474705,
         1.03601551,  1.02127684,  1.04506979,  1.03841849, -0.64322655,
         1.03547753],
       [ 1.03889967,  1.00843398,  1.03175487,  1.02548366, -0.92301055,
         1.02568685,  1.06059851,  1.05197921,  1.08303287, -0.40074556,
         1.08405918,  1.08008191,  1.08166393,  1.07543717, -0.26725351,
         1.07541141,  1.06533902,  1.04183002,  1.03404579, -0.69561325,
         1.03515355,  1.04992738,  1.05487026,  1.05131318, -0.23887868,
         1.05204054,  1.04005626,  1.05463002,  1.04046751, -0.51775715,
         1.03472888,  1.0479108 ,  1.05421823,  1.03895592, -0.09934509,
         1.03879938,  1.02592349,  0.98846056,  0.98262986,  0.07634245,
         0.98155093]])



In [130]:

    
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler

knr = KNeighborsRegressor(n_neighbors=5, weights='distance')
knr.fit(train_features_scaled, train_target)









    Out[130]:





KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
          metric_params=None, n_jobs=1, n_neighbors=5, p=2,
          weights='distance')



In [145]:

    
knr_predictions = knr.predict(test_features_scaled)
plt.plot(knr_predictions[:200], c='blue')
plt.plot(test_target.values.tolist()[:200], c='green')
plt.show()



In [181]:

    
print("Правильность на обучающем наборе: %.4f" % knr.score(train_features_scaled, train_target))
print("Правильность на тестовом наборе: %.4f" % knr.score(test_features_scaled, test_target))









    



Правильность на обучающем наборе: 1.0000
Правильность на тестовом наборе: -1.5759



In [147]:

    
from sklearn.linear_model import LinearRegression

lr = LinearRegression()
lr.fit(train_features_scaled, train_target)









    Out[147]:





LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)



In [150]:

    
lr_predictions = lr.predict(test_features_scaled)
plt.plot(lr_predictions, c='blue')
plt.plot(test_target.values.tolist(), c='green')
plt.show()



In [180]:

    
print("Правильность на обучающем наборе: %.4f" % lr.score(train_features_scaled, train_target))
print("Правильность на тестовом наборе: %.4f" % lr.score(test_features_scaled, test_target))









    



Правильность на обучающем наборе: 0.9989
Правильность на тестовом наборе: 0.9998



In [18]:

    
from sklearn.linear_model import Ridge

ridge = Ridge()
ridge.fit(train_features_scaled, train_target)









    Out[18]:





Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001)



In [24]:

    
p = ridge.predict([[0.68690351,  0.67092634,  0.63475935,  0.64903916, -0.73922116,
         0.64458445,  0.64056894,  0.62933942,  0.65490576, -0.7694075 ,
         0.65498757,  0.74830473,  0.67590819,  0.77136266,  0.07125648,
         0.77507511,  0.79106297,  0.7964571 ,  0.79594853,  0.89827193,
         0.79797789,  0.78361049,  0.79361029,  0.79056248, -0.33122932,
         0.79131609,  0.7720053 ,  0.77653772,  0.75879514, -0.65599519,
         0.75958877,  0.76165937,  0.77479855,  0.77911745, -0.92728655,
         0.77804419,  0.77221618,  0.75552748,  0.75389648, -0.73844219,
         0.74724916]])



In [26]:

    
print(p)

test_target









    



[73.62203801]






    Out[26]:





1000     73.76
1001     75.21
1002     76.90
1003     77.31
1004     75.20
1005     75.80
1006     76.82
1007     77.82
1008     76.51
1009     75.56
1010     77.03
1011     76.80
1012     75.62
1013     77.00
1014     78.45
1015     79.70
1016     79.81
1017     80.03
1018     78.93
1019     78.75
1020     79.65
1021     78.65
1022     77.60
1023     79.09
1024     79.09
1025     77.29
1026     76.26
1027     75.13
1028     76.10
1029     76.80
         ...  
1560    197.62
1561    195.49
1562    198.68
1563    198.27
1564    195.29
1565    196.70
1566    196.03
1567    197.00
1568    193.80
1569    195.50
1570    196.00
1571    196.02
1572    195.39
1573    191.30
1574    193.10
1575    192.50
1576    189.32
1577    189.20
1578    188.00
1579    190.14
1580    189.26
1581    187.45
1582    185.00
1583    183.50
1584    188.44
1585    186.43
1586    191.50
1587    179.90
1588    178.27
1589    183.66
Name: CLOSE9, Length: 589, dtype: float64



In [182]:

    
ridge_predictions = ridge.predict(test_features_scaled)
plt.plot(ridge_predictions, c='blue')
plt.plot(test_target.values.tolist(), c='green')
plt.show()



In [183]:

    
print("Правильность на обучающем наборе: %.4f" % ridge.score(train_features_scaled, train_target))
print("Правильность на тестовом наборе: %.4f" % ridge.score(test_features_scaled, test_target))









    



Правильность на обучающем наборе: 0.9988
Правильность на тестовом наборе: 0.9997



In [184]:

    
from sklearn.linear_model import Lasso

lasso = Lasso()
lasso.fit(train_features_scaled, train_target)









    



/usr/local/lib/python2.7/site-packages/sklearn/linear_model/coordinate_descent.py:491: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Fitting data with very small alpha may cause precision problems.
  ConvergenceWarning)






    Out[184]:





Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)



In [185]:

    
lasso_predictions = lasso.predict(test_features_scaled)
plt.plot(lasso_predictions, c='blue')
plt.plot(test_target.values.tolist(), c='green')
plt.show()



In [192]:

    
print("Правильность на обучающем наборе: %.4f" % lasso.score(train_features_scaled, train_target))
print("Правильность на тестовом наборе: %.4f" % lasso.score(test_features_scaled, test_target))
print("Количество использованных признаков: %d" % np.sum(lasso.coef_ != 0))









    



Правильность на обучающем наборе: 0.9900
Правильность на тестовом наборе: 0.9657
Количество использованных признаков: 8



In [195]:

    
to_day_data = [[182.8000000,184.5000000,182.4000000,183.9400000,1073500,
    183.8500000,184.0200000,183.3800000,183.9000000,435300,
    183.8300000,184.4500000,183.1300000,183.3400000,603300,
    183.3400000,183.6900000,182.6400000,182.7500000,351600,
    182.7500000,183.8900000,182.7500000,183.0600000,248400,
    183.0600000,183.1800000,181.7800000,182.3800000,507800,
    182.2400000,183.4000000,182.0300000,183.2400000,525900,
    183.1400000,183.7500000,182.9000000,183.3000000,556000,
    183.3000000]]

print("Прогноз от knr: %.4f" % knr.predict(scaler.transform(to_day_data))[0])
print("Прогноз от lr: %.4f" % lr.predict(scaler.transform(to_day_data))[0])
print("Прогноз от ridge: %.4f" % ridge.predict(scaler.transform(to_day_data))[0])
print("Прогноз от lasso: %.4f" % lasso.predict(scaler.transform(to_day_data))[0])
print("Реальное значение: %.4f" % 182.9100000)









    



Прогноз от knr: 85.2905
Прогноз от lr: 183.4307
Прогноз от ridge: 183.2162
Прогноз от lasso: 172.3347
Реальное значение: 182.9100



In [1]:

    
res = pd.DataFrame(test_data["CLOSE9"].tolist(), columns = ["ACTUAL_CLOSE9"])
res["OPEN9"] = pd.Series(test_features["OPEN9"].tolist())
res["PREDICTED_CLOSE9"] = pd.Series(lr_predictions)
res["RESULT"] = np.where(res['PREDICTED_CLOSE9'] >= res['OPEN9'], res.ACTUAL_CLOSE9 - res.OPEN9, (res.OPEN9 - res.ACTUAL_CLOSE9))

print(np.sum(res["RESULT"]))









    



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-0b80c4929cfe> in <module>()
----> 1 res = pd.DataFrame(test_data["CLOSE9"].tolist(), columns = ["ACTUAL_CLOSE9"])
      2 res["OPEN9"] = pd.Series(test_features["OPEN9"].tolist())
      3 res["PREDICTED_CLOSE9"] = pd.Series(lr_predictions)
      4 res["RESULT"] = np.where(res['PREDICTED_CLOSE9'] >= res['OPEN9'], res.ACTUAL_CLOSE9 - res.OPEN9, (res.OPEN9 - res.ACTUAL_CLOSE9))
      5 

NameError: name 'pd' is not defined



In [ ]:

	OPEN1	HIGH1	LOW1	CLOSE1	VOL1	OPEN2	HIGH2	LOW2	CLOSE2	VOL2	...	HIGH7	LOW7	CLOSE7	VOL7	OPEN8	HIGH8	LOW8	CLOSE8	VOL8	OPEN9
0	75.38	75.45	74.91	74.95	2334800	74.95	75.21	74.83	74.90	1617800	...	75.45	75.09	75.19	1137900	75.20	75.30	75.12	75.16	754600	75.14
1	76.01	76.35	75.79	76.13	3527600	76.14	76.30	75.95	76.22	2527000	...	76.20	75.65	76.15	4247300	76.06	76.72	76.03	76.72	4305900	76.72
2	77.09	78.59	77.09	78.39	5464300	78.38	78.40	77.49	78.00	3856900	...	77.75	77.42	77.73	287600	77.72	78.11	77.72	78.10	1385100	78.10
3	77.15	77.20	76.71	77.03	2307800	77.03	77.11	76.70	76.72	1415800	...	75.90	75.34	75.71	2142600	75.71	75.83	75.03	75.07	1457600	75.07
4	74.00	74.20	73.23	73.86	3703100	73.86	74.37	73.86	74.15	1540400	...	74.12	73.69	73.94	867300	73.91	74.56	73.90	74.40	2827700	74.40