In [2]:
from data.finance.LoadDataYahoo import LoadDataFromSQL, LoadDataFromYahoo
from data.finance.addins.main import (
Utils,
testVisualization )
from data.finance.addins.mlToFinance1 import PredictSVMOnQuandl, LinkQuandl
from data.finance.addins.neuralNetTestModified import NNtrainer, NeuralNetForward
from data.finance.addins.reloadSandP import LoadYahooFinance, CompanyHistory
import numpy as np
In [2]:
'''
Test and upload of required data-sources (MSSQL) and libraries
'''
#from data.finance.addins.reloadSandP import main
pq = PredictSVMOnQuandl()
pq.populateCompanyData(readClassifier = True, runList = False)
pq.SupportVectorMachineAsAutomated()
loading stocks to shared dataframe, recordset based on query:
SELECT distinct [GICS_Sector] FROM [finance].[dbo].[SandP500Index]
loading stocks to shared dataframe, recordset based on query:
SELECT [symbol], [dateAdded], [CIK], [security] FROM [dbo].[SandP500Index]
statistics loaded
confidence: -0.0468546548935
Out[2]:
(SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='auto',
kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False),
-0.046854654893454084)
In [3]:
'''
The two plots, visualized at bottom part, measure
- the similarity of specific companies using a heatmap and
- volatility by measuring (rolling) mean as red and (rolling) standard deviation as black signals.
'''
#from data.finance.addins.main import main as financeAddinsMain
testVisualization ()
loading stocks to shared dataframe
Stocks loaded: 0
Stocks loaded: 10
Stocks loaded: 20
Stocks loaded: 30
Stocks loaded: 40
Stocks loaded: 50
symbol Date Open High Low Close Volume \
0 AAP 2001-11-29 40.159999 43.399999 40.079999 41.640000 371100.0
1 AAP 2001-11-30 41.640000 42.799999 41.640000 42.799999 165300.0
2 AAP 2001-12-03 42.700001 42.700001 41.350001 41.350001 127500.0
3 AAP 2001-12-04 41.350001 41.350001 39.700001 39.700001 95400.0
4 AAP 2001-12-05 40.300000 44.350001 40.300000 44.000001 598200.0
Adj Close 100ma
0 13.273657 13.273657
1 13.643432 13.458545
2 13.181214 13.366101
3 12.655240 13.188386
4 14.025959 13.355900
Column labels: Index(['AAP', 'BBBY', 'BBY', 'BWA', 'CBS', 'CCL', 'CMCSA', 'COH', 'DG', 'DHI',
'DIS', 'DLPH', 'DRI', 'EXPE', 'F', 'FL', 'FOX', 'FOXA', 'GM', 'GPC',
'GPS', 'GRMN', 'GT', 'HAR', 'HAS', 'HBI', 'HD', 'HOG', 'IPG', 'JWN',
'KSS', 'LEN', 'LOW', 'M', 'MAR', 'MAT', 'NKE', 'NWL', 'OMC', 'PHM',
'PVH', 'RCL', 'RL', 'ROST', 'SBUX', 'SIG', 'SNA', 'SNI', 'SWK', 'TGNA',
'TGT', 'TIF', 'TJX', 'TSCO', 'TWX', 'ULTA', 'VFC', 'VIAB', 'WHR',
'WYN'],
dtype='object', name='symbol')
Row labels: Index(['AAP', 'BBBY', 'BBY', 'BWA', 'CBS', 'CCL', 'CMCSA', 'COH', 'DG', 'DHI',
'DIS', 'DLPH', 'DRI', 'EXPE', 'F', 'FL', 'FOX', 'FOXA', 'GM', 'GPC',
'GPS', 'GRMN', 'GT', 'HAR', 'HAS', 'HBI', 'HD', 'HOG', 'IPG', 'JWN',
'KSS', 'LEN', 'LOW', 'M', 'MAR', 'MAT', 'NKE', 'NWL', 'OMC', 'PHM',
'PVH', 'RCL', 'RL', 'ROST', 'SBUX', 'SIG', 'SNA', 'SNI', 'SWK', 'TGNA',
'TGT', 'TIF', 'TJX', 'TSCO', 'TWX', 'ULTA', 'VFC', 'VIAB', 'WHR',
'WYN'],
dtype='object', name='symbol')
loading stocks to shared dataframe
Stocks loaded: 0
Stocks loaded: 10
Stocks loaded: 20
Stocks loaded: 30
symbol Date Open High Low Close Volume \
0 ADM 2000-01-03 11.999999 12.062499 11.8750 11.999999 984600.0
1 ADM 2000-01-04 11.812500 12.187499 11.8125 11.875000 1088000.0
2 ADM 2000-01-05 11.875000 11.875000 11.6250 11.687500 1087900.0
3 ADM 2000-01-06 11.625000 11.875000 11.5625 11.750000 899900.0
4 ADM 2000-01-07 11.875000 11.999999 11.8125 11.937500 1186200.0
Adj Close 100ma
0 7.855350 7.855350
1 7.773524 7.814437
2 7.650784 7.759886
3 7.691697 7.742839
4 7.814437 7.757158
Column labels: Index(['ADM', 'BF-B', 'CAG', 'CHD', 'CLX', 'COST', 'COTY', 'CPB', 'CVS', 'DPS',
'EL', 'GIS', 'HRL', 'HSY', 'K', 'KHC', 'KO', 'KR', 'MDLZ', 'MKC', 'MO',
'PEP', 'PG', 'RAI', 'SJM', 'STZ', 'SYY', 'TAP', 'TSN', 'WBA', 'WFM',
'WMT'],
dtype='object', name='symbol')
Row labels: Index(['ADM', 'BF-B', 'CAG', 'CHD', 'CLX', 'COST', 'COTY', 'CPB', 'CVS', 'DPS',
'EL', 'GIS', 'HRL', 'HSY', 'K', 'KHC', 'KO', 'KR', 'MDLZ', 'MKC', 'MO',
'PEP', 'PG', 'RAI', 'SJM', 'STZ', 'SYY', 'TAP', 'TSN', 'WBA', 'WFM',
'WMT'],
dtype='object', name='symbol')
loading stocks to shared dataframe, recordset based on query:
SELECT [symbol], [dateAdded], [CIK], [security] FROM [dbo].[SandP500Index]
Stocks loaded: 0
Stocks loaded: 100
Stocks loaded: 200
Stocks loaded: 300
Stocks loaded: 400
Stocks loaded: 500
statistics loaded
C:\Users\Markus.Walden\Documents\arcgis\data\finance\addins\main.py:71: FutureWarning: pd.rolling_mean is deprecated for DataFrame and will be removed in a future version, replace with
DataFrame.rolling(window=12,center=False).mean()
rolmean = pd.rolling_mean(i, window=12)
C:\Users\Markus.Walden\Documents\arcgis\data\finance\addins\main.py:72: FutureWarning: pd.rolling_std is deprecated for DataFrame and will be removed in a future version, replace with
DataFrame.rolling(window=12,center=False).std()
rolstd = pd.rolling_std(i, window=12)
C:\python\New folder\lib\site-packages\matplotlib\axes\_axes.py:531: UserWarning: No labelled objects found. Use label='...' kwarg on individual plots.
warnings.warn("No labelled objects found. "
0
In [2]:
'''
The prediction and confidence of Alphabet (Google) stock performance based on the adjusted close field
using model based on linear regression as kernel.
The prediction can also be based around nonlinear or polynomial kernels
'''
# machine learning to finance - predicting performance
lq = LinkQuandl()
df = lq.createQuandlDataFrame(ticker = 'GOOGL', database = 'WIKI')
X, X_lately, y = lq.produceForecast(df = df)
forecast_set, clf, accurary = lq.forecastLinear(X = X, X_lately = X_lately, y = y)
lq.visualizeForecast(clf = clf, accuracy = accurary, forecast_set = forecast_set, df = df)
Adj. Close HL_PCT PCT_change Adj. Volume label
Date
2004-08-19 50.322842 3.712563 0.324968 44659000.0 68.752232
2004-08-20 54.322689 0.710922 7.227007 22834300.0 69.639972
2004-08-23 54.869377 3.729433 -1.227880 18256100.0 69.078238
2004-08-24 52.597363 6.417469 -5.726357 15247300.0 67.839414
2004-08-25 53.164113 1.886792 1.183658 9188600.0 68.912727
2004-08-26 54.122070 0.037068 2.820391 7094800.0 70.668146
2004-08-27 53.239345 2.326896 -1.803885 6211700.0 71.219849
2004-08-30 51.162935 3.411430 -3.106003 5196700.0 72.278116
2004-08-31 51.343492 1.308977 0.048866 4917800.0 74.810934
2004-09-01 50.280210 2.713217 -2.385589 9138200.0 74.199045
Linear
C:\python\New folder\lib\site-packages\sklearn\preprocessing\data.py:167: UserWarning: Numerical issues were encountered when centering the data and might not be solved. Dataset may contain too large values. You may need to prescale your features.
warnings.warn("Numerical issues were encountered "
forecast:
[ 959.29338243 968.16614648 972.81379578 982.53439819 989.07636073
996.7404547 1009.35525457 1012.06430964 1015.08126821 1005.21745978
1007.22124831 1014.7868354 1022.38933579 1014.84571938 1020.55266751
1023.56132083 985.98830891 978.76024708 988.50215338 985.91037292
977.81571562 977.12537519 993.34699829 986.93749547 997.1360472
995.49540203 1004.44478835 989.38017353 964.8594157 978.41716956
955.10892384 947.04218191 936.92058002]
confidence: 0.974404988515
Out[2]:
Adj. Close
HL_PCT
PCT_change
Adj. Volume
label
Forecast
2004-08-19
50.322842
3.712563
0.324968
44659000.0
68.752232
NaN
2004-08-20
54.322689
0.710922
7.227007
22834300.0
69.639972
NaN
2004-08-23
54.869377
3.729433
-1.227880
18256100.0
69.078238
NaN
2004-08-24
52.597363
6.417469
-5.726357
15247300.0
67.839414
NaN
2004-08-25
53.164113
1.886792
1.183658
9188600.0
68.912727
NaN
2004-08-26
54.122070
0.037068
2.820391
7094800.0
70.668146
NaN
2004-08-27
53.239345
2.326896
-1.803885
6211700.0
71.219849
NaN
2004-08-30
51.162935
3.411430
-3.106003
5196700.0
72.278116
NaN
2004-08-31
51.343492
1.308977
0.048866
4917800.0
74.810934
NaN
2004-09-01
50.280210
2.713217
-2.385589
9138200.0
74.199045
NaN
2004-09-02
50.912161
0.847207
2.442224
15118600.0
70.462511
NaN
2004-09-03
50.159839
1.729827
-0.931154
5152400.0
74.921275
NaN
2004-09-07
50.947269
0.413467
0.564301
5847500.0
86.481962
NaN
2004-09-08
51.308384
0.713587
1.548541
4985600.0
93.990139
NaN
2004-09-09
51.313400
0.390969
-0.185366
4061700.0
91.181468
NaN
2004-09-10
52.828075
1.167758
3.804080
8698800.0
93.272925
NaN
2004-09-13
53.916435
0.846512
0.815905
7844100.0
96.949273
NaN
2004-09-14
55.917612
0.457440
3.769546
10828900.0
95.615155
NaN
2004-09-15
56.173402
1.991071
1.302460
10713000.0
98.318500
NaN
2004-09-16
57.161452
1.605686
1.450952
9266300.0
97.736704
NaN
2004-09-17
58.926902
0.000000
2.683097
9472500.0
96.131750
NaN
2004-09-20
59.864797
1.876676
2.060710
10628700.0
92.635958
NaN
2004-09-21
59.102444
2.189409
-1.963394
7228700.0
84.937193
NaN
2004-09-22
59.373280
1.089711
0.791826
7581200.0
86.542147
NaN
2004-09-23
60.597057
1.498096
1.666106
8535600.0
84.611187
NaN
2004-09-24
60.100525
3.563381
-0.942382
9123400.0
84.189886
NaN
2004-09-27
59.313094
2.215457
-1.087320
7066100.0
91.793357
NaN
2004-09-28
63.626409
0.425666
4.713165
16929000.0
91.281778
NaN
2004-09-29
65.742942
3.005798
3.595985
30516400.0
92.721222
NaN
2004-09-30
65.000651
2.083333
-0.230179
13758000.0
86.539640
NaN
...
...
...
...
...
...
...
2017-05-20
NaN
NaN
NaN
NaN
NaN
982.534398
2017-05-21
NaN
NaN
NaN
NaN
NaN
989.076361
2017-05-22
NaN
NaN
NaN
NaN
NaN
996.740455
2017-05-23
NaN
NaN
NaN
NaN
NaN
1009.355255
2017-05-24
NaN
NaN
NaN
NaN
NaN
1012.064310
2017-05-25
NaN
NaN
NaN
NaN
NaN
1015.081268
2017-05-26
NaN
NaN
NaN
NaN
NaN
1005.217460
2017-05-27
NaN
NaN
NaN
NaN
NaN
1007.221248
2017-05-28
NaN
NaN
NaN
NaN
NaN
1014.786835
2017-05-29
NaN
NaN
NaN
NaN
NaN
1022.389336
2017-05-30
NaN
NaN
NaN
NaN
NaN
1014.845719
2017-05-31
NaN
NaN
NaN
NaN
NaN
1020.552668
2017-06-01
NaN
NaN
NaN
NaN
NaN
1023.561321
2017-06-02
NaN
NaN
NaN
NaN
NaN
985.988309
2017-06-03
NaN
NaN
NaN
NaN
NaN
978.760247
2017-06-04
NaN
NaN
NaN
NaN
NaN
988.502153
2017-06-05
NaN
NaN
NaN
NaN
NaN
985.910373
2017-06-06
NaN
NaN
NaN
NaN
NaN
977.815716
2017-06-07
NaN
NaN
NaN
NaN
NaN
977.125375
2017-06-08
NaN
NaN
NaN
NaN
NaN
993.346998
2017-06-09
NaN
NaN
NaN
NaN
NaN
986.937495
2017-06-10
NaN
NaN
NaN
NaN
NaN
997.136047
2017-06-11
NaN
NaN
NaN
NaN
NaN
995.495402
2017-06-12
NaN
NaN
NaN
NaN
NaN
1004.444788
2017-06-13
NaN
NaN
NaN
NaN
NaN
989.380174
2017-06-14
NaN
NaN
NaN
NaN
NaN
964.859416
2017-06-15
NaN
NaN
NaN
NaN
NaN
978.417170
2017-06-16
NaN
NaN
NaN
NaN
NaN
955.108924
2017-06-17
NaN
NaN
NaN
NaN
NaN
947.042182
2017-06-18
NaN
NaN
NaN
NaN
NaN
936.920580
3241 rows × 6 columns
In [ ]:
def testUtils():
#principle coordinate analysis
x = NeuralNetForward.x/np.amax(NeuralNetForward.x, axis = 0)
Utils.correlation(x)
Utils.covariance(x)
Content source: markus-antero/Stock
Similar notebooks: