In [2]:
from data.finance.LoadDataYahoo import LoadDataFromSQL, LoadDataFromYahoo
from data.finance.addins.main import (
    Utils,
    testVisualization )

from data.finance.addins.mlToFinance1 import PredictSVMOnQuandl, LinkQuandl
from data.finance.addins.neuralNetTestModified import NNtrainer, NeuralNetForward
from data.finance.addins.reloadSandP import LoadYahooFinance, CompanyHistory

import numpy as np

In [2]:
'''
Smoke test: check that the required data sources (MSSQL) and libraries load,
then run the automated support-vector-machine step.
'''
# Disabled alternative entry point, kept for reference:
#from data.finance.addins.reloadSandP import main
pq = PredictSVMOnQuandl()
pq.populateCompanyData(readClassifier=True, runList=False)
# Deliberately left as the cell's last expression so that its return value
# (an (estimator, confidence) tuple in the recorded output) is displayed.
pq.SupportVectorMachineAsAutomated()


loading stocks to shared dataframe, recordset based on query:
  SELECT distinct [GICS_Sector] FROM [finance].[dbo].[SandP500Index] 
loading stocks to shared dataframe, recordset based on query:
  SELECT [symbol], [dateAdded], [CIK], [security] FROM [dbo].[SandP500Index] 

statistics loaded

confidence:  -0.0468546548935
Out[2]:
(SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='auto',
   kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False),
 -0.046854654893454084)

In [3]:
'''
Two plots are rendered at the bottom of this cell's output:
- a heatmap measuring how similar the selected companies are, and
- a volatility view, with the rolling mean drawn in red and the rolling
  standard deviation drawn in black.
'''

# Disabled alternative entry point, kept for reference:
#from data.finance.addins.main import main as financeAddinsMain
testVisualization()


loading stocks to shared dataframe
Stocks loaded:  0
Stocks loaded:  10
Stocks loaded:  20
Stocks loaded:  30
Stocks loaded:  40
Stocks loaded:  50
  symbol       Date       Open       High        Low      Close    Volume  \
0    AAP 2001-11-29  40.159999  43.399999  40.079999  41.640000  371100.0   
1    AAP 2001-11-30  41.640000  42.799999  41.640000  42.799999  165300.0   
2    AAP 2001-12-03  42.700001  42.700001  41.350001  41.350001  127500.0   
3    AAP 2001-12-04  41.350001  41.350001  39.700001  39.700001   95400.0   
4    AAP 2001-12-05  40.300000  44.350001  40.300000  44.000001  598200.0   

   Adj Close      100ma  
0  13.273657  13.273657  
1  13.643432  13.458545  
2  13.181214  13.366101  
3  12.655240  13.188386  
4  14.025959  13.355900  

Column labels: Index(['AAP', 'BBBY', 'BBY', 'BWA', 'CBS', 'CCL', 'CMCSA', 'COH', 'DG', 'DHI',
       'DIS', 'DLPH', 'DRI', 'EXPE', 'F', 'FL', 'FOX', 'FOXA', 'GM', 'GPC',
       'GPS', 'GRMN', 'GT', 'HAR', 'HAS', 'HBI', 'HD', 'HOG', 'IPG', 'JWN',
       'KSS', 'LEN', 'LOW', 'M', 'MAR', 'MAT', 'NKE', 'NWL', 'OMC', 'PHM',
       'PVH', 'RCL', 'RL', 'ROST', 'SBUX', 'SIG', 'SNA', 'SNI', 'SWK', 'TGNA',
       'TGT', 'TIF', 'TJX', 'TSCO', 'TWX', 'ULTA', 'VFC', 'VIAB', 'WHR',
       'WYN'],
      dtype='object', name='symbol') 

Row labels: Index(['AAP', 'BBBY', 'BBY', 'BWA', 'CBS', 'CCL', 'CMCSA', 'COH', 'DG', 'DHI',
       'DIS', 'DLPH', 'DRI', 'EXPE', 'F', 'FL', 'FOX', 'FOXA', 'GM', 'GPC',
       'GPS', 'GRMN', 'GT', 'HAR', 'HAS', 'HBI', 'HD', 'HOG', 'IPG', 'JWN',
       'KSS', 'LEN', 'LOW', 'M', 'MAR', 'MAT', 'NKE', 'NWL', 'OMC', 'PHM',
       'PVH', 'RCL', 'RL', 'ROST', 'SBUX', 'SIG', 'SNA', 'SNI', 'SWK', 'TGNA',
       'TGT', 'TIF', 'TJX', 'TSCO', 'TWX', 'ULTA', 'VFC', 'VIAB', 'WHR',
       'WYN'],
      dtype='object', name='symbol')
loading stocks to shared dataframe
Stocks loaded:  0
Stocks loaded:  10
Stocks loaded:  20
Stocks loaded:  30
  symbol       Date       Open       High      Low      Close     Volume  \
0    ADM 2000-01-03  11.999999  12.062499  11.8750  11.999999   984600.0   
1    ADM 2000-01-04  11.812500  12.187499  11.8125  11.875000  1088000.0   
2    ADM 2000-01-05  11.875000  11.875000  11.6250  11.687500  1087900.0   
3    ADM 2000-01-06  11.625000  11.875000  11.5625  11.750000   899900.0   
4    ADM 2000-01-07  11.875000  11.999999  11.8125  11.937500  1186200.0   

   Adj Close     100ma  
0   7.855350  7.855350  
1   7.773524  7.814437  
2   7.650784  7.759886  
3   7.691697  7.742839  
4   7.814437  7.757158  

Column labels: Index(['ADM', 'BF-B', 'CAG', 'CHD', 'CLX', 'COST', 'COTY', 'CPB', 'CVS', 'DPS',
       'EL', 'GIS', 'HRL', 'HSY', 'K', 'KHC', 'KO', 'KR', 'MDLZ', 'MKC', 'MO',
       'PEP', 'PG', 'RAI', 'SJM', 'STZ', 'SYY', 'TAP', 'TSN', 'WBA', 'WFM',
       'WMT'],
      dtype='object', name='symbol') 

Row labels: Index(['ADM', 'BF-B', 'CAG', 'CHD', 'CLX', 'COST', 'COTY', 'CPB', 'CVS', 'DPS',
       'EL', 'GIS', 'HRL', 'HSY', 'K', 'KHC', 'KO', 'KR', 'MDLZ', 'MKC', 'MO',
       'PEP', 'PG', 'RAI', 'SJM', 'STZ', 'SYY', 'TAP', 'TSN', 'WBA', 'WFM',
       'WMT'],
      dtype='object', name='symbol')
loading stocks to shared dataframe, recordset based on query:
  SELECT [symbol], [dateAdded], [CIK], [security] FROM [dbo].[SandP500Index] 
Stocks loaded:  0
Stocks loaded:  100
Stocks loaded:  200
Stocks loaded:  300
Stocks loaded:  400
Stocks loaded:  500

statistics loaded
C:\Users\Markus.Walden\Documents\arcgis\data\finance\addins\main.py:71: FutureWarning: pd.rolling_mean is deprecated for DataFrame and will be removed in a future version, replace with 
	DataFrame.rolling(window=12,center=False).mean()
  rolmean = pd.rolling_mean(i, window=12)
C:\Users\Markus.Walden\Documents\arcgis\data\finance\addins\main.py:72: FutureWarning: pd.rolling_std is deprecated for DataFrame and will be removed in a future version, replace with 
	DataFrame.rolling(window=12,center=False).std()
  rolstd = pd.rolling_std(i, window=12)
C:\python\New folder\lib\site-packages\matplotlib\axes\_axes.py:531: UserWarning: No labelled objects found. Use label='...' kwarg on individual plots.
  warnings.warn("No labelled objects found. "
0

In [2]:
'''
Forecast the Alphabet (Google, ticker GOOGL) stock from its adjusted close
field with a linear-regression model, and report the model's confidence.

Nonlinear or polynomial kernels can be used for the prediction as well.
'''

# Machine learning applied to finance: predicting stock performance.
lq = LinkQuandl()
df = lq.createQuandlDataFrame(ticker='GOOGL', database='WIKI')
X, X_lately, y = lq.produceForecast(df=df)
# NOTE(review): `accurary` is a misspelling of "accuracy"; the name is kept
# unchanged here in case other cells read it from the kernel namespace.
forecast_set, clf, accurary = lq.forecastLinear(X=X, X_lately=X_lately, y=y)
# Last expression of the cell: whatever visualizeForecast returns is displayed.
lq.visualizeForecast(
    clf=clf,
    accuracy=accurary,
    forecast_set=forecast_set,
    df=df,
)


            Adj. Close    HL_PCT  PCT_change  Adj. Volume      label
Date                                                                
2004-08-19   50.322842  3.712563    0.324968   44659000.0  68.752232
2004-08-20   54.322689  0.710922    7.227007   22834300.0  69.639972
2004-08-23   54.869377  3.729433   -1.227880   18256100.0  69.078238
2004-08-24   52.597363  6.417469   -5.726357   15247300.0  67.839414
2004-08-25   53.164113  1.886792    1.183658    9188600.0  68.912727
2004-08-26   54.122070  0.037068    2.820391    7094800.0  70.668146
2004-08-27   53.239345  2.326896   -1.803885    6211700.0  71.219849
2004-08-30   51.162935  3.411430   -3.106003    5196700.0  72.278116
2004-08-31   51.343492  1.308977    0.048866    4917800.0  74.810934
2004-09-01   50.280210  2.713217   -2.385589    9138200.0  74.199045
Linear
C:\python\New folder\lib\site-packages\sklearn\preprocessing\data.py:167: UserWarning: Numerical issues were encountered when centering the data and might not be solved. Dataset may contain too large values. You may need to prescale your features.
  warnings.warn("Numerical issues were encountered "
forecast: 
 [  959.29338243   968.16614648   972.81379578   982.53439819   989.07636073
   996.7404547   1009.35525457  1012.06430964  1015.08126821  1005.21745978
  1007.22124831  1014.7868354   1022.38933579  1014.84571938  1020.55266751
  1023.56132083   985.98830891   978.76024708   988.50215338   985.91037292
   977.81571562   977.12537519   993.34699829   986.93749547   997.1360472
   995.49540203  1004.44478835   989.38017353   964.8594157    978.41716956
   955.10892384   947.04218191   936.92058002]

confidence:  0.974404988515
Out[2]:
Adj. Close HL_PCT PCT_change Adj. Volume label Forecast
2004-08-19 50.322842 3.712563 0.324968 44659000.0 68.752232 NaN
2004-08-20 54.322689 0.710922 7.227007 22834300.0 69.639972 NaN
2004-08-23 54.869377 3.729433 -1.227880 18256100.0 69.078238 NaN
2004-08-24 52.597363 6.417469 -5.726357 15247300.0 67.839414 NaN
2004-08-25 53.164113 1.886792 1.183658 9188600.0 68.912727 NaN
2004-08-26 54.122070 0.037068 2.820391 7094800.0 70.668146 NaN
2004-08-27 53.239345 2.326896 -1.803885 6211700.0 71.219849 NaN
2004-08-30 51.162935 3.411430 -3.106003 5196700.0 72.278116 NaN
2004-08-31 51.343492 1.308977 0.048866 4917800.0 74.810934 NaN
2004-09-01 50.280210 2.713217 -2.385589 9138200.0 74.199045 NaN
2004-09-02 50.912161 0.847207 2.442224 15118600.0 70.462511 NaN
2004-09-03 50.159839 1.729827 -0.931154 5152400.0 74.921275 NaN
2004-09-07 50.947269 0.413467 0.564301 5847500.0 86.481962 NaN
2004-09-08 51.308384 0.713587 1.548541 4985600.0 93.990139 NaN
2004-09-09 51.313400 0.390969 -0.185366 4061700.0 91.181468 NaN
2004-09-10 52.828075 1.167758 3.804080 8698800.0 93.272925 NaN
2004-09-13 53.916435 0.846512 0.815905 7844100.0 96.949273 NaN
2004-09-14 55.917612 0.457440 3.769546 10828900.0 95.615155 NaN
2004-09-15 56.173402 1.991071 1.302460 10713000.0 98.318500 NaN
2004-09-16 57.161452 1.605686 1.450952 9266300.0 97.736704 NaN
2004-09-17 58.926902 0.000000 2.683097 9472500.0 96.131750 NaN
2004-09-20 59.864797 1.876676 2.060710 10628700.0 92.635958 NaN
2004-09-21 59.102444 2.189409 -1.963394 7228700.0 84.937193 NaN
2004-09-22 59.373280 1.089711 0.791826 7581200.0 86.542147 NaN
2004-09-23 60.597057 1.498096 1.666106 8535600.0 84.611187 NaN
2004-09-24 60.100525 3.563381 -0.942382 9123400.0 84.189886 NaN
2004-09-27 59.313094 2.215457 -1.087320 7066100.0 91.793357 NaN
2004-09-28 63.626409 0.425666 4.713165 16929000.0 91.281778 NaN
2004-09-29 65.742942 3.005798 3.595985 30516400.0 92.721222 NaN
2004-09-30 65.000651 2.083333 -0.230179 13758000.0 86.539640 NaN
... ... ... ... ... ... ...
2017-05-20 NaN NaN NaN NaN NaN 982.534398
2017-05-21 NaN NaN NaN NaN NaN 989.076361
2017-05-22 NaN NaN NaN NaN NaN 996.740455
2017-05-23 NaN NaN NaN NaN NaN 1009.355255
2017-05-24 NaN NaN NaN NaN NaN 1012.064310
2017-05-25 NaN NaN NaN NaN NaN 1015.081268
2017-05-26 NaN NaN NaN NaN NaN 1005.217460
2017-05-27 NaN NaN NaN NaN NaN 1007.221248
2017-05-28 NaN NaN NaN NaN NaN 1014.786835
2017-05-29 NaN NaN NaN NaN NaN 1022.389336
2017-05-30 NaN NaN NaN NaN NaN 1014.845719
2017-05-31 NaN NaN NaN NaN NaN 1020.552668
2017-06-01 NaN NaN NaN NaN NaN 1023.561321
2017-06-02 NaN NaN NaN NaN NaN 985.988309
2017-06-03 NaN NaN NaN NaN NaN 978.760247
2017-06-04 NaN NaN NaN NaN NaN 988.502153
2017-06-05 NaN NaN NaN NaN NaN 985.910373
2017-06-06 NaN NaN NaN NaN NaN 977.815716
2017-06-07 NaN NaN NaN NaN NaN 977.125375
2017-06-08 NaN NaN NaN NaN NaN 993.346998
2017-06-09 NaN NaN NaN NaN NaN 986.937495
2017-06-10 NaN NaN NaN NaN NaN 997.136047
2017-06-11 NaN NaN NaN NaN NaN 995.495402
2017-06-12 NaN NaN NaN NaN NaN 1004.444788
2017-06-13 NaN NaN NaN NaN NaN 989.380174
2017-06-14 NaN NaN NaN NaN NaN 964.859416
2017-06-15 NaN NaN NaN NaN NaN 978.417170
2017-06-16 NaN NaN NaN NaN NaN 955.108924
2017-06-17 NaN NaN NaN NaN NaN 947.042182
2017-06-18 NaN NaN NaN NaN NaN 936.920580

3241 rows × 6 columns


In [ ]:
def testUtils():
    '''
    Run Utils.correlation and Utils.covariance on column-scaled sample data.

    NeuralNetForward.x is divided by its maximum taken along axis 0, and the
    scaled result is passed to both helpers. Whatever the helpers return is
    discarded; this function itself returns None.
    '''
    # NOTE(review): this block was originally labelled "principal coordinate
    # analysis", but only correlation and covariance are invoked here.
    samples = NeuralNetForward.x
    scaled = samples / np.amax(samples, axis=0)
    Utils.correlation(scaled)
    Utils.covariance(scaled)