CSV2SQL-checkpoint



In [118]:
import pandas as pd
import os
import time
from datetime import datetime
from matplotlib.dates import date2num
import sqlite3

In [119]:
# Create NEW SQL database
connect = sqlite3.connect('Yahoo_sqlite.db')
cursor = connect.cursor()


PATH = '/Users/insuyu/GitHub/historical.data/yahoo.csv/'
#PATH = '/Users/insuyu/GitHub/python/11. Sqlite3/'


# For All Files in PATH
nFiles = 0
for filename in os.listdir(PATH):
    # Find only *.csv files
    if filename.endswith(".csv"):
        print ("[%d] %s" % (nFiles, filename))
        # Load CVS to Database
        DB = pd.read_csv(PATH+filename)
    
        # Generate Moving Average Data
        DB['MA20'] = pd.stats.moments.rolling_mean(DB['Adj Close'], 20)
        DB['MA30'] = pd.stats.moments.rolling_mean(DB['Adj Close'], 30)
        #DB['MA30'] = pd.stats.moments.rolling_mean(DB['Adj Close'], 30)
        #DB['MA60'] = pd.stats.moments.rolling_mean(DB['Adj Close'], 60)
        #DB['MA120'] = pd.stats.moments.rolling_mean(DB['Adj Close'], 120)
        DB['MA200']     = pd.stats.moments.rolling_mean(DB['Adj Close'], 200)
        DB['MA240']     = pd.stats.moments.rolling_mean(DB['Adj Close'], 240)
        DB['DateStamp'] = [ date2num(datetime.strptime(date,"%Y-%m-%d")) for date in DB['Date'] ]
        
        ###########################################################################
        Code = filename[:-4] # Trading Code
        
        # Create a New Table for Code
        # DateStamp(REAL) | Date(TEXT) | Open(REAL) | High(REAL) | Low(REAL) 
        #    | Close(REAL) | Volume(REAL) | AdjClose(REAL) | MA20(REAL) | MA30(REAL) | MA200(REAL) | MA240 (REAL)
        command = "CREATE TABLE KS%s(DateStamp INT, Date TEXT, Open REAL, High REAL, Low REAL,  \
                        Close REAL, Volume REAL, AdjClose REAL, MA20 REAL, MA30 REAL, MA200 REAL, MA240 REAL)" % Code
        print(command)
        cursor.execute(command)
        
        command = "INSERT INTO KS%s(DateStamp, Date, Open, High, Low, Close, Volume, AdjClose, MA20, MA30, MA200, MA240) \
        VALUES (?,?,?,?,?,?,?,?,?,?,?,?)" % Code
        print(command)
        
        for i in range(len(DB)):
            DateStamp = DB['DateStamp'][i]
            Date = DB['Date'][i]
            Open = DB['Open'][i]
            High = DB['High'][i]
            Low = DB['Low'][i] 
            Close = DB['Close'][i]
            Volume = DB['Volume'][i]
            AdjClose = DB['Adj Close'][i]
            MA20 = DB['MA20'][i]
            MA30 = DB['MA30'][i]
            MA200 = DB['MA200'][i]
            MA240 = DB['MA240'][i]
            
            cursor.execute(command,(DateStamp, Date, Open, High, Low, Close, Volume, AdjClose, MA20, MA30, MA200, MA240) )
            connect.commit()
        nFiles += 1


[0] 000020.csv
CREATE TABLE KS000020(DateStamp INT, Date TEXT, Open REAL, High REAL, Low REAL,                          Close REAL, Volume REAL, AdjClose REAL, MA20 REAL, MA30 REAL, MA200 REAL, MA240 REAL)
INSERT INTO KS000020(DateStamp, Date, Open, High, Low, Close, Volume, AdjClose, MA20, MA30, MA200, MA240)         VALUES (?,?,?,?,?,?,?,?,?,?,?,?)
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-119-82360804f457> in <module>()
     47             Volume = DB['Volume'][i]
     48             AdjClose = DB['Adj Close'][i]
---> 49             MA20 = DB['MA20'][i]
     50             MA30 = DB['MA30'][i]
     51             MA200 = DB['MA200'][i]

/Users/insuyu/anaconda/lib/python3.4/site-packages/pandas/core/frame.py in __getitem__(self, key)
   1795             return self._getitem_multilevel(key)
   1796         else:
-> 1797             return self._getitem_column(key)
   1798 
   1799     def _getitem_column(self, key):

/Users/insuyu/anaconda/lib/python3.4/site-packages/pandas/core/frame.py in _getitem_column(self, key)
   1802         # get column
   1803         if self.columns.is_unique:
-> 1804             return self._get_item_cache(key)
   1805 
   1806         # duplicate columns & possible reduce dimensionaility

/Users/insuyu/anaconda/lib/python3.4/site-packages/pandas/core/generic.py in _get_item_cache(self, item)
   1082         res = cache.get(item)
   1083         if res is None:
-> 1084             values = self._data.get(item)
   1085             res = self._box_item_values(item, values)
   1086             cache[item] = res

/Users/insuyu/anaconda/lib/python3.4/site-packages/pandas/core/internals.py in get(self, item, fastpath)
   2849 
   2850             if not isnull(item):
-> 2851                 loc = self.items.get_loc(item)
   2852             else:
   2853                 indexer = np.arange(len(self.items))[isnull(self.items)]

/Users/insuyu/anaconda/lib/python3.4/site-packages/pandas/core/index.py in get_loc(self, key, method)
   1570         """
   1571         if method is None:
-> 1572             return self._engine.get_loc(_values_from_object(key))
   1573 
   1574         indexer = self.get_indexer([key], method=method)

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:3824)()

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:3704)()

pandas/hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12280)()

pandas/hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12231)()

KeyError: 'MA20'

Convert a string "2015-8-1" to 73468.0


In [110]:
# conversion "%Y-%m-%d" string to date number
DB = pd.read_csv(PATH+'003490.csv')
date = DB['Date'][20]
date2num(datetime.strptime(date,"%Y-%m-%d"))


Out[110]:
734168.0

Add DateStamp to DB


In [103]:
DB['MA20'] = pd.stats.moments.rolling_mean(CSV_TBL['Adj Close'], 20)
DB['DateStamp'] = [ date2num(datetime.strptime(date,"%Y-%m-%d")) for date in DB['Date'] ]
DB


Out[103]:
Date Open High Low Close Volume Adj Close MA20 MA200 DateStamp
0 2011-01-03 69900.04 69900.04 68500.01 68800.02 653400 85423.1 NaN NaN 734140
1 2011-01-04 69000.00 70699.97 68800.02 70699.97 1077700 87782.1 NaN NaN 734141
2 2011-01-05 70400.04 70499.98 69399.97 69999.99 720200 86913.0 NaN NaN 734142
3 2011-01-06 69999.99 70199.97 69100.03 69100.03 569400 85795.6 NaN NaN 734143
4 2011-01-07 68800.02 73000.03 68800.02 73000.03 1926900 90637.9 NaN NaN 734144
5 2011-01-10 72599.99 74200.00 72299.98 73200.02 1583900 90886.2 NaN NaN 734147
6 2011-01-11 73299.97 74300.04 72799.97 73599.98 929200 91382.8 NaN NaN 734148
7 2011-01-12 73599.98 73700.01 72499.96 73299.97 619900 91010.3 NaN NaN 734149
8 2011-01-13 73299.97 74200.00 72900.01 73599.98 752100 91382.8 NaN NaN 734150
9 2011-01-14 74000.02 75199.99 71199.96 71199.96 1714600 88402.9 NaN NaN 734151
10 2011-01-17 71799.98 72299.98 70799.99 71199.96 644300 88402.9 NaN NaN 734154
11 2011-01-18 70999.98 72299.98 70799.99 71100.01 509600 88278.8 NaN NaN 734155
12 2011-01-19 71199.96 73400.00 70900.03 73099.99 776700 90762.0 NaN NaN 734156
13 2011-01-20 73299.97 76000.00 72599.99 75699.99 1905700 93990.2 NaN NaN 734157
14 2011-01-21 75800.02 76399.96 74200.00 75000.01 1219500 93121.1 NaN NaN 734158
15 2011-01-24 75600.04 78499.97 75500.01 78100.01 1707600 96970.1 NaN NaN 734161
16 2011-01-25 78700.04 78799.99 76699.97 77499.98 1019100 96225.1 NaN NaN 734162
17 2011-01-26 78199.96 78900.01 76399.96 77100.02 596000 95728.5 NaN NaN 734163
18 2011-01-27 77699.96 78400.02 75399.98 77999.98 1573500 96845.9 NaN NaN 734164
19 2011-01-28 77400.03 78100.01 76000.00 76000.00 712500 94362.7 91215.200 NaN 734165
20 2011-01-31 74300.04 74700.00 72200.03 72700.02 981400 90265.4 91457.315 NaN 734168
21 2011-02-01 73000.03 73400.00 71100.01 71900.02 785100 89272.1 91531.815 NaN 734169
22 2011-02-07 73099.99 73899.99 72499.96 72599.99 421200 90141.2 91693.225 NaN 734175
23 2011-02-08 73799.96 74700.00 73400.00 73400.00 638900 91134.5 91960.170 NaN 734176
24 2011-02-09 73599.98 75000.01 73599.98 74399.99 694400 92376.1 92047.080 NaN 734177
25 2011-02-10 74300.04 74800.03 72700.02 72900.01 433800 90513.7 92028.455 NaN 734178
26 2011-02-11 73299.97 74899.98 72100.00 72100.00 431500 89520.4 91935.335 NaN 734179
27 2011-02-14 72900.01 72900.01 70100.02 71400.02 624600 88651.3 91817.385 NaN 734182
28 2011-02-15 71499.97 72200.03 70799.99 71299.99 444400 88527.1 91674.600 NaN 734183
29 2011-02-16 70999.98 71900.02 70999.98 71299.99 329300 88527.1 91680.810 NaN 734184
... ... ... ... ... ... ... ... ... ... ...
1151 2015-06-19 36000.00 38150.00 35800.00 37850.00 1011600 37850.0 38747.500 43534.50 735768
1152 2015-06-22 38750.00 39700.00 38750.00 39350.00 835800 39350.0 38517.500 43538.75 735771
1153 2015-06-23 39350.00 39750.00 38650.00 39700.00 494900 39700.0 38397.500 43546.00 735772
1154 2015-06-24 39750.00 40450.00 39300.00 39950.00 418900 39950.0 38332.500 43551.00 735773
1155 2015-06-25 39650.00 40250.00 39300.00 39800.00 278700 39800.0 38270.000 43556.00 735774
1156 2015-06-26 39800.00 39800.00 38800.00 39100.00 296900 39100.0 38202.500 43557.00 735775
1157 2015-06-29 38400.00 38750.00 37750.00 37900.00 380500 37900.0 38155.000 43557.50 735778
1158 2015-06-30 37750.00 40600.00 37700.00 40500.00 632700 40500.0 38250.000 43573.25 735779
1159 2015-07-01 40600.00 42300.00 39650.00 41850.00 807000 41850.0 38407.500 43598.25 735780
1160 2015-07-02 42500.00 43850.00 41900.00 42900.00 1340700 42900.0 38622.500 43631.25 735781
1161 2015-07-03 42950.00 43450.00 41450.00 41500.00 583100 41500.0 38747.500 43664.00 735782
1162 2015-07-06 40800.00 42600.00 40700.00 41200.00 587800 41200.0 38862.500 43691.75 735785
1163 2015-07-07 42600.00 44500.00 42050.00 43550.00 1506500 43550.0 39052.500 43729.50 735786
1164 2015-07-08 43550.00 43900.00 41900.00 42000.00 601500 42000.0 39250.000 43765.00 735787
1165 2015-07-09 41000.00 41500.00 38950.00 39700.00 1608600 39700.0 39330.000 43787.75 735788
1166 2015-07-10 40100.00 40150.00 39500.00 39750.00 512700 39750.0 39442.500 43810.75 735789
1167 2015-07-13 40050.00 40550.00 39750.00 40450.00 452200 40450.0 39705.000 43837.75 735792
1168 2015-07-14 40700.00 40800.00 39400.00 39450.00 657700 39450.0 39890.000 43859.50 735793
1169 2015-07-15 40450.00 40500.00 39400.00 39450.00 886900 39450.0 40082.500 43880.75 735794
1170 2015-07-16 39000.00 40300.00 38700.00 39450.00 9033400 39450.0 40270.000 43902.00 735795
1171 2015-07-17 39550.00 40000.00 38250.00 39000.00 1149500 39000.0 40327.500 43919.50 735796
1172 2015-07-20 39050.00 39400.00 38100.00 38450.00 828900 38450.0 40282.500 43932.75 735799
1173 2015-07-21 38500.00 38900.00 37500.00 37700.00 1122800 37700.0 40182.500 43944.25 735800
1174 2015-07-22 37900.00 38200.00 37250.00 37500.00 651800 37500.0 40060.000 43945.75 735801
1175 2015-07-23 38100.00 39350.00 37800.00 39350.00 1077900 39350.0 40037.500 43955.50 735802
1176 2015-07-24 39300.00 39750.00 38600.00 39650.00 1072900 39650.0 40065.000 43970.75 735803
1177 2015-07-27 39650.00 39650.00 38300.00 38600.00 611700 38600.0 40100.000 43982.25 735806
1178 2015-07-28 38150.00 38400.00 37300.00 37450.00 761200 37450.0 39947.500 43985.25 735807
1179 2015-07-29 37700.00 38000.00 37400.00 37600.00 395900 37600.0 39735.000 43988.25 735808
1180 2015-07-30 37850.00 37850.00 35550.00 35600.00 1235300 35600.0 39370.000 43979.75 735809

1181 rows × 10 columns


In [22]:



Out[22]:
730123.0