In [38]:
import pandas as pd
import MySQLdb as mdb

%matplotlib inline

In [39]:
import MySQLdb as mdb

def executeSQL(statement):
    if len(statement) <= 0:
        return 'no statement is given'

    conn = mdb.connect('localhost', 'usstock', 'usstock', 'usstock')
    cur = conn.cursor()
    
    try:
        res = None
        res = cur.execute(statement)
        conn.commit()
    except mdb.Error as e:
        print 'MySQL exception has been caught.'

    finally: 
        cur.close()
        conn.close()
    return res

In [72]:
#returns list of [market, stock_sybol] by querying database.

MAX_LIMIT = 100000

#limit means the number of results to be returned. 0 means everything
def getSymbols(limit=10):
    if limit <= 0:
        limit = MAX_LIMIT
        
    conn = mdb.connect('localhost', 'usstock', 'usstock', 'usstock')
    cur = conn.cursor()
    sql = "SELECT market, symbol FROM symbols WHERE symbol REGEXP '^[A-Z]+$'"
    sql = sql + ' limit %d;'%(limit)
    #print sql
    #and market=\'NYSE\' 
    cur.execute(sql)
    stocks = []
    for i in range(cur.rowcount):
        row = cur.fetchone()
        market = row[0]
        stock = row[1]
        stocks.append([market, stock])
    # print cur.rowcount
    conn.close()
    return stocks

In [73]:
#returns db column names from the dataframe arg. names are a form of string without special characters.

import re

def getFields(df):
    res = []
    y = df.iloc[:,0:1] #extract 1st column
    #y.iloc[1:10].values
    for z in y.iloc[:].values:
        x=re.sub('[ /,\.\&\-()\']','',z[0]) #remove special chars
        if isinstance(x, unicode):
            x = x.encode('UTF-8')
            #print type(x)
        res.append(x)
    return res

In [74]:
#generate a statement for creating a table

def generateFinTableStatement(tablename, fields):
    statement = 'CREATE TABLE IF NOT EXISTS ' + tablename + ' (\n'
    is_first=True
    
    #mandatory fields
    statement = statement + 'Symbol VARCHAR(20) NOT NULL,\n'
    statement = statement + 'Date DATE NOT NULL,\n'
    statement = statement + 'Period VARCHAR(20) NOT NULL,\n'
    
    #financial fields
    for x in fields:
        if not is_first:
            statement = statement + ',\n'
        is_first=False
        tmp = '%s DOUBLE NULL DEFAULT NULL'%x
        #print statement
        #print type(statement)
        statement = statement + tmp
    statement = statement +  ');\n\n'
    #print statement
    return statement

In [ ]:


In [75]:
#make tables

data_to_extract = {1:'IncomeStatement', #index in the dataframe, the name of financial data
                    3:'BalanceSheet',
                    5:'CashFlow'}

def makeFinTables():
    stocks = getSymbols(1)
    stock = stocks[0]
    #print stocks
    url = "https://www.google.com/finance?q="+stock[0]+"%3A"+stock[1]+"&fstype=ii"
    #print url
    df = pd.read_html(url, encoding=False)
    #print len(df)
   
    for k in data_to_extract.keys():
        #print k
        x = df[k]
        y = x.iloc[:,0:]
        fields = getFields(y)
        statement = generateFinTableStatement(data_to_extract[k], fields)
        executeSQL(statement)
    
makeFinTables()


C:\Anaconda2\lib\site-packages\ipykernel\__main__.py:12: Warning: Table 'incomestatement' already exists
C:\Anaconda2\lib\site-packages\ipykernel\__main__.py:12: Warning: Table 'balancesheet' already exists
C:\Anaconda2\lib\site-packages\ipykernel\__main__.py:12: Warning: Table 'cashflow' already exists

In [76]:
def getTableName(data):
    y = data.iloc[:,0:1]
    val =  y.iloc[0:1].values[0][0]
    if isinstance(val, unicode):
        val = val.encode('UTF-8')
    
    if val.startswith('Revenue'):
        return 'IncomeStatement'
    elif val.startswith('Cash '):
        return 'BalanceSheet'
    elif val.startswith('Net '):
        return 'CashFlow'
    else:
        return None

In [ ]:
#get values from fin tables for each stock

import re

def getPeriodAndDate(column_value):
    if isinstance(column_value, unicode):
        column_value = column_value.encode('UTF-8')

    column_value = re.sub('[\n]','',column_value) #remove special chars

    size = len(column_value)
    period = column_value[:size-11]
    date = column_value[size-10:]
    return (period,date)    
    
def gatherFinValues(limit=10):
    if limit <= 0:
        limit = MAX_LIMIT
        
    stocks = getSymbols(limit) #get every symbol

    for stock in stocks[:limit]:
        market = stock[0]
        stockname = stock[1]
        url = "https://www.google.com/finance?q="+market+"%3A"+stockname+"&fstype=ii"
        print url

        df = None
        try:
            df = pd.read_html(url, encoding=False)
        except Exception:
            print "Exception has been caught while processing [%s:%s]. No financial data."%(market, stockname)
            
        if df is None:
            continue     #skip the rest if there is no financial data for this stock
            
        print 'num of data is %d'%(len(df))

        if len(df) <= 1:
            print "Exception has been caught while processing [%s:%s]. No financial data."%(market, stockname)
            continue    #skip the rest if there is no financial data for this stock
        
        #data_to_extract : this is declared already
        
        #columns_to_extract = [1,2,3,4]  #from 2015 to 2012

        for k in range(0, len(df)):
            data = df[k]
            columns_to_extract = range(1,len(data.columns.values)) # all data columns
            
            for c in columns_to_extract:
                #print getPeriodAndDate(data.columns.values[c])
                tablename = getTableName(data)
                (period, date) = getPeriodAndDate(data.columns.values[c])
                
                statement = 'INSERT INTO %s \n'%(tablename)
                statement = statement + 'VALUES (\n'
                statement = statement + '\'%s\',\n'%(stockname)
                statement = statement + '\'%s\',\n'%(date)
                statement = statement + '\'%s\'\n'%(period)

                y = data.iloc[:,c:c+1]
                for z in y.iloc[:].values:
                    try:
                        val = z[0].encode('UTF-8')
                    except AttributeError:
                        print z[0]
                        val = z[0]
                        
                    #print val
                    if val == '-':
                        statement = statement + ',NULL\n'
                    else:
                        statement = statement + ',%s\n'%(val)
                statement = statement + ');\n'                
                #print statement
                
                res = executeSQL(statement)
                #print res
            

gatherFinValues(0)


https://www.google.com/finance?q=NASDAQ%3AAAAP&fstype=ii
num of data is 6
MySQL exception has been caught.
MySQL exception has been caught.
https://www.google.com/finance?q=NASDAQ%3AAAL&fstype=ii
num of data is 6
MySQL exception has been caught.
MySQL exception has been caught.
https://www.google.com/finance?q=NASDAQ%3AAAME&fstype=ii
num of data is 6
MySQL exception has been caught.
MySQL exception has been caught.
https://www.google.com/finance?q=NASDAQ%3AAAOI&fstype=ii
num of data is 6
MySQL exception has been caught.
MySQL exception has been caught.
https://www.google.com/finance?q=NASDAQ%3AAAON&fstype=ii
num of data is 6
MySQL exception has been caught.
MySQL exception has been caught.
https://www.google.com/finance?q=NASDAQ%3AAAPC&fstype=ii
num of data is 6
MySQL exception has been caught.
MySQL exception has been caught.
https://www.google.com/finance?q=NASDAQ%3AAAPL&fstype=ii
num of data is 6
MySQL exception has been caught.
MySQL exception has been caught.
MySQL exception has been caught.
https://www.google.com/finance?q=NASDAQ%3AAAWW&fstype=ii
num of data is 6
MySQL exception has been caught.
MySQL exception has been caught.
https://www.google.com/finance?q=NASDAQ%3AAAXJ&fstype=ii
num of data is 1
Exception has been caught while processing [NASDAQ:AAXJ]. No financial data.
https://www.google.com/finance?q=NASDAQ%3AABAC&fstype=ii
num of data is 6
MySQL exception has been caught.
MySQL exception has been caught.
MySQL exception has been caught.
https://www.google.com/finance?q=NASDAQ%3AABAX&fstype=ii
num of data is 6
MySQL exception has been caught.
MySQL exception has been caught.
https://www.google.com/finance?q=NASDAQ%3AABCB&fstype=ii
num of data is 6
MySQL exception has been caught.
MySQL exception has been caught.
https://www.google.com/finance?q=NASDAQ%3AABCD&fstype=ii
num of data is 6
MySQL exception has been caught.
MySQL exception has been caught.
https://www.google.com/finance?q=NASDAQ%3AABCO&fstype=ii
num of data is 6
MySQL exception has been caught.
MySQL exception has been caught.
https://www.google.com/finance?q=NASDAQ%3AABDC&fstype=ii
Exception has been caught while processing [NASDAQ:ABDC]. No financial data.
https://www.google.com/finance?q=NASDAQ%3AABEO&fstype=ii
num of data is 6
MySQL exception has been caught.
MySQL exception has been caught.
https://www.google.com/finance?q=NASDAQ%3AABEOW&fstype=ii
Exception has been caught while processing [NASDAQ:ABEOW]. No financial data.
https://www.google.com/finance?q=NASDAQ%3AABIL&fstype=ii
num of data is 6
MySQL exception has been caught.
MySQL exception has been caught.
https://www.google.com/finance?q=NASDAQ%3AABIO&fstype=ii
num of data is 6
MySQL exception has been caught.
MySQL exception has been caught.
https://www.google.com/finance?q=NASDAQ%3AABMD&fstype=ii
num of data is 6
MySQL exception has been caught.
MySQL exception has been caught.
https://www.google.com/finance?q=NASDAQ%3AABTL&fstype=ii
num of data is 6
MySQL exception has been caught.
MySQL exception has been caught.
https://www.google.com/finance?q=NASDAQ%3AABTX&fstype=ii
num of data is 6
MySQL exception has been caught.
MySQL exception has been caught.
https://www.google.com/finance?q=NASDAQ%3AABUS&fstype=ii
num of data is 6
MySQL exception has been caught.
MySQL exception has been caught.
https://www.google.com/finance?q=NASDAQ%3AABY&fstype=ii
num of data is 6
MySQL exception has been caught.
MySQL exception has been caught.
https://www.google.com/finance?q=NASDAQ%3AACAD&fstype=ii
num of data is 6
MySQL exception has been caught.
MySQL exception has been caught.
https://www.google.com/finance?q=NASDAQ%3AACAS&fstype=ii
num of data is 6
MySQL exception has been caught.
MySQL exception has been caught.
https://www.google.com/finance?q=NASDAQ%3AACAT&fstype=ii
num of data is 6
MySQL exception has been caught.
MySQL exception has been caught.
https://www.google.com/finance?q=NASDAQ%3AACBI&fstype=ii
num of data is 6
MySQL exception has been caught.
MySQL exception has been caught.
https://www.google.com/finance?q=NASDAQ%3AACET&fstype=ii
num of data is 6
MySQL exception has been caught.
MySQL exception has been caught.
https://www.google.com/finance?q=NASDAQ%3AACFC&fstype=ii
num of data is 6
MySQL exception has been caught.
MySQL exception has been caught.
https://www.google.com/finance?q=NASDAQ%3AACGL&fstype=ii
num of data is 6
MySQL exception has been caught.
MySQL exception has been caught.
https://www.google.com/finance?q=NASDAQ%3AACGLP&fstype=ii
Exception has been caught while processing [NASDAQ:ACGLP]. No financial data.
https://www.google.com/finance?q=NASDAQ%3AACHC&fstype=ii
num of data is 6
MySQL exception has been caught.
MySQL exception has been caught.
https://www.google.com/finance?q=NASDAQ%3AACHN&fstype=ii
num of data is 6
MySQL exception has been caught.
MySQL exception has been caught.
https://www.google.com/finance?q=NASDAQ%3AACIA&fstype=ii
num of data is 6
MySQL exception has been caught.
MySQL exception has been caught.
https://www.google.com/finance?q=NASDAQ%3AACIU&fstype=ii
num of data is 6
MySQL exception has been caught.
MySQL exception has been caught.
https://www.google.com/finance?q=NASDAQ%3AACIW&fstype=ii
num of data is 6
MySQL exception has been caught.
MySQL exception has been caught.
https://www.google.com/finance?q=NASDAQ%3AACLS&fstype=ii
num of data is 6
MySQL exception has been caught.
MySQL exception has been caught.
https://www.google.com/finance?q=NASDAQ%3AACNB&fstype=ii
num of data is 6
MySQL exception has been caught.
MySQL exception has been caught.
https://www.google.com/finance?q=NASDAQ%3AACOR&fstype=ii
num of data is 6
MySQL exception has been caught.
MySQL exception has been caught.
https://www.google.com/finance?q=NASDAQ%3AACPW&fstype=ii
num of data is 6
MySQL exception has been caught.
MySQL exception has been caught.
https://www.google.com/finance?q=NASDAQ%3AACRS&fstype=ii
num of data is 6
MySQL exception has been caught.
MySQL exception has been caught.
https://www.google.com/finance?q=NASDAQ%3AACRX&fstype=ii
num of data is 6
MySQL exception has been caught.
MySQL exception has been caught.
https://www.google.com/finance?q=NASDAQ%3AACSF&fstype=ii