In [9]:
import pandas as pd
import sqlite3
import xlrd as xl
# Set up a SQLite database using an Excel file:
# http://nbviewer.ipython.org/github/jvns/pandas-cookbook/blob/master/cookbook/Chapter%209%20-%20Loading%20data%20from%20SQL%20databases.ipynb

# Select a sample file. We use an Excel file because pandas can read
# individual sheets from an .xlsx workbook, which is useful for creating
# multiple tables within a single SQLite database
path = 'car_sales.xlsx'

# if this .sqlite db doesn't already exist, this will create it;
# if the .sqlite db *does* already exist, this establishes the desired connection
con = sqlite3.connect("sqlite_cars.sqlite")

# pull out the names of the sheets in the workbook; we'll use these to name
# the tables we create in the SQLite database
table_names = xl.open_workbook(path).sheet_names()

# this loop works for any workbook, since the sheet names aren't hard-coded
for table in table_names:
    df = pd.read_excel(path, sheetname='{}'.format(table))
    con.execute("DROP TABLE IF EXISTS {}".format(table))
    pd.io.sql.to_sql(df, "{}".format(table), con, index=False)

# now the spreadsheets are tables in a mini database!

# Finally, a little function to make it easy to run queries on this mini-database
def run(query):
    results = pd.read_sql("{}".format(query), con)
    return results

In [2]:
# Set up a SQLite database using several .csv files:

# if this .sqlite db doesn't already exist, this will create it
# if the .sqlite db *does* already exist, this establishes the desired connection
con = sqlite3.connect("sqlite_cars.sqlite")

# create dataframes from each .csv file:
sales_table = pd.read_csv('https://raw.githubusercontent.com/DaveBackus/Data_Bootcamp/master/Code/SQL/sales_table.csv')
car_table = pd.read_csv('https://raw.githubusercontent.com/DaveBackus/Data_Bootcamp/master/Code/SQL/car_table.csv')
salesman_table = pd.read_csv('https://raw.githubusercontent.com/DaveBackus/Data_Bootcamp/master/Code/SQL/salesman_table.csv')
cust_table = pd.read_csv('https://raw.githubusercontent.com/DaveBackus/Data_Bootcamp/master/Code/SQL/cust_table.csv')
# make a list of the tables (dataframes) and table names:
tables = [sales_table, car_table, salesman_table, cust_table]
table_names = ['sales_table', 'car_table', 'salesman_table', 'cust_table']
# drop each table if it already exists, so rerunning this cell doesn't raise an error,
# then write it to the database (or create it for the first time)
for table_name, table in zip(table_names, tables):
    con.execute("DROP TABLE IF EXISTS {}".format(table_name))
    pd.io.sql.to_sql(table, table_name, con, index=False)
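
As a quick sanity check, we can ask SQLite which tables now exist (a sketch using the connection above):

In [ ]:
# list the tables now present in the database
con.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall()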

In [3]:
# Finally, a little function to make it easy to run queries on this mini-database
def run(query):
    results = pd.read_sql("{}".format(query), con)
    return results
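
For example, assuming the tables loaded above:

In [ ]:
# peek at the first few rows of one of the new tables
run('SELECT * FROM car_table LIMIT 5')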

In [4]:
# don't read, just run!
join_df_index = ['JOIN or INNER JOIN', 'LEFT JOIN or LEFT OUTER JOIN', 'RIGHT JOIN or RIGHT OUTER JOIN', 'OUTER JOIN or FULL OUTER JOIN']
join_df = pd.DataFrame({'SQLite' : pd.Series(['✓', '✓', 'not supported', 'not supported'], index=join_df_index),
                        'MySQL' : pd.Series(['✓', '✓', '✓', 'not supported'], index=join_df_index),
                        'Microsoft SQL Server' : pd.Series(['✓','✓','✓','✓'], index=join_df_index),
                        'Oracle' : pd.Series(['✓','✓','✓','✓'], index=join_df_index)})
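
One practical consequence of the table above: SQLite has no RIGHT JOIN, but A RIGHT JOIN B returns the same rows as B LEFT JOIN A, so you can swap the table order instead. A minimal sketch (the model_id join column is a hypothetical placeholder, not necessarily a real column in these tables):

In [ ]:
# a RIGHT JOIN rewritten as a LEFT JOIN with the tables swapped;
# 'model_id' is a hypothetical column name used for illustration
right_join_workaround = '''
    SELECT
        *
    FROM
        car_table
    LEFT JOIN
        sales_table ON sales_table.model_id = car_table.model_id
    '''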

In [5]:
# don't read, just run!
path = ('/Users/sarahbeckett-hile/Dropbox/join_examples.xlsx')
table_names = xl.open_workbook(path).sheet_names()
for table in table_names:
    df = pd.read_excel(path, sheetname='{}'.format(table))
    con.execute("DROP TABLE IF EXISTS {}".format(table))
    pd.io.sql.to_sql(df, "{}".format(table), con, index=False)

# now the spreadsheets are in tables in our mini database!

In [3]:
select_cheat1 = '''
    SELECT
        *
    FROM
        car_table
    '''

In [4]:
print(select_cheat1)


    SELECT
        *
    FROM
        car_table
    

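With the query stored as a string, executing it is a one-liner (assuming the run() helper and the car_table loaded above):

In [ ]:
run(select_cheat1)
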
In [1]:
from matplotlib_venn import venn2

In [3]:
venn2([set(['A', 'B', 'C', 'D']), set(['D', 'E', 'F'])])


Out[3]:
<matplotlib_venn._common.VennDiagram at 0x1075e4b00>
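
If the cell above only shows the object repr instead of drawing the diagram, enabling inline plotting should render it (a sketch; assumes a Jupyter environment with matplotlib installed):

In [ ]:
%matplotlib inline
import matplotlib.pyplot as plt
venn2([set(['A', 'B', 'C', 'D']), set(['D', 'E', 'F'])])
plt.show()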

In [3]:
sales_table = pd.read_csv('https://raw.githubusercontent.com/DaveBackus/Data_Bootcamp/master/Code/SQL/sales_table.csv')
car_table = pd.read_csv('https://raw.githubusercontent.com/DaveBackus/Data_Bootcamp/master/Code/SQL/car_table.csv')
salesman_table = pd.read_csv('https://raw.githubusercontent.com/DaveBackus/Data_Bootcamp/master/Code/SQL/salesman_table.csv')
cust_table = pd.read_csv('https://raw.githubusercontent.com/DaveBackus/Data_Bootcamp/master/Code/SQL/cust_table.csv')
dog_table = pd.read_csv('https://raw.githubusercontent.com/DaveBackus/Data_Bootcamp/master/Code/SQL/dog_table.csv')
cat_table = pd.read_csv('https://raw.githubusercontent.com/DaveBackus/Data_Bootcamp/master/Code/SQL/cat_table.csv')

In [4]:
tables = [sales_table, car_table, salesman_table, cust_table, dog_table, cat_table]

In [5]:
table_names = []

In [6]:
# note: str(table) returns the DataFrame's *contents* as a string,
# not the name of the variable, so this loop can't recover the table names
for table in tables:
    table_name = str(table)

In [7]:
table_name


Out[7]:
'         Owner_Name    Cat_Name\n0  Michael Martinez   Mrs. Paws\n1    Gilbert Henkel     Mittens\n2        May Reeves      Clover\n3  Elizabeth Minier  Sweetcakes\n4         Donna Ona      Barnie\n5     Monika Turner        Cara\n6   Adrian Hardiman   Annabelle\n7      Loyd Mossman    Garfield\n8     Heather Emery      Engine\n9     Lauren Larson    Midnight'

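Since a DataFrame doesn't know its own variable name, one workaround is to keep the names alongside the frames explicitly, e.g. in a dict (a sketch following the same pattern as the loading loop earlier):

In [ ]:
# map table names to DataFrames explicitly, then load them as before
named_tables = {'dog_table': dog_table, 'cat_table': cat_table}
for name, df in named_tables.items():
    con.execute("DROP TABLE IF EXISTS {}".format(name))
    pd.io.sql.to_sql(df, name, con, index=False)
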
In [13]:
rdbms_key = pd.DataFrame()

In [15]:
describe_index = ['Describing a table']
describe_df = pd.DataFrame({'SQLite' : pd.Series(['PRAGMA TABLE_INFO(table_name)'], index=describe_index),
                        'MySQL' : pd.Series(['DESCRIBE table_name'], index=describe_index),
                        'Microsoft SQL Server' : pd.Series(['SP_HELP table_name'], index=describe_index),
                        'Oracle' : pd.Series(['DESCRIBE table_name'], index=describe_index)})
rdbms_key = rdbms_key.append(describe_df)

# show limit options
limit_df_index = ['Limiting rows']
limit_df = pd.DataFrame({'SQLite' : pd.Series(['LIMIT N'], index=limit_df_index),
                        'MySQL' : pd.Series(['LIMIT N'], index=limit_df_index),
                        'Microsoft SQL Server' : pd.Series(['SELECT TOP N column_a...'], index=limit_df_index),
                        'Oracle' : pd.Series(['WHERE ROWNUM <= N'], index=limit_df_index)})
rdbms_key = rdbms_key.append(limit_df)

# show concat options: 
concat_df_index = ['Concatenating']
concat_df = pd.DataFrame({'SQLite' : pd.Series(['||'], index=concat_df_index),
                        'MySQL' : pd.Series(['CONCAT(column_a, column_b)'], index=concat_df_index),
                        'Microsoft SQL Server' : pd.Series(['CONCAT(column_a, column_b) or +'], index=concat_df_index),
                        'Oracle' : pd.Series(['CONCAT(column_a, column_b) or ||'], index=concat_df_index)})

rdbms_key = rdbms_key.append(concat_df)    

# show compatibility with joins and different DBs 
join_df_index = ['JOIN or INNER JOIN', 'LEFT JOIN or LEFT OUTER JOIN', 'RIGHT JOIN or RIGHT OUTER JOIN', 'OUTER JOIN or FULL OUTER JOIN']
join_df = pd.DataFrame({'SQLite' : pd.Series(['✓', '✓', 'not supported', 'not supported'], index=join_df_index),
                        'MySQL' : pd.Series(['✓', '✓', '✓', 'not supported'], index=join_df_index),
                        'Microsoft SQL Server' : pd.Series(['✓','✓','✓','✓'], index=join_df_index),
                        'Oracle' : pd.Series(['✓','✓','✓','✓'], index=join_df_index)})

rdbms_key = rdbms_key.append(join_df)

In [16]:
rdbms_key


Out[16]:
                                Microsoft SQL Server             MySQL                       Oracle                            SQLite
Describing a table              SP_HELP table_name               DESCRIBE table_name         DESCRIBE table_name               PRAGMA TABLE_INFO(table_name)
Limiting rows                   SELECT TOP N column_a...         LIMIT N                     WHERE ROWNUM <= N                 LIMIT N
Concatenating                   CONCAT(column_a, column_b) or +  CONCAT(column_a, column_b)  CONCAT(column_a, column_b) or ||  ||
JOIN or INNER JOIN              ✓                                ✓                           ✓                                 ✓
LEFT JOIN or LEFT OUTER JOIN    ✓                                ✓                           ✓                                 ✓
RIGHT JOIN or RIGHT OUTER JOIN  ✓                                ✓                           ✓                                 not supported
OUTER JOIN or FULL OUTER JOIN   ✓                                not supported               ✓                                 not supported
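
To try the SQLite column of this cheat sheet directly, assuming the run() helper defined earlier (run each query in its own cell to see its result):

In [ ]:
# describe a table
run('PRAGMA TABLE_INFO(car_table)')
# limit the number of rows returned
run('SELECT * FROM sales_table LIMIT 3')
# concatenate with ||; assumes cat_table was written to the
# database (e.g. via the dict sketch above)
run("SELECT Owner_Name || ': ' || Cat_Name FROM cat_table")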

In [ ]: