In [9]:
import pandas as pd
import sqlite3
import xlrd as xl
In [2]:
# Set up a SQLite database using several .csv files:
# if this .sqlite db doesn't already exist, connect() will create it;
# if it *does* already exist, this establishes the desired connection.
con = sqlite3.connect("sqlite_cars.sqlite")

# Create a DataFrame from each .csv file (all live under the same repo path):
base_url = 'https://raw.githubusercontent.com/DaveBackus/Data_Bootcamp/master/Code/SQL'
sales_table = pd.read_csv(base_url + '/sales_table.csv')
car_table = pd.read_csv(base_url + '/car_table.csv')
salesman_table = pd.read_csv(base_url + '/salesman_table.csv')
cust_table = pd.read_csv(base_url + '/cust_table.csv')

# Pair each DataFrame with the name its SQL table should get:
tables = [sales_table, car_table, salesman_table, cust_table]
table_names = ['sales_table', 'car_table', 'salesman_table', 'cust_table']

# Drop each table if it already exists (so rerunning this cell is safe),
# then write the DataFrame into the database.
for table, table_name in zip(tables, table_names):
    # table_name comes from the hard-coded list above, so interpolating it
    # into the SQL is safe here (identifiers can't be bound as parameters).
    con.execute("DROP TABLE IF EXISTS {}".format(table_name))
    # DataFrame.to_sql replaces the deprecated pd.io.sql.to_sql helper.
    table.to_sql(table_name, con, index=False)
In [3]:
# Finally, a little function to make it easy to run queries on this mini-database
def run(query):
    """Execute `query` against the module-level SQLite connection `con`
    and return the result set as a pandas DataFrame."""
    # `query` is already a string, so the original's redundant
    # "{}".format(query) wrapper has been dropped.
    return pd.read_sql(query, con)
In [4]:
# don't read, just run!
# Support matrix for the four JOIN flavors across common RDBMS engines.
join_df_index = ['JOIN or INNER JOIN', 'LEFT JOIN or LEFT OUTER JOIN', 'RIGHT JOIN or RIGHT OUTER JOIN', 'OUTER JOIN or FULL OUTER JOIN']
join_df = pd.DataFrame(
    {
        'SQLite': ['✓', '✓', 'not supported', 'not supported'],
        'MySQL': ['✓', '✓', '✓', 'not supported'],
        'Microsoft SQL Server': ['✓', '✓', '✓', '✓'],
        'Oracle': ['✓', '✓', '✓', '✓'],
    },
    index=join_df_index,
)
In [5]:
# don't read, just run!
# NOTE(review): hardcoded absolute local path — this only works on the
# original author's machine; consider a configurable DATA_DIR instead.
path = '/Users/sarahbeckett-hile/Dropbox/join_examples.xlsx'
table_names = xl.open_workbook(path).sheet_names()
for table in table_names:
    # `sheet_name` replaced the deprecated (and since removed) `sheetname`
    # keyword of pd.read_excel; `table` is already a string.
    df = pd.read_excel(path, sheet_name=table)
    con.execute("DROP TABLE IF EXISTS {}".format(table))
    # DataFrame.to_sql replaces the deprecated pd.io.sql.to_sql helper.
    df.to_sql(table, con, index=False)
# now the spreadsheets are in tables in our mini database!
In [3]:
# A minimal SELECT statement: fetch every column (*) from car_table.
select_cheat1 = '''
SELECT
*
FROM
car_table
'''
In [4]:
# Print the raw SQL text of the query defined above.
print(select_cheat1)
In [1]:
from matplotlib_venn import venn2
In [3]:
# Two-set Venn diagram: {A, B, C, D} vs {D, E, F}; the overlap is {D}.
venn2([set(['A', 'B', 'C', 'D']), set(['D', 'E', 'F'])])
Out[3]:
In [3]:
# Re-load the four example tables, plus two new ones (dog_table, cat_table).
# The repeated URL prefix from the original is hoisted into one constant.
base_url = 'https://raw.githubusercontent.com/DaveBackus/Data_Bootcamp/master/Code/SQL'
sales_table = pd.read_csv(base_url + '/sales_table.csv')
car_table = pd.read_csv(base_url + '/car_table.csv')
salesman_table = pd.read_csv(base_url + '/salesman_table.csv')
cust_table = pd.read_csv(base_url + '/cust_table.csv')
dog_table = pd.read_csv(base_url + '/dog_table.csv')
cat_table = pd.read_csv(base_url + '/cat_table.csv')
In [4]:
# Collect the six example DataFrames in one list (order presumably meant to
# parallel a matching list of table-name strings — see the cells below).
tables = [sales_table, car_table, salesman_table, cust_table, dog_table, cat_table]
In [5]:
# Start with an empty list — presumably intended to be filled with the
# tables' names by the loop in the next cell.
table_names = []
In [6]:
# BUG in original: `str(table)` returns the DataFrame's printed repr, not the
# variable's name, and nothing was ever appended to table_names. Python objects
# don't know their variable names, so list the table names explicitly.
table_names = ['sales_table', 'car_table', 'salesman_table',
               'cust_table', 'dog_table', 'cat_table']
# Keep `table_name` defined, since a later cell displays it.
table_name = table_names[-1]
In [7]:
# NOTE(review): the loop above set table_name = str(<DataFrame>), i.e. the
# frame's printed repr — not the variable's name as presumably intended.
table_name
Out[7]:
In [13]:
# Start with an empty DataFrame; the comparison tables are appended to it below.
rdbms_key = pd.DataFrame()
In [15]:
# Build a reference table comparing syntax across SQLite, MySQL, SQL Server,
# and Oracle. DataFrame.append() was deprecated in pandas 1.4 and removed in
# 2.0, so the pieces are combined with a single pd.concat at the end instead.

# how to inspect a table's schema:
describe_index = ['Reading a table']
describe_df = pd.DataFrame({'SQLite' : pd.Series(['PRAGMA TABLE_INFO(table_name)'], index=describe_index),
                            'MySQL' : pd.Series(['DESCRIBE table_name'], index=describe_index),
                            'Microsoft SQL Server' : pd.Series(['SP_HELP table_name'], index=describe_index),
                            # fixed typo: original read 'DESCRIBE table_table'
                            'Oracle' : pd.Series(['DESCRIBE table_name'], index=describe_index)})
# show limit options
limit_df_index = ['LIMITING']
limit_df = pd.DataFrame({'SQLite' : pd.Series(['LIMIT N'], index=limit_df_index),
                         'MySQL' : pd.Series(['LIMIT N'], index=limit_df_index),
                         'Microsoft SQL Server' : pd.Series(['SELECT TOP N column_a...'], index=limit_df_index),
                         'Oracle' : pd.Series(['WHERE ROWNUM <=N'], index=limit_df_index)})
# show concat options:
concat_df_index = ['Concatenating']
concat_df = pd.DataFrame({'SQLite' : pd.Series(['||'], index=concat_df_index),
                          'MySQL' : pd.Series(['CONCAT(column_a, column_b)'], index=concat_df_index),
                          'Microsoft SQL Server' : pd.Series(['CONCAT(column_a, column_b) or +'], index=concat_df_index),
                          'Oracle' : pd.Series(['CONCAT(column_a, column_b) or ||'], index=concat_df_index)})
# show compatibility with joins and different DBs
join_df_index = ['JOIN or INNER JOIN', 'LEFT JOIN or LEFT OUTER JOIN', 'RIGHT JOIN or RIGHT OUTER JOIN', 'OUTER JOIN or FULL OUTER JOIN']
join_df = pd.DataFrame({'SQLite' : pd.Series(['✓', '✓', 'not supported', 'not supported'], index=join_df_index),
                        'MySQL' : pd.Series(['✓', '✓', '✓', 'not supported'], index=join_df_index),
                        'Microsoft SQL Server' : pd.Series(['✓','✓','✓','✓'], index=join_df_index),
                        'Oracle' : pd.Series(['✓','✓','✓','✓'], index=join_df_index)})
# Stack all four sections into one lookup table (same result as the original's
# chain of appends onto the empty frame created above).
rdbms_key = pd.concat([describe_df, limit_df, concat_df, join_df])
In [16]:
# Display the assembled RDBMS syntax-comparison table.
rdbms_key
Out[16]:
In [ ]: