In [9]:
import pandas as pd
import sqlite3
import xlrd as xl
In [2]:
# Set up a SQLite database using several .csv files:
# if this .sqlite db doesn't already exist, connect() will create it;
# if it *does* already exist, this establishes the desired connection.
con = sqlite3.connect("sqlite_cars.sqlite")

# Create a DataFrame from each .csv file (all live under the same repo path):
base_url = 'https://raw.githubusercontent.com/DaveBackus/Data_Bootcamp/master/Code/SQL'
sales_table = pd.read_csv(base_url + '/sales_table.csv')
car_table = pd.read_csv(base_url + '/car_table.csv')
salesman_table = pd.read_csv(base_url + '/salesman_table.csv')
cust_table = pd.read_csv(base_url + '/cust_table.csv')

# Pair each DataFrame with the name its SQL table should get:
tables = [sales_table, car_table, salesman_table, cust_table]
table_names = ['sales_table', 'car_table', 'salesman_table', 'cust_table']

# Drop each table if it already exists (so rerunning this cell is safe),
# then write the DataFrame into the database.
for table, table_name in zip(tables, table_names):
    # table_name comes from the hard-coded list above, so interpolating it
    # into the SQL is safe here (identifiers can't be bound as parameters).
    con.execute("DROP TABLE IF EXISTS {}".format(table_name))
    # DataFrame.to_sql replaces the deprecated pd.io.sql.to_sql helper.
    table.to_sql(table_name, con, index=False)
In [3]:
# Finally, a little function to make it easy to run queries on this mini-database
def run(query):
    """Execute `query` against the module-level SQLite connection `con`
    and return the result set as a pandas DataFrame."""
    # `query` is already a string, so the original's redundant
    # "{}".format(query) wrapper has been dropped.
    return pd.read_sql(query, con)
In [4]:
# don't read, just run!
# Support matrix for the four JOIN flavors across common RDBMS engines.
join_df_index = ['JOIN or INNER JOIN', 'LEFT JOIN or LEFT OUTER JOIN', 'RIGHT JOIN or RIGHT OUTER JOIN', 'OUTER JOIN or FULL OUTER JOIN']
join_df = pd.DataFrame(
    {
        'SQLite': ['✓', '✓', 'not supported', 'not supported'],
        'MySQL': ['✓', '✓', '✓', 'not supported'],
        'Microsoft SQL Server': ['✓', '✓', '✓', '✓'],
        'Oracle': ['✓', '✓', '✓', '✓'],
    },
    index=join_df_index,
)
In [5]:
# don't read, just run!
# NOTE(review): hardcoded absolute local path — this only works on the
# original author's machine; consider a configurable DATA_DIR instead.
path = '/Users/sarahbeckett-hile/Dropbox/join_examples.xlsx'
table_names = xl.open_workbook(path).sheet_names()
for table in table_names:
    # `sheet_name` replaced the deprecated (and since removed) `sheetname`
    # keyword of pd.read_excel; `table` is already a string.
    df = pd.read_excel(path, sheet_name=table)
    con.execute("DROP TABLE IF EXISTS {}".format(table))
    # DataFrame.to_sql replaces the deprecated pd.io.sql.to_sql helper.
    df.to_sql(table, con, index=False)
# now the spreadsheets are in tables in our mini database!
In [3]:
# A minimal SELECT statement: fetch every column (*) from car_table.
select_cheat1 = '''
SELECT
*
FROM
car_table
'''
In [4]:
# Print the raw SQL text of the query defined above.
print(select_cheat1)
In [1]:
from matplotlib_venn import venn2
In [3]:
# Two-set Venn diagram: {A, B, C, D} vs {D, E, F}; the overlap is {D}.
venn2([set(['A', 'B', 'C', 'D']), set(['D', 'E', 'F'])])
Out[3]:
In [3]:
# Re-load the four example tables, plus two new ones (dog_table, cat_table).
# The repeated URL prefix from the original is hoisted into one constant.
base_url = 'https://raw.githubusercontent.com/DaveBackus/Data_Bootcamp/master/Code/SQL'
sales_table = pd.read_csv(base_url + '/sales_table.csv')
car_table = pd.read_csv(base_url + '/car_table.csv')
salesman_table = pd.read_csv(base_url + '/salesman_table.csv')
cust_table = pd.read_csv(base_url + '/cust_table.csv')
dog_table = pd.read_csv(base_url + '/dog_table.csv')
cat_table = pd.read_csv(base_url + '/cat_table.csv')
In [4]:
# Collect the six example DataFrames in one list (order presumably meant to
# parallel a matching list of table-name strings — see the cells below).
tables = [sales_table, car_table, salesman_table, cust_table, dog_table, cat_table]
In [5]:
# Start with an empty list — presumably intended to be filled with the
# tables' names by the loop in the next cell.
table_names = []
In [6]:
# BUG in original: `str(table)` returns the DataFrame's printed repr, not the
# variable's name, and nothing was ever appended to table_names. Python objects
# don't know their variable names, so list the table names explicitly.
table_names = ['sales_table', 'car_table', 'salesman_table',
               'cust_table', 'dog_table', 'cat_table']
# Keep `table_name` defined, since a later cell displays it.
table_name = table_names[-1]
In [7]:
# NOTE(review): the loop above set table_name = str(<DataFrame>), i.e. the
# frame's printed repr — not the variable's name as presumably intended.
table_name
Out[7]:
In [13]:
# Start with an empty DataFrame; the comparison tables are appended to it below.
rdbms_key = pd.DataFrame()
In [15]:
# Build a reference table comparing syntax across SQLite, MySQL, SQL Server,
# and Oracle. DataFrame.append() was deprecated in pandas 1.4 and removed in
# 2.0, so the pieces are combined with a single pd.concat at the end instead.

# how to inspect a table's schema:
describe_index = ['Reading a table']
describe_df = pd.DataFrame({'SQLite' : pd.Series(['PRAGMA TABLE_INFO(table_name)'], index=describe_index),
                            'MySQL' : pd.Series(['DESCRIBE table_name'], index=describe_index),
                            'Microsoft SQL Server' : pd.Series(['SP_HELP table_name'], index=describe_index),
                            # fixed typo: original read 'DESCRIBE table_table'
                            'Oracle' : pd.Series(['DESCRIBE table_name'], index=describe_index)})
# show limit options
limit_df_index = ['LIMITING']
limit_df = pd.DataFrame({'SQLite' : pd.Series(['LIMIT N'], index=limit_df_index),
                         'MySQL' : pd.Series(['LIMIT N'], index=limit_df_index),
                         'Microsoft SQL Server' : pd.Series(['SELECT TOP N column_a...'], index=limit_df_index),
                         'Oracle' : pd.Series(['WHERE ROWNUM <=N'], index=limit_df_index)})
# show concat options:
concat_df_index = ['Concatenating']
concat_df = pd.DataFrame({'SQLite' : pd.Series(['||'], index=concat_df_index),
                          'MySQL' : pd.Series(['CONCAT(column_a, column_b)'], index=concat_df_index),
                          'Microsoft SQL Server' : pd.Series(['CONCAT(column_a, column_b) or +'], index=concat_df_index),
                          'Oracle' : pd.Series(['CONCAT(column_a, column_b) or ||'], index=concat_df_index)})
# show compatibility with joins and different DBs
join_df_index = ['JOIN or INNER JOIN', 'LEFT JOIN or LEFT OUTER JOIN', 'RIGHT JOIN or RIGHT OUTER JOIN', 'OUTER JOIN or FULL OUTER JOIN']
join_df = pd.DataFrame({'SQLite' : pd.Series(['✓', '✓', 'not supported', 'not supported'], index=join_df_index),
                        'MySQL' : pd.Series(['✓', '✓', '✓', 'not supported'], index=join_df_index),
                        'Microsoft SQL Server' : pd.Series(['✓','✓','✓','✓'], index=join_df_index),
                        'Oracle' : pd.Series(['✓','✓','✓','✓'], index=join_df_index)})
# Stack all four sections into one lookup table (same result as the original's
# chain of appends onto the empty frame created above).
rdbms_key = pd.concat([describe_df, limit_df, concat_df, join_df])
In [16]:
# Display the assembled RDBMS syntax-comparison table.
rdbms_key
Out[16]:
In [ ]: