SQL

Accessing data stored in databases is a routine exercise. I demonstrate a few helpful methods in the Jupyter Notebook.


In [ ]:
%load_ext sql_magic

In [ ]:
import sqlalchemy
import pandas as pd
import sqlite3
from sqlalchemy import create_engine
sqlite_engine = create_engine('sqlite://')

In [ ]:
%config SQL.conn_name = "sqlite_engine"

In [ ]:
%config SQL

In [ ]:
%config SQL.output_result = False
CREATE TABLE presidents (first_name, last_name, year_of_birth);
INSERT INTO presidents VALUES ('George', 'Washington', 1732);
INSERT INTO presidents VALUES ('John', 'Adams', 1735);
INSERT INTO presidents VALUES ('Thomas', 'Jefferson', 1743);
INSERT INTO presidents VALUES ('James', 'Madison', 1751);
INSERT INTO presidents VALUES ('James', 'Monroe', 1758);
INSERT INTO presidents VALUES ('Zachary', 'Taylor', 1784);
INSERT INTO presidents VALUES ('Abraham', 'Lincoln', 1809);
INSERT INTO presidents VALUES ('Theodore', 'Roosevelt', 1858);
INSERT INTO presidents VALUES ('Richard', 'Nixon', 1913);
INSERT INTO presidents VALUES ('Barack', 'Obama', 1961);

In [ ]:
%%read_sql temp
CREATE TABLE presidents (first_name, last_name, year_of_birth);
INSERT INTO presidents VALUES ('George', 'Washington', 1732);
INSERT INTO presidents VALUES ('John', 'Adams', 1735);
INSERT INTO presidents VALUES ('Thomas', 'Jefferson', 1743);
INSERT INTO presidents VALUES ('James', 'Madison', 1751);
INSERT INTO presidents VALUES ('James', 'Monroe', 1758);
INSERT INTO presidents VALUES ('Zachary', 'Taylor', 1784);
INSERT INTO presidents VALUES ('Abraham', 'Lincoln', 1809);
INSERT INTO presidents VALUES ('Theodore', 'Roosevelt', 1858);
INSERT INTO presidents VALUES ('Richard', 'Nixon', 1913);
INSERT INTO presidents VALUES ('Barack', 'Obama', 1961);

In [ ]:
%%read_sql df
SELECT * FROM presidents

In [ ]:
df

Inline magic


In [ ]:
later_presidents = %read_sql SELECT * FROM presidents WHERE year_of_birth > 1825
later_presidents

In [ ]:
%%read_sql later_presidents
SELECT * FROM presidents WHERE year_of_birth > 1825

Through pandas directly


In [ ]:
birthyear = 1800

In [ ]:
%%read_sql df1
SELECT first_name,
       last_name,
       year_of_birth
FROM presidents
WHERE year_of_birth > {birthyear}

In [ ]:
df1

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:
con = sqlite3.connect("presidents.sqlite")
# cur = con.cursor()

new_dataframe = pd.read_sql(f"""SELECT first_name, last_name, year_of_birth
                               FROM presidents
                               WHERE year_of_birth > {birthyear}
                               """, 
                            con=con)

con.close()

In [ ]:
new_dataframe

In [ ]:
df.head()

In [ ]:
coal = pd.read_csv("../data/coal_prod_cleaned.csv")
coal.head()

In [ ]:
new_dataframe.to_sql('presidents', con=sqlite_engine, if_exists='append', index=False)

In [ ]:
coal.to_sql('coal', con=sqlite_engine, if_exists='append', index=False)

In [ ]:


In [ ]:


In [ ]:
%%read_sql example
SELECT * FROM coal

In [ ]:
example.head()

In [ ]:
example.columns