SQL

Accessing data stored in databases is a routine exercise. I demonstrate a few helpful methods in the Jupyter Notebook.


In [ ]:
# Run the Hive script from the shell. The `-f` flag is what tells the Hive CLI
# to execute the statements in a file; a bare filename is not executed.
!hive -f create_features.sql

In [ ]:
import warnings
warnings.filterwarnings('ignore')

In [ ]:
# One-time setup: install ipython-sql (provides the %sql / %%sql magics).
# %conda, unlike !conda, installs into the environment of the running kernel
# rather than whichever conda happens to be first on PATH.
%conda install -c conda-forge ipython-sql -y

In [ ]:
# Load the ipython-sql extension, which registers the %sql and %%sql magics
# used in the cells below.
%load_ext sql

In [ ]:
# Ask the SQL magics to return query results as pandas DataFrames instead of
# ipython-sql's own result-set objects.
%config SqlMagic.autopandas=True

In [ ]:
# Libraries used by the Python cells below: sqlite3 for the on-disk database
# connection and pandas for reading/writing tables as DataFrames.
# (The table itself is created in the following %%sql cell; SQL statements
# cannot be placed in a plain Python cell.)
import sqlite3

import pandas as pd

In [ ]:
%%sql sqlite://
-- Rebuild the demo table from scratch so this cell can be re-run safely.
DROP TABLE IF EXISTS presidents;
CREATE TABLE presidents (first_name, last_name, year_of_birth);
-- Load all sample rows with a single multi-row INSERT.
INSERT INTO presidents VALUES
    ('George', 'Washington', 1732),
    ('John', 'Adams', 1735),
    ('Thomas', 'Jefferson', 1743),
    ('James', 'Madison', 1751),
    ('James', 'Monroe', 1758),
    ('Zachary', 'Taylor', 1784),
    ('Abraham', 'Lincoln', 1809),
    ('Theodore', 'Roosevelt', 1858),
    ('Richard', 'Nixon', 1913),
    ('Barack', 'Obama', 1961);

In [ ]:
# Capture the result of a one-line %sql query in a Python variable; the query
# selects every column for presidents born after 1825.
later_presidents = %sql SELECT * FROM presidents WHERE year_of_birth > 1825
# Bare expression on the last line so the notebook displays the result.
later_presidents

In [ ]:
# Inspect the type of the object returned by %sql; with SqlMagic.autopandas
# enabled this is expected to be a pandas DataFrame.
type(later_presidents)

In [ ]:
# Persist the query result to an on-disk SQLite database file.
con = sqlite3.connect("presidents.sqlite")
try:
    # if_exists="replace" drops any existing `presidents` table first, so the
    # cell can be re-run. index=False keeps the DataFrame's positional index
    # from being stored as an extra `index` column.
    later_presidents.to_sql("presidents", con, if_exists="replace", index=False)
finally:
    # Always release the file handle, even if the write fails; a later cell
    # opens its own connection to the same file.
    con.close()

Querying the database through pandas directly (`pd.read_sql`)


In [ ]:
%%sql
-- Presidents born after 1800, run on the most recently opened connection.
SELECT first_name,
       last_name,
       year_of_birth
FROM presidents
WHERE year_of_birth > 1800;

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:
# Read from the on-disk SQLite file using pandas alone (no SQL magic).
con = sqlite3.connect("presidents.sqlite")
try:
    # pd.read_sql runs the query on the open connection and returns the rows
    # as a DataFrame; no explicit cursor is needed.
    new_dataframe = pd.read_sql("""SELECT first_name, last_name, year_of_birth
                               FROM presidents
                               WHERE year_of_birth > 1800
                               """,
                                con=con)
finally:
    # Close the connection even if the query raises.
    con.close()

In [ ]:
# Display the rows loaded by pd.read_sql.
new_dataframe

In [ ]:
# Show the type of the object returned by pd.read_sql.
type(new_dataframe)

In [ ]:


In [ ]: