SQL

Accessing data stored in databases is a routine exercise. I demonstrate a few helpful methods in the Jupyter Notebook.


In [1]:
!conda install ipython-sql -y


Fetching package metadata .........
Solving package specifications: ..........

# All requested packages already installed.
# packages in environment at /Users/jonathan/miniconda3/envs/py3:
#
ipython-sql               0.3.6                    py35_0    conda-forge

In [2]:
%load_ext sql


/Users/jonathan/miniconda3/envs/py3/lib/python3.5/site-packages/IPython/config.py:13: ShimWarning: The `IPython.config` package has been deprecated. You should import from traitlets.config instead.
  "You should import from traitlets.config instead.", ShimWarning)
/Users/jonathan/miniconda3/envs/py3/lib/python3.5/site-packages/IPython/utils/traitlets.py:5: UserWarning: IPython.utils.traitlets has moved to a top-level traitlets package.
  warn("IPython.utils.traitlets has moved to a top-level traitlets package.")

In [3]:
%config SqlMagic.autopandas=True

In [4]:
import pandas as pd
import sqlite3
CREATE TABLE presidents (first_name, last_name, year_of_birth);
INSERT INTO presidents VALUES ('George', 'Washington', 1732);
INSERT INTO presidents VALUES ('John', 'Adams', 1735);
INSERT INTO presidents VALUES ('Thomas', 'Jefferson', 1743);
INSERT INTO presidents VALUES ('James', 'Madison', 1751);
INSERT INTO presidents VALUES ('James', 'Monroe', 1758);
INSERT INTO presidents VALUES ('Zachary', 'Taylor', 1784);
INSERT INTO presidents VALUES ('Abraham', 'Lincoln', 1809);
INSERT INTO presidents VALUES ('Theodore', 'Roosevelt', 1858);
INSERT INTO presidents VALUES ('Richard', 'Nixon', 1913);
INSERT INTO presidents VALUES ('Barack', 'Obama', 1961);

In [5]:
%%sql sqlite://
CREATE TABLE presidents (first_name, last_name, year_of_birth);
INSERT INTO presidents VALUES ('George', 'Washington', 1732);
INSERT INTO presidents VALUES ('John', 'Adams', 1735);
INSERT INTO presidents VALUES ('Thomas', 'Jefferson', 1743);
INSERT INTO presidents VALUES ('James', 'Madison', 1751);
INSERT INTO presidents VALUES ('James', 'Monroe', 1758);
INSERT INTO presidents VALUES ('Zachary', 'Taylor', 1784);
INSERT INTO presidents VALUES ('Abraham', 'Lincoln', 1809);
INSERT INTO presidents VALUES ('Theodore', 'Roosevelt', 1858);
INSERT INTO presidents VALUES ('Richard', 'Nixon', 1913);
INSERT INTO presidents VALUES ('Barack', 'Obama', 1961);


Done.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
Out[5]:

In [6]:
later_presidents = %sql SELECT * FROM presidents WHERE year_of_birth > 1825
later_presidents


Done.
Out[6]:
first_name last_name year_of_birth
0 Theodore Roosevelt 1858
1 Richard Nixon 1913
2 Barack Obama 1961

In [7]:
type(later_presidents)


Out[7]:
pandas.core.frame.DataFrame

In [8]:
later_presidents


Out[8]:
first_name last_name year_of_birth
0 Theodore Roosevelt 1858
1 Richard Nixon 1913
2 Barack Obama 1961

In [9]:
con = sqlite3.connect("presidents.sqlite")
later_presidents.to_sql("presidents", con, if_exists='replace')

Through pandas directly


In [10]:
con = sqlite3.connect("presidents.sqlite")
cur = con.cursor()

new_dataframe = pd.read_sql("""SELECT * 
                                 FROM presidents""", 
                 con=con)

con.close()

In [11]:
new_dataframe


Out[11]:
index first_name last_name year_of_birth
0 0 Theodore Roosevelt 1858
1 1 Richard Nixon 1913
2 2 Barack Obama 1961

In [12]:
type(new_dataframe)


Out[12]:
pandas.core.frame.DataFrame

In [ ]: