In [2]:
%autosave 10
!!AI isn't this why HDF5 was invented? Or is that only suitable for numeric data?
In [6]:
import pandas as pd
import pandasql
# Convenience wrapper so callers only have to type the query itself.
def pysqldf(q):
    """Pass the query string ``q`` to ``pandasql.sqldf`` along with this module's ``globals()``.

    Returns whatever ``pandasql.sqldf`` returns for that query.
    """
    return pandasql.sqldf(q, globals())
# !!AI maybe use examples from the Intro to Data Science course;
# it's identical to this.
Parse the math.stackexchange.com `Posts.xml` dump with `xml.etree.ElementTree.iterparse`: the XML file is massive, so don't load it all into memory.
In [8]:
# !!AI Sketch only -- this cell won't run as written: the connection
# arguments and the SQL text below are still placeholders.
import pandas.io.sql
import psycopg2
# Open the database connection that the query below is executed on.
connection = psycopg2.connect() # !!AI TODO fill in the connection parameters
# Hand the (placeholder) SQL string and the psycopg2 connection to
# pandas.io.sql.read_sql and bind the result to math_by_date.
# NOTE(review): the elided clauses must be replaced with a real query first.
math_by_date = pandas.io.sql.read_sql("""\
SELECT ...
FROM...
WHERE ...
AND .
AND ...
GROUP BY ...
""", connection)
In [9]:
# More work with pandas.io.sql
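`pandas.io.sql.read_sql` can handle HSTORE columns because it executes queries directly through the psycopg2 connection (psycopg2 converts hstore values to Python dicts once `psycopg2.extras.register_hstore` has been called on that connection).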
In [ ]: