In [1]:
%load_ext sql
%sql sqlite:///smaller.db
Out[1]:
In [5]:
%sql select name, unitid, min(year) from data where fuel_type is 'Coal' group by orispl_code, unitid
Out[5]:
In [1]:
import csv, sqlite3
import os.path
In [7]:
con = sqlite3.connect("cems.db")
cur = con.cursor()
In [8]:
# Set up table to receive CEMS data
cur.execute('drop table if exists test;')
cur.execute(
"create table test( \
state varchar, \
facility_name varchar, \
orispl_code int, \
unitid varchar, \
op_date date, \
op_hour int, \
op_time float, \
gload float, \
SO2_MASS float, \
NOX_MASS float, \
CO2_MASS float, \
HEAT_INPUT float \
);")
Out[8]:
In [14]:
# Iterate through files to read data into database
for year in range(2001, 2018):
print(year)
for n_month in range(1, 13):
for state in ['wa', 'or', 'ca', 'id', 'nv', 'ut', 'az', 'nm', 'co', 'wy', 'mt', 'tx']:
month = str(n_month) if n_month > 9 else "0" + str(n_month)
name = "unzipped/" + str(year) + state + month + ".csv"
if os.path.isfile(name):
with open(name, 'rb') as fin:
dr = csv.DictReader(fin)
try:
to_db = [(i['STATE'], i['FACILITY_NAME'], i['ORISPL_CODE'], i['UNITID'], i['OP_DATE'], i['OP_HOUR'], i['OP_TIME'], i['GLOAD (MW)'],
i['SO2_MASS (lbs)'], i['NOX_MASS (lbs)'], i['CO2_MASS (tons)'], i['HEAT_INPUT (mmBtu)']) for i in dr]
except:
try:
to_db = [(i['STATE'], i['FACILITY_NAME'], i['ORISPL_CODE'], i['UNITID'], i['OP_DATE'], i['OP_HOUR'], i['OP_TIME'], i['GLOAD'],
i['SO2_MASS'], i['NOX_MASS'], i['CO2_MASS'], i['HEAT_INPUT']) for i in dr]
except:
print(name)
# print to_db
cur.executemany("insert into test (STATE, FACILITY_NAME, ORISPL_CODE, UNITID, OP_DATE, \
OP_HOUR, OP_TIME, GLOAD, SO2_MASS, NOX_MASS, CO2_MASS, HEAT_INPUT ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);", to_db)
con.commit()
In [16]:
%sql sqlite:///cems.db
%sql select * from test limit 5
Out[16]:
In [ ]: