In [1]:
%load_ext sql
%sql sqlite:///smaller.db


Out[1]:
u'Connected: None@smaller.db'

In [5]:
%sql select name, unitid, min(year) from data where fuel_type is 'Coal' group by orispl_code, unitid


Done.
Out[5]:
name unitid min(year)
Escalante 1 2001
Cholla 1 2001
Cholla 2 2001
Cholla 3 2001
Cholla 4 2001
Irvington Generating Station 4 2001
Apache Station 2 2001
Apache Station 3 2001
Arapahoe 1 2001
Arapahoe 2 2001
Arapahoe 3 2001
Arapahoe 4 2001
Cameo 2 2001
Cherokee 1 2001
Cherokee 2 2001
Cherokee 3 2001
Cherokee 4 2001
Comanche (470) 1 2001
Comanche (470) 2 2001
Comanche (470) 3 2010
Valmont 5 2001
Martin Drake 5 2001
Martin Drake 6 2001
Martin Drake 7 2001
Lamar 8 2009
Hayden H1 2001
Hayden H2 2001
Nucla 1 2001
J E Corette 2 2001
Reid Gardner 1 2001
Reid Gardner 2 2001
Reid Gardner 3 2001
Reid Gardner 4 2001
Mohave 1 2001
Mohave 2 2001
Four Corners Steam Elec Station 1 2001
Four Corners Steam Elec Station 2 2001
Four Corners Steam Elec Station 3 2001
Four Corners Steam Elec Station 4 2001
Four Corners Steam Elec Station 5 2001
San Juan 1 2001
San Juan 2 2001
San Juan 3 2001
San Juan 4 2001
Carbon 1 2001
Carbon 2 2001
Centralia BW21 2001
Centralia BW22 2001
Dave Johnston BW41 2001
Dave Johnston BW42 2001
Dave Johnston BW43 2001
Dave Johnston BW44 2001
Naughton 1 2001
Naughton 2 2001
Naughton 3 2001
Navajo Generating Station 1 2001
Navajo Generating Station 2 2001
Navajo Generating Station 3 2001
Craig C1 2001
Craig C2 2001
Craig C3 2001
Colstrip 1 2001
Colstrip 2 2001
Colstrip 3 2001
Colstrip 4 2001
Lewis & Clark B1 2001
Wyodak BW91 2001
Boardman 1SG 2001
Hunter 1 2001
Hunter 2 2001
Hunter 3 2001
Coronado Generating Station U1B 2001
Coronado Generating Station U2B 2001
Laramie River 1 2001
Laramie River 2 2001
Laramie River 3 2001
Pawnee 1 2001
Intermountain 1SGA 2001
Intermountain 2SGA 2001
Rawhide Energy Station 101 2001
Neil Simpson II 001 2001
Bonanza 1-1 2001
Jim Bridger BW71 2001
Jim Bridger BW72 2001
Jim Bridger BW73 2001
Jim Bridger BW74 2001
Huntington 1 2001
Huntington 2 2001
Ray D Nixon 1 2001
Springerville Generating Station 1 2001
Springerville Generating Station 2 2001
Springerville Generating Station 4 2009
Springerville Generating Station TS3 2006
North Valmy 1 2001
North Valmy 2 2001
Sunnyside Cogeneration Associates 1 2015
Wygen I 001 2003
Hardin U1 2006
TS Power Plant 001 2008
Wygen II 001 2008
Wygen III 001 2010
Dry Fork Station 01 2011

In [1]:
import csv, sqlite3
import os.path

In [7]:
con = sqlite3.connect("cems.db")
cur = con.cursor()

In [8]:
# Set up table to receive CEMS data
cur.execute('drop table if exists test;')
cur.execute(
"create table test( \
    state varchar, \
    facility_name varchar, \
    orispl_code int, \
    unitid varchar, \
    op_date date, \
    op_hour int, \
    op_time float, \
    gload float, \
    SO2_MASS float, \
    NOX_MASS float, \
    CO2_MASS float, \
    HEAT_INPUT float \
);")


Out[8]:
<sqlite3.Cursor at 0x9d47d20>

In [14]:
# Iterate through files to read data into database
for year in range(2001, 2018):
    print(year)
    for n_month in range(1, 13):
        for state in ['wa', 'or', 'ca', 'id', 'nv', 'ut', 'az', 'nm', 'co', 'wy', 'mt', 'tx']:
            month = str(n_month) if n_month > 9 else "0" + str(n_month)
            name = "unzipped/" + str(year) + state + month + ".csv"
            if os.path.isfile(name):
                with open(name, 'rb') as fin:
                    dr = csv.DictReader(fin)
                    try:
                        to_db = [(i['STATE'], i['FACILITY_NAME'], i['ORISPL_CODE'], i['UNITID'], i['OP_DATE'], i['OP_HOUR'], i['OP_TIME'], i['GLOAD (MW)'], 
                                  i['SO2_MASS (lbs)'], i['NOX_MASS (lbs)'], i['CO2_MASS (tons)'], i['HEAT_INPUT (mmBtu)']) for i in dr]
                    except:
                        try:
                            to_db = [(i['STATE'], i['FACILITY_NAME'], i['ORISPL_CODE'], i['UNITID'], i['OP_DATE'], i['OP_HOUR'], i['OP_TIME'], i['GLOAD'], 
                                  i['SO2_MASS'], i['NOX_MASS'], i['CO2_MASS'], i['HEAT_INPUT']) for i in dr]
                        except:
                            print(name)
#                     print to_db
                    cur.executemany("insert into test (STATE, FACILITY_NAME, ORISPL_CODE, UNITID, OP_DATE, \
OP_HOUR, OP_TIME, GLOAD, SO2_MASS, NOX_MASS, CO2_MASS, HEAT_INPUT ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);", to_db)

con.commit()


2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017

In [16]:
%sql sqlite:///cems.db
%sql select * from test limit 5


Done.
Out[16]:
state facility_name orispl_code unitid op_date op_hour op_time gload SO2_MASS NOX_MASS CO2_MASS HEAT_INPUT
WA Centralia 3845 BW21 01-01-2001 0 1.0 704.0 11493.6 2506.854 720.5 7022.0
WA Centralia 3845 BW21 01-01-2001 1 1.0 706.0 11774.7 2577.369 736.6 7179.3
WA Centralia 3845 BW21 01-01-2001 2 1.0 704.0 11156.9 2606.306 718.8 7006.2
WA Centralia 3845 BW21 01-01-2001 3 1.0 702.0 10990.0 2561.697 716.2 6980.1
WA Centralia 3845 BW21 01-01-2001 4 1.0 702.0 11027.6 2631.762 716.2 6980.8

In [ ]: