In [4]:
import csv

In [11]:
with open('coal_plants.csv', 'rb') as csvfile:
    reader = csv.DictReader(csvfile)
    epa = []
    cems = []
    for row in reader:
        epa.append(row['\xef\xbb\xbfORIS Code'])
        cems.append(row[' Facility ID (ORISPL)'])
    print("Things in EPA, not in CEMS")
    for entry in epa:
        if entry not in cems:
            print(entry)
    print("Things in CEMS, not in EPA")
    for entry in cems:
        if entry not in epa:
            print(entry)


Things in EPA, not in CEMS
6204.2
6204.2
6204.1
50931
10002
50951
10784
10768
10769
10640
10373
54238
2341
2341
Things in CEMS, not in EPA
126
508
6089
6204
6204
6204









In [15]:
import sqlite3
import pandas
con = sqlite3.connect('navajo.db')
table = pandas.read_sql('select * from data', con)
table.to_csv('output.csv')

In [16]:
%load_ext sql


The sql extension is already loaded. To reload it, use:
  %reload_ext sql

In [18]:
%sql sqlite:///navajo.db
%sql select * from data limit 0


Done.
Out[18]:
state name orispl_code unitid op_date op_hour op_time gload SO2_MASS NOX_MASS CO2_MASS heat_input county latitude longitude fuel_type

In [30]:
%sql select max(CAST(gload as float)) as global_max_generation, max(CAST(heat_input as float)) as global_max_heat_input, name, orispl_code, unitid from data group by name, unitid


Done.
Out[30]:
global_max_generation global_max_heat_input name orispl_code unitid
132.0 1653.9 Cholla 113 1
306.0 3616.46 Cholla 113 2
307.0 8860.4 Cholla 113 3
436.0 5182.3 Cholla 113 4

In [ ]: