In [1]:
%load_ext sql

In [ ]:
%%sql
sqlite:///smaller.db
ATTACH DATABASE 'coal.db' AS NG;
create table NG.data as select * from data where fuel_type is 'Coal';


Done.
Done.
Out[ ]:
[]

In [2]:
%%sql
sqlite:///coal.db
drop table if exists cf;
create table cf as 
select name, d.orispl_code, d.unitid, state, op_date, op_hour, gload, 
cast(HEAT_INPUT as float) / cast(max_gen_hr as float) as capacity_factor_hr,
cast(gload as float) / cast(max_gen_gl as float) as capacity_factor_gl
from (select orispl_code, unitid, max(CAST(HEAT_INPUT as float)) as max_gen_hr,
      max(CAST(gload as float)) as max_gen_gl
from data group by orispl_code, unitid) as m, data as d
where d.orispl_code = m.orispl_code and d.unitid = m.unitid;
select * from cf limit 5;


Done.
Done.
Done.
Out[2]:
name orispl_code unitid state op_date op_hour gload capacity_factor_hr capacity_factor_gl
Centralia 3845 BW21 WA 01-01-2001 0 704.0 0.563378021678 0.907216494845
Centralia 3845 BW21 WA 01-01-2001 1 706.0 0.575998267023 0.909793814433
Centralia 3845 BW21 WA 01-01-2001 2 704.0 0.562110381014 0.907216494845
Centralia 3845 BW21 WA 01-01-2001 3 702.0 0.560016367006 0.904639175258
Centralia 3845 BW21 WA 01-01-2001 4 702.0 0.560072528301 0.904639175258

In [3]:
import sqlite3
import pandas
import os.path
con = sqlite3.connect('coal.db')
labels = pandas.read_sql("select orispl_code, unitid from cf group by orispl_code, unitid", con)

In [4]:
# Generates csv file for each unit, for each year, of capacity factors.
if not os.path.exists('cf'):
    os.makedirs('cf')
for i, row in labels.iterrows():
    print row['orispl_code'], row['unitid']
    for year in range(2001, 2017):
        file_name = 'cf/' + str(row['orispl_code']) + '_' + row['unitid'].replace('*', '') + '_' + str(year) + '.csv'
        if os.path.isfile(file_name): continue
        table = pandas.read_sql("select name, orispl_code, unitid, op_date, op_hour, capacity_factor_gl, capacity_factor_hr from cf\
        where orispl_code is " + str(row['orispl_code']) + " and unitid is '" + row['unitid'] + "' and \
        substr(op_date, -4) is '" + str(year) + "'", con)
        table.to_csv(file_name)


87 1
113 1
113 2
113 3
113 4
126 4
160 2
160 3
465 1
465 2
465 3
465 4
468 2
469 1
469 2
469 3
469 4
470 1
470 2
470 3
477 5
492 5
492 6
492 7
508 8
525 H1
525 H2
527 1
2187 2
2324 1
2324 2
2324 3
2324 4
2341 1
2341 2
2442 1
2442 2
2442 3
2442 4
2442 5
2451 1
2451 2
2451 3
2451 4
3644 1
3644 2
3845 BW21
3845 BW22
4158 BW41
4158 BW42
4158 BW43
4158 BW44
4162 1
4162 2
4162 3
4941 1
4941 2
4941 3
6021 C1
6021 C2
6021 C3
6076 1
6076 2
6076 3
6076 4
6089 B1
6101 BW91
6106 1SG
6165 1
6165 2
6165 3
6177 U1B
6177 U2B
6204 1
6204 2
6204 3
6248 1
6481 1SGA
6481 2SGA
6761 101
7504 001
7790 1-1
8066 BW71
8066 BW72
8066 BW73
8066 BW74
8069 1
8069 2
8219 1
8223 1
8223 2
8223 4
8223 TS3
8224 1
8224 2
50951 1
55479 001
55749 U1
56224 001
56319 001
56596 001
56609 01

In [ ]: