In [1]:
%load_ext sql
In [ ]:
%%sql
sqlite:///smaller.db
ATTACH DATABASE 'coal.db' AS NG;
create table NG.data as select * from data where fuel_type is 'Coal';
Out[ ]:
In [2]:
%%sql
sqlite:///coal.db
drop table if exists cf;
create table cf as
select name, d.orispl_code, d.unitid, state, op_date, op_hour, gload,
cast(HEAT_INPUT as float) / cast(max_gen_hr as float) as capacity_factor_hr,
cast(gload as float) / cast(max_gen_gl as float) as capacity_factor_gl
from (select orispl_code, unitid, max(CAST(HEAT_INPUT as float)) as max_gen_hr,
max(CAST(gload as float)) as max_gen_gl
from data group by orispl_code, unitid) as m, data as d
where d.orispl_code = m.orispl_code and d.unitid = m.unitid;
select * from cf limit 5;
Out[2]:
In [3]:
import sqlite3
import pandas
import os.path
con = sqlite3.connect('coal.db')
labels = pandas.read_sql("select orispl_code, unitid from cf group by orispl_code, unitid", con)
In [4]:
# Generates csv file for each unit, for each year, of capacity factors.
if not os.path.exists('cf'):
os.makedirs('cf')
for i, row in labels.iterrows():
print row['orispl_code'], row['unitid']
for year in range(2001, 2017):
file_name = 'cf/' + str(row['orispl_code']) + '_' + row['unitid'].replace('*', '') + '_' + str(year) + '.csv'
if os.path.isfile(file_name): continue
table = pandas.read_sql("select name, orispl_code, unitid, op_date, op_hour, capacity_factor_gl, capacity_factor_hr from cf\
where orispl_code is " + str(row['orispl_code']) + " and unitid is '" + row['unitid'] + "' and \
substr(op_date, -4) is '" + str(year) + "'", con)
table.to_csv(file_name)
In [ ]: