notebook.community

Edit and run



In [1]:

    
%load_ext sql



In [ ]:

    
%%sql
sqlite:///smaller.db
ATTACH DATABASE 'coal.db' AS NG;
create table NG.data as select * from data where fuel_type is 'Coal';









    



Done.
Done.






    Out[ ]:





[]



In [2]:

    
%%sql
sqlite:///coal.db
drop table if exists cf;
create table cf as 
select name, d.orispl_code, d.unitid, state, op_date, op_hour, gload, 
cast(HEAT_INPUT as float) / cast(max_gen_hr as float) as capacity_factor_hr,
cast(gload as float) / cast(max_gen_gl as float) as capacity_factor_gl
from (select orispl_code, unitid, max(CAST(HEAT_INPUT as float)) as max_gen_hr,
      max(CAST(gload as float)) as max_gen_gl
from data group by orispl_code, unitid) as m, data as d
where d.orispl_code = m.orispl_code and d.unitid = m.unitid;
select * from cf limit 5;









    



Done.
Done.
Done.






    Out[2]:





    
        name
        orispl_code
        unitid
        state
        op_date
        op_hour
        gload
        capacity_factor_hr
        capacity_factor_gl
    
    
        Centralia
        3845
        BW21
        WA
        01-01-2001
        0
        704.0
        0.563378021678
        0.907216494845
    
    
        Centralia
        3845
        BW21
        WA
        01-01-2001
        1
        706.0
        0.575998267023
        0.909793814433
    
    
        Centralia
        3845
        BW21
        WA
        01-01-2001
        2
        704.0
        0.562110381014
        0.907216494845
    
    
        Centralia
        3845
        BW21
        WA
        01-01-2001
        3
        702.0
        0.560016367006
        0.904639175258
    
    
        Centralia
        3845
        BW21
        WA
        01-01-2001
        4
        702.0
        0.560072528301
        0.904639175258



In [3]:

    
import sqlite3
import pandas
import os.path
con = sqlite3.connect('coal.db')
labels = pandas.read_sql("select orispl_code, unitid from cf group by orispl_code, unitid", con)



In [4]:

    
# Generates csv file for each unit, for each year, of capacity factors.
if not os.path.exists('cf'):
    os.makedirs('cf')
for i, row in labels.iterrows():
    print row['orispl_code'], row['unitid']
    for year in range(2001, 2017):
        file_name = 'cf/' + str(row['orispl_code']) + '_' + row['unitid'].replace('*', '') + '_' + str(year) + '.csv'
        if os.path.isfile(file_name): continue
        table = pandas.read_sql("select name, orispl_code, unitid, op_date, op_hour, capacity_factor_gl, capacity_factor_hr from cf\
        where orispl_code is " + str(row['orispl_code']) + " and unitid is '" + row['unitid'] + "' and \
        substr(op_date, -4) is '" + str(year) + "'", con)
        table.to_csv(file_name)



In [ ]:

name	orispl_code	unitid	state	op_date	op_hour	gload	capacity_factor_hr	capacity_factor_gl
Centralia	3845	BW21	WA	01-01-2001	0	704.0	0.563378021678	0.907216494845
Centralia	3845	BW21	WA	01-01-2001	1	706.0	0.575998267023	0.909793814433
Centralia	3845	BW21	WA	01-01-2001	2	704.0	0.562110381014	0.907216494845
Centralia	3845	BW21	WA	01-01-2001	3	702.0	0.560016367006	0.904639175258
Centralia	3845	BW21	WA	01-01-2001	4	702.0	0.560072528301	0.904639175258