In [4]:
from epa_client import *
import os
from pandas.io import sql
import csv

regions = {
    'all': ['al', 'az', 'ar', 'ca', 'co', 'ct', 'dc',
            'de', 'fl', 'ga', 'id', 'il', 'in', 'ia',
            'ks', 'ky', 'la', 'me', 'md', 'ma', 'mi',
            'mn', 'mi', 'mo', 'mt', 'ne', 'nv', 'nh',
            'nj', 'nm', 'ny', 'nc', 'nd', 'oh', 'ok',
            'or', 'pa', 'ri', 'sc', 'sd', 'tn', 'tx',
            'ut', 'vt', 'va', 'wa', 'wv', 'wi', 'wy'],
    }

In [8]:
for st in ['ny']:
    load('2010', st)
    print st, np.sum(data['2010'][st]["GLOAD (MW)"])
    data['2010'].pop(st)


loading 2010 ny
ny 65114159.0

In [1]:
import sqlite3
conn = sqlite3.connect('watttime-grid.db')

In [9]:
def facility_load():
    """read facility data from csv and return"""
    print "loading facilities"

    # set up path
    csvfname = os.path.join('data/original', 'facilities_balancingauthority.csv')
    
    # csv -> dataframe
    df = pd.read_csv(csvfname)
    return df

def ferc_load():
    """read FERC region from csv and return"""
    # set up path
    csvfname = os.path.join('data/ferc_714', 'FERC system lambda data.csv' )
    
    # csv -> dataframe
    df = pd.read_csv(csvfname, parse_dates=3, index_col=range(5), usecols=range(29))
    df.columns = range(24)
    stacked = df.stack()
  #  stacked.index.names = ['OP_DATE', 'OP_HOUR']
    return stacked

In [24]:
facility_df = facility_load()


loading facilities

In [21]:
stfac_df = facility_df[facility_df['State']=='CT']
for ba in stfac_df["BA_ABBREV"].unique():
    bafac_df = stfac_df[stfac_df["BA_ABBREV"]==ba]
    print ba, bafac_df[" Facility ID (ORISPL)"].unique()


ISNE [ 10675.   6635.  10567.    540.  55042.    568.  50498.    542.    544.
    561.  54657.  56798.  55149.    562.  55126.    546.   6156.    548.
    581.  54236.  54605.  50736.    563.    565.    557.  55517.  56629.
  56189.]

In [42]:
for st in ['ca']:
    load('2010', st)
    epa_df = data['2010'][st]
    
    facilities_in_st_df = facility_df[facility_df['State']==st.upper()]
   # fac2ba_map = dict(set(zip(ba2fac_df[" Facility ID (ORISPL)"], ba2fac_df["BA_ABBREV"])))
    
    for ba in facilities_in_st_df["BA_ABBREV"].unique():
        print ba
        facilities_in_ba_df = facilities_in_st_df[facilities_in_st_df["BA_ABBREV"]==ba]
        facilities_in_ba = list(facilities_in_ba_df[" Facility ID (ORISPL)"].unique())
        
        df = epa_df[epa_df["ORISPL_CODE"].isin(facilities_in_ba)]
        hrly_agg = df.groupby(['OP_DATE', 'OP_HOUR']).aggregate(np.sum)[['GLOAD (MW)', 'CO2_MASS (tons)']]
        hrly_agg['BA_ABBREV'] = pd.Series(ba, index=hrly_agg.index)
        hrly_agg['STATE'] = pd.Series(st.upper(), index=hrly_agg.index)
        hrly_agg.to_csv('data/original/%s_%s_aggregate_raw.csv' % (ba, st.upper()), quoting=csv.QUOTE_NONNUMERIC)

   # data['2010'].pop(st)


CISO
TID
SMUD
IID
LDWP

In [74]:
lambda_s = ferc_load()

In [75]:
lambda_s.index.names[5] = 'hour'
lambda_df = lambda_s.reset_index()
lambda_df.rename(columns={0:'lambda'}, inplace=True)
print lambda_df.head()


  BA  respondent_id  report_yr         lambda_date timezone  hour  lambda
0  0          99991       2006 2006-01-01 00:00:00      EST     0  111.00
1  0          99991       2006 2006-01-01 00:00:00      EST     1   21.00
2  0          99991       2006 2006-01-01 00:00:00      EST     2   43.44
3  0          99991       2006 2006-01-01 00:00:00      EST     3   41.00
4  0          99991       2006 2006-01-01 00:00:00      EST     4   51.00

In [76]:
sql.write_frame(lambda_df, 'lambda', conn)

In [77]:
sql.write_frame(facility_df, 'facilities', conn)

In [18]:
for yr in ['2010', '2011', '2012']:
    for st in regions['all']:
        print yr, st
        load(yr, st)
        try:
            sql.write_frame(data[yr][st], 'epa', conn, if_exists='append')
        except Exception as e:
            print '..skipping: %s' % e
        data[yr].pop(st)


2010 al
loading 2010 al
2010 az
loading 2010 az
2010 ar
loading 2010 ar
2010 ca
loading 2010 ca
2010 co
loading 2010 co
2010 ct
loading 2010 ct
2010 dc
loading 2010 dc
2010 de
loading 2010 de
2010 fl
loading 2010 fl
2010 ga
loading 2010 ga
2010 id
loading 2010 id
2010 il
loading 2010 il
2010 in
loading 2010 in
2010 ia
loading 2010 ia
2010 ks
loading 2010 ks
2010 ky
loading 2010 ky
2010 la
loading 2010 la
2010 me
loading 2010 me
2010 md
loading 2010 md
2010 ma
loading 2010 ma
2010 mi
loading 2010 mi
2010 mn
loading 2010 mn
2010 mi
loading 2010 mi
2010 mo
loading 2010 mo
2010 mt
loading 2010 mt
2010 ne
loading 2010 ne
2010 nv
loading 2010 nv
2010 nh
loading 2010 nh
2010 nj
loading 2010 nj
2010 nm
loading 2010 nm
2010 ny
loading 2010 ny
2010 nc
loading 2010 nc
2010 nd
loading 2010 nd
2010 oh
loading 2010 oh
2010 ok
loading 2010 ok
2010 or
loading 2010 or
2010 pa
loading 2010 pa
2010 ri
loading 2010 ri
2010 sc
loading 2010 sc
2010 sd
loading 2010 sd
2010 tn
loading 2010 tn
2010 tx
loading 2010 tx
2010 ut
loading 2010 ut
2010 vt
loading 2010 vt
2010 va
loading 2010 va
2010 wa
loading 2010 wa
2010 wv
loading 2010 wv
2010 wi
loading 2010 wi
2010 wy
loading 2010 wy
2011
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-18-70f2504218f8> in <module>()
      2     for st in regions['all']:
      3         print yr, st
----> 4         load(yr, st)
      5         try:
      6             sql.write_frame(data[yr][st], 'epa', conn, if_exists='append')

/Users/anna/github/watttime-grid/epa_client.py in load(yr, st)
     88         # combine into one big dataframe
     89         df = pd.concat(pieces)
---> 90         df['OP_DATE'] = pd.to_datetime(df['OP_DATE'])
     91         data[yr][st] = df
     92 

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pandas/tseries/tools.pyc in to_datetime(arg, errors, dayfirst, utc, box, format)
    102         values = arg.values
    103         if not com.is_datetime64_dtype(values):
--> 104             values = _convert_f(values)
    105         return Series(values, index=arg.index, name=arg.name)
    106     elif isinstance(arg, (np.ndarray, list)):

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pandas/tseries/tools.pyc in _convert_f(arg)
     84             else:
     85                 result = tslib.array_to_datetime(arg, raise_=errors == 'raise',
---> 86                                                  utc=utc, dayfirst=dayfirst)
     87             if com.is_datetime64_dtype(result) and box:
     88                 result = DatetimeIndex(result, tz='utc' if utc else None)

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pandas/tslib.so in pandas.tslib.array_to_datetime (pandas/tslib.c:14701)()

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/dateutil/parser.pyc in parse(timestr, parserinfo, **kwargs)
    746         return parser(parserinfo).parse(timestr, **kwargs)
    747     else:
--> 748         return DEFAULTPARSER.parse(timestr, **kwargs)
    749 
    750 

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/dateutil/parser.pyc in parse(self, timestr, default, ignoretz, tzinfos, **kwargs)
    308 
    309 
--> 310         res, skipped_tokens = self._parse(timestr, **kwargs)
    311 
    312         if res is None:

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/dateutil/parser.pyc in _parse(self, timestr, dayfirst, yearfirst, fuzzy, fuzzy_with_tokens)
    364             yearfirst = info.yearfirst
    365         res = self._result()
--> 366         l = _timelex.split(timestr)
    367 
    368 

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/dateutil/parser.pyc in split(cls, s)
    148 
    149     def split(cls, s):
--> 150         return list(cls(s))
    151     split = classmethod(split)
    152 

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/dateutil/parser.pyc in next(self)
    145 
    146     def next(self):
--> 147         return self.__next__()  # Python 2.x support
    148 
    149     def split(cls, s):

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/dateutil/parser.pyc in __next__(self)
    139 
    140     def __next__(self):
--> 141         token = self.get_token()
    142         if token is None:
    143             raise StopIteration

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/dateutil/parser.pyc in get_token(self)
     81                     state = 'a'
     82                 elif nextchar in numchars:
---> 83                     state = '0'
     84                 elif nextchar in whitespace:
     85                     token = ' '

KeyboardInterrupt: 
 al
loading 2011 al

In [52]:
test = sql.read_frame('SELECT * from epa', conn)
test.head()


Out[52]:
&ltclass 'pandas.core.frame.DataFrame'>
Int64Index: 5 entries, 0 to 4
Data columns (total 10 columns):
STATE                   5  non-null values
FACILITY_NAME           5  non-null values
ORISPL_CODE             5  non-null values
UNITID                  5  non-null values
OP_DATE                 5  non-null values
OP_HOUR                 5  non-null values
OP_TIME                 5  non-null values
GLOAD_(MW)              0  non-null values
CO2_MASS_(tons)         0  non-null values
CO2_MASS_MEASURE_FLG    0  non-null values
dtypes: float64(3), int64(2), object(5)

In [17]:
ba_abbrevs = [r[0] for r in conn.execute('SELECT distinct "BA_ABBREV" FROM facilities')]
print ba_abbrevs
print len(ba_abbrevs)

orispls = [r[0] for r in conn.execute('SELECT distinct "_Facility_ID_(ORISPL)" FROM facilities')]
print orispls
print len(orispls)


[u'SOCO', u'TVA', u'AEC', u'EES', u'NLRK', u'SPA', u'AEPW', u'PLUM', u'PUPP', u'AZPS', u'SRP', u'TEPC', u'DEAA', u'GRIF', u'HGMA', u'WALC', u'CISO', u'TID', u'SMUD', u'IID', u'LDWP', u'PSCO', u'WACM', u'ISNE', u'PJM', u'FPC', u'TAL', u'TEC', u'FPL', u'JEA', u'GVL', u'SEC', u'RC', u'MISO', u'IPCO', u'AVA', u'EEI', u'SECI', u'WKPL', u'KCPL', u'KACY', u'EDE', u'LGEE', u'BREC', u'EKPC', u'CLEC', u'LAFA', u'LEPA', u'DPC', u'AECI', u'MPS', u'MPCO', u'CPLW', u'DUK', u'WAUE', u'NPPD', u'OPPD', u'LES', u'ELE', u'PNM', u'SPS', u'NEVP', u'SPPC', u'NYIS', u'OVEC', u'WFEC', u'GRDA', u'OKGE', u'PACW', u'BPAT', None, u'SCEG', u'SC', u'ERCO', u'PACE', u'PSEI']
76
[56018.0, 880041.0, 880075.0, 3.0, 54216.0, 55409.0, 56.0, 47.0, 55292.0, 55138.0, 7897.0, 26.0, 7.0, 8.0, 10.0, 55411.0, 55241.0, 50245.0, 52140.0, 54096.0, 6002.0, 7063.0, 533.0, 54802.0, 55293.0, 7710.0, 7698.0, 55440.0, 55271.0, 7721.0, 50730.0, 7697.0, 50.0, 202.0, 167.0, 56505.0, 55340.0, 6138.0, 7825.0, 168.0, 56328.0, 169.0, 55418.0, 6641.0, 56564.0, 170.0, 55714.0, 203.0, 55221.0, 55075.0, 56456.0, 173.0, 201.0, 55380.0, 6009.0, 118.0, 117.0, 141.0, 160.0, 55282.0, 56482.0, 113.0, 56948.0, 6177.0, 124.0, 55129.0, 55306.0, 55124.0, 126.0, 147.0, 55481.0, 4941.0, 55372.0, 116.0, 55455.0, 8068.0, 55177.0, 8223.0, 55522.0, 120.0, 315.0, 335.0, 356.0, 55951.0, 7315.0, 7693.0, 56801.0, 56474.0, 55295.0, 420.0, 57482.0, 302.0, 55510.0, 55513.0, 55508.0, 55499.0, 10034.0, 55112.0, 57027.0, 7527.0, 10169.0, 56475.0, 55540.0, 50131.0, 56532.0, 228.0, 329.0, 55970.0, 55625.0, 55512.0, 54238.0, 58122.0, 55333.0, 56026.0, 310.0, 57001.0, 389.0, 330.0, 55400.0, 55538.0, 331.0, 55847.0, 10156.0, 56476.0, 55810.0, 10294.0, 422.0, 55627.0, 56472.0, 377.0, 10350.0, 55698.0, 399.0, 400.0, 55807.0, 55518.0, 246.0, 55541.0, 55853.0, 57074.0, 57073.0, 57075.0, 56239.0, 10405.0, 55151.0, 7987.0, 55626.0, 55542.0, 57978.0, 341.0, 55748.0, 55217.0, 56046.0, 56041.0, 345.0, 57483.0, 57267.0, 56471.0, 55393.0, 56473.0, 56232.0, 259.0, 260.0, 358.0, 7449.0, 56569.0, 10812.0, 6013.0, 56914.0, 350.0, 55345.0, 55985.0, 56803.0, 55656.0, 57555.0, 271.0, 273.0, 7307.0, 56135.0, 56143.0, 55963.0, 56298.0, 56467.0, 7551.0, 7552.0, 50865.0, 50864.0, 404.0, 56639.0, 55182.0, 55933.0, 408.0, 57515.0, 56078.0, 55875.0, 55855.0, 7266.0, 10349.0, 465.0, 55200.0, 55645.0, 10682.0, 468.0, 469.0, 470.0, 6021.0, 6112.0, 55453.0, 55505.0, 55283.0, 525.0, 50707.0, 508.0, 55504.0, 55127.0, 492.0, 527.0, 6248.0, 56998.0, 6761.0, 8219.0, 55835.0, 56445.0, 477.0, 55207.0, 478.0, 10675.0, 6635.0, 10567.0, 540.0, 55042.0, 568.0, 50498.0, 542.0, 544.0, 561.0, 54657.0, 56798.0, 55149.0, 562.0, 55126.0, 546.0, 6156.0, 548.0, 581.0, 54236.0, 54605.0, 50736.0, 563.0, 565.0, 557.0, 55517.0, 56629.0, 56189.0, 603.0, 880004.0, 591.0, 592.0, 52193.0, 593.0, 57349.0, 7153.0, 594.0, 599.0, 10030.0, 7318.0, 7962.0, 597.0, 8048.0, 688.0, 55833.0, 624.0, 627.0, 7873.0, 645.0, 7846.0, 676.0, 7238.0, 609.0, 10672.0, 675.0, 641.0, 628.0, 564.0, 610.0, 6046.0, 663.0, 55422.0, 10333.0, 612.0, 629.0, 57241.0, 56799.0, 50949.0, 630.0, 7302.0, 55318.0, 683.0, 50976.0, 8049.0, 666.0, 664.0, 643.0, 613.0, 6042.0, 6043.0, 7380.0, 54426.0, 667.0, 55286.0, 54365.0, 54466.0, 55192.0, 55412.0, 634.0, 7242.0, 617.0, 6246.0, 54658.0, 54423.0, 54424.0, 7254.0, 54529.0, 619.0, 672.0, 689.0, 620.0, 55242.0, 642.0, 136.0, 55414.0, 207.0, 55821.0, 6584.0, 638.0, 7699.0, 673.0, 56400.0, 621.0, 7345.0, 55415.0, 693.0, 56407.0, 55672.0, 6258.0, 55304.0, 703.0, 7917.0, 7765.0, 55244.0, 55267.0, 55406.0, 708.0, 709.0, 70454.0, 55141.0, 710.0, 733.0, 7764.0, 6124.0, 56150.0, 715.0, 55040.0, 727.0, 7348.0, 56015.0, 6257.0, 7813.0, 7829.0, 7768.0, 7916.0, 55061.0, 55382.0, 7968.0, 55128.0, 6052.0, 7946.0, 55965.0, 55332.0, 728.0, 1122.0, 1104.0, 1105.0, 6463.0, 1046.0, 1217.0, 6063.0, 8031.0, 56013.0, 1218.0, 1091.0, 7343.0, 7985.0, 7137.0, 1047.0, 7155.0, 6664.0, 1068.0, 1048.0, 1167.0, 6254.0, 1175.0, 7145.0, 1073.0, 1081.0, 1058.0, 1131.0, 1206.0, 1077.0, 8029.0, 1082.0, 55733.0, 7953.0, 57028.0, 7456.0, 55179.0, 7818.0, 10865.0, 10866.0, 55279.0, 880086.0, 889.0, 55296.0, 861.0, 55188.0, 867.0, 55253.0, 55245.0, 963.0, 6016.0, 55236.0, 856.0, 55438.0, 55199.0, 50627.0, 8016.0, 886.0, 880089.0, 7842.0, 55174.0, 55201.0, 55496.0, 862.0, 891.0, 892.0, 55334.0, 863.0, 54556.0, 7425.0, 384.0, 874.0, 887.0, 55131.0, 876.0, 55204.0, 55640.0, 880076.0, 55222.0, 7858.0, 880088.0, 976.0, 864.0, 55216.0, 55238.0, 55936.0, 6017.0, 55202.0, 879.0, 55856.0, 55417.0, 55109.0, 55237.0, 55281.0, 10867.0, 7760.0, 55250.0, 913.0, 897.0, 883.0, 884.0, 898.0, 880067.0, 55392.0, 6137.0, 6705.0, 7336.0, 10474.0, 52130.0, 995.0, 1011.0, 992.0, 1001.0, 983.0, 1002.0, 996.0, 1004.0, 1012.0, 1043.0, 7759.0, 6113.0, 7763.0, 7948.0, 991.0, 990.0, 994.0, 55502.0, 6213.0, 997.0, 55229.0, 880087.0, 1007.0, 55096.0, 50240.0, 1008.0, 6085.0, 7335.0, 6166.0, 981.0, 55364.0, 988.0, 50733.0, 55111.0, 1010.0, 55224.0, 1040.0, 55259.0, 55148.0, None, 1268.0, 1230.0, 8037.0, 1271.0, 7013.0, 56502.0, 1233.0, 1336.0, 1240.0, 1235.0, 108.0, 1248.0, 6068.0, 1241.0, 1250.0, 1305.0, 7515.0, 1242.0, 6064.0, 1243.0, 7928.0, 1295.0, 1239.0, 1252.0, 7929.0, 1353.0, 55164.0, 1363.0, 1381.0, 6823.0, 55308.0, 1355.0, 6018.0, 1374.0, 1356.0, 1357.0, 6041.0, 1382.0, 1384.0, 55232.0, 1364.0, 1366.0, 56556.0, 1378.0, 6639.0, 55198.0, 1383.0, 1379.0, 54.0, 6071.0, 1361.0, 880065.0, 1385.0, 55173.0, 1416.0, 55433.0, 1464.0, 6055.0, 6190.0, 55165.0, 55404.0, 1396.0, 6558.0, 1443.0, 51.0, 56283.0, 1439.0, 1417.0, 1402.0, 1391.0, 1409.0, 1449.0, 1450.0, 50030.0, 1403.0, 55467.0, 55620.0, 55419.0, 55117.0, 1393.0, 1404.0, 56108.0, 55089.0, 1400.0, 8056.0, 1394.0, 55211.0, 55212.0, 10307.0, 55041.0, 1594.0, 1619.0, 1599.0, 1682.0, 52026.0, 10823.0, 55026.0, 1631.0, 55317.0, 1586.0, 10029.0, 1595.0, 880023.0, 54586.0, 10802.0, 10726.0, 54907.0, 1592.0, 54805.0, 55079.0, 1606.0, 1588.0, 1589.0, 50002.0, 1660.0, 1626.0, 1613.0, 10176.0, 6081.0, 1678.0, 1642.0, 1643.0, 10678.0, 602.0, 1552.0, 1571.0, 1572.0, 1553.0, 1554.0, 50282.0, 1573.0, 54832.0, 1556.0, 1570.0, 1559.0, 7835.0, 10485.0, 1564.0, 1560.0, 55031.0, 50243.0, 55068.0, 55100.0, 55294.0, 1507.0, 7258.0, 1695.0, 6034.0, 54415.0, 1726.0, 55718.0, 10111.0, 1702.0, 55088.0, 1728.0, 1831.0, 4259.0, 1832.0, 54751.0, 10698.0, 10822.0, 6035.0, 1730.0, 1731.0, 1825.0, 1720.0, 1710.0, 1723.0, 55270.0, 1830.0, 55101.0, 7984.0, 58427.0, 55102.0, 880029.0, 1732.0, 54915.0, 10745.0, 1822.0, 1733.0, 55297.0, 55799.0, 1769.0, 55402.0, 1740.0, 1843.0, 1743.0, 7972.0, 10328.0, 50835.0, 880031.0, 1719.0, 1745.0, 880045.0, 1866.0, 55087.0, 1915.0, 1904.0, 8027.0, 1893.0, 2038.0, 56241.0, 6058.0, 55010.0, 2039.0, 56164.0, 55867.0, 1888.0, 1897.0, 1912.0, 1943.0, 6358.0, 1913.0, 7925.0, 1891.0, 56104.0, 2042.0, 7844.0, 2001.0, 1961.0, 10849.0, 7843.0, 1927.0, 6741.0, 6090.0, 2008.0, 7947.0, 6824.0, 10075.0, 2076.0, 55234.0, 2132.0, 2169.0, 2122.0, 2123.0, 55447.0, 55178.0, 6223.0, 7749.0, 2082.0, 6074.0, 2079.0, 2131.0, 7848.0, 2102.0, 6065.0, 2161.0, 6195.0, 2103.0, 2098.0, 7903.0, 2104.0, 6650.0, 6651.0, 2080.0, 6652.0, 2167.0, 7754.0, 2081.0, 7964.0, 2092.0, 6155.0, 2094.0, 6768.0, 2107.0, 56151.0, 7604.0, 7296.0, 2168.0, 2096.0, 55220.0, 55063.0, 2050.0, 55197.0, 2047.0, 55706.0, 55395.0, 6073.0, 57037.0, 2051.0, 8054.0, 55218.0, 7960.0, 55451.0, 2070.0, 55694.0, 6061.0, 55076.0, 2053.0, 7988.0, 55269.0, 2048.0, 7989.0, 2049.0, 6076.0, 56606.0, 2176.0, 55749.0, 57480.0, 2187.0, 6089.0, 56908.0, 2706.0, 8042.0, 2707.0, 50244.0, 2720.0, 1016.0, 10379.0, 10378.0, 2708.0, 57029.0, 2721.0, 10381.0, 10525.0, 2723.0, 50189.0, 10384.0, 10380.0, 2718.0, 2709.0, 50254.0, 2713.0, 7277.0, 10382.0, 2727.0, 6250.0, 56249.0, 56292.0, 7826.0, 7805.0, 2732.0, 55116.0, 50555.0, 2712.0, 54276.0, 2716.0, 54035.0, 54755.0, 50188.0, 6469.0, 6030.0, 8222.0, 2817.0, 57943.0, 2823.0, 57881.0, 2790.0, 2824.0, 8000.0, 2241.0, 2226.0, 55972.0, 6077.0, 60.0, 2265.0, 2266.0, 2250.0, 2290.0, 2240.0, 2271.0, 6096.0, 2291.0, 59.0, 6373.0, 2292.0, 2277.0, 7887.0, 58054.0, 55661.0, 55170.0, 2364.0, 8002.0, 2367.0, 2378.0, 56964.0, 50497.0, 2398.0, 2399.0, 10751.0, 2379.0, 10566.0, 2380.0, 5083.0, 2384.0, 10805.0, 50799.0, 50561.0, 2400.0, 50852.0, 54640.0, 55938.0, 2401.0, 7138.0, 2393.0, 2434.0, 2403.0, 2404.0, 50006.0, 2406.0, 10043.0, 2408.0, 8008.0, 2382.0, 2383.0, 2409.0, 50385.0, 10308.0, 50628.0, 10099.0, 55239.0, 2410.0, 2390.0, 2411.0, 7288.0, 55113.0, 2385.0, 56963.0, 6776.0, 55210.0, 55977.0, 2454.0, 87.0, 2442.0, 56458.0, 7967.0, 55343.0, 2446.0, 54814.0, 55039.0, 7975.0, 2450.0, 2444.0, 2451.0, 55802.0, 55514.0, 55322.0, 2322.0, 55077.0, 2330.0, 7082.0, 10761.0, 2341.0, 8224.0, 2324.0, 55841.0, 2326.0, 56224.0, 2336.0, 55494.0, 55687.0, 7910.0, 2503.0, 2504.0, 10803.0, 10619.0, 2490.0, 55375.0, 55243.0, 8906.0, 55405.0, 54593.0, 55699.0, 2539.0, 50292.0, 55600.0, 10464.0, 2625.0, 7912.0, 54914.0, 8006.0, 56234.0, 50978.0, 10620.0, 10190.0, 2535.0, 2491.0, 50368.0, 50472.0, 2480.0, 2511.0, 2512.0, 2493.0, 55786.0, 56259.0, 56032.0, 2513.0, 54131.0, 2679.0, 2527.0, 2526.0, 2514.0, 7869.0, 2494.0, 7914.0, 55969.0, 7913.0, 2628.0, 880043.0, 8007.0, 2496.0, 2549.0, 50458.0, 54076.0, 50450.0, 50449.0, 50451.0, 54547.0, 54114.0, 880044.0, 10617.0, 10621.0, 880052.0, 54041.0, 54592.0, 880024.0, 2554.0, 2499.0, 52056.0, 50202.0, 54149.0, 7915.0, 2516.0, 2594.0, 56188.0, 56196.0, 2517.0, 8053.0, 54425.0, 10025.0, 2500.0, 880100.0, 54034.0, 7314.0, 52168.0, 2682.0, 54574.0, 10725.0, 2632.0, 55787.0, 6082.0, 50744.0, 50651.0, 54099.0, 7909.0, 7146.0, 2521.0, 880042.0, 55262.0, 55263.0, 55264.0, 10398.0, 54207.0, 2835.0, 2836.0, 880030.0, 2878.0, 2828.0, 880039.0, 2840.0, 55247.0, 2831.0, 55350.0, 55736.0, 55397.0, 2837.0, 880033.0, 2847.0, 55701.0, 8102.0, 55228.0, 2917.0, 2850.0, 6031.0, 2876.0, 2838.0, 880083.0, 2860.0, 55110.0, 2832.0, 2872.0, 2861.0, 2848.0, 7783.0, 7782.0, 10244.0, 2843.0, 880028.0, 2864.0, 7253.0, 2880.0, 7872.0, 55401.0, 50247.0, 55248.0, 50044.0, 55348.0, 2866.0, 6019.0, 2830.0, 55503.0, 2869.0, 7158.0, 3006.0, 7757.0, 8059.0, 165.0, 55146.0, 2951.0, 6772.0, 55457.0, 3008.0, 2952.0, 2953.0, 2963.0, 55225.0, 762.0, 55463.0, 4940.0, 2956.0, 6095.0, 2964.0, 55651.0, 55501.0, 2965.0, 6106.0, 7350.0, 54761.0, 55328.0, 55103.0, 55544.0, 56227.0, 10676.0, 55196.0, 55710.0, 55377.0, 880071.0, 55347.0, 3178.0, 880049.0, 55690.0, 6094.0, 3140.0, 3096.0, 10641.0, 55654.0, 8226.0, 10143.0, 3118.0, 3159.0, 8012.0, 54638.0, 55516.0, 10603.0, 3161.0, 3098.0, 880072.0, 50074.0, 55801.0, 55298.0, 7701.0, 50130.0, 10113.0, 54785.0, 55233.0, 3179.0, 10870.0, 3122.0, 880007.0, 3176.0, 56397.0, 55976.0, 3136.0, 50410.0, 55231.0, 55667.0, 3148.0, 52149.0, 3181.0, 3149.0, 3111.0, 10343.0, 880009.0, 3138.0, 54571.0, 50888.0, 50039.0, 55193.0, 50397.0, 50279.0, 55337.0, 50776.0, 52106.0, 54144.0, 3113.0, 50463.0, 3168.0, 3169.0, 50974.0, 3130.0, 3131.0, 880050.0, 54634.0, 3152.0, 3115.0, 3116.0, 880025.0, 50729.0, 50732.0, 880099.0, 880006.0, 50607.0, 50611.0, 3132.0, 50879.0, 55524.0, 55107.0, 3236.0, 51030.0, 54324.0, 54056.0, 55048.0, 2440.0, 880092.0, 55166.0, 3280.0, 55043.0, 7737.0, 55386.0, 7210.0, 130.0, 3250.0, 3317.0, 3251.0, 3285.0, 3318.0, 880057.0, 52151.0, 55927.0, 3319.0, 7834.0, 880074.0, 3287.0, 7981.0, 3320.0, 50806.0, 880078.0, 3295.0, 3264.0, 3297.0, 3298.0, 6249.0, 3338.0, 7237.0, 6098.0, 56610.0, 56238.0, 3344.0, 55478.0, 3393.0, 50956.0, 55081.0, 3396.0, 10729.0, 3399.0, 880001.0, 10797.0, 50481.0, 3403.0, 55251.0, 3405.0, 3406.0, 3407.0, 7845.0, 55703.0, 50296.0, 880079.0, 10670.0, 3602.0, 4939.0, 55168.0, 10298.0, 55327.0, 3497.0, 55064.0, 55172.0, 55357.0, 3574.0, 52176.0, 7762.0, 3460.0, 56806.0, 55299.0, 55187.0, 10741.0, 6178.0, 56350.0, 9.0, 55206.0, 55358.0, 3548.0, 8063.0, 55464.0, 55176.0, 55223.0, 55365.0, 50625.0, 55480.0, 56152.0, 55226.0, 55098.0, 6136.0, 3490.0, 3464.0, 55086.0, 55153.0, 7902.0, 3491.0, 56604.0, 6193.0, 55664.0, 55144.0, 7097.0, 3604.0, 6181.0, 55230.0, 54817.0, 3482.0, 3476.0, 3502.0, 3452.0, 55097.0, 3439.0, 3609.0, 3457.0, 298.0, 3477.0, 55154.0, 55123.0, 6146.0, 55091.0, 6147.0, 3483.0, 3492.0, 3453.0, 55065.0, 56326.0, 55122.0, 55708.0, 50137.0, 3456.0, 3484.0, 3441.0, 3611.0, 6180.0, 55215.0, 127.0, 50815.0, 7030.0, 54676.0, 7678.0, 58005.0, 58001.0, 50109.0, 55047.0, 3494.0, 3485.0, 54748.0, 4195.0, 56349.0, 3628.0, 3576.0, 55137.0, 6243.0, 55120.0, 3459.0, 55104.0, 3468.0, 3631.0, 6179.0, 56603.0, 7325.0, 6183.0, 7900.0, 6648.0, 52071.0, 56611.0, 3559.0, 3601.0, 55470.0, 4266.0, 3504.0, 55015.0, 50615.0, 4937.0, 3469.0, 55062.0, 55132.0, 52088.0, 6194.0, 3506.0, 3507.0, 3612.0, 3508.0, 3443.0, 3470.0, 3613.0, 6139.0, 3478.0, 56674.0, 55320.0, 55139.0, 7790.0, 3644.0, 56102.0, 3648.0, 6165.0, 8069.0, 6481.0, 56237.0, 56253.0, 56177.0, 55622.0, 10773.0, 56807.0, 50966.0, 54304.0, 3796.0, 55738.0, 52089.0, 3803.0, 3797.0, 3775.0, 7213.0, 10377.0, 10071.0, 55381.0, 7212.0, 52019.0, 52087.0, 50479.0, 3776.0, 54844.0, 7032.0, 880093.0, 10633.0, 10771.0, 52152.0, 7839.0, 7837.0, 7836.0, 50900.0, 52007.0, 3804.0, 3788.0, 7838.0, 10017.0, 10774.0, 54081.0, 3785.0, 55439.0, 56808.0, 55939.0, 55285.0, 3809.0, 3734.0, 589.0, 3754.0, 51026.0, 3845.0, 55662.0, 7870.0, 54537.0, 55818.0, 607.0, 55482.0, 7999.0, 55700.0, 7605.0, 54476.0, 4140.0, 3982.0, 3992.0, 8023.0, 55558.0, 7159.0, 4146.0, 55029.0, 4050.0, 7863.0, 56068.0, 3991.0, 56031.0, 4005.0, 4143.0, 6253.0, 55836.0, 4271.0, 4125.0, 56480.0, 55135.0, 4054.0, 7270.0, 6170.0, 4040.0, 4072.0, 55641.0, 4057.0, 55391.0, 58124.0, 56166.0, 4059.0, 7203.0, 4041.0, 4042.0, 7991.0, 4076.0, 4078.0, 4014.0, 55011.0, 3942.0, 54344.0, 50491.0, 880053.0, 55284.0, 55276.0, 10788.0, 3943.0, 10151.0, 3944.0, 3935.0, 3947.0, 3936.0, 56671.0, 3948.0, 10743.0, 3954.0, 6264.0, 7537.0, 3938.0, 55349.0, 6004.0, 3945.0, 50151.0, 3946.0, 57703.0, 4158.0, 56609.0, 8066.0, 6204.0, 4162.0, 7504.0, 55477.0, 55479.0, 56319.0, 56596.0, 6101.0]
1585

In [25]:
conn.execute('CREATE INDEX idx_date3 ON epa (ORISPL_CODE, OP_DATE, OP_HOUR, "GLOAD_(MW)", "CO2_MASS_(tons)")')


Out[25]:
<sqlite3.Cursor at 0x107ca0730>

In [22]:
conn.execute('CREATE INDEX idx_fac ON facilities (BA_ABBREV, "_Facility_ID_(ORISPL)")')


Out[22]:
<sqlite3.Cursor at 0x107ca05e0>

In [28]:
import time


for ba in ['SMUD']:
    if ba is not None:
        queries = ['SELECT facilities.BA_ABBREV, epa.OP_DATE, epa.OP_HOUR,',
                   'total(epa."GLOAD_(MW)") as RF_MW,',
                   'count(epa."GLOAD_(MW)") as N_UNITS_ON,',
                   'total(epa."CO2_MASS_(tons)")*2000 as RE_lb',
                   'FROM facilities JOIN epa',
                   'ON epa.ORISPL_CODE = facilities."_Facility_ID_(ORISPL)"',
                   'WHERE facilities.BA_ABBREV="%s"' % ba,
                   'GROUP BY epa.OP_DATE, epa.OP_HOUR'
                #   'GROUP BY facilities.BA_ABBREV, epa.OP_DATE, epa.OP_HOUR'
                   #'LIMIT 10'
                 ]
        query = ' '.join(queries)

        t1 = time.time()
        df = sql.read_frame(query, conn)
        t2 = time.time()
        print ba
        print t2-t1
        df.to_csv('data/original/%s_aggregate_raw.csv' % ba, quoting=csv.QUOTE_NONNUMERIC)


SMUD
15.3507540226

In [ ]:
queries = ['SELECT facilities.BA_ABBREV, epa.OP_DATE, epa.OP_HOUR,',
                   'total(epa."GLOAD_(MW)") as RF_MW,',
                   'count(epa."GLOAD_(MW)") as N_UNITS_ON,',
                   'total(epa."CO2_MASS_(tons)")*2000 as RE_lb',
                   'FROM facilities JOIN epa',
                   'ON epa.ORISPL_CODE = facilities."_Facility_ID_(ORISPL)"',
                   'WHERE facilities.BA_ABBREV="%s"' % 'PJM',
                   'GROUP BY epa.OP_DATE, epa.OP_HOUR'
                #   'GROUP BY facilities.BA_ABBREV, epa.OP_DATE, epa.OP_HOUR'
                   #'LIMIT 10'
                 ]
query = ' '.join(queries)

df = sql.read_frame(query, conn)