In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import os
import glob
import re
import cPickle as pickle
import gzip
import seaborn as sns
In [4]:
path = os.path.join('EIA data', 'Natural gas price', 'N3045TX3m.xls')
ng_price = pd.read_excel(path, sheetname='Data 1', header=2)
In [5]:
ng_price.head()
Out[5]:
In [6]:
ng_price.dtypes
Out[6]:
In [8]:
ng_price.rename(columns={"Texas Natural Gas Price Sold to Electric Power Consumers (Dollars per Thousand Cubic Feet)":
"NG Price ($/mcf)"}, inplace=True)
In [9]:
ng_price.head()
Out[9]:
In [12]:
ng_price.loc[:,'Month'] = ng_price.loc[:,'Date'].apply(lambda x: x.month)
ng_price.loc[:,'Year'] = ng_price.loc[:,'Date'].apply(lambda x: x.year)
In [13]:
ng_price.head()
Out[13]:
In [55]:
ng_price.dtypes
Out[55]:
In [93]:
path = os.path.join('EIA data', 'Coal prices', 'Texas electric sector coal price.xlsx')
coal_temp = pd.read_excel(path, sheetname='Sheet1')
In [94]:
coal_temp.head()
Out[94]:
In [95]:
coal_temp.loc[:,'Quarter'] = coal_temp.loc[:,'Date'].apply(lambda x: int(x[1]))
coal_temp.loc[:,'Year'] = coal_temp.loc[:,'Date'].apply(lambda x: int(x[-4:]))
In [96]:
coal_temp.head()
Out[96]:
In [99]:
for coal in ['All coal', 'Lignite', 'Subbituminous']:
plt.plot(coal_temp.index, coal_temp[coal], label=coal)
plt.legend(loc=0)
Out[99]:
Out[99]:
Out[99]:
Out[99]:
In [102]:
coal_temp[['All coal', 'Lignite', 'Subbituminous']].corr()
Out[102]:
In [103]:
coal_2007 = pd.DataFrame(columns=coal_temp.columns)
In [104]:
coal_2007['Quarter'] = [1,2,3,4]
coal_2007['Year'] = 2007
for coal in ['All coal', 'Lignite', 'Subbituminous']:
coal_2007[coal] = coal_temp.loc[coal_temp['Year']==2008,coal].mean()
In [105]:
coal_2007
Out[105]:
In [106]:
coal_temp = coal_temp.append(coal_2007)
In [107]:
coal_temp.tail()
Out[107]:
In [108]:
df = pd.DataFrame(pd.date_range('2007-1-15', periods=12*9, freq='M'), columns=['tempdate'])
In [109]:
df.head()
Out[109]:
In [110]:
df.loc[:,'Quarter'] = df.loc[:,'tempdate'].apply(lambda x: x.quarter)
df.loc[:,'Month'] = df.loc[:,'tempdate'].apply(lambda x: x.month)
df.loc[:,'Year'] = df.loc[:,'tempdate'].apply(lambda x: x.year)
In [116]:
df.head()
df.tail()
Out[116]:
Out[116]:
In [112]:
coal_price = pd.merge(coal_temp, df, on=['Quarter', 'Year'])
In [113]:
coal_price.head()
Out[113]:
In [114]:
fuel_price = pd.merge(ng_price, coal_price, how='right', on=['Month', 'Year'])
In [115]:
fuel_price.head()
Out[115]:
In [117]:
fuel_price.tail()
Out[117]:
In [118]:
fuel_price.drop(['Date_x', 'Date_y', 'tempdate', 'Quarter'], axis=1, inplace=True)
In [119]:
path = os.path.join('..', 'Clean Data', 'Fuel prices.csv')
fuel_price.to_csv(path)