In [1]:
# Core analysis stack; %matplotlib inline renders figures in the notebook.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
# Raise display limits so wide/long frames are not truncated.
pd.options.display.max_rows = 999
pd.options.display.max_columns = 999
# Python 2 notebook: make / do true (float) division by default.
from __future__ import division
In [2]:
# Load the 2015-07-01 Colorado ground-station data.
df = pd.read_csv('data/colorado20150701.txt')
# Show only the first rows -- displaying the whole frame floods the notebook
# output (max_rows was raised to 999 above).
df.head()
Out[2]:
In [3]:
#plt.plot(df['Avg Global PAR [umol/s/m^2]'].values)
In [4]:
# Re-read the 2015-07-01 ground data (same file as cell 2) for the training day.
col_0701_df = pd.read_csv('data/colorado20150701.txt')
def to_str(x):
    """Zero-pad an hour value to a two-digit string (e.g. 7 -> '07').

    Accepts plain Python ints as well as numpy integer scalars; the
    original `x.astype(str)` only worked for numpy scalars and raised
    AttributeError on built-in ints.
    """
    return '{:02d}'.format(int(x))
# Build a datetime column from the date string plus the zero-padded hour.
col_0701_df['datetime'] = pd.to_datetime(col_0701_df['DATE (MM/DD/YYYY)']+' '+col_0701_df['HOUR-MST'].map(to_str))
# Hourly broadband irradiance; a leading 0 is prepended -- presumably to line
# this array up with the scraped hourly power readings below. TODO confirm.
intensities = np.insert(col_0701_df['Avg Global PSP (vent/cor) [W/m^2]'].values,0,0)
In [5]:
# Row 10, columns 7:12 -- presumably the five spectral channels matching the
# `wavelengths` array below. NOTE(review): magic indices (43 columns assumed);
# verify against the file's column layout.
spectrometer = col_0701_df[col_0701_df.index==10].values.reshape(43,)[7:12]
In [6]:
# Center wavelengths (nm) of the five spectral channels sampled above.
wavelengths = np.array([400, 500, 675, 940, 1020])
In [7]:
# Raw measured spectrum: channel intensity vs. wavelength.
plt.plot(wavelengths,spectrometer)
Out[7]:
In [8]:
from scipy import interpolate
x = wavelengths
y = spectrometer
# Linear interpolation between the five measured channels.
f = interpolate.interp1d(x, y)
wav_new = np.arange(400,1020, 0.5) #400 nm to 1019.5 nm with 0.5 nm spacing (endpoint excluded)
spec_new = f(wav_new) #resample the measured spectrum on the 0.5 nm grid
plt.plot(x, y, 'o', wav_new, spec_new, '-')
plt.show()
In [9]:
# Riemann-sum integral of the interpolated spectrum (0.5 nm bin width).
# NOTE(review): 1240 looks like hc in eV*nm (photon-energy conversion) and
# /100 a unit rescale -- magic numbers, confirm intended units.
sum(wav_new*spec_new)*.5/1240/100
Out[9]:
In [10]:
# --- Scrape the 2015-07-01 intraday power table from pvoutput.org ---
# (Python 2 mechanize/cookielib; credentials come from environment variables,
# never hardcoded.)
import mechanize
import cookielib
import os
PVOUTPUT_USERNAME = os.environ['PVOUTPUT_USERNAME']
PVOUTPUT_PASSWORD = os.environ['PVOUTPUT_PASSWORD']
PVOUTPUT_API = os.environ['PVOUTPUT_API']
#web scraping
cj = cookielib.CookieJar() #cookie jar keeps the login session alive
br = mechanize.Browser() #instantiate a browser
br.set_handle_robots(False) #ignore robots.txt
br.set_cookiejar(cj) #attach the session cookie jar
br.open("http://pvoutput.org/") #open login page
br.select_form(nr=0) #select login form
br.form['login'] = PVOUTPUT_USERNAME #login using saved env vars
br.form['password'] = PVOUTPUT_PASSWORD #login using saved env vars
br.submit() #press enter
#print br.response().read() #see if it worked
br.open("http://pvoutput.org/intraday.jsp?id=5446&sid=5473&dt=20150701") #open desired page
html = br.response().read() #read desired page
df_html = pd.read_html(html)[0]
#cleaning the data
# Row 1 of the scraped table holds the real column names.
df_html.columns = pd.MultiIndex.from_arrays(df_html[df_html.index==1].values)
df_html.drop(df_html.index[[0,1]],inplace=True) #drop the two header rows
df_html['Power'] = df_html['Power'].str.replace(',','') #strip thousands separators
df_html['Power'] = df_html['Power'].map(lambda x: x.rstrip('W')) #strip the unit suffix
def make_a_no(x):
    """Parse a scraped power reading to int; non-numeric values become 0.

    The original bare `except:` also swallowed KeyboardInterrupt/SystemExit;
    catch only the conversion errors int() can actually raise here.
    """
    try:
        return int(x)
    except (ValueError, TypeError):
        return 0
df_html['Power'] = df_html['Power'].map(make_a_no)
# NOTE(review): unit='h' applies to numeric epoch input and is likely a no-op
# when parsing date strings -- confirm.
df_html['datetime'] = pd.to_datetime(df_html['Date'] + ' ' + df_html['Time'], unit='h')
df_html.set_index(['datetime'],inplace=True)
df_html.head()
Out[10]:
In [11]:
# Hourly-mean power; resample(how='mean') is the old (pre-0.18) pandas API.
powers = df_html.resample('H', how='mean')['Power'].values
In [12]:
# Scatter of scraped PV power against measured irradiance.
# NOTE(review): assumes both arrays cover the same hours -- confirm lengths.
plt.plot(intensities,powers,'ro') #output powers vs. intensities
Out[12]:
In [13]:
import statsmodels.api as sm
# Fit and summarize OLS model: power as a linear function of irradiance.
# NOTE(review): no constant is added (sm.add_constant), so the fit is forced
# through the origin -- intentional for power ~ irradiance? confirm.
mod = sm.OLS(powers,intensities)
res = mod.fit()
print res.summary()
In [14]:
# Visualize the OLS fit against the observations.
fig, ax = plt.subplots()
fig = sm.graphics.plot_fit(res, 0, ax=ax)
In [15]:
# Same ground-data pipeline as cell 4, now for 2015-07-02 (the held-out day).
col_0702_df = pd.read_csv('data/colorado20150702.txt')
def to_str(x):
    """Zero-pad an hour value to a two-digit string (e.g. 7 -> '07').

    Accepts plain ints as well as numpy integer scalars (the original
    `x.astype(str)` only worked for numpy scalars). NOTE(review): this
    re-defines the identical helper from cell 4 -- a single shared
    definition would be better.
    """
    return '{:02d}'.format(int(x))
# Build a datetime column from the date string plus the zero-padded hour.
col_0702_df['datetime'] = pd.to_datetime(col_0702_df['DATE (MM/DD/YYYY)']+' '+col_0702_df['HOUR-MST'].map(to_str))
# NOTE(review): this REBINDS `intensities` to 2015-07-02 data -- later cells
# that still expect the 07-01 values will silently use these instead.
intensities = np.insert(col_0702_df['Avg Global PSP (vent/cor) [W/m^2]'].values,0,0)
In [16]:
# --- Scrape the 2015-07-02 power table: verbatim copy of cell 10 with only
# the date changed. NOTE(review): refactor both cells into one
# scrape_day(date) helper to remove the duplication. ---
import mechanize
import cookielib
import os
PVOUTPUT_USERNAME = os.environ['PVOUTPUT_USERNAME']
PVOUTPUT_PASSWORD = os.environ['PVOUTPUT_PASSWORD']
PVOUTPUT_API = os.environ['PVOUTPUT_API']
#web scraping
cj = cookielib.CookieJar() #cookie jar keeps the login session alive
br = mechanize.Browser() #instantiate a browser
br.set_handle_robots(False) #ignore robots.txt
br.set_cookiejar(cj) #attach the session cookie jar
br.open("http://pvoutput.org/") #open login page
br.select_form(nr=0) #select login form
br.form['login'] = PVOUTPUT_USERNAME #login using saved env vars
br.form['password'] = PVOUTPUT_PASSWORD #login using saved env vars
br.submit() #press enter
#print br.response().read() #see if it worked
br.open("http://pvoutput.org/intraday.jsp?id=5446&sid=5473&dt=20150702") #open desired page
html = br.response().read() #read desired page
df_html = pd.read_html(html)[0]
#cleaning the data
# Row 1 of the scraped table holds the real column names.
df_html.columns = pd.MultiIndex.from_arrays(df_html[df_html.index==1].values)
df_html.drop(df_html.index[[0,1]],inplace=True) #drop the two header rows
df_html['Power'] = df_html['Power'].str.replace(',','') #strip thousands separators
df_html['Power'] = df_html['Power'].map(lambda x: x.rstrip('W')) #strip the unit suffix
def make_a_no(x):
    """Parse a scraped power reading to int; non-numeric values become 0.

    Narrows the original bare `except:` (which swallowed every exception)
    to the conversion errors int() can raise. NOTE(review): duplicate of
    the helper in cell 10 -- define it once and reuse.
    """
    try:
        return int(x)
    except (ValueError, TypeError):
        return 0
df_html['Power'] = df_html['Power'].map(make_a_no)
# NOTE(review): unit='h' applies to numeric epoch input and is likely a no-op
# when parsing date strings -- confirm.
df_html['datetime'] = pd.to_datetime(df_html['Date'] + ' ' + df_html['Time'], unit='h')
df_html.set_index(['datetime'],inplace=True)
df_html.head()
Out[16]:
In [17]:
# Hourly-mean power for the test day (rebinds `powers`); old resample API.
powers = df_html.resample('H', how='mean')['Power'].values
In [18]:
# Predict 2015-07-02 power from its irradiance using the model fit on 07-01.
powers_pred = res.predict(intensities)
In [19]:
# Predicted vs. actual power for the held-out day.
plt.plot(powers,powers_pred, 'ro')
Out[19]:
In [20]:
# Regress predicted on actual power to gauge out-of-sample agreement
# (a slope near 1 indicates a good fit; again no intercept term is added).
mod2 = sm.OLS(powers_pred,powers)
res2 = mod2.fit()
print res2.summary()
In [21]:
# Visualize the predicted-vs-actual regression.
fig, ax = plt.subplots()
fig = sm.graphics.plot_fit(res2, 0, ax=ax)
In [41]:
# --- Explore one GOES-15 satellite netCDF granule and map it ---
import netCDF4
from netCDF4 import Dataset
# NOTE(review): the path looks like a directory, and mode "a" opens the
# dataset for appending -- "r" (read-only) is safer for a pure read; confirm.
rootgrp = Dataset("data/satellite/colorado/summer6months/data/", "a", format="NETCDF4")
print "type of data: ", rootgrp.data_model #netcdf3_classic, not netcdf4
myvars = []
for var in rootgrp.variables: #list of variables
myvars.append(var)
print "variables in data: ", myvars
print "latitude of one point; ", rootgrp.variables['lat'][0][0] #verify that one latitude is where we expect
lons = rootgrp.variables['lon'][:]
lats = rootgrp.variables['lat'][:]
data = rootgrp.variables['data'][:] #data is sensor data
rootgrp.close() #need to close before you can open again
from mpl_toolkits.basemap import Basemap #map stuff
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Get some parameters for the Stereographic Projection:
# 800 km x 800 km window centered near Denver (39.5N, 104.5W).
m = Basemap(width=800000,height=800000,
resolution='l',projection='stere',\
lat_ts=40,lat_0=39.5,lon_0=-104.5)
xi, yi = m(lons, lats) #project lon/lat onto x and y for plotting
plt.figure(figsize=(10,10)) # Plot Data
cs = m.pcolor(xi,yi,np.squeeze(data)) #data is 1 x 14 x 36, squeeze makes it 14 x 36
m.drawparallels(np.arange(-80., 81., 1.), labels=[1,0,0,0], fontsize=10) # Add Grid Lines
m.drawmeridians(np.arange(-180., 181., 1.), labels=[0,0,0,1], fontsize=10) # Add Grid Lines
m.drawstates(linewidth=3) # Add state boundaries
cbar = m.colorbar(cs, location='bottom', pad="10%") # Add Colorbar
plt.title('GOES 15 - Sensor 1') # Add Title
plt.show()
In [23]:
# Sanity checks on the granule's array shape and mean sensor value.
print data[0].shape
print data[0][0].shape
print data.mean()
In [24]:
# --- Index all 2015-07-01 GOES granules by their embedded timestamp ---
import os
import datetime
data_files = os.listdir('data/satellite/colorado/20150701')
list_of_files = []
list_of_files_details = []
# Keep only the netCDF files (*.nc) and split their dot-delimited names.
for myfile in data_files:
if myfile[-2:] == 'nc':
list_of_files.append(myfile)
list_of_files_details.append(myfile.split('.'))
list_of_dates = []
# Filename fields 1-3 are year, day-of-year, and HHMMSS (see strptime format).
for i,_ in enumerate(list_of_files_details):
mytime = list_of_files_details[i][1]+" "+list_of_files_details[i][2]+" "+list_of_files_details[i][3]
mydatetime = datetime.datetime.strptime(mytime, '%Y %j %H%M%S')
list_of_dates.append(mydatetime)
In [25]:
# One column per GOES band; one row per unique granule timestamp.
channel_list = ['BAND_01', 'BAND_02', 'BAND_03', 'BAND_04', 'BAND_06']
index = np.unique(list_of_dates)
mydata_df = pd.DataFrame(index=index,columns=channel_list)
# fillna(0) here is display-only -- it does not modify mydata_df itself.
mydata_df.fillna(0).head()
Out[25]:
In [26]:
# Fill the band/timestamp frame with each granule's spatial-mean sensor value;
# filename field 4 names the band (e.g. BAND_01).
# NOTE(review): Dataset mode "a" opens for append; prefer "r" for pure reads.
for i , myfile in enumerate(list_of_files):
rootgrp = Dataset("data/satellite/colorado/20150701/" + myfile, "a", format="NETCDF4")
lons = rootgrp.variables['lon'][:]
lats = rootgrp.variables['lat'][:]
data = rootgrp.variables['data'][:] #data is sensor data
rootgrp.close() #need to close before you can open again
mytime = list_of_files_details[i][1]+" "+list_of_files_details[i][2]+" "+list_of_files_details[i][3]
mydatetime = datetime.datetime.strptime(mytime, '%Y %j %H%M%S')
mydata_df.loc[mydatetime, list_of_files_details[i][4]] = data.mean()
# convert_objects and resample(how=) are deprecated pandas APIs (modern
# equivalents: pd.to_numeric / resample('H').mean()).
mydata_df = mydata_df.convert_objects(convert_numeric=True)
mydata_df = mydata_df.resample('H', how='mean')
mydata_df.head()
Out[26]:
In [27]:
# Feature matrix: hourly means of the five GOES bands for 2015-07-01.
X = mydata_df.values
In [28]:
# Keep the last 19 hourly rows -- presumably to line up with the irradiance
# slice two cells down. TODO confirm the alignment.
X = X[-19:]
In [ ]:
In [29]:
# Matching last-19-hour slice of the ground irradiance.
# NOTE(review): `intensities` was last rebound from the 2015-07-02 file
# (cell 15) while X comes from 2015-07-01 granules -- possible date
# mismatch / hidden-state bug; confirm which day is intended.
intensities_2 = intensities[-19:]
In [30]:
from sklearn.linear_model import LinearRegression
lm = LinearRegression()
# Fit ground irradiance as a linear function of the five satellite band means.
sens_mod = lm.fit(X,intensities_2)
In [31]:
# --- Repeat cells 24-26 for the 2015-07-02 granules (verbatim copy with only
# the directory changed) to build the test-day band-mean features.
# NOTE(review): refactor into a load_band_means(day_dir) helper. ---
import os
import datetime
data_files = os.listdir('data/satellite/colorado/20150702')
list_of_files = []
list_of_files_details = []
# Keep only the *.nc granules and split their dot-delimited names.
for myfile in data_files:
if myfile[-2:] == 'nc':
list_of_files.append(myfile)
list_of_files_details.append(myfile.split('.'))
list_of_dates = []
# Filename fields 1-3 are year, day-of-year, and HHMMSS (see strptime format).
for i,_ in enumerate(list_of_files_details):
mytime = list_of_files_details[i][1]+" "+list_of_files_details[i][2]+" "+list_of_files_details[i][3]
mydatetime = datetime.datetime.strptime(mytime, '%Y %j %H%M%S')
list_of_dates.append(mydatetime)
channel_list = ['BAND_01', 'BAND_02', 'BAND_03', 'BAND_04', 'BAND_06']
index = np.unique(list_of_dates)
mydata_df = pd.DataFrame(index=index,columns=channel_list)
mydata_df.fillna(0).head()
# Spatial-mean sensor value per granule, keyed by timestamp and band name
# (filename field 4). NOTE(review): Dataset mode "a" -> prefer "r" for reads.
for i , myfile in enumerate(list_of_files):
rootgrp = Dataset("data/satellite/colorado/20150702/" + myfile, "a", format="NETCDF4")
lons = rootgrp.variables['lon'][:]
lats = rootgrp.variables['lat'][:]
data = rootgrp.variables['data'][:] #data is sensor data
rootgrp.close() #need to close before you can open again
mytime = list_of_files_details[i][1]+" "+list_of_files_details[i][2]+" "+list_of_files_details[i][3]
mydatetime = datetime.datetime.strptime(mytime, '%Y %j %H%M%S')
mydata_df.loc[mydatetime, list_of_files_details[i][4]] = data.mean()
# convert_objects and resample(how=) are deprecated pandas APIs.
mydata_df = mydata_df.convert_objects(convert_numeric=True)
mydata_df = mydata_df.resample('H', how='mean')
mydata_df
Out[31]:
In [32]:
# Test-day feature matrix: last 19 hourly rows of the five band means.
X_test = mydata_df.values[-19:]
In [ ]:
In [33]:
# Predicted ground irradiance for the test day from the satellite band means.
sens_mod.predict(X_test) #predicted sensor info
Out[33]:
In [34]:
# Rough energy estimate: scale summed predicted irradiance by the
# power/irradiance slope. NOTE(review): .75 and *1 are hardcoded magic
# numbers ("coefficient from mod2 above") -- read the fitted coefficient
# from res2.params instead of transcribing it by hand.
.75*sum(sens_mod.predict(X_test))*1 #coefficient from mod2 above
Out[34]: