In [1]:
import os
import h5py
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import pvlib
import seaborn as sns
import statsmodels.api as sm
from pvsc44_clearsky_aod import ecmwf_macc_tools
%matplotlib inline
sns.set_context('notebook', rc={'figure.figsize': (8, 6)})
sns.set(font_scale=1.5)
# get the "metadata" that contains the station id codes for the SURFRAD data that was analyzed
METADATA = pd.read_csv('metadata.csv', index_col=0)
# load calculations for each station
atm_params_3min_clear = {}
for station_id in METADATA.index:
with h5py.File('%s_3min_clear_atm_params.h5' % station_id, 'r') as f:
np_atm_params_3min_clear = pd.DataFrame(np.array(f['data']))
np_atm_params_3min_clear['index'] = pd.DatetimeIndex(np_atm_params_3min_clear['index'])
np_atm_params_3min_clear.set_index('index', inplace=True)
np_atm_params_3min_clear.index.rename('timestamps', inplace=True)
atm_params_3min_clear[station_id] = np_atm_params_3min_clear
# filter out low light
# CONSTANTS
MODELS = {'solis': 'SOLIS', 'lt': 'Linke', 'macc': 'ECMWF-MACC', 'bird': 'Bird'}
CS = ['dni', 'dhi', 'ghi']
LOW_LIGHT = 200 # threshold for low light in W/m^2
is_bright = {}
for station_id, station_atm_params_3min_clear in atm_params_3min_clear.iteritems():
is_bright[station_id] = station_atm_params_3min_clear['ghi'] > LOW_LIGHT
TIMES = pd.DatetimeIndex(start='2003-01-01T00:00:00', freq='3H', end='2012-12-31T23:59:59').tz_localize('UTC')
LT = pvlib.clearsky.lookup_linke_turbidity(
time=TIMES,
latitude=METADATA['latitude'][station_id],
longitude=METADATA['longitude'][station_id],)
In [2]:
print METADATA
In [24]:
def change_lt(df_in, value):
df = df_in.copy(deep=True)
df = df[df['ghi']>200]
df['lt_ref'] = df['lt']
df['lt'] = value
# solarposition, airmass, pressure-corrected & water vapor
SOLPOS = pvlib.solarposition.get_solarposition(
time=df.index,
latitude=METADATA['latitude'][station_id],
longitude=METADATA['longitude'][station_id],
altitude=METADATA['elevation'][station_id],
pressure=df['press'],
temperature=df['ta']
)
AIRMASS = pvlib.atmosphere.relativeairmass(SOLPOS.apparent_zenith) # relative airmass
AM_PRESS = pvlib.atmosphere.absoluteairmass(AIRMASS, pressure=df['press']) # pressure corrected airmass
ETR = pvlib.irradiance.extraradiation(df.index) # extra-terrestrial total solar radiation
# append these to the SOLPOS dataframe to keep them together more easily
df['apparent_zenith'] = SOLPOS.apparent_zenith
df['am']= AIRMASS
df['amp']= AM_PRESS
df['etr']= ETR
INEICHEN_LT = pvlib.clearsky.ineichen(
df['apparent_zenith'], df['amp'], df['lt'], altitude=METADATA['elevation'][station_id],
dni_extra=df['etr']
)
df['lt_dhi']= INEICHEN_LT['dhi']
df['lt_dni']= INEICHEN_LT['dni']
df['lt_ghi']= INEICHEN_LT['ghi']
return df
def get_monthly_mbe(df):
model = 'lt'
output = pd.DataFrame()
for cs in CS:
monthly_avg = df[cs].resample('M').mean()
output['mbe_'+ cs] = (df['%s_%s' % (model, cs)] - df[cs]).resample('M').mean() / monthly_avg
output['month'] = output.index.month
return output
def find_best_lt(df_in, month, lt_start):
df = df_in.copy(deep=True)
df = df[df.index.month==month]
median_error =10
lt = lt_start
amount = 1.0
df = change_lt(df, lt_start)
mbe = get_monthly_mbe(df)
median_error = mbe['mbe_ghi'].median()
direction = median_error/abs(median_error)
last_direction = median_error/abs(median_error)
while abs(median_error) > 0.01:
direction = median_error/abs(median_error)
if last_direction != direction:
amount *= 0.5
last_direction = direction
df = change_lt(df, lt)
mbe = get_monthly_mbe(df)
median_error = mbe['mbe_ghi'].median()
if median_error > 0:
lt += amount
else:
lt -= amount
#rint direction, median_error, lt, amount
#print i
return lt
In [25]:
#make a table of
def get_lt(station_id):
TIMES = pd.DatetimeIndex(start='2003-01-01T00:00:00', freq='3H', end='2012-12-31T23:59:59').tz_localize('UTC')
LT = pvlib.clearsky.lookup_linke_turbidity(
time=TIMES,
latitude=METADATA['latitude'][station_id],
longitude=METADATA['longitude'][station_id],)
lt_df = pd.DataFrame(index=LT.index, data=LT, columns=['lt'])
lt_df['month'] = lt_df.index.month
lt_m = lt_df.groupby("month").median()
return lt_m
In [ ]:
summary=pd.DataFrame()
for station in METADATA.index:
print station
for month in range(1,13):
lt_m = get_lt(station)
lt_ref = lt_m.loc[month,'lt']
lt_opt = find_best_lt(atm_params_3min_clear[station], month, lt_ref)
df = pd.DataFrame(index=[month])
df['station'] = station
df['lt_opt'] = lt_opt
df['lt_ref'] = lt_ref
df['month'] = month
print station, month, 'lt_ref', lt_ref, 'lt_opt', lt_opt
summary = summary.append(df, ignore_index=True)
summary.to_csv('summary.csv')
In [ ]:
In [ ]: