In [1]:
import os

import h5py
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import pvlib
import seaborn as sns
import statsmodels.api as sm

from pvsc44_clearsky_aod import ecmwf_macc_tools

%matplotlib inline

sns.set_context('notebook', rc={'figure.figsize': (8, 6)})
sns.set(font_scale=1.5)

# get the "metadata" that contains the station id codes for the SURFRAD data that was analyzed
METADATA = pd.read_csv('metadata.csv', index_col=0)


# load calculations for each station
atm_params_3min_clear = {}
for station_id in METADATA.index:
    with h5py.File('%s_3min_clear_atm_params.h5' % station_id, 'r') as f:
        np_atm_params_3min_clear = pd.DataFrame(np.array(f['data']))
    np_atm_params_3min_clear['index'] = pd.DatetimeIndex(np_atm_params_3min_clear['index'])
    np_atm_params_3min_clear.set_index('index', inplace=True)
    np_atm_params_3min_clear.index.rename('timestamps', inplace=True)    
    atm_params_3min_clear[station_id] = np_atm_params_3min_clear
    
    
    
# filter out low light

# CONSTANTS
MODELS = {'solis': 'SOLIS', 'lt': 'Linke', 'macc': 'ECMWF-MACC', 'bird': 'Bird'}
CS = ['dni', 'dhi', 'ghi']
LOW_LIGHT = 200  # threshold for low light in W/m^2

is_bright = {}
for station_id, station_atm_params_3min_clear in atm_params_3min_clear.iteritems():
    is_bright[station_id] = station_atm_params_3min_clear['ghi'] > LOW_LIGHT
    
    
    
TIMES = pd.DatetimeIndex(start='2003-01-01T00:00:00', freq='3H', end='2012-12-31T23:59:59').tz_localize('UTC')
LT = pvlib.clearsky.lookup_linke_turbidity(
    time=TIMES,
    latitude=METADATA['latitude'][station_id],
    longitude=METADATA['longitude'][station_id],)

In [2]:
print METADATA


            utc offset           tz    station name  latitude  longitude  \
station id                                                                 
bon               -6.0   US/Central       Bondville     40.05     -88.37   
tbl               -7.0  US/Mountain  Table Mountain     40.13    -105.24   
dra               -8.0   US/Pacific     Desert Rock     36.62    -116.02   
fpk               -7.0  US/Mountain       Fort Peck     48.31    -105.10   
gwn               -6.0   US/Central   Goodwin Creek     34.25     -89.87   
psu               -5.0   US/Eastern      Penn State     40.72     -77.93   
sxf               -6.0   US/Central     Sioux Falls     43.73     -96.62   

            elevation  
station id             
bon               213  
tbl              1689  
dra              1007  
fpk               634  
gwn                98  
psu               376  
sxf               473  

In [24]:
def change_lt(df_in, value):   
    
    df = df_in.copy(deep=True)
    df = df[df['ghi']>200]
    
    df['lt_ref'] = df['lt']
    df['lt'] = value       
    # solarposition, airmass, pressure-corrected & water vapor
    SOLPOS = pvlib.solarposition.get_solarposition(
        time=df.index,
        latitude=METADATA['latitude'][station_id],
        longitude=METADATA['longitude'][station_id],
        altitude=METADATA['elevation'][station_id],
        pressure=df['press'],
        temperature=df['ta']
    )
    AIRMASS = pvlib.atmosphere.relativeairmass(SOLPOS.apparent_zenith)  # relative airmass
    AM_PRESS = pvlib.atmosphere.absoluteairmass(AIRMASS, pressure=df['press'])  # pressure corrected airmass
    ETR = pvlib.irradiance.extraradiation(df.index)  # extra-terrestrial total solar radiation

    # append these to the SOLPOS dataframe to keep them together more easily
    df['apparent_zenith'] = SOLPOS.apparent_zenith
    df['am']= AIRMASS
    df['amp']= AM_PRESS

    df['etr']= ETR
    
    
    INEICHEN_LT = pvlib.clearsky.ineichen(
    df['apparent_zenith'], df['amp'], df['lt'], altitude=METADATA['elevation'][station_id],
    dni_extra=df['etr']
    )

    df['lt_dhi']= INEICHEN_LT['dhi']
    df['lt_dni']= INEICHEN_LT['dni']
    df['lt_ghi']= INEICHEN_LT['ghi']
    
    
    return df


def get_monthly_mbe(df):
    
    model = 'lt'
    
    output = pd.DataFrame()
    
    for cs in CS:
        monthly_avg = df[cs].resample('M').mean()
        output['mbe_'+ cs] = (df['%s_%s' % (model, cs)] - df[cs]).resample('M').mean() / monthly_avg    
        
    output['month'] = output.index.month
    
    return output


def find_best_lt(df_in, month, lt_start):
    
    df = df_in.copy(deep=True)
    df = df[df.index.month==month]
    
    
    median_error =10
    lt = lt_start
    amount = 1.0
    
    
    df = change_lt(df, lt_start)
    mbe = get_monthly_mbe(df)
    median_error = mbe['mbe_ghi'].median()
    
    direction = median_error/abs(median_error)
    last_direction = median_error/abs(median_error)
    
    
    while abs(median_error) > 0.01:
        
        direction = median_error/abs(median_error)
        
        if last_direction != direction:
            amount *= 0.5
            
        last_direction = direction
                
        
        df = change_lt(df, lt)
        mbe = get_monthly_mbe(df)
        median_error = mbe['mbe_ghi'].median()
        
        
        
        
        if median_error > 0:
            lt += amount
        else:
            lt -= amount
            
        #rint direction, median_error, lt, amount
            
    
    #print i
    
    return lt

In [25]:
#make a table of 
    
    
def get_lt(station_id):
    
    
    TIMES = pd.DatetimeIndex(start='2003-01-01T00:00:00', freq='3H', end='2012-12-31T23:59:59').tz_localize('UTC')
    LT = pvlib.clearsky.lookup_linke_turbidity(
        time=TIMES,
        latitude=METADATA['latitude'][station_id],
        longitude=METADATA['longitude'][station_id],)
    lt_df = pd.DataFrame(index=LT.index, data=LT, columns=['lt'])
    lt_df['month'] = lt_df.index.month
    lt_m = lt_df.groupby("month").median()
    return lt_m

In [ ]:
summary=pd.DataFrame()


for station in METADATA.index:
    
    print station

    for month in range(1,13):
        
        lt_m = get_lt(station)
        lt_ref = lt_m.loc[month,'lt']
        
        lt_opt = find_best_lt(atm_params_3min_clear[station], month, lt_ref)  
        
        df = pd.DataFrame(index=[month])
        df['station'] = station
        df['lt_opt'] = lt_opt
        df['lt_ref'] = lt_ref
        df['month'] = month

        
        print station, month, 'lt_ref', lt_ref, 'lt_opt', lt_opt
        summary = summary.append(df, ignore_index=True)    
        summary.to_csv('summary.csv')


bon
bon 1 lt_ref 2.35508474576 lt_opt 1.91758474576
bon 2 lt_ref 2.655 lt_opt 2.155
bon 3 lt_ref 2.95573770492 lt_opt 2.58073770492
bon 4 lt_ref 3.31803278689 lt_opt 2.56803278689
bon 5 lt_ref 4.35245901639 lt_opt 4.35245901639
bon 6 lt_ref 4.29672131148 lt_opt 3.29672131148
bon 7 lt_ref 4.12016129032 lt_opt 3.87016129032
bon 8 lt_ref 4.13770491803 lt_opt 3.38770491803
bon 9 lt_ref 4.08360655738 lt_opt 4.08360655738
bon 10 lt_ref 3.09262295082 lt_opt 2.84262295082
bon 11 lt_ref 2.64508196721 lt_opt 2.64508196721
bon 12 lt_ref 2.35 lt_opt 2.225
tbl
tbl 1 lt_ref 2.75338983051 lt_opt 0.253389830508
tbl 2 lt_ref 2.95166666667 lt_opt 0.951666666667
tbl 3 lt_ref 3.05983606557 lt_opt 0.309836065574
tbl 4 lt_ref 3.65327868852 lt_opt 2.40327868852
tbl 5 lt_ref 3.85327868852 lt_opt 3.85327868852
tbl 6 lt_ref 4.05491803279 lt_opt 4.80491803279
tbl 7 lt_ref 4.26612903226 lt_opt 4.26612903226
tbl 8 lt_ref 3.94426229508 lt_opt 3.94426229508
tbl 9 lt_ref 3.59508196721 lt_opt 2.22008196721
tbl 10 lt_ref 3.29262295082 lt_opt 1.04262295082
tbl 11 lt_ref 2.84836065574 lt_opt 0.348360655738
C:\Users\gkimball\Envs\env_ecmwf\lib\site-packages\pvlib\clearsky.py:138: RuntimeWarning: invalid value encountered in minimum
  dni = np.minimum(bnci, bnci_2)
tbl 12 lt_ref 2.75 lt_opt -0.25
dra
dra 1 lt_ref 2.40338983051 lt_opt -1.15911016949

In [ ]:


In [ ]: