EDA with solar data


In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
pd.options.display.max_rows = 999
pd.options.display.max_columns = 999
from __future__ import division

Sensor data from NREL

Hawaii (note: the cell below actually loads the Colorado file, `colorado20150701.txt`):


In [2]:
# Load the NREL hourly ground-sensor file and display the whole table.
# NOTE(review): the markdown above says "Hawaii" but this is the Colorado file.
df = pd.read_csv('data/colorado20150701.txt')
df


Out[2]:
DATE (MM/DD/YYYY) HOUR-MST Avg Global PSP (vent/cor) [W/m^2] Avg Global Extraterrestrial (calc) [W/m^2] Avg Global Photometric LI-210 [klux] Avg 500nm TWC Photometer [V] Avg 315nm POM-01 Photometer [nA] Avg 400nm POM-01 Photometer [uA] Avg 500nm POM-01 Photometer [uA] Avg 675nm POM-01 Photometer [uA] Avg 870nm POM-01 Photometer [uA] Avg 940nm POM-01 Photometer [uA] Avg 1020nm POM-01 Photometer [uA] Avg Global PSP [mV] Avg Zenith Angle [degrees] Avg Azimuth Angle [degrees] Avg Airmass Avg Tower Dry Bulb Temp [deg C] Avg Deck Dry Bulb Temp [deg C] Avg SE Dry Bulb Temp [deg C] Avg Data lab Dry Bulb Temp [deg C] Avg Tower Wet Bulb Temp [deg C] Avg Tower Dew Point Temp [deg C] Avg Tower Wind Chill Temp [deg C] Avg Deck Wind Chill Temp [deg C] Avg Tower RH [%] Avg Deck RH [%] Avg SE RH [%] Avg Data lab RH [%] Avg Total Cloud Cover [%] Avg Opaque Cloud Cover [%] Avg Station Pressure [mBar] Avg Sea-Level Pressure (Est) [mBar] Avg Atmospheric Electric Field [kV/m] Avg 500nm TWC AOD Avg 500nm Estimated AOD Avg Broadband Turbidity Avg Albedo (CM22) Avg Albedo (CM3) Avg Albedo (LI-200) Avg Albedo Quantum (LI-190) Avg SE-POA Angle [degrees]
0 7/1/2015 1 2.5324 0.0000 0.0011 -0.0001 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0219 116.7211 30.6832 -1.0000 20.4007 20.2898 19.7155 21.5255 14.4772 11.1037 20.4007 20.2898 55.1772 55.1703 56.7908 43.0565 -1.0000 -1.0000 819.7822 1021.0570 0.0266 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0850
1 7/1/2015 2 4.6280 0.0000 0.0014 -0.0001 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0242 113.9060 21.5977 -1.0000 20.2025 20.0793 19.5970 21.4163 13.7762 9.9055 20.2025 20.0793 51.6217 51.5198 52.7787 43.4797 -1.0000 -1.0000 819.2624 1020.5380 0.0352 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0786
2 7/1/2015 3 4.5013 0.0000 0.0013 -0.0001 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0243 108.4288 35.1641 -1.0000 20.0422 20.0528 19.4850 21.2288 13.2922 9.0829 20.0422 20.0528 49.2762 48.7005 50.0063 43.6992 -1.0000 -1.0000 818.8139 1020.0883 0.0357 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0743
3 7/1/2015 4 4.7167 0.0000 0.0024 -0.0001 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0231 100.8536 47.0792 -1.0000 20.8902 20.9502 20.3470 21.3292 12.8787 7.6099 20.8902 20.9502 42.3137 41.6873 42.5780 42.4400 -1.0000 -1.0000 818.7855 1020.0597 0.0359 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0756
4 7/1/2015 5 15.9336 13.8694 0.7915 0.0176 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0542 91.6214 57.5053 16.4303 21.3253 21.2743 20.7990 21.3492 12.6926 6.8696 21.3253 21.2743 39.1450 38.9337 39.2247 41.9788 -1.0000 -1.0000 819.2428 1020.5175 0.0396 0.0434 0.0445 0.0424 0.0547 0.0697 0.0828 0.0188 -0.6433
5 7/1/2015 6 118.0414 195.9917 10.0504 0.4042 0.1462 2.9836 28.4961 64.2558 56.9158 6.1720 44.9604 0.7506 81.4664 66.8372 7.2498 21.6385 21.6668 21.0190 21.2943 13.1869 7.6948 21.6385 21.6668 40.6402 40.2233 43.0547 41.6642 54.3167 17.6167 819.8238 1021.0995 0.0262 0.3580 0.3265 0.3077 0.2289 0.2519 0.3407 0.0904 -10.6266
6 7/1/2015 7 217.9495 438.0659 20.0517 0.6964 1.1863 6.9162 43.7869 75.7626 59.0010 10.9697 45.1562 1.4401 70.6263 75.5662 3.0726 23.1493 23.2990 23.0220 21.2703 14.2000 8.7063 23.1493 23.2990 39.7152 38.9662 39.3143 42.3057 49.5000 24.2500 820.3857 1021.6615 0.0262 0.5318 0.5378 0.4967 0.2118 0.2153 0.3222 0.0890 -27.6192
7 7/1/2015 8 480.9686 673.8892 45.2380 2.2440 9.3796 29.3960 156.1474 248.3056 191.6128 52.9251 149.8746 3.2704 59.3126 84.2791 1.9759 25.3035 25.5020 25.3470 21.2883 15.1875 9.2351 25.3035 25.5020 36.2950 34.3340 33.4890 43.2112 10.6667 8.6833 820.7214 1021.9962 0.0399 0.1683 0.2052 0.1423 0.2103 0.2120 0.2994 0.0853 -42.7402
8 7/1/2015 9 652.9859 886.6412 61.7165 2.5145 16.5455 35.5327 172.7500 261.1344 197.4602 65.2446 154.4458 4.4965 47.8183 93.8111 1.4959 26.4898 26.2257 25.7722 21.4102 15.7556 9.6001 26.4898 26.2257 34.5742 33.5665 33.3917 44.2793 9.0500 6.8333 820.8427 1022.1180 0.0395 0.1847 0.2307 0.1475 0.1940 0.1952 0.2641 0.0790 -42.7727
9 7/1/2015 10 825.2706 1061.6918 78.0455 2.8416 24.1557 42.4201 197.1067 291.2135 218.7123 80.5881 171.3048 5.7571 36.4898 105.6985 1.2469 27.1288 26.8578 26.0793 21.6108 15.9822 9.6520 27.1288 26.8578 33.4158 32.4565 34.5428 45.5420 6.1167 5.1667 820.8257 1022.1002 0.0500 0.1468 0.2023 0.1027 0.1805 0.1823 0.2384 0.0752 -33.7446
10 7/1/2015 11 933.2405 1187.0675 87.9767 2.8608 27.5339 43.4272 198.2650 291.0050 218.1102 83.8238 170.9607 6.5427 26.0312 123.4901 1.1142 28.1125 28.9172 27.4825 21.4112 16.4533 9.9845 28.1125 28.9172 32.2588 29.3727 32.9277 46.5993 6.3667 5.7833 820.7232 1021.9982 0.0580 0.1729 0.2260 0.1141 0.1700 0.1724 0.2214 0.0727 -21.5203
11 7/1/2015 12 971.4325 1254.2033 91.8277 2.6301 25.7531 38.8332 178.4910 264.7217 199.3636 76.2793 157.0150 6.8342 18.4567 154.8721 1.0542 28.7433 29.8105 28.6467 21.1982 16.5713 9.7962 28.7433 29.8105 30.7047 27.5533 30.9782 47.3998 18.2667 14.4667 820.4975 1021.7723 0.0799 0.3004 0.3357 0.2178 0.1664 0.1688 0.2153 0.0722 -7.8768
12 7/1/2015 13 542.0391 1258.5128 51.7260 1.0684 11.3937 17.3526 80.1308 119.1934 89.5178 33.6462 69.6471 3.7995 17.8774 199.4488 1.0505 28.6305 29.1715 28.5232 21.6500 16.2856 9.2801 28.6305 29.1715 29.8787 27.8507 29.9682 46.8187 73.0667 54.4667 820.1124 1021.3877 -1.4718 5.4933 3.6551 3.5377 0.1776 0.1772 0.2592 0.0788 5.8846
13 7/1/2015 14 100.6168 1199.6970 11.2292 0.0006 0.0051 0.0006 0.0024 0.0028 0.0021 0.0004 0.0011 0.6830 24.7643 233.2437 1.1024 24.3672 24.1847 23.9412 21.0532 16.5975 12.5107 24.3672 24.1847 48.6518 47.9002 49.7477 49.4410 93.7833 61.5333 820.3846 1021.6595 -2.6606 9.9314 9.0845 8.9656 0.1760 0.1654 0.2642 0.0746 19.5775
14 7/1/2015 15 335.6721 1081.7639 34.0473 0.1328 0.9901 1.3517 6.2526 9.0548 6.6049 2.2476 4.9118 2.3426 35.0009 252.3311 1.2236 24.1938 23.8805 23.9442 20.9387 17.0408 13.4748 24.1938 23.8805 51.2957 51.3188 53.2068 51.9705 94.7167 74.1667 820.3197 1021.5947 0.6295 6.0704 4.7781 4.6688 0.1877 0.1878 0.2619 0.0779 32.5110
15 7/1/2015 16 258.5899 912.7596 26.2242 0.0343 0.3322 0.4800 2.4630 3.8351 2.9136 0.8771 2.2624 1.7744 46.2695 264.7374 1.4526 22.9393 22.7583 22.7660 20.9572 16.4506 13.1281 22.9393 22.7583 54.2035 54.3307 55.4808 53.2622 88.4500 69.0667 820.4284 1021.7037 -1.4623 5.1190 4.0620 3.9701 0.1857 0.1844 0.2633 0.0774 42.1207
16 7/1/2015 17 222.5601 704.2318 21.3928 0.0520 0.3428 0.6530 3.5031 5.6102 4.2832 1.0275 3.2950 1.5074 57.7683 274.4717 1.8893 24.5292 24.5627 24.2828 20.7917 16.4013 12.1303 24.5292 24.5627 46.0172 45.2393 46.6132 54.2010 92.8667 77.1000 820.5150 1021.7890 0.0429 4.1574 3.3485 3.2789 0.1866 0.1865 0.2662 0.0769 42.9637
17 7/1/2015 18 333.6781 470.4942 31.4128 0.9876 1.4444 9.8392 69.4087 135.4315 114.4881 16.7390 91.9034 2.2623 69.1271 283.2361 2.8542 25.0120 25.6610 24.7488 21.4043 17.1986 13.3089 25.0120 25.6610 48.2623 45.2658 48.5370 51.4940 80.2000 58.4167 820.3916 1021.6672 0.1090 0.6742 0.5847 0.5372 0.2174 0.2161 0.3070 0.0849 27.9557
18 7/1/2015 19 108.3959 228.0230 9.8866 0.4919 0.1741 3.1769 34.6385 89.2541 83.9435 5.8164 67.5236 0.6776 80.0581 291.9233 6.1362 23.0587 23.7545 23.1380 21.4967 16.7407 13.5900 23.0587 23.7545 55.3607 52.3858 54.6693 50.8775 15.8500 11.3833 820.5424 1021.8175 0.1165 0.2598 0.2414 0.2175 0.1935 0.1904 0.3145 0.0858 11.1606
19 7/1/2015 20 12.7719 26.2899 0.8109 0.0043 -99999.0000 -99999.0000 -99999.0000 -99999.0000 -99999.0000 -99999.0000 -99999.0000 0.0316 90.2997 301.1415 18.2121 20.2288 20.2482 20.3298 21.5295 15.4459 12.9198 20.2288 20.2482 62.8425 62.3130 62.0218 51.6495 1.3333 1.0167 820.6264 1021.9020 0.0593 0.2096 0.1490 0.1453 0.0380 0.0162 0.0933 0.0241 1.4827
20 7/1/2015 21 4.0704 0.0000 0.0014 -0.0001 -99999.0000 -99999.0000 -99999.0000 -99999.0000 -99999.0000 -99999.0000 -99999.0000 -0.0251 99.7315 311.3925 -1.0000 19.4237 19.4873 19.3912 21.3350 15.3190 13.1466 19.4237 19.4873 67.0448 66.2732 66.6298 51.5807 -1.0000 -1.0000 821.0423 1022.3182 0.0344 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.9508
21 7/1/2015 22 3.8518 0.0000 0.0009 -0.0001 -99999.0000 -99999.0000 -99999.0000 -99999.0000 -99999.0000 -99999.0000 -99999.0000 -0.0244 107.5559 323.0906 -1.0000 18.6528 18.4813 18.1633 21.2897 15.1777 13.3415 18.6528 18.4813 71.2508 71.5827 72.8630 51.1348 -1.0000 -1.0000 820.9562 1022.2317 0.0316 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.9399
22 7/1/2015 23 4.1588 0.0000 0.0015 -0.0001 -99999.0000 -99999.0000 -99999.0000 -99999.0000 -99999.0000 -99999.0000 -99999.0000 -0.0204 113.3627 336.4444 -1.0000 18.6702 18.5083 18.0022 21.3420 14.9999 13.0439 18.6702 18.5083 69.8018 69.9325 72.4372 51.5668 -1.0000 -1.0000 820.1538 1021.4285 0.0565 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.9215

In [3]:
#plt.plot(df['Avg Global PAR [umol/s/m^2]'].values)

Colorado:


In [4]:
# Load the NREL Colorado ground-sensor data for 2015-07-01 (hourly averages).
col_0701_df = pd.read_csv('data/colorado20150701.txt')

def to_str(x):
    """Zero-pad an hour value to two characters (e.g. 7 -> '07', 13 -> '13').

    Works for plain Python ints as well as numpy integer scalars; the
    original `'0' + x.astype(str)` version crashed on built-in ints,
    which only happened to work because pandas hands .map numpy scalars.
    """
    return str(int(x)).zfill(2)

# Build a proper datetime column from the separate date and hour fields.
col_0701_df['datetime'] = pd.to_datetime(col_0701_df['DATE (MM/DD/YYYY)']+' '+col_0701_df['HOUR-MST'].map(to_str))
# Prepend a zero so the 23 hourly sensor readings line up with the 24 hourly
# power values produced by the resample further down.
intensities = np.insert(col_0701_df['Avg Global PSP (vent/cor) [W/m^2]'].values,0,0)

In [5]:
spectrometer = col_0701_df[col_0701_df.index==10].values.reshape(43,)[7:12]

In [6]:
wavelengths = np.array([400, 500, 675, 940, 1020])

In [7]:
plt.plot(wavelengths,spectrometer)


Out[7]:
[<matplotlib.lines.Line2D at 0x1077d7dd0>]

In [8]:
from scipy import interpolate

# Interpolate the 5 discrete photometer channels onto a dense wavelength grid.
x = wavelengths
y = spectrometer

f = interpolate.interp1d(x, y)  # linear interpolation by default
wav_new = np.arange(400,1020, 0.5) #400 nm to 1019.5 nm with 0.5 nm spacing
spec_new = f(wav_new)   #approximate the spectrum on the 0.5 nm grid
plt.plot(x, y, 'o', wav_new, spec_new, '-')
plt.show()



In [9]:
sum(wav_new*spec_new)*.5/1240/100


Out[9]:
780.22615203649048

Web scraping for pvoutput.org


In [10]:
# Log in to pvoutput.org with mechanize (plus Python 2's cookielib) and scrape
# the intraday generation table for system id=5446 / sid=5473 on 2015-07-01.
# Credentials come from environment variables -- never hardcode them.
import mechanize
import cookielib
import os
PVOUTPUT_USERNAME = os.environ['PVOUTPUT_USERNAME']
PVOUTPUT_PASSWORD = os.environ['PVOUTPUT_PASSWORD']
PVOUTPUT_API = os.environ['PVOUTPUT_API']

#web scraping
cj = cookielib.CookieJar() #cookie jar keeps the session cookie after login
br = mechanize.Browser() #instantiates a browser
br.set_handle_robots(False) #ignore robots.txt so mechanize will fetch pages
br.set_cookiejar(cj) #attach the cookie jar to the browser session
br.open("http://pvoutput.org/") #open login page
br.select_form(nr=0) #select the first form on the page (the login form)
br.form['login'] = PVOUTPUT_USERNAME #login using saved env vars
br.form['password'] = PVOUTPUT_PASSWORD #login using saved env vars
br.submit() #submit the login form
#print br.response().read() #see if it worked
br.open("http://pvoutput.org/intraday.jsp?id=5446&sid=5473&dt=20150701") #open desired page
html = br.response().read() #read desired page
df_html = pd.read_html(html)[0]

#cleaning the data: row 1 of the scraped table holds the real column names
df_html.columns = pd.MultiIndex.from_arrays(df_html[df_html.index==1].values)
df_html.drop(df_html.index[[0,1]],inplace=True) #drop the two header rows
df_html['Power'] = df_html['Power'].str.replace(',','') #strip thousands separators
df_html['Power'] = df_html['Power'].map(lambda x: x.rstrip('W')) #strip the unit suffix

def make_a_no(x):
    """Coerce a scraped Power cell to an int.

    pvoutput.org renders missing readings as '-' (and the scrape can hand us
    other non-numeric values), so anything int() rejects becomes 0.  Catches
    only ValueError/TypeError instead of a bare ``except:`` so genuine
    programming errors still surface.
    """
    try:
        return int(x)
    except (ValueError, TypeError):
        return 0
    
# Numeric Power column, indexed by timestamp (table rows are newest-first).
df_html['Power'] = df_html['Power'].map(make_a_no)
# NOTE(review): pd.to_datetime's unit= applies to numeric epochs, not strings --
# likely a no-op here; verify against the pandas version in use.
df_html['datetime'] = pd.to_datetime(df_html['Date'] + ' ' + df_html['Time'], unit='h')
df_html.set_index(['datetime'],inplace=True)
df_html.head()


Out[10]:
Date Time Energy Efficiency Power Average Normalised Temperature Voltage Energy Used Power Used nan
datetime
2015-07-01 23:55:00 2015-07-01 23:55 61.260kWh 4.317kWh/kW 0 0W 0.000kW/kW - - 66.801kWh 1,921W NaN
2015-07-01 23:50:00 2015-07-01 23:50 61.260kWh 4.317kWh/kW 0 0W 0.000kW/kW - - 66.641kWh 2,215W NaN
2015-07-01 23:45:00 2015-07-01 23:45 61.260kWh 4.317kWh/kW 0 0W 0.000kW/kW - - 66.456kWh 2,230W NaN
2015-07-01 23:40:00 2015-07-01 23:40 61.260kWh 4.317kWh/kW 0 0W 0.000kW/kW - - 66.271kWh 2,237W NaN
2015-07-01 23:35:00 2015-07-01 23:35 61.260kWh 4.317kWh/kW 0 0W 0.000kW/kW - - 66.084kWh 2,230W NaN

Making a simple linear model


In [11]:
powers = df_html.resample('H', how='mean')['Power'].values

In [12]:
plt.plot(intensities,powers,'ro') #output powers vs. intensities


Out[12]:
[<matplotlib.lines.Line2D at 0x10a25c250>]

In [13]:
import statsmodels.api as sm

# Fit and summarize OLS model
# NOTE(review): no constant is added (sm.add_constant), so this is a
# through-the-origin fit: power = coef * irradiance.
mod = sm.OLS(powers,intensities)
res = mod.fit()
print res.summary()


                            OLS Regression Results                            
==============================================================================
Dep. Variable:                      y   R-squared:                       0.909
Model:                            OLS   Adj. R-squared:                  0.906
Method:                 Least Squares   F-statistic:                     231.0
Date:                Wed, 05 Aug 2015   Prob (F-statistic):           1.74e-13
Time:                        22:50:32   Log-Likelihood:                -204.38
No. Observations:                  24   AIC:                             410.8
Df Residuals:                      23   BIC:                             411.9
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
x1             9.5328      0.627     15.199      0.000         8.235    10.830
==============================================================================
Omnibus:                        3.884   Durbin-Watson:                   2.531
Prob(Omnibus):                  0.143   Jarque-Bera (JB):                2.215
Skew:                          -0.398   Prob(JB):                        0.330
Kurtosis:                       4.257   Cond. No.                         1.00
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

In [14]:
# Visualize the fitted values against the single regressor.
fig, ax = plt.subplots()
fig = sm.graphics.plot_fit(res, 0, ax=ax)


Another day: the next day (July 2nd)


In [15]:
# Load the 2015-07-02 Colorado sensor file -- same pipeline as the 07-01 cell
# (if this pattern grows, factor it into a load_day(path) helper).
col_0702_df = pd.read_csv('data/colorado20150702.txt')

def to_str(x):
    """Zero-pad an hour value to two characters (e.g. 7 -> '07').

    Handles plain Python ints as well as numpy integer scalars; the original
    `x.astype(str)` version only worked for numpy scalars.
    """
    return str(int(x)).zfill(2)

# Build a proper datetime column from the separate date and hour fields.
col_0702_df['datetime'] = pd.to_datetime(col_0702_df['DATE (MM/DD/YYYY)']+' '+col_0702_df['HOUR-MST'].map(to_str))
# Leading zero aligns the 23 hourly sensor readings with 24 hourly power values.
intensities = np.insert(col_0702_df['Avg Global PSP (vent/cor) [W/m^2]'].values,0,0)

Scraping code copied from the July 1st cell above, pointed at July 2nd (a candidate for a shared helper function)


In [16]:
# Duplicate of the July 1st scraping cell, with the date parameter set to
# 2015-07-02.  Logs in to pvoutput.org and scrapes the intraday table.
import mechanize
import cookielib
import os
PVOUTPUT_USERNAME = os.environ['PVOUTPUT_USERNAME']
PVOUTPUT_PASSWORD = os.environ['PVOUTPUT_PASSWORD']
PVOUTPUT_API = os.environ['PVOUTPUT_API']

#web scraping
cj = cookielib.CookieJar() #cookie jar keeps the session cookie after login
br = mechanize.Browser() #instantiates a browser
br.set_handle_robots(False) #ignore robots.txt so mechanize will fetch pages
br.set_cookiejar(cj) #attach the cookie jar to the browser session
br.open("http://pvoutput.org/") #open login page
br.select_form(nr=0) #select the first form on the page (the login form)
br.form['login'] = PVOUTPUT_USERNAME #login using saved env vars
br.form['password'] = PVOUTPUT_PASSWORD #login using saved env vars
br.submit() #submit the login form
#print br.response().read() #see if it worked
br.open("http://pvoutput.org/intraday.jsp?id=5446&sid=5473&dt=20150702") #open desired page
html = br.response().read() #read desired page
df_html = pd.read_html(html)[0]

#cleaning the data: row 1 of the scraped table holds the real column names
df_html.columns = pd.MultiIndex.from_arrays(df_html[df_html.index==1].values)
df_html.drop(df_html.index[[0,1]],inplace=True) #drop the two header rows
df_html['Power'] = df_html['Power'].str.replace(',','') #strip thousands separators
df_html['Power'] = df_html['Power'].map(lambda x: x.rstrip('W')) #strip the unit suffix

def make_a_no(x):
    """Coerce a scraped Power cell to an int.

    pvoutput.org renders missing readings as '-' (and the scrape can hand us
    other non-numeric values), so anything int() rejects becomes 0.  Catches
    only ValueError/TypeError instead of a bare ``except:`` so genuine
    programming errors still surface.
    """
    try:
        return int(x)
    except (ValueError, TypeError):
        return 0
    
# Numeric Power column, indexed by timestamp (table rows are newest-first).
df_html['Power'] = df_html['Power'].map(make_a_no)
# NOTE(review): pd.to_datetime's unit= applies to numeric epochs, not strings --
# likely a no-op here; verify against the pandas version in use.
df_html['datetime'] = pd.to_datetime(df_html['Date'] + ' ' + df_html['Time'], unit='h')
df_html.set_index(['datetime'],inplace=True)
df_html.head()


Out[16]:
Date Time Energy Efficiency Power Average Normalised Temperature Voltage Energy Used Power Used nan
datetime
2015-07-02 23:55:00 2015-07-02 23:55 74.866kWh 5.276kWh/kW 0 0W 0.000kW/kW - - 53.868kWh 1,789W NaN
2015-07-02 23:50:00 2015-07-02 23:50 74.866kWh 5.276kWh/kW 0 0W 0.000kW/kW - - 53.719kWh 1,881W NaN
2015-07-02 23:45:00 2015-07-02 23:45 74.866kWh 5.276kWh/kW 0 0W 0.000kW/kW - - 53.562kWh 2,032W NaN
2015-07-02 23:40:00 2015-07-02 23:40 74.866kWh 5.276kWh/kW 0 0W 0.000kW/kW - - 53.393kWh 2,031W NaN
2015-07-02 23:35:00 2015-07-02 23:35 74.866kWh 5.276kWh/kW 0 0W 0.000kW/kW - - 53.224kWh 2,057W NaN

Use yesterday's (July 1st) model to predict today's (July 2nd) power


In [17]:
powers = df_html.resample('H', how='mean')['Power'].values

In [18]:
powers_pred = res.predict(intensities)

In [19]:
plt.plot(powers,powers_pred, 'ro')


Out[19]:
[<matplotlib.lines.Line2D at 0x10bca1d10>]

In [20]:
# Regress predictions on actuals to gauge day-to-day transfer quality
# (again fitted through the origin -- no constant term is added).
mod2 = sm.OLS(powers_pred,powers)
res2 = mod2.fit()
print res2.summary()


                            OLS Regression Results                            
==============================================================================
Dep. Variable:                      y   R-squared:                       0.979
Model:                            OLS   Adj. R-squared:                  0.978
Method:                 Least Squares   F-statistic:                     1085.
Date:                Wed, 05 Aug 2015   Prob (F-statistic):           7.38e-21
Time:                        22:50:37   Log-Likelihood:                -185.41
No. Observations:                  24   AIC:                             372.8
Df Residuals:                      23   BIC:                             374.0
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
x1             0.7454      0.023     32.940      0.000         0.699     0.792
==============================================================================
Omnibus:                        4.908   Durbin-Watson:                   1.661
Prob(Omnibus):                  0.086   Jarque-Bera (JB):                2.970
Skew:                           0.625   Prob(JB):                        0.226
Kurtosis:                       4.187   Cond. No.                         1.00
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

In [21]:
# Fitted-vs-observed plot for the prediction-quality regression.
fig, ax = plt.subplots()
fig = sm.graphics.plot_fit(res2, 0, ax=ax)


Now satellite data: can GOES imagery stand in for the ground sensors?


In [2]:
import netCDF4
from netCDF4 import Dataset
rootgrp = Dataset("../../data/satellite/colorado/summer6months/data/", "a", format="NETCDF4")

print "type of data: ", rootgrp.data_model #netcdf3_classic, not netcdf4
myvars = []
for var in rootgrp.variables: #list of variables
    myvars.append(var)
print "variables in data: ", myvars
print "latitude of one point; ", rootgrp.variables['lat'][0][0] #verify that one latitude is where we expect

lons = rootgrp.variables['lon'][:]
lats = rootgrp.variables['lat'][:]
data = rootgrp.variables['data'][:] #data is sensor data
rootgrp.close() #need to close before you can open again

from mpl_toolkits.basemap import Basemap  #map stuff
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Get some parameters for the Stereographic Projection
m = Basemap(width=800000,height=800000,
            resolution='l',projection='stere',\
            lat_ts=40,lat_0=39.5,lon_0=-104.5)

xi, yi = m(lons, lats) #map onton x and y for plotting
plt.figure(figsize=(10,10)) # Plot Data
cs = m.pcolor(xi,yi,np.squeeze(data)) #data is 1 x 14 x 36, squeeze makes it 14 x 36

m.drawparallels(np.arange(-80., 81., 1.), labels=[1,0,0,0], fontsize=10) # Add Grid Lines
m.drawmeridians(np.arange(-180., 181., 1.), labels=[0,0,0,1], fontsize=10) # Add Grid Lines
m.drawstates(linewidth=3) # Add state boundaries

cbar = m.colorbar(cs, location='bottom', pad="10%") # Add Colorbar
plt.title('GOES 15 - Sensor 1') # Add Title
plt.show()


---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-2-0100f39abcde> in <module>()
      1 import netCDF4
      2 from netCDF4 import Dataset
----> 3 rootgrp = Dataset("../../data/satellite/colorado/summer6months/data/", "a", format="NETCDF4")
      4 
      5 print "type of data: ", rootgrp.data_model #netcdf3_classic, not netcdf4

netCDF4/_netCDF4.pyx in netCDF4._netCDF4.Dataset.__init__ (netCDF4/_netCDF4.c:9551)()

RuntimeError: NetCDF: Unknown file format

In [23]:
# Sanity-check the array shape and overall mean of the satellite frame.
print data[0].shape
print data[0][0].shape
print data.mean()


(14, 36)
(36,)
3001.71

In [24]:
import os
import datetime

# GOES granules for 2015-07-01; filenames look like
# <prefix>.<year>.<day-of-year>.<HHMMSS>.<BAND_xx>...nc
data_files = os.listdir('data/satellite/colorado/20150701')

# Keep only netCDF granules.  endswith('.nc') is stricter than the old
# `myfile[-2:] == 'nc'`, which also matched names merely ending in the
# letters "nc" without the dot.
list_of_files = []
list_of_files_details = []
for myfile in data_files:
    if myfile.endswith('.nc'):
        list_of_files.append(myfile)
        list_of_files_details.append(myfile.split('.'))

# Parse the year / day-of-year / HHMMSS filename fields into datetimes.
list_of_dates = []
for details in list_of_files_details:
    mytime = details[1] + " " + details[2] + " " + details[3]
    list_of_dates.append(datetime.datetime.strptime(mytime, '%Y %j %H%M%S'))

In [25]:
# One row per unique acquisition time, one column per GOES band.
channel_list = ['BAND_01', 'BAND_02', 'BAND_03', 'BAND_04', 'BAND_06']
index = np.unique(list_of_dates)
mydata_df = pd.DataFrame(index=index,columns=channel_list)
# NOTE(review): fillna(0) returns a copy that is only displayed here --
# mydata_df itself still holds NaN until the fill loop below assigns values.
mydata_df.fillna(0).head()


Out[25]:
BAND_01 BAND_02 BAND_03 BAND_04 BAND_06
2015-07-01 00:30:21 0 0 0 0 0
2015-07-01 01:00:19 0 0 0 0 0
2015-07-01 01:30:20 0 0 0 0 0
2015-07-01 02:00:19 0 0 0 0 0
2015-07-01 02:30:19 0 0 0 0 0

In [26]:
# Fill the frame: one spatial-mean brightness value per (timestamp, band).
for i , myfile in enumerate(list_of_files):
    # Read-only mode; the old "a" (append) could modify the granule on disk.
    rootgrp = Dataset("data/satellite/colorado/20150701/" + myfile, "r", format="NETCDF4")
    lons = rootgrp.variables['lon'][:]
    lats = rootgrp.variables['lat'][:]
    data = rootgrp.variables['data'][:] #data is sensor data
    rootgrp.close() #need to close before you can open again

    # Rebuild the timestamp from the filename's year/day-of-year/HHMMSS fields.
    mytime = list_of_files_details[i][1]+" "+list_of_files_details[i][2]+" "+list_of_files_details[i][3]
    mydatetime = datetime.datetime.strptime(mytime, '%Y %j %H%M%S')

    # Field 4 of the filename is the band name, e.g. 'BAND_01'.
    mydata_df.loc[mydatetime, list_of_files_details[i][4]] = data.mean()

# convert_objects is deprecated in modern pandas (pd.to_numeric is the successor).
mydata_df = mydata_df.convert_objects(convert_numeric=True)
mydata_df = mydata_df.resample('H', how='mean') #hourly means (pre-0.18 API)
mydata_df.head()


Out[26]:
BAND_01 BAND_02 BAND_03 BAND_04 BAND_06
2015-07-01 00:00:00 3688.063477 5766.603027 5456.698242 10962.159180 12312.253906
2015-07-01 01:00:00 2616.984131 4904.635010 5238.285645 9897.301758 11597.555664
2015-07-01 02:00:00 1218.698395 3891.936523 4831.047607 8473.460449 10540.889160
2015-07-01 03:00:00 931.555542 3422.095215 4476.507812 7416.317383 9665.904297
2015-07-01 04:00:00 929.777771 3171.904785 4401.714355 6903.301758 9335.111328

Try a model to fit the channel means


In [27]:
X = mydata_df.values

In [28]:
X = X[-19:]

In [ ]:


In [29]:
intensities_2 = intensities[-19:]

In [30]:
from sklearn.linear_model import LinearRegression
lm = LinearRegression()

# Fit and summarize OLS model
# Maps the 5 satellite band means to the ground irradiance measurement.
sens_mod = lm.fit(X,intensities_2)

Now using this, can we make virtual sensors for the next day?


In [31]:
import os
import datetime

# --- Same satellite-ingest pipeline as the July 1st cells, for 2015-07-02. ---
# (If a third day is ever added, refactor this into a load_satellite_day() helper.)
data_files = os.listdir('data/satellite/colorado/20150702')

# Keep only netCDF granules; endswith('.nc') is stricter than the old
# `myfile[-2:] == 'nc'`, which matched any name merely ending in "nc".
list_of_files = []
list_of_files_details = []
for myfile in data_files:
    if myfile.endswith('.nc'):
        list_of_files.append(myfile)
        list_of_files_details.append(myfile.split('.'))

# Filename fields 1-3 are year, day-of-year and HHMMSS.
list_of_dates = []
for details in list_of_files_details:
    mytime = details[1] + " " + details[2] + " " + details[3]
    list_of_dates.append(datetime.datetime.strptime(mytime, '%Y %j %H%M%S'))

# One row per unique acquisition time, one column per GOES band.
channel_list = ['BAND_01', 'BAND_02', 'BAND_03', 'BAND_04', 'BAND_06']
index = np.unique(list_of_dates)
mydata_df = pd.DataFrame(index=index,columns=channel_list)
mydata_df.fillna(0).head()

# Fill the frame with the spatial-mean brightness of each granule.
for i , myfile in enumerate(list_of_files):
    # Read-only mode; the old "a" (append) could modify the granule on disk.
    rootgrp = Dataset("data/satellite/colorado/20150702/" + myfile, "r", format="NETCDF4")
    lons = rootgrp.variables['lon'][:]
    lats = rootgrp.variables['lat'][:]
    data = rootgrp.variables['data'][:] #data is sensor data
    rootgrp.close() #need to close before you can open again

    mytime = list_of_files_details[i][1]+" "+list_of_files_details[i][2]+" "+list_of_files_details[i][3]
    mydatetime = datetime.datetime.strptime(mytime, '%Y %j %H%M%S')

    # Field 4 of the filename is the band name, e.g. 'BAND_01'.
    mydata_df.loc[mydatetime, list_of_files_details[i][4]] = data.mean()

# convert_objects is deprecated in modern pandas (pd.to_numeric is the successor).
mydata_df = mydata_df.convert_objects(convert_numeric=True)
mydata_df = mydata_df.resample('H', how='mean') #hourly means (pre-0.18 API)
mydata_df


Out[31]:
BAND_01 BAND_02 BAND_03 BAND_04 BAND_06
2015-07-02 00:00:00 4857.079590 3866.031738 4324.761719 6720.952148 8998.032227
2015-07-02 01:00:00 3061.111084 3602.857056 4403.396729 6866.888916 9164.158691
2015-07-02 02:00:00 1231.809540 3509.079346 4790.285889 7725.364990 9999.587402
2015-07-02 03:00:00 931.936523 3724.190430 5297.460449 8932.825195 11185.396484
2015-07-02 04:00:00 932.603180 4025.460327 5562.539795 9867.143066 11948.000000
2015-07-02 05:00:00 932.888885 4246.095215 5855.809326 10593.904785 12608.285645
2015-07-02 06:00:00 933.079346 4709.587402 6212.253906 11744.063477 13557.206055
2015-07-02 07:00:00 936.126984 4795.079346 6306.063477 11921.873047 13687.650391
2015-07-02 08:00:00 942.444427 4649.841309 6304.158691 11640.539551 13507.777832
2015-07-02 09:00:00 947.555542 4168.825195 6117.523926 10538.920898 12683.873047
2015-07-02 10:00:00 946.222229 3964.984131 6062.317383 10104.984375 12383.586914
2015-07-02 11:00:00 1047.492096 3919.460327 6047.619141 9989.047852 12296.634766
2015-07-02 12:00:00 3356.063477 4929.714355 6110.603027 10140.952148 12437.840820
2015-07-02 13:00:00 4361.428589 5352.666748 6217.206299 10693.587402 12847.873047
2015-07-02 14:00:00 4883.936523 5771.714111 6377.587402 11362.856934 13241.841309
2015-07-02 15:00:00 4320.126953 6336.507812 6641.143066 12449.269531 13814.032227
2015-07-02 16:00:00 4228.095459 6703.714355 6746.730225 12944.603027 14005.618652
2015-07-02 17:00:00 4333.269775 7433.873047 6908.730225 13656.539551 14404.063477
2015-07-02 18:00:00 5103.746094 8481.333008 7086.539551 14490.413086 14949.777344
2015-07-02 19:00:00 5842.984131 8903.714355 7105.904785 14627.174805 14969.714355

In [32]:
X_test = mydata_df.values[-19:]

In [ ]:


In [33]:
sens_mod.predict(X_test) #predicted sensor info


Out[33]:
array([ -110.78542948,   126.37876165,   547.02272773,   750.4129594 ,
        1006.2984068 ,  1246.95210089,  1356.63690273,  1416.45821632,
        1397.19927116,  1407.32988332,  1426.7423635 ,  1298.26756202,
        1387.79823646,  1581.9892228 ,  1805.64231986,  1906.74400662,
        1886.4749812 ,  1767.06701731,  1745.57925968])

In [34]:
.75*sum(sens_mod.predict(X_test))*1  #coefficient from mod2 above


Out[34]:
17962.656577455851