In [1]:
!mkdir -p data
# ZIP code shapefiles from Census.gov
!wget -N http://www2.census.gov/geo/tiger/GENZ2014/shp/cb_2014_us_zcta510_500k.zip -P data
!unzip -o data/cb_2014_us_zcta510_500k.zip -d data
!mapshaper -i data/cb_2014_us_zcta510_500k.shp -o format=geojson data/cb_2014_us_zcta510_500k.json
!rm data/cb_2014_us_zcta510_500k.cpg
!rm data/cb_2014_us_zcta510_500k.dbf
!rm data/cb_2014_us_zcta510_500k.prj
!rm data/cb_2014_us_zcta510_500k.shp
!rm data/cb_2014_us_zcta510_500k.shp.ea.iso.xml
!rm data/cb_2014_us_zcta510_500k.shp.iso.xml
!rm data/cb_2014_us_zcta510_500k.shp.xml
!rm data/cb_2014_us_zcta510_500k.shx
!rm data/cb_2014_us_zcta510_500k.zip
# County shapefiles from Census.gov
!wget -N http://www2.census.gov/geo/tiger/GENZ2013/cb_2013_us_county_500k.zip -P data
!unzip -o data/cb_2013_us_county_500k.zip -d data
!mapshaper -i data/cb_2013_us_county_500k.shp -o format=geojson data/cb_2013_us_county_500k.json
!rm data/cb_2013_us_county_500k.dbf
!rm data/cb_2013_us_county_500k.prj
!rm data/cb_2013_us_county_500k.shp
!rm data/cb_2013_us_county_500k.shp.iso.xml
!rm data/cb_2013_us_county_500k.shp.xml
!rm data/cb_2013_us_county_500k.shx
!rm data/cb_2013_us_county_500k.zip
# CA climate zone division shapefiles and transform from CEC
!wget -N http://www.energy.ca.gov/maps/renewable/CA_Building_Standards_Climate_Zones.zip -P data
!unzip -o data/CA_Building_Standards_Climate_Zones.zip -d data
!ogr2ogr -f "ESRI Shapefile" -t_srs EPSG:4326 data/CA_Building_Standards_Climate_Zones_reprojected.shp data/CA_Building_Standards_Climate_Zones.shp
!mapshaper -i data/CA_Building_Standards_Climate_Zones_reprojected.shp -o format=geojson data/CA_Building_Standards_Climate_Zones.json
!rm data/CA\ Building\ Standards\ Climate\ Zones.lyr
!rm data/CA_Building_Standards_Climate_Zones.cpg
!rm data/CA_Building_Standards_Climate_Zones.dbf
!rm data/CA_Building_Standards_Climate_Zones.prj
!rm data/CA_Building_Standards_Climate_Zones.sbn
!rm data/CA_Building_Standards_Climate_Zones.sbx
!rm data/CA_Building_Standards_Climate_Zones.shp
!rm data/CA_Building_Standards_Climate_Zones.shp.xml
!rm data/CA_Building_Standards_Climate_Zones.shx
!rm data/CA_Building_Standards_Climate_Zones.zip
!rm data/CA_Building_Standards_Climate_Zones_reprojected.dbf
!rm data/CA_Building_Standards_Climate_Zones_reprojected.prj
!rm data/CA_Building_Standards_Climate_Zones_reprojected.shp
!rm data/CA_Building_Standards_Climate_Zones_reprojected.shx
# NCDC weather data quality
!wget -N ftp://ftp.ncdc.noaa.gov/pub/data/noaa/isd-inventory.csv -P data
# NCDC station lat lngs
!wget -N ftp://ftp.ncdc.noaa.gov/pub/data/noaa/isd-history.csv -P data
# IECC/Building America climate zone csv. Custom; assembled and corrected from primary source
!wget -N https://gist.githubusercontent.com/philngo/d3e251040569dba67942/raw/d1d2e13d73cc50147be6c90d8232f2e4c3eeaffc/climate_zones.csv -P data
# County ids - for reference - used to derive climate_zones.csv
!wget -N http://www2.census.gov/geo/docs/reference/codes/files/national_county.txt -P data
# Building America climate zone guide - for reference - used to derive climate_zones.csv above.
!wget -N http://energy.gov/sites/prod/files/2015/02/f19/ba_climate_guide_2013.pdf -P data
In [2]:
%matplotlib inline
from datetime import datetime, timedelta
from collections import defaultdict
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap # only for showing maps - can be skipped.
from shapely.geometry import shape, Point
import pyproj
import requests
from bs4 import BeautifulSoup
In [3]:
isd_history = pd.read_csv('data/isd-history.csv', dtype={"USAF": str, "WBAN": str}, parse_dates=["BEGIN", "END"])
hasGEO = (
    isd_history.LAT.notnull() &
    isd_history.LON.notnull() &
    (isd_history.LAT != 0)
)
# RQ = Puerto Rico
# GQ = Guam
isUS = (
    ((isd_history.CTRY == "US") & (isd_history.STATE.notnull())) |
    (isd_history.CTRY == "RQ") |
    (isd_history.CTRY == "GQ")
)
isd_history = isd_history[hasGEO & isUS]
isd_history["hasUSAF"] = isd_history.USAF != "999999"
usaf_station_lat_lngs = {}
for usaf_station, rows in isd_history[isd_history.hasUSAF].groupby("USAF"):
    # take the most recent record as the most accurate lat/lng
    row = rows.loc[rows.END.idxmax()]
    # cast to native floats so the coordinates serialize cleanly to JSON later
    usaf_station_lat_lngs[usaf_station] = (float(row.LAT), float(row.LON))
usaf_station_ids = sorted(list(usaf_station_lat_lngs.keys()))
len(usaf_station_ids)
Out[3]:
In [4]:
def map_points(lats, lons, title):
    plt.figure(figsize=(15, 7))
    m = Basemap(
        width=15000000,
        height=7000000,
        resolution='l',
        projection='laea',
        lat_ts=50,
        lat_0=50,
        lon_0=-107.
    )
    #m.drawcoastlines()
    #m.drawcountries()
    m.fillcontinents(color='coral', lake_color='aqua')
    m.drawmapboundary(fill_color='aqua')
    m.drawparallels(np.arange(0., 90, 10.), labels=[1, 0, 0, 0], fontsize=10)
    m.drawmeridians(np.arange(150., 360., 10.), labels=[0, 0, 0, 1], fontsize=10)
    x, y = m(lons, lats)
    m.scatter(x, y, 3, marker='o', color='k', zorder=10)
    plt.title(title)
    plt.show()
lats, lons = zip(*list(usaf_station_lat_lngs.values()))
title = "Available US USAF weather stations (n={})".format(len(lats))
map_points(lats, lons, title)
In [5]:
# load up station inventory - data points per station per month
isd_inventory = pd.read_csv('data/isd-inventory.csv', dtype={"USAF": str, "WBAN": str, "YEAR": str})
In [6]:
isd_index = defaultdict(set)
for usaf, full in zip(isd_inventory.USAF, isd_inventory.USAF + '-' + isd_inventory.WBAN):
    if usaf != '999999':
        isd_index[usaf].add(full)
isd_index = {k: list(v) for k, v in isd_index.items()}
In [7]:
isd_inventory.head()
Out[7]:
In [8]:
n_years = 5
months = {
    i + 1: month
    for i, month in enumerate([
        "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
        "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"
    ])
}
dates = pd.date_range((datetime.now() - timedelta(days=365*n_years + 31)).date(), freq='M', periods=12 * n_years + 1)
selectors = [(str(i.year), months[i.month]) for i in dates]
print(selectors)
In [9]:
# format the data nicely, grabbing the data from each of the months listed above ^^
usaf_station_inventory_data = {}
for station, group in isd_inventory.groupby("USAF"):
    if station in usaf_station_ids:
        group = group.sort_values(by='YEAR')
        group = group.drop(['WBAN', 'USAF'], axis=1)
        # put months in a single column
        group = group.reset_index(drop=True).set_index(['YEAR']) \
            .unstack(level='YEAR').T.swaplevel(0, 1).sort_index(level=0)
        # set index names
        group.index = group.index.set_names(["YEAR", "MONTH"])
        # combine duplicate years
        group = group.groupby(level=["YEAR", "MONTH"]).sum()
        # get n for each relevant month
        data = np.array([group.get(selector, 0) for selector in selectors])
        usaf_station_inventory_data[station] = data
In [10]:
# explore a bit to see where we should draw the missing data cutoff
means = []
quantiles = []
for station, data in list(usaf_station_inventory_data.items()):
    means.append(data.mean())
    quantiles.append(np.percentile(data, 5))
plt.title("Histogram of means of monthly data point counts - all stations")
plt.hist(means, bins=50)
plt.show()
plt.title("Histogram of 5th percentiles of monthly data point counts - all stations")
plt.hist(quantiles, bins=50)
plt.show()
Accept stations whose 5th-percentile month still has more than 600 data points.
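As a rough sanity check on that threshold, a sketch like the following (illustrative only; it reuses the usaf_station_inventory_data dict built above with a few arbitrary candidate cutoffs) shows how many stations would survive at each level:
# illustrative sensitivity check for the missing-data cutoff (not part of the pipeline)
for candidate_cutoff in [300, 450, 600, 750]:
    n_kept = sum(
        np.percentile(data, 5) > candidate_cutoff
        for data in usaf_station_inventory_data.values()
    )
    print("cutoff={}: {} of {} stations kept".format(
        candidate_cutoff, n_kept, len(usaf_station_inventory_data)))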
In [11]:
accepted_usaf_stations = set()
for station, data in list(usaf_station_inventory_data.items()):
    if np.percentile(data, 5) > 600:
        accepted_usaf_stations.add(station)
print(len(accepted_usaf_stations))
In [12]:
accepted_usaf_station_lat_lngs = {}
rejected_usaf_station_lat_lngs = {}
for station, data in usaf_station_lat_lngs.items():
    if station in accepted_usaf_stations:
        accepted_usaf_station_lat_lngs[station] = data
    else:
        rejected_usaf_station_lat_lngs[station] = data
accepted_usaf_station_points = {s: Point(lng, lat) for s, (lat, lng) in accepted_usaf_station_lat_lngs.items()}
lats, lons = zip(*list(accepted_usaf_station_lat_lngs.values()))
title = "Accepted US USAF weather stations (n={})".format(len(lats))
map_points(lats, lons, title)
lats, lons = zip(*list(rejected_usaf_station_lat_lngs.values()))
title = "Rejected US USAF weather stations (n={})".format(len(lats))
map_points(lats, lons, title)
In [13]:
print(list(accepted_usaf_station_lat_lngs.items())[:2])
In [14]:
len(usaf_station_inventory_data.keys())
Out[14]:
In [15]:
# load zipcode geojson
with open('data/cb_2014_us_zcta510_500k.json', 'r') as f:
    zip_js = json.load(f)
zipcode_polygons = {}
for zip_feature in zip_js['features']:
    zipcode = zip_feature['properties']['GEOID10']
    polygon = shape(zip_feature['geometry'])
    zipcode_polygons[zipcode] = polygon
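The ZCTA lookups below all reduce to shapely containment tests; here is a minimal sketch of that operation (using "11542", the example ZCTA displayed in the next cell, and remembering that shapely points are (x, y), i.e. (lng, lat)):
# illustrative containment test; not part of the pipeline
example_polygon = zipcode_polygons["11542"]
example_point = example_polygon.centroid
print(example_polygon.contains(example_point))  # usually True; centroids of very concave shapes can fall outside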
In [16]:
zipcode_polygons["11542"]
Out[16]:
In [17]:
# load county geojson
with open('data/cb_2013_us_county_500k.json', 'r') as f:
    county_js = json.load(f)
county_polygons = {}
for county_feature in county_js['features']:
    county = county_feature['properties']['GEOID']
    polygon = shape(county_feature['geometry'])
    county_polygons[county] = polygon
In [18]:
county_polygons["16079"]
Out[18]:
In [19]:
# load county climate zones:
climate_zones = pd.read_csv('data/climate_zones.csv',
                            dtype={"State FIPS": str, "County FIPS": str},
                            usecols=["State FIPS", "County FIPS", "IECC Climate Zone", "IECC Moisture Regime", "BA Climate Zone"])
In [20]:
climate_zones.head()
Out[20]:
In [21]:
# gather a list of counties (not including california)
counties_dict = {}
for i, row in climate_zones.iterrows():
    # if not in california
    if row["State FIPS"] != "06":
        county_id = row["State FIPS"] + row["County FIPS"]
        county_polygon = county_polygons.get(county_id)
        if county_polygon is not None:
            counties_dict[county_id] = {
                "climate_zone": "{}|{}|{}".format(
                    row["IECC Climate Zone"],
                    row["IECC Moisture Regime"] if not pd.isnull(row["IECC Moisture Regime"]) else "NA",
                    row["BA Climate Zone"]),
                "polygon": county_polygon,
                "centroid": county_polygon.centroid
            }
        else:
            print("Could not find county {}, skipping.".format(county_id))
In [22]:
counties_dict["16079"]
Out[22]:
In [23]:
# treat CA climate zones specially:
with open('data/CA_Building_Standards_Climate_Zones.json', 'r') as f:
    ca_js = json.load(f)
california_climate_zone_polygons = {}
california_climate_zone_centroids = {}
for ca_feature in ca_js['features']:
    zone = "CA_{:02d}".format(int(ca_feature['properties']['Zone']))
    polygon = shape(ca_feature['geometry'])
    california_climate_zone_polygons[zone] = polygon
    california_climate_zone_centroids[zone] = polygon.centroid
In [24]:
california_climate_zone_polygons['CA_02']
Out[24]:
In [25]:
n_zipcodes = len(zipcode_polygons)
zipcode_centroid_points = {}
zipcode_centroid_lat_lngs = {}
for i, (zipcode, zipcode_poly) in enumerate(zipcode_polygons.items()):
    zipcode_centroid = zipcode_poly.centroid
    zipcode_centroid_points[zipcode] = zipcode_centroid
    zipcode_centroid_lat_lngs[zipcode] = (zipcode_centroid.coords[0][1], zipcode_centroid.coords[0][0])
In [26]:
[(z, p.x, p.y) for z, p in zipcode_centroid_points.items()][:10]
Out[26]:
In [27]:
lats, lons = zip(*list(zipcode_centroid_lat_lngs.values()))
title = "ZCTAs".format(len(lats))
map_points(lats, lons, title)
In [28]:
zipcode_to_climate_zone = {}
# some optimizations for the loop.
counties_dict_items = list(counties_dict.items())
california_climate_zone_polygons_items = list(california_climate_zone_polygons.items())
for i, zipcode in enumerate(zipcode_polygons.keys()):
    print('\r{} of {}'.format(i + 1, n_zipcodes), end='', flush=True)
    zipcode_centroid = zipcode_centroid_points[zipcode]
    # check to see if non-CA counties contain centroid
    for county, county_dict in counties_dict_items:
        county_poly = county_dict["polygon"]
        if county_poly.contains(zipcode_centroid):
            zipcode_to_climate_zone[zipcode] = county_dict["climate_zone"]
            break
    else:  # for-else: executes only if no break was encountered in the loop
        # check to see if a CA climate zone contains centroid
        for ca_cz, ca_cz_poly in california_climate_zone_polygons_items:
            if ca_cz_poly.contains(zipcode_centroid):
                zipcode_to_climate_zone[zipcode] = ca_cz
                break
        else:  # for-else: executes only if no break was encountered in the loop
            zipcode_to_climate_zone[zipcode] = None
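The for-else blocks above are standard Python: the else clause runs only when the loop finishes without hitting break. A minimal illustration:
# for-else demo (illustrative only)
for x in [1, 2, 3]:
    if x == 10:
        break
else:
    print("no break encountered")  # prints, because 10 was never found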
In [29]:
set(zipcode_to_climate_zone.values())
Out[29]:
In [30]:
list(zipcode_to_climate_zone.items())[:2]
Out[30]:
In [31]:
# non-matches ?
len([z for z, cz in zipcode_to_climate_zone.items() if cz is None])
Out[31]:
In [32]:
no_climate_zone_zipcode_lat_lngs = [zipcode_centroid_lat_lngs[z] for z, cz in zipcode_to_climate_zone.items() if cz is None]
lats, lons = zip(*no_climate_zone_zipcode_lat_lngs)
title = "ZIP codes not inside climate zone (n={})".format(len(lats))
map_points(lats, lons, title)
In [33]:
geod = pyproj.Geod(ellps='WGS84')
cutoff = 25 # km
near_zipcode_zipcodes = {}
near_zipcode_lat_lngs = {}
zipcode_centroid_lat_lngs_items = [(z, ll) for z, ll in zipcode_centroid_lat_lngs.items() if zipcode_to_climate_zone[z] is not None]
lats2, lngs2 = zip(*[ll for z, ll in zipcode_centroid_lat_lngs_items])
lats2, lngs2 = np.array(lats2), np.array(lngs2)
for z, cz in zipcode_to_climate_zone.items():
    if cz is None:
        lat1, lng1 = zipcode_centroid_lat_lngs[z]
        lats1, lngs1 = np.tile(lat1, lats2.shape), np.tile(lng1, lngs2.shape)
        dists = geod.inv(lngs1, lats1, lngs2, lats2)[2] / 1000.  # km
        min_dist_i = np.argmin(dists)
        min_dist = dists[min_dist_i]
        min_zipcode = zipcode_centroid_lat_lngs_items[min_dist_i][0]
        near_zipcode_zipcodes[z] = min_zipcode
        near_zipcode_lat_lngs[z] = lat1, lng1
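The distance calculation relies on pyproj's geodesic inverse: Geod.inv takes (lons1, lats1, lons2, lats2) and returns the forward azimuth, back azimuth, and distance in meters on the WGS84 ellipsoid. A minimal sketch with illustrative coordinates:
# illustrative geodesic distance (approximately San Francisco -> Los Angeles)
az12, az21, dist_m = geod.inv(-122.42, 37.77, -118.24, 34.05)
print(dist_m / 1000.)  # roughly 559 km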
In [34]:
zipcode_climate_zone_update = {z1: zipcode_to_climate_zone[z2] for z1, z2 in near_zipcode_zipcodes.items()}
In [35]:
list(zipcode_climate_zone_update.items())[:2]
Out[35]:
In [36]:
len(zipcode_climate_zone_update)
Out[36]:
In [37]:
# patch zipcode -> climate zone matching
zipcode_to_climate_zone.update(zipcode_climate_zone_update)
In [38]:
# Make sure no zipcodes map to None
np.unique([cz for z, cz in zipcode_to_climate_zone.items()])
Out[38]:
Map weather stations to climate zones using the same method as for ZIP codes.
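Since the lookup is identical to the ZIP code case, it could be factored into a helper along these lines (an illustrative refactor only; the cells below keep the inline loops):
def lookup_climate_zone(point):
    """Return the climate zone containing a shapely Point, or None if no zone contains it."""
    for county, county_dict in counties_dict_items:
        if county_dict["polygon"].contains(point):
            return county_dict["climate_zone"]
    for ca_cz, ca_cz_poly in california_climate_zone_polygons_items:
        if ca_cz_poly.contains(point):
            return ca_cz
    return None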
In [39]:
# map weather stations to climate zones
station_to_climate_zone = {}
n_stations = len(accepted_usaf_station_points)
for i, (station, station_point) in enumerate(accepted_usaf_station_points.items()):
    print('\r{} of {}'.format(i + 1, n_stations), end='', flush=True)
    # Is the station in a non-CA county?
    for county, county_dict in counties_dict_items:
        county_poly = county_dict["polygon"]
        if county_poly.contains(station_point):
            station_to_climate_zone[station] = county_dict["climate_zone"]
            break
    else:  # if no break
        # is the station in a California climate zone?
        for ca_cz, ca_cz_poly in california_climate_zone_polygons_items:
            if ca_cz_poly.contains(station_point):
                station_to_climate_zone[station] = ca_cz
                break
        else:  # if no break
            station_to_climate_zone[station] = None
In [40]:
list(station_to_climate_zone.items())[:2]
Out[40]:
In [41]:
# non-matches ?
len([s for s, cz in station_to_climate_zone.items() if cz is None])
Out[41]:
In [42]:
no_climate_zone_station_lat_lngs = [usaf_station_lat_lngs[s] for s, cz in station_to_climate_zone.items() if cz is None]
lats, lons = zip(*no_climate_zone_station_lat_lngs)
title = "US USAF weather stations not inside climate zone (n={})".format(len(lats))
map_points(lats, lons, title)
In [43]:
geod = pyproj.Geod(ellps='WGS84')
cutoff = 25 # km
near_station_zipcodes = {}
far_station_zipcodes = {}
near_station_lat_lngs = {}
far_station_lat_lngs = {}
min_dists = []
zipcode_centroid_lat_lngs_items = list(zipcode_centroid_lat_lngs.items())
lats2, lngs2 = zip(*[p for z, p in zipcode_centroid_lat_lngs_items])
lats2, lngs2 = np.array(lats2), np.array(lngs2)
for s, cz in station_to_climate_zone.items():
    if cz is None:
        lat1, lng1 = accepted_usaf_station_lat_lngs[s]
        lats1, lngs1 = np.tile(lat1, lats2.shape), np.tile(lng1, lngs2.shape)
        dists = geod.inv(lngs1, lats1, lngs2, lats2)[2] / 1000.  # km
        min_dist_i = np.argmin(dists)
        min_dist = dists[min_dist_i]
        min_dists.append(min_dist)
        min_zipcode = zipcode_centroid_lat_lngs_items[min_dist_i][0]
        if min_dist < cutoff:
            near_station_zipcodes[s] = min_zipcode
            near_station_lat_lngs[s] = lat1, lng1
        else:
            far_station_zipcodes[s] = min_zipcode
            far_station_lat_lngs[s] = lat1, lng1
plt.hist(min_dists, bins=30)
plt.title("Histogram of distances to nearest zipcodes: cutoff={}km, n={}/{}".format(cutoff, len(near_station_zipcodes), len(min_dists)))
plt.show()
In [44]:
lats, lons = zip(*far_station_lat_lngs.values())
title = "US USAF weather stations >25km from nearest ZIP code (n={}) -> DISCARD".format(len(lats))
map_points(lats, lons, title)
In [45]:
# split station lat/lngs into accepted (near a ZIP code centroid) and rejected (too far) sets
accepted_near_usaf_station_lat_lngs = {s: ll for s, ll in accepted_usaf_station_lat_lngs.items() if s not in far_station_lat_lngs}
rejected_far_usaf_station_lat_lngs = {s: ll for s, ll in rejected_usaf_station_lat_lngs.items()}
rejected_far_usaf_station_lat_lngs.update(far_station_lat_lngs)
In [46]:
filtered_station_to_climate_zone = {}
for s, cz in station_to_climate_zone.items():
    if cz is None:
        zipcode = near_station_zipcodes.get(s)
        if zipcode is not None:
            filtered_station_to_climate_zone[s] = zipcode_to_climate_zone[zipcode]
    else:
        filtered_station_to_climate_zone[s] = cz
In [47]:
len(filtered_station_to_climate_zone)
Out[47]:
In [48]:
len([s for s, cz in filtered_station_to_climate_zone.items() if cz is None])
Out[48]:
In [49]:
tmy3_index_page_url = (
    "http://rredc.nrel.gov/solar/old_data/"
    "nsrdb/1991-2005/tmy3/by_USAFN.html"
)
soup = BeautifulSoup(requests.get(tmy3_index_page_url).text, 'html.parser')
tmy3_station_elements = soup.select('td .hide')
tmy3_station_data = {}
for station_el in tmy3_station_elements:
    station_name_el = station_el.findNext('td').findNext('td')
    station_class_el = station_name_el.findNext('td')
    tmy3_station_data[station_el.text.strip()] = {
        "name": (
            "".join(station_name_el.text.split(",")[:-1])
            .replace("\n", "").replace("\t", "").strip()
        ),
        "state": station_name_el.text.split(",")[-1].strip(),
        "class": station_class_el.text.split()[1].strip(),
    }
In [50]:
list(tmy3_station_data.items())[:2]
Out[50]:
In [51]:
accepted_near_tmy3_station_lat_lngs = {}
rejected_far_tmy3_station_lat_lngs = {}
unknown_tmy3_stations = set()
for station, data in tmy3_station_data.items():
    if station in usaf_station_lat_lngs:
        if station in accepted_near_usaf_station_lat_lngs:
            accepted_near_tmy3_station_lat_lngs[station] = usaf_station_lat_lngs[station]
        else:
            rejected_far_tmy3_station_lat_lngs[station] = usaf_station_lat_lngs[station]
    else:
        unknown_tmy3_stations.add(station)
In [52]:
# Take only TMY3 stations that match to USAF stations - 0% unknown
n_unknown = len(unknown_tmy3_stations)
n_rejected = len(rejected_far_tmy3_station_lat_lngs)
n_accepted = len(accepted_near_tmy3_station_lat_lngs)
n_total = len(tmy3_station_data)
print "Unknown: {} ({:.1f}%)".format(n_unknown, float(n_unknown) / n_total * 100.)
print "Accepted: {} ({:.1f}%)".format(n_accepted, float(n_accepted) / n_total * 100.)
print "Rejected: {} ({:.1f}%)".format(n_rejected, float(n_rejected) / n_total * 100.)
print "Total: {}".format(n_total)
In [53]:
lats, lons = zip(*accepted_near_tmy3_station_lat_lngs.values())
title = "Accepted TMY3 weather stations".format(len(lats))
map_points(lats, lons, title)
In [54]:
lats, lons = zip(*rejected_far_tmy3_station_lat_lngs.values())
title = "Rejected TMY3 weather stations".format(len(lats))
map_points(lats, lons, title)
In [55]:
# climate zone to station list mapping
climate_zone_to_usaf_stations = defaultdict(list)
for station, climate_zone in filtered_station_to_climate_zone.items():
    assert climate_zone is not None
    climate_zone_to_usaf_stations[climate_zone].append(station)
climate_zone_to_usaf_stations = dict(climate_zone_to_usaf_stations)
print(list(climate_zone_to_usaf_stations.items())[:2])
In [56]:
for cz, stations in sorted(climate_zone_to_usaf_stations.items(), key=lambda x: x[0]):
    lats, lons = zip(*[usaf_station_lat_lngs[s] for s in stations])
    title = "Climate Zone {} USAF stations (n={})".format(cz, len(lats))
    map_points(lats, lons, title)
In [57]:
# climate zone to station list mapping
climate_zone_to_tmy3_stations = defaultdict(list)
for station, climate_zone in filtered_station_to_climate_zone.items():
    if station in accepted_near_tmy3_station_lat_lngs:
        assert climate_zone is not None
        climate_zone_to_tmy3_stations[climate_zone].append(station)
climate_zone_to_tmy3_stations = dict(climate_zone_to_tmy3_stations)
print(list(climate_zone_to_tmy3_stations.items())[:2])
In [58]:
empty_climate_zones = [cz for cz in climate_zone_to_usaf_stations.keys() if climate_zone_to_tmy3_stations.get(cz) is None]
empty_climate_zones
Out[58]:
In [59]:
geod = pyproj.Geod(ellps='WGS84')
cutoff = 25 # km
climate_zone_tmy3_station_updates = {}
min_dists = []
accepted_near_tmy3_station_lat_lngs_items = list(accepted_near_tmy3_station_lat_lngs.items())
lats2, lngs2 = zip(*[p for z, p in accepted_near_tmy3_station_lat_lngs_items])
lats2, lngs2 = np.array(lats2), np.array(lngs2)
for empty_climate_zone in empty_climate_zones:
    min_dists = []
    min_stations = []
    for usaf_station in climate_zone_to_usaf_stations[empty_climate_zone]:
        lat1, lng1 = usaf_station_lat_lngs[usaf_station]
        lats1, lngs1 = np.tile(lat1, lats2.shape), np.tile(lng1, lngs2.shape)
        dists = geod.inv(lngs1, lats1, lngs2, lats2)[2] / 1000.  # km
        min_dist_i = np.argmin(dists)
        min_dist = dists[min_dist_i]
        min_dists.append(min_dist)
        min_station = accepted_near_tmy3_station_lat_lngs_items[min_dist_i][0]
        min_stations.append(min_station)
    if not all(min_stations[0] == s for s in min_stations):
        print("Not all close to a single TMY3 station")
    climate_zone_tmy3_station_updates[empty_climate_zone] = [min_stations[0]]
In [60]:
climate_zone_tmy3_station_updates
Out[60]:
In [61]:
# remove each reassigned station from any other climate zone's list:
for cz, stations in climate_zone_to_tmy3_stations.items():
    for cz_update, station_update in climate_zone_tmy3_station_updates.items():
        if station_update[0] in stations:
            stations.remove(station_update[0])
In [62]:
climate_zone_to_tmy3_stations.update(climate_zone_tmy3_station_updates)
In [63]:
empty_climate_zones = [cz for cz in climate_zone_to_usaf_stations.keys() if climate_zone_to_tmy3_stations.get(cz) is None]
empty_climate_zones
Out[63]:
In [64]:
for cz, stations in sorted(climate_zone_to_tmy3_stations.items(), key=lambda x: x[0]):
    lats, lons = zip(*[usaf_station_lat_lngs[s] for s in stations])
    title = "Climate Zone {} TMY3 stations (n={})".format(cz, len(lats))
    map_points(lats, lons, title)
Map ZIP codes to weather stations by choosing, where possible, the closest station within the same climate zone. If a ZIP code falls outside every climate zone, fall back to the station that is closest overall.
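That rule could be written as a helper roughly like the sketch below (illustrative only; the cells that follow implement the first branch inline, since the earlier patching step guarantees every ZIP code already has a climate zone):
def nearest_usaf_station(lat1, lng1, climate_zone):
    # restrict to stations in the same climate zone when one is known;
    # otherwise fall back to all accepted stations
    if climate_zone is not None:
        candidates = climate_zone_to_usaf_stations[climate_zone]
    else:
        candidates = list(accepted_near_usaf_station_lat_lngs.keys())
    lats2, lngs2 = zip(*[usaf_station_lat_lngs[s] for s in candidates])
    lats2, lngs2 = np.array(lats2), np.array(lngs2)
    lats1, lngs1 = np.tile(lat1, lats2.shape), np.tile(lng1, lngs2.shape)
    dists = geod.inv(lngs1, lats1, lngs2, lats2)[2]  # meters
    return candidates[np.argmin(dists)]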
In [65]:
# zipcode to usaf station mapping
zipcode_to_usaf_station = {}
all_stations = accepted_near_usaf_station_lat_lngs.keys()
for i, (zipcode, (lat1, lng1)) in enumerate(zipcode_centroid_lat_lngs.items()):
    print("\r{} of {}".format(i + 1, n_zipcodes), end="", flush=True)
    # get set of stations to compare for distance.
    climate_zone = zipcode_to_climate_zone[zipcode]
    assert climate_zone is not None
    stations = climate_zone_to_usaf_stations[climate_zone]
    # find minimum distance
    lats2, lngs2 = zip(*[usaf_station_lat_lngs[s] for s in stations])
    lats2, lngs2 = np.array(lats2), np.array(lngs2)
    lats1, lngs1 = np.tile(lat1, lats2.shape), np.tile(lng1, lngs2.shape)
    dists = geod.inv(lngs1, lats1, lngs2, lats2)[2]  # meters
    min_station = stations[np.argmin(dists)]
    zipcode_to_usaf_station[zipcode] = min_station
In [66]:
# zipcode to tmy3 station mapping
zipcode_to_tmy3_station = {}
all_stations = accepted_near_tmy3_station_lat_lngs.keys()
for i, (zipcode, (lat1, lng1)) in enumerate(zipcode_centroid_lat_lngs.items()):
    print("\r{} of {}".format(i + 1, n_zipcodes), end="", flush=True)
    # get set of stations to compare for distance.
    climate_zone = zipcode_to_climate_zone[zipcode]
    assert climate_zone is not None
    stations = climate_zone_to_tmy3_stations[climate_zone]
    # find minimum distance
    lats2, lngs2 = zip(*[usaf_station_lat_lngs[s] for s in stations])
    lats2, lngs2 = np.array(lats2), np.array(lngs2)
    lats1, lngs1 = np.tile(lat1, lats2.shape), np.tile(lng1, lngs2.shape)
    dists = geod.inv(lngs1, lats1, lngs2, lats2)[2]  # meters
    min_station = stations[np.argmin(dists)]
    zipcode_to_tmy3_station[zipcode] = min_station
Make some other JSON products that may be useful.
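The next three cells all follow the same invert-a-mapping pattern; a generic helper would look roughly like this (sketch only; the cells below keep the explicit loops for clarity):
def invert_mapping(mapping):
    """Group keys by value, skipping keys that map to None."""
    inverted = defaultdict(list)
    for key, value in mapping.items():
        if value is not None:
            inverted[value].append(key)
    return dict(inverted)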
In [67]:
# climate zone to zipcode list mapping
climate_zone_to_zipcodes = defaultdict(list)
for zipcode, climate_zone in zipcode_to_climate_zone.items():
    if climate_zone is not None:
        climate_zone_to_zipcodes[climate_zone].append(zipcode)
climate_zone_to_zipcodes = dict(climate_zone_to_zipcodes)
In [68]:
# usaf station to zipcode list mapping
usaf_station_to_zipcodes = defaultdict(list)
for zipcode, station in zipcode_to_usaf_station.items():
    if station is not None:
        usaf_station_to_zipcodes[station].append(zipcode)
usaf_station_to_zipcodes = dict(usaf_station_to_zipcodes)
In [69]:
# tmy3 station to zipcode list mapping
tmy3_station_to_zipcodes = defaultdict(list)
for zipcode, station in zipcode_to_tmy3_station.items():
    if station is not None:
        tmy3_station_to_zipcodes[station].append(zipcode)
tmy3_station_to_zipcodes = dict(tmy3_station_to_zipcodes)
In [70]:
# usaf station to climate zone mapping
usaf_station_to_climate_zone = {}
for climate_zone, stations in climate_zone_to_usaf_stations.items():
    for station in stations:
        usaf_station_to_climate_zone[station] = climate_zone
In [71]:
# tmy3 station to climate zone mapping
tmy3_station_to_climate_zone = {}
for climate_zone, stations in climate_zone_to_tmy3_stations.items():
    for station in stations:
        tmy3_station_to_climate_zone[station] = climate_zone
In [72]:
# write all outputs:
!mkdir -p outputs
### USAF station -> X
# USAF station -> lat lng
with open('outputs/usaf_station_lat_lngs.json', 'w') as f:
    json.dump(accepted_near_usaf_station_lat_lngs, f)
# USAF station -> zipcodes
with open('outputs/usaf_station_zipcodes.json', 'w') as f:
    json.dump(usaf_station_to_zipcodes, f)
# USAF station -> climate_zone
with open('outputs/usaf_station_climate_zone.json', 'w') as f:
    json.dump(usaf_station_to_climate_zone, f)
# supported USAF stations
with open('outputs/supported_usaf_stations.json', 'w') as f:
    json.dump(sorted(accepted_near_usaf_station_lat_lngs.keys()), f)
### TMY3 station -> X
# TMY3 station -> lat lng
with open('outputs/tmy3_station_lat_lngs.json', 'w') as f:
    json.dump(accepted_near_tmy3_station_lat_lngs, f)
# TMY3 station -> zipcodes
with open('outputs/tmy3_station_zipcodes.json', 'w') as f:
    json.dump(tmy3_station_to_zipcodes, f)
# TMY3 station -> climate_zone
with open('outputs/tmy3_station_climate_zone.json', 'w') as f:
    json.dump(tmy3_station_to_climate_zone, f)
# supported TMY3 stations
with open('outputs/supported_tmy3_stations.json', 'w') as f:
    json.dump(sorted(accepted_near_tmy3_station_lat_lngs.keys()), f)
### Zipcode -> X
# zipcode -> lat lng
with open('outputs/zipcode_centroid_lat_lngs.json', 'w') as f:
    json.dump(zipcode_centroid_lat_lngs, f)
# zipcode -> climate_zone
with open('outputs/zipcode_climate_zone.json', 'w') as f:
    json.dump(zipcode_to_climate_zone, f)
# zipcode -> USAF station
with open('outputs/zipcode_usaf_station.json', 'w') as f:
    json.dump(zipcode_to_usaf_station, f)
# zipcode -> TMY3 station
with open('outputs/zipcode_tmy3_station.json', 'w') as f:
    json.dump(zipcode_to_tmy3_station, f)
# supported ZIP codes (ZCTA only)
with open('outputs/supported_zipcodes.json', 'w') as f:
    json.dump(sorted(zipcode_centroid_lat_lngs.keys()), f)
### climate zone -> X
# climate_zone -> USAF stations
with open('outputs/climate_zone_usaf_stations.json', 'w') as f:
    json.dump(climate_zone_to_usaf_stations, f)
# climate_zone -> TMY3 stations
with open('outputs/climate_zone_tmy3_stations.json', 'w') as f:
    json.dump(climate_zone_to_tmy3_stations, f)
# climate_zone -> zipcodes
with open('outputs/climate_zone_zipcodes.json', 'w') as f:
    json.dump(climate_zone_to_zipcodes, f)
# supported climate zones
with open('outputs/supported_climate_zones.json', 'w') as f:
    json.dump(sorted(climate_zone_to_zipcodes.keys()), f)
# OTHER
with open('outputs/usaf_station_inventory_data.json', 'w') as f:
    # tolist() converts the numpy arrays to plain Python numbers for JSON serialization
    json.dump({s: d.tolist() for s, d in usaf_station_inventory_data.items()}, f)
with open('outputs/tmy3_station_metadata.json', 'w') as f:
    json.dump(tmy3_station_data, f)
with open('outputs/GSOD-ISD_station_index.json', 'w') as f:
    json.dump(isd_index, f)