In [ ]:
"""
Convert centroids to meshgrid
https://jakevdp.github.io/PythonDataScienceHandbook/04.13-geographic-data-with-basemap.html
http://bagrow.com/dsv/heatmap_basemap.html
"""
"""
Fast Geospatial analytics with dask and geopandas
http://matthewrocklin.com/blog/work/2017/09/21/accelerating-geopandas-1
"""
In [1]:
import ogh
In [4]:
# smoke check: display the type of the ogh metadata object (rich cell output)
type(ogh.ogh_meta())
Out[4]:
In [30]:
# verify the metadata catalog is non-empty once coerced to a plain dict
len(dict(ogh.ogh_meta()))>0
Out[30]:
In [11]:
# validate the argument types expected by ogh.saveDictOfDf before invoking it
path = 'test.json'
obj = dict()
# isinstance is the idiomatic type check (`type(x) is str` rejects subclasses)
assert isinstance(path, str)
assert isinstance(obj, dict)
# ogh.saveDictOfDf(outfilepath=path, dictionaryObject=obj)
In [33]:
import pandas as pd
In [34]:
# readShapefileTable should return a pandas DataFrame of shapefile attributes.
# BUG FIX: the original compared the type with a DataFrame *instance*
# (`type(test) is pd.DataFrame()`), which is always False, so the check
# silently never ran; use isinstance and assert the result directly.
test = ogh.readShapefileTable(shapefile='/home/jovyan/work/Observatory-1/tests/data/shape.shp')
assert isinstance(test, pd.DataFrame)
In [39]:
import fiona
In [48]:
# NOTE(review): imports scattered mid-notebook; prefer a single import cell at the top
import geopandas as gpd
import numpy as np
# NOTE(review): hardcoded absolute path — prefer a configurable DATA_DIR
path='/home/jovyan/work/Observatory-1/tests/data/shape.shp'
test_path = gpd.read_file(path)
# centroid of the first feature; np.array(Point) unpacks to [x, y] = [lon, lat]
lon, lat= np.array(test_path.centroid[0])
# display the longitude as the cell output
lon
Out[48]:
In [58]:
# NOTE(review): out-of-order execution — test_poly, test_lat, and test_lon are
# defined in a LATER cell (In [51]) but used here (In [58]); this cell fails
# under Restart & Run All. Move the definitions above this cell.
# Filters candidate points to those within the polygon buffered by 0.06 deg.
ogh.filterPointsinShape(shape=test_poly.geometry[0], points_lat=[test_lat], points_lon=[test_lon], points_elev=None, buffer_distance=0.06,
buffer_resolution=16, labels=['LAT', 'LONG_', 'ELEV'])
Out[58]:
In [51]:
# paths for the test shapefile and the mappingfile output
inpath='tests/data/shape.shp'
outpath='tests/data/test_mappingfile.csv'
# NOTE(review): hardcoded absolute prefix — prefer a configurable project root
test_poly=gpd.read_file('/home/jovyan/work/Observatory-1/'+inpath)
# centroid of the first feature; np.array(Point) unpacks to [x, y] = [lon, lat]
test_lon, test_lat= np.array(test_poly.centroid[0])
print(test_lon)
In [ ]:
def test_mapToBlock():
    """Unit-test stub for mapToBlock.

    FIX: the original cell ended with a bare `listofregions =` — a
    SyntaxError that made the entire cell unrunnable. The fixture is
    completed with an empty list so the cell parses; the region fixture
    still needs to be built.
    """
    listofpoints = [point.Point([0, 0])]
    # TODO: build a df_regions fixture with 'bbox' and 'dirname' columns
    # and assert on the 'blocks' column mapToBlock writes into df_points
    listofregions = []
def mapToBlock(df_points, df_regions):
    """
    Map block membership for each coordinate point

    df_points: (dataframe) a dataframe containing the lat and long for each time-series datafile
    df_regions: (dataframe) a dataframe containing the bounding box (bbox) for each block cluster

    Writes the matching region's dirname into df_points['blocks'] for every
    point whose (LONG_, LAT) location intersects that region's bbox, and
    returns the (mutated) df_points.
    """
    for _, region in df_regions.iterrows():
        bbox = region['bbox']
        dirname = str(region['dirname'])
        for point_idx, coords in df_points.iterrows():
            location = point.Point(coords['LONG_'], coords['LAT'])
            if location.intersects(bbox):
                df_points.loc[point_idx, 'blocks'] = dirname
    return(df_points)
In [2]:
# initialize ogh_meta
meta_file = dict(ogh.ogh_meta())
sorted(meta_file.keys())
Out[2]:
In [35]:
# small fixture frame: two rows with a string column and an int column
obj1 = pd.DataFrame(
    [('a', 1), ('b', 2)],
    columns=['foo', 'bar'],
)
obj1
Out[35]:
In [36]:
import os
In [37]:
# sanity check that the test shapefile exists
# NOTE(review): hardcoded absolute path — prefer a configurable DATA_DIR
os.path.exists('/home/jovyan/work/Observatory-1/tests/data/shape.shp')
Out[37]:
In [ ]:
"""
unit test: ftp source
"""
import ftplib
# sample Livneh 2013 VIC flux file url used to probe the ftp mirror below
html = 'ftp://livnehpublicstorage.colorado.edu/public/Livneh.2013.CONUS.Dataset/Fluxes.asc.v.1.2.1915.2011.bz2/fluxes.100.95.25.36/VIC_fluxes_Livneh_CONUSExt_v.1.2_2013_25.90625_-97.40625.bz2'
def ftp_cataloging(html):
    """Return True when the file named by an ftp:// url is listed on the server.

    html: (str) full ftp url, e.g. 'ftp://host/dir/subdir/file.bz2'
    returns: (bool) True when exactly one directory entry matches the filename

    FIX: the original opened the connection and only closed it on the happy
    path — an exception from login/cwd/nlst leaked the socket. The FTP
    context manager closes the connection on every exit path.
    """
    # split the url into host, remote directory, and file name
    domain = html.replace('ftp://', '').split('/', 1)[0]
    subdomain = html.replace('ftp://', '').split('/', 1)[1].rsplit('/', 1)[0]
    filename = html.rsplit('/', 1)[1]
    with ftplib.FTP(domain) as ftp:
        ftp.login()  # anonymous login
        ftp.cwd(subdomain)
        existence = len(ftp.nlst(filename)) == 1
    return existence
# Exercise ftp_cataloging (defined above) instead of duplicating its parsing
# and connection logic inline.
# FIXES: (1) the original never closed its FTP connection; (2) the pattern
# `if cond: assert True` asserts nothing when cond is False — these asserts
# now actually fail if the server listing disagrees.
fakename = html.rsplit('/', 1)[1] + str(1)
# the real file should be listed exactly once on the server
assert ftp_cataloging(html), 'expected file missing from ftp server'
# a fabricated filename in the same directory should not be listed
assert not ftp_cataloging(html + str(1)), 'nonexistent file unexpectedly listed: ' + fakename
In [ ]:
In [ ]:
# cataloging the existence of target ftp files
mfs=[]
for shp, filename in zip([sauk, elwha, riosalado],
['Sauk_mappingfile.csv','Elwha_mappingfile.csv','RioSalado_mappingfile.csv']):
obj=dask.delayed(ogh.treatgeoself)(shapefile=shp, NAmer=NAmer, buffer_distance=0.06,
mappingfile=os.path.join(os.getcwd(),filename))
mfs.append(obj)
dask.visualize(mfs)
import ftplib
# NOTE(review): duplicate definition — ftp_cataloging is already defined in an
# earlier cell; this redefinition silently shadows it. Keep a single definition.
def ftp_cataloging(html):
    """Return True when exactly one ftp directory entry matches the url's filename."""
    # split the url into host, remote directory, and file name
    domain = html.replace('ftp://','').split('/',1)[0]
    subdomain = html.replace('ftp://','').split('/',1)[1].rsplit('/',1)[0]
    filename = html.rsplit('/',1)[1]
    ftp=ftplib.FTP(domain)
    ftp.login()  # anonymous login
    ftp.cwd(subdomain)
    # NOTE(review): connection leaks if cwd/nlst raises — see earlier definition
    existence=len(ftp.nlst(filename))==1
    ftp.close()
    return(existence)
# generate table of lats and long coordinates
maptable = pd.read_csv(mappingfile3)
# compile the longitude and latitude points
locations = ogh.compile_dailyMET_Livneh2013_locations(maptable)
output = [dask.delayed(ftp_cataloging)(html) for html in locations]
print(output)
output2 = dask.compute(output)
output2
# Download the files
# ftp_download_p(locations)
In [ ]:
# ogh.getDailyMET_livneh2013(homedir, mappingfile2)
# ogh.getDailyMET_bcLivneh2013(homedir, mappingfile2)
# ogh.getDailyMET_livneh2015(homedir, mappingfile2)
# ogh.getDailyVIC_livneh2013(homedir, mappingfile2)
# ogh.getDailyVIC_livneh2015(homedir, mappingfile2)
# ogh.getDailyWRF_salathe2014(homedir, mappingfile2)
# ogh.getDailyWRF_bcsalathe2014(homedir, mappingfile2)
In [ ]:
"""
All watersheds
shape_path: (dir) the path to the watershed ESRI shapefile without the .shp suffix
espg: (int) the epsg map projection
polygon_color: (str) the name or single-letter code for the color of the watershed polygons
map_margins: (float) the multiplier for the map margins outside of the watershed bounding box
"""
shape_path = os.path.join(homedir, 'allwatersheds')
epsg = 3857
polygon_color = 'm' # magenta
map_margin = 0.25 # 25% of width and height dimensions
mapscaleloc = 'botleft'
# generate the figure axis
fig = plt.figure(figsize=(3,3), dpi=500)
ax1 = plt.subplot2grid((1,1),(0,0))
# normalize the color distribution according to the value distribution
cmap = mpl.cm.gnuplot2
# calculate bounding box based on the watershed shapefile
watershed = fiona.open(shape_path+'.shp')
minx, miny, maxx, maxy = watershed.bounds
w, h = maxx - minx, maxy - miny
watershed.close()
# generate basemap
m = Basemap(projection='merc', epsg=epsg, resolution='h', ax=ax1,
llcrnrlon=minx - map_margin*w, llcrnrlat=miny - map_margin*h,
urcrnrlon=maxx + map_margin*w, urcrnrlat=maxy + map_margin*h)
# affix boundaries
m.drawcountries(linewidth=0.1)
m.drawcoastlines(linewidth=0.1)
m.drawmapboundary(fill_color='lightskyblue')
m.fillcontinents(color='cornsilk', lake_color='lightskyblue')
m.drawrivers(color='lightskyblue', linewidth=0.1)
m.drawstates(linewidth=0.1, linestyle='solid', color='gray')
m.drawcountries(color='gray', linewidth=0.1)
# draw distance scale
# length = round(w*50,-2)
# yoffset = w*1000
# m.drawmapscale(minx, miny-0.5*map_margin*h, maxx, maxy, length, yoffset=yoffset, barstyle='fancy', fontsize=3, linewidth=0)
m.drawmapscale(minx, miny, maxx, maxy, 500, yoffset=35000, barstyle='fancy', fontsize=3, linewidth=0)
# draw cardinal markers
m.drawparallels(np.arange(10,70,10),labels=[1,0,0,0], fontsize=3, color='black', linewidth=0.5)
m.drawmeridians(np.arange(-160,0,10),labels=[0,0,1,0], fontsize=3, color='black', linewidth=0.5)
# read and transform the watershed shapefiles
m.readshapefile(shapefile = shape_path, name='allwatersheds', linewidth=0)
# load and transform each polygon in shape
patches = [PolygonPatch(Polygon(np.array(shape)), fc=polygon_color, ec=polygon_color, linewidth=0.1, zorder=5.0)
for info, shape in zip(m.allwatersheds_info, m.allwatersheds)]
# assimilate shapes to plot axis
coll = PatchCollection(patches, cmap=cmap, match_original=True, zorder=5.0)
ax1.add_collection(coll)
plt.savefig(os.path.join(homedir, 'statemap.png'), dpi=500)
plt.show()
In [ ]:
%%time
import tarfile
def tar_tree(tarfilename, source_dir):
    """Gzip-compress an entire directory tree into a single tar archive.

    tarfilename: (str) path of the .tar.gz archive to create
    source_dir: (str) directory to archive; stored under its basename
    returns: (str) tarfilename, convenient for delayed/parallel pipelines

    FIX: the original called tar.close() after the with-block, which had
    already closed the archive — the redundant call is removed.
    """
    with tarfile.open(tarfilename, "w:gz") as tar:
        # arcname strips the leading path so the archive root is the folder name
        tar.add(source_dir, arcname=os.path.basename(source_dir))
    return tarfilename
# archive each dataset folder as <folder>.tar.gz; tasks are built lazily and
# executed in parallel by dask.compute
# NOTE(review): dask and homedir are not defined in any visible cell
tarlist=[]
for folder in ['livneh2013', 'livneh2015','salathe2014']:
    sourcepath = os.path.join(homedir, folder)
    outfilename = sourcepath+'.tar.gz'
    tar = dask.delayed(tar_tree)(tarfilename=outfilename, source_dir=sourcepath)
    tarlist.append(tar)
dask.compute(tarlist)