In [1]:
%pylab notebook
from charistools.readers import read_tile
import glob
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import re
pd.options.display.max_rows = 200
In [ ]:
#basin_version = "IN_v01"
#basin_version = "SY"
#basin_version = "BR"
#basin_version = "GA_v01"
basin_version = "SY_v01"
basin_dir = "%s_snow_subs_basins_modis_masks" % basin_version
%cd /Users/brodzik/projects/CHARIS/basins/subbasins_of_major_basins/
%cd $basin_dir
In [ ]:
list = glob.glob("%s_OBJECTID*tif" % basin_version)
list = sort(list)
list
In [ ]:
list.shape
In [ ]:
# Count how many unique basins there are:
basins = [re.search('.._(OBJECTID[0-9]{1,3})', file).group(1) for file in sort(list)]
print(len(basins))
uniq_basins = set(basins)
print(len(uniq_basins))
In [2]:
def display_basin_mask(file):
data = read_tile(filename=file)
fig, ax = plt.subplots()
print(np.amin(data), np.amax(data))
ax.imshow(data, cmap='Greys', vmin=np.amin(data), vmax=np.amax(data), interpolation='None')
ax.set_title(file)
plt.axis('off')
In [ ]:
data = read_tile(filename="GA_Langtang_at_Kyanjin.basin_mask.h25v06.tif")
w2 = data[data == 2]
w2.shape[0] * 463. * 463. / (1000. * 1000.)
#%ls
display_basin_mask(filename)
In [ ]:
for file in sort(list):
display_basin_mask(file)
#plt.close('all')
In [ ]:
df = pd.DataFrame(list, columns = ["File"])
df["basename"] = [re.search('(.+_OBJECTID[0-9]{1,3})', file).group(1) for file in df["File"]]
df["tileID"] = [re.search('(h[0-9]{2}v[0-9]{2})', file).group(1) for file in df["File"]]
df
In [ ]:
print(set(df["tileID"]))
In [ ]:
# Count the number of tiles per basin
basins = df.groupby(['basename'], as_index=False).count().sort_values(by=["File", "basename"])
basins.drop('tileID', axis=1, inplace=True)
basins.columns = ['basename', 'num_tiles']
basins
In [ ]:
# Get the basin prefix (basically the 2-letter ID) for output files
basin_prefix = re.search('(..)_', basins["basename"][0]).group(1)
for num_tiles in set(basins["num_tiles"]):
out_filename = "%s_%dtile_basins.txt" % (basin_prefix, num_tiles)
print("out file: ", out_filename)
out_fh = open(out_filename, 'w')
for subbasin in basins[basins["num_tiles"] == num_tiles]["basename"].values:
# Get the list of tileIDs for this basin
out_fh.write("%s\n" % subbasin)
out_fh.close()
In [ ]:
%cat GA_4tile_basins.txt
In [ ]:
%cat SY_3tile_basins.txt
In [ ]:
%cat SY_2tile_basins.txt
In [ ]:
%cat SY_1tile_basins.txt
In [ ]:
# Special processing if we need to separate out basins that intersect only certain tiles
# Get the basin prefix (basically the 2-letter ID) for output files
basin_prefix = re.search('(..)_', basins["basename"][0]).group(1)
for num_tiles in set(basins["num_tiles"]):
wh23_4_filename = "%s_%dtile_basins_with_h23-4.txt" % (basin_prefix, num_tiles)
other_filename = "%s_%dtile_basins_other.txt" % (basin_prefix, num_tiles)
print("next file: ", wh23_4_filename)
print("other file: ", other_filename)
h23_fh = open(wh23_4_filename, 'w')
other_fh = open(other_filename, 'w')
for subbasin in basins[basins["num_tiles"] == num_tiles]["basename"].values:
# Get the list of tileIDs for this basin
# Skip this basin if tileIDs include anything other than: [h23v05 h24v05]
h23_4_basin = True
for tileID in df.loc[df['basename'] == subbasin]["tileID"].values:
if re.search('(h2[34]v05)', tileID):
print("have this one")
else:
print("Don't have tileID=%s, need to skip basin=%s" % (tileID, subbasin))
h23_4_basin=False
if h23_4_basin:
print("H23-4 BASIN: %s: %s" % (subbasin, ' '.join(df.loc[df['basename'] == subbasin]["tileID"].values)))
h23_fh.write("%s\n" % subbasin)
else:
print("OTHER BASIN: %s: %s" % (subbasin, ' '.join(df.loc[df['basename'] == subbasin]["tileID"].values)))
other_fh.write("%s\n" % subbasin)
h23_fh.close()
other_fh.close()
In [ ]:
%cat IN_2tile_basins_with_h23-4.txt
In [ ]:
%cat IN_2tile_basins_other.txt
In [ ]:
%cat IN_1tile_basins_with_h23-4.txt
In [ ]:
%cat IN_1tile_basins_other.txt
In [ ]: