In [ ]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from planet4 import region_data, io, markings
In [ ]:
# Folder holding the Inca City season-2/3 results after the 0.5 fnotch cut.
root = io.analysis_folder() / 'inca_s23_0.5cut_applied/'
In [ ]:
# One '<obsid>_fans_latlons.csv' per observation in the results folder.
fan_fnames = list(root.glob("*_fans_latlons.csv"))
In [ ]:
# One '<obsid>_blotches_latlons.csv' per observation in the results folder.
blotch_fnames = list(root.glob("*_blotches_latlons.csv"))
In [ ]:
from planet4 import markings
In [ ]:
def get_marking(line, cut=0.8):
    """Rebuild a Fnotch from a combined row and return the winning marking.

    Parameters
    ----------
    line : pd.Series
        Row whose index labels carry 'fan_' and 'blotch_' prefixes plus a
        'fnotch_value' entry.
    cut : float
        Probability threshold handed to ``Fnotch.get_marking``.

    Returns
    -------
    The fan or blotch marking chosen by ``Fnotch.get_marking(cut)``.
    """
    data = []
    for prefix in ['fan_', 'blotch_']:
        m = line[line.index.str.startswith(prefix)]
        # Strip the 'fan_'/'blotch_' prefix from the labels.
        # BUGFIX: Series.rename_axis(callable) was deprecated (pandas 0.21)
        # and later removed; Series.rename with a callable mapper is the
        # supported equivalent.
        data.append(m.rename(lambda label: label.split('_', 1)[1]))
    fnotch = markings.Fnotch(line.fnotch_value, data[0], data[1])
    return fnotch.get_marking(cut)
In [ ]:
def get_final_markings_counts(img_name, cut=0.5, folder=None):
    """Count final fan and blotch markings for one HiRISE observation.

    Reads the pre-computed '<img_name>_blotches_latlons.csv' and
    '<img_name>_fans_latlons.csv' files and returns their row counts.

    Parameters
    ----------
    img_name : str
        HiRISE observation id, e.g. 'ESP_020115_0985'.
    cut : float
        Currently unused; kept for interface compatibility.
    folder : pathlib.Path or str, optional
        Directory holding the CSV files; defaults to the module-level
        ``root`` results folder.

    Returns
    -------
    dict
        Keys 'obsid', 'n_blotches', 'n_fans'.  A dict is used because a
        list of dicts is a natural input format for pd.DataFrame later.

    Raises
    ------
    OSError
        If either CSV file is missing (callers skip such observations).
    """
    folder = root if folder is None else Path(folder)
    d = {'obsid': img_name}
    # Same read order as before: blotches first, then fans.
    for key, kind in [('n_blotches', 'blotches'), ('n_fans', 'fans')]:
        fname = folder / f'{img_name}_{kind}_latlons.csv'
        d[key] = len(pd.read_csv(fname))
    return d
In [ ]:
# Quick sanity check of the counting function on a single observation.
get_final_markings_counts('ESP_020115_0985')
In [ ]:
from nbtools import ListProgressBar
In [ ]:
# Collect per-observation marking counts for Inca City season 2.
season2_rows = []
progbar = ListProgressBar(region_data.Inca.season2)
for obsid in region_data.Inca.season2:
    progbar.value = obsid  # update the notebook progress display
    try:
        row = get_final_markings_counts(obsid)
    except OSError:
        # Results file missing for this observation -> skip it.
        continue
    season2_rows.append(row)
season2 = pd.DataFrame(season2_rows).sort_values(by='obsid')
In [ ]:
# Same collection for season 3.
season3_rows = []
progbar = ListProgressBar(region_data.Inca.season3)
for obsid in region_data.Inca.season3:
    progbar.value = obsid  # update the notebook progress display
    try:
        row = get_final_markings_counts(obsid)
    except OSError:
        # Results file missing for this observation -> skip it.
        continue
    season3_rows.append(row)
season3 = pd.DataFrame(season3_rows).sort_values(by='obsid')
In [ ]:
# Peek at the assembled season-2 counts table.
season2.head()
In [ ]:
# NOTE(review): duplicate of the previous cell — likely a leftover.
season2.head()
In [ ]:
# Season-2 observation metadata, one row per obsid.
meta_fname = io.analysis_folder() / 'inca_season2_metadata.csv'
season2_meta = pd.read_csv(meta_fname)
# The label 'path' column is not needed for the analysis below;
# drop it in place (delete this line if it turns out to be required).
season2_meta.drop('path', axis=1, inplace=True)
In [ ]:
# Inspect the metadata columns before merging.
season2_meta.head()
In [ ]:
# Attach the metadata columns (inner join on observation id).
season2 = pd.merge(season2, season2_meta, on='obsid')
In [ ]:
# Verify the merge added the metadata columns.
season2.head()
In [ ]:
# Season-3 observation metadata, one row per obsid.
season3_meta = pd.read_csv(io.analysis_folder() / 'inca_season3_metadata.csv')
In [ ]:
# Attach the metadata columns (inner join on observation id).
season3 = pd.merge(season3, season3_meta, on='obsid')
In [ ]:
# Index both tables by solar longitude for time-ordered plotting.
for df in (season2, season3):
    df.set_index('l_s', inplace=True)
In [ ]:
# Total marking count (fans + blotches) per observation.
for df in (season2, season3):
    df['both'] = df.n_blotches + df.n_fans
In [ ]:
# Display the full season-2 table.
season2
In [ ]:
# Normalize total counts by image width (line_samples); the binning
# correction is intentionally left out — re-enable by also dividing
# by df.binning.
for df in (season2, season3):
    df['scaled'] = df.both / df.line_samples
In [ ]:
%matplotlib inline
# Compare scaled marking counts over solar longitude for both seasons;
# season3's l_s range sets the shared x-limits.
season2.scaled.plot(style='*', ms=14,
xlim=(season3.index.min(), season3.index.max()), label='season2')
season3.scaled.plot(style='*', color='red', label='season3', ms=14)
plt.legend(loc='best')
plt.ylabel('Scaled prevalence of markings')
plt.title("Number of markings in Inca City region,scaled for binning and image size.")
#plt.savefig('/Users/klay6683/Desktop/inca_s23.pdf')
In [ ]:
# NOTE(review): hard-coded user-specific path — breaks on other machines;
# consider storing this next to the other metadata under io.analysis_folder().
map_proj_metadata = pd.read_csv('/Users/Anya/Dropbox/myPy/IC_season2_LineSampleScale.csv')
In [ ]:
# Mapped image area: lines * samples * map_scale^2, scaled by 1e-6
# (assuming map_scale is in meters per pixel, 1e-6 converts m^2 -> km^2
# — TODO confirm the unit against the metadata file).
map_proj_metadata['area_km'] = (1e-6 * map_proj_metadata.lines
                                * map_proj_metadata.samples
                                * map_proj_metadata.map_scale ** 2)
map_proj_metadata.set_index('id', inplace=True)
In [ ]:
# Re-index season2 by obsid (this discards the l_s index set earlier)
# so the divisions below align with map_proj_metadata's 'id' index.
season2.set_index('obsid', inplace=True)
In [ ]:
# Per-area marking densities for season 2, aligned on obsid/id index.
for col, counts in (('scaled_nr_fans', season2.n_fans),
                    ('scaled_nr_blos', season2.n_blotches),
                    ('scaled_nr_both', season2.both)):
    map_proj_metadata[col] = counts / map_proj_metadata.area_km
In [ ]:
# Switch the index to solar longitude for plotting against l_s.
map_proj_metadata.set_index('l_s', inplace=True)
In [ ]:
# Season-2 fan/blotch/total densities vs solar longitude; x-limits are
# taken from season3's l_s range to match the earlier figure.
map_proj_metadata.scaled_nr_fans.plot(style='*', ms=14,
xlim=(season3.index.min(), season3.index.max()), label='fans ')
map_proj_metadata.scaled_nr_blos.plot(style='.', color='red', label='blotches', ms=14)
map_proj_metadata.scaled_nr_both.plot(style='*', color='green', label='both', ms=14)
plt.legend(loc='best')
plt.ylabel('Scaled prevalence of markings')
plt.title("Number of markings in Inca City region,season 2, scaled for binning and image size.")
In [ ]:
# Quick look at the (now obsid-indexed) blotch counts.
season2.n_blotches
In [ ]:
# HDF files matching the CSV catalogs; used below to build marking objects.
fan_fnames_hdf = list(root.glob("*_fans.hdf"))
blotch_fnames_hdf = list(root.glob("*_blotches.hdf"))
In [ ]:
# Pick one observation (index 8) and load its marking containers.
n = 8
bc = markings.BlotchContainer.from_fname(blotch_fnames_hdf[n])
fc = markings.FanContainer.from_fname(fan_fnames_hdf[n])
# bc.content is list of Blotch objects; collect each marking's area.
all_bl_areas = np.array([obj.area for obj in bc.content])
all_fan_areas = np.array([obj.area for obj in fc.content])
In [ ]:
# some_other_stuff = [some other stuff]
# If the above lists have the same length, you can combine them:
# df = pd.DataFrame({'areas': all_areas,
#                    'name2': some_other_stuff})
In [ ]:
# Filenames corresponding to the containers loaded above.
blotch_fnames[n], fan_fnames[n]
In [ ]:
#pd.read_csv(str(blotch_fnames[n]))
In [ ]:
# Area statistics, ignoring NaN entries.
min_bl = np.nanmin(all_bl_areas)
min_fan = np.nanmin(all_fan_areas)
print(min_bl, np.nanmax(all_bl_areas), len(all_bl_areas))
print(min_fan, np.nanmax(all_fan_areas), len(all_fan_areas))
# BUGFIX: the original `all_fan_areas == np.nan` mask is always False
# (NaN never compares equal to anything), and `.max()` returns NaN when
# NaNs are present — so the line was a no-op. Use np.isnan for the mask
# and np.nanmax for a finite sentinel just above the real maximum.
all_fan_areas[np.isnan(all_fan_areas)] = np.nanmax(all_fan_areas) + 1
In [ ]:
# Overlaid area histograms; the blotch range is clipped to min_bl + 1e3
# while fans use their full (non-NaN) range.
fh = plt.hist(all_fan_areas, bins = 200, range = (min_fan, np.nanmax(all_fan_areas)), alpha=0.75, color = 'red')
bh = plt.hist(all_bl_areas, bins = 200, range = (min_bl, min_bl+1e3), alpha=0.75, color = 'blue')
#fh = plt.hist(all_fan_areas, 200)
In [ ]:
# BUGFIX: the `normed` keyword was deprecated and removed from
# matplotlib's hist (gone since 3.1); `density=True` is the supported
# equivalent (normalizes the histogram so its area integrates to 1).
fh = plt.hist(all_fan_areas, bins=200, range=(min_fan, 5e3), density=True)
In [ ]:
# First few bin edges of the blotch histogram (hist returns counts, edges, patches).
bh[1][:4]
In [ ]: