In [1]:
#Reload imported modules automatically before executing code, so edits to
#local libraries (pygeotools, imview) are picked up without a kernel restart
%load_ext autoreload
%autoreload 2
In [46]:
import os
import matplotlib.pyplot as plt
import json
import pandas as pd
%matplotlib notebook
from pygeotools.lib import iolib, timelib, malib, geolib
from imview.lib import pltlib
import numpy as np
import glob
import multiprocessing as mp
In [49]:
#Dataset selection: 'WV' (WorldView/GeoEye cross/along-track) or 'ASTER'
source = 'WV'
#Set True when processing the second round of dem_align output (nested *align dirs)
round2 = False

if source == 'WV':
    topdir = '/nobackupp8/deshean/hma/dem_coreg'
    globstr = '*track/2*align/*align_stats.json'
    if round2:
        globstr = '*track/2*align/2*align/*align_stats.json'
    #Pattern for stats files previously moved aside (%s = bad dir name)
    bad_globstr = '*track/%s/2*align/*align_stats.json'
elif source == 'ASTER':
    topdir = '/nobackupp8/deshean/hma/aster/dsm'
    globstr = '2*/AST*align/*align_stats.json'
    bad_globstr = '%s/AST*align/*align_stats.json'
    if round2:
        globstr = '2*/AST*align/AST*align/*align_stats.json'

#Single suffix for round-2 products (was repeated in three places)
suffix = '_round2' if round2 else ''
#Directory for bad align files
baddir = os.path.join(topdir, 'dem_align_%s_bad' % source) + suffix
#exist_ok avoids the exists()/makedirs() race of the original
os.makedirs(baddir, exist_ok=True)
#Prefix for the good/bad filename list outputs
prefix = os.path.join(topdir, 'dem_align_%s' % source) + suffix
json_fn_list = glob.glob(os.path.join(topdir, globstr))
#This was a hack to reintegrate the bad files for analysis/plotting
#if not round2:
#    json_fn_list.extend(glob.glob(os.path.join(topdir, bad_globstr % os.path.split(baddir)[-1])))
In [50]:
#Number of align_stats.json files found
len(json_fn_list)
Out[50]:
TODO: See the NYC taxi example in the Dask documentation — use Dask to load all *.json files into the DataFrame instead of mp.Pool.
In [51]:
def parse_json(json_fn):
    """Parse a dem_align *align_stats.json file into a flat record.

    Returns a list: [datetime, src_fn, align_fn, dx, dy, dz, dm, cx, cy,
    before-filter stats (med, nmad, p16, p84, spread),
    after-filter stats (med, nmad, p16, p84, spread)].
    """
    with open(json_fn, 'r') as f:
        d = json.load(f)
    #Acquisition datetime is parsed from the source DEM filename
    dt = timelib.fn_getdatetime(os.path.split(d['src_fn'])[-1])
    shift = d['shift']
    center = d['center_coord']
    rec = [dt, d['src_fn'], d['align_fn'],
           shift['dx'], shift['dy'], shift['dz'], shift['dm'],
           center['x'], center['y']]
    #Error statistics before and after filtering, in fixed column order
    for key in ('before_filt', 'after_filt'):
        stats = d[key]
        rec.extend([stats['med'], stats['nmad'],
                    stats['p16'], stats['p84'], stats['spread']])
    return rec
#Parallel read of all json files
#Use the pool as a context manager so worker processes are terminated and
#joined even if parse_json raises (the original pool was never closed/joined)
with mp.Pool() as pool:
    out = pool.map(parse_json, json_fn_list)
#Can read directly in pandas, but need to reformat
#df = pd.read_json(json_fn, orient='index')
In [52]:
#Column names matching the field order emitted by parse_json
columns = ['dt', 'src_fn', 'align_fn', 'dx', 'dy', 'dz', 'dm', 'cx', 'cy',
           'med_before', 'nmad_before', 'p16_before', 'p84_before', 'spread_before',
           'med_after', 'nmad_after', 'p16_after', 'p84_after', 'spread_after']
#Assemble all records into one frame, ordered chronologically
df = pd.DataFrame(out, columns=columns).sort_values(by='dt')
In [53]:
#Sanity-check the parsed column names
df.columns
Out[53]:
In [54]:
#Summary statistics; numeric_only=True because src_fn/align_fn are strings
#(modern pandas raises instead of silently dropping non-numeric columns)
df.mean(numeric_only=True), df.median(numeric_only=True), df.std(numeric_only=True)
Out[54]:
In [55]:
#Quick look: median error before/after co-registration and shift magnitude vs. time
med_ax = df.plot('dt', ['med_before', 'med_after', 'dm'], ls='none', marker='.', ms=1)
med_ax.set_ylabel('Median DEM error (m)')
med_ax.set_xlim(df['dt'].min(), df['dt'].max())
#med_ax.set_ylim(-100,100)
Out[55]:
In [56]:
#Default sigma-clip multiplier
C = 3.0
def outlier_filter(df, col=None, C=3.0, perc=None, absdiff=None):
    """Return a boolean Series flagging outlier rows of df[col].

    Bounds are chosen from, in priority order:
    absdiff -- fixed +/- window around the column mean
    perc    -- (low, high) percentile limits via malib.calcperc
    default -- mean +/- C * standard deviation
    """
    center = df[col].mean()
    if absdiff is not None:
        #Fixed absolute window around the mean
        minval = center - absdiff
        maxval = center + absdiff
    elif perc is not None:
        #Percentile-based limits
        minval, maxval = malib.calcperc(df[col], perc=perc)
    else:
        #Sigma-clip around the mean
        sigma = df[col].std()
        minval = center - C * sigma
        maxval = center + C * sigma
    idx = (df[col] < minval) | (df[col] > maxval)
    print("Removing outliers: %s, (%0.2f to %0.2f), %i" % (col,minval,maxval,idx.sum()))
    return idx
In [57]:
#Initialize all outlier masks to False (scalar False combines safely with | below)
dm_idx = False
med_after_idx = False
spread_idx = False
p16_idx = False
p84_idx = False
med_idx = False
nmad_idx = False
#Sensor-specific thresholds and percentile limits (empirically tuned)
if source == 'WV':
    #WV/GE
    print(source, 'round1')
    max_dm = 20
    ylim=(-20, 20)
    spread_idx = outlier_filter(df, col='spread_after', perc=(0.0, 99.9))
    p16_idx = outlier_filter(df, col='p16_after', perc=(2, 100.0))
    p84_idx = outlier_filter(df, col='p84_after', perc=(0.0, 98))
elif source == 'ASTER':
    #ASTER
    ylim=(-60, 60)
    max_dm = 60
    if round2:
        print(source, 'round2')
        spread_idx = outlier_filter(df, col='spread_after', perc=(0.0, 99.95))
        p16_idx = outlier_filter(df, col='p16_after', perc=(0.05, 99.9))
        p84_idx = outlier_filter(df, col='p84_after', perc=(0.01, 99.9))
    else:
        print(source, 'round1')
        spread_idx = outlier_filter(df, col='spread_after')
        p16_idx = outlier_filter(df, col='p16_after', perc=(0.5, 100.0))
        p84_idx = outlier_filter(df, col='p84_after', perc=(0.0, 99.5))
#Median error after alignment should be within +/-1 m of the population mean
med_after_idx = outlier_filter(df, col='med_after', absdiff=1)
#dm_idx = outlier_filter(df, col='dm', absdiff=max_dm)
#Reject total shift magnitudes above the sensor-specific threshold
dm_idx = (df['dm'] > max_dm)
print("Removing outliers, dm thresh (%0.2f m): %i" % (max_dm, dm_idx.sum()))
#Reject cases where median error magnitude grew by >10% after alignment
med_idx = (df['med_after'].abs() > 1.1*df['med_before'].abs())
print("Removing outliers, med increase: %i" % med_idx.sum())
#Check if nmad after is larger than before
nmad_idx = (df['nmad_after'] > 1.1*df['nmad_before'])
print("Removing outliers, nmad increase: %i" % nmad_idx.sum())
#idx = dm_idx | med_after_idx | spread_idx
#Union of all outlier criteria
idx = dm_idx | med_after_idx | spread_idx | p16_idx | p84_idx | med_idx | nmad_idx
print(idx.sum(), df.shape[0], '%0.2f%%' % (100*idx.sum()/df.shape[0]))
print(df.shape)
df_filt = df[~idx]
print(df_filt.shape)
In [58]:
#Hack, as the default pandas dt format won't work with the vectorized np_dt2decyear
#Round-trip through ordinals (dt2o/o2dt) to recover plain datetime objects,
#then convert to decimal years for plotting
#dt = timelib.dt2o(df['dt'])
#dt = outa[:,0]
#dt = timelib.np_dt2decyear(df['dt'])
dt = timelib.np_dt2decyear(timelib.o2dt(timelib.dt2o(df['dt'])))
#Display the decimal-year values
dt
Out[58]:
In [59]:
#C = 3.5
#ylim = (C * df['p16_after'].mean(), C * df['p84_after'].mean())
#ylim = (-20, 20)
#idx = (df['p16_after'] < ylim[0]) | (df['p84_after'] > ylim[1])
#print(ylim)
#ylim=(-20, 20)
#ylim=(-60, 60)
In [60]:
#Should isolate alongtrack and crosstrack
#Potentially plot different colors for different sensors
#Two-panel time series: median error with 16-84% spread bars, before vs after co-registration
f, axa = plt.subplots(2,1, sharex=True, sharey=True, figsize=(10,6))
f.suptitle("%s DEM elevation error, relative to filtered TanDEM-X reference" % source)
axa[0].set_title("Before co-registration")
axa[1].set_title("After co-registration")
#ax.plot(df['dt'], df['med_before'], ls='none', marker='o')
errprop={'ls':'none', 'alpha':1.0, 'elinewidth':0.5, 'marker':'.', 'ms':2}
#Asymmetric error bars: distance from median down to p16 and up to p84
yerr=np.array([df['med_before'] - df['p16_before'], df['p84_before'] - df['med_before']])
axa[0].errorbar(dt, df['med_before'], yerr=yerr, **errprop)
#yerr=np.array([df_filt['med_before'] - df_filt['p16_before'], df_filt['p84_before'] - df_filt['med_before']])
#axa[0].errorbar(dt[~idx], df_filt['med_before'], marker='', yerr=yerr, **errprop)
#All points (including outliers) after co-registration
yerr=np.array([df['med_after'] - df['p16_after'], df['p84_after'] - df['med_after']])
axa[1].errorbar(dt, df['med_after'], yerr=yerr, label=None, **errprop)
#Overplot inliers only
yerr=np.array([df_filt['med_after'] - df_filt['p16_after'], df_filt['p84_after'] - df_filt['med_after']])
axa[1].errorbar(dt[~idx], df_filt['med_after'], label='Inliers', yerr=yerr, **errprop)
axa[0].set_ylim(*ylim)
#axa[0].set_xlim(dt.min(), dt.max())
axa[0].set_xlim(2000, 2018.5)
start, end = axa[0].get_xlim()
axa[1].legend()
for ax in axa:
    ax.set_ylabel('DEM error (m)\n (median, 16-84% spread)')
    ax.axhline(0, c='0.5', zorder=99)
    #Yearly ticks across the shared x axis
    ax.xaxis.set_ticks(np.arange(start, end, 1.0))
    #ax.grid()
"""
import matplotlib.dates as mdates
years = mdates.YearLocator() # every year
yearsFmt = mdates.DateFormatter('%Y')
ax.xaxis.set_major_locator(years)
ax.xaxis.set_major_formatter(yearsFmt)
"""
f.autofmt_xdate()
In [32]:
#Summary statistics for the filtered (inlier) population
for stat in (df_filt.mean, df_filt.std, df_filt.median):
    print(stat())
In [61]:
#Inspect the records flagged as outliers
df[idx]
Out[61]:
In [62]:
#Write lists of align filenames for rejected and retained DEMs
#.loc avoids chained indexing; header=False preserves the original headerless
#output under modern pandas (Series.to_csv header default changed in 0.24)
df.loc[idx, 'align_fn'].to_csv(prefix+'_bad_fn.txt', index=False, header=False)
df.loc[~idx, 'align_fn'].to_csv(prefix+'_good_fn.txt', index=False, header=False)
In [63]:
#Destination directory for rejected align products
baddir
Out[63]:
In [64]:
import shutil
#Relocate each rejected align directory out of the processing tree
for align_fn in df[idx]['align_fn']:
    src_dir = os.path.join(topdir, os.path.split(align_fn)[0])
    print("Moving %s to %s" % (src_dir, baddir))
    shutil.move(src_dir, baddir)
In [65]:
def make_plot3d(df, title=None, orthogonal_fig=True, maxdim=None):
    """Plot orthogonal projections (x-y, x-z, y-z) of co-registration translation vectors.

    Parameters
    ----------
    df : pandas.DataFrame with 'dx', 'dy', 'dz' columns (meters)
    title : str, optional
        Figure title; if None, a summary title with mean/std, med/nmad and
        CE90/LE90 statistics is generated (the original version ignored a
        caller-supplied title and always rebuilt it)
    orthogonal_fig : bool
        If True, draw the 3-panel orthogonal scatter figure
    maxdim : float, optional
        Symmetric axis limit (m); derived from the 99th-percentile offset
        and CE90/LE90 when None
    """
    x = df['dx']
    y = df['dy']
    z = df['dz']
    #Central tendency and dispersion of the three components
    cmean = np.mean([x,y,z], axis=1)
    cstd = np.std([x,y,z], axis=1)
    cmed = np.median([x,y,z], axis=1)
    cnmad = malib.mad([x,y,z], axis=1)
    #Remove the mean bias for "bias-corrected" error estimates
    x_corr = x - cmean[0]
    y_corr = y - cmean[1]
    z_corr = z - cmean[2]
    #Circular (horizontal) and linear (vertical) 90% error
    ce90 = geolib.CE90(x,y)
    ce90_corr = geolib.CE90(x_corr,y_corr)
    le90 = geolib.LE90(z)
    le90_corr = geolib.LE90(z_corr)
    if maxdim is None:
        #maxdim = np.ceil(np.max([np.max(np.abs([x, y, z])), ce90, le90]))
        #99th percentile so one extreme point doesn't blow up the axes
        maxdim = np.ceil(np.max([np.percentile(np.abs([x, y, z]), 99), ce90, le90]))
    if orthogonal_fig:
        from matplotlib.patches import Ellipse
        #fig_ortho, axa = plt.subplots(1, 3, sharex=True, sharey=True, figsize=(10,5))
        fig_ortho, axa = plt.subplots(1, 3, figsize=(10,5))
        #Only build the default stats title when the caller didn't supply one
        if title is None:
            title = 'Co-registration Translation Vector Components, n=%i\n' % x.shape[0]
            title += 'mean: (%0.2f, %0.2f, %0.2f), std: (%0.2f, %0.2f, %0.2f)\n' % (tuple(cmean) + tuple(cstd))
            title += 'med: (%0.2f, %0.2f, %0.2f), nmad: (%0.2f, %0.2f, %0.2f)\n' % (tuple(cmed) + tuple(cnmad))
            title += 'CE90: %0.2f (Bias-corrected: %0.2f), LE90: %0.2f (Bias-corrected: %0.2f)' % (ce90, ce90_corr, le90, le90_corr)
        plt.suptitle(title)
        dot_prop={'color':'k', 'linestyle':'None', 'marker':'.', 'ms':3, 'label':'ICP correction vector', 'alpha':0.1}
        mean_prop={'color':'r', 'linestyle':'None', 'marker':'o', 'label':'Mean'}
        ell_prop={'linewidth':0, 'alpha':0.3, 'label':'CE90/LE90'}
        for ax in axa:
            ax.set_xlim(-maxdim, maxdim)
            ax.set_ylim(-maxdim, maxdim)
            ax.minorticks_on()
            ax.set_aspect('equal')
        #x-y panel with CE90 circle
        axa[0].plot(x, y, **dot_prop)
        axa[0].plot(cmean[0], cmean[1], **mean_prop)
        axa[0].set_xlabel('X offset (m)')
        axa[0].set_ylabel('Y offset (m)')
        e = Ellipse((0,0), 2*ce90, 2*ce90, **ell_prop)
        axa[0].add_artist(e)
        axa[0].legend(prop={'size':8}, numpoints=1, loc='upper left')
        #x-z panel with CE90/LE90 ellipse
        axa[1].plot(x, z, **dot_prop)
        axa[1].plot(cmean[0], cmean[2], **mean_prop)
        axa[1].set_xlabel('X offset (m)')
        axa[1].set_ylabel('Z offset (m)')
        e = Ellipse((0,0), 2*ce90, 2*le90, **ell_prop)
        axa[1].add_artist(e)
        #y-z panel with CE90/LE90 ellipse
        axa[2].plot(y, z, **dot_prop)
        axa[2].plot(cmean[1], cmean[2], **mean_prop)
        axa[2].set_xlabel('Y offset (m)')
        axa[2].set_ylabel('Z offset (m)')
        e = Ellipse((0,0), 2*ce90, 2*le90, **ell_prop)
        axa[2].add_artist(e)
        plt.tight_layout()
        #Note: postscript doesn't properly handle transparency
        #fig_fn = '%s_translation_vec_local_meters_orthogonal.pdf' % out_fn_prefix
        #plt.savefig(fig_fn, dpi=600, bbox_inches='tight')
In [66]:
def make_map(df):
    """Map view of the x/y/z co-registration offsets at each DEM center coordinate."""
    dx = df['dx']
    dy = df['dy']
    dz = df['dz']
    cx = df['cx']
    cy = df['cy']
    f, axa = plt.subplots(3, sharex=True, sharey=True, figsize=(5,10))
    #Symmetric color limits from the 99th-percentile absolute offset
    maxdim = np.ceil(np.percentile(np.abs([dx, dy, dz]), 99))
    #vmin, vmax = (-15, 15)
    vmin, vmax = (-maxdim, maxdim)
    #Shared scatter styling for all three panels
    opt = {'edgecolor':'k', 'vmin':vmin, 'vmax':vmax, 'cmap':'RdYlBu', 's':5, 'lw':0.3}
    sc = axa[0].scatter(cx, cy, c=dx, **opt)
    axa[0].set_title("X-offset required to align")
    axa[1].scatter(cx, cy, c=dy, **opt)
    axa[1].set_title("Y-offset required to align")
    axa[2].scatter(cx, cy, c=dz, **opt)
    axa[2].set_title("Z-offset required to align")
    for ax in axa:
        #ax.set_aspect('equal')
        #All colorbars reference the first panel's mappable (identical norm/cmap)
        pltlib.add_cbar(ax, sc, clim=(vmin, vmax))
    #fig_fn = '%s_map.png' % out_fn_prefix
    #f.savefig(fig_fn, dpi=300, bbox_inches='tight')
In [68]:
#Sort inliers by shift magnitude (descending) before plotting
df_filt_sort = df_filt.sort_values(by='dm', ascending=False)
print("Creating plot")
#Orthogonal scatter of translation components for the inlier population
make_plot3d(df_filt_sort, maxdim=max_dm)
#print("Creating map")
#make_map(df_filt_sort)
In [ ]: