In [ ]:
# setup
import planet4 as p4
from planet4.dbscan import DBScanner
from planet4 import io, markings
import pandas as pd
from pathlib import Path
import pkg_resources as pr
with pr.resource_stream('planet4', 'data/test_db.csv') as f:
    data = pd.read_csv(f)
# import warnings
# with warnings.catch_warnings():
# warnings.filterwarnings("ignore",category=DeprecationWarning)
from tempfile import TemporaryDirectory
_tdir = TemporaryDirectory()
tdir = Path(_tdir.name)
imid1 = 'APF000012w'
imid2 = 'APF000012q'
imid1data = data[data.image_id==imid1]
imid2data = data[data.image_id==imid2]
fans1 = imid1data[imid1data.marking=='fan']
blotches1 = imid1data[imid1data.marking=='blotch']
fans2 = imid2data[imid2data.marking=='fan']
blotches2 = imid2data[imid2data.marking=='blotch']
In [ ]:
dbscanner = DBScanner(save_results=False)
In [ ]:
from planet4 import region_data
In [ ]:
region_data.Ithaca.season2
In [ ]:
obsid = region_data.Ithaca.season2[-1]
dbscanner.cluster_image_name(obsid)
In [ ]:
db = io.DBManager()
In [ ]:
data = db.get_image_name_markings(obsid)
In [ ]:
data.shape
In [ ]:
db.dbname
In [ ]:
dbscanner.eps_values
In [ ]:
dbscanner.cluster_image_id('12w')
In [ ]:
p4id = markings.ImageID('12w')
In [ ]:
dbscanner.reduced_data['fan'].shape
In [ ]:
%matplotlib inline
In [ ]:
dbscanner.cluster_and_plot('17a', 'blotch')
In [ ]:
dbscanner.parameter_scan('17a', 'fan', [0.1, 0.13], [50, 60, 70], size_to_scan='small')
In [ ]:
dbscanner.save_results
In [ ]:
# test_calc_fnotch
from planet4 import clustering

actual = clustering.calc_fnotch(4, 4)
assert actual == 0.5
actual = clustering.calc_fnotch(4, 0)
assert actual == 1
actual = clustering.calc_fnotch(0, 4)
assert actual == 0
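In [ ]:
# Judging by the asserts above, calc_fnotch appears to return the fan fraction of
# the combined fan and blotch votes for a cluster. A minimal sketch of that presumed
# formula (illustrative only, not the library implementation):
def fnotch_value(n_fans, n_blotches):
    """Fraction of fan markings among all markings competing for one cluster."""
    return n_fans / (n_fans + n_blotches)

assert fnotch_value(4, 4) == 0.5
assert fnotch_value(4, 0) == 1
assert fnotch_value(0, 4) == 0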
In [ ]:
# test_dbscan_xy_blotch
# using only x and y (or image_x,image_y)
coords = ['image_x','image_y']
X = blotches1[coords].values
dbscanner = clustering.DBScanner(X, min_samples=2)
assert dbscanner.n_clusters == 26
assert dbscanner.n_rejected == 25
In [ ]:
# test_dbscan_xy_fan
# using only x and y (or image_x,image_y)
coords = ['image_x','image_y']
X = fans1[coords].values
dbscanner = clustering.DBScanner(X, min_samples=2)
assert dbscanner.n_clusters == 7
assert dbscanner.n_rejected == 11
In [ ]:
# test_dbscan_xy_angle_blotch
coords = ['image_x','image_y', 'angle']
X = blotches1[coords].values
dbscanner = clustering.DBScanner(X, min_samples=2)
assert dbscanner.n_clusters == 35
assert dbscanner.n_rejected == 102
In [ ]:
# test_dbscan_xy_angle_fan
coords = ['image_x','image_y', 'angle']
X = fans1[coords].values
dbscanner = clustering.DBScanner(X, min_samples=2)
assert dbscanner.n_clusters == 6
assert dbscanner.n_rejected == 15
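In [ ]:
# For reference, a minimal sketch of the kind of clustering these tests exercise:
# plain scikit-learn DBSCAN on the x/y pixel coordinates, counting clusters and
# noise ("rejected") points. The library's DBScanner may pick eps differently and
# treat the angle dimension specially; eps=25 here is an illustrative value only.
from sklearn.cluster import DBSCAN

X = fans1[['image_x', 'image_y']].values
labels = DBSCAN(eps=25, min_samples=2).fit(X).labels_
n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
n_rejected = (labels == -1).sum()
n_clusters, n_rejected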
In [ ]:
# test_clustering_basics
# cm is assumed to be a planet4 ClusteringManager instance created earlier,
# with its output directory pointing at tdir.
cm.cluster_image_id(imid1, data=imid1data)
assert cm.n_classifications == 28
In [ ]:
cm.cluster_image_id(imid2, data=imid2data)
assert cm.n_classifications == 23
for subdir in ['just_clustering']:  # 'applied_cut_0.5',
    expected = tdir / subdir
    assert expected.exists() and expected.is_dir()
In [ ]:
# test_output_file_creation_just_clustering
for marking in ['blotches', 'fans']:
    expected = tdir / 'just_clustering' / (imid1 + '_' + marking + '.csv')
    assert expected.exists()
for marking in ['blotches', 'fans']:
    expected = tdir / 'just_clustering' / (imid2 + '_' + marking + '.csv')
    if marking == 'blotches':
        assert expected.exists()
    else:  # 12q, i.e. imid2 only has blotches
        assert not expected.exists()
In [ ]:
s = """
x y image_x image_y angle spread distance user_name marking classification_id
67.0 320.0 67.0 27172.0 90.0 2.01745014480398 10.0 test_user fan 50ec9c10861cf8095600017b
74.0 318.0 74.0 27170.0 90.0 2.01745014480398 10.0 test_user fan 50ec9c10861cf8095600017b
82.0 336.0 82.0 27188.0 90.0 2.01745014480398 10.0 test_user fan 50ec9c10861cf8095600017b
57.0 310.0 57.0 27162.0 90.0 2.01745014480398 10.0 test_user fan 50ec9c10861cf8095600017b
60.0 315.0 60.0 27167.0 35.21759296819272 10.434250055350423 41.617304093369626 test_user fan 50ec9c10861cf8095600017b
63.0 307.0 63.0 27159.0 62.38162109858792 2.01745014480398 437.89610639968015 test_user fan 50ec9c10861cf8095600017b
"""
In [ ]:
from io import StringIO
In [ ]:
df = pd.read_csv(StringIO(s), sep=r'\s+')
df
In [ ]:
cm.data = df
cm.pm.id_ = 'test_id'
In [ ]:
cm.cluster_data()
In [ ]:
cm.reduced_data['fan'][0]
In [ ]:
def hunt_bug(fname):
    """Return the number of fully duplicated rows in a clustering output CSV."""
    df = pd.read_csv(fname)
    return df[df.duplicated()].shape[0]
In [ ]:
# p is assumed to be a pathlib.Path pointing at the directory holding the clustering output CSVs.
fnames = p.glob('*.csv')
obsids = []
no_of_dupes = []
kind = []
for fname in fnames:
    tokens = fname.name.split('_')
    if fname.name.startswith('ESP'):
        # HiRISE obsids like ESP_xxxxxx_xxxx contain two underscores themselves
        obsids.append('_'.join(tokens[:3]))
        kind.append(tokens[3].split('.')[0])
    else:
        obsids.append(tokens[0])
        kind.append(tokens[1].split('.')[0])
    no_of_dupes.append(hunt_bug(fname))
In [ ]:
df = pd.DataFrame(dict(obsids=obsids, no_of_dupes=no_of_dupes,
kind=kind))
df
In [ ]:
%matplotlib nbagg
In [ ]:
db.get_image_id_markings('apx').iloc[0]
In [ ]:
db.get_image_id_markings('ani').iloc[0]
In [ ]:
from planet4 import plotting

plotting.plot_clustered_fans('apx', _dir=tdir)
In [ ]:
plotting.plot_clustered_fans('ani', _dir=tdir)
In [ ]:
plotting.plot_raw_fans('ani')
In [ ]:
newblotches = cm.newblotches.apply(lambda x: x.store())
In [ ]:
newblotches[newblotches.duplicated(keep=False)].head()
In [ ]:
cm.pm.fnotchdf.head()
In [ ]:
cm.pm.fnotchdf.filter(regex='_image_id').head()
In [ ]:
cm.pm.fnotchdf.iloc[2:4].T
In [ ]:
fn1 = markings.Fnotch.from_series(cm.pm.fnotchdf.iloc[2], scope='hirise')
In [ ]:
fn2 = markings.Fnotch.from_series(cm.pm.fnotchdf.iloc[3], scope='hirise')
In [ ]:
fn1.fan
In [ ]:
fn2.fan
In [ ]:
from numpy.linalg import norm

norm(fn1.fan.midpoint - fn2.fan.midpoint)
In [ ]:
p4id = markings.ImageID('apx', data=data, scope='planet4')
In [ ]:
%matplotlib inline
In [ ]:
p4id.plot_blotches()
In [ ]:
from planet4 import plotting
In [ ]:
plotting.plot_clustered_blotches('apx', _dir=tdir)
In [ ]:
pm = io.PathManager(id_='apx', datapath=tdir)
In [ ]:
pm.reduced_blotchfile
In [ ]:
tdir
In [ ]:
list(tdir.glob('just_clustering/*'))
In [ ]:
newblotches = cm.newblotches.apply(lambda x: x.store())
In [ ]:
newblotches.head()
In [ ]:
b1 = markings.Blotch(cm.newblotches.iloc[0].data)
b2 = markings.Blotch(cm.newblotches.iloc[1].data)
b1 == b2
In [ ]:
df = cm.pm.fnotchdf
In [ ]:
df.apply?
In [ ]:
df.duplicated().value_counts()
In [ ]:
final_clusters = df.apply(markings.Fnotch.from_series, axis=1).apply(lambda x: x.get_marking(0.5))
In [ ]:
pd.set_option('display.width', 10000)
In [ ]:
final_clusters.head()
In [ ]:
df.filter(regex='fan_').head()
In [ ]:
df.filter(regex='blotch_').head()
In [ ]:
from planet4 import markings
In [ ]:
fnotch = markings.Fnotch.from_series(df.iloc[0], scope='planet4')
In [ ]:
from numpy.linalg import norm
In [ ]:
norm(fnotch.blotch.center - fnotch.fan.midpoint)
In [ ]:
def filter_for_fans(x):
    if isinstance(x, markings.Fan):
        return x


def filter_for_blotches(x):
    if isinstance(x, markings.Blotch):
        return x
In [ ]:
final_clusters.apply(filter_for_blotches)
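In [ ]:
# The complementary filter: filter_for_fans returns None for blotch entries,
# so dropna keeps only the clusters that resolved to fans.
final_clusters.apply(filter_for_fans).dropna()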
In [ ]:
pd.read_csv(cm.pm.final_fanfile).duplicated().value_counts()
In [ ]:
# teardown
_tdir.cleanup()