In [ ]:
# setup
import planet4 as p4
from planet4.dbscan import DBScanner
from planet4 import clustering, io, markings
import pandas as pd
from pathlib import Path
import pkg_resources as pr

with pr.resource_stream('planet4', 'data/test_db.csv') as f:
    data = pd.read_csv(f)

from tempfile import TemporaryDirectory

_tdir = TemporaryDirectory()
tdir = Path(_tdir.name)

imid1 = 'APF000012w'
imid2 = 'APF000012q'

imid1data = data[data.image_id==imid1]
imid2data = data[data.image_id==imid2]

fans1 = imid1data[imid1data.marking=='fan']
blotches1 = imid1data[imid1data.marking=='blotch']
fans2 = imid2data[imid2data.marking=='fan']
blotches2 = imid2data[imid2data.marking=='blotch']

In [ ]:
dbscanner = DBScanner(save_results=False)

In [ ]:
from planet4 import region_data

In [ ]:
region_data.Ithaca.season2

In [ ]:
obsid = region_data.Ithaca.season2[-1]
dbscanner.cluster_image_name(obsid)

In [ ]:
db = io.DBManager()

In [ ]:
data = db.get_image_name_markings(obsid)

In [ ]:
data.shape

In [ ]:
db.dbname

In [ ]:
dbscanner.eps_values

In [ ]:
dbscanner.cluster_image_id('12w')

In [ ]:
p4id = markings.ImageID('12w')

In [ ]:
dbscanner.reduced_data['fan'].shape

In [ ]:
%matplotlib inline

In [ ]:
dbscanner.cluster_and_plot('17a', 'blotch')

In [ ]:
dbscanner.parameter_scan('17a', 'fan', [0.1, 0.13], [50, 60, 70], size_to_scan='small')

In [ ]:
dbscanner.save_results

In [ ]:
# test_calc_fnotch
actual = clustering.calc_fnotch(4, 4)
assert actual == 0.5
actual = clustering.calc_fnotch(4, 0)
assert actual == 1
actual = clustering.calc_fnotch(0, 4)
assert actual == 0
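
The asserts above are consistent with the fnotch value being the fan fraction of a cluster. A minimal standalone sketch of that formula follows; it is an illustration only, and planet4.clustering.calc_fnotch may differ in details.

In [ ]:
# Hypothetical re-implementation of the fan-fraction idea, for illustration only.
def fan_fraction(n_fan, n_blotch):
    return n_fan / (n_fan + n_blotch)

assert fan_fraction(4, 4) == 0.5
assert fan_fraction(4, 0) == 1
assert fan_fraction(0, 4) == 0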

In [ ]:
# test_dbscan_xy_blotch

# using only x and y (or image_x,image_y)
coords = ['image_x','image_y']
X = blotches1[coords].values
dbscanner = clustering.DBScanner(X, min_samples=2)

assert dbscanner.n_clusters == 26
assert dbscanner.n_rejected == 25
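
For orientation, the same idea can be sketched with scikit-learn's DBSCAN applied directly to the (image_x, image_y) positions. The eps value below is an arbitrary choice for illustration and is not the setting planet4's DBScanner uses, so the counts will generally differ from the asserts above:

In [ ]:
# Illustration only: raw DBSCAN on the blotch marker positions.
from sklearn.cluster import DBSCAN

labels = DBSCAN(eps=10, min_samples=2).fit_predict(blotches1[['image_x', 'image_y']].values)
n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
n_noise = int((labels == -1).sum())
n_clusters, n_noise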

In [ ]:
# test_dbscan_xy_fan

# using only x and y (or image_x,image_y)
coords = ['image_x','image_y']
X = fans1[coords].values
dbscanner = clustering.DBScanner(X, min_samples=2)

assert dbscanner.n_clusters == 7
assert dbscanner.n_rejected == 11

In [ ]:
# test_dbscan_xy_angle_blotch

coords = ['image_x','image_y', 'angle']
X = blotches1[coords].values
dbscanner = clustering.DBScanner(X, min_samples=2)

assert dbscanner.n_clusters == 35
assert dbscanner.n_rejected == 102

In [ ]:
# test_dbscan_xy_angle_fan

coords = ['image_x','image_y', 'angle']
X = fans1[coords].values
dbscanner = clustering.DBScanner(X, min_samples=2)

assert dbscanner.n_clusters == 6
assert dbscanner.n_rejected == 15
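
When the angle is added as a third clustering dimension, it is measured in degrees while x and y are in pixels, so the columns live on very different scales. One common way to handle that, sketched below, is to standardize the columns before clustering; this is not necessarily what planet4's DBScanner does internally, and eps is again an arbitrary choice.

In [ ]:
# Illustration only: standardize (x, y, angle) before DBSCAN so no single
# column dominates the Euclidean distance.
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler

X_scaled = StandardScaler().fit_transform(fans1[['image_x', 'image_y', 'angle']].values)
labels = DBSCAN(eps=0.3, min_samples=2).fit_predict(X_scaled)
len(set(labels)) - (1 if -1 in labels else 0)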

In [ ]:
# test_clustering_basics

# Assumption: `cm` is planet4's clustering manager, writing its output below
# `tdir`; the exact constructor arguments may differ between planet4 versions.
cm = clustering.ClusteringManager(fnotched_dir=tdir)

cm.cluster_image_id(imid1, data=imid1data)

assert cm.n_classifications == 28

In [ ]:
cm.cluster_image_id(imid2, data=imid2data)
assert cm.n_classifications == 23

for subdir in ['just_clustering']:  # 'applied_cut_0.5', 
    expected = tdir / subdir
    assert expected.exists() and expected.is_dir()

In [ ]:
# test_output_file_creation_just_clustering

for marking in ['blotches', 'fans']:
    expected = tdir / 'just_clustering' / (imid1 + '_' + marking + '.csv')
    print(expected)
    assert expected.exists()

for marking in ['blotches', 'fans']:
    expected = tdir / 'just_clustering' / (imid2 + '_' + marking + '.csv')
    if marking == 'blotches':
        assert expected.exists()
    else:  # 12q, i.e. imid2, only has blotches
        assert not expected.exists()

Same-user rejection: all fan markings in the synthetic data below come from a single user and a single classification_id, so we can check how the clustering treats repeated input from one user.


In [ ]:
s = """
x	y	image_x	image_y	angle	spread	distance	user_name	marking	classification_id
67.0	320.0	67.0	27172.0	90.0	2.01745014480398	10.0	test_user	fan	50ec9c10861cf8095600017b
74.0	318.0	74.0	27170.0	90.0	2.01745014480398	10.0	test_user	fan	50ec9c10861cf8095600017b
82.0	336.0	82.0	27188.0	90.0	2.01745014480398	10.0	test_user	fan	50ec9c10861cf8095600017b
57.0	310.0	57.0	27162.0	90.0	2.01745014480398	10.0	test_user	fan	50ec9c10861cf8095600017b
60.0	315.0	60.0	27167.0	35.21759296819272	10.434250055350423	41.617304093369626	test_user	fan	50ec9c10861cf8095600017b
63.0	307.0	63.0	27159.0	62.38162109858792	2.01745014480398	437.89610639968015	test_user	fan	50ec9c10861cf8095600017b
"""

In [ ]:
from io import StringIO

In [ ]:
df = pd.read_csv(StringIO(s), sep='\t')
df
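
All six fan markings in this frame share one user_name and one classification_id, which is exactly the "same user" situation described above:

In [ ]:
# Confirm the test frame contains a single (user, classification) group.
df.groupby(['user_name', 'classification_id']).size()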

In [ ]:
cm.data = df
cm.pm.id_ = 'test_id'

In [ ]:
cm.cluster_data()

In [ ]:
cm.reduced_data['fan'][0]

Some bug hunting: scan the clustering output CSVs for fully duplicated rows.


In [ ]:
def hunt_bug(fname):
    """Return the number of fully duplicated rows in a clustering output CSV."""
    df = pd.read_csv(fname)
    return df[df.duplicated()].shape[0]
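
The helper can be exercised on one of the files created by the clustering tests above (assuming that cell has been run):

In [ ]:
# Example call on a single just_clustering output file.
hunt_bug(tdir / 'just_clustering' / (imid1 + '_blotches.csv'))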

In [ ]:
# Assumption: `p` is a Path to the directory holding the clustering output
# CSVs; adjust it to wherever the results were actually written.
p = tdir / 'just_clustering'
fnames = p.glob('*.csv')
obsids = []
no_of_dupes = []
kind = []
for fname in fnames:
    tokens = fname.name.split('_')
    if fname.name.startswith('ESP'):
        obsids.append('_'.join(tokens[:3]))
        kind.append(tokens[3].split('.')[0])
    else:
        obsids.append(tokens[0])
        kind.append(tokens[1].split('.')[0])
    no_of_dupes.append(hunt_bug(fname))

In [ ]:
df = pd.DataFrame(dict(obsids=obsids, no_of_dupes=no_of_dupes,
                       kind=kind))
df
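
A quick aggregated view of the same table, added here only as a convenience:

In [ ]:
# Total number of duplicated rows per marking kind.
df.groupby('kind').no_of_dupes.sum()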

In [ ]:
%matplotlib nbagg

In [ ]:
db.get_image_id_markings('apx').iloc[0]

In [ ]:
db.get_image_id_markings('ani').iloc[0]

In [ ]:
from planet4 import plotting

plotting.plot_clustered_fans('apx', _dir=tdir)

In [ ]:
plotting.plot_clustered_fans('ani', _dir=tdir)

In [ ]:
plotting.plot_raw_fans('ani')

In [ ]:
newblotches = cm.newblotches.apply(lambda x: x.store())

In [ ]:
newblotches[newblotches.duplicated(keep=False)].head()

In [ ]:
cm.pm.fnotchdf.head()

In [ ]:
cm.pm.fnotchdf.filter(regex='_image_id').head()

In [ ]:
cm.pm.fnotchdf.iloc[2:4].T

In [ ]:
fn1 = markings.Fnotch.from_series(cm.pm.fnotchdf.iloc[2], scope='hirise')

In [ ]:
fn2 = markings.Fnotch.from_series(cm.pm.fnotchdf.iloc[3], scope='hirise')

In [ ]:
fn1.fan

In [ ]:
fn2.fan

In [ ]:
from numpy.linalg import norm

norm(fn1.fan.midpoint - fn2.fan.midpoint)

In [ ]:
p4id = markings.ImageID('apx', data=data, scope='planet4')

In [ ]:
%matplotlib inline

In [ ]:
p4id.plot_blotches()

In [ ]:
from planet4 import plotting

In [ ]:
plotting.plot_clustered_blotches('apx', _dir=tdir)

In [ ]:
pm = io.PathManager(id_='apx', datapath=tdir)

In [ ]:
pm.reduced_blotchfile

In [ ]:
tdir

In [ ]:
list(tdir.glob('just_clustering/*'))

In [ ]:
newblotches = cm.newblotches.apply(lambda x: x.store())

In [ ]:
newblotches.head()

In [ ]:
b1 = markings.Blotch(cm.newblotches.iloc[0].data)
b2 = markings.Blotch(cm.newblotches.iloc[1].data)
b1 == b2

In [ ]:
df = cm.pm.fnotchdf

In [ ]:
df.apply?

In [ ]:
df.duplicated().value_counts()

In [ ]:
final_clusters = df.apply(markings.Fnotch.from_series, axis=1).apply(lambda x: x.get_marking(0.5))

In [ ]:
pd.set_option('display.width', 10000)

In [ ]:
final_clusters.head()

In [ ]:
df.filter(regex='fan_').head()

In [ ]:
df.filter(regex='blotch_').head()

In [ ]:
from planet4 import markings

In [ ]:
fnotch = markings.Fnotch.from_series(df.iloc[0], scope='planet4')

In [ ]:
from numpy.linalg import norm

In [ ]:
norm(fnotch.blotch.center - fnotch.fan.midpoint)

In [ ]:
def filter_for_fans(x):
    if isinstance(x, markings.Fan):
        return x

def filter_for_blotches(x):
    if isinstance(x, markings.Blotch):
        return x

In [ ]:
final_clusters.apply(filter_for_blotches)
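
The filters return None for non-matching entries, so a dropna() on the result leaves only the markings of the requested kind:

In [ ]:
# Keep only the Blotch objects; entries that were fans become None and are dropped.
final_clusters.apply(filter_for_blotches).dropna()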

In [ ]:
pd.read_csv(cm.pm.final_fanfile).duplicated().value_counts()

In [ ]:
# teardown
_tdir.cleanup()
