In [ ]:
from planet4.clustering import DBScanner

In [ ]:
# Path of the classification database handed to ClusteringManager further down.
# NOTE(review): absolute, machine-specific location -- adjust before running elsewhere.
dbname = '/Users/klay6683/data/planet4/2015-06-07_planet_four_classifications_queryable_cleaned_seasons2and3.h5'

In [ ]:
from planet4 import markings, io

class ClusteringManager(object):
    """Cluster fan and blotch markings with DBScanner and accumulate the results.

    Parameters
    ----------
    dbname : str
        Database path; handed to ``io.DBManager`` and, for image-id based
        clustering, to ``markings.ImageID``.
    scope : str, optional
        Passed through unchanged to every ``DBScanner`` (default ``'hirise'``).

    Attributes
    ----------
    db : io.DBManager
        Database accessor built from ``dbname``.
    confusion : list of tuple
        One ``(data_id, kind, n_markings, n_reduced_data, n_rejected)`` row
        per processed data set and marking kind.
    dbscanners : list
        Created empty; no method of this class fills it.
    clustered_fans, clustered_blotches : list
        Accumulated ``reduced_data`` entries of all fan / blotch scans so far.
    data_id
        Identifier (image id or image name) of the most recently processed
        data set, ``None`` until one of the ``dbscan_image_*`` / ``dbscan_all``
        methods has run.
    p4id
        ``markings.ImageID`` of the last ``dbscan_image_id`` call, else ``None``.
    """

    def __init__(self, dbname, scope='hirise'):
        self.db = io.DBManager(dbname)
        self.dbname = dbname
        self.scope = scope
        self.confusion = []
        self.dbscanners = []
        self.clustered_fans = []
        self.clustered_blotches = []
        # Defined up front so that dbscan_data() can be called directly
        # without raising AttributeError on a fresh instance.
        self.data_id = None
        self.p4id = None

    @property
    def n_clustered_fans(self):
        """Number of entries accumulated in ``clustered_fans``."""
        return len(self.clustered_fans)

    @property
    def n_clustered_blotches(self):
        """Number of entries accumulated in ``clustered_blotches``."""
        return len(self.clustered_blotches)

    def dbscan_data(self, data):
        """Run a DBScanner per marking kind on ``data`` and store the outcome.

        ``data`` must expose a ``marking`` attribute that can be compared to
        ``'fan'`` / ``'blotch'`` to yield a selector usable as ``data[selector]``.
        For each kind a row is appended to ``confusion`` (tagged with the
        current ``data_id``) and the scanner's ``reduced_data`` is appended to
        the matching ``clustered_*`` list.
        """
        targets = {'fan': self.clustered_fans,
                   'blotch': self.clustered_blotches}
        for kind in ['fan', 'blotch']:
            # Named kind_data so the imported `markings` module is not shadowed.
            kind_data = data[data.marking == kind]
            dbscanner = DBScanner(kind_data,
                                  kind,
                                  scope=self.scope)
            self.confusion.append((self.data_id,
                                   kind,
                                   len(kind_data),
                                   dbscanner.n_reduced_data,
                                   dbscanner.n_rejected))
            targets[kind].extend(dbscanner.reduced_data)

    def dbscan_image_id(self, image_id):
        """Cluster the markings of one image id (data taken from ``markings.ImageID``)."""
        self.data_id = image_id
        self.p4id = markings.ImageID(image_id, self.dbname)
        self.dbscan_data(self.p4id.data)

    def dbscan_image_name(self, image_name):
        """Cluster all markings the database returns for ``image_name``."""
        data = self.db.get_image_name_markings(image_name)
        self.data_id = image_name
        self.dbscan_data(data)

    def dbscan_all(self):
        """Cluster every image name known to the database, printing percent progress."""
        image_names = self.db.image_names
        n_images = len(image_names)
        for i, image_name in enumerate(image_names):
            # Float arithmetic keeps the percentage fractional under any
            # division semantics.
            print('{:.1f}'.format(100.0 * i / n_images))
            self.dbscan_image_name(image_name)

In [ ]:
# Build the clustering manager on the database path; scope stays at its default ('hirise').
cm = ClusteringManager(dbname)

In [ ]:
# Cluster the markings of a single image name; results accumulate on cm
# (cm.clustered_fans, cm.clustered_blotches, cm.confusion).
cm.dbscan_image_name('ESP_011544_0985')

In [ ]:
from numpy.linalg import norm

# Count every (blotch, fan) combination in which the blotch centre lies less
# than 10 away from the fan point given by fan.base + fan.midpoint.
n_close = sum(
    1
    for blotch in cm.clustered_blotches
    for fan in cm.clustered_fans
    if norm(blotch.center - (fan.base + fan.midpoint)) < 10
)

In [ ]:
# Display how many blotch/fan pairs passed the distance test.
n_close

In [ ]:
# Display the number of clustered blotches accumulated on cm so far.
cm.n_clustered_blotches

In [ ]:
# Display the number of clustered fans accumulated on cm so far.
cm.n_clustered_fans

In [ ]:
# `pd` is used here but not imported by any earlier cell, so import it
# locally to keep this cell runnable on a fresh kernel.
import pandas as pd

# Tabulate cm.confusion: one row per processed data id and marking kind.
confusion_data = pd.DataFrame(cm.confusion, columns=['image_name', 'kind', 'n_markings',
                                    'n_cluster_members', 'n_rejected'])

In [ ]:
# Write the confusion table to disk as CSV.
# NOTE(review): absolute, machine-specific output path -- adjust before running elsewhere.
confusion_data.to_csv('/Users/klay6683/Dropbox/DDocuments/planet4/confusion_data.csv')

In [ ]:
from numpy.linalg import norm

# Print each clustered blotch centre, followed by its distance to the point
# fan.base + fan.midpoint of every clustered fan.
# The clustered objects are read from `cm`; no `reduced_blotches` /
# `reduced_fans` variables are defined by any cell of this notebook.
for blotch in cm.clustered_blotches:
    print(blotch.center)
    for fan in cm.clustered_fans:
        delta = blotch.center - (fan.base+fan.midpoint)
        print(norm(delta))