In [ ]:
import numpy as np
import pandas as pd

import planet4 as p4
from planet4 import helper_functions as hf
# Re-import the helper module so edits to it are picked up without a kernel restart.
reload(hf)
# Record the pandas version this notebook runs against.
print(pd.__version__)

In [ ]:
# Local data directory; the trailing slash matters because the file name is
# appended by plain string concatenation.
root = '/Users/maye/data/planet4/'
# HDF5 classification database that the cells below query via pd.read_hdf.
db_fname = '{0}{1}'.format(
    root, '2014-10-02_planet_four_classifications_queryable.h5')

In [ ]:
# Read the list of obs_ids to process, one per line.
with open('../data/inca_images.txt') as f:
    # Strip surrounding whitespace/newlines and skip blank lines: an empty
    # entry would later be used as an obs_id and yield a malformed
    # `image_name=` where clause.
    inca_images = [line.strip() for line in f if line.strip()]
inca_images

In [ ]:
# Take the first listed obs_id as a single test case.
obs_id = inca_images[0]

In [ ]:
def get_done_for_obs_id(obs_id):
    """Return the status of one obs_id from the classification database.

    obs_id are called 'image_name' in P4 lingo.

    Parameters
    ----------
    obs_id : str
        Value matched against the `image_name` column of the HDF5 file
        `db_fname` (table key 'df').

    Returns
    -------
    The result of `hf.get_status` for the matching rows, or `np.nan` when the
    database holds no rows for this obs_id.
    """
    # Progress output; useful when this function is mapped over many obs_ids.
    print(obs_id)
    df = pd.read_hdf(db_fname, 'df', where='image_name='+obs_id)
    if len(df) == 0:
        # Nothing stored for this obs_id: signal "no data" instead of a status.
        return np.nan
    return hf.get_status(df)

In [ ]:
# Try the status lookup on the single obs_id selected earlier.
get_done_for_obs_id(obs_id)

In [ ]:
# One row per obs_id, held in a single (so far unnamed) column.
results = pd.DataFrame(inca_images)

In [ ]:
# Name the only column after the database field used in the queries.
results.columns = ['image_name']

In [ ]:
# Look up the status of every obs_id; this runs one database query per row.
results['status'] = results.image_name.map(get_done_for_obs_id)

In [ ]:
# Display the obs_id / status table.
results

In [ ]:
# For every obs_id whose status is below 100, collect the index labels of the
# entries with fewer than 30 classifications (one array per obs_id).
ids_to_do = []
unfinished = results[results.status < 100]
for obs_id in unfinished.image_name:
    print(obs_id)
    df = pd.read_hdf(db_fname, 'df', where='image_name=' + obs_id)
    counts = hf.classification_counts_per_image(df)
    below_target = counts < 30
    ids_to_do.append(counts[below_target].index.values)

In [ ]:
# Flatten the per-obs_id arrays into one array of IDs.
# NOTE: np.concatenate raises ValueError when the list is empty, and because
# this rebinds `ids_to_do`, re-running the cell without re-running the loop
# operates on the already-flattened array.
ids_to_do = np.concatenate(ids_to_do)

In [ ]:
# Write the IDs as plain text, one per line.
ids_to_do.tofile('unfinished_inca_ids.txt',sep='\n')

In [ ]:
# NOTE: `df` is only assigned inside the loop over unfinished obs_ids, so this
# uses whatever the last iteration left behind (and fails if the loop never ran).
counts = hf.classification_counts_per_image(df)

In [ ]:
# Index labels of the entries whose count is below 30, as a NumPy array.
towrite = counts[counts<30].index.values

In [ ]:
# Write the selected entries to a text file, one per line.
# `towrite` is already a NumPy array (it comes from `.index.values`), so it has
# no `.values` attribute, and `write` needs a string rather than an array.
with open('testout.csv','w') as f:
    f.write('\n'.join(str(item) for item in towrite))

In [ ]:
# Write the selected entries as text, one per line.
towrite.tofile('testout.csv', sep='\n')

In [ ]: