In [ ]:
import numpy as np
import pandas as pd

import planet4 as p4
from planet4 import helper_functions as hf
# Re-import the helper module so edits made to it are picked up, then
# report which pandas version is in use.
reload(hf)
print(pd.__version__)
In [ ]:
# Directory that holds the local Planet Four data files.
root = '/Users/maye/data/planet4/'
# Full path of the HDF5 classification database that the later cells query.
db_fname = ''.join(
    (root, '2014-10-02_planet_four_classifications_queryable.h5')
)
In [ ]:
# Read the image list, one entry per line, stripping surrounding whitespace
# (including the trailing newline) from every entry.
with open('../data/inca_images.txt') as f:
    inca_images = [line.strip() for line in f]
inca_images
In [ ]:
# Take the first listed image as a single test case.
obs_id = inca_images[0]
In [ ]:
def get_done_for_obs_id(obs_id):
    """Look up the status of one observation in the classification database.

    obs_id are called 'image_name' in P4 lingo.

    Parameters
    ----------
    obs_id : str
        Observation id. It is appended to the query string, so it must be
        a string; it is matched against the ``image_name`` column of the
        table stored under key ``'df'`` in ``db_fname``.

    Returns
    -------
    ``np.nan`` when the query selects no rows, otherwise whatever
    ``hf.get_status`` returns for the selected rows.
    NOTE(review): callers compare the result with 100, so this is presumably
    a percentage -- confirm against ``hf.get_status``.
    """
    # Progress output: the lookup is mapped over many ids by the caller.
    print(obs_id)
    df = pd.read_hdf(db_fname, 'df', where='image_name=' + obs_id)
    # No classifications stored for this observation: report "unknown"
    # instead of handing an empty frame to the status helper.
    if len(df) == 0:
        return np.nan
    return hf.get_status(df)
In [ ]:
# Try the status lookup on the single test observation.
get_done_for_obs_id(obs_id)
In [ ]:
# One row per entry of the image list.
results = pd.DataFrame(inca_images)
In [ ]:
# Name the single column after the database field the ids are matched against.
results.columns = ['image_name']
In [ ]:
# Look up the status of every listed image; each lookup queries the database.
results['status'] = results['image_name'].map(get_done_for_obs_id)
In [ ]:
# Show the table as the cell output.
results
In [ ]:
# For every image whose status is below 100, collect the index labels of the
# entries that have fewer than 30 classifications. Rows with a NaN status
# fail the comparison and are therefore skipped.
ids_to_do = []
for obs_id in results.loc[results.status < 100, 'image_name']:
    print(obs_id)
    df = pd.read_hdf(db_fname, 'df', where='image_name=' + obs_id)
    counts = hf.classification_counts_per_image(df)
    ids_to_do.append(counts[counts < 30].index.values)
In [ ]:
# Flatten the per-image id arrays into one array.
ids_to_do = np.concatenate(ids_to_do)
In [ ]:
# Save the ids as text, separated by newlines.
ids_to_do.tofile('unfinished_inca_ids.txt',sep='\n')
In [ ]:
# Recompute the counts for whatever ``df`` currently holds.
# NOTE(review): presumably the frame left over from the last loop iteration
# in an earlier cell -- depends on cell execution order.
counts = hf.classification_counts_per_image(df)
In [ ]:
# Index labels of the entries whose count is below 30, as a NumPy array.
towrite = counts[counts<30].index.values
In [ ]:
# Write the ids to the file, one per line. ``towrite`` is already a NumPy
# array (it comes from ``.index.values``), so it has no ``.values`` attribute,
# and ``file.write`` needs a string rather than an array.
with open('testout.csv', 'w') as f:
    f.write('\n'.join(str(item) for item in towrite))
In [ ]:
# Write the ids as text, separated by newlines.
towrite.tofile('testout.csv', sep='\n')
In [ ]: