In [ ]:
from pymongo import MongoClient

In [ ]:
# Connect to the MongoDB server on this machine (port 27017).
client = MongoClient(host='localhost', port=27017)

In [ ]:
# Select the 'planet_four' database on the connected client; the collection
# handles used later in the notebook are looked up on this object.
db = client['planet_four']

In [ ]:
from odo import odo

In [ ]:
# Copy the planet_four_subjects MongoDB collection into the '/df' node of an
# HDF5 store. The target needs a third slash ('hdfstore:///Users/...'): with
# only two, the part after '://' is 'Users/...', i.e. a path relative to the
# current working directory rather than the absolute /Users/... location.
odo('mongodb://localhost:27017/planet_four::planet_four_subjects',
    'hdfstore:///Users/klay6683/data/planet4/2015-03-31.hdf5::/df')

In [ ]:
import blaze as bz

In [ ]:
# Wrap the subjects collection URI in a blaze Data object; as the last
# expression of the cell, the result is displayed.
subjects_uri = 'mongodb://localhost/planet_four::planet_four_subjects'
bz.Data(subjects_uri)

In [ ]:
# blaze is imported under the alias `bz`; the bare name `blaze` is not bound
# in this notebook and would raise NameError.
bz.__version__

In [ ]:
# Handles to the subjects, classifications and users collections of `db`.
subjects, classifications, users = (
    db[collection_name]
    for collection_name in (
        'planet_four_subjects',
        'planet_four_classifications',
        'planet_four_users',
    )
)

In [ ]:
# pandas is not imported anywhere else in this notebook, so bring `pd` into
# scope here; without it this cell fails with NameError on a fresh kernel.
import pandas as pd

# Load every classification document submitted by user 'michaelaye' into a
# DataFrame (one row per document).
df = pd.DataFrame(list(classifications.find({'user_name':'michaelaye'})))

In [ ]:
# Inspect the 'annotations' value stored under index label 1.
annotations = df.annotations
annotations[1]

In [ ]:
# Field names of a single document fetched from the subjects collection.
first_subject = subjects.find_one()
first_subject.keys()

In [ ]:
from toolz import take

In [ ]:
from bson.objectid import ObjectId

In [ ]:
from datashape import discover

In [ ]:
# Sample the first two classification documents to inspect their schema.
items = list(take(2, classifications.find()))

# Collect every field that holds an ObjectId in *any* sampled document
# (first-seen order). MongoDB documents are schemaless, so looking only at
# items[0] can miss ObjectId fields that appear in later documents.
oid_cols = []
for item in items:
    for key, value in item.items():
        if isinstance(value, ObjectId) and key not in oid_cols:
            oid_cols.append(key)
print("OID_cols:\n",oid_cols)

# Drop those fields before calling discover() -- presumably because datashape
# cannot infer a type for ObjectId values (TODO confirm). pop() with a default
# tolerates documents that lack one of the fields, where `del` would raise
# KeyError.
for item in items:
    for col in oid_cols:
        item.pop(col, None)
print('items[0].keys():\n',items[0].keys())
discover(items)

In [ ]:
# Save the field names of the first sampled document, one name per line.
with open('classification_fields.txt','w') as fields_file:
    fields_file.writelines(key + '\n' for key in items[0].keys())

In [ ]:
# Print the contents of classification_fields.txt to the cell output using
# the shell's `cat` command.
!cat classification_fields.txt

In [ ]:
# Show the working directory, against which the relative path
# 'classification_fields.txt' is resolved. os.getcwd() yields the same value
# as the bare `pwd` automagic, but does not depend on automagic being enabled,
# on the cell consisting of a single line, or on `pwd` not being shadowed by a
# variable.
import os
os.getcwd()

In [ ]: