In [ ]:
from pymongo import MongoClient
In [ ]:
# Connect to the MongoDB server on this machine (port 27017).
client = MongoClient(host='localhost', port=27017)
In [ ]:
# Handle to the 'planet_four' database on the connected server.
db = client.planet_four
In [ ]:
from odo import odo
In [ ]:
# Copy the planet_four_subjects collection into the '/df' node of an HDF5 store.
# NOTE(review): the path after 'hdfstore://' has no leading slash, so it looks
# relative to the working directory -- confirm that this is intended.
source_uri = 'mongodb://localhost:27017/planet_four::planet_four_subjects'
target_uri = 'hdfstore://Users/klay6683/data/planet4/2015-03-31.hdf5::/df'
odo(source_uri, target_uri)
In [ ]:
import blaze as bz
In [ ]:
# Wrap the planet_four_subjects collection as a blaze Data object.
subjects_uri = 'mongodb://localhost/planet_four::planet_four_subjects'
bz.Data(subjects_uri)
In [ ]:
# Show the installed blaze version. The package is imported as `bz`, so the
# bare name `blaze` is not defined in this notebook.
bz.__version__
In [ ]:
# Handles to the three collections of the database used below.
subjects = db.planet_four_subjects
classifications = db.planet_four_classifications
users = db.planet_four_users
In [ ]:
# pandas is not imported by any earlier cell, so bring it into scope here.
import pandas as pd

# Load every classification document of user 'michaelaye' into a DataFrame.
user_query = {'user_name': 'michaelaye'}
df = pd.DataFrame(list(classifications.find(user_query)))
In [ ]:
# Inspect the 'annotations' value of the row labelled 1.
df['annotations'][1]
In [ ]:
# List the field names of a single document from the subjects collection.
sample_subject = subjects.find_one()
sample_subject.keys()
In [ ]:
from toolz import take
In [ ]:
from bson.objectid import ObjectId
In [ ]:
from datashape import discover
In [ ]:
# Sample the first two classification documents.
items = list(take(2, classifications.find()))

# Names of the fields in the first document whose value is a BSON ObjectId.
oid_cols = [name for name, value in items[0].items()
            if isinstance(value, ObjectId)]
print("OID_cols:\n",oid_cols)

# Remove those fields from every sampled document before calling discover().
# NOTE(review): presumably datashape cannot describe ObjectId values -- confirm.
for doc in items:
    for name in oid_cols:
        del doc[name]

print('items[0].keys():\n',items[0].keys())
discover(items)
In [ ]:
# Write the field names of the first sampled document, one name per line.
with open('classification_fields.txt','w') as f:
    f.writelines(key + '\n' for key in items[0])
In [ ]:
# IPython shell escape: print the file written by the previous cell.
!cat classification_fields.txt
In [ ]:
# IPython automagic (%pwd): show the notebook's current working directory.
pwd
In [ ]: