In [2]:
import numpy as np
import pandas as pd
from monary import Monary
In [3]:
# Database name handed to the Monary query in the cell below.
db_name = "radio"
In [5]:
# Field names of a subject document in the `radio_subjects` collection.
# Kept for reference; only the subset in `query_fields` is fetched below.
features = ['_id', 'activated_at', 'classification_count', 'coords', 'created_at',
            'location', 'metadata', 'project_id', 'random', 'state', 'updated_at',
            'workflow_ids', 'zooniverse_id']

# Fields to fetch and the Monary type for each, declared once so the query
# and the DataFrame column labels cannot drift apart.
query_fields = ['classification_count', 'state']
# int32 rather than uint8: a uint8 column cannot represent counts above 255.
query_types = ['int32', 'string:10']

with Monary() as m:
    # An empty query document ({}) matches every document in the collection.
    # Monary returns one masked array per requested field, in request order.
    data = m.query(db_name, 'radio_subjects', {}, query_fields, query_types)

# Build the frame one column at a time so each field keeps its own dtype.
# Stacking the arrays into a single 2-D array would coerce the numeric counts
# to strings. Entries masked by Monary (field missing on a document) are
# replaced by numpy's default fill value for that dtype.
df = pd.DataFrame(
    {name: np.ma.filled(column) for name, column in zip(query_fields, data)},
    columns=query_fields
)
In [6]:
# Preview the first rows of the queried frame as a quick sanity check.
df.head()
Out[6]:
In [7]:
# Display the raw `data` object.
# NOTE(review): the saved output for this cell is a single document dict, which
# does not look like what the Monary query cell assigns to `data`; it is
# probably stale output from an earlier kernel state. Re-run from a fresh
# kernel to confirm.
data
Out[7]:
{u'_id': ObjectId('52af7d53eb9a9b05ef000001'), u'activated_at': datetime.datetime(2013, 12, 17, 17, 45, 13, 844000), u'classification_count': 20, u'coords': [206.419375, 23.382361111111113], u'created_at': datetime.datetime(2013, 12, 17, 9, 16, 38, 435000), u'location': {u'contours': u'http://radio.galaxyzoo.org/subjects/contours/52af7d53eb9a9b05ef000001.json', u'radio': u'http://radio.galaxyzoo.org/subjects/radio/52af7d53eb9a9b05ef000001.jpg', u'standard': u'http://radio.galaxyzoo.org/subjects/standard/52af7d53eb9a9b05ef000001.jpg'}, u'metadata': {u'dec_dms': u'23.0 22.0 56.5', u'ra_hms': u'13.0 45.0 40.65', u'rms': u'0.000178', u'source': u'FIRSTJ134540.6+232256'}, u'project_id': ObjectId('52afdb804d69636532000001'), u'random': 0.5988090089044151, u'state': u'complete', u'updated_at': datetime.datetime(2013, 12, 17, 9, 16, 38, 468000), u'workflow_ids': [ObjectId('52afdb804d69636532000002')], u'zooniverse_id': u'ARG000255t'}