Extracting MongoDB data with Pandas


In [2]:
import numpy as np
import pandas as pd
from monary import Monary

In [3]:
db_name = 'radio'

In [5]:
features = ['_id', 'activated_at', 'classification_count', 'coords', 'created_at',
           'location', 'metadata', 'project_id', 'random', 'state', 'updated_at', 'workflow_ids', 'zooinverse_id']
with Monary() as m:
    data = m.query(db_name, 'radio_subjects', {}, ['classification_count', 'state'], ['uint8', 'string:10'])
df = pd.DataFrame(np.ma.filled(data).T, columns=['classification_count', 'state'])

In [6]:
df.head()


Out[6]:
classification_count state
0 b'20' b'complete'
1 b'20' b'complete'
2 b'21' b'complete'
3 b'20' b'complete'
4 b'20' b'complete'

In [7]:
data


Out[7]:
[masked_array(data = [20 20 21 ..., 0 20 9],
              mask = [False False False ..., False False False],
        fill_value = 999999),
 masked_array(data = [b'complete' b'complete' b'complete' ..., b'inactive' b'complete'
  b'complete'],
              mask = [False False False ..., False False False],
        fill_value = N/A)]

{u'_id': ObjectId('52af7d53eb9a9b05ef000001'), u'activated_at': datetime.datetime(2013, 12, 17, 17, 45, 13, 844000), u'classification_count': 20, u'coords': [206.419375, 23.382361111111113], u'created_at': datetime.datetime(2013, 12, 17, 9, 16, 38, 435000), u'location': {u'contours': u'http://radio.galaxyzoo.org/subjects/contours/52af7d53eb9a9b05ef000001.json', u'radio': u'http://radio.galaxyzoo.org/subjects/radio/52af7d53eb9a9b05ef000001.jpg', u'standard': u'http://radio.galaxyzoo.org/subjects/standard/52af7d53eb9a9b05ef000001.jpg'}, u'metadata': {u'dec_dms': u'23.0 22.0 56.5', u'ra_hms': u'13.0 45.0 40.65', u'rms': u'0.000178', u'source': u'FIRSTJ134540.6+232256'}, u'project_id': ObjectId('52afdb804d69636532000001'), u'random': 0.5988090089044151, u'state': u'complete', u'updated_at': datetime.datetime(2013, 12, 17, 9, 16, 38, 468000), u'workflow_ids': [ObjectId('52afdb804d69636532000002')], u'zooniverse_id': u'ARG000255t'}