To run this example you need to install pymongo.

pip install pymongo


In [1]:
import pymongo

In [2]:
client = pymongo.MongoClient('localhost', 27017)
db = client['adults-db']

In [3]:
incomeCollection = db['income']

In [9]:
column_headings = 'age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,label'.split(',')

In [10]:
with open('adult.data') as f_in:
    for line in f_in:
        row_list = [x.strip() for x in line.rstrip('\n').split(',')]
        try:
            row_dict = dict(zip(column_headings, row_list))
            row_dict['age'] = int(row_dict['age'])
            incomeCollection.insert_one(row_dict)
        except:
            pass

In [11]:
incomeCollection.estimated_document_count()


Out[11]:
32561

In [12]:
over_35 = incomeCollection.find({'age': {'$gt': 35}})

In [13]:
type(over_35)


Out[13]:
pymongo.cursor.Cursor

In [14]:
over_35.next()


Out[14]:
{'_id': ObjectId('5c4747c80adc7432f4092783'),
 'age': 39,
 'workclass': 'State-gov',
 'fnlwgt': '77516',
 'education': 'Bachelors',
 'education-num': '13',
 'marital-status': 'Never-married',
 'occupation': 'Adm-clerical',
 'relationship': 'Not-in-family',
 'race': 'White',
 'sex': 'Male',
 'capital-gain': '2174',
 'capital-loss': '0',
 'hours-per-week': '40',
 'native-country': 'United-States',
 'label': '<=50K'}

In [15]:
import time

In [16]:
start = time.time()
age_50 = incomeCollection.find({'age': {'$gt': 50}})
end = time.time()
print(end - start)


0.0

In [17]:
index_result = db.profiles.create_index([('age', pymongo.ASCENDING)], unique=False)

In [18]:
start = time.time()
age_45 = incomeCollection.find({'age': {'$gt': 25}})
end = time.time()
print(end - start)


0.0

In [19]:
client.close()

In [ ]: