To run this example you need to install pymongo.
pip install pymongo
In [1]:
import pymongo
In [2]:
# Connect to the MongoDB server on localhost:27017 and select the
# 'adults-db' database used by the rest of this example.
client = pymongo.MongoClient(host='localhost', port=27017)
db = client['adults-db']
In [3]:
# Handle to the 'income' collection inside the selected database.
incomeCollection = db.income
In [9]:
# Field names applied, in order, to each comma-separated row of the data file.
column_headings = [
    'age',
    'workclass',
    'fnlwgt',
    'education',
    'education-num',
    'marital-status',
    'occupation',
    'relationship',
    'race',
    'sex',
    'capital-gain',
    'capital-loss',
    'hours-per-week',
    'native-country',
    'label',
]
In [10]:
# Load 'adult.data' into the income collection, one document per row.
# Each row is split on commas, whitespace-stripped, and paired with
# column_headings; 'age' is stored as an int so that numeric range queries
# such as {'age': {'$gt': 35}} work.
with open('adult.data') as f_in:
    for line in f_in:
        # Blank lines carry no record; skip them explicitly instead of
        # relying on an exception to filter them out.
        if not line.strip():
            continue
        row_list = [x.strip() for x in line.rstrip('\n').split(',')]
        row_dict = dict(zip(column_headings, row_list))
        try:
            row_dict['age'] = int(row_dict['age'])
        except ValueError:
            # Row whose first field is not an integer age: skip it.
            continue
        # Kept outside the try so database errors (e.g. the server being
        # unreachable) surface instead of being silently swallowed.
        incomeCollection.insert_one(row_dict)
In [11]:
# Display the estimated number of documents in the income collection
# after the load above.
incomeCollection.estimated_document_count()
Out[11]:
In [12]:
# Query for documents whose 'age' field is greater than 35.
over_35 = incomeCollection.find(filter={'age': {'$gt': 35}})
In [13]:
# Inspect the type of the object returned by find().
type(over_35)
Out[13]:
In [14]:
# Pull the first matching document from the query result.
next(over_35)
Out[14]:
In [15]:
import time
In [16]:
# Time a query for people older than 50.
# find() only builds a lazy cursor; no documents are fetched until it is
# iterated. The results are therefore materialised with list() inside the
# timed region so the measurement covers the query itself. age_50 is
# consequently a list of documents rather than a cursor.
# perf_counter() is used because it is a monotonic, high-resolution clock
# intended for measuring short intervals.
start = time.perf_counter()
age_50 = list(incomeCollection.find({'age': {'$gt': 50}}))
end = time.perf_counter()
print(end - start)
In [17]:
# Create a non-unique ascending index on 'age' for the income collection.
# The index must live on the collection being queried (incomeCollection);
# an index on any other collection has no effect on these queries.
index_result = incomeCollection.create_index([('age', pymongo.ASCENDING)], unique=False)
In [18]:
# Time an age range query again for comparison with the earlier timing.
# find() only builds a lazy cursor, so the results are materialised with
# list() inside the timed region; otherwise only cursor construction would
# be measured. age_45 is consequently a list of documents.
# NOTE(review): the variable is named age_45 but the filter uses 25, and the
# earlier benchmark used 50. For a like-for-like comparison the thresholds
# should probably match -- confirm the intended value.
start = time.perf_counter()
age_45 = list(incomeCollection.find({'age': {'$gt': 25}}))
end = time.perf_counter()
print(end - start)
In [19]:
# Close the MongoDB client now that the example is finished.
client.close()
In [ ]: