In [40]:
from pymongo import MongoClient, IndexModel, ASCENDING, DESCENDING
from bson.son import SON
# Open a client with the driver's default connection settings (no host or
# port is passed) and select the "scratch" database used by every cell below.
cl = MongoClient()
scratch_db = cl["scratch"]
In [6]:
# Preview ten documents from the zips collection to see which fields they carry.
first = scratch_db.zips.find().limit(10)
for doc in first:
    print(doc)
In [23]:
# Baseline measurement: drop the collection's indexes first so the explain
# output below reflects a lookup by city without a supporting index.
scratch_db.zips.drop_indexes()

city_filter = {"city": "FLAGSTAFF"}

# Cursor.count() is deprecated since PyMongo 3.7 and removed in PyMongo 4.0;
# Collection.count_documents() is the supported way to count matches.
count = scratch_db.zips.count_documents({})
city_count = scratch_db.zips.count_documents(city_filter)

# Keep only the execution statistics section of the explain document.
city_explain = scratch_db.zips.find(city_filter).explain()['executionStats']

print(count)
print(city_count)
print(city_explain)
In [38]:
# Indexed measurement: start from a clean slate, then build an ascending
# index on "city" so the same lookup can be compared against the baseline.
scratch_db.zips.drop_indexes()
scratch_db.zips.create_index([("city", ASCENDING)])

city_filter = {"city": "FLAGSTAFF"}

# Cursor.count() is deprecated since PyMongo 3.7 and removed in PyMongo 4.0;
# Collection.count_documents() is the supported way to count matches.
count = scratch_db.zips.count_documents({})
city_count = scratch_db.zips.count_documents(city_filter)

# Keep only the execution statistics section of the explain document.
city_explain = scratch_db.zips.find(city_filter).explain()['executionStats']

print(count)
print(city_count)
print(city_explain)
You can see that with the index, the query's execution is a bit different.
The executionTimeMillis field shows that the second query is executed much faster.
This is because the index allows the query to search the index instead of scanning all the documents.
In [47]:
print("Amount of cities per state:")

# Group the documents by state, count each group, and list the largest
# groups first (ties broken by state code, descending).
# NOTE(review): this counts one per document in `zips`; if a city appears in
# several documents it is counted more than once -- confirm this is the
# intended meaning of "cities per state".
pipeline = [
    # $unwind is a pass-through for scalar values; it only matters if
    # "state" is stored as an array -- TODO confirm whether it is needed.
    {"$unwind": "$state"},
    {"$group": {"_id": "$state", "count": {"$sum": 1}}},
    # SON keeps the sort keys in the order given: count first, then _id.
    {"$sort": SON([("count", -1), ("_id", -1)])},
]

results = scratch_db.zips.aggregate(pipeline)
for result in results:
    # Read the fields by key instead of relying on the key order of the
    # returned document, which tuple(result.values()) silently depended on.
    print("State %s: %d" % (result["_id"], result["count"]))
In [50]:
print("Amount of cities with fewer then 50 people")

# Match every document whose "pop" value is below 50.
small_pop_filter = {"pop": {"$lt": 50}}

# Cursor.count() is deprecated since PyMongo 3.7 and removed in PyMongo 4.0;
# count the matches with Collection.count_documents() on the same filter.
print("%d cities" % scratch_db.zips.count_documents(small_pop_filter))

# Show only the first ten matches as a sample.
lt = scratch_db.zips.find(small_pop_filter)
for city in lt.limit(10):
    print("%s: %d" % (city['city'], city['pop']))
In [67]:
# A 2dsphere index on "loc" is required before $near can be used on it.
scratch_db.zips.create_index([("loc", "2dsphere")])

# Use one FLAGSTAFF document as the centre of the search.
flagstaff = scratch_db.zips.find_one({"city": "FLAGSTAFF"})

# Build the query inside-out: a GeoJSON point at the centre, then the $near
# clause limiting matches to a maximum distance of 50000 (metres, per the
# MongoDB $near documentation for GeoJSON points).
origin = {'type': 'Point', 'coordinates': flagstaff['loc']}
near_clause = {"$near": {"$geometry": origin, "$maxDistance": 50000}}

nearby = scratch_db.zips.find({"loc": near_clause})
for place in nearby:
    print(place['city'])