22.12.2016

In this notebook I learn basic commands for querying data from the MongoDB.


insert

_id is given automatically:

> db.publishers.insert({publisherId:"chweng",publisherName:"OREILLY"})
WriteResult({ "nInserted" : 1 })
> db.publishers.insert({publisherId:"WOLF",publisherName:"WOLFRAM"})
WriteResult({ "nInserted" : 1 })
> db.publishers.find()
{ "_id" : ObjectId("585a3f9c138f0a966cf3031e"), "publisherId" : "chweng", "publisherName" : "OREILLY" }
{ "_id" : ObjectId("585a3fb5138f0a966cf3031f"), "publisherId" : "WOLF", "publisherName" : "WOLFRAM" }

_id is given by the user:

>     db.publishers.insert({_id: "PH", publisherName: "Prentice Hall PTR"})
WriteResult({ "nInserted" : 1 })
>     db.publishers.insert({_id: "OA", publisherName: "O’Reilly & Associates"})
WriteResult({ "nInserted" : 1 })

>     db.publishers.find()
{ "_id" : ObjectId("585a3f9c138f0a966cf3031e"), "publisherId" : "chweng", "publisherName" : "OREILLY" }
{ "_id" : ObjectId("585a3fb5138f0a966cf3031f"), "publisherId" : "WOLF", "publisherName" : "WOLFRAM" }
{ "_id" : "PH", "publisherName" : "Prentice Hall PTR" }
{ "_id" : "OA", "publisherName" : "O’Reilly & Associates" }

remove

> db.publishers.remove({publisherId:{$exists:true}})
WriteResult({ "nRemoved" : 2 })

> db.publishers.remove({publisherName:{$exists:true}})
WriteResult({ "nRemoved" : 2 })
> db.publishers.find()
>

example: a collection of books:

db.books.insert({_id: 1, isbn: "0131002872", title: "Thinking in Java", releaseDate: "2002-12-01", listPrice: 54.99, pubId: "PH"})
db.books.insert({_id: 2, isbn: "059600530X", title: "Enterprise JavaBeans", releaseDate: "2004-06-02", listPrice: 44.95, pubId: "OA"})
db.books.insert({_id: 3, isbn: "0596005717", title: "Head First EJB", releaseDate: "2003-10-03", listPrice: 44.95, pubId: "OA"})
db.books.insert({_id: 4, isbn: "0596004656", title: "Head First Java", releaseDate: "2003-05-04", listPrice: 39.95, pubId: "OA"})
db.books.find()

find books which its price is more than 45 dollars

> db.books.find({listPrice:{$gt:45}})
{ "_id" : 1, "isbn" : "0131002872", "title" : "Thinking in Java", "releaseDate" : "2002-12-01", "listPrice" : 54.99, "pubId" : "PH" }

use regular expression to filter out the book title

> db.books.find({title:/.*in.*/g,listPrice:{$gt:50}})
{ "_id" : 1, "isbn" : "0131002872", "title" : "Thinking in Java", "releaseDate" : "2002-12-01", "listPrice" : 54.99, "pubId" : "PH" }

see only specific columns

> db.books.find(null,{title:1, listPrice: 1, _id: 0})
{ "title" : "Thinking in Java", "listPrice" : 54.99 }
{ "title" : "Enterprise JavaBeans", "listPrice" : 44.95 }
{ "title" : "Head First EJB", "listPrice" : 44.95 }
{ "title" : "Head First Java", "listPrice" : 39.95 }

sort the books according to their prices

>     db.books.find(null, {title:1, listPrice: 1, _id: 0}).sort({listPrice: 1})
{ "title" : "Head First Java", "listPrice" : 39.95 }
{ "title" : "Enterprise JavaBeans", "listPrice" : 44.95 }
{ "title" : "Head First EJB", "listPrice" : 44.95 }
{ "title" : "Thinking in Java", "listPrice" : 54.99 }
>     db.books.find(null, {title:1, listPrice: 1, _id: 0}).sort({listPrice: -1})
{ "title" : "Thinking in Java", "listPrice" : 54.99 }
{ "title" : "Enterprise JavaBeans", "listPrice" : 44.95 }
{ "title" : "Head First EJB", "listPrice" : 44.95 }
{ "title" : "Head First Java", "listPrice" : 39.95 }

update the price of a book

> db.books.update({title: "Thinking in Java"}, {$set: {listPrice: 55.99}})
WriteResult({ "nMatched" : 1, "nUpserted" : 0, "nModified" : 1 })
> db.books.find(null,{title:1, listPrice: 1, _id: 0})
{ "title" : "Thinking in Java", "listPrice" : 55.99 }
{ "title" : "Enterprise JavaBeans", "listPrice" : 44.95 }
{ "title" : "Head First EJB", "listPrice" : 44.95 }
{ "title" : "Head First Java", "listPrice" : 39.95 }

example: cities and populations

use mongoimport (from the terminal) to import the JSON file into the database:

Last login: Thu Dec 22 10:52:03 on ttys006
Wengs-MacBook-Air:~ chweng$ mongoimport --db test --collection zips --drop --file /Volumes/Transcend/mongodblab/samples/zips.json
2016-12-22T10:53:20.324+0800    connected to: localhost
2016-12-22T10:53:20.326+0800    dropping: test.zips
2016-12-22T10:53:21.077+0800    imported 29353 documents
> db.zips.find({city: "BEVERLY HILLS"})
{ "_id" : "76711", "city" : "BEVERLY HILLS", "loc" : [ -97.150254, 31.519863 ], "pop" : 8736, "state" : "TX" }
{ "_id" : "90210", "city" : "BEVERLY HILLS", "loc" : [ -118.406477, 34.090107 ], "pop" : 20700, "state" : "CA" }
{ "_id" : "90211", "city" : "BEVERLY HILLS", "loc" : [ -118.383007, 34.065217 ], "pop" : 7746, "state" : "CA" }
{ "_id" : "90212", "city" : "BEVERLY HILLS", "loc" : [ -118.399544, 34.061855 ], "pop" : 10725, "state" : "CA" }

create the index for the geo location of the cities:

> db.zips.ensureIndex({loc: "2d"})
{
    "createdCollectionAutomatically" : false,
    "numIndexesBefore" : 1,
    "numIndexesAfter" : 2,
    "ok" : 1
}

choose a location of a city (beverlyhills.loc)

> db.zips.findOne({ city: "BEVERLY HILLS", _id: "90210" })
{
    "_id" : "90210",
    "city" : "BEVERLY HILLS",
    "loc" : [
        -118.406477,
        34.090107
    ],
    "pop" : 20700,
    "state" : "CA"
}
> var beverlyhills= db.zips.findOne({ city: "BEVERLY HILLS", _id: "90210" })

> beverlyhills.loc
[ -118.406477, 34.090107 ]

find places which are near to the chosen location (beverlyhills.loc)

>     db.zips.find({
...         loc: {
...             $near: beverlyhills.loc, 
...             $maxDistance: 0.03
...         }
...     })
{ "_id" : "90210", "city" : "BEVERLY HILLS", "loc" : [ -118.406477, 34.090107 ], "pop" : 20700, "state" : "CA" }
{ "_id" : "90069", "city" : "WEST HOLLYWOOD", "loc" : [ -118.378753, 34.090573 ], "pop" : 20587, "state" : "CA" }
{ "_id" : "90212", "city" : "BEVERLY HILLS", "loc" : [ -118.399544, 34.061855 ], "pop" : 10725, "state" : "CA" }

group by: calculate the total population of a state, by adding all the populations of the cities within that state:

> db.zips.aggregate({  $group:{_id:"$state",totalpop:{$sum:"$pop"} }  })
{ "_id" : "NV", "totalpop" : 1201833 }
{ "_id" : "ID", "totalpop" : 1006749 }
{ "_id" : "CO", "totalpop" : 3293755 }
{ "_id" : "NC", "totalpop" : 6628637 }
{ "_id" : "TX", "totalpop" : 16984601 }
{ "_id" : "NH", "totalpop" : 1109252 }
{ "_id" : "OK", "totalpop" : 3145585 }
{ "_id" : "LA", "totalpop" : 4217595 }
{ "_id" : "KS", "totalpop" : 2475285 }
{ "_id" : "MO", "totalpop" : 5110648 }
{ "_id" : "IL", "totalpop" : 11427576 }
{ "_id" : "IN", "totalpop" : 5544136 }
{ "_id" : "VA", "totalpop" : 6181479 }
{ "_id" : "SD", "totalpop" : 695397 }
{ "_id" : "WI", "totalpop" : 4891769 }
{ "_id" : "AK", "totalpop" : 544698 }
{ "_id" : "WV", "totalpop" : 1793146 }
{ "_id" : "IA", "totalpop" : 2776420 }
{ "_id" : "HI", "totalpop" : 1108229 }
{ "_id" : "MI", "totalpop" : 9295297 }
Type "it" for more

group by + where

> db.zips.aggregate(   [  { $group:{_id:"$state",totalpop:{$sum:"$pop"} }  } , {$match:{totalpop:{$gt:100000}}}  ] )

{ "_id" : "NV", "totalpop" : 1201833 }
{ "_id" : "ID", "totalpop" : 1006749 }
{ "_id" : "CO", "totalpop" : 3293755 }
{ "_id" : "NC", "totalpop" : 6628637 }
{ "_id" : "TX", "totalpop" : 16984601 }
{ "_id" : "NH", "totalpop" : 1109252 }
{ "_id" : "OK", "totalpop" : 3145585 }
{ "_id" : "LA", "totalpop" : 4217595 }
{ "_id" : "KS", "totalpop" : 2475285 }
{ "_id" : "MO", "totalpop" : 5110648 }
{ "_id" : "IL", "totalpop" : 11427576 }
{ "_id" : "IN", "totalpop" : 5544136 }
{ "_id" : "VA", "totalpop" : 6181479 }
{ "_id" : "SD", "totalpop" : 695397 }
{ "_id" : "WI", "totalpop" : 4891769 }
{ "_id" : "AK", "totalpop" : 544698 }
{ "_id" : "WV", "totalpop" : 1793146 }
{ "_id" : "IA", "totalpop" : 2776420 }
{ "_id" : "HI", "totalpop" : 1108229 }
{ "_id" : "MI", "totalpop" : 9295297 }
Type "it" for more

group by + where + order by

> db.zips.aggregate(   [  { $group:{_id:"$state",totalpop:{$sum:"$pop"} }  } , {$match:{totalpop:{$gt:100000}}} , {$sort:{totalpop:-1}} ] )

{ "_id" : "CA", "totalpop" : 29754890 }
{ "_id" : "NY", "totalpop" : 17990402 }
{ "_id" : "TX", "totalpop" : 16984601 }
{ "_id" : "FL", "totalpop" : 12686644 }
{ "_id" : "PA", "totalpop" : 11881643 }
{ "_id" : "IL", "totalpop" : 11427576 }
{ "_id" : "OH", "totalpop" : 10846517 }
{ "_id" : "MI", "totalpop" : 9295297 }
{ "_id" : "NJ", "totalpop" : 7730188 }
{ "_id" : "NC", "totalpop" : 6628637 }
{ "_id" : "GA", "totalpop" : 6478216 }
{ "_id" : "VA", "totalpop" : 6181479 }
{ "_id" : "MA", "totalpop" : 6016425 }
{ "_id" : "IN", "totalpop" : 5544136 }
{ "_id" : "MO", "totalpop" : 5110648 }
{ "_id" : "WI", "totalpop" : 4891769 }
{ "_id" : "TN", "totalpop" : 4876457 }
{ "_id" : "WA", "totalpop" : 4866692 }
{ "_id" : "MD", "totalpop" : 4781379 }
{ "_id" : "MN", "totalpop" : 4372982 }
Type "it" for more

exercises (group by)

We have the following table:

> db.zips.find()
{ "_id" : "01001", "city" : "AGAWAM", "loc" : [ -72.622739, 42.070206 ], "pop" : 15338, "state" : "MA" }
{ "_id" : "01002", "city" : "CUSHMAN", "loc" : [ -72.51565, 42.377017 ], "pop" : 36963, "state" : "MA" }
{ "_id" : "01005", "city" : "BARRE", "loc" : [ -72.108354, 42.409698 ], "pop" : 4546, "state" : "MA" }
{ "_id" : "01007", "city" : "BELCHERTOWN", "loc" : [ -72.410953, 42.275103 ], "pop" : 10579, "state" : "MA" }
{ "_id" : "01008", "city" : "BLANDFORD", "loc" : [ -72.936114, 42.182949 ], "pop" : 1240, "state" : "MA" }
{ "_id" : "01010", "city" : "BRIMFIELD", "loc" : [ -72.188455, 42.116543 ], "pop" : 3706, "state" : "MA" }
{ "_id" : "01011", "city" : "CHESTER", "loc" : [ -72.988761, 42.279421 ], "pop" : 1688, "state" : "MA" }
{ "_id" : "01012", "city" : "CHESTERFIELD", "loc" : [ -72.833309, 42.38167 ], "pop" : 177, "state" : "MA" }
{ "_id" : "01013", "city" : "CHICOPEE", "loc" : [ -72.607962, 42.162046 ], "pop" : 23396, "state" : "MA" }
{ "_id" : "01020", "city" : "CHICOPEE", "loc" : [ -72.576142, 42.176443 ], "pop" : 31495, "state" : "MA" }
{ "_id" : "01022", "city" : "WESTOVER AFB", "loc" : [ -72.558657, 42.196672 ], "pop" : 1764, "state" : "MA" }
{ "_id" : "01026", "city" : "CUMMINGTON", "loc" : [ -72.905767, 42.435296 ], "pop" : 1484, "state" : "MA" }
{ "_id" : "01027", "city" : "MOUNT TOM", "loc" : [ -72.679921, 42.264319 ], "pop" : 16864, "state" : "MA" }
{ "_id" : "01028", "city" : "EAST LONGMEADOW", "loc" : [ -72.505565, 42.067203 ], "pop" : 13367, "state" : "MA" }
{ "_id" : "01030", "city" : "FEEDING HILLS", "loc" : [ -72.675077, 42.07182 ], "pop" : 11985, "state" : "MA" }
{ "_id" : "01031", "city" : "GILBERTVILLE", "loc" : [ -72.198585, 42.332194 ], "pop" : 2385, "state" : "MA" }
{ "_id" : "01032", "city" : "GOSHEN", "loc" : [ -72.844092, 42.466234 ], "pop" : 122, "state" : "MA" }
{ "_id" : "01033", "city" : "GRANBY", "loc" : [ -72.520001, 42.255704 ], "pop" : 5526, "state" : "MA" }
{ "_id" : "01034", "city" : "TOLLAND", "loc" : [ -72.908793, 42.070234 ], "pop" : 1652, "state" : "MA" }
{ "_id" : "01035", "city" : "HADLEY", "loc" : [ -72.571499, 42.36062 ], "pop" : 4231, "state" : "MA" }
Type "it" for more

Now, here's the task:

  1. obtain the population of every cities.
  2. calculate the averaged city population for every states.
> db.zips.aggregate({  $group:{_id:{city:"$city", state:"$state"} ,totalpop:{$sum:"$pop"} }  })

{ "_id" : { "city" : "WRANGELL", "state" : "AK" }, "totalpop" : 2573 }
{ "_id" : { "city" : "POINT BAKER", "state" : "AK" }, "totalpop" : 426 }
{ "_id" : { "city" : "KLAWOCK", "state" : "AK" }, "totalpop" : 851 }
{ "_id" : { "city" : "CRAIG", "state" : "AK" }, "totalpop" : 1398 }
{ "_id" : { "city" : "HOONAH", "state" : "AK" }, "totalpop" : 1670 }
{ "_id" : { "city" : "NUIQSUT", "state" : "AK" }, "totalpop" : 354 }
{ "_id" : { "city" : "AMBLER", "state" : "AK" }, "totalpop" : 8 }
{ "_id" : { "city" : "WHITE MOUNTAIN", "state" : "AK" }, "totalpop" : 194 }
{ "_id" : { "city" : "VENETIE", "state" : "AK" }, "totalpop" : 184 }
{ "_id" : { "city" : "BORDER", "state" : "AK" }, "totalpop" : 1805 }
{ "_id" : { "city" : "STEVENS VILLAGE", "state" : "AK" }, "totalpop" : 110 }
{ "_id" : { "city" : "SHUNGNAK", "state" : "AK" }, "totalpop" : 0 }
{ "_id" : { "city" : "SHAKTOOLIK", "state" : "AK" }, "totalpop" : 183 }
{ "_id" : { "city" : "POINT HOPE", "state" : "AK" }, "totalpop" : 640 }
{ "_id" : { "city" : "NOORVIK", "state" : "AK" }, "totalpop" : 534 }
{ "_id" : { "city" : "NENANA", "state" : "AK" }, "totalpop" : 393 }
{ "_id" : { "city" : "POINT LAY", "state" : "AK" }, "totalpop" : 139 }
{ "_id" : { "city" : "KOYUK", "state" : "AK" }, "totalpop" : 231 }
{ "_id" : { "city" : "KIVALINA", "state" : "AK" }, "totalpop" : 689 }
{ "_id" : { "city" : "KAKTOVIK", "state" : "AK" }, "totalpop" : 245 }
> db.zips.aggregate([
...     {$group:{_id:{city:"$city", state:"$state"} ,totalpop:{$sum:"$pop"} }  },
...     {$group:{_id:"$_id.state" ,avg:{$avg:"$totalpop"} } }
...     ])

{ "_id" : "DC", "avg" : 303450 }
{ "_id" : "RI", "avg" : 19292.653846153848 }
{ "_id" : "DE", "avg" : 14481.91304347826 }
{ "_id" : "VT", "avg" : 2315.8765432098767 }
{ "_id" : "CT", "avg" : 14674.625 }
{ "_id" : "GA", "avg" : 11547.62210338681 }
{ "_id" : "ME", "avg" : 3006.4901960784314 }
{ "_id" : "WA", "avg" : 12258.670025188916 }
{ "_id" : "SC", "avg" : 11139.626198083068 }
{ "_id" : "OK", "avg" : 6155.743639921722 }
{ "_id" : "NH", "avg" : 5232.320754716981 }
{ "_id" : "PA", "avg" : 8679.067202337472 }
{ "_id" : "MS", "avg" : 7524.023391812865 }
{ "_id" : "IN", "avg" : 9271.130434782608 }
{ "_id" : "SD", "avg" : 1839.6746031746031 }
{ "_id" : "VA", "avg" : 8526.177931034483 }
{ "_id" : "TN", "avg" : 9656.350495049504 }
{ "_id" : "HI", "avg" : 15831.842857142858 }
{ "_id" : "MI", "avg" : 12087.512353706112 }
{ "_id" : "AK", "avg" : 2976.4918032786886 }
Type "it" for more