To run this example, you need a MongoDB server reachable at `localhost:27017` and the `pymongo` package, which can be installed with:

pip install pymongo


In [1]:
import pymongo
import pandas as pd

In [2]:
# Create a client for the MongoDB server at localhost:27017.
client = pymongo.MongoClient(host='localhost', port=27017)
# Select the 'income' collection inside the 'adults-db' database.
db = client.get_database('adults-db')
collection = db.get_collection('income')

In [3]:
# Query the collection without a filter, then materialize every returned
# document into a list so pandas can build one row per document.
cursor = collection.find()
income_df = pd.DataFrame(list(cursor))

In [4]:
# Release the MongoDB connection; the query results were already copied
# into income_df by the previous cell, so the client is no longer needed.
client.close()

In [6]:
# Preview the first five rows of the loaded frame.
income_df.head(n=5)


Out[6]:
_id age capital-gain capital-loss education education-num fnlwgt hours-per-week label marital-status native-country occupation race relationship sex workclass
0 5c473e970adc7435a0930fb9 39 2174 0 Bachelors 13 77516 40 <=50K Never-married United-States Adm-clerical White Not-in-family Male State-gov
1 5c473e970adc7435a0930fba 50 0 0 Bachelors 13 83311 13 <=50K Married-civ-spouse United-States Exec-managerial White Husband Male Self-emp-not-inc
2 5c473e970adc7435a0930fbb 38 0 0 HS-grad 9 215646 40 <=50K Divorced United-States Handlers-cleaners White Not-in-family Male Private
3 5c473e970adc7435a0930fbc 53 0 0 11th 7 234721 40 <=50K Married-civ-spouse United-States Handlers-cleaners Black Husband Male Private
4 5c473e970adc7435a0930fbd 28 0 0 Bachelors 13 338409 40 <=50K Married-civ-spouse Cuba Prof-specialty Black Wife Female Private

In [7]:
# Preview the last five rows of the loaded frame.
income_df.tail(n=5)


Out[7]:
_id age capital-gain capital-loss education education-num fnlwgt hours-per-week label marital-status native-country occupation race relationship sex workclass
32556 5c473ea60adc7435a0938ee5 27 0 0 Assoc-acdm 12 257302 38 <=50K Married-civ-spouse United-States Tech-support White Wife Female Private
32557 5c473ea60adc7435a0938ee6 40 0 0 HS-grad 9 154374 40 >50K Married-civ-spouse United-States Machine-op-inspct White Husband Male Private
32558 5c473ea60adc7435a0938ee7 58 0 0 HS-grad 9 151910 40 <=50K Widowed United-States Adm-clerical White Unmarried Female Private
32559 5c473ea60adc7435a0938ee8 22 0 0 HS-grad 9 201490 20 <=50K Never-married United-States Adm-clerical White Own-child Male Private
32560 5c473ea60adc7435a0938ee9 52 15024 0 HS-grad 9 287927 40 >50K Married-civ-spouse United-States Exec-managerial White Wife Female Self-emp-inc

In [8]:
# Summary statistics (count, mean, std, min, quartiles, max) for the 'age' column.
income_df['age'].describe()


Out[8]:
count    32561.000000
mean        38.581647
std         13.640433
min         17.000000
25%         28.000000
50%         37.000000
75%         48.000000
max         90.000000
Name: age, dtype: float64

In [9]:
# 'education' is an object-dtype column, so describe() reports the count,
# the number of unique values, the most frequent value (top) and its frequency.
income_df['education'].describe()


Out[9]:
count        32561
unique          16
top        HS-grad
freq         10501
Name: education, dtype: object

In [ ]: