In [1]:
!pip install pymongo

%matplotlib inline

import numpy
import scipy
import scipy.stats
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sea

import pymongo


Collecting pymongo
  Downloading pymongo-3.4.0-cp35-cp35m-manylinux1_x86_64.whl (359kB)
    100% |████████████████████████████████| 368kB 1.6MB/s 
Installing collected packages: pymongo
Successfully installed pymongo-3.4.0
You are using pip version 8.1.2, however version 9.0.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.
/opt/conda/lib/python3.5/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.
  warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')
/opt/conda/lib/python3.5/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.
  warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')

In [2]:
# Open a client against the MongoDB server at host 'mainmongo', port 27017.
mongo = pymongo.MongoClient('mainmongo', 27017)

# Drain the cursor over the `nndb` collection of the `nutrition` database
# into a list so every document is held in memory for the cells below.
records = list(mongo.nutrition.nndb.find())

In [3]:
print("Record Count:", len(records))

# Build one summary dict per nutrient id. Each summary carries the fields of
# the first nutrient entry seen for that id plus:
#   min / min_food -- smallest value > 0 seen so far and the food it came from
#   max / max_food -- largest value seen so far and the food it came from
nutrients = dict()
for rec in records:
    descrip = rec['descrip']

    for nut in rec['nutrients']:
        nid = nut['nutrient_id']
        val = float(nut['nutrient_val'])

        if nid not in nutrients:
            # Store a copy: writing min/max into `nut` itself would modify
            # the document held in `records` and alias it from `nutrients`.
            nutrients[nid] = dict(
                nut,
                min=val, min_food=descrip,
                max=val, max_food=descrip,
            )
        elif val > 0.0:
            old = nutrients[nid]
            # `old['min'] <= 0.0` covers the case where the first value seen
            # was not positive, so the first positive value replaces it.
            if val < old['min'] or old['min'] <= 0.0:
                old['min'] = val
                old['min_food'] = descrip
            if val > old['max']:
                old['max'] = val
                old['max_food'] = descrip

print("Nutrient Count:", len(nutrients))


Record Count: 8618
Nutrient Count: 150

In [4]:
# Header row and dashed rule; column widths match the data rows printed below.
print("%6s %-20s %12s %-20s %12s %-20s" % ('ID', 'Nutrient', 'MinVal', 'MinFood', 'MaxVal', 'MaxFood'))
print("%6s %-20s %12s %-20s %12s %-20s" % ('---', '-'*20, '-'*12, '-'*20, '-'*12, '-'*20))

# One row per nutrient in ascending id order; text columns are cut to 20
# characters so they fit their fixed-width fields.
for nid in sorted(nutrients):
    nut = nutrients[nid]
    print("%6s %-20s %12.3f %-20s %12.3f %-20s" % (
        nid,
        nut['descrip'][:20],
        nut['min'],
        nut['min_food'][:20],
        nut['max'],
        nut['max_food'][:20],
    ))


    ID Nutrient                   MinVal MinFood                    MaxVal MaxFood             
   --- -------------------- ------------ -------------------- ------------ --------------------
   203 Protein                     0.010 Tea, instant, sweete       88.320 Soy protein isolate,
   204 Total lipid (fat)           0.010 Sauce, fish, ready-t      100.000 Fat, beef tallow    
   205 Carbohydrate, by dif        0.010 Chicken, skin (drums      100.000 Sweetener, herbal ex
   207 Ash                         0.010 Vanilla extract, imi       99.800 Salt, table         
   208 Energy                      1.000 Beverages, Green tea      902.000 Fat, beef tallow    
   209 Starch                      0.010 Turkey, retail parts       73.770 Rice, white, long-gr
   210 Sucrose                     0.010 Milk, reduced fat, f       99.800 Sugars, granulated  
   211 Glucose (dextrose)          0.010 Milk, reduced fat, f       57.000 Infant formula, MEAD
   212 Fructose                    0.010 Milk, reduced fat, f       55.600 Sweetener, syrup, ag
   213 Lactose                     0.010 Babyfood, fruit, pea       56.000 Infant formula, MEAD
   214 Maltose                     0.010 Milk, reduced fat, f       10.040 Formulated bar, SLIM
   221 Alcohol, ethyl              0.300 Malt beverage, inclu       42.500 Alcoholic beverage, 
   255 Water                       0.020 Sugars, granulated        100.000 Beverages, ICELANDIC
   257 Adjusted Protein            2.180 Mushrooms, white, ra       12.900 Baking chocolate, un
   262 Caffeine                    1.000 Milk, chocolate, flu     5714.000 Tea, instant, unswee
   263 Theobromine                 1.000 Tea, instant, sweete     2634.000 Cocoa, dry powder, u
   268 Energy                      1.000 Coffee, brewed from      3774.000 Fat, beef tallow    
   269 Sugars, total               0.010 Turkey, retail parts       99.800 Sugars, granulated  
   287 Galactose                   0.010 Milk, chocolate beve        5.620 Formulated bar, SLIM
   291 Fiber, total dietary        0.100 Cheese, pasteurized        79.000 Corn bran, crude    
   301 Calcium, Ca                 1.000 Babyfood, fruit, app     7364.000 Leavening agents, ba
   303 Iron, Fe                    0.010 Salad dressing, mayo      123.600 Spices, thyme, dried
   304 Magnesium, Mg               1.000 Dessert topping, pre      781.000 Rice bran, crude    
   305 Phosphorus, P               1.000 Salad dressing, thou     9918.000 Leavening agents, ba
   306 Potassium, K                1.000 Oil, olive, salad or    16500.000 Leavening agents, cr
   307 Sodium, Na                  1.000 Babyfood, fruit, app    38758.000 Salt, table         
   309 Zinc, Zn                    0.010 Butter oil, anhydrou       90.950 Mollusks, oyster, ea
   312 Copper, Cu                  0.001 Butter oil, anhydrou       15.050 Veal, variety meats 
   313 Fluoride, F                 0.100 Babyfood, crackers,       584.000 Tea, instant, sweete
   315 Manganese, Mn               0.001 Cream, fluid, half a      328.000 Beef, New Zealand, i
   317 Selenium, Se                0.100 Salt, table              1917.000 Nuts, brazilnuts, dr
   318 Vitamin A, IU               1.000 Babyfood, Baby MUM M   100000.000 Fish oil, cod liver 
   319 Retinol                     1.000 Milk, dry, nonfat, c    30000.000 Fish oil, cod liver 
   320 Vitamin A, RAE              1.000 Cream substitute, li    30000.000 Fish oil, cod liver 
   321 Carotene, beta              1.000 Milk, buttermilk, fl    42891.000 Peppers, sweet, red,
   322 Carotene, alpha             1.000 Cheese food, pasteur    14251.000 Carrot, dehydrated  
   323 Vitamin E (alpha-toc        0.010 Cheese, cottage, non      149.400 Oil, wheat germ     
   324 Vitamin D                   1.000 Milk, buttermilk, fl    10000.000 Fish oil, cod liver 
   325 Vitamin D2 (ergocalc        0.100 Mushrooms, brown, it       28.100 Mushrooms, maitake, 
   326 Vitamin D3 (cholecal        0.100 Cheese, cottage, cre       27.400 Fish, halibut, Green
   328 Vitamin D (D2 + D3)         0.100 Cheese, cottage, cre      250.000 Fish oil, cod liver 
   334 Cryptoxanthin, beta         1.000 Cheese spread, Ameri     6252.000 Spices, pepper, red 
   337 Lycopene                    1.000 Babyfood, dinner, ve    46260.000 Tomato powder       
   338 Lutein + zeaxanthin         1.000 Cream substitute, fl    19697.000 Kale, frozen, cooked
   341 Tocopherol, beta            0.010 Egg, whole, raw, fre       10.460 Snacks, potato chips
   342 Tocopherol, gamma           0.010 Cheese, cottage, non      100.880 Oil, cooking and sal
   343 Tocopherol, delta           0.010 Cheese food, pasteur       30.880 Oil, cooking and sal
   344 Tocotrienol, alpha          0.010 Cheese, white, queso        5.700 Babyfood, cereal, mi
   345 Tocotrienol, beta           0.010 Cheese, dry white, q        2.240 Cereals ready-to-eat
   346 Tocotrienol, gamma          0.010 Egg, whole, raw, fre        4.490 Nuts, pistachio nuts
   347 Tocotrienol, delta          0.010 Dairy, Cheese produc        1.460 Popcorn, microwave, 
   401 Vitamin C, total asc        0.100 Whey, acid, fluid        2400.000 Beverages, Orange-fl
   404 Thiamin                     0.001 Butter oil, anhydrou       23.375 Yeast extract spread
   405 Riboflavin                  0.001 Waxgourd, (chinese p       17.500 Yeast extract spread
   406 Niacin                      0.001 Fruit punch-flavor d      127.500 Yeast extract spread
   410 Pantothenic acid            0.001 Margarine-like, vege       34.500 Cereals ready-to-eat
   415 Vitamin B-6                 0.001 Butter oil, anhydrou       12.000 Cereals ready-to-eat
   417 Folate, total               1.000 Eggnog                   3786.000 Yeast extract spread
   418 Vitamin B-12                0.010 Butter oil, anhydrou       98.890 Mollusks, clam, mixe
   421 Choline, total              0.100 Dessert topping, pow     2403.300 Egg, yolk, dried    
   428 Menaquinone-4               0.100 Cheese, cream, fat f       41.700 Pepperoni, pork, bee
   429 Dihydrophylloquinone        0.100 Egg, whole, raw, fre      164.900 Shortening, vegetabl
   430 Vitamin K (phylloqui        0.100 Cheese, cottage, low     1714.500 Spices, basil, dried
   431 Folic acid                  1.000 Babyfood, dessert, p     2993.000 Yeast extract spread
   432 Folate, food                1.000 Eggnog                   2340.000 Leavening agents, ye
   435 Folate, DFE                 1.000 Eggnog                   5881.000 Yeast extract spread
   454 Betaine                     0.100 Cream substitute, po     4393.300 APPLEBEE'S, Double C
   501 Tryptophan                  0.001 Apples, raw, with sk        1.600 Sea lion, Steller, m
   502 Threonine                   0.001 Coffee, brewed from         3.685 Egg, white, dried   
   503 Isoleucine                  0.001 Babyfood, beverage,         5.023 Egg, white, dried, p
   504 Leucine                     0.002 Babyfood, beverage,         7.407 Egg, white, dried, s
   505 Lysine                      0.001 Coffee, brewed from         6.690 Seal, bearded (Oogru
   506 Methionine                  0.001 Apples, raw, with sk        3.204 Egg, white, dried, p
   507 Cystine                     0.001 Apples, raw, with sk        2.227 Egg, white, dried, s
   508 Phenylalanine               0.001 Guavas, strawberry,         5.181 Egg, white, dried, p
   509 Tyrosine                    0.001 Apples, raw, with sk        3.437 Egg, white, dried, s
   510 Valine                      0.001 Syrups, table blends        6.170 Egg, white, dried, p
   511 Arginine                    0.001 Coffee, brewed from         7.436 Seeds, sesame flour,
   512 Histidine                   0.001 Babyfood, beverage,         2.999 Whale, beluga, meat,
   513 Alanine                     0.001 Syrups, table blends        8.009 Gelatins, dry powder
   514 Aspartic acid               0.001 Mollusks, clam, mixe       10.203 Soy protein isolate 
   515 Glutamic acid               0.004 Syrups, table blends       17.452 Soy protein isolate 
   516 Glycine                     0.001 Mollusks, clam, mixe       19.049 Gelatins, dry powder
   517 Proline                     0.002 Babyfood, beverage,        12.295 Gelatins, dry powder
   518 Serine                      0.001 Coffee, brewed from         6.200 Egg, white, dried, s
   521 Hydroxyproline              0.002 Chicken, stewing, me        1.130 Turkey, skin, from r
   573 Vitamin E, added            0.040 Fruit juice drink, g       60.240 Cereals ready-to-eat
   578 Vitamin B-12, added         0.050 Candies, milk chocol       20.700 Cereals ready-to-eat
   601 Cholesterol                 1.000 Whey, acid, fluid        3100.000 Beef, variety meats 
   605 Fatty acids, total t        0.001 Soup, chunky chicken       42.851 Shortening, industri
   606 Fatty acids, total s        0.001 Fruit cocktail, (pea       95.600 Fish oil, menhaden, 
   607 4:0                         0.001 Ice cream sandwich          6.400 Roast beef spread   
   608 6:0                         0.001 Whey, acid, fluid           2.007 Butter, salted      
   609 8:0                         0.001 Milk, nonfat, fluid,       13.558 Child formula, MEAD 
   610 10:0                        0.001 Babyfood, dinner, ma        6.000 Oil, coconut        
   611 12:0                        0.001 Milk, nonfat, fluid,       53.428 Oil, industrial, pal
   612 14:0                        0.001 Dill weed, fresh           82.600 Oil, nutmeg butter  
   613 16:0                        0.001 Sauce, fish, ready-t       43.500 Oil, palm           
   614 18:0                        0.001 Vanilla extract            83.094 Oil, industrial, soy
   615 20:0                        0.001 Cheese, cream, fat f        1.944 Nuts, macadamia nuts
   617 18:1 undifferentiate        0.001 Babyfood, potatoes,        82.630 Oil, sunflower, high
   618 18:2 undifferentiate        0.001 Hyacinth-beans, imma       74.623 Oil, safflower, sala
   619 18:3 undifferentiate        0.001 Milk, nonfat, fluid,       53.368 Oil, flaxseed, cold 
   620 20:4 undifferentiate        0.001 Ice cream sandwich,         1.756 Fish oil, sardine   
   621 22:6 n-3 (DHA)              0.001 Ice cream sandwich         18.232 Fish oil, salmon    
   624 22:0                        0.001 Cheese, cottage, low        3.653 Chocolate-flavored h
   625 14:1                        0.001 Yogurt, Greek, plain        1.430 Oil, beluga, whale (
   626 16:1 undifferentiate        0.001 Milk, filled, fluid,       20.330 Oil, beluga, whale (
   627 18:4                        0.001 Ice cream sandwich          3.025 Fish oil, sardine   
   628 20:1                        0.001 Ice cream sandwich         15.020 Oil, spotted seal (A
   629 20:5 n-3 (EPA)              0.001 Cheese, cottage, low       13.168 Fish oil, menhaden  
   630 22:1 undifferentiate        0.001 Cheese, cheddar            41.175 Oil, mustard        
   631 22:5 n-3 (DPA)              0.001 Cheese, cottage, low        5.613 Oil, bearded seal (O
   636 Phytosterols                1.000 Frankfurter, pork        9060.000 Margarine-like, vege
   638 Stigmasterol                1.000 Margarine-like, vege       59.000 Oil, soybean, salad 
   639 Campesterol                 1.000 Margarine-like, vege      241.000 Oil, canola         
   641 Beta-sitosterol             2.000 Margarine-like, vege      621.000 Oil, corn, industria
   645 Fatty acids, total m        0.001 Babyfood, apple-cran       83.689 Oil, sunflower, high
   646 Fatty acids, total p        0.001 Jerusalem-artichokes       74.623 Oil, safflower, sala
   652 15:0                        0.001 Chicken, broilers or        0.800 Oil, sunflower, high
   653 17:0                        0.001 Babyfood, teething b        1.485 Lamb, New Zealand, i
   654 24:0                        0.001 Salad dressing, hone        1.174 Chocolate-flavored h
   662 16:1 t                      0.001 Turkey, gizzard, all        0.115 Beef, New Zealand, i
   663 18:1 t                      0.001 Sauce, pasta, spaghe       40.118 Shortening, industri
   664 22:1 t                      0.001 Edamame, frozen, pre        0.022 Turkey, retail parts
   665 18:2 t not further d        0.001 Chicken, broiler or         0.811 Margarine-like, vege
   666 18:2 i                      0.001 Cereals, oats, regul        4.029 Oil, industrial, soy
   669 18:2 t,t                    0.001 Beef, New Zealand, i        0.697 McDONALD'S, Baked Ap
   670 18:2 CLAs                   0.001 Chicken, broiler or         1.074 Beef, chuck, short r
   671 24:1 c                      0.001 Cheese, pasteurized         0.644 Spices, mustard seed
   672 20:2 n-6 c,c                0.001 Dulce de Leche              0.445 Pork, fresh, separab
   673 16:1 c                      0.001 Yogurt, Greek, non f        2.797 Chicken, skin (drums
   674 18:1 c                      0.004 Lemon juice, raw           74.742 Oil, safflower, sala
   675 18:2 n-6 c,c                0.002 Fish, pollock, Alask       56.601 USDA Commodity Food,
   676 22:1 c                      0.001 Cheese, cheddar             1.056 Mustard, prepared, y
   685 18:3 n-6 c,c,c              0.001 Cheese, cheddar             0.590 Margarine, 80% fat, 
   687 17:1                        0.001 Turkey, whole, meat         1.085 Tofu, extra firm, pr
   689 20:3 undifferentiate        0.001 Margarine, regular,         0.353 Nuts, pine nuts, dri
   693 Fatty acids, total t        0.001 Sauce, pasta, spaghe       40.118 Shortening, industri
   695 Fatty acids, total t        0.001 Chicken, broiler or         4.524 Oil, industrial, soy
   696 13:0                        0.001 Salad dressing, ital        0.079 Spices, curry powder
   697 15:1                        0.001 Fish, ocean perch, A        2.169 Tofu, extra firm, pr
   851 18:3 n-3 c,c,c (ALA)        0.001 Yogurt, Greek, plain       53.368 Oil, flaxseed, cold 
   852 20:3 n-3                    0.001 Egg, whole, raw, fre        0.081 Oil, flaxseed, cold 
   853 20:3 n-6                    0.001 Margarine, regular,         0.102 Lamb, New Zealand, i
   855 20:4 n-6                    0.010 Beef, New Zealand, i        0.670 Fish, salmon, coho (
   856 18:3i                       0.001 Cheese spread, Ameri        1.446 USDA Commodity Food,
   857 21:5                        0.005 Fish, whitefish, egg        0.350 Oil, bearded seal (O
   858 22:4                        0.001 Ruffed Grouse, breas        0.170 Oil, bearded seal (O
   859 18:1-11 t (18:1t n-7        0.021 Beef, Australian, gr        2.579 Beef, Australian, Wa

In [5]:
def _nutrient_vals(nid, foods=None):
    """Yield one value per food record for the nutrient ``nid``.

    Parameters
    ----------
    nid
        Nutrient id, compared with ``==`` against each entry's
        ``'nutrient_id'``.
    foods : iterable of dict, optional
        Food records, each with a ``'nutrients'`` list whose entries carry
        ``'nutrient_id'`` and ``'nutrient_val'``. When omitted, the
        notebook-level ``records`` list is used.

    Yields
    ------
    float
        ``float(nutrient_val)`` of the first matching entry of each food,
        or ``0.0`` when the food has no entry for ``nid``.
    """
    if foods is None:
        foods = records
    for food in foods:
        # First match wins; foods without the nutrient contribute 0.0 so the
        # output stays aligned one-to-one with the input records.
        yield next(
            (
                float(nut['nutrient_val'])
                for nut in food['nutrients']
                if nut['nutrient_id'] == nid
            ),
            0.0,
        )


def nutrient_vals(nid, foods=None):
    """Return the per-food values of nutrient ``nid`` as a 1-D float array.

    Parameters
    ----------
    nid
        Nutrient id to extract (see ``_nutrient_vals``).
    foods : iterable of dict, optional
        Food records to scan; defaults to the notebook-level ``records``.

    Returns
    -------
    numpy.ndarray
        One float per food record, in record order; empty input gives an
        empty float array.
    """
    # numpy.array replaces scipy.array, which was only a deprecated alias of
    # the NumPy function exposed through the scipy namespace.
    return numpy.array(list(_nutrient_vals(nid, foods)), dtype=float)

In [7]:
# One column per selected nutrient, one row per food record; the pairs map a
# column label to the nutrient id passed to nutrient_vals.
vits = pd.DataFrame({
    label: nutrient_vals(nutrient_id)
    for label, nutrient_id in (
        ('B12', '418'),
        ('C', '401'),
        ('D', '324'),
        ('Calcium', '301'),
    )
})

# Summary statistics for each column, shown as the cell output.
vits.describe()


Out[7]:
B12 C Calcium D
count 8618.000000 8618.000000 8618.000000 8618.000000
mean 1.225260 7.925377 73.411348 14.292527
std 4.319183 57.582758 201.363662 122.674571
min 0.000000 0.000000 0.000000 0.000000
25% 0.000000 0.000000 9.000000 0.000000
50% 0.080000 0.000000 19.000000 0.000000
75% 1.297500 2.500000 62.000000 1.000000
max 98.890000 2400.000000 7364.000000 10000.000000

In [9]:
# Draw seaborn's pair plot for the columns of `vits`. The call returns a
# PairGrid, which is what this cell displays as its output value.
sea.pairplot(vits)


Out[9]:
<seaborn.axisgrid.PairGrid at 0x7f83b17b1da0>