In [1]:
import pandas as pd
import numpy as np
from pandas import DataFrame, Series
In [2]:
val = 'a,b, guido'
val.split(',')
Out[2]:
['a', 'b', ' guido']
In [3]:
pieces = [x.strip() for x in val.split(',')]
pieces
Out[3]:
['a', 'b', 'guido']
In [4]:
first, second, third = pieces
first + '::' + second + '::' + third
Out[4]:
'a::b::guido'
In [5]:
'::'.join(pieces)
Out[5]:
'a::b::guido'
In [6]:
'guido' in val
Out[6]:
True
In [7]:
val.index(',')
Out[7]:
1
In [8]:
val.find(':')
Out[8]:
-1
In [9]:
val.index(':')
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-9-280f8b2856ce> in <module>()
----> 1 val.index(':')
ValueError: substring not found
In [10]:
val.count(',')
Out[10]:
2
In [11]:
val.replace(',', '::')
Out[11]:
'a::b:: guido'
In [12]:
val.replace(',', '')
Out[12]:
'ab guido'
In [14]:
import re
text = "foo bar\t baz \tqux"
re.split('\s+', text)
Out[14]:
['foo', 'bar', 'baz', 'qux']
In [15]:
regex = re.compile('\s+')
regex.split(text)
Out[15]:
['foo', 'bar', 'baz', 'qux']
In [16]:
regex.findall(text)
Out[16]:
[' ', '\t ', ' \t']
In [18]:
text = """Dave dave@google.com
Steve steve@gmail.com
Rob rob@gmail.com
Ryan ryan@yahoo.com
"""
pattern = r'[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}'
regex = re.compile(pattern, flags=re.IGNORECASE)
regex.findall(text)
Out[18]:
['dave@google.com', 'steve@gmail.com', 'rob@gmail.com', 'ryan@yahoo.com']
In [19]:
m = regex.search(text)
m
Out[19]:
<_sre.SRE_Match object; span=(5, 20), match='dave@google.com'>
In [21]:
text[m.start():m.end()]
Out[21]:
'dave@google.com'
In [23]:
print(regex.match(text))
None
In [24]:
print(regex.sub('REDACTED', text))
Dave REDACTED
Steve REDACTED
Rob REDACTED
Ryan REDACTED
In [25]:
pattern = r'([A-Z0-9._%+-]+)@([A-Z0-9.-]+)\.([A-Z]{2,4})'
regex = re.compile(pattern, flags=re.IGNORECASE)
m = regex.match('wesm@bright.net')
m.groups()
Out[25]:
('wesm', 'bright', 'net')
In [26]:
regex.findall(text)
Out[26]:
[('dave', 'google', 'com'),
('steve', 'gmail', 'com'),
('rob', 'gmail', 'com'),
('ryan', 'yahoo', 'com')]
In [27]:
print(regex.sub(r'Username: \1, Domain: \2, Suffix: \3', text))
Dave Username: dave, Domain: google, Suffix: com
Steve Username: steve, Domain: gmail, Suffix: com
Rob Username: rob, Domain: gmail, Suffix: com
Ryan Username: ryan, Domain: yahoo, Suffix: com
In [28]:
regex = re.compile(r"""
(?P<username>[A-Z0-9._%+-]+)
@
(?P<domain>[A-Z0-9.-]+)
\.
(?P<suffix>[A-Z]{2,4})""", flags=re.IGNORECASE|re.VERBOSE)
m = regex.match('wesm@bright.net')
m.groupdict()
Out[28]:
{'domain': 'bright', 'suffix': 'net', 'username': 'wesm'}
In [29]:
data = {'Dave': 'dave@google.com', 'Steve': 'steve@gmail.com',
'Rob': 'rob@gmail.com', 'Wes': np.nan}
data = Series(data)
data
Out[29]:
Dave dave@google.com
Rob rob@gmail.com
Steve steve@gmail.com
Wes NaN
dtype: object
In [30]:
data.isnull()
Out[30]:
Dave False
Rob False
Steve False
Wes True
dtype: bool
In [31]:
data.str.contains('gmail')
Out[31]:
Dave False
Rob True
Steve True
Wes NaN
dtype: object
In [32]:
pattern
Out[32]:
'([A-Z0-9._%+-]+)@([A-Z0-9.-]+)\\.([A-Z]{2,4})'
In [34]:
data.str.findall(pattern, flags=re.IGNORECASE)
Out[34]:
Dave [(dave, google, com)]
Rob [(rob, gmail, com)]
Steve [(steve, gmail, com)]
Wes NaN
dtype: object
In [35]:
matches = data.str.match(pattern, flags=re.IGNORECASE)
matches
/Users/alexkirnas/anaconda/lib/python3.6/site-packages/ipykernel/__main__.py:1: FutureWarning: In future versions of pandas, match will change to always return a bool indexer.
if __name__ == '__main__':
Out[35]:
Dave (dave, google, com)
Rob (rob, gmail, com)
Steve (steve, gmail, com)
Wes NaN
dtype: object
In [36]:
matches.str.get(1)
Out[36]:
Dave google
Rob gmail
Steve gmail
Wes NaN
dtype: object
In [37]:
matches.str[0]
Out[37]:
Dave dave
Rob rob
Steve steve
Wes NaN
dtype: object
In [38]:
data.str[:5]
Out[38]:
Dave dave@
Rob rob@g
Steve steve
Wes NaN
dtype: object
In [39]:
import json
db = json.load(open('foods-2011-10-03.json'))
len(db)
Out[39]:
6636
In [40]:
db[0].keys()
Out[40]:
dict_keys(['id', 'description', 'tags', 'manufacturer', 'group', 'portions', 'nutrients'])
In [41]:
db[0]['nutrients'][0]
Out[41]:
{'description': 'Protein',
'group': 'Composition',
'units': 'g',
'value': 25.18}
In [42]:
nutrients = DataFrame(db[0]['nutrients'])
nutrients[:7]
Out[42]:
description
group
units
value
0
Protein
Composition
g
25.18
1
Total lipid (fat)
Composition
g
29.20
2
Carbohydrate, by difference
Composition
g
3.06
3
Ash
Other
g
3.28
4
Energy
Energy
kcal
376.00
5
Water
Composition
g
39.28
6
Energy
Energy
kJ
1573.00
In [43]:
info_keys = ['description', 'group', 'id', 'manufacturer']
info = DataFrame(db, columns=info_keys)
info
Out[43]:
description
group
id
manufacturer
0
Cheese, caraway
Dairy and Egg Products
1008
1
Cheese, cheddar
Dairy and Egg Products
1009
2
Cheese, edam
Dairy and Egg Products
1018
3
Cheese, feta
Dairy and Egg Products
1019
4
Cheese, mozzarella, part skim milk
Dairy and Egg Products
1028
5
Cheese, mozzarella, part skim milk, low moisture
Dairy and Egg Products
1029
6
Cheese, romano
Dairy and Egg Products
1038
7
Cheese, roquefort
Dairy and Egg Products
1039
8
Cheese spread, pasteurized process, american, ...
Dairy and Egg Products
1048
9
Cream, fluid, half and half
Dairy and Egg Products
1049
10
Sour dressing, non-butterfat, cultured, filled...
Dairy and Egg Products
1058
11
Milk, filled, fluid, with blend of hydrogenate...
Dairy and Egg Products
1059
12
Cream substitute, liquid, with lauric acid oil...
Dairy and Egg Products
1068
13
Cream substitute, powdered
Dairy and Egg Products
1069
14
Milk, producer, fluid, 3.7% milkfat
Dairy and Egg Products
1078
15
Milk, reduced fat, fluid, 2% milkfat, with add...
Dairy and Egg Products
1079
None
16
Milk, reduced fat, fluid, 2% milkfat, with add...
Dairy and Egg Products
1080
17
Milk, reduced fat, fluid, 2% milkfat, protein ...
Dairy and Egg Products
1081
18
Milk, lowfat, fluid, 1% milkfat, with added vi...
Dairy and Egg Products
1082
19
Milk, lowfat, fluid, 1% milkfat, with added no...
Dairy and Egg Products
1083
20
Milk, lowfat, fluid, 1% milkfat, protein forti...
Dairy and Egg Products
1084
21
Milk, nonfat, fluid, with added vitamin A and ...
Dairy and Egg Products
1085
22
Milk, nonfat, fluid, with added nonfat milk so...
Dairy and Egg Products
1086
23
Milk, nonfat, fluid, protein fortified, with a...
Dairy and Egg Products
1087
24
Milk, buttermilk, fluid, cultured, lowfat
Dairy and Egg Products
1088
25
Milk, low sodium, fluid
Dairy and Egg Products
1089
26
Milk, dry, whole, with added vitamin D
Dairy and Egg Products
1090
27
Milk, dry, nonfat, regular, without added vita...
Dairy and Egg Products
1091
28
Milk, dry, nonfat, instant, with added vitamin...
Dairy and Egg Products
1092
29
Milk, dry, nonfat, calcium reduced
Dairy and Egg Products
1093
...
...
...
...
...
6606
Beef, tenderloin, steak, separable lean only, ...
Beef Products
23628
6607
Beef, top sirloin, steak, separable lean only,...
Beef Products
23629
6608
Beef, short loin, top loin, steak, separable l...
Beef Products
23630
6609
Beef, chuck, arm pot roast, separable lean onl...
Beef Products
23631
6610
Beef, brisket, flat half, separable lean only,...
Beef Products
23632
6611
Beef, chuck, arm pot roast, separable lean onl...
Beef Products
23633
6612
Beef, brisket, flat half, separable lean only,...
Beef Products
23634
6613
Beef, round, eye of round, roast, separable le...
Beef Products
23635
6614
Beef, round, top round, steak, separable lean ...
Beef Products
23636
6615
Beef, round, bottom round, roast, separable le...
Beef Products
23637
6616
Beef, rib, small end (ribs 10-12), separable l...
Beef Products
23638
6617
CAMPBELL Soup Company, CAMPBELL'S Red and Whit...
Soups, Sauces, and Gravies
27015
Campbell Soup Co.
6618
CAMPBELL Soup Company, CAMPBELL's Red and Whit...
Soups, Sauces, and Gravies
27016
Campbell Soup Co.
6619
CAMPBELL Soup Company, CAMPBELL'S SELECT Soups...
Soups, Sauces, and Gravies
27021
Campbell Soup Co.
6620
CAMPBELL Soup Company, CAMPBELL'S SOUP AT HAND...
Soups, Sauces, and Gravies
27022
Campbell Soup Co.
6621
CAMPBELL Soup Company, CAMPBELL'S SOUP AT HAND...
Soups, Sauces, and Gravies
27023
Campbell Soup Co.
6622
CAMPBELL Soup Company, CAMPBELL'S SELECT Gold ...
Soups, Sauces, and Gravies
27024
Campbell Soup Co.
6623
CAMPBELL Soup Company, CAMPBELL'S SELECT Gold ...
Soups, Sauces, and Gravies
27025
Campbell Soup Co.
6624
CAMPBELL Soup Company, CAMPBELL'S SELECT Gold ...
Soups, Sauces, and Gravies
27026
Campbell Soup Co.
6625
CAMPBELL Soup Company, CAMPBELL'S Red and Whit...
Soups, Sauces, and Gravies
27032
Campbell Soup Co.
6626
CAMPBELL Soup Company, V8 Vegetable Juice, Ess...
Vegetables and Vegetable Products
31010
Campbell Soup Co.
6627
CAMPBELL Soup Company, V8 Vegetable Juice, Spi...
Vegetables and Vegetable Products
31013
Campbell Soup Co.
6628
CAMPBELL Soup Company, PACE, Jalapenos Nacho S...
Vegetables and Vegetable Products
31014
Campbell Soup Co.
6629
CAMPBELL Soup Company, V8 60% Vegetable Juice,...
Vegetables and Vegetable Products
31016
Campbell Soup Co.
6630
CAMPBELL Soup Company, V8 Vegetable Juice, Low...
Vegetables and Vegetable Products
31017
Campbell Soup Co.
6631
Bologna, beef, low fat
Sausages and Luncheon Meats
42161
6632
Turkey and pork sausage, fresh, bulk, patty or...
Sausages and Luncheon Meats
42173
6633
Babyfood, juice, pear
Baby Foods
43408
None
6634
Babyfood, dessert, banana yogurt, strained
Baby Foods
43539
None
6635
Babyfood, banana no tapioca, strained
Baby Foods
43546
None
6636 rows × 4 columns
In [44]:
pd.value_counts(info.group)[:10]
Out[44]:
Vegetables and Vegetable Products 812
Beef Products 618
Baked Products 496
Breakfast Cereals 403
Legumes and Legume Products 365
Fast Foods 365
Lamb, Veal, and Game Products 345
Sweets 341
Pork Products 328
Fruits and Fruit Juices 328
Name: group, dtype: int64
In [45]:
nutrients = []
for rec in db:
fnuts = DataFrame(rec['nutrients'])
fnuts['id'] = rec['id']
nutrients.append(fnuts)
nutrients = pd.concat(nutrients, ignore_index=True)
nutrients
Out[45]:
description
group
units
value
id
0
Protein
Composition
g
25.180
1008
1
Total lipid (fat)
Composition
g
29.200
1008
2
Carbohydrate, by difference
Composition
g
3.060
1008
3
Ash
Other
g
3.280
1008
4
Energy
Energy
kcal
376.000
1008
5
Water
Composition
g
39.280
1008
6
Energy
Energy
kJ
1573.000
1008
7
Fiber, total dietary
Composition
g
0.000
1008
8
Calcium, Ca
Elements
mg
673.000
1008
9
Iron, Fe
Elements
mg
0.640
1008
10
Magnesium, Mg
Elements
mg
22.000
1008
11
Phosphorus, P
Elements
mg
490.000
1008
12
Potassium, K
Elements
mg
93.000
1008
13
Sodium, Na
Elements
mg
690.000
1008
14
Zinc, Zn
Elements
mg
2.940
1008
15
Copper, Cu
Elements
mg
0.024
1008
16
Manganese, Mn
Elements
mg
0.021
1008
17
Selenium, Se
Elements
mcg
14.500
1008
18
Vitamin A, IU
Vitamins
IU
1054.000
1008
19
Retinol
Vitamins
mcg
262.000
1008
20
Vitamin A, RAE
Vitamins
mcg_RAE
271.000
1008
21
Vitamin C, total ascorbic acid
Vitamins
mg
0.000
1008
22
Thiamin
Vitamins
mg
0.031
1008
23
Riboflavin
Vitamins
mg
0.450
1008
24
Niacin
Vitamins
mg
0.180
1008
25
Pantothenic acid
Vitamins
mg
0.190
1008
26
Vitamin B-6
Vitamins
mg
0.074
1008
27
Folate, total
Vitamins
mcg
18.000
1008
28
Vitamin B-12
Vitamins
mcg
0.270
1008
29
Folic acid
Vitamins
mcg
0.000
1008
...
...
...
...
...
...
389325
Selenium, Se
Elements
mcg
1.100
43546
389326
Vitamin A, IU
Vitamins
IU
5.000
43546
389327
Retinol
Vitamins
mcg
0.000
43546
389328
Vitamin A, RAE
Vitamins
mcg_RAE
0.000
43546
389329
Carotene, beta
Vitamins
mcg
2.000
43546
389330
Carotene, alpha
Vitamins
mcg
2.000
43546
389331
Vitamin E (alpha-tocopherol)
Vitamins
mg
0.250
43546
389332
Vitamin D
Vitamins
IU
0.000
43546
389333
Vitamin D (D2 + D3)
Vitamins
mcg
0.000
43546
389334
Cryptoxanthin, beta
Vitamins
mcg
0.000
43546
389335
Lycopene
Vitamins
mcg
0.000
43546
389336
Lutein + zeaxanthin
Vitamins
mcg
20.000
43546
389337
Vitamin C, total ascorbic acid
Vitamins
mg
21.900
43546
389338
Thiamin
Vitamins
mg
0.020
43546
389339
Riboflavin
Vitamins
mg
0.060
43546
389340
Niacin
Vitamins
mg
0.540
43546
389341
Vitamin B-6
Vitamins
mg
0.260
43546
389342
Folate, total
Vitamins
mcg
17.000
43546
389343
Vitamin B-12
Vitamins
mcg
0.000
43546
389344
Choline, total
Vitamins
mg
4.100
43546
389345
Vitamin K (phylloquinone)
Vitamins
mcg
0.500
43546
389346
Folic acid
Vitamins
mcg
0.000
43546
389347
Folate, food
Vitamins
mcg
17.000
43546
389348
Folate, DFE
Vitamins
mcg_DFE
17.000
43546
389349
Vitamin E, added
Vitamins
mg
0.000
43546
389350
Vitamin B-12, added
Vitamins
mcg
0.000
43546
389351
Cholesterol
Other
mg
0.000
43546
389352
Fatty acids, total saturated
Other
g
0.072
43546
389353
Fatty acids, total monounsaturated
Other
g
0.028
43546
389354
Fatty acids, total polyunsaturated
Other
g
0.041
43546
389355 rows × 5 columns
In [46]:
nutrients.duplicated().sum()
Out[46]:
14179
In [47]:
nutrients = nutrients.drop_duplicates()
In [48]:
col_mapping = {'description' : 'food',
'group' : 'fgroup'}
info = info.rename(columns=col_mapping, copy=False)
info
Out[48]:
food
fgroup
id
manufacturer
0
Cheese, caraway
Dairy and Egg Products
1008
1
Cheese, cheddar
Dairy and Egg Products
1009
2
Cheese, edam
Dairy and Egg Products
1018
3
Cheese, feta
Dairy and Egg Products
1019
4
Cheese, mozzarella, part skim milk
Dairy and Egg Products
1028
5
Cheese, mozzarella, part skim milk, low moisture
Dairy and Egg Products
1029
6
Cheese, romano
Dairy and Egg Products
1038
7
Cheese, roquefort
Dairy and Egg Products
1039
8
Cheese spread, pasteurized process, american, ...
Dairy and Egg Products
1048
9
Cream, fluid, half and half
Dairy and Egg Products
1049
10
Sour dressing, non-butterfat, cultured, filled...
Dairy and Egg Products
1058
11
Milk, filled, fluid, with blend of hydrogenate...
Dairy and Egg Products
1059
12
Cream substitute, liquid, with lauric acid oil...
Dairy and Egg Products
1068
13
Cream substitute, powdered
Dairy and Egg Products
1069
14
Milk, producer, fluid, 3.7% milkfat
Dairy and Egg Products
1078
15
Milk, reduced fat, fluid, 2% milkfat, with add...
Dairy and Egg Products
1079
None
16
Milk, reduced fat, fluid, 2% milkfat, with add...
Dairy and Egg Products
1080
17
Milk, reduced fat, fluid, 2% milkfat, protein ...
Dairy and Egg Products
1081
18
Milk, lowfat, fluid, 1% milkfat, with added vi...
Dairy and Egg Products
1082
19
Milk, lowfat, fluid, 1% milkfat, with added no...
Dairy and Egg Products
1083
20
Milk, lowfat, fluid, 1% milkfat, protein forti...
Dairy and Egg Products
1084
21
Milk, nonfat, fluid, with added vitamin A and ...
Dairy and Egg Products
1085
22
Milk, nonfat, fluid, with added nonfat milk so...
Dairy and Egg Products
1086
23
Milk, nonfat, fluid, protein fortified, with a...
Dairy and Egg Products
1087
24
Milk, buttermilk, fluid, cultured, lowfat
Dairy and Egg Products
1088
25
Milk, low sodium, fluid
Dairy and Egg Products
1089
26
Milk, dry, whole, with added vitamin D
Dairy and Egg Products
1090
27
Milk, dry, nonfat, regular, without added vita...
Dairy and Egg Products
1091
28
Milk, dry, nonfat, instant, with added vitamin...
Dairy and Egg Products
1092
29
Milk, dry, nonfat, calcium reduced
Dairy and Egg Products
1093
...
...
...
...
...
6606
Beef, tenderloin, steak, separable lean only, ...
Beef Products
23628
6607
Beef, top sirloin, steak, separable lean only,...
Beef Products
23629
6608
Beef, short loin, top loin, steak, separable l...
Beef Products
23630
6609
Beef, chuck, arm pot roast, separable lean onl...
Beef Products
23631
6610
Beef, brisket, flat half, separable lean only,...
Beef Products
23632
6611
Beef, chuck, arm pot roast, separable lean onl...
Beef Products
23633
6612
Beef, brisket, flat half, separable lean only,...
Beef Products
23634
6613
Beef, round, eye of round, roast, separable le...
Beef Products
23635
6614
Beef, round, top round, steak, separable lean ...
Beef Products
23636
6615
Beef, round, bottom round, roast, separable le...
Beef Products
23637
6616
Beef, rib, small end (ribs 10-12), separable l...
Beef Products
23638
6617
CAMPBELL Soup Company, CAMPBELL'S Red and Whit...
Soups, Sauces, and Gravies
27015
Campbell Soup Co.
6618
CAMPBELL Soup Company, CAMPBELL's Red and Whit...
Soups, Sauces, and Gravies
27016
Campbell Soup Co.
6619
CAMPBELL Soup Company, CAMPBELL'S SELECT Soups...
Soups, Sauces, and Gravies
27021
Campbell Soup Co.
6620
CAMPBELL Soup Company, CAMPBELL'S SOUP AT HAND...
Soups, Sauces, and Gravies
27022
Campbell Soup Co.
6621
CAMPBELL Soup Company, CAMPBELL'S SOUP AT HAND...
Soups, Sauces, and Gravies
27023
Campbell Soup Co.
6622
CAMPBELL Soup Company, CAMPBELL'S SELECT Gold ...
Soups, Sauces, and Gravies
27024
Campbell Soup Co.
6623
CAMPBELL Soup Company, CAMPBELL'S SELECT Gold ...
Soups, Sauces, and Gravies
27025
Campbell Soup Co.
6624
CAMPBELL Soup Company, CAMPBELL'S SELECT Gold ...
Soups, Sauces, and Gravies
27026
Campbell Soup Co.
6625
CAMPBELL Soup Company, CAMPBELL'S Red and Whit...
Soups, Sauces, and Gravies
27032
Campbell Soup Co.
6626
CAMPBELL Soup Company, V8 Vegetable Juice, Ess...
Vegetables and Vegetable Products
31010
Campbell Soup Co.
6627
CAMPBELL Soup Company, V8 Vegetable Juice, Spi...
Vegetables and Vegetable Products
31013
Campbell Soup Co.
6628
CAMPBELL Soup Company, PACE, Jalapenos Nacho S...
Vegetables and Vegetable Products
31014
Campbell Soup Co.
6629
CAMPBELL Soup Company, V8 60% Vegetable Juice,...
Vegetables and Vegetable Products
31016
Campbell Soup Co.
6630
CAMPBELL Soup Company, V8 Vegetable Juice, Low...
Vegetables and Vegetable Products
31017
Campbell Soup Co.
6631
Bologna, beef, low fat
Sausages and Luncheon Meats
42161
6632
Turkey and pork sausage, fresh, bulk, patty or...
Sausages and Luncheon Meats
42173
6633
Babyfood, juice, pear
Baby Foods
43408
None
6634
Babyfood, dessert, banana yogurt, strained
Baby Foods
43539
None
6635
Babyfood, banana no tapioca, strained
Baby Foods
43546
None
6636 rows × 4 columns
In [49]:
col_mapping = {'description' : 'nutrient',
'group' : 'nutgroup'}
nutrients = nutrients.rename(columns=col_mapping, copy=False)
nutrients
Out[49]:
nutrient
nutgroup
units
value
id
0
Protein
Composition
g
25.180
1008
1
Total lipid (fat)
Composition
g
29.200
1008
2
Carbohydrate, by difference
Composition
g
3.060
1008
3
Ash
Other
g
3.280
1008
4
Energy
Energy
kcal
376.000
1008
5
Water
Composition
g
39.280
1008
6
Energy
Energy
kJ
1573.000
1008
7
Fiber, total dietary
Composition
g
0.000
1008
8
Calcium, Ca
Elements
mg
673.000
1008
9
Iron, Fe
Elements
mg
0.640
1008
10
Magnesium, Mg
Elements
mg
22.000
1008
11
Phosphorus, P
Elements
mg
490.000
1008
12
Potassium, K
Elements
mg
93.000
1008
13
Sodium, Na
Elements
mg
690.000
1008
14
Zinc, Zn
Elements
mg
2.940
1008
15
Copper, Cu
Elements
mg
0.024
1008
16
Manganese, Mn
Elements
mg
0.021
1008
17
Selenium, Se
Elements
mcg
14.500
1008
18
Vitamin A, IU
Vitamins
IU
1054.000
1008
19
Retinol
Vitamins
mcg
262.000
1008
20
Vitamin A, RAE
Vitamins
mcg_RAE
271.000
1008
21
Vitamin C, total ascorbic acid
Vitamins
mg
0.000
1008
22
Thiamin
Vitamins
mg
0.031
1008
23
Riboflavin
Vitamins
mg
0.450
1008
24
Niacin
Vitamins
mg
0.180
1008
25
Pantothenic acid
Vitamins
mg
0.190
1008
26
Vitamin B-6
Vitamins
mg
0.074
1008
27
Folate, total
Vitamins
mcg
18.000
1008
28
Vitamin B-12
Vitamins
mcg
0.270
1008
29
Folic acid
Vitamins
mcg
0.000
1008
...
...
...
...
...
...
389325
Selenium, Se
Elements
mcg
1.100
43546
389326
Vitamin A, IU
Vitamins
IU
5.000
43546
389327
Retinol
Vitamins
mcg
0.000
43546
389328
Vitamin A, RAE
Vitamins
mcg_RAE
0.000
43546
389329
Carotene, beta
Vitamins
mcg
2.000
43546
389330
Carotene, alpha
Vitamins
mcg
2.000
43546
389331
Vitamin E (alpha-tocopherol)
Vitamins
mg
0.250
43546
389332
Vitamin D
Vitamins
IU
0.000
43546
389333
Vitamin D (D2 + D3)
Vitamins
mcg
0.000
43546
389334
Cryptoxanthin, beta
Vitamins
mcg
0.000
43546
389335
Lycopene
Vitamins
mcg
0.000
43546
389336
Lutein + zeaxanthin
Vitamins
mcg
20.000
43546
389337
Vitamin C, total ascorbic acid
Vitamins
mg
21.900
43546
389338
Thiamin
Vitamins
mg
0.020
43546
389339
Riboflavin
Vitamins
mg
0.060
43546
389340
Niacin
Vitamins
mg
0.540
43546
389341
Vitamin B-6
Vitamins
mg
0.260
43546
389342
Folate, total
Vitamins
mcg
17.000
43546
389343
Vitamin B-12
Vitamins
mcg
0.000
43546
389344
Choline, total
Vitamins
mg
4.100
43546
389345
Vitamin K (phylloquinone)
Vitamins
mcg
0.500
43546
389346
Folic acid
Vitamins
mcg
0.000
43546
389347
Folate, food
Vitamins
mcg
17.000
43546
389348
Folate, DFE
Vitamins
mcg_DFE
17.000
43546
389349
Vitamin E, added
Vitamins
mg
0.000
43546
389350
Vitamin B-12, added
Vitamins
mcg
0.000
43546
389351
Cholesterol
Other
mg
0.000
43546
389352
Fatty acids, total saturated
Other
g
0.072
43546
389353
Fatty acids, total monounsaturated
Other
g
0.028
43546
389354
Fatty acids, total polyunsaturated
Other
g
0.041
43546
375176 rows × 5 columns
In [51]:
ndata = pd.merge(nutrients, info, on='id', how='outer')
ndata
Out[51]:
nutrient
nutgroup
units
value
id
food
fgroup
manufacturer
0
Protein
Composition
g
25.180
1008
Cheese, caraway
Dairy and Egg Products
1
Total lipid (fat)
Composition
g
29.200
1008
Cheese, caraway
Dairy and Egg Products
2
Carbohydrate, by difference
Composition
g
3.060
1008
Cheese, caraway
Dairy and Egg Products
3
Ash
Other
g
3.280
1008
Cheese, caraway
Dairy and Egg Products
4
Energy
Energy
kcal
376.000
1008
Cheese, caraway
Dairy and Egg Products
5
Water
Composition
g
39.280
1008
Cheese, caraway
Dairy and Egg Products
6
Energy
Energy
kJ
1573.000
1008
Cheese, caraway
Dairy and Egg Products
7
Fiber, total dietary
Composition
g
0.000
1008
Cheese, caraway
Dairy and Egg Products
8
Calcium, Ca
Elements
mg
673.000
1008
Cheese, caraway
Dairy and Egg Products
9
Iron, Fe
Elements
mg
0.640
1008
Cheese, caraway
Dairy and Egg Products
10
Magnesium, Mg
Elements
mg
22.000
1008
Cheese, caraway
Dairy and Egg Products
11
Phosphorus, P
Elements
mg
490.000
1008
Cheese, caraway
Dairy and Egg Products
12
Potassium, K
Elements
mg
93.000
1008
Cheese, caraway
Dairy and Egg Products
13
Sodium, Na
Elements
mg
690.000
1008
Cheese, caraway
Dairy and Egg Products
14
Zinc, Zn
Elements
mg
2.940
1008
Cheese, caraway
Dairy and Egg Products
15
Copper, Cu
Elements
mg
0.024
1008
Cheese, caraway
Dairy and Egg Products
16
Manganese, Mn
Elements
mg
0.021
1008
Cheese, caraway
Dairy and Egg Products
17
Selenium, Se
Elements
mcg
14.500
1008
Cheese, caraway
Dairy and Egg Products
18
Vitamin A, IU
Vitamins
IU
1054.000
1008
Cheese, caraway
Dairy and Egg Products
19
Retinol
Vitamins
mcg
262.000
1008
Cheese, caraway
Dairy and Egg Products
20
Vitamin A, RAE
Vitamins
mcg_RAE
271.000
1008
Cheese, caraway
Dairy and Egg Products
21
Vitamin C, total ascorbic acid
Vitamins
mg
0.000
1008
Cheese, caraway
Dairy and Egg Products
22
Thiamin
Vitamins
mg
0.031
1008
Cheese, caraway
Dairy and Egg Products
23
Riboflavin
Vitamins
mg
0.450
1008
Cheese, caraway
Dairy and Egg Products
24
Niacin
Vitamins
mg
0.180
1008
Cheese, caraway
Dairy and Egg Products
25
Pantothenic acid
Vitamins
mg
0.190
1008
Cheese, caraway
Dairy and Egg Products
26
Vitamin B-6
Vitamins
mg
0.074
1008
Cheese, caraway
Dairy and Egg Products
27
Folate, total
Vitamins
mcg
18.000
1008
Cheese, caraway
Dairy and Egg Products
28
Vitamin B-12
Vitamins
mcg
0.270
1008
Cheese, caraway
Dairy and Egg Products
29
Folic acid
Vitamins
mcg
0.000
1008
Cheese, caraway
Dairy and Egg Products
...
...
...
...
...
...
...
...
...
375146
Selenium, Se
Elements
mcg
1.100
43546
Babyfood, banana no tapioca, strained
Baby Foods
None
375147
Vitamin A, IU
Vitamins
IU
5.000
43546
Babyfood, banana no tapioca, strained
Baby Foods
None
375148
Retinol
Vitamins
mcg
0.000
43546
Babyfood, banana no tapioca, strained
Baby Foods
None
375149
Vitamin A, RAE
Vitamins
mcg_RAE
0.000
43546
Babyfood, banana no tapioca, strained
Baby Foods
None
375150
Carotene, beta
Vitamins
mcg
2.000
43546
Babyfood, banana no tapioca, strained
Baby Foods
None
375151
Carotene, alpha
Vitamins
mcg
2.000
43546
Babyfood, banana no tapioca, strained
Baby Foods
None
375152
Vitamin E (alpha-tocopherol)
Vitamins
mg
0.250
43546
Babyfood, banana no tapioca, strained
Baby Foods
None
375153
Vitamin D
Vitamins
IU
0.000
43546
Babyfood, banana no tapioca, strained
Baby Foods
None
375154
Vitamin D (D2 + D3)
Vitamins
mcg
0.000
43546
Babyfood, banana no tapioca, strained
Baby Foods
None
375155
Cryptoxanthin, beta
Vitamins
mcg
0.000
43546
Babyfood, banana no tapioca, strained
Baby Foods
None
375156
Lycopene
Vitamins
mcg
0.000
43546
Babyfood, banana no tapioca, strained
Baby Foods
None
375157
Lutein + zeaxanthin
Vitamins
mcg
20.000
43546
Babyfood, banana no tapioca, strained
Baby Foods
None
375158
Vitamin C, total ascorbic acid
Vitamins
mg
21.900
43546
Babyfood, banana no tapioca, strained
Baby Foods
None
375159
Thiamin
Vitamins
mg
0.020
43546
Babyfood, banana no tapioca, strained
Baby Foods
None
375160
Riboflavin
Vitamins
mg
0.060
43546
Babyfood, banana no tapioca, strained
Baby Foods
None
375161
Niacin
Vitamins
mg
0.540
43546
Babyfood, banana no tapioca, strained
Baby Foods
None
375162
Vitamin B-6
Vitamins
mg
0.260
43546
Babyfood, banana no tapioca, strained
Baby Foods
None
375163
Folate, total
Vitamins
mcg
17.000
43546
Babyfood, banana no tapioca, strained
Baby Foods
None
375164
Vitamin B-12
Vitamins
mcg
0.000
43546
Babyfood, banana no tapioca, strained
Baby Foods
None
375165
Choline, total
Vitamins
mg
4.100
43546
Babyfood, banana no tapioca, strained
Baby Foods
None
375166
Vitamin K (phylloquinone)
Vitamins
mcg
0.500
43546
Babyfood, banana no tapioca, strained
Baby Foods
None
375167
Folic acid
Vitamins
mcg
0.000
43546
Babyfood, banana no tapioca, strained
Baby Foods
None
375168
Folate, food
Vitamins
mcg
17.000
43546
Babyfood, banana no tapioca, strained
Baby Foods
None
375169
Folate, DFE
Vitamins
mcg_DFE
17.000
43546
Babyfood, banana no tapioca, strained
Baby Foods
None
375170
Vitamin E, added
Vitamins
mg
0.000
43546
Babyfood, banana no tapioca, strained
Baby Foods
None
375171
Vitamin B-12, added
Vitamins
mcg
0.000
43546
Babyfood, banana no tapioca, strained
Baby Foods
None
375172
Cholesterol
Other
mg
0.000
43546
Babyfood, banana no tapioca, strained
Baby Foods
None
375173
Fatty acids, total saturated
Other
g
0.072
43546
Babyfood, banana no tapioca, strained
Baby Foods
None
375174
Fatty acids, total monounsaturated
Other
g
0.028
43546
Babyfood, banana no tapioca, strained
Baby Foods
None
375175
Fatty acids, total polyunsaturated
Other
g
0.041
43546
Babyfood, banana no tapioca, strained
Baby Foods
None
375176 rows × 8 columns
In [52]:
ndata.ix[30000]
Out[52]:
nutrient Glycine
nutgroup Amino Acids
units g
value 0.04
id 6158
food Soup, tomato bisque, canned, condensed
fgroup Soups, Sauces, and Gravies
manufacturer
Name: 30000, dtype: object
In [55]:
%matplotlib inline
result = ndata.groupby(['nutrient', 'fgroup'])['value'].quantile(0.5)
result['Zinc, Zn'].sort_values().plot(kind='barh')
Out[55]:
<matplotlib.axes._subplots.AxesSubplot at 0x11f28cf28>
In [58]:
by_nutrient = ndata.groupby(['nutgroup', 'nutrient'])
get_maximum = lambda x: x.xs(x.value.idxmax())
get_minimum = lambda x: x.xs(x.value.idxmin())
max_foods = by_nutrient.apply(get_maximum)[['value', 'food']]
max_foods.food = max_foods.food.str[:50]
In [59]:
max_foods.ix['Amino Acids']['food']
Out[59]:
nutrient
Alanine Gelatins, dry powder, unsweetened
Arginine Seeds, sesame flour, low-fat
Aspartic acid Soy protein isolate
Cystine Seeds, cottonseed flour, low fat (glandless)
Glutamic acid Soy protein isolate
Glycine Gelatins, dry powder, unsweetened
Histidine Whale, beluga, meat, dried (Alaska Native)
Hydroxyproline KENTUCKY FRIED CHICKEN, Fried Chicken, ORIGINA...
Isoleucine Soy protein isolate, PROTEIN TECHNOLOGIES INTE...
Leucine Soy protein isolate, PROTEIN TECHNOLOGIES INTE...
Lysine Seal, bearded (Oogruk), meat, dried (Alaska Na...
Methionine Fish, cod, Atlantic, dried and salted
Phenylalanine Soy protein isolate, PROTEIN TECHNOLOGIES INTE...
Proline Gelatins, dry powder, unsweetened
Serine Soy protein isolate, PROTEIN TECHNOLOGIES INTE...
Threonine Soy protein isolate, PROTEIN TECHNOLOGIES INTE...
Tryptophan Sea lion, Steller, meat with fat (Alaska Native)
Tyrosine Soy protein isolate, PROTEIN TECHNOLOGIES INTE...
Valine Soy protein isolate, PROTEIN TECHNOLOGIES INTE...
Name: food, dtype: object
In [ ]:
Content source: ALEXKIRNAS/DataScience
Similar notebooks: