Processing the open food databse to extract a dataset to use for the visualization.


In [1]:
import pandas as pd
import numpy as np
import re
from scipy import sparse as sparse

# SK-learn libraries for feature extraction from text.
from sklearn.feature_extraction.text import *

data_dir = "/Users/seddont/Dropbox/Tom/MIDS/W209_work/Tom_project/"
code_dir = "/Users/seddont/Dropbox/Tom/MIDS/W209_work/w209finalproject/"

Working from the full database, because the usda_imports_filtered.csv file in the shared drive does not have brand information, which might be useful for displaying.


In [2]:
# Get sample of the full database to understand what columns we want
smp = pd.read_csv(data_dir+"en.openfoodfacts.org.products.csv", sep = "\t", nrows = 100)

In [3]:
for c in smp.columns:
    print(c)


code
url
creator
created_t
created_datetime
last_modified_t
last_modified_datetime
product_name
generic_name
quantity
packaging
packaging_tags
brands
brands_tags
categories
categories_tags
categories_en
origins
origins_tags
manufacturing_places
manufacturing_places_tags
labels
labels_tags
labels_en
emb_codes
emb_codes_tags
first_packaging_code_geo
cities
cities_tags
purchase_places
stores
countries
countries_tags
countries_en
ingredients_text
allergens
allergens_en
traces
traces_tags
traces_en
serving_size
no_nutriments
additives_n
additives
additives_tags
additives_en
ingredients_from_palm_oil_n
ingredients_from_palm_oil
ingredients_from_palm_oil_tags
ingredients_that_may_be_from_palm_oil_n
ingredients_that_may_be_from_palm_oil
ingredients_that_may_be_from_palm_oil_tags
nutrition_grade_uk
nutrition_grade_fr
pnns_groups_1
pnns_groups_2
states
states_tags
states_en
main_category
main_category_en
image_url
image_small_url
energy_100g
energy-from-fat_100g
fat_100g
saturated-fat_100g
-butyric-acid_100g
-caproic-acid_100g
-caprylic-acid_100g
-capric-acid_100g
-lauric-acid_100g
-myristic-acid_100g
-palmitic-acid_100g
-stearic-acid_100g
-arachidic-acid_100g
-behenic-acid_100g
-lignoceric-acid_100g
-cerotic-acid_100g
-montanic-acid_100g
-melissic-acid_100g
monounsaturated-fat_100g
polyunsaturated-fat_100g
omega-3-fat_100g
-alpha-linolenic-acid_100g
-eicosapentaenoic-acid_100g
-docosahexaenoic-acid_100g
omega-6-fat_100g
-linoleic-acid_100g
-arachidonic-acid_100g
-gamma-linolenic-acid_100g
-dihomo-gamma-linolenic-acid_100g
omega-9-fat_100g
-oleic-acid_100g
-elaidic-acid_100g
-gondoic-acid_100g
-mead-acid_100g
-erucic-acid_100g
-nervonic-acid_100g
trans-fat_100g
cholesterol_100g
carbohydrates_100g
sugars_100g
-sucrose_100g
-glucose_100g
-fructose_100g
-lactose_100g
-maltose_100g
-maltodextrins_100g
starch_100g
polyols_100g
fiber_100g
proteins_100g
casein_100g
serum-proteins_100g
nucleotides_100g
salt_100g
sodium_100g
alcohol_100g
vitamin-a_100g
beta-carotene_100g
vitamin-d_100g
vitamin-e_100g
vitamin-k_100g
vitamin-c_100g
vitamin-b1_100g
vitamin-b2_100g
vitamin-pp_100g
vitamin-b6_100g
vitamin-b9_100g
folates_100g
vitamin-b12_100g
biotin_100g
pantothenic-acid_100g
silica_100g
bicarbonate_100g
potassium_100g
chloride_100g
calcium_100g
phosphorus_100g
iron_100g
magnesium_100g
zinc_100g
copper_100g
manganese_100g
fluoride_100g
selenium_100g
chromium_100g
molybdenum_100g
iodine_100g
caffeine_100g
taurine_100g
ph_100g
fruits-vegetables-nuts_100g
collagen-meat-protein-ratio_100g
cocoa_100g
chlorophyl_100g
carbon-footprint_100g
nutrition-score-fr_100g
nutrition-score-uk_100g
glycemic-index_100g
water-hardness_100g

In [4]:
# Specify what columns we need for the visualization.  For speed purposes going to
# remove any we don't really need

wanted_cols = ['code', 'creator', 'product_name', 'brands', 'brands_tags', 'serving_size',
       'serving_size', 'energy_100g', 'fat_100g', 'cholesterol_100g',
       'carbohydrates_100g', 'sugars_100g', 'fiber_100g', 'proteins_100g', 'sodium_100g']

# Create a list of columns to drop to check it worked ok
drop_cols = [c for c in smp.columns if c not in wanted_cols]
print(drop_cols)


['url', 'created_t', 'created_datetime', 'last_modified_t', 'last_modified_datetime', 'generic_name', 'quantity', 'packaging', 'packaging_tags', 'categories', 'categories_tags', 'categories_en', 'origins', 'origins_tags', 'manufacturing_places', 'manufacturing_places_tags', 'labels', 'labels_tags', 'labels_en', 'emb_codes', 'emb_codes_tags', 'first_packaging_code_geo', 'cities', 'cities_tags', 'purchase_places', 'stores', 'countries', 'countries_tags', 'countries_en', 'ingredients_text', 'allergens', 'allergens_en', 'traces', 'traces_tags', 'traces_en', 'no_nutriments', 'additives_n', 'additives', 'additives_tags', 'additives_en', 'ingredients_from_palm_oil_n', 'ingredients_from_palm_oil', 'ingredients_from_palm_oil_tags', 'ingredients_that_may_be_from_palm_oil_n', 'ingredients_that_may_be_from_palm_oil', 'ingredients_that_may_be_from_palm_oil_tags', 'nutrition_grade_uk', 'nutrition_grade_fr', 'pnns_groups_1', 'pnns_groups_2', 'states', 'states_tags', 'states_en', 'main_category', 'main_category_en', 'image_url', 'image_small_url', 'energy-from-fat_100g', 'saturated-fat_100g', '-butyric-acid_100g', '-caproic-acid_100g', '-caprylic-acid_100g', '-capric-acid_100g', '-lauric-acid_100g', '-myristic-acid_100g', '-palmitic-acid_100g', '-stearic-acid_100g', '-arachidic-acid_100g', '-behenic-acid_100g', '-lignoceric-acid_100g', '-cerotic-acid_100g', '-montanic-acid_100g', '-melissic-acid_100g', 'monounsaturated-fat_100g', 'polyunsaturated-fat_100g', 'omega-3-fat_100g', '-alpha-linolenic-acid_100g', '-eicosapentaenoic-acid_100g', '-docosahexaenoic-acid_100g', 'omega-6-fat_100g', '-linoleic-acid_100g', '-arachidonic-acid_100g', '-gamma-linolenic-acid_100g', '-dihomo-gamma-linolenic-acid_100g', 'omega-9-fat_100g', '-oleic-acid_100g', '-elaidic-acid_100g', '-gondoic-acid_100g', '-mead-acid_100g', '-erucic-acid_100g', '-nervonic-acid_100g', 'trans-fat_100g', '-sucrose_100g', '-glucose_100g', '-fructose_100g', '-lactose_100g', '-maltose_100g', '-maltodextrins_100g', 'starch_100g', 'polyols_100g', 'casein_100g', 'serum-proteins_100g', 'nucleotides_100g', 'salt_100g', 'alcohol_100g', 'vitamin-a_100g', 'beta-carotene_100g', 'vitamin-d_100g', 'vitamin-e_100g', 'vitamin-k_100g', 'vitamin-c_100g', 'vitamin-b1_100g', 'vitamin-b2_100g', 'vitamin-pp_100g', 'vitamin-b6_100g', 'vitamin-b9_100g', 'folates_100g', 'vitamin-b12_100g', 'biotin_100g', 'pantothenic-acid_100g', 'silica_100g', 'bicarbonate_100g', 'potassium_100g', 'chloride_100g', 'calcium_100g', 'phosphorus_100g', 'iron_100g', 'magnesium_100g', 'zinc_100g', 'copper_100g', 'manganese_100g', 'fluoride_100g', 'selenium_100g', 'chromium_100g', 'molybdenum_100g', 'iodine_100g', 'caffeine_100g', 'taurine_100g', 'ph_100g', 'fruits-vegetables-nuts_100g', 'collagen-meat-protein-ratio_100g', 'cocoa_100g', 'chlorophyl_100g', 'carbon-footprint_100g', 'nutrition-score-fr_100g', 'nutrition-score-uk_100g', 'glycemic-index_100g', 'water-hardness_100g']

In [5]:
# Pull in full dataset, only the columns we want
df = pd.read_csv(data_dir+"en.openfoodfacts.org.products.csv", sep = "\t")


/Users/seddont/anaconda/lib/python3.5/site-packages/IPython/core/interactiveshell.py:2717: DtypeWarning: Columns (0,3,5,19,20,24,25,26,27,28,35,36,37,38,39,48) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)

In [6]:
# Drop unwanted columns
df.drop(drop_cols, axis = 1, inplace = True)

In [7]:
# Take a quick look
df


Out[7]:
code creator product_name brands brands_tags serving_size energy_100g fat_100g cholesterol_100g carbohydrates_100g sugars_100g fiber_100g proteins_100g sodium_100g
0 3087 openfoodfacts-contributors Farine de blé noir Ferme t'y R'nao ferme-t-y-r-nao NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 4530 usda-ndb-import Banana Chips Sweetened (Whole) NaN NaN 28 g (1 ONZ) 2243.0 28.57 0.018 64.29 14.29 3.6 3.57 0.000000
2 4559 usda-ndb-import Peanuts Torn & Glasser torn-glasser 28 g (0.25 cup) 1941.0 17.86 0.000 60.71 17.86 7.1 17.86 0.250000
3 16087 usda-ndb-import Organic Salted Nut Mix Grizzlies grizzlies 28 g (0.25 cup) 2540.0 57.14 NaN 17.86 3.57 7.1 17.86 0.482000
4 16094 usda-ndb-import Organic Polenta Bob's Red Mill bob-s-red-mill 35 g (0.25 cup) 1552.0 1.43 NaN 77.14 NaN 5.7 8.57 NaN
5 16100 usda-ndb-import Breadshop Honey Gone Nuts Granola Unfi unfi 52 g (0.5 cup) 1933.0 18.27 NaN 63.46 11.54 7.7 13.46 NaN
6 16117 usda-ndb-import Organic Long Grain White Rice Lundberg lundberg 45 g (0.25 cup) 1490.0 NaN NaN 80.00 NaN NaN 8.89 NaN
7 16124 usda-ndb-import Organic Muesli Daddy's Muesli daddy-s-muesli 64 g (0.5 cup) 1833.0 18.75 NaN 57.81 15.62 9.4 14.06 0.055000
8 16193 usda-ndb-import Organic Dark Chocolate Minis Equal Exchange equal-exchange 40 g (40 g) 2406.0 37.50 NaN 55.00 42.50 7.5 5.00 NaN
9 16513 usda-ndb-import Organic Sunflower Oil Napa Valley Naturals napa-valley-naturals 14 g (1 Tbsp) 3586.0 100.00 NaN NaN NaN NaN NaN NaN
10 16612 usda-ndb-import Organic Adzuki Beans Unfi unfi 48 g (0.25 cup) 1393.0 1.04 NaN 62.50 NaN 12.5 22.92 NaN
11 16650 usda-ndb-import Organic Penne Pasta Gardentime gardentime 57 g (0.5 cup) 1540.0 1.75 NaN 73.68 NaN 1.8 14.04 NaN
12 16872 usda-ndb-import Zen Party Mix Sunridge sunridge 30 g (0.25 cup) 2230.0 36.67 NaN 36.67 3.33 6.7 16.67 0.633000
13 16933 usda-ndb-import Organic Golden Flax Seeds Unfi unfi 21 g (2 Tbsp) 2590.0 42.86 NaN 38.10 NaN 38.1 19.05 0.038000
14 17497 usda-ndb-import Organic Spicy Punks Eden eden 33 g (0.25 cup) 2536.0 48.48 NaN 15.15 NaN 15.2 30.30 0.227000
15 18012 usda-ndb-import Cinnamon Nut Granola Grizzlies grizzlies 55 g (0.5 cup) 1824.0 18.18 NaN 60.00 21.82 9.1 14.55 0.009000
16 18050 usda-ndb-import Organic Hazelnuts Grizzlies grizzlies 28 g (0.25 cup) 2632.0 60.71 NaN 17.86 3.57 10.7 14.29 0.004000
17 18173 usda-ndb-import Organic Sweetened Banana Chips Unfi unfi 30 g (0.25 cup) 2092.0 26.67 NaN 66.67 16.67 3.3 3.33 NaN
18 18197 usda-ndb-import Lotus Organic Brown Jasmine Rice Unfi unfi 45 g (0.25 cup) 1582.0 2.22 NaN 77.78 2.22 2.2 8.89 NaN
19 18227 usda-ndb-import Organic Oat Groats Pcc pcc 42 g (0.25 cup) 1096.0 5.95 NaN 66.67 2.38 9.5 16.67 0.010000
20 18265 usda-ndb-import Energy Power Mix Sunridge sunridge 40 g (0.25 cup) 1464.0 17.50 NaN 42.50 32.50 5.0 7.50 0.112000
21 18289 usda-ndb-import Antioxidant Mix - Berries & Chocolate Sunridge sunridge 30 g (0.25 cup) 2092.0 33.33 NaN 46.67 30.00 6.7 13.33 0.183000
22 18319 usda-ndb-import Organic Quinoa Coconut Granola With Mango Sunridge sunridge 55 g (0.5 cup) 1674.0 10.91 NaN 69.09 27.27 9.1 10.91 0.009000
23 18340 usda-ndb-import Fire Roasted Hatch Green Chile Almonds Sunridge sunridge 30 g (0.25 cup) 2372.0 50.00 NaN 23.33 6.67 10.0 6.67 0.500000
24 18357 usda-ndb-import Peanut Butter Power Chews Sunridge sunridge 45 g (45 GRM) 1954.0 20.00 NaN 60.00 33.33 4.4 8.89 0.078000
25 18371 usda-ndb-import Real Salt Granular Redmond redmond 1.4 g (0.25 tsp) NaN NaN NaN NaN NaN NaN NaN 37.857000
26 18388 usda-ndb-import Organic Unswt Berry Coconut Granola New England Naturals new-england-naturals 54 g (0.5 cup) 1548.0 22.22 NaN 57.41 5.56 9.3 12.96 0.111000
27 18395 usda-ndb-import Roasted Salted Black Pepper Cashews Sunridge sunridge 30 g (30 g) 2372.0 46.67 NaN 30.00 6.67 3.3 16.67 0.400000
28 18401 usda-ndb-import Thai Curry Roasted Cashews Sunridge sunridge 30 g (30 g) 2372.0 43.33 NaN 30.00 6.67 3.3 16.67 0.533000
29 18418 usda-ndb-import Wasabi Tamari Almonds Sunridge sunridge 30 g (30 g) 2230.0 46.67 NaN 23.33 3.33 10.0 20.00 0.800000
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
330420 9789961637609 openfoodfacts-contributors Tarifs djoghrafia Chihab chihab NaN NaN NaN NaN NaN NaN NaN NaN NaN
330421 9791463741241 scailyna Gingembre Terre exotique terre-exotique NaN NaN NaN NaN NaN NaN NaN NaN NaN
330422 9800120499 openfoodfacts-contributors Ferrero Rocher NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
330423 9800801107 tacite-mass-editor NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
330424 9800895007 tacite-mass-editor NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
330425 9800895250 tacite-mass-editor NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
330426 9802735 date-limite-app NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
330427 9820111458900 openfoodfacts-contributors Les Belles Tranches Bacon fumé Justin Bridou justin-bridou NaN NaN NaN NaN NaN NaN NaN NaN NaN
330428 9836654056565 usda-ndb-import Raspados Ice Bars Jarritos, The Jel Sert Company jarritos,the-jel-sert-company 57 g (57 g) 368.0 0.00 NaN 21.05 19.30 NaN 0.00 0.018000
330429 9847548283004 drmalabar Tartines craquantes bio au sarrasin Le Pain des fleurs le-pain-des-fleurs NaN 1643.0 2.80 NaN 74.80 2.60 5.9 13.00 0.267717
330430 98709870987 openfoodfacts-contributors NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
330431 9871311007 aleene Lager Carib carib NaN NaN NaN NaN NaN NaN NaN NaN NaN
330432 9876567898765 openfoodfacts-contributors nf test NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
330433 988639 tacite-mass-editor NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
330434 9900000000233 kiliweb Amandes Biosic biosic NaN 2406.0 NaN NaN NaN 3.89 12.2 21.22 0.039370
330435 9900010011557 jeanbono Miel de Camargue L. Boulaire l-boulaire NaN NaN NaN NaN NaN NaN NaN NaN NaN
330436 9901198001583 openfoodfacts-contributors Лимон — лайм Пеликан пеликан NaN NaN NaN NaN NaN NaN NaN NaN NaN
330437 9901942000794 gaspardbenoit Mleko wiejskie Piątnica piątnica NaN NaN NaN NaN NaN NaN NaN NaN NaN
330438 99044169 date-limite-app Poireaux NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
330439 9906410000009 agamitsudo Roussette du Bugey (2011) Roussette du Bugey roussette-du-bugey NaN NaN NaN NaN NaN NaN NaN NaN NaN
330440 9908278636246 andre Szprot w oleju roslinnym EvraFish evrafish NaN NaN NaN NaN NaN NaN NaN NaN NaN
330441 99111250 balooval Thé vert Earl grey Lobodis lobodis NaN 21.0 0.20 NaN 0.50 0.50 0.2 0.50 0.010000
330442 9918 woshilapin Cheese cake thé vert, yuzu NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
330443 9935010000003 sebleouf Rillette d'oie Sans marque,D.Lambert sans-marque,d-lambert NaN NaN NaN NaN NaN NaN NaN NaN NaN
330444 99410148 date-limite-app NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
330445 9948282780603 openfoodfacts-contributors Tomato & ricotta Panzani panzani NaN NaN NaN NaN NaN NaN NaN NaN NaN
330446 99567453 usda-ndb-import Mint Melange Tea A Blend Of Peppermint, Lemon ... Trader Joe's trader-joe-s 20 g (0.7 oz) 0.0 0.00 0.000 0.00 0.00 0.0 0.00 0.000000
330447 9970229501521 tomato 乐吧泡菜味薯片 乐吧 乐吧 NaN NaN NaN NaN NaN NaN NaN NaN NaN
330448 9980282863788 openfoodfacts-contributors Tomates aux Vermicelles Knorr knorr NaN NaN NaN NaN NaN NaN NaN NaN NaN
330449 999990026839 usda-ndb-import Sugar Free Drink Mix, Peach Tea Market Pantry market-pantry 1 g (0.5 PACKET) 2092.0 0.00 NaN 0.00 0.00 NaN 0.00 0.000000

330450 rows × 14 columns


In [8]:
# Drop all rows that are not from the usda ndb import
df = df[df.creator == "usda-ndb-import"]

# Drop all rows where Brands == Nan as we can't really identify those products
df = df[df.brands.notnull()]

In [9]:
df


Out[9]:
code creator product_name brands brands_tags serving_size energy_100g fat_100g cholesterol_100g carbohydrates_100g sugars_100g fiber_100g proteins_100g sodium_100g
2 4559 usda-ndb-import Peanuts Torn & Glasser torn-glasser 28 g (0.25 cup) 1941.0 17.86 0.000 60.71 17.86 7.1 17.86 0.250
3 16087 usda-ndb-import Organic Salted Nut Mix Grizzlies grizzlies 28 g (0.25 cup) 2540.0 57.14 NaN 17.86 3.57 7.1 17.86 0.482
4 16094 usda-ndb-import Organic Polenta Bob's Red Mill bob-s-red-mill 35 g (0.25 cup) 1552.0 1.43 NaN 77.14 NaN 5.7 8.57 NaN
5 16100 usda-ndb-import Breadshop Honey Gone Nuts Granola Unfi unfi 52 g (0.5 cup) 1933.0 18.27 NaN 63.46 11.54 7.7 13.46 NaN
6 16117 usda-ndb-import Organic Long Grain White Rice Lundberg lundberg 45 g (0.25 cup) 1490.0 NaN NaN 80.00 NaN NaN 8.89 NaN
7 16124 usda-ndb-import Organic Muesli Daddy's Muesli daddy-s-muesli 64 g (0.5 cup) 1833.0 18.75 NaN 57.81 15.62 9.4 14.06 0.055
8 16193 usda-ndb-import Organic Dark Chocolate Minis Equal Exchange equal-exchange 40 g (40 g) 2406.0 37.50 NaN 55.00 42.50 7.5 5.00 NaN
9 16513 usda-ndb-import Organic Sunflower Oil Napa Valley Naturals napa-valley-naturals 14 g (1 Tbsp) 3586.0 100.00 NaN NaN NaN NaN NaN NaN
10 16612 usda-ndb-import Organic Adzuki Beans Unfi unfi 48 g (0.25 cup) 1393.0 1.04 NaN 62.50 NaN 12.5 22.92 NaN
11 16650 usda-ndb-import Organic Penne Pasta Gardentime gardentime 57 g (0.5 cup) 1540.0 1.75 NaN 73.68 NaN 1.8 14.04 NaN
12 16872 usda-ndb-import Zen Party Mix Sunridge sunridge 30 g (0.25 cup) 2230.0 36.67 NaN 36.67 3.33 6.7 16.67 0.633
13 16933 usda-ndb-import Organic Golden Flax Seeds Unfi unfi 21 g (2 Tbsp) 2590.0 42.86 NaN 38.10 NaN 38.1 19.05 0.038
14 17497 usda-ndb-import Organic Spicy Punks Eden eden 33 g (0.25 cup) 2536.0 48.48 NaN 15.15 NaN 15.2 30.30 0.227
15 18012 usda-ndb-import Cinnamon Nut Granola Grizzlies grizzlies 55 g (0.5 cup) 1824.0 18.18 NaN 60.00 21.82 9.1 14.55 0.009
16 18050 usda-ndb-import Organic Hazelnuts Grizzlies grizzlies 28 g (0.25 cup) 2632.0 60.71 NaN 17.86 3.57 10.7 14.29 0.004
17 18173 usda-ndb-import Organic Sweetened Banana Chips Unfi unfi 30 g (0.25 cup) 2092.0 26.67 NaN 66.67 16.67 3.3 3.33 NaN
18 18197 usda-ndb-import Lotus Organic Brown Jasmine Rice Unfi unfi 45 g (0.25 cup) 1582.0 2.22 NaN 77.78 2.22 2.2 8.89 NaN
19 18227 usda-ndb-import Organic Oat Groats Pcc pcc 42 g (0.25 cup) 1096.0 5.95 NaN 66.67 2.38 9.5 16.67 0.010
20 18265 usda-ndb-import Energy Power Mix Sunridge sunridge 40 g (0.25 cup) 1464.0 17.50 NaN 42.50 32.50 5.0 7.50 0.112
21 18289 usda-ndb-import Antioxidant Mix - Berries & Chocolate Sunridge sunridge 30 g (0.25 cup) 2092.0 33.33 NaN 46.67 30.00 6.7 13.33 0.183
22 18319 usda-ndb-import Organic Quinoa Coconut Granola With Mango Sunridge sunridge 55 g (0.5 cup) 1674.0 10.91 NaN 69.09 27.27 9.1 10.91 0.009
23 18340 usda-ndb-import Fire Roasted Hatch Green Chile Almonds Sunridge sunridge 30 g (0.25 cup) 2372.0 50.00 NaN 23.33 6.67 10.0 6.67 0.500
24 18357 usda-ndb-import Peanut Butter Power Chews Sunridge sunridge 45 g (45 GRM) 1954.0 20.00 NaN 60.00 33.33 4.4 8.89 0.078
25 18371 usda-ndb-import Real Salt Granular Redmond redmond 1.4 g (0.25 tsp) NaN NaN NaN NaN NaN NaN NaN 37.857
26 18388 usda-ndb-import Organic Unswt Berry Coconut Granola New England Naturals new-england-naturals 54 g (0.5 cup) 1548.0 22.22 NaN 57.41 5.56 9.3 12.96 0.111
27 18395 usda-ndb-import Roasted Salted Black Pepper Cashews Sunridge sunridge 30 g (30 g) 2372.0 46.67 NaN 30.00 6.67 3.3 16.67 0.400
28 18401 usda-ndb-import Thai Curry Roasted Cashews Sunridge sunridge 30 g (30 g) 2372.0 43.33 NaN 30.00 6.67 3.3 16.67 0.533
29 18418 usda-ndb-import Wasabi Tamari Almonds Sunridge sunridge 30 g (30 g) 2230.0 46.67 NaN 23.33 3.33 10.0 20.00 0.800
30 18449 usda-ndb-import Organic Medium Shredded Coconut Unfi unfi 30 g (0.25 cup) 2230.0 30.00 NaN 53.33 6.67 6.7 6.67 NaN
31 18456 usda-ndb-import Organic Red Quinoa Pcc pcc 45 g (0.25 cup) 1490.0 6.67 NaN 64.44 11.11 8.9 13.33 0.004
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
330245 9556041600040 usda-ndb-import Sardines In Chili Oil Ayam Brand ayam-brand 56 g (2 oz) 1343.0 23.21 0.009 7.14 0.00 0.0 19.64 0.464
330246 9556041600057 usda-ndb-import Sardines In Kung Pao Sauce Ayam Brand ayam-brand 56 g (2 oz) 820.0 10.71 0.009 8.93 5.36 0.0 19.64 0.607
330248 9556041600255 usda-ndb-import Mackerel In Tomato Sauce Ayam Brand ayam-brand 56 g (2 oz) 820.0 14.29 0.054 1.79 1.79 1.8 12.50 0.268
330249 9556041600293 usda-ndb-import Sardines In Extra Virgin Olive Oil Ayam Brand ayam-brand 56 g (56 g) 1197.0 23.21 0.036 7.14 0.00 0.0 14.29 0.357
330250 9556041602143 usda-ndb-import Ayam, Satay Peanut Sauce Guinea Foods Sdn Bhd guinea-foods-sdn-bhd 70 g (2.5 oz) 1017.0 17.14 0.000 17.14 8.57 2.9 5.71 0.343
330251 9556041602211 usda-ndb-import Baked Beans In Tomato Sauce Ayam ayam 130 g (4.6 oz) 418.0 0.77 0.000 19.23 7.69 3.8 3.85 0.300
330258 9556041608947 usda-ndb-import Ayam, Malaysian Satay Sauce, Mild Guinea Foods Sdn Bhd guinea-foods-sdn-bhd 85 g (3 oz) 837.0 12.94 0.000 16.47 4.71 2.4 5.88 0.247
330259 9556041608954 usda-ndb-import Ayam, Medium Thai Satay Sauce, Lemongrass And ... Guinea Foods Sdn Bhd guinea-foods-sdn-bhd 85 g (3 oz) 937.0 11.76 0.000 21.18 9.41 3.5 7.06 0.282
330266 9556041610940 usda-ndb-import Coconut Spread (Kaya) Ayam ayam 18.5 g (2.6 oz) 1356.0 10.81 0.054 48.65 43.24 0.0 5.41 0.054
330273 9556041612593 usda-ndb-import Malaysian Curry Paste, Mild Ayam ayam 46 g (46 g) 820.0 13.04 0.000 17.39 6.52 4.3 2.17 1.630
330274 9556041612609 usda-ndb-import Curry Paste For Beef Rendang, Medium Ayam ayam 46 g (46 g) 820.0 13.04 0.000 19.57 8.70 4.3 2.17 1.609
330276 9556041620369 usda-ndb-import Sardines In Spicy Tomato Sauce, Chili And Lime Ayam Brand ayam-brand 56 g (56 g) 598.0 7.14 0.071 1.79 1.79 0.0 17.86 0.411
330284 9556107022601 usda-ndb-import Goldwilly, Original Instant Chrysanthemum Tea,... Hsc International Pte. Ltd. hsc-international-pte-ltd 18 g (18 g) 1628.0 0.00 0.000 100.00 22.22 0.0 0.00 0.000
330297 9556173386461 usda-ndb-import Chewy Candy Fruit Plus fruit-plus 12.5 g (0.45 oz) 1674.0 8.00 0.000 88.00 56.00 0.0 0.00 0.000
330311 9556390158162 usda-ndb-import Lee, Special Crackers Lee Biscuits (Pte.) Ltd. lee-biscuits-pte-ltd 22 g (22 g) 1979.0 18.18 0.009 68.18 13.64 4.5 9.09 0.400
330312 9556390178160 usda-ndb-import Sugar Crackers Lee Biscuits (Pte.) Ltd. lee-biscuits-pte-ltd 22 g (3 PCS) 1845.0 13.64 0.009 72.73 9.09 4.6 9.09 0.718
330331 956088003170 usda-ndb-import Blueberry Chia Buckwheat Muesli Seven Sundays, Seven Sundays Llc seven-sundays,seven-sundays-llc 57 g (0.5 cup) 1690.0 10.53 0.000 66.67 15.79 8.8 12.28 0.018
330332 95656335 usda-ndb-import Chipotle Peppers In Adobo Sauce La Costena la-costena 30 g (30 g) 280.0 0.00 0.000 16.67 6.67 10.0 0.00 1.267
330333 95659305 usda-ndb-import Dal Makhani Truly Indian truly-indian 150 g (150 g) 502.0 5.33 0.007 13.33 0.00 2.0 4.67 0.540
330345 9603636634635 usda-ndb-import Smoked Gouda Cheese Specially Selected specially-selected 28 g (1 oz) 1644.0 32.14 0.107 NaN NaN NaN 25.00 0.929
330355 96083338 usda-ndb-import Haywards, Baby Beetroot Mizkan Euro Ltd. mizkan-euro-ltd 100 g (100 g) 866.0 0.50 NaN 11.00 NaN 1.5 0.90 NaN
330363 96092613 usda-ndb-import Umami Pepper, Hot Taste #5 taste-5 100 g (100 g) 1243.0 4.80 NaN 35.70 5.30 NaN 16.00 NaN
330375 9644000330654 usda-ndb-import Balsamic Vinegar Of Modena Glaze Monari Fererzoni monari-fererzoni 18 g (1 Tbsp) 929.0 0.00 NaN 50.00 33.33 NaN 0.00 0.056
330398 9755635018555 usda-ndb-import Sour Licorice Twists, Sour Cherry Newman's Own newman-s-own 40 g (40 g) 1360.0 1.25 NaN 72.50 32.50 0.0 2.50 0.000
330399 9763556636543 usda-ndb-import Taco Seasoning Mix Mccormick & Company Inc. mccormick-company-inc 6 g (2 tsp) 1393.0 0.00 NaN 66.67 16.67 NaN 16.67 6.333
330406 9780803738782 usda-ndb-import Organic Z Bar Clif Kid clif-kid 36 g (1 BAR) 1393.0 9.72 0.000 61.11 30.56 8.3 5.56 0.375
330419 9787461062105 usda-ndb-import Natural Cassava Industria De Casabe Paul industria-de-casabe-paul 85 g (85 GRM) 1477.0 0.00 0.000 87.06 2.35 4.7 1.18 0.012
330428 9836654056565 usda-ndb-import Raspados Ice Bars Jarritos, The Jel Sert Company jarritos,the-jel-sert-company 57 g (57 g) 368.0 0.00 NaN 21.05 19.30 NaN 0.00 0.018
330446 99567453 usda-ndb-import Mint Melange Tea A Blend Of Peppermint, Lemon ... Trader Joe's trader-joe-s 20 g (0.7 oz) 0.0 0.00 0.000 0.00 0.00 0.0 0.00 0.000
330449 999990026839 usda-ndb-import Sugar Free Drink Mix, Peach Tea Market Pantry market-pantry 1 g (0.5 PACKET) 2092.0 0.00 NaN 0.00 0.00 NaN 0.00 0.000

169012 rows × 14 columns


In [ ]:

The code column can be problematic as it's a long number that can be truncated to a floating point representation when manipulated by certain programs. Going to convert it to append a character at the start to it will be read unambiguously as a string.

First convert the code column to a string


In [10]:
df.code.apply(str)


Out[10]:
2                  4559
3                 16087
4                 16094
5                 16100
6                 16117
7                 16124
8                 16193
9                 16513
10                16612
11                16650
12                16872
13                16933
14                17497
15                18012
16                18050
17                18173
18                18197
19                18227
20                18265
21                18289
22                18319
23                18340
24                18357
25                18371
26                18388
27                18395
28                18401
29                18418
30                18449
31                18456
              ...      
330245    9556041600040
330246    9556041600057
330248    9556041600255
330249    9556041600293
330250    9556041602143
330251    9556041602211
330258    9556041608947
330259    9556041608954
330266    9556041610940
330273    9556041612593
330274    9556041612609
330276    9556041620369
330284    9556107022601
330297    9556173386461
330311    9556390158162
330312    9556390178160
330331     956088003170
330332         95656335
330333         95659305
330345    9603636634635
330355         96083338
330363         96092613
330375    9644000330654
330398    9755635018555
330399    9763556636543
330406    9780803738782
330419    9787461062105
330428    9836654056565
330446         99567453
330449     999990026839
Name: code, dtype: object

In [11]:
# Add an N in front of the number string
df.code = "N"+df.code.astype(str)

Quick check for NA values


In [12]:
df.isnull().sum(axis = 0)


Out[12]:
code                      0
creator                   0
product_name             45
brands                    0
brands_tags               0
serving_size              2
energy_100g             396
fat_100g                399
cholesterol_100g      28029
carbohydrates_100g      273
sugars_100g           11314
fiber_100g            31282
proteins_100g           891
sodium_100g             790
dtype: int64

Need to get rid of rows with null product_name as we will be using that later for display etc.


In [13]:
df = df[df.product_name.notnull()]

Now going to want to try and find similar products to a specific item. In the demo, did this by just pulling items with certain text in the name and then doing some hand sorting. Can't scale that approach. Going to try to do it based on a combination of words in the name and the ingredients.

Try out the approach of using CountVectorizer on the product_name field to see how it does.


In [14]:
df


Out[14]:
code creator product_name brands brands_tags serving_size energy_100g fat_100g cholesterol_100g carbohydrates_100g sugars_100g fiber_100g proteins_100g sodium_100g
2 N4559 usda-ndb-import Peanuts Torn & Glasser torn-glasser 28 g (0.25 cup) 1941.0 17.86 0.000 60.71 17.86 7.1 17.86 0.250
3 N16087 usda-ndb-import Organic Salted Nut Mix Grizzlies grizzlies 28 g (0.25 cup) 2540.0 57.14 NaN 17.86 3.57 7.1 17.86 0.482
4 N16094 usda-ndb-import Organic Polenta Bob's Red Mill bob-s-red-mill 35 g (0.25 cup) 1552.0 1.43 NaN 77.14 NaN 5.7 8.57 NaN
5 N16100 usda-ndb-import Breadshop Honey Gone Nuts Granola Unfi unfi 52 g (0.5 cup) 1933.0 18.27 NaN 63.46 11.54 7.7 13.46 NaN
6 N16117 usda-ndb-import Organic Long Grain White Rice Lundberg lundberg 45 g (0.25 cup) 1490.0 NaN NaN 80.00 NaN NaN 8.89 NaN
7 N16124 usda-ndb-import Organic Muesli Daddy's Muesli daddy-s-muesli 64 g (0.5 cup) 1833.0 18.75 NaN 57.81 15.62 9.4 14.06 0.055
8 N16193 usda-ndb-import Organic Dark Chocolate Minis Equal Exchange equal-exchange 40 g (40 g) 2406.0 37.50 NaN 55.00 42.50 7.5 5.00 NaN
9 N16513 usda-ndb-import Organic Sunflower Oil Napa Valley Naturals napa-valley-naturals 14 g (1 Tbsp) 3586.0 100.00 NaN NaN NaN NaN NaN NaN
10 N16612 usda-ndb-import Organic Adzuki Beans Unfi unfi 48 g (0.25 cup) 1393.0 1.04 NaN 62.50 NaN 12.5 22.92 NaN
11 N16650 usda-ndb-import Organic Penne Pasta Gardentime gardentime 57 g (0.5 cup) 1540.0 1.75 NaN 73.68 NaN 1.8 14.04 NaN
12 N16872 usda-ndb-import Zen Party Mix Sunridge sunridge 30 g (0.25 cup) 2230.0 36.67 NaN 36.67 3.33 6.7 16.67 0.633
13 N16933 usda-ndb-import Organic Golden Flax Seeds Unfi unfi 21 g (2 Tbsp) 2590.0 42.86 NaN 38.10 NaN 38.1 19.05 0.038
14 N17497 usda-ndb-import Organic Spicy Punks Eden eden 33 g (0.25 cup) 2536.0 48.48 NaN 15.15 NaN 15.2 30.30 0.227
15 N18012 usda-ndb-import Cinnamon Nut Granola Grizzlies grizzlies 55 g (0.5 cup) 1824.0 18.18 NaN 60.00 21.82 9.1 14.55 0.009
16 N18050 usda-ndb-import Organic Hazelnuts Grizzlies grizzlies 28 g (0.25 cup) 2632.0 60.71 NaN 17.86 3.57 10.7 14.29 0.004
17 N18173 usda-ndb-import Organic Sweetened Banana Chips Unfi unfi 30 g (0.25 cup) 2092.0 26.67 NaN 66.67 16.67 3.3 3.33 NaN
18 N18197 usda-ndb-import Lotus Organic Brown Jasmine Rice Unfi unfi 45 g (0.25 cup) 1582.0 2.22 NaN 77.78 2.22 2.2 8.89 NaN
19 N18227 usda-ndb-import Organic Oat Groats Pcc pcc 42 g (0.25 cup) 1096.0 5.95 NaN 66.67 2.38 9.5 16.67 0.010
20 N18265 usda-ndb-import Energy Power Mix Sunridge sunridge 40 g (0.25 cup) 1464.0 17.50 NaN 42.50 32.50 5.0 7.50 0.112
21 N18289 usda-ndb-import Antioxidant Mix - Berries & Chocolate Sunridge sunridge 30 g (0.25 cup) 2092.0 33.33 NaN 46.67 30.00 6.7 13.33 0.183
22 N18319 usda-ndb-import Organic Quinoa Coconut Granola With Mango Sunridge sunridge 55 g (0.5 cup) 1674.0 10.91 NaN 69.09 27.27 9.1 10.91 0.009
23 N18340 usda-ndb-import Fire Roasted Hatch Green Chile Almonds Sunridge sunridge 30 g (0.25 cup) 2372.0 50.00 NaN 23.33 6.67 10.0 6.67 0.500
24 N18357 usda-ndb-import Peanut Butter Power Chews Sunridge sunridge 45 g (45 GRM) 1954.0 20.00 NaN 60.00 33.33 4.4 8.89 0.078
25 N18371 usda-ndb-import Real Salt Granular Redmond redmond 1.4 g (0.25 tsp) NaN NaN NaN NaN NaN NaN NaN 37.857
26 N18388 usda-ndb-import Organic Unswt Berry Coconut Granola New England Naturals new-england-naturals 54 g (0.5 cup) 1548.0 22.22 NaN 57.41 5.56 9.3 12.96 0.111
27 N18395 usda-ndb-import Roasted Salted Black Pepper Cashews Sunridge sunridge 30 g (30 g) 2372.0 46.67 NaN 30.00 6.67 3.3 16.67 0.400
28 N18401 usda-ndb-import Thai Curry Roasted Cashews Sunridge sunridge 30 g (30 g) 2372.0 43.33 NaN 30.00 6.67 3.3 16.67 0.533
29 N18418 usda-ndb-import Wasabi Tamari Almonds Sunridge sunridge 30 g (30 g) 2230.0 46.67 NaN 23.33 3.33 10.0 20.00 0.800
30 N18449 usda-ndb-import Organic Medium Shredded Coconut Unfi unfi 30 g (0.25 cup) 2230.0 30.00 NaN 53.33 6.67 6.7 6.67 NaN
31 N18456 usda-ndb-import Organic Red Quinoa Pcc pcc 45 g (0.25 cup) 1490.0 6.67 NaN 64.44 11.11 8.9 13.33 0.004
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
330245 N9556041600040 usda-ndb-import Sardines In Chili Oil Ayam Brand ayam-brand 56 g (2 oz) 1343.0 23.21 0.009 7.14 0.00 0.0 19.64 0.464
330246 N9556041600057 usda-ndb-import Sardines In Kung Pao Sauce Ayam Brand ayam-brand 56 g (2 oz) 820.0 10.71 0.009 8.93 5.36 0.0 19.64 0.607
330248 N9556041600255 usda-ndb-import Mackerel In Tomato Sauce Ayam Brand ayam-brand 56 g (2 oz) 820.0 14.29 0.054 1.79 1.79 1.8 12.50 0.268
330249 N9556041600293 usda-ndb-import Sardines In Extra Virgin Olive Oil Ayam Brand ayam-brand 56 g (56 g) 1197.0 23.21 0.036 7.14 0.00 0.0 14.29 0.357
330250 N9556041602143 usda-ndb-import Ayam, Satay Peanut Sauce Guinea Foods Sdn Bhd guinea-foods-sdn-bhd 70 g (2.5 oz) 1017.0 17.14 0.000 17.14 8.57 2.9 5.71 0.343
330251 N9556041602211 usda-ndb-import Baked Beans In Tomato Sauce Ayam ayam 130 g (4.6 oz) 418.0 0.77 0.000 19.23 7.69 3.8 3.85 0.300
330258 N9556041608947 usda-ndb-import Ayam, Malaysian Satay Sauce, Mild Guinea Foods Sdn Bhd guinea-foods-sdn-bhd 85 g (3 oz) 837.0 12.94 0.000 16.47 4.71 2.4 5.88 0.247
330259 N9556041608954 usda-ndb-import Ayam, Medium Thai Satay Sauce, Lemongrass And ... Guinea Foods Sdn Bhd guinea-foods-sdn-bhd 85 g (3 oz) 937.0 11.76 0.000 21.18 9.41 3.5 7.06 0.282
330266 N9556041610940 usda-ndb-import Coconut Spread (Kaya) Ayam ayam 18.5 g (2.6 oz) 1356.0 10.81 0.054 48.65 43.24 0.0 5.41 0.054
330273 N9556041612593 usda-ndb-import Malaysian Curry Paste, Mild Ayam ayam 46 g (46 g) 820.0 13.04 0.000 17.39 6.52 4.3 2.17 1.630
330274 N9556041612609 usda-ndb-import Curry Paste For Beef Rendang, Medium Ayam ayam 46 g (46 g) 820.0 13.04 0.000 19.57 8.70 4.3 2.17 1.609
330276 N9556041620369 usda-ndb-import Sardines In Spicy Tomato Sauce, Chili And Lime Ayam Brand ayam-brand 56 g (56 g) 598.0 7.14 0.071 1.79 1.79 0.0 17.86 0.411
330284 N9556107022601 usda-ndb-import Goldwilly, Original Instant Chrysanthemum Tea,... Hsc International Pte. Ltd. hsc-international-pte-ltd 18 g (18 g) 1628.0 0.00 0.000 100.00 22.22 0.0 0.00 0.000
330297 N9556173386461 usda-ndb-import Chewy Candy Fruit Plus fruit-plus 12.5 g (0.45 oz) 1674.0 8.00 0.000 88.00 56.00 0.0 0.00 0.000
330311 N9556390158162 usda-ndb-import Lee, Special Crackers Lee Biscuits (Pte.) Ltd. lee-biscuits-pte-ltd 22 g (22 g) 1979.0 18.18 0.009 68.18 13.64 4.5 9.09 0.400
330312 N9556390178160 usda-ndb-import Sugar Crackers Lee Biscuits (Pte.) Ltd. lee-biscuits-pte-ltd 22 g (3 PCS) 1845.0 13.64 0.009 72.73 9.09 4.6 9.09 0.718
330331 N956088003170 usda-ndb-import Blueberry Chia Buckwheat Muesli Seven Sundays, Seven Sundays Llc seven-sundays,seven-sundays-llc 57 g (0.5 cup) 1690.0 10.53 0.000 66.67 15.79 8.8 12.28 0.018
330332 N95656335 usda-ndb-import Chipotle Peppers In Adobo Sauce La Costena la-costena 30 g (30 g) 280.0 0.00 0.000 16.67 6.67 10.0 0.00 1.267
330333 N95659305 usda-ndb-import Dal Makhani Truly Indian truly-indian 150 g (150 g) 502.0 5.33 0.007 13.33 0.00 2.0 4.67 0.540
330345 N9603636634635 usda-ndb-import Smoked Gouda Cheese Specially Selected specially-selected 28 g (1 oz) 1644.0 32.14 0.107 NaN NaN NaN 25.00 0.929
330355 N96083338 usda-ndb-import Haywards, Baby Beetroot Mizkan Euro Ltd. mizkan-euro-ltd 100 g (100 g) 866.0 0.50 NaN 11.00 NaN 1.5 0.90 NaN
330363 N96092613 usda-ndb-import Umami Pepper, Hot Taste #5 taste-5 100 g (100 g) 1243.0 4.80 NaN 35.70 5.30 NaN 16.00 NaN
330375 N9644000330654 usda-ndb-import Balsamic Vinegar Of Modena Glaze Monari Fererzoni monari-fererzoni 18 g (1 Tbsp) 929.0 0.00 NaN 50.00 33.33 NaN 0.00 0.056
330398 N9755635018555 usda-ndb-import Sour Licorice Twists, Sour Cherry Newman's Own newman-s-own 40 g (40 g) 1360.0 1.25 NaN 72.50 32.50 0.0 2.50 0.000
330399 N9763556636543 usda-ndb-import Taco Seasoning Mix Mccormick & Company Inc. mccormick-company-inc 6 g (2 tsp) 1393.0 0.00 NaN 66.67 16.67 NaN 16.67 6.333
330406 N9780803738782 usda-ndb-import Organic Z Bar Clif Kid clif-kid 36 g (1 BAR) 1393.0 9.72 0.000 61.11 30.56 8.3 5.56 0.375
330419 N9787461062105 usda-ndb-import Natural Cassava Industria De Casabe Paul industria-de-casabe-paul 85 g (85 GRM) 1477.0 0.00 0.000 87.06 2.35 4.7 1.18 0.012
330428 N9836654056565 usda-ndb-import Raspados Ice Bars Jarritos, The Jel Sert Company jarritos,the-jel-sert-company 57 g (57 g) 368.0 0.00 NaN 21.05 19.30 NaN 0.00 0.018
330446 N99567453 usda-ndb-import Mint Melange Tea A Blend Of Peppermint, Lemon ... Trader Joe's trader-joe-s 20 g (0.7 oz) 0.0 0.00 0.000 0.00 0.00 0.0 0.00 0.000
330449 N999990026839 usda-ndb-import Sugar Free Drink Mix, Peach Tea Market Pantry market-pantry 1 g (0.5 PACKET) 2092.0 0.00 NaN 0.00 0.00 NaN 0.00 0.000

168967 rows × 14 columns


In [15]:
# Get all the non-null product name fields
name_data = df.product_name

# Pass them to the Count Vectorizer
vectorizer = CountVectorizer()
wv = vectorizer.fit_transform(name_data)

# Get some basic stats
print("Size of vocabulary:", wv.shape[1],"words")
print("Average non-zero entries per example:%5.2f" % (1.0*wv.nnz/wv.shape[0]))


Size of vocabulary: 19299 words
Average non-zero entries per example: 3.99

Get the extracted words


In [16]:
a = vectorizer.get_feature_names()
print("First feature string:", sorted(a)[0])
print("Last feature string:", sorted(a)[len(a)-1])


First feature string: 00
Last feature string: zymbom

Count occurences by word


In [17]:
fn = vectorizer.get_feature_names()
wc = wv.sum(axis = 0)
word_frame = pd.DataFrame({"word": fn, "count":np.ravel(wc)})

In [18]:
word_frame.sort_values(by = ["count"], ascending = False)


Out[18]:
count word
3523 10278 chocolate
3247 9972 cheese
12211 9052 organic
18888 7899 with
15012 7646 sauce
11097 6765 mix
4348 5740 cream
10976 4825 milk
8889 4200 juice
6662 4178 fruit
3468 3979 chips
3360 3933 chicken
4134 3774 cookies
2752 3722 candy
2510 3697 butter
18772 3606 whole
16899 3543 sweet
8355 3439 ice
16650 3406 style
19120 3399 yogurt
8426 3396 in
1405 3239 beans
18754 3185 white
14457 3139 roasted
11593 3115 natural
13527 3063 premium
12043 2978 oil
12236 2976 original
14331 2913 rice
18622 2882 water
... ... ...
9043 1 kasih
9045 1 kasmati
9046 1 kasondra
9047 1 kasseri
9049 1 kasza
9051 1 katchup
9054 1 kathleen
9021 1 karamah
9018 1 kapoya
9017 1 kapalua
9001 1 kama
8986 1 kalasan
8990 1 kaleifornia
8991 1 kalekopita
8992 1 kalenola
8994 1 kalinga
8995 1 kalmes
8996 1 kalocsa
8999 1 kalua
9002 1 kamaboko
9016 1 kanu
9003 1 kamoi
9006 1 kan
9007 1 kancho
9009 1 kandylas
9010 1 kane
9011 1 kaneli
9012 1 kani
9013 1 kanikama
19298 1 zymbom

19299 rows × 2 columns

Ok -- challenge here is that many of the words are descriptive adjectives vs nouns. Even the ones that are nouns are going to be hard to separate e.g. rice could be rice crackers, chicken and rice, etc.

Wonder if I can get anywhere with bigrams or trigrams...try again


In [19]:
# Get all the non-null product name fields
name_data = df.product_name[df.product_name.notnull()]

# Pass them to the Count Vectorizer
vectorizer = CountVectorizer(analyzer = "word", ngram_range = (3,3))
wv = vectorizer.fit_transform(name_data)

# Get some basic stats
print("Size of vocabulary:", wv.shape[1],"words")
print("Average non-zero entries per example:%5.2f" % (1.0*wv.nnz/wv.shape[0]))

fn = vectorizer.get_feature_names()
wc = wv.sum(axis = 0)

word_frame = pd.DataFrame({"word": fn, "count":np.ravel(wc)}) 

word_frame.sort_values(by = ["count"], ascending = False)


Size of vocabulary: 192887 words
Average non-zero entries per example: 2.09
Out[19]:
count word
182900 810 virgin olive oil
60742 807 extra virgin olive
59799 750 enriched macaroni product
134808 558 premium ice cream
152804 367 sharp cheddar cheese
79494 358 greek nonfat yogurt
42253 303 cooked potato chips
160718 296 sparkling water beverage
88910 280 in light syrup
142163 268 reduced fat milk
178619 250 tuna in water
88866 242 in heavy syrup
94633 240 kettle cooked potato
45911 222 cream cheese spread
50406 221 cut green beans
189478 216 with other natural
110324 208 monterey jack cheese
34208 208 chocolate chip cookies
93176 200 juice from concentrate
47205 198 creamy peanut butter
141617 193 red kidney beans
189934 185 with sea salt
98665 180 light ice cream
62518 178 fat free milk
35265 175 chocolate peanut butter
70329 174 frosted sugar cookies
36563 171 chunk light tuna
106539 171 mild cheddar cheese
98847 165 light tuna in
137309 165 pudding pie filling
... ... ...
72497 1 fully cooked smoke
72499 1 fully cooked tequila
72500 1 fully cooked teriyaki
72501 1 fully cooked traditional
72504 1 fully cooked white
72505 1 fully cooked whole
72506 1 fully cooked wieners
72507 1 fully cooked wild
72508 1 fully cooked with
72481 1 fully cooked mini
72480 1 fully cooked mesquite
72479 1 fully cooked menudo
72466 1 fully cooked duck
72455 1 fully cooked black
72457 1 fully cooked bratwurst
72459 1 fully cooked burgers
72461 1 fully cooked cheese
72463 1 fully cooked chorizo
72464 1 fully cooked coated
72465 1 fully cooked cured
72467 1 fully cooked frozen
72477 1 fully cooked meat
72469 1 fully cooked ham
72470 1 fully cooked hatch
72471 1 fully cooked heat
72473 1 fully cooked kicking
72474 1 fully cooked kneidlach
72475 1 fully cooked liverwurst
72476 1 fully cooked maple
192886 1 zymbom energy drink

192887 rows × 2 columns

So some kind of bigram and trigram approach might be scalable here. But don't really have a lot of time to perfect that. However, could use these trigrams to expand the simple 'demo cat' approach by picking only certain bigrams and then using the simple algorithm from the demo approach to find a range of other products with that bigram.

Going to write out the trigrams, do some hand coding of which ones we want to use, then bring the results back in


In [20]:
word_frame.to_csv(data_dir+"trigrams_uncoded.csv")

In [81]:
tri_w = pd.read_csv(code_dir+"trigrams_wanted_v2.csv")

In [82]:
tri_w


Out[82]:
count word wanted
0 811 virgin olive oil NaN
1 808 extra virgin olive NaN
2 751 enriched macaroni product NaN
3 558 premium ice cream 1.0
4 369 sharp cheddar cheese NaN
5 358 greek nonfat yogurt 1.0
6 303 cooked potato chips 1.0
7 296 sparkling water beverage NaN
8 285 in light syrup NaN
9 268 reduced fat milk NaN
10 250 tuna in water NaN
11 244 in heavy syrup NaN
12 240 kettle cooked potato NaN
13 224 cut green beans NaN
14 223 cream cheese spread 1.0
15 216 with other natural NaN
16 209 chocolate chip cookies NaN
17 209 monterey jack cheese NaN
18 204 creamy peanut butter NaN
19 200 juice from concentrate NaN
20 197 red kidney beans 1.0
21 185 with sea salt NaN
22 180 light ice cream NaN
23 178 fat free milk NaN
24 175 chocolate peanut butter NaN
25 174 frosted sugar cookies 1.0
26 173 chunk light tuna 1.0
27 173 mild cheddar cheese NaN
28 166 light tuna in NaN
29 165 pudding pie filling NaN
... ... ... ...
969 22 corn on the NaN
970 22 country fresh premium NaN
971 22 craisins dried cranberries NaN
972 22 cream ice cream NaN
973 22 dark chocolate cherry NaN
974 22 decorated shortbread cookie NaN
975 22 diet iced tea NaN
976 22 energy bar chocolate NaN
977 22 flavored juice blend NaN
978 22 frozen greek yogurt NaN
979 22 fruit grain cereal NaN
980 22 fruit yogurt smoothie NaN
981 22 garden of eatin NaN
982 22 garlic pasta sauce NaN
983 22 garlic sea salt NaN
984 22 gelatin dessert lime NaN
985 22 grape soda grape NaN
986 22 greek low fat NaN
987 22 honey mustard onion NaN
988 22 honey nut toasted NaN
989 22 ice cream banana NaN
990 22 iced oatmeal cookies NaN
991 22 in tomato and NaN
992 22 instant lunch ramen NaN
993 22 italian waffle cookie NaN
994 22 lime soda lemon NaN
995 22 milk chocolate almonds NaN
996 22 milk chocolate bars NaN
997 22 movie theater butter NaN
998 22 naturally flavored sparkling NaN

999 rows × 3 columns

Can drop all the rows where wanted is NaN


In [83]:
tri_w = tri_w[tri_w.wanted.notnull()]
tri_w


Out[83]:
count word wanted
3 558 premium ice cream 1.0
5 358 greek nonfat yogurt 1.0
6 303 cooked potato chips 1.0
14 223 cream cheese spread 1.0
20 197 red kidney beans 1.0
25 174 frosted sugar cookies 1.0
26 173 chunk light tuna 1.0
39 144 roasted turkey breast 1.0
40 142 organic baby food 1.0
42 136 chocolate chip cookie 1.0
43 135 whole wheat bread 1.0
45 129 hot dog buns 1.0
47 126 pureed baby food 1.0
49 122 ice cream sandwiches 1.0
50 121 cookies chocolate chip 1.0
52 120 ice cream bars 1.0
55 116 crunchy peanut butter 1.0
57 116 pancake waffle mix 1.0
60 113 chewy granola bars 1.0
61 111 dry roasted peanuts 1.0
67 108 dairy frozen dessert 1.0
73 102 thin crust pizza 1.0
74 101 caffeine free soda 1.0
75 101 macaroni cheese dinner 1.0
83 98 iced tea mix 1.0
85 98 vegetable oil spread 1.0
91 95 chili with beans 1.0
96 93 whole kernel corn 1.0
97 92 chicken noodle soup 1.0
98 92 peanut butter cups 1.0
... ... ... ...
454 36 white corn tortilla 1.0
461 35 corn puff cereal 1.0
467 35 organic black beans 1.0
499 33 beans in tomato 1.0
510 33 frosted shredded wheat 1.0
527 33 sweet potato chips 1.0
551 32 puffed wheat cereal 1.0
555 32 style potato chips 1.0
564 31 black bean soup 1.0
567 31 classic potato chips 1.0
576 31 ice cream sandwich 1.0
582 31 pico de gallo 1.0
597 30 beef ravioli in 1.0
600 30 chunk white albacore 1.0
607 30 green split peas 1.0
630 30 sloppy joe sauce 1.0
642 29 chicken breast nuggets 1.0
643 29 chocolate covered pretzels 1.0
645 29 corned beef hash 1.0
654 29 hot italian sausage 1.0
689 28 noodles with vegetables 1.0
698 28 soft baked cookies 1.0
702 28 sweet italian sausage 1.0
707 28 tomato sauce with 1.0
710 28 yellow corn tortilla 1.0
715 27 chicken gravy mix 1.0
716 27 chocolate chunk cookies 1.0
724 27 instant mashed potatoes 1.0
743 27 tomato basil soup 1.0
758 26 instant hot cereal 1.0

123 rows × 3 columns

So now got an identifier to use to append wanted categories from

Want to append the trigram as the 'demo category' for any product which contains that trigram. It is possible that a product will fit into multiple trigrams, in which case I will choose to put it in the lowest total count trigram category. Going to do this by looping through the wanted trigrams from highest to lowest and updating the product's category as needed.


In [84]:
# Initialize demo category with None
df["demo_cat"] = "None"

# loop over each trigram
for trigram in tri_w.word:
 
    # get the index of the correct column for that trigram in the vectorized output
    wv_index = fn.index(trigram)
    
    # Get locations of matches and convert to a dense representation for indexing
    matches = wv[:,wv_index] == 1    
    matches = np.ravel(sparse.csr_matrix.todense(matches))
      
    # Set the 'demo_cat' field to that trigram value
    df.loc[matches, ["demo_cat"]] = trigram


/Users/seddont/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
/Users/seddont/anaconda/lib/python3.5/site-packages/pandas/core/indexing.py:477: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s

In [85]:
df[df.demo_cat != "None"]


Out[85]:
code creator product_name brands brands_tags serving_size energy_100g fat_100g cholesterol_100g carbohydrates_100g sugars_100g fiber_100g proteins_100g sodium_100g demo_cat
39 N18708 usda-ndb-import Organic Black Beans Unfi unfi 45 g (0.25 cup) 1393.0 NaN NaN 62.22 2.22 24.4 22.22 NaN organic black beans
51 N30557 usda-ndb-import Organic Green Split Peas Coombs Family coombs-family 45 g (0.25 cup) 1582.0 1.11 NaN 68.89 NaN 28.9 28.89 0.022 green split peas
130 N38713 usda-ndb-import Organic Steel-Cut Oats Unfi unfi 40 g (0.25 cup) 1778.0 7.50 NaN 72.50 NaN 12.5 17.50 NaN steel cut oats
139 N39536 usda-ndb-import Organic Short Grain Brown Rice Lundberg lundberg 51 g (0.25 cup) 1393.0 2.94 NaN 78.43 NaN 5.9 5.88 NaN grain brown rice
292 N433821494 usda-ndb-import Oatmeal Raisin Cookies Bart & Judy's bart-judy-s 28 g (6 COOKIES) 1552.0 21.43 0.036 67.86 28.57 3.6 7.14 0.214 oatmeal raisin cookies
417 N901000017 usda-ndb-import Yellow Corn Tortilla Chips Guiltless Gourmet guiltless-gourmet 28 g (1 oz) 1795.0 10.71 0.000 78.57 0.00 7.1 7.14 0.643 yellow corn tortilla
483 N16056 usda-ndb-import Whole Kernel Corn Trader Joe's trader-joe-s 125 g (0.5 cup) 201.0 1.20 0.000 7.20 5.60 1.6 1.60 0.160 whole kernel corn
671 N70662455029 usda-ndb-import Top Ramen Noodle Soup Nissin nissin 42 g (0.5 DRY NOODLE BLOCK AND 1 TSP SEASONING... 1891.0 16.67 0.000 64.29 0.00 4.8 11.90 1.810 ramen noodle soup
718 N79969 usda-ndb-import Dry Roasted & Salted Almonds Trader Joe's trader-joe-s 30 g (0.25 cup) 2372.0 50.00 0.000 16.67 3.33 13.3 23.33 0.383 roasted salted almonds
720 N8001000661 usda-ndb-import Cool Beans, Red Pepper Hummus Bobbi's Best Inc. bobbi-s-best-inc 28 g (2 Tbsp) 895.0 21.43 0.000 10.71 0.00 3.6 3.57 0.375 red pepper hummus
893 N8725247052 usda-ndb-import Dark Chocolate Almonds Long Grove Confectionery Co. long-grove-confectionery-co 37 g (5 cup) 2264.0 40.54 0.014 45.95 32.43 8.1 10.81 0.000 dark chocolate almonds
959 N9300000765 usda-ndb-import Kosher Dill Spears Mt. Olive Pickle Company Inc. mt-olive-pickle-company-inc 28 g (1 oz) 75.0 0.00 NaN 3.57 NaN NaN 0.00 0.929 kosher dill spears
961 N9300000802 usda-ndb-import Kosher Dill Spears Mt. Olive mt-olive 28 g (1 oz) 75.0 0.00 NaN 3.57 NaN NaN 0.00 0.929 kosher dill spears
965 N9300000888 usda-ndb-import Kosher Dill Spears Mt. Olive mt-olive 28 g (1 oz) 75.0 0.00 NaN 3.57 NaN NaN 0.00 0.929 kosher dill spears
997 N9300003506 usda-ndb-import Kosher Dill Spears Pickles Mt. Olive, Mount Olive Pickle Company Inc. mt-olive,mount-olive-pickle-company-inc 28 g (1 SPEAR | ABOUT) 75.0 0.00 NaN 3.57 NaN NaN 0.00 0.714 kosher dill spears
1022 N9300006507 usda-ndb-import Kosher Dill Spears Made With Sea Salt Mt. Olive mt-olive 28 g (1 oz) 75.0 0.00 NaN 3.57 NaN NaN 0.00 0.929 kosher dill spears
1030 N9300006811 usda-ndb-import Simply Pickles, Kosher Dill Spears Mt. Olive mt-olive 28 g (1 oz) 75.0 0.00 NaN 3.57 NaN NaN 0.00 0.857 kosher dill spears
1366 N98601 usda-ndb-import Chicken Noodle Soup Trader Joe's trader-joe-s 248 g (1 cup) 151.0 0.40 0.008 5.65 0.40 0.4 2.42 0.294 chicken noodle soup
1552 N10374168084 usda-ndb-import The Father's Table, Peanut Butter Cup Cheesecake The Father's Table Llc the-father-s-table-llc 113 g (1 SLICE) 1665.0 23.01 0.040 38.05 25.66 1.8 5.31 0.301 peanut butter cup
1573 N10374727014 usda-ndb-import The Father's Table, Red Velvet Cake Roll The Father's Table Llc the-father-s-table-llc 113 g (113 GRM) 1481.0 13.27 0.075 54.87 38.94 0.9 4.42 0.248 red velvet cake
1602 N10449777227 usda-ndb-import Flavored Popcorn, White Cheddar Rocky Mountain Popcorn rocky-mountain-popcorn 28 g (2 cup) 2389.0 39.29 0.018 42.86 7.14 7.1 10.71 0.929 popcorn white cheddar
1746 N11110002532 usda-ndb-import Psst..., Enriched Hot Dog Buns Psst... psst 39 g (39 g) 1180.0 3.85 0.000 53.85 7.69 2.6 7.69 0.487 hot dog buns
1750 N11110002853 usda-ndb-import Hot Dog Buns Psst psst 43 g (1 BUN) 1167.0 3.49 0.000 53.49 6.98 2.3 9.30 0.488 hot dog buns
1771 N11110004666 usda-ndb-import Ice Cream, Chocolate Chip Cookie Dough Private Selection private-selection 84 g (0.5 cup) 1146.0 14.29 0.048 30.95 23.81 0.0 4.76 0.089 chocolate chip cookie
1859 N11110015907 usda-ndb-import Chipmates, Chunky Chocolate Chip Cookies, Pean... Kroger kroger 26 g (26 g) 1933.0 26.92 0.000 57.69 26.92 0.0 7.69 0.288 cookies peanut butter
1865 N11110016195 usda-ndb-import Crunchy Peanut Butter Kroger kroger 32 g (2 Tbsp) 2351.0 46.88 0.000 28.12 12.50 6.2 21.88 0.469 crunchy peanut butter
1867 N11110016508 usda-ndb-import Crunchy Peanut Butter Kroger kroger 32 g (2 Tbsp) 2485.0 50.00 0.000 21.88 3.12 9.4 25.00 0.391 crunchy peanut butter
1889 N11110017734 usda-ndb-import Ice Cream Sandwiches Kroger kroger 74 g (1 SANDWICH) 1188.0 10.81 0.027 41.89 21.62 1.4 4.05 0.203 ice cream sandwiches
1910 N11110018649 usda-ndb-import P$$T..., Sandwich Cookies, Peanut Butter P$$T... p-t 33 g (33 g) 2029.0 18.18 0.000 72.73 33.33 3.0 6.06 0.303 cookies peanut butter
1927 N11110019073 usda-ndb-import Crunchy Peanut Butter Fred Meyer fred-meyer 32 g (2 Tbsp) 2485.0 50.00 0.000 21.88 3.12 9.4 25.00 0.391 crunchy peanut butter
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
293542 N7411000500347 usda-ndb-import Refried Black Beans San Francisco san-francisco 113 g (4 oz) 854.0 9.73 0.000 17.70 0.88 7.1 7.96 0.412 refried black beans
293560 N74164300819 usda-ndb-import Premium Ice Cream Lowes Foods lowes-foods 67 g (0.5 cup) 874.0 10.45 0.045 26.87 23.88 0.0 2.99 0.075 premium ice cream
293561 N74164300820 usda-ndb-import Premium Ice Cream Lowes Foods lowes-foods 67 g (0.5 cup) 874.0 10.45 0.037 28.36 25.37 0.0 2.99 0.075 premium ice cream
293575 N74164303635 usda-ndb-import Traditional Pasta Sauce Lowes Foods lowes-foods 127 g (0.5 cup) 230.0 0.79 0.000 9.45 4.72 2.4 1.57 0.386 traditional pasta sauce
293817 N7501088109980 usda-ndb-import Pico De Gallo Seasoning Mix Terana terana 0.5 g (0.25 tsp) 0.0 0.00 NaN 0.00 NaN NaN 0.00 0.000 pico de gallo
294045 N760712090012 usda-ndb-import Boylan Bottling Co, Cane Sugar Soda, Root Beer Grip Inc. grip-inc 355 ml (1 BOTTLE) 201.0 0.00 0.000 11.83 11.83 0.0 0.00 0.010 soda root beer
313056 N8018759000198 usda-ndb-import Tomato Sauce With Basil Laselva laselva 124 g (0.5 cup) 117.0 0.00 0.000 4.84 3.23 1.6 0.81 0.016 tomato sauce with
314068 N8056457280368 usda-ndb-import Niasca Portofino, Tomato Sauce With Basil Niasca Portofino S.R.L. niasca-portofino-s-r-l 61 g (0.25 cup) 96.0 0.00 0.000 2.95 2.95 2.1 1.31 0.020 tomato sauce with
314147 N806800 usda-ndb-import Chunk White Albacore Tuna In Water Starkist Selects starkist-selects 56 g (2 oz) 448.0 0.89 0.045 0.00 NaN 0.0 25.00 0.062 chunk white albacore
315418 N8410111005573 usda-ndb-import Pasta Salad With Chunk Light Tuna Isabel isabel 85 g (3 oz) 887.0 15.29 0.028 10.59 2.35 1.2 8.24 0.388 chunk light tuna
315419 N8410111005580 usda-ndb-import Isabel, Vegetable Medley With Chunk Light Tuna Isabela North America Inc isabela-north-america-inc 85 g (3 oz) 402.0 4.71 0.020 7.06 4.71 2.4 4.71 0.329 chunk light tuna
315421 N8410111005627 usda-ndb-import Pasta Salad With Chunk Light Tuna Isabel isabel 85 g (3 oz) 887.0 15.29 0.028 10.59 2.35 1.2 8.24 0.388 chunk light tuna
315426 N8410111006785 usda-ndb-import Solid White Albacore Tuna In Olive Oil Isabel isabel 76 g (1 CAN DRAINED) 824.0 10.53 0.032 0.00 0.00 0.0 27.63 0.553 albacore tuna in
315432 N8410111633301 usda-ndb-import Sardines In Tomato Sauce Isabel isabel 56 g (2 ONZ) 649.0 8.93 0.080 1.79 1.79 0.0 16.07 0.500 sardines in tomato
315437 N8410111903800 usda-ndb-import Vegetable Medley With Chunk Light Tuna Isabel isabel 85 g (3 ONZ) 402.0 4.71 0.020 0.00 4.71 2.4 4.71 0.329 chunk light tuna
318211 N8426920635009 usda-ndb-import Sardines In Tomato Sauce Isabel isabel 80 g (0.5 PIECE) 649.0 8.75 0.056 2.50 2.50 0.0 16.25 0.350 sardines in tomato
321354 N84873700722 usda-ndb-import Organic Thousand Island Dressing Wild Oats wild-oats 30 ml (2 Tbsp) 975.0 20.00 0.033 10.00 6.67 3.3 6.67 0.733 thousand island dressing
321466 N853186066119 usda-ndb-import Steel Cut Meals, Steel Cut Oats For Dinner, Ch... Grainful grainful 283 g (283 g) 490.0 6.01 0.012 10.25 0.35 1.8 4.95 0.170 steel cut oats
321834 N8595645935654 usda-ndb-import Caffeine Free Soda, Apple Sidral Mundet sidral-mundet 240 ml (8 fl oz) 159.0 0.00 NaN 10.42 10.00 NaN 0.00 0.038 caffeine free soda
325687 N876958618376 usda-ndb-import Organic Steel Cut Oats Whole Foods Market whole-foods-market 45 g (0.33 cup) 1582.0 6.67 0.000 68.89 2.22 8.9 13.33 0.000 steel cut oats
326816 N88670009963 usda-ndb-import French Fried Potatoes Wellsley Farms wellsley-farms 85 g (18 PIECES | ABOUT) 540.0 4.12 0.000 22.35 0.00 2.4 2.35 0.053 french fried potatoes
326993 N890180001984 usda-ndb-import Organic Baby Food, Just Prunes Plum Organics plum-organics 99 g (99 g) 255.0 0.00 NaN 15.15 13.13 1.0 1.01 0.005 organic baby food
327082 N89107100498 usda-ndb-import Authentic French Brioche Hot Dog Buns Euro Classics euro-classics 45 g (1 HOT DOG) 1322.0 6.67 0.056 51.11 11.11 2.2 8.89 0.467 hot dog buns
328879 N9305033434565 usda-ndb-import Simply 100, Non Fat Greek Yogurt, Pineapple Co... Chobani chobani 150 g (150 g) 280.0 0.00 0.005 10.00 4.67 3.3 8.00 0.040 fat greek yogurt
329871 N9348603001811 usda-ndb-import Greek Nonfat Yogurt Welch's welch-s 150 g (1 CONTAINER) 305.0 0.00 0.003 10.00 8.67 0.0 8.67 0.030 greek nonfat yogurt
329872 N9348603001828 usda-ndb-import Greek Nonfat Yogurt Welch's welch-s 150 g (1 CONTAINER) 305.0 0.00 0.003 10.00 8.67 0.0 8.67 0.030 greek nonfat yogurt
329873 N9348603001842 usda-ndb-import Greek Nonfat Yogurt Welch's welch-s 150 g (1 CONTAINER) 335.0 0.00 0.003 11.33 10.67 0.0 8.67 0.030 greek nonfat yogurt
329874 N9348603001859 usda-ndb-import Greek Nonfat Yogurt With Fruit On The Bottom Welch's welch-s 150 g (1 CONTAINER) 305.0 0.00 0.003 10.00 9.33 0.0 8.67 0.030 greek nonfat yogurt
329875 N9348603001866 usda-ndb-import Greek Nonfat Yogurt Welch's welch-s 150 g (1 CONTAINER) 305.0 0.00 0.003 10.00 9.33 0.0 8.67 0.030 greek nonfat yogurt
330251 N9556041602211 usda-ndb-import Baked Beans In Tomato Sauce Ayam ayam 130 g (4.6 oz) 418.0 0.77 0.000 19.23 7.69 3.8 3.85 0.300 beans in tomato

8647 rows × 15 columns

Some categories have very little variation in them. Going to check for that and drop them.


In [86]:
# loop over each trigram
enough_variance = []

for trigram in tri_w.word:
    
    fat_var = df[df.demo_cat == trigram]["fat_100g"].var()
    
    if fat_var > 2:
        enough_variance.append(trigram)

print(enough_variance)
print(len(enough_variance))


['premium ice cream', 'cooked potato chips', 'cream cheese spread', 'frosted sugar cookies', 'chunk light tuna', 'roasted turkey breast', 'chocolate chip cookie', 'hot dog buns', 'ice cream sandwiches', 'cookies chocolate chip', 'ice cream bars', 'crunchy peanut butter', 'pancake waffle mix', 'chewy granola bars', 'dry roasted peanuts', 'dairy frozen dessert', 'thin crust pizza', 'macaroni cheese dinner', 'vegetable oil spread', 'chili with beans', 'chicken noodle soup', 'peanut butter cups', 'albacore tuna in', 'dark chocolate bar', 'potato chips original', 'honey roasted peanuts', 'roasted salted almonds', 'ramen noodle soup', 'fat greek yogurt', 'whole grain bread', 'smoked turkey breast', 'marinara pasta sauce', 'wavy potato chips', 'dark chocolate almonds', 'peanut butter cup', 'high protein bar', 'french fried potatoes', 'organic pasta sauce', 'sardines in tomato', 'italian style meatballs', 'fruit nut bar', 'thousand island dressing', 'baked snack crackers', 'cookies peanut butter', 'chips salt vinegar', 'mushroom condensed soup', 'refried black beans', 'steel cut oats', 'chocolate ice cream', 'whipped cream cheese', 'oatmeal raisin cookies', 'salsa con queso', 'popcorn white cheddar', 'chicken condensed soup', 'greek style yogurt', 'red velvet cake', 'potato chips cheddar', 'salad with chicken', 'cinnamon rolls with', 'red pepper hummus', 'white corn tortilla', 'corn puff cereal', 'sweet potato chips', 'style potato chips', 'classic potato chips', 'ice cream sandwich', 'pico de gallo', 'chunk white albacore', 'chicken breast nuggets', 'chocolate covered pretzels', 'corned beef hash', 'hot italian sausage', 'noodles with vegetables', 'soft baked cookies', 'sweet italian sausage', 'tomato sauce with', 'yellow corn tortilla', 'chicken gravy mix', 'chocolate chunk cookies', 'instant mashed potatoes', 'tomato basil soup', 'instant hot cereal']
82

Now got a category applied to a subset of the database. Can run the same code as before to use that category to create a subset of recommendations for each category picked...


In [87]:
# What we want to get variation on
pick_factors = ['fat_100g', 'sugars_100g', 'proteins_100g', 'sodium_100g']

# Points we want to pick (percentiles).  Can tune this to get more or fewer picks.

pick_percentiles = [0.1, 0.5, 0.9]
# pick_percentiles = [0, 0.25, 0.5, 0.75, 1.0]

demo_picks = []

# loop over each trigram that has enough variance in it

for cat in enough_variance:
    
    # first get all the items containing the cat word
    catf = df[df["demo_cat"] == cat]
        
    # Identify what rank each product is in that category, for each main factor
    for p in pick_factors:
        
        catf[p + "_rank"] = catf[p].rank(method = "first")
        
        # Select products at chosen percentiles on each
        
        high = catf[p + "_rank"].max()
        
        pick_index = [max(1, round(n * high)) for n in pick_percentiles]
        
        # add codes for those products
        demo_picks.extend(catf[catf[p+"_rank"].isin(pick_index)].code)
        
    
demo_df = df[df.code.isin(demo_picks)]
demo_df


/Users/seddont/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:21: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
Out[87]:
code creator product_name brands brands_tags serving_size energy_100g fat_100g cholesterol_100g carbohydrates_100g sugars_100g fiber_100g proteins_100g sodium_100g demo_cat
417 N901000017 usda-ndb-import Yellow Corn Tortilla Chips Guiltless Gourmet guiltless-gourmet 28 g (1 oz) 1795.0 10.71 0.000 78.57 0.00 7.1 7.14 0.643 yellow corn tortilla
1366 N98601 usda-ndb-import Chicken Noodle Soup Trader Joe's trader-joe-s 248 g (1 cup) 151.0 0.40 0.008 5.65 0.40 0.4 2.42 0.294 chicken noodle soup
2267 N11110197900 usda-ndb-import Triple Layer Red Velvet Cake, Peppermint Bakery Fresh Goodness, The Kroger Co. bakery-fresh-goodness,the-kroger-co 85 g (1 SLICE) 1971.0 17.65 0.024 77.65 65.88 0.0 1.18 0.353 red velvet cake
2669 N11110577276 usda-ndb-import Ice Cream Sandwich The Kroger Co. the-kroger-co 63 g (1 SANDWICH) 1130.0 9.52 0.032 44.44 20.63 1.6 4.76 0.238 ice cream sandwich
3172 N11110791559 usda-ndb-import Crunchy Peanut Butter Simple Truth Organic simple-truth-organic 32 g (2 Tbsp) 2615.0 50.00 0.000 18.75 6.25 9.4 25.00 0.375 crunchy peanut butter
3187 N11110796486 usda-ndb-import Kettle Cooked Potato Chips, Sea Salt & Cracked... Kroger kroger 28 g (1 oz) 2243.0 32.14 0.000 53.57 0.00 3.6 7.14 0.393 cooked potato chips
3427 N11110819376 usda-ndb-import Dry Roasted Peanuts Kroger kroger 28 g (1 oz) 2389.0 46.43 0.000 21.43 3.57 7.1 21.43 0.607 dry roasted peanuts
3638 N11110836281 usda-ndb-import 53% Vegetable Oil Spread P$$T... Big Savings... Pass It On p-t-big-savings-pass-it-on 14 g (1 Tbsp) 2092.0 50.00 0.000 0.00 0.00 0.0 0.00 0.643 vegetable oil spread
3654 N11110837158 usda-ndb-import 98% Fat Free Cream Of Mushroom Condensed Soup Kroger kroger 125 g (0.5 cup) 268.0 2.00 0.004 8.80 1.60 0.0 1.60 0.640 mushroom condensed soup
3701 N11110839305 usda-ndb-import Chili With Beans Kroger kroger 250 g (1 cup) 502.0 4.00 0.010 14.00 2.00 4.4 6.80 0.496 chili with beans
3705 N11110839510 usda-ndb-import Corned Beef Hash Kroger kroger 236 g (1 cup) 707.0 10.17 0.030 8.90 0.42 0.8 9.32 0.390 corned beef hash
3926 N11110852748 usda-ndb-import Instant Mashed Potatoes Kroger kroger 19 g (0.33 cup) 1540.0 0.00 0.000 78.95 0.00 5.3 10.53 0.079 instant mashed potatoes
4151 N11110863997 usda-ndb-import Cream Cheese Spread, Whipped Strawberry Kroger, The Kroger Co. kroger,the-kroger-co 22 g (2 Tbsp) 1331.0 22.73 0.068 18.18 13.64 0.0 4.55 0.227 cream cheese spread
4664 N11110888082 usda-ndb-import Chunk Light Tuna In Oil The Kroger Co. the-kroger-co 56 g (2 OZ DRAINED) 523.0 4.46 0.045 0.00 0.00 0.0 19.64 0.321 chunk light tuna
5052 N11115224274 usda-ndb-import Imperial, 30% Vegetable Oil Spread Unilever unilever 14 g (1 Tbsp) 1197.0 28.57 NaN 0.00 NaN NaN 0.00 0.643 vegetable oil spread
5061 N11115621240 usda-ndb-import Promise, Buttery, 60% Vegetable Oil Spread Unilever unilever 14 g (1 Tbsp) 2389.0 57.14 0.000 0.00 NaN NaN 0.00 0.607 vegetable oil spread
5113 N11150001298 usda-ndb-import Chunk White Albacore Tuna In Water Roundy's roundy-s 56 g (0.25 cup) 523.0 0.89 0.045 0.00 0.00 0.0 25.00 0.464 chunk white albacore
5252 N11150040419 usda-ndb-import Organic Yellow Corn Tortilla Chips Simply Roundy's simply-roundy-s 28 g (1 oz) 2092.0 25.00 0.000 57.14 0.00 7.1 7.14 0.571 yellow corn tortilla
5271 N11150041430 usda-ndb-import Potato Chips, Cheddar & Sour Cream Roundy's roundy-s 57 g (1 PACKAGE) 2201.0 31.58 0.000 54.39 1.75 3.5 7.02 0.596 potato chips cheddar
5500 N11150100205 usda-ndb-import Cream Of Chicken Condensed Soup Roundy's roundy-s 125 g (0.5 cup) 268.0 2.00 0.004 9.60 0.80 0.8 1.60 0.640 chicken condensed soup
5504 N11150100304 usda-ndb-import Cream Of Mushroom Condensed Soup Roundy's roundy-s 125 g (0.5 cup) 234.0 2.00 0.008 7.20 0.00 1.6 2.40 0.360 mushroom condensed soup
5732 N11150183482 usda-ndb-import Instant Mashed Potatoes Roundy's roundy-s 19 g (0.33 cup) 1540.0 0.00 0.000 78.95 0.00 5.3 10.53 0.079 instant mashed potatoes
5733 N11150183499 usda-ndb-import Instant Mashed Potatoes Roundy's roundy-s 19 g (0.33 cup) 1540.0 0.00 0.000 78.95 0.00 5.3 10.53 0.079 instant mashed potatoes
6111 N11150538008 usda-ndb-import Hot Italian Sausage Roundy's roundy-s 70 g (1 GRILLED LINK) 1377.0 25.71 0.086 4.29 4.29 0.0 20.00 1.043 hot italian sausage
6113 N11150538022 usda-ndb-import Sweet Italian Sausage Roundy's roundy-s 70 g (1 GRILLED LINK) 1377.0 25.71 0.086 2.86 2.86 0.0 20.00 0.871 sweet italian sausage
6164 N11150550086 usda-ndb-import Curly French Fried Potatoes Roundy's roundy-s 85 g (3 ONZ) 787.0 9.41 0.000 24.71 0.00 2.4 2.35 0.435 french fried potatoes
6247 N11150589147 usda-ndb-import Ice Cream Bars, Orange Roundy's roundy-s 54 g (54 g) 699.0 5.56 0.019 27.78 18.52 0.0 1.85 0.065 ice cream bars
6254 N11150590037 usda-ndb-import Ice Cream Bars Roundy's roundy-s 77 g (77 g) 1795.0 29.87 0.026 36.36 31.17 2.6 3.90 0.117 ice cream bars
6928 N11161030577 usda-ndb-import Baked Snack Crackers Shurfine, Topco Associates Inc. shurfine,topco-associates-inc 30 g (40 CRACKERS) 1674.0 15.00 0.000 56.67 3.33 0.0 6.67 0.733 baked snack crackers
6934 N11161031352 usda-ndb-import Classic Potato Chips Shurfine shurfine 28 g (17 CHIPS) 2389.0 35.71 0.000 57.14 0.00 3.6 7.14 0.643 classic potato chips
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
169098 N869955000011 usda-ndb-import Chicken Noodle Soup Daily Bread daily-bread 53 g (1 CUP PREPARED) | (MAKES) 1577.0 3.77 0.028 66.04 1.89 1.9 18.87 1.717 chicken noodle soup
169233 N872181000069 usda-ndb-import Milk Chocolate Covered Pretzels Flipz flipz 28 g (1 oz) 1941.0 17.86 0.000 71.43 39.29 3.6 7.14 0.500 chocolate covered pretzels
169598 N875343000099 usda-ndb-import Cream Cheese Spread With Garden Vegetables Einstein Noah Restaurant Group Inc. einstein-noah-restaurant-group-inc 20 g (2 Tbsp) 1255.0 25.00 0.075 15.00 5.00 0.0 5.00 0.500 cream cheese spread
169611 N875386000667 usda-ndb-import Dairy-Free Greek Style Yogurt, Raspberry Tempt tempt 150 g (1 CONTAINER) 364.0 2.67 0.000 10.00 6.00 2.0 5.33 0.017 greek style yogurt
169800 N876314002340 usda-ndb-import Tomato & Basil Soup With Fresh Calabrian Basil Zuppa Rustica zuppa-rustica 245 g (1 cup) 255.0 1.84 0.000 9.80 4.49 2.4 1.63 0.216 tomato basil soup
170020 N877245006056 usda-ndb-import Dutch Valley Food Distributors, Dark Chocolate... Dutch Valley Food Distributors Inc. dutch-valley-food-distributors-inc 40 g (40 GRM) 2301.0 37.50 NaN 50.00 37.50 NaN 10.00 0.000 dark chocolate almonds
170688 N883967381319 usda-ndb-import Creme Sandwich Cookies, Peanut Butter Smart Sense smart-sense 25 g (25 g) 2176.0 24.00 0.000 64.00 20.00 4.0 8.00 0.340 cookies peanut butter
170750 N883967392087 usda-ndb-import Macaroni & Cheese Dinner Smart Sense smart-sense 58 g (1 PKG) 1586.0 6.03 0.000 70.69 8.62 1.7 10.34 0.810 macaroni cheese dinner
171181 N886002506517 usda-ndb-import Milk Chocolate Chip Soft Baked Cookies Mrs. Fields mrs-fields 28 g (1 oz) 1941.0 17.86 0.036 67.86 39.29 3.6 3.57 0.411 soft baked cookies
171313 N886926014211 usda-ndb-import Bistro Style, Bistro Style Spicy Thai-Style Ch... Meijer meijer 245 g (1 cup) 172.0 0.41 0.006 6.12 0.00 0.4 2.86 0.282 chicken noodle soup
171447 N886926296648 usda-ndb-import Kettle Cooked Crinkle Cut Potato Chips, Chedda... Meijer meijer 28 g (1 oz) 2092.0 28.57 0.000 60.71 7.14 3.6 7.14 0.679 potato chips cheddar
171854 N888670019573 usda-ndb-import Albacore Tuna In Water Wellsley Farms wellsley-farms 56 g (0.25 CUP | ABOUT) 448.0 0.00 0.045 0.00 0.00 0.0 23.21 0.250 albacore tuna in
171915 N888670025505 usda-ndb-import Italian Style Meatballs Wellsley Farms, Bj's Wholesale Club / Corpora... wellsley-farms,bj-s-wholesale-club-corporate-b... 88 g (5 MEATBALLS) 1142.0 19.32 0.040 6.82 2.27 1.1 17.05 0.545 italian style meatballs
171946 N888670029633 usda-ndb-import Organic Thin Crust Pizza Wellsley Farms wellsley-farms 112 g (0.33 PIZZA) 971.0 8.04 0.022 27.68 0.89 1.8 10.71 0.455 thin crust pizza
172760 N891991007082 usda-ndb-import Waffle Lady, Organic Gingerbread Pancake & Waf... Waffle Lady Inc. waffle-lady-inc 46 g (8 THICK BELGIAN STYLE WAFFLES | PANCAKES... 1456.0 0.00 0.000 80.43 26.09 2.2 6.52 0.696 pancake waffle mix
173109 N893913001840 usda-ndb-import Leaf Cuisine, Not Cream Cheese Spread, Classic... Rod Rotondi Enterprises Llc rod-rotondi-enterprises-llc 28 g (1 ONZ) 895.0 16.07 0.000 14.29 3.57 3.6 7.14 0.500 cream cheese spread
173808 N896767001219 usda-ndb-import Larry's, Coconut Bliss, Organic Non-Dairy Froz... Luna, Luna & Larry's luna,luna-larry-s 95 g (0.5 cup) 1059.0 17.89 0.000 21.05 13.68 2.1 4.21 0.042 dairy frozen dessert
173854 N896859000588 usda-ndb-import Organic Non Dairy Thousand Island Dressing Organic Ville, Sky Valley Foods Inc. organic-ville,sky-valley-foods-inc 30 ml (2 Tbsp) 1393.0 33.33 0.000 13.33 10.00 0.0 0.00 0.467 thousand island dressing
174070 N897552001469 usda-ndb-import Chicken Gravy Mix Full Flavor Foods full-flavor-foods 6 g (1 Tbsp) 1745.0 0.00 0.000 100.00 16.67 0.0 16.67 4.500 chicken gravy mix
174431 N898575001368 usda-ndb-import Sea Salt & Nibs Dark Chocolate Bar Madecasse madecasse 75 g (2.64 oz) 1226.0 20.00 0.000 24.00 18.67 1.3 4.00 0.020 dark chocolate bar
174839 N939662096321 usda-ndb-import Bowl Noodles, Ramen Noodle Soup With Shrimp Co... Nissin, Nissin Foods (Usa) Co. Inc. nissin,nissin-foods-usa-co-inc 46 g (46 g) 1912.0 19.57 0.011 60.87 6.52 4.3 10.87 1.913 ramen noodle soup
177933 N20032319 usda-ndb-import Non Fat Greek Yogurt Fresh & Easy fresh-easy 170 g (6 ONZ) 343.0 0.00 0.000 11.76 10.59 0.0 8.24 0.041 fat greek yogurt
270315 N4178900121 usda-ndb-import Instant Lunch, Ramen Noodles With Vegetables, ... Maruchan, Maruchan Inc. maruchan,maruchan-inc 64 g (1 CONTAINER, PER CONTAINER) 1895.0 18.75 0.000 60.94 3.12 3.1 10.94 1.859 noodles with vegetables
280147 N5051379022655 usda-ndb-import Ice Cream Sandwiches Fresh & Easy fresh-easy 60 g (1 SANDWICH) 1117.0 8.33 0.025 41.67 21.67 1.7 5.00 0.167 ice cream sandwiches
280344 N5051379079499 usda-ndb-import Non Fat Greek Yogurt Fresh & Easy fresh-easy 150 g (1 CONTAINER) 335.0 0.00 0.003 12.00 10.00 0.0 8.00 0.033 fat greek yogurt
290879 N60788003540 usda-ndb-import Ultra Thin Crust Pizza Southern Home southern-home 140 g (0.333 PIZZA) 808.0 8.57 0.021 20.71 2.14 0.7 8.57 0.364 thin crust pizza
290956 N6078802558 usda-ndb-import Chunk Light Tuna In Water Southern Home southern-home 56 g (0.25 CUP | DRAINED, ANOUT) 372.0 0.89 0.036 0.00 0.00 0.0 19.64 0.321 chunk light tuna
292097 N68826717758 usda-ndb-import Twisted Chocolate Jumbo Cinnamon Rolls With Icing Ahold ahold 99 g (1 ROLL WITH ICING) 1310.0 10.10 0.000 50.51 21.21 2.0 5.05 0.677 cinnamon rolls with
292898 N725493300360 usda-ndb-import Thousand Island Dressing Cha Ching cha-ching 30 ml (2 Tbsp) 1117.0 20.00 0.017 20.00 16.67 0.0 0.00 1.000 thousand island dressing
293817 N7501088109980 usda-ndb-import Pico De Gallo Seasoning Mix Terana terana 0.5 g (0.25 tsp) 0.0 0.00 NaN 0.00 NaN NaN 0.00 0.000 pico de gallo

906 rows × 15 columns


In [88]:
demo_df[demo_df.demo_cat == "pasta enriched macaroni"]


Out[88]:
code creator product_name brands brands_tags serving_size energy_100g fat_100g cholesterol_100g carbohydrates_100g sugars_100g fiber_100g proteins_100g sodium_100g demo_cat

In [89]:
df[df.demo_cat == "pasta enriched macaroni"]


Out[89]:
code creator product_name brands brands_tags serving_size energy_100g fat_100g cholesterol_100g carbohydrates_100g sugars_100g fiber_100g proteins_100g sodium_100g demo_cat
3846 N11110849168 usda-ndb-import Pot-Ready Angel Hair Half Length Pasta, Enrich... Kroger kroger 56 g (2 oz) 1494.0 1.79 0.0 73.21 3.57 3.6 12.50 0.000 pasta enriched macaroni
5688 N11150175951 usda-ndb-import No Boil Lasagna Pasta, Enriched Macaroni Product Roundy's roundy-s 56 g (2 oz) 1494.0 1.79 0.0 73.21 3.57 3.6 12.50 0.000 pasta enriched macaroni
5691 N11150175999 usda-ndb-import Orzo Pasta, Enriched Macaroni Product Roundy's roundy-s 56 g (2 oz) 1494.0 1.79 0.0 73.21 3.57 3.6 12.50 0.000 pasta enriched macaroni
5693 N11150176040 usda-ndb-import Spaghetti Pasta, Enriched Macaroni Product Roundy's roundy-s 56 g (2 oz) 1494.0 1.79 0.0 73.21 3.57 3.6 12.50 0.000 pasta enriched macaroni
5694 N11150176095 usda-ndb-import Thin Spaghetti Pasta, Enriched Macaroni Product Roundy's roundy-s 56 g (2 oz) 1494.0 1.79 0.0 73.21 3.57 3.6 12.50 0.000 pasta enriched macaroni
5695 N11150176125 usda-ndb-import Mostaccioli Pasta, Enriched Macaroni Product Roundy's roundy-s 56 g (2 oz) 1494.0 1.79 0.0 73.21 3.57 3.6 12.50 0.000 pasta enriched macaroni
5696 N11150176132 usda-ndb-import Angel Hair Pasta, Enriched Macaroni Product Roundy's roundy-s 56 g (2 oz) 1494.0 1.79 0.0 73.21 3.57 3.6 12.50 0.000 pasta enriched macaroni
5697 N11150176156 usda-ndb-import Medium Shells Pasta, Enriched Macaroni Product Roundy's roundy-s 56 g (2 oz) 1494.0 1.79 0.0 73.21 3.57 3.6 12.50 0.000 pasta enriched macaroni
5698 N11150176163 usda-ndb-import Penne Rigate Pasta, Enriched Macaroni Product Roundy's roundy-s 56 g (2 oz) 1494.0 1.79 0.0 73.21 3.57 3.6 12.50 0.000 pasta enriched macaroni
5699 N11150176170 usda-ndb-import Farfalle Pasta, Enriched Macaroni Product Roundy's roundy-s 56 g (2 oz) 1494.0 1.79 0.0 73.21 3.57 3.6 12.50 0.000 pasta enriched macaroni
5700 N11150176422 usda-ndb-import Fettuccine Pasta, Enriched Macaroni Product Roundy's roundy-s 56 g (2 oz) 1494.0 1.79 0.0 73.21 3.57 3.6 12.50 0.000 pasta enriched macaroni
5701 N11150176460 usda-ndb-import Rigatoni Pasta, Enriched Macaroni Product Roundy's roundy-s 56 g (2 oz) 1494.0 1.79 0.0 73.21 3.57 3.6 12.50 0.000 pasta enriched macaroni
5705 N11150178006 usda-ndb-import Lasagna Pasta, Enriched Macaroni Product Roundy's roundy-s 56 g (2 oz) 1494.0 1.79 0.0 73.21 3.57 3.6 12.50 0.000 pasta enriched macaroni
7159 N11161150787 usda-ndb-import Fettuccine Pasta, Enriched Macaroni Product Shurfine shurfine 56 g (45 PIECES) 1494.0 0.89 0.0 75.00 1.79 3.6 12.50 0.000 pasta enriched macaroni
10564 N12700000150 usda-ndb-import Skinner, Texas Shape Pasta, Enriched Macaroni ... New World Pasta Company new-world-pasta-company 56 g (0.75 cup) 1494.0 1.79 0.0 75.00 1.79 3.6 12.50 0.000 pasta enriched macaroni
18374 N21130506286 usda-ndb-import Kitchens, Angel Hair Pasta, Enriched Macaroni ... Signature signature 56 g (2 oz) 1494.0 1.79 0.0 73.21 3.57 3.6 12.50 0.000 pasta enriched macaroni
22940 N26800002464 usda-ndb-import Large Elbows Pasta, Enriched Macaroni Product American Beauty, New World Pasta Company american-beauty,new-world-pasta-company 56 g (0.667 CUP) 1494.0 1.79 0.0 75.00 1.79 3.6 12.50 0.000 pasta enriched macaroni
22951 N26800006363 usda-ndb-import Mostaccioli Pasta, Enriched Macaroni Product American Beauty, New World Pasta Company american-beauty,new-world-pasta-company 56 g (0.667 CUP) 1494.0 1.79 0.0 75.00 1.79 3.6 12.50 0.000 pasta enriched macaroni
22953 N26800006387 usda-ndb-import Angel Hair Pasta, Enriched Macaroni Product American Beauty american-beauty 56 g (0.125 OF PACKAGE) 1569.0 1.79 0.0 75.00 3.57 3.6 12.50 0.000 pasta enriched macaroni
25809 N30034091415 usda-ndb-import Gemelli Pasta, Enriched Macaroni Product Giant Eagle Inc. giant-eagle-inc 56 g (2 ONZ) 1494.0 1.79 0.0 73.21 3.57 3.6 12.50 0.000 pasta enriched macaroni
30361 N35826041104 usda-ndb-import Ziti Pasta, Enriched Macaroni Product Food Lion food-lion 56 g (0.666 cup) 1569.0 1.79 0.0 73.21 3.57 3.6 12.50 0.000 pasta enriched macaroni
30671 N35826079442 usda-ndb-import Classic Angel Hair Pasta, Enriched Macaroni Pr... Food Lion food-lion 56 g (2 oz) 1494.0 1.79 0.0 73.21 3.57 3.6 12.50 0.000 pasta enriched macaroni
38958 N41190035717 usda-ndb-import Angel Hair No. 11 Pasta, Enriched Macaroni Pro... Shoprite shoprite 56 g (2 oz) 1494.0 1.79 0.0 73.21 3.57 3.6 12.50 0.000 pasta enriched macaroni
39237 N41190048472 usda-ndb-import Pennoni Italian Pasta, Enriched Macaroni Product Shoprite shoprite 56 g (0.75 cup) 1569.0 1.79 0.0 73.21 3.57 3.6 12.50 0.000 pasta enriched macaroni
48339 N41486001242 usda-ndb-import Luigi Vitelli, Rotini Pasta, Enriched Macaroni... Vitelli Foods Llc vitelli-foods-llc 55 g (0.75 cup) 1490.0 1.82 0.0 72.73 3.09 2.9 11.82 0.000 pasta enriched macaroni
49867 N41497279944 usda-ndb-import Classic Pasta, Enriched Macaroni Product Weis Quality weis-quality 56 g (0.75 " CIRCLE) 1494.0 1.79 0.0 73.21 3.57 3.6 12.50 0.000 pasta enriched macaroni
49868 N41497279951 usda-ndb-import Farfalle Classic Pasta, Enriched Macaroni Product Weis Quality weis-quality 56 g (2 oz) 1494.0 1.79 0.0 73.21 3.57 3.6 12.50 0.000 pasta enriched macaroni
49870 N41497279975 usda-ndb-import Classic Spaghetti Pasta, Enriched Macaroni Pro... Weis Quality weis-quality 56 g (2 oz) 1494.0 1.79 0.0 73.21 3.57 3.6 12.50 0.000 pasta enriched macaroni
49878 N41497280056 usda-ndb-import Classic Rigatoni Pasta, Enriched Macaroni Product Weis Quality weis-quality 56 g (2 oz) 1494.0 1.79 0.0 73.21 3.57 3.6 12.50 0.000 pasta enriched macaroni
49886 N41497280148 usda-ndb-import Classic Linguine Pasta, Enriched Macaroni Product Weis Quality weis-quality 56 g (2 oz) 1494.0 1.79 0.0 73.21 3.57 3.6 12.50 0.000 pasta enriched macaroni
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
70074 N70253469916 usda-ndb-import Fusilli Pasta, Enriched Macaroni Product Our Family our-family 56 g (0.75 CUP) 1494.0 1.79 0.0 73.21 3.57 3.6 12.50 0.000 pasta enriched macaroni
73134 N70796330506 usda-ndb-import Shells #50 Pasta, Enriched Macaroni Product Anna anna 56 g (2 oz) 1494.0 1.79 0.0 73.21 1.79 3.6 12.50 0.000 pasta enriched macaroni
73138 N70796330810 usda-ndb-import Elbows Pasta, Enriched Macaroni Product Anna anna 56 g (2 oz) 1494.0 1.79 0.0 73.21 1.79 3.6 12.50 0.000 pasta enriched macaroni
75942 N71403033087 usda-ndb-import Linguine Fine #8 Pasta, Enriched Macaroni Product Ferrara ferrara 56 g (2 oz) 1569.0 1.79 0.0 73.21 5.36 3.6 NaN 0.000 pasta enriched macaroni
80542 N72368510538 usda-ndb-import Penne Rigate No. 36 Pasta, Enriched Macaroni P... Delallo, George Delallo Co. Inc. delallo,george-delallo-co-inc 56 g (2 oz) 1494.0 1.79 0.0 73.21 3.57 3.6 12.50 0.000 pasta enriched macaroni
80554 N72368512006 usda-ndb-import Pasta, Enriched Macaroni Product Delallo delallo 56 g (2 oz) 1644.0 1.79 0.0 78.57 3.57 3.6 12.50 0.000 pasta enriched macaroni
80555 N72368512044 usda-ndb-import La Famiglia Rigatoni #21 Pasta, Enriched Macar... Delallo delallo 56 g (0.75 cup) 1644.0 1.79 0.0 78.57 3.57 3.6 12.50 0.000 pasta enriched macaroni
80557 N72368512181 usda-ndb-import Elbows #68 Family Pasta, Enriched Macaroni Pro... Delallo delallo 56 g (0.75 cup) 1644.0 1.79 0.0 78.57 3.57 3.6 12.50 0.000 pasta enriched macaroni
84041 N73324000780 usda-ndb-import Pasta, Enriched Macaroni Product Baresi baresi 56 g (2 oz) 1494.0 1.79 0.0 75.00 1.79 3.6 12.50 0.000 pasta enriched macaroni
92544 N76808006223 usda-ndb-import Penne Pasta, Enriched Macaroni Product Barilla G & R F.Lli S.P.A. barilla-g-r-f-lli-s-p-a 56 g (0.125 BOX | ABOUT) 1494.0 1.79 0.0 75.00 3.57 3.6 12.50 0.000 pasta enriched macaroni
92548 N76808006469 usda-ndb-import Thin Spaghetti Pasta, Enriched Macaroni Product Barilla barilla 56 g (0.125 BOX) | (ABOUT) 1494.0 1.79 0.0 75.00 3.57 3.6 12.50 0.000 pasta enriched macaroni
92552 N76808006513 usda-ndb-import Angel Hair Pasta, Enriched Macaroni Product Barilla G & R F.Lli S.P.A. barilla-g-r-f-lli-s-p-a 56 g (2 ONZ) 1343.0 2.68 0.0 69.64 3.57 10.7 14.29 0.000 pasta enriched macaroni
92556 N76808006568 usda-ndb-import Organic Penne Pasta, Enriched Macaroni Product Barilla G & R F.Lli S.P.A. barilla-g-r-f-lli-s-p-a 56 g (0.125 BOX | ABOUT) 1494.0 1.79 0.0 75.00 3.57 3.6 12.50 0.000 pasta enriched macaroni
92567 N76808034295 usda-ndb-import Linguine Pasta, Enriched Macaroni Product Barilla, Barilla G & R F.Lli S.P.A. barilla,barilla-g-r-f-lli-s-p-a 56 g (0.125 BOX | ABOUT) 1494.0 1.79 0.0 75.00 3.57 3.6 12.50 0.000 pasta enriched macaroni
94550 N77890233894 usda-ndb-import Elbows Pasta, Enriched Macaroni Product Wegmans wegmans 56 g (0.5 cup) 1569.0 1.79 0.0 73.21 3.57 3.6 12.50 0.000 pasta enriched macaroni
94553 N77890234037 usda-ndb-import Spaghetti Pasta, Enriched Macaroni Product Wegmans wegmans 56 g (0.125 BOX) 1569.0 1.79 0.0 73.21 3.57 3.6 12.50 0.000 pasta enriched macaroni
94554 N77890234044 usda-ndb-import Thin Spaghetti Pasta, Enriched Macaroni Product Wegmans wegmans 56 g (0.125 BOX DRY) 1569.0 1.79 0.0 73.21 3.57 3.6 12.50 0.000 pasta enriched macaroni
94922 N77890362792 usda-ndb-import Half-Sized Thin Spaghetti Pasta, Enriched Maca... Wegmans wegmans 56 g (0.125 BOX DRY | ABOUT) 1494.0 1.79 0.0 73.21 3.57 3.6 12.50 0.000 pasta enriched macaroni
94983 N77890365595 usda-ndb-import Spaghetti Pasta, Enriched Macaroni Product Wegmans wegmans 56 g (0.03 OF PKG) 1494.0 1.79 0.0 73.21 3.57 3.6 12.50 0.000 pasta enriched macaroni
96800 N78742036991 usda-ndb-import Pot-Sized Angel Hair Pasta, Enriched Macaroni ... Great Value great-value 56 g (2 oz) 1494.0 1.79 0.0 73.21 3.57 3.6 12.50 0.000 pasta enriched macaroni
97814 N78742085845 usda-ndb-import Fettuccine Pasta, Enriched Macaroni Product Great Value great-value 56 g (2 oz) 1494.0 1.79 0.0 73.21 3.57 3.6 12.50 0.000 pasta enriched macaroni
102479 N85164444174 usda-ndb-import Penne Ziti Rigate Pasta, Enriched Macaroni Pro... Garofalo garofalo 56 g (0.66 cup) 1569.0 1.79 0.0 78.57 3.57 3.6 12.50 0.000 pasta enriched macaroni
104153 N85239381861 usda-ndb-import Manicotti Pasta, Enriched Macaroni Product Market Pantry, Target Stores market-pantry,target-stores 56 g (0.25 PKG.) DRY | () 1494.0 1.79 0.0 73.21 3.57 3.6 12.50 0.000 pasta enriched macaroni
104158 N85239382806 usda-ndb-import Ziti Pasta, Enriched Macaroni Product Market Pantry, Target Stores market-pantry,target-stores 56 g (0.666 cup) 1494.0 1.79 0.0 73.21 3.57 3.6 12.50 0.000 pasta enriched macaroni
104162 N85239382912 usda-ndb-import Vegetable Rotini Pasta, Enriched Macaroni Product Market Pantry, Target Stores market-pantry,target-stores 56 g (0.75 cup) 1494.0 1.79 0.0 71.43 3.57 5.4 12.50 0.062 pasta enriched macaroni
104168 N85239385869 usda-ndb-import Small Shells Pasta, Enriched Macaroni Product Market Pantry, Target Stores market-pantry,target-stores 56 g (0.5 cup) 1494.0 1.79 0.0 73.21 3.57 3.6 12.50 0.000 pasta enriched macaroni
111564 N01121307483 usda-ndb-import Mostaccioli Pasta, Enriched Macaroni Product Spartan spartan 56 g (0.75 CUP DRY) 1494.0 1.79 0.0 73.21 3.57 3.6 12.50 0.000 pasta enriched macaroni
113838 N021333400547 usda-ndb-import Thin Spaghetti Pasta, Enriched Macaroni Product Fareway fareway 56 g (0.5 " CIRDE) 1494.0 1.79 0.0 73.21 3.57 3.6 12.50 0.000 pasta enriched macaroni
142181 N75450134488 usda-ndb-import Pot Sized Linguine Pasta, Enriched Macaroni Pr... Hy-Vee hy-vee 56 g (1.5 CIRCLE) 1494.0 1.79 0.0 73.21 3.57 3.6 12.50 0.000 pasta enriched macaroni
175137 N1041498152628 usda-ndb-import Elbow Macaroni Pasta, Enriched Macaroni Product Reggano reggano 56 g (0.5 cup) 1494.0 1.79 0.0 75.00 1.79 3.6 12.50 0.000 pasta enriched macaroni

70 rows × 15 columns

Now need to try some processing on product name as many will be too similar and uninformative without brand name.


In [90]:
def truncate_brand(s):
    if type(s) != str:
        return ""
    elif s.find(",") == -1:
        return s
    else:
        return s[:s.find(",")]

print(truncate_brand("Kroger, The Kroger Co."))
print(truncate_brand("Roundy's"))


Kroger
Roundy's

In [91]:
demo_df.dtypes


Out[91]:
code                   object
creator                object
product_name           object
brands                 object
brands_tags            object
serving_size           object
energy_100g           float64
fat_100g              float64
cholesterol_100g      float64
carbohydrates_100g    float64
sugars_100g           float64
fiber_100g            float64
proteins_100g         float64
sodium_100g           float64
demo_cat               object
dtype: object

In [92]:
demo_df["short_brand"] = demo_df.brands.apply(truncate_brand)


/Users/seddont/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':

In [93]:
demo_df


Out[93]:
code creator product_name brands brands_tags serving_size energy_100g fat_100g cholesterol_100g carbohydrates_100g sugars_100g fiber_100g proteins_100g sodium_100g demo_cat short_brand
417 N901000017 usda-ndb-import Yellow Corn Tortilla Chips Guiltless Gourmet guiltless-gourmet 28 g (1 oz) 1795.0 10.71 0.000 78.57 0.00 7.1 7.14 0.643 yellow corn tortilla Guiltless Gourmet
1366 N98601 usda-ndb-import Chicken Noodle Soup Trader Joe's trader-joe-s 248 g (1 cup) 151.0 0.40 0.008 5.65 0.40 0.4 2.42 0.294 chicken noodle soup Trader Joe's
2267 N11110197900 usda-ndb-import Triple Layer Red Velvet Cake, Peppermint Bakery Fresh Goodness, The Kroger Co. bakery-fresh-goodness,the-kroger-co 85 g (1 SLICE) 1971.0 17.65 0.024 77.65 65.88 0.0 1.18 0.353 red velvet cake Bakery Fresh Goodness
2669 N11110577276 usda-ndb-import Ice Cream Sandwich The Kroger Co. the-kroger-co 63 g (1 SANDWICH) 1130.0 9.52 0.032 44.44 20.63 1.6 4.76 0.238 ice cream sandwich The Kroger Co.
3172 N11110791559 usda-ndb-import Crunchy Peanut Butter Simple Truth Organic simple-truth-organic 32 g (2 Tbsp) 2615.0 50.00 0.000 18.75 6.25 9.4 25.00 0.375 crunchy peanut butter Simple Truth Organic
3187 N11110796486 usda-ndb-import Kettle Cooked Potato Chips, Sea Salt & Cracked... Kroger kroger 28 g (1 oz) 2243.0 32.14 0.000 53.57 0.00 3.6 7.14 0.393 cooked potato chips Kroger
3427 N11110819376 usda-ndb-import Dry Roasted Peanuts Kroger kroger 28 g (1 oz) 2389.0 46.43 0.000 21.43 3.57 7.1 21.43 0.607 dry roasted peanuts Kroger
3638 N11110836281 usda-ndb-import 53% Vegetable Oil Spread P$$T... Big Savings... Pass It On p-t-big-savings-pass-it-on 14 g (1 Tbsp) 2092.0 50.00 0.000 0.00 0.00 0.0 0.00 0.643 vegetable oil spread P$$T... Big Savings... Pass It On
3654 N11110837158 usda-ndb-import 98% Fat Free Cream Of Mushroom Condensed Soup Kroger kroger 125 g (0.5 cup) 268.0 2.00 0.004 8.80 1.60 0.0 1.60 0.640 mushroom condensed soup Kroger
3701 N11110839305 usda-ndb-import Chili With Beans Kroger kroger 250 g (1 cup) 502.0 4.00 0.010 14.00 2.00 4.4 6.80 0.496 chili with beans Kroger
3705 N11110839510 usda-ndb-import Corned Beef Hash Kroger kroger 236 g (1 cup) 707.0 10.17 0.030 8.90 0.42 0.8 9.32 0.390 corned beef hash Kroger
3926 N11110852748 usda-ndb-import Instant Mashed Potatoes Kroger kroger 19 g (0.33 cup) 1540.0 0.00 0.000 78.95 0.00 5.3 10.53 0.079 instant mashed potatoes Kroger
4151 N11110863997 usda-ndb-import Cream Cheese Spread, Whipped Strawberry Kroger, The Kroger Co. kroger,the-kroger-co 22 g (2 Tbsp) 1331.0 22.73 0.068 18.18 13.64 0.0 4.55 0.227 cream cheese spread Kroger
4664 N11110888082 usda-ndb-import Chunk Light Tuna In Oil The Kroger Co. the-kroger-co 56 g (2 OZ DRAINED) 523.0 4.46 0.045 0.00 0.00 0.0 19.64 0.321 chunk light tuna The Kroger Co.
5052 N11115224274 usda-ndb-import Imperial, 30% Vegetable Oil Spread Unilever unilever 14 g (1 Tbsp) 1197.0 28.57 NaN 0.00 NaN NaN 0.00 0.643 vegetable oil spread Unilever
5061 N11115621240 usda-ndb-import Promise, Buttery, 60% Vegetable Oil Spread Unilever unilever 14 g (1 Tbsp) 2389.0 57.14 0.000 0.00 NaN NaN 0.00 0.607 vegetable oil spread Unilever
5113 N11150001298 usda-ndb-import Chunk White Albacore Tuna In Water Roundy's roundy-s 56 g (0.25 cup) 523.0 0.89 0.045 0.00 0.00 0.0 25.00 0.464 chunk white albacore Roundy's
5252 N11150040419 usda-ndb-import Organic Yellow Corn Tortilla Chips Simply Roundy's simply-roundy-s 28 g (1 oz) 2092.0 25.00 0.000 57.14 0.00 7.1 7.14 0.571 yellow corn tortilla Simply Roundy's
5271 N11150041430 usda-ndb-import Potato Chips, Cheddar & Sour Cream Roundy's roundy-s 57 g (1 PACKAGE) 2201.0 31.58 0.000 54.39 1.75 3.5 7.02 0.596 potato chips cheddar Roundy's
5500 N11150100205 usda-ndb-import Cream Of Chicken Condensed Soup Roundy's roundy-s 125 g (0.5 cup) 268.0 2.00 0.004 9.60 0.80 0.8 1.60 0.640 chicken condensed soup Roundy's
5504 N11150100304 usda-ndb-import Cream Of Mushroom Condensed Soup Roundy's roundy-s 125 g (0.5 cup) 234.0 2.00 0.008 7.20 0.00 1.6 2.40 0.360 mushroom condensed soup Roundy's
5732 N11150183482 usda-ndb-import Instant Mashed Potatoes Roundy's roundy-s 19 g (0.33 cup) 1540.0 0.00 0.000 78.95 0.00 5.3 10.53 0.079 instant mashed potatoes Roundy's
5733 N11150183499 usda-ndb-import Instant Mashed Potatoes Roundy's roundy-s 19 g (0.33 cup) 1540.0 0.00 0.000 78.95 0.00 5.3 10.53 0.079 instant mashed potatoes Roundy's
6111 N11150538008 usda-ndb-import Hot Italian Sausage Roundy's roundy-s 70 g (1 GRILLED LINK) 1377.0 25.71 0.086 4.29 4.29 0.0 20.00 1.043 hot italian sausage Roundy's
6113 N11150538022 usda-ndb-import Sweet Italian Sausage Roundy's roundy-s 70 g (1 GRILLED LINK) 1377.0 25.71 0.086 2.86 2.86 0.0 20.00 0.871 sweet italian sausage Roundy's
6164 N11150550086 usda-ndb-import Curly French Fried Potatoes Roundy's roundy-s 85 g (3 ONZ) 787.0 9.41 0.000 24.71 0.00 2.4 2.35 0.435 french fried potatoes Roundy's
6247 N11150589147 usda-ndb-import Ice Cream Bars, Orange Roundy's roundy-s 54 g (54 g) 699.0 5.56 0.019 27.78 18.52 0.0 1.85 0.065 ice cream bars Roundy's
6254 N11150590037 usda-ndb-import Ice Cream Bars Roundy's roundy-s 77 g (77 g) 1795.0 29.87 0.026 36.36 31.17 2.6 3.90 0.117 ice cream bars Roundy's
6928 N11161030577 usda-ndb-import Baked Snack Crackers Shurfine, Topco Associates Inc. shurfine,topco-associates-inc 30 g (40 CRACKERS) 1674.0 15.00 0.000 56.67 3.33 0.0 6.67 0.733 baked snack crackers Shurfine
6934 N11161031352 usda-ndb-import Classic Potato Chips Shurfine shurfine 28 g (17 CHIPS) 2389.0 35.71 0.000 57.14 0.00 3.6 7.14 0.643 classic potato chips Shurfine
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
169098 N869955000011 usda-ndb-import Chicken Noodle Soup Daily Bread daily-bread 53 g (1 CUP PREPARED) | (MAKES) 1577.0 3.77 0.028 66.04 1.89 1.9 18.87 1.717 chicken noodle soup Daily Bread
169233 N872181000069 usda-ndb-import Milk Chocolate Covered Pretzels Flipz flipz 28 g (1 oz) 1941.0 17.86 0.000 71.43 39.29 3.6 7.14 0.500 chocolate covered pretzels Flipz
169598 N875343000099 usda-ndb-import Cream Cheese Spread With Garden Vegetables Einstein Noah Restaurant Group Inc. einstein-noah-restaurant-group-inc 20 g (2 Tbsp) 1255.0 25.00 0.075 15.00 5.00 0.0 5.00 0.500 cream cheese spread Einstein Noah Restaurant Group Inc.
169611 N875386000667 usda-ndb-import Dairy-Free Greek Style Yogurt, Raspberry Tempt tempt 150 g (1 CONTAINER) 364.0 2.67 0.000 10.00 6.00 2.0 5.33 0.017 greek style yogurt Tempt
169800 N876314002340 usda-ndb-import Tomato & Basil Soup With Fresh Calabrian Basil Zuppa Rustica zuppa-rustica 245 g (1 cup) 255.0 1.84 0.000 9.80 4.49 2.4 1.63 0.216 tomato basil soup Zuppa Rustica
170020 N877245006056 usda-ndb-import Dutch Valley Food Distributors, Dark Chocolate... Dutch Valley Food Distributors Inc. dutch-valley-food-distributors-inc 40 g (40 GRM) 2301.0 37.50 NaN 50.00 37.50 NaN 10.00 0.000 dark chocolate almonds Dutch Valley Food Distributors Inc.
170688 N883967381319 usda-ndb-import Creme Sandwich Cookies, Peanut Butter Smart Sense smart-sense 25 g (25 g) 2176.0 24.00 0.000 64.00 20.00 4.0 8.00 0.340 cookies peanut butter Smart Sense
170750 N883967392087 usda-ndb-import Macaroni & Cheese Dinner Smart Sense smart-sense 58 g (1 PKG) 1586.0 6.03 0.000 70.69 8.62 1.7 10.34 0.810 macaroni cheese dinner Smart Sense
171181 N886002506517 usda-ndb-import Milk Chocolate Chip Soft Baked Cookies Mrs. Fields mrs-fields 28 g (1 oz) 1941.0 17.86 0.036 67.86 39.29 3.6 3.57 0.411 soft baked cookies Mrs. Fields
171313 N886926014211 usda-ndb-import Bistro Style, Bistro Style Spicy Thai-Style Ch... Meijer meijer 245 g (1 cup) 172.0 0.41 0.006 6.12 0.00 0.4 2.86 0.282 chicken noodle soup Meijer
171447 N886926296648 usda-ndb-import Kettle Cooked Crinkle Cut Potato Chips, Chedda... Meijer meijer 28 g (1 oz) 2092.0 28.57 0.000 60.71 7.14 3.6 7.14 0.679 potato chips cheddar Meijer
171854 N888670019573 usda-ndb-import Albacore Tuna In Water Wellsley Farms wellsley-farms 56 g (0.25 CUP | ABOUT) 448.0 0.00 0.045 0.00 0.00 0.0 23.21 0.250 albacore tuna in Wellsley Farms
171915 N888670025505 usda-ndb-import Italian Style Meatballs Wellsley Farms, Bj's Wholesale Club / Corpora... wellsley-farms,bj-s-wholesale-club-corporate-b... 88 g (5 MEATBALLS) 1142.0 19.32 0.040 6.82 2.27 1.1 17.05 0.545 italian style meatballs Wellsley Farms
171946 N888670029633 usda-ndb-import Organic Thin Crust Pizza Wellsley Farms wellsley-farms 112 g (0.33 PIZZA) 971.0 8.04 0.022 27.68 0.89 1.8 10.71 0.455 thin crust pizza Wellsley Farms
172760 N891991007082 usda-ndb-import Waffle Lady, Organic Gingerbread Pancake & Waf... Waffle Lady Inc. waffle-lady-inc 46 g (8 THICK BELGIAN STYLE WAFFLES | PANCAKES... 1456.0 0.00 0.000 80.43 26.09 2.2 6.52 0.696 pancake waffle mix Waffle Lady Inc.
173109 N893913001840 usda-ndb-import Leaf Cuisine, Not Cream Cheese Spread, Classic... Rod Rotondi Enterprises Llc rod-rotondi-enterprises-llc 28 g (1 ONZ) 895.0 16.07 0.000 14.29 3.57 3.6 7.14 0.500 cream cheese spread Rod Rotondi Enterprises Llc
173808 N896767001219 usda-ndb-import Larry's, Coconut Bliss, Organic Non-Dairy Froz... Luna, Luna & Larry's luna,luna-larry-s 95 g (0.5 cup) 1059.0 17.89 0.000 21.05 13.68 2.1 4.21 0.042 dairy frozen dessert Luna
173854 N896859000588 usda-ndb-import Organic Non Dairy Thousand Island Dressing Organic Ville, Sky Valley Foods Inc. organic-ville,sky-valley-foods-inc 30 ml (2 Tbsp) 1393.0 33.33 0.000 13.33 10.00 0.0 0.00 0.467 thousand island dressing Organic Ville
174070 N897552001469 usda-ndb-import Chicken Gravy Mix Full Flavor Foods full-flavor-foods 6 g (1 Tbsp) 1745.0 0.00 0.000 100.00 16.67 0.0 16.67 4.500 chicken gravy mix Full Flavor Foods
174431 N898575001368 usda-ndb-import Sea Salt & Nibs Dark Chocolate Bar Madecasse madecasse 75 g (2.64 oz) 1226.0 20.00 0.000 24.00 18.67 1.3 4.00 0.020 dark chocolate bar Madecasse
174839 N939662096321 usda-ndb-import Bowl Noodles, Ramen Noodle Soup With Shrimp Co... Nissin, Nissin Foods (Usa) Co. Inc. nissin,nissin-foods-usa-co-inc 46 g (46 g) 1912.0 19.57 0.011 60.87 6.52 4.3 10.87 1.913 ramen noodle soup Nissin
177933 N20032319 usda-ndb-import Non Fat Greek Yogurt Fresh & Easy fresh-easy 170 g (6 ONZ) 343.0 0.00 0.000 11.76 10.59 0.0 8.24 0.041 fat greek yogurt Fresh & Easy
270315 N4178900121 usda-ndb-import Instant Lunch, Ramen Noodles With Vegetables, ... Maruchan, Maruchan Inc. maruchan,maruchan-inc 64 g (1 CONTAINER, PER CONTAINER) 1895.0 18.75 0.000 60.94 3.12 3.1 10.94 1.859 noodles with vegetables Maruchan
280147 N5051379022655 usda-ndb-import Ice Cream Sandwiches Fresh & Easy fresh-easy 60 g (1 SANDWICH) 1117.0 8.33 0.025 41.67 21.67 1.7 5.00 0.167 ice cream sandwiches Fresh & Easy
280344 N5051379079499 usda-ndb-import Non Fat Greek Yogurt Fresh & Easy fresh-easy 150 g (1 CONTAINER) 335.0 0.00 0.003 12.00 10.00 0.0 8.00 0.033 fat greek yogurt Fresh & Easy
290879 N60788003540 usda-ndb-import Ultra Thin Crust Pizza Southern Home southern-home 140 g (0.333 PIZZA) 808.0 8.57 0.021 20.71 2.14 0.7 8.57 0.364 thin crust pizza Southern Home
290956 N6078802558 usda-ndb-import Chunk Light Tuna In Water Southern Home southern-home 56 g (0.25 CUP | DRAINED, ANOUT) 372.0 0.89 0.036 0.00 0.00 0.0 19.64 0.321 chunk light tuna Southern Home
292097 N68826717758 usda-ndb-import Twisted Chocolate Jumbo Cinnamon Rolls With Icing Ahold ahold 99 g (1 ROLL WITH ICING) 1310.0 10.10 0.000 50.51 21.21 2.0 5.05 0.677 cinnamon rolls with Ahold
292898 N725493300360 usda-ndb-import Thousand Island Dressing Cha Ching cha-ching 30 ml (2 Tbsp) 1117.0 20.00 0.017 20.00 16.67 0.0 0.00 1.000 thousand island dressing Cha Ching
293817 N7501088109980 usda-ndb-import Pico De Gallo Seasoning Mix Terana terana 0.5 g (0.25 tsp) 0.0 0.00 NaN 0.00 NaN NaN 0.00 0.000 pico de gallo Terana

906 rows × 16 columns

Now going to save original product name and replace with a combination of that plus the brand


In [94]:
demo_df["orig_product_name"] = demo_df.product_name


/Users/seddont/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':

In [95]:
demo_df["new_product_name"] = demo_df.short_brand + " " + demo_df.product_name


/Users/seddont/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':

In [96]:
demo_df.product_name = demo_df.new_product_name


/Users/seddont/anaconda/lib/python3.5/site-packages/pandas/core/generic.py:2773: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value

Need to add blanks for the other columns which only exist in the hand-curated demo data.


In [97]:
demo_df["hos"] = 0
demo_df["image_url"] = None


/Users/seddont/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
/Users/seddont/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app

Now want to append the original demo data products and their hall of shame status so we always have them in too.


In [98]:
orig_demo = pd.read_csv(data_dir+"demo_food_data_latest.csv")

In [99]:
# Specify what columns we need to keep to match the regular dataframe above.

wanted_cols = ['code', 'creator', 'hos', 'image_url', 'product_name', 'brands', 'brands_tags', 'serving_size',
       'serving_size', 'energy_100g', 'fat_100g', 'cholesterol_100g',
       'carbohydrates_100g', 'sugars_100g', 'fiber_100g', 'proteins_100g', 'sodium_100g',
        'demo_cat']

# Create a list of columns to drop to check it worked ok
drop_cols = [c for c in orig_demo.columns if c not in wanted_cols]
print(drop_cols)


['generic_name', 'quantity', 'categories', 'categories_tags', 'starch_100g', 'salt_100g', 'alcohol_100g', 'folates_100g', 'bicarbonate_100g', 'potassium_100g', 'chloride_100g', 'calcium_100g', 'iron_100g', 'fluoride_100g', 'iodine_100g', 'caffeine_100g', 'cocoa_100g']

In [100]:
# Drop unwanted columns in orig demo
orig_demo.drop(drop_cols, axis = 1, inplace = True)
orig_demo


Out[100]:
code creator hos product_name image_url brands brands_tags serving_size energy_100g fat_100g cholesterol_100g carbohydrates_100g sugars_100g fiber_100g proteins_100g sodium_100g demo_cat
0 N713733903387 usda-ndb-import 1 Low Sodium Applause Crackers https://static.meijer.com/Media/007/13733/0071... Meijer meijer 16 g (16 g) 2092 28.12 0.000 62.50 6.25 0.0 6.25 0.188 cracker
1 N876274001858 usda-ndb-import 0 Snack Time, Stack Pack Salted Crackers NaN Global global 28 g (28 g) 2092 21.43 0.000 71.43 7.14 0.0 7.14 0.304 cracker
2 N888109050023 usda-ndb-import 1 Donettes, Mini Donuts, Frosted https://s-media-cache-ak0.pinimg.com/originals... Hostess hostess 85 g (3 oz) 2017 30.59 0.012 48.24 30.59 1.2 3.53 0.306 donut
3 N72030015712 usda-ndb-import 0 Frosted Donuts http://www.shugarysweets.com/wp-content/upload... Entenmann's entenmann-s 48 g (48 g) 2004 31.25 0.021 47.92 27.08 2.1 4.17 0.375 donut
4 N24300806247 usda-ndb-import 1 Mini Donuts http://bitemefoods.com.au/wp-content/uploads/2... Little Debbie, Mckee Foods Corporation little-debbie,mckee-foods-corporation 122 g (8 DONUTS) 1987 27.05 0.020 55.74 30.33 1.6 4.10 0.418 donut
5 N72030021645 usda-ndb-import 0 Pop'Ettes Powdered Donuts https://jpg.seasonskosher.com/content/images/t... Entenmann's, Bimbo Bakeries Usa Inc. entenmann-s,bimbo-bakeries-usa-inc 57 g (57 g) 1983 28.07 0.026 52.63 26.32 1.8 3.51 0.368 donut
6 N11150146098 usda-ndb-import 1 Kids Graham Crackers, Chocolate http://www.candywarehouse.com/assets/item/regu... Roundy's roundy-s 30 g (19 COOKIES) 1954 15.00 0.000 73.33 23.33 3.3 6.67 0.417 cracker
7 N76606720956 usda-ndb-import 0 Savory Snack Cracker, Celery & Onion http://www.baldersonvillagecheese.com/gourmet-... Medford Farms medford-farms 15 g (15 g) 1954 26.67 0.000 60.00 0.00 0.0 6.67 1.000 cracker
8 N76265185905 usda-ndb-import 0 Plain Cake Mini Donuts http://www.fridaycakenight.com/wp-content/uplo... Maple Donuts Inc maple-donuts-inc 45 g (2 DONUTS) 1858 28.89 0.011 40.00 11.11 2.2 2.22 0.511 donut
9 N41224710146 usda-ndb-import 0 Roland Water Crackers http://www.rolandfood.com/i/product_images/l/0... Roland roland 16 g (16 g) 1833 6.25 0.000 75.00 0.00 6.2 6.25 0.750 cracker
10 N78742129082 usda-ndb-import 0 Baked Buttery Crackers, Naturally Flavored NaN Great Value, Wal-Mart Stores Inc. great-value,wal-mart-stores-inc 16 g (5 CRACKERS) 1833 25.00 0.000 50.00 6.25 0.0 6.25 0.656 cracker
11 N11110008817 usda-ndb-import 0 Maple Glazed Cake Donuts NaN Kroger, The Kroger Co. kroger,the-kroger-co 71 g (1 DONUT) 1828 19.72 0.000 59.15 35.21 1.4 4.23 0.451 donut
12 N708820381599 usda-ndb-import 0 Woven Wheats, Baked Whole Wheat Crackers, Rose... NaN Meijer meijer 28 g (28 g) 1795 14.29 0.000 71.43 3.57 10.7 7.14 0.536 cracker
13 N888109111298 usda-ndb-import 0 Red Velvet Mini Donuts NaN Hostess hostess 53 g (3 DONUTS) 1736 20.75 0.009 52.83 30.19 1.9 3.77 0.255 donut
14 N42400109754 usda-ndb-import 0 Cocoa Dyno-Bites, Sweetened Rice Cereal With R... http://media.fooducate.com/products/images/180... Mom Brands mom-brands 29 g (0.75 cup) 1732 3.45 0.000 89.66 44.83 0.0 3.45 0.517 cereal
15 N51933150546 usda-ndb-import 0 Sweetened Puffed Wheat Cereal, Honey NaN Kiggins kiggins 27 g (0.75 cup) 1703 0.00 0.000 88.89 55.56 0.0 7.41 0.241 cereal
16 N35826097101 usda-ndb-import 0 Sweet & Crispy Graham Crackers NaN Food Lion, Food Town Stores Inc. food-lion,food-town-stores-inc 35 g (2 FULL CRACKER SHEETS) 1674 5.71 0.000 80.00 22.86 5.7 5.71 0.514 cracker
17 N42400060024 usda-ndb-import 0 Naturally & Artificially Flavored Sweetened Co... NaN Chocolatey Colossal Crunch chocolatey-colossal-crunch 30 g (0.75 cup) 1674 5.00 0.000 86.67 43.33 3.3 3.33 0.667 cereal
18 N42400236245 usda-ndb-import 0 Berry Bunch Crunch, Sweetened Corn & Oat Cereal https://www.postconsumerbrands.com/wp-content/... Mom Brands mom-brands 30 g (0.75 cup) 1674 3.33 0.000 86.67 46.67 3.3 3.33 0.467 cereal
19 N42400240815 usda-ndb-import 0 Honey Graham Cereal https://s-media-cache-ak0.pinimg.com/736x/63/e... Honey Graham Toasters honey-graham-toasters 30 g (0.75 cup) 1674 10.00 0.000 76.67 33.33 3.3 6.67 0.867 cereal
20 N980125260001 usda-ndb-import 0 Quinoa Hot Cereal Flakes NaN Ancient Harvest ancient-harvest 34 g (0.33 cup) 1611 5.88 0.000 67.65 5.88 7.1 12.65 0.006 cereal
21 N7750106000048 usda-ndb-import 0 San Jorge Water Crackers NaN Panaderia San Jorge S.A. panaderia-san-jorge-s-a 30 g (30 GRM) 1602 3.33 0.000 76.67 3.33 NaN 13.33 0.600 cracker
22 N11153041949 usda-ndb-import 1 Crunchy Oat Squares With Cinnamon Cereal https://www.dlmdriveup.com/img/CentralOrdering... Foodtown, Foodtown Inc. foodtown,foodtown-inc 32 g (0.75 cup) 1569 3.12 0.000 81.25 25.00 6.2 9.38 0.422 cereal
23 N21130281022 usda-ndb-import 0 Corn Flakes Cereal NaN Safeway Kitchens, Safeway Inc. safeway-kitchens,safeway-inc 28 g (1 cup) 1494 0.00 0.000 85.71 7.14 3.6 7.14 0.571 cereal
24 N76185003815 usda-ndb-import 0 Variety Pack Donut Holes NaN Arnie's arnie-s 71 g (71 g) 1473 16.90 0.028 43.66 18.31 1.4 5.63 0.493 donut
25 N5051379078669 usda-ndb-import 0 Baked Donuts NaN Fresh & Easy fresh-easy 57 g (1 DONUT) 1469 14.04 0.070 56.14 36.84 0.0 7.02 0.140 donut
26 N58449620020 usda-ndb-import 0 Eco Pac, Cereal, Millet Puffs NaN Nature's Path nature-s-path 16 g (1 cup) 1305 0.00 0.000 87.50 0.00 6.2 12.50 0.000 cereal

In [101]:
# Drop unwanted columns in demo_df
# Create a list of columns to drop to check it worked ok
drop_cols = [c for c in demo_df.columns if c not in wanted_cols]
print(drop_cols)
demo_df.drop(drop_cols, axis = 1, inplace = True)
demo_df


['short_brand', 'orig_product_name', 'new_product_name']
/Users/seddont/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:5: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
Out[101]:
code creator product_name brands brands_tags serving_size energy_100g fat_100g cholesterol_100g carbohydrates_100g sugars_100g fiber_100g proteins_100g sodium_100g demo_cat hos image_url
417 N901000017 usda-ndb-import Guiltless Gourmet Yellow Corn Tortilla Chips Guiltless Gourmet guiltless-gourmet 28 g (1 oz) 1795.0 10.71 0.000 78.57 0.00 7.1 7.14 0.643 yellow corn tortilla 0 None
1366 N98601 usda-ndb-import Trader Joe's Chicken Noodle Soup Trader Joe's trader-joe-s 248 g (1 cup) 151.0 0.40 0.008 5.65 0.40 0.4 2.42 0.294 chicken noodle soup 0 None
2267 N11110197900 usda-ndb-import Bakery Fresh Goodness Triple Layer Red Velvet ... Bakery Fresh Goodness, The Kroger Co. bakery-fresh-goodness,the-kroger-co 85 g (1 SLICE) 1971.0 17.65 0.024 77.65 65.88 0.0 1.18 0.353 red velvet cake 0 None
2669 N11110577276 usda-ndb-import The Kroger Co. Ice Cream Sandwich The Kroger Co. the-kroger-co 63 g (1 SANDWICH) 1130.0 9.52 0.032 44.44 20.63 1.6 4.76 0.238 ice cream sandwich 0 None
3172 N11110791559 usda-ndb-import Simple Truth Organic Crunchy Peanut Butter Simple Truth Organic simple-truth-organic 32 g (2 Tbsp) 2615.0 50.00 0.000 18.75 6.25 9.4 25.00 0.375 crunchy peanut butter 0 None
3187 N11110796486 usda-ndb-import Kroger Kettle Cooked Potato Chips, Sea Salt & ... Kroger kroger 28 g (1 oz) 2243.0 32.14 0.000 53.57 0.00 3.6 7.14 0.393 cooked potato chips 0 None
3427 N11110819376 usda-ndb-import Kroger Dry Roasted Peanuts Kroger kroger 28 g (1 oz) 2389.0 46.43 0.000 21.43 3.57 7.1 21.43 0.607 dry roasted peanuts 0 None
3638 N11110836281 usda-ndb-import P$$T... Big Savings... Pass It On 53% Vegetabl... P$$T... Big Savings... Pass It On p-t-big-savings-pass-it-on 14 g (1 Tbsp) 2092.0 50.00 0.000 0.00 0.00 0.0 0.00 0.643 vegetable oil spread 0 None
3654 N11110837158 usda-ndb-import Kroger 98% Fat Free Cream Of Mushroom Condense... Kroger kroger 125 g (0.5 cup) 268.0 2.00 0.004 8.80 1.60 0.0 1.60 0.640 mushroom condensed soup 0 None
3701 N11110839305 usda-ndb-import Kroger Chili With Beans Kroger kroger 250 g (1 cup) 502.0 4.00 0.010 14.00 2.00 4.4 6.80 0.496 chili with beans 0 None
3705 N11110839510 usda-ndb-import Kroger Corned Beef Hash Kroger kroger 236 g (1 cup) 707.0 10.17 0.030 8.90 0.42 0.8 9.32 0.390 corned beef hash 0 None
3926 N11110852748 usda-ndb-import Kroger Instant Mashed Potatoes Kroger kroger 19 g (0.33 cup) 1540.0 0.00 0.000 78.95 0.00 5.3 10.53 0.079 instant mashed potatoes 0 None
4151 N11110863997 usda-ndb-import Kroger Cream Cheese Spread, Whipped Strawberry Kroger, The Kroger Co. kroger,the-kroger-co 22 g (2 Tbsp) 1331.0 22.73 0.068 18.18 13.64 0.0 4.55 0.227 cream cheese spread 0 None
4664 N11110888082 usda-ndb-import The Kroger Co. Chunk Light Tuna In Oil The Kroger Co. the-kroger-co 56 g (2 OZ DRAINED) 523.0 4.46 0.045 0.00 0.00 0.0 19.64 0.321 chunk light tuna 0 None
5052 N11115224274 usda-ndb-import Unilever Imperial, 30% Vegetable Oil Spread Unilever unilever 14 g (1 Tbsp) 1197.0 28.57 NaN 0.00 NaN NaN 0.00 0.643 vegetable oil spread 0 None
5061 N11115621240 usda-ndb-import Unilever Promise, Buttery, 60% Vegetable Oil S... Unilever unilever 14 g (1 Tbsp) 2389.0 57.14 0.000 0.00 NaN NaN 0.00 0.607 vegetable oil spread 0 None
5113 N11150001298 usda-ndb-import Roundy's Chunk White Albacore Tuna In Water Roundy's roundy-s 56 g (0.25 cup) 523.0 0.89 0.045 0.00 0.00 0.0 25.00 0.464 chunk white albacore 0 None
5252 N11150040419 usda-ndb-import Simply Roundy's Organic Yellow Corn Tortilla C... Simply Roundy's simply-roundy-s 28 g (1 oz) 2092.0 25.00 0.000 57.14 0.00 7.1 7.14 0.571 yellow corn tortilla 0 None
5271 N11150041430 usda-ndb-import Roundy's Potato Chips, Cheddar & Sour Cream Roundy's roundy-s 57 g (1 PACKAGE) 2201.0 31.58 0.000 54.39 1.75 3.5 7.02 0.596 potato chips cheddar 0 None
5500 N11150100205 usda-ndb-import Roundy's Cream Of Chicken Condensed Soup Roundy's roundy-s 125 g (0.5 cup) 268.0 2.00 0.004 9.60 0.80 0.8 1.60 0.640 chicken condensed soup 0 None
5504 N11150100304 usda-ndb-import Roundy's Cream Of Mushroom Condensed Soup Roundy's roundy-s 125 g (0.5 cup) 234.0 2.00 0.008 7.20 0.00 1.6 2.40 0.360 mushroom condensed soup 0 None
5732 N11150183482 usda-ndb-import Roundy's Instant Mashed Potatoes Roundy's roundy-s 19 g (0.33 cup) 1540.0 0.00 0.000 78.95 0.00 5.3 10.53 0.079 instant mashed potatoes 0 None
5733 N11150183499 usda-ndb-import Roundy's Instant Mashed Potatoes Roundy's roundy-s 19 g (0.33 cup) 1540.0 0.00 0.000 78.95 0.00 5.3 10.53 0.079 instant mashed potatoes 0 None
6111 N11150538008 usda-ndb-import Roundy's Hot Italian Sausage Roundy's roundy-s 70 g (1 GRILLED LINK) 1377.0 25.71 0.086 4.29 4.29 0.0 20.00 1.043 hot italian sausage 0 None
6113 N11150538022 usda-ndb-import Roundy's Sweet Italian Sausage Roundy's roundy-s 70 g (1 GRILLED LINK) 1377.0 25.71 0.086 2.86 2.86 0.0 20.00 0.871 sweet italian sausage 0 None
6164 N11150550086 usda-ndb-import Roundy's Curly French Fried Potatoes Roundy's roundy-s 85 g (3 ONZ) 787.0 9.41 0.000 24.71 0.00 2.4 2.35 0.435 french fried potatoes 0 None
6247 N11150589147 usda-ndb-import Roundy's Ice Cream Bars, Orange Roundy's roundy-s 54 g (54 g) 699.0 5.56 0.019 27.78 18.52 0.0 1.85 0.065 ice cream bars 0 None
6254 N11150590037 usda-ndb-import Roundy's Ice Cream Bars Roundy's roundy-s 77 g (77 g) 1795.0 29.87 0.026 36.36 31.17 2.6 3.90 0.117 ice cream bars 0 None
6928 N11161030577 usda-ndb-import Shurfine Baked Snack Crackers Shurfine, Topco Associates Inc. shurfine,topco-associates-inc 30 g (40 CRACKERS) 1674.0 15.00 0.000 56.67 3.33 0.0 6.67 0.733 baked snack crackers 0 None
6934 N11161031352 usda-ndb-import Shurfine Classic Potato Chips Shurfine shurfine 28 g (17 CHIPS) 2389.0 35.71 0.000 57.14 0.00 3.6 7.14 0.643 classic potato chips 0 None
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
169098 N869955000011 usda-ndb-import Daily Bread Chicken Noodle Soup Daily Bread daily-bread 53 g (1 CUP PREPARED) | (MAKES) 1577.0 3.77 0.028 66.04 1.89 1.9 18.87 1.717 chicken noodle soup 0 None
169233 N872181000069 usda-ndb-import Flipz Milk Chocolate Covered Pretzels Flipz flipz 28 g (1 oz) 1941.0 17.86 0.000 71.43 39.29 3.6 7.14 0.500 chocolate covered pretzels 0 None
169598 N875343000099 usda-ndb-import Einstein Noah Restaurant Group Inc. Cream Che... Einstein Noah Restaurant Group Inc. einstein-noah-restaurant-group-inc 20 g (2 Tbsp) 1255.0 25.00 0.075 15.00 5.00 0.0 5.00 0.500 cream cheese spread 0 None
169611 N875386000667 usda-ndb-import Tempt Dairy-Free Greek Style Yogurt, Raspberry Tempt tempt 150 g (1 CONTAINER) 364.0 2.67 0.000 10.00 6.00 2.0 5.33 0.017 greek style yogurt 0 None
169800 N876314002340 usda-ndb-import Zuppa Rustica Tomato & Basil Soup With Fresh C... Zuppa Rustica zuppa-rustica 245 g (1 cup) 255.0 1.84 0.000 9.80 4.49 2.4 1.63 0.216 tomato basil soup 0 None
170020 N877245006056 usda-ndb-import Dutch Valley Food Distributors Inc. Dutch Val... Dutch Valley Food Distributors Inc. dutch-valley-food-distributors-inc 40 g (40 GRM) 2301.0 37.50 NaN 50.00 37.50 NaN 10.00 0.000 dark chocolate almonds 0 None
170688 N883967381319 usda-ndb-import Smart Sense Creme Sandwich Cookies, Peanut Butter Smart Sense smart-sense 25 g (25 g) 2176.0 24.00 0.000 64.00 20.00 4.0 8.00 0.340 cookies peanut butter 0 None
170750 N883967392087 usda-ndb-import Smart Sense Macaroni & Cheese Dinner Smart Sense smart-sense 58 g (1 PKG) 1586.0 6.03 0.000 70.69 8.62 1.7 10.34 0.810 macaroni cheese dinner 0 None
171181 N886002506517 usda-ndb-import Mrs. Fields Milk Chocolate Chip Soft Baked Coo... Mrs. Fields mrs-fields 28 g (1 oz) 1941.0 17.86 0.036 67.86 39.29 3.6 3.57 0.411 soft baked cookies 0 None
171313 N886926014211 usda-ndb-import Meijer Bistro Style, Bistro Style Spicy Thai-S... Meijer meijer 245 g (1 cup) 172.0 0.41 0.006 6.12 0.00 0.4 2.86 0.282 chicken noodle soup 0 None
171447 N886926296648 usda-ndb-import Meijer Kettle Cooked Crinkle Cut Potato Chips,... Meijer meijer 28 g (1 oz) 2092.0 28.57 0.000 60.71 7.14 3.6 7.14 0.679 potato chips cheddar 0 None
171854 N888670019573 usda-ndb-import Wellsley Farms Albacore Tuna In Water Wellsley Farms wellsley-farms 56 g (0.25 CUP | ABOUT) 448.0 0.00 0.045 0.00 0.00 0.0 23.21 0.250 albacore tuna in 0 None
171915 N888670025505 usda-ndb-import Wellsley Farms Italian Style Meatballs Wellsley Farms, Bj's Wholesale Club / Corpora... wellsley-farms,bj-s-wholesale-club-corporate-b... 88 g (5 MEATBALLS) 1142.0 19.32 0.040 6.82 2.27 1.1 17.05 0.545 italian style meatballs 0 None
171946 N888670029633 usda-ndb-import Wellsley Farms Organic Thin Crust Pizza Wellsley Farms wellsley-farms 112 g (0.33 PIZZA) 971.0 8.04 0.022 27.68 0.89 1.8 10.71 0.455 thin crust pizza 0 None
172760 N891991007082 usda-ndb-import Waffle Lady Inc. Waffle Lady, Organic Gingerbr... Waffle Lady Inc. waffle-lady-inc 46 g (8 THICK BELGIAN STYLE WAFFLES | PANCAKES... 1456.0 0.00 0.000 80.43 26.09 2.2 6.52 0.696 pancake waffle mix 0 None
173109 N893913001840 usda-ndb-import Rod Rotondi Enterprises Llc Leaf Cuisine, Not ... Rod Rotondi Enterprises Llc rod-rotondi-enterprises-llc 28 g (1 ONZ) 895.0 16.07 0.000 14.29 3.57 3.6 7.14 0.500 cream cheese spread 0 None
173808 N896767001219 usda-ndb-import Luna Larry's, Coconut Bliss, Organic Non-Dairy... Luna, Luna & Larry's luna,luna-larry-s 95 g (0.5 cup) 1059.0 17.89 0.000 21.05 13.68 2.1 4.21 0.042 dairy frozen dessert 0 None
173854 N896859000588 usda-ndb-import Organic Ville Organic Non Dairy Thousand Islan... Organic Ville, Sky Valley Foods Inc. organic-ville,sky-valley-foods-inc 30 ml (2 Tbsp) 1393.0 33.33 0.000 13.33 10.00 0.0 0.00 0.467 thousand island dressing 0 None
174070 N897552001469 usda-ndb-import Full Flavor Foods Chicken Gravy Mix Full Flavor Foods full-flavor-foods 6 g (1 Tbsp) 1745.0 0.00 0.000 100.00 16.67 0.0 16.67 4.500 chicken gravy mix 0 None
174431 N898575001368 usda-ndb-import Madecasse Sea Salt & Nibs Dark Chocolate Bar Madecasse madecasse 75 g (2.64 oz) 1226.0 20.00 0.000 24.00 18.67 1.3 4.00 0.020 dark chocolate bar 0 None
174839 N939662096321 usda-ndb-import Nissin Bowl Noodles, Ramen Noodle Soup With Sh... Nissin, Nissin Foods (Usa) Co. Inc. nissin,nissin-foods-usa-co-inc 46 g (46 g) 1912.0 19.57 0.011 60.87 6.52 4.3 10.87 1.913 ramen noodle soup 0 None
177933 N20032319 usda-ndb-import Fresh & Easy Non Fat Greek Yogurt Fresh & Easy fresh-easy 170 g (6 ONZ) 343.0 0.00 0.000 11.76 10.59 0.0 8.24 0.041 fat greek yogurt 0 None
270315 N4178900121 usda-ndb-import Maruchan Instant Lunch, Ramen Noodles With Veg... Maruchan, Maruchan Inc. maruchan,maruchan-inc 64 g (1 CONTAINER, PER CONTAINER) 1895.0 18.75 0.000 60.94 3.12 3.1 10.94 1.859 noodles with vegetables 0 None
280147 N5051379022655 usda-ndb-import Fresh & Easy Ice Cream Sandwiches Fresh & Easy fresh-easy 60 g (1 SANDWICH) 1117.0 8.33 0.025 41.67 21.67 1.7 5.00 0.167 ice cream sandwiches 0 None
280344 N5051379079499 usda-ndb-import Fresh & Easy Non Fat Greek Yogurt Fresh & Easy fresh-easy 150 g (1 CONTAINER) 335.0 0.00 0.003 12.00 10.00 0.0 8.00 0.033 fat greek yogurt 0 None
290879 N60788003540 usda-ndb-import Southern Home Ultra Thin Crust Pizza Southern Home southern-home 140 g (0.333 PIZZA) 808.0 8.57 0.021 20.71 2.14 0.7 8.57 0.364 thin crust pizza 0 None
290956 N6078802558 usda-ndb-import Southern Home Chunk Light Tuna In Water Southern Home southern-home 56 g (0.25 CUP | DRAINED, ANOUT) 372.0 0.89 0.036 0.00 0.00 0.0 19.64 0.321 chunk light tuna 0 None
292097 N68826717758 usda-ndb-import Ahold Twisted Chocolate Jumbo Cinnamon Rolls W... Ahold ahold 99 g (1 ROLL WITH ICING) 1310.0 10.10 0.000 50.51 21.21 2.0 5.05 0.677 cinnamon rolls with 0 None
292898 N725493300360 usda-ndb-import Cha Ching Thousand Island Dressing Cha Ching cha-ching 30 ml (2 Tbsp) 1117.0 20.00 0.017 20.00 16.67 0.0 0.00 1.000 thousand island dressing 0 None
293817 N7501088109980 usda-ndb-import Terana Pico De Gallo Seasoning Mix Terana terana 0.5 g (0.25 tsp) 0.0 0.00 NaN 0.00 NaN NaN 0.00 0.000 pico de gallo 0 None

906 rows × 17 columns


In [102]:
missing_cols = [col for col in demo_df.columns if col not in orig_demo.columns]
print("Missing columns", missing_cols)


Missing columns []

Now want to append the original demo data to the other data


In [103]:
finished = demo_df.append(orig_demo)
finished


Out[103]:
brands brands_tags carbohydrates_100g cholesterol_100g code creator demo_cat energy_100g fat_100g fiber_100g hos image_url product_name proteins_100g serving_size sodium_100g sugars_100g
417 Guiltless Gourmet guiltless-gourmet 78.57 0.000 N901000017 usda-ndb-import yellow corn tortilla 1795.0 10.71 7.1 0 None Guiltless Gourmet Yellow Corn Tortilla Chips 7.14 28 g (1 oz) 0.643 0.00
1366 Trader Joe's trader-joe-s 5.65 0.008 N98601 usda-ndb-import chicken noodle soup 151.0 0.40 0.4 0 None Trader Joe's Chicken Noodle Soup 2.42 248 g (1 cup) 0.294 0.40
2267 Bakery Fresh Goodness, The Kroger Co. bakery-fresh-goodness,the-kroger-co 77.65 0.024 N11110197900 usda-ndb-import red velvet cake 1971.0 17.65 0.0 0 None Bakery Fresh Goodness Triple Layer Red Velvet ... 1.18 85 g (1 SLICE) 0.353 65.88
2669 The Kroger Co. the-kroger-co 44.44 0.032 N11110577276 usda-ndb-import ice cream sandwich 1130.0 9.52 1.6 0 None The Kroger Co. Ice Cream Sandwich 4.76 63 g (1 SANDWICH) 0.238 20.63
3172 Simple Truth Organic simple-truth-organic 18.75 0.000 N11110791559 usda-ndb-import crunchy peanut butter 2615.0 50.00 9.4 0 None Simple Truth Organic Crunchy Peanut Butter 25.00 32 g (2 Tbsp) 0.375 6.25
3187 Kroger kroger 53.57 0.000 N11110796486 usda-ndb-import cooked potato chips 2243.0 32.14 3.6 0 None Kroger Kettle Cooked Potato Chips, Sea Salt & ... 7.14 28 g (1 oz) 0.393 0.00
3427 Kroger kroger 21.43 0.000 N11110819376 usda-ndb-import dry roasted peanuts 2389.0 46.43 7.1 0 None Kroger Dry Roasted Peanuts 21.43 28 g (1 oz) 0.607 3.57
3638 P$$T... Big Savings... Pass It On p-t-big-savings-pass-it-on 0.00 0.000 N11110836281 usda-ndb-import vegetable oil spread 2092.0 50.00 0.0 0 None P$$T... Big Savings... Pass It On 53% Vegetabl... 0.00 14 g (1 Tbsp) 0.643 0.00
3654 Kroger kroger 8.80 0.004 N11110837158 usda-ndb-import mushroom condensed soup 268.0 2.00 0.0 0 None Kroger 98% Fat Free Cream Of Mushroom Condense... 1.60 125 g (0.5 cup) 0.640 1.60
3701 Kroger kroger 14.00 0.010 N11110839305 usda-ndb-import chili with beans 502.0 4.00 4.4 0 None Kroger Chili With Beans 6.80 250 g (1 cup) 0.496 2.00
3705 Kroger kroger 8.90 0.030 N11110839510 usda-ndb-import corned beef hash 707.0 10.17 0.8 0 None Kroger Corned Beef Hash 9.32 236 g (1 cup) 0.390 0.42
3926 Kroger kroger 78.95 0.000 N11110852748 usda-ndb-import instant mashed potatoes 1540.0 0.00 5.3 0 None Kroger Instant Mashed Potatoes 10.53 19 g (0.33 cup) 0.079 0.00
4151 Kroger, The Kroger Co. kroger,the-kroger-co 18.18 0.068 N11110863997 usda-ndb-import cream cheese spread 1331.0 22.73 0.0 0 None Kroger Cream Cheese Spread, Whipped Strawberry 4.55 22 g (2 Tbsp) 0.227 13.64
4664 The Kroger Co. the-kroger-co 0.00 0.045 N11110888082 usda-ndb-import chunk light tuna 523.0 4.46 0.0 0 None The Kroger Co. Chunk Light Tuna In Oil 19.64 56 g (2 OZ DRAINED) 0.321 0.00
5052 Unilever unilever 0.00 NaN N11115224274 usda-ndb-import vegetable oil spread 1197.0 28.57 NaN 0 None Unilever Imperial, 30% Vegetable Oil Spread 0.00 14 g (1 Tbsp) 0.643 NaN
5061 Unilever unilever 0.00 0.000 N11115621240 usda-ndb-import vegetable oil spread 2389.0 57.14 NaN 0 None Unilever Promise, Buttery, 60% Vegetable Oil S... 0.00 14 g (1 Tbsp) 0.607 NaN
5113 Roundy's roundy-s 0.00 0.045 N11150001298 usda-ndb-import chunk white albacore 523.0 0.89 0.0 0 None Roundy's Chunk White Albacore Tuna In Water 25.00 56 g (0.25 cup) 0.464 0.00
5252 Simply Roundy's simply-roundy-s 57.14 0.000 N11150040419 usda-ndb-import yellow corn tortilla 2092.0 25.00 7.1 0 None Simply Roundy's Organic Yellow Corn Tortilla C... 7.14 28 g (1 oz) 0.571 0.00
5271 Roundy's roundy-s 54.39 0.000 N11150041430 usda-ndb-import potato chips cheddar 2201.0 31.58 3.5 0 None Roundy's Potato Chips, Cheddar & Sour Cream 7.02 57 g (1 PACKAGE) 0.596 1.75
5500 Roundy's roundy-s 9.60 0.004 N11150100205 usda-ndb-import chicken condensed soup 268.0 2.00 0.8 0 None Roundy's Cream Of Chicken Condensed Soup 1.60 125 g (0.5 cup) 0.640 0.80
5504 Roundy's roundy-s 7.20 0.008 N11150100304 usda-ndb-import mushroom condensed soup 234.0 2.00 1.6 0 None Roundy's Cream Of Mushroom Condensed Soup 2.40 125 g (0.5 cup) 0.360 0.00
5732 Roundy's roundy-s 78.95 0.000 N11150183482 usda-ndb-import instant mashed potatoes 1540.0 0.00 5.3 0 None Roundy's Instant Mashed Potatoes 10.53 19 g (0.33 cup) 0.079 0.00
5733 Roundy's roundy-s 78.95 0.000 N11150183499 usda-ndb-import instant mashed potatoes 1540.0 0.00 5.3 0 None Roundy's Instant Mashed Potatoes 10.53 19 g (0.33 cup) 0.079 0.00
6111 Roundy's roundy-s 4.29 0.086 N11150538008 usda-ndb-import hot italian sausage 1377.0 25.71 0.0 0 None Roundy's Hot Italian Sausage 20.00 70 g (1 GRILLED LINK) 1.043 4.29
6113 Roundy's roundy-s 2.86 0.086 N11150538022 usda-ndb-import sweet italian sausage 1377.0 25.71 0.0 0 None Roundy's Sweet Italian Sausage 20.00 70 g (1 GRILLED LINK) 0.871 2.86
6164 Roundy's roundy-s 24.71 0.000 N11150550086 usda-ndb-import french fried potatoes 787.0 9.41 2.4 0 None Roundy's Curly French Fried Potatoes 2.35 85 g (3 ONZ) 0.435 0.00
6247 Roundy's roundy-s 27.78 0.019 N11150589147 usda-ndb-import ice cream bars 699.0 5.56 0.0 0 None Roundy's Ice Cream Bars, Orange 1.85 54 g (54 g) 0.065 18.52
6254 Roundy's roundy-s 36.36 0.026 N11150590037 usda-ndb-import ice cream bars 1795.0 29.87 2.6 0 None Roundy's Ice Cream Bars 3.90 77 g (77 g) 0.117 31.17
6928 Shurfine, Topco Associates Inc. shurfine,topco-associates-inc 56.67 0.000 N11161030577 usda-ndb-import baked snack crackers 1674.0 15.00 0.0 0 None Shurfine Baked Snack Crackers 6.67 30 g (40 CRACKERS) 0.733 3.33
6934 Shurfine shurfine 57.14 0.000 N11161031352 usda-ndb-import classic potato chips 2389.0 35.71 3.6 0 None Shurfine Classic Potato Chips 7.14 28 g (17 CHIPS) 0.643 0.00
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
292097 Ahold ahold 50.51 0.000 N68826717758 usda-ndb-import cinnamon rolls with 1310.0 10.10 2.0 0 None Ahold Twisted Chocolate Jumbo Cinnamon Rolls W... 5.05 99 g (1 ROLL WITH ICING) 0.677 21.21
292898 Cha Ching cha-ching 20.00 0.017 N725493300360 usda-ndb-import thousand island dressing 1117.0 20.00 0.0 0 None Cha Ching Thousand Island Dressing 0.00 30 ml (2 Tbsp) 1.000 16.67
293817 Terana terana 0.00 NaN N7501088109980 usda-ndb-import pico de gallo 0.0 0.00 NaN 0 None Terana Pico De Gallo Seasoning Mix 0.00 0.5 g (0.25 tsp) 0.000 NaN
0 Meijer meijer 62.50 0.000 N713733903387 usda-ndb-import cracker 2092.0 28.12 0.0 1 https://static.meijer.com/Media/007/13733/0071... Low Sodium Applause Crackers 6.25 16 g (16 g) 0.188 6.25
1 Global global 71.43 0.000 N876274001858 usda-ndb-import cracker 2092.0 21.43 0.0 0 NaN Snack Time, Stack Pack Salted Crackers 7.14 28 g (28 g) 0.304 7.14
2 Hostess hostess 48.24 0.012 N888109050023 usda-ndb-import donut 2017.0 30.59 1.2 1 https://s-media-cache-ak0.pinimg.com/originals... Donettes, Mini Donuts, Frosted 3.53 85 g (3 oz) 0.306 30.59
3 Entenmann's entenmann-s 47.92 0.021 N72030015712 usda-ndb-import donut 2004.0 31.25 2.1 0 http://www.shugarysweets.com/wp-content/upload... Frosted Donuts 4.17 48 g (48 g) 0.375 27.08
4 Little Debbie, Mckee Foods Corporation little-debbie,mckee-foods-corporation 55.74 0.020 N24300806247 usda-ndb-import donut 1987.0 27.05 1.6 1 http://bitemefoods.com.au/wp-content/uploads/2... Mini Donuts 4.10 122 g (8 DONUTS) 0.418 30.33
5 Entenmann's, Bimbo Bakeries Usa Inc. entenmann-s,bimbo-bakeries-usa-inc 52.63 0.026 N72030021645 usda-ndb-import donut 1983.0 28.07 1.8 0 https://jpg.seasonskosher.com/content/images/t... Pop'Ettes Powdered Donuts 3.51 57 g (57 g) 0.368 26.32
6 Roundy's roundy-s 73.33 0.000 N11150146098 usda-ndb-import cracker 1954.0 15.00 3.3 1 http://www.candywarehouse.com/assets/item/regu... Kids Graham Crackers, Chocolate 6.67 30 g (19 COOKIES) 0.417 23.33
7 Medford Farms medford-farms 60.00 0.000 N76606720956 usda-ndb-import cracker 1954.0 26.67 0.0 0 http://www.baldersonvillagecheese.com/gourmet-... Savory Snack Cracker, Celery & Onion 6.67 15 g (15 g) 1.000 0.00
8 Maple Donuts Inc maple-donuts-inc 40.00 0.011 N76265185905 usda-ndb-import donut 1858.0 28.89 2.2 0 http://www.fridaycakenight.com/wp-content/uplo... Plain Cake Mini Donuts 2.22 45 g (2 DONUTS) 0.511 11.11
9 Roland roland 75.00 0.000 N41224710146 usda-ndb-import cracker 1833.0 6.25 6.2 0 http://www.rolandfood.com/i/product_images/l/0... Roland Water Crackers 6.25 16 g (16 g) 0.750 0.00
10 Great Value, Wal-Mart Stores Inc. great-value,wal-mart-stores-inc 50.00 0.000 N78742129082 usda-ndb-import cracker 1833.0 25.00 0.0 0 NaN Baked Buttery Crackers, Naturally Flavored 6.25 16 g (5 CRACKERS) 0.656 6.25
11 Kroger, The Kroger Co. kroger,the-kroger-co 59.15 0.000 N11110008817 usda-ndb-import donut 1828.0 19.72 1.4 0 NaN Maple Glazed Cake Donuts 4.23 71 g (1 DONUT) 0.451 35.21
12 Meijer meijer 71.43 0.000 N708820381599 usda-ndb-import cracker 1795.0 14.29 10.7 0 NaN Woven Wheats, Baked Whole Wheat Crackers, Rose... 7.14 28 g (28 g) 0.536 3.57
13 Hostess hostess 52.83 0.009 N888109111298 usda-ndb-import donut 1736.0 20.75 1.9 0 NaN Red Velvet Mini Donuts 3.77 53 g (3 DONUTS) 0.255 30.19
14 Mom Brands mom-brands 89.66 0.000 N42400109754 usda-ndb-import cereal 1732.0 3.45 0.0 0 http://media.fooducate.com/products/images/180... Cocoa Dyno-Bites, Sweetened Rice Cereal With R... 3.45 29 g (0.75 cup) 0.517 44.83
15 Kiggins kiggins 88.89 0.000 N51933150546 usda-ndb-import cereal 1703.0 0.00 0.0 0 NaN Sweetened Puffed Wheat Cereal, Honey 7.41 27 g (0.75 cup) 0.241 55.56
16 Food Lion, Food Town Stores Inc. food-lion,food-town-stores-inc 80.00 0.000 N35826097101 usda-ndb-import cracker 1674.0 5.71 5.7 0 NaN Sweet & Crispy Graham Crackers 5.71 35 g (2 FULL CRACKER SHEETS) 0.514 22.86
17 Chocolatey Colossal Crunch chocolatey-colossal-crunch 86.67 0.000 N42400060024 usda-ndb-import cereal 1674.0 5.00 3.3 0 NaN Naturally & Artificially Flavored Sweetened Co... 3.33 30 g (0.75 cup) 0.667 43.33
18 Mom Brands mom-brands 86.67 0.000 N42400236245 usda-ndb-import cereal 1674.0 3.33 3.3 0 https://www.postconsumerbrands.com/wp-content/... Berry Bunch Crunch, Sweetened Corn & Oat Cereal 3.33 30 g (0.75 cup) 0.467 46.67
19 Honey Graham Toasters honey-graham-toasters 76.67 0.000 N42400240815 usda-ndb-import cereal 1674.0 10.00 3.3 0 https://s-media-cache-ak0.pinimg.com/736x/63/e... Honey Graham Cereal 6.67 30 g (0.75 cup) 0.867 33.33
20 Ancient Harvest ancient-harvest 67.65 0.000 N980125260001 usda-ndb-import cereal 1611.0 5.88 7.1 0 NaN Quinoa Hot Cereal Flakes 12.65 34 g (0.33 cup) 0.006 5.88
21 Panaderia San Jorge S.A. panaderia-san-jorge-s-a 76.67 0.000 N7750106000048 usda-ndb-import cracker 1602.0 3.33 NaN 0 NaN San Jorge Water Crackers 13.33 30 g (30 GRM) 0.600 3.33
22 Foodtown, Foodtown Inc. foodtown,foodtown-inc 81.25 0.000 N11153041949 usda-ndb-import cereal 1569.0 3.12 6.2 1 https://www.dlmdriveup.com/img/CentralOrdering... Crunchy Oat Squares With Cinnamon Cereal 9.38 32 g (0.75 cup) 0.422 25.00
23 Safeway Kitchens, Safeway Inc. safeway-kitchens,safeway-inc 85.71 0.000 N21130281022 usda-ndb-import cereal 1494.0 0.00 3.6 0 NaN Corn Flakes Cereal 7.14 28 g (1 cup) 0.571 7.14
24 Arnie's arnie-s 43.66 0.028 N76185003815 usda-ndb-import donut 1473.0 16.90 1.4 0 NaN Variety Pack Donut Holes 5.63 71 g (71 g) 0.493 18.31
25 Fresh & Easy fresh-easy 56.14 0.070 N5051379078669 usda-ndb-import donut 1469.0 14.04 0.0 0 NaN Baked Donuts 7.02 57 g (1 DONUT) 0.140 36.84
26 Nature's Path nature-s-path 87.50 0.000 N58449620020 usda-ndb-import cereal 1305.0 0.00 6.2 0 NaN Eco Pac, Cereal, Millet Puffs 12.50 16 g (1 cup) 0.000 0.00

933 rows × 17 columns

Can now write this out to file.


In [104]:
# finished.to_csv(code_dir+"demo_food_data_final.csv", index = False)
finished.to_csv(code_dir+"demo_food_data_final2.csv", index = False)