In [16]:
import json
import os

import requests as r
from scipy.spatial.distance import cosine, euclidean, jaccard
from sklearn.feature_extraction.text import CountVectorizer

In [17]:
# API key sent with every request below. Set the NDB_API_KEY environment
# variable to use a personal key; when it is unset or empty the notebook falls
# back to the literal 'DEMO_KEY' it always used.
api_key = os.environ.get('NDB_API_KEY') or 'DEMO_KEY'

In [18]:
def get_product_list(q=''):
    """Search the USDA NDB food database and return the matching products.

    Example request:
    https://api.nal.usda.gov/ndb/search/?format=json&q=butter&sort=n&max=25&offset=0&api_key=DEMO_KEY

    NOTE(review): the NDB endpoints may have been superseded by a newer USDA
    API -- confirm this URL is still served before relying on it.

    Parameters
    ----------
    q : str
        Free-text search terms, sent as the ``q`` query parameter.

    Returns
    -------
    list of tuple
        ``(ndbno, name)`` pairs in the order the API returned them. The list
        is empty when the response has no ``list`` section.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status code.
    """
    url = 'https://api.nal.usda.gov/ndb/search/'
    # Hand the parameters to requests instead of formatting them into the URL:
    # this percent-encodes the search terms (spaces, '&', '#', ...) and avoids
    # the stray '&&' the hand-built query string contained.
    params = {
        'q': q,
        'format': 'json',
        'sort': 'n',
        'max': 1000,
        'offset': 0,
        'api_key': api_key,
    }
    # A timeout keeps the cell from hanging forever on a stalled connection.
    resp = r.get(url, params=params, timeout=30)
    resp.raise_for_status()
    # Use the public JSON accessor rather than the private ``_content`` bytes.
    data = resp.json()
    # A payload without 'list' is treated as "no matches" instead of KeyError.
    items = data.get('list', {}).get('item', [])
    return [(entry['ndbno'], entry['name']) for entry in items]

In [21]:
# Look up every product matching the search terms.
lst = get_product_list('carrots raw')
# Keep just the product names; they are the text that gets vectorized later.
txt_data = [name for _, name in lst]
# Echo each (ndbno, name) pair, one per line.
for product in lst:
    print(product)


('45187288', 'BEAGLE BAY ORGANICS, RAW SAUERKRAUT, CARROT & DILL, UPC: 654367229521')
('11960', 'Carrots, baby, raw')
('11124', 'Carrots, raw')
('45161981', 'JUISI, RAW & COLD PRESSED JUICE, CARROT ROOT, UPC: 855917004104')
('45154209', 'WONDERFULLY RAW, DIPPERZ, CHEEZY BROCCOLI, SUNFLOWER & CARROT CRUNCHERS, UPC: 850370005194')

In [26]:
# Rank the products found above by how closely their names match the search
# phrase, using Jaccard distance over bag-of-words vectors (0.0 = same word set).
query = 'carrots raw'
vectorizer = CountVectorizer(min_df=1)
X = vectorizer.fit_transform(txt_data)
req_X = vectorizer.transform([query]).toarray()

# scipy's jaccard is defined for 1-D boolean vectors, so flatten the (1, n)
# query row and reduce word counts to presence/absence before comparing.
query_presence = req_X.ravel().astype(bool)
doc_presence = X.toarray().astype(bool)

for idx, row in enumerate(doc_presence):
    # Rebuild each entry from its first two fields so re-running the cell
    # replaces the distance instead of appending another one.
    distance = float(jaccard(row, query_presence))
    lst[idx] = (lst[idx][0], lst[idx][1], distance)

for item in lst:
    print(item)
# Last expression: the closest match is shown as the cell's output.
min(lst, key=lambda x: x[2])


('45187288', 'BEAGLE BAY ORGANICS, RAW SAUERKRAUT, CARROT & DILL, UPC: 654367229521', 0.90000000000000002)
('11960', 'Carrots, baby, raw', 0.33333333333333331)
('11124', 'Carrots, raw', 0.0)
('45161981', 'JUISI, RAW & COLD PRESSED JUICE, CARROT ROOT, UPC: 855917004104', 0.90000000000000002)
('45154209', 'WONDERFULLY RAW, DIPPERZ, CHEEZY BROCCOLI, SUNFLOWER & CARROT CRUNCHERS, UPC: 850370005194', 0.90909090909090906)
Out[26]:
('11124', 'Carrots, raw', 0.0)

In [84]:
def get_product_detail(product_id):
    """Fetch the NDB food report for one product and summarise key nutrients.

    Example request:
    https://api.nal.usda.gov/ndb/reports/?ndbno=01009&type=f&format=json&api_key=DEMO_KEY

    NOTE(review): the NDB endpoints may have been superseded by a newer USDA
    API -- confirm this URL is still served before relying on it.

    Parameters
    ----------
    product_id : str
        NDB number of the food, sent as the ``ndbno`` query parameter.

    Returns
    -------
    dict
        Always holds ``'id'`` and ``'name'``. For each of the nutrients
        energy, protein, fat, carbohydrate, fiber and sugar that appears in
        the report, the matching key maps to a ``(value, unit)`` tuple;
        energy is an int, the others are floats.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status code.
    """
    # nutrient_id -> (key in the result, converter for the reported value).
    # Energy goes through float first so a decimal string such as '40.5' is
    # truncated to an int rather than raising ValueError.
    nutrient_fields = {
        '208': ('energy', lambda value: int(float(value))),
        '203': ('protein', float),
        '204': ('fat', float),
        '205': ('carbohydrate', float),
        '291': ('fiber', float),
        '269': ('sugar', float),
    }

    url = 'https://api.nal.usda.gov/ndb/reports/'
    # Let requests build and percent-encode the query string.
    params = {
        'ndbno': product_id,
        'format': 'json',
        'type': 'f',
        'api_key': api_key,
    }
    # A timeout keeps the cell from hanging forever on a stalled connection.
    resp = r.get(url, params=params, timeout=30)
    resp.raise_for_status()
    # Use the public JSON accessor rather than the private ``_content`` bytes.
    item = resp.json()['report']['food']

    result = {'id': item['ndbno'], 'name': item['name']}
    for n_item in item['nutrients']:
        # Compare ids as strings so both '208' and 208 are recognised.
        field = nutrient_fields.get(str(n_item['nutrient_id']))
        if field is None:
            continue
        key, convert = field
        result[key] = (convert(n_item['value']), n_item['unit'])
    return result

In [85]:
# Fetch the nutrient summary for one product by its NDB number. The bare call
# is the cell's last expression, so the returned dict is shown as the output.
get_product_detail('45051561')


{'id': '45051561', 'name': 'A BLEND OF BROCCOLI, CARROTS, CAULIFLOWER, ITALIAN BEANS, ZUCCHINI & LIMA BEANS, UPC: 041497033560', 'energy': (40, 'kcal'), 'protein': (1.33, 'g'), 'fat': (0.0, 'g'), 'carbohydrate': (8.0, 'g'), 'fiber': (2.7, 'g'), 'sugar': (2.67, 'g')}

In [ ]: