In [ ]:
import pandas as pd
import numpy as np
import sqlalchemy
import re
from elasticsearch import Elasticsearch
from elasticsearch import helpers as eshelper
from tqdm import tqdm

In [ ]:
db = sqlalchemy.create_engine('postgresql://postgres@localhost:5432/postgres', client_encoding='utf8')

In [ ]:
categories = pd.read_sql_table('categories', db)
categories.head()

In [ ]:
categorisations = pd.read_sql_table('categorisations', db)
categorisations.head()

In [ ]:
images = pd.read_sql_table('images', db)
images.head()

In [ ]:
nutrients = pd.read_sql_table('nutrients', db).fillna(0)
nutrients.head()

In [ ]:
product_nutrients = pd.read_sql_table('product_nutrients', db).fillna(0)
product_nutrients.head()

In [ ]:
products = pd.read_sql_table('products', db).fillna(0)
products.head()

In [ ]:
re_brand = re.compile('([^:]*):(.+)')

In [ ]:
entries = []
for i, product in tqdm(products.iterrows()):
    name = product.name_translations.get('fr')
    
    if name:
    
        nuts = []
        mapping = product_nutrients[product_nutrients.product_id == product.id]
        for i, m in mapping.iterrows():

            nut = nutrients[nutrients.id == m.nutrient_id]        
            nuts.append({
                'name': nut.name_translations.values[0]['fr'],
                'unit': nut.unit.values[0],
                'rdi': np.nan_to_num(nut.rdi.values[0]),
                'per_day': np.nan_to_num(m.per_day),
                'per_hundred': np.nan_to_num(m.per_hundred),
                'per_portion': np.nan_to_num(m.per_portion),
            })

        if len(nuts):
            
            brand_existence = re_brand.search(name)
            if brand_existence:
                brand, name_cleaned = re_brand.search(name).groups()
            else:
                brand = ''
                name_cleaned = name
            
            fmt = {
                '_index': 'products1',
                '_type': 'openfood',
                '_id': product.id,
                '_source': {
                    'name': name_cleaned,
                    'brand': brand,
                    'unit': product.unit,
                    'quantity': product.quantity,
                    'unit_portion': product.portion_quantity,
                    'unit_quantity': product.portion_unit,
                    'nutriments': nuts
                }
            }

            entries.append(fmt)
        
len(entries)

In [ ]:
entries[0]

In [ ]:
client = Elasticsearch(hosts='http://')

In [ ]:
eshelper.bulk(client, entries)

In [ ]: