In [2]:
import json
from elasticsearch import Elasticsearch

es = Elasticsearch()

In [3]:
def import_to_es(filename):
    with open(filename) as f:
        
        f.readline()
        f.readline()
        list(map(to_es, f))

In [4]:
def to_es(record):
    jsond = json.loads(record[0:-2])
    es.index('grid', 'institutions', jsond, id=jsond['id'])

In [5]:
import_to_es('grid_2015_10_09.json')


---------------------------------------------------------------------------
JSONDecodeError                           Traceback (most recent call last)
<ipython-input-5-18a78f9d9cd2> in <module>()
----> 1 import_to_es('grid_2015_10_09.json')

<ipython-input-3-c60a1e7d90bb> in import_to_es(filename)
      4         f.readline()
      5         f.readline()
----> 6         list(map(to_es, f))

<ipython-input-4-821173d27921> in to_es(record)
      1 def to_es(record):
----> 2     jsond = json.loads(record[0:-2])
      3     es.index('grid', 'inst', jsond, id=jsond['id'])

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/json/__init__.py in loads(s, encoding, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
    317             parse_int is None and parse_float is None and
    318             parse_constant is None and object_pairs_hook is None and not kw):
--> 319         return _default_decoder.decode(s)
    320     if cls is None:
    321         cls = JSONDecoder

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/json/decoder.py in decode(self, s, _w)
    337 
    338         """
--> 339         obj, end = self.raw_decode(s, idx=_w(s, 0).end())
    340         end = _w(s, end).end()
    341         if end != len(s):

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/json/decoder.py in raw_decode(self, s, idx)
    353         """
    354         try:
--> 355             obj, end = self.scan_once(s, idx)
    356         except StopIteration as err:
    357             raise JSONDecodeError("Expecting value", s, err.value) from None

JSONDecodeError: Expecting ',' delimiter: line 1 column 1060 (char 1059)

In [25]:
list(actions)


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-25-fcafa48da057> in <module>()
----> 1 list(actions)

ValueError: I/O operation on closed file.

In [42]:
def eager_import(filename):
    with open(filename) as f:
        records = json.load(f)['institutes']
        for record in records:
            record['num_types'] = len(record.get('types', []))
            es.index('grid', 'institutions', strip_empty(record), id=record['id'])

In [43]:
eager_import('grid_2015_10_09.json')

In [41]:
def strip_empty(document, required=tuple()):
    ''' Removes empty fields from the processed schema
    '''
    new_doc = {}
    for k, v in document.items():
        if k in required:
            new_doc[k] = v
        else:
            new_val = do_strip_empty(v)
            if k == 'otherProperties':
                new_val = [property for property in new_val if property.get('properties')]
            if new_val:
                new_doc[k] = new_val
    return new_doc


def strip_list(l):
    return list(filter(lambda x: x, map(do_strip_empty, l)))


def do_strip_empty(value):
    ''' Filters empty values from container types
    '''
    return {
        dict: strip_empty,
        list: strip_list,
        tuple: strip_list
    }.get(type(value), lambda x: x)(value)

In [ ]: