In [1]:
import elasticsearch
from elasticsearch import Elasticsearch
from jsmin import jsmin
import os

In [3]:
# Fetch the watermark IPython extension (watermark.py) from GitHub and install it.
# NOTE(review): %install_ext appears to be deprecated/removed in later IPython
# releases -- confirm before re-running on a newer kernel.
%install_ext https://raw.githubusercontent.com/szeitlin/watermark/master/watermark.py


Installed watermark.py. To use it, type:
  %load_ext watermark

In [4]:
# Activate the watermark extension so the %watermark magic is available.
%load_ext watermark

In [5]:
# Record provenance: prints the author name, a "Last updated" date, and the
# versions of elasticsearch, jsmin and python used for this run.
%watermark -a "Samantha Zeitlin" -d -u -p elasticsearch,jsmin,python


Samantha Zeitlin 
Last updated: 05/04/2015 

elasticsearch 1.4.0
jsmin 2.1.1
python 2.7.9

In [6]:
# Client used by every cell below. No arguments are passed, so the library's
# default connection settings apply (presumably a local node -- TODO confirm).
es = Elasticsearch()

In [8]:
# Create the 'official_test' index. A RequestError (for example the HTTP 400
# returned when the index already exists) is printed instead of propagated, so
# the cell can be re-run safely.
try:
    es.indices.create(index='official_test')
except elasticsearch.RequestError as err:
    # `err` rather than `re`: avoids shadowing the stdlib `re` module name in the
    # kernel namespace. print(...) with one argument behaves identically on
    # Python 2 and also parses on Python 3.
    print(err)


WARNING:elasticsearch:PUT /official_test [status:400 request:0.203s]
TransportError(400, u'IndexAlreadyExistsException[[official_test] already exists]')

In [17]:
# Fetch the mapping currently registered for doc type 'notebook' in the
# 'official_test' index.
current_map = es.indices.get_mapping(index='official_test', doc_type='notebook')
# Bare expression: displayed as the cell's output.
current_map


Out[17]:
{u'official_test': {u'mappings': {u'notebook': {u'properties': {u'metadata': {u'properties': {u'name': {u'type': u'string'},
       u'signature': {u'type': u'string'}}},
     u'nbformat': {u'type': u'long'},
     u'nbformat_minor': {u'type': u'long'},
     u'worksheets': {u'properties': {u'cells': {u'properties': {u'cell_type': {u'type': u'string'},
         u'collapsed': {u'type': u'boolean'},
         u'input': {u'fields': {u'raw': {u'index': u'not_analyzed',
            u'type': u'string'}},
          u'type': u'string'},
         u'language': {u'type': u'string'},
         u'metadata': {u'type': u'object'},
         u'outputs': {u'include_in_parent': True,
          u'properties': {u'ename': {u'type': u'string'},
           u'evalue': {u'type': u'string'},
           u'html': {u'type': u'string'},
           u'metadata': {u'type': u'object'},
           u'output_type': {u'type': u'string'},
           u'png': {u'type': u'binary'},
           u'prompt_number': {u'type': u'long'},
           u'stream': {u'type': u'string'},
           u'text': {u'type': u'string'},
           u'traceback': {u'type': u'string'}},
          u'type': u'nested'},
         u'prompt_number': {u'type': u'long'}}},
       u'metadata': {u'type': u'object'}}}}}}}}

In [14]:
# Register the mapping stored in mapping_files/mapping_official.json for doc
# type 'notebook' in the 'official_test' index.
#
# The trick is that the mapping starts at the level of the document type, not
# the index, and doesn't need the word 'mappings' in it, as was shown in some
# (misleading!) examples.

# The handle is named `mapping_file` (not `map`) so the builtin `map` is not
# rebound for the rest of the kernel session.
with open('mapping_files/mapping_official.json', 'r') as mapping_file:
    body = jsmin(mapping_file.read())

# The file is already closed here; only the minified text is needed for the
# request. A RequestError is printed rather than propagated.
try:
    es.indices.put_mapping(index='official_test', doc_type='notebook', body=body)
except elasticsearch.RequestError as err:
    print(err)

In [97]:
# Show the client's own documentation for put_mapping (its accepted arguments).
print(es.indices.put_mapping.__doc__)


        Register specific mapping definition for a specific type.
        `<http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/indices-put-mapping.html>`_

        :arg index: A comma-separated list of index names the mapping should be
            added to (supports wildcards); use `_all` or omit to add the
            mapping on all indices.
        :arg doc_type: The name of the document type
        :arg body: The mapping definition
        :arg allow_no_indices: Whether to ignore if a wildcard indices
            expression resolves into no concrete indices. (This includes `_all`
            string or when no indices have been specified)
        :arg expand_wildcards: Whether to expand wildcard expression to concrete
            indices that are open, closed or both., default u'open'
        :arg ignore_conflicts: Specify whether to ignore conflicts while
            updating the mapping (default: false)
        :arg ignore_unavailable: Whether specified concrete indices should be
            ignored when unavailable (missing or closed)
        :arg master_timeout: Specify timeout for connection to master
        :arg timeout: Explicit operation timeout
        

In [108]:
# Build test_temp.json, a bulk-request file: for every file in `path`, one
# action line (`prefix`) followed by that file's jsmin-minified contents.
# NOTE(review): `path` is an absolute, machine-specific location -- adjust it
# before running elsewhere.
path = '/Users/szeitlin/mystuff/projects/nbindex/nbindex/testdir'

# Action line written before each document: index into 'official_test' as
# type 'notebook'.
prefix = '{"index": {"_index": "official_test", "_type":"notebook"}}\n'

newline = '\n'

# Skip hidden entries (e.g. .DS_Store) and anything that is not a regular file,
# since those would either fail to open or inject non-JSON text into the bulk
# file. Sorting makes the output order reproducible across runs.
source_names = sorted(
    name for name in os.listdir(path)
    if not name.startswith('.') and os.path.isfile(os.path.join(path, name))
)

# Text mode ('w'): only str data is written, which 'wb' rejects on Python 3.
with open('test_temp.json', 'w') as bulk_file:
    for eachfile in source_names:
        fullname = os.path.join(path, eachfile)
        with open(fullname, 'r') as each:
            # Read and minify first, so a failed read cannot leave a dangling
            # action line in the output.
            minified = jsmin(each.read())
        bulk_file.write(prefix)
        bulk_file.write(minified + newline)

In [110]:
# For some reason, it looks like the test3 mapping 'sort of' worked; not sure
# about the official_test mapping yet.
# Next try: add files that actually do contain png output.
#
# Builds png_temp.json (one action line plus one jsmin-minified document per
# file in `path`) and sends it to the bulk endpoint.
# NOTE(review): `path` is an absolute, machine-specific location -- adjust it
# before running elsewhere.
path = '/Users/szeitlin/mystuff/projects/nbindex/nbindex/pngdir'

# Action line written before each document: index into 'official_test' as
# type 'notebook'.
prefix = '{"index": {"_index": "official_test", "_type":"notebook"}}\n'

newline = '\n'

# Skip hidden entries (e.g. .DS_Store) and anything that is not a regular file,
# since those would either fail to open or inject non-JSON text into the bulk
# file. Sorting makes the output order reproducible across runs.
source_names = sorted(
    name for name in os.listdir(path)
    if not name.startswith('.') and os.path.isfile(os.path.join(path, name))
)

# Text mode ('w'): only str data is written, which 'wb' rejects on Python 3.
with open('png_temp.json', 'w') as bulk_file:
    for eachfile in source_names:
        fullname = os.path.join(path, eachfile)
        with open(fullname, 'r') as each:
            # Read and minify first, so a failed read cannot leave a dangling
            # action line in the output.
            minified = jsmin(each.read())
        bulk_file.write(prefix)
        bulk_file.write(minified + newline)

with open('png_temp.json', 'r') as bulk_file:
    obj = bulk_file.read()

# The file is closed before the request is sent. The bulk endpoint reports
# per-document failures in its response instead of raising, so inspect it
# rather than discarding it.
response = es.bulk(body=obj, index='official_test', doc_type='notebook')
items = response.get('items', [])
failures = [result for item in items
            for result in item.values() if 'error' in result]
if response.get('errors') or failures:
    print('%d of %d bulk actions failed' % (len(failures), len(items)))
    for result in failures[:5]:
        print('status %s: %s' % (result.get('status'), result.get('error')))
else:
    print('%d bulk actions succeeded' % len(items))

In [109]:
# Send the contents of test_temp.json to the bulk endpoint.
with open('test_temp.json', 'r') as bulk_file:
    obj = bulk_file.read()

# The file is closed before the request is sent. The bulk endpoint reports
# per-document failures in its response instead of raising, so inspect it
# rather than discarding it.
response = es.bulk(body=obj, index='official_test', doc_type='notebook')
items = response.get('items', [])
failures = [result for item in items
            for result in item.values() if 'error' in result]
if response.get('errors') or failures:
    print('%d of %d bulk actions failed' % (len(failures), len(items)))
    for result in failures[:5]:
        print('status %s: %s' % (result.get('status'), result.get('error')))
else:
    print('%d bulk actions succeeded' % len(items))

In [34]:
# List the _cat endpoints this cluster exposes.
print(es.cat.help())


=^.^=
/_cat/allocation
/_cat/shards
/_cat/shards/{index}
/_cat/master
/_cat/nodes
/_cat/indices
/_cat/indices/{index}
/_cat/segments
/_cat/segments/{index}
/_cat/count
/_cat/count/{index}
/_cat/recovery
/_cat/recovery/{index}
/_cat/health
/_cat/pending_tasks
/_cat/aliases
/_cat/aliases/{alias}
/_cat/thread_pool
/_cat/plugins
/_cat/fielddata
/_cat/fielddata/{fields}


In [15]:
# Print the _cat/indices table: one row per index on the cluster.
print(es.cat.indices())


green  open official_test 1 0 85 0   2.5mb   2.5mb 
green  open test2         1 0  0 0    115b    115b 
yellow open mapping       5 1  0 0    575b    575b 
yellow open .kibana       1 1  6 1  24.7kb  24.7kb 
green  open test3         1 0  3 0  75.7kb  75.7kb 
green  open test          1 0  0 0    115b    115b 
yellow open notebooks     5 1  0 0    575b    575b 
green  open {mappings:    1 0  0 0    115b    115b 
green  open official_map  1 0  6 0 151.4kb 151.4kb 
yellow open {             5 1  0 0    575b    575b 


In [ ]:


In [ ]: