notebook.community

Edit and run



In [26]:

    
#from elasticsearch import Elasticsearch
import os
import json
from jsmin import jsmin
import pyelasticsearch
from pyelasticsearch import ElasticSearch #see if this is better than 'official API' - the docs look better to me
from pyelasticsearch import bulk_chunks



In [2]:

    
%install_ext https://raw.githubusercontent.com/szeitlin/watermark/master/watermark.py









    



Installed watermark.py. To use it, type:
  %load_ext watermark



In [3]:

    
%load_ext watermark



In [47]:

    
%watermark -a "Samantha Zeitlin" -d -u -p pyelasticsearch,jsmin,python









    



Samantha Zeitlin 
Last updated: 04/11/2015 

pyelasticsearch 1.2.1
jsmin 2.1.1
python 2.7.9



In [27]:

    
try:
    es = ElasticSearch('http://localhost:9200')
except:
    ConnectionError
    print "I can't connect!"



In [28]:

    
#start fresh to see if mapping first is better than mapping after
es.delete_all_indexes() #the deletion worked, but then it gave an error b/c there was nothing to delete. 
#print ElasticSearch.send_request.__doc__
try:
    es.send_request("GET", ["_status"],{},{}) 
except ElasticHttpError as vague:
    print "look at vague error message"









    



WARNING:elasticsearch:DELETE / [status:400 request:0.020s]






    



---------------------------------------------------------------------------
ElasticHttpError                          Traceback (most recent call last)
<ipython-input-28-d1d097aa6c4d> in <module>()
      1 #start fresh to see if mapping first is better than mapping after
----> 2 es.delete_all_indexes() #the deletion worked, but then it gave an error b/c there was nothing to delete.
      3 #print ElasticSearch.send_request.__doc__
      4 try:
      5     es.send_request("GET", ["_status"],{},{})

/Users/szeitlin/anaconda/envs/elasticsearch/lib/python2.7/site-packages/pyelasticsearch/client.pyc in delete_all_indexes(self, **kwargs)
   1030     def delete_all_indexes(self, **kwargs):
   1031         """Delete all indexes."""
-> 1032         return self.delete_index('_all', **kwargs)
   1033 
   1034     @es_kwargs()

/Users/szeitlin/anaconda/envs/elasticsearch/lib/python2.7/site-packages/pyelasticsearch/client.pyc in decorate(*args, **kwargs)
     90                 elif k in convertible_args:
     91                     query_params[k] = kwargs.pop(k)
---> 92             return func(*args, query_params=query_params, **kwargs)
     93         return decorate
     94     return decorator

/Users/szeitlin/anaconda/envs/elasticsearch/lib/python2.7/site-packages/pyelasticsearch/client.pyc in delete_index(self, index, query_params)
   1026                              ' delete_all_indexes().')
   1027         return self.send_request('DELETE', [self._concat(index)],
-> 1028                                  query_params=query_params)
   1029 
   1030     def delete_all_indexes(self, **kwargs):

/Users/szeitlin/anaconda/envs/elasticsearch/lib/python2.7/site-packages/pyelasticsearch/client.pyc in send_request(self, method, path_components, body, query_params)
    257             status = exc.args[0]
    258             error_message = exc.args[1]
--> 259             self._raise_exception(status, error_message)
    260 
    261         return prepped_response

/Users/szeitlin/anaconda/envs/elasticsearch/lib/python2.7/site-packages/pyelasticsearch/client.pyc in _raise_exception(self, status, error_message)
    271             error_class = IndexAlreadyExistsError
    272 
--> 273         raise error_class(status, error_message)
    274 
    275     def _encode_json(self, value):

ElasticHttpError: (400, u'ActionRequestValidationException[Validation Failed: 1: index / indices is missing;]')



In [75]:

    
!pwd









    



/Users/szeitlin/mystuff/projects/nbindex/nbindex



In [10]:

    
#may need to initialize an index first, then bulk insert, then map?
print ElasticSearch.create_index.__doc__









    



        Create an index with optional settings.

        :arg index: The name of the index to create
        :arg settings: A dictionary of settings

        If the index already exists, raise
        :class:`~pyelasticsearch.exceptions.IndexAlreadyExistsError`.

        See `ES's create-index API`_ for more detail.

        .. _`ES's create-index API`:
            http://www.elastic.co/guide/en/elasticsearch/reference/current/indices-create-index.html



In [29]:

    
es.create_index('test3')









    



WARNING:elasticsearch:PUT /test3 [status:400 request:0.026s]






    



---------------------------------------------------------------------------
IndexAlreadyExistsError                   Traceback (most recent call last)
<ipython-input-29-ad7d4d7101a2> in <module>()
----> 1 es.create_index('test3')

/Users/szeitlin/anaconda/envs/elasticsearch/lib/python2.7/site-packages/pyelasticsearch/client.pyc in decorate(*args, **kwargs)
     90                 elif k in convertible_args:
     91                     query_params[k] = kwargs.pop(k)
---> 92             return func(*args, query_params=query_params, **kwargs)
     93         return decorate
     94     return decorator

/Users/szeitlin/anaconda/envs/elasticsearch/lib/python2.7/site-packages/pyelasticsearch/client.pyc in create_index(self, index, settings, query_params)
   1005         """
   1006         return self.send_request('PUT', [index], body=settings or {},
-> 1007                                  query_params=query_params)
   1008 
   1009     @es_kwargs()

/Users/szeitlin/anaconda/envs/elasticsearch/lib/python2.7/site-packages/pyelasticsearch/client.pyc in send_request(self, method, path_components, body, query_params)
    257             status = exc.args[0]
    258             error_message = exc.args[1]
--> 259             self._raise_exception(status, error_message)
    260 
    261         return prepped_response

/Users/szeitlin/anaconda/envs/elasticsearch/lib/python2.7/site-packages/pyelasticsearch/client.pyc in _raise_exception(self, status, error_message)
    271             error_class = IndexAlreadyExistsError
    272 
--> 273         raise error_class(status, error_message)
    274 
    275     def _encode_json(self, value):

IndexAlreadyExistsError: (400, u'IndexAlreadyExistsException[[test3] already exists]')



In [38]:

    
#put a few target files in folder and iterate through that)

path = '/Users/szeitlin/mystuff/projects/nbindex/nbindex/testdir'
# newline = '\n'

def onefile(newline):
    """
    Test generator on one notebook file. 
    """
    with open('testdir/compare_2013_2014.ipynb', 'r') as one:
        yield jsmin(one.read()) + newline
        
for chunk in bulk_chunks(onefile(newline)):
    try:
        es.bulk(chunk, index='test3')
    except pyelasticsearch.exceptions.BulkError as bulk_failure:
        result = "check bulk_failure"
    except pyelasticsearch.exceptions.ElasticHttpError as vague:
        result = "not sure"

# def notebooks(path, newline):
#     for eachfile in os.listdir(path):
#         fullname = os.path.join(path,eachfile)
#         with open(fullname, 'r') as each:
#             yield jsmin(each.read()) + newline
 
# for chunk in bulk_chunks(notebooks(path, newline)):
#     print chunk
#     try:
#         es.bulk(chunk, doc_type='notebook', index='test3')
#     except pyelasticsearch.exceptions.BulkError as bulk_failure:
#         result = "check bulk_failure"
#     except pyelasticsearch.exceptions.ElasticHttpError as vague:
#         result = "not sure"









    



WARNING:elasticsearch:POST /test3/_bulk [status:400 request:0.002s]



In [39]:

    
result









    Out[39]:





'not sure'



In [40]:

    
vague









    Out[40]:





pyelasticsearch.exceptions.ElasticHttpError(400,
                                            u'ActionRequestValidationException[Validation Failed: 1: no requests added;]')



In [6]:

    
#looks like it wants you to create an index first and put_mapping second
#print es.put_mapping.__doc__

with open("mapping_minus_curl.json", 'r') as map:
     minimap = jsmin(map.read())
     es.put_mapping(index='test3', doc_type='notebook', mapping=minimap)









    



WARNING:elasticsearch:PUT /test3/notebook/_mapping [status:404 request:0.063s]






    



---------------------------------------------------------------------------
ElasticHttpNotFoundError                  Traceback (most recent call last)
<ipython-input-6-b684e1185e25> in <module>()
      4 with open("mapping_minus_curl.json", 'r') as map:
      5      minimap = jsmin(map.read())
----> 6      es.put_mapping(index='test3', doc_type='notebook', mapping=minimap)

/Users/szeitlin/anaconda/envs/elasticsearch/lib/python2.7/site-packages/pyelasticsearch/client.pyc in decorate(*args, **kwargs)
     90                 elif k in convertible_args:
     91                     query_params[k] = kwargs.pop(k)
---> 92             return func(*args, query_params=query_params, **kwargs)
     93         return decorate
     94     return decorator

/Users/szeitlin/anaconda/envs/elasticsearch/lib/python2.7/site-packages/pyelasticsearch/client.pyc in put_mapping(self, index, doc_type, mapping, query_params)
    899             [self._concat(index), doc_type, '_mapping'],
    900             mapping,
--> 901             query_params=query_params)
    902 
    903     @es_kwargs('search_type', 'search_indices', 'search_types',

/Users/szeitlin/anaconda/envs/elasticsearch/lib/python2.7/site-packages/pyelasticsearch/client.pyc in send_request(self, method, path_components, body, query_params)
    257             status = exc.args[0]
    258             error_message = exc.args[1]
--> 259             self._raise_exception(status, error_message)
    260 
    261         return prepped_response

/Users/szeitlin/anaconda/envs/elasticsearch/lib/python2.7/site-packages/pyelasticsearch/client.pyc in _raise_exception(self, status, error_message)
    271             error_class = IndexAlreadyExistsError
    272 
--> 273         raise error_class(status, error_message)
    274 
    275     def _encode_json(self, value):

ElasticHttpNotFoundError: (404, u'IndexMissingException[[test3] missing]')



In [64]:

    
#replace this with bulk_chunks instead 
#es.bulk([es.index_op(minified), es.index_op(mini2)], doc_type='notebook', index='test2')



In [66]:

    
!ls









    



Dockerfile                                 another_query.txt                          first_try_mapping.json                     minified_testfile.json                     testcreate.json
LICENSE                                    draft_bulk_import_multiple_notebooks.ipynb import_statements_query.txt                pyminified_testfile.json                   testnb.json
README.md                                  elasticsearch.yml                          match_query.txt                            test2.json                                 testprefix.json
action.json                                elk_test                                   minified.json                              testbulk.json                              testquery.txt



In [48]:

    
with open('nocurl_query.txt', 'r') as query:
    q=jsmin(query.read())
    #print q
    #says it wants a \n character? 
    newline = '\n'
    try:
        r = es.search(str(q + newline), index='test2')
        print r
    except: 
        e = pyelasticsearch.ElasticHttpError
        print e









    



WARNING:elasticsearch:GET /test2/_search?q=%7B%22query%22%3A%7B%22match%22%3A%7B%22input%22%3A%22import%22%7D%7D%7D%0A [status:400 request:0.006s]






    



<class 'pyelasticsearch.exceptions.ElasticHttpError'>



In [21]:

    
%%bash
curl -s -XPOST localhost:9200/_bulk --data-binary @test2.json









    



{"took":251,"errors":false,"items":[{"index":{"_index":"test","_type":"type1","_id":"2","_version":1,"status":201}}]}



In [ ]:

    
#looks like kibana barely works at all with only 1 document, suddenly with 2 it starts showing more functionality.