In [26]:
#from elasticsearch import Elasticsearch
import os
import json
from jsmin import jsmin
import pyelasticsearch
from pyelasticsearch import ElasticSearch #see if this is better than 'official API' - the docs look better to me
from pyelasticsearch import bulk_chunks
In [2]:
%install_ext https://raw.githubusercontent.com/szeitlin/watermark/master/watermark.py
In [3]:
%load_ext watermark
In [47]:
%watermark -a "Samantha Zeitlin" -d -u -p pyelasticsearch,jsmin,python
In [27]:
try:
es = ElasticSearch('http://localhost:9200')
except:
ConnectionError
print "I can't connect!"
In [28]:
#start fresh to see if mapping first is better than mapping after
es.delete_all_indexes() #the deletion worked, but then it gave an error b/c there was nothing to delete.
#print ElasticSearch.send_request.__doc__
try:
es.send_request("GET", ["_status"],{},{})
except ElasticHttpError as vague:
print "look at vague error message"
In [75]:
# Shell escape: show the current working directory, to confirm the
# relative paths used by the file-reading cells below.
!pwd
In [10]:
#may need to initialize an index first, then bulk insert, then map?
# Print the create_index docstring to check its signature and options.
print ElasticSearch.create_index.__doc__
In [29]:
# Create the target index up front, before bulk-inserting and mapping into it.
es.create_index('test3')
In [38]:
#put a few target files in folder and iterate through that)
path = '/Users/szeitlin/mystuff/projects/nbindex/nbindex/testdir'

# Each bulk document must be newline-terminated for the Elasticsearch bulk
# API.  This assignment was commented out before, so `newline` was undefined
# on a fresh Restart-&-Run-All (it only worked via leftover kernel state).
newline = '\n'

def onefile(newline):
    """
    Test generator on one notebook file.

    Yields the jsmin-minified content of a single notebook, terminated
    with `newline` as the bulk API requires.
    """
    with open('testdir/compare_2013_2014.ipynb', 'r') as one:
        yield jsmin(one.read()) + newline

for chunk in bulk_chunks(onefile(newline)):
    try:
        es.bulk(chunk, index='test3')
    except pyelasticsearch.exceptions.BulkError as bulk_failure:
        # Keep the exception detail in the status string instead of
        # discarding it; a later cell displays `result` for inspection.
        result = "check bulk_failure: %s" % bulk_failure
    except pyelasticsearch.exceptions.ElasticHttpError as vague:
        result = "not sure: %s" % vague
# Multi-file variant, kept for reference (not yet working):
# def notebooks(path, newline):
# for eachfile in os.listdir(path):
# fullname = os.path.join(path,eachfile)
# with open(fullname, 'r') as each:
# yield jsmin(each.read()) + newline
# for chunk in bulk_chunks(notebooks(path, newline)):
# print chunk
# try:
# es.bulk(chunk, doc_type='notebook', index='test3')
# except pyelasticsearch.exceptions.BulkError as bulk_failure:
# result = "check bulk_failure"
# except pyelasticsearch.exceptions.ElasticHttpError as vague:
# result = "not sure"
In [39]:
# Rich-display the status string recorded by the bulk-insert cell above.
result
Out[39]:
In [40]:
# Rich-display the last ElasticHttpError caught by the bulk-insert cell.
vague
Out[40]:
In [6]:
#looks like it wants you to create an index first and put_mapping second
#print es.put_mapping.__doc__
# Load the mapping JSON (jsmin strips any comments from it) and apply it to
# the 'notebook' doc type.  The file handle was previously named `map`,
# which shadowed the builtin; renamed to avoid that.
with open("mapping_minus_curl.json", 'r') as mapping_file:
    minimap = jsmin(mapping_file.read())

es.put_mapping(index='test3', doc_type='notebook', mapping=minimap)
In [64]:
#replace this with bulk_chunks instead
#es.bulk([es.index_op(minified), es.index_op(mini2)], doc_type='notebook', index='test2')
In [66]:
!ls
In [48]:
with open('nocurl_query.txt', 'r') as query:
q=jsmin(query.read())
#print q
#says it wants a \n character?
newline = '\n'
try:
r = es.search(str(q + newline), index='test2')
print r
except:
e = pyelasticsearch.ElasticHttpError
print e
In [21]:
%%bash
# Bulk-load test2.json straight into Elasticsearch via curl
# (--data-binary preserves the newline-delimited bulk format).
curl -s -XPOST localhost:9200/_bulk --data-binary @test2.json
In [ ]:
#looks like kibana barely works at all with only 1 document, suddenly with 2 it starts showing more functionality.