Imports


In [4]:
# NOTE(review): %pylab fills the interactive namespace from numpy and
# matplotlib; the warning printed under this cell shows it overwrote the
# already-defined names `bytes` and `random`. Both are re-bound by the imports
# further down in this cell.
%pylab inline

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from future.builtins import (bytes, str, open, super, range,
                      zip, round, input, int, pow, object)

# Import sys explicitly: nothing else in this cell imports it, so the version
# check below would otherwise depend on the name already being present in the
# interactive namespace.
import sys

# Bind the name `pickle` to the fastest pickle implementation available for
# the running interpreter.
if sys.version_info.major == 2:
    # in Python 2 cPickle is much faster than pickle but doesn't
    # deal w/ unicode
    import cPickle as pickle
else:
    # Python 3 loads the faster pickle by default if it's available
    import pickle

# ---- Standard Libraries not included in pylab
import collections
import json
import random
import time

# ---- Extra Libraries for additional functionality
import elasticsearch
import pyes

# Cluster endpoint(s) the notebook talks to, kept in one named constant so the
# target cluster is easy to spot and change.
ES_HOSTS = ['http://search-01.ec2.internal:9200']

# Shared client handle used by the cells below.
es = elasticsearch.Elasticsearch(ES_HOSTS)

# -------1---------2---------3---------4---------5---------6---------7---------8


Populating the interactive namespace from numpy and matplotlib
WARNING: pylab import has clobbered these variables: ['bytes', 'random']
`%matplotlib` prevents importing * from pylab and numpy

Elasticsearch Experimentation


In [17]:
# Scratch index/type names used by the experiments in the following cells.
index_name = "schiefjm_test_index"
type_name = "schiefjm_test_type"

# ignore=400 makes the client return the error body instead of raising when
# the server answers HTTP 400 (e.g. the index already exists), so this cell
# can be re-run without deleting the index first.
# NOTE: this also hides other 400 responses to the create call, such as an
# invalid index name.
es.indices.create(index=index_name, ignore=400)

# Block until the cluster reports at least "yellow" status; the returned
# health document is the cell's output.
es.cluster.health(wait_for_status="yellow")


Out[17]:
{u'active_primary_shards': 218,
 u'active_shards': 435,
 u'cluster_name': u'protoglobe',
 u'initializing_shards': 1,
 u'number_of_data_nodes': 6,
 u'number_of_nodes': 6,
 u'relocating_shards': 0,
 u'status': u'yellow',
 u'timed_out': False,
 u'unassigned_shards': 0}

In [18]:
# Run the optimize API against the test index; the response reports how many
# shards the request succeeded or failed on.
es.indices.optimize(index=index_name)


Out[18]:
{u'_shards': {u'failed': 0, u'successful': 10, u'total': 10}}

In [16]:
# Remove the test index. ignore=404 makes the client return the error body
# instead of raising when the index does not exist, so the cell is safe to
# re-run.
# NOTE(review): the execution counters show this cell (In [16]) was run before
# the create cell (In [17]); run top-to-bottom it deletes the index that the
# cells below still refer to.
es.indices.delete(index=index_name, ignore=404)


Out[16]:
{u'acknowledged': True}

In [15]:
# Close the test index.
# NOTE(review): in file order this follows the delete cell, so the index is
# gone by the time this runs; the execution counter (In [15]) shows it was
# originally executed before both the delete and the create cells.
es.indices.close(index=index_name)


Out[15]:
{u'acknowledged': True}

In [20]:
# Options shared by the two full-text fields: analyzed, stored, and with term
# vectors that carry positions and offsets.
analyzed_text_field = {
    "index": "analyzed",
    "type": "string",
    "store": "yes",
    "term_vector": "with_positions_offsets",
}

# Mapping for the document type. "_type" and "properties" are siblings at the
# top level of the type mapping.
type_mapping = {
    "_type": {"store": "yes"},
    "properties": {
        # Identifier field: stored and indexed verbatim (not analyzed).
        "uuid": {
            "index": "not_analyzed",
            "type": "string",
            "store": "yes",
        },
        # Each text field gets its own copy of the shared options.
        "title": dict(analyzed_text_field),
        "parsedtext": dict(analyzed_text_field),
    },
}

# Register the mapping for `type_name` on `index_name`; the acknowledgement
# returned by the server is the cell's output.
es.indices.put_mapping(
    index=index_name,
    doc_type=type_name,
    body={type_name: type_mapping},
)


Out[20]:
{u'acknowledged': True}

In [23]:
# Read the mapping back from the server to check what was actually registered
# for the test index.
es.indices.get_mapping(index=index_name)


Out[23]:
{u'schiefjm_test_index': {u'mappings': {u'schiefjm_test_type': {u'_type': {u'store': True},
    u'properties': {u'parsedtext': {u'store': True,
      u'term_vector': u'with_positions_offsets',
      u'type': u'string'},
     u'title': {u'store': True,
      u'term_vector': u'with_positions_offsets',
      u'type': u'string'},
     u'uuid': {u'index': u'not_analyzed',
      u'store': True,
      u'type': u'string'}}}}}}

In [24]:
# Re-point the working names at the "gsod" index; later cells read
# `index_name` / `type_name`, so the names themselves are kept.
# NOTE(review): the mapping printed for this index further down lists a single
# type called "observation", not "gsod" -- confirm the intended type_name.
index_name = type_name = "gsod"

# Block until the cluster reports at least "yellow" status and display the
# health document.
es.cluster.health(wait_for_status="yellow")


Out[24]:
{u'active_primary_shards': 218,
 u'active_shards': 436,
 u'cluster_name': u'protoglobe',
 u'initializing_shards': 0,
 u'number_of_data_nodes': 6,
 u'number_of_nodes': 6,
 u'relocating_shards': 0,
 u'status': u'green',
 u'timed_out': False,
 u'unassigned_shards': 0}

In [25]:
# List every name the pyes package exposes. pyes is imported together with the
# other third-party libraries in the notebook's import cell, not here, so all
# dependencies are visible in one place.
dir(pyes)


Out[25]:
['ANDFilter',
 'AggFactory',
 'BoolFilter',
 'BoolQuery',
 'ConstantScoreQuery',
 'CouchDBRiver',
 'CustomScoreQuery',
 'DisMaxQuery',
 'ES',
 'ESRange',
 'ESRangeOp',
 'EqualityComparableUsingAttributeDictionary',
 'ExistsFilter',
 'FacetFactory',
 'FieldParameter',
 'Filter',
 'FilterList',
 'FilterQuery',
 'FilteredQuery',
 'FunctionScoreQuery',
 'FuzzyLikeThisFieldQuery',
 'FuzzyLikeThisQuery',
 'FuzzyQuery',
 'GeoBoundingBoxFilter',
 'GeoDistanceFilter',
 'GeoIndexedShapeFilter',
 'GeoPolygonFilter',
 'GeoShapeFilter',
 'HasChildFilter',
 'HasChildQuery',
 'HasFilter',
 'HasParentFilter',
 'HasParentQuery',
 'HasQuery',
 'HighLighter',
 'IdsFilter',
 'IdsQuery',
 'InvalidParameterQuery',
 'InvalidQuery',
 'JDBCRiver',
 'LimitFilter',
 'MatchAllFilter',
 'MatchAllQuery',
 'MatchQuery',
 'MissingFilter',
 'MongoDBRiver',
 'MoreLikeThisFieldQuery',
 'MoreLikeThisQuery',
 'MultiMatchQuery',
 'NestedFilter',
 'NestedQuery',
 'NotFilter',
 'NumericRangeFilter',
 'ORFilter',
 'PercolatorQuery',
 'PrefixFilter',
 'PrefixQuery',
 'Query',
 'QueryError',
 'QueryFilter',
 'QueryParameterError',
 'QueryStringQuery',
 'RabbitMQRiver',
 'RangeFilter',
 'RangeQuery',
 'RawFilter',
 'RegexTermFilter',
 'RegexTermQuery',
 'RescoreQuery',
 'River',
 'ScriptFields',
 'ScriptFieldsError',
 'ScriptFilter',
 'Search',
 'SimpleQueryStringQuery',
 'SortFactory',
 'SpanFirstQuery',
 'SpanMultiQuery',
 'SpanNearQuery',
 'SpanNotQuery',
 'SpanOrQuery',
 'SpanTermQuery',
 'Suggest',
 'TermFilter',
 'TermQuery',
 'TermsFilter',
 'TermsLookup',
 'TermsQuery',
 'TextQuery',
 'TopChildrenQuery',
 'TwitterRiver',
 'TypeFilter',
 'VERSION',
 'WildcardQuery',
 '__author__',
 '__builtins__',
 '__contact__',
 '__doc__',
 '__docformat__',
 '__file__',
 '__homepage__',
 '__name__',
 '__package__',
 '__path__',
 '__version__',
 'absolute_import',
 'aggs',
 'clean_string',
 'connection',
 'connection_http',
 'convert_errors',
 'copy',
 'es',
 'exceptions',
 'facets',
 'fakettypes',
 'file_to_attachment',
 'filters',
 'helpers',
 'highlight',
 'is_a_spanquery',
 'is_stable_release',
 'json',
 'logger',
 'logging',
 'make_id',
 'make_path',
 'managers',
 'mappings',
 'models',
 'pyesthrift',
 'query',
 'rivers',
 'scriptfields',
 'six',
 'sort',
 'string_b64decode',
 'string_b64encode',
 'utils',
 'version_with_meta']

In [26]:
# List the names defined in the pyes.mappings submodule (field classes and
# helpers, as shown in the output below).
dir(pyes.mappings)


Out[26]:
['AbstractField',
 'AttachmentField',
 'BinaryField',
 'BooleanField',
 'ByteField',
 'DateField',
 'DocumentObjectField',
 'DotDict',
 'DoubleField',
 'FloatField',
 'GeoPointField',
 'IntegerField',
 'IpField',
 'LongField',
 'MAPPING_NAME_TYPE',
 'MappedFieldNotFoundException',
 'Mapper',
 'MultiField',
 'NestedObject',
 'NumericFieldAbstract',
 'ObjectField',
 'OrderedDict',
 'ShortField',
 'SortedDict',
 'StringField',
 '__builtins__',
 '__doc__',
 '__file__',
 '__name__',
 '__package__',
 '_thread_locals',
 'check_values',
 'date',
 'datetime',
 'get_field',
 'keys_to_string',
 'six',
 'threading',
 'to_bool']

In [28]:
# Fetch the mapping currently registered for the index named by `index_name`
# ("gsod" at this point in the notebook).
es.indices.get_mapping(index=index_name)


Out[28]:
{u'gsod': {u'mappings': {u'observation': {u'properties': {u'Date': {u'format': u'dateOptionalTime',
      u'type': u'date'},
     u'Dew Point': {u'type': u'string'},
     u'FRSHTT': {u'type': u'string'},
     u'Gust': {u'type': u'string'},
     u'Max Temp': {u'type': u'string'},
     u'Max Wind Speed': {u'type': u'string'},
     u'Mean Temp': {u'type': u'string'},
     u'Min Temp': {u'type': u'string'},
     u'Num of Obs': {u'type': u'string'},
     u'Precipitation': {u'type': u'string'},
     u'SLP': {u'type': u'string'},
     u'STP': {u'type': u'string'},
     u'Snow Depth': {u'type': u'string'},
     u'Station Id': {u'type': u'string'},
     u'Visibility': {u'type': u'string'},
     u'WBAN': {u'type': u'string'},
     u'Wind Speed': {u'type': u'string'}}}}}}

In [ ]: