Imports


In [109]:
%pylab inline

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from future.builtins import (bytes, str, open, super, range,
                      zip, round, input, int, pow, object)

if sys.version_info.major == 2:
    # in Python 2 cPickle is much faster than pickle but doesn't 
    # deal w/ unicode
    import cPickle as pickle
else:
    # Python 3 loads the faster pickle by default if it's available
    import pickle

# ---- Standard Libraries not included in pylab
import collections
import glob
import json
import random
import time
from StringIO import StringIO

# ---- Extra Libraries for additional functionality
import elasticsearch
from elasticsearch import Elasticsearch

# -------1---------2---------3---------4---------5---------6---------7---------8


Populating the interactive namespace from numpy and matplotlib
WARNING: pylab import has clobbered these variables: ['f', 'random', 'bytes']
`%matplotlib` prevents importing * from pylab and numpy

In [128]:
def parse_observation(ob):
    """Parse one whitespace-delimited GSOD observation line into a dict.

    Keys are human-readable column names; values are the raw string
    tokens with any trailing '*' quality flags stripped.  The YEARMODA
    field is rewritten as YYYY-MM-DD so elasticsearch can map it to a
    date type.
    """
    field_names = ["Station Id", "WBAN", "Date", "Mean Temp", "Num of Obs", "Dew Point",
                   "SLP", "STP", "Visibility", "Wind Speed", "Max Wind Speed",
                   "Gust", "Max Temp", "Min Temp", "Precipitation", "Snow Depth", "FRSHTT"]

    tokens = ob.strip("\n").split()

    # Drop the extra per-field observation-count tokens; descending
    # order so each deletion leaves the remaining target indices intact.
    for count_idx in (14, 12, 10, 8, 6):
        del tokens[count_idx]

    # Rewrite YEARMODA (e.g. 20090101) with hyphens (2009-01-01).
    raw_date = tokens[2]
    tokens[2] = raw_date[0:4] + "-" + raw_date[4:6] + "-" + raw_date[6:8]

    # Pair names with values, stripping '*' quality flags from values.
    return {name: value.strip("*")
            for name, value in zip(field_names, tokens)}

In [129]:
def es_insert_year(index_name, year,
                   gsod_base="/home/schiefjm/weather/gsod/",
                   es_hosts=('http://search-01.ec2.internal:9200',)):
    """ Given a year in the range 1929 to 2009 inclusive parses and inserts the
    observations for that year into an elasticseach index of index_name.

    Parameters
    ----------
    index_name : str
        Name of the elasticsearch index to write into.
    year : int or str
        Year in the range 1929 to 2009 inclusive; selects the
        subdirectory of ``gsod_base`` to read.
    gsod_base : str, optional
        Root directory containing one subdirectory of GSOD files per
        year.  Defaults to the original hardcoded location so existing
        callers are unaffected.
    es_hosts : sequence of str, optional
        Elasticsearch hosts to connect to.

    Each file's first line is a column header and is skipped; every
    remaining line is parsed with parse_observation() and indexed as a
    document of doc_type "observation".
    """
    es = Elasticsearch(list(es_hosts))
    year = str(year)
    gsod_dir = gsod_base + year + "/"

    for file_name in glob.glob(gsod_dir + "*"):
        with open(file_name, "r") as obs:
            obs.readline()  # discard the per-file column header line

            for ob in obs:
                es.index(
                    index=index_name, 
                    doc_type="observation", 
                    body=json.dumps(parse_observation(ob))
                )

In [9]:
# Smoke test: index one document into a throwaway index and confirm
# that elasticsearch reports it as newly created.
from elasticsearch import Elasticsearch

es = Elasticsearch(['http://search-01.ec2.internal:9200'])

doc = {
    'title': 'Test',
    'text': 'This is some text to test with.'
}

response = es.index(index="test_blog",
                    doc_type='blog',
                    body=doc)
print(response['created'])


True

In [6]:
curl -XDELETE "http://search-01.ec2.internal:9200/test_blog"

In [ ]: