In [109]:
%pylab inline
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from future.builtins import (bytes, str, open, super, range,
zip, round, input, int, pow, object)
if sys.version_info.major == 2:
# in Python 2 cPickle is much faster than pickle but doesn't
# deal w/ unicode
import cPickle as pickle
else:
# Python 3 loads the faster pickle by default if it's available
import pickle
# ---- Standard Libraries not included in pylab
import collections
import glob
import json
import random
import time
from StringIO import StringIO
# ---- Extra Libraries for additional functionality
import elasticsearch
from elasticsearch import Elasticsearch
# -------1---------2---------3---------4---------5---------6---------7---------8
In [128]:
def parse_observation(ob):
    """ Parse one GSOD observation line into a dict keyed by field name.

    The line is whitespace-split; the repeat-count tokens at positions
    6, 8, 10, 12 and 14 of the split line are dropped (the first count is
    kept as "Num of Obs"), the YYYYMMDD date is rewritten as YYYY-MM-DD so
    elasticsearch can recognize it as a date, and any trailing '*' flags
    are stripped from the values.
    """
    names = ["Station Id", "WBAN", "Date", "Mean Temp", "Num of Obs", "Dew Point",
             "SLP", "STP", "Visibility", "Wind Speed", "Max Wind Speed",
             "Gust", "Max Temp", "Min Temp", "Precipitation", "Snow Depth", "FRSHTT"]
    fields = ob.strip("\n").split()
    # Extended-slice delete removes indices 6, 8, 10, 12 and 14 at once —
    # equivalent to the backwards pop loop, with no index bookkeeping.
    del fields[6:15:2]
    # Hyphenate the date so elasticsearch will map it as a date type.
    raw_date = fields[2]
    fields[2] = "-".join([raw_date[0:4], raw_date[4:6], raw_date[6:8]])
    return {name: value.strip("*") for name, value in zip(names, fields)}
In [129]:
def es_insert_year(index_name, year,
                   gsod_dir="/home/schiefjm/weather/gsod/",
                   hosts=('http://search-01.ec2.internal:9200',)):
    """ Parse and insert one year of GSOD observations into elasticsearch.

    Parameters
    ----------
    index_name : str
        Name of the elasticsearch index to insert into.
    year : int or str
        Year of observations; must match a sub-directory of gsod_dir
        (the original data covers 1929 to 2009 inclusive).
    gsod_dir : str, optional
        Base directory containing one sub-directory per year.  Defaults to
        the original hard-coded location.
    hosts : sequence of str, optional
        Elasticsearch hosts to connect to.  Defaults to the original
        hard-coded host.
    """
    es = Elasticsearch(list(hosts))
    year_dir = gsod_dir + str(year) + "/"
    for file_name in glob.glob(year_dir + "*"):
        with open(file_name, "r") as obs:
            obs.readline()  # skip the per-file column-header row
            for ob in obs:
                es.index(
                    index=index_name,
                    doc_type="observation",
                    body=json.dumps(parse_observation(ob))
                )
In [9]:
# Smoke test: index a single document into the "test_blog" index and
# report whether elasticsearch created it.
from elasticsearch import Elasticsearch

client = Elasticsearch(['http://search-01.ec2.internal:9200'])
test_doc = {'title': 'Test',
            'text': 'This is some text to test with.'}
response = client.index(index="test_blog", doc_type='blog', body=test_doc)
print(response['created'])
In [6]:
curl -XDELETE "http://search-01.ec2.internal:9200/test_blog"
In [ ]: