Imports


In [ ]:
%pylab inline

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from future.builtins import (bytes, str, open, super, range,
                      zip, round, input, int, pow, object)

# Import `sys` explicitly: the version check below must not rely on
# `%pylab inline` happening to leak a `sys` name into the namespace.
import sys

if sys.version_info.major == 2:
    # In Python 2, cPickle is much faster than pickle, but it does not
    # deal with unicode.
    import cPickle as pickle
else:
    # Python 3 loads the faster (C-accelerated) pickle by default when it
    # is available, so the plain module is the right choice.
    import pickle

# ---- Standard Libraries not included in pylab
import collections
import json
import random
import time

# ---- Extra Libraries for additional functionality
import elasticsearch
from elasticsearch import Elasticsearch

# Build the Elasticsearch client once, bound to `es`, for use throughout
# the notebook. The host list is named so the target node is easy to spot.
ES_HOSTS = ['http://search-01.ec2.internal:9200']
es = Elasticsearch(hosts=ES_HOSTS)


# -------1---------2---------3---------4---------5---------6---------7---------8

Elasticsearch Experimentation


In [1]:
# Display the SparkContext bound to `sc` as this cell's output.
# NOTE(review): `sc` is not created in any visible cell; presumably it is
# injected by the PySpark kernel/shell at startup — confirm.
sc


Out[1]:
<pyspark.context.SparkContext at 0x7fc4fcf84110>

In [3]:
import random

# Draw 10000 random integers between 1 and 26 (inclusive) on the driver,
# hand them to Spark as an RDD, then count the elements.
samples = [random.randint(1, 26) for _draw in range(10000)]
rdd = sc.parallelize(samples)
rdd.count()


Out[3]:
10000

In [ ]: