In [ ]:
!(cd ~/data/libs/ ; wget "https://github.com/guyGerson/sparkCloudantObjstorDriver/raw/master/sparkCloudantObjstorDriver-assembly-1.0.jar")

In [ ]:
from pyspark import SparkConf, SparkContext, SQLContext

# The notebook starts with a Spark context already running. It has to be
# stopped first, because the Cloudant settings below are passed in when a
# context is created.
sc.stop()

# Cloudant / Watson IoT Platform settings, applied in the order listed.
# Replace every YOUR_* placeholder with real values before running.
cloudant_settings = [
    ("cloudant.host", "YOUR_USER_NAME-bluemix.cloudant.com"),
    ("cloudant.username", "YOUR_USER_NAME"),
    ("cloudant.password", "YOUR_PASSWORD"),
    ("schemaSampleSize", "1"),
    ("cloudant.account", "YOUR_USER_NAME"),
    ("cloudant.iotp.orgid", "YOUR_WATSON_IOTP_ORGID"),
]
conf = SparkConf().setAppName("SeamlessStorageDriver").setAll(cloudant_settings)

# Bring up a fresh context (and its SQL entry point) with those settings
sc = SparkContext(conf=conf)
sqlContext = SQLContext(sc)

# Register the temporary table `cloudantData`, backed by the
# cloudantObjectstore.CloudantObjectstoreRelation data source
create_table_stmt = "CREATE TEMPORARY TABLE cloudantData \
                USING cloudantObjectstore.CloudantObjectstoreRelation \
                OPTIONS(dbChoiceName 'default', timestampField 'timestamp')"
sqlContext.sql(create_table_stmt)

In [ ]:
# Timestamp and temperature readings of a single device inside a fixed time
# window, read from the `cloudantData` table. Replace YOUR_DEVICE_ID (and the
# window bounds) with the values you are interested in.
temperature_query = "SELECT timestamp, data.d.temperature AS temperature \
                    FROM cloudantData \
                    WHERE deviceId='YOUR_DEVICE_ID' \
                    AND timestamp BETWEEN '2017-02-28T23:59:59.00' AND '2017-03-01T10:43:00.00'"
d = sqlContext.sql(temperature_query)

In [ ]:
import matplotlib.pyplot as plt
import matplotlib.dates as dates
import pandas as pd
import datetime

%matplotlib inline

# Probe for a single row instead of collect()-ing the whole result set onto
# the driver just to find out whether the query matched anything.
if not d.take(1):
    print("Oops! No matching data to your query, please try again...")
else:
    # Sample a small portion of all the data retrieved from the query.
    # The sample gets its own name so `d` keeps the full query result:
    # re-running this cell no longer samples an already-sampled frame.
    d_sample = d.sample(True, fraction=0.07)

    # Arrange the data for visualization. Timestamps are parsed first so the
    # sort is chronological rather than a plain string comparison.
    df = d_sample.toPandas()
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df = df.sort_values(by=["timestamp"])

    # Define and create plot
    locator = dates.MinuteLocator(interval=2)
    xFmt = dates.DateFormatter('%H:%M:%S')

    fig, ax = plt.subplots(1, 1, figsize=(20, 5))
    # plot() handles datetime x values itself; plot_date() is deprecated in
    # recent Matplotlib releases.
    ax.plot(df['timestamp'], df['temperature'], 'b-')
    ax.set_title("Temperature of IoT Device over Time")
    ax.set_xlabel('Timestamp')
    ax.set_ylabel('Temperature')

    # format the ticks
    ax.xaxis.set_major_locator(locator)
    ax.xaxis.set_major_formatter(xFmt)
    ax.autoscale_view()
    ax.fmt_xdata = dates.DateFormatter('%Y-%m-%dT%H:%M:%S.%fZ')
    # NOTE: fixed y-range; readings outside 14..21 will not be visible.
    ax.set_ylim([14, 21])

    fig.autofmt_xdate()
    plt.show()

In [ ]: