In [ ]:
!(cd ~/data/libs/ ; wget "https://github.com/guyGerson/sparkCloudantObjstorDriver/raw/master/sparkCloudantObjstorDriver-assembly-1.0.jar")
In [ ]:
from pyspark import SparkConf, SparkContext, SQLContext
# The notebook starts its own Spark context (`sc`) when it is opened. Stop it
# first so that a new context can be created with the Cloudant settings below.
sc.stop()

# Cloudant / Watson IoT Platform settings for the storage driver.
# Replace every YOUR_* placeholder with real values before running this cell.
conf = SparkConf().setAppName("SeamlessStorageDriver").setAll([
    ("cloudant.host", "YOUR_USER_NAME-bluemix.cloudant.com"),
    ("cloudant.username", "YOUR_USER_NAME"),
    ("cloudant.password", "YOUR_PASSWORD"),
    ("schemaSampleSize", "1"),
    ("cloudant.account", "YOUR_USER_NAME"),
    ("cloudant.iotp.orgid", "YOUR_WATSON_IOTP_ORGID"),
])

# Start a fresh context (and SQL context) that carries the new configuration.
sc = SparkContext(conf=conf)
sqlContext = SQLContext(sc)

# Register the temporary table `cloudantData`, backed by the driver's relation
# class, so that later cells can query it with plain SQL.
sqlContext.sql(
    "CREATE TEMPORARY TABLE cloudantData "
    "USING cloudantObjectstore.CloudantObjectstoreRelation "
    "OPTIONS(dbChoiceName 'default', timestampField 'timestamp')"
)
In [ ]:
# Query `cloudantData` for the timestamp and temperature of a single device
# within a fixed time window. Replace YOUR_DEVICE_ID and adjust the two
# timestamps to suit your data. The result is bound to `d`.
d = sqlContext.sql(
    "SELECT timestamp, data.d.temperature AS temperature "
    "FROM cloudantData "
    "WHERE deviceId='YOUR_DEVICE_ID' "
    "AND timestamp BETWEEN '2017-02-28T23:59:59.00' AND '2017-03-01T10:43:00.00'"
)
In [ ]:
import matplotlib.pyplot as plt
import matplotlib.dates as dates
import pandas as pd
import datetime
%matplotlib inline
# Plot the temperature readings held in the query result `d` against time.

# Fetch at most one row to test for emptiness, rather than collecting the
# whole result to the driver only to compare it with an empty list.
if not d.take(1):
    print("Oops! No matching data to your query, please try again...")
else:
    # Sample a small portion of all the data retrieved from the query.
    # The sample gets its own name so that `d` keeps the full result and
    # re-running this cell does not sample an already-sampled DataFrame.
    sampled = d.sample(True, fraction=0.07)

    # Arrange the data for visualization. Timestamps are parsed before the
    # sort so the order is chronological, not a string comparison.
    df = sampled.toPandas()
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df = df.sort_values(by=["timestamp"])

    if df.empty:
        # A 7% random sample of a small result can contain no rows at all.
        print("The random sample contained no rows; re-run this cell to draw a new sample.")
    else:
        # Define and create plot
        locator = dates.MinuteLocator(interval=2)  # one major tick every 2 minutes
        xFmt = dates.DateFormatter('%H:%M:%S')
        fig, ax = plt.subplots(1, 1, figsize=(20, 5))
        ax.plot_date(df['timestamp'], df['temperature'], 'b-')
        plt.title("Temperature of IoT Device over Time")
        plt.xlabel('Timestamp')
        plt.ylabel('Temperature')

        # Format the ticks
        ax.xaxis.set_major_locator(locator)
        ax.xaxis.set_major_formatter(xFmt)
        ax.autoscale_view()
        ax.fmt_xdata = dates.DateFormatter('%Y-%m-%dT%H:%M:%S.%fZ')
        # Fixed y-range; readings outside 14-21 will not be visible.
        ax.set_ylim([14, 21])
        fig.autofmt_xdate()
        plt.show()
In [ ]: