In [3]:
import pandas as pd
import numpy as np
In [4]:
from pymongo import MongoClient
def _connect_mongo(host, port, db):
    # Build the connection URI and return a handle to the requested database
    mongo_str = 'mongodb://%s:%s/%s' % (host, port, db)
    client = MongoClient(mongo_str)
    return client[db]
def read_mongo(db, collection, query=None, host='localhost', port=27017, no_id=True):
    """Read from MongoDB and return the result as a DataFrame"""
    # Connect to MongoDB
    db = _connect_mongo(host=host, port=port, db=db)
    # Query the specific DB and collection (an empty query fetches everything)
    cursor = db[collection].find(query or {})
    # Create the DataFrame
    df = pd.DataFrame(list(cursor))
    # Drop MongoDB's internal _id column
    if no_id and '_id' in df:
        del df['_id']
    return df
df = read_mongo('workshopdb', 'zapis1', {}, '192.168.84.17')
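As an aside, the query argument lets MongoDB filter server-side instead of pulling the whole collection into memory. A minimal sketch, assuming the documents carry the sourceId field seen later in this notebook:
# Hypothetical: fetch only the records published by one gateway
df_one_gtw = read_mongo('workshopdb', 'zapis1', {'sourceId': '8JRGb'}, '192.168.84.17')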
In [3]:
df.head(10)
Out[3]:
In [4]:
# Create a new df containing only timestamp, sourceId and rssi
df_important = df[['timestamp','sourceId','rssi']].copy()
# Print some info about this df
df_important.info()
In [9]:
print("Number of rows: " + str(df_important.shape[0]))
print("Number of columns: " + str(df_important.shape[1]))
In [13]:
# Drop duplicate records that share the same timestamp and sourceId.
# Gateways send data to the cloud only when the RSSI changes.
# If the RSSI did not change, MQTT re-publishes the last record, which is why the timestamp stays the same.
df_important_filtered = df_important.drop_duplicates(['timestamp', 'sourceId'])
# Get info about this data
df_important_filtered.info()
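To quantify what drop_duplicates removed, the duplicated mask can be summed first; a quick sketch:
# Rows that repeat an earlier (timestamp, sourceId) pair
n_dupes = df_important.duplicated(['timestamp', 'sourceId']).sum()
print("Number of duplicate rows: " + str(n_dupes))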
In [14]:
# Print first 10 elements
df_important_filtered.head(10)
Out[14]:
In [5]:
def rssi_to_distance(rssi):
    # TxPower is the measured RSSI at 1 m
    TxPower = -65
    # An RSSI of 0 means there was no valid reading
    if rssi == 0:
        return -1
    ratio = rssi * 1.0 / TxPower
    if ratio < 1.0:
        return ratio ** 10
    else:
        # Empirical fit for distances beyond ~1 m
        dist = (0.89976) * (ratio ** 7.7095) + 0.111
        return dist
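In other words, with ratio = rssi / TxPower, this is a widely circulated empirical iBeacon fit: distance ≈ ratio^10 below 1 m (ratio < 1), and distance ≈ 0.89976 * ratio^7.7095 + 0.111 beyond it, with -1 as a sentinel for an invalid reading.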
In [25]:
# Test
rssi_to_distance(-77)
Out[25]:
In [24]:
#df_important_filtered["dist"] = df_important_filtered.apply(lambda row: rssi_to_distance(row['rssi']), axis=1)
df["dist"] = df.apply(lambda row: rssi_to_distance(row['rssi']), axis=1)
#df_important_filtered.head(10)
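apply with axis=1 calls the Python function once per row; for larger collections, the same piecewise formula can be vectorized over the whole column with np.where. A sketch, not what was run here:
# Vectorized equivalent of rssi_to_distance (TxPower = -65, as above)
ratio = df['rssi'] / -65.0
dist_vec = np.where(ratio < 1.0, ratio ** 10, 0.89976 * ratio ** 7.7095 + 0.111)
df['dist_vec'] = np.where(df['rssi'] == 0, -1, dist_vec)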
In [6]:
# Sort by sourceId, then by timestamp
df_sorted = df.sort_values(['sourceId','timestamp'], ascending = True)
df_sorted.head(30)
Out[6]:
In [19]:
# How many gateways (GTW) are there, and how many records does each have?
df_list = df.groupby('sourceId').count()['rssi']
df_list
Out[19]:
In [25]:
# Split the gateway data into one DataFrame per sourceId
df_source1 = df[df.sourceId == '8JRGb'].sort_values('timestamp', ascending = True)
df_source2 = df[df.sourceId == '9McaT'].sort_values('timestamp', ascending = True)
df_source3 = df[df.sourceId == 'H3vx9'].sort_values('timestamp', ascending = True)
df_source4 = df[df.sourceId == 'HNenF'].sort_values('timestamp', ascending = True)
df_source5 = df[df.sourceId == 'QQhDc'].sort_values('timestamp', ascending = True)
df_source6 = df[df.sourceId == 'ZN6Xd'].sort_values('timestamp', ascending = True)
df_source7 = df[df.sourceId == 'rNt0R'].sort_values('timestamp', ascending = True)
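Hardcoding seven variables works for this dataset, but the same split can be derived from the data itself; a sketch keyed by sourceId:
# One sorted DataFrame per gateway, without hardcoding the IDs
by_source = {sid: grp.sort_values('timestamp', ascending=True)
             for sid, grp in df.groupby('sourceId')}
# e.g. by_source['8JRGb'] corresponds to df_source1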
In [89]:
# Check what is there
df_source3.head(20)
Out[89]:
In [38]:
# List all the timestamps
timestamps = df.drop_duplicates('timestamp')
timestamps = timestamps['timestamp']
timestamps.count()
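Equivalently, the number of distinct timestamps is available in one call:
df['timestamp'].nunique()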
Out[38]:
In [45]:
sources = [df_source1, df_source2, df_source3, df_source4, df_source5, df_source6, df_source7]
# drop() returns a new DataFrame, so collect the results and rebind the original names
sources = [s.drop(['deviceAddress', 'proximity', 'scanType', 'sourceId', 'trackingId'], axis=1) for s in sources]
df_source1, df_source2, df_source3, df_source4, df_source5, df_source6, df_source7 = sources
result_0 = pd.merge(df_source1, df_source2, how='inner', on = 'timestamp')
result_1 = pd.merge(result_0, df_source3, on = 'timestamp')
result_2 = pd.merge(result_1, df_source4, on = 'timestamp')
result_3 = pd.merge(result_2, df_source5, on = 'timestamp')
result_4 = pd.merge(result_3, df_source6, on = 'timestamp')
result = pd.merge(result_4, df_source7, on = 'timestamp')
result_0.head(20)
# df["dist"] = df.apply(lambda row: rssi_to_distance(row['rssi']), axis=1)
#for tp in timestamps:
# if tp == df_source1['timestamp'].item():
# df_new['8JRGb'] = df_source1['dist']
Out[45]:
In [84]:
sources = [df_source1, df_source2, df_source3, df_source4, df_source5, df_source6, df_source7]
try:
    # Drop unnecessary columns
    df_source3 = df_source3.drop(['deviceAddress', 'proximity', 'scanType', 'sourceId', 'trackingId'], axis=1)
    df_source4 = df_source4.drop(['deviceAddress', 'proximity', 'scanType', 'sourceId', 'trackingId'], axis=1)
    df_source5 = df_source5.drop(['deviceAddress', 'proximity', 'scanType', 'sourceId', 'trackingId'], axis=1)
    df_source6 = df_source6.drop(['deviceAddress', 'proximity', 'scanType', 'sourceId', 'trackingId'], axis=1)
    df_source7 = df_source7.drop(['deviceAddress', 'proximity', 'scanType', 'sourceId', 'trackingId'], axis=1)
    df_source1 = df_source1.drop(['deviceAddress', 'proximity', 'scanType', 'sourceId', 'trackingId'], axis=1)
    df_source2 = df_source2.drop(['deviceAddress', 'proximity', 'scanType', 'sourceId', 'trackingId'], axis=1)
except KeyError:
    # drop raises KeyError if the labels were already removed on a previous run
    pass
finally:
    # Merge the DataFrames on the timestamp values
    result_0 = pd.merge(df_source1, df_source2, how='inner', on='timestamp')
In [85]:
result_0 = pd.merge(df_source1, df_source2, how='inner', on = 'timestamp')
result_0.info()
In [87]:
result_1 = pd.merge(result_0, df_source3, how='inner', on = 'timestamp')
In [88]:
result_1
Out[88]:
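For reference, the chain of pairwise merges can be collapsed with functools.reduce, and since each per-gateway frame boils down to (timestamp, rssi, dist), pivot_table reaches a similar wide layout in one call. A sketch under the same assumptions as above (dist computed, extra columns dropped):
from functools import reduce
frames = [df_source1, df_source2, df_source3, df_source4, df_source5, df_source6, df_source7]
# Inner-join all seven frames on timestamp in one pass
result_all = reduce(lambda left, right: pd.merge(left, right, how='inner', on='timestamp'), frames)
# Alternative: one dist column per gateway, indexed by timestamp
wide = df.pivot_table(index='timestamp', columns='sourceId', values='dist')
Column name collisions get the default _x/_y suffixes at each merge step, just as in the chained version, and pivot_table averages duplicate (timestamp, sourceId) pairs by default.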
In [ ]:
for