In [3]:
import pandas as pd
import numpy as np

In [4]:
from pymongo import MongoClient



def _connect_mongo(host, port, db):
    mongo_str = 'mongodb://%s:%s/%s' % (host, port, db)
    client = MongoClient(mongo_str)
    return client[db]

def read_mongo(db, collection, query={}, host = 'localhost', port = 27017, no_id = True):
    """Read from MongoDB and store into DataFrame"""

    # Connect to MongoDB
    db = _connect_mongo(host = host, port = port, db = db)

    # Query to specific DB and collection
    collection = db[collection].find(query)

    # Create DataFrame
    global df
    df = pd.DataFrame(list(collection))

    # Delete the _id
    if no_id:
        del df['_id']

read_mongo('workshopdb','zapis1',{}, '192.168.84.17')

In [3]:
df.head(10)


Out[3]:
deviceAddress proximity rssi scanType sourceId timestamp trackingId
0 e2:02:00:04:d7:40 FAR -103 BLE rNt0R 1505214866 mabs
1 e2:02:00:04:d7:40 NEAR -74 BLE HNenF 1505214865 mabs
2 e2:02:00:04:d7:40 FAR -103 BLE rNt0R 1505214866 mabs
3 e2:02:00:04:d7:40 FAR -103 BLE rNt0R 1505214866 mabs
4 e2:02:00:04:d7:40 FAR -96 BLE rNt0R 1505214873 mabs
5 e2:02:00:04:d7:40 NEAR -73 BLE HNenF 1505214873 mabs
6 e2:02:00:04:d7:40 FAR -96 BLE ZN6Xd 1505214872 mabs
7 e2:02:00:04:d7:40 FAR -88 BLE 9McaT 1505214873 mabs
8 e2:02:00:04:d7:40 FAR -96 BLE rNt0R 1505214873 mabs
9 e2:02:00:04:d7:40 NEAR -73 BLE HNenF 1505214873 mabs

In [4]:
# Create new df with only timestamp, sourceId and rssi
df_important = df[['timestamp','sourceId','rssi']].copy()

# Print some info about this df
df_important.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9779 entries, 0 to 9778
Data columns (total 3 columns):
timestamp    9779 non-null int64
sourceId     9779 non-null object
rssi         9779 non-null int64
dtypes: int64(2), object(1)
memory usage: 191.0+ KB

In [9]:
print("Number of rows: " + str(df_important.shape[0]))
print("Number of columns: " + str(df_important.shape[1]))


Number of rows: 9779
Number of columns: 3

In [13]:
# Delete duplicates of records where is same timestamp and sourceId
# Gateways send data to the cloud only if the RSSI changes. 
# If there were no changes in RSSI, MQTT takes the last record and print it, that is why timestamp do not change.
df_important_filtered = df_important.drop_duplicates(['timestamp', 'sourceId'])

# Get info about this data
df_important_filtered.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 1778 entries, 0 to 9776
Data columns (total 3 columns):
timestamp    1778 non-null int64
sourceId     1778 non-null object
rssi         1778 non-null int64
dtypes: int64(2), object(1)
memory usage: 48.6+ KB

In [14]:
# Print first 10 elements
df_important_filtered.head(10)


Out[14]:
timestamp sourceId rssi
0 1505214866 rNt0R -103
1 1505214865 HNenF -74
4 1505214873 rNt0R -96
5 1505214873 HNenF -73
6 1505214872 ZN6Xd -96
7 1505214873 9McaT -88
25 1505214880 8JRGb -84
29 1505214882 9McaT -103
36 1505214887 QQhDc -89
41 1505214894 9McaT -93

In [5]:
def rssi_to_distance(rssi):

    ### Declare local variable TxPower
    # This is value of rssi @ 1m
    TxPower = -65
    ratio = rssi * 1.0 / TxPower
    
    # If rssi was 0
    if (rssi == 0):
      return -1
    
    if (ratio < 1.0):
        return ratio**10
    else:
      dist = (0.89976) * (ratio ** 7.7095) + 0.111
    return dist

In [25]:
# Test
rssi_to_distance(-77)


Out[25]:
3.432784121672608

In [24]:
#df_important_filtered["dist"] = df_important_filtered.apply(lambda row: rssi_to_distance(row['rssi']), axis=1)
df["dist"] = df.apply(lambda row: rssi_to_distance(row['rssi']), axis=1)
#df_important_filtered.head(10)

In [6]:
# Sort by timestamp
df_sorted = df.sort_values(['sourceId','timestamp'], ascending = True)

df_sorted.head(30)


Out[6]:
deviceAddress proximity rssi scanType sourceId timestamp trackingId
25 e2:02:00:04:d7:40 FAR -84 BLE 8JRGb 1505214880 mabs
26 e2:02:00:04:d7:40 FAR -84 BLE 8JRGb 1505214880 mabs
27 e2:02:00:04:d7:40 FAR -84 BLE 8JRGb 1505214880 mabs
28 e2:02:00:04:d7:40 FAR -84 BLE 8JRGb 1505214880 mabs
30 e2:02:00:04:d7:40 FAR -84 BLE 8JRGb 1505214880 mabs
32 e2:02:00:04:d7:40 FAR -84 BLE 8JRGb 1505214880 mabs
42 e2:02:00:04:d7:40 FAR -83 BLE 8JRGb 1505214896 mabs
44 e2:02:00:04:d7:40 FAR -83 BLE 8JRGb 1505214896 mabs
46 e2:02:00:04:d7:40 FAR -83 BLE 8JRGb 1505214896 mabs
48 e2:02:00:04:d7:40 FAR -83 BLE 8JRGb 1505214896 mabs

In [19]:
# How many GTW do I have and how many records they have
df_list = df.groupby('sourceId').count()['rssi']
df_list


Out[19]:
sourceId
8JRGb    1487
9McaT    1514
H3vx9    1713
HNenF    1357
QQhDc    1797
ZN6Xd    1336
rNt0R     575
Name: rssi, dtype: int64

In [25]:
# Split data from GTW to different sources
df_source1 = df[df.sourceId == '8JRGb'].sort_values('timestamp', ascending = True)
df_source2 = df[df.sourceId == '9McaT'].sort_values('timestamp', ascending = True)
df_source3 = df[df.sourceId == 'H3vx9'].sort_values('timestamp', ascending = True)
df_source4 = df[df.sourceId == 'HNenF'].sort_values('timestamp', ascending = True)
df_source5 = df[df.sourceId == 'QQhDc'].sort_values('timestamp', ascending = True)
df_source6 = df[df.sourceId == 'ZN6Xd'].sort_values('timestamp', ascending = True)
df_source7 = df[df.sourceId == 'rNt0R'].sort_values('timestamp', ascending = True)

In [89]:
# Check what is there
df_source3.head(20)


Out[89]:
rssi timestamp dist
64 -81 1505214909 5.019360
66 -81 1505214909 5.019360
68 -81 1505214909 5.019360
70 -81 1505214909 5.019360
75 -85 1505214915 7.228482
78 -85 1505214915 7.228482
81 -85 1505214915 7.228482
84 -85 1505214915 7.228482
87 -85 1505214915 7.228482
103 -85 1505214922 7.228482
101 -85 1505214922 7.228482
99 -85 1505214922 7.228482
97 -85 1505214922 7.228482
95 -85 1505214922 7.228482
92 -85 1505214922 7.228482
111 -80 1505214937 4.571088
115 -80 1505214937 4.571088
119 -80 1505214937 4.571088
123 -80 1505214937 4.571088
127 -80 1505214937 4.571088

In [38]:
# List all the timestamps
timestamps = df.drop_duplicates('timestamp')
timestamps = timestamps['timestamp']
timestamps.count()


Out[38]:
1469

In [45]:
sources = [dfsources = [df_source1, df_source2, df_source3, df_source4, df_source5, df_source6, df_source7]

for s in sources:
    s.drop(['deviceAddress', 'proximity', 'scanType', 'sourceId', 'trackingId'], axis=1))
result_0 = pd.merge(df_source1, df_source2, how='inner', on = 'timestamp')
result_1 = pd.merge(result_0, df_source3, on = 'timestamp')
result_2 = pd.merge(result_1, df_source4, on = 'timestamp')
result_3 = pd.merge(result_2, df_source5, on = 'timestamp')
result_4 = pd.merge(result_3, df_source6, on = 'timestamp')
result = pd.merge(result_4, df_source7, on = 'timestamp')

result_0.head(20)

# df["dist"] = df.apply(lambda row: rssi_to_distance(row['rssi']), axis=1)

#for tp in timestamps:
#    if tp == df_source1['timestamp'].item():
#        df_new['8JRGb'] =  df_source1['dist']


Out[45]:
deviceAddress_x proximity_x rssi_x scanType_x sourceId_x timestamp trackingId_x dist_x deviceAddress_y proximity_y rssi_y scanType_y sourceId_y trackingId_y dist_y
0 e2:02:00:04:d7:40 NEAR -66 BLE 8JRGb 1505215002 mabs 1.123151 e2:02:00:04:d7:40 FAR -95 BLE 9McaT mabs 16.888656
1 e2:02:00:04:d7:40 NEAR -66 BLE 8JRGb 1505215002 mabs 1.123151 e2:02:00:04:d7:40 FAR -95 BLE 9McaT mabs 16.888656
2 e2:02:00:04:d7:40 NEAR -66 BLE 8JRGb 1505215002 mabs 1.123151 e2:02:00:04:d7:40 FAR -95 BLE 9McaT mabs 16.888656
3 e2:02:00:04:d7:40 NEAR -66 BLE 8JRGb 1505215002 mabs 1.123151 e2:02:00:04:d7:40 FAR -95 BLE 9McaT mabs 16.888656
4 e2:02:00:04:d7:40 NEAR -66 BLE 8JRGb 1505215002 mabs 1.123151 e2:02:00:04:d7:40 FAR -95 BLE 9McaT mabs 16.888656
5 e2:02:00:04:d7:40 NEAR -66 BLE 8JRGb 1505215002 mabs 1.123151 e2:02:00:04:d7:40 FAR -95 BLE 9McaT mabs 16.888656
6 e2:02:00:04:d7:40 NEAR -66 BLE 8JRGb 1505215002 mabs 1.123151 e2:02:00:04:d7:40 FAR -95 BLE 9McaT mabs 16.888656
7 e2:02:00:04:d7:40 NEAR -66 BLE 8JRGb 1505215002 mabs 1.123151 e2:02:00:04:d7:40 FAR -95 BLE 9McaT mabs 16.888656
8 e2:02:00:04:d7:40 NEAR -66 BLE 8JRGb 1505215002 mabs 1.123151 e2:02:00:04:d7:40 FAR -95 BLE 9McaT mabs 16.888656
9 e2:02:00:04:d7:40 NEAR -66 BLE 8JRGb 1505215002 mabs 1.123151 e2:02:00:04:d7:40 FAR -95 BLE 9McaT mabs 16.888656
10 e2:02:00:04:d7:40 NEAR -66 BLE 8JRGb 1505215002 mabs 1.123151 e2:02:00:04:d7:40 FAR -95 BLE 9McaT mabs 16.888656
11 e2:02:00:04:d7:40 NEAR -66 BLE 8JRGb 1505215002 mabs 1.123151 e2:02:00:04:d7:40 FAR -95 BLE 9McaT mabs 16.888656
12 e2:02:00:04:d7:40 NEAR -66 BLE 8JRGb 1505215002 mabs 1.123151 e2:02:00:04:d7:40 FAR -95 BLE 9McaT mabs 16.888656
13 e2:02:00:04:d7:40 NEAR -66 BLE 8JRGb 1505215002 mabs 1.123151 e2:02:00:04:d7:40 FAR -95 BLE 9McaT mabs 16.888656
14 e2:02:00:04:d7:40 NEAR -66 BLE 8JRGb 1505215002 mabs 1.123151 e2:02:00:04:d7:40 FAR -95 BLE 9McaT mabs 16.888656
15 e2:02:00:04:d7:40 FAR -83 BLE 8JRGb 1505215027 mabs 6.034845 e2:02:00:04:d7:40 FAR -76 BLE 9McaT mabs 3.114333
16 e2:02:00:04:d7:40 FAR -83 BLE 8JRGb 1505215027 mabs 6.034845 e2:02:00:04:d7:40 FAR -76 BLE 9McaT mabs 3.114333
17 e2:02:00:04:d7:40 FAR -83 BLE 8JRGb 1505215027 mabs 6.034845 e2:02:00:04:d7:40 FAR -76 BLE 9McaT mabs 3.114333
18 e2:02:00:04:d7:40 FAR -83 BLE 8JRGb 1505215027 mabs 6.034845 e2:02:00:04:d7:40 FAR -76 BLE 9McaT mabs 3.114333
19 e2:02:00:04:d7:40 FAR -83 BLE 8JRGb 1505215027 mabs 6.034845 e2:02:00:04:d7:40 FAR -76 BLE 9McaT mabs 3.114333

In [84]:
sources = [df_source1, df_source2, df_source3, df_source4, df_source5, df_source6, df_source7]

try:
    # Drop unnecesary columns
    df_source3 = df_source3.drop(['deviceAddress', 'proximity', 'scanType', 'sourceId', 'trackingId'], axis=1)
    df_source4 = df_source4.drop(['deviceAddress', 'proximity', 'scanType', 'sourceId', 'trackingId'], axis=1)
    df_source5 = df_source5.drop(['deviceAddress', 'proximity', 'scanType', 'sourceId', 'trackingId'], axis=1)
    df_source6 = df_source6.drop(['deviceAddress', 'proximity', 'scanType', 'sourceId', 'trackingId'], axis=1)
    df_source7 = df_source7.drop(['deviceAddress', 'proximity', 'scanType', 'sourceId', 'trackingId'], axis=1)
    df_source1 = df_source1.drop(['deviceAddress', 'proximity', 'scanType', 'sourceId', 'trackingId'], axis=1)
    df_source2 = df_source2.drop(['deviceAddress', 'proximity', 'scanType', 'sourceId', 'trackingId'], axis=1)
except ValueError:
    #print("Value Error, there might be no labels like this")

finally:
    # Merge the DataFrames by the timestamp values
    result_0 = pd.merge(df_source1, df_source2, how='inner', on = 'timestamp')


  File "<ipython-input-84-4dd9e021afe5>", line 15
    finally:
          ^
IndentationError: expected an indented block

In [85]:
result_0 = pd.merge(df_source1, df_source2, how='inner', on = 'timestamp')
result_0.info()

In [87]:
result_1 = pd.merge(result_0, df_source3, how='inner', on = 'timestamp')

In [88]:
result_1


Out[88]:
rssi_x timestamp dist_x rssi_y dist_y rssi dist
0 -83 1505215027 6.034845 -76 3.114333 -79 4.158876
1 -83 1505215027 6.034845 -76 3.114333 -79 4.158876
2 -83 1505215027 6.034845 -76 3.114333 -79 4.158876
3 -83 1505215027 6.034845 -76 3.114333 -79 4.158876
4 -83 1505215027 6.034845 -76 3.114333 -79 4.158876
5 -83 1505215027 6.034845 -76 3.114333 -79 4.158876
6 -83 1505215027 6.034845 -76 3.114333 -79 4.158876
7 -83 1505215027 6.034845 -76 3.114333 -79 4.158876
8 -83 1505215027 6.034845 -76 3.114333 -79 4.158876
9 -83 1505215027 6.034845 -76 3.114333 -79 4.158876
10 -83 1505215027 6.034845 -76 3.114333 -79 4.158876
11 -83 1505215027 6.034845 -76 3.114333 -79 4.158876
12 -83 1505215027 6.034845 -76 3.114333 -79 4.158876
13 -83 1505215027 6.034845 -76 3.114333 -79 4.158876
14 -83 1505215027 6.034845 -76 3.114333 -79 4.158876
15 -83 1505215027 6.034845 -76 3.114333 -79 4.158876
16 -83 1505215027 6.034845 -76 3.114333 -79 4.158876
17 -83 1505215027 6.034845 -76 3.114333 -79 4.158876
18 -83 1505215027 6.034845 -76 3.114333 -79 4.158876
19 -83 1505215027 6.034845 -76 3.114333 -79 4.158876
20 -83 1505215027 6.034845 -76 3.114333 -79 4.158876
21 -83 1505215027 6.034845 -76 3.114333 -79 4.158876
22 -83 1505215027 6.034845 -76 3.114333 -79 4.158876
23 -83 1505215027 6.034845 -76 3.114333 -79 4.158876
24 -83 1505215027 6.034845 -76 3.114333 -79 4.158876
25 -83 1505215027 6.034845 -76 3.114333 -79 4.158876
26 -83 1505215027 6.034845 -76 3.114333 -79 4.158876
27 -83 1505215027 6.034845 -76 3.114333 -79 4.158876
28 -83 1505215027 6.034845 -76 3.114333 -79 4.158876
29 -83 1505215027 6.034845 -76 3.114333 -79 4.158876
... ... ... ... ... ... ... ...
270 -75 1505216635 2.822789 -93 14.350359 -77 3.432784
271 -75 1505216635 2.822789 -93 14.350359 -77 3.432784
272 -75 1505216635 2.822789 -93 14.350359 -77 3.432784
273 -75 1505216635 2.822789 -93 14.350359 -77 3.432784
274 -75 1505216635 2.822789 -93 14.350359 -77 3.432784
275 -75 1505216635 2.822789 -93 14.350359 -77 3.432784
276 -75 1505216635 2.822789 -93 14.350359 -77 3.432784
277 -75 1505216635 2.822789 -93 14.350359 -77 3.432784
278 -75 1505216635 2.822789 -93 14.350359 -77 3.432784
279 -75 1505216635 2.822789 -93 14.350359 -77 3.432784
280 -75 1505216635 2.822789 -93 14.350359 -77 3.432784
281 -75 1505216635 2.822789 -93 14.350359 -77 3.432784
282 -75 1505216635 2.822789 -93 14.350359 -77 3.432784
283 -75 1505216635 2.822789 -93 14.350359 -77 3.432784
284 -75 1505216635 2.822789 -93 14.350359 -77 3.432784
285 -75 1505216635 2.822789 -93 14.350359 -77 3.432784
286 -75 1505216635 2.822789 -93 14.350359 -77 3.432784
287 -75 1505216635 2.822789 -93 14.350359 -77 3.432784
288 -75 1505216635 2.822789 -93 14.350359 -77 3.432784
289 -75 1505216635 2.822789 -93 14.350359 -77 3.432784
290 -75 1505216635 2.822789 -93 14.350359 -77 3.432784
291 -75 1505216635 2.822789 -93 14.350359 -77 3.432784
292 -75 1505216635 2.822789 -93 14.350359 -77 3.432784
293 -75 1505216635 2.822789 -93 14.350359 -77 3.432784
294 -75 1505216635 2.822789 -93 14.350359 -77 3.432784
295 -75 1505216635 2.822789 -93 14.350359 -77 3.432784
296 -75 1505216635 2.822789 -93 14.350359 -77 3.432784
297 -75 1505216635 2.822789 -93 14.350359 -77 3.432784
298 -75 1505216635 2.822789 -93 14.350359 -77 3.432784
299 -75 1505216635 2.822789 -93 14.350359 -77 3.432784

300 rows × 7 columns


In [ ]:
for