In [148]:

    
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from datetime import datetime

%matplotlib inline



In [49]:

    
# Names of the data columns, declared up front.
data_columns = [
    'tracker_id',
    'dB',
    'year', 'month', 'day',
    'hour', 'minute', 'second',
    'distance',
]



In [50]:

    
# Function to convert the data's date columns into a single datetime object.
def to_datetime(x):
    """
    Combine a row's separate date/time fields into a single datetime object.

    Parameters
    ==========
    - x     : a row in the dataframe of data; anything indexable by the keys
              'year', 'month', 'day', 'hour', 'minute' and 'second'.

    Returns
    =======
    - datetime version of the date.

    Raises
    ======
    - ValueError if a field is NaN, is not a whole number, or is out of range
      for a datetime.
    """
    parts = []
    for field in ('year', 'month', 'day', 'hour', 'minute', 'second'):
        value = x[field]
        # datetime() only accepts integers, but pandas hands back floats as soon
        # as a row or column has been upcast, so convert whole numbers explicitly.
        number = int(value)
        if number != value:
            # Refuse to silently truncate something like 6.5 seconds.
            raise ValueError('{0} must be a whole number, got {1!r}'.format(field, value))
        parts.append(number)
    return datetime(*parts)

def read_data(handle):
    """
    Load one data file and index it by timestamp.

    Parameters
    ==========
    - handle    : path to the data (passed straight to pd.read_csv)

    Returns
    =======
    - df        : pandas dataframe indexed by 'date', a timestamp assembled
                  from the year/month/day/hour/minute/second columns. Those
                  columns are kept in the frame as well.
    """
    raw = pd.read_csv(handle)
    date_parts = ['year', 'month', 'day', 'hour', 'minute', 'second']
    # Build all timestamps in one vectorized call instead of a Python-level
    # apply over every row.
    dates = pd.to_datetime(raw[date_parts])
    # Return a new frame rather than mutating in place, so the function chains
    # cleanly and re-running it is harmless.
    return raw.assign(date=dates).set_index('date')



In [51]:

    
# Load the three data files with read_data, one frame per base station.
southern, origin, eastern = (
    read_data(csv_path)
    for csv_path in ('southern_pi.csv', 'origin_pi.csv', 'eastern_pi.csv')
)



In [93]:

    
# Preview the first five rows of the origin data that have no missing values.
origin.dropna().head(n=5)









    Out[93]:






  
    
      
      tracker_id
      dB
      year
      month
      day
      hour
      minute
      second
      distance
    
    
      date
      
      
      
      
      
      
      
      
      
    
  
  
    
      2015-12-21 14:42:06
      68:9E:19:11:A6:DB
      -68
      2015
      12
      21
      14
      42
      6
      0
    
    
      2015-12-21 14:42:16
      68:9E:19:11:A6:DB
      -65
      2015
      12
      21
      14
      42
      16
      0
    
    
      2015-12-21 14:42:26
      68:9E:19:11:A6:DB
      -71
      2015
      12
      21
      14
      42
      26
      0
    
    
      2015-12-21 14:42:36
      68:9E:19:11:A6:DB
      -63
      2015
      12
      21
      14
      42
      36
      0
    
    
      2015-12-21 14:42:47
      68:9E:19:11:A6:DB
      -68
      2015
      12
      21
      14
      42
      47
      0

Experiment Details

I will first analyze the data where each of the Pis was placed at origin.

Data collection for this experiment started at 2:42 pm and ended at 3:21 pm.

In this experiment, Nichola and Jon moved from 0 ft to 48 ft away from origin.



In [71]:

    
# Let's plot the data for each of the devices, from each of the base stations.

dfs = [(origin, 'origin'), (eastern, 'eastern'), (southern, 'southern')]


def plot_signal_vs_distance(device, frames=None):
    """
    Scatter-plot dB against distance for one device, one panel per data frame.

    Parameters
    ==========
    - device    : tracker_id of the device to plot.
    - frames    : optional list of (dataframe, title) pairs to draw, one panel
                  each. Defaults to the module-level `dfs`.
    """
    if frames is None:
        frames = dfs
    # Size the grid from the input so adding or removing a base station does
    # not require touching this function. squeeze=False keeps `axes` 2-D even
    # when there is a single panel.
    fig, axes = plt.subplots(1, len(frames), figsize=(3 * len(frames), 3), squeeze=False)
    for ax, (df, title) in zip(axes[0], frames):
        # `@device` binds the local variable inside the query, so the id is
        # never spliced into the expression text.
        readings = df.dropna().query('tracker_id == @device')
        readings.plot(x='distance', y='dB', kind='scatter', ax=ax)
        ax.set_title(title)
    fig.tight_layout()


device = '68:9E:19:11:A6:DB'
plot_signal_vs_distance(device)



In [73]:

    
# Same per-base-station plot, this time for the second tracked device.
device = 'F4:B8:5E:C4:56:22'
plot_signal_vs_distance(device=device)

Preliminary Conclusions

As we can see here, the data for device 68:9E:19:11:A6:DB was much better than for device F4:B8:5E:C4:56:22. Signal strength seems to correlate more strongly with distance for device 68 than for device F4. I will attempt Bayesian modelling to quantify the uncertainty around each distance measurement for device 68.



In [160]:

    
# In order to have more data put together, I will concatenate the 3 data frames.
device = '68:9E:19:11:A6:DB'
# device = 'F4:B8:5E:C4:56:22'

# pd.concat stacks the three cleaned frames in one call. The chained
# DataFrame.append used previously was removed in pandas 2.0.
all_data = (
    pd.concat([origin.dropna(), eastern.dropna(), southern.dropna()])
    .query('tracker_id=="{0}"'.format(device))
)
all_data.plot(x='distance', y='dB', kind='scatter', title='{0} data points'.format(len(all_data)))









    Out[160]:





<matplotlib.axes._subplots.AxesSubplot at 0x132bcda20>



In [161]:

    
# Distribution of dB readings at each distance for the selected device.
sns.violinplot(x='distance', y='dB', data=all_data)









    



/Users/ericmjl/anaconda/lib/python3.4/site-packages/matplotlib/__init__.py:892: UserWarning: axes.color_cycle is deprecated and replaced with axes.prop_cycle; please use the latter.
  warnings.warn(self.msg_depr % (key, alt_key))






    Out[161]:





<matplotlib.axes._subplots.AxesSubplot at 0x132fba668>

Looking at the distribution of data points, it looks like it will be difficult for us to resolve distances less than 24 ft.

I think a second experiment is in order.



In [181]:

    
import pymc3 as pm

# Linear model of signal strength against distance:
#     mean dB = A * distance + B + error
# with a Normal likelihood of fixed sd=10 around that mean.
with pm.Model() as model:
    # Hyperparameters for A and B (disabled; A and B use fixed priors below)
    # mu_a = pm.Normal('mu_A', mu=0, sd=10)
    # sig_a = pm.Uniform('sigma_A', lower=0, upper=10)
    
    # mu_b = pm.Normal('mu_B', mu=0, sd=10)
    # sig_b = pm.Uniform('sigma_B', lower=0, upper=10)
    
    # Priors for the slope (A, multiplies distance) and the intercept (B)
    A = pm.Normal('A', mu=-0.2, sd=3)
    B = pm.Normal('B', mu=-50, sd=3)
    
    # Error term: a single scalar added to every prediction.
    # NOTE(review): B and error enter the mean only as the sum B + error, so
    # the data can inform that sum but not the two terms separately. Consider
    # dropping one of them, or modelling the noise through the likelihood's sd
    # instead of a second additive constant.
    err = pm.Normal('error', mu=-30, sd=10)
    
    # Model prediction: expected dB for each row of all_data
    dB = A * all_data['distance'] + B + err
    
    # dB likelihood: observed readings are Normal around the prediction, sd fixed at 10
    db_like = pm.Normal('dB', mu=dB, sd=10, observed=all_data['dB'])



In [182]:

    
with model:
    # Find the maximum a posteriori point to use as the sampler's starting point.
    start = pm.find_MAP()
    step = pm.NUTS()
    # Pass the MAP estimate on to the sampler; it used to be computed and then
    # discarded, so sampling began from the default starting point instead.
    trace = pm.sample(10000, step, start=start)









    



 [-----------------100%-----------------] 10000 of 10000 complete in 40.7 sec



In [183]:

    
# Plot the sampled trace for a visual check of the sampling run.
pm.traceplot(trace)









    Out[183]:





array([[<matplotlib.axes._subplots.AxesSubplot object at 0x13950e860>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x139557470>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x139590ba8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1395e04e0>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x13970b8d0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x13975f470>]], dtype=object)



In [184]:

    
# Print summary statistics of the sampled trace for each model variable.
pm.summary(trace)









    



A:
 
  Mean             SD               MC Error         95% HPD interval
  -------------------------------------------------------------------
  
  -0.257           0.027            0.000            [-0.309, -0.205]

  Posterior quantiles:
  2.5            25             50             75             97.5
  |--------------|==============|==============|--------------|
  
  -0.309         -0.275         -0.257         -0.240         -0.204


B:
 
  Mean             SD               MC Error         95% HPD interval
  -------------------------------------------------------------------
  
  -49.898          2.780            0.071            [-55.325, -44.295]

  Posterior quantiles:
  2.5            25             50             75             97.5
  |--------------|==============|==============|--------------|
  
  -55.449        -51.772        -49.835        -48.043        -44.328


error:
 
  Mean             SD               MC Error         95% HPD interval
  -------------------------------------------------------------------
  
  -29.506          2.847            0.071            [-35.242, -23.981]

  Posterior quantiles:
  2.5            25             50             75             97.5
  |--------------|==============|==============|--------------|
  
  -35.086        -31.378        -29.521        -27.600        -23.802



In [ ]:

	tracker_id	dB	year	month	day	hour	minute	second	distance
date
2015-12-21 14:42:06	68:9E:19:11:A6:DB	-68	2015	12	21	14	42	6	0
2015-12-21 14:42:16	68:9E:19:11:A6:DB	-65	2015	12	21	14	42	16	0
2015-12-21 14:42:26	68:9E:19:11:A6:DB	-71	2015	12	21	14	42	26	0
2015-12-21 14:42:36	68:9E:19:11:A6:DB	-63	2015	12	21	14	42	36	0
2015-12-21 14:42:47	68:9E:19:11:A6:DB	-68	2015	12	21	14	42	47	0