In [5]:
import pandas as pd
import matplotlib.pylab as plt
import os
import numpy as np
import sys

import math
from datetime import datetime, timedelta
from glob import glob
plt.style.use('ggplot')
from mpl_toolkits.basemap import Basemap
from igrf12py.igrf12fun import runigrf12, plotigrf
%matplotlib inline
import requests
import utils
# autoreload is an IPython extension, not an importable module
%load_ext autoreload
from scipy.optimize import fmin
from scipy import signal

In [6]:
# relative path to data dir
drifter_data_dir = 'data'
# I'm setting pretty conservative start/stop times based on plots of the data
base_drifter_dict = {
            'bashful':{'type':'HMR', 'id':68740, 'launch':'2012-09-05 00:00:00', 'end':'2013-02-15 00:00:00'}, 
            'grumpy': {'type':'HMR', 'id':11070, 'launch':'2012-08-23 00:00:00', 'end':'2013-01-15 00:00:00'}, 
            'dopey':  {'type':'APS', 'id':68760, 'bias':[-40., -125., -26.],
                       'launch':'2012-09-15 00:00:00', 'end':'2013-01-01 00:00:00'}, 
             # sleepy looks like the z measurement is dominating the signal 
            'sleepy': {'type':'APS', 'id':19370, 
                       'launch':'2012-09-01 00:00:00', 'end':'2013-06-01 00:00:00'}, 
            'sneezy': {'type':'APS', 'id':66760, 
                       'launch':'2012-08-26 00:00:00', 'end':'2012-10-12 00:00:00'}, 
            }

In [7]:
# load the txt files that we've created
reload(utils)
#drifter_dict = utils.parse_raw_files(drifter_data_dir, base_drifter_dict)
drifter_dict = utils.parse_txt_files(drifter_data_dir, base_drifter_dict)


Loading sneezy meas, cal, and list data files
Loading sleepy meas, cal, and list data files
Loading grumpy meas, cal, and list data files
Loading bashful meas, cal, and list data files
Loading dopey meas, cal, and list data files
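
A minimal sketch (trim_to_mission is a hypothetical helper, not part of utils) of how the launch/end strings in base_drifter_dict could window a frame, assuming the parsed frames are indexed by sample datetime as they are here:

In [ ]:
def trim_to_mission(df, info):
    # keep only samples inside the conservative mission window
    start, end = pd.Timestamp(info['launch']), pd.Timestamp(info['end'])
    return df.loc[(df.index >= start) & (df.index <= end)]

# e.g. trim_to_mission(drifter_dict['sleepy']['cal'], base_drifter_dict['sleepy'])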

In [81]:
# map the calibration lat/lon for every drifter
# reference buoys are drawn as circles
plt.figure(figsize=(20,12))
m = Basemap(projection='merc', ellps='WGS84',
           llcrnrlat=39, 
           urcrnrlat=56, 
           llcrnrlon=-133,
           urcrnrlon=-122,
           resolution='l')

cc = ['lime', 'orangered', 'violet', 'cornflowerblue', 'magenta']
for nn, drifter in enumerate(drifter_dict.keys()):
    md = drifter_dict[drifter]['cal']
    x, y = m(np.array(md['lon']), np.array(md['lat']))
    m.scatter(x, y, c=cc[nn], edgecolor="None", s=2, label=drifter)

  
for bid in utils.buoy_list.keys():    
    latb, lonb = utils.buoy_list[bid]
    xb, yb = m(np.array(lonb), np.array(latb))
    m.scatter(xb, yb, s=35, c='navy', marker='o')

m.drawcoastlines()
plt.title("cal lat/lon")
plt.legend()
plt.show()


# map the measurement lat/lon for every drifter
# reference buoys are drawn as circles
plt.figure(figsize=(20,12))
m = Basemap(projection='merc', ellps='WGS84',
           llcrnrlat=39, 
           urcrnrlat=56, 
           llcrnrlon=-133,
           urcrnrlon=-122,
           resolution='l')

cc = ['lime', 'orangered', 'violet', 'cornflowerblue', 'magenta']
for nn, drifter in enumerate(drifter_dict.keys()):
    md = drifter_dict[drifter]['meas']
    x, y = m(np.array(md['lon']), np.array(md['lat']))
    m.scatter(x, y, c=cc[nn], edgecolor="None", s=4, label=drifter)

  
for bid in utils.buoy_list.keys():    
    latb, lonb = utils.buoy_list[bid]
    xb, yb = m(np.array(lonb), np.array(latb))
    m.scatter(xb, yb, s=35, c='navy', marker='o')

m.drawcoastlines()
plt.title("Meas lat/lon")
plt.legend()
plt.show()



# why is meas lat/lon so much better? were these values also being averaged?


plt.figure(figsize=(20,12))
m = Basemap(projection='merc', ellps='WGS84',
           llcrnrlat=39, 
           urcrnrlat=56, 
           llcrnrlon=-133,
           urcrnrlon=-122,
           resolution='l')

cc = ['lime', 'orangered', 'violet', 'cornflowerblue', 'magenta']
for nn, drifter in enumerate(drifter_dict.keys()):
    md = drifter_dict[drifter]['cal']
    md = md.groupby('cal_start_datetime').mean().sort_index()
  
    x, y = m(np.array(md['lon']), np.array(md['lat']))
    m.scatter(x, y, c=cc[nn], edgecolor="None", s=4, label=drifter)

  
for bid in utils.buoy_list.keys():    
    latb, lonb = utils.buoy_list[bid]
    xb, yb = m(np.array(lonb), np.array(latb))
    m.scatter(xb, yb, s=35, c='navy', marker='o')

m.drawcoastlines()
plt.title("average cal lat/lon")
plt.legend()
plt.show()
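
The three map cells above differ only in which frame they draw; a hedged refactor into a single helper (plot_drifter_map is new, not part of utils) is sketched below:

In [ ]:
def plot_drifter_map(drifter_dict, key, title, s=4):
    """Draw one drifter map; same Basemap extent and buoy markers as above."""
    plt.figure(figsize=(20,12))
    m = Basemap(projection='merc', ellps='WGS84',
                llcrnrlat=39, urcrnrlat=56,
                llcrnrlon=-133, urcrnrlon=-122,
                resolution='l')
    cc = ['lime', 'orangered', 'violet', 'cornflowerblue', 'magenta']
    for nn, drifter in enumerate(drifter_dict.keys()):
        md = drifter_dict[drifter][key]
        x, y = m(np.array(md['lon']), np.array(md['lat']))
        m.scatter(x, y, c=cc[nn], edgecolor="None", s=s, label=drifter)
    for bid in utils.buoy_list.keys():
        latb, lonb = utils.buoy_list[bid]
        xb, yb = m(np.array(lonb), np.array(latb))
        m.scatter(xb, yb, s=35, c='navy', marker='o')
    m.drawcoastlines()
    plt.title(title)
    plt.legend()
    plt.show()

# e.g. plot_drifter_map(drifter_dict, 'meas', 'Meas lat/lon')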



In [ ]:
# need to remove bad calibration lat/lon

In [47]:
# load drifter calibration routine
dn = 'sleepy'
num = 251
sn = drifter_dict[dn]['cal']
sn = sn.drop_duplicates(take_last=True)
# the last few samples of each routine look bad, so drop them
sn = sn.loc[sn['num']<num]
# only keep samples for which we found buoy WVHT (wave height) data
wv = sn.loc[sn['WVHT'] > 0]
# 99 is the invalid marker in the buoy dataset
wv = wv.loc[wv['WVHT'] < 99]
plt.title("Wave Height")
wv['WVHT'].plot()


Out[47]:
<matplotlib.axes._subplots.AxesSubplot at 0x10e034090>

In [49]:
# the data below suggest that the measurements show much less variance on days
# when the wave height was low
# variable to compare
column = 'f'
# maximum wave height (m) to count as "low waves"
low_height = 1
# minimum wave height (m) to count as "high waves"
high_height = 3

plt.figure()
plt.title(column)


high = wv.loc[wv['WVHT'] > high_height]
high = high.loc[high.index.unique()]
sample_start_times_high = high['cal_start_datetime'].unique()
high_waves = []
vsh = 0
wave_h = 0

for ss in sample_start_times_high:
    # look at the sample_times from this calibration start time
    hl = high.loc[high['cal_start_datetime'] == ss]
    h = list(hl[column])
    # only use calibrations that ran the full routine (num-1 samples)
    if len(h) == num-1:
        vh = round(np.var(h),1)
        vsh += vh
        rr = plt.plot(h-np.mean(h), 'teal')
        high_waves.append(h)
        wave_h += np.mean(hl['WVHT'])
if high_waves:
    vsh = int(vsh/len(high_waves))
    wave_h = wave_h/len(high_waves)
    print('teal - high waves (avg %s m) had variance in %s measured of %s' %(round(wave_h,1), column, vsh))
else:
    print("no complete calibrations found with wave height above %s m" %high_height)
    
    
# select only wave height under low_height
low = wv.loc[wv['WVHT'] < low_height]
# the dataframe is indexed by sample time, but we want a list of all samples in this cal routine, 
# so isolate based on cal_start time
sample_start_times_low = low['cal_start_datetime'].unique()
low_waves = []
# keep track of sum of variances
vsl = 0
wave_l = 0
for ss in sample_start_times_low:
    # look at the sample_times from this calibration start time
    ll = low.loc[low['cal_start_datetime'] == ss]
    l = list(ll[column])
    # only use calibrations that ran the full routine (num-1 samples)
    if len(l) == num-1:
        vl = round(np.var(l),1)
        bb = plt.plot(l-np.mean(l), 'orange')
        vsl += vl
        wave_l += np.mean(ll['WVHT'])
        low_waves.append(l)
if low_waves:
    vsl = int(vsl/len(low_waves))
    wave_l = wave_l/len(low_waves)
    print('orange - low waves (avg %s m) had variance in %s measured of %s' %(round(wave_l,1), column, vsl))
else:
    print("no complete calibrations found with wave height below %s m" %low_height)


teal - high waves (avg 3.9 m) had variance in f measured of 349695375
orange - low waves (avg 0.8 m) had variance in f measured of 160768378
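
The loops above can be condensed with a groupby; a hedged equivalent (mean_cal_variance is a new helper, not from the original analysis) that keeps the same complete-routine filter and population variance:

In [ ]:
def mean_cal_variance(df, column, num):
    # per-calibration variance of `column`, complete routines only
    grouped = df.groupby('cal_start_datetime')[column]
    counts = grouped.count()
    # lambda keeps np.var's population variance (ddof=0), matching the loops above
    variances = grouped.agg(lambda s: np.var(np.asarray(s)))
    return variances[counts == num - 1].mean()

# e.g.
# print('high:', mean_cal_variance(high, column, num))
# print('low:', mean_cal_variance(low, column, num))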

In [154]:
plt.plot(low_waves[0])


Out[154]:
[<matplotlib.lines.Line2D at 0x122e32810>]

In [50]:
# look at the specgram to determine if there is any structured noise
NFFT = 32
overlap = 30
Fs=1
if len(low_waves):
    # average together the calibrations that occurred during low-wave periods
    xl = np.mean(np.vstack(low_waves), axis=0)
    xnl = xl-np.mean(xl)
    plt.figure()
    plt.title("low wave calibrations averaged together")
    plt.plot(xnl)
    plt.figure()
    Pxxl, freqsl, binsl, iml = plt.specgram(xnl, Fs=Fs, NFFT=NFFT, noverlap=overlap)
    
    # filter low waves
    # the data looks just as bad throughout the cal cycle -
    # the electronics temperature is much warmer than the air or water temp
    # and rises about 5 degrees over each cal cycle
    plt.figure()
    lw = np.array(low_waves[0])
    plt.title('Example calibration cycle of low wave')
    # filter noise from low waves
    plt.plot(lw, 'b', label='raw signal')
    my = signal.medfilt(lw, kernel_size=23)
    plt.plot(my, 'g', label='filtered')
    plt.legend()
    guess = my.mean()/1000
    ln = sample_start_times_low[0]
    ll = low.loc[low['cal_start_datetime'] == ln].copy()  # copy so the rename below doesn't warn
    model = ll['igrff'].mean()/1000
    print('wave height mean %s' %(round(ll['WVHT'].mean(),2)))
    print('calc f:%s model f:%s error:%s' %(round(guess, 2), round(model,2), round(abs(guess-model), 2)))
    la = ll.resample('D')
    la.head()
    plt.show()

    ll.rename(columns={'temp':'drifter temp'}, inplace=True)
    ll[['drifter temp', 'ATMP', 'WTMP']].plot(title='temperature over calibration sample')
    la.head()


wave height mean 0.5
calc f:129.75 model f:53.33 error:76.42

In [51]:
# there is a ton more high frequency content in the data taken during 
# periods of high waves
if len(high_waves):
    xh = np.mean(np.vstack(high_waves), axis=0)
    xnh = xh-np.mean(xh)
    plt.figure()
    plt.title("high wave calibrations averaged together")
    plt.plot(xnh)
    plt.figure()
    Pxxh, freqsh, binsh, imh = plt.specgram(xnh, Fs=1, NFFT=NFFT, noverlap=overlap)
    
    # filter noise from high waves
    xx = 0
    plt.figure()
    plt.title('Example calibration cycle of high wave')
    hw = np.array(high_waves[xx])
    plt.plot(hw, 'b', label='raw signal')
    my = signal.medfilt(hw, kernel_size=5)
    plt.plot(my, 'g', label='filtered')
    plt.legend()
    guess = my.mean()/1000
    hn = sample_start_times_high[xx]
    hh = high.loc[high['cal_start_datetime'] == hn].copy()  # copy so the rename below doesn't warn
    model = hh['igrff'].mean()/1000
    print('wave height mean %s' %(round(hh['WVHT'].mean(),2)))
    print('calc f:%s model f:%s error:%s' %(round(guess, 2), round(model,2), round(abs(guess-model), 2)))
    la = hh.resample('D')
    la.head()
    plt.show()
    hh.rename(columns={'temp':'drifter temp'}, inplace=True)
    hh[['drifter temp', 'ATMP', 'WTMP']].plot(title='temperature over calibration sample')


wave height mean 4.56
calc f:125.21 model f:53.01 error:72.2

In [ ]:

It's obvious that the data collected on high-wave days (>4 m) has much more high-frequency content than the data collected during calm days (<1 m). Let's see if we can pull some of that high-frequency content out. I tried a low-pass filter and didn't have much luck; below, I revert to a simple median filter for smoothing outliers and get pretty good results.
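
To make the comparison concrete, here is a small synthetic sketch (made-up data; arbitrary spike positions and filter orders, not drifter data): impulsive outliers smear through a Butterworth low-pass but are rejected outright by a median filter.

In [ ]:
t = np.arange(512)
clean = 50. + 2.*np.sin(2*np.pi*t/128.)
noisy = clean.copy()
rs = np.random.RandomState(0)
spikes = rs.choice(len(t), 20, replace=False)   # 20 random outlier positions
noisy[spikes] += 40.*rs.randn(20)

b, a = signal.butter(3, 0.05)                   # 3rd-order low-pass
lp = signal.filtfilt(b, a, noisy)
med = signal.medfilt(noisy, kernel_size=11)

plt.figure(figsize=(18,6))
plt.plot(noisy, color='lightgray', label='noisy')
plt.plot(lp, 'b', label='butterworth low-pass')
plt.plot(med, 'g', label='median filter')
plt.plot(clean, 'k--', label='truth')
plt.legend()
plt.show()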


In [146]:
def sensor_minimize(vec, xf, yf, zf, ref):
    """
    Objective for fmin: total-field error of the drifter magnetometer as a
    function of the x/y/z offsets in vec. (An earlier variant also fit
    per-axis scale factors and penalized sample-to-sample differences.)
    """
    # total field after applying the candidate offsets
    tf1 = np.sqrt((xf+vec[0])**2 + (yf+vec[1])**2 + (zf+vec[2])**2)
    # the difference between the reference f and the biased f should be near zero
    return np.mean(np.abs(ref-tf1))

In [147]:
def find_bias(bias, x, y, z, ref):
    """Fit x/y/z offsets that minimize the total-field error against ref."""
    bias = fmin(sensor_minimize, x0=bias, args=(x, y, z, ref))
    ft = utils.to_total_field(x+bias[0], y+bias[1], z+bias[2])
    return bias, ft

def filter_and_bias(df, kernel_size, bias):
    """Median-filter each axis, then fit offsets against the IGRF model field."""
    x = signal.medfilt(df['x'], kernel_size=kernel_size)
    y = signal.medfilt(df['y'], kernel_size=kernel_size)
    z = signal.medfilt(df['z'], kernel_size=kernel_size)

    bias, ft = find_bias(bias, x, y, z, df['igrff'])
    print('found bias of', bias)

    df.loc[:,'clean f'] = ft
    df.loc[:,'error'] = abs(df['igrff'] - df['clean f'])

    df.loc[:,'clean x'] = x + bias[0]
    df.loc[:,'clean y'] = y + bias[1]
    df.loc[:,'clean z'] = z + bias[2]

    return df
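
A quick sanity check on find_bias with synthetic data (the field magnitude, rotation, and bias values below are made up): a field rotating in the x/y plane with constant total strength makes all three offsets identifiable, so fmin should recover a deliberately injected bias. A rough starting guess helps fmin converge.

In [ ]:
theta = np.linspace(0, 4*np.pi, 500)
ref_f = 53000.                      # made-up total field, same units as the data
x0 = 20000.*np.cos(theta)
y0 = 20000.*np.sin(theta)
z0 = np.sqrt(ref_f**2 - 20000.**2)*np.ones(500)
true_bias = np.array([4000., -2500., 1500.])
# measurements are the true field minus the injected bias,
# so find_bias should return roughly +true_bias
bias_hat, ft = find_bias([3000., -3000., 1000.],
                         x0 - true_bias[0], y0 - true_bias[1], z0 - true_bias[2],
                         ref_f*np.ones(500))
print('true bias', true_bias)
print('recovered', bias_hat)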

In [148]:
def plot_relationships(df):
    """expects data frame that has already been run through add bias"""
    cols = ['bat', 'temp', 'WVHT', 'WTMP', 'ATMP', 'WDIR', 'WSPD', 'GST',
            'DPD', 'APD', 'MWD', 'PRES', 'lat', 'lon']

    df.loc[:,cols+['error']] = df[cols+['error']].apply(lambda x: (x - x.mean()) / (x.max() - x.min()))
    for col in cols:
        # some buoy data isn't available
        ll = df.loc[df[col] != 99]
        ll = ll.loc[ll[col] != 999]
        ll = ll.loc[ll[col] != 9999]
        print(col, ll.shape)
        if ll.shape[0]:
            plt.figure()
            ll[['error', col]].plot(figsize=(18,6))
            plt.show()
    df.plot(kind='scatter', x='num', y='error', edgecolor="None", figsize=(18,6))

In [149]:
# apply all of the things we've learned
dn = 'sleepy'
# when the electronics temp gets above about 13, the signal tends to get noisier
# temperature rises pretty steadily during the 4-minute calibration routine
max_tmp = 25
min_bat = 11
on = drifter_dict[dn]['cal']
on = on.drop_duplicates(take_last=True)
# the last few samples of each routine look bad, so drop them
on = on.loc[on['num']<253]
on = utils.get_model_df(on)

In [136]:
#sn = filter_and_bias(sn, median_kernel, bias)
on['bat'].plot()


Out[136]:
<matplotlib.axes._subplots.AxesSubplot at 0x118775ed0>

In [178]:
bias = [45000., 125000., 66000.]
# remove samples where the electronics temp is above max_tmp
sn = on.loc[on['temp'] < max_tmp]
# remove samples where the battery has sagged below min_bat volts
sn = sn.loc[sn['bat'] > min_bat].copy()
# kernel size for the x,y,z median filter
median_kernel = 11
# sleepy's z sign appears opposite the model, so flip it
sn['z'] = -sn['z']
sn = filter_and_bias(sn, median_kernel, bias)
sn[['clean f', 'igrff']].plot(title='Cleaned data vs model data f', figsize=(18,6))
sn[['clean x', 'igrfx']].plot(title='Cleaned data vs model data x', figsize=(18,6))
sn[['clean y', 'igrfy']].plot(title='Cleaned data vs model data y', figsize=(18,6))
sn[['clean z', 'igrfz']].plot(title='Cleaned data vs model data z', figsize=(18,6))
sn[['z', 'igrfz']].plot(title='Raw data vs model data z', figsize=(18,6))
sn[['x', 'igrfx']].plot(title='Raw data vs model data x', figsize=(18,6))
#TODO
# overlay many calibration error traces on top of each other
# overlay many electronics temp ramps
#SUMMARY
# sleepy seems to be really temp dependent -
# near the end of life, the mag z seems to go opposite of the expected values!!!


Optimization terminated successfully.
         Current function value: 1693.234086
         Iterations: 162
         Function evaluations: 290
('found bias of', array([  30725.41410354,  118677.42989255,   73419.10234188]))
Out[178]:
<matplotlib.axes._subplots.AxesSubplot at 0x12fdae8d0>

In [172]:
plot_relationships(sn)


('bat', (65880, 48))
('temp', (65880, 48))
('WVHT', (65880, 48))
('WTMP', (65880, 48))
('ATMP', (65880, 48))
('WDIR', (65880, 48))
('WSPD', (65880, 48))
('GST', (65880, 48))
('DPD', (65880, 48))
('APD', (65880, 48))
('MWD', (65880, 48))
('PRES', (65880, 48))
('lat', (65880, 48))
('lon', (65880, 48))
[one error-vs-column figure rendered per column above]

In [175]:
sn['z mean'] = sn['z']/sn['z'].mean()
sn['clean z mean'] = sn['clean z']/sn['clean z'].mean()
sn['igrfz mean'] = sn['igrfz']/sn['igrfz'].mean()

sn[['z mean', 'clean z mean', 'igrfz mean']].plot(title='Mean-normalized z: cleaned vs model', figsize=(18,6))

#sn['f mean'] = sn['f']/sn['f'].mean()
#sn['clean f mean'] = sn['clean f']/sn['clean f'].mean()
#sn['igrff mean'] = sn['igrff']/sn['igrff'].mean()
#sn['error mean'] = abs(sn['igrff mean'] - sn['clean f mean'])
#sn['error mean'].plot(figsize=(20,6))
#sn[['f mean',  'clean f mean', 'igrff mean', 'error mean']].plot(linewidth=2, title='Cleaned data vs model data f', figsize=(18,6))



In [ ]:
# apply all of the things we've learned
dn = 'sneezy'
# when the electronics temp gets above about 13, the signal tends to get noisier
# temperature rises pretty steadily during the 4-minute calibration routine
max_tmp = 20
on = drifter_dict[dn]['cal']
on = on.drop_duplicates(take_last=True)
# the last few samples of each routine look bad, so drop them
on = on.loc[on['num']<253]
on = utils.get_model_df(on)
bias = [-40000., -125000., 26000.]
# remove samples where the electronics temp is above max_tmp
sn = on.loc[on['temp'] < max_tmp].copy()
# the ends of sneezy seem really bad, let's try cutting them off
#sn = sn.loc[sn['num'] < 140]
#sn = sn.loc[sn['num'] > 40]
# kernel size for the x,y,z median filter
median_kernel = 11
sn = filter_and_bias(sn, median_kernel, bias)
sn[['f', 'clean f', 'igrff']].plot(lw=2, title='Cleaned data vs model data f', figsize=(18,6))
sn[['x', 'clean x', 'igrfx']].plot(lw=2, title='Cleaned data vs model data x', figsize=(18,6))
sn[['y', 'clean y', 'igrfy']].plot(lw=2, title='Cleaned data vs model data y', figsize=(18,6))
sn[['z', 'clean z', 'igrfz']].plot(lw=2, title='Cleaned data vs model data z', figsize=(18,6))
# notice that the signal gets noisier at the end of the mission
plot_relationships(sn)

In [ ]:
# apply all of the things we've learned
dn = 'dopey'
# when the electronics temp gets above about 13, the signal tends to get noisier
# temperature rises pretty steadily during the 4-minute calibration routine
max_tmp = 25
on = drifter_dict[dn]['cal']

on = on.drop_duplicates(take_last=True)
# the last few samples of each routine look bad, so drop them
on = on.loc[on['num']<253]
on = utils.get_model_df(on)
bias = [-40000., -125000., -26000.]
# remove samples where the electronics temp is above max_tmp
sn = on.loc[on['temp'] < max_tmp].copy()

# kernel size for the x,y,z median filter
median_kernel = 3
sn = filter_and_bias(sn, median_kernel, bias)
sn[['f', 'clean f', 'igrff']].plot(title='Cleaned data vs model data f', figsize=(18,6))
sn[['x', 'clean x', 'igrfx']].plot(title='Cleaned data vs model data x', figsize=(18,6))
sn[['y', 'clean y', 'igrfy']].plot(title='Cleaned data vs model data y', figsize=(18,6))
sn[['z', 'clean z', 'igrfz']].plot(title='Cleaned data vs model data z', figsize=(18,6))
# notice that the signal gets noisier at the end of the mission
plot_relationships(sn)

In [ ]:
# apply all of the things we've learned
# I think that I've already fixed the HMR sensors in the raw file, but I should double check
# TODO: this looks bad. check out the offset
dn = 'grumpy'
# when the electronics temp gets above about 13, the signal tends to get noisier
# temperature rises pretty steadily during the 4-minute calibration routine
max_tmp = 25
on = drifter_dict[dn]['cal']

on = on.drop_duplicates(take_last=True)
# the last few samples of each routine look bad, so drop them
on = on.loc[on['num']<253]
on = utils.get_model_df(on)
bias = [-40000., -125000., -26000.]
# remove samples where the electronics temp is above max_tmp
sn = on.loc[on['temp'] < max_tmp].copy()

# kernel size for the x,y,z median filter
median_kernel = 11
sn = filter_and_bias(sn, median_kernel, bias)
sn[['f', 'clean f', 'igrff']].plot(title='Cleaned data vs model data f', figsize=(18,6))
sn[['x', 'clean x', 'igrfx']].plot(title='Cleaned data vs model data x', figsize=(18,6))
sn[['y', 'clean y', 'igrfy']].plot(title='Cleaned data vs model data y', figsize=(18,6))
sn[['z', 'clean z', 'igrfz']].plot(title='Cleaned data vs model data z', figsize=(18,6))
# notice that the signal gets noisier at the end of the mission
plot_relationships(sn)

In [142]:
# apply all of the things we've learned
dn = 'bashful'
# when the electronics temp gets above about 13, the signal tends to get noisier
# temperature rises pretty steadily during the 4-minute calibration routine
max_tmp = 25
on = drifter_dict[dn]['cal']
# the last few samples of each routine look bad, so drop them
on = on.loc[on['num']<253]
on = on.drop_duplicates(take_last=True)
on = utils.get_model_df(on)
bias = [-40000., -125000., -26000.]
# remove samples where the electronics temp is above max_tmp
sn = on.loc[on['temp'] < max_tmp].copy()

# kernel size for the x,y,z median filter
median_kernel = 11
sn = filter_and_bias(sn, median_kernel, bias)
sn[['f', 'clean f', 'igrff']].plot(title='Cleaned data vs model data f', figsize=(18,6))
sn[['x', 'clean x', 'igrfx']].plot(title='Cleaned data vs model data x', figsize=(18,6))
sn[['y', 'clean y', 'igrfy']].plot(title='Cleaned data vs model data y', figsize=(18,6))
sn[['z', 'clean z', 'igrfz']].plot(title='Cleaned data vs model data z', figsize=(18,6))
# notice that the signal gets noisier at the end of the mission
plot_relationships(sn)


Warning: Maximum number of function evaluations has been exceeded.
('found bias of', array([ -3.30586054e+20,  -3.02466576e+20,   3.91205677e+20]))
('bat', (34474, 40))
('temp', (34474, 40))
('WVHT', (34474, 40))
('WTMP', (34474, 40))
('ATMP', (34474, 40))
('WDIR', (34474, 40))
('WSPD', (34474, 40))
('GST', (34474, 40))
('DPD', (34474, 40))
('APD', (34474, 40))
('MWD', (34474, 40))
('PRES', (34474, 40))
('lat', (34474, 40))
('lon', (34474, 40))
[one error-vs-column figure rendered per column above]

In [ ]:
# things that might contribute to noise:
# - battery voltage dropping
# - increased electronics temperature
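
A quick, hedged way to score these two hypotheses on any cleaned frame (assumes the frame has been through filter_and_bias, so an 'error' column exists):

In [ ]:
for col in ['bat', 'temp']:
    print('corr(error, %s) = %s' % (col, round(sn['error'].corr(sn[col]), 3)))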
