In [2]:
import pandas as pd
import numpy as np

In [16]:
from pykalman import KalmanFilter

In [3]:
import matplotlib.pyplot as plt
import seaborn as sns
from os import path
import glob
%matplotlib inline
sns.set('notebook')
current_palette = sns.color_palette()

In [4]:
def load_grimm(folder):
    grimm = pd.read_csv(path.join(folder, 'DATA.TSV'), delimiter='\t',skiprows=7)
    grimm.index = [pd.to_datetime(grimm.Date[i]+' '+grimm.Time[i]) for i in range(len(grimm))]
    return grimm

In [5]:
grimm = load_grimm('data/090915B')
plt.plot(grimm[u'2.0um'])


Out[5]:
[<matplotlib.lines.Line2D at 0x1095998d0>]

In [20]:
def load_specks(folder, time_offset=3600, cov=1075):
    specks = []
    for speck_file in glob.glob(path.join(folder, '*.csv')):
        speck = pd.read_csv(speck_file)
        speck.index = pd.to_datetime(speck['timestamp_utc_secs']+time_offset, unit='s')
        kf = KalmanFilter(initial_state_mean=0, n_dim_obs=1, transition_matrices=[[1]], observation_matrices=[[1]], observation_covariance=[[cov]], transition_covariance=[[1]])
        speck['filtered'] = kf.filter(speck.raw_particles.values)[0]
        speck.filtered = lfilter(b4,a4, speck.filtered)
        specks.append(speck)
    return specks

In [21]:
specks = load_specks('data/090915B/data')

In [22]:
for speck in specks:
    plt.plot(speck.filtered)



In [23]:
def get_scale(speck, grimm):
    d = []
    scales = np.linspace(0,10,100)
    for x in scales:
        d.append(np.nanmean((x*speck.loc[grimm.index, 'filtered'] - grimm[u'2.0um'])**2))
    return min(zip(d, scales))


def get_scale_old(speck, grimm):
    d = []
    scales = np.linspace(20,40,60)
    for x in scales:
        d.append(np.nanmean((x*speck.loc[grimm.index, 'particle_concentration'] - grimm[u'2.0um'])**2))
    return min(zip(d, scales))

In [24]:
scales = [get_scale_old(speck, grimm) for speck in specks]
errors, scales = zip(*scales)

In [25]:
ax = plt.subplot(111)
ax.plot(grimm.index, grimm[u'2.0um'], label='GRM')

for scale, speck in zip(scales, specks):
    ax.plot(speck.index, speck.particle_concentration*scale, alpha=0.5, label='Speck')
    #ax.plot(speck.index, speck.filtered*scale, alpha=0.5, label='Speck')

plt.ylabel('2.0um conc.')
plt.xlabel('Time')
plt.legend()


Out[25]:
<matplotlib.legend.Legend at 0x10a0c7b10>

In [26]:
scales = [get_scale(speck, grimm) for speck in specks]
errors, scales = zip(*scales)
sum(errors)


Out[26]:
664247.51292585407

In [27]:
ax = plt.subplot(111)
ax.plot(grimm.index, grimm[u'2.0um'], label='GRM')

for scale, speck in zip(scales, specks):
    #ax.plot(speck.index, speck.particle_concentration*30, alpha=0.5)
    ax.plot(speck.index, speck.filtered*scale, alpha=0.5, label='Speck')

plt.legend()
plt.ylabel('2.0um conc.')
plt.xlabel('Time')
plt.legend()
plt.savefig('kf.pdf')



In [1928]:
def test_folder(folder, time_offset=3600):
    grimm = load_grimm(folder)
    specks = load_specks(path.join(folder,'data'), time_offset=time_offset)

    scales = [get_scale(speck, grimm) for speck in specks]
    errors, scales = zip(*scales)
    print errors
    ax = plt.subplot(111)
    ax.plot(grimm.index, grimm[u'2.0um'], label='GRM')

    for scale, speck in zip(scales, specks):
        ax.plot(speck.index, speck.filtered*scale, alpha=0.5, label='Speck')

    plt.legend()

In [1929]:
test_folder('data/090915B')


(95661.540958321144, 77299.936072935976, 71828.167748134045, 37750.339302478023, 28095.835500983078, 110407.63731830945, 94842.81139441613, 92435.048046209442, 55926.196584066696)

In [1930]:
test_folder('data/090915C', time_offset = 3600)


(136719.69658367868, 168944.68700109248, 229099.65691632012, 128168.2107633421, 101512.57405333401, 149264.77015539599, 97660.129167416351, 351488.90977179806, 169317.64522261068)

In [1931]:
test_folder('data/092115A')


(160482.57682402426, 277343.15075399698, 177034.66745854568, 232435.90682084052, 270185.52370622545, 477878.85821799742, 343843.996669552, 217566.24387537321, 138491.84105838084)

In [1940]:
test_folder('data/091115C', time_offset=3600)


(56740.738264933621, 48302.527450758018, 1197294.5397712125, 76245.083527931449, 58241.682033852441, 143800.02065786102, 54044.217633835178, 77033.773171741705, 65415.426773158164)

In [1933]:
test_folder('data/091415B')


(48397.892866700699, 30409.545062126748, 40452.843099615457, 63952.682691118964, 81503.505101751056, 72087.521369789654, 48299.891431952215, 55839.935922069089, 44957.100519807165)

In [1934]:
test_folder('data/091615A', time_offset=3600)


(256715.16178863784, 590165.306245349, 719400.31616055127, 247928.44016005006, 168266.16391329651, 245018.10464051989, 371121.13970904425, 398357.95347896474, 486174.59110047726)

In [1935]:
test_folder('data/091815A/')


(61841.114386494206, 83355.450363460986, 235106.5512176484, 49268.948067630961, 51557.975453852006, 50195.435487199087, 48235.005097069421, 55217.374246967316, 21850.470327280804)

In [1936]:
test_folder('data/091815B/')


(35923.778522082801, 38443.583467873003, 59045.8174973524, 29407.768021995718, 21587.189169484842, 40249.622281058131, 29812.315880970484, 35288.450287892498, 33034.747432185111)

In [1937]:
test_folder('data/092115A')


(160482.57682402426, 277343.15075399698, 177034.66745854568, 232435.90682084052, 270185.52370622545, 477878.85821799742, 343843.996669552, 217566.24387537321, 138491.84105838084)

In [1938]:
test_folder('data/092115B/')


(91785.503640444382, 119552.28063442423, 103759.39456874724, 74987.933933044798, 85675.058805638342, 109498.80736090247, 81933.399677611596, 134171.71626366914, 82391.777724183339)