# Smooth Data

``````

In [45]:

import pandas as pd

file_path = "./Physionet_ECGs/sel_100_tabsep.csv"

``````
``````

Out[45]:

1
2

0

0.000
4.725
4.775

0.004
4.735
4.745

0.008
4.725
4.735

0.012
4.715
4.745

0.016
4.720
4.735

``````

Select the column you want to smooth

``````

In [110]:

import numpy as np
import matplotlib.pyplot as plt
col = 2 #choose 1 or 2
values = data[col].values
times = data[col].index

``````
``````

Out[110]:

Float64Index([    0.0,   0.004,   0.008,   0.012,   0.016,    0.02,   0.024,
0.028,   0.032,   0.036,
...
899.96, 899.964, 899.968, 899.972, 899.976,  899.98, 899.984,
899.988, 899.992, 899.996],
dtype='float64', name=0, length=225000)

``````

## Smooth Data With Fast Fourier Transform

Fast Fourier Transform and plot the data

``````

In [95]:

from numpy import fft
signal = np.array(values)
transformed = fft.fft(signal)
real = transformed.real
plt.plot(range(len(real)),real)
plt.ylim(1000,-1000)
plt.show()

``````
``````

In [117]:

ps = np.abs(transformed)**2
timestep = times[1]-times[0]
freqs = np.fft.fftfreq(signal.size,timestep)
idx = np.argsort(freqs)
print ps
plt.plot(freqs[idx], ps[idx])
#plt.plot(freqs, real)
plt.show()

``````
``````

[  1.23266034e+12   3.77145727e+06   2.14703001e+06 ...,   2.09865291e+06
2.14703001e+06   3.77145727e+06]

``````
``````

In [135]:

#from glowing-python: http://glowingpython.blogspot.com/2011/08/how-to-plot-frequency-spectrum-with.html
from numpy import sin, linspace, pi
from pylab import plot, show, title, xlabel, ylabel, subplot
from scipy import fft, arange

def plotSpectrum(y,Fs=None):
"""
Plots a Single-Sided Amplitude Spectrum of y(t)
calcs the sampling rate from the first two elements of the array.
"""

Fs = Fs or 1/(signal[1]-signal[0]) #sampling rate
n = int(len(y)) # length of the signal
print n
k = arange(n)
T = n/Fs
frq = k/T # two sides frequency range
frq = frq[range(int(n/2))] # one side frequency range

Y = fft(y)/n # fft computing and normalization
Y = Y[range(int(n/2))]

plot(frq,abs(Y),'r') # plotting the spectrum
xlabel('Freq (Hz)')
ylabel('|Y(freq)|')

#subplot(2,1,1)
#plot(times,signal)
#xlabel('Time')
#ylabel('Amplitude')
plotSpectrum(signal[0:1000],Fs)
plt.ylim(0,.03)
show()

``````
``````

1000

``````

Mask the noise in the FFT'ed data and plot the signal with the mask

``````

In [58]:

'''
The default settings work well for column 1
'''
real2 = transformed.real
plt.plot(range(len(real2)),real2)
plt.ylim(*ylims)
plt.gca()
plt.show()

``````

reverse FFT the signal and plot the smoothed data

``````

In [55]:

ifft = fft.ifft(transformed)
plt.plot(times, ifft.real, "r", label = 'transformed')
plt.plot(times, signal, 'b', label="original")
plt.legend()
plt.xlim(0,1000)
plt.show()

``````

Save!

``````

In [ ]:

``````

## Working in R

``````

In [6]:

``````
``````

/Users/morganfine-morris/anaconda/lib/python2.7/site-packages/IPython/extensions/rmagic.py:693: UserWarning: The rmagic extension in IPython is deprecated in favour of rpy2.ipython. If available, that will be loaded instead.
http://rpy.sourceforge.net/
warnings.warn("The rmagic extension in IPython is deprecated in favour of "

``````
``````

In [70]:

%%R -i values -o values_spect
values_spect = spectrum(values)

#print(summary(values_spect))

``````
``````

``````
``````

In [94]:

print values_spect.names
for i in values_spect.items():
print i
print

``````
``````

[1] "freq"      "spec"      "coh"       "phase"     "kernel"    "df"
[7] "bandwidth" "n.used"    "orig.n"    "series"    "snames"    "method"

('freq', <FloatVector - Python:0x1146cdcf8 / R:0x114249000>
[0.000004, 0.000009, 0.000013, ..., 0.499991, 0.499996, 0.500000])

('spec', <FloatVector - Python:0x1146cd320 / R:0x115245000>
[3.928853, 1.672682, 1.579693, ..., 0.000074, 0.000108, 0.000003])

('coh', rpy2.rinterface.NULL)

('phase', rpy2.rinterface.NULL)

('kernel', rpy2.rinterface.NULL)

('df', <FloatVector - Python:0x1146cd758 / R:0x113928d08>
[1.791590])

('bandwidth', <FloatVector - Python:0x1146cdc68 / R:0x113928cd8>
[0.000001])

('n.used', <IntVector - Python:0x1146cdd40 / R:0x113c968b8>
[  225000])

('orig.n', <IntVector - Python:0x108de4878 / R:0x113906e58>
[  225000])

('series', <StrVector - Python:0x108de45f0 / R:0x11390a628>
[str])

('snames', rpy2.rinterface.NULL)

('method', <StrVector - Python:0x108de4fc8 / R:0x113ca1a98>
[str])

('taper', <FloatVector - Python:0x108de4cb0 / R:0x113cd7ed8>
[0.100000])

('pad', <FloatVector - Python:0x108de4950 / R:0x113cd7ff8>
[0.000000])

('detrend', <Vector - Python:0x108de4128 / R:0x113cd8358>
[       1])

('demean', <Vector - Python:0x108de4b48 / R:0x113cd8238>
[       0])

``````

### Savitzky-Golay

``````

In [38]:

values = ifft

``````
``````

In [39]:

%%R -i values -o sg_smoothed
sg5 = c(-3, 12, 17, 12, -3)/35
sg_smoothed = filter(values,sg5)

``````
``````

In [40]:

df_sg = pd.DataFrame(data = np.array(sg_smoothed), index = times, columns=[col]).dropna()
plt.plot(df_sg.index, df_sg.values, lw=2)
plt.plot(times, values, 'r', lw=1)
#plt.plot(times,ifft.real,"g", lw=1)
plt.show()

``````
``````

/Users/morganfine-morris/anaconda/lib/python2.7/site-packages/numpy/core/numeric.py:462: ComplexWarning: Casting complex values to real discards the imaginary part
return array(a, dtype, copy=False, order=order)

``````

## Moving Average

``````

In [41]:

%%R -i values -o smoothed
ma5 = c(1, 1, 1, 1, 1)/5;
smoothed =filter(values,ma5)
print(smoothed[0:100])

``````
``````

[1]       NA       NA 4.755170 4.735266 4.728668 4.725531 4.720334 4.713546
[9] 4.707950 4.704606 4.702447 4.700526 4.699285 4.699606 4.701676 4.705418
[17] 4.711531 4.721254 4.734957 4.751331 4.768145 4.783608 4.797020 4.808519
[25] 4.818550 4.827453 4.835248 4.841739 4.846934 4.851298 4.855391 4.859189
[33] 4.861916 4.862654 4.861105 4.857816 4.853961 4.851015 4.850307 4.852278
[41] 4.855917 4.859127 4.860060 4.858335 4.855105 4.852009 4.850026 4.849107
[49] 4.848616 4.848044 4.847371 4.846828 4.846323 4.845224 4.842856 4.839267
[57] 4.835363 4.832152 4.829865 4.827860 4.825327 4.822015 4.818392 4.815303
[65] 4.813471 4.813099 4.813739 4.814554 4.814819 4.814253 4.812997 4.811458
[73] 4.810245 4.809968 4.810736 4.811838 4.812257 4.811685 4.810891 4.810909
[81] 4.811982 4.813459 4.814537 4.814773 4.813980 4.812149 4.809764 4.807836
[89] 4.807157 4.807560 4.808241 4.808813 4.809689 4.811388 4.813901 4.817053
[97] 4.821143 4.826710 4.833617 4.840720

``````
``````

In [44]:

df = pd.DataFrame(data = np.array(smoothed), index = times, columns=[col])
df_clean = df.dropna()
plt.plot(df_clean.index, df_clean.values, lw=3, label="ifft MA Smoothed")
plt.plot(times, values, 'r', lw=2, label="original")
plt.plot(times,ifft.real,"g", lw=1, label="ifft")
plt.legend()
plt.show()

``````
``````

In [9]:

len(df_clean.values), len(df_clean.index)

``````
``````

Out[9]:

(224996, 224996)

``````
``````

In [10]:

``````
``````

In [ ]:

def calc_slopes(X, Y):
dY = (np.roll(Y, -1, axis=1) - Y)[:-1]
dX = (np.roll(X, -1, axis=0) - X)[:-1]

slopes = dY/dX
return slopes

calc_slopes(df_clean.index, df_clean.values)

``````
``````

In [62]:

import numpy as np
import matplotlib.pyplot as plt

t = np.arange(0.0, 2.0, 0.01)
s0 = np.sin(2*np.pi*t)
s1 = np.sin(4*np.pi*t)
s2 = np.sin(8*np.pi*t)

fig, ax = plt.subplots()
l, = ax.plot(t, s0, lw=2, color='red')

axcolor = 'lightgoldenrodyellow'
rax = plt.axes([0.05, 0.7, 0.15, 0.15], axisbg=axcolor)
def hzfunc(label):
hzdict = {'2 Hz':s0, '4 Hz':s1, '8 Hz':s2}
ydata = hzdict[label]
l.set_ydata(ydata)
plt.draw()

rax = plt.axes([0.05, 0.4, 0.15, 0.15], axisbg=axcolor)
def colorfunc(label):
l.set_color(label)
plt.draw()

rax = plt.axes([0.05, 0.1, 0.15, 0.15], axisbg=axcolor)
def stylefunc(label):
l.set_linestyle(label)
plt.draw()

plt.show()

``````
``````

In [ ]:

``````