Open Quick and Dirty Storage (no run)
Prepare Interactive Demo (run init)
In [2]:
#For author
%qtconsole
In [81]:
#Reader starts from here
#pylab pulls numpy and matplotlib into the namespace, so most functions can be used without module prefixes.
%pylab inline
import pandas as pd
import sys, os
sys.version
from IPython.display import Image
In [4]:
whos
In [149]:
# plot a wave packet and the FFT of the wave packet
# change peakWidth and rerun
timeLength = 128 #data length
amplitude = 1 #amplitude
peakWidth = 72.0 #width; must be > 0, and the .0 keeps it a float
timeIndex = arange(-timeLength/2,timeLength/2)
data = amplitude*exp(-timeIndex**2/peakWidth)
An example figure:
In [54]:
fig, axes = plt.subplots()
axes.plot(timeIndex, data, label="Gaussian Wave Packet Example")
axes.set_xlabel('x')
axes.set_ylabel('y')
axes.set_ylim([0,1.2])
axes.set_title('title')
axes.legend(loc=2)
fig.show()
#fig.savefig("figures/GausianWavePack.png")
In [46]:
# plot a wave packet and the FFT of the wave packet
# change peakWidth and rerun
timeLength = 128 #data length
amplitude = 1 #amplitude
peakWidth = 72.0 #width; must be > 0, and the .0 keeps it a float
timeIndex = arange(-timeLength/2,timeLength/2)
data = amplitude*exp(-timeIndex**2/peakWidth)
In [63]:
fig, axes = plt.subplots(2, 1, figsize=(9, 6))
axes[0].plot(timeIndex, data)
axes[0].set_title("Time Domain")
axes[1].plot(timeIndex, fftshift(abs(fft.fft(data))),'r')
axes[1].set_title("Frequency Domain(FFT)")
fig.tight_layout()
fig.show()
In [64]:
# For a non-interactive view:
# Amplitude is normalized
N = 128
A = 1.0
t = arange(-N/2, N/2)
W1 = 3.0
W2 = 90.0
W3 = 1000.0
data1 = A*exp(-t**2/W1)
data2 = A*exp(-t**2/W2)
data3 = A*exp(-t**2/W3)
fft_data1 = fftshift(abs(fft.fft(data1)))
fft_data2 = fftshift(abs(fft.fft(data2)))
fft_data3 = fftshift(abs(fft.fft(data3)))
In [65]:
fig, axes = plt.subplots(2, 1, figsize=(6, 6))
axes[0].plot(t,data1,t,data2, t,data3)
axes[0].legend(["W1", "W2", "W3"])
axes[0].set_title("Time Domain")
axes[1].plot(t, fft_data1/max(fft_data1), t, fft_data2/max(fft_data2), t, fft_data3/max(fft_data3))
axes[1].legend(["W1", "W2", "W3"])
axes[1].set_title("Frequency Domain(FFT)")
fig.tight_layout()
fig.show()
In [ ]:
## outdated
figure(1)
plot(t,data1,t,data2, t,data3)
legend(["W1", "W2", "W3"])
title("Time Domain")
figure(2)
plot(t, fft_data1/max(fft_data1), t, fft_data2/max(fft_data2), t, fft_data3/max(fft_data3))
legend(["W1", "W2", "W3"])
title("Frequency Domain")
Although the position shifts, the shape of the curve in the frequency domain does not change, unless...
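This is the Fourier shift theorem: shifting in time only multiplies the spectrum by a phase factor $e^{-i 2\pi k \tau / N}$, which vanishes when we take the magnitude. A minimal numerical check (plain numpy; np.roll gives an exact circular shift, so the magnitudes agree exactly):
In [ ]:
import numpy as np
N, W = 128, 36.0
t = np.arange(-N/2, N/2)
pack = np.exp(-t**2 / W)      #centered Gaussian packet
shifted = np.roll(pack, 20)   #the same packet, circularly shifted by tau = 20
#the magnitudes of the two spectra are identical; only the phases differ
print np.allclose(np.abs(np.fft.fft(pack)), np.abs(np.fft.fft(shifted)))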
In [67]:
# plot a wave packet and the FFT of the wave packet
# change tau and rerun
timeLength = 128 #data length
amplitude = 1 #amplitude
peakWidth = 72.0 #width; must be > 0, and the .0 keeps it a float
timeIndex = arange(-timeLength/2,timeLength/2)
tau = -40
data = amplitude*exp(-(timeIndex-tau)**2/peakWidth)
In [79]:
fig, axes = plt.subplots(2, 1, figsize=(9, 6))
axes[0].plot(timeIndex, data)
axes[0].set_title("Time Domain")
axes[1].plot(timeIndex, fftshift(abs(fft.fft(data))),'r')
axes[1].set_title("Frequency Domain (FFT)")
fig.tight_layout()
fig.show()
In [76]:
# For a non-interactive view:
# Amplitude is normalized
N = 128
A = 1.0
t = arange(-N/2, N/2)
W = 36.0
tau1 = 40
tau2 = -40
tau3 = 0
data1 = A*exp(-(t-tau1)**2/W)
data2 = A*exp(-(t-tau2)**2/W)
data3 = A*exp(-(t-tau3)**2/W)
fft_data1 = fftshift(abs(fft.fft(data1)))
fft_data2 = fftshift(abs(fft.fft(data2)))
fft_data3 = fftshift(abs(fft.fft(data3)))
In [78]:
fig, axes = plt.subplots(2, 1, figsize=(6, 6))
axes[0].plot(t,data1,t,data2, t,data3)
axes[0].legend([r"$\tau_1$", r"$\tau_2$", r"$\tau_3$"])
axes[0].set_title("Time Domain")
axes[1].plot(t, fft_data1/max(fft_data1), t, fft_data2/max(fft_data2), t, fft_data3/max(fft_data3))
axes[1].legend([r"$\tau_1$", r"$\tau_2$", r"$\tau_3$"])
axes[1].set_title("Frequency Domain (FFT)")
fig.tight_layout()
fig.show()
In [ ]:
#outdated
figure(1)
subplot(2,1,1)
plot(t,data1,t,data2, t,data3)
legend(["W1", "W2", "W3"])
title("Time Domain")
subplot(2,1,2)
plot(t, fft_data1/max(fft_data1), t, fft_data2/max(fft_data2), t, fft_data3/max(fft_data3))
legend(["W1", "W2", "W3"])
title("Frequency Domain")
In [85]:
Image("figures/phase.jpg", width = 600, height = 500)
Out[85]:
Normally distributed (Gaussian):
Poisson distributed:
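The Poisson case is not demonstrated in the cells below, so here is a minimal sketch (plain numpy; the parameter lam is illustrative). For a Poisson distribution the variance equals the mean, and for large means it approaches a Gaussian:
In [ ]:
#Poisson-distributed "photon counts" (illustrative sketch, not from the original notebook)
import numpy as np
lam = 5.0 #mean (and variance) of the Poisson counts
counts = np.random.poisson(lam, 1000)
print counts.mean() #close to lam
print counts.var()  #also close to lam: variance equals mean for Poisson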
In [92]:
#np.random.normal(mu, sigma, size)
noise = random.normal(0,1,1000)
#plot the white noise: mean around zero, std around 1
fig, axes = subplots(2,1,figsize=(6, 6))
axes[0].plot(arange(-500,500),noise)
axes[0].set_title("Gaussian noise in time domain")
title("time domain")
axes[1].plot(arange(-500,500),fftshift(abs(fft.fft(noise))))
axes[1].set_title("frequency domain")
fig.tight_layout()
fig.show()
#print summary
print "Mean of this noise is: ", noise.mean(), " and std is: ", noise.std()
Definition:
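The definition intended here appears to be the normalized cumulative spectral power (a reconstruction from the code below, where $F(j)$ is the DFT of the signal $x(n)$):
$$C(k) = \frac{\sum_{j=0}^{k} |F(j)|}{\sum_{j=0}^{N/2-1} |F(j)|}, \qquad F(j) = \sum_{n=0}^{N-1} x(n)\, e^{-i 2\pi j n / N}$$
$C(k)$ rises from near 0 to 1; for white noise the power is spread evenly over frequencies, so $C(k)$ is nearly a straight line.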
In [93]:
# Normalized cumulative sum plot of a normalized white noise
# a nearly straight line means the spectral power (information) is evenly distributed
noise = random.normal(0,1,128)
fft_noise = abs(fft.fft(noise))
fft_noise = fft_noise[0:len(fft_noise)/2]
#plot noise in time and frequency domain
fig, axes = subplots(2,1,figsize=(6, 6))
axes[0].plot(fft_noise)
axes[0].set_title("Noise spectrum (positive frequencies)")
axes[1].plot(fft_noise.cumsum()/max(fft_noise.cumsum()))
axes[1].set_title("normalized cumlative plot")
fig.tight_layout()
fig.show()
#print basic information of this noise, in time domain of course.
print "The mean value for this noise is: ", noise.mean(), " and the std is: ", noise.std()
In [97]:
# Normalized cumulative sum plot of a white noise with an offset mean_value
# a nearly straight line means the spectral power is evenly distributed; a nonzero mean adds a jump at DC
mean_value = 3 #mu
std_value = 1 #sigma
size = 128 #size
noise = random.normal(mean_value,std_value,size)
fft_noise = abs(fft.fft(noise))
fft_noise = fft_noise[0:len(fft_noise)/2]
#plot noise in time and frequency domain
fig, axes = subplots(2,1,figsize=(6, 6))
axes[0].plot(fft_noise)
axes[0].set_title("Noise spectrum (positive frequencies)")
axes[1].plot(fft_noise.cumsum()/max(fft_noise.cumsum()))
axes[1].set_ylim([0,1])
axes[1].set_title("normalized cumlative plot")
fig.tight_layout()
fig.show()
#print basic information of this noise, in time domain of course.
print "The mean value for this noise is: ", noise.mean(), " and the std is: ", noise.std()
In [100]:
# plot a wave packet and the FFT of the wave packet
# and cumulative plot
N = 128 #data length
A = 1 #amplitude
W = 72.0 #width; must be > 0, and the .0 keeps it a float
t = arange(-N/2,N/2)
data = A*exp(-t**2/W)
fft_data = abs(fft.fft(data))
fft_data = fft_data[0:len(fft_data)/2]
In [101]:
fig, axes = subplots(3,1,figsize=(3, 6))
axes[0].plot(t, data)
axes[0].set_title("Time Domain")
axes[1].plot(t, fftshift(abs(fft.fft(data))),'r')
axes[1].set_title("Fourier Domain")
axes[2].plot(fft_data.cumsum()/max(fft_data.cumsum()))
axes[2].set_title("cum sum")
fig.tight_layout()
fig.show()
In [106]:
#A Gaussian plus noise
N = 160 #data length
A = 1 #amplitude
W = 72.0 #width; must be > 0, and the .0 keeps it a float
noise_rate = 1 #noise rate, the magic part
#noise
noise_data = noise_rate * randn(N)
#time range
t = arange(-N/2,N/2)
#generate clean data
clean_data = A*exp(-t**2/W)
#plus noise to generate dirty data
dirty_data = clean_data + noise_data
In [117]:
fig, axes = subplots(2,2,figsize=(8, 8))
axes[0,0].plot(clean_data)
axes[0,0].set_title("clean data")
axes[0,1].plot(t, dirty_data,'r')
axes[0,1].set_title("dirty data")
axes[1,0].plot(fftshift(abs(fft.fft(clean_data))))
axes[1,0].set_title("fft of clean data")
axes[1,1].plot(fftshift(abs(fft.fft(dirty_data))),'r')
axes[1,1].set_title("fft of dirty data")
fig.tight_layout()
fig.show()
#pause(0.1)
In [ ]:
#plot the clean and dirty data
#outdated
figure(1)
subplot(2,2,1)
plot(clean_data)
legend(["clean data"])
subplot(2,2,2)
plot(dirty_data)
legend(["dirty data"])
subplot(2,2,3)
plot(fftshift(abs(fft.fft(clean_data))))
legend(["fft of clean data"])
subplot(2,2,4)
plot(fftshift(abs(fft.fft(dirty_data))))
legend(["fft of dirty data"])
In [119]:
#A Gaussian plus noise
N = 160 #data length
A = 1 #amplitude
W = 72.0 #width; must be > 0, and the .0 keeps it a float
noise_rate = 1 #noise rate, the magic part
#Let's sample it several times:
sample_time = 30
accumulated_dirty_data = zeros(N)
for i in xrange(sample_time):
    noise_data = noise_rate * randn(N)
    t = arange(-N/2,N/2)
    #clean data
    clean_data = A*exp(-t**2/W)
    #add noise to generate dirty data
    dirty_data = clean_data + noise_data
    #accumulate data in the time domain
    accumulated_dirty_data = accumulated_dirty_data + dirty_data
average_dirty_data = accumulated_dirty_data/sample_time
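Why averaging helps: the Gaussian signal is identical in every sample, while independent noise averages down as $1/\sqrt{M}$ for $M$ samples. A quick numerical check (a sketch, not part of the original analysis):
In [ ]:
#std of M averaged unit-variance noise traces should be about 1/sqrt(M)
import numpy as np
M, N = 30, 160
avg_noise = np.mean([np.random.randn(N) for _ in range(M)], axis=0)
print avg_noise.std()  #roughly 0.18
print 1.0/np.sqrt(M)   #predicted value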
In [121]:
fig, axes = subplots(2,2,figsize=(8, 8))
axes[0,0].plot(t, clean_data)
axes[0,0].set_title("clean data")
axes[0,1].plot(t, average_dirty_data,'r')
axes[0,1].set_title(str(sample_time) + " averaged dirty")
axes[1,0].plot(t, clean_data)
axes[1,0].set_title("clean data")
axes[1,1].plot(t, dirty_data,'r')
axes[1,1].set_title("single dirty")
fig.tight_layout()
fig.show()
In [ ]:
#plot the clean and dirty data
figure(1)
subplot(2,2,1)
plot(clean_data)
legend(["clean data"])
subplot(2,2,2)
plot(average_dirty_data)
legend([str(sample_time) + " averaged dirty"])
subplot(2,2,3)
plot(clean_data)
legend(["clean data"])
subplot(2,2,4)
plot(dirty_data)
legend(["dirty data"])
In [122]:
#A Gaussian plus noise
N = 160 #data length
A = 1 #amplitude
W = 72.0 #width; must be > 0, and the .0 keeps it a float
noise_rate = 1 #noise rate, the magic part
#Let's sample it several times, this time with a time shift:
sample_time = 30
accumulated_dirty_data = zeros(N)
i = 0
In [131]:
##manual loop: rerun this cell repeatedly instead of looping
##for i in arange(sample_time):
#time shift:
tau = 50*sin(i)
noise_data = noise_rate * randn(N)
t = arange(-N/2,N/2)
#clean data
clean_data = A*exp(-(t-tau)**2/W)
#add noise to generate dirty data
dirty_data = clean_data + noise_data
#and accumulate them
accumulated_dirty_data = accumulated_dirty_data + dirty_data
figure(1)
#plot clean data
subplot(3,1,1)
plot(clean_data)
legend(["clean data"])
subplot(3,1,2)
plot(dirty_data)
legend(["dirty data"])
subplot(3,1,3)
plot(accumulated_dirty_data)
legend(["Dirty accumulate data"])
#pause(0.3)
i += 1
average_dirty_data = accumulated_dirty_data/sample_time
In [132]:
#A Gaussian plus noise
N = 160 #data length
A = 1 #amplitude
W = 72.0 #width; must be > 0, and the .0 keeps it a float
noise_rate = 1 #noise rate, the magic part
#Let's sample it several times, this time with a time shift:
sample_time = 30
accumulated_dirty_data = zeros(N)
accumulated_dirty_data_fft = zeros(N)
In [135]:
for i in arange(sample_time):
    #time shift:
    tau = 50*sin(i)
    #noise
    noise_data = noise_rate * randn(N)
    t = arange(-N/2,N/2)
    #clean data
    clean_data = A*exp(-(t-tau)**2/W)
    #add noise to generate dirty data
    dirty_data = clean_data + noise_data
    #accumulate the dirty data
    accumulated_dirty_data = accumulated_dirty_data + dirty_data
    #fft the dirty data and add it to accumulated_dirty_data_fft
    accumulated_dirty_data_fft = accumulated_dirty_data_fft + fft.fft(dirty_data)
#after the loop, take the average
average_dirty_data = accumulated_dirty_data/sample_time
average_dirty_data_fft = accumulated_dirty_data_fft/sample_time
figure(1)
#plot the averaged dirty data
subplot(2,1,1)
plot(average_dirty_data)
legend(["averaged dirty data"])
#plot the averaged fft
subplot(2,1,2)
plot(fftshift(abs(average_dirty_data_fft)))
legend(["averaged dirty data fft"])
show()
In [134]:
#inverse average fft data and plot it
plot(abs(ifft(average_dirty_data_fft)))
Out[134]:
In [143]:
#Let us plot the averaged fft data again:
plot(t, fftshift(abs(average_dirty_data_fft)),'bx-')
title("accumalated in frequency domain")
show()
In [144]:
#N = 1000 #len(data)
#points = 3 #set points around 0
#low_filter = concatenate((ones(points+1),zeros(N-2*points-1),ones(points)), axis=0)
N = len(average_dirty_data_fft)
points = 3
low_filter = concatenate((ones(points+1),zeros(N-2*points-1),ones(points)), axis=0)
In [145]:
plot(t, fftshift(abs(average_dirty_data_fft)),'bx-')
plot(t, fftshift(low_filter)*max(abs(average_dirty_data_fft)),'r')
show()
In [148]:
#A Gaussian plus noise
N = 160 #data length
A = 1 #amplitude
W = 72.0 #width; must be > 0, and the .0 keeps it a float
noise_rate = 1 #noise rate, the magic part
#Let's sample it several times, this time with a time shift:
sample_time = 30
accumulated_dirty_data = zeros(N)
for i in arange(sample_time):
    #time shift:
    tau = 50*sin(i)
    noise_data = noise_rate * randn(N)
    t = arange(-N/2,N/2)
    #clean data
    clean_data = A*exp(-(t-tau)**2/W)
    #add noise to generate dirty data
    dirty_data = clean_data + noise_data
    #and accumulate
    accumulated_dirty_data = accumulated_dirty_data + dirty_data
#filter the last dirty trace:
points = 3 #points kept around 0
low_filter = concatenate((ones(points+1),zeros(N-2*points-1),ones(points)), axis=0)
dirty_data_fft = fft.fft(dirty_data)
filtered_fft = dirty_data_fft * low_filter
filtered_data = ifft(filtered_fft)
figure(1)
#plot clean data
subplot(4,1,1)
plot(clean_data)
legend(["clean data"])
subplot(4,1,2)
plot(dirty_data)
legend(["dirty data"])
subplot(4,1,3)
plot(abs(filtered_data))
legend(["filtered data"])
subplot(4,1,4)
plot(accumulated_dirty_data)
legend(["Dirty accumulated data"])
#pause(0.3)
#average_dirty_data = accumulated_dirty_data/sample_time
show()
For real raw data measured directly from the instrument, $I(t) \ge 0$ and $\overline{I(t)} \ge 1$.
Then we can (somehow) prove:
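A sketch of the bounds, using the normalization implemented in acf_fft below, $g(\tau) = \overline{I(t)\,I(t+\tau)}\,/\,\overline{I(t)}^{\,2} - 1$ (to be checked):
$$I(t) \ge 0 \;\Rightarrow\; \overline{I(t)\,I(t+\tau)} \ge 0 \;\Rightarrow\; g(\tau) \ge -1,$$
and, by the Cauchy-Schwarz inequality with stationarity,
$$\overline{I(t)\,I(t+\tau)} \le \sqrt{\overline{I^2}\;\overline{I^2}} = \overline{I^2} \;\Rightarrow\; g(\tau) \le g(0).$$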
We need this for the boundary condition.
Although we are not interested in $g(\tau) < 0$, $g(\tau) < 1$ should be a crucial boundary.
To be confirmed by an expert.
We will use this conclusion in the following example.
In [151]:
#Calculate the auto-correlation function based on the FFT (Wiener-Khinchin)
#only supports 1D data
#cross-correlation is left for a later optimisation
#input should be a numpy.ndarray (1d) or pandas.core.series.Series
def acf_fft(data):
    """ Series or 1darray ---> 1darray
    Calculate the auto-correlation function of the input data
    """
    data_mean = data.mean()
    #Step 1: calculate the fft of the data
    fft_data = fft.fft(data)
    #Step 2: calculate the power spectrum
    S_f = fft_data * fft_data.conj()
    #Step 3: inverse transform and normalize to get g(\tau)
    acf = ifft(S_f)/fft_data[0]/data_mean - 1
    #Clean up: return the real part of the first half
    acf = acf.real[0:int(len(acf)/2)]
    return acf
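A quick sanity check of acf_fft against a direct $O(N^2)$ computation of the same (circular) autocorrelation; acf_direct is an illustrative helper, not part of the original notebook:
In [ ]:
import numpy as np
def acf_direct(data):
    """Direct circular autocorrelation, same normalization as acf_fft."""
    N = len(data)
    m = data.mean()
    g = np.array([np.mean(data * np.roll(data, -k)) for k in np.arange(N)])
    return (g / m**2 - 1)[:int(N/2)]
x = np.random.rand(64) + 1.0 #a positive test signal
print np.allclose(acf_fft(x), acf_direct(x)) #should print True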
In [152]:
#Test case:
log_tag = 10
N = 2 ** log_tag #data length, 2^log_tag
A = 1 #Amplitude
t = arange(-N/2,N/2)
#Change here to see what will change
left_shift = 200.0
right_shift = 200.0
left_var = 1000.0
right_var = 1000.0
#Generate two Gaussian packets
left_pack = A * exp(-(t+left_shift)**2/left_var)
right_pack = A * exp(-(t-right_shift)**2/right_var)
off_set = A * ones(N) * 1
#Make up data:
test_data = left_pack + right_pack+off_set
In [154]:
#plot data:
figure(1)
#plot test_data
subplot(3,1,1)
plot(t,test_data)
#plot abs fft
subplot(3,1,2)
plot(t,fftshift(abs(fft.fft(test_data-mean(test_data)))),'o')
#plot the acf; the extra peak should appear at the position equal to the peak separation
subplot(3,1,3)
plot(acf_fft(test_data),'o')
show()
In [2]:
#initialize
%pylab inline
import pandas as pd
import os
In [35]:
#Outdated: author's local working directory
work_directory = '/home/chen/Documents/FFT_data_view/From_alex/'
os.chdir(work_directory)
!ls -l
In [36]:
#Load
#use pandas to read in the .ASC file
file_to_play = "10sec_DNA5nM_fast.ASC"
#set the column names
data_set = ["time_lag", "Correlation"]
#set number of header lines to ignore
num_header = 25
#set number of rows to read:
num_rows = 175
#read in file to df
df = pd.read_csv(file_to_play, header=num_header, sep="\t", names=data_set, skipinitialspace=True, nrows=num_rows)
In [37]:
#print header lines and data type
print df.head(),"\n"
print df.dtypes, "\n"
print df.describe()
In [16]:
#plot it
semilogx(df.time_lag, df.Correlation)
#grid(True, which="both", ls="-")
title("Auto-correlation function from data " + file_to_play)
xlabel("Time lag [s]")
ylabel("$g(\tau)$")
Out[16]:
In [18]:
#Set boundary condition to g(0) = 1
time_new = concatenate(([0],df.time_lag), axis=0)
data_new = concatenate(([1], df.Correlation), axis=0)
In [19]:
from scipy.interpolate import interp1d
#Generate an interpolation function using time_new and data_new
polate_function = interp1d(time_new, data_new, kind="cubic")
#plot it against the raw data.
#time range:
time_target = linspace(0.0002,100,500000)
In [20]:
#plot it
semilogx(df.time_lag, df.Correlation,"o", time_target, polate_function(time_target),'-')
grid(True)
legend(["Initial ACF","Interpolated ACF"])
Out[20]:
In [10]:
#First, we calculate the interpolated ACF on [0, 100] with an average time lag of 0.0002 s
time_lag_size = 0.0002
time_upto = 100
time_lag_vector = linspace(0, time_upto, int(time_upto/time_lag_size))
Interpolated_ACF = polate_function(time_lag_vector)
#Reconstruct the data for the 3rd step: basically, make the new data symmetric
ACF_to_3rd = concatenate((Interpolated_ACF, Interpolated_ACF[len(Interpolated_ACF)-2:0:-1]),axis=0)
#Calculate S(f)
S_f = fft.fft(ACF_to_3rd)
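Since the symmetrized ACF is an even sequence (up to floating-point error), the Wiener-Khinchin spectrum $S(f)$ should come out essentially real. A quick check, using S_f from the cell above:
In [ ]:
#the imaginary part should be negligible compared to the real part
print abs(S_f.imag).max() / abs(S_f.real).max()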
In [11]:
#Plot S(f)
plot(abs(fftshift(S_f)),'x')
Out[11]:
In [13]:
#cumulative sum plot
sf_target = S_f[0:int(len(S_f)/2)]
plot(abs(sf_target.cumsum())/abs(max(sf_target.cumsum())),'r-')
Out[13]:
In [108]:
#construct filter:
#total points
N = len(S_f)
#First filter: the kept points cover ~50% of the information
points1 = 18000
filter1 = concatenate((ones(points1 + 1),zeros(N - 2*points1 - 1),ones(points1)), axis=0)
#Second filter: the kept points cover ~20% of the information
points2 = 200
filter2 = concatenate((ones(points2 + 1),zeros(N - 2*points2 - 1),ones(points2)), axis=0)
#Now apply the filters
S_filtered1 = S_f * filter1
S_filtered2 = S_f * filter2
#re-calculate the ACF after filtering
acf_filtered1 = abs(ifft(S_filtered1))
acf_filtered2 = abs(ifft(S_filtered2))
#take the first half for plotting:
acf_filtered_toplot1 = acf_filtered1[0:len(acf_filtered1)/2 + 1]
acf_filtered_toplot2 = acf_filtered2[0:len(acf_filtered2)/2 + 1]
In [114]:
#Then plot against the original data
semilogx(df.time_lag, df.Correlation,'o', time_target,acf_filtered_toplot1,'-', time_target,acf_filtered_toplot2,'--')
legend(["original data", "50% information", "20% information"])
grid(True)
We can use the FFT to design a low-pass filter and clean up the data, even when only the calculated ACF curve is available.
How this method works will be clearer with raw data.
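The whole recipe fits in one small helper; a minimal sketch (the name fft_low_pass and its signature are illustrative, not from the original notebook):
In [ ]:
import numpy as np
def fft_low_pass(data, points):
    '''Keep only `points` frequency components on each side of DC,
    then inverse-transform back to the time domain.'''
    N = len(data)
    #brick-wall filter: ones around index 0 (DC), zeros elsewhere
    keep = np.concatenate((np.ones(points + 1), np.zeros(N - 2*points - 1), np.ones(points)))
    return np.fft.ifft(np.fft.fft(data) * keep)
#usage sketch: smooth a noisy trace, then take abs() for plotting
noisy = np.exp(-np.linspace(-4, 4, 256)**2) + 0.3*np.random.randn(256)
smooth = abs(fft_low_pass(noisy, 3))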
In [26]:
#Initialize
%pylab inline
#work_directory = "/home/chen/Documents/FFT_data_view/FFT_FRET/080513/"
import os
import pandas as pd
os.chdir(work_directory)
!ls -l *.txt
In [27]:
#use pandas to read in the spFRET text file
file_to_play = "nucH3acs500.txt"
#set the column names
data_set = ["data_id","timetag", "time_s", "channel", "route"]
#Column specifications, special for spFret file
col_specs = [(0,7),(11,16),(18,30),(33,37),(40,42)]
#set number of header lines to ignore
num_header = 39
#read in file to df
df = pd.read_fwf(file_to_play, header=num_header, colspecs=col_specs, names=data_set,skipinitialspace=False)
#print header lines and data type
print df.head() , "\n"
print df.dtypes , "\n"
print df.describe()
In [ ]:
def binning(data_frame, time_lag):
    '''
    input:
        raw spFRET data frame, loaded in the section above
    return:
        time_vector: evenly spaced time vector in units of time_lag, from time_lag to the final experiment time
        donor      : donor channel photon counts corresponding to time_vector
        acceptor   : acceptor channel photon counts corresponding to time_vector
        total_d_a  : total counts corresponding to time_vector
    '''
In [283]:
# Bin the raw data to a certain time scale
# NOTE: this function is only used to show how the method works; it is not an efficient implementation.
# Rewrite an efficient version if necessary (see the sketch after this cell).
def binning(data_frame, time_lag):
    '''
    input:
        raw spFRET data frame, loaded in the section above
    return:
        time_vector: evenly spaced time vector in units of time_lag, from time_lag to the final experiment time
        donor      : donor channel photon counts corresponding to time_vector
        acceptor   : acceptor channel photon counts corresponding to time_vector
        total_d_a  : total counts corresponding to time_vector
    '''
    #total photon count
    total_data_points = len(data_frame)
    #get the final experiment time, unit: s
    time_final = ceil(data_frame.time_s[total_data_points-1])
    #how many points do we need to store the data?
    return_data_points = int(ceil(time_final/time_lag))
    #initialize the return data:
    time_vector = linspace(time_lag, time_final, return_data_points)
    donor = zeros(return_data_points)
    acceptor = zeros(return_data_points)
    total_d_a = zeros(return_data_points)
    #loop over the route column in data_frame and categorize photons into donor and acceptor
    #first, create a vector mapping arrival time to bin index
    time_to_index = data_frame.time_s / time_lag
    for i in arange(total_data_points):
        #get the bin index for this photon
        DA_index = int(floor(time_to_index[i]))
        #separate photons into the donor or acceptor channel
        if data_frame.route[i] == 0.0: #donor channel
            donor[DA_index] = donor[DA_index] + 1
        else: #acceptor channel
            acceptor[DA_index] = acceptor[DA_index] + 1
        total_d_a[DA_index] = total_d_a[DA_index] + 1
    return time_vector, donor, acceptor, total_d_a
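For large data sets the per-photon Python loop is slow; the same binning can be done with numpy.bincount in a few vectorized lines. A sketch under the same assumptions (columns time_s and route, donor encoded as route == 0.0); the number of trailing empty bins may differ slightly from binning():
In [ ]:
import numpy as np
def binning_fast(data_frame, time_lag):
    '''Vectorized version of binning(); same outputs.'''
    #bin index for every photon
    idx = np.floor(data_frame.time_s.values / time_lag).astype(int)
    n_bins = idx.max() + 1
    is_donor = (data_frame.route.values == 0.0)
    donor = np.bincount(idx[is_donor], minlength=n_bins).astype(float)
    acceptor = np.bincount(idx[~is_donor], minlength=n_bins).astype(float)
    time_vector = time_lag * np.arange(1, n_bins + 1)
    return time_vector, donor, acceptor, donor + acceptor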
In [275]:
#Test case:
#set binning time lag:
time_lag = 0.4/1000 #unit: s
#use the binning function to separate donor and acceptor.
#brute force, slow, but acceptable
time_vector, donor, acceptor, total_d_a = binning(df, time_lag)
In [282]:
# Check
# Check that the results are consistent with each other
print "Length of time vector is: ", len(time_vector)
print "Length of donor vector is: ", len(donor)
print "Total photon count should be:", len(df)
print "Sum of donor and acceptor channel photons is:", donor.sum() + acceptor.sum()
Brief methods:
Why?
First, since we do not know which window size is suitable (it really depends on the data), we design a function that works for the general case:
In [ ]:
#Do a windowed accumulated fft for 1d data, window width = 2 ^ fft_factor
#Again, this is not efficient
def window_fft(data, fft_factor):
    '''
    Do a windowed accumulated fft for 1d data
    input:
        data: a 1d data vector that is long enough
        fft_factor: determines the window width; window width = 2 ^ fft_factor
    return:
        accumulate_fft: accumulated window spectrum, used for the cumulative plot
    '''
In [357]:
#Do a windowed accumulated fft for 1d data, window width = 2 ^ fft_factor
#Again, this is not efficient
def window_fft(data, fft_factor):
    '''
    Do a windowed accumulated fft for 1d data
    input:
        data: a 1d data vector that is long enough
        fft_factor: determines the window width; window width = 2 ^ fft_factor
    return:
        accumulate_fft: accumulated window spectrum, used for the cumulative plot
    '''
    #set the window width
    window_width = 2 ** fft_factor
    #get the data length
    data_length = len(data)
    #take the offset (mean value) out of the data, because it would dominate the cumulative plot
    target = data - data.mean()
    #the data length actually used; it must be an integer multiple of window_width
    data_use_length = data_length - data_length % window_width
    #how many windows will we have?
    window_num = data_use_length / window_width
    #initialize the output
    accumulate_fft = zeros(window_width)
    #main loop over the windows
    for i in arange(window_num):
        accumulate_fft = accumulate_fft + fft.fft(target[i*window_width: (i+1)*window_width])
    #return the accumulated spectrum
    return accumulate_fft
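Since the windows do not overlap, the loop can also be written as one batched FFT over a reshaped array; a sketch that should be equivalent up to floating-point error:
In [ ]:
import numpy as np
def window_fft_fast(data, fft_factor):
    '''Vectorized version of window_fft(): one FFT over stacked windows.'''
    width = 2 ** fft_factor
    target = np.asarray(data, dtype=float)
    target = target - target.mean()
    usable = len(target) - len(target) % width
    windows = target[:usable].reshape(-1, width) #one window per row
    return np.fft.fft(windows, axis=1).sum(axis=0) #accumulate the window spectra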
In [375]:
#test case:
#set fft_factor
fft_factor = 5
accumulate_fft = window_fft(total_d_a, fft_factor)
figure(1)
plot(abs(fftshift(accumulate_fft)))
Out[375]:
In [376]:
#cumsum plot:
data_length = len(accumulate_fft)
accumulate_power = accumulate_fft * accumulate_fft.conjugate()
accum_toplot = accumulate_power[0: data_length/2]
plot(accum_toplot.cumsum()/max(accum_toplot.cumsum()),'x')
Out[376]:
The work is done by the following function:
In [ ]:
#Main burst-selection code:
def fft_filter_burst(ref, total_d_a, acceptor, fft_scale, filter_ratio, threshold, max_half_width):
    '''
    Calculate the proximity ratio, using fft-based burst selection
    input:
        ref: the channel used for burst detection: total, donor, or acceptor
        total_d_a: total photon counts on the evenly spaced time-lag vector
        acceptor: acceptor channel photon counts on the evenly spaced time-lag vector
        fft_scale: 2 ^ fft_scale is the local window size
        filter_ratio: fraction of frequency-domain points kept by the low-pass filter (in [0, 1])
        threshold: threshold in the time domain (tune it by hand for now)
        max_half_width: how many (half) points around the maximum count as a burst in the time domain
    output:
        proximity: proximity FRET ratio
    '''
In [394]:
#Main function
def fft_filter_burst(ref, total_d_a, acceptor, fft_scale, filter_ratio, threshold, max_half_width):
    '''
    Calculate the proximity ratio, using fft-based burst selection
    input:
        ref: the channel used for burst detection: total, donor, or acceptor
        total_d_a: total photon counts on the evenly spaced time-lag vector
        acceptor: acceptor channel photon counts on the evenly spaced time-lag vector
        fft_scale: 2 ^ fft_scale is the local window size
        filter_ratio: fraction of frequency-domain points kept by the low-pass filter (in [0, 1])
        threshold: threshold in the time domain (tune it by hand for now)
        max_half_width: how many (half) points around the maximum count as a burst in the time domain
    output:
        proximity: proximity FRET ratio
    '''
    #get the data length:
    data_length = len(total_d_a)
    #set the window width
    window_width = 2 ** fft_scale
    #how many points are we going to use?
    data_use_length = data_length - data_length % window_width
    #how many windows in total?
    window_num = data_use_length / window_width
    #set the parameters for the low-pass filter
    fft_filter_point = int(floor(window_width * filter_ratio / 2))
    #initialize the filter
    fft_filter = concatenate((ones(fft_filter_point + 1),zeros(window_width - 2*fft_filter_point - 1),ones(fft_filter_point)), axis=0)
    #initialize the output
    proximity = []
    #the main loop
    for i in arange(window_num):
        fft_local = fft.fft(ref[i*window_width:(i+1)*window_width])
        fft_filtered = fft_local * fft_filter
        #ifft to get the filtered data (take the real part; the filter is symmetric)
        data_filtered_local = real(ifft(fft_filtered))
        local_max = data_filtered_local.max()
        local_index = data_filtered_local.argmax()
        #Check whether it is a valid burst:
        #it should be above the threshold and not on the window boundary.
        #The boundary case can be optimized
        if local_max > threshold and local_index - fft_scale/2 > 0 and local_index + fft_scale/2 < window_width:
            burst_begin = local_index - max_half_width + i * window_width
            burst_end = local_index + max_half_width + i * window_width
            #P = A/(D+A)
            proximity.append(sum(acceptor[burst_begin:burst_end])/sum(total_d_a[burst_begin:burst_end]))
    return proximity
Now let us use it to calculate the proximity FRET ratio, to see whether we can reproduce the result of FretChen.
In [407]:
#Test case:
#Initial parameters; can be changed
filter_ratio = 10/16.0
fft_scale = 6
threshold = 12
max_half_width = 3
#calculate the proximity ratio for this case
P1 = fft_filter_burst(total_d_a, total_d_a, acceptor, fft_scale, filter_ratio,threshold, max_half_width)
figure(1)
hist(P1,40)
title("Proximity FRET Ratio calculate by using FFT")
Out[407]:
In [408]:
#threshold using acceptor channel
P2 = fft_filter_burst(acceptor, total_d_a, acceptor, fft_scale, filter_ratio,threshold, max_half_width)
figure(1)
hist(P2,50)
title("Proximity FRET ratio use acceptor as reference")
Out[408]:
In [4]:
#How to set the filter?
#A symmetric spectrum with N points.
#point 0 is the center
##filter:
## number of points:N
## around: points
#N = 1000 #len(data)
#points = 3 #set points around 0
#low_filter = concatenate((ones(points+1),zeros(N-2*points-1),ones(points)), axis=0)
In [257]:
#test whether g(\tau) can be < 0
figure(1)
data = A* ones(N) + A* sin(t)
plot(data)
plot(acf_fft(data))
Out[257]:
In [258]:
#plot new data
plot(time_new, data_new,'x')
grid(True, which="both", ls="-")
title("Auto-correlation function of new data")
xlabel("Time lag [s]")
ylabel("$g(\tau)$")
Out[258]:
In [276]:
#Interpolation:
test_t = linspace(0.0002, 10, 100000)
from scipy.interpolate import interp1d
f = interp1d(df.time_lag, df.Correlation)
f2 = interp1d(df.time_lag, df.Correlation, kind="cubic")
semilogx(df.time_lag, df.Correlation,'o', test_t, f(test_t),'x-', test_t, f2(test_t),'-')
Out[276]:
In [1]:
#Youtube
from IPython.display import YouTubeVideo
#YouTubeVideo('7tHc9xWhFH4') #Haydn - Emperor Quartet Mvt.2
In [30]:
whos
In [1]:
pwd
Out[1]:
In [ ]: