In [118]:
from scipy.io.wavfile import read
import matplotlib.pyplot as plt
import sys
import math
import numpy as np
from matplotlib import cm
def set_max_recursion(num):
    """Set the interpreter's maximum recursion depth.

    Thin wrapper around ``sys.setrecursionlimit``.

    Args:
        num: New recursion limit, passed through unchanged.
    """
    sys.setrecursionlimit(num)
def get_audio_data(filename):
    """Load a WAV file and return its first channel as 32-bit integers.

    Args:
        filename: Path of the WAV file, handed to ``scipy.io.wavfile.read``.

    Returns:
        A two-element list ``[rate, samples]`` where ``rate`` is the sample
        rate reported by the reader and ``samples`` is a Python list of
        ``np.int32`` scalars, one per frame.

    Multi-channel files contribute only channel 0. Mono files (which the
    reader returns as a 1-D array) are accepted as-is; indexing ``[0]`` on
    each scalar sample used to raise for them.
    """
    rate, samples = read(filename)
    samples = np.asarray(samples)
    if samples.ndim > 1:
        # (n_frames, n_channels): keep the first channel only.
        samples = samples[:, 0]
    # Widen in one vectorised step instead of once per sample; list() of a
    # 1-D array yields the same np.int32 scalars the per-sample loop built.
    return [rate, list(samples.astype(np.int32))]
def get_audio_data_const(filename):
    """Load a WAV file, keeping every frame whole, widened to 32-bit integers.

    Unlike ``get_audio_data`` this does not pick element ``[0]`` of each
    frame: every entry of the sample array is converted with
    ``astype(np.int32)`` and stored as-is.

    Args:
        filename: Path of the WAV file, handed to ``scipy.io.wavfile.read``.

    Returns:
        ``[rate, frames]`` where ``frames`` is a list with one converted
        entry per element of the array returned by the reader.
    """
    rate, frames = read(filename)
    return [rate, [frame.astype(np.int32) for frame in frames]]
def get_max_time(audio_data):
    """Return the clip length: number of samples divided by the sample rate.

    Args:
        audio_data: ``[rate, samples]`` as produced by ``get_audio_data``.

    Returns:
        ``len(samples) / rate`` (true division, so a float).
    """
    rate = audio_data[0]
    samples = audio_data[1]
    return len(samples) / rate
def f(audio_data, t):
    """Look up the sample of ``audio_data`` at time ``t``.

    The time is converted to a list position with ``int(t * rate)``, i.e.
    truncated toward zero; no interpolation is performed.

    Args:
        audio_data: ``[rate, samples]`` as produced by ``get_audio_data``.
        t: Time in the same unit as ``1 / rate``.

    Returns:
        ``samples[int(t * rate)]``.

    Raises:
        IndexError: If the computed position is outside ``samples``.
    """
    sample_rate = audio_data[0]
    position = int(t * sample_rate)
    samples = audio_data[1]
    return samples[position]
def fsin2pi(f, audio_data, w, t):
    """Return ``f(audio_data, t)`` multiplied by ``sin(2*pi*w*t)``.

    Args:
        f: Callable ``(audio_data, t) -> sample``. This parameter shadows the
            module-level ``f`` inside the function.
        audio_data: Forwarded to ``f`` untouched.
        w: Frequency multiplying ``t`` inside the sine.
        t: Time at which both the signal and the sine are evaluated.
    """
    sample = f(audio_data, t)
    angle = 2 * math.pi * w * t
    return sample * math.sin(angle)
def fcos2pi(f, audio_data, w, t):
    """Return ``f(audio_data, t)`` multiplied by ``cos(2*pi*w*t)``.

    Args:
        f: Callable ``(audio_data, t) -> sample``. This parameter shadows the
            module-level ``f`` inside the function.
        audio_data: Forwarded to ``f`` untouched.
        w: Frequency multiplying ``t`` inside the cosine.
        t: Time at which both the signal and the cosine are evaluated.
    """
    sample = f(audio_data, t)
    angle = 2 * math.pi * w * t
    return sample * math.cos(angle)
def simpson(g, audio_data, a, b, w):
    """Integrate ``g`` over ``[a, b]`` with composite Simpson's rule.

    The step ``h`` is one sample period (``1 / rate``). Each panel spans
    ``2 * h`` and contributes ``h / 3 * (y0 + 4 * y1 + y2)``. Panels are taken
    from ``a`` upward for as long as a whole panel still fits, so any
    remainder shorter than ``2 * h`` at the end of the range is ignored.

    This is an iterative version of what used to be one recursive call per
    panel. It evaluates the integrand at exactly the same points but needs no
    raised recursion limit and cannot exhaust the interpreter stack on long
    ranges. Panels are summed first-to-last, so the result may differ from
    the recursive form in the last floating-point digits.

    Args:
        g: Integrand, called as ``g(f, audio_data, w, t)`` where ``f`` is the
            module-level sample lookup (e.g. ``fsin2pi`` / ``fcos2pi``).
        audio_data: ``[rate, samples]``; only ``rate`` is read here, the rest
            is forwarded to ``g``.
        a: Lower integration bound.
        b: Upper integration bound.
        w: Frequency forwarded to ``g``.

    Returns:
        The accumulated integral, or ``0`` when not even one panel fits.
    """
    rate = audio_data[0]
    h = 1 / rate
    total = 0
    while a + 2 * h <= b:
        y0 = g(f, audio_data, w, a)
        y1 = g(f, audio_data, w, a + h)
        y2 = g(f, audio_data, w, a + 2 * h)
        total += h / 3 * (y0 + 4 * y1 + y2)
        # Advance by accumulation, matching the points the recursion visited.
        a += 2 * h
    return total
def get_freq_magnitude(start_time, end_time, audio_data, w):
    """Return the magnitude of frequency ``w`` over a time window.

    Integrates the signal against a sine and a cosine of frequency ``w`` with
    ``simpson`` and combines both integrals as ``sqrt(sin**2 + cos**2)``.

    Args:
        start_time: Lower bound of the window.
        end_time: Upper bound of the window.
        audio_data: ``[rate, samples]`` forwarded to ``simpson``.
        w: Frequency to measure.

    Returns:
        The non-negative magnitude as a float.
    """
    sine_part, cosine_part = (
        simpson(kernel, audio_data, start_time, end_time, w)
        for kernel in (fsin2pi, fcos2pi)
    )
    return math.sqrt(sine_part**2 + cosine_part**2)
def get_table(filename, start_time, end_time, start_w, end_w, step_w):
    """Print one ``<w> Hz: <magnitude>`` line per frequency for a WAV file.

    Frequencies start at ``start_w`` and grow by ``step_w`` while they do not
    exceed ``end_w``. Each magnitude is computed over the whole
    ``[start_time, end_time]`` window with ``get_freq_magnitude``.

    Args:
        filename: WAV file to load with ``get_audio_data``.
        start_time: Lower bound of the analysed window.
        end_time: Upper bound of the analysed window.
        start_w: First frequency to report.
        end_w: Last frequency to report (inclusive).
        step_w: Increment between reported frequencies; must be positive.

    Raises:
        ValueError: If ``step_w`` is not positive (the sweep would otherwise
            never terminate).
    """
    # Validate before touching the file so a bad call fails fast.
    if step_w <= 0:
        raise ValueError("step_w must be positive")
    audio_data = get_audio_data(filename)
    w = start_w
    while w <= end_w:
        print(w, "Hz:", get_freq_magnitude(start_time, end_time, audio_data, w))
        w += step_w
def get_freq_data(audio_data, start_time, end_time, step_t, start_w, end_w, step_w):
    """Compute frequency magnitudes on a (time window x frequency) grid.

    Time windows ``[t, t + step_t]`` start at ``start_time`` and advance by
    ``step_t`` while a whole window still fits before ``end_time``. For every
    window, each frequency ``start_w, start_w + step_w, ...`` up to and
    including ``end_w`` is measured with ``get_freq_magnitude``.

    Args:
        audio_data: ``[rate, samples]`` forwarded to ``get_freq_magnitude``.
        start_time: Start of the first window.
        end_time: No window extends past this time.
        step_t: Window length and stride; must be positive.
        start_w: First frequency.
        end_w: Last frequency (inclusive).
        step_w: Frequency increment; must be positive.

    Returns:
        A tuple ``(group_size_x, group_size_y, x, y, z)``:
        ``group_size_x`` is the number of windows, ``group_size_y`` the number
        of frequencies per window, and ``x``/``y``/``z`` are flat,
        equally long lists (window-major) holding the window centre time, the
        frequency and the magnitude of every grid point.

    Raises:
        ValueError: If ``step_t`` or ``step_w`` is not positive (either loop
            would otherwise never terminate).
    """
    if step_t <= 0 or step_w <= 0:
        raise ValueError("step_t and step_w must be positive")

    # The frequency axis is the same for every window: build it once.
    freqs = []
    w = start_w
    while w <= end_w:
        freqs.append(w)
        w += step_w

    x, y, z = [], [], []
    group_size_x = 0
    t = start_time
    # NOTE(review): t is advanced by repeated addition, so with float steps
    # rounding can decide whether the final window is included.
    while t + step_t <= end_time:
        group_size_x += 1
        centre = t + 0.5 * step_t
        for w in freqs:
            x.append(centre)
            y.append(w)
            z.append(get_freq_magnitude(t, t + step_t, audio_data, w))
        t += step_t

    # Report the real number of rows per window so that
    # len(z) == group_size_x * group_size_y always holds.
    return (group_size_x, len(freqs), x, y, z)
def draw_file(filename, i=0):
    """Plot the samples of a WAV file straight from disk.

    Args:
        filename: WAV file to read; also used as the plot title.
        i: Number of leading samples to plot. ``0`` (the default) plots the
            whole file.
    """
    # Read the audio samples; the sample rate is not needed for the plot.
    _, audio = read(filename)

    # Either everything, or only the first i samples.
    shown = audio if i == 0 else audio[:i]
    plt.plot(shown)

    # Axis labels, title, then render.
    plt.ylabel("Amplitude")
    plt.xlabel("Time")
    plt.title(filename)
    plt.show()
def draw_data(audio_data, i=0, title=None):
    """Plot already-loaded audio samples.

    Args:
        audio_data: ``[rate, samples]`` as produced by ``get_audio_data``;
            only ``samples`` is plotted.
        i: Number of leading samples to plot. ``0`` (the default) plots all
            samples in green; any other value plots the first ``i`` in blue.
        title: Plot title. Pass it explicitly so the figure is labelled with
            the clip actually being drawn.

    When ``title`` is omitted, the function falls back to a module-level
    ``filename`` variable if one exists. That keeps existing
    ``draw_data(audio_data)`` calls rendering as before. If no such variable
    exists the plot is simply left untitled instead of failing with
    ``NameError``.
    """
    if title is None:
        # Legacy fallback: the title used to be read from the global directly.
        title = globals().get("filename")

    # Plot all samples, or only the first i.
    if i == 0:
        plt.plot(audio_data[1], color="green")
    else:
        plt.plot(audio_data[1][:i], color="blue")

    # Label the axes.
    plt.ylabel("Amplitude")
    plt.xlabel("Time")

    # Set the title when one is known.
    if title is not None:
        plt.title(title)

    # Display the plot.
    plt.show()
def draw_freq(freq_data):
    """Scatter-plot frequency magnitudes over time.

    Args:
        freq_data: Tuple as returned by ``get_freq_data``. Element 0 (number
            of time windows) sets the marker size, elements 2 and 3 are the
            point coordinates and element 4 colours the points.

    The marker size ``10000 / freq_data[0]`` is printed before plotting.
    """
    marker_size = 10000 / freq_data[0]
    print(marker_size)

    times, freqs, magnitudes = freq_data[2], freq_data[3], freq_data[4]
    plt.scatter(times, freqs, c=magnitudes, cmap='gray_r', s=marker_size, alpha=0.3)

    plt.ylabel("Frequency")
    plt.xlabel("Time")
    plt.show()
def draw_freq2(freq_data):
    """Draw frequency magnitudes as a weighted 2-D histogram with a colour bar.

    Args:
        freq_data: Tuple as returned by ``get_freq_data``. Elements 2 and 3
            are the point coordinates and element 4 supplies the weights.

    The bin count per axis is two fifths of the number of time windows
    (``freq_data[0]``), rounded down and never less than one. It is computed
    with integer arithmetic because a scalar ``bins`` must be an integer;
    the previous ``n * (2 / 5)`` produced a float.
    """
    bins = max(1, int(freq_data[0] * 2 // 5))
    plt.hist2d(freq_data[2], freq_data[3], cmap=plt.cm.jet, bins=bins, weights=freq_data[4])
    plt.colorbar()
    plt.show()
def get_model_data(freq_data, num_ary, size_ary):
    """Condense the flat magnitude list into ``num_ary`` rows of sums.

    A single cursor walks through ``freq_data[4]``. For every row, ``size_ary``
    sums are produced; each sum consumes up to ``int(size_y / size_ary)``
    consecutive magnitudes, where ``size_y`` is ``freq_data[1]``. A magnitude
    is consumed only while ``cursor + 1 < size_y * (row + 1)``; once that
    limit is reached the remaining sums of the row stay ``0``.

    NOTE(review): the exported source lost its indentation. This version
    assumes the cursor advance sits inside the same ``if`` as the addition;
    confirm against the original notebook. The cursor is also never
    realigned to a multiple of ``size_y`` between rows, and the ``+ 1`` in
    the limit leaves the last magnitude before each limit unread. Verify
    both are intended.

    Args:
        freq_data: Tuple as returned by ``get_freq_data``.
        num_ary: Number of rows to build.
        size_ary: Number of sums per row.

    Returns:
        A list of ``num_ary`` lists, each holding ``size_ary`` sums.
    """
    size_y = freq_data[1]
    magnitudes = freq_data[4]

    model_data = []
    cursor = 0
    for row in range(num_ary):
        limit = size_y * (row + 1)
        row_sums = []
        for _ in range(size_ary):
            total = 0
            for _ in range(int(size_y / size_ary)):
                if cursor + 1 < limit:
                    total += magnitudes[cursor]
                    cursor += 1
            row_sums.append(total)
        model_data.append(row_sums)
    return model_data
In [4]:
# Raise the interpreter's recursion limit before any analysis runs.
# NOTE(review): a limit this high removes Python's guard against
# overflowing the C stack; confirm it is still required.
set_max_recursion(1000000)
# Reference clip: load it and record its length (sample count / sample rate).
filename = 'audio/play/play0.wav'
audio_data = get_audio_data(filename)
max_time = get_max_time(audio_data)
In [77]:
# Plot every sample of the reference clip (i defaults to 0 = all samples).
draw_data(audio_data)
In [17]:
# Magnitude grid for the reference clip: windows of 1% of the clip length,
# frequencies 20..4000 in steps of 100.
# NOTE(review): the other clips in this notebook use windows of 10%
# (max_time * 0.1). Their models are compared element-wise with this one,
# so confirm the different window size is intentional.
freq_data = get_freq_data(audio_data, 0, max_time, max_time*0.01, 20, 4000, 100)
In [125]:
# Weighted 2-D histogram (time vs. frequency) of the reference clip.
draw_freq2(freq_data)
In [ ]:
# group_size_x: number of time windows in the grid.
freq_data[0]
In [7]:
# group_size_y: number of frequencies measured per time window.
freq_data[1]
Out[7]:
In [54]:
# Reference model: 3 rows of 30 summed magnitudes each.
model_data = get_model_data(freq_data, 3, 30)
In [55]:
def get_similarity(model_data, test_data):
    """Compare two models row by row.

    Args:
        model_data: List of rows (as built by ``get_model_data``).
        test_data: List of rows to compare against; must have at least as
            many rows as ``model_data``.

    Returns:
        A list with one ``get_distance`` result per row of ``model_data``.
        Smaller values mean the rows are closer; ``0`` means identical.

    Raises:
        IndexError: If ``test_data`` has fewer rows than ``model_data``.
    """
    return [
        get_distance(model_data[row], test_data[row])
        for row in range(len(model_data))
    ]
def get_distance(li1, li2):
    """Return the Euclidean distance between two sequences.

    Only the first ``len(li1)`` positions are compared; extra elements of
    ``li2`` are ignored.

    Args:
        li1: First sequence of numbers; its length drives the comparison.
        li2: Second sequence; must be at least as long as ``li1``.

    Returns:
        ``sqrt(sum((li1[k] - li2[k]) ** 2))`` as a float.

    Raises:
        IndexError: If ``li2`` is shorter than ``li1``.
    """
    squared_total = sum((li1[k] - li2[k])**2 for k in range(len(li1)))
    return math.sqrt(squared_total)
In [56]:
# Comparison clip from the "stop" folder: load it and measure its length.
filename2 = 'audio/stop/stop0.wav'
audio_data2 = get_audio_data(filename2)
max_time2 = get_max_time(audio_data2)
# Magnitude grid with windows of 10% of the clip, frequencies 20..4000 step 100.
freq_data2 = get_freq_data(audio_data2, 0, max_time2, max_time2*0.1, 20, 4000, 100)
# Same model shape as the reference: 3 rows of 30 sums.
model_data2 = get_model_data(freq_data2, 3, 30)
In [57]:
# Second clip from the "play" folder: load it and measure its length.
filename3 = 'audio/play/play5.wav'
audio_data3 = get_audio_data(filename3)
max_time3 = get_max_time(audio_data3)
# Magnitude grid with windows of 10% of the clip, frequencies 20..4000 step 100.
freq_data3 = get_freq_data(audio_data3, 0, max_time3, max_time3*0.1, 20, 4000, 100)
# Same model shape as the reference: 3 rows of 30 sums.
model_data3 = get_model_data(freq_data3, 3, 30)
In [123]:
# Scatter plot of the stop0 magnitude grid.
draw_freq(freq_data2)
In [124]:
# Scatter plot of the play5 magnitude grid.
draw_freq(freq_data3)
In [37]:
# Row-by-row distances between the reference model and the stop0 model
# (when the notebook is run top to bottom).
get_similarity(model_data, model_data2)
Out[37]:
In [38]:
# Row-by-row distances between the reference model and the play5 model.
get_similarity(model_data, model_data3)
Out[38]:
In [33]:
# Fold the play1..play3 models into model_data and turn the sum into a mean.
#
# Assumes model_data holds exactly one model on entry (the play0 model built
# earlier in this notebook), so the mean is taken over 1 + 3 clips. The
# divisor is counted rather than hard-coded, because the previous fixed
# "/ 3" divided a four-clip sum by three.
# NOTE(review): this mutates model_data in place, so re-running the cell
# without rebuilding model_data first accumulates again.
n_models = 1  # the model already stored in model_data
for clip_no in range(1, 4):
    filename = 'audio/play/play' + str(clip_no) + '.wav'
    audio_data = get_audio_data(filename)
    max_time = get_max_time(audio_data)
    freq_data = get_freq_data(audio_data, 0, max_time, max_time*0.1, 20, 4000, 100)
    # Use a dedicated name so the stop0 model in model_data2 is not overwritten.
    clip_model = get_model_data(freq_data, 3, 30)
    for row in range(len(model_data)):
        for col in range(len(model_data[row])):
            model_data[row][col] += clip_model[row][col]
    n_models += 1

for row in range(len(model_data)):
    for col in range(len(model_data[row])):
        model_data[row][col] /= n_models
In [ ]:
In [ ]:
In [ ]: