This is the Python code used to convert .wav files to spectrograms.

The audio data comes from a Kaggle dataset, which in turn originates from a DCASE 2018 paper.


In [ ]:
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from scipy import signal
from scipy.io import wavfile

In [ ]:
prefix = 'gs://path/to/audio/files'
spectro_prefix = 'gs://path/to/spectrograms'

for i in range(len(names)):
    fname = prefix + names[i]
    !gsutil cp $fname .

    # Create spectrogram with scipy
    sample_rate, samples = wavfile.read(names[i])
    freq, times, spectro = signal.spectrogram(samples, sample_rate)
    
    # Create specrogram image with matplotlib
    plt.figure(figsize=(28,28), frameon=False)
    fig = plt.gcf()
    plt.axis('off')
    plt.pcolormesh(times, freq, np.log(spectro))

    # Save spectrogram to local file
    name = names[i].split('.')[0]
    fig.savefig(name)
    spectro_path = spectro_prefix + name + '.png'
    local_path = name + '.png'

    # Convert image to 3 channels and shrink
    im = Image.open(local_path)
    im.thumbnail((128,128), Image.ANTIALIAS)
    im.convert(mode='RGB')
    im.save(local_path)

    # Save spectrogram to GCS and remove local files
    !gsutil cp $local_path $spectro_path
    !rm $local_path
    local_audio = names[i]
    !rm $local_audio

Copyright 2020 Google Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License