的过程可以通过以下步骤实现:
import numpy as np
import scipy.io.wavfile as wav
from scipy.fftpack import dct
from scipy.signal import lfilter
import matplotlib.pyplot as plt
定义函数 get_mel_spectrum
def get_mel_spectrum(signal, sample_rate, frame_size=0.025, frame_stride=0.01, num_filters=26, num_ceps=13, low_freq=0, high_freq=None, apply_dct=True):
    """Compute per-frame log-Mel filter-bank features, optionally DCT-compressed.

    Pipeline: pre-emphasis -> framing -> Hamming window -> FFT magnitude ->
    triangular Mel filter bank -> 20 * log10 -> (optional) DCT-II.

    NOTE: despite the function name, the default return value is the first
    ``num_ceps`` DCT-II coefficients of the log-Mel spectrum, not the log-Mel
    spectrum itself. Pass ``apply_dct=False`` to get the log-Mel spectrum.

    Args:
        signal: 1-D sequence of audio samples (mono).
        sample_rate: Sampling rate in Hz.
        frame_size: Frame length in seconds.
        frame_stride: Hop between consecutive frame starts in seconds.
        num_filters: Number of triangular Mel filters.
        num_ceps: Number of leading DCT coefficients kept when ``apply_dct``
            is true.
        low_freq: Lower edge of the filter bank in Hz.
        high_freq: Upper edge of the filter bank in Hz. ``None`` (or any falsy
            value) means the Nyquist frequency; larger values are clamped to
            the Nyquist frequency.
        apply_dct: When true (default) return the truncated DCT-II of the
            log-Mel spectrum; when false return the log-Mel spectrum.

    Returns:
        A 2-D array with one row per frame: ``num_ceps`` columns (fewer if
        ``num_ceps > num_filters``) when ``apply_dct`` is true, otherwise
        ``num_filters`` columns of 20 * log10 filter-bank magnitudes.

    Raises:
        ValueError: If ``signal`` is not 1-D or is empty, if a frame or hop
            rounds to fewer than one sample, or if the frequency range is
            invalid.
    """
    samples = np.asarray(signal, dtype=float)
    if samples.ndim != 1:
        # A multi-channel array would be flattened into interleaved samples
        # by np.append below, so reject it explicitly.
        raise ValueError("signal must be 1-D (mono); downmix multi-channel audio first")
    if samples.size == 0:
        raise ValueError("signal must contain at least one sample")

    nyquist = sample_rate / 2
    if not high_freq:
        high_freq = nyquist
    # Filter edges above Nyquist would map to FFT bins that do not exist.
    high_freq = min(high_freq, nyquist)
    if low_freq < 0 or low_freq >= high_freq:
        raise ValueError("expected 0 <= low_freq < high_freq")

    # Pre-emphasis: first-order high-pass, y[n] = x[n] - 0.97 * x[n - 1].
    pre_emphasis = 0.97
    emphasized = np.append(samples[0], samples[1:] - pre_emphasis * samples[:-1])

    frame_length = int(round(frame_size * sample_rate))
    frame_step = int(round(frame_stride * sample_rate))
    if frame_length < 1 or frame_step < 1:
        raise ValueError("frame_size and frame_stride must each span at least one sample")

    frames = _frame_signal(emphasized, frame_length, frame_step)
    frames = frames * np.hamming(frame_length)

    # Keep the 512-point FFT for short frames, but grow to the next power of
    # two when a frame is longer, because rfft(x, n) truncates x to n samples.
    nfft = max(512, 1 << (frame_length - 1).bit_length())
    mag_frames = np.absolute(np.fft.rfft(frames, nfft))

    filters = _mel_filterbank(num_filters, nfft, sample_rate, low_freq, high_freq)
    mel_spectrum = np.dot(mag_frames, filters.T)
    # Replace exact zeros so the logarithm stays finite.
    mel_spectrum = np.where(mel_spectrum == 0, np.finfo(float).eps, mel_spectrum)
    mel_spectrum = 20 * np.log10(mel_spectrum)  # dB

    if not apply_dct:
        return mel_spectrum
    return dct(mel_spectrum, type=2, axis=1, norm='ortho')[:, :num_ceps]


def _frame_signal(samples, frame_length, frame_step):
    """Slice a 1-D array into overlapping, zero-padded frames (one per row)."""
    signal_length = len(samples)
    # One frame always exists; each further hop needed to reach the end of
    # the signal adds another, so trailing samples are never dropped.
    extra = max(signal_length - frame_length, 0)
    num_frames = 1 + int(np.ceil(extra / frame_step))
    padded_length = (num_frames - 1) * frame_step + frame_length
    padded = np.append(samples, np.zeros(padded_length - signal_length))
    offsets = frame_step * np.arange(num_frames)
    indices = offsets[:, None] + np.arange(frame_length)[None, :]
    return padded[indices]


def _mel_filterbank(num_filters, nfft, sample_rate, low_freq, high_freq):
    """Build a (num_filters, nfft // 2 + 1) matrix of triangular Mel filters."""
    low_mel = 2595 * np.log10(1 + low_freq / 700)  # Hz -> Mel
    high_mel = 2595 * np.log10(1 + high_freq / 700)  # Hz -> Mel
    mel_points = np.linspace(low_mel, high_mel, num_filters + 2)
    hz_points = 700 * (10 ** (mel_points / 2595) - 1)  # Mel -> Hz
    bins = np.floor((nfft + 1) * hz_points / sample_rate).astype(int)

    bank = np.zeros((num_filters, nfft // 2 + 1))
    for m in range(1, num_filters + 1):
        left, center, right = bins[m - 1], bins[m], bins[m + 1]
        # The guards skip a slope whose edge bins coincide, which would
        # otherwise divide by zero.
        if center > left:
            # Rising slope: 0 at the left edge, approaching 1 at the center.
            bank[m - 1, left:center] = (np.arange(left, center) - left) / (center - left)
        if right > center:
            # Falling slope: 1 at the center, approaching 0 at the right edge.
            bank[m - 1, center:right] = (right - np.arange(center, right)) / (right - center)
    return bank
调用 get_mel_spectrum 函数并绘制对数Mel谱图:
# 加载音频文件
sample_rate, signal = wav.read("audio.wav")
# wav.read returns a 2-D (samples, channels) array for multi-channel files,
# while the analysis below works on a single 1-D channel, so downmix by
# averaging the channels first.
if signal.ndim > 1:
    signal = signal.mean(axis=1)
# Compute the per-frame features.
# NOTE(review): get_mel_spectrum ends with a DCT truncated to num_ceps
# columns by default, so the rows plotted below are cepstral coefficients
# rather than Mel filter bands; the axis labels are kept as published.
mel_spectrum = get_mel_spectrum(signal, sample_rate)
# Plot the features as an image: frames along x, feature index along y.
plt.imshow(mel_spectrum.T, cmap='hot', origin='lower', aspect='auto')
plt.xlabel('Frame')
plt.ylabel('Mel Filter')
plt.title('Log Mel Spectrogram')
plt.colorbar(format='%+2.0f dB')
plt.tight_layout()
plt.show()
以上代码中,假设音频文件为"audio.wav",需要提前准备好。
对数Mel谱图可以通过将音频信号进行预加重、分帧、加窗、快速傅里叶变换、Mel滤波器应用、对数化等一系列处理得到。这种谱图常用于语音识别、音频信号处理等领域。
腾讯云相关产品和产品介绍链接地址:
领取专属 10元无门槛券
手把手带您无忧上云