
Quiz: see the referenced blog post.
Build a neural machine translation (NMT) model that translates human-readable dates ("25th of June, 2009") into machine-readable dates ("2009-06-25").
We will use an attention model to do this, one of the most sophisticated sequence-to-sequence models.
Note the packages to install:
pip install Faker==2.0.0
pip install babel

from keras.layers import Bidirectional, Concatenate, Permute, Dot, Input, LSTM, Multiply
from keras.layers import RepeatVector, Dense, Activation, Lambda
from keras.optimizers import Adam
from keras.utils import to_categorical
from keras.models import load_model, Model
import keras.backend as K
import numpy as np
from faker import Faker
import random
from tqdm import tqdm
from babel.dates import format_date
from nmt_utils import *
import matplotlib.pyplot as plt
%matplotlib inline

The model takes as input a date written in any of a variety of possible formats (e.g. "the 29th of August 1958", "03/30/1968", "24 JUNE 1987") and converts it into a standardized, machine-readable date (e.g. "1958-08-29", "1968-03-30", "1987-06-24"). We will have the model learn to output dates in the common machine-readable format YYYY-MM-DD.
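load_dataset comes from nmt_utils, which is not shown in this post. As a rough sketch of what it likely does (an assumption, not the notebook's actual code), human/machine date pairs can be generated with Faker and Babel along these lines; the FORMATS list and helper name below are purely illustrative:

from faker import Faker
from babel.dates import format_date
import random

fake = Faker()
# hypothetical subset of formats; the real nmt_utils has its own FORMATS list
FORMATS = ['short', 'medium', 'long', 'full', 'd MMM YYY', 'EEEE d MMMM YYY', 'dd.MM.YY']

def random_date_pair():
    dt = fake.date_object()                        # a random datetime.date
    human = format_date(dt, format=random.choice(FORMATS), locale='en_US')
    human = human.lower().replace(',', '')         # normalize the human-readable form
    machine = dt.isoformat()                       # 'YYYY-MM-DD'
    return human, machine

print(random_date_pair())   # e.g. ('9 may 1998', '1998-05-09')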
m = 10000
dataset, human_vocab, machine_vocab, inv_machine_vocab = load_dataset(m)

dataset[:10]

Output:
[('9 may 1998', '1998-05-09'),
('10.11.19', '2019-11-10'),
('9/10/70', '1970-09-10'),
('saturday april 28 1990', '1990-04-28'),
('thursday january 26 1995', '1995-01-26'),
('monday march 7 1983', '1983-03-07'),
('sunday may 22 1988', '1988-05-22'),
('08 jul 2008', '2008-07-08'),
('8 sep 1999', '1999-09-08'),
('thursday january 1 1981', '1981-01-01')]

The call above loaded:
dataset: a list of (human-readable date, machine-readable date) pairs
human_vocab: a dictionary mapping characters of the human-readable dates to integer indices
machine_vocab: a dictionary mapping characters of the machine-readable dates to integer indices
inv_machine_vocab: the inverse mapping of machine_vocab, from indices back to characters

Tx = 30 # maximum input length; longer inputs are truncated
Ty = 10 # length of the output date, YYYY-MM-DD
X, Y, Xoh, Yoh = preprocess_data(dataset, human_vocab, machine_vocab, Tx, Ty)
print("X.shape:", X.shape)
print("Y.shape:", Y.shape)
print("Xoh.shape:", Xoh.shape)
print("Yoh.shape:", Yoh.shape)

Output:
X.shape: (10000, 30)
Y.shape: (10000, 10)
Xoh.shape: (10000, 30, 37) # 37 = len(human_vocab)
Yoh.shape: (10000, 10, 11) # 11 = number of distinct characters in a machine date: the digits 0-9 and '-'

index = 52
print("Source date:", dataset[index][0])
print("Target date:", dataset[index][1])
print()
print("Source after preprocessing (indices):", X[index])
print("Target after preprocessing (indices):", Y[index])
print()
print("Source after preprocessing (one-hot):", Xoh[index])
print("Target after preprocessing (one-hot):", Yoh[index])

Output:
Source date: saturday october 9 1976
Target date: 1976-10-09
Source after preprocessing (indices): [29 13 30 31 28 16 13 34 0 26 15 30 26 14 17 28 0 12 0 4 12 10 9 36
36 36 36 36 36 36]
Target after preprocessing (indices): [ 2 10 8 7 0 2 1 0 1 10]
Source after preprocessing (one-hot): [[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
...
[0. 0. 0. ... 0. 0. 1.]
[0. 0. 0. ... 0. 0. 1.]
[0. 0. 0. ... 0. 0. 1.]]
Target after preprocessing (one-hot): [[0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
[0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]]
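preprocess_data and string_to_int also live in nmt_utils. A minimal sketch of the idea behind the preprocessing above, assuming (as the printed indices suggest) that human_vocab contains '<pad>' and '<unk>' entries; this is an illustration, not the utility's exact code:

from keras.utils import to_categorical

def string_to_int_sketch(text, length, vocab):
    # lower-case, drop commas, map each character to its index,
    # then truncate or pad with '<pad>' up to `length`
    text = text.lower().replace(',', '')
    if len(text) > length:
        text = text[:length]
    rep = [vocab.get(ch, vocab['<unk>']) for ch in text]
    rep += [vocab['<pad>']] * (length - len(rep))
    return rep

idx = string_to_int_sketch('saturday october 9 1976', Tx, human_vocab)
one_hot = to_categorical(idx, num_classes=len(human_vocab))   # shape (Tx, 37), one row of Xoh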

RepeatVector repeats the input several times: https://keras.io/zh/layers/core/#repeatvector
Concatenate joins input tensors along a given axis: https://keras.io/zh/layers/merge/#concatenate_1
Bidirectional wrapper for RNNs: https://keras.io/zh/layers/wrappers/#bidirectional
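As a quick shape check of the first two layers linked above (a toy example of my own, using the same Tx = 30, n_a = 32 and n_s = 64 as the rest of this notebook): RepeatVector copies s_prev Tx times so that it can be concatenated with every Bi-LSTM hidden state.

from keras.layers import Input, RepeatVector, Concatenate
from keras.models import Model

a_demo = Input(shape=(30, 64))                         # (Tx, 2*n_a)
s_demo = Input(shape=(64,))                            # (n_s,)
s_rep = RepeatVector(30)(s_demo)                       # -> (Tx, n_s)
concat_demo = Concatenate(axis=-1)([a_demo, s_rep])    # -> (Tx, 2*n_a + n_s)
print(Model([a_demo, s_demo], concat_demo).output_shape)   # (None, 30, 128)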
# Defined shared layers as global variables
repeator = RepeatVector(Tx)
concatenator = Concatenate(axis=-1)
densor1 = Dense(10, activation = "tanh")
densor2 = Dense(1, activation = "relu")
activator = Activation(softmax, name='attention_weights')
# We are using a custom softmax(axis = 1) loaded in this notebook
dotor = Dot(axes = 1)
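# Note: "softmax" above is not keras.activations.softmax but the custom version
# imported from nmt_utils, which normalizes over the time axis (axis=1) so that the
# attention weights over the Tx input steps sum to 1. A sketch of such a function
# (my assumption about nmt_utils, not its exact code):
def softmax_axis1(x, axis=1):
    e = K.exp(x - K.max(x, axis=axis, keepdims=True))   # subtract the max for numerical stability
    return e / K.sum(e, axis=axis, keepdims=True)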
# GRADED FUNCTION: one_step_attention
def one_step_attention(a, s_prev):
"""
Performs one step of attention: Outputs a context vector computed as a dot product of the attention weights
"alphas" and the hidden states "a" of the Bi-LSTM.
Arguments:
a -- hidden state output of the Bi-LSTM, numpy-array of shape (m, Tx, 2*n_a)
s_prev -- previous hidden state of the (post-attention) LSTM, numpy-array of shape (m, n_s)
Returns:
context -- context vector, input of the next (post-attention) LSTM cell
"""
### START CODE HERE ###
# Use repeator to repeat s_prev to be of shape (m, Tx, n_s) so that you can concatenate it with all hidden states "a" (≈ 1 line)
s_prev = repeator(s_prev)
# Use concatenator to concatenate a and s_prev on the last axis (≈ 1 line)
concat = concatenator(inputs=[a, s_prev])
# Use densor1 to propagate concat through a small fully-connected neural network to compute the "intermediate energies" variable e. (≈1 lines)
e = densor1(concat)
# Use densor2 to propagate e through a small fully-connected neural network to compute the "energies" variable energies. (≈1 lines)
energies = densor2(e)
# Use "activator" on "energies" to compute the attention weights "alphas" (≈ 1 line)
alphas = activator(energies)
# Use dotor together with "alphas" and "a" to compute the context vector to be given to the next (post-attention) LSTM-cell (≈ 1 line)
context = dotor([alphas, a])
### END CODE HERE ###
return context

n_a = 32
n_s = 64
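# Note: the post-attention LSTM cell and the output Dense layer below are defined once,
# as globals, so that the same weights are shared across all Ty decoding steps.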
post_activation_LSTM_cell = LSTM(n_s, return_state = True)
output_layer = Dense(len(machine_vocab), activation=softmax)

# GRADED FUNCTION: model
def model(Tx, Ty, n_a, n_s, human_vocab_size, machine_vocab_size):
"""
Arguments:
Tx -- length of the input sequence
Ty -- length of the output sequence
n_a -- hidden state size of the Bi-LSTM
n_s -- hidden state size of the post-attention LSTM
human_vocab_size -- size of the python dictionary "human_vocab"
machine_vocab_size -- size of the python dictionary "machine_vocab"
Returns:
model -- Keras model instance
"""
# Define the inputs of your model with a shape (Tx, human_vocab_size)
# Define s0 and c0, initial hidden state for the decoder LSTM of shape (n_s,)
X = Input(shape=(Tx, human_vocab_size))
s0 = Input(shape=(n_s,), name='s0')
c0 = Input(shape=(n_s,), name='c0')
s = s0
c = c0
# Initialize empty list of outputs
outputs = []
### START CODE HERE ###
# Step 1: Define your pre-attention Bi-LSTM. Remember to use return_sequences=True. (≈ 1 line)
a = Bidirectional(LSTM(n_a, return_sequences=True))(X)
# Step 2: Iterate for Ty steps
for t in range(Ty):
# Step 2.A: Perform one step of the attention mechanism to get back the context vector at step t (≈ 1 line)
context = one_step_attention(a, s)
# Step 2.B: Apply the post-attention LSTM cell to the "context" vector.
# Don't forget to pass: initial_state = [hidden state, cell state] (≈ 1 line)
s, _, c = post_activation_LSTM_cell(context, initial_state=[s, c])
# Step 2.C: Apply Dense layer to the hidden state output of the post-attention LSTM (≈ 1 line)
out = output_layer(s)
# Step 2.D: Append "out" to the "outputs" list (≈ 1 line)
outputs.append(out)
# Step 3: Create model instance taking three inputs and returning the list of outputs. (≈ 1 line)
model = Model(inputs=[X, s0, c0], outputs=outputs)
### END CODE HERE ###
return model

model = model(Tx, Ty, n_a, n_s, len(human_vocab), len(machine_vocab))

### START CODE HERE ### (≈2 lines)
opt = Adam(learning_rate=0.005, beta_1=0.9, beta_2=0.999, decay=0.01)
model.compile(loss='categorical_crossentropy',optimizer=opt, metrics=['accuracy'])
### END CODE HERE ###

s0 = np.zeros((m, n_s))
c0 = np.zeros((m, n_s))
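# Note: model(...) returns a list of Ty outputs, so Keras also expects the labels as a
# list of Ty arrays of shape (m, len(machine_vocab)). Yoh has shape (m, Ty, 11); swapping
# its first two axes and converting to a list gives exactly that.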
outputs = list(Yoh.swapaxes(0,1))

model.fit([Xoh, s0, c0], outputs, epochs=1, batch_size=100)

model.load_weights('models/model.h5')

EXAMPLES = ['5th Otc 2019', '5 April 09', '21th of August 2016', 'Tue 10 Jul 2007', 'Saturday May 9 2018', 'March 3 2001', 'March 3rd 2001', '1 March 2001']
for example in EXAMPLES:
source = string_to_int(example, Tx, human_vocab)
source = np.array(list(map(lambda x: to_categorical(x, num_classes=len(human_vocab)), source))).swapaxes(0,1)
source = source.transpose() # undoes the swapaxes above, leaving shape (Tx, len(human_vocab))
source = np.expand_dims(source, axis=0) # add a batch dimension
prediction = model.predict([source, s0, c0])
prediction = np.argmax(prediction, axis = -1)
output = [inv_machine_vocab[int(i)] for i in prediction]
print("source:", example)
print("output:", ''.join(output))

Output:
source: 5th Otc 2019
output: 2019-10-05
source: 5 April 09
output: 2009-04-05
source: 21th of August 2016
output: 2016-08-20
source: Tue 10 Jul 2007
output: 2007-07-10
source: Saturday May 9 2018
output: 2018-05-09
source: March 3 2001
output: 2001-03-03
source: March 3rd 2001
output: 2001-03-03
source: 1 March 2001
output: 2001-03-01

attention_map = plot_attention_map(model, human_vocab, inv_machine_vocab, "Tuesday 09 Oct 1993", num = 7, n_s = 64)

You can see that most of the attention is used to predict the year.

The next part is the trigger word detection assignment.
import numpy as np
from pydub import AudioSegment
import random
import sys
import io
import os
import glob
import IPython
from td_utils import *
%matplotlib inline

The raw data consists of positive audio clips of the trigger word "activate", negative clips (words that are not the trigger word), and background noise.

IPython.display.Audio("./raw_data/backgrounds/1.wav")

The audio is sampled at 44100 Hz and each clip is 10 seconds long.
x = graph_spectrogram("audio_examples/example_train.wav")

In this assignment each training example is 10 seconds long and its spectrogram has 5511 time steps, so Tx = 5511.
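graph_spectrogram comes from td_utils, which is not shown here. A hedged sketch of how such a spectrogram can be computed with matplotlib; the window parameters (a 200-sample window with a hop of 80, i.e. noverlap = 120) are my assumption, chosen because they reproduce the (101, 5511) shape printed below: (441000 - 200) // 80 + 1 = 5511 time steps and 200 // 2 + 1 = 101 frequency bins.

import matplotlib.pyplot as plt
from scipy.io import wavfile

def spectrogram_sketch(wav_file, nfft=200, noverlap=120):
    rate, data = wavfile.read(wav_file)
    if data.ndim == 2:               # keep a single channel if the file is stereo
        data = data[:, 0]
    pxx, freqs, bins, im = plt.specgram(data, NFFT=nfft, Fs=rate, noverlap=noverlap)
    return pxx                       # shape (nfft//2 + 1, n_frames) = (101, 5511) for a 10 s clip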
_, data = wavfile.read("audio_examples/example_train.wav")
print("Time steps in audio recording before spectrogram", data[:,0].shape)
print("Time steps in input after spectrogram", x.shape)

Output:
Time steps in audio recording before spectrogram (441000,)
Time steps in input after spectrogram (101, 5511)

Tx = 5511 # The number of time steps input to the model from the spectrogram
n_freq = 101 # Number of frequencies input to the model at each time step of the spectrogram
Ty = 1375 # The number of time steps in the output of our model

# Load audio segments using pydub
activates, negatives, backgrounds = load_raw_audio()
print("background len: " + str(len(backgrounds[0]))) # Should be 10,000, since it is a 10 sec clip
print("activate[0] len: " + str(len(activates[0]))) # Maybe around 1000, since an "activate" audio clip is usually around 1 sec (but varies a lot)
print("activate[1] len: " + str(len(activates[1]))) # Different "activate" clips can have different lengths

Output:
background len: 10000
activate[0] len: 721
activate[1] len: 731

def get_random_time_segment(segment_ms):
"""
Gets a random time segment of duration segment_ms in a 10,000 ms audio clip.
Arguments:
segment_ms -- the duration of the audio clip in ms ("ms" stands for "milliseconds")
Returns:
segment_time -- a tuple of (segment_start, segment_end) in ms
"""
segment_start = np.random.randint(low=0, high=10000-segment_ms)
# Make sure segment doesn't run past the 10sec background
segment_end = segment_start + segment_ms - 1
return (segment_start, segment_end)

# GRADED FUNCTION: is_overlapping
def is_overlapping(segment_time, previous_segments):
"""
Checks if the time of a segment overlaps with the times of existing segments.
Arguments:
segment_time -- a tuple of (segment_start, segment_end) for the new segment
previous_segments -- a list of tuples of (segment_start, segment_end) for the existing segments
Returns:
True if the time segment overlaps with any of the existing segments, False otherwise
"""
segment_start, segment_end = segment_time
### START CODE HERE ### (≈ 4 line)
# Step 1: Initialize overlap as a "False" flag. (≈ 1 line)
overlap = False
# Step 2: loop over the previous_segments start and end times.
# Compare start/end times and set the flag to True if there is an overlap (≈ 3 lines)
for previous_start, previous_end in previous_segments:
if previous_end >= segment_start and previous_start <= segment_end:
overlap = True
### END CODE HERE ###
return overlap
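A quick sanity check of is_overlapping (my own example, not the notebook's test cases): (950, 1430) is clear of both existing segments, while (2305, 2950) touches the end of (1900, 2305), which counts as an overlap because the comparison is inclusive.

print(is_overlapping((950, 1430), [(2000, 2360), (3368, 3745)]))    # False
print(is_overlapping((2305, 2950), [(1900, 2305), (3424, 3656)]))   # True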
# GRADED FUNCTION: insert_audio_clip
def insert_audio_clip(background, audio_clip, previous_segments):
"""
Insert a new audio segment over the background noise at a random time step, ensuring that the
audio segment does not overlap with existing segments.
Arguments:
background -- a 10 second background audio recording.
audio_clip -- the audio clip to be inserted/overlaid.
previous_segments -- times where audio segments have already been placed
Returns:
new_background -- the updated background audio
"""
# Get the duration of the audio clip in ms
segment_ms = len(audio_clip)
### START CODE HERE ###
# Step 1: Use one of the helper functions to pick a random time segment onto which to insert
# the new audio clip. (≈ 1 line)
segment_time = get_random_time_segment(segment_ms)
# Step 2: Check if the new segment_time overlaps with one of the previous_segments. If so, keep
# picking new segment_time at random until it doesn't overlap. (≈ 2 lines)
while is_overlapping(segment_time, previous_segments):
segment_time = get_random_time_segment(segment_ms)
# Step 3: Add the new segment_time to the list of previous_segments (≈ 1 line)
previous_segments.append(segment_time)
### END CODE HERE ###
# Step 4: Superpose audio segment and background
new_background = background.overlay(audio_clip, position = segment_time[0])
return new_background, segment_time

# GRADED FUNCTION: insert_ones
def insert_ones(y, segment_end_ms):
"""
Update the label vector y. The labels of the 50 output steps strictly after the end of the segment
should be set to 1. By strictly we mean that the label of segment_end_y should be 0 while the
50 following labels should be ones.
Arguments:
y -- numpy array of shape (1, Ty), the labels of the training example
segment_end_ms -- the end time of the segment in ms
Returns:
y -- updated labels
"""
# duration of the background (in terms of spectrogram time-steps)
segment_end_y = int(segment_end_ms * Ty / 10000.0)
# Add 1 to the correct index in the background label (y)
### START CODE HERE ### (≈ 3 lines)
for i in range(segment_end_y+1, segment_end_y+51):
if i < Ty:
y[0, i] = 1
### END CODE HERE ###
return y
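A quick check of insert_ones (my own example): with segment_end_ms = 4251 the corresponding output step is int(4251 * 1375 / 10000) = 584, so steps 585-634 are labelled 1 while steps 584 and 635 stay 0.

arr = insert_ones(np.zeros((1, Ty)), 4251)
print(arr[0, 584], arr[0, 585], arr[0, 634], arr[0, 635])   # 0.0 1.0 1.0 0.0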
# GRADED FUNCTION: create_training_example
def create_training_example(background, activates, negatives):
"""
Creates a training example with a given background, activates, and negatives.
Arguments:
background -- a 10 second background audio recording
activates -- a list of audio segments of the word "activate"
negatives -- a list of audio segments of random words that are not "activate"
Returns:
x -- the spectrogram of the training example
y -- the label at each time step of the spectrogram
"""
# Set the random seed
np.random.seed(18)
# Make background quieter
background = background - 20
### START CODE HERE ###
# Step 1: Initialize y (label vector) of zeros (≈ 1 line)
y = np.zeros((1, Ty))
# Step 2: Initialize segment times as empty list (≈ 1 line)
previous_segments = []
### END CODE HERE ###
# Select 0-4 random "activate" audio clips from the entire list of "activates" recordings
number_of_activates = np.random.randint(0, 5)
random_indices = np.random.randint(len(activates), size=number_of_activates)
random_activates = [activates[i] for i in random_indices]
### START CODE HERE ### (≈ 3 lines)
# Step 3: Loop over randomly selected "activate" clips and insert in background
for random_activate in random_activates:
# Insert the audio clip on the background
background, segment_time = insert_audio_clip(background, random_activate, previous_segments)
# Retrieve segment_start and segment_end from segment_time
segment_start, segment_end = segment_time
# Insert labels in "y"
y = insert_ones(y, segment_end)
### END CODE HERE ###
# Select 0-2 random negatives audio recordings from the entire list of "negatives" recordings
number_of_negatives = np.random.randint(0, 3)
random_indices = np.random.randint(len(negatives), size=number_of_negatives)
random_negatives = [negatives[i] for i in random_indices]
### START CODE HERE ### (≈ 2 lines)
# Step 4: Loop over randomly selected negative clips and insert in background
for random_negative in random_negatives:
# Insert the audio clip on the background
background, _ = insert_audio_clip(background, random_negative, previous_segments)
### END CODE HERE ###
# Standardize the volume of the audio clip
background = match_target_amplitude(background, -20.0)
# Export new training example
file_handle = background.export("train" + ".wav", format="wav")
print("File (train.wav) was saved in your directory.")
# Get and plot spectrogram of the new recording (background with superposition of positive and negatives)
x = graph_spectrogram("train.wav")
return x, y

x, y = create_training_example(backgrounds[0], activates, negatives)
plt.plot(y[0])
The instructor has already preprocessed the full training set for us.
# Load preprocessed training examples
X = np.load("./XY_train/X.npy")
Y = np.load("./XY_train/Y.npy")

The dev set uses audio recorded by real people.
# Load preprocessed dev set examples
X_dev = np.load("./XY_dev/X_dev.npy")
Y_dev = np.load("./XY_dev/Y_dev.npy")

from keras.callbacks import ModelCheckpoint
from keras.models import Model, load_model, Sequential
from keras.layers import Dense, Activation, Dropout, Input, Masking, TimeDistributed, LSTM, Conv1D
from keras.layers import GRU, Bidirectional, BatchNormalization, Reshape
from keras.optimizers import Adam
The model first uses a 1-D convolution to extract features; this also speeds up the GRUs, which then only need to process 1375 time steps instead of 5511 (a quick check of that number follows the links below).
Note: do not use a bidirectional RNN here. We want to act as soon as the trigger word has been detected; with a bidirectional RNN we would have to wait for the whole 10 s of audio to be recorded before deciding.
conv1d https://keras.io/zh/layers/convolutional/#conv1d
BN https://keras.io/zh/layers/normalization/#batchnormalization
GRU https://keras.io/zh/layers/recurrent/#gru
timedistributed https://keras.io/zh/layers/wrappers/#timedistributed
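Quick check of where the 1375 comes from (my own arithmetic, assuming the default 'valid' padding): a Conv1D with kernel_size=15 and strides=4 maps the 5511 spectrogram steps to floor((5511 - 15) / 4) + 1 = 1375 output steps, which is exactly Ty.

kernel_size, stride = 15, 4
print((Tx - kernel_size) // stride + 1)   # (5511 - 15) // 4 + 1 = 1375 == Ty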
# GRADED FUNCTION: model
def model(input_shape):
"""
Function creating the model's graph in Keras.
Argument:
input_shape -- shape of the model's input data (using Keras conventions)
Returns:
model -- Keras model instance
"""
X_input = Input(shape = input_shape)
### START CODE HERE ###
# Step 1: CONV layer (≈4 lines)
X = Conv1D(filters=196,kernel_size=15,strides=4)(X_input) # CONV1D
X = BatchNormalization()(X) # Batch normalization
X = Activation('relu')(X) # ReLu activation
X = Dropout(rate=0.8)(X) # dropout (use 0.8)
# Step 2: First GRU Layer (≈4 lines)
X = GRU(128, return_sequences=True)(X) # GRU (use 128 units and return the sequences)
X = Dropout(rate=0.8)(X) # dropout (use 0.8)
X = BatchNormalization()(X) # Batch normalization
# Step 3: Second GRU Layer (≈4 lines)
X = GRU(128, return_sequences=True)(X) # GRU (use 128 units and return the sequences)
X = Dropout(rate=0.8)(X) # dropout (use 0.8)
X = BatchNormalization()(X) # Batch normalization
X = Dropout(rate=0.8)(X) # dropout (use 0.8)
# Step 4: Time-distributed dense layer (≈1 line)
X = TimeDistributed(Dense(1, activation = "sigmoid"))(X) # time distributed (sigmoid)
### END CODE HERE ###
model = Model(inputs = X_input, outputs = X)
return model

model = model(input_shape = (Tx, n_freq))

model.summary()

Output:
Model: "model_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_2 (InputLayer) (None, 5511, 101) 0
_________________________________________________________________
conv1d_2 (Conv1D) (None, 1375, 196) 297136
_________________________________________________________________
batch_normalization_2 (Batch (None, 1375, 196) 784
_________________________________________________________________
activation_2 (Activation) (None, 1375, 196) 0
_________________________________________________________________
dropout_2 (Dropout) (None, 1375, 196) 0
_________________________________________________________________
gru_2 (GRU) (None, 1375, 128) 124800
_________________________________________________________________
dropout_3 (Dropout) (None, 1375, 128) 0
_________________________________________________________________
batch_normalization_3 (Batch (None, 1375, 128) 512
_________________________________________________________________
gru_3 (GRU) (None, 1375, 128) 98688
_________________________________________________________________
dropout_4 (Dropout) (None, 1375, 128) 0
_________________________________________________________________
batch_normalization_4 (Batch (None, 1375, 128) 512
_________________________________________________________________
dropout_5 (Dropout) (None, 1375, 128) 0
_________________________________________________________________
time_distributed_1 (TimeDist (None, 1375, 1) 129
=================================================================
Total params: 522,561
Trainable params: 521,657
Non-trainable params: 904

Training is time-consuming, so the instructor has already trained this model for us on 4000 examples.

model = load_model('./models/tr_model.h5')

We then fine-tune it for one epoch on our own dataset:
opt = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, decay=0.01)
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=["accuracy"])

model.fit(X, Y, batch_size = 5, epochs=1)

loss, acc = model.evaluate(X_dev, Y_dev)
print("Dev set accuracy = ", acc)

Output:
25/25 [==============================] - 1s 46ms/step
Dev set accuracy = 0.9427199959754944

However, accuracy is not a good metric here: most labels are 0, so predicting all zeros would also score a high accuracy. A metric such as the F1 score (or precision/recall) should be used instead.
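As a hedged sketch (not part of the original notebook, and it assumes scikit-learn is available), the F1 score on the dev set can be computed by thresholding every output time step at 0.5:

from sklearn.metrics import f1_score

probs = model.predict(X_dev)                          # shape (n_dev, Ty, 1)
y_pred = (probs[:, :, 0] > 0.5).astype(int).ravel()   # one binary prediction per time step
y_true = Y_dev[:, :, 0].astype(int).ravel()
print("Dev set F1 =", f1_score(y_true, y_pred))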
def detect_triggerword(filename):
plt.subplot(2, 1, 1)
x = graph_spectrogram(filename)
# the spectrogram outputs (freqs, Tx) and we want (Tx, freqs) to input into the model
x = x.swapaxes(0,1)
x = np.expand_dims(x, axis=0)
predictions = model.predict(x)
plt.subplot(2, 1, 2)
plt.plot(predictions[0,:,0])
plt.ylabel('probability')
plt.show()
return predictions

Once we have estimated the probability of detecting the word "activate" at each output step, we can trigger a "chime" sound whenever the probability rises above some threshold. Also, after "activate" is said, many consecutive values of y may be close to 1, yet we only want to chime once, so we insert a chime at most every 75 output steps. This helps prevent inserting two chimes for a single instance of "activate" (similar to non-max suppression in computer vision).
chime_file = "audio_examples/chime.wav"
def chime_on_activate(filename, predictions, threshold):
audio_clip = AudioSegment.from_wav(filename)
chime = AudioSegment.from_wav(chime_file)
Ty = predictions.shape[1]
# Step 1: Initialize the number of consecutive output steps to 0
consecutive_timesteps = 0
# Step 2: Loop over the output steps in the y
for i in range(Ty):
# Step 3: Increment consecutive output steps
consecutive_timesteps += 1
# Step 4: If prediction is higher than the threshold and more than 75 consecutive output steps have passed
if predictions[0,i,0] > threshold and consecutive_timesteps > 75:
# Step 5: Superpose audio and background using pydub
audio_clip = audio_clip.overlay(chime, position = ((i / Ty) * audio_clip.duration_seconds)*1000)
# Step 6: Reset consecutive output steps to 0
consecutive_timesteps = 0
audio_clip.export("chime_output.wav", format='wav')

filename = "./raw_data/dev/1.wav"
prediction = detect_triggerword(filename)
chime_on_activate(filename, prediction, 0.5)
IPython.display.Audio("./chime_output.wav")

# Preprocess the audio to the correct format
def preprocess_audio(filename):
# Trim or pad audio segment to 10000ms
padding = AudioSegment.silent(duration=10000)
segment = AudioSegment.from_wav(filename)[:10000]
segment = padding.overlay(segment)
# Set frame rate to 44100
segment = segment.set_frame_rate(44100)
# Export as wav
segment.export(filename, format='wav')

your_filename = "audio_examples/my_audio.wav"
preprocess_audio(your_filename)
IPython.display.Audio(your_filename) # listen to the audio you uploaded

chime_threshold = 0.5
prediction = detect_triggerword(your_filename)
chime_on_activate(your_filename, prediction, chime_threshold)
IPython.display.Audio("./chime_output.wav")