-
Notifications
You must be signed in to change notification settings - Fork 41
/
tools.py
55 lines (41 loc) · 1.68 KB
/
tools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import torch
import numpy as np
from scipy.io.wavfile import write
from audio.audio_processing import griffin_lim
def get_mel_from_wav(audio, _stft):
audio = torch.clip(torch.FloatTensor(audio).unsqueeze(0), -1, 1)
audio = torch.autograd.Variable(audio, requires_grad=False)
melspec, energy = _stft.mel_spectrogram(audio)
melspec = torch.squeeze(melspec, 0).numpy().astype(np.float32)
energy = torch.squeeze(energy, 0).numpy().astype(np.float32)
return melspec, energy
def inv_mel_spec(mel, out_filename, _stft, griffin_iters=60):
mel = torch.stack([mel])
mel_decompress = _stft.spectral_de_normalize(mel)
mel_decompress = mel_decompress.transpose(1, 2).data.cpu()
spec_from_mel_scaling = 1000
spec_from_mel = torch.mm(mel_decompress[0], _stft.mel_basis)
spec_from_mel = spec_from_mel.transpose(0, 1).unsqueeze(0)
spec_from_mel = spec_from_mel * spec_from_mel_scaling
audio = griffin_lim(
torch.autograd.Variable(spec_from_mel[:, :, :-1]), _stft._stft_fn, griffin_iters
)
audio = audio.squeeze()
audio = audio.cpu().numpy()
audio_path = out_filename
write(audio_path, _stft.sampling_rate, audio)
def librosa_pad_lr(x, fsize, fshift, pad_sides=1):
'''compute right padding (final frame) or both sides padding (first and final frames)
'''
assert pad_sides in (1, 2)
# return int(fsize // 2)
pad = (x.shape[0] // fshift + 1) * fshift - x.shape[0]
if pad_sides == 1:
return 0, pad
else:
return pad // 2, pad // 2 + pad % 2
# Conversions
def amp_to_db(x):
return 20 * np.log10(np.maximum(1e-5, x))
def normalize(S, min_level_db):
return (S - min_level_db) / -min_level_db