Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

intel的工程师自己有没有测过? #2612

Open
ranzsz opened this issue Dec 21, 2024 · 2 comments
Open

intel的工程师自己有没有测过? #2612

ranzsz opened this issue Dec 21, 2024 · 2 comments
Assignees

Comments

@ranzsz
Copy link

ranzsz commented Dec 21, 2024

Describe the bug
在执行音频处理时,总是报错如下,试了很多方法无法解决。
File "E:\openvinotoolkit-openvino_notebooks-master\openvino_notebooks\notebooks\wav2lip\ov_inference.py", line 12, in
from Wav2Lip import audio
File "E:\openvinotoolkit-openvino_notebooks-master\openvino_notebooks\notebooks\wav2lip\Wav2Lip\audio.py", line 8, in
from hparams.hparams import preemphasis
ImportError: cannot import name 'preemphasis' from 'hparams.hparams' (D:\Anaconda\envs\py310\lib\site-packages\hparams\hparams.py)
Expected behavior
搞了好多遍,崩溃。
Screenshots
image

Installation instructions (Please mark the checkbox)
[ ] I followed the installation guide at https://github.com/openvinotoolkit/openvino_notebooks#-installation-guide to install the notebooks.

**Environment information**
???

Additional context
经过查看,干脆把两个文件合并了,就可以执行了。
image
但后面还有问题,再摸索吧。
把合并的代码放在下面。请大神指导一下。

import librosa
import librosa.filters
import numpy as np
from scipy import signal
from scipy.io import wavfile
import lws

class HParams:
    """Simple attribute-style container for hyperparameter key/value pairs."""

    def __init__(self, **kwargs):
        # Backing store for all hyperparameters; attribute reads are routed
        # through __getattr__ below.
        self.data = {}
        for key, value in kwargs.items():
            self.data[key] = value

    def __getattr__(self, key):
        # Fetch "data" through __dict__ instead of self.data: during
        # unpickling/copying, __getattr__ can run before __init__ has set
        # "data", and touching self.data then recurses infinitely.
        data = self.__dict__.get("data")
        if data is None or key not in data:
            raise AttributeError(f"'HParams' object has no attribute {key}")
        return data[key]

    def set_hparam(self, key, value):
        """Set (or overwrite) a single hyperparameter."""
        self.data[key] = value

# Default hyperparameters
hparams = HParams(
    # --- Spectrogram / STFT settings ---
    num_mels=80,  # number of mel filterbank channels
    rescale=True,
    rescaling_max=0.9,
    use_lws=False,  # when True, _stft uses the lws library instead of librosa
    n_fft=800,  # FFT size; with sample_rate=16000 this is 50 ms
    hop_size=200,  # 12.5 ms frame shift
    win_size=800,  # 50 ms analysis window
    sample_rate=16000,
    frame_shift_ms=None,  # fallback used by get_hop_size() when hop_size is None
    # --- Normalization of log-magnitude spectrograms ---
    signal_normalization=True,
    allow_clipping_in_normalization=True,  # only relevant when signal_normalization=True
    symmetric_mels=True,  # scale to [-max_abs_value, max_abs_value] instead of [0, max]
    max_abs_value=4.0,
    # --- Pre-emphasis and dB conversion ---
    preemphasize=True,
    preemphasis=0.97,  # filter coefficient k in y[n] = x[n] - k*x[n-1]
    min_level_db=-100,
    ref_level_db=20,
    fmin=55,  # mel filterbank lower edge (Hz)
    fmax=7600,  # mel filterbank upper edge (Hz); must be <= sample_rate // 2
    # --- Training settings ---
    img_size=96,
    fps=25,
    batch_size=16,
    initial_learning_rate=1e-4,
    nepochs=200000000000000000,  # effectively "train until manually stopped"
    num_workers=16,
    checkpoint_interval=3000,
    eval_interval=3000,
    save_optimizer_state=True,
    # --- SyncNet / discriminator settings ---
    syncnet_wt=0.0,  # presumably raised during training once lip-sync loss kicks in — TODO confirm
    syncnet_batch_size=64,
    syncnet_lr=1e-4,
    syncnet_eval_interval=10000,
    syncnet_checkpoint_interval=10000,
    disc_wt=0.07,
    disc_initial_learning_rate=1e-4,
)

def hparams_debug_string():
    """Return a human-readable, alphabetically sorted dump of all hyperparameters."""
    values = hparams.data
    lines = []
    for key in sorted(values):
        if key == "sentences":
            continue
        lines.append(f"  {key}: {values[key]}")
    return "Hyperparameters:\n" + "\n".join(lines)

# Audio processing functions
def load_wav(path, sr):
    """Load the audio file at *path* resampled to *sr*; return only the waveform."""
    wav, _ = librosa.core.load(path, sr=sr)
    return wav

def save_wav(wav, path, sr):
    """Write a float waveform to *path* as 16-bit PCM WAV.

    The signal is peak-normalized to the int16 range; the 0.01 floor avoids
    division by ~0 for near-silent input. The scaling is done on a copy so
    the caller's array is not mutated (the original used ``wav *= ...``,
    which silently modified the input in place).
    """
    scaled = wav * (32767 / max(0.01, np.max(np.abs(wav))))
    wavfile.write(path, sr, scaled.astype(np.int16))

def save_wavenet_wav(wav, path, sr):
    """Write a float waveform to *path* without int16 rescaling.

    ``librosa.output.write_wav`` was removed in librosa 0.8.0, so the
    original call raises AttributeError on current installs. scipy's
    ``wavfile.write`` produces an equivalent float WAV file (old librosa
    delegated to scipy internally).
    """
    wavfile.write(path, sr, wav.astype(np.float32))

def preemphasis(wav, k=0.97, preemphasize=True):
    """Apply the high-pass pre-emphasis filter y[n] = x[n] - k*x[n-1].

    Returns the input unchanged when *preemphasize* is False.
    """
    if not preemphasize:
        return wav
    return signal.lfilter([1, -k], [1], wav)

def inv_preemphasis(wav, k=0.97, inv_preemphasize=True):
    """Invert preemphasis(): IIR filter y[n] = x[n] + k*y[n-1].

    Returns the input unchanged when *inv_preemphasize* is False.
    """
    if not inv_preemphasize:
        return wav
    return signal.lfilter([1], [1, -k], wav)

def get_hop_size():
    """Return the STFT hop size, deriving it from frame_shift_ms when unset."""
    if hparams.hop_size is not None:
        return hparams.hop_size
    # Fall back to a hop computed from the frame shift in milliseconds.
    assert hparams.frame_shift_ms is not None
    return int(hparams.frame_shift_ms / 1000 * hparams.sample_rate)

def linearspectrogram(wav):
    """Linear-frequency log-magnitude spectrogram of *wav*, optionally normalized."""
    emphasized = preemphasis(wav, hparams.preemphasis, hparams.preemphasize)
    S = _amp_to_db(np.abs(_stft(emphasized))) - hparams.ref_level_db
    return _normalize(S) if hparams.signal_normalization else S

def melspectrogram(wav):
    """Mel-scale log-magnitude spectrogram of *wav*, optionally normalized."""
    emphasized = preemphasis(wav, hparams.preemphasis, hparams.preemphasize)
    S = _amp_to_db(_linear_to_mel(np.abs(_stft(emphasized)))) - hparams.ref_level_db
    return _normalize(S) if hparams.signal_normalization else S

def _lws_processor():
    """Build an LWS STFT processor configured from the global hparams."""
    return lws.lws(
        hparams.n_fft,
        get_hop_size(),
        fftsize=hparams.win_size,
        mode="speech",
    )

def _stft(y):
    """STFT of *y*: via lws when hparams.use_lws is set, otherwise librosa."""
    if hparams.use_lws:
        return _lws_processor().stft(y).T
    return librosa.stft(
        y=y,
        n_fft=hparams.n_fft,
        hop_length=get_hop_size(),
        win_length=hparams.win_size,
    )

def num_frames(length, fsize, fshift):
    """Number of STFT frames for a signal of *length* after symmetric padding.

    One extra frame is counted when *length* is not a whole multiple of
    *fshift*, so the trailing partial frame is covered.
    """
    pad = fsize - fshift
    padded = length + 2 * pad
    extra = 1 if length % fshift == 0 else 2
    return (padded - fsize) // fshift + extra

def pad_lr(x, fsize, fshift):
    """Return (left, right) padding so the padded signal yields whole STFT frames.

    The frame count is computed inline (same formula as num_frames): one
    extra frame when len(x) is not a whole multiple of *fshift*.
    """
    pad = fsize - fshift
    length = len(x)
    padded = length + 2 * pad
    frames = (padded - fsize) // fshift + (1 if length % fshift == 0 else 2)
    # Remainder needed on the right so the final frame fits exactly.
    r = (frames - 1) * fshift + fsize - padded
    return pad, pad + r

def librosa_pad_lr(x, fsize, fshift):
    """Return (0, right-pad) so len(x) becomes a whole multiple of *fshift*.

    NOTE(review): *fsize* is unused in the original implementation; it is
    kept only for signature parity with pad_lr.
    """
    n = x.shape[0]
    right = (n // fshift + 1) * fshift - n
    return 0, right

_mel_basis = None  # lazily-built mel filterbank cache (module-level singleton)

def _linear_to_mel(spectogram):
    """Project a linear-frequency spectrogram onto the mel basis (built once)."""
    global _mel_basis
    if _mel_basis is None:
        _mel_basis = _build_mel_basis()
    return _mel_basis @ spectogram

def _build_mel_basis():
    """Construct the mel filterbank matrix from the global hparams.

    Keyword arguments are required here: librosa 0.10 removed positional
    ``sr``/``n_fft`` from ``librosa.filters.mel``, so the original
    positional call raises TypeError on current installs. Keywords also
    work on older librosa versions, so this stays backward-compatible.
    """
    # Nyquist check: the filterbank's upper edge must be representable.
    assert hparams.fmax <= hparams.sample_rate // 2
    return librosa.filters.mel(
        sr=hparams.sample_rate,
        n_fft=hparams.n_fft,
        n_mels=hparams.num_mels,
        fmin=hparams.fmin,
        fmax=hparams.fmax,
    )

def _amp_to_db(x):
    """Convert linear amplitude to decibels, flooring at hparams.min_level_db."""
    # exp(min_db/20 * ln 10) == 10 ** (min_db / 20): the amplitude floor.
    floor = np.exp(hparams.min_level_db / 20 * np.log(10))
    return 20 * np.log10(np.maximum(floor, x))

def _db_to_amp(x):
    return np.power(10.0, (x) * 0.05)

def _normalize(S):
    """Map a dB spectrogram onto [0, max] or [-max, max], per hparams.

    With allow_clipping_in_normalization, out-of-range values are clipped;
    otherwise the input is asserted to already lie in the expected dB range.
    """
    max_abs = hparams.max_abs_value
    # Fraction of the dynamic range [min_level_db, 0] covered by S.
    scaled = (S - hparams.min_level_db) / (-hparams.min_level_db)
    if hparams.allow_clipping_in_normalization:
        if hparams.symmetric_mels:
            return np.clip(2 * max_abs * scaled - max_abs, -max_abs, max_abs)
        return np.clip(max_abs * scaled, 0, max_abs)
    assert S.max() <= 0 and S.min() - hparams.min_level_db >= 0
    if hparams.symmetric_mels:
        return 2 * max_abs * scaled - max_abs
    return max_abs * scaled

def _denormalize(D):
    """Inverse of _normalize: map normalized values back to the dB domain."""
    max_abs = hparams.max_abs_value
    min_db = hparams.min_level_db
    if hparams.allow_clipping_in_normalization:
        if hparams.symmetric_mels:
            clipped = np.clip(D, -max_abs, max_abs)
            return (clipped + max_abs) * -min_db / (2 * max_abs) + min_db
        return np.clip(D, 0, max_abs) * -min_db / max_abs + min_db
    if hparams.symmetric_mels:
        return (D + max_abs) * -min_db / (2 * max_abs) + min_db
    return D * -min_db / max_abs + min_db
@YuChern-Intel
Copy link

I was able to reproduce the issue when importing the hparams Python script. It might be due to the duplicate name "hparams" used when defining hparams in the hparams.py file.

Yes, I agree with you that copying the content of hparams.py file directly to audio.py file is much easier. I was able to run ov_inference.py and generate outputs with the following approaches.

  1. Copy the content of hparams.py file directly to audio.py file, which you are doing the same thing. Then, change hparams = HParams( to hp = HParams(
  2. Add ov_inference("data_video_sun_5s.mp4","data_audio_sun_5s.wav") at the last line in ov_inference.py.
  3. Run command winget install ffmpeg if ffmpeg is not installed.

@ranzsz
Copy link
Author

ranzsz commented Dec 23, 2024

Thank U! I will try again.
谢谢!我再试试。

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

3 participants