Commit: Merge branch 'xx' into speechbrain_asr_eval
Showing 5 changed files with 244 additions and 1 deletion.
anonymization/modules/dsp/anonymise_dir_mcadams_rand_seed.py (142 additions, 0 deletions)
@@ -0,0 +1,142 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Jose Patino, Massimiliano Todisco, Pramod Bachhav, Nicholas Evans
Audio Security and Privacy Group, EURECOM
modified version (N.T.)
"""
import os
import random
import shutil
import wave
from pathlib import Path

import librosa
import numpy as np
import scipy.signal
from kaldiio import ReadHelper


def load_utt2spk(path):
    assert os.path.isfile(path), f'File does not exist: {path}'
    table = np.genfromtxt(path, dtype='U')
    utt2spk = {utt: spk for utt, spk in table}
    return utt2spk
def process_data(dataset_path, anon_level, settings):
    utt2spk = None
    if anon_level == 'spk':
        utt2spk = load_utt2spk(dataset_path / 'utt2spk')

    output_path = Path(str(dataset_path) + settings['anon_suffix'])
    if os.path.exists(output_path):
        shutil.rmtree(output_path)
    shutil.copytree(dataset_path, output_path)
    if not os.path.exists(output_path / 'wav'):
        os.makedirs(output_path / 'wav')
    wav_scp = dataset_path / 'wav.scp'
    path_wav_scp_out = output_path / 'wav.scp'
    with open(path_wav_scp_out, 'wt', encoding='utf-8') as writer:
        with ReadHelper(f'scp:{wav_scp}') as reader:
            for utid, (freq, samples) in reader:
                output_file = os.path.join(output_path / 'wav', f'{utid}.wav')
                print(output_file)
                if os.path.exists(output_file):
                    print('file already exists')
                    continue
                # scale int16 samples to floats in [-1, 1)
                samples = samples / (np.iinfo(np.int16).max + 1)
                if anon_level == 'spk':
                    assert utid in utt2spk, f'Failed to find speaker ID for utterance {utid}'
                    spid = utt2spk[utid]
                    # seed per speaker so all utterances of a speaker share one coefficient
                    random.seed(np.abs(hash(spid)))
                rand_mc_coeff = random.uniform(settings['mc_coeff_min'], settings['mc_coeff_max'])

                samples = anonym(freq=freq, samples=samples,
                                 winLengthinms=settings['winLengthinms'],
                                 shiftLengthinms=settings['shiftLengthinms'],
                                 lp_order=settings['n_coeffs'], mcadams=rand_mc_coeff)

                with wave.open(output_file, 'wb') as stream:
                    stream.setframerate(freq)
                    stream.setnchannels(1)
                    stream.setsampwidth(2)
                    stream.writeframes(samples)
                print(f'{utid} {output_file}', file=writer)
    print('Done')
||
def anonym(freq, samples, winLengthinms=20, shiftLengthinms=10, lp_order=20, mcadams=0.8): | ||
|
||
|
||
print(mcadams) | ||
eps = np.finfo(np.float32).eps | ||
samples = samples + eps | ||
|
||
# simulation parameters | ||
winlen = np.floor(winLengthinms * 0.001 * freq).astype(int) | ||
shift = np.floor(shiftLengthinms * 0.001 * freq).astype(int) | ||
length_sig = len(samples) | ||
|
||
# fft processing parameters | ||
NFFT = 2 ** (np.ceil((np.log2(winlen)))).astype(int) | ||
# anaysis and synth window which satisfies the constraint | ||
wPR = np.hanning(winlen) | ||
K = np.sum(wPR) / shift | ||
win = np.sqrt(wPR / K) | ||
Nframes = 1 + np.floor((length_sig - winlen) / shift).astype(int) # nr of complete frames | ||
|
||
# carry out the overlap - add FFT processing | ||
sig_rec = np.zeros([length_sig]) # allocate output+'ringing' vector | ||
|
||
for m in np.arange(1, Nframes): | ||
# indices of the mth frame | ||
index = np.arange(m * shift, np.minimum(m * shift + winlen, length_sig)) | ||
# windowed mth frame (other than rectangular window) | ||
frame = samples[index] * win | ||
# get lpc coefficients | ||
a_lpc = librosa.core.lpc(frame + eps, order=lp_order) | ||
# get poles | ||
poles = scipy.signal.tf2zpk(np.array([1]), a_lpc)[1] | ||
#index of imaginary poles | ||
ind_imag = np.where(np.isreal(poles) == False)[0] | ||
#index of first imaginary poles | ||
ind_imag_con = ind_imag[np.arange(0, np.size(ind_imag), 2)] | ||
|
||
# here we define the new angles of the poles, shifted accordingly to the mcadams coefficient | ||
# values >1 expand the spectrum, while values <1 constract it for angles>1 | ||
# values >1 constract the spectrum, while values <1 expand it for angles<1 | ||
# the choice of this value is strongly linked to the number of lpc coefficients | ||
# a bigger lpc coefficients number constraints the effect of the coefficient to very small variations | ||
# a smaller lpc coefficients number allows for a bigger flexibility | ||
new_angles = np.angle(poles[ind_imag_con]) ** mcadams | ||
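        # Worked example (hypothetical numbers, not from this commit): with
        # mcadams = 0.8, a pole at angle 0.5 rad maps to 0.5**0.8 ~ 0.574 rad
        # and a pole at 2.0 rad maps to 2.0**0.8 ~ 1.741 rad, so angles below
        # 1 rad are pushed up and angles above 1 rad are pulled down, warping
        # the formant positions.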
        # make sure the new angles stay between 0 and pi
        new_angles[np.where(new_angles >= np.pi)] = np.pi
        new_angles[np.where(new_angles <= 0)] = 0

        # copy of the original poles to be adjusted with the new angles
        new_poles = poles
        for k in np.arange(np.size(ind_imag_con)):
            # compute new poles with the same magnitude and the new angles
            new_poles[ind_imag_con[k]] = np.abs(poles[ind_imag_con[k]]) * np.exp(1j * new_angles[k])
            # apply the same shift to the conjugate pole
            new_poles[ind_imag_con[k] + 1] = np.abs(poles[ind_imag_con[k] + 1]) * np.exp(-1j * new_angles[k])

        # recover the new, modified LPC coefficients
        a_lpc_new = np.real(np.poly(new_poles))
        # get the residual excitation for reconstruction
        res = scipy.signal.lfilter(a_lpc, np.array([1]), frame)
        # reconstruct the frame with the new LPC coefficients
        frame_rec = scipy.signal.lfilter(np.array([1]), a_lpc_new, res)
        frame_rec = frame_rec * win

        outindex = np.arange(m * shift, m * shift + len(frame_rec))
        # overlap-add
        sig_rec[outindex] = sig_rec[outindex] + frame_rec

    sig_rec = (sig_rec / np.max(np.abs(sig_rec)) * (np.iinfo(np.int16).max - 1)).astype(np.int16)
    return sig_rec
    # scipy.io.wavfile.write(output_file, freq, np.float32(sig_rec))
    # awk -F'[/.]' '{print $5 " sox " $0 " -t wav -R -b 16 - |"}' > data/$dset$anon_data_suffix/wav.scp
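For reference, a minimal standalone sketch of applying anonym() to one file (hypothetical usage, not part of this commit; it assumes librosa and soundfile are installed, and 'input.wav'/'output.wav' are placeholder paths):

import librosa
import soundfile as sf
from anonymization.modules.dsp.anonymise_dir_mcadams_rand_seed import anonym

samples, freq = librosa.load('input.wav', sr=None)  # float samples in [-1, 1]
anon = anonym(freq=freq, samples=samples, winLengthinms=20,
              shiftLengthinms=10, lp_order=20, mcadams=0.8)
sf.write('output.wav', anon, freq)  # anonym() returns int16 samples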
anonymization/pipelines/dsp_pipeline.py (28 additions, 0 deletions)
@@ -0,0 +1,28 @@
from pathlib import Path

from anonymization.modules.dsp.anonymise_dir_mcadams_rand_seed import process_data


class DSPPipeline:
    """
    This pipeline consists of:
              - ASR -> phone sequence                             -
    input     - (prosody extractor -> prosody anonymizer)         - TTS -> output
              - speaker embedding extractor -> speaker anonymizer -
    """

    def __init__(self, config):
        self.config = config
        self.libri_360_data_dir = Path(config['dataset_libri_360']) if 'dataset_libri_360' in config else None
        self.modules_config = config['modules']

    def run_anonymization_pipeline(self, datasets):
        for i, (dataset_name, dataset_path) in enumerate(datasets.items()):
            print(f'{i + 1}/{len(datasets)}: Processing {dataset_name}...')
            process_data(dataset_path=dataset_path, anon_level=self.modules_config['anon_level'],
                         settings=self.modules_config)
            print('Done')

        if self.libri_360_data_dir:
            process_data(dataset_path=self.libri_360_data_dir,
                         anon_level=self.modules_config['anon_level_libri_360'],
                         settings=self.modules_config)
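A minimal sketch of driving this pipeline directly (hypothetical usage, not part of this commit; the keys mirror the modules section of the config below, and 'data/libri_dev' is a placeholder path that must contain wav.scp and utt2spk):

from pathlib import Path
from anonymization.pipelines.dsp_pipeline import DSPPipeline

config = {
    'modules': {
        'anon_suffix': '_dsp',
        'n_coeffs': 20,
        'mc_coeff_min': 0.5,
        'mc_coeff_max': 0.9,
        'winLengthinms': 20,
        'shiftLengthinms': 10,
        'anon_level': 'spk',
    }
}
pipeline = DSPPipeline(config=config)
pipeline.run_anonymization_pipeline({'libri_dev': Path('data/libri_dev')})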
Configuration YAML (file name not captured; 41 additions, 0 deletions)
@@ -0,0 +1,41 @@
root_dir: ..
data_dir: !ref <root_dir>/data  # TODO adjust path
save_output: true

datasets:
  - name: libri_dev
    data: libri
    set: dev
    enrolls: [enrolls]
    trials: [trials_f, trials_m]
  - name: libri_test
    data: libri
    set: test
    enrolls: [enrolls]
    trials: [trials_f, trials_m]
  - name: vctk_dev
    data: vctk
    set: dev
    enrolls: [enrolls]
    trials: [trials_f_all, trials_m_all]
  - name: vctk_test
    data: vctk
    set: test
    enrolls: [enrolls]
    trials: [trials_f_all, trials_m_all]

dataset_libri_360: !ref <data_dir>/train-clean-360-asv
results_dir: !ref <root_dir>/results  # TODO adjust path
pipeline: dsp

modules:
  anon_suffix: _dsp
  n_coeffs: 20
  mc_coeff_min: 0.5
  mc_coeff_max: 0.9
  winLengthinms: 20
  shiftLengthinms: 10
  seed: 0
  anon_level: spk
  anon_level_libri_360: utt
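Assuming HyperPyYAML-style substitution of the !ref tags (an assumption; the parse_yaml helper is not shown in this commit), the path entries above would resolve roughly as:

# data_dir          -> ../data
# dataset_libri_360 -> ../data/train-clean-360-asv
# results_dir       -> ../results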
Run script (file name not captured; 32 additions, 0 deletions)
@@ -0,0 +1,32 @@
from pathlib import Path
from argparse import ArgumentParser

import torch

from anonymization.pipelines.dsp_pipeline import DSPPipeline
from utils import parse_yaml, get_datasets

PIPELINES = {
    'dsp': DSPPipeline
}

if __name__ == '__main__':
    parser = ArgumentParser()
    parser.add_argument('--config', default='anon_config.yaml')
    parser.add_argument('--gpu_ids', default='0')
    # argparse's type=bool treats any non-empty string (including 'False') as True,
    # so a store_true flag is the reliable way to expose a boolean switch
    parser.add_argument('--force_compute', action='store_true')
    args = parser.parse_args()

    config = parse_yaml(Path('configs', args.config))
    datasets = get_datasets(config)

    gpus = args.gpu_ids.split(',')

    devices = []
    if torch.cuda.is_available():
        for gpu in gpus:
            devices.append(torch.device(f'cuda:{gpu}'))
    else:
        devices.append(torch.device('cpu'))

    pipeline = PIPELINES[config['pipeline']](config=config)
    pipeline.run_anonymization_pipeline(datasets)
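A hypothetical invocation of this script (the script's file name is not captured in this diff, so run_anonymization.py is a placeholder; the flags match the parser above):

python run_anonymization.py --config anon_config.yaml --gpu_ids 0,1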