Skip to content

Commit

Permalink
Merge branch 'xx' into speechbrain_asr_eval
Browse files Browse the repository at this point in the history
  • Loading branch information
Sarina Meyer committed Dec 23, 2023
2 parents b0da6a1 + 062c6d7 commit bae671c
Show file tree
Hide file tree
Showing 5 changed files with 244 additions and 1 deletion.
142 changes: 142 additions & 0 deletions anonymization/modules/dsp/anonymise_dir_mcadams_rand_seed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
#!/usr/bin/env python3.0
# -*- coding: utf-8 -*-
"""
@author: Jose Patino, Massimiliano Todisco, Pramod Bachhav, Nicholas Evans
Audio Security and Privacy Group, EURECOM
modified version (N.T.)
"""
import os
import librosa
import numpy as np
import scipy
import wave
import argparse
from pathlib import Path
import matplotlib.pyplot as plt
import random
from kaldiio import ReadHelper
import shutil

def load_utt2spk(path):
    """Load a Kaldi-style ``utt2spk`` file into a dictionary.

    Every non-empty line must hold exactly two whitespace-separated fields:
    the utterance ID followed by the speaker ID.

    Args:
        path: Location of the ``utt2spk`` file.

    Returns:
        dict mapping each utterance ID to its speaker ID (both ``str``).

    Raises:
        AssertionError: If ``path`` is not an existing file.
        ValueError: If a non-empty line does not have exactly two fields.
    """
    assert os.path.isfile(path), f'File does not exist {path}'

    utt2spk = {}
    # Parsed line by line rather than with np.genfromtxt: for a file with a
    # single entry genfromtxt returns a 1-D array, which breaks the
    # (utt, spk) unpacking, and it would also drop anything after a '#'.
    with open(path, encoding='utf-8') as handle:
        for line_no, line in enumerate(handle, start=1):
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines
            if len(fields) != 2:
                raise ValueError(
                    f'{path}:{line_no}: expected "<utt-id> <spk-id>", got {line.rstrip()!r}'
                )
            utt, spk = fields
            utt2spk[utt] = spk
    return utt2spk

def process_data(dataset_path, anon_level, settings):
    """Anonymise every utterance of a Kaldi-style data directory.

    The directory ``dataset_path`` is copied to a sibling directory whose name
    is ``dataset_path`` plus ``settings['anon_suffix']`` (an existing copy is
    deleted first). Each utterance listed in ``wav.scp`` is passed through
    ``anonym`` with a McAdams coefficient drawn uniformly from
    ``[settings['mc_coeff_min'], settings['mc_coeff_max']]`` and written as a
    mono 16-bit WAV file into ``<output>/wav``. A new ``wav.scp`` pointing to
    the written files replaces the copied one.

    Args:
        dataset_path: ``pathlib.Path`` of the source data directory; must
            contain ``wav.scp`` and, for speaker-level anonymisation,
            ``utt2spk``.
        anon_level: ``'spk'`` draws one coefficient per speaker (the random
            generator is re-seeded from the speaker ID before every draw);
            any other value draws a new coefficient for every utterance.
        settings: Mapping with the keys ``anon_suffix``, ``mc_coeff_min``,
            ``mc_coeff_max``, ``winLengthinms``, ``shiftLengthinms`` and
            ``n_coeffs``.

    Raises:
        AssertionError: If ``utt2spk`` is missing or lacks an utterance while
            ``anon_level`` is ``'spk'``.
    """
    utt2spk = None
    if anon_level == 'spk':
        utt2spk = load_utt2spk(dataset_path / 'utt2spk')

    output_path = Path(str(dataset_path) + settings['anon_suffix'])
    if os.path.exists(output_path):
        shutil.rmtree(output_path)
    shutil.copytree(dataset_path, output_path)
    wav_dir = output_path / 'wav'
    os.makedirs(wav_dir, exist_ok=True)

    wav_scp = dataset_path / 'wav.scp'
    path_wav_scp_out = output_path / 'wav.scp'
    with open(path_wav_scp_out, 'wt', encoding='utf-8') as writer, \
            ReadHelper(f'scp:{wav_scp}') as reader:
        print(reader)
        for utid, (freq, samples) in reader:
            print(utid)
            output_file = os.path.join(wav_dir, f'{utid}.wav')
            print(output_file)
            if os.path.exists(output_file):
                # NOTE(review): a skipped utterance gets no line in the new
                # wav.scp, and the existing file can only come from the
                # copytree above -- confirm this is the intended behaviour.
                print('file already exists')
                continue
            # Scale to [-1, 1); assumes the reader yields int16 PCM -- TODO confirm.
            samples = samples / (np.iinfo(np.int16).max + 1)
            if anon_level == 'spk':
                assert utid in utt2spk, f'Failed to find speaker ID for utterance {utid}'
                spid = utt2spk[utid]
                # Seed with the speaker ID string itself. The built-in hash()
                # of a str is salted per interpreter run, so seeding with it
                # gave a different coefficient per speaker on every run, and
                # the NumPy integer produced by np.abs() is not an accepted
                # seed type on Python >= 3.11.
                random.seed(str(spid))
            rand_mc_coeff = random.uniform(settings['mc_coeff_min'], settings['mc_coeff_max'])

            samples = anonym(freq=freq, samples=samples,
                             winLengthinms=settings['winLengthinms'],
                             shiftLengthinms=settings['shiftLengthinms'],
                             lp_order=settings['n_coeffs'], mcadams=rand_mc_coeff)

            with wave.open(output_file, 'wb') as stream:
                stream.setframerate(freq)
                stream.setnchannels(1)
                stream.setsampwidth(2)  # anonym() returns int16 samples
                stream.writeframes(samples.tobytes())
            print(f'{utid} {output_file}', file=writer)
    print('Done')

def anonym(freq, samples, winLengthinms=20, shiftLengthinms=10, lp_order=20, mcadams=0.8):
    """Anonymise a waveform by warping the angles of its LPC poles.

    The signal is cut into overlapping windowed frames. For each frame the
    LPC coefficients are estimated, the angle of every complex pole is raised
    to the power ``mcadams`` (magnitudes are kept), and the frame is
    re-synthesised from its LPC residual with the modified filter. Frames are
    recombined by overlap-add and the result is peak-normalised to int16.

    Args:
        freq: Sampling rate in Hz.
        samples: 1-D NumPy array with the waveform.
        winLengthinms: Analysis window length in milliseconds.
        shiftLengthinms: Frame shift in milliseconds.
        lp_order: Order of the linear-prediction analysis.
        mcadams: Exponent applied to the pole angles.

    Returns:
        ``np.int16`` array with the same length as ``samples``. If nothing
        could be synthesised (empty, silent, or too short input) the result
        is all zeros.
    """
    print(mcadams)
    eps = np.finfo(np.float32).eps
    samples = samples + eps

    # simulation parameters
    winlen = np.floor(winLengthinms * 0.001 * freq).astype(int)
    shift = np.floor(shiftLengthinms * 0.001 * freq).astype(int)
    length_sig = len(samples)

    # analysis and synthesis window which satisfies the constraint
    wPR = np.hanning(winlen)
    K = np.sum(wPR) / shift
    win = np.sqrt(wPR / K)
    Nframes = 1 + np.floor((length_sig - winlen) / shift).astype(int)  # nr of complete frames

    # carry out the overlap-add processing
    sig_rec = np.zeros([length_sig])  # allocate output + 'ringing' vector

    # NOTE(review): the loop starts at frame 1, so frame 0 is never
    # processed; kept exactly as in the original implementation.
    for m in range(1, Nframes):
        # indices of the mth frame
        index = np.arange(m * shift, np.minimum(m * shift + winlen, length_sig))
        # windowed mth frame (other than rectangular window)
        frame = samples[index] * win
        # get lpc coefficients
        a_lpc = librosa.core.lpc(frame + eps, order=lp_order)
        # get poles
        poles = scipy.signal.tf2zpk(np.array([1]), a_lpc)[1]
        # index of the complex (non-real) poles
        ind_imag = np.where(~np.isreal(poles))[0]
        # index of the first pole of each pair
        # NOTE(review): assumes conjugate poles are adjacent and that the
        # first of each pair has a positive angle (a negative angle raised to
        # a fractional power yields NaN) -- confirm against tf2zpk's ordering.
        ind_imag_con = ind_imag[::2]

        # here we define the new angles of the poles, shifted accordingly to the mcadams coefficient
        # values >1 expand the spectrum, while values <1 contract it for angles>1
        # values >1 contract the spectrum, while values <1 expand it for angles<1
        # the choice of this value is strongly linked to the number of lpc coefficients
        # a bigger lpc coefficients number constrains the effect of the coefficient to very small variations
        # a smaller lpc coefficients number allows for a bigger flexibility
        new_angles = np.angle(poles[ind_imag_con]) ** mcadams

        # make sure new angles stay between 0 and pi
        new_angles = np.clip(new_angles, 0, np.pi)

        # real copy of the original poles to be adjusted with the new angles
        # (a plain assignment would only alias `poles`)
        new_poles = poles.copy()
        for k in np.arange(np.size(ind_imag_con)):
            # compute new poles with the same magnitude and new angles
            new_poles[ind_imag_con[k]] = np.abs(poles[ind_imag_con[k]]) * np.exp(1j * new_angles[k])
            # applied also to the conjugate pole
            new_poles[ind_imag_con[k] + 1] = np.abs(poles[ind_imag_con[k] + 1]) * np.exp(-1j * new_angles[k])

        # recover new, modified lpc coefficients
        a_lpc_new = np.real(np.poly(new_poles))
        # get residual excitation for reconstruction
        res = scipy.signal.lfilter(a_lpc, np.array(1), frame)
        # reconstruct frames with new lpc coefficient
        frame_rec = scipy.signal.lfilter(np.array([1]), a_lpc_new, res)
        frame_rec = frame_rec * win

        outindex = np.arange(m * shift, m * shift + len(frame_rec))
        # overlap add
        sig_rec[outindex] = sig_rec[outindex] + frame_rec

    # Peak-normalise to the int16 range. When nothing was synthesised the
    # peak is 0 and the division would yield NaN, so return silence instead.
    peak = np.max(np.abs(sig_rec)) if sig_rec.size else 0
    if peak == 0:
        return np.zeros(length_sig, dtype=np.int16)
    sig_rec = (sig_rec / peak * (np.iinfo(np.int16).max - 1)).astype(np.int16)
    return sig_rec
#scipy.io.wavfile.write(output_file, freq, np.float32(sig_rec))
#awk -F'[/.]' '{print $5 " sox " $0 " -t wav -R -b 16 - |"}' > data/$dset$anon_data_suffix/wav.scp
28 changes: 28 additions & 0 deletions anonymization/pipelines/dsp_pipeline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from pathlib import Path
from anonymization.modules.dsp.anonymise_dir_mcadams_rand_seed import process_data


class DSPPipeline:
    """
    Signal-processing anonymization pipeline.

    Applies ``process_data`` to every dataset handed to
    ``run_anonymization_pipeline`` and, if the config has a
    ``dataset_libri_360`` entry, to that directory as well.
    """

    def __init__(self, config):
        """Store the config, the optional libri-360 directory and the module settings."""
        self.config = config
        if 'dataset_libri_360' in config:
            self.libri_360_data_dir = Path(config['dataset_libri_360'])
        else:
            self.libri_360_data_dir = None
        self.modules_config = config['modules']

    def run_anonymization_pipeline(self, datasets):
        """
        Anonymize all given datasets, then the libri-360 directory if configured.

        ``datasets`` maps dataset names to dataset paths. The datasets use the
        ``anon_level`` module setting, the libri-360 directory uses
        ``anon_level_libri_360``.
        """
        total = len(datasets)
        for position, (dataset_name, dataset_path) in enumerate(datasets.items(), start=1):
            print(f'{position}/{total}: Processing {dataset_name}...')
            process_data(
                dataset_path=dataset_path,
                anon_level=self.modules_config['anon_level'],
                settings=self.modules_config,
            )
            print('Done')

        if not self.libri_360_data_dir:
            return

        process_data(
            dataset_path=self.libri_360_data_dir,
            anon_level=self.modules_config['anon_level_libri_360'],
            settings=self.modules_config,
        )
41 changes: 41 additions & 0 deletions configs/anon_dsp.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
root_dir : ..
data_dir: !ref data # TODO adjust path
save_output: true

datasets:
- name: libri_dev
data: libri
set: dev
enrolls: [enrolls]
trials: [trials_f, trials_m]
- name: libri_test
data: libri
set: test
enrolls: [enrolls]
trials: [trials_f, trials_m]
- name: vctk_dev
data: vctk
set: dev
enrolls: [enrolls]
trials: [trials_f_all, trials_m_all]
- name: vctk_test
data: vctk
set: test
enrolls: [enrolls]
trials: [trials_f_all, trials_m_all]


dataset_libri_360: !ref <data_dir>/train-clean-360-asv
results_dir: !ref results # TODO adjust path
pipeline: dsp

modules:
anon_suffix: _dsp
n_coeffs: 20
mc_coeff_min: 0.5
mc_coeff_max: 0.9
winLengthinms: 20
shiftLengthinms: 10
seed: 0
anon_level: spk
anon_level_libri_360: utt
2 changes: 1 addition & 1 deletion configs/eval_pre_ecapa_cos.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ privacy:
training:
anon: false # true or false, depending on whether the training data for the ASV is anonymized or original
train_data_dir: !ref <utility[asr][libri_dir]>/LibriSpeech/train-clean-360 # path to original or anonymized training data for ASV
train_config: evaluation/privacy/asv_train/hparams/train_ecapa_tdnn_small.yaml
train_config: evaluation/privacy/asv/asv_train/hparams/train_ecapa_tdnn_small.yaml
finetuning: false # true (ft) or false (scratch)
pretrained_model: null # path to pretrained model, only used for finetuning
lr: 0.01
Expand Down
32 changes: 32 additions & 0 deletions run_anonymization_dsp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from pathlib import Path
from argparse import ArgumentParser
import torch
from anonymization.pipelines.dsp_pipeline import DSPPipeline
from utils import parse_yaml, get_datasets

# Maps the `pipeline` value of the config file to the pipeline class to instantiate.
PIPELINES = {
'dsp': DSPPipeline
}

def str2bool(value):
    """Convert a command-line string such as ``'False'`` or ``'1'`` to a bool.

    Plain ``type=bool`` must not be used with argparse because ``bool`` of
    any non-empty string, including ``'False'``, is ``True``.

    Args:
        value: The raw argument string (a ``bool`` is returned unchanged).

    Returns:
        The parsed boolean.

    Raises:
        ValueError: If ``value`` is not a recognised boolean spelling;
            argparse reports this as an invalid argument value.
    """
    if isinstance(value, bool):
        return value
    normalized = str(value).strip().lower()
    if normalized in ('true', 't', 'yes', 'y', '1'):
        return True
    if normalized in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise ValueError(f'Expected a boolean value, got {value!r}')


if __name__ == '__main__':
    parser = ArgumentParser()
    parser.add_argument('--config', default='anon_config.yaml')
    parser.add_argument('--gpu_ids', default='0')
    parser.add_argument('--force_compute', default=False, type=str2bool)
    args = parser.parse_args()

    # The config file name is resolved relative to the 'configs' directory.
    config = parse_yaml(Path('configs', args.config))
    datasets = get_datasets(config)

    gpus = args.gpu_ids.split(',')

    # Device list built from --gpu_ids; it is not passed to the pipeline below.
    if torch.cuda.is_available():
        devices = [torch.device(f'cuda:{gpu}') for gpu in gpus]
    else:
        devices = [torch.device('cpu')]

    pipeline = PIPELINES[config['pipeline']](config=config)
    pipeline.run_anonymization_pipeline(datasets)

0 comments on commit bae671c

Please sign in to comment.