diff --git a/anonymization/modules/prosody/extraction/ims_prosody_extraction.py b/anonymization/modules/prosody/extraction/ims_prosody_extraction.py index cc700af..7aad79e 100644 --- a/anonymization/modules/prosody/extraction/ims_prosody_extraction.py +++ b/anonymization/modules/prosody/extraction/ims_prosody_extraction.py @@ -1,3 +1,4 @@ +import logging import torch torch.set_num_threads(1) @@ -11,6 +12,7 @@ from anonymization.modules.tts.IMSToucan.TrainingInterfaces.Text_to_Spectrogram.FastSpeech2.EnergyCalculator import EnergyCalculator from anonymization.modules.tts.IMSToucan.TrainingInterfaces.Text_to_Spectrogram.FastSpeech2.PitchCalculator import Parselmouth +logger = logging.getLogger(__name__) class ImsProsodyExtractor: @@ -54,7 +56,7 @@ def extract_prosody(self, try: norm_wave = self.ap.audio_to_wave_tensor(normalize=True, audio=wave) except ValueError: - print('Something went wrong, the reference wave might be too short.') + logger.error('Something went wrong, the reference wave might be too short.') raise RuntimeError with torch.inference_mode(): diff --git a/anonymization/modules/prosody/prosody_extraction.py b/anonymization/modules/prosody/prosody_extraction.py index 478f574..dcc6c2d 100644 --- a/anonymization/modules/prosody/prosody_extraction.py +++ b/anonymization/modules/prosody/prosody_extraction.py @@ -1,3 +1,4 @@ +import logging import torch torch.set_num_threads(1) @@ -8,6 +9,7 @@ from .extraction import * from utils import read_kaldi_format +logger = logging.getLogger(__name__) class ProsodyExtraction: @@ -47,7 +49,7 @@ def extract_prosody(self, dataset_path: Path, texts, dataset_name=None): wav_scp = {utt: wav_scp[utt] for utt in unprocessed_utts} if wav_scp: - print(f'Extract prosody for {len(wav_scp)} of {len(wav_scp) + len(data_prosody)} utterances') + logger.info(f'Extract prosody for {len(wav_scp)} of {len(wav_scp) + len(data_prosody)} utterances') data_prosody.new = True i = 0 for utt, wav_path in tqdm(wav_scp.items()): @@ -56,7 +58,7 @@ def extract_prosody(self, dataset_path: Path, texts, dataset_name=None): utt_prosody = self.extractor.extract_prosody(transcript=text, ref_audio_path=wav_path, input_is_phones=text_is_phones) except IndexError: - print(f'Index Error for {utt}') + logger.warning(f'IndexError for {utt}') continue duration, pitch, energy, start_silence, end_silence = utt_prosody data_prosody.add_instance(utterance=utt, duration=duration, pitch=pitch, energy=energy, @@ -69,8 +71,8 @@ def extract_prosody(self, dataset_path: Path, texts, dataset_name=None): data_prosody.save_prosody(dataset_results_dir) elif len(data_prosody.utterances) > 0: - print('No prosody extraction necessary; load stored values instead...') + logger.info('No prosody extraction necessary; load stored values instead...') else: - print(f'No utterances could be found in {dataset_path}!') + logger.warning(f'No utterances could be found in {dataset_path}!') return data_prosody diff --git a/anonymization/modules/speaker_embeddings/anonymization/gan_anon.py b/anonymization/modules/speaker_embeddings/anonymization/gan_anon.py index 0fb5757..5d1ab84 100644 --- a/anonymization/modules/speaker_embeddings/anonymization/gan_anon.py +++ b/anonymization/modules/speaker_embeddings/anonymization/gan_anon.py @@ -1,3 +1,4 @@ +import logging from pathlib import Path import torch import numpy as np @@ -10,6 +11,7 @@ from ..speaker_embeddings import SpeakerEmbeddings from .utils.WGAN import EmbeddingsGenerator +logger = logging.getLogger(__name__) class GANAnonymizer(BaseAnonymizer): """ @@ -84,9 +86,9 @@
def anonymize_embeddings( or 'utt' for utterance level). """ if emb_level == "spk": - print(f"Anonymize embeddings of {len(speaker_embeddings)} speakers...") + logger.info(f"Anonymize embeddings of {len(speaker_embeddings)} speakers...") elif emb_level == "utt": - print(f"Anonymize embeddings of {len(speaker_embeddings)} utterances...") + logger.info(f"Anonymize embeddings of {len(speaker_embeddings)} utterances...") identifiers = [] speakers = [] @@ -117,7 +119,7 @@ def anonymize_embeddings( return anon_embeddings def _generate_artificial_embeddings(self, gan_model_path: Path, n: int): - print(f"Generate {n} artificial speaker embeddings...") + logger.info(f"Generate {n} artificial speaker embeddings...") generator = EmbeddingsGenerator(gan_path=gan_model_path, device=self.device) gan_vectors = generator.generate_embeddings(n=n) unused_indices = np.arange(len(gan_vectors)) diff --git a/anonymization/modules/speaker_embeddings/anonymization/pool_anon.py b/anonymization/modules/speaker_embeddings/anonymization/pool_anon.py index 3a21db1..c24151f 100644 --- a/anonymization/modules/speaker_embeddings/anonymization/pool_anon.py +++ b/anonymization/modules/speaker_embeddings/anonymization/pool_anon.py @@ -1,3 +1,4 @@ +import logging from pathlib import Path import numpy as np import torch @@ -15,6 +16,8 @@ from ..speaker_embeddings import SpeakerEmbeddings from utils import transform_path +logger = logging.getLogger(__name__) + REVERSED_GENDERS = { "m": "f", "f": "m" } @@ -144,7 +147,7 @@ def __init__( self.stats_per_dim_path = stats_per_dim_path or Path() def _load_pool_embeddings(self, pool_data_dir, pool_vec_path, embed_model_path): - print(pool_data_dir) + logger.debug(pool_data_dir) if pool_vec_path.exists(): pool_embeddings = SpeakerEmbeddings( vec_type=self.vec_type, emb_level="spk", device=self.device ) @@ -168,7 +171,7 @@ def anonymize_embeddings(self, speaker_embeddings: torch.Tensor, emb_level: str vectors_a=self.pool_embeddings.vectors, vectors_b=speaker_embeddings.vectors ) - print(f"Anonymize embeddings of {len(speaker_embeddings)} speakers...") + logger.info(f"Anonymize embeddings of {len(speaker_embeddings)} speakers...") identifiers = [] speakers = [] anon_vectors = [] diff --git a/anonymization/modules/speaker_embeddings/anonymization/random_anon.py b/anonymization/modules/speaker_embeddings/anonymization/random_anon.py index 98ffc84..f0d2463 100644 --- a/anonymization/modules/speaker_embeddings/anonymization/random_anon.py +++ b/anonymization/modules/speaker_embeddings/anonymization/random_anon.py @@ -1,4 +1,5 @@ import json +import logging from pathlib import Path import torch from os import PathLike @@ -8,6 +9,7 @@ from .base_anon import BaseAnonymizer from ..speaker_embeddings import SpeakerEmbeddings +logger = logging.getLogger(__name__) class RandomAnonymizer(BaseAnonymizer): """ @@ -73,7 +75,7 @@ def anonymize_embeddings(self, speaker_embeddings, emb_level="spk"): utterance level).
""" if self.scaling_ranges: - print("Anonymize vectors in scale!") + logger.debug("Anonymize vectors in scale!") return self._anonymize_data_in_scale(speaker_embeddings) else: identifiers = [] diff --git a/anonymization/modules/speaker_embeddings/anonymization/utils/plda_model.py b/anonymization/modules/speaker_embeddings/anonymization/utils/plda_model.py index 3abf1fa..18482de 100644 --- a/anonymization/modules/speaker_embeddings/anonymization/utils/plda_model.py +++ b/anonymization/modules/speaker_embeddings/anonymization/utils/plda_model.py @@ -1,9 +1,11 @@ # This code is based on the descriptions in https://github.com/speechbrain/speechbrain/blob/develop/speechbrain/processing/PLDA_LDA.py +import logging from pathlib import Path from speechbrain.processing.PLDA_LDA import PLDA, StatObject_SB, Ndx, fast_PLDA_scoring import numpy as np import torch +logger = logging.getLogger(__name__) class PLDAModel: def __init__(self, train_embeddings, results_path: Path=None, save_plda=True): @@ -64,13 +66,13 @@ def _train_plda(self, train_embeddings): vectors = train_embeddings.vectors.to(torch.float64) modelset = np.array([f'md{speaker}' for speaker in train_embeddings.original_speakers], dtype="|O") - print(len(modelset), len(set(modelset))) + logger.debug(len(modelset), len(set(modelset))) segset, s, stat0 = self._get_vector_stats(vectors, sg_tag='sg', utt_ids=train_embeddings.get_utt_list()) xvectors_stat = StatObject_SB(modelset=modelset, segset=segset, start=s, stop=s, stat0=stat0, stat1=vectors.cpu().numpy()) - print(vectors.shape) + logger.debug(vectors.shape) plda = PLDA(rank_f=100) plda.plda(xvectors_stat) diff --git a/anonymization/modules/speaker_embeddings/speaker_anonymization.py b/anonymization/modules/speaker_embeddings/speaker_anonymization.py index 5c82113..f156eaf 100644 --- a/anonymization/modules/speaker_embeddings/speaker_anonymization.py +++ b/anonymization/modules/speaker_embeddings/speaker_anonymization.py @@ -1,8 +1,10 @@ +import logging from pathlib import Path from .anonymization.base_anon import BaseAnonymizer from .speaker_embeddings import SpeakerEmbeddings +logger = logging.getLogger(__name__) class SpeakerAnonymization: @@ -38,14 +40,14 @@ def anonymize_embeddings(self, speaker_embeddings, dataset_name): self.force_compute: # if there are already anonymized speaker embeddings from this model and the computation is not forced, # simply load them - print('No computation of anonymized embeddings necessary; load existing anonymized speaker embeddings ' + logger.info('No computation of anonymized embeddings necessary; load existing anonymized speaker embeddings ' 'instead...') anon_embeddings = SpeakerEmbeddings(vec_type=self.vec_type, emb_level=self.emb_level, device=self.device) anon_embeddings.load_vectors(dataset_results_dir) return anon_embeddings else: # otherwise, create new anonymized speaker embeddings - print('Anonymize speaker embeddings...') + logger.info('Anonymize speaker embeddings...') anon_embeddings = self.anonymizer.anonymize_embeddings(speaker_embeddings, emb_level=self.emb_level) if self.save_intermediate: @@ -58,5 +60,5 @@ def _load_anonymizer(self, settings: dict): 'The anonymizer must be an instance of BaseAnonymizer, or a ' \ f'subclass of it, but received an instance of {type(anon_method)}' - print(f'Model type of anonymizer: {type(anon_method).__name__}') + logger.info(f'Model type of anonymizer: {type(anon_method).__name__}') return anon_method diff --git a/anonymization/modules/speaker_embeddings/speaker_extraction.py 
b/anonymization/modules/speaker_embeddings/speaker_extraction.py index 9600f1f..507781f 100644 --- a/anonymization/modules/speaker_embeddings/speaker_extraction.py +++ b/anonymization/modules/speaker_embeddings/speaker_extraction.py @@ -1,3 +1,4 @@ +import logging from tqdm import tqdm from pathlib import Path import torch @@ -14,7 +15,7 @@ from utils import read_kaldi_format set_start_method('spawn', force=True) - +logger = logging.getLogger(__name__) class SpeakerExtraction: @@ -57,12 +58,12 @@ def extract_speakers(self, dataset_path, dataset_name=None, emb_level=None): speaker_embeddings = SpeakerEmbeddings(vec_type=self.vec_type, emb_level='utt', device=self.devices[0]) if (dataset_results_dir / 'speaker_vectors.pt').exists() and not self.force_compute: - print('No speaker extraction necessary; load existing embeddings instead...') + logger.info('No speaker extraction necessary; load existing embeddings instead...') speaker_embeddings.load_vectors(dataset_results_dir) # assume the loaded vectors are computed according to the setting in config speaker_embeddings.emb_level = emb_level else: - print(f'Extract embeddings of {len(wav_scp)} utterances') + logger.info(f'Extract embeddings of {len(wav_scp)} utterances') speaker_embeddings.new = True if self.n_processes > 1: @@ -126,7 +127,7 @@ def extraction_job(data): try: spk_embs = [extractor.extract_vector(audio=norm_wave, sr=fs) for extractor in speaker_extractors] except RuntimeError as e: - print(f'Runtime error: {utt}, {signal.shape}, {norm_wave.shape}') + logger.warning(f'Runtime error: {utt}, {signal.shape}, {norm_wave.shape}') continue if len(spk_embs) == 1: diff --git a/anonymization/modules/text/speech_recognition.py b/anonymization/modules/text/speech_recognition.py index 3635426..8992b8c 100644 --- a/anonymization/modules/text/speech_recognition.py +++ b/anonymization/modules/text/speech_recognition.py @@ -1,6 +1,7 @@ from tqdm import tqdm from tqdm.contrib.concurrent import process_map import time +import logging from torch.multiprocessing import set_start_method from itertools import cycle, repeat import numpy as np @@ -11,7 +12,7 @@ from utils import read_kaldi_format set_start_method('spawn', force=True) - +logger = logging.getLogger(__name__) class SpeechRecognition: @@ -49,13 +50,13 @@ def recognize_speech(self, dataset_path, dataset_name=None, utterance_list=None) texts.load_text(in_dir=dataset_results_dir) if len(texts) == len(utt2spk): - print('No speech recognition necessary; load existing text instead...') + logger.info('No speech recognition necessary; load existing text instead...') else: if len(texts) > 0: - print(f'No speech recognition necessary for {len(texts)} of {len(utt2spk)} utterances') + logger.info(f'No speech recognition necessary for {len(texts)} of {len(utt2spk)} utterances') # otherwise, recognize the speech dataset_results_dir.mkdir(exist_ok=True, parents=True) - print(f'Recognize speech of {len(utt2spk)} utterances...') + logger.info(f'Recognize speech of {len(utt2spk)} utterances...') wav_scp = read_kaldi_format(dataset_path / 'wav.scp') utterances = [] @@ -86,7 +87,7 @@ def recognize_speech(self, dataset_path, dataset_name=None, utterance_list=None) end = time.time() total_time = round(end - start, 2) - print(f'Total time for speech recognition: {total_time} seconds ({round(total_time / 60, 2)} minutes / ' + logger.info(f'Total time for speech recognition: {total_time} seconds ({round(total_time / 60, 2)} minutes / ' f'{round(total_time / 60 / 60, 2)} hours)') texts =
self._combine_texts(main_text_instance=texts, additional_text_instances=new_texts) diff --git a/anonymization/modules/tts/IMSToucan/InferenceInterfaces/AnonFastSpeech2.py b/anonymization/modules/tts/IMSToucan/InferenceInterfaces/AnonFastSpeech2.py index 8ecd359..a2ba7c3 100644 --- a/anonymization/modules/tts/IMSToucan/InferenceInterfaces/AnonFastSpeech2.py +++ b/anonymization/modules/tts/IMSToucan/InferenceInterfaces/AnonFastSpeech2.py @@ -1,5 +1,6 @@ import itertools import os +import logging import librosa.display as lbd import matplotlib.pyplot as plt @@ -15,6 +16,7 @@ from ..Preprocessing.TextFrontend import get_language_id from ..TrainingInterfaces.Spectrogram_to_Embedding.StyleEmbedding import StyleEmbedding +logger = logging.getLogger(__name__) class AnonFastSpeech2(torch.nn.Module): @@ -174,7 +176,7 @@ def read_to_file(self, for (text, durations, pitch, energy) in itertools.zip_longest(text_list, dur_list, pitch_list, energy_list): if text.strip() != "": if not silent: - print("Now synthesizing: {}".format(text)) + logger.info("Now synthesizing: {}".format(text)) if wav is None: if durations is not None: durations = durations.to(self.device) diff --git a/anonymization/modules/tts/IMSToucan/Utility/utils.py b/anonymization/modules/tts/IMSToucan/Utility/utils.py index 5fa60eb..9809d58 100644 --- a/anonymization/modules/tts/IMSToucan/Utility/utils.py +++ b/anonymization/modules/tts/IMSToucan/Utility/utils.py @@ -4,9 +4,10 @@ import os from abc import ABC - +import logging import torch +logger = logging.getLogger(__name__) def cumsum_durations(durations): out = [0] @@ -39,11 +40,11 @@ def get_most_recent_checkpoint(checkpoint_dir, verbose=True): if el.endswith(".pt") and el != "best.pt": checkpoint_list.append(int(el.split(".")[0].split("_")[1])) if len(checkpoint_list) == 0: - print("No previous checkpoints found, cannot reload.") + logger.info("No previous checkpoints found, cannot reload.") return None checkpoint_list.sort(reverse=True) if verbose: - print("Reloading checkpoint_{}.pt".format(checkpoint_list[0])) + logger.info("Reloading checkpoint_{}.pt".format(checkpoint_list[0])) return os.path.join(checkpoint_dir, "checkpoint_{}.pt".format(checkpoint_list[0])) diff --git a/anonymization/modules/tts/IMSToucan/UtteranceCloner.py b/anonymization/modules/tts/IMSToucan/UtteranceCloner.py index 6d05e8e..df02d78 100644 --- a/anonymization/modules/tts/IMSToucan/UtteranceCloner.py +++ b/anonymization/modules/tts/IMSToucan/UtteranceCloner.py @@ -1,3 +1,4 @@ +import logging import soundfile as sf import torch from torch.optim import SGD @@ -10,6 +11,7 @@ from .TrainingInterfaces.Text_to_Spectrogram.FastSpeech2.EnergyCalculator import EnergyCalculator from .TrainingInterfaces.Text_to_Spectrogram.FastSpeech2.PitchCalculator import Parselmouth +logger = logging.getLogger(__name__) class UtteranceCloner: @@ -59,7 +61,7 @@ def extract_prosody(self, try: norm_wave = self.ap.audio_to_wave_tensor(normalize=True, audio=wave) except ValueError: - print('Something went wrong, the reference wave might be too short.') + logger.error('Something went wrong, the reference wave might be too short.') raise RuntimeError with torch.inference_mode(): diff --git a/anonymization/modules/tts/ims_tts.py b/anonymization/modules/tts/ims_tts.py index a5a4cc6..edcabb1 100644 --- a/anonymization/modules/tts/ims_tts.py +++ b/anonymization/modules/tts/ims_tts.py @@ -1,8 +1,10 @@ import torch import resampy +import logging from .IMSToucan.InferenceInterfaces.AnonFastSpeech2 import AnonFastSpeech2 +logger = 
logging.getLogger(__name__) class ImsTTS: @@ -36,7 +38,7 @@ def read_text(self, text, speaker_embedding, text_is_phones=True, duration=None, if i > 30: break if i > 0: - print(f'Synthesized utt in {i} takes') + logger.info(f'Synthesized utt in {i} takes') # start and end silence are computed for 16000, so we have to adapt this to different output sr factor = self.output_sr // 16000 diff --git a/anonymization/modules/tts/speech_synthesis.py b/anonymization/modules/tts/speech_synthesis.py index 911e68c..17d996a 100644 --- a/anonymization/modules/tts/speech_synthesis.py +++ b/anonymization/modules/tts/speech_synthesis.py @@ -1,6 +1,7 @@ from tqdm import tqdm import soundfile import time +import logging from torch.multiprocessing import Pool, set_start_method from itertools import repeat @@ -8,7 +9,7 @@ from utils import create_clean_dir set_start_method('spawn', force=True) - +logger = logging.getLogger(__name__) class SpeechSynthesis: @@ -52,7 +53,7 @@ def synthesize_speech(self, dataset_name, texts, speaker_embeddings, prosody=Non if wav_file.stem in texts.utterances} if len(already_synthesized_utts): - print(f'No synthesis necessary for {len(already_synthesized_utts)} of {len(texts)} utterances...') + logger.info(f'No synthesis necessary for {len(already_synthesized_utts)} of {len(texts)} utterances...') texts.remove_instances(list(already_synthesized_utts.keys())) if self.save_output: wavs = already_synthesized_utts @@ -63,7 +64,7 @@ def synthesize_speech(self, dataset_name, texts, speaker_embeddings, prosody=Non wavs[utt] = wav if texts: - print(f'Synthesize {len(texts)} utterances...') + logger.info(f'Synthesize {len(texts)} utterances...') if self.force_compute or not dataset_results_dir.exists(): create_clean_dir(dataset_results_dir) @@ -84,7 +85,7 @@ def synthesize_speech(self, dataset_name, texts, speaker_embeddings, prosody=Non utt_prosody_dict = {} instances.append((text, utt, speaker_embedding, utt_prosody_dict)) except KeyError: - print(f'Key error at {utt}') + logger.warning(f'Key error at {utt}') continue wavs.update(synthesis_job(instances=instances, tts_model=self.tts_models[0], out_dir=dataset_results_dir, sleep=0, text_is_phones=text_is_phones, @@ -111,7 +112,7 @@ def synthesize_speech(self, dataset_name, texts, speaker_embeddings, prosody=Non utt_prosody_dict = {} job_instances.append((text, utt, speaker_embedding, utt_prosody_dict)) except KeyError: - print(f'Key error at {utt}') + logger.warning(f'Key error at {utt}') continue instances.append(job_instances) diff --git a/anonymization/pipelines/sttts_pipeline.py b/anonymization/pipelines/sttts_pipeline.py index 10f5a1b..9226ace 100644 --- a/anonymization/pipelines/sttts_pipeline.py +++ b/anonymization/pipelines/sttts_pipeline.py @@ -1,5 +1,6 @@ from pathlib import Path from datetime import datetime +import logging from anonymization.modules import ( SpeechRecognition, @@ -12,6 +13,7 @@ import typing from utils import prepare_evaluation_data, save_yaml +logger = logging.getLogger(__name__) class STTTSPipeline: def __init__(self, config: dict, force_compute: bool, devices: list): @@ -110,7 +112,7 @@ def run_anonymization_pipeline( anon_wav_scps = {} for i, (dataset_name, dataset_path) in enumerate(datasets.items()): - print(f"{i + 1}/{len(datasets)}: Processing {dataset_name}...") + logger.info(f"{i + 1}/{len(datasets)}: Processing {dataset_name}...") # Step 1: Recognize speech, extract speaker embeddings, extract prosody texts = self.speech_recognition.recognize_speech( dataset_path=dataset_path, dataset_name=dataset_name ) @@
-147,9 +149,10 @@ def run_anonymization_pipeline( emb_level=anon_embeddings.emb_level, ) anon_wav_scps[dataset_name] = wav_scp - print("Done") + logger.info("Anonymization pipeline completed.") if prepare_results: + logger.info("Preparing results according to the Kaldi format.") if self.speaker_anonymization: anon_vectors_path = self.speaker_anonymization.results_dir else: diff --git a/evaluation/privacy/asv/asv.py b/evaluation/privacy/asv/asv.py index 6792d44..66d8080 100644 --- a/evaluation/privacy/asv/asv.py +++ b/evaluation/privacy/asv/asv.py @@ -1,5 +1,6 @@ # This code is partly based on # https://github.com/speechbrain/speechbrain/blob/develop/recipes/VoxCeleb/SpeakerRec/speaker_verification_plda.py +import logging from pathlib import Path import torch from speechbrain.utils.metric_stats import EER @@ -10,6 +11,7 @@ from anonymization.modules.speaker_embeddings import SpeakerExtraction from utils import write_table, read_kaldi_format, save_kaldi_format +logger = logging.getLogger(__name__) class ASV: @@ -138,7 +140,7 @@ def compute_distances(self, enrol_vectors, enrol_ids, test_vectors, test_ids): if self.plda_model_dir.exists(): self.plda = PLDAModel(train_embeddings=None, results_path=self.plda_model_dir) else: - print('Train PLDA model...') + logger.info('Train PLDA model...') plda_data_dir = self.plda_train_data_dir if self.plda_anon: @@ -146,7 +148,7 @@ def compute_distances(self, enrol_vectors, enrol_ids, test_vectors, test_ids): self.select_data_for_plda(all_data_dir=self.plda_train_data_dir, selected_data_dir=self.model_dir.parent, out_dir=plda_data_dir) - print(f'Using data under {plda_data_dir}') + logger.info(f'Using data under {plda_data_dir}') train_dict = self.extractor.extract_speakers(dataset_path=plda_data_dir, emb_level='utt') self.plda = PLDAModel(train_embeddings=train_dict, results_path=self.plda_model_dir) diff --git a/evaluation/privacy/asv/asv_train/libri_prepare.py b/evaluation/privacy/asv/asv_train/libri_prepare.py index 65be867..ab1630e 100644 --- a/evaluation/privacy/asv/asv_train/libri_prepare.py +++ b/evaluation/privacy/asv/asv_train/libri_prepare.py @@ -180,7 +180,7 @@ def _get_utt_split_lists( train_lst = [] dev_lst = [] - print("Getting file list...") + logger.debug("Getting file list...") for data_folder in data_folders: if anon: suffix = 'wav' @@ -212,14 +212,14 @@ def _get_utt_split_lists( selected_spk = {} #select the number of speakers if num_spk != 'ALL': - print("selected %s speakers for training"%num_spk) + logger.debug("selected %s speakers for training"%num_spk) selected_spks_pure = random.sample(spks_pure,int(num_spk)) for k,v in spk_files.items(): if k.split('-')[0] in selected_spks_pure: selected_spk[k] = v #selected_spk = dict(random.sample(spk_files.items(), int(num_spk))) elif num_spk == 'ALL': - print("selected all speakers for training") + logger.debug("selected all speakers for training") selected_spk = spk_files else: sys.exit("invalid $utt_spk value") @@ -228,7 +228,7 @@ def _get_utt_split_lists( if num_utt != 'ALL': # select the number of utterances for each speaker-sess-id if utt_selected_ways == 'spk-sess': - print("selected %s utterances for each selected speaker-sess-id" % num_utt) + logger.info("selected %s utterances for each selected speaker-sess-id" % num_utt) for spk in selected_spk: if len(selected_spk[spk]) >= int(num_utt): selected_list.extend(random.sample(selected_spk[spk], int(num_utt))) @@ -236,7 +236,7 @@ def _get_utt_split_lists( selected_list.extend(selected_spk[spk]) elif utt_selected_ways == 'spk-random': - 
print("randomly selected %s utterances for each selected speaker-id" % num_utt) + logger.info("randomly selected %s utterances for each selected speaker-id" % num_utt) selected_spks_pure = {} for k, v in selected_spk.items(): spk_pure = k.split('-')[0] @@ -253,7 +253,7 @@ def _get_utt_split_lists( selected_list.extend(selected_spk[spk]) elif utt_selected_ways == 'spk-diverse-sess': - print("diversely selected %s utterances for each selected speaker-id" % num_utt) + logger.info("diversely selected %s utterances for each selected speaker-id" % num_utt) selected_spks_pure = {} for k, v in selected_spk.items(): spk_pure = k.split('-')[0] @@ -273,7 +273,7 @@ def _get_utt_split_lists( elif num_utt == 'ALL': - print("selected all utterances for each selected speaker") + logger.info("selected all utterances for each selected speaker") for value in selected_spk.values(): for v in value: @@ -297,8 +297,8 @@ def _get_utt_split_lists( full = f'Full training set:{full_utt}' used = f'Used for training:{len(selected_list)}' - print(full) - print(used) + logger.debug(full) + logger.debug(used) split = int(0.01 * split_ratio[0] * len(selected_list)) train_snts = selected_list[:split] @@ -417,7 +417,7 @@ def prepare_csv(seg_dur, wav_lst, csv_file, random_segment=False, amp_th=0): ] entry.append(csv_line) - print(f'Skipped {len(problematic_wavs)} invalid audios') + logger.info(f'Skipped {len(problematic_wavs)} invalid audios') csv_output = csv_output + entry # Writing the csv lines diff --git a/evaluation/privacy/asv/metrics/cllr.py b/evaluation/privacy/asv/metrics/cllr.py index c11ba2d..40ba800 100644 --- a/evaluation/privacy/asv/metrics/cllr.py +++ b/evaluation/privacy/asv/metrics/cllr.py @@ -1,9 +1,11 @@ +import logging import numpy as np from scipy.special import expit from .helpers import optimal_llr from .utils.io import read_targets_and_nontargets +logger = logging.getLogger(__name__) def compute_cllr(score_file, key_file, compute_eer=False): # Computing Cllr and min Cllr for binary decision classifiers @@ -15,11 +17,9 @@ def compute_cllr(score_file, key_file, compute_eer=False): else: cllr_min = min_cllr(tar, non) - print("Cllr (min/act): %.3f/%.3f" % (cllr_min, cllr_act)) + logger.info("Cllr (min/act): %.3f/%.3f" % (cllr_min, cllr_act)) if compute_eer: - print("ROCCH-EER: %2.3f%%" % (100*eer)) - - print("") + logger.info("ROCCH-EER: %2.3f%%" % (100*eer)) def cllr(tar_llrs, nontar_llrs): diff --git a/evaluation/privacy/asv/metrics/linkability.py b/evaluation/privacy/asv/metrics/linkability.py index 0d0e057..91529e2 100644 --- a/evaluation/privacy/asv/metrics/linkability.py +++ b/evaluation/privacy/asv/metrics/linkability.py @@ -1,8 +1,10 @@ +import logging import numpy as np from .utils.visualization import draw_linkability_scores from .utils.io import read_targets_and_nontargets +logger = logging.getLogger(__name__) def compute_linkability(score_file, key_file, omega=1.0, use_draw_scores=False, output_file=None): # Computing the global linkability measure for a list of linkage function score @@ -20,8 +22,7 @@ def compute_linkability(score_file, key_file, omega=1.0, use_draw_scores=False, output_file = "linkability_" + score_file draw_linkability_scores(mated_scores, non_mated_scores, Dsys, D, bin_centers, bin_edges, str(output_file)) - print("linkability: %f" % (Dsys)) - print("") + logger.info("linkability: %f" % (Dsys)) def linkability(mated_scores, non_mated_scores, omega=1): diff --git a/evaluation/privacy/asv/metrics/utils/zebra_plots.py b/evaluation/privacy/asv/metrics/utils/zebra_plots.py 
index 81757d5..a59095a 100644 --- a/evaluation/privacy/asv/metrics/utils/zebra_plots.py +++ b/evaluation/privacy/asv/metrics/utils/zebra_plots.py @@ -1,3 +1,4 @@ +import logging import numpy as np from matplotlib._cm import datad import matplotlib.pyplot as mpl @@ -7,6 +8,7 @@ from .plo_plots import PriorLogOddsPlots from .io import read_targets_and_nontargets +logger = logging.getLogger(__name__) __author__ = "Andreas Nautsch" __email__ = "nautsch@eurecom.fr" @@ -84,10 +86,9 @@ def zebra_framework(plo_plot, scr_path, key_path, label='ZEBRA profile', str_max_abs_llr = '0' # print outs - print('') - print("%s" % label) - print("Population: %s bit" % str_dece) - print("Individual: %s (%s)" % (str_max_abs_llr, cat_tag)) + logger.info("%s" % label) + logger.info("Population: %s bit" % str_dece) + logger.info("Individual: %s (%s)" % (str_max_abs_llr, cat_tag)) # Creating log-odds plots if color_min is not None: @@ -99,7 +100,7 @@ def zebra_framework(plo_plot, scr_path, key_path, label='ZEBRA profile', # DCF if dcf_pot: plo_plot.plot_dcf(color_min=color_min, style_min=style_min, color_act=color_act, style_act=style_act) - print("1 - min Cllr: %.3f (0 is good)" % plo_plot.get_delta_DCF()) + logger.info("1 - min Cllr: %.3f (0 is good)" % plo_plot.get_delta_DCF()) plo_plot.add_legend_entry(legend_entry) diff --git a/evaluation/utility/asr/pyscripts/utils/plot_sinc_filters.py b/evaluation/utility/asr/pyscripts/utils/plot_sinc_filters.py index 6ca071f..8fc34b1 100755 --- a/evaluation/utility/asr/pyscripts/utils/plot_sinc_filters.py +++ b/evaluation/utility/asr/pyscripts/utils/plot_sinc_filters.py @@ -12,6 +12,7 @@ """ import argparse +import logging import sys from pathlib import Path @@ -19,6 +20,7 @@ import numpy as np import torch +logger = logging.getLogger(__name__) def get_parser(): """Construct the parser.""" @@ -141,7 +143,7 @@ def plot_filtergraph( ax.fill_between(x, f_mins, f_maxs, color="green", alpha=0.3) ax.legend(loc="upper left", prop={"size": 15}) plt.savefig(img_path, bbox_inches="tight") - print("Plotted %s" % img_path) + logger.debug("Plotted %s" % img_path) def plot_filter_kernels(filters: torch.Tensor, sample_rate: int, args): @@ -154,7 +156,7 @@ def plot_filter_kernels(filters: torch.Tensor, sample_rate: int, args): """ from espnet2.layers.sinc_conv import SincConv - print( + logger.warning( "When plotting filter kernels, make sure the script has the" " correct SincConv settings (currently hard-coded)."
) @@ -202,7 +204,7 @@ def plot_filter_kernels(filters: torch.Tensor, sample_rate: int, args): img_name = "filter_pre_kernel_%s.%s" % (str(i).zfill(2), args.filetype) img_path = str(args.out_folder / img_name) plt.savefig(img_path, bbox_inches="tight") - print("Plotted %s" % img_path) + logger.debug("Plotted %s" % img_path) kernel = kernels[i][0] plt.clf() @@ -212,7 +214,7 @@ def plot_filter_kernels(filters: torch.Tensor, sample_rate: int, args): img_name = "filter_kernel_%s.%s" % (str(i).zfill(2), args.filetype) img_path = str(args.out_folder / img_name) plt.savefig(img_path, bbox_inches="tight") - print("Plotted %s" % img_path) + logger.debug("Plotted %s" % img_path) plt.clf() plt.xlabel("kernel index") @@ -221,7 +223,7 @@ def plot_filter_kernels(filters: torch.Tensor, sample_rate: int, args): img_name = "filter_kernel_both_%s.%s" % (str(i).zfill(2), args.filetype) img_path = str(args.out_folder / img_name) plt.savefig(img_path, bbox_inches="tight") - print("Plotted %s" % img_path) + logger.debug("Plotted %s" % img_path) y = np.zeros_like(x_f) y[F_mins[i] : F_maxs[i]] = 1.0 @@ -230,7 +232,7 @@ def plot_filter_kernels(filters: torch.Tensor, sample_rate: int, args): img_name = "filter_freq_%s.%s" % (str(i).zfill(2), args.filetype) img_path = str(args.out_folder / img_name) plt.savefig(img_path, bbox_inches="tight") - print("Plotted %s" % img_path) + logger.debug("Plotted %s" % img_path) pre_y = np.zeros_like(x_f) pre_y[pre_F_mins[i] : pre_F_maxs[i]] = 1.0 @@ -240,7 +242,7 @@ def plot_filter_kernels(filters: torch.Tensor, sample_rate: int, args): img_name = "filter_freq_both_%s.%s" % (str(i).zfill(2), args.filetype) img_path = args.out_folder / img_name plt.savefig(img_path, bbox_inches="tight") - print("Plotted %s" % img_path) + logger.debug("Plotted %s" % img_path) plt.clf() filters = [32, 71, 113, 126] @@ -259,7 +261,7 @@ def plot_filter_kernels(filters: torch.Tensor, sample_rate: int, args): img_path = str(args.out_folder / img_name) plt.savefig(img_path, bbox_inches="tight") plt.close(fig) - print("Plotted %s" % img_path) + logger.debug("Plotted %s" % img_path) def plot_filters(indices, filename, F_mins, F_maxs, output_folder): @@ -282,7 +284,7 @@ def plot_filters(indices, filename, F_mins, F_maxs, output_folder): plt.plot(x, y) img_path = str(output_folder / filename) plt.savefig(img_path, bbox_inches="tight") - print("Plotted %s" % img_path) + logger.debug("Plotted %s" % img_path) def main(argv): diff --git a/evaluation/utility/voice_distinctiveness/deid_gvd.py b/evaluation/utility/voice_distinctiveness/deid_gvd.py index fb2c9df..6733756 100644 --- a/evaluation/utility/voice_distinctiveness/deid_gvd.py +++ b/evaluation/utility/voice_distinctiveness/deid_gvd.py @@ -1,3 +1,4 @@ +import logging from pathlib import Path import numpy as np import pandas as pd @@ -11,6 +12,7 @@ from evaluation.privacy import ASV from evaluation.privacy.asv.metrics.helpers import optimal_llr +logger = logging.getLogger(__name__) class VoiceDistinctiveness: @@ -98,7 +100,7 @@ def _select_utterances(self, spk2utt_x, spk2utt_y): y = [(spk, utt) for spk, utt_list in spk2utt_y.items() for utt in utt_list] else: - print("choose %d utterances for each spk to create trial" % int(self.num_per_spk)) + logger.info("choose %d utterances for each spk to create trial" % int(self.num_per_spk)) x = [(spk, utt) for spk, utt_list in spk2utt_x.items() for utt in random.sample(utt_list, k=min(self.num_per_spk, len(utt_list)))] y = [(spk, utt) for spk, utt_list in spk2utt_y.items() diff --git a/run_anonymization.py 
b/run_anonymization.py index 14085d3..49cf865 100644 --- a/run_anonymization.py +++ b/run_anonymization.py @@ -1,3 +1,4 @@ +import logging from pathlib import Path from argparse import ArgumentParser import torch @@ -29,5 +30,7 @@ devices.append(torch.device('cpu')) with torch.no_grad(): + logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') + logging.info(f'Running pipeline: {config["pipeline"]}') pipeline = PIPELINES[config['pipeline']](config=config, force_compute=args.force_compute, devices=devices) pipeline.run_anonymization_pipeline(datasets) diff --git a/run_evaluation.py b/run_evaluation.py index d81ab07..13b4976 100644 --- a/run_evaluation.py +++ b/run_evaluation.py @@ -1,4 +1,5 @@ # We need to set CUDA_VISIBLE_DEVICES before we import Pytorch so we will read all arguments directly on startup +import logging import os from argparse import ArgumentParser from pathlib import Path @@ -74,7 +75,7 @@ def find_asv_model_checkpoint(model_dir): def asv_train(train_params, output_dir): - print(f'Train ASV model: {output_dir}') + logging.info(f'Train ASV model: {output_dir}') hparams = { 'pretrained_path': str(train_params['pretrained_model']), 'batch_size': train_params['batch_size'], @@ -103,7 +104,7 @@ def asv_train(train_params, output_dir): def asv_eval(eval_datasets, eval_data_dir, params, device, anon_data_suffix, model_dir=None): model_dir = model_dir or find_asv_model_checkpoint(params['model_dir']) - print(f'Use ASV model for evaluation: {model_dir}') + logging.info(f'Use ASV model for evaluation: {model_dir}') save_dir = params['evaluation']['results_dir'] / f'{params["evaluation"]["distance"]}_out' asv = ASV(model_dir=model_dir, device=device, score_save_dir=save_dir, distance=params['evaluation']['distance'], @@ -121,7 +122,7 @@ def asv_eval(eval_datasets, eval_data_dir, params, device, anon_data_suffix, mod EER = asv.eer_compute(enrol_dir=eval_data_dir / enroll_name, test_dir=eval_data_dir / test_name, trial_runs_file=eval_data_dir / trial / 'trials') - print(f'{enroll_name}-{test_name}: {scenario.upper()}-EER={EER}') + logging.info(f'{enroll_name}-{test_name}: {scenario.upper()}-EER={EER}') trials_info = trial.split('_') gender = trials_info[3] if 'common' in trial: @@ -131,7 +132,7 @@ def asv_eval(eval_datasets, eval_data_dir, params, device, anon_data_suffix, mod 'trial': 'original' if scenario[1] == 'o' else 'anon', 'EER': round(EER * 100, 3)}) results_df = pd.DataFrame(results) - print(results_df) + logging.info(results_df) results_df.to_csv(save_dir / 'results.csv') @@ -164,7 +165,7 @@ def get_similarity_matrix(vd_model, out_dir, exp_name, segments_folder): **vd_settings) vd_orig, vd_anon = None, None save_dir_orig, save_dir_anon = None, None - print(f'Use ASV model {spk_ext_model_dir} for computing voice similarities of original and anonymized speakers') + logging.info(f'Use ASV model {spk_ext_model_dir} for computing voice similarities of original and anonymized speakers') elif 'orig_model_dir' in params['asv_params'] and 'anon_model_dir' in params['asv_params']: # use different ASV models for original and anon speaker spaces spk_ext_model_dir_orig = find_asv_model_checkpoint(params['asv_params']['orig_model_dir']) @@ -176,7 +177,7 @@ def get_similarity_matrix(vd_model, out_dir, exp_name, segments_folder): vd_anon = VoiceDistinctiveness(spk_ext_model_dir=spk_ext_model_dir_anon, score_save_dir=save_dir_anon, **vd_settings) vd = None - print(f'Use ASV model {spk_ext_model_dir_orig} for computing voice similarities of
original speakers and ASV ' + logging.info(f'Use ASV model {spk_ext_model_dir_orig} for computing voice similarities of original speakers and ASV ' f'model {spk_ext_model_dir_anon} for voice similarities of anonymized speakers') else: raise ValueError('GVD: You either need to specify one "model_dir" for both original and anonymized data or ' @@ -208,11 +209,11 @@ def get_similarity_matrix(vd_model, out_dir, exp_name, segments_folder): gvd_value = vd.gvd(oo_sim, pp_sim) if vd else vd_orig.gvd(oo_sim, pp_sim) with open(trial_out_dir / 'gain_of_voice_distinctiveness', 'w') as f: f.write(str(gvd_value)) - print(f'{trial} gvd={gvd_value}') + logging.info(f'{trial} gvd={gvd_value}') def asr_train(params: dict, libri_dir: Path, model_name: str, model_dir: Path, anon_data_suffix: str): - print(f'Train ASR model: {model_dir}') + logging.info(f'Train ASR model: {model_dir}') exp_dir = Path('exp', model_name) libri_dir = Path(libri_dir).expanduser() # could be relative to userdir ngpu = min(params.get('num_gpus', 0), torch.cuda.device_count()) # cannot use more gpus than available @@ -250,7 +251,7 @@ def asr_train(params: dict, libri_dir: Path, model_name: str, model_dir: Path, a cwd = Path.cwd() os.chdir('evaluation/utility/asr') # espnet recipe needs several files at specific relative positions - print(Path.cwd()) + logging.debug(Path.cwd()) subprocess.run(['./asr.sh'] + train_params) subprocess.run(['ln', '-srf', exp_dir, model_dir]) @@ -258,7 +259,7 @@ def asr_train(params: dict, libri_dir: Path, model_name: str, model_dir: Path, a def asr_eval_sh(eval_datasets: List[str], eval_data_dir: Path, params, model_path, libri_dir, anon_data_suffix): - print(f'Use ASR model for evaluation: {model_path}') + logging.info(f'Use ASR model for evaluation: {model_path}') test_sets = [] for asr_dataset in eval_datasets: @@ -292,6 +293,8 @@ if __name__ == '__main__': + logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') + params = parse_yaml(Path('configs', args.config)) device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') @@ -312,9 +315,9 @@ def asr_eval_sh(eval_datasets: List[str], eval_data_dir: Path, params, model_pat asv_train_params = asv_params['training'] if not model_dir.exists() or asv_train_params.get('retrain', True) is True: start_time = time.time() - print('Perform ASV training') + logging.info('Perform ASV training') asv_train(train_params=asv_train_params, output_dir=asv_params['model_dir']) - print("ASV training time: %f min ---" % (float(time.time() - start_time) / 60)) + logging.info("ASV training time: %f min ---" % (float(time.time() - start_time) / 60)) model_dir = scan_checkpoint(model_dir, 'CKPT') if asv_params['vec_type'] == 'xvector': shutil.copy('evaluation/privacy/asv/asv_train/hparams/xvector/hyperparams.yaml', model_dir) @@ -322,11 +325,11 @@ def asr_eval_sh(eval_datasets: List[str], eval_data_dir: Path, params, model_pat shutil.copy('evaluation/privacy/asv/asv_train/hparams/ecapa/hyperparams.yaml', model_dir) if 'evaluation' in asv_params: - print('Perform ASV evaluation') + logging.info('Perform ASV evaluation') start_time = time.time() asv_eval(eval_datasets=eval_data_trials, eval_data_dir=eval_data_dir, params=asv_params, device=device, model_dir=model_dir, anon_data_suffix=anon_suffix) - print("--- EER computation time: %f min ---" % (float(time.time() - start_time) / 60)) + logging.info("--- EER computation time: %f min ---" %
(float(time.time() - start_time) / 60)) if 'utility' in eval_steps: if 'asr' in eval_steps['utility']: @@ -344,10 +347,10 @@ def asr_eval_sh(eval_datasets: List[str], eval_data_dir: Path, params, model_pat if not model_dir.exists() or asr_train_params.get('retrain', True) is True: start_time = time.time() - print('Perform ASR training') + logging.info('Perform ASR training') asr_train(params=asr_train_params, libri_dir=libri_dir, model_name=model_name, model_dir=model_dir, anon_data_suffix=anon_suffix) - print("--- ASR training time: %f min ---" % (float(time.time() - start_time) / 60)) + logging.info("--- ASR training time: %f min ---" % (float(time.time() - start_time) / 60)) if 'evaluation' in asr_params: asr_eval_params = asr_params['evaluation'] @@ -358,15 +361,15 @@ def asr_eval_sh(eval_datasets: List[str], eval_data_dir: Path, params, model_pat asr_model_path = model_dir / 'asr_train_asr_transformer_raw_en_bpe5000' start_time = time.time() - print('Perform ASR evaluation') + logging.info('Perform ASR evaluation') asr_eval_sh(eval_datasets=eval_data_asr, eval_data_dir=eval_data_dir, params=asr_eval_params, model_path=asr_model_path, anon_data_suffix=anon_suffix, libri_dir=libri_dir) - print("--- ASR evaluation time: %f min ---" % (float(time.time() - start_time) / 60)) + logging.info("--- ASR evaluation time: %f min ---" % (float(time.time() - start_time) / 60)) if 'gvd' in eval_steps['utility']: gvd_params = params['utility']['gvd'] start_time = time.time() - print('Perform GVD evaluation') + logging.info('Perform GVD evaluation') gvd_eval(eval_datasets=eval_data_trials, eval_data_dir=eval_data_dir, params=gvd_params, device=device, anon_data_suffix=anon_suffix) - print("--- GVD computation time: %f min ---" % (float(time.time() - start_time) / 60)) + logging.info("--- GVD computation time: %f min ---" % (float(time.time() - start_time) / 60)) diff --git a/utils/data_io.py b/utils/data_io.py index 0bd03ee..4dabab0 100644 --- a/utils/data_io.py +++ b/utils/data_io.py @@ -2,7 +2,9 @@ from hyperpyyaml import load_hyperpyyaml, dump_hyperpyyaml import json import pandas as pd +import logging +logger = logging.getLogger(__name__) def read_kaldi_format(filename, return_as_dict=True, values_as_string=False): key_list = [] @@ -56,7 +58,7 @@ def save_kaldi_format(data, filename): #value = value.encode('utf-8') f.write(f'{key} {value}\n') except UnicodeEncodeError: - print(f'{key} {value}') + logger.error(f'{key} {value}') raise
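Reviewer note (not part of the patch): the sketch below is a minimal, self-contained illustration of the logging pattern this change adopts, i.e. a module-level logger created with logging.getLogger(__name__) in each file plus a single logging.basicConfig(...) call in the entry scripts (run_anonymization.py / run_evaluation.py). The module name and utterance id used here are placeholders, not identifiers taken from the repository.

```python
import logging

# Module side: stands in for "logger = logging.getLogger(__name__)" inside e.g.
# anonymization/modules/tts/ims_tts.py; the hierarchical name appears as %(name)s.
logger = logging.getLogger("anonymization.modules.tts.ims_tts")

def synthesize():
    # Lazy %-style arguments defer string formatting until the record is actually emitted.
    logger.info("Synthesized utt in %d takes", 3)
    logger.warning("Key error at %s", "utt-0001")  # warning(), not the deprecated warn()

if __name__ == "__main__":
    # Entry-point side, mirroring the basicConfig call added to run_anonymization.py:
    # module loggers propagate to the root logger, so this sets level and format globally.
    logging.basicConfig(level=logging.INFO,
                        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    synthesize()
```

Because module loggers propagate to the root logger by default, configuring the root logger once in the entry scripts is enough to route every message touched by this patch through the same handler and format.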