Merge branch 'develop' into mimic-updates
pplantinga committed May 23, 2022
2 parents 9461e89 + d6736ad commit 7b1088f
Showing 311 changed files with 23,648 additions and 1,567 deletions.
16 changes: 16 additions & 0 deletions .github/workflows/pre-commit.yml
@@ -0,0 +1,16 @@
name: SpeechBrain pre-commit

on: # yamllint disable-line rule:truthy
push:
branches: [main, develop]
pull_request:

jobs:
pre-commit:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: '3.8'
- uses: pre-commit/action@v2.0.3
31 changes: 9 additions & 22 deletions .github/workflows/pythonapp.yml
@@ -1,4 +1,4 @@
# SpeechBrain lint and unit tests to ease continuous integration
# SpeechBrain unit tests to ease continuous integration
# NOTE: Caching these offers no speedup
name: SpeechBrain toolkit CI

@@ -9,41 +9,28 @@ on: # yamllint disable-line rule:truthy
pull_request:

jobs:
linters:
name: Linters
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.8
uses: actions/setup-python@v1
with:
python-version: 3.8
- name: Lint dependencies
run: |
python -m pip install --upgrade pip
pip install -r lint-requirements.txt
- name: Run linters
run: |
flake8 . --count --show-source --statistics
black --check --diff .
yamllint .
tests:
name: Tests
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.7, 3.8, 3.9]
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.8
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v1
with:
python-version: 3.8
python-version: ${{ matrix.python-version }}
- name: Install libsndfile
run: |
sudo apt-get install -y libsndfile1
- name: Display Python version
run: python -c "import sys; print(sys.version)"
- name: Full dependencies
run: |
pip install -r requirements.txt
pip install --editable .
pip install ctc-segmentation
- name: Unittests with pytest
run: |
pytest tests/unittests
26 changes: 26 additions & 0 deletions .github/workflows/verify-docs-gen.yml
@@ -0,0 +1,26 @@
name: Verify docs generation

# Runs on pushes to master and all pull requests
on: # yamllint disable-line rule:truthy
push:
branches: [main, develop]
pull_request:

jobs:
docs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Setup Python 3.8
uses: actions/setup-python@v2
with:
python-version: '3.8'
- name: Full dependencies
run: |
pip install -r requirements.txt
pip install --editable .
pip install -r docs/docs-requirements.txt
- name: Generate docs
run: |
cd docs
make html
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -18,7 +18,7 @@ repos:
hooks:
- id: black
types: [python]

additional_dependencies: ['click==8.0.4']
- repo: https://gitlab.com/pycqa/flake8.git
rev: 3.7.9
hooks:
110 changes: 83 additions & 27 deletions README.md

Large diffs are not rendered by default.

12 changes: 12 additions & 0 deletions conftest.py
@@ -1,3 +1,15 @@
def pytest_addoption(parser):
parser.addoption("--device", action="store", default="cpu")


def pytest_generate_tests(metafunc):
# This is called for every test. Only get/set command line arguments
# if the argument is specified in the list of test "fixturenames".
option_value = metafunc.config.option.device
if "device" in metafunc.fixturenames and option_value is not None:
metafunc.parametrize("device", [option_value])


collect_ignore = ["setup.py"]
try:
import numba # noqa: F401
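
The `--device` option added above can be consumed by any test that declares a `device` argument. As an illustration, here is a hypothetical test (not part of this commit); running `pytest tests --device='cuda'` would then execute it on the GPU:

```python
# Hypothetical test, not part of this diff. The pytest_generate_tests hook above
# parametrizes the `device` fixture with the value passed to --device (default: "cpu").
import torch


def test_tensor_on_requested_device(device):
    x = torch.zeros(4, device=device)
    assert x.device.type == device.split(":")[0]
```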
3 changes: 3 additions & 0 deletions docs/README.md
@@ -20,6 +20,9 @@ docstrings. Sphinx natively supports reStructuredText directives.
Automatically generating documentation based on docstrings is not the
core of Sphinx. For this, after much searching, we use better-apidoc.

It seems better-apidoc doesn't use autodoc\_mock\_imports, so we currently just
add all extra dependencies to docs-requirements.txt.

## Future work

Besides automatic API documentation, Sphinx will facilitate manual prose
5 changes: 2 additions & 3 deletions docs/conf.py
@@ -14,8 +14,7 @@
import sys
import hyperpyyaml


sys.path.insert(0, os.path.abspath("../speechbrain"))
sys.path.insert(-1, os.path.abspath("../"))


# -- Project information -----------------------------------------------------
@@ -69,7 +68,7 @@
autodoc_default_options = {}

# Autodoc mock extra dependencies:
autodoc_mock_imports = ["numba", "sklearn"]
autodoc_mock_imports = []

# Order of API items:
autodoc_member_order = "bysource"
6 changes: 5 additions & 1 deletion docs/docs-requirements.txt
@@ -1,6 +1,10 @@
better-apidoc>=0.3.1
numba
ctc-segmentation>=1.7.0
fairseq
numba>=0.54.1
recommonmark>=0.7.1
six
sklearn
sphinx-rtd-theme>=0.4.3
Sphinx>=3.4.3
transformers==4.13
4 changes: 2 additions & 2 deletions docs/index.rst
@@ -32,13 +32,13 @@ Referencing SpeechBrain
.. code-block:: txt
@misc{speechbrain,
title={SpeechBrain: A General-Purpose Speech Toolkit},
author={Mirco Ravanelli and Titouan Parcollet and Peter Plantinga and Aku Rouhe and Samuele Cornell and Loren Lugosch and Cem Subakan and Nauman Dawalatabad and Abdelwahab Heba and Jianyuan Zhong and Ju-Chieh Chou and Sung-Lin Yeh and Szu-Wei Fu and Chien-Feng Liao and Elena Rastorgueva and François Grondin and William Aris and Hwidong Na and Yan Gao and Renato De Mori and Yoshua Bengio},
year={2021},
eprint={2106.04624},
archivePrefix={arXiv},
primaryClass={eess.AS}
}
.. toctree::
11 changes: 9 additions & 2 deletions docs/installation.md
@@ -44,7 +44,14 @@ Any modification made to the `speechbrain` package will be automatically interpr
Please run the following script from the main folder to make sure your installation is working:
```
pytest tests
pytest --doctest-modules speechbrain
```

If you have a GPU, you can run the tests with `pytest tests --device='cuda'`.

You can run doctests with:

```
tests/.run-doctests.sh
```

## Operating Systems
@@ -65,7 +72,7 @@ Anaconda can be installed by simply following [this tutorial](https://docs.anaco
Once Anaconda is installed, you can create a new environment with:

```
conda create --name speechbrain python=3.8
conda create --name speechbrain python=3.9
```

Then, activate it with:
72 changes: 60 additions & 12 deletions docs/multigpu.md
@@ -19,29 +19,77 @@ Important: the batch size for each GPU process will be: `batch_size / Number of

## Multi-GPU training using Distributed Data Parallel (DDP)

*We would like to advise our users that despite being more efficient, DDP is also more prone to unexpected bugs. Indeed, DDP is quite server-dependent and some setups might generate errors with the PyTorch implementation of DDP. If you encounter an issue, please report it on our GitHub with as much information as possible. Indeed, DDP bugs are very challenging to replicate ...*
DDP implements data parallelism across different processes. This way, the GPUs do not necessarily have to be on the same server, which makes this solution much more flexible. However, the training routines must be written with multi-processing in mind.

With SpeechBrain, we put considerable effort into making sure the code is compliant with DDP. For instance, to avoid conflicts across processes we developed the `run_on_main` function. It is called when critical operations, such as writing a file to disk, are performed. It ensures that these operations run in a single process only; the other processes wait until the operation is completed.
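
As a minimal sketch (not part of this commit; `prepare_mydata` and the paths are hypothetical placeholders), a recipe typically wraps such an operation like this:

```python
# Minimal sketch of the run_on_main pattern; prepare_mydata and the paths
# are hypothetical placeholders.
from speechbrain.utils.distributed import run_on_main


def prepare_mydata(data_folder, save_folder):
    # e.g. write manifest files to save_folder (runs on the main process only)
    ...


run_on_main(
    prepare_mydata,
    kwargs={"data_folder": "/path/to/data", "save_folder": "results/prepared"},
)
# Every process continues from here once the main process has finished writing files.
```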

Using DDP in speechbrain with a single server (node) is quite easy:

The common pattern for using multi-GPU training with DDP (on a single machine with 4 GPUs):
```
cd recipes/<dataset>/<task>/
python -m torch.distributed.launch --nproc_per_node=4 experiment.py hyperparams.yaml --distributed_launch --distributed_backend='nccl'
```
Try to switch the DDP backend if you have issues with `nccl`.

To use DDP, you should consider using `torch.distributed.launch`, which sets up each subprocess with the right environment variables `local_rank` and `rank`. The `local_rank` variable allows setting the right `device` argument for each DDP subprocess, while the `rank` variable (which is unique for each subprocess) is used to register the subprocess with the DDP group. In that way, **we can manage multi-GPU training over multiple machines**.
Where:
- nproc_per_node must be equal to the number of GPUs.
- distributed_backend is the backend that manages synchronization across processes (e.g. 'nccl', 'gloo'). Try to switch the DDP backend if you have issues with nccl.

### With multiple machines (suppose you have 2 servers with 2 GPUs):
```
Running DDP over multiple servers (nodes) is quite system-dependent. Let's start with a simple example where a user is able to connect to each node directly. If we want to run 2 GPUs on each of 2 different nodes (i.e. a total of 4 GPUs), we must do:

```shell
# Machine 1
cd recipes/<dataset>/<task>/
python -m torch.distributed.launch --nproc_per_node=2 --nnodes=2 --node=0 --master_addr machine_1_address --master_port 5555 experiment.py hyperparams.yaml --distributed_launch --distributed_backend='nccl'
python -m torch.distributed.launch --nproc_per_node=2 --nnodes=2 --node_rank=0 --master_addr machine_1_address --master_port 5555 experiment.py hyperparams.yaml --distributed_launch --distributed_backend='nccl'

# Machine 2
cd recipes/<dataset>/<task>/
python -m torch.distributed.launch --nproc_per_node=2 --nnodes=2 --node=1 --master_addr machine_1_address --master_port 5555 experiment.py hyperparams.yaml --distributed_launch --distributed_backend='nccl'
python -m torch.distributed.launch --nproc_per_node=2 --nnodes=2 --node_rank=1 --master_addr machine_1_address --master_port 5555 experiment.py hyperparams.yaml --distributed_launch --distributed_backend='nccl'
```

In this case, Machine 1 will have 2 subprocesses (subprocess1 with local_rank=0, rank=0; subprocess2 with local_rank=1, rank=1). Machine 2 will have 2 subprocesses (subprocess1 with local_rank=0, rank=2; subprocess2 with local_rank=1, rank=3).

In practice, using `torch.distributed.launch` ensures that the right environment variables are set (`local_rank` and `rank`), so you don't have to worry about setting them yourself.
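
As an illustrative sketch (not SpeechBrain code), each subprocess effectively does something like the following with the variables the launcher provides; note that, depending on the PyTorch version, `local_rank` arrives either as a `--local_rank` argument or as the `LOCAL_RANK` environment variable:

```python
# Illustrative sketch only: how a DDP subprocess typically uses the launcher's variables.
import os

import torch
import torch.distributed as dist

local_rank = int(os.environ.get("LOCAL_RANK", 0))  # GPU index on this machine
torch.cuda.set_device(local_rank)                  # each process drives one GPU
dist.init_process_group(backend="nccl")            # rank and world size come from the launcher
print(f"rank {dist.get_rank()} of {dist.get_world_size()} uses cuda:{local_rank}")
```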

Now, let's try to scale this up a bit with a resource manager like SLURM. Here, we will create two scripts:
- an SBATCH script that will request the node configuration and call the second script.
- an srun script that will launch the training on each node.

```shell
#!/bin/bash
## sbatch.sh

#SBATCH --nodes=2 # We want two nodes (servers)
#SBATCH --ntasks-per-node=1 # we will run the srun command below once per node
#SBATCH --gres=gpu:4 # we want 4 GPUs per node
#SBATCH --job-name=SBisSOcool
#SBATCH --cpus-per-task=10 # the only task will request 10 cores
#SBATCH --time=20:00:00 # Everything will run for 20 hours.

# We jump into the submission dir
cd ${SLURM_SUBMIT_DIR}

# And we call srun, which will run the script --ntasks-per-node times (once here) on each node
srun srun_script.sh
```

```shell
#!/bin/bash
## srun_script.sh

# We jump into the submission dir
cd ${SLURM_SUBMIT_DIR}

# We activate our env
conda activate super_cool_sb_env

# We extract the master node address (the one that every node must connect to)
LISTNODES=`scontrol show hostname $SLURM_JOB_NODELIST`
MASTER=`echo $LISTNODES | cut -d" " -f1`

# Here --nproc_per_node=4 because we want torch.distributed to spawn 4 processes (one per GPU).
# We also pass the total number of nodes requested (--nnodes) and --node_rank, which identifies the node this script is running on.
python -m torch.distributed.launch --nproc_per_node=4 --nnodes=${SLURM_JOB_NUM_NODES} --node_rank=${SLURM_NODEID} --master_addr=${MASTER} --master_port=5555 train.py hparams/myrecipe.yaml
```
With this configuration, Machine 1 will have 4 subprocesses (`local_rank=0-3`, `rank=0-3`) and Machine 2 will have 4 subprocesses (`local_rank=0-3`, `rank=4-7`).

In this way, the current DDP group will contain 8 GPUs.
Note that using DDP on different machines introduces a **communication overhead** that might slow down training (depending on the speed of the connection between the machines).

We would like to advise our users that, despite being more efficient, DDP is also more prone to unexpected bugs. Indeed, DDP is quite server-dependent and some setups might generate errors with the PyTorch implementation of DDP. Future versions of PyTorch should improve the stability of DDP.
1 change: 1 addition & 0 deletions lint-requirements.txt
@@ -1,4 +1,5 @@
black==19.10b0
click==8.0.4
flake8==3.7.9
pycodestyle==2.5.0
pytest==5.4.1
Expand Down
4 changes: 2 additions & 2 deletions recipes/AISHELL-1/ASR/seq2seq/README.md
@@ -12,8 +12,8 @@ cd ../../Tokenizer
python train.py hparams/tokenizer_bpe5000.yaml --data_folder=/localscratch/aishell/
```
If not present in the specified data_folder, the dataset will be automatically downloaded there.
This step is not mandatory. We will use the official tokenizer downloaded from the web if you do not
specify a different tokenizer in the speech recognition recipe.

2- Train the speech recognizer
```
10 changes: 6 additions & 4 deletions recipes/AISHELL-1/ASR/transformer/README.md
@@ -9,14 +9,16 @@ cd ../../Tokenizer
python train.py hparams/train_transformer_tokenizer_bpe5000.yaml --data_folder=/localscratch/aishell/
```
If not present in the specified data_folder, the dataset will be automatically downloaded there.
This step is not mandatory. We will use the official tokenizer downloaded from the web if you do not
specify a different tokenizer in the speech recognition recipe.

2- Train the speech recognizer
```
python train.py hparams/train_ASR_transformer.yaml --data_folder=/localscratch/aishell/
```

Make sure to have "transformers" installed if you use the wav2vec2 recipe (see extra_requirements.txt).

# Performance summary
Results are reported in terms of Character Error Rate (CER).

@@ -37,7 +39,7 @@ and about 5 hours minutes on a NVIDIA V100 (32GB) for rain_ASR_transformer_with_
You can find the pre-trained model with an easy-inference function on HuggingFace
- https://huggingface.co/speechbrain/asr-transformer-aishell
- https://huggingface.co/speechbrain/asr-wav2vec2-transformer-aishell


# **About SpeechBrain**
- Website: https://speechbrain.github.io/
@@ -58,4 +60,4 @@ Please cite SpeechBrain if you use it for your research or business.
primaryClass={eess.AS},
note={arXiv:2106.04624}
}
```
2 changes: 2 additions & 0 deletions recipes/AISHELL-1/ASR/transformer/extra_requirements.txt
@@ -0,0 +1,2 @@
# For the wav2vec2 recipe (HuggingFace)
transformers
4 changes: 2 additions & 2 deletions recipes/AMI/Diarization/README.md
@@ -2,7 +2,7 @@
This directory contains the scripts for speaker diarization on the AMI corpus (http://groups.inf.ed.ac.uk/ami/corpus/).

## Extra requirements
The code requires sklearn as an additional dependency.
To install it, type: `pip install sklearn`

## How to run
@@ -11,7 +11,7 @@ Use the following command to run diarization on the AMI corpus.


## Speaker Diarization using Deep Embedding and Spectral Clustering
The script assumes a pre-trained model. Please refer to speechbrain/recipes/VoxCeleb/SpeakerRec/README.md to learn more about the available pre-trained models that can easily be downloaded. You can also train the speaker embedding model from scratch using instructions in the same file.


## Best performance in terms of Diarization Error Rate (DER).
2 changes: 1 addition & 1 deletion recipes/AMI/Diarization/experiment.py
@@ -557,7 +557,7 @@ def audio_pipeline(wav):
# We download the pretrained Model from HuggingFace (or elsewhere depending on
# the path given in the YAML file).
run_on_main(params["pretrainer"].collect_files)
params["pretrainer"].load_collected()
params["pretrainer"].load_collected(device=(params["device"]))
params["embedding_model"].eval()
params["embedding_model"].to(params["device"])

10 changes: 10 additions & 0 deletions recipes/Aishell1Mix/extra-dependencies.txt
@@ -0,0 +1,10 @@
mir-eval==0.6
pyloudnorm
soundfile>=0.10.3.post1
tqdm>=4.46.1
pysndfx>=0.3.6
pandas>=1.0.1
numpy>=1.18.1
pyloudnorm>=0.1.0
scipy>=1.4.1
matplotlib>=3.1.3
