
Doc fixes #365

Merged
merged 32 commits into from
Nov 30, 2020
Changes from 12 commits
Commits
b719978
[docs] Links to GitHub code from the docs :tada:
mpariente Nov 27, 2020
6b5e5a7
Fix LibriMix
mpariente Nov 28, 2020
eb437f7
Fix datasets
mpariente Nov 28, 2020
ed372f7
Fix DSP sections
mpariente Nov 28, 2020
9205b1d
Fix datasets again
mpariente Nov 28, 2020
a8986f7
Fix convolutional.py
mpariente Nov 28, 2020
a84af01
Fix old blocks
mpariente Nov 28, 2020
e5d550a
Add an fix attention in blocks.rst
mpariente Nov 28, 2020
7dbc6e1
Order all by source
mpariente Nov 28, 2020
2b54fbf
Fix base_models.py
mpariente Nov 28, 2020
92e6616
Fix some
mpariente Nov 28, 2020
38ca549
Try fixing losses
mpariente Nov 28, 2020
74f4cc8
Fix sinkpit_wrapper.py
mpariente Nov 28, 2020
b54bf61
Fix mixit_wrapper.py
mpariente Nov 28, 2020
82e3122
Add hidden section
mpariente Nov 28, 2020
1239f39
Fix MSE
mpariente Nov 28, 2020
94257fd
Fix SDR
mpariente Nov 28, 2020
d2cc11f
Fix PMSQE and STOI
mpariente Nov 28, 2020
372987d
Finish fixing losses
mpariente Nov 28, 2020
98afe5c
Fix metrics.py
mpariente Nov 28, 2020
fd1e6ba
Fix system.py
mpariente Nov 28, 2020
460b9ef
Fix optimzers/schedulers
mpariente Nov 28, 2020
da9bdb3
Fix utils
mpariente Nov 28, 2020
371d038
Fix SED error
mpariente Nov 28, 2020
48f7243
func to meth
mpariente Nov 28, 2020
bca2663
Func to mod
mpariente Nov 28, 2020
cf3f896
Update asteroid/data/avspeech_dataset.py
mpariente Nov 28, 2020
9270c8e
Merge branch 'master' into doc_fixes
mpariente Nov 28, 2020
89ec9be
Black + README.md
mpariente Nov 28, 2020
69a05e4
Fix double retruns in PIT
mpariente Nov 30, 2020
ab71939
Fix double retruns in other PIT
mpariente Nov 30, 2020
2e35a6c
Fix filterbanks.rst
mpariente Nov 30, 2020
6 changes: 3 additions & 3 deletions asteroid/complex_nn.py
@@ -165,7 +165,7 @@ def forward(self, mask: ComplexTensor):
def bound_complex_mask(mask: ComplexTensor, bound_type="tanh"):
r"""Bound a complex mask, as proposed in [1], section 3.2.

Valid bound types, for a complex mask $M = |M| ⋅ e^{i φ(M)}$:
Valid bound types, for a complex mask :math:`M = |M| ⋅ e^{i φ(M)}`:

- Unbounded ("UBD"): :math:`M_{\mathrm{UBD}} = M`
- Sigmoid ("BDSS"): :math:`M_{\mathrm{BDSS}} = σ(|M|) e^{i σ(φ(M))}`
@@ -176,8 +176,8 @@ def bound_complex_mask(mask: ComplexTensor, bound_type="tanh"):
"tanh"/"bdt" (default), "sigmoid"/"bdss" or None/"bdt".

References
Collaborator:
Do you know why we have : at the end of Args: but not for References?

mpariente (Author):
Args: is detected by Sphinx and it handles it well.
And if I add : to Reference, it looks like this:
[screenshot of the rendered output]

I really don't understand the tool, so when I find something that works, I superstitiously stick to it.
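For illustration of the behavior being discussed, here is a toy Google-style docstring (not from Asteroid): Sphinx's napoleon extension recognizes the ``Args:`` and ``Returns:`` headers and formats them as field lists, while a ``References`` line without a colon is passed through as ordinary body text.

```python
def scale(x, factor=2.0):
    """Multiply ``x`` by ``factor``.

    Args:
        x (float): Input value.
        factor (float): Multiplier. Defaults to 2.0.

    Returns:
        float: The scaled value.

    References
        [1] An illustrative reference entry.
    """
    return x * factor
```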

- [1] : "Phase-aware Speech Enhancement with Deep Complex U-Net",
Hyeong-Seok Choi et al. https://arxiv.org/abs/1903.03107
[1] : "Phase-aware Speech Enhancement with Deep Complex U-Net",
Hyeong-Seok Choi et al. https://arxiv.org/abs/1903.03107
"""
if bound_type in {"BDSS", "sigmoid"}:
return on_reim(torch.sigmoid)(mask)
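The bounds listed in this docstring can be sketched on Python's built-in complex numbers (a scalar stand-in for the `ComplexTensor` the real function operates on; the function below follows the docstring's formulas, not Asteroid's actual implementation):

```python
import cmath
import math

def bound_complex_mask(m: complex, bound_type: str = "tanh") -> complex:
    """Bound a complex mask M = |M| * e^{i*phi(M)}.

    - "UBD": unbounded, M is returned unchanged.
    - "sigmoid"/"BDSS": sigma(|M|) * e^{i * sigma(phi(M))}.
    - "tanh"/"BDT": tanh(|M|) * e^{i * phi(M)} (magnitude bounded, phase kept).
    """
    sigmoid = lambda v: 1.0 / (1.0 + math.exp(-v))
    mag, phase = abs(m), cmath.phase(m)
    if bound_type.lower() in ("bdss", "sigmoid"):
        return cmath.rect(sigmoid(mag), sigmoid(phase))
    if bound_type.lower() in ("bdt", "tanh"):
        return cmath.rect(math.tanh(mag), phase)
    return m  # "UBD": unbounded

bounded = bound_complex_mask(complex(3, 4), "tanh")
# tanh keeps the phase and squashes the magnitude into (0, 1)
```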
4 changes: 2 additions & 2 deletions asteroid/data/avspeech_dataset.py
@@ -120,8 +120,8 @@ class AVSpeechDataset(data.Dataset):
n_src (int): number of sources.

References
- [1]: "Looking to Listen at the Cocktail Party: A Speaker-Independent Audio-Visual
Model for Speech Separation" Ephrat et. al https://arxiv.org/abs/1804.03619
"Looking to Listen at the Cocktail Party: A Speaker-Independent Audio-Visual
Model for Speech Separation" Ephrat et. al https://arxiv.org/abs/1804.03619
mpariente marked this conversation as resolved.
"""

dataset_name = "AVSpeech"
16 changes: 11 additions & 5 deletions asteroid/data/dampvsep_dataset.py
@@ -18,8 +18,10 @@ class DAMPVSEPSinglesDataset(torch.utils.data.Dataset):
Args:
root_path (str): Root path to DAMP-VSEP dataset.
task (str): one of ``'enh_vocal'``,``'separation'``.

* ``'enh_vocal'`` for vocal enhanced.
* ``'separation'`` for vocal and background separation.

split (str): one of ``'train_english'``, ``'train_singles'``,
``'valid'`` and ``'test'``.
Default to ``'train_singles'``.
@@ -32,21 +34,25 @@
segment (float, optional): Duration of segments in seconds,
Defaults to ``None`` which loads the full-length audio tracks.
norm (Str, optional): Type of normalisation to use. Default to ``None``

* ``'song_level'`` use mixture mean and std.
* ```None``` no normalisation

source_augmentations (Callable, optional): Augmentations applied to the sources (only).
Default to ``None``.
mixture (str, optional): Whether to use the original mixture with non-linear effects
or remix sources. Default to original.

* ``'remix'`` for use addition to remix the sources.
* ``'original'`` for use the original mixture.

.. note:: There are 2 train set available:
1- train_english: Uses all English spoken song.
Duets are converted into 2 singles.
Totalling 9243 performances and 77Hrs.
2- train_singles: Uses all singles performances, discarding all duets.
Totalling 20660 performances and 149 hrs.

* train_english: Uses all English spoken song. Duets are converted into 2 singles.
Totalling 9243 performances and 77Hrs.
* train_singles: Uses all singles performances, discarding all duets.
Totalling 20660 performances and 149 hrs.

"""

dataset_name = "DAMP-VSEP"
2 changes: 1 addition & 1 deletion asteroid/data/dns_dataset.py
@@ -12,7 +12,7 @@ class DNSDataset(data.Dataset):
json_dir (str): path to the JSON directory (from the recipe).

References
- "The INTERSPEECH 2020 Deep Noise Suppression Challenge: Datasets,
"The INTERSPEECH 2020 Deep Noise Suppression Challenge: Datasets,
Subjective Testing Framework, and Challenge Results", Reddy et al. 2020.
"""

4 changes: 2 additions & 2 deletions asteroid/data/fuss_dataset.py
@@ -6,7 +6,7 @@


class FUSSDataset(Dataset):
"""Dataset class for FUSS[1] tasks.
"""Dataset class for FUSS [1] tasks.

Args:
file_list_path (str): Path to the txt (csv) file created at stage 2
@@ -15,7 +15,7 @@ class FUSSDataset(Dataset):
and sources (useful for SIR, SAR computation). Default: False.

References
- [1] Scott Wisdom et al. "What's All the FUSS About Free Universal
[1] Scott Wisdom et al. "What's All the FUSS About Free Universal
Sound Separation Data?", 2020, in preparation.
"""

2 changes: 1 addition & 1 deletion asteroid/data/kinect_wsj.py
@@ -41,7 +41,7 @@ class KinectWsjMixDataset(Wsj0mixDataset):
n_src (int, optional): Number of sources in the training targets.

References
- "Analyzing the impact of speaker localization errors on speech separation
"Analyzing the impact of speaker localization errors on speech separation
for automatic speech recognition", Sunit Sivasankaran et al. 2020.
"""

20 changes: 10 additions & 10 deletions asteroid/data/librimix_dataset.py
@@ -15,24 +15,24 @@


class LibriMix(Dataset):
"""Dataset class for Librimix source separation tasks.
"""Dataset class for LibriMix source separation tasks.

Args:
csv_dir (str): The path to the metatdata file
csv_dir (str): The path to the metadata file.
task (str): One of ``'enh_single'``, ``'enh_both'``, ``'sep_clean'`` or
``'sep_noisy'``.
``'sep_noisy'`` :

* ``'enh_single'`` for single speaker speech enhancement.
* ``'enh_both'`` for multi speaker speech enhancement.
* ``'sep_clean'`` for two-speaker clean source separation.
* ``'sep_noisy'`` for two-speaker noisy source separation.

sample_rate (int) : The sample rate of the sources and mixtures
n_src (int) : The number of sources in the mixture
segment (int) : The desired sources and mixtures length in s
sample_rate (int) : The sample rate of the sources and mixtures.
n_src (int) : The number of sources in the mixture.
segment (int) : The desired sources and mixtures length in s.

References
- [1] "LibriMix: An Open-Source Dataset for Generalizable Speech Separation",
[1] "LibriMix: An Open-Source Dataset for Generalizable Speech Separation",
Cosentino et al. 2020.
"""

@@ -122,11 +122,11 @@ def loaders_from_mini(cls, batch_size=4, **kwargs):
instantiate the DatalLoader.
**kwargs: keyword arguments to pass the `LibriMix`, see `__init__`.
The kwargs will be fed to both the training set and validation
set
set.

Returns:
train_loader, val_loader: training and validation DataLoader out of
`LibriMix` Dataset.
`LibriMix` Dataset.

Examples
>>> from asteroid.data import LibriMix
@@ -152,7 +152,7 @@ def mini_from_download(cls, **kwargs):

Returns:
train_set, val_set: training and validation instances of
`LibriMix` (data.Dataset).
`LibriMix` (data.Dataset).

Examples
>>> from asteroid.data import LibriMix
2 changes: 1 addition & 1 deletion asteroid/data/musdb18_dataset.py
@@ -86,7 +86,7 @@ class MUSDB18Dataset(torch.utils.data.Dataset):
tracks (:obj:`list` of :obj:`Dict`): List of track metadata

References
- "The 2018 Signal Separation Evaluation Campaign" Stoter et al. 2018.
"The 2018 Signal Separation Evaluation Campaign" Stoter et al. 2018.
"""

dataset_name = "MUSDB18"
4 changes: 2 additions & 2 deletions asteroid/data/sms_wsj_dataset.py
@@ -60,8 +60,8 @@ class SmsWsjDataset(data.Dataset):
normalized with the standard deviation of the mixture.

References
- "SMS-WSJ: Database, performance measures, and baseline recipe for
multi-channel source separation and recognition", Drude et al. 2019
"SMS-WSJ: Database, performance measures, and baseline recipe for
multi-channel source separation and recognition", Drude et al. 2019
"""

dataset_name = "SMS_WSJ"
2 changes: 1 addition & 1 deletion asteroid/data/wham_dataset.py
@@ -56,7 +56,7 @@ class WhamDataset(data.Dataset):
normalized with the standard deviation of the mixture.

References
- "WHAM!: Extending Speech Separation to Noisy Environments",
"WHAM!: Extending Speech Separation to Noisy Environments",
Wichern et al. 2019
"""

3 changes: 1 addition & 2 deletions asteroid/data/whamr_dataset.py
@@ -73,8 +73,7 @@ class WhamRDataset(data.Dataset):
separation tasks.

References
- "WHAMR!: Noisy and Reverberant Single-Channel Speech Separation",
Maciejewski et al. 2020
"WHAMR!: Noisy and Reverberant Single-Channel Speech Separation", Maciejewski et al. 2020
"""

dataset_name = "WHAMR"
4 changes: 2 additions & 2 deletions asteroid/data/wsj0_mix.py
@@ -39,8 +39,8 @@ class Wsj0mixDataset(data.Dataset):
n_src (int, optional): Number of sources in the training targets.

References
- "Deep clustering: Discriminative embeddings for segmentation and
separation", Hershey et al. 2015.
"Deep clustering: Discriminative embeddings for segmentation and
separation", Hershey et al. 2015.
"""

dataset_name = "wsj0-mix"
20 changes: 9 additions & 11 deletions asteroid/dsp/consistency.py
@@ -10,10 +10,9 @@ def mixture_consistency(
) -> torch.Tensor:
"""Applies mixture consistency to a tensor of estimated sources.

Args
Args:
mixture (torch.Tensor): Mixture waveform or TF representation.
est_sources (torch.Tensor): Estimated sources waveforms or TF
representations.
est_sources (torch.Tensor): Estimated sources waveforms or TF representations.
src_weights (torch.Tensor): Consistency weight for each source.
Shape needs to be broadcastable to `est_source`.
We make sure that the weights sum up to 1 along dim `dim`.
@@ -24,11 +23,6 @@
torch.Tensor with same shape as `est_sources`, after applying mixture
consistency.

Notes
This method can be used only in 'complete' separation tasks, otherwise
the residual error will contain unwanted sources. For example, this
won't work with the task `sep_noisy` from WHAM.

Examples
>>> # Works on waveforms
>>> mix = torch.randn(10, 16000)
@@ -39,10 +33,14 @@
>>> est_sources = torch.randn(10, 2, 514, 400)
>>> new_est_sources = mixture_consistency(mix, est_sources, dim=1)

.. note::
This method can be used only in 'complete' separation tasks, otherwise
the residual error will contain unwanted sources. For example, this
won't work with the task `"sep_noisy"` from WHAM.

References
- Scott Wisdom, John R Hershey, Kevin Wilson, Jeremy Thorpe, Michael
Chinen, Brian Patton, and Rif A Saurous. "Differentiable consistency
constraints for improved deep speech enhancement", ICASSP 2019.
Scott Wisdom et al. "Differentiable consistency constraints for improved
deep speech enhancement", ICASSP 2019.
"""
# If the source weights are not specified, the weights are the relative
# power of each source to the sum. w_i = P_i / (P_all), P for power.
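The projection this docstring describes can be sketched on plain Python lists (illustrative names, not Asteroid's API): each estimate absorbs a share of the residual `mixture - sum(estimates)`, with weights defaulting to relative source power and summing to one, so the corrected estimates add up to the mixture exactly.

```python
def mixture_consistency(mixture, est_sources, weights=None):
    """s_i <- s_i + w_i * (mixture - sum_j s_j), with sum_i w_i = 1."""
    if weights is None:
        # Default weights: relative power of each source, w_i = P_i / P_all.
        powers = [sum(x * x for x in src) for src in est_sources]
        total = sum(powers) or 1.0
        weights = [p / total for p in powers]
    # Residual error between the mixture and the sum of the estimates.
    residual = [m - sum(col) for m, col in zip(mixture, zip(*est_sources))]
    return [
        [s + w * r for s, r in zip(src, residual)]
        for src, w in zip(est_sources, weights)
    ]

mix = [1.0, 0.5, -0.25]
ests = [[0.6, 0.1, 0.0], [0.2, 0.2, -0.5]]
consistent = mixture_consistency(mix, ests)
# the corrected estimates now sum to the mixture, sample by sample
```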
63 changes: 30 additions & 33 deletions asteroid/dsp/overlap_add.py
@@ -175,9 +175,6 @@ def _reorder_sources(
both current and previous.
hop_size (:class:`int`): hop_size between current and previous tensors.

Returns:
current:

"""
batch, frames = current.size()
current = current.reshape(-1, n_src, frames)
@@ -201,15 +198,16 @@ def reorder_func(x, y):


class DualPathProcessing(nn.Module):
"""Perform Dual-Path processing via overlap-add as in DPRNN [1].
"""
Perform Dual-Path processing via overlap-add as in DPRNN [1].

Args:
Args:
chunk_size (int): Size of segmenting window.
hop_size (int): segmentation hop size.

References
- [1] "Dual-path RNN: efficient long sequence modeling for time-domain
single-channel speech separation", Yi Luo, Zhuo Chen and Takuya Yoshioka.
[1] Yi Luo, Zhuo Chen and Takuya Yoshioka. "Dual-path RNN: efficient
long sequence modeling for time-domain single-channel speech separation"
https://arxiv.org/abs/1910.06379
"""

@@ -220,16 +218,16 @@ def __init__(self, chunk_size, hop_size):
self.n_orig_frames = None

def unfold(self, x):
"""Unfold the feature tensor from

(batch, channels, time) to (batch, channels, chunk_size, n_chunks).
r"""
Unfold the feature tensor from $(batch, channels, time)$ to
$(batch, channels, chunksize, nchunks)$.

Args:
x: (:class:`torch.Tensor`): feature tensor of shape (batch, channels, time).
x (:class:`torch.Tensor`): feature tensor of shape $(batch, channels, time)$.

Returns:
x: (:class:`torch.Tensor`): spliced feature tensor of shape
(batch, channels, chunk_size, n_chunks).
:class:`torch.Tensor`: spliced feature tensor of shape
$(batch, channels, chunksize, nchunks)$.

"""
# x is (batch, chan, frames)
@@ -248,22 +246,22 @@ def unfold(self, x):
) # (batch, chan, chunk_size, n_chunks)

def fold(self, x, output_size=None):
"""Folds back the spliced feature tensor.

Input shape (batch, channels, chunk_size, n_chunks) to original shape
(batch, channels, time) using overlap-add.
r"""
Folds back the spliced feature tensor.
Input shape $(batch, channels, chunksize, nchunks)$ to original shape
$(batch, channels, time)$ using overlap-add.

Args:
x: (:class:`torch.Tensor`): spliced feature tensor of shape
(batch, channels, chunk_size, n_chunks).
output_size: (int, optional): sequence length of original feature tensor.
If None, the original length cached by the previous call of `unfold`
will be used.
x (:class:`torch.Tensor`): spliced feature tensor of shape
$(batch, channels, chunksize, nchunks)$.
output_size (int, optional): sequence length of original feature tensor.
If None, the original length cached by the previous call of
:func:`~overlap_add.DualPathProcessing.unfold` will be used.
mpariente marked this conversation as resolved.

Returns:
x: (:class:`torch.Tensor`): feature tensor of shape (batch, channels, time).
:class:`torch.Tensor`: feature tensor of shape $(batch, channels, time)$.

.. note:: `fold` caches the original length of the pr
.. note:: `fold` caches the original length of the input.

"""
output_size = output_size if output_size is not None else self.n_orig_frames
@@ -285,21 +283,20 @@

@staticmethod
def intra_process(x, module):
"""Performs intra-chunk processing.
r"""Performs intra-chunk processing.

Args:
x (:class:`torch.Tensor`): spliced feature tensor of shape
(batch, channels, chunk_size, n_chunks).
module (:class:`torch.nn.Module`): module one wish to apply to each chunk
of the spliced feature tensor.


Returns:
x (:class:`torch.Tensor`): processed spliced feature tensor of shape
(batch, channels, chunk_size, n_chunks).
:class:`torch.Tensor`: processed spliced feature tensor of shape
$(batch, channels, chunksize, nchunks)$.

.. note:: the module should have the channel first convention and accept
a 3D tensor of shape (batch, channels, time).
a 3D tensor of shape $(batch, channels, time)$.
"""

# x is (batch, channels, chunk_size, n_chunks)
Expand All @@ -312,21 +309,21 @@ def intra_process(x, module):

@staticmethod
def inter_process(x, module):
"""Performs inter-chunk processing.
r"""Performs inter-chunk processing.

Args:
x (:class:`torch.Tensor`): spliced feature tensor of shape
(batch, channels, chunk_size, n_chunks).
$(batch, channels, chunksize, nchunks)$.
module (:class:`torch.nn.Module`): module one wish to apply between
each chunk of the spliced feature tensor.


Returns:
x (:class:`torch.Tensor`): processed spliced feature tensor of shape
(batch, channels, chunk_size, n_chunks).
$(batch, channels, chunksize, nchunks)$.

.. note:: the module should have the channel first convention and accept
a 3D tensor of shape (batch, channels, time).
a 3D tensor of shape $(batch, channels, time)$.
"""

batch, channels, chunk_size, n_chunks = x.size()
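The unfold/fold round trip documented in these `DualPathProcessing` hunks can be illustrated on a 1-D Python list (illustrative helper names; the real class works on batched `(batch, channels, time)` tensors via `torch.nn.functional.unfold`/`fold`): split the signal into overlapping chunks, then overlap-add them back and normalize by how many chunks cover each sample.

```python
def unfold_1d(x, chunk_size, hop_size):
    """Split x into overlapping chunks of chunk_size samples every hop_size samples."""
    return [
        x[start:start + chunk_size]
        for start in range(0, len(x) - chunk_size + 1, hop_size)
    ]

def fold_1d(chunks, hop_size, length):
    """Overlap-add the chunks back, dividing by the per-sample overlap count."""
    out = [0.0] * length
    count = [0] * length
    for i, chunk in enumerate(chunks):
        for j, value in enumerate(chunk):
            out[i * hop_size + j] += value
            count[i * hop_size + j] += 1
    return [o / max(c, 1) for o, c in zip(out, count)]

signal = [float(i) for i in range(8)]
chunks = unfold_1d(signal, chunk_size=4, hop_size=2)  # 3 chunks of 4 samples
restored = fold_1d(chunks, hop_size=2, length=8)      # reconstructs the signal
```

With a 50% hop and this count-based normalization, the round trip is lossless wherever the signal is fully covered by chunks, which is the property `fold(unfold(x))` relies on.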