update vocoder

nipponjo · Apr 24, 2023 · 4a965de · 4a965de
1 parent 7a40abd
commit 4a965de
Show file tree

Hide file tree

Showing 5 changed files with 46 additions and 7 deletions.
diff --git a/.gitignore b/.gitignore
@@ -7,7 +7,7 @@ checkpoints/
 logs/
 tb_logs/
 node_modules/
-config.json
+# config.json
 g_02500000
 old/
 samples/

diff --git a/README.md b/README.md
@@ -2,7 +2,9 @@
 
 TTS models (Tacotron2, FastPitch), trained on Nawar Halabi's [Arabic Speech Corpus](http://en.arabicspeechcorpus.com/), including the [HiFi-GAN vocoder](https://github.com/jik876/hifi-gan) for direct TTS inference.
 
-![ar2wave5](https://user-images.githubusercontent.com/28433296/227660976-0d1e2033-276e-45e5-b232-a5a9b6b3f2a8.png)
+<div align="center">
+  <img src="https://user-images.githubusercontent.com/28433296/227660976-0d1e2033-276e-45e5-b232-a5a9b6b3f2a8.png" alt="ar2wave5" width="95%">
+</div>
 
 Papers:
 
@@ -30,12 +32,12 @@ Download the pretrained weights for the Tacotron2 model ([mse](https://drive.goo
 
 Download the pretrained weights for the FastPitch model ([mse](https://drive.google.com/u/0/uc?id=1sliRc62wjPTnPWBVQ95NDUgnCSH5E8M0&export=download) | [adv](https://drive.google.com/u/0/uc?id=1-vZOhi9To_78-yRslC6sFLJBUjwgJT-D&export=download)).
 
-Download the [HiFi-GAN vocoder](https://github.com/jik876/hifi-gan) weights and config file ([direct link](https://drive.google.com/drive/folders/1YuOoV3lO2-Hhn1F2HJ2aQ4S0LC1JdKLd)). Either put them into `pretrained/hifigan-universal-v1` or edit the following lines in `configs/basic.yaml`.
+Download the [HiFi-GAN vocoder](https://github.com/jik876/hifi-gan) weights ([link](https://drive.google.com/u/0/uc?id=1zSYYnJFS-gQox-IeI71hVY-fdPysxuFK&export=download)). Either put them into `pretrained/hifigan-asc-v1` (the matching `config.json` is already included there) or edit the following lines in `configs/basic.yaml`.
 
 ```yaml
 # vocoder
-vocoder_state_path: pretrained/hifigan-universal-v1/g_02500000
-vocoder_config_path: pretrained/hifigan-universal-v1/config.json
+vocoder_state_path: pretrained/hifigan-asc-v1/hifigan-asc.pth
+vocoder_config_path: pretrained/hifigan-asc-v1/config.json
 ```
 
 ## Using the models

diff --git a/configs/basic.yaml b/configs/basic.yaml
@@ -15,6 +15,6 @@ use_cuda_if_available: True
 balanced_sampling: False
 
 # vocoder
-vocoder_state_path: pretrained/hifigan-universal-v1/g_02500000
-vocoder_config_path: pretrained/hifigan-universal-v1/config.json
+vocoder_state_path: pretrained/hifigan-asc-v1/hifigan-asc.pth
+vocoder_config_path: pretrained/hifigan-asc-v1/config.json
 
diff --git a/pretrained/hifigan-asc-v1/config.json b/pretrained/hifigan-asc-v1/config.json
@@ -0,0 +1,37 @@
+{
+    "resblock": "1",
+    "num_gpus": 0,
+    "batch_size": 16,
+    "learning_rate": 0.0002,
+    "adam_b1": 0.8,
+    "adam_b2": 0.99,
+    "lr_decay": 0.999,
+    "seed": 1234,
+
+    "upsample_rates": [8,8,2,2],
+    "upsample_kernel_sizes": [16,16,4,4],
+    "upsample_initial_channel": 512,
+    "resblock_kernel_sizes": [3,7,11],
+    "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
+
+    "segment_size": 8192,
+    "num_mels": 80,
+    "num_freq": 1025,
+    "n_fft": 1024,
+    "hop_size": 256,
+    "win_size": 1024,
+
+    "sampling_rate": 22050,
+
+    "fmin": 0,
+    "fmax": 8000,
+    "fmax_for_loss": null,
+
+    "num_workers": 4,
+
+    "dist_config": {
+        "dist_backend": "nccl",
+        "dist_url": "tcp://localhost:54321",
+        "world_size": 1
+    }
+}
diff --git a/pretrained/hifigan-universal-v1/.gitkeep b/pretrained/hifigan-universal-v1/.gitkeep