Code refactor

Artrajz · Sep 30, 2023 · 491fd31
1 parent b9cec2c
commit 491fd31
Show file tree

Hide file tree

Showing 8 changed files with 248 additions and 508 deletions.
diff --git a/app.py b/app.py
@@ -6,7 +6,7 @@
 from werkzeug.utils import secure_filename
 from flask_apscheduler import APScheduler
 from functools import wraps
-from utils import clean_folder, check_is_none
+from utils.data_utils import save_audio, clean_folder, check_is_none
 from utils.load_model import load_model
 from io import BytesIO
 
@@ -67,30 +67,23 @@ def voice_speakers_api():
 def voice_vits_api():
     try:
         if request.method == "GET":
-            text = request.args.get("text", "")
-            id = int(request.args.get("id", app.config.get("ID", 0)))
-            format = request.args.get("format", app.config.get("FORMAT", "wav"))
-            lang = request.args.get("lang", app.config.get("LANG", "auto")).lower()
-            length = float(request.args.get("length", app.config.get("LENGTH", 1)))
-            noise = float(request.args.get("noise", app.config.get("NOISE", 0.667)))
-            noisew = float(request.args.get("noisew", app.config.get("NOISEW", 0.8)))
-            max = int(request.args.get("max", app.config.get("MAX", 50)))
-            use_streaming = request.args.get('streaming', False, type=bool)
+            request_data = request.args
         elif request.method == "POST":
             content_type = request.headers.get('Content-Type')
             if content_type == 'application/json':
-                data = request.get_json()
+                request_data = request.get_json()
             else:
-                data = request.form
-            text = data.get("text", "")
-            id = int(data.get("id", app.config.get("ID", 0)))
-            format = data.get("format", app.config.get("FORMAT", "wav"))
-            lang = data.get("lang", app.config.get("LANG", "auto")).lower()
-            length = float(data.get("length", app.config.get("LENGTH", 1)))
-            noise = float(data.get("noise", app.config.get("NOISE", 0.667)))
-            noisew = float(data.get("noisew", app.config.get("NOISEW", 0.8)))
-            max = int(data.get("max", app.config.get("MAX", 50)))
-            use_streaming = request.form.get('streaming', False, type=bool)
+                request_data = request.form
+
+        text = request_data.get("text", "")
+        id = int(request_data.get("id", app.config.get("ID", 0)))
+        format = request_data.get("format", app.config.get("FORMAT", "wav"))
+        lang = request_data.get("lang", app.config.get("LANG", "auto")).lower()
+        length = float(request_data.get("length", app.config.get("LENGTH", 1)))
+        noise = float(request_data.get("noise", app.config.get("NOISE", 0.667)))
+        noisew = float(request_data.get("noisew", app.config.get("NOISEW", 0.8)))
+        max = int(request_data.get("max", app.config.get("MAX", 50)))
+        use_streaming = request_data.get('streaming', False, type=bool)
     except Exception as e:
         logger.error(f"[VITS] {e}")
         return make_response("parameter error", 400)
@@ -136,20 +129,23 @@ def voice_vits_api():
             "lang": lang,
             "speaker_lang": speaker_lang}
 
-    if app.config.get("SAVE_AUDIO", False):
-        logger.debug(f"[VITS] {fname}")
-
     if use_streaming:
-        audio = tts.stream_vits_infer(task, fname)
+        audio = tts.stream_vits_infer(task)
         response = make_response(audio)
         response.headers['Content-Disposition'] = f'attachment; filename={fname}'
         response.headers['Content-Type'] = file_type
         return response
     else:
         t1 = time.time()
-        audio = tts.vits_infer(task, fname)
+        audio = tts.vits_infer(task)
         t2 = time.time()
         logger.info(f"[VITS] finish in {(t2 - t1):.2f}s")
+
+        if app.config.get("SAVE_AUDIO", False):
+            logger.debug(f"[VITS] {fname}")
+            path = os.path.join(app.config.get('CACHE_PATH'), fname)
+            save_audio(audio.getvalue(), path)
+
         return send_file(path_or_file=audio, mimetype=file_type, download_name=fname)
 
 
@@ -191,11 +187,15 @@ def voice_hubert_api():
             "audio_path": os.path.join(app.config['UPLOAD_FOLDER'], fname)}
 
     t1 = time.time()
-    audio = tts.hubert_vits_infer(task, fname)
+    audio = tts.hubert_vits_infer(task)
     t2 = time.time()
+    logger.info(f"[hubert] finish in {(t2 - t1):.2f}s")
+
     if app.config.get("SAVE_AUDIO", False):
         logger.debug(f"[hubert] {fname}")
-    logger.info(f"[hubert] finish in {(t2 - t1):.2f}s")
+        path = os.path.join(app.config.get('CACHE_PATH'), fname)
+        save_audio(audio.getvalue(), path)
+
     if use_streaming:
         audio = tts.generate_audio_chunks(audio)
         response = make_response(audio)
@@ -211,32 +211,24 @@ def voice_hubert_api():
 def voice_w2v2_api():
     try:
         if request.method == "GET":
-            text = request.args.get("text", "")
-            id = int(request.args.get("id", app.config.get("ID", 0)))
-            format = request.args.get("format", app.config.get("FORMAT", "wav"))
-            lang = request.args.get("lang", app.config.get("LANG", "auto")).lower()
-            length = float(request.args.get("length", app.config.get("LENGTH", 1)))
-            noise = float(request.args.get("noise", app.config.get("NOISE", 0.667)))
-            noisew = float(request.args.get("noisew", app.config.get("NOISEW", 0.8)))
-            max = int(request.args.get("max", app.config.get("MAX", 50)))
-            emotion = int(request.args.get("emotion", app.config.get("EMOTION", 0)))
-            use_streaming = request.args.get('streaming', False, type=bool)
+            request_data = request.args
         elif request.method == "POST":
             content_type = request.headers.get('Content-Type')
             if content_type == 'application/json':
-                data = request.get_json()
+                request_data = request.get_json()
             else:
-                data = request.form
-            text = data.get("text", "")
-            id = int(data.get("id", app.config.get("ID", 0)))
-            format = data.get("format", app.config.get("FORMAT", "wav"))
-            lang = data.get("lang", app.config.get("LANG", "auto")).lower()
-            length = float(data.get("length"))
-            noise = float(data.get("noise", app.config.get("NOISE", 0.667)))
-            noisew = float(data.get("noisew", app.config.get("NOISEW", 0.8)))
-            max = int(data.get("max", app.config.get("MAX", 50)))
-            emotion = int(data.get("emotion", app.config.get("EMOTION", 0)))
-            use_streaming = request.form.get('streaming', False, type=bool)
+                request_data = request.form
+
+        text = request_data.get("text", "")
+        id = int(request_data.get("id", app.config.get("ID", 0)))
+        format = request_data.get("format", app.config.get("FORMAT", "wav"))
+        lang = request_data.get("lang", app.config.get("LANG", "auto")).lower()
+        length = float(request_data.get("length", app.config.get("LENGTH", 1)))
+        noise = float(request_data.get("noise", app.config.get("NOISE", 0.667)))
+        noisew = float(request_data.get("noisew", app.config.get("NOISEW", 0.8)))
+        max = int(request_data.get("max", app.config.get("MAX", 50)))
+        emotion = int(request_data.get("emotion", app.config.get("EMOTION", 0)))
+        use_streaming = request_data.get('streaming', False, type=bool)
     except Exception as e:
         logger.error(f"[w2v2] {e}")
         return make_response(f"parameter error", 400)
@@ -285,18 +277,22 @@ def voice_w2v2_api():
             "speaker_lang": speaker_lang}
 
     t1 = time.time()
-    audio = tts.w2v2_vits_infer(task, fname)
+    audio = tts.w2v2_vits_infer(task)
     t2 = time.time()
+    logger.info(f"[w2v2] finish in {(t2 - t1):.2f}s")
+
     if app.config.get("SAVE_AUDIO", False):
-        logger.debug(f"[W2V2] {fname}")
+        logger.debug(f"[w2v2] {fname}")
+        path = os.path.join(app.config.get('CACHE_PATH'), fname)
+        save_audio(audio.getvalue(), path)
+
     if use_streaming:
         audio = tts.generate_audio_chunks(audio)
         response = make_response(audio)
         response.headers['Content-Disposition'] = f'attachment; filename={fname}'
         response.headers['Content-Type'] = file_type
         return response
     else:
-        logger.info(f"[w2v2] finish in {(t2 - t1):.2f}s")
         return send_file(path_or_file=audio, mimetype=file_type, download_name=fname)
 
 
@@ -326,11 +322,15 @@ def vits_voice_conversion_api():
                 "format": format}
 
         t1 = time.time()
-        audio = tts.vits_voice_conversion(task, fname)
+        audio = tts.vits_voice_conversion(task)
         t2 = time.time()
+        logger.info(f"[Voice conversion] finish in {(t2 - t1):.2f}s")
+
         if app.config.get("SAVE_AUDIO", False):
             logger.debug(f"[Voice conversion] {fname}")
-        logger.info(f"[Voice conversion] finish in {(t2 - t1):.2f}s")
+            path = os.path.join(app.config.get('CACHE_PATH'), fname)
+            save_audio(audio.getvalue(), path)
+
         if use_streaming:
             audio = tts.generate_audio_chunks(audio)
             response = make_response(audio)
@@ -343,14 +343,15 @@ def vits_voice_conversion_api():
 
 @app.route('/voice/ssml', methods=["POST"])
 @require_api_key
-def ssml():
+def ssml_api():
     try:
         content_type = request.headers.get('Content-Type')
         if content_type == 'application/json':
-            data = request.get_json()
+            request_data = request.get_json()
         else:
-            data = request.form
-        ssml = data.get("ssml")
+            request_data = request.form
+
+        ssml = request_data.get("ssml")
     except Exception as e:
         logger.info(f"[ssml] {e}")
         return make_response(jsonify({"status": "error", "message": f"parameter error"}), 400)
@@ -361,11 +362,14 @@ def ssml():
     file_type = f"audio/{format}"
 
     t1 = time.time()
-    audio = tts.create_ssml_infer_task(voice_tasks, format, fname)
+    audio = tts.create_ssml_infer_task(voice_tasks, format)
     t2 = time.time()
+    logger.info(f"[ssml] finish in {(t2 - t1):.2f}s")
+
     if app.config.get("SAVE_AUDIO", False):
         logger.debug(f"[ssml] {fname}")
-    logger.info(f"[ssml] finish in {(t2 - t1):.2f}s")
+        path = os.path.join(app.config.get('CACHE_PATH'), fname)
+        save_audio(audio.getvalue(), path)
 
     return send_file(path_or_file=audio, mimetype=file_type, download_name=fname)
 
@@ -385,46 +389,39 @@ def dimensional_emotion():
 
     file_type = "application/octet-stream; charset=ascii"
     fname = os.path.splitext(audio.filename)[0] + ".npy"
-    audio = tts.get_dimensional_emotion_npy(content)
+    emotion_npy = tts.get_dimensional_emotion_npy(content)
     if use_streaming:
-        audio = tts.generate_audio_chunks(audio)
-        response = make_response(audio)
+        emotion_npy = tts.generate_audio_chunks(emotion_npy)
+        response = make_response(emotion_npy)
         response.headers['Content-Disposition'] = f'attachment; filename={fname}'
         response.headers['Content-Type'] = file_type
         return response
     else:
-        return send_file(path_or_file=audio, mimetype=file_type, download_name=fname)
+        return send_file(path_or_file=emotion_npy, mimetype=file_type, download_name=fname)
 
 
 @app.route('/voice/bert-vits2', methods=["GET", "POST"])
 @require_api_key
 def voice_bert_vits2_api():
     try:
         if request.method == "GET":
-            text = request.args.get("text", "")
-            id = int(request.args.get("id", app.config.get("ID", 0)))
-            format = request.args.get("format", app.config.get("FORMAT", "wav"))
-            lang = request.args.get("lang", "auto").lower()
-            length = float(request.args.get("length", app.config.get("LENGTH", 1)))
-            noise = float(request.args.get("noise", app.config.get("NOISE", 0.5)))
-            noisew = float(request.args.get("noisew", app.config.get("NOISEW", 0.6)))
-            sdp_ratio = float(request.args.get("sdp_ratio", 0.2))
-            max = int(request.args.get("max", app.config.get("MAX", 50)))
+            request_data = request.args
         elif request.method == "POST":
             content_type = request.headers.get('Content-Type')
             if content_type == 'application/json':
-                data = request.get_json()
+                request_data = request.get_json()
             else:
-                data = request.form
-            text = data.get("text", "")
-            id = int(data.get("id", app.config.get("ID", 0)))
-            format = data.get("format", app.config.get("FORMAT", "wav"))
-            lang = data.get("lang", "auto").lower()
-            length = float(data.get("length", app.config.get("LENGTH", 1)))
-            noise = float(data.get("noise", app.config.get("NOISE", 0.667)))
-            noisew = float(data.get("noisew", app.config.get("NOISEW", 0.8)))
-            sdp_ratio = float(data.get("noisew", app.config.get("SDP_RATIO", 0.2)))
-            max = int(data.get("max", app.config.get("MAX", 50)))
+                request_data = request.form
+
+        text = request_data.get("text", "")
+        id = int(request_data.get("id", app.config.get("ID", 0)))
+        format = request_data.get("format", app.config.get("FORMAT", "wav"))
+        lang = request_data.get("lang", "auto").lower()
+        length = float(request_data.get("length", app.config.get("LENGTH", 1)))
+        noise = float(request_data.get("noise", app.config.get("NOISE", 0.667)))
+        noisew = float(request_data.get("noisew", app.config.get("NOISEW", 0.8)))
+        sdp_ratio = float(request_data.get("noisew", app.config.get("SDP_RATIO", 0.2)))
+        max = int(request_data.get("max", app.config.get("MAX", 50)))
     except Exception as e:
         logger.error(f"[Bert-VITS2] {e}")
         return make_response("parameter error", 400)
@@ -468,30 +465,33 @@ def voice_bert_vits2_api():
             "lang": lang,
             "speaker_lang": speaker_lang}
 
-    if app.config.get("SAVE_AUDIO", False):
-        logger.debug(f"[Bert-VITS2] {fname}")
-
     t1 = time.time()
-    audio = tts.bert_vits2_infer(task, fname)
+    audio = tts.bert_vits2_infer(task)
     t2 = time.time()
     logger.info(f"[Bert-VITS2] finish in {(t2 - t1):.2f}s")
+
+    if app.config.get("SAVE_AUDIO", False):
+        logger.debug(f"[Bert-VITS2] {fname}")
+        path = os.path.join(app.config.get('CACHE_PATH'), fname)
+        save_audio(audio.getvalue(), path)
+
     return send_file(path_or_file=audio, mimetype=file_type, download_name=fname)
 
 
 @app.route('/voice/check', methods=["GET", "POST"])
 def check():
     try:
         if request.method == "GET":
-            model = request.args.get("model")
-            id = int(request.args.get("id"))
+            request_data = request.args
         elif request.method == "POST":
             content_type = request.headers.get('Content-Type')
             if content_type == 'application/json':
-                data = request.get_json()
+                request_data = request.get_json()
             else:
-                data = request.form
-            model = data.get("model")
-            id = int(data.get("id"))
+                request_data = request.form
+
+        model = request_data.get("model")
+        id = int(request_data.get("id"))
     except Exception as e:
         logger.info(f"[check] {e}")
         return make_response(jsonify({"status": "error", "message": "parameter error"}), 400)

diff --git a/bert_vits2/bert_vits2.py b/bert_vits2/bert_vits2.py
@@ -6,13 +6,13 @@
 from bert_vits2.models import SynthesizerTrn
 from bert_vits2.text import *
 from bert_vits2.text.cleaner import clean_text
-from utils import classify_language
+from utils import classify_language, get_hparams_from_file
 from utils.sentence import sentence_split_and_markup, cut
 
 
 class Bert_VITS2:
     def __init__(self, model, config, device=torch.device("cpu"), **kwargs):
-        self.hps_ms = bert_vits2_utils.get_hparams_from_file(config)
+        self.hps_ms = get_hparams_from_file(config)
         self.n_speakers = getattr(self.hps_ms.data, 'n_speakers', 0)
         self.speakers = [item[0] for item in
                          sorted(list(getattr(self.hps_ms.data, 'spk2id', {'0': 0}).items()), key=lambda x: x[1])]