Skip to content

Commit

Permalink
Code refactor
Browse files (browse the repository at this point in the history)
  • Loading branch information
Artrajz committed Sep 30, 2023
1 parent b9cec2c commit 491fd31
Show file tree
Hide file tree
Showing 8 changed files with 248 additions and 508 deletions.
190 changes: 95 additions & 95 deletions app.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -6,7 +6,7 @@
from werkzeug.utils import secure_filename
from flask_apscheduler import APScheduler
from functools import wraps
from utils import clean_folder, check_is_none
from utils.data_utils import save_audio, clean_folder, check_is_none
from utils.load_model import load_model
from io import BytesIO

Expand Down Expand Up @@ -67,30 +67,23 @@ def voice_speakers_api():
def voice_vits_api():
try:
if request.method == "GET":
text = request.args.get("text", "")
id = int(request.args.get("id", app.config.get("ID", 0)))
format = request.args.get("format", app.config.get("FORMAT", "wav"))
lang = request.args.get("lang", app.config.get("LANG", "auto")).lower()
length = float(request.args.get("length", app.config.get("LENGTH", 1)))
noise = float(request.args.get("noise", app.config.get("NOISE", 0.667)))
noisew = float(request.args.get("noisew", app.config.get("NOISEW", 0.8)))
max = int(request.args.get("max", app.config.get("MAX", 50)))
use_streaming = request.args.get('streaming', False, type=bool)
request_data = request.args
elif request.method == "POST":
content_type = request.headers.get('Content-Type')
if content_type == 'application/json':
data = request.get_json()
request_data = request.get_json()
else:
data = request.form
text = data.get("text", "")
id = int(data.get("id", app.config.get("ID", 0)))
format = data.get("format", app.config.get("FORMAT", "wav"))
lang = data.get("lang", app.config.get("LANG", "auto")).lower()
length = float(data.get("length", app.config.get("LENGTH", 1)))
noise = float(data.get("noise", app.config.get("NOISE", 0.667)))
noisew = float(data.get("noisew", app.config.get("NOISEW", 0.8)))
max = int(data.get("max", app.config.get("MAX", 50)))
use_streaming = request.form.get('streaming', False, type=bool)
request_data = request.form

text = request_data.get("text", "")
id = int(request_data.get("id", app.config.get("ID", 0)))
format = request_data.get("format", app.config.get("FORMAT", "wav"))
lang = request_data.get("lang", app.config.get("LANG", "auto")).lower()
length = float(request_data.get("length", app.config.get("LENGTH", 1)))
noise = float(request_data.get("noise", app.config.get("NOISE", 0.667)))
noisew = float(request_data.get("noisew", app.config.get("NOISEW", 0.8)))
max = int(request_data.get("max", app.config.get("MAX", 50)))
use_streaming = request_data.get('streaming', False, type=bool)
except Exception as e:
logger.error(f"[VITS] {e}")
return make_response("parameter error", 400)
Expand Down Expand Up @@ -136,20 +129,23 @@ def voice_vits_api():
"lang": lang,
"speaker_lang": speaker_lang}

if app.config.get("SAVE_AUDIO", False):
logger.debug(f"[VITS] {fname}")

if use_streaming:
audio = tts.stream_vits_infer(task, fname)
audio = tts.stream_vits_infer(task)
response = make_response(audio)
response.headers['Content-Disposition'] = f'attachment; filename={fname}'
response.headers['Content-Type'] = file_type
return response
else:
t1 = time.time()
audio = tts.vits_infer(task, fname)
audio = tts.vits_infer(task)
t2 = time.time()
logger.info(f"[VITS] finish in {(t2 - t1):.2f}s")

if app.config.get("SAVE_AUDIO", False):
logger.debug(f"[VITS] {fname}")
path = os.path.join(app.config.get('CACHE_PATH'), fname)
save_audio(audio.getvalue(), path)

return send_file(path_or_file=audio, mimetype=file_type, download_name=fname)


Expand Down Expand Up @@ -191,11 +187,15 @@ def voice_hubert_api():
"audio_path": os.path.join(app.config['UPLOAD_FOLDER'], fname)}

t1 = time.time()
audio = tts.hubert_vits_infer(task, fname)
audio = tts.hubert_vits_infer(task)
t2 = time.time()
logger.info(f"[hubert] finish in {(t2 - t1):.2f}s")

if app.config.get("SAVE_AUDIO", False):
logger.debug(f"[hubert] {fname}")
logger.info(f"[hubert] finish in {(t2 - t1):.2f}s")
path = os.path.join(app.config.get('CACHE_PATH'), fname)
save_audio(audio.getvalue(), path)

if use_streaming:
audio = tts.generate_audio_chunks(audio)
response = make_response(audio)
Expand All @@ -211,32 +211,24 @@ def voice_hubert_api():
def voice_w2v2_api():
try:
if request.method == "GET":
text = request.args.get("text", "")
id = int(request.args.get("id", app.config.get("ID", 0)))
format = request.args.get("format", app.config.get("FORMAT", "wav"))
lang = request.args.get("lang", app.config.get("LANG", "auto")).lower()
length = float(request.args.get("length", app.config.get("LENGTH", 1)))
noise = float(request.args.get("noise", app.config.get("NOISE", 0.667)))
noisew = float(request.args.get("noisew", app.config.get("NOISEW", 0.8)))
max = int(request.args.get("max", app.config.get("MAX", 50)))
emotion = int(request.args.get("emotion", app.config.get("EMOTION", 0)))
use_streaming = request.args.get('streaming', False, type=bool)
request_data = request.args
elif request.method == "POST":
content_type = request.headers.get('Content-Type')
if content_type == 'application/json':
data = request.get_json()
request_data = request.get_json()
else:
data = request.form
text = data.get("text", "")
id = int(data.get("id", app.config.get("ID", 0)))
format = data.get("format", app.config.get("FORMAT", "wav"))
lang = data.get("lang", app.config.get("LANG", "auto")).lower()
length = float(data.get("length"))
noise = float(data.get("noise", app.config.get("NOISE", 0.667)))
noisew = float(data.get("noisew", app.config.get("NOISEW", 0.8)))
max = int(data.get("max", app.config.get("MAX", 50)))
emotion = int(data.get("emotion", app.config.get("EMOTION", 0)))
use_streaming = request.form.get('streaming', False, type=bool)
request_data = request.form

text = request_data.get("text", "")
id = int(request_data.get("id", app.config.get("ID", 0)))
format = request_data.get("format", app.config.get("FORMAT", "wav"))
lang = request_data.get("lang", app.config.get("LANG", "auto")).lower()
length = float(request_data.get("length", app.config.get("LENGTH", 1)))
noise = float(request_data.get("noise", app.config.get("NOISE", 0.667)))
noisew = float(request_data.get("noisew", app.config.get("NOISEW", 0.8)))
max = int(request_data.get("max", app.config.get("MAX", 50)))
emotion = int(request_data.get("emotion", app.config.get("EMOTION", 0)))
use_streaming = request_data.get('streaming', False, type=bool)
except Exception as e:
logger.error(f"[w2v2] {e}")
return make_response(f"parameter error", 400)
Expand Down Expand Up @@ -285,18 +277,22 @@ def voice_w2v2_api():
"speaker_lang": speaker_lang}

t1 = time.time()
audio = tts.w2v2_vits_infer(task, fname)
audio = tts.w2v2_vits_infer(task)
t2 = time.time()
logger.info(f"[w2v2] finish in {(t2 - t1):.2f}s")

if app.config.get("SAVE_AUDIO", False):
logger.debug(f"[W2V2] {fname}")
logger.debug(f"[w2v2] {fname}")
path = os.path.join(app.config.get('CACHE_PATH'), fname)
save_audio(audio.getvalue(), path)

if use_streaming:
audio = tts.generate_audio_chunks(audio)
response = make_response(audio)
response.headers['Content-Disposition'] = f'attachment; filename={fname}'
response.headers['Content-Type'] = file_type
return response
else:
logger.info(f"[w2v2] finish in {(t2 - t1):.2f}s")
return send_file(path_or_file=audio, mimetype=file_type, download_name=fname)


Expand Down Expand Up @@ -326,11 +322,15 @@ def vits_voice_conversion_api():
"format": format}

t1 = time.time()
audio = tts.vits_voice_conversion(task, fname)
audio = tts.vits_voice_conversion(task)
t2 = time.time()
logger.info(f"[Voice conversion] finish in {(t2 - t1):.2f}s")

if app.config.get("SAVE_AUDIO", False):
logger.debug(f"[Voice conversion] {fname}")
logger.info(f"[Voice conversion] finish in {(t2 - t1):.2f}s")
path = os.path.join(app.config.get('CACHE_PATH'), fname)
save_audio(audio.getvalue(), path)

if use_streaming:
audio = tts.generate_audio_chunks(audio)
response = make_response(audio)
Expand All @@ -343,14 +343,15 @@ def vits_voice_conversion_api():

@app.route('/voice/ssml', methods=["POST"])
@require_api_key
def ssml():
def ssml_api():
try:
content_type = request.headers.get('Content-Type')
if content_type == 'application/json':
data = request.get_json()
request_data = request.get_json()
else:
data = request.form
ssml = data.get("ssml")
request_data = request.form

ssml = request_data.get("ssml")
except Exception as e:
logger.info(f"[ssml] {e}")
return make_response(jsonify({"status": "error", "message": f"parameter error"}), 400)
Expand All @@ -361,11 +362,14 @@ def ssml():
file_type = f"audio/{format}"

t1 = time.time()
audio = tts.create_ssml_infer_task(voice_tasks, format, fname)
audio = tts.create_ssml_infer_task(voice_tasks, format)
t2 = time.time()
logger.info(f"[ssml] finish in {(t2 - t1):.2f}s")

if app.config.get("SAVE_AUDIO", False):
logger.debug(f"[ssml] {fname}")
logger.info(f"[ssml] finish in {(t2 - t1):.2f}s")
path = os.path.join(app.config.get('CACHE_PATH'), fname)
save_audio(audio.getvalue(), path)

return send_file(path_or_file=audio, mimetype=file_type, download_name=fname)

Expand All @@ -385,46 +389,39 @@ def dimensional_emotion():

file_type = "application/octet-stream; charset=ascii"
fname = os.path.splitext(audio.filename)[0] + ".npy"
audio = tts.get_dimensional_emotion_npy(content)
emotion_npy = tts.get_dimensional_emotion_npy(content)
if use_streaming:
audio = tts.generate_audio_chunks(audio)
response = make_response(audio)
emotion_npy = tts.generate_audio_chunks(emotion_npy)
response = make_response(emotion_npy)
response.headers['Content-Disposition'] = f'attachment; filename={fname}'
response.headers['Content-Type'] = file_type
return response
else:
return send_file(path_or_file=audio, mimetype=file_type, download_name=fname)
return send_file(path_or_file=emotion_npy, mimetype=file_type, download_name=fname)


@app.route('/voice/bert-vits2', methods=["GET", "POST"])
@require_api_key
def voice_bert_vits2_api():
try:
if request.method == "GET":
text = request.args.get("text", "")
id = int(request.args.get("id", app.config.get("ID", 0)))
format = request.args.get("format", app.config.get("FORMAT", "wav"))
lang = request.args.get("lang", "auto").lower()
length = float(request.args.get("length", app.config.get("LENGTH", 1)))
noise = float(request.args.get("noise", app.config.get("NOISE", 0.5)))
noisew = float(request.args.get("noisew", app.config.get("NOISEW", 0.6)))
sdp_ratio = float(request.args.get("sdp_ratio", 0.2))
max = int(request.args.get("max", app.config.get("MAX", 50)))
request_data = request.args
elif request.method == "POST":
content_type = request.headers.get('Content-Type')
if content_type == 'application/json':
data = request.get_json()
request_data = request.get_json()
else:
data = request.form
text = data.get("text", "")
id = int(data.get("id", app.config.get("ID", 0)))
format = data.get("format", app.config.get("FORMAT", "wav"))
lang = data.get("lang", "auto").lower()
length = float(data.get("length", app.config.get("LENGTH", 1)))
noise = float(data.get("noise", app.config.get("NOISE", 0.667)))
noisew = float(data.get("noisew", app.config.get("NOISEW", 0.8)))
sdp_ratio = float(data.get("noisew", app.config.get("SDP_RATIO", 0.2)))
max = int(data.get("max", app.config.get("MAX", 50)))
request_data = request.form

text = request_data.get("text", "")
id = int(request_data.get("id", app.config.get("ID", 0)))
format = request_data.get("format", app.config.get("FORMAT", "wav"))
lang = request_data.get("lang", "auto").lower()
length = float(request_data.get("length", app.config.get("LENGTH", 1)))
noise = float(request_data.get("noise", app.config.get("NOISE", 0.667)))
noisew = float(request_data.get("noisew", app.config.get("NOISEW", 0.8)))
sdp_ratio = float(request_data.get("noisew", app.config.get("SDP_RATIO", 0.2)))
max = int(request_data.get("max", app.config.get("MAX", 50)))
except Exception as e:
logger.error(f"[Bert-VITS2] {e}")
return make_response("parameter error", 400)
Expand Down Expand Up @@ -468,30 +465,33 @@ def voice_bert_vits2_api():
"lang": lang,
"speaker_lang": speaker_lang}

if app.config.get("SAVE_AUDIO", False):
logger.debug(f"[Bert-VITS2] {fname}")

t1 = time.time()
audio = tts.bert_vits2_infer(task, fname)
audio = tts.bert_vits2_infer(task)
t2 = time.time()
logger.info(f"[Bert-VITS2] finish in {(t2 - t1):.2f}s")

if app.config.get("SAVE_AUDIO", False):
logger.debug(f"[Bert-VITS2] {fname}")
path = os.path.join(app.config.get('CACHE_PATH'), fname)
save_audio(audio.getvalue(), path)

return send_file(path_or_file=audio, mimetype=file_type, download_name=fname)


@app.route('/voice/check', methods=["GET", "POST"])
def check():
try:
if request.method == "GET":
model = request.args.get("model")
id = int(request.args.get("id"))
request_data = request.args
elif request.method == "POST":
content_type = request.headers.get('Content-Type')
if content_type == 'application/json':
data = request.get_json()
request_data = request.get_json()
else:
data = request.form
model = data.get("model")
id = int(data.get("id"))
request_data = request.form

model = request_data.get("model")
id = int(request_data.get("id"))
except Exception as e:
logger.info(f"[check] {e}")
return make_response(jsonify({"status": "error", "message": "parameter error"}), 400)
Expand Down
4 changes: 2 additions & 2 deletions bert_vits2/bert_vits2.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -6,13 +6,13 @@
from bert_vits2.models import SynthesizerTrn
from bert_vits2.text import *
from bert_vits2.text.cleaner import clean_text
from utils import classify_language
from utils import classify_language, get_hparams_from_file
from utils.sentence import sentence_split_and_markup, cut


class Bert_VITS2:
def __init__(self, model, config, device=torch.device("cpu"), **kwargs):
self.hps_ms = bert_vits2_utils.get_hparams_from_file(config)
self.hps_ms = get_hparams_from_file(config)
self.n_speakers = getattr(self.hps_ms.data, 'n_speakers', 0)
self.speakers = [item[0] for item in
sorted(list(getattr(self.hps_ms.data, 'spk2id', {'0': 0}).items()), key=lambda x: x[1])]
Expand Down
Loading

0 comments on commit 491fd31

Please sign in to comment.