From 0b0d153196ce8728e848ec615fe09aa49b1a26ca Mon Sep 17 00:00:00 2001
From: eternalliving <eternalliving@gmail.com>
Date: Fri, 18 Mar 2022 20:23:57 -0700
Subject: [PATCH 1/6] Created Remote engine for Glados TTS

Former-commit-id: f877ce93657a0aecb61b17b22f73c4262f4e9f74
---
 .gitignore       |  2 +-
 README.md        | 12 ++++++++
 engine-remote.py | 75 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 88 insertions(+), 1 deletion(-)
 create mode 100644 engine-remote.py

diff --git a/.gitignore b/.gitignore
index ae2c404..ed52ba6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,2 @@
 **/__pycache__/**
-*.wav
+*.wav
\ No newline at end of file
diff --git a/README.md b/README.md
index ddc6cbe..714d2bb 100644
--- a/README.md
+++ b/README.md
@@ -6,6 +6,18 @@ If you want to just play around with the TTS, this works as stand-alone.
 python3 glados-tts/glados.py
 ```
 
+The TTS Engine can also be used remotely on a machine more powerful than the Pi to process in-house TTS (executed from the glados-tts directory):
+```console
+python3 engine-remote.py
+```
+
+Default port is 8124
+Be sure to update settings.env variable in your main GLados-voice-assistant directory:
+```
+TTS_ENGINE_URL			= http://192.168.1.3:8124/synthesize/
+```
+
+
 ## Description
 The initial, regular Tacotron model was trained first on LJSpeech, and then on a heavily modified version of the Ellen McClain dataset (all non-Portal 2 voice lines removed, punctuation added).
 
diff --git a/engine-remote.py b/engine-remote.py
new file mode 100644
index 0000000..f8dd32c
--- /dev/null
+++ b/engine-remote.py
@@ -0,0 +1,75 @@
+
+from flask import Flask, request, send_file
+import torch
+from utils.tools import prepare_text
+from scipy.io.wavfile import write
+import time
+import os
+import urllib.parse
+
+app = Flask(__name__)
+
+@app.route('/synthesize/', defaults={'text': ''})
+@app.route('/synthesize/<path:text>')
+def synthesize(text):
+	text = request.url[request.url.find('synthesize/')+11:]
+	if(text == ''): return 'No input'
+	if(glados_tts(urllib.parse.unquote(text))):
+		return send_file(os.getcwd()+'/output.wav')
+	else:
+		return 'TTS Engine Failed'
+		
+print("Initializing TTS Engine...")
+
+# Select the device
+if torch.is_vulkan_available():
+	device = 'vulkan'
+if torch.cuda.is_available():
+	device = 'cuda'
+else:
+	device = 'cpu'
+
+# Load models
+glados = torch.jit.load('models/glados.pt')
+vocoder = torch.jit.load('models/vocoder-gpu.pt', map_location=device)
+
+# Prepare models in RAM
+for i in range(4):
+	init = glados.generate_jit(prepare_text(str(i)))
+	init_mel = init['mel_post'].to(device)
+	init_vo = vocoder(init_mel)
+
+
+def glados_tts(text):
+
+	# Tokenize, clean and phonemize input text
+	x = prepare_text(text).to('cpu')
+
+	with torch.no_grad():
+
+		# Generate generic TTS-output
+		old_time = time.time()
+		tts_output = glados.generate_jit(x)
+		print("Forward Tacotron took " + str((time.time() - old_time) * 1000) + "ms")
+
+		# Use HiFiGAN as vocoder to make output sound like GLaDOS
+		old_time = time.time()
+		mel = tts_output['mel_post'].to(device)
+		audio = vocoder(mel)
+		print("HiFiGAN took " + str((time.time() - old_time) * 1000) + "ms")
+
+		# Normalize audio to fit in wav-file
+		audio = audio.squeeze()
+		audio = audio * 32768.0
+		audio = audio.cpu().numpy().astype('int16')
+		output_file = ('output.wav')
+
+		# Write audio file to disk
+		# 22,05 kHz sample rate 
+		write(output_file, 22050, audio)
+
+	return True
+
+print("Initializing TTS Server...")
+if __name__ == "__main__":
+	app.run(host="0.0.0.0", port=8124)
\ No newline at end of file

From 4ee1b8dd7bcea5330dbc1b9f3cd8706eb8541990 Mon Sep 17 00:00:00 2001
From: eternalliving <eternalliving@gmail.com>
Date: Sun, 20 Mar 2022 04:14:36 -0700
Subject: [PATCH 2/6] Updated TTS Engine to be in single script

Former-commit-id: a3f85ac4be7be2496e0f85e2bc771252b0d12b88
---
 .gitignore       |  3 +-
 engine-remote.py | 75 ------------------------------------------------
 engine.py        | 74 +++++++++++++++++++++++++++++++++++++++--------
 utils/tools.py   |  9 ++++++
 4 files changed, 73 insertions(+), 88 deletions(-)
 delete mode 100644 engine-remote.py

diff --git a/.gitignore b/.gitignore
index ed52ba6..0bba77b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 **/__pycache__/**
-*.wav
\ No newline at end of file
+*.wav
+audio/*
\ No newline at end of file
diff --git a/engine-remote.py b/engine-remote.py
deleted file mode 100644
index f8dd32c..0000000
--- a/engine-remote.py
+++ /dev/null
@@ -1,75 +0,0 @@
-
-from flask import Flask, request, send_file
-import torch
-from utils.tools import prepare_text
-from scipy.io.wavfile import write
-import time
-import os
-import urllib.parse
-
-app = Flask(__name__)
-
-@app.route('/synthesize/', defaults={'text': ''})
-@app.route('/synthesize/<path:text>')
-def synthesize(text):
-	text = request.url[request.url.find('synthesize/')+11:]
-	if(text == ''): return 'No input'
-	if(glados_tts(urllib.parse.unquote(text))):
-		return send_file(os.getcwd()+'/output.wav')
-	else:
-		return 'TTS Engine Failed'
-		
-print("Initializing TTS Engine...")
-
-# Select the device
-if torch.is_vulkan_available():
-	device = 'vulkan'
-if torch.cuda.is_available():
-	device = 'cuda'
-else:
-	device = 'cpu'
-
-# Load models
-glados = torch.jit.load('models/glados.pt')
-vocoder = torch.jit.load('models/vocoder-gpu.pt', map_location=device)
-
-# Prepare models in RAM
-for i in range(4):
-	init = glados.generate_jit(prepare_text(str(i)))
-	init_mel = init['mel_post'].to(device)
-	init_vo = vocoder(init_mel)
-
-
-def glados_tts(text):
-
-	# Tokenize, clean and phonemize input text
-	x = prepare_text(text).to('cpu')
-
-	with torch.no_grad():
-
-		# Generate generic TTS-output
-		old_time = time.time()
-		tts_output = glados.generate_jit(x)
-		print("Forward Tacotron took " + str((time.time() - old_time) * 1000) + "ms")
-
-		# Use HiFiGAN as vocoder to make output sound like GLaDOS
-		old_time = time.time()
-		mel = tts_output['mel_post'].to(device)
-		audio = vocoder(mel)
-		print("HiFiGAN took " + str((time.time() - old_time) * 1000) + "ms")
-
-		# Normalize audio to fit in wav-file
-		audio = audio.squeeze()
-		audio = audio * 32768.0
-		audio = audio.cpu().numpy().astype('int16')
-		output_file = ('output.wav')
-
-		# Write audio file to disk
-		# 22,05 kHz sample rate 
-		write(output_file, 22050, audio)
-
-	return True
-
-print("Initializing TTS Server...")
-if __name__ == "__main__":
-	app.run(host="0.0.0.0", port=8124)
\ No newline at end of file
diff --git a/engine.py b/engine.py
index d06721f..287c32b 100644
--- a/engine.py
+++ b/engine.py
@@ -1,16 +1,12 @@
-# importing sys
-import sys
-import os
-
-sys.path.insert(0, os.getcwd()+'/glados_tts')
-
 import torch
 from utils.tools import prepare_text
 from scipy.io.wavfile import write
 import time
+import sys
+import os
 
 sys.path.insert(0, os.getcwd()+'/glados_tts')
-
+		
 print("\033[1;94mINFO:\033[;97m Initializing TTS Engine...")
 
 # Select the device
@@ -22,8 +18,8 @@
     device = 'cpu'
 
 # Load models
-glados = torch.jit.load('glados_tts/models/glados.pt')
-vocoder = torch.jit.load('glados_tts/models/vocoder-gpu.pt', map_location=device)
+glados = torch.jit.load('models/glados.pt')
+vocoder = torch.jit.load('models/vocoder-gpu.pt', map_location=device)
 
 # Prepare models in RAM
 for i in range(4):
@@ -32,7 +28,7 @@
     init_vo = vocoder(init_mel)
 
 
-def glados_tts(text):
+def glados_tts(text, key=False):
 
     # Tokenize, clean and phonemize input text
     x = prepare_text(text).to('cpu')
@@ -52,10 +48,64 @@ def glados_tts(text):
         audio = audio.squeeze()
         audio = audio * 32768.0
         audio = audio.cpu().numpy().astype('int16')
-        output_file = ('output.wav')
+		if(key):
+			output_file = ('audio/GLaDOS-tts-temp-output-'+key+'.wav')
+		else:
+			output_file = ('audio/GLaDOS-tts-temp-output.wav')
 
         # Write audio file to disk
         # 22,05 kHz sample rate 
         write(output_file, 22050, audio)
 
-    return True
\ No newline at end of file
+    return True
+
+
+# If the script is run directly, assume remote engine
+if __name__ == "__main__":
+	
+	# Remote Engine Variables
+	PORT = 8124
+	CACHE = True
+
+	from flask import Flask, request, send_file
+	from utils.tools import cleanTSSFile
+	import urllib.parse
+	
+	print("Initializing TTS Server...")
+	
+	app = Flask(__name__)
+
+	@app.route('/synthesize/', defaults={'text': ''})
+	@app.route('/synthesize/<path:text>')
+	def synthesize(text):
+		if(text == ''): return 'No input'
+		
+		line = urllib.parse.unquote(request.url[request.url.find('synthesize/')+11:])
+		file = os.getcwd()+'/audio/'+cleanTTSFile(line)
+		
+		# Check for Local Cache
+		if(os.path.isfile(file)):
+		
+			# Update access time. This will allow for routine cleanups
+			os.utime(file, None)
+			
+			return send_file(file)
+			
+		# Generate New Sample
+		key = time.time()[7:]
+		if(glados_tts(line, key)):
+			tempfile = os.getcwd()+'/audio/GLaDOS-tts-temp-output-'+key+'.wav'
+			return send_file(tempfile)
+			
+			# If the line isn't too long, store in cache
+			if(len(line) < 200):
+				shutil.move(tempfile, 'audio/'+file)
+			else:
+				os.remove(tempfile)
+				
+		else:
+			return 'TTS Engine Failed'
+			
+	cli = sys.modules['flask.cli']
+	cli.show_server_banner = lambda *x: None
+	app.run(host="0.0.0.0", PORT)
\ No newline at end of file
diff --git a/utils/tools.py b/utils/tools.py
index f79bee0..8c6c449 100644
--- a/utils/tools.py
+++ b/utils/tools.py
@@ -9,3 +9,12 @@ def prepare_text(text: str)->str:
     cleaner = Cleaner('english_cleaners', True, 'en-us')
     tokenizer = Tokenizer()
     return torch.as_tensor(tokenizer(cleaner(text)), dtype=torch.int, device='cpu').unsqueeze(0)
+	
+# Check Local Cache
+def cleanTTSFile(line):
+	filename = "GLaDOS-tts-"+line.replace(" ", "-")
+	filename = filename.replace("!", "")
+	filename = filename.replace("°c", "degrees celcius")
+	filename = filename.replace(",", "")+".wav"
+
+	return filename
\ No newline at end of file

From b3a28a8f09ce43a2b0daff14eb2b5ca029e22e1c Mon Sep 17 00:00:00 2001
From: eternalliving <eternalliving@gmail.com>
Date: Sun, 20 Mar 2022 07:41:03 -0700
Subject: [PATCH 3/6] Fixing all my mistakes :)

Former-commit-id: 1ffa7d7203f9019d09538751996086cc64a24259
---
 engine.py      | 65 ++++++++++++++++++++++++++------------------------
 utils/tools.py | 11 +--------
 2 files changed, 35 insertions(+), 41 deletions(-)

diff --git a/engine.py b/engine.py
index 287c32b..87c5a23 100644
--- a/engine.py
+++ b/engine.py
@@ -11,11 +11,11 @@
 
 # Select the device
 if torch.is_vulkan_available():
-    device = 'vulkan'
+	device = 'vulkan'
 if torch.cuda.is_available():
-    device = 'cuda'
+	device = 'cuda'
 else:
-    device = 'cpu'
+	device = 'cpu'
 
 # Load models
 glados = torch.jit.load('models/glados.pt')
@@ -23,41 +23,41 @@
 
 # Prepare models in RAM
 for i in range(4):
-    init = glados.generate_jit(prepare_text(str(i)))
-    init_mel = init['mel_post'].to(device)
-    init_vo = vocoder(init_mel)
+	init = glados.generate_jit(prepare_text(str(i)))
+	init_mel = init['mel_post'].to(device)
+	init_vo = vocoder(init_mel)
 
 
 def glados_tts(text, key=False):
 
-    # Tokenize, clean and phonemize input text
-    x = prepare_text(text).to('cpu')
+	# Tokenize, clean and phonemize input text
+	x = prepare_text(text).to('cpu')
 
-    with torch.no_grad():
+	with torch.no_grad():
 
-        # Generate generic TTS-output
-        old_time = time.time()
-        tts_output = glados.generate_jit(x)
+		# Generate generic TTS-output
+		old_time = time.time()
+		tts_output = glados.generate_jit(x)
 
-        # Use HiFiGAN as vocoder to make output sound like GLaDOS
-        mel = tts_output['mel_post'].to(device)
-        audio = vocoder(mel)
-        print("\033[1;94mINFO:\033[;97m The audio sample took " + str(round((time.time() - old_time) * 1000)) + " ms to generate.")
+		# Use HiFiGAN as vocoder to make output sound like GLaDOS
+		mel = tts_output['mel_post'].to(device)
+		audio = vocoder(mel)
+		print("\033[1;94mINFO:\033[;97m The audio sample took " + str(round((time.time() - old_time) * 1000)) + " ms to generate.")
 
-        # Normalize audio to fit in wav-file
-        audio = audio.squeeze()
-        audio = audio * 32768.0
-        audio = audio.cpu().numpy().astype('int16')
+		# Normalize audio to fit in wav-file
+		audio = audio.squeeze()
+		audio = audio * 32768.0
+		audio = audio.cpu().numpy().astype('int16')
 		if(key):
 			output_file = ('audio/GLaDOS-tts-temp-output-'+key+'.wav')
 		else:
 			output_file = ('audio/GLaDOS-tts-temp-output.wav')
 
-        # Write audio file to disk
-        # 22,05 kHz sample rate 
-        write(output_file, 22050, audio)
+		# Write audio file to disk
+		# 22,05 kHz sample rate 
+		write(output_file, 22050, audio)
 
-    return True
+	return True
 
 
 # If the script is run directly, assume remote engine
@@ -68,10 +68,9 @@ def glados_tts(text, key=False):
 	CACHE = True
 
 	from flask import Flask, request, send_file
-	from utils.tools import cleanTSSFile
 	import urllib.parse
 	
-	print("Initializing TTS Server...")
+	print("\033[1;94mINFO:\033[;97m Initializing TTS Server...")
 	
 	app = Flask(__name__)
 
@@ -81,7 +80,11 @@ def synthesize(text):
 		if(text == ''): return 'No input'
 		
 		line = urllib.parse.unquote(request.url[request.url.find('synthesize/')+11:])
-		file = os.getcwd()+'/audio/'+cleanTTSFile(line)
+		filename = "GLaDOS-tts-"+line.replace(" ", "-")
+		filename = filename.replace("!", "")
+		filename = filename.replace("°c", "degrees celcius")
+		filename = filename.replace(",", "")+".wav"
+		file = os.getcwd()+'/audio/'+filename
 		
 		# Check for Local Cache
 		if(os.path.isfile(file)):
@@ -92,14 +95,14 @@ def synthesize(text):
 			return send_file(file)
 			
 		# Generate New Sample
-		key = time.time()[7:]
+		key = str(time.time())[7:]
 		if(glados_tts(line, key)):
 			tempfile = os.getcwd()+'/audio/GLaDOS-tts-temp-output-'+key+'.wav'
 			return send_file(tempfile)
 			
 			# If the line isn't too long, store in cache
-			if(len(line) < 200):
-				shutil.move(tempfile, 'audio/'+file)
+			if(len(line) < 200 and CACHE):
+				shutil.move(tempfile, filename)
 			else:
 				os.remove(tempfile)
 				
@@ -108,4 +111,4 @@ def synthesize(text):
 			
 	cli = sys.modules['flask.cli']
 	cli.show_server_banner = lambda *x: None
-	app.run(host="0.0.0.0", PORT)
\ No newline at end of file
+	app.run(host="0.0.0.0", port=PORT)
\ No newline at end of file
diff --git a/utils/tools.py b/utils/tools.py
index 8c6c449..32e01bd 100644
--- a/utils/tools.py
+++ b/utils/tools.py
@@ -8,13 +8,4 @@ def prepare_text(text: str)->str:
         text = text + '.'
     cleaner = Cleaner('english_cleaners', True, 'en-us')
     tokenizer = Tokenizer()
-    return torch.as_tensor(tokenizer(cleaner(text)), dtype=torch.int, device='cpu').unsqueeze(0)
-	
-# Check Local Cache
-def cleanTTSFile(line):
-	filename = "GLaDOS-tts-"+line.replace(" ", "-")
-	filename = filename.replace("!", "")
-	filename = filename.replace("°c", "degrees celcius")
-	filename = filename.replace(",", "")+".wav"
-
-	return filename
\ No newline at end of file
+    return torch.as_tensor(tokenizer(cleaner(text)), dtype=torch.int, device='cpu').unsqueeze(0)
\ No newline at end of file

From c5dd8ef6bdefa60c1495182bf0dae5b2f3d2f8f6 Mon Sep 17 00:00:00 2001
From: eternalliving <eternalliving@gmail.com>
Date: Sun, 20 Mar 2022 07:44:36 -0700
Subject: [PATCH 4/6] fixed readme error

Former-commit-id: 086e4011c089d079ae174a954a86f1a6d6737ac3
---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 714d2bb..fd71608 100644
--- a/README.md
+++ b/README.md
@@ -12,9 +12,9 @@ python3 engine-remote.py
 ```
 
 Default port is 8124
-Be sure to update settings.env variable in your main GLados-voice-assistant directory:
+Be sure to update settings.env variable in your main Glados-voice-assistant directory:
 ```
-TTS_ENGINE_URL			= http://192.168.1.3:8124/synthesize/
+TTS_ENGINE_API			= http://192.168.1.3:8124/synthesize/
 ```
 
 

From 0aa6b5565297de9ab118f0b319909adbd611d2a5 Mon Sep 17 00:00:00 2001
From: eternalliving <eternalliving@gmail.com>
Date: Sun, 20 Mar 2022 08:22:14 -0700
Subject: [PATCH 5/6] Fixed model locations

Former-commit-id: db2b8ebbcf0149fa4d9f9ade83f6dc8fd6844a51
---
 engine.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/engine.py b/engine.py
index 87c5a23..c222a85 100644
--- a/engine.py
+++ b/engine.py
@@ -1,11 +1,11 @@
+import sys
+import os
+sys.path.insert(0, os.getcwd()+'/glados_tts')
+
 import torch
 from utils.tools import prepare_text
 from scipy.io.wavfile import write
 import time
-import sys
-import os
-
-sys.path.insert(0, os.getcwd()+'/glados_tts')
 		
 print("\033[1;94mINFO:\033[;97m Initializing TTS Engine...")
 
@@ -18,8 +18,12 @@
 	device = 'cpu'
 
 # Load models
-glados = torch.jit.load('models/glados.pt')
-vocoder = torch.jit.load('models/vocoder-gpu.pt', map_location=device)
+if __name__ == "__main__":
+	glados = torch.jit.load('models/glados.pt')
+	vocoder = torch.jit.load('models/vocoder-gpu.pt', map_location=device)
+else:
+	glados = torch.jit.load('glados_tts/models/glados.pt')
+	vocoder = torch.jit.load('glados_tts/models/vocoder-gpu.pt', map_location=device)
 
 # Prepare models in RAM
 for i in range(4):

From 7f22a9c4c736ca28e4b285195942580d8870519b Mon Sep 17 00:00:00 2001
From: eternalliving <eternalliving@gmail.com>
Date: Sun, 20 Mar 2022 09:03:04 -0700
Subject: [PATCH 6/6] Fixed cache arrangement and all my other errors

Former-commit-id: 285a9cf34a5f892c87e19c472ceef4c8d0bd5fcd
---
 engine.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/engine.py b/engine.py
index c222a85..1680f6c 100644
--- a/engine.py
+++ b/engine.py
@@ -73,6 +73,7 @@ def glados_tts(text, key=False):
 
 	from flask import Flask, request, send_file
 	import urllib.parse
+	import shutil
 	
 	print("\033[1;94mINFO:\033[;97m Initializing TTS Server...")
 	
@@ -95,21 +96,23 @@ def synthesize(text):
 		
 			# Update access time. This will allow for routine cleanups
 			os.utime(file, None)
-			
+			print("\033[1;94mINFO:\033[;97m The audio sample sent from cache.")
 			return send_file(file)
 			
 		# Generate New Sample
 		key = str(time.time())[7:]
 		if(glados_tts(line, key)):
 			tempfile = os.getcwd()+'/audio/GLaDOS-tts-temp-output-'+key+'.wav'
-			return send_file(tempfile)
-			
+						
 			# If the line isn't too long, store in cache
 			if(len(line) < 200 and CACHE):
-				shutil.move(tempfile, filename)
+				shutil.move(tempfile, file)
 			else:
+				return send_file(tempfile)
 				os.remove(tempfile)
 				
+			return send_file(file)
+				
 		else:
 			return 'TTS Engine Failed'