-
Notifications
You must be signed in to change notification settings - Fork 2
/
main.py
114 lines (96 loc) · 3.78 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import os
import shutil
import subprocess
import json
import tempfile
from audiosr import build_model, super_resolution, save_wave
def get_sample_rate(file_path):
    """Return the sample rate of the first audio stream in a media file.

    Runs ``ffprobe`` (looked up on ``PATH``) against ``file_path``, asking
    for the ``sample_rate`` entry of stream ``a:0`` as JSON.

    Args:
        file_path: Path of the media file to probe.

    Returns:
        The reported ``sample_rate`` converted to ``int``.

    Raises:
        subprocess.CalledProcessError: If ``ffprobe`` exits with a non-zero
            status (e.g. the file is missing or cannot be read).
        ValueError: If ``ffprobe`` succeeds but reports no audio stream, or
            the stream carries no ``sample_rate``.
    """
    command = [
        'ffprobe',
        '-v', 'error',
        '-select_streams', 'a:0',
        '-show_entries', 'stream=sample_rate',
        '-of', 'json',
        file_path,
    ]
    # check=True turns a failed probe into CalledProcessError instead of
    # letting empty stdout surface later as an opaque JSON decoding error.
    result = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        check=True,
    )
    streams = json.loads(result.stdout).get('streams') or []
    if not streams or 'sample_rate' not in streams[0]:
        raise ValueError(f"No audio stream with a sample rate found in {file_path}")
    return int(streams[0]['sample_rate'])
def get_duration(file_location):
    """Return the duration of a media file as reported by ``ffprobe``.

    Runs ``ffprobe`` (looked up on ``PATH``) requesting the ``format``
    section's ``duration`` entry as JSON. ``-sexagesimal`` is passed, so the
    value is a clock-style string rather than a number of seconds; it is
    returned unchanged.

    Args:
        file_location: Path of the media file to probe.

    Returns:
        The ``duration`` string exactly as ``ffprobe`` printed it.

    Raises:
        subprocess.CalledProcessError: If ``ffprobe`` exits with a non-zero
            status (e.g. the file is missing or cannot be read).
        ValueError: If ``ffprobe`` succeeds but reports no duration.
    """
    command = [
        'ffprobe',
        '-v', 'error',
        '-select_streams', 'a:0',
        '-show_entries', 'format=duration',
        '-sexagesimal',
        '-of', 'json',
        file_location,
    ]
    # check=True turns a failed probe into CalledProcessError instead of
    # letting empty stdout surface later as an opaque JSON decoding error.
    result = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        check=True,
    )
    format_info = json.loads(result.stdout).get('format') or {}
    if 'duration' not in format_info:
        raise ValueError(f"No duration reported for {file_location}")
    return format_info['duration']
def remove_silence(duration, input_path, output_path):
    """Copy the first ``duration`` of ``input_path`` to ``output_path``, then delete the input.

    Despite the name, no silence detection happens here: ``ffmpeg`` is asked
    to stream-copy (``-c copy``) starting at ``00:00:00`` for ``duration``.
    NOTE(review): presumably this drops trailing padding beyond the source
    file's length -- confirm against the caller's intent.

    Args:
        duration: Length to keep, passed verbatim to ffmpeg's ``-t`` option.
        input_path: File to trim; removed once ffmpeg has succeeded.
        output_path: Destination path for the trimmed file.

    Raises:
        subprocess.CalledProcessError: If ``ffmpeg`` exits with a non-zero
            status. The input file is left in place in that case.
    """
    command = [
        'ffmpeg',
        '-ss', '00:00:00',
        '-i', input_path,
        '-t', duration,
        '-c', 'copy',
        output_path,
    ]
    # check=True: the input must only be deleted after ffmpeg reported
    # success, otherwise a failed trim would destroy the only copy.
    subprocess.run(command, check=True)
    os.remove(input_path)
# System temporary directory; the super-resolution step below writes its
# intermediate wav files here.
tmp_dir = tempfile.gettempdir()

# Dataset root: each immediate sub-directory is treated as one speaker.
datasets_path = './datasets/'
speakers = list(
    filter(
        lambda name: os.path.isdir(os.path.join(datasets_path, name)),
        os.listdir(datasets_path),
    )
)

# Build the 'speech' model once and reuse it for every file.
audiosr = build_model(model_name='speech', device="auto")
# Stage 1: for every speaker, convert mp3 files to mono wav (keeping the
# source sample rate) and gather all wav files under <speaker>/source_wavs.
for speaker in speakers:
    folder_path = os.path.join(datasets_path, speaker)
    source_wavs_folder = os.path.join(folder_path, 'source_wavs')
    os.makedirs(source_wavs_folder, exist_ok=True)
    for file in os.listdir(folder_path):
        # Only .wav and .mp3 entries are handled; everything else
        # (including the source_wavs directory itself) is left alone.
        if not file.endswith(('.wav', '.mp3')):
            continue
        input_path = os.path.join(folder_path, file)
        base_name, _ = os.path.splitext(file)
        wav_file_name = f"{base_name}.wav"
        wav_path = os.path.join(folder_path, wav_file_name)
        if file.endswith('.mp3'):
            try:
                sample_rate = get_sample_rate(input_path)
                subprocess.run(["ffmpeg", "-i", input_path, "-ar", str(sample_rate), "-ac", "1", wav_path], check=True)
                # The mp3 is deleted only after a successful conversion.
                os.remove(input_path)
            except subprocess.CalledProcessError as e:
                print(f"Error occurred while converting {input_path} to WAV: {e}")
                # No wav was produced, so there is nothing to move; skipping
                # avoids an uncaught FileNotFoundError from shutil.move.
                continue
            except Exception as e:
                print(f"An error occurred: {e}")
                continue
        shutil.move(wav_path, os.path.join(source_wavs_folder, wav_file_name))
# Stage 2: run super-resolution on every wav in <speaker>/source_wavs and
# store the result, trimmed to the source file's duration, in <speaker>/wavs.
for speaker in speakers:
    folder_path = os.path.join(datasets_path, speaker)
    wavs_folder = os.path.join(folder_path, 'wavs')
    source_wavs_folder = os.path.join(folder_path, 'source_wavs')
    os.makedirs(wavs_folder, exist_ok=True)
    for file in os.listdir(source_wavs_folder):
        if not file.endswith('.wav'):
            continue
        input_path = os.path.join(source_wavs_folder, file)
        base_name, _ = os.path.splitext(file)
        # NOTE(review): assumes save_wave writes <tmp_dir>/<base_name>.wav --
        # verify against the installed audiosr version.
        tmp_file_path = os.path.join(tmp_dir, f"{base_name}.wav")
        try:
            # Probed inside the try so that one unreadable file is reported
            # and skipped instead of aborting the whole batch.
            duration = get_duration(input_path)
            waveform = super_resolution(
                audiosr,
                input_path,
                seed=42,
                guidance_scale=3.5,
                ddim_steps=50,
                latent_t_per_second=12.8
            )
            save_wave(waveform, tmp_dir, name=base_name, samplerate=48000)
            # Trim the temporary file to the source duration and write it
            # under the original file name in the wavs folder.
            remove_silence(duration, tmp_file_path, os.path.join(wavs_folder, file))
        except Exception as e:
            print(f"An error occurred: {e}")