-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathparser.py
239 lines (233 loc) · 8 KB
/
parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
# Author: T. Junttila
# License: The MIT License
import re
class AST:
    """Abstract base class for the nodes produced by the parser.

    Concrete node types override all three rendering methods; the
    versions defined here always fail.
    """

    def __init__(self):
        pass

    def to_ssml(self, neural):
        """Return the SSML text for this node.

        ``neural`` is a flag handed down to every node; container nodes
        in this file use it to choose between two prosody markups.

        Raises:
            AssertionError: always, in this base class.
        """
        # Raised explicitly rather than via ``assert False``: assert
        # statements are removed under ``python -O``, which would make an
        # un-overridden method silently return None.
        raise AssertionError('AST.to_ssml must be overridden')

    def to_words(self):
        """Return the list of spoken words contributed by this node.

        Raises:
            AssertionError: always, in this base class.
        """
        raise AssertionError('AST.to_words must be overridden')

    def to_sub(self):
        """Return the subtitle text for this node.

        Raises:
            AssertionError: always, in this base class.
        """
        raise AssertionError('AST.to_sub must be overridden')
class ASTWord(AST):
    """A single word: spoken, counted and subtitled exactly as written."""

    def __init__(self, text):
        # The word as it appeared in the input.
        self.text = text

    def to_ssml(self, neural):
        """Return the word unchanged; ``neural`` is not consulted."""
        word = self.text
        return word

    def to_words(self):
        """Return a one-element list holding the word."""
        word = self.text
        return [word]

    def to_sub(self):
        """Return the word unchanged for the subtitle."""
        word = self.text
        return word
class ASTBreak(AST):
    """A pause in the speech; it adds no words and no subtitle text."""

    def __init__(self, time):
        # Pause length; to_ssml multiplies it by 100 to get milliseconds.
        self.time = time

    def to_ssml(self, neural):
        """Return an SSML ``<break>`` tag lasting ``time * 100`` ms."""
        millis = str(self.time * 100)
        return "".join(['<break time="', millis, 'ms" />'])

    def to_words(self):
        """Return an empty list: a pause is not a word."""
        return list()

    def to_sub(self):
        """Return the empty string: a pause is invisible in subtitles."""
        return str()
class ASTDelim(AST):
    """A delimiter character: emitted in SSML and subtitles, never a word."""

    def __init__(self, text):
        # The delimiter exactly as it appeared in the input.
        self.text = text

    def to_ssml(self, neural):
        """Return the delimiter unchanged; ``neural`` is not consulted."""
        delimiter = self.text
        return delimiter

    def to_words(self):
        """Return an empty list: delimiters are not counted as words."""
        return list()

    def to_sub(self):
        """Return the delimiter unchanged for the subtitle."""
        delimiter = self.text
        return delimiter
class ASTSpace(AST):
    """Whitespace between tokens, always rendered as one space character."""

    def __init__(self):
        # Stateless: the node carries no data of its own.
        pass

    def to_ssml(self, neural):
        """Return a single space; ``neural`` is not consulted."""
        blank = ' '
        return blank

    def to_words(self):
        """Return an empty list: whitespace is not a word."""
        return list()

    def to_sub(self):
        """Return a single space for the subtitle."""
        blank = ' '
        return blank
class ASTEmph(AST):
    """Emphasised span wrapping a list of child nodes."""

    def __init__(self, children):
        # Child nodes making up the emphasised text.
        self.children = children

    def to_ssml(self, neural):
        """Wrap the children's SSML in a ``<prosody>`` element.

        With ``neural`` true the element sets rate and volume; otherwise
        it sets pitch and volume.
        """
        inner = "".join(node.to_ssml(neural) for node in self.children)
        if neural:
            opening = '<prosody rate="90%" volume="loud">'
        else:
            opening = '<prosody pitch="high" volume="loud">'
        return opening + inner + '</prosody>'

    def to_words(self):
        """Return the children's words, concatenated in order."""
        return [word for node in self.children for word in node.to_words()]

    def to_sub(self):
        """Return the children's subtitle texts, concatenated in order."""
        pieces = [node.to_sub() for node in self.children]
        return "".join(pieces)
class ASTPhoneme(AST):
    """Text with an explicit X-SAMPA pronunciation attached."""

    def __init__(self, text, xsampa):
        # Written form; used for the word list and the subtitle.
        self.text = text
        # Pronunciation, placed in the ``ph`` attribute of the SSML tag.
        self.xsampa = xsampa

    def to_ssml(self, neural):
        """Return an SSML ``<phoneme>`` element; ``neural`` is not consulted.

        NOTE(review): ``text`` and ``xsampa`` are inserted verbatim, with
        no XML escaping -- presumably the input is expected to be
        SSML-safe already; confirm against the callers.
        """
        return f'<phoneme alphabet="x-sampa" ph="{self.xsampa}">{self.text}</phoneme>'

    def to_words(self):
        """Return the written text split on runs of whitespace."""
        # Raw string: '\s' in a plain literal is an invalid escape
        # sequence and triggers a SyntaxWarning on recent Pythons.
        return re.split(r'\s+', self.text.strip())

    def to_sub(self):
        """Return the written text for the subtitle."""
        return self.text
class ASTSub(AST):
    """Span whose spoken form and subtitle text differ.

    Speech and the word list come from the child nodes; the subtitle is
    the separately supplied ``subtitles`` string.
    """

    def __init__(self, children, subtitles):
        # Nodes that are spoken.
        self.children = children
        # Text shown in the subtitle in place of the children's text.
        self.subtitles = subtitles

    def to_ssml(self, neural):
        """Return the children's SSML, concatenated in order."""
        return "".join(node.to_ssml(neural) for node in self.children)

    def to_words(self):
        """Return the children's words, concatenated in order."""
        return [word for node in self.children for word in node.to_words()]

    def to_sub(self):
        """Return the replacement subtitle text."""
        replacement = self.subtitles
        return replacement
class ASTLow(AST):
    """Span of child nodes spoken in a low voice."""

    def __init__(self, children):
        # Child nodes making up the span.
        self.children = children

    def to_ssml(self, neural):
        """Wrap the children's SSML in a ``<prosody>`` element.

        With ``neural`` true the element slows the rate to 80%; otherwise
        it lowers the pitch.
        """
        inner = "".join(node.to_ssml(neural) for node in self.children)
        if not neural:
            return '<prosody pitch="low">' + inner + '</prosody>'
        # prosody pitch not yet in neural TTS, make it slightly slower
        return '<prosody rate="80%">' + inner + '</prosody>'

    def to_words(self):
        """Return the children's words, concatenated in order."""
        return [word for node in self.children for word in node.to_words()]

    def to_sub(self):
        """Return the children's subtitle texts, concatenated in order."""
        pieces = [node.to_sub() for node in self.children]
        return "".join(pieces)
class ASTHigh(AST):
    """Span of child nodes spoken in a high voice."""

    def __init__(self, children):
        # Child nodes making up the span.
        self.children = children

    def to_ssml(self, neural):
        """Wrap the children's SSML in a ``<prosody>`` element.

        With ``neural`` true the element raises the rate to 120%;
        otherwise it raises the pitch.
        """
        inner = "".join(node.to_ssml(neural) for node in self.children)
        if not neural:
            return '<prosody pitch="high">' + inner + '</prosody>'
        # prosody pitch not yet in neural TTS, make it slightly faster
        return '<prosody rate="120%">' + inner + '</prosody>'

    def to_words(self):
        """Return the children's words, concatenated in order."""
        return [word for node in self.children for word in node.to_words()]

    def to_sub(self):
        """Return the children's subtitle texts, concatenated in order."""
        pieces = [node.to_sub() for node in self.children]
        return "".join(pieces)
class ASTSayAs(AST):
    """Text that is to be spelled out character by character."""

    def __init__(self, letters):
        # The characters to spell out, as written in the input.
        self.letters = letters

    def to_ssml(self, neural):
        """Return an SSML ``<say-as interpret-as="characters">`` element.

        ``neural`` is not consulted.
        """
        return '<say-as interpret-as="characters">'+self.letters+'</say-as>'

    def to_words(self):
        """Return the letters split on runs of whitespace."""
        # Raw string: '\s' in a plain literal is an invalid escape
        # sequence and triggers a SyntaxWarning on recent Pythons.
        return re.split(r'\s+', self.letters.strip())

    def to_sub(self):
        """Return the letters unchanged for the subtitle."""
        return self.letters
# Token patterns for parse_, compiled once at import time.  They are applied
# with ``pattern.match(string, pos)``, which anchors at ``pos``; hence no
# leading ``^`` and no copy of the remaining input for every token.
#
# All of them are raw strings.  In a plain literal ``\1`` is the character
# U+0001, not a backreference, so the ``(?!\1)`` lookahead that keeps the
# delimiter out of the delimited parts would silently do nothing.
_SUB_RE = re.compile(r'#sub(.)(?P<text>((?!\1).)*?)\1(?P<sub>((?!\1).)+?)\1')
_LOW_RE = re.compile(r'#low(.)(?P<text>((?!\1).)+?)\1')
_HIGH_RE = re.compile(r'#high(.)(?P<text>((?!\1).)+?)\1')
_PH_RE = re.compile(r'#ph(.)(?P<text>((?!\1).)+?)\1(?P<ph>((?!\1).)+?)\1')
_BREAK_RE = re.compile(r'#(?P<time>\d+)')
_EMPH_RE = re.compile(r'\*(?P<text>[^*]+)\*')
_SAY_AS_RE = re.compile(r'@(?P<text>[^@]+)@')
_SPACE_RE = re.compile(r'\s+')
_NEG_NUMBER_RE = re.compile(r'-\d+')
_DELIM_RE = re.compile(r'[-.,:;!?"]')
# Characters that end a plain word.
# NOTE(review): only space and tab are listed, although the whitespace
# branch accepts any ``\s``; a newline in the middle of a word therefore
# stays inside that word.  Kept as in the original -- confirm intent.
_WORD_END_CHARS = frozenset(' \t#*@".,:;!?')


def parse_(string):
    """Parse marked-up text into a list of AST nodes.

    Recognised constructs, where ``D`` is any single character (other
    than a newline) chosen as delimiter, which may not occur inside the
    delimited parts:

    * ``#subDtextDsubtitleD``  -> ASTSub; ``text`` is parsed recursively
      and may be empty
    * ``#lowDtextD``           -> ASTLow; ``text`` is parsed recursively
    * ``#highDtextD``          -> ASTHigh; ``text`` is parsed recursively
    * ``#phDtextDphonemesD``   -> ASTPhoneme
    * ``#`` followed by digits -> ASTBreak
    * ``*text*``               -> ASTEmph; ``text`` is parsed recursively
    * ``@letters@``            -> ASTSayAs
    * a run of whitespace      -> one ASTSpace
    * ``-`` followed by digits -> ASTWord (a negative number)
    * one of ``- . , : ; ! ? "`` -> ASTDelim
    * anything else, up to the next word-ending character -> ASTWord

    Args:
        string: the text to parse.

    Returns:
        The list of nodes, in input order.

    Raises:
        AssertionError: on a malformed construct or an unknown ``#``
            command; the message quotes the unparsed remainder.
    """
    def fail(msg):
        # Explicit raise instead of ``assert False, msg``: asserts are
        # stripped under ``python -O``, after which a failed match would
        # surface as a TypeError on ``None`` instead of this message.
        raise AssertionError(msg)

    n = len(string)
    i = 0
    result = []
    while i < n:
        ch = string[i]
        if ch == '#':
            # The command keywords are tested before the generic
            # "#<digits>" break, in the same order as before.
            if string.startswith('#sub', i):
                m = _SUB_RE.match(string, i)
                if m is None:
                    fail(f'Malformed #sub "{string[i:]}"')
                result.append(ASTSub(parse_(m['text']), m['sub']))
            elif string.startswith('#low', i):
                m = _LOW_RE.match(string, i)
                if m is None:
                    fail(f'Malformed #low "{string[i:]}"')
                result.append(ASTLow(parse_(m['text'])))
            elif string.startswith('#high', i):
                m = _HIGH_RE.match(string, i)
                if m is None:
                    fail(f'Malformed #high "{string[i:]}"')
                result.append(ASTHigh(parse_(m['text'])))
            elif string.startswith('#ph', i):
                m = _PH_RE.match(string, i)
                if m is None:
                    fail(f'Malformed #ph "{string[i:]}"')
                result.append(ASTPhoneme(m['text'], m['ph']))
            else:
                # Break, e.g. "#10"
                m = _BREAK_RE.match(string, i)
                if m is None:
                    fail(f'Unrecognized command "{string[i:]}"')
                result.append(ASTBreak(int(m['time'])))
            i = m.end()
        elif ch == '*':
            m = _EMPH_RE.match(string, i)
            if m is None:
                fail(f'Malformed emphasis "{string[i:]}"')
            result.append(ASTEmph(parse_(m['text'])))
            i = m.end()
        elif ch == '@':
            m = _SAY_AS_RE.match(string, i)
            if m is None:
                fail(f'Malformed say-as "{string[i:]}"')
            result.append(ASTSayAs(m['text']))
            i = m.end()
        else:
            # A whole run of whitespace collapses into one space node.
            m = _SPACE_RE.match(string, i)
            if m is not None:
                result.append(ASTSpace())
                i = m.end()
                continue
            # Negative numbers are words, not a '-' delimiter plus digits.
            m = _NEG_NUMBER_RE.match(string, i)
            if m is not None:
                result.append(ASTWord(m.group(0)))
                i = m.end()
                continue
            # Delimiters
            m = _DELIM_RE.match(string, i)
            if m is not None:
                result.append(ASTDelim(m.group(0)))
                i = m.end()
                continue
            # Plain word: every character that could end a word was
            # handled by a branch above, so at least one is consumed.
            start = i
            while i < n and string[i] not in _WORD_END_CHARS:
                i += 1
            result.append(ASTWord(string[start:i]))
    return result
def parse(string, neural):
    """Parse ``string`` and render it three ways.

    Args:
        string: the marked-up text, handed to ``parse_``.
        neural: flag forwarded to every node's ``to_ssml``.

    Returns:
        A tuple ``(ssml, words, sub)``: the concatenated SSML of all
        top-level nodes, the flat list of their words, and their
        concatenated subtitle text.
    """
    nodes = parse_(string)
    # Three separate passes, in the same order as before: SSML, words,
    # subtitles.
    ssml_text = "".join(node.to_ssml(neural) for node in nodes)
    word_list = [word for node in nodes for word in node.to_words()]
    subtitle = "".join(node.to_sub() for node in nodes)
    return (ssml_text, word_list, subtitle)
#if __name__ == '__main__':
#     ast = parse_("#sub#*@NP@-complete problem*#NPC problem#, Fourier-Motzkin")
# print(ast)
# print(">"+"".join([node.to_ssml(True) for node in ast])+"<")
# words = []
# for node in ast:
# words += node.to_words()
# print(words)
#
# sub = "".join([node.to_sub() for node in ast])
# print(sub)
# #print(ast.to_sub())