Skip to content

Commit

Permalink
Support configuring which tags to translate (yihong0618#107)
Browse files: browse the repository at this point in the history
  • Loading branch information
hleft authored Mar 8, 2023
1 parent 3472f3e commit b25c4ca
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 2 deletions.
8 changes: 8 additions & 0 deletions book_maker/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,13 @@ def main():
type=str,
help="specify base url other than the OpenAI's official API address",
)
parser.add_argument(
"--translate-tags",
dest="translate_tags",
type=str,
default="p",
help="example --translate-tags p,blockquote",
)

options = parser.parse_args()
PROXY = options.proxy
Expand Down Expand Up @@ -121,6 +128,7 @@ def main():
model_api_base=model_api_base,
is_test=options.test,
test_num=options.test_num,
translate_tags=options.translate_tags,
)
e.make_bilingual_book()

Expand Down
7 changes: 5 additions & 2 deletions book_maker/loader/epub_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,14 @@ def __init__(
model_api_base=None,
is_test=False,
test_num=5,
translate_tags="p",
):
self.epub_name = epub_name
self.new_epub = epub.EpubBook()
self.translate_model = model(key, language, model_api_base)
self.is_test = is_test
self.test_num = test_num
self.translate_tags = translate_tags

try:
self.origin_book = epub.read_epub(self.epub_name)
Expand Down Expand Up @@ -68,10 +70,11 @@ def _make_new_book(self, book):
def make_bilingual_book(self):
new_book = self._make_new_book(self.origin_book)
all_items = list(self.origin_book.get_items())
trans_taglist = self.translate_tags.split(",")
all_p_length = sum(
0
if i.get_type() != ITEM_DOCUMENT
else len(bs(i.content, "html.parser").findAll("p"))
else len(bs(i.content, "html.parser").findAll(trans_taglist))
for i in all_items
)
pbar = tqdm(total=self.test_num) if self.is_test else tqdm(total=all_p_length)
Expand All @@ -81,7 +84,7 @@ def make_bilingual_book(self):
for item in self.origin_book.get_items():
if item.get_type() == ITEM_DOCUMENT:
soup = bs(item.content, "html.parser")
p_list = soup.findAll("p")
p_list = soup.findAll(trans_taglist)
is_test_done = self.is_test and index > self.test_num
for p in p_list:
if is_test_done or not p.text or self._is_special_text(p.text):
Expand Down
Binary file added test_books/Liber_Esther.epub
Binary file not shown.

0 comments on commit b25c4ca

Please sign in to comment.