From 73f35eed6671124d36bcab41f7aac88d207e2740 Mon Sep 17 00:00:00 2001 From: FLoutione <108987302+FLoutione@users.noreply.github.com> Date: Thu, 8 Jun 2023 09:49:53 +0800 Subject: [PATCH] update support_CPM_generate (#567) --- mindnlp/transforms/tokenizers/cpm_tokenizer.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mindnlp/transforms/tokenizers/cpm_tokenizer.py b/mindnlp/transforms/tokenizers/cpm_tokenizer.py index e7a5208c5..7d29162e5 100644 --- a/mindnlp/transforms/tokenizers/cpm_tokenizer.py +++ b/mindnlp/transforms/tokenizers/cpm_tokenizer.py @@ -108,3 +108,9 @@ def _convert_to_unicode(self, text_input): text_input = np.char.decode(text_input, "utf-8") return str(text_input) raise ValueError(f"Unsupported string type: {type(text_input)}, {text_input.dtype}") + + def _convert_token_to_id(self, token): + return self._tokenizer.token_to_id(token) + + def _convert_id_to_token(self, index): + return self._tokenizer.id_to_token(index)