Skip to content

Commit

Permalink
ChatGLM: use ModelScope URL to download
Browse files Browse the repository at this point in the history
  • Loading branch information
lvyufeng committed Dec 17, 2023
1 parent 166babf commit 6025953
Show file tree
Hide file tree
Showing 9 changed files with 15 additions and 74 deletions.
4 changes: 2 additions & 2 deletions llm/inference/chatglm/cli_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
from mindnlp.transformers import AutoModelForSeq2SeqLM, AutoTokenizer


model = AutoModelForSeq2SeqLM.from_pretrained("THUDM/chatglm-6b", from_pt=True).half()
model = AutoModelForSeq2SeqLM.from_pretrained("THUDM/chatglm-6b").half()
model.set_train(False)
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", from_pt=True)
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b")

os_name = platform.system()
clear_command = 'cls' if os_name == 'Windows' else 'clear'
Expand Down
4 changes: 2 additions & 2 deletions llm/inference/chatglm/web_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
import gradio as gr
import mdtex2html

model = AutoModelForSeq2SeqLM.from_pretrained("THUDM/chatglm-6b", from_pt=True).half()
model = AutoModelForSeq2SeqLM.from_pretrained("THUDM/chatglm-6b").half()
model.set_train(False)
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", from_pt=True)
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b")

"""Override Chatbot.postprocess"""

Expand Down
2 changes: 1 addition & 1 deletion mindnlp/modules/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from mindnlp.modules import encoder, decoder, embeddings, loss, attentions, crf, rnns, \
accumulator
from mindnlp.modules.attentions import ScaledDotAttention, SelfAttention, \
BinaryAttention, AdditiveAttention, CosineAttention, LocationAwareAttention, \
BinaryAttention, AdditiveAttention, CosineAttention, \
LinearAttention
from mindnlp.modules.encoder import RNNEncoder, CNNEncoder
from mindnlp.modules.decoder import RNNDecoder
Expand Down
1 change: 0 additions & 1 deletion mindnlp/modules/attentions.py
Original file line number Diff line number Diff line change
Expand Up @@ -531,6 +531,5 @@ def construct(self, query, value, last_attn=None):
"BinaryAttention",
"AdditiveAttention",
"CosineAttention",
"LocationAwareAttention",
"LinearAttention"
]
2 changes: 1 addition & 1 deletion mindnlp/transformers/modeling_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -784,7 +784,7 @@ def from_pretrained(
}
# try safetensors
resolved_archive_file = cached_file(pretrained_model_name_or_path, filename, **cached_file_kwargs)
if resolved_archive_file is None:
if resolved_archive_file is None and from_pt:
filename = _add_variant(PT_WEIGHTS_NAME, variant)
resolved_archive_file = cached_file(pretrained_model_name_or_path, filename, **cached_file_kwargs)

Expand Down
3 changes: 3 additions & 0 deletions mindnlp/utils/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,7 @@ def download(
url = build_download_url(repo_id, filename, repo_type=repo_type, endpoint=endpoint)
# check model whether exist
model_url = url[: url.rfind('/')].replace('resolve/main', '')

req = requests.get(model_url, timeout=3, proxies=proxies)
status = req.status_code
if status == 404:
Expand All @@ -492,6 +493,7 @@ def download(
pointer_path = http_get(url, storage_folder, download_file_name=relative_filename, proxies=proxies)
return pointer_path

# https://modelscope.cn/api/v1/models/mindnlp/THUDM_chatglm-6b/repo?Revision=master&FilePath=mindspore-00001-of-00008.ckpt

def match_file(filename: str, cache_dir: str) -> str:
r"""
Expand Down Expand Up @@ -743,4 +745,5 @@ def build_download_url(
) -> str:
"""Construct the URL of a file from the given information.
"""
repo_id = repo_id.replace('/', '_')
return endpoint.format(repo_id, filename)
61 changes: 0 additions & 61 deletions tests/ut/modules/attentions/test_location_aware_attention.py

This file was deleted.

8 changes: 4 additions & 4 deletions tests/ut/transformers/models/chatglm/test_modeling_chatglm.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,16 +40,16 @@ def ids_tensor(shape, vocab_size):


def get_model_and_tokenizer():
model = AutoModelForSeq2SeqLM.from_pretrained("THUDM/chatglm-6b", from_pt=True).half()
model = AutoModelForSeq2SeqLM.from_pretrained("THUDM/chatglm-6b").half()
model.set_train(False)
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", from_pt=True)
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b")
return model, tokenizer

def get_model_and_tokenizer_random_init():
config = AutoConfig.from_pretrained("THUDM/chatglm-6b", from_pt=True)
config = AutoConfig.from_pretrained("THUDM/chatglm-6b")
model = AutoModelForSeq2SeqLM.from_config(config).half()
model.set_train(False)
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", from_pt=True)
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b")
return model, tokenizer

@require_mindspore
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@ def ids_tensor(shape, vocab_size):


def get_model_and_tokenizer():
model = MSChatGLMForConditionalGeneration.from_pretrained("THUDM/chatglm-6b", from_pt=True).half()
model = MSChatGLMForConditionalGeneration.from_pretrained("THUDM/chatglm-6b").half()
model.set_train(False)
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", from_pt=True)
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b")
return model, tokenizer

@require_mindspore
Expand Down

0 comments on commit 6025953

Please sign in to comment.