mindspore-lab · lvyufeng · Dec 17, 2023 · Dec 17, 2023 · Dec 17, 2023
diff --git a/.github/workflows/ci_pipeline.yaml b/.github/workflows/ci_pipeline.yaml
@@ -53,7 +53,7 @@ jobs:
     needs: pylint-check
     strategy:
       matrix:
-        os: [ubuntu-latest, macos-latest]
+        os: [ubuntu-latest, macos-latest, windows-latest]
         python: [3.8, 3.9]
     runs-on: ${{ matrix.os }}
     steps:

diff --git a/README.md b/README.md
@@ -102,6 +102,8 @@ The table below represents the current support in the library for each of those
 | BLIP2                         | TODO              | ✅             |
 | BLOOM                         | ✅                | ❌             |
 | ChatGLM                       | ✅                | ❌             |
+| ChatGLM2                      | ✅                | ❌             |
+| ChatGLM3                      | ✅                | ❌             |
 | CLIP                          | ✅                | ❌             |
 | CodeGen                       | ✅                | ❌             |
 | ConvBERT                      | TODO              | ❌             |

diff --git a/llm/inference/chatglm/cli_demo.py b/llm/inference/chatglm/cli_demo.py
@@ -4,9 +4,9 @@
 from mindnlp.transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 
 
-model = AutoModelForSeq2SeqLM.from_pretrained("THUDM/chatglm-6b", from_pt=True).half()
+model = AutoModelForSeq2SeqLM.from_pretrained("THUDM/chatglm-6b").half()
 model.set_train(False)
-tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", from_pt=True)
+tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b")
 
 os_name = platform.system()
 clear_command = 'cls' if os_name == 'Windows' else 'clear'

diff --git a/llm/inference/chatglm/web_demo.py b/llm/inference/chatglm/web_demo.py
@@ -2,9 +2,9 @@
 import gradio as gr
 import mdtex2html
 
-model = AutoModelForSeq2SeqLM.from_pretrained("THUDM/chatglm-6b", from_pt=True).half()
+model = AutoModelForSeq2SeqLM.from_pretrained("THUDM/chatglm-6b").half()
 model.set_train(False)
-tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", from_pt=True)
+tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b")
 
 """Override Chatbot.postprocess"""
 

diff --git a/mindnlp/modules/__init__.py b/mindnlp/modules/__init__.py
@@ -20,7 +20,7 @@
 from mindnlp.modules import encoder, decoder, embeddings, loss, attentions, crf, rnns, \
     accumulator
 from mindnlp.modules.attentions import ScaledDotAttention, SelfAttention, \
-    BinaryAttention, AdditiveAttention, CosineAttention, LocationAwareAttention, \
+    BinaryAttention, AdditiveAttention, CosineAttention, \
     LinearAttention
 from mindnlp.modules.encoder import RNNEncoder, CNNEncoder
 from mindnlp.modules.decoder import RNNDecoder

diff --git a/mindnlp/modules/attentions.py b/mindnlp/modules/attentions.py
@@ -531,6 +531,5 @@ def construct(self, query, value, last_attn=None):
     "BinaryAttention",
     "AdditiveAttention",
     "CosineAttention",
-    "LocationAwareAttention",
     "LinearAttention"
 ]
diff --git a/mindnlp/transformers/modeling_utils.py b/mindnlp/transformers/modeling_utils.py
@@ -784,7 +784,7 @@ def from_pretrained(
                     }
                     # try safetensors
                     resolved_archive_file = cached_file(pretrained_model_name_or_path, filename, **cached_file_kwargs)
-                    if resolved_archive_file is None:
+                    if resolved_archive_file is None and from_pt:
                         filename = _add_variant(PT_WEIGHTS_NAME, variant)
                         resolved_archive_file = cached_file(pretrained_model_name_or_path, filename, **cached_file_kwargs)
 

diff --git a/mindnlp/transformers/models/mistral/modeling_mistral.py b/mindnlp/transformers/models/mistral/modeling_mistral.py
@@ -115,8 +115,7 @@ def construct(self, x, seq_len=None):
 # Copied from transformers.models.llama.modeling_llama.rotate_half
 def rotate_half(x):
     """Rotates half the hidden dims of the input."""
-    x1 = x[..., : x.shape[-1] // 2]
-    x2 = x[..., x.shape[-1] // 2 :]
+    x1, x2 = x.chunk(2, -1)
     return ops.cat((-x2, x1), axis=-1)
 
 

diff --git a/mindnlp/utils/download.py b/mindnlp/utils/download.py
@@ -484,6 +484,7 @@ def download(
     url = build_download_url(repo_id, filename, repo_type=repo_type, endpoint=endpoint)
     # check model whether exist
     model_url = url[: url.rfind('/')].replace('resolve/main', '')
+
     req = requests.get(model_url, timeout=3, proxies=proxies)
     status = req.status_code
     if status == 404:
@@ -492,6 +493,7 @@ def download(
     pointer_path = http_get(url, storage_folder, download_file_name=relative_filename, proxies=proxies)
     return pointer_path
 
+# https://modelscope.cn/api/v1/models/mindnlp/THUDM_chatglm-6b/repo?Revision=master&FilePath=mindspore-00001-of-00008.ckpt
 
 def match_file(filename: str, cache_dir: str) -> str:
     r"""
@@ -743,4 +745,5 @@ def build_download_url(
 ) -> str:
     """Construct the URL of a file from the given information.
     """
+    repo_id = repo_id.replace('/', '_')
     return endpoint.format(repo_id, filename)
diff --git a/tests/ut/modules/attentions/test_location_aware_attention.py b/tests/ut/modules/attentions/test_location_aware_attention.py
diff --git a/tests/ut/transformers/models/chatglm/test_modeling_chatglm.py b/tests/ut/transformers/models/chatglm/test_modeling_chatglm.py
@@ -40,16 +40,16 @@ def ids_tensor(shape, vocab_size):
 
 
 def get_model_and_tokenizer():
-    model = AutoModelForSeq2SeqLM.from_pretrained("THUDM/chatglm-6b", from_pt=True).half()
+    model = AutoModelForSeq2SeqLM.from_pretrained("THUDM/chatglm-6b").half()
     model.set_train(False)
-    tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", from_pt=True)
+    tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b")
     return model, tokenizer
 
 def get_model_and_tokenizer_random_init():
-    config = AutoConfig.from_pretrained("THUDM/chatglm-6b", from_pt=True)
+    config = AutoConfig.from_pretrained("THUDM/chatglm-6b")
     model = AutoModelForSeq2SeqLM.from_config(config).half()
     model.set_train(False)
-    tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", from_pt=True)
+    tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b")
     return model, tokenizer
 
 @require_mindspore

diff --git a/tests/ut/transformers/models/chatglm/test_modeling_graph_chatglm.py b/tests/ut/transformers/models/chatglm/test_modeling_graph_chatglm.py
@@ -43,9 +43,9 @@ def ids_tensor(shape, vocab_size):
 
 
 def get_model_and_tokenizer():
-    model = MSChatGLMForConditionalGeneration.from_pretrained("THUDM/chatglm-6b", from_pt=True).half()
+    model = MSChatGLMForConditionalGeneration.from_pretrained("THUDM/chatglm-6b").half()
     model.set_train(False)
-    tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", from_pt=True)
+    tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b")
     return model, tokenizer
 
 @require_mindspore

diff --git a/tests/ut/transformers/models/mistral/test_modeling_mistral.py b/tests/ut/transformers/models/mistral/test_modeling_mistral.py
@@ -45,7 +45,6 @@
         MistralModel,
     )
 
-
 class MistralModelTester:
     def __init__(
         self,