Review the paddlenlp code

Signed-off-by: SimFG <bang.fu@zilliz.com>
zilliztech · May 22, 2023 · 1825e90
1 parent dff1c77
commit 1825e90
Show file tree

Hide file tree

Showing 7 changed files with 40 additions and 18 deletions.
diff --git a/.github/workflows/Nightly_CI_main.yaml b/.github/workflows/Nightly_CI_main.yaml
@@ -68,4 +68,6 @@ jobs:
         shell: bash
         working-directory: tests
         run: |
-          IS_CI=true python3 -m pytest ./ --tags L2
+          export IS_CI=true
+          export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
+          python3 -m pytest ./ --tags L2
diff --git a/.github/workflows/unit_test_main.yaml b/.github/workflows/unit_test_main.yaml
@@ -73,7 +73,9 @@ jobs:
         shell: bash
         working-directory: tests
         run: |
-          IS_CI=true python3 -m pytest ./
+          export IS_CI=true
+          export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
+          python3 -m pytest ./
 
       - name: Generate coverage report
         run: |

diff --git a/gptcache/adapter/api.py b/gptcache/adapter/api.py
@@ -5,15 +5,30 @@
 import gptcache.processor.pre
 from gptcache import Cache, cache, Config
 from gptcache.adapter.adapter import adapt
-from gptcache.embedding import Onnx, Huggingface, SBERT, FastText, Data2VecAudio, Timm, ViT, OpenAI, Cohere, Rwkv
+from gptcache.embedding import (
+    Onnx,
+    Huggingface,
+    SBERT,
+    FastText,
+    Data2VecAudio,
+    Timm,
+    ViT,
+    OpenAI,
+    Cohere,
+    Rwkv,
+    PaddleNLP,
+)
 from gptcache.embedding.base import BaseEmbedding
 from gptcache.manager import manager_factory
 from gptcache.manager.data_manager import DataManager
 from gptcache.processor.post import first
 from gptcache.processor.pre import get_prompt
 from gptcache.similarity_evaluation import (
-    SearchDistanceEvaluation, NumpyNormEvaluation, OnnxModelEvaluation,
-    ExactMatchEvaluation, KReciprocalEvaluation
+    SearchDistanceEvaluation,
+    NumpyNormEvaluation,
+    OnnxModelEvaluation,
+    ExactMatchEvaluation,
+    KReciprocalEvaluation,
 )
 from gptcache.utils import import_ruamel
 
@@ -145,7 +160,9 @@ def init_similar_cache(
         embedding = Onnx()
     if not data_manager:
         data_manager = manager_factory(
-            "sqlite,faiss", data_dir=data_dir, vector_params={"dimension": embedding.dimension}
+            "sqlite,faiss",
+            data_dir=data_dir,
+            vector_params={"dimension": embedding.dimension},
         )
     evaluation = SearchDistanceEvaluation()
     cache_obj = cache_obj if cache_obj else cache
@@ -207,7 +224,7 @@ def init_similar_cache_from_config(config_dir: str, cache_obj: Optional[Cache] =
     )
 
 
-def _get_model(model_src, model_config = None):
+def _get_model(model_src, model_config=None):
     model_src = model_src.lower()
     model_config = model_config or {}
 
@@ -231,9 +248,11 @@ def _get_model(model_src, model_config = None):
         return Cohere(**model_config)
     if model_src == "rwkv":
         return Rwkv(**model_config)
+    if model_src == "paddlenlp":
+        return PaddleNLP(**model_config)
 
 
-def _get_eval(strategy, kws = None):
+def _get_eval(strategy, kws=None):
     strategy = strategy.lower()
     kws = kws or {}
 

diff --git a/gptcache/adapter/stability_sdk.py b/gptcache/adapter/stability_sdk.py
@@ -1,15 +1,15 @@
-from io import BytesIO
 import base64
 import warnings
 from dataclasses import dataclass
+from io import BytesIO
 from typing import List
 
 from gptcache.adapter.adapter import adapt
 from gptcache.manager.scalar_data.base import Answer, DataType
-from gptcache.utils.error import CacheError
 from gptcache.utils import (
     import_stability, import_pillow
-    )
+)
+from gptcache.utils.error import CacheError
 
 import_pillow()
 import_stability()
@@ -19,7 +19,6 @@
 import stability_sdk.interfaces.gooseai.generation.generation_pb2 as generation  # pylint: disable=C0413
 
 
-
 class StabilityInference(client.StabilityInference):
     """client.StabilityInference Wrapper
 

diff --git a/gptcache/embedding/paddlenlp.py b/gptcache/embedding/paddlenlp.py
@@ -1,14 +1,14 @@
 import numpy as np
 
-from gptcache.utils import import_paddlenlp,import_paddle
 from gptcache.embedding.base import BaseEmbedding
+from gptcache.utils import import_paddlenlp, import_paddle
 
 import_paddle()
 import_paddlenlp()
 
 
-import paddle # pylint: disable=C0413
-from paddlenlp.transformers import AutoModel,AutoTokenizer # pylint: disable=C0413
+import paddle  # pylint: disable=C0413
+from paddlenlp.transformers import AutoModel, AutoTokenizer  # pylint: disable=C0413
 
 class PaddleNLP(BaseEmbedding):
     """Generate sentence embedding for given text using pretrained models from PaddleNLP transformers.
@@ -35,7 +35,6 @@ def __init__(self, model: str = "ernie-3.0-medium-zh"):
             self.tokenizer.pad_token = "<pad>"
         self.__dimension = None
 
-
     def to_embeddings(self, data, **_):
         """Generate embedding given text input
 
@@ -63,7 +62,6 @@ def post_proc(self, token_embeddings, inputs):
         ) / paddle.clip(input_mask_expanded.sum(1), min=1e-9)
         return sentence_embs
 
-
     @property
     def dimension(self):
         """Embedding dimension.

diff --git a/gptcache/utils/__init__.py b/gptcache/utils/__init__.py
@@ -214,7 +214,8 @@ def import_docarray():
 
 
 def import_paddle():
-    _check_library("paddlepaddle", package="paddlepaddle==2.4.0")
+    prompt_install("protobuf==3.20.0")
+    _check_library("paddlepaddle")
 
 
 def import_paddlenlp():

diff --git a/tests/requirements.txt b/tests/requirements.txt
@@ -20,3 +20,4 @@ mock
 pexpect
 spacy
 safetensors
+protobuf==3.20.0
Rendered view of the same tests/requirements.txt hunk (columns: original file line number | diff line change):
@@ -20,3 +20,4 @@ mock
     pexpect
     spacy
     safetensors
+    protobuf==3.20.0