diff --git a/.github/workflows/Nightly_CI_main.yaml b/.github/workflows/Nightly_CI_main.yaml index ab37f91f..c3465321 100644 --- a/.github/workflows/Nightly_CI_main.yaml +++ b/.github/workflows/Nightly_CI_main.yaml @@ -68,4 +68,6 @@ jobs: shell: bash working-directory: tests run: | - IS_CI=true python3 -m pytest ./ --tags L2 + export IS_CI=true + export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python + python3 -m pytest ./ --tags L2 diff --git a/.github/workflows/unit_test_main.yaml b/.github/workflows/unit_test_main.yaml index 648810c2..9fc6cbc0 100644 --- a/.github/workflows/unit_test_main.yaml +++ b/.github/workflows/unit_test_main.yaml @@ -73,7 +73,9 @@ jobs: shell: bash working-directory: tests run: | - IS_CI=true python3 -m pytest ./ + export IS_CI=true + export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python + python3 -m pytest ./ - name: Generate coverage report run: | diff --git a/gptcache/adapter/api.py b/gptcache/adapter/api.py index 6174066d..e2229edb 100644 --- a/gptcache/adapter/api.py +++ b/gptcache/adapter/api.py @@ -5,15 +5,30 @@ import gptcache.processor.pre from gptcache import Cache, cache, Config from gptcache.adapter.adapter import adapt -from gptcache.embedding import Onnx, Huggingface, SBERT, FastText, Data2VecAudio, Timm, ViT, OpenAI, Cohere, Rwkv +from gptcache.embedding import ( + Onnx, + Huggingface, + SBERT, + FastText, + Data2VecAudio, + Timm, + ViT, + OpenAI, + Cohere, + Rwkv, + PaddleNLP, +) from gptcache.embedding.base import BaseEmbedding from gptcache.manager import manager_factory from gptcache.manager.data_manager import DataManager from gptcache.processor.post import first from gptcache.processor.pre import get_prompt from gptcache.similarity_evaluation import ( - SearchDistanceEvaluation, NumpyNormEvaluation, OnnxModelEvaluation, - ExactMatchEvaluation, KReciprocalEvaluation + SearchDistanceEvaluation, + NumpyNormEvaluation, + OnnxModelEvaluation, + ExactMatchEvaluation, + KReciprocalEvaluation, ) from gptcache.utils import import_ruamel @@ -145,7 +160,9 @@ def init_similar_cache( embedding = Onnx() if not data_manager: data_manager = manager_factory( - "sqlite,faiss", data_dir=data_dir, vector_params={"dimension": embedding.dimension} + "sqlite,faiss", + data_dir=data_dir, + vector_params={"dimension": embedding.dimension}, ) evaluation = SearchDistanceEvaluation() cache_obj = cache_obj if cache_obj else cache @@ -207,7 +224,7 @@ def init_similar_cache_from_config(config_dir: str, cache_obj: Optional[Cache] = ) -def _get_model(model_src, model_config = None): +def _get_model(model_src, model_config=None): model_src = model_src.lower() model_config = model_config or {} @@ -231,9 +248,11 @@ def _get_model(model_src, model_config = None): return Cohere(**model_config) if model_src == "rwkv": return Rwkv(**model_config) + if model_src == "paddlenlp": + return PaddleNLP(**model_config) -def _get_eval(strategy, kws = None): +def _get_eval(strategy, kws=None): strategy = strategy.lower() kws = kws or {} diff --git a/gptcache/adapter/stability_sdk.py b/gptcache/adapter/stability_sdk.py index 5e1b7895..fd0e749b 100644 --- a/gptcache/adapter/stability_sdk.py +++ b/gptcache/adapter/stability_sdk.py @@ -1,15 +1,15 @@ -from io import BytesIO import base64 import warnings from dataclasses import dataclass +from io import BytesIO from typing import List from gptcache.adapter.adapter import adapt from gptcache.manager.scalar_data.base import Answer, DataType -from gptcache.utils.error import CacheError from gptcache.utils import ( import_stability, import_pillow - ) +) +from gptcache.utils.error import CacheError import_pillow() import_stability() @@ -19,7 +19,6 @@ import stability_sdk.interfaces.gooseai.generation.generation_pb2 as generation # pylint: disable=C0413 - class StabilityInference(client.StabilityInference): """client.StabilityInference Wrapper diff --git a/gptcache/embedding/paddlenlp.py b/gptcache/embedding/paddlenlp.py index 61f2c997..1b00571b 100644 --- a/gptcache/embedding/paddlenlp.py +++ b/gptcache/embedding/paddlenlp.py @@ -1,14 +1,14 @@ import numpy as np -from gptcache.utils import import_paddlenlp,import_paddle from gptcache.embedding.base import BaseEmbedding +from gptcache.utils import import_paddlenlp, import_paddle import_paddle() import_paddlenlp() -import paddle # pylint: disable=C0413 -from paddlenlp.transformers import AutoModel,AutoTokenizer # pylint: disable=C0413 +import paddle # pylint: disable=C0413 +from paddlenlp.transformers import AutoModel, AutoTokenizer # pylint: disable=C0413 class PaddleNLP(BaseEmbedding): """Generate sentence embedding for given text using pretrained models from PaddleNLP transformers. @@ -35,7 +35,6 @@ def __init__(self, model: str = "ernie-3.0-medium-zh"): self.tokenizer.pad_token = "" self.__dimension = None - def to_embeddings(self, data, **_): """Generate embedding given text input @@ -63,7 +62,6 @@ def post_proc(self, token_embeddings, inputs): ) / paddle.clip(input_mask_expanded.sum(1), min=1e-9) return sentence_embs - @property def dimension(self): """Embedding dimension. diff --git a/gptcache/utils/__init__.py b/gptcache/utils/__init__.py index e3c5d931..92ad5678 100644 --- a/gptcache/utils/__init__.py +++ b/gptcache/utils/__init__.py @@ -214,7 +214,8 @@ def import_docarray(): def import_paddle(): - _check_library("paddlepaddle", package="paddlepaddle==2.4.0") + prompt_install("protobuf==3.20.0") + _check_library("paddlepaddle") def import_paddlenlp(): diff --git a/tests/requirements.txt b/tests/requirements.txt index c84c4420..7983da14 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -20,3 +20,4 @@ mock pexpect spacy safetensors +protobuf==3.20.0