Skip to content

Commit

Permalink
Support the selective context processor (#326)
Browse files Browse the repository at this point in the history
Signed-off-by: SimFG <bang.fu@zilliz.com>
  • Loading branch information
SimFG authored May 7, 2023
1 parent 4b871eb commit fbe301e
Show file tree
Hide file tree
Showing 9 changed files with 232 additions and 39 deletions.
6 changes: 6 additions & 0 deletions .github/workflows/unit_test_main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,12 @@ jobs:
run: |
pip install -r requirements.txt
- name: Download the `en_core_web_sm` model
shell: bash
working-directory: tests
run: |
python3 -m spacy download en_core_web_sm
- name: Unit Tests
timeout-minutes: 30
shell: bash
Expand Down
72 changes: 72 additions & 0 deletions examples/context_examples/selective_context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import os
import time

from gptcache import cache
from gptcache.adapter import openai
from gptcache.embedding import Onnx
from gptcache.manager import manager_factory
from gptcache.processor.context.selective_context import SelectiveContextProcess
from gptcache.similarity_evaluation import SearchDistanceEvaluation
from gptcache.utils import import_selective_context

import_selective_context()


def response_text(openai_resp):
    """Return the message text of the first choice in a chat-completion response.

    :param openai_resp: response mapping with a ``choices`` list whose entries
        hold a ``message`` mapping containing ``content``.
    :return: the ``content`` value of the first choice's message.
    """
    first_choice = openai_resp["choices"][0]
    message = first_choice["message"]
    return message["content"]


# The spaCy English model must be downloaded before running this example:
# `pip install spacy && python -m spacy download en_core_web_sm`


def cache_init():
    """Configure the global ``cache`` for the selective-context example.

    Wires together a ``SelectiveContextProcess`` as the pre-embedding step, an
    ``Onnx`` embedding function, a ``"sqlite,faiss"`` data manager sized to the
    embedding dimension, and ``SearchDistanceEvaluation`` for similarity.
    Afterwards it writes a placeholder into ``OPENAI_API_KEY`` (replace it with
    a real key) and asks the cache to pick the key up.
    """
    selective = SelectiveContextProcess()
    encoder = Onnx()
    cache.init(
        pre_embedding_func=selective.pre_process,
        embedding_func=encoder.to_embeddings,
        data_manager=manager_factory(
            "sqlite,faiss", vector_params={"dimension": encoder.dimension}
        ),
        similarity_evaluation=SearchDistanceEvaluation(),
    )
    # Placeholder value: put your own OpenAI key here before running.
    os.environ["OPENAI_API_KEY"] = "API KEY"
    cache.set_openai_key()


def base_request():
    """Issue the same multi-turn chat request twice, printing timing and reply.

    The cache is initialised once via ``cache_init()``; the identical request
    is then sent two times so the two printed durations can be compared
    (presumably the second one is answered from the cache).
    """
    cache_init()
    for _attempt in range(2):
        began = time.time()
        conversation = [
            {
                "role": "user",
                "content": "Can you give me some tips for staying focused while working from home?",
            },
            {
                "role": "system",
                "content": "Sure! Here are some tips: create a designated workspace, set a schedule, take breaks, minimize distractions, and practice good time management.",
            },
            {
                "role": "user",
                "content": "Those are all great suggestions. Do you have any tips for maintaining a healthy work-life balance while working from home?",
            },
            {
                "role": "system",
                "content": "Definitely! Setting clear boundaries between work and personal time, scheduling regular breaks throughout the day, and finding ways to disconnect from work after hours can help. Additionally, make time for hobbies and other activities you enjoy outside of work to help you relax and recharge.",
            },
            {"role": "user", "content": "can you give meore tips?"},
        ]
        reply = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=conversation,
            temperature=0,
        )
        print(f"Time consuming: {time.time() - began:.2f}s")
        print("Received: {}".format(response_text(reply)))


# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    base_request()
52 changes: 27 additions & 25 deletions examples/context_examples/summarization_context.py
Original file line number Diff line number Diff line change
@@ -1,68 +1,70 @@
import os
import time

from gptcache.manager import get_data_manager, VectorBase
from gptcache import cache, Cache
from gptcache.embedding import Onnx
from gptcache.similarity_evaluation.distance import SearchDistanceEvaluation
from gptcache import cache
from gptcache.adapter import openai
from gptcache.embedding import Onnx
from gptcache.manager import manager_factory
from gptcache.processor.context.summarization_context import SummarizationContextProcess
from gptcache.similarity_evaluation.distance import SearchDistanceEvaluation
from gptcache.utils import import_huggingface
from gptcache.manager import manager_factory

import_huggingface()
import transformers # pylint: disable=C0413
import transformers # pylint: disable=C0413


def response_text(openai_resp):
return openai_resp['choices'][0]['message']['content']
return openai_resp["choices"][0]["message"]["content"]


def cache_init():
onnx = Onnx()
summarizer = transformers.pipeline("summarization", model="facebook/bart-large-cnn")
context_process = SummarizationContextProcess(summarizer, None, 512)
data_manager = manager_factory("sqlite,faiss", vector_params={"dimension": onnx.dimension})
data_manager = manager_factory(
"sqlite,faiss", vector_params={"dimension": onnx.dimension}
)
dir_name, _ = os.path.split(os.path.abspath(__file__))
cache.init(
pre_embedding_func=context_process.pre_process,
embedding_func=onnx.to_embeddings,
data_manager=data_manager,
similarity_evaluation=SearchDistanceEvaluation(),
)
os.environ['OPENAI_API_KEY'] = 'API KEY'
os.environ["OPENAI_API_KEY"] = "API KEY"
cache.set_openai_key()


def base_request():
cache_init()
for _ in range(2):
start_time = time.time()
response = openai.ChatCompletion.create(
model='gpt-3.5-turbo',
model="gpt-3.5-turbo",
messages=[
{
'role': 'user',
'content': 'Can you give me some tips for staying focused while working from home?'
"role": "user",
"content": "Can you give me some tips for staying focused while working from home?",
},
{
'role': 'system',
'content': 'Sure! Here are some tips: create a designated workspace, set a schedule, take breaks, minimize distractions, and practice good time management.'
"role": "system",
"content": "Sure! Here are some tips: create a designated workspace, set a schedule, take breaks, minimize distractions, and practice good time management.",
},
{
'role': 'user',
'content': 'Those are all great suggestions. Do you have any tips for maintaining a healthy work-life balance while working from home?'
"role": "user",
"content": "Those are all great suggestions. Do you have any tips for maintaining a healthy work-life balance while working from home?",
},
{
'role': 'system',
'content': 'Definitely! Setting clear boundaries between work and personal time, scheduling regular breaks throughout the day, and finding ways to disconnect from work after hours can help. Additionally, make time for hobbies and other activities you enjoy outside of work to help you relax and recharge.'
"role": "system",
"content": "Definitely! Setting clear boundaries between work and personal time, scheduling regular breaks throughout the day, and finding ways to disconnect from work after hours can help. Additionally, make time for hobbies and other activities you enjoy outside of work to help you relax and recharge.",
},
{
'role': 'user',
'content': 'can you give meore tips?'
}
{"role": "user", "content": "can you give meore tips?"},
],
temperature=0,
)
print('Time consuming: {:.2f}s'.format(time.time() - start_time))
print(f'Received: {response_text(response)}')
print("Time consuming: {:.2f}s".format(time.time() - start_time))
print(f"Received: {response_text(response)}")


if __name__ == '__main__':
if __name__ == "__main__":
base_request()
52 changes: 52 additions & 0 deletions gptcache/processor/context/selective_context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
from typing import Any, Dict

from gptcache.processor import ContextProcess
from gptcache.utils import import_selective_context

import_selective_context()

from selective_context import SelectiveContext # pylint: disable=C0413


class SelectiveContextProcess(ContextProcess):
    """A context processor that shortens a conversation with Selective Context.

    The conversation is flattened to text and passed through a
    ``SelectiveContext`` model; the full text and the reduced text are both
    returned by :meth:`process_all_content`.

    Need to download the corresponding model before use, the default English
    model is: en_core_web_sm

    `pip install spacy && python -m spacy download en_core_web_sm`

    :param model_type: the selective context model name, default value is 'gpt2'
    :type model_type: str
    :param lang: the content lang type, default value is 'en'.
    :type lang: str
    :param reduce_ratio: selective context ratio. The range for the value is
        between 0 and 1, with a default value of 0.35.
    :type reduce_ratio: float
    :param reduce_level: selective context level. The valid values include
        'sent', 'phrase', and 'token', with the default value being 'phrase'.
    :type reduce_level: str

    more details: https://github.com/liyucheng09/Selective_Context
    """

    # Class-level default kept for backward compatibility; every instance gets
    # its own value in __init__ and on each format_all_content call.
    content: str = ""

    def __init__(
        self,
        model_type: str = "gpt2",
        lang: str = "en",
        reduce_ratio: float = 0.35,
        reduce_level: str = "phrase",
    ):
        self.sc = SelectiveContext(model_type=model_type, lang=lang)
        self.reduce_ratio = reduce_ratio
        self.reduce_level = reduce_level
        self.content = ""

    def format_all_content(self, data: Dict[str, Any], **params: Dict[str, Any]):
        """Flatten ``data["messages"]`` into ``self.content``.

        Each message becomes one ``"<role>: <content> \\n"`` line. The text is
        rebuilt from scratch on every call: a processor instance is typically
        registered once and reused for many requests, and appending to the
        previous value would mix earlier conversations into the current one.

        :param data: request payload; must contain a ``messages`` list of
            mappings with ``role`` and ``content`` keys.
        :param params: accepted for interface compatibility; not used here.
        """
        self.content = "".join(
            f"{query['role']}: {query['content']} \n" for query in data["messages"]
        )

    def process_all_content(self) -> (Any, Any):
        """Reduce the formatted conversation with the selective-context model.

        :return: a ``(full_content, selective_content)`` pair, where
            ``full_content`` is the text built by :meth:`format_all_content`
            and ``selective_content`` is the first element returned by the
            model call (its second element is discarded).
        """
        selective_content, _ = self.sc(
            self.content, reduce_ratio=self.reduce_ratio, reduce_level=self.reduce_level
        )
        return self.content, selective_content
11 changes: 8 additions & 3 deletions gptcache/processor/context/summarization_context.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,21 @@
from typing import Dict, Any

import numpy as np

from gptcache.processor import ContextProcess
from gptcache.utils import import_huggingface
import numpy as np

import_huggingface()

import transformers # pylint: disable=C0413
import transformers # pylint: disable=C0413


class SummarizationContextProcess(ContextProcess):
"""A context processor for summarizing large amounts of text data using a summarizer model.
:param summarizer: The summarizer model to use for summarization.
:type summarizer: transformers.PreTrainedModel :param tokenizer: The tokenizer to use for tokenizing the text data.
:type summarizer: transformers.PreTrainedModel
:param tokenizer: The tokenizer to use for tokenizing the text data.
It used for measuring the output length.
:type tokenizer: transformers.PreTrainedTokenizer
:param target_length: The length of the summarized text.
Expand Down
8 changes: 7 additions & 1 deletion gptcache/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@
"import_stability",
"import_scipy",
"import_llama_cpp_python",
"import_ruamel"
"import_ruamel",
"import_selective_context",
]

import importlib.util
Expand Down Expand Up @@ -184,5 +185,10 @@ def import_scipy():
def import_llama_cpp_python():
    """Check for the ``llama_cpp`` library via ``_check_library``.

    ``llama-cpp-python`` is passed as ``package`` -- presumably the pip
    distribution name used if the module is missing; confirm against
    ``_check_library``, which is defined elsewhere in this module.
    """
    _check_library("llama_cpp", package="llama-cpp-python")


def import_ruamel():
    """Check for the ``ruamel-yaml`` library via ``_check_library``.

    NOTE(review): ``ruamel-yaml`` contains a hyphen, so it cannot be an
    importable module name (the import name is presumably ``ruamel.yaml``).
    Verify how ``_check_library`` resolves the name it is given.
    """
    _check_library("ruamel-yaml")


def import_selective_context():
    """Check for the ``selective_context`` library via ``_check_library``.

    Called before ``from selective_context import SelectiveContext`` in the
    selective-context processor module. What happens when the library is
    missing is decided by ``_check_library`` (defined elsewhere in this module).
    """
    _check_library("selective_context")
1 change: 1 addition & 0 deletions tests/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,4 @@ pytest-parallel
torch
mock
pexpect
spacy
36 changes: 36 additions & 0 deletions tests/unit_tests/processor/test_selective_context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from gptcache.processor.context.selective_context import SelectiveContextProcess
from gptcache.utils import import_selective_context


def test_selective_context_process():
    """The reduced text must be shorter than the full formatted conversation."""
    import_selective_context()
    conversation = [
        {
            "role": "user",
            "content": "What is the meaning of life? Explain it in 5 paragraphs.",
        },
        {
            "role": "system",
            "content": "The meaning of life is a philosophical question that has been debated throughout history. It is a complex and multifaceted topic, and different people may have different answers depending on their beliefs, values, and experiences. In general, though, the meaning of life is thought to be the reason for which we exist, the purpose that gives our lives significance and direction. \
One possible answer to the question of the meaning of life is that it is to seek happiness and fulfillment. This is a common theme in many philosophical and religious traditions, which often emphasize the importance of living a good and virtuous life in order to achieve happiness and fulfillment. For some, this may mean striving for personal growth and self-improvement, while for others it may involve pursuing spiritual enlightenment or a connection with a higher power. \
Another possible answer to the question of the meaning of life is that it is to contribute to the greater good. This approach emphasizes the importance of serving others and making the world a better place, whether through acts of kindness, charitable work, or other forms of service. For those who believe in this approach, the meaning of life is not just about individual happiness, but also about making a positive impact on the world and the lives of others. \
A third possible answer to the question of the meaning of life is that it is to discover and pursue one's passions and interests. This approach emphasizes the importance of living a life that is authentic and true to oneself, and of pursuing one's passions and interests with enthusiasm and dedication. For those who believe in this approach, the meaning of life is not just about achieving some external goal, but about living a life that is rich, fulfilling, and true to who we are. \
In conclusion, the meaning of life is a deeply personal and subjective question, and different people may have different answers. Some may believe that the meaning of life is to seek happiness and fulfillment, while others may believe that it is to contribute to the greater good or to pursue one's passions and interests. Ultimately, the meaning of life is a mystery that may never be fully understood, but that we can strive to uncover and understand through our experiences, beliefs, and values.",
        },
        {"role": "user", "content": "Shouldn't the answer be 42?"},
        {
            "role": "system",
            "content": """The answer "42" is a reference to a famous line in the book "The Hitchhiker's Guide to the Galaxy" by Douglas Adams. In the book, the character Deep Thought is asked to find the answer to the ultimate question of life, the universe, and everything. After thinking for a very long time, Deep Thought reveals that the answer is "42." However, the characters in the book then realize that they don't actually know what the question is, so the answer is ultimately meaningless. \
In other words, the answer "42" is a humorous and satirical take on the idea that there may be a single, definitive answer to the question of the meaning of life. It suggests that the search for such an answer may be futile, and that the meaning of life may be something that is ultimately unknowable. Therefore, while "42" is a famous and memorable line from a popular book, it is not necessarily a serious or meaningful answer to the question of the meaning of life.""",
        },
    ]
    processor = SelectiveContextProcess()

    processor.format_all_content({"messages": conversation})
    full_text, reduced_text = processor.process_all_content()
    assert len(full_text) > len(reduced_text)
Loading

0 comments on commit fbe301e

Please sign in to comment.