Add How to better configure your cache document (#450)
Signed-off-by: SimFG <bang.fu@zilliz.com>
SimFG authored Jun 27, 2023
1 parent c663379 commit 38c946b
Showing 8 changed files with 571 additions and 21 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -162,7 +162,7 @@ cache.set_openai_key()
 questions = [
     "what's github",
     "can you explain what GitHub is",
-    "can you tell me more about GitHub"
+    "can you tell me more about GitHub",
     "what is the purpose of GitHub"
 ]
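This list feeds the README's similar-question demo: the missing comma silently concatenated two questions into one. For context, a sketch of how the list is typically consumed, based on the surrounding README example rather than this diff (model name and response shape assume the OpenAI 0.x-style adapter):

from gptcache.adapter import openai

# The questions are semantically similar, so after the first real API call
# the remaining ones should be answered from the cache.
for question in questions:
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": question}],
    )
    print(question, response["choices"][0]["message"]["content"])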
6 changes: 3 additions & 3 deletions docs/bootcamp/langchain/question_answering.ipynb
@@ -113,7 +113,7 @@
    "metadata": {},
    "source": [
     "## Prepare Data\n",
-    "First we [prepare the data](https://raw.githubusercontent.com/hwchase17/langchain/master/docs/modules/state_of_the_union.txt). For this example we do similarity search over a vector database, but these documents could be fetched in any manner (the point of this notebook to highlight what to do AFTER you fetch the documents). You can learn more detail about Milvus in Langchain refer to [it](https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/milvus.html?highlight=milvus)."
+    "First we [prepare the data](https://raw.githubusercontent.com/hwchase17/langchain/master/docs/extras/modules/state_of_the_union.txt). For this example we do similarity search over a vector database, but these documents could be fetched in any manner (the point of this notebook to highlight what to do AFTER you fetch the documents). You can learn more detail about Milvus in Langchain refer to [it](https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/milvus.html?highlight=milvus)."
    ]
   },
   {
@@ -386,7 +386,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   },
@@ -400,7 +400,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.9.12"
+   "version": "3.8.8"
  },
  "vscode": {
   "interpreter": {
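The substantive fix here is the data URL: upstream langchain moved state_of_the_union.txt under docs/extras/. For readers following the notebook, a rough sketch of the preparation step it links to, assuming langchain's 2023-era loaders and a locally running Milvus (the endpoint and chunking parameters are illustrative, not from the diff):

from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Milvus

# Load the sample speech, split it into chunks, and index them in Milvus
# so the notebook can run similarity search over the chunks.
documents = TextLoader("state_of_the_union.txt").load()
docs = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0).split_documents(documents)
vector_db = Milvus.from_documents(
    docs,
    OpenAIEmbeddings(),
    connection_args={"host": "localhost", "port": "19530"},  # assumed Milvus endpoint
)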
530 changes: 530 additions & 0 deletions docs/configure_it.md

Large diffs are not rendered by default.
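The new document is the headline change of this commit, but its 530 lines are not rendered in this view. As a loose illustration of the kind of setup it covers, here is a minimal cache-configuration sketch using GPTCache's public API; init_similar_cache also appears in the test changes below, but the data_dir and threshold values here are assumptions, not content from the document:

from gptcache import Config
from gptcache.adapter.api import init_similar_cache

# init_similar_cache wires up an embedding model, a vector store, and a
# similarity evaluator in one call; Config tunes cache behavior such as
# how close a hit must be before it is served from cache.
init_similar_cache(
    data_dir="similar_cache_data",            # assumed storage directory
    config=Config(similarity_threshold=0.8),  # assumed threshold value
)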

1 change: 1 addition & 0 deletions docs/toc.bak
@@ -6,6 +6,7 @@

    usage.md
    feature.md
+   configure_it.md
    release_note.md

 .. toctree::
4 changes: 4 additions & 0 deletions gptcache/adapter/adapter.py
@@ -188,6 +188,7 @@ def post_process():
         kwargs["cache_context"] = context
         kwargs["cache_skip"] = cache_skip
         kwargs["cache_factor"] = cache_factor
+        kwargs["search_only_flag"] = search_only_flag
         llm_data = adapt(
             llm_handler, cache_data_convert, update_cache_callback, *args, **kwargs
         )
@@ -199,6 +200,9 @@ def post_process():
             llm_handler, func_name="llm_request", report_func=chat_cache.report.llm
         )(*args, **kwargs)

+    if not llm_data:
+        return None
+
     if cache_enable:
         try:
             def update_cache_func(handled_llm_data, question=None):
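These two hunks work together: the adapter now forwards a search_only_flag kwarg, and when no LLM data comes back it returns None instead of continuing. The practical effect is that a caller can probe the cache without ever hitting the model. A minimal sketch of that usage; the kwarg name comes from this diff, while the surrounding setup is assumed:

from gptcache import cache
from gptcache.adapter import openai

cache.init()
cache.set_openai_key()

# With search_only_flag=True the adapter only searches the cache; on a
# cache miss it returns None rather than issuing an LLM request.
answer = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "what's github"}],
    search_only_flag=True,
)
if answer is None:
    print("cache miss; no LLM request was made")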
6 changes: 3 additions & 3 deletions gptcache/manager/vector_data/manager.py
@@ -201,9 +201,9 @@ def get(name, **kwargs):
             from gptcache.manager.vector_data.redis_vectorstore import RedisVectorStore
             host = kwargs.get("host", "localhost")
             port = kwargs.get("port", "6379")
-            user = kwargs.get("user")
-            password = kwargs.get("password")
-            namespace = kwargs.get("namespace")
+            user = kwargs.get("user", "")
+            password = kwargs.get("password", "")
+            namespace = kwargs.get("namespace", "")
             dimension = kwargs.get("dimension", DIMENSION)
             collection_name = kwargs.get("collection_name", COLLECTION_NAME)
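The change gives the Redis backend's optional credentials empty-string defaults instead of None, so omitted arguments reach the Redis client with a consistent type. A sketch of how these kwargs flow in through the factory (the dimension and collection name below are illustrative; the real defaults live in manager.py):

from gptcache.manager import VectorBase

# user, password, and namespace may now be omitted entirely; they fall
# back to "" rather than None inside the factory's kwargs.get calls.
vector_base = VectorBase(
    "redis",
    host="localhost",
    port="6379",
    dimension=128,               # assumed embedding size
    collection_name="gptcache",  # assumed collection name
)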
35 changes: 27 additions & 8 deletions gptcache/processor/context/summarization_context.py
@@ -9,23 +9,33 @@

 import transformers  # pylint: disable=C0413

+
 def summarize_to_length(summarizer, text, target_len, max_len=1024):
     tokenizer = summarizer.tokenizer
+
     def token_length(text):
         return len(tokenizer.encode(text))
+
     segment_len = max_len - 100
     summary_result = text
     while token_length(text) > target_len:
         tokens = tokenizer.encode(text)
-        segments = [tokens[i:i+segment_len] for i in range(0, len(tokens), segment_len-1)]
+        segments = [
+            tokens[i : i + segment_len] for i in range(0, len(tokens), segment_len - 1)
+        ]
         summary_result = ""
         for segment in segments:
-            len_seg = int(len(segment)/4)
-            summary = summarizer(tokenizer.decode(segment), min_length=max(len_seg-10, 1), max_length=len_seg)
+            len_seg = int(len(segment) / 4)
+            summary = summarizer(
+                tokenizer.decode(segment),
+                min_length=max(len_seg - 10, 1),
+                max_length=len_seg,
+            )
             summary_result += summary[0]["summary_text"]
         text = summary_result
     return summary_result
+
+
 class SummarizationContextProcess(ContextProcess):
     """A context processor for summarizing large amounts of text data using a summarizer model.
@@ -45,8 +55,10 @@ class SummarizationContextProcess(ContextProcess):
         context_process = SummarizationContextProcess()
         cache.init(pre_embedding_func=context_process.pre_process)
     """
-    def __init__(self, model_name="facebook/bart-large-cnn",
-                 tokenizer=None, target_length=512):
+
+    def __init__(
+        self, model_name="facebook/bart-large-cnn", tokenizer=None, target_length=512
+    ):
         summarizer = transformers.pipeline(task="summarization", model=model_name)
         self.summarizer = summarizer
         self.target_length = target_length
@@ -64,7 +76,9 @@ def summarize_to_sentence(self, sentences, target_size=1000):
         target_sentences = []
         for sent, target_len in zip(sentences, target_lengths):
             if len(self.tokenizer.tokenize(sent)) > target_len:
-                response = summarize_to_length(self.summarizer, sent, target_len, self.tokenizer.model_max_length)
+                response = summarize_to_length(
+                    self.summarizer, sent, target_len, self.tokenizer.model_max_length
+                )
                 target_sentence = response
             else:
                 target_sentence = sent
@@ -84,9 +98,14 @@ def process_all_content(self) -> (Any, Any):
         def serialize_content(content):
             ret = ""
             for message in content:
-                ret += "[#RS]{}[#RE][#CS]{}[#CE]".format(message["role"], message["content"])
+                ret += "[#RS]{}[#RE][#CS]{}[#CE]".format(
+                    message["role"], message["content"]
+                )
             return ret
-        result = self.summarize_to_sentence([message["content"] for message in self.content], self.target_length)
+
+        result = self.summarize_to_sentence(
+            [message["content"] for message in self.content], self.target_length
+        )
         save_content = serialize_content(self.content)
         embedding_content = result
         return save_content, embedding_content
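Every hunk in this file is a formatting pass (black-style line wrapping); behavior is unchanged. The class docstring shown above already gives the intended wiring; a self-contained version of that usage, with the import path inferred from the file location:

from gptcache import cache
from gptcache.processor.context.summarization_context import SummarizationContextProcess

# Summarize long multi-turn conversations before embedding, so the cache
# key reflects the whole context rather than only the last message.
context_process = SummarizationContextProcess()  # defaults to facebook/bart-large-cnn
cache.init(pre_embedding_func=context_process.pre_process)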
8 changes: 2 additions & 6 deletions tests/unit_tests/adapter/test_langchain_models.py
@@ -7,7 +7,7 @@
 from gptcache.adapter import openai
 from gptcache.adapter.api import init_similar_cache, get
 from gptcache.adapter.langchain_models import LangChainLLMs, LangChainChat, _cache_msg_data_convert
-from gptcache.processor.pre import get_prompt, last_content_without_template
+from gptcache.processor.pre import get_prompt, last_content_without_template, get_messages_last_content
 from gptcache.utils import import_pydantic, import_langchain
 from gptcache.utils.response import get_message_from_openai_answer

@@ -60,10 +60,6 @@ def test_langchain_llms():
     assert expect_answer == answer


-def get_msg_func(data, **_):
-    return data.get("messages")[-1].content
-
-
 def test_langchain_chats():
     question = [HumanMessage(content="test_langchain_chats")]
     question2 = [HumanMessage(content="test_langchain_chats2")]
@@ -76,7 +72,7 @@ def test_langchain_chats():

     llm_cache = Cache()
     llm_cache.init(
-        pre_embedding_func=get_msg_func,
+        pre_embedding_func=get_messages_last_content,
     )

     os.environ["OPENAI_API_KEY"] = "API"
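The test drops its ad-hoc get_msg_func in favor of the shared get_messages_last_content helper from gptcache.processor.pre. Judging by the deleted code, the two resolve the cache key identically; a small sketch of that equivalence, assuming the helper mirrors the old signature:

from langchain.schema import HumanMessage

from gptcache.processor.pre import get_messages_last_content

# The deleted helper did: data.get("messages")[-1].content.
# The shared helper extracts the same field from the request payload.
data = {"messages": [HumanMessage(content="test_langchain_chats")]}
assert get_messages_last_content(data) == "test_langchain_chats"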
