Improve the usage example
Signed-off-by: SimFG <bang.fu@zilliz.com>
SimFG committed Mar 31, 2023
1 parent 6df0846 commit 453ac0f
Showing 7 changed files with 63 additions and 39 deletions.
5 changes: 2 additions & 3 deletions .gitignore
@@ -129,9 +129,8 @@ dmypy.json
 .pyre/
 
 .idea
-**/data_map.txt
 **/data_map**.txt
-**/faiss.index
-**/sqlite.db
+**/faiss**.index
+**/sqlite**.db
 **/example.py
 **/example.db
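
The widened patterns rely on gitignore treating a "**" inside a file name like an ordinary "*", so each entry now also ignores suffixed variants of the data files in any directory. Two hypothetical matches (the example paths are illustrative, not from the commit):

    **/faiss**.index   matches  faiss.index  and  example/benchmark/faiss_bak.index
    **/sqlite**.db     matches  sqlite.db    and  example/map/sqlite_v2.db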
23 changes: 14 additions & 9 deletions example/benchmark/benchmark_sf_towhee.py
@@ -1,4 +1,5 @@
 import json
+import os
 import time
 
 from gpt_cache.view import openai
@@ -23,6 +24,10 @@ def sf_evaluation(src_dict, cache_dict, **kwargs):
         return rank2 if rank2 != 0 else 1
     return 0
 
+sqlite_file = "sqlite.db"
+faiss_file = "faiss.index"
+has_data = os.path.isfile(sqlite_file) and os.path.isfile(faiss_file)
+
 data_manager = get_si_data_manager("sqlite", "faiss", dimension=embedding_towhee.dimension(), max_size=100000)
 cache.init(embedding_func=embedding_towhee.to_embeddings,
            data_manager=data_manager,
@@ -36,15 +41,15 @@ def sf_evaluation(src_dict, cache_dict, **kwargs):
pair["id"] = str(i)
i += 1

# you should CLOSE it if you SECONDLY run it
print("insert data")
id_origin = {}
for pair in mock_data:
question = pair["origin"]
answer = pair["id"]
id_origin[answer] = question
cache.data_manager.save(question, answer, cache.embedding_func(question))
print("end insert data")
if not has_data:
print("insert data")
id_origin = {}
for pair in mock_data:
question = pair["origin"]
answer = pair["id"]
id_origin[answer] = question
cache.data_manager.save(question, answer, cache.embedding_func(question))
print("end insert data")

all_time = 0.0
hit_cache_positive, hit_cache_negative = 0, 0
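
The change repeated across these example scripts is a first-run guard: instead of asking the user to comment out the seeding block before a second run (the old "you should CLOSE it if you SECONDLY run it" comment), each script now checks whether the data files from an earlier run exist and only inserts the mock data when they do not. A minimal standalone sketch of the pattern, with the seeding step reduced to a print (assumption: the cache counts as warm only when both files exist):

    import os

    sqlite_file = "sqlite.db"
    faiss_file = "faiss.index"
    has_data = os.path.isfile(sqlite_file) and os.path.isfile(faiss_file)

    if not has_data:
        print("insert data")  # first run: seed the cache
    else:
        print("found existing cache files, skipping insert")  # later runs reuse them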
25 changes: 14 additions & 11 deletions example/map/map_manager.py
@@ -8,27 +8,30 @@
 def run():
     dirname, _ = os.path.split(os.path.abspath(__file__))
     bak_cache = Cache()
+    bak_data_file = dirname + "/data_map_bak.txt"
     bak_cache.init(data_manager=get_data_manager("map",
-                                                 data_path=dirname + "/data_map_bak.txt",
+                                                 data_path=bak_data_file,
                                                  max_size=10))
+    data_file = dirname + "/data_map.txt"
     cache.init(data_manager=get_data_manager("map",
-                                             data_path=dirname + "/data_map.txt",
+                                             data_path=data_file,
                                              max_size=10),
                next_cache=bak_cache)
     mock_messages = [
         {"role": "system", "content": "You are a helpful assistant."},
         {"role": "user", "content": "foo15"}
     ]
 
-    # you should CLOSE it if you SECONDLY run it
-    for i in range(10):
-        question = f"foo{i}"
-        answer = f"receiver the foo {i}"
-        cache.data_manager.save(question, answer, cache.embedding_func(question))
-    for i in range(10, 20):
-        question = f"foo{i}"
-        answer = f"receiver the foo {i}"
-        bak_cache.data_manager.save(question, answer, bak_cache.embedding_func(question))
+    if not os.path.isfile(bak_data_file):
+        for i in range(10):
+            question = f"foo{i}"
+            answer = f"receiver the foo {i}"
+            cache.data_manager.save(question, answer, cache.embedding_func(question))
+    if not os.path.isfile(data_file):
+        for i in range(10, 20):
+            question = f"foo{i}"
+            answer = f"receiver the foo {i}"
+            bak_cache.data_manager.save(question, answer, bak_cache.embedding_func(question))
 
     answer = openai.ChatCompletion.create(
         model="gpt-3.5-turbo",
18 changes: 12 additions & 6 deletions example/sf_mock/sf_manager.py
@@ -1,3 +1,5 @@
+import os
+
 from gpt_cache.view import openai
 from gpt_cache.core import cache, Config
 from gpt_cache.cache.factory import get_si_data_manager
@@ -13,7 +15,11 @@ def mock_embeddings(data, **kwargs):


 def run():
-    data_manager = get_si_data_manager("sqlite", "faiss", dimension=d, max_size=8, clean_size=2, top_k=3)
+    sqlite_file = "sqlite.db"
+    faiss_file = "faiss.index"
+    has_data = os.path.isfile(sqlite_file) and os.path.isfile(faiss_file)
+    data_manager = get_si_data_manager("sqlite", "faiss",
+                                       dimension=d, max_size=8, clean_size=2, top_k=3)
     cache.init(embedding_func=mock_embeddings,
                data_manager=data_manager,
                evaluation_func=pair_evaluation,
@@ -26,11 +32,11 @@ def run():
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "foo"}
]
# you should CLOSE it if you SECONDLY run it
for i in range(10):
question = f"foo{i}"
answer = f"receiver the foo {i}"
cache.data_manager.save(question, answer, cache.embedding_func(question))
if not has_data:
for i in range(10):
question = f"foo{i}"
answer = f"receiver the foo {i}"
cache.data_manager.save(question, answer, cache.embedding_func(question))

answer = openai.ChatCompletion.create(
model="gpt-3.5-turbo",
15 changes: 10 additions & 5 deletions example/sf_towhee/sf_manager.py
@@ -1,3 +1,4 @@
+import os
 import time
 
 from gpt_cache.view import openai
@@ -9,17 +10,21 @@

 def run():
     towhee = Towhee()
-    data_manager = get_si_data_manager("sqlite", "faiss", dimension=towhee.dimension(), max_size=2000)
+    sqlite_file = "sqlite.db"
+    faiss_file = "faiss.index"
+    has_data = os.path.isfile(sqlite_file) and os.path.isfile(faiss_file)
+    data_manager = get_si_data_manager("sqlite", "faiss",
+                                       dimension=towhee.dimension(), max_size=2000)
     cache.init(embedding_func=towhee.to_embeddings,
                data_manager=data_manager,
                evaluation_func=pair_evaluation,
                similarity_threshold=10000,
                similarity_positive=False)
 
-    # you should CLOSE it if you SECONDLY run it
-    question = "what do you think about chatgpt"
-    answer = "chatgpt is a good application"
-    cache.data_manager.save(question, answer, cache.embedding_func(question))
+    if not has_data:
+        question = "what do you think about chatgpt"
+        answer = "chatgpt is a good application"
+        cache.data_manager.save(question, answer, cache.embedding_func(question))
 
     # distance 77
     mock_messages = [
14 changes: 9 additions & 5 deletions example/sqlite_milvus_mock/sqlite_milvus_mock.py
@@ -1,3 +1,5 @@
+import os
+
 from gpt_cache.view import openai
 from gpt_cache.core import cache, Config
 from gpt_cache.cache.factory import get_ss_data_manager
@@ -13,6 +15,8 @@ def mock_embeddings(data, **kwargs):


 def run():
+    sqlite_file = "sqlite.db"
+    has_data = os.path.isfile(sqlite_file)
     # milvus
     data_manager = get_ss_data_manager("sqlite", "milvus", dimension=d, max_size=8, clean_size=2)
     # zilliz cloud
@@ -33,11 +37,11 @@ def run():
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "foo"}
]
# you should CLOSE it if you SECONDLY run it
for i in range(10):
question = f"foo{i}"
answer = f"receiver the foo {i}"
cache.data_manager.save(question, answer, cache.embedding_func(question))
if not has_data:
for i in range(10):
question = f"foo{i}"
answer = f"receiver the foo {i}"
cache.data_manager.save(question, answer, cache.embedding_func(question))

answer = openai.ChatCompletion.create(
model="gpt-3.5-turbo",
2 changes: 2 additions & 0 deletions gpt_cache/cache/vector_data/faiss.py
@@ -30,6 +30,8 @@ def mult_add(self, datas):
         self.index.add(np_data)
 
     def search(self, data):
+        if self.index.ntotal == 0:
+            return None
         np_data = np.array(data).astype('float32').reshape(1, -1)
         D, I = self.index.search(np_data, self.top_k)
         distances = []
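
The new guard matters because FAISS does not raise when an empty index is searched: it returns placeholder ids of -1 (with sentinel distances), which every caller would otherwise need to special-case. A small sketch of that behavior, assuming faiss and numpy are installed; the variable names are illustrative, not from the repository:

    import faiss
    import numpy as np

    dim, top_k = 8, 3
    index = faiss.IndexFlatL2(dim)  # freshly created, so index.ntotal == 0
    query = np.random.rand(1, dim).astype('float32')

    D, I = index.search(query, top_k)
    print(I)  # [[-1 -1 -1]] -- placeholder ids, not real neighbors

    # The guard added in this commit turns that case into an explicit miss:
    if index.ntotal == 0:
        result = None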
