Support for Request accessed GenAI Models (onyx-dot-app#270)
yuhongsun96 authored Aug 7, 2023
1 parent 0e667d3 commit 3bfc724
Showing 19 changed files with 613 additions and 351 deletions.
6 changes: 0 additions & 6 deletions backend/danswer/configs/app_configs.py
@@ -138,12 +138,6 @@
CHUNK_MAX_CHAR_OVERLAP = 50


#####
# Other API Keys
#####
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")


#####
# Encoder Model Endpoint Configs (Currently unused, running the models in memory)
#####
25 changes: 24 additions & 1 deletion backend/danswer/configs/constants.py
@@ -12,7 +12,7 @@
ALLOWED_USERS = "allowed_users"
ALLOWED_GROUPS = "allowed_groups"
METADATA = "metadata"
OPENAI_API_KEY_STORAGE_KEY = "openai_api_key"
GEN_AI_API_KEY_STORAGE_KEY = "genai_api_key"
HTML_SEPARATOR = "\n"
PUBLIC_DOC_PAT = "PUBLIC"

@@ -30,3 +30,26 @@ class DocumentSource(str, Enum):
PRODUCTBOARD = "productboard"
FILE = "file"
NOTION = "notion"


class DanswerGenAIModel(str, Enum):
"""This represents the internal Danswer GenAI model which determines the class that is used
to generate responses to the user query. Different models/services require different internal
handling, this allows for modularity of implementation within Danswer"""

OPENAI = "openai-completion"
OPENAI_CHAT = "openai-chat-completion"
GPT4ALL = "gpt4all-completion"
GPT4ALL_CHAT = "gpt4all-chat-completion"
HUGGINGFACE = "huggingface-inference-completion"
HUGGINGFACE_CHAT = "huggingface-inference-chat-completion"
REQUEST = "request-completion"


class ModelHostType(str, Enum):
"""For GenAI models interfaced via requests, different services have different
expectations for what fields are included in the request"""

# https://huggingface.co/docs/api-inference/detailed_parameters#text-generation-task
HUGGINGFACE = "huggingface" # HuggingFace test-generation Inference API
# TODO support for Azure, AWS, GCP GenAI model hosting
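
To make the REQUEST flow concrete, here is a minimal sketch of a call against a HuggingFace text-generation endpoint. The function names are illustrative rather than Danswer's actual request handling; the payload shape follows the text-generation task docs linked above.

import requests

def build_hf_payload(prompt: str, max_new_tokens: int = 512) -> dict:
    # Payload shape per the HuggingFace text-generation task docs
    return {"inputs": prompt, "parameters": {"max_new_tokens": max_new_tokens}}

def query_hosted_model(endpoint: str, prompt: str, api_key: str | None = None) -> str:
    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
    response = requests.post(
        endpoint, headers=headers, json=build_hf_payload(prompt), timeout=30
    )
    response.raise_for_status()
    # The text-generation task returns a list of {"generated_text": ...} objects
    return response.json()[0]["generated_text"]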
65 changes: 40 additions & 25 deletions backend/danswer/configs/model_configs.py
@@ -1,4 +1,8 @@
import os
from enum import Enum

from danswer.configs.constants import DanswerGenAIModel
from danswer.configs.constants import ModelHostType

# Important considerations when choosing models
# Max tokens count needs to be high considering use case (at least 512)
@@ -30,35 +34,46 @@
# Purely an optimization, memory limitation consideration
BATCH_SIZE_ENCODE_CHUNKS = 8

# QA Model API Configs
# refer to https://platform.openai.com/docs/models/model-endpoint-compatibility for OpenAI models
# Valid list:
# - openai-completion
# - openai-chat-completion
# - gpt4all-completion -> Due to M1 Macs not having compatible gpt4all version, please install dependency yourself
# - gpt4all-chat-completion-> Due to M1 Macs not having compatible gpt4all version, please install dependency yourself
# To use gpt4all, run: pip install --upgrade gpt4all==1.0.5
# These support HuggingFace Inference API, Inference Endpoints and servers running the text-generation-inference backend
# - huggingface-inference-completion
# - huggingface-inference-chat-completion

#####
# Generative AI Model Configs
#####
# Other models should work as well; check the library/API compatibility.
# But these are the models that have been verified to work with the existing prompts.
# Using a different model may require some prompt tuning. See qa_prompts.py
VERIFIED_MODELS = {
DanswerGenAIModel.OPENAI: ["text-davinci-003"],
DanswerGenAIModel.OPENAI_CHAT: ["gpt-3.5-turbo", "gpt-4"],
DanswerGenAIModel.GPT4ALL: ["ggml-model-gpt4all-falcon-q4_0.bin"],
DanswerGenAIModel.GPT4ALL_CHAT: ["ggml-model-gpt4all-falcon-q4_0.bin"],
# The "chat" model below is actually "instruction finetuned" and does not support conversational
DanswerGenAIModel.HUGGINGFACE.value: ["meta-llama/Llama-2-70b-chat-hf"],
DanswerGenAIModel.HUGGINGFACE_CHAT.value: ["meta-llama/Llama-2-70b-hf"],
}

# Sets the internal Danswer model class to use
INTERNAL_MODEL_VERSION = os.environ.get(
"INTERNAL_MODEL_VERSION", "openai-chat-completion"
"INTERNAL_MODEL_VERSION", DanswerGenAIModel.OPENAI_CHAT.value
)
# For GPT4ALL, use "ggml-model-gpt4all-falcon-q4_0.bin" for the below for a tested model
GEN_AI_MODEL_VERSION = os.environ.get("GEN_AI_MODEL_VERSION", "gpt-3.5-turbo")
GEN_AI_MAX_OUTPUT_TOKENS = int(os.environ.get("GEN_AI_MAX_OUTPUT_TOKENS", "512"))
# Use HuggingFace API Token for Huggingface inference client
GEN_AI_HUGGINGFACE_API_TOKEN = os.environ.get("GEN_AI_HUGGINGFACE_API_TOKEN", None)
# Use the conversational API with the huggingface-inference-chat-completion internal model
# Note - this only works with models that support conversational interfaces
GEN_AI_HUGGINGFACE_USE_CONVERSATIONAL = (
os.environ.get("GEN_AI_HUGGINGFACE_USE_CONVERSATIONAL", "").lower() == "true"
)
# Disable streaming responses. Set this to true to "polyfill" streaming for models that don't support streaming
GEN_AI_HUGGINGFACE_DISABLE_STREAM = (
os.environ.get("GEN_AI_HUGGINGFACE_DISABLE_STREAM", "").lower() == "true"

# API key for the Generative AI model, if one is required for access; otherwise leave blank
GEN_AI_API_KEY = os.environ.get("GEN_AI_API_KEY", "")

# If using GPT4All or OpenAI, specify the model version
GEN_AI_MODEL_VERSION = os.environ.get(
"GEN_AI_MODEL_VERSION",
VERIFIED_MODELS.get(DanswerGenAIModel(INTERNAL_MODEL_VERSION), [""])[0],
)

# If the Generative AI Model is hosted to accept requests (DanswerGenAIModel.REQUEST),
# set the two below to specify
# - where to hit the endpoint
# - how the request should be formed
GEN_AI_ENDPOINT = os.environ.get("GEN_AI_ENDPOINT", "")
GEN_AI_HOST_TYPE = os.environ.get("GEN_AI_HOST_TYPE", ModelHostType.HUGGINGFACE.value)

# Set this to be enough for an answer + quotes
GEN_AI_MAX_OUTPUT_TOKENS = int(os.environ.get("GEN_AI_MAX_OUTPUT_TOKENS", "512"))

# Danswer custom Deep Learning Models
INTENT_MODEL_VERSION = "danswer/intent-model"
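
For illustration, a hypothetical setup for pointing Danswer at a request-hosted model. The endpoint URL and model name are examples only, and the variables must be set before this config module is imported, since it reads the environment at import time.

import os

# Hypothetical example values; any endpoint the REQUEST model class can reach works
os.environ.setdefault("INTERNAL_MODEL_VERSION", "request-completion")
os.environ.setdefault(
    "GEN_AI_ENDPOINT",
    "https://api-inference.huggingface.co/models/meta-llama/Llama-2-7b-chat-hf",
)
os.environ.setdefault("GEN_AI_HOST_TYPE", "huggingface")
os.environ.setdefault("GEN_AI_API_KEY", "hf_...")  # only if the endpoint requires auth

from danswer.configs import model_configs  # noqa: E402  (reads the env at import time)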
97 changes: 68 additions & 29 deletions backend/danswer/direct_qa/__init__.py
@@ -1,25 +1,30 @@
from typing import Any

import pkg_resources
from openai.error import AuthenticationError
from openai.error import Timeout

from danswer.configs.app_configs import QA_TIMEOUT
from danswer.configs.model_configs import (
GEN_AI_HUGGINGFACE_API_TOKEN,
INTERNAL_MODEL_VERSION,
)
from danswer.configs.constants import DanswerGenAIModel
from danswer.configs.constants import ModelHostType
from danswer.configs.model_configs import GEN_AI_API_KEY
from danswer.configs.model_configs import GEN_AI_ENDPOINT
from danswer.configs.model_configs import GEN_AI_HOST_TYPE
from danswer.configs.model_configs import INTERNAL_MODEL_VERSION
from danswer.direct_qa.exceptions import UnknownModelError
from danswer.direct_qa.gpt_4_all import GPT4AllChatCompletionQA
from danswer.direct_qa.gpt_4_all import GPT4AllCompletionQA
from danswer.direct_qa.huggingface import HuggingFaceChatCompletionQA
from danswer.direct_qa.huggingface import HuggingFaceCompletionQA
from danswer.direct_qa.interfaces import QAModel
from danswer.direct_qa.huggingface_inference import (
HuggingFaceInferenceChatCompletionQA,
HuggingFaceInferenceCompletionQA,
)
from danswer.direct_qa.open_ai import OpenAIChatCompletionQA
from danswer.direct_qa.open_ai import OpenAICompletionQA
from danswer.direct_qa.qa_prompts import WeakModelFreeformProcessor
from danswer.direct_qa.qa_utils import get_gen_ai_api_key
from danswer.direct_qa.request_model import RequestCompletionQA
from danswer.dynamic_configs.interface import ConfigNotFoundError
from danswer.utils.logger import setup_logger

# Imports commented out temporarily due to incompatibility of gpt4all with M1 Mac hardware currently
# from danswer.direct_qa.gpt_4_all import GPT4AllChatCompletionQA
# from danswer.direct_qa.gpt_4_all import GPT4AllCompletionQA
logger = setup_logger()


def check_model_api_key_is_valid(model_api_key: str) -> bool:
@@ -35,32 +40,66 @@ def check_model_api_key_is_valid(model_api_key: str) -> bool:
return True
except AuthenticationError:
return False
except Timeout:
pass
except Exception as e:
logger.warning(f"GenAI API key failed for the following reason: {e}")

return False


def get_default_backend_qa_model(
internal_model: str = INTERNAL_MODEL_VERSION,
api_key: str | None = None,
endpoint: str | None = GEN_AI_ENDPOINT,
model_host_type: str | None = GEN_AI_HOST_TYPE,
api_key: str | None = GEN_AI_API_KEY,
timeout: int = QA_TIMEOUT,
**kwargs: Any
**kwargs: Any,
) -> QAModel:
if internal_model == "openai-completion":
if not api_key:
try:
api_key = get_gen_ai_api_key()
except ConfigNotFoundError:
pass

if internal_model in [
DanswerGenAIModel.GPT4ALL.value,
DanswerGenAIModel.GPT4ALL_CHAT.value,
]:
# gpt4all is not compatible with M1 Mac hardware as of Aug 2023
pkg_resources.get_distribution("gpt4all")

if internal_model == DanswerGenAIModel.OPENAI.value:
return OpenAICompletionQA(timeout=timeout, api_key=api_key, **kwargs)
elif internal_model == "openai-chat-completion":
elif internal_model == DanswerGenAIModel.OPENAI_CHAT.value:
return OpenAIChatCompletionQA(timeout=timeout, api_key=api_key, **kwargs)
elif internal_model == "huggingface-inference-completion":
api_key = api_key if api_key is not None else GEN_AI_HUGGINGFACE_API_TOKEN
return HuggingFaceInferenceCompletionQA(api_key=api_key, **kwargs)
elif internal_model == "huggingface-inference-chat-completion":
api_key = api_key if api_key is not None else GEN_AI_HUGGINGFACE_API_TOKEN
return HuggingFaceInferenceChatCompletionQA(api_key=api_key, **kwargs)
# Note GPT4All is not supported for M1 Mac machines currently, removing until support is added
# elif internal_model == "gpt4all-completion":
# return GPT4AllCompletionQA(**kwargs)
# elif internal_model == "gpt4all-chat-completion":
# return GPT4AllChatCompletionQA(**kwargs)
elif internal_model == DanswerGenAIModel.GPT4ALL.value:
return GPT4AllCompletionQA(**kwargs)
elif internal_model == DanswerGenAIModel.GPT4ALL_CHAT.value:
return GPT4AllChatCompletionQA(**kwargs)
elif internal_model == DanswerGenAIModel.HUGGINGFACE.value:
return HuggingFaceCompletionQA(api_key=api_key, **kwargs)
elif internal_model == DanswerGenAIModel.HUGGINGFACE_CHAT.value:
return HuggingFaceChatCompletionQA(api_key=api_key, **kwargs)
elif internal_model == DanswerGenAIModel.REQUEST.value:
if endpoint is None or model_host_type is None:
raise ValueError(
"Request based GenAI model requires an endpoint and host type"
)
if model_host_type == ModelHostType.HUGGINGFACE.value:
# Assuming the user is hosting the smallest-size LLMs, with weaker capabilities and token limits
# With the 7B Llama2 Chat model, there is a max limit of 1512 tokens
# This is the sum of input and output tokens, so cannot take in full Danswer context
return RequestCompletionQA(
endpoint=endpoint,
model_host_type=model_host_type,
api_key=api_key,
prompt_processor=WeakModelFreeformProcessor(),
timeout=timeout,
)
return RequestCompletionQA(
endpoint=endpoint,
model_host_type=model_host_type,
api_key=api_key,
timeout=timeout,
)
else:
raise UnknownModelError(internal_model)
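
A sketch of how the new dispatch might be exercised for a request-hosted model, assuming the QAModel interface exposes answer_question(query, context_docs) as shown in the gpt_4_all.py diff below; the argument values are illustrative, not defaults.

from danswer.direct_qa import get_default_backend_qa_model

# Hypothetical endpoint/model values
qa_model = get_default_backend_qa_model(
    internal_model="request-completion",
    endpoint="https://api-inference.huggingface.co/models/meta-llama/Llama-2-7b-chat-hf",
    model_host_type="huggingface",
    api_key=None,  # fill in if the hosted endpoint requires auth
)
answer = qa_model.answer_question("What is Danswer?", context_docs=[])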
36 changes: 33 additions & 3 deletions backend/danswer/direct_qa/gpt_4_all.py
@@ -1,8 +1,6 @@
from collections.abc import Generator
from typing import Any

from gpt4all import GPT4All # type:ignore

from danswer.chunking.models import InferenceChunk
from danswer.configs.model_configs import GEN_AI_MAX_OUTPUT_TOKENS
from danswer.configs.model_configs import GEN_AI_MODEL_VERSION
@@ -18,9 +16,30 @@
from danswer.utils.logger import setup_logger
from danswer.utils.timing import log_function_time


logger = setup_logger()


class DummyGPT4All:
"""In the case of import failure due to M1 Mac incompatibility,
so this module does not raise exceptions during server startup,
as long as this module isn't actually used"""

def __init__(self, *args: Any, **kwargs: Any) -> None:
raise RuntimeError("GPT4All library not installed.")


try:
from gpt4all import GPT4All # type:ignore
except ImportError:
logger.warning(
"GPT4All library not installed. "
"If you wish to run GPT4ALL (in memory) to power Danswer's "
"Generative AI features, please install gpt4all==1.0.5. "
"As of Aug 2023, this library is not compatible with M1 Mac."
)
GPT4All = DummyGPT4All


GPT4ALL_MODEL: GPT4All | None = None


@@ -56,6 +75,10 @@ def __init__(
self.max_output_tokens = max_output_tokens
self.include_metadata = include_metadata

@property
def requires_api_key(self) -> bool:
return False

def warm_up_model(self) -> None:
get_gpt_4_all_model(self.model_version)

@@ -117,6 +140,13 @@ def __init__(
self.max_output_tokens = max_output_tokens
self.include_metadata = include_metadata

@property
def requires_api_key(self) -> bool:
return False

def warm_up_model(self) -> None:
get_gpt_4_all_model(self.model_version)

@log_function_time()
def answer_question(
self, query: str, context_docs: list[InferenceChunk]
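
The deferred-failure import pattern above generalizes to any optional dependency: bind the name to a stand-in that raises only when it is actually used. A minimal standalone sketch, with a hypothetical package name:

from typing import Any


class MissingDependency:
    """Raises only when the optional dependency is actually used."""

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        raise RuntimeError("Optional package not installed.")


try:
    from some_optional_pkg import Client  # hypothetical optional import
except ImportError:
    Client = MissingDependency  # module imports cleanly; instantiation fails loudly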