diff --git a/docs/open_source/scan/scan_llm/index.md b/docs/open_source/scan/scan_llm/index.md index d630c4e443..2582edc916 100644 --- a/docs/open_source/scan/scan_llm/index.md +++ b/docs/open_source/scan/scan_llm/index.md @@ -57,194 +57,135 @@ like this: ```python import os import giskard -from giskard.llm.client.openai import OpenAIClient -# Set the OpenAI API key -os.environ["OPENAI_API_KEY"] = "sk-…" +os.environ["OPENAI_API_KEY"] = "your-api-key" -# Create a giskard OpenAI client -openai_client = OpenAIClient(model="gpt-4o") +# Optional, setup a model (default model is gpt-4) +giskard.llm.set_llm_model("gpt-4") +giskard.llm.set_embedding_model("text-embedding-ada-002") -# Set the default client -giskard.llm.set_llm_api("openai") -giskard.llm.set_default_client(openai_client) +# Optional Keys - OpenAI Organization, OpenAI API Base +os.environ["OPENAI_ORGANIZATION"] = "your-org-id" +os.environ["OPENAI_API_BASE"] = "openaiai-api-base" ``` +More information on [LiteLLM documentation](https://docs.litellm.ai/docs/providers/openai) + :::::: ::::::{tab-item} Azure OpenAI -Require `openai>=1.0.0` - ```python import os import giskard -# Set the Azure OpenAI API key and endpoint -os.environ['AZURE_OPENAI_API_KEY'] = '...' -os.environ['AZURE_OPENAI_ENDPOINT'] = 'https://xxx.openai.azure.com' -os.environ['OPENAI_API_VERSION'] = '2023-07-01-preview' +os.environ["AZURE_API_KEY"] = "" # "my-azure-api-key" +os.environ["AZURE_API_BASE"] = "" # "https://example-endpoint.openai.azure.com" +os.environ["AZURE_API_VERSION"] = "" # "2023-05-15" -# You'll need to provide the name of the model that you've deployed -# Beware, the model provided must be capable of using function calls -giskard.llm.set_llm_model('my-gpt-4-model') +giskard.llm.set_llm_model("azure/") +giskard.llm.set_embedding_model("azure/") + +# optional +os.environ["AZURE_AD_TOKEN"] = "" +os.environ["AZURE_API_TYPE"] = "" giskard.llm.set_embedding_model('my-embedding-model') ``` +More information on [LiteLLM documentation](https://docs.litellm.ai/docs/providers/azure) + :::::: ::::::{tab-item} Mistral ```python import os import giskard -from giskard.llm.client.mistral import MistralClient -# Set the Mistral API key -os.environ["MISTRAL_API_KEY"] = "…" +os.environ['MISTRAL_API_KEY'] = "" -# Create a giskard Mistral client -mistral_client = MistralClient() - -# Set the default client -giskard.llm.set_default_client(mistral_client) - -# You may also want to set the default embedding model -# Check the Custom Client code snippet for more details +giskard.llm.set_llm_model("mistral/mistral-tiny") +giskard.llm.set_embedding_model("mistral/mistral-embed") ``` +More information on [LiteLLM documentation](https://docs.litellm.ai/docs/providers/mistral) + :::::: ::::::{tab-item} Ollama ```python import giskard -from openai import OpenAI -from giskard.llm.client.openai import OpenAIClient -from giskard.llm.embeddings.openai import OpenAIEmbedding -# Setup the OpenAI client with API key and base URL for Ollama -_client = OpenAI(base_url="http://localhost:11434/v1/", api_key="ollama") +giskard.llm.set_llm_model("ollama/llama2", api_base="http://localhost:11434") # See supported models here: https://docs.litellm.ai/docs/providers/ollama#ollama-models +``` -# Wrap the original OpenAI client with giskard OpenAI client and embedding -llm_client = OpenAIClient(model="llama3.2", client=_client) -embed_client = OpenAIEmbedding(model="nomic-embed-text", client=_client) +More information on [LiteLLM documentation](https://docs.litellm.ai/docs/providers/ollama) -# 
Set the default client and embedding -giskard.llm.set_default_client(llm_client) -giskard.llm.embeddings.set_default_embedding(embed_client) -``` :::::: -::::::{tab-item} Claude 3 +::::::{tab-item} AWS Bedrock + +More information on [LiteLLM documentation](https://docs.litellm.ai/docs/providers/bedrock) ```python import os -import boto3 import giskard -from giskard.llm.client.bedrock import ClaudeBedrockClient -from giskard.llm.embeddings.bedrock import BedrockEmbedding - -# Create a Bedrock client -bedrock_runtime = boto3.client("bedrock-runtime", region_name=os.environ["AWS_DEFAULT_REGION"]) +os.environ["AWS_ACCESS_KEY_ID"] = "" +os.environ["AWS_SECRET_ACCESS_KEY"] = "" +os.environ["AWS_REGION_NAME"] = "" -# Wrap the Beddock client with giskard Bedrock client and embedding -claude_client = ClaudeBedrockClient(bedrock_runtime, model="anthropic.claude-3-haiku-20240307-v1:0") -embed_client = BedrockEmbedding(bedrock_runtime, model="amazon.titan-embed-text-v1") - -# Set the default client and embedding -giskard.llm.set_default_client(claude_client) -giskard.llm.embeddings.set_default_embedding(embed_client) +giskard.llm.set_llm_model("bedrock/anthropic.claude-3-sonnet-20240229-v1:0") +giskard.llm.set_embedding_model("bedrock/amazon.titan-embed-text-v1") ``` :::::: ::::::{tab-item} Gemini +More information on [LiteLLM documentation](https://docs.litellm.ai/docs/providers/gemini) + ```python import os import giskard -import google.generativeai as genai -from giskard.llm.client.gemini import GeminiClient - -# Set the Gemini API key -os.environ["GEMINI_API_KEY"] = "…" -# Configure the Gemini API -genai.configure(api_key=os.environ["GEMINI_API_KEY"]) +os.environ["GEMINI_API_KEY"] = "your-api-key" -# Create a giskard Gemini client -gemini_client = GeminiClient() - -# Set the default client -giskard.llm.set_default_client(gemini_client) - -# You may also want to set the default embedding model -# Check the Custom Client code snippet for more details +giskard.llm.set_llm_model("gemini/gemini-pro") ``` :::::: ::::::{tab-item} Custom Client +More information on [LiteLLM documentation](https://docs.litellm.ai/docs/providers/custom_llm_server ) + ```python +import requests import giskard -from typing import Sequence, Optional -from giskard.llm.client import set_default_client -from giskard.llm.client.base import LLMClient, ChatMessage - -# Create a custom client by extending the LLMClient class -class MyLLMClient(LLMClient): - def __init__(self, my_client): - self._client = my_client - - def complete( - self, - messages: Sequence[ChatMessage], - temperature: float = 1, - max_tokens: Optional[int] = None, - caller_id: Optional[str] = None, - seed: Optional[int] = None, - format=None, - ) -> ChatMessage: - # Create the prompt - prompt = "" - for msg in messages: - if msg.role.lower() == "assistant": - prefix = "\n\nAssistant: " - else: - prefix = "\n\nHuman: " - - prompt += prefix + msg.content - - prompt += "\n\nAssistant: " - - # Create the body - params = { - "prompt": prompt, - "max_tokens_to_sample": max_tokens or 1000, - "temperature": temperature, - "top_p": 0.9, - } - body = json.dumps(params) - - response = self._client.invoke_model( - body=body, - modelId=self._model_id, - accept="application/json", - contentType="application/json", - ) - data = json.loads(response.get("body").read()) - - return ChatMessage(role="assistant", message=data["completion"]) - -# Create an instance of the custom client -llm_client = MyLLMClient() - -# Set the default client -set_default_client(llm_client) - -# It's 
also possible to create a custom embedding class extending BaseEmbedding -# Or you can use FastEmbed for a pre-built embedding model: -from giskard.llm.embeddings.fastembed import try_get_fastembed_embeddings -embed_client = try_get_fastembed_embeddings() -giskard.llm.embeddings.set_default_embedding(embed_client) +import litellm +import os +from typing import Optional + + +class MyCustomLLM(litellm.CustomLLM): + def completion(self, messages: str, api_key: Optional[str] = None, **kwargs) -> litellm.ModelResponse: + api_key = api_key or os.environ.get('MY_SECRET_KEY') + if api_key is None: + raise litellm.AuthenticationError("Api key is not provided") + + response = requests.post('https://www.my-fake-llm.ai/chat/completion', json={ + 'messages': messages + }, headers={'Authorization': api_key}) + + return litellm.ModelResponse(**response.json()) + + +my_custom_llm = MyCustomLLM() + +litellm.custom_provider_map = [ # 👈 KEY STEP - REGISTER HANDLER + {"provider": "my-custom-llm", "custom_handler": my_custom_llm} +] + +api_key = os.environ['MY_SECRET_KEY'] + +giskard.llm.set_llm_model("my-custom-llm/my-fake-llm-model", api_key=api_key) ``` :::::: diff --git a/docs/open_source/testset_generation/testset_generation/index.md b/docs/open_source/testset_generation/testset_generation/index.md index c71e29f45b..4187735093 100644 --- a/docs/open_source/testset_generation/testset_generation/index.md +++ b/docs/open_source/testset_generation/testset_generation/index.md @@ -91,194 +91,135 @@ like this: ```python import os import giskard -from giskard.llm.client.openai import OpenAIClient -# Set the OpenAI API key -os.environ["OPENAI_API_KEY"] = "sk-…" +os.environ["OPENAI_API_KEY"] = "your-api-key" -# Create a giskard OpenAI client -openai_client = OpenAIClient(model="gpt-4o") +# Optional, setup a model (default model is gpt-4) +giskard.llm.set_llm_model("gpt-4") +giskard.llm.set_embedding_model("text-embedding-ada-002") -# Set the default client -giskard.llm.set_llm_api("openai") -giskard.llm.set_default_client(openai_client) +# Optional Keys - OpenAI Organization, OpenAI API Base +os.environ["OPENAI_ORGANIZATION"] = "your-org-id" +os.environ["OPENAI_API_BASE"] = "openaiai-api-base" ``` +More information on [LiteLLM documentation](https://docs.litellm.ai/docs/providers/openai) + :::::: ::::::{tab-item} Azure OpenAI -Require `openai>=1.0.0` - ```python import os import giskard -# Set the Azure OpenAI API key and endpoint -os.environ['AZURE_OPENAI_API_KEY'] = '...' 
-os.environ['AZURE_OPENAI_ENDPOINT'] = 'https://xxx.openai.azure.com' -os.environ['OPENAI_API_VERSION'] = '2023-07-01-preview' +os.environ["AZURE_API_KEY"] = "" # "my-azure-api-key" +os.environ["AZURE_API_BASE"] = "" # "https://example-endpoint.openai.azure.com" +os.environ["AZURE_API_VERSION"] = "" # "2023-05-15" -# You'll need to provide the name of the model that you've deployed -# Beware, the model provided must be capable of using function calls -giskard.llm.set_llm_model('my-gpt-4-model') +giskard.llm.set_llm_model("azure/") +giskard.llm.set_embedding_model("azure/") + +# optional +os.environ["AZURE_AD_TOKEN"] = "" +os.environ["AZURE_API_TYPE"] = "" giskard.llm.set_embedding_model('my-embedding-model') ``` +More information on [LiteLLM documentation](https://docs.litellm.ai/docs/providers/azure) + :::::: ::::::{tab-item} Mistral ```python import os import giskard -from giskard.llm.client.mistral import MistralClient -# Set the Mistral API key -os.environ["MISTRAL_API_KEY"] = "…" +os.environ['MISTRAL_API_KEY'] = "" -# Create a giskard Mistral client -mistral_client = MistralClient() - -# Set the default client -giskard.llm.set_default_client(mistral_client) - -# You may also want to set the default embedding model -# Check the Custom Client code snippet for more details +giskard.llm.set_llm_model("mistral/mistral-tiny") +giskard.llm.set_embedding_model("mistral/mistral-embed") ``` +More information on [LiteLLM documentation](https://docs.litellm.ai/docs/providers/mistral) + :::::: ::::::{tab-item} Ollama ```python import giskard -from openai import OpenAI -from giskard.llm.client.openai import OpenAIClient -from giskard.llm.embeddings.openai import OpenAIEmbedding -# Setup the OpenAI client with API key and base URL for Ollama -_client = OpenAI(base_url="http://localhost:11434/v1/", api_key="ollama") +giskard.llm.set_llm_model("ollama/llama2", api_base="http://localhost:11434") # See supported models here: https://docs.litellm.ai/docs/providers/ollama#ollama-models +``` -# Wrap the original OpenAI client with giskard OpenAI client and embedding -llm_client = OpenAIClient(model="llama3.2", client=_client) -embed_client = OpenAIEmbedding(model="nomic-embed-text", client=_client) +More information on [LiteLLM documentation](https://docs.litellm.ai/docs/providers/ollama) -# Set the default client and embedding -giskard.llm.set_default_client(llm_client) -giskard.llm.embeddings.set_default_embedding(embed_client) -``` :::::: -::::::{tab-item} Claude 3 +::::::{tab-item} AWS Bedrock + +More information on [LiteLLM documentation](https://docs.litellm.ai/docs/providers/bedrock) ```python import os -import boto3 import giskard -from giskard.llm.client.bedrock import ClaudeBedrockClient -from giskard.llm.embeddings.bedrock import BedrockEmbedding - -# Create a Bedrock client -bedrock_runtime = boto3.client("bedrock-runtime", region_name=os.environ["AWS_DEFAULT_REGION"]) +os.environ["AWS_ACCESS_KEY_ID"] = "" +os.environ["AWS_SECRET_ACCESS_KEY"] = "" +os.environ["AWS_REGION_NAME"] = "" -# Wrap the Beddock client with giskard Bedrock client and embedding -claude_client = ClaudeBedrockClient(bedrock_runtime, model="anthropic.claude-3-haiku-20240307-v1:0") -embed_client = BedrockEmbedding(bedrock_runtime, model="amazon.titan-embed-text-v1") - -# Set the default client and embedding -giskard.llm.set_default_client(claude_client) -giskard.llm.embeddings.set_default_embedding(embed_client) +giskard.llm.set_llm_model("bedrock/anthropic.claude-3-sonnet-20240229-v1:0") 
+giskard.llm.set_embedding_model("bedrock/amazon.titan-embed-text-v1") ``` :::::: ::::::{tab-item} Gemini +More information on [LiteLLM documentation](https://docs.litellm.ai/docs/providers/gemini) + ```python import os import giskard -import google.generativeai as genai -from giskard.llm.client.gemini import GeminiClient - -# Set the Gemini API key -os.environ["GEMINI_API_KEY"] = "…" -# Configure the Gemini API -genai.configure(api_key=os.environ["GEMINI_API_KEY"]) +os.environ["GEMINI_API_KEY"] = "your-api-key" -# Create a giskard Gemini client -gemini_client = GeminiClient() - -# Set the default client -giskard.llm.set_default_client(gemini_client) - -# You may also want to set the default embedding model -# Check the Custom Client code snippet for more details +giskard.llm.set_llm_model("gemini/gemini-pro") ``` :::::: ::::::{tab-item} Custom Client +More information on [LiteLLM documentation](https://docs.litellm.ai/docs/providers/custom_llm_server ) + ```python +import requests import giskard -from typing import Sequence, Optional -from giskard.llm.client import set_default_client -from giskard.llm.client.base import LLMClient, ChatMessage - -# Create a custom client by extending the LLMClient class -class MyLLMClient(LLMClient): - def __init__(self, my_client): - self._client = my_client - - def complete( - self, - messages: Sequence[ChatMessage], - temperature: float = 1, - max_tokens: Optional[int] = None, - caller_id: Optional[str] = None, - seed: Optional[int] = None, - format=None, - ) -> ChatMessage: - # Create the prompt - prompt = "" - for msg in messages: - if msg.role.lower() == "assistant": - prefix = "\n\nAssistant: " - else: - prefix = "\n\nHuman: " - - prompt += prefix + msg.content - - prompt += "\n\nAssistant: " - - # Create the body - params = { - "prompt": prompt, - "max_tokens_to_sample": max_tokens or 1000, - "temperature": temperature, - "top_p": 0.9, - } - body = json.dumps(params) - - response = self._client.invoke_model( - body=body, - modelId=self._model_id, - accept="application/json", - contentType="application/json", - ) - data = json.loads(response.get("body").read()) - - return ChatMessage(role="assistant", message=data["completion"]) - -# Create an instance of the custom client -llm_client = MyLLMClient() - -# Set the default client -set_default_client(llm_client) - -# It's also possible to create a custom embedding class extending BaseEmbedding -# Or you can use FastEmbed for a pre-built embedding model: -from giskard.llm.embeddings.fastembed import try_get_fastembed_embeddings -embed_client = try_get_fastembed_embeddings() -giskard.llm.embeddings.set_default_embedding(embed_client) +import litellm +import os +from typing import Optional + + +class MyCustomLLM(litellm.CustomLLM): + def completion(self, messages: str, api_key: Optional[str] = None, **kwargs) -> litellm.ModelResponse: + api_key = api_key or os.environ.get('MY_SECRET_KEY') + if api_key is None: + raise litellm.AuthenticationError("Api key is not provided") + + response = requests.post('https://www.my-fake-llm.ai/chat/completion', json={ + 'messages': messages + }, headers={'Authorization': api_key}) + + return litellm.ModelResponse(**response.json()) + + +my_custom_llm = MyCustomLLM() + +litellm.custom_provider_map = [ # 👈 KEY STEP - REGISTER HANDLER + {"provider": "my-custom-llm", "custom_handler": my_custom_llm} +] + +api_key = os.environ['MY_SECRET_KEY'] + +giskard.llm.set_llm_model("my-custom-llm/my-fake-llm-model", api_key=api_key) ``` :::::: diff --git a/giskard/llm/__init__.py 
b/giskard/llm/__init__.py index 5d02fe0544..b1b279ad7e 100644 --- a/giskard/llm/__init__.py +++ b/giskard/llm/__init__.py @@ -1,12 +1,10 @@ -from .client import get_default_client, set_default_client, set_llm_api, set_llm_model +from .client import get_default_client, set_llm_model from .embeddings import set_default_embedding, set_embedding_model from .errors import LLMImportError __all__ = [ "LLMImportError", "get_default_client", - "set_default_client", - "set_llm_api", "set_llm_model", "set_default_embedding", "set_embedding_model", diff --git a/giskard/llm/client/__init__.py b/giskard/llm/client/__init__.py index c91f77689e..20ca3d4b7e 100644 --- a/giskard/llm/client/__init__.py +++ b/giskard/llm/client/__init__.py @@ -1,57 +1,19 @@ -from typing import Optional -from typing_extensions import deprecated - -import logging import os from .base import ChatMessage, LLMClient from .logger import LLMLogger _default_client = None -_default_llm_api: Optional[str] = None _default_llm_model = os.getenv("GSK_LLM_MODEL", "gpt-4o") _default_completion_params = dict() -_default_llm_base_url = os.getenv("GSK_LLM_BASE_URL", None) - - -@deprecated( - "set_default_client is deprecated, check documentation to setup llm: https://docs.giskard.ai/en/latest/open_source/setting_up/index.html" -) -def set_default_client(client: LLMClient): - global _default_client - _default_client = client - def _unset_default_client(): global _default_client _default_client = None -@deprecated( - "set_llm_api is deprecated, check documentation to setup llm: https://docs.giskard.ai/en/latest/open_source/setting_up/index.html" -) -def set_llm_api(llm_api: str): - if llm_api.lower() not in {"azure", "openai"}: - raise ValueError("Giskard LLM-based evaluators is only working with `azure` and `openai`") - - global _default_llm_api - _default_llm_api = llm_api.lower() - # If the API is set, we unset the default client - _unset_default_client() - - -@deprecated( - "set_llm_base_url is deprecated, check documentation to setup llm: https://docs.giskard.ai/en/latest/open_source/setting_up/index.html" -) -def set_llm_base_url(llm_base_url: Optional[str]): - global _default_llm_base_url - _default_llm_base_url = llm_base_url - # If the model is set, we unset the default client - _unset_default_client() - - def set_llm_model(llm_model: str, **kwargs): global _default_llm_model global _default_completion_params @@ -63,22 +25,6 @@ def set_llm_model(llm_model: str, **kwargs): _unset_default_client() -def get_default_llm_api() -> str: - global _default_llm_api - if _default_llm_api is None: - _default_llm_api = os.getenv( - "GSK_LLM_API", "azure" if "AZURE_OPENAI_API_KEY" in os.environ else "openai" - ).lower() - - if _default_llm_api not in {"azure", "openai"}: - logging.warning( - f"LLM-based evaluation is only working with `azure` and `openai`. 
Found {_default_llm_api} in GSK_LLM_API, falling back to `openai`" - ) - _default_llm_api = "openai" - - return _default_llm_api - - def get_default_client() -> LLMClient: global _default_client @@ -95,13 +41,4 @@ def get_default_client() -> LLMClient: return _default_client -__all__ = [ - "LLMClient", - "ChatMessage", - "LLMLogger", - "get_default_client", - "set_llm_model", - "set_llm_api", - "set_default_client", - "set_llm_base_url", -] +__all__ = ["LLMClient", "ChatMessage", "LLMLogger", "get_default_client", "set_llm_model"] diff --git a/giskard/llm/client/bedrock.py b/giskard/llm/client/bedrock.py deleted file mode 100644 index eeafde0d7c..0000000000 --- a/giskard/llm/client/bedrock.py +++ /dev/null @@ -1,177 +0,0 @@ -from typing import Dict, List, Optional, Sequence -from typing_extensions import deprecated - -import json -from abc import ABC, abstractmethod - -from ..config import LLMConfigurationError -from ..errors import LLMImportError -from . import LLMClient -from .base import ChatMessage - -try: - import boto3 # noqa: F401 -except ImportError as err: - raise LLMImportError( - flavor="llm", msg="To use Bedrock models, please install the `boto3` package with `pip install boto3`" - ) from err - - -@deprecated( - "BaseBedrockClient is deprecated, check documentation to setup llm: https://docs.giskard.ai/en/latest/open_source/setting_up/index.html" -) -class BaseBedrockClient(LLMClient, ABC): - def __init__(self, bedrock_runtime_client, model: str): - self._client = bedrock_runtime_client - self.model = model - - @abstractmethod - def _format_body( - self, - messages: Sequence[ChatMessage], - temperature: float = 1, - max_tokens: Optional[int] = 1000, - caller_id: Optional[str] = None, - seed: Optional[int] = None, - format=None, - ) -> Dict: - ... - - @abstractmethod - def _parse_completion(self, completion, caller_id: Optional[str] = None) -> ChatMessage: - ... 
- - def complete( - self, - messages: Sequence[ChatMessage], - temperature: float = 1, - max_tokens: Optional[int] = 1000, - caller_id: Optional[str] = None, - seed: Optional[int] = None, - format=None, - ) -> ChatMessage: - # create the json body to send to the API - body = self._format_body(messages, temperature, max_tokens, caller_id, seed, format) - - # invoke the model and get the response - try: - accept = "application/json" - contentType = "application/json" - response = self._client.invoke_model(body=body, modelId=self.model, accept=accept, contentType=contentType) - completion = json.loads(response.get("body").read()) - except RuntimeError as err: - raise LLMConfigurationError("Could not get response from Bedrock API") from err - - return self._parse_completion(completion, caller_id) - - -class ClaudeBedrockClient(BaseBedrockClient): - def __init__( - self, - bedrock_runtime_client, - model: str = "anthropic.claude-3-sonnet-20240229-v1:0", - anthropic_version: str = "bedrock-2023-05-31", - ): - # only supporting claude 3 - if "claude-3" not in model: - raise LLMConfigurationError(f"Only claude-3 models are supported as of now, got {self.model}") - - super().__init__(bedrock_runtime_client, model) - self.anthropic_version = anthropic_version - - def _format_body( - self, - messages: Sequence[ChatMessage], - temperature: float = 1, - max_tokens: Optional[int] = 1000, - caller_id: Optional[str] = None, - seed: Optional[int] = None, - format=None, - ) -> Dict: - input_msg_prompt: List = [] - system_prompts = [] - - for msg in messages: - # System prompt is a specific parameter in Claude - if msg.role.lower() == "system": - system_prompts.append(msg.content) - continue - - # Only role user and assistant are allowed - role = msg.role.lower() - role = role if role in ["assistant", "user"] else "user" - - # Consecutive messages need to be grouped - last_message = None if len(input_msg_prompt) == 0 else input_msg_prompt[-1] - if last_message is not None and last_message["role"] == role: - last_message["content"].append({"type": "text", "text": msg.content}) - continue - - input_msg_prompt.append({"role": role, "content": [{"type": "text", "text": msg.content}]}) - - return json.dumps( - { - "anthropic_version": "bedrock-2023-05-31", - "max_tokens": max_tokens, - "temperature": temperature, - "system": "\n".join(system_prompts), - "messages": input_msg_prompt, - } - ) - - def _parse_completion(self, completion, caller_id: Optional[str] = None) -> ChatMessage: - self.logger.log_call( - prompt_tokens=completion["usage"]["input_tokens"], - sampled_tokens=completion["usage"]["output_tokens"], - model=self.model, - client_class=self.__class__.__name__, - caller_id=caller_id, - ) - - msg = completion["content"][0]["text"] - return ChatMessage(role="assistant", content=msg) - - -class LLamaBedrockClient(BaseBedrockClient): - def __init__(self, bedrock_runtime_client, model: str = "meta.llama3-8b-instruct-v1:0"): - # only supporting llama - if "llama" not in model: - raise LLMConfigurationError(f"Only Llama models are supported as of now, got {self.model}") - - super().__init__(bedrock_runtime_client, model) - - def _format_body( - self, - messages: Sequence[ChatMessage], - temperature: float = 1, - max_tokens: Optional[int] = 1000, - caller_id: Optional[str] = None, - seed: Optional[int] = None, - format=None, - ) -> Dict: - # Create the messages format needed for llama bedrock specifically - prompts = [] - for msg in messages: - prompts.append(f"# {msg.role}:\n{msg.content}\n") - - # create the 
json body to send to the API - messages = "\n".join(prompts) - return json.dumps( - { - "max_gen_len": max_tokens, - "temperature": temperature, - "prompt": f"{messages}\n# assistant:\n", - } - ) - - def _parse_completion(self, completion, caller_id: Optional[str] = None) -> ChatMessage: - self.logger.log_call( - prompt_tokens=completion["prompt_token_count"], - sampled_tokens=completion["generation_token_count"], - model=self.model, - client_class=self.__class__.__name__, - caller_id=caller_id, - ) - - msg = completion["generation"] - return ChatMessage(role="assistant", content=msg) diff --git a/giskard/llm/client/copilot.py b/giskard/llm/client/copilot.py index 34002841a6..a671d94e64 100644 --- a/giskard/llm/client/copilot.py +++ b/giskard/llm/client/copilot.py @@ -1,5 +1,4 @@ from typing import Optional, Sequence -from typing_extensions import deprecated from dataclasses import dataclass from logging import warning @@ -43,9 +42,6 @@ def _format_message(msg: ChatMessage) -> dict: return fmt_msg -@deprecated( - "GiskardCopilotClient is deprecated, check documentation to setup llm: https://docs.giskard.ai/en/latest/open_source/setting_up/index.html" -) class GiskardCopilotClient(OpenAIClient): def complete( self, diff --git a/giskard/llm/client/gemini.py b/giskard/llm/client/gemini.py deleted file mode 100644 index 8be07dd85e..0000000000 --- a/giskard/llm/client/gemini.py +++ /dev/null @@ -1,94 +0,0 @@ -from typing import Optional, Sequence -from typing_extensions import deprecated - -from logging import warning - -from ..config import LLMConfigurationError -from ..errors import LLMImportError -from . import LLMClient -from .base import ChatMessage - -try: - import google.generativeai as genai - from google.generativeai.types import ContentDict -except ImportError as err: - raise LLMImportError( - flavor="llm", - msg="To use Gemini models, please install the `genai` package with `pip install google-generativeai`", - ) from err - -AUTH_ERROR_MESSAGE = ( - "Could not get Response from Gemini API. Please make sure you have configured the API key by " - "setting GOOGLE_API_KEY in the environment." 
-) - - -def _format(messages: Sequence[ChatMessage]) -> Sequence[ContentDict]: - system_prompts = [] - content = [] - - for message in messages: - if message.role == "system": - system_prompts.append(message.content) - - if len(content) == 0: - content.append(ContentDict(role="model", parts=[])) - - content[0]["parts"].insert(0, f"# System:\n{message.content}") - - continue - - role = "model" if message.role == "assistant" else "user" - - # Consecutive messages need to be grouped - last_message = None if len(content) == 0 else content[-1] - if last_message is not None and last_message["role"] == role: - last_message["parts"].append(message.content) - continue - - content.append(ContentDict(role=role, parts=[message.content])) - - return content - - -@deprecated( - "GeminiClient is deprecated, check documentation to setup llm: https://docs.giskard.ai/en/latest/open_source/setting_up/index.html" -) -class GeminiClient(LLMClient): - def __init__(self, model: str = "gemini-pro", _client=None): - self.model = model - self._client = _client or genai.GenerativeModel(self.model) - - def complete( - self, - messages: Sequence[ChatMessage], - temperature: float = 1.0, - max_tokens: Optional[int] = None, - caller_id: Optional[str] = None, - seed: Optional[int] = None, - format=None, - ) -> ChatMessage: - if seed is not None: - warning("Unsupported seed, ignoring.") - - if format: - warning(f"Unsupported format '{format}', ignoring.") - - try: - completion = self._client.generate_content( - contents=_format(messages), - generation_config=genai.types.GenerationConfig(temperature=temperature, max_output_tokens=max_tokens), - ) - except RuntimeError as err: - raise LLMConfigurationError(AUTH_ERROR_MESSAGE) from err - - self.logger.log_call( - prompt_tokens=self._client.count_tokens([m.content for m in messages]).total_tokens, - sampled_tokens=self._client.count_tokens(completion.text).total_tokens, - model=self.model, - client_class=self.__class__.__name__, - caller_id=caller_id, - ) - - # Assuming the response structure is similar to the ChatMessage structure - return ChatMessage(role=completion.candidates[0].content.role, content=completion.text) diff --git a/giskard/llm/client/mistral.py b/giskard/llm/client/mistral.py deleted file mode 100644 index 2aa4e6bcab..0000000000 --- a/giskard/llm/client/mistral.py +++ /dev/null @@ -1,70 +0,0 @@ -from typing import Optional, Sequence -from typing_extensions import deprecated - -import os -from dataclasses import asdict -from logging import warning - -from ..config import LLMConfigurationError -from ..errors import LLMImportError -from . 
import LLMClient -from .base import ChatMessage - -try: - from mistralai import Mistral -except ImportError as err: - raise LLMImportError( - flavor="llm", msg="To use Mistral models, please install the `mistralai` package with `pip install mistralai`" - ) from err - - -@deprecated( - "MistralClient is deprecated, check documentation to setup llm: https://docs.giskard.ai/en/latest/open_source/setting_up/index.html" -) -class MistralClient(LLMClient): - def __init__(self, model: str = "mistral-large-latest", client: Mistral = None): - self.model = model - self._client = client or Mistral(api_key=os.getenv("MISTRAL_API_KEY", "")) - - def complete( - self, - messages: Sequence[ChatMessage], - temperature: float = 1.0, - max_tokens: Optional[int] = None, - caller_id: Optional[str] = None, - seed: Optional[int] = None, - format: str = None, - ) -> ChatMessage: - extra_params = dict() - if seed is not None: - extra_params["random_seed"] = seed - - if format not in (None, "json", "json_object") and "large" not in self.model: - warning(f"Unsupported format '{format}', ignoring.") - format = None - - if format == "json" or format == "json_object": - extra_params["response_format"] = {"type": "json_object"} - - try: - completion = self._client.chat.complete( - model=self.model, - messages=[asdict(m) for m in messages], - temperature=temperature, - max_tokens=max_tokens, - **extra_params, - ) - except RuntimeError as err: - raise LLMConfigurationError("Could not get response from Mistral API") from err - - self.logger.log_call( - prompt_tokens=completion.usage.prompt_tokens, - sampled_tokens=completion.usage.completion_tokens, - model=self.model, - client_class=self.__class__.__name__, - caller_id=caller_id, - ) - - msg = completion.choices[0].message - - return ChatMessage(role=msg.role, content=msg.content) diff --git a/giskard/llm/client/openai.py b/giskard/llm/client/openai.py index 4b85ea940b..0b5d138e30 100644 --- a/giskard/llm/client/openai.py +++ b/giskard/llm/client/openai.py @@ -1,5 +1,4 @@ from typing import Optional, Sequence -from typing_extensions import deprecated from dataclasses import asdict from logging import warning @@ -30,9 +29,6 @@ def _supports_json_format(model: str) -> bool: return False -@deprecated( - "OpenAIClient is deprecated, check documentation to setup llm: https://docs.giskard.ai/en/latest/open_source/setting_up/index.html" -) class OpenAIClient(LLMClient): def __init__( self, model: str = "gpt-4-turbo-preview", client: openai.Client = None, json_mode: Optional[bool] = None diff --git a/tests/llm/test_llm_client.py b/tests/llm/test_llm_client.py index 428c0d4dbe..b1f67e3d9c 100644 --- a/tests/llm/test_llm_client.py +++ b/tests/llm/test_llm_client.py @@ -1,16 +1,12 @@ -import json -from unittest.mock import MagicMock, Mock +from unittest.mock import Mock, patch import pydantic -import pytest -from google.generativeai.types import ContentDict from openai.types import CompletionUsage from openai.types.chat import ChatCompletion, ChatCompletionMessage from openai.types.chat.chat_completion import Choice from giskard.llm.client import ChatMessage -from giskard.llm.client.bedrock import ClaudeBedrockClient -from giskard.llm.client.gemini import GeminiClient +from giskard.llm.client.litellm import LiteLLMClient from giskard.llm.client.openai import OpenAIClient PYDANTIC_V2 = pydantic.__version__.startswith("2.") @@ -48,110 +44,21 @@ def test_llm_complete_message(): assert res.content == "This is a test!" 
-@pytest.mark.skipif(not PYDANTIC_V2, reason="Mistral raise an error with pydantic < 2") -def test_mistral_client(): - from mistralai.models import ChatCompletionChoice, ChatCompletionResponse, UsageInfo - - demo_response = ChatCompletionResponse( - id="2d62260a7a354e02922a4f6ad36930d3", - object="chat.completion", - created=1630000000, - model="mistral-large", - choices=[ - ChatCompletionChoice( - index=0, - message={"role": "assistant", "content": "This is a test!"}, - finish_reason="stop", - ) - ], - usage=UsageInfo(prompt_tokens=9, total_tokens=89, completion_tokens=80), - ) - +@patch("litellm.completion") +def test_litellm_client(completion): + completion.return_value = DEMO_OPENAI_RESPONSE client = Mock() - client.chat.complete.return_value = demo_response - - from giskard.llm.client.mistral import MistralClient - - res = MistralClient(model="mistral-large", client=client).complete( - [ChatMessage(role="user", content="Hello")], temperature=0.11, max_tokens=12 - ) - - client.chat.complete.assert_called_once() - assert client.chat.complete.call_args[1]["messages"] == [{"role": "user", "content": "Hello"}] - assert client.chat.complete.call_args[1]["temperature"] == 0.11 - assert client.chat.complete.call_args[1]["max_tokens"] == 12 - - assert isinstance(res, ChatMessage) - assert res.content == "This is a test!" - - -def test_claude_bedrock_client(): - # Mock the bedrock_runtime_client - bedrock_runtime_client = Mock() - bedrock_runtime_client.invoke_model = MagicMock( - return_value={ - "body": MagicMock( - read=MagicMock( - return_value=json.dumps( - { - "id": "chatcmpl-abc123", - "model": "anthropic.claude-3-sonnet-20240229-v1:0", - "type": "message", - "role": "assistant", - "content": [{"type": "text", "text": "This is a test!"}], - "stop_reason": "end_turn", - "usage": { - "input_tokens": 9, - "output_tokens": 89, - }, - } - ) - ) - ) - } - ) - - # Initialize the ClaudeBedrockClient with the mocked bedrock_runtime_client - client = ClaudeBedrockClient( - bedrock_runtime_client, model="anthropic.claude-3-sonnet-20240229-v1:0", anthropic_version="bedrock-2023-05-31" - ) - - # Call the complete method - res = client.complete([ChatMessage(role="user", content="Hello")], temperature=0.11, max_tokens=12) - - # Assert that the invoke_model method was called with the correct arguments - bedrock_runtime_client.invoke_model.assert_called_once() - - # Assert that the response is a ChatMessage and has the correct content - assert isinstance(res, ChatMessage) - assert res.content == "This is a test!" 
-
-
-def test_gemini_client():
-    # Mock the Gemini client
-    gemini_api_client = Mock()
-    gemini_api_client.generate_content = MagicMock(
-        return_value=Mock(text="This is a test!", candidates=[Mock(content=Mock(role="assistant"))])
-    )
-    gemini_api_client.count_tokens = MagicMock(
-        side_effect=lambda text: Mock(
-            total_tokens=sum(len(t.split()) for t in text) if isinstance(text, list) else len(text.split())
-        )
+    client.chat.completions.create.return_value = DEMO_OPENAI_RESPONSE
+    res = LiteLLMClient("gpt-4o", {"api_key": "api_key"}).complete(
+        [ChatMessage(role="system", content="Hello")], temperature=0.11, max_tokens=1
     )
-    # Initialize the GeminiClient with the mocked gemini_api_client
-    client = GeminiClient(model="gemini-pro", _client=gemini_api_client)
-
-    # Call the complete method
-    res = client.complete([ChatMessage(role="user", content="Hello")], temperature=0.11, max_tokens=12)
-    print(res)
-
-    # Assert that the generate_content method was called with the correct arguments
-    gemini_api_client.generate_content.assert_called_once()
-    assert gemini_api_client.generate_content.call_args[1]["contents"] == ([ContentDict(role="user", parts=["Hello"])])
-    assert gemini_api_client.generate_content.call_args[1]["generation_config"].temperature == 0.11
-    assert gemini_api_client.generate_content.call_args[1]["generation_config"].max_output_tokens == 12
+    completion.assert_called_once()
+    assert completion.call_args[1]["messages"] == [{"role": "system", "content": "Hello"}]
+    assert completion.call_args[1]["temperature"] == 0.11
+    assert completion.call_args[1]["max_tokens"] == 1
+    assert completion.call_args[1]["api_key"] == "api_key"
+    assert completion.call_args[1]["model"] == "gpt-4o"
-    # Assert that the response is a ChatMessage and has the correct content
     assert isinstance(res, ChatMessage)
     assert res.content == "This is a test!"
diff --git a/tests/utils/test_logging_utils.py b/tests/utils/test_logging_utils.py
index af54944bed..4b3028dfd0 100644
--- a/tests/utils/test_logging_utils.py
+++ b/tests/utils/test_logging_utils.py
@@ -1,7 +1,6 @@
 import logging
 
 import giskard
-from giskard.llm.client import openai
 
 
 def test_giskard_log_level():
@@ -12,7 +11,7 @@ def test_other_package_log_level_unset():
     assert (
-        logging.getLogger(openai.__name__).level == logging.NOTSET
+        logging.getLogger(giskard.llm.client.__name__).level == logging.NOTSET
     ), "Non giskard package log level should't be touched by giskard (NOTSET)"
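
Reviewer note: to try the new configuration path end to end, the sketch below wires Giskard to an OpenAI model through LiteLLM and runs a scan on a toy wrapped model. The API key, model names, and the `answer_fn` stub are placeholders for illustration, not part of this PR.

```python
import os

import giskard

# Placeholder credentials and model names -- any LiteLLM provider string should work the same way.
os.environ["OPENAI_API_KEY"] = "your-api-key"
giskard.llm.set_llm_model("gpt-4o")
giskard.llm.set_embedding_model("text-embedding-ada-002")


def answer_fn(df):
    # Hypothetical agent under test: echoes each question back.
    return ["You asked: " + q for q in df["question"]]


model = giskard.Model(
    model=answer_fn,
    model_type="text_generation",
    name="Demo agent",
    description="Toy agent used to smoke-test the LiteLLM-backed scan.",
    feature_names=["question"],
)

scan_report = giskard.scan(model)  # scan detectors now reach the LLM through the LiteLLM client
```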
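
The testset-generation guide touched by this PR can be exercised the same way through the RAGET entry points. A minimal sketch, assuming `giskard.rag.KnowledgeBase.from_pandas` and `generate_testset` as documented in the RAGET guide; the knowledge-base content is made up:

```python
import pandas as pd

from giskard.rag import KnowledgeBase, generate_testset

# Made-up knowledge base content, for illustration only.
df = pd.DataFrame({"text": ["Giskard scans LLM agents for vulnerabilities.", "RAGET generates evaluation questions from a knowledge base."]})

knowledge_base = KnowledgeBase.from_pandas(df, columns=["text"])
testset = generate_testset(
    knowledge_base,
    num_questions=10,
    agent_description="A documentation assistant for the Giskard library",
)
testset.save("my_testset.jsonl")
```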
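
Finally, a quick sanity check that default-client resolution still works after the migration, using only APIs this PR keeps (`get_default_client`, `ChatMessage`, `complete`):

```python
from giskard.llm import get_default_client
from giskard.llm.client import ChatMessage

client = get_default_client()  # resolves to the LiteLLM-backed client once set_llm_model() has been called
answer = client.complete(
    [ChatMessage(role="user", content="Reply with the single word: pong")],
    temperature=0.0,
    max_tokens=8,
)
print(answer.role, answer.content)
```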