feat: openai.Model.list() #499

Merged: 9 commits on Oct 14, 2023
9 changes: 9 additions & 0 deletions openllm-python/src/openllm/_service.py
@@ -6,6 +6,7 @@
import _service_vars as svars
import orjson

from fastapi import FastAPI
from starlette.applications import Starlette
from starlette.responses import JSONResponse
from starlette.routing import Route
@@ -62,6 +63,7 @@ async def generate_stream_v1(input_dict: dict[str, t.Any]) -> t.AsyncGenerator[s
input=bentoml.io.JSON.from_sample(openllm.utils.bentoml_cattr.unstructure(openllm.openai.CompletionRequest(prompt='What is 1+1?', model=runner.llm_type))),
output=bentoml.io.Text())
async def completion_v1(input_dict: dict[str, t.Any], ctx: bentoml.Context) -> str | t.AsyncGenerator[str, None]:
  if input_dict.get('model', None) != model: raise ValueError(f"Model '{input_dict.get('model')}' is not supported. Run openai.Model.list() to see all supported models.")
prompt = input_dict.pop('prompt', None)
if prompt is None: raise ValueError("'prompt' should not be None.")
stream = input_dict.pop('stream', False)
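For context, a hedged sketch of how this new check surfaces to a client using the legacy openai Python SDK (<1.0). The server address and the assumption that this handler is exposed where the OpenAI client expects (/v1/completions) are illustrative only, not part of this diff:

```python
import openai

openai.api_base = 'http://localhost:3000/v1'  # assumed address of a locally served Bento
openai.api_key = 'na'                         # placeholder; OpenLLM does not check the key

# Passing a 'model' other than the served model trips the check added in completion_v1,
# which raises: Model '...' is not supported. Run openai.Model.list() to see all supported models.
openai.Completion.create(model='some-other-model', prompt='What is 1+1?')
```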
@@ -118,6 +120,7 @@ async def stream_response_generator(responses: t.AsyncGenerator[str, None]) -> t
}], model=runner.llm_type))),
output=bentoml.io.Text())
async def chat_completion_v1(input_dict: dict[str, t.Any], ctx: bentoml.Context) -> str | t.AsyncGenerator[str, None]:
  if input_dict.get('model', None) != model: raise ValueError(f"Model '{input_dict.get('model')}' is not supported. Run openai.Model.list() to see all supported models.")
prompt = openllm.openai.messages_to_prompt(input_dict['messages'])
stream = input_dict.pop('stream', False)
config = {
@@ -164,6 +167,12 @@ async def stream_response_generator(responses: t.AsyncGenerator[str, None]) -> t
model=model) # TODO: logprobs, finish_reason and usage
)).decode('utf-8')

# Mount a FastAPI sub-app that serves the OpenAI-compatible '/v1/models' endpoint.
fastapi_app = FastAPI()
@fastapi_app.get('/v1/models')
def models_v1() -> list[dict[str, t.Any]]:
  return [{'data': {'model': model, 'id': model_id}}]
svc.mount_asgi_app(fastapi_app)

@svc.api(route='/v1/metadata',
input=bentoml.io.Text(),
output=bentoml.io.JSON.from_sample({
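For completeness, a minimal client-side sketch of the new listing endpoint using the legacy openai Python SDK (<1.0), which is what the PR title refers to; the server address is an assumption:

```python
import openai

# Point the legacy (pre-1.0) OpenAI client at a locally running OpenLLM server.
# The address is an assumption; adjust it to wherever the Bento is served.
openai.api_base = 'http://localhost:3000/v1'
openai.api_key = 'na'  # placeholder; OpenLLM does not validate the key

# Issues GET /v1/models, handled by the FastAPI sub-app mounted in this diff.
# Each entry carries the served model name and model_id; completion and chat
# requests must pass a matching 'model' or the new checks raise.
print(openai.Model.list())
```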