Fold function_resolver into model_server #103

Merged · 1 commit · Oct 1, 2024
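
This PR collapses the standalone function_resolver FastAPI service into model_server: the Envoy `arch_fc` cluster now points at `model_server` on port 80, the resolver's Dockerfile, launch config, and test fixtures are deleted, and its `/v1/chat/completions` route is re-exposed from `model_server/app/main.py`. A minimal client sketch for the consolidated endpoint is below; the host/port come from the Dockerfile `CMD` and Envoy config in this diff, while the exact request schema (OpenAI-style `messages` plus `tools` on `ChatMessage`) and the `get_weather` tool are assumptions for illustration.

```python
# Hypothetical client for the consolidated model_server endpoint.
# Assumes model_server is reachable on localhost:80 and that ChatMessage
# accepts OpenAI-style "messages" and "tools" fields.
import requests

payload = {
    "messages": [{"role": "user", "content": "What is the weather in Seattle?"}],
    "tools": [
        {
            "type": "function",
            "function": {
                "name": "get_weather",  # hypothetical tool, for illustration only
                "parameters": {
                    "type": "object",
                    "properties": {"city": {"type": "string"}},
                    "required": ["city"],
                },
            },
        }
    ],
}

resp = requests.post("http://localhost:80/v1/chat/completions", json=payload, timeout=30)
resp.raise_for_status()
print(resp.json())
```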
2 changes: 1 addition & 1 deletion arch/envoy.template.yaml
@@ -171,7 +171,7 @@ static_resources:
- endpoint:
address:
socket_address:
address: function_resolver
address: model_server
port_value: 80
hostname: "arch_fc"
{% for _, cluster in arch_clusters.items() %}
1 change: 0 additions & 1 deletion config_generator/Dockerfile
@@ -4,6 +4,5 @@ COPY config_generator/requirements.txt .
RUN pip install -r requirements.txt
COPY config_generator/config_generator.py .
COPY arch/envoy.template.yaml .
COPY arch/katanemo-config.yaml .

CMD ["python", "config_generator.py"]
15 changes: 1 addition & 14 deletions demos/function_calling/docker-compose.yaml
@@ -41,23 +41,10 @@ services:
volumes:
- ~/.cache/huggingface:/root/.cache/huggingface
- ./arch_config.yaml:/root/arch_config.yaml

function_resolver:
build:
context: ../../function_resolver
dockerfile: Dockerfile
ports:
- "18082:80"
healthcheck:
test: ["CMD", "curl" ,"http://localhost:80/healthz"]
interval: 5s
retries: 20
volumes:
- ~/.cache/huggingface:/root/.cache/huggingface
environment:
# use ollama endpoint that is hosted by host machine (no virtualization)
- OLLAMA_ENDPOINT=${OLLAMA_ENDPOINT:-host.docker.internal}
- OLLAMA_MODEL=Arch-Function-Calling-3B-Q4_K_M
# use ollama endpoint that is hosted by host machine (no virtualization)
# uncomment following line to use ollama endpoint that is hosted by docker
# - OLLAMA_ENDPOINT=ollama
# - OLLAMA_MODEL=Arch-Function-Calling-1.5B:Q4_K_M
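
With the function_resolver service removed from the compose file, the `OLLAMA_ENDPOINT`/`OLLAMA_MODEL` variables shown above now configure model_server directly. A rough sketch of how the handler side consumes them, modeled on the OpenAI-client setup visible in the arch_fc diff below (the default values are assumptions taken from this compose file):

```python
# Sketch of how model_server's arch_fc module is wired to Ollama, based on the
# environment variables set in docker-compose.yaml. Defaults are assumptions.
import os
from openai import OpenAI

ollama_endpoint = os.getenv("OLLAMA_ENDPOINT", "host.docker.internal")
ollama_model = os.getenv("OLLAMA_MODEL", "Arch-Function-Calling-3B-Q4_K_M")

# Ollama exposes an OpenAI-compatible API on port 11434.
client = OpenAI(
    base_url="http://{}:11434/v1/".format(ollama_endpoint),
    api_key="ollama",  # required by the client library but ignored by Ollama
)
```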
16 changes: 0 additions & 16 deletions function_resolver/.vscode/launch.json

This file was deleted.

30 changes: 0 additions & 30 deletions function_resolver/Dockerfile

This file was deleted.

4 changes: 0 additions & 4 deletions function_resolver/requirements.txt

This file was deleted.

5 changes: 0 additions & 5 deletions function_resolver/test/test.sh

This file was deleted.

5 changes: 0 additions & 5 deletions function_resolver/test/test_envoy_cluster.sh

This file was deleted.

1 change: 0 additions & 1 deletion function_resolver/test/test_missing_param.sh

This file was deleted.

33 changes: 0 additions & 33 deletions function_resolver/test/test_payload.json

This file was deleted.

32 changes: 0 additions & 32 deletions function_resolver/test/test_payload_missing_param.json

This file was deleted.

29 changes: 0 additions & 29 deletions function_resolver/test/test_weather.json

This file was deleted.

4 changes: 0 additions & 4 deletions gateway.code-workspace
@@ -12,10 +12,6 @@
"name": "model_server",
"path": "model_server"
},
{
"name": "function_resolver",
"path": "function_resolver"
},
{
"name": "chatbot_ui",
"path": "chatbot_ui"
5 changes: 2 additions & 3 deletions model_server/.vscode/launch.json
@@ -5,12 +5,11 @@
"version": "0.2.0",
"configurations": [
{
"name": "embedding server",
"cwd": "${workspaceFolder}/app",
"name": "model server",
"type": "debugpy",
"request": "launch",
"module": "uvicorn",
"args": ["main:app","--reload", "--port", "8000"],
"args": ["app.main:app","--reload", "--port", "8000"],
}
]
}
4 changes: 2 additions & 2 deletions model_server/Dockerfile
@@ -31,7 +31,7 @@ ENV NER_MODELS="urchade/gliner_large-v2.1"

COPY --from=builder /runtime /usr/local

COPY /app /app
COPY ./ /app
WORKDIR /app

RUN apt-get update && apt-get install -y \
@@ -45,4 +45,4 @@ RUN apt-get update && apt-get install -y \
# RUN python install.py && \
# find /root/.cache/torch/sentence_transformers/ -name onnx -exec rm -rf {} +

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"]
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "80"]
@@ -1,9 +1,9 @@
import json
import random
from fastapi import FastAPI, Response
from arch_handler import ArchHandler
from bolt_handler import BoltHandler
from common import ChatMessage
from app.arch_fc.arch_handler import ArchHandler
from app.arch_fc.bolt_handler import BoltHandler
from app.arch_fc.common import ChatMessage
import logging
from openai import OpenAI
import os
@@ -21,7 +21,7 @@
logger.info(f"using model: {ollama_model}")
logger.info(f"using ollama endpoint: {ollama_endpoint}")

app = FastAPI()
# app = FastAPI()

client = OpenAI(
base_url='http://{}:11434/v1/'.format(ollama_endpoint),
@@ -30,14 +30,7 @@
api_key='ollama',
)

@app.get("/healthz")
async def healthz():
return {
"status": "ok"
}


@app.post("/v1/chat/completions")
async def chat_completion(req: ChatMessage, res: Response):
logger.info("starting request")
tools_encoded = handler._format_system(req.tools)
File renamed without changes.
File renamed without changes.
4 changes: 2 additions & 2 deletions model_server/app/load_models.py
@@ -3,8 +3,8 @@
from gliner import GLiNER
from transformers import AutoTokenizer, pipeline
import sqlite3
from employee_data_generator import generate_employee_data
from network_data_generator import (
from app.employee_data_generator import generate_employee_data
from app.network_data_generator import (
generate_device_data,
generate_interface_stats_data,
generate_flow_data,
20 changes: 16 additions & 4 deletions model_server/app/main.py
@@ -1,17 +1,20 @@
import os
from fastapi import FastAPI, Response, HTTPException
from pydantic import BaseModel
from load_models import (
from app.load_models import (
load_ner_models,
load_transformers,
load_guard_model,
load_zero_shot_models,
)
from utils import GuardHandler, split_text_into_chunks
from app.utils import GuardHandler, split_text_into_chunks
import torch
import yaml
import string
import time
import logging
from app.arch_fc.arch_fc import chat_completion as arch_fc_chat_completion, ChatMessage
import os.path

logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
@@ -22,8 +25,11 @@
ner_models = load_ner_models()
zero_shot_models = load_zero_shot_models()

with open("/root/arch_config.yaml", "r") as file:
config = yaml.safe_load(file)
config = {}

if os.path.exists("/root/arch_config.yaml"):
with open("/root/arch_config.yaml", "r") as file:
config = yaml.safe_load(file)
with open("guard_model_config.yaml") as f:
guard_model_config = yaml.safe_load(f)

@@ -231,6 +237,12 @@ async def zeroshot(req: ZeroShotRequest, res: Response):
}


@app.post("/v1/chat/completions")
async def chat_completion(req: ChatMessage, res: Response):
result = await arch_fc_chat_completion(req, res)
return result


'''
*****
Adding new functions to test the usecases - Sampreeth
3 changes: 3 additions & 0 deletions model_server/requirements.txt
@@ -13,3 +13,6 @@ openvino
psutil
pandas
dateparser
openai
pandas
tf-keras