diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ad59fb239..8297324dc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,7 +8,7 @@ default_language_version: python: python3.11 # NOTE: sync with .python-version-default repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: "v0.6.7" + rev: "v0.6.8" hooks: - id: ruff alias: r diff --git a/src/openllm/__main__.py b/src/openllm/__main__.py index 30f96582d..74029accb 100644 --- a/src/openllm/__main__.py +++ b/src/openllm/__main__.py @@ -9,14 +9,12 @@ import questionary import typer -from openllm.accelerator_spec import (DeploymentTarget, can_run, - get_local_machine_spec) +from openllm.accelerator_spec import DeploymentTarget, can_run, get_local_machine_spec from openllm.analytic import DO_NOT_TRACK, OpenLLMTyper from openllm.clean import app as clean_app from openllm.cloud import deploy as cloud_deploy from openllm.cloud import ensure_cloud_context, get_cloud_machine_spec -from openllm.common import (CHECKED, INTERACTIVE, VERBOSE_LEVEL, BentoInfo, - output) +from openllm.common import CHECKED, INTERACTIVE, VERBOSE_LEVEL, BentoInfo, output from openllm.local import run as local_run from openllm.local import serve as local_serve from openllm.model import app as model_app @@ -24,42 +22,35 @@ from openllm.repo import app as repo_app app = OpenLLMTyper( - help="`openllm hello` to get started. " - "OpenLLM is a CLI tool to manage and deploy open source LLMs and" - " get an OpenAI API compatible chat server in seconds." + help='`openllm hello` to get started. ' + 'OpenLLM is a CLI tool to manage and deploy open source LLMs and' + ' get an OpenAI API compatible chat server in seconds.' ) -app.add_typer(repo_app, name="repo") -app.add_typer(model_app, name="model") -app.add_typer(clean_app, name="clean") +app.add_typer(repo_app, name='repo') +app.add_typer(model_app, name='model') +app.add_typer(clean_app, name='clean') def _select_bento_name(models: list[BentoInfo], target: DeploymentTarget): from tabulate import tabulate options = [] - model_infos = [ - (model.repo.name, model.name, can_run(model, target)) for model in models - ] + model_infos = [(model.repo.name, model.name, can_run(model, target)) for model in models] model_name_groups = defaultdict(lambda: 0.0) for repo, name, score in model_infos: model_name_groups[repo, name] += score - table_data = [ - (name, repo, CHECKED if score > 0 else "") - for (repo, name), score in model_name_groups.items() - ] + table_data = [(name, repo, CHECKED if score > 0 else '') for (repo, name), score in model_name_groups.items()] if not table_data: - output("No model found", style="red") + output('No model found', style='red') raise typer.Exit(1) - table = tabulate(table_data, headers=["model", "repo", "locally runnable"]).split( - "\n" - ) - headers = f"{table[0]}\n {table[1]}" + table = tabulate(table_data, headers=['model', 'repo', 'locally runnable']).split('\n') + headers = f'{table[0]}\n {table[1]}' options.append(questionary.Separator(headers)) for table_data, table_line in zip(table_data, table[2:]): options.append(questionary.Choice(table_line, value=table_data[:2])) - selected = questionary.select("Select a model", options).ask() + selected = questionary.select('Select a model', options).ask() if selected is None: raise typer.Exit(1) return selected @@ -69,26 +60,24 @@ def _select_bento_version(models, target, bento_name, repo): from tabulate import tabulate model_infos = [ - [model, can_run(model, target)] - for model in models - if model.name == 
bento_name and model.repo.name == repo + [model, can_run(model, target)] for model in models if model.name == bento_name and model.repo.name == repo ] table_data = [ - [model.tag, CHECKED if score > 0 else ""] + [model.tag, CHECKED if score > 0 else ''] for model, score in model_infos if model.name == bento_name and model.repo.name == repo ] if not table_data: - output(f"No model found for {bento_name} in {repo}", style="red") + output(f'No model found for {bento_name} in {repo}', style='red') raise typer.Exit(1) - table = tabulate(table_data, headers=["version", "locally runnable"]).split("\n") + table = tabulate(table_data, headers=['version', 'locally runnable']).split('\n') options = [] - options.append(questionary.Separator(f"{table[0]}\n {table[1]}")) + options.append(questionary.Separator(f'{table[0]}\n {table[1]}')) for table_data, table_line in zip(model_infos, table[2:]): options.append(questionary.Choice(table_line, value=table_data)) - selected = questionary.select("Select a version", options).ask() + selected = questionary.select('Select a version', options).ask() if selected is None: raise typer.Exit(1) return selected @@ -100,7 +89,7 @@ def _select_target(bento, targets): options = [] targets.sort(key=lambda x: can_run(bento, x), reverse=True) if not targets: - output("No available instance type, check your bentocloud account", style="red") + output('No available instance type, check your bentocloud account', style='red') raise typer.Exit(1) table = tabulate( @@ -108,18 +97,18 @@ def _select_target(bento, targets): [ target.name, target.accelerators_repr, - f"${target.price}", - CHECKED if can_run(bento, target) else "insufficient res.", + f'${target.price}', + CHECKED if can_run(bento, target) else 'insufficient res.', ] for target in targets ], - headers=["instance type", "accelerator", "price/hr", "deployable"], - ).split("\n") - options.append(questionary.Separator(f"{table[0]}\n {table[1]}")) + headers=['instance type', 'accelerator', 'price/hr', 'deployable'], + ).split('\n') + options.append(questionary.Separator(f'{table[0]}\n {table[1]}')) for target, line in zip(targets, table[2:]): - options.append(questionary.Choice(f"{line}", value=target)) - selected = questionary.select("Select an instance type", options).ask() + options.append(questionary.Choice(f'{line}', value=target)) + selected = questionary.select('Select an instance type', options).ask() if selected is None: raise typer.Exit(1) return selected @@ -128,102 +117,84 @@ def _select_target(bento, targets): def _select_action(bento: BentoInfo, score): if score > 0: options = [ - questionary.Separator("Available actions"), - questionary.Choice( - "0. Run the model in terminal", value="run", shortcut_key="0" - ), - questionary.Separator(f" $ openllm run {bento}"), - questionary.Separator(" "), + questionary.Separator('Available actions'), + questionary.Choice('0. Run the model in terminal', value='run', shortcut_key='0'), + questionary.Separator(f' $ openllm run {bento}'), + questionary.Separator(' '), + questionary.Choice('1. Serve the model locally and get a chat server', value='serve', shortcut_key='1'), + questionary.Separator(f' $ openllm serve {bento}'), + questionary.Separator(' '), questionary.Choice( - "1. Serve the model locally and get a chat server", - value="serve", - shortcut_key="1", + '2. 
Deploy the model to bentocloud and get a scalable chat server', value='deploy', shortcut_key='2' ), - questionary.Separator(f" $ openllm serve {bento}"), - questionary.Separator(" "), - questionary.Choice( - "2. Deploy the model to bentocloud and get a scalable chat server", - value="deploy", - shortcut_key="2", - ), - questionary.Separator(f" $ openllm deploy {bento}"), + questionary.Separator(f' $ openllm deploy {bento}'), ] else: options = [ - questionary.Separator("Available actions"), + questionary.Separator('Available actions'), questionary.Choice( - "0. Run the model in terminal", - value="run", - disabled="insufficient res.", - shortcut_key="0", + '0. Run the model in terminal', value='run', disabled='insufficient res.', shortcut_key='0' ), - questionary.Separator(f" $ openllm run {bento}"), - questionary.Separator(" "), + questionary.Separator(f' $ openllm run {bento}'), + questionary.Separator(' '), questionary.Choice( - "1. Serve the model locally and get a chat server", - value="serve", - disabled="insufficient res.", - shortcut_key="1", + '1. Serve the model locally and get a chat server', + value='serve', + disabled='insufficient res.', + shortcut_key='1', ), - questionary.Separator(f" $ openllm serve {bento}"), - questionary.Separator(" "), + questionary.Separator(f' $ openllm serve {bento}'), + questionary.Separator(' '), questionary.Choice( - "2. Deploy the model to bentocloud and get a scalable chat server", - value="deploy", - shortcut_key="2", + '2. Deploy the model to bentocloud and get a scalable chat server', value='deploy', shortcut_key='2' ), - questionary.Separator(f" $ openllm deploy {bento}"), + questionary.Separator(f' $ openllm deploy {bento}'), ] - action = questionary.select("Select an action", options).ask() + action = questionary.select('Select an action', options).ask() if action is None: raise typer.Exit(1) - if action == "run": + if action == 'run': try: port = random.randint(30000, 40000) local_run(bento, port=port) finally: - output("\nUse this command to run the action again:", style="green") - output(f" $ openllm run {bento}", style="orange") - elif action == "serve": + output('\nUse this command to run the action again:', style='green') + output(f' $ openllm run {bento}', style='orange') + elif action == 'serve': try: local_serve(bento) finally: - output("\nUse this command to run the action again:", style="green") - output(f" $ openllm serve {bento}", style="orange") - elif action == "deploy": + output('\nUse this command to run the action again:', style='green') + output(f' $ openllm serve {bento}', style='orange') + elif action == 'deploy': ensure_cloud_context() targets = get_cloud_machine_spec() target = _select_target(bento, targets) try: cloud_deploy(bento, target) finally: - output("\nUse this command to run the action again:", style="green") - output( - f" $ openllm deploy {bento} --instance-type {target.name}", - style="orange", - ) + output('\nUse this command to run the action again:', style='green') + output(f' $ openllm deploy {bento} --instance-type {target.name}', style='orange') -@app.command(help="get started interactively") +@app.command(help='get started interactively') def hello(): INTERACTIVE.set(True) # VERBOSE_LEVEL.set(20) target = get_local_machine_spec() - output(f" Detected Platform: {target.platform}", style="green") + output(f' Detected Platform: {target.platform}', style='green') if target.accelerators: - output(" Detected Accelerators: ", style="green") + output(' Detected Accelerators: ', style='green') for a in 
target.accelerators: - output(f" - {a.model} {a.memory_size}GB", style="green") + output(f' - {a.model} {a.memory_size}GB', style='green') else: - output(" Detected Accelerators: None", style="yellow") + output(' Detected Accelerators: None', style='yellow') models = list_bento() if not models: - output( - "No model found, you probably need to update the model repo:", style="red" - ) - output(" $ openllm repo update", style="orange") + output('No model found, you probably need to update the model repo:', style='red') + output(' $ openllm repo update', style='orange') raise typer.Exit(1) bento_name, repo = _select_bento_name(models, target) @@ -231,12 +202,9 @@ def hello(): _select_action(bento, score) -@app.command(help="start an OpenAI API compatible chat server and chat in browser") +@app.command(help='start an OpenAI API compatible chat server and chat in browser') def serve( - model: Annotated[str, typer.Argument()] = "", - repo: Optional[str] = None, - port: int = 3000, - verbose: bool = False, + model: Annotated[str, typer.Argument()] = '', repo: Optional[str] = None, port: int = 3000, verbose: bool = False ): if verbose: VERBOSE_LEVEL.set(20) @@ -245,9 +213,9 @@ def serve( local_serve(bento, port=port) -@app.command(help="run the model and chat in terminal") +@app.command(help='run the model and chat in terminal') def run( - model: Annotated[str, typer.Argument()] = "", + model: Annotated[str, typer.Argument()] = '', repo: Optional[str] = None, port: Optional[int] = None, timeout: int = 600, @@ -262,11 +230,9 @@ def run( local_run(bento, port=port, timeout=timeout) -@app.command( - help="deploy an production-ready OpenAI API compatible chat server to bentocloud ($100 free credit)" -) +@app.command(help='deploy a production-ready OpenAI API compatible chat server to bentocloud ($100 free credit)') def deploy( - model: Annotated[str, typer.Argument()] = "", + model: Annotated[str, typer.Argument()] = '', instance_type: Optional[str] = None, repo: Optional[str] = None, verbose: bool = False, @@ -281,10 +247,10 @@ def deploy( targets = filter(lambda x: can_run(bento, x) > 0, targets) targets = sorted(targets, key=lambda x: can_run(bento, x), reverse=True) if not targets: - output("No available instance type, check your bentocloud account", style="red") + output('No available instance type, check your bentocloud account', style='red') raise typer.Exit(1) target = targets[0] - output(f"Recommended instance type: {target.name}", style="green") + output(f'Recommended instance type: {target.name}', style='green') cloud_deploy(bento, target) @@ -292,12 +258,9 @@ def deploy( def typer_callback( verbose: int = 0, do_not_track: bool = typer.Option( - False, - "--do-not-track", - help="Whether to disable usage tracking", - envvar=DO_NOT_TRACK, + False, '--do-not-track', help='Whether to disable usage tracking', envvar=DO_NOT_TRACK ), - version: bool = typer.Option(False, "--version", "-v", help="Show version"), + version: bool = typer.Option(False, '--version', '-v', help='Show version'), ): if verbose: VERBOSE_LEVEL.set(verbose) @@ -310,5 +273,5 @@ def typer_callback( os.environ[DO_NOT_TRACK] = str(True) -if __name__ == "__main__": +if __name__ == '__main__': app() diff --git a/src/openllm/accelerator_spec.py b/src/openllm/accelerator_spec.py index 5394f88ae..a6cfdae45 100644 --- a/src/openllm/accelerator_spec.py +++ b/src/openllm/accelerator_spec.py @@ -21,14 +21,14 @@ def __eq__(self, other): return self.memory_size == other.memory_size def __repr__(self): - return
f"{self.model}({self.memory_size}GB)" + return f'{self.model}({self.memory_size}GB)' class Resource(SimpleNamespace): cpu: int = 0 memory: float gpu: int = 0 - gpu_type: str = "" + gpu_type: str = '' def __hash__(self): # type: ignore return hash((self.cpu, self.memory, self.gpu, self.gpu_type)) @@ -38,49 +38,53 @@ def __bool__(self): ACCELERATOR_SPEC_DICT: dict[str, dict] = { - "nvidia-gtx-1650": {"model": "GTX 1650", "memory_size": 4.0}, - "nvidia-gtx-1060": {"model": "GTX 1060", "memory_size": 6.0}, - "nvidia-gtx-1080-ti": {"model": "GTX 1080 Ti", "memory_size": 11.0}, - "nvidia-rtx-3060": {"model": "RTX 3060", "memory_size": 12.0}, - "nvidia-rtx-3060-ti": {"model": "RTX 3060 Ti", "memory_size": 8.0}, - "nvidia-rtx-3070-ti": {"model": "RTX 3070 Ti", "memory_size": 8.0}, - "nvidia-rtx-3080": {"model": "RTX 3080", "memory_size": 10.0}, - "nvidia-rtx-3080-ti": {"model": "RTX 3080 Ti", "memory_size": 12.0}, - "nvidia-rtx-3090": {"model": "RTX 3090", "memory_size": 24.0}, - "nvidia-rtx-4070-ti": {"model": "RTX 4070 Ti", "memory_size": 12.0}, - "nvidia-tesla-p4": {"model": "P4", "memory_size": 8.0}, - "nvidia-tesla-p100": {"model": "P100", "memory_size": 16.0}, - "nvidia-tesla-k80": {"model": "K80", "memory_size": 12.0}, - "nvidia-tesla-t4": {"model": "T4", "memory_size": 16.0}, - "nvidia-tesla-v100": {"model": "V100", "memory_size": 16.0}, - "nvidia-l4": {"model": "L4", "memory_size": 24.0}, - "nvidia-tesla-l4": {"model": "L4", "memory_size": 24.0}, - "nvidia-tesla-a10g": {"model": "A10G", "memory_size": 24.0}, - "nvidia-a100-80g": {"model": "A100", "memory_size": 80.0}, - "nvidia-a100-80gb": {"model": "A100", "memory_size": 80.0}, - "nvidia-tesla-a100": {"model": "A100", "memory_size": 40.0}, + 'nvidia-gtx-1650': {'model': 'GTX 1650', 'memory_size': 4.0}, + 'nvidia-gtx-1060': {'model': 'GTX 1060', 'memory_size': 6.0}, + 'nvidia-gtx-1080-ti': {'model': 'GTX 1080 Ti', 'memory_size': 11.0}, + 'nvidia-rtx-3060': {'model': 'RTX 3060', 'memory_size': 12.0}, + 'nvidia-rtx-3060-ti': {'model': 'RTX 3060 Ti', 'memory_size': 8.0}, + 'nvidia-rtx-3070-ti': {'model': 'RTX 3070 Ti', 'memory_size': 8.0}, + 'nvidia-rtx-3080': {'model': 'RTX 3080', 'memory_size': 10.0}, + 'nvidia-rtx-3080-ti': {'model': 'RTX 3080 Ti', 'memory_size': 12.0}, + 'nvidia-rtx-3090': {'model': 'RTX 3090', 'memory_size': 24.0}, + 'nvidia-rtx-4070-ti': {'model': 'RTX 4070 Ti', 'memory_size': 12.0}, + 'nvidia-tesla-p4': {'model': 'P4', 'memory_size': 8.0}, + 'nvidia-tesla-p100': {'model': 'P100', 'memory_size': 16.0}, + 'nvidia-tesla-k80': {'model': 'K80', 'memory_size': 12.0}, + 'nvidia-tesla-t4': {'model': 'T4', 'memory_size': 16.0}, + 'nvidia-tesla-v100': {'model': 'V100', 'memory_size': 16.0}, + 'nvidia-l4': {'model': 'L4', 'memory_size': 24.0}, + 'nvidia-tesla-l4': {'model': 'L4', 'memory_size': 24.0}, + 'nvidia-tesla-a10g': {'model': 'A10G', 'memory_size': 24.0}, + 'nvidia-a100-80g': {'model': 'A100', 'memory_size': 80.0}, + 'nvidia-a100-80gb': {'model': 'A100', 'memory_size': 80.0}, + 'nvidia-tesla-a100': {'model': 'A100', 'memory_size': 40.0}, } -ACCELERATOR_SPECS: dict[str, Accelerator] = { - key: Accelerator(**value) for key, value in ACCELERATOR_SPEC_DICT.items() -} +ACCELERATOR_SPECS: dict[str, Accelerator] = {key: Accelerator(**value) for key, value in ACCELERATOR_SPEC_DICT.items()} @functools.lru_cache def get_local_machine_spec(): if psutil.MACOS: - return DeploymentTarget(accelerators=[], source="local", platform="macos") + return DeploymentTarget(accelerators=[], source='local', platform='macos') if psutil.WINDOWS: - 
platform = "windows" + platform = 'windows' elif psutil.LINUX: - platform = "linux" + platform = 'linux' else: - raise NotImplementedError("Unsupported platform") - - from pynvml import (nvmlDeviceGetCount, nvmlDeviceGetCudaComputeCapability, - nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo, - nvmlDeviceGetName, nvmlInit, nvmlShutdown) + raise NotImplementedError('Unsupported platform') + + from pynvml import ( + nvmlDeviceGetCount, + nvmlDeviceGetCudaComputeCapability, + nvmlDeviceGetHandleByIndex, + nvmlDeviceGetMemoryInfo, + nvmlDeviceGetName, + nvmlInit, + nvmlShutdown, + ) try: nvmlInit() @@ -90,48 +94,37 @@ def get_local_machine_spec(): handle = nvmlDeviceGetHandleByIndex(i) name = nvmlDeviceGetName(handle) memory_info = nvmlDeviceGetMemoryInfo(handle) - accelerators.append( - Accelerator( - model=name, memory_size=math.ceil(int(memory_info.total) / 1024**3) - ) - ) + accelerators.append(Accelerator(model=name, memory_size=math.ceil(int(memory_info.total) / 1024**3))) compute_capability = nvmlDeviceGetCudaComputeCapability(handle) if compute_capability < (7, 5): output( - f"GPU {name} with compute capability {compute_capability} " - "may not be supported, 7.5 or higher is recommended. check " - "https://developer.nvidia.com/cuda-gpus for more information", - style="yellow", + f'GPU {name} with compute capability {compute_capability} ' + 'may not be supported, 7.5 or higher is recommended. check ' + 'https://developer.nvidia.com/cuda-gpus for more information', + style='yellow', ) nvmlShutdown() - return DeploymentTarget( - accelerators=accelerators, source="local", platform=platform - ) + return DeploymentTarget(accelerators=accelerators, source='local', platform=platform) except Exception as e: output( - "Failed to get local GPU info. Ensure nvidia driver is installed to enable local GPU deployment", - style="yellow", + 'Failed to get local GPU info. Ensure nvidia driver is installed to enable local GPU deployment', + style='yellow', ) - output(f"Error: {e}", style="red", level=20) - return DeploymentTarget(accelerators=[], source="local", platform=platform) + output(f'Error: {e}', style='red', level=20) + return DeploymentTarget(accelerators=[], source='local', platform=platform) @functools.lru_cache() -def can_run( - bento: typing.Union[Resource, BentoInfo], - target: typing.Optional[DeploymentTarget] = None, -) -> float: +def can_run(bento: typing.Union[Resource, BentoInfo], target: typing.Optional[DeploymentTarget] = None) -> float: """ Calculate if the bento can be deployed on the target. 
""" if target is None: target = get_local_machine_spec() - resource_spec = Resource( - **(bento.bento_yaml["services"][0]["config"].get("resources", {})) - ) - labels = bento.bento_yaml.get("labels", {}) - platforms = labels.get("platforms", "linux").split(",") + resource_spec = Resource(**(bento.bento_yaml['services'][0]['config'].get('resources', {}))) + labels = bento.bento_yaml.get('labels', {}) + platforms = labels.get('platforms', 'linux').split(',') if target.platform not in platforms: return 0.0 @@ -142,18 +135,10 @@ def can_run( if resource_spec.gpu > 0: required_gpu = ACCELERATOR_SPECS[resource_spec.gpu_type] - filtered_accelerators = [ - ac - for ac in target.accelerators - if ac.memory_size >= required_gpu.memory_size - ] + filtered_accelerators = [ac for ac in target.accelerators if ac.memory_size >= required_gpu.memory_size] if resource_spec.gpu > len(filtered_accelerators): return 0.0 - return ( - required_gpu.memory_size - * resource_spec.gpu - / sum(ac.memory_size for ac in target.accelerators) - ) + return required_gpu.memory_size * resource_spec.gpu / sum(ac.memory_size for ac in target.accelerators) if target.accelerators: return 0.01 / sum(ac.memory_size for ac in target.accelerators) return 1.0 diff --git a/src/openllm/analytic.py b/src/openllm/analytic.py index 716095eba..c5b898b3f 100644 --- a/src/openllm/analytic.py +++ b/src/openllm/analytic.py @@ -12,16 +12,16 @@ import typer import typer.core -DO_NOT_TRACK = "BENTOML_DO_NOT_TRACK" +DO_NOT_TRACK = 'BENTOML_DO_NOT_TRACK' class EventMeta(ABC): @property def event_name(self): # camel case to snake case - event_name = re.sub(r"(? typing.Iterable[str]: # type: igno class OpenLLMTyper(typer.Typer): def __init__(self, *args: typing.Any, **kwargs: typing.Any): - no_args_is_help = kwargs.pop("no_args_is_help", True) - context_settings = kwargs.pop("context_settings", {}) - if "help_option_names" not in context_settings: - context_settings["help_option_names"] = ("-h", "--help") - if "max_content_width" not in context_settings: - context_settings["max_content_width"] = int( - os.environ.get("COLUMNS", str(120)) - ) - klass = kwargs.pop("cls", OrderedCommands) + no_args_is_help = kwargs.pop('no_args_is_help', True) + context_settings = kwargs.pop('context_settings', {}) + if 'help_option_names' not in context_settings: + context_settings['help_option_names'] = ('-h', '--help') + if 'max_content_width' not in context_settings: + context_settings['max_content_width'] = int(os.environ.get('COLUMNS', str(120))) + klass = kwargs.pop('cls', OrderedCommands) super().__init__( - *args, - cls=klass, - no_args_is_help=no_args_is_help, - context_settings=context_settings, - **kwargs, + *args, cls=klass, no_args_is_help=no_args_is_help, context_settings=context_settings, **kwargs ) # NOTE: Since OpenLLMTyper only wraps command to add analytics, the default type-hint for @app.command @@ -79,9 +73,7 @@ def decorator(f): def wrapped(ctx: click.Context, *args, **kwargs): from bentoml._internal.utils.analytics import track - do_not_track = ( - os.environ.get(DO_NOT_TRACK, str(False)).lower() == "true" - ) + do_not_track = os.environ.get(DO_NOT_TRACK, str(False)).lower() == 'true' # so we know that the root program is openllm command_name = ctx.info_name @@ -90,7 +82,7 @@ def wrapped(ctx: click.Context, *args, **kwargs): command_group = ctx.parent.info_name elif ctx.parent.info_name == ctx.find_root().info_name: # openllm run - command_group = "openllm" + command_group = 'openllm' if do_not_track: return f(*args, **kwargs) @@ -100,9 +92,7 @@ 
def wrapped(ctx: click.Context, *args, **kwargs): duration_in_ns = time.time_ns() - start_time track( OpenllmCliEvent( - cmd_group=command_group, - cmd_name=command_name, - duration_in_ms=duration_in_ns / 1e6, + cmd_group=command_group, cmd_name=command_name, duration_in_ms=duration_in_ns / 1e6 ) ) return return_value @@ -114,9 +104,7 @@ def wrapped(ctx: click.Context, *args, **kwargs): cmd_name=command_name, duration_in_ms=duration_in_ns / 1e6, error_type=type(e).__name__, - return_code=( - 2 if isinstance(e, KeyboardInterrupt) else 1 - ), + return_code=(2 if isinstance(e, KeyboardInterrupt) else 1), ) ) raise diff --git a/src/openllm/clean.py b/src/openllm/clean.py index ebe0afeeb..a5aa11ea7 100644 --- a/src/openllm/clean.py +++ b/src/openllm/clean.py @@ -5,20 +5,19 @@ import questionary from openllm.analytic import OpenLLMTyper -from openllm.common import (CONFIG_FILE, REPO_DIR, VENV_DIR, VERBOSE_LEVEL, - output) +from openllm.common import CONFIG_FILE, REPO_DIR, VENV_DIR, VERBOSE_LEVEL, output -app = OpenLLMTyper(help="clean up and release disk space used by OpenLLM") +app = OpenLLMTyper(help='clean up and release disk space used by OpenLLM') -HUGGINGFACE_CACHE = pathlib.Path.home() / ".cache" / "huggingface" / "hub" +HUGGINGFACE_CACHE = pathlib.Path.home() / '.cache' / 'huggingface' / 'hub' def _du(path: pathlib.Path) -> int: seen_paths = set() used_space = 0 - for f in path.rglob("*"): - if os.name == "nt": # Windows system + for f in path.rglob('*'): + if os.name == 'nt': # Windows system # On Windows, directly add file sizes without considering hard links used_space += f.stat().st_size else: @@ -30,52 +29,52 @@ def _du(path: pathlib.Path) -> int: return used_space -@app.command(help="Clean up all the cached models from huggingface") +@app.command(help='Clean up all the cached models from huggingface') def model_cache(verbose: bool = False): if verbose: VERBOSE_LEVEL.set(20) used_space = _du(HUGGINGFACE_CACHE) sure = questionary.confirm( - f"This will remove all models cached by Huggingface (~{used_space / 1024 / 1024:.2f}MB), are you sure?" + f'This will remove all models cached by Huggingface (~{used_space / 1024 / 1024:.2f}MB), are you sure?' ).ask() if not sure: return shutil.rmtree(HUGGINGFACE_CACHE, ignore_errors=True) - output("All models cached by Huggingface have been removed", style="green") + output('All models cached by Huggingface have been removed', style='green') -@app.command(help="Clean up all the virtual environments created by OpenLLM") +@app.command(help='Clean up all the virtual environments created by OpenLLM') def venvs(verbose: bool = False): if verbose: VERBOSE_LEVEL.set(20) used_space = _du(VENV_DIR) sure = questionary.confirm( - f"This will remove all virtual environments created by OpenLLM (~{used_space / 1024 / 1024:.2f}MB), are you sure?" + f'This will remove all virtual environments created by OpenLLM (~{used_space / 1024 / 1024:.2f}MB), are you sure?' 
).ask() if not sure: return shutil.rmtree(VENV_DIR, ignore_errors=True) - output("All virtual environments have been removed", style="green") + output('All virtual environments have been removed', style='green') -@app.command(help="Clean up all the repositories cloned by OpenLLM") +@app.command(help='Clean up all the repositories cloned by OpenLLM') def repos(verbose: bool = False): if verbose: VERBOSE_LEVEL.set(20) shutil.rmtree(REPO_DIR, ignore_errors=True) - output("All repositories have been removed", style="green") + output('All repositories have been removed', style='green') -@app.command(help="Reset configurations to default") +@app.command(help='Reset configurations to default') def configs(verbose: bool = False): if verbose: VERBOSE_LEVEL.set(20) shutil.rmtree(CONFIG_FILE, ignore_errors=True) - output("All configurations have been reset", style="green") + output('All configurations have been reset', style='green') -@app.command(name="all", help="Clean up all above and bring OpenLLM to a fresh start") +@app.command(name='all', help='Clean up all above and bring OpenLLM to a fresh start') def all_cache(verbose: bool = False): if verbose: VERBOSE_LEVEL.set(20) diff --git a/src/openllm/cloud.py b/src/openllm/cloud.py index 8ad9869d8..83e8363bf 100644 --- a/src/openllm/cloud.py +++ b/src/openllm/cloud.py @@ -9,66 +9,61 @@ from openllm.accelerator_spec import ACCELERATOR_SPECS from openllm.analytic import OpenLLMTyper -from openllm.common import (INTERACTIVE, BentoInfo, DeploymentTarget, output, - run_command) +from openllm.common import INTERACTIVE, BentoInfo, DeploymentTarget, output, run_command app = OpenLLMTyper() def resolve_cloud_config() -> pathlib.Path: - env = os.environ.get("BENTOML_HOME") + env = os.environ.get('BENTOML_HOME') if env is not None: - return pathlib.Path(env) / ".yatai.yaml" - return pathlib.Path.home() / "bentoml" / ".yatai.yaml" + return pathlib.Path(env) / '.yatai.yaml' + return pathlib.Path.home() / 'bentoml' / '.yatai.yaml' def _get_deploy_cmd(bento: BentoInfo, target: typing.Optional[DeploymentTarget] = None): - cmd = ["bentoml", "deploy", bento.bentoml_tag] - env = {"BENTOML_HOME": f"{bento.repo.path}/bentoml"} + cmd = ['bentoml', 'deploy', bento.bentoml_tag] + env = {'BENTOML_HOME': f'{bento.repo.path}/bentoml'} - required_envs = bento.bento_yaml.get("envs", []) - required_env_names = [env["name"] for env in required_envs if "name" in env] + required_envs = bento.bento_yaml.get('envs', []) + required_env_names = [env['name'] for env in required_envs if 'name' in env] if required_env_names: output( - f"This model requires the following environment variables to run: {required_env_names!r}", - style="yellow", + f'This model requires the following environment variables to run: {required_env_names!r}', style='yellow' ) - for env_info in bento.bento_yaml.get("envs", []): - if "name" not in env_info: + for env_info in bento.bento_yaml.get('envs', []): + if 'name' not in env_info: continue - if os.environ.get(env_info["name"]): - default = os.environ[env_info["name"]] - elif "value" in env_info: - default = env_info["value"] + if os.environ.get(env_info['name']): + default = os.environ[env_info['name']] + elif 'value' in env_info: + default = env_info['value'] else: - default = "" + default = '' if INTERACTIVE.get(): import questionary value = questionary.text(f"{env_info['name']}:", default=default).ask() else: - if default == "": - output( - f"Environment variable {env_info['name']} is required but not provided", - style="red", - ) + if default == '': + 
output(f"Environment variable {env_info['name']} is required but not provided", style='red') raise typer.Exit(1) else: value = default if value is None: raise typer.Exit(1) - cmd += ["--env", f"{env_info['name']}={value}"] + cmd += ['--env', f"{env_info['name']}={value}"] if target: - cmd += ["--instance-type", target.name] + cmd += ['--instance-type', target.name] base_config = resolve_cloud_config() if not base_config.exists(): - raise Exception("Cannot find cloud config.") - shutil.copy(base_config, bento.repo.path / "bentoml" / ".yatai.yaml") + raise Exception('Cannot find cloud config.') + shutil.copy(base_config, bento.repo.path / 'bentoml' / '.yatai.yaml') return cmd, env, None @@ -76,90 +71,67 @@ def _get_deploy_cmd(bento: BentoInfo, target: typing.Optional[DeploymentTarget] def ensure_cloud_context(): import questionary - cmd = ["bentoml", "cloud", "current-context"] + cmd = ['bentoml', 'cloud', 'current-context'] try: result = subprocess.check_output(cmd, stderr=subprocess.DEVNULL) context = json.loads(result) - output( - f" bentoml already logged in: {context['endpoint']}", - style="green", - level=20, - ) + output(f" bentoml already logged in: {context['endpoint']}", style='green', level=20) except subprocess.CalledProcessError: - output(" bentoml not logged in", style="red") + output(' bentoml not logged in', style='red') if not INTERACTIVE.get(): - output("\n get bentoml logged in by:") - output(" $ bentoml cloud login", style="orange") - output("") + output('\n get bentoml logged in by:') + output(' $ bentoml cloud login', style='orange') + output('') output( """ * you may need to visit https://cloud.bentoml.com to get an account. you can also bring your own bentoml cluster (BYOC) to your team from https://bentoml.com/contact""", - style="yellow", + style='yellow', ) raise typer.Exit(1) else: action = questionary.select( - "Choose an action:", - choices=[ - "I have a BentoCloud account", - "get an account in two minutes", - ], + 'Choose an action:', choices=['I have a BentoCloud account', 'get an account in two minutes'] ).ask() if action is None: raise typer.Exit(1) - elif action == "get an account in two minutes": - output( - "Please visit https://cloud.bentoml.com to get your token", - style="yellow", - ) - endpoint = questionary.text( - "Enter the endpoint: (similar to https://my-org.cloud.bentoml.com)" - ).ask() + elif action == 'get an account in two minutes': + output('Please visit https://cloud.bentoml.com to get your token', style='yellow') + endpoint = questionary.text('Enter the endpoint: (similar to https://my-org.cloud.bentoml.com)').ask() if endpoint is None: raise typer.Exit(1) - token = questionary.text( - "Enter your token: (similar to cniluaxxxxxxxx)" - ).ask() + token = questionary.text('Enter your token: (similar to cniluaxxxxxxxx)').ask() if token is None: raise typer.Exit(1) - cmd = [ - "bentoml", - "cloud", - "login", - "--api-token", - token, - "--endpoint", - endpoint, - ] + cmd = ['bentoml', 'cloud', 'login', '--api-token', token, '--endpoint', endpoint] try: result = subprocess.check_output(cmd) - output(" Logged in successfully", style="green") + output(' Logged in successfully', style='green') except subprocess.CalledProcessError: - output(" Failed to login", style="red") + output(' Failed to login', style='red') raise typer.Exit(1) def get_cloud_machine_spec() -> list[DeploymentTarget]: ensure_cloud_context() - cmd = ["bentoml", "deployment", "list-instance-types", "-o", "json"] + cmd = ['bentoml', 'deployment', 'list-instance-types', '-o', 
'json'] try: result = subprocess.check_output(cmd, stderr=subprocess.DEVNULL) instance_types = json.loads(result) return [ DeploymentTarget( - source="cloud", - name=it["name"], - price=it["price"], - platform="linux", + source='cloud', + name=it['name'], + price=it['price'], + platform='linux', accelerators=( - [ACCELERATOR_SPECS[it["gpu_type"]] for _ in range(int(it["gpu"]))] - if it.get("gpu") and it["gpu_type"] in ACCELERATOR_SPECS + [ACCELERATOR_SPECS[it['gpu_type']] for _ in range(int(it['gpu']))] + if it.get('gpu') and it['gpu_type'] in ACCELERATOR_SPECS else [] ), ) for it in instance_types ] except (subprocess.CalledProcessError, json.JSONDecodeError): - output("Failed to get cloud instance types", style="red") + output('Failed to get cloud instance types', style='red') return [] diff --git a/src/openllm/common.py b/src/openllm/common.py index 2bb852991..a575cf831 100644 --- a/src/openllm/common.py +++ b/src/openllm/common.py @@ -19,23 +19,23 @@ import typer import typer.core -ERROR_STYLE = "red" -SUCCESS_STYLE = "green" +ERROR_STYLE = 'red' +SUCCESS_STYLE = 'green' -OPENLLM_HOME = pathlib.Path(os.getenv("OPENLLM_HOME", pathlib.Path.home() / ".openllm")) -REPO_DIR = OPENLLM_HOME / "repos" -TEMP_DIR = OPENLLM_HOME / "temp" -VENV_DIR = OPENLLM_HOME / "venv" +OPENLLM_HOME = pathlib.Path(os.getenv('OPENLLM_HOME', pathlib.Path.home() / '.openllm')) +REPO_DIR = OPENLLM_HOME / 'repos' +TEMP_DIR = OPENLLM_HOME / 'temp' +VENV_DIR = OPENLLM_HOME / 'venv' REPO_DIR.mkdir(exist_ok=True, parents=True) TEMP_DIR.mkdir(exist_ok=True, parents=True) VENV_DIR.mkdir(exist_ok=True, parents=True) -CONFIG_FILE = OPENLLM_HOME / "config.json" +CONFIG_FILE = OPENLLM_HOME / 'config.json' -CHECKED = "Yes" +CHECKED = 'Yes' -T = typing.TypeVar("T") +T = typing.TypeVar('T') class ContextVar(typing.Generic[T]): @@ -75,18 +75,16 @@ def output(content, level=0, style=None, end=None): out = io.StringIO() pyaml.pprint(content, dst=out, sort_dicts=False, sort_keys=False) - questionary.print(out.getvalue(), style=style, end="" if end is None else end) + questionary.print(out.getvalue(), style=style, end='' if end is None else end) out.close() if isinstance(content, str): - questionary.print(content, style=style, end="\n" if end is None else end) + questionary.print(content, style=style, end='\n' if end is None else end) class Config(SimpleNamespace): - repos: dict[str, str] = { - "default": "https://github.com/bentoml/openllm-models@main" - } - default_repo: str = "default" + repos: dict[str, str] = {'default': 'https://github.com/bentoml/openllm-models@main'} + default_repo: str = 'default' def tolist(self): return dict(repos=self.repos, default_repo=self.default_repo) @@ -103,7 +101,7 @@ def load_config() -> Config: def save_config(config: Config) -> None: - with open(CONFIG_FILE, "w") as f: + with open(CONFIG_FILE, 'w') as f: json.dump(config.tolist(), f, indent=2) @@ -131,15 +129,13 @@ class RepoInfo(SimpleNamespace): def tolist(self): if VERBOSE_LEVEL.get() <= 0: - return f"{self.name} ({self.url}@{self.branch})" + return f'{self.name} ({self.url}@{self.branch})' if VERBOSE_LEVEL.get() <= 10: - return dict( - name=self.name, url=f"{self.url}@{self.branch}", path=str(self.path) - ) + return dict(name=self.name, url=f'{self.url}@{self.branch}', path=str(self.path)) if VERBOSE_LEVEL.get() <= 20: return dict( name=self.name, - url=f"{self.url}@{self.branch}", + url=f'{self.url}@{self.branch}', path=str(self.path), server=self.server, owner=self.owner, @@ -150,13 +146,13 @@ def tolist(self): class 
BentoInfo(SimpleNamespace): repo: RepoInfo path: pathlib.Path - alias: str = "" + alias: str = '' def __str__(self): - if self.repo.name == "default": - return f"{self.tag}" + if self.repo.name == 'default': + return f'{self.tag}' else: - return f"{self.repo.name}/{self.tag}" + return f'{self.repo.name}/{self.tag}' def __hash__(self): # type: ignore return md5(str(self.path)) @@ -164,12 +160,12 @@ def __hash__(self): # type: ignore @property def tag(self) -> str: if self.alias: - return f"{self.path.parent.name}:{self.alias}" - return f"{self.path.parent.name}:{self.path.name}" + return f'{self.path.parent.name}:{self.alias}' + return f'{self.path.parent.name}:{self.path.name}' @property def bentoml_tag(self) -> str: - return f"{self.path.parent.name}:{self.path.name}" + return f'{self.path.parent.name}:{self.path.name}' @property def name(self) -> str: @@ -181,42 +177,40 @@ def version(self) -> str: @property def labels(self) -> dict[str, str]: - return self.bento_yaml["labels"] + return self.bento_yaml['labels'] @property def envs(self) -> list[dict[str, str]]: - return self.bento_yaml["envs"] + return self.bento_yaml['envs'] @functools.cached_property def bento_yaml(self) -> dict: import yaml - bento_file = self.path / "bento.yaml" + bento_file = self.path / 'bento.yaml' return yaml.safe_load(bento_file.read_text()) @functools.cached_property def platforms(self) -> list[str]: - return self.bento_yaml["labels"].get("platforms", "linux").split(",") + return self.bento_yaml['labels'].get('platforms', 'linux').split(',') @functools.cached_property def pretty_yaml(self) -> dict: def _pretty_routes(routes): return { - route["route"]: { - "input": { - k: v["type"] for k, v in route["input"]["properties"].items() - }, - "output": route["output"]["type"], + route['route']: { + 'input': {k: v['type'] for k, v in route['input']['properties'].items()}, + 'output': route['output']['type'], } for route in routes } - if len(self.bento_yaml["services"]) == 1: + if len(self.bento_yaml['services']) == 1: pretty_yaml = { - "apis": _pretty_routes(self.bento_yaml["schema"]["routes"]), - "resources": self.bento_yaml["services"][0]["config"]["resources"], - "envs": self.bento_yaml["envs"], - "platforms": self.platforms, + 'apis': _pretty_routes(self.bento_yaml['schema']['routes']), + 'resources': self.bento_yaml['services'][0]['config']['resources'], + 'envs': self.bento_yaml['envs'], + 'platforms': self.platforms, } return pretty_yaml return self.bento_yaml @@ -226,41 +220,31 @@ def pretty_gpu(self) -> str: from openllm.accelerator_spec import ACCELERATOR_SPECS try: - resources = self.bento_yaml["services"][0]["config"]["resources"] - if resources["gpu"] > 1: - acc = ACCELERATOR_SPECS[resources["gpu_type"]] + resources = self.bento_yaml['services'][0]['config']['resources'] + if resources['gpu'] > 1: + acc = ACCELERATOR_SPECS[resources['gpu_type']] return f"{acc.memory_size:.0f}Gx{resources['gpu']}" - elif resources["gpu"] > 0: - acc = ACCELERATOR_SPECS[resources["gpu_type"]] - return f"{acc.memory_size:.0f}G" + elif resources['gpu'] > 0: + acc = ACCELERATOR_SPECS[resources['gpu_type']] + return f'{acc.memory_size:.0f}G' except KeyError: pass - return "" + return '' def tolist(self): verbose = VERBOSE_LEVEL.get() if verbose <= 0: return str(self) if verbose <= 10: - return dict( - tag=self.tag, - repo=self.repo.tolist(), - path=str(self.path), - model_card=self.pretty_yaml, - ) + return dict(tag=self.tag, repo=self.repo.tolist(), path=str(self.path), model_card=self.pretty_yaml) if verbose <= 20: - return 
dict( - tag=self.tag, - repo=self.repo.tolist(), - path=str(self.path), - bento_yaml=self.bento_yaml, - ) + return dict(tag=self.tag, repo=self.repo.tolist(), path=str(self.path), bento_yaml=self.bento_yaml) class VenvSpec(SimpleNamespace): python_version: str requirements_txt: str - name_prefix = "" + name_prefix = '' envs: EnvVars @functools.cached_property @@ -272,27 +256,23 @@ def normalized_requirements_txt(self) -> str: for line in self.requirements_txt.splitlines(): if not line.strip(): continue - elif line.strip().startswith("#"): + elif line.strip().startswith('#'): comment_lines.append(line.strip()) - elif line.strip().startswith("-"): + elif line.strip().startswith('-'): parameter_lines.append(line.strip()) else: dependency_lines.append(line.strip()) parameter_lines.sort() dependency_lines.sort() - return "\n".join(parameter_lines + dependency_lines).strip() + return '\n'.join(parameter_lines + dependency_lines).strip() @functools.cached_property def normalized_envs(self) -> str: """ sorted by name """ - return "\n".join( - f"{k}={v}" - for k, v in sorted(self.envs.items(), key=lambda x: x[0]) - if not v - ) + return '\n'.join(f'{k}={v}' for k, v in sorted(self.envs.items(), key=lambda x: x[0]) if not v) def __hash__(self): # type: ignore return md5( @@ -314,10 +294,10 @@ def __eq__(self, other): class DeploymentTarget(SimpleNamespace): - source: str = "local" - name: str = "local" - price: str = "" - platform = "linux" + source: str = 'local' + name: str = 'local' + price: str = '' + platform = 'linux' accelerators: list[Accelerator] def __hash__(self): # type: ignore @@ -327,31 +307,29 @@ def __hash__(self): # type: ignore def accelerators_repr(self) -> str: accs = {a.model for a in self.accelerators} if len(accs) == 0: - return "null" + return 'null' if len(accs) == 1: a = self.accelerators[0] - return f"{a.model} x{len(self.accelerators)}" - return ", ".join((f"{a.model}" for a in self.accelerators)) + return f'{a.model} x{len(self.accelerators)}' + return ', '.join((f'{a.model}' for a in self.accelerators)) -def run_command( - cmd, cwd=None, env=None, copy_env=True, venv=None, silent=False -) -> subprocess.CompletedProcess: +def run_command(cmd, cwd=None, env=None, copy_env=True, venv=None, silent=False) -> subprocess.CompletedProcess: import shlex env = env or {} cmd = [str(c) for c in cmd] - bin_dir = "Scripts" if os.name == "nt" else "bin" + bin_dir = 'Scripts' if os.name == 'nt' else 'bin' if not silent: - output("\n") + output('\n') if cwd: - output(f"$ cd {cwd}", style="orange") + output(f'$ cd {cwd}', style='orange') if env: for k, v in env.items(): - output(f"$ export {k}={shlex.quote(v)}", style="orange") + output(f'$ export {k}={shlex.quote(v)}', style='orange') if venv: - output(f"$ source {venv / 'bin' / 'activate'}", style="orange") - output(f"$ {' '.join(cmd)}", style="orange") + output(f"$ source {venv / 'bin' / 'activate'}", style='orange') + output(f"$ {' '.join(cmd)}", style='orange') if venv: py = venv / bin_dir / f"python{sysconfig.get_config_var('EXE')}" @@ -361,80 +339,69 @@ def run_command( if copy_env: env = {**os.environ, **env} - if cmd and cmd[0] == "bentoml": - cmd = [py, "-m", "bentoml"] + cmd[1:] - if cmd and cmd[0] == "python": + if cmd and cmd[0] == 'bentoml': + cmd = [py, '-m', 'bentoml'] + cmd[1:] + if cmd and cmd[0] == 'python': cmd = [py] + cmd[1:] try: if silent: return subprocess.run( # type: ignore - cmd, - cwd=cwd, - env=env, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - check=True, + cmd, cwd=cwd, env=env, 
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True ) else: return subprocess.run(cmd, cwd=cwd, env=env, check=True) except Exception as e: if VERBOSE_LEVEL.get() >= 10: - output(e, style="red") - output("Command failed", style="red") + output(e, style='red') + output('Command failed', style='red') raise typer.Exit(1) -async def stream_command_output(stream, style="gray"): +async def stream_command_output(stream, style='gray'): async for line in stream: - output(line.decode(), style=style, end="") + output(line.decode(), style=style, end='') @asynccontextmanager -async def async_run_command( - cmd, cwd=None, env=None, copy_env=True, venv=None, silent=True -): +async def async_run_command(cmd, cwd=None, env=None, copy_env=True, venv=None, silent=True): import shlex env = env or {} cmd = [str(c) for c in cmd] if not silent: - output("\n") + output('\n') if cwd: - output(f"$ cd {cwd}", style="orange") + output(f'$ cd {cwd}', style='orange') if env: for k, v in env.items(): - output(f"$ export {k}={shlex.quote(v)}", style="orange") + output(f'$ export {k}={shlex.quote(v)}', style='orange') if venv: - output(f"$ source {venv / 'bin' / 'activate'}", style="orange") - output(f"$ {' '.join(cmd)}", style="orange") + output(f"$ source {venv / 'bin' / 'activate'}", style='orange') + output(f"$ {' '.join(cmd)}", style='orange') if venv: - py = venv / "bin" / "python" + py = venv / 'bin' / 'python' else: py = sys.executable if copy_env: env = {**os.environ, **env} - if cmd and cmd[0] == "bentoml": - cmd = [py, "-m", "bentoml"] + cmd[1:] - if cmd and cmd[0] == "python": + if cmd and cmd[0] == 'bentoml': + cmd = [py, '-m', 'bentoml'] + cmd[1:] + if cmd and cmd[0] == 'python': cmd = [py] + cmd[1:] proc = None try: proc = await asyncio.create_subprocess_shell( - " ".join(map(str, cmd)), - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - cwd=cwd, - env=env, + ' '.join(map(str, cmd)), stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, cwd=cwd, env=env ) yield proc except subprocess.CalledProcessError: - output("Command failed", style="red") + output('Command failed', style='red') raise typer.Exit(1) finally: if proc: diff --git a/src/openllm/local.py b/src/openllm/local.py index bb1655c17..e474cc105 100644 --- a/src/openllm/local.py +++ b/src/openllm/local.py @@ -4,8 +4,7 @@ import httpx -from openllm.common import (BentoInfo, EnvVars, async_run_command, output, - run_command, stream_command_output) +from openllm.common import BentoInfo, EnvVars, async_run_command, output, run_command, stream_command_output from openllm.venv import ensure_venv @@ -14,20 +13,18 @@ def prep_env_vars(bento: BentoInfo): env_vars = bento.envs for env_var in env_vars: - if "value" not in env_var: + if 'value' not in env_var: continue - key = env_var["name"] - value = env_var["value"] + key = env_var['name'] + value = env_var['value'] os.environ[key] = value -def _get_serve_cmd( - bento: BentoInfo, port: int = 3000 -) -> tuple[list[str], EnvVars, Optional[str]]: - cmd = ["bentoml", "serve", bento.bentoml_tag] +def _get_serve_cmd(bento: BentoInfo, port: int = 3000) -> tuple[list[str], EnvVars, Optional[str]]: + cmd = ['bentoml', 'serve', bento.bentoml_tag] if port != 3000: - cmd += ["--port", str(port)] - env = EnvVars({"BENTOML_HOME": f"{bento.repo.path}/bentoml"}) + cmd += ['--port', str(port)] + env = EnvVars({'BENTOML_HOME': f'{bento.repo.path}/bentoml'}) return cmd, env, None @@ -35,41 +32,37 @@ def serve(bento: BentoInfo, port: int = 3000): prep_env_vars(bento) cmd, env, cwd = 
_get_serve_cmd(bento, port=port) venv = ensure_venv(bento, runtime_envs=env) - output(f"Access the Chat UI at http://localhost:{port}/chat (or with you IP)") + output(f'Access the Chat UI at http://localhost:{port}/chat (or with your IP)') run_command(cmd, env=env, cwd=cwd, venv=venv) async def _run_model(bento: BentoInfo, port: int = 3000, timeout: int = 600): cmd, env, cwd = _get_serve_cmd(bento, port) venv = ensure_venv(bento, runtime_envs=env) - async with async_run_command( - cmd, env=env, cwd=cwd, venv=venv, silent=False - ) as server_proc: - output(f"Model server started {server_proc.pid}") + async with async_run_command(cmd, env=env, cwd=cwd, venv=venv, silent=False) as server_proc: + output(f'Model server started {server_proc.pid}') stdout_streamer = None stderr_streamer = None start_time = time.time() - output("Model loading...", style="green") + output('Model loading...', style='green') for _ in range(timeout): try: - resp = httpx.get(f"http://localhost:{port}/readyz", timeout=3) + resp = httpx.get(f'http://localhost:{port}/readyz', timeout=3) if resp.status_code == 200: break except httpx.RequestError: if time.time() - start_time > 30: if not stdout_streamer: - stdout_streamer = asyncio.create_task( - stream_command_output(server_proc.stdout, style="gray") - ) + stdout_streamer = asyncio.create_task(stream_command_output(server_proc.stdout, style='gray')) if not stderr_streamer: stderr_streamer = asyncio.create_task( - stream_command_output(server_proc.stderr, style="#BD2D0F") + stream_command_output(server_proc.stderr, style='#BD2D0F') ) await asyncio.sleep(1) else: - output("Model failed to load", style="red") + output('Model failed to load', style='red') server_proc.terminate() return @@ -78,37 +71,37 @@ async def _run_model(bento: BentoInfo, port: int = 3000, timeout: int = 600): if stderr_streamer: stderr_streamer.cancel() - output("Model is ready", style="green") + output('Model is ready', style='green') messages: list[dict[str, str]] = [] from openai import AsyncOpenAI - client = AsyncOpenAI(base_url=f"http://localhost:{port}/v1", api_key="local") + client = AsyncOpenAI(base_url=f'http://localhost:{port}/v1', api_key='local') model_id = (await client.models.list()).data[0].id while True: try: - message = input("user: ") - if message == "": - output("empty message, please enter something", style="yellow") + message = input('user: ') + if message == '': + output('empty message, please enter something', style='yellow') continue - messages.append(dict(role="user", content=message)) - output("assistant: ", end="", style="lightgreen") - assistant_message = "" + messages.append(dict(role='user', content=message)) + output('assistant: ', end='', style='lightgreen') + assistant_message = '' stream = await client.chat.completions.create( model=model_id, messages=messages, # type: ignore stream=True, ) async for chunk in stream: - text = chunk.choices[0].delta.content or "" + text = chunk.choices[0].delta.content or '' assistant_message += text - output(text, end="", style="lightgreen") - messages.append(dict(role="assistant", content=assistant_message)) - output("") + output(text, end='', style='lightgreen') + messages.append(dict(role='assistant', content=assistant_message)) + output('') except KeyboardInterrupt: break - output("\nStopping model server...", style="green") - output("Stopped model server", style="green") + output('\nStopping model server...', style='green') + output('Stopped model server', style='green') def run(bento: BentoInfo, port: int = 3000, timeout:
600):
diff --git a/src/openllm/model.py b/src/openllm/model.py
index 655647c5a..8e78edbe7 100644
--- a/src/openllm/model.py
+++ b/src/openllm/model.py
@@ -10,10 +10,10 @@
 from openllm.common import VERBOSE_LEVEL, BentoInfo, load_config, output
 from openllm.repo import ensure_repo_updated, parse_repo_url

-app = OpenLLMTyper(help="manage models")
+app = OpenLLMTyper(help='manage models')


-@app.command(help="get model")
+@app.command(help='get model')
 def get(tag: str, repo: Optional[str] = None, verbose: bool = False):
     if verbose:
         VERBOSE_LEVEL.set(20)
@@ -22,10 +22,8 @@ def get(tag: str, repo: Optional[str] = None, verbose: bool = False):
     output(bento_info)


-@app.command(name="list", help="list available models")
-def list_model(
-    tag: Optional[str] = None, repo: Optional[str] = None, verbose: bool = False
-):
+@app.command(name='list', help='list available models')
+def list_model(tag: Optional[str] = None, repo: Optional[str] = None, verbose: bool = False):
     if verbose:
         VERBOSE_LEVEL.set(20)

@@ -43,46 +41,42 @@ def is_seen(value):
     table = tabulate.tabulate(
         [
             [
-                "" if is_seen(bento.name) else bento.name,
+                '' if is_seen(bento.name) else bento.name,
                 bento.tag,
                 bento.repo.name,
                 bento.pretty_gpu,
-                ",".join(bento.platforms),
+                ','.join(bento.platforms),
             ]
             for bento in bentos
         ],
-        headers=["model", "version", "repo", "required GPU RAM", "platforms"],
+        headers=['model', 'version', 'repo', 'required GPU RAM', 'platforms'],
     )
     output(table)


-def ensure_bento(
-    model: str,
-    target: Optional[DeploymentTarget] = None,
-    repo_name: Optional[str] = None,
-) -> BentoInfo:
+def ensure_bento(model: str, target: Optional[DeploymentTarget] = None, repo_name: Optional[str] = None) -> BentoInfo:
     bentos = list_bento(model, repo_name=repo_name)
     if len(bentos) == 0:
-        output(f"No model found for {model}", style="red")
+        output(f'No model found for {model}', style='red')
         raise typer.Exit(1)

     if len(bentos) == 1:
-        output(f"Found model {bentos[0]}", style="green")
+        output(f'Found model {bentos[0]}', style='green')
         if target is not None and can_run(bentos[0], target) <= 0:
             output(
-                f"The machine({target.name}) with {target.accelerators_repr} does not appear to have sufficient "
-                f"resources to run model {bentos[0]}\n",
-                style="yellow",
+                f'The machine({target.name}) with {target.accelerators_repr} does not appear to have sufficient '
+                f'resources to run model {bentos[0]}\n',
+                style='yellow',
             )
         return bentos[0]

     # multiple models, pick one according to target
-    output(f"Multiple models match {model}, did you mean one of these?", style="red")
+    output(f'Multiple models match {model}, did you mean one of these?', style='red')
     list_model(model, repo=repo_name)
     raise typer.Exit(1)


-NUMBER_RE = re.compile(r"\d+")
+NUMBER_RE = re.compile(r'\d+')


 def _extract_first_number(s: str):
@@ -94,30 +88,28 @@ def _extract_first_number(s: str):


 def list_bento(
-    tag: typing.Optional[str] = None,
-    repo_name: typing.Optional[str] = None,
-    include_alias: bool = False,
+    tag: typing.Optional[str] = None, repo_name: typing.Optional[str] = None, include_alias: bool = False
 ) -> typing.List[BentoInfo]:
     ensure_repo_updated()

-    if repo_name is None and tag and "/" in tag:
-        repo_name, tag = tag.split("/", 1)
+    if repo_name is None and tag and '/' in tag:
+        repo_name, tag = tag.split('/', 1)

     if repo_name is not None:
         config = load_config()
         if repo_name not in config.repos:
-            output(f"Repo `{repo_name}` not found, did you mean one of these?")
+            output(f'Repo `{repo_name}` not found, did you mean one of these?')
             for repo_name in config.repos:
-                output(f"  {repo_name}")
+                output(f'  {repo_name}')
             raise typer.Exit(1)

     if not tag:
-        glob_pattern = "bentoml/bentos/*/*"
-    elif ":" in tag:
-        bento_name, version = tag.split(":")
-        glob_pattern = f"bentoml/bentos/{bento_name}/{version}"
+        glob_pattern = 'bentoml/bentos/*/*'
+    elif ':' in tag:
+        bento_name, version = tag.split(':')
+        glob_pattern = f'bentoml/bentos/{bento_name}/{version}'
     else:
-        glob_pattern = f"bentoml/bentos/{tag}/*"
+        glob_pattern = f'bentoml/bentos/{tag}/*'

     model_list = []
     config = load_config()
@@ -128,15 +120,10 @@ def list_bento(
         paths = sorted(
             repo.path.glob(glob_pattern),
-            key=lambda x: (
-                x.parent.name,
-                _extract_first_number(x.name),
-                len(x.name),
-                x.name,
-            ),
+            key=lambda x: (x.parent.name, _extract_first_number(x.name), len(x.name), x.name),
         )
         for path in paths:
-            if path.is_dir() and (path / "bento.yaml").exists():
+            if path.is_dir() and (path / 'bento.yaml').exists():
                 model = BentoInfo(repo=repo, path=path)
             elif path.is_file():
                 with open(path) as f:
diff --git a/src/openllm/repo.py b/src/openllm/repo.py
index caec0b73e..e6e43f390 100644
--- a/src/openllm/repo.py
+++ b/src/openllm/repo.py
@@ -8,46 +8,43 @@
 import typer

 from openllm.analytic import OpenLLMTyper
-from openllm.common import (INTERACTIVE, REPO_DIR, VERBOSE_LEVEL, RepoInfo,
-                            load_config, output, save_config)
+from openllm.common import INTERACTIVE, REPO_DIR, VERBOSE_LEVEL, RepoInfo, load_config, output, save_config

 UPDATE_INTERVAL = datetime.timedelta(days=3)

-app = OpenLLMTyper(help="manage repos")
+app = OpenLLMTyper(help='manage repos')


-@app.command(name="list", help="list available repo")
+@app.command(name='list', help='list available repo')
 def list_repo(verbose: bool = False):
     if verbose:
         VERBOSE_LEVEL.set(20)
     config = load_config()
     pyaml.pprint(
-        [parse_repo_url(repo, name) for name, repo in config.repos.items()],
-        sort_dicts=False,
-        sort_keys=False,
+        [parse_repo_url(repo, name) for name, repo in config.repos.items()], sort_dicts=False, sort_keys=False
     )


-@app.command(help="remove given repo")
+@app.command(help='remove given repo')
 def remove(name: str):
     config = load_config()
     if name not in config.repos:
-        output(f"Repo {name} does not exist", style="red")
+        output(f'Repo {name} does not exist', style='red')
         return
     del config.repos[name]
     save_config(config)
-    output(f"Repo {name} removed", style="green")
+    output(f'Repo {name} removed', style='green')


 def _complete_alias(repo_name: str):
     from openllm.model import list_bento

     for bento in list_bento(repo_name=repo_name):
-        alias = bento.labels.get("openllm_alias", "").strip()
+        alias = bento.labels.get('openllm_alias', '').strip()
         if alias:
-            for a in alias.split(","):
-                with open(bento.path.parent / a, "w") as f:
+            for a in alias.split(','):
+                with open(bento.path.parent / a, 'w') as f:
                     f.write(bento.version)
@@ -58,20 +55,15 @@ def _clone_repo(repo: RepoInfo):
     import subprocess

     try:
-        subprocess.run(
-            ["git", "clone", "--depth=1", "-b", repo.branch, repo.url, str(repo.path)],
-            check=True,
-        )
+        subprocess.run(['git', 'clone', '--depth=1', '-b', repo.branch, repo.url, str(repo.path)], check=True)
     except (subprocess.CalledProcessError, FileNotFoundError):
         import dulwich
         import dulwich.porcelain

-        dulwich.porcelain.clone(
-            repo.url, str(repo.path), checkout=True, depth=1, branch=repo.branch
-        )
+        dulwich.porcelain.clone(repo.url, str(repo.path), checkout=True, depth=1, branch=repo.branch)


-@app.command(help="update default repo")
+@app.command(help='update default repo')
 def update():
     config = load_config()
     repos_in_use = set()
@@ -83,59 +75,59 @@ def update():
         repo.path.parent.mkdir(parents=True, exist_ok=True)
         try:
             _clone_repo(repo)
-            output("")
-            output(f"Repo `{repo.name}` updated", style="green")
+            output('')
+            output(f'Repo `{repo.name}` updated', style='green')
         except Exception as e:
             shutil.rmtree(repo.path, ignore_errors=True)
-            output(f"Failed to clone repo {repo.name}", style="red")
+            output(f'Failed to clone repo {repo.name}', style='red')
             output(e)
-    for c in REPO_DIR.glob("*/*/*/*"):
+    for c in REPO_DIR.glob('*/*/*/*'):
         repo_spec = tuple(c.parts[-4:])
         if repo_spec not in repos_in_use:
             shutil.rmtree(c, ignore_errors=True)
-            output(f"Removed unused repo cache {c}")
+            output(f'Removed unused repo cache {c}')

-    with open(REPO_DIR / "last_update", "w") as f:
+    with open(REPO_DIR / 'last_update', 'w') as f:
         f.write(datetime.datetime.now().isoformat())
     for repo_name in config.repos:
         _complete_alias(repo_name)


 def ensure_repo_updated():
-    last_update_file = REPO_DIR / "last_update"
+    last_update_file = REPO_DIR / 'last_update'
     if not last_update_file.exists():
         if INTERACTIVE.get():
             choice = questionary.confirm(
-                "The repo cache is never updated, do you want to update it to fetch the latest model list?"
+                'The repo cache is never updated, do you want to update it to fetch the latest model list?'
             ).ask()
             if choice:
                 update()
             return
         else:
             output(
-                "The repo cache is never updated, please run `openllm repo update` to fetch the latest model list",
-                style="red",
+                'The repo cache is never updated, please run `openllm repo update` to fetch the latest model list',
+                style='red',
             )
             raise typer.Exit(1)
     last_update = datetime.datetime.fromisoformat(last_update_file.read_text().strip())
     if datetime.datetime.now() - last_update > UPDATE_INTERVAL:
         if INTERACTIVE.get():
             choice = questionary.confirm(
-                "The repo cache is outdated, do you want to update it to fetch the latest model list?"
+                'The repo cache is outdated, do you want to update it to fetch the latest model list?'
             ).ask()
             if choice:
                 update()
         else:
             output(
-                "The repo cache is outdated, please run `openllm repo update` to fetch the latest model list",
-                style="yellow",
+                'The repo cache is outdated, please run `openllm repo update` to fetch the latest model list',
+                style='yellow',
             )


 GIT_HTTP_RE = re.compile(
-    r"(?P<schema>git|ssh|http|https):\/\/(?P<server>[\.\w\d\-]+)\/(?P<owner>[\w\d\-]+)\/(?P<repo>[\w\d\-\_\.]+)(@(?P<branch>.+))?(\/)?$"
+    r'(?P<schema>git|ssh|http|https):\/\/(?P<server>[\.\w\d\-]+)\/(?P<owner>[\w\d\-]+)\/(?P<repo>[\w\d\-\_\.]+)(@(?P<branch>.+))?(\/)?$'
 )
 GIT_SSH_RE = re.compile(
-    r"git@(?P<server>[\.\w\d-]+):(?P<owner>[\w\d\-]+)\/(?P<repo>[\w\d\-\_\.]+)(@(?P<branch>.+))?(\/)?$"
+    r'git@(?P<server>[\.\w\d-]+):(?P<owner>[\w\d\-]+)\/(?P<repo>[\w\d\-\_\.]+)(@(?P<branch>.+))?(\/)?$'
 )
@@ -156,27 +148,27 @@ def parse_repo_url(repo_url: str, repo_name: typing.Optional[str] = None) -> Rep
     """
     match = GIT_HTTP_RE.match(repo_url)
     if match:
-        schema = match.group("schema")
+        schema = match.group('schema')
     else:
         match = GIT_SSH_RE.match(repo_url)
         if not match:
-            raise ValueError(f"Invalid git repo url: {repo_url}")
+            raise ValueError(f'Invalid git repo url: {repo_url}')
         schema = None

-    if match.group("branch") is not None:
-        repo_url = repo_url[: match.start("branch") - 1]
+    if match.group('branch') is not None:
+        repo_url = repo_url[: match.start('branch') - 1]

-    server = match.group("server")
-    owner = match.group("owner")
-    repo = match.group("repo")
-    if repo.endswith(".git"):
+    server = match.group('server')
+    owner = match.group('owner')
+    repo = match.group('repo')
+    if repo.endswith('.git'):
         repo = repo[:-4]
-    branch = match.group("branch") or "main"
+    branch = match.group('branch') or 'main'
     if schema is not None:
-        repo_url = f"{schema}://{server}/{owner}/{repo}"
+        repo_url = f'{schema}://{server}/{owner}/{repo}'
     else:
-        repo_url = f"git@{server}:{owner}/{repo}"
+        repo_url = f'git@{server}:{owner}/{repo}'

     path = REPO_DIR / server / owner / repo / branch
     return RepoInfo(
@@ -190,40 +182,35 @@ def parse_repo_url(repo_url: str, repo_name: typing.Optional[str] = None) -> Rep
     )


-@app.command(help="add new repo")
+@app.command(help='add new repo')
 def add(name: str, repo: str):
     name = name.lower()
     if not name.isidentifier():
-        output(
-            f"Invalid repo name: {name}, should only contain letters, numbers and underscores",
-            style="red",
-        )
+        output(f'Invalid repo name: {name}, should only contain letters, numbers and underscores', style='red')
         return

     try:
         parse_repo_url(repo)
     except ValueError:
-        output(f"Invalid repo url: {repo}", style="red")
+        output(f'Invalid repo url: {repo}', style='red')
         return

     config = load_config()
     if name in config.repos:
-        override = questionary.confirm(
-            f"Repo {name} already exists({config.repos[name]}), override?"
-        ).ask()
+        override = questionary.confirm(f'Repo {name} already exists({config.repos[name]}), override?').ask()
         if not override:
             return
     config.repos[name] = repo
     save_config(config)
-    output(f"Repo {name} added", style="green")
+    output(f'Repo {name} added', style='green')


-@app.command(help="get default repo path")
+@app.command(help='get default repo path')
 def default():
-    output((info := parse_repo_url(load_config().repos["default"], "default")).path)
+    output((info := parse_repo_url(load_config().repos['default'], 'default')).path)
     return info.path


-if __name__ == "__main__":
+if __name__ == '__main__':
     app()
diff --git a/src/openllm/venv.py b/src/openllm/venv.py
index f68e08f21..084b6555d 100644
--- a/src/openllm/venv.py
+++ b/src/openllm/venv.py
@@ -7,97 +7,67 @@
 import typer
 import yaml

-from openllm.common import (VENV_DIR, VERBOSE_LEVEL, BentoInfo, EnvVars,
-                            VenvSpec, output, run_command)
+from openllm.common import VENV_DIR, VERBOSE_LEVEL, BentoInfo, EnvVars, VenvSpec, output, run_command


 @functools.lru_cache
-def _resolve_bento_venv_spec(
-    bento: BentoInfo, runtime_envs: Optional[EnvVars] = None,
-) -> VenvSpec:
-    ver_file = bento.path / "env" / "python" / "version.txt"
-    assert ver_file.exists(), f"cannot find version file in {bento.path}"
+def _resolve_bento_venv_spec(bento: BentoInfo, runtime_envs: Optional[EnvVars] = None) -> VenvSpec:
+    ver_file = bento.path / 'env' / 'python' / 'version.txt'
+    assert ver_file.exists(), f'cannot find version file in {bento.path}'

-    lock_file = bento.path / "env" / "python" / "requirements.lock.txt"
+    lock_file = bento.path / 'env' / 'python' / 'requirements.lock.txt'
     if not lock_file.exists():
-        lock_file = bento.path / "env" / "python" / "requirements.txt"
+        lock_file = bento.path / 'env' / 'python' / 'requirements.txt'

     ver = ver_file.read_text().strip()
     reqs = lock_file.read_text().strip()

-    bentofile = bento.path / "bento.yaml"
-    bento_env_list = yaml.safe_load(bentofile.read_text()).get("envs", [])
-    bento_envs = {e["name"]: e.get("value") for e in bento_env_list}
-    envs = (
-        {k: runtime_envs.get(k, v) for k, v in bento_envs.items()}
-        if runtime_envs
-        else {}
-    )
+    bentofile = bento.path / 'bento.yaml'
+    bento_env_list = yaml.safe_load(bentofile.read_text()).get('envs', [])
+    bento_envs = {e['name']: e.get('value') for e in bento_env_list}
+    envs = {k: runtime_envs.get(k, v) for k, v in bento_envs.items()} if runtime_envs else {}

     return VenvSpec(
-        python_version=ver,
-        requirements_txt=reqs,
-        name_prefix=f"{bento.tag.replace(':', '_')}-1-",
-        envs=EnvVars(envs),
+        python_version=ver, requirements_txt=reqs, name_prefix=f"{bento.tag.replace(':', '_')}-1-", envs=EnvVars(envs)
     )


 def _ensure_venv(venv_spec: VenvSpec) -> pathlib.Path:
     venv = VENV_DIR / str(hash(venv_spec))
-    if venv.exists() and not (venv / "DONE").exists():
+    if venv.exists() and not (venv / 'DONE').exists():
         shutil.rmtree(venv, ignore_errors=True)
     if not venv.exists():
-        output(f"Installing model dependencies({venv})...", style="green")
+        output(f'Installing model dependencies({venv})...', style='green')

-        venv_py = (
-            venv / "Scripts" / "python.exe"
-            if os.name == "nt"
-            else venv / "bin" / "python"
-        )
+        venv_py = venv / 'Scripts' / 'python.exe' if os.name == 'nt' else venv / 'bin' / 'python'
         try:
+            run_command(['python', '-m', 'uv', 'venv', venv], silent=VERBOSE_LEVEL.get() < 10)
             run_command(
-                ["python", "-m", "uv", "venv", venv], silent=VERBOSE_LEVEL.get() < 10
-            )
-            run_command(
-                ["python", "-m", "uv", "pip", "install", "-p", str(venv_py), "bentoml"],
+                ['python', '-m', 'uv', 'pip', 'install', '-p', str(venv_py), 'bentoml'],
                 silent=VERBOSE_LEVEL.get() < 10,
                 env=venv_spec.envs,
             )
-            with open(venv / "requirements.txt", "w") as f:
+            with open(venv / 'requirements.txt', 'w') as f:
                 f.write(venv_spec.normalized_requirements_txt)
             run_command(
-                [
-                    "python",
-                    "-m",
-                    "uv",
-                    "pip",
-                    "install",
-                    "-p",
-                    str(venv_py),
-                    "-r",
-                    venv / "requirements.txt",
-                ],
+                ['python', '-m', 'uv', 'pip', 'install', '-p', str(venv_py), '-r', venv / 'requirements.txt'],
                 silent=VERBOSE_LEVEL.get() < 10,
                 env=venv_spec.envs,
             )
-            with open(venv / "DONE", "w") as f:
-                f.write("DONE")
+            with open(venv / 'DONE', 'w') as f:
+                f.write('DONE')
         except Exception as e:
             shutil.rmtree(venv, ignore_errors=True)
             if VERBOSE_LEVEL.get() >= 10:
-                output(e, style="red")
-            output(
-                f"Failed to install dependencies to {venv}. Cleaned up.", style="red"
-            )
+                output(e, style='red')
+            output(f'Failed to install dependencies to {venv}. Cleaned up.', style='red')
             raise typer.Exit(1)
-        output(f"Successfully installed dependencies to {venv}.", style="green")
+        output(f'Successfully installed dependencies to {venv}.', style='green')
         return venv
     else:
         return venv


-def ensure_venv(
-    bento: BentoInfo, runtime_envs: Optional[EnvVars] = None
-) -> pathlib.Path:
+def ensure_venv(bento: BentoInfo, runtime_envs: Optional[EnvVars] = None) -> pathlib.Path:
     venv_spec = _resolve_bento_venv_spec(bento, runtime_envs=EnvVars(runtime_envs))
     venv = _ensure_venv(venv_spec)
     assert venv is not None
@@ -109,6 +79,6 @@ def check_venv(bento: BentoInfo) -> bool:
     venv = VENV_DIR / str(hash(venv_spec))
     if not venv.exists():
         return False
-    if venv.exists() and not (venv / "DONE").exists():
+    if venv.exists() and not (venv / 'DONE').exists():
         return False
     return True