Add Optimum Intel (#2609)
NoushNabi authored May 13, 2024
1 parent 6bd30be commit f868c99
Showing 5 changed files with 74 additions and 11 deletions.
14 changes: 14 additions & 0 deletions docs/huggingface_models.md
@@ -29,3 +29,17 @@ helm-run \
--suite v1 \
--max-eval-instances 10
```

To use Optimum Intel, add the `--openvino` flag to `helm-run`. Optimum Intel provides a simple interface for optimizing Transformer models and converting them to the OpenVINO™ Intermediate Representation (IR) format, which accelerates end-to-end pipelines on Intel® architectures using the OpenVINO™ runtime. The model runs on the CPU.

Examples:

```bash
# Run boolq on stanford-crfm/BioMedLM optimized by Optimum Intel OpenVINO
helm-run \
--run-entries boolq:model=stanford-crfm/BioMedLM \
--enable-huggingface-models stanford-crfm/BioMedLM \
--suite v1 \
--max-eval-instances 10 \
--openvino
```
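
The `--openvino` flag applies equally to local checkpoints registered with `--enable-local-huggingface-models` (see `run.py` below). A hypothetical variant, where `/path/to/BioMedLM` is a placeholder for a local copy of the model (HELM registers it under `huggingface/<directory name>`):

```bash
# Run boolq on a local checkpoint optimized by Optimum Intel OpenVINO
helm-run \
    --run-entries boolq:model=huggingface/BioMedLM \
    --enable-local-huggingface-models /path/to/BioMedLM \
    --suite v1 \
    --max-eval-instances 10 \
    --openvino
```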
3 changes: 3 additions & 0 deletions setup.cfg
@@ -114,6 +114,8 @@ unitxt =
aleph-alpha =
aleph-alpha-client~=2.14.0
tokenizers>=0.13.3
openvino =
optimum[openvino]~=1.19

allenai =
ai2-olmo~=0.2
@@ -158,6 +160,7 @@ models =
crfm-helm[together]
crfm-helm[tsinghua]
crfm-helm[yandex]
crfm-helm[openvino]

vlm =
crfm-helm[openai]
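The `openvino` extra keeps the Optimum Intel dependency opt-in. A minimal install sketch, assuming the usual extras syntax for the published `crfm-helm` package:

```bash
# Pulls in optimum[openvino]~=1.19 alongside HELM
pip install "crfm-helm[openvino]"
```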
17 changes: 12 additions & 5 deletions src/helm/benchmark/huggingface_registration.py
@@ -1,5 +1,5 @@
import os
from typing import Optional
from typing import Optional, Dict, Union

from helm.benchmark.model_deployment_registry import (
ClientSpec,
@@ -17,11 +17,16 @@


def register_huggingface_model(
helm_model_name: str, pretrained_model_name_or_path: str, revision: Optional[str] = None
helm_model_name: str,
pretrained_model_name_or_path: str,
revision: Optional[str] = None,
openvino: Optional[bool] = False,
) -> None:
object_spec_args = {"pretrained_model_name_or_path": pretrained_model_name_or_path}
object_spec_args: Dict[str, Union[str, bool]] = {"pretrained_model_name_or_path": pretrained_model_name_or_path}
if revision:
object_spec_args["revision"] = revision
if openvino:
object_spec_args["openvino"] = openvino

# Auto-infer model properties from the tokenizer.
with HuggingFaceTokenizer.create_tokenizer(**object_spec_args) as tokenizer:
@@ -71,7 +76,7 @@ def register_huggingface_model(
register_tokenizer_config(tokenizer_config)


def register_huggingface_hub_model_from_flag_value(raw_model_string: str) -> None:
def register_huggingface_hub_model_from_flag_value(raw_model_string: str, openvino=False) -> None:
raw_model_string_parts = raw_model_string.split("@")
pretrained_model_name_or_path: str
revision: Optional[str]
@@ -88,15 +93,17 @@ def register_huggingface_hub_model_from_flag_value(raw_model_string: str) -> None:
helm_model_name=raw_model_string,
pretrained_model_name_or_path=pretrained_model_name_or_path,
revision=revision,
openvino=openvino,
)


def register_huggingface_local_model_from_flag_value(path: str) -> None:
def register_huggingface_local_model_from_flag_value(path: str, openvino=False) -> None:
if not path:
raise ValueError("Path to Hugging Face model must be non-empty")
path_parts = os.path.split(path)
helm_model_name = f"huggingface/{path_parts[-1]}"
register_huggingface_model(
helm_model_name=helm_model_name,
pretrained_model_name_or_path=path,
openvino=openvino,
)
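
For reference, a short sketch of how these registration helpers are invoked (the model name and path are illustrative; the `@revision` syntax and the `openvino` keyword are taken from the diff above):

```python
from helm.benchmark.huggingface_registration import (
    register_huggingface_hub_model_from_flag_value,
    register_huggingface_local_model_from_flag_value,
)

# Hub model pinned to a revision; openvino=True is recorded in
# object_spec_args so the client later loads an OpenVINO model.
register_huggingface_hub_model_from_flag_value("stanford-crfm/BioMedLM@main", openvino=True)

# Local checkpoint; registered as huggingface/<last path component>.
register_huggingface_local_model_from_flag_value("/path/to/BioMedLM", openvino=True)
```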
18 changes: 16 additions & 2 deletions src/helm/benchmark/run.py
@@ -264,6 +264,13 @@ def main():
default=None,
help="Full class name of the Runner class to use. If unset, uses the default Runner.",
)
parser.add_argument(
"--openvino",
action="store_true",
default=False,
help="Experimental: Apply openvino optimization to Hugging Face AutoModelForCausalLM models "
"specified with the --enable-huggingface-models and --enable-local-huggingface-models flags.",
)
add_run_args(parser)
args = parser.parse_args()
validate_args(args)
@@ -275,12 +282,19 @@ def main():
from helm.benchmark.huggingface_registration import register_huggingface_hub_model_from_flag_value

for huggingface_model_name in args.enable_huggingface_models:
register_huggingface_hub_model_from_flag_value(huggingface_model_name)
if args.openvino:
register_huggingface_hub_model_from_flag_value(huggingface_model_name, args.openvino)
else:
register_huggingface_hub_model_from_flag_value(huggingface_model_name)

if args.enable_local_huggingface_models:
from helm.benchmark.huggingface_registration import register_huggingface_local_model_from_flag_value

for huggingface_model_path in args.enable_local_huggingface_models:
register_huggingface_local_model_from_flag_value(huggingface_model_path)
if args.openvino:
register_huggingface_local_model_from_flag_value(huggingface_model_path, args.openvino)
else:
register_huggingface_local_model_from_flag_value(huggingface_model_path)

run_entries: List[RunEntry] = []
if args.conf_paths:
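Since `openvino` defaults to `False` in both helpers, the `if args.openvino:` branches are equivalent to a single unconditional call; a branch-free sketch of the same logic:

```python
for huggingface_model_name in args.enable_huggingface_models:
    # args.openvino is False unless --openvino was passed, matching the default.
    register_huggingface_hub_model_from_flag_value(huggingface_model_name, openvino=args.openvino)
```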
33 changes: 29 additions & 4 deletions src/helm/clients/huggingface_client.py
@@ -53,17 +53,42 @@ class HuggingFaceRequest(TypedDict):
class HuggingFaceServer:
"""A thin wrapper around a Hugging Face AutoModelForCausalLM for HuggingFaceClient to call."""

def __init__(self, pretrained_model_name_or_path: str, **kwargs):
def __init__(self, pretrained_model_name_or_path: str, openvino=False, **kwargs):
if torch.cuda.is_available():
hlog("CUDA is available, initializing with a GPU...")
self.device: str = "cuda:0"
else:
self.device = "cpu"
with htrack_block(f"Loading Hugging Face model {pretrained_model_name_or_path}"):
# WARNING this may fail if your GPU does not have enough memory
self.model = AutoModelForCausalLM.from_pretrained(
pretrained_model_name_or_path, trust_remote_code=True, **kwargs
).to(self.device)
if openvino:
"""
Optimum Intel provides a simple interface to optimize Transformer models and convert them to \
OpenVINO™ Intermediate Representation (IR) format to accelerate end-to-end pipelines on \
Intel® architectures using OpenVINO™ runtime.
"""
from pathlib import Path
from helm.common.optional_dependencies import handle_module_not_found_error

try:
from optimum.intel.openvino import OVModelForCausalLM
except ModuleNotFoundError as e:
handle_module_not_found_error(e, ["openvino"])

model_file = Path(pretrained_model_name_or_path) / "openvino_model.xml"
if model_file.exists():
export = False
else:
export = True

self.device = "cpu"
self.model = OVModelForCausalLM.from_pretrained(
pretrained_model_name_or_path, export=export, trust_remote_code=True, **kwargs
).to(self.device)
else:
self.model = AutoModelForCausalLM.from_pretrained(
pretrained_model_name_or_path, trust_remote_code=True, **kwargs
).to(self.device)
with htrack_block(f"Loading Hugging Face tokenizer for model {pretrained_model_name_or_path}"):
self.wrapped_tokenizer: WrappedPreTrainedTokenizer = HuggingFaceTokenizer.create_tokenizer(
pretrained_model_name_or_path, **kwargs
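
Outside HELM, the OpenVINO branch reduces to Optimum Intel's drop-in replacement for `AutoModelForCausalLM`. A minimal standalone sketch (the model name is illustrative; `export=True` converts the checkpoint to OpenVINO IR on the fly, mirroring the `openvino_model.xml` check above):

```python
from optimum.intel.openvino import OVModelForCausalLM
from transformers import AutoTokenizer

# export=True converts the Hugging Face weights to OpenVINO IR at load time;
# use export=False when openvino_model.xml already exists in the directory.
model = OVModelForCausalLM.from_pretrained("gpt2", export=True)
tokenizer = AutoTokenizer.from_pretrained("gpt2")

inputs = tokenizer("OpenVINO runs this prompt on the CPU:", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=8)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```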
