Skip to content

Commit

Permalink
feat: Upload aggregate metrics and generative models used in an evalu…
Browse files Browse the repository at this point in the history
…ation to GCS.

PiperOrigin-RevId: 719037901
  • Loading branch information
vertex-sdk-bot authored and copybara-github committed Jan 23, 2025
1 parent c2e7ce4 commit 713ffac
Show file tree
Hide file tree
Showing 3 changed files with 109 additions and 27 deletions.
39 changes: 28 additions & 11 deletions tests/unit/vertexai/test_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,13 +558,9 @@ def mock_experiment_tracker():


@pytest.fixture
def mock_storage_blob_from_string():
    """Patches `google.cloud.storage.Blob.from_string` for upload tests.

    Yields:
        The patched `Blob.from_string` mock so tests can assert on the
        GCS URIs it was invoked with (no real GCS traffic occurs).
    """
    with mock.patch(
        "google.cloud.storage.Blob.from_string"
    ) as mock_blob_from_string:
        yield mock_blob_from_string


@pytest.mark.usefixtures("google_auth_mock")
Expand Down Expand Up @@ -1948,8 +1944,29 @@ def test_pairtwise_metric_prompt_template_with_default_values(self):
== _EXPECTED_PAIRWISE_PROMPT_TEMPLATE_WITH_DEFAULT_VALUES.strip()
)

def test_upload_results(self, mock_storage_blob_upload_from_filename):
evaluation.utils.upload_evaluation_results(
_TEST_CSV, _TEST_BUCKET, _TEST_FILE_NAME
def test_upload_results(self, mock_storage_blob_from_string):
    """Checks that uploading eval results writes both artifacts to GCS.

    The metrics table goes to `<prefix>/<base>/<file_name>.csv` and the
    aggregate summary (with model names) to
    `<prefix>/<base>/summary_metrics.json`.
    """
    with mock.patch("json.dump") as mock_json_dump:
        evaluation.utils.upload_evaluation_results(
            MOCK_EVAL_RESULT,
            _TEST_BUCKET,
            _TEST_FILE_NAME,
            "candidate_model",
            "baseline_model",
        )

    # One blob per artifact: the metrics-table CSV and the summary JSON.
    mock_storage_blob_from_string.assert_any_call(
        uri="gs://test-bucket/test-file-name/test-file-name.csv",
        client=mock.ANY,
    )
    mock_storage_blob_from_string.assert_any_call(
        uri="gs://test-bucket/test-file-name/summary_metrics.json",
        client=mock.ANY,
    )
    # The serialized summary must carry the metrics and both model names.
    # NOTE: the old `assert mock.called_once_with(...)` line was dropped —
    # `called_once_with` is not a real Mock method; the expression is
    # always truthy, so that assert could never fail.
    mock_json_dump.assert_called_once_with(
        {
            "summary_metrics": MOCK_EVAL_RESULT.summary_metrics,
            "candidate_model_name": "candidate_model",
            "baseline_model_name": "baseline_model",
        },
        mock.ANY,
    )
23 changes: 22 additions & 1 deletion vertexai/evaluation/eval_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -464,8 +464,29 @@ def evaluate(
evaluation_service_qps=evaluation_service_qps,
retry_timeout=retry_timeout,
)

candidate_model_name = None
if isinstance(model, generative_models.GenerativeModel):
candidate_model_name = model._model_name

baseline_model_name = None
pairwise_metrics = [
metric
for metric in self.metrics
if isinstance(metric, pairwise_metric.PairwiseMetric)
]
if pairwise_metrics:
# All pairwise metrics should have the same baseline model.
baseline_model = pairwise_metrics[0].baseline_model
if isinstance(baseline_model, generative_models.GenerativeModel):
baseline_model_name = baseline_model._model_name

utils.upload_evaluation_results(
eval_result.metrics_table, self.output_uri_prefix, output_file_name
eval_result,
self.output_uri_prefix,
output_file_name,
candidate_model_name,
baseline_model_name,
)
return eval_result

Expand Down
74 changes: 59 additions & 15 deletions vertexai/evaluation/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,12 @@

import functools
import io
import json
import os
import tempfile
import threading
import time
from typing import Any, Dict, Optional, TYPE_CHECKING, Union, Callable, Literal
from typing import Any, Callable, Dict, Literal, Optional, TYPE_CHECKING, Union

from google.cloud import bigquery
from google.cloud import storage
Expand All @@ -33,6 +34,7 @@
from google.cloud.aiplatform_v1.services import (
evaluation_service as gapic_evaluation_services,
)
from vertexai.evaluation import _base as eval_base


if TYPE_CHECKING:
Expand Down Expand Up @@ -276,35 +278,77 @@ def _upload_pandas_df_to_gcs(
" Please provide a valid GCS path with `jsonl` or `csv` suffix."
)

storage_client = storage.Client(
project=initializer.global_config.project,
credentials=initializer.global_config.credentials,
)
storage.Blob.from_string(
uri=upload_gcs_path, client=storage_client
).upload_from_filename(filename=local_dataset_path)
_upload_file_to_gcs(upload_gcs_path, local_dataset_path)


def _upload_evaluation_summary_to_gcs(
    summary_metrics: Dict[str, float],
    upload_gcs_path: str,
    candidate_model_name: Optional[str] = None,
    baseline_model_name: Optional[str] = None,
) -> None:
    """Uploads the evaluation summary to a GCS bucket.

    Args:
        summary_metrics: Mapping of aggregate metric name to value.
        upload_gcs_path: Full GCS URI (``gs://...``) of the destination
            summary JSON file.
        candidate_model_name: Optional. Candidate model name recorded in
            the summary.
        baseline_model_name: Optional. Baseline model name recorded in the
            summary (present when pairwise metrics were used).
    """
    summary: Dict[str, Any] = {
        "summary_metrics": summary_metrics,
    }
    if candidate_model_name:
        summary["candidate_model_name"] = candidate_model_name
    if baseline_model_name:
        summary["baseline_model_name"] = baseline_model_name

    with tempfile.TemporaryDirectory() as temp_dir:
        local_summary_path = os.path.join(temp_dir, "summary_metrics.json")
        # Use a context manager so the file is flushed and closed before
        # the upload reads it. The original passed an anonymous
        # `open(..., "w")` to json.dump, relying on CPython refcount GC
        # to close the handle — unsafe on other runtimes.
        with open(local_summary_path, "w") as summary_file:
            json.dump(summary, summary_file)
        _upload_file_to_gcs(upload_gcs_path, local_summary_path)


def _upload_file_to_gcs(upload_gcs_path: str, filename: str) -> None:
    """Uploads one local file to the given ``gs://`` destination URI.

    Args:
        upload_gcs_path: Full GCS URI of the destination blob.
        filename: Path of the local file to upload.
    """
    client = storage.Client(
        project=initializer.global_config.project,
        credentials=initializer.global_config.credentials,
    )
    blob = storage.Blob.from_string(uri=upload_gcs_path, client=client)
    blob.upload_from_filename(filename)


def upload_evaluation_results(
dataset: "pd.DataFrame", destination_uri_prefix: str, file_name: str
eval_result: eval_base.EvalResult,
destination_uri_prefix: str,
file_name: str,
candidate_model_name: Optional[str] = None,
baseline_model_name: Optional[str] = None,
) -> None:
"""Uploads eval results to GCS destination.
Args:
dataset: Pandas dataframe to upload.
eval_result: Eval results to upload.
destination_uri_prefix: GCS folder to store the data.
file_name: File name to store the data.
file_name: File name to store the metrics table.
candidate_model_name: Optional. Candidate model name.
baseline_model_name: Optional. Baseline model name.
"""
if not destination_uri_prefix:
_ipython_utils.display_gen_ai_evaluation_results_button()
return
if eval_result.metrics_table is None:
return
if destination_uri_prefix.startswith(_GCS_PREFIX):
_, extension = os.path.splitext(file_name)
base_name, extension = os.path.splitext(file_name)
file_type = extension.lower()[1:]
output_path = destination_uri_prefix + "/" + file_name
_upload_pandas_df_to_gcs(dataset, output_path, file_type)
output_folder = destination_uri_prefix + "/" + base_name
metrics_table_path = output_folder + "/" + file_name
_upload_pandas_df_to_gcs(
eval_result.metrics_table, metrics_table_path, file_type
)
_upload_evaluation_summary_to_gcs(
eval_result.summary_metrics,
output_folder + "/summary_metrics.json",
candidate_model_name,
baseline_model_name,
)
_ipython_utils.display_gen_ai_evaluation_results_button(
output_path.split(_GCS_PREFIX)[1]
metrics_table_path.split(_GCS_PREFIX)[1]
)
else:
raise ValueError(
Expand Down

0 comments on commit 713ffac

Please sign in to comment.