Skip to content

Commit

Permalink
feat: add 12 model-based pointwise metric classes to `vertexai.preview.evaluation.metrics`
Browse files Browse the repository at this point in the history

PiperOrigin-RevId: 644484669
  • Loading branch information
jsondai authored and copybara-github committed Jun 18, 2024
1 parent 04e07db commit 4742a87
Show file tree
Hide file tree
Showing 14 changed files with 525 additions and 1 deletion.
70 changes: 70 additions & 0 deletions tests/unit/vertexai/test_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@
from vertexai.preview.evaluation import _base as eval_base
from vertexai.preview.evaluation import _evaluation
from vertexai.preview.evaluation import utils
from vertexai.preview.evaluation.metrics import (
_summarization_quality,
)
from vertexai.preview.evaluation.metrics import (
_pairwise_summarization_quality,
)
Expand Down Expand Up @@ -318,6 +321,73 @@ def test_compute_pointwise_metrics(self, api_transport):
0.5,
]

@pytest.mark.parametrize("api_transport", ["grpc", "rest"])
def test_compute_pointwise_metrics_with_custom_metric_spec(self, api_transport):
aiplatform.init(
project=_TEST_PROJECT,
location=_TEST_LOCATION,
api_transport=api_transport,
)
eval_dataset = pd.DataFrame(
{
"context": ["test", "context"],
"instruction": ["test", "instruction"],
"reference": ["test", "reference"],
}
)
mock_model = mock.create_autospec(
generative_models.GenerativeModel, instance=True
)
mock_model.generate_content.return_value = _MOCK_MODEL_INFERENCE_RESPONSE
mock_model._model_name = "publishers/google/model/gemini-1.0-pro"
test_metrics = [
_summarization_quality.SummarizationQuality(
use_reference=True,
)
]
test_eval_task = evaluation.EvalTask(dataset=eval_dataset, metrics=test_metrics)
mock_metric_results = _MOCK_SUMMARIZATION_QUALITY_RESULT
with mock.patch.object(
target=gapic_evaluation_services.EvaluationServiceAsyncClient,
attribute="evaluate_instances",
side_effect=mock_metric_results,
):
test_result = test_eval_task.evaluate(
model=mock_model,
prompt_template="{instruction} test prompt template {context}",
)

assert test_result.summary_metrics["row_count"] == 2
assert test_result.summary_metrics["summarization_quality/mean"] == 4.5
assert test_result.summary_metrics[
"summarization_quality/std"
] == pytest.approx(0.7, 0.1)
assert set(test_result.metrics_table.columns.values) == set(
[
"context",
"instruction",
"reference",
"completed_prompt",
"response",
"summarization_quality",
"summarization_quality/explanation",
"summarization_quality/confidence",
]
)
assert list(test_result.metrics_table["summarization_quality"].values) == [5, 4]
assert list(
test_result.metrics_table["summarization_quality/explanation"].values
) == [
"explanation",
"explanation",
]
assert list(
test_result.metrics_table["summarization_quality/confidence"].values
) == [
1.0,
0.5,
]

@pytest.mark.parametrize("api_transport", ["grpc", "rest"])
def test_compute_pairwise_metrics_with_model_inference(self, api_transport):
aiplatform.init(
Expand Down
70 changes: 69 additions & 1 deletion vertexai/preview/evaluation/metrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,84 @@
#
"""Evaluation Metrics Module."""

from vertexai.preview.evaluation.metrics import _base
from vertexai.preview.evaluation.metrics import _coherence
from vertexai.preview.evaluation.metrics import _fluency
from vertexai.preview.evaluation.metrics import _fulfillment
from vertexai.preview.evaluation.metrics import _groundedness
from vertexai.preview.evaluation.metrics import (
_base,
_pairwise_question_answering_quality,
)
from vertexai.preview.evaluation.metrics import (
_pairwise_summarization_quality,
)
from vertexai.preview.evaluation.metrics import (
_question_answering_correctness,
)
from vertexai.preview.evaluation.metrics import (
_question_answering_helpfulness,
)
from vertexai.preview.evaluation.metrics import (
_question_answering_quality,
)
from vertexai.preview.evaluation.metrics import (
_question_answering_relevance,
)
from vertexai.preview.evaluation.metrics import _safety
from vertexai.preview.evaluation.metrics import (
_summarization_helpfulness,
)
from vertexai.preview.evaluation.metrics import (
_summarization_quality,
)
from vertexai.preview.evaluation.metrics import (
_summarization_verbosity,
)


CustomMetric = _base.CustomMetric
PairwiseMetric = _base.PairwiseMetric
make_metric = _base.make_metric

Coherence = _coherence.Coherence
Fluency = _fluency.Fluency
Safety = _safety.Safety
Groundedness = _groundedness.Groundedness
Fulfillment = _fulfillment.Fulfillment
SummarizationQuality = _summarization_quality.SummarizationQuality
SummarizationHelpfulness = _summarization_helpfulness.SummarizationHelpfulness
SummarizationVerbosity = _summarization_verbosity.SummarizationVerbosity
QuestionAnsweringQuality = _question_answering_quality.QuestionAnsweringQuality
QuestionAnsweringRelevance = _question_answering_relevance.QuestionAnsweringRelevance
QuestionAnsweringHelpfulness = (
_question_answering_helpfulness.QuestionAnsweringHelpfulness
)
QuestionAnsweringCorrectness = (
_question_answering_correctness.QuestionAnsweringCorrectness
)
PairwiseSummarizationQuality = (
_pairwise_summarization_quality.PairwiseSummarizationQuality
)
PairwiseQuestionAnsweringQuality = (
_pairwise_question_answering_quality.PairwiseQuestionAnsweringQuality
)

__all__ = [
"CustomMetric",
"PairwiseMetric",
"make_metric",
"Coherence",
"Fluency",
"Safety",
"Groundedness",
"Fulfillment",
"SummarizationQuality",
"SummarizationHelpfulness",
"SummarizationVerbosity",
"QuestionAnsweringQuality",
"QuestionAnsweringRelevance",
"QuestionAnsweringHelpfulness",
"QuestionAnsweringCorrectness",
"PairwiseSummarizationQuality",
"PairwiseQuestionAnsweringQuality",
]
31 changes: 31 additions & 0 deletions vertexai/preview/evaluation/metrics/_coherence.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# -*- coding: utf-8 -*-

# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from typing import Optional
from vertexai.preview.evaluation import constants
from vertexai.preview.evaluation.metrics import _base


class Coherence(_base._ModelBasedMetric):
"""The model-based pointwise metric for Coherence."""

_metric_name = constants.Metric.COHERENCE

def __init__(self, *, version: Optional[int] = None):
super().__init__(
metric=Coherence._metric_name,
version=version,
)
31 changes: 31 additions & 0 deletions vertexai/preview/evaluation/metrics/_fluency.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# -*- coding: utf-8 -*-

# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from typing import Optional
from vertexai.preview.evaluation import constants
from vertexai.preview.evaluation.metrics import _base


class Fluency(_base._ModelBasedMetric):
"""The model-based pointwise metric for Fluency."""

_metric_name = constants.Metric.FLUENCY

def __init__(self, *, version: Optional[int] = None):
super().__init__(
metric=Fluency._metric_name,
version=version,
)
31 changes: 31 additions & 0 deletions vertexai/preview/evaluation/metrics/_fulfillment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# -*- coding: utf-8 -*-

# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from typing import Optional
from vertexai.preview.evaluation import constants
from vertexai.preview.evaluation.metrics import _base


class Fulfillment(_base._ModelBasedMetric):
"""The model-based pointwise metric for Fulfillment."""

_metric_name = constants.Metric.FULFILLMENT

def __init__(self, *, version: Optional[int] = None):
super().__init__(
metric=Fulfillment._metric_name,
version=version,
)
31 changes: 31 additions & 0 deletions vertexai/preview/evaluation/metrics/_groundedness.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# -*- coding: utf-8 -*-

# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from typing import Optional
from vertexai.preview.evaluation import constants
from vertexai.preview.evaluation.metrics import _base


class Groundedness(_base._ModelBasedMetric):
"""The model-based pointwise metric for Groundedness."""

_metric_name = constants.Metric.GROUNDEDNESS

def __init__(self, *, version: Optional[int] = None):
super().__init__(
metric=Groundedness._metric_name,
version=version,
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# -*- coding: utf-8 -*-

# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from typing import Optional
from vertexai.preview.evaluation import constants
from vertexai.preview.evaluation.metrics import _base


class QuestionAnsweringCorrectness(_base._ModelBasedMetric):
"""The model-based pointwise metric for Question Answering Correctness."""

_metric_name = constants.Metric.QUESTION_ANSWERING_CORRECTNESS

def __init__(self, *, use_reference: bool = True, version: Optional[int] = None):
super().__init__(
metric=QuestionAnsweringCorrectness._metric_name,
use_reference=use_reference,
version=version,
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# -*- coding: utf-8 -*-

# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from typing import Optional
from vertexai.preview.evaluation import constants
from vertexai.preview.evaluation.metrics import _base


class QuestionAnsweringHelpfulness(_base._ModelBasedMetric):
"""The model-based pointwise metric for Question Answering Helpfulness."""

_metric_name = constants.Metric.QUESTION_ANSWERING_HELPFULNESS

def __init__(self, *, use_reference: bool = False, version: Optional[int] = None):
super().__init__(
metric=QuestionAnsweringHelpfulness._metric_name,
use_reference=use_reference,
version=version,
)
33 changes: 33 additions & 0 deletions vertexai/preview/evaluation/metrics/_question_answering_quality.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# -*- coding: utf-8 -*-

# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from typing import Optional
from vertexai.preview.evaluation import constants
from vertexai.preview.evaluation.metrics import _base


class QuestionAnsweringQuality(_base._ModelBasedMetric):
"""The model-based pointwise metric for Question Answering Quality."""

_metric_name = constants.Metric.QUESTION_ANSWERING_QUALITY

def __init__(self, *, use_reference: bool = False, version: Optional[int] = None):
super().__init__(
metric=QuestionAnsweringQuality._metric_name,
use_reference=use_reference,
version=version,
)
Loading

0 comments on commit 4742a87

Please sign in to comment.