[ML][Pipelines]test: shorten test name to enable recording (Azure#26995)
* feat: shorten test name to enable recording

* fix: fix ci
elliotzh authored Oct 25, 2022
1 parent 1063e29 commit 9bfdf2e
Showing 39 changed files with 28,995 additions and 966 deletions.
sdk/ml/azure-ai-ml/tests/internal/_utils.py (8 additions, 0 deletions)

```diff
@@ -1,3 +1,5 @@
+from pathlib import Path
+
 import pydash
 
 from azure.ai.ml import Input
@@ -178,6 +180,12 @@
 # component containing v1.5 nodes
 ]
 
+# this is to shorten the test name
+TEST_CASE_NAME_ENUMERATE = list(enumerate(map(
+    lambda params: Path(params[0]).name,
+    PARAMETERS_TO_TEST,
+)))
+
 
 def set_run_settings(node, runsettings_dict):
     for dot_key, value in runsettings_dict.items():
```
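For context, here is a minimal, hypothetical sketch of why this indirection shortens recorded test names: pytest derives each parametrized test's ID from the parameter values, and the test proxy derives recording file names from those IDs. The tuples below are stand-ins, not the real `PARAMETERS_TO_TEST` contents.

```python
from pathlib import Path

# Hypothetical stand-ins for the real (yaml_path, inputs, runsettings_dict,
# pipeline_runsettings_dict) tuples in PARAMETERS_TO_TEST.
PARAMETERS_TO_TEST = [
    ("tests/test_configs/internal/helloworld/component_spec.yaml", {}, {}, {}),
    ("tests/test_configs/internal/batch_inference/batch_score.yaml", {}, {}, {}),
]

# Same construction as in the diff above: keep only (index, file name).
TEST_CASE_NAME_ENUMERATE = list(enumerate(map(
    lambda params: Path(params[0]).name,
    PARAMETERS_TO_TEST,
)))

print(TEST_CASE_NAME_ENUMERATE)
# [(0, 'component_spec.yaml'), (1, 'batch_score.yaml')]
# A test parametrized with these pairs gets an ID such as
# "test_foo[0-component_spec.yaml]" instead of one embedding the repr of the
# whole four-element tuple.
```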
sdk/ml/azure-ai-ml/tests/internal/e2etests/test_component.py (5 additions, 1 deletion)

```diff
@@ -52,7 +52,11 @@ def bodiless_matching(test_proxy):
 
 
 @pytest.mark.usefixtures(
-    "recorded_test", "enable_internal_components", "mock_code_hash", "mock_asset_name", "mock_component_hash"
+    "recorded_test",
+    "enable_internal_components",
+    "mock_code_hash",
+    "mock_asset_name",
+    "mock_component_hash"
 )
 @pytest.mark.e2etest
 @pytest.mark.pipeline_test
```
sdk/ml/azure-ai-ml/tests/internal/e2etests/test_pipeline_job.py (27 additions, 28 deletions)

```diff
@@ -5,7 +5,7 @@
 from pathlib import Path
 from typing import Callable
 
-from devtools_testutils import AzureRecordedTestCase, is_live
+from devtools_testutils import AzureRecordedTestCase
 import pydash
 import pytest
 
@@ -17,7 +17,7 @@
 from azure.core.exceptions import HttpResponseError
 from azure.core.polling import LROPoller
 
-from .._utils import DATA_VERSION, PARAMETERS_TO_TEST, set_run_settings
+from .._utils import DATA_VERSION, PARAMETERS_TO_TEST, set_run_settings, TEST_CASE_NAME_ENUMERATE
 
 _dependent_datasets = {}
 
@@ -60,7 +60,6 @@ def create_internal_sample_dependent_datasets(client: MLClient):
     "create_internal_sample_dependent_datasets",
     "enable_internal_components",
 )
-@pytest.mark.skipif(condition=not is_live(), reason="Works in live mode, does not work in playback")
 @pytest.mark.e2etest
 @pytest.mark.pipeline_test
 class TestPipelineJob(AzureRecordedTestCase):
@@ -111,33 +110,37 @@ def pipeline_func():
         )
 
     @pytest.mark.parametrize(
-        "yaml_path,inputs,runsettings_dict,pipeline_runsettings_dict",
-        PARAMETERS_TO_TEST,
+        "test_case_i,test_case_name",
+        TEST_CASE_NAME_ENUMERATE,
     )
-    def test_pipeline_anonymous(
-        self, client: MLClient, yaml_path, inputs, runsettings_dict, pipeline_runsettings_dict
+    def test_pipeline_job_with_anonymous_internal_component(
+        self,
+        client: MLClient,
+        test_case_i: int,
+        test_case_name: str,
     ):
+        yaml_path, inputs, runsettings_dict, pipeline_runsettings_dict = PARAMETERS_TO_TEST[test_case_i]
         # curated env with name & version
         node_func: InternalComponent = load_component(yaml_path)
 
         self._test_component(node_func, inputs, runsettings_dict, pipeline_runsettings_dict, client)
 
     @pytest.mark.skip(reason="TODO: can't find newly registered component?")
     @pytest.mark.parametrize(
-        "yaml_path,inputs,runsettings_dict,pipeline_runsettings_dict",
-        PARAMETERS_TO_TEST,
+        "test_case_i,test_case_name",
+        TEST_CASE_NAME_ENUMERATE,
     )
-    def test_created_internal_component_in_pipeline(
+    def test_pipeline_job_with_registered_internal_component(
         self,
         client: MLClient,
-        randstr: Callable[[], str],
-        yaml_path,
-        inputs,
-        runsettings_dict,
-        pipeline_runsettings_dict,
+        randstr: Callable[[str], str],
+        test_case_i: int,
+        test_case_name: str,
     ):
-        component_to_register = load_component(yaml_path, params_override=[{"name": randstr("name")}])
+        yaml_path, inputs, runsettings_dict, pipeline_runsettings_dict = PARAMETERS_TO_TEST[test_case_i]
+        component_name = randstr("component_name")
+
+        component_to_register = load_component(yaml_path, params_override=[{"name": component_name}])
         component_resource = client.components.create_or_update(component_to_register)
 
         created_component = client.components.get(component_name, component_resource.version)
@@ -178,23 +181,19 @@ def pipeline_func(pipeline_input):
         except HttpResponseError as ex:
             assert "CancelPipelineRunInTerminalStatus" in str(ex)
 
-    @pytest.mark.skip(
-        reason="Skip for pipeline component compute bug: https://msdata.visualstudio.com/Vienna/_workitems/edit/1920464"
-    )
+    @pytest.mark.skip(reason="marshmallow.exceptions.ValidationError: miss required jobs.node.component")
     @pytest.mark.parametrize(
-        "yaml_path,inputs,runsettings_dict,pipeline_runsettings_dict",
-        PARAMETERS_TO_TEST,
+        "test_case_i,test_case_name",
+        TEST_CASE_NAME_ENUMERATE,
     )
-    def test_internal_in_pipeline_component(
+    def test_pipeline_component_with_anonymous_internal_component(
         self,
         client: MLClient,
-        randstr: Callable[[], str],
-        yaml_path,
-        inputs,
-        runsettings_dict,
-        pipeline_runsettings_dict,
+        test_case_i: int,
+        test_case_name: str,
     ):
-        component_func = load_component(yaml_path, params_override=[{"name": randstr("name")}])
+        yaml_path, inputs, runsettings_dict, pipeline_runsettings_dict = PARAMETERS_TO_TEST[test_case_i]
+        component_func = load_component(yaml_path)
 
         @pipeline()
         def sub_pipeline_func():
```
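The pattern used in the three tests above, parametrizing over `(index, short name)` and looking the full tuple up inside the test body, can be exercised in isolation. A hedged, self-contained sketch with stand-in data:

```python
import pytest

# Hypothetical stand-in for the real PARAMETERS_TO_TEST.
PARAMETERS_TO_TEST = [
    ("configs/hello_component.yaml", {"x": 1}, {}, {}),
]
TEST_CASE_NAME_ENUMERATE = [(0, "hello_component.yaml")]


@pytest.mark.parametrize("test_case_i,test_case_name", TEST_CASE_NAME_ENUMERATE)
def test_example(test_case_i: int, test_case_name: str):
    # Only the index and file name appear in the test ID; the heavyweight
    # parameters are recovered by index, so they never reach the ID or the
    # recording file name.
    yaml_path, inputs, runsettings_dict, pipeline_runsettings_dict = PARAMETERS_TO_TEST[test_case_i]
    assert yaml_path.endswith(test_case_name)
```

One trade-off of this design: a failing case is identified only by index and file name, so reproducing it means looking the parameters up by index rather than reading them off the test ID.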
sdk/ml/azure-ai-ml/tests/pipeline_job/_util.py (8 additions, 0 deletions)

```diff
@@ -1,3 +1,5 @@
+from pathlib import Path
+
 from azure.ai.ml.exceptions import JobException
 from azure.core.exceptions import HttpResponseError
 
@@ -47,3 +49,9 @@
     ),
     ("./tests/test_configs/dsl_pipeline/data_binding_expression/run_settings_sweep_limits.yml", None),
 ]
+
+# this is to shorten the test name
+DATABINDING_EXPRESSION_TEST_CASE_ENUMERATE = list(enumerate(map(
+    lambda params: Path(params[0]).name,
+    DATABINDING_EXPRESSION_TEST_CASES,
+)))
```
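Note that the enumerated cases drop not only the long path but also the `expected_error` member from the test ID; both remain retrievable by index. A hedged sketch with a single stand-in case:

```python
from pathlib import Path

# A single (path, expected_error) case; the real list is longer.
DATABINDING_EXPRESSION_TEST_CASES = [
    ("./tests/test_configs/dsl_pipeline/data_binding_expression/run_settings_sweep_limits.yml", None),
]

DATABINDING_EXPRESSION_TEST_CASE_ENUMERATE = list(enumerate(map(
    lambda params: Path(params[0]).name,
    DATABINDING_EXPRESSION_TEST_CASES,
)))

print(DATABINDING_EXPRESSION_TEST_CASE_ENUMERATE)
# [(0, 'run_settings_sweep_limits.yml')]

# Inside a test body, the full case is recovered by index:
path, expected_error = DATABINDING_EXPRESSION_TEST_CASES[0]
assert expected_error is None
```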
sdk/ml/azure-ai-ml/tests/pipeline_job/e2etests/test_pipeline_job.py (40 additions, 40 deletions)

```diff
@@ -1,11 +1,12 @@
 import json
+import os.path
 import re
 import time
 from datetime import datetime
 from pathlib import Path
-from typing import Any, Callable, Dict, Optional
+from typing import Any, Callable, Dict
 
-from devtools_testutils import AzureRecordedTestCase, is_live, set_bodiless_matcher
+from devtools_testutils import AzureRecordedTestCase, set_bodiless_matcher
 import pydash
 import pytest
 from marshmallow import ValidationError
@@ -27,7 +28,8 @@
 from azure.core.exceptions import HttpResponseError, ResourceNotFoundError
 from azure.core.polling import LROPoller
 
-from .._util import _PIPELINE_JOB_TIMEOUT_SECOND, DATABINDING_EXPRESSION_TEST_CASES
+from .._util import _PIPELINE_JOB_TIMEOUT_SECOND, DATABINDING_EXPRESSION_TEST_CASES, \
+    DATABINDING_EXPRESSION_TEST_CASE_ENUMERATE
 
 
 def assert_job_input_output_types(job: PipelineJob):
@@ -188,35 +190,28 @@ def test_pipeline_job_get_child_run(self, client: MLClient, generate_weekly_fixe
         assert isinstance(retrieved_child_run, Job)
         assert retrieved_child_run.name == child_job.name
 
-    @pytest.mark.skipif(condition=not is_live(), reason="Recording file names are too long and need to be shortened")
     @pytest.mark.parametrize(
-        "pipeline_job_path, expected_error_type",
+        "pipeline_job_path",
         [
             # flaky parameterization
-            # ("./tests/test_configs/pipeline_jobs/invalid/non_existent_remote_component.yml", Exception),
-            (
-                "tests/test_configs/pipeline_jobs/invalid/non_existent_remote_version.yml",
-                Exception,
-            ),
-            (
-                "tests/test_configs/pipeline_jobs/invalid/non_existent_compute.yml",
-                Exception,
-            ),
+            # "non_existent_remote_component.yml",
+            "non_existent_remote_version.yml",
+            "non_existent_compute.yml",
         ],
     )
     def test_pipeline_job_validation_remote(
         self,
         client: MLClient,
         randstr: Callable[[str], str],
         pipeline_job_path: str,
-        expected_error_type,
     ) -> None:
+        base_dir = "./tests/test_configs/pipeline_jobs/invalid/"
         pipeline_job: PipelineJob = load_job(
-            source=pipeline_job_path,
+            source=os.path.join(base_dir, pipeline_job_path),
             params_override=[{"name": randstr("name")}],
         )
         with pytest.raises(
-            expected_error_type,
+            Exception,
             # hide this as server side error message is not consistent
             # match=str(expected_error),
         ):
@@ -415,10 +410,22 @@ def test_pipeline_job_default_datastore_compute(self, client: MLClient, randstr:
         else:
             assert job.compute in pipeline_job.jobs[job_name].compute
 
-    @pytest.mark.skipif(condition=not is_live(), reason="Recording file names are too long and need to be shortened")
     @pytest.mark.parametrize(
-        "pipeline_job_path, converted_jobs, expected_dict, fields_to_omit",
+        "test_case_i,test_case_name",
         [
+            # TODO: enable this after identity support released to canary
+            # (0, "helloworld_pipeline_job_with_component_output"),
+            (1, "helloworld_pipeline_job_with_paths"),
+        ]
+    )
+    def test_pipeline_job_with_command_job(
+        self,
+        client: MLClient,
+        randstr: Callable[[str], str],
+        test_case_i,
+        test_case_name,
+    ) -> None:
+        params = [
             (
                 "tests/test_configs/pipeline_jobs/helloworld_pipeline_job_defaults_with_command_job_e2e.yml",
                 2,
@@ -587,17 +594,9 @@ def test_pipeline_job_default_datastore_compute(self, client: MLClient, randstr:
                     "source_job_id",
                 ],
             ),
-        ],
-    )
-    def test_pipeline_job_with_command_job(
-        self,
-        client: MLClient,
-        randstr: Callable[[str], str],
-        pipeline_job_path: str,
-        converted_jobs,
-        expected_dict,
-        fields_to_omit,
-    ) -> None:
+        ]
+        pipeline_job_path, converted_jobs, expected_dict, fields_to_omit = params[test_case_i]
+
         params_override = [{"name": randstr("name")}]
         pipeline_job = load_job(
             source=pipeline_job_path,
@@ -616,21 +615,21 @@ def test_pipeline_job_with_command_job(
         actual_dict = pydash.omit(pipeline_dict["properties"], *fields_to_omit)
         assert actual_dict == expected_dict
 
-    @pytest.mark.skipif(condition=not is_live(), reason="Recording file names are too long and need to be shortened")
     @pytest.mark.parametrize(
         "pipeline_job_path",
         [
-            "tests/test_configs/pipeline_jobs/helloworld_pipeline_job_defaults_with_parallel_job_file_component_input_e2e.yml",
-            "tests/test_configs/pipeline_jobs/helloworld_pipeline_job_defaults_with_parallel_job_file_input_e2e.yml",
-            "tests/test_configs/pipeline_jobs/helloworld_pipeline_job_defaults_with_parallel_job_tabular_input_e2e.yml",
+            "file_component_input_e2e.yml",
+            "file_input_e2e.yml",
+            "tabular_input_e2e.yml",
         ],
     )
     def test_pipeline_job_with_parallel_job(
         self, client: MLClient, randstr: Callable[[str], str], pipeline_job_path: str
     ) -> None:
+        base_file_name = "./tests/test_configs/pipeline_jobs/helloworld_pipeline_job_defaults_with_parallel_job_"
         params_override = [{"name": randstr("name")}]
         pipeline_job = load_job(
-            source=pipeline_job_path,
+            source=base_file_name + pipeline_job_path,
             params_override=params_override,
         )
         created_job = client.jobs.create_or_update(pipeline_job)
@@ -942,18 +941,19 @@ def test_pipeline_job_with_sweep_node_early_termination_policy(
         created_pipeline_dict = created_pipeline._to_dict()
         assert pydash.get(created_pipeline_dict, "jobs.hello_sweep_inline_trial.early_termination") == policy_yaml_dict
 
-    @pytest.mark.skipif(condition=not is_live(), reason="Recording file names are too long and need to be shortened")
     @pytest.mark.parametrize(
-        "pipeline_job_path, expected_error",
-        DATABINDING_EXPRESSION_TEST_CASES,
+        "test_case_i, test_case_name",
+        DATABINDING_EXPRESSION_TEST_CASE_ENUMERATE,
     )
     def test_pipeline_job_with_data_binding_expression(
         self,
         client: MLClient,
         randstr: Callable[[str], str],
-        pipeline_job_path: str,
-        expected_error: Optional[Exception],
+        test_case_i: int,
+        test_case_name: str,
     ):
+        pipeline_job_path, expected_error = DATABINDING_EXPRESSION_TEST_CASES[test_case_i]
+
         pipeline: PipelineJob = load_job(source=pipeline_job_path, params_override=[{"name": randstr("name")}])
         if expected_error is None:
             assert_job_cancel(pipeline, client)
```
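This file also uses a second shortening technique: parametrize over only the distinguishing suffix of a path and rebuild the full path inside the test body. A minimal sketch, with `BASE_DIR` and `test_example` as illustrative names:

```python
import os.path

import pytest

BASE_DIR = "./tests/test_configs/pipeline_jobs/invalid/"  # from the diff above


@pytest.mark.parametrize("pipeline_job_path", ["non_existent_compute.yml"])
def test_example(pipeline_job_path: str):
    # The test ID becomes "test_example[non_existent_compute.yml]"; the shared
    # directory prefix never appears in it.
    full_path = os.path.join(BASE_DIR, pipeline_job_path)
    assert full_path.endswith(pipeline_job_path)
```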
(The remaining 34 changed files are not shown.)