Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix #19110: Implement job to copy mis-placed translation images #21378

Closed
wants to merge 10 commits into from
Prev Previous commit
Next Next commit
Separate debugging and audit jobs
  • Loading branch information
U8NWXD committed Dec 10, 2024
commit f929aa146f38771b79b09ef7eca26dc38088d7fe
27 changes: 25 additions & 2 deletions core/jobs/batch_jobs/missing_translation_images_repair_jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,8 +311,9 @@ def run(self) -> beam.PCollection[job_run_result.JobRunResult]:
)


class AuditMissingTranslationImagesJob(base_jobs.JobBase):
"""Return planned copy operations without making any changes."""
class DebugMissingTranslationImagesJob(base_jobs.JobBase):
"""Return debugging information about files to be copied without making any
changes."""

def run(self) -> beam.PCollection[job_run_result.JobRunResult]:
"""List the copy operations to be performed along with debugging
Expand Down Expand Up @@ -358,3 +359,25 @@ def run(self) -> beam.PCollection[job_run_result.JobRunResult]:
| 'Group outputs' >> beam.CoGroupByKey()
| 'Map as stdout' >> beam.Map(job_run_result.JobRunResult.as_stdout)
)


class AuditMissingTranslationImagesJob(base_jobs.JobBase):
    """List the copy operations that are planned, changing nothing."""

    def run(self) -> beam.PCollection[job_run_result.JobRunResult]:
        """Plan the copy operations and report each one as job output.

        Returns:
            PCollection. Job run results, each describing an ordered pair of
            the source and destination files for a copy to be performed.
        """
        planned_outputs = (
            self.pipeline
            | 'Plan copy operations' >> CopyMissingTranslationImages()
        )
        # The transform yields two outputs; only the first one (the planned
        # copies) is reported by this job, the second is discarded.
        planned_copies, _ = planned_outputs

        return (
            planned_copies
            | 'Map as stdout' >> beam.Map(
                job_run_result.JobRunResult.as_stdout)
        )
104 changes: 101 additions & 3 deletions core/jobs/batch_jobs/missing_translation_images_repair_jobs_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,13 +232,13 @@ def test_do_nothing_when_no_suggestions(self) -> None:
)


class AuditMissingTranslationImagesJobTests(
class DebugMissingTranslationImagesJobTests(
MissingTranslationImagesJobTestsBase
):
"""Tests for CopyMissingTranslationImagesJob."""
"""Tests for DebugMissingTranslationImagesJob."""

JOB_CLASS = (
missing_translation_images_repair_jobs.AuditMissingTranslationImagesJob)
missing_translation_images_repair_jobs.DebugMissingTranslationImagesJob)

def test_copy_images_when_dst_missing(self) -> None:
self._add_suggestion(
Expand Down Expand Up @@ -394,3 +394,101 @@ def test_do_nothing_when_no_suggestions(self) -> None:
self._gcs_ls('exploration_suggestions/e1/assets/image'),
set()
)


class AuditMissingTranslationImagesJobTests(
    MissingTranslationImagesJobTestsBase
):
    """Tests for AuditMissingTranslationImagesJob.

    Each test lists both image directories before and after running the job
    and expects identical listings, in addition to checking the job output.
    """

    JOB_CLASS = (
        missing_translation_images_repair_jobs.AuditMissingTranslationImagesJob)

    def test_copy_images_when_dst_missing(self) -> None:
        src_dir = 'exploration/e1/assets/image'
        dst_dir = 'exploration_suggestions/e1/assets/image'
        all_images = {'image1.png', 'image2.png'}

        self._add_suggestion(
            'e1', ['image1.png', 'image2.png'], False, True)

        # State before the job runs.
        self.assertSetEqual(self._gcs_ls(src_dir), all_images)
        self.assertSetEqual(self._gcs_ls(dst_dir), set())

        # One stdout result is expected per planned (source, destination)
        # pair.
        expected_output = [
            job_run_result.JobRunResult.as_stdout(copy_pair)
            for copy_pair in ((SRC1, DST1), (SRC2, DST2))
        ]
        self.assert_job_output_is(expected_output)

        # State after the job runs must match the state before.
        self.assertSetEqual(self._gcs_ls(src_dir), all_images)
        self.assertSetEqual(self._gcs_ls(dst_dir), set())

    def test_do_not_copy_images_when_dst_present(self) -> None:
        src_dir = 'exploration/e1/assets/image'
        dst_dir = 'exploration_suggestions/e1/assets/image'
        all_images = {'image1.png', 'image2.png'}

        self._add_suggestion(
            'e1', ['image1.png', 'image2.png'], True, True)

        # State before the job runs.
        self.assertSetEqual(self._gcs_ls(src_dir), all_images)
        self.assertSetEqual(self._gcs_ls(dst_dir), all_images)

        self.assert_job_output_is([])

        # State after the job runs must match the state before.
        self.assertSetEqual(self._gcs_ls(src_dir), all_images)
        self.assertSetEqual(self._gcs_ls(dst_dir), all_images)

    def test_do_not_copy_images_when_src_missing(self) -> None:
        src_dir = 'exploration/e1/assets/image'
        dst_dir = 'exploration_suggestions/e1/assets/image'
        all_images = {'image1.png', 'image2.png'}

        self._add_suggestion(
            'e1', ['image1.png', 'image2.png'], True, False)

        # State before the job runs.
        self.assertSetEqual(self._gcs_ls(src_dir), set())
        self.assertSetEqual(self._gcs_ls(dst_dir), all_images)

        self.assert_job_output_is([])

        # State after the job runs must match the state before.
        self.assertSetEqual(self._gcs_ls(src_dir), set())
        self.assertSetEqual(self._gcs_ls(dst_dir), all_images)

    def test_do_nothing_when_no_suggestions(self) -> None:
        src_dir = 'exploration/e1/assets/image'
        dst_dir = 'exploration_suggestions/e1/assets/image'

        # State before the job runs.
        self.assertSetEqual(self._gcs_ls(src_dir), set())
        self.assertSetEqual(self._gcs_ls(dst_dir), set())

        self.assert_job_output_is([])

        # State after the job runs must match the state before.
        self.assertSetEqual(self._gcs_ls(src_dir), set())
        self.assertSetEqual(self._gcs_ls(dst_dir), set())
Loading