M 2.2.1: Audit and Fix invalid profile picture inside UserSettingsMod…

…el (#16492)
oppia · Jan 2, 2023 · 839ab02 · 839ab02
1 parent fcd081c
commit 839ab02
Show file tree

Hide file tree

Showing 8 changed files with 490 additions and 6 deletions.
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -3,9 +3,7 @@
 # in production. Note that this package is generated
 # by running 'python setup.py sdist' in build.py.
 
-include assets/constants.ts
-include assets/release_constants.json
-include assets/rich_text_components_definitions.ts
+graft assets
 include extensions/interactions/html_field_types_to_rule_specs.json
 include extensions/interactions/legacy_interaction_specs_by_state_version/*.json
 include extensions/interactions/rule_templates.json

diff --git a/assets/images/avatar/user_blue_150px.png b/assets/images/avatar/user_blue_150px.png
diff --git a/core/constants.py b/core/constants.py
@@ -24,7 +24,7 @@
 import pkgutil
 import re
 
-from typing import Any, Dict
+from typing import Any, Dict, Literal, Union, overload
 
 
 # Here we use type Any because we need to parse and return the generic JSON
@@ -66,7 +66,25 @@ def remove_comments(text: str) -> str:
 # the chronology of our files execution. utils imports constants and constants
 # need utils.get_package_file_contents but it does not have it loaded to memory
 # yet. If called from utils we get error as `module has no attribute`.
-def get_package_file_contents(package: str, filepath: str) -> str:
+@overload
+def get_package_file_contents(
+    package: str, filepath: str, *, binary_mode: Literal[True]
+) -> bytes: ...
+
+
+@overload
+def get_package_file_contents(package: str, filepath: str) -> str: ...
+
+
+@overload
+def get_package_file_contents(
+    package: str, filepath: str, *, binary_mode: Literal[False]
+) -> str: ...
+
+
+def get_package_file_contents(
+    package: str, filepath: str, *, binary_mode: bool = False
+) -> Union[str, bytes]:
     """Open file and return its contents. This needs to be used for files that
     are loaded by the Python code directly, like constants.ts or
     rich_text_components.json. This function is needed to make loading these
@@ -77,6 +95,7 @@ def get_package_file_contents(package: str, filepath: str) -> str:
             For Oppia the package is usually the folder in the root folder,
             like 'core' or 'extensions'.
         filepath: str. The path to the file in the package.
+        binary_mode: bool. True when we want to read file in binary mode.
 
     Returns:
         str. The contents of the file.
@@ -85,6 +104,12 @@ def get_package_file_contents(package: str, filepath: str) -> str:
         FileNotFoundError. The file does not exist.
     """
     try:
+        if binary_mode:
+            with io.open(
+                os.path.join(package, filepath), 'rb', encoding=None
+            ) as binary_file:
+                read_binary_mode_data: bytes = binary_file.read()
+                return read_binary_mode_data
         with io.open(
             os.path.join(package, filepath), 'r', encoding='utf-8'
         ) as file:
@@ -93,6 +118,8 @@ def get_package_file_contents(package: str, filepath: str) -> str:
         file_data = pkgutil.get_data(package, filepath)
         if file_data is None:
             raise e
+        if binary_mode:
+            return file_data
         return file_data.decode('utf-8')
 
 

diff --git a/core/constants_test.py b/core/constants_test.py
@@ -54,7 +54,32 @@ def test_loading_non_existing_file_throws_error(self) -> None:
                 FileNotFoundError,
                 'No such file or directory: \'assets/non_exist.xy\''
             ):
-                constants.get_package_file_contents('assets', 'non_exist.xy')
+                constants.get_package_file_contents(
+                    'assets', 'non_exist.xy', binary_mode=False)
+
+    def test_loading_binary_file_in_package_returns_the_content(self) -> None:
+        """Test get_package_file_contents with imaginary binary file."""
+        with self.swap_to_always_return(pkgutil, 'get_data', 'File data'):
+            self.assertEqual(
+                constants.get_package_file_contents(
+                    'assets', 'non_exist.xy', binary_mode=True), 'File data'
+            )
+
+    def test_loading_binary_file_returns_the_content(self) -> None:
+        """Test get_package_file_contents with binary file."""
+        with utils.open_file(
+            os.path.join(
+                'assets', 'images', 'avatar', 'user_blue_150px.png'),
+            'rb',
+            encoding=None
+        ) as f:
+            raw_image_png = f.read()
+        default_image_path = os.path.join(
+            'images', 'avatar', 'user_blue_150px.png')
+        self.assertEqual(
+            constants.get_package_file_contents(
+                'assets', default_image_path, binary_mode=True), raw_image_png
+        )
 
     def test_loading_file_in_package_returns_the_content(self) -> None:
         """Test get_package_file_contents with imaginary file."""
@@ -72,6 +97,8 @@ def test_loading_file_in_non_existent_package_throws_error(self) -> None:
                 'No such file or directory: \'assets/non_exist.xy\''
             ):
                 constants.get_package_file_contents('assets', 'non_exist.xy')
+                constants.get_package_file_contents(
+                    'assets', 'non_exist.xy', binary_mode=True)
 
     def test_difficulty_values_are_matched(self) -> None:
         """Tests that the difficulty values and strings are matched in the

diff --git a/core/jobs/batch_jobs/user_settings_profile_picture_jobs.py b/core/jobs/batch_jobs/user_settings_profile_picture_jobs.py
@@ -0,0 +1,227 @@
+# coding: utf-8
+#
+# Copyright 2022 The Oppia Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS-IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Jobs for UserSettingsModel profile picture."""
+
+from __future__ import annotations
+
+import io
+import logging
+import os
+
+from core import constants
+from core import utils
+from core.domain import image_services
+from core.domain import user_services
+from core.jobs import base_jobs
+from core.jobs.io import ndb_io
+from core.jobs.transforms import job_result_transforms
+from core.jobs.types import job_run_result
+from core.platform import models
+
+from PIL import Image
+import apache_beam as beam
+from typing import List, Optional, Tuple
+
+MYPY = False
+if MYPY:  # pragma: no cover
+    from mypy_imports import user_models
+
+(user_models,) = models.Registry.import_models([models.Names.USER])
+
+
+class AuditInvalidProfilePictureJob(base_jobs.JobBase):
+    """Audit job to fetch invalid images from UserSettingsModel."""
+
+    def _get_invalid_image(
+        self, picture_str: Optional[str]
+    ) -> Optional[List[str]]:
+        """Helper function to filter the invalid profile pictures.
+
+        Args:
+            picture_str: Optional[str]. The profile picture data.
+
+        Returns:
+            Optional[List[str]]. None, if the image is valid otherwise
+            the invalid image data.
+        """
+        invalid_image = []
+        try:
+            # Ruling out the possibility of different types for
+            # mypy type checking.
+            assert isinstance(picture_str, str)
+            imgdata = utils.convert_png_data_url_to_binary(picture_str)
+            image = Image.open(io.BytesIO(imgdata))
+            width, height = image.size
+            if width != 150 and height != 150:
+                invalid_image.append(
+                    f'wrong dimensions - height = {height} and width = {width}'
+                )
+        except Exception:
+            logging.exception('ERRORED EXCEPTION AUDIT')
+            invalid_image.append(
+                f'Image is not base64 having value - {picture_str}')
+
+        if len(invalid_image) != 0:
+            return invalid_image
+        return None
+
+    def run(self) -> beam.PCollection[job_run_result.JobRunResult]:
+        users_with_valid_username = (
+            self.pipeline
+            | 'Get all non-deleted UserSettingsModel' >> ndb_io.GetModels(
+                user_models.UserSettingsModel.get_all(include_deleted=False))
+            | 'Filter valid users with not None username' >> beam.Filter(
+                lambda model: model.username is not None)
+        )
+
+        invalid_user_profile_picture = (
+            users_with_valid_username
+            | 'Get invalid images' >> beam.Map(
+                lambda model: (model.username, self._get_invalid_image(
+                    model.profile_picture_data_url)))
+            | 'Filter invalid images' >> beam.Filter(
+                lambda model: model[1] is not None)
+        )
+
+        total_invalid_images = (
+            invalid_user_profile_picture
+            | 'Total number of invalid images' >> (
+                job_result_transforms.CountObjectsToJobRunResult(
+                    'TOTAL INVALID IMAGES'))
+        )
+
+        total_user_with_valid_username = (
+            users_with_valid_username
+            | 'Total valid users' >> (
+                job_result_transforms.CountObjectsToJobRunResult(
+                    'TOTAL USERS WITH VALID USERNAME'))
+        )
+
+        report_invalid_images = (
+            invalid_user_profile_picture
+            | 'Report the data' >> beam.Map(lambda data: (
+                job_run_result.JobRunResult.as_stderr(
+                    f'The username is {data[0]} and the invalid image '
+                    f'details are {data[1]}.'
+                )
+            ))
+        )
+
+        return (
+            (
+                total_invalid_images,
+                report_invalid_images,
+                total_user_with_valid_username
+            )
+            | 'Combine results' >> beam.Flatten()
+        )
+
+
+class FixInvalidProfilePictureJob(base_jobs.JobBase):
+    """Fix invalid profile pictures inside UserSettingsModel."""
+
+    def _fix_invalid_images(
+        self, user_model: user_models.UserSettingsModel
+    ) -> Tuple[user_models.UserSettingsModel, bool]:
+        """Helper function to fix the invalid images.
+
+        Args:
+            user_model: user_models.UserSettingsModel. The UserSettingsModel.
+
+        Returns:
+            Tuple[user_models.UserSettingsModel, bool]. The tuple containing
+            updated UserSettingsModel and a bool value that tells whether the
+            profile picture present is the default data url or not.
+        """
+        profile_picture_data = user_model.profile_picture_data_url
+        width, height = 0, 0
+
+        try:
+            imgdata = utils.convert_png_data_url_to_binary(profile_picture_data)
+            height, width = image_services.get_image_dimensions(imgdata)
+        except Exception:
+            logging.exception('ERRORED EXCEPTION MIGRATION')
+            user_model.profile_picture_data_url = (
+                user_services.fetch_gravatar(user_model.email))
+
+        if (
+            user_model.profile_picture_data_url ==
+            user_services.DEFAULT_IDENTICON_DATA_URL or (
+                width == 76 and height == 76)
+        ):
+            default_image_path = os.path.join(
+                'images', 'avatar', 'user_blue_150px.png')
+            raw_image_png = constants.get_package_file_contents(
+                'assets', default_image_path, binary_mode=True)
+            user_model.profile_picture_data_url = (
+                utils.convert_png_binary_to_data_url(raw_image_png))
+
+        # Here we need to check for the default image again because there is a
+        # possibility that in the above check we are not able to generate the
+        # gravatar for the user having default image and we want to keep track
+        # of all the default images.
+        imgdata = utils.convert_png_data_url_to_binary(
+            user_model.profile_picture_data_url)
+        height, width = image_services.get_image_dimensions(imgdata)
+        if (
+            user_model.profile_picture_data_url ==
+            user_services.DEFAULT_IDENTICON_DATA_URL or (
+                width == 76 and height == 76)
+        ):
+            return (user_model, False)
+
+        return (user_model, True)
+
+    def run(self) -> beam.PCollection[job_run_result.JobRunResult]:
+        fixed_user_profile_picture = (
+            self.pipeline
+            | 'Get all non-deleted UserSettingsModel' >> ndb_io.GetModels(
+                user_models.UserSettingsModel.get_all(include_deleted=False))
+            | 'Filter user with valid usernames' >> beam.Filter(
+                lambda model: model.username is not None)
+            | 'Get invalid images' >> beam.Map(self._fix_invalid_images)
+        )
+
+        default_profile_picture = (
+            fixed_user_profile_picture
+            | 'Filter default profile pictures' >> beam.Filter(
+                lambda value: value[1] is False)
+            | 'Total count for user models having default profile picture' >> (
+                job_result_transforms.CountObjectsToJobRunResult(
+                    'DEFAULT PROFILE PICTURE'))
+        )
+
+        count_user_models_iterated = (
+            fixed_user_profile_picture
+            | 'Total count for user models' >> (
+                job_result_transforms.CountObjectsToJobRunResult(
+                    'USER MODELS ITERATED'))
+        )
+
+        unused_put_results = (
+            fixed_user_profile_picture
+            | 'Map with only models' >> beam.Map(lambda value: value[0])
+            | 'Put models into the datastore' >> ndb_io.PutModels()
+        )
+
+        return (
+            (
+                count_user_models_iterated,
+                default_profile_picture
+            )
+            | 'Combine results' >> beam.Flatten()
+        )