Skip to content

Commit

Permalink
M 2.2.1: Audit and Fix invalid profile picture inside UserSettingsMod…
Browse files Browse the repository at this point in the history
…el (#16492)
  • Loading branch information
lkbhitesh07 authored Jan 2, 2023
1 parent fcd081c commit 839ab02
Show file tree
Hide file tree
Showing 8 changed files with 490 additions and 6 deletions.
4 changes: 1 addition & 3 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@
# in production. Note that this package is generated
# by running 'python setup.py sdist' in build.py.

include assets/constants.ts
include assets/release_constants.json
include assets/rich_text_components_definitions.ts
graft assets
include extensions/interactions/html_field_types_to_rule_specs.json
include extensions/interactions/legacy_interaction_specs_by_state_version/*.json
include extensions/interactions/rule_templates.json
Expand Down
Binary file added assets/images/avatar/user_blue_150px.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
31 changes: 29 additions & 2 deletions core/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import pkgutil
import re

from typing import Any, Dict
from typing import Any, Dict, Literal, Union, overload


# Here we use type Any because we need to parse and return the generic JSON
Expand Down Expand Up @@ -66,7 +66,25 @@ def remove_comments(text: str) -> str:
# the chronology of our files execution. utils imports constants and constants
# need utils.get_package_file_contents but it does not have it loaded to memory
# yet. If called from utils we get error as `module has no attribute`.
def get_package_file_contents(package: str, filepath: str) -> str:
@overload
def get_package_file_contents(
package: str, filepath: str, *, binary_mode: Literal[True]
) -> bytes: ...


@overload
def get_package_file_contents(package: str, filepath: str) -> str: ...


@overload
def get_package_file_contents(
package: str, filepath: str, *, binary_mode: Literal[False]
) -> str: ...


def get_package_file_contents(
package: str, filepath: str, *, binary_mode: bool = False
) -> Union[str, bytes]:
"""Open file and return its contents. This needs to be used for files that
are loaded by the Python code directly, like constants.ts or
rich_text_components.json. This function is needed to make loading these
Expand All @@ -77,6 +95,7 @@ def get_package_file_contents(package: str, filepath: str) -> str:
For Oppia the package is usually the folder in the root folder,
like 'core' or 'extensions'.
filepath: str. The path to the file in the package.
binary_mode: bool. True when we want to read file in binary mode.
Returns:
str. The contents of the file.
Expand All @@ -85,6 +104,12 @@ def get_package_file_contents(package: str, filepath: str) -> str:
FileNotFoundError. The file does not exist.
"""
try:
if binary_mode:
with io.open(
os.path.join(package, filepath), 'rb', encoding=None
) as binary_file:
read_binary_mode_data: bytes = binary_file.read()
return read_binary_mode_data
with io.open(
os.path.join(package, filepath), 'r', encoding='utf-8'
) as file:
Expand All @@ -93,6 +118,8 @@ def get_package_file_contents(package: str, filepath: str) -> str:
file_data = pkgutil.get_data(package, filepath)
if file_data is None:
raise e
if binary_mode:
return file_data
return file_data.decode('utf-8')


Expand Down
29 changes: 28 additions & 1 deletion core/constants_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,32 @@ def test_loading_non_existing_file_throws_error(self) -> None:
FileNotFoundError,
'No such file or directory: \'assets/non_exist.xy\''
):
constants.get_package_file_contents('assets', 'non_exist.xy')
constants.get_package_file_contents(
'assets', 'non_exist.xy', binary_mode=False)

def test_loading_binary_file_in_package_returns_the_content(self) -> None:
"""Test get_package_file_contents with imaginary binary file."""
with self.swap_to_always_return(pkgutil, 'get_data', 'File data'):
self.assertEqual(
constants.get_package_file_contents(
'assets', 'non_exist.xy', binary_mode=True), 'File data'
)

def test_loading_binary_file_returns_the_content(self) -> None:
"""Test get_package_file_contents with binary file."""
with utils.open_file(
os.path.join(
'assets', 'images', 'avatar', 'user_blue_150px.png'),
'rb',
encoding=None
) as f:
raw_image_png = f.read()
default_image_path = os.path.join(
'images', 'avatar', 'user_blue_150px.png')
self.assertEqual(
constants.get_package_file_contents(
'assets', default_image_path, binary_mode=True), raw_image_png
)

def test_loading_file_in_package_returns_the_content(self) -> None:
"""Test get_package_file_contents with imaginary file."""
Expand All @@ -72,6 +97,8 @@ def test_loading_file_in_non_existent_package_throws_error(self) -> None:
'No such file or directory: \'assets/non_exist.xy\''
):
constants.get_package_file_contents('assets', 'non_exist.xy')
constants.get_package_file_contents(
'assets', 'non_exist.xy', binary_mode=True)

def test_difficulty_values_are_matched(self) -> None:
"""Tests that the difficulty values and strings are matched in the
Expand Down
227 changes: 227 additions & 0 deletions core/jobs/batch_jobs/user_settings_profile_picture_jobs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
# coding: utf-8
#
# Copyright 2022 The Oppia Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Jobs for UserSettingsModel profile picture."""

from __future__ import annotations

import io
import logging
import os

from core import constants
from core import utils
from core.domain import image_services
from core.domain import user_services
from core.jobs import base_jobs
from core.jobs.io import ndb_io
from core.jobs.transforms import job_result_transforms
from core.jobs.types import job_run_result
from core.platform import models

from PIL import Image
import apache_beam as beam
from typing import List, Optional, Tuple

MYPY = False
if MYPY: # pragma: no cover
from mypy_imports import user_models

(user_models,) = models.Registry.import_models([models.Names.USER])


class AuditInvalidProfilePictureJob(base_jobs.JobBase):
"""Audit job to fetch invalid images from UserSettingsModel."""

def _get_invalid_image(
self, picture_str: Optional[str]
) -> Optional[List[str]]:
"""Helper function to filter the invalid profile pictures.
Args:
picture_str: Optional[str]. The profile picture data.
Returns:
Optional[List[str]]. None, if the image is valid otherwise
the invalid image data.
"""
invalid_image = []
try:
# Ruling out the possibility of different types for
# mypy type checking.
assert isinstance(picture_str, str)
imgdata = utils.convert_png_data_url_to_binary(picture_str)
image = Image.open(io.BytesIO(imgdata))
width, height = image.size
if width != 150 and height != 150:
invalid_image.append(
f'wrong dimensions - height = {height} and width = {width}'
)
except Exception:
logging.exception('ERRORED EXCEPTION AUDIT')
invalid_image.append(
f'Image is not base64 having value - {picture_str}')

if len(invalid_image) != 0:
return invalid_image
return None

def run(self) -> beam.PCollection[job_run_result.JobRunResult]:
users_with_valid_username = (
self.pipeline
| 'Get all non-deleted UserSettingsModel' >> ndb_io.GetModels(
user_models.UserSettingsModel.get_all(include_deleted=False))
| 'Filter valid users with not None username' >> beam.Filter(
lambda model: model.username is not None)
)

invalid_user_profile_picture = (
users_with_valid_username
| 'Get invalid images' >> beam.Map(
lambda model: (model.username, self._get_invalid_image(
model.profile_picture_data_url)))
| 'Filter invalid images' >> beam.Filter(
lambda model: model[1] is not None)
)

total_invalid_images = (
invalid_user_profile_picture
| 'Total number of invalid images' >> (
job_result_transforms.CountObjectsToJobRunResult(
'TOTAL INVALID IMAGES'))
)

total_user_with_valid_username = (
users_with_valid_username
| 'Total valid users' >> (
job_result_transforms.CountObjectsToJobRunResult(
'TOTAL USERS WITH VALID USERNAME'))
)

report_invalid_images = (
invalid_user_profile_picture
| 'Report the data' >> beam.Map(lambda data: (
job_run_result.JobRunResult.as_stderr(
f'The username is {data[0]} and the invalid image '
f'details are {data[1]}.'
)
))
)

return (
(
total_invalid_images,
report_invalid_images,
total_user_with_valid_username
)
| 'Combine results' >> beam.Flatten()
)


class FixInvalidProfilePictureJob(base_jobs.JobBase):
"""Fix invalid profile pictures inside UserSettingsModel."""

def _fix_invalid_images(
self, user_model: user_models.UserSettingsModel
) -> Tuple[user_models.UserSettingsModel, bool]:
"""Helper function to fix the invalid images.
Args:
user_model: user_models.UserSettingsModel. The UserSettingsModel.
Returns:
Tuple[user_models.UserSettingsModel, bool]. The tuple containing
updated UserSettingsModel and a bool value that tells whether the
profile picture present is the default data url or not.
"""
profile_picture_data = user_model.profile_picture_data_url
width, height = 0, 0

try:
imgdata = utils.convert_png_data_url_to_binary(profile_picture_data)
height, width = image_services.get_image_dimensions(imgdata)
except Exception:
logging.exception('ERRORED EXCEPTION MIGRATION')
user_model.profile_picture_data_url = (
user_services.fetch_gravatar(user_model.email))

if (
user_model.profile_picture_data_url ==
user_services.DEFAULT_IDENTICON_DATA_URL or (
width == 76 and height == 76)
):
default_image_path = os.path.join(
'images', 'avatar', 'user_blue_150px.png')
raw_image_png = constants.get_package_file_contents(
'assets', default_image_path, binary_mode=True)
user_model.profile_picture_data_url = (
utils.convert_png_binary_to_data_url(raw_image_png))

# Here we need to check for the default image again because there is a
# possibility that in the above check we are not able to generate the
# gravatar for the user having default image and we want to keep track
# of all the default images.
imgdata = utils.convert_png_data_url_to_binary(
user_model.profile_picture_data_url)
height, width = image_services.get_image_dimensions(imgdata)
if (
user_model.profile_picture_data_url ==
user_services.DEFAULT_IDENTICON_DATA_URL or (
width == 76 and height == 76)
):
return (user_model, False)

return (user_model, True)

def run(self) -> beam.PCollection[job_run_result.JobRunResult]:
fixed_user_profile_picture = (
self.pipeline
| 'Get all non-deleted UserSettingsModel' >> ndb_io.GetModels(
user_models.UserSettingsModel.get_all(include_deleted=False))
| 'Filter user with valid usernames' >> beam.Filter(
lambda model: model.username is not None)
| 'Get invalid images' >> beam.Map(self._fix_invalid_images)
)

default_profile_picture = (
fixed_user_profile_picture
| 'Filter default profile pictures' >> beam.Filter(
lambda value: value[1] is False)
| 'Total count for user models having default profile picture' >> (
job_result_transforms.CountObjectsToJobRunResult(
'DEFAULT PROFILE PICTURE'))
)

count_user_models_iterated = (
fixed_user_profile_picture
| 'Total count for user models' >> (
job_result_transforms.CountObjectsToJobRunResult(
'USER MODELS ITERATED'))
)

unused_put_results = (
fixed_user_profile_picture
| 'Map with only models' >> beam.Map(lambda value: value[0])
| 'Put models into the datastore' >> ndb_io.PutModels()
)

return (
(
count_user_models_iterated,
default_profile_picture
)
| 'Combine results' >> beam.Flatten()
)
Loading

0 comments on commit 839ab02

Please sign in to comment.