Skip to content

Commit

Permalink
Fix part of oppia#3047: Collection contents migration (oppia#3209)
Browse files Browse the repository at this point in the history
* create collection_content

* Add collection migration job

* Fix some bugs

* fix lint errors

* fix frontend tests

* Add backend test checking that schema migration methods exist

* Fix test

* fix some typos

* respond to review comments

* fix backend test

* respond to review comments

* yield job status in collection migration job

* Edit error message

* fix test

* respond to review comments

* put error message back
  • Loading branch information
wxyxinyu authored and seanlip committed Mar 28, 2017
1 parent af1d5d8 commit 8530045
Show file tree
Hide file tree
Showing 9 changed files with 383 additions and 26 deletions.
36 changes: 35 additions & 1 deletion core/domain/collection_domain.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ def __init__(self, collection_id, title, category, objective,
function will need to be added to this class to convert from the
current schema version to the new one. This function should be called
in both from_yaml in this class and
collection_services._migrate_collection_to_latest_schema.
collection_services._migrate_collection_contents_to_latest_schema.
feconf.CURRENT_COLLECTION_SCHEMA_VERSION should be incremented and the
new value should be saved in the collection after the migration
process, ensuring it represents the latest schema version.
Expand Down Expand Up @@ -380,6 +380,40 @@ def from_yaml(cls, collection_id, yaml_content):
collection_dict['id'] = collection_id
return Collection.from_dict(collection_dict)

@classmethod
def _convert_collection_contents_v1_dict_to_v2_dict(
cls, collection_contents):
"""Converts from version 1 to 2. Does nothing since this migration only
changes the language code.
"""
return collection_contents

@classmethod
def update_collection_contents_from_model(
cls, versioned_collection_contents, current_version):
"""Converts the states blob contained in the given
versioned_collection_contents dict from current_version to
current_version + 1.
Note that the versioned_collection_contents being passed in is modified
in-place.
"""
if (versioned_collection_contents['schema_version'] + 1 >
feconf.CURRENT_COLLECTION_SCHEMA_VERSION):
raise Exception('Collection is version %d but current collection'
' schema version is %d' % (
versioned_collection_contents['schema_version'],
feconf.CURRENT_COLLECTION_SCHEMA_VERSION))

versioned_collection_contents['schema_version'] = (
current_version + 1)

conversion_fn = getattr(
cls, '_convert_collection_contents_v%s_dict_to_v%s_dict' % (
current_version, current_version + 1))
versioned_collection_contents['collection_contents'] = conversion_fn(
versioned_collection_contents['collection_contents'])

@property
def skills(self):
"""The skills of a collection are made up of all prerequisite and
Expand Down
41 changes: 41 additions & 0 deletions core/domain/collection_domain_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -573,6 +573,47 @@ def test_yaml_import_and_export(self):
collection_domain.Collection.from_yaml('collection3', None)


class SchemaMigrationMethodsUnitTests(test_utils.GenericTestBase):
"""Tests the presence of appropriate schema migration methods in the
Collection domain object class.
"""

def test_correct_collection_contents_schema_conversion_methods_exist(self):
"""Test that the right collection_contents schema conversion methods
exist.
"""
current_collection_schema_version = (
feconf.CURRENT_COLLECTION_SCHEMA_VERSION)
for version_num in range(1, current_collection_schema_version):
self.assertTrue(hasattr(
collection_domain.Collection,
'_convert_collection_contents_v%s_dict_to_v%s_dict' % (
version_num, version_num + 1)))

self.assertFalse(hasattr(
collection_domain.Collection,
'_convert_collection_contents_v%s_dict_to_v%s_dict' % (
current_collection_schema_version,
current_collection_schema_version + 1)))

def test_correct_collection_schema_conversion_methods_exist(self):
"""Test that the right collection schema conversion methods exist."""
current_collection_schema_version = (
feconf.CURRENT_COLLECTION_SCHEMA_VERSION)

for version_num in range(1, current_collection_schema_version):
self.assertTrue(hasattr(
collection_domain.Collection,
'_convert_v%s_dict_to_v%s_dict' % (
version_num, version_num + 1)))

self.assertFalse(hasattr(
collection_domain.Collection,
'_convert_v%s_dict_to_v%s_dict' % (
current_collection_schema_version,
current_collection_schema_version + 1)))


class SchemaMigrationUnitTests(test_utils.GenericTestBase):
"""Test migration methods for yaml content."""

Expand Down
84 changes: 84 additions & 0 deletions core/domain/collection_jobs_one_off.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# coding: utf-8
#
# Copyright 2017 The Oppia Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""One-off jobs for collections."""

import logging

from core import jobs
from core.domain import collection_domain
from core.domain import collection_services
from core.platform import models
import feconf

(base_models, collection_models,) = models.Registry.import_models([
models.NAMES.base_model, models.NAMES.collection])


class CollectionMigrationJob(jobs.BaseMapReduceJobManager):
"""A reusable one-time job that may be used to migrate collection schema
versions. This job will load all existing collections from the data store
and immediately store them back into the data store. The loading process of
a collection in collection_services automatically performs schema updating.
This job persists that conversion work, keeping collections up-to-date and
improving the load time of new collections.
"""

_DELETED_KEY = 'collection_deleted'
_ERROR_KEY = 'validation_error'
_MIGRATED_KEY = 'collection_migrated'

@classmethod
def entity_classes_to_map_over(cls):
return [collection_models.CollectionModel]

@staticmethod
def map(item):
if item.deleted:
yield (CollectionMigrationJob._DELETED_KEY,
'Encountered deleted collection.')
return

# Note: the read will bring the collection up to the newest version.
collection = collection_services.get_collection_by_id(item.id)
try:
collection.validate()
except Exception as e:
logging.error(
'Collection %s failed validation: %s' % (item.id, e))
yield (CollectionMigrationJob._ERROR_KEY,
'Collection %s failed validation: %s' % (item.id, e))
return

# Write the new collection into the datastore if it's different from
# the old version.
if item.schema_version <= feconf.CURRENT_COLLECTION_SCHEMA_VERSION:
commit_cmds = [{
'cmd': collection_domain.CMD_MIGRATE_SCHEMA_TO_LATEST_VERSION,
'from_version': item.schema_version,
'to_version': str(
feconf.CURRENT_COLLECTION_SCHEMA_VERSION)
}]
collection_services.update_collection(
feconf.MIGRATION_BOT_USERNAME, item.id, commit_cmds,
'Update collection schema version to %d.' % (
feconf.CURRENT_COLLECTION_SCHEMA_VERSION))
yield (CollectionMigrationJob._MIGRATED_KEY,
'Collection successfully migrated.')

@staticmethod
def reduce(key, values):
yield (key, values)
152 changes: 152 additions & 0 deletions core/domain/collection_jobs_one_off_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
# coding: utf-8
#
# Copyright 2017 The Oppia Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for Collection-related one-off jobs."""

from core.domain import collection_domain
from core.domain import collection_jobs_one_off
from core.domain import collection_services
from core.domain import rights_manager
from core.platform import models
from core.tests import test_utils
import feconf

(job_models, collection_models,) = models.Registry.import_models([
models.NAMES.job, models.NAMES.collection])
search_services = models.Registry.import_search_services()


class CollectionMigrationJobTest(test_utils.GenericTestBase):

ALBERT_EMAIL = 'albert@example.com'
ALBERT_NAME = 'albert'

COLLECTION_ID = 'collection_id'
EXP_ID = 'exp_id'

def setUp(self):
super(CollectionMigrationJobTest, self).setUp()

# Setup user who will own the test collections.
self.albert_id = self.get_user_id_from_email(self.ALBERT_EMAIL)
self.signup(self.ALBERT_EMAIL, self.ALBERT_NAME)
self.process_and_flush_pending_tasks()

def test_migration_job_does_not_convert_up_to_date_collection(self):
"""Tests that the collection migration job does not convert an
collection that is already the latest collection content schema version.
"""
# Create a new, collection that should not be affected by the
# job.
collection = collection_domain.Collection.create_default_collection(
self.COLLECTION_ID, 'A title', 'A Category', 'An Objective')
collection_services.save_new_collection(self.albert_id, collection)
self.assertEqual(
collection.schema_version,
feconf.CURRENT_COLLECTION_SCHEMA_VERSION)
yaml_before_migration = collection.to_yaml()

# Start migration job.
job_id = (
collection_jobs_one_off.CollectionMigrationJob.create_new())
collection_jobs_one_off.CollectionMigrationJob.enqueue(job_id)
self.process_and_flush_pending_tasks()

# Verify the collection is exactly the same after migration.
updated_collection = (
collection_services.get_collection_by_id(self.COLLECTION_ID))
self.assertEqual(
updated_collection.schema_version,
feconf.CURRENT_COLLECTION_SCHEMA_VERSION)
after_converted_yaml = updated_collection.to_yaml()
self.assertEqual(after_converted_yaml, yaml_before_migration)

def test_migration_job_skips_deleted_collection(self):
"""Tests that the collection migration job skips deleted collection
and does not attempt to migrate.
"""
collection = collection_domain.Collection.create_default_collection(
self.COLLECTION_ID, 'A title', 'A Category', 'An Objective')
collection_services.save_new_collection(self.albert_id, collection)

# Note: This creates a summary based on the upgraded model (which is
# fine). A summary is needed to delete the collection.
collection_services.create_collection_summary(
self.COLLECTION_ID, None)

# Delete the exploration before migration occurs.
collection_services.delete_collection(
self.albert_id, self.COLLECTION_ID)

# Ensure the exploration is deleted.
with self.assertRaisesRegexp(Exception, 'Entity .* not found'):
collection_services.get_collection_by_id(self.COLLECTION_ID)

# Start migration job on sample exploration.
job_id = (
collection_jobs_one_off.CollectionMigrationJob.create_new())
collection_jobs_one_off.CollectionMigrationJob.enqueue(job_id)

# This running without errors indicates the deleted collection is
# being ignored.
self.process_and_flush_pending_tasks()

# Ensure the exploration is still deleted.
with self.assertRaisesRegexp(Exception, 'Entity .* not found'):
collection_services.get_collection_by_id(self.COLLECTION_ID)

def test_migration_job_migrates_collection_nodes(self):
"""Tests that the collection migration job migrates content from
nodes to collection_contents if collection_contents is empty.
"""
# Create an exploration to put in the collection.
self.save_new_default_exploration(self.EXP_ID, self.albert_id)
node = collection_domain.CollectionNode.create_default_node(self.EXP_ID)

# Create a collection directly using the model, so that the collection
# nodes are stored in the 'nodes' property rather than the
# 'collection_contents' property.
collection_title = 'A title'
collection_category = 'A category'
rights_manager.create_new_collection_rights(
self.COLLECTION_ID, self.albert_id)
model = collection_models.CollectionModel(
id=self.COLLECTION_ID,
category=collection_category,
title=collection_title,
objective='An objective',
tags=[],
schema_version=1,
nodes=[node.to_dict()],
)
model.commit(self.albert_id, 'Made a new collection!', [{
'cmd': collection_services.CMD_CREATE_NEW,
'title': collection_title,
'category': collection_category,
}])

# Check that collection_contents is empty
self.assertEqual(model.collection_contents, {})

# Run the job. This should populate collection_contents.
job_id = (
collection_jobs_one_off.CollectionMigrationJob.create_new())
collection_jobs_one_off.CollectionMigrationJob.enqueue(job_id)
self.process_and_flush_pending_tasks()

new_model = collection_models.CollectionModel.get(self.COLLECTION_ID)
self.assertEqual(
new_model.collection_contents, {'nodes': [node.to_dict()]})
Loading

0 comments on commit 8530045

Please sign in to comment.