Fix classifier models when exploration is reverted and add test case …

…for auto migrations (oppia#5888) * Fix classifier training jobs when exploration is reverted. * test classifier verification test for auto migration of exploration. * Update test case * Addressed review comments * Add test case for exploration revert * Addressed review comments.
quantumferret · Jan 6, 2019 · becb800 · becb800
1 parent cd22391
commit becb800
Show file tree

Hide file tree

Showing 6 changed files with 300 additions and 4 deletions.
diff --git a/core/controllers/reader.py b/core/controllers/reader.py
@@ -271,8 +271,8 @@ def get(self, exploration_id):
         state_classifier_mapping = {}
         classifier_training_jobs = (
             classifier_services.get_classifier_training_jobs(
-                exploration_id, exploration.version, exploration.states))
-        for index, state_name in enumerate(exploration.states):
+                exploration_id, exploration.version, exploration.states.keys()))
+        for index, state_name in enumerate(exploration.states.keys()):
             if classifier_training_jobs[index] is not None:
                 classifier_data = classifier_training_jobs[
                     index].classifier_data

diff --git a/core/domain/classifier_services.py b/core/domain/classifier_services.py
@@ -443,3 +443,32 @@ def get_classifier_training_jobs(exp_id, exp_version, state_names):
         if mapping_model is None:
             classifier_training_jobs.insert(index, None)
     return classifier_training_jobs
+
+
+def create_classifier_training_job_for_reverted_exploration(
+        exploration, exploration_to_revert_to):
+    """Create classifier training job model when an exploration is reverted.
+
+    Args:
+        exploration: Exploration. Exploration domain object.
+        exploration_to_revert_to: Exploration. Exploration to revert to.
+    """
+    classifier_training_jobs_for_old_version = get_classifier_training_jobs(
+        exploration.id, exploration_to_revert_to.version,
+        exploration_to_revert_to.states.keys())
+    job_exploration_mappings = []
+    state_names = exploration_to_revert_to.states.keys()
+    for index, classifier_training_job in enumerate(
+            classifier_training_jobs_for_old_version):
+        if classifier_training_job is None:
+            continue
+        state_name = state_names[index]
+        job_exploration_mapping = (
+            classifier_domain.TrainingJobExplorationMapping(
+                exploration.id, exploration.version + 1, state_name,
+                classifier_training_job.job_id))
+        job_exploration_mapping.validate()
+        job_exploration_mappings.append(job_exploration_mapping)
+
+    classifier_models.TrainingJobExplorationMappingModel.create_multi(
+        job_exploration_mappings)
diff --git a/core/domain/exp_jobs_one_off_test.py b/core/domain/exp_jobs_one_off_test.py
@@ -16,6 +16,7 @@
 
 """Tests for Exploration-related jobs."""
 
+import datetime
 import json
 import os
 
@@ -32,8 +33,8 @@
 import feconf
 import utils
 
-(job_models, exp_models,) = models.Registry.import_models([
-    models.NAMES.job, models.NAMES.exploration])
+(job_models, exp_models, classifier_models) = models.Registry.import_models([
+    models.NAMES.job, models.NAMES.exploration, models.NAMES.classifier])
 search_services = models.Registry.import_search_services()
 
 
@@ -668,6 +669,42 @@ def test_migration_job_skips_deleted_explorations(self):
         with self.assertRaisesRegexp(Exception, 'Entity .* not found'):
             exp_services.get_exploration_by_id(self.NEW_EXP_ID)
 
+    def test_migration_job_creates_appropriate_classifier_models(self):
+        """Tests that the exploration migration job creates appropriate
+        classifier data models for explorations.
+        """
+        self.save_new_exp_with_states_schema_v21(
+            self.NEW_EXP_ID, self.albert_id, self.EXP_TITLE)
+        exploration = exp_services.get_exploration_by_id(self.NEW_EXP_ID)
+
+        initial_state_name = exploration.states.keys()[0]
+        # Store classifier model for the new exploration.
+        classifier_model_id = classifier_models.ClassifierTrainingJobModel.create( # pylint: disable=line-too-long
+            'TextClassifier', 'TextInput', self.NEW_EXP_ID, exploration.version,
+            datetime.datetime.utcnow(), {}, initial_state_name,
+            feconf.TRAINING_JOB_STATUS_COMPLETE, None, 1)
+        # Store training job model for the classifier model.
+        classifier_models.TrainingJobExplorationMappingModel.create(
+            self.NEW_EXP_ID, exploration.version, initial_state_name,
+            classifier_model_id)
+
+        # Start migration job on sample exploration.
+        job_id = exp_jobs_one_off.ExplorationMigrationJobManager.create_new()
+        exp_jobs_one_off.ExplorationMigrationJobManager.enqueue(job_id)
+        with self.swap(feconf, 'ENABLE_ML_CLASSIFIERS', True):
+            with self.swap(feconf, 'MIN_TOTAL_TRAINING_EXAMPLES', 2):
+                with self.swap(feconf, 'MIN_ASSIGNED_LABELS', 1):
+                    self.process_and_flush_pending_tasks()
+
+        new_exploration = exp_services.get_exploration_by_id(self.NEW_EXP_ID)
+        initial_state_name = new_exploration.states.keys()[0]
+        self.assertLess(exploration.version, new_exploration.version)
+        classifier_exp_mapping_model = classifier_models.TrainingJobExplorationMappingModel.get_models( # pylint: disable=line-too-long
+            self.NEW_EXP_ID, new_exploration.version,
+            [initial_state_name])[0]
+        self.assertEqual(
+            classifier_exp_mapping_model.job_id, classifier_model_id)
+
 
 class ExplorationStateIdMappingJobTest(test_utils.GenericTestBase):
     """Tests for the ExplorationStateIdMapping one off job."""

diff --git a/core/domain/exp_services.py b/core/domain/exp_services.py
@@ -1412,6 +1412,12 @@ def revert_exploration(
         current_exploration, exp_versions_diff=None,
         revert_to_version=revert_to_version)
 
+    if feconf.ENABLE_ML_CLASSIFIERS:
+        exploration_to_revert_to = get_exploration_by_id(
+            exploration_id, version=revert_to_version)
+        classifier_services.create_classifier_training_job_for_reverted_exploration( # pylint: disable=line-too-long
+            current_exploration, exploration_to_revert_to)
+
     # Save state id mapping model for the new exploration version.
     create_and_save_state_id_mapping_model_for_reverted_exploration(
         exploration_id, current_version, revert_to_version)

diff --git a/core/domain/exp_services_test.py b/core/domain/exp_services_test.py
@@ -22,6 +22,7 @@
 import os
 import zipfile
 
+from core.domain import classifier_services
 from core.domain import exp_domain
 from core.domain import exp_jobs_one_off
 from core.domain import exp_services
@@ -97,6 +98,94 @@ def setUp(self):
         self.user_id_admin = self.get_user_id_from_email(self.ADMIN_EMAIL)
 
 
+class ExplorationRevertClassifierTests(ExplorationServicesUnitTests):
+    """Test that classifier models are correctly mapped when an exploration
+    is reverted.
+    """
+
+    def test_reverting_an_exploration_maintains_classifier_models(self):
+        """Test that when exploration is reverted to previous version
+        it maintains appropriate classifier models mapping.
+        """
+        with self.swap(feconf, 'ENABLE_ML_CLASSIFIERS', True):
+            self.save_new_valid_exploration(
+                self.EXP_ID, self.owner_id, title='Bridges in England',
+                category='Architecture', language_code='en')
+
+        interaction_answer_groups = [{
+            'rule_specs': [{
+                'rule_type': 'Equals',
+                'inputs': {'x': 'abc'},
+            }],
+            'outcome': {
+                'dest': feconf.DEFAULT_INIT_STATE_NAME,
+                'feedback': {
+                    'content_id': 'feedback_1',
+                    'html': 'Try again'
+                },
+                'labelled_as_correct': False,
+                'param_changes': [],
+                'refresher_exploration_id': None,
+                'missing_prerequisite_skill_id': None
+            },
+            'training_data': ['answer1', 'answer2', 'answer3'],
+            'tagged_misconception_id': None
+        }]
+
+        change_list = [exp_domain.ExplorationChange({
+            'cmd': exp_domain.CMD_EDIT_STATE_PROPERTY,
+            'state_name': feconf.DEFAULT_INIT_STATE_NAME,
+            'property_name': (
+                exp_domain.STATE_PROPERTY_INTERACTION_ANSWER_GROUPS),
+            'new_value': interaction_answer_groups
+        }), exp_domain.ExplorationChange({
+            'cmd': exp_domain.CMD_EDIT_STATE_PROPERTY,
+            'state_name': feconf.DEFAULT_INIT_STATE_NAME,
+            'property_name': (
+                exp_domain.STATE_PROPERTY_CONTENT_IDS_TO_AUDIO_TRANSLATIONS),
+            'new_value': {
+                'content': {},
+                'feedback_1': {},
+                'default_outcome': {}
+            }
+        })]
+
+        with self.swap(feconf, 'ENABLE_ML_CLASSIFIERS', True):
+            with self.swap(feconf, 'MIN_TOTAL_TRAINING_EXAMPLES', 2):
+                with self.swap(feconf, 'MIN_ASSIGNED_LABELS', 1):
+                    exp_services.update_exploration(
+                        self.owner_id, self.EXP_ID, change_list, '')
+
+        exp = exp_services.get_exploration_by_id(self.EXP_ID)
+        job = classifier_services.get_classifier_training_jobs(
+            self.EXP_ID, exp.version, [feconf.DEFAULT_INIT_STATE_NAME])[0]
+        self.assertIsNotNone(job)
+
+        change_list = [exp_domain.ExplorationChange({
+            'cmd': exp_domain.CMD_EDIT_EXPLORATION_PROPERTY,
+            'property_name': 'title',
+            'new_value': 'A new title'
+        })]
+
+        with self.swap(feconf, 'ENABLE_ML_CLASSIFIERS', True):
+            with self.swap(feconf, 'MIN_TOTAL_TRAINING_EXAMPLES', 2):
+                with self.swap(feconf, 'MIN_ASSIGNED_LABELS', 1):
+                    exp_services.update_exploration(
+                        self.owner_id, self.EXP_ID, change_list, '')
+
+                    exp = exp_services.get_exploration_by_id(self.EXP_ID)
+                    # Revert exploration to previous version.
+                    exp_services.revert_exploration(
+                        self.owner_id, self.EXP_ID, exp.version,
+                        exp.version - 1)
+
+        exp = exp_services.get_exploration_by_id(self.EXP_ID)
+        new_job = classifier_services.get_classifier_training_jobs(
+            self.EXP_ID, exp.version, [feconf.DEFAULT_INIT_STATE_NAME])[0]
+        self.assertIsNotNone(new_job)
+        self.assertEqual(job.job_id, new_job.job_id)
+
+
 class ExplorationQueriesUnitTests(ExplorationServicesUnitTests):
     """Tests query methods."""
 

diff --git a/core/tests/test_utils.py b/core/tests/test_utils.py
@@ -225,6 +225,75 @@ class TestBase(unittest.TestCase):
         'classifier_model_id': None
     }
 
+    VERSION_21_STATE_DICT = {
+        'END': {
+            'classifier_model_id': None,
+            'content': {
+                'content_id': 'content',
+                'html': 'Congratulations, you have finished!'},
+            'content_ids_to_audio_translations': {
+                'content': {}},
+            'interaction': {
+                'answer_groups': [],
+                'confirmed_unclassified_answers': [],
+                'customization_args': {
+                    'recommendedExplorationIds': {'value': []}},
+                'default_outcome': None,
+                'hints': [],
+                'id': 'EndExploration',
+                'solution': None
+            },
+            'param_changes': []
+        },
+        'Introduction': {
+            'classifier_model_id': None,
+            'content': {
+                'content_id': 'content',
+                'html': ''
+            },
+            'content_ids_to_audio_translations': {
+                'content': {},
+                'default_outcome': {},
+                'feedback_1': {}
+            },
+            'interaction': {
+                'answer_groups': [{
+                    'outcome': {
+                        'dest': 'END',
+                        'feedback': {
+                            'content_id': 'feedback_1',
+                            'html': 'Correct!'},
+                        'labelled_as_correct': False,
+                        'missing_prerequisite_skill_id': None,
+                        'param_changes': [],
+                        'refresher_exploration_id': None},
+                    'rule_specs': [{
+                        'inputs': {'x': 'InputString'},
+                        'rule_type': 'Equals'}],
+                    'tagged_misconception_id': None,
+                    'training_data': ['answer1', 'answer2', 'answer3']}],
+                'confirmed_unclassified_answers': [],
+                'customization_args': {
+                    'placeholder': {'value': ''},
+                    'rows': {'value': 1}},
+                'default_outcome': {
+                    'dest': 'Introduction',
+                    'feedback': {
+                        'content_id': 'default_outcome',
+                        'html': ''},
+                    'labelled_as_correct': False,
+                    'missing_prerequisite_skill_id': None,
+                    'param_changes': [],
+                    'refresher_exploration_id': None},
+                'hints': [],
+                'id': 'TextInput',
+                'solution': None
+            },
+            'param_changes': []
+        }
+    }
+
+
     VERSION_1_STORY_CONTENTS_DICT = {
         'nodes': [{
             'outline': u'',
@@ -930,6 +999,72 @@ def save_new_exp_with_states_schema_v0(self, exp_id, user_id, title):
         exploration = exp_services.get_exploration_from_model(exp_model)
         exp_services.create_and_save_state_id_mapping_model(exploration, [])
 
+    def save_new_exp_with_states_schema_v21(self, exp_id, user_id, title):
+        """Saves a new default exploration with a default version 21 states
+        dictionary. Version 21 is where training data of exploration is stored
+        with the states dict.
+
+        This function should only be used for creating explorations in tests
+        involving migration of datastore explorations that use an old states
+        schema version.
+
+        Note that it makes an explicit commit to the datastore instead of using
+        the usual functions for updating and creating explorations. This is
+        because the latter approach would result in an exploration with the
+        *current* states schema version.
+
+        Args:
+            exp_id: str. The exploration ID.
+            user_id: str. The user_id of the creator.
+            title: str. The title of the exploration.
+        """
+        exp_model = exp_models.ExplorationModel(
+            id=exp_id,
+            category='category',
+            title=title,
+            objective='Old objective',
+            language_code='en',
+            tags=[],
+            blurb='',
+            author_notes='',
+            states_schema_version=21,
+            init_state_name=feconf.DEFAULT_INIT_STATE_NAME,
+            states=self.VERSION_21_STATE_DICT,
+            param_specs={},
+            param_changes=[]
+        )
+        rights_manager.create_new_exploration_rights(exp_id, user_id)
+
+        commit_message = 'New exploration created with title \'%s\'.' % title
+        exp_model.commit(
+            user_id, commit_message, [{
+                'cmd': 'create_new',
+                'title': 'title',
+                'category': 'category',
+            }])
+        exp_rights = exp_models.ExplorationRightsModel.get_by_id(exp_id)
+        exp_summary_model = exp_models.ExpSummaryModel(
+            id=exp_id,
+            title=title,
+            category='category',
+            objective='Old objective',
+            language_code='en',
+            tags=[],
+            ratings=feconf.get_empty_ratings(),
+            scaled_average_rating=feconf.EMPTY_SCALED_AVERAGE_RATING,
+            status=exp_rights.status,
+            community_owned=exp_rights.community_owned,
+            owner_ids=exp_rights.owner_ids,
+            contributor_ids=[],
+            contributors_summary={},
+        )
+        exp_summary_model.put()
+
+        # Note: Also save state id mappping model for new exploration. If not
+        # saved, it may cause errors in test cases.
+        exploration = exp_services.get_exploration_from_model(exp_model)
+        exp_services.create_and_save_state_id_mapping_model(exploration, [])
+
     def publish_exploration(self, owner_id, exploration_id):
         """Publish the exploration with the given exploration_id.