Address review comments.

anna-gavrilova · Mar 29, 2017 · 0483cec · 0483cec
1 parent 85f21f1
commit 0483cec
Show file tree

Hide file tree

Showing 22 changed files with 584 additions and 629 deletions.
diff --git a/core/controllers/editor.py b/core/controllers/editor.py
@@ -585,7 +585,7 @@ def get(self, exploration_id, escaped_state_name):
         # The total number of possible answers is 100 because it requests the
         # top 50 answers matched to the default rule and the top 50 answers
         # matched to the classifier individually.
-        answers = stats_services.get_top_state_rule_answers(
+        submitted_answers = stats_services.get_top_state_rule_answers(
             exploration_id, state_name, [
                 exp_domain.DEFAULT_OUTCOME_CLASSIFICATION,
                 exp_domain.TRAINING_DATA_CLASSIFICATION])[
@@ -600,7 +600,7 @@ def get(self, exploration_id, escaped_state_name):
 
             try:
                 # Normalize the answers.
-                for answer in answers:
+                for answer in submitted_answers:
                     answer['answer'] = interaction_instance.normalize_answer(
                         answer['answer'])
 
@@ -622,7 +622,7 @@ def get(self, exploration_id, escaped_state_name):
                     in interaction.confirmed_unclassified_answers))
 
                 unhandled_answers = [
-                    answer for answer in answers
+                    answer for answer in submitted_answers
                     if answer['answer'] not in trained_answers
                 ]
             except Exception as e:

diff --git a/core/controllers/editor_test.py b/core/controllers/editor_test.py
@@ -213,13 +213,14 @@ def _create_training_data(*arg):
                 return [_create_answer(value) for value in arg]
 
             def _submit_answer(
-                    exp_id, state_name, answer_group_index, rule_spec_index,
-                    classification_categorization, answer, exp_version=1,
-                    session_id='dummy_session_id', time_spent_in_secs=0.0,
-                    params={}):
+                    exp_id, state_name, interaction_id, answer_group_index,
+                    rule_spec_index, classification_categorization, answer,
+                    exp_version=1, session_id='dummy_session_id',
+                    time_spent_in_secs=0.0, params={}):
                 event_services.AnswerSubmissionEventHandler.record(
-                    exp_id, exp_version, state_name, answer_group_index,
-                    rule_spec_index, classification_categorization, session_id,
+                    exp_id, exp_version, state_name, interaction_id,
+                    answer_group_index, rule_spec_index,
+                    classification_categorization, session_id,
                     time_spent_in_secs, params, answer)
 
             # Load the string classifier demo exploration.
@@ -248,24 +249,24 @@ def _submit_answer(
 
             # Input happy since there is an explicit rule checking for that.
             _submit_answer(
-                exp_id, state_name, 0, 0, exp_domain.EXPLICIT_CLASSIFICATION,
-                'happy')
+                exp_id, state_name, 'TextInput', 0, 0,
+                exp_domain.EXPLICIT_CLASSIFICATION, 'happy')
 
             # Input text not at all similar to happy (default outcome).
             _submit_answer(
-                exp_id, state_name, 2, 0,
+                exp_id, state_name, 'TextInput', 2, 0,
                 exp_domain.DEFAULT_OUTCOME_CLASSIFICATION, 'sad')
 
             # Input cheerful: this is current training data and falls under the
             # classifier.
             _submit_answer(
-                exp_id, state_name, 1, 0,
+                exp_id, state_name, 'TextInput', 1, 0,
                 exp_domain.TRAINING_DATA_CLASSIFICATION, 'cheerful')
 
             # Input joyful: this is not training data but it will later be
             # classified under the classifier.
             _submit_answer(
-                exp_id, state_name, 2, 0,
+                exp_id, state_name, 'TextInput', 2, 0,
                 exp_domain.DEFAULT_OUTCOME_CLASSIFICATION, 'joyful')
 
             # Perform answer summarization on the summarized answers.

diff --git a/core/controllers/reader.py b/core/controllers/reader.py
@@ -380,14 +380,11 @@ def post(self, exploration_id):
 
         normalized_answer = old_interaction_instance.normalize_answer(answer)
 
-        # Don't persist the parameter-stored answer value.
-        if 'answer' in params:
-            del params['answer']
-
         event_services.AnswerSubmissionEventHandler.record(
-            exploration_id, version, old_state_name, answer_group_index,
-            rule_spec_index, classification_categorization, session_id,
-            client_time_spent_in_secs, params, normalized_answer)
+            exploration_id, version, old_state_name,
+            exploration.states[old_state_name].interaction.id,
+            answer_group_index, rule_spec_index, classification_categorization,
+            session_id, client_time_spent_in_secs, params, normalized_answer)
         self.render_json({})
 
 

diff --git a/core/domain/event_services.py b/core/domain/event_services.py
@@ -77,18 +77,17 @@ def _notify_continuous_computation_listeners_async(cls, *args, **kwargs):
     @classmethod
     def _handle_event(
             cls, exploration_id, exploration_version, state_name,
-            answer_group_index, rule_spec_index, classification_categorization,
-            session_id, time_spent_in_secs, params, normalized_answer):
+            interaction_id, answer_group_index, rule_spec_index,
+            classification_categorization, session_id, time_spent_in_secs,
+            params, normalized_answer):
         """Records an event when an answer triggers a rule. The answer recorded
         here is a Python-representation of the actual answer submitted by the
         user.
         """
         # TODO(sll): Escape these args?
-        exploration = exp_services.get_exploration_by_id(
-            exploration_id, version=exploration_version)
-        interaction_id = exploration.states[state_name].interaction.id
         stats_services.record_answer(
-            exploration, state_name, stats_domain.SubmittedAnswer(
+            exploration_id, exploration_version, state_name, interaction_id,
+            stats_domain.SubmittedAnswer(
                 normalized_answer, interaction_id, answer_group_index,
                 rule_spec_index, classification_categorization, params,
                 session_id, time_spent_in_secs))

diff --git a/core/domain/exp_domain.py b/core/domain/exp_domain.py
@@ -127,7 +127,8 @@ def _validate_customization_args_and_values(
     validation.
 
     Note that this may modify the given customization_args dict, if it has
-    extra or missing keys.
+    extra or missing keys. It also normalizes any HTML in the customization_args
+    dict.
     """
     ca_spec_names = [
         ca_spec.name for ca_spec in ca_specs_to_validate_against]
@@ -160,9 +161,10 @@ def _validate_customization_args_and_values(
     # Check that each value has the correct type.
     for ca_spec in ca_specs_to_validate_against:
         try:
-            schema_utils.normalize_against_schema(
-                customization_args[ca_spec.name]['value'],
-                ca_spec.schema)
+            customization_args[ca_spec.name]['value'] = (
+                schema_utils.normalize_against_schema(
+                    customization_args[ca_spec.name]['value'],
+                    ca_spec.schema))
         except Exception:
             # TODO(sll): Raise an actual exception here if parameters are not
             # involved. (If they are, can we get sample values for the state
@@ -2223,6 +2225,8 @@ def _convert_states_v6_dict_to_v7_dict(cls, states_dict):
 
         return states_dict
 
+    # TODO(bhenning): Remove pre_v4_states_conversion_func when the answer
+    # migration is completed.
     @classmethod
     def update_states_from_model(
             cls, versioned_exploration_states, current_states_schema_version,
@@ -2231,6 +2235,11 @@ def update_states_from_model(
         versioned_exploration_states dict from current_states_schema_version to
         current_states_schema_version + 1.
 
+        If pre_v4_states_conversion_func is provided, it will be called in the
+        migration process before migrating from v3 to v4. It is provided a
+        states dict and will overwrite the migrated states dict with the dict it
+        returns.
+
         Note that the versioned_exploration_states being passed in is modified
         in-place.
         """

diff --git a/core/domain/exp_services.py b/core/domain/exp_services.py
@@ -715,16 +715,15 @@ def delete_exploration(committer_id, exploration_id, force_deletion=False):
 
 
 # Operations on exploration snapshots.
-def get_exploration_snapshots_metadata(
-        exploration_id, max_version=None, allow_deleted=False):
+def get_exploration_snapshots_metadata(exploration_id, allow_deleted=False):
     """Returns the snapshots for this exploration, as dicts. If a version is
     specified, only the snapshots up to that version (inclusively) will be
     returned. Otherwise, the snapshots for all versions of the exploration will
     be returned.
 
     Args:
         exploration_id: str. The id of the exploration in question.
-        max_version: number. The last version that should be returned within the
+        max_version: int. The last version that should be returned within the
             retrieved snapshots list.
         allow_deleted: bool. Whether to allow retrieval of deleted snapshots.
 
@@ -735,10 +734,9 @@ def get_exploration_snapshots_metadata(
         in ascending order. There are exploration.version_number items in the
         returned list.
     """
-    if not max_version:
-        exploration = get_exploration_by_id(exploration_id)
-        max_version = exploration.version
-    version_nums = range(1, max_version + 1)
+    exploration = get_exploration_by_id(exploration_id)
+    current_version = exploration.version
+    version_nums = range(1, current_version + 1)
 
     return exp_models.ExplorationModel.get_snapshots_metadata(
         exploration_id, version_nums, allow_deleted=allow_deleted)

diff --git a/core/domain/stats_domain.py b/core/domain/stats_domain.py
@@ -16,7 +16,7 @@
 
 """Domain object for statistics models."""
 
-import json
+import number
 import sys
 import utils
 
@@ -37,22 +37,32 @@
 # NOTE TO DEVELOPERS: All other state answer data model entities must not ever
 # store this session ID unless it was created by the AnswerMigrationJob. Also,
 # this string must never change.
-MIGRATED_STATE_ANSWER_SESSION_ID = 'migrated_state_answer_session_id'
+MIGRATED_STATE_ANSWER_SESSION_ID = 'migrated_state_answer_session_id_2017'
 MIGRATED_STATE_ANSWER_TIME_SPENT_IN_SEC = 0.0
 
 
+# TODO(bhenning): Monitor sizes (lengths of submitted_answer_list) of these
+# objects and determine if we should enforce an upper bound for
+# submitted_answer_list.
 class StateAnswers(object):
     """Domain object containing answers submitted to an exploration state."""
 
     def __init__(self, exploration_id, exploration_version, state_name,
                  interaction_id, submitted_answer_list,
                  schema_version=feconf.CURRENT_STATE_ANSWERS_SCHEMA_VERSION):
-        """Initialize domain object for state answers.
-
-        interaction_id contains the interaction type of the state, e.g.
-        multiple choice.
-
-        submitted_answer_list contains a list of SubmittedAnswer objects.
+        """Constructs a StateAnswers domain object.
+
+        Args:
+            exploration_id. The ID of the exploration corresponding to submitted
+                answers.
+            exploration_version. The version of the exploration corresponding to
+                submitted answers.
+            state_name. The state to which the answers were submitted.
+            interaction_id. The ID of the interaction which created the answers.
+            submitted_answer_list. The list of SubmittedAnswer domain objects
+                that were submitted to the exploration and version specified in
+                this object.
+            schema_version. The schema version of this answers object.
         """
         self.exploration_id = exploration_id
         self.exploration_version = exploration_version
@@ -114,23 +124,24 @@ def validate(self):
                     feconf.CURRENT_STATE_ANSWERS_SCHEMA_VERSION,
                     self.schema_version))
 
-        for submitted_answer in self.submitted_answer_list:
-            submitted_answer.validate()
-
 
 class SubmittedAnswer(object):
     """Domain object representing an answer submitted to a state."""
 
     # NOTE TO DEVELOPERS: do not use the rule_spec_str and answer_str
-    # parameters, as they are here as part of the answer migration.
+    # parameters; they are only populated by the answer migration job. They only
+    # represent context that is lost as part of the answer migration and are
+    # used as part of validating the migration was correct. They may be
+    # referenced in future migration or mapreduce jobs, or they may be removed
+    # without warning or migration.
 
     # TODO(bhenning): Remove large_bucket_entity_id once the answer migration is
     # completed in production.
-    def __init__(self, normalized_answer, interaction_id, answer_group_index,
+    def __init__(self, answer, interaction_id, answer_group_index,
                  rule_spec_index, classification_categorization, params,
                  session_id, time_spent_in_sec, rule_spec_str=None,
                  answer_str=None, large_bucket_entity_id=None):
-        self.normalized_answer = normalized_answer
+        self.answer = answer
         self.interaction_id = interaction_id
         self.answer_group_index = answer_group_index
         self.rule_spec_index = rule_spec_index
@@ -142,25 +153,16 @@ def __init__(self, normalized_answer, interaction_id, answer_group_index,
         self.answer_str = answer_str
         self.large_bucket_entity_id = large_bucket_entity_id
 
-    @property
-    def json_size(self):
-        """Returns the size of the dict returned by to_dict(). This may not be
-        exactly the size of the returned dict, since it assumes the property in
-        the dict for the json_size is sys.maxint.
-        """
-        return sys.getsizeof(json.dumps(self.to_dict(json_size=sys.maxint)))
-
-    def to_dict(self, json_size=None):
+    def to_dict(self):
         submitted_answer_dict = {
-            'answer': self.normalized_answer,
+            'answer': self.answer,
             'interaction_id': self.interaction_id,
             'answer_group_index': self.answer_group_index,
             'rule_spec_index': self.rule_spec_index,
             'classification_categorization': self.classification_categorization,
             'params': self.params,
             'session_id': self.session_id,
             'time_spent_in_sec': self.time_spent_in_sec,
-            'json_size': self.json_size if not json_size else json_size,
         }
         if self.rule_spec_str is not None:
             submitted_answer_dict['rule_spec_str'] = self.rule_spec_str
@@ -215,9 +217,9 @@ def validate(self):
                 'Expected session_id to be a string, received %s' %
                 str(self.session_id))
 
-        if not isinstance(self.time_spent_in_sec, float):
+        if not isinstance(self.time_spent_in_sec, number.Number):
             raise utils.ValidationError(
-                'Expected time_spent_in_sec to be a float, received %s' %
+                'Expected time_spent_in_sec to be a number, received %s' %
                 str(self.time_spent_in_sec))
 
         if not isinstance(self.params, dict):
@@ -234,11 +236,6 @@ def validate(self):
                 'Expected rule_spec_index to be an integer, received %s' %
                 str(self.rule_spec_index))
 
-        if not isinstance(self.classification_categorization, basestring):
-            raise utils.ValidationError(
-                'Expected classification_categorization to be a string, '
-                'received %s' % str(self.classification_categorization))
-
         if self.answer_group_index < 0:
             raise utils.ValidationError(
                 'Expected answer_group_index to be non-negative, received %d' %
@@ -254,10 +251,10 @@ def validate(self):
                 'Expected time_spent_in_sec to be non-negative, received %f' %
                 self.time_spent_in_sec)
 
-        if self.normalized_answer is None and self.interaction_id != 'Continue':
+        if self.answer is None and self.interaction_id != 'Continue':
             raise utils.ValidationError(
-                'SubmittedAnswers must have a provided normalized_answer '
-                'except for Continue interactions')
+                'SubmittedAnswers must have a provided answer except for '
+                'Continue interactions')
 
         valid_classification_categories = [
             exp_domain.EXPLICIT_CLASSIFICATION,