Use get_multi() for retrieving state answer logs, to speed up loading.

engaboda · Nov 23, 2013 · aa0c6fa · aa0c6fa
1 parent 03badf3
commit aa0c6fa
Showing 4 changed files with 121 additions and 25 deletions.
diff --git a/core/domain/stats_domain.py b/core/domain/stats_domain.py
@@ -100,12 +100,29 @@ def total_answer_count(self):
             total_count += count
         return total_count
 
+    @classmethod
+    def get_multi(cls, exploration_id, rule_data):
+        """Gets a list of domain objects, one per entry in rule_data.
+
+        Args:
+            exploration_id: the exploration id
+            rule_data: a list of dicts, each with the following keys:
+                (state_id, handler_name, rule_str).
+        """
+        # TODO(sll): Should each rule_str be unicode instead?
+        answer_log_models = stats_models.StateRuleAnswerLogModel.get_or_create_multi(
+            exploration_id, rule_data)
+        return [cls(answer_log_model.answers)
+                for answer_log_model in answer_log_models]
+
     @classmethod
     def get(cls, exploration_id, state_id, handler_name, rule_str):
-        # TODO(sll): Should rule_str be unicode instead?
-        answer_log_model = stats_models.StateRuleAnswerLogModel.get_or_create(
-            exploration_id, state_id, handler_name, rule_str)
-        return cls(answer_log_model.answers)
+        # TODO(sll): Deprecate this method.
+        return cls.get_multi(exploration_id, [{
+            'state_id': state_id,
+            'handler_name': handler_name,
+            'rule_str': rule_str
+        }])[0]
 
     def get_top_answers(self, N):
         """Returns the top N answers.

diff --git a/core/domain/stats_services.py b/core/domain/stats_services.py
@@ -79,6 +79,39 @@ def get_exploration_completed_count(exploration_id):
         exploration_id, feconf.END_DEST).first_entry_count
 
 
+def _get_state_rule_stats(exploration_id, state_id):
+    """Gets statistics for the handlers and rules of this state.
+
+    Returns:
+        A dict, keyed by the string '{HANDLER_NAME}.{RULE_STR}', whose
+        values are dicts with the keys 'answers' (the top 10 answers
+        for that rule) and 'rule_hits' (its total answer count).
+    """
+    state = exp_services.get_state_by_id(exploration_id, state_id)
+
+    rule_keys = []
+    for handler in state.widget.handlers:
+        for rule in handler.rule_specs:
+            rule_keys.append((handler.name, str(rule)))
+
+    answer_logs = stats_domain.StateRuleAnswerLog.get_multi(
+        exploration_id, [{
+            'state_id': state.id,
+            'handler_name': rule_key[0],
+            'rule_str': rule_key[1]
+        } for rule_key in rule_keys]
+    )
+
+    results = {}
+    for ind, answer_log in enumerate(answer_logs):
+        results['.'.join(rule_keys[ind])] = {
+            'answers': answer_log.get_top_answers(10),
+            'rule_hits': answer_log.total_answer_count
+        }
+
+    return results
+
+
 def get_state_stats_for_exploration(exploration_id):
     """Returns a dict with state statistics for the given exploration id."""
     exploration = exp_services.get_exploration_by_id(exploration_id)
@@ -91,18 +124,12 @@ def get_state_stats_for_exploration(exploration_id):
         first_entry_count = state_counts.first_entry_count
         total_entry_count = state_counts.total_entry_count
 
-        state = exp_services.get_state_by_id(exploration_id, state_id)
-
-        rule_stats = {}
-        for handler in state.widget.handlers:
-            for rule in handler.rule_specs:
-                answer_log = stats_domain.StateRuleAnswerLog.get(
-                    exploration_id, state.id, SUBMIT_HANDLER_NAME, str(rule))
-                rule_stats['.'.join([SUBMIT_HANDLER_NAME, str(rule)])] = {
-                    'answers': answer_log.get_top_answers(10),
-                    'rule_hits': answer_log.total_answer_count
-                }
+        # TODO(sll): Do not compute this here. Only display rule stats when the
+        # editor zooms into a particular state, rather than doing this all at
+        # the start of the exploration.
+        rule_stats = _get_state_rule_stats(exploration_id, state_id)
 
+        state = exp_services.get_state_by_id(exploration_id, state_id)
         state_stats[state_id] = {
             'name': state.name,
             'firstEntryCount': first_entry_count,
@@ -128,12 +155,17 @@ def get_top_improvable_states(exploration_ids, N):
     ranked_states = []
     for exploration_id in exploration_ids:
         exploration = exp_services.get_exploration_by_id(exploration_id)
-        for state_id in exploration.state_ids:
+
+        answer_logs = stats_domain.StateRuleAnswerLog.get_multi(exploration_id, [{
+            'state_id': state_id,
+            'handler_name': SUBMIT_HANDLER_NAME,
+            'rule_str': exp_domain.DEFAULT_RULESPEC_STR
+        } for state_id in exploration.state_ids])
+
+        for ind, state_id in enumerate(exploration.state_ids):
             state_counts = stats_domain.StateCounter.get(
                 exploration_id, state_id)
-            default_rule_answer_log = stats_domain.StateRuleAnswerLog.get(
-                exploration.id, state_id, SUBMIT_HANDLER_NAME,
-                exp_domain.DEFAULT_RULESPEC_STR)
+            default_rule_answer_log = answer_logs[ind]
 
             total_entry_count = state_counts.total_entry_count
             if total_entry_count == 0:

diff --git a/core/storage/statistics/django_models.py b/core/storage/statistics/django_models.py
@@ -106,13 +106,28 @@ class StateRuleAnswerLogModel(base_models.BaseModel):
 
     @classmethod
     def get_or_create(cls, exploration_id, state_id, handler_name, rule_str):
+        # TODO(sll): Deprecate this method.
         instance_id = '.'.join([
             exploration_id, state_id, handler_name, rule_str])
         answer_log = cls.get(instance_id, strict=False)
         if not answer_log:
             answer_log = cls(id=instance_id, answers={})
         return answer_log
 
+    @classmethod
+    def get_or_create_multi(cls, exploration_id, rule_data):
+        """Gets or creates one entity per rule, in the order of rule_data.
+
+        Args:
+            exploration_id: the exploration id
+            rule_data: a list of dicts, each with the following keys:
+                (state_id, handler_name, rule_str).
+        """
+        return [cls.get_or_create(
+            exploration_id, datum['state_id'],
+            datum['handler_name'], datum['rule_str']
+        ) for datum in rule_data]
+
 
 def record_state_feedback_from_reader(
         exploration_id, state_id, feedback, history):

diff --git a/core/storage/statistics/gae_models.py b/core/storage/statistics/gae_models.py
@@ -106,12 +106,44 @@ class StateRuleAnswerLogModel(base_models.BaseModel):
 
     @classmethod
     def get_or_create(cls, exploration_id, state_id, handler_name, rule_str):
-        instance_id = '.'.join([
-            exploration_id, state_id, handler_name, rule_str])
-        answer_log = cls.get(instance_id, strict=False)
-        if not answer_log:
-            answer_log = cls(id=instance_id, answers={})
-        return answer_log
+        # TODO(sll): Deprecate this method.
+        return cls.get_or_create_multi(exploration_id, [{
+            'state_id': state_id,
+            'handler_name': handler_name,
+            'rule_str': rule_str
+        }])[0]
+
+    @classmethod
+    def _get_entity_key(cls, exploration_id, entity_id):
+        return ndb.Key(cls._get_kind(), entity_id)
+
+    @classmethod
+    def get_or_create_multi(cls, exploration_id, rule_data):
+        """Gets entities for the given rules, creating and putting new ones.
+
+        Args:
+            exploration_id: the exploration id
+            rule_data: a list of dicts, each with the following keys:
+                (state_id, handler_name, rule_str).
+        """
+        entity_ids = ['.'.join([
+            exploration_id, datum['state_id'],
+            datum['handler_name'], datum['rule_str']
+        ]) for datum in rule_data]
+
+        entity_keys = [cls._get_entity_key(exploration_id, entity_id)
+                       for entity_id in entity_ids]
+
+        entities = ndb.get_multi(entity_keys)
+        entities_to_put = []
+        for ind, entity in enumerate(entities):
+            if entity is None:
+                new_entity = cls(id=entity_ids[ind], answers={})
+                entities_to_put.append(new_entity)
+                entities[ind] = new_entity
+
+        ndb.put_multi(entities_to_put)
+        return entities
 
 
 def record_state_feedback_from_reader(