Skip to content

Commit

Permalink
Use get_multi() for retrieving state answer logs, to speed up loading.
Browse files Browse the repository at this point in the history
  • Loading branch information
seanlip committed Nov 23, 2013
1 parent 03badf3 commit aa0c6fa
Showing 4 changed files with 121 additions and 25 deletions.
25 changes: 21 additions & 4 deletions core/domain/stats_domain.py
Original file line number Diff line number Diff line change
@@ -100,12 +100,29 @@ def total_answer_count(self):
total_count += count
return total_count

@classmethod
def get_multi(cls, exploration_id, rule_data):
    """Builds one domain object per answer-log model for the given rules.

    Args:
        exploration_id: the exploration id
        rule_data: a list of dicts, each with the following keys:
            (state_id, handler_name, rule_str).

    Returns:
        A list of instances of this class, one for each model returned by
        StateRuleAnswerLogModel.get_or_create_multi(), in the order the
        models are returned. Each instance is constructed from the
        corresponding model's `answers`.
    """
    # TODO(sll): Should each rule_str be unicode instead?
    model_cls = stats_models.StateRuleAnswerLogModel
    domain_objects = []
    for model in model_cls.get_or_create_multi(exploration_id, rule_data):
        domain_objects.append(cls(model.answers))
    return domain_objects

@classmethod
def get(cls, exploration_id, state_id, handler_name, rule_str):
    """Gets the domain object for a single rule.

    This is a thin wrapper that calls get_multi() with a one-element rule
    list and returns the only element of the result.

    Args:
        exploration_id: the exploration id
        state_id: the state id
        handler_name: the name of the handler the rule belongs to
        rule_str: the string representation of the rule

    Returns:
        The first element of the list returned by get_multi().
    """
    # TODO(sll): Should rule_str be unicode instead?
    # TODO(sll): Deprecate this method.
    return cls.get_multi(exploration_id, [{
        'state_id': state_id,
        'handler_name': handler_name,
        'rule_str': rule_str
    }])[0]

def get_top_answers(self, N):
"""Returns the top N answers.
62 changes: 47 additions & 15 deletions core/domain/stats_services.py
Original file line number Diff line number Diff line change
@@ -79,6 +79,39 @@ def get_exploration_completed_count(exploration_id):
exploration_id, feconf.END_DEST).first_entry_count


def _get_state_rule_stats(exploration_id, state_id):
    """Collects answer statistics for every rule of every handler of a state.

    Args:
        exploration_id: the exploration id
        state_id: the id of the state whose rules are examined

    Returns:
        A dict, keyed by the string '{HANDLER_NAME}.{RULE_STR}'. Each value
        is a dict with two keys:
            'answers': the result of get_top_answers(10) on the rule's
                answer log
            'rule_hits': the answer log's total_answer_count
    """
    state = exp_services.get_state_by_id(exploration_id, state_id)

    # (handler name, rule string) pairs, in handler order and then rule order.
    handler_rule_pairs = [
        (handler.name, str(rule))
        for handler in state.widget.handlers
        for rule in handler.rule_specs
    ]

    rule_data = []
    for handler_name, rule_str in handler_rule_pairs:
        rule_data.append({
            'state_id': state.id,
            'handler_name': handler_name,
            'rule_str': rule_str
        })

    # All answer logs for the state are requested in one get_multi() call.
    answer_logs = stats_domain.StateRuleAnswerLog.get_multi(
        exploration_id, rule_data)

    stats_by_rule = {}
    for position, answer_log in enumerate(answer_logs):
        rule_entry = {
            'answers': answer_log.get_top_answers(10),
            'rule_hits': answer_log.total_answer_count
        }
        # answer_logs[i] is paired with the i-th (handler, rule) pair.
        stats_by_rule['.'.join(handler_rule_pairs[position])] = rule_entry

    return stats_by_rule


def get_state_stats_for_exploration(exploration_id):
"""Returns a dict with state statistics for the given exploration id."""
exploration = exp_services.get_exploration_by_id(exploration_id)
@@ -91,18 +124,12 @@ def get_state_stats_for_exploration(exploration_id):
first_entry_count = state_counts.first_entry_count
total_entry_count = state_counts.total_entry_count

state = exp_services.get_state_by_id(exploration_id, state_id)

rule_stats = {}
for handler in state.widget.handlers:
for rule in handler.rule_specs:
answer_log = stats_domain.StateRuleAnswerLog.get(
exploration_id, state.id, SUBMIT_HANDLER_NAME, str(rule))
rule_stats['.'.join([SUBMIT_HANDLER_NAME, str(rule)])] = {
'answers': answer_log.get_top_answers(10),
'rule_hits': answer_log.total_answer_count
}
# TODO(sll): Do not compute this here. Only display rule stats when the
# editor zooms into a particular state, rather than doing this all at
# the start of the exploration.
rule_stats = _get_state_rule_stats(exploration_id, state_id)

state = exp_services.get_state_by_id(exploration_id, state_id)
state_stats[state_id] = {
'name': state.name,
'firstEntryCount': first_entry_count,
@@ -128,12 +155,17 @@ def get_top_improvable_states(exploration_ids, N):
ranked_states = []
for exploration_id in exploration_ids:
exploration = exp_services.get_exploration_by_id(exploration_id)
for state_id in exploration.state_ids:

answer_logs = stats_domain.StateRuleAnswerLog.get_multi(exploration_id, [{
'state_id': state_id,
'handler_name': SUBMIT_HANDLER_NAME,
'rule_str': exp_domain.DEFAULT_RULESPEC_STR
} for state_id in exploration.state_ids])

for ind, state_id in enumerate(exploration.state_ids):
state_counts = stats_domain.StateCounter.get(
exploration_id, state_id)
default_rule_answer_log = stats_domain.StateRuleAnswerLog.get(
exploration.id, state_id, SUBMIT_HANDLER_NAME,
exp_domain.DEFAULT_RULESPEC_STR)
default_rule_answer_log = answer_logs[ind]

total_entry_count = state_counts.total_entry_count
if total_entry_count == 0:
15 changes: 15 additions & 0 deletions core/storage/statistics/django_models.py
Original file line number Diff line number Diff line change
@@ -106,13 +106,28 @@ class StateRuleAnswerLogModel(base_models.BaseModel):

@classmethod
def get_or_create(cls, exploration_id, state_id, handler_name, rule_str):
    """Returns the entity for the given rule, or a new one with no answers.

    The entity id is the four arguments joined with '.'. A newly
    constructed entity is returned as-is; this method does not save it.

    Args:
        exploration_id: the exploration id
        state_id: the state id
        handler_name: the name of the handler the rule belongs to
        rule_str: the string representation of the rule
    """
    # TODO(sll): Deprecate this method.
    entity_id = '.'.join(
        (exploration_id, state_id, handler_name, rule_str))
    existing = cls.get(entity_id, strict=False)
    if existing:
        return existing
    return cls(id=entity_id, answers={})

@classmethod
def get_or_create_multi(cls, exploration_id, rule_data):
    """Gets or creates entities for the given rules.

    Args:
        exploration_id: the exploration id
        rule_data: a list of dicts, each with the following keys:
            (state_id, handler_name, rule_str).

    Returns:
        A list with one entity per element of rule_data, in the same
        order, each obtained by calling get_or_create().
    """
    # get_or_create() takes four separate positional arguments, so the
    # values must be passed individually rather than bundled into one list.
    return [
        cls.get_or_create(
            exploration_id, datum['state_id'],
            datum['handler_name'], datum['rule_str'])
        for datum in rule_data
    ]


def record_state_feedback_from_reader(
exploration_id, state_id, feedback, history):
44 changes: 38 additions & 6 deletions core/storage/statistics/gae_models.py
Original file line number Diff line number Diff line change
@@ -106,12 +106,44 @@ class StateRuleAnswerLogModel(base_models.BaseModel):

@classmethod
def get_or_create(cls, exploration_id, state_id, handler_name, rule_str):
    """Gets or creates the entity for a single rule.

    This is a thin wrapper that calls get_or_create_multi() with a
    one-element rule list and returns the only element of the result.

    Args:
        exploration_id: the exploration id
        state_id: the state id
        handler_name: the name of the handler the rule belongs to
        rule_str: the string representation of the rule

    Returns:
        The first element of the list returned by get_or_create_multi().
    """
    # TODO(sll): Deprecate this method.
    return cls.get_or_create_multi(exploration_id, [{
        'state_id': state_id,
        'handler_name': handler_name,
        'rule_str': rule_str
    }])[0]

@classmethod
def _get_entity_key(cls, exploration_id, entity_id):
    """Returns the ndb key for the entity of this kind with the given id.

    The key is built only from this model's kind and entity_id;
    exploration_id is accepted but not used here.
    """
    kind = cls._get_kind()
    return ndb.Key(kind, entity_id)

@classmethod
def get_or_create_multi(cls, exploration_id, rule_data):
    """Fetches the entities for the given rules, creating any that are missing.

    Args:
        exploration_id: the exploration id
        rule_data: a list of dicts, each with the following keys:
            (state_id, handler_name, rule_str).

    Returns:
        The list returned by ndb.get_multi() for the rules' keys, with each
        None entry replaced by a newly constructed entity that has an empty
        `answers` dict. The new entities are passed to ndb.put_multi()
        before returning.
    """
    instance_ids = []
    for rule in rule_data:
        instance_ids.append('.'.join([
            exploration_id, rule['state_id'],
            rule['handler_name'], rule['rule_str']
        ]))

    keys = [cls._get_entity_key(exploration_id, instance_id)
            for instance_id in instance_ids]

    # One batched read for all of the keys.
    fetched = ndb.get_multi(keys)

    created = []
    for position, found in enumerate(fetched):
        if found is not None:
            continue
        # Nothing stored under this key: substitute a fresh, empty log.
        fresh = cls(id=instance_ids[position], answers={})
        created.append(fresh)
        fetched[position] = fresh

    # One batched write for all of the newly created entities.
    ndb.put_multi(created)
    return fetched


def record_state_feedback_from_reader(

0 comments on commit aa0c6fa

Please sign in to comment.