Change scaled average assignment to assign right after new rating (op…

…pia#1976) * Implement scaled average calculation in rating assignment * Fix linter issues * Reduce info being fed into scaled rating calculation * Change function names and add feconf setting * Change arg name * Update index.yaml and minor changes
lunrong · Jun 4, 2016 · 3bed8f3 · 3bed8f3
1 parent 9ee760b
commit 3bed8f3
Show file tree

Hide file tree

Showing 10 changed files with 89 additions and 36 deletions.
diff --git a/core/domain/exp_domain.py b/core/domain/exp_domain.py
@@ -2583,7 +2583,7 @@ class ExplorationSummary(object):
     """Domain object for an Oppia exploration summary."""
 
     def __init__(self, exploration_id, title, category, objective,
-                 language_code, tags, ratings, status,
+                 language_code, tags, ratings, scaled_average_rating, status,
                  community_owned, owner_ids, editor_ids,
                  viewer_ids, contributor_ids, contributors_summary, version,
                  exploration_model_created_on,
@@ -2601,6 +2601,7 @@ def __init__(self, exploration_id, title, category, objective,
         self.language_code = language_code
         self.tags = tags
         self.ratings = ratings
+        self.scaled_average_rating = scaled_average_rating
         self.status = status
         self.community_owned = community_owned
         self.owner_ids = owner_ids

diff --git a/core/domain/exp_jobs_one_off_test.py b/core/domain/exp_jobs_one_off_test.py
@@ -180,6 +180,7 @@ def _run_batch_job_once_and_verify_output(
                     exploration.language_code,
                     exploration.tags,
                     feconf.get_empty_ratings(),
+                    feconf.EMPTY_SCALED_AVERAGE_RATING,
                     spec['status'],
                     exp_rights_model.community_owned,
                     exp_rights_model.owner_ids,

diff --git a/core/domain/exp_services.py b/core/domain/exp_services.py
@@ -159,9 +159,10 @@ def get_exploration_summary_from_model(exp_summary_model):
         exp_summary_model.id, exp_summary_model.title,
         exp_summary_model.category, exp_summary_model.objective,
         exp_summary_model.language_code, exp_summary_model.tags,
-        exp_summary_model.ratings, exp_summary_model.status,
-        exp_summary_model.community_owned, exp_summary_model.owner_ids,
-        exp_summary_model.editor_ids, exp_summary_model.viewer_ids,
+        exp_summary_model.ratings, exp_summary_model.scaled_average_rating,
+        exp_summary_model.status, exp_summary_model.community_owned,
+        exp_summary_model.owner_ids, exp_summary_model.editor_ids,
+        exp_summary_model.viewer_ids,
         exp_summary_model.contributor_ids,
         exp_summary_model.contributors_summary, exp_summary_model.version,
         exp_summary_model.exploration_model_created_on,
@@ -375,6 +376,15 @@ def get_featured_exploration_summaries():
     return _get_exploration_summaries_from_models(
         exp_models.ExpSummaryModel.get_featured())
 
+
+def get_top_rated_exploration_summaries():
+    """Returns a dict with top rated exploration summary domain objects,
+    keyed by their id.
+    """
+    return _get_exploration_summaries_from_models(
+        exp_models.ExpSummaryModel.get_top_rated())
+
+
 def get_recently_published_exploration_summaries():
     """Returns a dict with all featured exploration summary domain objects,
     keyed by their id.
@@ -1059,10 +1069,13 @@ def compute_summary_of_exploration(exploration, contributor_id_to_add):
     if exp_summary_model:
         old_exp_summary = get_exploration_summary_from_model(exp_summary_model)
         ratings = old_exp_summary.ratings or feconf.get_empty_ratings()
+        scaled_average_rating = get_scaled_average_rating(
+            old_exp_summary.ratings)
         contributor_ids = old_exp_summary.contributor_ids or []
         contributors_summary = old_exp_summary.contributors_summary or {}
     else:
         ratings = feconf.get_empty_ratings()
+        scaled_average_rating = feconf.EMPTY_SCALED_AVERAGE_RATING
         contributor_ids = []
         contributors_summary = {}
 
@@ -1092,7 +1105,7 @@ def compute_summary_of_exploration(exploration, contributor_id_to_add):
     exp_summary = exp_domain.ExplorationSummary(
         exploration.id, exploration.title, exploration.category,
         exploration.objective, exploration.language_code,
-        exploration.tags, ratings, exp_rights.status,
+        exploration.tags, ratings, scaled_average_rating, exp_rights.status,
         exp_rights.community_owned, exp_rights.owner_ids,
         exp_rights.editor_ids, exp_rights.viewer_ids, contributor_ids,
         contributors_summary, exploration.version,
@@ -1139,6 +1152,7 @@ def save_exploration_summary(exp_summary):
         language_code=exp_summary.language_code,
         tags=exp_summary.tags,
         ratings=exp_summary.ratings,
+        scaled_average_rating=exp_summary.scaled_average_rating,
         status=exp_summary.status,
         community_owned=exp_summary.community_owned,
         owner_ids=exp_summary.owner_ids,
@@ -1370,32 +1384,32 @@ def _should_index(exp):
     return rights.status != rights_manager.ACTIVITY_STATUS_PRIVATE
 
 
-def get_average_rating_from_exp_summary(exp_summary):
-    """Returns the average rating of the document as a float. If there are no
+def get_average_rating(ratings):
+    """Returns the average rating of the ratings as a float. If there are no
     ratings, it will return 0.
     """
     rating_weightings = {'1': 1, '2': 2, '3': 3, '4': 4, '5': 5}
-    if exp_summary.ratings:
+    if ratings:
         rating_sum = 0.0
         number_of_ratings = 0.0
-        for rating_value, rating_count in exp_summary.ratings.items():
+        for rating_value, rating_count in ratings.items():
             rating_sum += rating_weightings[rating_value] * rating_count
             number_of_ratings += rating_count
         if number_of_ratings > 0:
             return rating_sum / number_of_ratings
     return 0
 
 
-def get_scaled_average_rating_from_exp_summary(exp_summary):
-    """Returns the lower bound wilson score of the document as a float. If
+def get_scaled_average_rating(ratings):
+    """Returns the lower bound wilson score of the ratings as a float. If
     there are no ratings, it will return 0. The confidence of this result is
     95%.
     """
     # The following is the number of ratings.
-    n = sum(exp_summary.ratings.values())
+    n = sum(ratings.values())
     if n == 0:
         return 0
-    average_rating = get_average_rating_from_exp_summary(exp_summary)
+    average_rating = get_average_rating(ratings)
     z = 1.9599639715843482
     x = (average_rating - 1) / 4
     # The following calculates the lower bound Wilson Score as documented

diff --git a/core/domain/exp_services_test.py b/core/domain/exp_services_test.py
@@ -1975,44 +1975,44 @@ def mock_search(query_string, index, cursor=None, limit=20, sort='',
         self.assertEqual(cursor, expected_result_cursor)
         self.assertEqual(result, doc_ids)
 
-    def test_get_average_rating_from_exp_summary(self):
+    def test_get_average_rating(self):
         self.save_new_valid_exploration(self.EXP_ID, self.owner_id)
         exp = exp_services.get_exploration_summary_by_id(self.EXP_ID)
 
         self.assertEqual(
-            exp_services.get_average_rating_from_exp_summary(exp), 0)
+            exp_services.get_average_rating(exp.ratings), 0)
 
         rating_services.assign_rating_to_exploration(
             self.owner_id, self.EXP_ID, 5)
         self.assertEqual(
-            exp_services.get_average_rating_from_exp_summary(exp), 5)
+            exp_services.get_average_rating(exp.ratings), 5)
 
         rating_services.assign_rating_to_exploration(
             self.USER_ID_1, self.EXP_ID, 2)
 
         exp = exp_services.get_exploration_summary_by_id(self.EXP_ID)
         self.assertEqual(
-            exp_services.get_average_rating_from_exp_summary(exp), 3.5)
+            exp_services.get_average_rating(exp.ratings), 3.5)
 
     def test_get_lower_bound_wilson_rating_from_exp_summary(self):
         self.save_new_valid_exploration(self.EXP_ID, self.owner_id)
         exp = exp_services.get_exploration_summary_by_id(self.EXP_ID)
 
         self.assertEqual(
-            exp_services.get_scaled_average_rating_from_exp_summary(exp), 0)
+            exp_services.get_scaled_average_rating(exp.ratings), 0)
 
         rating_services.assign_rating_to_exploration(
             self.owner_id, self.EXP_ID, 5)
         self.assertAlmostEqual(
-            exp_services.get_scaled_average_rating_from_exp_summary(exp),
+            exp_services.get_scaled_average_rating(exp.ratings),
             1.8261731658956, places=4)
 
         rating_services.assign_rating_to_exploration(
             self.USER_ID_1, self.EXP_ID, 4)
 
         exp = exp_services.get_exploration_summary_by_id(self.EXP_ID)
         self.assertAlmostEqual(
-            exp_services.get_scaled_average_rating_from_exp_summary(exp),
+            exp_services.get_scaled_average_rating(exp.ratings),
             2.056191454757, places=4)
 
 
@@ -2263,7 +2263,7 @@ def test_get_non_private_exploration_summaries(self):
             self.EXP_ID_2: exp_domain.ExplorationSummary(
                 self.EXP_ID_2, 'Exploration 2 Albert title',
                 'A category', 'An objective', 'en', [],
-                feconf.get_empty_ratings(),
+                feconf.get_empty_ratings(), feconf.EMPTY_SCALED_AVERAGE_RATING,
                 rights_manager.ACTIVITY_STATUS_PUBLIC,
                 False, [self.albert_id], [], [], [self.albert_id],
                 {self.albert_id: 1},
@@ -2277,7 +2277,8 @@ def test_get_non_private_exploration_summaries(self):
         self.assertEqual(actual_summaries.keys(),
                          expected_summaries.keys())
         simple_props = ['id', 'title', 'category', 'objective',
-                        'language_code', 'tags', 'ratings', 'status',
+                        'language_code', 'tags', 'ratings',
+                        'scaled_average_rating', 'status',
                         'community_owned', 'owner_ids',
                         'editor_ids', 'viewer_ids',
                         'contributor_ids', 'version',
@@ -2295,7 +2296,7 @@ def test_get_all_exploration_summaries(self):
             self.EXP_ID_1: exp_domain.ExplorationSummary(
                 self.EXP_ID_1, 'Exploration 1 title',
                 'A category', 'An objective', 'en', [],
-                feconf.get_empty_ratings(),
+                feconf.get_empty_ratings(), feconf.EMPTY_SCALED_AVERAGE_RATING,
                 rights_manager.ACTIVITY_STATUS_PRIVATE,
                 False, [self.albert_id], [], [], [self.albert_id, self.bob_id],
                 {self.albert_id: 1, self.bob_id: 1}, self.EXPECTED_VERSION_1,
@@ -2306,7 +2307,7 @@ def test_get_all_exploration_summaries(self):
             self.EXP_ID_2: exp_domain.ExplorationSummary(
                 self.EXP_ID_2, 'Exploration 2 Albert title',
                 'A category', 'An objective', 'en', [],
-                feconf.get_empty_ratings(),
+                feconf.get_empty_ratings(), feconf.EMPTY_SCALED_AVERAGE_RATING,
                 rights_manager.ACTIVITY_STATUS_PUBLIC,
                 False, [self.albert_id], [], [], [self.albert_id],
                 {self.albert_id: 1}, self.EXPECTED_VERSION_2,

diff --git a/core/domain/rating_services.py b/core/domain/rating_services.py
@@ -71,6 +71,11 @@ def _update_user_rating():
     exploration_summary.ratings[str(new_rating)] += 1
     if old_rating:
         exploration_summary.ratings[str(old_rating)] -= 1
+
+    exploration_summary.scaled_average_rating = (
+        exp_services.get_scaled_average_rating(
+            exploration_summary.ratings))
+
     exp_services.save_exploration_summary(exploration_summary)
 
 

diff --git a/core/domain/rating_services_test.py b/core/domain/rating_services_test.py
@@ -42,6 +42,10 @@ def test_rating_assignation(self):
             rating_services.get_overall_ratings_for_exploration(self.EXP_ID),
             {'1': 0, '2': 0, '3': 0, '4': 0, '5': 0})
 
+        exp_summary = exp_services.get_exploration_summary_by_id(self.EXP_ID)
+        self.assertEqual(
+            exp_summary.scaled_average_rating, 0)
+
         self.assertEqual(
             rating_services.get_user_specific_rating_for_exploration(
                 self.USER_ID_1, self.EXP_ID), None)
@@ -53,6 +57,10 @@ def test_rating_assignation(self):
         rating_services.assign_rating_to_exploration(
             self.USER_ID_1, self.EXP_ID, 3)
 
+        exp_summary = exp_services.get_exploration_summary_by_id(self.EXP_ID)
+        self.assertAlmostEqual(
+            exp_summary.scaled_average_rating, 1.5667471839848, places=4)
+
         self.assertEqual(
             rating_services.get_user_specific_rating_for_exploration(
                 self.USER_ID_1, self.EXP_ID), 3)

diff --git a/core/domain/summary_services.py b/core/domain/summary_services.py
@@ -21,6 +21,7 @@
 from core.domain import rights_manager
 from core.domain import stats_jobs_continuous
 from core.domain import user_services
+import feconf
 import utils
 
 _LIBRARY_INDEX_GROUPS = [{
@@ -53,7 +54,6 @@
         'Business', 'Economics', 'Geography', 'Government', 'History', 'Law'],
 }]
 
-NUMBER_OF_TOP_RATED_EXPLORATIONS = 8
 
 def get_human_readable_contributors_summary(contributors_summary):
     contributor_ids = contributors_summary.keys()
@@ -379,21 +379,14 @@ def get_top_rated_exploration_summary_dicts(language_codes):
     """
     filtered_exp_summaries = [
         exp_summary for exp_summary in
-        exp_services.get_non_private_exploration_summaries().values()
+        exp_services.get_top_rated_exploration_summaries().values()
         if exp_summary.language_code in language_codes and
         sum(exp_summary.ratings.values()) > 0]
 
-    lower_bound_wilson_scores = {
-        exp_summary.id:
-            exp_services.get_scaled_average_rating_from_exp_summary(
-                exp_summary)
-        for exp_summary in filtered_exp_summaries
-    }
-
     sorted_exp_summaries = sorted(
         filtered_exp_summaries,
-        key=lambda exp_summary: lower_bound_wilson_scores[exp_summary.id],
-        reverse=True)[:NUMBER_OF_TOP_RATED_EXPLORATIONS]
+        key=lambda exp_summary: exp_summary.scaled_average_rating,
+        reverse=True)[:feconf.NUMBER_OF_TOP_RATED_EXPLORATIONS]
 
     return _get_displayable_exp_summary_dicts(
         sorted_exp_summaries, include_contributors=False)

diff --git a/core/storage/exploration/gae_models.py b/core/storage/exploration/gae_models.py
@@ -317,6 +317,9 @@ class ExpSummaryModel(base_models.BaseModel):
     # Aggregate user-assigned ratings of the exploration
     ratings = ndb.JsonProperty(default=None, indexed=False)
 
+    # Scaled average rating for the exploration.
+    scaled_average_rating = ndb.FloatProperty(indexed=True)
+
     # Time when the exploration model was last updated (not to be
     # confused with last_updated, which is the time when the
     # exploration *summary* model was last updated)
@@ -376,6 +379,20 @@ def get_featured(cls):
             ExpSummaryModel.deleted == False  # pylint: disable=singleton-comparison
         ).fetch(feconf.DEFAULT_QUERY_LIMIT)
 
+    @classmethod
+    def get_top_rated(cls):
+        """Returns an iterable with the top rated exp summaries that are
+        public in descending order.
+        """
+        return ExpSummaryModel.query().filter(
+            ndb.OR(ExpSummaryModel.status == feconf.ACTIVITY_STATUS_PUBLIC,
+                   ExpSummaryModel.status == feconf.ACTIVITY_STATUS_PUBLICIZED)
+        ).filter(
+            ExpSummaryModel.deleted == False  # pylint: disable=singleton-comparison
+        ).order(
+            -ExpSummaryModel.scaled_average_rating
+        ).fetch(feconf.NUMBER_OF_TOP_RATED_EXPLORATIONS)
+
     @classmethod
     def get_private_at_least_viewable(cls, user_id):
         """Returns an iterable with private exp summaries that are at least
@@ -416,4 +433,3 @@ def get_recently_published(cls):
         ).order(
             -ExpSummaryModel.first_published_msec
         ).fetch(feconf.RECENTLY_PUBLISHED_QUERY_LIMIT)
-
diff --git a/feconf.py b/feconf.py
@@ -54,6 +54,10 @@
 # The maximum number of results to retrieve in a datastore query.
 DEFAULT_QUERY_LIMIT = 1000
 
+# The maximum number of results to retrieve in a datastore query
+# for top rated published explorations.
+NUMBER_OF_TOP_RATED_EXPLORATIONS = 8
+
 # The maximum number of results to retrieve in a datastore query
 # for recently published explorations.
 RECENTLY_PUBLISHED_QUERY_LIMIT = 8
@@ -157,6 +161,9 @@
 def get_empty_ratings():
     return copy.deepcopy(_EMPTY_RATINGS)
 
+# Empty scaled average rating as a float.
+EMPTY_SCALED_AVERAGE_RATING = 0.0
+
 # Committer id for system actions.
 SYSTEM_COMMITTER_ID = 'admin'
 SYSTEM_EMAIL_ADDRESS = 'system@example.com'

diff --git a/index.yaml b/index.yaml
@@ -37,6 +37,13 @@ indexes:
   - name: first_published_msec
     direction: desc
 
+- kind: ExpSummaryModel
+  properties:
+  - name: deleted
+  - name: status
+  - name: scaled_average_rating
+    direction: desc
+
 - kind: ExpSummaryRealtimeModel
   properties:
   - name: realtime_layer