Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

retention-period: Add archiving expired messages tool by retention pe… #2172

Closed
wants to merge 8 commits into from
Prev Previous commit
Next Next commit
retention-period: Fix wording and typos.
- Add additional comments.
- Rephrase comments to make them more clear.
- Fix typos.

Fixes #106
  • Loading branch information
kkanahin committed May 30, 2017
commit eca59ba8d725dbcbf57256e82eb7ccce0564eafe
5 changes: 3 additions & 2 deletions puppet/zulip/files/cron.d/archive-messages
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
MAILTO=root
# Removing old archived data should follow after archiving messages,
# cause it checks related archived messages and original attachments rows before deleting .
# The retention policy data deletion process should remove old archived
# data *after* archiving messages, to check related archived messages
# and the original attachments' rows before deleting.

# Archive messages once a day. Time is in UTC.
0 1 * * * zulip cd /home/zulip/deployments/current && ./manage.py archive_messages
Expand Down
18 changes: 18 additions & 0 deletions zerver/lib/retention.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,8 @@ def move_expired_attachments_message_rows_to_archive(realm):

def delete_expired_messages(realm):
# type: (Realm) -> None
# Delete messages after retention period
# if they already have been moved to archive.
removing_messages = Message.objects.filter(
usermessage__isnull=True, id__in=ArchivedMessage.objects.all(),
sender__realm_id=realm.id
Expand All @@ -115,6 +117,8 @@ def delete_expired_messages(realm):

def delete_expired_user_messages(realm):
# type: (Realm) -> None
# Delete user_messages after retention period
# if they have already been moved to archive.
removing_user_messages = UserMessage.objects.filter(
id__in=ArchivedUserMessage.objects.all(),
user_profile__realm_id=realm.id
Expand All @@ -124,6 +128,8 @@ def delete_expired_user_messages(realm):

def delete_expired_attachments(realm):
# type: (Realm) -> None
# Delete attachments after retention period
# if they already have been moved to archive.
attachments_to_remove = Attachment.objects.filter(
messages__isnull=True, id__in=ArchivedAttachment.objects.all(),
realm_id=realm.id
Expand All @@ -133,6 +139,7 @@ def delete_expired_attachments(realm):

def clean_unused_messages():
# type: () -> None
# Remove unused messages without user_messages.
unused_messages = Message.objects.filter(
usermessage__isnull=True, id__in=ArchivedMessage.objects.all()
)
Expand All @@ -141,6 +148,7 @@ def clean_unused_messages():

def archive_messages():
# type: () -> None
# The main function for archiving messages' data.
for realm in Realm.objects.filter(message_retention_days__isnull=False):
move_expired_messages_to_archive(realm)
move_expired_user_messages_to_archive(realm)
Expand All @@ -154,6 +162,8 @@ def archive_messages():

def delete_expired_archived_attachments_by_realm(realm_id):
# type: (int) -> None
# Delete old archived attachments from archive table
# after retention period for archived data.
expired_date = timezone_now() - timedelta(days=settings.ARCHIVED_DATA_RETENTION_DAYS)
arc_attachments = ArchivedAttachment.objects \
.filter(archive_timestamp__lt=expired_date, realm_id=realm_id, messages__isnull=True) \
Expand All @@ -165,6 +175,8 @@ def delete_expired_archived_attachments_by_realm(realm_id):

def delete_expired_archived_data_by_realm(realm_id):
# type: (int) -> None
# Delete old archived messages and user_messages from archive tables
# after retention period for archived data.
arc_expired_date = timezone_now() - timedelta(days=settings.ARCHIVED_DATA_RETENTION_DAYS)
ArchivedUserMessage.objects.filter(archive_timestamp__lt=arc_expired_date,
user_profile__realm_id=realm_id).delete()
Expand Down Expand Up @@ -234,6 +246,7 @@ def move_message_to_archive(message_id):

def restore_archived_messages_by_realm(realm_id):
# type: (int) -> None
# Function for restoring archived messages by realm for emergency cases.
query = """
INSERT INTO zerver_message ({dst_fields})
SELECT {src_fields}
Expand All @@ -248,6 +261,7 @@ def restore_archived_messages_by_realm(realm_id):

def restore_archived_usermessages_by_realm(realm_id):
# type: (int) -> None
# Function for restoring archived user_messages by realm for emergency cases.
query = """
INSERT INTO zerver_usermessage ({dst_fields})
SELECT {src_fields}
Expand All @@ -263,6 +277,7 @@ def restore_archived_usermessages_by_realm(realm_id):

def restore_archived_attachments_by_realm(realm_id):
# type: (int) -> None
# Function for restoring archived attachments by realm for emergency cases.
query = """
INSERT INTO zerver_attachment ({dst_fields})
SELECT {src_fields}
Expand All @@ -282,6 +297,8 @@ def restore_archived_attachments_by_realm(realm_id):

def restore_archived_attachments_message_rows_by_realm(realm_id):
# type: (int) -> None
# Function for restoring archived data in many-to-many attachment_messages
# table by realm for emergency cases.
query = """
INSERT INTO zerver_attachment_messages (id, attachment_id, message_id)
SELECT zerver_archivedattachment_messages.id, zerver_archivedattachment_messages.archivedattachment_id,
Expand All @@ -301,6 +318,7 @@ def restore_archived_attachments_message_rows_by_realm(realm_id):

def restore_realm_archived_data(realm_id):
# type: (int) -> None
# The main function for restoring archived messages' data by realm.
restore_archived_messages_by_realm(realm_id)
restore_archived_usermessages_by_realm(realm_id)
restore_archived_attachments_by_realm(realm_id)
Expand Down
2 changes: 1 addition & 1 deletion zerver/management/commands/restore_messages_by_realm.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class Command(BaseCommand):
def add_arguments(self, parser):
# type: (ArgumentParser) -> None
parser.add_argument('domain', metavar='<domain>', type=str,
help='domain of realm to restore messages data')
help='The domain of the realm to restore messages data to.')

def handle(self, *args, **options):
# type: (*Any, **str) -> None
Expand Down
26 changes: 13 additions & 13 deletions zerver/tests/test_retention.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,19 +59,19 @@ def _change_msgs_pub_date(msgs_ids, pub_date):
msgs.update(pub_date=pub_date)
return msgs

def _make_mit_msgs(self, msg_qauntity, pub_date):
def _make_mit_msgs(self, msg_quantity, pub_date):
# type: (int, datetime) -> Any
# Send messages from mit.edu realm and change messages pub_date.
sender = self.mit_user('espuser')
recipient = self.mit_user('starnine')
msgs_ids = [self.send_message(sender.email, recipient.email, Recipient.PERSONAL) for i in
range(msg_qauntity)]
range(msg_quantity)]
mit_msgs = self._change_msgs_pub_date(msgs_ids, pub_date)
return mit_msgs

def _send_cross_realm_message(self):
def _send_cross_realms_message(self):
# type: () -> int
# Send message from bot to users from different realm.
# Send a message from bot to users in different realms.
bot_email = 'notification-bot@zulip.com'
get_user_profile_by_email(bot_email)
mit_user = UserProfile.objects.filter(realm=self.mit_realm).first()
Expand Down Expand Up @@ -140,7 +140,7 @@ def _add_expired_date_to_archive_data(self):

def _check_cross_realm_messages_archiving(self, arc_user_msg_qty, period, realm=None):
# type: (int, int, Realm) -> int
sended_message_id = self._send_cross_realm_message()
sended_message_id = self._send_cross_realms_message()
all_user_messages_qty = UserMessage.objects.count()
self._change_msgs_pub_date([sended_message_id], timezone_now() - timedelta(days=period))
realms = Realm.objects.filter(message_retention_days__isnull=False)
Expand Down Expand Up @@ -236,9 +236,9 @@ def test_expired_messages_in_one_realm(self):
archived_user_messages = ArchivedUserMessage.objects.all()
self.assertEqual(ArchivedMessage.objects.count(), 5)
self.assertEqual(archived_user_messages.count(), 10)
# Compare expected messages ids with archived messages in mit realm
# Compare expected messages' IDs with IDs of archived messages in MIT realm
self._check_archive_data_by_realm(exp_mit_msgs, self.mit_realm)
# Check no archive messages for zulip realm.
# Check that the zulip realm does not contain any archived messages.
self.assertEqual(
ArchivedMessage.objects.filter(
archivedusermessage__user_profile__realm=self.zulip_realm).count(),
Expand All @@ -251,30 +251,30 @@ def test_expired_messages_in_one_realm(self):

def test_cross_realm_messages_archiving_one_realm_expired(self):
# type: () -> None
# Check archiving messages which is sent to different realms
# Check archived message that is sent to different realms
# and expired just on one of them.
arc_msg_id = self._check_cross_realm_messages_archiving(1, 31, realm=self.zulip_realm)
self.assertTrue(Message.objects.filter(id=arc_msg_id).exists())

def test_cross_realm_messages_archiving_two_realm_expired(self):
# type: () -> None
# Check archiving cross realm message wich is expired on both realms.
# Check archived cross-realm message which is expired on both realms.
arc_msg_id = self._check_cross_realm_messages_archiving(2, 101)
self.assertFalse(Message.objects.filter(id=arc_msg_id).exists())

def test_archive_message_tool(self):
# type: () -> None
# Check archiving tool.
exp_msgs_ids_dict = self._make_expired_messages() # List of expired messages ids.
sended_cross_realm_message_id = self._send_cross_realm_message()
exp_msgs_ids_dict = self._make_expired_messages() # List of expired messages' IDs.
sended_cross_realm_message_id = self._send_cross_realms_message()
exp_msgs_ids_dict['mit_msgs_ids'].append(sended_cross_realm_message_id)
# Add cross realm message id.
# Add cross-realm message id.
self._change_msgs_pub_date(
[sended_cross_realm_message_id],
timezone_now() - timedelta(days=101)
)
exp_msgs_ids = exp_msgs_ids_dict['mit_msgs_ids'] + exp_msgs_ids_dict['zulip_msgs_ids']
# Get expired user messages by message ids
# Get expired user messages by message IDs
exp_user_msgs_ids = list(UserMessage.objects.filter(
message_id__in=exp_msgs_ids).order_by('id').values_list('id', flat=True))
msgs_qty = Message.objects.count()
Expand Down