Skip to content

Commit

Permalink
Remove some badly-formatted e164's.
Browse files Browse the repository at this point in the history
  • Loading branch information
greyson-signal committed Dec 12, 2024
1 parent 5c075b0 commit 8ddbc57
Show file tree
Hide file tree
Showing 5 changed files with 192 additions and 68 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2133,67 +2133,6 @@ open class RecipientTable(context: Context, databaseHelper: SignalDatabase) : Da
.readToSingleObject { PhoneNumberDiscoverableState.fromId(it.requireInt(PHONE_NUMBER_DISCOVERABLE)) }
}

/**
* @return True if setting the phone number resulted in changed recipientId, otherwise false.
*/
fun setPhoneNumber(id: RecipientId, e164: String): Boolean {
val db = writableDatabase

db.beginTransaction()
return try {
setPhoneNumberOrThrow(id, e164)
db.setTransactionSuccessful()
false
} catch (e: SQLiteConstraintException) {
Log.w(TAG, "[setPhoneNumber] Hit a conflict when trying to update $id. Possibly merging.")

val existing: RecipientRecord = getRecord(id)
val newId = getAndPossiblyMerge(existing.aci, e164)
Log.w(TAG, "[setPhoneNumber] Resulting id: $newId")

db.setTransactionSuccessful()
newId != existing.id
} finally {
db.endTransaction()
}
}

private fun removePhoneNumber(recipientId: RecipientId) {
val values = ContentValues().apply {
putNull(E164)
putNull(PNI_COLUMN)
}

if (update(recipientId, values)) {
rotateStorageId(recipientId)
}
}

/**
* Should only use if you are confident that this will not result in any contact merging.
*/
@Throws(SQLiteConstraintException::class)
fun setPhoneNumberOrThrow(id: RecipientId, e164: String) {
val contentValues = ContentValues(1).apply {
put(E164, e164)
}
if (update(id, contentValues)) {
rotateStorageId(id)
AppDependencies.databaseObserver.notifyRecipientChanged(id)
StorageSyncHelper.scheduleSyncForDataChange()
}
}

@Throws(SQLiteConstraintException::class)
fun setPhoneNumberOrThrowSilent(id: RecipientId, e164: String) {
val contentValues = ContentValues(1).apply {
put(E164, e164)
}
if (update(id, contentValues)) {
rotateStorageId(id)
}
}

/**
* Associates the provided IDs together. The assumption here is that all of the IDs correspond to the local user and have been verified.
*/
Expand Down Expand Up @@ -4051,6 +3990,13 @@ open class RecipientTable(context: Context, databaseHelper: SignalDatabase) : Da
}
}

/**
* Exposes the merge functionality for the sake of an app migration.
*/
fun mergeForMigration(primaryId: RecipientId, secondaryId: RecipientId) {
merge(primaryId, secondaryId, pniVerified = true)
}

/**
* Merges one ACI recipient with an E164 recipient. It is assumed that the E164 recipient does
* *not* have an ACI.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
import org.thoughtcrime.securesms.migrations.BackfillDigestsMigrationJob;
import org.thoughtcrime.securesms.migrations.BackupJitterMigrationJob;
import org.thoughtcrime.securesms.migrations.BackupNotificationMigrationJob;
import org.thoughtcrime.securesms.migrations.BadE164MigrationJob;
import org.thoughtcrime.securesms.migrations.BlobStorageLocationMigrationJob;
import org.thoughtcrime.securesms.migrations.CachedAttachmentsMigrationJob;
import org.thoughtcrime.securesms.migrations.ClearGlideCacheMigrationJob;
Expand Down Expand Up @@ -277,6 +278,7 @@ public static Map<String, Job.Factory> getJobFactories(@NonNull Application appl
put(BackupMediaSnapshotSyncJob.KEY, new BackupMediaSnapshotSyncJob.Factory());
put(BackupNotificationMigrationJob.KEY, new BackupNotificationMigrationJob.Factory());
put(BackupRefreshJob.KEY, new BackupRefreshJob.Factory());
put(BadE164MigrationJob.KEY, new BadE164MigrationJob.Factory());
put(BlobStorageLocationMigrationJob.KEY, new BlobStorageLocationMigrationJob.Factory());
put(CachedAttachmentsMigrationJob.KEY, new CachedAttachmentsMigrationJob.Factory());
put(ClearGlideCacheMigrationJob.KEY, new ClearGlideCacheMigrationJob.Factory());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -164,9 +164,10 @@ static final class Version {
static final int GROUP_EXTRAS_DB_FIX = 120;
static final int EMOJI_SEARCH_INDEX_CHECK_2 = 121;
static final int QUOTE_AUTHOR_FIX = 122;
static final int BAD_E164_FIX = 123;
}

public static final int CURRENT_VERSION = 122;
public static final int CURRENT_VERSION = 123;

/**
* This *must* be called after the {@link JobManager} has been instantiated, but *before* the call
Expand Down Expand Up @@ -753,6 +754,10 @@ private static LinkedHashMap<Integer, MigrationJob> getMigrationJobs(@NonNull Co
jobs.put(Version.QUOTE_AUTHOR_FIX, new DatabaseMigrationJob());
}

if (lastSeenVersion < Version.BAD_E164_FIX) {
jobs.put(Version.BAD_E164_FIX, new BadE164MigrationJob());
}

return jobs;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
package org.thoughtcrime.securesms.migrations

import android.database.sqlite.SQLiteConstraintException
import org.signal.core.util.Stopwatch
import org.signal.core.util.delete
import org.signal.core.util.logging.Log
import org.signal.core.util.readToList
import org.signal.core.util.requireLong
import org.signal.core.util.requireNonNullString
import org.signal.core.util.select
import org.signal.core.util.update
import org.signal.core.util.withinTransaction
import org.thoughtcrime.securesms.database.MessageTable
import org.thoughtcrime.securesms.database.RecipientTable
import org.thoughtcrime.securesms.database.RecipientTable.Companion.ACI_COLUMN
import org.thoughtcrime.securesms.database.RecipientTable.Companion.E164
import org.thoughtcrime.securesms.database.RecipientTable.Companion.ID
import org.thoughtcrime.securesms.database.RecipientTable.Companion.PNI_COLUMN
import org.thoughtcrime.securesms.database.RecipientTable.Companion.TABLE_NAME
import org.thoughtcrime.securesms.database.SignalDatabase
import org.thoughtcrime.securesms.jobmanager.Job
import org.thoughtcrime.securesms.phonenumbers.PhoneNumberFormatter
import org.thoughtcrime.securesms.recipients.RecipientId

/**
* Through testing, we've discovered that some badly-formatted e164's wound up in the e164 column of the recipient table.
* We believe this was mostly a result of a legacy chat-creation path where we'd basically create a recipient with any number that was input.
* This makes a best-effort to clear out that bad data in a safe manner.
* Normally we'd do something like this in a DB migration, but we wanted to have access to recipient merging and number formatting, which could
* be unnecessarily difficult in a DB migration.
*/
internal class BadE164MigrationJob(
parameters: Parameters = Parameters.Builder().build()
) : MigrationJob(parameters) {

companion object {
val TAG = Log.tag(BadE164MigrationJob::class.java)
const val KEY = "BadE164MigrationJob"

/**
* Accounts for the following types of invalid numbers:
* - Contains an invalid char anywhere (i.e. not a digit or +)
* - A shortcode that doesn't start with a number
* - A non-shortcode (longcode?) that doesn't start with +{digit}
* - A number with exactly 7 chars (strange but true -- neither shortcodes nor longcodes can be 7 chars long)
*/
private const val INVALID_E164_QUERY =
"""
(
$E164 NOT NULL AND (
$E164 GLOB '*[^+0-9]*'
OR (LENGTH($E164) > 7 AND $E164 NOT GLOB '+[0-9]*')
OR (LENGTH($E164) == 7)
OR (LENGTH($E164) < 7 AND $E164 NOT GLOB '[0-9]*')
)
)
"""
}

override fun getFactoryKey(): String = KEY

override fun isUiBlocking(): Boolean = false

override fun performMigration() {
val stopwatch = Stopwatch("bad-e164")

val invalidWithOtherIdentifiersCount = SignalDatabase.recipients.removeInvalidE164sFromRecipientsWithOtherIdentifiers()
Log.d(TAG, "Removed $invalidWithOtherIdentifiersCount invalid e164's from recipients that had another identifier.")
stopwatch.split("alt-id")

val invalidWithNoMessagesCount = SignalDatabase.recipients.deleteRecipientsWithInvalidE164sAndNoMessages()
Log.d(TAG, "Deleted $invalidWithNoMessagesCount recipients with invalid e164's and no messages.")
stopwatch.split("no-msg")

val invalidEntries = SignalDatabase.recipients.getRecipientsWithInvalidE164s()
stopwatch.split("fetch-invalid")
if (invalidEntries.isEmpty()) {
Log.d(TAG, "No more invalid e164s, we're done.")
stopwatch.stop(TAG)
return
}

Log.i(TAG, "There are ${invalidEntries.size} remaining entries ")

val hasLettersRegex = "[^+0-9]".toRegex()
for (entry in invalidEntries) {
if (entry.e164.matches(hasLettersRegex)) {
Log.w(TAG, "We have encountered an e164-only recipient, with messages, that has invalid characters in the e164.")
continue
}

val formattedNumber = PhoneNumberFormatter.get(context).formatOrNull(entry.e164)
if (formattedNumber == null) {
Log.w(TAG, "We have encountered an e164-only recipient, with messages, whose value characters are all valid, but still remains completely unparsable.")
continue
}

if (formattedNumber == entry.e164) {
Log.w(TAG, "We have encountered an e164-only recipient, with messages, whose value characters are all valid, but after formatting the number, it's the same!")
continue
}

SignalDatabase.recipients.updateE164(entry.id, entry.e164)
Log.w(TAG, "Update the E164 to the new, formatted number.")
}

stopwatch.split("merge")
stopwatch.stop(TAG)
}

override fun shouldRetry(e: Exception): Boolean = false

private fun RecipientTable.removeInvalidE164sFromRecipientsWithOtherIdentifiers(): Int {
return readableDatabase
.update(TABLE_NAME)
.values(E164 to null)
.where("$INVALID_E164_QUERY AND ($ACI_COLUMN NOT NULL OR $PNI_COLUMN NOT NULL)")
.run()
}

private fun RecipientTable.deleteRecipientsWithInvalidE164sAndNoMessages(): Int {
return readableDatabase
.delete(TABLE_NAME)
.where(
"""
$INVALID_E164_QUERY AND
$ID NOT IN (
SELECT ${MessageTable.TO_RECIPIENT_ID} FROM ${MessageTable.TABLE_NAME}
UNION
SELECT ${MessageTable.FROM_RECIPIENT_ID} FROM ${MessageTable.TABLE_NAME}
)
"""
)
.run()
}

private fun RecipientTable.getRecipientsWithInvalidE164s(): List<InvalidEntry> {
return readableDatabase
.select(ID, E164)
.from(TABLE_NAME)
.where(INVALID_E164_QUERY)
.run()
.readToList {
InvalidEntry(
id = it.requireLong(ID),
e164 = it.requireNonNullString(E164)
)
}
}

private fun RecipientTable.updateE164(id: Long, e164: String) {
writableDatabase.withinTransaction { db ->
try {
db.update(TABLE_NAME)
.values(E164 to e164)
.where("$ID = ?", id)
.run()
} catch (e: SQLiteConstraintException) {
Log.w(TAG, "There was a conflict when trying to update Recipient::$id with a properly-formatted E164. Merging.")
val existing = getByE164(e164).get()
mergeForMigration(existing, RecipientId.from(id))
}
}
}

private data class InvalidEntry(
val id: Long,
val e164: String
)

class Factory : Job.Factory<BadE164MigrationJob> {
override fun create(parameters: Parameters, serializedData: ByteArray?): BadE164MigrationJob {
return BadE164MigrationJob(parameters)
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ public class PhoneNumberFormatter {
private static final String UNKNOWN_NUMBER = "Unknown";

private static final Set<String> EXCLUDE_FROM_MANUAL_SHORTCODE_4 = SetUtil.newHashSet("AC", "NC", "NU", "TK");
private static final Set<String> MANUAL_SHORTCODE_6 = SetUtil.newHashSet("DE", "FI", "GB", "SK");
private static final Set<Integer> NATIONAL_FORMAT_COUNTRY_CODES = SetUtil.newHashSet(1 /*US*/, 44 /*UK*/);

private static final Pattern US_NO_AREACODE = Pattern.compile("^(\\d{7})$");
Expand Down Expand Up @@ -128,11 +127,7 @@ public static int getLocalCountryCode() {
else return number.trim();
}

if (bareNumber.length() <= 6 && MANUAL_SHORTCODE_6.contains(localCountryCode)) {
return bareNumber;
}

if (bareNumber.length() <= 4 && !EXCLUDE_FROM_MANUAL_SHORTCODE_4.contains(localCountryCode)) {
if (bareNumber.length() <= 6) {
return bareNumber;
}

Expand Down

0 comments on commit 8ddbc57

Please sign in to comment.