Skip to content

Commit

Permalink
only select columns needed; separate script for subscribing
Browse files Browse the repository at this point in the history
  • Loading branch information
groovecoder committed Sep 19, 2019
1 parent 074c2be commit 976568a
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 2 deletions.
4 changes: 2 additions & 2 deletions scripts/lowercase-all-records.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ async function subscribeLowercaseHashToHIBP(emailAddress) {


(async () => {
const subRecordsWithUpperChars = await knex("subscribers")
const subRecordsWithUpperChars = await knex.select("id", "primary_email").from("subscribers")
.whereRaw("primary_email != lower(primary_email)");
const subsWithUpperCount = subRecordsWithUpperChars.length;
console.log(`found ${subsWithUpperCount} subscribers records with primary_email != lower(primary_email). fixing ...`);
Expand All @@ -32,7 +32,7 @@ async function subscribeLowercaseHashToHIBP(emailAddress) {
console.log(`fixed subscribers record ID: ${subRecord.id}`);
}

const emailRecordsWithUpperChars = await knex("email_addresses")
const emailRecordsWithUpperChars = await knex.select("id", "email").from("email_addresses")
.whereRaw("email != lower(email)");
const emailsWithUpperCount = emailRecordsWithUpperChars.length;
console.log(`found ${emailsWithUpperCount} email_addresses records with email != lower(email)`);
Expand Down
68 changes: 68 additions & 0 deletions scripts/subscribe-lowercase-hashes.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
"use strict";


const Knex = require("knex");
const knexConfig = require("../db/knexfile");
const knex = Knex(knexConfig);


const HIBP = require("../hibp");
const getSha1 = require("../sha1-utils");


async function subscribeLowercaseHashToHIBP(emailAddress) {
const lowerCasedEmail = emailAddress.toLowerCase();
const lowerCasedSha1 = getSha1(lowerCasedEmail);
await HIBP.subscribeHash(lowerCasedSha1);
return lowerCasedSha1;
}


(async () => {
const chunkSize = process.argv[2];
console.log(`subscribing lower-cased hashes in ${chunkSize}-sized chunks`);

const subRecordsThatNeedFixing = await knex("subscribers").count().whereRaw("primary_email != lower(primary_email)");
const subsWithUpperCount = subRecordsThatNeedFixing[0].count;
console.log(`found ${subsWithUpperCount} subscribers records with primary_email != lower(primary_email). fixing ...`);

let subRecordsFixed = 0;
let subPrevMaxId = 0;
while (subRecordsFixed < subsWithUpperCount) {
console.log(`working on chunk where id > ${subPrevMaxId} ...`);
const subRecordsWithUpperCharsChunk = await knex.select("id", "primary_email").from("subscribers")
.where("id", ">", subPrevMaxId)
.whereRaw("primary_email != lower(primary_email)")
.orderBy("id", "asc")
.limit(chunkSize);
for (const subRecord of subRecordsWithUpperCharsChunk) {
await subscribeLowercaseHashToHIBP(subRecord.primary_email);
subPrevMaxId = subRecord.id;
subRecordsFixed++;
console.log(`subscribed lower-case address hash for subscribers record ID: ${subRecord.id}`);
}
}

const emailRecordsThatNeedFixing = await knex("email_addresses").count().whereRaw("email != lower(email)");
const emailWithUpperCount = emailRecordsThatNeedFixing[0].count;
console.log(`found ${emailWithUpperCount} email_address records with email != lower(email). fixing ...`);

let emailRecordsFixed = 0;
let emailPrevMaxId = 0;
while (emailRecordsFixed < emailWithUpperCount) {
console.log(`working on chunk where id > ${emailPrevMaxId} ...`);
const emailRecordsWithUpperChars = await knex.select("id", "email").from("email_addresses")
.where("id", ">", emailPrevMaxId)
.whereRaw("email != lower(email)")
.orderBy("id", "asc")
.limit(chunkSize);
for (const emailRecord of emailRecordsWithUpperChars) {
await subscribeLowercaseHashToHIBP(emailRecord.email);
emailPrevMaxId = emailRecord.id;
emailRecordsFixed++;
console.log(`fixed email_addresses record ID: ${emailRecord.id}`);
}
}
console.log("done.");
process.exit();
})();

0 comments on commit 976568a

Please sign in to comment.