-
-
Notifications
You must be signed in to change notification settings - Fork 44
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(unit-number-extractor): mapper stream to separate concatenated u…
…nit numbers (#502) * feat(refactor): update documentMapper to allow easier extensions via external mappers * feat(unit-number-extractor): add new mapper stream to separate concatenated unit numbers
- Loading branch information
1 parent
5095f9d
commit f3331af
Showing
12 changed files
with
792 additions
and
65 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
/** | ||
The GNAF mapper is responsible for extracting Australian GNAF | ||
identifiers from the OA 'ID' property, where available. | ||
**/ | ||
|
||
const _ = require('lodash'); | ||
const through = require('through2'); | ||
const logger = require('pelias-logger').get('openaddresses'); | ||
|
||
// examples: GAACT718519668, GASA_424005553 | ||
const GNAF_PID_PATTERN = /^(GA)(NSW|VIC|QLD|SA_|WA_|TAS|NT_|ACT|OT_)([0-9]{9})$/; | ||
|
||
module.exports = function () { | ||
return through.obj((doc, enc, next) => { | ||
try { | ||
if (doc.getMeta('country_code') === 'AU') { | ||
|
||
// detect Australian G-NAF PID concordances | ||
const oaid = _.get(doc.getMeta('oa'), 'ID'); | ||
if (oaid.length === 14 && oaid.match(GNAF_PID_PATTERN)) { | ||
doc.setAddendum('concordances', { 'gnaf:pid': oaid }); | ||
} | ||
} | ||
} | ||
|
||
catch (e) { | ||
logger.error('gnaf_mapper error'); | ||
logger.error(e.stack); | ||
logger.error(JSON.stringify(doc, null, 2)); | ||
} | ||
|
||
return next(null, doc); | ||
}); | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
/** | ||
The unit splitting mapper is responsible for detecting when the address.number | ||
field contains the concatenation of the unit and the housenumber. | ||
eg. Flat 2 14 Smith St | ||
In this case we attempt to split the two terms into their constituent parts. | ||
note: Addressing formats vary between countries, it's unlikely that a pattern | ||
which works for one country will also work internationally. For this reason this | ||
mapper accepts a country code which can be used to select the appropriate pattern(s). | ||
Feel free to make changes to this mapping file! | ||
**/ | ||
|
||
const _ = require('lodash'); | ||
const through = require('through2'); | ||
const logger = require('pelias-logger').get('openaddresses'); | ||
const mappers = {}; | ||
|
||
// Australasian Unit Number Mapper | ||
// https://auspost.com.au/content/dam/auspost_corp/media/documents/Appendix-01.pdf | ||
// https://www.nzpost.co.nz/sites/nz/files/2021-10/adv358-address-standards.pdf | ||
/**
 * Split a combined unit/housenumber value for Australian & New Zealand records.
 *
 * Reads the 'number' address field from the document and, when it matches one
 * of the known concatenated formats, writes the unit portion to the 'unit'
 * field and the housenumber portion back to the 'number' field.
 * The document is left untouched when no format matches.
 *
 * @param {Object} doc - document exposing getAddress() / setAddress()
 * @returns {undefined}
 */
const australasian = (doc) => {
  const combined = doc.getAddress('number');

  // nothing to do for non-string values
  if (!_.isString(combined)) { return; }

  // the shortest splittable value is three characters long (eg. '1/2')
  if (combined.length < 3) { return; }

  // ordered list of formats; 'unit' and 'number' name the capture groups to use
  const formats = [
    // 2/14
    { regex: /^(\d+)\s*\/\s*(\d+)$/, unit: 1, number: 2 },

    // Flat 2 14 | F 2 14 | Unit 2 14 | APT 2 14
    { regex: /^(flat|f|unit|apartment|apt)\s*(\d+)\s+(\d+)$/i, unit: 2, number: 3 }
  ];

  for (const format of formats) {
    const groups = combined.match(format.regex);
    if (groups === null) { continue; }

    // first matching format wins
    doc.setAddress('unit', groups[format.unit]);
    doc.setAddress('number', groups[format.number]);
    return;
  }
};
|
||
// associate mappers with country codes | ||
mappers.AU = australasian; | ||
mappers.NZ = australasian; | ||
|
||
module.exports = function () { | ||
return through.obj((doc, enc, next) => { | ||
try { | ||
// only applies to records with a 'number' set and no 'unit' set (yet). | ||
if (doc.hasAddress('number') && !doc.hasAddress('unit')) { | ||
|
||
// select the appropriate mapper based on country code | ||
const mapper = _.get(mappers, doc.getMeta('country_code')); | ||
if (_.isFunction(mapper)) { | ||
|
||
// run the country-specific mapper | ||
mapper(doc); | ||
} | ||
} | ||
} | ||
|
||
catch (e) { | ||
logger.error('unit_mapper error'); | ||
logger.error(e.stack); | ||
logger.error(JSON.stringify(doc, null, 2)); | ||
} | ||
|
||
return next(null, doc); | ||
}); | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
LON,LAT,NUMBER,STREET,UNIT,CITY,DISTRICT,REGION,POSTCODE,ID | ||
144.931874,-37.791488,10,Smith Street,,input city,input district,input region,input postcode,GAVIC718519668 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
LON,LAT,HASH,NUMBER,STREET,UNIT,CITY,DISTRICT,REGION,POSTCODE,ID | ||
144.9804144,-37.8723977,710daac656ffd0c3,10/244,BARKLY STREET,,ST KILDA,,VIC,"3182","50579518" | ||
145.0378718,-37.8637847,92862c98c20bbe3d,10/244-246,WATTLETREE ROAD,,MALVERN,,VIC,"3144","208518759" | ||
145.0003807,-37.8289596,d0a21035cebcd8ab,10/244-246,MARY STREET,,RICHMOND,,VIC,"3121","51463974" | ||
144.978361,-37.8002503,4e891155eb009dc3,10/244,BRUNSWICK STREET,,FITZROY,,VIC,"3065","210464257" | ||
144.9591621,-37.8331898,e20c57c01d5d42c0,110/244,DORCAS STREET,,SOUTH MELBOURNE,,VIC,"3205","423672310" | ||
144.9591621,-37.8331898,50c85f85cce9181f,210/244,DORCAS STREET,,SOUTH MELBOURNE,,VIC,"3205","423672321" | ||
144.9591621,-37.8331898,4e737a8cc6ada9ec,310/244,DORCAS STREET,,SOUTH MELBOURNE,,VIC,"3205","423672332" | ||
144.9591621,-37.8331898,d6ed0494e8c53ff8,410/244,DORCAS STREET,,SOUTH MELBOURNE,,VIC,"3205","423672343" | ||
144.9591621,-37.8331898,fa0691071a173dab,510/244,DORCAS STREET,,SOUTH MELBOURNE,,VIC,"3205","423672353" | ||
144.925714,-37.7516895,00be263cea28bea0,10/244,PASCOE VALE ROAD,,ESSENDON,,VIC,"3040","429232726" |
Oops, something went wrong.