Last active
February 6, 2021 01:45
-
-
Save fitnr/5818825 to your computer and use it in GitHub Desktop.
Split an address into number and street parts
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// http://stackoverflow.com/questions/18082/validate-numbers-in-javascript-isnumeric/1830844#1830844
/**
 * Tell whether a value is, or parses as, a finite number.
 *
 * Both checks are needed: parseFloat() rejects "" and whitespace-only
 * strings (which the coercing isFinite() would treat as 0), while
 * isFinite() rejects Infinity and strings with trailing junk such as
 * "12abc" (which parseFloat() would accept as 12).
 *
 * @param {*} n Value to test.
 * @return {boolean} True when n is numeric and finite.
 */
function isNumber(n) {
  // The global, coercing isFinite is intentional: numeric strings must pass.
  return !isNaN(parseFloat(n)) && isFinite(n);
}
/**
 * Check if a character is a single-character fraction, e.g. ¼.
 *
 * Only the first UTF-16 code unit of n is examined. Recognised ranges:
 *   188-190   (U+00BC-U+00BE): ¼ ½ ¾
 *   8531-8542 (U+2153-U+215E): ⅓ ⅔ ⅕ ⅖ ⅗ ⅘ ⅙ ⅚ ⅛ ⅜ ⅝ ⅞
 *
 * @param {string} n String whose first character is tested.
 * @return {boolean} True when that character is in one of the ranges above;
 *   false otherwise, including for an empty string.
 */
function isFractionalChar(n) {
  // Keep `c` local: an undeclared assignment would create a global and
  // throws a ReferenceError in strict mode / ES modules.
  var c = n.charCodeAt(0);
  return (c >= 188 && c <= 190) || (c >= 8531 && c <= 8542);
}
/**
 * Return the index of the first fractional character in a string
 * (as recognised by isFractionalChar).
 *
 * A miss is reported as `false` rather than -1, mirroring the falsy
 * no-match result of RegExp.exec. Because index 0 is also falsy, callers
 * must compare the result with `!== false` rather than test truthiness.
 *
 * @param {string} m String to scan.
 * @return {number|boolean} Zero-based index of the first fractional
 *   character, or false if there is none.
 */
function indexFractionalChar(m) {
  var i;
  // Indexed loop instead of for...in: yields numeric indices and never
  // visits enumerable properties inherited from a prototype.
  for (i = 0; i < m.length; i++) {
    if (isFractionalChar(m.charAt(i))) {
      return i;
    }
  }
  return false;
}
/**
 * Splits an address into the number and street part.
 * With input "100 Main Street", outputs: {number: "100", space: ' ', street: "Main Street"}
 * The special sauce is handling fractional addresses.
 * With input "22½ Baker Street", outputs: {number: "22½", space: ' ', street: "Baker Street"}
 * With input "100 1/2 Main Street", outputs: {number: "100 1/2", space: ' ', street: "Main Street"}
 *
 * The input is trimmed, and any run of whitespace (repeated spaces, tabs)
 * separates the number from the street. Whitespace inside the street part
 * is returned as given.
 *
 * @param {string} x An address with leading number
 * @return {Object} An object with the number, street and a space, for inserting between.
 *   The space property is useful for situations where you want to glue the pieces back together for a user.
 *   If the input has no leading number (e.g. "Main Street") or is a single token, .number and .space are
 *   returned empty and .street holds the trimmed input, so you don't have to bother testing
 *   and can just glue it like: x.number + x.space + x.street
 *   while processing x.number and x.street separately on the back end.
 */
function splitAddress(x) {
  var trimmed = x.trim();
  // First token, then everything after the first whitespace run.
  var parts = /^(\S+)\s+([\s\S]*)$/.exec(trimmed);
  var first, number, street, next;

  // Zero or one token: there is nothing to split off.
  if (!parts) {
    return {number: '', space: '', street: trimmed};
  }

  // A number part must start with a digit or a fraction character.
  first = parts[1].charAt(0);
  if (!isNumber(first) && !isFractionalChar(first)) {
    // If there isn't a leading number, just return the trimmed input as the street
    return {number: '', space: '', street: trimmed};
  }

  number = parts[1];
  street = parts[2];

  // A second token such as "1/2" or "½" belongs to the number, not the street.
  next = /^(\S+)(?:\s+([\s\S]*))?$/.exec(street);
  if (next && (/[0-9]\/[0-9]/.test(next[1]) || indexFractionalChar(next[1]) !== false)) {
    number += ' ' + next[1];
    // Nothing may follow the fraction (e.g. "100 1/2"); the street is then empty.
    street = next[2] || '';
  }

  return {number: number, space: ' ', street: street};
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Test cases
 *
 * Each entry is [input, expected], where expected is the {number, space, street}
 * object the input should split into. By contract, space is ' ' when a number
 * is present and '' when it is not.
 */
var cases = [
  ["100 Main Street", {number: "100", space: " ", street: "Main Street"}],
  // Address with directional indicator
  ["100 South Main Street", {number: "100", space: " ", street: "South Main Street"}],
  // Fractional address with slash
  ["100 1/2 Main Street", {number: "100 1/2", space: " ", street: "Main Street"}],
  // Fractional address number with fractional character
  ["100½ Main Street", {number: "100½", space: " ", street: "Main Street"}],
  // Address number with leading fractional address
  ["½ Main Street", {number: "½", space: " ", street: "Main Street"}],
  // Address number with letter
  ["221B Baker Street", {number: "221B", space: " ", street: "Baker Street"}],
  // Numeric street name
  ["100 10th Street", {number: "100", space: " ", street: "10th Street"}],
  // Address with hyphen (common in Queens, NYC)
  ["34-56 107th Street", {number: "34-56", space: " ", street: "107th Street"}],
  // Multi-word street name
  ["99 Dr. Martin Luther King, Jr. Blvd", {number: "99", space: " ", street: "Dr. Martin Luther King, Jr. Blvd"}],
  // Street name without address
  ["Main Street", {number: "", space: "", street: "Main Street"}],
  // Address with leading/trailing whitespace
  [" 100 Main Street ", {number: "100", space: " ", street: "Main Street"}],

  // Limitations

  // Street name that starts with a number
  // There is no way to distinguish between "6 Ave S" (a possible representation of a street, "Sixth Avenue South", in Manhattan)
  // from "6 Ave S" (a possible address on "Avenue S" in Brooklyn)
  // This script will treat it as an address:
  ["6 Ave S", {number: "6", space: " ", street: "Ave S"}],
  // possibly also correct: {number: "", space: "", street: "6 Ave S"}

  // The problem also occurs on a numeric street name without an address
  // correct (no number, so space is empty):
  ["42 Street", {number: "", space: "", street: "42 Street"}],
  // current result: {number: "42", space: " ", street: "Street"}

  // Address with one leading letter (common in Puerto Rico)
  // This fails. A solution would have to distinguish between this example and "E42 St",
  // which should be recognized as a street without an address number.
  // correct (number present, so space is ' '):
  ["A19 Calle Amapola", {number: "A19", space: " ", street: "Calle Amapola"}]
  // current result: {number: "", space: "", street: "A19 Calle Amapola"}
];
/**
 * Run an address-splitting function against a list of test cases.
 *
 * Each failing case is logged with its input and the actual output, and the
 * pass/fail totals are logged once all cases have run.
 *
 * @param {Function} fn Function under test; called with the input string and
 *   expected to return an object with number, space and street properties.
 * @param {Array} tests Array of [input, expected] pairs, where expected has
 *   number, space and street properties.
 * @return {Object} The tally, as {passed: <count>, failed: <count>}.
 */
function testAddress(fn, tests) {
  var passed = 0;
  var failed = 0;
  var i, output, expected;

  // `i` is declared locally and the array is walked by index: an undeclared
  // loop variable would leak a global and throws in strict mode / ES modules.
  for (i = 0; i < tests.length; i++) {
    output = fn(tests[i][0]);
    expected = tests[i][1];
    // Strict comparison, so a result of the wrong type (e.g. 100 instead of
    // "100") is reported as a failure rather than silently coerced.
    if (
      output.number === expected.number &&
      output.space === expected.space &&
      output.street === expected.street
    ) {
      passed++;
    } else {
      failed++;
      console.log('failed', tests[i][0], '=>', output);
    }
  }

  console.log('passed', passed);
  console.log('failed', failed);
  return {passed: passed, failed: failed};
}
// testAddress(splitAddress, cases); |
Added. Turning it into a full unit-testing suite is left as an exercise to the reader.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Suggestion: a separate list of test cases.