Last active
March 2, 2025 07:41
-
-
Save petered/36a3a733263403c4bff8b8dbef015c63 to your computer and use it in GitHub Desktop.
Vocal Coords - A system for robust communication of coordinates over voice
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Vocal Coords Coordinate System
 *
 * This file is part of the Vocal Coords app — A free, offline, voice-enabled coordinate converter
 * designed to reduce miscommunication of coordinates. The system encodes positions to roughly 2.4m (8ft)
 * precision using a 12-token alphanumeric code with built-in error detection.
 *
 * For more details on how the system works, please visit:
 * https://vocalcoords.com/about
 *
 * This project is released under the MIT License.
 *
 * Copyright (c) 2025 Eagle Eyes Search Inc
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
 * documentation files (the "Software"), to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
 * to permit persons to whom the Software is furnished to do so.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
 * TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
import 'dart:convert'; | |
import 'package:crclib/catalog.dart'; | |
import 'package:dart_geohash/dart_geohash.dart'; | |
import 'package:latlong2/latlong.dart'; | |
import 'package:result_dart/result_dart.dart'; | |
import 'package:vocal_coords/conversion.dart'; | |
/// The ten digits followed by the 26 upper-case letters.
const String fullAlphaNumeric = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ';

/// The spoken word used for each character of [fullAlphaNumeric].
///
/// Keys are single upper-case characters; values are the words that
/// are read out, e.g. `'9'` is spoken as "Niner" and `'A'` as "Alfa".
const Map<String, String> alphaNumericToPhonetic = <String, String>{
  // Digits
  '0': 'Zero',
  '1': 'One',
  '2': 'Two',
  '3': 'Three',
  '4': 'Four',
  '5': 'Five',
  '6': 'Six',
  '7': 'Seven',
  '8': 'Eight',
  '9': 'Niner',
  // Letters
  'A': 'Alfa',
  'B': 'Bravo',
  'C': 'Charlie',
  'D': 'Delta',
  'E': 'Echo',
  'F': 'Foxtrot',
  'G': 'Golf',
  'H': 'Hotel',
  'I': 'India',
  'J': 'Juliett',
  'K': 'Kilo',
  'L': 'Lima',
  'M': 'Mike',
  'N': 'November',
  'O': 'Oscar',
  'P': 'Papa',
  'Q': 'Quebec',
  'R': 'Romeo',
  'S': 'Sierra',
  'T': 'Tango',
  'U': 'Uniform',
  'V': 'Victor',
  'W': 'Whiskey',
  'X': 'X-ray',
  'Y': 'Yankee',
  'Z': 'Zulu',
};
/// Alternative spellings and mis-hearings of the spoken words, mapped to the
/// character they stand for.
///
/// Some entries consist of two words ("Key low", "Fox Trot", "X ray").
/// Entry order is kept stable because the token list used to build the
/// spoken-input regex is derived from it.
const Map<String, String> phoneticMisSpellings = <String, String>{
  // Digits
  'Oh': '0',
  'Owe': '0',
  'Won': '1',
  'To': '2',
  'Too': '2',
  'Tu': '2',
  'II': '2',
  'Tree': '3',
  'For': '4',
  'Fore': '4',
  'Fife': '5',
  'Sex': '6',
  'Sick': '6',
  'Sicks': '6',
  'Sven': '7',
  'Hate': '8',
  'Ate': '8',
  'Nine': '9',
  'Nein': '9',
  '9er': '9',
  // Letters
  'Alpha': 'A',
  'Charley': 'C',
  'Charly': 'C',
  'Ecko': 'E',
  'Gulf': 'G',
  'Goth': 'G',
  'Key low': 'K',
  'Juliet': 'J',
  'Lemur': 'L',
  'Mic': 'M',
  'Oskar': 'O',
  'Oscer': 'O',
  'Rodeo': 'R',
  'Sarah': 'S',
  'Sara': 'S',
  'Fox Trot': 'F',
  'Unicorn': 'U',
  'Viktor': 'V',
  'Whisky': 'W',
  'Xray': 'X',
  'X ray': 'X',
  'Yankie': 'Y',
};
/// Reverse lookup from a lower-cased spoken word to its character.
///
/// Contains the words of [alphaNumericToPhonetic] first, followed by the
/// alternative spellings from [phoneticMisSpellings]; every key is
/// lower-cased so lookups can be made case-insensitive by lower-casing the
/// query.
final Map<String, String> phoneticToAlphaNumeric = <String, String>{
  ...alphaNumericToPhonetic.map(
    (character, word) => MapEntry(word.toLowerCase(), character),
  ),
  ...phoneticMisSpellings.map(
    (spelling, character) => MapEntry(spelling.toLowerCase(), character),
  ),
};
/// The outcome of successfully parsing a coordinate string.
///
/// Pairs the parsed [value] with [canonicalString], the normalised text
/// form of that value.
class ParsedValue<T> {
  /// The parsed value.
  final T value;

  /// The canonical text representation of [value].
  final String canonicalString;

  /// Creates a parsed value; `const` so immutable instances can be
  /// compile-time constants when their arguments are.
  const ParsedValue(this.value, this.canonicalString);
}
/// Helpers for transforming a [Result] that wraps a [ParsedValue].
extension ParsedValueResultExtensions<T> on Result<ParsedValue<T>> {
  /// Maps a successful parse to a new [ParsedValue] holding
  /// `transform(value)`.
  ///
  /// When [stringModifier] is given it is applied to the canonical string;
  /// otherwise the canonical string is carried over unchanged. A failed
  /// result is re-wrapped in a [Failure] holding the same exception.
  Result<ParsedValue<U>> mapParsed<U>(
    U Function(T) transform, {
    String Function(String)? stringModifier,
  }) {
    // Nothing to transform on failure: forward the exception as-is.
    if (!isSuccess()) {
      return Failure(exceptionOrNull()!);
    }

    final original = getOrThrow();
    final canonical = stringModifier == null
        ? original.canonicalString
        : stringModifier(original.canonicalString);
    return Success(ParsedValue(transform(original.value), canonical));
  }
}
/// The 32 characters a written code may contain: the digits plus the
/// upper-case letters, with A, I, L and O left out.
const String geoHashAlphabet = '0123456789BCDEFGHJKMNPQRSTUVWXYZ';

/// Generic separator: whitespace, then an optional "-" or the word "dash",
/// then optional whitespace.
final String separatorPattern = r'\s+(?:-|\bdash\b)?\s*';

/// Non-capturing alternation of every entry in [phoneticTokens].
///
/// Each token is regex-escaped before joining with `|`; a literal space
/// inside a token is then replaced by `\s`, so a single whitespace
/// character of any kind is accepted at that position.
final String phoneticTokenPattern =
    "(?:${phoneticTokens.map(RegExp.escape).join('|').replaceAll(' ', r'\s')})";

/// Separator between tokens of the same group: one or more whitespace
/// characters, commas or periods.
final String intraGroupSeparator = r'[\s,\.]+';

/// Separator between groups: like [intraGroupSeparator], optionally followed
/// by "-" or the word "dash" and further whitespace, commas or periods.
final String interGroupSeparator = r'[\s,\.]+(?:-|\bdash\b)?[\s,\.]*';
/// Matches a spoken code: 12 tokens arranged as 3 groups of 4.
///
/// * Group 1: tokens 1-4.
/// * Group 2: tokens 5-8.
/// * Group 3: tokens 9-12, where token 10 must consist of letters only.
///
/// Tokens inside a group are separated by [intraGroupSeparator]; groups are
/// separated by [interGroupSeparator], which additionally allows a dash.
/// Matching is case-insensitive.
final RegExp spokenCoordsRegex = _buildSpokenCoordsRegex();

/// Assembles the pattern for [spokenCoordsRegex] from its named parts.
RegExp _buildSpokenCoordsRegex() {
  final token = phoneticTokenPattern;
  // One "separator + token" step inside a group.
  final nextToken = '(?:${intraGroupSeparator}${token})';
  // Groups 1 and 2: a token followed by three more.
  final plainGroup = '${token}${nextToken}{3}';
  // Group 3: token 9, a letters-only token 10, then tokens 11 and 12.
  final checkGroup =
      '${token}${intraGroupSeparator}[A-Za-z]+${nextToken}{2}';

  return RegExp(
    '${plainGroup}${interGroupSeparator}'
    '${plainGroup}${interGroupSeparator}'
    '${checkGroup}'
    // The code must end at a word boundary that is not followed by a digit.
    r'\b(?!\d)',
    caseSensitive: false,
  );
}
/// Separator allowed between the groups of a written code: an optional
/// hyphen and nothing else (no whitespace).
final String writtenSeparatorPattern = r'-?';

/// Matches a written 12-character code, case-insensitively.
///
/// Capture groups 1 and 2 hold four [geoHashAlphabet] characters each.
/// Capture group 3 holds the last four characters; its second character
/// (the 10th overall) may be any ASCII letter, the others must come from
/// [geoHashAlphabet].
final RegExp writtenVocalCoordRegexp = RegExp(
  // Groups 1 and 2, each followed by the optional hyphen.
  '([${geoHashAlphabet}]{4})${writtenSeparatorPattern}'
  '([${geoHashAlphabet}]{4})${writtenSeparatorPattern}'
  // Group 3: any allowed character, a letter, then two allowed characters.
  '([${geoHashAlphabet}][A-Za-z][${geoHashAlphabet}]{2})'
  // The code must end at a word boundary that is not followed by a digit.
  r'\b(?!\d)',
  caseSensitive: false,
);
/// Every token accepted in spoken input, in lower case: all keys of
/// [phoneticToAlphaNumeric] plus the bare digits 0-9.
///
/// Sorted longest first (ties broken alphabetically so the order is
/// deterministic). The list is joined into a regex alternation, where
/// earlier alternatives are tried first, so longer tokens such as "sicks"
/// or "fox trot" are attempted before shorter tokens that are a prefix of
/// them ("sick").
final List<String> phoneticTokens = <String>{
  ...phoneticToAlphaNumeric.keys,
  ...'0123456789'.split(''),
}.toList()
  ..sort((a, b) {
    final byLength = b.length.compareTo(a.length);
    return byLength != 0 ? byLength : a.compareTo(b);
  });
/// Collapses a spoken code ("Charlie, Niner, November ...") into its written
/// characters ("C9N...").
///
/// The input is split on whitespace, commas and periods, the same characters
/// the spoken-input regex accepts between tokens. Each word is then looked
/// up case-insensitively in [phoneticToAlphaNumeric]:
///
/// * Two adjacent words are tried as one token first, so two-word entries
///   such as "key low", "fox trot" and "x ray" resolve to a single character.
/// * The separator word "dash", a lone "-", and a hyphen glued to the front
///   of a word ("-five") all become a single "-".
/// * Words with no mapping (including bare digits) are kept unchanged.
String _collapsePhoneticWordsToLetters(String input) {
  final words = input
      .split(RegExp(r'[\s,.]+'))
      .where((word) => word.isNotEmpty)
      .toList();

  final collapsed = StringBuffer();
  var index = 0;
  while (index < words.length) {
    var word = words[index];

    // A hyphen attached to the start of a word is a group separator, not
    // part of the token (hyphens inside a token, as in "x-ray", are kept).
    if (word.length > 1 && word.startsWith('-')) {
      collapsed.write('-');
      word = word.substring(1);
    }

    final key = word.toLowerCase();
    if (key == 'dash') {
      collapsed.write('-');
      index += 1;
      continue;
    }

    // Prefer a two-word token when the next word completes one.
    if (index + 1 < words.length) {
      final pair = '$key ${words[index + 1].toLowerCase()}';
      final pairCharacter = phoneticToAlphaNumeric[pair];
      if (pairCharacter != null) {
        collapsed.write(pairCharacter);
        index += 2;
        continue;
      }
    }

    collapsed.write(phoneticToAlphaNumeric[key] ?? word);
    index += 1;
  }
  return collapsed.toString();
}
/// Upper-cases [input] and replaces the letters that are easily confused
/// with digits:
/// - 'I' and 'L' become '1'
/// - 'O' becomes '0'
String _normalizeVocalCoordWrittenInput(String input) {
  final upperCased = input.toUpperCase();
  // One pass over the text instead of three chained replacements.
  return upperCased.replaceAllMapped(
    RegExp('[ILO]'),
    (ambiguous) => ambiguous[0] == 'O' ? '0' : '1',
  );
}
/// A position encoded as a 12-character Vocal Coords code: a 9-character
/// geohash followed by 3 version/check characters.
class VocalCoords {
  /// The 9-character geohash, stored in upper case.
  final String geohash;

  /// The 3 trailing version/check characters (from [geoHashAlphabet]); they
  /// guard the 9 geohash characters.
  ///
  /// Layout of the first of them (the 10th character of the code):
  ///  * first bit is 1, to ensure it is a letter and not a digit,
  ///  * second bit is 0, to represent the version,
  ///  * the next 3 bits are additional check bits.
  final String checkTokens;

  // The tenth token is a bit of a wildcard. It should always remain a letter
  // for parsing purposes, so that strings of numbers are not mistaken for
  // Vocal Coords.
  // In version 0, the first 2 bits are '10' and the remaining 3 bits are a
  // checksum. Hopefully they will always remain that way - but if some change
  // does come along, we can bump the version bits to 11 and use the remaining
  // 3 bits for something else (including leaving space for future expansion
  // of the version number).
  // So - for future compatibility, if any change to version number is made,
  // we just use the last 2 tokens for checksum. That brings the "missed
  // error" rate from 1/2^13 (1/8192) to ~1/2^10 (1/1024) but that's still
  // pretty good.

  /// Mask (binary 11000) selecting the two version bits of the 10th token.
  static const int tenthBitVersionMask = 0x18;

  /// Value of the masked bits for version 0 (binary 10000): the first bit is
  /// 1 so the token is a letter, the second bit is 0 for version 0.
  static const int tenthBitVersionZeroIdentifier = 0x10;

  VocalCoords._(this.geohash, this.checkTokens);

  /// Creates VocalCoords from a [LatLng] by geohashing it to 9 characters
  /// and computing the matching check characters.
  static VocalCoords fromLatLng(LatLng latlng) {
    final hash = GeoHash.fromDecimalDegrees(
      latlng.longitude,
      latlng.latitude,
      precision: 9,
    ).geohash.toUpperCase();
    return VocalCoords._(hash, nineDigitGeohashTo3endCharacters(hash));
  }

  /// Parses [potentialCoords] for use in a parser chain.
  ///
  /// Returns `null` when the text does not look like Vocal Coords at all, a
  /// [Failure] when it has the right shape but fails validation, and
  /// otherwise a [Success] with the coordinates and their canonical written
  /// form.
  static Result<ParsedValue<VocalCoords>>? parseFromString(String potentialCoords) {
    if (!isInMatchinngFormat(potentialCoords)) {
      return null;
    }
    final result = maybeFromString(potentialCoords);
    if (result.isError()) {
      return Failure(result.exceptionOrNull()!);
    }
    final coords = result.getOrThrow();
    return Success(ParsedValue(coords, coords.toString()));
  }

  /// Whether [input] contains text shaped like Vocal Coords (spoken or
  /// written). The check characters are not verified.
  ///
  /// The misspelled name is kept so existing callers keep working; new code
  /// can use [isInMatchingFormat].
  static bool isInMatchinngFormat(String input) {
    return _matchPhoneticInput(input) != null;
  }

  /// Correctly spelled alias of [isInMatchinngFormat].
  static bool isInMatchingFormat(String input) => isInMatchinngFormat(input);

  /// Finds a written-form match in [input], or returns `null`.
  ///
  /// When [acceptSpokenInput] is true and a spoken code is found, e.g.
  /// "Coordinates are Charlie 9 November Victor India Oskar X-Ray Sierra
  /// Romeo Echo Yankee Echo", it is collapsed to its written characters
  /// first. Unless [strict] is true the text is then upper-cased and the
  /// ambiguous letters I, L and O are replaced by digits before matching.
  static RegExpMatch? _matchPhoneticInput(String input, {bool strict = false, bool acceptSpokenInput = true}) {
    var candidate = input;
    if (acceptSpokenInput) {
      // Put a space between adjacent digits so that "95" is read as the two
      // tokens "9 5".
      final spaced = input.replaceAllMapped(RegExp(r'(\d)(?=\d)'), (m) => '${m[1]} ');
      final spoken = spokenCoordsRegex.firstMatch(spaced);
      if (spoken != null) {
        candidate = _collapsePhoneticWordsToLetters(spoken.group(0)!);
      }
    }
    final normalized = strict ? candidate : _normalizeVocalCoordWrittenInput(candidate);
    return writtenVocalCoordRegexp.firstMatch(normalized);
  }

  /// Parses VocalCoords from a string and returns a Result object.
  ///
  /// Unless [strict] is true, ambiguous characters (I, L, O) are normalised
  /// before parsing. Returns a [Failure] holding a [ParseException] when the
  /// text has the wrong format, the 10th character is not in
  /// [geoHashAlphabet], or the check characters do not match the geohash.
  static Result<VocalCoords> maybeFromString(String input, {bool strict = false, bool acceptSpokenInput = true}) {
    final match = _matchPhoneticInput(input, strict: strict, acceptSpokenInput: acceptSpokenInput);
    if (match == null) {
      return Failure(ParseException("Invalid VocalCoords format: '$input'"));
    }

    // The regex captures 3 groups of 4 characters each. Groups 1 and 2 plus
    // the first character of group 3 form the 9-character geohash; the other
    // 3 characters of group 3 are the version/check characters.
    // Upper-case both parts: in strict mode the input is not normalised, and
    // the regex is case-insensitive, so lower-case text can get this far.
    final lastGroup = match.group(3)!.toUpperCase();
    final extractedGeohash = (match.group(1)! + match.group(2)! + lastGroup.substring(0, 1)).toUpperCase();
    final extractedCheckCharacters = lastGroup.substring(1, 4);

    final int tenthTokenValue = geoHashAlphabet.indexOf(extractedCheckCharacters[0]);
    if (tenthTokenValue < 0) {
      return Failure(ParseException("Invalid 10th-token in VC: ${extractedCheckCharacters[0]}"));
    }
    final expectedCheckCharacters = nineDigitGeohashTo3endCharacters(extractedGeohash);

    // In a 5-bit value, the two most significant bits are selected by the
    // mask 0x18 (binary 11000). For version 0 they must be "10" (0x10).
    final isVersionZero = (tenthTokenValue & tenthBitVersionMask) == tenthBitVersionZeroIdentifier;
    // Version 0 checks all 3 characters; any other version only the last 2.
    final nCharactersInRedundancyCheck = isVersionZero ? 3 : 2;
    final firstChecked = 3 - nCharactersInRedundancyCheck;
    if (extractedCheckCharacters.substring(firstChecked, 3) != expectedCheckCharacters.substring(firstChecked, 3)) {
      return Failure(ParseException("Vocal Coords consistency check failed ($expectedCheckCharacters != $extractedCheckCharacters) check your coords for errors"));
    }
    return Success(VocalCoords._(extractedGeohash, extractedCheckCharacters));
  }

  /// Converts VocalCoords to a [LatLng] by decoding the geohash.
  LatLng toLatLng() {
    final decoded = GeoHash(geohash.toLowerCase());
    return LatLng(decoded.latitude(), decoded.longitude());
  }

  /// Converts VocalCoords to the written form `XXXX-XXXX-XXXX`.
  @override
  String toString() {
    final upper = geohash.toUpperCase();
    return "${upper.substring(0, 4)}-${upper.substring(4, 8)}-${upper.substring(8, 9)}$checkTokens";
  }

  /// Returns a spoken version of the VocalCoords.
  ///
  /// Words within a group are joined with ", " and groups with ". ". When
  /// [forTTS] is true, "Juliett" is emitted as "Juliet".
  String toSpokenString({bool forTTS = false}) {
    String wordFor(String character) {
      final word = alphaNumericToPhonetic[character] ?? character;
      return forTTS && word == "Juliett" ? "Juliet" : word;
    }

    return toString()
        .split('-')
        .map((group) => group.split('').map(wordFor).join(', '))
        .join('. ');
  }

  /// Computes the check characters for a 9-character geo-hash.
  ///
  /// So here's how it works. We got 15 bits to play with here.
  ///  * first bit: Always 1, ensuring that there's at least one letter in
  ///    the Vocal Coord, which is useful for pattern matching.
  ///  * second bit: Version number: 0 for now. We can change this to 1 later
  ///    and make use of the next 3 bits for something else.
  ///  * 3rd to 5th bits: CRC32 checksum bits which MAY be repurposed for
  ///    something else in the future, in which case we'd bump the version
  ///    number to 1 and use some of these bits for the version.
  ///  * 6th to 15th bits: CRC32 checksum of the 9-character geohash. These
  ///    will always be checksum bits so that we can say there's a 99.9%
  ///    chance of errors being caught.
  ///
  /// Throws an [ArgumentError] if [geohash] is not exactly 9 characters
  /// long. This is a real check rather than an `assert`, so it also applies
  /// in release builds.
  static String nineDigitGeohashTo3endCharacters(String geohash) {
    final normalizedGeoHash = geohash.toUpperCase();
    if (normalizedGeoHash.length != 9) {
      throw ArgumentError.value(geohash, 'geohash', 'must be exactly 9 characters long');
    }
    final crcVal = Crc32Xz().convert(ascii.encode(normalizedGeoHash)).toBigInt().toInt();
    final lastCharacter = crcVal & 0x1F;
    final secondLastCharacter = (crcVal >> 5) & 0x1F;
    // 1 to ensure a letter, 0 for the version, then three checksum bits.
    final thirdLastCharacter = tenthBitVersionZeroIdentifier | ((crcVal >> 10) & 0x07);
    return geoHashAlphabet[thirdLastCharacter] + geoHashAlphabet[secondLastCharacter] + geoHashAlphabet[lastCharacter];
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment