Created
October 26, 2022 16:13
-
-
Save unixdj/0f824fd82138b9b8ccce2993c6113b1a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main
/*
[...] angles [...] are specified as
up to three numbers representing degrees, minutes and seconds. The
last number may be fractional. The numbers are either separated by
<,> (commas) or suffixed, in order:
	degrees by <°> (degree sign),
	minutes by <′> (prime) or <'> (apostrophe),
	seconds by <″> (double prime) or <"> (quotation mark).
The last suffix may be omitted.
*/
// angleStringOK performs a fast, crude pre-validation of an angle string.
//
// It returns ok == true if s consists only of ASCII digits, <.>, <,>, <'>,
// <"> and bytes with the top bit set (taken to be parts of multibyte UTF-8
// characters), contains at most 2 commas or at most 3 suffixes, and does
// not mix commas with suffixes. It returns suffixed == true if s contains
// <">, <'> or multibyte UTF-8 characters, i.e. if the numbers appear to be
// suffixed rather than separated by commas.
//
// Whenever ok is false, suffixed is false as well. The empty string is
// reported as (true, false).
func angleStringOK(s string) (ok, suffixed bool) {
	// The main goals of this function are:
	//   - to disallow strings with:
	//       - signs (<+> and <->), to avoid validating strings before
	//         passing them to strconv.ParseFloat
	//       - more than 2 commas, to avoid excessive allocation
	//   - to decide whether the numbers are separated by commas or
	//     suffixed with <°> etc.
	// Additionally, it disallows:
	//   - illegal ASCII characters
	//   - more than 3 suffixes or multibyte characters
	//   - strings having both commas and multibyte characters
	// The validation is preliminary, optimised for speed and very crude.
	// In particular, it:
	//   - doesn't check which multibyte characters are present and counts
	//     any such character as a suffix
	//   - doesn't validate syntax (character sequences)
	//   - doesn't even validate UTF-8 (byte sequences and invalid bytes
	//     0xc0, 0xc1, 0xfe, 0xff)

	// Actions stored in the lookup tables below. invalid must stay the
	// zero value: table entries that are not listed explicitly default
	// to it.
	const (
		invalid = iota // reject the whole string
		valid          // accept the byte, nothing to count
		comma          // accept the byte and count a comma
		suffix         // accept the byte and count a suffix
		tbl20          // look the byte up in table lo20
	)
	const (
		maxCommas   = 2 // three numbers need at most two separators
		maxSuffixes = 3 // one suffix per number
	)
	var commas, suffixes byte
	// map of 8 byte ranges (top 3 bits)
	hi := [8]byte{
		0x20 >> 5: tbl20,  // 0x20 - 0x3f: see table lo20
		0x80 >> 5: valid,  // 0x80 - 0x9f: 10xxxxxx, UTF-8 continuation
		0xa0 >> 5: valid,  // 0xa0 - 0xbf: 10xxxxxx, UTF-8 continuation
		0xc0 >> 5: suffix, // 0xc0 - 0xdf: 11xxxxxx, UTF-8 start
		0xe0 >> 5: suffix, // 0xe0 - 0xff: 11xxxxxx, UTF-8 start
	}
	// map of valid characters in the 0x20 - 0x3f range (bottom 5 bits)
	lo20 := [32]byte{
		'"' - 0x20:  suffix, // 0x22: count suffixes
		'\'' - 0x20: suffix, // 0x27: count suffixes
		',' - 0x20:  comma,  // 0x2c: count commas
		'.' - 0x20:  valid,  // 0x2e
		'0' - 0x20:  valid,  // 0x30
		'1' - 0x20:  valid,  // 0x31
		'2' - 0x20:  valid,  // 0x32
		'3' - 0x20:  valid,  // 0x33
		'4' - 0x20:  valid,  // 0x34
		'5' - 0x20:  valid,  // 0x35
		'6' - 0x20:  valid,  // 0x36
		'7' - 0x20:  valid,  // 0x37
		'8' - 0x20:  valid,  // 0x38
		'9' - 0x20:  valid,  // 0x39
	}
	// Iterate over bytes, not runes: a multibyte character is counted
	// once, by its start byte, and its continuation bytes are skipped.
	for i := 0; i < len(s); i++ {
		c := s[i]
		act := hi[c>>5]
		if act == tbl20 {
			act = lo20[c&0x1f]
		}
		switch act {
		case invalid:
			return false, false
		case valid:
			// nothing to count
		case comma:
			// up to 2 commas, no suffixes
			if commas >= maxCommas || suffixes != 0 {
				return false, false
			}
			commas++
		default:
			// suffix: up to 3 suffixes, no commas
			suffixes++
			if suffixes > maxSuffixes || commas != 0 {
				return false, false
			}
		}
	}
	return true, suffixes != 0
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment