Created
August 26, 2016 16:19
-
-
Save facchinm/07db68e35e419bc5fef3a190ad6d7528 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
From 970583587fbb9860cef9c17829f26b4c5f6170f0 Mon Sep 17 00:00:00 2001 | |
From: Martino Facchin <[email protected]> | |
Date: Fri, 26 Aug 2016 18:08:52 +0200 | |
Subject: [PATCH] Treat cpp string as UTF8 | |
Solves https://github.com/arduino/Arduino/issues/5277 | |
--- | |
src/arduino.cc/builder/utils/utils.go | 118 ++++++++++++++++++++++++++++------ | |
1 file changed, 100 insertions(+), 18 deletions(-) | |
diff --git a/src/arduino.cc/builder/utils/utils.go b/src/arduino.cc/builder/utils/utils.go | |
index b57c268..d145a34 100644 | |
--- a/src/arduino.cc/builder/utils/utils.go | |
+++ b/src/arduino.cc/builder/utils/utils.go | |
@@ -34,14 +34,18 @@ import ( | |
"arduino.cc/builder/gohasissues" | |
"arduino.cc/builder/i18n" | |
"arduino.cc/builder/types" | |
+ "bytes" | |
"crypto/md5" | |
"encoding/hex" | |
+ "errors" | |
+ "fmt" | |
"io/ioutil" | |
"os" | |
"os/exec" | |
"path/filepath" | |
"runtime" | |
"strings" | |
+ "unicode/utf8" | |
) | |
func KeysOfMapOfStringInterface(input map[string]interface{}) []string { | |
@@ -435,30 +439,108 @@ func ParseCppString(line string) (string, string, bool) { | |
return "", line, false | |
} | |
+ s, err := consumeQuotedString(line) | |
+ if err == nil { | |
+ return s, "", true | |
+ } else { | |
+ return "", line, false | |
+ } | |
+} | |
+ | |
+// Adapted from message.go in the Go standard library package net/mail. | |
+// consumeQuotedString parses the quoted string at the start of s. | |
+func consumeQuotedString(s string) (qs string, err error) { | |
+ // Assume first byte is '"'. | |
i := 1 | |
- res := "" | |
+ qsb := make([]rune, 0, 10) | |
+ | |
+ escaped := false | |
+ | |
+Loop: | |
for { | |
- if i >= len(line) { | |
- return "", line, false | |
- } | |
+ r, size := utf8.DecodeRuneInString(s[i:]) | |
- switch line[i] { | |
- // Backslash, next character is used unmodified | |
- case '\\': | |
- i++ | |
- if i >= len(line) { | |
- return "", line, false | |
+ switch { | |
+ case size == 0: | |
+ return "", errors.New("mail: unclosed quoted-string") | |
+ | |
+ case size == 1 && r == utf8.RuneError: | |
+ return "", fmt.Errorf("mail: invalid utf-8 in quoted-string: %q", s) | |
+ | |
+ case escaped: | |
+ // quoted-pair = ("\" (VCHAR / WSP)) | |
+ | |
+ if !isVchar(r) && !isWSP(r) { | |
+ return "", fmt.Errorf("mail: bad character in quoted-string: %q", r) | |
} | |
- res += string(line[i]) | |
- break | |
- // Quote, end of string | |
- case '"': | |
- return res, line[i+1:], true | |
+ | |
+ qsb = append(qsb, r) | |
+ escaped = false | |
+ | |
+ case isQtext(r) || isWSP(r): | |
+ // qtext (printable US-ASCII excluding " and \), or | |
+ // FWS (almost; we're ignoring CRLF) | |
+ qsb = append(qsb, r) | |
+ | |
+ case r == '"': | |
+ break Loop | |
+ | |
+ case r == '\\': | |
+ escaped = true | |
+ | |
default: | |
- res += string(line[i]) | |
- break | |
+ return "", fmt.Errorf("mail: bad character in quoted-string: %q", r) | |
+ | |
} | |
- i++ | |
+ i += size | |
+ } | |
+ s = s[i+1:] | |
+ if len(qsb) == 0 { | |
+ return "", errors.New("mail: empty quoted-string") | |
+ } | |
+ return string(qsb), nil | |
+} | |
+ | |
+// isQtext reports whether r is an RFC 5322 qtext character. | |
+func isQtext(r rune) bool { | |
+ // Printable US-ASCII or multi-byte UTF-8 (see isVchar), excluding backslash or quote. | |
+ if r == '\\' || r == '"' { | |
+ return false | |
+ } | |
+ return isVchar(r) | |
+} | |
+ | |
+// quoteString renders a string as an RFC 5322 quoted-string. | |
+func quoteString(s string) string { | |
+ var buf bytes.Buffer | |
+ buf.WriteByte('"') | |
+ for _, r := range s { | |
+ if isQtext(r) || isWSP(r) { | |
+ buf.WriteRune(r) | |
+ } else if isVchar(r) { | |
+ buf.WriteByte('\\') | |
+ buf.WriteRune(r) | |
+ } | |
} | |
+ buf.WriteByte('"') | |
+ return buf.String() | |
+} | |
+ | |
+// isVchar reports whether r is an RFC 5322 VCHAR character. | |
+func isVchar(r rune) bool { | |
+ // Visible (printing) characters. | |
+ return '!' <= r && r <= '~' || isMultibyte(r) | |
+} | |
+ | |
+// isMultibyte reports whether r is a multi-byte UTF-8 character | |
+// as supported by RFC 6532. | |
+func isMultibyte(r rune) bool { | |
+ return r >= utf8.RuneSelf | |
+} | |
+ | |
+// isWSP reports whether r is a WSP (white space). | |
+// WSP is a space or horizontal tab (RFC 5234 Appendix B). | |
+func isWSP(r rune) bool { | |
+ return r == ' ' || r == '\t' | |
} | |
-- | |
2.9.2 | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment