Created
April 4, 2024 18:40
-
-
Save ole/50c299fba72bf6a0ed374a9f1d11ff0f to your computer and use it in GitHub Desktop.
Mutating a Substring apparently makes a copy of the entire original String (in some scenarios)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Experiment: does mutating a Substring copy only its own slice, or the whole
// original String? The script mutates a Substring, prints the address of its
// UTF-8 storage, and then deliberately reads outside the Substring's bounds to
// see whether the rest of the original string's contents sit around it.

// Make sure both the string and the substring are larger than 15 UTF-8 bytes
// to avoid the small string optimization
var str = "Hello world 1 Hello world 2 Hello world 3 Hello world 4 Hello world 5 Hello world 6 Hello world 7 Hello world 8 Hello world 9"

// Number of leading characters removed from `str` ("Hello world 1 ").
// The literal above is pure ASCII, so this is also the number of UTF-8 bytes
// removed; the pointer arithmetic further down depends on that equivalence.
let prefixToStrip = 14

// "Hello world 2 Hello world 3" (27 characters), still a slice of `str`.
var substr = str.dropFirst(prefixToStrip).prefix(27)
let strToAppend = "+++APPENDED+++"

// ⚠️ It makes a difference how you mutate the Substring:
// - substr.append → Apparently makes a copy of the entire original string
//   *and* even shifts the original string contents back to make room,
//   i.e. making append an O(n) operation where `n == str.count` (bad).
// - substr.append(contentsOf:) → Apparently only copies the substring part (good)
//
// Swap the loop for the commented-out call below to compare the two behaviors.
for char in strToAppend {
    substr.append(char)
}
//substr.append(contentsOf: strToAppend)

print("Substring")
print("---------")
print("contents:", substr)
print("utf8.count:", substr.utf8.count)
print("isContiguousUTF8:", substr.isContiguousUTF8)
substr.withUTF8 { utf8Buffer in
    print("utf8Buffer.baseAddress:", utf8Buffer.baseAddress!)
    // Unsafe!!! Assume that the Substring copied the entire original collection
    // on mutation, so we can get back the full buffer by extending the pointer
    // out of bounds.
    //
    // The buffer is widened by `prefixToStrip` bytes to the left and sized as
    // the whole original string plus the appended text. If the assumption is
    // wrong, this reads memory that does not belong to the Substring.
    let fullBuffer = UnsafeBufferPointer(
        start: utf8Buffer.baseAddress! - prefixToStrip,
        count: str.utf8.count + strToAppend.utf8.count
    )
    let newString = String(decoding: fullBuffer, as: UTF8.self)
    print(newString)
}
print("")

// Print the original string's storage address so it can be compared with the
// Substring's address printed above.
print("Original string")
print("---------")
print("isContiguousUTF8:", str.isContiguousUTF8)
str.withUTF8 { utf8Buffer in
    print("utf8Buffer.baseAddress:", utf8Buffer.baseAddress!)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Typical output for me with the `substr.append(char)` loop (note that the `fullBuffer` is printed correctly, indicating that the "real" buffer is larger than the segment we can access through the mutated Substring):

Typical output for me with `substr.append(contentsOf:)` (note that the full buffer prints garbage, indicating that we're going past the bounds of the buffer):