Created
November 28, 2023 21:32
-
-
Save davidbalbert/fc107e91d507cd0836bb02bcc936f2b3 to your computer and use it in GitHub Desktop.
Original fixup(withPrevious: Chunk)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
extension Chunk {
    /// Re-synchronizes this chunk's break bookkeeping with the break state
    /// left behind by the preceding chunk.
    ///
    /// The method adopts `prev.endBreakState` as the new `startBreakState`,
    /// then walks `string` one Unicode scalar at a time, feeding each scalar to
    /// both the old start state and the new one. The walk stops as soon as the
    /// two agree: either both report a break before the same scalar, or
    /// neither does and the two states compare equal. `prefixCount` is updated
    /// to reflect the first break seen by the new state, and `endBreakState`
    /// is overwritten when the walk reaches the end of `string`.
    ///
    /// - Parameter prev: The preceding chunk, whose `endBreakState` seeds this
    ///   chunk's new `startBreakState`.
    /// - Returns: `true` if the old and new states synced up before the end of
    ///   `string` (nothing further needs fixing), `false` if the walk ran all
    ///   the way to `string.endIndex`.
    mutating func fixup(withPrevious prev: Chunk) -> Bool {
        var i = string.startIndex
        var first: String.Index?

        // `old` replays the chunk as it was previously broken up; `new` replays
        // it starting from the state the previous chunk now ends in.
        var old = startBreakState
        var new = prev.endBreakState

        startBreakState = new

        while i < string.unicodeScalars.endIndex {
            let scalar = string.unicodeScalars[i]
            // Both calls must run on every iteration so the two states
            // consume exactly the same scalars.
            let a = old.hasBreak(before: scalar)
            let b = new.hasBreak(before: scalar)

            if b {
                // Remember only the earliest break reported by the new state.
                first = first ?? i
            }

            if a && b {
                // Found the same break. We're done.
                break
            } else if !a && !b && old == new {
                // GraphemeBreakers are in the same state. We're done.
                break
            }

            i = string.unicodeScalars.index(after: i)
        }

        if let first {
            // We found a new first break.
            prefixCount = string.utf8.distance(from: string.startIndex, to: first)
        } else if i >= lastBreak {
            // We made it up through lastBreak without finding any breaks
            // and now we're in sync. We know there are no more breaks
            // ahead of us, which means there are no breaks in the chunk.
            //
            // N.b. there is a special case where lastBreak < firstBreak –
            // when there were no breaks in the chunk previously. In that
            // case lastBreak == startIndex and firstBreak == endIndex.
            // But this code works for that situation too. If there were no
            // breaks in the chunk previously, and we get in sync anywhere
            // in the chunk without finding a break, we know there are still
            // no breaks in the chunk, so this code is a no-op.
            prefixCount = string.utf8.count
        } else if i >= firstBreak {
            // We made it up through firstBreak without finding any breaks
            // but we got in sync before lastBreak. Find a new firstBreak:
            let j = string.unicodeScalars.index(after: i)

            // Search with a copy so `new` itself is left as it was at the
            // sync point.
            var tmp = new
            guard let newFirst = tmp.firstBreak(in: string[j...])?.lowerBound else {
                // We synced before lastBreak, so a break is expected ahead of
                // us. Not finding one means the bookkeeping is inconsistent.
                preconditionFailure("fixup(withPrevious:): expected a break after the sync point, but found none")
            }
            prefixCount = string.utf8.distance(from: string.startIndex, to: newFirst)

            // If this is false, there's a bug in the code, or my assumptions are wrong.
            assert(firstBreak <= lastBreak)
        }
        // There's an implicit else clause to the above – we're in sync, and we
        // didn't even get to the old firstBreak. This means the breaks didn't
        // change at all.

        // We got to the end, either because we're not in sync yet, or because we got
        // in sync right at the end of the chunk. Save the break state.
        if i == string.endIndex {
            endBreakState = new
        }

        // We're done if we synced up before the end of the chunk.
        return i < string.endIndex
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment