-
-
Save regexident/19b066f86c701fae256600f4ae656934 to your computer and use it in GitHub Desktop.
Implementation of Paul Heckel's Diff Algorithm in Swift 3
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// Implementation of Paul Heckel's Diff Algorithm | |
/// | |
/// > To compare two files, it is usually convenient to take a line as the | |
/// > basic unit, although other units are possible, such as word, sentence, | |
/// > paragraph, or character. We approach the problem by finding | |
/// similarities until only differences remain. We make two observations: | |
/// | |
/// Preliminary Observations | |
/// | |
/// 1. If a line occurs only once in each file, then it must be the same line, | |
/// although it may have been moved. | |
/// | |
/// We use this observation to locate unaltered lines that we subsequently | |
/// exclude from further treatment. | |
/// | |
/// 2. If a line has been found to be unaltered, and the lines immediately | |
/// adjacent to it in both files are identical, then these lines must be | |
/// the same line. This information can be used to find blocks of unchanged | |
/// lines. | |
/// | |
/// References: | |
/// | |
/// + https://gist.github.com/ndarville/3166060 (good breakdown) | |
/// + http://dl.acm.org/citation.cfm?id=359467 (paper) | |
/// | |
/// Similar to: | |
/// | |
/// + https://github.com/Instagram/IGListKit | |
class Diff { | |
private var oldReferences = [Reference]() | |
private var newReferences = [Reference]() | |
/// A reference to an entry in the array, can either be a pointer (a change) or an index (unchanged) | |
enum Reference { | |
/// - pointer: A pointer to a symbol in the list of entries. This is used to identify changes. | |
case pointer(Symbol) | |
/// - index: An index reference. This is used to signify things which have not changed. | |
case index(Int) | |
/// A reference symbol. | |
class Symbol { | |
/// The count of the symbol in the old array | |
var oldCount: Count = .zero | |
/// The count of the symbol in the new array | |
var newCount: Count = .zero | |
/// The reference indexes in the old array | |
var oldLineReferenceIndexes = [Int]() | |
/// true if the symbol is available in both of the arrays | |
var inBoth: Bool { | |
return !(oldCount == .zero || newCount == .zero) | |
} | |
} | |
/// An enum representing zero, one or many | |
enum Count { | |
/// - zero: equivalent to 0 | |
case zero | |
/// - one: equivalent to 1 | |
case one | |
/// - many: equivalent to anything larger than 1 | |
case many | |
/// Advance to to the next value. | |
/// | |
/// zero -> one | |
/// one -> many | |
mutating func next() { | |
switch self { | |
case .zero: | |
self = .one | |
case .one: | |
self = .many | |
case .many: | |
break | |
} | |
} | |
} | |
} | |
/// Process the old and new array to produce a list of diff steps. | |
/// | |
/// - Parameters: | |
/// - old: The array to compare. | |
/// - new: The array to compare against. | |
/// - Returns: A list of DiffStep operations to perform on the old array to get the new array. | |
func process<T: Diffable>(old: [T], new: [T]) -> [DiffStep<T>] { | |
setupContext(old: old, new: new) | |
/// Final pass | |
/// | |
/// one entry for each index in the new array containing either: | |
/// > a pointer to table[line] | |
/// > the entry index in the old array | |
var steps = [DiffStep<T>]() | |
var deleteOffsets = Array(repeating: 0, count: old.count) | |
var offset = 0 | |
for (index, item) in oldReferences.enumerated() { | |
deleteOffsets[index] = offset | |
if case .pointer(_) = item { | |
steps.append(.delete(index: index, value: old[index])) | |
offset += 1 | |
} | |
} | |
offset = 0 | |
for (index, item) in newReferences.enumerated() { | |
switch item { | |
case .pointer(_): | |
steps.append(.insert(index: index, value: new[index])) | |
offset += 1 | |
case .index(let oldIndex): | |
if old[oldIndex] != new[index] { | |
steps.append(.update(index: index, value: new[index])) | |
} | |
let deleteOffset = deleteOffsets[oldIndex] | |
if (oldIndex - deleteOffset + offset) != index { | |
steps.append(.move(from: oldIndex, to: index, value: new[index])) | |
} | |
} | |
} | |
return steps | |
} | |
/// Setup the context for the diffing operation | |
/// | |
/// This goes through the 5 passes of Paul Heckel's Diff Algorithm | |
/// | |
/// ## First Pass | |
/// | |
/// a. Each entry of array `new` is read in sequence | |
/// b. An entry for each is created in the table, if it doesn't already exist | |
/// c. `newCount` for the table entry is incremented | |
/// d. `new[i]` is set to point to the table entry of index i | |
/// | |
/// ## Second Pass | |
/// | |
/// a. Each entry of array `old` is read in sequence | |
/// b. An entry for each is created in the table, if it doesn't already exist | |
/// c. `oldCount` for the table entry is incremented | |
/// d. Add a reference index for the position of the entry in old | |
/// e. `old[i]` is set to point to the table entry of index i | |
/// | |
/// ## Third Pass | |
/// | |
/// a. We use Observation 1: | |
/// > If a entry occurs only once in each array, then it must be the same entry, although it may have been moved. | |
/// > We use this observation to locate unaltered entries that we subsequently exclude from further treatment. | |
/// | |
/// b. Using this, we only process the entries where `oldCount` == `newCount` == 1. | |
/// | |
/// c. As the entries between `old` and `new` "must be the same entry, although it may have been moved", we alter the table pointers to the number of the entry in the other array. | |
/// | |
/// d. We also locate unique virtual entries | |
/// - immediately before the first and | |
/// - immediately after the last | |
/// | |
/// ## Fourth Pass | |
/// | |
/// a. We use Observation 2: | |
/// > If a entry has been found to be unaltered, and the entries immediately adjacent to it in both arrays are identical, then these entries must be the same entry. | |
/// > This information can be used to find blocks of unchanged entries. | |
/// | |
/// b. Using this, we process each entry in ascending order. | |
/// | |
/// c. If | |
/// | |
/// - new[i] points to old[j], and | |
/// - new[i + 1] and old[j + 1] contain identical table entry pointers | |
/// **then** | |
/// - old[j + 1] is set to entry i + 1, and | |
/// - old[i + 1] is set to entry j + 1 | |
/// | |
/// ## Fifth Pass | |
/// | |
/// Similar to fourth pass, except: | |
/// | |
/// It processes each entry in descending order | |
/// It uses j - 1 and i - 1 instead of j + 1 and i + 1 | |
/// | |
/// - Parameters: | |
/// - old: The array to compare. | |
/// - new: The array to compare against. | |
private func setupContext<T: Diffable>(old: [T], new: [T]) { | |
/// First pass | |
newReferences = makeTableReferences(with: new, counter: { $0.newCount.next() }) | |
/// Second pass | |
oldReferences = makeTableReferences(with: old, updateLineReference: true, counter: { $0.oldCount.next() }) | |
/// If a line occurs only once in each file, then it must be the same line, although it may have been moved. | |
/// We use this observation to locate unaltered lines that we subsequently exclude from further treatment. | |
/// Third pass | |
findUniqueVirtualEntries() | |
/// If a line has been found to be unaltered, and the lines immediately adjacent to it in both files are identical, | |
/// then these lines must be the same line. This information can be used to find blocks of unchanged lines. | |
/// Fourth pass | |
expandUniqueEntries(direction: .ascending) | |
/// Fifth pass | |
expandUniqueEntries(direction: .descending) | |
} | |
private var table: [String: Reference.Symbol] = [:] | |
/// Generate table entries for the array, if updateLineReference is true the oldLineReferenceIndexes are also populated. | |
/// | |
/// First & Second pass | |
/// | |
/// - Parameters: | |
/// - array: The array calee to generate refernces for. | |
/// - updateLineReference: true if the oldLineReferenceIndexes should be updated | |
/// - counter: A function used to increase the counter for the reference symbol. | |
/// - Returns: A list of symbol references for array `array`. | |
private func makeTableReferences<T: Diffable>(with array: [T], updateLineReference: Bool = false, counter: (Reference.Symbol) -> Void) -> [Reference] { | |
var entries = [Reference]() | |
for (index, item) in array.enumerated() { | |
let entry = table[item.primaryKeyValue] ?? Reference.Symbol() | |
table[item.primaryKeyValue] = entry | |
counter(entry) | |
if updateLineReference { | |
entry.oldLineReferenceIndexes.append(index) | |
} | |
entries.append(.pointer(entry)) | |
} | |
return entries | |
} | |
/// Third pass | |
private func findUniqueVirtualEntries() { | |
for (index, item) in newReferences.enumerated() { | |
if case .pointer(let entry) = item, entry.inBoth { | |
guard entry.oldLineReferenceIndexes.count > 0 else { | |
continue | |
} | |
let oldIndex = entry.oldLineReferenceIndexes.removeFirst() | |
newReferences[index] = .index(oldIndex) | |
oldReferences[oldIndex] = .index(index) | |
} | |
} | |
} | |
/// An enumeration to specify the direction of the traversal of references. | |
enum ReferenceWalker { | |
/// - ascending: Walk the references in ascending order. | |
case ascending | |
/// - descending: Walk the references in decending order. | |
case descending | |
/// The starting value of the walk. | |
/// | |
/// - Parameter references: The references which are being walked. | |
/// - Returns: The start index. | |
func start(references: [Reference]) -> Int { | |
switch self { | |
case .ascending: | |
return 1 | |
case .descending: | |
return references.count - 1 | |
} | |
} | |
/// The step increase when walking references. | |
var step: Int { | |
switch self { | |
case .ascending: | |
return 1 | |
case .descending: | |
return -1 | |
} | |
} | |
/// Compare the index with the list of indexes to ensure it is valid. | |
/// | |
/// - Parameters: | |
/// - i: the index to validate | |
/// - references: The array of references, the count of these determines if the traversal is still valid. | |
/// - Returns: true if the traversal is still valid. | |
func isValid(i: Int, references: [Reference]) -> Bool { | |
switch self { | |
case .ascending: | |
return i < references.count - 1 | |
case .descending: | |
return i > 0 | |
} | |
} | |
/// Determine if the index is in range and can be continued. | |
/// | |
/// - Parameters: | |
/// - i: the index to validate | |
/// - references: The array of references, the count of these determines if the traversal is still valid. | |
/// - Returns: true if the index is in range. | |
func inRange(i: Int, references: [Reference]) -> Bool { | |
switch self { | |
case .ascending: | |
return i + step < references.count | |
case .descending: | |
return i + step >= 0 | |
} | |
} | |
} | |
/// Fourth & Fifth pass | |
/// | |
/// - Parameter direction: The direction to walk, ascending or descending | |
private func expandUniqueEntries(direction: ReferenceWalker) { | |
var i = direction.start(references: newReferences) | |
while direction.isValid(i: i, references: newReferences) { | |
if case .index(let j) = newReferences[i], direction.inRange(i: j, references: oldReferences) { | |
if case .pointer(let new) = newReferences[i + direction.step], case .pointer(let old) = oldReferences[j + direction.step], new === old { | |
newReferences[i + direction.step] = .index(j + direction.step) | |
oldReferences[j + direction.step] = .index(i + direction.step) | |
} | |
} | |
i += direction.step | |
} | |
} | |
} | |
/// A description of a step to apply to an array to be able to transform one into the other. | |
enum DiffStep<T: Diffable>: CustomDebugStringConvertible { | |
/// - insert: A insertation step. | |
case insert(index: Int, value: T) | |
/// - delete: A deletion step. | |
case delete(index: Int, value: T) | |
/// - move: A move step. | |
case move(from: Int, to: Int, value: T) | |
/// update: A update step. | |
case update(index: Int, value: T) | |
/// A debug string describing the diff step. | |
public var debugDescription: String { | |
switch self { | |
case .insert(let idx, let value): | |
return "+\(idx)@\(value)" | |
case .delete(let idx, let value): | |
return "-\(idx)@\(value)" | |
case .move(from: let from, to: let to, value: let value): | |
return "\(from)>\(to)@\(value)" | |
case .update(index: let idx, value: let value): | |
return "!\(idx)@\(value)" | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public protocol Diffable: Hashable { | |
var primaryKeyValue: String { get } | |
} | |
public struct AnyDiffable: Diffable { | |
private let _primaryKeyValue: () -> String | |
var base: AnyHashable | |
public init<D: Diffable>(_ base: D) { | |
self.base = base | |
_primaryKeyValue = { base.primaryKeyValue } | |
} | |
public static func == (x: AnyDiffable, y: AnyDiffable) -> Bool { | |
return x.base == y.base | |
} | |
public var hashValue: Int { | |
return base.hashValue | |
} | |
public var primaryKeyValue: String { | |
return _primaryKeyValue() | |
} | |
} | |
extension AnyDiffable { | |
/// Evaluates the given closure when this `AnyDiffable` instance is type `T`, | |
/// passing the unwrapped value as a parameter. | |
/// | |
/// Use the `map` method with a closure that returns a nonoptional value. | |
/// | |
/// - Parameter transform: A closure that takes the unwrapped value | |
/// of the instance. | |
/// - Returns: The result of the given closure. If this instance is not type T, | |
/// returns `self`. | |
func map<T: Diffable, U: Diffable>(_ transform: (T) throws -> U) rethrows -> AnyDiffable { | |
guard let object = base as? T else { | |
return self | |
} | |
return try AnyDiffable(transform(object)) | |
} | |
/// Evaluates the given closure when this `AnyDiffable` instance is not `nil`, | |
/// passing the unwrapped value as a parameter. | |
/// | |
/// Use the `flatMap` method with a closure that returns an optional value. | |
/// | |
/// - Parameter transform: A closure that takes the unwrapped value | |
/// of the instance. | |
/// - Returns: The result of the given closure. If this instance is `nil`, | |
/// returns `nil`. | |
func flatMap<T: Diffable, U: Diffable>(_ transform: (T) throws -> U?) rethrows -> AnyDiffable? { | |
guard let object = base as? T else { | |
return .none | |
} | |
switch try transform(object) { | |
case (let wrapped)? where wrapped is AnyDiffable: | |
return wrapped as? AnyDiffable | |
case (let wrapped)?: | |
return AnyDiffable(wrapped) | |
case .none: | |
return .none | |
} | |
} | |
/// Evaluates the given closure when this `AnyDiffable` instance is not `nil`, | |
/// passing the unwrapped value as a parameter. | |
/// | |
/// Use the `apply` method with an optional closure that returns a nonoptional value. | |
/// | |
/// - Parameter transform: A closure that takes the unwrapped value | |
/// of the instance. | |
/// - Returns: The result of the given closure. If the closure is `nil`, | |
/// returns `nil`. | |
func apply<T: Diffable, U: Diffable>(transform: ((T) throws -> U)?) throws -> AnyDiffable? { | |
return try transform.flatMap(map) | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
extension Array where Element: Diffable { | |
/// Calculate the changes between self and the `new` array. | |
/// | |
/// - Parameters: | |
/// - new: a collection to compare the calee to | |
/// - section: The section in which this diff should be applied to, this is used to create indexPath's. Default is 0 | |
/// - Returns: A tuple containing the changes. | |
public func diff(_ new: [Element], forSection section: Int = 0) -> (updates: [IndexPath], insertions: [IndexPath], deletions: [IndexPath], moves: [(IndexPath, IndexPath)]) { | |
let diff = Diff() | |
let result = diff.process(old: self, new: new) | |
var deletions = [IndexPath]() | |
var insertions = [IndexPath]() | |
var updates = [IndexPath]() | |
var moves = [(from: IndexPath, to: IndexPath)]() | |
for step in result { | |
switch step { | |
case .delete(let index, _): | |
deletions.append(IndexPath(row: index, section: section)) | |
case .insert(let index, _): | |
insertions.append(IndexPath(row: index, section: section)) | |
case .update(let index, _): | |
updates.append(IndexPath(row: index, section: section)) | |
case let .move(from, to, _): | |
moves.append((from: IndexPath(row: from, section: section), to: IndexPath(row: to, section: section))) | |
} | |
} | |
return (updates, insertions, deletions, moves) | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
extension UICollectionView { | |
func apply(updates: [IndexPath], deletions: [IndexPath], insertions: [IndexPath], moves: [(from: IndexPath, to: IndexPath)], completion: (() -> Void)?) { | |
let group = DispatchGroup() | |
group.enter() | |
performBatchUpdates({ | |
self.deleteItems(at: deletions) | |
self.insertItems(at: insertions) | |
for move in moves { | |
self.moveItem(at: move.from, to: move.to) | |
} | |
}, completion: { _ in | |
group.leave() | |
}) | |
group.enter() | |
performBatchUpdates({ | |
self.reloadItems(at: updates) | |
}, completion: { _ in | |
group.leave() | |
}) | |
group.notify(queue: .main, execute: { | |
completion?() | |
}) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment