Created
February 29, 2020 23:43
-
-
Save CTMacUser/cbb2417297e297829c81bc4fd88365dd to your computer and use it in GitHub Desktop.
For Swift 5.1; wrapping types for sequences and collections that vend their source into fixed-sized chunks. See <https://forums.swift.org/t/yet-another-chunked-sequence-collection-idea/34198> for more information.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//===--- ChunkedCollection.swift ------------------------------*- swift -*-===// | |
// | |
// Created by Daryle Walker on 2020-Feb-29 | |
// | |
// Copyright © 2020 Daryle Walker | |
// | |
// A generic type that models a Collection that wraps another, vending fixed- | |
// sized sub-sequences (i.e. chunks) of the inner collection as the elements of | |
// the outer collection. There is an option whether or not to vend the last | |
// chunks if they occur late enough that they are shorter than expected. | |
// | |
//===----------------------------------------------------------------------===// | |
// MARK: Index for Chunking Collections | |
/// Index for elements of `ChunkedCollection`. | |
public struct ChunkedCollectionIndex<Base: Comparable>: Equatable { | |
/// The targeted range of elements from the base collection. Should be | |
/// empty for the past-the-end index. | |
@usableFromInline | |
let range: Range<Base> | |
/// Creates a ranged index from the given bounds. | |
@inlinable init(start: Base, end: Base) { range = start..<end } | |
} | |
extension ChunkedCollectionIndex: Comparable { | |
@inlinable | |
public static func < (lhs: Self, rhs: Self) -> Bool { | |
// Assume operands are valid for the same collection. | |
return lhs.range.lowerBound < rhs.range.lowerBound | |
} | |
} | |
extension ChunkedCollectionIndex: Hashable where Base: Hashable {} | |
extension ChunkedCollectionIndex: Decodable where Base: Decodable {} | |
extension ChunkedCollectionIndex: Encodable where Base: Encodable {} | |
// MARK: - Chunking Collection | |
/// Groups elements of a wrapped collection (generally) in fixed-size chunks. | |
/// | |
/// - Warning: | |
/// - If `Base` is a reference type, do not perform actions on the wrapped | |
/// collection that change its length or otherwise invalidate its indices | |
/// while this collection or any of its sub-sequences are in force. | |
/// - When `Base` does not conform to `RandomAccessCollection` because it | |
/// cannot provide constant-time index offsets, use of the `startIndex` and | |
/// `isEmpty` accessors will be O(*n*) instead of O(1), where *n* is the | |
/// length of `base`. | |
public struct ChunkedCollection<Base: Collection> { | |
/// The source for the elements within vended elements. | |
@usableFromInline | |
let base: Base | |
/// The number of inner elements to use per (unshortened) vended element. | |
let chunkLength: Int | |
/// The number of inner elements to skip between the end of a vended element | |
/// and the start of the next vended element. | |
let padding: Int | |
/// Whether to vend generated elements that are shorter than the desired | |
/// length. This can only happen at the end of the sequence. | |
let allowShorterChunks: Bool | |
/// Creates a collection that groups elements from the given collection into | |
/// chunks of the given length, with the given amount of padding between | |
/// chunks, and possibly vends any incomplete trailing chunks. | |
/// | |
/// If `padding` is negative, then consecutive vended elements will share | |
/// `abs(padding)` inner elements (*a.k.a.* overlap). | |
/// | |
/// - Precondition: | |
/// - `chunkLength > 0`. | |
/// - `padding > -chunkLength`. | |
/// - If `allowShorterChunks` is `true`, then `padding` shall not be | |
/// negative. | |
/// | |
/// - Parameters: | |
/// - base: The collection to extract inner elements from for each vended | |
/// element. | |
/// - chunkLength: The count of inner elements per full-length vended | |
/// element. | |
/// - padding: A number such that `chunkLength + padding` is the count of | |
/// inner elements between the start of some vended element and the | |
/// start of its succeeding vended element (if any). When | |
/// non-negative, it is the count of inner elements between the end of | |
/// some full-length vended element and its succeeding vended element | |
/// (if any). | |
/// - allowShorterChunks: Whether or not vend chunks that can be started | |
/// with at least one inner element, but `base` no longer has enough | |
/// elements to provide a count of `chunkLength`. | |
@usableFromInline | |
init(_ base: Base, chunkLength: Int, padding: Int, allowShorterChunks: Bool) { | |
precondition(chunkLength > 0) | |
precondition(padding > -chunkLength) | |
precondition(!allowShorterChunks || padding >= 0) | |
self.base = base | |
self.chunkLength = chunkLength | |
self.padding = padding | |
self.allowShorterChunks = allowShorterChunks | |
} | |
} | |
extension ChunkedCollection: Decodable where Base: Decodable {} | |
extension ChunkedCollection: Encodable where Base: Encodable {} | |
extension ChunkedCollection: Collection { | |
public typealias Index = ChunkedCollectionIndex<Base.Index> | |
public typealias Element = Base.SubSequence | |
public typealias SubSequence = ChunkedCollection<Base.SubSequence> | |
public var isEmpty: Bool { | |
base.isEmpty | |
|| !allowShorterChunks | |
&& base.index(base.startIndex, offsetBy: chunkLength, limitedBy: base.endIndex) == nil | |
} | |
public var startIndex: Index { | |
let start = base.startIndex, end = base.endIndex | |
guard let firstEnd = base.index(start, offsetBy: chunkLength, limitedBy: end) else { | |
if allowShorterChunks { | |
// This is the same as `endIndex` when `base` is empty. | |
return Index(start: start, end: end) | |
} else { | |
return endIndex | |
} | |
} | |
return Index(start: start, end: firstEnd) | |
} | |
@inlinable | |
public var endIndex: Index { | |
Index(start: base.endIndex, end: base.endIndex) | |
} | |
@inlinable | |
public subscript(position: Index) -> Element { | |
precondition(!position.range.isEmpty) | |
return base[position.range] | |
} | |
public subscript(bounds: Range<Index>) -> SubSequence { | |
let start = bounds.lowerBound.range.lowerBound | |
var stop = bounds.upperBound.range.lowerBound | |
if padding < 0, start < stop { | |
_ = base.formIndex(&stop, offsetBy: -padding, limitedBy: base.endIndex) | |
} | |
return SubSequence(base[start..<stop], chunkLength: chunkLength, padding: padding, allowShorterChunks: allowShorterChunks) | |
} | |
public func index(after i: Index) -> Index { | |
precondition(i < endIndex) | |
let startBase: Base.Index, startOffset, endOffset: Int | |
if padding >= 0 { | |
startBase = i.range.upperBound | |
startOffset = padding | |
endOffset = chunkLength | |
} else { | |
// Even if `Base` conformed from `BidirectionalCollection`, we would | |
// not be able to count backwards from `i.range.upperBound`, because | |
// that wouldn't work when there are multiple short chunks at the | |
// end. So we adjust the start point for offsetting so we're always | |
// going forward. | |
startBase = i.range.lowerBound | |
startOffset = chunkLength + padding | |
endOffset = startOffset | |
} | |
let end = base.endIndex | |
guard let nextStart = base.index(startBase, offsetBy: startOffset, limitedBy: end) else { | |
return endIndex | |
} | |
let endBase = padding >= 0 ? nextStart : i.range.upperBound | |
guard let nextEnd = base.index(endBase, offsetBy: endOffset, limitedBy: end) else { | |
// This still works when `nextStart` was at `base.endIndex`. | |
return Index(start: allowShorterChunks ? nextStart : end, end: end) | |
} | |
return Index(start: nextStart, end: nextEnd) | |
} | |
} | |
extension ChunkedCollection: BidirectionalCollection where Base: RandomAccessCollection { | |
/// Index of the last vended element that can be generated, even if its | |
/// chunk length is short, if possible. | |
var lastIndexEvenIfElementIsShort: Index? { | |
guard !base.isEmpty else { return nil } | |
guard case let (period, tooBig) = chunkLength.addingReportingOverflow(padding), !tooBig else { | |
// There's only at most one element. | |
let start = startIndex | |
return start < endIndex ? start : nil | |
} | |
let periodCount = (base.count - 1) / period | |
let lastStart = base.index(base.startIndex, offsetBy: periodCount * period) | |
let end = base.endIndex | |
return Index(start: lastStart, end: base.index(lastStart, offsetBy: chunkLength, limitedBy: end) ?? end) | |
} | |
/// Index of the last vended element that isn't a short chunk, if possible. | |
var lastIndexWithFullLengthElement: Index? { | |
guard let lastShort = lastIndexEvenIfElementIsShort else { return nil } | |
guard case let lastCount = base[lastShort.range].count, lastCount < chunkLength else { return lastShort } | |
guard case let (period, tooBig) = chunkLength.addingReportingOverflow(padding), !tooBig else { | |
// We already covered the only element in `lastShort`. | |
return nil | |
} | |
let (additionalPeriods, leftOvers) = (chunkLength - lastCount).quotientAndRemainder(dividingBy: period) | |
let backPeriodCount = additionalPeriods + leftOvers.signum() | |
guard let lastFullStart = base.index(lastShort.range.lowerBound, offsetBy: -backPeriodCount * period, limitedBy: base.startIndex) else { | |
return nil | |
} | |
return Index(start: lastFullStart, end: base.index(lastFullStart, offsetBy: chunkLength)) | |
} | |
public func index(before i: Index) -> Index { | |
if i.range.isEmpty { | |
assert(i == endIndex) | |
return (allowShorterChunks | |
? lastIndexEvenIfElementIsShort | |
: lastIndexWithFullLengthElement)! | |
} else { | |
// It's OK if `chunkLength + padding` overflows, because it would | |
// have done so when we're already at the first element. | |
let previousStart = base.index(i.range.lowerBound, offsetBy: -(chunkLength + padding)) | |
let end = base.endIndex | |
return Index(start: previousStart, end: base.index(previousStart, offsetBy: chunkLength, limitedBy: end) ?? end) | |
} | |
} | |
} | |
extension ChunkedCollection: RandomAccessCollection where Base: RandomAccessCollection { | |
public func distance(from start: Index, to end: Index) -> Int { | |
guard start != end else { return 0 } | |
guard start < end else { return -distance(from: end, to: start) } | |
guard end < endIndex else { return 1 + distance(from: start, to: index(before: end)) } | |
// The straight addition is OK because cases where it could overflow | |
// correspond to collection parameters that are already covered by the | |
// guards. | |
return base.distance(from: start.range.lowerBound, to: end.range.lowerBound) / (chunkLength + padding) | |
} | |
public func index(_ i: Index, offsetBy distance: Int) -> Index { | |
guard distance != 0 else { return i } | |
// If the collection is empty, we're already screwed if the distance is | |
// non-zero. | |
let lastIndex = index(before: endIndex) | |
guard i <= lastIndex else { return index(lastIndex, offsetBy: distance + 1) } | |
// The straight addition is OK because cases where it could overflow | |
// correspond to collection parameters that are already covered by the | |
// guards. | |
let period = chunkLength + padding | |
let newStart = base.index(i.range.lowerBound, offsetBy: distance * period) | |
let end = base.endIndex | |
if newStart < end, newStart > lastIndex.range.lowerBound { | |
assert(!allowShorterChunks) | |
return endIndex | |
} else { | |
return Index(start: newStart, end: base.index(newStart, offsetBy: chunkLength, limitedBy: end) ?? end) | |
} | |
} | |
} | |
// MARK: - Chunking Generator | |
extension Collection { | |
/// Divides the collection into sub-sequences with the given length and | |
/// spacing/overlap, possibly including suffixes that are shorter than the | |
/// target span. | |
/// | |
/// If `padding` is negative, then consecutive vended elements will share | |
/// `abs(padding)` inner elements (*a.k.a.* overlap). | |
/// | |
/// - Precondition: | |
/// - `chunkLength > 0`. | |
/// - `padding > -chunkLength`. | |
/// - If `allowShorterChunks` is `true`, then `padding` shall not be | |
/// negative. | |
/// | |
/// - Parameters: | |
/// - chunkLength: The number of inner elements each vended element | |
/// should have. | |
/// - allowShorterChunks: Whether or not to vend chunks when `self` can | |
/// still supply inner elements, but not enough to fill a chunk to | |
/// `chunkLength`. If not given, defaults to `false`, *i.e.* all | |
/// vended chunks will have a `count` of `chunkLength` but any trailing | |
/// inner elements will never be vended. | |
/// - padding: A number such that `chunkLength + padding` is the count of | |
/// inner elements between the start of some vended element and the | |
/// start of its succeeding vended element (if any). When | |
/// non-negative, it is the count of inner elements between the end of | |
/// some full-length vended element and its succeeding vended element | |
/// (if any). If not given, defaults to zero, *i.e.* consecutive | |
/// vended elements take up abutting ranges in the wrapped sequence. | |
/// - Returns: A chunk-partitioning collection. | |
@inlinable | |
public func chunks(of chunkLength: Int, butPossiblyShorter allowShorterChunks: Bool = false, withSkipCount padding: Int = 0) -> ChunkedCollection<Self> { | |
return ChunkedCollection(self, chunkLength: chunkLength, padding: padding, allowShorterChunks: allowShorterChunks) | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//===--- ChunkingSequence.swift -------------------------------*- swift -*-===// | |
// | |
// Created by Daryle Walker on 2020-Feb-29 | |
// | |
// Copyright © 2020 Daryle Walker | |
// | |
// A generic type that models a Sequence that wraps another, vending the inner | |
// sequence's elements as fixed-sized chunks of a given collection type. There | |
// is an option whether or not to vend the last chunks if they occur late enough | |
// that they are shorter than expected. | |
// | |
//===----------------------------------------------------------------------===// | |
// MARK: Chunking Iterator | |
/// Groups elements of a wrapped iterator (generally) in fixed-size chunks. | |
public struct ChunkingIterator<Element: RangeReplaceableCollection, Base: IteratorProtocol> where Element.Element == Base.Element { | |
/// The source for the elements within vended elements. | |
var base: Base | |
/// The number of inner elements to use per (unshortened) vended element. | |
let chunkLength: Int | |
/// The number of inner elements to skip between the end of a vended element | |
/// and the start of the next vended element. | |
let padding: Int | |
/// Whether to vend generated elements that are shorter than the desired | |
/// length. This can only happen at the end of the sequence. | |
let allowShorterChunks: Bool | |
/// The next element to be vended. | |
var upcoming = Element() | |
/// Whether or not to keep vending. | |
var isDone = false | |
/// Creates an iterator that groups elements from the given iterator into | |
/// chunks of the given length, with the given amount of padding between | |
/// chunks, and possibly vends any incomplete trailing chunks. | |
/// | |
/// If `padding` is negative, then consecutive vended elements will share | |
/// `abs(padding)` inner elements (*a.k.a.* overlap). | |
/// | |
/// - Precondition: | |
/// - `chunkLength > 0`. | |
/// - `padding > -chunkLength`. | |
/// | |
/// - Parameters: | |
/// - base: The iterator to extract inner elements from for each vended | |
/// element. | |
/// - chunkLength: The count of inner elements per full-length vended | |
/// element. | |
/// - padding: A number such that `chunkLength + padding` is the count of | |
/// inner elements between the start of some vended element and the | |
/// start of its succeeding vended element (if any). When | |
/// non-negative, it is the count of inner elements between the end of | |
/// some full-length vended element and its succeeding vended element | |
/// (if any). | |
/// - allowShorterChunks: Whether or not vend chunks that can be started | |
/// with at least one inner element, but `base` no longer has enough | |
/// elements to provide a count of `chunkLength`. | |
init(_ base: Base, chunkLength: Int, padding: Int, allowShorterChunks: Bool) { | |
precondition(chunkLength > 0) | |
precondition(padding > -chunkLength) | |
self.base = base | |
self.chunkLength = chunkLength | |
self.padding = padding | |
self.allowShorterChunks = allowShorterChunks | |
// Set up the first vended element. | |
upcoming.reserveCapacity(self.chunkLength) | |
for _ in 0..<self.chunkLength { | |
guard let inner = self.base.next() else { | |
if !self.allowShorterChunks { | |
isDone = true | |
} | |
break | |
} | |
upcoming.append(inner) | |
} | |
if !isDone { | |
isDone = upcoming.isEmpty | |
} | |
} | |
} | |
extension ChunkingIterator: IteratorProtocol { | |
public mutating func next() -> Element? { | |
guard !isDone else { return nil } | |
defer { | |
// Discard no-longer-needed inner elements, then skip padding. | |
let readCount: Int | |
if padding >= 0 { | |
readCount = chunkLength | |
upcoming.removeAll(keepingCapacity: true) | |
for _ in 0..<padding { | |
if base.next() == nil { | |
isDone = true | |
break | |
} | |
} | |
} else { | |
readCount = chunkLength + padding | |
// Can't use `upcoming.removeFirst` since it crashes if there | |
// aren't at least the given count of elements. | |
let (uStart, uEnd) = (upcoming.startIndex, upcoming.endIndex) | |
let removeIndex = upcoming.index(uStart, offsetBy: readCount, limitedBy: uEnd) ?? uEnd | |
upcoming.removeSubrange(..<removeIndex) | |
} | |
// Read more inner elements. | |
for _ in 0..<(isDone ? 0 : readCount) { | |
guard let inner = base.next() else { | |
if !self.allowShorterChunks { | |
isDone = true | |
} | |
break | |
} | |
upcoming.append(inner) | |
} | |
if !isDone { | |
isDone = upcoming.isEmpty | |
} | |
} | |
return upcoming | |
} | |
} | |
// MARK: - Chunking Sequence | |
/// Groups elements of a wrapped sequence (generally) in fixed-size chunks. | |
public struct ChunkingSequence<Element: RangeReplaceableCollection, Base: Sequence> where Element.Element == Base.Element { | |
/// The source for the elements within vended elements. | |
let base: Base | |
/// The number of inner elements to use per (unshortened) vended element. | |
let chunkLength: Int | |
/// The number of inner elements to skip between the end of a vended element | |
/// and the start of the next vended element. | |
let padding: Int | |
/// Whether to vend generated elements that are shorter than the desired | |
/// length. This can only happen at the end of the sequence. | |
let allowShorterChunks: Bool | |
/// Creates a sequence that groups elements from the given sequence into | |
/// chunks of the given length, with the given amount of padding between | |
/// chunks, and possibly vends any incomplete trailing chunks. | |
/// | |
/// If `padding` is negative, then consecutive vended elements will share | |
/// `abs(padding)` inner elements (*a.k.a.* overlap). | |
/// | |
/// - Precondition: | |
/// - `chunkLength > 0`. | |
/// - `padding > -chunkLength`. | |
/// | |
/// - Parameters: | |
/// - base: The sequence to extract inner elements from for each vended | |
/// element. | |
/// - chunkLength: The count of inner elements per full-length vended | |
/// element. | |
/// - padding: A number such that `chunkLength + padding` is the count of | |
/// inner elements between the start of some vended element and the | |
/// start of its succeeding vended element (if any). When | |
/// non-negative, it is the count of inner elements between the end of | |
/// some full-length vended element and its succeeding vended element | |
/// (if any). | |
/// - allowShorterChunks: Whether or not vend chunks that can be started | |
/// with at least one inner element, but `base` no longer has enough | |
/// elements to provide a count of `chunkLength`. | |
@usableFromInline | |
init(_ base: Base, chunkLength: Int, padding: Int, allowShorterChunks: Bool) { | |
precondition(chunkLength > 0) | |
precondition(padding > -chunkLength) | |
self.base = base | |
self.chunkLength = chunkLength | |
self.padding = padding | |
self.allowShorterChunks = allowShorterChunks | |
} | |
} | |
extension ChunkingSequence: Sequence { | |
public var underestimatedCount: Int { | |
guard case let buc = base.underestimatedCount, buc > 0 else { return 0 } | |
guard case let (period, tooBig) = chunkLength.addingReportingOverflow(padding), !tooBig else { | |
return buc >= chunkLength || allowShorterChunks ? 1 : 0 | |
} | |
if padding >= 0 { | |
let (fullPeriodCount, remainderCount) = buc.quotientAndRemainder(dividingBy: period) | |
return fullPeriodCount + (remainderCount >= chunkLength ? 1 : 0) | |
} else { | |
let lastOffset = buc - (allowShorterChunks ? 1 : chunkLength) | |
guard lastOffset >= 0 else { return 0 } | |
return lastOffset / period + 1 | |
} | |
} | |
public __consuming func makeIterator() -> ChunkingIterator<Element, Base.Iterator> { | |
return ChunkingIterator(base.makeIterator(), chunkLength: chunkLength, padding: padding, allowShorterChunks: allowShorterChunks) | |
} | |
} | |
extension ChunkingSequence: Decodable where Base: Decodable {} | |
extension ChunkingSequence: Encodable where Base: Encodable {} | |
// MARK: - Chunking Generators | |
extension Sequence { | |
/// Divides the sequence into collections of the given type with the given | |
/// length and spacing/overlap, possibly including suffixes that are shorter | |
/// than the target span. | |
/// | |
/// If `padding` is negative, then consecutive vended elements will share | |
/// `abs(padding)` inner elements (*a.k.a.* overlap). | |
/// | |
/// - Precondition: | |
/// - `chunkLength > 0`. | |
/// - `padding > -chunkLength`. | |
/// | |
/// - Parameters: | |
/// - chunkLength: The number of inner elements each vended element | |
/// should have. | |
/// - type: A metatype specifier for the vended element type. | |
/// - allowShorterChunks: Whether or not to vend chunks when `self` can | |
/// still supply inner elements, but not enough to fill a chunk to | |
/// `chunkLength`. If not given, defaults to `false`, *i.e.* all | |
/// vended chunks will have a `count` of `chunkLength` but any trailing | |
/// inner elements will never be vended. | |
/// - padding: A number such that `chunkLength + padding` is the count of | |
/// inner elements between the start of some vended element and the | |
/// start of its succeeding vended element (if any). When | |
/// non-negative, it is the count of inner elements between the end of | |
/// some full-length vended element and its succeeding vended element | |
/// (if any). If not given, defaults to zero, *i.e.* consecutive | |
/// vended elements take up abutting ranges in the wrapped sequence. | |
/// - Returns: A chunk-generating sequence. | |
@inlinable | |
public func chunks<T>(of chunkLength: Int, eachAs type: T.Type, butPossiblyShorter allowShorterChunks: Bool = false, withSkipCount padding: Int = 0) -> ChunkingSequence<T, Self> where T: RangeReplaceableCollection, T.Element == Element { | |
return ChunkingSequence(self, chunkLength: chunkLength, padding: padding, allowShorterChunks: allowShorterChunks) | |
} | |
/// Divides the sequence into arrays with the given length and | |
/// spacing/overlap, possibly including suffixes that are shorter than the | |
/// target span. | |
/// | |
/// If `padding` is negative, then consecutive vended elements will share | |
/// `abs(padding)` inner elements (*a.k.a.* overlap). | |
/// | |
/// - Precondition: | |
/// - `chunkLength > 0`. | |
/// - `padding > -chunkLength`. | |
/// | |
/// - Parameters: | |
/// - chunkLength: The number of inner elements each vended element | |
/// should have. | |
/// - allowShorterChunks: Whether or not to vend chunks when `self` can | |
/// still supply inner elements, but not enough to fill a chunk to | |
/// `chunkLength`. If not given, defaults to `false`, *i.e.* all | |
/// vended chunks will have a `count` of `chunkLength` but any trailing | |
/// inner elements will never be vended. | |
/// - padding: A number such that `chunkLength + padding` is the count of | |
/// inner elements between the start of some vended element and the | |
/// start of its succeeding vended element (if any). When | |
/// non-negative, it is the count of inner elements between the end of | |
/// some full-length vended element and its succeeding vended element | |
/// (if any). If not given, defaults to zero, *i.e.* consecutive | |
/// vended elements take up abutting ranges in the wrapped sequence. | |
/// - Returns: A chunk-generating sequence. | |
@inlinable | |
public func chunks(of chunkLength: Int, butPossiblyShorter allowShorterChunks: Bool = false, withSkipCount padding: Int = 0) -> ChunkingSequence<[Element], Self> { | |
return chunks(of: chunkLength, eachAs: Array.self, butPossiblyShorter: allowShorterChunks, withSkipCount: padding) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment