Last active
July 7, 2018 10:11
-
-
Save acerosalazar/5749f338a7daf903db7266f146768b62 to your computer and use it in GitHub Desktop.
A bidirectional, memory-efficient file reader. Good for use cases where only a portion of the file needs to be read – e.g. reading the last 10 lines of a file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Foundation | |
// MARK: - | |
/// Reads a file piece by piece, yielding the text between occurrences of a delimiter.
///
/// The file is consumed in chunks of `chunkSize` bytes, either from the start of the file
/// towards the end or, when `fromEndOfFile` is `true`, from the end towards the start (in
/// which case elements are produced last-to-first). The reader is its own iterator, so it
/// can be used directly in a `for … in` loop.
///
/// - Note: An element whose bytes cannot be decoded with `encoding` makes `next()` return
///   `nil`, which a `for … in` loop treats as the end of the sequence.
class FileReader: Sequence, IteratorProtocol {

    // MARK: - Private Members

    private let file: URL
    private let delimiter: Data
    private let chunkSize: Int
    private let maxChunks: Int
    private let readBackwards: Bool
    private let encoding: String.Encoding
    private let fileHandle: FileHandle
    private var buffer: Data

    // MARK: - Initializers

    /// Opens `file` for reading and positions the read cursor.
    ///
    /// - Parameters:
    ///   - file: Location of the file to read.
    ///   - delimiter: Text that separates two elements. It is converted to bytes using
    ///     `encoding` so that it matches the way it is stored in the file.
    ///   - encoding: Encoding used for the delimiter and for every returned string.
    ///     NOTE(review): for UTF-16/UTF-32 files prefer an explicit-endian encoding; the
    ///     generic variants may prepend a byte-order mark to the delimiter bytes.
    ///   - chunkSize: Number of bytes requested from the file per read. Values below 1 are
    ///     treated as 1.
    ///   - maxChunks: Maximum number of reads a single `next()` call performs while looking
    ///     for a delimiter. Values below 1 are treated as 1.
    ///   - fromEndOfFile: When `true`, reading starts at the end of the file and moves
    ///     towards the beginning.
    /// - Throws: `CocoaError(.fileReadInapplicableStringEncoding)` when `delimiter` cannot be
    ///   represented in `encoding`, or the error raised by `FileHandle(forReadingFrom:)`.
    init(file: URL,
         delimiter: String = "\n",
         encoding: String.Encoding = .utf8,
         chunkSize: Int = 4_096,
         maxChunks: Int = 2,
         fromEndOfFile: Bool = false) throws {
        // Encode the delimiter with the file's encoding (not unconditionally UTF-8) and
        // report a failure instead of force-unwrapping.
        guard let delimiterBytes = delimiter.data(using: encoding) else {
            throw CocoaError(.fileReadInapplicableStringEncoding)
        }
        self.file = file
        self.delimiter = delimiterBytes
        self.encoding = encoding
        // Non-positive sizes would either trap in the offset arithmetic (negative chunk size)
        // or make `next()` return "" forever (zero chunks), so clamp both to at least 1.
        self.chunkSize = Swift.max(1, chunkSize)
        self.maxChunks = Swift.max(1, maxChunks)
        self.readBackwards = fromEndOfFile
        self.buffer = Data()
        self.fileHandle = try FileHandle(forReadingFrom: file)
        self.fileHandle.seek(toFileOffset: fromEndOfFile ? .eof : .bof)
    }

    // MARK: - IteratorProtocol

    /// Returns the next element, or `nil` once the file is exhausted.
    ///
    /// The buffer is searched for the delimiter (from its end when reading backwards). If
    /// none is found another chunk is loaded and the search repeated, up to `maxChunks`
    /// times; after that the whole buffer is returned as a single element.
    func next() -> String? {
        // `maxChunks` passes may each load a chunk; the extra final pass flushes the buffer.
        for attempt in 0...maxChunks {
            let searchOptions: Data.SearchOptions = readBackwards ? .backwards : []
            if let range = buffer.range(of: delimiter, options: searchOptions) {
                let (lhs, rhs) = buffer.split(byRange: range)
                let (line, remainder) = readBackwards ? (rhs, lhs) : (lhs, rhs)
                // Copy the remainder so the buffer is zero-based and does not keep the
                // storage of the larger buffer it was sliced from alive.
                buffer = Data(remainder)
                // Decode with the configured encoding, exactly like the flush path below.
                return line.string(encoding: encoding)
            }
            if attempt < maxChunks {
                loadBuffer()
                // Nothing buffered even after a read: there is no more data to return.
                if buffer.isEmpty { return nil }
            } else {
                // Read budget exhausted without finding a delimiter: hand back what we have.
                let line = buffer.string(encoding: encoding)
                buffer = Data()
                return line
            }
        }
        buffer = Data()
        return nil
    }

    // MARK: - Private API

    /// Reads one more chunk from the file and joins it to the buffer.
    ///
    /// Reading forwards appends the chunk after the buffered bytes. Reading backwards reads
    /// the chunk that ends at the current file offset, prepends it to the buffer and leaves
    /// the file offset at the start of that chunk; at offset 0 the buffer is left unchanged.
    private func loadBuffer() {
        if readBackwards {
            let end = fileHandle.offsetInFile
            guard end > 0 else { return }
            let step = UInt64(chunkSize)
            // Do not step past the beginning of the file.
            let start = end > step ? end - step : 0
            let chunk = fileHandle.readData(fromOffset: start, ofLength: Int(end - start), withRewind: true)
            buffer = chunk + buffer
        } else {
            let chunk = fileHandle.readData(fromOffset: fileHandle.offsetInFile, ofLength: chunkSize, withRewind: false)
            buffer = buffer + chunk
        }
    }
}
// MARK: - | |
extension Data {

    /// Cuts the receiver in two around `range`.
    ///
    /// - Parameter range: The span of bytes to cut out; it appears in neither result.
    /// - Returns: `lhs`, the bytes before `range`, and `rhs`, the bytes after it.
    func split(byRange range: Range<Data.Index>) -> (lhs: Data, rhs: Data) {
        let before = self[startIndex..<range.lowerBound]
        let after = self[range.upperBound..<endIndex]
        return (lhs: before, rhs: after)
    }

    /// Decodes the receiver as text.
    ///
    /// - Parameter encoding: Encoding to interpret the bytes with; UTF-8 when omitted.
    /// - Returns: The decoded string, or `nil` when `String(data:encoding:)` fails.
    func string(encoding: String.Encoding = .utf8) -> String? {
        let decoded = String(data: self, encoding: encoding)
        return decoded
    }
}
// MARK: - | |
extension FileHandle {

    /// Symbolic positions that `seek(toFileOffset:)` can move the file pointer to.
    enum FileOffset {
        /// The end of the file.
        case eof
        /// The beginning of the file (offset 0).
        case bof
    }

    /// Moves the file pointer to the given symbolic position.
    ///
    /// - Parameter offset: `.bof` seeks to offset 0, `.eof` seeks to the end of the file.
    func seek(toFileOffset offset: FileOffset) {
        switch offset {
        case .bof:
            seek(toFileOffset: 0)
        case .eof:
            seekToEndOfFile()
        }
    }

    /// Reads up to `length` bytes starting at `offset`.
    ///
    /// - Parameters:
    ///   - offset: Absolute file offset to start reading from.
    ///   - length: Number of bytes requested from `readData(ofLength:)`.
    ///   - rewind: When `true`, the file pointer is moved back to `offset` after the read;
    ///     otherwise it is left wherever the read finished.
    /// - Returns: The bytes that were read.
    func readData(fromOffset offset: UInt64, ofLength length: Int, withRewind rewind: Bool) -> Data {
        seek(toFileOffset: offset)
        let chunk = readData(ofLength: length)
        guard rewind else { return chunk }
        seek(toFileOffset: offset)
        return chunk
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment