Skip to content

Instantly share code, notes, and snippets.

@sooop
Last active June 23, 2024 22:49
Show Gist options
  • Save sooop/a2b110f8eebdf904d0664ed171bcd7a2 to your computer and use it in GitHub Desktop.
Save sooop/a2b110f8eebdf904d0664ed171bcd7a2 to your computer and use it in GitHub Desktop.
Read a large text file line by line - Swift 3
import Foundation
/// Reads a text file incrementally and hands it back one delimited line at a
/// time, so a large file never has to be held in memory in one piece.
///
///     if let reader = StreamReader(url: fileURL) {
///         while let line = reader.nextLine() { print(line) }
///     }
class StreamReader {
    /// Text encoding used to decode each line and to encode the delimiter.
    let encoding: String.Encoding
    /// Number of bytes requested from the file per read.
    let chunkSize: Int
    /// Handle on the file being read; closed in `deinit`.
    let fileHandle: FileHandle
    /// Bytes already read from the file that have not been returned as a line yet.
    var buffer: Data
    /// The delimiter as raw bytes; this is what gets searched for in `buffer`.
    let delimPattern: Data
    /// Set once the file is exhausted and the trailing line has been handed out.
    var isAtEOF: Bool = false

    /// Opens `url` for reading.
    ///
    /// - Parameters:
    ///   - url: Location of the file to read.
    ///   - delimeter: Line separator (spelling kept for source compatibility).
    ///     It is encoded with `encoding` so that it matches the bytes actually
    ///     stored in the file.
    ///   - encoding: Encoding of the file's text.
    ///   - chunkSize: Bytes to read per file access; must be positive.
    /// - Returns: `nil` when the file cannot be opened, `chunkSize` is not
    ///   positive, or the delimiter is empty or not representable in `encoding`.
    ///
    /// NOTE(review): the delimiter is matched byte-wise. Prefer encodings with a
    /// fixed byte order (`.utf16LittleEndian`, `.utf16BigEndian`, ...) over
    /// `.utf16`/`.utf32`, whose encoded delimiter is expected to start with a
    /// byte-order mark and therefore would not match inside the file - confirm
    /// on the target platform.
    init?(url: URL, delimeter: String = "\n", encoding: String.Encoding = .utf8, chunkSize: Int = 4096) {
        // A non-positive chunk size can never make progress, and an empty
        // pattern can never match, so reject both instead of misbehaving later.
        guard chunkSize > 0,
              let pattern = delimeter.data(using: encoding),
              !pattern.isEmpty,
              let handle = try? FileHandle(forReadingFrom: url)
        else { return nil }

        self.fileHandle = handle
        self.chunkSize = chunkSize
        self.encoding = encoding
        self.buffer = Data(capacity: chunkSize)
        self.delimPattern = pattern
    }

    deinit {
        fileHandle.closeFile()
    }

    /// Moves back to the start of the file and discards any buffered bytes, so
    /// the next `nextLine()` call returns the first line again.
    func rewind() {
        fileHandle.seek(toFileOffset: 0)
        buffer.removeAll(keepingCapacity: true)
        isAtEOF = false
    }

    /// Returns the next line without its delimiter.
    ///
    /// - Returns: The next line, or `nil` once the end of the file has been
    ///   reached. `nil` is also returned when the bytes of a line cannot be
    ///   decoded with `encoding`; in that case the undecodable line is still
    ///   consumed. A final line without a trailing delimiter is returned as-is.
    func nextLine() -> String? {
        if isAtEOF { return nil }

        // Bytes before this index are known not to start a delimiter, so they
        // are not searched again after more data has been appended.
        var searchStart = buffer.startIndex

        while true {
            if let match = buffer.range(of: delimPattern, options: [], in: searchStart..<buffer.endIndex) {
                let lineBytes = buffer.subdata(in: buffer.startIndex..<match.lowerBound)
                let line = String(data: lineBytes, encoding: encoding)
                // Drop the line together with its delimiter.
                buffer.removeSubrange(buffer.startIndex..<match.upperBound)
                return line
            }

            // A delimiter may straddle two chunks: keep the last
            // (pattern length - 1) bytes inside the next search window.
            searchStart = max(buffer.startIndex, buffer.endIndex - (delimPattern.count - 1))

            let chunk = fileHandle.readData(ofLength: chunkSize)
            if chunk.isEmpty {
                // End of file: whatever is left is the last, unterminated line.
                isAtEOF = true
                return buffer.isEmpty ? nil : String(data: buffer, encoding: encoding)
            }
            buffer.append(chunk)
        }
    }
}
@MarkusBux
Copy link

For the newline part, maybe another option would be CharacterSet.newlines
https://developer.apple.com/documentation/foundation/characterset/1780325-newlines

@kevinbhayes
Copy link

kevinbhayes commented Sep 30, 2021

For the newline part, maybe another option would be CharacterSet.newlines
https://developer.apple.com/documentation/foundation/characterset/1780325-newlines

You might have to decode the buffer into a string before searching. Not impossible, but it would make things a bit more cumbersome and a bit slower.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment