Last active
June 23, 2024 22:49
-
-
Save sooop/a2b110f8eebdf904d0664ed171bcd7a2 to your computer and use it in GitHub Desktop.
Read a large text file line by line - Swift 3
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Foundation | |
/// Reads a file incrementally and returns it one delimited line at a time,
/// so the whole file never has to be held in memory at once.
///
/// Typical use:
///
///     if let reader = StreamReader(url: fileURL) {
///         while let line = reader.nextLine() { print(line) }
///     }
///
/// The file handle is closed when the reader is deallocated.
class StreamReader {
    /// Encoding used to decode each line and to encode the delimiter.
    let encoding: String.Encoding
    /// Number of bytes requested from the file per read.
    let chunkSize: Int
    /// Handle of the file being read; closed in `deinit`.
    let fileHandle: FileHandle
    /// Bytes that have been read from the file but not yet returned as a line.
    var buffer: Data
    /// The delimiter, encoded as the byte sequence searched for in `buffer`.
    let delimPattern: Data
    /// `true` once the file has been read to its end and the last line returned.
    var isAtEOF: Bool = false

    /// Opens `url` for reading.
    ///
    /// - Parameters:
    ///   - url: Location of the file to read.
    ///   - delimeter: String that separates lines (label spelling kept for
    ///     source compatibility with existing callers).
    ///   - encoding: Text encoding of the file.
    ///   - chunkSize: Number of bytes to read from the file at a time.
    /// - Returns: `nil` if the file cannot be opened or the delimiter cannot be
    ///   represented in `encoding`.
    init?(url: URL, delimeter: String = "\n", encoding: String.Encoding = .utf8, chunkSize: Int = 4096) {
        // Encode the delimiter in the file's own encoding so the byte pattern
        // matches what is actually on disk (e.g. two bytes per "\n" in UTF-16).
        // Prefer an explicit byte order (.utf16LittleEndian / .utf16BigEndian)
        // over .utf16, because the latter prepends a byte-order mark when encoding.
        guard let pattern = delimeter.data(using: encoding) else { return nil }
        guard let handle = try? FileHandle(forReadingFrom: url) else { return nil }

        self.fileHandle = handle
        self.chunkSize = chunkSize
        self.encoding = encoding
        self.buffer = Data(capacity: chunkSize)
        self.delimPattern = pattern
    }

    deinit {
        fileHandle.closeFile()
    }

    /// Moves back to the start of the file and discards any buffered bytes,
    /// so the next call to `nextLine()` returns the first line again.
    func rewind() {
        fileHandle.seek(toFileOffset: 0)
        buffer.removeAll(keepingCapacity: true)
        isAtEOF = false
    }

    /// Returns the next line without its delimiter.
    ///
    /// - Returns: The next line, or `nil` when the end of the file has been
    ///   reached. `nil` is also returned for a line whose bytes cannot be
    ///   decoded with `encoding`.
    func nextLine() -> String? {
        if isAtEOF { return nil }

        while true {
            // A complete line is already buffered: cut it out and return it.
            if let range = buffer.range(of: delimPattern) {
                let lineData = buffer.subdata(in: buffer.startIndex..<range.lowerBound)
                let line = String(data: lineData, encoding: encoding)
                // Drop the line and its delimiter from the front of the buffer.
                buffer.removeSubrange(buffer.startIndex..<range.upperBound)
                return line
            }

            // No delimiter yet: pull in more bytes and search again.
            let chunk = fileHandle.readData(ofLength: chunkSize)
            if chunk.isEmpty {
                // End of file: whatever is left is the final, unterminated line.
                isAtEOF = true
                return buffer.isEmpty ? nil : String(data: buffer, encoding: encoding)
            }
            buffer.append(chunk)
        }
    }
}
Thank you for your code.
I modified it to extract the chunks of data between two search strings and store those chunks in a String array.
It's working fine for me.
hiteshjain4/StreamReader.swift
For the newline part, maybe another option would be CharacterSet.newlines
https://developer.apple.com/documentation/foundation/characterset/1780325-newlines
For the newline part, maybe another option would be
CharacterSet.newlines
https://developer.apple.com/documentation/foundation/characterset/1780325-newlines
You might have to encode the buffer into a string before searching. Not impossible, but would make it a bit more cumbersome; and a bit slower.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@oprisk: Some even use CR/LF, not just CR or LF. :-)