Skip to content

Instantly share code, notes, and snippets.

@aehlke
Last active June 14, 2025 23:58
Show Gist options
  • Save aehlke/48bc4dce84a4cf75da66be5a19b7613d to your computer and use it in GitHub Desktop.
Save aehlke/48bc4dce84a4cf75da66be5a19b7613d to your computer and use it in GitHub Desktop.
import Foundation
import LRUCache
@globalActor
fileprivate actor LRUFileCacheActor {
static var shared = LRUFileCacheActor()
}
#if DEBUG
fileprivate let debugBuildID = UUID()
#endif
public func stableHash(data: Data) -> UInt64 {
var result = UInt64 (5381)
let buf = [UInt8](data)
for b in buf {
result = 127 * (result & 0x00ffffffffffffff) + UInt64(b)
}
return result
}
/// Large objects get stored on disk in the cache directory that Apple manages, which doesn't need LRU management.
open class LRUFileCache<I: Encodable, O: Codable>: ObservableObject {
@Published public var cacheDirectory: URL
private let cache: LRUCache<String, Any?>
/// Maximum size (in bytes) for items kept in-memory. Larger items are disk-only.
private let memoryThreshold = 1_048_576 // 1 MB
/// Keys stored on disk but not loaded into the in-memory cache.
private var diskOnlyKeys: Set<String> = []
private var jsonEncoder: JSONEncoder {
let encoder = JSONEncoder()
encoder.outputFormatting = [.sortedKeys, .withoutEscapingSlashes]
return encoder
}
public init(namespace: String, version: Int? = nil, totalBytesLimit: Int = .max, countLimit: Int = .max) {
assert(!namespace.isEmpty, "LRUFileCache namespace must not be empty")
let fileManager = FileManager.default
let cacheRoot = fileManager.urls(for: .cachesDirectory, in: .userDomainMask).first!
let cacheDirectory = cacheRoot.appendingPathComponent("LRUFileCache").appendingPathComponent(namespace)
self.cacheDirectory = cacheDirectory
if !fileManager.fileExists(atPath: cacheDirectory.path) {
try? fileManager.createDirectory(at: cacheDirectory, withIntermediateDirectories: true)
}
cache = LRUCache(totalCostLimit: totalBytesLimit, countLimit: countLimit)
let versionFileURL = cacheRoot.appendingPathComponent("lru-cache-version-\(namespace).txt")
var versionString = version.map(String.init) ?? Bundle.main.versionString
#if DEBUG
versionString += debugBuildID.uuidString
#endif
if let versionData = try? Data(contentsOf: versionFileURL) {
if String(data: versionData, encoding: .utf8) != versionString {
removeAll()
try? fileManager.removeItem(at: cacheDirectory)
try? fileManager.createDirectory(at: cacheDirectory, withIntermediateDirectories: true)
}
} else {
try? fileManager.removeItem(at: cacheDirectory)
try? fileManager.createDirectory(at: cacheDirectory, withIntermediateDirectories: true)
}
try? versionString.data(using: .utf8)?.write(to: versionFileURL)
rebuild()
}
private func cacheKeyHash(_ key: I) -> String? {
guard let data = try? jsonEncoder.encode(key) else { return nil }
let hash = stableHash(data: data)
var hashData = withUnsafeBytes(of: hash) { Data($0) }
while hashData.first == 0 { hashData.removeFirst() }
let base64 = hashData.base64EncodedString()
var output = [UInt8]()
output.reserveCapacity(base64.utf8.count)
for c in base64.utf8 {
switch c {
case UInt8(ascii: "+"): output.append(UInt8(ascii: "-"))
case UInt8(ascii: "/"): output.append(UInt8(ascii: "_"))
case UInt8(ascii: "="): break
default: output.append(c)
}
}
return String(decoding: output, as: UTF8.self)
}
public func removeValue(forKey key: I) {
guard let keyHash = cacheKeyHash(key) else { return }
// debugPrint("# REMOVE", key, cacheDirectory.lastPathComponent)
cache.removeValue(forKey: keyHash)
let baseURL = cacheDirectory.appendingPathComponent(keyHash)
let fileManager = FileManager.default
if let files = try? fileManager.contentsOfDirectory(at: baseURL.deletingLastPathComponent(), includingPropertiesForKeys: nil) {
for file in files where file.deletingPathExtension().lastPathComponent == keyHash {
try? fileManager.removeItem(at: file)
}
}
diskOnlyKeys.remove(keyHash)
}
public func removeAll() {
// debugPrint("# REMOVE ALL", cacheDirectory.lastPathComponent)
cache.removeAllValues()
let fileManager = FileManager.default
if let files = try? fileManager.contentsOfDirectory(at: cacheDirectory, includingPropertiesForKeys: nil) {
for file in files where !file.lastPathComponent.hasPrefix(".") {
try? fileManager.removeItem(at: file)
}
}
diskOnlyKeys.removeAll()
}
public func hasKey(_ key: I) -> Bool {
guard let keyHash = cacheKeyHash(key) else {
return false
}
return diskOnlyKeys.contains(keyHash) || cache.hasKey(keyHash)
}
public func value(forKey key: I) -> O? {
guard let keyHash = cacheKeyHash(key) else {
// debugPrint("# no key hash", key, cacheDirectory.lastPathComponent)
return nil
}
// 1) Try in-memory cache
if let cached = cache.value(forKey: keyHash) as? O {
// debugPrint("# got cache", key, cacheDirectory.lastPathComponent)
return cached
}
// 2) If marked on disk, load from disk each time
guard diskOnlyKeys.contains(keyHash) else {
// debugPrint("# no disk", key, cacheDirectory.lastPathComponent)
return nil
}
let baseURL = cacheDirectory.appendingPathComponent(keyHash)
let exts = ["lz4", "json-lz4", "json", "raw", "nil"]
for ext in exts {
let fileURL = baseURL.appendingPathExtension(ext)
guard FileManager.default.fileExists(atPath: fileURL.path) else {
continue
}
// debugPrint("# got disk ext", key, fileURL.lastPathComponent, cacheDirectory.lastPathComponent)
do {
var data: Data?
switch ext {
case "nil":
// FIXME: Test this... must return "nil"ish
return nil
case "raw":
data = try Data(contentsOf: fileURL)
case "lz4":
let compressed = try Data(contentsOf: fileURL)
data = try (compressed as NSData).decompressed(using: .lz4) as Data
case "json":
data = try Data(contentsOf: fileURL)
// return try JSONDecoder().decode(O.self, from: jsonData)
case "json-lz4":
let compressed = try Data(contentsOf: fileURL)
data = try (compressed as NSData).decompressed(using: .lz4) as Data
// return try JSONDecoder().decode(O.self, from: decompressed)
default:
break
}
guard let data else {
return nil
}
if O.self == String.self {
return String(data: data, encoding: .utf8) as? O
} else if O.self == [UInt8].self {
return [UInt8](data) as? O
} else if O.self == Data.self {
return data as? O
} else {
return try JSONDecoder().decode(O.self, from: data)
}
} catch {
continue
}
}
return nil
}
public func setValue(_ value: O?, forKey key: I) {
// debugPrint("# setval ", key, value.debugDescription.prefix(300))
// debugPrint("# setval ", key, cacheDirectory.lastPathComponent)
guard let keyHash = cacheKeyHash(key) else { return }
var dataToStore: Data?
var encoding = ""
if let value = value {
do {
if let uint8Array = value as? [UInt8] {
let rawData = Data(uint8Array)
if rawData.count > 200_000 {
dataToStore = try (rawData as NSData).compressed(using: .lz4) as Data
encoding = "lz4"
} else {
dataToStore = rawData
encoding = "raw"
}
} else if let stringValue = value as? String {
if stringValue.utf16.count > 200_000 {
dataToStore = try (stringValue.data(using: .utf8)! as NSData)
.compressed(using: .lz4) as Data
encoding = "lz4"
} else {
dataToStore = stringValue.data(using: .utf8)
encoding = "raw"
}
} else if let dataValue = value as? Data {
if dataValue.count ?? 0 > 200_000 {
encoding = "lz4"
} else {
encoding = "raw"
}
} else {
dataToStore = try jsonEncoder.encode(value)
if let rawData = dataToStore, rawData.count ?? 0 > 200_000 {
dataToStore = try (rawData as NSData).compressed(using: .lz4) as Data
encoding = "json-lz4"
} else {
encoding = "json"
}
}
} catch {
print("Encoding error: \(error)")
}
} else {
encoding = "nil"
}
// DispatchQueue.main.async {
let dataSize = dataToStore?.count ?? 1
let isLarge = dataSize > memoryThreshold
if !isLarge {
// small enough: cache in memory
self.cache.setValue(value, forKey: keyHash, cost: dataSize)
diskOnlyKeys.remove(keyHash)
} else {
// too large: disk-only
diskOnlyKeys.insert(keyHash)
}
// }
let baseURL = cacheDirectory.appendingPathComponent(keyHash)
let fileURL = baseURL.appendingPathExtension(encoding)
if let data = dataToStore {
try? data.write(to: fileURL, options: .atomic)
} else {
FileManager.default.createFile(atPath: fileURL.path, contents: nil, attributes: nil)
}
}
private func rebuild() {
diskOnlyKeys.removeAll()
let fileManager = FileManager.default
if let contents = try? fileManager.contentsOfDirectory(at: cacheDirectory, includingPropertiesForKeys: nil, options: .skipsHiddenFiles) {
for item in contents {
let keyHash = item.deletingPathExtension().lastPathComponent
let ext = item.pathExtension
// Track this file on disk
// diskAllKeys.insert(keyHash)
// Determine size to decide memory vs disk-only
let attrs = try? FileManager.default.attributesOfItem(atPath: item.path)
let fileSize = (attrs?[.size] as? NSNumber)?.intValue ?? 0
var value: O?
if fileSize <= memoryThreshold {
if ext == "nil" {
value = nil
} else if ext == "json" {
if let data = try? Data(contentsOf: item),
let decoded = try? JSONDecoder().decode(O.self, from: data) {
value = decoded
}
} else if ext == "lz4" {
if let compressed = try? Data(contentsOf: item),
let decompressed = try? (compressed as NSData).decompressed(using: .lz4),
let decoded = try? JSONDecoder().decode(O.self, from: decompressed as Data) {
value = decoded
}
} else {
if O.self == String.self {
if let data = try? Data(contentsOf: item),
let string = String(data: data, encoding: .utf8) as? O {
value = string
}
} else if O.self == [UInt8].self {
if let data = try? Data(contentsOf: item) {
value = [UInt8](data) as? O
}
} else if let data = try? Data(contentsOf: item),
let decoded = try? JSONDecoder().decode(O.self, from: data) {
value = decoded
}
}
// TODO: Reuse data objects from above
let cost = (try? Data(contentsOf: item).count) ?? 1
// DispatchQueue.main.async {
self.cache.setValue(value, forKey: keyHash, cost: cost)
// }
diskOnlyKeys.remove(keyHash)
} else {
diskOnlyKeys.insert(keyHash)
}
}
// debugPrint("# FIN REBUILD", cacheDirectory, cache.allKeys)
}
}
// private func deleteOrphanFiles() throws {
// let fileManager = FileManager.default
// let existing = Set(cache.allKeys).union(diskOnlyKeys)
// debugPrint("# del", cacheDirectory, cache.allKeys)
// if let contents = try? fileManager.contentsOfDirectory(at: cacheDirectory, includingPropertiesForKeys: nil) {
// for file in contents where !file.lastPathComponent.hasPrefix(".") {
// let keyHash = file.deletingPathExtension().lastPathComponent
// if !existing.contains(keyHash) {
// try? fileManager.removeItem(at: file)
// diskOnlyKeys.remove(keyHash)
// }
// }
// }
// }
}
@aehlke
Copy link
Author

aehlke commented Jun 14, 2025

MIT license

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment