-
-
Save erica/5e273e9521276a7af35d375bed7a4578 to your computer and use it in GitHub Desktop.
import Foundation | |
/// Provides `NSRegularExpression` pattern matching against strings
/// in `switch` expressions.
///
/// Regular expressions are expensive to construct. The built-in
/// class cache stores already-constructed pattern instances using
/// the pattern string (coerced to `NSString`) as its keys. Modify
/// matching options at the `match(_:options:)` call-site if needed.
///
/// - Note: This type is implemented as a class because `NSCache`
///   stores objects, not Swift structures. Its keys, which
///   must also be `AnyObject`, are coerced from `String` to
///   `NSString`.
public class Regex {
    /// Stores (expensive) existing `Regex` instances, keyed by pattern string.
    public static let regexCache = NSCache<NSString, Regex>()

    /// The compiled regular expression used for matching.
    ///
    /// `NSRegularExpression` is immutable, so this property is replaced by
    /// a freshly compiled instance whenever `options` changes.
    public private(set) var regex: NSRegularExpression

    /// The options the current `regex` was compiled with.
    ///
    /// Assigning a different value recompiles `regex` from the same pattern
    /// so that the change actually affects subsequent matches.
    public var options: NSRegularExpression.Options = [] {
        didSet {
            // Skip the expensive recompilation when nothing changed.
            guard options != oldValue else { return }
            regex = Regex.compile(regex.pattern, options: options)
        }
    }

    /// Initializes a `Regex` instance that defaults to
    /// "no options" and registers it in `regexCache` under its pattern.
    /// Update as needed for case or diacritical insensitivity using the
    /// publicly modifiable `options` property.
    ///
    /// - parameter regexPattern: A regular expression pattern string.
    ///   An invalid pattern is treated as a programmer error and traps.
    /// - parameter options: Regular expression matching options. (See `NSRegularExpression.Options`)
    public init(_ regexPattern: String, options: NSRegularExpression.Options = []) {
        // Property observers do not fire during initialization, so the
        // regex is compiled exactly once here.
        self.options = options
        self.regex = Regex.compile(regexPattern, options: options)
        Regex.regexCache.setObject(self, forKey: regexPattern as NSString)
    }

    /// Create or retrieve a `Regex` instance from the
    /// class cache. The instance can then be used with `~=` to
    /// match against a string.
    ///
    /// - parameter pattern: A regular expression pattern string.
    /// - parameter options: Regular expression matching options. (See `NSRegularExpression.Options`). Defaults to `[]`.
    /// - returns: The cached instance for `pattern`, or a newly created one.
    ///
    /// - Note: Existing options are re-used unless they are
    ///   overwritten by a non-empty set. To update options
    ///   to `[]`, access them directly from the cached version
    ///   outside the `match` function.
    public static func match(_ pattern: String, options: NSRegularExpression.Options = []) -> Regex {
        guard let cached = regexCache.object(forKey: pattern as NSString) else {
            // `init` registers the new instance in the cache itself.
            return Regex(pattern, options: options)
        }
        // Apply option-reuse policy: only a non-empty set overrides.
        if !options.isEmpty {
            cached.options = options
        }
        return cached
    }

    /// Extends pattern matching to use the pattern and
    /// options stored in the `Regex` matcher.
    ///
    /// - returns: `true` when the pattern matches anywhere in `rhs`.
    public static func ~= (
        lhs: Regex,
        rhs: String
    ) -> Bool {
        // NSRegularExpression works in UTF-16 code units.
        let range = NSRange(location: 0, length: rhs.utf16.count)
        return lhs.regex.firstMatch(in: rhs, range: range) != nil
    }

    /// Compiles `pattern` with `options`, failing loudly (with a
    /// diagnostic) if a valid regular expression cannot be constructed.
    private static func compile(
        _ pattern: String,
        options: NSRegularExpression.Options
    ) -> NSRegularExpression {
        do {
            return try NSRegularExpression(pattern: pattern, options: options)
        } catch {
            fatalError("Invalid regular expression \"\(pattern)\": \(error)")
        }
    }
}
// Usage examples
let greeting = "Hello, playground"

// Plain string-to-string pattern matching is simple equality.
_ = greeting ~= "Hello" // false
_ = greeting ~= "Hello, playground" // true

// Regex-to-string pattern matching.
_ = Regex.match("H.*o") ~= greeting // true
_ = Regex.match("H.*o") ~= "Out of luck" // false

// In `switch`: the first subject matches, the second does not.
for subject in [greeting, "Out of luck"] {
    switch subject {
    case Regex.match("H.*o"): print("Hello to you!")
    default: print("Nope")
    }
}

// Option handling on a cached instance.
let sailorPattern = "sailor"
let sailorKey = sailorPattern as NSString
let sailorSpellings = ["sailor", "Sailor", "SAILOR"]

// Prints one match result per spelling, using the cached matcher.
let reportMatches = {
    for spelling in sailorSpellings {
        print(Regex.match(sailorPattern) ~= spelling)
    }
}

// Prints the options currently stored on the cached matcher.
let reportCachedOptions = {
    print(Regex.regexCache.object(forKey: sailorKey)?.options as Any)
}

// Constructing an instance registers it in the cache as case-insensitive.
_ = Regex(sailorPattern, options: [.caseInsensitive])
reportMatches() // true, true, true
reportCachedOptions()

// Drop case insensitivity directly on the cached instance.
Regex.regexCache.object(forKey: sailorKey)?.options.remove(.caseInsensitive)
reportCachedOptions()

// Intended: true, false, false. All three print true whenever the
// compiled regular expression is not rebuilt after the option change.
reportMatches()
reportCachedOptions()
`NSRegularExpression` is "An immutable representation of a compiled regular expression…". Consequently, its `options` property is read-only.
If you want to match the same pattern with other options only, you will need to create a new regex. As AliSoftware commented, your code will work if you make the options part of the cache key:
init:
Regex.regexCache.setObject(self, forKey: "\(regexPattern)\(options.rawValue)" as NSString)
and in the match function:
if let regex = Regex.regexCache.object(forKey: "\(pattern)\(options.rawValue)" as NSString)
You'll have more objects in the cache, you won't get the decoupling you were after, but it will still be useful to have it.
I forked the gist, and refactored the code so that you can still decouple the matching options from the pattern in the way you were trying to: gist
There is no need for a local "options" storage, since the actual regular expression already provides for that. The refactored code recreates the stored regular expression when the matching options change. This entails that the cache provides no gain when the pattern stays the same, but the matching options change. A pattern is only stored once, with the last used matching options. This way the cache is useful, though not as useful as when incorporating the matching options into the cache key, on the other hand it will be a more compact cache. Whether this is preferable to having a separate cache for each pattern + options combination I guess depends on the use case.
The problem is that you don't include the options in your cache key.
When you retrieve a regex from the cache, that cached `Regex` might already have options set from when it was first stored in your `NSCache`.
But then your code changes the retrieved regex's options only if the new options are non-zero. So if the cached `Regex` was stored with non-zero options and you ask for it with empty options, you don't change them and end up reusing the old options.