Skip to content

Instantly share code, notes, and snippets.

@ashleymills
Last active August 29, 2015 14:14
Show Gist options
  • Save ashleymills/549ab8aff05ec90f4350 to your computer and use it in GitHub Desktop.
Save ashleymills/549ab8aff05ec90f4350 to your computer and use it in GitHub Desktop.
Fetch all lower case English language words from Wiktionary
import Foundation
extension String {
    /// Collects every substring of the receiver that is enclosed by `fromTag` and `toTag`.
    ///
    /// The receiver is scanned left to right. After each match the scan resumes
    /// immediately after the closing tag, so the returned substrings never overlap.
    /// If an opening tag is found with no closing tag after it, scanning stops and
    /// whatever was collected up to that point is returned.
    ///
    /// - fromTag: The text that opens a region of interest.
    /// - toTag: The text that closes a region of interest.
    /// - Returns: The enclosed substrings in order of appearance, or `nil` when
    ///   not a single complete `fromTag`…`toTag` pair was found.
    func stringsBetween(fromTag: String, and toTag: String) -> [String]? {
        var found = [String]()
        var searchRange = Range(start: startIndex, end: endIndex)

        while let openRange = rangeOfString(fromTag, options: nil, range: searchRange) {
            // Use the end of the range that actually matched rather than stepping
            // forward by the tag's character count: it is always the correct index
            // and needs no per-character walk.
            let afterOpen = Range(start: openRange.endIndex, end: endIndex)

            if let closeRange = rangeOfString(toTag, options: nil, range: afterOpen) {
                found.append(substringWithRange(Range(start: openRange.endIndex, end: closeRange.startIndex)))
                searchRange = Range(start: closeRange.endIndex, end: endIndex)
            } else {
                // Opening tag without a closing tag: nothing more can match.
                break
            }
        }

        // Callers use `if let` on the result, so "no matches" is reported as nil
        // rather than as an empty array.
        return found.isEmpty ? nil : found
    }

    /// Tells whether the receiver consists solely of lowercase letters.
    ///
    /// "Lowercase letter" means membership in
    /// `NSCharacterSet.lowercaseLetterCharacterSet()`; any other character
    /// (uppercase, digit, space, punctuation, ...) makes the string invalid.
    /// A string with no characters contains nothing to reject and is reported valid.
    /// NOTE(review): that character set is not restricted to ASCII a-z; confirm
    /// whether accented lowercase letters are meant to pass.
    func isValidDictionaryString() -> Bool {
        let nonLower = NSCharacterSet.lowercaseLetterCharacterSet().invertedSet
        return rangeOfCharacterFromSet(nonLower) == nil
    }
}
// Root of the English index on Wiktionary. Each index page is addressed by
// appending a letter followed by a page number, e.g. ".../Index:English/a1".
let baseURL = NSURL(string: "http://en.wiktionary.org/wiki/Index:English/")
let letters = Array("abcdefghijklmnopqrstuvwxyz")
let numbers = Array("12")
var error: NSError?
// Diagnostics go to standard error so that standard output stays a clean word list.
let standardError = NSFileHandle.fileHandleWithStandardError()

for letter in letters {
    for number in numbers {
        if let URL = baseURL?.URLByAppendingPathComponent(String(letter) + String(number)) {
            if let response = String(contentsOfURL: URL, encoding: NSUTF8StringEncoding, error: &error) {
                // Drill down: ordered lists -> list items -> link targets under /wiki/.
                if let olStrings = response.stringsBetween("<ol>", and: "</ol>") {
                    for olString in olStrings {
                        if let liStrings = olString.stringsBetween("<li>", and: "</li>") {
                            for liString in liStrings {
                                if let wordStrings = liString.stringsBetween("<a href=\"/wiki/", and: "\" title") {
                                    for wordString in wordStrings {
                                        // Print only entries made up entirely of lowercase letters.
                                        if wordString.isValidDictionaryString() {
                                            println("\(wordString)")
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            } else {
                // The page could not be downloaded or decoded. Report it instead of
                // silently producing an incomplete word list, then carry on with
                // the remaining pages.
                let reason = error?.localizedDescription ?? "unknown error"
                let message = "Failed to fetch \(URL): \(reason)\n"
                if let data = message.dataUsingEncoding(NSUTF8StringEncoding) {
                    standardError.writeData(data)
                }
            }
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment