jaredsinclair · December 18, 2017 05:21 · cedricbahirwe · Nov 3, 2020
diff --git a/NSString+HTML.swift b/NSString+HTML.swift
 /// Quick-n-dirty translation of MWFeedParser's algorithm from Objective-C to Swift
 /// seealso: https://github.com/mwaterfall/MWFeedParser/blob/master/Classes/NSString%2BHTML.m
 public extension NSString {
    
    public func byConvertingHTMLToPlainText() -> String {
        
        let stopCharacters = CharacterSet(charactersIn: "< \t\n\r\(0x0085)\(0x000C)\(0x2028)\(0x2029)")
        let newLineAndWhitespaceCharacters = CharacterSet(charactersIn: " \t\n\r\(0x0085)\(0x000C)\(0x2028)\(0x2029)")
        let tagNameCharacters = CharacterSet(charactersIn: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
        
        let result = NSMutableString(capacity: length)
        let scanner = Scanner(string: self as String)
        scanner.charactersToBeSkipped = nil
        scanner.caseSensitive = true
        var str: NSString? = nil
        var tagName: NSString? = nil
        var dontReplaceTagWithSpace = false
        
        repeat {
            // Scan up to the start of a tag or whitespace
            if scanner.scanUpToCharacters(from: stopCharacters, into: &str), let s = str {
                result.append(s as String)
                str = nil
            }
            // Check if we've stopped at a tag/comment or whitespace
            if scanner.scanString("<", into: nil) {
                // Stopped at a comment, script tag, or other tag
                if scanner.scanString("!--", into: nil) {
                    // Comment
                    scanner.scanUpTo("-->", into: nil)
                    scanner.scanString("-->", into: nil)
                } else if scanner.scanString("script", into: nil) {
                    // Script tag where things don't need escaping!
                    scanner.scanUpTo("</script>", into: nil)
                    scanner.scanString("</script>", into: nil)
                } else {
                    // Tag - remove and replace with space unless it's
                    // a closing inline tag then dont replace with a space
                    if scanner.scanString("/", into: nil) {
                        // Closing tag - replace with space unless it's inline
                        tagName = nil
                        dontReplaceTagWithSpace = false
                        if scanner.scanCharacters(from: tagNameCharacters, into: &tagName), let t = tagName {
                            tagName = t.lowercased as NSString
                            dontReplaceTagWithSpace =
                                tagName == "a" ||
                                tagName == "b" ||
                                tagName == "i" ||
                                tagName == "q" ||
                                tagName == "span" ||
                                tagName == "em" ||
                                tagName == "strong" ||
                                tagName == "cite" ||
                                tagName == "abbr" ||
                                tagName == "acronym" ||
                                tagName == "label"
                        }
                        // Replace tag with string unless it was an inline
                        if !dontReplaceTagWithSpace && result.length > 0 && !scanner.isAtEnd {
                            result.append(" ")
                        }
                    }
                    // Scan past tag
                    scanner.scanUpTo(">", into: nil)
                    scanner.scanString(">", into: nil)
                }
            } else {
                // Stopped at whitespace - replace all whitespace and newlines with a space
                if scanner.scanCharacters(from: newLineAndWhitespaceCharacters, into: nil) {
                    if result.length > 0 && !scanner.isAtEnd {
                        result.append(" ") // Dont append space to beginning or end of result
                    }
                }
            }
        } while !scanner.isAtEnd
        
        // Cleanup
        
        // Decode HTML entities and return (this isn't included in this gist, but is often important)
        // let retString = (result as String).stringByDecodingHTMLEntities
        
        // Return
        return result as String // retString;
    }
    
 }
	/// Quick-n-dirty translation of MWFeedParser's algorithm from Objective-C to Swift
	/// seealso: https://github.com/mwaterfall/MWFeedParser/blob/master/Classes/NSString%2BHTML.m
	public extension NSString {

	public func byConvertingHTMLToPlainText() -> String {

	let stopCharacters = CharacterSet(charactersIn: "< \t\n\r\(0x0085)\(0x000C)\(0x2028)\(0x2029)")
	let newLineAndWhitespaceCharacters = CharacterSet(charactersIn: " \t\n\r\(0x0085)\(0x000C)\(0x2028)\(0x2029)")
	let tagNameCharacters = CharacterSet(charactersIn: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")

	let result = NSMutableString(capacity: length)
	let scanner = Scanner(string: self as String)
	scanner.charactersToBeSkipped = nil
	scanner.caseSensitive = true
	var str: NSString? = nil
	var tagName: NSString? = nil
	var dontReplaceTagWithSpace = false

	repeat {
	// Scan up to the start of a tag or whitespace
	if scanner.scanUpToCharacters(from: stopCharacters, into: &str), let s = str {
	result.append(s as String)
	str = nil
	}
	// Check if we've stopped at a tag/comment or whitespace
	if scanner.scanString("<", into: nil) {
	// Stopped at a comment, script tag, or other tag
	if scanner.scanString("!--", into: nil) {
	// Comment
	scanner.scanUpTo("-->", into: nil)
	scanner.scanString("-->", into: nil)
	} else if scanner.scanString("script", into: nil) {
	// Script tag where things don't need escaping!
	scanner.scanUpTo("</script>", into: nil)
	scanner.scanString("</script>", into: nil)
	} else {
	// Tag - remove and replace with space unless it's
	// a closing inline tag then dont replace with a space
	if scanner.scanString("/", into: nil) {
	// Closing tag - replace with space unless it's inline
	tagName = nil
	dontReplaceTagWithSpace = false
	if scanner.scanCharacters(from: tagNameCharacters, into: &tagName), let t = tagName {
	tagName = t.lowercased as NSString
	dontReplaceTagWithSpace =
	tagName == "a" \|\|
	tagName == "b" \|\|
	tagName == "i" \|\|
	tagName == "q" \|\|
	tagName == "span" \|\|
	tagName == "em" \|\|
	tagName == "strong" \|\|
	tagName == "cite" \|\|
	tagName == "abbr" \|\|
	tagName == "acronym" \|\|
	tagName == "label"
	}
	// Replace tag with string unless it was an inline
	if !dontReplaceTagWithSpace && result.length > 0 && !scanner.isAtEnd {
	result.append(" ")
	}
	}
	// Scan past tag
	scanner.scanUpTo(">", into: nil)
	scanner.scanString(">", into: nil)
	}
	} else {
	// Stopped at whitespace - replace all whitespace and newlines with a space
	if scanner.scanCharacters(from: newLineAndWhitespaceCharacters, into: nil) {
	if result.length > 0 && !scanner.isAtEnd {
	result.append(" ") // Dont append space to beginning or end of result
	}
	}
	}
	} while !scanner.isAtEnd

	// Cleanup

	// Decode HTML entities and return (this isn't included in this gist, but is often important)
	// let retString = (result as String).stringByDecodingHTMLEntities

	// Return
	return result as String // retString;
	}

	}