Last active
February 21, 2018 22:45
-
-
Save leighmcculloch/1202238 to your computer and use it in GitHub Desktop.
NSString + Strip HTML
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// NSString_stripHtml.h | |
// Copyright 2011 Leigh McCulloch. Released under the MIT license. | |
#import <Foundation/Foundation.h> | |
/// Category that adds markup stripping to NSString.
@interface NSString (stripHtml)

/// Returns the character data of the receiver that lies outside of any markup tags.
- (NSString *)stripHtml;

@end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// NSString_stripHtml.m | |
// Copyright 2011 Leigh McCulloch. Released under the MIT license. | |
#import "NSString_stripHtml.h" | |
// Private NSXMLParser delegate that records every run of character data the
// parser reports, so the text between tags can be reassembled afterwards.
@interface NSString_stripHtml_XMLParsee : NSObject <NSXMLParserDelegate> {
@private
    // Character runs in the order the parser delivered them.
    NSMutableArray *textChunks;
}

// Returns all recorded character runs concatenated in arrival order.
- (NSString *)getCharsFound;

@end

@implementation NSString_stripHtml_XMLParsee

- (id)init {
    self = [super init];
    if (self != nil) {
        textChunks = [[NSMutableArray alloc] init];
    }
    return self;
}

- (void)dealloc {
    // Manual reference counting: give up ownership of the chunk list.
    [textChunks release];
    [super dealloc];
}

#pragma mark - NSXMLParserDelegate

// Invoked by the parser for each piece of text found; the piece is appended to the list.
- (void)parser:(NSXMLParser *)parser foundCharacters:(NSString *)string {
    [textChunks addObject:string];
}

#pragma mark - Result

- (NSString *)getCharsFound {
    NSString *joined = [textChunks componentsJoinedByString:@""];
    return joined;
}

@end
@implementation NSString (stripHtml)

/// Returns the receiver with markup tags removed.
///
/// The receiver is wrapped in a synthetic root element and run through NSXMLParser;
/// the character data reported between tags is concatenated and returned.
/// Ampersands are escaped before parsing, so entity references in the receiver
/// (e.g. "&nbsp;") come back as literal text rather than being resolved.
///
/// @return The text found outside of tags. If the parser stops early on malformed
///         markup, only the text reported up to that point is returned.
- (NSString*)stripHtml {
    // Escape every ampersand. Without this a bare "&", or an entity XML does not
    // define, aborts the parse; with it, existing escape sequences survive verbatim.
    NSString* escaped = [self stringByReplacingOccurrencesOfString:@"&" withString:@"&amp;"];

    // Wrap in a single root element so fragments with several top-level nodes still parse.
    NSString* document = [NSString stringWithFormat:@"<root>%@</root>", escaped];

    // The document carries no XML declaration, so it must be encoded as UTF-8 (the XML
    // default). Lossy conversion is allowed so a data object is always produced.
    NSData* data = [document dataUsingEncoding:NSUTF8StringEncoding allowLossyConversion:YES];
    NSXMLParser* parser = [[NSXMLParser alloc] initWithData:data];

    // The delegate records any characters found outside tags; that is the stripped content.
    NSString_stripHtml_XMLParsee* parsee = [[NSString_stripHtml_XMLParsee alloc] init];
    parser.delegate = parsee;

    // The parse result is deliberately not checked: input is often not well-formed XML,
    // and whatever text was collected is still returned.
    [parser parse];

    NSString* strippedString = [parsee getCharsFound];

    // Manual reference counting: balance the two alloc/init calls above.
    [parser release];
    [parsee release];

    return strippedString;
}

@end
Thanks for this! Just used it in a project.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@rohitdhiman Fixed the ampersand (`&`) problem. The category now also preserves any ampersand-escaped sequences (e.g. `&amp;`, `&nbsp;`, `&ndash;`, etc.) in the string rather than resolving them. As a consequence, if your string contains `&amp;`, it will still be `&amp;` after calling `stripHtml`.