Skip to content

Instantly share code, notes, and snippets.

@cooldaemon
Created September 11, 2009 02:17
Show Gist options
  • Save cooldaemon/185012 to your computer and use it in GitHub Desktop.
Save cooldaemon/185012 to your computer and use it in GitHub Desktop.

KissXML+HTML

Adds methods to KissXML for parsing HTML documents.

How to Use

#import <Foundation/Foundation.h>
#import "DDXML+HTML.h"

// Receives failure details from the calls below. Only inspect it after a call
// has returned nil.
NSError *error = nil;

// html
// Parse HTML with libxml2's HTML reader. `options` takes libxml2
// htmlParserOption flags. The variable is typed DDXMLDocument because the
// HTML initializers are declared in a category on DDXMLDocument.
DDXMLDocument *htmlDocument = [[DDXMLDocument alloc]
    initWithHTMLData:htmlData
             options:HTML_PARSE_NOWARNING | HTML_PARSE_NOERROR
               error:&error
];

// xml
// Parse the same bytes with libxml2's XML reader instead. `options` takes
// libxml2 xmlParserOption flags.
DDXMLDocument *xmlDocument = [[DDXMLDocument alloc]
    initWithData:htmlData
         options:XML_PARSE_RECOVER
           error:&error
];

// xpath
// Evaluate an XPath expression against the parsed HTML document.
NSArray *array = [htmlDocument
    nodesForXPath:@"id(\"maincol\")/div[@class=\"content\"]/h2/following-sibling::node()[not(./preceding-sibling::node()/descendant-or-self::div[@class=\"posted\"])]"
            error:&error
];
#import "DDXML.h"
#import "DDXMLNode+HTML.h"
#import "DDXMLDocument+HTML.h"
#import <Foundation/Foundation.h>
#import <libxml/HTMLparser.h>
#import "DDXMLDocument.h"
@interface DDXMLDocument (HTML)

/**
 * Initializes the document by parsing an HTML string.
 *
 * The string is encoded as UTF-8 and handed to -initWithHTMLData:options:error:.
 *
 * @param string  HTML source text.
 * @param options Flags passed through to libxml2's htmlReadMemory().
 * @param error   Optional out-parameter; set on failure (domain "DDXMLErrorDomain").
 * @return The initialized document, or nil on failure.
 */
- (id)initWithHTMLString:(NSString *)string
                 options:(NSUInteger)options
                   error:(NSError **)error;

/**
 * Initializes the document by parsing HTML bytes with libxml2's htmlReadMemory().
 *
 * @param data    HTML source bytes.
 * @param options Flags passed through to htmlReadMemory().
 * @param error   Optional out-parameter; set on failure with code 0 when data is
 *                nil or empty, or code 1 when libxml2 returns no document.
 * @return The initialized document, or nil on failure.
 */
- (id)initWithHTMLData:(NSData *)data
               options:(NSUInteger)options
                 error:(NSError **)error;

/**
 * Initializes the document by parsing XML bytes with libxml2's xmlReadMemory().
 *
 * @param data    XML source bytes.
 * @param options Flags passed through to xmlReadMemory().
 * @param error   Optional out-parameter; set on failure with code 0 when data is
 *                nil or empty, or code 1 when libxml2 returns no document.
 * @return The initialized document, or nil on failure.
 */
- (id)initWithData:(NSData *)data
           options:(NSUInteger)options
             error:(NSError **)error;

@end
#import "DDXMLDocument+HTML.h"
#import "DDXMLPrivate.h"
// Selects which libxml2 reader the DDXMLDocument (HTML) initializers use.
enum {
    XMLDocument  = 0, // parse the input with xmlReadMemory()
    HTMLDocument = 1  // parse the input with htmlReadMemory()
};

// Storage type for the XMLDocument / HTMLDocument constants above.
typedef NSUInteger DocumentContent;
@implementation DDXMLDocument (HTML)

/**
 * Stores an NSError with domain "DDXMLErrorDomain" and the given code in *error.
 *
 * Does nothing when the caller passed NULL for the out-parameter. For the two
 * codes used by this category (0 and 1) the error also carries a localized
 * description; any other code gets no userInfo.
 *
 * @param error Out-parameter to populate; may be NULL.
 * @param code  Error code to report.
 */
- (void)setError:(NSError **)error code:(NSInteger)code
{
    if (error == NULL) {
        return;
    }

    NSString *description = nil;
    if (code == 0) {
        description = @"No data was supplied to parse.";
    } else if (code == 1) {
        description = @"libxml2 could not parse the supplied data.";
    }

    NSDictionary *userInfo = nil;
    if (description != nil) {
        userInfo = [NSDictionary dictionaryWithObject:description
                                               forKey:NSLocalizedDescriptionKey];
    }

    *error = [NSError errorWithDomain:@"DDXMLErrorDomain"
                                 code:code
                             userInfo:userInfo];
}

/**
 * Shared implementation behind the public initializers: parses data with the
 * libxml2 reader selected by content and wraps the resulting document.
 *
 * @param data    Bytes to parse.
 * @param content HTMLDocument to use htmlReadMemory(); anything else uses xmlReadMemory().
 * @param options Parser flags passed through to the libxml2 reader.
 * @param error   Optional out-parameter; set with code 0 when data is nil or
 *                empty, or code 1 when no document could be produced.
 * @return The initialized document, or nil on failure (the receiver is released).
 */
- (id)initWithData:(NSData *)data
           content:(DocumentContent)content
           options:(NSUInteger)options
             error:(NSError **)error
{
    // Messaging nil yields 0, so this covers both nil and empty data.
    if ([data length] == 0) {
        [self setError:error code:0];
        [self release];
        return nil;
    }

    // libxml2 takes the buffer size as an int. Refuse larger buffers instead of
    // letting the length be truncated silently, which could parse only a prefix.
    if ([data length] > (NSUInteger)INT_MAX) {
        [self setError:error code:1];
        [self release];
        return nil;
    }

    // NOTE(review): this changes a libxml2 default that applies beyond this
    // call (blank-node handling) - confirm that is intended for all parsers.
    xmlKeepBlanksDefault(0);

    const char *bytes = (const char *)[data bytes];
    int size = (int)[data length];
    int parserOptions = (int)options;

    xmlDocPtr doc = NULL;
    if (HTMLDocument == content) {
        doc = htmlReadMemory(bytes, size, "", NULL, parserOptions);
    } else {
        doc = xmlReadMemory(bytes, size, "", NULL, parserOptions);
    }

    if (doc == NULL) {
        [self setError:error code:1];
        [self release];
        return nil;
    }

    return [self initWithCheckedPrimitive:(xmlKindPtr)doc];
}

/**
 * Parses an HTML string by encoding it as UTF-8 and forwarding to
 * -initWithHTMLData:options:error:.
 */
- (id)initWithHTMLString:(NSString *)string
                 options:(NSUInteger)options
                   error:(NSError **)error
{
    NSData *utf8Data = [string dataUsingEncoding:NSUTF8StringEncoding];
    return [self initWithHTMLData:utf8Data
                          options:options
                            error:error];
}

/**
 * Parses HTML bytes using libxml2's HTML reader.
 */
- (id)initWithHTMLData:(NSData *)data
               options:(NSUInteger)options
                 error:(NSError **)error
{
    return [self initWithData:data
                      content:HTMLDocument
                      options:options
                        error:error];
}

/**
 * Parses XML bytes using libxml2's XML reader.
 */
- (id)initWithData:(NSData *)data
           options:(NSUInteger)options
             error:(NSError **)error
{
    return [self initWithData:data
                      content:XMLDocument
                      options:options
                        error:error];
}

@end
#import <Foundation/Foundation.h>
#import "DDXMLNode.h"
@interface DDXMLNode (HTML)
/// Returns YES when kindPtr's type is XML_DOCUMENT_NODE or XML_HTML_DOCUMENT_NODE.
+ (BOOL)isXmlDocPtr:(xmlKindPtr)kindPtr;
@end
#import "DDXMLNode+HTML.h"
@implementation DDXMLNode (HTML)

/**
 * Reports whether a libxml2 node is a document node, counting both XML and
 * HTML documents.
 *
 * @param kindPtr Node to inspect; NULL is treated as "not a document".
 * @return YES when the node's type is XML_DOCUMENT_NODE or
 *         XML_HTML_DOCUMENT_NODE, otherwise NO.
 */
+ (BOOL)isXmlDocPtr:(xmlKindPtr)kindPtr
{
    // Guard the dereference below; a NULL node cannot be a document.
    if (kindPtr == NULL) {
        return NO;
    }

    switch (kindPtr->type) {
        case XML_DOCUMENT_NODE:
        case XML_HTML_DOCUMENT_NODE:
            return YES;
        default:
            return NO;
    }
}

@end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment