Created
November 8, 2009 18:37
-
-
Save minase/229419 to your computer and use it in GitHub Desktop.
XPathEvaluator.m
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#import "Foundation/Foundation.h" | |
#include <libxml/HTMLparser.h> | |
#include <libxml/HTMLtree.h> | |
#include <libxml/xpath.h> | |
/**
 * Evaluates XPath expressions against HTML text using libxml2.
 *
 * This class uses manual retain/release (no ARC).
 */
@interface XPathEvaluator : NSObject

/**
 * Parses `html` as an HTML document, evaluates `query` against it and
 * describes every node in the resulting node set.
 *
 * @param query An XPath expression.
 * @param html  The HTML source text.
 * @return An autoreleased array of NSDictionary objects, one per matched
 *         node, with the keys:
 *           @"name"       - the node name (NSString),
 *           @"attributes" - attribute name -> value (NSDictionary; empty for
 *                           nodes that are not elements),
 *           @"content"    - the serialized children of the node (inner
 *                           HTML) with surrounding whitespace trimmed.
 *         The array is empty when the document cannot be parsed, the
 *         expression cannot be evaluated, or nothing matches.
 */
+ (NSArray *)arrayWithXPathQuery:(NSString *)query fromHTML:(NSString *)html;

@end

/** Converts a libxml2 UTF-8 string to an NSString; NULL or undecodable input yields @"". */
static NSString *XPathEvaluatorString(const xmlChar *chars)
{
    if (chars == NULL) {
        return @"";
    }
    NSString *string = [NSString stringWithCString:(const char *)chars
                                          encoding:NSUTF8StringEncoding];
    return (string != nil) ? string : @"";
}

/** Serializes the children of `node` (its inner HTML) and trims surrounding whitespace. Returns nil if no output buffer could be allocated. */
static NSString *XPathEvaluatorInnerHTML(htmlDocPtr doc, xmlNodePtr node)
{
    xmlOutputBufferPtr outbuf = xmlAllocOutputBuffer(NULL);
    if (outbuf == NULL) {
        NSLog(@"outbuf fail");
        return nil;
    }

    for (xmlNodePtr child = node->children; child != NULL; child = child->next) {
        htmlNodeDumpFormatOutput(outbuf, doc, child, "UTF-8", 0);
    }
    xmlOutputBufferFlush(outbuf);

    // libxml2 2.9.0 made the output buffer opaque; older releases expose the
    // xmlBuffer structure directly.
#if LIBXML_VERSION >= 20900
    const xmlChar *raw = xmlOutputBufferGetContent(outbuf);
#else
    const xmlChar *raw = (outbuf->buffer != NULL) ? outbuf->buffer->content : NULL;
#endif

    // The NSString copies the bytes, so the buffer can be closed right after.
    NSString *content = [XPathEvaluatorString(raw)
        stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]];
    xmlOutputBufferClose(outbuf);
    return content;
}

/** Collects the attributes of `node` into a dictionary; nodes that are not elements yield an empty dictionary. */
static NSDictionary *XPathEvaluatorAttributes(xmlNodePtr node)
{
    NSMutableDictionary *attributes = [NSMutableDictionary dictionary];

    // Only element nodes have a `properties` field; attribute and document
    // nodes returned by XPath use different structures.
    if (node->type != XML_ELEMENT_NODE) {
        return attributes;
    }

    for (xmlAttr *attr = node->properties; attr != NULL; attr = attr->next) {
        if (attr->name == NULL) {
            continue;
        }
        // Valueless attributes (<input disabled>) have no child text node.
        const xmlChar *value = (attr->children != NULL) ? attr->children->content : NULL;
        [attributes setObject:XPathEvaluatorString(value)
                       forKey:XPathEvaluatorString(attr->name)];
    }
    return attributes;
}

@implementation XPathEvaluator

+ (NSArray *)arrayWithXPathQuery:(NSString *)query fromHTML:(NSString *)html
{
    NSAutoreleasePool *pool = [NSAutoreleasePool new];
    // Created with +new (owned) so it survives draining `pool`; it is handed
    // to the caller's pool on return.
    NSMutableArray *results = [NSMutableArray new];

    const char *chtml = [html cStringUsingEncoding:NSUTF8StringEncoding];
    const char *cxpath = [query cStringUsingEncoding:NSUTF8StringEncoding];

    htmlDocPtr doc = NULL;
    xmlXPathContextPtr xctx = NULL;
    xmlXPathObjectPtr xobj = NULL;

    if (chtml != NULL) {
        doc = htmlParseDoc((xmlChar *)chtml, "UTF-8");
    }
    if (doc == NULL) {
        NSLog(@"parse fail");
    } else if (cxpath != NULL) {
        xctx = xmlXPathNewContext(doc);
        if (xctx != NULL) {
            xobj = xmlXPathEval((xmlChar *)cxpath, xctx);
        }
    }
    if (doc != NULL && xobj == NULL) {
        NSLog(@"xpath fail");
    }

    // nodesetval is NULL for non-node-set results (numbers, strings, ...);
    // xmlXPathNodeSetGetLength treats NULL as an empty set.
    xmlNodeSetPtr nodes = (xobj != NULL) ? xobj->nodesetval : NULL;
    int count = xmlXPathNodeSetGetLength(nodes);

    for (int n = 0; n < count; n++) {
        xmlNodePtr node = nodes->nodeTab[n];
        // Namespace results are xmlNs structures, not xmlNode; skip them.
        if (node == NULL || node->type == XML_NAMESPACE_DECL) {
            continue;
        }

        NSString *content = XPathEvaluatorInnerHTML(doc, node);
        if (content == nil) {
            continue;
        }

        NSDictionary *nodeinfo = [NSDictionary dictionaryWithObjectsAndKeys:
            XPathEvaluatorString(node->name), @"name",
            XPathEvaluatorAttributes(node), @"attributes",
            content, @"content",
            nil];
        [results addObject:nodeinfo];
    }

    if (xobj != NULL) {
        xmlXPathFreeObject(xobj);
    }
    if (xctx != NULL) {
        xmlXPathFreeContext(xctx);
    }
    if (doc != NULL) {
        xmlFreeDoc(doc);
    }

    [pool release];
    return [results autorelease];
}

@end
/**
 * Command-line driver: evaluates an XPath query against an HTML string and
 * logs the matched nodes followed by their count.
 *
 * Usage: program [xpath [html]]
 * Built-in sample values are used for any argument that is omitted.
 *
 * @return 0 on success, 1 if an argument is not valid UTF-8.
 */
int main(int argc, char **argv)
{
    NSAutoreleasePool *pool = [NSAutoreleasePool new];

    const char *cxpath = (argc > 1) ? argv[1] : "//span[@class='msg']";
    // argv[2] exists as soon as argc > 2.
    const char *chtml = (argc > 2) ? argv[2] : "<span class=\"msg\">oppai</span><span>futomomo</span>";

    NSString *query = [NSString stringWithCString:cxpath encoding:NSUTF8StringEncoding];
    NSString *html = [NSString stringWithCString:chtml encoding:NSUTF8StringEncoding];

    // stringWithCString:encoding: returns nil for bytes that are not valid UTF-8.
    if (query == nil || html == nil) {
        NSLog(@"arguments must be valid UTF-8");
        [pool release];
        return 1;
    }

    NSArray *nodes = [XPathEvaluator arrayWithXPathQuery:query fromHTML:html];
    NSLog(@"%@", nodes);
    // -count returns NSUInteger; cast so the format specifier is correct on
    // both 32- and 64-bit targets.
    NSLog(@"%lu", (unsigned long)[nodes count]);

    [pool release];
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment