1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
|
/*
 * Parse a NUL-terminated, in-memory HTML string into a libxml2 document.
 *
 * d: HTML text to parse. NULL yields a NULL result.
 * b: base URL handed to the parser. NULL is treated as "".
 *
 * The parser runs with warnings and errors silenced and with recovery
 * enabled (HTML_PARSE_NOWARNING | HTML_PARSE_NOERROR | HTML_PARSE_RECOVER).
 *
 * Returns whatever htmlCtxtReadMemory() produces, or NULL when the input is
 * NULL, is too long for the parser's int size argument, or the parser
 * context cannot be created. The temporary parser context is always freed
 * here; per the libxml2 API the returned document is released by the caller
 * with xmlFreeDoc().
 */
static htmlDocPtr parseHtmlDocument(const char * d, const char * b /* base url */) {
    if (!d)
        return NULL;
    if (!b)
        b = "";

    /*
     * htmlCtxtReadMemory() takes the buffer size as an int. Reject inputs
     * whose length does not survive the round trip through int instead of
     * silently passing a truncated or negative size.
     */
    size_t length = strlen(d);
    int size = (int)length;
    if (size < 0 || (size_t)size != length)
        return NULL;

    htmlParserCtxtPtr parser_context = htmlNewParserCtxt();
    if (!parser_context)
        return NULL;

    htmlDocPtr document = htmlCtxtReadMemory(parser_context, d, size, b, NULL /* encoding */,
                                             HTML_PARSE_NOWARNING | HTML_PARSE_NOERROR | HTML_PARSE_RECOVER);
    htmlFreeParserCtxt(parser_context);
    return document;
}
/*
 * Evaluate an XPath expression against a document.
 *
 * document:    document used to create the XPath evaluation context.
 * xpath_query: NUL-terminated XPath expression. NULL yields a NULL result.
 *
 * Returns the object produced by xmlXPathEvalExpression(), or NULL when the
 * query is NULL, the XPath context cannot be created, or evaluation fails.
 * The temporary context is freed before returning. Per the libxml2 API the
 * caller releases the result with xmlXPathFreeObject(); a node-set result
 * refers to nodes of the document, so the document should outlive it.
 */
static xmlXPathObjectPtr findNodes(htmlDocPtr document, const char * xpath_query) {
    if (!xpath_query)
        return NULL;

    xmlXPathContextPtr xpath_ctx = xmlXPathNewContext(document);
    if (!xpath_ctx)
        return NULL; /* context allocation failed; nothing to evaluate with */

    xmlXPathObjectPtr nodes = xmlXPathEvalExpression(BAD_CAST xpath_query, xpath_ctx);
    xmlXPathFreeContext(xpath_ctx);
    return nodes;
}
/*
 * Callback invoked by eachNode() for every node of an XPath result.
 * `data` is the opaque pointer given to eachNode(), passed through unchanged.
 */
typedef void (*node_function_t)(xmlNodePtr node, void * data);

/*
 * Call `f(node, data)` for each node in an XPath node-set result, in the
 * order the nodes appear in the set.
 *
 * nodes: XPath result object. May be NULL (e.g. a failed evaluation), and
 *        its node set may be NULL (libxml2 leaves nodesetval NULL for empty
 *        or non-node-set results); both cases are treated as "no nodes".
 * f:     callback to invoke. NULL makes this function a no-op.
 * data:  opaque pointer forwarded to every callback invocation.
 */
static void eachNode(xmlXPathObjectPtr nodes, node_function_t f, void * data) {
    if (!nodes || !f)
        return;

    xmlNodeSetPtr nodeset = nodes->nodesetval;
    if (!nodeset || !nodeset->nodeTab)
        return;

    int size = nodeset->nodeNr;
    for (int i = 0; i < size; i++) {
        f(nodeset->nodeTab[i], data);
    }
}
/*
 * Node callback (matches node_function_t) that prints the target of a link.
 *
 * If `node` is an element carrying an "href" attribute, writes
 * "-> Link to '<href>'" followed by a newline to stdout. Non-element nodes,
 * elements without "href", and a NULL node are ignored.
 *
 * node: node to inspect.
 * data: unused; present only to satisfy the callback signature.
 */
void printLinkNode(xmlNodePtr node, void * data) {
    (void)data;

    if (!node || node->type != XML_ELEMENT_NODE)
        return;

    /*
     * xmlGetProp() returns a newly allocated copy of the attribute value
     * (NULL when the attribute is absent), so a single lookup both tests for
     * the attribute and fetches it. The copy must be released with xmlFree().
     */
    xmlChar *href = xmlGetProp(node, BAD_CAST "href");
    if (href) {
        printf("-> Link to '%s'\n", (const char *)href);
        xmlFree(href);
    }
}
|