/*
 * Parses a NUL-terminated HTML string into a libxml2 document.
 *
 * d: HTML text to parse. NULL yields NULL instead of crashing in strlen().
 * b: base URL handed to the parser; NULL is treated as "".
 *
 * The parser runs with warnings and errors silenced and with recovery
 * enabled (HTML_PARSE_NOWARNING | HTML_PARSE_NOERROR | HTML_PARSE_RECOVER).
 *
 * Returns the parsed document, or NULL when d is NULL, the parser context
 * cannot be created, or parsing fails. The caller owns the returned document
 * (libxml2 documents are released with xmlFreeDoc()).
 */
htmlDocPtr parseHtmlDocument (const char * d, const char * b /* base url */) {
	if (!d)
		return NULL;
	if (!b)
		b = "";
	htmlParserCtxtPtr parser_context = htmlNewParserCtxt();
	if (!parser_context)
		return NULL;
	/* htmlCtxtReadMemory takes an int size, so inputs longer than INT_MAX
	 * bytes are not supported by this helper. */
	htmlDocPtr document = htmlCtxtReadMemory(parser_context, d, (int) strlen(d), b, NULL /* encoding */,
		HTML_PARSE_NOWARNING | HTML_PARSE_NOERROR | HTML_PARSE_RECOVER);
	htmlFreeParserCtxt(parser_context);
	return document;
}
/*
 * Evaluates an XPath expression against a whole document.
 *
 * document:    document to search.
 * xpath_query: XPath expression (NUL-terminated).
 *
 * Returns the XPath result object when it holds a non-empty node set.
 * Returns NULL when the context cannot be created, the expression fails to
 * evaluate, or the result contains no nodes (an empty set or a result
 * without a node set). A non-NULL result must be released by the caller
 * with xmlXPathFreeObject().
 */
xmlXPathObjectPtr findNodes (htmlDocPtr document, const char * xpath_query) {
	xmlXPathContextPtr xpath_ctx = xmlXPathNewContext(document);
	if (!xpath_ctx)
		return NULL;
	xmlXPathObjectPtr nodes = xmlXPathEvalExpression(BAD_CAST xpath_query, xpath_ctx);
	/* The context is only needed for the evaluation itself. */
	xmlXPathFreeContext(xpath_ctx);
	if (!nodes) /* evaluation failed, e.g. malformed expression */
		return NULL;
	if (xmlXPathNodeSetIsEmpty(nodes->nodesetval)) {
		xmlXPathFreeObject(nodes);
		return NULL;
	}
	return nodes;
}
/*
 * Evaluates an XPath expression with `node` as the context node, so relative
 * expressions are resolved starting from that node.
 *
 * node:        context node; NULL yields NULL.
 * xpath_query: XPath expression (NUL-terminated).
 *
 * Returns the XPath result object when it holds a non-empty node set.
 * Returns NULL when node is NULL, the context cannot be created, the
 * expression fails to evaluate, or the result contains no nodes. A non-NULL
 * result must be released by the caller with xmlXPathFreeObject().
 */
xmlXPathObjectPtr findNodesN (xmlNodePtr node, const char * xpath_query) {
	if (!node)
		return NULL;
	xmlXPathContextPtr xpath_ctx = xmlXPathNewContext(node->doc);
	if (!xpath_ctx)
		return NULL;
	/* xmlXPathNodeEval installs `node` as the context node itself, so no
	 * separate xmlXPathSetContextNode() call is needed. */
	xmlXPathObjectPtr nodes = xmlXPathNodeEval(node, BAD_CAST xpath_query, xpath_ctx);
	xmlXPathFreeContext(xpath_ctx);
	if (!nodes) /* evaluation failed, e.g. malformed expression */
		return NULL;
	if (xmlXPathNodeSetIsEmpty(nodes->nodesetval)) {
		xmlXPathFreeObject(nodes);
		return NULL;
	}
	return nodes;
}
/* Callback type taken by eachNode and eachNodeX: receives one node plus the
 * caller-supplied `data` pointer that was passed to the iterating function. */
typedef void (*node_function_t) (xmlNodePtr node, void * data);
/*
 * Calls f(node, data) once for every node in an XPath result, in node-table
 * order. The EACHNODE macro can be used instead when a loop body is
 * preferred over a callback.
 *
 * nodes: XPath result to iterate; NULL or a result without a node set is
 *        treated as empty (the same tolerance the EACHNODE macro has).
 * f:     callback to invoke; NULL makes the call a no-op.
 * data:  opaque pointer forwarded unchanged to every callback invocation.
 *
 * The result object is not freed here.
 */
void eachNode (xmlXPathObjectPtr nodes, node_function_t f, void * data) {
	if (!nodes || !nodes->nodesetval || !f)
		return;
	xmlNodeSetPtr nodeset = nodes->nodesetval;
	/* The count is read once up front, before any callback runs. */
	int size = nodeset->nodeNr;
	for (int i = 0; i < size; i++)
		f(nodeset->nodeTab[i], data);
}
/*
 * Runs an XPath query against `doc` and invokes f(node, data) for each
 * matching node. Does nothing when findNodes() reports no result. The
 * intermediate XPath result is released before returning.
 */
void eachNodeX (htmlDocPtr doc, const char * xpath, node_function_t f, void * data) {
	xmlXPathObjectPtr matches = findNodes(doc, xpath);
	if (matches != NULL) {
		eachNode(matches, f, data);
		xmlXPathFreeObject(matches);
	}
}
/*
 * nthNodeFunctionGenerator(type, x) defines a function
 *
 *     xmlNodePtr nthNodeX<x> (type node, const char * xpath, int n);
 *
 * which runs findNodes<x>(node, xpath) and returns the n-th (0-based) node of
 * the result, or NULL when n is negative, when the query yields nothing, or
 * when fewer than n + 1 nodes matched. The temporary XPath result object is
 * always freed before returning.
 *
 * NOTE(review): the returned pointer is used after the result object is
 * freed; this relies on the node belonging to the document tree rather than
 * to the result. libxml2 reportedly stores copies for namespace nodes, so
 * queries selecting namespace nodes should not use these helpers - confirm.
 */
#define nthNodeFunctionGenerator(type, x) \
	xmlNodePtr nthNodeX##x (type node, const char * xpath, int n) { \
		if (n < 0) /* a negative index would read before nodeTab */ \
			return NULL; \
		xmlXPathObjectPtr nodes = findNodes##x(node, xpath); \
		if (!nodes) \
			return NULL; \
		xmlNodeSetPtr nodeset = nodes->nodesetval; \
		xmlNodePtr toreturn = NULL; \
		if (nodeset && n < nodeset->nodeNr) \
			toreturn = (xmlNodePtr) nodeset->nodeTab[n]; \
		xmlXPathFreeObject(nodes); \
		return toreturn; \
	}
nthNodeFunctionGenerator(htmlDocPtr,) /* nthNodeX: takes a document, uses findNodes */
nthNodeFunctionGenerator(xmlNodePtr, N) /* nthNodeXN: takes a context node, uses findNodesN */
/*
 * EACHNODE(node, nodes) - for-loop header that walks an XPath result.
 *
 * Usage:  EACHNODE(cur, result) { ... use cur ... }
 *
 * node:  an assignable xmlNodePtr; receives the current node on each pass.
 * nodes: XPath result object pointer. NULL, or a result whose nodesetval is
 *        NULL, produces zero iterations.
 *
 * The loop also ends early if a NULL entry is found in the node table,
 * because the assignment to `node` is part of the loop condition.
 * `nodes` is evaluated several times per iteration, so pass a plain
 * variable or a side-effect-free expression.
 *
 * The macro neither runs the query nor frees the result. Alternatively,
 * eachNodeX with a callback performs the lookup and the cleanup itself, so
 * there is no need to call findNodes and free the result separately.
 */
#define EACHNODE(node, nodes) \
	for (int EACHNODE_i = 0; \
		(nodes) && (nodes)->nodesetval \
			&& (EACHNODE_i < (nodes)->nodesetval->nodeNr) \
			&& ((node) = (xmlNodePtr) (nodes)->nodesetval->nodeTab[EACHNODE_i]); \
		EACHNODE_i++)
/* NOTE: the macro below does not work; it is kept commented out for reference only.
#define EACHNODEX(node, target, xpath) \
xmlXPathObjectPtr EACHNODEX_nodes##__LINE__ = findNodes(target, xpath); \
for (size_t EACHNODEX_i = 0; \
EACHNODEX_nodes##__LINE__ ? EACHNODEX_nodes##__LINE__->nodesetval \
? ((EACHNODEX_i < EACHNODEX_nodes##__LINE__->nodesetval->nodeNr) \
&& (node = (xmlNodePtr) EACHNODEX_nodes##__LINE__->nodesetval->nodeTab[EACHNODEX_i])) \
: xmlXPathFreeObject(EACHNODEX_nodes##__LINE__) \
: 0 : 0; \
EACHNODEX_i++)
*/
/*
 * node_function_t-compatible callback that prints the text content of an
 * element node to stdout as "-> content: '<text>'". Non-element nodes and a
 * NULL node are ignored.
 *
 * node: node to print.
 * data: unused; present only to match the callback signature.
 */
void printNode (xmlNodePtr node, void * data) {
	(void) data; /* intentionally unused */
	if (!node || node->type != XML_ELEMENT_NODE)
		return;
	/* xmlNodeGetContent returns a newly allocated string that the caller
	 * must release with xmlFree(); it may also return NULL. */
	char * content = (char *) xmlNodeGetContent(node);
	printf("-> content: '%s'\n", content ? content : "(null)");
	if (content)
		xmlFree(content);
}
/*
 * gnu_code_start / gnu_code_end - bracket a region of code with GCC
 * diagnostic pragmas.
 *
 * gnu_code_start saves the current diagnostic state ("GCC diagnostic push")
 * and then silences -Wpedantic and -Wformat= for the code that follows.
 * gnu_code_end restores the saved state ("GCC diagnostic pop").
 * Every gnu_code_start should be paired with a gnu_code_end.
 *
 * NOTE(review): presumably meant to wrap code that uses GNU extensions such
 * as the lambda macro - confirm against the call sites.
 */
#define gnu_code_start \
_Pragma ("GCC diagnostic push") \
_Pragma ("GCC diagnostic ignored \"-Wpedantic\"") \
_Pragma ("GCC diagnostic ignored \"-Wformat=\"")
#define gnu_code_end \
_Pragma ("GCC diagnostic pop")
/*
 * lambda(l_ret_type, l_arguments, l_body) - anonymous function helper.
 * Source: https://en.wikipedia.org/wiki/Anonymous_function#GCC
 *
 * Expands to a ({ ... }) statement expression that defines a function named
 * l_anonymous_functions_name with the given return type, parenthesised
 * argument list and brace-enclosed body, and whose value is the address of
 * that function. Both the ({ ... }) form and a function definition inside a
 * block are GNU C extensions, so this does not compile as strict ISO C.
 *
 * Example:
 *     eachNodeX(doc, "//a", lambda(void, (xmlNodePtr n, void * d), { printNode(n, d); }), NULL);
 *
 * NOTE(review): the resulting pointer presumably must not be called after
 * the enclosing function has returned - confirm against the GCC
 * nested-function documentation.
 */
#define lambda(l_ret_type, l_arguments, l_body) \
({ \
l_ret_type l_anonymous_functions_name l_arguments \
l_body \
&l_anonymous_functions_name; \
})
/*
 * Returns a newly allocated copy of `i` in which HTML-significant characters
 * are replaced by entities:
 *
 *     <  becomes  &lt;
 *     "  becomes  &quot;
 *     '  becomes  &#39;
 *
 * All other bytes are copied unchanged.
 *
 * i: NUL-terminated input string; NULL yields NULL.
 *
 * Returns the escaped string, which the caller must free(), or NULL when the
 * input is NULL or memory cannot be allocated.
 *
 * NOTE(review): '&' and '>' are passed through unchanged, as in the original
 * set of handled characters. Escape '&' as well if the input may contain
 * text that must not be interpreted as an entity.
 */
char * htmlspecialchars (const char * i) {
	if (!i)
		return NULL;
	size_t cap = 128;
	size_t len = 0;
	char * out = malloc(cap);
	if (!out)
		return NULL;
	for (; *i; i++) {
		const char * entity = NULL;
		switch (*i) {
			case '<':
				entity = "&lt;";
				break;
			case '"':
				entity = "&quot;";
				break;
			case '\'':
				entity = "&#39;";
				break;
			default:
				break;
		}
		size_t need = entity ? strlen(entity) : 1;
		/* Always keep one spare byte for the terminating NUL. */
		if (cap - len < need + 1) {
			size_t grown = cap + cap / 2;
			if (grown <= cap) { /* size_t wrapped around */
				free(out);
				return NULL;
			}
			/* Use a temporary so the buffer is not leaked on failure. */
			char * tmp = realloc(out, grown);
			if (!tmp) {
				free(out);
				return NULL;
			}
			out = tmp;
			cap = grown;
		}
		if (entity)
			memcpy(out + len, entity, need);
		else
			out[len] = *i;
		len += need;
	}
	out[len] = '\0';
	return out;
}