summaryrefslogblamecommitdiffstats
path: root/src/lib.c
blob: 62ddf846a31ce67d4dc75338878a7b194d5d352f (plain) (tree)
1
2
3
4
5
6
7
8
9
                                                                              






                                                                                                                                                                        
                                                                             

                                                                                          
















                                                                                          


                                       

                                                                                                                  



                                                  
                                                       


                             






                                                                                     













                                                                 
 

                                                          


















                                                                                                                                           
                                             




                                                                               
                                                           
                                                        









                                                                                                                     

                            












                                                            


                                                            


                                            

                 

                      
 
/*
 * Parses a NUL-terminated HTML string into a libxml2 document tree.
 *
 * d: HTML source text; NULL yields NULL instead of crashing in strlen().
 * b: base URL handed to the parser; NULL is treated as the empty string.
 *
 * The parser runs with HTML_PARSE_RECOVER and with warnings and errors
 * suppressed. Returns the parsed document, or NULL when d is NULL, the input
 * is too long for the parser's int length argument, the parser context cannot
 * be created, or parsing fails. The caller owns the returned document.
 */
htmlDocPtr parseHtmlDocument (const char * d, const char * b /* base url */) {
	if (!d)
		return NULL;
	if (!b)
		b = "";
	/* htmlCtxtReadMemory takes the length as int: refuse inputs that would not survive the narrowing */
	size_t length = strlen(d);
	int size = (int) length;
	if (size < 0 || (size_t) size != length)
		return NULL;
	htmlParserCtxtPtr parser_context = htmlNewParserCtxt();
	if (!parser_context)
		return NULL;
	htmlDocPtr document = htmlCtxtReadMemory(parser_context, d, size, b, NULL /* encoding */, HTML_PARSE_NOWARNING | HTML_PARSE_NOERROR  | HTML_PARSE_RECOVER);
	htmlFreeParserCtxt(parser_context);
	return document;
}
/*
 * Evaluates an XPath expression against a whole document.
 *
 * document:    document to search.
 * xpath_query: XPath expression as a NUL-terminated string.
 *
 * Returns the XPath result object when it holds a non-empty node set; the
 * caller must release it with xmlXPathFreeObject(). Returns NULL when an
 * argument is NULL, the XPath context cannot be created, the expression fails
 * to evaluate, or the result is empty.
 */
xmlXPathObjectPtr findNodes (htmlDocPtr document, const char * xpath_query) {
	if (!document || !xpath_query)
		return NULL;
	xmlXPathContextPtr xpath_ctx = xmlXPathNewContext(document);
	if (!xpath_ctx)
		return NULL;
	xmlXPathObjectPtr nodes = xmlXPathEvalExpression(BAD_CAST xpath_query, xpath_ctx);
	xmlXPathFreeContext(xpath_ctx);
	/* evaluation yields NULL on an invalid expression, so check before touching nodesetval */
	if (nodes && xmlXPathNodeSetIsEmpty(nodes->nodesetval)) {
		xmlXPathFreeObject(nodes);
		nodes = NULL;
	}
	return nodes;
}
/*
 * Evaluates an XPath expression relative to a given node (the node becomes the
 * XPath context node, so relative paths start from it).
 *
 * node:        context node; its owning document is used for the XPath context.
 * xpath_query: XPath expression as a NUL-terminated string.
 *
 * Returns the XPath result object when it holds a non-empty node set; the
 * caller must release it with xmlXPathFreeObject(). Returns NULL when an
 * argument is NULL, the XPath context cannot be created, the expression fails
 * to evaluate, or the result is empty.
 */
xmlXPathObjectPtr findNodesN (xmlNodePtr node, const char * xpath_query) {
	if (!node || !xpath_query)
		return NULL;
	xmlXPathContextPtr xpath_ctx = xmlXPathNewContext(node->doc);
	if (!xpath_ctx)
		return NULL;
	/* xmlXPathNodeEval installs node as the context node itself, so no separate xmlXPathSetContextNode call is needed */
	xmlXPathObjectPtr nodes = xmlXPathNodeEval(node, BAD_CAST xpath_query, xpath_ctx);
	xmlXPathFreeContext(xpath_ctx);
	/* evaluation yields NULL on failure, so check before touching nodesetval */
	if (nodes && xmlXPathNodeSetIsEmpty(nodes->nodesetval)) {
		xmlXPathFreeObject(nodes);
		nodes = NULL;
	}
	return nodes;
}
/* Callback type for node iteration: receives one node and an opaque caller-supplied data pointer. */
typedef void (*node_function_t) (xmlNodePtr node, void * data);
/*
 * Calls f(node, data) once for every node in an XPath result, in result order.
 * The EACHNODE macro can be used instead.
 *
 * nodes: XPath result object; NULL or a result without a node set is a no-op.
 * f:     callback invoked for each node; NULL is a no-op.
 * data:  opaque pointer passed through to f unchanged.
 *
 * The result object is not freed here.
 */
void eachNode (xmlXPathObjectPtr nodes, node_function_t f, void * data) { /* you can instead use EACHNODE macro */
	if (!nodes || !nodes->nodesetval || !f)
		return;
	xmlNodeSetPtr nodeset = nodes->nodesetval;
	/* the count is read once up front, so the callback sees a fixed number of iterations */
	const int size = nodeset->nodeNr;
	for (int i = 0; i < size; i++)
		f(nodeset->nodeTab[i], data);
}
/*
 * Runs an XPath query on doc and calls f(node, data) for every matching node.
 * The query result is obtained with findNodes() and released before returning.
 * Nothing happens when findNodes() yields no result.
 */
void eachNodeX (htmlDocPtr doc, const char * xpath, node_function_t f, void * data) {
	xmlXPathObjectPtr matches = findNodes(doc, xpath);
	if (matches != NULL) {
		eachNode(matches, f, data);
		xmlXPathFreeObject(matches);
	}
}
/*
 * nthNodeFunctionGenerator(type, x) defines a function
 *
 *     xmlNodePtr nthNodeX<x> (type node, const char * xpath, int n);
 *
 * which evaluates xpath via findNodes<x>(node, xpath) and returns the n-th
 * (zero-based) node of the result. It returns NULL when n is negative, when
 * the query yields no result, or when the result has n or fewer nodes.
 * The XPath result object is freed before returning; only the node pointer
 * taken from it is handed back.
 */
#define nthNodeFunctionGenerator(type, x) \
xmlNodePtr nthNodeX##x (type node, const char * xpath, int n) { \
	if (n < 0) /* a negative index would read before the start of nodeTab */ \
		return NULL; \
	xmlXPathObjectPtr nodes = findNodes##x(node, xpath); \
	if (!nodes) \
		return NULL; \
	xmlNodeSetPtr nodeset = nodes->nodesetval; \
	xmlNodePtr toreturn = NULL; \
	if (nodeset && n < nodeset->nodeNr) \
		toreturn = (xmlNodePtr) nodeset->nodeTab[n]; \
	xmlXPathFreeObject(nodes); \
	return toreturn; \
}
nthNodeFunctionGenerator(htmlDocPtr,) // nthNodeX: searches a whole document
nthNodeFunctionGenerator(xmlNodePtr, N) // nthNodeXN: searches relative to a node
/*
 * EACHNODE(node, nodes) expands to a for-loop header that assigns each node of
 * the XPath result `nodes` to the lvalue `node` in turn; follow it with the
 * loop body. The loop runs zero times when `nodes` or its node set is NULL,
 * and stops early if a NULL entry is encountered in the node table.
 *
 * `nodes` is evaluated several times per iteration, so pass an expression
 * without side effects. The result object is not freed by the macro.
 * Alternatively use eachNodeX with an anonymous function; then there is no
 * need to call findNodes and free the result separately.
 */
#define EACHNODE(node, nodes) \
	for (int EACHNODE_i = 0; \
			(nodes) && (nodes)->nodesetval \
				&& (EACHNODE_i < (nodes)->nodesetval->nodeNr) \
				&& ((node) = (xmlNodePtr) (nodes)->nodesetval->nodeTab[EACHNODE_i]); \
			EACHNODE_i++)
/* // this does not work
#define EACHNODEX(node, target, xpath) \
	xmlXPathObjectPtr EACHNODEX_nodes##__LINE__ = findNodes(target, xpath); \
		for (size_t EACHNODEX_i = 0; \
				EACHNODEX_nodes##__LINE__ ? EACHNODEX_nodes##__LINE__->nodesetval \
					? ((EACHNODEX_i < EACHNODEX_nodes##__LINE__->nodesetval->nodeNr) \
						&& (node = (xmlNodePtr) EACHNODEX_nodes##__LINE__->nodesetval->nodeTab[EACHNODEX_i])) \
					: xmlXPathFreeObject(EACHNODEX_nodes##__LINE__) \
				: 0 : 0; \
				EACHNODEX_i++)
*/
/*
 * node_function_t callback that prints the text content of an element node to
 * stdout as "-> content: '<text>'". Non-element nodes and NULL are ignored.
 *
 * node: node to print.
 * data: unused; present only to match node_function_t.
 *
 * The string returned by xmlNodeGetContent() is allocated by libxml2 and is
 * released here with xmlFree(). If no content could be obtained, an empty
 * string is printed.
 */
void printNode (xmlNodePtr node, void * data) {
	(void) data; /* unused */
	if (!node || node->type != XML_ELEMENT_NODE)
		return;
	xmlChar * content = xmlNodeGetContent(node);
	printf("-> content: '%s'\n", content ? (const char *) content : "");
	if (content)
		xmlFree(content);
}
/*
 * gnu_code_start / gnu_code_end bracket a region of code: gnu_code_start saves
 * the current GCC diagnostic state and ignores -Wpedantic and -Wformat= from
 * that point on; gnu_code_end restores the saved state. Always use them as a
 * matched pair.
 * NOTE(review): presumably meant to wrap uses of GNU extensions such as the
 * lambda macro - confirm against callers.
 */
#define gnu_code_start \
	_Pragma ("GCC diagnostic push") \
	_Pragma ("GCC diagnostic ignored \"-Wpedantic\"") \
	_Pragma ("GCC diagnostic ignored \"-Wformat=\"")
#define gnu_code_end \
	_Pragma ("GCC diagnostic pop") 
/*
 * lambda(l_ret_type, l_arguments, l_body) - anonymous function helper.
 * Source: https://en.wikipedia.org/wiki/Anonymous_function#GCC
 *
 * Expands to a ({ ... }) statement expression that defines a function named
 * l_anonymous_functions_name with return type l_ret_type, parameter list
 * l_arguments and body l_body, and evaluates to the address of that function.
 * l_arguments must include its own parentheses and l_body its own braces.
 * This relies on GNU C extensions (statement expressions, nested functions).
 * NOTE(review): per the GCC nested-function documentation the resulting
 * pointer should not be called after the enclosing function has returned -
 * confirm callers respect this.
 */
#define lambda(l_ret_type, l_arguments, l_body)        \
	({                                                   \
	 l_ret_type l_anonymous_functions_name l_arguments   \
	 l_body                                              \
	 &l_anonymous_functions_name;                        \
	 })
/*
 * Helper for htmlspecialchars: writes the escaped form of src to dst and
 * returns the number of characters produced, not counting the terminating NUL.
 * When dst is NULL nothing is written, so the same routine measures the
 * required buffer size.
 */
static size_t htmlspecialcharsWrite (char * dst, const char * src) {
	size_t w = 0;
	for (; *src; src++) {
		const char * entity;
		switch (*src) {
			case '<':
				entity = "&lt;";
				break;
			case '"':
				entity = "&quot;";
				break;
			case '\'':
				entity = "&apos;";
				break;
			default:
				entity = NULL;
				break;
		}
		if (!entity) {
			if (dst)
				dst[w] = *src;
			w++;
			continue;
		}
		for (; *entity; entity++, w++)
			if (dst)
				dst[w] = *entity;
	}
	if (dst)
		dst[w] = '\0';
	return w;
}
/*
 * Returns a newly allocated copy of i in which '<' is replaced by "&lt;",
 * '"' by "&quot;" and '\'' by "&apos;"; every other character is copied
 * unchanged.
 *
 * i: NUL-terminated input string; NULL yields NULL.
 *
 * Returns the escaped string, which the caller must free(), or NULL when i is
 * NULL or memory cannot be allocated.
 *
 * The output size is computed exactly in a first pass, so the buffer is
 * allocated once and never regrown.
 * NOTE(review): '&' and '>' are not escaped, unlike PHP's htmlspecialchars -
 * confirm this is intended before relying on the output in HTML context.
 */
char * htmlspecialchars (const char * i) { /* remember to free the output */
	if (!i)
		return NULL;
	size_t length = htmlspecialcharsWrite(NULL, i);
	char * o = malloc(length + 1);
	if (!o)
		return NULL;
	htmlspecialcharsWrite(o, i);
	return o;
}