diff options
-rw-r--r-- | Makefile | 6 | ||||
-rw-r--r-- | README.md | 14 | ||||
-rw-r--r-- | debian/changelog | 9 | ||||
-rw-r--r-- | debian/control | 2 | ||||
-rw-r--r-- | src/api.c | 6 | ||||
-rw-r--r-- | src/hp.html | 11 | ||||
-rw-r--r-- | src/httpd.c | 38 | ||||
-rw-r--r-- | src/log.c | 23 | ||||
-rw-r--r-- | src/main.c | 2 | ||||
-rw-r--r-- | src/structs.c | 27 | ||||
-rw-r--r-- | test/bug/.gitignore | 3 | ||||
-rw-r--r-- | test/bug/Makefile | 6 | ||||
-rw-r--r-- | test/bug/example-valgrind.txt | 250 | ||||
-rw-r--r-- | test/bug/prog.c | 29 |
14 files changed, 381 insertions, 45 deletions
@@ -1,8 +1,12 @@ DESTDIR=/ +.NOTPARALLEL: default: mkdir tmp -p - xxd -i < src/hp.html > tmp/hp.xxd + cp src/hp.html tmp/hp.html + -[ ! -f tmp/hp.css ] && wget -c https://sijanec.eu/assets/css/styles.css?ref=sear.c-make -Otmp/hp.css + php -r "file_put_contents('tmp/hp.html', str_replace('INSERT-STYLESHEET-HERE', str_replace('%', '%%', file_get_contents('tmp/hp.css')), file_get_contents('tmp/hp.html')));" + xxd -i < tmp/hp.html > tmp/hp.xxd echo ', 0' >> tmp/hp.xxd xxd -i < src/osdd.xml > tmp/osdd.xxd echo ', 0' >> tmp/osdd.xxd @@ -16,12 +16,12 @@ service sear.c start * a POSIX system * GNU C library -* GNU compiler collection (it's written in GNU C - it uses ~~anonymous~~ nested functions) - - anonymous functions were a pain to debug +* GNU compiler collection (it's written in GNU C - it uses nested functions) * GNU Make * libxml2-dev (for the simple HTML/1.0 client and HTML parser) * libmicrohttpd-dev (for serving results - use a reverse proxy, such as nginx, for HTTPS) * xxd (for converting HTML pages into C arrays when compiling from source) +* php-cli for a single line of Makefile (and I talk about bloat) ## compiling from source @@ -37,8 +37,10 @@ make * navigate to [http://localhost:7327](http://localhost:7327) and do a couple of searches to see if everything works * the horseshoe button redirects directly to the first result without wasting time on the results page. use if you feel lucky. (BP) * the painting button performs a search for images. PRIVACY WARNING: images are loaded directly from servers (not from google) -* check logs by navigating to /logs.html +* ~~check logs by navigating to /logs.html~~ logging to memory was disabled for consuming less memory, you can define SC\_LOGMEM while compiling to enable deprecated memory logging support * program also writes all logs to standard error +* setting the h parameter will rewrite links to HTTP from HTTPS +* setting the l parameter with a number will limit number of displayed links to that number. 
## prebuilt binaries @@ -59,3 +61,9 @@ before downloading, check that the build passed, indicated below on the badge: ![screenshot in chromium 3](https://cdn.sijanec.eu/img/2021/04/sear.c_prtsc3.png) ![screenshot in chromium 4](https://cdn.sijanec.eu/img/2021/04/sear.c_prtsc4.png) ![screenshot in chromium 5](https://cdn.sijanec.eu/img/2021/04/sear.c_prtsc5.png) + +# additional information + +* valgrind reports a memory leak, and the leak grows with every API search query. run `make valgrind` and you'll see it. I was unable to find the bug, but it just bothers me. I wrote a small bug PoC (test/bug) but I could not replicate the bug (`cd test/bug; make; make valgrind; less valgrind-out.txt` - process exits with no leaks possible). Example output from sear.c valgrind with one request done is included in test/bug/example-valgrind.txt. Such a small memory leak is not a problem, since we store all extracted data from the query indefinitely anyways, but it's still pretty dumb to leak memory. +* memory allocations are not checked for failures. This needs to be done to use GCC's `-fanalyzer` +* `__attribute__`s such as nonnull are not set in struct members of query types and in functions such as htmlspecialchars but `if (!arg) return NULL` is done instead, which is poor coding style and static analysis can't be done in this case. This needs to be fixed to use GCC's `-fanalyzer`. 
diff --git a/debian/changelog b/debian/changelog index bec307a..7845d90 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,12 @@ +sear.c (0.0.12-1) stable; urgency=low + + * added l and h parameters + * removed logging to memory + * embedding CSS in response + * added dependency: php-cli + + -- Anton Luka Šijanec <anton@sijanec.eu> Sun, 22 Aug 2021 04:00:00 +0200 + sear.c (0.0.11-1) stable; urgency=low * added image search, fixed some memory leaks with valgrind diff --git a/debian/control b/debian/control index f315f85..3da2cc8 100644 --- a/debian/control +++ b/debian/control @@ -2,7 +2,7 @@ Source: sear.c Section: web Priority: optional Maintainer: Anton Luka Šijanec <anton@sijanec.eu> -Build-Depends: debhelper (>=11~), libmicrohttpd-dev, libxml2-dev, dh-systemd (>=1.5) +Build-Depends: debhelper (>=11~), libmicrohttpd-dev, libxml2-dev, php-cli, dh-systemd (>=1.5) Standards-Version: 4.1.4 Homepage: http://git.sijanec.eu/sijanec/sear.c @@ -142,11 +142,7 @@ struct sc_query * sc_query_google (const char * s, struct sc_cache * c, struct s * result dates are sometimes relative ("an hour ago") and heavily depend on the client location, based on IP. - we won't parse those yet * I couldn't find anything with ratings, so we won't parse thouse either yet - * captcha: google knows that this nokia phone we're pretending to be doesn't support javascript - - the request limiting captcha must work on a phone without javascript. it is probably loaded inside an iframe, but has - origin protection, so we can't just solve it client-side. we would have to proxy images and create some sort of a session - based http request-response based user interface so we can ask the user to complete the captcha. this is not yet - implemeted and will be hard work. 
+ * captcha: google knows that this nokia phone we're pretending to be doesn't support javascript, but does not care, and loads an obfuscated captcha anyways that would be hard to defeat for now without some kind of chromium emulation we really don't want. */ int rs = 1; char * xpath = NULL; diff --git a/src/hp.html b/src/hp.html index 7d1307b..e1ab301 100644 --- a/src/hp.html +++ b/src/hp.html @@ -1,14 +1,16 @@ <!DOCTYPE html> <html lang=sl> <!-- this file is a printf format. be sure to escape percent signs with percent percent. --> - <!-- this format requires the following types (in order): query string, query string, result info string, results html string --> + <!-- this format requires the following types (in order): query string, query string, additional form elements, result info string, results html string --> <head> <meta charset=UTF-8 /> <title> %s :: sear.c </title> <meta name=viewport content="width=device-width, initial-scale=1.0"> - <link rel=stylesheet href=//sijanec.eu/assets/css/styles.css?ref=sear.c /> <!-- TODO: direktno vstavljanje v dokument --> + <style> + INSERT-STYLESHEET-HERE + </style> <link rel="shortcut icon" href=data:image/x-icon;, type=image/x-icon> <!-- prevents favicon lookups --> <link rel=icon href=data:;base64,iVBORw0KGgo=> <link rel=search type=application/opensearchdescription+xml href=/osdd.xml> @@ -67,12 +69,13 @@ </style> </head> <body> - <form class=container> + <form class=container action=.> <input accesskey=4 type=text name=q value="%s" placeholder="sear.c ..." 
/> <!-- see www.standardaccesskeys.com --> <input type=submit value=🔍 /> <!-- magnifying glass emoji --> <input type=submit name=f value=Ʊ /> <!-- horseshoe unicode character --> <input type=submit name=i value=🖼 /> <!-- framed picture emoji - img search --> - <input type=submit name=v value=🎬 hidden=hidden /> <!-- that thing they use in movies - vid search N/I --> + <!-- <input type=submit name=v value=🎬/> --> <!-- that thing they use in movies - vid search N/I --> + %s </form> <h3> %s diff --git a/src/httpd.c b/src/httpd.c index 6e2c4bf..61c775d 100644 --- a/src/httpd.c +++ b/src/httpd.c @@ -1,19 +1,24 @@ -char * sc_queryhtml (struct sc_query * q) { /* remember to free returned string in the caller */ /* caller takes care of locking */ +char * sc_https2http (char * i) { + if (i && strlen(i) >= 4 && i[4] == 's') + memmove(i+4, i+5, strlen(i)-3); + return i; +} +char * sc_queryhtml (struct sc_query * q, const char * h, size_t l) { /* remember to free returned string in the caller */ /* caller takes care of freeing */ size_t resultshtml_written = 0; size_t resultshtml_sizeof = SC_ALLOC_CHUNK; char * resultshtml = malloc(resultshtml_sizeof); resultshtml[0] = '\0'; - for (size_t i = 0; i < q->results_length; i++) { + for (size_t i = 0; i < q->results_length && (!l || i < l); i++) { #define SC_HRC(string, wanted) \ if (string##_written+wanted >= string##_sizeof) { \ string##_sizeof = (string##_written+wanted+1)*SC_REALLOC_K; \ string = realloc(string, string##_sizeof); \ } -#define SC_HRF "<div class=result id=result%lu><h4><a href=\"%s\" accesskey=%lu>%s</a> " \ +#define SC_HRF "<div class=result id=result%zu><h4><a href=\"%s\" accesskey=%zu>%s</a> " \ "<span class=breadcrumb>%s</span></h4><p>%s</p></div>" -#define SC_HIF "<a class=result id=result%lu href=\"%s\" accesskey=%lu><img data-title=\"%s\"" \ +#define SC_HIF "<a class=result id=result%zu href=\"%s\" accesskey=%zu><img data-title=\"%s\"" \ "data-breadcrumb=\"%s\" src=\"%s\" /></a>" -#define SC_HRA i, safeurl 
? safeurl : SC_I18N_NO_HREFLINK, i, safetitle ? safetitle : SC_I18N_NO_TITLE, \ +#define SC_HRA i, safeurl ? h ? sc_https2http(safeurl) : safeurl : SC_I18N_NO_HREFLINK, i, safetitle ? safetitle : SC_I18N_NO_TITLE, \ safebreadcrumbs ? safebreadcrumbs : safeurl ? safeurl : SC_I18N_NO_HREFLINK, safebody ? safebody : SC_I18N_NO_DESCRIPTION char * safetitle = htmlspecialchars(q->results[i]->title); /* htmlspecialchars returns NULL if input is null */ char * safebody = htmlspecialchars(q->results[i]->desc); @@ -34,7 +39,7 @@ char * sc_queryhtml (struct sc_query * q) { /* remember to free returned string free(safebody); free(safeurl); } -#define SC_HRS SC_I18N_NUMBER_OF_RESULTS ": %ld | " SC_I18N_QUERY_TIME ": %s" +#define SC_HRS SC_I18N_NUMBER_OF_RESULTS ": %zu | " SC_I18N_QUERY_TIME ": %s" char formatted_time[128]; struct tm tm; localtime_r(&q->lookup_time, &tm); @@ -42,12 +47,13 @@ char * sc_queryhtml (struct sc_query * q) { /* remember to free returned string char queryinfo[256]; snprintf(queryinfo, 256, SC_HRS, q->results_length, formatted_time); char * safequery = htmlspecialchars(q->string); - char * response = malloc(strlen((char *) sc_hp)+2*strlen(safequery)+strlen(queryinfo)+strlen(resultshtml)); - sprintf(response, (char *) sc_hp, safequery, safequery, queryinfo, resultshtml); + char * response = malloc(strlen((char *) sc_hp)+2*strlen(safequery)+strlen(queryinfo)+strlen(resultshtml)+strlen("<input type=hidden name=h value=h />")); + sprintf(response, (char *) sc_hp, safequery, safequery, h ? 
"<input type=hidden name=h value=h />" : "", queryinfo, resultshtml); free(safequery); free(resultshtml); return response; } +#ifdef SC_LOGMEM char * sc_logshtml (struct sc_cache * c) { /* remember to free on caller, remember not to report errors here whilst locked */ char * html = malloc(SC_ALLOC_CHUNK); html[0] = '\0'; @@ -59,8 +65,8 @@ char * sc_logshtml (struct sc_cache * c) { /* remember to free on caller, rememb return NULL; } for (size_t i = 0; i < c->logentries_length; i++) { -#define SC_HLF "<div class=result id=log%lu>[<span class=%s>%s</span>] %s " \ - "<a href=\"" SC_I18N_GIT_URL "/src/branch/master/%s#L%lu\">%s()@%s:%lu</a>: %s</div>" +#define SC_HLF "<div class=result id=log%zu>[<span class=%s>%s</span>] %s " \ + "<a href=\"" SC_I18N_GIT_URL "/src/branch/master/%s#L%zu\">%s()@%s:%zu</a>: %s</div>" #define SC_HLA i, \ sc_log_str(c->logentries[i]->type), \ sc_log_str(c->logentries[i]->type), \ @@ -84,7 +90,8 @@ char * sc_logshtml (struct sc_cache * c) { /* remember to free on caller, rememb pthread_rwlock_unlock(c->logentries_lock); return html; } -int sc_httpd (void * cls, +#endif +enum MHD_Result sc_httpd (void * cls, struct MHD_Connection * connection, const char * url, const char * method, @@ -138,6 +145,7 @@ int sc_httpd (void * cls, sprintf(response, sc_osdd, host); content_type = "application/opensearchdescription+xml"; break; +#ifdef SC_LOGMEM case 'l': /* logs.html */ { char * logshtml = sc_logshtml(c); @@ -146,10 +154,11 @@ int sc_httpd (void * cls, free(logshtml); } break; +#endif } if (!response) { - response = malloc(strlen((char *) sc_hp)+strlen(SC_I18N_HP_HEADING)+strlen(SC_I18N_HP_BODY)); - sprintf(response, (char *) sc_hp, "", "", SC_I18N_HP_HEADING, SC_I18N_HP_BODY); + response = malloc(strlen((char *) sc_hp)+strlen(SC_I18N_HP_HEADING)+strlen(SC_I18N_HP_BODY)+strlen("<input type=hidden name=h value=h />")); + sprintf(response, (char *) sc_hp, "", "", MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "h") ? 
"<input type=hidden name=h value=h />" : "", SC_I18N_HP_HEADING, SC_I18N_HP_BODY); } } else { int already_retried = 0; @@ -159,7 +168,8 @@ retry: if (!strcmp(c->queries[i]->string, query) && c->queries[i]->opt == opt) q = c->queries[i]; if (q) { - response = sc_queryhtml(q); /* MHD_create_response_from_buffer will free response (; */ + const char * l = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "l"); + response = sc_queryhtml(q, MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "h" /* insecure http */), atoi(l ? l : "0")); /* MHD_create_response_from_buffer will free response (; */ if (MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "f") && q->results_length > 0) { status_code = 307; location = q->results[0]->url ? q->results[0]->url : SC_I18N_NO_HREFLINK; @@ -13,6 +13,7 @@ const char * sc_log_str (int t) { } /* interestingly, gcc figures out there's no way for code to reach this section, therefore there's no warning "-Wreturn-type" */ } +#ifdef SC_LOGMEM int sc_logentry_free (struct sc_logentry * l) { free(l->message); l->message = NULL; free(l); @@ -22,8 +23,11 @@ struct sc_logentry * sc_logentry_init () { struct sc_logentry * l = calloc(1, sizeof(struct sc_logentry)); return l; } +#endif int sc_push_log (unsigned char t, struct sc_cache * c, const char * ca, char * f, size_t l, unsigned short int isf, char * m, ...) 
{ #define SC_PLL c->logentries[c->logentries_length-1] + char * compiled_message = NULL; +#ifdef SC_LOGMEM if (!c) return -1; pthread_rwlock_t * lock = c->logentries_lock; @@ -34,6 +38,7 @@ int sc_push_log (unsigned char t, struct sc_cache * c, const char * ca, char * f if (c->logentries_sizeof <= c->logentries_length) SC_BIGGER_ARRAY(c->logentries, sc_logentry, 1); c->logentries_length++; +#endif size_t strlenm = strlen(m); size_t va_count = parse_printf_format(m, 0, NULL); if (isf && va_count > 0) { @@ -41,21 +46,29 @@ int sc_push_log (unsigned char t, struct sc_cache * c, const char * ca, char * f va_start(ap, m); va_copy(ap2, ap); strlenm = vsnprintf(NULL, 0, m, ap); - SC_PLL->message = malloc(sizeof(char)*strlenm+1); - vsnprintf(SC_PLL->message, strlenm+1, m, ap2); + compiled_message = malloc(sizeof(char)*strlenm+1); + vsnprintf(compiled_message, strlenm+1, m, ap2); va_end(ap); va_end(ap2); } else { - SC_PLL->message = malloc(sizeof(char)*strlenm+1); - strcpy(SC_PLL->message, m); + compiled_message = malloc(sizeof(char)*strlenm+1); + strcpy(compiled_message, m); } +#ifdef SC_LOGMEM SC_PLL->file = f; SC_PLL->line = l; SC_PLL->function = ca; SC_PLL->time = time(NULL); SC_PLL->type = t; - fprintf(stderr, "[sear.c] %s %s()@%s:%lu: %s\n", sc_log_str(t), ca, f, l, SC_PLL->message); /* in posix, this is thread safe */ + SC_PLL->message = compiled_message; +#endif + fprintf(stderr, "[sear.c] %s %s()@%s:%zu: %s\n", sc_log_str(t), ca, f, l, compiled_message); /* in posix, this is thread safe */ +#ifdef SC_LOGMEM if (lock && pthread_rwlock_unlock(lock)) return -4; +#endif +#ifndef SC_LOGMEM + free(compiled_message); +#endif return 1; } @@ -72,7 +72,7 @@ int main (int argc, char ** argv) { fflush(stderr); rc: xmlCleanupParser(); + MHD_stop_daemon(d); /* stop the daemon first and the free, threads might still be running */ sc_cache_free(c); - MHD_stop_daemon(d); return rs; } diff --git a/src/structs.c b/src/structs.c index dce460e..83d19b9 100644 --- a/src/structs.c +++ 
b/src/structs.c @@ -19,6 +19,8 @@ } while (0); #define SC_OPT_TYPE unsigned char #define SC_OPT_IMAGE (1 << 0) +#define SC_STR(x) #x +#ifdef SC_LOGMEM struct sc_logentry { unsigned char type; /* SC_LOG_ERROR, SC_LOG_WARNING, SC_LOG_INFO, SC_LOG_DEBUG */ size_t line; @@ -29,7 +31,7 @@ struct sc_logentry { }; int sc_logentry_free (struct sc_logentry * l); /* defined in log.c */ struct sc_logentry * sc_logentry_init (); /* defined in log.c */ - +#endif struct sc_result { struct sc_query * query; /* nofree - free from sc_cache */ char * url; /* yesfree - url of referer page when image searching */ @@ -90,38 +92,41 @@ int sc_query_free (struct sc_query * q) { struct sc_cache { SC_IN_STRUCT_ARRAY(struct sc_query, queries); /* yesfree */ pthread_rwlock_t * queries_lock; +#ifdef SC_LOGMEM SC_IN_STRUCT_ARRAY(struct sc_logentry, logentries); /* yesfree */ pthread_rwlock_t * logentries_lock; +#endif }; struct sc_cache * sc_cache_init() { +#define SC_CILI(name) do { name##_lock = malloc(sizeof(pthread_rwlock_t)); pthread_rwlock_init(name##_lock, NULL); } while (0) struct sc_cache * c = calloc(1, sizeof(struct sc_cache)); c->queries_sizeof = SC_ALLOC_CHUNK; - c->logentries_sizeof = SC_ALLOC_CHUNK; c->queries = calloc(c->queries_sizeof, sizeof(struct sc_query *)); +#ifdef SC_LOGMEM + c->logentries_sizeof = SC_ALLOC_CHUNK; c->logentries = calloc(c->logentries_sizeof, sizeof(struct sc_logentry *)); - for (size_t i = 0; i < c->logentries_sizeof; i++) { - /* c->queries[i] = sc_query_init(); */ /* queries are not inited for performance reasons, they are inited by query function */ - /* c->queries[i]->cache = c; */ + for (size_t i = 0; i < c->logentries_sizeof; i++) c->logentries[i] = sc_logentry_init(); - } -#define SC_CILI(name) do { name##_lock = malloc(sizeof(pthread_rwlock_t)); pthread_rwlock_init(name##_lock, NULL); } while (0) - SC_CILI(c->queries); SC_CILI(c->logentries); +#endif + SC_CILI(c->queries); return c; } int sc_cache_free(struct sc_cache * c) { + #define 
SC_CFLD(name) do { pthread_rwlock_destroy(name##_lock); free(name##_lock); } while(0) if (!c) return -1; - fprintf(stderr, "c->queries_sizeof = %lu\n", c->queries_sizeof); + fprintf(stderr, "c->queries_sizeof = %zu\n", c->queries_sizeof); for (size_t i = 0; i < c->queries_sizeof; i++) sc_query_free(c->queries[i]); free(c->queries); +#ifdef SC_LOGMEM for (size_t i = 0; i < c->logentries_sizeof; i++) sc_logentry_free(c->logentries[i]); + SC_CFLD(c->logentries); free(c->logentries); - #define SC_CFLD(name) do { pthread_rwlock_destroy(name##_lock); free(name##_lock); } while(0) +#endif SC_CFLD(c->queries); - SC_CFLD(c->logentries); free(c); return 1; } diff --git a/test/bug/.gitignore b/test/bug/.gitignore new file mode 100644 index 0000000..973b081 --- /dev/null +++ b/test/bug/.gitignore @@ -0,0 +1,3 @@ +a.out +s.html +valgrind-out.txt diff --git a/test/bug/Makefile b/test/bug/Makefile new file mode 100644 index 0000000..88dfd75 --- /dev/null +++ b/test/bug/Makefile @@ -0,0 +1,6 @@ +default: + curl -H "User-Agent: Nokia WAP Gateway 4.1 CD1/ECD13_D/4.1.04)" "http://wap.google.com/search?q=libxml2&num=100&ie=UTF-8" > s.html + gcc -Wall -pedantic -g prog.c $$(xml2-config --cflags --libs) + +valgrind: + valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes --verbose --log-file=valgrind-out.txt ./a.out s.html diff --git a/test/bug/example-valgrind.txt b/test/bug/example-valgrind.txt new file mode 100644 index 0000000..13a8bea --- /dev/null +++ b/test/bug/example-valgrind.txt @@ -0,0 +1,250 @@ +==31983== Memcheck, a memory error detector +==31983== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al. 
+==31983== Using Valgrind-3.16.1-36d6727e1d-20200622X and LibVEX; rerun with -h for copyright info +==31983== Command: ./sear.c +==31983== Parent PID: 31982 +==31983== +--31983-- +--31983-- Valgrind options: +--31983-- --leak-check=full +--31983-- --show-leak-kinds=all +--31983-- --track-origins=yes +--31983-- --verbose +--31983-- --log-file=valgrind-out.txt +--31983-- Contents of /proc/version: +--31983-- Linux version 5.10.0-8-686-pae (debian-kernel@lists.debian.org) (gcc-10 (Debian 10.2.1-6) 10.2.1 20210110, GNU ld (GNU Binutils for Debian) 2.35.2) #1 SMP Debian 5.10.46-4 (2021-08-03) +--31983-- +--31983-- Arch and hwcaps: X86, LittleEndian, x86-mmxext-sse1-sse2-sse3 +--31983-- Page sizes: currently 4096, max supported 4096 +--31983-- Valgrind library directory: /usr/lib/i386-linux-gnu/valgrind +--31983-- Reading syms from /home/a/projects/sear.c/sear.c +--31983-- Reading syms from /usr/lib/i386-linux-gnu/ld-2.31.so +--31983-- Considering /usr/lib/debug/.build-id/dc/670cc30bc6fa0d1eda127a7b90991fe834df22.debug .. +--31983-- .. build-id is valid +--31983-- Reading syms from /usr/lib/i386-linux-gnu/valgrind/memcheck-x86-linux +--31983-- Considering /usr/lib/debug/.build-id/31/675c23a8bb0b9cc8165905b7c42c350891faba.debug .. +--31983-- .. build-id is valid +--31983-- object doesn't have a dynamic symbol table +--31983-- Scheduler: using generic scheduler lock implementation. +--31983-- Reading suppressions file: /usr/lib/i386-linux-gnu/valgrind/default.supp +==31983== embedded gdbserver: reading from /tmp/vgdb-pipe-from-vgdb-to-31983-by-a-on-??? +==31983== embedded gdbserver: writing to /tmp/vgdb-pipe-to-vgdb-from-31983-by-a-on-??? +==31983== embedded gdbserver: shared mem /tmp/vgdb-pipe-shared-mem-vgdb-31983-by-a-on-??? +==31983== +==31983== TO CONTROL THIS PROCESS USING vgdb (which you probably +==31983== don't want to do, unless you know exactly what you're doing, +==31983== or are doing some strange experiment): +==31983== /usr/bin/vgdb --pid=31983 ...command... 
+==31983== +==31983== TO DEBUG THIS PROCESS USING GDB: start GDB like this +==31983== /path/to/gdb ./sear.c +==31983== and then give GDB the following command +==31983== target remote | /usr/bin/vgdb --pid=31983 +==31983== --pid is optional if only one valgrind process is running +==31983== +--31983-- REDIR: 0x401d950 (ld-linux.so.2:strlen) redirected to 0x580c489d (vgPlain_x86_linux_REDIR_FOR_strlen) +--31983-- REDIR: 0x401d6b0 (ld-linux.so.2:index) redirected to 0x580c4878 (vgPlain_x86_linux_REDIR_FOR_index) +--31983-- Reading syms from /usr/lib/i386-linux-gnu/valgrind/vgpreload_core-x86-linux.so +--31983-- Considering /usr/lib/debug/.build-id/e8/1255a6ad8a76ac3f8417dea7a2845c6c0c4fa1.debug .. +--31983-- .. build-id is valid +--31983-- Reading syms from /usr/lib/i386-linux-gnu/valgrind/vgpreload_memcheck-x86-linux.so +--31983-- Considering /usr/lib/debug/.build-id/a8/40b96e742e02d898730a45f3e1e7a93396ec83.debug .. +--31983-- .. build-id is valid +==31983== WARNING: new redirection conflicts with existing -- ignoring it +--31983-- old: 0x0401d950 (strlen ) R-> (0000.0) 0x580c489d vgPlain_x86_linux_REDIR_FOR_strlen +--31983-- new: 0x0401d950 (strlen ) R-> (2007.0) 0x04839c30 strlen +--31983-- Reading syms from /usr/lib/i386-linux-gnu/libxml2.so.2.9.10 +--31983-- object doesn't have a symbol table +--31983-- Reading syms from /usr/lib/i386-linux-gnu/libmicrohttpd.so.12.57.0 +--31983-- object doesn't have a symbol table +--31983-- Reading syms from /usr/lib/i386-linux-gnu/libm-2.31.so +--31983-- Considering /usr/lib/debug/.build-id/96/3a273546c86b705fac6a35dc00ec08425a6aab.debug .. +--31983-- .. build-id is valid +--31983-- Reading syms from /usr/lib/i386-linux-gnu/libpthread-2.31.so +--31983-- Considering /usr/lib/debug/.build-id/b9/08ee509a4f0d7a9b8a683fafdd56fc2b8c1452.debug .. +--31983-- .. 
build-id is valid +--31983-- Reading syms from /usr/lib/i386-linux-gnu/libc-2.31.so +--31983-- Considering /usr/lib/debug/.build-id/5e/c744c5f00b41d8449d105c55ebd96c3efa3a0f.debug .. +--31983-- .. build-id is valid +--31983-- Reading syms from /usr/lib/i386-linux-gnu/libdl-2.31.so +--31983-- Considering /usr/lib/debug/.build-id/59/fbf8455e886a9ad5a97e562674a0506797c72d.debug .. +--31983-- .. build-id is valid +--31983-- Reading syms from /usr/lib/i386-linux-gnu/libicuuc.so.67.1 +--31983-- object doesn't have a symbol table +--31983-- Reading syms from /usr/lib/i386-linux-gnu/libz.so.1.2.11 +--31983-- object doesn't have a symbol table +--31983-- Reading syms from /usr/lib/i386-linux-gnu/liblzma.so.5.2.5 +--31983-- object doesn't have a symbol table +--31983-- Reading syms from /usr/lib/i386-linux-gnu/libgnutls.so.30.29.1 +--31983-- object doesn't have a symbol table +--31983-- Reading syms from /usr/lib/i386-linux-gnu/libicudata.so.67.1 +--31983-- object doesn't have a symbol table +--31983-- Reading syms from /usr/lib/i386-linux-gnu/libstdc++.so.6.0.28 +--31983-- object doesn't have a symbol table +--31983-- Reading syms from /usr/lib/i386-linux-gnu/libgcc_s.so.1 +--31983-- object doesn't have a symbol table +--31983-- Reading syms from /usr/lib/i386-linux-gnu/libp11-kit.so.0.3.0 +--31983-- object doesn't have a symbol table +--31983-- Reading syms from /usr/lib/i386-linux-gnu/libidn2.so.0.3.7 +--31983-- object doesn't have a symbol table +--31983-- Reading syms from /usr/lib/i386-linux-gnu/libunistring.so.2.1.0 +--31983-- object doesn't have a symbol table +--31983-- Reading syms from /usr/lib/i386-linux-gnu/libtasn1.so.6.6.0 +--31983-- object doesn't have a symbol table +--31983-- Reading syms from /usr/lib/i386-linux-gnu/libnettle.so.8.4 +--31983-- object doesn't have a symbol table +--31983-- Reading syms from /usr/lib/i386-linux-gnu/libhogweed.so.6.4 +--31983-- object doesn't have a symbol table +--31983-- Reading syms from 
/usr/lib/i386-linux-gnu/libgmp.so.10.4.1 +--31983-- object doesn't have a symbol table +--31983-- Reading syms from /usr/lib/i386-linux-gnu/libffi.so.7.1.0 +--31983-- object doesn't have a symbol table +--31983-- REDIR: 0x4c0e260 (libc.so.6:strncasecmp) redirected to 0x482f1f0 (_vgnU_ifunc_wrapper) +--31983-- REDIR: 0x4c14180 (libc.so.6:memrchr) redirected to 0x482f1f0 (_vgnU_ifunc_wrapper) +--31983-- REDIR: 0x4c27d60 (libc.so.6:wcslen) redirected to 0x482f1f0 (_vgnU_ifunc_wrapper) +--31983-- REDIR: 0x4c0cd50 (libc.so.6:strcmp) redirected to 0x482f1f0 (_vgnU_ifunc_wrapper) +--31983-- REDIR: 0x4c0dfd0 (libc.so.6:memmove) redirected to 0x482f1f0 (_vgnU_ifunc_wrapper) +--31983-- REDIR: 0x4c0e3a0 (libc.so.6:memcpy) redirected to 0x482f1f0 (_vgnU_ifunc_wrapper) +--31983-- REDIR: 0x4c0df90 (libc.so.6:bcmp) redirected to 0x482f1f0 (_vgnU_ifunc_wrapper) +--31983-- REDIR: 0x4c0e1a0 (libc.so.6:stpcpy) redirected to 0x482f1f0 (_vgnU_ifunc_wrapper) +--31983-- REDIR: 0x4c0ccd0 (libc.so.6:strcat) redirected to 0x482f1f0 (_vgnU_ifunc_wrapper) +--31983-- REDIR: 0x4c0cdc0 (libc.so.6:strcpy) redirected to 0x482f1f0 (_vgnU_ifunc_wrapper) +--31983-- REDIR: 0x4c95bd0 (libc.so.6:__memcpy_chk) redirected to 0x482f1f0 (_vgnU_ifunc_wrapper) +--31983-- REDIR: 0x4c0cd10 (libc.so.6:index) redirected to 0x482f1f0 (_vgnU_ifunc_wrapper) +--31983-- REDIR: 0x4c0d290 (libc.so.6:strlen) redirected to 0x482f1f0 (_vgnU_ifunc_wrapper) +--31983-- REDIR: 0x4c0e020 (libc.so.6:memset) redirected to 0x482f1f0 (_vgnU_ifunc_wrapper) +--31983-- REDIR: 0x4c0d300 (libc.so.6:strncat) redirected to 0x482f1f0 (_vgnU_ifunc_wrapper) +--31983-- REDIR: 0x4c95c20 (libc.so.6:__memmove_chk) redirected to 0x482f1f0 (_vgnU_ifunc_wrapper) +--31983-- REDIR: 0x4c0df50 (libc.so.6:memchr) redirected to 0x482f1f0 (_vgnU_ifunc_wrapper) +--31983-- REDIR: 0x4c0d380 (libc.so.6:strncpy) redirected to 0x482f1f0 (_vgnU_ifunc_wrapper) +--31983-- REDIR: 0x4c0e1e0 (libc.so.6:stpncpy) redirected to 0x482f1f0 (_vgnU_ifunc_wrapper) +--31983-- 
REDIR: 0x4c0d2d0 (libc.so.6:strnlen) redirected to 0x482f1f0 (_vgnU_ifunc_wrapper) +--31983-- REDIR: 0x4c0d340 (libc.so.6:strncmp) redirected to 0x482f1f0 (_vgnU_ifunc_wrapper) +--31983-- REDIR: 0x4c0d3c0 (libc.so.6:rindex) redirected to 0x482f1f0 (_vgnU_ifunc_wrapper) +--31983-- REDIR: 0x4c0ce00 (libc.so.6:strcspn) redirected to 0x482f1f0 (_vgnU_ifunc_wrapper) +--31983-- REDIR: 0x4c0d400 (libc.so.6:strpbrk) redirected to 0x482f1f0 (_vgnU_ifunc_wrapper) +--31983-- REDIR: 0x4c0d6b0 (libc.so.6:strspn) redirected to 0x482f1f0 (_vgnU_ifunc_wrapper) +--31983-- REDIR: 0x4c0db60 (libc.so.6:strstr) redirected to 0x483e8e0 (strstr) +--31983-- REDIR: 0x4c26f10 (libc.so.6:__GI_strrchr) redirected to 0x4839670 (__GI_strrchr) +--31983-- REDIR: 0x4c273b0 (libc.so.6:__GI_strlen) redirected to 0x4839bb0 (__GI_strlen) +--31983-- REDIR: 0x4c14430 (libc.so.6:__GI_strncmp) redirected to 0x483a2e0 (__GI_strncmp) +--31983-- REDIR: 0x4c26d50 (libc.so.6:__GI_strchr) redirected to 0x4839790 (__GI_strchr) +--31983-- REDIR: 0x4c27160 (libc.so.6:__GI_strcmp) redirected to 0x483ab40 (__GI_strcmp) +--31983-- REDIR: 0x4c09030 (libc.so.6:malloc) redirected to 0x48365d0 (malloc) +--31983-- REDIR: 0x4c26a20 (libc.so.6:__GI_memcpy) redirected to 0x483b890 (__GI_memcpy) +--31983-- REDIR: 0x4c09df0 (libc.so.6:calloc) redirected to 0x4838970 (calloc) +--31983-- REDIR: 0x4c144e0 (libc.so.6:__strlen_sse2_bsf) redirected to 0x4839b90 (strlen) +--31983-- REDIR: 0x4cd1cd0 (libc.so.6:__memcpy_ssse3) redirected to 0x483b4b0 (memcpy) +--31983-- REDIR: 0x4c254c0 (libc.so.6:__strchr_sse2_bsf) redirected to 0x4839820 (index) +--31983-- REDIR: 0x4cd1cc0 (libc.so.6:__memcpy_chk_ssse3) redirected to 0x483e7e0 (__memcpy_chk) +--31983-- REDIR: 0x4cde2b0 (libc.so.6:__strcmp_ssse3) redirected to 0x483aaf0 (strcmp) +--31983-- REDIR: 0x4c09630 (libc.so.6:free) redirected to 0x4837800 (free) +--31983-- REDIR: 0x4c257c0 (libc.so.6:__memchr_sse2_bsf) redirected to 0x483acd0 (memchr) +--31983-- REDIR: 0x4c0fb80 
(libc.so.6:strchrnul) redirected to 0x483e280 (strchrnul) +--31983-- REDIR: 0x4c26b10 (libc.so.6:__GI_mempcpy) redirected to 0x483e4a0 (__GI_mempcpy) +--31983-- REDIR: 0x4c1fbd0 (libc.so.6:__strcat_ssse3) redirected to 0x4839850 (strcat) +--31983-- REDIR: 0x4c26590 (libc.so.6:__GI_memchr) redirected to 0x483ad00 (__GI_memchr) +--31983-- REDIR: 0x4ce3620 (libc.so.6:__strncasecmp_ssse3) redirected to 0x483a530 (strncasecmp) +--31983-- REDIR: 0x4c27240 (libc.so.6:__GI_strpbrk) redirected to 0x483eaf0 (strpbrk) +--31983-- REDIR: 0x4c27470 (libc.so.6:__GI_stpcpy) redirected to 0x483d030 (__GI_stpcpy) +--31983-- Reading syms from /usr/lib/i386-linux-gnu/libnss_files-2.31.so +--31983-- Considering /usr/lib/debug/.build-id/0d/43cdeb1dd698c21bebab4455ebde7b5d85bebf.debug .. +--31983-- .. build-id is valid +--31983-- REDIR: 0x4c0e220 (libc.so.6:strcasecmp) redirected to 0x482f1f0 (_vgnU_ifunc_wrapper) +--31983-- REDIR: 0x4c270d0 (libc.so.6:__GI_strcpy) redirected to 0x4839d50 (__GI_strcpy) +--31983-- REDIR: 0x4ce1110 (libc.so.6:__strcasecmp_ssse3) redirected to 0x483a430 (strcasecmp) +--31983-- REDIR: 0x4c26aa0 (libc.so.6:__GI_memmove) redirected to 0x483dad0 (__GI_memmove) +--31983-- Reading syms from /usr/lib/i386-linux-gnu/libnss_mdns4_minimal.so.2 +--31983-- object doesn't have a symbol table +--31983-- Reading syms from /usr/lib/i386-linux-gnu/libresolv-2.31.so +--31983-- Considering /usr/lib/debug/.build-id/ca/f47cd6f5ff926f5e6386471488e12148aee78d.debug .. +--31983-- .. build-id is valid +--31983-- REDIR: 0x4c0fb40 (libc.so.6:rawmemchr) redirected to 0x482f1f0 (_vgnU_ifunc_wrapper) +--31983-- Reading syms from /usr/lib/i386-linux-gnu/libnss_dns-2.31.so +--31983-- Considering /usr/lib/debug/.build-id/a8/37dd2371868244dc863eb11f871c0c3236e50b.debug .. +--31983-- .. 
build-id is valid +--31983-- REDIR: 0x4c0e060 (libc.so.6:mempcpy) redirected to 0x482f1f0 (_vgnU_ifunc_wrapper) +--31983-- REDIR: 0x4c09880 (libc.so.6:realloc) redirected to 0x4838bd0 (realloc) +--31983-- REDIR: 0x4c26730 (libc.so.6:__GI_memcmp) redirected to 0x483cc80 (__GI_memcmp) +--31983-- REDIR: 0x4cdf720 (libc.so.6:__strncmp_ssse3) redirected to 0x483a270 (strncmp) +--31983-- REDIR: 0x4cd5cb0 (libc.so.6:__memmove_ssse3) redirected to 0x483d770 (memmove) +--31983-- REDIR: 0x4c15e60 (libc.so.6:__strncpy_ssse3) redirected to 0x4839e30 (strncpy) +--31983-- REDIR: 0x4ce8540 (libc.so.6:__memcmp_ssse3) redirected to 0x483ce30 (bcmp) +--31983-- REDIR: 0x4c14610 (libc.so.6:__strcpy_ssse3) redirected to 0x4839c70 (strcpy) +--31983-- REDIR: 0x4ccf630 (libc.so.6:__memset_sse2) redirected to 0x483d680 (memset) +--31983-- Discarding syms at 0x871c300-0x87228c4 in /usr/lib/i386-linux-gnu/libnss_files-2.31.so (have_dinfo 1) +--31983-- Discarding syms at 0x872f210-0x8730684 in /usr/lib/i386-linux-gnu/libnss_mdns4_minimal.so.2 (have_dinfo 1) +--31983-- Discarding syms at 0x874e1c0-0x8750ea4 in /usr/lib/i386-linux-gnu/libnss_dns-2.31.so (have_dinfo 1) +--31983-- Discarding syms at 0x87373b0-0x8743c64 in /usr/lib/i386-linux-gnu/libresolv-2.31.so (have_dinfo 1) +==31983== +==31983== HEAP SUMMARY: +==31983== in use at exit: 543 bytes in 4 blocks +==31983== total heap usage: 15,397 allocs, 15,393 frees, 1,396,018 bytes allocated +==31983== +==31983== Searching for pointers to 4 not-freed blocks +==31983== Checked 220,616 bytes +==31983== +==31983== 1 bytes in 1 blocks are indirectly lost in loss record 1 of 4 +==31983== at 0x483663B: malloc (vg_replace_malloc.c:307) +==31983== by 0x49257E4: xmlStrdup (in /usr/lib/i386-linux-gnu/libxml2.so.2.9.10) +==31983== by 0x488CF5F: xmlCopyError (in /usr/lib/i386-linux-gnu/libxml2.so.2.9.10) +==31983== by 0x488D2F3: __xmlRaiseError (in /usr/lib/i386-linux-gnu/libxml2.so.2.9.10) +==31983== by 0x48E04B5: ??? 
(in /usr/lib/i386-linux-gnu/libxml2.so.2.9.10) +==31983== by 0x48E4946: ??? (in /usr/lib/i386-linux-gnu/libxml2.so.2.9.10) +==31983== by 0x48E57BC: htmlParseDocument (in /usr/lib/i386-linux-gnu/libxml2.so.2.9.10) +==31983== by 0x48E5CEF: ??? (in /usr/lib/i386-linux-gnu/libxml2.so.2.9.10) +==31983== by 0x109585: parseHtmlDocument (lib.c:5) +==31983== by 0x10AFBE: sc_query_google (api.c:201) +==31983== by 0x10C300: sc_httpd (httpd.c:176) +==31983== by 0x4A39F1B: ??? (in /usr/lib/i386-linux-gnu/libmicrohttpd.so.12.57.0) +==31983== +==31983== 2 bytes in 1 blocks are indirectly lost in loss record 2 of 4 +==31983== at 0x483663B: malloc (vg_replace_malloc.c:307) +==31983== by 0x49257E4: xmlStrdup (in /usr/lib/i386-linux-gnu/libxml2.so.2.9.10) +==31983== by 0x488CF6C: xmlCopyError (in /usr/lib/i386-linux-gnu/libxml2.so.2.9.10) +==31983== by 0x488D2F3: __xmlRaiseError (in /usr/lib/i386-linux-gnu/libxml2.so.2.9.10) +==31983== by 0x48E04B5: ??? (in /usr/lib/i386-linux-gnu/libxml2.so.2.9.10) +==31983== by 0x48E4946: ??? (in /usr/lib/i386-linux-gnu/libxml2.so.2.9.10) +==31983== by 0x48E57BC: htmlParseDocument (in /usr/lib/i386-linux-gnu/libxml2.so.2.9.10) +==31983== by 0x48E5CEF: ??? (in /usr/lib/i386-linux-gnu/libxml2.so.2.9.10) +==31983== by 0x109585: parseHtmlDocument (lib.c:5) +==31983== by 0x10AFBE: sc_query_google (api.c:201) +==31983== by 0x10C300: sc_httpd (httpd.c:176) +==31983== by 0x4A39F1B: ??? (in /usr/lib/i386-linux-gnu/libmicrohttpd.so.12.57.0) +==31983== +==31983== 24 bytes in 1 blocks are indirectly lost in loss record 3 of 4 +==31983== at 0x483663B: malloc (vg_replace_malloc.c:307) +==31983== by 0x49257E4: xmlStrdup (in /usr/lib/i386-linux-gnu/libxml2.so.2.9.10) +==31983== by 0x488CF54: xmlCopyError (in /usr/lib/i386-linux-gnu/libxml2.so.2.9.10) +==31983== by 0x488D2F3: __xmlRaiseError (in /usr/lib/i386-linux-gnu/libxml2.so.2.9.10) +==31983== by 0x48E04B5: ??? (in /usr/lib/i386-linux-gnu/libxml2.so.2.9.10) +==31983== by 0x48E4946: ??? 
(in /usr/lib/i386-linux-gnu/libxml2.so.2.9.10) +==31983== by 0x48E57BC: htmlParseDocument (in /usr/lib/i386-linux-gnu/libxml2.so.2.9.10) +==31983== by 0x48E5CEF: ??? (in /usr/lib/i386-linux-gnu/libxml2.so.2.9.10) +==31983== by 0x109585: parseHtmlDocument (lib.c:5) +==31983== by 0x10AFBE: sc_query_google (api.c:201) +==31983== by 0x10C300: sc_httpd (httpd.c:176) +==31983== by 0x4A39F1B: ??? (in /usr/lib/i386-linux-gnu/libmicrohttpd.so.12.57.0) +==31983== +==31983== 543 (516 direct, 27 indirect) bytes in 1 blocks are definitely lost in loss record 4 of 4 +==31983== at 0x4838A16: calloc (vg_replace_malloc.c:760) +==31983== by 0x492265E: xmlGetGlobalState (in /usr/lib/i386-linux-gnu/libxml2.so.2.9.10) +==31983== by 0x49217EC: __htmlDefaultSAXHandler (in /usr/lib/i386-linux-gnu/libxml2.so.2.9.10) +==31983== by 0x48E5F36: htmlNewParserCtxt (in /usr/lib/i386-linux-gnu/libxml2.so.2.9.10) +==31983== by 0x10955E: parseHtmlDocument (lib.c:4) +==31983== by 0x10AFBE: sc_query_google (api.c:201) +==31983== by 0x10C300: sc_httpd (httpd.c:176) +==31983== by 0x4A39F1B: ??? (in /usr/lib/i386-linux-gnu/libmicrohttpd.so.12.57.0) +==31983== by 0x4A3BDF6: ??? (in /usr/lib/i386-linux-gnu/libmicrohttpd.so.12.57.0) +==31983== by 0x4A3DF6C: ??? (in /usr/lib/i386-linux-gnu/libmicrohttpd.so.12.57.0) +==31983== by 0x4A415E4: ??? 
(in /usr/lib/i386-linux-gnu/libmicrohttpd.so.12.57.0) +==31983== by 0x4B6A0B3: start_thread (pthread_create.c:477) +==31983== +==31983== LEAK SUMMARY: +==31983== definitely lost: 516 bytes in 1 blocks +==31983== indirectly lost: 27 bytes in 3 blocks +==31983== possibly lost: 0 bytes in 0 blocks +==31983== still reachable: 0 bytes in 0 blocks +==31983== suppressed: 0 bytes in 0 blocks +==31983== +==31983== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0) diff --git a/test/bug/prog.c b/test/bug/prog.c new file mode 100644 index 0000000..2371077 --- /dev/null +++ b/test/bug/prog.c @@ -0,0 +1,29 @@ +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <libxml/HTMLparser.h> +#include <string.h> +int main (int argc, char ** argv) { + xmlInitParser(); + htmlDocPtr xmldoc; + char * txtdoc; + struct stat s; + int fd = open(argv[1], O_RDONLY); + htmlParserCtxtPtr c; + stat(argv[1], &s); + txtdoc = mmap(NULL, s.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + xmlInitParser(); + + c = htmlNewParserCtxt(); + xmldoc = htmlCtxtReadMemory(c, txtdoc, strlen(txtdoc), "", NULL, HTML_PARSE_RECOVER); + /* by the way: why/how/when does libxml2 use networking when HTML_PARSE_NOT is not specified? */ + htmlFreeParserCtxt(c); + xmlFreeDoc(xmldoc); + + close(fd); + munmap(txtdoc, s.st_size); + xmlCleanupParser(); + return 0; +} |