diff options
author | Anton Luka Šijanec <anton@sijanec.eu> | 2022-11-21 20:11:12 +0100 |
---|---|---|
committer | Anton Luka Šijanec <anton@sijanec.eu> | 2022-11-21 20:11:12 +0100 |
commit | 8e1cbd8f7bd5deb3310be4f4f65f7aa6ea9a9beb (patch) | |
tree | 27508c3ffa05f5934bd7af60c34736d89e0e5954 /src/bencoding.c | |
parent | initial commit, UNTESTED bencoding parser (diff) | |
download | travnik-8e1cbd8f7bd5deb3310be4f4f65f7aa6ea9a9beb.tar travnik-8e1cbd8f7bd5deb3310be4f4f65f7aa6ea9a9beb.tar.gz travnik-8e1cbd8f7bd5deb3310be4f4f65f7aa6ea9a9beb.tar.bz2 travnik-8e1cbd8f7bd5deb3310be4f4f65f7aa6ea9a9beb.tar.lz travnik-8e1cbd8f7bd5deb3310be4f4f65f7aa6ea9a9beb.tar.xz travnik-8e1cbd8f7bd5deb3310be4f4f65f7aa6ea9a9beb.tar.zst travnik-8e1cbd8f7bd5deb3310be4f4f65f7aa6ea9a9beb.zip |
Diffstat (limited to 'src/bencoding.c')
-rw-r--r-- | src/bencoding.c | 259 |
1 files changed, 228 insertions, 31 deletions
diff --git a/src/bencoding.c b/src/bencoding.c index 8c32399..d1324c7 100644 --- a/src/bencoding.c +++ b/src/bencoding.c @@ -21,11 +21,10 @@ struct bencoding { struct bencoding * next; /**< NULL if element is not member of a list or dict */ struct bencoding * prev; struct bencoding * child; /**< NULL if element is not a list or dict or if it has 0 children */ - struct bencoding * parent; - enum benc type; /**< type of this element */ - struct bencoding * key; /**< the key element, string according to the spec, applicable for list and dict */ - char * value; /**< always set to the content of the element, value is not null terminated unless terminate opt is set */ - size_t valuelen; /**< length of string value, as value is not null terminated */ + enum benc type; /**< type | opts of this element */ + struct bencoding * key; /**< the key element, string according to the spec, applicable for dict */ + char * value; /**< set to the content of the element, value is not null terminated unless terminate opt is set. NULL for dict and list. */ + size_t valuelen; /**< length of string value, as value is not null terminated, internal value for list or dict. */ int intvalue; int index; char oldterminator; /**< when opts&terminate, the character that was replaced with \0 is stored here */ @@ -41,12 +40,10 @@ struct bencoding { void free_bencoding (struct bencoding * b) { if (!b) return; - struct bencoding * s = b; - while (s) /* we free all siblings should they exist */ - free_bencoding(s = s->next); free_bencoding(b->child); /* we free the child should it exist. it can be NULL. */ free_bencoding(b->key); /* should this be an element of a dict, free the key */ - free(b); /* we free the element */ + free_bencoding(b->next); + free(b); return; } @@ -58,6 +55,201 @@ void free_bencoding (struct bencoding * b) { #define MIN(x, y) ((x) <= (y) ? (x) : (y)) /** + * return how much space a character in a string uses + * + * @param a [in] the character in question + */ + +int b2json_charsize (char a) { + if (a == '"') + return 2; + if (a == '\\') + return 2; + if (a == '\b') + return 2; + if (a == '\f') + return 2; + if (a == '\n') + return 2; + if (a == '\r') + return 2; + if (a == '\t') + return 2; + if (a < ' ') + return 6; + return 1; +} + +/** + * write a string representation of a character in a JSON string + * + * @param dest [out] destination + * @param a [in] the character in question + * @return the destination pointer, incremented for the number of bytes written + */ + +char * b2json_charrepr (char * dest, char a) { + switch (a) { +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstringop-truncation" + case '"': + strncpy(dest, "\\\"", 2); + return dest+2; + case '\\': + strncpy(dest, "\\\\", 2); + return dest+2; + case '\b': + strncpy(dest, "\\b", 2); + return dest+2; + case '\f': + strncpy(dest, "\\f", 2); + return dest+2; + case '\n': + strncpy(dest, "\\n", 2); + return dest+2; + case '\r': + strncpy(dest, "\\r", 2); + return dest+2; + case '\t': + strncpy(dest, "\\t", 2); + return dest+2; + default: + if (a < ' ') { + char buf[7]; + sprintf(buf, "\\u00%02x", a); + strncpy(dest, buf, 6); + return dest+6; + } else { + *dest++ = a; + return dest; + } +#pragma GCC diagnostic pop + } +} + + +/** + * get size required for JSON representation of a bencoding struct. terminating NULL byte is not counted, because b2json does not write it. write it yourself. + * + * @param b [in] bencoding structure of a bdecoded element + */ + +int b2json_length (struct bencoding * b) { + if (!b) + return 4; + if (b->type & string) { + int size = 2; + if (b->oldterminatorls) + size += b2json_charsize(b->oldterminatorls) - b2json_charsize('\0'); + for (size_t i = 0; i < b->valuelen; i++) + size += b2json_charsize(b->value[i]); + return size; + } + if (b->type & num) { + char buf[512]; + sprintf(buf, "%d", b->intvalue); + return strlen(buf); + } + if (b->type & list) { + if (!b->child) + return 2; + struct bencoding * t = b->child; + int size = 2 + b2json_length(t); + while (t->next) { + t = t->next; + size += b2json_length(t) + 1; + } + return size; + } + if (b->type & dict) { + if (!b->child) + return 2; + struct bencoding * t = b->child; + int size = 3 + b2json_length(t) + b2json_length(t->key); + while (t->next) { + t = t->next; + size += 1 + b2json_length(t) + 1 + b2json_length(t->key); + } + return size; + } + return 5; +} + +/** + * write json representation of a bencoding struct. does not write terminating nullbyte, b2json_length does not include it in count. add it yourself. should write exactly b2json_length bytes. + * + * writes false when struct has an incorrect type and null when NULL pointer is passed, this is in ordnung with b2json_length. + * + * @param dest [in] destination + * @param b [in] bencoding structure of a bdecoded element + * @return the destination pointer, incremented for the number of bytes written + */ + +char * b2json (char * dest, struct bencoding * b) { +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstringop-truncation" + if (!b) { + strncpy(dest, "null", 4); + return dest+4; + } + if (b->type & string) { + *dest++ = '"'; + for (size_t i = 0; i < b->valuelen; i++) + if (i == b->valuelen-1 && b->oldterminatorls) + dest = b2json_charrepr(dest, b->oldterminatorls); + else + dest = b2json_charrepr(dest, b->value[i]); + *dest++ = '"'; + return dest; + } + if (b->type & num) { + char buf[512]; + sprintf(buf, "%d", b->intvalue); + strncpy(dest, buf, strlen(buf)); + return dest+strlen(buf); + } + if (b->type & list) { + if (!b->child) { + strncpy(dest, "[]", 2); + return dest+2; + } + struct bencoding * t = b->child; + *dest++ = '['; + dest = b2json(dest, t); + while (t->next) { + t = t->next; + *dest++ = ','; + dest = b2json(dest, t); + } + *dest++ = ']'; + return dest; + } + if (b->type & dict) { + if (!b->child) { + strncpy(dest, "{}", 2); + return dest+2; + } + *dest++ = '{'; + struct bencoding * t = b->child; + dest = b2json(dest, t->key); + *dest++ = ':'; + dest = b2json(dest, t); + while (t->next) { + t = t->next; + *dest++ = ','; + dest = b2json(dest, t->key); + *dest++ = ':'; + dest = b2json(dest, t); + } + *dest++ = '}'; + return dest; + } + strncpy(dest, "false", 4); + return dest+4; +#pragma GCC diagnostic pop +} + +/** * macro that allocas a C string from a bencoding string or other element. non-string elements return their raw bencoded content. * dereferences structure without checking. * resulting C string is NULL terminated, cannot contain NULL, DO NOT dereference bytes after the NULL terminator. @@ -108,7 +300,7 @@ struct bencoding * bdecode (char * s, int len, enum benc opts) { b->value = s+1; if (len == -1 || memchr(s, 'e', len)) { /* correct string or end found */ b->intvalue = strtol(b->value, &c, 10); - b->valuelen = (c-1)-b->value; + b->valuelen = c-b->value; } break; case 'd': /* dict */ @@ -117,49 +309,54 @@ struct bencoding * bdecode (char * s, int len, enum benc opts) { case 'l': /* list */ if (!b->type) b->type = list; - c = s; + c = s+1; struct bencoding * arbeit = NULL; struct bencoding * oldarbeit = NULL; struct bencoding * oldoldarbeit = NULL; /* for dicts, holds previous value */ int index = 0; - b->value = s+1; - char oldterminator = '\0'; - while (len == -1 || ++c <= s+len) { /* s+len is max we are allowed to read */ - if (opts&terminate && oldarbeit && oldarbeit->oldterminator) - c[0] = oldterminator; + while (len == -1 || c <= s+len) { /* s+len is max we are allowed to read */ + if (oldarbeit && oldarbeit->type & string && oldarbeit->type & terminate && oldarbeit->oldterminator) + c[0] = oldarbeit->oldterminator; arbeit = bdecode(c, len == -1 ? -1 : len-(c-s), opts); - if (opts&terminate && oldarbeit && oldarbeit->oldterminator) + if (oldarbeit && oldarbeit->type & string && oldarbeit->type & terminate && oldarbeit->oldterminator) c[0] = '\0'; if (!arbeit) /* bdecoding failed or last element */ break; -#define ISDICT (b->type == dict) +#define ISDICT (b->type & dict) #define ISLIST !ISDICT -#define ISVAL (index % 2 == 1) +#define ISVAL (index % 2) #define ISKEY !ISVAL if (ISDICT && ISVAL) arbeit->key = oldarbeit; - c = arbeit->value+arbeit->valuelen; /* this is safe, function's vallen should not be in forbidden */ - if (arbeit->type&(num|dict|list) && c <= s+len && c[0] == 'e') /* but vallen+1 may be */ - c++; - c--; /* while cond will inc again */ + if (arbeit->type & num) + c = arbeit->value+arbeit->valuelen+1; + else if (arbeit->type & string) + c = arbeit->value+arbeit->valuelen; + else if (arbeit->type & (list | dict)) + c += arbeit->valuelen; arbeit->prev = ISDICT ? ISVAL ? oldoldarbeit : oldarbeit : oldarbeit; arbeit->index = ISDICT ? index/2 : index; - if (ISLIST) + if (ISLIST) { if (index) oldarbeit->next = arbeit; else b->child = arbeit; - if (ISDICT) + } + if (ISDICT) { if (index == 1) - b->child = oldarbeit; + b->child = arbeit; else if (ISVAL) oldoldarbeit->next = arbeit; + } oldoldarbeit = oldarbeit; oldarbeit = arbeit; index++; } - b->valuelen = (c-1)-b->value; /* c-1 is the last character in list or last readable character if out of l */ - break; + b->valuelen = c-s + 1; + b->type = b->type | opts; + if (ISDICT && ISVAL) // e je torej value, če je prej samoten key + free_bencoding(oldarbeit); // this key would be otherwise leaked + return b; case 'e': /* end of list/dict */ free(b); return NULL; @@ -170,15 +367,15 @@ struct bencoding * bdecode (char * s, int len, enum benc opts) { } b->type = string; if (len == -1 || (b->value = memchr(s, ':', len))) { - b->valuelen = strtol(s, NULL, 10); - b->value++; + b->valuelen = strtol(s, &c, 10); + b->value = c+1; if (len != -1 && (unsigned)len < b->valuelen + (b->value - s) /* len minus prefix; strlen & colon */) b->valuelen = len - (b->value - s); /* malformed bencoded data, truncating string */ } break; } if (opts & terminate) { - if (len != -1 && b->valuelen+1+(b->value-s) < (unsigned) len) { /* no space for terminator, put it on last char */ + if (len != -1 && b->valuelen+1+(b->value-s) > (unsigned) len) { /* no space for terminator, put it on last char */ b->oldterminatorls = b->value[b->valuelen-1]; b->value[b->valuelen-1] = '\0'; } else { |