diff options
author | sijanec <anton@sijanec.eu> | 2021-05-04 23:20:33 +0200 |
---|---|---|
committer | sijanec <anton@sijanec.eu> | 2021-05-04 23:20:33 +0200 |
commit | 872a765eeebfeea314aae1f3a356f8ac280526aa (patch) | |
tree | f8acd64478f7be1f3bb5efb6ccd2e4d613f9c73c /src/bencoding.c | |
download | travnik-872a765eeebfeea314aae1f3a356f8ac280526aa.tar travnik-872a765eeebfeea314aae1f3a356f8ac280526aa.tar.gz travnik-872a765eeebfeea314aae1f3a356f8ac280526aa.tar.bz2 travnik-872a765eeebfeea314aae1f3a356f8ac280526aa.tar.lz travnik-872a765eeebfeea314aae1f3a356f8ac280526aa.tar.xz travnik-872a765eeebfeea314aae1f3a356f8ac280526aa.tar.zst travnik-872a765eeebfeea314aae1f3a356f8ac280526aa.zip |
Diffstat (limited to 'src/bencoding.c')
-rw-r--r-- | src/bencoding.c | 197 |
1 files changed, 197 insertions, 0 deletions
diff --git a/src/bencoding.c b/src/bencoding.c new file mode 100644 index 0000000..8c32399 --- /dev/null +++ b/src/bencoding.c @@ -0,0 +1,197 @@ +/** + * enum of all possible bencoding types and some options to use + * to check a type, use ORing, not direct comparison, as bdecoded structs inherit opts from bdecode function in their ->types + */ + +enum benc { + string = 1 << 0, + num = 1 << 1, + list = 1 << 2, + dict = 1 << 3, + terminate = 1 << 4 /**< bencoding strings are terminated and you do not need bencoding_string to use them. breaks input str. + note: when out of space, the terminator is placed instead of the last character of the string. **/ +}; + +/** + * structure representation of bencoded data + * the structure does not copy any data, it's assumed that the origin string that was used to create the structure does not change + */ + +struct bencoding { + struct bencoding * next; /**< NULL if element is not member of a list or dict */ + struct bencoding * prev; + struct bencoding * child; /**< NULL if element is not a list or dict or if it has 0 children */ + struct bencoding * parent; + enum benc type; /**< type of this element */ + struct bencoding * key; /**< the key element, string according to the spec, applicable for list and dict */ + char * value; /**< always set to the content of the element, value is not null terminated unless terminate opt is set */ + size_t valuelen; /**< length of string value, as value is not null terminated */ + int intvalue; + int index; + char oldterminator; /**< when opts&terminate, the character that was replaced with \0 is stored here */ + char oldterminatorls; /**< when opts&terminate when there was no more space, replaced character is stored here. + if there'd be enough space, the next one of this one would be replaced. + this is used by bencoding string, as it will repair the original string and restore the last character. */ +}; + +/** + * frees the passed bencoding struct or performs no action if NULL was passed. caller should NULL the pointer to prevent reuse. + */ + +void free_bencoding (struct bencoding * b) { + if (!b) + return; + struct bencoding * s = b; + while (s) /* we free all siblings should they exist */ + free_bencoding(s = s->next); + free_bencoding(b->child); /* we free the child should it exist. it can be NULL. */ + free_bencoding(b->key); /* should this be an element of a dict, free the key */ + free(b); /* we free the element */ + return; +} + +/** + * helper macros for number comparisons + */ + +#define MAX(x, y) ((x) >= (y) ? (x) : (y)) +#define MIN(x, y) ((x) <= (y) ? (x) : (y)) + +/** + * macro that allocas a C string from a bencoding string or other element. non-string elements return their raw bencoded content. + * dereferences structure without checking. + * resulting C string is NULL terminated, cannot contain NULL, DO NOT dereference bytes after the NULL terminator. + * + * @param stru [in] bencoding structure of a bdecoded element + * @param char [out] char * type variable that will contain allocad string. DO NOT ATTEMPT TO FREE; automatic free at return! + */ + +#define bencoding_string(stru, char) \ + char = alloca(stru->valuelen+1); \ + snprintf(char, stru->valuelen+1, "%.*s", stru->valuelen, stru->value); \ + if (stru->oldterminatorls) \ + char[stru->valuelen-1] = char[stru->oldterminatorls]; \ + +/** + * bdecodes a bencoded structure from a string into a bencoding structure that must be free_bencodinged by the caller. + * + * nonstandard things: this parser allows for dict keys to be of any type, valuekey + * + * by default input string is unmodified, unless terminate opt is set. + * + * @param len [in] * if set to -1, string is assumed to be correct and not NULL terminated, NULLs may be in strings. + * - malicious strings may trigger reads past the end of the buffer, which may lead to undefined + * behaviour, crashes (DoS) or leaks of content, stored in memory. + * - if opts&terminate, another character will be written after the bencoded structure in memory if + * that structure is a string. beware and have space allocated for it! + * * if set to -2, string is assumed to be NULL terminated and no further reading will be done after the NULL. + * - if such terminator breaks an incomplete element, the resulting structure may be incomplete, but + * will be correct - for example valuelen of a misterminated string will correctly be shortened. + * * if set to a positive number, reading will only be allowed up to that many characters. + * - if the input string reads the end and the structure is incomplete, same thing as with -2 happens. + * - if the structure ends cleanly (string length satisfied or end of list, dict or num found), + * processing stops, no mather how many characters of len are left. + * @param opts [in] sets options. do not set the type bits here, this is the same enum as the ->type enum of returned struct. + * opts will be reflected in the ->type of the returning struct. opts will apply to childs of lists&dicts too. + */ + +struct bencoding * bdecode (char * s, int len, enum benc opts) { + if (!s || len < -2 || (len >= 0 && len < 2 /* 2 being the smallest bencoding string */)) + return NULL; + if (len == -2) + len = strlen(s); + struct bencoding * b = calloc(1, sizeof(struct bencoding)); /* SEGV if OOM */ + char * c = NULL; + switch (s[0]) { + case 'i': /* num */ + b->type = num; + b->value = s+1; + if (len == -1 || memchr(s, 'e', len)) { /* correct string or end found */ + b->intvalue = strtol(b->value, &c, 10); + b->valuelen = (c-1)-b->value; + } + break; + case 'd': /* dict */ + b->type = dict; + __attribute__((fallthrough)); + case 'l': /* list */ + if (!b->type) + b->type = list; + c = s; + struct bencoding * arbeit = NULL; + struct bencoding * oldarbeit = NULL; + struct bencoding * oldoldarbeit = NULL; /* for dicts, holds previous value */ + int index = 0; + b->value = s+1; + char oldterminator = '\0'; + while (len == -1 || ++c <= s+len) { /* s+len is max we are allowed to read */ + if (opts&terminate && oldarbeit && oldarbeit->oldterminator) + c[0] = oldterminator; + arbeit = bdecode(c, len == -1 ? -1 : len-(c-s), opts); + if (opts&terminate && oldarbeit && oldarbeit->oldterminator) + c[0] = '\0'; + if (!arbeit) /* bdecoding failed or last element */ + break; +#define ISDICT (b->type == dict) +#define ISLIST !ISDICT +#define ISVAL (index % 2 == 1) +#define ISKEY !ISVAL + if (ISDICT && ISVAL) + arbeit->key = oldarbeit; + c = arbeit->value+arbeit->valuelen; /* this is safe, function's vallen should not be in forbidden */ + if (arbeit->type&(num|dict|list) && c <= s+len && c[0] == 'e') /* but vallen+1 may be */ + c++; + c--; /* while cond will inc again */ + arbeit->prev = ISDICT ? ISVAL ? oldoldarbeit : oldarbeit : oldarbeit; + arbeit->index = ISDICT ? index/2 : index; + if (ISLIST) + if (index) + oldarbeit->next = arbeit; + else + b->child = arbeit; + if (ISDICT) + if (index == 1) + b->child = oldarbeit; + else if (ISVAL) + oldoldarbeit->next = arbeit; + oldoldarbeit = oldarbeit; + oldarbeit = arbeit; + index++; + } + b->valuelen = (c-1)-b->value; /* c-1 is the last character in list or last readable character if out of l */ + break; + case 'e': /* end of list/dict */ + free(b); + return NULL; + default: + if (!(s[0] >= '0' && s[0] <= '9')) { /* not a string. not checking this would allow DoS for parsing "lx" */ + free(b); + return NULL; + } + b->type = string; + if (len == -1 || (b->value = memchr(s, ':', len))) { + b->valuelen = strtol(s, NULL, 10); + b->value++; + if (len != -1 && (unsigned)len < b->valuelen + (b->value - s) /* len minus prefix; strlen & colon */) + b->valuelen = len - (b->value - s); /* malformed bencoded data, truncating string */ + } + break; + } + if (opts & terminate) { + if (len != -1 && b->valuelen+1+(b->value-s) < (unsigned) len) { /* no space for terminator, put it on last char */ + b->oldterminatorls = b->value[b->valuelen-1]; + b->value[b->valuelen-1] = '\0'; + } else { + b->oldterminator = b->value[b->valuelen]; + b->value[b->valuelen] = '\0'; + } + } + b->type = b->type | opts; + return b; +} + +/** + * returns a pointer to bencoding struct matching bencoding path or NULL if not found + * + * [xxx] specifies xxxth child of a dict or list. if + */ |