summaryrefslogtreecommitdiffstats
path: root/src/bencoding.c
diff options
context:
space:
mode:
authorsijanec <anton@sijanec.eu>2021-05-04 23:20:33 +0200
committersijanec <anton@sijanec.eu>2021-05-04 23:20:33 +0200
commit872a765eeebfeea314aae1f3a356f8ac280526aa (patch)
treef8acd64478f7be1f3bb5efb6ccd2e4d613f9c73c /src/bencoding.c
downloadtravnik-872a765eeebfeea314aae1f3a356f8ac280526aa.tar
travnik-872a765eeebfeea314aae1f3a356f8ac280526aa.tar.gz
travnik-872a765eeebfeea314aae1f3a356f8ac280526aa.tar.bz2
travnik-872a765eeebfeea314aae1f3a356f8ac280526aa.tar.lz
travnik-872a765eeebfeea314aae1f3a356f8ac280526aa.tar.xz
travnik-872a765eeebfeea314aae1f3a356f8ac280526aa.tar.zst
travnik-872a765eeebfeea314aae1f3a356f8ac280526aa.zip
Diffstat (limited to 'src/bencoding.c')
-rw-r--r--src/bencoding.c197
1 files changed, 197 insertions, 0 deletions
diff --git a/src/bencoding.c b/src/bencoding.c
new file mode 100644
index 0000000..8c32399
--- /dev/null
+++ b/src/bencoding.c
@@ -0,0 +1,197 @@
+/**
+ * enum of all possible bencoding types and some options to use
+ * to check a type, use ORing, not direct comparison, as bdecoded structs inherit opts from bdecode function in their ->types
+ */
+
+enum benc {
+ string = 1 << 0,
+ num = 1 << 1,
+ list = 1 << 2,
+ dict = 1 << 3,
+ terminate = 1 << 4 /**< bencoding strings are terminated and you do not need bencoding_string to use them. breaks input str.
+ note: when out of space, the terminator is placed instead of the last character of the string. **/
+};
+
+/**
+ * structure representation of bencoded data
+ * the structure does not copy any data, it's assumed that the origin string that was used to create the structure does not change
+ */
+
+struct bencoding {
+ struct bencoding * next; /**< NULL if element is not member of a list or dict */
+ struct bencoding * prev;
+ struct bencoding * child; /**< NULL if element is not a list or dict or if it has 0 children */
+ struct bencoding * parent;
+ enum benc type; /**< type of this element */
+ struct bencoding * key; /**< the key element, string according to the spec, applicable for list and dict */
+ char * value; /**< always set to the content of the element, value is not null terminated unless terminate opt is set */
+ size_t valuelen; /**< length of string value, as value is not null terminated */
+ int intvalue;
+ int index;
+ char oldterminator; /**< when opts&terminate, the character that was replaced with \0 is stored here */
+ char oldterminatorls; /**< when opts&terminate when there was no more space, replaced character is stored here.
+ if there'd be enough space, the next one of this one would be replaced.
+ this is used by bencoding string, as it will repair the original string and restore the last character. */
+};
+
+/**
+ * frees the passed bencoding struct or performs no action if NULL was passed. caller should NULL the pointer to prevent reuse.
+ */
+
+void free_bencoding (struct bencoding * b) {
+ if (!b)
+ return;
+ struct bencoding * s = b;
+ while (s) /* we free all siblings should they exist */
+ free_bencoding(s = s->next);
+ free_bencoding(b->child); /* we free the child should it exist. it can be NULL. */
+ free_bencoding(b->key); /* should this be an element of a dict, free the key */
+ free(b); /* we free the element */
+ return;
+}
+
+/**
+ * helper macros for number comparisons
+ */
+
+#define MAX(x, y) ((x) >= (y) ? (x) : (y))
+#define MIN(x, y) ((x) <= (y) ? (x) : (y))
+
+/**
+ * macro that allocas a C string from a bencoding string or other element. non-string elements return their raw bencoded content.
+ * dereferences structure without checking.
+ * resulting C string is NULL terminated, cannot contain NULL, DO NOT dereference bytes after the NULL terminator.
+ *
+ * @param stru [in] bencoding structure of a bdecoded element
+ * @param char [out] char * type variable that will contain allocad string. DO NOT ATTEMPT TO FREE; automatic free at return!
+ */
+
+#define bencoding_string(stru, char) \
+ char = alloca(stru->valuelen+1); \
+ snprintf(char, stru->valuelen+1, "%.*s", stru->valuelen, stru->value); \
+ if (stru->oldterminatorls) \
+ char[stru->valuelen-1] = char[stru->oldterminatorls]; \
+
+/**
+ * bdecodes a bencoded structure from a string into a bencoding structure that must be free_bencodinged by the caller.
+ *
+ * nonstandard things: this parser allows for dict keys to be of any type, valuekey
+ *
+ * by default input string is unmodified, unless terminate opt is set.
+ *
+ * @param len [in] * if set to -1, string is assumed to be correct and not NULL terminated, NULLs may be in strings.
+ * - malicious strings may trigger reads past the end of the buffer, which may lead to undefined
+ * behaviour, crashes (DoS) or leaks of content, stored in memory.
+ * - if opts&terminate, another character will be written after the bencoded structure in memory if
+ * that structure is a string. beware and have space allocated for it!
+ * * if set to -2, string is assumed to be NULL terminated and no further reading will be done after the NULL.
+ * - if such terminator breaks an incomplete element, the resulting structure may be incomplete, but
+ * will be correct - for example valuelen of a misterminated string will correctly be shortened.
+ * * if set to a positive number, reading will only be allowed up to that many characters.
+ * - if the input string reads the end and the structure is incomplete, same thing as with -2 happens.
+ * - if the structure ends cleanly (string length satisfied or end of list, dict or num found),
+ * processing stops, no mather how many characters of len are left.
+ * @param opts [in] sets options. do not set the type bits here, this is the same enum as the ->type enum of returned struct.
+ * opts will be reflected in the ->type of the returning struct. opts will apply to childs of lists&dicts too.
+ */
+
+struct bencoding * bdecode (char * s, int len, enum benc opts) {
+ if (!s || len < -2 || (len >= 0 && len < 2 /* 2 being the smallest bencoding string */))
+ return NULL;
+ if (len == -2)
+ len = strlen(s);
+ struct bencoding * b = calloc(1, sizeof(struct bencoding)); /* SEGV if OOM */
+ char * c = NULL;
+ switch (s[0]) {
+ case 'i': /* num */
+ b->type = num;
+ b->value = s+1;
+ if (len == -1 || memchr(s, 'e', len)) { /* correct string or end found */
+ b->intvalue = strtol(b->value, &c, 10);
+ b->valuelen = (c-1)-b->value;
+ }
+ break;
+ case 'd': /* dict */
+ b->type = dict;
+ __attribute__((fallthrough));
+ case 'l': /* list */
+ if (!b->type)
+ b->type = list;
+ c = s;
+ struct bencoding * arbeit = NULL;
+ struct bencoding * oldarbeit = NULL;
+ struct bencoding * oldoldarbeit = NULL; /* for dicts, holds previous value */
+ int index = 0;
+ b->value = s+1;
+ char oldterminator = '\0';
+ while (len == -1 || ++c <= s+len) { /* s+len is max we are allowed to read */
+ if (opts&terminate && oldarbeit && oldarbeit->oldterminator)
+ c[0] = oldterminator;
+ arbeit = bdecode(c, len == -1 ? -1 : len-(c-s), opts);
+ if (opts&terminate && oldarbeit && oldarbeit->oldterminator)
+ c[0] = '\0';
+ if (!arbeit) /* bdecoding failed or last element */
+ break;
+#define ISDICT (b->type == dict)
+#define ISLIST !ISDICT
+#define ISVAL (index % 2 == 1)
+#define ISKEY !ISVAL
+ if (ISDICT && ISVAL)
+ arbeit->key = oldarbeit;
+ c = arbeit->value+arbeit->valuelen; /* this is safe, function's vallen should not be in forbidden */
+ if (arbeit->type&(num|dict|list) && c <= s+len && c[0] == 'e') /* but vallen+1 may be */
+ c++;
+ c--; /* while cond will inc again */
+ arbeit->prev = ISDICT ? ISVAL ? oldoldarbeit : oldarbeit : oldarbeit;
+ arbeit->index = ISDICT ? index/2 : index;
+ if (ISLIST)
+ if (index)
+ oldarbeit->next = arbeit;
+ else
+ b->child = arbeit;
+ if (ISDICT)
+ if (index == 1)
+ b->child = oldarbeit;
+ else if (ISVAL)
+ oldoldarbeit->next = arbeit;
+ oldoldarbeit = oldarbeit;
+ oldarbeit = arbeit;
+ index++;
+ }
+ b->valuelen = (c-1)-b->value; /* c-1 is the last character in list or last readable character if out of l */
+ break;
+ case 'e': /* end of list/dict */
+ free(b);
+ return NULL;
+ default:
+ if (!(s[0] >= '0' && s[0] <= '9')) { /* not a string. not checking this would allow DoS for parsing "lx" */
+ free(b);
+ return NULL;
+ }
+ b->type = string;
+ if (len == -1 || (b->value = memchr(s, ':', len))) {
+ b->valuelen = strtol(s, NULL, 10);
+ b->value++;
+ if (len != -1 && (unsigned)len < b->valuelen + (b->value - s) /* len minus prefix; strlen & colon */)
+ b->valuelen = len - (b->value - s); /* malformed bencoded data, truncating string */
+ }
+ break;
+ }
+ if (opts & terminate) {
+ if (len != -1 && b->valuelen+1+(b->value-s) < (unsigned) len) { /* no space for terminator, put it on last char */
+ b->oldterminatorls = b->value[b->valuelen-1];
+ b->value[b->valuelen-1] = '\0';
+ } else {
+ b->oldterminator = b->value[b->valuelen];
+ b->value[b->valuelen] = '\0';
+ }
+ }
+ b->type = b->type | opts;
+ return b;
+}
+
+/**
+ * returns a pointer to bencoding struct matching bencoding path or NULL if not found
+ *
+ * [xxx] specifies xxxth child of a dict or list. if
+ */