diff options
-rw-r--r-- | src/dht.c | 603 |
1 files changed, 466 insertions, 137 deletions
@@ -8,6 +8,8 @@ #include <string.h> #include <arpa/inet.h> #include <netinet/ip.h> +#include <limits.h> +#include <search.h> #define ECB 1 #define AES128 1 #include <aes.c> @@ -31,12 +33,24 @@ int family (struct in6_addr addr) { struct node { char id[20]; - struct sockaddr_in6 * addr; + struct sockaddr_in6 addr; int unanswered; /**< number of packets I've sent since last_received */ time_t last_received; /**< time when I received the last packet from it */ time_t last_sent; /**< time when I sent the last packet to it */ struct node * next; - struct dht * dht; /**< a reference to the library handle */ +}; + +/** + * creates a new node + */ + +struct node * node_init () { + struct node * n = calloc(1, sizeof *n); + if (!n) + return NULL; + n->last_received = seconds(); + n->last_sent = seconds(); + return n; } /** @@ -46,7 +60,6 @@ struct node { */ void node_free (struct node * n) { - free(n->addr); free(n); } @@ -58,8 +71,17 @@ struct bucket { char id[20]; /**< bucket spans from id inclusive to next->id exclusive */ struct node * nodes; struct bucket * next; - struct dht * dht; /**< a reference to the library handle */ - int count; /**< number of nodes in this bucket */ +}; + +/** + * creates a new bucket + */ + +struct bucket * bucket_init () { + struct bucket * b = calloc(1, sizeof *b); + if (!b) + return NULL; + return b; } /** @@ -79,6 +101,75 @@ void bucket_free (struct bucket * b) { } /** + * peer of a torrent + */ + +struct peer { + struct sockaddr_in6 addr; /**< peer ip address and port */ + struct peer * next; +}; + +/** + * free a torrent peer + */ + +void peer_free (struct peer * p) { + free(p); +} + +/** + * reason why a torrent is in our database. 0 just means it is stored as a result of an announce + */ + +enum torrent { + announce = 1 << 0, /**< will announce myself on every work() call with no packet */ + peers = 1 << 1, /**< will get peers on every work() call with no packet */ + info = 1 << 2, /**< download metadata into `dht->dl`/$hash.info */ + dl = 1 << 3 /**< download torrent content into `dht->dl`/$hash.blob TODO */ +}; + +/** + * torrent we are interested in + */ + +struct torrent { + enum torrent type; + char hash[20]; /**< infohash */ + struct peer * peers; + time_t last; /**< last operation on this torrent, so that inactive torrents are purged */ + struct node * nodes; /**< closest DHT nodes to this hash */ + char pieces[]; /**< when checking a torrent, this is filled. every piece is one bit. TODO */ +}; + +/** + * free a torrent object and it's nodes and peers + */ + +void torrent_free (struct torrent * t) { + struct node * n = t->nodes; + while (n) { + struct node * old = n; + n = n->next; + node_free(n); + } + struct peer * p = t->peers; + while (p) { + struct peer * old = p; + p = p->next; + peer_free(old); + } + free(t); +} + +/** + * compares two torrent objects based on their hash + */ + +int torrent_compare (const void * a, const void * b) { + return memcmp(((const struct torrent *) a)->hash, ((const struct torrent *) b)->hash, 20); +} + +/** * handle for the library */ @@ -88,24 +179,28 @@ struct dht { char secret[16]; /**< for calculating opaque write token, random */ FILE * log; /**< FILE to log to, defaults to stderr */ struct bucket * buckets; - struct bucket * buckets6: /**< IPv6 routing table */ + struct bucket * buckets6; /**< IPv6 routing table */ + struct torrent ** torrents; /**< binary tree of torrents for which we want to know peers */ + int dl; /**< dirfd storage directory for download and info torrents */ }; /** - * creates a handle. you can override id and log in the result struct. + * creates a handle. you can override log in the result struct. * * this function does not log, as log fd is not known yet * * socket must be close()d before being overriden, if the caller wants to use custom binding. * * binds UDP to all ifaces + * + * @param c [in] bencoding object containing the persistent config file, generated from a previous run with persistent(). memory ownership is NOT transfered, it's the caller's responsibility to free the object. can be NULL. */ -struct dht * dht_init (void) { +struct dht * dht_init (const struct bencoding * c) { struct dht * d = calloc(1, sizeof *d); d->log = stderr; - d->buckets = calloc(1, sizeof(struct bucket)); - d->buckets->dht = d; + d->dl = -1; + d->buckets = bucket_init(); errno = 0; if (!d) return NULL; @@ -122,6 +217,18 @@ struct dht * dht_init (void) { }; if (bind(d->socket, &a, sizeof a) == -1) goto e; + if (c) { + const struct bencoding * id = bpath(c, "id"); + if (id & id->type & string && id->valuelen == 20) + memcpy(d->id, id->value, 20); + bforeach (bpath(c, "bootstrap_nodes"), str) { + struct sockaddr_in6 addr; + char remote[INET6_ADDRSTRLEN + 7]; + strncpy(remote, str->value, str->valuelen); + if (inet_pton(AF_INET6, remote, &addr) == 1) + find_node(d, &addr, d->id); // NOTE02 + } + } return d; e: free(d); @@ -130,7 +237,6 @@ struct dht * dht_init (void) { /** * frees a handle. does nothing if handle is NULL. does not fclose log. closes socket. please set socket to -1 before calling if you don't want to close it. - * TODO: sends all my knowledge as a burst of UDP packets to nodes in my bucket. */ #define L(o, f, ...) do {char t[512]; time_t n = time(NULL); strftime(t, 512, "%c", localtime(&n)); fprintf(o, "[%s] %s()%s:%d: " f "\n", t, __func__, __FILE__, __LINE__ __VA_OPT__(,) __VA_ARGS__)} while (0) @@ -149,6 +255,23 @@ void dht_free (struct dht * d) { } /** + * generate a bencoding struct containing the persistent storage config + * + * this config contains: + * - nodes from the routing table that can be used to bootstrap into the dht net + * - this node's id + * + * please make sure that you do not start two nodes from the same config + * + * @param d [in] library handle + * @return bencoding object, whose memory ownership is transfered to the caller, which must call bencoding_free() on it. + */ + +void persistent (const struct dht * d) { + +} + +/** * generates a 16 byte token for allowing a node to store it's IP addres in our node. verify with valid() * * @param d [in] used to obtain the secret key @@ -273,7 +396,7 @@ struct node * find (const char * id, struct bucket ** b, struct ** n) { * * if the node is new, it's added in a bucket. * - * if the node is found in a bucket, it's last received time and window are updated + * if the node is found in a bucket, it's last received time and unanswered are updated * * @param d [in] handle * @param id [in] node id that was received @@ -281,49 +404,360 @@ struct node * find (const char * id, struct bucket ** b, struct ** n) { */ void replied (const struct dht * d, const char * id, const struct sockaddr_in6 * addr) { - + struct bucket * b = d->buckets; + struct node * n; + struct node * found = find(id, &b, &n); + if (found) { + found->last_received = seconds(); + found->unanswered = 0; + } + +} + +/** + * ping a node by sending a get_peers + * + * instead of sending a ping query, we send a find_node query. this gets us useful information of peers around our ID instead of just a blank ping reply. infolgedessen we don't have to actively search for our neighbour nodes, since we'll get them through pings anyways + * + * @param d [in] library handle + * @param n [in] node to ping + */ + +void ping_node (struct dht * d, const struct node * n) { + find_node(d, &n->addr, d->id); // see NOTE02 } /** - * sends a find_node query to a "raw node", a tuple if id and ip + * sends a find_node query to a "raw node" * * @param d [in] handle - * @param id [in] id of remote node to which we are sending the query * @param a [in] address of the remote node * @param query [in] 20 byte id we are querying */ -void find_node (const struct dht * d, const char * id, const struct sockaddr_in6 * a, const char * query) { - +void find_node (const struct dht * d, const struct sockaddr_in6 * a, const char * query) { + struct bencoding * b = calloc(1, sizeof *b); + b->type = dict; + struct bencoding * t = bstr(strdup("t@_a.si")); + t->key = bstr(strdup("t")); + t->value[2] = '4'; + t->type = string; + binsert(b, t); + struct bencoding * y = bstr(strdup("q")); + t->key = bstr(strdup("y")); + t->type = string; + binsert(b, y); + struct bencoding * q = bstr(strdup("find_node")); + q->key = bstr(strdup("q")); + q->type = string; + binsert(b, q); + struct bencoding * a = calloc(1, sizeof *a); + a->type = dict; + struct bencoding * id = calloc(1, sizeof *id); + id->type = string; + id->valuelen = 20; + id->key = bstr(strdup("key")); + memcpy((id->value = malloc(20)), d->id, 20); + binsert(a, id); + struct bencoding * target = calloc(1, sizeof *target); + target->key = bstr(strdup("target")); + target->type = string; + target->valuelen = 20; + memcpy((id->value = malloc(20)), query, 20); + binsert(a, id); + binsert(b, a); + sendb(d, b, a); + free_bencoding(b); +} + +/** + * returns a count of nodes in a linked list + * + * @param n [in] first node in ll + */ + +int node_count (const struct node * n) { + int c = 0; + while (n) { + c++; + n = n->next; + } + return c; +} + +/** + * returns the distance between two ids. if it cannot be represented with an unsigned int, UINT_MAX is returned. + * + * TODO: test util + */ + +unsigned int distance (const char * a, const char * b) { + char xor[20]; + memcpy(xor, a); + for (int i = 0; i < 20; i++) { + xor[i] ^= b[i]; + if (i < 20-sizeof unsigned && xor[i]) + return UINT_MAX; + } + unsigned r = 0; + for (int i = 0; i < sizeof unsigned; i++) + r |= xor[i] << (sizeof unsigned - 1 - i) * 8; +} + +enum node_grade { + good, + bad, + questionable +}; + +/** + * determines if node is considered good, bad or questionable + * + * @param n [in] node + * @return enum node_grade + */ + +enum node_grade node_grade (const struct node * n) { + if (node->last_received + 15*60 < seconds()) { + if (node->last_sent + 14*60 < seconds() && node->unanswered > 1) + return bad; + return questionable; + } + return good; } /** - * when we are sure about association nodeid<->ipaddress and we are unsure if the node is already in the routing table, we call this function, which makes a query to this node if it's a candidate for filling the routing table. this doesn't yet add it to the routing table, because we are unsure if it's a good node / can respond to queries. replied() is called if a node replied to our query. + * returns 1 if bucket is perfect, meaning it is fresh, has K nodes, and all nodes are good. bucket that contains id is almost never perfect, as it can usually be split into smaller buckets, that's why param d is required to get own id + * + * if d is NULL, it's not checked whether we fall into the bucket and whether it could be split + * + * @param d [in] library handle to get own id + * @param b [in] the bucket + */ + +int bucket_good (const struct dht * d, const struct bucket * b) { + if (d) { + if (!bucket_good(NULL, b)) + return 0; + if (in_bucket(d->id, b)) { + struct node * n = b->nodes; + while (n->next) { + if (distance(n->id, n->next->id) != 1) + return 0; + } + return 1; + } + return 1; + } else { + if (node_count(b->nodes) < K) + return 0; + struct node * n = b->nodes; + if (n) { + if (node_grade(n) != good) + return 0; + n = n->next; + } + } +} + +/** + * when we are sure that a node exists on a specific ip:port and we are unsure if the node is already in the routing table, we call this function, which makes a query to this node if it's a candidate for filling the routing table. this doesn't yet add it to the routing table, because we are unsure if it's a good node / can respond to queries. replied() is called if a node replied to our query. * * @param d [in] library handle - * @param id [in] 20 byte node id * @param a [in] pointer to sockaddr of the node */ -void potential_node (const struct dht * d, const char * id, const struct sockaddr_in6 * a) { +void potential_node (const struct dht * d, const struct sockaddr_in6 * a) { struct bucket * bucket = d->buckets; if (family(a->sin6_addr) == AF_INET6) bucket = b->buckets6; find(rid->value, &bucket, NULL); - if (bucket->count <= K || in_bucket(d->id, bucket)) { - char random[20]; - if (getrandom(random, 20, GRND_NONBLOCK) == -1) - return; - find_node(d, id, a, random); + if (!bucket_good(bucket)); + find_node(d, a, d->id); // NOTE02: an alternative to searching for our +} // ID would be to search for a random ID + +/** + * adds a torrent to a list of torrents + * + * if the torrent already exists in the database flags of this one will be anded with the flags of the old one, meaning this function can be used to set keep, announce, info and dl flags. see @return for important details. + * + * @param d [in] dht library handler + * @param t [in] torrent object, whose memory ownership is transfered to the library and must be heap allocated + * @return the new pointer to the torrent. if the torrent is already in the storage, the passed torrent will be freed, so the return value must be checked if you intend to use the torrent weiter + */ + +struct torrent * add_torrent (struct dht * d, struct torrent * t) { + return *(struct torrent **) tsearch(t, &d->torrents, torrent_compare); +} + +/** + * deletes a torrent from storage. if you downloaded a torrent and set keep/announce flags, do not remove_torrent once you're done with it, but instead just clear keep/announce bits. this will remove the torrent when necessary. + * + * @param d [in] the library handle + * @param t [in] the pointer to the torrent to be deleted. do not craft torrent yourself, it must be stored in dht and that specific instance must be passed + */ + +void remove_torrent (struct dht * d, struct torrent * t) { + tdelete(t, &d->dht->torrents, torrent_compare); + free(t); +} + +/** + * find a torrent based on hash + * + * @param d [in] the library handle + * @param h [in] a 20 byte infohash + */ + +struct torrent * find_torrent (struct dht * d, const char * h) { + struct torrent t; + memcpy(t.hash, h, 20); + const struct torrent ** t = tfind(&t, &d->torrents, torrent_compare); + if (t) + return *t; +} + +/** + * handles an incoming packet + * + * @param d [in] library handle + * @param pkt [in] incoming UDP packet content + * @param len [in] length of the packet + * @param addr [in] IP address and port of sender + */ + +void handle (struct dht * d, char * pkt, int len, struct sockaddr_in6 addr) { + struct bdecoding * b = bdecode(pkt, len, replace); + struct bdecoding * v = bpath(b, "v"); + char * node_ver = ""; + char remote[INET_ADDRSTRLEN + INET6_ADDRSTRLEN + 7 + (v && v->type & string) ? v->valuelen : 0]; + if (!inet_ntop(addr.sa_family, &addr, remote, sizeof addr)) + snprintf(remote, sizeof remote, "(inet_ntop: %s)", strerror(errno)); + sprintf(remote+strlen(remote), ":%d", ntohs(addr.sin6_port)); + } + if (v && v->type & string) { + node_ver = v->value; + sprintf(remote+strlen(remote), "-%s", node_ver); + } + struct bdecoding * y = bpath(b, "y"); + char * msg_type = ""; + if (y && y->type & string) + msg_type = y->value; + switch (msg_type[0]) { + case 'Q': + case 'q': + struct bencoding * q = bpath(b, "q"); + char * qtype = ""; + if (q && q->type & string) + qtype = q->value; + switch (qtype[0]) { + case 'P': // ping + case 'p': + struct bencoding * rid = bpath(bpath(b, "a"), "id"); + if (rid && rid->type & string && rid->valuelen == 20) { + potential_node(d, rid->value, &addr); + } + else { // see NOTE01 + int len = b2json_length(b); + char j[len+1]; + b2json(j, b); + j[len] = '\0'; + L("%s did not send a valid id in %s", remote, j); + } + struct bencoding * id = calloc(1, sizeof *d); + id->type = dict; + id->key = bstr(strdup("id")); + id->valuelen = 20; + id->value = malloc(20); + memcpy(id->value, d->id, 20); + struct bencoding * r = calloc(1, sizeof *d); + r->type = dict; + r->key = bstr(strdup("r")); + binsert(r, id); + struct bencoding * y = bstr(strdup("r")); + y->key = bstr(strdup("y")); + struct bencoding * response = calloc(1, sizeof *r); + response->type = dict; + binsert(response, y); + binsert(response, r); + binsert(response, bclone(bpath(b, "t"))); + sendb(d, response, &addr, addrlen); + free_bencoding(response); + break; + default: // see NOTE01 + int len = b2json_length(b); + char json[len+1]; + b2json(json, b); + json[len] = '\0'; + L(dht->log, "%s sent an unknown query type: %s"); + break; + } + break; + case 'R': + case 'r': + break; + case 'E': + case 'e': + struct bdecoding * e = bpath(b, "e"); + char * errtype = "Unspecified Error"; + if (e && e->child) + switch (e->child->intvalue) { + case 201: + errtype = "Generic Error"; + break; + case 202: + errtype = "Server Error"; + break; + case 203: + errtype = "Protocol Error, such as a malformed packet, invalid arguments, or bad token"; + break; + case 204: + errtype = "Method Unknown"; + break; + default: + errtype = "Unknown Error"; + break; + } + char * msg = NULL; + if (e && e->child && e->child->next && e->child->next->type & string) + msg = e->child->next->value; + L(d->log, "%s sent %s%s%s", remote, errtype, msg ? ": " : "", msg ? msg : ""); + break; + default: // NOTE01 sending an error is unfortunately bad in this case, since clever hackers can force two servers speaking entirely different UDP based protcols into sending error messages to each other, telling one another that they don't understand each other's messages. + int len = b2json_length(b); + char json[len+1]; + b2json(json, b); + json[len] = '\0'; + L(dht->log, "%s sent an unknown type: %s"); + // send_error(d, b, &addr, addrlen, 203, "unknown type"); + break; } + struct bdecoding * trans_id = bpath(b, "t"); + free_bencoding(b); } /** - * does work; syncs with the network, handles incoming queries + * does periodic work for the library, called every 13 minutes + * + * namely, it sends UDP packets: + * - announcing all torrents with announce + * - searching deeper DHT storage nodes for torrents with keep and announce + * - get_peers on torrents with keep + * + * this can be a lot of packets, so please keep number of torrents with keep and announce low + */ + +/** + * does work; syncs with the network, handles incoming queries. * * call this: * - whenever socket can be read from (via poll/epoll/select/...) - * - every 14 minutes so that nodes with no activity are pinged + * - every 13 minutes, even if it was called for incoming packets in this time period + * + stale buckets are refreshed + * + peers are refetched for joined torrents and announcements are sent + * + calling it more often is discouraged, since every periodic call sends out UDP packets for PEX and DHT searches/announces of torrents + * + * @param d [in] dht library handle */ void work (struct dht * d) { @@ -335,116 +769,11 @@ void work (struct dht * d) { L(d->log, "addrlen changed, not parsing packet"); else if (ret > 65536) L(d->log, "recvfrom()d larger packet than 65536, not parsing packet"); - else if (ret < 0) + else if (ret < 0) { if (ret != EAGAIN) L(d->log, "recvfrom(): %s", strerror(errno)); - else { - struct bdecoding * b = bdecode(packet, ret, replace); - struct bdecoding * v = bpath(b, "v"); - char * node_ver = ""; - char remote[INET_ADDRSTRLEN + INET6_ADDRSTRLEN + 7 + (v && v->type & string) ? v->valuelen : 0]; - if (!inet_ntop(addr.sa_family, &addr, remote, addrlen)) - snprintf(remote, sizeof remote, "(inet_ntop: %s)", strerror(errno)); - sprintf(remote+strlen(remote), ":%d", ntohs(addr.sin6_port)); - } - if (v && v->type & string) { - node_ver = v->value; - sprintf(remote+strlen(remote), "-%s", node_ver); - } - struct bdecoding * y = bpath(b, "y"); - char * msg_type = ""; - if (y && y->type & string) - msg_type = y->value; - switch (msg_type[0]) { - case 'Q': - case 'q': - struct bencoding * q = bpath(b, "q"); - char * qtype = ""; - if (q && q->type & string) - qtype = q->value; - switch (qtype[0]) { - case 'P': // ping - case 'p': - struct bencoding * rid = bpath(bpath(b, "a"), "id"); - if (rid && rid->type & string && rid->valuelen == 20) { - potential_node(d, rid->value, &addr, addrlen); - } - else { // see NOTE01 - int len = b2json_length(b); - char j[len+1]; - b2json(j, b); - j[len] = '\0'; - L("%s did not send a valid id in %s", remote, j); - } - struct bencoding * id = calloc(1, sizeof *d); - id->type = dict; - id->key = bstr(strdup("id")); - id->valuelen = 20; - id->value = malloc(20); - memcpy(id->value, d->id, 20); - struct bencoding * r = calloc(1, sizeof *d); - r->type = dict; - r->key = bstr(strdup("r")); - binsert(r, id); - struct bencoding * y = bstr(strdup("r")); - y->key = bstr(strdup("y")); - struct bencoding * response = calloc(1, sizeof *r); - response->type = dict; - binsert(response, y); - binsert(response, r); - binsert(response, bclone(bpath(b, "t"))); - sendb(d, response, &addr, addrlen); - free_bencoding(response); - break; - default: // see NOTE01 - int len = b2json_length(b); - char json[len+1]; - b2json(json, b); - json[len] = '\0'; - L(dht->log, "%s sent an unknown query type: %s"); - break; - } - break; - case 'R': - case 'r': - break; - case 'E': - case 'e': - struct bdecoding * e = bpath(b, "e"); - char * errtype = "Unspecified Error"; - if (e && e->child) - switch (e->child->intvalue) { - case 201: - errtype = "Generic Error"; - break; - case 202: - errtype = "Server Error"; - break; - case 203: - errtype = "Protocol Error, such as a malformed packet, invalid arguments, or bad token"; - break; - case 204: - errtype = "Method Unknown"; - break; - default: - errtype = "Unknown Error"; - break; - } - char * msg = NULL; - if (e && e->child && e->child->next && e->child->next->type & string) - msg = e->child->next->value; - L(d->log, "%s sent %s%s%s", remote, errtype, msg ? ": " : "", msg ? msg : ""); - break; - default: // NOTE01 sending an error is unfortunately bad in this case, since clever hackers can force two servers speaking entirely different UDP based protcols into sending error messages to each other, telling one another that they don't understand each other's messages. - int len = b2json_length(b); - char json[len+1]; - b2json(json, b); - json[len] = '\0'; - L(dht->log, "%s sent an unknown type: %s"); - // send_error(d, b, &addr, addrlen, 203, "unknown type"); - break; - } - struct bdecoding * trans_id = bpath(b, "t"); - free_bencoding(b); - } + else + periodic(d); + } else + handle(d, packet, ret, addr); } |