diff options
-rw-r--r-- | analiza/zvezek.ipynb | 2 | ||||
-rw-r--r-- | makefile | 3 | ||||
-rw-r--r-- | src/bencoding.c | 3 | ||||
-rw-r--r-- | src/metainfo.c | 90 | ||||
-rw-r--r-- | templates/index.html | 6 | ||||
-rw-r--r-- | travnik.py | 25 | ||||
-rwxr-xr-x | www/app.py | 17 |
7 files changed, 133 insertions, 13 deletions
diff --git a/analiza/zvezek.ipynb b/analiza/zvezek.ipynb index b88677b..9486705 100644 --- a/analiza/zvezek.ipynb +++ b/analiza/zvezek.ipynb @@ -15,7 +15,7 @@ "......\n", "fixed bad single file torrent 4f269d8aefd647ee270842d53ec98aebd23a4afe\n", "fixed bad single file torrent 7b09ae0b612dafc1744562dccbbe4becf4d633c3\n", - "38021 @ 413.0262871221639 s\n" + "43143 @ 501.31311491318047 s\n" ] } ], @@ -11,6 +11,9 @@ tmp: travnik: src/main.c $(CC) $(MYCFLAGS) $(CFLAGS) $< -o$@ $(MYLDFLAGS) $(LDFLAGS) +db: src/db.c + $(CC) $(MYCFLAGS) $(CFLAGS) $< -o$@ $(MYLDFLAGS) $(LDFLAGS) + utils: tmp/midpoint tmp/bencoding tmp/dns tmp/info tmp/%: utils/%.c src/dht.c tmp diff --git a/src/bencoding.c b/src/bencoding.c index 9ff9f89..6e8a2a2 100644 --- a/src/bencoding.c +++ b/src/bencoding.c @@ -1,3 +1,6 @@ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> #include <stdbool.h> /** diff --git a/src/metainfo.c b/src/metainfo.c new file mode 100644 index 0000000..1a0adcc --- /dev/null +++ b/src/metainfo.c @@ -0,0 +1,90 @@ +#include <bencoding.c> +struct inode { + char * name; + int length; + struct inode * next; + struct inode * child; + struct inode * parent; +}; +struct inode * next (struct inode * inode) { + if (inode->child) + return next(inode->child); + if (inode->next) { + if (inode->next->child) + return next(inode->next->child); + return inode->next; + } + struct inode * predecesor = inode; + while (!predecesor->next) { + predecesor = predecesor->parent; + if (!predecesor) + return NULL; + } + return predecesor->next; +} +enum type { + UNSPEC = 0, + V1 = 1, + V2 = 2 + HYBRID = 1 | 2 +} +struct metainfo { + struct inode * files; + struct sockaddr_in6 ip; + time_t retrieved; + time_t created; + enum type type; + char sha1[20]; + char sha256[32]; + char * name; + char * client; + char * source; + char * publisher; + char * publisher_url; + char * comment; +}; +void metainfo_free (struct metainfo * metainfo) { + inode_free(metainfo->files); + free(metainfo->name); + free(metainfo->client); + free(metainfo->source); + free(metainfo->publisher); + free(metainfo->publisher_url) + free(metainfo->comment); + free(metainfo); +} +struct metainfo * parse (const struct bencoding * metainfo) { + struct metainfo * r = calloc(1, sizeof(metainfo)); +#define EXTRACT(attribute, localvar, from) \ + struct bencoding * localvar = bpath(metainfo, from); \ + if (localvar && localvar->valuelen) { \ + free(r->attribute); \ + r->attribute = localvar->value; \ + localvar->value = NULL; /* this is nonstandard, but it's a monorepo */ \ + } + EXTRACT(name, name, "info/name"); + EXTRACT(name, nameutf8, "info/name.utf-8"); + EXTRACT(client, client, "source/v"); + EXTRACT(source, source, "info/source"); + EXTRACT(publisher, publisher, "info/publisher"); + EXTRACT(publisher, publisherutf8, "info/publisher.utf-8"); + EXTRACT(publisher_url, publisher_url, "info/publisher-url"); + EXTRACT(publisher_url, publisher_urlutf8, "info/publisher-url.utf-8"); + EXTRACT(comment, comment, "comment"); + struct bencoding * retrieved = bpath(metainfo, "info/creation date"); + if (retrieved && retrieved->valuelen) + r->retrieved = atoi(retrieved->value); + struct bencoding * created = bpath(metainfo, "creation date"); + if (created && created->intvalue) + r->created = created->intvalue; + struct bencoding * files = bpath(metainfo, "info/files"); + if (files) { + r->type = V1; + bforeach (files, file) { + struct attr * bpath(file, "attr"); + if (attr && attr->valuelen && strchr(attr, 'p')) + continue; + struct path * bpath(file, "path"); + } + } +} diff --git a/templates/index.html b/templates/index.html index 2547750..9c0a3f1 100644 --- a/templates/index.html +++ b/templates/index.html @@ -10,7 +10,7 @@ </head> <body> <h1> - travnik + <a href=/>travnik</a> </h1> <form> <label for=regex> @@ -43,7 +43,7 @@ {% endif %} {% for torrent in found_torrents %} <h2><a href={{ torrent.magnet }}>{{ torrent.name | e }}</a></h2> - <p>velikost: {{ (torrent.size/(1024**3)) | round(3) }} GiB | datum najdbe: {{ torrent.found.strftime("%c") }} | število datotek: {{ torrent.files }}</p> + <p>velikost: {{ (torrent.size/(1024**3)) | round(3) }} GiB | prejel v {{ torrent.found.strftime("%c") }} od {{ torrent.ip }} | število datotek: {{ torrent.files }}</p> <b><ul> {% macro direktorij(dir) %} {% for ime, element in dir.items() %} @@ -54,7 +54,7 @@ {{ direktorij(element) }} </ul> {% else %} - ({{ (element/(1024**3)) | round(3) }} MiB) + ({{ (element/(1024**2)) | round(3) }} MiB) {% endif %} </li> {% endfor %} @@ -13,6 +13,7 @@ class Torrent(): self.sha1 = b'' self.files = {} self.type = Type.UNDEF + self.cache = None def file(self, f): self.parse(open(f, "rb").read()) def parse(self, b): @@ -25,7 +26,7 @@ class Torrent(): if b'files' in self.dict.get(b'info').keys(): self.type = Type.V1 for file in self.dict.get(b'info').get(b'files'): - if file.get(b'attr') is not None and b'p' in file.get(b'attr') or b'padding.file' in b'/'.join(file.get(b'path')) or b'.pad' in file.get(b'path'): + if file.get(b'attr') is not None and b'p' in file.get(b'attr') or b'padding.file' in b'/'.join(file.get(b'path')) or b'.pad' in file.get(b'path') or b'_____padding_file_' in b'/'.join(file.get(b'path')): continue def insert_file(d, path, length, self): name = path.pop() @@ -74,23 +75,37 @@ class Torrent(): yield z, v for z, v in paths_r(self.files): yield z, v - def matches(self, r): + def matches(self, r, cache=False): + does = False + if cache and self.cache: + return search(r, self.cache, IGNORECASE) try: decoded = self.dict.get(b'info').get(b'name').decode() except UnicodeDecodeError: decoded = self.dict.get(b'info').get(b'name').decode("iso-8859-2") except AttributeError: decoded = str(self.dict.get(b'info').get(b'name')) + if search(r, self.dict.get(b'source').get(b'ip').decode(), IGNORECASE): + does = True + if not cache: + return True if search(r, decoded, IGNORECASE): - return True + does = True + if not cache: + return True + if cache: + self.cache = self.dict.get(b'source').get(b'ip').decode() + "|" + decoded + "|" for path, size in self.paths(): try: decd = b'/'.join(path).decode() except UnicodeDecodeError: decd = b'/'.join(path).decode("iso-8859-2") + self.cache += decd + "|" if search(r, decd, IGNORECASE): - return True - return False + does = True + if not cache: + return True + return does def matching_files(self, r, decode=False): def matching_files_r(dirc, r, decode): files = {} @@ -8,6 +8,8 @@ from psutil import Process from urllib.parse import quote from datetime import datetime from locale import setlocale, LC_ALL +if getenv("PROFILE"): + from cProfile import Profile path.append(".") from travnik import glob, Type setlocale(LC_ALL, "") @@ -23,26 +25,33 @@ def mno(quantity, types): return types[0] @app.route("/") def index(): + if getenv("PROFILE"): + pr = Profile() + pr.enable() result = [] for hash, torrent in torrents.items(): if not request.args.get("regex"): break if len(result) >= 100: break - if torrent.matches(request.args.get("regex")): + if torrent.matches(request.args.get("regex"), True if getenv("CACHE") else False): try: decodedname = torrent.dict.get(b'info').get(b'name').decode() except UnicodeDecodeError: decodedname = torrent.dict.get(b'info').get(b'name').decode("iso-8859-2") - this = {"tree": torrent.matching_files(request.args.get("regex") if sum(1 for name, size in torrent.paths()) > 10 else "", True), "files": sum(1 for name, size in torrent.paths()), "found": datetime.fromtimestamp(torrent.dict.get(b'creation date')), "size": sum(size for name, size in torrent.paths()), "name": decodedname, "magnet": "magnet:?dn=" + quote(torrent.dict.get(b'info').get(b'name')) + this = {"ip": torrent.dict.get(b'source').get(b'ip').decode(), "tree": torrent.matching_files(request.args.get("regex") if sum(1 for name, size in torrent.paths()) > 10 else "", True), "files": sum(1 for name, size in torrent.paths()), "found": datetime.fromtimestamp(torrent.dict.get(b'creation date')), "size": sum(size for name, size in torrent.paths()), "name": decodedname, "magnet": "magnet:?dn=" + quote(torrent.dict.get(b'info').get(b'name')) + (("&xt=urn:btih:" + torrent.sha1.hex()) if torrent.type == Type.V1 or torrent.type == Type.HYBRID else "") + (("&xt=urn:btmh:1220" + torrent.sha256.hex()) if torrent.type == Type.V2 or torrent.type == Type.HYBRID else "")} result.append(this) result = sorted(result, reverse=(request.args.get("order") == "padajoče"), key=lambda x:x["found"].timestamp() if request.args.get("sort") == "datumu" else x["files" if request.args.get("sort") == "datotekah" else "size" if request.args.get("sort") == "velikosti" else "crash"]) - return render_template("index.html", found_torrents=result, lentorrents=len(result)) + out = render_template("index.html", found_torrents=result, lentorrents=len(result)) + if pr is not None: + pr.disable() + pr.dump_stats(getenv("PROFILE")) + return out if __name__ == "__main__": print("zaganjam travnik", argv[0], "... zagon traja dolgo časa (~5 min za ~40k torrentov. za delovanje je potrebnih ~300 MiB RAM RES za ~40k torrentov. sharding je WIP.") start = monotonic() torrents = glob(getenv("TORRENTS") if getenv("TORRENTS") else ".") app.jinja_env.globals.update(mno=mno, zagontekst=zagon.strftime("%c"), torrentov=len(torrents), rammib=round(Process(getpid()).memory_info().rss/(1024*1024)), roundstartuptime=round(monotonic()-start)) app.jinja_env.add_extension('jinja2.ext.loopcontrols') - app.run(host="::", port=8080, debug=True) + app.run(host="::", port=8080, debug=False if getenv("NDEBUG") else True) |