summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--analiza/zvezek.ipynb2
-rw-r--r--makefile3
-rw-r--r--src/bencoding.c3
-rw-r--r--src/metainfo.c90
-rw-r--r--templates/index.html6
-rw-r--r--travnik.py25
-rwxr-xr-xwww/app.py17
7 files changed, 133 insertions, 13 deletions
diff --git a/analiza/zvezek.ipynb b/analiza/zvezek.ipynb
index b88677b..9486705 100644
--- a/analiza/zvezek.ipynb
+++ b/analiza/zvezek.ipynb
@@ -15,7 +15,7 @@
"......\n",
"fixed bad single file torrent 4f269d8aefd647ee270842d53ec98aebd23a4afe\n",
"fixed bad single file torrent 7b09ae0b612dafc1744562dccbbe4becf4d633c3\n",
- "38021 @ 413.0262871221639 s\n"
+ "43143 @ 501.31311491318047 s\n"
]
}
],
diff --git a/makefile b/makefile
index 15f54a9..f32985e 100644
--- a/makefile
+++ b/makefile
@@ -11,6 +11,9 @@ tmp:
travnik: src/main.c
$(CC) $(MYCFLAGS) $(CFLAGS) $< -o$@ $(MYLDFLAGS) $(LDFLAGS)
+db: src/db.c
+ $(CC) $(MYCFLAGS) $(CFLAGS) $< -o$@ $(MYLDFLAGS) $(LDFLAGS)
+
utils: tmp/midpoint tmp/bencoding tmp/dns tmp/info
tmp/%: utils/%.c src/dht.c tmp
diff --git a/src/bencoding.c b/src/bencoding.c
index 9ff9f89..6e8a2a2 100644
--- a/src/bencoding.c
+++ b/src/bencoding.c
@@ -1,3 +1,6 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
#include <stdbool.h>
/**
diff --git a/src/metainfo.c b/src/metainfo.c
new file mode 100644
index 0000000..1a0adcc
--- /dev/null
+++ b/src/metainfo.c
@@ -0,0 +1,90 @@
+#include <bencoding.c>
+/**
+ * one node of a torrent's file tree
+ *
+ * nodes are linked three ways: next (following node on the same level), child
+ * (first node one level below) and parent (node one level above); this is how
+ * next() walks them. unused links are NULL.
+ */
+struct inode {
+	char * name;		/* presumably the path component of this node -- TODO confirm once the tree is built */
+	long long length;	/* presumably the file size in bytes; wider than int because files above 2 GiB are common */
+	struct inode * next;
+	struct inode * child;
+	struct inode * parent;
+};
+/**
+ * returns the node that follows the given one in a walk over the inode tree
+ *
+ * three cases, tried in this order:
+ *  - the node has a child: the result is whatever next() yields for that child
+ *    (the child itself is not returned)
+ *  - the node has a next link: if that node has a child, the result is next()
+ *    of that child, otherwise the next node itself
+ *  - otherwise parent links are followed upwards until a node with a next link
+ *    is found, and that next node is returned
+ *
+ * NOTE(review): in the third case the returned node is not descended into,
+ * unlike in the second case -- confirm this asymmetry is intended
+ *
+ * @param inode	node to continue from, must not be NULL
+ * @return	the following node, or NULL when the walk runs out of parents
+ */
+struct inode * next (struct inode * inode) {
+ if (inode->child)
+ return next(inode->child);
+ if (inode->next) {
+ if (inode->next->child)
+ return next(inode->next->child);
+ return inode->next;
+ }
+ /* inode->next is NULL here, so the loop always climbs at least one level */
+ struct inode * predecesor = inode;
+ while (!predecesor->next) {
+ predecesor = predecesor->parent;
+ if (!predecesor)
+ return NULL;
+ }
+ return predecesor->next;
+}
+/**
+ * torrent flavour, usable as a bit set: HYBRID has both the V1 and the V2 bit
+ * set, UNSPEC has neither
+ */
+enum type {
+	UNSPEC = 0,
+	V1 = 1,
+	V2 = 2,
+	HYBRID = V1 | V2
+};
+/**
+ * parsed description of one torrent, as filled in by parse()
+ *
+ * all char * members are either NULL or heap strings owned by this struct;
+ * metainfo_free() releases them together with the struct itself
+ */
+struct metainfo {
+	struct inode *files;		/* file tree, released with inode_free() */
+	struct sockaddr_in6 ip;		/* presumably the peer the torrent was received from -- not set by parse() yet */
+	time_t retrieved;		/* parse() reads this from "info/creation date" */
+	time_t created;			/* parse() reads this from "creation date" */
+	enum type type;
+	char sha1[20];
+	char sha256[32];
+	char *name;			/* "info/name", overridden by "info/name.utf-8" */
+	char *client;			/* "source/v" */
+	char *source;			/* "info/source" */
+	char *publisher;		/* "info/publisher", overridden by "info/publisher.utf-8" */
+	char *publisher_url;		/* "info/publisher-url", overridden by "info/publisher-url.utf-8" */
+	char *comment;			/* "comment" */
+};
+/**
+ * releases a struct metainfo: its file tree, every string it owns and finally
+ * the struct itself
+ *
+ * @param metainfo	object to release; NULL is accepted and ignored, like free(NULL)
+ */
+void metainfo_free (struct metainfo * metainfo) {
+	if (!metainfo)
+		return;
+	inode_free(metainfo->files); /* defined outside this view; assumed to release the whole tree -- TODO confirm */
+	free(metainfo->name);
+	free(metainfo->client);
+	free(metainfo->source);
+	free(metainfo->publisher);
+	free(metainfo->publisher_url);
+	free(metainfo->comment);
+	free(metainfo);
+}
+/**
+ * builds a struct metainfo out of a decoded torrent
+ *
+ * string attributes are not copied: the value pointer is moved out of the
+ * bencoding node into the result and the node's value is set to NULL, so the
+ * input tree is modified even though it is passed as const. where a plain and
+ * a ".utf-8" key both exist, the ".utf-8" one wins because it is extracted
+ * later.
+ *
+ * the file list is only scanned so far (padding files are skipped); the inode
+ * tree in r->files is not built yet.
+ *
+ * @param metainfo	root of the decoded torrent
+ * @return		newly allocated object to be released with metainfo_free(),
+ *			or NULL if the allocation failed
+ */
+struct metainfo * parse (const struct bencoding * metainfo) {
+	struct metainfo * r = calloc(1, sizeof *r); /* sizeof(metainfo) would be the size of the pointer parameter */
+	if (!r)
+		return NULL;
+#define EXTRACT(attribute, localvar, from) \
+	do { \
+		struct bencoding * localvar = bpath(metainfo, from); \
+		if (localvar && localvar->valuelen) { \
+			free(r->attribute); \
+			r->attribute = localvar->value; \
+			localvar->value = NULL; /* this is nonstandard, but it's a monorepo */ \
+		} \
+	} while (0)
+	EXTRACT(name, name, "info/name");
+	EXTRACT(name, nameutf8, "info/name.utf-8");
+	EXTRACT(client, client, "source/v");
+	EXTRACT(source, source, "info/source");
+	EXTRACT(publisher, publisher, "info/publisher");
+	EXTRACT(publisher, publisherutf8, "info/publisher.utf-8");
+	EXTRACT(publisher_url, publisher_url, "info/publisher-url");
+	EXTRACT(publisher_url, publisher_urlutf8, "info/publisher-url.utf-8");
+	EXTRACT(comment, comment, "comment");
+#undef EXTRACT
+	struct bencoding * retrieved = bpath(metainfo, "info/creation date");
+	if (retrieved && retrieved->valuelen)
+		r->retrieved = strtoll(retrieved->value, NULL, 10); /* unlike atoi, well defined on overflow */
+	struct bencoding * created = bpath(metainfo, "creation date");
+	if (created && created->intvalue)
+		r->created = created->intvalue;
+	struct bencoding * files = bpath(metainfo, "info/files");
+	if (files) {
+		r->type = V1;
+		bforeach (files, file) {
+			struct bencoding * attr = bpath(file, "attr");
+			if (attr && attr->valuelen && strchr(attr->value, 'p'))
+				continue; /* 'p' in attr marks a padding file */
+			struct bencoding * path = bpath(file, "path");
+			(void) path; /* TODO: insert the file into r->files */
+		}
+	}
+	return r;
+}
diff --git a/templates/index.html b/templates/index.html
index 2547750..9c0a3f1 100644
--- a/templates/index.html
+++ b/templates/index.html
@@ -10,7 +10,7 @@
</head>
<body>
<h1>
- travnik
+ <a href=/>travnik</a>
</h1>
<form>
<label for=regex>
@@ -43,7 +43,7 @@
{% endif %}
{% for torrent in found_torrents %}
<h2><a href={{ torrent.magnet }}>{{ torrent.name | e }}</a></h2>
- <p>velikost: {{ (torrent.size/(1024**3)) | round(3) }} GiB | datum najdbe: {{ torrent.found.strftime("%c") }} | število datotek: {{ torrent.files }}</p>
+ <p>velikost: {{ (torrent.size/(1024**3)) | round(3) }} GiB | prejel v {{ torrent.found.strftime("%c") }} od {{ torrent.ip }} | število datotek: {{ torrent.files }}</p>
<b><ul>
{% macro direktorij(dir) %}
{% for ime, element in dir.items() %}
@@ -54,7 +54,7 @@
{{ direktorij(element) }}
</ul>
{% else %}
- ({{ (element/(1024**3)) | round(3) }} MiB)
+ ({{ (element/(1024**2)) | round(3) }} MiB)
{% endif %}
</li>
{% endfor %}
diff --git a/travnik.py b/travnik.py
index 7aea6e4..672ed00 100644
--- a/travnik.py
+++ b/travnik.py
@@ -13,6 +13,7 @@ class Torrent():
self.sha1 = b''
self.files = {}
self.type = Type.UNDEF
+ self.cache = None
def file(self, f):
self.parse(open(f, "rb").read())
def parse(self, b):
@@ -25,7 +26,7 @@ class Torrent():
if b'files' in self.dict.get(b'info').keys():
self.type = Type.V1
for file in self.dict.get(b'info').get(b'files'):
- if file.get(b'attr') is not None and b'p' in file.get(b'attr') or b'padding.file' in b'/'.join(file.get(b'path')) or b'.pad' in file.get(b'path'):
+ if file.get(b'attr') is not None and b'p' in file.get(b'attr') or b'padding.file' in b'/'.join(file.get(b'path')) or b'.pad' in file.get(b'path') or b'_____padding_file_' in b'/'.join(file.get(b'path')):
continue
def insert_file(d, path, length, self):
name = path.pop()
@@ -74,23 +75,37 @@ class Torrent():
yield z, v
for z, v in paths_r(self.files):
yield z, v
- def matches(self, r):
+ def matches(self, r, cache=False):
+ does = False
+ if cache and self.cache:
+ return search(r, self.cache, IGNORECASE)
try:
decoded = self.dict.get(b'info').get(b'name').decode()
except UnicodeDecodeError:
decoded = self.dict.get(b'info').get(b'name').decode("iso-8859-2")
except AttributeError:
decoded = str(self.dict.get(b'info').get(b'name'))
+ if search(r, self.dict.get(b'source').get(b'ip').decode(), IGNORECASE):
+ does = True
+ if not cache:
+ return True
if search(r, decoded, IGNORECASE):
- return True
+ does = True
+ if not cache:
+ return True
+ if cache:
+ self.cache = self.dict.get(b'source').get(b'ip').decode() + "|" + decoded + "|"
for path, size in self.paths():
try:
decd = b'/'.join(path).decode()
except UnicodeDecodeError:
decd = b'/'.join(path).decode("iso-8859-2")
+ self.cache += decd + "|"
if search(r, decd, IGNORECASE):
- return True
- return False
+ does = True
+ if not cache:
+ return True
+ return does
def matching_files(self, r, decode=False):
def matching_files_r(dirc, r, decode):
files = {}
diff --git a/www/app.py b/www/app.py
index de60c37..b7df4fc 100755
--- a/www/app.py
+++ b/www/app.py
@@ -8,6 +8,8 @@ from psutil import Process
from urllib.parse import quote
from datetime import datetime
from locale import setlocale, LC_ALL
+if getenv("PROFILE"):
+ from cProfile import Profile
path.append(".")
from travnik import glob, Type
setlocale(LC_ALL, "")
@@ -23,26 +25,33 @@ def mno(quantity, types):
return types[0]
@app.route("/")
def index():
+ if getenv("PROFILE"):
+ pr = Profile()
+ pr.enable()
result = []
for hash, torrent in torrents.items():
if not request.args.get("regex"):
break
if len(result) >= 100:
break
- if torrent.matches(request.args.get("regex")):
+ if torrent.matches(request.args.get("regex"), True if getenv("CACHE") else False):
try:
decodedname = torrent.dict.get(b'info').get(b'name').decode()
except UnicodeDecodeError:
decodedname = torrent.dict.get(b'info').get(b'name').decode("iso-8859-2")
- this = {"tree": torrent.matching_files(request.args.get("regex") if sum(1 for name, size in torrent.paths()) > 10 else "", True), "files": sum(1 for name, size in torrent.paths()), "found": datetime.fromtimestamp(torrent.dict.get(b'creation date')), "size": sum(size for name, size in torrent.paths()), "name": decodedname, "magnet": "magnet:?dn=" + quote(torrent.dict.get(b'info').get(b'name'))
+ this = {"ip": torrent.dict.get(b'source').get(b'ip').decode(), "tree": torrent.matching_files(request.args.get("regex") if sum(1 for name, size in torrent.paths()) > 10 else "", True), "files": sum(1 for name, size in torrent.paths()), "found": datetime.fromtimestamp(torrent.dict.get(b'creation date')), "size": sum(size for name, size in torrent.paths()), "name": decodedname, "magnet": "magnet:?dn=" + quote(torrent.dict.get(b'info').get(b'name'))
+ (("&xt=urn:btih:" + torrent.sha1.hex()) if torrent.type == Type.V1 or torrent.type == Type.HYBRID else "") + (("&xt=urn:btmh:1220" + torrent.sha256.hex()) if torrent.type == Type.V2 or torrent.type == Type.HYBRID else "")}
result.append(this)
result = sorted(result, reverse=(request.args.get("order") == "padajoče"), key=lambda x:x["found"].timestamp() if request.args.get("sort") == "datumu" else x["files" if request.args.get("sort") == "datotekah" else "size" if request.args.get("sort") == "velikosti" else "crash"])
- return render_template("index.html", found_torrents=result, lentorrents=len(result))
+ out = render_template("index.html", found_torrents=result, lentorrents=len(result))
+ if pr is not None:
+ pr.disable()
+ pr.dump_stats(getenv("PROFILE"))
+ return out
if __name__ == "__main__":
print("zaganjam travnik", argv[0], "... zagon traja dolgo časa (~5 min za ~40k torrentov. za delovanje je potrebnih ~300 MiB RAM RES za ~40k torrentov. sharding je WIP.")
start = monotonic()
torrents = glob(getenv("TORRENTS") if getenv("TORRENTS") else ".")
app.jinja_env.globals.update(mno=mno, zagontekst=zagon.strftime("%c"), torrentov=len(torrents), rammib=round(Process(getpid()).memory_info().rss/(1024*1024)), roundstartuptime=round(monotonic()-start))
app.jinja_env.add_extension('jinja2.ext.loopcontrols')
- app.run(host="::", port=8080, debug=True)
+ app.run(host="::", port=8080, debug=False if getenv("NDEBUG") else True)