from bencodepy import decode
from enum import Enum
from hashlib import sha1, sha256
from os import scandir
from re import search, IGNORECASE
class Type(Enum):
    """Metadata flavour of a parsed torrent.

    Every member carries a plain integer value.  Earlier revisions had
    accidental trailing commas after the first three members, which made
    their values the 1-tuples ``(0,)``, ``(1,)`` and ``(2,)`` while
    ``HYBRID`` was the int ``3``.
    """

    UNDEF = 0   # nothing parsed yet / flavour not determined
    V1 = 1      # metadata with a `files` list or a single name/length pair
    V2 = 2      # metadata with a `file tree` only
    HYBRID = 3  # metadata carrying both `files` and `file tree`

    @classmethod
    def _missing_(cls, value):
        """Resolve legacy 1-tuple values such as ``(1,)`` to their member.

        Keeps ``Type((1,))`` working for callers written against the old,
        accidentally tuple-valued members.  Returning ``None`` lets ``Enum``
        raise its usual ``ValueError`` for anything else.
        """
        if isinstance(value, tuple) and len(value) == 1:
            for member in cls:
                if member.value == value[0]:
                    return member
        return None
class Torrent():
    """In-memory view of one bencoded torrent metadata blob.

    After :meth:`parse` the instance exposes:

    * ``sha1`` / ``sha256`` -- digests of the raw info-dictionary bytes,
    * ``dict`` -- the decoded metadata, with the bulky ``pieces``, ``files``
      and ``file tree`` entries removed from its ``info`` dictionary,
    * ``files`` -- nested ``{name: size | {…}}`` mapping keyed by ``bytes``,
    * ``type`` -- a ``Type`` member describing the metadata flavour,
    * ``cache`` -- ``None`` until ``matches(..., cache=True)`` has built the
      ``|``-separated search string.
    """

    def __init__(self):
        self.sha1 = b''
        # Initialised up front (like sha1) so the attribute always exists,
        # even before parse() has run.
        self.sha256 = b''
        self.files = {}
        self.type = Type.UNDEF
        self.cache = None
        # Decoded metadata; empty until parse() replaces it.
        self.dict = {}

    def file(self, f):
        """Read the file at path *f* and parse its contents."""
        # Close the handle deterministically instead of relying on GC.
        with open(f, "rb") as handle:
            raw = handle.read()
        self.parse(raw)

    @staticmethod
    def _to_text(raw):
        """Decode *raw* bytes as UTF-8, falling back to ISO-8859-2."""
        try:
            return raw.decode()
        except UnicodeDecodeError:
            # TODO we could try detecting the encoding
            return raw.decode("iso-8859-2")

    @staticmethod
    def _is_padding(entry):
        """Return True if a v1 ``files`` entry looks like a padding file."""
        attr = entry.get(b'attr')
        if attr is not None and b'p' in attr:
            return True
        path = entry.get(b'path')
        joined = b'/'.join(path)
        return (
            b'padding.file' in joined
            # list membership: a path component that is exactly b'.pad'
            or b'.pad' in path
            or b'_____padding_file_' in joined
        )

    @staticmethod
    def _insert_file(tree, path, length):
        """Store *length* in the nested dict *tree* under the components of *path*.

        *path* is a list of components ordered from outermost directory to
        file name.  It is not modified.  Raises ``IndexError`` if *path* is
        empty.
        """
        leaf = path[-1]
        node = tree
        for part in path[:-1]:
            node = node.setdefault(part, {})
        node[leaf] = length

    @classmethod
    def _file_tree(cls, names):
        """Convert a v2 ``file tree`` dict into the ``{name: size | {…}}`` form.

        A node that contains the empty key ``b''`` is treated as a file and
        collapses to the ``length`` stored under that key.
        """
        tree = {}
        for key, child in names.items():
            if key == b'':
                return child.get(b'length')
            tree[key] = cls._file_tree(child)
        return tree

    def parse(self, b):
        """Populate this instance from the raw bencoded bytes *b*.

        The info dictionary is located textually: it is taken to start right
        after the first ``4:info`` and to end just before the last
        ``6:sourced2:ip`` (presumably a trailer appended by whatever stored
        the file -- confirm against the producer).  NOTE: when that trailer
        is absent ``rfind`` yields -1, so the slice merely drops the final
        byte of *b*.
        """
        infodict = b[b.find(b'4:info') + 6:b.rfind(b'6:sourced2:ip')]
        self.sha1 = sha1(infodict).digest()
        self.sha256 = sha256(infodict).digest()
        self.dict = decode(b)
        info = self.dict.get(b'info')
        if b'pieces' in info:
            info.pop(b'pieces')
        if b'files' in info:
            self.type = Type.V1
            for entry in info.get(b'files'):
                if self._is_padding(entry):
                    continue
                self._insert_file(self.files, entry.get(b'path'), entry.get(b'length'))
            info.pop(b'files')
        if b'file tree' in info:  # some torrents have broken file trees so we use files first
            if self.type is Type.V1:
                self.type = Type.HYBRID
            else:
                # A `file tree` without a `files` list is v2-only metadata;
                # previously the type was left as UNDEF here.
                self.type = Type.V2
                self.files = self._file_tree(info.get(b'file tree'))
            info.pop(b'file tree')
        if not self.files:
            # No file list at all: single-file layout, name -> length.
            self.type = Type.V1
            self.files[info.get(b'name')] = info.get(b'length')
        first_filename = next(iter(self.files))
        if len(self.files) == 1 and self.files[first_filename] == {}:
            # A lone entry that is an empty directory: substitute the
            # top-level length for it.
            print("fixed bad single file torrent", self.sha1.hex())
            self.files[first_filename] = info.get(b'length')

    def paths(self):
        """Yield ``(components, size)`` for every file in ``self.files``.

        ``components`` is a fresh list of ``bytes`` path parts from the top
        level down to the file name.
        """
        def walk(node, prefix):
            for name, value in node.items():
                here = prefix + [name]
                if isinstance(value, int):
                    yield here, value
                else:
                    yield from walk(value, here)
        yield from walk(self.files, [])

    def _source_ip(self):
        """Return the decoded ``source``/``ip`` entry, or ``''`` if missing."""
        source = self.dict.get(b'source')
        if not isinstance(source, dict):
            return ''
        ip = source.get(b'ip')
        return ip.decode() if isinstance(ip, bytes) else ''

    def matches(self, r, cache=False):
        """Return True if regex *r* matches the source ip, name or any file path.

        Matching is case-insensitive.  With ``cache=True`` the first call
        builds ``self.cache`` (``ip|name|path|…|``) and later calls search
        only that string.  Without caching the method returns as soon as
        anything matches.  The result is always a ``bool``.
        """
        if cache and self.cache:
            return bool(search(r, self.cache, IGNORECASE))
        info = self.dict.get(b'info')
        try:
            decoded = info.get(b'name').decode()
        except UnicodeDecodeError:
            decoded = info.get(b'name').decode("iso-8859-2")
        except AttributeError:
            # name is not bytes (e.g. missing): fall back to its str() form
            decoded = str(info.get(b'name'))
        ip = self._source_ip()
        does = False
        for text in (ip, decoded):
            if search(r, text, IGNORECASE):
                if not cache:
                    return True
                does = True
        parts = [ip, decoded]
        for path, _size in self.paths():
            decd = self._to_text(b'/'.join(path))
            if cache:
                parts.append(decd)
            if search(r, decd, IGNORECASE):
                if not cache:
                    return True
                does = True
        if cache:
            # Assigned only once complete, so an exception above can never
            # leave a partial cache behind.
            self.cache = "|".join(parts) + "|"
        return does

    def matching_files(self, r, decode=False):
        """Return the sub-tree of ``self.files`` whose names match regex *r*.

        Keys are the original ``bytes`` names, or decoded ``str`` names when
        *decode* is true.  A matching file maps to its size; a matching
        directory maps to ``{}`` unless something inside it matches too, in
        which case it maps to those matches.  Non-matching directories are
        kept only if they contain a match.
        """
        def walk(dirc):
            found = {}
            for name, content in dirc.items():
                text = self._to_text(name)
                key = text if decode else name
                if search(r, text, IGNORECASE):
                    found[key] = content if isinstance(content, int) else {}
                if isinstance(content, dict):
                    inner = walk(content)
                    if inner:
                        found[key] = inner
            return found
        return walk(self.files)

    def __repr__(self):
        return str(self.__dict__)

    def __hash__(self):
        # Hash by SHA-1 digest once one is set; otherwise by identity.
        if self.sha1:
            return int.from_bytes(self.sha1, byteorder="big")
        return id(self)
def glob(d):
    """Parse every regular ``*.torrent`` file directly inside directory *d*.

    Returns a dict mapping each torrent's SHA-1 digest (``bytes``) to its
    parsed ``Torrent``.  Subdirectories are not descended into, and entries
    that are not files are skipped even if their name ends in ``.torrent``.
    If two files share a digest, the one scanned later wins.
    """
    r = {}
    # The context manager releases the directory handle even when parsing
    # one of the files raises part-way through the scan.
    with scandir(d) as entries:
        for entry in entries:
            if entry.name.endswith(".torrent") and entry.is_file():
                t = Torrent()
                t.file(entry.path)
                r[t.sha1] = t
    return r