summaryrefslogblamecommitdiffstats
path: root/travnik.py
blob: 3373e3cab2365aac9a761f879bd0c3899253dfd2 (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15














                                      
                                 
                                      








                                                                           
                                             


                                                                         
                                                                                                                                                                                                                                           















                                                                                                                              
                                                   












                                                                                                               

                                                           


















                                                                                          



                                                                



                                                                                          

                                                                          



                                                                                       
                                                  




                                                                                                       




                                                                           
                                                
                                                       



                                                   

                                                      
                                  





                                                                                                                      
                                                                                                                    
                                                         
                                                                                     
                                                  
                                                                                           
                                    
                                                              








                                                                         







                                                                                    
                
from bencodepy import decode
from enum import Enum
from hashlib import sha1, sha256
from os import scandir
from re import search, IGNORECASE
class Type(Enum):
	"""Kind of metadata a torrent carries (BitTorrent v1, v2, or both)."""
	# NOTE: the first three values are 1-tuples, only HYBRID is a plain int.
	# They are spelled out explicitly here so the mixed value types are
	# visible; compare members by identity (Type.V1) rather than by .value.
	UNDEF = (0,)
	V1 = (1,)
	V2 = (2,)
	HYBRID = 3
class Torrent():
	"""Parsed view of a single bencoded .torrent blob.

	Attributes:
		sha1, sha256: digests of the raw info-dictionary bytes (b'' until parse() runs).
		files: nested dict; a name (bytes) maps to an int length for a file
			or to another dict for a directory.
		type: a Type member describing the metadata flavour.
		cache: "|"-separated searchable text built by matches(..., cache=True), else None.
		hadpieces: True once parse() has seen (and dropped) an info b'pieces' entry.
		dict: the decoded torrent (set by parse()) with bulky entries removed.
	"""
	def __init__(self):
		self.sha1 = b''
		self.sha256 = b''
		self.files = {}
		self.type = Type.UNDEF
		self.cache = None
		self.hadpieces = False
	def file(self, f):
		"""Read the file at path f and parse() its contents."""
		# context manager so the handle is closed even if parse() raises
		with open(f, "rb") as handle:
			self.parse(handle.read())
	@staticmethod
	def _decode_text(raw):
		"""Decode bytes as UTF-8, falling back to ISO-8859-2 for legacy names."""
		try:
			return raw.decode()
		except UnicodeDecodeError:
			return raw.decode("iso-8859-2") # TODO we could try detecting the encoding
	@staticmethod
	def _is_padding(file):
		"""Tell whether a v1 b'files' entry looks like a padding file."""
		attr = file.get(b'attr')
		if attr is not None and b'p' in attr:
			return True
		path = file.get(b'path')
		joined = b'/'.join(path)
		# b'.pad' is matched as a whole path component, the other two as substrings
		return b'padding.file' in joined or b'.pad' in path or b'_____padding_file_' in joined
	@staticmethod
	def _insert_file(tree, path, length):
		"""Store length in the nested dict tree under the components of path."""
		node = tree
		for name in path[:-1]:
			node = node.setdefault(name, {})
		node[path[-1]] = length
	@staticmethod
	def _filetree(names):
		"""Convert a v2 b'file tree' node into the nested name -> length layout."""
		r = {}
		for key in names.keys():
			if key == b'':
				# an empty key marks a file node; its dict holds the length
				return names.get(key).get(b'length')
			r[key] = Torrent._filetree(names.get(key))
		return r
	def parse(self, b):
		"""Populate this object from the raw bytes b of a torrent.

		Sets sha1, sha256, dict, files, type and hadpieces. Exceptions from
		decoding or from an unexpected structure propagate to the caller.
		"""
		# The info dictionary is taken to be everything between the first
		# b'4:info' and the last b'6:sourced2:ip'.
		# NOTE(review): this assumes the blob carries a trailing `source`
		# dictionary starting with an `ip` key right after `info` - confirm
		# against whatever produces these files.
		infodict = b[b.find(b'4:info')+6:b.rfind(b'6:sourced2:ip')]
		self.sha1 = sha1(infodict).digest()
		self.sha256 = sha256(infodict).digest()
		self.dict = decode(b)
		info = self.dict.get(b'info')
		if b'pieces' in info:
			# drop the piece hashes from the kept dict, only remember they existed
			info.pop(b'pieces')
			self.hadpieces = True
		if b'files' in info:
			self.type = Type.V1
			for file in info.get(b'files'):
				if self._is_padding(file):
					continue
				self._insert_file(self.files, file.get(b'path'), file.get(b'length'))
			info.pop(b'files')
		if b'file tree' in info: # some torrents have broken file trees so we use files first
			if self.type is Type.V1:
				# NOTE(review): in this branch the file tree stays in self.dict;
				# it is only removed in the v2-only branch below - confirm intended.
				self.type = Type.HYBRID
			else:
				self.type = Type.V2
				self.files = self._filetree(info.get(b'file tree'))
				info.pop(b'file tree')
		if not self.files:
			# neither b'files' nor a usable file tree: single-file v1 layout
			self.type = Type.V1
			self.files[info.get(b'name')] = info.get(b'length')
		first_filename = next(iter(self.files))
		if self.type == Type.V2 and self.hadpieces:
			# a file tree plus v1 piece hashes means both formats are present
			self.type = Type.HYBRID
		if len(self.files) == 1 and self.files[first_filename] == {}:
			print("fixed bad single file torrent", self.sha1.hex())
			self.files[first_filename] = info.get(b'length')
	def paths(self):
		"""Yield (path, length) for every file; path is a list of bytes components."""
		def walk(tree, prefix):
			for name, content in tree.items():
				here = prefix + [name]
				if isinstance(content, int):
					yield here, content
				else:
					yield from walk(content, here)
		yield from walk(self.files, [])
	def matches(self, r, cache=False):
		"""Search regex r (case-insensitively) in the source ip, the name and every file path.

		With cache=False the first hit returns True immediately and self.cache
		is never touched. With cache=True the searchable text is stored in
		self.cache as "ip|name|path|path|...|" and later cache=True calls
		search that string instead, returning the re.search() result (a match
		object or None) rather than a bool. Treat the return value as truthy/falsy.
		"""
		if cache and self.cache:
			return search(r, self.cache, IGNORECASE)
		info = self.dict.get(b'info')
		try:
			decoded = info.get(b'name').decode()
		except UnicodeDecodeError:
			decoded = info.get(b'name').decode("iso-8859-2")
		except AttributeError:
			# name missing or not bytes: fall back to its str() form
			decoded = str(info.get(b'name'))
		source_ip = self.dict.get(b'source').get(b'ip').decode()
		does = False
		parts = [source_ip, decoded]
		for text in parts:
			if search(r, text, IGNORECASE):
				if not cache:
					return True
				does = True
		for path, size in self.paths():
			decd = self._decode_text(b'/'.join(path))
			# collected locally; self.cache is only written when caching was requested
			parts.append(decd)
			if search(r, decd, IGNORECASE):
				if not cache:
					return True
				does = True
		if cache:
			# keep scanning after a hit above so the cache covers every path
			self.cache = "|".join(parts) + "|"
		return does
	def matching_files(self, r, decode=False):
		"""Return the sub-tree of self.files whose names match regex r.

		A matching file keeps its length; a matching directory appears as {}
		unless some descendants match, in which case it holds only those.
		Directories that do not match are still included when a descendant
		does. Keys are str when decode is true, otherwise the original bytes.
		"""
		def matching_files_r(dirc, r, decode):
			files = {}
			for name, content in dirc.items():
				decoded = self._decode_text(name)
				key = decoded if decode else name
				if search(r, decoded, IGNORECASE):
					files[key] = content if isinstance(content, int) else {}
				if isinstance(content, dict):
					inhalt = matching_files_r(content, r, decode)
					if inhalt:
						files[key] = inhalt
			return files
		return matching_files_r(self.files, r, decode)
	def __repr__(self):
		return str(self.__dict__)
	def __hash__(self):
		# hash by info-hash once parsed, by identity before that
		if self.sha1:
			return int.from_bytes(self.sha1, byteorder="big")
		return id(self)
def glob(d):
	"""Parse every regular file named *.torrent directly inside directory d.

	Returns a dict mapping each torrent's sha1 digest to its Torrent object.
	A file that raises while being checked or parsed is reported on stdout
	and skipped, so one broken torrent does not abort the scan.
	"""
	torrents = {}
	for f in scandir(d):
		try:
			if not (f.name.endswith(".torrent") and f.is_file()):
				continue
			torrent = Torrent()
			torrent.file(f.path)
			torrents[torrent.sha1] = torrent
		except Exception as e:
			print(f"skipping broken torrent {f.name} due to exception:")
			print(e)
	return torrents