summaryrefslogtreecommitdiffstats
path: root/travnik.py
blob: 3373e3cab2365aac9a761f879bd0c3899253dfd2 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
from bencodepy import decode
from enum import Enum
from hashlib import sha1, sha256
from os import scandir
from re import search, IGNORECASE
class Type(Enum):
	"""Metainfo flavour of a torrent, as determined by Torrent.parse().

	UNDEF is the state before parsing; V1, V2 and HYBRID are assigned
	depending on which of b'files', b'file tree' and b'pieces' appear in
	the info dict.

	Every member value is a plain int. Stray trailing commas used to turn
	the first three values into one-element tuples such as (0,), while
	HYBRID was the int 3; the values are now consistent.
	"""
	UNDEF = 0
	V1 = 1
	V2 = 2
	HYBRID = 3
class Torrent():
	"""In-memory representation of one parsed .torrent file.

	Attributes set by __init__ and filled in by parse():
		sha1: SHA-1 digest of the raw info-dict bytes (b'' before parsing).
		sha256: SHA-256 digest of the same bytes (b'' before parsing).
		files: nested dict; a key is a bytes name, a value is either an int
			length (file) or another dict of the same shape (directory).
		type: a Type member.
		cache: "|"-terminated text of everything matches() searches, built
			by matches(..., cache=True); None until then.
		hadpieces: True when the info dict contained b'pieces'.
		dict: the decoded torrent (only set by parse()); b'pieces', b'files'
			and, for non-hybrid torrents, b'file tree' are removed from its
			info dict.
	"""

	def __init__(self):
		self.sha1 = b''
		self.sha256 = b''
		self.files = {}
		self.type = Type.UNDEF
		self.cache = None
		self.hadpieces = False

	@staticmethod
	def _to_text(raw):
		"""Decode bytes as UTF-8, falling back to ISO-8859-2 when that fails."""
		try:
			return raw.decode()
		except UnicodeDecodeError:
			return raw.decode("iso-8859-2") # TODO we could try detecting the encoding

	@staticmethod
	def _is_padding(entry):
		"""Tell whether a v1 b'files' entry should be skipped as a padding file."""
		attr = entry.get(b'attr')
		if attr is not None and b'p' in attr:
			return True
		path = entry.get(b'path')
		joined = b'/'.join(path)
		# b'.pad' is tested against the component list, i.e. it must be a whole
		# path component, while the other two markers are substring tests.
		return b'padding.file' in joined or b'.pad' in path or b'_____padding_file_' in joined

	@staticmethod
	def _insert_file(tree, path, length):
		"""Store length in the nested dict tree under the components of path.

		Missing intermediate directories are created. Raises IndexError for
		an empty path.
		"""
		node = tree
		for name in path[:-1]:
			if name not in node.keys():
				node[name] = {}
			node = node[name]
		node[path[-1]] = length

	@staticmethod
	def _file_tree(names):
		"""Convert a b'file tree' node into the nested name -> length/dict form.

		A node that has a b'' key is a file; its b'length' is returned in
		place of a dict.
		"""
		converted = {}
		for key in names.keys():
			if key == b'':
				return names.get(key).get(b'length')
			converted[key] = Torrent._file_tree(names.get(key))
		return converted

	def file(self, f):
		"""Read the file at path f and parse() its contents."""
		# The context manager closes the handle promptly instead of leaving
		# that to the garbage collector.
		with open(f, "rb") as handle:
			raw = handle.read()
		self.parse(raw)

	def parse(self, b):
		"""Parse bencoded torrent bytes b and populate this object.

		Sets sha1, sha256, dict, files, type and hadpieces. Prints a notice
		when a single-entry torrent whose only entry is an empty directory
		is rewritten to use the info dict's b'length'.
		"""
		# The hashes are taken over the raw bytes between the b'4:info' key and
		# the last b'6:sourced2:ip' marker, not over a re-encoded dict.
		# NOTE(review): when the marker is absent rfind() returns -1, so only
		# the final byte is dropped; when b'4:info' is absent find() returns -1
		# and the slice starts at offset 5. Presumably inputs always carry the
		# b'source' trailer - confirm against the producer of these files.
		infodict = b[b.find(b'4:info')+6:b.rfind(b'6:sourced2:ip')]
		self.sha1 = sha1(infodict).digest()
		self.sha256 = sha256(infodict).digest()
		self.dict = decode(b)
		info = self.dict.get(b'info')
		if b'pieces' in info:
			info.pop(b'pieces')
			self.hadpieces = True
		if b'files' in info.keys():
			self.type = Type.V1
			for entry in info.get(b'files'):
				if self._is_padding(entry):
					continue
				self._insert_file(self.files, entry.get(b'path'), entry.get(b'length'))
			info.pop(b'files')
		if b'file tree' in info.keys(): # some torrents have broken file trees so we use files first
			if self.type is Type.V1:
				# NOTE(review): in this branch b'file tree' stays in self.dict,
				# unlike the branch below - confirm that this is intended.
				self.type = Type.HYBRID
			else:
				self.type = Type.V2
				self.files = self._file_tree(info.get(b'file tree'))
				info.pop(b'file tree')
		if not self.files:
			# Neither b'files' nor b'file tree' produced entries: single-file layout.
			self.type = Type.V1
			self.files[info.get(b'name')] = info.get(b'length')
		first_filename = next(iter(self.files))
		if self.type == Type.V2 and self.hadpieces:
			self.type = Type.HYBRID
		if len(self.files) == 1 and self.files[first_filename] == {}:
			print("fixed bad single file torrent", self.sha1.hex())
			self.files[first_filename] = info.get(b'length')

	def paths(self):
		"""Yield (path, length) for every file in self.files.

		path is a list of bytes components from the top level down to the
		file name; entries are produced in dict order, depth first.
		"""
		def walk(node, prefix):
			for name, value in node.items():
				here = prefix + [name]
				if type(value) is int:
					yield here, value
				else:
					yield from walk(value, here)
		yield from walk(self.files, [])

	def matches(self, r, cache=False):
		"""Return True when regex r matches the source ip, the name or any file path.

		Matching is case-insensitive (re.search with IGNORECASE). File paths
		are joined with "/" before matching.

		With cache=False the search stops at the first hit and self.cache is
		left untouched. With cache=True the searched text is stored in
		self.cache as "ip|name|path|...|" on the first call, and later cached
		calls search that single string instead.
		"""
		if cache and self.cache:
			return search(r, self.cache, IGNORECASE) is not None
		name = self.dict.get(b'info').get(b'name')
		try:
			decoded = name.decode()
		except UnicodeDecodeError:
			decoded = name.decode("iso-8859-2")
		except AttributeError:
			# name is not bytes (e.g. missing); fall back to its str() form
			decoded = str(name)
		ip = self.dict.get(b'source').get(b'ip').decode()
		does = False
		for text in (ip, decoded):
			if search(r, text, IGNORECASE):
				if not cache:
					return True
				does = True
		# Cache text is collected only when requested. Appending to self.cache
		# unconditionally raised TypeError while it was still None and
		# duplicated paths on every later uncached call.
		parts = [ip, decoded] if cache else None
		for path, size in self.paths():
			decd = self._to_text(b'/'.join(path))
			if cache:
				parts.append(decd)
			if search(r, decd, IGNORECASE):
				if not cache:
					return True
				does = True
		if cache:
			# Stored only after the loop so a failure cannot leave a partial cache.
			self.cache = "|".join(parts) + "|"
		return does

	def matching_files(self, r, decode=False):
		"""Return the part of self.files whose names match regex r.

		The result has the same nested shape as self.files. A matching file
		maps to its length; a matching directory maps to {} unless some of
		its descendants match, in which case it maps to those; a
		non-matching directory appears only when it has matching descendants.
		Keys are decoded str when decode is true, otherwise the original bytes.
		"""
		def collect(dirc):
			files = {}
			for name, content in dirc.items():
				decoded = self._to_text(name)
				key = decoded if decode else name
				if search(r, decoded, IGNORECASE):
					files[key] = content if type(content) is int else {}
				if type(content) is dict:
					inhalt = collect(content)
					if inhalt:
						files[key] = inhalt
			return files
		return collect(self.files)

	def __repr__(self):
		return str(self.__dict__)

	def __hash__(self):
		# Parsed torrents hash by their SHA-1 digest; unparsed ones by identity.
		if len(self.sha1):
			return int.from_bytes(self.sha1, byteorder="big")
		return id(self)
def glob(d):
	"""Load every *.torrent file directly inside directory d.

	Returns a dict mapping each torrent's sha1 digest to its Torrent
	object. Only regular files whose name ends in ".torrent" are read;
	subdirectories are not descended into. An entry that raises while
	being examined or parsed is reported on stdout and skipped.
	"""
	found = {}
	for entry in scandir(d):
		try:
			if not (entry.name.endswith(".torrent") and entry.is_file()):
				continue
			torrent = Torrent()
			torrent.file(entry.path)
			found[torrent.sha1] = torrent
		except Exception as e:
			# best effort: one unreadable torrent must not abort the whole scan
			print(f"skipping broken torrent {entry.name} due to exception:")
			print(e)
	return found