diff options
Diffstat (limited to '')
-rw-r--r-- | .gitignore | 3 | ||||
-rw-r--r-- | analiza/zvezek.ipynb | 94 | ||||
-rw-r--r-- | templates/index.html | 37 | ||||
-rw-r--r-- | travnik.py | 109 | ||||
-rwxr-xr-x | www/app.py | 35 |
5 files changed, 188 insertions, 90 deletions
@@ -12,3 +12,6 @@ doc/ possible_torrents.L *.torrent .ipynb_checkpoints/ +J +# anez, nek prazen file +__pycache__/ diff --git a/analiza/zvezek.ipynb b/analiza/zvezek.ipynb index 1b832e2..b88677b 100644 --- a/analiza/zvezek.ipynb +++ b/analiza/zvezek.ipynb @@ -15,101 +15,15 @@ "......\n", "fixed bad single file torrent 4f269d8aefd647ee270842d53ec98aebd23a4afe\n", "fixed bad single file torrent 7b09ae0b612dafc1744562dccbbe4becf4d633c3\n", - "37769 @ 434.7589800900314 s\n" + "38021 @ 413.0262871221639 s\n" ] } ], "source": [ - "from bencodepy import decode\n", - "from enum import Enum\n", - "from hashlib import sha1, sha256\n", - "from os import scandir\n", "from time import monotonic\n", - "class Type(Enum):\n", - " UNDEF = 0,\n", - " V1 = 1,\n", - " V2 = 2,\n", - " HYBRID = 3\n", - "class Torrent():\n", - " def __init__(self):\n", - " self.sha1 = b''\n", - " self.files = {}\n", - " self.type = Type.UNDEF\n", - " def file(self, f):\n", - " self.parse(open(f, \"rb\").read())\n", - " def parse(self, b):\n", - " infodict = b[b.find(b'4:info')+6:b.rfind(b'6:sourced2:ip')]\n", - " self.sha1 = sha1(infodict).digest()\n", - " self.sha256 = sha256(infodict).digest()\n", - " self.dict = decode(b)\n", - " if b'pieces' in self.dict.get(b'info'):\n", - " self.dict.get(b'info').pop(b'pieces')\n", - " if b'files' in self.dict.get(b'info').keys():\n", - " self.type = Type.V1\n", - " for file in self.dict.get(b'info').get(b'files'):\n", - " if file.get(b'attr') is not None and b'p' in file.get(b'attr') or b'padding.file' in b'/'.join(file.get(b'path')) or b'.pad' in file.get(b'path'):\n", - " continue\n", - " def insert_file(d, path, length, self):\n", - " name = path.pop()\n", - " if not len(path):\n", - " d[name] = length\n", - " return\n", - " if name not in d.keys():\n", - " d[name] = {}\n", - " insert_file(d[name], path, length, self)\n", - " file.get(b'path').reverse()\n", - " insert_file(self.files, file.get(b'path'), file.get(b'length'), self)\n", - " 
self.dict.get(b'info').pop(b'files')\n", - " if b'file tree' in self.dict.get(b'info').keys(): # some torrents have broken file trees so we use files first\n", - " if self.type is Type.V1:\n", - " self.type = Type.HYBRID\n", - " else:\n", - " def filetree(names):\n", - " r = {}\n", - " for key in names.keys():\n", - " if key == b'':\n", - " return names.get(key).get(b'length')\n", - " r[key] = filetree(names.get(key))\n", - " return r\n", - " self.files = filetree(self.dict.get(b'info').get(b'file tree'))\n", - " self.dict.get(b'info').pop(b'file tree')\n", - " if not len(self.files):\n", - " self.type = Type.V1\n", - " self.files[self.dict.get(b'info').get(b'name')] = self.dict.get(b'info').get(b'length')\n", - " first_filename = [i for i in self.files.keys()][0]\n", - " if len(self.files) == 1 and self.files[first_filename] == {}:\n", - " print(\"fixed bad single file torrent\", self.sha1.hex())\n", - " self.files[first_filename] = self.dict.get(b'info').get(b'length')\n", - " def paths(self):\n", - " def paths_r(d, path=None):\n", - " if path is None:\n", - " path = []\n", - " for f in d.keys():\n", - " if type(d[f]) is int:\n", - " z = path.copy()\n", - " z.append(f)\n", - " yield z, d[f]\n", - " else:\n", - " z = path.copy()\n", - " z.append(f)\n", - " for z, v in paths_r(d[f], z):\n", - " yield z, v\n", - " for z, v in paths_r(self.files):\n", - " yield z, v\n", - " def __repr__(self):\n", - " return str(self.__dict__)\n", - " def __hash__(self):\n", - " if len(self.sha1):\n", - " return int.from_bytes(self.sha1, byteorder=\"big\")\n", - " return id(self)\n", - "def glob(d):\n", - " r = {}\n", - " for f in scandir(d):\n", - " if f.name.endswith(\".torrent\") and f.is_file():\n", - " t = Torrent()\n", - " t.file(f.path)\n", - " r[t.sha1] = t\n", - " return r\n", + "from sys import path\n", + "path.append(\"/root/projects/travnik\")\n", + "from travnik import glob\n", "print(\"......\")\n", "start = monotonic()\n", "torrents = 
glob(\"/root/projects/travnik\")\n", diff --git a/templates/index.html b/templates/index.html new file mode 100644 index 0000000..b4f0804 --- /dev/null +++ b/templates/index.html @@ -0,0 +1,37 @@ +<!DOCTYPE html> +<html lang=sl> + <head> + <meta charset=UTF-8 /> + <meta name=viewport content='width=device-width, initial-scale=1.0'> + <title> + travnik + </title> + <link rel=stylesheet href=//searc.šijanec.eu./css.css /> + </head> + <body> + <h1> + travnik + </h1> + <form> + <label for=regex> + regex + </label> + <input name=regex id=regex placeholder=DVDRip value="{{ request.args.get("regex")|e }}" /> + razvrsti + <select name=order> + <option {{ "selected" if request.args.get("order") == "naraščajoče" else "" }} value=naraščajoče>naraščajoče</option> + <option {{ "selected" if request.args.get("order") == "padajoče" else "" }} value=padajoče>padajoče</option> + </select> + po + <select name=sort> + <!-- <option {{ "selected" if request.args.get("sort") == "popularnosti" else "" }} value=popularnosti>popularnosti</option> --> <!-- N/I --> + <option {{ "selected" if request.args.get("sort") == "velikosti" else "" }} value=velikosti>velikosti</option> + <option {{ "selected" if request.args.get("sort") == "datumu" else "" }} value=datumu>datumu najdbe</option> + <option {{ "selected" if request.args.get("sort") == "datotekah" }} value=datotekah>številu datotek</option> + </select> + <input type=submit value=išči /> + </form> + <hr> + <p><a href=//ni.šijanec.eu./sijanec/travnik>travnik</a> ima <b>{{ torrentov }}</b> {{ mno(torrentov, ["torrentov", "torrent", "torrenta", "torrente"]) }}. prižigal se je v <b>{{ zagontekst }}</b> v {{ roundstartuptime }} s in porablja {{ rammib }} MiB fizičnega pomnilnika. 
"""travnik: parse ``.torrent`` files and index the file listings they describe.

This is the content of ``travnik.py`` (a new file in the diff this text was
flattened from), restored to conventional formatting.
"""
# NOTE: the flattened lines this block replaces also carried the closing
# "</body></html>" of templates/index.html and the first lines of www/app.py;
# neither belongs to this module.
from enum import Enum
from hashlib import sha1, sha256
from os import scandir
from re import search, IGNORECASE


class Type(Enum):
    """Metainfo flavour of a parsed torrent.

    The values are plain integers; the original trailing commas silently
    turned three of the four values into one-element tuples.
    """

    UNDEF = 0
    V1 = 1
    V2 = 2
    HYBRID = 3


class Torrent():
    """One parsed ``.torrent`` file.

    ``__init__`` sets ``sha1`` (bytes), ``files`` (nested dict mapping a name
    to either an int length or another such dict) and ``type``.
    ``parse`` additionally sets ``sha256`` and ``dict`` (the decoded metainfo
    with the bulky ``pieces``/``files``/``file tree`` keys removed).
    """

    def __init__(self):
        self.sha1 = b''
        self.files = {}
        self.type = Type.UNDEF

    def file(self, f):
        """Read the file at path *f* and parse its contents."""
        # Context manager so the handle is closed even if parsing raises.
        with open(f, "rb") as handle:
            self.parse(handle.read())

    def parse(self, b):
        """Populate this object from the raw bencoded bytes *b*."""
        # Imported lazily: only parse() needs the third-party decoder, so the
        # tree/search helpers stay usable where bencodepy is not installed.
        from bencodepy import decode

        # NOTE(review): presumably the files this reads have a trailing
        # "source" dict appended after "info", which is why the info dict is
        # cut at b'6:sourced2:ip' -- confirm against whatever writes them.
        infodict = b[b.find(b'4:info') + 6:b.rfind(b'6:sourced2:ip')]
        self.sha1 = sha1(infodict).digest()
        self.sha256 = sha256(infodict).digest()
        self.dict = decode(b)
        info = self.dict.get(b'info')
        info.pop(b'pieces', None)
        if b'files' in info:
            self.type = Type.V1
            for entry in info.get(b'files'):
                attr = entry.get(b'attr')
                path = entry.get(b'path')
                # Skip padding entries: flagged via attr, or named like one.
                if ((attr is not None and b'p' in attr)
                        or b'padding.file' in b'/'.join(path)
                        or b'.pad' in path):
                    continue
                self._insert_file(self.files, path, entry.get(b'length'))
            info.pop(b'files')
        if b'file tree' in info:
            # some torrents have broken file trees so we use files first
            if self.type is Type.V1:
                self.type = Type.HYBRID
            else:
                # Only a file tree is present; the original never assigned V2.
                self.type = Type.V2
                self.files = self._file_tree(info.get(b'file tree'))
            info.pop(b'file tree')
        if not self.files:
            # No listing at all: treat as a single-file torrent.
            self.type = Type.V1
            self.files[info.get(b'name')] = info.get(b'length')
        first_filename = next(iter(self.files))
        if len(self.files) == 1 and self.files[first_filename] == {}:
            print("fixed bad single file torrent", self.sha1.hex())
            self.files[first_filename] = info.get(b'length')

    @staticmethod
    def _insert_file(tree, path, length):
        """Store *length* in nested dict *tree* under the components of *path*."""
        *parents, name = path
        node = tree
        for part in parents:
            node = node.setdefault(part, {})
        node[name] = length

    @staticmethod
    def _file_tree(names):
        """Convert a ``file tree`` mapping into the nested name -> length form.

        A node holding the empty-bytes key is a file; its ``length`` is returned.
        """
        tree = {}
        for key, value in names.items():
            if key == b'':
                return value.get(b'length')
            tree[key] = Torrent._file_tree(value)
        return tree

    @staticmethod
    def _search(pattern, raw):
        """Case-insensitively search *raw* (bytes) with a str or bytes *pattern*.

        ``re`` refuses to mix str patterns with bytes subjects, so for a str
        pattern the subject is decoded (undecodable bytes are replaced).
        """
        if isinstance(pattern, str) and isinstance(raw, bytes):
            raw = raw.decode("utf-8", errors="replace")
        return search(pattern, raw, IGNORECASE)

    def paths(self):
        """Yield ``(components, length)`` for every file in ``self.files``.

        *components* is a list of path parts from the root down to the file.
        """
        def walk(tree, prefix):
            for name, value in tree.items():
                here = prefix + [name]
                if isinstance(value, int):
                    yield here, value
                else:
                    yield from walk(value, here)
        yield from walk(self.files, [])

    def matches(self, r):
        """Return True if regex *r* matches the torrent name or any file path.

        File paths are matched as their components joined with ``/``.
        """
        name = self.dict.get(b'info').get(b'name')
        if name is not None and self._search(r, name):
            return True
        return any(self._search(r, b'/'.join(path)) for path, _size in self.paths())

    def matching_files(self, r):
        """Return the subtree of ``self.files`` whose entry names match *r*.

        A matching entry is included as-is. A directory with matches further
        down is included reduced to just those matches, which also overrides
        the as-is inclusion when the directory's own name matched.
        """
        def walk(tree):
            found = {}
            for name, content in tree.items():
                if self._search(r, name):
                    found[name] = content
                if isinstance(content, dict):
                    inner = walk(content)
                    if inner:
                        found[name] = inner
            return found
        return walk(self.files)

    def __repr__(self):
        return str(self.__dict__)

    def __hash__(self):
        # Hash on the SHA-1 digest once parsed; before that fall back to identity.
        if self.sha1:
            return int.from_bytes(self.sha1, byteorder="big")
        return id(self)


def glob(d):
    """Parse every regular ``*.torrent`` file directly inside directory *d*.

    Returns a dict mapping each torrent's ``sha1`` digest to its ``Torrent``.
    """
    found = {}
    # scandir() is a context manager; this releases the directory handle promptly.
    with scandir(d) as entries:
        for entry in entries:
            if entry.name.endswith(".torrent") and entry.is_file():
                t = Torrent()
                t.file(entry.path)
                found[t.sha1] = t
    return found
#!/usr/bin/python
"""www/app.py: Flask front end for the travnik torrent index.

Complete module as added by the diff this text was flattened from; the
preamble (imports through ``zagon``) sat on the preceding flattened line
together with the ``def`` keyword of ``mno``.
"""
from re import search, IGNORECASE
from time import monotonic
# NOTE(review): ``escape`` is unused here and is no longer exported by newer
# Flask releases -- confirm the targeted Flask version before relying on it.
from flask import Flask, render_template, escape, request
from sys import argv, path
from os import getpid
from psutil import Process
from urllib.parse import quote
from datetime import datetime
from locale import setlocale, LC_ALL
path.append(".")
from travnik import glob, Type
setlocale(LC_ALL, "")
app = Flask("travnik")
startuptime = -1  # seconds spent indexing; -1 until the startup script sets it
zagon = datetime.now()


def mno(quantity, types):
    """Pick the noun form from *types* that agrees with the count *quantity*.

    *types* is indexed as ``[default, form for 1, form for 2, form for 3 and 4]``
    and the choice is made on ``quantity % 100``. The original indexed
    ``quantity`` itself (a TypeError for any int) and had no case for 4.
    """
    remainder = quantity % 100
    if remainder == 1:
        return types[1]
    if remainder == 2:
        return types[2]
    if remainder in (3, 4):
        return types[3]
    return types[0]


@app.route("/")
def index():
    """Render the search page.

    Supplies ``roundstartuptime`` and ``rammib``, which the template prints
    but which were never passed to it.
    """
    resident_bytes = Process(getpid()).memory_info().rss
    return render_template(
        "index.html",
        roundstartuptime=round(startuptime),
        rammib=round(resident_bytes / 2 ** 20),
    )


if __name__ == "__main__":
    print("zaganjam travnik", argv[0], "... zagon traja dolgo časa (~5 min za ~40k torrentov. za delovanje je potrebnih ~300 MiB RAM RES za ~40k torrentov. sharding je WIP.")
    start = monotonic()
    # NOTE(review): nothing is indexed yet -- glob() is imported but not called.
    torrents = {}
    startuptime = monotonic() - start
    print("zagon uspešen. v", startuptime, "sem indeksiral", len(torrents), "torrentov")
    app.jinja_env.globals.update(mno=mno, zagontekst=zagon.strftime("%c"), torrentov=len(torrents))
    app.jinja_env.add_extension('jinja2.ext.loopcontrols')
    # NOTE(review): debug=True while listening on every interface exposes the
    # interactive debugger to the network -- confirm this is development-only.
    app.run(host="::", port=8080, debug=True)