Created
May 20, 2025 11:08
-
-
Save zhangyoufu/2d4ca11de185ba6518a7a4bc24875044 to your computer and use it in GitHub Desktop.
A modern rewrite of the BitTorrent .torrent (bencode) file parser originally published at https://web.archive.org/web/20200919043954/https://effbot.org/zone/bencode.htm
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| from collections.abc import Buffer, Generator | |
| from typing import TypeAlias | |
| import re | |
| Token: TypeAlias = int | bytes | str | |
| Result: TypeAlias = int | bytes | list['Result'] | dict[bytes, 'Result'] | |
| regexp = re.compile(rb'([del])|i(-?\d+)e|(\d+):') | |
| def tokenize(data: Buffer) -> Generator[Token, None, None]: | |
| pos = 0 | |
| while pos < len(data): | |
| m = regexp.match(data, pos) | |
| if m is None: | |
| raise ValueError | |
| pos = m.end() | |
| s = m[m.lastindex] | |
| match m.lastindex: | |
| case 1: | |
| yield s.decode() | |
| case 2: | |
| yield int(s) | |
| case 3: | |
| pos += int(s) | |
| if pos > len(data): | |
| raise ValueError | |
| yield data[m.end():pos] | |
| def decode(data: Buffer) -> Result: | |
| gen = tokenize(data) | |
| def decode_with_token(token: Token) -> Result: | |
| match token: | |
| case 'd' | 'l': | |
| elems = [] | |
| while (_token := next(gen)) != 'e': | |
| elems.append(decode_with_token(_token)) | |
| if token == 'd': | |
| return dict(zip(elems[::2], elems[1::2], strict=True)) | |
| else: | |
| return elems | |
| case int() | bytes(): | |
| return token | |
| case _: | |
| raise ValueError | |
| result = decode_with_token(next(gen)) | |
| for token in gen: | |
| raise ValueError | |
| return result | |
# Summary of the token sequences accepted by decode() and the Python type
# each one produces. This bare string literal is never assigned or used; it
# serves only as a block comment.
'''
'd' k0 v0 k1 v1 ... 'e' => dict
'l' e0 e1 ... 'e' => list
'i' int 'e' => int
len ':' bytes[len] => bytes
'''
def main() -> None:
    """Command-line entry point: decode a .torrent file and pretty-print it.

    Expects one positional argument, the path of the file to read. The file
    is read in binary mode, decoded with ``decode`` and the result is
    written to standard output with ``pprint``.
    """
    import argparse
    import pprint

    parser = argparse.ArgumentParser()
    parser.add_argument('path', help='path to .torrent file')
    args = parser.parse_args()

    # Close the file deterministically instead of leaving the handle to the
    # garbage collector.
    with open(args.path, 'rb') as f:
        data = f.read()
    pprint.pprint(decode(data))
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment