Skip to content

Instantly share code, notes, and snippets.

@zhangyoufu
Created May 20, 2025 11:08
Show Gist options
  • Select an option

  • Save zhangyoufu/2d4ca11de185ba6518a7a4bc24875044 to your computer and use it in GitHub Desktop.

Select an option

Save zhangyoufu/2d4ca11de185ba6518a7a4bc24875044 to your computer and use it in GitHub Desktop.
Modern rewrite of the BitTorrent .torrent file parser from https://web.archive.org/web/20200919043954/https://effbot.org/zone/bencode.htm
#!/usr/bin/env python3
from collections.abc import Buffer, Generator
from typing import TypeAlias
import re
# A lexical token yielded by tokenize(): an int for an integer literal, bytes
# for a byte-string payload, or a one-character str ('d', 'l' or 'e') for a
# structural marker.
Token: TypeAlias = int | bytes | str
# A fully decoded value: int, bytes, a list of values, or a dict mapping
# bytes keys to values.
Result: TypeAlias = int | bytes | list['Result'] | dict[bytes, 'Result']
# Matches the start of exactly one token. Exactly one group participates:
#   group 1 - a structural marker byte: d, e or l
#   group 2 - the (optionally negative) digits of an i<digits>e integer
#   group 3 - the decimal length prefix of a <length>: byte string
regexp = re.compile(rb'([del])|i(-?\d+)e|(\d+):')
def tokenize(data: Buffer) -> Generator[Token, None, None]:
pos = 0
while pos < len(data):
m = regexp.match(data, pos)
if m is None:
raise ValueError
pos = m.end()
s = m[m.lastindex]
match m.lastindex:
case 1:
yield s.decode()
case 2:
yield int(s)
case 3:
pos += int(s)
if pos > len(data):
raise ValueError
yield data[m.end():pos]
def decode(data: Buffer) -> Result:
gen = tokenize(data)
def decode_with_token(token: Token) -> Result:
match token:
case 'd' | 'l':
elems = []
while (_token := next(gen)) != 'e':
elems.append(decode_with_token(_token))
if token == 'd':
return dict(zip(elems[::2], elems[1::2], strict=True))
else:
return elems
case int() | bytes():
return token
case _:
raise ValueError
result = decode_with_token(next(gen))
for token in gen:
raise ValueError
return result
'''
'd' k0 v0 k1 v1 ... 'e' => dict
'l' e0 e1 ... 'e' => list
'i' int 'e' => int
len ':' bytes[len] => bytes
'''
def main() -> None:
    """Command-line entry point: decode a .torrent file and pretty-print it.

    Reads the file named by the single positional ``path`` argument in
    binary mode, decodes it with ``decode`` and prints the resulting
    structure with ``pprint``.
    """
    import argparse
    import pprint

    parser = argparse.ArgumentParser()
    parser.add_argument('path', help='path to .torrent file')
    args = parser.parse_args()
    # Use a context manager so the file handle is closed deterministically
    # rather than whenever the garbage collector gets to it.
    with open(args.path, 'rb') as f:
        data = f.read()
    pprint.pprint(decode(data))
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment