Skip to content

Instantly share code, notes, and snippets.

@s3rgeym
Created August 14, 2024 16:40
Show Gist options
  • Save s3rgeym/3ef98366c8a655953cf8b2343df1be6f to your computer and use it in GitHub Desktop.
Save s3rgeym/3ef98366c8a655953cf8b2343df1be6f to your computer and use it in GitHub Desktop.
git index parser
#!/usr/bin/env python
# https://git-scm.com/docs/index-format
# https://github.com/sbp/gin/blob/master/gin
from __future__ import annotations
import codecs
import struct
from dataclasses import dataclass
from typing import BinaryIO, ClassVar, Iterable, Type
def _read_exact(fp: BinaryIO, size: int) -> bytes:
    """Read exactly *size* bytes from *fp*.

    Raises:
        ValueError: if the stream ends before *size* bytes were read.
    """
    data = fp.read(size)
    if len(data) != size:
        raise ValueError(
            f"Truncated index: expected {size} bytes, got {len(data)}"
        )
    return data


def _read_cstring(fp: BinaryIO) -> bytes:
    """Read bytes up to a NUL terminator; the NUL is consumed, not returned.

    Raises:
        ValueError: if the stream ends before a NUL byte is seen (without
            this check a truncated file would loop forever on empty reads).
    """
    name = bytearray()
    while True:
        byte = fp.read(1)
        if not byte:
            raise ValueError("Truncated index: unterminated path name")
        if byte == b"\x00":
            return bytes(name)
        name += byte


def _read_varint(fp: BinaryIO) -> int:
    """Decode the variable-width integer used by index version 4 path names.

    Each byte contributes its low 7 bits; a set high bit means another byte
    follows, and the accumulated value is incremented before each shift
    (Git's "offset" varint encoding).
    """
    byte = _read_exact(fp, 1)[0]
    value = byte & 0x7F
    while byte & 0x80:
        byte = _read_exact(fp, 1)[0]
        value = ((value + 1) << 7) | (byte & 0x7F)
    return value


@dataclass
class GiEntry:
    """A single entry of a Git index file.

    The first ten attributes (``ctime_s`` .. ``file_size``) are the ten
    big-endian 32-bit integers that open every on-disk entry. ``sha1`` is the
    20-byte object name as a hex string. ``assume_valid``, ``extended`` and
    ``stage`` are decoded from the 16-bit flags field. ``extended_flags`` is
    the full 16-bit extended flags field (0 when absent). ``filename`` is the
    path decoded as UTF-8 with undecodable bytes replaced.
    """

    ctime_s: int
    ctime_ns: int
    mtime_s: int
    mtime_ns: int
    dev: int
    ino: int
    mode: int
    uid: int
    gid: int
    file_size: int
    sha1: str
    assume_valid: bool
    extended: bool
    stage: int
    extended_flags: int
    filename: str

    ENTRY_HEADER_STRUCT: ClassVar[struct.Struct] = struct.Struct("!10I")

    @classmethod
    def parse(
        cls: Type[GiEntry],
        fp: BinaryIO,
        version: int,
        previous_path: bytes = b"",
    ) -> GiEntry:
        """Parse one entry from *fp*, which must be positioned at its start.

        Args:
            fp: binary stream to read from.
            version: index format version taken from the file header.
            previous_path: raw path of the preceding entry. Only used for
                version 4, whose path names are stored as a suffix relative
                to the previous entry's path.

        Returns:
            The parsed entry; *fp* is left at the start of the next entry.

        Raises:
            ValueError: if the data is truncated or a version 4 path name
                strips more bytes than *previous_path* contains.
        """
        entry, _ = cls._parse_with_path(fp, version, previous_path)
        return entry

    @classmethod
    def _parse_with_path(
        cls: Type[GiEntry],
        fp: BinaryIO,
        version: int,
        previous_path: bytes = b"",
    ) -> tuple[GiEntry, bytes]:
        """Parse one entry and also return its undecoded path bytes.

        The raw bytes are what the next version 4 entry must be resolved
        against; ``filename`` cannot be used because decoding is lossy.
        """
        # Fixed-size stat header.
        (
            ctime_s,
            ctime_ns,
            mtime_s,
            mtime_ns,
            dev,
            ino,
            mode,
            uid,
            gid,
            file_size,
        ) = cls.ENTRY_HEADER_STRUCT.unpack(
            _read_exact(fp, cls.ENTRY_HEADER_STRUCT.size)
        )

        # Object name and flags.
        sha1 = _read_exact(fp, 20).hex()
        (flags,) = struct.unpack("!H", _read_exact(fp, 2))
        assume_valid = bool(flags & 0x8000)
        extended = bool(flags & 0x4000)
        stage = (flags & 0x3000) >> 12
        name_len = flags & 0xFFF

        # Number of bytes of this entry that precede the path name.
        fixed_len = cls.ENTRY_HEADER_STRUCT.size + 20 + 2
        extended_flags = 0
        if extended and version > 2:
            # The extended flags field is 16 bits wide, not 8.
            (extended_flags,) = struct.unpack("!H", _read_exact(fp, 2))
            fixed_len += 2

        if version >= 4:
            # Version 4: a varint giving how many bytes to drop from the end
            # of the previous path, then a NUL-terminated suffix. No padding.
            strip_len = _read_varint(fp)
            if strip_len > len(previous_path):
                raise ValueError(
                    "Malformed prefix-compressed path name in index"
                )
            prefix = previous_path[: len(previous_path) - strip_len]
            raw_path = prefix + _read_cstring(fp)
        else:
            if name_len < 0xFFF:
                raw_path = _read_exact(fp, name_len)
                nul_consumed = 0
            else:
                # 0xFFF means "too long to store"; the name is NUL-terminated.
                raw_path = _read_cstring(fp)
                nul_consumed = 1
            # 1-8 NUL bytes pad the entry to a multiple of 8 bytes; one of
            # them may already have been consumed as the terminator above.
            padding = 8 - (fixed_len + len(raw_path)) % 8
            # Read rather than seek so non-seekable streams work too.
            fp.read(padding - nul_consumed)

        entry = cls(
            ctime_s=ctime_s,
            ctime_ns=ctime_ns,
            mtime_s=mtime_s,
            mtime_ns=mtime_ns,
            dev=dev,
            ino=ino,
            mode=mode,
            uid=uid,
            gid=gid,
            file_size=file_size,
            sha1=sha1,
            assume_valid=assume_valid,
            extended=extended,
            stage=stage,
            extended_flags=extended_flags,
            filename=raw_path.decode(errors="replace"),
        )
        return entry, raw_path


@dataclass
class GitIndex:
    """A parsed Git index file: its format version and its entries.

    Only the header and the entries are read; whatever follows the last
    entry in the stream is left unread.
    """

    version: int
    entries: list[GiEntry]

    HEADER_STRUCT: ClassVar[struct.Struct] = struct.Struct("!4s2I")

    @classmethod
    def parse(cls, fp: BinaryIO) -> GitIndex:
        """Parse an index file from the binary stream *fp*.

        Raises:
            ValueError: if the signature is not ``DIRC``, the version is not
                2, 3 or 4, or the data is truncated or malformed.
        """
        # File header: signature, version, number of entries.
        signature, version, num_entries = cls.HEADER_STRUCT.unpack(
            _read_exact(fp, cls.HEADER_STRUCT.size)
        )
        # Explicit raises instead of asserts, which vanish under ``-O``.
        if signature != b"DIRC":
            raise ValueError("Invalid index signature")
        if version not in (2, 3, 4):
            raise ValueError("Unsupported index version")

        # Entries; the raw path of each one is carried forward because
        # version 4 stores every path relative to the previous entry's.
        entries: list[GiEntry] = []
        previous_path = b""
        for _ in range(num_entries):
            entry, previous_path = GiEntry._parse_with_path(
                fp, version, previous_path
            )
            entries.append(entry)
        return cls(version=version, entries=entries)

    def __iter__(self) -> Iterable[GiEntry]:
        """Iterate over the entries in file order."""
        return iter(self.entries)
def main() -> None:
    """Parse the index file named on the command line and print each entry."""
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("input", type=argparse.FileType("rb"))
    args = parser.parse_args()
    # FileType opens the file during argument parsing; the ``with`` block
    # makes sure it is closed once parsing and printing are done.
    with args.input as fp:
        for entry in GitIndex.parse(fp):
            print(entry)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment