Created
March 12, 2024 22:23
-
-
Save sma/fbd6ae87330b77a014f094b6ae48dbd4 to your computer and use it in GitHub Desktop.
a library and utility to unpack BG3 pak files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import 'dart:io'; | |
import 'dart:math'; | |
import 'package:unpak/unpak.dart'; | |
/// Command-line entry point of the `unpak` tool.
///
/// The first argument selects the mode:
///
/// * `-l <file.pak>` prints one line per archive entry showing its name,
///   uncompressed size and compression method.
/// * `-x <file.pak> <name> [<output>]` extracts the entry whose name is
///   exactly `<name>`; the bytes go to `<output>` when given and to stdout
///   otherwise.
///
/// Exits with status 1 on invalid usage or when no entry has the requested
/// name.
void main(List<String> arguments) {
  if (arguments.length < 2 || !<String>{'-l', '-x'}.contains(arguments[0])) {
    stderr.writeln('usage: unpak -l <file.pak>');
    stderr.writeln(' unpak -x <file.pak> <name> [<output>]');
    exit(1);
  }
  if (arguments[0] == '-x' && arguments.length < 3) {
    stderr.writeln('missing file name');
    exit(1);
  }

  final pak = Unpak(File(arguments[1]));
  try {
    final entries = pak.read();
    switch (arguments[0]) {
      case '-l':
        // Pad every name to the longest one so the columns line up.
        var width = 0;
        for (final entry in entries) {
          width = max(width, entry.name.length);
        }
        for (final entry in entries) {
          stdout.writeln(
              '${entry.name.padRight(width)} ${'${entry.uncompressedSize}'.padLeft(9)} ${entry.compressionMethod}');
        }
      case '-x':
        // Entries are selected by their exact name, not by position.
        final index = entries.indexWhere((entry) => entry.name == arguments[2]);
        if (index == -1) {
          stderr.writeln('unknown file to extract');
          // exit() ends the process at once; the `finally` below is skipped.
          exit(1);
        }
        final data = pak.readFile(entries[index]);
        if (arguments.length >= 4) {
          File(arguments[3]).writeAsBytesSync(data);
        } else {
          stdout.add(data);
        }
    }
  } finally {
    // Release the archive's file handle on normal completion and on errors.
    pak.close();
  }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import 'dart:convert'; | |
import 'dart:io'; | |
import 'dart:typed_data'; | |
/// Reader for _Baldur's Gate_ `PAK` archives (format version 18).
///
/// An archive begins with a Header struct that locates a FileList struct.
/// The FileList carries an
/// [LZ4](https://en.wikipedia.org/wiki/LZ4_(compression_algorithm))-compressed
/// array of FileEntry structs; each FileEntry names a file and says where its
/// (possibly compressed) bytes are stored. All integers are little-endian.
///
/// ```
/// Header (40 bytes)
///   signature:        u32     ("LSPK")
///   version:          u32     (18)
///   fileListOffset:   u64     points to FileList struct
///   fileListSize:     u32     size of FileList struct
///   flags:            u8      (0)
///   priority:         u8      ??
///   md5:              [16]u8  (don't know what needs to be hashed)
///   numParts:         u16     ??
///
/// FileList (8+X bytes)
///   numFiles:         u32     number of FileEntry structs
///   compressedSize:   u32     size in bytes of compressedData
///   compressedData:   [_]u8   compressed FileEntry structs
///
/// FileEntry (272 bytes)
///   name:             [256]u8 name, utf8, padded with 0
///   offsetInFile1:    u32     lower part of u48
///   offsetInFile2:    u16     higher part of u48
///   archivePart:      u8      ??
///   flags:            u8      low 4 bits: see CompressionMethod
///   diskSize:         u32     number of bytes stored at offsetInFile1/2
///   uncompressedSize: u32     size of the data after decompression
///
/// CompressionMethod
///   none = 0
///   zlib = 1
///   lz4  = 2
/// ```
///
/// Typical use: construct an [Unpak], call [read] to obtain the [Entry]
/// list, pass an entry to [readFile] to get its bytes as a [Uint8List], and
/// finally call [close].
class Unpak {
  /// Opens [path] for synchronous random-access reading.
  Unpak(File path) : _r = path.openSync();

  final RandomAccessFile _r;

  /// Closes the underlying file.
  void close() => _r.closeSync();

  /// Parses the header and the file list and returns one [Entry] per file.
  ///
  /// Throws an [UnpakException] when the signature or version is wrong, when
  /// the file list sizes disagree, or when the file list cannot be read or
  /// decompressed completely.
  List<Entry> read() {
    // Header
    _r.setPositionSync(0);
    final signature = _r.readUint32();
    if (signature != 0x4b50534c) {
      throw UnpakException('invalid signature');
    }
    final version = _r.readUint32();
    if (version != 18) {
      throw UnpakException('invalid version');
    }
    final fileListOffset = _r.readUint64();
    final fileListSize = _r.readUint32();
    // Unused tail of the header:
    // flags (u8) + priority (u8) + md5 ([16]u8) + numParts (u16).
    _r.skip(1 + 1 + 16 + 2);

    // FileList
    _r.setPositionSync(fileListOffset);
    final numFiles = _r.readUint32();
    final compressedSize = _r.readUint32();
    // fileListSize also counts the two u32 fields that were just read.
    if (fileListSize != compressedSize + 8) {
      throw UnpakException('file list size mismatch');
    }
    final compressedData = _r.readSync(compressedSize);
    if (compressedData.length != compressedSize) {
      throw UnpakException('not enough data in file list');
    }

    const sizeOfFileEntry = 272;
    final uncompressedSize = numFiles * sizeOfFileEntry;
    final data = lz4Uncompress(compressedData, uncompressedSize);
    if (data.length != uncompressedSize) {
      throw UnpakException('cannot decompress file list');
    }

    final bytes = ByteData.view(data.buffer);
    return [
      for (var base = 0; base < uncompressedSize; base += sizeOfFileEntry)
        _entryAt(data, bytes, base),
    ];
  }

  /// Decodes the 272-byte FileEntry struct that starts at [base].
  ///
  /// [data] and [bytes] are two views of the same decompressed file list.
  Entry _entryAt(Uint8List data, ByteData bytes, int base) {
    // The name occupies at most 256 bytes and ends at the first 0 byte.
    var nameLength = 0;
    while (nameLength < 256 && bytes.getUint8(base + nameLength) != 0) {
      nameLength++;
    }
    final name =
        utf8.decode(Uint8List.sublistView(data, base, base + nameLength));

    // The 48-bit file offset is stored as a u32 low part and a u16 high part.
    final offsetLow = bytes.getUint32(base + 256, Endian.little);
    final offsetHigh = bytes.getUint16(base + 260, Endian.little);

    return Entry(
      name: name,
      offset: offsetLow + (offsetHigh << 32),
      // Only the low 4 bits of the flags byte select the compression method.
      compressionMethod: bytes.getUint8(base + 263) & 15,
      diskSize: bytes.getUint32(base + 264, Endian.little),
      uncompressedSize: bytes.getUint32(base + 268, Endian.little),
      unpak: this,
    );
  }

  /// Reads the stored bytes of [entry] and decompresses them if necessary.
  ///
  /// Throws an [UnpakException] when fewer than `entry.diskSize` bytes can be
  /// read, when the decompressed length differs from
  /// `entry.uncompressedSize`, or when the compression method is unknown.
  Uint8List readFile(Entry entry) {
    _r.setPositionSync(entry.offset);
    final diskData = _r.readSync(entry.diskSize);
    if (diskData.length != entry.diskSize) {
      throw UnpakException("couldn't read file data completely");
    }
    final expected = entry.uncompressedSize;
    return switch (entry.compressionMethod) {
      // Stored entries are returned exactly as read from disk.
      0 => diskData,
      1 => Uint8List.fromList(_expectSize(zlib.decode(diskData), expected)),
      2 => _expectSize(lz4Uncompress(diskData, expected), expected),
      _ => throw UnpakException('unknown compression method'),
    };
  }

  /// Returns [data] unchanged, or throws if its length is not [size].
  static T _expectSize<T extends List<int>>(T data, int size) {
    if (data.length != size) {
      throw UnpakException("couldn't uncompress file data completely");
    }
    return data;
  }
}
/// Signals that a PAK archive is malformed or could not be read completely.
final class UnpakException extends IOException {
  /// Creates an exception carrying a human-readable [message].
  UnpakException(this.message);

  /// Description of what went wrong.
  final String message;

  // The printed prefix matches the class name.
  @override
  String toString() => 'UnpakException: $message';
}
/// Describes one file stored in a PAK archive, as listed by the [Unpak] it
/// belongs to.
final class Entry {
  /// Creates an entry from the already decoded file-list fields.
  const Entry({
    required this.name,
    required this.offset,
    required this.compressionMethod,
    required this.diskSize,
    required this.uncompressedSize,
    required this.unpak,
  });

  /// The archive this entry is read from.
  final Unpak unpak;

  /// The file's name (up to 256 bytes in the archive).
  final String name;

  /// Position in the archive file at which the stored bytes start.
  final int offset;

  /// How the stored bytes are compressed: 0=uncompressed, 1=zlib, 2=lz4.
  final int compressionMethod;

  /// Number of bytes stored in the archive for this entry.
  final int diskSize;

  /// Number of bytes the entry has once decompressed.
  final int uncompressedSize;

  /// Reads this entry's bytes through the owning [unpak].
  Uint8List readBytes() {
    return unpak.readFile(this);
  }

  /// Reads this entry's bytes and decodes them as UTF-8.
  String readString() {
    final bytes = readBytes();
    return utf8.decode(bytes);
  }

  @override
  String toString() {
    return [name, offset, diskSize, uncompressedSize].join(', ');
  }
}
/// Little-endian integer reading helpers for [RandomAccessFile].
extension on RandomAccessFile {
  /// Moves the current file position forward by [n] bytes without reading.
  void skip(int n) {
    setPositionSync(positionSync() + n);
  }

  /// Reads an unsigned 64-bit little-endian integer at the current position.
  ///
  /// Throws an [UnpakException] if fewer than 8 bytes are available.
  int readUint64() => _readExactly(8).getUint64(0, Endian.little);

  /// Reads an unsigned 32-bit little-endian integer at the current position.
  ///
  /// Throws an [UnpakException] if fewer than 4 bytes are available.
  int readUint32() => _readExactly(4).getUint32(0, Endian.little);

  /// Reads exactly [count] bytes and returns a [ByteData] view over them.
  ///
  /// `readSync` may return fewer bytes than requested (for example at end of
  /// file); decoding such a short buffer would fail with a `RangeError`, so
  /// the short read is reported as an [UnpakException] instead.
  ByteData _readExactly(int count) {
    final data = readSync(count);
    if (data.length != count) {
      throw UnpakException('unexpected end of file');
    }
    return ByteData.sublistView(data);
  }

  // int readUint16() {
  //   final data = readSync(2);
  //   return ByteData.view(data.buffer).getUint16(0, Endian.little);
  // }
  // int readUint8() {
  //   return readByteSync();
  // }
}
/// Decompresses a raw LZ4 block.
///
/// [data] is the compressed block and [uncompressedLength] the number of
/// bytes it is expected to expand to. The block is a series of sequences,
/// each made of a token byte, optional extra literal-length bytes, the
/// literal bytes, a 16-bit little-endian match offset and optional extra
/// match-length bytes; decoding stops when the input is exhausted after the
/// literals of a sequence.
///
/// Returns exactly the bytes that were produced. When the block expands to
/// [uncompressedLength] bytes, that is a list of that length; when it
/// expands to fewer bytes, the returned list is correspondingly shorter so
/// that callers comparing its length against the expected size can detect
/// the truncation. Empty [data] produces an empty result.
///
/// Throws a [RangeError] when the block would expand beyond
/// [uncompressedLength] or when the input ends in the middle of a sequence.
Uint8List lz4Uncompress(Uint8List data, int uncompressedLength) {
  final dest = Uint8List(uncompressedLength);
  var op = 0; // next write position in dest
  var ip = 0; // next read position in data

  // An empty block has no token to read and therefore produces no output.
  while (data.isNotEmpty) {
    final token = data[ip++];

    // High nibble: literal count; 15 means more length bytes follow, the
    // last of which is the first one that is not 255.
    var length = token >> 4;
    if (length == 15) {
      int extra;
      do {
        extra = data[ip++];
        length += extra;
      } while (extra == 255);
    }
    while (--length >= 0) {
      dest[op++] = data[ip++];
    }

    // The final sequence consists of literals only.
    if (ip >= data.length) break;

    // Match: copy from already written output, `offset` bytes back.
    final offset = data[ip++] + (data[ip++] << 8);
    var matchp = op - offset;

    // Low nibble: match length minus 4, extended the same way as literals.
    var matchlen = token & 15;
    if (matchlen == 15) {
      int extra;
      do {
        extra = data[ip++];
        matchlen += extra;
      } while (extra == 255);
    }
    matchlen += 4;
    // Copy byte by byte: source and destination may overlap on purpose.
    while (--matchlen >= 0) {
      dest[op++] = dest[matchp++];
    }
  }

  // Expose only what was written so that a short result is not silently
  // zero-padded up to the expected size.
  return op == dest.length ? dest : Uint8List.sublistView(dest, 0, op);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment