Skip to content

Instantly share code, notes, and snippets.

@Diggsey
Created July 20, 2015 00:57
Show Gist options
  • Save Diggsey/cefdbd068c540a4d0daa to your computer and use it in GitHub Desktop.
Save Diggsey/cefdbd068c540a4d0daa to your computer and use it in GitHub Desktop.
PDB File Format

Program Database (PDB) File Format

The PDB file format is a compound file. It is composed of numbered data streams, each of which stores some predefined information about the target program. For example, one stream appears to contain a list of symbol names, while another describes the code and data sections used by the linker.

A special stream, the root stream, contains all the information necessary to locate the other streams within the file.

All numbers are encoded in little-endian format.

Credit to http://www.informit.com/articles/article.aspx?p=22685 for reverse-engineering an older version of the PDB format, which helped considerably.

Structure

A PDB file is composed of fixed sized pages, similar to pages in the context of virtual memory. As a result, all PDB files are an exact multiple of the page size. The size of each page is specified in the file header, which always occupies the zeroth page in the file.

The pages which make up a stream can be located anywhere within the file. This makes modifying or appending to a stream within the PDB file very efficient: the index only needs updating when a stream spills over into a new page.

Header

// Fixed-size (0x20 byte) signature field; it is the first member of PDB_HEADER.
struct PDB_SIGNATURE {
	char m_str[0x20]; // "Microsoft C/C++ MSF 7.00\r\n\x1aDS" as of Visual Studio 2015
};

// File header. It always occupies the zeroth page of the file, and every
// number in it is stored little-endian.
struct PDB_HEADER {
	PDB_SIGNATURE m_signature;
	uint32_t m_pageSize; // Typically 0x1000, i.e. 4KiB pages; other possible values include 0x800 and 0x400
	uint32_t m_unknown; // Meaning unknown; usually contains a small integer, e.g. 1 or 2
	uint32_t m_filePages; // Total number of pages in the file - multiplied by the page size gives the file size
	uint32_t m_rootStreamSize; // Size of the root stream in bytes
	uint32_t m_reserved; // Zero
	uint32_t m_rootPageIndex; // Page holding the list of pages which make up the root stream
};

m_rootPageIndex locates the following structure, when multiplied by the page size:

// Page list of a single stream (illustrative pseudo-C: the array length is not
// stored here, it is derived from the stream's size in bytes and the page size).
struct PDB_STREAM {
    uint32_t m_pages[]; // All the pages which make up this stream, in stream order
};

The size of the array can be calculated by dividing m_rootStreamSize by m_pageSize, rounding up if it's not an exact multiple.

The root stream can be recovered by simply concatenating the data in the specified pages together (ignoring any data past m_rootStreamSize).

Root Stream

The root stream contains an index of all the other streams in the file:

// Layout of the root stream (illustrative pseudo-C: both arrays are sized at
// run time by m_streamCount, and each m_streams entry is itself variable length).
struct PDB_STREAM_INDEX {
    uint32_t m_streamCount; // Number of streams described by this index
    uint32_t m_streamSizes[m_streamCount]; // Size of each stream in bytes; 0xFFFFFFFF marks a stream that is not present
    PDB_STREAM m_streams[m_streamCount]; // Page lists of the streams, stored back to back
};

Since PDB_STREAM is itself a variable length data structure, m_streams is effectively a flattened jagged array. This means you can't directly access the information for stream N: you must first calculate the size (in pages) of streams 0..(N-1).

m_streamSizes may contain a special value, 0xFFFFFFFF (-1), which means that the corresponding stream is not present.

An example root stream, using an unrealistic page size of 16 bytes:

m_streamCount:   05 00 00 00
m_streamSizes:   1A 00 00 00   07 00 00 00   00 00 00 00   FF FF FF FF   02 00 00 00
    m_streams:   04 00 00 00   05 00 00 00   02 00 00 00   01 00 00 00

This describes five streams:

0: 26 bytes, at pages 4,5
1: 7 bytes, at page 2
2: 0 bytes
3: not present
4: 2 bytes, at page 1

Other streams

The format of the streams themselves is unknown. However, it should be possible to compare them to data queried from the PDB file using Microsoft's DIA API, which should provide a head-start to reverse-engineering the format of a given stream.

Example code

The following Windows-only code will attempt to extract all the streams from a PDB file, and dump them as numbered files to an output folder:

#include "stdafx.h"

// Raw bytes of the signature field that PDB_HEADER begins with.
struct PDB_SIGNATURE {
	char m_str[32]; // 0x20 bytes
};

// In-memory overlay for the start of a PDB file. pdb_file casts the base of the
// mapped file to this struct, so member order and widths must not be changed.
struct PDB_HEADER {
	PDB_SIGNATURE m_signature; // 0x20-byte signature field
	uint32_t m_pageSize; // Size of one page in bytes
	uint32_t m_startPage; // NOTE(review): the format description lists this slot as "meaning unknown"; the name is unverified and nothing in this program reads it
	uint32_t m_filePages; // Total number of pages in the file
	uint32_t m_rootStreamSize; // Size of the root stream in bytes
	uint32_t m_reserved; // Not read by this program; described as zero in the format notes
	uint32_t m_rootPageIndex; // Page that holds the root stream's page list
};

class pdb_file;

// Sequential reader over one stream of a PDB file.
//
// A stream is described by its length in bytes and the ordered list of file
// pages holding its data. The object keeps its own copy of the page list and a
// read offset that starts at zero; read() is the only member that moves it.
class pdb_stream {
private:
	pdb_file const* const m_file;          // file the pages are fetched from
	size_t m_offset;                       // bytes consumed by read() so far
	size_t const m_size;                   // stream length in bytes
	std::vector<uint32_t> const m_pages;   // page indices, in stream order

public:
	// file:  file that the page indices refer to.
	// size:  length of the stream in bytes.
	// pages: page indices making up the stream; copied into the object.
	pdb_stream(pdb_file const* file, size_t size, std::vector<uint32_t> const& pages)
		: m_file(file)
		, m_offset(0)
		, m_size(size)
		, m_pages(pages)
	{
	}

	// Length of the stream in bytes.
	size_t size() const {
		return m_size;
	}

	// Number of bytes already consumed by read().
	size_t offset() const {
		return m_offset;
	}

	// Number of bytes between the read offset and the end of the stream.
	size_t remaining() const {
		return m_size - m_offset;
	}

	// Copies the next `size` bytes into `buffer`; defined after pdb_file.
	inline void read(void* buffer, size_t size);
};

class pdb_file {
private:
	PDB_HEADER* m_header;
	size_t m_pageSize;
public:
	inline uintptr_t base() const {
		return (uintptr_t)m_header;
	}
	inline size_t pages() const {
		return m_header->m_filePages;
	}
	inline size_t pageSize() const {
		return m_pageSize;
	}
	inline size_t pageCount(size_t size) const {
		return (size + pageSize() - 1) / pageSize();
	}
	inline size_t readPage(void* buffer, size_t pageIndex, size_t offsetA, size_t offsetB) const {
		auto size = offsetB - offsetA;
		memcpy(buffer, (void*)(base() + pageSize() * pageIndex + offsetA), size);
		return size;
	}
	inline size_t readPage(void* buffer, size_t pageIndex) const {
		memcpy(buffer, (void*)(base() + pageSize() * pageIndex), pageSize());
		return pageSize();
	}
	inline pdb_file(void* ptr) : m_header((PDB_HEADER*)ptr) {
		m_pageSize = m_header->m_pageSize;
	}
	inline pdb_stream rootStream() const {
		size_t rootSize = m_header->m_rootStreamSize;
		size_t rootPageCount = pageCount(rootSize);

		std::vector<uint32_t> rootPages(rootPageCount);
		readPage(rootPages.data(), m_header->m_rootPageIndex, 0, sizeof(uint32_t)*rootPageCount);

		return pdb_stream(this, rootSize, rootPages);
	}
};


// Decoded form of the root stream: for every stream in the file, its size in
// bytes and the list of pages that hold it.
//
// The root stream is laid out as a stream count, then one 32-bit size per
// stream, then the page lists of all present streams stored back to back. The
// constructor walks it once and keeps the result in memory.
class pdb_stream_index {
private:
	pdb_file const* const m_file;
	std::vector<uint32_t> m_streamSizes;               // size in bytes per stream, or notPresent()
	std::vector<std::vector<uint32_t>> m_streamPages;  // page list per stream (empty when not present)
	std::vector<uint32_t> m_reverseLookup;             // page index -> stream using it, or notPresent()

	// Sentinel (0xFFFFFFFF) used both for "stream is not present" in the size
	// table and for "no stream uses this page" in the reverse lookup.
	static uint32_t notPresent() {
		return static_cast<uint32_t>(-1);
	}
public:
	// Parses the root stream of `file`. `file` must outlive this object, since
	// the pointer is stored and handed to every pdb_stream created by stream().
	pdb_stream_index(pdb_file const* file) : m_file(file) {
		uint32_t streamCount = 0;
		auto stream = file->rootStream();
		stream.read(&streamCount, sizeof(streamCount));

		m_streamSizes.resize(streamCount);
		m_streamPages.resize(streamCount);

		m_reverseLookup.resize(file->pages(), notPresent());

		stream.read(m_streamSizes.data(), sizeof(uint32_t)*streamCount);
		for (uint32_t i = 0; i < streamCount; ++i) {
			auto const size = m_streamSizes[i];
			// Streams that are not present have no page list in the root stream.
			if (size == notPresent())
				continue;

			auto& pages = m_streamPages[i];
			pages.resize(file->pageCount(size));
			stream.read(pages.data(), sizeof(uint32_t)*pages.size());

			for (auto const page : pages) {
				// The page index comes straight from the file; one that is not
				// below the header's page count must not be used as a subscript.
				if (page < m_reverseLookup.size())
					m_reverseLookup[page] = i;
			}
		}
	}

	// Number of stream slots in the index, including slots that are not present.
	inline size_t streamCount() const {
		return m_streamSizes.size();
	}

	// True when `index` names a slot whose stream is present in the file.
	// Returns false for an index outside [0, streamCount()).
	inline bool streamExists(size_t index) const {
		return index < m_streamSizes.size() && m_streamSizes[index] != notPresent();
	}

	// Creates a reader positioned at the start of stream `index`.
	// `index` must be below streamCount(); callers should check streamExists()
	// first, because a missing stream has the sentinel size and no pages.
	inline pdb_stream stream(size_t index) const {
		return pdb_stream(m_file, m_streamSizes[index], m_streamPages[index]);
	}
};

void pdb_stream::read(void* buffer, size_t size) {
	if (!size)
		return;

	uintptr_t dest = (uintptr_t)buffer;
	auto pageSize = m_file->pageSize();

	size_t offsetA = m_offset;
	size_t offsetB = offsetA + size;
	size_t pageA = offsetA / pageSize;
	size_t pageB = offsetB / pageSize;
	size_t indexA = offsetA % pageSize;
	size_t indexB = offsetB % pageSize;

	if (pageA == pageB) {
		dest += m_file->readPage((void*)dest, m_pages[pageA], indexA, indexB);
	} else {
		dest += m_file->readPage((void*)dest, m_pages[pageA], indexA, pageSize);
		for (size_t page = pageA + 1; page < pageB; ++page) {
			dest += m_file->readPage((void*)dest, m_pages[page]);
		}
		dest += m_file->readPage((void*)dest, m_pages[pageB], 0, indexB);
	}

	m_offset += size;
}

int _tmain(int argc, _TCHAR* argv[])
{
	HANDLE hFile = CreateFile(
		argv[1],
		GENERIC_READ,
		FILE_SHARE_READ,
		nullptr,
		OPEN_EXISTING,
		0,
		nullptr
		);
	HANDLE hMapping = CreateFileMapping(
		hFile,
		NULL,
		PAGE_READONLY,
		0,
		0,
		nullptr
		);
	void* hView = MapViewOfFile(hMapping, FILE_MAP_READ, 0, 0, 0);

	pdb_file file(hView);
	pdb_stream_index index(&file);

	CreateDirectory(_T("Output"), nullptr);
	for (size_t i = 0; i < index.streamCount(); ++i) {

		TCHAR buffer[64];
		_stprintf_s(buffer, _T(R"(Output\%d)"), (int)i);

		if (!index.streamExists(i)) {
			DeleteFile(buffer);
			continue;
		}

		auto stream = index.stream(i);
		HANDLE hStreamFile = CreateFile(
			buffer,
			GENERIC_READ|GENERIC_WRITE,
			0,
			nullptr,
			CREATE_ALWAYS,
			0,
			nullptr
			);
		SetFilePointer(hStreamFile, stream.size(), 0, 0);
		SetEndOfFile(hStreamFile);
		SetFilePointer(hStreamFile, 0, 0, 0);
		HANDLE hStreamMapping = CreateFileMapping(
			hStreamFile,
			NULL,
			PAGE_READWRITE,
			0,
			0,
			nullptr
			);
		void* hStreamView = MapViewOfFile(hStreamMapping, FILE_MAP_READ|FILE_MAP_WRITE, 0, 0, 0);
		stream.read(hStreamView, stream.size());
		UnmapViewOfFile(hStreamView);
		CloseHandle(hStreamMapping);
		CloseHandle(hStreamFile);
	}

	UnmapViewOfFile(hView);
	CloseHandle(hMapping);
	CloseHandle(hFile);

	return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment