Last active
February 15, 2024 10:59
-
-
Save RealNeGate/428a274496ce9852e06f9db1852b6cc2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// I tried to avoid anything too non-portable, so it should be possible to run
// this on Mac or Linux... probably. Even then, you can't use the obj files there.
//
// Once you have the obj file you should be able to do:
//   link YOUROBJ.obj /defaultlib:libcmt
//                    ^^^^^^^^^^^^^^^^^^
//                    linking against the CRT
#define _CRT_SECURE_NO_WARNINGS
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#define STB_DS_IMPLEMENTATION
#include "stb_ds.h"
// Marks a pointer as an stb_ds dynamic array of T (grown with arrput,
// measured with arrlen). It expands to a plain pointer type.
#define DynArray(T) T*

// ---- Section characteristics (combinations of IMAGE_SCN_* flags) ----

// IMAGE_SCN_CNT_CODE | IMAGE_SCN_MEM_EXECUTE | IMAGE_SCN_MEM_READ | IMAGE_SCN_ALIGN_16BYTES
#define COFF_CHARACTERISTICS_TEXT   0x60500020u
// IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_WRITE | IMAGE_SCN_MEM_READ
#define COFF_CHARACTERISTICS_DATA   0xC0000040u
// IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ
#define COFF_CHARACTERISTICS_RODATA 0x40000040u
// IMAGE_SCN_CNT_UNINITIALIZED_DATA | IMAGE_SCN_MEM_WRITE | IMAGE_SCN_MEM_READ | IMAGE_SCN_ALIGN_16BYTES
#define COFF_CHARACTERISTICS_BSS    0xC0500080u
// IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_ALIGN_1BYTES | IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_DISCARDABLE
// (the alignment field 0x00100000 is the 1-byte alignment value)
#define COFF_CHARACTERISTICS_CV     0x42100040u

// ---- Symbol storage classes ----
#define IMAGE_SYM_CLASS_EXTERNAL 0x0002
#define IMAGE_SYM_CLASS_STATIC   0x0003
#define IMAGE_SYM_CLASS_LABEL    0x0006
#define IMAGE_SYM_CLASS_FILE     0x0067

// ---- File header characteristics ----
#define IMAGE_FILE_LINE_NUMS_STRIPPED 0x0004

// ---- AMD64 relocation types ----
#define IMAGE_REL_AMD64_ADDR64   0x0001
#define IMAGE_REL_AMD64_ADDR32   0x0002
#define IMAGE_REL_AMD64_ADDR32NB 0x0003
#define IMAGE_REL_AMD64_REL32    0x0004
#define IMAGE_REL_AMD64_SECTION  0x000A
#define IMAGE_REL_AMD64_SECREL   0x000B

#define MD5_HASHBYTES 16
// On-disk COFF section header record. The field order and widths define the
// file layout, so the size is pinned to 40 bytes below.
typedef struct COFF_SectionHeader {
    char name[8];                   // section name, e.g. ".text"

    union {
        uint32_t physical_address;
        uint32_t virtual_size;
    } misc;

    uint32_t virtual_address;
    uint32_t raw_data_size;         // byte count of the section contents
    uint32_t raw_data_pos;          // file offset of the section contents
    uint32_t pointer_to_reloc;      // file offset of the relocation records
    uint32_t pointer_to_lineno;
    uint16_t num_reloc;             // how many relocation records follow
    uint16_t num_lineno;
    uint32_t characteristics;       // COFF_CHARACTERISTICS_* flags
} COFF_SectionHeader;

static_assert(sizeof(COFF_SectionHeader) == 40, "COFF Section header size != 40 bytes");
// On-disk COFF file header: the very first record of the object file.
// Its size is pinned to 20 bytes below.
typedef struct COFF_FileHeader {
    uint16_t machine;               // COFF_MACHINE_* target identifier
    uint16_t num_sections;          // number of section headers that follow
    uint32_t timestamp;
    uint32_t symbol_table;          // file offset of the symbol table
    uint32_t symbol_count;          // symbol records, auxiliary ones included
    uint16_t optional_header_size;
    uint16_t characteristics;       // IMAGE_FILE_* flags
} COFF_FileHeader;

static_assert(sizeof(COFF_FileHeader) == 20, "COFF File header size != 20 bytes");
// NOTE: symbol, relocation, and line number records are packed to 2-byte
// alignment; without the pragma the compiler would pad them and the size
// checks below would fail.
#pragma pack(push,2)

// One relocation record (10 bytes).
typedef struct COFF_ImageReloc {
    union {
        uint32_t VirtualAddress;    // offset within the section to patch
        uint32_t RelocCount;
    };
    uint32_t SymbolTableIndex;      // symbol the relocation refers to
    uint16_t Type;                  // IMAGE_REL_* relocation kind
} COFF_ImageReloc;

static_assert(sizeof(COFF_ImageReloc) == 10, "COFF Image Relocation size != 10 bytes");

// One symbol table record (18 bytes).
typedef struct COFF_Symbol {
    union {
        uint8_t  short_name[8];     // name stored inline
        uint32_t long_name[2];      // { 0, offset into the string table }
    };
    uint32_t value;
    int16_t  section_number;
    uint16_t type;
    uint8_t  storage_class;         // IMAGE_SYM_CLASS_*
    uint8_t  aux_symbols_count;     // auxiliary records following this one
} COFF_Symbol;

static_assert(sizeof(COFF_Symbol) == 18, "COFF Symbol size != 18 bytes");

// Auxiliary record that follows a section symbol (18 bytes).
typedef struct COFF_AuxSectionSymbol {
    uint32_t length;        // section length
    uint16_t reloc_count;   // number of relocation entries
    uint16_t lineno_count;  // number of line numbers
    uint32_t checksum;      // checksum for communal
    int16_t  number;        // section number to associate with
    uint8_t  selection;     // communal selection type
    uint8_t  reserved;
    int16_t  high_bits;     // high bits of the section number
} COFF_AuxSectionSymbol;

static_assert(sizeof(COFF_AuxSectionSymbol) == 18, "COFF Aux Section Symbol size != 18 bytes");

#pragma pack(pop)
// Values for COFF_FileHeader.machine
enum {
    // AMD64 (K8)
    COFF_MACHINE_AMD64 = 0x8664,
    // ARM64 Little-Endian
    COFF_MACHINE_ARM64 = 0xAA64,
};
// Zero-based index of a record in the COFF symbol table. This is not the
// same thing as a section number.
typedef int SymbolTableIndex;
typedef struct { | |
const char* name; | |
uint32_t characteristics; | |
uint32_t relocation_pos; | |
DynArray(COFF_ImageReloc) relocations; | |
uint32_t raw_data_pos; | |
uint32_t raw_data_size; | |
const uint8_t* raw_data; | |
} Section; | |
// In-memory description of one ordinary (non-section) symbol, such as a
// function or an import.
typedef struct {
    const char* name;
    uint32_t value;             // byte offset inside the owning section
    uint32_t section_num;       // section number; 0 when defined elsewhere
    uint32_t storage_class;     // IMAGE_SYM_CLASS_*
} Symbol;
static DynArray(Symbol) symbols; | |
static DynArray(Section) section_headers; | |
// Appends one relocation record to the end of section `s`'s relocation list.
static void add_reloc(Section* s, const COFF_ImageReloc rel) {
    arrput(s->relocations, rel);
}
static void add_symbol(const Symbol sym) { | |
arrput(symbols, sym); | |
} | |
// this small .text has some simple relocations to compile an extended | |
// hello world program | |
static SymbolTableIndex generate_text_section(int rdata_section_num, int normal_symbol_base) { | |
static const uint8_t contents[] = { | |
0x48, 0x8D, 0x0D, 0x05, 0x00, 0x00, 0x00, // lea rcx, [.rdata + 5] | |
0xBA, 0x2A, 0x00, 0x00, 0x00, // mov rdx, 42 | |
0xE9, 0x00, 0x00, 0x00, 0x00, // jmp printf | |
}; | |
Section s = { | |
.name = ".text", | |
.characteristics = COFF_CHARACTERISTICS_TEXT, | |
.raw_data = contents, | |
.raw_data_size = sizeof(contents) | |
}; | |
// Apply a relocation to the Hello string | |
add_reloc(&s, (COFF_ImageReloc){ | |
.Type = IMAGE_REL_AMD64_REL32, | |
.SymbolTableIndex = rdata_section_num, | |
// This refers to the spot within the text section that | |
// the relocation will happen to, relocations add onto | |
// the value that's already there | |
.VirtualAddress = 3 | |
}); | |
add_reloc(&s, (COFF_ImageReloc){ | |
.Type = IMAGE_REL_AMD64_REL32, | |
// we're referring to the printf external symbol | |
.SymbolTableIndex = normal_symbol_base+1, | |
// this is the CALL instruction's rip relative address | |
.VirtualAddress = 13 | |
}); | |
arrput(section_headers, s); | |
// it's zero based and there's two symbols per section | |
SymbolTableIndex text_section = (arrlen(section_headers)-1)*2; | |
// there's a distinction between symbol table indices and | |
// section numbers, i explain the section headers in the symbol | |
// output code at the bottom | |
int text_section_num = arrlen(section_headers); | |
// this is symbol normal_symbol_base+0 | |
add_symbol((Symbol) { | |
.name = "main", | |
// the value means the byte offset at which the | |
// function is found in the text section | |
.value = 0, | |
.section_num = text_section_num, | |
// the external storage means it's visible beyond this TU | |
.storage_class = IMAGE_SYM_CLASS_EXTERNAL | |
}); | |
// this is symbol normal_symbol_base+1 | |
add_symbol((Symbol) { | |
.name = "printf", | |
// this symbol is importing something that's defined | |
// elsewhere, section_number = 0 means it's not bound | |
// to any section here | |
.value = 0, | |
.section_num = 0, | |
// the external storage means it's visible beyond this TU | |
.storage_class = IMAGE_SYM_CLASS_EXTERNAL | |
}); | |
return text_section; | |
} | |
static SymbolTableIndex generate_rdata_section() { | |
// we put some extra bytes at the start so we can show | |
// what relocations look like offseted a bit | |
static const uint8_t contents[] = "_x_x_Hello, Agent %d!"; | |
Section s = { | |
.name = ".rdata", | |
.characteristics = COFF_CHARACTERISTICS_RODATA, | |
.raw_data = contents, | |
.raw_data_size = sizeof(contents) | |
}; | |
arrput(section_headers, s); | |
// it's zero based and there's two symbols per section | |
return (arrlen(section_headers)-1)*2; | |
} | |
int main(int argc, char** argv) { | |
if (argc < 2) { | |
printf("Expected output path for .obj file\n"); | |
return 1; | |
} | |
FILE* file = fopen(argv[1], "wb"); | |
if (!file) { | |
printf("Could not open '%s' for writing\n", argv[1]); | |
return 1; | |
} | |
// Assemble some machine code and data | |
int normal_symbol_base = 4; | |
int rdata_section_num = generate_rdata_section(); | |
int text_section_num = generate_text_section(rdata_section_num, normal_symbol_base); | |
// Convert our abstraction over COFF into file contents | |
size_t section_count = arrlen(section_headers); | |
// normal symbols like functions and imports start right after our section symbols | |
// i dont think this is necessarily a rule more like a convention | |
assert(section_count * 2 == normal_symbol_base); | |
// The file header is at the start and just gives some basic | |
// data on where the important tables are | |
COFF_FileHeader header = { | |
.num_sections = section_count, | |
.timestamp = time(NULL), | |
// in this example the machine code is x64 but you can find the | |
// table for these values online at: | |
.machine = COFF_MACHINE_AMD64, | |
// there's 2 symbols per section (the auxillary symbol counts) | |
.symbol_count = (2 * section_count) + arrlen(symbols), | |
// we fill in this value later on | |
.symbol_table = 0, | |
.characteristics = IMAGE_FILE_LINE_NUMS_STRIPPED | |
}; | |
// layout | |
uint32_t string_table_pos; | |
{ | |
size_t pos = sizeof(COFF_FileHeader) + (section_count * sizeof(COFF_SectionHeader)); | |
// raw data | |
for (size_t i = 0; i < section_count; i++) { | |
section_headers[i].raw_data_pos = pos; | |
pos += section_headers[i].raw_data_size; | |
} | |
// relocations | |
for (size_t i = 0; i < section_count; i++) { | |
// if there's no relocations you can just leave the relocation | |
// pos as 0, it doesn't matter | |
section_headers[i].relocation_pos = pos; | |
pos += arrlen(section_headers[i].relocations) * sizeof(COFF_ImageReloc); | |
} | |
// we'll place the symbol table at the end, directly after the symbol | |
// table is the string table which is where longer symbol names will | |
// be placed | |
header.symbol_table = pos; | |
string_table_pos = pos + (header.symbol_count * sizeof(COFF_Symbol)); | |
} | |
// write the COFF headers | |
fwrite(&header, sizeof(header), 1, file); | |
for (size_t i = 0; i < section_count; i++) { | |
COFF_SectionHeader sec = { | |
.characteristics = section_headers[i].characteristics, | |
.raw_data_size = section_headers[i].raw_data_size, | |
.raw_data_pos = section_headers[i].raw_data_pos, | |
.num_reloc = arrlen(section_headers[i].relocations), | |
.pointer_to_reloc = section_headers[i].relocation_pos | |
}; | |
// We just truncate the longer section names here, doesn't | |
// matter here since all the names are small like .text | |
assert(strlen(section_headers[i].name) < 8); | |
strncpy(sec.name, section_headers[i].name, 8); | |
sec.name[8 - 1] = 0; | |
fwrite(&sec, sizeof(sec), 1, file); | |
} | |
// write out raw data | |
for (size_t i = 0; i < section_count; i++) { | |
assert(ftell(file) == section_headers[i].raw_data_pos); | |
fwrite(section_headers[i].raw_data, section_headers[i].raw_data_size, 1, file); | |
} | |
// relocations | |
for (size_t i = 0; i < section_count; i++) { | |
assert(ftell(file) == section_headers[i].relocation_pos); | |
fwrite(section_headers[i].relocations, arrlen(section_headers[i].relocations), sizeof(COFF_ImageReloc), file); | |
} | |
assert(ftell(file) == header.symbol_table); | |
for (size_t i = 0; i < section_count; i++) { | |
// section number 0 is kinda a NULL section so we skip it when | |
// labeling our sections, relocations and symbols use this number | |
// to refer to the sections so we wanna be consistent | |
int section_number = i+1; | |
COFF_Symbol sym = { | |
.section_number = section_number, | |
// section symbols have static storage because | |
// every separate translation unit will have their | |
// own separate copy | |
.storage_class = IMAGE_SYM_CLASS_STATIC, | |
// auxillary symbols just add on to the data these symbols | |
// tell us, in this case since it's a section we wanna add | |
// extra data that says how many relocations and how big | |
// the section itself is | |
.aux_symbols_count = 1 | |
}; | |
// Same thing as before, we dont care that it truncates, we'll assert | |
// but beyond that it shouldn't actually matter | |
assert(strlen(section_headers[i].name) < 8); | |
strncpy((char*) sym.short_name, section_headers[i].name, 8); | |
sym.short_name[8 - 1] = 0; | |
fwrite(&sym, sizeof(sym), 1, file); | |
// Write the auxillary section symbol | |
COFF_AuxSectionSymbol aux = { | |
.length = section_headers[i].raw_data_size, | |
.reloc_count = arrlen(section_headers[i].relocations), | |
// for odd reasons the section number is duplicated in | |
// the symbol and the aux | |
.number = section_number | |
}; | |
fwrite(&aux, sizeof(aux), 1, file); | |
} | |
// this is where our normal looking symbols go, imports, | |
// functions, globals. symbols here can be longer than 8 | |
// characters which means that we use the long name format | |
// and place the actual string into the string table | |
uint32_t string_table_mark = 4; | |
DynArray(char*) string_table = NULL; | |
size_t symbol_count = arrlen(symbols); | |
for (size_t i = 0; i < symbol_count; i++) { | |
COFF_Symbol sym = { | |
.value = symbols[i].value, | |
.section_number = symbols[i].section_num, | |
.storage_class = symbols[i].storage_class | |
}; | |
const char* name = symbols[i].name; | |
size_t name_len = strlen(name); | |
if (name_len >= 8) { | |
sym.long_name[0] = 0; // this value is 0 for the long names | |
sym.long_name[1] = string_table_mark; // and this is the position in the string table | |
// allocate some space in the string table | |
arrput(string_table, (char*) name); | |
string_table_mark += name_len + 1; | |
} else { | |
memcpy(sym.short_name, name, name_len + 1); | |
} | |
fwrite(&sym, sizeof(sym), 1, file); | |
} | |
// String table | |
// First 4 bytes are the size of the string table, then | |
// it's all just null terminated strings | |
assert(ftell(file) == string_table_pos); | |
fwrite(&string_table_mark, sizeof(string_table_mark), 1, file); | |
size_t string_table_count = arrlen(string_table); | |
for (size_t i = 0; i < string_table_count; i++) { | |
size_t len = strlen(string_table[i]) + 1; | |
fwrite(string_table[i], len, 1, file); | |
} | |
fclose(file); | |
return 0; | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment