Created
April 13, 2017 13:03
-
-
Save skochinsky/07c8e95e33d9429d81a75622b5d24c8b to your computer and use it in GitHub Desktop.
MSVC PE Rich header parser with compiler version display
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# based on code from http://trendystephen.blogspot.be/2008/01/rich-header.html | |
import sys | |
import struct | |
# I'm trying not to bury the magic number... | |
CHECKSUM_MASK = 0x536e6144 # DanS (actuall SnaD) | |
RICH_TEXT = 'Rich' | |
RICH_TEXT_LENGTH = len(RICH_TEXT) | |
PE_START = 0x3c | |
PE_FIELD_LENGTH = 4 | |
# most of values up to AliasObj900 are from old MSVC leak with private PDBs; | |
# rest is from guesses/observations | |
PRODID_MAP = { | |
0: "Unknown", | |
1: "Import0", | |
2: "Linker510", | |
3: "Cvtomf510", | |
4: "Linker600", | |
5: "Cvtomf600", | |
6: "Cvtres500", | |
7: "Utc11_Basic", | |
8: "Utc11_C", | |
9: "Utc12_Basic", | |
10: "Utc12_C", | |
11: "Utc12_CPP", | |
12: "AliasObj60", | |
13: "VisualBasic60", | |
14: "Masm613", | |
15: "Masm710", | |
16: "Linker511", | |
17: "Cvtomf511", | |
18: "Masm614", | |
19: "Linker512", | |
20: "Cvtomf512", | |
21: "Utc12_C_Std", | |
22: "Utc12_CPP_Std", | |
23: "Utc12_C_Book", | |
24: "Utc12_CPP_Book", | |
25: "Implib700", | |
26: "Cvtomf700", | |
27: "Utc13_Basic", | |
28: "Utc13_C", | |
29: "Utc13_CPP", | |
30: "Linker610", | |
31: "Cvtomf610", | |
32: "Linker601", | |
33: "Cvtomf601", | |
34: "Utc12_1_Basic", | |
35: "Utc12_1_C", | |
36: "Utc12_1_CPP", | |
37: "Linker620", | |
38: "Cvtomf620", | |
39: "AliasObj70", | |
40: "Linker621", | |
41: "Cvtomf621", | |
42: "Masm615", | |
43: "Utc13_LTCG_C", | |
44: "Utc13_LTCG_CPP", | |
45: "Masm620", | |
46: "ILAsm100", | |
47: "Utc12_2_Basic", | |
48: "Utc12_2_C", | |
49: "Utc12_2_CPP", | |
50: "Utc12_2_C_Std", | |
51: "Utc12_2_CPP_Std", | |
52: "Utc12_2_C_Book", | |
53: "Utc12_2_CPP_Book", | |
54: "Implib622", | |
55: "Cvtomf622", | |
56: "Cvtres501", | |
57: "Utc13_C_Std", | |
58: "Utc13_CPP_Std", | |
59: "Cvtpgd1300", | |
60: "Linker622", | |
61: "Linker700", | |
62: "Export622", | |
63: "Export700", | |
64: "Masm700", | |
65: "Utc13_POGO_I_C", | |
66: "Utc13_POGO_I_CPP", | |
67: "Utc13_POGO_O_C", | |
68: "Utc13_POGO_O_CPP", | |
69: "Cvtres700", | |
70: "Cvtres710p", | |
71: "Linker710p", | |
72: "Cvtomf710p", | |
73: "Export710p", | |
74: "Implib710p", | |
75: "Masm710p", | |
76: "Utc1310p_C", | |
77: "Utc1310p_CPP", | |
78: "Utc1310p_C_Std", | |
79: "Utc1310p_CPP_Std", | |
80: "Utc1310p_LTCG_C", | |
81: "Utc1310p_LTCG_CPP", | |
82: "Utc1310p_POGO_I_C", | |
83: "Utc1310p_POGO_I_CPP", | |
84: "Utc1310p_POGO_O_C", | |
85: "Utc1310p_POGO_O_CPP", | |
86: "Linker624", | |
87: "Cvtomf624", | |
88: "Export624", | |
89: "Implib624", | |
90: "Linker710", | |
91: "Cvtomf710", | |
92: "Export710", | |
93: "Implib710", | |
94: "Cvtres710", | |
95: "Utc1310_C", | |
96: "Utc1310_CPP", | |
97: "Utc1310_C_Std", | |
98: "Utc1310_CPP_Std", | |
99: "Utc1310_LTCG_C", | |
100: "Utc1310_LTCG_CPP", | |
101: "Utc1310_POGO_I_C", | |
102: "Utc1310_POGO_I_CPP", | |
103: "Utc1310_POGO_O_C", | |
104: "Utc1310_POGO_O_CPP", | |
105: "AliasObj710", | |
106: "AliasObj710p", | |
107: "Cvtpgd1310", | |
108: "Cvtpgd1310p", | |
109: "Utc1400_C", | |
110: "Utc1400_CPP", | |
111: "Utc1400_C_Std", | |
112: "Utc1400_CPP_Std", | |
113: "Utc1400_LTCG_C", | |
114: "Utc1400_LTCG_CPP", | |
115: "Utc1400_POGO_I_C", | |
116: "Utc1400_POGO_I_CPP", | |
117: "Utc1400_POGO_O_C", | |
118: "Utc1400_POGO_O_CPP", | |
119: "Cvtpgd1400", | |
120: "Linker800", | |
121: "Cvtomf800", | |
122: "Export800", | |
123: "Implib800", | |
124: "Cvtres800", | |
125: "Masm800", | |
126: "AliasObj800", | |
127: "PhoenixPrerelease", | |
128: "Utc1400_CVTCIL_C", | |
129: "Utc1400_CVTCIL_CPP", | |
130: "Utc1400_LTCG_MSIL", | |
131: "Utc1500_C", | |
132: "Utc1500_CPP", | |
133: "Utc1500_C_Std", | |
134: "Utc1500_CPP_Std", | |
135: "Utc1500_CVTCIL_C", | |
136: "Utc1500_CVTCIL_CPP", | |
137: "Utc1500_LTCG_C", | |
138: "Utc1500_LTCG_CPP", | |
139: "Utc1500_LTCG_MSIL", | |
140: "Utc1500_POGO_I_C", | |
141: "Utc1500_POGO_I_CPP", | |
142: "Utc1500_POGO_O_C", | |
143: "Utc1500_POGO_O_CPP", | |
144: "Cvtpgd1500", | |
145: "Linker900", | |
146: "Export900", | |
147: "Implib900", | |
148: "Cvtres900", | |
149: "Masm900", | |
150: "AliasObj900", | |
151: "Resource900", | |
152: "AliasObj1000", | |
154: "Cvtres1000", | |
155: "Export1000", | |
156: "Implib1000", | |
157: "Linker1000", | |
158: "Masm1000", | |
170: "Utc1600_C", | |
171: "Utc1600_CPP", | |
172: "Utc1600_CVTCIL_C", | |
173: "Utc1600_CVTCIL_CPP", | |
174: "Utc1600_LTCG_C ", | |
175: "Utc1600_LTCG_CPP", | |
176: "Utc1600_LTCG_MSIL", | |
177: "Utc1600_POGO_I_C", | |
178: "Utc1600_POGO_I_CPP", | |
179: "Utc1600_POGO_O_C", | |
180: "Utc1600_POGO_O_CPP", | |
# vvv | |
183: "Linker1010", | |
184: "Export1010", | |
185: "Implib1010", | |
186: "Cvtres1010", | |
187: "Masm1010", | |
188: "AliasObj1010", | |
# ^^^ | |
199: "AliasObj1100", | |
201: "Cvtres1100", | |
202: "Export1100", | |
203: "Implib1100", | |
204: "Linker1100", | |
205: "Masm1100", | |
206: "Utc1700_C", | |
207: "Utc1700_CPP", | |
208: "Utc1700_CVTCIL_C", | |
209: "Utc1700_CVTCIL_CPP", | |
210: "Utc1700_LTCG_C ", | |
211: "Utc1700_LTCG_CPP", | |
212: "Utc1700_LTCG_MSIL", | |
213: "Utc1700_POGO_I_C", | |
214: "Utc1700_POGO_I_CPP", | |
215: "Utc1700_POGO_O_C", | |
216: "Utc1700_POGO_O_CPP", | |
} | |
## | |
# A convenient exception to raise if the Rich Header doesn't exist. | |
class RichHeaderNotFoundException(Exception): | |
def __init__(self): | |
Exception.__init__(self, "Rich footer does not appear to exist") | |
## | |
# Locate the body of the data that contains the rich header This will be | |
# (roughly) between 0x3c and the beginning of the PE header, but the entire | |
# thing up to the last checksum will be needed in order to verify the header. | |
def get_file_header(file_name): | |
f = open(file_name,'rb') | |
#start with 0x3c | |
f.seek(PE_START) | |
data = f.read(PE_FIELD_LENGTH) | |
if data == '': #File is empty, bail | |
raise RichHeaderNotFoundException() | |
end = struct.unpack('<L',data)[0] # get the value at 0x3c | |
f.seek(0) | |
data = f.read( end ) # read until that value is reached | |
f.close() | |
return data | |
## | |
# This class assists in parsing the Rich Header from PE Files. | |
# The Rich Header is the section in the PE file following the dos stub but | |
# preceding the lfa_new header which is inserted by link.exe when building with | |
# the Microsoft Compilers. The Rich Heder contains the following: | |
# <pre> | |
# marker, checksum, checksum, checksum, | |
# R_compid_i, R_occurrence_i, | |
# R_compid_i+1, R_occurrence_i+1, ... | |
# R_compid_N-1, R_occurrence_N-1, Rich, marker | |
# | |
# marker = checksum XOR 0x536e6144 | |
# R_compid_i is the ith compid XORed with the checksum | |
# R_occurrence_i is the ith occurrence XORed with the checksum | |
# Rich = the text string 'Rich' | |
# The checksum is the sum of all the PE Header values rotated by their | |
# offset and the sum of all compids rotated by their occurrence counts. | |
# </pre> | |
# @see _validate_checksum code for checksum calculation | |
class ParsedRichHeader: | |
## | |
# Creates a ParsedRichHeader from the specified PE File. | |
# @throws RichHeaderNotFoundException if the file does not contain a rich header | |
# @param file_name The PE File to be parsed | |
def __init__(self, file_name): | |
## The file that was parsed | |
self.file_name = file_name | |
self._parse( file_name ) | |
## | |
# Used internally to parse the PE File and extract Rich Header data. | |
# Initializes self.compids and self.valid_checksum. | |
# @param file_name The PE File to be parsed | |
# @throws RichHeaderNotFoundException if the file does not contain a rich header | |
def _parse(self,file_name): | |
#make sure there is a header: | |
data = get_file_header( file_name ) | |
compid_end_index = data.find(RICH_TEXT) | |
if compid_end_index == -1: | |
raise RichHeaderNotFoundException() | |
rich_offset = compid_end_index + RICH_TEXT_LENGTH | |
checksum_text = data[rich_offset:rich_offset+4] | |
checksum_value = struct.unpack('<L', checksum_text)[0] | |
#start marker denotes the beginning of the rich header | |
start_marker = struct.pack('<LLLL',checksum_value ^ CHECKSUM_MASK, checksum_value, checksum_value, checksum_value )[0] | |
rich_header_start = data.find(start_marker) | |
if rich_header_start == -1: | |
raise RichHeaderNotFoundException() | |
compid_start_index = rich_header_start + 16 # move past the marker and 3 checksums | |
compids = dict() | |
for i in range(compid_start_index, compid_end_index, 8): | |
compid = struct.unpack('<L',data[i:i+4])[0] ^ checksum_value | |
count = struct.unpack('<L',data[i+4:i+8])[0] ^ checksum_value | |
compids[compid]=count | |
## A dictionary of compids and their occurrence counts | |
self.compids = compids | |
## A value for later reference to see if the checksum was valid | |
self.valid_checksum = self._validate_checksum( data, rich_header_start, checksum_value ) | |
## | |
# Compute the checksum value and see if it matches the checksum stored in | |
# the Rich Header. | |
# The checksum is the sum of all the PE Header values rotated by their | |
# offset and the sum of all compids rotated by their occurrence counts | |
# @param data A blob of binary data that corresponds to the PE Header data | |
# @param rich_header_start The offset to marker, checksum, checksum, checksum | |
# @returns True if the checksum is valid, false otherwise | |
def _validate_checksum(self, data, rich_header_start, checksum): | |
#initialize the checksum offset at which the rich header is located | |
cksum = rich_header_start | |
#add the value from the pe header after rotating the value by its offset in the pe header | |
for i in range(0,rich_header_start): | |
if PE_START <= i <= PE_START+PE_FIELD_LENGTH-1: | |
continue | |
temp = ord(data[i]) | |
cksum+= ((temp << (i%32)) | (temp >> (32-(i%32))) & 0xff) | |
cksum &=0xffffffff | |
#add each compid to the checksum after rotating it by its occurrence count | |
for k in self.compids.keys(): | |
cksum += (k << self.compids[k]%32 | k >> ( 32 - (self.compids[k]%32))) | |
cksum &=0xffffffff | |
## A convenient place for storing the checksum that was computing during checksum validation | |
self.checksum = cksum | |
return cksum == checksum | |
if __name__ == "__main__": | |
ph = ParsedRichHeader(sys.argv[1]) | |
print ("PRODID name build count") | |
for key in ph.compids.keys(): | |
count = ph.compids[key] | |
prodid, build = (key>>16), key&0xFFFF | |
prodid_name = PRODID_MAP[prodid] if prodid in PRODID_MAP else "<unknown>" | |
print ('%6d %-15s %5d %5d' % (prodid, prodid_name, build, count)) | |
if ph.valid_checksum: | |
print ("Checksum valid") | |
else: | |
print("Checksum not valid!") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment