Skip to content

Instantly share code, notes, and snippets.

@qrealka
Forked from skochinsky/rich.py
Created May 28, 2017 08:39
Show Gist options
  • Save qrealka/0091b9065fa3266487e4bcc42da69091 to your computer and use it in GitHub Desktop.
Save qrealka/0091b9065fa3266487e4bcc42da69091 to your computer and use it in GitHub Desktop.
MSVC PE Rich header parser with compiler version display
# based on code from http://trendystephen.blogspot.be/2008/01/rich-header.html
import sys
import struct
# The Rich header's start marker is the checksum XORed with this mask.
# The mask is the ASCII text 'DanS' read as a little-endian 32-bit integer,
# which is why the hex digits spell 'SnaD' when read left to right.
CHECKSUM_MASK = 0x536e6144
# Text that terminates the list of compids.  It is a bytes literal because it
# is searched for in data read from a file opened in binary mode; a text
# string would raise TypeError on Python 3 (on Python 2 the two are the same).
RICH_TEXT = b'Rich'
RICH_TEXT_LENGTH = len(RICH_TEXT)
# Offset and size of the little-endian field that holds the file offset of
# the PE header; the Rich header lies before that offset.
PE_START = 0x3c
PE_FIELD_LENGTH = 4
# Maps a product id (the high 16 bits of a compid) to the name of the tool
# that produced it.
# Most of the values up to AliasObj900 are from an old MSVC leak with private
# PDBs; the rest are from guesses/observations.
PRODID_MAP = {
    0: "Unknown",
    1: "Import0",
    2: "Linker510",
    3: "Cvtomf510",
    4: "Linker600",
    5: "Cvtomf600",
    6: "Cvtres500",
    7: "Utc11_Basic",
    8: "Utc11_C",
    9: "Utc12_Basic",
    10: "Utc12_C",
    11: "Utc12_CPP",
    12: "AliasObj60",
    13: "VisualBasic60",
    14: "Masm613",
    15: "Masm710",
    16: "Linker511",
    17: "Cvtomf511",
    18: "Masm614",
    19: "Linker512",
    20: "Cvtomf512",
    21: "Utc12_C_Std",
    22: "Utc12_CPP_Std",
    23: "Utc12_C_Book",
    24: "Utc12_CPP_Book",
    25: "Implib700",
    26: "Cvtomf700",
    27: "Utc13_Basic",
    28: "Utc13_C",
    29: "Utc13_CPP",
    30: "Linker610",
    31: "Cvtomf610",
    32: "Linker601",
    33: "Cvtomf601",
    34: "Utc12_1_Basic",
    35: "Utc12_1_C",
    36: "Utc12_1_CPP",
    37: "Linker620",
    38: "Cvtomf620",
    39: "AliasObj70",
    40: "Linker621",
    41: "Cvtomf621",
    42: "Masm615",
    43: "Utc13_LTCG_C",
    44: "Utc13_LTCG_CPP",
    45: "Masm620",
    46: "ILAsm100",
    47: "Utc12_2_Basic",
    48: "Utc12_2_C",
    49: "Utc12_2_CPP",
    50: "Utc12_2_C_Std",
    51: "Utc12_2_CPP_Std",
    52: "Utc12_2_C_Book",
    53: "Utc12_2_CPP_Book",
    54: "Implib622",
    55: "Cvtomf622",
    56: "Cvtres501",
    57: "Utc13_C_Std",
    58: "Utc13_CPP_Std",
    59: "Cvtpgd1300",
    60: "Linker622",
    61: "Linker700",
    62: "Export622",
    63: "Export700",
    64: "Masm700",
    65: "Utc13_POGO_I_C",
    66: "Utc13_POGO_I_CPP",
    67: "Utc13_POGO_O_C",
    68: "Utc13_POGO_O_CPP",
    69: "Cvtres700",
    70: "Cvtres710p",
    71: "Linker710p",
    72: "Cvtomf710p",
    73: "Export710p",
    74: "Implib710p",
    75: "Masm710p",
    76: "Utc1310p_C",
    77: "Utc1310p_CPP",
    78: "Utc1310p_C_Std",
    79: "Utc1310p_CPP_Std",
    80: "Utc1310p_LTCG_C",
    81: "Utc1310p_LTCG_CPP",
    82: "Utc1310p_POGO_I_C",
    83: "Utc1310p_POGO_I_CPP",
    84: "Utc1310p_POGO_O_C",
    85: "Utc1310p_POGO_O_CPP",
    86: "Linker624",
    87: "Cvtomf624",
    88: "Export624",
    89: "Implib624",
    90: "Linker710",
    91: "Cvtomf710",
    92: "Export710",
    93: "Implib710",
    94: "Cvtres710",
    95: "Utc1310_C",
    96: "Utc1310_CPP",
    97: "Utc1310_C_Std",
    98: "Utc1310_CPP_Std",
    99: "Utc1310_LTCG_C",
    100: "Utc1310_LTCG_CPP",
    101: "Utc1310_POGO_I_C",
    102: "Utc1310_POGO_I_CPP",
    103: "Utc1310_POGO_O_C",
    104: "Utc1310_POGO_O_CPP",
    105: "AliasObj710",
    106: "AliasObj710p",
    107: "Cvtpgd1310",
    108: "Cvtpgd1310p",
    109: "Utc1400_C",
    110: "Utc1400_CPP",
    111: "Utc1400_C_Std",
    112: "Utc1400_CPP_Std",
    113: "Utc1400_LTCG_C",
    114: "Utc1400_LTCG_CPP",
    115: "Utc1400_POGO_I_C",
    116: "Utc1400_POGO_I_CPP",
    117: "Utc1400_POGO_O_C",
    118: "Utc1400_POGO_O_CPP",
    119: "Cvtpgd1400",
    120: "Linker800",
    121: "Cvtomf800",
    122: "Export800",
    123: "Implib800",
    124: "Cvtres800",
    125: "Masm800",
    126: "AliasObj800",
    127: "PhoenixPrerelease",
    128: "Utc1400_CVTCIL_C",
    129: "Utc1400_CVTCIL_CPP",
    130: "Utc1400_LTCG_MSIL",
    131: "Utc1500_C",
    132: "Utc1500_CPP",
    133: "Utc1500_C_Std",
    134: "Utc1500_CPP_Std",
    135: "Utc1500_CVTCIL_C",
    136: "Utc1500_CVTCIL_CPP",
    137: "Utc1500_LTCG_C",
    138: "Utc1500_LTCG_CPP",
    139: "Utc1500_LTCG_MSIL",
    140: "Utc1500_POGO_I_C",
    141: "Utc1500_POGO_I_CPP",
    142: "Utc1500_POGO_O_C",
    143: "Utc1500_POGO_O_CPP",
    144: "Cvtpgd1500",
    145: "Linker900",
    146: "Export900",
    147: "Implib900",
    148: "Cvtres900",
    149: "Masm900",
    150: "AliasObj900",
    151: "Resource900",
    152: "AliasObj1000",
    154: "Cvtres1000",
    155: "Export1000",
    156: "Implib1000",
    157: "Linker1000",
    158: "Masm1000",
    170: "Utc1600_C",
    171: "Utc1600_CPP",
    172: "Utc1600_CVTCIL_C",
    173: "Utc1600_CVTCIL_CPP",
    174: "Utc1600_LTCG_C",
    175: "Utc1600_LTCG_CPP",
    176: "Utc1600_LTCG_MSIL",
    177: "Utc1600_POGO_I_C",
    178: "Utc1600_POGO_I_CPP",
    179: "Utc1600_POGO_O_C",
    180: "Utc1600_POGO_O_CPP",
    # vvv
    183: "Linker1010",
    184: "Export1010",
    185: "Implib1010",
    186: "Cvtres1010",
    187: "Masm1010",
    188: "AliasObj1010",
    # ^^^
    199: "AliasObj1100",
    201: "Cvtres1100",
    202: "Export1100",
    203: "Implib1100",
    204: "Linker1100",
    205: "Masm1100",
    206: "Utc1700_C",
    207: "Utc1700_CPP",
    208: "Utc1700_CVTCIL_C",
    209: "Utc1700_CVTCIL_CPP",
    210: "Utc1700_LTCG_C",
    211: "Utc1700_LTCG_CPP",
    212: "Utc1700_LTCG_MSIL",
    213: "Utc1700_POGO_I_C",
    214: "Utc1700_POGO_I_CPP",
    215: "Utc1700_POGO_O_C",
    216: "Utc1700_POGO_O_CPP",
}
##
# Raised when the Rich Header cannot be located in the file being parsed.
# The exception takes no arguments and always carries the same message.
class RichHeaderNotFoundException(Exception):
    def __init__(self):
        message = "Rich footer does not appear to exist"
        super(RichHeaderNotFoundException, self).__init__(message)
##
# Read the leading part of a PE file, which is where the Rich Header lives.
# The header sits (roughly) between 0x3c and the beginning of the PE header,
# but everything from the start of the file is returned because the whole
# region is needed in order to verify the checksum.
# @param file_name Path of the PE file to read
# @returns The raw bytes from offset 0 up to the offset stored at 0x3c
# @throws RichHeaderNotFoundException if the file is too short to hold the
#         4-byte field at 0x3c
def get_file_header(file_name):
    # 'with' closes the handle even when a read or the unpack below raises
    with open(file_name, 'rb') as f:
        # the little-endian field at 0x3c holds the offset where reading stops
        f.seek(PE_START)
        data = f.read(PE_FIELD_LENGTH)
        # A short read (empty or truncated file) cannot be unpacked; checking
        # the length works for bytes on Python 3, where data == '' never holds
        if len(data) < PE_FIELD_LENGTH:
            raise RichHeaderNotFoundException()
        end = struct.unpack('<L', data)[0]
        f.seek(0)
        return f.read(end)
##
# This class assists in parsing the Rich Header from PE files.
# The Rich Header is the section of the PE file that follows the DOS stub and
# precedes the PE header (the one located through the field at 0x3c); it is
# inserted by link.exe when building with the Microsoft compilers.
# The Rich Header contains the following:
# <pre>
# marker, checksum, checksum, checksum,
# R_compid_i, R_occurrence_i,
# R_compid_i+1, R_occurrence_i+1, ...
# R_compid_N-1, R_occurrence_N-1, Rich, checksum
#
# marker = checksum XOR 0x536e6144
# R_compid_i is the ith compid XORed with the checksum
# R_occurrence_i is the ith occurrence XORed with the checksum
# Rich = the text string 'Rich'
# The checksum is the sum of all the PE Header values rotated by their
# offset and the sum of all compids rotated by their occurrence counts.
# </pre>
# @see _validate_checksum code for checksum calculation
class ParsedRichHeader:
    ##
    # Creates a ParsedRichHeader from the specified PE file.
    # @throws RichHeaderNotFoundException if the file does not contain a rich header
    # @param file_name The PE file to be parsed
    def __init__(self, file_name):
        ## The file that was parsed
        self.file_name = file_name
        self._parse(file_name)

    ##
    # Rotate a value left within 32 bits.
    # @param value The non-negative integer to rotate (at most 32 bits wide)
    # @param count The rotation amount; only count modulo 32 is used
    # @returns The rotated value, truncated to 32 bits
    @staticmethod
    def _rol32(value, count):
        count %= 32
        return ((value << count) | (value >> (32 - count))) & 0xffffffff

    ##
    # Used internally to parse the PE file and extract Rich Header data.
    # Initializes self.compids and self.valid_checksum.
    # @param file_name The PE file to be parsed
    # @throws RichHeaderNotFoundException if the file does not contain a rich header
    def _parse(self, file_name):
        data = get_file_header(file_name)
        # The header blob is bytes, so the needle must be bytes as well
        # (a text needle raises TypeError on Python 3).
        rich_text = RICH_TEXT
        if not isinstance(rich_text, bytes):
            rich_text = rich_text.encode('ascii')
        compid_end_index = data.find(rich_text)
        if compid_end_index == -1:
            raise RichHeaderNotFoundException()
        # the checksum is stored right after the 'Rich' text
        rich_offset = compid_end_index + RICH_TEXT_LENGTH
        checksum_text = data[rich_offset:rich_offset + 4]
        if len(checksum_text) < 4:
            # 'Rich' was found but the data ends before a full checksum
            raise RichHeaderNotFoundException()
        checksum_value = struct.unpack('<L', checksum_text)[0]
        # The start marker denotes the beginning of the rich header.  Search
        # for all 16 bytes (marker plus three checksums): a single byte of it
        # would match almost anywhere in the DOS header or stub.
        start_marker = struct.pack('<LLLL',
                                   checksum_value ^ CHECKSUM_MASK,
                                   checksum_value,
                                   checksum_value,
                                   checksum_value)
        # the marker must lie entirely before the 'Rich' text
        rich_header_start = data.find(start_marker, 0, compid_end_index)
        if rich_header_start == -1:
            raise RichHeaderNotFoundException()
        # move past the marker and 3 checksums
        compid_start_index = rich_header_start + 16
        compids = dict()
        # each entry is 8 bytes: compid then occurrence count, both XORed
        # with the checksum; only complete entries before 'Rich' are decoded
        for i in range(compid_start_index, compid_end_index - 7, 8):
            compid, count = struct.unpack('<LL', data[i:i + 8])
            compids[compid ^ checksum_value] = count ^ checksum_value
        ## A dictionary of compids and their occurrence counts
        self.compids = compids
        ## A value for later reference to see if the checksum was valid
        self.valid_checksum = self._validate_checksum(data, rich_header_start, checksum_value)

    ##
    # Compute the checksum value and see if it matches the checksum stored in
    # the Rich Header.
    # The checksum is the sum of all the PE Header values rotated by their
    # offset and the sum of all compids rotated by their occurrence counts.
    # Reads self.compids and stores the computed value in self.checksum.
    # @param data A blob of binary data that corresponds to the PE Header data
    # @param rich_header_start The offset to marker, checksum, checksum, checksum
    # @param checksum The checksum value stored in the Rich Header
    # @returns True if the checksum is valid, False otherwise
    def _validate_checksum(self, data, rich_header_start, checksum):
        # the checksum is seeded with the offset at which the rich header is located
        cksum = rich_header_start
        # bytearray yields integers on both Python 2 and Python 3, so no ord() is needed
        header_bytes = bytearray(data[:rich_header_start])
        # add each byte before the rich header after rotating it by its offset
        for i, value in enumerate(header_bytes):
            # the field at 0x3c is left out of the sum
            if PE_START <= i < PE_START + PE_FIELD_LENGTH:
                continue
            cksum = (cksum + self._rol32(value, i)) & 0xffffffff
        # add each compid to the checksum after rotating it by its occurrence count
        for compid, count in self.compids.items():
            cksum = (cksum + self._rol32(compid, count)) & 0xffffffff
        ## A convenient place for storing the checksum that was computed during checksum validation
        self.checksum = cksum
        return cksum == checksum
# Script entry point: parse the PE file named by the first command-line
# argument, print one row per compid, then report the checksum status.
if __name__ == "__main__":
    ph = ParsedRichHeader(sys.argv[1])
    print ("PRODID name build count")
    for key, count in ph.compids.items():
        # a compid packs the product id in its high 16 bits and the build
        # number in its low 16 bits
        prodid = key >> 16
        build = key & 0xFFFF
        prodid_name = PRODID_MAP.get(prodid, "<unknown>")
        print ('%6d %-15s %5d %5d' % (prodid, prodid_name, build, count))
    print ("Checksum valid" if ph.valid_checksum else "Checksum not valid!")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment