Skip to content

Instantly share code, notes, and snippets.

@Emoun
Created September 2, 2025 21:39
Show Gist options
  • Select an option

  • Save Emoun/3bb6cb3192986f769ff26b50a9130ca7 to your computer and use it in GitHub Desktop.

Select an option

Save Emoun/3bb6cb3192986f769ff26b50a9130ca7 to your computer and use it in GitHub Desktop.
Calculates how much of the encoding space RISC-V uses.
import os
import subprocess
import sys
import re
# Verbose output is opt-in: run the script with "debug" as its first argument.
with_debug = False
if len(sys.argv) > 1 and sys.argv[1] == "debug":
    with_debug = True
# Check if the risc-v opcodes repo is cloned, if not, do so
def clone_repo(repo_url):
    """Clone *repo_url* into the current working directory unless it is already there.

    The target directory name is the last path component of the URL with any
    trailing '.git' removed. Raises subprocess.CalledProcessError if the
    'git clone' command fails, so a broken clone is not mistaken for success
    by the later file-reading steps.
    """
    repo_name = repo_url.split('/')[-1].replace('.git', '')
    repo_path = os.path.join(os.getcwd(), repo_name)
    if os.path.exists(repo_path):
        if with_debug:
            print(f"Repository '{repo_name}' is already cloned.")
    else:
        if with_debug:
            print(f"Cloning repository '{repo_name}'...")
        # check=True surfaces a failed clone immediately instead of letting
        # later lookups fail with confusing "file does not exist" messages.
        subprocess.run(['git', 'clone', repo_url], check=True)
        if with_debug:
            print(f"Repository '{repo_name}' cloned successfully.")
# Print the contents of the selected extension files, so that it can be used as a reference
def print_file_contents_to_output(subdir, file_names, output_file):
    """Concatenate the contents of *file_names* (looked up in *subdir*) into *output_file*.

    Each file's contents are preceded by a heading and followed by a dashed
    separator; missing files are noted in the output instead of aborting.
    """
    out_dir_name = os.path.dirname(output_file)
    # dirname is '' when output_file has no directory part, and makedirs('')
    # raises; exist_ok also avoids the exists/makedirs race of the original.
    if out_dir_name:
        os.makedirs(out_dir_name, exist_ok=True)
    with open(output_file, 'w') as out_file:
        for file_name in file_names:
            file_path = os.path.join(subdir, file_name)
            if os.path.exists(file_path):
                with open(file_path, 'r') as file:
                    out_file.write(f"Contents of {file_name}:\n")
                    out_file.write(file.read())
                    out_file.write("\n" + "-"*40 + "\n")
            else:
                out_file.write(f"File '{file_name}' does not exist in the subdirectory '{subdir}'.\n")
def load_lines_ignoring_comments(subdir, file_names):
    """Return the stripped, non-empty lines of every file in *file_names* under *subdir*.

    Comment lines ('#') and pseudo-instruction definitions ('$pseudo_op') are
    skipped. Missing files are reported on stdout but do not abort processing.
    """
    lines = []
    for file_name in file_names:
        file_path = os.path.join(subdir, file_name)
        if os.path.exists(file_path):
            with open(file_path, 'r') as file:
                for line in file:
                    stripped_line = line.strip()
                    # startswith accepts a tuple; also reuse the already-stripped
                    # line instead of stripping a second time.
                    if stripped_line and not stripped_line.startswith(('#', '$pseudo_op')):
                        lines.append(stripped_line)
        else:
            print(f"File '{file_name}' does not exist in the subdirectory '{subdir}'.")
    return lines
def load_instruct_field_sizes(file_name):
    """Parse the riscv-opcodes arg_lut CSV and return {field_name: bit width}.

    Each row is expected to look like: "name", hi_bit, lo_bit. Blank or
    malformed (too-short) rows are skipped. Exits the program with a
    non-zero status if the file is missing.
    """
    if not os.path.exists(file_name):
        print(f"Field size file does not exist: '{file_name}'")
        # sys.exit is the proper way to terminate (exit() is an interactive helper).
        sys.exit(1)
    field_sizes = {}
    with open(file_name, 'r') as file:
        for line in file:
            # Drop the CSV quoting, then split the row on commas.
            parts = line.replace('"', '').split(',')
            if len(parts) < 3:
                continue  # tolerate blank/short lines instead of crashing
            name = parts[0]
            hi_bit = int(parts[1])
            lo_bit = int(parts[2])
            # Bit range is inclusive, so the width is hi - lo + 1.
            field_sizes[name] = hi_bit - lo_bit + 1
    return field_sizes
def get_encoding_points(instructions):
    """Map each opcode to the number of 32-bit encoding points it consumes.

    Each instruction string is an opcode followed by its operand-field names;
    every operand-field bit is "free", so an opcode occupies 2**free_bits
    encoding points. Compressed ('c.') opcodes get 16 extra free bits because
    a 16-bit instruction effectively wastes the upper half of a 32-bit slot.
    """
    field_sizes = load_instruct_field_sizes("riscv-opcodes/arg_lut.csv")
    # Sum the widths of every recognized operand field per opcode.
    free_instruction_bits = {}
    for instr in instructions:
        tokens = instr.split()
        opcode = tokens[0]
        free_instruction_bits[opcode] = sum(
            field_sizes[field] for field in tokens[1:] if field in field_sizes
        )
    encoding_points = {}
    for opcode, free_bits in free_instruction_bits.items():
        if opcode.startswith("c."):
            # Compressed instructions "waste" the second half of a 32-bit slot.
            free_bits += 16
        encoding_points[opcode] = 2 ** free_bits
    return encoding_points
def save_dict_csv(out_file_path, dictionary, key_name, value_name):
    """Write *dictionary* to *out_file_path* as a two-column CSV.

    The header row is "<key_name>,<value_name>"; each entry becomes one
    "key,value" row. Parent directories are created as needed.
    """
    out_file_dir = os.path.dirname(out_file_path)
    # dirname is '' for a bare filename, and makedirs('') raises.
    if out_file_dir:
        os.makedirs(out_file_dir, exist_ok=True)
    with open(out_file_path, 'w') as out_file:
        out_file.write(f"{key_name},{value_name}\n")
        for key, value in dictionary.items():
            out_file.write(f"{key},{value}\n")
def save_dict_latex(out_file_path, dictionary, prefix, postfix):
    """Write *dictionary* as LaTeX \\newcommand definitions, one per entry.

    Each key produces: \\newcommand{\\<prefix><key><postfix>}{<value>}.
    Parent directories are created as needed. Note that keys containing
    characters invalid in LaTeX command names (dots, digits) must be
    filtered out by the caller.
    """
    out_file_dir = os.path.dirname(out_file_path)
    # dirname is '' for a bare filename, and makedirs('') raises.
    if out_file_dir:
        os.makedirs(out_file_dir, exist_ok=True)
    with open(out_file_path, 'w') as out_file:
        for key, value in dictionary.items():
            out_file.write("\\newcommand{")
            out_file.write(f"\\{prefix}{key}{postfix}")
            out_file.write("}{")
            out_file.write(f"{value}")
            out_file.write("}\n")
def list_files_in_folder(folder_path):
    """Return the names of the regular files directly inside *folder_path*.

    A missing folder (or any other OS error) is reported on stdout and
    yields an empty list — deliberately best-effort, never raising.
    """
    try:
        entries = os.listdir(folder_path)
    except FileNotFoundError:
        print(f"The folder '{folder_path}' does not exist.")
        return []
    except Exception as e:
        print(f"An error occurred: {e}")
        return []
    return [entry for entry in entries
            if os.path.isfile(os.path.join(folder_path, entry))]
# Fetch the official RISC-V opcode definitions (no-op if already cloned).
repo_url = 'https://github.com/riscv/riscv-opcodes'
clone_repo(repo_url)
# Extensions to do analysis on: base ISA plus M/A/F/D/C, RV32 and RV64 variants.
relevant_exts = [
'rv_a',
'rv_c',
'rv_c_d',
'rv_d',
'rv_f',
'rv_i',
'rv_m',
'rv32_c',
'rv32_c_f',
'rv32_i',
'rv64_a',
'rv64_c',
'rv64_d',
'rv64_f',
'rv64_i',
'rv64_m'
]
subdir = 'riscv-opcodes/extensions' # Where the cloned repo keeps its extension files
output_file = 'generated/relevant-exts-raw.txt' # Raw dump of the analyzed extensions, kept for reference
print_file_contents_to_output(subdir, relevant_exts, output_file)
relevant_instructions = load_lines_ignoring_comments(subdir, relevant_exts)
relevant_encoding_points = get_encoding_points(relevant_instructions)
relevant_code_points_used = sum(relevant_encoding_points.values())
# Save the per-opcode encoding points as CSV and as LaTeX macros.
code_point_file = "generated/relevant_code_points"
save_dict_csv(code_point_file+".csv", relevant_encoding_points, "opcode", "points")
# LaTeX command names cannot contain dots or digits, so filter those opcodes out.
save_dict_latex(code_point_file+".tex", {key: value for key, value in relevant_encoding_points.items() if not re.search(r'[.\d]', key)}, "codePoints", "")
# Opcodes judged redundant for this analysis. Each commented name marks the
# instruction that the entries following it are considered variants of
# (e.g. 'sb'/'sh'/'sd' are width variants of 'sw').
unnecessary_instrs = [
# sw
'sb',
'sh',
'sd',
'fsd',
'fsw',
'sc.w',
'sc.d',
# c.sw
'c.sd',
'c.fsd',
'c.fsw',
# c.swsp
'c.fsdsp',
'c.fswsp',
'c.sdsp',
# add
'fadd.d',
'fadd.s',
'addw',
# addi
'addiw',
# c.add
'c.addw',
# c.addi
'c.addiw',
# sub
'subw',
'fsub.d',
'fsub.s',
# c.sub
'c.subw',
# mul
'fmul.d',
'fmul.s',
'mulw',
# mulh
'mulhsu',
'mulhu',
# div
'divu',
'fdiv.d',
'fdiv.s',
'divw',
'divuw',
# rem
'remu',
'remw',
'remuw',
# srl
'sra',
'srlw',
'sraw',
# srli
'srai',
'srliw',
'sraiw',
# c.srli
'c.srai',
# sll
'sllw',
# slli
'slliw',
# slti
'sltiu',
# slt
'sltu',
'flt.s',
'flt.d',
# blt
'bltu',
# bge
'bgeu',
# *.d (single-precision variants of the double-precision opcodes)
'feq.s',
'fle.s',
'fmadd.s',
'fmsub.s',
'fnmsub.s',
'fnmadd.s',
'fsqrt.s',
'fsgnj.s',
'fsgnjn.s',
'fsgnjx.s',
'fmin.s',
'fmax.s',
'fclass.s',
'amoswap.w',
'amoadd.w',
'amoxor.w',
'amoand.w',
'amoor.w',
# amomin.w
'amominu.w',
'amomin.d',
'amominu.d',
# amomax.w
'amomaxu.w',
'amomax.d',
'amomaxu.d',
# fcvt.d.w
'fcvt.s.w',
# fcvt.d.wu
'fcvt.s.wu',
# fcvt.d.l
'fcvt.s.l',
# fcvt.d.lu
'fcvt.s.lu',
# fcvt.d.s
'fcvt.w.s',
'fcvt.wu.s',
'fcvt.l.s',
'fcvt.lu.s',
# fcvt.s.d
'fcvt.w.d',
'fcvt.wu.d',
'fcvt.l.d',
'fcvt.lu.d',
# Unnecessary (register moves deemed avoidable)
'fmv.x.w',
'fmv.w.x',
'fmv.d.x',
'fmv.x.d',
]
def asset_no_duplicates(strings):
    """Abort the program if *strings* contains any duplicate entry.

    Returns False when no duplicates are found (kept for compatibility with
    existing callers; the name's 'asset' is a typo for 'assert' but renaming
    would break call sites).
    """
    seen = set()
    for string in strings:
        if string in seen:
            print(f"(ERROR) found duplicates: {string}")
            # sys.exit(1) signals failure properly; bare exit() is an
            # interactive-shell helper and exits with status 0.
            sys.exit(1)
        seen.add(string)
    return False  # No duplicates found
# Sanity-check the exclusion list, then compute encoding-point totals.
asset_no_duplicates(unnecessary_instrs)
# Ensure every excluded opcode really exists in the relevant set (no typos).
for op in unnecessary_instrs:
    if op not in relevant_encoding_points:
        print(f"Wrong exclusion opcode: '{op}'")
        exit()
unnecessary_instrs_points = {key: relevant_encoding_points[key] for key in unnecessary_instrs if key in relevant_encoding_points}
excluded_code_points_used = sum(unnecessary_instrs_points.values())
# Repeat the analysis over every extension file in the repository.
all_extensions = list_files_in_folder(subdir)
all_instructions = load_lines_ignoring_comments(subdir, all_extensions)
all_encoding_points = get_encoding_points(all_instructions)
all_code_points_used = sum(all_encoding_points.values())
# Hypothetical new instructions of the proposed design, written in the same
# format as the riscv-opcodes extension files: opcode followed by field names.
added_instructions = [
"tag_spill imm12hi rs1 rs2 imm12lo", # from 'sw'
"tag_reload rd rs1 imm12", # from 'lw'
"load_double_unsigned rd rs1 imm12", # from 'lwu'
"bit_cast_int8 rd",
"bit_cast_int16 rd",
"bit_cast_int32 rd",
"bit_cast_int64 rd",
"bit_cast_uint8 rd",
"bit_cast_uint16 rd",
"bit_cast_uint32 rd",
"bit_cast_uint64 rd",
"bit_cast_sfloat rd",
"bit_cast_dfloat rd",
]
added_encoding_points = get_encoding_points(added_instructions)
added_code_points_used = sum(added_encoding_points.values())
# Collect the summary statistics and persist them as CSV and LaTeX macros.
stats = {
    "opcodesAll": len(all_encoding_points),
    "opcodesRelevant": len(relevant_encoding_points),
    "opcodesExcluded": len(unnecessary_instrs_points),
    "opcodesAdded": len(added_encoding_points),
    "pointsAll": all_code_points_used,
    "pointsRelevant": relevant_code_points_used,
    "pointsExcluded": excluded_code_points_used,
    "pointsAdded": added_code_points_used,
}
save_dict_csv("generated/stats.csv", stats, "stat", "value")
save_dict_latex("generated/stats.tex", stats, "", "")
# In debug mode, also dump the raw numbers to stdout.
if with_debug:
    print(relevant_encoding_points)
    print("Opcodes: ", len(relevant_encoding_points))
    print("Encoding points sum: ", relevant_code_points_used)
    print(unnecessary_instrs_points)
    print("Excluded: ", len(unnecessary_instrs_points))
    print("Excluded points sum: ", excluded_code_points_used)
    print("Removed %: ", (excluded_code_points_used/relevant_code_points_used)*100)
    print(added_encoding_points)
    print("Added: ", len(added_encoding_points))
    print("Added points sum: ", added_code_points_used)
    print("Added %: ", (added_code_points_used/relevant_code_points_used)*100)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment