Created
September 2, 2025 21:39
-
-
Save Emoun/3bb6cb3192986f769ff26b50a9130ca7 to your computer and use it in GitHub Desktop.
Calculates how much of the 32-bit instruction encoding space the selected RISC-V extensions use.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| import subprocess | |
| import sys | |
| import re | |
# Enable verbose progress output when the script is invoked as:
#   python <script> debug
with_debug = len(sys.argv) > 1 and sys.argv[1] == "debug"
# Check if the risc-v opcodes repo is cloned, if not, do so
def clone_repo(repo_url):
    """Clone `repo_url` into the current working directory unless a
    directory with the repository's name already exists there.

    Args:
        repo_url: Git URL; the trailing path component (minus any '.git'
            occurrence) becomes the local directory name.

    Returns:
        The absolute path of the (existing or freshly cloned) repository.

    Raises:
        subprocess.CalledProcessError: if `git clone` exits non-zero.
    """
    repo_name = repo_url.split('/')[-1].replace('.git', '')
    repo_path = os.path.join(os.getcwd(), repo_name)
    if os.path.exists(repo_path):
        if with_debug:
            print(f"Repository '{repo_name}' is already cloned.")
    else:
        if with_debug:
            print(f"Cloning repository '{repo_name}'...")
        # check=True: previously a failed clone went unnoticed and the
        # "cloned successfully" message below was printed regardless.
        subprocess.run(['git', 'clone', repo_url], check=True)
        if with_debug:
            print(f"Repository '{repo_name}' cloned successfully.")
    return repo_path
# Print the contents of the selected extension files, so that it can be used as a reference
def print_file_contents_to_output(subdir, file_names, output_file):
    """Concatenate the contents of `file_names` (looked up in `subdir`)
    into `output_file`, each section separated by a dashed rule.

    Missing input files are noted in the output instead of aborting.
    The output file's parent directory is created on demand.
    """
    out_dir_name = os.path.dirname(output_file)
    # Guard against '' (bare filename): os.makedirs('') raises.
    # exist_ok=True also removes the exists/makedirs race of the original.
    if out_dir_name:
        os.makedirs(out_dir_name, exist_ok=True)
    with open(output_file, 'w') as out_file:
        for file_name in file_names:
            file_path = os.path.join(subdir, file_name)
            if os.path.exists(file_path):
                with open(file_path, 'r') as file:
                    out_file.write(f"Contents of {file_name}:\n")
                    out_file.write(file.read())
                    out_file.write("\n" + "-"*40 + "\n")
            else:
                out_file.write(f"File '{file_name}' does not exist in the subdirectory '{subdir}'.\n")
def load_lines_ignoring_comments(subdir, file_names):
    """Collect the instruction lines from every file in `file_names`
    (looked up relative to `subdir`).

    Blank lines, '#' comment lines and '$pseudo_op' definitions are
    skipped.  A missing file is reported on stdout and skipped.

    Returns the surviving lines, stripped, in file order.
    """
    collected = []
    for file_name in file_names:
        file_path = os.path.join(subdir, file_name)
        if not os.path.exists(file_path):
            print(f"File '{file_name}' does not exist in the subdirectory '{subdir}'.")
            continue
        with open(file_path, 'r') as handle:
            for raw in handle:
                text = raw.strip()
                if not text or text.startswith(('#', '$pseudo_op')):
                    continue
                collected.append(text)
    return collected
def load_instruct_field_sizes(file_name):
    """Parse a CSV of `"name",hi_bit,lo_bit` rows (riscv-opcodes
    arg_lut.csv format) into a dict mapping field name -> bit width.

    Exits the program if `file_name` does not exist.
    """
    if not os.path.exists(file_name):
        print(f"Field size file does not exist: '{file_name}'")
        exit()
    sizes = {}
    with open(file_name, 'r') as handle:
        for row in handle:
            # Drop the quoting, then take name, hi and lo columns.
            parts = row.replace('"', '').split(',')
            # Width of an inclusive bit range [lo, hi] is hi - lo + 1.
            sizes[parts[0]] = int(parts[1]) - int(parts[2]) + 1
    return sizes
def get_encoding_points(instructions, arg_lut_path="riscv-opcodes/arg_lut.csv"):
    """Map each instruction's opcode to the number of encoding points
    (distinct bit patterns) it occupies.

    Args:
        instructions: lines of the form "<opcode> <field> <field> ...",
            as produced by load_lines_ignoring_comments().
        arg_lut_path: CSV mapping field names to bit ranges.  Added as a
            defaulted parameter so the previously hard-coded path can be
            overridden; the default preserves the old behavior.

    Returns:
        dict mapping opcode -> 2**free_bits, where free_bits is the sum
        of the widths of the instruction's operand fields.
    """
    field_sizes = load_instruct_field_sizes(arg_lut_path)
    # Sum the operand-field widths per instruction; tokens that are not
    # known fields (fixed opcode/funct bits) contribute nothing.
    free_instruction_bits = {}
    for instr in instructions:
        tokens = instr.split()
        opcode = tokens[0]
        free_instruction_bits[opcode] = sum(
            field_sizes[field] for field in tokens[1:] if field in field_sizes
        )
    # Calculate how many encoding points each instruction uses
    encoding_points = {}
    for opcode, free_bits in free_instruction_bits.items():
        if opcode.startswith("c."):
            # Compressed instructions essentially "waste" the second part
            # of a 32-bit instruction
            free_bits += 16
        encoding_points[opcode] = 2 ** free_bits
    return encoding_points
def save_dict_csv(out_file_path, dictionary, key_name, value_name):
    """Write `dictionary` to `out_file_path` as a two-column CSV with a
    `key_name,value_name` header row.

    The parent directory is created on demand; a bare filename (no
    directory component) writes into the current directory.
    """
    out_file_dir = os.path.dirname(out_file_path)
    # '' means bare filename; os.makedirs('') would raise FileNotFoundError.
    if out_file_dir:
        os.makedirs(out_file_dir, exist_ok=True)
    with open(out_file_path, 'w') as out_file:
        out_file.write(f"{key_name},{value_name}\n")
        for key, value in dictionary.items():
            out_file.write(f"{key},{value}\n")
def save_dict_latex(out_file_path, dictionary, prefix, postfix):
    """Write each dictionary entry as a LaTeX macro definition:

        \\newcommand{\\<prefix><key><postfix>}{<value>}

    The parent directory is created on demand; a bare filename (no
    directory component) writes into the current directory.  Keys must
    consist of characters legal in a LaTeX command name (letters) for
    the resulting file to compile.
    """
    out_file_dir = os.path.dirname(out_file_path)
    # '' means bare filename; os.makedirs('') would raise FileNotFoundError.
    if out_file_dir:
        os.makedirs(out_file_dir, exist_ok=True)
    with open(out_file_path, 'w') as out_file:
        for key, value in dictionary.items():
            out_file.write(f"\\newcommand{{\\{prefix}{key}{postfix}}}{{{value}}}\n")
def list_files_in_folder(folder_path):
    """Return the names of the regular files directly inside
    `folder_path` (directories and other entries are skipped).

    On any error a message is printed and an empty list is returned.
    """
    try:
        with os.scandir(folder_path) as entries:
            return [entry.name for entry in entries if entry.is_file()]
    except FileNotFoundError:
        print(f"The folder '{folder_path}' does not exist.")
        return []
    except Exception as e:
        print(f"An error occurred: {e}")
        return []
# Fetch the riscv-opcodes repository, which holds one description file
# per ISA extension plus arg_lut.csv with the operand-field bit ranges.
repo_url = 'https://github.com/riscv/riscv-opcodes'
clone_repo(repo_url)
# Extensions to do analysis on: RV32/RV64 base integer ISA plus the
# M, A, F, D and C standard extensions (riscv-opcodes file naming).
relevant_exts = [
    'rv_a',
    'rv_c',
    'rv_c_d',
    'rv_d',
    'rv_f',
    'rv_i',
    'rv_m',
    'rv32_c',
    'rv32_c_f',
    'rv32_i',
    'rv64_a',
    'rv64_c',
    'rv64_d',
    'rv64_f',
    'rv64_i',
    'rv64_m'
]
subdir = 'riscv-opcodes/extensions'  # where the cloned repo keeps the extension files
output_file = 'generated/relevant-exts-raw.txt'  # raw dump of the selected extensions, for reference
print_file_contents_to_output(subdir, relevant_exts, output_file)
relevant_instructions = load_lines_ignoring_comments(subdir, relevant_exts)
relevant_encoding_points = get_encoding_points(relevant_instructions)
relevant_code_points_used = sum(relevant_encoding_points.values())
# Save the encoding points to file
code_point_file = "generated/relevant_code_points"
save_dict_csv(code_point_file+".csv", relevant_encoding_points, "opcode", "points")
# LaTeX command names cannot contain '.' or digits, so opcodes with
# either (e.g. 'c.add', 'fcvt.d.w') are filtered out of the .tex output.
save_dict_latex(code_point_file+".tex", {key: value for key, value in relevant_encoding_points.items() if not re.search(r'[.\d]', key)}, "codePoints", "")
# Instructions considered unnecessary and therefore excluded from the
# analysis.  NOTE(review): the commented-out names appear to mark the
# retained instruction that makes each following group redundant
# (e.g. 'sb'/'sh'/'sd' covered by 'sw') — confirm with the author.
unnecessary_instrs = [
    # sw
    'sb',
    'sh',
    'sd',
    'fsd',
    'fsw',
    'sc.w',
    'sc.d',
    # c.sw
    'c.sd',
    'c.fsd',
    'c.fsw',
    # c.swsp
    'c.fsdsp',
    'c.fswsp',
    'c.sdsp',
    # add
    'fadd.d',
    'fadd.s',
    'addw',
    # addi
    'addiw',
    # c.add
    'c.addw',
    # c.addi
    'c.addiw',
    # sub
    'subw',
    'fsub.d',
    'fsub.s',
    # c.sub
    'c.subw',
    # mul
    'fmul.d',
    'fmul.s',
    'mulw',
    # mulh
    'mulhsu',
    'mulhu',
    # div
    'divu',
    'fdiv.d',
    'fdiv.s',
    'divw',
    'divuw',
    # rem
    'remu',
    'remw',
    'remuw',
    # srl
    'sra',
    'srlw',
    'sraw',
    # srli
    'srai',
    'srliw',
    'sraiw',
    # c.srli
    'c.srai',
    # sll
    'sllw',
    # slli
    'slliw',
    # slti
    'sltiu',
    # slt
    'sltu',
    'flt.s',
    'flt.d',
    # blt
    'bltu',
    # bge
    'bgeu',
    # *.d  (single-precision variants covered by the double-precision ones)
    'feq.s',
    'fle.s',
    'fmadd.s',
    'fmsub.s',
    'fnmsub.s',
    'fnmadd.s',
    'fsqrt.s',
    'fsgnj.s',
    'fsgnjn.s',
    'fsgnjx.s',
    'fmin.s',
    'fmax.s',
    'fclass.s',
    'amoswap.w',
    'amoadd.w',
    'amoxor.w',
    'amoand.w',
    'amoor.w',
    # amomin.w
    'amominu.w',
    'amomin.d',
    'amominu.d',
    # amomax.w
    'amomaxu.w',
    'amomax.d',
    'amomaxu.d',
    # fcvt.d.w
    'fcvt.s.w',
    # fcvt.d.wu
    'fcvt.s.wu',
    # fcvt.d.l
    'fcvt.s.l',
    # fcvt.d.lu
    'fcvt.s.lu',
    # fcvt.d.s
    'fcvt.w.s',
    'fcvt.wu.s',
    'fcvt.l.s',
    'fcvt.lu.s',
    # fcvt.s.d
    'fcvt.w.d',
    'fcvt.wu.d',
    'fcvt.l.d',
    'fcvt.lu.d',
    # Unnecessary outright (no replacement named)
    'fmv.x.w',
    'fmv.w.x',
    'fmv.d.x',
    'fmv.x.d',
]
def asset_no_duplicates(strings):
    """Abort the program if `strings` contains any duplicate entry.

    Returns False when no duplicate is found; on the first duplicate an
    error is printed and the process exits.
    """
    seen = set()
    for item in strings:
        if item in seen:
            print(f"(ERROR) found duplicates: {item}")
            exit()
        seen.add(item)
    return False  # no duplicates were found
# Sanity-check the exclusion list before using it.
asset_no_duplicates(unnecessary_instrs)
# Ensure excluded ops exist (no typos)
for op in unnecessary_instrs:
    if not op in relevant_encoding_points.keys():
        print(f"Wrong exclusion opcode: '{op}'")
        exit()
# Encoding points of the excluded instructions, and their total.
unnecessary_instrs_points = {key: relevant_encoding_points[key] for key in unnecessary_instrs if key in relevant_encoding_points}
excluded_code_points_used = sum(unnecessary_instrs_points.values())
# Repeat the encoding-point analysis over every extension file the
# repository ships, for comparison against the "relevant" subset.
all_extensions = list_files_in_folder(subdir)
all_instructions = load_lines_ignoring_comments(subdir, all_extensions)
all_encoding_points = get_encoding_points(all_instructions)
all_code_points_used = sum(all_encoding_points.values())
# Proposed new instructions, written in the riscv-opcodes line format
# ("<opcode> <field> ...").  Field names must match arg_lut.csv entries
# for their widths to count toward the encoding-point total; unknown
# tokens contribute zero free bits.
added_instructions = [
    "tag_spill imm12hi rs1 rs2 imm12lo", # from 'sw'
    "tag_reload rd rs1 imm12", # from 'lw'
    "load_double_unsigned rd rs1 imm12", # from 'lwu'
    "bit_cast_int8 rd",
    "bit_cast_int16 rd",
    "bit_cast_int32 rd",
    "bit_cast_int64 rd",
    "bit_cast_uint8 rd",
    "bit_cast_uint16 rd",
    "bit_cast_uint32 rd",
    "bit_cast_uint64 rd",
    "bit_cast_sfloat rd",
    "bit_cast_dfloat rd",
]
added_encoding_points = get_encoding_points(added_instructions)
added_code_points_used = sum(added_encoding_points.values())
# Summary statistics, saved both as CSV and as LaTeX \newcommand
# definitions so they can be referenced from a document.
stats = {
    "opcodesAll": len(all_encoding_points),
    "opcodesRelevant": len(relevant_encoding_points),
    "opcodesExcluded": len(unnecessary_instrs_points),
    "opcodesAdded": len(added_encoding_points),
    "pointsAll": all_code_points_used,
    "pointsRelevant": relevant_code_points_used,
    "pointsExcluded": excluded_code_points_used,
    "pointsAdded": added_code_points_used
}
save_dict_csv("generated/stats.csv", stats, "stat", "value")
save_dict_latex("generated/stats.tex", stats, "", "")
# Verbose dump of all intermediate results when run with the 'debug' arg.
if with_debug:
    print(relevant_encoding_points)
    print("Opcodes: ", len(relevant_encoding_points))
    print("Encoding points sum: ", relevant_code_points_used)
    print(unnecessary_instrs_points)
    print("Excluded: ", len(unnecessary_instrs_points))
    print("Excluded points sum: ", excluded_code_points_used)
    print("Removed %: ", (excluded_code_points_used/relevant_code_points_used)*100)
    print(added_encoding_points)
    print("Added: ", len(added_encoding_points))
    print("Added points sum: ", added_code_points_used)
    print("Added %: ", (added_code_points_used/relevant_code_points_used)*100)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment