Created
September 2, 2025 21:39
-
-
Save Emoun/3bb6cb3192986f769ff26b50a9130ca7 to your computer and use it in GitHub Desktop.
Calculates how much of the 32-bit instruction encoding space the selected RISC-V extensions use.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| import subprocess | |
| import sys | |
| import re | |
# Enable verbose progress output when the script is invoked as:
#   python <script> debug
with_debug = len(sys.argv) > 1 and sys.argv[1] == "debug"
# Check if the risc-v opcodes repo is cloned, if not, do so
def clone_repo(repo_url):
    """Clone `repo_url` into the current working directory unless a
    directory with the repository's name already exists there.

    Args:
        repo_url: Git URL; the trailing path component (minus any '.git'
            occurrence) becomes the local directory name.

    Returns:
        The absolute path of the (existing or freshly cloned) repository.

    Raises:
        subprocess.CalledProcessError: if `git clone` exits non-zero.
    """
    repo_name = repo_url.split('/')[-1].replace('.git', '')
    repo_path = os.path.join(os.getcwd(), repo_name)
    if os.path.exists(repo_path):
        if with_debug:
            print(f"Repository '{repo_name}' is already cloned.")
    else:
        if with_debug:
            print(f"Cloning repository '{repo_name}'...")
        # check=True: previously a failed clone went unnoticed and the
        # "cloned successfully" message below was printed regardless.
        subprocess.run(['git', 'clone', repo_url], check=True)
        if with_debug:
            print(f"Repository '{repo_name}' cloned successfully.")
    return repo_path
# Print the contents of the selected extension files, so that it can be used as a reference
def print_file_contents_to_output(subdir, file_names, output_file):
    """Concatenate the contents of `file_names` (looked up in `subdir`)
    into `output_file`, each section separated by a dashed rule.

    Missing input files are noted in the output instead of aborting.
    The output file's parent directory is created on demand.
    """
    out_dir_name = os.path.dirname(output_file)
    # Guard against '' (bare filename): os.makedirs('') raises.
    # exist_ok=True also removes the exists/makedirs race of the original.
    if out_dir_name:
        os.makedirs(out_dir_name, exist_ok=True)
    with open(output_file, 'w') as out_file:
        for file_name in file_names:
            file_path = os.path.join(subdir, file_name)
            if os.path.exists(file_path):
                with open(file_path, 'r') as file:
                    out_file.write(f"Contents of {file_name}:\n")
                    out_file.write(file.read())
                    out_file.write("\n" + "-"*40 + "\n")
            else:
                out_file.write(f"File '{file_name}' does not exist in the subdirectory '{subdir}'.\n")
def load_lines_ignoring_comments(subdir, file_names):
    """Collect the instruction lines from every file in `file_names`
    (looked up relative to `subdir`).

    Blank lines, '#' comment lines and '$pseudo_op' definitions are
    skipped.  A missing file is reported on stdout and skipped.

    Returns the surviving lines, stripped, in file order.
    """
    collected = []
    for file_name in file_names:
        file_path = os.path.join(subdir, file_name)
        if not os.path.exists(file_path):
            print(f"File '{file_name}' does not exist in the subdirectory '{subdir}'.")
            continue
        with open(file_path, 'r') as handle:
            for raw in handle:
                text = raw.strip()
                if not text or text.startswith(('#', '$pseudo_op')):
                    continue
                collected.append(text)
    return collected
def load_instruct_field_sizes(file_name):
    """Parse a CSV of `"name",hi_bit,lo_bit` rows (riscv-opcodes
    arg_lut.csv format) into a dict mapping field name -> bit width.

    Exits the program if `file_name` does not exist.
    """
    if not os.path.exists(file_name):
        print(f"Field size file does not exist: '{file_name}'")
        exit()
    sizes = {}
    with open(file_name, 'r') as handle:
        for row in handle:
            # Drop the quoting, then take name, hi and lo columns.
            parts = row.replace('"', '').split(',')
            # Width of an inclusive bit range [lo, hi] is hi - lo + 1.
            sizes[parts[0]] = int(parts[1]) - int(parts[2]) + 1
    return sizes
def get_encoding_points(instructions, arg_lut_path="riscv-opcodes/arg_lut.csv"):
    """Map each instruction's opcode to the number of encoding points
    (distinct bit patterns) it occupies.

    Args:
        instructions: lines of the form "<opcode> <field> <field> ...",
            as produced by load_lines_ignoring_comments().
        arg_lut_path: CSV mapping field names to bit ranges.  Added as a
            defaulted parameter so the previously hard-coded path can be
            overridden; the default preserves the old behavior.

    Returns:
        dict mapping opcode -> 2**free_bits, where free_bits is the sum
        of the widths of the instruction's operand fields.
    """
    field_sizes = load_instruct_field_sizes(arg_lut_path)
    # Sum the operand-field widths per instruction; tokens that are not
    # known fields (fixed opcode/funct bits) contribute nothing.
    free_instruction_bits = {}
    for instr in instructions:
        tokens = instr.split()
        opcode = tokens[0]
        free_instruction_bits[opcode] = sum(
            field_sizes[field] for field in tokens[1:] if field in field_sizes
        )
    # Calculate how many encoding points each instruction uses
    encoding_points = {}
    for opcode, free_bits in free_instruction_bits.items():
        if opcode.startswith("c."):
            # Compressed instructions essentially "waste" the second part
            # of a 32-bit instruction
            free_bits += 16
        encoding_points[opcode] = 2 ** free_bits
    return encoding_points
def save_dict_csv(out_file_path, dictionary, key_name, value_name):
    """Write `dictionary` to `out_file_path` as a two-column CSV with a
    `key_name,value_name` header row.

    The parent directory is created on demand; a bare filename (no
    directory component) writes into the current directory.
    """
    out_file_dir = os.path.dirname(out_file_path)
    # '' means bare filename; os.makedirs('') would raise FileNotFoundError.
    if out_file_dir:
        os.makedirs(out_file_dir, exist_ok=True)
    with open(out_file_path, 'w') as out_file:
        out_file.write(f"{key_name},{value_name}\n")
        for key, value in dictionary.items():
            out_file.write(f"{key},{value}\n")
def save_dict_latex(out_file_path, dictionary, prefix, postfix):
    """Write each dictionary entry as a LaTeX macro definition:

        \\newcommand{\\<prefix><key><postfix>}{<value>}

    The parent directory is created on demand; a bare filename (no
    directory component) writes into the current directory.  Keys must
    consist of characters legal in a LaTeX command name (letters) for
    the resulting file to compile.
    """
    out_file_dir = os.path.dirname(out_file_path)
    # '' means bare filename; os.makedirs('') would raise FileNotFoundError.
    if out_file_dir:
        os.makedirs(out_file_dir, exist_ok=True)
    with open(out_file_path, 'w') as out_file:
        for key, value in dictionary.items():
            out_file.write(f"\\newcommand{{\\{prefix}{key}{postfix}}}{{{value}}}\n")
def list_files_in_folder(folder_path):
    """Return the names of the regular files directly inside
    `folder_path` (directories and other entries are skipped).

    On any error a message is printed and an empty list is returned.
    """
    try:
        with os.scandir(folder_path) as entries:
            return [entry.name for entry in entries if entry.is_file()]
    except FileNotFoundError:
        print(f"The folder '{folder_path}' does not exist.")
        return []
    except Exception as e:
        print(f"An error occurred: {e}")
        return []
# Fetch the riscv-opcodes repository, which holds one description file
# per ISA extension plus arg_lut.csv with the operand-field bit ranges.
repo_url = 'https://github.com/riscv/riscv-opcodes'
clone_repo(repo_url)
# Extensions to do analysis on: RV32/RV64 base integer ISA plus the
# M, A, F, D and C standard extensions (riscv-opcodes file naming).
relevant_exts = [
    'rv_a',
    'rv_c',
    'rv_c_d',
    'rv_d',
    'rv_f',
    'rv_i',
    'rv_m',
    'rv32_c',
    'rv32_c_f',
    'rv32_i',
    'rv64_a',
    'rv64_c',
    'rv64_d',
    'rv64_f',
    'rv64_i',
    'rv64_m'
]
subdir = 'riscv-opcodes/extensions'  # where the cloned repo keeps the extension files
output_file = 'generated/relevant-exts-raw.txt'  # raw dump of the selected extensions, for reference
print_file_contents_to_output(subdir, relevant_exts, output_file)
relevant_instructions = load_lines_ignoring_comments(subdir, relevant_exts)
relevant_encoding_points = get_encoding_points(relevant_instructions)
relevant_code_points_used = sum(relevant_encoding_points.values())
# Save the encoding points to file
code_point_file = "generated/relevant_code_points"
save_dict_csv(code_point_file+".csv", relevant_encoding_points, "opcode", "points")
# LaTeX command names cannot contain '.' or digits, so opcodes with
# either (e.g. 'c.add', 'fcvt.d.w') are filtered out of the .tex output.
save_dict_latex(code_point_file+".tex", {key: value for key, value in relevant_encoding_points.items() if not re.search(r'[.\d]', key)}, "codePoints", "")
# Instructions considered unnecessary and therefore excluded from the
# analysis.  NOTE(review): the commented-out names appear to mark the
# retained instruction that makes each following group redundant
# (e.g. 'sb'/'sh'/'sd' covered by 'sw') — confirm with the author.
unnecessary_instrs = [
    # sw
    'sb',
    'sh',
    'sd',
    'fsd',
    'fsw',
    'sc.w',
    'sc.d',
    # c.sw
    'c.sd',
    'c.fsd',
    'c.fsw',
    # c.swsp
    'c.fsdsp',
    'c.fswsp',
    'c.sdsp',
    # add
    'fadd.d',
    'fadd.s',
    'addw',
    # addi
    'addiw',
    # c.add
    'c.addw',
    # c.addi
    'c.addiw',
    # sub
    'subw',
    'fsub.d',
    'fsub.s',
    # c.sub
    'c.subw',
    # mul
    'fmul.d',
    'fmul.s',
    'mulw',
    # mulh
    'mulhsu',
    'mulhu',
    # div
    'divu',
    'fdiv.d',
    'fdiv.s',
    'divw',
    'divuw',
    # rem
    'remu',
    'remw',
    'remuw',
    # srl
    'sra',
    'srlw',
    'sraw',
    # srli
    'srai',
    'srliw',
    'sraiw',
    # c.srli
    'c.srai',
    # sll
    'sllw',
    # slli
    'slliw',
    # slti
    'sltiu',
    # slt
    'sltu',
    'flt.s',
    'flt.d',
    # blt
    'bltu',
    # bge
    'bgeu',
    # *.d  (single-precision variants covered by the double-precision ones)
    'feq.s',
    'fle.s',
    'fmadd.s',
    'fmsub.s',
    'fnmsub.s',
    'fnmadd.s',
    'fsqrt.s',
    'fsgnj.s',
    'fsgnjn.s',
    'fsgnjx.s',
    'fmin.s',
    'fmax.s',
    'fclass.s',
    'amoswap.w',
    'amoadd.w',
    'amoxor.w',
    'amoand.w',
    'amoor.w',
    # amomin.w
    'amominu.w',
    'amomin.d',
    'amominu.d',
    # amomax.w
    'amomaxu.w',
    'amomax.d',
    'amomaxu.d',
    # fcvt.d.w
    'fcvt.s.w',
    # fcvt.d.wu
    'fcvt.s.wu',
    # fcvt.d.l
    'fcvt.s.l',
    # fcvt.d.lu
    'fcvt.s.lu',
    # fcvt.d.s
    'fcvt.w.s',
    'fcvt.wu.s',
    'fcvt.l.s',
    'fcvt.lu.s',
    # fcvt.s.d
    'fcvt.w.d',
    'fcvt.wu.d',
    'fcvt.l.d',
    'fcvt.lu.d',
    # Unnecessary outright (no replacement named)
    'fmv.x.w',
    'fmv.w.x',
    'fmv.d.x',
    'fmv.x.d',
]
def asset_no_duplicates(strings):
    """Abort the program if `strings` contains any duplicate entry.

    Returns False when no duplicate is found; on the first duplicate an
    error is printed and the process exits.
    """
    seen = set()
    for item in strings:
        if item in seen:
            print(f"(ERROR) found duplicates: {item}")
            exit()
        seen.add(item)
    return False  # no duplicates were found
# Sanity-check the exclusion list before using it.
asset_no_duplicates(unnecessary_instrs)
# Ensure excluded ops exist (no typos)
for op in unnecessary_instrs:
    if not op in relevant_encoding_points.keys():
        print(f"Wrong exclusion opcode: '{op}'")
        exit()
# Encoding points of the excluded instructions, and their total.
unnecessary_instrs_points = {key: relevant_encoding_points[key] for key in unnecessary_instrs if key in relevant_encoding_points}
excluded_code_points_used = sum(unnecessary_instrs_points.values())
# Repeat the encoding-point analysis over every extension file the
# repository ships, for comparison against the "relevant" subset.
all_extensions = list_files_in_folder(subdir)
all_instructions = load_lines_ignoring_comments(subdir, all_extensions)
all_encoding_points = get_encoding_points(all_instructions)
all_code_points_used = sum(all_encoding_points.values())
# Proposed new instructions, written in the riscv-opcodes line format
# ("<opcode> <field> ...").  Field names must match arg_lut.csv entries
# for their widths to count toward the encoding-point total; unknown
# tokens contribute zero free bits.
added_instructions = [
    "tag_spill imm12hi rs1 rs2 imm12lo", # from 'sw'
    "tag_reload rd rs1 imm12", # from 'lw'
    "load_double_unsigned rd rs1 imm12", # from 'lwu'
    "bit_cast_int8 rd",
    "bit_cast_int16 rd",
    "bit_cast_int32 rd",
    "bit_cast_int64 rd",
    "bit_cast_uint8 rd",
    "bit_cast_uint16 rd",
    "bit_cast_uint32 rd",
    "bit_cast_uint64 rd",
    "bit_cast_sfloat rd",
    "bit_cast_dfloat rd",
]
added_encoding_points = get_encoding_points(added_instructions)
added_code_points_used = sum(added_encoding_points.values())
# Summary statistics, saved both as CSV and as LaTeX \newcommand
# definitions so they can be referenced from a document.
stats = {
    "opcodesAll": len(all_encoding_points),
    "opcodesRelevant": len(relevant_encoding_points),
    "opcodesExcluded": len(unnecessary_instrs_points),
    "opcodesAdded": len(added_encoding_points),
    "pointsAll": all_code_points_used,
    "pointsRelevant": relevant_code_points_used,
    "pointsExcluded": excluded_code_points_used,
    "pointsAdded": added_code_points_used
}
save_dict_csv("generated/stats.csv", stats, "stat", "value")
save_dict_latex("generated/stats.tex", stats, "", "")
# Verbose dump of all intermediate results when run with the 'debug' arg.
if with_debug:
    print(relevant_encoding_points)
    print("Opcodes: ", len(relevant_encoding_points))
    print("Encoding points sum: ", relevant_code_points_used)
    print(unnecessary_instrs_points)
    print("Excluded: ", len(unnecessary_instrs_points))
    print("Excluded points sum: ", excluded_code_points_used)
    print("Removed %: ", (excluded_code_points_used/relevant_code_points_used)*100)
    print(added_encoding_points)
    print("Added: ", len(added_encoding_points))
    print("Added points sum: ", added_code_points_used)
    print("Added %: ", (added_code_points_used/relevant_code_points_used)*100)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment