Skip to content

Instantly share code, notes, and snippets.

View cyyself's full-sized avatar

Yangyu Chen cyyself

View GitHub Profile

A function clone may prevent GCC from inlining a callee into the caller.

I noticed this when testing target_clones features for performance tuning.

Tested GCC versions: GCC 14.2 and GCC master commit 1de156eb2bb445cd0e0a582944dcd75d085f30c9, on both x86-64 and RISC-V targets.

An example is shown below:

git clone https://github.com/eembc/coremark.git
/* Types for recording extension to RISC-V C-API bitmask. */
struct riscv_ext_bitmask_table_t {
const char *ext;
int groupid;
int bit_position;
};
static const riscv_ext_bitmask_table_t riscv_ext_bitmask_table[] =
{
{"a", 0, 0},

good:

/* "default"-target version: returns the sum of the first 16 ints in arr. */
__attribute__((target("default")))
int foo(int *arr) {
    int total = 0;
    const int *end = arr + 16;

    /* Walk the fixed 16-element window front to back. */
    for (const int *p = arr; p != end; ++p) {
        total += *p;
    }
    return total;
}
cycles:u -> r22:u
instructions:u -> r4d:u
#include <stdio.h>
/*
 * Returns -1 converted to plain char.
 *
 * Whether the caller observes a negative value or a large positive one
 * depends on whether plain char is signed on the target, which is
 * implementation-defined. The noinline attribute keeps this function from
 * being inlined, so the value really travels through a call and return.
 *
 * The parameter list is spelled (void) so this is a real prototype; an
 * empty () is an obsolescent non-prototype declarator in C11.
 */
char __attribute__ ((noinline)) foo(void) {
    return -1;
}
int main() {
char ret;
ret = foo();
LLVM: 66e03eda42e1cd6d8e89d13876a44852815efb71
XiangShan: 136f64975e337409416bed849b58026e7085d758
Verilator: 0a9b31bb30fd604ff68854290cc5cc90e7dfd172
QEMU: ddcfe49ed38ebd2a659c53f9dcecf7e33899b29d
PGO: -fprofile-instr-generate (on X86 Host)
rv64gc_zba_zbb_zbc_zbc_zicond:
cpu 0 insns: 1741983881730
Branch: 103964378641
Br Taken: 93664761377
LLVM: 62fe12a3dffa5cacc8895207240c6cd542c174b1
Workload: Verilator(0a9b31bb3) NutShell(52aeba575) Microbench --no-diff
rv64gcv_zba_zbb_zbc_zbs_zicond_zicldst:
Branch: 1530928469
Branch taken: 1064371731
Cload count: 63035711
Cstore count: 16636513
Cload taken count: 3017039
#!/usr/bin/env python3
import sys
import os
itb_path = os.path.realpath(sys.argv[1])
ext_dir = os.path.realpath(sys.argv[2])
# Extract the ITB
if os.system(f"dtc -I dtb -O dts -o {ext_dir}/info.its {itb_path}") == 0:
#!/usr/bin/env python3
# Usage: python3 patcher.py <dts_file> > <patched_dts_file>
def patch_dt(orig_dts_string):
out = []
# Patch status
usb_patched = False
ethernet_patched = False
# dts scan status
orig_dts_lines = orig_dts_string.splitlines()
diff --git a/target/linux/mediatek/dts/mt7981b-cmcc-rax3000m.dts b/target/linux/mediatek/dts/mt7981b-cmcc-rax3000m.dts
index 977a613333..2ea0715f79 100644
--- a/target/linux/mediatek/dts/mt7981b-cmcc-rax3000m.dts
+++ b/target/linux/mediatek/dts/mt7981b-cmcc-rax3000m.dts
@@ -89,8 +89,8 @@
gmac1: mac@1 {
compatible = "mediatek,eth-mac";
reg = <1>;
- phy-mode = "gmii";
- phy-handle = <&int_gbe_phy>;