Created
October 25, 2023 11:13
-
-
Save foxtran/b7fedfbb0bd036629448ce62d18bd7a6 to your computer and use it in GitHub Desktop.
Original is taken from https://github.com/google/autofdo/blob/master/docs/OptimizeClangO3WithPropeller.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
## This script does the following:
## 1. It checks out and builds LLVM (release/17.x branch).
## 2. It checks out and builds the create_llvm_prof tool.
## 3. It builds multiple clang binaries towards building a
##    propeller optimized clang binary.
## 4. It runs performance comparisons of a baseline clang
##    binary and the Propeller optimized clang binary.
##
## Usage (bash is required: the script uses arrays and [[ ]]):
##   bash propeller_optimize_clang.sh
##
## Optional environment overrides:
##   BASE_PROPELLER_CLANG_DIR  work directory; defaults to
##                             propeller_optimize_clang.dir next to this script
##   BUILD_CPUS                CPU list passed to `numactl -C` (default: 0-75)
##   BUILD_JOBS                number of parallel ninja jobs (default: 76)
##   BENCH_RUNS                repetitions passed to `perf stat -r` (default: 5)
##
## The propeller optimized clang binary will be in:
##   ${BASE_PROPELLER_CLANG_DIR}/propeller_build/bin/clang

# NOTE: `pipefail` is deliberately not enabled. The `ninja -t commands | head`
# pipeline below closes ninja's stdout early, so ninja may exit non-zero there.
set -eu

SCRIPT_DIR="$(cd -- "$(dirname -- "$0")" && pwd)"
BASE_PROPELLER_CLANG_DIR="${BASE_PROPELLER_CLANG_DIR:-${SCRIPT_DIR}/propeller_optimize_clang.dir}"
BUILD_CPUS="${BUILD_CPUS:-0-75}"
BUILD_JOBS="${BUILD_JOBS:-76}"
BENCH_RUNS="${BENCH_RUNS:-5}"

mkdir -p "${BASE_PROPELLER_CLANG_DIR}"
# Normalise to an absolute path: the script changes directory many times.
BASE_PROPELLER_CLANG_DIR="$(cd -- "${BASE_PROPELLER_CLANG_DIR}" && pwd)"

PATH_TO_LLVM_SOURCES="${BASE_PROPELLER_CLANG_DIR}/sources"
PATH_TO_TRUNK_LLVM_BUILD="${BASE_PROPELLER_CLANG_DIR}/trunk_llvm_build"
PATH_TO_TRUNK_LLVM_INSTALL="${BASE_PROPELLER_CLANG_DIR}/trunk_llvm_install"
PATH_TO_CREATE_LLVM_PROF="${BASE_PROPELLER_CLANG_DIR}/create_llvm_prof_build"
PATH_TO_BASELINE_CLANG_BUILD="${BASE_PROPELLER_CLANG_DIR}/baseline_clang_build"
PATH_TO_LABELS_CLANG_BUILD="${BASE_PROPELLER_CLANG_DIR}/labels_clang_build"
PATH_TO_PROPELLER_CLANG_BUILD="${BASE_PROPELLER_CLANG_DIR}/propeller_build"
BENCHMARKING_CLANG_BUILD="${BASE_PROPELLER_CLANG_DIR}/benchmarking_clang_build"
LLVM_SOURCE_DIR="${PATH_TO_LLVM_SOURCES}/llvm-project/llvm"

# Prints a message to stderr and aborts the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Runs ninja pinned to BUILD_CPUS with BUILD_JOBS parallel jobs.
# Arguments: ninja targets/options.
run_ninja() {
  numactl -C "${BUILD_CPUS}" ninja -j"${BUILD_JOBS}" "$@"
}

# Points the benchmarking build's compiler symlinks at the versioned clang
# binary of a build tree.
# Globals:   CLANG_VERSION, BENCHMARKING_CLANG_BUILD (read)
# Arguments: $1 - build directory containing bin/clang-<version>
use_clang_from() {
  local -r clang_binary="$1/bin/clang-${CLANG_VERSION}"
  local -r link_dir="${BENCHMARKING_CLANG_BUILD}/symlink_to_clang_binary"
  ln -sf "${clang_binary}" "${link_dir}/clang"
  ln -sf "${clang_binary}" "${link_dir}/clang++"
}

# Measures a clean build of clang in the benchmarking tree, using the
# compiler from the given build directory.
# Arguments: $1 - build directory whose clang should be benchmarked
benchmark_clang_from() {
  use_clang_from "$1"
  cd "${BENCHMARKING_CLANG_BUILD}"
  ninja clean
  perf stat -r"${BENCH_RUNS}" -e instructions,cycles,L1-icache-misses,iTLB-misses -- \
    bash -c "numactl -C ${BUILD_CPUS} ninja -j${BUILD_JOBS} clang && ninja clean"
}

# Build Trunk LLVM
mkdir -p "${PATH_TO_LLVM_SOURCES}"
cd "${PATH_TO_LLVM_SOURCES}"
# Skip the clone when re-running the script on an existing checkout.
if [[ ! -d llvm-project ]]; then
  git clone -b release/17.x --single-branch https://github.com/llvm/llvm-project.git
fi
mkdir -p "${PATH_TO_TRUNK_LLVM_BUILD}"
cd "${PATH_TO_TRUNK_LLVM_BUILD}"
cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DLLVM_TARGETS_TO_BUILD=X86 \
  -DCMAKE_INSTALL_PREFIX="${PATH_TO_TRUNK_LLVM_INSTALL}" \
  -DLLVM_ENABLE_RTTI=On -DLLVM_INCLUDE_TESTS=Off \
  -DLLVM_ENABLE_PROJECTS="clang;lld" "${LLVM_SOURCE_DIR}"
run_ninja install

# Build create_llvm_prof
mkdir -p "${PATH_TO_CREATE_LLVM_PROF}"
cd "${PATH_TO_CREATE_LLVM_PROF}"
if [[ ! -d autofdo ]]; then
  git clone --recursive https://github.com/google/autofdo.git
fi
mkdir -p build
cd build
cmake -G Ninja -DCMAKE_INSTALL_PREFIX="." \
  -DCMAKE_C_COMPILER="${PATH_TO_TRUNK_LLVM_INSTALL}/bin/clang" \
  -DCMAKE_CXX_COMPILER="${PATH_TO_TRUNK_LLVM_INSTALL}/bin/clang++" \
  -DLLVM_PATH="${PATH_TO_TRUNK_LLVM_INSTALL}" ../autofdo/
run_ninja
# Sanity check: under `set -e` a missing binary aborts the script here.
ls create_llvm_prof

# Common CMAKE Flags
COMMON_CMAKE_FLAGS=(
  "-DLLVM_OPTIMIZED_TABLEGEN=On"
  "-DCMAKE_BUILD_TYPE=Release"
  "-DLLVM_TARGETS_TO_BUILD=X86"
  "-DLLVM_ENABLE_PROJECTS=clang"
  "-DCMAKE_C_COMPILER=${PATH_TO_TRUNK_LLVM_BUILD}/bin/clang"
  "-DCMAKE_CXX_COMPILER=${PATH_TO_TRUNK_LLVM_BUILD}/bin/clang++"
)

# Additional Baseline CMAKE flags
BASELINE_CC_LD_CMAKE_FLAGS=(
  "-DCMAKE_EXE_LINKER_FLAGS=-fuse-ld=lld"
  "-DCMAKE_SHARED_LINKER_FLAGS=-fuse-ld=lld"
  "-DCMAKE_MODULE_LINKER_FLAGS=-fuse-ld=lld"
)

# Build Baseline Clang Binary
mkdir -p "${PATH_TO_BASELINE_CLANG_BUILD}"
cd "${PATH_TO_BASELINE_CLANG_BUILD}"
cmake -G Ninja "${COMMON_CMAKE_FLAGS[@]}" "${BASELINE_CC_LD_CMAKE_FLAGS[@]}" "${LLVM_SOURCE_DIR}"
run_ninja clang

# Labels CMAKE Flags
LABELS_CC_LD_CMAKE_FLAGS=(
  "-DCMAKE_C_FLAGS=-funique-internal-linkage-names -fbasic-block-sections=labels"
  "-DCMAKE_CXX_FLAGS=-funique-internal-linkage-names -fbasic-block-sections=labels"
  "-DCMAKE_EXE_LINKER_FLAGS=-fuse-ld=lld"
  "-DCMAKE_SHARED_LINKER_FLAGS=-fuse-ld=lld"
  "-DCMAKE_MODULE_LINKER_FLAGS=-fuse-ld=lld"
)

# Build Labels Clang binary
mkdir -p "${PATH_TO_LABELS_CLANG_BUILD}"
cd "${PATH_TO_LABELS_CLANG_BUILD}"
cmake -G Ninja "${COMMON_CMAKE_FLAGS[@]}" "${LABELS_CC_LD_CMAKE_FLAGS[@]}" "${LLVM_SOURCE_DIR}"
run_ninja clang

# Set up Benchmarking and BUILD
mkdir -p "${BENCHMARKING_CLANG_BUILD}/symlink_to_clang_binary"
# The clang executable is installed as clang-<CLANG_EXECUTABLE_VERSION>; read
# that suffix from the CMake cache of the first build.
CLANG_VERSION="$(sed -Ene 's!^CLANG_EXECUTABLE_VERSION:STRING=(.*)$!\1!p' "${PATH_TO_TRUNK_LLVM_BUILD}/CMakeCache.txt")"
[[ -n "${CLANG_VERSION}" ]] \
  || die "Could not read CLANG_EXECUTABLE_VERSION from ${PATH_TO_TRUNK_LLVM_BUILD}/CMakeCache.txt"
use_clang_from "${PATH_TO_LABELS_CLANG_BUILD}"

# Setup cmake for Benchmarking BUILD
cd "${BENCHMARKING_CLANG_BUILD}"
cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DLLVM_TARGETS_TO_BUILD=X86 -DLLVM_ENABLE_PROJECTS=clang \
  -DCMAKE_C_COMPILER="${BENCHMARKING_CLANG_BUILD}/symlink_to_clang_binary/clang" \
  -DCMAKE_CXX_COMPILER="${BENCHMARKING_CLANG_BUILD}/symlink_to_clang_binary/clang++" \
  "${LLVM_SOURCE_DIR}"

# Profile the labels binary; the first 100 build commands are enough.
ninja -t commands | head -100 > ./perf_commands.sh
chmod +x ./perf_commands.sh
perf record -e cycles:u -j any,u -- ./perf_commands.sh
ls perf.data

# Convert profiles using create_llvm_prof
cd "${BENCHMARKING_CLANG_BUILD}"
LABELS_CLANG_BINARY="${PATH_TO_LABELS_CLANG_BUILD}/bin/clang-${CLANG_VERSION}"
# The tool's output is kept off the console but saved to a log so that a
# failure can still be diagnosed.
"${PATH_TO_CREATE_LLVM_PROF}/build/create_llvm_prof" --format=propeller \
  --binary="${LABELS_CLANG_BINARY}" \
  --profiled_binary_name="${LABELS_CLANG_BINARY}" \
  --profile=perf.data --out=cluster.txt --propeller_symorder=symorder.txt \
  > create_llvm_prof.log 2>&1 \
  || die "create_llvm_prof failed; see ${BENCHMARKING_CLANG_BUILD}/create_llvm_prof.log"
ls cluster.txt symorder.txt

# Set Propeller's CMAKE Flags
PROPELLER_CC_LD_CMAKE_FLAGS=(
  "-DCMAKE_C_FLAGS=-funique-internal-linkage-names -fbasic-block-sections=list=${BENCHMARKING_CLANG_BUILD}/cluster.txt"
  "-DCMAKE_CXX_FLAGS=-funique-internal-linkage-names -fbasic-block-sections=list=${BENCHMARKING_CLANG_BUILD}/cluster.txt"
  "-DCMAKE_EXE_LINKER_FLAGS=-Wl,--symbol-ordering-file=${BENCHMARKING_CLANG_BUILD}/symorder.txt -Wl,--no-warn-symbol-ordering -fuse-ld=lld"
  "-DCMAKE_SHARED_LINKER_FLAGS=-Wl,--symbol-ordering-file=${BENCHMARKING_CLANG_BUILD}/symorder.txt -Wl,--no-warn-symbol-ordering -fuse-ld=lld"
  "-DCMAKE_MODULE_LINKER_FLAGS=-Wl,--symbol-ordering-file=${BENCHMARKING_CLANG_BUILD}/symorder.txt -Wl,--no-warn-symbol-ordering -fuse-ld=lld"
)

# Build Propeller Optimized Clang
mkdir -p "${PATH_TO_PROPELLER_CLANG_BUILD}"
cd "${PATH_TO_PROPELLER_CLANG_BUILD}"
cmake -G Ninja "${COMMON_CMAKE_FLAGS[@]}" "${PROPELLER_CC_LD_CMAKE_FLAGS[@]}" "${LLVM_SOURCE_DIR}"
run_ninja clang

# Run comparison of baseline versus propeller optimized clang
benchmark_clang_from "${PATH_TO_BASELINE_CLANG_BUILD}"
benchmark_clang_from "${PATH_TO_PROPELLER_CLANG_BUILD}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
BASELINE (samples=3): | |
Performance counter stats for 'bash -c numactl -C 0-75 ninja -j76 clang && ninja clean' (3 runs): | |
29910755307348 instructions:u # 1.19 insn per cycle ( +- 0.00% ) | |
25108949747468 cycles:u ( +- 0.01% ) | |
1776069921406 L1-icache-misses:u ( +- 0.00% ) | |
7994150086 iTLB-misses:u ( +- 0.01% ) | |
124.271 +- 0.242 seconds time elapsed ( +- 0.19% ) | |
PROPELLER (samples=3): | |
Performance counter stats for 'bash -c numactl -C 0-75 ninja -j76 clang && ninja clean' (3 runs): | |
30835896384964 instructions:u # 1.01 insn per cycle ( +- 0.00% ) | |
30547030491210 cycles:u ( +- 0.01% ) | |
2491256701229 L1-icache-misses:u ( +- 0.00% ) | |
7456817103 iTLB-misses:u ( +- 0.05% ) | |
148.1088 +- 0.0623 seconds time elapsed ( +- 0.04% ) | |
Tested on a RAM disk. Output of `lscpu` on the test machine:
Architecture: x86_64 | |
CPU op-mode(s): 32-bit, 64-bit | |
Byte Order: Little Endian | |
CPU(s): 152 | |
On-line CPU(s) list: 0-151 | |
Thread(s) per core: 2 | |
Core(s) per socket: 38 | |
Socket(s): 2 | |
NUMA node(s): 2 | |
Vendor ID: GenuineIntel | |
CPU family: 6 | |
Model: 106 | |
Model name: Intel(R) Xeon(R) Platinum 8368 CPU @ 2.40GHz | |
Stepping: 6 | |
CPU MHz: 3400.000 | |
CPU max MHz: 3400.0000 | |
CPU min MHz: 800.0000 | |
BogoMIPS: 4800.00 | |
Virtualization: VT-x | |
L1d cache: 48K | |
L1i cache: 32K | |
L2 cache: 1280K | |
L3 cache: 58368K | |
NUMA node0 CPU(s): 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40,42,44,46,48,50,52,54,56,58,60,62,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,130,132,134,136,138,140,142,144,146,148,150 | |
NUMA node1 CPU(s): 1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33,35,37,39,41,43,45,47,49,51,53,55,57,59,61,63,65,67,69,71,73,75,77,79,81,83,85,87,89,91,93,95,97,99,101,103,105,107,109,111,113,115,117,119,121,123,125,127,129,131,133,135,137,139,141,143,145,147,149,151 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment