Last active
May 19, 2023 02:58
-
-
Save drakonstein/46b7afc51fd62ad2d67161e575614de9 to your computer and use it in GitHub Desktop.
Ceph cluster reweighting scripts. Not for the faint of heart. (Note: this is no longer necessary if you can use the balancer module.)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# gen_maps.sh - probe weight_cluster.sh for a suitable --offset, then
# calculate candidate crush maps for that offset and the one below it.

#######################################
# Print the usage text for gen_maps.sh and terminate the script.
# Every option accepted by the argument parser is listed.
# Arguments: none
# Outputs:   usage text on stdout
# Returns:   does not return; exits with status 1
#######################################
print_help() {
  echo "
usage: gen_maps.sh [options] [variables]
  --cluster   Specify cluster (default is ceph)
  --osdmap    Specify custom osdmap (passed to weight_cluster.sh)
  --crush     Specify custom crush map
  --remove    Servername and/or OSDs to weight to 0.00
              (comma delimited list, passed to weight_cluster.sh)
  --passes    Maximum number of passes over a map
              (passed to weight_cluster.sh)
"
  exit 1
}
# Defaults. osdmap/crush/remove/passes hold either nothing or a ready-made
# "--flag value" fragment that is spliced (deliberately unquoted) into the
# weight_cluster.sh command lines later in this script.
cluster=ceph
passes=
osdmap=
crush=
remove=

#######################################
# Parse gen_maps.sh command-line arguments into the globals above.
# Each recognised option expects exactly one value in the next argument.
# Globals:   cluster, osdmap, crush, remove, passes (written)
# Arguments: the script's command line ("$@")
# Outputs:   a diagnostic on stderr when an option is missing its value
# Returns:   0 on success; calls print_help (which exits 1) on a stray
#            value or when the last option has no value
#######################################
parse_args() {
  local arg
  # Name of the option still waiting for its value. Kept local and
  # explicitly empty so it cannot be inherited from the environment.
  local pending=

  for arg in "$@"; do
    case "$arg" in
      --cluster) pending=cluster ;;
      --osdmap) pending=osdmap ;;
      --crush) pending=crush ;;
      --remove) pending=remove ;;
      --passes) pending=passes ;;
      *)
        case "$pending" in
          cluster) cluster=$arg ;;
          osdmap) osdmap="--osdmap $arg" ;;
          crush) crush="--crush $arg" ;;
          remove) remove="--remove $arg" ;;
          passes) passes="--passes $arg" ;;
          *) print_help ;; # a value with no option in front of it
        esac
        pending=
        ;;
    esac
  done

  # An option at the very end of the command line never got its value.
  if [[ -n "$pending" ]]; then
    echo "Missing value for --$pending" >&2
    print_help
  fi
}

parse_args "$@"
# Search upwards from offset -5 for the first offset at which
# weight_cluster.sh reports at least as many OSDs to move down as up.
# The report line has the form "Calculating <up> up; <down> down...", so
# field 4 minus field 2 of the flattened output is (down - up).
echo Testing offsets...
offset=-5
while true; do
  echo "Currently testing $offset"
  # $osdmap/$crush/$remove each hold "--flag value" or nothing and must be
  # word-split, hence no quotes.
  # shellcheck disable=SC2086
  if ! output=$(sudo ./weight_cluster.sh --cluster "$cluster" $osdmap $crush $remove -r --offset "$offset"); then
    # Without this check a failed run yields empty output, which evaluates
    # to a difference of 0 and silently ends the search.
    echo "weight_cluster.sh failed while testing offset $offset" >&2
    exit 1
  fi
  # Flatten the output onto one line before picking the fields.
  diff=$(tr '\n' ' ' <<<"$output" | awk '{print $4 - $2}')
  [[ "$diff" -ge 0 ]] && break
  offset=$(( offset + 1 ))
done
# Calculate maps for the offset found above and the one just below it, in
# parallel, until the operator presses enter.
echo "Going to calculate maps for offsets $offset and $(( offset - 1 ))"
# $osdmap/$crush/$remove/$passes hold "--flag value" or nothing and must be
# word-split, hence no quotes.
# shellcheck disable=SC2086
sudo ./weight_cluster.sh --cluster "$cluster" $osdmap $crush $remove -r --offset "$offset" $passes --calculate &
# shellcheck disable=SC2086
sudo ./weight_cluster.sh --cluster "$cluster" $osdmap $crush $remove -r --offset "$(( offset - 1 ))" $passes --calculate &

echo "Pressing enter will stop the jobs"
read -r -s key

# Ask bash for the PIDs of the still-running background jobs instead of
# scraping job ids out of the human-readable `jobs` listing.
running=$(jobs -pr)
if [[ -n "$running" ]]; then
  # shellcheck disable=SC2086  # one PID per word
  kill $running
fi

# Summarise every candidate map in the current directory, then the map
# currently in use: a count of each distinct value in column 6 of the
# "-o all" listing.
separator="---------------------------------------------"
for map in cm."$cluster"-*; do
  # With no matching files the glob stays literal; skip it.
  [[ -e "$map" ]] || continue
  echo "$separator"
  echo "$map"
  sudo ./weight_cluster.sh --cluster "$cluster" --crush "$map" -o all \
    | awk '{print $6}' | sort | uniq -c
done
echo "$separator"
echo Current Map
sudo ./weight_cluster.sh --cluster "$cluster" -o all \
  | awk '{print $6}' | sort | uniq -c
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# It is hardcoded to only weight based on the rbd pool.
# NOTE(review): the code below iterates over every pool listed by
# `ceph df`, not just rbd - confirm whether the line above is stale.
# It is assumed that your drives are larger than 1TB.
#

# Per-OSD state, keyed by numeric OSD id.
declare -A osds_pgs               # PGs mapped to the OSD, summed over pools
declare -A osds_primary           # primary PGs on the OSD, summed over pools
declare -A osds_weight            # weight column taken from osdmaptool output
declare -A osds_increment         # weight step chosen for the current pass
declare -A osds_size              # digits of the SIZE column of `ceph osd df`
declare -A osds_percent           # column 7 of `ceph osd df`
declare -A osds_variance          # column 8 of `ceph osd df`, dot removed
declare -A osds_pgs_weight        # PG count scaled by each pool's PG weight
declare -A osds_pgs_weight_per_TB # osds_pgs_weight / size * 1024

# Per-pool state, keyed by pool number.
declare -A pools_pg_count         # pg_num reported by osdmaptool
declare -A pools_pg_weight        # relative weight of one PG of the pool
declare -A pools_percent          # used percentage (divided by min_size for EC)
declare -A pools_replica          # the pool's "size" setting

# OSD id -> weight given with --manual_weight.
declare -A manual_weight_list
#######################################
# Print the usage text for weight_cluster.sh and terminate the script.
# Arguments: none
# Outputs:   usage text on stdout
# Returns:   does not return; exits with status 1
#######################################
print_help() {
  echo "
usage: weight_cluster.sh [options] [variables]
  --cluster        Specify cluster (default is ceph)
  -o, --osds       Specify OSDs to query (comma delimited list, or
                   'all' for every OSD in the crush map)
  -r, --reweight   Reweight the cluster
  --calculate      With -r, keep adjusting weights and write out the
                   resulting crush map instead of only reporting how
                   many OSDs need to move up or down
  --offset         Balance for variations in deployments
  --passes         Specify the maximum amount of times to pass over a map
                   before giving up. Default is 1500. Setting this to 0
                   will disable this function.
  --osdmap         Supply an osdmap to be used instead of getting one
                   from the cluster.
  --crush          Supply a crush map to be used instead of getting
                   one from the osdmap.
  --remove         Specify servername and/or OSDs to set the weight to
                   0.00. Servername will expand to all osds on the
                   server. (comma delimited list)
  --manual_weight
                   Specify any OSDs you want to hardcode a weight for.
                   The format is osd:weight in a comma delimited list.
"
  exit 1
}
# Defaults for everything the command line can change, plus the tuning
# constants used by the reweight loop (threshold, increment).
cluster=ceph
list_osds=false
reweight=false
calculate=false
threshold=2
increment=0.005
pg_offset=0
custom_osdmap=false
custom_crush=false
do_remove=false
do_manual_weight=false
passes=1500
list_all=false

#######################################
# Parse weight_cluster.sh command-line arguments into global settings.
# Options that take a value read it from the following argument.
# Globals:   cluster, list_osds, reweight, calculate, pg_offset,
#            custom_osdmap, custom_crush, do_remove, do_manual_weight,
#            passes, crush_map, osd_map, osds, diff, remove_me,
#            manual_weight_me, manual_weight_list (written)
# Arguments: the script's command line ("$@")
# Outputs:   a diagnostic on stderr for a missing or invalid value
# Returns:   0 on success; calls print_help (which exits 1) on a stray
#            value, a non-integer --passes value, or an option left
#            without its value
#######################################
parse_args() {
  local arg list osd weight
  # Name of the option still waiting for its value. Kept local and
  # explicitly empty so it cannot be inherited from the environment.
  local pending=

  for arg in "$@"; do
    case "$arg" in
      --crush)
        pending=crush
        custom_crush=true
        ;;
      --osdmap)
        pending=osdmap
        custom_osdmap=true
        ;;
      --cluster)
        pending=cluster
        ;;
      --osds | -o)
        pending=osd
        list_osds=true
        ;;
      --reweight | -r)
        reweight=true
        ;;
      --calculate)
        calculate=true
        ;;
      --offset)
        pending=offset
        ;;
      --remove)
        pending=remove
        do_remove=true
        ;;
      --manual_weight)
        pending=manual_weight
        do_manual_weight=true
        ;;
      --passes)
        pending=passes
        ;;
      *)
        case "$pending" in
          crush) crush_map=$arg ;;
          osdmap) osd_map=$arg ;;
          cluster) cluster=$arg ;;
          osd)
            osds=${arg//,/ }
            if [[ "$osds" == "diff" ]]; then diff=true; else diff=false; fi
            ;;
          offset) pg_offset=$arg ;;
          remove) remove_me=${arg//,/ } ;;
          manual_weight)
            # "3:1.5,9:2" -> one "osd weight" pair per line.
            list=$(tr ',:' '\n ' <<<"$arg")
            while read -r osd weight; do
              [[ -n "$osd" ]] || continue
              manual_weight_me="$manual_weight_me $osd"
              manual_weight_list[$osd]=$weight
            done <<<"$list"
            ;;
          passes)
            # A non-number used to fall through to 0, i.e. silently
            # disable the pass limit; reject it instead.
            if [[ ! "$arg" =~ ^-?[0-9]+$ ]]; then
              echo "--passes expects an integer, got '$arg'" >&2
              print_help
            fi
            if [ "$arg" -gt 0 ]; then
              passes=$arg
            else
              passes=0
            fi
            ;;
          *) print_help ;; # a value with no option in front of it
        esac
        pending=
        ;;
    esac
  done

  # An option at the very end of the command line never got its value.
  if [[ -n "$pending" ]]; then
    echo "Missing value for the last option ($pending)" >&2
    print_help
  fi
}

parse_args "$@"
# Working copies of the osdmap (om) and crush map (cm) live in /tmp under a
# name built from the cluster, the offset and the current epoch time.
name="${cluster}-offset_${pg_offset}-$(date +%s)"
cm=/tmp/cm.$name
om=/tmp/om.$name

# Use the supplied osdmap, or fetch the current one from the cluster.
# A map that cannot be copied or fetched is fatal: carrying on would
# silently analyse a different (or missing) map.
if [[ "$custom_osdmap" == true ]]; then
  if ! cp -- "$osd_map" "$om"; then
    echo "Unable to copy osdmap '$osd_map'" >&2
    exit 1
  fi
elif ! ceph --cluster "$cluster" osd getmap -o "$om" > /dev/null 2>&1; then
  echo "Unable to get the osdmap from cluster '$cluster'" >&2
  exit 1
fi

# Use the supplied crush map, or export it from the osdmap.
if [[ "$custom_crush" == true ]]; then
  if ! cp -- "$crush_map" "$cm"; then
    echo "Unable to copy crush map '$crush_map'" >&2
    exit 1
  fi
elif ! osdmaptool "$om" --export-crush "$cm" > /dev/null 2>&1; then
  echo "Unable to export the crush map from $om" >&2
  exit 1
fi

# Snapshot the cluster state that the rest of the script parses.
tree=$(crushtool -i "$cm" --tree 2>&1)
# OSD ids: third column of the tree's osd lines, text after "osd.".
crush_osds=$(echo "$tree" | grep 'osd\.' | awk '{print $3}' | cut -d. -f2 | sort -n)
cluster_df=$(ceph --cluster "$cluster" df)
osd_df=$(ceph --cluster "$cluster" osd df)
# Host names: fourth column of the tree's host lines.
strs=$(echo "$tree" | grep host | awk '{print $4}' | sort)
#######################################
# Test whether a PG count is a positive power of two.
# Arguments: $1 - candidate PG count
# Returns:   0 if $1 is an integer >= 1 with exactly one bit set,
#            1 otherwise (including empty or non-numeric input)
#######################################
is_power_of_two() {
  local n=$1
  [[ "$n" =~ ^[0-9]+$ ]] || return 1
  (( 10#$n >= 1 && (10#$n & (10#$n - 1)) == 0 ))
}

# Collect per-pool figures from the POOLS section of `ceph df`.
pools_num=
pools_total_percent=0
pools_total_pg_count=0
output=$(echo "$cluster_df" | grep -A999 '^POOLS:$' | grep -Ev '^POOLS:$|^\s+NAME\s+')
while read -r pool_name pool_num pool_used pool_percent junk objects; do
  # An empty pool list still feeds one blank line into the loop.
  [[ -n "$pool_name" ]] || continue

  pool_get=$(ceph --cluster "$cluster" osd pool get "$pool_name" all)
  # For erasure-coded pools the used percentage is divided by min_size.
  if grep -q erasure_code_profile <<<"$pool_get"; then
    ec=true
    ec_mod=$(echo "$pool_get" | grep '^min_size: ' | awk '{print $2}')
  else
    ec=false
    ec_mod=1
  fi

  pools_num="$pools_num $pool_num"
  pools_replica[$pool_num]=$(echo "$pool_get" | grep -E '^size:\s' | awk '{print $2}')
  pg_count=$(osdmaptool "$om" --import-crush "$cm" --test-map-pgs --mark-up-in --clear-temp --pool "$pool_num" 2>/dev/null \
    | grep "^pool $pool_num pg_num " | awk '{print $4}')

  # The original test joined "less than 1" and "not a power of two" with
  # AND, so it could never reject a positive count.
  if ! is_power_of_two "$pg_count"; then
    echo "$pool_name has $pg_count PGs. This tool only supports PG counts that are power of 2."
    exit 1
  fi

  pools_pg_count[$pool_num]=$pg_count
  pools_total_pg_count=$(( pools_total_pg_count + pg_count ))
  if [[ "$ec" == true ]]; then
    pools_percent[$pool_num]=$(awk -v p="$pool_percent" -v m="$ec_mod" 'BEGIN {print p / m}')
  else
    pools_percent[$pool_num]=$pool_percent
  fi
  pools_total_percent=$(awk -v t="$pools_total_percent" -v p="$pool_percent" 'BEGIN {print t + p}')
done <<<"$output"

# Weight of one PG of each pool: the pool's share of the used percentage,
# divided by its PG count and scaled by the total PG count.
for pool_num in $pools_num; do
  if ! pools_pg_weight[$pool_num]=$(awk \
    -v p="${pools_percent[$pool_num]}" \
    -v t="$pools_total_percent" \
    -v c="${pools_pg_count[$pool_num]}" \
    -v n="$pools_total_pg_count" \
    'BEGIN { if (t == 0 || c == 0) exit 1; print p / t / c * n }'); then
    echo "Cannot weight pools: the total used percentage is 0." >&2
    exit 1
  fi
done
#######################################
# List the OSD ids that sit directly under one host in the crush tree.
# Relies on the same column layout the rest of the script assumes: host
# lines carry "host" in column 3 and the name in column 4, osd lines
# carry "osd.N" in column 3 and the id in column 1.
# Globals:   tree (read)
# Arguments: $1 - exact host name
# Outputs:   one OSD id per line on stdout (nothing for an unknown host)
#######################################
osds_on_host() {
  awk -v host="$1" '
    $3 ~ /^osd\./ { if (in_host) print $1; next }
    { in_host = ($3 == "host" && $4 == host) }
  ' <<<"$tree"
}

# Expand server names in the --remove list to the OSDs on that server,
# then zero the crush weight of every listed OSD.
# The host lookup is an exact match (a substring grep would let "node1"
# also hit "node10"), and the expansion uses its own variable so the
# OSD list given with -o (held in $osds) is left alone.
if [[ "${do_remove:-false}" == true ]]; then
  expanded=
  for remove in $remove_me; do
    if grep -Fxq -- "$remove" <<<"$strs"; then
      expanded="$expanded $(osds_on_host "$remove" | tr '\n' ' ')"
    else
      expanded="$expanded $remove"
    fi
  done
  # One id per line, numerically sorted, duplicates dropped.
  # shellcheck disable=SC2086  # split the list into words on purpose
  remove_me=$(printf '%s\n' $expanded | sort -nu)

  for osd in $remove_me; do
    crushtool -i "$cm" -o "$cm" --reweight-item "osd.$osd" 0.0 > /dev/null
  done
fi

# Apply the weights given with --manual_weight.
if [[ "${do_manual_weight:-false}" == true ]]; then
  # shellcheck disable=SC2086  # split the list into words on purpose
  manual_weight_me=$(printf '%s\n' $manual_weight_me | sort -nu)
  for osd in $manual_weight_me; do
    crushtool -i "$cm" -o "$cm" --reweight-item "osd.$osd" "${manual_weight_list[$osd]}" > /dev/null
  done
fi
#######################################
# Fold one pool's PG mapping into the per-OSD totals.
# Globals:   osds_pgs, osds_pgs_weight, osds_primary, osds_weight
#            (written); pools_pg_weight (read)
# Arguments: $1 - pool number
#            $2 - lines of "<osd id> <pgs> <primary pgs> <weight>"
# Returns:   0
#######################################
accumulate_pool_map() {
  local pool=$1
  local osd pgs primary weight
  while read -r osd pgs primary weight; do
    # An empty mapping still feeds one blank line into the loop.
    [[ -n "$osd" ]] || continue
    osds_pgs[$osd]=$(( ${osds_pgs[$osd]:-0} + pgs ))
    osds_pgs_weight[$osd]=$(awk \
      -v cur="${osds_pgs_weight[$osd]:-0}" \
      -v n="$pgs" \
      -v w="${pools_pg_weight[$pool]}" \
      'BEGIN {print cur + (n * w)}')
    osds_primary[$osd]=$(( ${osds_primary[$osd]:-0} + primary ))
    osds_weight[$osd]=$weight
  done <<<"$2"
  return 0
}

# Set statistics for the OSDs
for osd in $crush_osds; do
  osds_pgs[$osd]=0
  osds_pgs_weight[$osd]=0
  osds_primary[$osd]=0
done
for pool_num in $pools_num; do
  # Keep columns 1, 2, 4 and 5 of each osd line and strip the "osd." prefix.
  map=$(osdmaptool "$om" --import-crush "$cm" --test-map-pgs --mark-up-in --clear-temp --pool "$pool_num" 2>/dev/null \
    | grep '^osd\.' | awk '{print $1" "$2" "$4" "$5}' | sed 's/^osd.//')
  accumulate_pool_map "$pool_num" "$map"
done
#######################################
# Read per-OSD size and usage figures from the `ceph osd df` snapshot.
# OSDs named in the --remove or --manual_weight lists are recorded but
# left out of cluster_size.
# (An earlier, disabled experiment tried to normalise sizes of differing
# magnitude; sizes are used exactly as reported.)
# Globals:   osd_df, remove_me, manual_weight_me, osds_pgs_weight (read);
#            disks, cluster_size, osds_size, osds_pgs_weight_per_TB,
#            osds_percent, osds_variance (written)
# Arguments: none
# Outputs:   a warning on stderr for each line without a usable size
# Returns:   0
#######################################
load_disk_stats() {
  local osd size percent variance excluded

  cluster_size=0
  # Columns 1, 4, 7 and 8 of every line that is not a header or summary.
  disks=$(echo "$osd_df" | grep -v 'TOTAL\|STDDEV\|REWEIGHT' | awk '{print $1" "$4" "$7" "$8}')

  # Space-padded list of OSDs with pre-determined weights, for whole-token
  # matching below.
  # shellcheck disable=SC2086  # flatten the newline-separated lists
  excluded=" $(echo $remove_me $manual_weight_me) "

  while read -r osd size percent variance; do
    [[ -n "$osd" ]] || continue
    size=$(grep -Eo '[[:digit:]]+' <<<"$size")
    # A missing or zero size would break the division below.
    if [[ ! "$size" =~ ^[0-9]+$ ]] || (( 10#$size == 0 )); then
      echo "Skipping osd $osd: unusable size" >&2
      continue
    fi
    # exclude osds that have pre-determined weights
    if [[ "$excluded" != *" $osd "* ]]; then
      cluster_size=$(( cluster_size + 10#$size ))
    fi
    osds_size[$osd]=$size
    osds_pgs_weight_per_TB[$osd]=$(awk \
      -v w="${osds_pgs_weight[$osd]:-0}" \
      -v s="$size" \
      'BEGIN {print w / s * 1024}')
    osds_percent[$osd]=$percent
    osds_variance[$osd]=${variance/./}
  done <<<"$disks"
  return 0
}

# Get disk information
load_disk_stats
#######################################
# Print one aligned line per requested OSD and, when every OSD was asked
# for with "-o all", the spread of PG weight per TB across them.
# Globals:   list_osds, crush_osds, osds_size, osds_percent,
#            osds_pgs_weight, osds_pgs_weight_per_TB, osds_pgs,
#            osds_primary (read); osds, list_all (read and written)
# Arguments: none
# Outputs:   the report on stdout; nothing unless -o/--osds was given
# Returns:   0
#######################################
print_osd_report() {
  [[ "${list_osds:-false}" == true ]] || return 0

  if [[ "$osds" == "all" ]]; then
    list_all=true
    osds=$crush_osds
  fi

  # '+' separates the columns so that `column` can align them.
  local osd rows= per_tb min max
  for osd in $osds; do
    rows="$rows
osd.$osd+${osds_size[$osd]}GB+${osds_percent[$osd]}%+-+PG_Weight: ${osds_pgs_weight[$osd]:-0}+PG_Weight_Per_TB: ${osds_pgs_weight_per_TB[$osd]:-0}+Current: ${osds_pgs[$osd]:-0}+Primary: ${osds_primary[$osd]:-0}"
  done
  echo "$rows" | column -t -s'+'

  if [[ "${list_all:-false}" == true ]]; then
    # Take the values straight from the array instead of re-parsing the
    # formatted table.
    per_tb=$(for osd in $osds; do
      echo "${osds_pgs_weight_per_TB[$osd]:-0}"
    done | sort -n)
    min=$(head -n1 <<<"$per_tb")
    max=$(tail -n1 <<<"$per_tb")
    echo "PG_Weight Difference per TB: $(awk -v hi="$max" -v lo="$min" 'BEGIN {printf "%.3f\n", hi - lo}')"
  fi
  return 0
}

# Print information if asked for
print_osd_report
#######################################
# Test whether an OSD has a pre-determined weight, i.e. it appears in the
# --manual_weight or --remove list. The id is matched as a whole token,
# so OSD 1 is not mistaken for OSD 10.
# Globals:   manual_weight_me, remove_me (read)
# Arguments: $1 - OSD id
# Returns:   0 if the OSD is listed, 1 otherwise
#######################################
is_pinned_osd() {
  local pinned
  # shellcheck disable=SC2086  # flatten the newline-separated lists
  pinned=" $(echo $manual_weight_me $remove_me) "
  [[ "$pinned" == *" $1 "* ]]
}

# Calculate new map
if [[ "${reweight:-false}" == true ]]; then
  # Every figure below is divided by the size of the weighable disks.
  if [[ ! "$cluster_size" =~ ^[0-9]+$ ]] || (( cluster_size == 0 )); then
    echo "No usable OSD sizes were found; cannot reweight." >&2
    exit 1
  fi

  # Calculate PG average numbers
  pg_weight_per_GB=0
  for pool_num in $pools_num; do
    pg_per_GB=$(awk \
      -v c="${pools_pg_count[$pool_num]}" \
      -v r="${pools_replica[$pool_num]}" \
      -v s="$cluster_size" \
      'BEGIN {print c * r / s}')
    pg_weight_per_GB=$(awk \
      -v acc="$pg_weight_per_GB" \
      -v g="$pg_per_GB" \
      -v w="${pools_pg_weight[$pool_num]}" \
      'BEGIN {print acc + (g * w)}')
  done

  # OSDs for which no weight was read from the PG mapping get a starting
  # weight built from the digits of their size, with a dot after the
  # first digit.
  for osd in $crush_osds; do
    [[ -z "${osds_weight[$osd]:-}" ]] || continue
    is_pinned_osd "$osd" && continue
    osds_weight[$osd]=$(awk -v id="$osd" '$1 == id {print $2}' <<<"$disks" \
      | grep -Eo '[0-9]+' | sed 's/^./&./')
    crushtool -i "$cm" -o "$cm" --reweight-item "osd.$osd" "${osds_weight[$osd]}" > /dev/null
  done
fi
# Iteratively nudge crush weights until every OSD's PG weight is within
# $threshold of its target, or the pass limit is hit.
# Without --calculate only the first evaluation is reported.
pass=0

# Space-padded list of OSDs whose weight is fixed (removed or manually
# weighted); these are never adjusted.
# shellcheck disable=SC2086  # flatten the newline-separated lists
pinned=" $(echo $remove_me $manual_weight_me) "

while [[ "${reweight:-false}" == true ]]; do
  # Give up once the configured number of passes is used (0 = no limit).
  if [[ "$passes" != 0 && "$passes" -eq "$pass" ]]; then
    echo "The map did not complete in $passes passes."
    echo "The failed Crush Map file is cm.$name-pass_$pass-incomplete"
    mv -- "$cm" "cm.$name-pass_$pass-incomplete"
    # NOTE(review): under sudo $USER is usually root - confirm whether
    # the invoking user was meant here.
    chown "$USER:$USER" "cm.$name-pass_$pass-incomplete"
    break
  fi

  # Recompute the per-OSD PG statistics against the current crush map.
  for osd in $crush_osds; do
    osds_pgs[$osd]=0
    osds_pgs_weight[$osd]=0
    osds_primary[$osd]=0
  done
  for pool_num in $pools_num; do
    map=$(osdmaptool "$om" --import-crush "$cm" --test-map-pgs --mark-up-in --clear-temp --pool "$pool_num" 2>/dev/null \
      | grep '^osd\.' | awk '{print $1" "$2" "$4" "$5}' | sed 's/^osd.//')
    while read -r osd pgs primary weight; do
      [[ -n "$osd" ]] || continue
      osds_pgs[$osd]=$(( ${osds_pgs[$osd]:-0} + pgs ))
      osds_pgs_weight[$osd]=$(awk \
        -v cur="${osds_pgs_weight[$osd]:-0}" \
        -v n="$pgs" \
        -v w="${pools_pg_weight[$pool_num]}" \
        'BEGIN {print cur + (n * w)}')
      osds_primary[$osd]=$(( ${osds_primary[$osd]:-0} + primary ))
      osds_weight[$osd]=$weight
    done <<<"$map"
  done

  # Sort the OSDs into those that must go up or down and pick a step.
  up=
  down=
  for osd in $crush_osds; do
    [[ "$pinned" == *" $osd "* ]] && continue

    # Target PG weight for this OSD: per-GB average times its size,
    # shifted by the offset.
    pg_avg=$(awk \
      -v g="$pg_weight_per_GB" \
      -v s="${osds_size[$osd]}" \
      -v o="$pg_offset" \
      'BEGIN {print (g * s) - o}')
    high_diff=$(awk -v w="${osds_pgs_weight[$osd]:-0}" -v a="$pg_avg" 'BEGIN {printf "%.0f\n", w - a}')
    low_diff=$(awk -v w="${osds_pgs_weight[$osd]:-0}" -v a="$pg_avg" 'BEGIN {printf "%.0f\n", a - w}')

    if [[ "$high_diff" -ge "$threshold" ]]; then
      down="$down $osd"
      gap=$high_diff
    elif [[ "$low_diff" -ge "$threshold" ]]; then
      up="$up $osd"
      gap=$low_diff
    else
      continue
    fi

    # The further off target, the bigger the step: the base increment
    # times the rounded number of thresholds, once that reaches 2.
    multiplier=$(awk -v d="$gap" -v t="$threshold" 'BEGIN {printf "%.0f\n", d / t}')
    if [[ "$multiplier" -ge 2 ]]; then
      osds_increment[$osd]=$(awk -v i="$increment" -v m="$multiplier" 'BEGIN {print i * m}')
    else
      osds_increment[$osd]=$increment
    fi
  done

  # shellcheck disable=SC2086  # count the words in each list
  up_total=$(echo $up | wc -w)
  # shellcheck disable=SC2086
  down_total=$(echo $down | wc -w)

  if [[ "$up_total" -eq 0 && "$down_total" -eq 0 ]]; then
    mv -- "$cm" "cm.$name-pass_$pass"
    chown "$USER:$USER" "cm.$name-pass_$pass"
    echo "All done weighting. The Crush Map file is cm.$name-pass_$pass"
    break
  fi

  # gen_maps.sh parses this line; keep its format.
  echo "Calculating $up_total up; $down_total down..."
  if [[ "$calculate" != true ]]; then
    echo
    echo "Use --calculate to calculate the new weights for these OSDs"
    break
  fi

  pass=$(( pass + 1 ))
  echo "Pass #$pass"

  # Reweighting maths
  # Go up by increment
  for osd in $up; do
    new_weight=$(awk -v w="${osds_weight[$osd]:-0}" -v i="${osds_increment[$osd]}" \
      'BEGIN {printf "%.3f\n", w + i}')
    crushtool -i "$cm" -o "$cm" --reweight-item "osd.$osd" "$new_weight" > /dev/null
  done
  # Go down by increment, never below zero: a large step on a small
  # weight would otherwise hand crushtool a negative weight.
  for osd in $down; do
    new_weight=$(awk -v w="${osds_weight[$osd]:-0}" -v i="${osds_increment[$osd]}" \
      'BEGIN {n = w - i; if (n < 0) n = 0; printf "%.3f\n", n}')
    crushtool -i "$cm" -o "$cm" --reweight-item "osd.$osd" "$new_weight" > /dev/null
  done
done
# Drop every associative array declared at the top of the script,
# including osds_pgs_weight_per_TB, which the original list left out.
unset osds_pgs osds_primary osds_weight osds_increment osds_size \
  osds_percent osds_variance osds_pgs_weight osds_pgs_weight_per_TB \
  pools_pg_count pools_pg_weight pools_percent pools_replica \
  manual_weight_list
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# This will weight your cluster based on an algorithm of how full each pool is and how large each PG is.

# Per-OSD state, keyed by numeric OSD id.
declare -A osds_pgs               # PGs mapped to the OSD, summed over pools
declare -A osds_primary           # primary PGs on the OSD, summed over pools
declare -A osds_weight            # weight column taken from osdmaptool output
declare -A osds_increment         # weight step chosen for the current pass
declare -A osds_size              # digits of the SIZE column of `ceph osd df`
declare -A osds_percent           # column 7 of `ceph osd df`
declare -A osds_variance          # column 8 of `ceph osd df`, dot removed
declare -A osds_pgs_weight        # PG count scaled by each pool's PG weight
declare -A osds_pgs_weight_per_TB # osds_pgs_weight / size * 1024

# Per-pool state, keyed by pool number.
declare -A pools_pg_count         # pg_num reported by osdmaptool
declare -A pools_pg_weight        # relative weight of one PG of the pool
declare -A pools_percent          # used percentage (divided by min_size for EC)
declare -A pools_replica          # the pool's "size" setting

# OSD id -> weight given with --manual_weight.
declare -A manual_weight_list
#######################################
# Print the usage text for weight_cluster.sh and terminate the script.
# Arguments: none
# Outputs:   usage text on stdout
# Returns:   does not return; exits with status 1
#######################################
print_help() {
  echo "
usage: weight_cluster.sh [options] [variables]
  --cluster        Specify cluster (default is ceph)
  -o, --osds       Specify OSDs to query (comma delimited list, or
                   'all' for every OSD in the crush map)
  -r, --reweight   Reweight the cluster
  --calculate      With -r, keep adjusting weights and write out the
                   resulting crush map instead of only reporting how
                   many OSDs need to move up or down
  --offset         Balance for pgs in other pools
  --passes         Specify the maximum amount of times to pass over a map
                   before giving up. Default is 1500. Setting this to 0
                   will disable this function.
  --osdmap         Supply an osdmap to be used instead of getting one
                   from the cluster.
  --crush          Supply a crush map to be used instead of getting
                   one from the osdmap.
  --remove         Specify servername and/or OSDs to set the weight to
                   0.00. Servername will expand to all osds on the
                   server. (comma delimited list)
  --manual_weight
                   Specify any OSDs you want to hardcode a weight for.
                   The format is osd:weight in a comma delimited list.
"
  exit 1
}
# Defaults for everything the command line can change, plus the tuning
# constants used by the reweight loop (threshold, increment).
cluster=ceph
list_osds=false
reweight=false
calculate=false
threshold=2
increment=0.005
pg_offset=2
custom_osdmap=false
custom_crush=false
do_remove=false
do_manual_weight=false
passes=1500
list_all=false

#######################################
# Parse weight_cluster.sh command-line arguments into global settings.
# Options that take a value read it from the following argument.
# Globals:   cluster, list_osds, reweight, calculate, pg_offset,
#            custom_osdmap, custom_crush, do_remove, do_manual_weight,
#            passes, crush_map, osd_map, osds, diff, remove_me,
#            manual_weight_me, manual_weight_list (written)
# Arguments: the script's command line ("$@")
# Outputs:   a diagnostic on stderr for a missing or invalid value
# Returns:   0 on success; calls print_help (which exits 1) on a stray
#            value, a non-integer --passes value, or an option left
#            without its value
#######################################
parse_args() {
  local arg list osd weight
  # Name of the option still waiting for its value. Kept local and
  # explicitly empty so it cannot be inherited from the environment.
  local pending=

  for arg in "$@"; do
    case "$arg" in
      --crush)
        pending=crush
        custom_crush=true
        ;;
      --osdmap)
        pending=osdmap
        custom_osdmap=true
        ;;
      --cluster)
        pending=cluster
        ;;
      --osds | -o)
        pending=osd
        list_osds=true
        ;;
      --reweight | -r)
        reweight=true
        ;;
      --calculate)
        calculate=true
        ;;
      --offset)
        pending=offset
        ;;
      --remove)
        pending=remove
        do_remove=true
        ;;
      --manual_weight)
        pending=manual_weight
        do_manual_weight=true
        ;;
      --passes)
        pending=passes
        ;;
      *)
        case "$pending" in
          crush) crush_map=$arg ;;
          osdmap) osd_map=$arg ;;
          cluster) cluster=$arg ;;
          osd)
            osds=${arg//,/ }
            if [[ "$osds" == "diff" ]]; then diff=true; else diff=false; fi
            ;;
          offset) pg_offset=$arg ;;
          remove) remove_me=${arg//,/ } ;;
          manual_weight)
            # "3:1.5,9:2" -> one "osd weight" pair per line.
            list=$(tr ',:' '\n ' <<<"$arg")
            while read -r osd weight; do
              [[ -n "$osd" ]] || continue
              manual_weight_me="$manual_weight_me $osd"
              manual_weight_list[$osd]=$weight
            done <<<"$list"
            ;;
          passes)
            # A non-number used to fall through to 0, i.e. silently
            # disable the pass limit; reject it instead.
            if [[ ! "$arg" =~ ^-?[0-9]+$ ]]; then
              echo "--passes expects an integer, got '$arg'" >&2
              print_help
            fi
            if [ "$arg" -gt 0 ]; then
              passes=$arg
            else
              passes=0
            fi
            ;;
          *) print_help ;; # a value with no option in front of it
        esac
        pending=
        ;;
    esac
  done

  # An option at the very end of the command line never got its value.
  if [[ -n "$pending" ]]; then
    echo "Missing value for the last option ($pending)" >&2
    print_help
  fi
}

parse_args "$@"
# Working copies of the osdmap (om) and crush map (cm) live in /tmp under a
# name built from the cluster, the offset and the current epoch time.
name="${cluster}-offset_${pg_offset}-$(date +%s)"
cm=/tmp/cm.$name
om=/tmp/om.$name

# Use the supplied osdmap, or fetch the current one from the cluster.
# A map that cannot be copied or fetched is fatal: carrying on would
# silently analyse a different (or missing) map.
if [[ "$custom_osdmap" == true ]]; then
  if ! cp -- "$osd_map" "$om"; then
    echo "Unable to copy osdmap '$osd_map'" >&2
    exit 1
  fi
elif ! ceph --cluster "$cluster" osd getmap -o "$om" > /dev/null 2>&1; then
  echo "Unable to get the osdmap from cluster '$cluster'" >&2
  exit 1
fi

# Use the supplied crush map, or export it from the osdmap.
if [[ "$custom_crush" == true ]]; then
  if ! cp -- "$crush_map" "$cm"; then
    echo "Unable to copy crush map '$crush_map'" >&2
    exit 1
  fi
elif ! osdmaptool "$om" --export-crush "$cm" > /dev/null 2>&1; then
  echo "Unable to export the crush map from $om" >&2
  exit 1
fi

# Snapshot the cluster state that the rest of the script parses.
tree=$(crushtool -i "$cm" --tree 2>&1)
# OSD ids: third column of the tree's osd lines, text after "osd.".
crush_osds=$(echo "$tree" | grep 'osd\.' | awk '{print $3}' | cut -d. -f2 | sort -n)
cluster_df=$(ceph --cluster "$cluster" df)
osd_df=$(ceph --cluster "$cluster" osd df)
# Host names: fourth column of the tree's host lines.
strs=$(echo "$tree" | grep host | awk '{print $4}' | sort)
#######################################
# Test whether a PG count is a positive power of two.
# Arguments: $1 - candidate PG count
# Returns:   0 if $1 is an integer >= 1 with exactly one bit set,
#            1 otherwise (including empty or non-numeric input)
#######################################
is_power_of_two() {
  local n=$1
  [[ "$n" =~ ^[0-9]+$ ]] || return 1
  (( 10#$n >= 1 && (10#$n & (10#$n - 1)) == 0 ))
}

# Collect per-pool figures from the POOLS section of `ceph df`.
pools_num=
pools_total_percent=0
pools_total_pg_count=0
output=$(echo "$cluster_df" | grep -A999 '^POOLS:$' | grep -Ev '^POOLS:$|^\s+NAME\s+')
while read -r pool_name pool_num pool_used pool_percent junk objects; do
  # An empty pool list still feeds one blank line into the loop.
  [[ -n "$pool_name" ]] || continue

  pool_get=$(ceph --cluster "$cluster" osd pool get "$pool_name" all)
  # For erasure-coded pools the used percentage is divided by min_size.
  if grep -q erasure_code_profile <<<"$pool_get"; then
    ec=true
    ec_mod=$(echo "$pool_get" | grep '^min_size: ' | awk '{print $2}')
  else
    ec=false
    ec_mod=1
  fi

  pools_num="$pools_num $pool_num"
  pools_replica[$pool_num]=$(echo "$pool_get" | grep -E '^size:\s' | awk '{print $2}')
  pg_count=$(osdmaptool "$om" --import-crush "$cm" --test-map-pgs --mark-up-in --clear-temp --pool "$pool_num" 2>/dev/null \
    | grep "^pool $pool_num pg_num " | awk '{print $4}')

  # The original test joined "less than 1" and "not a power of two" with
  # AND, so it could never reject a positive count.
  if ! is_power_of_two "$pg_count"; then
    echo "$pool_name has $pg_count PGs. This tool only supports PG counts that are power of 2."
    exit 1
  fi

  pools_pg_count[$pool_num]=$pg_count
  pools_total_pg_count=$(( pools_total_pg_count + pg_count ))
  if [[ "$ec" == true ]]; then
    pools_percent[$pool_num]=$(awk -v p="$pool_percent" -v m="$ec_mod" 'BEGIN {print p / m}')
  else
    pools_percent[$pool_num]=$pool_percent
  fi
  pools_total_percent=$(awk -v t="$pools_total_percent" -v p="$pool_percent" 'BEGIN {print t + p}')
done <<<"$output"

# Weight of one PG of each pool: the pool's share of the used percentage,
# divided by its PG count and scaled by the total PG count.
for pool_num in $pools_num; do
  if ! pools_pg_weight[$pool_num]=$(awk \
    -v p="${pools_percent[$pool_num]}" \
    -v t="$pools_total_percent" \
    -v c="${pools_pg_count[$pool_num]}" \
    -v n="$pools_total_pg_count" \
    'BEGIN { if (t == 0 || c == 0) exit 1; print p / t / c * n }'); then
    echo "Cannot weight pools: the total used percentage is 0." >&2
    exit 1
  fi
done
#######################################
# List the OSD ids that sit directly under one host in the crush tree.
# Relies on the same column layout the rest of the script assumes: host
# lines carry "host" in column 3 and the name in column 4, osd lines
# carry "osd.N" in column 3 and the id in column 1.
# Globals:   tree (read)
# Arguments: $1 - exact host name
# Outputs:   one OSD id per line on stdout (nothing for an unknown host)
#######################################
osds_on_host() {
  awk -v host="$1" '
    $3 ~ /^osd\./ { if (in_host) print $1; next }
    { in_host = ($3 == "host" && $4 == host) }
  ' <<<"$tree"
}

# Expand server names in the --remove list to the OSDs on that server,
# then zero the crush weight of every listed OSD.
# The host lookup is an exact match (a substring grep would let "node1"
# also hit "node10"), and the expansion uses its own variable so the
# OSD list given with -o (held in $osds) is left alone.
if [[ "${do_remove:-false}" == true ]]; then
  expanded=
  for remove in $remove_me; do
    if grep -Fxq -- "$remove" <<<"$strs"; then
      expanded="$expanded $(osds_on_host "$remove" | tr '\n' ' ')"
    else
      expanded="$expanded $remove"
    fi
  done
  # One id per line, numerically sorted, duplicates dropped.
  # shellcheck disable=SC2086  # split the list into words on purpose
  remove_me=$(printf '%s\n' $expanded | sort -nu)

  for osd in $remove_me; do
    crushtool -i "$cm" -o "$cm" --reweight-item "osd.$osd" 0.0 > /dev/null
  done
fi

# Apply the weights given with --manual_weight.
if [[ "${do_manual_weight:-false}" == true ]]; then
  # shellcheck disable=SC2086  # split the list into words on purpose
  manual_weight_me=$(printf '%s\n' $manual_weight_me | sort -nu)
  for osd in $manual_weight_me; do
    crushtool -i "$cm" -o "$cm" --reweight-item "osd.$osd" "${manual_weight_list[$osd]}" > /dev/null
  done
fi
#######################################
# Fold one pool's PG mapping into the per-OSD totals.
# Globals:   osds_pgs, osds_pgs_weight, osds_primary, osds_weight
#            (written); pools_pg_weight (read)
# Arguments: $1 - pool number
#            $2 - lines of "<osd id> <pgs> <primary pgs> <weight>"
# Returns:   0
#######################################
accumulate_pool_map() {
  local pool=$1
  local osd pgs primary weight
  while read -r osd pgs primary weight; do
    # An empty mapping still feeds one blank line into the loop.
    [[ -n "$osd" ]] || continue
    osds_pgs[$osd]=$(( ${osds_pgs[$osd]:-0} + pgs ))
    osds_pgs_weight[$osd]=$(awk \
      -v cur="${osds_pgs_weight[$osd]:-0}" \
      -v n="$pgs" \
      -v w="${pools_pg_weight[$pool]}" \
      'BEGIN {print cur + (n * w)}')
    osds_primary[$osd]=$(( ${osds_primary[$osd]:-0} + primary ))
    osds_weight[$osd]=$weight
  done <<<"$2"
  return 0
}

# Set statistics for the OSDs
for osd in $crush_osds; do
  osds_pgs[$osd]=0
  osds_pgs_weight[$osd]=0
  osds_primary[$osd]=0
done
for pool_num in $pools_num; do
  # Keep columns 1, 2, 4 and 5 of each osd line and strip the "osd." prefix.
  map=$(osdmaptool "$om" --import-crush "$cm" --test-map-pgs --mark-up-in --clear-temp --pool "$pool_num" 2>/dev/null \
    | grep '^osd\.' | awk '{print $1" "$2" "$4" "$5}' | sed 's/^osd.//')
  accumulate_pool_map "$pool_num" "$map"
done
#######################################
# Read per-OSD size and usage figures from the `ceph osd df` snapshot.
# OSDs named in the --remove or --manual_weight lists are recorded but
# left out of cluster_size.
# (An earlier, disabled experiment tried to normalise sizes of differing
# magnitude; sizes are used exactly as reported.)
# Globals:   osd_df, remove_me, manual_weight_me, osds_pgs_weight (read);
#            disks, cluster_size, osds_size, osds_pgs_weight_per_TB,
#            osds_percent, osds_variance (written)
# Arguments: none
# Outputs:   a warning on stderr for each line without a usable size
# Returns:   0
#######################################
load_disk_stats() {
  local osd size percent variance excluded

  cluster_size=0
  # Columns 1, 4, 7 and 8 of every line that is not a header or summary.
  disks=$(echo "$osd_df" | grep -v 'TOTAL\|STDDEV\|REWEIGHT' | awk '{print $1" "$4" "$7" "$8}')

  # Space-padded list of OSDs with pre-determined weights, for whole-token
  # matching below.
  # shellcheck disable=SC2086  # flatten the newline-separated lists
  excluded=" $(echo $remove_me $manual_weight_me) "

  while read -r osd size percent variance; do
    [[ -n "$osd" ]] || continue
    size=$(grep -Eo '[[:digit:]]+' <<<"$size")
    # A missing or zero size would break the division below.
    if [[ ! "$size" =~ ^[0-9]+$ ]] || (( 10#$size == 0 )); then
      echo "Skipping osd $osd: unusable size" >&2
      continue
    fi
    # exclude osds that have pre-determined weights
    if [[ "$excluded" != *" $osd "* ]]; then
      cluster_size=$(( cluster_size + 10#$size ))
    fi
    osds_size[$osd]=$size
    osds_pgs_weight_per_TB[$osd]=$(awk \
      -v w="${osds_pgs_weight[$osd]:-0}" \
      -v s="$size" \
      'BEGIN {print w / s * 1024}')
    osds_percent[$osd]=$percent
    osds_variance[$osd]=${variance/./}
  done <<<"$disks"
  return 0
}

# Get disk information
load_disk_stats
#######################################
# Print one aligned line per requested OSD and, when every OSD was asked
# for with "-o all", the spread of PG weight per TB across them.
# Globals:   list_osds, crush_osds, osds_size, osds_percent,
#            osds_pgs_weight, osds_pgs_weight_per_TB, osds_pgs,
#            osds_primary (read); osds, list_all (read and written)
# Arguments: none
# Outputs:   the report on stdout; nothing unless -o/--osds was given
# Returns:   0
#######################################
print_osd_report() {
  [[ "${list_osds:-false}" == true ]] || return 0

  if [[ "$osds" == "all" ]]; then
    list_all=true
    osds=$crush_osds
  fi

  # '+' separates the columns so that `column` can align them.
  local osd rows= per_tb min max
  for osd in $osds; do
    rows="$rows
osd.$osd+${osds_size[$osd]}GB+${osds_percent[$osd]}%+-+PG_Weight: ${osds_pgs_weight[$osd]:-0}+PG_Weight_Per_TB: ${osds_pgs_weight_per_TB[$osd]:-0}+Current: ${osds_pgs[$osd]:-0}+Primary: ${osds_primary[$osd]:-0}"
  done
  echo "$rows" | column -t -s'+'

  if [[ "${list_all:-false}" == true ]]; then
    # Take the values straight from the array instead of re-parsing the
    # formatted table.
    per_tb=$(for osd in $osds; do
      echo "${osds_pgs_weight_per_TB[$osd]:-0}"
    done | sort -n)
    min=$(head -n1 <<<"$per_tb")
    max=$(tail -n1 <<<"$per_tb")
    echo "PG_Weight Difference per TB: $(awk -v hi="$max" -v lo="$min" 'BEGIN {printf "%.3f\n", hi - lo}')"
  fi
  return 0
}

# Print information if asked for
print_osd_report
#######################################
# Test whether an OSD has a pre-determined weight, i.e. it appears in the
# --manual_weight or --remove list. The id is matched as a whole token,
# so OSD 1 is not mistaken for OSD 10.
# Globals:   manual_weight_me, remove_me (read)
# Arguments: $1 - OSD id
# Returns:   0 if the OSD is listed, 1 otherwise
#######################################
is_pinned_osd() {
  local pinned
  # shellcheck disable=SC2086  # flatten the newline-separated lists
  pinned=" $(echo $manual_weight_me $remove_me) "
  [[ "$pinned" == *" $1 "* ]]
}

# Calculate new map
if [[ "${reweight:-false}" == true ]]; then
  # Every figure below is divided by the size of the weighable disks.
  if [[ ! "$cluster_size" =~ ^[0-9]+$ ]] || (( cluster_size == 0 )); then
    echo "No usable OSD sizes were found; cannot reweight." >&2
    exit 1
  fi

  # Calculate PG average numbers
  pg_weight_per_GB=0
  for pool_num in $pools_num; do
    pg_per_GB=$(awk \
      -v c="${pools_pg_count[$pool_num]}" \
      -v r="${pools_replica[$pool_num]}" \
      -v s="$cluster_size" \
      'BEGIN {print c * r / s}')
    pg_weight_per_GB=$(awk \
      -v acc="$pg_weight_per_GB" \
      -v g="$pg_per_GB" \
      -v w="${pools_pg_weight[$pool_num]}" \
      'BEGIN {print acc + (g * w)}')
  done

  # OSDs for which no weight was read from the PG mapping get a starting
  # weight built from the digits of their size, with a dot after the
  # first digit.
  for osd in $crush_osds; do
    [[ -z "${osds_weight[$osd]:-}" ]] || continue
    is_pinned_osd "$osd" && continue
    osds_weight[$osd]=$(awk -v id="$osd" '$1 == id {print $2}' <<<"$disks" \
      | grep -Eo '[0-9]+' | sed 's/^./&./')
    crushtool -i "$cm" -o "$cm" --reweight-item "osd.$osd" "${osds_weight[$osd]}" > /dev/null
  done
fi
# Iteratively nudge crush weights until every OSD's PG weight is within
# $threshold of its target, or the pass limit is hit.
# Without --calculate only the first evaluation is reported.
pass=0

# Space-padded list of OSDs whose weight is fixed (removed or manually
# weighted); these are never adjusted.
# shellcheck disable=SC2086  # flatten the newline-separated lists
pinned=" $(echo $remove_me $manual_weight_me) "

while [[ "${reweight:-false}" == true ]]; do
  # Give up once the configured number of passes is used (0 = no limit).
  if [[ "$passes" != 0 && "$passes" -eq "$pass" ]]; then
    echo "The map did not complete in $passes passes."
    echo "The failed Crush Map file is cm.$name-pass_$pass-incomplete"
    mv -- "$cm" "cm.$name-pass_$pass-incomplete"
    # NOTE(review): under sudo $USER is usually root - confirm whether
    # the invoking user was meant here.
    chown "$USER:$USER" "cm.$name-pass_$pass-incomplete"
    break
  fi

  # Recompute the per-OSD PG statistics against the current crush map.
  for osd in $crush_osds; do
    osds_pgs[$osd]=0
    osds_pgs_weight[$osd]=0
    osds_primary[$osd]=0
  done
  for pool_num in $pools_num; do
    map=$(osdmaptool "$om" --import-crush "$cm" --test-map-pgs --mark-up-in --clear-temp --pool "$pool_num" 2>/dev/null \
      | grep '^osd\.' | awk '{print $1" "$2" "$4" "$5}' | sed 's/^osd.//')
    while read -r osd pgs primary weight; do
      [[ -n "$osd" ]] || continue
      osds_pgs[$osd]=$(( ${osds_pgs[$osd]:-0} + pgs ))
      osds_pgs_weight[$osd]=$(awk \
        -v cur="${osds_pgs_weight[$osd]:-0}" \
        -v n="$pgs" \
        -v w="${pools_pg_weight[$pool_num]}" \
        'BEGIN {print cur + (n * w)}')
      osds_primary[$osd]=$(( ${osds_primary[$osd]:-0} + primary ))
      osds_weight[$osd]=$weight
    done <<<"$map"
  done

  # Sort the OSDs into those that must go up or down and pick a step.
  up=
  down=
  for osd in $crush_osds; do
    [[ "$pinned" == *" $osd "* ]] && continue

    # Target PG weight for this OSD: per-GB average times its size,
    # shifted by the offset.
    pg_avg=$(awk \
      -v g="$pg_weight_per_GB" \
      -v s="${osds_size[$osd]}" \
      -v o="$pg_offset" \
      'BEGIN {print (g * s) - o}')
    high_diff=$(awk -v w="${osds_pgs_weight[$osd]:-0}" -v a="$pg_avg" 'BEGIN {printf "%.0f\n", w - a}')
    low_diff=$(awk -v w="${osds_pgs_weight[$osd]:-0}" -v a="$pg_avg" 'BEGIN {printf "%.0f\n", a - w}')

    if [[ "$high_diff" -ge "$threshold" ]]; then
      down="$down $osd"
      gap=$high_diff
    elif [[ "$low_diff" -ge "$threshold" ]]; then
      up="$up $osd"
      gap=$low_diff
    else
      continue
    fi

    # The further off target, the bigger the step: the base increment
    # times the rounded number of thresholds, once that reaches 2.
    multiplier=$(awk -v d="$gap" -v t="$threshold" 'BEGIN {printf "%.0f\n", d / t}')
    if [[ "$multiplier" -ge 2 ]]; then
      osds_increment[$osd]=$(awk -v i="$increment" -v m="$multiplier" 'BEGIN {print i * m}')
    else
      osds_increment[$osd]=$increment
    fi
  done

  # shellcheck disable=SC2086  # count the words in each list
  up_total=$(echo $up | wc -w)
  # shellcheck disable=SC2086
  down_total=$(echo $down | wc -w)

  if [[ "$up_total" -eq 0 && "$down_total" -eq 0 ]]; then
    mv -- "$cm" "cm.$name-pass_$pass"
    chown "$USER:$USER" "cm.$name-pass_$pass"
    echo "All done weighting. The Crush Map file is cm.$name-pass_$pass"
    break
  fi

  # gen_maps.sh parses this line; keep its format.
  echo "Calculating $up_total up; $down_total down..."
  if [[ "$calculate" != true ]]; then
    echo
    echo "Use --calculate to calculate the new weights for these OSDs"
    break
  fi

  pass=$(( pass + 1 ))
  echo "Pass #$pass"

  # Reweighting maths
  # Go up by increment
  for osd in $up; do
    new_weight=$(awk -v w="${osds_weight[$osd]:-0}" -v i="${osds_increment[$osd]}" \
      'BEGIN {printf "%.3f\n", w + i}')
    crushtool -i "$cm" -o "$cm" --reweight-item "osd.$osd" "$new_weight" > /dev/null
  done
  # Go down by increment, never below zero: a large step on a small
  # weight would otherwise hand crushtool a negative weight.
  for osd in $down; do
    new_weight=$(awk -v w="${osds_weight[$osd]:-0}" -v i="${osds_increment[$osd]}" \
      'BEGIN {n = w - i; if (n < 0) n = 0; printf "%.3f\n", n}')
    crushtool -i "$cm" -o "$cm" --reweight-item "osd.$osd" "$new_weight" > /dev/null
  done
done
# Drop every associative array declared at the top of the script,
# including osds_pgs_weight_per_TB, which the original list left out.
unset osds_pgs osds_primary osds_weight osds_increment osds_size \
  osds_percent osds_variance osds_pgs_weight osds_pgs_weight_per_TB \
  pools_pg_count pools_pg_weight pools_percent pools_replica \
  manual_weight_list
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment