Skip to content

Instantly share code, notes, and snippets.

@drakonstein
Last active May 19, 2023 02:58
Show Gist options
  • Save drakonstein/46b7afc51fd62ad2d67161e575614de9 to your computer and use it in GitHub Desktop.
Save drakonstein/46b7afc51fd62ad2d67161e575614de9 to your computer and use it in GitHub Desktop.
Ceph cluster reweighting scripts. Not for the faint of heart. (Note: this is no longer necessary if you can use the balancer module.)
#!/bin/bash
# Print the gen_maps.sh usage text on stdout and terminate the script with
# exit status 1. Lists every option the argument parser below accepts; the
# values of --osdmap, --crush, --remove and --passes are handed on to
# weight_cluster.sh.
print_help() {
  echo "
usage: gen_maps.sh [options] [variables]
--cluster Specify cluster (default is ceph)
--osdmap Specify custom osdmap
--crush Specify custom crush map
--remove Specify servername and/or OSDs to remove (comma delimited list)
--passes Specify the maximum amount of passes per map
"
  exit 1
}
# Defaults. list_osds, reweight, threshold and increment are not read anywhere
# else in gen_maps.sh; they are kept so the script's variable set is unchanged.
cluster=ceph
list_osds=false
reweight=false
threshold=2
increment=0.01
# Each of these is either empty or a ready-made "--flag value" fragment that is
# spliced (unquoted) into the weight_cluster.sh command lines further down.
passes=
osdmap=
crush=
remove=

# Parse the command line into the globals above.
# Arguments: the script's positional parameters.
# An option name arms $variable; the following word is stored as its value.
# Calls print_help (usage + exit 1) for a word that is neither a known option
# nor the value of a pending option, and for an option that is left without a
# value at the end of the command line.
parse_args() {
  local arg
  variable=
  for arg in "$@"; do
    case "$arg" in
      --cluster | --osdmap | --crush | --remove | --passes)
        # strip the leading dashes: "--crush" -> "crush"
        variable=${arg#--}
        ;;
      *)
        case "$variable" in
          cluster) cluster=$arg ;;
          osdmap) osdmap="--osdmap $arg" ;;
          crush) crush="--crush $arg" ;;
          remove) remove="--remove $arg" ;;
          passes) passes="--passes $arg" ;;
          *) print_help ;;
        esac
        variable=
        ;;
    esac
  done
  # An option still waiting for its value means the command line was cut short.
  [[ -z $variable ]] || print_help
}
parse_args "$@"
# --- Step 1: search upwards from -5 for the first offset at which the dry run
# reports at least as many OSDs to move down as up.
echo Testing offsets...
offset=-5
while true; do
  echo Currently testing $offset
  # $osdmap, $crush and $remove hold "--flag value" or nothing, so they are
  # deliberately left unquoted to split into separate words.
  output=$(sudo ./weight_cluster.sh --cluster "$cluster" $osdmap $crush $remove -r --offset "$offset")
  # A dry run prints "Calculating <up> up; <down> down..."; flattened onto one
  # line, field 2 is the up count and field 4 the down count.
  diff=$(echo $output | awk '{print $4 - $2}')
  # Anything but an integer means the run failed; stop instead of looping
  # forever on a comparison that can never succeed.
  if ! [[ $diff =~ ^-?[0-9]+$ ]]; then
    echo "Could not read the up/down counts from weight_cluster.sh at offset $offset" >&2
    exit 1
  fi
  [ "$diff" -ge 0 ] && break
  offset=$(( offset + 1 ))
done

# --- Step 2: calculate maps for that offset and the one below it, in parallel.
echo Going to calculate maps for offsets $offset and $(( offset - 1 ))
sudo ./weight_cluster.sh --cluster "$cluster" $osdmap $crush $remove -r --offset "$offset" $passes --calculate &
sudo ./weight_cluster.sh --cluster "$cluster" $osdmap $crush $remove -r --offset "$(( offset - 1 ))" $passes --calculate &
echo "Pressing enter will stop the jobs"
read -r -s key

# Turn the "[N]" job numbers of any weight_cluster.sh jobs still listed into
# "%N" job specs and signal them.
jids=$(jobs | grep -F './weight_cluster.sh' | grep -Eo '\[[0-9]+\]' | grep -Eo '[0-9]+' | sed 's/^/%/')
if [ -n "$jids" ]; then
  kill $jids
fi

# --- Step 3: for every result map in the current directory, and then for the
# current map, count how often each value of the sixth output column occurs.
for map in cm."$cluster"-*; do
  # an unmatched glob stays literal; skip it rather than pass it on as a file
  [ -e "$map" ] || continue
  echo "---------------------------------------------"
  echo "$map"
  sudo ./weight_cluster.sh --cluster "$cluster" --crush "$map" -o all | awk '{print $6}' | sort | uniq -c
done
echo "---------------------------------------------"
echo Current Map
sudo ./weight_cluster.sh --cluster "$cluster" -o all | awk '{print $6}' | sort | uniq -c
#!/bin/bash
# NOTE(review): this header says "It is hardcoded to only weight based on the
# rbd pool.", yet the code below loops over every pool listed by `ceph df` —
# confirm which of the two is intended.
# It is assumed that your drives are larger than 1TB.
#
# Per-OSD tables, keyed by OSD id.
# PG count, summed over all pools
declare -A osds_pgs
# primary count (4th osdmaptool column), summed over all pools
declare -A osds_primary
# crush weight as last reported by osdmaptool, or derived from the disk size
declare -A osds_weight
# weight step chosen for the OSD in the current reweight pass
declare -A osds_increment
# digits of the size column of `ceph osd df`
declare -A osds_size
# percent column of `ceph osd df`
declare -A osds_percent
# variance column of `ceph osd df` with the first dot removed
declare -A osds_variance
# sum over pools of (PG count * that pool's pools_pg_weight)
declare -A osds_pgs_weight
# osds_pgs_weight / size * 1024
declare -A osds_pgs_weight_per_TB
# Per-pool tables, keyed by pool id.
# pg_num reported by osdmaptool
declare -A pools_pg_count
# relative weight of one PG of the pool
declare -A pools_pg_weight
# percent used, divided by min_size for erasure coded pools
declare -A pools_percent
# the pool's "size" setting
declare -A pools_replica
# osd id -> weight pairs given with --manual_weight
declare -A manual_weight_list
# Write the weight_cluster.sh usage summary to stdout, then leave the script
# with exit status 1.
print_help() {
  cat <<'EOF'

usage: weight_cluster.sh [options] [variables]
--cluster Specify cluster (default is ceph)
-o osds Specify OSDs to query (comma delimited list)
-r reweight Reweight the cluster
--offset Balance for variations in deployments
--passes Specify the maximum amount of times to pass over a map
before giving up. Default is 1500. Setting this to 0
will disable this function.
--osdmap Supply an osdmap to be used instead of getting one
from the cluster.
--crush Supply a crush map to be used instead of getting
one from the osdmap.
--remove Specify servername and/or OSDs to set the weight to
0.00. Servername will expand to all osds on the
server. (comma delimited list)
--manual_weight
Specify any OSDs you want to hardcode a weight for.
The format is osd:weight in a comma delimited list.

EOF
  exit 1
}
# Defaults for every setting the command line can change.
cluster=ceph
list_osds=false
reweight=false
calculate=false
threshold=2
increment=0.005
pg_offset=0
custom_osdmap=false
custom_crush=false
do_remove=false
do_manual_weight=false
passes=1500
list_all=false
# Start from a clean slate so values cannot leak in from the environment.
osds=
remove_me=
manual_weight_me=
crush_map=
osd_map=

# Parse the command line into the globals above.
# Arguments: the script's positional parameters.
# Options that take a value arm $variable; the next word is stored as that
# value. -r/--reweight and --calculate are plain switches and leave $variable
# untouched.
# Calls print_help (usage + exit 1) for a word that is neither a known option
# nor the value of a pending option, and for an option that is still waiting
# for its value when the command line ends.
parse_args() {
  local arg list osd weight
  variable=
  for arg in "$@"; do
    case "$arg" in
      --crush)
        variable=crush
        custom_crush=true
        ;;
      --osdmap)
        variable=osdmap
        custom_osdmap=true
        ;;
      --cluster)
        variable=cluster
        ;;
      --osds | -o)
        variable=osd
        list_osds=true
        ;;
      --reweight | -r)
        reweight=true
        ;;
      --calculate)
        calculate=true
        ;;
      --offset)
        variable=offset
        ;;
      --remove)
        variable=remove
        do_remove=true
        ;;
      --manual_weight)
        variable=manual_weight
        do_manual_weight=true
        ;;
      --passes)
        variable=passes
        ;;
      *)
        case "$variable" in
          crush) crush_map=$arg ;;
          osdmap) osd_map=$arg ;;
          cluster) cluster=$arg ;;
          osd)
            # comma list -> space list
            osds=${arg//,/ }
            # $diff is set here but not read anywhere else in this script
            if [[ $osds == diff ]]; then diff=true; else diff=false; fi
            ;;
          offset) pg_offset=$arg ;;
          remove) remove_me=${arg//,/ } ;;
          manual_weight)
            # "a:1,b:2" -> one "osd weight" pair per line
            list=$(tr ',:' '\n ' <<< "$arg")
            while read -r osd weight; do
              [[ -n $osd ]] || continue
              manual_weight_me="$manual_weight_me $osd"
              manual_weight_list[$osd]=$weight
            done <<< "$list"
            ;;
          passes)
            # Only a positive integer sets a limit; anything else disables it.
            # 10# keeps values such as "08" from being read as octal later on.
            if [[ $arg =~ ^[0-9]+$ ]] && (( 10#$arg > 0 )); then
              passes=$(( 10#$arg ))
            else
              passes=0
            fi
            ;;
          *) print_help ;;
        esac
        variable=
        ;;
    esac
  done
  # An option still waiting for its value means the command line was cut short.
  [[ -z $variable ]] || print_help
}
parse_args "$@"
# Working files: a crush map (cm) and an osdmap (om) under /tmp, named after the
# cluster, the PG offset and the start time in epoch seconds. Result maps reuse
# $name, so the naming scheme is left untouched.
name="${cluster}-offset_${pg_offset}-$(date +%s)"
cm=/tmp/cm.$name
om=/tmp/om.$name

# osdmap: copy the file given with --osdmap, otherwise fetch it from the
# cluster. Written as if/else rather than `a && b || c`, so a failed copy is
# reported instead of silently falling back to the live cluster's map.
if $custom_osdmap; then
  if ! cp "$osd_map" "$om"; then
    echo "Unable to copy osdmap '$osd_map' to $om" >&2
    exit 1
  fi
elif ! ceph --cluster "$cluster" osd getmap -o "$om" > /dev/null 2>&1; then
  echo "Unable to get the osdmap from cluster '$cluster'" >&2
  exit 1
fi

# crush map: copy the file given with --crush, otherwise export it from the
# osdmap obtained above.
if $custom_crush; then
  if ! cp "$crush_map" "$cm"; then
    echo "Unable to copy crush map '$crush_map' to $cm" >&2
    exit 1
  fi
elif ! osdmaptool "$om" --export-crush "$cm" > /dev/null 2>&1; then
  echo "Unable to export the crush map from $om" >&2
  exit 1
fi

# Text dump of the crush hierarchy; the host and OSD lists are scraped from it.
tree=$(crushtool -i "$cm" --tree 2>&1)
# ids of all "osd.N" entries (third column of the tree), numerically sorted
crush_osds=$(echo "$tree" | grep 'osd\.' | awk '{print $3}' | cut -d. -f2 | sort -n)
# Usage reports are always taken from the live cluster, even with custom maps.
cluster_df=$(ceph --cluster "$cluster" df)
osd_df=$(ceph --cluster "$cluster" osd df)
# fourth column of every tree line containing "host", sorted
strs=$(echo "$tree" | grep host | awk '{print $4}' | sort)
# Succeeds when $1 is a positive integer that is a power of two.
is_power_of_two() {
  [[ $1 =~ ^[0-9]+$ ]] || return 1
  local n=$(( 10#$1 ))
  (( n >= 1 && (n & (n - 1)) == 0 ))
}

# Collect per-pool figures from the POOLS section of `ceph df`:
#   pools_num            space separated list of pool ids
#   pools_replica[id]    the pool's "size" setting
#   pools_pg_count[id]   pg_num as reported by osdmaptool
#   pools_percent[id]    percent used, divided by min_size for EC pools
#   pools_pg_weight[id]  relative weight of one PG of the pool
pools_num=
pools_total_percent=0
pools_total_pg_count=0
output=$(echo "$cluster_df" | grep -A999 '^POOLS:$' | grep -Ev '^POOLS:$|^\s+NAME\s+')
# NOTE(review): the column order (name, id, used, percent used, ...) is taken
# from the variable names; confirm it against the ceph release in use.
while read -r pool_name pool_num pool_used pool_percent junk objects; do
  # A here-string always delivers at least one (possibly blank) line.
  [[ -n $pool_name ]] || continue
  pool_get=$(ceph --cluster "$cluster" osd pool get "$pool_name" all)
  # Erasure coded pools are recognised by their erasure_code_profile setting.
  if echo $pool_get | grep -q erasure_code_profile; then
    ec=true
    ec_mod=$(echo "$pool_get" | grep '^min_size: ' | awk '{print $2}')
  else
    ec=false
    ec_mod=1
  fi
  pools_num="$pools_num $pool_num"
  pools_replica[$pool_num]=$(echo "$pool_get" | grep -E '^size:\s' | awk '{print $2}')
  pg_count=$(osdmaptool "$om" --import-crush "$cm" --test-map-pgs --mark-up-in --clear-temp --pool "$pool_num" 2> /dev/null \
    | grep "^pool $pool_num pg_num " \
    | awk '{print $4}')
  # Reject missing, zero and non-power-of-two counts. The previous test joined
  # "less than 1" and "not a power of two" with AND, so it could never trigger
  # for a positive count.
  if ! is_power_of_two "$pg_count"; then
    echo "$pool_name has $pg_count PGs. This tool only supports PG counts that are power of 2."
    exit 1
  fi
  pools_pg_count[$pool_num]=$pg_count
  pools_total_pg_count=$(( pools_total_pg_count + pg_count ))
  if $ec; then
    pools_percent[$pool_num]=$(awk -v pct="$pool_percent" -v div="$ec_mod" 'BEGIN {print pct / div}')
  else
    pools_percent[$pool_num]=$pool_percent
  fi
  # NOTE(review): the total adds the raw percentage, not the EC-adjusted one.
  pools_total_percent=$(awk -v sum="$pools_total_percent" -v pct="$pool_percent" 'BEGIN {print sum + pct}')
done <<< "$output"

# Weight of one PG of a pool: the pool's share of the used space, divided by its
# share of all PGs.
for pool_num in $pools_num; do
  pools_pg_weight[$pool_num]=$(awk \
    -v pct="${pools_percent[$pool_num]}" \
    -v total_pct="$pools_total_percent" \
    -v pgs="${pools_pg_count[$pool_num]}" \
    -v total_pgs="$pools_total_pg_count" \
    'BEGIN {print pct / total_pct / pgs * total_pgs}')
done
# expand servernames to remove to the osds on the server
# Only runs when --remove was given. Every entry of $remove_me that exactly
# matches a name in $strs (the host list scraped from the crush tree) is
# replaced by the OSDs listed under that host.
if $do_remove
then
# Reduce the tree to its "host <name>" and "osd." fragments, let uniq -c count
# the run of "osd." lines after each host, flatten everything onto one line and
# keep the "host <name> <count>" triples.
counts=`echo $(echo "$tree" | grep -Eo '\s+host .+$|\s+osd\.' | uniq -c) | grep -Eo 'host [-[:alnum:]]+ [0-9]+'`
for remove in $remove_me
do
# Is this entry a host name? The exit status is tested on the next line.
echo "$strs" | grep -E "^$remove$" > /dev/null
if [ $? -eq 0 ]
then
# NOTE(review): this grep is unanchored, so a host name that is contained in
# another host's name would select more than one count — confirm names differ.
count=`echo "$counts" | grep $remove | awk '{print $3}'`
# First column of the osd lines found within $count lines after the host line.
# NOTE(review): this reuses $osds, which also holds the list given with -o.
osds=`echo $(echo "$tree" | grep -A$count $remove | grep -E '\s+osd\.' | awk '{print $1}')`
# Swap the host name for its OSD list wherever it sits in $remove_me
# (first word, last word, only word, or in the middle).
remove_me=`echo $remove_me | sed "s/^$remove /$osds /g" | sed "s/ $remove$/ $osds/g" | sed "s/^$remove$/$osds/g" | sed "s/ $remove / $osds /g"`
fi
done
# One entry per line, numerically sorted, duplicates dropped.
remove_me=`echo $remove_me | sed 's/ /\n/g' | sort -nu`
fi
# set specified weights for osds
# Every OSD selected with --remove gets weight 0.0 in the working crush map.
if $do_remove
then
for osd in $remove_me
do
crushtool -i $cm -o $cm --reweight-item osd.$osd 0.0 > /dev/null
done
fi
# Every OSD named with --manual_weight gets exactly the weight given for it.
if $do_manual_weight
then
# One id per line, numerically sorted, duplicates dropped.
manual_weight_me=`echo $manual_weight_me | sed 's/ /\n/g' | sort -nu`
for osd in $manual_weight_me
do
crushtool -i $cm -o $cm --reweight-item osd.$osd ${manual_weight_list[$osd]} > /dev/null
done
fi
# Set statistics for the OSDs
# Zero the per-OSD accumulators for every OSD found in the crush tree.
for osd in $crush_osds
do
osds_pgs[$osd]=0
osds_pgs_weight[$osd]=0
osds_primary[$osd]=0
done
# Simulate PG placement pool by pool with the working crush map and add each
# pool's contribution to the per-OSD totals.
for pool_num in $pools_num
do
# Keep the "osd.N ..." lines and reduce them to "N <col2> <col4> <col5>".
# NOTE(review): columns 2, 4 and 5 are read as PG count, primary count and
# crush weight — confirm against the osdmaptool release in use.
map=`osdmaptool $om --import-crush $cm --test-map-pgs --mark-up-in --clear-temp --pool $pool_num 2>/dev/null | grep '^osd\.' | awk '{print $1" "$2" "$4" "$5}' | sed 's/^osd.//'`
while read osd pgs primary weight
do
let osds_pgs[$osd]+=$pgs
# PG count scaled by the relative weight of a PG of this pool
osds_pgs_weight[$osd]=`awk 'BEGIN {print '${osds_pgs_weight[$osd]}' + ( '${pgs}' * '${pools_pg_weight[$pool_num]}' ) }'`
let osds_primary[$osd]+=$primary
# Overwritten for every pool; the value from the last pool processed remains.
osds_weight[$osd]=$weight
done<<<"$map"
done
# Get disk information
# Reads columns 1, 4, 7 and 8 of `ceph osd df` for every OSD row and derives
# the total size of all OSDs that take part in the automatic weighting.
# NOTE(review): those columns are treated as id, size, percent used and
# variance — confirm against the ceph release in use.
cluster_size=0
# Drop the header and summary rows before picking the columns.
disks=`echo "$osd_df" | grep -v 'TOTAL\|STDDEV\|REWEIGHT' | awk '{print $1" "$4" "$7" "$8}'`
#[ `echo "$disks" | awk '{print $2}' | grep -Eo .$ | sort -u | wc -l` != 1 ] && echo "OSD sizes are too different. This is not supported." && exit 1
#output=`echo "$disks" | awk '{print length($2)}' | sort -un`
#max=$(( `echo "$output" | tail -n1` - 1 ))
#min=$(( `echo "$output" | head -n1` - 1 ))
#diff=$(( $max - $min + 1 ))
while read osd size percent variance
do
# Keep only the digits of the size, dropping any unit suffix.
size=`echo $size | grep -Eo [[:digit:]]+`
#size=`printf "%0${max}d\n" $size`
#size=$(( ${size:0:$diff} + 1 ))
# exclude osds that have pre-determined weights
# (whole-word match of the id against the --remove and --manual_weight lists)
echo $remove_me $manual_weight_me | grep -E "^$osd$|^$osd\s|\s$osd\s|\s$osd$" > /dev/null || let cluster_size+=$size
osds_size[$osd]=$size
# The factor 1024 suggests the size is in GB — TODO confirm.
osds_pgs_weight_per_TB[$osd]=`awk 'BEGIN {print '${osds_pgs_weight[$osd]}' / '${size}' * 1024}'`
osds_percent[$osd]=$percent
# Stored with the first dot removed; not read again anywhere in this script.
osds_variance[$osd]=`echo $variance | sed 's/\.//'`
done<<<"$disks"
# Print information if asked for
# With -o/--osds, print one aligned row per requested OSD. The keyword "all"
# selects every OSD of the crush map and additionally prints the spread
# (largest minus smallest) of the PG_Weight_Per_TB column.
if $list_osds; then
  if [[ "$osds" == "all" ]]; then
    list_all=true
    osds=$crush_osds
  fi
  output=
  for osd in $osds; do
    # '+' separates the cells so that the spaces inside a cell survive column -t
    output+=$'\n'"osd.$osd+${osds_size[$osd]}GB+${osds_percent[$osd]}%+-+PG_Weight: ${osds_pgs_weight[$osd]:-0}+PG_Weight_Per_TB: ${osds_pgs_weight_per_TB[$osd]:-0}+Current: ${osds_pgs[$osd]:-0}+Primary: ${osds_primary[$osd]:-0}"
  done
  echo "$output" | column -t -s'+'
  # In the aligned table the eighth word of a row is the PG_Weight_Per_TB value.
  output=$(echo "$output" | column -t -s'+' | awk '{print $8}' | sort -n)
  min=$(head -n1 <<< "$output")
  max=$(tail -n1 <<< "$output")
  if $list_all; then
    echo "PG_Weight Difference per TB: "$(awk 'BEGIN {printf "%.3f\n", '${max}' - '${min}'}')
  fi
fi
# Succeeds when the id in $1 equals one of the remaining arguments as a whole
# word, so that osd 1 is not mistaken for a member of "12 13".
osd_is_listed() {
  local wanted=$1 candidate
  shift
  for candidate in "$@"; do
    [[ $candidate == "$wanted" ]] && return 0
  done
  return 1
}

# Calculate new map
# Preparation for the reweight loop: work out the cluster-wide PG weight per GB
# and give a starting weight to OSDs that osdmaptool reported no weight for.
if $reweight; then
  # Calculate PG average numbers
  pg_weight_per_GB=0
  for pool_num in $pools_num; do
    # PG copies of this pool per GB of automatically weighted capacity
    pg_per_GB=$(awk 'BEGIN {printf '${pools_pg_count[$pool_num]}' * '${pools_replica[$pool_num]}' / '$cluster_size'}')
    pg_weight_per_GB=$(awk 'BEGIN {printf '${pg_weight_per_GB}' + ( '${pg_per_GB}' * '${pools_pg_weight[$pool_num]}' ) }')
  done
  for osd in $crush_osds; do
    # Already has a weight from the PG mapping: nothing to do.
    [[ -n ${osds_weight[$osd]} ]] && continue
    # Removed or manually weighted OSDs keep the weight set for them. The lists
    # are left unquoted on purpose so each id becomes its own argument.
    osd_is_listed "$osd" $manual_weight_me $remove_me && continue
    # Starting weight from the disk size: a decimal point goes after the first
    # digit (e.g. 3724 -> 3.724). Presumably the size is in GB, which is where
    # the "drives larger than 1TB" assumption comes from — TODO confirm.
    osds_weight[$osd]=$(echo "$disks" | awk '/^'${osd}' / {print $2}' | grep -Eo '[0-9]+' | sed 's/^./&./')
    crushtool -i "$cm" -o "$cm" --reweight-item "osd.$osd" ${osds_weight[$osd]} > /dev/null
  done
fi
# Iterative reweighting. Each pass re-simulates the PG placement with the
# working crush map, sorts the OSDs into "too much" (down) and "too little"
# (up) relative to their target PG weight, and nudges their crush weights.
# The loop body only runs with -r/--reweight.
pass=0
while $reweight
do
# Give up once the pass limit is reached; a limit of 0 disables this check.
if [[ $passes != 0 && $passes -eq $pass ]]
then
echo "The map did not complete in $passes passes."
echo "The failed Crush Map file is cm.$name-pass_$pass-incomplete"
# The unfinished map is moved from /tmp into the current directory.
mv $cm cm.$name-pass_$pass-incomplete
chown $USER:$USER cm.$name-pass_$pass-incomplete
break
fi
# Reset the accumulators, then rebuild them from the current working map.
for osd in $crush_osds
do
osds_pgs[$osd]=0
osds_pgs_weight[$osd]=0
osds_primary[$osd]=0
done
for pool_num in $pools_num
do
# "osd.N ..." lines reduced to "N <col2> <col4> <col5>", read below as
# PG count, primary count and current crush weight.
map=`osdmaptool $om --import-crush $cm --test-map-pgs --mark-up-in --clear-temp --pool $pool_num 2>/dev/null | grep '^osd\.' | awk '{print $1" "$2" "$4" "$5}' | sed 's/^osd.//'`
while read osd pgs primary weight
do
let osds_pgs[$osd]+=$pgs
osds_pgs_weight[$osd]=`awk 'BEGIN {print '${osds_pgs_weight[$osd]}' + ( '${pgs}' * '${pools_pg_weight[$pool_num]}' ) }'`
let osds_primary[$osd]+=$primary
osds_weight[$osd]=$weight
done<<<"$map"
done
# OSDs whose weight has to rise / fall in this pass.
up=
down=
for osd in $crush_osds
do
# OSDs with removed or manually set weights are never adjusted.
echo "$remove_me
$manual_weight_me" | grep "^$osd$" > /dev/null && continue
# Target PG weight: cluster-wide PG weight per GB times the size of this OSD,
# minus the --offset value.
pg_avg=`awk 'BEGIN {print ( '${pg_weight_per_GB}' * '${osds_size[$osd]}' ) - '$pg_offset'}'`
# Rounded distance above and below the target.
high_diff=`awk 'BEGIN {printf "%.0f\n", '${osds_pgs_weight[$osd]:-0}' - '${pg_avg}'}'`
low_diff=`awk 'BEGIN {printf "%.0f\n", '${pg_avg}' - '${osds_pgs_weight[$osd]:-0}'}'`
if [ $high_diff -ge $threshold ]
then
down="$down $osd"
# The step grows with the distance: increment * round(diff / threshold),
# and never less than a single increment.
multiplier=`awk 'BEGIN {printf "%.0f\n", '${high_diff}' / '${threshold}'}'`
if [ $multiplier -ge 2 ]
then
osds_increment[$osd]=`awk 'BEGIN {print '${increment}' * '${multiplier}'}'`
else
osds_increment[$osd]="$increment"
fi
elif [ $low_diff -ge $threshold ]
then
up="$up $osd"
# Same step rule as above, for the upward direction.
multiplier=`awk 'BEGIN {printf "%.0f\n", '${low_diff}' / '${threshold}'}'`
if [ $multiplier -ge 2 ]
then
osds_increment[$osd]=`awk 'BEGIN {print '${increment}' * '${multiplier}'}'`
else
osds_increment[$osd]="$increment"
fi
fi
done
up_total=`echo $up | wc -w`
down_total=`echo $down | wc -w`
# Converged: every OSD is within the threshold. Keep the map in the current
# directory and stop.
if [[ $up_total -eq 0 && $down_total -eq 0 ]]
then
mv $cm cm.$name-pass_$pass
chown $USER:$USER cm.$name-pass_$pass
echo "All done weighting. The Crush Map file is cm.$name-pass_$pass"
break
fi
echo "Calculating $up_total up; $down_total down..."
# Without --calculate this is a dry run: report the counts once and stop.
if ! $calculate
then
echo
echo "Use --calculate to calculate the new weights for these OSDs"
break
fi
let pass+=1
echo "Pass #$pass"
# Reweighting maths
# Go up by increment
for osd in $up
do
new_weight=`awk 'BEGIN {printf "%.3f\n", '${osds_weight[$osd]:-0}' + '${osds_increment[$osd]}'}'`
crushtool -i $cm -o $cm --reweight-item osd.$osd $new_weight > /dev/null
done
# Go down by increment
for osd in $down
do
new_weight=`awk 'BEGIN {printf "%.3f\n", '${osds_weight[$osd]:-0}' - '${osds_increment[$osd]}'}'`
crushtool -i $cm -o $cm --reweight-item osd.$osd $new_weight > /dev/null
done
done
# Release all fourteen associative arrays declared with `declare -A` at the top
# of the script, osds_pgs_weight_per_TB included.
unset osds_pgs osds_primary osds_weight osds_increment osds_size osds_percent \
  osds_variance osds_pgs_weight osds_pgs_weight_per_TB pools_pg_count \
  pools_pg_weight pools_percent pools_replica manual_weight_list
#!/bin/bash
# This will weight your cluster based on an algorithm of how full each pool is and how large each PG is.
#
# Per-OSD tables, keyed by OSD id.
# PG count, summed over all pools
declare -A osds_pgs
# primary count (4th osdmaptool column), summed over all pools
declare -A osds_primary
# crush weight as last reported by osdmaptool, or derived from the disk size
declare -A osds_weight
# weight step chosen for the OSD in the current reweight pass
declare -A osds_increment
# digits of the size column of `ceph osd df`
declare -A osds_size
# percent column of `ceph osd df`
declare -A osds_percent
# variance column of `ceph osd df` with the first dot removed
declare -A osds_variance
# sum over pools of (PG count * that pool's pools_pg_weight)
declare -A osds_pgs_weight
# osds_pgs_weight / size * 1024
declare -A osds_pgs_weight_per_TB
# Per-pool tables, keyed by pool id.
# pg_num reported by osdmaptool
declare -A pools_pg_count
# relative weight of one PG of the pool
declare -A pools_pg_weight
# percent used, divided by min_size for erasure coded pools
declare -A pools_percent
# the pool's "size" setting
declare -A pools_replica
# osd id -> weight pairs given with --manual_weight
declare -A manual_weight_list
# Show how weight_cluster.sh is invoked (on stdout) and end the script with
# exit status 1.
print_help() {
  local usage_text='
usage: weight_cluster.sh [options] [variables]
--cluster Specify cluster (default is ceph)
-o osds Specify OSDs to query (comma delimited list)
-r reweight Reweight the cluster
--offset Balance for pgs in other pools
--passes Specify the maximum amount of times to pass over a map
before giving up. Default is 1500. Setting this to 0
will disable this function.
--osdmap Supply an osdmap to be used instead of getting one
from the cluster.
--crush Supply a crush map to be used instead of getting
one from the osdmap.
--remove Specify servername and/or OSDs to set the weight to
0.00. Servername will expand to all osds on the
server. (comma delimited list)
--manual_weight
Specify any OSDs you want to hardcode a weight for.
The format is osd:weight in a comma delimited list.
'
  printf '%s\n' "$usage_text"
  exit 1
}
# Defaults for every setting the command line can change.
cluster=ceph
list_osds=false
reweight=false
calculate=false
threshold=2
increment=0.005
pg_offset=2
custom_osdmap=false
custom_crush=false
do_remove=false
do_manual_weight=false
passes=1500
list_all=false
# Start from a clean slate so values cannot leak in from the environment.
osds=
remove_me=
manual_weight_me=
crush_map=
osd_map=

# Parse the command line into the globals above.
# Arguments: the script's positional parameters.
# Options that take a value arm $variable; the next word is stored as that
# value. -r/--reweight and --calculate are plain switches and leave $variable
# untouched.
# Calls print_help (usage + exit 1) for a word that is neither a known option
# nor the value of a pending option, and for an option that is still waiting
# for its value when the command line ends.
parse_args() {
  local arg list osd weight
  variable=
  for arg in "$@"; do
    case "$arg" in
      --crush)
        variable=crush
        custom_crush=true
        ;;
      --osdmap)
        variable=osdmap
        custom_osdmap=true
        ;;
      --cluster)
        variable=cluster
        ;;
      --osds | -o)
        variable=osd
        list_osds=true
        ;;
      --reweight | -r)
        reweight=true
        ;;
      --calculate)
        calculate=true
        ;;
      --offset)
        variable=offset
        ;;
      --remove)
        variable=remove
        do_remove=true
        ;;
      --manual_weight)
        variable=manual_weight
        do_manual_weight=true
        ;;
      --passes)
        variable=passes
        ;;
      *)
        case "$variable" in
          crush) crush_map=$arg ;;
          osdmap) osd_map=$arg ;;
          cluster) cluster=$arg ;;
          osd)
            # comma list -> space list
            osds=${arg//,/ }
            # $diff is set here but not read anywhere else in this script
            if [[ $osds == diff ]]; then diff=true; else diff=false; fi
            ;;
          offset) pg_offset=$arg ;;
          remove) remove_me=${arg//,/ } ;;
          manual_weight)
            # "a:1,b:2" -> one "osd weight" pair per line
            list=$(tr ',:' '\n ' <<< "$arg")
            while read -r osd weight; do
              [[ -n $osd ]] || continue
              manual_weight_me="$manual_weight_me $osd"
              manual_weight_list[$osd]=$weight
            done <<< "$list"
            ;;
          passes)
            # Only a positive integer sets a limit; anything else disables it.
            # 10# keeps values such as "08" from being read as octal later on.
            if [[ $arg =~ ^[0-9]+$ ]] && (( 10#$arg > 0 )); then
              passes=$(( 10#$arg ))
            else
              passes=0
            fi
            ;;
          *) print_help ;;
        esac
        variable=
        ;;
    esac
  done
  # An option still waiting for its value means the command line was cut short.
  [[ -z $variable ]] || print_help
}
parse_args "$@"
# Working files: a crush map (cm) and an osdmap (om) under /tmp, named after the
# cluster, the PG offset and the start time in epoch seconds. Result maps reuse
# $name, so the naming scheme is left untouched.
name="${cluster}-offset_${pg_offset}-$(date +%s)"
cm=/tmp/cm.$name
om=/tmp/om.$name

# osdmap: copy the file given with --osdmap, otherwise fetch it from the
# cluster. Written as if/else rather than `a && b || c`, so a failed copy is
# reported instead of silently falling back to the live cluster's map.
if $custom_osdmap; then
  if ! cp "$osd_map" "$om"; then
    echo "Unable to copy osdmap '$osd_map' to $om" >&2
    exit 1
  fi
elif ! ceph --cluster "$cluster" osd getmap -o "$om" > /dev/null 2>&1; then
  echo "Unable to get the osdmap from cluster '$cluster'" >&2
  exit 1
fi

# crush map: copy the file given with --crush, otherwise export it from the
# osdmap obtained above.
if $custom_crush; then
  if ! cp "$crush_map" "$cm"; then
    echo "Unable to copy crush map '$crush_map' to $cm" >&2
    exit 1
  fi
elif ! osdmaptool "$om" --export-crush "$cm" > /dev/null 2>&1; then
  echo "Unable to export the crush map from $om" >&2
  exit 1
fi

# Text dump of the crush hierarchy; the host and OSD lists are scraped from it.
tree=$(crushtool -i "$cm" --tree 2>&1)
# ids of all "osd.N" entries (third column of the tree), numerically sorted
crush_osds=$(echo "$tree" | grep 'osd\.' | awk '{print $3}' | cut -d. -f2 | sort -n)
# Usage reports are always taken from the live cluster, even with custom maps.
cluster_df=$(ceph --cluster "$cluster" df)
osd_df=$(ceph --cluster "$cluster" osd df)
# fourth column of every tree line containing "host", sorted
strs=$(echo "$tree" | grep host | awk '{print $4}' | sort)
# Succeeds when $1 is a positive integer that is a power of two.
is_power_of_two() {
  [[ $1 =~ ^[0-9]+$ ]] || return 1
  local n=$(( 10#$1 ))
  (( n >= 1 && (n & (n - 1)) == 0 ))
}

# Collect per-pool figures from the POOLS section of `ceph df`:
#   pools_num            space separated list of pool ids
#   pools_replica[id]    the pool's "size" setting
#   pools_pg_count[id]   pg_num as reported by osdmaptool
#   pools_percent[id]    percent used, divided by min_size for EC pools
#   pools_pg_weight[id]  relative weight of one PG of the pool
pools_num=
pools_total_percent=0
pools_total_pg_count=0
output=$(echo "$cluster_df" | grep -A999 '^POOLS:$' | grep -Ev '^POOLS:$|^\s+NAME\s+')
# NOTE(review): the column order (name, id, used, percent used, ...) is taken
# from the variable names; confirm it against the ceph release in use.
while read -r pool_name pool_num pool_used pool_percent junk objects; do
  # A here-string always delivers at least one (possibly blank) line.
  [[ -n $pool_name ]] || continue
  pool_get=$(ceph --cluster "$cluster" osd pool get "$pool_name" all)
  # Erasure coded pools are recognised by their erasure_code_profile setting.
  if echo $pool_get | grep -q erasure_code_profile; then
    ec=true
    ec_mod=$(echo "$pool_get" | grep '^min_size: ' | awk '{print $2}')
  else
    ec=false
    ec_mod=1
  fi
  pools_num="$pools_num $pool_num"
  pools_replica[$pool_num]=$(echo "$pool_get" | grep -E '^size:\s' | awk '{print $2}')
  pg_count=$(osdmaptool "$om" --import-crush "$cm" --test-map-pgs --mark-up-in --clear-temp --pool "$pool_num" 2> /dev/null \
    | grep "^pool $pool_num pg_num " \
    | awk '{print $4}')
  # Reject missing, zero and non-power-of-two counts. The previous test joined
  # "less than 1" and "not a power of two" with AND, so it could never trigger
  # for a positive count.
  if ! is_power_of_two "$pg_count"; then
    echo "$pool_name has $pg_count PGs. This tool only supports PG counts that are power of 2."
    exit 1
  fi
  pools_pg_count[$pool_num]=$pg_count
  pools_total_pg_count=$(( pools_total_pg_count + pg_count ))
  if $ec; then
    pools_percent[$pool_num]=$(awk -v pct="$pool_percent" -v div="$ec_mod" 'BEGIN {print pct / div}')
  else
    pools_percent[$pool_num]=$pool_percent
  fi
  # NOTE(review): the total adds the raw percentage, not the EC-adjusted one.
  pools_total_percent=$(awk -v sum="$pools_total_percent" -v pct="$pool_percent" 'BEGIN {print sum + pct}')
done <<< "$output"

# Weight of one PG of a pool: the pool's share of the used space, divided by its
# share of all PGs.
for pool_num in $pools_num; do
  pools_pg_weight[$pool_num]=$(awk \
    -v pct="${pools_percent[$pool_num]}" \
    -v total_pct="$pools_total_percent" \
    -v pgs="${pools_pg_count[$pool_num]}" \
    -v total_pgs="$pools_total_pg_count" \
    'BEGIN {print pct / total_pct / pgs * total_pgs}')
done
# expand servernames to remove to the osds on the server
# Only runs when --remove was given. Every entry of $remove_me that exactly
# matches a name in $strs (the host list scraped from the crush tree) is
# replaced by the OSDs listed under that host.
if $do_remove
then
# Reduce the tree to its "host <name>" and "osd." fragments, let uniq -c count
# the run of "osd." lines after each host, flatten everything onto one line and
# keep the "host <name> <count>" triples.
counts=`echo $(echo "$tree" | grep -Eo '\s+host .+$|\s+osd\.' | uniq -c) | grep -Eo 'host [-[:alnum:]]+ [0-9]+'`
for remove in $remove_me
do
# Is this entry a host name? The exit status is tested on the next line.
echo "$strs" | grep -E "^$remove$" > /dev/null
if [ $? -eq 0 ]
then
# NOTE(review): this grep is unanchored, so a host name that is contained in
# another host's name would select more than one count — confirm names differ.
count=`echo "$counts" | grep $remove | awk '{print $3}'`
# First column of the osd lines found within $count lines after the host line.
# NOTE(review): this reuses $osds, which also holds the list given with -o.
osds=`echo $(echo "$tree" | grep -A$count $remove | grep -E '\s+osd\.' | awk '{print $1}')`
# Swap the host name for its OSD list wherever it sits in $remove_me
# (first word, last word, only word, or in the middle).
remove_me=`echo $remove_me | sed "s/^$remove /$osds /g" | sed "s/ $remove$/ $osds/g" | sed "s/^$remove$/$osds/g" | sed "s/ $remove / $osds /g"`
fi
done
# One entry per line, numerically sorted, duplicates dropped.
remove_me=`echo $remove_me | sed 's/ /\n/g' | sort -nu`
fi
# set specified weights for osds
# Every OSD selected with --remove gets weight 0.0 in the working crush map.
if $do_remove
then
for osd in $remove_me
do
crushtool -i $cm -o $cm --reweight-item osd.$osd 0.0 > /dev/null
done
fi
# Every OSD named with --manual_weight gets exactly the weight given for it.
if $do_manual_weight
then
# One id per line, numerically sorted, duplicates dropped.
manual_weight_me=`echo $manual_weight_me | sed 's/ /\n/g' | sort -nu`
for osd in $manual_weight_me
do
crushtool -i $cm -o $cm --reweight-item osd.$osd ${manual_weight_list[$osd]} > /dev/null
done
fi
# Set statistics for the OSDs
# Zero the per-OSD accumulators for every OSD found in the crush tree.
for osd in $crush_osds
do
osds_pgs[$osd]=0
osds_pgs_weight[$osd]=0
osds_primary[$osd]=0
done
# Simulate PG placement pool by pool with the working crush map and add each
# pool's contribution to the per-OSD totals.
for pool_num in $pools_num
do
# Keep the "osd.N ..." lines and reduce them to "N <col2> <col4> <col5>".
# NOTE(review): columns 2, 4 and 5 are read as PG count, primary count and
# crush weight — confirm against the osdmaptool release in use.
map=`osdmaptool $om --import-crush $cm --test-map-pgs --mark-up-in --clear-temp --pool $pool_num 2>/dev/null | grep '^osd\.' | awk '{print $1" "$2" "$4" "$5}' | sed 's/^osd.//'`
while read osd pgs primary weight
do
let osds_pgs[$osd]+=$pgs
# PG count scaled by the relative weight of a PG of this pool
osds_pgs_weight[$osd]=`awk 'BEGIN {print '${osds_pgs_weight[$osd]}' + ( '${pgs}' * '${pools_pg_weight[$pool_num]}' ) }'`
let osds_primary[$osd]+=$primary
# Overwritten for every pool; the value from the last pool processed remains.
osds_weight[$osd]=$weight
done<<<"$map"
done
# Get disk information
# Reads columns 1, 4, 7 and 8 of `ceph osd df` for every OSD row and derives
# the total size of all OSDs that take part in the automatic weighting.
# NOTE(review): those columns are treated as id, size, percent used and
# variance — confirm against the ceph release in use.
cluster_size=0
# Drop the header and summary rows before picking the columns.
disks=`echo "$osd_df" | grep -v 'TOTAL\|STDDEV\|REWEIGHT' | awk '{print $1" "$4" "$7" "$8}'`
#[ `echo "$disks" | awk '{print $2}' | grep -Eo .$ | sort -u | wc -l` != 1 ] && echo "OSD sizes are too different. This is not supported." && exit 1
#output=`echo "$disks" | awk '{print length($2)}' | sort -un`
#max=$(( `echo "$output" | tail -n1` - 1 ))
#min=$(( `echo "$output" | head -n1` - 1 ))
#diff=$(( $max - $min + 1 ))
while read osd size percent variance
do
# Keep only the digits of the size, dropping any unit suffix.
size=`echo $size | grep -Eo [[:digit:]]+`
#size=`printf "%0${max}d\n" $size`
#size=$(( ${size:0:$diff} + 1 ))
# exclude osds that have pre-determined weights
# (whole-word match of the id against the --remove and --manual_weight lists)
echo $remove_me $manual_weight_me | grep -E "^$osd$|^$osd\s|\s$osd\s|\s$osd$" > /dev/null || let cluster_size+=$size
osds_size[$osd]=$size
# The factor 1024 suggests the size is in GB — TODO confirm.
osds_pgs_weight_per_TB[$osd]=`awk 'BEGIN {print '${osds_pgs_weight[$osd]}' / '${size}' * 1024}'`
osds_percent[$osd]=$percent
# Stored with the first dot removed; not read again anywhere in this script.
osds_variance[$osd]=`echo $variance | sed 's/\.//'`
done<<<"$disks"
# Print information if asked for
# With -o/--osds, print one aligned row per requested OSD. The keyword "all"
# selects every OSD of the crush map and additionally prints the spread
# (largest minus smallest) of the PG_Weight_Per_TB column.
if $list_osds; then
  if [[ "$osds" == "all" ]]; then
    list_all=true
    osds=$crush_osds
  fi
  output=
  for osd in $osds; do
    # '+' separates the cells so that the spaces inside a cell survive column -t
    output+=$'\n'"osd.$osd+${osds_size[$osd]}GB+${osds_percent[$osd]}%+-+PG_Weight: ${osds_pgs_weight[$osd]:-0}+PG_Weight_Per_TB: ${osds_pgs_weight_per_TB[$osd]:-0}+Current: ${osds_pgs[$osd]:-0}+Primary: ${osds_primary[$osd]:-0}"
  done
  echo "$output" | column -t -s'+'
  # In the aligned table the eighth word of a row is the PG_Weight_Per_TB value.
  output=$(echo "$output" | column -t -s'+' | awk '{print $8}' | sort -n)
  min=$(head -n1 <<< "$output")
  max=$(tail -n1 <<< "$output")
  if $list_all; then
    echo "PG_Weight Difference per TB: "$(awk 'BEGIN {printf "%.3f\n", '${max}' - '${min}'}')
  fi
fi
# Succeeds when the id in $1 equals one of the remaining arguments as a whole
# word, so that osd 1 is not mistaken for a member of "12 13".
osd_is_listed() {
  local wanted=$1 candidate
  shift
  for candidate in "$@"; do
    [[ $candidate == "$wanted" ]] && return 0
  done
  return 1
}

# Calculate new map
# Preparation for the reweight loop: work out the cluster-wide PG weight per GB
# and give a starting weight to OSDs that osdmaptool reported no weight for.
if $reweight; then
  # Calculate PG average numbers
  pg_weight_per_GB=0
  for pool_num in $pools_num; do
    # PG copies of this pool per GB of automatically weighted capacity
    pg_per_GB=$(awk 'BEGIN {printf '${pools_pg_count[$pool_num]}' * '${pools_replica[$pool_num]}' / '$cluster_size'}')
    pg_weight_per_GB=$(awk 'BEGIN {printf '${pg_weight_per_GB}' + ( '${pg_per_GB}' * '${pools_pg_weight[$pool_num]}' ) }')
  done
  for osd in $crush_osds; do
    # Already has a weight from the PG mapping: nothing to do.
    [[ -n ${osds_weight[$osd]} ]] && continue
    # Removed or manually weighted OSDs keep the weight set for them. The lists
    # are left unquoted on purpose so each id becomes its own argument.
    osd_is_listed "$osd" $manual_weight_me $remove_me && continue
    # Starting weight from the disk size: a decimal point goes after the first
    # digit (e.g. 3724 -> 3.724). Presumably the size is in GB — TODO confirm.
    osds_weight[$osd]=$(echo "$disks" | awk '/^'${osd}' / {print $2}' | grep -Eo '[0-9]+' | sed 's/^./&./')
    crushtool -i "$cm" -o "$cm" --reweight-item "osd.$osd" ${osds_weight[$osd]} > /dev/null
  done
fi
# Iterative reweighting. Each pass re-simulates the PG placement with the
# working crush map, sorts the OSDs into "too much" (down) and "too little"
# (up) relative to their target PG weight, and nudges their crush weights.
# The loop body only runs with -r/--reweight.
pass=0
while $reweight
do
# Give up once the pass limit is reached; a limit of 0 disables this check.
if [[ $passes != 0 && $passes -eq $pass ]]
then
echo "The map did not complete in $passes passes."
echo "The failed Crush Map file is cm.$name-pass_$pass-incomplete"
# The unfinished map is moved from /tmp into the current directory.
mv $cm cm.$name-pass_$pass-incomplete
chown $USER:$USER cm.$name-pass_$pass-incomplete
break
fi
# Reset the accumulators, then rebuild them from the current working map.
for osd in $crush_osds
do
osds_pgs[$osd]=0
osds_pgs_weight[$osd]=0
osds_primary[$osd]=0
done
for pool_num in $pools_num
do
# "osd.N ..." lines reduced to "N <col2> <col4> <col5>", read below as
# PG count, primary count and current crush weight.
map=`osdmaptool $om --import-crush $cm --test-map-pgs --mark-up-in --clear-temp --pool $pool_num 2>/dev/null | grep '^osd\.' | awk '{print $1" "$2" "$4" "$5}' | sed 's/^osd.//'`
while read osd pgs primary weight
do
let osds_pgs[$osd]+=$pgs
osds_pgs_weight[$osd]=`awk 'BEGIN {print '${osds_pgs_weight[$osd]}' + ( '${pgs}' * '${pools_pg_weight[$pool_num]}' ) }'`
let osds_primary[$osd]+=$primary
osds_weight[$osd]=$weight
done<<<"$map"
done
# OSDs whose weight has to rise / fall in this pass.
up=
down=
for osd in $crush_osds
do
# OSDs with removed or manually set weights are never adjusted.
echo "$remove_me
$manual_weight_me" | grep "^$osd$" > /dev/null && continue
# Target PG weight: cluster-wide PG weight per GB times the size of this OSD,
# minus the --offset value.
pg_avg=`awk 'BEGIN {print ( '${pg_weight_per_GB}' * '${osds_size[$osd]}' ) - '$pg_offset'}'`
# Rounded distance above and below the target.
high_diff=`awk 'BEGIN {printf "%.0f\n", '${osds_pgs_weight[$osd]:-0}' - '${pg_avg}'}'`
low_diff=`awk 'BEGIN {printf "%.0f\n", '${pg_avg}' - '${osds_pgs_weight[$osd]:-0}'}'`
if [ $high_diff -ge $threshold ]
then
down="$down $osd"
# The step grows with the distance: increment * round(diff / threshold),
# and never less than a single increment.
multiplier=`awk 'BEGIN {printf "%.0f\n", '${high_diff}' / '${threshold}'}'`
if [ $multiplier -ge 2 ]
then
osds_increment[$osd]=`awk 'BEGIN {print '${increment}' * '${multiplier}'}'`
else
osds_increment[$osd]="$increment"
fi
elif [ $low_diff -ge $threshold ]
then
up="$up $osd"
# Same step rule as above, for the upward direction.
multiplier=`awk 'BEGIN {printf "%.0f\n", '${low_diff}' / '${threshold}'}'`
if [ $multiplier -ge 2 ]
then
osds_increment[$osd]=`awk 'BEGIN {print '${increment}' * '${multiplier}'}'`
else
osds_increment[$osd]="$increment"
fi
fi
done
up_total=`echo $up | wc -w`
down_total=`echo $down | wc -w`
# Converged: every OSD is within the threshold. Keep the map in the current
# directory and stop.
if [[ $up_total -eq 0 && $down_total -eq 0 ]]
then
mv $cm cm.$name-pass_$pass
chown $USER:$USER cm.$name-pass_$pass
echo "All done weighting. The Crush Map file is cm.$name-pass_$pass"
break
fi
echo "Calculating $up_total up; $down_total down..."
# Without --calculate this is a dry run: report the counts once and stop.
if ! $calculate
then
echo
echo "Use --calculate to calculate the new weights for these OSDs"
break
fi
let pass+=1
echo "Pass #$pass"
# Reweighting maths
# Go up by increment
for osd in $up
do
new_weight=`awk 'BEGIN {printf "%.3f\n", '${osds_weight[$osd]:-0}' + '${osds_increment[$osd]}'}'`
crushtool -i $cm -o $cm --reweight-item osd.$osd $new_weight > /dev/null
done
# Go down by increment
for osd in $down
do
new_weight=`awk 'BEGIN {printf "%.3f\n", '${osds_weight[$osd]:-0}' - '${osds_increment[$osd]}'}'`
crushtool -i $cm -o $cm --reweight-item osd.$osd $new_weight > /dev/null
done
done
# Release all fourteen associative arrays declared with `declare -A` at the top
# of the script, osds_pgs_weight_per_TB included.
unset osds_pgs osds_primary osds_weight osds_increment osds_size osds_percent \
  osds_variance osds_pgs_weight osds_pgs_weight_per_TB pools_pg_count \
  pools_pg_weight pools_percent pools_replica manual_weight_list
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment