Ceph command cheatsheet
ceph -w
ceph health detail
ceph osd df
ceph osd find
ceph osd blocked-by
ceph osd pool ls detail
ceph osd pool get rbd all
ceph pg dump | grep pgid
ceph pg pgid
ceph osd primary-affinity 3 1.0
ceph osd map rbd obj
#Enable/Disable osd
ceph osd out 0
ceph osd in 0
#PG repair
ceph osd map rbd file
ceph pg 0.1a query
ceph pg 0.1a
ceph pg scrub 0.1a #Checks that objects exist on the OSDs
ceph pg deep-scrub 0.1a #Checks object integrity on the OSDs
ceph pg repair 0.1a #Fix problems
#Delete osd
ceph osd tree
ceph osd out osd.1
sudo systemctl stop ceph-osd@1
ceph osd crush remove osd.1
ceph auth del osd.1
ceph osd rm osd.1
osd primary-affinity
ceph pg pgid mark_unfound_lost revert|delete
ceph osd lost osdid --yes-i-really-mean-it
#Ceph control
ceph daemon osd.1 config get osd_scrub_min_interval
ceph --admin-daemon socket-file-path command
/var/run/ceph/$cluster-$type.$id.asok
perf dump
config show
dump_historic_ops
scrub_path
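Example invocation against the admin socket (assumes OSD id 1 and the default socket path; adjust for your cluster):
ceph --admin-daemon /var/run/ceph/ceph-osd.1.asok config show
ceph --admin-daemon /var/run/ceph/ceph-osd.1.asok perf dump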
#Version check
ceph tell mon.* version
ceph tell osd.* version
#Updates
Update one mon at a time
/usr/share/ceph-ansible/infrastructure-playbooks/rolling_updates.yaml
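A possible invocation (assumes ceph-ansible is installed and its default inventory is already configured):
cd /usr/share/ceph-ansible
ansible-playbook infrastructure-playbooks/rolling_updates.yaml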
#Ceph cluster flags | |
ceph osd set | |
noup #Do not auto mark osd as up state | |
nopdown #Do not auto mark OSD as down state | |
noout # Do not remove any osds from crush map. Used when performinig maintenance os parts of the cluster. Prevents crush from auto reblancing the cluster when OSDs are stopped. | |
noin # Mons will mark running OSDs with the in state. prevents data from beeing auto allocated to that specific OSD | |
norecover # Prevents any recovery operations. Used when performing maintenance or a cluster shutdown | |
nobackfill # Prevents any backfill operation. Used when performing maintenance of a cluster. | |
noscrub # No scrubbing operations. Scrubbing has performance impact on a PG on the OSD. If a OSD is too slow it will be marked as down. | |
nodeep-scrub | |
norebalance # Prevents rebalancing from runniung | |
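Example of toggling a flag around maintenance (noout chosen only as an illustration):
ceph osd set noout
ceph osd dump | grep flags #Verify which flags are currently set
ceph osd unset noout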
#Crush map
ceph osd getcrushmap -o map.bin
crushtool -d map.bin -o map.txt
crushtool -i map.bin --test --show-mapping --rule=5 --num-rep 3
crushtool -c map.txt -o map.bin
ceph osd setcrushmap -i map.bin
ceph osd crush class ls
ceph osd crush tree
ceph osd crush rule ls
ceph osd pool create fast_ssd 32 32 onssd
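The fast_ssd pool above assumes a CRUSH rule named onssd already exists; one possible way to create such a rule limited to the ssd device class (root default and failure domain host are assumptions):
ceph osd crush rule create-replicated onssd default host ssd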
ceph pg dump pgs_brief
ceph pg dump pgs_brief | grep ^46 #Pool ID
ceph osd lspools
ceph df
#Buckets
ceph osd crush add-bucket default-pool root
ceph osd crush add-bucket rack1 rack
ceph osd crush add-bucket rack2 rack
ceph osd crush add-bucket hosta host
ceph osd crush add-bucket hostb host
ceph osd crush move rack1 root=default-pool
ceph osd crush move rack2 root=default-pool
ceph osd crush move hosta rack=rack1
ceph osd crush move hostb rack=rack2
ceph osd crush tree
#OSD tools
ceph osd set-full-ratio 0.97
ceph osd set-nearfull-ratio 0.9
ceph osd dump
ceph osd getmap -o ./map.bin
osdmaptool --print ./map.bin
osdmaptool --export-crush ./crush.bin ./map.bin
crushtool -d ./crush.bin -o ./crush.txt
crushtool -c ./crush.txt -o crushnew.bin
osdmaptool --import-crush ./crushnew.bin ./map.bin
osdmaptool --test-map-pgs-dump ./map.bin
#OSD files
/var/lib/ceph/osd/ceph-1/current/0.1a_head/
#Moving an OSD journal to an SSD
ceph osd set noout
systemctl stop ceph-osd@3
ceph-osd -i 3 --flush-journal
rm -rf /var/lib/ceph/osd/ceph-3/journal
ln -s /dev/sdc1 /var/lib/ceph/osd/ceph-3/journal
ceph-osd -i 3 --mkjournal
systemctl start ceph-osd@3
ceph osd unset noout
#Placement group calc
Total placement groups = (OSDs * 100) / Number of replicas
Start between 100 and 200; never go above 300
ceph osd pool get rbd pg_num #Total number of PGs in the pool
ceph osd pool get rbd pgp_num #Total number of PGs used for hashing in the pool
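Worked example (assumed cluster: 60 OSDs, replica size 3):
(60 * 100) / 3 = 2000 PGs in total across all pools, rounded per pool to a power of two
ceph osd pool set rbd pg_num 2048 #Illustration only; split the total across your pools
ceph osd pool set rbd pgp_num 2048 #Keep pgp_num in step with pg_num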
#Rados
rados -p rbd put file /etc/ceph/ceph.conf
#Performance
iostat -x
#OS Tuning
sysctl net.ipv4.tcp_mem
sysctl net.ipv4.tcp_rmem
sysctl net.ipv4.tcp_wmem
vm.dirty_background_ratio
vm.dirty_ratio
vm.dirty_background_bytes
vm.dirty_bytes
vm.zone_reclaim_mode
vm.swappiness
vm.min_free_kbytes
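Sketch of reading and persisting a value (the swappiness value 10 is only an illustration, not a recommendation):
sysctl vm.swappiness #Read the current value
sysctl -w vm.swappiness=10 #Set at runtime
echo "vm.swappiness = 10" >> /etc/sysctl.d/99-ceph.conf #Persist across reboots; file name assumed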
tuned-adm list
tuned-adm active
tuned-adm profile network-latency
tuned-adm profile network-throughput
Enable jumbo frames
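Example of raising the MTU (eth0 and 9000 are assumptions; every NIC and switch on the cluster network must be configured for the same MTU):
ip link set dev eth0 mtu 9000
ip link show eth0 | grep mtu #Verify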
cat /sys/block/device/queue/scheduler
noop deadline [cfq]
sudo sh -c "echo deadline > /sys/block/sdb/queue/scheduler"
Schedulers:
noop # The noop elevator does nothing; it turns the disk queue into a FIFO. Select it when the back-end storage device can also reorder and merge requests. Default inside VMs. Useful for devices such as SSDs that respond to requests more quickly than they are likely to arrive.
deadline # Groups queued I/O requests into read/write batches. Tries to provide a guaranteed latency for requests and prioritizes reads over writes. Ceph choice for SATA and SAS drives.
cfq # Completely Fair Queueing. Multiple I/O classes and priorities so an admin can prioritize some processes over others for disk access. Controlled with the ionice command.
blk-mq # Multi-queue block layer. Designed to handle storage with latencies of microseconds, millions of IOPS and large internal parallelism, not spinning drives.
osd_mkfs_options_xfs -f -i size=2048
use noatime,largeio,inode64,swalloc
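Roughly how these map to FileStore settings in ceph.conf (a sketch; verify option names against your Ceph version):
[osd]
osd_mkfs_options_xfs = -f -i size=2048
osd_mount_options_xfs = noatime,largeio,inode64,swalloc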
#Design scaling
No more than six OSD journals per SATA SSD device.
No more than 12 OSD journals per NVMe device.
Warning: When an SSD or NVMe device used to host journals fails, every OSD using it to host its journal also becomes unavailable.