Last active
March 24, 2023 15:01
-
-
Save umbertogriffo/fe1bce24f8e9ee68c75f to your computer and use it in GitHub Desktop.
This code takes a snapshot of all HBase tables, using the snapshot command (No file copies are performed). Tested on CDH-5.4.4-1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Before running, check that the hbase.snapshot.enabled property in hbase-site.xml is set to true.
# To execute this script, launch it from a shell with: hbase shell HBaseBackup.rb
# Destination HBase root for exported snapshots. Only the optional (commented-out)
# ExportSnapshot step in backup reads it. The form is hdfs://<host>:<port>/<path>;
# with three slashes the authority is empty and "srv2:8082" becomes part of the path.
@clusterToSave = "hdfs://srv2:8082/hbase"
# CHECK THE PATH OF HBase lib
# Comma-separated list of the HBase jars, handed to ExportSnapshot as --libjars.
# Built with Dir.glob so no subshell is spawned and no trailing comma is produced.
@libjars = Dir.glob("/opt/cloudera/parcels/CDH-5.4.4-1.cdh5.4.4.p0.4/lib/hbase/*.jar").sort.join(",")
# Tables whose *whole* name matches one of these patterns are not backed up.
# The patterns are anchored with \A...\z so that, for example, "test_.*" skips
# "test_users" but no longer skips "latest_users".
@ignore = [
  /\Azipkin\..*\z/i,
  /\A.*_temp\z/i,
  /\A.*tmp\z/i,
  /\Atest_.*\z/i,
  /\A.*_test\z/i,
  /\A.*_old\z/i
]
# Number of mappers for the ExportSnapshot job (passed as a string argument).
@mappers = "2"
include Java
java_import org.apache.hadoop.hbase.HBaseConfiguration
java_import org.apache.hadoop.hbase.client.HBaseAdmin
java_import org.apache.hadoop.hbase.snapshot.ExportSnapshot
java_import org.apache.hadoop.util.ToolRunner
# Cluster configuration and the admin handle shared by every method below.
@conf = org.apache.hadoop.hbase.HBaseConfiguration.create
@admin = org.apache.hadoop.hbase.client.HBaseAdmin.new(@conf)
# Deletes every existing snapshot whose name matches ".*-BACKUP-.*", i.e. the
# snapshots produced by earlier runs of this script.
#
# Reads the shared @admin handle; prints one line per deleted snapshot.
# NOTE(review): the script calls this before the new snapshots are taken, so a
# failure later in the run leaves no backup snapshots behind - confirm that is
# acceptable.
def cleanup
  old_snapshots = @admin.listSnapshots(".*-BACKUP-.*").to_a
  old_snapshots.each do |snapshot|
    puts "-----> Cleaning up old snapshot #{snapshot.name}"
    @admin.deleteSnapshot(snapshot.name.to_java_bytes)
  end
end
# Returns the names (Array of String) of all tables that should be backed up:
# every table reported by @admin.listTables(".*") except those whose name
# matches at least one pattern in @ignore.
def tables_to_backup
  names = @admin.listTables(".*").to_a.map do |descriptor|
    descriptor.getTableName.getNameAsString.to_s
  end
  # Regexp#match (not match?) keeps this usable on the older JRuby bundled with the HBase shell.
  names.reject { |name| @ignore.any? { |pattern| pattern.match(name) } }
end
# Takes a snapshot of one table through the shared @admin handle.
#
# table - the table name as a String (may be namespace-qualified, "ns:table").
#
# The snapshot is named "<table>-BACKUP-<YYYYmmdd_HHMMSS>" so that cleanup can
# find it later via the ".*-BACKUP-.*" pattern.
def backup(table)
  # HBase rejects ':' in snapshot names, so the namespace separator of a
  # qualified table name is replaced; the table itself is still addressed by
  # its real name.
  timestamp = Time.now.strftime("%Y%m%d_%H%M%S")
  snapshot_name = "#{table.tr(':', '_')}-BACKUP-#{timestamp}"
  puts "-----> Creating snapshot #{snapshot_name}"
  @admin.snapshot(snapshot_name.to_java_bytes, table.to_java_bytes)
  ##
  # Optional export step (disabled):
  # The ExportSnapshot tool copies all the data related to a snapshot (hfiles, logs, snapshot metadata) to another cluster.
  # The tool executes a Map-Reduce job, similar to distcp, to copy files between the two clusters,
  # and since it works at file-system level the hbase cluster does not have to be online.
  #puts "-----> Export snapshot " + snapshot_name
  #@es = org.apache.hadoop.hbase.snapshot.ExportSnapshot.new
  #args = ["--libjars", @libjars, "-snapshot", snapshot_name, "-copy-to", @clusterToSave, "-mappers", @mappers]
  #java_args = args.to_java :String
  #ToolRunner.run(@conf, @es, java_args)
  #puts "-----> Deleting snapshot " + snapshot_name
  #@admin.deleteSnapshot(snapshot_name.to_java_bytes)
end
# Script body: drop the snapshots of previous runs, work out which tables to
# back up, snapshot each of them, then leave the HBase shell.
begin
  cleanup
  tables = tables_to_backup
  puts "-----> Tables to backup"
  puts tables
  puts "-----> Starting backup"
  tables.each { |t| backup(t) }
  puts "-----> Done"
ensure
  # Release the admin connection even when one of the calls above raises.
  @admin.close
end
exit 0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment