ndimiduk · February 9, 2023 08:47 · eangelou · Jun 17, 2020 · pandeysa · Feb 3, 2023
diff --git a/drop_empty_regions.rb b/drop_empty_regions.rb
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #

 # Drop all empty regions for a table. Dropping a region is implemented as a merge into
 # an adjacent region.
 #
 #    $ hbase org.jruby.Main drop_empty_regions.rb mytable

 include Java

 import java.lang.Exception
 import java.lang.RuntimeException

 import org.apache.hadoop.hbase.HBaseConfiguration
 import org.apache.hadoop.hbase.TableName
 import org.apache.hadoop.hbase.client.Admin
 import org.apache.hadoop.hbase.client.ConnectionFactory
 import org.apache.hadoop.hbase.ServerName
 import org.apache.hadoop.hbase.HRegionInfo
 import org.apache.hadoop.hbase.util.Bytes
 import org.apache.commons.logging.LogFactory

 NAME = "drop_empty_regions.rb"

 # Create a logger and disable the DEBUG-level annoying client logging
 def configureLogging()
  # Configure log4j to not spew so much
  logger = org.apache.log4j.Logger.getLogger("org.apache.hadoop.hbase.client")
  logger.setLevel(org.apache.log4j.Level::INFO)
  logger = org.apache.log4j.Logger.getLogger("org.apache.zookeeper")
  logger.setLevel(org.apache.log4j.Level::WARN)
  return LogFactory.getLog(NAME)
 end

 # Get configuration instance
 def getConfiguration()
  config = HBaseConfiguration.create()
  # No prefetching on hbase:meta This is for versions pre 0.99. Newer versions do not prefetch.
  config.setInt("hbase.client.prefetch.limit", 1)
  # Make a config that retries at short intervals many times
  config.setInt("hbase.client.pause", 500)
  config.setInt("hbase.client.retries.number", 100)
  return config
 end

 # find my HRegionInfo
 def getTargetInfo(infos, emptyRegion)
  infos.each do |region|
    return region if region.getRegionNameAsString() == emptyRegion
  end
  raise RuntimeException.new("Unable to locate region " + emptyRegion)
 end

 def getAdjacent(infos, targetRegionInfo)
  infos.each do |region|
    return region if HRegionInfo.areAdjacent(region, targetRegionInfo)
  end
  raise RuntimeException.new("No adjacent region found for " + targetRegionInfo)
 end

 if ARGV.length != 1
  puts "usage: drop_region.rb <tableName> <regionsFile>"
  exit 1
 end

 $TABLE_NAME_STRING = ARGV[0]
 $TABLE_NAME = TableName.valueOf(ARGV[0])

 # Create a logger and save it to ruby global
 $LOG = configureLogging()

 conn = nil
 admin = nil
 config = getConfiguration()
 conn = ConnectionFactory.createConnection(config)
 admin = conn.getAdmin()

 regionArray = []
 clusterStatus = admin.getClusterStatus()

 # Fetch all regions for this table that have StorageFileSizeMB == 0
 clusterStatus.getServers().each do |serverName|
  serverLoad = clusterStatus.getLoad(serverName)
  serverLoad.getRegionsLoad().entrySet().each do |entry|
    region = entry.getValue().getName()
    storeFileSize = entry.getValue().getStorefileSizeMB()
    regionArray.push(Bytes.toStringBinary(region)) if storeFileSize == 0 and entry.getValue().getNameAsString().start_with?($TABLE_NAME_STRING + ",")
  end
 end

 regionArray.each do |emptyRegion|
  begin
    tableRegions = admin.getTableRegions($TABLE_NAME)
    targetInfo = getTargetInfo(tableRegions, emptyRegion)
    adjacentInfo = getAdjacent(tableRegions, targetInfo)
    $LOG.info("'dropping' " + emptyRegion.to_s + " by merging into " + adjacentInfo.to_s)
    admin.mergeRegions(targetInfo.getEncodedNameAsBytes, adjacentInfo.getEncodedNameAsBytes(), false)
    sleep(1)
  rescue RuntimeError, Exception => e
    $LOG.info("Encountered unhandled exception.", e)
  end
 end
 admin.close() unless admin.nil?
 conn.close() unless conn.nil?
	#
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	#

	# Drop all empty regions for a table. Dropping a region is implemented as a merge into
	# an adjacent region.
	#
	# $ hbase org.jruby.Main drop_empty_regions.rb mytable

	include Java

	import java.lang.Exception
	import java.lang.RuntimeException

	import org.apache.hadoop.hbase.HBaseConfiguration
	import org.apache.hadoop.hbase.TableName
	import org.apache.hadoop.hbase.client.Admin
	import org.apache.hadoop.hbase.client.ConnectionFactory
	import org.apache.hadoop.hbase.ServerName
	import org.apache.hadoop.hbase.HRegionInfo
	import org.apache.hadoop.hbase.util.Bytes
	import org.apache.commons.logging.LogFactory

	NAME = "drop_empty_regions.rb"

	# Create a logger and disable the DEBUG-level annoying client logging
	def configureLogging()
	# Configure log4j to not spew so much
	logger = org.apache.log4j.Logger.getLogger("org.apache.hadoop.hbase.client")
	logger.setLevel(org.apache.log4j.Level::INFO)
	logger = org.apache.log4j.Logger.getLogger("org.apache.zookeeper")
	logger.setLevel(org.apache.log4j.Level::WARN)
	return LogFactory.getLog(NAME)
	end

	# Get configuration instance
	def getConfiguration()
	config = HBaseConfiguration.create()
	# No prefetching on hbase:meta This is for versions pre 0.99. Newer versions do not prefetch.
	config.setInt("hbase.client.prefetch.limit", 1)
	# Make a config that retries at short intervals many times
	config.setInt("hbase.client.pause", 500)
	config.setInt("hbase.client.retries.number", 100)
	return config
	end

	# find my HRegionInfo
	def getTargetInfo(infos, emptyRegion)
	infos.each do \|region\|
	return region if region.getRegionNameAsString() == emptyRegion
	end
	raise RuntimeException.new("Unable to locate region " + emptyRegion)
	end

	def getAdjacent(infos, targetRegionInfo)
	infos.each do \|region\|
	return region if HRegionInfo.areAdjacent(region, targetRegionInfo)
	end
	raise RuntimeException.new("No adjacent region found for " + targetRegionInfo)
	end

	if ARGV.length != 1
	puts "usage: drop_region.rb <tableName> <regionsFile>"
	exit 1
	end

	$TABLE_NAME_STRING = ARGV[0]
	$TABLE_NAME = TableName.valueOf(ARGV[0])

	# Create a logger and save it to ruby global
	$LOG = configureLogging()

	conn = nil
	admin = nil
	config = getConfiguration()
	conn = ConnectionFactory.createConnection(config)
	admin = conn.getAdmin()

	regionArray = []
	clusterStatus = admin.getClusterStatus()

	# Fetch all regions for this table that have StorageFileSizeMB == 0
	clusterStatus.getServers().each do \|serverName\|
	serverLoad = clusterStatus.getLoad(serverName)
	serverLoad.getRegionsLoad().entrySet().each do \|entry\|
	region = entry.getValue().getName()
	storeFileSize = entry.getValue().getStorefileSizeMB()
	regionArray.push(Bytes.toStringBinary(region)) if storeFileSize == 0 and entry.getValue().getNameAsString().start_with?($TABLE_NAME_STRING + ",")
	end
	end

	regionArray.each do \|emptyRegion\|
	begin
	tableRegions = admin.getTableRegions($TABLE_NAME)
	targetInfo = getTargetInfo(tableRegions, emptyRegion)
	adjacentInfo = getAdjacent(tableRegions, targetInfo)
	$LOG.info("'dropping' " + emptyRegion.to_s + " by merging into " + adjacentInfo.to_s)
	admin.mergeRegions(targetInfo.getEncodedNameAsBytes, adjacentInfo.getEncodedNameAsBytes(), false)
	sleep(1)
	rescue RuntimeError, Exception => e
	$LOG.info("Encountered unhandled exception.", e)
	end
	end
	admin.close() unless admin.nil?
	conn.close() unless conn.nil?