@chamecall
Created March 25, 2025 13:19
Custom image/bbox Albumentations augmentation: a top-biased random crop.
import numpy as np
import cv2
from albumentations.core.transforms_interface import DualTransform


class TopBiasedRandomCrop(DualTransform):
    """
    Randomly crops an image so that the cropped region's width is at least min_width
    and its height is at least min_height of the original. The vertical (y) offset is
    biased toward the top by sampling from a Beta distribution with parameters
    beta_alpha and beta_beta. Bounding boxes in [x_min, y_min, x_max, y_max] format
    are adjusted accordingly. A usage sketch follows the class definition.

    Args:
        min_width (float): Minimum relative width of the crop (0 < min_width <= 1).
        min_height (float): Minimum relative height of the crop (0 < min_height <= 1).
        beta_alpha (float): Alpha parameter of the Beta distribution (vertical bias).
        beta_beta (float): Beta parameter of the Beta distribution.
        p (float): Probability of applying the transform.
    """

    def __init__(self, min_width=0.7, min_height=0.5, beta_alpha=1.0, beta_beta=2.0, p=1.0):
        # Pass p by keyword: older DualTransform signatures take always_apply first,
        # so a positional argument would be misinterpreted.
        super().__init__(p=p)
        if not (0 < min_width <= 1):
            raise ValueError("min_width must be in the interval (0, 1].")
        if not (0 < min_height <= 1):
            raise ValueError("min_height must be in the interval (0, 1].")
        self.min_width = min_width
        self.min_height = min_height
        self.beta_alpha = beta_alpha
        self.beta_beta = beta_beta

    def get_params_dependent_on_data(self, params, data):
        # Recent Albumentations releases call this hook; delegate to the older-style one below.
        return self.get_params_dependent_on_targets({"image": data["image"]})

    def get_params_dependent_on_targets(self, params) -> dict:
        img = params["image"]
        height, width = img.shape[:2]

        # Determine crop dimensions.
        crop_width = int(np.random.uniform(self.min_width, 1.0) * width)
        crop_height = int(np.random.uniform(self.min_height, 1.0) * height)
        crop_width = min(crop_width, width)
        crop_height = min(crop_height, height)

        # Maximum possible offsets.
        x_max = width - crop_width
        y_max = height - crop_height

        # The horizontal offset is uniform; the vertical offset is drawn from
        # Beta(beta_alpha, beta_beta), which with the defaults (1, 2) concentrates
        # mass near 0, i.e. near the top of the image.
        x1 = np.random.randint(0, x_max + 1) if x_max > 0 else 0
        y_sample = np.random.beta(self.beta_alpha, self.beta_beta)
        y1 = int(y_sample * y_max) if y_max > 0 else 0

        crop_params = [x1, y1, x1 + crop_width, y1 + crop_height]
        # Return crop_params plus the new shape info, so that bbox filtering uses the cropped dimensions.
        return {"crop_params": crop_params, "rows": crop_height, "cols": crop_width}

    def apply(self, img, **params):
        crop_params = params.get("crop_params")
        if crop_params is None:
            return img
        x1, y1, x2, y2 = crop_params
        return img[y1:y2, x1:x2]

    def apply_to_bbox(self, bbox, **params):
        crop_params = params.get("crop_params")
        if crop_params is None:
            return bbox
        x1, y1, x2, y2 = crop_params
        # Shift the box into the crop's coordinate frame and clip it to the crop boundaries.
        new_bbox = [
            np.clip(bbox[0] - x1, 0, x2 - x1),
            np.clip(bbox[1] - y1, 0, y2 - y1),
            np.clip(bbox[2] - x1, 0, x2 - x1),
            np.clip(bbox[3] - y1, 0, y2 - y1),
        ]
        # Preserve any extra fields (e.g. class labels) appended to the box.
        if len(bbox) > 4:
            new_bbox.extend(bbox[4:])
        return new_bbox

    def apply_to_bboxes(self, bboxes, **params):
        transformed = [self.apply_to_bbox(bbox, **params) for bbox in bboxes]
        # Convert to a NumPy array so that further processing (e.g., filtering) works.
        return np.array(transformed, dtype=np.float32)

    def get_transform_init_args_names(self):
        return ("min_width", "min_height", "beta_alpha", "beta_beta")