jussker · December 15, 2023 06:28
diff --git a/GPT4VImageTokenCalculator.py b/GPT4VImageTokenCalculator.py
 import math
 from typing import Dict, Tuple

 class GPT4VImageTokenCalculator:
     """Estimate how many tokens an image costs when sent to GPT-4V.

     Low-resolution mode costs a flat ``base_token_cost``. High-resolution
     mode resizes the image in two steps, counts the 512px tiles needed to
     cover the result, and charges ``additional_token_cost`` per tile on top
     of ``base_token_cost``.

     Width and height are expected to be positive pixel counts.
     """

     # Bounding square of the first resize step.
     _MAX_SIDE = 2048
     # Cap applied to the shorter side in the second resize step.
     _SHORT_SIDE = 768
     # Edge length of one billing tile.
     _TILE_SIDE = 512

     def __init__(self, low_resolution: bool = False) -> None:
         """Store the resolution mode and the per-image / per-tile costs."""
         self.low_resolution = low_resolution
         self.base_token_cost = 85
         self.additional_token_cost = 170

     def calculate_resize_dimensions(self, width: int, height: int) -> Tuple[int, int, int, int]:
         """Return the image size after each of the two resize steps.

         Step 1 fits the image inside a 2048x2048 square, keeping the aspect
         ratio. Step 2 caps the shorter side at 768 and scales the longer
         side by the same ratio; an image whose shorter side is already at
         most 768 is left as step 1 produced it (no upscaling).

         Args:
             width: Original image width in pixels (expected > 0).
             height: Original image height in pixels (expected > 0).

         Returns:
             ``(initial_width, initial_height, further_width, further_height)``
             where ``initial_*`` is the size after step 1 and ``further_*``
             the size after step 2.
         """
         max_side = self._MAX_SIDE
         short_cap = self._SHORT_SIDE

         initial_width = min(width, max_side)
         initial_height = min(height, max_side)
         if width > max_side or height > max_side:
             # The longer side is already pinned to max_side by min() above;
             # only the other side needs scaling. Clamp it to 1px: for very
             # elongated images the rounded value would otherwise be 0 and
             # the ratio computed in step 2 would divide by zero.
             if width > height:
                 initial_height = max(1, round(max_side * (height / width)))
             else:
                 initial_width = max(1, round(max_side * (width / height)))

         further_width = min(initial_width, short_cap)
         further_height = min(initial_height, short_cap)
         if initial_width > short_cap or initial_height > short_cap:
             # Keep the (already capped) shorter side and derive the longer
             # side from it so the aspect ratio is preserved.
             if initial_width < initial_height:
                 further_height = round(further_width * (initial_height / initial_width))
             else:
                 further_width = round(further_height * (initial_width / initial_height))

         return initial_width, initial_height, further_width, further_height

     @classmethod
     def _count_tiles(cls, further_width: int, further_height: int) -> Tuple[int, int]:
         """Return ``(vertical_tiles, horizontal_tiles)`` covering a resized image."""
         # ceil(side / 512) equals the former 1 + ceil((side - 512) / 512).
         vertical_tiles = math.ceil(further_height / cls._TILE_SIDE)
         horizontal_tiles = math.ceil(further_width / cls._TILE_SIDE)
         return vertical_tiles, horizontal_tiles

     def calculate_tiles(self, width: int, height: int) -> Tuple[int, int]:
         """Return ``(vertical_tiles, horizontal_tiles)`` for an original image size.

         The image is first resized with ``calculate_resize_dimensions``; the
         counts are the number of 512px tiles needed along the resized height
         and width respectively.
         """
         _, _, further_width, further_height = self.calculate_resize_dimensions(width, height)
         return self._count_tiles(further_width, further_height)

     @staticmethod
     def num_tokens_from_image(width: int, height: int, low_resolution: bool=False) -> Dict[str, int]:
         """Return the token cost of an image together with the intermediate values.

         Args:
             width: Original image width in pixels.
             height: Original image height in pixels.
             low_resolution: When true, only the flat base cost is charged.

         Returns:
             A dict with the keys ``low_resolution``, ``initial_resize_width``,
             ``initial_resize_height``, ``further_resize_width``,
             ``further_resize_height``, ``vertical_tiles``,
             ``horizontal_tiles``, ``total_tiles`` and ``total_tokens``.
             In low-resolution mode every key except ``low_resolution`` and
             ``total_tokens`` maps to ``None``.
         """
         calculator = GPT4VImageTokenCalculator(low_resolution=low_resolution)

         if calculator.low_resolution:
             # Flat cost: the image size is irrelevant, so skip the resize and
             # tile computation entirely instead of computing and discarding it.
             return {
                 "low_resolution": calculator.low_resolution,
                 "initial_resize_width": None,
                 "initial_resize_height": None,
                 "further_resize_width": None,
                 "further_resize_height": None,
                 "vertical_tiles": None,
                 "horizontal_tiles": None,
                 "total_tiles": None,
                 "total_tokens": calculator.base_token_cost,
             }

         initial_width, initial_height, further_width, further_height = (
             calculator.calculate_resize_dimensions(width, height)
         )
         # Reuse the dimensions computed above rather than resizing a second time.
         vertical_tiles, horizontal_tiles = calculator._count_tiles(further_width, further_height)
         total_tiles = vertical_tiles * horizontal_tiles
         total_tokens = calculator.base_token_cost + total_tiles * calculator.additional_token_cost

         return {
             "low_resolution": calculator.low_resolution,
             "initial_resize_width": initial_width,
             "initial_resize_height": initial_height,
             "further_resize_width": further_width,
             "further_resize_height": further_height,
             "vertical_tiles": vertical_tiles,
             "horizontal_tiles": horizontal_tiles,
             "total_tiles": total_tiles,
             "total_tokens": total_tokens,
         }

 # Module-level alias so the static method can be called like a plain function.
 num_tokens_from_image = GPT4VImageTokenCalculator.num_tokens_from_image

 # Usage:

 # high resolution: the returned dict carries the resize dimensions, the tile
 # counts and the total token cost.
 num_tokens = num_tokens_from_image(width=2048, height=768)
 print(f'high resolution:{num_tokens}')

 # low resolution: only "low_resolution" and "total_tokens" are populated in
 # the returned dict; the remaining keys are None.
 num_tokens = num_tokens_from_image(2048, 768, low_resolution=True)
 print(f'low resolution:{num_tokens}')
	import math
	from typing import Dict, Tuple

	class GPT4VImageTokenCalculator:
	    """Estimate how many tokens an image costs when sent to GPT-4V.

	    Low-resolution mode costs a flat ``base_token_cost``. High-resolution
	    mode resizes the image in two steps, counts the 512px tiles needed to
	    cover the result, and charges ``additional_token_cost`` per tile on top
	    of ``base_token_cost``.

	    Width and height are expected to be positive pixel counts.
	    """

	    # Bounding square of the first resize step.
	    _MAX_SIDE = 2048
	    # Cap applied to the shorter side in the second resize step.
	    _SHORT_SIDE = 768
	    # Edge length of one billing tile.
	    _TILE_SIDE = 512

	    def __init__(self, low_resolution: bool = False) -> None:
	        """Store the resolution mode and the per-image / per-tile costs."""
	        self.low_resolution = low_resolution
	        self.base_token_cost = 85
	        self.additional_token_cost = 170

	    def calculate_resize_dimensions(self, width: int, height: int) -> Tuple[int, int, int, int]:
	        """Return the image size after each of the two resize steps.

	        Step 1 fits the image inside a 2048x2048 square, keeping the aspect
	        ratio. Step 2 caps the shorter side at 768 and scales the longer
	        side by the same ratio; an image whose shorter side is already at
	        most 768 is left as step 1 produced it (no upscaling).

	        Args:
	            width: Original image width in pixels (expected > 0).
	            height: Original image height in pixels (expected > 0).

	        Returns:
	            ``(initial_width, initial_height, further_width, further_height)``
	            where ``initial_*`` is the size after step 1 and ``further_*``
	            the size after step 2.
	        """
	        max_side = self._MAX_SIDE
	        short_cap = self._SHORT_SIDE

	        initial_width = min(width, max_side)
	        initial_height = min(height, max_side)
	        if width > max_side or height > max_side:
	            # The longer side is already pinned to max_side by min() above;
	            # only the other side needs scaling. Clamp it to 1px: for very
	            # elongated images the rounded value would otherwise be 0 and
	            # the ratio computed in step 2 would divide by zero.
	            if width > height:
	                initial_height = max(1, round(max_side * (height / width)))
	            else:
	                initial_width = max(1, round(max_side * (width / height)))

	        further_width = min(initial_width, short_cap)
	        further_height = min(initial_height, short_cap)
	        if initial_width > short_cap or initial_height > short_cap:
	            # Keep the (already capped) shorter side and derive the longer
	            # side from it so the aspect ratio is preserved.
	            if initial_width < initial_height:
	                further_height = round(further_width * (initial_height / initial_width))
	            else:
	                further_width = round(further_height * (initial_width / initial_height))

	        return initial_width, initial_height, further_width, further_height

	    @classmethod
	    def _count_tiles(cls, further_width: int, further_height: int) -> Tuple[int, int]:
	        """Return ``(vertical_tiles, horizontal_tiles)`` covering a resized image."""
	        # ceil(side / 512) equals the former 1 + ceil((side - 512) / 512).
	        vertical_tiles = math.ceil(further_height / cls._TILE_SIDE)
	        horizontal_tiles = math.ceil(further_width / cls._TILE_SIDE)
	        return vertical_tiles, horizontal_tiles

	    def calculate_tiles(self, width: int, height: int) -> Tuple[int, int]:
	        """Return ``(vertical_tiles, horizontal_tiles)`` for an original image size.

	        The image is first resized with ``calculate_resize_dimensions``; the
	        counts are the number of 512px tiles needed along the resized height
	        and width respectively.
	        """
	        _, _, further_width, further_height = self.calculate_resize_dimensions(width, height)
	        return self._count_tiles(further_width, further_height)

	    @staticmethod
	    def num_tokens_from_image(width: int, height: int, low_resolution: bool=False) -> Dict[str, int]:
	        """Return the token cost of an image together with the intermediate values.

	        Args:
	            width: Original image width in pixels.
	            height: Original image height in pixels.
	            low_resolution: When true, only the flat base cost is charged.

	        Returns:
	            A dict with the keys ``low_resolution``, ``initial_resize_width``,
	            ``initial_resize_height``, ``further_resize_width``,
	            ``further_resize_height``, ``vertical_tiles``,
	            ``horizontal_tiles``, ``total_tiles`` and ``total_tokens``.
	            In low-resolution mode every key except ``low_resolution`` and
	            ``total_tokens`` maps to ``None``.
	        """
	        calculator = GPT4VImageTokenCalculator(low_resolution=low_resolution)

	        if calculator.low_resolution:
	            # Flat cost: the image size is irrelevant, so skip the resize and
	            # tile computation entirely instead of computing and discarding it.
	            return {
	                "low_resolution": calculator.low_resolution,
	                "initial_resize_width": None,
	                "initial_resize_height": None,
	                "further_resize_width": None,
	                "further_resize_height": None,
	                "vertical_tiles": None,
	                "horizontal_tiles": None,
	                "total_tiles": None,
	                "total_tokens": calculator.base_token_cost,
	            }

	        initial_width, initial_height, further_width, further_height = (
	            calculator.calculate_resize_dimensions(width, height)
	        )
	        # Reuse the dimensions computed above rather than resizing a second time.
	        vertical_tiles, horizontal_tiles = calculator._count_tiles(further_width, further_height)
	        total_tiles = vertical_tiles * horizontal_tiles
	        total_tokens = calculator.base_token_cost + total_tiles * calculator.additional_token_cost

	        return {
	            "low_resolution": calculator.low_resolution,
	            "initial_resize_width": initial_width,
	            "initial_resize_height": initial_height,
	            "further_resize_width": further_width,
	            "further_resize_height": further_height,
	            "vertical_tiles": vertical_tiles,
	            "horizontal_tiles": horizontal_tiles,
	            "total_tiles": total_tiles,
	            "total_tokens": total_tokens,
	        }

	# Module-level alias so the static method can be called like a plain function.
	num_tokens_from_image = GPT4VImageTokenCalculator.num_tokens_from_image

	# Usage:

	# high resolution: the returned dict carries the resize dimensions, the tile
	# counts and the total token cost.
	num_tokens = num_tokens_from_image(width=2048, height=768)
	print(f'high resolution:{num_tokens}')

	# low resolution: only "low_resolution" and "total_tokens" are populated in
	# the returned dict; the remaining keys are None.
	num_tokens = num_tokens_from_image(2048, 768, low_resolution=True)
	print(f'low resolution:{num_tokens}')