Skip to content

Instantly share code, notes, and snippets.

@jussker
Created December 15, 2023 06:28
Show Gist options
  • Save jussker/8a98341047addcf59ccaa63cf9c2b700 to your computer and use it in GitHub Desktop.
Save jussker/8a98341047addcf59ccaa63cf9c2b700 to your computer and use it in GitHub Desktop.
ChatGPT4V: Counting tokens for an image
import math
from typing import Dict, Optional, Tuple
class GPT4VImageTokenCalculator:
    """Estimate how many tokens an image costs when sent to GPT-4 with vision.

    In low-resolution mode every image costs a flat ``base_token_cost``.
    Otherwise the image is (1) scaled down to fit inside a
    ``MAX_SIDE`` x ``MAX_SIDE`` square, (2) scaled down so its shortest side
    is at most ``SHORT_SIDE``, and (3) split into ``TILE_SIZE``-pixel tiles;
    the cost is ``base_token_cost + tiles * additional_token_cost``.
    Images are only ever scaled down, never up.
    """

    # Longest side allowed after the first downscale.
    MAX_SIDE = 2048
    # Upper bound for the shortest side after the second downscale.
    SHORT_SIDE = 768
    # Edge length, in pixels, of the square tiles that are billed individually.
    TILE_SIZE = 512

    def __init__(self, low_resolution: bool = False) -> None:
        """Create a calculator.

        Args:
            low_resolution: When true, images are billed at the flat base
                cost regardless of their size.
        """
        self.low_resolution = low_resolution
        # Tokens charged for every image.
        self.base_token_cost = 85
        # Tokens charged per tile in high-resolution mode.
        self.additional_token_cost = 170

    def calculate_resize_dimensions(self, width: int, height: int) -> Tuple[int, int, int, int]:
        """Return the image size after each of the two downscaling steps.

        Args:
            width: Original image width in pixels; must be positive.
            height: Original image height in pixels; must be positive.

        Returns:
            ``(initial_width, initial_height, further_width, further_height)``
            where the ``initial_*`` pair is the size after fitting inside
            ``MAX_SIDE`` and the ``further_*`` pair is the size after limiting
            the shortest side to ``SHORT_SIDE``.

        Raises:
            ValueError: If ``width`` or ``height`` is not positive.
        """
        if width <= 0 or height <= 0:
            raise ValueError(
                f"width and height must be positive, got {width}x{height}"
            )

        # Step 1: fit inside MAX_SIDE x MAX_SIDE, keeping the aspect ratio.
        initial_width = min(width, self.MAX_SIDE)
        initial_height = min(height, self.MAX_SIDE)
        if width > self.MAX_SIDE or height > self.MAX_SIDE:
            # The longer side is already clamped to MAX_SIDE above; scale the
            # other one. Clamp to 1 px so extreme aspect ratios (e.g.
            # 10000x1) cannot round to 0 and cause a division by zero below.
            if width > height:
                initial_height = max(1, round(self.MAX_SIDE * (height / width)))
            else:
                initial_width = max(1, round(self.MAX_SIDE * (width / height)))

        # Step 2: bring the shortest side down to SHORT_SIDE (never upscale).
        further_width = min(initial_width, self.SHORT_SIDE)
        further_height = min(initial_height, self.SHORT_SIDE)
        if initial_width > self.SHORT_SIDE or initial_height > self.SHORT_SIDE:
            if initial_width < initial_height:
                further_width = min(self.SHORT_SIDE, initial_width)
                further_height = round(further_width * (initial_height / initial_width))
            else:
                further_height = min(self.SHORT_SIDE, initial_height)
                further_width = round(further_height * (initial_width / initial_height))

        return initial_width, initial_height, further_width, further_height

    @classmethod
    def _tiles_for(cls, resized_width: int, resized_height: int) -> Tuple[int, int]:
        """Return ``(vertical_tiles, horizontal_tiles)`` for an already-resized image."""
        # For positive sizes, ceil(side / TILE_SIZE) equals the original
        # 1 + ceil((side - TILE_SIZE) / TILE_SIZE) formulation.
        return (
            math.ceil(resized_height / cls.TILE_SIZE),
            math.ceil(resized_width / cls.TILE_SIZE),
        )

    def calculate_tiles(self, width: int, height: int) -> Tuple[int, int]:
        """Return how many tiles cover the image after downscaling.

        Args:
            width: Original image width in pixels; must be positive.
            height: Original image height in pixels; must be positive.

        Returns:
            ``(vertical_tiles, horizontal_tiles)``.

        Raises:
            ValueError: If ``width`` or ``height`` is not positive.
        """
        _, _, further_width, further_height = self.calculate_resize_dimensions(width, height)
        return self._tiles_for(further_width, further_height)

    @staticmethod
    def num_tokens_from_image(width: int, height: int, low_resolution: bool = False) -> Dict[str, Optional[int]]:
        """Compute the token cost of an image together with the intermediate values.

        Args:
            width: Original image width in pixels.
            height: Original image height in pixels.
            low_resolution: When true, return the flat base cost; the
                dimensions are not inspected and all size/tile entries in the
                result are ``None``.

        Returns:
            A dict with the keys ``low_resolution``, ``initial_resize_width``,
            ``initial_resize_height``, ``further_resize_width``,
            ``further_resize_height``, ``vertical_tiles``,
            ``horizontal_tiles``, ``total_tiles`` and ``total_tokens``.

        Raises:
            ValueError: In high-resolution mode, if ``width`` or ``height``
                is not positive.
        """
        calculator = GPT4VImageTokenCalculator(low_resolution=low_resolution)

        if calculator.low_resolution:
            # Flat cost: skip the resize/tile maths entirely.
            return {
                "low_resolution": calculator.low_resolution,
                "initial_resize_width": None,
                "initial_resize_height": None,
                "further_resize_width": None,
                "further_resize_height": None,
                "vertical_tiles": None,
                "horizontal_tiles": None,
                "total_tiles": None,
                "total_tokens": calculator.base_token_cost,
            }

        # Resize once and derive the tiles from that result.
        initial_width, initial_height, further_width, further_height = (
            calculator.calculate_resize_dimensions(width, height)
        )
        vertical_tiles, horizontal_tiles = calculator._tiles_for(further_width, further_height)
        total_tiles = vertical_tiles * horizontal_tiles
        total_tokens = calculator.base_token_cost + total_tiles * calculator.additional_token_cost

        return {
            "low_resolution": calculator.low_resolution,
            "initial_resize_width": initial_width,
            "initial_resize_height": initial_height,
            "further_resize_width": further_width,
            "further_resize_height": further_height,
            "vertical_tiles": vertical_tiles,
            "horizontal_tiles": horizontal_tiles,
            "total_tiles": total_tiles,
            "total_tokens": total_tokens,
        }
# Module-level alias so the calculation can be called as a plain function.
num_tokens_from_image = GPT4VImageTokenCalculator.num_tokens_from_image

# Example usage (executed whenever this module is run or imported).
# High-resolution estimate for a 2048x768 image.
num_tokens = num_tokens_from_image(width=2048, height=768)
print(f'high resolution:{num_tokens}')
# Low-resolution estimate for the same image.
num_tokens = num_tokens_from_image(width=2048, height=768, low_resolution=True)
print(f'low resolution:{num_tokens}')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment