Created
December 15, 2023 06:28
-
-
Save jussker/8a98341047addcf59ccaa63cf9c2b700 to your computer and use it in GitHub Desktop.
ChatGPT4V: Counting tokens for image
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
from typing import Dict, Tuple | |
class GPT4VImageTokenCalculator:
    """Estimate how many tokens an image costs for a GPT-4V style request.

    High-resolution images are resized in two steps (fit inside a
    ``MAX_DIMENSION`` square, then shrink so the shorter side is at most
    ``SHORT_SIDE_LIMIT``), cut into ``TILE_SIZE`` pixel tiles, and charged
    ``base_token_cost + tiles * additional_token_cost``.  Low-resolution
    images are charged ``base_token_cost`` only, regardless of size.
    """

    # Side length of the square every image must fit into after step one.
    MAX_DIMENSION = 2048
    # Upper bound for the shorter side after step two (images are never upscaled).
    SHORT_SIDE_LIMIT = 768
    # Edge length of one billing tile.
    TILE_SIZE = 512

    def __init__(self, low_resolution: bool = False) -> None:
        """Store the detail mode and the per-image / per-tile token prices."""
        self.low_resolution = low_resolution
        self.base_token_cost = 85
        self.additional_token_cost = 170

    def calculate_resize_dimensions(self, width: int, height: int) -> Tuple[int, int, int, int]:
        """Return the image size after each of the two resize steps.

        Args:
            width: Original image width in pixels (must be positive).
            height: Original image height in pixels (must be positive).

        Returns:
            ``(initial_width, initial_height, further_width, further_height)``
            where the first pair is the size after fitting into the
            ``MAX_DIMENSION`` square and the second pair is the size after
            limiting the shorter side to ``SHORT_SIDE_LIMIT``.

        Raises:
            ValueError: If ``width`` or ``height`` is not positive.
        """
        if width <= 0 or height <= 0:
            raise ValueError(
                f"width and height must be positive, got {width}x{height}"
            )

        max_dim = self.MAX_DIMENSION
        short_limit = self.SHORT_SIDE_LIMIT

        # Step 1: fit inside the max_dim x max_dim square, keeping aspect ratio.
        initial_width = min(width, max_dim)
        initial_height = min(height, max_dim)
        if width > max_dim or height > max_dim:
            # Only the shorter side needs rescaling; the longer one is already
            # clamped to max_dim.  Clamp to >= 1 so extremely thin images do
            # not collapse to 0 pixels (which would divide by zero below).
            if width > height:
                initial_height = max(1, round(max_dim * (height / width)))
            else:
                initial_width = max(1, round(max_dim * (width / height)))

        # Step 2: shrink so that the shorter side is at most short_limit.
        further_width = min(initial_width, short_limit)
        further_height = min(initial_height, short_limit)
        if initial_width > short_limit or initial_height > short_limit:
            if initial_width < initial_height:
                further_width = min(short_limit, initial_width)
                further_height = round(further_width * (initial_height / initial_width))
            else:
                further_height = min(short_limit, initial_height)
                further_width = round(further_height * (initial_width / initial_height))

        return initial_width, initial_height, further_width, further_height

    def _tiles_for(self, resized_width: int, resized_height: int) -> Tuple[int, int]:
        """Return ``(vertical, horizontal)`` tile counts for an already-resized image."""
        # ceil(x / TILE_SIZE) equals the original 1 + ceil((x - TILE_SIZE) / TILE_SIZE).
        vertical_tiles = math.ceil(resized_height / self.TILE_SIZE)
        horizontal_tiles = math.ceil(resized_width / self.TILE_SIZE)
        return vertical_tiles, horizontal_tiles

    def calculate_tiles(self, width: int, height: int) -> Tuple[int, int]:
        """Return ``(vertical_tiles, horizontal_tiles)`` for an original image size.

        The image is first resized via :meth:`calculate_resize_dimensions`.

        Raises:
            ValueError: If ``width`` or ``height`` is not positive.
        """
        _, _, further_width, further_height = self.calculate_resize_dimensions(width, height)
        return self._tiles_for(further_width, further_height)

    @staticmethod
    def num_tokens_from_image(width: int, height: int, low_resolution: bool = False) -> Dict[str, int]:
        """Return a breakdown of the token cost for one image.

        Args:
            width: Original image width in pixels.
            height: Original image height in pixels.
            low_resolution: When true, only the base cost is charged and the
                image dimensions are not inspected.

        Returns:
            A dict with the keys ``low_resolution``, ``initial_resize_width``,
            ``initial_resize_height``, ``further_resize_width``,
            ``further_resize_height``, ``vertical_tiles``,
            ``horizontal_tiles``, ``total_tiles`` and ``total_tokens``.
            In low-resolution mode every size/tile entry is ``None``.

        Raises:
            ValueError: In high-resolution mode, if ``width`` or ``height``
                is not positive.
        """
        calculator = GPT4VImageTokenCalculator(low_resolution=low_resolution)

        if calculator.low_resolution:
            # Flat price: skip the resize/tile maths entirely so that odd
            # dimensions can never make this path fail.
            return {
                "low_resolution": calculator.low_resolution,
                "initial_resize_width": None,
                "initial_resize_height": None,
                "further_resize_width": None,
                "further_resize_height": None,
                "vertical_tiles": None,
                "horizontal_tiles": None,
                "total_tiles": None,
                "total_tokens": calculator.base_token_cost,
            }

        initial_width, initial_height, further_width, further_height = (
            calculator.calculate_resize_dimensions(width, height)
        )
        # Reuse the sizes computed above instead of resizing a second time.
        vertical_tiles, horizontal_tiles = calculator._tiles_for(further_width, further_height)
        total_tiles = vertical_tiles * horizontal_tiles
        total_tokens = calculator.base_token_cost + total_tiles * calculator.additional_token_cost

        return {
            "low_resolution": calculator.low_resolution,
            "initial_resize_width": initial_width,
            "initial_resize_height": initial_height,
            "further_resize_width": further_width,
            "further_resize_height": further_height,
            "vertical_tiles": vertical_tiles,
            "horizontal_tiles": horizontal_tiles,
            "total_tiles": total_tiles,
            "total_tokens": total_tokens,
        }
# Module-level shortcut so callers can use the static method like a plain function.
num_tokens_from_image = GPT4VImageTokenCalculator.num_tokens_from_image

# Usage examples (these run, and print, whenever the module is executed or imported).

# High resolution: the result includes resize sizes, tile counts and total tokens.
num_tokens = num_tokens_from_image(width=2048, height=768)
print(f'high resolution:{num_tokens}')

# Low resolution: only "total_tokens" carries a value; size/tile entries are None.
num_tokens = num_tokens_from_image(2048, 768, low_resolution=True)
print(f'low resolution:{num_tokens}')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment