Skip to content

Instantly share code, notes, and snippets.

@mikechambers
Created March 9, 2025 16:27
Show Gist options
  • Save mikechambers/5ec8ce82b8c9d2e6945f22994f09ec9f to your computer and use it in GitHub Desktop.
Save mikechambers/5ec8ce82b8c9d2e6945f22994f09ec9f to your computer and use it in GitHub Desktop.
OpenCV Bungie ID Parsing
import cv2
import pytesseract
import re
# If Tesseract is not in your PATH, uncomment the following line and provide the installation path:
# pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
# A sample image to test with is available at: https://github.com/mikechambers/lookup/blob/main/images/screenshot.png
# Bungie IDs have the form NAME#CODE, where CODE is exactly four digits.
# - The name must start with a letter or digit, so leading whitespace is never
#   captured and a bare " #1234" is not reported as an ID.
# - The trailing (?![0-9]) rejects longer digit runs, so "Name#12345" is not
#   silently truncated to "Name#1234".
_BUNGIE_ID_PATTERN = re.compile(r'[A-Za-z0-9][A-Za-z0-9 ]*#[0-9]{4}(?![0-9])')


def _find_bungie_ids(text):
    """Return all Bungie IDs (NAME#CODE) found in *text*, in order of appearance."""
    return _BUNGIE_ID_PATTERN.findall(text)


def main(image_path=r"screenshot.png"):
    """Run OCR on an image and print any Bungie IDs found in its text.

    The image is loaded with OpenCV, converted to grayscale, and passed to
    pytesseract. The raw extracted text is printed first, followed by every
    NAME#CODE match (or a message saying none were found).

    Args:
        image_path: Path of the image to scan. Defaults to "screenshot.png"
            so that calling ``main()`` with no arguments behaves as before.

    Returns:
        None. All results are written to standard output. If the image cannot
        be loaded, an error message is printed and the function returns early.
    """
    # 1. Load the image using OpenCV; the code treats a None result as a
    #    failed load rather than expecting an exception.
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Could not open {image_path}. Make sure the path is correct.")
        return

    # 2. Convert to grayscale (OCR often performs better on grayscale).
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # 3. Perform OCR using pytesseract.
    extracted_text = pytesseract.image_to_string(gray)
    print("Extracted Text:")
    print("----------------")
    print(extracted_text)

    # 4. Find Bungie IDs of the form NAME#CODE where CODE is 4 digits.
    bungie_ids = _find_bungie_ids(extracted_text)

    # 5. Print any Bungie IDs found.
    if not bungie_ids:
        print("\nNo Bungie IDs found in the text.")
        return

    print("\nBungie IDs Found:")
    for bid in bungie_ids:
        print(bid)
# Run the OCR demo only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment