Created
March 9, 2025 16:27
-
-
Save mikechambers/5ec8ce82b8c9d2e6945f22994f09ec9f to your computer and use it in GitHub Desktop.
OpenCV Bungie ID Parsing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import cv2 | |
import pytesseract | |
import re | |
# If Tesseract is not in your PATH, uncomment the following line and provide the installation path: | |
# pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe" | |
# A sample image to test with is available at: https://github.com/mikechambers/lookup/blob/main/images/screenshot.png
# A Bungie ID has the form NAME#CODE.
#  - The name must START with a letter or digit, so padding spaces that the
#    OCR emits before a name are not captured and a run of spaces alone is
#    never treated as a name. Spaces inside the name are still allowed.
#  - The code is exactly four digits; the negative lookahead rejects the
#    first four digits of a longer number (e.g. "Name#12345").
_BUNGIE_ID_PATTERN = re.compile(r"[A-Za-z0-9][A-Za-z0-9 ]*#[0-9]{4}(?![0-9])")


def _find_bungie_ids(text: str) -> list:
    """Return every Bungie ID (NAME#CODE) in *text*, in order of appearance."""
    return _BUNGIE_ID_PATTERN.findall(text)


def main(image_path: str = "screenshot.png") -> None:
    """OCR an image and print the text and any Bungie IDs found in it.

    Args:
        image_path: Path of the image to read. Defaults to ``screenshot.png``
            in the current working directory.

    Prints an error message and returns early if the image cannot be loaded.
    """
    # 1. Load the image using OpenCV. imread signals failure by returning
    #    None rather than raising, so the result must be checked explicitly.
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Could not open {image_path}. Make sure the path is correct.")
        return

    # 2. Convert to grayscale (OCR often performs better on grayscale).
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # 3. Perform OCR using pytesseract.
    extracted_text = pytesseract.image_to_string(gray)
    print("Extracted Text:")
    print("----------------")
    print(extracted_text)

    # 4. Find Bungie IDs of the form NAME#CODE where CODE is 4 digits.
    bungie_ids = _find_bungie_ids(extracted_text)

    # 5. Print any Bungie IDs found.
    if not bungie_ids:
        print("\nNo Bungie IDs found in the text.")
        return

    print("\nBungie IDs Found:")
    for bungie_id in bungie_ids:
        print(bungie_id)
# Run the OCR demo only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment