Toy code to read Wacom-exported `.widd` files with Python (tested with files from an Intuos Pro with fine-tip pens).

code v1:

import json
import struct
import base64
import matplotlib.pyplot as plt
from collections import Counter

def read_widd_file(file_path):
    """
    Reads a .widd file and parses its raw content.

    The file is expected to begin with a JSON document. The document is
    parsed with a real JSON decoder rather than by searching for the first
    '}' byte, so nested objects and braces inside string values do not
    truncate it. Any bytes that follow the JSON document are ignored.

    Args:
        file_path (str): Path to the .widd file.

    Returns:
        dict: Parsed metadata from the JSON part.
        bytes: Raw binary data decoded from the first base64-encoded entry of
            the "ink" field, or None when the field is missing or empty.

    Raises:
        json.JSONDecodeError: If the file does not start with valid JSON.
    """
    with open(file_path, 'rb') as file:
        raw_bytes = file.read()

    # Decode leniently: only the leading JSON document has to be valid text,
    # whatever follows it must not abort the read.
    text = raw_bytes.decode('utf-8', errors='replace')
    # raw_decode() does not skip leading whitespace (or a BOM) by itself.
    text = text.lstrip('\ufeff \t\r\n')

    # raw_decode() stops at the end of the first complete JSON value and
    # tolerates trailing content after it.
    metadata, _ = json.JSONDecoder().raw_decode(text)

    # Decode base64 ink data (first entry of the "ink" list, if any)
    ink_entries = metadata.get("ink") if isinstance(metadata, dict) else None
    ink_data_encoded = ink_entries[0] if ink_entries else None
    ink_data_binary = base64.b64decode(ink_data_encoded) if ink_data_encoded else None

    return metadata, ink_data_binary

def parse_stroke_segments(ink_data_binary, marker_value=352321536, segment_length=10):
    """
    Parses the binary ink data into stroke segments based on recurring markers.

    The data is read as a sequence of little-endian signed 32-bit integers on
    a fixed 4-byte grid; trailing bytes that do not fill a whole integer are
    ignored. Every integer equal to ``marker_value`` starts a segment of up to
    ``segment_length`` integers (fewer when the data ends first).

    NOTE(review): the fixed 4-byte grid and the marker value are heuristics,
    not a documented layout of the ink payload -- confirm against the format.

    Args:
        ink_data_binary (bytes): Binary data representing strokes. None or
            empty input yields an empty list.
        marker_value (int): Integer treated as the start of a segment.
        segment_length (int): Maximum number of integers taken per segment,
            including the marker itself.

    Returns:
        list: A list of parsed stroke segments. Each segment is a dict with
            "markers" (the first two integers of the segment) and
            "attributes" (the remaining integers).
    """
    # read_widd_file() returns None when the file carries no ink data.
    if not ink_data_binary:
        return []

    # Explicit little-endian format so the result does not depend on the
    # byte order of the machine running the script.
    usable_length = len(ink_data_binary) - len(ink_data_binary) % 4
    raw_integers = [
        value for (value,) in struct.iter_unpack('<i', ink_data_binary[:usable_length])
    ]

    analyzed_segments = []
    for start, value in enumerate(raw_integers):
        if value != marker_value:
            continue
        segment = raw_integers[start:start + segment_length]
        analyzed_segments.append({"markers": segment[:2], "attributes": segment[2:]})

    return analyzed_segments

def extract_and_normalize_coordinates(segments, bounding_box=(864, 592)):
    """
    Turns the attribute integers of each stroke segment into (x, y) pairs
    wrapped into the canvas.

    Consecutive attributes are paired as (x, y); an unpaired trailing
    attribute is dropped. Each component is then reduced modulo the matching
    canvas dimension and made non-negative.

    Args:
        segments (list): Parsed stroke segments (dicts with an "attributes" list).
        bounding_box (tuple): Canvas dimensions (width, height) used for wrapping.

    Returns:
        list: One list of (x, y) tuples per input segment, in input order.
    """
    # Pass 1: pair up even-indexed and odd-indexed attributes of every segment.
    paired_by_segment = []
    for segment in segments:
        values = segment["attributes"]
        paired_by_segment.append(list(zip(values[0::2], values[1::2])))

    # Pass 2: wrap every component into the canvas dimensions.
    wrapped_by_segment = []
    for pairs in paired_by_segment:
        wrapped = []
        for raw_x, raw_y in pairs:
            wrapped.append((abs(raw_x % bounding_box[0]), abs(raw_y % bounding_box[1])))
        wrapped_by_segment.append(wrapped)

    return wrapped_by_segment

def decode_additional_attributes(segments):
    """
    Decodes additional attributes from stroke segments to explore pressure, brush width, or other settings.

    Each integer attribute is masked to 32 bits and its bit pattern is
    reinterpreted as a 32-bit float. Every 32-bit pattern is a valid float
    (possibly NaN or infinity), so the conversion cannot fail for integer
    input and no per-attribute error handling is needed.

    Args:
        segments (list): Parsed stroke segments with markers and attributes.

    Returns:
        list: One dict per segment with the original "markers" and
            "attributes" plus "decoded_settings", the float reinterpretation
            of every attribute (hypothesis: may hold pressure, width, etc.).
    """
    # Pre-compiled formats, hoisted out of the loops. Packing and unpacking
    # use the same (native) byte order, so the bit pattern is preserved.
    pack_u32 = struct.Struct('I').pack
    unpack_f32 = struct.Struct('f').unpack

    decoded_segments = []
    for segment in segments:
        markers = segment["markers"]
        attributes = segment["attributes"]

        # Masking maps negative signed values onto their unsigned bit pattern.
        potential_settings = [
            unpack_f32(pack_u32(attr & 0xFFFFFFFF))[0] for attr in attributes
        ]

        decoded_segments.append({
            "markers": markers,
            "attributes": attributes,
            "decoded_settings": potential_settings
        })

    return decoded_segments

def visualize_strokes(coordinates_by_segment, bounding_box=(864, 592)):
    """
    Visualizes strokes based on extracted and normalized coordinates.

    Each non-empty stroke is drawn as a connected line with point markers.
    The Y axis runs top-down (0 at the top) to match typical canvas
    orientation.

    Args:
        coordinates_by_segment (list): List of normalized coordinates for each stroke segment.
            Segments without any coordinate pair are skipped.
        bounding_box (tuple): Canvas dimensions for plotting.
    """
    plt.figure(figsize=(8, 6))
    plotted_any = False
    for i, stroke in enumerate(coordinates_by_segment, start=1):
        # A truncated segment can yield no points; zip(*[]) cannot be unpacked.
        if not stroke:
            continue
        x_coords, y_coords = zip(*stroke)
        plt.plot(x_coords, y_coords, marker='o', label=f"Stroke {i}")
        plotted_any = True

    plt.xlim(0, bounding_box[0])
    # Pass the limits in descending order to get an inverted Y axis. Calling
    # invert_yaxis() and then ylim(0, height) would reset the axis to the
    # default bottom-up orientation.
    plt.ylim(bounding_box[1], 0)
    plt.title("Reconstructed Strokes")
    plt.xlabel("X Coordinate")
    plt.ylabel("Y Coordinate")
    # legend() without any labelled line only produces a warning.
    if plotted_any:
        plt.legend()
    plt.grid(True)
    plt.show()

# Main workflow: read the exported file, split the ink payload into candidate
# stroke segments, then plot the coordinates recovered from those segments.
# NOTE: these statements run at module level, so they execute on import too,
# and the relative path below is resolved against the current working directory.
file_path = 'Inkspace_cc55ef85e931.widd'
metadata, ink_data_binary = read_widd_file(file_path)
segments = parse_stroke_segments(ink_data_binary)
# Float reinterpretation of every attribute; kept for inspection only, the
# plotting step below does not use it.
decoded_segments = decode_additional_attributes(segments)
# Both calls rely on the helpers' default bounding box of (864, 592).
normalized_coordinates_by_segment = extract_and_normalize_coordinates(segments)
visualize_strokes(normalized_coordinates_by_segment)

Decoded data (the value of `(metadata, ink_data_binary)` after running the code above):

(metadata, ink_data_binary)

({'locale': 'en_US',
  'transform': 'matrix(0 -1 1 0 0 592)',
  'boundingBox': 'rect(0 0 864 592)',
  'title': '',
  'created': 1737338346000,
  'modified': 1737338346000,
  'ink': ['kgENAAAAABUAAIA/GAIiWP7VAdzoAwAABwcEBAMIAAYABAAIAAgEEAQaDDIMSgxkDIgBGp4BEKoBErABFKoBBKYBDKIBDqIBDKIBEJ4BDJ4BBJoBBJgBBIIBAnQIXAg+CCYIBBYhAAAqJJwBAAIAAgAAAgABAAAAAAACAgAAAAAAAAABAAEBAQMDBwkHADIC/gNIArwBDQAAAAAVAACAPxgCInbO6QHo8QMAAAsPEAcAAQMAAAILBAMEBAQDBAMECwgJBA8IGww1ClMMhQEQtQEW4wEYiQIepwIcxQIi0QIg2wIi4QIm6QIg7wIm7QIm4QIs3wIqvwImoQImiQIc2wEWqwEUgwEUTwozCBUIAwAIBBYANgBmAAAAKjCSAQAEAgACAgACAgAAAAAAAgACAAAAAgAAAAAAAAAAAAAAAAAAAAAAAQEBAwkNCQAyAv4DSAI=']},
 b'\x92\x01\r\x00\x00\x00\x00\x15\x00\x00\x80?\x18\x02"X\xfe\xd5\x01\xdc\xe8\x03\x00\x00\x07\x07\x04\x04\x03\x08\x00\x06\x00\x04\x00\x08\x00\x08\x04\x10\x04\x1a\x0c2\x0cJ\x0cd\x0c\x88\x01\x1a\x9e\x01\x10\xaa\x01\x12\xb0\x01\x14\xaa\x01\x04\xa6\x01\x0c\xa2\x01\x0e\xa2\x01\x0c\xa2\x01\x10\x9e\x01\x0c\x9e\x01\x04\x9a\x01\x04\x98\x01\x04\x82\x01\x02t\x08\\\x08>\x08&\x08\x04\x16!\x00\x00*$\x9c\x01\x00\x02\x00\x02\x00\x00\x02\x00\x01\x00\x00\x00\x00\x00\x02\x02\x00\x00\x00\x00\x00\x00\x00\x01\x00\x01\x01\x01\x03\x03\x07\t\x07\x002\x02\xfe\x03H\x02\xbc\x01\r\x00\x00\x00\x00\x15\x00\x00\x80?\x18\x02"v\xce\xe9\x01\xe8\xf1\x03\x00\x00\x0b\x0f\x10\x07\x00\x01\x03\x00\x00\x02\x0b\x04\x03\x04\x04\x04\x03\x04\x03\x04\x0b\x08\t\x04\x0f\x08\x1b\x0c5\nS\x0c\x85\x01\x10\xb5\x01\x16\xe3\x01\x18\x89\x02\x1e\xa7\x02\x1c\xc5\x02"\xd1\x02 \xdb\x02"\xe1\x02&\xe9\x02 \xef\x02&\xed\x02&\xe1\x02,\xdf\x02*\xbf\x02&\xa1\x02&\x89\x02\x1c\xdb\x01\x16\xab\x01\x14\x83\x01\x14O\n3\x08\x15\x08\x03\x00\x08\x04\x16\x006\x00f\x00\x00\x00*0\x92\x01\x00\x04\x02\x00\x02\x02\x00\x02\x02\x00\x00\x00\x00\x00\x02\x00\x02\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x01\x01\x03\t\r\t\x002\x02\xfe\x03H\x02')

Visualization: (the reconstructed plot is not quite accurate yet; the actual drawing is a small cross on the canvas)

trojblue/wacom_widd_decode.md