code v1:
import json
import struct
import base64
import matplotlib.pyplot as plt
from collections import Counter
def read_widd_file(file_path):
    """
    Read a .widd file and return its JSON metadata and decoded ink payload.

    The file is expected to begin with a JSON value (leading whitespace is
    allowed). Only that first value is parsed; any bytes after it are ignored.

    Args:
        file_path (str): Path to the .widd file.

    Returns:
        tuple: ``(metadata, ink_data_binary)``. ``metadata`` is the parsed
        JSON value. ``ink_data_binary`` is the base64-decoded first entry of
        ``metadata["ink"]``, or ``None`` when that key is absent, empty, or
        its first entry is empty. Entries after the first are not decoded.

    Raises:
        json.JSONDecodeError: If the file does not start with valid JSON.
    """
    with open(file_path, 'rb') as file:
        raw_bytes = file.read()

    # surrogateescape: bytes after the JSON value that are not valid UTF-8
    # become lone surrogates instead of aborting the decode.
    text = raw_bytes.decode('utf-8', errors='surrogateescape').lstrip()

    # raw_decode parses exactly one JSON value and stops there, so nested
    # objects and '}' characters inside strings are handled correctly
    # (searching for the first b'}' is not).
    metadata, _end = json.JSONDecoder().raw_decode(text)

    ink_entries = metadata["ink"] if "ink" in metadata else None
    ink_data_encoded = ink_entries[0] if ink_entries else None
    if not ink_data_encoded:
        return metadata, None
    return metadata, base64.b64decode(ink_data_encoded)
def parse_stroke_segments(ink_data_binary, marker_value=352321536, segment_length=10):
    """
    Split binary ink data into fixed-length segments that start at a marker word.

    The buffer is read as consecutive little-endian signed 32-bit integers
    (trailing bytes that do not fill a whole word are ignored). Every word
    equal to ``marker_value`` starts a segment of up to ``segment_length``
    words; segments may overlap and the last one may be shorter.

    NOTE(review): treating the payload as aligned int32 words is a heuristic.
    This code does not establish the real record layout -- confirm it against
    the file format before relying on the output.

    Args:
        ink_data_binary (bytes | None): Binary data representing strokes.
        marker_value (int): Word value that marks a segment start. The default
            352321536 is 0x15000000, i.e. bytes ``00 00 00 15`` little-endian.
        segment_length (int): Number of words taken per segment, marker included.

    Returns:
        list: One dict per marker hit, with ``"markers"`` (the first two words
        of the segment) and ``"attributes"`` (the remaining words). Empty when
        the input is ``None`` or empty.
    """
    if not ink_data_binary:
        # Covers both None (no ink payload in the file) and empty bytes.
        return []

    # iter_unpack requires a buffer length that is a multiple of the word size.
    usable_length = len(ink_data_binary) - len(ink_data_binary) % 4
    # '<i' pins the byte order so the result does not depend on the host CPU.
    raw_integers = [
        word for (word,) in struct.iter_unpack('<i', ink_data_binary[:usable_length])
    ]

    analyzed_segments = []
    for start, value in enumerate(raw_integers):
        if value != marker_value:
            continue
        segment = raw_integers[start:start + segment_length]
        analyzed_segments.append({"markers": segment[:2], "attributes": segment[2:]})
    return analyzed_segments
def extract_and_normalize_coordinates(segments, bounding_box=(864, 592)):
    """
    Pair up each segment's attributes as (x, y) and wrap them into the canvas.

    Attributes are consumed two at a time; a trailing unpaired attribute is
    dropped. Each value is wrapped with the modulo operator (x by the canvas
    width, y by the canvas height) -- values are wrapped, not scaled.

    Args:
        segments (list): Parsed stroke segments; each is a dict with an
            ``"attributes"`` list of integers.
        bounding_box (tuple): Canvas ``(width, height)`` used as the moduli.

    Returns:
        list: For each segment, a list of ``(x, y)`` tuples after wrapping.
    """
    width, height = bounding_box[0], bounding_box[1]

    normalized_coordinates_by_segment = []
    for segment in segments:
        attributes = segment["attributes"]
        # Stop at len - 1 so an odd trailing value never forms a pair.
        normalized_coordinates_by_segment.append([
            (abs(attributes[i] % width), abs(attributes[i + 1] % height))
            for i in range(0, len(attributes) - 1, 2)
        ])
    return normalized_coordinates_by_segment
def decode_additional_attributes(segments):
    """
    Reinterpret each segment attribute's 32-bit pattern as an IEEE-754 float.

    This is an exploratory decode (hypothesis: some attributes may encode
    pressure, brush width, or similar settings). Each attribute is masked to
    its low 32 bits and those bits are read back as a single-precision float.

    Args:
        segments (list): Parsed stroke segments with ``"markers"`` and
            ``"attributes"`` entries; attributes must be integers.

    Returns:
        list: One dict per input segment with the original ``"markers"`` and
        ``"attributes"`` objects plus ``"decoded_settings"``, a list of floats
        aligned with ``"attributes"``.
    """
    as_uint32 = struct.Struct('<I')
    as_float32 = struct.Struct('<f')

    decoded_segments = []
    for segment in segments:
        attributes = segment["attributes"]
        # The mask maps negative ints to their two's-complement bit pattern and
        # guarantees the value fits in 'I', so packing cannot raise struct.error.
        potential_settings = [
            as_float32.unpack(as_uint32.pack(attr & 0xFFFFFFFF))[0]
            for attr in attributes
        ]
        decoded_segments.append({
            "markers": segment["markers"],
            "attributes": attributes,
            "decoded_settings": potential_settings,
        })
    return decoded_segments
def visualize_strokes(coordinates_by_segment, bounding_box=(864, 592)):
    """
    Plot each stroke as a connected line on a canvas-sized figure.

    The Y axis is drawn top-down (0 at the top) to match typical canvas
    orientation. Strokes with no points are skipped; stroke labels keep the
    1-based position of the stroke in ``coordinates_by_segment``.

    Args:
        coordinates_by_segment (list): List of normalized ``(x, y)``
            coordinate lists, one per stroke segment.
        bounding_box (tuple): Canvas ``(width, height)`` used as axis limits.
    """
    plt.figure(figsize=(8, 6))

    plotted_any = False
    for i, stroke in enumerate(coordinates_by_segment, start=1):
        if not stroke:
            # zip(*[]) yields nothing, so the x/y unpacking would raise.
            continue
        x_coords, y_coords = zip(*stroke)
        plt.plot(x_coords, y_coords, marker='o', label=f"Stroke {i}")
        plotted_any = True

    plt.xlim(0, bounding_box[0])
    # Passing the limits in reverse order inverts the Y axis. Calling
    # invert_yaxis() and then ylim(0, height) would undo the inversion.
    plt.ylim(bounding_box[1], 0)
    plt.title("Reconstructed Strokes")
    plt.xlabel("X Coordinate")
    plt.ylabel("Y Coordinate")
    if plotted_any:
        # legend() with no labelled artists only produces a warning.
        plt.legend()
    plt.grid(True)
    plt.show()
# Main workflow: read the file, parse segments, decode attributes, normalize
# coordinates, and plot. These are top-level statements, so they execute
# whenever this module is run or imported.
file_path = 'Inkspace_cc55ef85e931.widd'  # hard-coded input path
metadata, ink_data_binary = read_widd_file(file_path)
# NOTE(review): ink_data_binary is None when the metadata has no usable "ink"
# entry; it is passed to parse_stroke_segments without a check.
segments = parse_stroke_segments(ink_data_binary)
# decoded_segments is not consumed by the remaining statements below.
decoded_segments = decode_additional_attributes(segments)
normalized_coordinates_by_segment = extract_and_normalize_coordinates(segments)
visualize_strokes(normalized_coordinates_by_segment)
Decoded data:
(metadata, ink_data_binary)
({'locale': 'en_US',
'transform': 'matrix(0 -1 1 0 0 592)',
'boundingBox': 'rect(0 0 864 592)',
'title': '',
'created': 1737338346000,
'modified': 1737338346000,
'ink': ['kgENAAAAABUAAIA/GAIiWP7VAdzoAwAABwcEBAMIAAYABAAIAAgEEAQaDDIMSgxkDIgBGp4BEKoBErABFKoBBKYBDKIBDqIBDKIBEJ4BDJ4BBJoBBJgBBIIBAnQIXAg+CCYIBBYhAAAqJJwBAAIAAgAAAgABAAAAAAACAgAAAAAAAAABAAEBAQMDBwkHADIC/gNIArwBDQAAAAAVAACAPxgCInbO6QHo8QMAAAsPEAcAAQMAAAILBAMEBAQDBAMECwgJBA8IGww1ClMMhQEQtQEW4wEYiQIepwIcxQIi0QIg2wIi4QIm6QIg7wIm7QIm4QIs3wIqvwImoQImiQIc2wEWqwEUgwEUTwozCBUIAwAIBBYANgBmAAAAKjCSAQAEAgACAgACAgAAAAAAAgACAAAAAgAAAAAAAAAAAAAAAAAAAAAAAQEBAwkNCQAyAv4DSAI=']},
b'\x92\x01\r\x00\x00\x00\x00\x15\x00\x00\x80?\x18\x02"X\xfe\xd5\x01\xdc\xe8\x03\x00\x00\x07\x07\x04\x04\x03\x08\x00\x06\x00\x04\x00\x08\x00\x08\x04\x10\x04\x1a\x0c2\x0cJ\x0cd\x0c\x88\x01\x1a\x9e\x01\x10\xaa\x01\x12\xb0\x01\x14\xaa\x01\x04\xa6\x01\x0c\xa2\x01\x0e\xa2\x01\x0c\xa2\x01\x10\x9e\x01\x0c\x9e\x01\x04\x9a\x01\x04\x98\x01\x04\x82\x01\x02t\x08\\\x08>\x08&\x08\x04\x16!\x00\x00*$\x9c\x01\x00\x02\x00\x02\x00\x00\x02\x00\x01\x00\x00\x00\x00\x00\x02\x02\x00\x00\x00\x00\x00\x00\x00\x01\x00\x01\x01\x01\x03\x03\x07\t\x07\x002\x02\xfe\x03H\x02\xbc\x01\r\x00\x00\x00\x00\x15\x00\x00\x80?\x18\x02"v\xce\xe9\x01\xe8\xf1\x03\x00\x00\x0b\x0f\x10\x07\x00\x01\x03\x00\x00\x02\x0b\x04\x03\x04\x04\x04\x03\x04\x03\x04\x0b\x08\t\x04\x0f\x08\x1b\x0c5\nS\x0c\x85\x01\x10\xb5\x01\x16\xe3\x01\x18\x89\x02\x1e\xa7\x02\x1c\xc5\x02"\xd1\x02 \xdb\x02"\xe1\x02&\xe9\x02 \xef\x02&\xed\x02&\xe1\x02,\xdf\x02*\xbf\x02&\xa1\x02&\x89\x02\x1c\xdb\x01\x16\xab\x01\x14\x83\x01\x14O\n3\x08\x15\x08\x03\x00\x08\x04\x16\x006\x00f\x00\x00\x00*0\x92\x01\x00\x04\x02\x00\x02\x02\x00\x02\x02\x00\x00\x00\x00\x00\x02\x00\x02\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x01\x01\x03\t\r\t\x002\x02\xfe\x03H\x02')
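Visualization: not yet accurate — the expected drawing is a small cross on the canvas. (Likely cause, to be confirmed: the ink bytes above look like length-prefixed records of tagged, variable-length fields — e.g. the leading `\x92\x01` reads as the length 146 of the first record — rather than aligned fixed-width 32-bit integers.)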