Created
April 23, 2025 23:54
-
-
Save johngrimes/e54b4e62839f0a35034c444d1263c67d to your computer and use it in GitHub Desktop.
Simple ECL formatter
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" | |
ECL Formatter - Formats SNOMED CT Expression Constraint Language queries | |
with consistent indentation and spacing. | |
Usage: cat ecl.txt | ./ecl_formatter.py > formatted_ecl.txt | |
""" | |
import re | |
import sys | |
# Lexical pieces of a whitespace-normalised ECL string. The alternatives are
# tried in order, so longer operators must be listed before their prefixes.
_ECL_TOKEN = re.compile(
    r'\|[^|]*\|?'                             # |term| kept verbatim (may be unterminated)
    r'|[(){}]'                                # grouping characters
    r'|<<!|>>!|<<|>>|<!|>!|<=|>=|!=|[<>=:]'   # operators that get surrounding spaces
    r'|[^\s|(){}<>=:!]+'                      # identifiers, keywords, numbers, ...
    r'|!'                                     # stray "!" that is not part of an operator
)
_ECL_OPENERS = ('(', '{')
_ECL_CLOSERS = (')', '}')
_ECL_OPERATORS = frozenset(
    ('<<!', '>>!', '<<', '>>', '<!', '>!', '<=', '>=', '!=', '<', '>', '=', ':')
)
_ECL_KEYWORDS = frozenset(('OR', 'AND', 'MINUS'))


def _is_enclosed(tokens):
    """Return True when one '(' ... ')' pair spans the whole token list."""
    if tokens[0] != '(' or tokens[-1] != ')':
        return False
    depth = 0
    for position, token in enumerate(tokens):
        if token in _ECL_OPENERS:
            depth += 1
        elif token in _ECL_CLOSERS:
            depth -= 1
            if depth <= 0:
                # The group opened by the first token closes here; it encloses
                # everything only if this is also the final token.
                return position == len(tokens) - 1
    return False


def format_ecl(ecl: str, indent: str = ' ') -> str:
    """Format ECL with consistent indentation and spacing.

    The input is whitespace-normalised, split into tokens and laid out so that:

    * ``(`` and ``{`` end the current line and indent what follows;
      ``)`` and ``}`` start a new, dedented line;
    * the standalone words ``OR``, ``AND`` and ``MINUS`` start a new line
      (they are not matched inside longer words or inside ``|term|`` text);
    * ``:``, ``=``, ``<``, ``<<``, ``>``, ``>>`` and the related comparison
      operators are surrounded by single spaces;
    * text between pipes is copied through untouched;
    * an expression that is not already enclosed in one pair of parentheses
      is wrapped in ``(`` ... ``)``, and a ``MINUS`` at that outermost level
      is rendered as ``) MINUS (`` so that each operand gets its own group.

    Args:
        ecl: The ECL expression to format.
        indent: String repeated once per nesting level. Defaults to a single
            space.

    Returns:
        The formatted expression without a trailing newline, or an empty
        string when the input contains no tokens.
    """
    # Remove extra whitespace
    text = re.sub(r'\s+', ' ', ecl).strip()

    # Pair every token with a flag telling whether whitespace preceded it, so
    # that adjacency such as "123|term|" can be preserved in the output.
    tokens = []
    previous_end = 0
    for match in _ECL_TOKEN.finditer(text):
        tokens.append((match.group(), match.start() > previous_end))
        previous_end = match.end()
    if not tokens:
        return ''

    # Add the outer parentheses as a pair or not at all; deciding on the two
    # ends independently could leave the output unbalanced.
    wrap = not _is_enclosed([token for token, _ in tokens])
    level = 1 if wrap else 0
    lines = ['('] if wrap else []

    current = ''            # text of the line being built, without indentation
    current_level = level   # nesting level the line being built belongs to
    force_space = False     # previous token demands a space after it

    for token, spaced in tokens:
        if token in _ECL_KEYWORDS:
            if current:
                lines.append(indent * current_level + current)
            if token == 'MINUS' and wrap and level == 1:
                # Outermost MINUS: close the added group and open a new one.
                lines.append(') MINUS (')
                current = ''
            else:
                current = token
            current_level = level
            force_space = True
        elif token in _ECL_OPENERS:
            separator = ' ' if current and (spaced or force_space) else ''
            lines.append(indent * current_level + current + separator + token)
            level += 1
            current = ''
            current_level = level
            force_space = False
        elif token in _ECL_CLOSERS:
            if current:
                lines.append(indent * current_level + current)
            # Clamp so that surplus closers in malformed input stay at column 0.
            level = max(level - 1, 0)
            current = token
            current_level = level
            force_space = False
        elif token in _ECL_OPERATORS:
            current += (' ' if current else '') + token
            force_space = True
        else:
            separator = ' ' if current and (spaced or force_space) else ''
            current += separator + token
            force_space = False

    if current:
        lines.append(indent * current_level + current)
    if wrap:
        lines.append(')')
    return '\n'.join(lines)
if __name__ == "__main__":
    # Act as a filter: take the whole ECL expression from stdin, format it,
    # and emit the result on stdout exactly as returned.
    sys.stdout.write(format_ecl(sys.stdin.read()))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment