Skip to content

Instantly share code, notes, and snippets.

@Deltachaos
Created April 24, 2025 12:03
Show Gist options
  • Save Deltachaos/00d9784379391c73f6aad2aca1a03407 to your computer and use it in GitHub Desktop.
Save Deltachaos/00d9784379391c73f6aad2aca1a03407 to your computer and use it in GitHub Desktop.
import time
import xml.etree.ElementTree as ET
# Benchmark fixtures: how many times each document is parsed, and the payloads.
ITERATIONS = 10_000

# Payload containing the three characters that must be escaped in XML text.
TEXT_WITH_SPECIAL_CHARS = 1000 * "This is a test with <, >, &, and other symbols. "

# The same payload with &, < and > replaced by their entity references,
# done in a single pass so the "&" of a produced entity is never re-escaped.
TEXT_ESCAPED = TEXT_WITH_SPECIAL_CHARS.translate(
    str.maketrans({"&": "&amp;", "<": "&lt;", ">": "&gt;"})
)

# Control payload with nothing that needs escaping.
PLAIN_TEXT = 1000 * "This is a test with only plain text and no special characters. "
def generate_xml_with_cdata(text):
    """Return a ``<root>`` document whose content is ``text`` wrapped in CDATA.

    Args:
        text: Raw, unescaped character data. It may contain ``<``, ``>`` and
            ``&``; they are carried literally inside the CDATA section.

    Returns:
        The XML document as a string.
    """
    # "]]>" terminates a CDATA section and therefore cannot appear inside one.
    # Split it so "]]" closes the current section and ">" opens the next; a
    # parser concatenates the pieces back into the original text.
    safe_text = text.replace("]]>", "]]]]><![CDATA[>")
    return f"<root><![CDATA[{safe_text}]]></root>"
def generate_xml_with_escaped(text):
    """Return a ``<root>`` document with ``text`` as its element content.

    Args:
        text: Character data inserted verbatim between the tags. No escaping
            is performed here, so the caller must already have replaced
            ``&``, ``<`` and ``>`` with entity references where needed.

    Returns:
        The XML document as a string.
    """
    return f"<root>{text}</root>"
def parse_xml(xml_string):
    """Parse ``xml_string`` and return the text of its root element.

    Args:
        xml_string: A complete XML document.

    Returns:
        The root element's ``text`` attribute as produced by ElementTree,
        which is ``None`` when the root has no text content.

    Raises:
        xml.etree.ElementTree.ParseError: If the document is not well-formed.
    """
    root = ET.fromstring(xml_string)
    return root.text
def _time_parse(xml_string, iterations):
    """Return the seconds spent calling ``parse_xml(xml_string)`` ``iterations`` times."""
    start = time.perf_counter()
    for _ in range(iterations):
        parse_xml(xml_string)
    return time.perf_counter() - start


if __name__ == "__main__":
    # Payload with special characters: once wrapped in CDATA, once entity-escaped.
    xml_cdata = generate_xml_with_cdata(TEXT_WITH_SPECIAL_CHARS)
    xml_escaped = generate_xml_with_escaped(TEXT_ESCAPED)
    cdata_seconds = _time_parse(xml_cdata, ITERATIONS)
    escaped_seconds = _time_parse(xml_escaped, ITERATIONS)

    # Plain payload: once wrapped in CDATA, once inserted as-is (nothing to escape).
    xml_cdata_plain = generate_xml_with_cdata(PLAIN_TEXT)
    xml_plain = generate_xml_with_escaped(PLAIN_TEXT)
    cdata_plain_seconds = _time_parse(xml_cdata_plain, ITERATIONS)
    plain_seconds = _time_parse(xml_plain, ITERATIONS)

    # Report the wall-clock duration of each of the four runs.
    print(f"Parsing performance ({ITERATIONS} iterations):\n")
    print(f"CDATA (with special chars): {cdata_seconds:.4f} seconds")
    print(f"Escaped text: {escaped_seconds:.4f} seconds")
    print(f"CDATA (plain text): {cdata_plain_seconds:.4f} seconds")
    print(f"Plain text (no escaping): {plain_seconds:.4f} seconds")
@Deltachaos
Copy link
Author

Parsing performance (10000 iterations):

CDATA (with special chars):     1.4372 seconds
Escaped text:                   2.2830 seconds
CDATA (plain text):             1.3776 seconds
Plain text (no escaping):       0.4864 seconds

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment