Skip to content

Instantly share code, notes, and snippets.

@obfusk
Last active October 29, 2024 21:11
Show Gist options
  • Save obfusk/b337e4f641991cc88a24d3f9567db80f to your computer and use it in GitHub Desktop.
Save obfusk/b337e4f641991cc88a24d3f9567db80f to your computer and use it in GitHub Desktop.

x509_canonical_name.py

Python implementation of the Java algorithm for an RFC 2253 conformant string representation of an X.500 distinguished name with additional canonicalisations (as used to compare distinguished names in X.509 certificates for equality in e.g. apksigner and Android).

See the Java documentation.

Caveats

NB: the Java documentation is incorrect with respect to whitespace handling.

  • "Leading and trailing white space characters" means "any character whose codepoint is less than or equal to U+0020 (the space character)" because String.trim() is used. This means it strips all ASCII control characters except DEL (U+007F) — ESC (U+001B) is stripped like the rest — and doesn't strip Unicode whitespace. NB: control characters not at the beginning or end are simply kept as-is.

  • The "unless the value consists entirely of white space characters" part does not seem to be implemented.

  • "Internal substrings of one or more white space characters" means U+0020 (the space character) only (as even though Character.isWhitespace() is used, the only whitespace character considered printable by DerValue.isPrintableStringChar() is U+0020).

#!/usr/bin/env python3
# encoding: utf-8
# SPDX-FileCopyrightText: 2024 FC (Fay) Stegerman <[email protected]>
# SPDX-License-Identifier: GPL-3.0-or-later
r"""
Python implementation of the Java algorithm for an RFC 2253 conformant string
representation of an X.500 distinguished name with additional canonicalisations
(as used to compare distinguished names in X.509 certificates for equality in
e.g. `apksigner` and Android).
See the Java documentation: https://docs.oracle.com/en/java/javase/21/docs/api/java.base/javax/security/auth/x500/X500Principal.html#getName%28java.lang.String%29
Caveats
=======
NB: the Java documentation is incorrect with respect to whitespace handling.
* "Leading and trailing white space characters" means "any character whose
codepoint is less than or equal to `U+0020` (the space character)" because
`String.trim()` is used. Which means it strips all ASCII control characters
(except DEL, `U+007F`) and doesn't strip Unicode whitespace. NB: control characters not
at the beginning or end are simply kept as-is.
* The "unless the value consists entirely of white space characters" part does
not seem to be implemented.
* "Internal substrings of one or more white space characters" means `U+0020`
(the space character) only (as even though `Character.isWhitespace()` is used,
the only whitespace character considered printable by
`DerValue.isPrintableStringChar()` is `U+0020`).
"""
import binascii
import re
import unicodedata
from typing import List, Tuple, Union
from asn1crypto import x509 # type: ignore[import-untyped]
def x509_canonical_name(name: x509.Name, *,     # type: ignore[no-any-unimported]
                        android: bool = False) -> str:
    r"""
    Render an x509.Name as its canonical string.

    Every RDN returned by x509_comparison_name() becomes its "type=value"
    pairs joined with "+"; the RDNs are then joined with ",".  Control
    characters that survive normalisation are emitted raw, not escaped.

    The android flag is passed through unchanged to x509_comparison_name().

    >>> name = {"common_name": " Foo Bar", "organization_name": "My\x00Org\b"}
    >>> x509_canonical_name(x509.Name.build(name))
    'cn=foo bar,o=my\x00org'

    """
    rendered_rdns = []
    for rdn in x509_comparison_name(name, android=android):
        rendered_rdns.append("+".join(f"{attr}={value}" for attr, value in rdn))
    return ",".join(rendered_rdns)
def x509_friendly_name(name: x509.Name, *,      # type: ignore[no-any-unimported]
                       android: bool = False) -> str:
    r"""
    Render an x509.Name as a human-friendly string.

    Uses the ordering of x509_ordered_name() but the escaped, *not* normalised
    value of each AVA.  The type is upper-cased and the value is passed through
    repr() (with the surrounding quotes dropped), so control characters show up
    as backslash escapes.  AVAs are joined with "+" and RDNs with ", ".

    The android flag is passed through unchanged to x509_ordered_name().

    >>> name = {"common_name": " Foo Bar", "organization_name": "My\x00Org\b"}
    >>> x509_friendly_name(x509.Name.build(name))
    'CN= Foo Bar, O=My\\x00Org\\x08'

    """
    rendered_rdns = []
    for rdn in x509_ordered_name(name, android=android):
        pairs = [f"{attr.upper()}={repr(escaped)[1:-1]}" for _, attr, _, escaped in rdn]
        rendered_rdns.append("+".join(pairs))
    return ", ".join(rendered_rdns)
def x509_comparison_name(name: x509.Name, *,    # type: ignore[no-any-unimported]
                         android: bool = False) -> List[List[Tuple[str, str]]]:
    r"""
    Reduce an x509.Name to the nested list used for canonical comparison.

    The result is a list of RDNs; each RDN is a list of AVAs; each AVA is a
    (type, value) tuple where type is the standard name or dotted OID and value
    is the normalised string representation of the value.  Ordering is the one
    produced by x509_ordered_name(), to which the android flag is passed
    through unchanged.
    """
    comparison: List[List[Tuple[str, str]]] = []
    for rdn in x509_ordered_name(name, android=android):
        comparison.append([(attr, normalised) for _, attr, normalised, _ in rdn])
    return comparison
# FIXME: leading zeroes? anything missing?
def x509_ordered_name(name: x509.Name, *,       # type: ignore[no-any-unimported]
                      android: bool = False) -> List[List[Tuple[int, str, str, str]]]:
    r"""
    Convert an x509.Name to a nested list in canonical ordering, keeping both
    the normalised and the pre-normalisation string of every value.

    The result is a list of RDNs (name.chosen in reverse order); each RDN is a
    sorted list of AVAs; each AVA is an (oid, type, normalised_value,
    esc_value) tuple where

    * oid is 0 for the standard names known to this function and 1 for
      anything else (so standard names sort before dotted OIDs);
    * type is the short standard name (e.g. "cn") or the dotted OID;
    * normalised_value is the normalised string representation of the value;
    * esc_value is the string value after escaping but before normalisation.

    Values of dotted-OID types, and values that are not a DirectoryString
    holding a UTF8String or PrintableString, are represented as "#" followed by
    the hex encoding of the value's dump(); normalised_value and esc_value are
    then identical.

    Within an RDN the AVAs are sorted by (oid, type, normalised_value).  With
    android=True the dotted OIDs are compared numerically, arc by arc, instead
    of as strings.

    NB: control characters are not escaped, only characters in ",+<>;\"\\" and
    "#" at the start (before "whitespace" trimming) are.

    https://docs.oracle.com/en/java/javase/21/docs/api/java.base/javax/security/auth/x500/X500Principal.html#getName(java.lang.String)
    https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.base/share/classes/sun/security/x509/AVA.java#L805
    https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.base/share/classes/sun/security/x509/RDN.java#L472
    https://android.googlesource.com/platform/libcore/+/refs/heads/android14-release/ojluni/src/main/java/sun/security/x509/RDN.java#481

    '/OU=a/CN= Foo+CN=bar /OU=b/C=xx'

    >>> data = b'0A1\n0\x08\x06\x03U\x04\x0b\x0c\x01a1\x1a0\x0b\x06\x03U\x04\x03\x0c\x04 Foo0\x0b\x06\x03U\x04\x03\x0c\x04bar 1\n0\x08\x06\x03U\x04\x0b\x0c\x01b1\x0b0\t\x06\x03U\x04\x06\x13\x02xx'
    >>> name = x509.Name.load(data)
    >>> x509_canonical_name(name)
    'c=xx,ou=b,cn=bar+cn=foo,ou=a'
    >>> x509_friendly_name(name)
    'C=xx, OU=b, CN=bar +CN= Foo, OU=a'
    >>> x509_ordered_name(name)
    [[(0, 'c', 'xx', 'xx')], [(0, 'ou', 'b', 'b')], [(0, 'cn', 'bar', 'bar '), (0, 'cn', 'foo', ' Foo')], [(0, 'ou', 'a', 'a')]]

    '/custom2=42+custom1=37+O=org/emailAddress=x@y/CN= x \t\t 猫x/CN=foo  +CN=Ii   +CN=İı +CN=\u1e9eß+CN=bar  +CN=zz+CN= #,;\+\\/CN=#y'

    >>> data = b'0\x81\xbc1"0\t\x06\x03\x01\x02\x03\x0c\x02370\t\x06\x03\x01\x0b\x03\x0c\x02420\n\x06\x03U\x04\n\x0c\x03org1\x120\x10\x06\t*\x86H\x86\xf7\r\x01\t\x01\x16\x03x@y1\x130\x11\x06\x03U\x04\x03\x0c\n x \t\t \xe7\x8c\xabx1`0\t\x06\x03U\x04\x03\x0c\x02zz0\x0c\x06\x03U\x04\x03\x0c\x05Ii   0\x0c\x06\x03U\x04\x03\x0c\x05bar  0\x0c\x06\x03U\x04\x03\x0c\x05foo  0\x0c\x06\x03U\x04\x03\x0c\x05\xc4\xb0\xc4\xb1 0\x0c\x06\x03U\x04\x03\x0c\x05\xe1\xba\x9e\xc3\x9f0\r\x06\x03U\x04\x03\x0c\x06 #,;+\\1\x0b0\t\x06\x03U\x04\x03\x0c\x02#y'
    >>> name = x509.Name.load(data)
    >>> x509_canonical_name(name)
    'cn=\\#y,cn=#\\,\\;\\+\\\\+cn=bar+cn=foo+cn=ii+cn=i̇i+cn=zz+cn=ßss,cn=x \t\t 猫x,1.2.840.113549.1.9.1=#1603784079,o=org+0.1.11.3=#0c023432+0.1.2.3=#0c023337'
    >>> x509_friendly_name(name)
    'CN=\\\\#y, CN= #\\\\,\\\\;\\\\+\\\\\\\\+CN=bar  +CN=foo  +CN=Ii   +CN=İı +CN=zz+CN=ẞß, CN= x \\t\\t 猫x, 1.2.840.113549.1.9.1=#1603784079, O=org+0.1.11.3=#0c023432+0.1.2.3=#0c023337'
    >>> x509_ordered_name(name)
    [[(0, 'cn', '\\#y', '\\#y')], [(0, 'cn', '#\\,\\;\\+\\\\', ' #\\,\\;\\+\\\\'), (0, 'cn', 'bar', 'bar  '), (0, 'cn', 'foo', 'foo  '), (0, 'cn', 'ii', 'Ii   '), (0, 'cn', 'i̇i', 'İı '), (0, 'cn', 'zz', 'zz'), (0, 'cn', 'ßss', 'ẞß')], [(0, 'cn', 'x \t\t 猫x', ' x \t\t 猫x')], [(1, '1.2.840.113549.1.9.1', '#1603784079', '#1603784079')], [(0, 'o', 'org', 'org'), (1, '0.1.11.3', '#0c023432', '#0c023432'), (1, '0.1.2.3', '#0c023337', '#0c023337')]]
    >>> x509_ordered_name(name, android=True)
    [[(0, 'cn', '\\#y', '\\#y')], [(0, 'cn', '#\\,\\;\\+\\\\', ' #\\,\\;\\+\\\\'), (0, 'cn', 'bar', 'bar  '), (0, 'cn', 'foo', 'foo  '), (0, 'cn', 'ii', 'Ii   '), (0, 'cn', 'i̇i', 'İı '), (0, 'cn', 'zz', 'zz'), (0, 'cn', 'ßss', 'ẞß')], [(0, 'cn', 'x \t\t 猫x', ' x \t\t 猫x')], [(1, '1.2.840.113549.1.9.1', '#1603784079', '#1603784079')], [(0, 'o', 'org', 'org'), (1, '0.1.2.3', '#0c023337', '#0c023337'), (1, '0.1.11.3', '#0c023432', '#0c023432')]]

    >>> control = "".join(chr(i) for i in range(32))    # no space
    >>> f = lambda cn: x509_canonical_name(x509.Name.build({"common_name": cn}))
    >>> f(f"{control}foo") == "cn=foo" == f(f"foo{control} ")
    True
    >>> f(f"\tfoo{control}bar ")
    'cn=foo\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1fbar'
    >>> f(f" \x00 \x7f\x00foo \x00bar\xa0   ")
    'cn=\x7f\x00foo \x00bar '

    """
    def sort_key(entry: Tuple[int, str, str, str]) -> Tuple[int, Union[str, List[int]], str]:
        is_oid, attr, normalised, _ = entry
        # Only dotted OIDs (is_oid == 1) are ever turned into lists, and the
        # leading flag keeps them from being compared against plain strings.
        numeric = android and is_oid
        attr_key: Union[str, List[int]] = [int(arc) for arc in attr.split(".")] if numeric else attr
        return is_oid, attr_key, normalised

    string_types = (x509.UTF8String, x509.PrintableString)
    # dotted OID -> (long name, short name); only the short name is used below
    standard_names = {
        "2.5.4.3": ("common_name", "cn"),
        "2.5.4.6": ("country_name", "c"),
        "2.5.4.7": ("locality_name", "l"),
        "2.5.4.8": ("state_or_province_name", "st"),
        "2.5.4.9": ("street_address", "street"),
        "2.5.4.10": ("organization_name", "o"),
        "2.5.4.11": ("organizational_unit_name", "ou"),
        "0.9.2342.19200300.100.1.1": ("user_id", "uid"),
        "0.9.2342.19200300.100.1.25": ("domain_component", "dc"),
    }
    escapes = {ord(ch): f"\\{ch}" for ch in ",+<>;\"\\"}
    # everything up to and including U+0020: all C0 control characters
    # (ESC included, DEL excluded) plus the space character
    trim_chars = "".join(map(chr, range(0x20 + 1)))
    space_runs = re.compile(r" +")

    ordered = []
    for rdn in reversed(name.chosen):
        entries: List[Tuple[int, str, str, str]] = []
        for ava in rdn:
            attr_type, attr_value = ava["type"], ava["value"]
            dotted = attr_type.dotted
            if dotted in standard_names:
                is_oid, attr = 0, standard_names[dotted][1]     # order standard before OID
            else:
                is_oid, attr = 1, dotted
            is_string = (not is_oid and isinstance(attr_value, x509.DirectoryString)
                         and isinstance(attr_value.chosen, string_types))
            if not is_string:
                # no string form: use the hex dump for both fields
                hex_value = "#" + binascii.hexlify(attr_value.dump()).decode()
                entries.append((is_oid, attr, hex_value, hex_value))
                continue
            escaped = (attr_value.native or "").translate(escapes)
            if escaped.startswith("#"):
                escaped = "\\" + escaped
            # collapse runs of spaces, trim, case-fold via upper-then-lower, NFKD
            trimmed = space_runs.sub(" ", escaped).strip(trim_chars)
            normalised = unicodedata.normalize("NFKD", trimmed.upper().lower())
            entries.append((is_oid, attr, normalised, escaped))
        entries.sort(key=sort_key)
        ordered.append(entries)
    return ordered
if __name__ == "__main__":
    import sys

    # Treat every command-line argument as the path of a certificate file and
    # print the canonical form of that certificate's issuer name.
    for cert_path in sys.argv[1:]:
        with open(cert_path, "rb") as cert_file:
            raw = cert_file.read()
        certificate = x509.Certificate.load(raw)
        issuer = certificate["tbs_certificate"]["issuer"]
        print(x509_canonical_name(issuer))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment