Created
August 13, 2014 17:59
-
-
Save notbanker/afb4301ac3007977c290 to your computer and use it in GitHub Desktop.
ISDA Legal Entity Identifier (LEI) hash. Maps a 20-character alphanumeric LEI to a 10-character Unique Trade Identifier (UTI) prefix.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Alphabet used to read identifier characters as base-36 digits: the digit value
# of a character is its index in this string ('0' -> 0, ..., 'Z' -> 35).
ISDA_PERMITTED_CHARS = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'

# Modulus applied to each half of the shortened LEI. It is smaller than
# 36**5 (60466176), so every residue fits in at most five base-36 characters.
ISDA_PRIME = 59575553
def leiHash( lei, permittedChars = ISDA_PERMITTED_CHARS ):
    """ Shorten 20 character alpha-numeric Legal Entity Identifier into 10 character alpha-numeric "short LEI" that
        can be used as a Unique Trade Identifier (UTI) prefix. The algorithm is simply the modulo operation lifted from
        integers to the space of alpha-numeric strings.

        Argument
        --------
        lei              str (length 20 typically)
        permittedChars   str  alphabet whose character positions are used as digit values

        Returns
        -------
        str (length 10): the five character hash of the first half followed by that of the second half
    """
    # This will break a combination of issuer and firm identifiers into two eight char strings, compress each down to five, and join
    # LEI = LEI issuer (4 char alphanumeric) ++ 2 reserved characters, always the same ("00") ++ firm identifier (12 char alphanumeric) ++ 2 check digits.
    if len( lei ) == 20:
        issuer = lei[:4]
        firm = lei[6:18]
        leiWithoutRedundantChars = issuer + firm
    else:
        # Anything that is not exactly 20 characters long is hashed as given, with no fields stripped
        leiWithoutRedundantChars = lei
    # Floor division: a plain "/" yields a float on Python 3, which cannot be used as a slice index
    h = len( leiWithoutRedundantChars ) // 2
    firstHalf = leiWithoutRedundantChars[:h]
    secondHalf = leiWithoutRedundantChars[h:]
    firstHalfHash = _primeHash( s = firstHalf, newLen = 5, prime = ISDA_PRIME, permittedChars = permittedChars )
    secondHalfHash = _primeHash( s = secondHalf, newLen = 5, prime = ISDA_PRIME, permittedChars = permittedChars )
    return firstHalfHash + secondHalfHash
def leiHashCollisionProbability( nLei, nPermittedChars = 36, firstPrime = ISDA_PRIME, secondPrime = ISDA_PRIME ):
    """ Probability of at least one collision if (>=20 character) LEI's are drawn from uniform distribution with replacement nLei times

        Uses the birthday-problem approximation  1 - exp( -n(n-1) / (2N) )  with N = firstPrime*secondPrime possible hash values.

        Argument
        --------
        nLei              int  number of LEIs drawn
        nPermittedChars   int  not used in the calculation; retained so existing callers keep working
        firstPrime        int  modulus applied to the first half
        secondPrime       int  modulus applied to the second half

        Returns
        -------
        float in [0,1]
    """
    nPossibilities = firstPrime*secondPrime
    # -expm1(-x) equals 1-exp(-x) but keeps full precision when x is tiny, which it is for any realistic nLei
    return -math.expm1( -nLei*(nLei-1.)/(2.*nPossibilities) )
@func.memoize_plus
def _powersModP( nPermittedChars, p ):
    """ Cached powers of nPermittedChars mod p

        Returns a list of 25 integers whose k'th entry is nPermittedChars**k mod p, for k = 0..24.
    """
    # NOTE(review): func.memoize_plus is defined outside this file; presumably it caches the list per (nPermittedChars, p) - confirm.
    # range (rather than xrange) keeps this working on both Python 2 and Python 3
    return [ pow( nPermittedChars, k ) % p for k in range( 25 ) ]
def _intFromAlphaModP( s, permittedChars = ISDA_PERMITTED_CHARS, p = ISDA_PRIME ):
    """ Convert alpha-numeric to integer modulo p

        The string is read as a number written in base len( permittedChars ), most significant character first,
        where the value of each character is its position in permittedChars.

        Argument
        --------
        s                str  characters drawn from permittedChars (an empty string gives 0)
        permittedChars   str  alphabet
        p                int  modulus

        Returns
        -------
        int in 0..p-1

        Raises
        ------
        ValueError if s contains a character that is not in permittedChars (the match is case sensitive)
    """
    # We merge the operations of converting to an int and taking mod p not because python can't handle large numbers, which
    # it does perfectly well, but to make translation to Excel/VBA straightforward: reducing after every character keeps
    # all intermediate values below nPermittedChars*p. Horner's rule gives the same result as summing digit*(n^k mod p)
    # over a table of powers, without limiting the length of s to the size of that table.
    i = 0
    nPermittedChars = len( permittedChars )
    for char in s:
        digit = permittedChars.find( char )
        if digit < 0:
            # find() signals "absent" with -1, which would otherwise be folded into the hash as if it were a digit
            raise ValueError( "Character %r is not one of the permitted characters %r" % ( char, permittedChars ) )
        i = ( i*nPermittedChars + digit ) % p
    return i
def _alphaFromInt( i, permittedChars = ISDA_PERMITTED_CHARS ):
    """ Write integer i in base len( permittedChars ), using permittedChars as the digit symbols

        The most significant digit comes first and there are no leading zero-digits, so distinct non-negative
        integers give distinct strings. Zero is written as the single character permittedChars[0].
    """
    base = len( permittedChars )
    remaining = i
    symbolsLowFirst = []
    while remaining:
        # Peel off the least significant digit on each pass
        remaining, digit = divmod( remaining, base )
        symbolsLowFirst.append( permittedChars[digit] )
    if not symbolsLowFirst:
        return permittedChars[0]
    return ''.join( reversed( symbolsLowFirst ) )
def _primeHash( s, permittedChars = ISDA_PERMITTED_CHARS, prime = ISDA_PRIME, newLen = 5 ):
    """ Hash string to one of shorter length by applying modulus operation

        Argument
        --------
        s                str  string to hash, made of characters from permittedChars
        permittedChars   str  alphabet
        prime            int  modulus (doesn't really have to be a prime)
        newLen           int  length of the returned string

        Returns
        -------
        str (length newLen)

        Raises
        ------
        ValueError if len( permittedChars )**newLen < prime, i.e. some residues would need more than newLen characters
    """
    nPermittedChars = len( permittedChars )
    if pow( nPermittedChars, newLen ) < prime:
        # The failing case is a modulus too large for newLen characters, so the remedy is a smaller prime or a larger newLen
        raise ValueError("You may wish to choose a smaller prime (or a larger newLen) so the map from integers mod p to alpha-numeric strings is an injection")
    iModP = _intFromAlphaModP( s, permittedChars = permittedChars, p = prime )
    alf = _alphaFromInt( iModP, permittedChars )
    # Pad to the requested length (previously hard-wired to 5, which ignored newLen).
    # NOTE(review): right-padding with 'Z' means that e.g. 'Z' and 'ZZ' both become 'ZZZZZ', so the padded result is not
    # strictly injective, and 'Z' need not belong to a custom permittedChars. Left unchanged so existing hash values stay the same.
    return alf.ljust( newLen, 'Z' )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment