Skip to content

Instantly share code, notes, and snippets.

@notbanker
Created August 13, 2014 17:59
Show Gist options
  • Save notbanker/afb4301ac3007977c290 to your computer and use it in GitHub Desktop.
Save notbanker/afb4301ac3007977c290 to your computer and use it in GitHub Desktop.
ISDA Legal Entity Identifier (LEI) hash. Takes 20 digit LEI to a 10 digit Unique Trade Identifier (UTI) prefix
ISDA_PERMITTED_CHARS = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'
ISDA_PRIME = 59575553
def leiHash( lei, permittedChars = ISDA_PERMITTED_CHARS ):
""" Shorten 20 character alpha-numeric Legal Entity Identifier into 10 character alpha-numeric "short LEI" that
can be used as a Unique Trade Identifier (UTI) prefix. The algorithm is simply the modulo operation lifted from
integers to the space of alpha-numeric case-insensitive strings.
Argument
--------
s str (length 20 typically)
Returns
-------
str (length 10)
"""
# This will break a combination of issuer and firm identifiers into two eight char strings, compress each down to five, and join
# LEI = LEI Issuer (4 char alphanumeric)" ++ "2 reserved characters always same" 00 ++ firm identifier (12 char alphanumeric) + 2 check digits.
if len(lei)==20:
issuer = lei[:4]
firm = lei[6:18]
leiWithoutRedundantChars = issuer + firm
else:
leiWithoutRedundantChars = lei
h = len( leiWithoutRedundantChars )/2
firstHalf = leiWithoutRedundantChars[:h]
secondHalf = leiWithoutRedundantChars[h:]
firstHalfHash = _primeHash( s = firstHalf, newLen = 5, prime = ISDA_PRIME, permittedChars = permittedChars )
secondHalfHash = _primeHash( s = secondHalf, newLen = 5, prime = ISDA_PRIME, permittedChars = permittedChars )
return firstHalfHash + secondHalfHash
def leiHashCollisionProbability( nLei, nPermittedChars = 36, firstPrime = ISDA_PRIME, secondPrime = ISDA_PRIME ):
""" Probability of at least one collision if (>=20 character) LEI's are drawn from uniform distribution with replacement nLei times """
nPossibilities = firstPrime*secondPrime
return 1- math.exp( -nLei*(nLei-1.)/(2.*nPossibilities) )
@func.memoize_plus
def _powersModP( nPermittedChars, p ):
""" Cached powers of nPermittedChars mod p """
return [ pow( nPermittedChars, k ) % p for k in xrange( 25 ) ]
def _intFromAlphaModP( s, permittedChars = ISDA_PERMITTED_CHARS, p = ISDA_PRIME ):
""" Convert alpha-numeric to integer modulo p """
# We merge the operations of converting to an int and taking mod p not because python can't handle large numbers, which
# it does perfectly well, but to make translation to Excel/VBA straightforward. {n^k mod p} are cached for the same reason,
# and are hardwired in the VBA version provided to ISDA.
i = 0
nPermittedChars = len( permittedChars )
for k, char in enumerate( reversed(s) ):
digit = permittedChars.find( char )
i += _powersModP( nPermittedChars, p )[k]*digit
i = i % p
return i
def _alphaFromInt( i, permittedChars = ISDA_PERMITTED_CHARS ):
""" Convert back again """
# (This is an injection from 0..PRIME into the space of alpha-numeric strings)
a = ''
nPermittedChars = len( permittedChars )
while i:
a = permittedChars[i % nPermittedChars] + a
i = i // nPermittedChars
if not a:
a = permittedChars[0]
return a
def _primeHash( s, permittedChars = ISDA_PERMITTED_CHARS, prime = ISDA_PRIME, newLen = 5 ):
""" Hash case-insensitive string to one of shorter length by applying modulus operation """
# (Doesn't really have to be a prime)
nPermittedChars = len( permittedChars )
if pow( nPermittedChars, newLen ) < prime:
raise ValueError("You may wish to choose a larger prime so the map from integers mod p to alpha-numeric strings is an injection")
iModP = _intFromAlphaModP( s, permittedChars = permittedChars, p = prime )
alf = _alphaFromInt( iModP, permittedChars )
return alf.ljust( 5, 'Z' )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment