Last active
August 9, 2022 15:58
-
-
Save pashri/cf0f52e3bdafd5c7d145ee782c350208 to your computer and use it in GitHub Desktop.
Validate GSA unique entity identifier (UEI)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from functools import reduce | |
from itertools import starmap | |
import re | |
from typing import Iterable | |
def checksum(uei: str) -> bool:
    """Return whether the check digit of a UEI matches its first 11 characters.

    The first 11 characters are uppercased (a UEI is not case sensitive) and
    converted to their code points. Each code point is multiplied by its
    1-based position, and the last decimal digit of every product is summed.
    While the result has more than one digit, the same weighted sum is applied
    to its decimal digits. The final single digit is compared with the 12th
    character of the UEI.

    Args:
        uei: A 12-character candidate UEI.

    Returns:
        `True` if the computed digit equals the final character, else `False`.

    Raises:
        ValueError: If `uei` is not exactly 12 characters long, or if its
            final character cannot be parsed as an integer.
    """
    def weighted_digit_sum(values: Iterable[int]) -> int:
        # Only the last decimal digit of each (value * position) product
        # contributes to the sum; positions start at 1.
        return sum(
            (value * position) % 10
            for position, value in enumerate(values, 1)
        )

    # Explicit raise instead of `assert`: asserts are stripped under `-O`,
    # which would let wrong-length input be processed silently.
    if len(uei) != 12:
        raise ValueError(
            f"UEI must be exactly 12 characters long, got {len(uei)}"
        )

    # Uppercase the payload so 'abc...' and 'ABC...' yield the same result.
    total = weighted_digit_sum(map(ord, uei[:-1].upper()))

    # Collapse multi-digit totals by re-applying the weighted sum to the
    # decimal digits until a single digit remains.
    while total > 9:
        total = weighted_digit_sum(int(digit) for digit in str(total))

    return total == int(uei[-1])
def uei_is_plausible(uei: str) -> bool:
    """Returns `True` if a UEI is a plausible UEI, else `False`.

    After running this method, an API should be called
    to check whether the UEI actually exists.

    A GSA unique entity identifier (UEI) has the following characteristics:

    - The Unique Entity ID is a 12-character, alphanumeric value.
    - The letters “O” and “I” are not used to avoid confusion with zero and one.
    - The first character is not zero to avoid cutting off digits that can occur
      during data imports, for example, when importing data into spreadsheet
      programs.
    - Nine-digit sequences are not used in the identifier to avoid collision
      with the nine-digit DUNS Number or Taxpayer Identification Number (TIN).
    - The first five characters are structured to avoid collision with the
      Commercial and Government Entity code formatting or CAGE code.
      (This property is not checked by this function.)
    - The Unique Entity ID is not case sensitive.
    - The final character is a checksum of the first 11 characters.
      Checksums are used to detect errors within data.

    Args:
        uei: The candidate identifier. It is converted with `str()` first,
            so non-string values are checked by their string form.

    Returns:
        `True` if every check below passes, else `False`.

    Examples:
        uei_is_plausible('VN1AJFAD19J9')  # Valid
        uei_is_plausible('99999999f995')  # Valid
        uei_is_plausible('ABCDEF12345')   # Invalid: too few characters
        uei_is_plausible('io10io10io19')  # Invalid: contains 'I' and/or 'O'
        uei_is_plausible('123456789FF4')  # Invalid: too many consecutive digits
        uei_is_plausible('A123456789B1')  # Invalid: too many consecutive digits
        uei_is_plausible('1A2B3C4D5F6G')  # Invalid: final character is not a digit

    See also: GSA implementation at https://github.com/GSA-TTS/uei-js/
    """
    raw = str(uei)

    # Check for ASCII *before* uppercasing: str.upper() can map non-ASCII
    # characters onto ASCII ones (e.g. 'ß' -> 'SS', 'ſ' -> 'S'), which would
    # otherwise let non-ASCII input pass the alphanumeric/length checks.
    if not raw.isascii():
        return False

    # The Unique Entity ID is not case sensitive
    uei = raw.upper()

    # The Unique Entity ID is a 12-character, alphanumeric value
    if len(uei) != 12 or not uei.isalnum():
        return False

    # The letters “O” and “I” are not included
    if 'O' in uei or 'I' in uei:
        return False

    # The first character is not zero
    if uei[0] == '0':
        return False

    # Nine-digit sequences are not used. `re.search` is required here:
    # `re.match` only anchors at the start and would miss a nine-digit run
    # beginning at any later position.
    if re.search(r'\d{9}', uei):
        return False

    # The final character is a checksum of the first 11 characters.
    # Guard with isdigit() first because checksum() parses it with int().
    if not uei[-1].isdigit():
        return False

    return checksum(uei)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment