Last active
December 22, 2017 07:14
-
-
Save judell/a57edeeb8584d231f718879cb15c7f07 to your computer and use it in GitHub Desktop.
consistency check
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import base64, uuid, binascii, traceback, requests | |
# The two "magic" hex nibbles used to widen a 15-byte ElasticSearch flake ID
# (30 hex digits) into a 32-hex-digit UUID. _get_hex_from_urlsafe() inserts
# them at hex positions 12 and 16 of the result, and _get_urlsafe_from_hex()
# treats any UUID that carries both nibbles at those positions as a flake ID
# and strips them again.
#
# NOTE(review): positions 12 and 16 are where the canonical UUID layout keeps
# its version and variant nibbles; presumably 'e' and '5' were picked because
# no standard UUID uses that combination -- confirm.
ES_FLAKE_MAGIC_BYTE = ['e', '5']
def _must_b64_decode(data, expected_size=None): | |
result = base64.urlsafe_b64decode(data) | |
if expected_size is not None and len(result) != expected_size: | |
raise TypeError('incorrect data size') | |
return result | |
def _get_hex_from_urlsafe(value): | |
bytestr = bytes(value) | |
def _fail(): | |
raise InvalidUUID('{0!r} is not a valid encoded UUID'.format(value)) | |
if len(bytestr) == 22: | |
# 22-char inputs represent 16 bytes of data, which when normally | |
# base64-encoded would have two bytes of padding on the end, so we add | |
# that back before decoding. | |
try: | |
data = _must_b64_decode(bytestr + b'==', expected_size=16) | |
except TypeError: | |
_fail() | |
return binascii.hexlify(data) | |
if len(bytestr) == 20: | |
# 20-char inputs represent 15 bytes of data, which requires no padding | |
# corrections. | |
try: | |
data = _must_b64_decode(bytestr, expected_size=15) | |
except TypeError: | |
_fail() | |
hexstring = binascii.hexlify(data) | |
# These are ElasticSearch flake IDs, so to convert them into UUIDs we | |
# insert the magic nibbles at the appropriate points. See the comments | |
# on ES_FLAKE_MAGIC_BYTE for details. | |
return (hexstring[0:12] + | |
ES_FLAKE_MAGIC_BYTE[0] + | |
hexstring[12:15] + | |
ES_FLAKE_MAGIC_BYTE[1] + | |
hexstring[15:30]) | |
# Fallthrough: we must have a received a string of invalid length | |
_fail() | |
def _get_urlsafe_from_hex(value):
    """Convert a hex UUID string into its URL-safe base64 ID.

    A UUID whose hex digits at positions 12 and 16 equal the two
    ES_FLAKE_MAGIC_BYTE nibbles is treated as an expanded ElasticSearch flake
    ID: the nibbles are dropped and the remaining 15 bytes are encoded, which
    needs no padding. Any other UUID is encoded as 16 bytes with the two
    trailing padding characters removed.

    :param value: a hex UUID in any form accepted by ``uuid.UUID(hex=...)``.
    :returns: the URL-safe base64 encoded ID.
    :raises ValueError: propagated from ``uuid.UUID`` for malformed input.
    """
    # uuid.UUID both validates the input and normalises it to 32 lowercase
    # hex digits.
    normalised = uuid.UUID(hex=value).hex

    if (normalised[12] != ES_FLAKE_MAGIC_BYTE[0] or
            normalised[16] != ES_FLAKE_MAGIC_BYTE[1]):
        # Ordinary UUID: encode all 16 bytes and strip the '==' padding.
        raw_uuid = binascii.unhexlify(normalised)
        return base64.urlsafe_b64encode(raw_uuid)[:-2]

    # Flake ID: its hex form is the UUID minus the two magic nibbles.
    flake_hex = normalised[0:12] + normalised[13:16] + normalised[17:32]
    return base64.urlsafe_b64encode(binascii.unhexlify(flake_hex))
# Internal IDs to check: hex UUID strings, as accepted by uuid.UUID(hex=...).
# The real values were elided from this listing; add them here, one string per
# entry, before running the check.
ids = [
]
def consistency(timeout=30):
    """Check that each internal ID resolves under its public URL-safe ID.

    For each of the first 100 entries of ``ids``, the internal hex ID is
    converted with ``_get_urlsafe_from_hex``, ``https://hypothes.is/a/<id>``
    is fetched, and the HTTP status code is printed. Once every ID has been
    processed, one line per ID is written to ``consistency-check.txt`` in the
    current working directory.

    :param timeout: seconds to wait for each HTTP response. Without a timeout
        a single stalled connection would hang the whole run.
    :raises requests.exceptions.RequestException: if a request fails or times
        out; no report file is written in that case.
    """
    report_lines = []
    for internal_id in ids[0:100]:
        public_id = _get_urlsafe_from_hex(internal_id)
        if not isinstance(public_id, str):
            # On Python 3 base64 returns bytes, which would otherwise be
            # formatted into the URL as "b'...'".
            public_id = public_id.decode('ascii')
        r = requests.get('https://hypothes.is/a/%s' % public_id,
                         timeout=timeout)
        # Parenthesised form prints the same thing whether print is a
        # statement or a function.
        print(r.status_code)
        report_lines.append(
            'internal_id %s, public_id %s, status %s\n'
            % (internal_id, public_id, r.status_code))
    with open('consistency-check.txt', 'w') as f:
        f.write(''.join(report_lines))
# Run the check only when this file is executed as a script, so that importing
# it does not fire off network requests.
if __name__ == '__main__':
    consistency()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment