roeniss · July 1, 2022 17:31 · roeniss · Jul 1, 2022
diff --git a/visualize_tolower.py b/visualize_tolower.py
 # This is based on the post "2022-06-27 – tolower() in bulk at speed" by Tony Finch 
 # link: https://dotat.at/@/2022-06-27-tolower-swar.html

 ## I found the post hard to follow without a visual aid, so I wrote this script.
 ## Run it with python3 in a terminal; I hope it helps you, too.

 OCTETS = "ABSOLUTE"

 def show_ascii(s):
    """Print the low 64 bits of ``s`` as eight characters, top byte first.

    Each byte is printed as the character whose code point equals the byte
    value (0-255), followed by a single newline.

    ``s`` is first wrapped to an unsigned 64-bit word, like a C ``uint64_t``.
    Without that, a negative integer (for example the result of ``~x``)
    raised ValueError, and a value wider than 64 bits silently lost its
    low-order bits.
    """
    word = s & 0xFFFFFFFFFFFFFFFF
    # latin-1 maps byte value N to code point N, i.e. the same as chr(N).
    print(word.to_bytes(8, "big").decode("latin-1"))

 # ANSI escape sequences: switch to bright green, and reset all attributes.
 OKGREEN = "\033[92m"
 ENDC = "\033[0m"
 def color(s):
    """Return ``s`` with every "1" character wrapped in OKGREEN ... ENDC.

    All other characters are copied through unchanged.
    """
    lit_one = OKGREEN + "1" + ENDC
    return "".join(lit_one if ch == "1" else ch for ch in s)

 def show(s, memo=""):
    """Print ``s`` as a 64-bit word, eight bits per group, followed by a memo.

    The output is one line: eight space-separated groups of 8 binary digits
    (most significant byte first) and then ``"   # " + memo``.  Everything is
    passed through ``color``, so "1" characters are highlighted, including
    any that appear in the memo text.

    ``s`` is wrapped to an unsigned 64-bit word first, like a C ``uint64_t``.
    Without that, negative integers such as ``~x`` produced a garbled
    bit string and values wider than 64 bits lost their low-order bits.
    """
    bits = format(s & 0xFFFFFFFFFFFFFFFF, "064b")
    for start in range(0, 64, 8):
        print(color(bits[start:start + 8]), end=" ")
    print(color("   # " + memo))

 def to_hex(s):
    """Return the UTF-8 bytes of ``s`` packed into one integer.

    The first byte of the encoding becomes the most significant byte, so
    ``to_hex("AB") == 0x4142``.  Despite the name, the result is an ``int``,
    not a hex string.  An empty string yields 0; the earlier hex-string
    round trip raised ValueError for it.
    """
    return int.from_bytes(s.encode("utf-8"), "big")
    
 # Step-by-step walkthrough: lowercase eight packed characters at once using
 # plain integer arithmetic, printing every intermediate word.

 # Multiplying a byte value by this constant copies it into all eight bytes.
 all_bytes = 0x0101010101010101
 # The input characters packed into one integer, first character in the top byte.
 octets = to_hex(OCTETS)

 # show(all_bytes, "all_bytes")
 show(octets, "octets")

 # 0x7F in every byte: a mask that clears the top (0x80) bit of each byte.
 steal_all_0x80 = 0x7F * all_bytes
 show(steal_all_0x80, "0x7F * all_bytes")

 # Low seven bits of every byte. With the top bit cleared, the per-byte
 # additions below stay under 0x100, so nothing carries into the next byte.
 heptets = octets & steal_all_0x80
 show(heptets, "heptets = octets & (0x7F * all_bytes)")

 print("-"*60)

 # show(0x7F, "0x7F")
 # show(to_hex('A'), "A")
 # show(to_hex('Z'), "Z")

 # print("-"*60)

 # Adds 0x7F - 'Z' (= 0x25) to every byte: the byte's top bit ends up set
 # exactly when its heptet is greater than 'Z' (0x5A).
 is_gt_Z = heptets + (0x7F - to_hex('Z')) * all_bytes
 show(is_gt_Z, "is_gt_Z = heptets + (0x7F - 'Z') * all_bytes -- (each first bit is true if heptet - 'Z' > 0)")

 # Adds 0x80 - 'A' (= 0x3F) to every byte: the byte's top bit ends up set
 # exactly when its heptet is at least 'A' (0x41).
 is_ge_A = heptets + (0x80 - to_hex('A')) * all_bytes
 show(is_ge_A, "is_ge_A = heptets + (0x80 - 'A') * all_bytes -- (each first bit is true if heptet - 'A' >= 0)")
 print(" -- above two example works only when the original char is btw 0 ~ 127 (ascii)")

 print("-"*60)

 # Invert all 64 bits by XOR-ing with 0xFF in every byte. The top bit of a
 # byte is then set when the original byte was below 0x80; the lower bits
 # are leftovers and carry no meaning.
 is_ascii = (0xFF * all_bytes) ^ octets # ~octets is a negative int in Python, so invert via XOR instead
 show(octets, "octets")
 show(is_ascii, "is_ascii = ~octets -- (each first bit is true if it's ascii char)")

 print("-"*60)

 # A heptet above 'Z' is also at least 'A', so the XOR of the two flags is
 # set only for "at least 'A' and not above 'Z'". Only the top bit of each
 # byte is meaningful here.
 is_upper = is_ascii & (is_ge_A ^ is_gt_Z)
 show(is_upper, "is_upper = is_ascii & (is_ge_A ^ is_gt_Z) -- (true only if is_ge_A=1 and is_gt_Z=0")
 # Move each flag from the 0x80 position down to 0x20 and mask away every
 # other bit, including bits that the shift pushed across byte boundaries.
 to_lower = (is_upper >> 2) & (0x20 * all_bytes)
 show(to_lower, "to_lower = (is_upper >> 2) & (0x20 * all_bytes)")
 # show(((is_ascii >> 2) & ((is_ge_A >> 2) ^ (is_gt_Z >> 2)) & (0x20 * all_bytes)), "same result")
 show(octets, "octets")
 # Setting bit 0x20 on an uppercase ASCII letter gives its lowercase form.
 result = octets | to_lower
 show(result, "octets | to_lower")

 print("-"*60)

 show_ascii(result)

 print("-"*60)

 # another way
 # ~octets is negative, but AND-ing with a non-negative mask gives a
 # non-negative result that holds only the top bit of each byte.
 is_ascii = ~octets & (0x80 * all_bytes)
 show(is_ascii, "is_ascii = ~octets & (0x80 * all_bytes)")
 # is_upper now contains nothing but 0x80 flags, so no final mask is needed.
 is_upper = is_ascii & (is_ge_A ^ is_gt_Z)
 # `>>` binds tighter than `|`: this is octets | (is_upper >> 2).
 result = octets | is_upper >> 2
 show(result, "octets | is_upper >> 2")

 print("-"*60)

 show_ascii(result) # same result
	# This is based on the post "2022-06-27 – tolower() in bulk at speed" by Tony Finch
	# link: https://dotat.at/@/2022-06-27-tolower-swar.html

	## I found the post hard to follow without a visual aid, so I wrote this script.
	## Run it with python3 in a terminal; I hope it helps you, too.

	OCTETS = "ABSOLUTE"

	def show_ascii(s):
	    """Print the low 64 bits of ``s`` as eight characters, top byte first.

	    Each byte is printed as the character whose code point equals the byte
	    value (0-255), followed by a single newline.  ``s`` is wrapped to an
	    unsigned 64-bit word first, so negative integers and values wider than
	    64 bits behave like a C ``uint64_t``.
	    """
	    word = s & 0xFFFFFFFFFFFFFFFF
	    # latin-1 maps byte value N to code point N, i.e. the same as chr(N).
	    print(word.to_bytes(8, "big").decode("latin-1"))

	# ANSI escape sequences: switch to bright green, and reset all attributes.
	OKGREEN = "\033[92m"
	ENDC = "\033[0m"
	def color(s):
	    """Return ``s`` with every "1" character wrapped in OKGREEN ... ENDC.

	    All other characters are copied through unchanged.
	    """
	    lit_one = OKGREEN + "1" + ENDC
	    return "".join(lit_one if ch == "1" else ch for ch in s)

	def show(s, memo=""):
	    """Print ``s`` as a 64-bit word, eight bits per group, followed by a memo.

	    The output is one line: eight space-separated groups of 8 binary digits
	    (most significant byte first) and then ``"   # " + memo``.  Everything
	    is passed through ``color``, so "1" characters are highlighted,
	    including any that appear in the memo text.

	    ``s`` is wrapped to an unsigned 64-bit word first, so negative integers
	    and values wider than 64 bits behave like a C ``uint64_t``.
	    """
	    bits = format(s & 0xFFFFFFFFFFFFFFFF, "064b")
	    # Slice bounds are i*8 .. i*8+8, written here as a stride-8 range.
	    for start in range(0, 64, 8):
	        print(color(bits[start:start + 8]), end=" ")
	    print(color("   # " + memo))

	def to_hex(s):
	    """Return the UTF-8 bytes of ``s`` packed into one integer.

	    The first byte of the encoding becomes the most significant byte, so
	    ``to_hex("AB") == 0x4142``.  Despite the name, the result is an ``int``,
	    not a hex string.  An empty string yields 0.
	    """
	    return int.from_bytes(s.encode("utf-8"), "big")

	# Step-by-step walkthrough: lowercase eight packed characters at once using
	# plain integer arithmetic, printing every intermediate word.

	# Multiplying a byte value by this constant copies it into all eight bytes.
	all_bytes = 0x0101010101010101
	# The input characters packed into one integer, first character in the top byte.
	octets = to_hex(OCTETS)

	# show(all_bytes, "all_bytes")
	show(octets, "octets")

	# 0x7F in every byte: a mask that clears the top (0x80) bit of each byte.
	steal_all_0x80 = 0x7F * all_bytes
	show(steal_all_0x80, "0x7F * all_bytes")

	# Low seven bits of every byte. With the top bit cleared, the per-byte
	# additions below stay under 0x100, so nothing carries into the next byte.
	heptets = octets & steal_all_0x80
	show(heptets, "heptets = octets & (0x7F * all_bytes)")

	print("-"*60)

	# show(0x7F, "0x7F")
	# show(to_hex('A'), "A")
	# show(to_hex('Z'), "Z")

	# print("-"*60)

	# Adds 0x7F - 'Z' (= 0x25) to every byte: the byte's top bit ends up set
	# exactly when its heptet is greater than 'Z' (0x5A).
	is_gt_Z = heptets + (0x7F - to_hex('Z')) * all_bytes
	show(is_gt_Z, "is_gt_Z = heptets + (0x7F - 'Z') * all_bytes -- (each first bit is true if heptet - 'Z' > 0)")

	# Adds 0x80 - 'A' (= 0x3F) to every byte: the byte's top bit ends up set
	# exactly when its heptet is at least 'A' (0x41).
	is_ge_A = heptets + (0x80 - to_hex('A')) * all_bytes
	show(is_ge_A, "is_ge_A = heptets + (0x80 - 'A') * all_bytes -- (each first bit is true if heptet - 'A' >= 0)")
	print(" -- above two example works only when the original char is btw 0 ~ 127 (ascii)")

	print("-"*60)

	# Invert all 64 bits by XOR-ing with 0xFF in every byte. The top bit of a
	# byte is then set when the original byte was below 0x80; the lower bits
	# are leftovers and carry no meaning.
	is_ascii = (0xFF * all_bytes) ^ octets # ~octets is a negative int in Python, so invert via XOR instead
	show(octets, "octets")
	show(is_ascii, "is_ascii = ~octets -- (each first bit is true if it's ascii char)")

	print("-"*60)

	# A heptet above 'Z' is also at least 'A', so the XOR of the two flags is
	# set only for "at least 'A' and not above 'Z'". Only the top bit of each
	# byte is meaningful here.
	is_upper = is_ascii & (is_ge_A ^ is_gt_Z)
	show(is_upper, "is_upper = is_ascii & (is_ge_A ^ is_gt_Z) -- (true only if is_ge_A=1 and is_gt_Z=0")
	# Move each flag from the 0x80 position down to 0x20 and mask away every
	# other bit, including bits that the shift pushed across byte boundaries.
	to_lower = (is_upper >> 2) & (0x20 * all_bytes)
	show(to_lower, "to_lower = (is_upper >> 2) & (0x20 * all_bytes)")
	# show(((is_ascii >> 2) & ((is_ge_A >> 2) ^ (is_gt_Z >> 2)) & (0x20 * all_bytes)), "same result")
	show(octets, "octets")
	# Setting bit 0x20 on an uppercase ASCII letter gives its lowercase form.
	result = octets | to_lower
	show(result, "octets | to_lower")

	print("-"*60)

	show_ascii(result)

	print("-"*60)

	# another way
	# ~octets is negative, but AND-ing with a non-negative mask gives a
	# non-negative result that holds only the top bit of each byte.
	is_ascii = ~octets & (0x80 * all_bytes)
	show(is_ascii, "is_ascii = ~octets & (0x80 * all_bytes)")
	# is_upper now contains nothing but 0x80 flags, so no final mask is needed.
	is_upper = is_ascii & (is_ge_A ^ is_gt_Z)
	# `>>` binds tighter than `|`: this is octets | (is_upper >> 2).
	result = octets | is_upper >> 2
	show(result, "octets | is_upper >> 2")

	print("-"*60)

	show_ascii(result) # same result
No results found