Last active
February 24, 2026 03:16
-
-
Save rygorous/563e09067e203ac329d26419eaffd996 to your computer and use it in GitHub Desktop.
Generate all 3-input ternary logic ops on bits via ARM instructions. (SHA3 ext ops optional)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| 0x00 0 cost=10 | |
| 0x01 BCAX(A, 0xf5, 0x0c) cost=30 | |
| 0x02 BIC(0x0a, B) cost=20 | |
| 0x03 EOR(A, 0xf3) cost=20 | |
| 0x04 BIC(B, 0xfa) cost=20 | |
| 0x05 EOR(A, 0xf5) cost=20 | |
| 0x06 BCAX(0x0a, B, A) cost=20 | |
| 0x07 EOR(A, 0xf7) cost=30 | |
| 0x08 BCAX(B, B, 0x0a) cost=20 | |
| 0x09 EOR3(A, 0x0a, 0xf3) cost=30 | |
| 0x0a BIC(C, A) cost=10 | |
| 0x0b BIC(0xbb, A) cost=20 | |
| 0x0c BIC(B, A) cost=10 | |
| 0x0d BIC(0xdd, A) cost=20 | |
| 0x0e BIC(0xee, A) cost=20 | |
| 0x0f NOT(A) cost=10 | |
| 0x10 BIC(A, 0xee) cost=20 | |
| 0x11 EOR(B, 0xdd) cost=20 | |
| 0x12 BCAX(0x22, A, B) cost=20 | |
| 0x13 BCAX(0x22, 0xf5, B) cost=30 | |
| 0x14 BIC(0x3c, C) cost=20 | |
| 0x15 BCAX(0x44, 0xf3, C) cost=30 | |
| 0x16 EOR3(A, C, 0x4c) cost=30 | |
| 0x17 EOR(0xaf, 0xb8) cost=31 | |
| 0x18 BCAX(0x0a, 0x5a, B) cost=30 | |
| 0x19 BCAX(B, 0xdd, 0x0a) cost=30 | |
| 0x1a EOR3(A, C, 0x40) cost=30 | |
| 0x1b NOT(0xe4) cost=21 | |
| 0x1c BCAX(B, A, 0x22) cost=20 | |
| 0x1d NOT(0xe2) cost=21 | |
| 0x1e EOR3(B, A, 0x22) cost=20 | |
| 0x1f EOR(A, 0xef) cost=30 | |
| 0x20 BCAX(A, A, 0x22) cost=20 | |
| 0x21 EOR3(A, 0x22, 0xf3) cost=30 | |
| 0x22 BIC(C, B) cost=10 | |
| 0x23 BIC(0xaf, B) cost=20 | |
| 0x24 BCAX(0x0c, C, 0x96) cost=30 | |
| 0x25 BCAX(A, 0xf5, 0x22) cost=30 | |
| 0x26 BCAX(0x0c, C, 0xc0) cost=30 | |
| 0x27 NOT(0xd8) cost=21 | |
| 0x28 BCAX(0x0a, C, B) cost=20 | |
| 0x29 BCAX(0x0a, 0xaf, B) cost=30 | |
| 0x2a BSL(B, 0x0a, C) cost=21 | |
| 0x2b BCAX(0xaf, B, 0x5a) cost=30 | |
| 0x2c BSL(A, 0x22, B) cost=21 | |
| 0x2d EOR(A, 0xdd) cost=20 | |
| 0x2e BCAX(0x0c, C, B) cost=20 | |
| 0x2f ORN(0x22, A) cost=20 | |
| 0x30 BIC(A, B) cost=10 | |
| 0x31 BIC(0xf5, B) cost=20 | |
| 0x32 BIC(0xfa, B) cost=20 | |
| 0x33 NOT(B) cost=10 | |
| 0x34 BCAX(A, B, 0x0a) cost=20 | |
| 0x35 NOT(0xca) cost=21 | |
| 0x36 EOR3(B, A, 0x0a) cost=20 | |
| 0x37 EOR(A, 0xc7) cost=30 | |
| 0x38 BSL(B, 0x0a, A) cost=21 | |
| 0x39 BCAX(0x33, C, A) cost=20 | |
| 0x3a BCAX(0x0a, A, B) cost=20 | |
| 0x3b ORN(0x0a, B) cost=20 | |
| 0x3c EOR(B, A) cost=10 | |
| 0x3d BCAX(A, 0xdd, 0x30) cost=30 | |
| 0x3e BCAX(A, 0xee, 0x30) cost=30 | |
| 0x3f EOR(A, 0xcf) cost=20 | |
| 0x40 BIC(B, 0xaf) cost=20 | |
| 0x41 EOR3(B, 0xaf, 0x22) cost=30 | |
| 0x42 BCAX(0x0a, B, 0x96) cost=30 | |
| 0x43 BCAX(A, 0xbb, 0x0c) cost=30 | |
| 0x44 BIC(B, C) cost=10 | |
| 0x45 BIC(0xcf, C) cost=20 | |
| 0x46 BCAX(B, 0x9a, 0x55) cost=30 | |
| 0x47 NOT(0xb8) cost=21 | |
| 0x48 BCAX(B, B, 0x5a) cost=20 | |
| 0x49 BCAX(B, 0xdd, 0x5a) cost=30 | |
| 0x4a BSL(A, 0x44, C) cost=21 | |
| 0x4b EOR(A, 0xbb) cost=20 | |
| 0x4c BIC(B, 0xa0) cost=20 | |
| 0x4d BCAX(0xcf, C, 0x3c) cost=30 | |
| 0x4e BCAX(0x0a, B, C) cost=20 | |
| 0x4f ORN(0x44, A) cost=20 | |
| 0x50 BIC(A, C) cost=10 | |
| 0x51 BIC(0xf3, C) cost=20 | |
| 0x52 BCAX(A, C, 0x0c) cost=20 | |
| 0x53 NOT(0xac) cost=21 | |
| 0x54 BIC(0xfc, C) cost=20 | |
| 0x55 NOT(C) cost=10 | |
| 0x56 EOR(B, 0x9a) cost=20 | |
| 0x57 BCAX(0x55, C, 0xfc) cost=30 | |
| 0x58 BCAX(B, 0x96, 0x0a) cost=30 | |
| 0x59 BCAX(0x55, B, A) cost=20 | |
| 0x5a EOR(C, A) cost=10 | |
| 0x5b BCAX(A, 0xaf, 0x44) cost=30 | |
| 0x5c BCAX(B, A, 0x66) cost=20 | |
| 0x5d ORN(0x0c, C) cost=20 | |
| 0x5e BCAX(B, 0x96, 0x44) cost=30 | |
| 0x5f EOR(A, 0xaf) cost=20 | |
| 0x60 BCAX(A, A, 0x66) cost=20 | |
| 0x61 BCAX(A, 0xf5, 0x66) cost=30 | |
| 0x62 BCAX(A, 0x96, 0x44) cost=30 | |
| 0x63 EOR(B, 0xaf) cost=20 | |
| 0x64 BCAX(A, 0x96, 0x0a) cost=30 | |
| 0x65 BCAX(0x55, A, B) cost=20 | |
| 0x66 EOR(C, B) cost=10 | |
| 0x67 BCAX(B, 0xaf, 0x44) cost=30 | |
| 0x68 BCAX(B, 0xee, 0x5a) cost=30 | |
| 0x69 EOR3(B, A, 0x55) cost=20 | |
| 0x6a EOR(B, 0xa6) cost=20 | |
| 0x6b ORN(0x0a, 0x96) cost=30 | |
| 0x6c BCAX(B, A, 0x55) cost=20 | |
| 0x6d BCAX(B, 0xbb, 0x5a) cost=30 | |
| 0x6e BCAX(B, 0xa6, 0x55) cost=30 | |
| 0x6f ORN(0x66, A) cost=20 | |
| 0x70 BIC(A, 0x88) cost=20 | |
| 0x71 BCAX(0xf3, C, 0x3c) cost=30 | |
| 0x72 BCAX(A, C, 0x3c) cost=20 | |
| 0x73 ORN(0x50, B) cost=20 | |
| 0x74 BCAX(A, B, 0x5a) cost=20 | |
| 0x75 ORN(0x30, C) cost=20 | |
| 0x76 BCAX(B, 0xfa, 0x44) cost=30 | |
| 0x77 EOR(B, 0xbb) cost=20 | |
| 0x78 EOR3(B, 0x44, A) cost=20 | |
| 0x79 BCAX(A, 0xaf, 0x66) cost=30 | |
| 0x7a BCAX(A, C, 0x30) cost=20 | |
| 0x7b ORN(0x5a, B) cost=20 | |
| 0x7c BCAX(A, B, 0x50) cost=30 | |
| 0x7d ORN(0x3c, C) cost=20 | |
| 0x7e ORR(0x3c, 0x5a) cost=30 | |
| 0x7f BCAX(A, 0xaf, 0x30) cost=30 | |
| 0x80 BCAX(B, B, 0xa0) cost=20 | |
| 0x81 BCAX(0x8b, C, A) cost=30 | |
| 0x82 BCAX(0x88, C, A) cost=20 | |
| 0x83 BCAX(A, 0xf3, 0x88) cost=30 | |
| 0x84 BCAX(B, B, 0x96) cost=20 | |
| 0x85 BCAX(B, 0xdd, 0x96) cost=30 | |
| 0x86 EOR3(A, C, 0xdc) cost=30 | |
| 0x87 EOR3(B, A, 0xbb) cost=20 | |
| 0x88 AND(C, B) cost=10 | |
| 0x89 BCAX(B, 0x55, 0x9a) cost=30 | |
| 0x8a AND(0x9a, C) cost=20 | |
| 0x8b BCAX(0xbb, A, B) cost=20 | |
| 0x8c BCAX(B, A, 0xbb) cost=20 | |
| 0x8d BCAX(0xaf, C, B) cost=20 | |
| 0x8e BSL(0x3c, B, C) cost=21 | |
| 0x8f ORN(0x88, A) cost=20 | |
| 0x90 BCAX(A, A, 0x96) cost=20 | |
| 0x91 BCAX(B, 0xdd, 0xa0) cost=30 | |
| 0x92 EOR3(A, C, 0xc8) cost=30 | |
| 0x93 EOR3(B, A, 0xaf) cost=20 | |
| 0x94 BCAX(B, 0x5a, 0x22) cost=30 | |
| 0x95 EOR3(A, C, 0xcf) cost=20 | |
| 0x96 EOR3(C, A, B) cost=10 | |
| 0x97 EOR3(A, C, 0xcd) cost=30 | |
| 0x98 BSL(B, C, 0xd8) cost=22 | |
| 0x99 EOR3(C, 1, B) cost=20 | |
| 0x9a BCAX(C, A, B) cost=10 | |
| 0x9b EOR3(A, C, 0xc1) cost=30 | |
| 0x9c BCAX(B, A, C) cost=10 | |
| 0x9d BCAX(B, 0x55, 0xa6) cost=30 | |
| 0x9e BCAX(B, 0x5a, 0x88) cost=30 | |
| 0x9f ORN(0x96, A) cost=20 | |
| 0xa0 AND(C, A) cost=10 | |
| 0xa1 BCAX(A, 0xf3, C) cost=20 | |
| 0xa2 AND(0xa6, C) cost=20 | |
| 0xa3 BCAX(0xaf, B, A) cost=20 | |
| 0xa4 BCAX(A, 0xfc, C) cost=20 | |
| 0xa5 EOR3(C, 1, A) cost=20 | |
| 0xa6 BCAX(C, B, A) cost=10 | |
| 0xa7 EOR3(A, C, 0xfd) cost=30 | |
| 0xa8 AND(0xfc, C) cost=20 | |
| 0xa9 EOR3(A, C, 0xf3) cost=20 | |
| 0xaa C cost=0 | |
| 0xab BSL(B, C, 0xaf) cost=21 | |
| 0xac BSL(A, C, B) cost=11 | |
| 0xad BSL(A, C, 0xdd) cost=21 | |
| 0xae ORR(0x0c, C) cost=20 | |
| 0xaf ORN(C, A) cost=10 | |
| 0xb0 BCAX(A, A, 0xbb) cost=20 | |
| 0xb1 BCAX(0xbb, C, A) cost=20 | |
| 0xb2 BSL(0x3c, A, C) cost=21 | |
| 0xb3 ORN(0xa0, B) cost=20 | |
| 0xb4 BCAX(A, B, C) cost=10 | |
| 0xb5 BCAX(A, 0xcf, C) cost=20 | |
| 0xb6 EOR3(A, C, 0xec) cost=30 | |
| 0xb7 ORN(0x96, B) cost=20 | |
| 0xb8 BSL(B, C, A) cost=11 | |
| 0xb9 BCAX(A, 0xdd, 0x96) cost=30 | |
| 0xba ORR(0x30, C) cost=20 | |
| 0xbb ORN(C, B) cost=10 | |
| 0xbc BCAX(B, A, 0x88) cost=20 | |
| 0xbd ORN(0x3c, 0x5a) cost=30 | |
| 0xbe ORR(0x3c, C) cost=20 | |
| 0xbf ORN(0x9c, B) cost=20 | |
| 0xc0 AND(B, A) cost=10 | |
| 0xc1 BCAX(B, 0xdd, A) cost=20 | |
| 0xc2 BCAX(B, 0xee, A) cost=20 | |
| 0xc3 EOR3(B, 1, A) cost=20 | |
| 0xc4 BCAX(B, C, 0xa6) cost=20 | |
| 0xc5 BCAX(0xcf, C, A) cost=20 | |
| 0xc6 BCAX(B, C, A) cost=10 | |
| 0xc7 BCAX(B, 0xbb, A) cost=20 | |
| 0xc8 BCAX(B, B, 0xfa) cost=20 | |
| 0xc9 EOR3(B, C, 0xaf) cost=20 | |
| 0xca BSL(A, B, C) cost=11 | |
| 0xcb BSL(A, B, 0xbb) cost=21 | |
| 0xcc B cost=0 | |
| 0xcd ORN(B, 0xfa) cost=20 | |
| 0xce ORR(B, 0x0a) cost=20 | |
| 0xcf ORN(B, A) cost=10 | |
| 0xd0 BCAX(A, A, 0xdd) cost=20 | |
| 0xd1 BCAX(0xdd, B, A) cost=20 | |
| 0xd2 BCAX(A, C, B) cost=10 | |
| 0xd3 BCAX(A, 0xaf, B) cost=20 | |
| 0xd4 BSL(0x5a, A, B) cost=21 | |
| 0xd5 ORN(0xc0, C) cost=20 | |
| 0xd6 BCAX(B, 0x5a, 0x44) cost=30 | |
| 0xd7 ORN(0x96, C) cost=20 | |
| 0xd8 BSL(C, B, A) cost=11 | |
| 0xd9 BCAX(A, 0xaf, 0x96) cost=30 | |
| 0xda BCAX(A, C, 0xc0) cost=20 | |
| 0xdb ORN(0x5a, 0x66) cost=30 | |
| 0xdc BCAX(B, A, 0xee) cost=20 | |
| 0xdd ORN(B, C) cost=10 | |
| 0xde ORR(B, 0x5a) cost=20 | |
| 0xdf ORN(B, 0xa0) cost=20 | |
| 0xe0 BCAX(A, A, 0xee) cost=20 | |
| 0xe1 EOR3(B, 0xdd, A) cost=20 | |
| 0xe2 BSL(B, A, C) cost=11 | |
| 0xe3 BSL(B, A, 0xaf) cost=21 | |
| 0xe4 BSL(C, A, B) cost=11 | |
| 0xe5 BCAX(B, 0xaf, 0x96) cost=30 | |
| 0xe6 BSL(B, 0xe4, C) cost=22 | |
| 0xe7 ORN(0x66, 0x5a) cost=30 | |
| 0xe8 BSL(0x3c, C, A) cost=21 | |
| 0xe9 BCAX(B, 0xaf, 0x9a) cost=30 | |
| 0xea ORR(0xc0, C) cost=20 | |
| 0xeb BCAX(0xaf, B, C) cost=20 | |
| 0xec BCAX(B, C, 0x9a) cost=20 | |
| 0xed ORN(B, 0x5a) cost=20 | |
| 0xee ORR(C, B) cost=10 | |
| 0xef ORR(B, 0xaf) cost=20 | |
| 0xf0 A cost=0 | |
| 0xf1 ORN(A, 0xee) cost=20 | |
| 0xf2 BCAX(A, C, 0xfc) cost=20 | |
| 0xf3 ORN(A, B) cost=10 | |
| 0xf4 BCAX(A, B, 0xfa) cost=20 | |
| 0xf5 ORN(A, C) cost=10 | |
| 0xf6 ORR(A, 0x66) cost=20 | |
| 0xf7 ORN(A, 0x88) cost=20 | |
| 0xf8 BCAX(A, B, 0xc6) cost=20 | |
| 0xf9 ORN(A, 0x66) cost=20 | |
| 0xfa ORR(C, A) cost=10 | |
| 0xfb ORN(A, 0x44) cost=20 | |
| 0xfc ORR(B, A) cost=10 | |
| 0xfd ORN(B, 0x0a) cost=20 | |
| 0xfe ORR(B, 0xfa) cost=20 | |
| 0xff 1 cost=10 | |
| max_cost=31 avg_cost=20.9 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import heapq | |
| if __name__ == '__main__': | |
| ops = 256 * [None] | |
| worklist = [] | |
| filled = [] | |
| def add_seed(tt, cost, desc): | |
| assert ops[tt] is None | |
| ops[tt] = (cost, desc, None, None, None) | |
| heapq.heappush(worklist, (cost, tt)) | |
| filled.append(tt) | |
| def add_tt(tt, desc, cost, A, B=None, C=None): | |
| if ops[tt] is not None: | |
| if ops[tt][0] <= cost: | |
| return | |
| # leave existing stale entry in worklist around, we'll discover it later | |
| else: | |
| filled.append(tt) | |
| ops[tt] = (cost, desc, A, B, C) | |
| heapq.heappush(worklist, (cost, tt)) | |
| # Add seed entries | |
| add_seed(0b0000_0000, 10, '0') # count as single mov | |
| add_seed(0b1111_1111, 10, '1') # count as single mov | |
| add_seed(0b1111_0000, 0, 'A') # count as free | |
| add_seed(0b1100_1100, 0, 'B') # count as free | |
| add_seed(0b1010_1010, 0, 'C') # count as free | |
| SHA3_OPS = True | |
| # Build other functions inductively | |
| while len(worklist) > 0: | |
| a_cost, a_tt = heapq.heappop(worklist) | |
| # skip over stale entries with old prio | |
| if ops[a_tt][0] < a_cost: | |
| continue | |
| cost1 = a_cost + 10 # we're adding a new insn | |
| initial_filled = len(filled) | |
| # NOT | |
| add_tt(a_tt ^ 0xff, 'NOT', cost1, a_tt) | |
| # fill out all binary before any ternary so they get the slot when viable | |
| for b_tt in filled[:initial_filled]: | |
| cost2 = cost1 + ops[b_tt][0] | |
| add_tt(a_tt | b_tt, 'ORR', cost2, a_tt, b_tt) | |
| add_tt(a_tt & b_tt, 'AND', cost2, a_tt, b_tt) | |
| add_tt(a_tt | (b_tt ^ 0xff), 'ORN', cost2, a_tt, b_tt) | |
| add_tt(a_tt & ~b_tt, 'BIC', cost2, a_tt, b_tt) | |
| add_tt(a_tt ^ b_tt, 'EOR', cost2, a_tt, b_tt) | |
| # ternary ops | |
| for c_tt in filled[:initial_filled]: | |
| cost3 = cost2 + ops[c_tt][0] | |
| add_tt((a_tt & b_tt) | (~a_tt & c_tt), 'BSL', cost3 + 1, a_tt, b_tt, c_tt) # +1 cost penalty because BSLs have reg alloc limitations | |
| if SHA3_OPS: | |
| add_tt(a_tt ^ b_tt ^ c_tt, 'EOR3', cost3, a_tt, b_tt, c_tt) | |
| add_tt(a_tt ^ (b_tt & ~c_tt), 'BCAX', cost3, a_tt, b_tt, c_tt) | |
| max_cost = 0 | |
| sum_costs = 0 | |
| def format_op(A): | |
| if A is None: | |
| return '' | |
| if ops[A] is not None: | |
| desc = ops[A][1] | |
| if len(desc) == 1: | |
| return desc | |
| return f'0x{A:02x}' | |
| for i, entry in enumerate(ops): | |
| if entry is None: | |
| continue | |
| cost, desc, A, B, C = entry | |
| Aop = format_op(A) | |
| Bop = format_op(B) | |
| Cop = format_op(C) | |
| if C is not None: | |
| opdesc = f'{desc}({Aop}, {Bop}, {Cop})' | |
| elif B is not None: | |
| opdesc = f'{desc}({Aop}, {Bop})' | |
| elif A is not None: | |
| opdesc = f'{desc}({Aop})' | |
| else: | |
| opdesc = desc | |
| print(f'0x{i:02x} {opdesc} cost={cost}') | |
| sum_costs += cost | |
| max_cost = max(max_cost, cost) | |
| print(f'max_cost={max_cost} avg_cost={sum_costs/256:.1f}') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment