@EvilFreelancer
Last active November 28, 2024 12:07
Star Attention - Phase 1: Context Encoding (generated with qwq:32b)

USER

Write Python code that implements this algorithm:

Algorithm 1 Star Attention - Phase 1: Context Encoding
 1: Context c, Block size b
 2: L ← length(c)
 3: Split c into n = ⌈L/b⌉ blocks, such that c = [c1, c2, …, cn]   ▷ Each block has up to b tokens
 4: for i = 2 to n do
 5:     ci′ ← (c1, ci)   ▷ Each block ci is prefixed with anchor block c1
 6: end for
 7: for each host concurrently do
 8:     Initialize an empty list kv
 9: end for
10: Distribute augmented blocks [c1′, c2′, …, cn′] across all hosts
11: for each host concurrently do   ▷ Parallel processing on each host
12:     for each assigned block ci′ do
13:         Compute attention over 2b tokens in ci′
14:         Generate KV cache for ci′
15:         Discard KV cache for anchor block c1
16:         Append remaining KV cache (for ci) to kv
17:     end for
18: end for

ASSISTANT

Okay, I need to implement this algorithm in Python. Let's break it down step by step.

First, the algorithm is called "Star Attention - Phase 1: Context Encoding." It takes a context c and a block size b. The goal seems to be splitting the context into blocks, prefixing each block (except the first one) with an anchor block, and then processing these blocks in parallel to generate some kind of cache.

Let's look at the steps:

  1. L ← length(c): Get the length of the context.

  2. n ← ceil(L / b): Calculate the number of blocks needed, where each block can have up to b tokens.

  3. Split c into n blocks: c = [c1, c2, ..., cn], each having up to b tokens.

  4. For each block ci from c2 to cn, create ci' by prefixing c1 to ci. So, ci' = (c1, ci).

  5. Initialize an empty list kv on each host.

  6. Distribute the augmented blocks [c1', c2', ..., cn'] across all hosts.

  7. On each host, for each assigned block ci': compute attention over the 2*b tokens of ci' (c1 plus ci, each with up to b tokens), generate a KV cache for ci', discard the KV cache belonging to the anchor c1, and append the remaining KV cache (for ci) to kv. A minimal sequential sketch of these steps is shown right below, before any host parallelism is added.
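
Before thinking about hosts, here is a minimal sequential sketch of the algorithm (lines 2-16); it is my own illustration and uses a dummy per-token KV cache. Read literally, the loop starts at i=2, so the first block is left un-prefixed; the threaded code later in this gist prefixes every block, including c1, with c1 and always slices off the first b entries, which gives the same kept entries for this dummy cache.

import math

def compute_attention(tokens):
    # Dummy stand-in: one (key, value) pair per token
    return [(token, token * 2) for token in tokens]

def phase1_sequential(c, b):
    n = math.ceil(len(c) / b)                          # line 3: number of blocks
    blocks = [c[i * b:(i + 1) * b] for i in range(n)]  # line 3: split into blocks
    # Lines 4-6: prefix blocks 2..n with the anchor block c1 (c1' is just c1)
    augmented = [blocks[0]] + [blocks[0] + blk for blk in blocks[1:]]
    kv = []
    for i, aug in enumerate(augmented):                # lines 12-16
        kv_cache = compute_attention(aug)
        # Discard the anchor's KV entries for the prefixed blocks, keep ci's
        kv.append(kv_cache[b:] if i > 0 else kv_cache)
    return kv

# Example: 20 tokens, block size 5 -> KV is kept for exactly c1..c4
print(phase1_sequential(list(range(1, 21)), 5))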

Okay, so we need to simulate this process in Python. The algorithm assumes parallel processing across multiple hosts, but since this is a single Python script, I'll simulate the hosts with threads or processes.

Let's think about how to implement this.

First, I need to simulate the context c and the block size b. For simplicity, let's assume c is a list of tokens, and b is an integer.

For example:

c = list(range(1, 21))  # Tokens 1 to 20

b = 5  # Block size

So, L = 20, n = ceil(20 / 5) = 4.

The blocks would be:

  • c1: [1, 2, 3, 4, 5]

  • c2: [6, 7, 8, 9, 10]

  • c3: [11, 12, 13, 14, 15]

  • c4: [16, 17, 18, 19, 20]

Then, create augmented blocks:

  • c1' = c1 + c1 = [1,2,3,4,5,1,2,3,4,5]  # Strictly, the algorithm's loop starts at i=2, so c1' is just c1; prefixing it anyway keeps the processing uniform.

Line 5 says ci′ ← (c1, ci), i.e. each augmented block concatenates c1 and ci.

So,

  • c1' = c1 + c1 = [1,2,3,4,5,1,2,3,4,5]

  • c2' = c1 + c2 = [1,2,3,4,5,6,7,8,9,10]

  • c3' = c1 + c3 = [1,2,3,4,5,11,12,13,14,15]

  • c4' = c1 + c4 = [1,2,3,4,5,16,17,18,19,20]

Each augmented block has 2*b tokens.

Then, these augmented blocks are distributed across hosts. For simplicity, let's assume we have 2 hosts, so host1 gets c1' and c2', host2 gets c3' and c4'.

Each host processes its assigned blocks in parallel.

For each assigned block ci', compute attention over 2*b tokens, generate KV cache for ci', discard KV cache for c1, append remaining KV cache (for ci) to kv.

So, for example, for c2':

  • Compute attention over [1,2,3,4,5,6,7,8,9,10]

  • Generate KV cache for these 10 tokens

  • Discard the first b tokens' KV cache (corresponding to c1)

  • Append the remaining b tokens' KV cache (corresponding to c2) to kv

Similarly for other blocks.

Finally, collect all kv lists from all hosts.

In Python, I can simulate this using threads or processes. For simplicity, let's use threads.

I'll need to define a function that each host executes, which takes its assigned augmented blocks, processes them, and builds its own kv list.

Then, after all hosts finish, collect all kv lists and combine them.

Let me try to outline the code structure:

  1. Define the context c and block size b.

  2. Split c into blocks c1, c2, ..., cn.

  3. Create augmented blocks c1', c2', ..., cn' by prefixing each ci with c1.

  4. Decide how to distribute these augmented blocks across hosts. For example, if there are 2 hosts, host1 gets c1' and c2', host2 gets c3' and c4'.

  5. Define a function that a host executes, which takes its assigned augmented blocks, processes each one to generate KV cache for ci (after discarding KV for c1), and appends it to its kv list.

  6. Use threading or multiprocessing to run these host functions in parallel.

  7. Collect the kv lists from all hosts and combine them.

Okay, now let's think about how to simulate the attention computation and KV cache generation.

Since this is just a simulation, I'll create dummy functions for computing attention and generating KV cache.

Let's assume that compute_attention takes a list of tokens and returns a list of KV pairs, where each KV pair is represented as a tuple (key, value).

Then, discarding the KV cache for c1 means slicing the list to remove the first b KV pairs.

So, for c2', which is [1,2,3,4,5,6,7,8,9,10], after computing attention, we get KV pairs for all 10 tokens, then discard the first 5 KV pairs (corresponding to c1), and keep the last 5 KV pairs (corresponding to c2).

Then, append these 5 KV pairs to the kv list.

Similarly for other blocks.

Finally, collect all kv lists from all hosts and combine them into a single list.

Let me try to write some pseudocode:

import threading

def compute_attention(tokens):

    # Dummy implementation: return list of (token, token*2)

    return [(token, token * 2) for token in tokens]

def process_block(augmented_tokens, b):

    kv_cache = compute_attention(augmented_tokens)

    # Discard first b KV pairs (corresponding to c1)

    remaining_kv = kv_cache[b:]

    return remaining_kv

def host_worker(assigned_blocks, b, host_kvs):

    for block in assigned_blocks:

        remaining_kv = process_block(block, b)

        host_kvs.append(remaining_kv)

c = list(range(1, 21))

b = 5

n = (len(c) + b - 1) // b

# Split c into n blocks

blocks = [c[i*b : (i+1)*b] for i in range(n)]

# Create augmented blocks

augmented_blocks = [blocks[0] + block for block in blocks]

# Decide host assignments

num_hosts = 2

host_assignments = [

    augmented_blocks[:2],  # host1 gets c1' and c2'

    augmented_blocks[2:],  # host2 gets c3' and c4'

]

# Initialize host_kvs lists

host_kvs = [[] for _ in range(num_hosts)]

# Create and start threads for each host

threads = []

for i in range(num_hosts):

    thread = threading.Thread(target=host_worker, args=(host_assignments[i], b, host_kvs[i]))

    threads.append(thread)

    thread.start()

# Wait for all threads to finish

for thread in threads:

    thread.join()

# Collect all kv lists from hosts

all_kvs = []

for host_kv in host_kvs:

    all_kvs.extend(host_kv)

print(all_kvs)

This is a rough sketch. I need to make sure that the augmented blocks are correctly created and that the KV caches are being sliced properly.

Also, in the real scenario, the attention computation would be more complex, but for simulation purposes, this dummy implementation suffices.
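
As a point of reference only, a slightly less artificial stand-in (my own sketch, not the paper's implementation) could derive one key/value pair per token from fixed projection matrices, which is closer to what a real KV cache stores. Nothing else in the simulation would need to change as long as it keeps returning one pair per token:

import numpy as np

rng = np.random.default_rng(0)
D_MODEL, D_HEAD = 16, 8  # toy dimensions, chosen arbitrarily
W_K = rng.normal(size=(D_MODEL, D_HEAD))
W_V = rng.normal(size=(D_MODEL, D_HEAD))

def embed(token_id):
    # Deterministic toy "embedding" for a token id (not a real model embedding)
    return np.random.default_rng(token_id).normal(size=D_MODEL)

def compute_attention(tokens):
    # One (key, value) pair per token, obtained from fixed K/V projections
    x = np.stack([embed(t) for t in tokens])  # shape: (len(tokens), D_MODEL)
    keys = x @ W_K                            # shape: (len(tokens), D_HEAD)
    values = x @ W_V
    return list(zip(keys, values))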

Let me check the augmented blocks:

For c1': blocks[0] + blocks[0] = [1,2,3,4,5] + [1,2,3,4,5] = [1,2,3,4,5,1,2,3,4,5]

That matches the uniform treatment chosen earlier: every block, including c1, is prefixed with c1. In Python, list addition is concatenation, so blocks[0] + block gives:

c1' = c1 + c1

c2' = c1 + c2

c3' = c1 + c3

c4' = c1 + c4

So augmented_blocks = [c1 + c1, c1 + c2, c1 + c3, c1 + c4]

Which matches c1', c2', c3', c4'

In the process_block function, we compute attention on these 2*b tokens, then discard the first b KV pairs (corresponding to c1), and keep the last b KV pairs (corresponding to ci).

Then, append these remaining b KV pairs to host_kvs.

Finally, collect all host_kvs and combine them.

Seems straightforward.

I need to make sure that the slicing is correct in process_block.

Let's say augmented_tokens = [1,2,3,4,5,6,7,8,9,10]

KV pairs = [(k1,v1), (k2,v2), (k3,v3), (k4,v4), (k5,v5), (k6,v6), (k7,v7), (k8,v8), (k9,v9), (k10,v10)], one pair per token

After discarding the first b=5 KV pairs, remaining_kv = [(k6,v6), (k7,v7), (k8,v8), (k9,v9), (k10,v10)]

Then, append this to host_kvs.

Wait, in my earlier pseudocode, compute_attention returns [(token, token*2)], which is a list of tuples.

So remaining_kv would be a list of tuples corresponding to ci.

Then, host_kvs.append(remaining_kv)

So host_kvs[i] would be a list of lists, where each sublist corresponds to one ci.

Finally, all_kvs extends these sublists.

I think that's correct.

Let me try to write the actual code with some print statements to verify.

Also, I need to decide how many hosts to use. For simplicity, let's use 2 hosts.

I can parameterize the number of hosts and assign blocks accordingly.

Alternatively, I can use a loop to assign blocks to hosts based on num_hosts.

Yes, that's better.

Let me adjust the code accordingly.

Also, I need to ensure that the combination of all_kvs is in the correct order.

Assuming that the hosts process their assigned blocks in order, and we collect host_kvs in order, then extending all_kvs should maintain the order.

But to be safe, I can keep track of the block indices.

Alternatively, in this simulation the exact order is not critical, as long as all KV pairs for each ci are included.

In practice, the order should match the original block order.

Wait, but in this approach, the order in which hosts process their blocks might not preserve the original order.

To maintain the original order, I need to collect the KV pairs in the same order as the original blocks.

So, perhaps instead of extending all_kvs with host_kvs, I should have a predefined order.

Alternatively, since each host processes a subset of blocks, and I know which blocks each host is processing, I can collect the KV pairs in that order.

In the pseudocode above, host1 processes c1' and c2', host2 processes c3' and c4'.

So all_kvs would first append host1's kvs for c1' and c2', then host2's kvs for c3' and c4'.

Since all threads are joined before the results are combined, and host_kvs is extended in host-index order, all_kvs ends up with the kvs for c1', c2', c3', c4' regardless of which host finishes first.

But c1' corresponds to c1, and c2' corresponds to c2, etc.

So effectively, all_kvs would have kvs for c1, c2, c3, c4 in order.

Which matches the original block order.

Hence, the order is preserved in this setup.

Good.
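
As an aside, if the assignment were less regular, say round-robin across hosts, the original order can be made explicit instead of relying on how blocks happen to be handed out. A small sketch of mine (hypothetical helper names, same dummy cache) carries the block index with each result and sorts at the end:

import threading

def compute_attention(tokens):
    # Same dummy per-token KV stand-in as above
    return [(token, token * 2) for token in tokens]

def host_worker_indexed(assigned, b, out):
    # assigned: list of (block_index, augmented_block) pairs
    for idx, aug in assigned:
        kv_cache = compute_attention(aug)
        out.append((idx, kv_cache[b:]))  # carry the block index with the kept KV

def run_indexed(augmented_blocks, b, num_hosts):
    # Round-robin assignment: block i goes to host i % num_hosts
    assignments = [[] for _ in range(num_hosts)]
    for idx, aug in enumerate(augmented_blocks):
        assignments[idx % num_hosts].append((idx, aug))
    results = [[] for _ in range(num_hosts)]
    threads = [threading.Thread(target=host_worker_indexed,
                                args=(assignments[h], b, results[h]))
               for h in range(num_hosts)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    # Flatten and restore the original block order via the carried indices
    flat = [item for host in results for item in host]
    return [kv for _, kv in sorted(flat, key=lambda pair: pair[0])]

# Usage (with augmented_blocks and b defined as above):
# ordered_kvs = run_indexed(augmented_blocks, b=5, num_hosts=2)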

I need to confirm this with print statements in the actual code.

Also, I should ensure that there are no thread synchronization issues, but since each host_kvs list is unique to each host, and threads don't share other mutable data, this should be fine.

Now, let's proceed to write the actual code with appropriate parameters and print statements for verification.

Final Solution

To solve this problem, we simulate a distributed environment in which multiple hosts process portions of the context in parallel. Each host processes its assigned anchor-prefixed blocks and computes key-value (KV) pairs from them, discarding the pairs that belong to the anchor. The solution uses threading to mimic parallel processing and collects the results in the original block order.

Approach

  1. Data Preparation:

    • Split the context into blocks of up to b tokens and prefix each block with the anchor block c1.
  2. Threaded Processing:

    • Assign subsets of the augmented blocks to each host (thread).
    • Each host computes KV pairs for its assigned augmented blocks and discards the KV pairs belonging to the anchor block.
  3. Combining Results:

    • Collect the results from all hosts and combine them in the correct order.
  4. Output:

    • Print the final combined list of KV pairs.

Solution Code

import threading

# Dummy function to simulate attention computation: one (key, value) pair per token
def compute_attention(tokens):
    return [(token, token * 2) for token in tokens]

def process_blocks(augmented_blocks, b, start_index, end_index, result_list):
    for i in range(start_index, end_index):
        # Compute attention over the 2*b tokens of the augmented block
        kv_pairs = compute_attention(augmented_blocks[i])
        # Discard the first b KV pairs (anchor block c1) and keep the KV for ci
        result_list.append(kv_pairs[b:])

def main():
    # Sample context and block size
    c = list(range(1, 21))  # tokens 1..20
    b = 5
    n = (len(c) + b - 1) // b  # number of blocks, ceil(L / b)

    # Split the context into blocks of up to b tokens
    blocks = [c[i * b:(i + 1) * b] for i in range(n)]

    # Prefix each block with the anchor block c1
    # (the first block is prefixed with itself so every block is handled uniformly)
    augmented_blocks = [blocks[0] + block for block in blocks]

    num_hosts = 2
    threads = []
    host_kvs = [[] for _ in range(num_hosts)]

    # Divide augmented blocks among hosts
    blocks_per_host = len(augmented_blocks) // num_hosts

    for h in range(num_hosts):
        start_index = h * blocks_per_host
        end_index = start_index + blocks_per_host
        if h == num_hosts - 1:
            end_index = len(augmented_blocks)  # Assign remaining blocks to the last host
        thread = threading.Thread(target=process_blocks,
                                  args=(augmented_blocks, b, start_index, end_index, host_kvs[h]))
        threads.append(thread)
        thread.start()

    # Wait for all threads to finish
    for thread in threads:
        thread.join()

    # Combine results from all hosts (one KV sublist per block, in block order)
    all_kvs = []
    for host_kv in host_kvs:
        all_kvs.extend(host_kv)

    print(all_kvs)

if __name__ == "__main__":
    main()

Explanation

  1. Data Preparation:

    • We define a sample context, split it into blocks of up to b tokens, and prefix each block with the anchor block c1.
  2. Threaded Processing:

    • We create a specified number of hosts (threads), each processing a subset of the augmented blocks.
    • Each host runs the process_blocks function, which computes KV pairs for each assigned augmented block with the dummy attention function, discards the anchor's KV pairs, and keeps the KV pairs for ci.
  3. Combining Results:

    • After all threads complete, we collect the per-block KV lists from all hosts and combine them into a single list in the original block order.
  4. Output:

    • The combined list of KV pairs is printed, showing that all computations are correctly aggregated.

This approach ensures efficient parallel processing of data blocks while maintaining the integrity and order of the results.
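
For the sample context (tokens 1 to 20, b = 5) and the dummy (token, token * 2) cache, the printed all_kvs should contain one sublist of kept KV pairs per original block, in block order, along the lines of:

[[(1, 2), (2, 4), (3, 6), (4, 8), (5, 10)],
 [(6, 12), (7, 14), (8, 16), (9, 18), (10, 20)],
 [(11, 22), (12, 24), (13, 26), (14, 28), (15, 30)],
 [(16, 32), (17, 34), (18, 36), (19, 38), (20, 40)]]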

EvilFreelancer commented Nov 28, 2024

Star Attention: Efficient LLM Inference over Long Sequences https://arxiv.org/html/2411.17116v1

Appendix A Star Attention Pseudo-code
