itslukej · December 18, 2025 08:51
diff --git a/lib.rs b/lib.rs
 use cudarc::driver::{CudaDevice, LaunchAsync, LaunchConfig};
 use cudarc::nvrtc::compile_ptx;
 use data_encoding::HEXUPPER;
 use ring::digest::{Context, SHA256};
 use anyhow::{Result, anyhow};
 use std::panic;

 const CUDA_KERNEL: &str = r#"
 // SHA-256 round constants. sha256_transform adds k[i] to the round-i
 // temporary in its 64-iteration main loop. Declared __constant__, so the
 // table is read-only from device code.
 __constant__ unsigned int k[64] = {
    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
 };

 // Rotate the 32-bit value `x` right by `n` bit positions.
 // Every caller in this kernel passes a constant n between 2 and 25.
 __device__ unsigned int rotr(unsigned int x, unsigned int n) {
    const unsigned int shifted_down = x >> n;
    const unsigned int wrapped_bits = x << (32 - n);
    return shifted_down | wrapped_bits;
 }

 // Run the SHA-256 compression function over one 64-byte block.
 //
 //   state - eight 32-bit chaining words; updated in place
 //   data  - exactly 64 readable bytes forming the block
 __device__ void sha256_transform(unsigned int* state, const unsigned char* data) {
    unsigned int schedule[64];

    // Load the block as sixteen big-endian 32-bit words.
    for (int word = 0; word < 16; ++word) {
        const unsigned char* p = data + word * 4;
        schedule[word] = (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3];
    }

    // Expand words 16..63 from earlier schedule entries.
    for (int word = 16; word < 64; ++word) {
        const unsigned int x = schedule[word - 15];
        const unsigned int y = schedule[word - 2];
        const unsigned int sigma0 = rotr(x, 7) ^ rotr(x, 18) ^ (x >> 3);
        const unsigned int sigma1 = rotr(y, 17) ^ rotr(y, 19) ^ (y >> 10);
        schedule[word] = schedule[word - 16] + sigma0 + schedule[word - 7] + sigma1;
    }

    // Working variables a..h live in v[0]..v[7].
    unsigned int v[8];
    for (int j = 0; j < 8; ++j) {
        v[j] = state[j];
    }

    for (int round = 0; round < 64; ++round) {
        const unsigned int a = v[0];
        const unsigned int e = v[4];
        const unsigned int big_sigma1 = rotr(e, 6) ^ rotr(e, 11) ^ rotr(e, 25);
        const unsigned int choose = (e & v[5]) ^ (~e & v[6]);
        const unsigned int temp1 = v[7] + big_sigma1 + choose + k[round] + schedule[round];
        const unsigned int big_sigma0 = rotr(a, 2) ^ rotr(a, 13) ^ rotr(a, 22);
        const unsigned int majority = (a & v[1]) ^ (a & v[2]) ^ (v[1] & v[2]);
        const unsigned int temp2 = big_sigma0 + majority;

        // Shift every variable down one slot (h = g, g = f, ..., b = a).
        for (int j = 7; j > 0; --j) {
            v[j] = v[j - 1];
        }
        v[4] += temp1;          // v[4] now holds the old d, so this is e = d + t1
        v[0] = temp1 + temp2;   // new a
    }

    // Fold the working variables back into the chaining state.
    for (int j = 0; j < 8; ++j) {
        state[j] += v[j];
    }
 }

 // Write the eight initial SHA-256 chaining words into `state`,
 // which must have room for 8 unsigned ints.
 __device__ void sha256_init(unsigned int* state) {
    const unsigned int initial[8] = {
        0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
        0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
    };
    for (int i = 0; i < 8; ++i) {
        state[i] = initial[i];
    }
 }

 // Hash a complete message in one call: absorbs `len` bytes of `data`,
 // appends the SHA-256 padding and 64-bit bit length, and leaves the final
 // digest words in `state`. Despite the name this is not incremental; call it
 // once per message on a state prepared by sha256_init.
 //
 //   state - eight chaining words, updated in place
 //   data  - message bytes (any length; full 64-byte blocks are compressed as
 //           they fill, so the 64-byte staging buffer can never overflow)
 //   len   - number of bytes readable at `data`
 __device__ void sha256_update(unsigned int* state, const unsigned char* data, unsigned int len) {
    unsigned char buffer[64];
    unsigned int buffer_len = 0;

    // Absorb the message, compressing each time a full block is staged.
    for (unsigned int i = 0; i < len; i++) {
        buffer[buffer_len++] = data[i];
        if (buffer_len == 64) {
            sha256_transform(state, buffer);
            buffer_len = 0;
        }
    }

    // Mandatory 0x80 marker; buffer_len is at most 63 here, so this is in bounds.
    buffer[buffer_len++] = 0x80;

    // If the 8-byte length field no longer fits, flush a zero-padded block first.
    if (buffer_len > 56) {
        while (buffer_len < 64) {
            buffer[buffer_len++] = 0x00;
        }
        sha256_transform(state, buffer);
        buffer_len = 0;
    }
    while (buffer_len < 56) {
        buffer[buffer_len++] = 0x00;
    }

    // Message length in bits, big-endian, computed in 64-bit arithmetic.
    const unsigned long long bit_len = (unsigned long long)len * 8ULL;
    for (int i = 0; i < 8; i++) {
        buffer[56 + i] = (unsigned char)((bit_len >> (8 * (7 - i))) & 0xff);
    }

    sha256_transform(state, buffer);
 }

 // Return true when the digest in `hash` is less than or equal to `target`,
 // comparing byte by byte from the most significant end.
 //
 //   hash   - eight 32-bit digest words; word 0 holds the most significant bytes
 //   target - at least `len` readable bytes
 //   len    - number of target bytes to compare. A SHA-256 digest has only 32
 //            bytes, so larger values are clamped to 32 instead of reading past
 //            the digest. A len of zero or less compares nothing and returns true.
 __device__ bool verify_nonce_gpu(const unsigned int* hash, const unsigned char* target, int len) {
    const int limit = len < 32 ? len : 32;

    for (int i = 0; i < limit; i++) {
        // Byte i of the big-endian digest: word i/4, byte (i%4) from the top.
        const unsigned char hash_byte =
            (unsigned char)((hash[i >> 2] >> (24 - 8 * (i & 3))) & 0xff);
        if (hash_byte != target[i]) {
            return hash_byte < target[i];
        }
    }
    return true;
 }

 // Proof-of-work search kernel: each thread tests exactly one nonce.
 //
 // Launch layout: 1-D grid of 1-D blocks, no shared memory. Thread t of block b
 // tests nonce start_nonce + b * blockDim.x + t, hashing the bytes of `prefix`
 // followed by the nonce written in decimal ASCII.
 //
 //   prefix, prefix_len - device buffer with the prefix bytes and its length
 //   target, target_len - device buffer with the big-endian target and its length
 //   start_nonce        - first nonce covered by this launch
 //   result             - receives a satisfying nonce (if several threads
 //                        succeed, whichever exchange lands last is kept)
 //   found              - set to true once any thread succeeds; the host must
 //                        clear it before each launch
 //
 // Threads whose prefix + nonce text would not fit the 256-byte input buffer
 // exit without writing anything.
 extern "C" __global__ void solve_nonce_kernel(
    const char* prefix,
    int prefix_len,
    const unsigned char* target,
    int target_len,
    unsigned long long start_nonce,
    unsigned long long* result,
    bool* found
 ) {
    // Best-effort early exit once some thread has already reported a solution.
    if (*found) return;

    // Widen before multiplying so the global index cannot wrap at 32 bits.
    const unsigned long long nonce =
        start_nonce + (unsigned long long)blockIdx.x * blockDim.x + threadIdx.x;

    // Decimal digits of the nonce, least significant first (a u64 needs at most 20).
    char digits[20];
    int nonce_len = 0;
    unsigned long long remaining = nonce;
    do {
        digits[nonce_len++] = (char)('0' + (remaining % 10));
        remaining /= 10;
    } while (remaining > 0);

    // Build prefix + nonce, refusing inputs that would overflow the buffer.
    char input[256];
    if (prefix_len > (int)sizeof(input) - nonce_len) return;

    int input_len = 0;
    for (int i = 0; i < prefix_len; i++) {
        input[input_len++] = prefix[i];
    }
    for (int i = nonce_len - 1; i >= 0; i--) {
        input[input_len++] = digits[i];
    }

    // Compute SHA256 of the assembled input.
    unsigned int hash_state[8];
    sha256_init(hash_state);
    sha256_update(hash_state, (unsigned char*)input, input_len);

    // Publish the nonce if its digest satisfies the target.
    if (verify_nonce_gpu(hash_state, target, target_len)) {
        atomicExch(result, nonce);
        *found = true;
    }
 }
 "#;

 /// Returns `true` when `result`, read as a big-endian number, is less than or
 /// equal to `target`.
 ///
 /// Both inputs must have the same length; a length mismatch yields `false`.
 /// Every byte takes part in the comparison, including the last one, and two
 /// empty inputs compare as equal.
 fn verify_nonce(result: &Vec<u8>, target: &Vec<u8>) -> bool {
    // For equal-length slices, lexicographic order is big-endian numeric order.
    result.len() == target.len() && result.as_slice() <= target.as_slice()
 }

 /// CPU-based proof-of-work solver.
 ///
 /// Tries nonces 0, 1, 2, ... in order and returns the first one for which
 /// `verify_nonce` accepts SHA256(prefix + nonce), where the nonce is appended
 /// in decimal. The nonce counter is a `u64`, matching the GPU solver.
 ///
 /// # Arguments
 ///
 /// * `prefix` - String prefix to prepend to nonce
 /// * `target_hex` - Uppercase hex encoding of the 32-byte target
 ///
 /// # Returns
 ///
 /// The found nonce as a decimal string
 ///
 /// # Panics
 ///
 /// Panics if `target_hex` is not valid uppercase hex or does not decode to
 /// exactly 32 bytes. A target of any other length can never match a SHA-256
 /// digest, so the search would otherwise never terminate.
 pub fn solve_challenge_cpu(prefix: &str, target_hex: &str) -> String {
    let target = HEXUPPER
        .decode(target_hex.as_bytes())
        .expect("target_hex must be valid uppercase hex");
    assert_eq!(
        target.len(),
        32,
        "target must decode to 32 bytes (SHA-256 digest length)"
    );

    // Explicit u64: an untyped literal would default to i32 and overflow.
    let mut nonce: u64 = 0;

    loop {
        let mut context = Context::new(&SHA256);
        let input = format!("{}{}", prefix, nonce);
        context.update(input.as_bytes());
        let hashed = context.finish().as_ref().to_vec();

        if verify_nonce(&hashed, &target) {
            return nonce.to_string();
        }
        nonce += 1;
    }
 }

 /// GPU-accelerated proof-of-work solver using CUDA.
 ///
 /// Compiles the embedded kernel with NVRTC, then repeatedly launches
 /// 65536 blocks of 256 threads, one nonce per thread, until a thread reports
 /// a nonce whose SHA256(prefix + nonce) satisfies the target.
 ///
 /// # Arguments
 ///
 /// * `prefix` - String prefix to prepend to nonce (at most 236 bytes, so that
 ///   prefix plus a 20-digit nonce fits the kernel's 256-byte input buffer)
 /// * `target_hex` - Uppercase hex encoding of the 32-byte target
 ///
 /// # Returns
 ///
 /// * `Ok(String)` - The found nonce as a decimal string
 /// * `Err(anyhow::Error)` - Invalid arguments, GPU initialization failure,
 ///   kernel compilation/launch failure, or an exhausted nonce space
 ///
 /// # Requirements
 ///
 /// * CUDA device 0 must be usable
 /// * NVRTC must be available at runtime to compile the kernel
 pub fn solve_challenge_gpu(prefix: &str, target_hex: &str) -> Result<String> {
    // Sizes fixed by the embedded kernel source.
    const KERNEL_INPUT_CAPACITY: usize = 256;
    const MAX_NONCE_DIGITS: usize = 20; // decimal digits in u64::MAX
    const DIGEST_LEN: usize = 32;

    // Validate arguments before touching the GPU so bad input is an Err, not a panic.
    let target = HEXUPPER
        .decode(target_hex.as_bytes())
        .map_err(|e| anyhow!("invalid target hex: {}", e))?;
    if target.len() != DIGEST_LEN {
        return Err(anyhow!(
            "target must decode to {} bytes, got {}",
            DIGEST_LEN,
            target.len()
        ));
    }
    if prefix.len() > KERNEL_INPUT_CAPACITY - MAX_NONCE_DIGITS {
        return Err(anyhow!(
            "prefix is {} bytes; the GPU kernel supports at most {}",
            prefix.len(),
            KERNEL_INPUT_CAPACITY - MAX_NONCE_DIGITS
        ));
    }

    // Safely try to initialize CUDA device, catching panics from missing libraries
    let device = panic::catch_unwind(|| CudaDevice::new(0)).map_err(|_| {
        anyhow!("CUDA initialization failed - likely missing CUDA libraries or no compatible GPU found")
    })??;

    // Compile the CUDA kernel - also catch panics here
    let ptx = panic::catch_unwind(|| compile_ptx(CUDA_KERNEL)).map_err(|_| {
        anyhow!("CUDA kernel compilation failed - NVRTC libraries not available")
    })??;

    device.load_ptx(ptx, "solve_nonce", &["solve_nonce_kernel"])?;

    // GPU memory allocations. An empty prefix is uploaded as one placeholder
    // byte so no zero-length device buffer is requested; the kernel still
    // receives a prefix length of 0 and never reads it.
    let mut prefix_bytes = prefix.as_bytes().to_vec();
    if prefix_bytes.is_empty() {
        prefix_bytes.push(0);
    }
    let prefix_gpu = device.htod_copy(prefix_bytes)?;
    let target_gpu = device.htod_copy(target.clone())?;
    let result_gpu = device.alloc_zeros::<u64>(1)?;
    let mut found_gpu = device.alloc_zeros::<bool>(1)?;

    let threads_per_block: u32 = 256;
    let blocks: u32 = 65536; // Start with many blocks
    let nonces_per_launch = u64::from(threads_per_block) * u64::from(blocks);

    let mut start_nonce = 0u64;

    loop {
        let cfg = LaunchConfig {
            grid_dim: (blocks, 1, 1),
            block_dim: (threads_per_block, 1, 1),
            shared_mem_bytes: 0,
        };

        // Reset found flag
        device.htod_copy_into(vec![false], &mut found_gpu)?;

        let kernel = device
            .get_func("solve_nonce", "solve_nonce_kernel")
            .ok_or_else(|| anyhow!("kernel solve_nonce_kernel missing from loaded module"))?;

        // SAFETY: the argument tuple mirrors the kernel's parameter list in
        // order and type, and every buffer outlives the launch.
        unsafe {
            kernel.launch(
                cfg,
                (
                    &prefix_gpu,
                    prefix.len() as i32,
                    &target_gpu,
                    target.len() as i32,
                    start_nonce,
                    &result_gpu,
                    &found_gpu,
                ),
            )?;
        }

        // Check if solution was found
        let found: Vec<bool> = device.dtoh_sync_copy(&found_gpu)?;
        if found[0] {
            let result: Vec<u64> = device.dtoh_sync_copy(&result_gpu)?;
            return Ok(result[0].to_string());
        }

        start_nonce = start_nonce
            .checked_add(nonces_per_launch)
            .ok_or_else(|| anyhow!("nonce space exhausted without finding a solution"))?;

        // Print progress occasionally
        if start_nonce % (nonces_per_launch * 100) == 0 {
            println!("Searched {} nonces...", start_nonce);
        }
    }
 }

 #[cfg(test)]
 mod tests {
    use super::*;
    
    /// Checks one accepted and one rejected hash/target pair, each decided by
    /// the second byte.
    #[test]
    fn test_verify_nonce() {
        // Hash is smaller at the first differing byte: accepted.
        let smaller_hash = vec![0x00, 0x00, 0xFF, 0xFF];
        let upper_bound = vec![0x00, 0x01, 0x00, 0x00];
        let accepted = verify_nonce(&smaller_hash, &upper_bound);
        assert!(accepted);

        // Hash is larger at the first differing byte: rejected.
        let larger_hash = vec![0x00, 0x02, 0x00, 0x00];
        let lower_bound = vec![0x00, 0x01, 0xFF, 0xFF];
        let rejected = !verify_nonce(&larger_hash, &lower_bound);
        assert!(rejected);
    }
    
    /// Solves an easy challenge on the CPU and re-checks the returned nonce by
    /// hashing prefix + nonce again.
    #[test]
    fn test_cpu_solver() {
        let target_hex = "0FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF";
        let prefix = "test";
        let solved_nonce = solve_challenge_cpu(prefix, target_hex);

        // Recompute the digest independently of the solver.
        let message = format!("{}{}", prefix, solved_nonce);
        let mut hasher = Context::new(&SHA256);
        hasher.update(message.as_bytes());
        let digest = hasher.finish().as_ref().to_vec();

        let decoded_target = HEXUPPER.decode(target_hex.as_bytes()).unwrap();
        assert!(verify_nonce(&digest, &decoded_target));
    }
	use cudarc::driver::{CudaDevice, LaunchAsync, LaunchConfig};
	use cudarc::nvrtc::compile_ptx;
	use data_encoding::HEXUPPER;
	use ring::digest::{Context, SHA256};
	use anyhow::{Result, anyhow};
	use std::panic;

	const CUDA_KERNEL: &str = r#"
	// SHA-256 round constants. sha256_transform adds k[i] to the round-i
	// temporary in its 64-iteration main loop. Declared __constant__, so the
	// table is read-only from device code.
	__constant__ unsigned int k[64] = {
	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
	};

	// Rotate the 32-bit value `x` right by `n` bit positions.
	// Every caller in this kernel passes a constant n between 2 and 25.
	__device__ unsigned int rotr(unsigned int x, unsigned int n) {
	    // Plain bitwise OR; the escaped `\|` form is not valid CUDA.
	    return (x >> n) | (x << (32 - n));
	}

	// Run the SHA-256 compression function over one 64-byte block.
	//
	//   state - eight 32-bit chaining words; updated in place
	//   data  - exactly 64 readable bytes forming the block
	__device__ void sha256_transform(unsigned int* state, const unsigned char* data) {
	    unsigned int w[64];
	    unsigned int a, b, c, d, e, f, g, h;
	    unsigned int t1, t2;

	    // Copy chunk into first 16 words of message schedule (big-endian).
	    // Byte offsets are i*4 .. i*4+3 and the parts are joined with bitwise OR.
	    for (int i = 0; i < 16; i++) {
	        w[i] = (data[i*4] << 24) | (data[i*4+1] << 16) | (data[i*4+2] << 8) | data[i*4+3];
	    }

	    // Extend the first 16 words into the remaining 48 words
	    for (int i = 16; i < 64; i++) {
	        unsigned int s0 = rotr(w[i-15], 7) ^ rotr(w[i-15], 18) ^ (w[i-15] >> 3);
	        unsigned int s1 = rotr(w[i-2], 17) ^ rotr(w[i-2], 19) ^ (w[i-2] >> 10);
	        w[i] = w[i-16] + s0 + w[i-7] + s1;
	    }

	    // Initialize working variables
	    a = state[0]; b = state[1]; c = state[2]; d = state[3];
	    e = state[4]; f = state[5]; g = state[6]; h = state[7];

	    // Main loop: 64 rounds, one round constant and one schedule word each
	    for (int i = 0; i < 64; i++) {
	        unsigned int S1 = rotr(e, 6) ^ rotr(e, 11) ^ rotr(e, 25);
	        unsigned int ch = (e & f) ^ (~e & g);
	        t1 = h + S1 + ch + k[i] + w[i];
	        unsigned int S0 = rotr(a, 2) ^ rotr(a, 13) ^ rotr(a, 22);
	        unsigned int maj = (a & b) ^ (a & c) ^ (b & c);
	        t2 = S0 + maj;

	        h = g; g = f; f = e; e = d + t1; d = c; c = b; b = a; a = t1 + t2;
	    }

	    // Add the compressed chunk to the current hash value
	    state[0] += a; state[1] += b; state[2] += c; state[3] += d;
	    state[4] += e; state[5] += f; state[6] += g; state[7] += h;
	}

	// Write the eight initial SHA-256 chaining words into `state`,
	// which must have room for 8 unsigned ints.
	__device__ void sha256_init(unsigned int* state) {
	    const unsigned int initial[8] = {
	        0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
	        0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
	    };
	    for (int i = 0; i < 8; ++i) {
	        state[i] = initial[i];
	    }
	}

	// Hash a complete message in one call: absorbs `len` bytes of `data`,
	// appends the SHA-256 padding and 64-bit bit length, and leaves the final
	// digest words in `state`. Despite the name this is not incremental; call it
	// once per message on a state prepared by sha256_init.
	//
	//   state - eight chaining words, updated in place
	//   data  - message bytes (any length; full 64-byte blocks are compressed as
	//           they fill, so the 64-byte staging buffer can never overflow)
	//   len   - number of bytes readable at `data`
	__device__ void sha256_update(unsigned int* state, const unsigned char* data, unsigned int len) {
	    unsigned char buffer[64];
	    unsigned int buffer_len = 0;

	    // Absorb the message, compressing each time a full block is staged.
	    for (unsigned int i = 0; i < len; i++) {
	        buffer[buffer_len++] = data[i];
	        if (buffer_len == 64) {
	            sha256_transform(state, buffer);
	            buffer_len = 0;
	        }
	    }

	    // Mandatory 0x80 marker; buffer_len is at most 63 here, so this is in bounds.
	    buffer[buffer_len++] = 0x80;

	    // If the 8-byte length field no longer fits, flush a zero-padded block first.
	    if (buffer_len > 56) {
	        while (buffer_len < 64) {
	            buffer[buffer_len++] = 0x00;
	        }
	        sha256_transform(state, buffer);
	        buffer_len = 0;
	    }
	    while (buffer_len < 56) {
	        buffer[buffer_len++] = 0x00;
	    }

	    // Message length in bits, big-endian, computed in 64-bit arithmetic.
	    const unsigned long long bit_len = (unsigned long long)len * 8ULL;
	    for (int i = 0; i < 8; i++) {
	        buffer[56 + i] = (unsigned char)((bit_len >> (8 * (7 - i))) & 0xff);
	    }

	    sha256_transform(state, buffer);
	}

	// Return true when the digest in `hash` is less than or equal to `target`,
	// comparing byte by byte from the most significant end.
	//
	//   hash   - eight 32-bit digest words; word 0 holds the most significant bytes
	//   target - at least `len` readable bytes
	//   len    - number of target bytes to compare. A SHA-256 digest has only 32
	//            bytes, so larger values are clamped to 32 instead of reading past
	//            the digest. A len of zero or less compares nothing and returns true.
	__device__ bool verify_nonce_gpu(const unsigned int* hash, const unsigned char* target, int len) {
	    const int limit = len < 32 ? len : 32;

	    for (int i = 0; i < limit; i++) {
	        // Byte i of the big-endian digest: word i/4, byte (i%4) from the top.
	        const unsigned char hash_byte =
	            (unsigned char)((hash[i >> 2] >> (24 - 8 * (i & 3))) & 0xff);
	        if (hash_byte != target[i]) {
	            return hash_byte < target[i];
	        }
	    }
	    return true;
	}

	// Proof-of-work search kernel: each thread tests exactly one nonce.
	//
	// Launch layout: 1-D grid of 1-D blocks, no shared memory. Thread t of block b
	// tests nonce start_nonce + b * blockDim.x + t, hashing the bytes of `prefix`
	// followed by the nonce written in decimal ASCII.
	//
	//   prefix, prefix_len - device buffer with the prefix bytes and its length
	//   target, target_len - device buffer with the big-endian target and its length
	//   start_nonce        - first nonce covered by this launch
	//   result             - receives a satisfying nonce (if several threads
	//                        succeed, whichever exchange lands last is kept)
	//   found              - set to true once any thread succeeds; the host must
	//                        clear it before each launch
	//
	// Threads whose prefix + nonce text would not fit the 256-byte input buffer
	// exit without writing anything.
	extern "C" __global__ void solve_nonce_kernel(
	    const char* prefix,
	    int prefix_len,
	    const unsigned char* target,
	    int target_len,
	    unsigned long long start_nonce,
	    unsigned long long* result,
	    bool* found
	) {
	    // Best-effort early exit once some thread has already reported a solution.
	    if (*found) return;

	    // Widen before multiplying so the global index cannot wrap at 32 bits.
	    const unsigned long long nonce =
	        start_nonce + (unsigned long long)blockIdx.x * blockDim.x + threadIdx.x;

	    // Decimal digits of the nonce, least significant first (a u64 needs at most 20).
	    char digits[20];
	    int nonce_len = 0;
	    unsigned long long remaining = nonce;
	    do {
	        digits[nonce_len++] = (char)('0' + (remaining % 10));
	        remaining /= 10;
	    } while (remaining > 0);

	    // Build prefix + nonce, refusing inputs that would overflow the buffer.
	    char input[256];
	    if (prefix_len > (int)sizeof(input) - nonce_len) return;

	    int input_len = 0;
	    for (int i = 0; i < prefix_len; i++) {
	        input[input_len++] = prefix[i];
	    }
	    for (int i = nonce_len - 1; i >= 0; i--) {
	        input[input_len++] = digits[i];
	    }

	    // Compute SHA256 of the assembled input.
	    unsigned int hash_state[8];
	    sha256_init(hash_state);
	    sha256_update(hash_state, (unsigned char*)input, input_len);

	    // Publish the nonce if its digest satisfies the target.
	    if (verify_nonce_gpu(hash_state, target, target_len)) {
	        atomicExch(result, nonce);
	        *found = true;
	    }
	}
	"#;

	/// Returns `true` when `result`, read as a big-endian number, is less than or
	/// equal to `target`.
	///
	/// Both inputs must have the same length; a length mismatch yields `false`.
	/// Every byte takes part in the comparison, including the last one, and two
	/// empty inputs compare as equal.
	fn verify_nonce(result: &Vec<u8>, target: &Vec<u8>) -> bool {
	    // For equal-length slices, lexicographic order is big-endian numeric order.
	    result.len() == target.len() && result.as_slice() <= target.as_slice()
	}

	/// CPU-based proof-of-work solver.
	///
	/// Tries nonces 0, 1, 2, ... in order and returns the first one for which
	/// `verify_nonce` accepts SHA256(prefix + nonce), where the nonce is appended
	/// in decimal. The nonce counter is a `u64`, matching the GPU solver.
	///
	/// # Arguments
	///
	/// * `prefix` - String prefix to prepend to nonce
	/// * `target_hex` - Uppercase hex encoding of the 32-byte target
	///
	/// # Returns
	///
	/// The found nonce as a decimal string
	///
	/// # Panics
	///
	/// Panics if `target_hex` is not valid uppercase hex or does not decode to
	/// exactly 32 bytes. A target of any other length can never match a SHA-256
	/// digest, so the search would otherwise never terminate.
	pub fn solve_challenge_cpu(prefix: &str, target_hex: &str) -> String {
	    let target = HEXUPPER
	        .decode(target_hex.as_bytes())
	        .expect("target_hex must be valid uppercase hex");
	    assert_eq!(
	        target.len(),
	        32,
	        "target must decode to 32 bytes (SHA-256 digest length)"
	    );

	    // Explicit u64: an untyped literal would default to i32 and overflow.
	    let mut nonce: u64 = 0;

	    loop {
	        let mut context = Context::new(&SHA256);
	        let input = format!("{}{}", prefix, nonce);
	        context.update(input.as_bytes());
	        let hashed = context.finish().as_ref().to_vec();

	        if verify_nonce(&hashed, &target) {
	            return nonce.to_string();
	        }
	        nonce += 1;
	    }
	}

	/// GPU-accelerated proof-of-work solver using CUDA.
	///
	/// Compiles the embedded kernel with NVRTC, then repeatedly launches
	/// 65536 blocks of 256 threads, one nonce per thread, until a thread reports
	/// a nonce whose SHA256(prefix + nonce) satisfies the target.
	///
	/// # Arguments
	///
	/// * `prefix` - String prefix to prepend to nonce (at most 236 bytes, so that
	///   prefix plus a 20-digit nonce fits the kernel's 256-byte input buffer)
	/// * `target_hex` - Uppercase hex encoding of the 32-byte target
	///
	/// # Returns
	///
	/// * `Ok(String)` - The found nonce as a decimal string
	/// * `Err(anyhow::Error)` - Invalid arguments, GPU initialization failure,
	///   kernel compilation/launch failure, or an exhausted nonce space
	///
	/// # Requirements
	///
	/// * CUDA device 0 must be usable
	/// * NVRTC must be available at runtime to compile the kernel
	pub fn solve_challenge_gpu(prefix: &str, target_hex: &str) -> Result<String> {
	    // Sizes fixed by the embedded kernel source.
	    const KERNEL_INPUT_CAPACITY: usize = 256;
	    const MAX_NONCE_DIGITS: usize = 20; // decimal digits in u64::MAX
	    const DIGEST_LEN: usize = 32;

	    // Validate arguments before touching the GPU so bad input is an Err, not a panic.
	    let target = HEXUPPER
	        .decode(target_hex.as_bytes())
	        .map_err(|e| anyhow!("invalid target hex: {}", e))?;
	    if target.len() != DIGEST_LEN {
	        return Err(anyhow!(
	            "target must decode to {} bytes, got {}",
	            DIGEST_LEN,
	            target.len()
	        ));
	    }
	    if prefix.len() > KERNEL_INPUT_CAPACITY - MAX_NONCE_DIGITS {
	        return Err(anyhow!(
	            "prefix is {} bytes; the GPU kernel supports at most {}",
	            prefix.len(),
	            KERNEL_INPUT_CAPACITY - MAX_NONCE_DIGITS
	        ));
	    }

	    // Safely try to initialize CUDA device, catching panics from missing libraries
	    let device = panic::catch_unwind(|| CudaDevice::new(0)).map_err(|_| {
	        anyhow!("CUDA initialization failed - likely missing CUDA libraries or no compatible GPU found")
	    })??;

	    // Compile the CUDA kernel - also catch panics here
	    let ptx = panic::catch_unwind(|| compile_ptx(CUDA_KERNEL)).map_err(|_| {
	        anyhow!("CUDA kernel compilation failed - NVRTC libraries not available")
	    })??;

	    device.load_ptx(ptx, "solve_nonce", &["solve_nonce_kernel"])?;

	    // GPU memory allocations. An empty prefix is uploaded as one placeholder
	    // byte so no zero-length device buffer is requested; the kernel still
	    // receives a prefix length of 0 and never reads it.
	    let mut prefix_bytes = prefix.as_bytes().to_vec();
	    if prefix_bytes.is_empty() {
	        prefix_bytes.push(0);
	    }
	    let prefix_gpu = device.htod_copy(prefix_bytes)?;
	    let target_gpu = device.htod_copy(target.clone())?;
	    let result_gpu = device.alloc_zeros::<u64>(1)?;
	    let mut found_gpu = device.alloc_zeros::<bool>(1)?;

	    let threads_per_block: u32 = 256;
	    let blocks: u32 = 65536; // Start with many blocks
	    let nonces_per_launch = u64::from(threads_per_block) * u64::from(blocks);

	    let mut start_nonce = 0u64;

	    loop {
	        let cfg = LaunchConfig {
	            grid_dim: (blocks, 1, 1),
	            block_dim: (threads_per_block, 1, 1),
	            shared_mem_bytes: 0,
	        };

	        // Reset found flag
	        device.htod_copy_into(vec![false], &mut found_gpu)?;

	        let kernel = device
	            .get_func("solve_nonce", "solve_nonce_kernel")
	            .ok_or_else(|| anyhow!("kernel solve_nonce_kernel missing from loaded module"))?;

	        // SAFETY: the argument tuple mirrors the kernel's parameter list in
	        // order and type, and every buffer outlives the launch.
	        unsafe {
	            kernel.launch(
	                cfg,
	                (
	                    &prefix_gpu,
	                    prefix.len() as i32,
	                    &target_gpu,
	                    target.len() as i32,
	                    start_nonce,
	                    &result_gpu,
	                    &found_gpu,
	                ),
	            )?;
	        }

	        // Check if solution was found
	        let found: Vec<bool> = device.dtoh_sync_copy(&found_gpu)?;
	        if found[0] {
	            let result: Vec<u64> = device.dtoh_sync_copy(&result_gpu)?;
	            return Ok(result[0].to_string());
	        }

	        start_nonce = start_nonce
	            .checked_add(nonces_per_launch)
	            .ok_or_else(|| anyhow!("nonce space exhausted without finding a solution"))?;

	        // Print progress occasionally
	        if start_nonce % (nonces_per_launch * 100) == 0 {
	            println!("Searched {} nonces...", start_nonce);
	        }
	    }
	}

	#[cfg(test)]
	mod tests {
	use super::*;

	/// Checks one accepted and one rejected hash/target pair, each decided by
	/// the second byte.
	#[test]
	fn test_verify_nonce() {
	    // Hash is smaller at the first differing byte: accepted.
	    let smaller_hash = vec![0x00, 0x00, 0xFF, 0xFF];
	    let upper_bound = vec![0x00, 0x01, 0x00, 0x00];
	    let accepted = verify_nonce(&smaller_hash, &upper_bound);
	    assert!(accepted);

	    // Hash is larger at the first differing byte: rejected.
	    let larger_hash = vec![0x00, 0x02, 0x00, 0x00];
	    let lower_bound = vec![0x00, 0x01, 0xFF, 0xFF];
	    let rejected = !verify_nonce(&larger_hash, &lower_bound);
	    assert!(rejected);
	}

	/// Solves an easy challenge on the CPU and re-checks the returned nonce by
	/// hashing prefix + nonce again.
	#[test]
	fn test_cpu_solver() {
	    let target_hex = "0FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF";
	    let prefix = "test";
	    let solved_nonce = solve_challenge_cpu(prefix, target_hex);

	    // Recompute the digest independently of the solver.
	    let message = format!("{}{}", prefix, solved_nonce);
	    let mut hasher = Context::new(&SHA256);
	    hasher.update(message.as_bytes());
	    let digest = hasher.finish().as_ref().to_vec();

	    let decoded_target = HEXUPPER.decode(target_hex.as_bytes()).unwrap();
	    assert!(verify_nonce(&digest, &decoded_target));
	}
No results found