mimoo · July 13, 2017 14:57
diff --git a/RDRAND.c b/RDRAND.c
 // This is a quick analysis of Linux's use of RDRAND. All double-slash (//)
 // style comments are my own, all slash-star (/*) style comments are from the
 // original code. This was written by Taylor Hornby (@DefuseSec).

 // This is part of the drivers/char/random.c file.
 // I have re-ordered the procedures for clarity. Everything inside them (except
 // comments) is exactly as you will find it in linux-3.11.tar.xz

 // The original comment below claims this function 'does not use' the hardware
 // RNG. It actually does, indirectly: get_random_bytes() calls
 // extract_entropy(), which calls extract_buf(), and extract_buf() XORs the
 // result of arch_get_random_long() into its output whenever that call
 // succeeds.
 /*
 * This function is the exported kernel interface.  It returns some
 * number of good random numbers, suitable for key generation, seeding
 * TCP sequence numbers, etc.  It does not use the hw random number
 * generator, if available; use get_random_bytes_arch() for that.
 */
 void get_random_bytes(void *buf, int nbytes)
 {
 	// Fill 'buf' with 'nbytes' bytes taken from the non-blocking pool; the
 	// two zeroes are extract_entropy()'s 'min' and 'reserved' arguments.
 	extract_entropy(&nonblocking_pool, buf, nbytes, 0, 0);
 }
 EXPORT_SYMBOL(get_random_bytes);


 // This one is called by get_random_bytes above.
 //
 // It copies up to 'nbytes' bytes of output from the entropy store 'r' into
 // 'buf' and returns the number of bytes copied. The output is produced in
 // EXTRACT_SIZE-byte blocks by extract_buf(). 'min' and 'reserved' are only
 // passed through to account(), whose return value replaces 'nbytes' before
 // the copy loop starts (account() is not shown here).
 static ssize_t extract_entropy(struct entropy_store *r, void *buf,
 				 size_t nbytes, int min, int reserved)
 {
 	ssize_t ret = 0, i;
 	__u8 tmp[EXTRACT_SIZE];
 	unsigned long flags;

 	/* if last_data isn't primed, we need EXTRACT_SIZE extra bytes */
 	if (fips_enabled) {
 		spin_lock_irqsave(&r->lock, flags);
 		if (!r->last_data_init) {
 			r->last_data_init = true;
 			// Drop the lock while extracting: extract_buf() takes
 			// r->lock itself. It is re-taken before last_data is
 			// written.
 			spin_unlock_irqrestore(&r->lock, flags);
 			trace_extract_entropy(r->name, EXTRACT_SIZE,
 					      r->entropy_count, _RET_IP_);
 			xfer_secondary_pool(r, EXTRACT_SIZE);
 			extract_buf(r, tmp);
 			spin_lock_irqsave(&r->lock, flags);
 			memcpy(r->last_data, tmp, EXTRACT_SIZE);
 		}
 		spin_unlock_irqrestore(&r->lock, flags);
 	}

 	trace_extract_entropy(r->name, nbytes, r->entropy_count, _RET_IP_);
 	xfer_secondary_pool(r, nbytes);
 	nbytes = account(r, nbytes, min, reserved);

 	while (nbytes) {
         // Each iteration of this loop:
         // - Extracts EXTRACT_SIZE bytes via extract_buf.
         // - If fips_enabled is set, panics when the just-extracted bytes are
         //   the same as the previously-extracted bytes, then remembers the
         //   new bytes for the next comparison.
         // - Copies EXTRACT_SIZE or nbytes bytes, whichever is smaller, into
         //   the output.
 		extract_buf(r, tmp);

 		if (fips_enabled) {
 			spin_lock_irqsave(&r->lock, flags);
 			// Note: the message blames the "Hardware RNG", but the
 			// bytes compared here are extract_buf() output.
 			if (!memcmp(tmp, r->last_data, EXTRACT_SIZE))
 				panic("Hardware RNG duplicated output!\n");
 			memcpy(r->last_data, tmp, EXTRACT_SIZE);
 			spin_unlock_irqrestore(&r->lock, flags);
 		}
 		// The final block may be shorter than EXTRACT_SIZE.
 		i = min_t(int, nbytes, EXTRACT_SIZE);
 		memcpy(buf, tmp, i);
 		nbytes -= i;
 		// 'buf' is a void *, so this advances it by 'i' bytes using
 		// GNU C's void-pointer arithmetic.
 		buf += i;
 		ret += i;
 	}

 	/* Wipe data just returned from memory */
 	memset(tmp, 0, sizeof(tmp));

 	return ret;
 }

 // This fills 'out' with EXTRACT_SIZE random bytes. It's what extract_entropy
 // uses to fill its output buffer.
 //
 // In order, it:
 // - hashes the whole pool under r->lock, 16 words at a time,
 // - mixes the hash words back into the pool, passing 'extract' as the last
 //   argument of __mix_pool_bytes(),
 // - hashes 'extract' one final time and wipes the scratch buffers,
 // - folds the hash words together,
 // - XORs in arch_get_random_long() values until the first failure,
 // - copies the first EXTRACT_SIZE bytes of the result to 'out' and wipes
 //   the local copy.
 static void extract_buf(struct entropy_store *r, __u8 *out)
 {
    // Skip all this stuff because it doesn't matter for the point I want to
    // make...
 	int i;
 	// 'hash' is a union so the same bytes can be addressed as 32-bit SHA
 	// words (w) and as unsigned longs (l) for the RDRAND XOR further down.
 	union {
 		__u32 w[5];
 		unsigned long l[LONGS(EXTRACT_SIZE)];
 	} hash;
 	__u32 workspace[SHA_WORKSPACE_WORDS];
 	__u8 extract[64];
 	unsigned long flags;

 	/* Generate a hash across the pool, 16 words (512 bits) at a time */
 	sha_init(hash.w);
 	spin_lock_irqsave(&r->lock, flags);
 	for (i = 0; i < r->poolinfo->poolwords; i += 16)
 		sha_transform(hash.w, (__u8 *)(r->pool + i), workspace);

 	/*
 	 * We mix the hash back into the pool to prevent backtracking
 	 * attacks (where the attacker knows the state of the pool
 	 * plus the current outputs, and attempts to find previous
 	 * ouputs), unless the hash function can be inverted. By
 	 * mixing at least a SHA1 worth of hash data back, we make
 	 * brute-forcing the feedback as hard as brute-forcing the
 	 * hash.
 	 */
 	__mix_pool_bytes(r, hash.w, sizeof(hash.w), extract);
 	spin_unlock_irqrestore(&r->lock, flags);

 	/*
 	 * To avoid duplicates, we atomically extract a portion of the
 	 * pool while mixing, and hash one final time.
 	 */
 	sha_transform(hash.w, extract, workspace);
 	memset(extract, 0, sizeof(extract));
 	memset(workspace, 0, sizeof(workspace));

 	/*
 	 * In case the hash function has some recognizable output
 	 * pattern, we fold it in half. Thus, we always feed back
 	 * twice as much data as we output.
 	 */
 	// w[0] and w[1] absorb w[3] and w[4]; w[2] is XORed with itself
 	// rotated by 16 bits.
 	hash.w[0] ^= hash.w[3];
 	hash.w[1] ^= hash.w[4];
 	hash.w[2] ^= rol32(hash.w[2], 16);

    // Ah, here we are. Finally, we found RDRAND.
    // This XORs RDRAND *directly* into the output buffer, right before
    // returning.
 	/*
 	 * If we have a architectural hardware random number
 	 * generator, mix that in, too.
 	 */
 	// The loop stops at the first arch_get_random_long() failure; any words
 	// XORed before that point stay XORed.
 	for (i = 0; i < LONGS(EXTRACT_SIZE); i++) {
 		unsigned long v;
        // arch_get_random_long() is RDRAND.
 		if (!arch_get_random_long(&v))
 			break;
 		hash.l[i] ^= v;
 	}
    // SIMPLICIO: Why is that a problem? I thought if you XOR a non-random
    //            stream with a random one, you get a random one? Remember,
    //            one-time-pads and such?
    //
    // SALVIATI:  Right, that's true. Even if RDRAND returns all zeroes, or some
    //            completely predictable sequence, the output will be random as
    //            long as it was random before the XOR.
    //
    // SIMPLICIO: So what's the problem, then? It seems like having RDRAND here
    //            can only make things better...
    //
    // SALVIATI:  Ah, that's true if RDRAND might only be a weak source of
    //            entropy, but if it's *actively* malicious, it could seriously
    //            compromise the security of the output. For example, it could
    //            purposely return the inverse of the bits it's going to be
    //            XORed with, resulting in this function filling 'out' with zero
    //            bytes.
    //
    // SIMPLICIO: That's ridiculous! How could RDRAND know which bits it's going
    //            to be XORed with? There's no way one instruction could figure
    //            all of that out.
    //
    // SALVIATI:  Actually, it's quite possible. The processor wouldn't even have
    //            to be smart about it. Chances are, the bits it's going to be
    //            XORed with are in cache (which is inside the CPU), so if the
    //            CPU had RDRAND return the XOR of all longs in the cache, it
    //            would cancel out and information about the state of the cache
    //            would leak out through the RNG. There are plenty of other
    //            ways. This is the CPU, remember. It can pretty much do
    //            anything it wants.
    //
    // SIMPLICIO: Ok, I see how this use of RDRAND could *in theory* weaken the
    //            whole RNG. But wouldn't that be pretty easy to detect, and
    //            can't we trust Intel? If the NSA has their hand up Intel's
    //            ass, wouldn't there be easier ways of backdooring a system?
    //
    // SALVIATI:  The RDRAND backdoor could be made so that it only activates
    //            under certain, very specific, conditions. For example, it
    //            might only activate when RAX contains 0x632472F72B3FB507,
    //            which is extremely unlikely to happen during normal use, but
    //            could be made to happen by sending the system a TCP packet,
    //            web page, etc, containing that value. So, if it existed, it
    //            would be extremely difficult to detect. It's possible in
    //            principle to reverse engineer the CPU itself, but it's
    //            extremely expensive -- and destructive -- so you can't check
    //            all of the CPUs you actually use for backdoors. Sure, if the
    //            NSA controlled Intel, there would be easier ways to backdoor
    //            a system, but backdooring the RNG is nice, because it's
    //            passive: You don't have to *do* anything to a system in order
    //            to break it. You can just listen to the system's network
    //            traffic and use your RNG backdoor to decrypt it. Further, it's
    //            bad design to combine two RNGs by XORing them together. They
    //            may be correlated (by accident or on purpose) in subtle ways
    //            that cancel out security. To be honest, I'm not exactly sure
    //            what the best way would be, but it certainly isn't XOR.
    //

 	// Only the first EXTRACT_SIZE bytes of 'hash' leave this function; the
 	// local copy is zeroed afterwards.
 	memcpy(out, &hash, EXTRACT_SIZE);
 	memset(&hash, 0, sizeof(hash));
 }
	// This is a quick analysis of Linux's use of RDRAND. All double-slash (//)
	// style comments are my own, all slash-star (/*) style comments are from the
	// original code. This was written by Taylor Hornby (@DefuseSec).

	// This is part of the drivers/char/random.c file.
	// I have re-ordered the procedures for clarity. Everything inside them (except
	// comments) is exactly as you will find it in linux-3.11.tar.xz

	// The original comment below claims this function 'does not use' the
	// hardware RNG. It actually does, indirectly: get_random_bytes() calls
	// extract_entropy(), which calls extract_buf(), and extract_buf() XORs the
	// result of arch_get_random_long() into its output whenever that call
	// succeeds.
	/*
	* This function is the exported kernel interface. It returns some
	* number of good random numbers, suitable for key generation, seeding
	* TCP sequence numbers, etc. It does not use the hw random number
	* generator, if available; use get_random_bytes_arch() for that.
	*/
	void get_random_bytes(void *buf, int nbytes)
	{
	// Fill 'buf' with 'nbytes' bytes taken from the non-blocking pool; the
	// two zeroes are extract_entropy()'s 'min' and 'reserved' arguments.
	extract_entropy(&nonblocking_pool, buf, nbytes, 0, 0);
	}
	EXPORT_SYMBOL(get_random_bytes);


	// This one is called by get_random_bytes above.
	//
	// It copies up to 'nbytes' bytes of output from the entropy store 'r' into
	// 'buf' and returns the number of bytes copied. The output is produced in
	// EXTRACT_SIZE-byte blocks by extract_buf(). 'min' and 'reserved' are only
	// passed through to account(), whose return value replaces 'nbytes' before
	// the copy loop starts (account() is not shown here).
	static ssize_t extract_entropy(struct entropy_store *r, void *buf,
				       size_t nbytes, int min, int reserved)
	{
		ssize_t ret = 0, i;
		__u8 tmp[EXTRACT_SIZE];
		unsigned long flags;

		/* if last_data isn't primed, we need EXTRACT_SIZE extra bytes */
		if (fips_enabled) {
			spin_lock_irqsave(&r->lock, flags);
			if (!r->last_data_init) {
				r->last_data_init = true;
				// Drop the lock while extracting: extract_buf()
				// takes r->lock itself. It is re-taken before
				// last_data is written.
				spin_unlock_irqrestore(&r->lock, flags);
				trace_extract_entropy(r->name, EXTRACT_SIZE,
						      r->entropy_count, _RET_IP_);
				xfer_secondary_pool(r, EXTRACT_SIZE);
				extract_buf(r, tmp);
				spin_lock_irqsave(&r->lock, flags);
				memcpy(r->last_data, tmp, EXTRACT_SIZE);
			}
			spin_unlock_irqrestore(&r->lock, flags);
		}

		trace_extract_entropy(r->name, nbytes, r->entropy_count, _RET_IP_);
		xfer_secondary_pool(r, nbytes);
		nbytes = account(r, nbytes, min, reserved);

		while (nbytes) {
			// Each iteration of this loop:
			// - Extracts EXTRACT_SIZE bytes via extract_buf.
			// - If fips_enabled is set, panics when the just-extracted
			//   bytes are the same as the previously-extracted bytes,
			//   then remembers the new bytes for the next comparison.
			// - Copies EXTRACT_SIZE or nbytes bytes, whichever is
			//   smaller, into the output.
			extract_buf(r, tmp);

			if (fips_enabled) {
				spin_lock_irqsave(&r->lock, flags);
				// Note: the message blames the "Hardware RNG", but
				// the bytes compared here are extract_buf() output.
				if (!memcmp(tmp, r->last_data, EXTRACT_SIZE))
					panic("Hardware RNG duplicated output!\n");
				memcpy(r->last_data, tmp, EXTRACT_SIZE);
				spin_unlock_irqrestore(&r->lock, flags);
			}
			// The final block may be shorter than EXTRACT_SIZE.
			i = min_t(int, nbytes, EXTRACT_SIZE);
			memcpy(buf, tmp, i);
			nbytes -= i;
			// 'buf' is a void *, so this advances it by 'i' bytes using
			// GNU C's void-pointer arithmetic.
			buf += i;
			ret += i;
		}

		/* Wipe data just returned from memory */
		memset(tmp, 0, sizeof(tmp));

		return ret;
	}

	// This fills 'out' with EXTRACT_SIZE random bytes. It's what extract_entropy
	// uses to fill its output buffer.
	//
	// In order, it:
	// - hashes the whole pool under r->lock, 16 words at a time,
	// - mixes the hash words back into the pool, passing 'extract' as the last
	//   argument of __mix_pool_bytes(),
	// - hashes 'extract' one final time and wipes the scratch buffers,
	// - folds the hash words together,
	// - XORs in arch_get_random_long() values until the first failure,
	// - copies the first EXTRACT_SIZE bytes of the result to 'out' and wipes
	//   the local copy.
	static void extract_buf(struct entropy_store *r, __u8 *out)
	{
		// Skip all this stuff because it doesn't matter for the point I want to
		// make...
		int i;
		// 'hash' is a union so the same bytes can be addressed as 32-bit SHA
		// words (w) and as unsigned longs (l) for the RDRAND XOR further down.
		union {
			__u32 w[5];
			unsigned long l[LONGS(EXTRACT_SIZE)];
		} hash;
		__u32 workspace[SHA_WORKSPACE_WORDS];
		__u8 extract[64];
		unsigned long flags;

		/* Generate a hash across the pool, 16 words (512 bits) at a time */
		sha_init(hash.w);
		spin_lock_irqsave(&r->lock, flags);
		for (i = 0; i < r->poolinfo->poolwords; i += 16)
			sha_transform(hash.w, (__u8 *)(r->pool + i), workspace);

		/*
		 * We mix the hash back into the pool to prevent backtracking
		 * attacks (where the attacker knows the state of the pool
		 * plus the current outputs, and attempts to find previous
		 * ouputs), unless the hash function can be inverted. By
		 * mixing at least a SHA1 worth of hash data back, we make
		 * brute-forcing the feedback as hard as brute-forcing the
		 * hash.
		 */
		__mix_pool_bytes(r, hash.w, sizeof(hash.w), extract);
		spin_unlock_irqrestore(&r->lock, flags);

		/*
		 * To avoid duplicates, we atomically extract a portion of the
		 * pool while mixing, and hash one final time.
		 */
		sha_transform(hash.w, extract, workspace);
		memset(extract, 0, sizeof(extract));
		memset(workspace, 0, sizeof(workspace));

		/*
		 * In case the hash function has some recognizable output
		 * pattern, we fold it in half. Thus, we always feed back
		 * twice as much data as we output.
		 */
		// w[0] and w[1] absorb w[3] and w[4]; w[2] is XORed with itself
		// rotated by 16 bits.
		hash.w[0] ^= hash.w[3];
		hash.w[1] ^= hash.w[4];
		hash.w[2] ^= rol32(hash.w[2], 16);

		// Ah, here we are. Finally, we found RDRAND.
		// This XORs RDRAND *directly* into the output buffer, right before
		// returning.
		/*
		 * If we have a architectural hardware random number
		 * generator, mix that in, too.
		 */
		// The loop stops at the first arch_get_random_long() failure; any words
		// XORed before that point stay XORed.
		for (i = 0; i < LONGS(EXTRACT_SIZE); i++) {
			unsigned long v;
			// arch_get_random_long() is RDRAND.
			if (!arch_get_random_long(&v))
				break;
			hash.l[i] ^= v;
		}
		// SIMPLICIO: Why is that a problem? I thought if you XOR a non-random
		//            stream with a random one, you get a random one? Remember,
		//            one-time-pads and such?
		//
		// SALVIATI:  Right, that's true. Even if RDRAND returns all zeroes, or some
		//            completely predictable sequence, the output will be random as
		//            long as it was random before the XOR.
		//
		// SIMPLICIO: So what's the problem, then? It seems like having RDRAND here
		//            can only make things better...
		//
		// SALVIATI:  Ah, that's true if RDRAND might only be a weak source of
		//            entropy, but if it's *actively* malicious, it could seriously
		//            compromise the security of the output. For example, it could
		//            purposely return the inverse of the bits it's going to be
		//            XORed with, resulting in this function filling 'out' with zero
		//            bytes.
		//
		// SIMPLICIO: That's ridiculous! How could RDRAND know which bits it's going
		//            to be XORed with? There's no way one instruction could figure
		//            all of that out.
		//
		// SALVIATI:  Actually, it's quite possible. The processor wouldn't even have
		//            to be smart about it. Chances are, the bits it's going to be
		//            XORed with are in cache (which is inside the CPU), so if the
		//            CPU had RDRAND return the XOR of all longs in the cache, it
		//            would cancel out and information about the state of the cache
		//            would leak out through the RNG. There are plenty of other
		//            ways. This is the CPU, remember. It can pretty much do
		//            anything it wants.
		//
		// SIMPLICIO: Ok, I see how this use of RDRAND could *in theory* weaken the
		//            whole RNG. But wouldn't that be pretty easy to detect, and
		//            can't we trust Intel? If the NSA has their hand up Intel's
		//            ass, wouldn't there be easier ways of backdooring a system?
		//
		// SALVIATI:  The RDRAND backdoor could be made so that it only activates
		//            under certain, very specific, conditions. For example, it
		//            might only activate when RAX contains 0x632472F72B3FB507,
		//            which is extremely unlikely to happen during normal use, but
		//            could be made to happen by sending the system a TCP packet,
		//            web page, etc, containing that value. So, if it existed, it
		//            would be extremely difficult to detect. It's possible in
		//            principle to reverse engineer the CPU itself, but it's
		//            extremely expensive -- and destructive -- so you can't check
		//            all of the CPUs you actually use for backdoors. Sure, if the
		//            NSA controlled Intel, there would be easier ways to backdoor
		//            a system, but backdooring the RNG is nice, because it's
		//            passive: You don't have to *do* anything to a system in order
		//            to break it. You can just listen to the system's network
		//            traffic and use your RNG backdoor to decrypt it. Further, it's
		//            bad design to combine two RNGs by XORing them together. They
		//            may be correlated (by accident or on purpose) in subtle ways
		//            that cancel out security. To be honest, I'm not exactly sure
		//            what the best way would be, but it certainly isn't XOR.
		//

		// Only the first EXTRACT_SIZE bytes of 'hash' leave this function; the
		// local copy is zeroed afterwards.
		memcpy(out, &hash, EXTRACT_SIZE);
		memset(&hash, 0, sizeof(hash));
	}