// This is a quick analysis of Linux's use of RDRAND. All double-slash (//)
// style comments are my own, all slash-star (/*) style comments are from the
// original code. This was written by Taylor Hornby (@DefuseSec).

// This is part of the drivers/char/random.c file.
// I have re-ordered the procedures for clarity. Everything inside them (except
// comments) is exactly as you will find it in linux-3.11.tar.xz

// This comment says it 'does not use' the hardware RNG. It actually does.
/*
 * This function is the exported kernel interface.  It returns some
 * number of good random numbers, suitable for key generation, seeding
 * TCP sequence numbers, etc.  It does not use the hw random number
 * generator, if available; use get_random_bytes_arch() for that.
 */
void get_random_bytes(void *buf, int nbytes)
{
	extract_entropy(&nonblocking_pool, buf, nbytes, 0, 0);
}
EXPORT_SYMBOL(get_random_bytes);


// This one is called by get_random_bytes above.
static ssize_t extract_entropy(struct entropy_store *r, void *buf,
				 size_t nbytes, int min, int reserved)
{
	ssize_t ret = 0, i;
	__u8 tmp[EXTRACT_SIZE];
	unsigned long flags;

	/* if last_data isn't primed, we need EXTRACT_SIZE extra bytes */
	if (fips_enabled) {
		spin_lock_irqsave(&r->lock, flags);
		if (!r->last_data_init) {
			r->last_data_init = true;
			spin_unlock_irqrestore(&r->lock, flags);
			trace_extract_entropy(r->name, EXTRACT_SIZE,
					      r->entropy_count, _RET_IP_);
			xfer_secondary_pool(r, EXTRACT_SIZE);
			extract_buf(r, tmp);
			spin_lock_irqsave(&r->lock, flags);
			memcpy(r->last_data, tmp, EXTRACT_SIZE);
		}
		spin_unlock_irqrestore(&r->lock, flags);
	}

	trace_extract_entropy(r->name, nbytes, r->entropy_count, _RET_IP_);
	xfer_secondary_pool(r, nbytes);
	nbytes = account(r, nbytes, min, reserved);

	while (nbytes) {
        // Each iteration of this loop:
        // - Extracts 'EXTRACT_SIZE' bytes from extract_buf
        // - Panic if the just-extracted bytes are the same as the
        //   previously-extracted bytes.
        // - Copy either EXTRACT_SIZE or nbytes into the output.
		extract_buf(r, tmp);

		if (fips_enabled) {
			spin_lock_irqsave(&r->lock, flags);
			if (!memcmp(tmp, r->last_data, EXTRACT_SIZE))
				panic("Hardware RNG duplicated output!\n");
			memcpy(r->last_data, tmp, EXTRACT_SIZE);
			spin_unlock_irqrestore(&r->lock, flags);
		}
		i = min_t(int, nbytes, EXTRACT_SIZE);
		memcpy(buf, tmp, i);
		nbytes -= i;
		buf += i;
		ret += i;
	}

	/* Wipe data just returned from memory */
	memset(tmp, 0, sizeof(tmp));

	return ret;
}

// This fills 'out' with EXTRACT_BYTES random bytes. It's what extract_entropy
// uses to fill its output buffer.
static void extract_buf(struct entropy_store *r, __u8 *out)
{
    // Skip all this stuff because it doesn't matter for the point I want to
    // make...
	int i;
	union {
		__u32 w[5];
		unsigned long l[LONGS(EXTRACT_SIZE)];
	} hash;
	__u32 workspace[SHA_WORKSPACE_WORDS];
	__u8 extract[64];
	unsigned long flags;

	/* Generate a hash across the pool, 16 words (512 bits) at a time */
	sha_init(hash.w);
	spin_lock_irqsave(&r->lock, flags);
	for (i = 0; i < r->poolinfo->poolwords; i += 16)
		sha_transform(hash.w, (__u8 *)(r->pool + i), workspace);

	/*
	 * We mix the hash back into the pool to prevent backtracking
	 * attacks (where the attacker knows the state of the pool
	 * plus the current outputs, and attempts to find previous
	 * ouputs), unless the hash function can be inverted. By
	 * mixing at least a SHA1 worth of hash data back, we make
	 * brute-forcing the feedback as hard as brute-forcing the
	 * hash.
	 */
	__mix_pool_bytes(r, hash.w, sizeof(hash.w), extract);
	spin_unlock_irqrestore(&r->lock, flags);

	/*
	 * To avoid duplicates, we atomically extract a portion of the
	 * pool while mixing, and hash one final time.
	 */
	sha_transform(hash.w, extract, workspace);
	memset(extract, 0, sizeof(extract));
	memset(workspace, 0, sizeof(workspace));

	/*
	 * In case the hash function has some recognizable output
	 * pattern, we fold it in half. Thus, we always feed back
	 * twice as much data as we output.
	 */
	hash.w[0] ^= hash.w[3];
	hash.w[1] ^= hash.w[4];
	hash.w[2] ^= rol32(hash.w[2], 16);

    // Ah, here we are. Finally, we found RDRAND.
    // This XOR's RDRAND *directly* into the output buffer, right before
    // returning.
	/*
	 * If we have a architectural hardware random number
	 * generator, mix that in, too.
	 */
	for (i = 0; i < LONGS(EXTRACT_SIZE); i++) {
		unsigned long v;
        // arch_get_random is RDRAND.
		if (!arch_get_random_long(&v))
			break;
		hash.l[i] ^= v;
	}
    // SIMPLICIO: Why is that a problem? I thought if you XOR a non-random
    //            stream with a random one, you get a random one? Remember,
    //            one-time-pads and such?
    //
    // SALVIATI:  Right, that's true. Even if RDRAND returns all zeroes, or some
    //            completely predictable sequence, the output will be random as
    //            long as it was random before the XOR.
    // 
    // SIMPLICIO: So what's the problem, then? It seems like having RDRAND here
    //            can only make things better...
    // 
    // SALVIATI:  Ah, that's true if RDRAND might only be a weak source of
    //            entropy, but if it's *actively* malicious, it could seriously
    //            compromise the security of the output. For example, it could
    //            purposely return the inverse of the bits it's going to be
    //            XORed with, resulting in this function filling 'out' with zero
    //            bytes.
    //
    // SIMPLICIO: That's rediculous! How could RDRAND know which bits it's going
    //            to be XORed with? There's no way one instruction could figure
    //            all of that out.
    //
    // SALVIATI:  Actually, it's quite possible. The procesor wouldn't even have
    //            to be smart about it. Chances are, the bits it's going to be
    //            XORed with are in cache (which is inside the CPU), so if the
    //            CPU had RDRAND return the XOR of all longs in the cache, it
    //            would cancel out and information about the state of the cache
    //            would leak out through the RNG. There are plenty of other
    //            ways. This is the CPU, remember. It can pretty much do
    //            anything it wants.
    //
    // SIMPLICIO: Ok, I see how this use of RDRAND could *in theory* weaken the
    //            whole RNG. But wouldn't that be pretty easy to detect, and
    //            can't we trust Intel? If the NSA has their hand up Intel's
    //            ass, wouldn't there be easier ways of backdooring a system?
    //
    // SALVIATI:  The RDRAND backdoor could be made so that it only activates
    //            under certain, very specific, conditions. For example, it
    //            might only activate when RAX contains 0x632472F72B3FB507,
    //            which is extremely unlikely to happen during normal use, but
    //            could be made to happen by sending the system a TCP packet,
    //            web page, etc, containing that value. So, if it existed, it
    //            would be extremely difficult to detect. It's possible in
    //            principal to reverse engineer the CPU itself, but it's
    //            extremely expensive -- and destructive -- so you can't check
    //            all of the CPUs you actually use for backdoors. Sure, if the
    //            NSA controlled Intel, there would be easier ways to backdoor
    //            a system, but backdooring the RNG is nice, because it's
    //            passive: You don't have to *do* anything to a system in order
    //            to break it. You can just listen to the system's network
    //            traffic and use your RNG backdoor to decrypt it. Futher, it's
    //            bad design to combine two RNGs by XORing them together. They
    //            may be correlated (by accident or on purpose) in subtle ways
    //            that cancel out security. To be honest, I'm not exactly sure
    //            what the best way would be, but it certainly isn't XOR.
    //

	memcpy(out, &hash, EXTRACT_SIZE);
	memset(&hash, 0, sizeof(hash));
}