diff --git a/puer.c b/puer.c
index c204237..47e1d0b 100644
--- a/puer.c
+++ b/puer.c
@@ -11,8 +11,17 @@
 #include
 
 // Adjusting this will render the file format incompatible
-// The minimum possible buffer size is 64
-unsigned char workbuf[8 * 1024 * 1024];
+// KDF_WORKFACTOR must be a power of two between 1 and 2^32
+#define KDF_BLOCKSIZE 1024
+#define KDF_WORKFACTOR (64 * 1024)
+#if KDF_WORKFACTOR < 1 || (KDF_WORKFACTOR & (KDF_WORKFACTOR - 1)) != 0
+#error "KDF_WORKFACTOR must be a power of two"
+#endif
+// blockmix and kdf both process the block in 32-byte units
+#if KDF_BLOCKSIZE < 32 || KDF_BLOCKSIZE % 32 != 0
+#error "KDF_BLOCKSIZE must be a non-zero multiple of 32"
+#endif
+unsigned char workbuf[KDF_WORKFACTOR * KDF_BLOCKSIZE];
 
 void xxtea128(uint32_t const key[4], uint32_t block[4]) {
 	// Encryption half of the XXTEA algorithm, with block size limited
@@ -229,25 +238,36 @@ void finalize_hash(struct hashstate *state, unsigned char hash[32]) {
 	explicit_bzero(state, sizeof(struct hashstate));
 }
 
-void hmac(unsigned char output[32], unsigned char key[], size_t keylen, unsigned char message[], size_t messagelen) {
-	// The blocksize of the underlying has function is 128 bits (16B)
+void pbkdf2_1_block(unsigned char output[32], unsigned char passphrase[], size_t passphraselen, unsigned char salt[], size_t saltlen, uint32_t blockindex) {
+	// NOTE: This implementation is hardcoded to one round, as required
+	// by the MFcrypt (see Stronger Key Derivation Via Sequential
+	// Memory-hard Functions by Colin Percival) algorithm. This is not
+	// suitable as a general purpose password-based KDF.
+
+	// This is equivalent to
+	//     F(Password, Salt, 1, i)
+	//   = U_1
+	//   = PRF(Password, Salt + INT_32_BE(i))
+	// We use HMAC-MDC2-XXTEA128 as our PRF
+
+	// The blocksize of the underlying hash function is 128 bits (16B)
 	// but HMAC is specified assuming that the hash function output (in
 	// our case 256 bits or 32B) fits in one block. As far as I can
 	// tell extending the key to be two blocks long is not a problem.
 
 	unsigned char padded_key[32];
-	if (keylen > 16) {
+	if (passphraselen > 16) {
 		// We hash it even if it is shorter than our extended key
 		// length to avoid giving attacker any funny surfaces to
 		// play with at the interface of two blocks
 		struct hashstate state;
 		initialize_hash(&state);
-		feed_hash(&state, key, keylen);
+		feed_hash(&state, passphrase, passphraselen);
 		finalize_hash(&state, padded_key);
 	} else {
 		// Copy the key and zero-pad if necessary
 		memset(padded_key, 0, 32);
-		memcpy(padded_key, key, keylen);
+		memcpy(padded_key, passphrase, passphraselen);
 	}
 
 	// Outer and inner key derivation
@@ -262,7 +282,14 @@ void hmac(unsigned char output[32], unsigned char key[], size_t keylen, unsigned
 	struct hashstate state;
 	initialize_hash(&state);
 	feed_hash(&state, inner_key, 32);
-	feed_hash(&state, message, messagelen);
+	// Our message is salt plus big endian encoding of blockindex
+	feed_hash(&state, salt, saltlen);
+	unsigned char be_blockindex[4];
+	be_blockindex[0] = blockindex >> 24;
+	be_blockindex[1] = blockindex >> 16;
+	be_blockindex[2] = blockindex >> 8;
+	be_blockindex[3] = blockindex;
+	feed_hash(&state, be_blockindex, 4);
 	finalize_hash(&state, inner_hash);
 
 	// Outer hash
@@ -272,45 +299,106 @@ void hmac(unsigned char output[32], unsigned char key[], size_t keylen, unsigned
 	finalize_hash(&state, output);
 }
 
-#define KDF_ROUNDS (sizeof(workbuf) / 32)
+void mfcrypt_hash(unsigned char chunk[16]) {
+	// H for blockmix: encrypt the chunk with xxtea128, using the chunk
+	// itself as the key, and write the result back in place
+	uint32_t key[4], words[4];
+	block2words(key, chunk);
+	block2words(words, chunk);
+	xxtea128(key, words);
+	words2block(chunk, words);
+	// Do not leave copies of the working state on the stack
+	explicit_bzero(key, sizeof(key));
+	explicit_bzero(words, sizeof(words));
+}
 
-void kdf(unsigned char key[16], unsigned char salt[32], unsigned char passphrase[], size_t passphraselen) {
-	// This is based on the design of PBKDF2 but aims to be memory hard
-	// This is achieved by storing all the hashes in a buffer and the
-	// in the end hashing them together in reverse order, instead of
-	// just xoring together.
-	//
-	// The memory-hardness of this scheme rests of the assumption that
-	// it is not feasible to compute the final hash backwards, that is,
-	// starting with the first hash and working towards the final hash.
-	// While I cannot prove this to be the case, the fact that our hash
-	// is made out of a one-way compression function makes me
-	// relatively confident in it.
+void blockmix(unsigned char block[KDF_BLOCKSIZE]) {
+	// r = KDF_BLOCKSIZE / 32, since block is 2r times the width of our
+	// hash function (xxtea128)
+	const size_t r = KDF_BLOCKSIZE / 32;
 
-	// Place the hash of the salt at the top of the buffer. We do not
-	// include the counter i from PBKDF2 since we will ever only
-	// produce one block of output
-	size_t index = KDF_ROUNDS*32 - 32;
-	hmac(&workbuf[index], passphrase, passphraselen, salt, 32);
-	index -= 32;
+	// accumulator (X) starts off as chunk 2r-1. Chunk k is at memory
+	// location 16*k and is 16 bytes long. Substituting we get:
+	// start = 16*(2*(KDF_BLOCKSIZE / 32) - 1)
+	// start = 16*(KDF_BLOCKSIZE / 16 - 1)
+	// start = KDF_BLOCKSIZE - 16
+	unsigned char accumulator[16];
+	memcpy(accumulator, &block[16 * (2*r - 1)], 16);
 
-	// Walk back along the buffer, at each step hashing the previous
-	// hashes
-	while (index > 0) {
-		hmac(&workbuf[index], passphrase, passphraselen, &workbuf[index+32], 32);
-		index -= 32;
+	// Chunk i is at memory location 16*i. We go through chunks < 2r
+	unsigned char hashedchunks[KDF_BLOCKSIZE];
+	for (size_t i = 0; i < 2*r; i++) {
+		// X = H(X xor B_i)
+		for (size_t index = 0; index < 16; index++) {
+			accumulator[index] ^= block[16 * i + index];
+		}
+		mfcrypt_hash(accumulator);
+		// Y_i = X
+		memcpy(&hashedchunks[16 * i], accumulator, 16);
 	}
-	hmac(workbuf, passphrase, passphraselen, &workbuf[32], 32);
 
-	// Perform the final hash
-	unsigned char final_hash[32];
-	hmac(final_hash, passphrase, passphraselen, workbuf, KDF_ROUNDS * 32);
+	// Interleave the blocks back into the buffer. We go through B's
+	// chunks < r which corresponds to indices every 16 bytes smaller
+	// than 16*(KDF_BLOCKSIZE / 32) = KDF_BLOCKSIZE / 2
+	size_t i = 0;
+	for (; i < r; i++) {
+		// B_i = Y_{2*i}
+		memcpy(&block[16*i], &hashedchunks[16*2*i], 16);
+	}
+	// Now we go through B's chunks < 2r but >= r
+	for (; i < 2*r; i++) {
+		// B_i = Y_{2*(i - r) + 1}
+		memcpy(&block[16*i], &hashedchunks[16*(2*(i - r) + 1)], 16);
+	}
+
+	// hashedchunks and accumulator hold copies of the block's contents
+	explicit_bzero(hashedchunks, sizeof(hashedchunks));
+	explicit_bzero(accumulator, sizeof(accumulator));
+}
 
-	// Use first 128 bits of final hash as the key
-	memcpy(key, final_hash, 16);
+void romix(unsigned char block[KDF_BLOCKSIZE]) {
+	// Block i starts at location KDF_BLOCKSIZE * i
+	for (size_t i = 0; i < KDF_WORKFACTOR; i++) {
+		// V_i = X
+		memcpy(&workbuf[KDF_BLOCKSIZE * i], block, KDF_BLOCKSIZE);
+		// X = H(X)
+		blockmix(block);
+	}
 
-	// Empty the buffer
-	explicit_bzero(workbuf, sizeof(workbuf));
+	for (size_t i = 0; i < KDF_WORKFACTOR; i++) {
+		// j = Integrify(X) mod N
+		// N is a power of two. j is a size_t so that the offset
+		// KDF_BLOCKSIZE * j below is not computed in 32 bits
+		size_t j = bytes2word(&block[KDF_BLOCKSIZE - 4]) & (KDF_WORKFACTOR - 1);
+		// X = H(X xor V_j)
+		for (size_t index = 0; index < KDF_BLOCKSIZE; index++) {
+			block[index] ^= workbuf[KDF_BLOCKSIZE * j + index];
+		}
+		blockmix(block);
+	}
+}
+
+void kdf(unsigned char key[16], unsigned char passphrase[], size_t passphraselen, unsigned char salt[32]) {
+	// Fill the block with one 32-byte PRF output per block index.
+	// NOTE: indices are 0-based here, whereas PBKDF2 as specified in
+	// RFC 8018 numbers its blocks from 1. Changing this would change
+	// every derived key.
+	unsigned char block[KDF_BLOCKSIZE];
+	for (size_t i = 0; i < KDF_BLOCKSIZE / 32; i++) {
+		pbkdf2_1_block(&block[i * 32], passphrase, passphraselen, salt, 32, (uint32_t)i);
+	}
+
+	romix(block);
+
+	unsigned char result[32];
+	pbkdf2_1_block(result, passphrase, passphraselen, block, KDF_BLOCKSIZE, 0);
+
+	// Use first 128 bits of the result as the key
+	memcpy(key, result, 16);
+
+	// Everything below is derived from the passphrase; wipe it
+	explicit_bzero(result, sizeof(result));
+	explicit_bzero(block, sizeof(block));
+	explicit_bzero(workbuf, sizeof(workbuf));
 }
 
 // 16 bit authentication tag
@@ -728,7 +816,7 @@ int main(int argc, char *argv[]) {
 
 	// Derive key
 	unsigned char key[16];
-	kdf(key, salt, passphrase, passphrase_len);
+	kdf(key, passphrase, passphrase_len, salt);
 	explicit_bzero(passphrase, sizeof(passphrase));
 
 	uint64_t messageindex = 0;