// Password-based key derivation built from three layers:
//
//   * xxtea128   - the encryption half of XXTEA on a fixed 128-bit block
//   * hashstate  - an MDC-2 style 256-bit hash using xxtea128 as the cipher
//   * hmac / kdf - HMAC over that hash, and a buffer-filling KDF on top
//
// Define XXTEA_KDF_NO_MAIN to compile the primitives without the demo
// entry point (for example when linking them into a test harness).

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

enum {
    XXTEA128_ROUNDS = 19,   // 6 + 52/4 rounds for a 4-word block
    HASH_BLOCK_BYTES = 16,  // bytes consumed by one compression
    HASH_DIGEST_BYTES = 32  // bytes produced by finalize_hash
};

// Overwrite len bytes at ptr with zeros. The stores go through a
// volatile pointer so the compiler may not drop them as dead stores;
// this keeps the file within standard C instead of relying on the
// non-standard explicit_bzero.
static void secure_wipe(void *ptr, size_t len)
{
    volatile unsigned char *p = ptr;
    while (len--) {
        *p++ = 0;
    }
}

// XXTEA word-mixing function.
//
// It is a xor of two sums of xors. The first sum combines shifted
// copies of the previous and next word; the second sum combines the
// previous word mixed with a key word and the next word mixed with the
// round constant. In every xor the part depending on the previous word
// is on the left and the part depending on the next word on the right.
static uint32_t xxtea_mx(uint32_t prev, uint32_t next,
                         uint32_t keyword, uint32_t sum)
{
    return (((prev >> 5) ^ (next << 2)) + ((prev << 4) ^ (next >> 3)))
         ^ ((keyword ^ prev) + (sum ^ next));
}

// Encrypt one 128-bit block in place with the encryption half of the
// XXTEA algorithm, with the block size fixed at 4 words.
//
//   key   - four key words, read only
//   block - four data words, overwritten with the ciphertext
//
// With a 4-word block the algorithm runs 6 + 52/4 = 19 rounds.
//
// Each round:
//   1. Advance the round constant (sum) by 0x9e3779b9, so that it
//      equals round * 0x9e3779b9 modulo 2^32.
//   2. Take bits 3..2 of the round constant as a reduced constant that
//      varies the pattern of key accesses, since the key is only 4
//      words long.
//   3. Walk the block in order, adding to each word the mix of its
//      previous and next word (wrapping around the ends). Words are
//      updated in place, so later words see already-updated neighbours.
void xxtea128(uint32_t const key[4], uint32_t block[4])
{
    uint32_t roundconstant = 0;

    for (unsigned round = 0; round < XXTEA128_ROUNDS; round++) {
        roundconstant += 0x9e3779b9;
        uint32_t reduced = (roundconstant >> 2) & 3;

        for (unsigned p = 0; p < 4; p++) {
            uint32_t prev = block[(p + 3) & 3];
            uint32_t next = block[(p + 1) & 3];
            block[p] += xxtea_mx(prev, next, key[reduced ^ p],
                                 roundconstant);
        }
    }
}

// Assemble a 32-bit word from four bytes, least significant byte first.
// Every byte is widened to uint32_t before shifting: shifting the
// promoted int left by 24 would be undefined for bytes >= 0x80.
uint32_t bytes2word(unsigned char const bytes[4])
{
    return (uint32_t)bytes[0]
         | (uint32_t)bytes[1] << 8
         | (uint32_t)bytes[2] << 16
         | (uint32_t)bytes[3] << 24;
}

// Store a 32-bit word into four bytes, least significant byte first.
void word2bytes(unsigned char *bytes, uint32_t word)
{
    bytes[0] = (unsigned char)(word & 0xff);
    bytes[1] = (unsigned char)((word >> 8) & 0xff);
    bytes[2] = (unsigned char)((word >> 16) & 0xff);
    bytes[3] = (unsigned char)((word >> 24) & 0xff);
}

struct hashstate {
    // A_n and B_n of the MDC-2 algorithm
    uint32_t a[4];
    uint32_t b[4];
    // Buffer to hold data until the next full block
    unsigned char buffer[16];
    // Number of bytes currently held in buffer
    size_t length;
    // Counter that keeps track of how much data we have hashed, in bits
    uint64_t totalbits;
};

// Reset state to the start of a new hash computation.
//
// The hash function is MDC-2 with xxtea128, which gives a 256-bit hash.
// The initial values are based on the binary expansion of the square
// root of two, A1 being the first 128 bits and B1 the next 128.
//
// Treated as 128-bit integers stored least significant word first,
// they have the values:
//
//   A1 = 6a09e667 f3bcc908 b2fb1366 ea957d3e
//   B1 = 3adec175 12775099 da2f590b 0667322a
void initialize_hash(struct hashstate *state)
{
    state->a[0] = 0xea957d3eUL;
    state->a[1] = 0xb2fb1366UL;
    state->a[2] = 0xf3bcc908UL;
    state->a[3] = 0x6a09e667UL;

    state->b[0] = 0x0667322aUL;
    state->b[1] = 0xda2f590bUL;
    state->b[2] = 0x12775099UL;
    state->b[3] = 0x3adec175UL;

    memset(state->buffer, 0, sizeof(state->buffer));
    state->length = 0;
    state->totalbits = 0;
}

// Run the compression function over the (full) buffer and mark the
// buffer as consumed. The buffer must hold exactly 16 bytes.
void compress_hash(struct hashstate *state)
{
    assert(state->length == HASH_BLOCK_BYTES);

    // M_i
    uint32_t message[4];
    for (size_t i = 0; i < 4; i++) {
        message[i] = bytes2word(&state->buffer[i * 4]);
    }

    // V_i = M_i ^ E(M_i, A_i)
    // Note: in this description A_i is the *key*, not the plaintext
    uint32_t v[4];
    memcpy(v, message, sizeof(v));
    xxtea128(state->a, v);

    // W_i = M_i ^ E(M_i, B_i)
    uint32_t w[4];
    memcpy(w, message, sizeof(w));
    xxtea128(state->b, w);

    for (size_t i = 0; i < 4; i++) {
        v[i] ^= message[i];
        w[i] ^= message[i];
    }

    // A_{i+1} = V_i^L || W_i^R
    state->a[0] = v[0];
    state->a[1] = v[1];
    state->a[2] = w[2];
    state->a[3] = w[3];

    // B_{i+1} = W_i^L || V_i^R
    state->b[0] = w[0];
    state->b[1] = w[1];
    state->b[2] = v[2];
    state->b[3] = v[3];

    // Mark that we have consumed the buffer
    state->length = 0;
}

// Absorb length bytes of input into the hash.
//
// Invariant: the buffer holds between 0 and 15 bytes on entry and on
// return, because the compression function runs as soon as it reaches
// 16 bytes.
void feed_hash(struct hashstate *state, const unsigned char input[],
               size_t length)
{
    for (size_t i = 0; i < length; i++) {
        // Must not overflow the internal counter. In practice we will
        // not hit this.
        assert(state->totalbits <= UINT64_MAX - 8);

        state->buffer[state->length++] = input[i];
        state->totalbits += 8;

        if (state->length == HASH_BLOCK_BYTES) {
            compress_hash(state);
        }
    }
}

// Pad the message, write the 32-byte digest to hash and wipe state.
//
// The padding consists of a one-bit, followed by zero-bits, followed by
// the number of bits in the message as a big-endian uint64, so that the
// length occupies the last 8 bytes of the final block.
//
// After this call state holds only zeros; call initialize_hash before
// reusing it.
void finalize_hash(struct hashstate *state, unsigned char hash[32])
{
    // The buffer holds at most 15 bytes here (see feed_hash), so there
    // is always room for the marker byte.
    state->buffer[state->length++] = 0x80;

    // If the length field no longer fits in this block, zero-fill the
    // block, compress it and continue in a fresh one.
    if (state->length > 8) {
        while (state->length < HASH_BLOCK_BYTES) {
            state->buffer[state->length++] = 0;
        }
        compress_hash(state);
    }
    while (state->length < 8) {
        state->buffer[state->length++] = 0;
    }

    // Add the number of bits, and do one last compression
    for (size_t i = 0; i < 8; i++) {
        unsigned shift = 56 - 8 * (unsigned)i;
        state->buffer[8 + i] =
            (unsigned char)((state->totalbits >> shift) & 0xff);
    }
    state->length += 8;
    compress_hash(state);

    // Extract the hash state: A first, then B
    for (size_t i = 0; i < 4; i++) {
        word2bytes(&hash[i * 4], state->a[i]);
    }
    for (size_t i = 0; i < 4; i++) {
        word2bytes(&hash[i * 4 + 16], state->b[i]);
    }

    // Clear all of the hash state, in case there was something
    // important in there
    secure_wipe(state, sizeof(struct hashstate));
}

// Compute HMAC over message with the given key and write the 32-byte
// tag to output.
//
// The block size of the underlying hash function is 128 bits (16 B),
// but HMAC is specified assuming that the hash function output (in our
// case 256 bits or 32 B) fits in one block. Here the key is instead
// extended to two blocks (32 B).
//
// Keys longer than 16 bytes are hashed, even when they are shorter than
// the extended key length, to avoid giving an attacker any surface to
// play with at the boundary between the two blocks. Shorter keys are
// zero-padded.
void hmac(unsigned char output[32], const unsigned char key[],
          size_t keylen, const unsigned char message[], size_t messagelen)
{
    struct hashstate state;

    unsigned char padded_key[HASH_DIGEST_BYTES];
    if (keylen > HASH_BLOCK_BYTES) {
        initialize_hash(&state);
        feed_hash(&state, key, keylen);
        finalize_hash(&state, padded_key);
    } else {
        // Copy the key and zero-pad if necessary
        memset(padded_key, 0, sizeof(padded_key));
        memcpy(padded_key, key, keylen);
    }

    // Outer and inner key derivation
    unsigned char outer_key[HASH_DIGEST_BYTES];
    unsigned char inner_key[HASH_DIGEST_BYTES];
    for (size_t i = 0; i < HASH_DIGEST_BYTES; i++) {
        outer_key[i] = padded_key[i] ^ 0x5c;
        inner_key[i] = padded_key[i] ^ 0x36;
    }

    // Inner hash
    unsigned char inner_hash[HASH_DIGEST_BYTES];
    initialize_hash(&state);
    feed_hash(&state, inner_key, sizeof(inner_key));
    feed_hash(&state, message, messagelen);
    finalize_hash(&state, inner_hash);

    // Outer hash
    initialize_hash(&state);
    feed_hash(&state, outer_key, sizeof(outer_key));
    feed_hash(&state, inner_hash, sizeof(inner_hash));
    finalize_hash(&state, output);

    // Do not leave key-derived material on the stack. The hash state
    // itself has already been wiped by finalize_hash.
    secure_wipe(padded_key, sizeof(padded_key));
    secure_wipe(outer_key, sizeof(outer_key));
    secure_wipe(inner_key, sizeof(inner_key));
    secure_wipe(inner_hash, sizeof(inner_hash));
}

// KDF_ROUNDS must be at least 2
#define KDF_ROUNDS 100000
#if KDF_ROUNDS < 2
#error "KDF_ROUNDS must be at least 2"
#endif

unsigned char kdf_buf[KDF_ROUNDS * 32];

// Derive a 16-byte key from a passphrase and a 32-byte salt.
//
// This is based on the design of PBKDF2 but aims to be memory hard.
// This is achieved by storing all the hashes in a buffer and, at the
// end, hashing the whole buffer, instead of just xoring the hashes
// together. The buffer is filled from its top towards its start, so the
// final hash reads the chain in the reverse of the order in which it
// was produced.
//
// The memory-hardness of this scheme rests on the assumption that it is
// not feasible to compute the final hash backwards, that is, starting
// with the first hash and working towards the final hash. This is not
// proven here.
//
// The counter i from PBKDF2 is not included since only one block of
// output is ever produced.
//
// Uses the global kdf_buf as scratch space and wipes it before
// returning.
void kdf(unsigned char key[16], const unsigned char salt[32],
         const unsigned char passphrase[], size_t passphraselen)
{
    // Place the hash of the salt at the top of the buffer
    size_t index = sizeof(kdf_buf) - HASH_DIGEST_BYTES;
    hmac(&kdf_buf[index], passphrase, passphraselen, salt, 32);

    // Walk back along the buffer, at each step hashing the previously
    // written hash
    while (index > 0) {
        index -= HASH_DIGEST_BYTES;
        hmac(&kdf_buf[index], passphrase, passphraselen,
             &kdf_buf[index + HASH_DIGEST_BYTES], HASH_DIGEST_BYTES);
    }

    // Perform the final hash
    unsigned char final_hash[HASH_DIGEST_BYTES];
    hmac(final_hash, passphrase, passphraselen, kdf_buf, sizeof(kdf_buf));

    // Use the first 128 bits of the final hash as the key
    memcpy(key, final_hash, 16);

    // The buffer and the unused half of the final hash are derived
    // from the passphrase; do not leave them lying around.
    secure_wipe(final_hash, sizeof(final_hash));
    secure_wipe(kdf_buf, sizeof(kdf_buf));
}

#ifndef XXTEA_KDF_NO_MAIN
// Demo: derive a key from a fixed salt and passphrase and print it as
// hex bytes.
int main(void)
{
    unsigned char key[16] = {0};
    unsigned char salt[32] = "seasaltrocksalt seasaltrocksalt";
    unsigned char passphrase[] = "a quick brown fox jumps over the lazy dog";

    kdf(key, salt, passphrase, sizeof(passphrase) - 1);

    for (size_t i = 0; i < 16; i++) {
        printf("%02hhx ", key[i]);
    }
    printf("\n");
    return 0;
}
#endif