From d92d9dd9d25e95e21a11fd19366fc437ade88198 Mon Sep 17 00:00:00 2001 From: Sandeep Mistry Date: Wed, 8 Nov 2017 12:13:16 -0500 Subject: [PATCH] Update to BearSSL master (4cbe51b0d039c7afc477009cf7f327f8de9da487) --- src/bearssl/aes_big_ctrcbc.c | 142 +++++++ src/bearssl/aes_ct64_ctrcbc.c | 433 +++++++++++++++++++++ src/bearssl/aes_ct_ctrcbc.c | 422 +++++++++++++++++++++ src/bearssl/aes_small_ctrcbc.c | 142 +++++++ src/bearssl/aes_x86ni.c | 48 +-- src/bearssl/aes_x86ni_cbcdec.c | 28 +- src/bearssl/aes_x86ni_cbcenc.c | 28 +- src/bearssl/aes_x86ni_ctr.c | 40 +- src/bearssl/aes_x86ni_ctrcbc.c | 596 +++++++++++++++++++++++++++++ src/bearssl/bearssl.h | 30 ++ src/bearssl/bearssl_aead.h | 513 ++++++++++++++++++++++++- src/bearssl/bearssl_block.h | 667 ++++++++++++++++++++++++++++++++- src/bearssl/bearssl_rand.h | 35 ++ src/bearssl/bearssl_ssl.h | 8 + src/bearssl/ccm.c | 346 +++++++++++++++++ src/bearssl/chacha20_sse2.c | 78 ++-- src/bearssl/config.h | 11 + src/bearssl/eax.c | 413 ++++++++++++++++++++ src/bearssl/ec_p256_m31.c | 44 +-- src/bearssl/gcm.c | 28 +- src/bearssl/ghash_pclmul.c | 200 +++++----- src/bearssl/i15_modpow2.c | 6 +- src/bearssl/i31_modpow2.c | 6 +- src/bearssl/inner.h | 358 +++++++++++++++--- src/bearssl/pemdec.c | 2 +- src/bearssl/settings.c | 306 +++++++++++++++ src/bearssl/skey_decoder.c | 2 +- src/bearssl/ssl_engine.c | 151 ++++---- src/bearssl/ssl_hs_client.c | 10 +- src/bearssl/ssl_hs_server.c | 10 +- src/bearssl/sysrng.c | 169 +++++++++ src/bearssl/x509_decoder.c | 4 +- src/bearssl/x509_minimal.c | 43 +-- 33 files changed, 4867 insertions(+), 452 deletions(-) create mode 100644 src/bearssl/aes_big_ctrcbc.c create mode 100644 src/bearssl/aes_ct64_ctrcbc.c create mode 100644 src/bearssl/aes_ct_ctrcbc.c create mode 100644 src/bearssl/aes_small_ctrcbc.c create mode 100644 src/bearssl/aes_x86ni_ctrcbc.c create mode 100644 src/bearssl/ccm.c create mode 100644 src/bearssl/eax.c create mode 100644 src/bearssl/settings.c create mode 100644 src/bearssl/sysrng.c diff --git a/src/bearssl/aes_big_ctrcbc.c b/src/bearssl/aes_big_ctrcbc.c new file mode 100644 index 0000000..d45ca76 --- /dev/null +++ b/src/bearssl/aes_big_ctrcbc.c @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2017 Thomas Pornin + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see bearssl_block.h */ +void +br_aes_big_ctrcbc_init(br_aes_big_ctrcbc_keys *ctx, + const void *key, size_t len) +{ + ctx->vtable = &br_aes_big_ctrcbc_vtable; + ctx->num_rounds = br_aes_keysched(ctx->skey, key, len); +} + +static void +xorbuf(void *dst, const void *src, size_t len) +{ + unsigned char *d; + const unsigned char *s; + + d = dst; + s = src; + while (len -- > 0) { + *d ++ ^= *s ++; + } +} + +/* see bearssl_block.h */ +void +br_aes_big_ctrcbc_ctr(const br_aes_big_ctrcbc_keys *ctx, + void *ctr, void *data, size_t len) +{ + unsigned char *buf, *bctr; + uint32_t cc0, cc1, cc2, cc3; + + buf = data; + bctr = ctr; + cc3 = br_dec32be(bctr + 0); + cc2 = br_dec32be(bctr + 4); + cc1 = br_dec32be(bctr + 8); + cc0 = br_dec32be(bctr + 12); + while (len > 0) { + unsigned char tmp[16]; + uint32_t carry; + + br_enc32be(tmp + 0, cc3); + br_enc32be(tmp + 4, cc2); + br_enc32be(tmp + 8, cc1); + br_enc32be(tmp + 12, cc0); + br_aes_big_encrypt(ctx->num_rounds, ctx->skey, tmp); + xorbuf(buf, tmp, 16); + buf += 16; + len -= 16; + cc0 ++; + carry = (~(cc0 | -cc0)) >> 31; + cc1 += carry; + carry &= (~(cc1 | -cc1)) >> 31; + cc2 += carry; + carry &= (~(cc2 | -cc2)) >> 31; + cc3 += carry; + } + br_enc32be(bctr + 0, cc3); + br_enc32be(bctr + 4, cc2); + br_enc32be(bctr + 8, cc1); + br_enc32be(bctr + 12, cc0); +} + +/* see bearssl_block.h */ +void +br_aes_big_ctrcbc_mac(const br_aes_big_ctrcbc_keys *ctx, + void *cbcmac, const void *data, size_t len) +{ + const unsigned char *buf; + + buf = data; + while (len > 0) { + xorbuf(cbcmac, buf, 16); + br_aes_big_encrypt(ctx->num_rounds, ctx->skey, cbcmac); + buf += 16; + len -= 16; + } +} + +/* see bearssl_block.h */ +void +br_aes_big_ctrcbc_encrypt(const br_aes_big_ctrcbc_keys *ctx, + void *ctr, void *cbcmac, void *data, size_t len) +{ + br_aes_big_ctrcbc_ctr(ctx, ctr, data, len); + br_aes_big_ctrcbc_mac(ctx, cbcmac, data, len); +} + +/* see bearssl_block.h */ +void +br_aes_big_ctrcbc_decrypt(const br_aes_big_ctrcbc_keys *ctx, + void *ctr, void *cbcmac, void *data, size_t len) +{ + br_aes_big_ctrcbc_mac(ctx, cbcmac, data, len); + br_aes_big_ctrcbc_ctr(ctx, ctr, data, len); +} + +/* see bearssl_block.h */ +const br_block_ctrcbc_class br_aes_big_ctrcbc_vtable = { + sizeof(br_aes_big_ctrcbc_keys), + 16, + 4, + (void (*)(const br_block_ctrcbc_class **, const void *, size_t)) + &br_aes_big_ctrcbc_init, + (void (*)(const br_block_ctrcbc_class *const *, + void *, void *, void *, size_t)) + &br_aes_big_ctrcbc_encrypt, + (void (*)(const br_block_ctrcbc_class *const *, + void *, void *, void *, size_t)) + &br_aes_big_ctrcbc_decrypt, + (void (*)(const br_block_ctrcbc_class *const *, + void *, void *, size_t)) + &br_aes_big_ctrcbc_ctr, + (void (*)(const br_block_ctrcbc_class *const *, + void *, const void *, size_t)) + &br_aes_big_ctrcbc_mac +}; diff --git a/src/bearssl/aes_ct64_ctrcbc.c b/src/bearssl/aes_ct64_ctrcbc.c new file mode 100644 index 0000000..21bb8ef --- /dev/null +++ b/src/bearssl/aes_ct64_ctrcbc.c @@ -0,0 +1,433 @@ +/* + * Copyright (c) 2017 Thomas Pornin + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see bearssl_block.h */ +void +br_aes_ct64_ctrcbc_init(br_aes_ct64_ctrcbc_keys *ctx, + const void *key, size_t len) +{ + ctx->vtable = &br_aes_ct64_ctrcbc_vtable; + ctx->num_rounds = br_aes_ct64_keysched(ctx->skey, key, len); +} + +static void +xorbuf(void *dst, const void *src, size_t len) +{ + unsigned char *d; + const unsigned char *s; + + d = dst; + s = src; + while (len -- > 0) { + *d ++ ^= *s ++; + } +} + +/* see bearssl_block.h */ +void +br_aes_ct64_ctrcbc_ctr(const br_aes_ct64_ctrcbc_keys *ctx, + void *ctr, void *data, size_t len) +{ + unsigned char *buf; + unsigned char *ivbuf; + uint32_t iv0, iv1, iv2, iv3; + uint64_t sk_exp[120]; + + br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey); + + /* + * We keep the counter as four 32-bit values, with big-endian + * convention, because that's what is expected for purposes of + * incrementing the counter value. + */ + ivbuf = ctr; + iv0 = br_dec32be(ivbuf + 0); + iv1 = br_dec32be(ivbuf + 4); + iv2 = br_dec32be(ivbuf + 8); + iv3 = br_dec32be(ivbuf + 12); + + buf = data; + while (len > 0) { + uint64_t q[8]; + uint32_t w[16]; + unsigned char tmp[64]; + int i, j; + + /* + * The bitslice implementation expects values in + * little-endian convention, so we have to byteswap them. + */ + j = (len >= 64) ? 16 : (int)(len >> 2); + for (i = 0; i < j; i += 4) { + uint32_t carry; + + w[i + 0] = br_swap32(iv0); + w[i + 1] = br_swap32(iv1); + w[i + 2] = br_swap32(iv2); + w[i + 3] = br_swap32(iv3); + iv3 ++; + carry = ~(iv3 | -iv3) >> 31; + iv2 += carry; + carry &= -(~(iv2 | -iv2) >> 31); + iv1 += carry; + carry &= -(~(iv1 | -iv1) >> 31); + iv0 += carry; + } + memset(w + i, 0, (16 - i) * sizeof(uint32_t)); + + for (i = 0; i < 4; i ++) { + br_aes_ct64_interleave_in( + &q[i], &q[i + 4], w + (i << 2)); + } + br_aes_ct64_ortho(q); + br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q); + br_aes_ct64_ortho(q); + for (i = 0; i < 4; i ++) { + br_aes_ct64_interleave_out( + w + (i << 2), q[i], q[i + 4]); + } + + br_range_enc32le(tmp, w, 16); + if (len <= 64) { + xorbuf(buf, tmp, len); + break; + } + xorbuf(buf, tmp, 64); + buf += 64; + len -= 64; + } + br_enc32be(ivbuf + 0, iv0); + br_enc32be(ivbuf + 4, iv1); + br_enc32be(ivbuf + 8, iv2); + br_enc32be(ivbuf + 12, iv3); +} + +/* see bearssl_block.h */ +void +br_aes_ct64_ctrcbc_mac(const br_aes_ct64_ctrcbc_keys *ctx, + void *cbcmac, const void *data, size_t len) +{ + const unsigned char *buf; + uint32_t cm0, cm1, cm2, cm3; + uint64_t q[8]; + uint64_t sk_exp[120]; + + br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey); + + cm0 = br_dec32le((unsigned char *)cbcmac + 0); + cm1 = br_dec32le((unsigned char *)cbcmac + 4); + cm2 = br_dec32le((unsigned char *)cbcmac + 8); + cm3 = br_dec32le((unsigned char *)cbcmac + 12); + + buf = data; + memset(q, 0, sizeof q); + while (len > 0) { + uint32_t w[4]; + + w[0] = cm0 ^ br_dec32le(buf + 0); + w[1] = cm1 ^ br_dec32le(buf + 4); + w[2] = cm2 ^ br_dec32le(buf + 8); + w[3] = cm3 ^ br_dec32le(buf + 12); + + br_aes_ct64_interleave_in(&q[0], &q[4], w); + br_aes_ct64_ortho(q); + br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q); + br_aes_ct64_ortho(q); + br_aes_ct64_interleave_out(w, q[0], q[4]); + + cm0 = w[0]; + cm1 = w[1]; + cm2 = w[2]; + cm3 = w[3]; + buf += 16; + len -= 16; + } + + br_enc32le((unsigned char *)cbcmac + 0, cm0); + br_enc32le((unsigned char *)cbcmac + 4, cm1); + br_enc32le((unsigned char *)cbcmac + 8, cm2); + br_enc32le((unsigned char *)cbcmac + 12, cm3); +} + +/* see bearssl_block.h */ +void +br_aes_ct64_ctrcbc_encrypt(const br_aes_ct64_ctrcbc_keys *ctx, + void *ctr, void *cbcmac, void *data, size_t len) +{ + /* + * When encrypting, the CBC-MAC processing must be lagging by + * one block, since it operates on the encrypted values, so + * it must wait for that encryption to complete. + */ + + unsigned char *buf; + unsigned char *ivbuf; + uint32_t iv0, iv1, iv2, iv3; + uint32_t cm0, cm1, cm2, cm3; + uint64_t sk_exp[120]; + uint64_t q[8]; + int first_iter; + + br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey); + + /* + * We keep the counter as four 32-bit values, with big-endian + * convention, because that's what is expected for purposes of + * incrementing the counter value. + */ + ivbuf = ctr; + iv0 = br_dec32be(ivbuf + 0); + iv1 = br_dec32be(ivbuf + 4); + iv2 = br_dec32be(ivbuf + 8); + iv3 = br_dec32be(ivbuf + 12); + + /* + * The current CBC-MAC value is kept in little-endian convention. + */ + cm0 = br_dec32le((unsigned char *)cbcmac + 0); + cm1 = br_dec32le((unsigned char *)cbcmac + 4); + cm2 = br_dec32le((unsigned char *)cbcmac + 8); + cm3 = br_dec32le((unsigned char *)cbcmac + 12); + + buf = data; + first_iter = 1; + memset(q, 0, sizeof q); + while (len > 0) { + uint32_t w[8], carry; + + /* + * The bitslice implementation expects values in + * little-endian convention, so we have to byteswap them. + */ + w[0] = br_swap32(iv0); + w[1] = br_swap32(iv1); + w[2] = br_swap32(iv2); + w[3] = br_swap32(iv3); + iv3 ++; + carry = ~(iv3 | -iv3) >> 31; + iv2 += carry; + carry &= -(~(iv2 | -iv2) >> 31); + iv1 += carry; + carry &= -(~(iv1 | -iv1) >> 31); + iv0 += carry; + + /* + * The block for CBC-MAC. + */ + w[4] = cm0; + w[5] = cm1; + w[6] = cm2; + w[7] = cm3; + + br_aes_ct64_interleave_in(&q[0], &q[4], w); + br_aes_ct64_interleave_in(&q[1], &q[5], w + 4); + br_aes_ct64_ortho(q); + br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q); + br_aes_ct64_ortho(q); + br_aes_ct64_interleave_out(w, q[0], q[4]); + br_aes_ct64_interleave_out(w + 4, q[1], q[5]); + + /* + * We do the XOR with the plaintext in 32-bit registers, + * so that the value are available for CBC-MAC processing + * as well. + */ + w[0] ^= br_dec32le(buf + 0); + w[1] ^= br_dec32le(buf + 4); + w[2] ^= br_dec32le(buf + 8); + w[3] ^= br_dec32le(buf + 12); + br_enc32le(buf + 0, w[0]); + br_enc32le(buf + 4, w[1]); + br_enc32le(buf + 8, w[2]); + br_enc32le(buf + 12, w[3]); + + buf += 16; + len -= 16; + + /* + * We set the cm* values to the block to encrypt in the + * next iteration. + */ + if (first_iter) { + first_iter = 0; + cm0 ^= w[0]; + cm1 ^= w[1]; + cm2 ^= w[2]; + cm3 ^= w[3]; + } else { + cm0 = w[0] ^ w[4]; + cm1 = w[1] ^ w[5]; + cm2 = w[2] ^ w[6]; + cm3 = w[3] ^ w[7]; + } + + /* + * If this was the last iteration, then compute the + * extra block encryption to complete CBC-MAC. + */ + if (len == 0) { + w[0] = cm0; + w[1] = cm1; + w[2] = cm2; + w[3] = cm3; + br_aes_ct64_interleave_in(&q[0], &q[4], w); + br_aes_ct64_ortho(q); + br_aes_ct64_bitslice_encrypt( + ctx->num_rounds, sk_exp, q); + br_aes_ct64_ortho(q); + br_aes_ct64_interleave_out(w, q[0], q[4]); + cm0 = w[0]; + cm1 = w[1]; + cm2 = w[2]; + cm3 = w[3]; + break; + } + } + + br_enc32be(ivbuf + 0, iv0); + br_enc32be(ivbuf + 4, iv1); + br_enc32be(ivbuf + 8, iv2); + br_enc32be(ivbuf + 12, iv3); + br_enc32le((unsigned char *)cbcmac + 0, cm0); + br_enc32le((unsigned char *)cbcmac + 4, cm1); + br_enc32le((unsigned char *)cbcmac + 8, cm2); + br_enc32le((unsigned char *)cbcmac + 12, cm3); +} + +/* see bearssl_block.h */ +void +br_aes_ct64_ctrcbc_decrypt(const br_aes_ct64_ctrcbc_keys *ctx, + void *ctr, void *cbcmac, void *data, size_t len) +{ + unsigned char *buf; + unsigned char *ivbuf; + uint32_t iv0, iv1, iv2, iv3; + uint32_t cm0, cm1, cm2, cm3; + uint64_t sk_exp[120]; + uint64_t q[8]; + + br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey); + + /* + * We keep the counter as four 32-bit values, with big-endian + * convention, because that's what is expected for purposes of + * incrementing the counter value. + */ + ivbuf = ctr; + iv0 = br_dec32be(ivbuf + 0); + iv1 = br_dec32be(ivbuf + 4); + iv2 = br_dec32be(ivbuf + 8); + iv3 = br_dec32be(ivbuf + 12); + + /* + * The current CBC-MAC value is kept in little-endian convention. + */ + cm0 = br_dec32le((unsigned char *)cbcmac + 0); + cm1 = br_dec32le((unsigned char *)cbcmac + 4); + cm2 = br_dec32le((unsigned char *)cbcmac + 8); + cm3 = br_dec32le((unsigned char *)cbcmac + 12); + + buf = data; + memset(q, 0, sizeof q); + while (len > 0) { + uint32_t w[8], carry; + unsigned char tmp[16]; + + /* + * The bitslice implementation expects values in + * little-endian convention, so we have to byteswap them. + */ + w[0] = br_swap32(iv0); + w[1] = br_swap32(iv1); + w[2] = br_swap32(iv2); + w[3] = br_swap32(iv3); + iv3 ++; + carry = ~(iv3 | -iv3) >> 31; + iv2 += carry; + carry &= -(~(iv2 | -iv2) >> 31); + iv1 += carry; + carry &= -(~(iv1 | -iv1) >> 31); + iv0 += carry; + + /* + * The block for CBC-MAC. + */ + w[4] = cm0 ^ br_dec32le(buf + 0); + w[5] = cm1 ^ br_dec32le(buf + 4); + w[6] = cm2 ^ br_dec32le(buf + 8); + w[7] = cm3 ^ br_dec32le(buf + 12); + + br_aes_ct64_interleave_in(&q[0], &q[4], w); + br_aes_ct64_interleave_in(&q[1], &q[5], w + 4); + br_aes_ct64_ortho(q); + br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q); + br_aes_ct64_ortho(q); + br_aes_ct64_interleave_out(w, q[0], q[4]); + br_aes_ct64_interleave_out(w + 4, q[1], q[5]); + + br_enc32le(tmp + 0, w[0]); + br_enc32le(tmp + 4, w[1]); + br_enc32le(tmp + 8, w[2]); + br_enc32le(tmp + 12, w[3]); + xorbuf(buf, tmp, 16); + cm0 = w[4]; + cm1 = w[5]; + cm2 = w[6]; + cm3 = w[7]; + buf += 16; + len -= 16; + } + + br_enc32be(ivbuf + 0, iv0); + br_enc32be(ivbuf + 4, iv1); + br_enc32be(ivbuf + 8, iv2); + br_enc32be(ivbuf + 12, iv3); + br_enc32le((unsigned char *)cbcmac + 0, cm0); + br_enc32le((unsigned char *)cbcmac + 4, cm1); + br_enc32le((unsigned char *)cbcmac + 8, cm2); + br_enc32le((unsigned char *)cbcmac + 12, cm3); +} + +/* see bearssl_block.h */ +const br_block_ctrcbc_class br_aes_ct64_ctrcbc_vtable = { + sizeof(br_aes_ct64_ctrcbc_keys), + 16, + 4, + (void (*)(const br_block_ctrcbc_class **, const void *, size_t)) + &br_aes_ct64_ctrcbc_init, + (void (*)(const br_block_ctrcbc_class *const *, + void *, void *, void *, size_t)) + &br_aes_ct64_ctrcbc_encrypt, + (void (*)(const br_block_ctrcbc_class *const *, + void *, void *, void *, size_t)) + &br_aes_ct64_ctrcbc_decrypt, + (void (*)(const br_block_ctrcbc_class *const *, + void *, void *, size_t)) + &br_aes_ct64_ctrcbc_ctr, + (void (*)(const br_block_ctrcbc_class *const *, + void *, const void *, size_t)) + &br_aes_ct64_ctrcbc_mac +}; diff --git a/src/bearssl/aes_ct_ctrcbc.c b/src/bearssl/aes_ct_ctrcbc.c new file mode 100644 index 0000000..8ae9fc7 --- /dev/null +++ b/src/bearssl/aes_ct_ctrcbc.c @@ -0,0 +1,422 @@ +/* + * Copyright (c) 2017 Thomas Pornin + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see bearssl_block.h */ +void +br_aes_ct_ctrcbc_init(br_aes_ct_ctrcbc_keys *ctx, + const void *key, size_t len) +{ + ctx->vtable = &br_aes_ct_ctrcbc_vtable; + ctx->num_rounds = br_aes_ct_keysched(ctx->skey, key, len); +} + +static void +xorbuf(void *dst, const void *src, size_t len) +{ + unsigned char *d; + const unsigned char *s; + + d = dst; + s = src; + while (len -- > 0) { + *d ++ ^= *s ++; + } +} + +/* see bearssl_block.h */ +void +br_aes_ct_ctrcbc_ctr(const br_aes_ct_ctrcbc_keys *ctx, + void *ctr, void *data, size_t len) +{ + unsigned char *buf; + unsigned char *ivbuf; + uint32_t iv0, iv1, iv2, iv3; + uint32_t sk_exp[120]; + + br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey); + + /* + * We keep the counter as four 32-bit values, with big-endian + * convention, because that's what is expected for purposes of + * incrementing the counter value. + */ + ivbuf = ctr; + iv0 = br_dec32be(ivbuf + 0); + iv1 = br_dec32be(ivbuf + 4); + iv2 = br_dec32be(ivbuf + 8); + iv3 = br_dec32be(ivbuf + 12); + + buf = data; + while (len > 0) { + uint32_t q[8], carry; + unsigned char tmp[32]; + + /* + * The bitslice implementation expects values in + * little-endian convention, so we have to byteswap them. + */ + q[0] = br_swap32(iv0); + q[2] = br_swap32(iv1); + q[4] = br_swap32(iv2); + q[6] = br_swap32(iv3); + iv3 ++; + carry = ~(iv3 | -iv3) >> 31; + iv2 += carry; + carry &= -(~(iv2 | -iv2) >> 31); + iv1 += carry; + carry &= -(~(iv1 | -iv1) >> 31); + iv0 += carry; + q[1] = br_swap32(iv0); + q[3] = br_swap32(iv1); + q[5] = br_swap32(iv2); + q[7] = br_swap32(iv3); + if (len > 16) { + iv3 ++; + carry = ~(iv3 | -iv3) >> 31; + iv2 += carry; + carry &= -(~(iv2 | -iv2) >> 31); + iv1 += carry; + carry &= -(~(iv1 | -iv1) >> 31); + iv0 += carry; + } + + br_aes_ct_ortho(q); + br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q); + br_aes_ct_ortho(q); + + br_enc32le(tmp, q[0]); + br_enc32le(tmp + 4, q[2]); + br_enc32le(tmp + 8, q[4]); + br_enc32le(tmp + 12, q[6]); + br_enc32le(tmp + 16, q[1]); + br_enc32le(tmp + 20, q[3]); + br_enc32le(tmp + 24, q[5]); + br_enc32le(tmp + 28, q[7]); + + if (len <= 32) { + xorbuf(buf, tmp, len); + break; + } + xorbuf(buf, tmp, 32); + buf += 32; + len -= 32; + } + br_enc32be(ivbuf + 0, iv0); + br_enc32be(ivbuf + 4, iv1); + br_enc32be(ivbuf + 8, iv2); + br_enc32be(ivbuf + 12, iv3); +} + +/* see bearssl_block.h */ +void +br_aes_ct_ctrcbc_mac(const br_aes_ct_ctrcbc_keys *ctx, + void *cbcmac, const void *data, size_t len) +{ + const unsigned char *buf; + uint32_t cm0, cm1, cm2, cm3; + uint32_t q[8]; + uint32_t sk_exp[120]; + + br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey); + + buf = data; + cm0 = br_dec32le((unsigned char *)cbcmac + 0); + cm1 = br_dec32le((unsigned char *)cbcmac + 4); + cm2 = br_dec32le((unsigned char *)cbcmac + 8); + cm3 = br_dec32le((unsigned char *)cbcmac + 12); + q[1] = 0; + q[3] = 0; + q[5] = 0; + q[7] = 0; + + while (len > 0) { + q[0] = cm0 ^ br_dec32le(buf + 0); + q[2] = cm1 ^ br_dec32le(buf + 4); + q[4] = cm2 ^ br_dec32le(buf + 8); + q[6] = cm3 ^ br_dec32le(buf + 12); + + br_aes_ct_ortho(q); + br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q); + br_aes_ct_ortho(q); + + cm0 = q[0]; + cm1 = q[2]; + cm2 = q[4]; + cm3 = q[6]; + buf += 16; + len -= 16; + } + + br_enc32le((unsigned char *)cbcmac + 0, cm0); + br_enc32le((unsigned char *)cbcmac + 4, cm1); + br_enc32le((unsigned char *)cbcmac + 8, cm2); + br_enc32le((unsigned char *)cbcmac + 12, cm3); +} + +/* see bearssl_block.h */ +void +br_aes_ct_ctrcbc_encrypt(const br_aes_ct_ctrcbc_keys *ctx, + void *ctr, void *cbcmac, void *data, size_t len) +{ + /* + * When encrypting, the CBC-MAC processing must be lagging by + * one block, since it operates on the encrypted values, so + * it must wait for that encryption to complete. + */ + + unsigned char *buf; + unsigned char *ivbuf; + uint32_t iv0, iv1, iv2, iv3; + uint32_t cm0, cm1, cm2, cm3; + uint32_t sk_exp[120]; + int first_iter; + + br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey); + + /* + * We keep the counter as four 32-bit values, with big-endian + * convention, because that's what is expected for purposes of + * incrementing the counter value. + */ + ivbuf = ctr; + iv0 = br_dec32be(ivbuf + 0); + iv1 = br_dec32be(ivbuf + 4); + iv2 = br_dec32be(ivbuf + 8); + iv3 = br_dec32be(ivbuf + 12); + + /* + * The current CBC-MAC value is kept in little-endian convention. + */ + cm0 = br_dec32le((unsigned char *)cbcmac + 0); + cm1 = br_dec32le((unsigned char *)cbcmac + 4); + cm2 = br_dec32le((unsigned char *)cbcmac + 8); + cm3 = br_dec32le((unsigned char *)cbcmac + 12); + + buf = data; + first_iter = 1; + while (len > 0) { + uint32_t q[8], carry; + + /* + * The bitslice implementation expects values in + * little-endian convention, so we have to byteswap them. + */ + q[0] = br_swap32(iv0); + q[2] = br_swap32(iv1); + q[4] = br_swap32(iv2); + q[6] = br_swap32(iv3); + iv3 ++; + carry = ~(iv3 | -iv3) >> 31; + iv2 += carry; + carry &= -(~(iv2 | -iv2) >> 31); + iv1 += carry; + carry &= -(~(iv1 | -iv1) >> 31); + iv0 += carry; + + /* + * The odd values are used for CBC-MAC. + */ + q[1] = cm0; + q[3] = cm1; + q[5] = cm2; + q[7] = cm3; + + br_aes_ct_ortho(q); + br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q); + br_aes_ct_ortho(q); + + /* + * We do the XOR with the plaintext in 32-bit registers, + * so that the value are available for CBC-MAC processing + * as well. + */ + q[0] ^= br_dec32le(buf + 0); + q[2] ^= br_dec32le(buf + 4); + q[4] ^= br_dec32le(buf + 8); + q[6] ^= br_dec32le(buf + 12); + br_enc32le(buf + 0, q[0]); + br_enc32le(buf + 4, q[2]); + br_enc32le(buf + 8, q[4]); + br_enc32le(buf + 12, q[6]); + + buf += 16; + len -= 16; + + /* + * We set the cm* values to the block to encrypt in the + * next iteration. + */ + if (first_iter) { + first_iter = 0; + cm0 ^= q[0]; + cm1 ^= q[2]; + cm2 ^= q[4]; + cm3 ^= q[6]; + } else { + cm0 = q[0] ^ q[1]; + cm1 = q[2] ^ q[3]; + cm2 = q[4] ^ q[5]; + cm3 = q[6] ^ q[7]; + } + + /* + * If this was the last iteration, then compute the + * extra block encryption to complete CBC-MAC. + */ + if (len == 0) { + q[0] = cm0; + q[2] = cm1; + q[4] = cm2; + q[6] = cm3; + br_aes_ct_ortho(q); + br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q); + br_aes_ct_ortho(q); + cm0 = q[0]; + cm1 = q[2]; + cm2 = q[4]; + cm3 = q[6]; + break; + } + } + + br_enc32be(ivbuf + 0, iv0); + br_enc32be(ivbuf + 4, iv1); + br_enc32be(ivbuf + 8, iv2); + br_enc32be(ivbuf + 12, iv3); + br_enc32le((unsigned char *)cbcmac + 0, cm0); + br_enc32le((unsigned char *)cbcmac + 4, cm1); + br_enc32le((unsigned char *)cbcmac + 8, cm2); + br_enc32le((unsigned char *)cbcmac + 12, cm3); +} + +/* see bearssl_block.h */ +void +br_aes_ct_ctrcbc_decrypt(const br_aes_ct_ctrcbc_keys *ctx, + void *ctr, void *cbcmac, void *data, size_t len) +{ + unsigned char *buf; + unsigned char *ivbuf; + uint32_t iv0, iv1, iv2, iv3; + uint32_t cm0, cm1, cm2, cm3; + uint32_t sk_exp[120]; + + br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey); + + /* + * We keep the counter as four 32-bit values, with big-endian + * convention, because that's what is expected for purposes of + * incrementing the counter value. + */ + ivbuf = ctr; + iv0 = br_dec32be(ivbuf + 0); + iv1 = br_dec32be(ivbuf + 4); + iv2 = br_dec32be(ivbuf + 8); + iv3 = br_dec32be(ivbuf + 12); + + /* + * The current CBC-MAC value is kept in little-endian convention. + */ + cm0 = br_dec32le((unsigned char *)cbcmac + 0); + cm1 = br_dec32le((unsigned char *)cbcmac + 4); + cm2 = br_dec32le((unsigned char *)cbcmac + 8); + cm3 = br_dec32le((unsigned char *)cbcmac + 12); + + buf = data; + while (len > 0) { + uint32_t q[8], carry; + unsigned char tmp[16]; + + /* + * The bitslice implementation expects values in + * little-endian convention, so we have to byteswap them. + */ + q[0] = br_swap32(iv0); + q[2] = br_swap32(iv1); + q[4] = br_swap32(iv2); + q[6] = br_swap32(iv3); + iv3 ++; + carry = ~(iv3 | -iv3) >> 31; + iv2 += carry; + carry &= -(~(iv2 | -iv2) >> 31); + iv1 += carry; + carry &= -(~(iv1 | -iv1) >> 31); + iv0 += carry; + + /* + * The odd values are used for CBC-MAC. + */ + q[1] = cm0 ^ br_dec32le(buf + 0); + q[3] = cm1 ^ br_dec32le(buf + 4); + q[5] = cm2 ^ br_dec32le(buf + 8); + q[7] = cm3 ^ br_dec32le(buf + 12); + + br_aes_ct_ortho(q); + br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q); + br_aes_ct_ortho(q); + + br_enc32le(tmp + 0, q[0]); + br_enc32le(tmp + 4, q[2]); + br_enc32le(tmp + 8, q[4]); + br_enc32le(tmp + 12, q[6]); + xorbuf(buf, tmp, 16); + cm0 = q[1]; + cm1 = q[3]; + cm2 = q[5]; + cm3 = q[7]; + buf += 16; + len -= 16; + } + + br_enc32be(ivbuf + 0, iv0); + br_enc32be(ivbuf + 4, iv1); + br_enc32be(ivbuf + 8, iv2); + br_enc32be(ivbuf + 12, iv3); + br_enc32le((unsigned char *)cbcmac + 0, cm0); + br_enc32le((unsigned char *)cbcmac + 4, cm1); + br_enc32le((unsigned char *)cbcmac + 8, cm2); + br_enc32le((unsigned char *)cbcmac + 12, cm3); +} + +/* see bearssl_block.h */ +const br_block_ctrcbc_class br_aes_ct_ctrcbc_vtable = { + sizeof(br_aes_ct_ctrcbc_keys), + 16, + 4, + (void (*)(const br_block_ctrcbc_class **, const void *, size_t)) + &br_aes_ct_ctrcbc_init, + (void (*)(const br_block_ctrcbc_class *const *, + void *, void *, void *, size_t)) + &br_aes_ct_ctrcbc_encrypt, + (void (*)(const br_block_ctrcbc_class *const *, + void *, void *, void *, size_t)) + &br_aes_ct_ctrcbc_decrypt, + (void (*)(const br_block_ctrcbc_class *const *, + void *, void *, size_t)) + &br_aes_ct_ctrcbc_ctr, + (void (*)(const br_block_ctrcbc_class *const *, + void *, const void *, size_t)) + &br_aes_ct_ctrcbc_mac +}; diff --git a/src/bearssl/aes_small_ctrcbc.c b/src/bearssl/aes_small_ctrcbc.c new file mode 100644 index 0000000..2d6ba32 --- /dev/null +++ b/src/bearssl/aes_small_ctrcbc.c @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2017 Thomas Pornin + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see bearssl_block.h */ +void +br_aes_small_ctrcbc_init(br_aes_small_ctrcbc_keys *ctx, + const void *key, size_t len) +{ + ctx->vtable = &br_aes_small_ctrcbc_vtable; + ctx->num_rounds = br_aes_keysched(ctx->skey, key, len); +} + +static void +xorbuf(void *dst, const void *src, size_t len) +{ + unsigned char *d; + const unsigned char *s; + + d = dst; + s = src; + while (len -- > 0) { + *d ++ ^= *s ++; + } +} + +/* see bearssl_block.h */ +void +br_aes_small_ctrcbc_ctr(const br_aes_small_ctrcbc_keys *ctx, + void *ctr, void *data, size_t len) +{ + unsigned char *buf, *bctr; + uint32_t cc0, cc1, cc2, cc3; + + buf = data; + bctr = ctr; + cc3 = br_dec32be(bctr + 0); + cc2 = br_dec32be(bctr + 4); + cc1 = br_dec32be(bctr + 8); + cc0 = br_dec32be(bctr + 12); + while (len > 0) { + unsigned char tmp[16]; + uint32_t carry; + + br_enc32be(tmp + 0, cc3); + br_enc32be(tmp + 4, cc2); + br_enc32be(tmp + 8, cc1); + br_enc32be(tmp + 12, cc0); + br_aes_small_encrypt(ctx->num_rounds, ctx->skey, tmp); + xorbuf(buf, tmp, 16); + buf += 16; + len -= 16; + cc0 ++; + carry = (~(cc0 | -cc0)) >> 31; + cc1 += carry; + carry &= (~(cc1 | -cc1)) >> 31; + cc2 += carry; + carry &= (~(cc2 | -cc2)) >> 31; + cc3 += carry; + } + br_enc32be(bctr + 0, cc3); + br_enc32be(bctr + 4, cc2); + br_enc32be(bctr + 8, cc1); + br_enc32be(bctr + 12, cc0); +} + +/* see bearssl_block.h */ +void +br_aes_small_ctrcbc_mac(const br_aes_small_ctrcbc_keys *ctx, + void *cbcmac, const void *data, size_t len) +{ + const unsigned char *buf; + + buf = data; + while (len > 0) { + xorbuf(cbcmac, buf, 16); + br_aes_small_encrypt(ctx->num_rounds, ctx->skey, cbcmac); + buf += 16; + len -= 16; + } +} + +/* see bearssl_block.h */ +void +br_aes_small_ctrcbc_encrypt(const br_aes_small_ctrcbc_keys *ctx, + void *ctr, void *cbcmac, void *data, size_t len) +{ + br_aes_small_ctrcbc_ctr(ctx, ctr, data, len); + br_aes_small_ctrcbc_mac(ctx, cbcmac, data, len); +} + +/* see bearssl_block.h */ +void +br_aes_small_ctrcbc_decrypt(const br_aes_small_ctrcbc_keys *ctx, + void *ctr, void *cbcmac, void *data, size_t len) +{ + br_aes_small_ctrcbc_mac(ctx, cbcmac, data, len); + br_aes_small_ctrcbc_ctr(ctx, ctr, data, len); +} + +/* see bearssl_block.h */ +const br_block_ctrcbc_class br_aes_small_ctrcbc_vtable = { + sizeof(br_aes_small_ctrcbc_keys), + 16, + 4, + (void (*)(const br_block_ctrcbc_class **, const void *, size_t)) + &br_aes_small_ctrcbc_init, + (void (*)(const br_block_ctrcbc_class *const *, + void *, void *, void *, size_t)) + &br_aes_small_ctrcbc_encrypt, + (void (*)(const br_block_ctrcbc_class *const *, + void *, void *, void *, size_t)) + &br_aes_small_ctrcbc_decrypt, + (void (*)(const br_block_ctrcbc_class *const *, + void *, void *, size_t)) + &br_aes_small_ctrcbc_ctr, + (void (*)(const br_block_ctrcbc_class *const *, + void *, const void *, size_t)) + &br_aes_small_ctrcbc_mac +}; diff --git a/src/bearssl/aes_x86ni.c b/src/bearssl/aes_x86ni.c index d2d6369..d5408f1 100644 --- a/src/bearssl/aes_x86ni.c +++ b/src/bearssl/aes_x86ni.c @@ -22,6 +22,7 @@ * SOFTWARE. */ +#define BR_ENABLE_INTRINSICS 1 #include "inner.h" /* @@ -31,22 +32,6 @@ #if BR_AES_X86NI -#if BR_AES_X86NI_GCC -#if BR_AES_X86NI_GCC_OLD -#pragma GCC push_options -#pragma GCC target("sse2,sse4.1,aes,pclmul") -#endif -#include -#include -#if BR_AES_X86NI_GCC_OLD -#pragma GCC pop_options -#endif -#endif - -#if BR_AES_X86NI_MSC -#include -#endif - /* see inner.h */ int br_aes_x86ni_supported(void) @@ -56,35 +41,10 @@ br_aes_x86ni_supported(void) * 19 SSE4.1 (used for _mm_insert_epi32(), for AES-CTR) * 25 AES-NI */ -#define MASK 0x02080000 - -#if BR_AES_X86NI_GCC - unsigned eax, ebx, ecx, edx; - - if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) { - return (ecx & MASK) == MASK; - } else { - return 0; - } -#elif BR_AES_X86NI_MSC - int info[4]; - - __cpuid(info, 1); - return ((uint32_t)info[2] & MASK) == MASK; -#else - return 0; -#endif - -#undef MASK + return br_cpuid(0, 0, 0x02080000, 0); } -/* - * Per-function attributes appear unreliable on old GCC, so we use the - * pragma for all remaining functions in this file. - */ -#if BR_AES_X86NI_GCC_OLD -#pragma GCC target("sse2,sse4.1,aes,pclmul") -#endif +BR_TARGETS_X86_UP BR_TARGET("sse2,aes") static inline __m128i @@ -275,4 +235,6 @@ br_aes_x86ni_keysched_dec(unsigned char *skni, const void *key, size_t len) return num_rounds; } +BR_TARGETS_X86_DOWN + #endif diff --git a/src/bearssl/aes_x86ni_cbcdec.c b/src/bearssl/aes_x86ni_cbcdec.c index c97ce48..862b1b5 100644 --- a/src/bearssl/aes_x86ni_cbcdec.c +++ b/src/bearssl/aes_x86ni_cbcdec.c @@ -22,20 +22,17 @@ * SOFTWARE. */ +#define BR_ENABLE_INTRINSICS 1 #include "inner.h" #if BR_AES_X86NI -#if BR_AES_X86NI_GCC -#if BR_AES_X86NI_GCC_OLD -#pragma GCC target("sse2,sse4.1,aes,pclmul") -#endif -#include -#endif - -#if BR_AES_X86NI_MSC -#include -#endif +/* see bearssl_block.h */ +const br_block_cbcdec_class * +br_aes_x86ni_cbcdec_get_vtable(void) +{ + return br_aes_x86ni_supported() ? &br_aes_x86ni_cbcdec_vtable : NULL; +} /* see bearssl_block.h */ void @@ -46,6 +43,8 @@ br_aes_x86ni_cbcdec_init(br_aes_x86ni_cbcdec_keys *ctx, ctx->num_rounds = br_aes_x86ni_keysched_dec(ctx->skey.skni, key, len); } +BR_TARGETS_X86_UP + /* see bearssl_block.h */ BR_TARGET("sse2,aes") void @@ -199,6 +198,8 @@ br_aes_x86ni_cbcdec_run(const br_aes_x86ni_cbcdec_keys *ctx, _mm_storeu_si128(iv, ivx); } +BR_TARGETS_X86_DOWN + /* see bearssl_block.h */ const br_block_cbcdec_class br_aes_x86ni_cbcdec_vtable = { sizeof(br_aes_x86ni_cbcdec_keys), @@ -210,13 +211,6 @@ const br_block_cbcdec_class br_aes_x86ni_cbcdec_vtable = { &br_aes_x86ni_cbcdec_run }; -/* see bearssl_block.h */ -const br_block_cbcdec_class * -br_aes_x86ni_cbcdec_get_vtable(void) -{ - return br_aes_x86ni_supported() ? &br_aes_x86ni_cbcdec_vtable : NULL; -} - #else /* see bearssl_block.h */ diff --git a/src/bearssl/aes_x86ni_cbcenc.c b/src/bearssl/aes_x86ni_cbcenc.c index 2addfb6..85feecd 100644 --- a/src/bearssl/aes_x86ni_cbcenc.c +++ b/src/bearssl/aes_x86ni_cbcenc.c @@ -22,20 +22,17 @@ * SOFTWARE. */ +#define BR_ENABLE_INTRINSICS 1 #include "inner.h" #if BR_AES_X86NI -#if BR_AES_X86NI_GCC -#if BR_AES_X86NI_GCC_OLD -#pragma GCC target("sse2,sse4.1,aes,pclmul") -#endif -#include -#endif - -#if BR_AES_X86NI_MSC -#include -#endif +/* see bearssl_block.h */ +const br_block_cbcenc_class * +br_aes_x86ni_cbcenc_get_vtable(void) +{ + return br_aes_x86ni_supported() ? &br_aes_x86ni_cbcenc_vtable : NULL; +} /* see bearssl_block.h */ void @@ -46,6 +43,8 @@ br_aes_x86ni_cbcenc_init(br_aes_x86ni_cbcenc_keys *ctx, ctx->num_rounds = br_aes_x86ni_keysched_enc(ctx->skey.skni, key, len); } +BR_TARGETS_X86_UP + /* see bearssl_block.h */ BR_TARGET("sse2,aes") void @@ -98,6 +97,8 @@ br_aes_x86ni_cbcenc_run(const br_aes_x86ni_cbcenc_keys *ctx, _mm_storeu_si128(iv, ivx); } +BR_TARGETS_X86_DOWN + /* see bearssl_block.h */ const br_block_cbcenc_class br_aes_x86ni_cbcenc_vtable = { sizeof(br_aes_x86ni_cbcenc_keys), @@ -109,13 +110,6 @@ const br_block_cbcenc_class br_aes_x86ni_cbcenc_vtable = { &br_aes_x86ni_cbcenc_run }; -/* see bearssl_block.h */ -const br_block_cbcenc_class * -br_aes_x86ni_cbcenc_get_vtable(void) -{ - return br_aes_x86ni_supported() ? &br_aes_x86ni_cbcenc_vtable : NULL; -} - #else /* see bearssl_block.h */ diff --git a/src/bearssl/aes_x86ni_ctr.c b/src/bearssl/aes_x86ni_ctr.c index 41c31b2..1cddd60 100644 --- a/src/bearssl/aes_x86ni_ctr.c +++ b/src/bearssl/aes_x86ni_ctr.c @@ -22,24 +22,17 @@ * SOFTWARE. */ +#define BR_ENABLE_INTRINSICS 1 #include "inner.h" #if BR_AES_X86NI -#if BR_AES_X86NI_GCC -#if BR_AES_X86NI_GCC_OLD -#pragma GCC target("sse2,sse4.1,aes,pclmul") -#endif -#include -#include -#define bswap32 __builtin_bswap32 -#endif - -#if BR_AES_X86NI_MSC -#include -#include -#define bswap32 _byteswap_ulong -#endif +/* see bearssl_block.h */ +const br_block_ctr_class * +br_aes_x86ni_ctr_get_vtable(void) +{ + return br_aes_x86ni_supported() ? &br_aes_x86ni_ctr_vtable : NULL; +} /* see bearssl_block.h */ void @@ -50,6 +43,8 @@ br_aes_x86ni_ctr_init(br_aes_x86ni_ctr_keys *ctx, ctx->num_rounds = br_aes_x86ni_keysched_enc(ctx->skey.skni, key, len); } +BR_TARGETS_X86_UP + /* see bearssl_block.h */ BR_TARGET("sse2,sse4.1,aes") uint32_t @@ -73,10 +68,10 @@ br_aes_x86ni_ctr_run(const br_aes_x86ni_ctr_keys *ctx, while (len > 0) { __m128i x0, x1, x2, x3; - x0 = _mm_insert_epi32(ivx, bswap32(cc + 0), 3); - x1 = _mm_insert_epi32(ivx, bswap32(cc + 1), 3); - x2 = _mm_insert_epi32(ivx, bswap32(cc + 2), 3); - x3 = _mm_insert_epi32(ivx, bswap32(cc + 3), 3); + x0 = _mm_insert_epi32(ivx, br_bswap32(cc + 0), 3); + x1 = _mm_insert_epi32(ivx, br_bswap32(cc + 1), 3); + x2 = _mm_insert_epi32(ivx, br_bswap32(cc + 2), 3); + x3 = _mm_insert_epi32(ivx, br_bswap32(cc + 3), 3); x0 = _mm_xor_si128(x0, sk[0]); x1 = _mm_xor_si128(x1, sk[0]); x2 = _mm_xor_si128(x2, sk[0]); @@ -190,6 +185,8 @@ br_aes_x86ni_ctr_run(const br_aes_x86ni_ctr_keys *ctx, return cc; } +BR_TARGETS_X86_DOWN + /* see bearssl_block.h */ const br_block_ctr_class br_aes_x86ni_ctr_vtable = { sizeof(br_aes_x86ni_ctr_keys), @@ -202,13 +199,6 @@ const br_block_ctr_class br_aes_x86ni_ctr_vtable = { &br_aes_x86ni_ctr_run }; -/* see bearssl_block.h */ -const br_block_ctr_class * -br_aes_x86ni_ctr_get_vtable(void) -{ - return br_aes_x86ni_supported() ? &br_aes_x86ni_ctr_vtable : NULL; -} - #else /* see bearssl_block.h */ diff --git a/src/bearssl/aes_x86ni_ctrcbc.c b/src/bearssl/aes_x86ni_ctrcbc.c new file mode 100644 index 0000000..f57fead --- /dev/null +++ b/src/bearssl/aes_x86ni_ctrcbc.c @@ -0,0 +1,596 @@ +/* + * Copyright (c) 2017 Thomas Pornin + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define BR_ENABLE_INTRINSICS 1 +#include "inner.h" + +#if BR_AES_X86NI + +/* see bearssl_block.h */ +const br_block_ctrcbc_class * +br_aes_x86ni_ctrcbc_get_vtable(void) +{ + return br_aes_x86ni_supported() ? &br_aes_x86ni_ctrcbc_vtable : NULL; +} + +/* see bearssl_block.h */ +void +br_aes_x86ni_ctrcbc_init(br_aes_x86ni_ctrcbc_keys *ctx, + const void *key, size_t len) +{ + ctx->vtable = &br_aes_x86ni_ctrcbc_vtable; + ctx->num_rounds = br_aes_x86ni_keysched_enc(ctx->skey.skni, key, len); +} + +BR_TARGETS_X86_UP + +/* see bearssl_block.h */ +BR_TARGET("sse2,sse4.1,aes") +void +br_aes_x86ni_ctrcbc_ctr(const br_aes_x86ni_ctrcbc_keys *ctx, + void *ctr, void *data, size_t len) +{ + unsigned char *buf; + unsigned num_rounds; + __m128i sk[15]; + __m128i ivx0, ivx1, ivx2, ivx3; + __m128i erev, zero, one, four, notthree; + unsigned u; + + buf = data; + num_rounds = ctx->num_rounds; + for (u = 0; u <= num_rounds; u ++) { + sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4))); + } + + /* + * Some SSE2 constants. + */ + erev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15); + zero = _mm_setzero_si128(); + one = _mm_set_epi64x(0, 1); + four = _mm_set_epi64x(0, 4); + notthree = _mm_sub_epi64(zero, four); + + /* + * Decode the counter in big-endian and pre-increment the other + * three counters. + */ + ivx0 = _mm_shuffle_epi8(_mm_loadu_si128((void *)ctr), erev); + ivx1 = _mm_add_epi64(ivx0, one); + ivx1 = _mm_sub_epi64(ivx1, + _mm_slli_si128(_mm_cmpeq_epi64(ivx1, zero), 8)); + ivx2 = _mm_add_epi64(ivx1, one); + ivx2 = _mm_sub_epi64(ivx2, + _mm_slli_si128(_mm_cmpeq_epi64(ivx2, zero), 8)); + ivx3 = _mm_add_epi64(ivx2, one); + ivx3 = _mm_sub_epi64(ivx3, + _mm_slli_si128(_mm_cmpeq_epi64(ivx3, zero), 8)); + while (len > 0) { + __m128i x0, x1, x2, x3; + + /* + * Load counter values; we need to byteswap them because + * the specification says that they use big-endian. + */ + x0 = _mm_shuffle_epi8(ivx0, erev); + x1 = _mm_shuffle_epi8(ivx1, erev); + x2 = _mm_shuffle_epi8(ivx2, erev); + x3 = _mm_shuffle_epi8(ivx3, erev); + + x0 = _mm_xor_si128(x0, sk[0]); + x1 = _mm_xor_si128(x1, sk[0]); + x2 = _mm_xor_si128(x2, sk[0]); + x3 = _mm_xor_si128(x3, sk[0]); + x0 = _mm_aesenc_si128(x0, sk[1]); + x1 = _mm_aesenc_si128(x1, sk[1]); + x2 = _mm_aesenc_si128(x2, sk[1]); + x3 = _mm_aesenc_si128(x3, sk[1]); + x0 = _mm_aesenc_si128(x0, sk[2]); + x1 = _mm_aesenc_si128(x1, sk[2]); + x2 = _mm_aesenc_si128(x2, sk[2]); + x3 = _mm_aesenc_si128(x3, sk[2]); + x0 = _mm_aesenc_si128(x0, sk[3]); + x1 = _mm_aesenc_si128(x1, sk[3]); + x2 = _mm_aesenc_si128(x2, sk[3]); + x3 = _mm_aesenc_si128(x3, sk[3]); + x0 = _mm_aesenc_si128(x0, sk[4]); + x1 = _mm_aesenc_si128(x1, sk[4]); + x2 = _mm_aesenc_si128(x2, sk[4]); + x3 = _mm_aesenc_si128(x3, sk[4]); + x0 = _mm_aesenc_si128(x0, sk[5]); + x1 = _mm_aesenc_si128(x1, sk[5]); + x2 = _mm_aesenc_si128(x2, sk[5]); + x3 = _mm_aesenc_si128(x3, sk[5]); + x0 = _mm_aesenc_si128(x0, sk[6]); + x1 = _mm_aesenc_si128(x1, sk[6]); + x2 = _mm_aesenc_si128(x2, sk[6]); + x3 = _mm_aesenc_si128(x3, sk[6]); + x0 = _mm_aesenc_si128(x0, sk[7]); + x1 = _mm_aesenc_si128(x1, sk[7]); + x2 = _mm_aesenc_si128(x2, sk[7]); + x3 = _mm_aesenc_si128(x3, sk[7]); + x0 = _mm_aesenc_si128(x0, sk[8]); + x1 = _mm_aesenc_si128(x1, sk[8]); + x2 = _mm_aesenc_si128(x2, sk[8]); + x3 = _mm_aesenc_si128(x3, sk[8]); + x0 = _mm_aesenc_si128(x0, sk[9]); + x1 = _mm_aesenc_si128(x1, sk[9]); + x2 = _mm_aesenc_si128(x2, sk[9]); + x3 = _mm_aesenc_si128(x3, sk[9]); + if (num_rounds == 10) { + x0 = _mm_aesenclast_si128(x0, sk[10]); + x1 = _mm_aesenclast_si128(x1, sk[10]); + x2 = _mm_aesenclast_si128(x2, sk[10]); + x3 = _mm_aesenclast_si128(x3, sk[10]); + } else if (num_rounds == 12) { + x0 = _mm_aesenc_si128(x0, sk[10]); + x1 = _mm_aesenc_si128(x1, sk[10]); + x2 = _mm_aesenc_si128(x2, sk[10]); + x3 = _mm_aesenc_si128(x3, sk[10]); + x0 = _mm_aesenc_si128(x0, sk[11]); + x1 = _mm_aesenc_si128(x1, sk[11]); + x2 = _mm_aesenc_si128(x2, sk[11]); + x3 = _mm_aesenc_si128(x3, sk[11]); + x0 = _mm_aesenclast_si128(x0, sk[12]); + x1 = _mm_aesenclast_si128(x1, sk[12]); + x2 = _mm_aesenclast_si128(x2, sk[12]); + x3 = _mm_aesenclast_si128(x3, sk[12]); + } else { + x0 = _mm_aesenc_si128(x0, sk[10]); + x1 = _mm_aesenc_si128(x1, sk[10]); + x2 = _mm_aesenc_si128(x2, sk[10]); + x3 = _mm_aesenc_si128(x3, sk[10]); + x0 = _mm_aesenc_si128(x0, sk[11]); + x1 = _mm_aesenc_si128(x1, sk[11]); + x2 = _mm_aesenc_si128(x2, sk[11]); + x3 = _mm_aesenc_si128(x3, sk[11]); + x0 = _mm_aesenc_si128(x0, sk[12]); + x1 = _mm_aesenc_si128(x1, sk[12]); + x2 = _mm_aesenc_si128(x2, sk[12]); + x3 = _mm_aesenc_si128(x3, sk[12]); + x0 = _mm_aesenc_si128(x0, sk[13]); + x1 = _mm_aesenc_si128(x1, sk[13]); + x2 = _mm_aesenc_si128(x2, sk[13]); + x3 = _mm_aesenc_si128(x3, sk[13]); + x0 = _mm_aesenclast_si128(x0, sk[14]); + x1 = _mm_aesenclast_si128(x1, sk[14]); + x2 = _mm_aesenclast_si128(x2, sk[14]); + x3 = _mm_aesenclast_si128(x3, sk[14]); + } + if (len >= 64) { + x0 = _mm_xor_si128(x0, + _mm_loadu_si128((void *)(buf + 0))); + x1 = _mm_xor_si128(x1, + _mm_loadu_si128((void *)(buf + 16))); + x2 = _mm_xor_si128(x2, + _mm_loadu_si128((void *)(buf + 32))); + x3 = _mm_xor_si128(x3, + _mm_loadu_si128((void *)(buf + 48))); + _mm_storeu_si128((void *)(buf + 0), x0); + _mm_storeu_si128((void *)(buf + 16), x1); + _mm_storeu_si128((void *)(buf + 32), x2); + _mm_storeu_si128((void *)(buf + 48), x3); + buf += 64; + len -= 64; + } else { + unsigned char tmp[64]; + + _mm_storeu_si128((void *)(tmp + 0), x0); + _mm_storeu_si128((void *)(tmp + 16), x1); + _mm_storeu_si128((void *)(tmp + 32), x2); + _mm_storeu_si128((void *)(tmp + 48), x3); + for (u = 0; u < len; u ++) { + buf[u] ^= tmp[u]; + } + switch (len) { + case 16: + ivx0 = ivx1; + break; + case 32: + ivx0 = ivx2; + break; + case 48: + ivx0 = ivx3; + break; + } + break; + } + + /* + * Add 4 to each counter value. For carry propagation + * into the upper 64-bit words, we would need to compare + * the results with 4, but SSE2+ has only _signed_ + * comparisons. Instead, we mask out the low two bits, + * and check whether the remaining bits are zero. + */ + ivx0 = _mm_add_epi64(ivx0, four); + ivx1 = _mm_add_epi64(ivx1, four); + ivx2 = _mm_add_epi64(ivx2, four); + ivx3 = _mm_add_epi64(ivx3, four); + ivx0 = _mm_sub_epi64(ivx0, + _mm_slli_si128(_mm_cmpeq_epi64( + _mm_and_si128(ivx0, notthree), zero), 8)); + ivx1 = _mm_sub_epi64(ivx1, + _mm_slli_si128(_mm_cmpeq_epi64( + _mm_and_si128(ivx1, notthree), zero), 8)); + ivx2 = _mm_sub_epi64(ivx2, + _mm_slli_si128(_mm_cmpeq_epi64( + _mm_and_si128(ivx2, notthree), zero), 8)); + ivx3 = _mm_sub_epi64(ivx3, + _mm_slli_si128(_mm_cmpeq_epi64( + _mm_and_si128(ivx3, notthree), zero), 8)); + } + + /* + * Write back new counter value. The loop took care to put the + * right counter value in ivx0. + */ + _mm_storeu_si128((void *)ctr, _mm_shuffle_epi8(ivx0, erev)); +} + +/* see bearssl_block.h */ +BR_TARGET("sse2,sse4.1,aes") +void +br_aes_x86ni_ctrcbc_mac(const br_aes_x86ni_ctrcbc_keys *ctx, + void *cbcmac, const void *data, size_t len) +{ + const unsigned char *buf; + unsigned num_rounds; + __m128i sk[15], ivx; + unsigned u; + + buf = data; + ivx = _mm_loadu_si128(cbcmac); + num_rounds = ctx->num_rounds; + for (u = 0; u <= num_rounds; u ++) { + sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4))); + } + while (len > 0) { + __m128i x; + + x = _mm_xor_si128(_mm_loadu_si128((void *)buf), ivx); + x = _mm_xor_si128(x, sk[0]); + x = _mm_aesenc_si128(x, sk[1]); + x = _mm_aesenc_si128(x, sk[2]); + x = _mm_aesenc_si128(x, sk[3]); + x = _mm_aesenc_si128(x, sk[4]); + x = _mm_aesenc_si128(x, sk[5]); + x = _mm_aesenc_si128(x, sk[6]); + x = _mm_aesenc_si128(x, sk[7]); + x = _mm_aesenc_si128(x, sk[8]); + x = _mm_aesenc_si128(x, sk[9]); + if (num_rounds == 10) { + x = _mm_aesenclast_si128(x, sk[10]); + } else if (num_rounds == 12) { + x = _mm_aesenc_si128(x, sk[10]); + x = _mm_aesenc_si128(x, sk[11]); + x = _mm_aesenclast_si128(x, sk[12]); + } else { + x = _mm_aesenc_si128(x, sk[10]); + x = _mm_aesenc_si128(x, sk[11]); + x = _mm_aesenc_si128(x, sk[12]); + x = _mm_aesenc_si128(x, sk[13]); + x = _mm_aesenclast_si128(x, sk[14]); + } + ivx = x; + buf += 16; + len -= 16; + } + _mm_storeu_si128(cbcmac, ivx); +} + +/* see bearssl_block.h */ +BR_TARGET("sse2,sse4.1,aes") +void +br_aes_x86ni_ctrcbc_encrypt(const br_aes_x86ni_ctrcbc_keys *ctx, + void *ctr, void *cbcmac, void *data, size_t len) +{ + unsigned char *buf; + unsigned num_rounds; + __m128i sk[15]; + __m128i ivx, cmx; + __m128i erev, zero, one; + unsigned u; + int first_iter; + + num_rounds = ctx->num_rounds; + for (u = 0; u <= num_rounds; u ++) { + sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4))); + } + + /* + * Some SSE2 constants. + */ + erev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15); + zero = _mm_setzero_si128(); + one = _mm_set_epi64x(0, 1); + + /* + * Decode the counter in big-endian. + */ + ivx = _mm_shuffle_epi8(_mm_loadu_si128(ctr), erev); + cmx = _mm_loadu_si128(cbcmac); + + buf = data; + first_iter = 1; + while (len > 0) { + __m128i dx, x0, x1; + + /* + * Load initial values: + * dx encrypted block of data + * x0 counter (for CTR encryption) + * x1 input for CBC-MAC + */ + dx = _mm_loadu_si128((void *)buf); + x0 = _mm_shuffle_epi8(ivx, erev); + x1 = cmx; + + x0 = _mm_xor_si128(x0, sk[0]); + x1 = _mm_xor_si128(x1, sk[0]); + x0 = _mm_aesenc_si128(x0, sk[1]); + x1 = _mm_aesenc_si128(x1, sk[1]); + x0 = _mm_aesenc_si128(x0, sk[2]); + x1 = _mm_aesenc_si128(x1, sk[2]); + x0 = _mm_aesenc_si128(x0, sk[3]); + x1 = _mm_aesenc_si128(x1, sk[3]); + x0 = _mm_aesenc_si128(x0, sk[4]); + x1 = _mm_aesenc_si128(x1, sk[4]); + x0 = _mm_aesenc_si128(x0, sk[5]); + x1 = _mm_aesenc_si128(x1, sk[5]); + x0 = _mm_aesenc_si128(x0, sk[6]); + x1 = _mm_aesenc_si128(x1, sk[6]); + x0 = _mm_aesenc_si128(x0, sk[7]); + x1 = _mm_aesenc_si128(x1, sk[7]); + x0 = _mm_aesenc_si128(x0, sk[8]); + x1 = _mm_aesenc_si128(x1, sk[8]); + x0 = _mm_aesenc_si128(x0, sk[9]); + x1 = _mm_aesenc_si128(x1, sk[9]); + if (num_rounds == 10) { + x0 = _mm_aesenclast_si128(x0, sk[10]); + x1 = _mm_aesenclast_si128(x1, sk[10]); + } else if (num_rounds == 12) { + x0 = _mm_aesenc_si128(x0, sk[10]); + x1 = _mm_aesenc_si128(x1, sk[10]); + x0 = _mm_aesenc_si128(x0, sk[11]); + x1 = _mm_aesenc_si128(x1, sk[11]); + x0 = _mm_aesenclast_si128(x0, sk[12]); + x1 = _mm_aesenclast_si128(x1, sk[12]); + } else { + x0 = _mm_aesenc_si128(x0, sk[10]); + x1 = _mm_aesenc_si128(x1, sk[10]); + x0 = _mm_aesenc_si128(x0, sk[11]); + x1 = _mm_aesenc_si128(x1, sk[11]); + x0 = _mm_aesenc_si128(x0, sk[12]); + x1 = _mm_aesenc_si128(x1, sk[12]); + x0 = _mm_aesenc_si128(x0, sk[13]); + x1 = _mm_aesenc_si128(x1, sk[13]); + x0 = _mm_aesenclast_si128(x0, sk[14]); + x1 = _mm_aesenclast_si128(x1, sk[14]); + } + + x0 = _mm_xor_si128(x0, dx); + if (first_iter) { + cmx = _mm_xor_si128(cmx, x0); + first_iter = 0; + } else { + cmx = _mm_xor_si128(x1, x0); + } + _mm_storeu_si128((void *)buf, x0); + + buf += 16; + len -= 16; + + /* + * Increment the counter value. + */ + ivx = _mm_add_epi64(ivx, one); + ivx = _mm_sub_epi64(ivx, + _mm_slli_si128(_mm_cmpeq_epi64(ivx, zero), 8)); + + /* + * If this was the last iteration, then compute the + * extra block encryption to complete CBC-MAC. + */ + if (len == 0) { + cmx = _mm_xor_si128(cmx, sk[0]); + cmx = _mm_aesenc_si128(cmx, sk[1]); + cmx = _mm_aesenc_si128(cmx, sk[2]); + cmx = _mm_aesenc_si128(cmx, sk[3]); + cmx = _mm_aesenc_si128(cmx, sk[4]); + cmx = _mm_aesenc_si128(cmx, sk[5]); + cmx = _mm_aesenc_si128(cmx, sk[6]); + cmx = _mm_aesenc_si128(cmx, sk[7]); + cmx = _mm_aesenc_si128(cmx, sk[8]); + cmx = _mm_aesenc_si128(cmx, sk[9]); + if (num_rounds == 10) { + cmx = _mm_aesenclast_si128(cmx, sk[10]); + } else if (num_rounds == 12) { + cmx = _mm_aesenc_si128(cmx, sk[10]); + cmx = _mm_aesenc_si128(cmx, sk[11]); + cmx = _mm_aesenclast_si128(cmx, sk[12]); + } else { + cmx = _mm_aesenc_si128(cmx, sk[10]); + cmx = _mm_aesenc_si128(cmx, sk[11]); + cmx = _mm_aesenc_si128(cmx, sk[12]); + cmx = _mm_aesenc_si128(cmx, sk[13]); + cmx = _mm_aesenclast_si128(cmx, sk[14]); + } + break; + } + } + + /* + * Write back new counter value and CBC-MAC value. + */ + _mm_storeu_si128(ctr, _mm_shuffle_epi8(ivx, erev)); + _mm_storeu_si128(cbcmac, cmx); +} + +/* see bearssl_block.h */ +BR_TARGET("sse2,sse4.1,aes") +void +br_aes_x86ni_ctrcbc_decrypt(const br_aes_x86ni_ctrcbc_keys *ctx, + void *ctr, void *cbcmac, void *data, size_t len) +{ + unsigned char *buf; + unsigned num_rounds; + __m128i sk[15]; + __m128i ivx, cmx; + __m128i erev, zero, one; + unsigned u; + + num_rounds = ctx->num_rounds; + for (u = 0; u <= num_rounds; u ++) { + sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4))); + } + + /* + * Some SSE2 constants. + */ + erev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15); + zero = _mm_setzero_si128(); + one = _mm_set_epi64x(0, 1); + + /* + * Decode the counter in big-endian. + */ + ivx = _mm_shuffle_epi8(_mm_loadu_si128(ctr), erev); + cmx = _mm_loadu_si128(cbcmac); + + buf = data; + while (len > 0) { + __m128i dx, x0, x1; + + /* + * Load initial values: + * dx encrypted block of data + * x0 counter (for CTR encryption) + * x1 input for CBC-MAC + */ + dx = _mm_loadu_si128((void *)buf); + x0 = _mm_shuffle_epi8(ivx, erev); + x1 = _mm_xor_si128(cmx, dx); + + x0 = _mm_xor_si128(x0, sk[0]); + x1 = _mm_xor_si128(x1, sk[0]); + x0 = _mm_aesenc_si128(x0, sk[1]); + x1 = _mm_aesenc_si128(x1, sk[1]); + x0 = _mm_aesenc_si128(x0, sk[2]); + x1 = _mm_aesenc_si128(x1, sk[2]); + x0 = _mm_aesenc_si128(x0, sk[3]); + x1 = _mm_aesenc_si128(x1, sk[3]); + x0 = _mm_aesenc_si128(x0, sk[4]); + x1 = _mm_aesenc_si128(x1, sk[4]); + x0 = _mm_aesenc_si128(x0, sk[5]); + x1 = _mm_aesenc_si128(x1, sk[5]); + x0 = _mm_aesenc_si128(x0, sk[6]); + x1 = _mm_aesenc_si128(x1, sk[6]); + x0 = _mm_aesenc_si128(x0, sk[7]); + x1 = _mm_aesenc_si128(x1, sk[7]); + x0 = _mm_aesenc_si128(x0, sk[8]); + x1 = _mm_aesenc_si128(x1, sk[8]); + x0 = _mm_aesenc_si128(x0, sk[9]); + x1 = _mm_aesenc_si128(x1, sk[9]); + if (num_rounds == 10) { + x0 = _mm_aesenclast_si128(x0, sk[10]); + x1 = _mm_aesenclast_si128(x1, sk[10]); + } else if (num_rounds == 12) { + x0 = _mm_aesenc_si128(x0, sk[10]); + x1 = _mm_aesenc_si128(x1, sk[10]); + x0 = _mm_aesenc_si128(x0, sk[11]); + x1 = _mm_aesenc_si128(x1, sk[11]); + x0 = _mm_aesenclast_si128(x0, sk[12]); + x1 = _mm_aesenclast_si128(x1, sk[12]); + } else { + x0 = _mm_aesenc_si128(x0, sk[10]); + x1 = _mm_aesenc_si128(x1, sk[10]); + x0 = _mm_aesenc_si128(x0, sk[11]); + x1 = _mm_aesenc_si128(x1, sk[11]); + x0 = _mm_aesenc_si128(x0, sk[12]); + x1 = _mm_aesenc_si128(x1, sk[12]); + x0 = _mm_aesenc_si128(x0, sk[13]); + x1 = _mm_aesenc_si128(x1, sk[13]); + x0 = _mm_aesenclast_si128(x0, sk[14]); + x1 = _mm_aesenclast_si128(x1, sk[14]); + } + x0 = _mm_xor_si128(x0, dx); + cmx = x1; + _mm_storeu_si128((void *)buf, x0); + + buf += 16; + len -= 16; + + /* + * Increment the counter value. + */ + ivx = _mm_add_epi64(ivx, one); + ivx = _mm_sub_epi64(ivx, + _mm_slli_si128(_mm_cmpeq_epi64(ivx, zero), 8)); + } + + /* + * Write back new counter value and CBC-MAC value. + */ + _mm_storeu_si128(ctr, _mm_shuffle_epi8(ivx, erev)); + _mm_storeu_si128(cbcmac, cmx); +} + +BR_TARGETS_X86_DOWN + +/* see bearssl_block.h */ +const br_block_ctrcbc_class br_aes_x86ni_ctrcbc_vtable = { + sizeof(br_aes_x86ni_ctrcbc_keys), + 16, + 4, + (void (*)(const br_block_ctrcbc_class **, const void *, size_t)) + &br_aes_x86ni_ctrcbc_init, + (void (*)(const br_block_ctrcbc_class *const *, + void *, void *, void *, size_t)) + &br_aes_x86ni_ctrcbc_encrypt, + (void (*)(const br_block_ctrcbc_class *const *, + void *, void *, void *, size_t)) + &br_aes_x86ni_ctrcbc_decrypt, + (void (*)(const br_block_ctrcbc_class *const *, + void *, void *, size_t)) + &br_aes_x86ni_ctrcbc_ctr, + (void (*)(const br_block_ctrcbc_class *const *, + void *, const void *, size_t)) + &br_aes_x86ni_ctrcbc_mac +}; + +#else + +/* see bearssl_block.h */ +const br_block_ctrcbc_class * +br_aes_x86ni_ctrcbc_get_vtable(void) +{ + return NULL; +} + +#endif diff --git a/src/bearssl/bearssl.h b/src/bearssl/bearssl.h index de62963..3d5e63a 100644 --- a/src/bearssl/bearssl.h +++ b/src/bearssl/bearssl.h @@ -135,4 +135,34 @@ #include "bearssl_x509.h" #include "bearssl_pem.h" +/** \brief Type for a configuration option. + * + * A "configuration option" is a value that is selected when the BearSSL + * library itself is compiled. Most options are boolean; their value is + * then either 1 (option is enabled) or 0 (option is disabled). Some + * values have other integer values. Option names correspond to macro + * names. Some of the options can be explicitly set in the internal + * `"config.h"` file. + */ +typedef struct { + /** \brief Configurable option name. */ + const char *name; + /** \brief Configurable option value. */ + long value; +} br_config_option; + +/** \brief Get configuration report. + * + * This function returns compiled configuration options, each as a + * 'long' value. Names match internal macro names, in particular those + * that can be set in the `"config.h"` inner file. For boolean options, + * the numerical value is 1 if enabled, 0 if disabled. For maximum + * key sizes, values are expressed in bits. + * + * The returned array is terminated by an entry whose `name` is `NULL`. + * + * \return the configuration report. + */ +const br_config_option *br_get_config(void); + #endif diff --git a/src/bearssl/bearssl_aead.h b/src/bearssl/bearssl_aead.h index 09cb9e8..b1e52a3 100644 --- a/src/bearssl/bearssl_aead.h +++ b/src/bearssl/bearssl_aead.h @@ -127,7 +127,7 @@ extern "C" { * * - Nonce, plaintext and additional authenticated data all consist * in an integral number of bytes. There is no provision to use - * elements whose lengh in bits is not a multiple of 8. + * elements whose length in bits is not a multiple of 8. * * Each AEAD algorithm has its own requirements and limits on the sizes * of additional data and plaintext. This API does not provide any @@ -169,6 +169,9 @@ extern "C" { * Note that there is no OOP method for context initialisation: the * various AEAD algorithms have different requirements that would not * map well to a single initialisation API. + * + * The OOP API is not provided for CCM, due to its specific requirements + * (length of plaintext must be known in advance). */ /** @@ -213,7 +216,7 @@ struct br_aead_class_ { * * \param cc AEAD context structure. * \param data pointer to additional authenticated data. - * \param len length of additiona authenticated data (in bytes). + * \param len length of additional authenticated data (in bytes). */ void (*aad_inject)(const br_aead_class **cc, const void *data, size_t len); @@ -266,6 +269,8 @@ struct br_aead_class_ { * `check_tag()` function may be used to compute and check the * tag value. * + * Tag length depends on the AEAD algorithm. + * * \param cc AEAD context structure. * \param tag destination buffer for the tag. */ @@ -282,11 +287,44 @@ struct br_aead_class_ { * data or the tag was altered in transit, normally leading to * wholesale rejection of the complete message. * + * Tag length depends on the AEAD algorithm. + * * \param cc AEAD context structure. - * \param tag tag value to compare with (16 bytes). + * \param tag tag value to compare with. * \return 1 on success (exact match of tag value), 0 otherwise. */ uint32_t (*check_tag)(const br_aead_class **cc, const void *tag); + + /** + * \brief Compute authentication tag (with truncation). + * + * This function is similar to `get_tag()`, except that the tag + * length is provided. Some AEAD algorithms allow several tag + * lengths, usually by truncating the normal tag. Shorter tags + * mechanically increase success probability of forgeries. + * The range of allowed tag lengths depends on the algorithm. + * + * \param cc AEAD context structure. + * \param tag destination buffer for the tag. + * \param len tag length (in bytes). + */ + void (*get_tag_trunc)(const br_aead_class **cc, void *tag, size_t len); + + /** + * \brief Compute and check authentication tag (with truncation). + * + * This function is similar to `check_tag()` except that it + * works over an explicit tag length. See `get_tag()` for a + * discussion of explicit tag lengths; the range of allowed tag + * lengths depends on the algorithm. + * + * \param cc AEAD context structure. + * \param tag tag value to compare with. + * \param len tag length (in bytes). + * \return 1 on success (exact match of tag value), 0 otherwise. + */ + uint32_t (*check_tag_trunc)(const br_aead_class **cc, + const void *tag, size_t len); }; /** @@ -379,7 +417,7 @@ void br_gcm_reset(br_gcm_context *ctx, const void *iv, size_t len); * * \param ctx GCM context structure. * \param data pointer to additional authenticated data. - * \param len length of additiona authenticated data (in bytes). + * \param len length of additional authenticated data (in bytes). */ void br_gcm_aad_inject(br_gcm_context *ctx, const void *data, size_t len); @@ -449,11 +487,478 @@ void br_gcm_get_tag(br_gcm_context *ctx, void *tag); */ uint32_t br_gcm_check_tag(br_gcm_context *ctx, const void *tag); +/** + * \brief Compute GCM authentication tag (with truncation). + * + * This function is similar to `br_gcm_get_tag()`, except that it allows + * the tag to be truncated to a smaller length. The intended tag length + * is provided as `len` (in bytes); it MUST be no more than 16, but + * it may be smaller. Note that decreasing tag length mechanically makes + * forgeries easier; NIST SP 800-38D specifies that the tag length shall + * lie between 12 and 16 bytes (inclusive), but may be truncated down to + * 4 or 8 bytes, for specific applications that can tolerate it. It must + * also be noted that successful forgeries leak information on the + * authentication key, making subsequent forgeries easier. Therefore, + * tag truncation, and in particular truncation to sizes lower than 12 + * bytes, shall be envisioned only with great care. + * + * The tag is written in the provided `tag` buffer. This call terminates + * the GCM run: no data may be processed with that GCM context + * afterwards, until `br_gcm_reset()` is called to initiate a new GCM + * run. + * + * The tag value must normally be sent along with the encrypted data. + * When decrypting, the tag value must be recomputed and compared with + * the received tag: if the two tag values differ, then either the tag + * or the encrypted data was altered in transit. As an alternative to + * this function, the `br_gcm_check_tag_trunc()` function can be used to + * compute and check the tag value. + * + * \param ctx GCM context structure. + * \param tag destination buffer for the tag. + * \param len tag length (16 bytes or less). + */ +void br_gcm_get_tag_trunc(br_gcm_context *ctx, void *tag, size_t len); + +/** + * \brief Compute and check GCM authentication tag (with truncation). + * + * This function is an alternative to `br_gcm_get_tag_trunc()`, normally used + * on the receiving end (i.e. when decrypting value). The tag value is + * recomputed and compared with the provided tag value. If they match, 1 + * is returned; on mismatch, 0 is returned. A returned value of 0 means + * that the data or the tag was altered in transit, normally leading to + * wholesale rejection of the complete message. + * + * Tag length MUST be 16 bytes or less. The normal GCM tag length is 16 + * bytes. See `br_check_tag_trunc()` for some discussion on the potential + * perils of truncating authentication tags. + * + * \param ctx GCM context structure. + * \param tag tag value to compare with. + * \param len tag length (in bytes). + * \return 1 on success (exact match of tag value), 0 otherwise. + */ +uint32_t br_gcm_check_tag_trunc(br_gcm_context *ctx, + const void *tag, size_t len); + /** * \brief Class instance for GCM. */ extern const br_aead_class br_gcm_vtable; +/** + * \brief Context structure for EAX. + * + * EAX is an AEAD mode that combines a block cipher in CTR mode with + * CBC-MAC using the same block cipher and the same key, to provide + * authenticated encryption: + * + * - Any block cipher with 16-byte blocks can be used with EAX + * (technically, other block sizes are defined as well, but this + * is not implemented by these functions; shorter blocks also + * imply numerous security issues). + * + * - The nonce can have any length, as long as nonce values are + * not reused (thus, if nonces are randomly selected, the nonce + * size should be such that reuse probability is negligible). + * + * - Additional authenticated data length is unlimited. + * + * - Message length is unlimited. + * + * - The authentication tag has length 16 bytes. + * + * The EAX initialisation function receives as parameter an + * _initialised_ block cipher implementation context, with the secret + * key already set. A pointer to that context will be kept within the + * EAX context structure. It is up to the caller to allocate and + * initialise that block cipher context. + */ +typedef struct { + /** \brief Pointer to vtable for this context. */ + const br_aead_class *vtable; + +#ifndef BR_DOXYGEN_IGNORE + const br_block_ctrcbc_class **bctx; + unsigned char L2[16]; + unsigned char L4[16]; + unsigned char nonce[16]; + unsigned char head[16]; + unsigned char ctr[16]; + unsigned char cbcmac[16]; + unsigned char buf[16]; + size_t ptr; +#endif +} br_eax_context; + +/** + * \brief Initialize an EAX context. + * + * A block cipher implementation, with its initialised context + * structure, is provided. The block cipher MUST use 16-byte blocks in + * CTR + CBC-MAC mode, and its secret key MUST have been already set in + * the provided context. The parameters are linked in the EAX context. + * + * After this function has been called, the `br_eax_reset()` function must + * be called, to provide the nonce for EAX computation. + * + * \param ctx EAX context structure. + * \param bctx block cipher context (already initialised with secret key). + */ +void br_eax_init(br_eax_context *ctx, const br_block_ctrcbc_class **bctx); + +/** + * \brief Reset an EAX context. + * + * This function resets an already initialised EAX context for a new + * computation run. Implementations and keys are conserved. This function + * can be called at any time; it cancels any ongoing EAX computation that + * uses the provided context structure. + * + * It is critical to EAX security that nonce values are not repeated for + * the same encryption key. Nonces can have arbitrary length. If nonces + * are randomly generated, then a nonce length of at least 128 bits (16 + * bytes) is recommended, to make nonce reuse probability sufficiently + * low. + * + * \param ctx EAX context structure. + * \param nonce EAX nonce to use. + * \param len EAX nonce length (in bytes). + */ +void br_eax_reset(br_eax_context *ctx, const void *nonce, size_t len); + +/** + * \brief Inject additional authenticated data into EAX. + * + * The provided data is injected into a running EAX computation. Additional + * data must be injected _before_ the call to `br_eax_flip()`. + * Additional data can be injected in several chunks of arbitrary length; + * the total amount of additional authenticated data is unlimited. + * + * \param ctx EAX context structure. + * \param data pointer to additional authenticated data. + * \param len length of additional authenticated data (in bytes). + */ +void br_eax_aad_inject(br_eax_context *ctx, const void *data, size_t len); + +/** + * \brief Finish injection of additional authenticated data into EAX. + * + * This function MUST be called before beginning the actual encryption + * or decryption (with `br_eax_run()`), even if no additional authenticated + * data was injected. No additional authenticated data may be injected + * after this function call. + * + * \param ctx EAX context structure. + */ +void br_eax_flip(br_eax_context *ctx); + +/** + * \brief Encrypt or decrypt some data with EAX. + * + * Data encryption or decryption can be done after `br_eax_flip()` + * has been called on the context. If `encrypt` is non-zero, then the + * provided data shall be plaintext, and it is encrypted in place. + * Otherwise, the data shall be ciphertext, and it is decrypted in place. + * + * Data may be provided in several chunks of arbitrary length. + * + * \param ctx EAX context structure. + * \param encrypt non-zero for encryption, zero for decryption. + * \param data data to encrypt or decrypt. + * \param len data length (in bytes). + */ +void br_eax_run(br_eax_context *ctx, int encrypt, void *data, size_t len); + +/** + * \brief Compute EAX authentication tag. + * + * Compute the EAX authentication tag. The tag is a 16-byte value which + * is written in the provided `tag` buffer. This call terminates the + * EAX run: no data may be processed with that EAX context afterwards, + * until `br_eax_reset()` is called to initiate a new EAX run. + * + * The tag value must normally be sent along with the encrypted data. + * When decrypting, the tag value must be recomputed and compared with + * the received tag: if the two tag values differ, then either the tag + * or the encrypted data was altered in transit. As an alternative to + * this function, the `br_eax_check_tag()` function can be used to + * compute and check the tag value. + * + * \param ctx EAX context structure. + * \param tag destination buffer for the tag (16 bytes). + */ +void br_eax_get_tag(br_eax_context *ctx, void *tag); + +/** + * \brief Compute and check EAX authentication tag. + * + * This function is an alternative to `br_eax_get_tag()`, normally used + * on the receiving end (i.e. when decrypting value). The tag value is + * recomputed and compared with the provided tag value. If they match, 1 + * is returned; on mismatch, 0 is returned. A returned value of 0 means + * that the data or the tag was altered in transit, normally leading to + * wholesale rejection of the complete message. + * + * \param ctx EAX context structure. + * \param tag tag value to compare with (16 bytes). + * \return 1 on success (exact match of tag value), 0 otherwise. + */ +uint32_t br_eax_check_tag(br_eax_context *ctx, const void *tag); + +/** + * \brief Compute EAX authentication tag (with truncation). + * + * This function is similar to `br_eax_get_tag()`, except that it allows + * the tag to be truncated to a smaller length. The intended tag length + * is provided as `len` (in bytes); it MUST be no more than 16, but + * it may be smaller. Note that decreasing tag length mechanically makes + * forgeries easier; NIST SP 800-38D specifies that the tag length shall + * lie between 12 and 16 bytes (inclusive), but may be truncated down to + * 4 or 8 bytes, for specific applications that can tolerate it. It must + * also be noted that successful forgeries leak information on the + * authentication key, making subsequent forgeries easier. Therefore, + * tag truncation, and in particular truncation to sizes lower than 12 + * bytes, shall be envisioned only with great care. + * + * The tag is written in the provided `tag` buffer. This call terminates + * the EAX run: no data may be processed with that EAX context + * afterwards, until `br_eax_reset()` is called to initiate a new EAX + * run. + * + * The tag value must normally be sent along with the encrypted data. + * When decrypting, the tag value must be recomputed and compared with + * the received tag: if the two tag values differ, then either the tag + * or the encrypted data was altered in transit. As an alternative to + * this function, the `br_eax_check_tag_trunc()` function can be used to + * compute and check the tag value. + * + * \param ctx EAX context structure. + * \param tag destination buffer for the tag. + * \param len tag length (16 bytes or less). + */ +void br_eax_get_tag_trunc(br_eax_context *ctx, void *tag, size_t len); + +/** + * \brief Compute and check EAX authentication tag (with truncation). + * + * This function is an alternative to `br_eax_get_tag_trunc()`, normally used + * on the receiving end (i.e. when decrypting value). The tag value is + * recomputed and compared with the provided tag value. If they match, 1 + * is returned; on mismatch, 0 is returned. A returned value of 0 means + * that the data or the tag was altered in transit, normally leading to + * wholesale rejection of the complete message. + * + * Tag length MUST be 16 bytes or less. The normal EAX tag length is 16 + * bytes. See `br_check_tag_trunc()` for some discussion on the potential + * perils of truncating authentication tags. + * + * \param ctx EAX context structure. + * \param tag tag value to compare with. + * \param len tag length (in bytes). + * \return 1 on success (exact match of tag value), 0 otherwise. + */ +uint32_t br_eax_check_tag_trunc(br_eax_context *ctx, + const void *tag, size_t len); + +/** + * \brief Class instance for EAX. + */ +extern const br_aead_class br_eax_vtable; + +/** + * \brief Context structure for CCM. + * + * CCM is an AEAD mode that combines a block cipher in CTR mode with + * CBC-MAC using the same block cipher and the same key, to provide + * authenticated encryption: + * + * - Any block cipher with 16-byte blocks can be used with CCM + * (technically, other block sizes are defined as well, but this + * is not implemented by these functions; shorter blocks also + * imply numerous security issues). + * + * - The authentication tag length, and plaintext length, MUST be + * known when starting processing data. Plaintext and ciphertext + * can still be provided by chunks, but the total size must match + * the value provided upon initialisation. + * + * - The nonce length is constrained betwen 7 and 13 bytes (inclusive). + * Furthermore, the plaintext length, when encoded, must fit over + * 15-nonceLen bytes; thus, if the nonce has length 13 bytes, then + * the plaintext length cannot exceed 65535 bytes. + * + * - Additional authenticated data length is practically unlimited + * (formal limit is at 2^64 bytes). + * + * - The authentication tag has length 4 to 16 bytes (even values only). + * + * The CCM initialisation function receives as parameter an + * _initialised_ block cipher implementation context, with the secret + * key already set. A pointer to that context will be kept within the + * CCM context structure. It is up to the caller to allocate and + * initialise that block cipher context. + */ +typedef struct { +#ifndef BR_DOXYGEN_IGNORE + const br_block_ctrcbc_class **bctx; + unsigned char ctr[16]; + unsigned char cbcmac[16]; + unsigned char tagmask[16]; + unsigned char buf[16]; + size_t ptr; + size_t tag_len; +#endif +} br_ccm_context; + +/** + * \brief Initialize a CCM context. + * + * A block cipher implementation, with its initialised context + * structure, is provided. The block cipher MUST use 16-byte blocks in + * CTR + CBC-MAC mode, and its secret key MUST have been already set in + * the provided context. The parameters are linked in the CCM context. + * + * After this function has been called, the `br_ccm_reset()` function must + * be called, to provide the nonce for CCM computation. + * + * \param ctx CCM context structure. + * \param bctx block cipher context (already initialised with secret key). + */ +void br_ccm_init(br_ccm_context *ctx, const br_block_ctrcbc_class **bctx); + +/** + * \brief Reset a CCM context. + * + * This function resets an already initialised CCM context for a new + * computation run. Implementations and keys are conserved. This function + * can be called at any time; it cancels any ongoing CCM computation that + * uses the provided context structure. + * + * The `aad_len` parameter contains the total length, in bytes, of the + * additional authenticated data. It may be zero. That length MUST be + * exact. + * + * The `data_len` parameter contains the total length, in bytes, of the + * data that will be injected (plaintext or ciphertext). That length MUST + * be exact. Moreover, that length MUST be less than 2^(8*(15-nonce_len)). + * + * The nonce length (`nonce_len`), in bytes, must be in the 7..13 range + * (inclusive). + * + * The tag length (`tag_len`), in bytes, must be in the 4..16 range, and + * be an even integer. Short tags mechanically allow for higher forgery + * probabilities; hence, tag sizes smaller than 12 bytes shall be used only + * with care. + * + * It is critical to CCM security that nonce values are not repeated for + * the same encryption key. Random generation of nonces is not generally + * recommended, due to the relatively small maximum nonce value. + * + * Returned value is 1 on success, 0 on error. An error is reported if + * the tag or nonce length is out of range, or if the + * plaintext/ciphertext length cannot be encoded with the specified + * nonce length. + * + * \param ctx CCM context structure. + * \param nonce CCM nonce to use. + * \param nonce_len CCM nonce length (in bytes, 7 to 13). + * \param aad_len additional authenticated data length (in bytes). + * \param data_len plaintext/ciphertext length (in bytes). + * \param tag_len tag length (in bytes). + * \return 1 on success, 0 on error. + */ +int br_ccm_reset(br_ccm_context *ctx, const void *nonce, size_t nonce_len, + uint64_t aad_len, uint64_t data_len, size_t tag_len); + +/** + * \brief Inject additional authenticated data into CCM. + * + * The provided data is injected into a running CCM computation. Additional + * data must be injected _before_ the call to `br_ccm_flip()`. + * Additional data can be injected in several chunks of arbitrary length, + * but the total amount MUST exactly match the value which was provided + * to `br_ccm_reset()`. + * + * \param ctx CCM context structure. + * \param data pointer to additional authenticated data. + * \param len length of additional authenticated data (in bytes). + */ +void br_ccm_aad_inject(br_ccm_context *ctx, const void *data, size_t len); + +/** + * \brief Finish injection of additional authenticated data into CCM. + * + * This function MUST be called before beginning the actual encryption + * or decryption (with `br_ccm_run()`), even if no additional authenticated + * data was injected. No additional authenticated data may be injected + * after this function call. + * + * \param ctx CCM context structure. + */ +void br_ccm_flip(br_ccm_context *ctx); + +/** + * \brief Encrypt or decrypt some data with CCM. + * + * Data encryption or decryption can be done after `br_ccm_flip()` + * has been called on the context. If `encrypt` is non-zero, then the + * provided data shall be plaintext, and it is encrypted in place. + * Otherwise, the data shall be ciphertext, and it is decrypted in place. + * + * Data may be provided in several chunks of arbitrary length, provided + * that the total length exactly matches the length provided to the + * `br_ccm_reset()` call. + * + * \param ctx CCM context structure. + * \param encrypt non-zero for encryption, zero for decryption. + * \param data data to encrypt or decrypt. + * \param len data length (in bytes). + */ +void br_ccm_run(br_ccm_context *ctx, int encrypt, void *data, size_t len); + +/** + * \brief Compute CCM authentication tag. + * + * Compute the CCM authentication tag. This call terminates the CCM + * run: all data must have been injected with `br_ccm_run()` (in zero, + * one or more successive calls). After this function has been called, + * no more data can br processed; a `br_ccm_reset()` call is required + * to start a new message. + * + * The tag length was provided upon context initialisation (last call + * to `br_ccm_reset()`); it is returned by this function. + * + * The tag value must normally be sent along with the encrypted data. + * When decrypting, the tag value must be recomputed and compared with + * the received tag: if the two tag values differ, then either the tag + * or the encrypted data was altered in transit. As an alternative to + * this function, the `br_ccm_check_tag()` function can be used to + * compute and check the tag value. + * + * \param ctx CCM context structure. + * \param tag destination buffer for the tag (up to 16 bytes). + * \return the tag length (in bytes). + */ +size_t br_ccm_get_tag(br_ccm_context *ctx, void *tag); + +/** + * \brief Compute and check CCM authentication tag. + * + * This function is an alternative to `br_ccm_get_tag()`, normally used + * on the receiving end (i.e. when decrypting value). The tag value is + * recomputed and compared with the provided tag value. If they match, 1 + * is returned; on mismatch, 0 is returned. A returned value of 0 means + * that the data or the tag was altered in transit, normally leading to + * wholesale rejection of the complete message. + * + * \param ctx CCM context structure. + * \param tag tag value to compare with (up to 16 bytes). + * \return 1 on success (exact match of tag value), 0 otherwise. + */ +uint32_t br_ccm_check_tag(br_ccm_context *ctx, const void *tag); + #ifdef __cplusplus } #endif diff --git a/src/bearssl/bearssl_block.h b/src/bearssl/bearssl_block.h index 24f09ac..4772779 100644 --- a/src/bearssl/bearssl_block.h +++ b/src/bearssl/bearssl_block.h @@ -136,6 +136,73 @@ extern "C" { * chunked processing, provided that each chunk length (except possibly * the last one) is a multiple of the block size. * + * - `br_xxx_ctrcbc_keys` + * + * Context structure that contains the subkeys resulting from the + * key expansion. These subkeys are appropriate for doing combined + * CTR encryption/decryption and CBC-MAC, as used in the CCM and EAX + * authenticated encryption modes. The structure first field is + * called `vtable` and points to the appropriate OOP structure. + * + * - `br_xxx_ctrcbc_init(br_xxx_ctr_keys *ctx, const void *key, size_t len)` + * + * Perform key expansion: subkeys for combined CTR + * encryption/decryption and CBC-MAC are computed and written in the + * provided context structure. The key length MUST be adequate for + * the implemented block cipher. This function also sets the + * `vtable` field. + * + * - `br_xxx_ctrcbc_encrypt(const br_xxx_ctrcbc_keys *ctx, void *ctr, void *cbcmac, void *data, size_t len)` + * + * Perform CTR encryption of some data, and CBC-MAC. Processing is + * done "in place" (the output data replaces the input data). This + * function applies CTR encryption on the data, using a full + * block-size counter (i.e. for 128-bit blocks, the counter is + * incremented as a 128-bit value). The 'ctr' array contains the + * initial value for the counter (used in the first block) and it is + * updated with the new value after data processing. The 'cbcmac' + * value shall point to a block-sized value which is used as IV for + * CBC-MAC, computed over the encrypted data (output of CTR + * encryption); the resulting CBC-MAC is written over 'cbcmac' on + * output. + * + * The data length MUST be a multiple of the block size. + * + * - `br_xxx_ctrcbc_decrypt(const br_xxx_ctrcbc_keys *ctx, void *ctr, void *cbcmac, void *data, size_t len)` + * + * Perform CTR decryption of some data, and CBC-MAC. Processing is + * done "in place" (the output data replaces the input data). This + * function applies CTR decryption on the data, using a full + * block-size counter (i.e. for 128-bit blocks, the counter is + * incremented as a 128-bit value). The 'ctr' array contains the + * initial value for the counter (used in the first block) and it is + * updated with the new value after data processing. The 'cbcmac' + * value shall point to a block-sized value which is used as IV for + * CBC-MAC, computed over the encrypted data (input of CTR + * encryption); the resulting CBC-MAC is written over 'cbcmac' on + * output. + * + * The data length MUST be a multiple of the block size. + * + * - `br_xxx_ctrcbc_ctr(const br_xxx_ctrcbc_keys *ctx, void *ctr, void *data, size_t len)` + * + * Perform CTR encryption or decryption of the provided data. The + * data is processed "in place" (the output data replaces the input + * data). A full block-sized counter is applied (i.e. for 128-bit + * blocks, the counter is incremented as a 128-bit value). The 'ctr' + * array contains the initial value for the counter (used in the + * first block), and it is updated with the new value after data + * processing. + * + * The data length MUST be a multiple of the block size. + * + * - `br_xxx_ctrcbc_mac(const br_xxx_ctrcbc_keys *ctx, void *cbcmac, const void *data, size_t len)` + * + * Compute CBC-MAC over the provided data. The IV for CBC-MAC is + * provided as 'cbcmac'; the output is written over the same array. + * The data itself is untouched. The data length MUST be a multiple + * of the block size. + * * * It shall be noted that the key expansion functions return `void`. If * the provided key length is not allowed, then there will be no error @@ -176,6 +243,41 @@ extern "C" { * * Pointer to the encryption/decryption function. * + * For combined CTR/CBC-MAC encryption, the `vtable` has a slightly + * different structure: + * + * - `context_size` + * + * The size (in bytes) of the context structure for subkeys. + * + * - `block_size` + * + * The cipher block size (in bytes). + * + * - `log_block_size` + * + * The base-2 logarithm of cipher block size (e.g. 4 for blocks + * of 16 bytes). + * + * - `init` + * + * Pointer to the key expansion function. + * + * - `encrypt` + * + * Pointer to the CTR encryption + CBC-MAC function. + * + * - `decrypt` + * + * Pointer to the CTR decryption + CBC-MAC function. + * + * - `ctr` + * + * Pointer to the CTR encryption/decryption function. + * + * - `mac` + * + * Pointer to the CBC-MAC function. * * For block cipher "`xxx`", static, constant instances of these * structures are defined, under the names: @@ -183,6 +285,7 @@ extern "C" { * - `br_xxx_cbcenc_vtable` * - `br_xxx_cbcdec_vtable` * - `br_xxx_ctr_vtable` + * - `br_xxx_ctrcbc_vtable` * * * ## Implemented Block Ciphers @@ -460,6 +563,132 @@ struct br_block_ctr_class_ { const void *iv, uint32_t cc, void *data, size_t len); }; +/** + * \brief Class type for combined CTR and CBC-MAC implementations. + * + * A `br_block_ctrcbc_class` instance points to the functions implementing + * a specific block cipher, when used in CTR mode for encrypting or + * decrypting data, along with CBC-MAC. + */ +typedef struct br_block_ctrcbc_class_ br_block_ctrcbc_class; +struct br_block_ctrcbc_class_ { + /** + * \brief Size (in bytes) of the context structure appropriate + * for containing subkeys. + */ + size_t context_size; + + /** + * \brief Size of individual blocks (in bytes). + */ + unsigned block_size; + + /** + * \brief Base-2 logarithm of the size of individual blocks, + * expressed in bytes. + */ + unsigned log_block_size; + + /** + * \brief Initialisation function. + * + * This function sets the `vtable` field in the context structure. + * The key length MUST be one of the key lengths supported by + * the implementation. + * + * \param ctx context structure to initialise. + * \param key secret key. + * \param key_len key length (in bytes). + */ + void (*init)(const br_block_ctrcbc_class **ctx, + const void *key, size_t key_len); + + /** + * \brief Run the CTR encryption + CBC-MAC. + * + * The `ctr` parameter points to the counter; its length shall + * be equal to the block size. It is updated by this function + * as encryption proceeds. + * + * The `cbcmac` parameter points to the IV for CBC-MAC. The MAC + * is computed over the encrypted data (output of CTR + * encryption). Its length shall be equal to the block size. The + * computed CBC-MAC value is written over the `cbcmac` array. + * + * The data to encrypt is updated "in place". Its length (`len` + * bytes) MUST be a multiple of the block size. + * + * \param ctx context structure (already initialised). + * \param ctr counter for CTR encryption (initial and final). + * \param cbcmac IV and output buffer for CBC-MAC. + * \param data data to encrypt. + * \param len data length (in bytes). + */ + void (*encrypt)(const br_block_ctrcbc_class *const *ctx, + void *ctr, void *cbcmac, void *data, size_t len); + + /** + * \brief Run the CTR decryption + CBC-MAC. + * + * The `ctr` parameter points to the counter; its length shall + * be equal to the block size. It is updated by this function + * as decryption proceeds. + * + * The `cbcmac` parameter points to the IV for CBC-MAC. The MAC + * is computed over the encrypted data (i.e. before CTR + * decryption). Its length shall be equal to the block size. The + * computed CBC-MAC value is written over the `cbcmac` array. + * + * The data to decrypt is updated "in place". Its length (`len` + * bytes) MUST be a multiple of the block size. + * + * \param ctx context structure (already initialised). + * \param ctr counter for CTR encryption (initial and final). + * \param cbcmac IV and output buffer for CBC-MAC. + * \param data data to decrypt. + * \param len data length (in bytes). + */ + void (*decrypt)(const br_block_ctrcbc_class *const *ctx, + void *ctr, void *cbcmac, void *data, size_t len); + + /** + * \brief Run the CTR encryption/decryption only. + * + * The `ctr` parameter points to the counter; its length shall + * be equal to the block size. It is updated by this function + * as decryption proceeds. + * + * The data to decrypt is updated "in place". Its length (`len` + * bytes) MUST be a multiple of the block size. + * + * \param ctx context structure (already initialised). + * \param ctr counter for CTR encryption (initial and final). + * \param data data to decrypt. + * \param len data length (in bytes). + */ + void (*ctr)(const br_block_ctrcbc_class *const *ctx, + void *ctr, void *data, size_t len); + + /** + * \brief Run the CBC-MAC only. + * + * The `cbcmac` parameter points to the IV for CBC-MAC. The MAC + * is computed over the encrypted data (i.e. before CTR + * decryption). Its length shall be equal to the block size. The + * computed CBC-MAC value is written over the `cbcmac` array. + * + * The data is unmodified. Its length (`len` bytes) MUST be a + * multiple of the block size. + * + * \param ctx context structure (already initialised). + * \param cbcmac IV and output buffer for CBC-MAC. + * \param data data to decrypt. + * \param len data length (in bytes). + */ + void (*mac)(const br_block_ctrcbc_class *const *ctx, + void *cbcmac, const void *data, size_t len); +}; + /* * Traditional, table-based AES implementation. It is fast, but uses * internal tables (in particular a 1 kB table for encryption, another @@ -517,6 +746,22 @@ typedef struct { #endif } br_aes_big_ctr_keys; +/** + * \brief Context for AES subkeys (`aes_big` implementation, CTR encryption + * and decryption + CBC-MAC). + * + * First field is a pointer to the vtable; it is set by the initialisation + * function. Other fields are not supposed to be accessed by user code. + */ +typedef struct { + /** \brief Pointer to vtable for this context. */ + const br_block_ctrcbc_class *vtable; +#ifndef BR_DOXYGEN_IGNORE + uint32_t skey[60]; + unsigned num_rounds; +#endif +} br_aes_big_ctrcbc_keys; + /** * \brief Class instance for AES CBC encryption (`aes_big` implementation). */ @@ -533,6 +778,12 @@ extern const br_block_cbcdec_class br_aes_big_cbcdec_vtable; */ extern const br_block_ctr_class br_aes_big_ctr_vtable; +/** + * \brief Class instance for AES CTR encryption/decryption + CBC-MAC + * (`aes_big` implementation). + */ +extern const br_block_ctrcbc_class br_aes_big_ctrcbc_vtable; + /** * \brief Context initialisation (key schedule) for AES CBC encryption * (`aes_big` implementation). @@ -566,6 +817,17 @@ void br_aes_big_cbcdec_init(br_aes_big_cbcdec_keys *ctx, void br_aes_big_ctr_init(br_aes_big_ctr_keys *ctx, const void *key, size_t len); +/** + * \brief Context initialisation (key schedule) for AES CTR + CBC-MAC + * (`aes_big` implementation). + * + * \param ctx context to initialise. + * \param key secret key. + * \param len secret key length (in bytes). + */ +void br_aes_big_ctrcbc_init(br_aes_big_ctrcbc_keys *ctx, + const void *key, size_t len); + /** * \brief CBC encryption with AES (`aes_big` implementation). * @@ -594,13 +856,59 @@ void br_aes_big_cbcdec_run(const br_aes_big_cbcdec_keys *ctx, void *iv, * \param ctx context (already initialised). * \param iv IV (constant, 12 bytes). * \param cc initial block counter value. - * \param data data to decrypt (updated). + * \param data data to encrypt or decrypt (updated). * \param len data length (in bytes). * \return new block counter value. */ uint32_t br_aes_big_ctr_run(const br_aes_big_ctr_keys *ctx, const void *iv, uint32_t cc, void *data, size_t len); +/** + * \brief CTR encryption + CBC-MAC with AES (`aes_big` implementation). + * + * \param ctx context (already initialised). + * \param ctr counter for CTR (16 bytes, updated). + * \param cbcmac IV for CBC-MAC (updated). + * \param data data to encrypt (updated). + * \param len data length (in bytes, MUST be a multiple of 16). + */ +void br_aes_big_ctrcbc_encrypt(const br_aes_big_ctrcbc_keys *ctx, + void *ctr, void *cbcmac, void *data, size_t len); + +/** + * \brief CTR decryption + CBC-MAC with AES (`aes_big` implementation). + * + * \param ctx context (already initialised). + * \param ctr counter for CTR (16 bytes, updated). + * \param cbcmac IV for CBC-MAC (updated). + * \param data data to decrypt (updated). + * \param len data length (in bytes, MUST be a multiple of 16). + */ +void br_aes_big_ctrcbc_decrypt(const br_aes_big_ctrcbc_keys *ctx, + void *ctr, void *cbcmac, void *data, size_t len); + +/** + * \brief CTR encryption/decryption with AES (`aes_big` implementation). + * + * \param ctx context (already initialised). + * \param ctr counter for CTR (16 bytes, updated). + * \param data data to MAC (updated). + * \param len data length (in bytes, MUST be a multiple of 16). + */ +void br_aes_big_ctrcbc_ctr(const br_aes_big_ctrcbc_keys *ctx, + void *ctr, void *data, size_t len); + +/** + * \brief CBC-MAC with AES (`aes_big` implementation). + * + * \param ctx context (already initialised). + * \param cbcmac IV for CBC-MAC (updated). + * \param data data to MAC (unmodified). + * \param len data length (in bytes, MUST be a multiple of 16). + */ +void br_aes_big_ctrcbc_mac(const br_aes_big_ctrcbc_keys *ctx, + void *cbcmac, const void *data, size_t len); + /* * AES implementation optimized for size. It is slower than the * traditional table-based AES implementation, but requires much less @@ -658,6 +966,22 @@ typedef struct { #endif } br_aes_small_ctr_keys; +/** + * \brief Context for AES subkeys (`aes_small` implementation, CTR encryption + * and decryption + CBC-MAC). + * + * First field is a pointer to the vtable; it is set by the initialisation + * function. Other fields are not supposed to be accessed by user code. + */ +typedef struct { + /** \brief Pointer to vtable for this context. */ + const br_block_ctrcbc_class *vtable; +#ifndef BR_DOXYGEN_IGNORE + uint32_t skey[60]; + unsigned num_rounds; +#endif +} br_aes_small_ctrcbc_keys; + /** * \brief Class instance for AES CBC encryption (`aes_small` implementation). */ @@ -674,6 +998,12 @@ extern const br_block_cbcdec_class br_aes_small_cbcdec_vtable; */ extern const br_block_ctr_class br_aes_small_ctr_vtable; +/** + * \brief Class instance for AES CTR encryption/decryption + CBC-MAC + * (`aes_small` implementation). + */ +extern const br_block_ctrcbc_class br_aes_small_ctrcbc_vtable; + /** * \brief Context initialisation (key schedule) for AES CBC encryption * (`aes_small` implementation). @@ -707,6 +1037,17 @@ void br_aes_small_cbcdec_init(br_aes_small_cbcdec_keys *ctx, void br_aes_small_ctr_init(br_aes_small_ctr_keys *ctx, const void *key, size_t len); +/** + * \brief Context initialisation (key schedule) for AES CTR + CBC-MAC + * (`aes_small` implementation). + * + * \param ctx context to initialise. + * \param key secret key. + * \param len secret key length (in bytes). + */ +void br_aes_small_ctrcbc_init(br_aes_small_ctrcbc_keys *ctx, + const void *key, size_t len); + /** * \brief CBC encryption with AES (`aes_small` implementation). * @@ -742,6 +1083,52 @@ void br_aes_small_cbcdec_run(const br_aes_small_cbcdec_keys *ctx, void *iv, uint32_t br_aes_small_ctr_run(const br_aes_small_ctr_keys *ctx, const void *iv, uint32_t cc, void *data, size_t len); +/** + * \brief CTR encryption + CBC-MAC with AES (`aes_small` implementation). + * + * \param ctx context (already initialised). + * \param ctr counter for CTR (16 bytes, updated). + * \param cbcmac IV for CBC-MAC (updated). + * \param data data to encrypt (updated). + * \param len data length (in bytes, MUST be a multiple of 16). + */ +void br_aes_small_ctrcbc_encrypt(const br_aes_small_ctrcbc_keys *ctx, + void *ctr, void *cbcmac, void *data, size_t len); + +/** + * \brief CTR decryption + CBC-MAC with AES (`aes_small` implementation). + * + * \param ctx context (already initialised). + * \param ctr counter for CTR (16 bytes, updated). + * \param cbcmac IV for CBC-MAC (updated). + * \param data data to decrypt (updated). + * \param len data length (in bytes, MUST be a multiple of 16). + */ +void br_aes_small_ctrcbc_decrypt(const br_aes_small_ctrcbc_keys *ctx, + void *ctr, void *cbcmac, void *data, size_t len); + +/** + * \brief CTR encryption/decryption with AES (`aes_small` implementation). + * + * \param ctx context (already initialised). + * \param ctr counter for CTR (16 bytes, updated). + * \param data data to MAC (updated). + * \param len data length (in bytes, MUST be a multiple of 16). + */ +void br_aes_small_ctrcbc_ctr(const br_aes_small_ctrcbc_keys *ctx, + void *ctr, void *data, size_t len); + +/** + * \brief CBC-MAC with AES (`aes_small` implementation). + * + * \param ctx context (already initialised). + * \param cbcmac IV for CBC-MAC (updated). + * \param data data to MAC (unmodified). + * \param len data length (in bytes, MUST be a multiple of 16). + */ +void br_aes_small_ctrcbc_mac(const br_aes_small_ctrcbc_keys *ctx, + void *cbcmac, const void *data, size_t len); + /* * Constant-time AES implementation. Its size is similar to that of * 'aes_big', and its performance is similar to that of 'aes_small' (faster @@ -798,6 +1185,22 @@ typedef struct { #endif } br_aes_ct_ctr_keys; +/** + * \brief Context for AES subkeys (`aes_ct` implementation, CTR encryption + * and decryption + CBC-MAC). + * + * First field is a pointer to the vtable; it is set by the initialisation + * function. Other fields are not supposed to be accessed by user code. + */ +typedef struct { + /** \brief Pointer to vtable for this context. */ + const br_block_ctrcbc_class *vtable; +#ifndef BR_DOXYGEN_IGNORE + uint32_t skey[60]; + unsigned num_rounds; +#endif +} br_aes_ct_ctrcbc_keys; + /** * \brief Class instance for AES CBC encryption (`aes_ct` implementation). */ @@ -814,6 +1217,12 @@ extern const br_block_cbcdec_class br_aes_ct_cbcdec_vtable; */ extern const br_block_ctr_class br_aes_ct_ctr_vtable; +/** + * \brief Class instance for AES CTR encryption/decryption + CBC-MAC + * (`aes_ct` implementation). + */ +extern const br_block_ctrcbc_class br_aes_ct_ctrcbc_vtable; + /** * \brief Context initialisation (key schedule) for AES CBC encryption * (`aes_ct` implementation). @@ -847,6 +1256,17 @@ void br_aes_ct_cbcdec_init(br_aes_ct_cbcdec_keys *ctx, void br_aes_ct_ctr_init(br_aes_ct_ctr_keys *ctx, const void *key, size_t len); +/** + * \brief Context initialisation (key schedule) for AES CTR + CBC-MAC + * (`aes_ct` implementation). + * + * \param ctx context to initialise. + * \param key secret key. + * \param len secret key length (in bytes). + */ +void br_aes_ct_ctrcbc_init(br_aes_ct_ctrcbc_keys *ctx, + const void *key, size_t len); + /** * \brief CBC encryption with AES (`aes_ct` implementation). * @@ -882,6 +1302,52 @@ void br_aes_ct_cbcdec_run(const br_aes_ct_cbcdec_keys *ctx, void *iv, uint32_t br_aes_ct_ctr_run(const br_aes_ct_ctr_keys *ctx, const void *iv, uint32_t cc, void *data, size_t len); +/** + * \brief CTR encryption + CBC-MAC with AES (`aes_ct` implementation). + * + * \param ctx context (already initialised). + * \param ctr counter for CTR (16 bytes, updated). + * \param cbcmac IV for CBC-MAC (updated). + * \param data data to encrypt (updated). + * \param len data length (in bytes, MUST be a multiple of 16). + */ +void br_aes_ct_ctrcbc_encrypt(const br_aes_ct_ctrcbc_keys *ctx, + void *ctr, void *cbcmac, void *data, size_t len); + +/** + * \brief CTR decryption + CBC-MAC with AES (`aes_ct` implementation). + * + * \param ctx context (already initialised). + * \param ctr counter for CTR (16 bytes, updated). + * \param cbcmac IV for CBC-MAC (updated). + * \param data data to decrypt (updated). + * \param len data length (in bytes, MUST be a multiple of 16). + */ +void br_aes_ct_ctrcbc_decrypt(const br_aes_ct_ctrcbc_keys *ctx, + void *ctr, void *cbcmac, void *data, size_t len); + +/** + * \brief CTR encryption/decryption with AES (`aes_ct` implementation). + * + * \param ctx context (already initialised). + * \param ctr counter for CTR (16 bytes, updated). + * \param data data to MAC (updated). + * \param len data length (in bytes, MUST be a multiple of 16). + */ +void br_aes_ct_ctrcbc_ctr(const br_aes_ct_ctrcbc_keys *ctx, + void *ctr, void *data, size_t len); + +/** + * \brief CBC-MAC with AES (`aes_ct` implementation). + * + * \param ctx context (already initialised). + * \param cbcmac IV for CBC-MAC (updated). + * \param data data to MAC (unmodified). + * \param len data length (in bytes, MUST be a multiple of 16). + */ +void br_aes_ct_ctrcbc_mac(const br_aes_ct_ctrcbc_keys *ctx, + void *cbcmac, const void *data, size_t len); + /* * 64-bit constant-time AES implementation. It is similar to 'aes_ct' * but uses 64-bit registers, making it about twice faster than 'aes_ct' @@ -940,6 +1406,22 @@ typedef struct { #endif } br_aes_ct64_ctr_keys; +/** + * \brief Context for AES subkeys (`aes_ct64` implementation, CTR encryption + * and decryption + CBC-MAC). + * + * First field is a pointer to the vtable; it is set by the initialisation + * function. Other fields are not supposed to be accessed by user code. + */ +typedef struct { + /** \brief Pointer to vtable for this context. */ + const br_block_ctrcbc_class *vtable; +#ifndef BR_DOXYGEN_IGNORE + uint64_t skey[30]; + unsigned num_rounds; +#endif +} br_aes_ct64_ctrcbc_keys; + /** * \brief Class instance for AES CBC encryption (`aes_ct64` implementation). */ @@ -956,6 +1438,12 @@ extern const br_block_cbcdec_class br_aes_ct64_cbcdec_vtable; */ extern const br_block_ctr_class br_aes_ct64_ctr_vtable; +/** + * \brief Class instance for AES CTR encryption/decryption + CBC-MAC + * (`aes_ct64` implementation). + */ +extern const br_block_ctrcbc_class br_aes_ct64_ctrcbc_vtable; + /** * \brief Context initialisation (key schedule) for AES CBC encryption * (`aes_ct64` implementation). @@ -989,6 +1477,17 @@ void br_aes_ct64_cbcdec_init(br_aes_ct64_cbcdec_keys *ctx, void br_aes_ct64_ctr_init(br_aes_ct64_ctr_keys *ctx, const void *key, size_t len); +/** + * \brief Context initialisation (key schedule) for AES CTR + CBC-MAC + * (`aes_ct64` implementation). + * + * \param ctx context to initialise. + * \param key secret key. + * \param len secret key length (in bytes). + */ +void br_aes_ct64_ctrcbc_init(br_aes_ct64_ctrcbc_keys *ctx, + const void *key, size_t len); + /** * \brief CBC encryption with AES (`aes_ct64` implementation). * @@ -1024,6 +1523,52 @@ void br_aes_ct64_cbcdec_run(const br_aes_ct64_cbcdec_keys *ctx, void *iv, uint32_t br_aes_ct64_ctr_run(const br_aes_ct64_ctr_keys *ctx, const void *iv, uint32_t cc, void *data, size_t len); +/** + * \brief CTR encryption + CBC-MAC with AES (`aes_ct64` implementation). + * + * \param ctx context (already initialised). + * \param ctr counter for CTR (16 bytes, updated). + * \param cbcmac IV for CBC-MAC (updated). + * \param data data to encrypt (updated). + * \param len data length (in bytes, MUST be a multiple of 16). + */ +void br_aes_ct64_ctrcbc_encrypt(const br_aes_ct64_ctrcbc_keys *ctx, + void *ctr, void *cbcmac, void *data, size_t len); + +/** + * \brief CTR decryption + CBC-MAC with AES (`aes_ct64` implementation). + * + * \param ctx context (already initialised). + * \param ctr counter for CTR (16 bytes, updated). + * \param cbcmac IV for CBC-MAC (updated). + * \param data data to decrypt (updated). + * \param len data length (in bytes, MUST be a multiple of 16). + */ +void br_aes_ct64_ctrcbc_decrypt(const br_aes_ct64_ctrcbc_keys *ctx, + void *ctr, void *cbcmac, void *data, size_t len); + +/** + * \brief CTR encryption/decryption with AES (`aes_ct64` implementation). + * + * \param ctx context (already initialised). + * \param ctr counter for CTR (16 bytes, updated). + * \param data data to MAC (updated). + * \param len data length (in bytes, MUST be a multiple of 16). + */ +void br_aes_ct64_ctrcbc_ctr(const br_aes_ct64_ctrcbc_keys *ctx, + void *ctr, void *data, size_t len); + +/** + * \brief CBC-MAC with AES (`aes_ct64` implementation). + * + * \param ctx context (already initialised). + * \param cbcmac IV for CBC-MAC (updated). + * \param data data to MAC (unmodified). + * \param len data length (in bytes, MUST be a multiple of 16). + */ +void br_aes_ct64_ctrcbc_mac(const br_aes_ct64_ctrcbc_keys *ctx, + void *cbcmac, const void *data, size_t len); + /* * AES implementation using AES-NI opcodes (x86 platform). */ @@ -1083,6 +1628,24 @@ typedef struct { #endif } br_aes_x86ni_ctr_keys; +/** + * \brief Context for AES subkeys (`aes_x86ni` implementation, CTR encryption + * and decryption + CBC-MAC). + * + * First field is a pointer to the vtable; it is set by the initialisation + * function. Other fields are not supposed to be accessed by user code. + */ +typedef struct { + /** \brief Pointer to vtable for this context. */ + const br_block_ctrcbc_class *vtable; +#ifndef BR_DOXYGEN_IGNORE + union { + unsigned char skni[16 * 15]; + } skey; + unsigned num_rounds; +#endif +} br_aes_x86ni_ctrcbc_keys; + /** * \brief Class instance for AES CBC encryption (`aes_x86ni` implementation). * @@ -1111,6 +1674,16 @@ extern const br_block_cbcdec_class br_aes_x86ni_cbcdec_vtable; */ extern const br_block_ctr_class br_aes_x86ni_ctr_vtable; +/** + * \brief Class instance for AES CTR encryption/decryption + CBC-MAC + * (`aes_x86ni` implementation). + * + * Since this implementation might be omitted from the library, or the + * AES opcode unavailable on the current CPU, a pointer to this class + * instance should be obtained through `br_aes_x86ni_ctrcbc_get_vtable()`. + */ +extern const br_block_ctrcbc_class br_aes_x86ni_ctrcbc_vtable; + /** * \brief Context initialisation (key schedule) for AES CBC encryption * (`aes_x86ni` implementation). @@ -1144,6 +1717,17 @@ void br_aes_x86ni_cbcdec_init(br_aes_x86ni_cbcdec_keys *ctx, void br_aes_x86ni_ctr_init(br_aes_x86ni_ctr_keys *ctx, const void *key, size_t len); +/** + * \brief Context initialisation (key schedule) for AES CTR + CBC-MAC + * (`aes_x86ni` implementation). + * + * \param ctx context to initialise. + * \param key secret key. + * \param len secret key length (in bytes). + */ +void br_aes_x86ni_ctrcbc_init(br_aes_x86ni_ctrcbc_keys *ctx, + const void *key, size_t len); + /** * \brief CBC encryption with AES (`aes_x86ni` implementation). * @@ -1179,6 +1763,52 @@ void br_aes_x86ni_cbcdec_run(const br_aes_x86ni_cbcdec_keys *ctx, void *iv, uint32_t br_aes_x86ni_ctr_run(const br_aes_x86ni_ctr_keys *ctx, const void *iv, uint32_t cc, void *data, size_t len); +/** + * \brief CTR encryption + CBC-MAC with AES (`aes_x86ni` implementation). + * + * \param ctx context (already initialised). + * \param ctr counter for CTR (16 bytes, updated). + * \param cbcmac IV for CBC-MAC (updated). + * \param data data to encrypt (updated). + * \param len data length (in bytes, MUST be a multiple of 16). + */ +void br_aes_x86ni_ctrcbc_encrypt(const br_aes_x86ni_ctrcbc_keys *ctx, + void *ctr, void *cbcmac, void *data, size_t len); + +/** + * \brief CTR decryption + CBC-MAC with AES (`aes_x86ni` implementation). + * + * \param ctx context (already initialised). + * \param ctr counter for CTR (16 bytes, updated). + * \param cbcmac IV for CBC-MAC (updated). + * \param data data to decrypt (updated). + * \param len data length (in bytes, MUST be a multiple of 16). + */ +void br_aes_x86ni_ctrcbc_decrypt(const br_aes_x86ni_ctrcbc_keys *ctx, + void *ctr, void *cbcmac, void *data, size_t len); + +/** + * \brief CTR encryption/decryption with AES (`aes_x86ni` implementation). + * + * \param ctx context (already initialised). + * \param ctr counter for CTR (16 bytes, updated). + * \param data data to MAC (updated). + * \param len data length (in bytes, MUST be a multiple of 16). + */ +void br_aes_x86ni_ctrcbc_ctr(const br_aes_x86ni_ctrcbc_keys *ctx, + void *ctr, void *data, size_t len); + +/** + * \brief CBC-MAC with AES (`aes_x86ni` implementation). + * + * \param ctx context (already initialised). + * \param cbcmac IV for CBC-MAC (updated). + * \param data data to MAC (unmodified). + * \param len data length (in bytes, MUST be a multiple of 16). + */ +void br_aes_x86ni_ctrcbc_mac(const br_aes_x86ni_ctrcbc_keys *ctx, + void *cbcmac, const void *data, size_t len); + /** * \brief Obtain the `aes_x86ni` AES-CBC (encryption) implementation, if * available. @@ -1188,7 +1818,7 @@ uint32_t br_aes_x86ni_ctr_run(const br_aes_x86ni_ctr_keys *ctx, * opcodes are available on the currently running CPU. If either of * these conditions is not met, then this function returns `NULL`. * - * \return the `aes_x868ni` AES-CBC (encryption) implementation, or `NULL`. + * \return the `aes_x86ni` AES-CBC (encryption) implementation, or `NULL`. */ const br_block_cbcenc_class *br_aes_x86ni_cbcenc_get_vtable(void); @@ -1201,7 +1831,7 @@ const br_block_cbcenc_class *br_aes_x86ni_cbcenc_get_vtable(void); * opcodes are available on the currently running CPU. If either of * these conditions is not met, then this function returns `NULL`. * - * \return the `aes_x868ni` AES-CBC (decryption) implementation, or `NULL`. + * \return the `aes_x86ni` AES-CBC (decryption) implementation, or `NULL`. */ const br_block_cbcdec_class *br_aes_x86ni_cbcdec_get_vtable(void); @@ -1213,10 +1843,23 @@ const br_block_cbcdec_class *br_aes_x86ni_cbcdec_get_vtable(void); * opcodes are available on the currently running CPU. If either of * these conditions is not met, then this function returns `NULL`. * - * \return the `aes_x868ni` AES-CTR implementation, or `NULL`. + * \return the `aes_x86ni` AES-CTR implementation, or `NULL`. */ const br_block_ctr_class *br_aes_x86ni_ctr_get_vtable(void); +/** + * \brief Obtain the `aes_x86ni` AES-CTR + CBC-MAC implementation, if + * available. + * + * This function returns a pointer to `br_aes_x86ni_ctrcbc_vtable`, if + * that implementation was compiled in the library _and_ the x86 AES + * opcodes are available on the currently running CPU. If either of + * these conditions is not met, then this function returns `NULL`. + * + * \return the `aes_x86ni` AES-CTR implementation, or `NULL`. + */ +const br_block_ctrcbc_class *br_aes_x86ni_ctrcbc_get_vtable(void); + /* * AES implementation using POWER8 opcodes. */ @@ -1452,6 +2095,22 @@ typedef union { br_aes_pwr8_ctr_keys c_pwr8; } br_aes_gen_ctr_keys; +/** + * \brief Aggregate structure large enough to be used as context for + * subkeys (CTR encryption/decryption + CBC-MAC) for all AES implementations. + */ +typedef union { + const br_block_ctrcbc_class *vtable; + br_aes_big_ctrcbc_keys c_big; + br_aes_small_ctrcbc_keys c_small; + br_aes_ct_ctrcbc_keys c_ct; + br_aes_ct64_ctrcbc_keys c_ct64; + /* FIXME + br_aes_x86ni_ctrcbc_keys c_x86ni; + br_aes_pwr8_ctrcbc_keys c_pwr8; + */ +} br_aes_gen_ctrcbc_keys; + /* * Traditional, table-based implementation for DES/3DES. Since tables are * used, cache-timing attacks are conceptually possible. diff --git a/src/bearssl/bearssl_rand.h b/src/bearssl/bearssl_rand.h index 59628fb..37379d2 100644 --- a/src/bearssl/bearssl_rand.h +++ b/src/bearssl/bearssl_rand.h @@ -253,6 +253,41 @@ br_hmac_drbg_get_hash(const br_hmac_drbg_context *ctx) return ctx->digest_class; } +/** + * \brief Type for a provider of entropy seeds. + * + * A "seeder" is a function that is able to obtain random values from + * some source and inject them as entropy seed in a PRNG. A seeder + * shall guarantee that the total entropy of the injected seed is large + * enough to seed a PRNG for purposes of cryptographic key generation + * (i.e. at least 128 bits). + * + * A seeder may report a failure to obtain adequate entropy. Seeders + * shall endeavour to fix themselves transient errors by trying again; + * thus, callers may consider reported errors as permanent. + * + * \param ctx PRNG context to seed. + * \return 1 on success, 0 on error. + */ +typedef int (*br_prng_seeder)(const br_prng_class **ctx); + +/** + * \brief Get a seeder backed by the operating system or hardware. + * + * Get a seeder that feeds on RNG facilities provided by the current + * operating system or hardware. If no such facility is known, then 0 + * is returned. + * + * If `name` is not `NULL`, then `*name` is set to a symbolic string + * that identifies the seeder implemention. If no seeder is returned + * and `name` is not `NULL`, then `*name` is set to a pointer to the + * constant string `"none"`. + * + * \param name receiver for seeder name, or `NULL`. + * \return the system seeder, if available, or 0. + */ +br_prng_seeder br_prng_seeder_system(const char **name); + #ifdef __cplusplus } #endif diff --git a/src/bearssl/bearssl_ssl.h b/src/bearssl/bearssl_ssl.h index 45ac599..6640bc6 100644 --- a/src/bearssl/bearssl_ssl.h +++ b/src/bearssl/bearssl_ssl.h @@ -833,6 +833,14 @@ typedef struct { /* * Context RNG. + * + * rng_init_done is initially 0. It is set to 1 when the + * basic structure of the RNG is set, and 2 when some + * entropy has been pushed in. The value 2 marks the RNG + * as "properly seeded". + * + * rng_os_rand_done is initially 0. It is set to 1 when + * some seeding from the OS or hardware has been attempted. */ br_hmac_drbg_context rng; int rng_init_done; diff --git a/src/bearssl/ccm.c b/src/bearssl/ccm.c new file mode 100644 index 0000000..68cc913 --- /dev/null +++ b/src/bearssl/ccm.c @@ -0,0 +1,346 @@ +/* + * Copyright (c) 2017 Thomas Pornin + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * Implementation Notes + * ==================== + * + * The combined CTR + CBC-MAC functions can only handle full blocks, + * so some buffering is necessary. + * + * - 'ptr' contains a value from 0 to 15, which is the number of bytes + * accumulated in buf[] that still needs to be processed with the + * current CBC-MAC computation. + * + * - When processing the message itself, CTR encryption/decryption is + * also done at the same time. The first 'ptr' bytes of buf[] then + * contains the plaintext bytes, while the last '16 - ptr' bytes of + * buf[] are the remnants of the stream block, to be used against + * the next input bytes, when available. When 'ptr' is 0, the + * contents of buf[] are to be ignored. + * + * - The current counter and running CBC-MAC values are kept in 'ctr' + * and 'cbcmac', respectively. + */ + +/* see bearssl_block.h */ +void +br_ccm_init(br_ccm_context *ctx, const br_block_ctrcbc_class **bctx) +{ + ctx->bctx = bctx; +} + +/* see bearssl_block.h */ +int +br_ccm_reset(br_ccm_context *ctx, const void *nonce, size_t nonce_len, + uint64_t aad_len, uint64_t data_len, size_t tag_len) +{ + unsigned char tmp[16]; + unsigned u, q; + + if (nonce_len < 7 || nonce_len > 13) { + return 0; + } + if (tag_len < 4 || tag_len > 16 || (tag_len & 1) != 0) { + return 0; + } + q = 15 - (unsigned)nonce_len; + ctx->tag_len = tag_len; + + /* + * Block B0, to start CBC-MAC. + */ + tmp[0] = (aad_len > 0 ? 0x40 : 0x00) + | (((unsigned)tag_len - 2) << 2) + | (q - 1); + memcpy(tmp + 1, nonce, nonce_len); + for (u = 0; u < q; u ++) { + tmp[15 - u] = (unsigned char)data_len; + data_len >>= 8; + } + if (data_len != 0) { + /* + * If the data length was not entirely consumed in the + * loop above, then it exceeds the maximum limit of + * q bytes (when encoded). + */ + return 0; + } + + /* + * Start CBC-MAC. + */ + memset(ctx->cbcmac, 0, sizeof ctx->cbcmac); + (*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, tmp, sizeof tmp); + + /* + * Assemble AAD length header. + */ + if ((aad_len >> 32) != 0) { + ctx->buf[0] = 0xFF; + ctx->buf[1] = 0xFF; + br_enc64be(ctx->buf + 2, aad_len); + ctx->ptr = 10; + } else if (aad_len >= 0xFF00) { + ctx->buf[0] = 0xFF; + ctx->buf[1] = 0xFE; + br_enc32be(ctx->buf + 2, (uint32_t)aad_len); + ctx->ptr = 6; + } else if (aad_len > 0) { + br_enc16be(ctx->buf, (unsigned)aad_len); + ctx->ptr = 2; + } else { + ctx->ptr = 0; + } + + /* + * Make initial counter value and compute tag mask. + */ + ctx->ctr[0] = q - 1; + memcpy(ctx->ctr + 1, nonce, nonce_len); + memset(ctx->ctr + 1 + nonce_len, 0, q); + memset(ctx->tagmask, 0, sizeof ctx->tagmask); + (*ctx->bctx)->ctr(ctx->bctx, ctx->ctr, + ctx->tagmask, sizeof ctx->tagmask); + + return 1; +} + +/* see bearssl_block.h */ +void +br_ccm_aad_inject(br_ccm_context *ctx, const void *data, size_t len) +{ + const unsigned char *dbuf; + size_t ptr; + + dbuf = data; + + /* + * Complete partial block, if needed. + */ + ptr = ctx->ptr; + if (ptr != 0) { + size_t clen; + + clen = (sizeof ctx->buf) - ptr; + if (clen > len) { + memcpy(ctx->buf + ptr, dbuf, len); + ctx->ptr = ptr + len; + return; + } + memcpy(ctx->buf + ptr, dbuf, clen); + dbuf += clen; + len -= clen; + (*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, + ctx->buf, sizeof ctx->buf); + } + + /* + * Process complete blocks. + */ + ptr = len & 15; + len -= ptr; + (*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, dbuf, len); + dbuf += len; + + /* + * Copy last partial block in the context buffer. + */ + memcpy(ctx->buf, dbuf, ptr); + ctx->ptr = ptr; +} + +/* see bearssl_block.h */ +void +br_ccm_flip(br_ccm_context *ctx) +{ + size_t ptr; + + /* + * Complete AAD partial block with zeros, if necessary. + */ + ptr = ctx->ptr; + if (ptr != 0) { + memset(ctx->buf + ptr, 0, (sizeof ctx->buf) - ptr); + (*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, + ctx->buf, sizeof ctx->buf); + ctx->ptr = 0; + } + + /* + * Counter was already set by br_ccm_reset(). + */ +} + +/* see bearssl_block.h */ +void +br_ccm_run(br_ccm_context *ctx, int encrypt, void *data, size_t len) +{ + unsigned char *dbuf; + size_t ptr; + + dbuf = data; + + /* + * Complete a partial block, if any: ctx->buf[] contains + * ctx->ptr plaintext bytes (already reported), and the other + * bytes are CTR stream output. + */ + ptr = ctx->ptr; + if (ptr != 0) { + size_t clen; + size_t u; + + clen = (sizeof ctx->buf) - ptr; + if (clen > len) { + clen = len; + } + if (encrypt) { + for (u = 0; u < clen; u ++) { + unsigned w, x; + + w = ctx->buf[ptr + u]; + x = dbuf[u]; + ctx->buf[ptr + u] = x; + dbuf[u] = w ^ x; + } + } else { + for (u = 0; u < clen; u ++) { + unsigned w; + + w = ctx->buf[ptr + u] ^ dbuf[u]; + dbuf[u] = w; + ctx->buf[ptr + u] = w; + } + } + dbuf += clen; + len -= clen; + ptr += clen; + if (ptr < sizeof ctx->buf) { + ctx->ptr = ptr; + return; + } + (*ctx->bctx)->mac(ctx->bctx, + ctx->cbcmac, ctx->buf, sizeof ctx->buf); + } + + /* + * Process all complete blocks. Note that the ctrcbc API is for + * encrypt-then-MAC (CBC-MAC is computed over the encrypted + * blocks) while CCM uses MAC-and-encrypt (CBC-MAC is computed + * over the plaintext blocks). Therefore, we need to use the + * _decryption_ function for encryption, and the encryption + * function for decryption (this works because CTR encryption + * and decryption are identical, so the choice really is about + * computing the CBC-MAC before or after XORing with the CTR + * stream). + */ + ptr = len & 15; + len -= ptr; + if (encrypt) { + (*ctx->bctx)->decrypt(ctx->bctx, ctx->ctr, ctx->cbcmac, + dbuf, len); + } else { + (*ctx->bctx)->encrypt(ctx->bctx, ctx->ctr, ctx->cbcmac, + dbuf, len); + } + dbuf += len; + + /* + * If there is some remaining data, then we need to compute an + * extra block of CTR stream. + */ + if (ptr != 0) { + size_t u; + + memset(ctx->buf, 0, sizeof ctx->buf); + (*ctx->bctx)->ctr(ctx->bctx, ctx->ctr, + ctx->buf, sizeof ctx->buf); + if (encrypt) { + for (u = 0; u < ptr; u ++) { + unsigned w, x; + + w = ctx->buf[u]; + x = dbuf[u]; + ctx->buf[u] = x; + dbuf[u] = w ^ x; + } + } else { + for (u = 0; u < ptr; u ++) { + unsigned w; + + w = ctx->buf[u] ^ dbuf[u]; + dbuf[u] = w; + ctx->buf[u] = w; + } + } + } + ctx->ptr = ptr; +} + +/* see bearssl_block.h */ +size_t +br_ccm_get_tag(br_ccm_context *ctx, void *tag) +{ + size_t ptr; + size_t u; + + /* + * If there is some buffered data, then we need to pad it with + * zeros and finish up CBC-MAC. + */ + ptr = ctx->ptr; + if (ptr != 0) { + memset(ctx->buf + ptr, 0, (sizeof ctx->buf) - ptr); + (*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, + ctx->buf, sizeof ctx->buf); + } + + /* + * XOR the tag mask into the CBC-MAC output. + */ + for (u = 0; u < ctx->tag_len; u ++) { + ctx->cbcmac[u] ^= ctx->tagmask[u]; + } + memcpy(tag, ctx->cbcmac, ctx->tag_len); + return ctx->tag_len; +} + +/* see bearssl_block.h */ +uint32_t +br_ccm_check_tag(br_ccm_context *ctx, const void *tag) +{ + unsigned char tmp[16]; + size_t u, tag_len; + uint32_t z; + + tag_len = br_ccm_get_tag(ctx, tmp); + z = 0; + for (u = 0; u < tag_len; u ++) { + z |= tmp[u] ^ ((const unsigned char *)tag)[u]; + } + return EQ0(z); +} diff --git a/src/bearssl/chacha20_sse2.c b/src/bearssl/chacha20_sse2.c index 0b32d51..92b4a4a 100644 --- a/src/bearssl/chacha20_sse2.c +++ b/src/bearssl/chacha20_sse2.c @@ -22,22 +22,43 @@ * SOFTWARE. */ +#define BR_ENABLE_INTRINSICS 1 #include "inner.h" +#if BR_SSE2 + /* * This file contains a ChaCha20 implementation that leverages SSE2 * opcodes for better performance. */ -#if BR_SSE2 +/* see bearssl_block.h */ +br_chacha20_run +br_chacha20_sse2_get(void) +{ + /* + * If using 64-bit mode, then SSE2 opcodes should be automatically + * available, since they are part of the ABI. + * + * In 32-bit mode, we use CPUID to detect the SSE2 feature. + */ -#if BR_SSE2_GCC -#include -#include -#endif -#if BR_SSE2_MSC -#include +#if BR_amd64 + return &br_chacha20_sse2_run; +#else + + /* + * SSE2 support is indicated by bit 26 in EDX. + */ + if (br_cpuid(0, 0, 0, 0x04000000)) { + return &br_chacha20_sse2_run; + } else { + return 0; + } #endif +} + +BR_TARGETS_X86_UP /* see bearssl_block.h */ BR_TARGET("sse2") @@ -202,48 +223,7 @@ br_chacha20_sse2_run(const void *key, | ((uint32_t)_mm_extract_epi16(iw, 1) << 16); } -/* see bearssl_block.h */ -br_chacha20_run -br_chacha20_sse2_get(void) -{ - /* - * If using 64-bit mode, then SSE2 opcodes should be automatically - * available, since they are part of the ABI. - * - * In 32-bit mode, we use CPUID to detect the SSE2 feature. - */ - -#if __x86_64__ || _M_X64 - - return &br_chacha20_sse2_run; - -#else - - /* - * SSE2 support is indicated by bit 26 in EDX. - */ -#define MASK 0x04000000 - -#if BR_SSE2_GCC - unsigned eax, ebx, ecx, edx; - - if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) { - if ((edx & MASK) == MASK) { - return &br_chacha20_sse2_run; - } - } -#elif BR_SSE2_MSC - int info[4]; - - __cpuid(info, 1); - if (((uint32_t)info[3] & MASK) == MASK) { - return &br_chacha20_sse2_run; - } -#endif - return 0; - -#endif -} +BR_TARGETS_X86_DOWN #else diff --git a/src/bearssl/config.h b/src/bearssl/config.h index b06807b..accae3e 100644 --- a/src/bearssl/config.h +++ b/src/bearssl/config.h @@ -98,6 +98,17 @@ #define BR_NO_ARITH_SHIFT 1 */ +/* + * When BR_RDRAND is enabled, the SSL engine will use the RDRAND opcode + * to automatically obtain quality randomness for seeding its internal + * PRNG. Since that opcode is present only in recent x86 CPU, its + * support is dynamically tested; if the current CPU does not support + * it, then another random source will be used, such as /dev/urandom or + * CryptGenRandom(). + * +#define BR_RDRAND 1 + */ + /* * When BR_USE_URANDOM is enabled, the SSL engine will use /dev/urandom * to automatically obtain quality randomness for seedings its internal diff --git a/src/bearssl/eax.c b/src/bearssl/eax.c new file mode 100644 index 0000000..07b1cb9 --- /dev/null +++ b/src/bearssl/eax.c @@ -0,0 +1,413 @@ +/* + * Copyright (c) 2017 Thomas Pornin + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * Implementation Notes + * ==================== + * + * The combined CTR + CBC-MAC functions can only handle full blocks, + * so some buffering is necessary. Moreover, EAX has a special padding + * rule for CBC-MAC, which implies that we cannot compute the MAC over + * the last received full block until we know whether we are at the + * end of the data or not. + * + * - 'ptr' contains a value from 1 to 16, which is the number of bytes + * accumulated in buf[] that still needs to be processed with the + * current OMAC computation. Beware that this can go to 16: a + * complete block cannot be processed until it is known whether it + * is the last block or not. However, it can never be 0, because + * OMAC^t works on an input that is at least one-block long. + * + * - When processing the message itself, CTR encryption/decryption is + * also done at the same time. The first 'ptr' bytes of buf[] then + * contains the encrypted bytes, while the last '16 - ptr' bytes of + * buf[] are the remnants of the stream block, to be used against + * the next input bytes, when available. + * + * - The current counter and running CBC-MAC values are kept in 'ctr' + * and 'cbcmac', respectively. + * + * - The derived keys for padding are kept in L2 and L4 (double and + * quadruple of Enc_K(0^n), in GF(2^128), respectively). + */ + +/* + * Start an OMAC computation; the first block is the big-endian + * representation of the provided value ('val' must fit on one byte). + * We make it a delayed block because it may also be the last one, + */ +static void +omac_start(br_eax_context *ctx, unsigned val) +{ + memset(ctx->cbcmac, 0, sizeof ctx->cbcmac); + memset(ctx->buf, 0, sizeof ctx->buf); + ctx->buf[15] = val; + ctx->ptr = 16; +} + +/* + * Double a value in finite field GF(2^128), defined with modulus + * X^128+X^7+X^2+X+1. + */ +static void +double_gf128(unsigned char *dst, const unsigned char *src) +{ + unsigned cc; + int i; + + cc = 0x87 & -((unsigned)src[0] >> 7); + for (i = 15; i >= 0; i --) { + unsigned z; + + z = (src[i] << 1) ^ cc; + cc = z >> 8; + dst[i] = (unsigned char)z; + } +} + +/* + * Apply padding to the last block, currently in ctx->buf (with + * ctx->ptr bytes), and finalize OMAC computation. + */ +static void +do_pad(br_eax_context *ctx) +{ + unsigned char *pad; + size_t ptr, u; + + ptr = ctx->ptr; + if (ptr == 16) { + pad = ctx->L2; + } else { + ctx->buf[ptr ++] = 0x80; + memset(ctx->buf + ptr, 0x00, 16 - ptr); + pad = ctx->L4; + } + for (u = 0; u < sizeof ctx->buf; u ++) { + ctx->buf[u] ^= pad[u]; + } + (*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, ctx->buf, sizeof ctx->buf); +} + +/* + * Apply CBC-MAC on the provided data, with buffering management. This + * function assumes that on input, ctx->buf contains a full block of + * unprocessed data. + */ +static void +do_cbcmac_chunk(br_eax_context *ctx, const void *data, size_t len) +{ + size_t ptr; + + if (len == 0) { + return; + } + ptr = len & (size_t)15; + if (ptr == 0) { + len -= 16; + ptr = 16; + } else { + len -= ptr; + } + (*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, ctx->buf, sizeof ctx->buf); + (*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, data, len); + memcpy(ctx->buf, (const unsigned char *)data + len, ptr); + ctx->ptr = ptr; +} + +/* see bearssl_aead.h */ +void +br_eax_init(br_eax_context *ctx, const br_block_ctrcbc_class **bctx) +{ + unsigned char tmp[16], iv[16]; + + ctx->vtable = &br_eax_vtable; + ctx->bctx = bctx; + + /* + * Encrypt a whole-zero block to compute L2 and L4. + */ + memset(tmp, 0, sizeof tmp); + memset(iv, 0, sizeof iv); + (*bctx)->ctr(bctx, iv, tmp, sizeof tmp); + double_gf128(ctx->L2, tmp); + double_gf128(ctx->L4, ctx->L2); +} + +/* see bearssl_aead.h */ +void +br_eax_reset(br_eax_context *ctx, const void *nonce, size_t len) +{ + /* + * Process nonce with OMAC^0. + */ + omac_start(ctx, 0); + do_cbcmac_chunk(ctx, nonce, len); + do_pad(ctx); + memcpy(ctx->nonce, ctx->cbcmac, sizeof ctx->cbcmac); + + /* + * Start OMAC^1 for the AAD ("header" in the EAX specification). + */ + omac_start(ctx, 1); +} + +/* see bearssl_aead.h */ +void +br_eax_aad_inject(br_eax_context *ctx, const void *data, size_t len) +{ + size_t ptr; + + ptr = ctx->ptr; + + /* + * If there is a partial block, first complete it. + */ + if (ptr < 16) { + size_t clen; + + clen = 16 - ptr; + if (len <= clen) { + memcpy(ctx->buf + ptr, data, len); + ctx->ptr = ptr + len; + return; + } + memcpy(ctx->buf + ptr, data, clen); + data = (const unsigned char *)data + clen; + len -= clen; + } + + /* + * We now have a full block in buf[], and this is not the last + * block. + */ + do_cbcmac_chunk(ctx, data, len); +} + +/* see bearssl_aead.h */ +void +br_eax_flip(br_eax_context *ctx) +{ + /* + * Complete the OMAC computation on the AAD. + */ + do_pad(ctx); + memcpy(ctx->head, ctx->cbcmac, sizeof ctx->cbcmac); + + /* + * Start OMAC^2 for the encrypted data. + */ + omac_start(ctx, 2); + + /* + * Initial counter value for CTR is the processed nonce. + */ + memcpy(ctx->ctr, ctx->nonce, sizeof ctx->nonce); +} + +/* see bearssl_aead.h */ +void +br_eax_run(br_eax_context *ctx, int encrypt, void *data, size_t len) +{ + unsigned char *dbuf; + size_t ptr; + + /* + * Ensure that there is actual data to process. + */ + if (len == 0) { + return; + } + + dbuf = data; + ptr = ctx->ptr; + + if (ptr != 16) { + /* + * We have a partially consumed block. + */ + size_t u, clen; + + clen = 16 - ptr; + if (len <= clen) { + clen = len; + } + if (encrypt) { + for (u = 0; u < clen; u ++) { + ctx->buf[ptr + u] ^= dbuf[u]; + } + memcpy(dbuf, ctx->buf + ptr, clen); + } else { + for (u = 0; u < clen; u ++) { + unsigned dx, sx; + + sx = ctx->buf[ptr + u]; + dx = dbuf[u]; + ctx->buf[ptr + u] = dx; + dbuf[u] = sx ^ dx; + } + } + + if (len <= clen) { + ctx->ptr = ptr + clen; + return; + } + dbuf += clen; + len -= clen; + } + + /* + * We now have a complete encrypted block in buf[] that must still + * be processed with OMAC, and this is not the final buf. + */ + (*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, ctx->buf, sizeof ctx->buf); + + /* + * Do CTR encryption or decryption and CBC-MAC for all full blocks + * except the last. + */ + ptr = len & (size_t)15; + if (ptr == 0) { + len -= 16; + ptr = 16; + } else { + len -= ptr; + } + if (encrypt) { + (*ctx->bctx)->encrypt(ctx->bctx, ctx->ctr, ctx->cbcmac, + dbuf, len); + } else { + (*ctx->bctx)->decrypt(ctx->bctx, ctx->ctr, ctx->cbcmac, + dbuf, len); + } + dbuf += len; + + /* + * Compute next block of CTR stream, and use it to finish + * encrypting or decrypting the data. + */ + memset(ctx->buf, 0, sizeof ctx->buf); + (*ctx->bctx)->ctr(ctx->bctx, ctx->ctr, ctx->buf, sizeof ctx->buf); + if (encrypt) { + size_t u; + + for (u = 0; u < ptr; u ++) { + ctx->buf[u] ^= dbuf[u]; + } + memcpy(dbuf, ctx->buf, ptr); + } else { + size_t u; + + for (u = 0; u < ptr; u ++) { + unsigned dx, sx; + + sx = ctx->buf[u]; + dx = dbuf[u]; + ctx->buf[u] = dx; + dbuf[u] = sx ^ dx; + } + } + ctx->ptr = ptr; +} + +/* + * Complete tag computation. The final tag is written in ctx->cbcmac. + */ +static void +do_final(br_eax_context *ctx) +{ + size_t u; + + do_pad(ctx); + + /* + * Authentication tag is the XOR of the three OMAC outputs for + * the nonce, AAD and encrypted data. + */ + for (u = 0; u < 16; u ++) { + ctx->cbcmac[u] ^= ctx->nonce[u] ^ ctx->head[u]; + } +} + +/* see bearssl_aead.h */ +void +br_eax_get_tag(br_eax_context *ctx, void *tag) +{ + do_final(ctx); + memcpy(tag, ctx->cbcmac, sizeof ctx->cbcmac); +} + +/* see bearssl_aead.h */ +void +br_eax_get_tag_trunc(br_eax_context *ctx, void *tag, size_t len) +{ + do_final(ctx); + memcpy(tag, ctx->cbcmac, len); +} + +/* see bearssl_aead.h */ +uint32_t +br_eax_check_tag_trunc(br_eax_context *ctx, const void *tag, size_t len) +{ + unsigned char tmp[16]; + size_t u; + int x; + + br_eax_get_tag(ctx, tmp); + x = 0; + for (u = 0; u < len; u ++) { + x |= tmp[u] ^ ((const unsigned char *)tag)[u]; + } + return EQ0(x); +} + +/* see bearssl_aead.h */ +uint32_t +br_eax_check_tag(br_eax_context *ctx, const void *tag) +{ + return br_eax_check_tag_trunc(ctx, tag, 16); +} + +/* see bearssl_aead.h */ +const br_aead_class br_eax_vtable = { + 16, + (void (*)(const br_aead_class **, const void *, size_t)) + &br_eax_reset, + (void (*)(const br_aead_class **, const void *, size_t)) + &br_eax_aad_inject, + (void (*)(const br_aead_class **)) + &br_eax_flip, + (void (*)(const br_aead_class **, int, void *, size_t)) + &br_eax_run, + (void (*)(const br_aead_class **, void *)) + &br_eax_get_tag, + (uint32_t (*)(const br_aead_class **, const void *)) + &br_eax_check_tag, + (void (*)(const br_aead_class **, void *, size_t)) + &br_eax_get_tag_trunc, + (uint32_t (*)(const br_aead_class **, const void *, size_t)) + &br_eax_check_tag_trunc +}; diff --git a/src/bearssl/ec_p256_m31.c b/src/bearssl/ec_p256_m31.c index 0462c15..ec22c3e 100644 --- a/src/bearssl/ec_p256_m31.c +++ b/src/bearssl/ec_p256_m31.c @@ -423,17 +423,17 @@ mul_f256(uint32_t *d, const uint32_t *a, const uint32_t *b) } for (i = 17; i >= 9; i --) { - uint64_t x; - - x = s[i]; - s[i - 1] += ARSHW(x, 2); - s[i - 2] += (x << 28) & 0x3FFFFFFF; - s[i - 2] -= ARSHW(x, 4); - s[i - 3] -= (x << 26) & 0x3FFFFFFF; - s[i - 5] -= ARSHW(x, 10); - s[i - 6] -= (x << 20) & 0x3FFFFFFF; - s[i - 8] += ARSHW(x, 16); - s[i - 9] += (x << 14) & 0x3FFFFFFF; + uint64_t y; + + y = s[i]; + s[i - 1] += ARSHW(y, 2); + s[i - 2] += (y << 28) & 0x3FFFFFFF; + s[i - 2] -= ARSHW(y, 4); + s[i - 3] -= (y << 26) & 0x3FFFFFFF; + s[i - 5] -= ARSHW(y, 10); + s[i - 6] -= (y << 20) & 0x3FFFFFFF; + s[i - 8] += ARSHW(y, 16); + s[i - 9] += (y << 14) & 0x3FFFFFFF; } /* @@ -535,17 +535,17 @@ square_f256(uint32_t *d, const uint32_t *a) } for (i = 17; i >= 9; i --) { - uint64_t x; - - x = s[i]; - s[i - 1] += ARSHW(x, 2); - s[i - 2] += (x << 28) & 0x3FFFFFFF; - s[i - 2] -= ARSHW(x, 4); - s[i - 3] -= (x << 26) & 0x3FFFFFFF; - s[i - 5] -= ARSHW(x, 10); - s[i - 6] -= (x << 20) & 0x3FFFFFFF; - s[i - 8] += ARSHW(x, 16); - s[i - 9] += (x << 14) & 0x3FFFFFFF; + uint64_t y; + + y = s[i]; + s[i - 1] += ARSHW(y, 2); + s[i - 2] += (y << 28) & 0x3FFFFFFF; + s[i - 2] -= ARSHW(y, 4); + s[i - 3] -= (y << 26) & 0x3FFFFFFF; + s[i - 5] -= ARSHW(y, 10); + s[i - 6] -= (y << 20) & 0x3FFFFFFF; + s[i - 8] += ARSHW(y, 16); + s[i - 9] += (y << 14) & 0x3FFFFFFF; } /* diff --git a/src/bearssl/gcm.c b/src/bearssl/gcm.c index 9cf0f38..ede5f08 100644 --- a/src/bearssl/gcm.c +++ b/src/bearssl/gcm.c @@ -56,6 +56,7 @@ br_gcm_init(br_gcm_context *ctx, const br_block_ctr_class **bctx, br_ghash gh) { unsigned char iv[12]; + ctx->vtable = &br_gcm_vtable; ctx->bctx = bctx; ctx->gh = gh; @@ -262,9 +263,19 @@ br_gcm_get_tag(br_gcm_context *ctx, void *tag) (*ctx->bctx)->run(ctx->bctx, ctx->j0_1, ctx->j0_2, tag, 16); } +/* see bearssl_aead.h */ +void +br_gcm_get_tag_trunc(br_gcm_context *ctx, void *tag, size_t len) +{ + unsigned char tmp[16]; + + br_gcm_get_tag(ctx, tmp); + memcpy(tag, tmp, len); +} + /* see bearssl_aead.h */ uint32_t -br_gcm_check_tag(br_gcm_context *ctx, const void *tag) +br_gcm_check_tag_trunc(br_gcm_context *ctx, const void *tag, size_t len) { unsigned char tmp[16]; size_t u; @@ -272,12 +283,19 @@ br_gcm_check_tag(br_gcm_context *ctx, const void *tag) br_gcm_get_tag(ctx, tmp); x = 0; - for (u = 0; u < sizeof tmp; u ++) { + for (u = 0; u < len; u ++) { x |= tmp[u] ^ ((const unsigned char *)tag)[u]; } return EQ0(x); } +/* see bearssl_aead.h */ +uint32_t +br_gcm_check_tag(br_gcm_context *ctx, const void *tag) +{ + return br_gcm_check_tag_trunc(ctx, tag, 16); +} + /* see bearssl_aead.h */ const br_aead_class br_gcm_vtable = { 16, @@ -292,5 +310,9 @@ const br_aead_class br_gcm_vtable = { (void (*)(const br_aead_class **, void *)) &br_gcm_get_tag, (uint32_t (*)(const br_aead_class **, const void *)) - &br_gcm_check_tag + &br_gcm_check_tag, + (void (*)(const br_aead_class **, void *, size_t)) + &br_gcm_get_tag_trunc, + (uint32_t (*)(const br_aead_class **, const void *, size_t)) + &br_gcm_check_tag_trunc }; diff --git a/src/bearssl/ghash_pclmul.c b/src/bearssl/ghash_pclmul.c index 7c8f2fa..a58e7dc 100644 --- a/src/bearssl/ghash_pclmul.c +++ b/src/bearssl/ghash_pclmul.c @@ -22,6 +22,7 @@ * SOFTWARE. */ +#define BR_ENABLE_INTRINSICS 1 #include "inner.h" /* @@ -31,20 +32,27 @@ #if BR_AES_X86NI -#if BR_AES_X86NI_GCC -#if BR_AES_X86NI_GCC_OLD -#pragma GCC push_options -#pragma GCC target("sse2,ssse3,pclmul") -#pragma GCC diagnostic ignored "-Wpsabi" -#endif -#include -#include -#include -#endif +/* + * Test CPU support for PCLMULQDQ. + */ +static inline int +pclmul_supported(void) +{ + /* + * Bit mask for features in ECX: + * 1 PCLMULQDQ support + */ + return br_cpuid(0, 0, 0x00000002, 0); +} -#if BR_AES_X86NI_MSC -#include -#endif +/* see bearssl_hash.h */ +br_ghash +br_ghash_pclmul_get(void) +{ + return pclmul_supported() ? &br_ghash_pclmul : 0; +} + +BR_TARGETS_X86_UP /* * GHASH is defined over elements of GF(2^128) with "full little-endian" @@ -73,6 +81,66 @@ * chunks. We number chunks from 0 to 3 in left to right order. */ +/* + * Byte-swap a complete 128-bit value. This normally uses + * _mm_shuffle_epi8(), which gets translated to pshufb (an SSSE3 opcode). + * However, this crashes old Clang versions, so, for Clang before 3.8, + * we use an alternate (and less efficient) version. + */ +#if BR_CLANG && !BR_CLANG_3_8 +#define BYTESWAP_DECL +#define BYTESWAP_PREP (void)0 +#define BYTESWAP(x) do { \ + __m128i byteswap1, byteswap2; \ + byteswap1 = (x); \ + byteswap2 = _mm_srli_epi16(byteswap1, 8); \ + byteswap1 = _mm_slli_epi16(byteswap1, 8); \ + byteswap1 = _mm_or_si128(byteswap1, byteswap2); \ + byteswap1 = _mm_shufflelo_epi16(byteswap1, 0x1B); \ + byteswap1 = _mm_shufflehi_epi16(byteswap1, 0x1B); \ + (x) = _mm_shuffle_epi32(byteswap1, 0x4E); \ + } while (0) +#else +#define BYTESWAP_DECL __m128i byteswap_index; +#define BYTESWAP_PREP do { \ + byteswap_index = _mm_set_epi8( \ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); \ + } while (0) +#define BYTESWAP(x) do { \ + (x) = _mm_shuffle_epi8((x), byteswap_index); \ + } while (0) +#endif + +/* + * Call pclmulqdq. Clang appears to have trouble with the intrinsic, so, + * for that compiler, we use inline assembly. Inline assembly is + * potentially a bit slower because the compiler does not understand + * what the opcode does, and thus cannot optimize instruction + * scheduling. + * + * We use a target of "sse2" only, so that Clang may still handle the + * '__m128i' type and allocate SSE2 registers. + */ +#if BR_CLANG +BR_TARGET("sse2") +static inline __m128i +pclmulqdq00(__m128i x, __m128i y) +{ + __asm__ ("pclmulqdq $0x00, %1, %0" : "+x" (x) : "x" (y)); + return x; +} +BR_TARGET("sse2") +static inline __m128i +pclmulqdq11(__m128i x, __m128i y) +{ + __asm__ ("pclmulqdq $0x11, %1, %0" : "+x" (x) : "x" (y)); + return x; +} +#else +#define pclmulqdq00(x, y) _mm_clmulepi64_si128(x, y, 0x00) +#define pclmulqdq11(x, y) _mm_clmulepi64_si128(x, y, 0x11) +#endif + /* * From a 128-bit value kw, compute kx as the XOR of the two 64-bit * halves of kw (into the right half of kx; left half is unspecified). @@ -150,8 +218,8 @@ */ #define SQUARE_F128(kw, dw, dx) do { \ __m128i z0, z1, z2, z3; \ - z1 = _mm_clmulepi64_si128(kw, kw, 0x11); \ - z3 = _mm_clmulepi64_si128(kw, kw, 0x00); \ + z1 = pclmulqdq11(kw, kw); \ + z3 = pclmulqdq00(kw, kw); \ z0 = _mm_shuffle_epi32(z1, 0x0E); \ z2 = _mm_shuffle_epi32(z3, 0x0E); \ SL_256(z0, z1, z2, z3); \ @@ -168,7 +236,7 @@ br_ghash_pclmul(void *y, const void *h, const void *data, size_t len) unsigned char tmp[64]; size_t num4, num1; __m128i yw, h1w, h1x; - __m128i byteswap_index; + BYTESWAP_DECL /* * We split data into two chunks. First chunk starts at buf1 @@ -188,18 +256,17 @@ br_ghash_pclmul(void *y, const void *h, const void *data, size_t len) } /* - * Constant value to perform endian conversion. + * Preparatory step for endian conversions. */ - byteswap_index = _mm_set_epi8( - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + BYTESWAP_PREP; /* * Load y and h. */ yw = _mm_loadu_si128(y); h1w = _mm_loadu_si128(h); - yw = _mm_shuffle_epi8(yw, byteswap_index); - h1w = _mm_shuffle_epi8(h1w, byteswap_index); + BYTESWAP(yw); + BYTESWAP(h1w); BK(h1w, h1x); if (num4 > 0) { @@ -214,9 +281,9 @@ br_ghash_pclmul(void *y, const void *h, const void *data, size_t len) /* * Compute h3 = h^3 = h*(h^2). */ - t1 = _mm_clmulepi64_si128(h1w, h2w, 0x11); - t3 = _mm_clmulepi64_si128(h1w, h2w, 0x00); - t2 = _mm_xor_si128(_mm_clmulepi64_si128(h1x, h2x, 0x00), + t1 = pclmulqdq11(h1w, h2w); + t3 = pclmulqdq00(h1w, h2w); + t2 = _mm_xor_si128(pclmulqdq00(h1x, h2x), _mm_xor_si128(t1, t3)); t0 = _mm_shuffle_epi32(t1, 0x0E); t1 = _mm_xor_si128(t1, _mm_shuffle_epi32(t2, 0x0E)); @@ -238,10 +305,10 @@ br_ghash_pclmul(void *y, const void *h, const void *data, size_t len) aw1 = _mm_loadu_si128((void *)(buf1 + 16)); aw2 = _mm_loadu_si128((void *)(buf1 + 32)); aw3 = _mm_loadu_si128((void *)(buf1 + 48)); - aw0 = _mm_shuffle_epi8(aw0, byteswap_index); - aw1 = _mm_shuffle_epi8(aw1, byteswap_index); - aw2 = _mm_shuffle_epi8(aw2, byteswap_index); - aw3 = _mm_shuffle_epi8(aw3, byteswap_index); + BYTESWAP(aw0); + BYTESWAP(aw1); + BYTESWAP(aw2); + BYTESWAP(aw3); buf1 += 64; aw0 = _mm_xor_si128(aw0, yw); @@ -252,25 +319,25 @@ br_ghash_pclmul(void *y, const void *h, const void *data, size_t len) t1 = _mm_xor_si128( _mm_xor_si128( - _mm_clmulepi64_si128(aw0, h4w, 0x11), - _mm_clmulepi64_si128(aw1, h3w, 0x11)), + pclmulqdq11(aw0, h4w), + pclmulqdq11(aw1, h3w)), _mm_xor_si128( - _mm_clmulepi64_si128(aw2, h2w, 0x11), - _mm_clmulepi64_si128(aw3, h1w, 0x11))); + pclmulqdq11(aw2, h2w), + pclmulqdq11(aw3, h1w))); t3 = _mm_xor_si128( _mm_xor_si128( - _mm_clmulepi64_si128(aw0, h4w, 0x00), - _mm_clmulepi64_si128(aw1, h3w, 0x00)), + pclmulqdq00(aw0, h4w), + pclmulqdq00(aw1, h3w)), _mm_xor_si128( - _mm_clmulepi64_si128(aw2, h2w, 0x00), - _mm_clmulepi64_si128(aw3, h1w, 0x00))); + pclmulqdq00(aw2, h2w), + pclmulqdq00(aw3, h1w))); t2 = _mm_xor_si128( _mm_xor_si128( - _mm_clmulepi64_si128(ax0, h4x, 0x00), - _mm_clmulepi64_si128(ax1, h3x, 0x00)), + pclmulqdq00(ax0, h4x), + pclmulqdq00(ax1, h3x)), _mm_xor_si128( - _mm_clmulepi64_si128(ax2, h2x, 0x00), - _mm_clmulepi64_si128(ax3, h1x, 0x00))); + pclmulqdq00(ax2, h2x), + pclmulqdq00(ax3, h1x))); t2 = _mm_xor_si128(t2, _mm_xor_si128(t1, t3)); t0 = _mm_shuffle_epi32(t1, 0x0E); t1 = _mm_xor_si128(t1, _mm_shuffle_epi32(t2, 0x0E)); @@ -286,15 +353,15 @@ br_ghash_pclmul(void *y, const void *h, const void *data, size_t len) __m128i t0, t1, t2, t3; aw = _mm_loadu_si128((void *)buf2); - aw = _mm_shuffle_epi8(aw, byteswap_index); + BYTESWAP(aw); buf2 += 16; aw = _mm_xor_si128(aw, yw); BK(aw, ax); - t1 = _mm_clmulepi64_si128(aw, h1w, 0x11); - t3 = _mm_clmulepi64_si128(aw, h1w, 0x00); - t2 = _mm_clmulepi64_si128(ax, h1x, 0x00); + t1 = pclmulqdq11(aw, h1w); + t3 = pclmulqdq00(aw, h1w); + t2 = pclmulqdq00(ax, h1x); t2 = _mm_xor_si128(t2, _mm_xor_si128(t1, t3)); t0 = _mm_shuffle_epi32(t1, 0x0E); t1 = _mm_xor_si128(t1, _mm_shuffle_epi32(t2, 0x0E)); @@ -304,52 +371,11 @@ br_ghash_pclmul(void *y, const void *h, const void *data, size_t len) yw = _mm_unpacklo_epi64(t1, t0); } - yw = _mm_shuffle_epi8(yw, byteswap_index); + BYTESWAP(yw); _mm_storeu_si128(y, yw); } -/* - * Test CPU support for PCLMULQDQ. - */ -static int -pclmul_supported(void) -{ - /* - * Bit mask for features in ECX: - * 1 PCLMULQDQ support - */ -#define MASK 0x00000002 - -#if BR_AES_X86NI_GCC - unsigned eax, ebx, ecx, edx; - - if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) { - return (ecx & MASK) == MASK; - } else { - return 0; - } -#elif BR_AES_X86NI_MSC - int info[4]; - - __cpuid(info, 1); - return ((uint32_t)info[2] & MASK) == MASK; -#else - return 0; -#endif - -#undef MASK -} - -/* see bearssl_hash.h */ -br_ghash -br_ghash_pclmul_get(void) -{ - return pclmul_supported() ? &br_ghash_pclmul : 0; -} - -#if BR_AES_X86NI_GCC && BR_AES_X86NI_GCC_OLD -#pragma GCC pop_options -#endif +BR_TARGETS_X86_DOWN #else diff --git a/src/bearssl/i15_modpow2.c b/src/bearssl/i15_modpow2.c index 37073a4..4b32118 100644 --- a/src/bearssl/i15_modpow2.c +++ b/src/bearssl/i15_modpow2.c @@ -134,11 +134,11 @@ br_i15_modpow_opt(uint16_t *x, br_i15_zero(t2, m[0]); base = t2 + mwlen; for (u = 1; u < ((uint32_t)1 << k); u ++) { - uint32_t m; + uint32_t mask; - m = -EQ(u, bits); + mask = -EQ(u, bits); for (v = 1; v < mwlen; v ++) { - t2[v] |= m & base[v]; + t2[v] |= mask & base[v]; } base += mwlen; } diff --git a/src/bearssl/i31_modpow2.c b/src/bearssl/i31_modpow2.c index 23ae0cd..0b8f8cf 100644 --- a/src/bearssl/i31_modpow2.c +++ b/src/bearssl/i31_modpow2.c @@ -134,11 +134,11 @@ br_i31_modpow_opt(uint32_t *x, br_i31_zero(t2, m[0]); base = t2 + mwlen; for (u = 1; u < ((uint32_t)1 << k); u ++) { - uint32_t m; + uint32_t mask; - m = -EQ(u, bits); + mask = -EQ(u, bits); for (v = 1; v < mwlen; v ++) { - t2[v] |= m & base[v]; + t2[v] |= mask & base[v]; } base += mwlen; } diff --git a/src/bearssl/inner.h b/src/bearssl/inner.h index 2829f23..fb49d0e 100644 --- a/src/bearssl/inner.h +++ b/src/bearssl/inner.h @@ -109,97 +109,212 @@ * Set BR_LOMUL on platforms where it makes sense. */ #ifndef BR_LOMUL -#if BR_ARMEL_CORTEX_GCC +#if BR_ARMEL_CORTEXM_GCC #define BR_LOMUL 1 #endif #endif /* - * Determine whether x86 AES instructions are understood by the compiler. + * Architecture detection. */ -#ifndef BR_AES_X86NI -#if (__i386__ || __x86_64__) \ - && ((__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) \ - || (__clang_major__ > 3 \ - || (__clang_major__ == 3 && __clang_minor__ >= 7))) -#define BR_AES_X86NI 1 -#elif (_M_IX86 || _M_X64) && (_MSC_VER >= 1700) -#define BR_AES_X86NI 1 +#ifndef BR_i386 +#if __i386__ || _M_IX86 +#define BR_i386 1 +#endif +#endif + +#ifndef BR_amd64 +#if __x86_64__ || _M_X64 +#define BR_amd64 1 #endif #endif /* - * If we use x86 AES instruction, determine the compiler brand. - */ -#if BR_AES_X86NI -#ifndef BR_AES_X86NI_GCC -#if __GNUC__ -#define BR_AES_X86NI_GCC 1 + * Compiler brand and version. + * + * Implementations that use intrinsics need to detect the compiler type + * and version because some specific actions may be needed to activate + * the corresponding opcodes, both for header inclusion, and when using + * them in a function. + * + * BR_GCC, BR_CLANG and BR_MSC will be set to 1 for, respectively, GCC, + * Clang and MS Visual C. For each of them, sub-macros will be defined + * for versions; each sub-macro is set whenever the compiler version is + * at least as recent as the one corresponding to the macro. + */ + +/* + * GCC thresholds are on versions 4.4 to 4.9 and 5.0. + */ +#ifndef BR_GCC +#if __GNUC__ && !__clang__ +#define BR_GCC 1 + +#if __GNUC__ > 4 +#define BR_GCC_5_0 1 +#elif __GNUC__ == 4 && __GNUC_MINOR__ >= 9 +#define BR_GCC_4_9 1 +#elif __GNUC__ == 4 && __GNUC_MINOR__ >= 8 +#define BR_GCC_4_8 1 +#elif __GNUC__ == 4 && __GNUC_MINOR__ >= 7 +#define BR_GCC_4_7 1 +#elif __GNUC__ == 4 && __GNUC_MINOR__ >= 6 +#define BR_GCC_4_6 1 +#elif __GNUC__ == 4 && __GNUC_MINOR__ >= 5 +#define BR_GCC_4_5 1 +#elif __GNUC__ == 4 && __GNUC_MINOR__ >= 4 +#define BR_GCC_4_4 1 +#endif + +#if BR_GCC_5_0 +#define BR_GCC_4_9 1 +#endif +#if BR_GCC_4_9 +#define BR_GCC_4_8 1 #endif +#if BR_GCC_4_8 +#define BR_GCC_4_7 1 #endif -#ifndef BR_AES_X86NI_MSC -#if _MSC_VER >= 1700 -#define BR_AES_X86NI_MSC 1 +#if BR_GCC_4_7 +#define BR_GCC_4_6 1 #endif +#if BR_GCC_4_6 +#define BR_GCC_4_5 1 +#endif +#if BR_GCC_4_5 +#define BR_GCC_4_4 1 +#endif + #endif #endif /* - * Determine whether SSE2 intrinsics are understood by the compiler. - * Right now, we restrict ourselves to compiler versions where things - * are documented to work well: - * -- GCC 4.4+ and Clang 3.7+ understand the function attribute "target" - * -- MS Visual Studio 2005 documents the existence of - * SSE2-powered code _might_ work with older versions, but there is no - * pressing need to do so right now. + * Clang thresholds are on versions 3.7.0 and 3.8.0. */ -#ifndef BR_SSE2 -#if (__i386__ || __x86_64__) \ - && ((__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)) \ - || (__clang_major__ > 3 \ - || (__clang_major__ == 3 && __clang_minor__ >= 7))) -#define BR_SSE2 1 -#elif (_M_IX86 || _M_X64) && (_MSC_VER >= 1400) -#define BR_SSE2 1 +#ifndef BR_CLANG +#if __clang__ +#define BR_CLANG 1 + +#if __clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 8) +#define BR_CLANG_3_8 1 +#elif __clang_major__ == 3 && __clang_minor__ >= 7 +#define BR_CLANG_3_7 1 +#endif + +#if BR_CLANG_3_8 +#define BR_CLANG_3_7 1 +#endif + #endif #endif /* - * If we use SSE2 intrinsics, determine the compiler brand. + * MS Visual C thresholds are on Visual Studio 2005 to 2015. */ -#if BR_SSE2 -#ifndef BR_SSE2_GCC -#if __GNUC__ -#define BR_SSE2_GCC 1 +#ifndef BR_MSC +#if _MSC_VER +#define BR_MSC 1 + +#if _MSC_VER >= 1900 +#define BR_MSC_2015 1 +#elif _MSC_VER >= 1800 +#define BR_MSC_2013 1 +#elif _MSC_VER >= 1700 +#define BR_MSC_2012 1 +#elif _MSC_VER >= 1600 +#define BR_MSC_2010 1 +#elif _MSC_VER >= 1500 +#define BR_MSC_2008 1 +#elif _MSC_VER >= 1400 +#define BR_MSC_2005 1 #endif + +#if BR_MSC_2015 +#define BR_MSC_2013 1 +#endif +#if BR_MSC_2013 +#define BR_MSC_2012 1 +#endif +#if BR_MSC_2012 +#define BR_MSC_2010 1 #endif -#ifndef BR_SSE2_MSC -#if _MSC_VER >= 1400 -#define BR_SSE2_MSC 1 +#if BR_MSC_2010 +#define BR_MSC_2008 1 #endif +#if BR_MSC_2008 +#define BR_MSC_2005 1 +#endif + #endif #endif /* - * A macro to tag a function with a "target" attribute (for GCC and Clang). + * GCC 4.4+ and Clang 3.7+ allow tagging specific functions with a + * 'target' attribute that activates support for specific opcodes. */ -#if BR_AES_X86NI_GCC || BR_SSE2_GCC +#if BR_GCC_4_4 || BR_CLANG_3_7 #define BR_TARGET(x) __attribute__((target(x))) #else #define BR_TARGET(x) #endif /* - * GCC versions from 4.4 to 4.8 (inclusive) must use a special #pragma - * to activate extra opcodes before including the relevant intrinsic - * AES-NI headers. But these don't work with Clang (which does not need - * them either). We also need that #pragma for GCC 4.9 in order to work - * around a compiler bug (it tends to blow up on ghash_pclmul code - * otherwise). + * AES-NI intrinsics are available on x86 (32-bit and 64-bit) with + * GCC 4.8+, Clang 3.7+ and MSC 2012+. + */ +#ifndef BR_AES_X86NI +#if (BR_i386 || BR_amd64) && (BR_GCC_4_8 || BR_CLANG_3_7 || BR_MSC_2012) +#define BR_AES_X86NI 1 +#endif +#endif + +/* + * SSE2 intrinsics are available on x86 (32-bit and 64-bit) with + * GCC 4.4+, Clang 3.7+ and MSC 2005+. */ -#if BR_AES_X86NI_GCC && !defined BR_AES_X86NI_GCC_OLD -#if __GNUC__ == 4 && __GNUC_MINOR__ >= 4 && __GNUC_MINOR__ <= 9 && !__clang__ -#define BR_AES_X86NI_GCC_OLD 1 +#ifndef BR_SSE2 +#if (BR_i386 || BR_amd64) && (BR_GCC_4_4 || BR_CLANG_3_7 || BR_MSC_2005) +#define BR_SSE2 1 +#endif +#endif + +/* + * RDRAND intrinsics are available on x86 (32-bit and 64-bit) with + * GCC 4.6+, Clang 3.7+ and MSC 2012+. + */ +#ifndef BR_RDRAND +#if (BR_i386 || BR_amd64) && (BR_GCC_4_6 || BR_CLANG_3_7 || BR_MSC_2012) +#define BR_RDRAND 1 +#endif +#endif + +/* + * Determine type of OS for random number generation. Macro names and + * values are documented on: + * https://sourceforge.net/p/predef/wiki/OperatingSystems/ + * + * TODO: enrich the list of detected system. Also add detection for + * alternate system calls like getentropy(), which are usually + * preferable when available. + */ + +#ifndef BR_USE_URANDOM +#if defined _AIX \ + || defined __ANDROID__ \ + || defined __FreeBSD__ \ + || defined __NetBSD__ \ + || defined __OpenBSD__ \ + || defined __DragonFly__ \ + || defined __linux__ \ + || (defined __sun && (defined __SVR4 || defined __svr4__)) \ + || (defined __APPLE__ && defined __MACH__) +#define BR_USE_URANDOM 1 +#endif +#endif + +#ifndef BR_USE_WIN32_RAND +#if defined _WIN32 || defined _WIN64 +#define BR_USE_WIN32_RAND 1 #endif #endif @@ -281,6 +396,24 @@ #endif +/* + * Detect support for an OS-provided time source. + */ + +#ifndef BR_USE_UNIX_TIME +#if defined __unix__ || defined __linux__ \ + || defined _POSIX_SOURCE || defined _POSIX_C_SOURCE \ + || (defined __APPLE__ && defined __MACH__) +#define BR_USE_UNIX_TIME 1 +#endif +#endif + +#ifndef BR_USE_WIN32_TIME +#if defined _WIN32 || defined _WIN64 +#define BR_USE_WIN32_TIME 1 +#endif +#endif + /* ==================================================================== */ /* * Encoding/decoding functions. @@ -2087,6 +2220,125 @@ int br_ssl_choose_hash(unsigned bf); #endif +/* ==================================================================== */ +/* + * Special "activate intrinsics" code, needed for some compiler versions. + * This is defined at the end of this file, so that it won't impact any + * of the inline functions defined previously; and it is controlled by + * a specific macro defined in the caller code. + * + * Calling code conventions: + * + * - Caller must define BR_ENABLE_INTRINSICS before including "inner.h". + * - Functions that use intrinsics must be enclosed in an "enabled" + * region (between BR_TARGETS_X86_UP and BR_TARGETS_X86_DOWN). + * - Functions that use intrinsics must be tagged with the appropriate + * BR_TARGET(). + */ + +#if BR_ENABLE_INTRINSICS && (BR_GCC_4_4 || BR_CLANG_3_7 || BR_MSC_2005) + +/* + * x86 intrinsics (both 32-bit and 64-bit). + */ +#if BR_i386 || BR_amd64 + +/* + * On GCC before version 5.0, we need to use the pragma to enable the + * target options globally, because the 'target' function attribute + * appears to be unreliable. Before 4.6 we must also avoid the + * push_options / pop_options mechanism, because it tends to trigger + * some internal compiler errors. + */ +#if BR_GCC && !BR_GCC_5_0 +#if BR_GCC_4_6 +#define BR_TARGETS_X86_UP \ + _Pragma("GCC push_options") \ + _Pragma("GCC target(\"sse2,ssse3,sse4.1,aes,pclmul,rdrnd\")") +#define BR_TARGETS_X86_DOWN \ + _Pragma("GCC pop_options") +#else +#define BR_TARGETS_X86_UP \ + _Pragma("GCC target(\"sse2,ssse3,sse4.1,aes,pclmul\")") +#endif +#define BR_TARGETS_X86_DOWN +#pragma GCC diagnostic ignored "-Wpsabi" +#endif + +#if BR_CLANG && !BR_CLANG_3_8 +#undef __SSE2__ +#undef __SSE3__ +#undef __SSSE3__ +#undef __SSE4_1__ +#undef __AES__ +#undef __PCLMUL__ +#undef __RDRND__ +#define __SSE2__ 1 +#define __SSE3__ 1 +#define __SSSE3__ 1 +#define __SSE4_1__ 1 +#define __AES__ 1 +#define __PCLMUL__ 1 +#define __RDRND__ 1 +#endif + +#ifndef BR_TARGETS_X86_UP +#define BR_TARGETS_X86_UP +#endif +#ifndef BR_TARGETS_X86_DOWN +#define BR_TARGETS_X86_DOWN +#endif + +#if BR_GCC || BR_CLANG +BR_TARGETS_X86_UP +#include +#include +#define br_bswap32 __builtin_bswap32 +BR_TARGETS_X86_DOWN +#endif + +#if BR_MSC +#include +#include +#include +#define br_bswap32 _byteswap_ulong +#endif + +static inline int +br_cpuid(uint32_t mask_eax, uint32_t mask_ebx, + uint32_t mask_ecx, uint32_t mask_edx) +{ +#if BR_GCC || BR_CLANG + unsigned eax, ebx, ecx, edx; + + if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) { + if ((eax & mask_eax) == mask_eax + && (ebx & mask_ebx) == mask_ebx + && (ecx & mask_ecx) == mask_ecx + && (edx & mask_edx) == mask_edx) + { + return 1; + } + } +#elif BR_MSC + int info[4]; + + __cpuid(info, 1); + if (((uint32_t)info[0] & mask_eax) == mask_eax + && ((uint32_t)info[1] & mask_ebx) == mask_ebx + && ((uint32_t)info[2] & mask_ecx) == mask_ecx + && ((uint32_t)info[3] & mask_edx) == mask_edx) + { + return 1; + } +#endif + return 0; +} + +#endif + +#endif + /* ==================================================================== */ #endif diff --git a/src/bearssl/pemdec.c b/src/bearssl/pemdec.c index 51e610b..db8f0e6 100644 --- a/src/bearssl/pemdec.c +++ b/src/bearssl/pemdec.c @@ -69,7 +69,7 @@ void br_pem_decoder_run(void *t0ctx); #include "inner.h" -#define CTX ((br_pem_decoder_context *)((unsigned char *)t0ctx - offsetof(br_pem_decoder_context, cpu))) +#define CTX ((br_pem_decoder_context *)(void *)((unsigned char *)t0ctx - offsetof(br_pem_decoder_context, cpu))) /* see bearssl_pem.h */ void diff --git a/src/bearssl/settings.c b/src/bearssl/settings.c new file mode 100644 index 0000000..309271c --- /dev/null +++ b/src/bearssl/settings.c @@ -0,0 +1,306 @@ +/* + * Copyright (c) 2017 Thomas Pornin + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +static const br_config_option config[] = { + { "BR_64", +#if BR_64 + 1 +#else + 0 +#endif + }, + { "BR_AES_X86NI", +#if BR_AES_X86NI + 1 +#else + 0 +#endif + }, + { "BR_amd64", +#if BR_amd64 + 1 +#else + 0 +#endif + }, + { "BR_ARMEL_CORTEXM_GCC", +#if BR_ARMEL_CORTEXM_GCC + 1 +#else + 0 +#endif + }, + { "BR_BE_UNALIGNED", +#if BR_BE_UNALIGNED + 1 +#else + 0 +#endif + }, + { "BR_CLANG", +#if BR_CLANG + 1 +#else + 0 +#endif + }, + { "BR_CLANG_3_7", +#if BR_CLANG_3_7 + 1 +#else + 0 +#endif + }, + { "BR_CLANG_3_8", +#if BR_CLANG_3_8 + 1 +#else + 0 +#endif + }, + { "BR_CT_MUL15", +#if BR_CT_MUL15 + 1 +#else + 0 +#endif + }, + { "BR_CT_MUL31", +#if BR_CT_MUL31 + 1 +#else + 0 +#endif + }, + { "BR_GCC", +#if BR_GCC + 1 +#else + 0 +#endif + }, + { "BR_GCC_4_4", +#if BR_GCC_4_4 + 1 +#else + 0 +#endif + }, + { "BR_GCC_4_5", +#if BR_GCC_4_5 + 1 +#else + 0 +#endif + }, + { "BR_GCC_4_6", +#if BR_GCC_4_6 + 1 +#else + 0 +#endif + }, + { "BR_GCC_4_7", +#if BR_GCC_4_7 + 1 +#else + 0 +#endif + }, + { "BR_GCC_4_8", +#if BR_GCC_4_8 + 1 +#else + 0 +#endif + }, + { "BR_GCC_4_9", +#if BR_GCC_4_9 + 1 +#else + 0 +#endif + }, + { "BR_GCC_5_0", +#if BR_GCC_5_0 + 1 +#else + 0 +#endif + }, + { "BR_i386", +#if BR_i386 + 1 +#else + 0 +#endif + }, + { "BR_INT128", +#if BR_INT128 + 1 +#else + 0 +#endif + }, + { "BR_LE_UNALIGNED", +#if BR_LE_UNALIGNED + 1 +#else + 0 +#endif + }, + { "BR_LOMUL", +#if BR_LOMUL + 1 +#else + 0 +#endif + }, + { "BR_MAX_EC_SIZE", BR_MAX_EC_SIZE }, + { "BR_MAX_RSA_SIZE", BR_MAX_RSA_SIZE }, + { "BR_MAX_RSA_FACTOR", BR_MAX_RSA_FACTOR }, + { "BR_MSC", +#if BR_MSC + 1 +#else + 0 +#endif + }, + { "BR_MSC_2005", +#if BR_MSC_2005 + 1 +#else + 0 +#endif + }, + { "BR_MSC_2008", +#if BR_MSC_2008 + 1 +#else + 0 +#endif + }, + { "BR_MSC_2010", +#if BR_MSC_2010 + 1 +#else + 0 +#endif + }, + { "BR_MSC_2012", +#if BR_MSC_2012 + 1 +#else + 0 +#endif + }, + { "BR_MSC_2013", +#if BR_MSC_2013 + 1 +#else + 0 +#endif + }, + { "BR_MSC_2015", +#if BR_MSC_2015 + 1 +#else + 0 +#endif + }, + { "BR_POWER8", +#if BR_POWER8 + 1 +#else + 0 +#endif + }, + { "BR_RDRAND", +#if BR_RDRAND + 1 +#else + 0 +#endif + }, + { "BR_SLOW_MUL", +#if BR_SLOW_MUL + 1 +#else + 0 +#endif + }, + { "BR_SLOW_MUL15", +#if BR_SLOW_MUL15 + 1 +#else + 0 +#endif + }, + { "BR_SSE2", +#if BR_SSE2 + 1 +#else + 0 +#endif + }, + { "BR_UMUL128", +#if BR_UMUL128 + 1 +#else + 0 +#endif + }, + { "BR_USE_UNIX_TIME", +#if BR_USE_UNIX_TIME + 1 +#else + 0 +#endif + }, + { "BR_USE_WIN32_RAND", +#if BR_USE_WIN32_RAND + 1 +#else + 0 +#endif + }, + { "BR_USE_WIN32_TIME", +#if BR_USE_WIN32_TIME + 1 +#else + 0 +#endif + }, + + { NULL, 0 } +}; + +/* see bearssl.h */ +const br_config_option * +br_get_config(void) +{ + return config; +} diff --git a/src/bearssl/skey_decoder.c b/src/bearssl/skey_decoder.c index 4c486d5..f4e43e7 100644 --- a/src/bearssl/skey_decoder.c +++ b/src/bearssl/skey_decoder.c @@ -75,7 +75,7 @@ void br_skey_decoder_run(void *t0ctx); #include "inner.h" -#define CTX ((br_skey_decoder_context *)((unsigned char *)t0ctx - offsetof(br_skey_decoder_context, cpu))) +#define CTX ((br_skey_decoder_context *)(void *)((unsigned char *)t0ctx - offsetof(br_skey_decoder_context, cpu))) #define CONTEXT_NAME br_skey_decoder_context /* see bearssl_x509.h */ diff --git a/src/bearssl/ssl_engine.c b/src/bearssl/ssl_engine.c index 529b107..1022d87 100644 --- a/src/bearssl/ssl_engine.c +++ b/src/bearssl/ssl_engine.c @@ -24,6 +24,9 @@ #include "inner.h" +#if 0 +/* obsolete */ + /* * If BR_USE_URANDOM is not defined, then try to autodetect its presence * through compiler macros. @@ -75,6 +78,8 @@ #pragma comment(lib, "advapi32") #endif +#endif + /* ==================================================================== */ /* * This part of the file does the low-level record management. @@ -456,65 +461,71 @@ engine_clearbuf(br_ssl_engine_context *rc) make_ready_out(rc); } -/* see inner.h */ -int -br_ssl_engine_init_rand(br_ssl_engine_context *cc) +/* + * Make sure the internal PRNG is initialised (but not necessarily + * seeded properly yet). + */ +static int +rng_init(br_ssl_engine_context *cc) { + const br_hash_class *h; + + if (cc->rng_init_done != 0) { + return 1; + } + /* - * TODO: use getrandom() on Linux systems, with a fallback to - * opening /dev/urandom if that system call fails. + * If using TLS-1.2, then SHA-256 or SHA-384 must be present (or + * both); we prefer SHA-256 which is faster for 32-bit systems. * - * Use similar OS facilities on other OS (getentropy() on OpenBSD, - * specialized sysctl on NetBSD and FreeBSD...). + * If using TLS-1.0 or 1.1 then SHA-1 must be present. + * + * Though HMAC_DRBG/SHA-1 is, as far as we know, as safe as + * these things can be, we still prefer the SHA-2 functions over + * SHA-1, if only for public relations (known theoretical + * weaknesses of SHA-1 with regards to collisions are mostly + * irrelevant here, but they still make people nervous). */ -#if BR_USE_URANDOM - if (!cc->rng_os_rand_done) { - int f; - - f = open("/dev/urandom", O_RDONLY); - if (f >= 0) { - unsigned char tmp[32]; - size_t u; - - for (u = 0; u < sizeof tmp;) { - ssize_t len; - - len = read(f, tmp + u, (sizeof tmp) - u); - if (len < 0) { - if (errno == EINTR) { - continue; - } - break; - } - u += (size_t)len; - } - close(f); - if (u == sizeof tmp) { - br_ssl_engine_inject_entropy(cc, tmp, u); - cc->rng_os_rand_done = 1; + h = br_multihash_getimpl(&cc->mhash, br_sha256_ID); + if (!h) { + h = br_multihash_getimpl(&cc->mhash, br_sha384_ID); + if (!h) { + h = br_multihash_getimpl(&cc->mhash, + br_sha1_ID); + if (!h) { + br_ssl_engine_fail(cc, BR_ERR_BAD_STATE); + return 0; } } } -#elif BR_USE_WIN32_RAND - if (!cc->rng_os_rand_done) { - HCRYPTPROV hp; + br_hmac_drbg_init(&cc->rng, h, NULL, 0); + cc->rng_init_done = 1; + return 1; +} - if (CryptAcquireContextW(&hp, 0, 0, PROV_RSA_FULL, - CRYPT_VERIFYCONTEXT | CRYPT_SILENT)) - { - BYTE buf[32]; +/* see inner.h */ +int +br_ssl_engine_init_rand(br_ssl_engine_context *cc) +{ + if (!rng_init(cc)) { + return 0; + } - if (CryptGenRandom(hp, sizeof buf, buf)) { - br_ssl_engine_inject_entropy(cc, - buf, sizeof buf); - cc->rng_os_rand_done = 1; - } - CryptReleaseContext(hp, 0); + /* + * We always try OS/hardware seeding once. If it works, then + * we assume proper seeding. If not, then external entropy must + * have been injected; otherwise, we report an error. + */ + if (!cc->rng_os_rand_done) { + br_prng_seeder sd; + + sd = br_prng_seeder_system(NULL); + if (sd != 0 && sd(&cc->rng.vtable)) { + cc->rng_init_done = 2; } + cc->rng_os_rand_done = 1; } -#endif - - if (!cc->rng_init_done) { + if (cc->rng_init_done < 2) { br_ssl_engine_fail(cc, BR_ERR_NO_RANDOM); return 0; } @@ -526,41 +537,17 @@ void br_ssl_engine_inject_entropy(br_ssl_engine_context *cc, const void *data, size_t len) { - if (cc->rng_init_done) { - br_hmac_drbg_update(&cc->rng, data, len); - } else { - /* - * If using TLS-1.2, then SHA-256 or SHA-384 must be - * present (or both); we prefer SHA-256 which is faster - * for 32-bit systems. - * - * If using TLS-1.0 or 1.1 then SHA-1 must be present. - * - * Though HMAC_DRBG/SHA-1 is, as far as we know, as safe - * as these things can be, we still prefer the SHA-2 - * functions over SHA-1, if only for public relations - * (known theoretical weaknesses of SHA-1 with regards to - * collisions are mostly irrelevant here, but they still - * make people nervous). - */ - const br_hash_class *h; - - h = br_multihash_getimpl(&cc->mhash, br_sha256_ID); - if (!h) { - h = br_multihash_getimpl(&cc->mhash, br_sha384_ID); - if (!h) { - h = br_multihash_getimpl(&cc->mhash, - br_sha1_ID); - if (!h) { - br_ssl_engine_fail(cc, - BR_ERR_BAD_STATE); - return; - } - } - } - br_hmac_drbg_init(&cc->rng, h, data, len); - cc->rng_init_done = 1; + /* + * Externally provided entropy is assumed to be "good enough" + * (we cannot really test its quality) so if the RNG structure + * could be initialised at all, then we marked the RNG as + * "properly seeded". + */ + if (!rng_init(cc)) { + return; } + br_hmac_drbg_update(&cc->rng, data, len); + cc->rng_init_done = 2; } /* diff --git a/src/bearssl/ssl_hs_client.c b/src/bearssl/ssl_hs_client.c index 9d3418b..2faba6a 100644 --- a/src/bearssl/ssl_hs_client.c +++ b/src/bearssl/ssl_hs_client.c @@ -75,7 +75,7 @@ void br_ssl_hs_client_run(void *t0ctx); /* * This macro evaluates to a pointer to the current engine context. */ -#define ENG ((br_ssl_engine_context *)((unsigned char *)t0ctx - offsetof(br_ssl_engine_context, cpu))) +#define ENG ((br_ssl_engine_context *)(void *)((unsigned char *)t0ctx - offsetof(br_ssl_engine_context, cpu))) @@ -1397,7 +1397,7 @@ br_ssl_hs_client_run(void *t0ctx) /* get16 */ size_t addr = (size_t)T0_POP(); - T0_PUSH(*(uint16_t *)((unsigned char *)ENG + addr)); + T0_PUSH(*(uint16_t *)(void *)((unsigned char *)ENG + addr)); } break; @@ -1405,7 +1405,7 @@ br_ssl_hs_client_run(void *t0ctx) /* get32 */ size_t addr = (size_t)T0_POP(); - T0_PUSH(*(uint32_t *)((unsigned char *)ENG + addr)); + T0_PUSH(*(uint32_t *)(void *)((unsigned char *)ENG + addr)); } break; @@ -1557,7 +1557,7 @@ br_ssl_hs_client_run(void *t0ctx) /* set16 */ size_t addr = (size_t)T0_POP(); - *(uint16_t *)((unsigned char *)ENG + addr) = (uint16_t)T0_POP(); + *(uint16_t *)(void *)((unsigned char *)ENG + addr) = (uint16_t)T0_POP(); } break; @@ -1565,7 +1565,7 @@ br_ssl_hs_client_run(void *t0ctx) /* set32 */ size_t addr = (size_t)T0_POP(); - *(uint32_t *)((unsigned char *)ENG + addr) = (uint32_t)T0_POP(); + *(uint32_t *)(void *)((unsigned char *)ENG + addr) = (uint32_t)T0_POP(); } break; diff --git a/src/bearssl/ssl_hs_server.c b/src/bearssl/ssl_hs_server.c index 3970d28..5dd30a1 100644 --- a/src/bearssl/ssl_hs_server.c +++ b/src/bearssl/ssl_hs_server.c @@ -75,7 +75,7 @@ void br_ssl_hs_server_run(void *t0ctx); /* * This macro evaluates to a pointer to the current engine context. */ -#define ENG ((br_ssl_engine_context *)((unsigned char *)t0ctx - offsetof(br_ssl_engine_context, cpu))) +#define ENG ((br_ssl_engine_context *)(void *)((unsigned char *)t0ctx - offsetof(br_ssl_engine_context, cpu))) @@ -1454,7 +1454,7 @@ br_ssl_hs_server_run(void *t0ctx) /* get16 */ size_t addr = (size_t)T0_POP(); - T0_PUSH(*(uint16_t *)((unsigned char *)ENG + addr)); + T0_PUSH(*(uint16_t *)(void *)((unsigned char *)ENG + addr)); } break; @@ -1462,7 +1462,7 @@ br_ssl_hs_server_run(void *t0ctx) /* get32 */ size_t addr = (size_t)T0_POP(); - T0_PUSH(*(uint32_t *)((unsigned char *)ENG + addr)); + T0_PUSH(*(uint32_t *)(void *)((unsigned char *)ENG + addr)); } break; @@ -1635,7 +1635,7 @@ br_ssl_hs_server_run(void *t0ctx) /* set16 */ size_t addr = (size_t)T0_POP(); - *(uint16_t *)((unsigned char *)ENG + addr) = (uint16_t)T0_POP(); + *(uint16_t *)(void *)((unsigned char *)ENG + addr) = (uint16_t)T0_POP(); } break; @@ -1643,7 +1643,7 @@ br_ssl_hs_server_run(void *t0ctx) /* set32 */ size_t addr = (size_t)T0_POP(); - *(uint32_t *)((unsigned char *)ENG + addr) = (uint32_t)T0_POP(); + *(uint32_t *)(void *)((unsigned char *)ENG + addr) = (uint32_t)T0_POP(); } break; diff --git a/src/bearssl/sysrng.c b/src/bearssl/sysrng.c new file mode 100644 index 0000000..3a10db9 --- /dev/null +++ b/src/bearssl/sysrng.c @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2017 Thomas Pornin + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define BR_ENABLE_INTRINSICS 1 +#include "inner.h" + +#if BR_USE_URANDOM +#include +#include +#include +#include +#endif + +#if BR_USE_WIN32_RAND +#include +#include +#pragma comment(lib, "advapi32") +#endif + +#if BR_RDRAND +BR_TARGETS_X86_UP +BR_TARGET("rdrnd") +static int +seeder_rdrand(const br_prng_class **ctx) +{ + unsigned char tmp[32]; + size_t u; + + for (u = 0; u < sizeof tmp; u += sizeof(uint32_t)) { + int j; + uint32_t x; + + /* + * We use the 32-bit intrinsic so that code is compatible + * with both 32-bit and 64-bit architectures. + * + * Intel recommends trying at least 10 times in case of + * failure. + */ + for (j = 0; j < 10; j ++) { + if (_rdrand32_step(&x)) { + goto next_word; + } + } + return 0; + next_word: + br_enc32le(tmp + u, x); + } + (*ctx)->update(ctx, tmp, sizeof tmp); + return 1; +} +BR_TARGETS_X86_DOWN + +static int +rdrand_supported(void) +{ + /* + * The RDRND support is bit 30 of ECX, as returned by CPUID. + */ + return br_cpuid(0, 0, 0x40000000, 0); +} + +#endif + +#if BR_USE_URANDOM +static int +seeder_urandom(const br_prng_class **ctx) +{ + int f; + + f = open("/dev/urandom", O_RDONLY); + if (f >= 0) { + unsigned char tmp[32]; + size_t u; + + for (u = 0; u < sizeof tmp;) { + ssize_t len; + + len = read(f, tmp + u, (sizeof tmp) - u); + if (len < 0) { + if (errno == EINTR) { + continue; + } + break; + } + u += (size_t)len; + } + close(f); + if (u == sizeof tmp) { + (*ctx)->update(ctx, tmp, sizeof tmp); + return 1; + } + } + return 0; +} +#endif + +#if BR_USE_WIN32_RAND +static int +seeder_win32(const br_prng_class **ctx) +{ + HCRYPTPROV hp; + + if (CryptAcquireContext(&hp, 0, 0, PROV_RSA_FULL, + CRYPT_VERIFYCONTEXT | CRYPT_SILENT)) + { + BYTE buf[32]; + BOOL r; + + r = CryptGenRandom(hp, sizeof buf, buf); + CryptReleaseContext(hp, 0); + if (r) { + (*ctx)->update(ctx, buf, sizeof buf); + return 1; + } + } + return 0; +} +#endif + +/* see bearssl_rand.h.h */ +br_prng_seeder +br_prng_seeder_system(const char **name) +{ +#if BR_RDRAND + if (rdrand_supported()) { + if (name != NULL) { + *name = "rdrand"; + } + return &seeder_rdrand; + } +#endif +#if BR_USE_URANDOM + if (name != NULL) { + *name = "urandom"; + } + return &seeder_urandom; +#elif BR_USE_WIN32_RAND + if (name != NULL) { + *name = "win32"; + } + return &seeder_win32; +#endif + if (name != NULL) { + *name = "none"; + } + return 0; +} diff --git a/src/bearssl/x509_decoder.c b/src/bearssl/x509_decoder.c index 42620c6..8dd970f 100644 --- a/src/bearssl/x509_decoder.c +++ b/src/bearssl/x509_decoder.c @@ -75,7 +75,7 @@ void br_x509_decoder_run(void *t0ctx); #include "inner.h" -#define CTX ((br_x509_decoder_context *)((unsigned char *)t0ctx - offsetof(br_x509_decoder_context, cpu))) +#define CTX ((br_x509_decoder_context *)(void *)((unsigned char *)t0ctx - offsetof(br_x509_decoder_context, cpu))) #define CONTEXT_NAME br_x509_decoder_context /* see bearssl_x509.h */ @@ -743,7 +743,7 @@ br_x509_decoder_run(void *t0ctx) /* set32 */ uint32_t addr = T0_POP(); - *(uint32_t *)((unsigned char *)CTX + addr) = T0_POP(); + *(uint32_t *)(void *)((unsigned char *)CTX + addr) = T0_POP(); } break; diff --git a/src/bearssl/x509_minimal.c b/src/bearssl/x509_minimal.c index 5da61e1..3b876ef 100644 --- a/src/bearssl/x509_minimal.c +++ b/src/bearssl/x509_minimal.c @@ -200,20 +200,6 @@ void br_x509_minimal_run(void *t0ctx); * then validation is reported as failed. */ -#ifndef BR_USE_UNIX_TIME -#if defined __unix__ || defined __linux__ \ - || defined _POSIX_SOURCE || defined _POSIX_C_SOURCE \ - || (defined __APPLE__ && defined __MACH__) -#define BR_USE_UNIX_TIME 1 -#endif -#endif - -#ifndef BR_USE_WIN32_TIME -#if defined _WIN32 || defined _WIN64 -#define BR_USE_WIN32_TIME 1 -#endif -#endif - #if BR_USE_UNIX_TIME #include #endif @@ -222,8 +208,13 @@ void br_x509_minimal_run(void *t0ctx); #include #endif +/* + * The T0 compiler will produce these prototypes declarations in the + * header. + * void br_x509_minimal_init_main(void *ctx); void br_x509_minimal_run(void *ctx); + */ /* see bearssl_x509.h */ void @@ -244,7 +235,7 @@ xm_start_chain(const br_x509_class **ctx, const char *server_name) br_x509_minimal_context *cc; size_t u; - cc = (br_x509_minimal_context *)ctx; + cc = (br_x509_minimal_context *)(void *)ctx; for (u = 0; u < cc->num_name_elts; u ++) { cc->name_elts[u].status = 0; cc->name_elts[u].buf[0] = 0; @@ -267,7 +258,7 @@ xm_start_cert(const br_x509_class **ctx, uint32_t length) { br_x509_minimal_context *cc; - cc = (br_x509_minimal_context *)ctx; + cc = (br_x509_minimal_context *)(void *)ctx; if (cc->err != 0) { return; } @@ -283,7 +274,7 @@ xm_append(const br_x509_class **ctx, const unsigned char *buf, size_t len) { br_x509_minimal_context *cc; - cc = (br_x509_minimal_context *)ctx; + cc = (br_x509_minimal_context *)(void *)ctx; if (cc->err != 0) { return; } @@ -297,7 +288,7 @@ xm_end_cert(const br_x509_class **ctx) { br_x509_minimal_context *cc; - cc = (br_x509_minimal_context *)ctx; + cc = (br_x509_minimal_context *)(void *)ctx; if (cc->err == 0 && cc->cert_length != 0) { cc->err = BR_ERR_X509_TRUNCATED; } @@ -309,7 +300,7 @@ xm_end_chain(const br_x509_class **ctx) { br_x509_minimal_context *cc; - cc = (br_x509_minimal_context *)ctx; + cc = (br_x509_minimal_context *)(void *)ctx; if (cc->err == 0) { if (cc->num_certs == 0) { cc->err = BR_ERR_X509_EMPTY_CHAIN; @@ -327,14 +318,14 @@ xm_get_pkey(const br_x509_class *const *ctx, unsigned *usages) { br_x509_minimal_context *cc; - cc = (br_x509_minimal_context *)ctx; + cc = (br_x509_minimal_context *)(void *)ctx; if (cc->err == BR_ERR_X509_OK || cc->err == BR_ERR_X509_NOT_TRUSTED) { if (usages != NULL) { *usages = cc->key_usages; } - return &((br_x509_minimal_context *)ctx)->pkey; + return &((br_x509_minimal_context *)(void *)ctx)->pkey; } else { return NULL; } @@ -351,7 +342,7 @@ const br_x509_class br_x509_minimal_vtable = { xm_get_pkey }; -#define CTX ((br_x509_minimal_context *)((unsigned char *)t0ctx - offsetof(br_x509_minimal_context, cpu))) +#define CTX ((br_x509_minimal_context *)(void *)((unsigned char *)t0ctx - offsetof(br_x509_minimal_context, cpu))) #define CONTEXT_NAME br_x509_minimal_context #define DNHASH_LEN ((CTX->dn_hash_impl->desc >> BR_HASHDESC_OUT_OFF) & BR_HASHDESC_OUT_MASK) @@ -1439,7 +1430,7 @@ br_x509_minimal_run(void *t0ctx) /* get16 */ uint32_t addr = T0_POP(); - T0_PUSH(*(uint16_t *)((unsigned char *)CTX + addr)); + T0_PUSH(*(uint16_t *)(void *)((unsigned char *)CTX + addr)); } break; @@ -1447,7 +1438,7 @@ br_x509_minimal_run(void *t0ctx) /* get32 */ uint32_t addr = T0_POP(); - T0_PUSH(*(uint32_t *)((unsigned char *)CTX + addr)); + T0_PUSH(*(uint32_t *)(void *)((unsigned char *)CTX + addr)); } break; @@ -1601,7 +1592,7 @@ br_x509_minimal_run(void *t0ctx) /* set16 */ uint32_t addr = T0_POP(); - *(uint16_t *)((unsigned char *)CTX + addr) = T0_POP(); + *(uint16_t *)(void *)((unsigned char *)CTX + addr) = T0_POP(); } break; @@ -1609,7 +1600,7 @@ br_x509_minimal_run(void *t0ctx) /* set32 */ uint32_t addr = T0_POP(); - *(uint32_t *)((unsigned char *)CTX + addr) = T0_POP(); + *(uint32_t *)(void *)((unsigned char *)CTX + addr) = T0_POP(); } break;