|  | /* sha1.c - Functions to compute SHA1 message digest of files or | 
|  | memory blocks according to the NIST specification FIPS-180-1. | 
|  |  | 
|  | Copyright (C) 2000-2025 Free Software Foundation, Inc. | 
|  |  | 
|  | This program is free software; you can redistribute it and/or modify it | 
|  | under the terms of the GNU General Public License as published by the | 
|  | Free Software Foundation; either version 2, or (at your option) any | 
|  | later version. | 
|  |  | 
|  | This program is distributed in the hope that it will be useful, | 
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
|  | GNU General Public License for more details. | 
|  |  | 
|  | You should have received a copy of the GNU General Public License | 
|  | along with this program; if not, write to the Free Software Foundation, | 
|  | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */ | 
|  |  | 
|  | /* Written by Scott G. Miller | 
|  | Credits: | 
|  | Robert Klep <robert@ilse.nl>  -- Expansion function fix | 
|  | */ | 
|  |  | 
|  | #include <config.h> | 
|  |  | 
|  | #include "sha1.h" | 
|  |  | 
|  | #include <stddef.h> | 
|  | #include <string.h> | 
|  |  | 
|  | #ifdef HAVE_X86_SHA1_HW_SUPPORT | 
|  | # include <x86intrin.h> | 
|  | # include <cpuid.h> | 
|  | #endif | 
|  |  | 
|  | #if USE_UNLOCKED_IO | 
|  | # include "unlocked-io.h" | 
|  | #endif | 
|  | /* SWAP (n) yields N unchanged when WORDS_BIGENDIAN is defined; | 
|  | otherwise it reverses the four bytes of the 32-bit unsigned value N. | 
|  | N is expanded several times, so it must be free of side effects.  */ | 
|  | #ifdef WORDS_BIGENDIAN | 
|  | # define SWAP(n) (n) | 
|  | #else | 
|  | # define SWAP(n) \ | 
|  | (((n) << 24) | (((n) & 0xff00) << 8) | (((n) >> 8) & 0xff00) | ((n) >> 24)) | 
|  | #endif | 
|  | /* Number of bytes sha1_stream reads before hashing a chunk.  It must | 
|  | be a multiple of 64, the size of one SHA1 input block.  */ | 
|  | #define BLOCKSIZE 4096 | 
|  | #if BLOCKSIZE % 64 != 0 | 
|  | # error "invalid BLOCKSIZE" | 
|  | #endif | 
|  |  | 
|  | /* This array contains the bytes used to pad the buffer to the next | 
|  | 64-byte boundary: a single 0x80 byte followed by zeros.  (FIPS 180-1 | 
|  | message padding; see also RFC 3174, section 4.)  */ | 
|  | static const unsigned char fillbuf[64] = { 0x80, 0 /* , 0, 0, ...  */ }; | 
|  |  | 
|  |  | 
|  | /* Take a pointer to a 160 bit block of data (five 32 bit ints) and | 
|  | initialize it to the start constants of the SHA1 algorithm.  This | 
|  | must be called before using hash in the call to sha1_hash.  */ | 
|  | void | 
|  | sha1_init_ctx (struct sha1_ctx *ctx) | 
|  | { | 
|  | ctx->A = 0x67452301; | 
|  | ctx->B = 0xefcdab89; | 
|  | ctx->C = 0x98badcfe; | 
|  | ctx->D = 0x10325476; | 
|  | ctx->E = 0xc3d2e1f0; | 
|  |  | 
|  | ctx->total[0] = ctx->total[1] = 0; | 
|  | ctx->buflen = 0; | 
|  | } | 
|  |  | 
|  | /* Put result from CTX in first 20 bytes following RESBUF.  The result | 
|  | must be in little endian byte order. | 
|  |  | 
|  | IMPORTANT: On some systems it is required that RESBUF is correctly | 
|  | aligned for a 32-bit value.  */ | 
|  | void * | 
|  | sha1_read_ctx (const struct sha1_ctx *ctx, void *resbuf) | 
|  | { | 
|  | ((sha1_uint32 *) resbuf)[0] = SWAP (ctx->A); | 
|  | ((sha1_uint32 *) resbuf)[1] = SWAP (ctx->B); | 
|  | ((sha1_uint32 *) resbuf)[2] = SWAP (ctx->C); | 
|  | ((sha1_uint32 *) resbuf)[3] = SWAP (ctx->D); | 
|  | ((sha1_uint32 *) resbuf)[4] = SWAP (ctx->E); | 
|  |  | 
|  | return resbuf; | 
|  | } | 
|  |  | 
|  | /* Process the remaining bytes in the internal buffer and the usual | 
|  | prolog according to the standard and write the result to RESBUF. | 
|  |  | 
|  | IMPORTANT: On some systems it is required that RESBUF is correctly | 
|  | aligned for a 32-bit value.  */ | 
|  | void * | 
|  | sha1_finish_ctx (struct sha1_ctx *ctx, void *resbuf) | 
|  | { | 
|  | /* Take yet unprocessed bytes into account.  */ | 
|  | sha1_uint32 bytes = ctx->buflen; | 
|  | size_t size = (bytes < 56) ? 64 / 4 : 64 * 2 / 4; | 
|  |  | 
|  | /* Now count remaining bytes.  */ | 
|  | ctx->total[0] += bytes; | 
|  | if (ctx->total[0] < bytes) | 
|  | ++ctx->total[1]; | 
|  |  | 
|  | /* Put the 64-bit file length in *bits* at the end of the buffer.  */ | 
|  | ctx->buffer[size - 2] = SWAP ((ctx->total[1] << 3) | (ctx->total[0] >> 29)); | 
|  | ctx->buffer[size - 1] = SWAP (ctx->total[0] << 3); | 
|  |  | 
|  | memcpy (&((char *) ctx->buffer)[bytes], fillbuf, (size - 2) * 4 - bytes); | 
|  |  | 
|  | /* Process last bytes.  */ | 
|  | sha1_process_block (ctx->buffer, size * 4, ctx); | 
|  |  | 
|  | return sha1_read_ctx (ctx, resbuf); | 
|  | } | 
|  |  | 
|  | /* Compute SHA1 message digest for bytes read from STREAM.  The | 
|  | resulting message digest number will be written into the 16 bytes | 
|  | beginning at RESBLOCK.  */ | 
|  | int | 
|  | sha1_stream (FILE *stream, void *resblock) | 
|  | { | 
|  | struct sha1_ctx ctx; | 
|  | char buffer[BLOCKSIZE + 72]; | 
|  | size_t sum; | 
|  |  | 
|  | /* Initialize the computation context.  */ | 
|  | sha1_init_ctx (&ctx); | 
|  |  | 
|  | /* Iterate over full file contents.  */ | 
|  | while (1) | 
|  | { | 
|  | /* We read the file in blocks of BLOCKSIZE bytes.  One call of the | 
|  | computation function processes the whole buffer so that with the | 
|  | next round of the loop another block can be read.  */ | 
|  | size_t n; | 
|  | sum = 0; | 
|  |  | 
|  | /* Read block.  Take care for partial reads.  */ | 
|  | while (1) | 
|  | { | 
|  | n = fread (buffer + sum, 1, BLOCKSIZE - sum, stream); | 
|  |  | 
|  | sum += n; | 
|  |  | 
|  | if (sum == BLOCKSIZE) | 
|  | break; | 
|  |  | 
|  | if (n == 0) | 
|  | { | 
|  | /* Check for the error flag IFF N == 0, so that we don't | 
|  | exit the loop after a partial read due to e.g., EAGAIN | 
|  | or EWOULDBLOCK.  */ | 
|  | if (ferror (stream)) | 
|  | return 1; | 
|  | goto process_partial_block; | 
|  | } | 
|  |  | 
|  | /* We've read at least one byte, so ignore errors.  But always | 
|  | check for EOF, since feof may be true even though N > 0. | 
|  | Otherwise, we could end up calling fread after EOF.  */ | 
|  | if (feof (stream)) | 
|  | goto process_partial_block; | 
|  | } | 
|  |  | 
|  | /* Process buffer with BLOCKSIZE bytes.  Note that | 
|  | BLOCKSIZE % 64 == 0 | 
|  | */ | 
|  | sha1_process_block (buffer, BLOCKSIZE, &ctx); | 
|  | } | 
|  |  | 
|  | process_partial_block:; | 
|  |  | 
|  | /* Process any remaining bytes.  */ | 
|  | if (sum > 0) | 
|  | sha1_process_bytes (buffer, sum, &ctx); | 
|  |  | 
|  | /* Construct result in desired memory.  */ | 
|  | sha1_finish_ctx (&ctx, resblock); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | /* Compute SHA1 message digest for LEN bytes beginning at BUFFER.  The | 
|  | result is always in little endian byte order, so that a byte-wise | 
|  | output yields to the wanted ASCII representation of the message | 
|  | digest.  */ | 
|  | void * | 
|  | sha1_buffer (const char *buffer, size_t len, void *resblock) | 
|  | { | 
|  | struct sha1_ctx ctx; | 
|  |  | 
|  | /* Initialize the computation context.  */ | 
|  | sha1_init_ctx (&ctx); | 
|  |  | 
|  | /* Process whole buffer but last len % 64 bytes.  */ | 
|  | sha1_process_bytes (buffer, len, &ctx); | 
|  |  | 
|  | /* Put result in desired memory area.  */ | 
|  | return sha1_finish_ctx (&ctx, resblock); | 
|  | } | 
|  |  | 
|  | void | 
|  | sha1_process_bytes (const void *buffer, size_t len, struct sha1_ctx *ctx) | 
|  | { | 
|  | /* When we already have some bits in our internal buffer concatenate | 
|  | both inputs first.  */ | 
|  | if (ctx->buflen != 0) | 
|  | { | 
|  | size_t left_over = ctx->buflen; | 
|  | size_t add = 128 - left_over > len ? len : 128 - left_over; | 
|  |  | 
|  | memcpy (&((char *) ctx->buffer)[left_over], buffer, add); | 
|  | ctx->buflen += add; | 
|  |  | 
|  | if (ctx->buflen > 64) | 
|  | { | 
|  | sha1_process_block (ctx->buffer, ctx->buflen & ~63, ctx); | 
|  |  | 
|  | ctx->buflen &= 63; | 
|  | /* The regions in the following copy operation cannot overlap.  */ | 
|  | memcpy (ctx->buffer, | 
|  | &((char *) ctx->buffer)[(left_over + add) & ~63], | 
|  | ctx->buflen); | 
|  | } | 
|  |  | 
|  | buffer = (const char *) buffer + add; | 
|  | len -= add; | 
|  | } | 
|  |  | 
|  | /* Process available complete blocks.  */ | 
|  | if (len >= 64) | 
|  | { | 
|  | #if !_STRING_ARCH_unaligned | 
|  | # define alignof(type) offsetof (struct { char c; type x; }, x) | 
|  | # define UNALIGNED_P(p) (((size_t) p) % alignof (sha1_uint32) != 0) | 
|  | if (UNALIGNED_P (buffer)) | 
|  | while (len > 64) | 
|  | { | 
|  | sha1_process_block (memcpy (ctx->buffer, buffer, 64), 64, ctx); | 
|  | buffer = (const char *) buffer + 64; | 
|  | len -= 64; | 
|  | } | 
|  | else | 
|  | #endif | 
|  | { | 
|  | sha1_process_block (buffer, len & ~63, ctx); | 
|  | buffer = (const char *) buffer + (len & ~63); | 
|  | len &= 63; | 
|  | } | 
|  | } | 
|  |  | 
|  | /* Move remaining bytes in internal buffer.  */ | 
|  | if (len > 0) | 
|  | { | 
|  | size_t left_over = ctx->buflen; | 
|  |  | 
|  | memcpy (&((char *) ctx->buffer)[left_over], buffer, len); | 
|  | left_over += len; | 
|  | if (left_over >= 64) | 
|  | { | 
|  | sha1_process_block (ctx->buffer, 64, ctx); | 
|  | left_over -= 64; | 
|  | memmove (ctx->buffer, &ctx->buffer[16], left_over); | 
|  | } | 
|  | ctx->buflen = left_over; | 
|  | } | 
|  | } | 
|  |  | 
|  | /* --- Code below is the primary difference between md5.c and sha1.c --- */ | 
|  |  | 
|  | /* SHA1 round constants.  sha1_process_block uses K1 in rounds 0..19, | 
|  | K2 in rounds 20..39, K3 in rounds 40..59 and K4 in rounds 60..79.  */ | 
|  | #define K1 0x5a827999 | 
|  | #define K2 0x6ed9eba1 | 
|  | #define K3 0x8f1bbcdc | 
|  | #define K4 0xca62c1d6 | 
|  |  | 
|  | /* Round functions.  Note that F2 is the same as F4.  F1 picks, bit by | 
|  | bit, C where B is set and D where B is clear; F2 and F4 are the | 
|  | parity (xor) of the three inputs; F3 is their bitwise majority.  The | 
|  | arguments are not parenthesized, so pass plain identifiers only.  */ | 
|  | #define F1(B,C,D) ( D ^ ( B & ( C ^ D ) ) ) | 
|  | #define F2(B,C,D) (B ^ C ^ D) | 
|  | #define F3(B,C,D) ( ( B & C ) | ( D & ( B | C ) ) ) | 
|  | #define F4(B,C,D) (B ^ C ^ D) | 
|  |  | 
|  | /* Process LEN bytes of BUFFER, accumulating context into CTX. | 
|  | It is assumed that LEN % 64 == 0.  BUFFER is read as an array of | 
|  | sha1_uint32 words, 16 per block; LEN is added to the byte counter in | 
|  | CTX and the chaining words A..E of CTX are updated once per block. | 
|  | Most of this code comes from GnuPG's cipher/sha1.c.  */ | 
|  |  | 
|  | void | 
|  | sha1_process_block (const void *buffer, size_t len, struct sha1_ctx *ctx) | 
|  | { | 
|  | const sha1_uint32 *words = (const sha1_uint32*) buffer; | 
|  | size_t nwords = len / sizeof (sha1_uint32); | 
|  | const sha1_uint32 *endp = words + nwords; | 
|  | sha1_uint32 x[16]; | 
|  | sha1_uint32 a = ctx->A; | 
|  | sha1_uint32 b = ctx->B; | 
|  | sha1_uint32 c = ctx->C; | 
|  | sha1_uint32 d = ctx->D; | 
|  | sha1_uint32 e = ctx->E; | 
|  |  | 
|  | /* First increment the byte count.  FIPS 180-1 limits the length of a | 
|  | message to less than 2^64 bits.  Here we only count bytes, as a | 
|  | double word increment: total[0] is the low word and total[1] receives | 
|  | the carry plus any part of LEN above 32 bits.  */ | 
|  | ctx->total[0] += len; | 
|  | ctx->total[1] += ((len >> 31) >> 1) + (ctx->total[0] < len); | 
|  | /* Rotate the 32-bit value X left by N bits.  */ | 
|  | #define rol(x, n) (((x) << (n)) | ((sha1_uint32) (x) >> (32 - (n)))) | 
|  | /* Message schedule: compute word I from words I-16, I-14, I-8 and I-3, | 
|  | which live in the 16-entry circular buffer x[], store it back in place | 
|  | of word I-16 and yield it.  Needs a scratch variable named tm.  */ | 
|  | #define M(I) ( tm =   x[I&0x0f] ^ x[(I-14)&0x0f] \ | 
|  | ^ x[(I-8)&0x0f] ^ x[(I-3)&0x0f] \ | 
|  | , (x[I&0x0f] = rol(tm, 1)) ) | 
|  | /* One SHA1 round.  Rather than shifting values through the five | 
|  | working variables, each invocation updates E and B in place and the | 
|  | callers below rotate the argument order from round to round.  */ | 
|  | #define R(A,B,C,D,E,F,K,M)  do { E += rol( A, 5 )     \ | 
|  | + F( B, C, D )  \ | 
|  | + K	      \ | 
|  | + M;	      \ | 
|  | B = rol( B, 30 );    \ | 
|  | } while(0) | 
|  | /* Each iteration consumes one 64-byte block (16 words).  */ | 
|  | while (words < endp) | 
|  | { | 
|  | sha1_uint32 tm; | 
|  | int t; | 
|  | for (t = 0; t < 16; t++) | 
|  | { | 
|  | x[t] = SWAP (*words); | 
|  | words++; | 
|  | } | 
|  | /* 80 rounds: 0..15 use the input words directly, 16..79 use the | 
|  | message schedule; the round function and constant change every 20 | 
|  | rounds.  */ | 
|  | R( a, b, c, d, e, F1, K1, x[ 0] ); | 
|  | R( e, a, b, c, d, F1, K1, x[ 1] ); | 
|  | R( d, e, a, b, c, F1, K1, x[ 2] ); | 
|  | R( c, d, e, a, b, F1, K1, x[ 3] ); | 
|  | R( b, c, d, e, a, F1, K1, x[ 4] ); | 
|  | R( a, b, c, d, e, F1, K1, x[ 5] ); | 
|  | R( e, a, b, c, d, F1, K1, x[ 6] ); | 
|  | R( d, e, a, b, c, F1, K1, x[ 7] ); | 
|  | R( c, d, e, a, b, F1, K1, x[ 8] ); | 
|  | R( b, c, d, e, a, F1, K1, x[ 9] ); | 
|  | R( a, b, c, d, e, F1, K1, x[10] ); | 
|  | R( e, a, b, c, d, F1, K1, x[11] ); | 
|  | R( d, e, a, b, c, F1, K1, x[12] ); | 
|  | R( c, d, e, a, b, F1, K1, x[13] ); | 
|  | R( b, c, d, e, a, F1, K1, x[14] ); | 
|  | R( a, b, c, d, e, F1, K1, x[15] ); | 
|  | R( e, a, b, c, d, F1, K1, M(16) ); | 
|  | R( d, e, a, b, c, F1, K1, M(17) ); | 
|  | R( c, d, e, a, b, F1, K1, M(18) ); | 
|  | R( b, c, d, e, a, F1, K1, M(19) ); | 
|  | R( a, b, c, d, e, F2, K2, M(20) ); | 
|  | R( e, a, b, c, d, F2, K2, M(21) ); | 
|  | R( d, e, a, b, c, F2, K2, M(22) ); | 
|  | R( c, d, e, a, b, F2, K2, M(23) ); | 
|  | R( b, c, d, e, a, F2, K2, M(24) ); | 
|  | R( a, b, c, d, e, F2, K2, M(25) ); | 
|  | R( e, a, b, c, d, F2, K2, M(26) ); | 
|  | R( d, e, a, b, c, F2, K2, M(27) ); | 
|  | R( c, d, e, a, b, F2, K2, M(28) ); | 
|  | R( b, c, d, e, a, F2, K2, M(29) ); | 
|  | R( a, b, c, d, e, F2, K2, M(30) ); | 
|  | R( e, a, b, c, d, F2, K2, M(31) ); | 
|  | R( d, e, a, b, c, F2, K2, M(32) ); | 
|  | R( c, d, e, a, b, F2, K2, M(33) ); | 
|  | R( b, c, d, e, a, F2, K2, M(34) ); | 
|  | R( a, b, c, d, e, F2, K2, M(35) ); | 
|  | R( e, a, b, c, d, F2, K2, M(36) ); | 
|  | R( d, e, a, b, c, F2, K2, M(37) ); | 
|  | R( c, d, e, a, b, F2, K2, M(38) ); | 
|  | R( b, c, d, e, a, F2, K2, M(39) ); | 
|  | R( a, b, c, d, e, F3, K3, M(40) ); | 
|  | R( e, a, b, c, d, F3, K3, M(41) ); | 
|  | R( d, e, a, b, c, F3, K3, M(42) ); | 
|  | R( c, d, e, a, b, F3, K3, M(43) ); | 
|  | R( b, c, d, e, a, F3, K3, M(44) ); | 
|  | R( a, b, c, d, e, F3, K3, M(45) ); | 
|  | R( e, a, b, c, d, F3, K3, M(46) ); | 
|  | R( d, e, a, b, c, F3, K3, M(47) ); | 
|  | R( c, d, e, a, b, F3, K3, M(48) ); | 
|  | R( b, c, d, e, a, F3, K3, M(49) ); | 
|  | R( a, b, c, d, e, F3, K3, M(50) ); | 
|  | R( e, a, b, c, d, F3, K3, M(51) ); | 
|  | R( d, e, a, b, c, F3, K3, M(52) ); | 
|  | R( c, d, e, a, b, F3, K3, M(53) ); | 
|  | R( b, c, d, e, a, F3, K3, M(54) ); | 
|  | R( a, b, c, d, e, F3, K3, M(55) ); | 
|  | R( e, a, b, c, d, F3, K3, M(56) ); | 
|  | R( d, e, a, b, c, F3, K3, M(57) ); | 
|  | R( c, d, e, a, b, F3, K3, M(58) ); | 
|  | R( b, c, d, e, a, F3, K3, M(59) ); | 
|  | R( a, b, c, d, e, F4, K4, M(60) ); | 
|  | R( e, a, b, c, d, F4, K4, M(61) ); | 
|  | R( d, e, a, b, c, F4, K4, M(62) ); | 
|  | R( c, d, e, a, b, F4, K4, M(63) ); | 
|  | R( b, c, d, e, a, F4, K4, M(64) ); | 
|  | R( a, b, c, d, e, F4, K4, M(65) ); | 
|  | R( e, a, b, c, d, F4, K4, M(66) ); | 
|  | R( d, e, a, b, c, F4, K4, M(67) ); | 
|  | R( c, d, e, a, b, F4, K4, M(68) ); | 
|  | R( b, c, d, e, a, F4, K4, M(69) ); | 
|  | R( a, b, c, d, e, F4, K4, M(70) ); | 
|  | R( e, a, b, c, d, F4, K4, M(71) ); | 
|  | R( d, e, a, b, c, F4, K4, M(72) ); | 
|  | R( c, d, e, a, b, F4, K4, M(73) ); | 
|  | R( b, c, d, e, a, F4, K4, M(74) ); | 
|  | R( a, b, c, d, e, F4, K4, M(75) ); | 
|  | R( e, a, b, c, d, F4, K4, M(76) ); | 
|  | R( d, e, a, b, c, F4, K4, M(77) ); | 
|  | R( c, d, e, a, b, F4, K4, M(78) ); | 
|  | R( b, c, d, e, a, F4, K4, M(79) ); | 
|  | /* Add this block's result to the chaining words in CTX and reload | 
|  | the working variables from them for the next block.  */ | 
|  | a = ctx->A += a; | 
|  | b = ctx->B += b; | 
|  | c = ctx->C += c; | 
|  | d = ctx->D += d; | 
|  | e = ctx->E += e; | 
|  | } | 
|  | } | 
|  |  | 
|  | #if defined(HAVE_X86_SHA1_HW_SUPPORT) | 
|  | /* HW specific version of sha1_process_bytes.  */ | 
|  |  | 
|  | static void sha1_hw_process_block (const void *, size_t, struct sha1_ctx *); | 
|  |  | 
|  | static void | 
|  | sha1_hw_process_bytes (const void *buffer, size_t len, struct sha1_ctx *ctx) | 
|  | { | 
|  | /* When we already have some bits in our internal buffer concatenate | 
|  | both inputs first.  */ | 
|  | if (ctx->buflen != 0) | 
|  | { | 
|  | size_t left_over = ctx->buflen; | 
|  | size_t add = 128 - left_over > len ? len : 128 - left_over; | 
|  |  | 
|  | memcpy (&((char *) ctx->buffer)[left_over], buffer, add); | 
|  | ctx->buflen += add; | 
|  |  | 
|  | if (ctx->buflen > 64) | 
|  | { | 
|  | sha1_hw_process_block (ctx->buffer, ctx->buflen & ~63, ctx); | 
|  |  | 
|  | ctx->buflen &= 63; | 
|  | /* The regions in the following copy operation cannot overlap.  */ | 
|  | memcpy (ctx->buffer, | 
|  | &((char *) ctx->buffer)[(left_over + add) & ~63], | 
|  | ctx->buflen); | 
|  | } | 
|  |  | 
|  | buffer = (const char *) buffer + add; | 
|  | len -= add; | 
|  | } | 
|  |  | 
|  | /* Process available complete blocks.  */ | 
|  | if (len >= 64) | 
|  | { | 
|  | #if !_STRING_ARCH_unaligned | 
|  | # define alignof(type) offsetof (struct { char c; type x; }, x) | 
|  | # define UNALIGNED_P(p) (((size_t) p) % alignof (sha1_uint32) != 0) | 
|  | if (UNALIGNED_P (buffer)) | 
|  | while (len > 64) | 
|  | { | 
|  | sha1_hw_process_block (memcpy (ctx->buffer, buffer, 64), 64, ctx); | 
|  | buffer = (const char *) buffer + 64; | 
|  | len -= 64; | 
|  | } | 
|  | else | 
|  | #endif | 
|  | { | 
|  | sha1_hw_process_block (buffer, len & ~63, ctx); | 
|  | buffer = (const char *) buffer + (len & ~63); | 
|  | len &= 63; | 
|  | } | 
|  | } | 
|  |  | 
|  | /* Move remaining bytes in internal buffer.  */ | 
|  | if (len > 0) | 
|  | { | 
|  | size_t left_over = ctx->buflen; | 
|  |  | 
|  | memcpy (&((char *) ctx->buffer)[left_over], buffer, len); | 
|  | left_over += len; | 
|  | if (left_over >= 64) | 
|  | { | 
|  | sha1_hw_process_block (ctx->buffer, 64, ctx); | 
|  | left_over -= 64; | 
|  | memmove (ctx->buffer, &ctx->buffer[16], left_over); | 
|  | } | 
|  | ctx->buflen = left_over; | 
|  | } | 
|  | } | 
|  |  | 
|  | /* Process LEN bytes of BUFFER, accumulating context into CTX. | 
|  | Using CPU specific intrinsics.  The loop consumes 64 bytes per | 
|  | iteration, so LEN is expected to be a multiple of 64.  Input is | 
|  | fetched with unaligned loads.  When HAVE_X86_SHA1_HW_SUPPORT is not | 
|  | defined the body is empty.  */ | 
|  |  | 
|  | #ifdef HAVE_X86_SHA1_HW_SUPPORT | 
|  | __attribute__((__target__ ("sse4.1,sha"))) | 
|  | #endif | 
|  | static void | 
|  | sha1_hw_process_block (const void *buffer, size_t len, struct sha1_ctx *ctx) | 
|  | { | 
|  | #ifdef HAVE_X86_SHA1_HW_SUPPORT | 
|  | /* Implemented from | 
|  | https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sha-extensions.html  */ | 
|  | const __m128i *words = (const __m128i *) buffer; | 
|  | const __m128i *endp = (const __m128i *) ((const char *) buffer + len); | 
|  | __m128i abcd, abcd_save, e0, e0_save, e1, msg0, msg1, msg2, msg3; | 
|  | const __m128i shuf_mask | 
|  | = _mm_set_epi64x (0x0001020304050607ULL, 0x08090a0b0c0d0e0fULL); | 
|  | char check[((offsetof (struct sha1_ctx, B) | 
|  | == offsetof (struct sha1_ctx, A) + sizeof (ctx->A)) | 
|  | && (offsetof (struct sha1_ctx, C) | 
|  | == offsetof (struct sha1_ctx, A) + 2 * sizeof (ctx->A)) | 
|  | && (offsetof (struct sha1_ctx, D) | 
|  | == offsetof (struct sha1_ctx, A) + 3 * sizeof (ctx->A))) | 
|  | ? 1 : -1]; | 
|  | /* CHECK acts as a compile-time assertion: its size is -1, which does | 
|  | not compile, unless A, B, C and D are laid out back to back in struct | 
|  | sha1_ctx.  The 128-bit load and store of &ctx->A below rely on that | 
|  | layout.  */ | 
|  | /* First increment the byte count.  FIPS 180-1 limits the length of a | 
|  | message to less than 2^64 bits.  Here we only count bytes, as a | 
|  | double word increment: total[0] is the low word and total[1] receives | 
|  | the carry plus any part of LEN above 32 bits.  */ | 
|  | ctx->total[0] += len; | 
|  | ctx->total[1] += ((len >> 31) >> 1) + (ctx->total[0] < len); | 
|  | /* Reference CHECK once, presumably to silence unused-variable | 
|  | warnings; then load A..D into one vector and E into the top lane of | 
|  | another.  */ | 
|  | (void) &check[0]; | 
|  | abcd = _mm_loadu_si128 ((const __m128i *) &ctx->A); | 
|  | e0 = _mm_set_epi32 (ctx->E, 0, 0, 0); | 
|  | abcd = _mm_shuffle_epi32 (abcd, 0x1b); /* 0, 1, 2, 3 */ | 
|  | /* One iteration per 64-byte block (four 16-byte vectors).  */ | 
|  | while (words < endp) | 
|  | { | 
|  | abcd_save = abcd; | 
|  | e0_save = e0; | 
|  | /* The numbered comments below name the rounds each group of | 
|  | statements covers.  shuf_mask reverses all 16 bytes of a loaded | 
|  | vector.  The last argument of _mm_sha1rnds4_epu32 is 0 for rounds | 
|  | 0..19, 1 for 20..39, 2 for 40..59 and 3 for 60..79.  */ | 
|  | /* 0..3 */ | 
|  | msg0 = _mm_loadu_si128 (words); | 
|  | msg0 = _mm_shuffle_epi8 (msg0, shuf_mask); | 
|  | e0 = _mm_add_epi32 (e0, msg0); | 
|  | e1 = abcd; | 
|  | abcd = _mm_sha1rnds4_epu32 (abcd, e0, 0); | 
|  |  | 
|  | /* 4..7 */ | 
|  | msg1 = _mm_loadu_si128 (words + 1); | 
|  | msg1 = _mm_shuffle_epi8 (msg1, shuf_mask); | 
|  | e1 = _mm_sha1nexte_epu32 (e1, msg1); | 
|  | e0 = abcd; | 
|  | abcd = _mm_sha1rnds4_epu32 (abcd, e1, 0); | 
|  | msg0 = _mm_sha1msg1_epu32 (msg0, msg1); | 
|  |  | 
|  | /* 8..11 */ | 
|  | msg2 = _mm_loadu_si128 (words + 2); | 
|  | msg2 = _mm_shuffle_epi8 (msg2, shuf_mask); | 
|  | e0 = _mm_sha1nexte_epu32 (e0, msg2); | 
|  | e1 = abcd; | 
|  | abcd = _mm_sha1rnds4_epu32 (abcd, e0, 0); | 
|  | msg1 = _mm_sha1msg1_epu32 (msg1, msg2); | 
|  | msg0 = _mm_xor_si128 (msg0, msg2); | 
|  |  | 
|  | /* 12..15 */ | 
|  | msg3 = _mm_loadu_si128 (words + 3); | 
|  | msg3 = _mm_shuffle_epi8 (msg3, shuf_mask); | 
|  | e1 = _mm_sha1nexte_epu32 (e1, msg3); | 
|  | e0 = abcd; | 
|  | msg0 = _mm_sha1msg2_epu32 (msg0, msg3); | 
|  | abcd = _mm_sha1rnds4_epu32 (abcd, e1, 0); | 
|  | msg2 = _mm_sha1msg1_epu32 (msg2, msg3); | 
|  | msg1 = _mm_xor_si128 (msg1, msg3); | 
|  |  | 
|  | /* 16..19 */ | 
|  | e0 = _mm_sha1nexte_epu32 (e0, msg0); | 
|  | e1 = abcd; | 
|  | msg1 = _mm_sha1msg2_epu32 (msg1, msg0); | 
|  | abcd = _mm_sha1rnds4_epu32 (abcd, e0, 0); | 
|  | msg3 = _mm_sha1msg1_epu32 (msg3, msg0); | 
|  | msg2 = _mm_xor_si128 (msg2, msg0); | 
|  |  | 
|  | /* 20..23 */ | 
|  | e1 = _mm_sha1nexte_epu32 (e1, msg1); | 
|  | e0 = abcd; | 
|  | msg2 = _mm_sha1msg2_epu32 (msg2, msg1); | 
|  | abcd = _mm_sha1rnds4_epu32 (abcd, e1, 1); | 
|  | msg0 = _mm_sha1msg1_epu32 (msg0, msg1); | 
|  | msg3 = _mm_xor_si128 (msg3, msg1); | 
|  |  | 
|  | /* 24..27 */ | 
|  | e0 = _mm_sha1nexte_epu32 (e0, msg2); | 
|  | e1 = abcd; | 
|  | msg3 = _mm_sha1msg2_epu32 (msg3, msg2); | 
|  | abcd = _mm_sha1rnds4_epu32 (abcd, e0, 1); | 
|  | msg1 = _mm_sha1msg1_epu32 (msg1, msg2); | 
|  | msg0 = _mm_xor_si128 (msg0, msg2); | 
|  |  | 
|  | /* 28..31 */ | 
|  | e1 = _mm_sha1nexte_epu32 (e1, msg3); | 
|  | e0 = abcd; | 
|  | msg0 = _mm_sha1msg2_epu32 (msg0, msg3); | 
|  | abcd = _mm_sha1rnds4_epu32 (abcd, e1, 1); | 
|  | msg2 = _mm_sha1msg1_epu32 (msg2, msg3); | 
|  | msg1 = _mm_xor_si128 (msg1, msg3); | 
|  |  | 
|  | /* 32..35 */ | 
|  | e0 = _mm_sha1nexte_epu32 (e0, msg0); | 
|  | e1 = abcd; | 
|  | msg1 = _mm_sha1msg2_epu32 (msg1, msg0); | 
|  | abcd = _mm_sha1rnds4_epu32 (abcd, e0, 1); | 
|  | msg3 = _mm_sha1msg1_epu32 (msg3, msg0); | 
|  | msg2 = _mm_xor_si128 (msg2, msg0); | 
|  |  | 
|  | /* 36..39 */ | 
|  | e1 = _mm_sha1nexte_epu32 (e1, msg1); | 
|  | e0 = abcd; | 
|  | msg2 = _mm_sha1msg2_epu32 (msg2, msg1); | 
|  | abcd = _mm_sha1rnds4_epu32 (abcd, e1, 1); | 
|  | msg0 = _mm_sha1msg1_epu32 (msg0, msg1); | 
|  | msg3 = _mm_xor_si128 (msg3, msg1); | 
|  |  | 
|  | /* 40..43 */ | 
|  | e0 = _mm_sha1nexte_epu32 (e0, msg2); | 
|  | e1 = abcd; | 
|  | msg3 = _mm_sha1msg2_epu32 (msg3, msg2); | 
|  | abcd = _mm_sha1rnds4_epu32 (abcd, e0, 2); | 
|  | msg1 = _mm_sha1msg1_epu32 (msg1, msg2); | 
|  | msg0 = _mm_xor_si128 (msg0, msg2); | 
|  |  | 
|  | /* 44..47 */ | 
|  | e1 = _mm_sha1nexte_epu32 (e1, msg3); | 
|  | e0 = abcd; | 
|  | msg0 = _mm_sha1msg2_epu32 (msg0, msg3); | 
|  | abcd = _mm_sha1rnds4_epu32 (abcd, e1, 2); | 
|  | msg2 = _mm_sha1msg1_epu32 (msg2, msg3); | 
|  | msg1 = _mm_xor_si128 (msg1, msg3); | 
|  |  | 
|  | /* 48..51 */ | 
|  | e0 = _mm_sha1nexte_epu32 (e0, msg0); | 
|  | e1 = abcd; | 
|  | msg1 = _mm_sha1msg2_epu32 (msg1, msg0); | 
|  | abcd = _mm_sha1rnds4_epu32 (abcd, e0, 2); | 
|  | msg3 = _mm_sha1msg1_epu32 (msg3, msg0); | 
|  | msg2 = _mm_xor_si128 (msg2, msg0); | 
|  |  | 
|  | /* 52..55 */ | 
|  | e1 = _mm_sha1nexte_epu32 (e1, msg1); | 
|  | e0 = abcd; | 
|  | msg2 = _mm_sha1msg2_epu32 (msg2, msg1); | 
|  | abcd = _mm_sha1rnds4_epu32 (abcd, e1, 2); | 
|  | msg0 = _mm_sha1msg1_epu32 (msg0, msg1); | 
|  | msg3 = _mm_xor_si128 (msg3, msg1); | 
|  |  | 
|  | /* 56..59 */ | 
|  | e0 = _mm_sha1nexte_epu32 (e0, msg2); | 
|  | e1 = abcd; | 
|  | msg3 = _mm_sha1msg2_epu32 (msg3, msg2); | 
|  | abcd = _mm_sha1rnds4_epu32 (abcd, e0, 2); | 
|  | msg1 = _mm_sha1msg1_epu32 (msg1, msg2); | 
|  | msg0 = _mm_xor_si128 (msg0, msg2); | 
|  |  | 
|  | /* 60..63 */ | 
|  | e1 = _mm_sha1nexte_epu32 (e1, msg3); | 
|  | e0 = abcd; | 
|  | msg0 = _mm_sha1msg2_epu32 (msg0, msg3); | 
|  | abcd = _mm_sha1rnds4_epu32 (abcd, e1, 3); | 
|  | msg2 = _mm_sha1msg1_epu32 (msg2, msg3); | 
|  | msg1 = _mm_xor_si128 (msg1, msg3); | 
|  |  | 
|  | /* 64..67 */ | 
|  | e0 = _mm_sha1nexte_epu32 (e0, msg0); | 
|  | e1 = abcd; | 
|  | msg1 = _mm_sha1msg2_epu32 (msg1, msg0); | 
|  | abcd = _mm_sha1rnds4_epu32 (abcd, e0, 3); | 
|  | msg3 = _mm_sha1msg1_epu32 (msg3, msg0); | 
|  | msg2 = _mm_xor_si128 (msg2, msg0); | 
|  |  | 
|  | /* 68..71 */ | 
|  | e1 = _mm_sha1nexte_epu32 (e1, msg1); | 
|  | e0 = abcd; | 
|  | msg2 = _mm_sha1msg2_epu32 (msg2, msg1); | 
|  | abcd = _mm_sha1rnds4_epu32 (abcd, e1, 3); | 
|  | msg3 = _mm_xor_si128 (msg3, msg1); | 
|  |  | 
|  | /* 72..75 */ | 
|  | e0 = _mm_sha1nexte_epu32 (e0, msg2); | 
|  | e1 = abcd; | 
|  | msg3 = _mm_sha1msg2_epu32 (msg3, msg2); | 
|  | abcd = _mm_sha1rnds4_epu32 (abcd, e0, 3); | 
|  |  | 
|  | /* 76..79 */ | 
|  | e1 = _mm_sha1nexte_epu32 (e1, msg3); | 
|  | e0 = abcd; | 
|  | abcd = _mm_sha1rnds4_epu32 (abcd, e1, 3); | 
|  |  | 
|  | /* Finalize: combine this block's result with the state saved at the | 
|  | top of the iteration.  */ | 
|  | e0 = _mm_sha1nexte_epu32 (e0, e0_save); | 
|  | abcd = _mm_add_epi32 (abcd, abcd_save); | 
|  |  | 
|  | words = words + 4; | 
|  | } | 
|  | /* Undo the lane reversal done on entry and write the state back to | 
|  | CTX; E is taken from the top lane of e0.  */ | 
|  | abcd = _mm_shuffle_epi32 (abcd, 0x1b); /* 0, 1, 2, 3 */ | 
|  | _mm_storeu_si128 ((__m128i *) &ctx->A, abcd); | 
|  | ctx->E = _mm_extract_epi32 (e0, 3); | 
|  | #endif | 
|  | } | 
|  | #endif | 
|  |  | 
|  | /* Return sha1_process_bytes or some hardware optimized version thereof | 
|  | depending on current CPU.  */ | 
|  |  | 
|  | sha1_process_bytes_fn | 
|  | sha1_choose_process_bytes (void) | 
|  | { | 
|  | #ifdef HAVE_X86_SHA1_HW_SUPPORT | 
|  | unsigned int eax, ebx, ecx, edx; | 
|  | if (__get_cpuid_count (7, 0, &eax, &ebx, &ecx, &edx) | 
|  | && (ebx & bit_SHA) != 0 | 
|  | && __get_cpuid (1, &eax, &ebx, &ecx, &edx) | 
|  | && (ecx & bit_SSE4_1) != 0) | 
|  | return sha1_hw_process_bytes; | 
|  | #endif | 
|  | return sha1_process_bytes; | 
|  | } |