forked from zlib-ng/zlib-ng
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Move crc32 C fallbacks to arch/generic
- Loading branch information
Showing
18 changed files
with
301 additions
and
263 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,235 @@ | ||
/* crc32_braid.c -- compute the CRC-32 of a data stream | ||
* Copyright (C) 1995-2022 Mark Adler | ||
* For conditions of distribution and use, see copyright notice in zlib.h | ||
* | ||
* This interleaved implementation of a CRC makes use of pipelined multiple | ||
* arithmetic-logic units, commonly found in modern CPU cores. It is due to | ||
* Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution. | ||
*/ | ||
|
||
#include "zbuild.h" | ||
#include "crc32_braid_p.h" | ||
#include "crc32_braid_tbl.h" | ||
|
||
/* ========================================================================= */ | ||
|
||
/* | ||
A CRC of a message is computed on N braids of words in the message, where | ||
each word consists of W bytes (4 or 8). If N is 3, for example, then three | ||
running sparse CRCs are calculated respectively on each braid, at these | ||
indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ... | ||
This is done starting at a word boundary, and continues until as many blocks | ||
of N * W bytes as are available have been processed. The results are combined | ||
into a single CRC at the end. For this code, N must be in the range 1..6 and | ||
W must be 4 or 8. The upper limit on N can be increased if desired by adding | ||
more #if blocks, extending the patterns apparent in the code. In addition, | ||
crc32 tables would need to be regenerated, if the maximum N value is increased. | ||
N and W are chosen empirically by benchmarking the execution time on a given | ||
processor. The choices for N and W below were based on testing on Intel Kaby | ||
Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64 | ||
Octeon II processors. The Intel, AMD, and ARM processors were all fastest | ||
with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4. | ||
They were all tested with either gcc or clang, all using the -O3 optimization | ||
level. Your mileage may vary. | ||
*/ | ||
|
||
/* ========================================================================= */ | ||
|
||
#if BYTE_ORDER == LITTLE_ENDIAN | ||
# define ZSWAPWORD(word) (word) | ||
# define BRAID_TABLE crc_braid_table | ||
#elif BYTE_ORDER == BIG_ENDIAN | ||
# if W == 8 | ||
# define ZSWAPWORD(word) ZSWAP64(word) | ||
# elif W == 4 | ||
# define ZSWAPWORD(word) ZSWAP32(word) | ||
# endif | ||
# define BRAID_TABLE crc_braid_big_table | ||
#else | ||
# error "No endian defined" | ||
#endif | ||
#define DO1 c = crc_table[(c ^ *buf++) & 0xff] ^ (c >> 8) | ||
#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1 | ||
|
||
/* ========================================================================= */ | ||
#ifdef W | ||
/* | ||
Return the CRC of the W bytes in the word_t data, taking the | ||
least-significant byte of the word as the first byte of data, without any pre | ||
or post conditioning. This is used to combine the CRCs of each braid. | ||
*/ | ||
#if BYTE_ORDER == LITTLE_ENDIAN | ||
static uint32_t crc_word(z_word_t data) { | ||
int k; | ||
for (k = 0; k < W; k++) | ||
data = (data >> 8) ^ crc_table[data & 0xff]; | ||
return (uint32_t)data; | ||
} | ||
#elif BYTE_ORDER == BIG_ENDIAN | ||
static z_word_t crc_word(z_word_t data) { | ||
int k; | ||
for (k = 0; k < W; k++) | ||
data = (data << 8) ^ | ||
crc_big_table[(data >> ((W - 1) << 3)) & 0xff]; | ||
return data; | ||
} | ||
#endif /* BYTE_ORDER */ | ||
|
||
#endif /* W */ | ||
|
||
/* ========================================================================= */ | ||
Z_INTERNAL uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len) { | ||
Z_REGISTER uint32_t c; | ||
|
||
/* Pre-condition the CRC */ | ||
c = (~crc) & 0xffffffff; | ||
|
||
#ifdef W | ||
/* If provided enough bytes, do a braided CRC calculation. */ | ||
if (len >= N * W + W - 1) { | ||
size_t blks; | ||
z_word_t const *words; | ||
int k; | ||
|
||
/* Compute the CRC up to a z_word_t boundary. */ | ||
while (len && ((uintptr_t)buf & (W - 1)) != 0) { | ||
len--; | ||
DO1; | ||
} | ||
|
||
/* Compute the CRC on as many N z_word_t blocks as are available. */ | ||
blks = len / (N * W); | ||
len -= blks * N * W; | ||
words = (z_word_t const *)buf; | ||
|
||
z_word_t crc0, word0, comb; | ||
#if N > 1 | ||
z_word_t crc1, word1; | ||
#if N > 2 | ||
z_word_t crc2, word2; | ||
#if N > 3 | ||
z_word_t crc3, word3; | ||
#if N > 4 | ||
z_word_t crc4, word4; | ||
#if N > 5 | ||
z_word_t crc5, word5; | ||
#endif | ||
#endif | ||
#endif | ||
#endif | ||
#endif | ||
/* Initialize the CRC for each braid. */ | ||
crc0 = ZSWAPWORD(c); | ||
#if N > 1 | ||
crc1 = 0; | ||
#if N > 2 | ||
crc2 = 0; | ||
#if N > 3 | ||
crc3 = 0; | ||
#if N > 4 | ||
crc4 = 0; | ||
#if N > 5 | ||
crc5 = 0; | ||
#endif | ||
#endif | ||
#endif | ||
#endif | ||
#endif | ||
/* Process the first blks-1 blocks, computing the CRCs on each braid independently. */ | ||
while (--blks) { | ||
/* Load the word for each braid into registers. */ | ||
word0 = crc0 ^ words[0]; | ||
#if N > 1 | ||
word1 = crc1 ^ words[1]; | ||
#if N > 2 | ||
word2 = crc2 ^ words[2]; | ||
#if N > 3 | ||
word3 = crc3 ^ words[3]; | ||
#if N > 4 | ||
word4 = crc4 ^ words[4]; | ||
#if N > 5 | ||
word5 = crc5 ^ words[5]; | ||
#endif | ||
#endif | ||
#endif | ||
#endif | ||
#endif | ||
words += N; | ||
|
||
/* Compute and update the CRC for each word. The loop should get unrolled. */ | ||
crc0 = BRAID_TABLE[0][word0 & 0xff]; | ||
#if N > 1 | ||
crc1 = BRAID_TABLE[0][word1 & 0xff]; | ||
#if N > 2 | ||
crc2 = BRAID_TABLE[0][word2 & 0xff]; | ||
#if N > 3 | ||
crc3 = BRAID_TABLE[0][word3 & 0xff]; | ||
#if N > 4 | ||
crc4 = BRAID_TABLE[0][word4 & 0xff]; | ||
#if N > 5 | ||
crc5 = BRAID_TABLE[0][word5 & 0xff]; | ||
#endif | ||
#endif | ||
#endif | ||
#endif | ||
#endif | ||
for (k = 1; k < W; k++) { | ||
crc0 ^= BRAID_TABLE[k][(word0 >> (k << 3)) & 0xff]; | ||
#if N > 1 | ||
crc1 ^= BRAID_TABLE[k][(word1 >> (k << 3)) & 0xff]; | ||
#if N > 2 | ||
crc2 ^= BRAID_TABLE[k][(word2 >> (k << 3)) & 0xff]; | ||
#if N > 3 | ||
crc3 ^= BRAID_TABLE[k][(word3 >> (k << 3)) & 0xff]; | ||
#if N > 4 | ||
crc4 ^= BRAID_TABLE[k][(word4 >> (k << 3)) & 0xff]; | ||
#if N > 5 | ||
crc5 ^= BRAID_TABLE[k][(word5 >> (k << 3)) & 0xff]; | ||
#endif | ||
#endif | ||
#endif | ||
#endif | ||
#endif | ||
} | ||
} | ||
|
||
/* Process the last block, combining the CRCs of the N braids at the same time. */ | ||
comb = crc_word(crc0 ^ words[0]); | ||
#if N > 1 | ||
comb = crc_word(crc1 ^ words[1] ^ comb); | ||
#if N > 2 | ||
comb = crc_word(crc2 ^ words[2] ^ comb); | ||
#if N > 3 | ||
comb = crc_word(crc3 ^ words[3] ^ comb); | ||
#if N > 4 | ||
comb = crc_word(crc4 ^ words[4] ^ comb); | ||
#if N > 5 | ||
comb = crc_word(crc5 ^ words[5] ^ comb); | ||
#endif | ||
#endif | ||
#endif | ||
#endif | ||
#endif | ||
words += N; | ||
c = ZSWAPWORD(comb); | ||
|
||
/* Update the pointer to the remaining bytes to process. */ | ||
buf = (const unsigned char *)words; | ||
} | ||
|
||
#endif /* W */ | ||
|
||
/* Complete the computation of the CRC on any remaining bytes. */ | ||
while (len >= 8) { | ||
len -= 8; | ||
DO8; | ||
} | ||
while (len) { | ||
len--; | ||
DO1; | ||
} | ||
|
||
/* Return the CRC, post-conditioned. */ | ||
return c ^ 0xffffffff; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
/* crc32.h -- crc32 folding interface | ||
* Copyright (C) 2021 Nathan Moinvaziri | ||
* For conditions of distribution and use, see copyright notice in zlib.h | ||
*/ | ||
#ifndef CRC32_H_ | ||
#define CRC32_H_ | ||
|
||
#define CRC32_FOLD_BUFFER_SIZE (16 * 4) | ||
/* sizeof(__m128i) * (4 folds) */ | ||
|
||
typedef struct crc32_fold_s { | ||
uint8_t fold[CRC32_FOLD_BUFFER_SIZE]; | ||
uint32_t value; | ||
} crc32_fold; | ||
|
||
#endif |
Oops, something went wrong.