openbsd-ports/databases/mariadb/patches/patch-storage_rocksdb_rocksdb_util_crc32c_arm64_cc
sthen 79f9ff8075 Update to MariaDB 10.9.4, tidy the arch-specific parts of the Makefile,
build RocksDB on other 64-bit little-endian architectures, and add run-time
hardware feature detection for OpenBSD/arm64. From Brad.
2022-11-09 12:12:44 +00:00

210 lines
6.1 KiB
Plaintext

Upstream RocksDB commits included in this patch:
- Check for sys/auxv.h
https://github.com/facebook/rocksdb/commit/8bbd76edbfd0c187960aae31d107a9a0fa71472c
- Fix RocksDB SIGILL error on Raspberry PI 4
https://github.com/facebook/rocksdb/commit/29f7bbef995bdf83098963799c66af742e95373f
- Fix compilation on Apple Silicon
https://github.com/facebook/rocksdb/commit/ee4bd4780b321ddb5f92a0f4eb956f2a2ebd60dc
- Adding ARM AT_HWCAP support for FreeBSD
https://github.com/facebook/rocksdb/commit/93c6c18cf9731f47b386dd445dc13363423c507e
- Add OpenBSD/arm64 support for detection of CRC32 and PMULL
https://github.com/facebook/rocksdb/commit/4a6906e28ce058f6cea085b394a1adbd19aa468e
https://jira.mariadb.org/browse/MDEV-29875
Index: storage/rocksdb/rocksdb/util/crc32c_arm64.cc
--- storage/rocksdb/rocksdb/util/crc32c_arm64.cc.orig
+++ storage/rocksdb/rocksdb/util/crc32c_arm64.cc
@@ -5,13 +5,26 @@
#include "util/crc32c_arm64.h"
-#if defined(__linux__) && defined(HAVE_ARM64_CRC)
+#if defined(HAVE_ARM64_CRC)
+#if defined(__linux__)
#include <asm/hwcap.h>
+#endif
+#ifdef ROCKSDB_AUXV_GETAUXVAL_PRESENT
#include <sys/auxv.h>
+#endif
#ifndef HWCAP_CRC32
#define HWCAP_CRC32 (1 << 7)
#endif
+#ifndef HWCAP_PMULL
+#define HWCAP_PMULL (1 << 4)
+#endif
+#if defined(__OpenBSD__)
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <machine/cpu.h>
+#include <machine/armreg.h>
+#endif
#ifdef HAVE_ARM64_CRYPTO
/* unfolding to compute 8 * 3 = 24 bytes parallelly */
@@ -33,18 +46,72 @@
} while (0)
#endif
+extern bool pmull_runtime_flag;
+
uint32_t crc32c_runtime_check(void) {
- uint64_t auxv = getauxval(AT_HWCAP);
+#if defined(ROCKSDB_AUXV_GETAUXVAL_PRESENT) || defined(__FreeBSD__)
+ uint64_t auxv = 0;
+#if defined(ROCKSDB_AUXV_GETAUXVAL_PRESENT)
+ auxv = getauxval(AT_HWCAP);
+#elif defined(__FreeBSD__)
+ elf_aux_info(AT_HWCAP, &auxv, sizeof(auxv));
+#endif
return (auxv & HWCAP_CRC32) != 0;
+#elif defined(__OpenBSD__)
+ int r = 0;
+ const int isar0_mib[] = { CTL_MACHDEP, CPU_ID_AA64ISAR0 };
+ uint64_t isar0;
+ size_t len = sizeof(isar0);
+
+ if (sysctl(isar0_mib, 2, &isar0, &len, NULL, 0) != -1) {
+ if (ID_AA64ISAR0_CRC32(isar0) >= ID_AA64ISAR0_CRC32_BASE)
+ r = 1;
+ }
+ return r;
+#else
+ return 0;
+#endif
}
-uint32_t crc32c_arm64(uint32_t crc, unsigned char const *data,
- unsigned len) {
+bool crc32c_pmull_runtime_check(void) {
+#if defined(ROCKSDB_AUXV_GETAUXVAL_PRESENT) || defined(__FreeBSD__)
+ uint64_t auxv = 0;
+#if defined(ROCKSDB_AUXV_GETAUXVAL_PRESENT)
+ auxv = getauxval(AT_HWCAP);
+#elif defined(__FreeBSD__)
+ elf_aux_info(AT_HWCAP, &auxv, sizeof(auxv));
+#endif
+ return (auxv & HWCAP_PMULL) != 0;
+#elif defined(__OpenBSD__)
+ bool r = false;
+ const int isar0_mib[] = { CTL_MACHDEP, CPU_ID_AA64ISAR0 };
+ uint64_t isar0;
+ size_t len = sizeof(isar0);
+
+ if (sysctl(isar0_mib, 2, &isar0, &len, NULL, 0) != -1) {
+ if (ID_AA64ISAR0_AES(isar0) >= ID_AA64ISAR0_AES_PMULL)
+ r = true;
+ }
+ return r;
+#else
+ return false;
+#endif
+}
+
+uint32_t
+crc32c_arm64(uint32_t crc, unsigned char const *data, size_t len) {
const uint8_t *buf8;
const uint64_t *buf64 = (uint64_t *)data;
int length = (int)len;
crc ^= 0xffffffff;
+ /*
+ * Pmull runtime check here.
+ * Raspberry Pi supports crc32 but doesn't support pmull.
+ * Skip Crc32c Parallel computation if no crypto extension available.
+ */
+ if (pmull_runtime_flag) {
+/* Macro (HAVE_ARM64_CRYPTO) is used for compiling check */
#ifdef HAVE_ARM64_CRYPTO
/* Crc32c Parallel computation
* Algorithm comes from Intel whitepaper:
@@ -55,51 +122,53 @@ uint32_t crc32c_arm64(uint32_t crc, unsigned char cons
* One Block: 42(BLK_LENGTH) * 8(step length: crc32c_u64) bytes
*/
#define BLK_LENGTH 42
- while (length >= 1024) {
- uint64_t t0, t1;
- uint32_t crc0 = 0, crc1 = 0, crc2 = 0;
+ while (length >= 1024) {
+ uint64_t t0, t1;
+ uint32_t crc0 = 0, crc1 = 0, crc2 = 0;
- /* Parallel Param:
- * k0 = CRC32(x ^ (42 * 8 * 8 * 2 - 1));
- * k1 = CRC32(x ^ (42 * 8 * 8 - 1));
- */
- uint32_t k0 = 0xe417f38a, k1 = 0x8f158014;
+ /* Parallel Param:
+ * k0 = CRC32(x ^ (42 * 8 * 8 * 2 - 1));
+ * k1 = CRC32(x ^ (42 * 8 * 8 - 1));
+ */
+ uint32_t k0 = 0xe417f38a, k1 = 0x8f158014;
- /* Prefetch data for following block to avoid cache miss */
- PREF1KL1((uint8_t *)buf64, 1024);
+ /* Prefetch data for following block to avoid cache miss */
+ PREF1KL1((uint8_t *)buf64, 1024);
- /* First 8 byte for better pipelining */
- crc0 = crc32c_u64(crc, *buf64++);
+ /* First 8 byte for better pipelining */
+ crc0 = crc32c_u64(crc, *buf64++);
- /* 3 blocks crc32c parallel computation
- * Macro unfolding to compute parallelly
- * 168 * 6 = 1008 (bytes)
- */
- CRC32C7X24BYTES(0);
- CRC32C7X24BYTES(1);
- CRC32C7X24BYTES(2);
- CRC32C7X24BYTES(3);
- CRC32C7X24BYTES(4);
- CRC32C7X24BYTES(5);
- buf64 += (BLK_LENGTH * 3);
+ /* 3 blocks crc32c parallel computation
+ * Macro unfolding to compute parallelly
+ * 168 * 6 = 1008 (bytes)
+ */
+ CRC32C7X24BYTES(0);
+ CRC32C7X24BYTES(1);
+ CRC32C7X24BYTES(2);
+ CRC32C7X24BYTES(3);
+ CRC32C7X24BYTES(4);
+ CRC32C7X24BYTES(5);
+ buf64 += (BLK_LENGTH * 3);
- /* Last 8 bytes */
- crc = crc32c_u64(crc2, *buf64++);
+ /* Last 8 bytes */
+ crc = crc32c_u64(crc2, *buf64++);
- t0 = (uint64_t)vmull_p64(crc0, k0);
- t1 = (uint64_t)vmull_p64(crc1, k1);
+ t0 = (uint64_t)vmull_p64(crc0, k0);
+ t1 = (uint64_t)vmull_p64(crc1, k1);
- /* Merge (crc0, crc1, crc2) -> crc */
- crc1 = crc32c_u64(0, t1);
- crc ^= crc1;
- crc0 = crc32c_u64(0, t0);
- crc ^= crc0;
+ /* Merge (crc0, crc1, crc2) -> crc */
+ crc1 = crc32c_u64(0, t1);
+ crc ^= crc1;
+ crc0 = crc32c_u64(0, t0);
+ crc ^= crc0;
- length -= 1024;
- }
+ length -= 1024;
+ }
- if (length == 0) return crc ^ (0xffffffffU);
+ if (length == 0) return crc ^ (0xffffffffU);
#endif
+ } // if Pmull runtime check here
+
buf8 = (const uint8_t *)buf64;
while (length >= 8) {
crc = crc32c_u64(crc, *(const uint64_t *)buf8);