Skip to content

Commit

Permalink
fix aarch64 compile and fix previous llvm asm issue for crc64 (pingca…
Browse files Browse the repository at this point in the history
SchrodingerZhu authored Sep 10, 2021
1 parent bc9b63e commit c0d70d6
Showing 1 changed file with 12 additions and 18 deletions.
30 changes: 12 additions & 18 deletions libs/libcommon/include/common/crc64_arch/crc64_aarch64.h
Original file line number Diff line number Diff line change
@@ -4,17 +4,19 @@

#include <cstdint>
#include <cstring>
#include <utility>

namespace crc64::_detail
{
using simd_t = uint8x16_t;
typedef uint8x16_t simd_t __attribute__((__may_alias__));

class SIMD
{
public:
using Poly64Pair = std::pair<poly64_t, poly64_t>;

SIMD(uint64_t high, uint64_t low) noexcept
SIMD(uint64_t high, uint64_t low)
noexcept;

[[nodiscard]] SIMD fold16(SIMD coeff) const noexcept;

@@ -66,23 +68,15 @@ inline SIMD SIMD::fold8(uint64_t coeff) const noexcept

inline SIMD SIMD::fold16(SIMD coeff) const noexcept
{
/* GCC does not seem to use pmull2 properly and we hence use
* inline assembly to improve the performance.
* For macOS (aarch64), however, AppleClang does not recognize
* the assembly code `=w`, so we fallback to normal ways.
*/
#if defined(__APPLE__) || defined(__OSX__)
auto [x0, x1] = into_poly64pair();
auto [c0, c1] = coeff.into_poly64pair();
auto h = SIMD::from_mul(c0, x0);
auto l = SIMD::from_mul(c1, x1);
#else
/* GCC/Clang does not seem to use pmull2 properly and we hence use
* inline assembly to improve the performance.
*/

SIMD h(0, 0), l(0, 0);
asm("pmull %0.1q, %2.1d, %3.1d\n"
"pmull2 %1.1q, %2.2d, %3.2d"
: "=&w"(l), "=w"(h)
: "w"(*this), "w"(coeff));
#endif
asm("pmull\t%0.1q, %2.1d, %3.1d\n\t"
"pmull2\t%1.1q, %2.2d, %3.2d"
: "=&w"(l._inner), "=w"(h._inner)
: "w"(this->_inner), "w"(coeff._inner));
return h.bitxor(l);
}

0 comments on commit c0d70d6

Please sign in to comment.