You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

133 regels
3.6 KiB

  1. // Copyright 2016 The LevelDB Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license that can be
  3. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  4. //
  5. // A portable implementation of crc32c, optimized to handle
  6. // four bytes at a time.
  7. //
  8. // In a separate source file to allow this accelerated CRC32C function to be
  9. // compiled with the appropriate compiler flags to enable x86 SSE 4.2
  10. // instructions.
  11. #include <stdint.h>
  12. #include <string.h>
  13. #include "port/port.h"
  14. #if defined(LEVELDB_PLATFORM_POSIX_SSE)
  15. #if defined(_MSC_VER)
  16. #include <intrin.h>
  17. #elif defined(__GNUC__) && defined(__SSE4_2__)
  18. #include <nmmintrin.h>
  19. #include <cpuid.h>
  20. #endif
  21. #endif // defined(LEVELDB_PLATFORM_POSIX_SSE)
  22. namespace leveldb {
  23. namespace port {
  24. #if defined(LEVELDB_PLATFORM_POSIX_SSE)
  // Reads the four bytes starting at |p| and returns them as a 32-bit word.
  // The bytes are copied verbatim (host byte order). This code is only built
  // for SSE, i.e. x86, which is little-endian, so the result is the
  // little-endian interpretation of those bytes.
  static inline uint32_t LE_LOAD32(const uint8_t *p) {
    uint32_t value;
    // memcpy rather than a pointer cast: no alignment or aliasing concerns.
    memcpy(&value, p, sizeof(uint32_t));
    return value;
  }
  32. #if defined(_M_X64) || defined(__x86_64__) // LE_LOAD64 is only used on x64.
  // Reads the eight bytes starting at |p| and returns them as a 64-bit word.
  // The bytes are copied verbatim (host byte order), which on the x64 targets
  // this helper is compiled for is little-endian.
  static inline uint64_t LE_LOAD64(const uint8_t *p) {
    uint64_t value;
    // memcpy rather than a pointer cast: no alignment or aliasing concerns.
    memcpy(&value, p, sizeof(uint64_t));
    return value;
  }
  39. #endif // defined(_M_X64) || defined(__x86_64__)
  // Reports whether the CPU this code is running on advertises SSE 4.2.
  //
  // The check queries CPUID leaf 1 and tests bit 20 of ECX. Returns false
  // when the query cannot be performed: the compiler is neither MSVC nor a
  // GCC-compatible compiler with <cpuid.h> included, or the CPUID query
  // itself reports failure.
  static inline bool HaveSSE42() {
  #if defined(_MSC_VER)
    int cpu_info[4];
    __cpuid(cpu_info, 1);
    // cpu_info[2] holds ECX; test the bit on an unsigned value.
    return (static_cast<unsigned int>(cpu_info[2]) & (1u << 20)) != 0;
  #elif defined(__GNUC__) && defined(__SSE4_2__)
    // Same condition as the one guarding #include <cpuid.h>, so
    // __get_cpuid is only referenced when it has been declared.
    unsigned int eax = 0;
    unsigned int ebx = 0;
    unsigned int ecx = 0;
    unsigned int edx = 0;
    // __get_cpuid returns 0 when the requested leaf is unsupported; in that
    // case the output registers are not written and must not be trusted.
    if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) == 0) {
      return false;
    }
    return (ecx & (1u << 20)) != 0;
  #else
    return false;
  #endif
  }
  53. #endif // defined(LEVELDB_PLATFORM_POSIX_SSE)
  // For further improvements see Intel publication at:
  // http://download.intel.com/design/intarch/papers/323405.pdf
  //
  // Extends |crc| over the |size| bytes at |buf| using the SSE 4.2 CRC32
  // instructions and returns the updated value.
  //
  // Returns 0 when no accelerated result is produced: either the build does
  // not define LEVELDB_PLATFORM_POSIX_SSE, or HaveSSE42() reports false.
  uint32_t AcceleratedCRC32C(uint32_t crc, const char* buf, size_t size) {
  #if defined(LEVELDB_PLATFORM_POSIX_SSE)
    // The CPU is probed on the first call; later calls reuse the answer.
    static bool sse42_available = HaveSSE42();
    if (sse42_available) {
      const uint8_t* cursor = reinterpret_cast<const uint8_t*>(buf);
      const uint8_t* const limit = cursor + size;

      // The running value is kept bit-inverted while bytes are folded in; the
      // inversion is undone on return.
      uint32_t state = crc ^ 0xffffffffu;

      if (size > 16) {
        // Round the start address up to the next multiple of 8. At most 7
        // bytes are skipped, so the aligned address lies inside the buffer
        // thanks to the size check above.
        const uintptr_t start = reinterpret_cast<uintptr_t>(cursor);
        const uint8_t* const aligned = reinterpret_cast<const uint8_t*>(
            (start + 7) & ~static_cast<uintptr_t>(7));

        // Consume single bytes until the cursor is 8-byte aligned.
        for (; cursor != aligned; ++cursor) {
          state = _mm_crc32_u8(state, *cursor);
        }

  #if defined(_M_X64) || defined(__x86_64__)
        // _mm_crc32_u64 only exists on x64: consume 8 bytes per step.
        for (; (limit - cursor) >= 8; cursor += 8) {
          state = static_cast<uint32_t>(_mm_crc32_u64(state, LE_LOAD64(cursor)));
        }
        // Fewer than 8 bytes remain, so at most one 4-byte step is possible.
        if ((limit - cursor) >= 4) {
          state = _mm_crc32_u32(state, LE_LOAD32(cursor));
          cursor += 4;
        }
  #else   // !(defined(_M_X64) || defined(__x86_64__))
        // No 64-bit instruction available: consume 4 bytes per step.
        for (; (limit - cursor) >= 4; cursor += 4) {
          state = _mm_crc32_u32(state, LE_LOAD32(cursor));
        }
  #endif  // defined(_M_X64) || defined(__x86_64__)
      }

      // Fold in whatever is left (or the whole input when size <= 16).
      for (; cursor != limit; ++cursor) {
        state = _mm_crc32_u8(state, *cursor);
      }
      return state ^ 0xffffffffu;
    }
  #endif  // defined(LEVELDB_PLATFORM_POSIX_SSE)
    // No accelerated implementation was used.
    return 0;
  }
  114. } // namespace port
  115. } // namespace leveldb