bitshuffle-generic.c 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221
  /*********************************************************************
    Blosc - Blocked Shuffling and Compression Library
    Author: Francesc Alted <francesc@blosc.org>
    See LICENSES/BLOSC.txt for details about copyright and rights to use.
  **********************************************************************/
  #include "bitshuffle-generic.h"

  #include <string.h>
  /* Transpose bytes within elements, starting partway through the input.
   *
   * `in` is read as `size` elements of `elem_size` bytes each; byte `b` of
   * element `e` is written to `out[b * size + e]`.  Only elements with index
   * >= `start` are processed, so nothing is written when `start >= size`.
   *
   * `start` is passed through CHECK_MULT_EIGHT (defined outside this file;
   * presumably rejects values that are not a multiple of eight -- confirm).
   * Otherwise the function returns `size * elem_size`. */
  int64_t bshuf_trans_byte_elem_remainder(const void* in, void* out, const size_t size,
                                          const size_t elem_size, const size_t start) {
    const char* src = (const char*) in;
    char* dst = (char*) out;
    const size_t tail_begin = size - size % 8;
    size_t elem, byte, lane;

    CHECK_MULT_EIGHT(start);

    if (size <= start) {
      return size * elem_size;
    }

    /* Whole groups of eight elements.  The fixed-length innermost loop is kept
     * on its own so the compiler can unroll it. */
    for (elem = start; elem + 7 < size; elem += 8) {
      for (byte = 0; byte < elem_size; byte++) {
        const char* from = src + elem * elem_size + byte;
        char* to = dst + byte * size + elem;
        for (lane = 0; lane < 8; lane++) {
          to[lane] = from[lane * elem_size];
        }
      }
    }

    /* Leftover elements when `size` is not a multiple of eight. */
    for (elem = tail_begin; elem < size; elem++) {
      for (byte = 0; byte < elem_size; byte++) {
        dst[byte * size + elem] = src[elem * elem_size + byte];
      }
    }

    return size * elem_size;
  }
  /* Transpose bytes within elements over the whole input.
   * Forwards to bshuf_trans_byte_elem_remainder() with a start index of 0 and
   * returns whatever that call returns. */
  int64_t bshuf_trans_byte_elem_scal(const void* in, void* out, const size_t size,
                                     const size_t elem_size) {
    const size_t first_elem = 0;  /* begin at the very first element */

    return bshuf_trans_byte_elem_remainder(in, out, size, elem_size, first_elem);
  }
  /* Transpose bits within bytes, starting partway through the input.
   *
   * The input is consumed in 8-byte groups, from byte offset `start_byte` up to
   * `size * elem_size`.  Each group goes through an 8x8 bit transpose
   * (TRANS_BIT_8X8, or TRANS_BIT_8X8_BE on a big-endian host) and its eight
   * result bytes are scattered over eight output rows of
   * `size * elem_size / 8` bytes each, all at the column of the group index.
   *
   * Both the total byte count and `start_byte` are passed through
   * CHECK_MULT_EIGHT (defined outside this file).  Otherwise the function
   * returns `size * elem_size`. */
  int64_t bshuf_trans_bit_byte_remainder(const void* in, void* out, const size_t size,
                                         const size_t elem_size, const size_t start_byte) {
    const uint8_t* in_b = (const uint8_t*) in;
    uint8_t* out_b = (uint8_t*) out;
    uint64_t x, t;
    size_t ii, kk, row;
    const size_t nbyte = elem_size * size;
    const size_t nbyte_bitrow = nbyte / 8;
    /* Runtime endianness probe: the first byte of the value 1 is 1 only on a
     * little-endian host. */
    const uint64_t e = 1;
    const int little_endian = *(const uint8_t*) &e == 1;

    CHECK_MULT_EIGHT(nbyte);
    CHECK_MULT_EIGHT(start_byte);

    for (ii = start_byte / 8; ii < nbyte_bitrow; ii++) {
      /* Load through memcpy rather than a uint64_t* dereference: `in` is a
       * void* with no alignment guarantee, so a casted load is undefined
       * behaviour on misaligned buffers. */
      memcpy(&x, in_b + ii * 8, sizeof(x));
      if (little_endian) {
        TRANS_BIT_8X8(x, t);
      } else {
        TRANS_BIT_8X8_BE(x, t);
      }
      for (kk = 0; kk < 8; kk++) {
        /* Big-endian hosts fill the eight rows in reverse order.  The row is
         * computed explicitly instead of adding a negated unsigned stride, so
         * the index never depends on integer wraparound. */
        row = little_endian ? kk : 7 - kk;
        out_b[row * nbyte_bitrow + ii] = (uint8_t) x;
        x = x >> 8;
      }
    }
    return size * elem_size;
  }
  /* Transpose bits within bytes over the whole input.
   * Forwards to bshuf_trans_bit_byte_remainder() with a starting byte offset
   * of 0 and returns whatever that call returns. */
  int64_t bshuf_trans_bit_byte_scal(const void* in, void* out, const size_t size,
                                    const size_t elem_size) {
    const size_t first_byte = 0;  /* begin at the very first byte */

    return bshuf_trans_bit_byte_remainder(in, out, size, elem_size, first_byte);
  }
  /* General transpose of an array, optimized for large element sizes.
   *
   * `in` is read as an `lda` x `ldb` row-major grid of `elem_size`-byte
   * elements; `out` receives the `ldb` x `lda` transpose, copied one element
   * at a time with memcpy.  Returns `lda * ldb * elem_size`. */
  int64_t bshuf_trans_elem(const void* in, void* out, const size_t lda,
                           const size_t ldb, const size_t elem_size) {
    const char* src = (const char*) in;
    char* dst = (char*) out;
    size_t row, col;

    for (row = 0; row < lda; row++) {
      for (col = 0; col < ldb; col++) {
        const size_t src_elem = row * ldb + col;
        const size_t dst_elem = col * lda + row;
        memcpy(dst + dst_elem * elem_size, src + src_elem * elem_size, elem_size);
      }
    }

    return lda * ldb * elem_size;
  }
  /* Transpose rows of shuffled bits (size / 8 bytes) within groups of 8.
   *
   * Implemented as bshuf_trans_elem() on an 8 x `elem_size` grid whose
   * elements are `size / 8` bytes long.  `size` is passed through
   * CHECK_MULT_EIGHT (defined outside this file) first; the return value is
   * that of bshuf_trans_elem(). */
  int64_t bshuf_trans_bitrow_eight(const void* in, void* out, const size_t size,
                                   const size_t elem_size) {
    const size_t bitrow_bytes = size / 8;
    const size_t rows_per_group = 8;

    CHECK_MULT_EIGHT(size);

    return bshuf_trans_elem(in, out, rows_per_group, elem_size, bitrow_bytes);
  }
  /* Transpose bits within elements.
   *
   * Chains three passes:
   *   1. bytes within elements       (in      -> out)
   *   2. bits within bytes           (out     -> tmp_buf)
   *   3. bit rows in groups of eight (tmp_buf -> out)
   *
   * `tmp_buf` is scratch space written by pass 2 (presumably at least
   * `size * elem_size` bytes -- confirm against callers).  `size` goes through
   * CHECK_MULT_EIGHT and each intermediate result through CHECK_ERR (both
   * defined outside this file); the value returned is that of the last pass. */
  int64_t bshuf_trans_bit_elem_scal(const void* in, void* out, const size_t size,
                                    const size_t elem_size, void* tmp_buf) {
    int64_t count;

    CHECK_MULT_EIGHT(size);

    count = bshuf_trans_byte_elem_scal(in, out, size, elem_size);
    CHECK_ERR(count);

    count = bshuf_trans_bit_byte_scal(out, tmp_buf, size, elem_size);
    CHECK_ERR(count);

    return bshuf_trans_bitrow_eight(tmp_buf, out, size, elem_size);
  }
  /* For data organized into a row for each bit (8 * elem_size rows of
   * size / 8 bytes), transpose the bytes.
   *
   * Input byte `col` of row `byte * 8 + bit` is written to
   * `out[(col * elem_size + byte) * 8 + bit]`.  `size` is passed through
   * CHECK_MULT_EIGHT (defined outside this file); otherwise the function
   * returns `size * elem_size`. */
  int64_t bshuf_trans_byte_bitrow_scal(const void* in, void* out, const size_t size,
                                       const size_t elem_size) {
    const char* src = (const char*) in;
    char* dst = (char*) out;
    const size_t row_len = size / 8;
    size_t byte, col, bit;

    CHECK_MULT_EIGHT(size);

    for (byte = 0; byte < elem_size; byte++) {
      for (col = 0; col < row_len; col++) {
        /* Eight consecutive output bytes are gathered from eight input rows. */
        const char* from = src + byte * 8 * row_len + col;
        char* to = dst + (col * elem_size + byte) * 8;
        for (bit = 0; bit < 8; bit++) {
          to[bit] = from[bit * row_len];
        }
      }
    }

    return size * elem_size;
  }
  /* Shuffle bits within the bytes of eight element blocks.
   *
   * The buffer is walked in blocks of `8 * elem_size` bytes.  Within a block,
   * each 8-byte group (at offset `jj`) goes through an 8x8 bit transpose
   * (TRANS_BIT_8X8, or TRANS_BIT_8X8_BE on a big-endian host); result byte
   * `kk` is stored `elem_size` bytes apart, starting at offset `jj / 8` of the
   * same block.  On big-endian hosts the eight result bytes are stored in
   * reverse slot order.
   *
   * `size` is passed through CHECK_MULT_EIGHT (defined outside this file);
   * otherwise the function returns `size * elem_size`. */
  int64_t bshuf_shuffle_bit_eightelem_scal(const void* in, void* out,
                                           const size_t size, const size_t elem_size) {
    const uint8_t* in_b;
    uint8_t* out_b;
    uint64_t x, t;
    size_t ii, jj, kk;
    size_t nbyte, out_index;
    /* Runtime endianness probe: the first byte of the value 1 is 1 only on a
     * little-endian host. */
    const uint64_t e = 1;
    const int little_endian = *(const uint8_t*) &e == 1;

    CHECK_MULT_EIGHT(size);

    in_b = (const uint8_t*) in;
    out_b = (uint8_t*) out;
    nbyte = elem_size * size;

    for (jj = 0; jj < 8 * elem_size; jj += 8) {
      for (ii = 0; ii + 8 * elem_size - 1 < nbyte; ii += 8 * elem_size) {
        /* Load through memcpy rather than dereferencing a casted uint64_t*:
         * the byte buffer carries no 8-byte alignment guarantee, and the cast
         * also dropped the const qualifier of `in`. */
        memcpy(&x, &in_b[ii + jj], sizeof(x));
        if (little_endian) {
          TRANS_BIT_8X8(x, t);
        } else {
          TRANS_BIT_8X8_BE(x, t);
        }
        for (kk = 0; kk < 8; kk++) {
          /* The slot is computed explicitly (reversed on big-endian) instead
           * of adding a negated unsigned stride, so the index never depends
           * on integer wraparound. */
          out_index = ii + jj / 8 + (little_endian ? kk : 7 - kk) * elem_size;
          out_b[out_index] = (uint8_t) x;
          x = x >> 8;
        }
      }
    }
    return size * elem_size;
  }
  /* Untranspose bits within elements.
   *
   * Chains two passes:
   *   1. bytes of the bit rows            (in      -> tmp_buf)
   *   2. bits within eight-element blocks (tmp_buf -> out)
   *
   * `tmp_buf` is scratch space written by pass 1 (presumably at least
   * `size * elem_size` bytes -- confirm against callers).  `size` goes through
   * CHECK_MULT_EIGHT and the first result through CHECK_ERR (both defined
   * outside this file); the value returned is that of the second pass. */
  int64_t bshuf_untrans_bit_elem_scal(const void* in, void* out, const size_t size,
                                      const size_t elem_size, void* tmp_buf) {
    int64_t count;

    CHECK_MULT_EIGHT(size);

    count = bshuf_trans_byte_bitrow_scal(in, tmp_buf, size, elem_size);
    CHECK_ERR(count);

    return bshuf_shuffle_bit_eightelem_scal(tmp_buf, out, size, elem_size);
  }