shuffle-generic.h 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899
  1. /*********************************************************************
  2. Blosc - Blocked Shuffling and Compression Library
  3. Author: Francesc Alted <francesc@blosc.org>
  4. See LICENSES/BLOSC.txt for details about copyright and rights to use.
  5. **********************************************************************/
  6. /* Generic (non-hardware-accelerated) shuffle/unshuffle routines.
  7. These are used when hardware-accelerated functions aren't available
  8. for a particular platform; they are also used by the hardware-
  9. accelerated functions to handle any remaining elements in a block
  10. which isn't a multiple of the hardware's vector size. */
  11. #ifndef SHUFFLE_GENERIC_H
  12. #define SHUFFLE_GENERIC_H
  13. #include "shuffle-common.h"
  14. #include <stdlib.h>
  15. #ifdef __cplusplus
  16. extern "C" {
  17. #endif
  /**
    Generic (non-hardware-accelerated) shuffle routine.

    This is the pure element-copying nested loop. It is used by the
    generic shuffle implementation and also by the vectorized shuffle
    implementations to process any remaining elements in a block which
    is not a multiple of (type_size * vector_size).

    Parameters:
      type_size               Size in bytes of one element.
      vectorizable_blocksize  Leading part of the block, in bytes, that this
                              routine skips: elements with index below
                              (vectorizable_blocksize / type_size) are
                              neither read nor written here.
      blocksize               Total size of the block in bytes.
      _src                    Input block (blocksize bytes).
      _dest                   Output block (blocksize bytes).

    Layout: byte j of element i is written to
    _dest[j * (blocksize / type_size) + i], so all first bytes end up
    contiguous, then all second bytes, and so on. The trailing
    (blocksize % type_size) bytes do not form a whole element and are
    copied through without shuffling.

    A type_size of 0 describes no element structure at all (and would
    otherwise divide by zero); in that case the bytes from
    vectorizable_blocksize up to blocksize are copied through unchanged.
  */
  static void shuffle_generic_inline(const size_t type_size,
      const size_t vectorizable_blocksize, const size_t blocksize,
      const uint8_t* const _src, uint8_t* const _dest)
  {
    size_t i, j;
    size_t neblock_quot, neblock_rem, vectorizable_elements;

    /* Guard the divisions below: with no element size there is nothing to
       transpose, so pass the non-vectorized bytes through as they are. */
    if (type_size == 0) {
      if (vectorizable_blocksize < blocksize) {
        memcpy(_dest + vectorizable_blocksize, _src + vectorizable_blocksize,
               blocksize - vectorizable_blocksize);
      }
      return;
    }

    /* Calculate the number of whole elements in the block, the bytes left
       over after them, and the number of elements to skip. */
    neblock_quot = blocksize / type_size;
    neblock_rem = blocksize % type_size;
    vectorizable_elements = vectorizable_blocksize / type_size;

    /* Non-optimized shuffle: for each byte position j inside an element,
       gather that byte from every remaining element. */
    for (j = 0; j < type_size; j++) {
      for (i = vectorizable_elements; i < neblock_quot; i++) {
        _dest[j * neblock_quot + i] = _src[i * type_size + j];
      }
    }

    /* Copy any leftover bytes in the block without shuffling them. */
    if (neblock_rem > 0) {
      memcpy(_dest + (blocksize - neblock_rem),
             _src + (blocksize - neblock_rem), neblock_rem);
    }
  }
  /**
    Generic (non-hardware-accelerated) unshuffle routine.

    This is the pure element-copying nested loop. It is used by the
    generic unshuffle implementation and also by the vectorized unshuffle
    implementations to process any remaining elements in a block which
    is not a multiple of (type_size * vector_size).

    Parameters:
      type_size               Size in bytes of one element.
      vectorizable_blocksize  Leading part of the block, in bytes, that this
                              routine skips: elements with index below
                              (vectorizable_blocksize / type_size) are
                              not reconstructed here.
      blocksize               Total size of the block in bytes.
      _src                    Shuffled input block (blocksize bytes).
      _dest                   Output block (blocksize bytes).

    Layout: byte j of element i is read from
    _src[j * (blocksize / type_size) + i] and written to
    _dest[i * type_size + j]. The trailing (blocksize % type_size) bytes
    do not form a whole element and are copied through without
    unshuffling.

    A type_size of 0 describes no element structure at all (and would
    otherwise divide by zero); in that case the bytes from
    vectorizable_blocksize up to blocksize are copied through unchanged.
  */
  static void unshuffle_generic_inline(const size_t type_size,
      const size_t vectorizable_blocksize, const size_t blocksize,
      const uint8_t* const _src, uint8_t* const _dest)
  {
    size_t i, j;
    size_t neblock_quot, neblock_rem, vectorizable_elements;

    /* Guard the divisions below: with no element size there is nothing to
       transpose, so pass the non-vectorized bytes through as they are. */
    if (type_size == 0) {
      if (vectorizable_blocksize < blocksize) {
        memcpy(_dest + vectorizable_blocksize, _src + vectorizable_blocksize,
               blocksize - vectorizable_blocksize);
      }
      return;
    }

    /* Calculate the number of whole elements in the block, the bytes left
       over after them, and the number of elements to skip. */
    neblock_quot = blocksize / type_size;
    neblock_rem = blocksize % type_size;
    vectorizable_elements = vectorizable_blocksize / type_size;

    /* Non-optimized unshuffle: rebuild each remaining element by picking
       its j-th byte out of the j-th byte plane of the input. */
    for (i = vectorizable_elements; i < neblock_quot; i++) {
      for (j = 0; j < type_size; j++) {
        _dest[i * type_size + j] = _src[j * neblock_quot + i];
      }
    }

    /* Copy any leftover bytes in the block without unshuffling them. */
    if (neblock_rem > 0) {
      memcpy(_dest + (blocksize - neblock_rem),
             _src + (blocksize - neblock_rem), neblock_rem);
    }
  }
  68. /**
  69. Generic (non-hardware-accelerated) shuffle routine.
  70. */
  71. BLOSC_NO_EXPORT void shuffle_generic(const size_t bytesoftype, const size_t blocksize,
  72. const uint8_t* const _src, uint8_t* const _dest);
  73. /**
  74. Generic (non-hardware-accelerated) unshuffle routine.
  75. */
  76. BLOSC_NO_EXPORT void unshuffle_generic(const size_t bytesoftype, const size_t blocksize,
  77. const uint8_t* const _src, uint8_t* const _dest);
  78. #ifdef __cplusplus
  79. }
  80. #endif
  81. #endif /* SHUFFLE_GENERIC_H */