| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | // SPDX-FileCopyrightText: 2022-2023 Dennis Gläser <dennis.glaeser@iws.uni-stuttgart.de> | ||
| 2 | // SPDX-License-Identifier: MIT | ||
| 3 | /*! | ||
| 4 | * \file | ||
| 5 | * \ingroup Common | ||
| 6 | * \ingroup Compression | ||
| 7 | * \brief Compressor using the LZMA library. | ||
| 8 | */ | ||
| 9 | #ifndef GRIDFORMAT_COMPRESSION_LZMA_HPP_ | ||
| 10 | #define GRIDFORMAT_COMPRESSION_LZMA_HPP_ | ||
| 11 | #if GRIDFORMAT_HAVE_LZMA | ||
| 12 | |||
| 13 | #include <concepts> | ||
| 14 | #include <utility> | ||
| 15 | #include <vector> | ||
| 16 | #include <cassert> | ||
| 17 | #include <algorithm> | ||
| 18 | #include <tuple> | ||
| 19 | #include <cstdint> | ||
| 20 | |||
| 21 | #include <lzma.h> | ||
| 22 | |||
| 23 | #include <gridformat/common/exceptions.hpp> | ||
| 24 | #include <gridformat/common/serialization.hpp> | ||
| 25 | #include <gridformat/common/logging.hpp> | ||
| 26 | |||
| 27 | #include <gridformat/compression/common.hpp> | ||
| 28 | #include <gridformat/compression/decompress.hpp> | ||
| 29 | |||
| 30 | namespace GridFormat::Compression { | ||
| 31 | |||
| 32 | //! \addtogroup Compression | ||
| 33 | //! @{ | ||
| 34 | |||
| 35 | //! Options for the lzma compressor | ||
| 36 | struct LZMAOptions { | ||
| 37 | std::size_t block_size = default_block_size; | ||
| 38 | std::uint32_t compression_level = LZMA_PRESET_DEFAULT; | ||
| 39 | }; | ||
| 40 | |||
| 41 | //! Compressor using the lzma library | ||
| 42 | class LZMA { | ||
| 43 | using LZMAByte = std::uint8_t; | ||
| 44 | static_assert(sizeof(typename Serialization::Byte) == sizeof(LZMAByte)); | ||
| 45 | |||
| 46 | struct BlockDecompressor { | ||
| 47 | using ByteType = LZMAByte; | ||
| 48 | |||
| 49 | 277 | void operator()(std::span<const ByteType> in, std::span<ByteType> out) const { | |
| 50 | 277 | size_t in_pos = 0; | |
| 51 | 277 | size_t out_pos = 0; | |
| 52 | 277 | uint64_t memlim = UINT64_MAX; | |
| 53 | 277 | if (lzma_stream_buffer_decode( | |
| 54 | &memlim, // No memory limit | ||
| 55 | uint32_t{0}, // Don't use any decoder flags | ||
| 56 | nullptr, // Use default allocators (malloc/free) | ||
| 57 | in.data(), &in_pos, in.size(), | ||
| 58 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 277 times.
|
277 | out.data(), &out_pos, out.size()) != LZMA_OK) |
| 59 | ✗ | throw IOError("(LZMACompressor) Error upon decompression"); | |
| 60 | 277 | } | |
| 61 | }; | ||
| 62 | |||
| 63 | public: | ||
| 64 | using Options = LZMAOptions; | ||
| 65 | |||
| 66 | 719 | explicit constexpr LZMA(Options opts = {}) | |
| 67 | 719 | : _opts(std::move(opts)) | |
| 68 | 719 | {} | |
| 69 | |||
| 70 | template<std::integral HeaderType = std::size_t> | ||
| 71 | 29249 | CompressedBlocks<HeaderType> compress(Serialization& in) const { | |
| 72 | static_assert(sizeof(typename Serialization::Byte) == sizeof(LZMAByte)); | ||
| 73 |
1/2✗ Branch 2 not taken.
✓ Branch 3 taken 14627 times.
|
29249 | if (std::numeric_limits<HeaderType>::max() < in.size()) |
| 74 | ✗ | throw TypeError("Chosen HeaderType is too small for given number of bytes"); | |
| 75 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 14627 times.
|
29249 | if (std::numeric_limits<HeaderType>::max() < _opts.block_size) |
| 76 | ✗ | throw TypeError("Chosen HeaderType is too small for given block size"); | |
| 77 | |||
| 78 |
2/4✓ Branch 1 taken 14627 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 14627 times.
✗ Branch 5 not taken.
|
29249 | auto [blocks, out] = _compress<HeaderType>(in.template as_span_of<const LZMAByte>()); |
| 79 | 29249 | in = std::move(out); | |
| 80 |
2/4✓ Branch 1 taken 14627 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 14627 times.
✗ Branch 5 not taken.
|
29249 | in.resize(blocks.compressed_size()); |
| 81 |
1/2✓ Branch 1 taken 14627 times.
✗ Branch 2 not taken.
|
58498 | return blocks; |
| 82 | 29249 | } | |
| 83 | |||
| 84 | template<std::integral HeaderType> | ||
| 85 | 548 | static void decompress(Serialization& in, const CompressedBlocks<HeaderType>& blocks) { | |
| 86 |
1/2✓ Branch 1 taken 275 times.
✗ Branch 2 not taken.
|
548 | Compression::decompress(in, blocks, BlockDecompressor{}); |
| 87 | 548 | } | |
| 88 | |||
| 89 | 445 | static LZMA with(Options opts) { | |
| 90 | 445 | return LZMA{std::move(opts)}; | |
| 91 | } | ||
| 92 | |||
| 93 | private: | ||
| 94 | template<std::integral HeaderType> | ||
| 95 | 29249 | auto _compress(std::span<const LZMAByte> in) const { | |
| 96 | 29249 | HeaderType block_size = static_cast<HeaderType>(_opts.block_size); | |
| 97 | 29249 | HeaderType size_in_bytes = static_cast<HeaderType>(in.size()); | |
| 98 | 29249 | Blocks<HeaderType> blocks{size_in_bytes, block_size}; | |
| 99 | |||
| 100 | 29249 | Serialization compressed; | |
| 101 | 29249 | std::vector<LZMAByte> block_buffer; | |
| 102 | 29249 | std::vector<HeaderType> compressed_block_sizes; | |
| 103 |
1/2✓ Branch 2 taken 14627 times.
✗ Branch 3 not taken.
|
29249 | block_buffer.reserve(lzma_stream_buffer_bound(_opts.block_size)); |
| 104 |
1/2✓ Branch 1 taken 14627 times.
✗ Branch 2 not taken.
|
29249 | compressed_block_sizes.reserve(blocks.number_of_blocks); |
| 105 |
1/2✓ Branch 2 taken 14627 times.
✗ Branch 3 not taken.
|
29249 | compressed.resize(block_buffer.capacity()*blocks.number_of_blocks); |
| 106 | |||
| 107 | 29249 | HeaderType cur_in = 0; | |
| 108 | 29249 | HeaderType cur_out = 0; | |
| 109 |
1/2✓ Branch 1 taken 14627 times.
✗ Branch 2 not taken.
|
29249 | auto out = compressed.template as_span_of<LZMAByte>(); |
| 110 |
2/2✓ Branch 0 taken 44298 times.
✓ Branch 1 taken 14627 times.
|
117826 | while (cur_in < size_in_bytes) { |
| 111 | using std::min; | ||
| 112 | 88577 | const HeaderType cur_block_size = min(block_size, size_in_bytes - cur_in); | |
| 113 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 44298 times.
|
88577 | assert(cur_in + cur_block_size <= size_in_bytes); |
| 114 | |||
| 115 | 88577 | std::size_t out_pos = 0; | |
| 116 | 88577 | const auto lzma_ret = lzma_easy_buffer_encode( | |
| 117 | 88577 | _opts.compression_level, LZMA_CHECK_CRC32, nullptr, | |
| 118 | 88577 | in.data() + cur_in, cur_block_size, | |
| 119 | block_buffer.data(), &out_pos, block_buffer.capacity() | ||
| 120 | ); | ||
| 121 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 44298 times.
|
88577 | if (lzma_ret != LZMA_OK) |
| 122 | ✗ | throw InvalidState(as_error("(LZMACompressor) Error upon compression")); | |
| 123 | |||
| 124 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 44298 times.
|
88577 | assert(cur_out + out_pos <= out.size()); |
| 125 |
1/3✗ Branch 1 not taken.
✓ Branch 2 taken 44298 times.
✗ Branch 3 not taken.
|
88577 | std::copy_n(block_buffer.data(), |
| 126 | out_pos, | ||
| 127 | 88577 | out.data() + cur_out); | |
| 128 | 88577 | cur_in += cur_block_size; | |
| 129 | 88577 | cur_out += out_pos; | |
| 130 |
1/2✓ Branch 1 taken 44298 times.
✗ Branch 2 not taken.
|
88577 | compressed_block_sizes.push_back(static_cast<HeaderType>(out_pos)); |
| 131 | } | ||
| 132 | |||
| 133 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 14627 times.
|
29249 | if (cur_in != size_in_bytes) |
| 134 | ✗ | throw InvalidState(as_error("(LZMACompressor) unexpected number of bytes processed")); | |
| 135 | |||
| 136 |
1/2✓ Branch 1 taken 14627 times.
✗ Branch 2 not taken.
|
58498 | return std::make_tuple( |
| 137 | 29249 | CompressedBlocks<HeaderType>{blocks, std::move(compressed_block_sizes)}, | |
| 138 | compressed | ||
| 139 |
1/2✓ Branch 1 taken 14627 times.
✗ Branch 2 not taken.
|
58498 | ); |
| 140 | 29249 | } | |
| 141 | |||
| 142 | Options _opts; | ||
| 143 | }; | ||
| 144 | |||
| 145 | inline constexpr LZMA lzma; //!< Instance of the lzma compressor | ||
| 146 | |||
| 147 | #ifndef DOXYGEN | ||
| 148 | namespace Detail { inline constexpr bool _have_lzma = true; } | ||
| 149 | #endif // DOXYGEN | ||
| 150 | |||
| 151 | //! @} group Compression | ||
| 152 | |||
| 153 | } // end namespace GridFormat::Compression | ||
| 154 | |||
| 155 | #else // GRIDFORMAT_HAVE_LZMA | ||
| 156 | |||
| 157 | namespace GridFormat::Compression { | ||
| 158 | |||
namespace Detail {
    //! Signals that this build does not provide the lzma compressor
    inline constexpr bool _have_lzma = false;
}  // namespace Detail

//! Stand-in for the lzma compressor that fails to compile as soon as it is constructed
class LZMA {
 public:
    // The assertion depends on a template parameter, so it only fires once
    // the constructor is actually instantiated.
    template<bool lzma_found = false, typename... Ts>
    explicit LZMA(Ts&&...) {
        static_assert(lzma_found, "LZMA compressor requires the LZMA library.");
    }
};
| 165 | |||
| 166 | } // namespace GridFormat::Compression | ||
| 167 | |||
| 168 | #endif // GRIDFORMAT_HAVE_LZMA | ||
| 169 | #endif // GRIDFORMAT_COMPRESSION_LZMA_HPP_ | ||
| 170 |