Line | Branch | Exec | Source |
---|---|---|---|
1 | // SPDX-FileCopyrightText: 2022-2023 Dennis Gläser <dennis.glaeser@iws.uni-stuttgart.de> | ||
2 | // SPDX-License-Identifier: MIT | ||
3 | /*! | ||
4 | * \file | ||
5 | * \ingroup Common | ||
6 | * \ingroup Compression | ||
7 | * \brief Compressor using the LZMA library. | ||
8 | */ | ||
9 | #ifndef GRIDFORMAT_COMPRESSION_LZMA_HPP_ | ||
10 | #define GRIDFORMAT_COMPRESSION_LZMA_HPP_ | ||
11 | #if GRIDFORMAT_HAVE_LZMA | ||
12 | |||
13 | #include <concepts> | ||
14 | #include <utility> | ||
15 | #include <vector> | ||
16 | #include <cassert> | ||
17 | #include <algorithm> | ||
18 | #include <tuple> | ||
19 | #include <cstdint> | ||
20 | |||
21 | #include <lzma.h> | ||
22 | |||
23 | #include <gridformat/common/exceptions.hpp> | ||
24 | #include <gridformat/common/serialization.hpp> | ||
25 | #include <gridformat/common/logging.hpp> | ||
26 | |||
27 | #include <gridformat/compression/common.hpp> | ||
28 | #include <gridformat/compression/decompress.hpp> | ||
29 | |||
30 | namespace GridFormat::Compression { | ||
31 | |||
32 | //! \addtogroup Compression | ||
33 | //! @{ | ||
34 | |||
35 | //! Options for the lzma compressor | ||
36 | struct LZMAOptions { | ||
37 | std::size_t block_size = default_block_size; | ||
38 | std::uint32_t compression_level = LZMA_PRESET_DEFAULT; | ||
39 | }; | ||
40 | |||
41 | //! Compressor using the lzma library | ||
42 | class LZMA { | ||
43 | using LZMAByte = std::uint8_t; | ||
44 | static_assert(sizeof(typename Serialization::Byte) == sizeof(LZMAByte)); | ||
45 | |||
46 | struct BlockDecompressor { | ||
47 | using ByteType = LZMAByte; | ||
48 | |||
49 | 277 | void operator()(std::span<const ByteType> in, std::span<ByteType> out) const { | |
50 | 277 | size_t in_pos = 0; | |
51 | 277 | size_t out_pos = 0; | |
52 | 277 | uint64_t memlim = UINT64_MAX; | |
53 | 277 | if (lzma_stream_buffer_decode( | |
54 | &memlim, // No memory limit | ||
55 | uint32_t{0}, // Don't use any decoder flags | ||
56 | nullptr, // Use default allocators (malloc/free) | ||
57 | in.data(), &in_pos, in.size(), | ||
58 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 277 times.
|
277 | out.data(), &out_pos, out.size()) != LZMA_OK) |
59 | ✗ | throw IOError("(LZMACompressor) Error upon decompression"); | |
60 | 277 | } | |
61 | }; | ||
62 | |||
63 | public: | ||
64 | using Options = LZMAOptions; | ||
65 | |||
66 | 713 | explicit constexpr LZMA(Options opts = {}) | |
67 | 713 | : _opts(std::move(opts)) | |
68 | 713 | {} | |
69 | |||
70 | template<std::integral HeaderType = std::size_t> | ||
71 | 28889 | CompressedBlocks<HeaderType> compress(Serialization& in) const { | |
72 | static_assert(sizeof(typename Serialization::Byte) == sizeof(LZMAByte)); | ||
73 |
1/2✗ Branch 2 not taken.
✓ Branch 3 taken 14447 times.
|
28889 | if (std::numeric_limits<HeaderType>::max() < in.size()) |
74 | ✗ | throw TypeError("Chosen HeaderType is too small for given number of bytes"); | |
75 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 14447 times.
|
28889 | if (std::numeric_limits<HeaderType>::max() < _opts.block_size) |
76 | ✗ | throw TypeError("Chosen HeaderType is too small for given block size"); | |
77 | |||
78 |
2/4✓ Branch 1 taken 14447 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 14447 times.
✗ Branch 5 not taken.
|
28889 | auto [blocks, out] = _compress<HeaderType>(in.template as_span_of<const LZMAByte>()); |
79 | 28889 | in = std::move(out); | |
80 |
2/4✓ Branch 1 taken 14447 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 14447 times.
✗ Branch 5 not taken.
|
28889 | in.resize(blocks.compressed_size()); |
81 |
1/2✓ Branch 1 taken 14447 times.
✗ Branch 2 not taken.
|
57778 | return blocks; |
82 | 28889 | } | |
83 | |||
84 | template<std::integral HeaderType> | ||
85 | 548 | static void decompress(Serialization& in, const CompressedBlocks<HeaderType>& blocks) { | |
86 |
1/2✓ Branch 1 taken 275 times.
✗ Branch 2 not taken.
|
548 | Compression::decompress(in, blocks, BlockDecompressor{}); |
87 | 548 | } | |
88 | |||
89 | 439 | static LZMA with(Options opts) { | |
90 | 439 | return LZMA{std::move(opts)}; | |
91 | } | ||
92 | |||
93 | private: | ||
94 | template<std::integral HeaderType> | ||
95 | 28889 | auto _compress(std::span<const LZMAByte> in) const { | |
96 | 28889 | HeaderType block_size = static_cast<HeaderType>(_opts.block_size); | |
97 | 28889 | HeaderType size_in_bytes = static_cast<HeaderType>(in.size()); | |
98 | 28889 | Blocks<HeaderType> blocks{size_in_bytes, block_size}; | |
99 | |||
100 | 28889 | Serialization compressed; | |
101 | 28889 | std::vector<LZMAByte> block_buffer; | |
102 | 28889 | std::vector<HeaderType> compressed_block_sizes; | |
103 |
1/2✓ Branch 2 taken 14447 times.
✗ Branch 3 not taken.
|
28889 | block_buffer.reserve(lzma_stream_buffer_bound(_opts.block_size)); |
104 |
1/2✓ Branch 1 taken 14447 times.
✗ Branch 2 not taken.
|
28889 | compressed_block_sizes.reserve(blocks.number_of_blocks); |
105 |
1/2✓ Branch 2 taken 14447 times.
✗ Branch 3 not taken.
|
28889 | compressed.resize(block_buffer.capacity()*blocks.number_of_blocks); |
106 | |||
107 | 28889 | HeaderType cur_in = 0; | |
108 | 28889 | HeaderType cur_out = 0; | |
109 |
1/2✓ Branch 1 taken 14447 times.
✗ Branch 2 not taken.
|
28889 | auto out = compressed.template as_span_of<LZMAByte>(); |
110 |
2/2✓ Branch 0 taken 44088 times.
✓ Branch 1 taken 14447 times.
|
117046 | while (cur_in < size_in_bytes) { |
111 | using std::min; | ||
112 | 88157 | const HeaderType cur_block_size = min(block_size, size_in_bytes - cur_in); | |
113 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 44088 times.
|
88157 | assert(cur_in + cur_block_size <= size_in_bytes); |
114 | |||
115 | 88157 | std::size_t out_pos = 0; | |
116 | 88157 | const auto lzma_ret = lzma_easy_buffer_encode( | |
117 | 88157 | _opts.compression_level, LZMA_CHECK_CRC32, nullptr, | |
118 | 88157 | in.data() + cur_in, cur_block_size, | |
119 | block_buffer.data(), &out_pos, block_buffer.capacity() | ||
120 | ); | ||
121 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 44088 times.
|
88157 | if (lzma_ret != LZMA_OK) |
122 | ✗ | throw InvalidState(as_error("(LZMACompressor) Error upon compression")); | |
123 | |||
124 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 44088 times.
|
88157 | assert(cur_out + out_pos <= out.size()); |
125 |
1/3✗ Branch 1 not taken.
✓ Branch 2 taken 44088 times.
✗ Branch 3 not taken.
|
88157 | std::copy_n(block_buffer.data(), |
126 | out_pos, | ||
127 | 88157 | out.data() + cur_out); | |
128 | 88157 | cur_in += cur_block_size; | |
129 | 88157 | cur_out += out_pos; | |
130 |
1/2✓ Branch 1 taken 44088 times.
✗ Branch 2 not taken.
|
88157 | compressed_block_sizes.push_back(static_cast<HeaderType>(out_pos)); |
131 | } | ||
132 | |||
133 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 14447 times.
|
28889 | if (cur_in != size_in_bytes) |
134 | ✗ | throw InvalidState(as_error("(LZMACompressor) unexpected number of bytes processed")); | |
135 | |||
136 |
1/2✓ Branch 1 taken 14447 times.
✗ Branch 2 not taken.
|
57778 | return std::make_tuple( |
137 | 28889 | CompressedBlocks<HeaderType>{blocks, std::move(compressed_block_sizes)}, | |
138 | compressed | ||
139 |
1/2✓ Branch 1 taken 14447 times.
✗ Branch 2 not taken.
|
57778 | ); |
140 | 28889 | } | |
141 | |||
142 | Options _opts; | ||
143 | }; | ||
144 | |||
145 | inline constexpr LZMA lzma; //!< Instance of the lzma compressor | ||
146 | |||
147 | #ifndef DOXYGEN | ||
148 | namespace Detail { inline constexpr bool _have_lzma = true; } | ||
149 | #endif // DOXYGEN | ||
150 | |||
151 | //! @} group Compression | ||
152 | |||
153 | } // end namespace GridFormat::Compression | ||
154 | |||
155 | #else // GRIDFORMAT_HAVE_LZMA | ||
156 | |||
157 | namespace GridFormat::Compression { | ||
158 | |||
159 | namespace Detail { inline constexpr bool _have_lzma = false; } | ||
160 | class LZMA { | ||
161 | public: | ||
162 | template<bool b = false, typename... Args> | ||
163 | explicit LZMA(Args&&...) { static_assert(b, "LZMA compressor requires the LZMA library."); } | ||
164 | }; | ||
165 | |||
166 | } // namespace GridFormat::Compression | ||
167 | |||
168 | #endif // GRIDFORMAT_HAVE_LZMA | ||
169 | #endif // GRIDFORMAT_COMPRESSION_LZMA_HPP_ | ||
170 |