Line | Branch | Exec | Source |
---|---|---|---|
1 | // SPDX-FileCopyrightText: 2022-2023 Dennis Gläser <dennis.glaeser@iws.uni-stuttgart.de> | ||
2 | // SPDX-License-Identifier: MIT | ||
3 | /*! | ||
4 | * \file | ||
5 | * \ingroup Common | ||
6 | * \ingroup Compression | ||
7 | * \brief Compressor using the LZ4 library. | ||
8 | */ | ||
9 | #ifndef GRIDFORMAT_COMPRESSION_LZ4_HPP_ | ||
10 | #define GRIDFORMAT_COMPRESSION_LZ4_HPP_ | ||
11 | #if GRIDFORMAT_HAVE_LZ4 | ||
12 | |||
#include <algorithm>
#include <cassert>
#include <concepts>
#include <cstddef>
#include <limits>
#include <span>
#include <tuple>
#include <utility>
#include <vector>
19 | |||
20 | #include <lz4.h> | ||
21 | |||
22 | #include <gridformat/common/exceptions.hpp> | ||
23 | #include <gridformat/common/serialization.hpp> | ||
24 | #include <gridformat/common/logging.hpp> | ||
25 | |||
26 | #include <gridformat/compression/common.hpp> | ||
27 | #include <gridformat/compression/decompress.hpp> | ||
28 | |||
29 | namespace GridFormat::Compression { | ||
30 | |||
31 | //! \addtogroup Compression | ||
32 | //! @{ | ||
33 | |||
34 | //! Options for the lz4 compressor | ||
35 | struct LZ4Options { | ||
36 | std::size_t block_size = default_block_size; | ||
37 | int acceleration_factor = 1; // LZ4_ACCELERATION_DEFAULT | ||
38 | }; | ||
39 | |||
40 | //! Compressor using the lz4 compression library | ||
41 | class LZ4 { | ||
42 | using LZ4Byte = char; | ||
43 | static_assert(sizeof(typename Serialization::Byte) == sizeof(LZ4Byte)); | ||
44 | |||
45 | struct BlockDecompressor { | ||
46 | using ByteType = LZ4Byte; | ||
47 | |||
48 | 6639 | void operator()(std::span<const ByteType> in, std::span<ByteType> out) const { | |
49 | 6639 | int decompressed_length = LZ4_decompress_safe( | |
50 | in.data(), | ||
51 | out.data(), | ||
52 | 6639 | static_cast<int>(in.size()), | |
53 | 6639 | static_cast<int>(out.size()) | |
54 | ); | ||
55 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 6639 times.
|
6639 | if (decompressed_length != static_cast<int>(out.size())) |
56 | ✗ | throw IOError("(LZ4Compressor) Error upon block decompression"); | |
57 | 6639 | } | |
58 | }; | ||
59 | |||
60 | public: | ||
61 | using Options = LZ4Options; | ||
62 | |||
63 | 25018 | explicit constexpr LZ4(Options opts = {}) | |
64 | 25018 | : _opts(std::move(opts)) | |
65 | 25018 | {} | |
66 | |||
67 | template<std::integral HeaderType = std::size_t> | ||
68 | 103953 | CompressedBlocks<HeaderType> compress(Serialization& in) const { | |
69 | static_assert(sizeof(typename Serialization::Byte) == sizeof(LZ4Byte)); | ||
70 |
1/2✗ Branch 2 not taken.
✓ Branch 3 taken 51979 times.
|
103953 | if (std::numeric_limits<HeaderType>::max() < in.size()) |
71 | ✗ | throw TypeError("Chosen HeaderType is too small for given number of bytes"); | |
72 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 51979 times.
|
103953 | if (std::numeric_limits<HeaderType>::max() < _opts.block_size) |
73 | ✗ | throw TypeError("Chosen HeaderType is too small for given block size"); | |
74 | |||
75 |
2/4✓ Branch 1 taken 51979 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 51979 times.
✗ Branch 5 not taken.
|
103953 | auto [blocks, out] = _compress<HeaderType>(in.template as_span_of<const LZ4Byte>()); |
76 | 103953 | in = std::move(out); | |
77 |
2/4✓ Branch 1 taken 51979 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 51979 times.
✗ Branch 5 not taken.
|
103953 | in.resize(blocks.compressed_size()); |
78 |
1/2✓ Branch 1 taken 51979 times.
✗ Branch 2 not taken.
|
207906 | return blocks; |
79 | 103953 | } | |
80 | |||
81 | template<typename HeaderType> | ||
82 | 13422 | static void decompress(Serialization& in, const CompressedBlocks<HeaderType>& blocks) { | |
83 |
1/2✓ Branch 1 taken 6712 times.
✗ Branch 2 not taken.
|
13422 | Compression::decompress(in, blocks, BlockDecompressor{}); |
84 | 13422 | } | |
85 | |||
86 | 439 | static LZ4 with(Options opts) { | |
87 | 439 | return LZ4{std::move(opts)}; | |
88 | } | ||
89 | |||
90 | private: | ||
91 | template<std::integral HeaderType> | ||
92 | 103953 | auto _compress(std::span<const LZ4Byte> in) const { | |
93 | 103953 | HeaderType block_size = static_cast<HeaderType>(_opts.block_size); | |
94 | 103953 | HeaderType size_in_bytes = static_cast<HeaderType>(in.size()); | |
95 | 103953 | Blocks<HeaderType> blocks{size_in_bytes, block_size}; | |
96 | |||
97 | 103953 | Serialization compressed; | |
98 | 103953 | std::vector<LZ4Byte> block_buffer; | |
99 | 103953 | std::vector<HeaderType> compressed_block_sizes; | |
100 |
2/4✓ Branch 0 taken 51979 times.
✗ Branch 1 not taken.
✓ Branch 3 taken 51979 times.
✗ Branch 4 not taken.
|
103953 | block_buffer.reserve(LZ4_COMPRESSBOUND(_opts.block_size)); |
101 |
1/2✓ Branch 1 taken 51979 times.
✗ Branch 2 not taken.
|
103953 | compressed_block_sizes.reserve(blocks.number_of_blocks); |
102 |
1/2✓ Branch 2 taken 51979 times.
✗ Branch 3 not taken.
|
103953 | compressed.resize(block_buffer.capacity()*blocks.number_of_blocks); |
103 | |||
104 | 103953 | HeaderType cur_in = 0; | |
105 | 103953 | HeaderType cur_out = 0; | |
106 |
1/2✓ Branch 1 taken 51979 times.
✗ Branch 2 not taken.
|
103953 | auto out = compressed.template as_span_of<LZ4Byte>(); |
107 |
2/2✓ Branch 0 taken 81040 times.
✓ Branch 1 taken 51979 times.
|
266014 | while (cur_in < size_in_bytes) { |
108 | using std::min; | ||
109 | 162061 | const HeaderType cur_block_size = min(block_size, size_in_bytes - cur_in); | |
110 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 81040 times.
|
162061 | assert(cur_in + cur_block_size <= size_in_bytes); |
111 | |||
112 |
1/2✓ Branch 2 taken 73889 times.
✗ Branch 3 not taken.
|
309820 | const auto compressed_length = LZ4_compress_fast( |
113 |
1/2✓ Branch 2 taken 7151 times.
✗ Branch 3 not taken.
|
162061 | in.data() + cur_in, // const char* src |
114 | block_buffer.data(), // char* dst | ||
115 | cur_block_size, // src_size | ||
116 | 162061 | block_buffer.capacity(), // dst_capacity | |
117 | 162061 | _opts.acceleration_factor // lz4 acc factor | |
118 | ); | ||
119 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 81040 times.
|
162061 | if (compressed_length == 0) |
120 | ✗ | throw InvalidState(as_error("Error upon compression with LZ4")); | |
121 | |||
122 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 81040 times.
|
162061 | assert(cur_out + compressed_length <= out.size()); |
123 |
2/3✓ Branch 1 taken 7151 times.
✓ Branch 2 taken 73889 times.
✗ Branch 3 not taken.
|
162061 | std::copy_n(block_buffer.data(), |
124 | compressed_length, | ||
125 | 162061 | out.data() + cur_out); | |
126 | 162061 | cur_in += cur_block_size; | |
127 | 162061 | cur_out += compressed_length; | |
128 |
1/2✓ Branch 1 taken 81040 times.
✗ Branch 2 not taken.
|
162061 | compressed_block_sizes.push_back(static_cast<HeaderType>(compressed_length)); |
129 | } | ||
130 | |||
131 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 51979 times.
|
103953 | if (cur_in != size_in_bytes) |
132 | ✗ | throw InvalidState(as_error("(LZ4Compressor) unexpected number of bytes processed")); | |
133 | |||
134 |
1/2✓ Branch 1 taken 51979 times.
✗ Branch 2 not taken.
|
207906 | return std::make_tuple( |
135 | 103953 | CompressedBlocks<HeaderType>{blocks, std::move(compressed_block_sizes)}, | |
136 | compressed | ||
137 |
1/2✓ Branch 1 taken 51979 times.
✗ Branch 2 not taken.
|
207906 | ); |
138 | 103953 | } | |
139 | |||
140 | Options _opts; | ||
141 | }; | ||
142 | |||
143 | inline constexpr LZ4 lz4; //!< Instance of the lz4 compressor | ||
144 | |||
#ifndef DOXYGEN
namespace Detail {
    //! Compile-time flag: this build provides the lz4 compressor
    inline constexpr bool _have_lz4 = true;
}  // namespace Detail
#endif  // DOXYGEN
148 | |||
149 | //! @} group Compression | ||
150 | |||
151 | } // end namespace GridFormat::Compression | ||
152 | |||
153 | #else // GRIDFORMAT_HAVE_LZ4 | ||
154 | |||
155 | namespace GridFormat::Compression { | ||
156 | |||
namespace Detail {
    //! Compile-time flag: this build does not provide the lz4 compressor
    inline constexpr bool _have_lz4 = false;
}  // namespace Detail
158 | |||
//! Stand-in for the lz4 compressor in builds without lz4.
//! Instantiating the constructor triggers a static assertion with an explanatory message.
class LZ4 {
 public:
    // The assertion depends on a template parameter so that it only fires once the
    // constructor is actually instantiated, not when this header is merely included.
    template<bool lz4_available = false, typename... Ts>
    explicit LZ4(Ts&&...) {
        static_assert(lz4_available, "LZ4 compressor requires the LZ4 library.");
    }
};
164 | |||
165 | } // namespace GridFormat::Compression | ||
166 | |||
167 | #endif // GRIDFORMAT_HAVE_LZ4 | ||
168 | #endif // GRIDFORMAT_COMPRESSION_LZ4_HPP_ | ||
169 |