GridFormat 0.2.1
I/O-Library for grid-like data structures
Loading...
Searching...
No Matches
lzma.hpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: 2022-2023 Dennis Gläser <dennis.glaeser@iws.uni-stuttgart.de>
2// SPDX-License-Identifier: MIT
9#ifndef GRIDFORMAT_COMPRESSION_LZMA_HPP_
10#define GRIDFORMAT_COMPRESSION_LZMA_HPP_
11#if GRIDFORMAT_HAVE_LZMA
12
13#include <concepts>
14#include <utility>
15#include <vector>
16#include <cassert>
17#include <algorithm>
18#include <tuple>
19#include <cstdint>
20
21#include <lzma.h>
22
23#include <gridformat/common/exceptions.hpp>
24#include <gridformat/common/serialization.hpp>
25#include <gridformat/common/logging.hpp>
26
29
30namespace GridFormat::Compression {
31
34
37 std::size_t block_size = default_block_size;
38 std::uint32_t compression_level = LZMA_PRESET_DEFAULT;
39};
40
42class LZMA {
43 using LZMAByte = std::uint8_t;
44 static_assert(sizeof(typename Serialization::Byte) == sizeof(LZMAByte));
45
46 struct BlockDecompressor {
47 using ByteType = LZMAByte;
48
49 void operator()(std::span<const ByteType> in, std::span<ByteType> out) const {
50 size_t in_pos = 0;
51 size_t out_pos = 0;
52 uint64_t memlim = UINT64_MAX;
53 if (lzma_stream_buffer_decode(
54 &memlim, // No memory limit
55 uint32_t{0}, // Don't use any decoder flags
56 nullptr, // Use default allocators (malloc/free)
57 in.data(), &in_pos, in.size(),
58 out.data(), &out_pos, out.size()) != LZMA_OK)
59 throw IOError("(LZMACompressor) Error upon decompression");
60 }
61 };
62
63 public:
64 using Options = LZMAOptions;
65
66 explicit constexpr LZMA(Options opts = {})
67 : _opts(std::move(opts))
68 {}
69
70 template<std::integral HeaderType = std::size_t>
71 CompressedBlocks<HeaderType> compress(Serialization& in) const {
72 static_assert(sizeof(typename Serialization::Byte) == sizeof(LZMAByte));
73 if (std::numeric_limits<HeaderType>::max() < in.size())
74 throw TypeError("Chosen HeaderType is too small for given number of bytes");
75 if (std::numeric_limits<HeaderType>::max() < _opts.block_size)
76 throw TypeError("Chosen HeaderType is too small for given block size");
77
78 auto [blocks, out] = _compress<HeaderType>(in.template as_span_of<const LZMAByte>());
79 in = std::move(out);
80 in.resize(blocks.compressed_size());
81 return blocks;
82 }
83
84 template<std::integral HeaderType>
85 static void decompress(Serialization& in, const CompressedBlocks<HeaderType>& blocks) {
86 Compression::decompress(in, blocks, BlockDecompressor{});
87 }
88
89 static LZMA with(Options opts) {
90 return LZMA{std::move(opts)};
91 }
92
93 private:
94 template<std::integral HeaderType>
95 auto _compress(std::span<const LZMAByte> in) const {
96 HeaderType block_size = static_cast<HeaderType>(_opts.block_size);
97 HeaderType size_in_bytes = static_cast<HeaderType>(in.size());
98 Blocks<HeaderType> blocks{size_in_bytes, block_size};
99
100 Serialization compressed;
101 std::vector<LZMAByte> block_buffer;
102 std::vector<HeaderType> compressed_block_sizes;
103 block_buffer.reserve(lzma_stream_buffer_bound(_opts.block_size));
104 compressed_block_sizes.reserve(blocks.number_of_blocks);
105 compressed.resize(block_buffer.capacity()*blocks.number_of_blocks);
106
107 HeaderType cur_in = 0;
108 HeaderType cur_out = 0;
109 auto out = compressed.template as_span_of<LZMAByte>();
110 while (cur_in < size_in_bytes) {
111 using std::min;
112 const HeaderType cur_block_size = min(block_size, size_in_bytes - cur_in);
113 assert(cur_in + cur_block_size <= size_in_bytes);
114
115 std::size_t out_pos = 0;
116 const auto lzma_ret = lzma_easy_buffer_encode(
117 _opts.compression_level, LZMA_CHECK_CRC32, nullptr,
118 in.data() + cur_in, cur_block_size,
119 block_buffer.data(), &out_pos, block_buffer.capacity()
120 );
121 if (lzma_ret != LZMA_OK)
122 throw InvalidState(as_error("(LZMACompressor) Error upon compression"));
123
124 assert(cur_out + out_pos <= out.size());
125 std::copy_n(block_buffer.data(),
126 out_pos,
127 out.data() + cur_out);
128 cur_in += cur_block_size;
129 cur_out += out_pos;
130 compressed_block_sizes.push_back(static_cast<HeaderType>(out_pos));
131 }
132
133 if (cur_in != size_in_bytes)
134 throw InvalidState(as_error("(LZMACompressor) unexpected number of bytes processed"));
135
136 return std::make_tuple(
137 CompressedBlocks<HeaderType>{blocks, std::move(compressed_block_sizes)},
138 compressed
139 );
140 }
141
142 Options _opts;
143};
144
//! Instance of the lzma compressor, constructed with default options.
145inline constexpr LZMA lzma;
146
147#ifndef DOXYGEN
// Compile-time flag; true in this branch, i.e. when GRIDFORMAT_HAVE_LZMA is set.
148namespace Detail { inline constexpr bool _have_lzma = true; }
149#endif // DOXYGEN
150
152
153} // end namespace GridFormat::Compression
154
155#else // GRIDFORMAT_HAVE_LZMA
156
157namespace GridFormat::Compression {
158
// Compile-time flag; false in this branch, i.e. when GRIDFORMAT_HAVE_LZMA is not set.
159namespace Detail { inline constexpr bool _have_lzma = false; }
//! Stand-in used when the LZMA library is unavailable; instantiating the
//! constructor triggers a compile-time error with an explanatory message.
class LZMA {
 public:
    // The assertion depends on a template parameter so that it only fires
    // once the constructor is actually instantiated.
    template<bool lzma_available = false, typename... Ignored>
    explicit LZMA(Ignored&&...) {
        static_assert(lzma_available, "LZMA compressor requires the LZMA library.");
    }
};
165
166} // namespace GridFormat::Compression
167
168#endif // GRIDFORMAT_HAVE_LZMA
169#endif // GRIDFORMAT_COMPRESSION_LZMA_HPP_
Compressor using the lzma library.
Definition: lzma.hpp:42
Common classes used in the context of data compression.
Decompress compressed data.
void decompress(Serialization &in, const CompressedBlocks< HeaderType > &blocks, const Decompressor &block_decompressor)
Decompress compressed data.
Definition: decompress.hpp:26
constexpr LZMA lzma
Instance of the lzma compressor.
Definition: lzma.hpp:145
constexpr std::size_t default_block_size
as in VTK (https://gitlab.kitware.com/vtk/vtk/-/blob/65fc526a83ac829628a9462f61fa57f1801e2c7e/IO/XML/...
Definition: common.hpp:23
Stores the block sizes used for compressing the given amount of bytes.
Definition: common.hpp:27
Stores the uncompressed/compressed block sizes after completion of a compression.
Definition: common.hpp:41
Options for the lzma compressor.
Definition: lzma.hpp:36