Line | Branch | Exec | Source |
---|---|---|---|
1 | // SPDX-FileCopyrightText: 2022-2023 Dennis Gläser <dennis.glaeser@iws.uni-stuttgart.de> | ||
2 | // SPDX-License-Identifier: MIT | ||
3 | /*! | ||
4 | * \file | ||
5 | * \ingroup Encoding | ||
6 | * \brief Encoder and stream using base64 | ||
7 | */ | ||
8 | #ifndef GRIDFORMAT_COMMON_ENCODING_BASE64_HPP_ | ||
9 | #define GRIDFORMAT_COMMON_ENCODING_BASE64_HPP_ | ||
10 | |||
#include <algorithm>
#include <array>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <istream>
#include <ranges>
#include <span>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
17 | |||
18 | #include <gridformat/common/exceptions.hpp> | ||
19 | #include <gridformat/common/serialization.hpp> | ||
20 | #include <gridformat/common/istream_helper.hpp> | ||
21 | #include <gridformat/common/output_stream.hpp> | ||
22 | #include <gridformat/common/concepts.hpp> | ||
23 | |||
24 | namespace GridFormat { | ||
25 | |||
26 | #ifndef DOXYGEN | ||
namespace Base64Detail {

//! The base64 alphabet; the character at position i encodes the 6-bit value i.
//! Declared `inline` so that all translation units including this header share one object.
inline constexpr auto alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

//! Reverse lookup table: maps a character (interpreted as unsigned char) to its
//! 6-bit value. Characters outside of the alphabet (including the padding
//! character '=') map to 0.
inline constexpr std::array<unsigned char, 256> letter_to_index = [] {
    std::array<unsigned char, 256> result{};  // value-initialized: all entries are 0
    for (unsigned char i = 0; i < 64; ++i)
        result[static_cast<unsigned char>(alphabet[i])] = i;
    return result;
} ();

}  // namespace Base64Detail
40 | #endif // DOXYGEN | ||
41 | |||
42 | namespace Base64 { | ||
43 | |||
44 | //! Return the number of decoded bytes for the given number of encoded bytes | ||
45 | ✗ | std::size_t decoded_size(std::size_t encoded_size) { | |
46 | ✗ | if (encoded_size%4 != 0) | |
47 | ✗ | throw SizeError("Given size is not a multiple of 4"); | |
48 | ✗ | return encoded_size*3/4; | |
49 | } | ||
50 | |||
//! Return the number of encoded bytes for the given number of raw bytes.
//! Each started group of 3 raw bytes produces 4 encoded characters (the last
//! group is padded), i.e. the result is 4*ceil(raw_size/3).
//! \param raw_size Number of raw (unencoded) bytes
inline std::size_t encoded_size(std::size_t raw_size) {
    // Pure integer arithmetic: exact for all sizes (no round trip through double)
    const std::size_t num_groups = raw_size/3 + (raw_size%3 != 0 ? 1 : 0);
    return 4*num_groups;
}
57 | |||
58 | } // namespace Base64 | ||
59 | |||
60 | //! \addtogroup Encoding | ||
61 | //! \{ | ||
62 | |||
63 | struct Base64Decoder { | ||
64 | 22891 | Serialization decode_from(std::istream& stream, std::size_t target_num_decoded_bytes) const { | |
65 | 22891 | InputStreamHelper helper{stream}; | |
66 | 22891 | const auto encoded_size = Base64::encoded_size(target_num_decoded_bytes); | |
67 |
2/4✓ Branch 2 taken 22891 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 22891 times.
✗ Branch 6 not taken.
|
45782 | std::string chars = helper.read_until_any_of("=", encoded_size); |
68 |
2/2✓ Branch 1 taken 10546 times.
✓ Branch 2 taken 12345 times.
|
22891 | if (chars.size() != encoded_size) |
69 |
3/6✓ Branch 1 taken 10546 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 10546 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 10546 times.
✗ Branch 8 not taken.
|
21092 | chars += helper.read_until_not_any_of("="); |
70 | |||
71 |
1/2✓ Branch 2 taken 22891 times.
✗ Branch 3 not taken.
|
22891 | Serialization result{chars.size()}; |
72 |
1/2✓ Branch 1 taken 22891 times.
✗ Branch 2 not taken.
|
22891 | auto result_chars = result.template as_span_of<char>(); |
73 |
1/2✓ Branch 3 taken 22891 times.
✗ Branch 4 not taken.
|
22891 | std::ranges::move(std::move(chars), result_chars.begin()); |
74 |
2/4✓ Branch 1 taken 22891 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 22891 times.
✗ Branch 5 not taken.
|
22891 | result.resize(decode(result_chars)); |
75 | 22891 | return result; | |
76 | 22891 | } | |
77 | |||
78 | template<std::size_t s> | ||
79 | 22892 | std::size_t decode(std::span<char, s> chars) const { | |
80 |
2/2✓ Branch 1 taken 225 times.
✓ Branch 2 taken 22667 times.
|
22892 | if (chars.size() == 0) |
81 | 225 | return 0; | |
82 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 22667 times.
|
22667 | if (chars.size()%4 != 0) |
83 | ✗ | throw SizeError("Buffer size is not a multiple of 4"); | |
84 | |||
85 | 22667 | std::size_t in_offset = 0; | |
86 | 22667 | std::size_t out_offset = 0; | |
87 |
2/2✓ Branch 1 taken 558135 times.
✓ Branch 2 taken 22667 times.
|
580802 | while (in_offset < chars.size()) { |
88 |
1/2✓ Branch 1 taken 558135 times.
✗ Branch 2 not taken.
|
558135 | std::ranges::copy( |
89 | 558135 | _decode_triplet(chars.data() + in_offset), | |
90 | 558135 | chars.data() + out_offset | |
91 | ); | ||
92 | 558135 | in_offset += 4; | |
93 | 558135 | out_offset += 3; | |
94 | } | ||
95 | |||
96 | 22667 | const auto end_chars = chars.subspan(chars.size() - 3); | |
97 | 22667 | std::string_view end_str{end_chars.data(), end_chars.size()}; | |
98 |
1/2✓ Branch 1 taken 22667 times.
✗ Branch 2 not taken.
|
22667 | const auto num_padding_chars = std::ranges::count(end_str, '='); |
99 | 22667 | return out_offset - (num_padding_chars > 0 ? num_padding_chars : 0); | |
100 | } | ||
101 | |||
102 | private: | ||
103 | 558135 | std::array<char, 3> _decode_triplet(const char* in) const { | |
104 | using Base64Detail::letter_to_index; | ||
105 | std::array<char, 3> result; | ||
106 | 558135 | result[0] = ((letter_to_index[in[0]] & 0b0011'1111) << 2) | ((letter_to_index[in[1]] & 0b0011'0000) >> 4); | |
107 | 558135 | result[1] = ((letter_to_index[in[1]] & 0b0000'1111) << 4) | ((letter_to_index[in[2]] & 0b0011'1100) >> 2); | |
108 | 558135 | result[2] = ((letter_to_index[in[2]] & 0b0000'0011) << 6) | ((letter_to_index[in[3]] & 0b0011'1111)); | |
109 | 558135 | return result; | |
110 | }; | ||
111 | }; | ||
112 | |||
//! Settings that control how base64-encoded output is buffered
struct Base64EncoderOptions {
    //! Number of triplets (groups of three raw bytes) cached between write operations
    std::size_t num_cached_buffers{4000};
};
117 | |||
118 | //! Wrapper around a given stream to write output encoded with base64 | ||
119 | template<typename OStream> | ||
120 | class Base64Stream : public OutputStreamWrapperBase<OStream> { | ||
121 | using Byte = char; | ||
122 | static_assert(sizeof(std::byte) == sizeof(Byte)); | ||
123 | static constexpr int buffer_size = 3; | ||
124 | static constexpr int encoded_buffer_size = 4; | ||
125 | |||
126 | // Top 6 bits of byte 0 | ||
127 | 5963634 | inline Byte _encode_sextet_0(const Byte* buffer) const { | |
128 | 5963634 | return Base64Detail::alphabet[((buffer[0] & 0b1111'1100) >> 2)]; | |
129 | } | ||
130 | // Bottom 2 bits of byte 0, Top 4 bits of byte 1 | ||
131 | 5963634 | inline Byte _encode_sextet_1(const Byte* buffer) const { | |
132 | 5963634 | return Base64Detail::alphabet[((buffer[0] & 0b0000'0011) << 4) | |
133 | 5963634 | | ((buffer[1] & 0b1111'0000) >> 4)]; | |
134 | } | ||
135 | // Bottom 4 bits of byte 1, Top 2 bits of byte 2 | ||
136 | 5936124 | inline Byte _encode_sextet_2(const Byte* buffer) const { | |
137 | 5936124 | return Base64Detail::alphabet[((buffer[1] & 0b0000'1111) << 2) | |
138 | 5936124 | | ((buffer[2] & 0b1100'0000) >> 6)]; | |
139 | } | ||
140 | // Bottom 6 bits of byte 2 | ||
141 | 5848386 | inline Byte _encode_sextet_3(const Byte* buffer) const { | |
142 | 5848386 | return Base64Detail::alphabet[(buffer[2] & 0b0011'1111)]; | |
143 | } | ||
144 | |||
145 | public: | ||
146 | 88105 | explicit Base64Stream(OStream& s, Base64EncoderOptions opts = {}) | |
147 | : OutputStreamWrapperBase<OStream>(s) | ||
148 | 88105 | , _opts{std::move(opts)} | |
149 | 88105 | {} | |
150 | |||
151 | template<typename T, std::size_t size> | ||
152 | 294566 | void write(std::span<T, size> data) { | |
153 | 294566 | auto byte_span = std::as_bytes(data); | |
154 | 294566 | const Byte* bytes = reinterpret_cast<const Byte*>(byte_span.data()); | |
155 |
1/2✓ Branch 2 taken 147285 times.
✗ Branch 3 not taken.
|
294566 | _write(bytes, byte_span.size()); |
156 | 294566 | } | |
157 | |||
158 | private: | ||
159 | 294436 | std::size_t _cache_size_in() const { return _opts.num_cached_buffers*buffer_size; } | |
160 | 163161 | std::size_t _cache_size_out() const { return _opts.num_cached_buffers*encoded_buffer_size; } | |
161 | |||
162 | 147285 | void _write(const Byte* data, std::size_t size) { | |
163 | 147285 | const auto num_full_buffers = size/buffer_size; | |
164 | 147285 | const auto num_full_caches = num_full_buffers/_opts.num_cached_buffers; | |
165 |
5/8✓ Branch 1 taken 147285 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 147285 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 147285 times.
✗ Branch 8 not taken.
✓ Branch 12 taken 7 times.
✓ Branch 13 taken 147285 times.
|
147292 | for (const auto i : std::views::iota(std::size_t{0}, num_full_caches)) |
166 |
1/2✓ Branch 2 taken 7 times.
✗ Branch 3 not taken.
|
7 | _flush_full_cache(data + i*_cache_size_in()); |
167 | |||
168 | 147285 | const auto processed_bytes = num_full_caches*_cache_size_in(); | |
169 |
2/2✓ Branch 0 taken 147144 times.
✓ Branch 1 taken 141 times.
|
147285 | if (size > processed_bytes) |
170 |
1/2✓ Branch 1 taken 147144 times.
✗ Branch 2 not taken.
|
147144 | _flush_cache(data + processed_bytes, size - processed_bytes); |
171 | 147285 | } | |
172 | |||
173 | 7 | void _flush_full_cache(const Byte* data) { | |
174 |
1/2✓ Branch 2 taken 7 times.
✗ Branch 3 not taken.
|
7 | std::vector<Byte> cache(_cache_size_out()); |
175 |
2/2✓ Branch 1 taken 16003 times.
✓ Branch 2 taken 7 times.
|
16010 | for (std::size_t i = 0; i < _cache_size_out()/encoded_buffer_size; ++i) { |
176 | 16003 | const std::size_t in_offset = i*buffer_size; | |
177 | 16003 | const std::size_t out_offset = i*encoded_buffer_size; | |
178 | 16003 | cache[out_offset + 0] = _encode_sextet_0(data + in_offset); | |
179 | 16003 | cache[out_offset + 1] = _encode_sextet_1(data + in_offset); | |
180 | 16003 | cache[out_offset + 2] = _encode_sextet_2(data + in_offset); | |
181 | 16003 | cache[out_offset + 3] = _encode_sextet_3(data + in_offset); | |
182 | } | ||
183 |
1/2✓ Branch 2 taken 7 times.
✗ Branch 3 not taken.
|
7 | this->_stream.write(std::span{cache}); |
184 | 7 | } | |
185 | |||
186 | 147144 | void _flush_cache(const Byte* data, std::size_t num_bytes_in) { | |
187 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 147144 times.
|
147144 | if (num_bytes_in == 0) |
188 | ✗ | return; | |
189 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 147144 times.
|
147144 | if (num_bytes_in > _cache_size_in()) |
190 | ✗ | throw SizeError("Number of bytes cannot be larger than cache size"); | |
191 | |||
192 | 147144 | const std::size_t num_full_buffers = num_bytes_in/buffer_size; | |
193 | 147144 | const std::size_t residual = num_bytes_in%buffer_size; | |
194 | |||
195 |
1/2✓ Branch 2 taken 147144 times.
✗ Branch 3 not taken.
|
147144 | std::vector<Byte> cache(_cache_size_out()); |
196 |
2/2✓ Branch 0 taken 5832383 times.
✓ Branch 1 taken 147144 times.
|
5979527 | for (std::size_t i = 0; i < num_full_buffers; ++i) { |
197 | 5832383 | const std::size_t in_offset = i*buffer_size; | |
198 | 5832383 | const std::size_t out_offset = i*encoded_buffer_size; | |
199 | 5832383 | cache[out_offset + 0] = _encode_sextet_0(data + in_offset); | |
200 | 5832383 | cache[out_offset + 1] = _encode_sextet_1(data + in_offset); | |
201 | 5832383 | cache[out_offset + 2] = _encode_sextet_2(data + in_offset); | |
202 | 5832383 | cache[out_offset + 3] = _encode_sextet_3(data + in_offset); | |
203 | } | ||
204 | |||
205 | 147144 | const std::size_t in_offset = num_full_buffers*buffer_size; | |
206 | 147144 | const std::size_t out_offset = num_full_buffers*encoded_buffer_size; | |
207 |
2/2✓ Branch 0 taken 115248 times.
✓ Branch 1 taken 31896 times.
|
147144 | if (residual > 0) { |
208 | 345744 | Byte last_buffer[buffer_size] = { | |
209 | 115248 | *(data + in_offset), | |
210 |
2/2✓ Branch 0 taken 87738 times.
✓ Branch 1 taken 27510 times.
|
115248 | residual > 1 ? *(data + in_offset + 1) : Byte{0}, |
211 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 115248 times.
|
115248 | residual > 2 ? *(data + in_offset + 2) : Byte{0} |
212 | }; | ||
213 | 115248 | cache[out_offset] = _encode_sextet_0(last_buffer); | |
214 | 115248 | cache[out_offset + 1] = _encode_sextet_1(last_buffer); | |
215 |
2/2✓ Branch 0 taken 87738 times.
✓ Branch 1 taken 27510 times.
|
115248 | cache[out_offset + 2] = residual > 1 ? _encode_sextet_2(last_buffer) : '='; |
216 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 115248 times.
|
115248 | cache[out_offset + 3] = residual > 2 ? _encode_sextet_3(last_buffer) : '='; |
217 |
1/2✓ Branch 3 taken 115248 times.
✗ Branch 4 not taken.
|
115248 | this->_stream.write(std::span{cache.data(), out_offset + 4}); |
218 | } else { | ||
219 |
1/2✓ Branch 3 taken 31896 times.
✗ Branch 4 not taken.
|
31896 | this->_stream.write(std::span{cache.data(), out_offset}); |
220 | } | ||
221 | 147144 | } | |
222 | |||
223 | Base64EncoderOptions _opts; | ||
224 | }; | ||
225 | |||
226 | //! \} group Encoding | ||
227 | |||
228 | } // namespace GridFormat | ||
229 | |||
230 | namespace GridFormat::Encoding { | ||
231 | |||
232 | //! \addtogroup Encoding | ||
233 | //! \{ | ||
234 | |||
235 | //! Base64 encoder | ||
236 | struct Base64 { | ||
237 | //! Return a base64 stream | ||
238 | template<typename Stream> | ||
239 | 88105 | constexpr auto operator()(Stream& s) const noexcept { | |
240 | 88105 | return Base64Stream{s, _opts}; | |
241 | } | ||
242 | |||
243 | //! Return an encoder instance with different options | ||
244 | 1 | constexpr auto operator()(Base64EncoderOptions opts) const { | |
245 | 1 | Base64 other; | |
246 | 1 | other._opts = std::move(opts); | |
247 | 1 | return other; | |
248 | } | ||
249 | |||
250 | //! Return a base64 encoder with the given options | ||
251 | 1 | static Base64 with(Base64EncoderOptions opts) { | |
252 | 1 | Base64 enc; | |
253 | 1 | enc._opts = std::move(opts); | |
254 | 1 | return enc; | |
255 | } | ||
256 | |||
257 | private: | ||
258 | Base64EncoderOptions _opts = {}; | ||
259 | }; | ||
260 | |||
261 | //! Instance of the base64 encoder | ||
262 | inline constexpr Base64 base64; | ||
263 | |||
264 | //! \} group Encoding | ||
265 | |||
266 | } // namespace GridFormat::Encoding | ||
267 | |||
268 | #endif // GRIDFORMAT_COMMON_ENCODING_BASE64_HPP_ | ||
269 |