GCC Code Coverage Report


Directory: gridformat/
File: gridformat/encoding/base64.hpp
Date: 2024-11-10 16:24:00
Exec Total Coverage
Lines: 114 121 94.2%
Functions: 39 46 84.8%
Branches: 48 84 57.1%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2022-2023 Dennis Gläser <dennis.glaeser@iws.uni-stuttgart.de>
2 // SPDX-License-Identifier: MIT
3 /*!
4 * \file
5 * \ingroup Encoding
6 * \brief Encoder and stream using base64
7 */
8 #ifndef GRIDFORMAT_COMMON_ENCODING_BASE64_HPP_
9 #define GRIDFORMAT_COMMON_ENCODING_BASE64_HPP_
10
11 #include <array>
12 #include <vector>
13 #include <utility>
14 #include <cassert>
15 #include <algorithm>
16 #include <istream>
17
18 #include <gridformat/common/exceptions.hpp>
19 #include <gridformat/common/serialization.hpp>
20 #include <gridformat/common/istream_helper.hpp>
21 #include <gridformat/common/output_stream.hpp>
22 #include <gridformat/common/concepts.hpp>
23
24 namespace GridFormat {
25
26 #ifndef DOXYGEN
27 namespace Base64Detail {
28
29 static constexpr auto alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
30
31 static constexpr std::array<unsigned char, 256> letter_to_index = [] {
32 std::array<unsigned char, 256> result;
33 std::ranges::fill(result, 0);
34 for (int i = 0; i < 64; ++i)
35 result[static_cast<unsigned>(alphabet[i])] = i;
36 return result;
37 } ();
38
39 } // namespace Base64Detail
40 #endif // DOXYGEN
41
42 namespace Base64 {
43
44 //! Return the number of decoded bytes for the given number of encoded bytes
45 std::size_t decoded_size(std::size_t encoded_size) {
46 if (encoded_size%4 != 0)
47 throw SizeError("Given size is not a multiple of 4");
48 return encoded_size*3/4;
49 }
50
51 //! Return the number of encoded bytes for the given number of raw bytes
52 22891 std::size_t encoded_size(std::size_t raw_size) {
53 22891 return 4*static_cast<std::size_t>(
54 22891 std::ceil(static_cast<double>(raw_size)/3.0)
55 22891 );
56 }
57
58 } // namespace Base64
59
60 //! \addtogroup Encoding
61 //! \{
62
63 struct Base64Decoder {
64 22891 Serialization decode_from(std::istream& stream, std::size_t target_num_decoded_bytes) const {
65 22891 InputStreamHelper helper{stream};
66 22891 const auto encoded_size = Base64::encoded_size(target_num_decoded_bytes);
67
2/4
✓ Branch 2 taken 22891 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 22891 times.
✗ Branch 6 not taken.
45782 std::string chars = helper.read_until_any_of("=", encoded_size);
68
2/2
✓ Branch 1 taken 10347 times.
✓ Branch 2 taken 12544 times.
22891 if (chars.size() != encoded_size)
69
3/6
✓ Branch 1 taken 10347 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 10347 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 10347 times.
✗ Branch 8 not taken.
20694 chars += helper.read_until_not_any_of("=");
70
71
1/2
✓ Branch 2 taken 22891 times.
✗ Branch 3 not taken.
22891 Serialization result{chars.size()};
72
1/2
✓ Branch 1 taken 22891 times.
✗ Branch 2 not taken.
22891 auto result_chars = result.template as_span_of<char>();
73
1/2
✓ Branch 3 taken 22891 times.
✗ Branch 4 not taken.
22891 std::ranges::move(std::move(chars), result_chars.begin());
74
2/4
✓ Branch 1 taken 22891 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 22891 times.
✗ Branch 5 not taken.
22891 result.resize(decode(result_chars));
75 22891 return result;
76 22891 }
77
78 template<std::size_t s>
79 22892 std::size_t decode(std::span<char, s> chars) const {
80
2/2
✓ Branch 1 taken 225 times.
✓ Branch 2 taken 22667 times.
22892 if (chars.size() == 0)
81 225 return 0;
82
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 22667 times.
22667 if (chars.size()%4 != 0)
83 throw SizeError("Buffer size is not a multiple of 4");
84
85 22667 std::size_t in_offset = 0;
86 22667 std::size_t out_offset = 0;
87
2/2
✓ Branch 1 taken 557079 times.
✓ Branch 2 taken 22667 times.
579746 while (in_offset < chars.size()) {
88
1/2
✓ Branch 1 taken 557079 times.
✗ Branch 2 not taken.
557079 std::ranges::copy(
89 557079 _decode_triplet(chars.data() + in_offset),
90 557079 chars.data() + out_offset
91 );
92 557079 in_offset += 4;
93 557079 out_offset += 3;
94 }
95
96 22667 const auto end_chars = chars.subspan(chars.size() - 3);
97 22667 std::string_view end_str{end_chars.data(), end_chars.size()};
98
1/2
✓ Branch 1 taken 22667 times.
✗ Branch 2 not taken.
22667 const auto num_padding_chars = std::ranges::count(end_str, '=');
99 22667 return out_offset - (num_padding_chars > 0 ? num_padding_chars : 0);
100 }
101
102 private:
103 557079 std::array<char, 3> _decode_triplet(const char* in) const {
104 using Base64Detail::letter_to_index;
105 std::array<char, 3> result;
106 557079 result[0] = ((letter_to_index[in[0]] & 0b0011'1111) << 2) | ((letter_to_index[in[1]] & 0b0011'0000) >> 4);
107 557079 result[1] = ((letter_to_index[in[1]] & 0b0000'1111) << 4) | ((letter_to_index[in[2]] & 0b0011'1100) >> 2);
108 557079 result[2] = ((letter_to_index[in[2]] & 0b0000'0011) << 6) | ((letter_to_index[in[3]] & 0b0011'1111));
109 557079 return result;
110 };
111 };
112
113 //! Options for formatted output of ranges with base64 encoding
114 struct Base64EncoderOptions {
115 std::size_t num_cached_buffers = 4000; //!< Number of triplets cached between write operations
116 };
117
118 //! Wrapper around a given stream to write output encoded with base64
119 template<typename OStream>
120 class Base64Stream : public OutputStreamWrapperBase<OStream> {
121 using Byte = char;
122 static_assert(sizeof(std::byte) == sizeof(Byte));
123 static constexpr int buffer_size = 3;
124 static constexpr int encoded_buffer_size = 4;
125
126 // Top 6 bits of byte 0
127 5963403 inline Byte _encode_sextet_0(const Byte* buffer) const {
128 5963403 return Base64Detail::alphabet[((buffer[0] & 0b1111'1100) >> 2)];
129 }
130 // Bottom 2 bits of byte 0, Top 4 bits of byte 1
131 5963403 inline Byte _encode_sextet_1(const Byte* buffer) const {
132 5963403 return Base64Detail::alphabet[((buffer[0] & 0b0000'0011) << 4)
133 5963403 | ((buffer[1] & 0b1111'0000) >> 4)];
134 }
135 // Bottom 4 bits of byte 1, Top 2 bits of byte 2
136 5935877 inline Byte _encode_sextet_2(const Byte* buffer) const {
137 5935877 return Base64Detail::alphabet[((buffer[1] & 0b0000'1111) << 2)
138 5935877 | ((buffer[2] & 0b1100'0000) >> 6)];
139 }
140 // Bottom 6 bits of byte 2
141 5848182 inline Byte _encode_sextet_3(const Byte* buffer) const {
142 5848182 return Base64Detail::alphabet[(buffer[2] & 0b0011'1111)];
143 }
144
145 public:
146 88105 explicit Base64Stream(OStream& s, Base64EncoderOptions opts = {})
147 : OutputStreamWrapperBase<OStream>(s)
148 88105 , _opts{std::move(opts)}
149 88105 {}
150
151 template<typename T, std::size_t size>
152 294566 void write(std::span<T, size> data) {
153 294566 auto byte_span = std::as_bytes(data);
154 294566 const Byte* bytes = reinterpret_cast<const Byte*>(byte_span.data());
155
1/2
✓ Branch 2 taken 147285 times.
✗ Branch 3 not taken.
294566 _write(bytes, byte_span.size());
156 294566 }
157
158 private:
159 294436 std::size_t _cache_size_in() const { return _opts.num_cached_buffers*buffer_size; }
160 163161 std::size_t _cache_size_out() const { return _opts.num_cached_buffers*encoded_buffer_size; }
161
162 147285 void _write(const Byte* data, std::size_t size) {
163 147285 const auto num_full_buffers = size/buffer_size;
164 147285 const auto num_full_caches = num_full_buffers/_opts.num_cached_buffers;
165
5/8
✓ Branch 1 taken 147285 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 147285 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 147285 times.
✗ Branch 8 not taken.
✓ Branch 12 taken 7 times.
✓ Branch 13 taken 147285 times.
147292 for (const auto i : std::views::iota(std::size_t{0}, num_full_caches))
166
1/2
✓ Branch 2 taken 7 times.
✗ Branch 3 not taken.
7 _flush_full_cache(data + i*_cache_size_in());
167
168 147285 const auto processed_bytes = num_full_caches*_cache_size_in();
169
2/2
✓ Branch 0 taken 147144 times.
✓ Branch 1 taken 141 times.
147285 if (size > processed_bytes)
170
1/2
✓ Branch 1 taken 147144 times.
✗ Branch 2 not taken.
147144 _flush_cache(data + processed_bytes, size - processed_bytes);
171 147285 }
172
173 7 void _flush_full_cache(const Byte* data) {
174
1/2
✓ Branch 2 taken 7 times.
✗ Branch 3 not taken.
7 std::vector<Byte> cache(_cache_size_out());
175
2/2
✓ Branch 1 taken 16003 times.
✓ Branch 2 taken 7 times.
16010 for (std::size_t i = 0; i < _cache_size_out()/encoded_buffer_size; ++i) {
176 16003 const std::size_t in_offset = i*buffer_size;
177 16003 const std::size_t out_offset = i*encoded_buffer_size;
178 16003 cache[out_offset + 0] = _encode_sextet_0(data + in_offset);
179 16003 cache[out_offset + 1] = _encode_sextet_1(data + in_offset);
180 16003 cache[out_offset + 2] = _encode_sextet_2(data + in_offset);
181 16003 cache[out_offset + 3] = _encode_sextet_3(data + in_offset);
182 }
183
1/2
✓ Branch 2 taken 7 times.
✗ Branch 3 not taken.
7 this->_stream.write(std::span{cache});
184 7 }
185
186 147144 void _flush_cache(const Byte* data, std::size_t num_bytes_in) {
187
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 147144 times.
147144 if (num_bytes_in == 0)
188 return;
189
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 147144 times.
147144 if (num_bytes_in > _cache_size_in())
190 throw SizeError("Number of bytes cannot be larger than cache size");
191
192 147144 const std::size_t num_full_buffers = num_bytes_in/buffer_size;
193 147144 const std::size_t residual = num_bytes_in%buffer_size;
194
195
1/2
✓ Branch 2 taken 147144 times.
✗ Branch 3 not taken.
147144 std::vector<Byte> cache(_cache_size_out());
196
2/2
✓ Branch 0 taken 5832179 times.
✓ Branch 1 taken 147144 times.
5979323 for (std::size_t i = 0; i < num_full_buffers; ++i) {
197 5832179 const std::size_t in_offset = i*buffer_size;
198 5832179 const std::size_t out_offset = i*encoded_buffer_size;
199 5832179 cache[out_offset + 0] = _encode_sextet_0(data + in_offset);
200 5832179 cache[out_offset + 1] = _encode_sextet_1(data + in_offset);
201 5832179 cache[out_offset + 2] = _encode_sextet_2(data + in_offset);
202 5832179 cache[out_offset + 3] = _encode_sextet_3(data + in_offset);
203 }
204
205 147144 const std::size_t in_offset = num_full_buffers*buffer_size;
206 147144 const std::size_t out_offset = num_full_buffers*encoded_buffer_size;
207
2/2
✓ Branch 0 taken 115221 times.
✓ Branch 1 taken 31923 times.
147144 if (residual > 0) {
208 345663 Byte last_buffer[buffer_size] = {
209 115221 *(data + in_offset),
210
2/2
✓ Branch 0 taken 87695 times.
✓ Branch 1 taken 27526 times.
115221 residual > 1 ? *(data + in_offset + 1) : Byte{0},
211
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 115221 times.
115221 residual > 2 ? *(data + in_offset + 2) : Byte{0}
212 };
213 115221 cache[out_offset] = _encode_sextet_0(last_buffer);
214 115221 cache[out_offset + 1] = _encode_sextet_1(last_buffer);
215
2/2
✓ Branch 0 taken 87695 times.
✓ Branch 1 taken 27526 times.
115221 cache[out_offset + 2] = residual > 1 ? _encode_sextet_2(last_buffer) : '=';
216
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 115221 times.
115221 cache[out_offset + 3] = residual > 2 ? _encode_sextet_3(last_buffer) : '=';
217
1/2
✓ Branch 3 taken 115221 times.
✗ Branch 4 not taken.
115221 this->_stream.write(std::span{cache.data(), out_offset + 4});
218 } else {
219
1/2
✓ Branch 3 taken 31923 times.
✗ Branch 4 not taken.
31923 this->_stream.write(std::span{cache.data(), out_offset});
220 }
221 147144 }
222
223 Base64EncoderOptions _opts;
224 };
225
226 //! \} group Encoding
227
228 } // namespace GridFormat
229
230 namespace GridFormat::Encoding {
231
232 //! \addtogroup Encoding
233 //! \{
234
235 //! Base64 encoder
236 struct Base64 {
237 //! Return a base64 stream
238 template<typename Stream>
239 88105 constexpr auto operator()(Stream& s) const noexcept {
240 88105 return Base64Stream{s, _opts};
241 }
242
243 //! Return an encoder instance with different options
244 1 constexpr auto operator()(Base64EncoderOptions opts) const {
245 1 Base64 other;
246 1 other._opts = std::move(opts);
247 1 return other;
248 }
249
250 //! Return a base64 encoder with the given options
251 1 static Base64 with(Base64EncoderOptions opts) {
252 1 Base64 enc;
253 1 enc._opts = std::move(opts);
254 1 return enc;
255 }
256
257 private:
258 Base64EncoderOptions _opts = {};
259 };
260
261 //! Instance of the base64 encoder
262 inline constexpr Base64 base64;
263
264 //! \} group Encoding
265
266 } // namespace GridFormat::Encoding
267
268 #endif // GRIDFORMAT_COMMON_ENCODING_BASE64_HPP_
269