| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | // SPDX-FileCopyrightText: 2022-2023 Dennis Gläser <dennis.glaeser@iws.uni-stuttgart.de> | ||
| 2 | // SPDX-License-Identifier: MIT | ||
| 3 | /*! | ||
| 4 | * \file | ||
| 5 | * \ingroup Encoding | ||
| 6 | * \brief Encoder and stream using base64 | ||
| 7 | */ | ||
| 8 | #ifndef GRIDFORMAT_COMMON_ENCODING_BASE64_HPP_ | ||
| 9 | #define GRIDFORMAT_COMMON_ENCODING_BASE64_HPP_ | ||
| 10 | |||
#include <array>
#include <vector>
#include <string>
#include <string_view>
#include <span>
#include <ranges>
#include <utility>
#include <cstddef>
#include <cassert>
#include <cmath>
#include <algorithm>
#include <istream>
| 17 | |||
| 18 | #include <gridformat/common/exceptions.hpp> | ||
| 19 | #include <gridformat/common/serialization.hpp> | ||
| 20 | #include <gridformat/common/istream_helper.hpp> | ||
| 21 | #include <gridformat/common/output_stream.hpp> | ||
| 22 | #include <gridformat/common/concepts.hpp> | ||
| 23 | |||
| 24 | namespace GridFormat { | ||
| 25 | |||
| 26 | #ifndef DOXYGEN | ||
namespace Base64Detail {

//! The 64 characters of the base64 alphabet, ordered by the sextet value they represent.
inline constexpr char alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

//! Reverse lookup table mapping a character code to its position in `alphabet`.
//! Characters that are not part of the alphabet (including the padding character '=') map to 0.
inline constexpr std::array<unsigned char, 256> letter_to_index = [] {
    std::array<unsigned char, 256> result{};  // value-initialized, i.e. all entries start at 0
    for (unsigned char i = 0; i < 64; ++i)
        result[static_cast<unsigned char>(alphabet[i])] = i;
    return result;
} ();

}  // namespace Base64Detail
| 40 | #endif // DOXYGEN | ||
| 41 | |||
| 42 | namespace Base64 { | ||
| 43 | |||
| 44 | //! Return the number of decoded bytes for the given number of encoded bytes | ||
| 45 | ✗ | std::size_t decoded_size(std::size_t encoded_size) { | |
| 46 | ✗ | if (encoded_size%4 != 0) | |
| 47 | ✗ | throw SizeError("Given size is not a multiple of 4"); | |
| 48 | ✗ | return encoded_size*3/4; | |
| 49 | } | ||
| 50 | |||
//! Return the number of encoded bytes for the given number of raw bytes.
//! Each started group of 3 raw bytes yields 4 encoded characters (including padding).
inline constexpr std::size_t encoded_size(std::size_t raw_size) noexcept {
    // pure integer arithmetic: exact for all sizes, whereas rounding via double loses precision above 2^53
    const std::size_t num_triplets = raw_size/3 + (raw_size%3 != 0 ? 1 : 0);
    return 4*num_triplets;
}
| 57 | |||
| 58 | } // namespace Base64 | ||
| 59 | |||
| 60 | //! \addtogroup Encoding | ||
| 61 | //! \{ | ||
| 62 | |||
| 63 | struct Base64Decoder { | ||
| 64 | 22897 | Serialization decode_from(std::istream& stream, std::size_t target_num_decoded_bytes) const { | |
| 65 | 22897 | InputStreamHelper helper{stream}; | |
| 66 | 22897 | const auto encoded_size = Base64::encoded_size(target_num_decoded_bytes); | |
| 67 |
2/4✓ Branch 2 taken 22897 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 22897 times.
✗ Branch 6 not taken.
|
45794 | std::string chars = helper.read_until_any_of("=", encoded_size); |
| 68 |
2/2✓ Branch 1 taken 10524 times.
✓ Branch 2 taken 12373 times.
|
22897 | if (chars.size() != encoded_size) |
| 69 |
3/6✓ Branch 1 taken 10524 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 10524 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 10524 times.
✗ Branch 8 not taken.
|
21048 | chars += helper.read_until_not_any_of("="); |
| 70 | |||
| 71 |
1/2✓ Branch 2 taken 22897 times.
✗ Branch 3 not taken.
|
22897 | Serialization result{chars.size()}; |
| 72 |
1/2✓ Branch 1 taken 22897 times.
✗ Branch 2 not taken.
|
22897 | auto result_chars = result.template as_span_of<char>(); |
| 73 |
1/2✓ Branch 3 taken 22897 times.
✗ Branch 4 not taken.
|
22897 | std::ranges::move(std::move(chars), result_chars.begin()); |
| 74 |
2/4✓ Branch 1 taken 22897 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 22897 times.
✗ Branch 5 not taken.
|
22897 | result.resize(decode(result_chars)); |
| 75 | 22897 | return result; | |
| 76 | 22897 | } | |
| 77 | |||
| 78 | template<std::size_t s> | ||
| 79 | 22898 | std::size_t decode(std::span<char, s> chars) const { | |
| 80 |
2/2✓ Branch 1 taken 225 times.
✓ Branch 2 taken 22673 times.
|
22898 | if (chars.size() == 0) |
| 81 | 225 | return 0; | |
| 82 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 22673 times.
|
22673 | if (chars.size()%4 != 0) |
| 83 | ✗ | throw SizeError("Buffer size is not a multiple of 4"); | |
| 84 | |||
| 85 | 22673 | std::size_t in_offset = 0; | |
| 86 | 22673 | std::size_t out_offset = 0; | |
| 87 |
2/2✓ Branch 1 taken 557852 times.
✓ Branch 2 taken 22673 times.
|
580525 | while (in_offset < chars.size()) { |
| 88 |
1/2✓ Branch 1 taken 557852 times.
✗ Branch 2 not taken.
|
557852 | std::ranges::copy( |
| 89 | 557852 | _decode_triplet(chars.data() + in_offset), | |
| 90 | 557852 | chars.data() + out_offset | |
| 91 | ); | ||
| 92 | 557852 | in_offset += 4; | |
| 93 | 557852 | out_offset += 3; | |
| 94 | } | ||
| 95 | |||
| 96 | 22673 | const auto end_chars = chars.subspan(chars.size() - 3); | |
| 97 | 22673 | std::string_view end_str{end_chars.data(), end_chars.size()}; | |
| 98 |
1/2✓ Branch 1 taken 22673 times.
✗ Branch 2 not taken.
|
22673 | const auto num_padding_chars = std::ranges::count(end_str, '='); |
| 99 | 22673 | return out_offset - (num_padding_chars > 0 ? num_padding_chars : 0); | |
| 100 | } | ||
| 101 | |||
| 102 | private: | ||
| 103 | 557852 | std::array<char, 3> _decode_triplet(const char* in) const { | |
| 104 | using Base64Detail::letter_to_index; | ||
| 105 | std::array<char, 3> result; | ||
| 106 | 557852 | result[0] = ((letter_to_index[in[0]] & 0b0011'1111) << 2) | ((letter_to_index[in[1]] & 0b0011'0000) >> 4); | |
| 107 | 557852 | result[1] = ((letter_to_index[in[1]] & 0b0000'1111) << 4) | ((letter_to_index[in[2]] & 0b0011'1100) >> 2); | |
| 108 | 557852 | result[2] = ((letter_to_index[in[2]] & 0b0000'0011) << 6) | ((letter_to_index[in[3]] & 0b0011'1111)); | |
| 109 | 557852 | return result; | |
| 110 | }; | ||
| 111 | }; | ||
| 112 | |||
//! Options for formatted output of ranges with base64 encoding
struct Base64EncoderOptions {
    //! Number of triplets cached between write operations
    std::size_t num_cached_buffers{4000};
};
| 117 | |||
| 118 | //! Wrapper around a given stream to write output encoded with base64 | ||
| 119 | template<typename OStream> | ||
| 120 | class Base64Stream : public OutputStreamWrapperBase<OStream> { | ||
| 121 | using Byte = char; | ||
| 122 | static_assert(sizeof(std::byte) == sizeof(Byte)); | ||
| 123 | static constexpr int buffer_size = 3; | ||
| 124 | static constexpr int encoded_buffer_size = 4; | ||
| 125 | |||
| 126 | // Top 6 bits of byte 0 | ||
| 127 | 5982236 | inline Byte _encode_sextet_0(const Byte* buffer) const { | |
| 128 | 5982236 | return Base64Detail::alphabet[((buffer[0] & 0b1111'1100) >> 2)]; | |
| 129 | } | ||
| 130 | // Bottom 2 bits of byte 0, Top 4 bits of byte 1 | ||
| 131 | 5982236 | inline Byte _encode_sextet_1(const Byte* buffer) const { | |
| 132 | 5982236 | return Base64Detail::alphabet[((buffer[0] & 0b0000'0011) << 4) | |
| 133 | 5982236 | | ((buffer[1] & 0b1111'0000) >> 4)]; | |
| 134 | } | ||
| 135 | // Bottom 4 bits of byte 1, Top 2 bits of byte 2 | ||
| 136 | 5954369 | inline Byte _encode_sextet_2(const Byte* buffer) const { | |
| 137 | 5954369 | return Base64Detail::alphabet[((buffer[1] & 0b0000'1111) << 2) | |
| 138 | 5954369 | | ((buffer[2] & 0b1100'0000) >> 6)]; | |
| 139 | } | ||
| 140 | // Bottom 6 bits of byte 2 | ||
| 141 | 5865670 | inline Byte _encode_sextet_3(const Byte* buffer) const { | |
| 142 | 5865670 | return Base64Detail::alphabet[(buffer[2] & 0b0011'1111)]; | |
| 143 | } | ||
| 144 | |||
| 145 | public: | ||
| 146 | 89101 | explicit Base64Stream(OStream& s, Base64EncoderOptions opts = {}) | |
| 147 | : OutputStreamWrapperBase<OStream>(s) | ||
| 148 | 89101 | , _opts{std::move(opts)} | |
| 149 | 89101 | {} | |
| 150 | |||
| 151 | template<typename T, std::size_t size> | ||
| 152 | 297830 | void write(std::span<T, size> data) { | |
| 153 | 297830 | auto byte_span = std::as_bytes(data); | |
| 154 | 297830 | const Byte* bytes = reinterpret_cast<const Byte*>(byte_span.data()); | |
| 155 |
1/2✓ Branch 2 taken 148917 times.
✗ Branch 3 not taken.
|
297830 | _write(bytes, byte_span.size()); |
| 156 | 297830 | } | |
| 157 | |||
| 158 | private: | ||
| 159 | 297700 | std::size_t _cache_size_in() const { return _opts.num_cached_buffers*buffer_size; } | |
| 160 | 164793 | std::size_t _cache_size_out() const { return _opts.num_cached_buffers*encoded_buffer_size; } | |
| 161 | |||
| 162 | 148917 | void _write(const Byte* data, std::size_t size) { | |
| 163 | 148917 | const auto num_full_buffers = size/buffer_size; | |
| 164 | 148917 | const auto num_full_caches = num_full_buffers/_opts.num_cached_buffers; | |
| 165 |
5/8✓ Branch 1 taken 148917 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 148917 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 148917 times.
✗ Branch 8 not taken.
✓ Branch 12 taken 7 times.
✓ Branch 13 taken 148917 times.
|
148924 | for (const auto i : std::views::iota(std::size_t{0}, num_full_caches)) |
| 166 |
1/2✓ Branch 2 taken 7 times.
✗ Branch 3 not taken.
|
7 | _flush_full_cache(data + i*_cache_size_in()); |
| 167 | |||
| 168 | 148917 | const auto processed_bytes = num_full_caches*_cache_size_in(); | |
| 169 |
2/2✓ Branch 0 taken 148776 times.
✓ Branch 1 taken 141 times.
|
148917 | if (size > processed_bytes) |
| 170 |
1/2✓ Branch 1 taken 148776 times.
✗ Branch 2 not taken.
|
148776 | _flush_cache(data + processed_bytes, size - processed_bytes); |
| 171 | 148917 | } | |
| 172 | |||
| 173 | 7 | void _flush_full_cache(const Byte* data) { | |
| 174 |
1/2✓ Branch 2 taken 7 times.
✗ Branch 3 not taken.
|
7 | std::vector<Byte> cache(_cache_size_out()); |
| 175 |
2/2✓ Branch 1 taken 16003 times.
✓ Branch 2 taken 7 times.
|
16010 | for (std::size_t i = 0; i < _cache_size_out()/encoded_buffer_size; ++i) { |
| 176 | 16003 | const std::size_t in_offset = i*buffer_size; | |
| 177 | 16003 | const std::size_t out_offset = i*encoded_buffer_size; | |
| 178 | 16003 | cache[out_offset + 0] = _encode_sextet_0(data + in_offset); | |
| 179 | 16003 | cache[out_offset + 1] = _encode_sextet_1(data + in_offset); | |
| 180 | 16003 | cache[out_offset + 2] = _encode_sextet_2(data + in_offset); | |
| 181 | 16003 | cache[out_offset + 3] = _encode_sextet_3(data + in_offset); | |
| 182 | } | ||
| 183 |
1/2✓ Branch 2 taken 7 times.
✗ Branch 3 not taken.
|
7 | this->_stream.write(std::span{cache}); |
| 184 | 7 | } | |
| 185 | |||
| 186 | 148776 | void _flush_cache(const Byte* data, std::size_t num_bytes_in) { | |
| 187 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 148776 times.
|
148776 | if (num_bytes_in == 0) |
| 188 | ✗ | return; | |
| 189 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 148776 times.
|
148776 | if (num_bytes_in > _cache_size_in()) |
| 190 | ✗ | throw SizeError("Number of bytes cannot be larger than cache size"); | |
| 191 | |||
| 192 | 148776 | const std::size_t num_full_buffers = num_bytes_in/buffer_size; | |
| 193 | 148776 | const std::size_t residual = num_bytes_in%buffer_size; | |
| 194 | |||
| 195 |
1/2✓ Branch 2 taken 148776 times.
✗ Branch 3 not taken.
|
148776 | std::vector<Byte> cache(_cache_size_out()); |
| 196 |
2/2✓ Branch 0 taken 5849667 times.
✓ Branch 1 taken 148776 times.
|
5998443 | for (std::size_t i = 0; i < num_full_buffers; ++i) { |
| 197 | 5849667 | const std::size_t in_offset = i*buffer_size; | |
| 198 | 5849667 | const std::size_t out_offset = i*encoded_buffer_size; | |
| 199 | 5849667 | cache[out_offset + 0] = _encode_sextet_0(data + in_offset); | |
| 200 | 5849667 | cache[out_offset + 1] = _encode_sextet_1(data + in_offset); | |
| 201 | 5849667 | cache[out_offset + 2] = _encode_sextet_2(data + in_offset); | |
| 202 | 5849667 | cache[out_offset + 3] = _encode_sextet_3(data + in_offset); | |
| 203 | } | ||
| 204 | |||
| 205 | 148776 | const std::size_t in_offset = num_full_buffers*buffer_size; | |
| 206 | 148776 | const std::size_t out_offset = num_full_buffers*encoded_buffer_size; | |
| 207 |
2/2✓ Branch 0 taken 116566 times.
✓ Branch 1 taken 32210 times.
|
148776 | if (residual > 0) { |
| 208 | 349698 | Byte last_buffer[buffer_size] = { | |
| 209 | 116566 | *(data + in_offset), | |
| 210 |
2/2✓ Branch 0 taken 88699 times.
✓ Branch 1 taken 27867 times.
|
116566 | residual > 1 ? *(data + in_offset + 1) : Byte{0}, |
| 211 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 116566 times.
|
116566 | residual > 2 ? *(data + in_offset + 2) : Byte{0} |
| 212 | }; | ||
| 213 | 116566 | cache[out_offset] = _encode_sextet_0(last_buffer); | |
| 214 | 116566 | cache[out_offset + 1] = _encode_sextet_1(last_buffer); | |
| 215 |
2/2✓ Branch 0 taken 88699 times.
✓ Branch 1 taken 27867 times.
|
116566 | cache[out_offset + 2] = residual > 1 ? _encode_sextet_2(last_buffer) : '='; |
| 216 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 116566 times.
|
116566 | cache[out_offset + 3] = residual > 2 ? _encode_sextet_3(last_buffer) : '='; |
| 217 |
1/2✓ Branch 3 taken 116566 times.
✗ Branch 4 not taken.
|
116566 | this->_stream.write(std::span{cache.data(), out_offset + 4}); |
| 218 | } else { | ||
| 219 |
1/2✓ Branch 3 taken 32210 times.
✗ Branch 4 not taken.
|
32210 | this->_stream.write(std::span{cache.data(), out_offset}); |
| 220 | } | ||
| 221 | 148776 | } | |
| 222 | |||
| 223 | Base64EncoderOptions _opts; | ||
| 224 | }; | ||
| 225 | |||
| 226 | //! \} group Encoding | ||
| 227 | |||
| 228 | } // namespace GridFormat | ||
| 229 | |||
| 230 | namespace GridFormat::Encoding { | ||
| 231 | |||
| 232 | //! \addtogroup Encoding | ||
| 233 | //! \{ | ||
| 234 | |||
| 235 | //! Base64 encoder | ||
| 236 | struct Base64 { | ||
| 237 | //! Return a base64 stream | ||
| 238 | template<typename Stream> | ||
| 239 | 89101 | constexpr auto operator()(Stream& s) const noexcept { | |
| 240 | 89101 | return Base64Stream{s, _opts}; | |
| 241 | } | ||
| 242 | |||
| 243 | //! Return an encoder instance with different options | ||
| 244 | 1 | constexpr auto operator()(Base64EncoderOptions opts) const { | |
| 245 | 1 | Base64 other; | |
| 246 | 1 | other._opts = std::move(opts); | |
| 247 | 1 | return other; | |
| 248 | } | ||
| 249 | |||
| 250 | //! Return a base64 encoder with the given options | ||
| 251 | 1 | static Base64 with(Base64EncoderOptions opts) { | |
| 252 | 1 | Base64 enc; | |
| 253 | 1 | enc._opts = std::move(opts); | |
| 254 | 1 | return enc; | |
| 255 | } | ||
| 256 | |||
| 257 | private: | ||
| 258 | Base64EncoderOptions _opts = {}; | ||
| 259 | }; | ||
| 260 | |||
| 261 | //! Instance of the base64 encoder | ||
| 262 | inline constexpr Base64 base64; | ||
| 263 | |||
| 264 | //! \} group Encoding | ||
| 265 | |||
| 266 | } // namespace GridFormat::Encoding | ||
| 267 | |||
| 268 | #endif // GRIDFORMAT_COMMON_ENCODING_BASE64_HPP_ | ||
| 269 |