| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | // SPDX-FileCopyrightText: 2022-2023 Dennis Gläser <dennis.glaeser@iws.uni-stuttgart.de> | ||
| 2 | // SPDX-License-Identifier: MIT | ||
| 3 | /*! | ||
| 4 | * \file | ||
| 5 | * \ingroup Common | ||
| 6 | * \brief Helper for parsing data from input streams. | ||
| 7 | */ | ||
| 8 | #ifndef GRIDFORMAT_COMMON_ISTREAM_HELPER_HPP_ | ||
| 9 | #define GRIDFORMAT_COMMON_ISTREAM_HELPER_HPP_ | ||
| 10 | |||
#include <algorithm>
#include <cmath>
#include <concepts>
#include <cstddef>
#include <istream>
#include <iterator>
#include <limits>
#include <optional>
#include <string>
#include <utility>
| 19 | |||
| 20 | #include <gridformat/common/exceptions.hpp> | ||
| 21 | |||
| 22 | namespace GridFormat { | ||
| 23 | |||
| 24 | /*! | ||
| 25 | * \ingroup Common | ||
| 26 | * \brief Helper for parsing data from input streams. | ||
| 27 | */ | ||
| 28 | class InputStreamHelper { | ||
| 29 | public: | ||
| 30 | static constexpr std::size_t default_chunk_size = 5000; | ||
| 31 | |||
| 32 |
9/18✓ Branch 1 taken 8295 times.
✗ Branch 2 not taken.
✓ Branch 5 taken 8295 times.
✗ Branch 6 not taken.
✓ Branch 8 taken 1166 times.
✗ Branch 9 not taken.
✓ Branch 12 taken 1166 times.
✗ Branch 13 not taken.
✓ Branch 15 taken 775 times.
✗ Branch 16 not taken.
✓ Branch 19 taken 775 times.
✗ Branch 20 not taken.
✓ Branch 22 taken 1937 times.
✗ Branch 23 not taken.
✓ Branch 26 taken 1937 times.
✗ Branch 27 not taken.
✓ Branch 29 taken 22897 times.
✗ Branch 30 not taken.
|
105210 | explicit InputStreamHelper(std::istream& s, std::string whitespace_chars = " \n\t") |
| 33 | 35070 | : _stream{s} | |
| 34 | 35070 | , _whitespace_chars{std::move(whitespace_chars)} | |
| 35 | 35070 | {} | |
| 36 | |||
| 37 | //! Read a chunk of characters from the stream into the given buffer | ||
| 38 | 1572516 | void read_chunk_to(std::string& buffer, const std::size_t chunk_size = default_chunk_size) { | |
| 39 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 1572516 times.
|
1572516 | if (is_end_of_file()) |
| 40 | ✗ | throw IOError("End of file already reached"); | |
| 41 | 1572516 | buffer.resize(chunk_size); | |
| 42 | 1572516 | _stream.read(buffer.data(), chunk_size); | |
| 43 | 1572516 | _stream.clear(); | |
| 44 | 1572516 | buffer.resize(_stream.gcount()); | |
| 45 | 1572516 | } | |
| 46 | |||
| 47 | //! Read a chunk of characters from the stream | ||
| 48 | 1027392 | std::string read_chunk(const std::size_t chunk_size = default_chunk_size) { | |
| 49 |
1/2✓ Branch 1 taken 1027392 times.
✗ Branch 2 not taken.
|
1027392 | std::string tmp(chunk_size, ' '); |
| 50 |
1/2✓ Branch 1 taken 1027392 times.
✗ Branch 2 not taken.
|
1027392 | read_chunk_to(tmp, chunk_size); |
| 51 | 1027392 | return tmp; | |
| 52 | ✗ | } | |
| 53 | |||
| 54 | //! Move the position forward until any of the given characters is found or EOF is reached | ||
| 55 | 543760 | bool shift_until_any_of(const std::string& chars, std::optional<std::size_t> max_chars = {}) { | |
| 56 | 543760 | std::string tmp_buffer; | |
| 57 | 543760 | std::size_t char_count = 0; | |
| 58 | 543760 | const auto max_num_chars = max_chars.value_or(std::numeric_limits<std::size_t>::max()); | |
| 59 | |||
| 60 |
2/2✓ Branch 0 taken 543535 times.
✓ Branch 1 taken 12373 times.
|
555908 | while (char_count < max_num_chars) { |
| 61 |
1/2✓ Branch 2 taken 543535 times.
✗ Branch 3 not taken.
|
543535 | read_chunk_to(tmp_buffer, std::min(default_chunk_size, max_num_chars - char_count)); |
| 62 | 543535 | const auto str_pos = tmp_buffer.find_first_of(chars); | |
| 63 |
2/2✓ Branch 0 taken 530961 times.
✓ Branch 1 taken 12574 times.
|
543535 | if (str_pos != std::string::npos) { |
| 64 | 530961 | const auto delta_pos = tmp_buffer.size() - str_pos; | |
| 65 |
1/2✓ Branch 1 taken 530961 times.
✗ Branch 2 not taken.
|
530961 | shift_by(-delta_pos); |
| 66 | 530961 | return true; | |
| 67 | } | ||
| 68 | |||
| 69 |
3/4✓ Branch 1 taken 12574 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 426 times.
✓ Branch 4 taken 12148 times.
|
12574 | if (is_end_of_file()) |
| 70 | 426 | return false; | |
| 71 | |||
| 72 | 12148 | char_count += tmp_buffer.size(); | |
| 73 | } | ||
| 74 | |||
| 75 | 12373 | return false; | |
| 76 | 543760 | } | |
| 77 | |||
| 78 | //! Read characters from the stream until any of the given characters is found or EOF is reached | ||
| 79 | 326880 | std::string read_until_any_of(const std::string& chars, std::optional<std::size_t> max_chars = {}) { | |
| 80 | 326880 | const auto p0 = position(); | |
| 81 | 326880 | shift_until_any_of(chars, max_chars); | |
| 82 | 326880 | const auto p1 = position(); | |
| 83 | 326880 | seek_position(p0); | |
| 84 | 326880 | return read_chunk(p1 - p0); | |
| 85 | } | ||
| 86 | |||
| 87 | //! Move the position forward until a character that is none of the given ones is found or EOF is reached | ||
| 88 | 205079 | bool shift_until_not_any_of(const std::string& chars) { | |
| 89 | 205079 | std::string tmp_buffer; | |
| 90 | while (true) { | ||
| 91 |
1/2✓ Branch 1 taken 205079 times.
✗ Branch 2 not taken.
|
205079 | tmp_buffer = read_chunk(); |
| 92 | 205079 | const auto pos = tmp_buffer.find_first_not_of(chars); | |
| 93 |
1/2✓ Branch 0 taken 205079 times.
✗ Branch 1 not taken.
|
205079 | if (pos != std::string::npos) { |
| 94 | 205079 | const auto delta_pos = tmp_buffer.size() - pos; | |
| 95 |
1/2✓ Branch 1 taken 205079 times.
✗ Branch 2 not taken.
|
205079 | shift_by(-delta_pos); |
| 96 | 205079 | return true; | |
| 97 | } | ||
| 98 | |||
| 99 | ✗ | if (is_end_of_file()) | |
| 100 | ✗ | return false; | |
| 101 | ✗ | } | |
| 102 | 205079 | } | |
| 103 | |||
| 104 | //! Read from the stream until a character not matching any of the given characters is found or EOF is reached | ||
| 105 | 10524 | std::string read_until_not_any_of(const std::string& chars) { | |
| 106 | 10524 | const auto p0 = position(); | |
| 107 | 10524 | shift_until_not_any_of(chars); | |
| 108 | 10524 | const auto p1 = position(); | |
| 109 | 10524 | seek_position(p0); | |
| 110 | 10524 | return read_chunk(p1 - p0); | |
| 111 | } | ||
| 112 | |||
| 113 | //! Move the position until the given string is found or EOF is reached | ||
| 114 | 1086 | bool shift_until_substr(const std::string& substr) { | |
| 115 | 1086 | const std::streamsize delta = -substr.size(); | |
| 116 | 1086 | const auto chunk_size = std::max(substr.size()*10, default_chunk_size); | |
| 117 | 1086 | std::string chunk; | |
| 118 | while (true) { | ||
| 119 |
1/2✓ Branch 1 taken 1589 times.
✗ Branch 2 not taken.
|
1589 | const auto cur_pos = position(); |
| 120 |
1/2✓ Branch 1 taken 1589 times.
✗ Branch 2 not taken.
|
1589 | read_chunk_to(chunk, chunk_size); |
| 121 | 1589 | const auto substr_pos = chunk.find(substr); | |
| 122 |
2/2✓ Branch 0 taken 1086 times.
✓ Branch 1 taken 503 times.
|
1589 | if (substr_pos != std::string::npos) { |
| 123 |
1/2✓ Branch 1 taken 1086 times.
✗ Branch 2 not taken.
|
1086 | seek_position(cur_pos + substr_pos); |
| 124 | 1086 | return true; | |
| 125 | } | ||
| 126 | |||
| 127 |
2/4✓ Branch 1 taken 503 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 503 times.
|
503 | if (is_end_of_file()) |
| 128 | ✗ | return false; | |
| 129 | |||
| 130 | // shift back a bit in case the substr lies at chunk boundaries | ||
| 131 |
1/2✓ Branch 1 taken 503 times.
✗ Branch 2 not taken.
|
503 | shift_by(delta); |
| 132 | 503 | } | |
| 133 | 1086 | } | |
| 134 | |||
| 135 | //! Skip characters considered whitespace | ||
| 136 | 15395 | void shift_whitespace() { | |
| 137 | 15395 | shift_until_not_any_of(_whitespace_chars); | |
| 138 | 15395 | } | |
| 139 | |||
| 140 | //! Skip characters until a whitespace is found | ||
| 141 | void shift_until_whitespace() { | ||
| 142 | shift_until_any_of(_whitespace_chars); | ||
| 143 | } | ||
| 144 | |||
| 145 | //! Jump forward in the stream by n characters | ||
| 146 | 1064519 | void shift_by(std::streamsize n) { | |
| 147 | 1064519 | seek_position(position() + n); | |
| 148 | 1064519 | } | |
| 149 | |||
| 150 | //! Return the current position in the stream | ||
| 151 | 2099833 | std::streamsize position() { | |
| 152 | 2099833 | _stream.clear(); | |
| 153 |
1/2✓ Branch 1 taken 2099833 times.
✗ Branch 2 not taken.
|
2099833 | return _stream.tellg(); |
| 154 | } | ||
| 155 | |||
| 156 | //! Jump the the requested position | ||
| 157 | 1863825 | void seek_position(std::streamsize pos) { | |
| 158 | 1863825 | _stream.clear(); | |
| 159 |
1/2✓ Branch 2 taken 1863825 times.
✗ Branch 3 not taken.
|
1863825 | _stream.seekg(pos); |
| 160 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 1863825 times.
|
1863825 | if (_stream.fail()) |
| 161 | ✗ | throw SizeError("Given position is beyond EOF"); | |
| 162 | 1863825 | } | |
| 163 | |||
| 164 | //! Return true if no more characters can be read from the stream | ||
| 165 | 1656262 | bool is_end_of_file() { | |
| 166 | 1656262 | _stream.peek(); | |
| 167 | 1656262 | const bool end = _stream.eof(); | |
| 168 | 1656262 | _stream.clear(); | |
| 169 | 1656262 | return end; | |
| 170 | } | ||
| 171 | |||
| 172 | 1166 | operator std::istream&() { | |
| 173 | 1166 | return _stream; | |
| 174 | } | ||
| 175 | |||
| 176 | private: | ||
| 177 | std::istream& _stream; | ||
| 178 | std::string _whitespace_chars; | ||
| 179 | }; | ||
| 180 | |||
| 181 | } // end namespace GridFormat | ||
| 182 | |||
| 183 | #endif // GRIDFORMAT_COMMON_ISTREAM_HELPER_HPP_ | ||
| 184 |