Line | Branch | Exec | Source |
---|---|---|---|
1 | // SPDX-FileCopyrightText: 2022-2023 Dennis Gläser <dennis.glaeser@iws.uni-stuttgart.de> | ||
2 | // SPDX-License-Identifier: MIT | ||
3 | /*! | ||
4 | * \file | ||
5 | * \ingroup Common | ||
6 | * \brief Helper for parsing data from input streams. | ||
7 | */ | ||
8 | #ifndef GRIDFORMAT_COMMON_ISTREAM_HELPER_HPP_ | ||
9 | #define GRIDFORMAT_COMMON_ISTREAM_HELPER_HPP_ | ||
10 | |||
11 | #include <cmath> | ||
12 | #include <string> | ||
13 | #include <istream> | ||
14 | #include <optional> | ||
15 | #include <utility> | ||
16 | #include <concepts> | ||
17 | #include <iterator> | ||
18 | #include <limits> | ||
19 | |||
20 | #include <gridformat/common/exceptions.hpp> | ||
21 | |||
22 | namespace GridFormat { | ||
23 | |||
24 | /*! | ||
25 | * \ingroup Common | ||
26 | * \brief Helper for parsing data from input streams. | ||
27 | */ | ||
28 | class InputStreamHelper { | ||
29 | public: | ||
30 | static constexpr std::size_t default_chunk_size = 5000; | ||
31 | |||
32 |
9/18✓ Branch 1 taken 8293 times.
✗ Branch 2 not taken.
✓ Branch 5 taken 8293 times.
✗ Branch 6 not taken.
✓ Branch 8 taken 1166 times.
✗ Branch 9 not taken.
✓ Branch 12 taken 1166 times.
✗ Branch 13 not taken.
✓ Branch 15 taken 741 times.
✗ Branch 16 not taken.
✓ Branch 19 taken 741 times.
✗ Branch 20 not taken.
✓ Branch 22 taken 1932 times.
✗ Branch 23 not taken.
✓ Branch 26 taken 1932 times.
✗ Branch 27 not taken.
✓ Branch 29 taken 22891 times.
✗ Branch 30 not taken.
|
105069 | explicit InputStreamHelper(std::istream& s, std::string whitespace_chars = " \n\t") |
33 | 35023 | : _stream{s} | |
34 | 35023 | , _whitespace_chars{std::move(whitespace_chars)} | |
35 | 35023 | {} | |
36 | |||
37 | //! Read a chunk of characters from the stream into the given buffer | ||
38 | 1567017 | void read_chunk_to(std::string& buffer, const std::size_t chunk_size = default_chunk_size) { | |
39 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 1567017 times.
|
1567017 | if (is_end_of_file()) |
40 | ✗ | throw IOError("End of file already reached"); | |
41 | 1567017 | buffer.resize(chunk_size); | |
42 | 1567017 | _stream.read(buffer.data(), chunk_size); | |
43 | 1567017 | _stream.clear(); | |
44 | 1567017 | buffer.resize(_stream.gcount()); | |
45 | 1567017 | } | |
46 | |||
47 | //! Read a chunk of characters from the stream | ||
48 | 1023766 | std::string read_chunk(const std::size_t chunk_size = default_chunk_size) { | |
49 |
1/2✓ Branch 1 taken 1023766 times.
✗ Branch 2 not taken.
|
1023766 | std::string tmp(chunk_size, ' '); |
50 |
1/2✓ Branch 1 taken 1023766 times.
✗ Branch 2 not taken.
|
1023766 | read_chunk_to(tmp, chunk_size); |
51 | 1023766 | return tmp; | |
52 | ✗ | } | |
53 | |||
54 | //! Move the position forward until any of the given characters is found or EOF is reached | ||
55 | 541888 | bool shift_until_any_of(const std::string& chars, std::optional<std::size_t> max_chars = {}) { | |
56 | 541888 | std::string tmp_buffer; | |
57 | 541888 | std::size_t char_count = 0; | |
58 | 541888 | const auto max_num_chars = max_chars.value_or(std::numeric_limits<std::size_t>::max()); | |
59 | |||
60 |
2/2✓ Branch 0 taken 541663 times.
✓ Branch 1 taken 12544 times.
|
554207 | while (char_count < max_num_chars) { |
61 |
1/2✓ Branch 2 taken 541663 times.
✗ Branch 3 not taken.
|
541663 | read_chunk_to(tmp_buffer, std::min(default_chunk_size, max_num_chars - char_count)); |
62 | 541663 | const auto str_pos = tmp_buffer.find_first_of(chars); | |
63 |
2/2✓ Branch 0 taken 528918 times.
✓ Branch 1 taken 12745 times.
|
541663 | if (str_pos != std::string::npos) { |
64 | 528918 | const auto delta_pos = tmp_buffer.size() - str_pos; | |
65 |
1/2✓ Branch 1 taken 528918 times.
✗ Branch 2 not taken.
|
528918 | shift_by(-delta_pos); |
66 | 528918 | return true; | |
67 | } | ||
68 | |||
69 |
3/4✓ Branch 1 taken 12745 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 426 times.
✓ Branch 4 taken 12319 times.
|
12745 | if (is_end_of_file()) |
70 | 426 | return false; | |
71 | |||
72 | 12319 | char_count += tmp_buffer.size(); | |
73 | } | ||
74 | |||
75 | 12544 | return false; | |
76 | 541888 | } | |
77 | |||
78 | //! Read characters from the stream until any of the given characters is found or EOF is reached | ||
79 | 325917 | std::string read_until_any_of(const std::string& chars, std::optional<std::size_t> max_chars = {}) { | |
80 | 325917 | const auto p0 = position(); | |
81 | 325917 | shift_until_any_of(chars, max_chars); | |
82 | 325917 | const auto p1 = position(); | |
83 | 325917 | seek_position(p0); | |
84 | 325917 | return read_chunk(p1 - p0); | |
85 | } | ||
86 | |||
87 | //! Move the position forward until a character that is none of the given ones is found or EOF is reached | ||
88 | 204230 | bool shift_until_not_any_of(const std::string& chars) { | |
89 | 204230 | std::string tmp_buffer; | |
90 | while (true) { | ||
91 |
1/2✓ Branch 1 taken 204230 times.
✗ Branch 2 not taken.
|
204230 | tmp_buffer = read_chunk(); |
92 | 204230 | const auto pos = tmp_buffer.find_first_not_of(chars); | |
93 |
1/2✓ Branch 0 taken 204230 times.
✗ Branch 1 not taken.
|
204230 | if (pos != std::string::npos) { |
94 | 204230 | const auto delta_pos = tmp_buffer.size() - pos; | |
95 |
1/2✓ Branch 1 taken 204230 times.
✗ Branch 2 not taken.
|
204230 | shift_by(-delta_pos); |
96 | 204230 | return true; | |
97 | } | ||
98 | |||
99 | ✗ | if (is_end_of_file()) | |
100 | ✗ | return false; | |
101 | ✗ | } | |
102 | 204230 | } | |
103 | |||
104 | //! Read from the stream until a character not matching any of the given characters is found or EOF is reached | ||
105 | 10347 | std::string read_until_not_any_of(const std::string& chars) { | |
106 | 10347 | const auto p0 = position(); | |
107 | 10347 | shift_until_not_any_of(chars); | |
108 | 10347 | const auto p1 = position(); | |
109 | 10347 | seek_position(p0); | |
110 | 10347 | return read_chunk(p1 - p0); | |
111 | } | ||
112 | |||
113 | //! Move the position until the given string is found or EOF is reached | ||
114 | 1085 | bool shift_until_substr(const std::string& substr) { | |
115 | 1085 | const std::streamsize delta = -substr.size(); | |
116 | 1085 | const auto chunk_size = std::max(substr.size()*10, default_chunk_size); | |
117 | 1085 | std::string chunk; | |
118 | while (true) { | ||
119 |
1/2✓ Branch 1 taken 1588 times.
✗ Branch 2 not taken.
|
1588 | const auto cur_pos = position(); |
120 |
1/2✓ Branch 1 taken 1588 times.
✗ Branch 2 not taken.
|
1588 | read_chunk_to(chunk, chunk_size); |
121 | 1588 | const auto substr_pos = chunk.find(substr); | |
122 |
2/2✓ Branch 0 taken 1085 times.
✓ Branch 1 taken 503 times.
|
1588 | if (substr_pos != std::string::npos) { |
123 |
1/2✓ Branch 1 taken 1085 times.
✗ Branch 2 not taken.
|
1085 | seek_position(cur_pos + substr_pos); |
124 | 1085 | return true; | |
125 | } | ||
126 | |||
127 |
2/4✓ Branch 1 taken 503 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 503 times.
|
503 | if (is_end_of_file()) |
128 | ✗ | return false; | |
129 | |||
130 | // shift back a bit in case the substr lies at chunk boundaries | ||
131 |
1/2✓ Branch 1 taken 503 times.
✗ Branch 2 not taken.
|
503 | shift_by(delta); |
132 | 503 | } | |
133 | 1085 | } | |
134 | |||
135 | //! Skip characters considered whitespace | ||
136 | 15227 | void shift_whitespace() { | |
137 | 15227 | shift_until_not_any_of(_whitespace_chars); | |
138 | 15227 | } | |
139 | |||
140 | //! Skip characters until a whitespace is found | ||
141 | void shift_until_whitespace() { | ||
142 | shift_until_any_of(_whitespace_chars); | ||
143 | } | ||
144 | |||
145 | //! Jump forward in the stream by n characters | ||
146 | 1060533 | void shift_by(std::streamsize n) { | |
147 | 1060533 | seek_position(position() + n); | |
148 | 1060533 | } | |
149 | |||
150 | //! Return the current position in the stream | ||
151 | 2092334 | std::streamsize position() { | |
152 | 2092334 | _stream.clear(); | |
153 |
1/2✓ Branch 1 taken 2092334 times.
✗ Branch 2 not taken.
|
2092334 | return _stream.tellg(); |
154 | } | ||
155 | |||
156 | //! Jump to the requested position | ||
157 | 1856945 | void seek_position(std::streamsize pos) { | |
158 | 1856945 | _stream.clear(); | |
159 |
1/2✓ Branch 2 taken 1856945 times.
✗ Branch 3 not taken.
|
1856945 | _stream.seekg(pos); |
160 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 1856945 times.
|
1856945 | if (_stream.fail()) |
161 | ✗ | throw SizeError("Given position is beyond EOF"); | |
162 | 1856945 | } | |
163 | |||
164 | //! Return true if no more characters can be read from the stream | ||
165 | 1650609 | bool is_end_of_file() { | |
166 | 1650609 | _stream.peek(); | |
167 | 1650609 | const bool end = _stream.eof(); | |
168 | 1650609 | _stream.clear(); | |
169 | 1650609 | return end; | |
170 | } | ||
171 | |||
172 | 1166 | operator std::istream&() { | |
173 | 1166 | return _stream; | |
174 | } | ||
175 | |||
176 | private: | ||
177 | std::istream& _stream; | ||
178 | std::string _whitespace_chars; | ||
179 | }; | ||
180 | |||
181 | } // end namespace GridFormat | ||
182 | |||
183 | #endif // GRIDFORMAT_COMMON_ISTREAM_HELPER_HPP_ | ||
184 |