Skip to content

Commit ea993ff

Browse files
Matthew Zipkin (pinheadmz)
authored and committed
string: add LineReader
This is a helper struct to parse HTTP messages from data in buffers from sockets. HTTP messages begin with header lines, each terminated by a line ending (LF "\n" or CRLF "\r\n"), followed by an arbitrary amount of body data. Whitespace is trimmed from the field lines but not from the body. https://httpwg.org/specs/rfc9110.html#rfc.section.5
1 parent 9423d8a commit ea993ff

File tree

5 files changed

+160
-0
lines changed

5 files changed

+160
-0
lines changed

src/test/util_string_tests.cpp

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,4 +180,72 @@ BOOST_AUTO_TEST_CASE(ascii_case_insensitive_hash_test)
180180
BOOST_CHECK_EQUAL(hsh("Ab"), hsh("aB"));
181181
}
182182

183+
BOOST_AUTO_TEST_CASE(line_reader_test)
{
    {
        // Three lines, ended by \n, by \r\n and by the end of the buffer;
        // whitespace around each line is stripped from the result.
        const std::vector<std::byte> buffer{StringToBuffer("once upon a time\n there was a dog \r\nwho liked food")};
        LineReader lines(buffer, /*max_read=*/128);
        const std::optional<std::string> first{lines.ReadLine()};
        BOOST_CHECK_EQUAL(lines.Left(), 33);
        const std::optional<std::string> second{lines.ReadLine()};
        BOOST_CHECK_EQUAL(lines.Left(), 14);
        const std::optional<std::string> third{lines.ReadLine()};
        const std::optional<std::string> fourth{lines.ReadLine()};
        BOOST_CHECK(first);
        BOOST_CHECK(second);
        BOOST_CHECK(third);
        BOOST_CHECK(!fourth);
        BOOST_CHECK_EQUAL(first.value(), "once upon a time");
        BOOST_CHECK_EQUAL(second.value(), "there was a dog");
        BOOST_CHECK_EQUAL(third.value(), "who liked food");
    }
    {
        // The search for \n must stop once max_read is exceeded.
        // Buffer layout: 22-character line, \n, 23-character line, \n
        const std::vector<std::byte> buffer{StringToBuffer("once upon a time there\nwas a dog who liked tea\n")};

        LineReader tight(buffer, /*max_read=*/22);
        // The first line has exactly max_read characters
        BOOST_CHECK_EQUAL(tight.ReadLine(), "once upon a time there");
        // The second line is one character over the limit
        BOOST_CHECK_THROW(tight.ReadLine(), std::runtime_error);

        // Same buffer with the limit raised by one
        LineReader roomy(buffer, /*max_read=*/23);
        // Now both lines are within the limit
        BOOST_CHECK_EQUAL(roomy.ReadLine(), "once upon a time there");
        BOOST_CHECK_EQUAL(roomy.ReadLine(), "was a dog who liked tea");
        // Nothing left in the buffer
        BOOST_CHECK(!roomy.ReadLine());
    }
    {
        // Reaching the end of the buffer (EOB) terminates a line just like \n
        const std::vector<std::byte> buffer{StringToBuffer("once upon a time there")};

        LineReader tight(buffer, /*max_read=*/22);
        // The only line has exactly max_read characters
        BOOST_CHECK_EQUAL(tight.ReadLine(), "once upon a time there");
        // Nothing left in the buffer
        BOOST_CHECK(!tight.ReadLine());

        // Same buffer with the limit raised by one
        LineReader roomy(buffer, /*max_read=*/23);
        // The line is within the limit
        BOOST_CHECK_EQUAL(roomy.ReadLine(), "once upon a time there");
        // Nothing left in the buffer
        BOOST_CHECK(!roomy.ReadLine());
    }
    {
        // ReadLength() returns exactly the requested byte count, ignoring both
        // max_read and \n, and throws only when the buffer is too short.
        const std::vector<std::byte> buffer{StringToBuffer("once upon a time\n there was a dog \r\nwho liked food")};
        LineReader chunks(buffer, /*max_read=*/1);
        BOOST_CHECK_EQUAL(chunks.ReadLength(0), "");
        BOOST_CHECK_EQUAL(chunks.ReadLength(3), "onc");
        BOOST_CHECK_EQUAL(chunks.ReadLength(8), "e upon a");
        BOOST_CHECK_EQUAL(chunks.ReadLength(8), " time\n t");
        BOOST_CHECK_THROW(chunks.ReadLength(128), std::runtime_error);
    }
}
250+
183251
BOOST_AUTO_TEST_SUITE_END()

src/util/strencodings.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -427,3 +427,9 @@ std::optional<uint64_t> ParseByteUnits(std::string_view str, ByteUnit default_mu
427427
}
428428
return *parsed_num * unit_amount;
429429
}
430+
431+
/**
 * Copy the characters of a string into a freshly allocated byte vector.
 *
 * Every char is converted to the std::byte with the same 8-bit value, so the
 * result has str.size() elements and keeps embedded NUL characters.
 */
std::vector<std::byte> StringToBuffer(const std::string& str)
{
    std::vector<std::byte> bytes;
    bytes.reserve(str.size());
    for (const char ch : str) {
        bytes.push_back(static_cast<std::byte>(ch));
    }
    return bytes;
}

src/util/strencodings.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,15 @@ std::string Capitalize(std::string str);
326326
*/
327327
std::optional<uint64_t> ParseByteUnits(std::string_view str, ByteUnit default_multiplier);
328328

329+
/**
330+
* Returns a byte vector filled with data from a string. Used to test string-
331+
* encoded data from a socket like HTTP headers.
332+
*
333+
* @param[in] str the string to convert into bytes
334+
* @returns byte vector
335+
*/
336+
std::vector<std::byte> StringToBuffer(const std::string& str);
337+
329338
namespace util {
330339
/** consteval version of HexDigit() without the lookup table. */
331340
consteval uint8_t ConstevalHexDigit(const char c)

src/util/string.cpp

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,49 @@ void ReplaceAll(std::string& in_out, const std::string& search, const std::strin
1313
if (search.empty()) return;
1414
in_out = std::regex_replace(in_out, std::regex(search), substitute);
1515
}
16+
17+
LineReader::LineReader(std::span<const std::byte> buffer, size_t max_read)
18+
: start(buffer.begin()), end(buffer.end()), max_read(max_read), it(buffer.begin()) {}
19+
20+
std::optional<std::string> LineReader::ReadLine()
21+
{
22+
if (it == end) {
23+
return std::nullopt;
24+
}
25+
26+
auto line_start = it;
27+
size_t count = 0;
28+
while (it != end) {
29+
// Read a character from the incoming buffer and increment the iterator
30+
auto c = static_cast<char>(*it);
31+
++it;
32+
++count;
33+
// If the character we just consumed was \n, the line is terminated
34+
if (c == '\n') break;
35+
// If we are at the end of the incoming buffer, the line is terminated
36+
if (it == end) break;
37+
// If the character we just consumed gives us a line length greater
38+
// than max_read, and we are not at the end of the line (or buffer) yet,
39+
// that means the line we are currently reading is too long, and we throw.
40+
if (count > max_read) throw std::runtime_error("max_read exceeded by LineReader");
41+
}
42+
const std::string_view untrimmed_line(reinterpret_cast<const char *>(std::to_address(line_start)), count);
43+
const std::string_view line = TrimStringView(untrimmed_line); // delete trailing \r and/or \n
44+
return std::string(line);
45+
}
46+
47+
// Ignores max_read but won't overflow
48+
std::string LineReader::ReadLength(size_t len)
49+
{
50+
if (len == 0) return "";
51+
if (Left() < len) throw std::runtime_error("Not enough data in buffer");
52+
std::string out(reinterpret_cast<const char*>(std::to_address(it)), len);
53+
it += len;
54+
return out;
55+
}
56+
57+
size_t LineReader::Left() const
58+
{
59+
return std::distance(it, end);
60+
}
1661
} // namespace util

src/util/string.h

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include <cstdint>
1212
#include <cstring>
1313
#include <locale>
14+
#include <optional>
1415
#include <sstream>
1516
#include <string> // IWYU pragma: export
1617
#include <string_view> // IWYU pragma: export
@@ -260,6 +261,37 @@ template <typename T1, size_t PREFIX_LEN>
260261
return obj.size() >= PREFIX_LEN &&
261262
std::equal(std::begin(prefix), std::end(prefix), std::begin(obj));
262263
}
264+
265+
struct LineReader {
266+
const std::span<const std::byte>::iterator start;
267+
const std::span<const std::byte>::iterator end;
268+
const size_t max_read;
269+
std::span<const std::byte>::iterator it;
270+
271+
explicit LineReader(std::span<const std::byte> buffer, size_t max_read);
272+
273+
/**
274+
* Returns a string from current iterator position up to next \n (or end of the buffer)
275+
* and advances iterator to the character following the \n, if present.
276+
* Returned string does not include the trailing \n or \r.
277+
* Will throw if the line is longer than max_read (up to but not including \n or end of buffer).
278+
*/
279+
std::optional<std::string> ReadLine();
280+
281+
/**
282+
* Returns string from current iterator position of specified length
283+
* and advances iterator. May exceed max_read but will not read past end of buffer.
284+
* @param[in] len The number of bytes to read from the buffer
285+
* @returns a string of the expected length.
286+
* @throws a std::runtime_error if there is not enough data in the buffer.
287+
*/
288+
std::string ReadLength(size_t len);
289+
290+
/**
291+
* Returns remaining size of bytes in buffer
292+
*/
293+
size_t Left() const;
294+
};
263295
} // namespace util
264296

265297
#endif // BITCOIN_UTIL_STRING_H

0 commit comments

Comments
 (0)