-
Notifications
You must be signed in to change notification settings - Fork 8.3k
Allow empty needle in function replace #69918
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
04bb11f
be7a21c
dc74f0d
d0f6649
5700d6a
962180e
85d89ca
350b9c0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,11 +8,6 @@ | |
| namespace DB | ||
| { | ||
|
|
||
| namespace ErrorCodes | ||
| { | ||
| extern const int ARGUMENT_OUT_OF_BOUND; | ||
| } | ||
|
|
||
| struct ReplaceStringTraits | ||
| { | ||
| enum class Replace : uint8_t | ||
|
|
@@ -39,7 +34,11 @@ struct ReplaceStringImpl | |
| size_t input_rows_count) | ||
| { | ||
| if (needle.empty()) | ||
| throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name); | ||
| { | ||
| res_data.assign(haystack_data.begin(), haystack_data.end()); | ||
| res_offsets.assign(haystack_offsets.begin(), haystack_offsets.end()); | ||
| return; | ||
| } | ||
|
|
||
| const UInt8 * const begin = haystack_data.data(); | ||
| const UInt8 * const end = haystack_data.data() + haystack_data.size(); | ||
|
|
@@ -145,34 +144,34 @@ struct ReplaceStringImpl | |
| const auto * const cur_needle_data = &needle_data[prev_needle_offset]; | ||
| const size_t cur_needle_length = needle_offsets[i] - prev_needle_offset - 1; | ||
|
|
||
| if (cur_needle_length == 0) | ||
| throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name); | ||
|
|
||
| /// Using "slow" "stdlib searcher instead of Volnitsky because there is a different pattern in each row | ||
| StdLibASCIIStringSearcher</*CaseInsensitive*/ false> searcher(cur_needle_data, cur_needle_length); | ||
|
|
||
| const auto * last_match = static_cast<UInt8 *>(nullptr); | ||
| const auto * start_pos = cur_haystack_data; | ||
| const auto * const cur_haystack_end = cur_haystack_data + cur_haystack_length; | ||
|
|
||
| while (start_pos < cur_haystack_end) | ||
| if (cur_needle_length) | ||
| { | ||
| if (const auto * const match = searcher.search(start_pos, cur_haystack_end); match != cur_haystack_end) | ||
| /// Using "slow" "stdlib searcher instead of Volnitsky because there is a different pattern in each row | ||
| StdLibASCIIStringSearcher</*CaseInsensitive*/ false> searcher(cur_needle_data, cur_needle_length); | ||
|
|
||
| while (start_pos < cur_haystack_end) | ||
| { | ||
| /// Copy prefix before match | ||
| copyToOutput(start_pos, match - start_pos, res_data, res_offset); | ||
| if (const auto * const match = searcher.search(start_pos, cur_haystack_end); match != cur_haystack_end) | ||
| { | ||
| /// Copy prefix before match | ||
| copyToOutput(start_pos, match - start_pos, res_data, res_offset); | ||
|
|
||
| /// Insert replacement for match | ||
| copyToOutput(replacement.data(), replacement.size(), res_data, res_offset); | ||
| /// Insert replacement for match | ||
| copyToOutput(replacement.data(), replacement.size(), res_data, res_offset); | ||
|
|
||
| last_match = match; | ||
| start_pos = match + cur_needle_length; | ||
| last_match = match; | ||
| start_pos = match + cur_needle_length; | ||
|
|
||
| if constexpr (replace == ReplaceStringTraits::Replace::First) | ||
| if constexpr (replace == ReplaceStringTraits::Replace::First) | ||
| break; | ||
| } | ||
| else | ||
| break; | ||
| } | ||
| else | ||
| break; | ||
| } | ||
|
|
||
| /// Copy suffix after last match | ||
|
|
@@ -200,7 +199,11 @@ struct ReplaceStringImpl | |
| chassert(haystack_offsets.size() == replacement_offsets.size()); | ||
|
|
||
| if (needle.empty()) | ||
| throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name); | ||
| { | ||
| res_data.assign(haystack_data.begin(), haystack_data.end()); | ||
| res_offsets.assign(haystack_offsets.begin(), haystack_offsets.end()); | ||
| return; | ||
| } | ||
|
|
||
| res_data.reserve(haystack_data.size()); | ||
| res_offsets.resize(input_rows_count); | ||
|
|
@@ -291,36 +294,35 @@ struct ReplaceStringImpl | |
| const auto * const cur_replacement_data = &replacement_data[prev_replacement_offset]; | ||
| const size_t cur_replacement_length = replacement_offsets[i] - prev_replacement_offset - 1; | ||
|
|
||
| if (cur_needle_length == 0) | ||
| throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name); | ||
|
|
||
| /// Using "slow" "stdlib searcher instead of Volnitsky because there is a different pattern in each row | ||
| StdLibASCIIStringSearcher</*CaseInsensitive*/ false> searcher(cur_needle_data, cur_needle_length); | ||
|
|
||
| const auto * last_match = static_cast<UInt8 *>(nullptr); | ||
| const auto * start_pos = cur_haystack_data; | ||
| const auto * const cur_haystack_end = cur_haystack_data + cur_haystack_length; | ||
|
|
||
| while (start_pos < cur_haystack_end) | ||
| if (cur_needle_length) | ||
| { | ||
| if (const auto * const match = searcher.search(start_pos, cur_haystack_end); match != cur_haystack_end) | ||
| /// Using "slow" "stdlib searcher instead of Volnitsky because there is a different pattern in each row | ||
| StdLibASCIIStringSearcher</*CaseInsensitive*/ false> searcher(cur_needle_data, cur_needle_length); | ||
|
|
||
| while (start_pos < cur_haystack_end) | ||
| { | ||
| /// Copy prefix before match | ||
| copyToOutput(start_pos, match - start_pos, res_data, res_offset); | ||
| if (const auto * const match = searcher.search(start_pos, cur_haystack_end); match != cur_haystack_end) | ||
| { | ||
| /// Copy prefix before match | ||
| copyToOutput(start_pos, match - start_pos, res_data, res_offset); | ||
|
|
||
| /// Insert replacement for match | ||
| copyToOutput(cur_replacement_data, cur_replacement_length, res_data, res_offset); | ||
| /// Insert replacement for match | ||
| copyToOutput(cur_replacement_data, cur_replacement_length, res_data, res_offset); | ||
|
|
||
| last_match = match; | ||
| start_pos = match + cur_needle_length; | ||
| last_match = match; | ||
| start_pos = match + cur_needle_length; | ||
|
|
||
| if constexpr (replace == ReplaceStringTraits::Replace::First) | ||
| if constexpr (replace == ReplaceStringTraits::Replace::First) | ||
| break; | ||
| } | ||
| else | ||
| break; | ||
| } | ||
| else | ||
| break; | ||
| } | ||
|
|
||
| /// Copy suffix after last match | ||
| size_t bytes = (last_match == nullptr) ? (cur_haystack_end - cur_haystack_data + 1) | ||
| : (cur_haystack_end - last_match - cur_needle_length + 1); | ||
|
|
@@ -346,7 +348,21 @@ struct ReplaceStringImpl | |
| size_t input_rows_count) | ||
| { | ||
| if (needle.empty()) | ||
| throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name); | ||
| { | ||
| chassert(input_rows_count == haystack_data.size() / n); | ||
| /// Since ColumnFixedString does not have a zero byte at the end, while ColumnString does, | ||
| /// we need to split haystack_data into strings of length n, add 1 zero byte to the end of each string | ||
| /// and then copy to res_data, ref: ColumnString.h and ColumnFixedString.h | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It would be nice to have a test with
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you know how to construct a ColumnConst with string type without modifying code
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Executing of the following query will lead to this part of code. Though it's strange that a more simple query just throws exception |
||
| res_data.reserve(haystack_data.size() + input_rows_count); | ||
| res_offsets.resize(input_rows_count); | ||
| for (size_t i = 0; i < input_rows_count; ++i) | ||
| { | ||
| res_data.insert(res_data.end(), haystack_data.begin() + i * n, haystack_data.begin() + (i + 1) * n); | ||
| res_data.push_back(0); | ||
| res_offsets[i] = (i + 1) * n + 1; | ||
| } | ||
| return; | ||
| } | ||
|
|
||
| const UInt8 * const begin = haystack_data.data(); | ||
| const UInt8 * const end = haystack_data.data() + haystack_data.size(); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -190,3 +190,6 @@ __.__ o_.__ o_.__ 1 | |
| __. o_. o_. 1 | ||
| __.__ o_.__ o_.__ 1 | ||
| __.__ o_.__ o_.__ 1 | ||
| ABCabc | ||
| ABCabc | ||
| ABCabc | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -99,3 +99,6 @@ SELECT s, replaceOne(s, '_', 'o') AS a, replaceRegexpOne(s, '_', 'o') AS b, a = | |
| SELECT s, replaceOne(s, '_', 'o') AS a, replaceRegexpOne(s, '_', 'o') AS b, a = b FROM (SELECT arrayJoin(['__.__', '.__']) AS s); | ||
| SELECT s, replaceOne(s, '_', 'o') AS a, replaceRegexpOne(s, '_', 'o') AS b, a = b FROM (SELECT arrayJoin(['__.__', '__.']) AS s); | ||
| SELECT s, replaceOne(s, '_', 'o') AS a, replaceRegexpOne(s, '_', 'o') AS b, a = b FROM (SELECT arrayJoin(['__.__', '__.__']) AS s); | ||
| SELECT replace('ABCabc', '', 'DEF'); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is way too dirty. We
@zhanglistar Please move l. 102 into a new test 00240_replace_with_empty_needle and address these points. |
||
| SELECT replace(materialize('ABCabc'), materialize(''), 'DEF'); | ||
| SELECT replace(materialize('ABCabc'), '', 'DEF'); | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -70,7 +70,7 @@ SELECT id, haystack, needle, replacement, replaceRegexpOne('Hello World', needle | |
| DROP TABLE IF EXISTS test_tab; | ||
|
|
||
|
|
||
| SELECT 'Check that an exception is thrown if the needle is empty'; | ||
| SELECT 'Check that whether an exception is thrown if the needle is empty'; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Bad grammar, please replace by "Check what happens if the needle is empty". |
||
|
|
||
| CREATE TABLE test_tab | ||
| (id UInt32, haystack String, needle String, replacement String) | ||
|
|
@@ -80,20 +80,20 @@ CREATE TABLE test_tab | |
| INSERT INTO test_tab VALUES (1, 'Hello World', 'l', 'x') (2, 'Hello World', '', 'y'); | ||
|
|
||
| -- needle: non-const, replacement: const | ||
| SELECT replaceAll(haystack, needle, 'x') FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND } | ||
| SELECT replaceOne(haystack, needle, 'x') FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND } | ||
| SELECT replaceAll(haystack, needle, 'x') FROM test_tab; | ||
| SELECT replaceOne(haystack, needle, 'x') FROM test_tab; | ||
| SELECT replaceRegexpAll(haystack, needle, 'x') FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND } | ||
| SELECT replaceRegexpOne(haystack, needle, 'x') FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND } | ||
|
|
||
| -- needle: const, replacement: non-const | ||
| SELECT replaceAll(haystack, '', replacement) FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND } | ||
| SELECT replaceOne(haystack, '', replacement) FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND } | ||
| SELECT replaceAll(haystack, '', replacement) FROM test_tab; | ||
| SELECT replaceOne(haystack, '', replacement) FROM test_tab; | ||
| SELECT replaceRegexpAll(haystack, '', replacement) FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND } | ||
| SELECT replaceRegexpOne(haystack, '', replacement) FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND } | ||
|
|
||
| -- needle: non-const, replacement: non-const | ||
| SELECT replaceAll(haystack, needle, replacement) FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND } | ||
| SELECT replaceOne(haystack, needle, replacement) FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND } | ||
| SELECT replaceAll(haystack, needle, replacement) FROM test_tab; | ||
| SELECT replaceOne(haystack, needle, replacement) FROM test_tab; | ||
| SELECT replaceRegexpAll(haystack, needle, replacement) FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND } | ||
| SELECT replaceRegexpOne(haystack, needle, replacement) FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND } | ||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.