Skip to content

Commit 0a5ee12

Browse files
authored
Merge pull request #62103 from ClickHouse/pufit/fuzz-query
FuzzQuery table function
2 parents 3a7c983 + 9a02374 commit 0a5ee12

File tree

15 files changed

+475
-31
lines changed

15 files changed

+475
-31
lines changed
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
---
2+
slug: /en/sql-reference/table-functions/fuzzQuery
3+
sidebar_position: 75
4+
sidebar_label: fuzzQuery
5+
---
6+
7+
# fuzzQuery
8+
9+
Perturbs the given query string with random variations.
10+
11+
``` sql
12+
fuzzQuery(query[, max_query_length[, random_seed]])
13+
```
14+
15+
**Arguments**
16+
17+
- `query` (String) - The source query to perform the fuzzing on.
18+
- `max_query_length` (UInt64) - The maximum length the query can reach during the fuzzing process.
19+
- `random_seed` (UInt64) - A random seed for producing reproducible results.
20+
21+
**Returned Value**
22+
23+
A table object with a single column containing perturbed query strings.
24+
25+
## Usage Example
26+
27+
``` sql
28+
SELECT * FROM fuzzQuery('SELECT materialize(\'a\' AS key) GROUP BY key') LIMIT 2;
29+
```
30+
31+
```
32+
┌─query──────────────────────────────────────────────────────────┐
33+
1. │ SELECT 'a' AS key GROUP BY key │
34+
2. │ EXPLAIN PIPELINE compact = true SELECT 'a' AS key GROUP BY key │
35+
└────────────────────────────────────────────────────────────────┘
36+
```

programs/client/Client.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,10 @@ namespace DB
99
class Client : public ClientBase
1010
{
1111
public:
12-
Client() = default;
12+
Client()
13+
{
14+
fuzzer = QueryFuzzer(randomSeed(), &std::cout, &std::cerr);
15+
}
1316

1417
void initialize(Poco::Util::Application & self) override;
1518

src/Client/ClientBase.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,13 @@
66
#include <Common/ProgressIndication.h>
77
#include <Common/InterruptListener.h>
88
#include <Common/ShellCommand.h>
9+
#include <Common/QueryFuzzer.h>
910
#include <Common/Stopwatch.h>
1011
#include <Common/DNSResolver.h>
1112
#include <Core/ExternalTable.h>
1213
#include <Poco/Util/Application.h>
1314
#include <Interpreters/Context.h>
1415
#include <Client/Suggest.h>
15-
#include <Client/QueryFuzzer.h>
1616
#include <boost/program_options.hpp>
1717
#include <Storages/StorageFile.h>
1818
#include <Storages/SelectQueryInfo.h>
Lines changed: 32 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -68,22 +68,21 @@ Field QueryFuzzer::getRandomField(int type)
6868
{
6969
case 0:
7070
{
71-
return bad_int64_values[fuzz_rand() % (sizeof(bad_int64_values)
72-
/ sizeof(*bad_int64_values))];
71+
return bad_int64_values[fuzz_rand() % std::size(bad_int64_values)];
7372
}
7473
case 1:
7574
{
7675
static constexpr double values[]
7776
= {NAN, INFINITY, -INFINITY, 0., -0., 0.0001, 0.5, 0.9999,
7877
1., 1.0001, 2., 10.0001, 100.0001, 1000.0001, 1e10, 1e20,
79-
FLT_MIN, FLT_MIN + FLT_EPSILON, FLT_MAX, FLT_MAX + FLT_EPSILON}; return values[fuzz_rand() % (sizeof(values) / sizeof(*values))];
78+
FLT_MIN, FLT_MIN + FLT_EPSILON, FLT_MAX, FLT_MAX + FLT_EPSILON}; return values[fuzz_rand() % std::size(values)];
8079
}
8180
case 2:
8281
{
8382
static constexpr UInt64 scales[] = {0, 1, 2, 10};
8483
return DecimalField<Decimal64>(
85-
bad_int64_values[fuzz_rand() % (sizeof(bad_int64_values) / sizeof(*bad_int64_values))],
86-
static_cast<UInt32>(scales[fuzz_rand() % (sizeof(scales) / sizeof(*scales))])
84+
bad_int64_values[fuzz_rand() % std::size(bad_int64_values)],
85+
static_cast<UInt32>(scales[fuzz_rand() % std::size(scales)])
8786
);
8887
}
8988
default:
@@ -165,7 +164,8 @@ Field QueryFuzzer::fuzzField(Field field)
165164
{
166165
size_t pos = fuzz_rand() % arr.size();
167166
arr.erase(arr.begin() + pos);
168-
std::cerr << "erased\n";
167+
if (debug_stream)
168+
*debug_stream << "erased\n";
169169
}
170170

171171
if (fuzz_rand() % 5 == 0)
@@ -174,12 +174,14 @@ Field QueryFuzzer::fuzzField(Field field)
174174
{
175175
size_t pos = fuzz_rand() % arr.size();
176176
arr.insert(arr.begin() + pos, fuzzField(arr[pos]));
177-
std::cerr << fmt::format("inserted (pos {})\n", pos);
177+
if (debug_stream)
178+
*debug_stream << fmt::format("inserted (pos {})\n", pos);
178179
}
179180
else
180181
{
181182
arr.insert(arr.begin(), getRandomField(0));
182-
std::cerr << "inserted (0)\n";
183+
if (debug_stream)
184+
*debug_stream << "inserted (0)\n";
183185
}
184186

185187
}
@@ -197,7 +199,9 @@ Field QueryFuzzer::fuzzField(Field field)
197199
{
198200
size_t pos = fuzz_rand() % arr.size();
199201
arr.erase(arr.begin() + pos);
200-
std::cerr << "erased\n";
202+
203+
if (debug_stream)
204+
*debug_stream << "erased\n";
201205
}
202206

203207
if (fuzz_rand() % 5 == 0)
@@ -206,12 +210,16 @@ Field QueryFuzzer::fuzzField(Field field)
206210
{
207211
size_t pos = fuzz_rand() % arr.size();
208212
arr.insert(arr.begin() + pos, fuzzField(arr[pos]));
209-
std::cerr << fmt::format("inserted (pos {})\n", pos);
213+
214+
if (debug_stream)
215+
*debug_stream << fmt::format("inserted (pos {})\n", pos);
210216
}
211217
else
212218
{
213219
arr.insert(arr.begin(), getRandomField(0));
214-
std::cerr << "inserted (0)\n";
220+
221+
if (debug_stream)
222+
*debug_stream << "inserted (0)\n";
215223
}
216224

217225
}
@@ -344,7 +352,8 @@ void QueryFuzzer::fuzzOrderByList(IAST * ast)
344352
}
345353
else
346354
{
347-
std::cerr << "No random column.\n";
355+
if (debug_stream)
356+
*debug_stream << "No random column.\n";
348357
}
349358
}
350359

@@ -378,7 +387,8 @@ void QueryFuzzer::fuzzColumnLikeExpressionList(IAST * ast)
378387
if (col)
379388
impl->children.insert(pos, col);
380389
else
381-
std::cerr << "No random column.\n";
390+
if (debug_stream)
391+
*debug_stream << "No random column.\n";
382392
}
383393

384394
// We don't have to recurse here to fuzz the children, this is handled by
@@ -1361,11 +1371,15 @@ void QueryFuzzer::fuzzMain(ASTPtr & ast)
13611371
collectFuzzInfoMain(ast);
13621372
fuzz(ast);
13631373

1364-
std::cout << std::endl;
1365-
WriteBufferFromOStream ast_buf(std::cout, 4096);
1366-
formatAST(*ast, ast_buf, false /*highlight*/);
1367-
ast_buf.finalize();
1368-
std::cout << std::endl << std::endl;
1374+
if (out_stream)
1375+
{
1376+
*out_stream << std::endl;
1377+
1378+
WriteBufferFromOStream ast_buf(*out_stream, 4096);
1379+
formatAST(*ast, ast_buf, false /*highlight*/);
1380+
ast_buf.finalize();
1381+
*out_stream << std::endl << std::endl;
1382+
}
13691383
}
13701384

13711385
}
Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,31 @@ struct ASTWindowDefinition;
3535
* queries, so you want to feed it a lot of queries to get some interesting mix
3636
* of them. Normally we feed SQL regression tests to it.
3737
*/
38-
struct QueryFuzzer
38+
class QueryFuzzer
3939
{
40-
pcg64 fuzz_rand{randomSeed()};
40+
public:
41+
explicit QueryFuzzer(pcg64 fuzz_rand_ = randomSeed(), std::ostream * out_stream_ = nullptr, std::ostream * debug_stream_ = nullptr)
42+
: fuzz_rand(fuzz_rand_)
43+
, out_stream(out_stream_)
44+
, debug_stream(debug_stream_)
45+
{
46+
}
47+
48+
// This is the only function you have to call -- it will modify the passed
49+
// ASTPtr to point to new AST with some random changes.
50+
void fuzzMain(ASTPtr & ast);
51+
52+
ASTs getInsertQueriesForFuzzedTables(const String & full_query);
53+
ASTs getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query);
54+
void notifyQueryFailed(ASTPtr ast);
55+
56+
static bool isSuitableForFuzzing(const ASTCreateQuery & create);
57+
58+
private:
59+
pcg64 fuzz_rand;
60+
61+
std::ostream * out_stream = nullptr;
62+
std::ostream * debug_stream = nullptr;
4163

4264
// We add elements to expression lists with fixed probability. Some elements
4365
// are so large, that the expected number of elements we add to them is
@@ -66,20 +88,13 @@ struct QueryFuzzer
6688
std::unordered_map<std::string, size_t> index_of_fuzzed_table;
6789
std::set<IAST::Hash> created_tables_hashes;
6890

69-
// This is the only function you have to call -- it will modify the passed
70-
// ASTPtr to point to new AST with some random changes.
71-
void fuzzMain(ASTPtr & ast);
72-
7391
// Various helper functions follow, normally you shouldn't have to call them.
7492
Field getRandomField(int type);
7593
Field fuzzField(Field field);
7694
ASTPtr getRandomColumnLike();
7795
ASTPtr getRandomExpressionList();
7896
DataTypePtr fuzzDataType(DataTypePtr type);
7997
DataTypePtr getRandomType();
80-
ASTs getInsertQueriesForFuzzedTables(const String & full_query);
81-
ASTs getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query);
82-
void notifyQueryFailed(ASTPtr ast);
8398
void replaceWithColumnLike(ASTPtr & ast);
8499
void replaceWithTableLike(ASTPtr & ast);
85100
void fuzzOrderByElement(ASTOrderByElement * elem);
@@ -102,8 +117,6 @@ struct QueryFuzzer
102117
void addTableLike(ASTPtr ast);
103118
void addColumnLike(ASTPtr ast);
104119
void collectFuzzInfoRecurse(ASTPtr ast);
105-
106-
static bool isSuitableForFuzzing(const ASTCreateQuery & create);
107120
};
108121

109122
}

0 commit comments

Comments
 (0)