Skip to content

Commit 9b98aba

Browse files
authored
Merge pull request #66050 from ClickHouse/regex-cache-profile-events
Add profile events for regex cache
2 parents c2b2533 + d688d41 commit 9b98aba

File tree

2 files changed

+29
-4
lines changed

2 files changed

+29
-4
lines changed

src/Common/ProfileEvents.cpp

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -238,7 +238,12 @@
238238
\
239239
M(CannotRemoveEphemeralNode, "Number of times an error happened while trying to remove ephemeral node. This is not an issue, because our implementation of ZooKeeper library guarantee that the session will expire and the node will be removed.") \
240240
\
241-
M(RegexpCreated, "Compiled regular expressions. Identical regular expressions compiled just once and cached forever.") \
241+
M(RegexpWithMultipleNeedlesCreated, "Regular expressions with multiple needles (VectorScan library) compiled.") \
242+
M(RegexpWithMultipleNeedlesGlobalCacheHit, "Number of times we fetched compiled regular expression with multiple needles (VectorScan library) from the global cache.") \
243+
M(RegexpWithMultipleNeedlesGlobalCacheMiss, "Number of times we failed to fetch compiled regular expression with multiple needles (VectorScan library) from the global cache.") \
244+
M(RegexpLocalCacheHit, "Number of times we fetched compiled regular expression from a local cache.") \
245+
M(RegexpLocalCacheMiss, "Number of times we failed to fetch compiled regular expression from a local cache.") \
246+
\
242247
M(ContextLock, "Number of times the lock of Context was acquired or tried to acquire. This is global lock.") \
243248
M(ContextLockWaitMicroseconds, "Context lock wait time in microseconds") \
244249
\

src/Functions/Regexps.h

Lines changed: 23 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,11 @@
2323

2424
namespace ProfileEvents
2525
{
26-
extern const Event RegexpCreated;
26+
extern const Event RegexpWithMultipleNeedlesCreated;
27+
extern const Event RegexpWithMultipleNeedlesGlobalCacheHit;
28+
extern const Event RegexpWithMultipleNeedlesGlobalCacheMiss;
29+
extern const Event RegexpLocalCacheHit;
30+
extern const Event RegexpLocalCacheMiss;
2731
}
2832

2933

@@ -72,18 +76,28 @@ class LocalCacheTable
7276
Bucket & bucket = known_regexps[hasher(pattern) % CACHE_SIZE];
7377

7478
if (bucket.regexp == nullptr) [[unlikely]]
79+
{
7580
/// insert new entry
81+
ProfileEvents::increment(ProfileEvents::RegexpLocalCacheMiss);
7682
bucket = {pattern, std::make_shared<OptimizedRegularExpression>(createRegexp<like, no_capture, case_insensitive>(pattern))};
83+
}
7784
else
85+
{
7886
if (pattern != bucket.pattern)
87+
{
7988
/// replace existing entry
89+
ProfileEvents::increment(ProfileEvents::RegexpLocalCacheMiss);
8090
bucket = {pattern, std::make_shared<OptimizedRegularExpression>(createRegexp<like, no_capture, case_insensitive>(pattern))};
91+
}
92+
else
93+
ProfileEvents::increment(ProfileEvents::RegexpLocalCacheHit);
94+
}
8195

8296
return bucket.regexp;
8397
}
8498

8599
private:
86-
constexpr static size_t CACHE_SIZE = 100; /// collision probability
100+
constexpr static size_t CACHE_SIZE = 1'000; /// collision probability
87101

88102
std::hash<String> hasher;
89103
struct Bucket
@@ -244,7 +258,7 @@ inline Regexps constructRegexps(const std::vector<String> & str_patterns, [[mayb
244258
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Pattern '{}' failed with error '{}'", str_patterns[error->expression], String(error->message));
245259
}
246260

247-
ProfileEvents::increment(ProfileEvents::RegexpCreated);
261+
ProfileEvents::increment(ProfileEvents::RegexpWithMultipleNeedlesCreated);
248262

249263
/// We allocate the scratch space only once, then copy it across multiple threads with hs_clone_scratch
250264
/// function which is faster than allocating scratch space each time in each thread.
@@ -322,9 +336,11 @@ inline DeferredConstructedRegexpsPtr getOrSet(const std::vector<std::string_view
322336
{
323337
return constructRegexps<save_indices, with_edit_distance>(str_patterns, edit_distance);
324338
});
339+
ProfileEvents::increment(ProfileEvents::RegexpWithMultipleNeedlesGlobalCacheMiss);
325340
bucket = {std::move(str_patterns), edit_distance, deferred_constructed_regexps};
326341
}
327342
else
343+
{
328344
if (bucket.patterns != str_patterns || bucket.edit_distance != edit_distance)
329345
{
330346
/// replace existing entry
@@ -333,8 +349,12 @@ inline DeferredConstructedRegexpsPtr getOrSet(const std::vector<std::string_view
333349
{
334350
return constructRegexps<save_indices, with_edit_distance>(str_patterns, edit_distance);
335351
});
352+
ProfileEvents::increment(ProfileEvents::RegexpWithMultipleNeedlesGlobalCacheMiss);
336353
bucket = {std::move(str_patterns), edit_distance, deferred_constructed_regexps};
337354
}
355+
else
356+
ProfileEvents::increment(ProfileEvents::RegexpWithMultipleNeedlesGlobalCacheHit);
357+
}
338358

339359
return bucket.regexps;
340360
}

0 commit comments

Comments
 (0)