Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
66 commits
Select commit Hold shift + click to select a range
f8ff179
mark filter cache
zhongyuankai Aug 23, 2024
7210854
Complete basic functions
zhongyuankai Aug 25, 2024
7753160
support analyzer
zhongyuankai Aug 30, 2024
6d78d37
batter
zhongyuankai Sep 1, 2024
92d10a3
rename
zhongyuankai Sep 1, 2024
dd29d53
Refactor
zhongyuankai Sep 4, 2024
ce76c0a
fix
zhongyuankai Sep 4, 2024
7701ebe
Merge branch 'master' into query_condition_cache
zhongyuankai Sep 4, 2024
409219b
resolve conflicts
zhongyuankai Sep 7, 2024
0e065a5
fix
zhongyuankai Sep 7, 2024
c9591eb
fix style
zhongyuankai Sep 7, 2024
6a077b7
fix test
zhongyuankai Sep 8, 2024
fc8ab23
fix test
zhongyuankai Sep 8, 2024
da59d70
Merge branch 'master' into query_condition_cache
zhongyuankai Nov 21, 2024
7858f72
fix test
zhongyuankai Nov 22, 2024
708dec0
batter
zhongyuankai Nov 25, 2024
ad1b4c1
fix style
zhongyuankai Nov 25, 2024
ca4d835
fix test
zhongyuankai Nov 26, 2024
6f68b4c
fix prewhere dag hash
zhongyuankai Nov 27, 2024
b42fa34
fix test
zhongyuankai Nov 27, 2024
014e00d
batter
zhongyuankai Nov 30, 2024
a11f45c
Merge branch 'master' into query_condition_cache
zhongyuankai Nov 30, 2024
1879115
Merge remote-tracking branch 'ClickHouse/master' into query_condition…
rschu1ze Dec 3, 2024
c098b69
Some fixups
rschu1ze Dec 3, 2024
377b992
Mini fixup
rschu1ze Dec 3, 2024
ade09eb
Merge remote-tracking branch 'ClickHouse/master' into query_condition…
rschu1ze Dec 3, 2024
a339c08
Fixups
rschu1ze Dec 3, 2024
4078567
Fix build
rschu1ze Dec 3, 2024
4432e4b
Fixups
rschu1ze Dec 3, 2024
d354d94
Update test
rschu1ze Dec 3, 2024
c1d5079
Fixups
rschu1ze Dec 3, 2024
0acd87b
fix style
zhongyuankai Dec 4, 2024
d86801b
Merge branch 'master' into query_condition_cache
zhongyuankai Dec 30, 2024
73bfce7
fix test
zhongyuankai Dec 30, 2024
03803fc
Merge branch 'master' into query_condition_cache
zhongyuankai Jan 3, 2025
477a4a5
add log
zhongyuankai Jan 4, 2025
6d899cb
fix test
zhongyuankai Jan 4, 2025
b322ffb
add debug log
zhongyuankai Jan 5, 2025
9ce438c
fix test
zhongyuankai Jan 5, 2025
b9d3a1d
not support old analyzer
zhongyuankai Jan 6, 2025
55b5799
fix test
zhongyuankai Jan 7, 2025
5e42c1e
Merge branch 'master' into query_condition_cache
zhongyuankai Feb 8, 2025
316b841
fix build
zhongyuankai Feb 8, 2025
85f7760
Merge branch 'master' into query_condition_cache
zhongyuankai Feb 21, 2025
5f04d28
Disable some tests using query condition cache because it will cause …
zhongyuankai Feb 21, 2025
5d39ae4
Merge branch 'master' into query_condition_cache
zhongyuankai Feb 21, 2025
c124551
fix build
zhongyuankai Feb 21, 2025
fd33743
fix fast test
zhongyuankai Feb 21, 2025
bb8e09c
add debug log
zhongyuankai Feb 25, 2025
774d3a0
Merge branch 'master' into query_condition_cache
zhongyuankai Feb 25, 2025
5785125
Merge branch 'master' into query_condition_cache
alexey-milovidov Feb 25, 2025
3687aee
Merge branch 'master' into query_condition_cache
zhongyuankai Feb 26, 2025
d24e380
Merge branch 'master' into query_condition_cache
alexey-milovidov Feb 27, 2025
62f2e69
entry weight
zhongyuankai Feb 27, 2025
f2bcff6
Merge branch 'master' into query_condition_cache
zhongyuankai Feb 27, 2025
c8e3f30
Update QueryConditionCache.cpp
alexey-milovidov Feb 27, 2025
196bbcf
add log
zhongyuankai Feb 28, 2025
7a458b5
Merge branch 'master' into query_condition_cache
zhongyuankai Feb 28, 2025
66778a7
log
zhongyuankai Feb 28, 2025
c41f9c2
Merge branch 'master' into query_condition_cache
zhongyuankai Feb 28, 2025
9dc1b8d
Merge branch 'master' into query_condition_cache
zhongyuankai Feb 28, 2025
4dfb32c
fix test
zhongyuankai Mar 1, 2025
94244b7
Avoid intersecting ranges
zhongyuankai Mar 1, 2025
0dd838d
Merge branch 'master' into query_condition_cache
zhongyuankai Mar 1, 2025
6e2622b
fix style
zhongyuankai Mar 1, 2025
d9a0fe8
Merge branch 'master' into query_condition_cache
zhongyuankai Mar 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions programs/local/LocalServer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -831,6 +831,9 @@ void LocalServer::processConfig()
/// Initialize a dummy query cache.
global_context->setQueryCache(0, 0, 0, 0);

/// Initialize a dummy query condition cache.
global_context->setQueryConditionCache(DEFAULT_QUERY_CONDITION_CACHE_POLICY, 0, 0);

/// Initialize allowed tiers
global_context->getAccessControl().setAllowTierSettings(server_settings[ServerSetting::allow_feature_tier]);

Expand Down
14 changes: 14 additions & 0 deletions programs/server/Server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,9 @@ namespace ServerSetting
extern const ServerSettingsUInt64 page_cache_chunk_size;
extern const ServerSettingsUInt64 page_cache_mmap_size;
extern const ServerSettingsUInt64 page_cache_size;
extern const ServerSettingsString query_condition_cache_policy;
extern const ServerSettingsUInt64 query_condition_cache_size;
extern const ServerSettingsDouble query_condition_cache_size_ratio;
extern const ServerSettingsBool page_cache_use_madv_free;
extern const ServerSettingsBool page_cache_use_transparent_huge_pages;
extern const ServerSettingsBool prepare_system_log_tables_on_startup;
Expand Down Expand Up @@ -1715,6 +1718,16 @@ try
}
global_context->setQueryCache(query_cache_max_size_in_bytes, query_cache_max_entries, query_cache_query_cache_max_entry_size_in_bytes, query_cache_max_entry_size_in_rows);

String query_condition_cache_policy = server_settings[ServerSetting::query_condition_cache_policy];
size_t query_condition_cache_size = server_settings[ServerSetting::query_condition_cache_size];
double query_condition_cache_size_ratio = server_settings[ServerSetting::query_condition_cache_size_ratio];
if (query_condition_cache_size > max_cache_size)
{
query_condition_cache_size = max_cache_size;
LOG_INFO(log, "Lowered query condition cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(query_condition_cache_size));
}
global_context->setQueryConditionCache(query_condition_cache_policy, query_condition_cache_size, query_condition_cache_size_ratio);

#if USE_EMBEDDED_COMPILER
size_t compiled_expression_cache_max_size_in_bytes = server_settings[ServerSetting::compiled_expression_cache_size];
size_t compiled_expression_cache_max_elements = server_settings[ServerSetting::compiled_expression_cache_elements_size];
Expand Down Expand Up @@ -2010,6 +2023,7 @@ try
global_context->updateSkippingIndexCacheConfiguration(*config);
global_context->updateMMappedFileCacheConfiguration(*config);
global_context->updateQueryCacheConfiguration(*config);
global_context->updateQueryConditionCacheConfiguration(*config);

CompressionCodecEncrypted::Configuration::instance().tryLoad(*config, "encryption_codecs");
#if USE_SSL
Expand Down
1 change: 1 addition & 0 deletions src/Access/Common/AccessType.h
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ enum class AccessType : uint8_t
M(SYSTEM_DROP_SKIPPING_INDEX_CACHE, "SYSTEM DROP SKIPPING INDEX CACHE, DROP SKIPPING INDEX CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
M(SYSTEM_DROP_MMAP_CACHE, "SYSTEM DROP MMAP, DROP MMAP CACHE, DROP MMAP", GLOBAL, SYSTEM_DROP_CACHE) \
M(SYSTEM_DROP_QUERY_CACHE, "SYSTEM DROP QUERY, DROP QUERY CACHE, DROP QUERY", GLOBAL, SYSTEM_DROP_CACHE) \
M(SYSTEM_DROP_QUERY_CONDITION_CACHE, "SYSTEM DROP QUERY CONDITION, DROP QUERY CONDITION CACHE, DROP QUERY CONDITION", GLOBAL, SYSTEM_DROP_CACHE) \
M(SYSTEM_DROP_COMPILED_EXPRESSION_CACHE, "SYSTEM DROP COMPILED EXPRESSION, DROP COMPILED EXPRESSION CACHE, DROP COMPILED EXPRESSIONS", GLOBAL, SYSTEM_DROP_CACHE) \
M(SYSTEM_DROP_FILESYSTEM_CACHE, "SYSTEM DROP FILESYSTEM CACHE, DROP FILESYSTEM CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
M(SYSTEM_DROP_DISTRIBUTED_CACHE, "SYSTEM DROP DISTRIBUTED CACHE, DROP DISTRIBUTED CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
Expand Down
2 changes: 2 additions & 0 deletions src/Common/ProfileEvents.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@
M(SkippingIndexCacheWeightLost, "Approximate number of bytes evicted from the secondary index cache.", ValueType::Number) \
M(QueryCacheHits, "Number of times a query result has been found in the query cache (and query computation was avoided). Only updated for SELECT queries with SETTING use_query_cache = 1.", ValueType::Number) \
M(QueryCacheMisses, "Number of times a query result has not been found in the query cache (and required query computation). Only updated for SELECT queries with SETTING use_query_cache = 1.", ValueType::Number) \
M(QueryConditionCacheHits, "Number of times an entry has been found in the query condition cache (and reading of marks can be skipped). Only updated for SELECT queries with SETTING use_query_condition_cache = 1.", ValueType::Number) \
M(QueryConditionCacheMisses, "Number of times an entry has not been found in the query condition cache (and reading of mark cannot be skipped). Only updated for SELECT queries with SETTING use_query_condition_cache = 1.", ValueType::Number) \
/* Each page cache chunk access increments exactly one of the following 5 PageCacheChunk* counters. */ \
/* Something like hit rate: (PageCacheChunkShared + PageCacheChunkDataHits) / [sum of all 5]. */ \
M(PageCacheChunkMisses, "Number of times a chunk has not been found in the userspace page cache.", ValueType::Number) \
Expand Down
3 changes: 3 additions & 0 deletions src/Core/Defines.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,9 @@ static constexpr auto DEFAULT_QUERY_CACHE_MAX_SIZE = 1_GiB;
static constexpr auto DEFAULT_QUERY_CACHE_MAX_ENTRIES = 1024uz;
static constexpr auto DEFAULT_QUERY_CACHE_MAX_ENTRY_SIZE_IN_BYTES = 1_MiB;
static constexpr auto DEFAULT_QUERY_CACHE_MAX_ENTRY_SIZE_IN_ROWS = 30'000'000uz;
static constexpr auto DEFAULT_QUERY_CONDITION_CACHE_POLICY = "SLRU";
static constexpr auto DEFAULT_QUERY_CONDITION_CACHE_MAX_SIZE = 100_MiB;
static constexpr auto DEFAULT_QUERY_CONDITION_CACHE_SIZE_RATIO = 0.5l;

/// Query profiler cannot work with sanitizers.
/// Sanitizers are using quick "frame walking" stack unwinding (this implies -fno-omit-frame-pointer)
Expand Down
4 changes: 4 additions & 0 deletions src/Core/ServerSettings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,10 @@ namespace DB
DECLARE(UInt64, compiled_expression_cache_size, DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE, R"(Sets the cache size (in bytes) for [compiled expressions](../../operations/caches.md).)", 0) \
\
DECLARE(UInt64, compiled_expression_cache_elements_size, DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_ENTRIES, R"(Sets the cache size (in elements) for [compiled expressions](../../operations/caches.md).)", 0) \
DECLARE(String, query_condition_cache_policy, DEFAULT_QUERY_CONDITION_CACHE_POLICY, "Query condition cache policy name.", 0) \
DECLARE(UInt64, query_condition_cache_size, DEFAULT_QUERY_CONDITION_CACHE_MAX_SIZE, "Size of the query condition cache.", 0) \
DECLARE(Double, query_condition_cache_size_ratio, DEFAULT_QUERY_CONDITION_CACHE_SIZE_RATIO, "The size of the protected queue in the query condition cache relative to the cache's total size.", 0) \
\
DECLARE(Bool, disable_internal_dns_cache, false, "Disable internal DNS caching at all.", 0) \
DECLARE(UInt64, dns_cache_max_entries, 10000, R"(Internal DNS cache max entries.)", 0) \
DECLARE(Int32, dns_cache_update_period, 15, "Internal DNS cache update period in seconds.", 0) \
Expand Down
9 changes: 8 additions & 1 deletion src/Core/Settings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4377,7 +4377,14 @@ Possible values:
DECLARE(Bool, enable_sharing_sets_for_mutations, true, R"(
Allow sharing set objects build for IN subqueries between different tasks of the same mutation. This reduces memory usage and CPU consumption
)", 0) \
\
DECLARE(Bool, use_query_condition_cache, false, R"(
Enable the query condition cache.

Possible values:

- 0 - Disabled
- 1 - Enabled
)", 0) \
DECLARE(Bool, optimize_rewrite_sum_if_to_count_if, true, R"(
Rewrite sumIf() and sum(if()) function countIf() function when logically equivalent
)", 0) \
Expand Down
1 change: 1 addition & 0 deletions src/Core/SettingsChangesHistory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ const VersionToSettingsChangesMap & getSettingsChangesHistory()
{"output_format_parquet_bloom_filter_flush_threshold_bytes", 128 * 1024 * 1024, 128 * 1024 * 1024, "New setting."},
{"output_format_pretty_max_rows", 10000, 1000, "It is better for usability - less amount to scroll."},
{"restore_replicated_merge_tree_to_shared_merge_tree", false, false, "New setting."},
{"use_query_condition_cache", false, false, "New setting."},
{"parallel_replicas_only_with_analyzer", false, true, "Parallel replicas is supported only with analyzer enabled"},
{"s3_allow_multipart_copy", true, true, "New setting."},
});
Expand Down
33 changes: 33 additions & 0 deletions src/Interpreters/ActionsDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <stack>
#include <base/sort.h>
#include <Common/JSONBuilder.h>
#include <Common/SipHash.h>
#include <DataTypes/DataTypeSet.h>

#include <absl/container/flat_hash_map.h>
Expand Down Expand Up @@ -135,6 +136,38 @@ void ActionsDAG::Node::toTree(JSONBuilder::JSONMap & map) const
map.add("Compiled", is_function_compiled);
}

size_t ActionsDAG::Node::getHash() const
{
SipHash hash_state;
updateHash(hash_state);
return hash_state.get64();
}

/// Feeds this node and, recursively, every node reachable through `children` into `hash_state`.
/// The resulting hash depends on the order of the update() calls below, so the order must stay stable.
void ActionsDAG::Node::updateHash(SipHash & hash_state) const
{
hash_state.update(type);

/// Optional attributes contribute to the hash only when they are set.
if (!result_name.empty())
hash_state.update(result_name);

if (result_type)
hash_state.update(result_type->getName());

if (function_base)
hash_state.update(function_base->getName());

if (function)
hash_state.update(function->getName());

hash_state.update(is_function_compiled);
hash_state.update(is_deterministic_constant);

/// NOTE(review): only column->getName() is hashed here, not the column's contents.
/// Presumably constants with different values are still told apart through result_name - confirm.
if (column)
hash_state.update(column->getName());

/// Recurse into the children in their stored order.
for (const auto & child : children)
child->updateHash(hash_state);
}

ActionsDAG::ActionsDAG(const NamesAndTypesList & inputs_)
{
Expand Down
4 changes: 4 additions & 0 deletions src/Interpreters/ActionsDAG.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <Core/ColumnsWithTypeAndName.h>
#include <Core/NamesAndTypes.h>
#include <Core/Names.h>
#include <Common/SipHash.h>
#include <Interpreters/Context_fwd.h>

#include "config.h"
Expand Down Expand Up @@ -93,6 +94,9 @@ class ActionsDAG
/// If result of this node is deterministic. Checks only this node, not a subtree.
bool isDeterministic() const;
void toTree(JSONBuilder::JSONMap & map) const;
size_t getHash() const;
private:
void updateHash(SipHash & hash_state) const;
};

/// NOTE: std::list is an implementation detail.
Expand Down
101 changes: 101 additions & 0 deletions src/Interpreters/Cache/QueryConditionCache.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
#include <Interpreters/Cache/QueryConditionCache.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include "Interpreters/Cache/FileSegmentInfo.h"

namespace ProfileEvents
{
extern const Event QueryConditionCacheHits;
extern const Event QueryConditionCacheMisses;
};

namespace DB
{

/// Constructs the underlying cache from the policy name, the maximum size in bytes and the size ratio.
/// NOTE(review): the literal 0 passed as the third argument is presumably an entry-count limit meaning "unlimited" - confirm against CacheBase.
QueryConditionCache::QueryConditionCache(const String & cache_policy, size_t max_size_in_bytes, double size_ratio)
: cache(cache_policy, max_size_in_bytes, 0, size_ratio)
{
}

std::optional<QueryConditionCache::MatchingMarks> QueryConditionCache::read(const UUID & table_id, const String & part_name, size_t condition_hash)
{
Key key = {table_id, part_name, condition_hash};

if (auto entry = cache.get(key))
{
ProfileEvents::increment(ProfileEvents::QueryConditionCacheHits);

std::lock_guard lock(entry->mutex);
return {entry->matching_marks};
}

ProfileEvents::increment(ProfileEvents::QueryConditionCacheMisses);

return std::nullopt;
}

/// Records that the marks in `mark_ranges` of the given part do not match the condition.
///
/// Gets the entry for (table_id, part_name, condition_hash), or creates one with `marks_count` marks
/// which are all initially set to true. Then sets every mark covered by `mark_ranges` to false.
/// If `has_final_mark` is set, the last mark (index `marks_count - 1`) is set to false as well.
///
/// `marks_count` is expected to equal the size of an already cached filter (checked by chassert).
void QueryConditionCache::write(const UUID & table_id, const String & part_name, size_t condition_hash, const MarkRanges & mark_ranges, size_t marks_count, bool has_final_mark)
{
    Key key = {table_id, part_name, condition_hash};

    auto load_func = [&](){ return std::make_shared<Entry>(marks_count); };
    auto [entry, _] = cache.getOrSet(key, load_func);

    chassert(marks_count == entry->matching_marks.size());

    /// Set MarkRanges to false, so there is no need to read these marks again later.
    /// Only the modification of the filter happens under the entry's mutex.
    {
        std::lock_guard lock(entry->mutex);
        for (const auto & mark_range : mark_ranges)
            std::fill(entry->matching_marks.begin() + mark_range.begin, entry->matching_marks.begin() + mark_range.end, false);

        /// Guard against marks_count == 0: `marks_count - 1` would wrap around and index out of bounds.
        if (has_final_mark && marks_count > 0)
            entry->matching_marks[marks_count - 1] = false;
    }

    /// Logging uses only the function arguments, so it is done after the mutex is released
    /// to avoid blocking concurrent readers and writers of the same entry while the message is formatted.
    LOG_DEBUG(
        logger,
        "table_id: {}, part_name: {}, condition_hash: {}, marks_count: {}, has_final_mark: {}, (ranges: {})",
        table_id,
        part_name,
        condition_hash,
        marks_count,
        has_final_mark,
        toString(mark_ranges));
}

/// Forwards to clear() of the underlying cache.
void QueryConditionCache::clear()
{
cache.clear();
}

/// Forwards the new size limit (in bytes) to the underlying cache.
void QueryConditionCache::setMaxSizeInBytes(size_t max_size_in_bytes)
{
cache.setMaxSizeInBytes(max_size_in_bytes);
}

/// Two keys are equal when the table id, the part name and the condition hash are all equal.
bool QueryConditionCache::Key::operator==(const Key & other) const
{
    if (!(table_id == other.table_id))
        return false;

    if (!(part_name == other.part_name))
        return false;

    return condition_hash == other.condition_hash;
}

/// Creates a filter with `mark_count` marks.
QueryConditionCache::Entry::Entry(size_t mark_count)
: matching_marks(mark_count, true) /// By default, every mark is a potential match.
{
}

/// Hashes all three key fields (table id, part name, condition hash) with SipHash and returns the 64-bit result.
size_t QueryConditionCache::KeyHasher::operator()(const Key & key) const
{
    SipHash key_hash;

    /// The fields are fed in a fixed order; changing it changes the hash value.
    key_hash.update(key.table_id);
    key_hash.update(key.part_name);
    key_hash.update(key.condition_hash);

    return key_hash.get64();
}

/// Estimates the weight of an entry in bytes: the size of the `std::vector<bool>` object itself
/// plus the estimated size of its dynamically allocated storage.
size_t QueryConditionCache::QueryConditionCacheEntryWeight::operator()(const Entry & entry) const
{
    /// The storage of `std::vector<bool>` is estimated at 1 bit per element, rounded up to whole bytes.
    constexpr size_t bits_per_byte = 8;
    const size_t heap_bytes = (entry.matching_marks.capacity() + (bits_per_byte - 1)) / bits_per_byte;
    const size_t object_bytes = sizeof(entry.matching_marks);
    return object_bytes + heap_bytes;
}
}
66 changes: 66 additions & 0 deletions src/Interpreters/Cache/QueryConditionCache.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#pragma once

#include <Common/CacheBase.h>
#include <Storages/MergeTree/MarkRange.h>

namespace DB
{

/// Caches, per (table, part, condition hash), a filter over the marks of the part.
/// The filter records which marks do not match the query condition, so that later queries with the
/// same condition can skip these marks. This speeds up queries when the index is not hit.
class QueryConditionCache
{
public:
/// One value per mark of a part:
/// 0 means none of the rows in the mark match the predicate. We can skip such marks.
/// 1 means at least one row in the mark may match the predicate. We need to read such marks.
using MatchingMarks = std::vector<bool>;

/// Creates the cache with the given cache policy name, maximum size in bytes and size ratio.
QueryConditionCache(const String & cache_policy, size_t max_size_in_bytes, double size_ratio);

/// Returns a copy of the mark filter for the given key, or std::nullopt if no entry exists.
std::optional<MatchingMarks> read(const UUID & table_id, const String & part_name, size_t condition_hash);

/// Gets (or creates, with all marks set to true) the mark filter for the given key and sets the marks
/// covered by `mark_ranges` to false. If `has_final_mark` is set, the last mark is set to false as well.
void write(const UUID & table_id, const String & part_name, size_t condition_hash, const MarkRanges & mark_ranges, size_t marks_count, bool has_final_mark);

/// Clears the underlying cache.
void clear();

/// Changes the size limit (in bytes) of the underlying cache.
void setMaxSizeInBytes(size_t max_size_in_bytes);

private:
/// Identifies a cached filter: a condition (by its hash) applied to one part of one table.
struct Key
{
const UUID table_id;
const String part_name;
const size_t condition_hash;

bool operator==(const Key & other) const;
};

/// A cached filter together with the mutex that read() and write() lock while accessing it.
struct Entry
{
MatchingMarks matching_marks;
std::mutex mutex;

/// Creates a filter with `mark_count` marks, all initially set to true.
explicit Entry(size_t mark_count);
};

/// Hash function for Key, used by the underlying cache.
struct KeyHasher
{
size_t operator()(const Key & key) const;
};

/// Weight function for Entry, used by the underlying cache: returns the estimated size of an entry in bytes.
struct QueryConditionCacheEntryWeight
{
size_t operator()(const Entry & entry) const;
};

using Cache = CacheBase<Key, Entry, KeyHasher, QueryConditionCacheEntryWeight>;
Cache cache;

LoggerPtr logger = getLogger("QueryConditionCache");
};

using QueryConditionCachePtr = std::shared_ptr<QueryConditionCache>;

}
Loading
Loading