-
Notifications
You must be signed in to change notification settings - Fork 8.3k
Add icebergTruncate function and support it in partition pruning for Iceberg
#77403
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
31 commits
Select commit
Hold shift + click to select a range
eb6986c
Add icebergTruncate function
alesapin 47b265b
Fix
alesapin f295e0f
Fix style
alesapin de2514f
Something
alesapin 0c7df26
Cleanup code a little
alesapin e38d96a
Better
alesapin 34cd285
Remove redundant
alesapin 91abd45
Remove logging
alesapin 4183b80
Don't need logger in ActionsDAG
alesapin c76698c
Rename function
alesapin ffa3a87
Remove redundant headers
alesapin c4322ee
Always use numeric columns
alesapin 860728e
Add comment
alesapin 0785722
Followup
alesapin a687a15
Add missing guards
alesapin 793896a
Document new functions
alesapin c1b9220
Arguments are always constant
alesapin 683ce79
Fix msan
alesapin 02bfe08
Add missing functions
alesapin 743b854
Fix tidy
alesapin 34326f4
Get rid of copies
alesapin dabc2ea
Fix potential shared ptr loop
alesapin f2bcf2a
Update src/Functions/icebergTruncate.cpp
alesapin 42b41ad
Update src/Functions/icebergTruncate.cpp
alesapin 995a901
Update src/Functions/icebergTruncate.cpp
alesapin 39d84b7
Merge remote-tracking branch 'origin/master' into add_iceberg_truncate
alesapin f00a220
Merge branch 'add_iceberg_truncate' of github.com:ClickHouse/ClickHou…
alesapin 0081307
Review fixes
alesapin 4293f53
Merge remote-tracking branch 'origin/master' into add_iceberg_truncate
alesapin f2b48fc
Review fixes
alesapin e50b3f7
Followup
alesapin File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,201 @@ | ||
| #include <Functions/FunctionFactory.h> | ||
| #include <Columns/ColumnString.h> | ||
| #include <Columns/IColumn.h> | ||
| #include <DataTypes/DataTypesNumber.h> | ||
| #include <DataTypes/DataTypesDecimal.h> | ||
| #include <DataTypes/DataTypeString.h> | ||
| #include <Functions/IFunction.h> | ||
| #include <Interpreters/Context.h> | ||
|
|
||
| namespace DB | ||
| { | ||
|
|
||
| namespace ErrorCodes | ||
| { | ||
| extern const int ILLEGAL_TYPE_OF_ARGUMENT; | ||
| extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; | ||
| extern const int BAD_ARGUMENTS; | ||
| extern const int LOGICAL_ERROR; | ||
| } | ||
|
|
||
| namespace | ||
| { | ||
|
|
||
| /// Implements the Iceberg truncate partition transform, see https://iceberg.apache.org/spec/#truncate-transform-details | |
| /// Called as icebergTruncate(N, value): | |
| ///  - N is a constant width of a native unsigned integer type, it must be greater than zero; | |
| ///  - String and FixedString values are cut with leftUTF8 and left respectively, the result type is String; | |
| ///  - native integer and Decimal32/Decimal64 values are computed as value - positiveModulo(value, N). | |
| class FunctionIcebergTruncate : public IFunction | |
| { | |
|
|
||
| public: | |
| static inline const char * name = "icebergTruncate"; | |
|
|
||
| /// The context is intentionally not stored, see the comment in getReturnTypeImpl. | |
| explicit FunctionIcebergTruncate(ContextPtr) | |
| { | |
| } | |
|
|
||
| static FunctionPtr create(ContextPtr context_) | |
| { | |
| return std::make_shared<FunctionIcebergTruncate>(context_); | |
| } | |
|
|
||
| String getName() const override | |
| { | |
| return name; | |
| } | |
|
|
||
| bool isVariadic() const override | |
| { | |
| return false; | |
| } | |
|
|
||
| size_t getNumberOfArguments() const override | |
| { | |
| return 2; | |
| } | |
|
|
||
| /// The width (argument 0) is declared as always constant. | |
| ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0}; } | |
|
|
||
| /// Validates the argument types and derives the result type. | |
| /// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH or ILLEGAL_TYPE_OF_ARGUMENT for unsupported input. | |
| /// For strings the result is String, for numbers it is the result type of minus(value, positiveModulo(value, N)). | |
| DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override | |
| { | |
| if (arguments.size() != 2) | |
| throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Incorrect number of arguments: expected 2 arguments"); | |
|
|
||
| const auto & truncate_number = arguments[0]; | |
| if (!WhichDataType(truncate_number).isNativeUInt()) | |
| throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument should be UInt data type"); | |
|
|
||
| const auto & truncate_type = arguments[1]; | |
| WhichDataType which_truncate(truncate_type); | |
| if (!which_truncate.isDecimal64() && !which_truncate.isDecimal32() && !which_truncate.isStringOrFixedString() && !which_truncate.isNativeInteger()) | |
| throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument must be of native integer type, String/FixedString, Decimal"); | |
|
|
||
| if (which_truncate.isStringOrFixedString()) | |
| return std::make_shared<DataTypeString>(); | |
|
|
||
| /// Why the global context and not the one passed to create/constructor? Two reasons: | |
| /// 1. The context is needed only to access the global functions factory, so the global context is the most suitable. | |
| /// 2. It is unsafe to store ContextPtr inside a function: a function object is so low-level that it can be stored | |
| ///    in other objects which are themselves stored in the global context. A very common example is | |
| ///    ContextPtr->Storage->KeyDescription->Expressions->Function->ContextPtr, which is a loop and a memory leak. | |
| auto context = Context::getGlobalContextInstance(); | |
|
|
||
| /// Only types matter here, so every argument is represented by a constant column holding the type's default value. | |
| auto get_column_const = [] (const DataTypePtr & data_type) | |
| { | |
| return ColumnWithTypeAndName(data_type->createColumnConst(1, data_type->getDefault()), data_type, ""); | |
| }; | |
|
|
||
| /// For integers the divisor has the type of the width, for decimals the width is | |
| /// represented with the decimal type of the value (the same is done in executeImpl). | |
| const auto & divisor_type = which_truncate.isNativeInteger() ? arguments[0] : arguments[1]; | |
| ColumnsWithTypeAndName modulo_arguments{get_column_const(arguments[1]), get_column_const(divisor_type)}; | |
|
|
||
| auto modulo_func = FunctionFactory::instance().get("positiveModulo", context)->build(modulo_arguments); | |
| auto modulo_result_type = modulo_func->getResultType(); | |
| ColumnsWithTypeAndName minus_arguments{get_column_const(arguments[1]), get_column_const(modulo_result_type)}; | |
| auto minus_func = FunctionFactory::instance().get("minus", context)->build(minus_arguments); | |
|
|
||
| return minus_func->getResultType(); | |
| } | |
|
|
||
| /// The function is declared monotonic for any range of any argument type. | |
| bool hasInformationAboutMonotonicity() const override { return true; } | |
| Monotonicity getMonotonicityForRange(const IDataType &, const Field &, const Field &) const override { return { .is_monotonic = true, .is_always_monotonic = true }; } | |
|
|
||
| /// Applies the transform by delegating to left/leftUTF8 for strings | |
| /// and to positiveModulo + minus for integers and decimals. | |
| /// Throws BAD_ARGUMENTS when the width is not positive or does not fit into Decimal32 for a Decimal32 value. | |
| ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /* result_type */, size_t input_rows_count) const override | |
| { | |
| /// The width is read from the first row of its column. | |
| const auto value = (*arguments[0].column)[0].safeGet<Int64>(); | |
| if (value <= 0) | |
| throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function icebergTruncate accepts only positive width"); | |
|
|
||
| auto context = Context::getGlobalContextInstance(); | |
| WhichDataType which_truncate(arguments[1].type); | |
|
|
||
| if (which_truncate.isStringOrFixedString()) | |
| { | |
| ColumnsWithTypeAndName string_arguments{arguments[1], arguments[0]}; | |
| /// FixedString is cut with left, String with leftUTF8. | |
| const char * left_function_name = which_truncate.isFixedString() ? "left" : "leftUTF8"; | |
| auto left_func = FunctionFactory::instance().get(left_function_name, context)->build(string_arguments); | |
| return left_func->execute(string_arguments, std::make_shared<DataTypeString>(), input_rows_count, false); | |
| } | |
|
|
||
| if (which_truncate.isNativeInteger() || which_truncate.isDecimal()) | |
| { | |
| ColumnsWithTypeAndName modulo_arguments; | |
| if (which_truncate.isNativeInteger()) | |
| { | |
| modulo_arguments = {arguments[1], arguments[0]}; | |
| } | |
| else | |
| { | |
| /// The width is wrapped into a constant decimal column that carries the scale of the value's type. | |
| ColumnPtr decimal_scaled; | |
| if (const auto * decimal32_type = checkDecimal<Decimal32>(*arguments[1].type)) | |
| { | |
| /// The width is an Int64 while Decimal32 stores an Int32: without this check | |
| /// a too large width would be silently narrowed and produce a wrong divisor. | |
| if (static_cast<Int64>(static_cast<Int32>(value)) != value) | |
| throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function icebergTruncate: width {} does not fit into Decimal32", value); | |
|
|
||
| decimal_scaled = arguments[1].type->createColumnConst(input_rows_count, DecimalField<Decimal32>(static_cast<Int32>(value), decimal32_type->getScale())); | |
| } | |
| else if (const auto * decimal64_type = checkDecimal<Decimal64>(*arguments[1].type)) | |
| { | |
| decimal_scaled = arguments[1].type->createColumnConst(input_rows_count, DecimalField<Decimal64>(value, decimal64_type->getScale())); | |
| } | |
|
|
||
| if (!decimal_scaled) | |
| throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected decimal data type"); | |
|
|
||
| ColumnWithTypeAndName decimal_scaled_with_type(decimal_scaled, arguments[1].type, ""); | |
| modulo_arguments = {arguments[1], decimal_scaled_with_type}; | |
| } | |
|
|
||
| /// result = value - positiveModulo(value, width) | |
| auto modulo_func = FunctionFactory::instance().get("positiveModulo", context)->build(modulo_arguments); | |
| auto modulo_result_type = modulo_func->getResultType(); | |
| auto modulo_result = modulo_func->execute(modulo_arguments, modulo_result_type, input_rows_count, false); | |
| ColumnWithTypeAndName modulo_result_with_type(modulo_result, modulo_result_type, ""); | |
|
|
||
| ColumnsWithTypeAndName minus_arguments{arguments[1], modulo_result_with_type}; | |
| auto minus_func = FunctionFactory::instance().get("minus", context)->build(minus_arguments); | |
| auto minus_result_type = minus_func->getResultType(); | |
| return minus_func->execute(minus_arguments, minus_result_type, input_rows_count, false); | |
| } | |
|
|
||
| /// getReturnTypeImpl rejects every other type, so this is an internal inconsistency and not a user error. | |
| throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected type {} of the second argument of function icebergTruncate", arguments[1].type->getName()); | |
| } | |
|
|
||
| bool useDefaultImplementationForConstants() const override | |
| { | |
| return true; | |
| } | |
|
|
||
| /// String is reported as the return type for the default implementation for Dynamic. | |
| DataTypePtr getReturnTypeForDefaultImplementationForDynamic() const override | |
| { | |
| return std::make_shared<DataTypeString>(); | |
| } | |
|
|
||
| bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } | |
| }; | |
|
|
||
| /// Registers icebergTruncate in the function factory together with its user-facing documentation. | |
| REGISTER_FUNCTION(IcebergTruncate) | |
| { | |
| FunctionDocumentation::Description description = R"(Implements logic of iceberg truncate transform: https://iceberg.apache.org/spec/#truncate-transform-details.)"; | |
| FunctionDocumentation::Syntax syntax = "icebergTruncate(N, value)"; | |
| /// Both arguments of the syntax line are documented, in call order. | |
| FunctionDocumentation::Arguments arguments = {{"N", "Unsigned integer constant greater than zero: the truncation width."}, {"value", "String, integer or Decimal value."}}; | |
| FunctionDocumentation::ReturnedValue returned_value = "The same type as argument"; | |
| FunctionDocumentation::Examples examples = {{"Example", "SELECT icebergTruncate(3, 'iceberg')", "ice"}}; | |
| FunctionDocumentation::Category category = {"Other"}; | |
|
|
||
| factory.registerFunction<FunctionIcebergTruncate>({description, syntax, arguments, returned_value, examples, category}); | |
| } | |
|
|
||
| } | ||
|
|
||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,24 @@ | ||
| #include <Functions/FunctionFactory.h> | ||
| #include <Functions/DateTimeTransforms.h> | ||
| #include <Functions/FunctionDateOrDateTimeToSomething.h> | ||
| #include <DataTypes/DataTypesNumber.h> | ||
|
|
||
|
|
||
| namespace DB | ||
| { | ||
|
|
||
| /// Date/DateTime function built from ToMonthNumSinceEpochImpl with a UInt32 result type. | |
| using FunctionToMonthNumSinceEpoch = FunctionDateOrDateTimeToSomething<DataTypeUInt32, ToMonthNumSinceEpochImpl<ResultPrecision::Standard>>; | |
|
|
||
| /// Registers toMonthNumSinceEpoch in the function factory together with its documentation. | |
| REGISTER_FUNCTION(ToMonthNumSinceEpoch) | |
| { | |
| FunctionDocumentation::Description doc_description = "Returns amount of months passed from year 1970"; | |
| FunctionDocumentation::Syntax doc_syntax = "toMonthNumSinceEpoch(date)"; | |
| FunctionDocumentation::Arguments doc_arguments = {{"date", "Date, DateTime or DateTime64"}}; | |
| FunctionDocumentation::ReturnedValue doc_returned_value = "Positive integer"; | |
| FunctionDocumentation::Examples doc_examples = {{"Example", "SELECT toMonthNumSinceEpoch(toDate('2024-10-01'))", "657"}}; | |
| FunctionDocumentation::Category doc_category = {"DateTime"}; | |
|
|
||
| /// The documentation parts are assembled in the order expected by FunctionDocumentation. | |
| FunctionDocumentation documentation = {doc_description, doc_syntax, doc_arguments, doc_returned_value, doc_examples, doc_category}; | |
| factory.registerFunction<FunctionToMonthNumSinceEpoch>(documentation); | |
| } | |
|
|
||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,24 @@ | ||
| #include <Functions/FunctionFactory.h> | ||
| #include <Functions/DateTimeTransforms.h> | ||
| #include <Functions/FunctionDateOrDateTimeToSomething.h> | ||
| #include <DataTypes/DataTypesNumber.h> | ||
|
|
||
|
|
||
| namespace DB | ||
| { | ||
|
|
||
| /// Date/DateTime function built from ToYearNumSinceEpochImpl with a UInt16 result type. | |
| using FunctionToYearNumSinceEpoch = FunctionDateOrDateTimeToSomething<DataTypeUInt16, ToYearNumSinceEpochImpl<ResultPrecision::Standard>>; | |
|
|
||
| /// Registers toYearNumSinceEpoch in the function factory together with its documentation. | |
| REGISTER_FUNCTION(ToYearNumSinceEpoch) | |
| { | |
| FunctionDocumentation::Description doc_description = "Returns amount of years passed from year 1970"; | |
| FunctionDocumentation::Syntax doc_syntax = "toYearNumSinceEpoch(date)"; | |
| FunctionDocumentation::Arguments doc_arguments = {{"date", "Date, DateTime or DateTime64"}}; | |
| FunctionDocumentation::ReturnedValue doc_returned_value = "Positive integer"; | |
| FunctionDocumentation::Examples doc_examples = {{"Example", "SELECT toYearNumSinceEpoch(toDate('2024-10-01'))", "54"}}; | |
| FunctionDocumentation::Category doc_category = {"DateTime"}; | |
|
|
||
| /// The documentation parts are assembled in the order expected by FunctionDocumentation. | |
| FunctionDocumentation documentation = {doc_description, doc_syntax, doc_arguments, doc_returned_value, doc_examples, doc_category}; | |
| factory.registerFunction<FunctionToYearNumSinceEpoch>(documentation); | |
| } | |
|
|
||
| } |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.