Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
eb6986c
Add icebergTruncate function
alesapin Mar 10, 2025
47b265b
Fix
alesapin Mar 10, 2025
f295e0f
Fix style
alesapin Mar 10, 2025
de2514f
Something
alesapin Mar 12, 2025
0c7df26
Cleanup code a little
alesapin Mar 12, 2025
e38d96a
Better
alesapin Mar 12, 2025
34cd285
Remove redundant
alesapin Mar 12, 2025
91abd45
Remove logging
alesapin Mar 12, 2025
4183b80
Don't need logger in ActionsDAG
alesapin Mar 12, 2025
c76698c
Rename function
alesapin Mar 12, 2025
ffa3a87
Remove redundant headers
alesapin Mar 12, 2025
c4322ee
Always use numeric columns
alesapin Mar 12, 2025
860728e
Add comment
alesapin Mar 12, 2025
0785722
Followup
alesapin Mar 12, 2025
a687a15
Add missing guards
alesapin Mar 12, 2025
793896a
Document new functions
alesapin Mar 12, 2025
c1b9220
Arguments are always constant
alesapin Mar 12, 2025
683ce79
Fix msan
alesapin Mar 12, 2025
02bfe08
Add missing functions
alesapin Mar 12, 2025
743b854
Fix tidy
alesapin Mar 12, 2025
34326f4
Get rid of copies
alesapin Mar 12, 2025
dabc2ea
Fix potential shared ptr loop
alesapin Mar 13, 2025
f2bcf2a
Update src/Functions/icebergTruncate.cpp
alesapin Mar 14, 2025
42b41ad
Update src/Functions/icebergTruncate.cpp
alesapin Mar 14, 2025
995a901
Update src/Functions/icebergTruncate.cpp
alesapin Mar 14, 2025
39d84b7
Merge remote-tracking branch 'origin/master' into add_iceberg_truncate
alesapin Mar 14, 2025
f00a220
Merge branch 'add_iceberg_truncate' of github.com:ClickHouse/ClickHou…
alesapin Mar 14, 2025
0081307
Review fixes
alesapin Mar 14, 2025
4293f53
Merge remote-tracking branch 'origin/master' into add_iceberg_truncate
alesapin Mar 17, 2025
f2b48fc
Review fixes
alesapin Mar 17, 2025
e50b3f7
Followup
alesapin Mar 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 0 additions & 12 deletions src/Common/DateLUTImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -291,15 +291,3 @@ namespace cctz_extension

ZoneInfoSourceFactory zone_info_source_factory = custom_factory;
}

/// Returns the LUT entry for the first day of the month located `months` months away from
/// January 1970. The offset is zero-based: 0 -> 1970-01, 11 -> 1970-12, 12 -> 1971-01.
/// Negative offsets count backwards from the epoch: -1 -> 1969-12, -12 -> 1969-01.
DateLUTImpl::Values DateLUTImpl::lutIndexByMonthSinceEpochStartsZeroIndexing(Int32 months) const
{
    /// Integer `/` and `%` truncate toward zero in C++, which would yield a non-positive
    /// month number for negative offsets. Normalize to floor semantics so that the month
    /// index is always within [0, 11].
    Int32 years_since_epoch = months / 12;
    Int32 month_index = months % 12;
    if (month_index < 0)
    {
        month_index += 12;
        --years_since_epoch;
    }

    const Int16 year = static_cast<Int16>(1970 + years_since_epoch);
    const UInt8 month = static_cast<UInt8>(month_index + 1); /// makeLUTIndex expects a 1-based month.
    return lut[makeLUTIndex(year, month, 1)];
}

/// Returns the LUT entry for January 1st of the year located `years` years away from 1970
/// (zero-based offset: 0 -> 1970-01-01).
DateLUTImpl::Values DateLUTImpl::lutIndexByYearSinceEpochStartsZeroIndexing(Int16 years) const
{
    const auto calendar_year = years + 1970;
    const auto first_day_index = makeLUTIndex(calendar_year, 1, 1);
    return lut[first_day_index];
}
14 changes: 10 additions & 4 deletions src/Common/DateLUTImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -659,6 +659,9 @@ class DateLUTImpl
/// Calendar year stored in the lookup table entry that corresponds to `v`.
template <typename DateOrTime>
Int16 toYear(DateOrTime v) const
{
    const LUTIndex index = toLUTIndex(v);
    return lut[index].year;
}

/// Calendar year of `v` expressed as an offset from 1970 (a date in 1970 gives 0).
template <typename DateOrTime>
Int16 toYearSinceEpoch(DateOrTime v) const
{
    const LUTIndex index = toLUTIndex(v);
    return lut[index].year - 1970;
}

/// Day of week of `v` as stored in the lookup table: 1-based, starts on Monday.
template <typename DateOrTime>
UInt8 toDayOfWeek(DateOrTime v) const
{
    const LUTIndex index = toLUTIndex(v);
    return lut[index].day_of_week;
}
Expand Down Expand Up @@ -952,6 +955,13 @@ class DateLUTImpl
return lut[i].year * 12 + lut[i].month;
}

/// Number of whole months between January 1970 and the month of `v`
/// (zero-based: any date in 1970-01 gives 0, any date in 1971-01 gives 12).
template <typename DateOrTime>
Int32 toMonthNumSinceEpoch(DateOrTime v) const
{
    const auto & entry = lut[toLUTIndex(v)];
    const auto years_since_epoch = entry.year - 1970;
    /// `month` in the table is 1-based, hence the final `- 1`.
    return years_since_epoch * 12 + entry.month - 1;
}

template <typename DateOrTime>
Int32 toRelativeQuarterNum(DateOrTime v) const
{
Expand Down Expand Up @@ -1168,10 +1178,6 @@ class DateLUTImpl
return LUTIndex{std::min(index, static_cast<UInt32>(DATE_LUT_SIZE - 1))};
}

Values lutIndexByMonthSinceEpochStartsZeroIndexing(Int32 months) const;

Values lutIndexByYearSinceEpochStartsZeroIndexing(Int16 years) const;

/// Create DayNum from year, month, day of month.
ExtendedDayNum makeDayNum(Int16 year, UInt8 month, UInt8 day_of_month, Int32 default_error_day_num = 0) const
{
Expand Down
64 changes: 64 additions & 0 deletions src/Functions/DateTimeTransforms.h
Original file line number Diff line number Diff line change
Expand Up @@ -1734,6 +1734,38 @@ struct ToRelativeYearNumImpl
using FactorTransform = ZeroTransform;
};

/// Transform used by `toYearNumSinceEpoch`: maps a date/time value to its year offset from 1970
/// by delegating to DateLUTImpl::toYearSinceEpoch.
///
/// For the signed inputs (Int64, Int32) the result keeps the type returned by the LUT when
/// `precision_` is ResultPrecision::Extended and is cast to UInt16 otherwise.
/// The unsigned inputs (UInt32, UInt16) always produce UInt16.
template <ResultPrecision precision_>
struct ToYearNumSinceEpochImpl
{
    static constexpr auto name = "toYearNumSinceEpoch";

    /// Int64 input is handed to the LUT as is.
    static auto execute(Int64 t, const DateLUTImpl & time_zone)
    {
        const auto years = time_zone.toYearSinceEpoch(t);
        if constexpr (precision_ != ResultPrecision::Extended)
            return static_cast<UInt16>(years);
        else
            return years;
    }

    /// UInt32 input is interpreted as time_t.
    static UInt16 execute(UInt32 t, const DateLUTImpl & time_zone)
    {
        const auto timestamp = static_cast<time_t>(t);
        return time_zone.toYearSinceEpoch(timestamp);
    }

    /// Int32 input is interpreted as an extended day number.
    static auto execute(Int32 d, const DateLUTImpl & time_zone)
    {
        const auto years = time_zone.toYearSinceEpoch(ExtendedDayNum(d));
        if constexpr (precision_ != ResultPrecision::Extended)
            return static_cast<UInt16>(years);
        else
            return years;
    }

    /// UInt16 input is interpreted as a day number.
    static UInt16 execute(UInt16 d, const DateLUTImpl & time_zone)
    {
        const DayNum day(d);
        return time_zone.toYearSinceEpoch(day);
    }

    static constexpr bool hasPreimage() { return false; }

    using FactorTransform = ZeroTransform;
};

template <ResultPrecision precision_>
struct ToRelativeQuarterNumImpl
{
Expand Down Expand Up @@ -1798,6 +1830,38 @@ struct ToRelativeMonthNumImpl
using FactorTransform = ZeroTransform;
};

/// Transform used by `toMonthNumSinceEpoch`: maps a date/time value to the number of months
/// since January 1970 by delegating to DateLUTImpl::toMonthNumSinceEpoch.
///
/// For the signed inputs (Int64, Int32) the result keeps the type returned by the LUT when
/// `precision_` is ResultPrecision::Extended and is cast to UInt16 otherwise.
/// The unsigned inputs (UInt32, UInt16) always produce UInt16.
template <ResultPrecision precision_>
struct ToMonthNumSinceEpochImpl
{
    static constexpr auto name = "toMonthNumSinceEpoch";

    /// Int64 input is handed to the LUT as is.
    static auto execute(Int64 t, const DateLUTImpl & time_zone)
    {
        const auto months = time_zone.toMonthNumSinceEpoch(t);
        if constexpr (precision_ != ResultPrecision::Extended)
            return static_cast<UInt16>(months);
        else
            return months;
    }

    /// UInt32 input is interpreted as time_t.
    static UInt16 execute(UInt32 t, const DateLUTImpl & time_zone)
    {
        const auto timestamp = static_cast<time_t>(t);
        return time_zone.toMonthNumSinceEpoch(timestamp);
    }

    /// Int32 input is interpreted as an extended day number.
    static auto execute(Int32 d, const DateLUTImpl & time_zone)
    {
        const auto months = time_zone.toMonthNumSinceEpoch(ExtendedDayNum(d));
        if constexpr (precision_ != ResultPrecision::Extended)
            return static_cast<UInt16>(months);
        else
            return months;
    }

    /// UInt16 input is interpreted as a day number.
    static UInt16 execute(UInt16 d, const DateLUTImpl & time_zone)
    {
        const DayNum day(d);
        return time_zone.toMonthNumSinceEpoch(day);
    }

    static constexpr bool hasPreimage() { return false; }

    using FactorTransform = ZeroTransform;
};

template <ResultPrecision precision_>
struct ToRelativeWeekNumImpl
{
Expand Down
2 changes: 1 addition & 1 deletion src/Functions/IsOperation.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ struct IsOperation

static constexpr bool bit_hamming_distance = IsSameOperation<Op, BitHammingDistanceImpl>::value;

static constexpr bool division = div_floating || int_div || int_div_or_zero || modulo;
static constexpr bool division = div_floating || int_div || int_div_or_zero || modulo || positive_modulo;
// NOTE: allow_decimal should not fully contain `division` because of divInt
static constexpr bool allow_decimal = plus || minus || multiply || division || least || greatest;
};
Expand Down
201 changes: 201 additions & 0 deletions src/Functions/icebergTruncate.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
#include <Functions/FunctionFactory.h>
#include <Columns/ColumnString.h>
#include <Columns/IColumn.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypeString.h>
#include <Functions/IFunction.h>
#include <Interpreters/Context.h>

namespace DB
{

namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int BAD_ARGUMENTS;
extern const int LOGICAL_ERROR;
}

namespace
{

/// Implements the Iceberg `truncate` partition transform.
/// Specification: https://iceberg.apache.org/spec/#truncate-transform-details
///
/// Usage: icebergTruncate(width, value)
///  - width: constant of a native unsigned integer type, must be positive when executed;
///  - value: String/FixedString, native integer, Decimal32 or Decimal64.
///
/// The work is delegated to functions obtained from the function factory:
///  - String      -> leftUTF8(value, width)
///  - FixedString -> left(value, width)
///  - integers    -> value - positiveModulo(value, width)
///  - decimals    -> value - positiveModulo(value, D), where D is a decimal constant of the
///                   same type as `value` built from `width` and the scale of `value`.
class FunctionIcebergTruncate : public IFunction
{

public:
    static inline const char * name = "icebergTruncate";

    /// The context is deliberately not stored, see the comment in getReturnTypeImpl.
    explicit FunctionIcebergTruncate(ContextPtr)
    {
    }

    static FunctionPtr create(ContextPtr context_)
    {
        return std::make_shared<FunctionIcebergTruncate>(context_);
    }

    String getName() const override
    {
        return name;
    }

    bool isVariadic() const override
    {
        return false;
    }

    size_t getNumberOfArguments() const override
    {
        return 2;
    }

    /// The truncation width (argument 0) is always a constant.
    ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0}; }

    /// Validates the argument types and derives the result type.
    /// Strings always produce String; for numeric values the result type is whatever
    /// `minus(value, positiveModulo(value, width))` resolves to.
    /// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH or ILLEGAL_TYPE_OF_ARGUMENT on invalid input.
    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
    {
        if (arguments.size() != 2)
            throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Incorrect number of arguments: expected 2 arguments");

        const auto & truncate_number = arguments[0];
        if (!WhichDataType(truncate_number).isNativeUInt())
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument should be UInt data type");

        const auto & truncate_type = arguments[1];
        WhichDataType which_truncate(truncate_type);
        if (!which_truncate.isDecimal64() && !which_truncate.isDecimal32() && !which_truncate.isStringOrFixedString() && !which_truncate.isNativeInteger())
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument must be of native integer type, String/FixedString, Decimal");

        if (which_truncate.isStringOrFixedString())
            return std::make_shared<DataTypeString>();

        /// Why the global context and not the one passed to create()/the constructor:
        /// 1. The context is needed only to reach the global function factory, so the global
        ///    context is the most suitable one.
        /// 2. Storing a ContextPtr inside a function is unsafe: a function object is so low-level
        ///    that it may be held by objects which are themselves stored in the global context,
        ///    e.g. ContextPtr -> Storage -> KeyDescription -> Expressions -> Function -> ContextPtr.
        ///    That is a reference loop and therefore a memory leak.
        auto context = Context::getGlobalContextInstance();

        /// Only types matter here, so feed the nested functions constant columns with default values.
        auto get_column_const = [] (const DataTypePtr & data_type)
        {
            return ColumnWithTypeAndName(data_type->createColumnConst(1, data_type->getDefault()), data_type, "");
        };

        /// Integers are taken modulo the width itself; decimals are taken modulo a decimal
        /// constant that has the same type as the value.
        const auto & divisor_type = which_truncate.isNativeInteger() ? truncate_number : truncate_type;
        ColumnsWithTypeAndName modulo_arguments = {get_column_const(truncate_type), get_column_const(divisor_type)};

        auto modulo_func = FunctionFactory::instance().get("positiveModulo", context)->build(modulo_arguments);
        auto modulo_result_type = modulo_func->getResultType();

        ColumnsWithTypeAndName minus_arguments = {get_column_const(truncate_type), get_column_const(modulo_result_type)};
        auto minus_func = FunctionFactory::instance().get("minus", context)->build(minus_arguments);
        return minus_func->getResultType();
    }

    bool hasInformationAboutMonotonicity() const override { return true; }
    Monotonicity getMonotonicityForRange(const IDataType &, const Field &, const Field &) const override { return { .is_monotonic = true, .is_always_monotonic = true }; }

    /// Executes the transform by delegating to `left`/`leftUTF8` for strings and to
    /// `positiveModulo` + `minus` for numeric values.
    /// Throws BAD_ARGUMENTS when the width is not positive and LOGICAL_ERROR when the value
    /// type is not one of the types accepted by getReturnTypeImpl.
    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /* result_type */, size_t input_rows_count) const override
    {
        auto value = (*arguments[0].column)[0].safeGet<Int64>();
        if (value <= 0)
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function icebergTruncate accepts only positive width");

        /// Global context on purpose, see the comment in getReturnTypeImpl.
        auto context = Context::getGlobalContextInstance();
        WhichDataType which_truncate(arguments[1].type);

        if (which_truncate.isStringOrFixedString())
        {
            /// Nested functions take (string, length), i.e. the reverse of our argument order.
            ColumnsWithTypeAndName string_arguments = {arguments[1], arguments[0]};
            const char * substring_function_name = which_truncate.isFixedString() ? "left" : "leftUTF8";
            auto substr_func = FunctionFactory::instance().get(substring_function_name, context)->build(string_arguments);
            return substr_func->execute(string_arguments, std::make_shared<DataTypeString>(), input_rows_count, false);
        }

        /// Cannot happen after getReturnTypeImpl; an exception is safer than undefined behaviour.
        if (!which_truncate.isNativeInteger() && !which_truncate.isDecimal())
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected data type of the second argument of function icebergTruncate");

        ColumnsWithTypeAndName modulo_arguments;
        if (which_truncate.isNativeInteger())
        {
            modulo_arguments = {arguments[1], arguments[0]};
        }
        else
        {
            /// Build a decimal constant of the value's own type from the width and the value's scale.
            ColumnPtr decimal_scaled;
            if (const auto * decimal32_type = checkDecimal<Decimal32>(*arguments[1].type))
                decimal_scaled = arguments[1].type->createColumnConst(input_rows_count, DecimalField<Decimal32>(value, decimal32_type->getScale()));
            else if (const auto * decimal64_type = checkDecimal<Decimal64>(*arguments[1].type))
                decimal_scaled = arguments[1].type->createColumnConst(input_rows_count, DecimalField<Decimal64>(value, decimal64_type->getScale()));

            if (!decimal_scaled)
                throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected decimal data type");

            modulo_arguments = {arguments[1], ColumnWithTypeAndName(decimal_scaled, arguments[1].type, "")};
        }

        auto modulo_func = FunctionFactory::instance().get("positiveModulo", context)->build(modulo_arguments);
        auto modulo_result_type = modulo_func->getResultType();
        auto modulo_result = modulo_func->execute(modulo_arguments, modulo_result_type, input_rows_count, false);

        /// value - positiveModulo(value, width)
        ColumnsWithTypeAndName minus_arguments = {arguments[1], ColumnWithTypeAndName(modulo_result, modulo_result_type, "")};
        auto minus_func = FunctionFactory::instance().get("minus", context)->build(minus_arguments);
        auto minus_result_type = minus_func->getResultType();
        return minus_func->execute(minus_arguments, minus_result_type, input_rows_count, false);
    }

    bool useDefaultImplementationForConstants() const override
    {
        return true;
    }

    DataTypePtr getReturnTypeForDefaultImplementationForDynamic() const override
    {
        return std::make_shared<DataTypeString>();
    }

    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
};

/// Registers `icebergTruncate` in the function factory together with its user-facing documentation.
REGISTER_FUNCTION(IcebergTruncate)
{
    FunctionDocumentation::Description description = R"(Implements logic of iceberg truncate transform: https://iceberg.apache.org/spec/#truncate-transform-details.)";
    FunctionDocumentation::Syntax syntax = "icebergTruncate(N, value)";
    /// Both arguments from the syntax line are documented, in call order.
    FunctionDocumentation::Arguments arguments = {
        {"N", "Truncation width: positive constant of an unsigned integer type."},
        {"value", "String, integer or Decimal value."}};
    FunctionDocumentation::ReturnedValue returned_value = "The same type as argument";
    FunctionDocumentation::Examples examples = {{"Example", "SELECT icebergTruncate(3, 'iceberg')", "ice"}};
    FunctionDocumentation::Category category = {"Other"};

    factory.registerFunction<FunctionIcebergTruncate>({description, syntax, arguments, returned_value, examples, category});
}

}

}
24 changes: 24 additions & 0 deletions src/Functions/toMonthNumSinceEpoch.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#include <Functions/FunctionFactory.h>
#include <Functions/DateTimeTransforms.h>
#include <Functions/FunctionDateOrDateTimeToSomething.h>
#include <DataTypes/DataTypesNumber.h>


namespace DB
{

/// `toMonthNumSinceEpoch`: the generic date/time adaptor instantiated with the
/// standard-precision month transform and a UInt32 result data type.
using FunctionToMonthNumSinceEpoch = FunctionDateOrDateTimeToSomething<DataTypeUInt32, ToMonthNumSinceEpochImpl<ResultPrecision::Standard>>;

/// Registers the function in the factory together with its user-facing documentation.
REGISTER_FUNCTION(ToMonthNumSinceEpoch)
{
    const FunctionDocumentation::Category doc_category = {"DateTime"};
    const FunctionDocumentation::Syntax doc_syntax = "toMonthNumSinceEpoch(date)";
    const FunctionDocumentation::Description doc_description = R"(Returns amount of months passed from year 1970)";
    const FunctionDocumentation::Arguments doc_arguments = {{"date", "Date, DateTime or DateTime64"}};
    const FunctionDocumentation::ReturnedValue doc_returned_value = "Positive integer";
    const FunctionDocumentation::Examples doc_examples = {{"Example", "SELECT toMonthNumSinceEpoch(toDate('2024-10-01'))", "657"}};

    factory.registerFunction<FunctionToMonthNumSinceEpoch>(
        {doc_description, doc_syntax, doc_arguments, doc_returned_value, doc_examples, doc_category});
}

}
24 changes: 24 additions & 0 deletions src/Functions/toYearNumSinceEpoch.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#include <Functions/FunctionFactory.h>
#include <Functions/DateTimeTransforms.h>
#include <Functions/FunctionDateOrDateTimeToSomething.h>
#include <DataTypes/DataTypesNumber.h>


namespace DB
{

/// `toYearNumSinceEpoch`: the generic date/time adaptor instantiated with the
/// standard-precision year transform and a UInt16 result data type.
using FunctionToYearNumSinceEpoch = FunctionDateOrDateTimeToSomething<DataTypeUInt16, ToYearNumSinceEpochImpl<ResultPrecision::Standard>>;

/// Registers the function in the factory together with its user-facing documentation.
REGISTER_FUNCTION(ToYearNumSinceEpoch)
{
    const FunctionDocumentation::Category doc_category = {"DateTime"};
    const FunctionDocumentation::Syntax doc_syntax = "toYearNumSinceEpoch(date)";
    const FunctionDocumentation::Description doc_description = R"(Returns amount of years passed from year 1970)";
    const FunctionDocumentation::Arguments doc_arguments = {{"date", "Date, DateTime or DateTime64"}};
    const FunctionDocumentation::ReturnedValue doc_returned_value = "Positive integer";
    const FunctionDocumentation::Examples doc_examples = {{"Example", "SELECT toYearNumSinceEpoch(toDate('2024-10-01'))", "54"}};

    factory.registerFunction<FunctionToYearNumSinceEpoch>(
        {doc_description, doc_syntax, doc_arguments, doc_returned_value, doc_examples, doc_category});
}

}
Loading
Loading