-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Description
Is your feature request related to a problem or challenge? Please describe what you are trying to do.
In DataFusion we would like to add and subtract timestamps. While we currently support this for scalar (singular) values we don't support it for arrays.
So this query will work
select * from foo where now() - INTERVAL '1 day' This one will not (aka add time column to a Interval column):
select * from foo where now() > foo.time + foo.sale_durationDescribe the solution you'd like
I would like the add_dyn and subtract_dyn kernels to handle
Timestamp(Second, ..) + Interval(YearMonth)
Timestamp(Second, ..) + Interval(DayTime)
Timestamp(Second, ..) + Interval(MonthDayNano)
Timestamp(Millisecond, ..) + Interval(YearMonth)
Timestamp(Millisecond, ..) + Interval(DayTime)
Timestamp(Millisecond, ..) + Interval(MonthDayNano)
Timestamp(MicroSecond, ..) + Interval(YearMonth)
Timestamp(Microsecond, ..) + Interval(DayTime)
Timestamp(Microsecond, ..) + Interval(MonthDayNano)
Timestamp(Nanosecond, ..) + Interval(YearMonth)
Timestamp(Nanosecond, ..) + Interval(DayTime)
Timestamp(Nanosecond, ..) + Interval(MonthDayNano)
To keep consistency with #3959 and other timestamp kernels:
- The precision will be truncated if necessary (e.g. adding an
IntervalMonthDayNanowith nanosecond precision to a `Timestamp(Millisecond) will not error and the interval will be truncated to millisecond precision) - If an overflow occurs (e.g. an
Interval(DayTime)is added toTimestamp(Nanosecond)that goes beyond the range of i64 in nanoseconds), an error would be raised
Describe alternatives you've considered
Additional context
Here is the support for Date + Interval -- we should be able to follow much the same pattern for Timestamp + interval and Timestamp - Interval:
arrow-rs/arrow-arith/src/arithmetic.rs
Lines 694 to 740 in 9bd2bae
| DataType::Date32 => { | |
| let l = left.as_primitive::<Date32Type>(); | |
| match right.data_type() { | |
| DataType::Interval(IntervalUnit::YearMonth) => { | |
| let r = right.as_primitive::<IntervalYearMonthType>(); | |
| let res = math_op(l, r, Date32Type::add_year_months)?; | |
| Ok(Arc::new(res)) | |
| } | |
| DataType::Interval(IntervalUnit::DayTime) => { | |
| let r = right.as_primitive::<IntervalDayTimeType>(); | |
| let res = math_op(l, r, Date32Type::add_day_time)?; | |
| Ok(Arc::new(res)) | |
| } | |
| DataType::Interval(IntervalUnit::MonthDayNano) => { | |
| let r = right.as_primitive::<IntervalMonthDayNanoType>(); | |
| let res = math_op(l, r, Date32Type::add_month_day_nano)?; | |
| Ok(Arc::new(res)) | |
| } | |
| _ => Err(ArrowError::CastError(format!( | |
| "Cannot perform arithmetic operation between array of type {} and array of type {}", | |
| left.data_type(), right.data_type() | |
| ))), | |
| } | |
| } | |
| DataType::Date64 => { | |
| let l = left.as_primitive::<Date64Type>(); | |
| match right.data_type() { | |
| DataType::Interval(IntervalUnit::YearMonth) => { | |
| let r = right.as_primitive::<IntervalYearMonthType>(); | |
| let res = math_op(l, r, Date64Type::add_year_months)?; | |
| Ok(Arc::new(res)) | |
| } | |
| DataType::Interval(IntervalUnit::DayTime) => { | |
| let r = right.as_primitive::<IntervalDayTimeType>(); | |
| let res = math_op(l, r, Date64Type::add_day_time)?; | |
| Ok(Arc::new(res)) | |
| } | |
| DataType::Interval(IntervalUnit::MonthDayNano) => { | |
| let r = right.as_primitive::<IntervalMonthDayNanoType>(); | |
| let res = math_op(l, r, Date64Type::add_month_day_nano)?; | |
| Ok(Arc::new(res)) | |
| } | |
| _ => Err(ArrowError::CastError(format!( | |
| "Cannot perform arithmetic operation between array of type {} and array of type {}", | |
| left.data_type(), right.data_type() | |
| ))), | |
| } |
Here is example code showing what the desire is;
fn add_timestamp_intervals() {
// Given some timestamps
let arr = TimestampNanosecondArray::from(vec![
1_000,
1_000_000,
1_000_000_000,
1_000_000_000_000,
1_000_000_000_000_000,
1_000_000_000_000_000_000,
2_000_000_000_000_000_000,
]);
let types = vec![
DataType::Timestamp(TimeUnit::Second, None),
DataType::Timestamp(TimeUnit::Millisecond, None),
DataType::Timestamp(TimeUnit::Microsecond, None),
DataType::Timestamp(TimeUnit::Nanosecond, None),
];
let input_date_time_arrays: Vec<_> = types
.iter()
.map(|dt| cast(&arr, dt).unwrap())
.collect();
for date_time_array in input_date_time_arrays.into_iter() {
// would like to add 1 day
let interval_array = IntervalDayTimeArray::from(vec![
Some(IntervalDayTimeType::make_value(1, 0)),
Some(IntervalDayTimeType::make_value(1, 0)),
Some(IntervalDayTimeType::make_value(1, 0)),
Some(IntervalDayTimeType::make_value(1, 0)),
Some(IntervalDayTimeType::make_value(1, 0)),
Some(IntervalDayTimeType::make_value(1, 0)),
Some(IntervalDayTimeType::make_value(1, 0)),
]);
// input Timestamp(Second, None):
// +---------------------+
// | input |
// +---------------------+
// | 1970-01-01T00:00:00 |
// | 1970-01-01T00:00:00 |
// | 1970-01-01T00:00:01 |
// | 1970-01-01T00:16:40 |
// | 1970-01-12T13:46:40 |
// | 2001-09-09T01:46:40 |
// | 2033-05-18T03:33:20 |
// +---------------------+
println!(
"input {}:\n{}",
date_time_array.data_type(),
pretty_format_columns("input", &[Arc::new(date_time_array.clone()) as _]).unwrap()
);
let col_name = format!(
"{} + {}",
date_time_array.data_type(),
interval_array.data_type()
);
// thread 'main' panicked at 'called `Result::unwrap()` on an `Err` value: CastError("Unsupported data type Timestamp(Second, None), Interval(DayTime)")', src/main.rs:253:66
let new_arr = add_dyn(&date_time_array, &interval_array).unwrap();
println!(
"{col_name}:\n{}",
pretty_format_columns("add", &[new_arr]).unwrap()
);
let col_name = format!(
"{} - {}",
date_time_array.data_type(),
interval_array.data_type()
);
let new_arr = subtract_dyn(&date_time_array, &interval_array).unwrap();
println!(
"{col_name}:\n{}",
pretty_format_columns("sub", &[new_arr]).unwrap()
);
}
}