Skip to content

Commit 82fac98

Browse files
Backport #95477 to 25.11: Revert "Revert "Fix schema mapping for dates""
1 parent 86b431a commit 82fac98

File tree

6 files changed

+79
-5
lines changed

6 files changed

+79
-5
lines changed

src/Functions/icebergBucketTransform.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ class FunctionIcebergHash : public IFunction
7575

7676
WhichDataType which(type);
7777

78-
if (isBool(type) || which.isInteger() || which.isDate())
78+
if (isBool(type) || which.isInteger() || which.isDate32() || which.isDate())
7979
{
8080
for (size_t i = 0; i < input_rows_count; ++i)
8181
{

src/Storages/ObjectStorage/DataLakes/Iceberg/SchemaProcessor.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ DataTypePtr IcebergSchemaProcessor::getSimpleType(const String & type_name)
234234
if (type_name == f_double)
235235
return std::make_shared<DataTypeFloat64>();
236236
if (type_name == f_date)
237-
return std::make_shared<DataTypeDate>();
237+
return std::make_shared<DataTypeDate32>();
238238
if (type_name == f_time)
239239
return std::make_shared<DataTypeInt64>();
240240
if (type_name == f_timestamp)
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
import pytest
2+
3+
from helpers.iceberg_utils import (
4+
create_iceberg_table,
5+
get_uuid_str,
6+
default_upload_directory,
7+
default_download_directory
8+
)
9+
10+
11+
def test_date_reads(started_cluster_iceberg_with_spark):
    """Verify Iceberg DATE columns round-trip through ClickHouse.

    The schema processor maps Iceberg `date` to Date32 (see the
    SchemaProcessor change in this commit), so values outside the plain
    Date range — 2299-12-31 and 1900-01-13 — must come back unclipped.

    :param started_cluster_iceberg_with_spark: cluster fixture providing
        a ClickHouse instance ("node1") and a Spark session.
    """
    instance = started_cluster_iceberg_with_spark.instances["node1"]
    spark = started_cluster_iceberg_with_spark.spark_session
    storage_type = "s3"
    expected_rows = 2
    # instance.query() returns text with a trailing newline per row.
    expected_date_1 = "2299-12-31\n"
    expected_date_2 = "1900-01-13\n"

    # Unique suffix keeps concurrent test runs from colliding on the table name.
    TABLE_NAME = (
        "test_date_reads_"
        + storage_type
        + "_"
        + get_uuid_str()
    )

    spark.sql(
        f"""
        CREATE TABLE {TABLE_NAME} (
            number INT,
            date_col DATE
        )
        USING iceberg
        """
    )
    spark.sql(
        f""" INSERT INTO {TABLE_NAME} VALUES(1,DATE '2299-12-31') """
    )
    spark.sql(
        f""" INSERT INTO {TABLE_NAME} VALUES(2,DATE '1900-01-13') """
    )

    # Push the Spark-written Iceberg data files to the storage backend;
    # the returned file list is not needed here.
    default_upload_directory(
        started_cluster_iceberg_with_spark,
        storage_type,
        f"/iceberg_data/default/{TABLE_NAME}/",
        f"/iceberg_data/default/{TABLE_NAME}/",
    )

    create_iceberg_table(storage_type, instance, TABLE_NAME, started_cluster_iceberg_with_spark)
    rows_in_ch = int(
        instance.query(
            f"SELECT count() FROM {TABLE_NAME}",
        )
    )

    assert rows_in_ch == expected_rows, f"Expected {expected_rows} rows, but got {rows_in_ch}"

    ret_date_1 = (
        instance.query(
            f"SELECT date_col FROM {TABLE_NAME} where number=1",
        )
    )

    # Fixed message: we are comparing a date value, not a row count.
    assert ret_date_1 == expected_date_1, f"Expected date {expected_date_1!r}, but got {ret_date_1!r}"

    ret_date_2 = (
        instance.query(
            f"SELECT date_col FROM {TABLE_NAME} where number=2",
        )
    )

    assert ret_date_2 == expected_date_2, f"Expected date {expected_date_2!r}, but got {ret_date_2!r}"

tests/integration/test_storage_iceberg_with_spark/test_schema_inference.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ def test_schema_inference(started_cluster_iceberg_with_spark, format_version, st
6161
["decimalC1", "Nullable(Decimal(10, 3))"],
6262
["decimalC2", "Nullable(Decimal(20, 10))"],
6363
["decimalC3", "Nullable(Decimal(38, 30))"],
64-
["dateC", "Nullable(Date)"],
64+
["dateC", "Nullable(Date32)"],
6565
["timestampC", "Nullable(DateTime64(6, 'UTC'))"],
6666
["stringC", "Nullable(String)"],
6767
["binaryC", "Nullable(String)"],

tests/integration/test_storage_iceberg_with_spark/test_types.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ def test_types(started_cluster_iceberg_with_spark, format_version, storage_type)
8181
[
8282
["a", "Nullable(Int32)"],
8383
["b", "Nullable(String)"],
84-
["c", "Nullable(Date)"],
84+
["c", "Nullable(Date32)"],
8585
["d", "Array(Nullable(String))"],
8686
["e", "Nullable(Bool)"],
8787
]

tests/integration/test_storage_iceberg_with_spark/test_writes_with_partitioned_table.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,4 +82,4 @@ def execute_spark_query(query: str):
8282
f.write(b"3")
8383

8484
df = spark.read.format("iceberg").load(f"/var/lib/clickhouse/user_files/iceberg_data/default/{TABLE_NAME}").collect()
85-
assert len(df) == 10
85+
assert len(df) == 10

0 commit comments

Comments
 (0)