Skip to content

Commit 82fac98

Browse files
Backport #95477 to 25.11: Revert "Revert "Fix schema mapping for dates""
1 parent 86b431a commit 82fac98

File tree

6 files changed

+79
-5
lines changed

6 files changed

+79
-5
lines changed

src/Functions/icebergBucketTransform.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ class FunctionIcebergHash : public IFunction
7575

7676
WhichDataType which(type);
7777

78-
if (isBool(type) || which.isInteger() || which.isDate())
78+
if (isBool(type) || which.isInteger() || which.isDate32() || which.isDate())
7979
{
8080
for (size_t i = 0; i < input_rows_count; ++i)
8181
{

src/Storages/ObjectStorage/DataLakes/Iceberg/SchemaProcessor.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ DataTypePtr IcebergSchemaProcessor::getSimpleType(const String & type_name)
234234
if (type_name == f_double)
235235
return std::make_shared<DataTypeFloat64>();
236236
if (type_name == f_date)
237-
return std::make_shared<DataTypeDate>();
237+
return std::make_shared<DataTypeDate32>();
238238
if (type_name == f_time)
239239
return std::make_shared<DataTypeInt64>();
240240
if (type_name == f_timestamp)
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
import pytest
2+
3+
from helpers.iceberg_utils import (
4+
create_iceberg_table,
5+
get_uuid_str,
6+
default_upload_directory,
7+
default_download_directory
8+
)
9+
10+
11+
def test_date_reads(started_cluster_iceberg_with_spark):
    """Verify Iceberg DATE columns round-trip through ClickHouse.

    The schema processor maps Iceberg `date` to Date32 (see the
    SchemaProcessor change in this commit), so values outside the plain
    Date range — 2299-12-31 and 1900-01-13 — must come back unclipped.

    :param started_cluster_iceberg_with_spark: cluster fixture providing
        a ClickHouse instance ("node1") and a Spark session.
    """
    instance = started_cluster_iceberg_with_spark.instances["node1"]
    spark = started_cluster_iceberg_with_spark.spark_session
    storage_type = "s3"
    expected_rows = 2
    # instance.query() returns text with a trailing newline per row.
    expected_date_1 = "2299-12-31\n"
    expected_date_2 = "1900-01-13\n"

    # Unique suffix keeps concurrent test runs from colliding on the table name.
    TABLE_NAME = (
        "test_date_reads_"
        + storage_type
        + "_"
        + get_uuid_str()
    )

    spark.sql(
        f"""
        CREATE TABLE {TABLE_NAME} (
            number INT,
            date_col DATE
        )
        USING iceberg
        """
    )
    spark.sql(
        f""" INSERT INTO {TABLE_NAME} VALUES(1,DATE '2299-12-31') """
    )
    spark.sql(
        f""" INSERT INTO {TABLE_NAME} VALUES(2,DATE '1900-01-13') """
    )

    # Push the Spark-written Iceberg data files to the storage backend;
    # the returned file list is not needed here.
    default_upload_directory(
        started_cluster_iceberg_with_spark,
        storage_type,
        f"/iceberg_data/default/{TABLE_NAME}/",
        f"/iceberg_data/default/{TABLE_NAME}/",
    )

    create_iceberg_table(storage_type, instance, TABLE_NAME, started_cluster_iceberg_with_spark)
    rows_in_ch = int(
        instance.query(
            f"SELECT count() FROM {TABLE_NAME}",
        )
    )

    assert rows_in_ch == expected_rows, f"Expected {expected_rows} rows, but got {rows_in_ch}"

    ret_date_1 = (
        instance.query(
            f"SELECT date_col FROM {TABLE_NAME} where number=1",
        )
    )

    # Fixed message: we are comparing a date value, not a row count.
    assert ret_date_1 == expected_date_1, f"Expected date {expected_date_1!r}, but got {ret_date_1!r}"

    ret_date_2 = (
        instance.query(
            f"SELECT date_col FROM {TABLE_NAME} where number=2",
        )
    )

    assert ret_date_2 == expected_date_2, f"Expected date {expected_date_2!r}, but got {ret_date_2!r}"

tests/integration/test_storage_iceberg_with_spark/test_schema_inference.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ def test_schema_inference(started_cluster_iceberg_with_spark, format_version, st
6161
["decimalC1", "Nullable(Decimal(10, 3))"],
6262
["decimalC2", "Nullable(Decimal(20, 10))"],
6363
["decimalC3", "Nullable(Decimal(38, 30))"],
64-
["dateC", "Nullable(Date)"],
64+
["dateC", "Nullable(Date32)"],
6565
["timestampC", "Nullable(DateTime64(6, 'UTC'))"],
6666
["stringC", "Nullable(String)"],
6767
["binaryC", "Nullable(String)"],

tests/integration/test_storage_iceberg_with_spark/test_types.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ def test_types(started_cluster_iceberg_with_spark, format_version, storage_type)
8181
[
8282
["a", "Nullable(Int32)"],
8383
["b", "Nullable(String)"],
84-
["c", "Nullable(Date)"],
84+
["c", "Nullable(Date32)"],
8585
["d", "Array(Nullable(String))"],
8686
["e", "Nullable(Bool)"],
8787
]

tests/integration/test_storage_iceberg_with_spark/test_writes_with_partitioned_table.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,4 +82,4 @@ def execute_spark_query(query: str):
8282
f.write(b"3")
8383

8484
df = spark.read.format("iceberg").load(f"/var/lib/clickhouse/user_files/iceberg_data/default/{TABLE_NAME}").collect()
85-
assert len(df) == 10
85+
assert len(df) == 10

0 commit comments

Comments
 (0)