docs/en/engines/table-engines/integrations/iceberg.md (+46 −18)
@@ -10,7 +10,7 @@ doc_type: 'reference'
# Iceberg table engine {#iceberg-table-engine}

:::warning
We recommend using the [Iceberg Table Function](/sql-reference/table-functions/iceberg.md) for working with Iceberg data in ClickHouse. The Iceberg Table Function currently provides sufficient functionality, offering a partial read-only interface for Iceberg tables.

The Iceberg Table Engine is available but may have limitations. ClickHouse wasn't originally designed to support tables with externally changing schemas, which can affect the functionality of the Iceberg Table Engine. As a result, some features that work with regular tables may be unavailable or may not function correctly, especially when using the old analyzer.
@@ -78,7 +78,7 @@ Table engine `Iceberg` is an alias to `IcebergS3` now.
At the moment, ClickHouse can read Iceberg tables whose schema has changed over time. We currently support reading tables where columns have been added and removed, and their order has changed. You can also change a column where a value is required to one where NULL is allowed. Additionally, we support permitted type casting for simple types, namely:

* int -> long
* float -> double
* decimal(P, S) -> decimal(P', S) where P' > P

Currently, it is not possible to change nested structures or the types of elements within arrays and maps.
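The promotion rules above can be sketched as a small predicate. This is an illustrative Python sketch of the listed rules only (the function name and the string-based type encoding are my own, not a ClickHouse API):

```python
import re

# Permitted primitive promotions, per the rules listed above:
#   int -> long, float -> double,
#   decimal(P, S) -> decimal(P', S) where P' > P (scale unchanged).
def can_promote(src: str, dst: str) -> bool:
    if (src, dst) in {("int", "long"), ("float", "double")}:
        return True
    ms = re.fullmatch(r"decimal\((\d+),\s*(\d+)\)", src)
    md = re.fullmatch(r"decimal\((\d+),\s*(\d+)\)", dst)
    if ms and md:
        # precision must grow, scale must stay the same
        return int(md.group(1)) > int(ms.group(1)) and ms.group(2) == md.group(2)
    return False

print(can_promote("decimal(10, 2)", "decimal(12, 2)"))  # True
print(can_promote("long", "int"))                       # False
```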
@@ -94,20 +94,20 @@ ClickHouse supports time travel for Iceberg tables, allowing you to query histor
## Processing of tables with deleted rows {#deleted-rows}

Currently, only Iceberg tables with [position deletes](https://iceberg.apache.org/spec/#position-delete-files) are supported.

The following deletion methods are **not supported**:
- [Deletion vectors](https://iceberg.apache.org/spec/#deletion-vectors) (introduced in v3)
### Basic usage {#basic-usage}
```sql
SELECT * FROM example_table ORDER BY 1
SETTINGS iceberg_timestamp_ms = 1714636800000
```

```sql
SELECT * FROM example_table ORDER BY 1
SETTINGS iceberg_snapshot_id = 3547395809148285433
```
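For reference, `iceberg_timestamp_ms` is a Unix timestamp in milliseconds (UTC). A quick way to derive such a value, sketched in Python (the helper name is hypothetical, not part of any ClickHouse tooling):

```python
from datetime import datetime, timezone

# Convert a UTC instant to the millisecond epoch value expected
# by iceberg_timestamp_ms. Helper name is illustrative only.
def to_iceberg_timestamp_ms(dt: datetime) -> int:
    return int(dt.timestamp() * 1000)

ts = to_iceberg_timestamp_ms(datetime(2024, 5, 2, 8, 0, 0, tzinfo=timezone.utc))
print(ts)  # 1714636800000
```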
@@ -132,21 +132,21 @@ Consider this sequence of operations:
```sql
-- Create a table with two columns
CREATE TABLE IF NOT EXISTS spark_catalog.db.time_travel_example (
```
@@ -192,10 +192,10 @@ A time travel query at a current moment might show a different schema than the c
```sql
-- Create a table
CREATE TABLE IF NOT EXISTS spark_catalog.db.time_travel_example_2 (
    order_number int,
    product_code string
)
USING iceberg
OPTIONS ('format-version'='2')

-- Insert initial data into the table
```
@@ -234,10 +234,10 @@ The second one is that while doing time travel you can't get state of table befo
```sql
-- Create a table
CREATE TABLE IF NOT EXISTS spark_catalog.db.time_travel_example_3 (
    order_number int,
    product_code string
)
USING iceberg
OPTIONS ('format-version'='2');

ts = now();
```
@@ -275,9 +275,9 @@ After identifying candidate files using the above rules, the system determines w
* The file with the highest version number is selected
* (Version appears as `V` in filenames formatted as `V.metadata.json` or `V-uuid.metadata.json`)
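The selection rule above can be illustrated with a short Python sketch (illustrative only; the regex and function are assumptions, not ClickHouse's actual implementation):

```python
import re

# Pick the candidate metadata file with the highest version V from names
# shaped like "V.metadata.json" or "V-uuid.metadata.json" (assumed naming).
def pick_latest_metadata(filenames):
    pattern = re.compile(r"^(\d+)(?:-[0-9a-f-]+)?\.metadata\.json$")
    best, best_version = None, -1
    for name in filenames:
        m = pattern.match(name)
        if m and int(m.group(1)) > best_version:
            best_version, best = int(m.group(1)), name
    return best

print(pick_latest_metadata([
    "1.metadata.json",
    "3-9a1f2c3d-aaaa-bbbb-cccc-111122223333.metadata.json",
    "2.metadata.json",
]))  # 3-9a1f2c3d-aaaa-bbbb-cccc-111122223333.metadata.json
```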
**Note**: All mentioned settings (unless explicitly specified otherwise) are engine-level settings and must be specified during table creation as shown below:
The `Iceberg` table engine and table function support a metadata cache storing the information of manifest files, manifest lists, and metadata JSON. The cache is stored in memory. This feature is controlled by the setting `use_iceberg_metadata_files_cache`, which is enabled by default.

Asynchronous metadata prefetching can be enabled at `Iceberg` table creation by setting `iceberg_metadata_async_prefetch_period_ms`. If it is set to 0 (the default) or if metadata caching is not enabled, asynchronous prefetching is disabled.
To enable this feature, a non-zero value in milliseconds should be given; it represents the interval between prefetching cycles.

If enabled, the server runs a recurring background operation that lists the remote catalog to detect new metadata versions. It then parses the new metadata and recursively walks the snapshot, fetching active manifest list files and manifest files. Files already available in the metadata cache won't be downloaded again. At the end of each prefetching cycle, the latest metadata snapshot is available in the metadata cache.

```sql
CREATE TABLE example_table ENGINE = Iceberg(
    's3://bucket/path/to/iceberg_table'
) SETTINGS
    iceberg_metadata_async_prefetch_period_ms = 60000;
```

To make the most of asynchronous metadata prefetching during read operations, the `iceberg_metadata_staleness_ms` parameter should be specified as a query or session setting. By default (0, i.e. not specified), the server fetches the latest metadata from the remote catalog in the context of each query. By specifying a tolerance for metadata staleness, the server is allowed to use the cached version of the metadata snapshot without calling the remote catalog: if a metadata version is in the cache and has been downloaded within the given staleness window, it will be used to process the query. Otherwise, the latest version will be fetched from the remote catalog.

```sql
SELECT count() FROM icebench_table WHERE ...
SETTINGS iceberg_metadata_staleness_ms = 120000
```

**Note**: Asynchronous metadata prefetching runs on `ICEBERG_SCHEDULE_POOL`, a server-side thread pool for background operations on active `Iceberg` tables. The size of this thread pool is controlled by the `iceberg_background_schedule_pool_size` server configuration parameter (default is 10).

**Note**: The current expectation is that the metadata cache is large enough to hold the latest metadata snapshot in full for all active tables when asynchronous prefetching is enabled.
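The staleness decision described above can be summarized in a small Python sketch (an assumed model of the documented behavior, not the actual ClickHouse implementation):

```python
# Use the cached metadata snapshot only if it was downloaded within
# the iceberg_metadata_staleness_ms window; 0 means always refetch.
def use_cached_metadata(cached_at_ms: int, now_ms: int, staleness_ms: int) -> bool:
    if staleness_ms == 0:  # default: always fetch the latest metadata
        return False
    return now_ms - cached_at_ms <= staleness_ms

now = 1_000_000
print(use_cached_metadata(now - 90_000, now, 120_000))   # True: within window
print(use_cached_metadata(now - 150_000, now, 120_000))  # False: stale, refetch
```

A cache hit that falls outside the window would be counted as a stale miss (see the `IcebergMetadataFilesCacheStaleMisses` profile event added below).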
src/Common/ProfileEvents.cpp (+1 −0)
@@ -100,6 +100,7 @@
  M(PrimaryIndexCacheMisses, "Number of times an entry has not been found in the primary index cache, so we had to load an index file in memory, which is a costly operation, adding to query latency.", ValueType::Number) \
  M(IcebergMetadataFilesCacheHits, "Number of times iceberg metadata files have been found in the cache.", ValueType::Number) \
  M(IcebergMetadataFilesCacheMisses, "Number of times iceberg metadata files have not been found in the iceberg metadata cache and had to be read from (remote) disk.", ValueType::Number) \
+ M(IcebergMetadataFilesCacheStaleMisses, "Number of times iceberg metadata files have been found in the cache, but were considered stale and had to be read from (remote) disk.", ValueType::Number) \
  M(IcebergMetadataFilesCacheWeightLost, "Approximate number of bytes evicted from the iceberg metadata cache.", ValueType::Number) \
  M(IcebergMetadataReadWaitTimeMicroseconds, "Total time data readers spend waiting for iceberg metadata files to be read and parsed, summed across all reader threads.", ValueType::Microseconds) \
  M(IcebergIteratorInitializationMicroseconds, "Total time spent on synchronous initialization of iceberg data iterators.", ValueType::Microseconds) \
src/Core/ServerSettings.cpp (+1 −0)
@@ -1138,6 +1138,7 @@ The policy on how to perform a scheduling of CPU slots specified by `concurrent_
  DECLARE(UInt64, threadpool_writer_queue_size, 10000, R"(Number of tasks which is possible to push into background pool for write requests to object storages)", 0) \
  DECLARE(UInt64, iceberg_catalog_threadpool_pool_size, 50, R"(Size of background pool for iceberg catalog)", 0) \
  DECLARE(UInt64, iceberg_catalog_threadpool_queue_size, 10000, R"(Number of tasks which is possible to push into iceberg catalog pool)", 0) \
+ DECLARE(UInt64, iceberg_background_schedule_pool_size, 10, "Size of thread pool to asynchronously fetch the latest metadata from a remote iceberg catalog; the pool is shared by all the active tables.", 0) \
  DECLARE(UInt64, drop_distributed_cache_pool_size, 8, R"(The size of the threadpool used for dropping distributed cache.)", 0) \
  DECLARE(UInt64, drop_distributed_cache_queue_size, 1000, R"(The queue size of the threadpool used for dropping distributed cache.)", 0) \
  DECLARE(Bool, distributed_cache_apply_throttling_settings_from_client, true, R"(Whether cache server should apply throttling settings received from client.)", 0) \
If non-zero, skip fetching iceberg metadata from the remote catalog if there is a cached metadata snapshot more recent than the given staleness window. Zero means always fetch the latest metadata version from the remote catalog. Setting this to a non-zero value trades staleness for lower latency of read operations.
)", 0) \
  DECLARE(Bool, use_query_cache, false, R"(
If turned on, `SELECT` queries may utilize the [query cache](../query-cache.md). Parameters [enable_reads_from_query_cache](#enable_reads_from_query_cache)
and [enable_writes_to_query_cache](#enable_writes_to_query_cache) control in more detail how the cache is used.