Skip to content

Commit 497edb7

Browse files
authored
Merge branch 'antalya-26.1' into google_big_lake_catalog
2 parents 5d8f9db + a76c804 commit 497edb7

35 files changed

+1033
-230
lines changed

.github/workflows/master.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5111,7 +5111,7 @@ jobs:
51115111
secrets: inherit
51125112
with:
51135113
runner_type: altinity-regression-tester
5114-
commit: a54216bbc29eb458e25011a68bacc77f4ae73c19
5114+
commit: c7897a6a858a9ef9c7b3c519e7291cfd3c2ec646
51155115
arch: release
51165116
build_sha: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
51175117
timeout_minutes: 210
@@ -5123,7 +5123,7 @@ jobs:
51235123
secrets: inherit
51245124
with:
51255125
runner_type: altinity-regression-tester-aarch64
5126-
commit: a54216bbc29eb458e25011a68bacc77f4ae73c19
5126+
commit: c7897a6a858a9ef9c7b3c519e7291cfd3c2ec646
51275127
arch: aarch64
51285128
build_sha: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
51295129
timeout_minutes: 210

.github/workflows/pull_request.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4597,7 +4597,7 @@ jobs:
45974597
secrets: inherit
45984598
with:
45994599
runner_type: altinity-regression-tester
4600-
commit: a54216bbc29eb458e25011a68bacc77f4ae73c19
4600+
commit: c7897a6a858a9ef9c7b3c519e7291cfd3c2ec646
46014601
arch: release
46024602
build_sha: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
46034603
timeout_minutes: 210
@@ -4609,7 +4609,7 @@ jobs:
46094609
secrets: inherit
46104610
with:
46114611
runner_type: altinity-regression-tester-aarch64
4612-
commit: a54216bbc29eb458e25011a68bacc77f4ae73c19
4612+
commit: c7897a6a858a9ef9c7b3c519e7291cfd3c2ec646
46134613
arch: aarch64
46144614
build_sha: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
46154615
timeout_minutes: 210

ci/praktika/yaml_additional_templates.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ class AltinityWorkflowTemplates:
3535
echo "Workflow Run Report: [View Report]($REPORT_LINK)" >> $GITHUB_STEP_SUMMARY
3636
"""
3737
# Additional jobs
38-
REGRESSION_HASH = "a54216bbc29eb458e25011a68bacc77f4ae73c19"
38+
REGRESSION_HASH = "c7897a6a858a9ef9c7b3c519e7291cfd3c2ec646"
3939
ALTINITY_JOBS = {
4040
"GrypeScan": r"""
4141
GrypeScanServer:

docs/en/engines/table-engines/integrations/iceberg.md

Lines changed: 46 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ doc_type: 'reference'
1010

1111
# Iceberg table engine {#iceberg-table-engine}
1212

13-
:::warning
13+
:::warning
1414
We recommend using the [Iceberg Table Function](/sql-reference/table-functions/iceberg.md) for working with Iceberg data in ClickHouse. The Iceberg Table Function currently provides sufficient functionality, offering a partial read-only interface for Iceberg tables.
1515

1616
The Iceberg Table Engine is available but may have limitations. ClickHouse wasn't originally designed to support tables with externally changing schemas, which can affect the functionality of the Iceberg Table Engine. As a result, some features that work with regular tables may be unavailable or may not function correctly, especially when using the old analyzer.
@@ -78,7 +78,7 @@ Table engine `Iceberg` is an alias to `IcebergS3` now.
7878
At the moment, with the help of CH, you can read iceberg tables, the schema of which has changed over time. We currently support reading tables where columns have been added and removed, and their order has changed. You can also change a column where a value is required to one where NULL is allowed. Additionally, we support permitted type casting for simple types, namely:  
7979
* int -> long
8080
* float -> double
81-
* decimal(P, S) -> decimal(P', S) where P' > P.
81+
* decimal(P, S) -> decimal(P', S) where P' > P.
8282

8383
Currently, it is not possible to change nested structures or the types of elements within arrays and maps.
8484

@@ -94,20 +94,20 @@ ClickHouse supports time travel for Iceberg tables, allowing you to query histor
9494

9595
## Processing of tables with deleted rows {#deleted-rows}
9696

97-
Currently, only Iceberg tables with [position deletes](https://iceberg.apache.org/spec/#position-delete-files) are supported.
97+
Currently, only Iceberg tables with [position deletes](https://iceberg.apache.org/spec/#position-delete-files) are supported.
9898

9999
The following deletion methods are **not supported**:
100100
- [Equality deletes](https://iceberg.apache.org/spec/#equality-delete-files)
101101
- [Deletion vectors](https://iceberg.apache.org/spec/#deletion-vectors) (introduced in v3)
102102

103103
### Basic usage {#basic-usage}
104104
```sql
105-
SELECT * FROM example_table ORDER BY 1
105+
SELECT * FROM example_table ORDER BY 1
106106
SETTINGS iceberg_timestamp_ms = 1714636800000
107107
```
108108

109109
```sql
110-
SELECT * FROM example_table ORDER BY 1
110+
SELECT * FROM example_table ORDER BY 1
111111
SETTINGS iceberg_snapshot_id = 3547395809148285433
112112
```
113113

@@ -132,21 +132,21 @@ Consider this sequence of operations:
132132
```sql
133133
-- Create a table with two columns
134134
CREATE TABLE IF NOT EXISTS spark_catalog.db.time_travel_example (
135-
order_number int,
135+
order_number int,
136136
product_code string
137-
)
138-
USING iceberg
137+
)
138+
USING iceberg
139139
OPTIONS ('format-version'='2')
140140

141141
-- Insert data into the table
142-
INSERT INTO spark_catalog.db.time_travel_example VALUES
142+
INSERT INTO spark_catalog.db.time_travel_example VALUES
143143
(1, 'Mars')
144144

145145
ts1 = now() // A piece of pseudo code
146146

147147
-- Alter table to add a new column
148148
ALTER TABLE spark_catalog.db.time_travel_example ADD COLUMN (price double)
149-
149+
150150
ts2 = now()
151151

152152
-- Insert data into the table
@@ -192,10 +192,10 @@ A time travel query at a current moment might show a different schema than the c
192192
```sql
193193
-- Create a table
194194
CREATE TABLE IF NOT EXISTS spark_catalog.db.time_travel_example_2 (
195-
order_number int,
195+
order_number int,
196196
product_code string
197-
)
198-
USING iceberg
197+
)
198+
USING iceberg
199199
OPTIONS ('format-version'='2')
200200

201201
-- Insert initial data into the table
@@ -234,10 +234,10 @@ The second one is that while doing time travel you can't get state of table befo
234234
```sql
235235
-- Create a table
236236
CREATE TABLE IF NOT EXISTS spark_catalog.db.time_travel_example_3 (
237-
order_number int,
237+
order_number int,
238238
product_code string
239-
)
240-
USING iceberg
239+
)
240+
USING iceberg
241241
OPTIONS ('format-version'='2');
242242

243243
ts = now();
@@ -275,9 +275,9 @@ After identifying candidate files using the above rules, the system determines w
275275
* The file with the highest version number is selected
276276
* (Version appears as `V` in filenames formatted as `V.metadata.json` or `V-uuid.metadata.json`)
277277

278-
**Note**: All mentioned settings are engine-level settings and must be specified during table creation as shown below:
278+
**Note**: All mentioned settings (unless explicitly specified otherwise) are engine-level settings and must be specified during table creation as shown below:
279279

280-
```sql
280+
```sql
281281
CREATE TABLE example_table ENGINE = Iceberg(
282282
's3://bucket/path/to/iceberg_table'
283283
) SETTINGS iceberg_metadata_table_uuid = '6f6f6407-c6a5-465f-a808-ea8900e35a38';
@@ -293,6 +293,34 @@ CREATE TABLE example_table ENGINE = Iceberg(
293293

294294
`Iceberg` table engine and table function support metadata cache storing the information of manifest files, manifest list and metadata json. The cache is stored in memory. This feature is controlled by setting `use_iceberg_metadata_files_cache`, which is enabled by default.
295295

296+
## Asynchronous metadata prefetching {#async-metadata-prefetch}
297+
298+
Asynchronous metadata prefetching can be enabled at `Iceberg` table creation by setting `iceberg_metadata_async_prefetch_period_ms`. If set to 0 (default) or if metadata caching is not enabled, the asynchronous prefetching is disabled.
299+
In order to enable this feature, a non-zero value in milliseconds should be given. It represents the interval between prefetching cycles.
300+
301+
If enabled, the server will run a recurring background operation to list the remote catalog and to detect a new metadata version. It will then parse it and recursively walk the snapshot, fetching active manifest list files and manifest files.
302+
Files already available in the metadata cache won't be downloaded again. At the end of each prefetching cycle, the latest metadata snapshot is available in the metadata cache.
303+
304+
```sql
305+
CREATE TABLE example_table ENGINE = Iceberg(
306+
's3://bucket/path/to/iceberg_table'
307+
) SETTINGS
308+
iceberg_metadata_async_prefetch_period_ms = 60000;
309+
```
310+
311+
In order to make the most of asynchronous metadata prefetching for read operations, the `iceberg_metadata_staleness_ms` parameter should be specified as a query or session parameter. By default (0 — not specified), in the context of each query the server will fetch the latest metadata from the remote catalog.
312+
By specifying a tolerance to metadata staleness, the server is allowed to use the cached version of the metadata snapshot without calling the remote catalog. If there is a metadata version in the cache, and it has been downloaded within the given staleness window, it will be used to process the query.
313+
Otherwise the latest version will be fetched from the remote catalog.
314+
315+
```sql
316+
SELECT count() FROM icebench_table WHERE ...
317+
SETTINGS iceberg_metadata_staleness_ms=120000
318+
```
319+
320+
**Note**: Asynchronous metadata prefetching runs on `ICEBERG_SCHEDULE_POOL`, which is a server-side thread pool for background operations on active `Iceberg` tables. The size of this thread pool is controlled by the `iceberg_background_schedule_pool_size` server configuration parameter (default is 10).
321+
322+
**Note**: Current expectation is that metadata cache size is sufficient to hold the latest metadata snapshot in full for all active tables, if asynchronous prefetching is enabled.
323+
296324
## Altinity Antalya branch
297325

298326
### Specify storage type in arguments

src/Common/CurrentMetrics.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,8 @@
208208
M(IcebergCatalogThreads, "Number of threads in the IcebergCatalog thread pool.") \
209209
M(IcebergCatalogThreadsActive, "Number of threads in the IcebergCatalog thread pool running a task.") \
210210
M(IcebergCatalogThreadsScheduled, "Number of queued or active jobs in the IcebergCatalog thread pool.") \
211+
M(IcebergSchedulePoolTask, "Number of tasks in the background schedule pool for Iceberg tables.") \
212+
M(IcebergSchedulePoolSize, "Limit on number of tasks in the background schedule pool for Iceberg tables.") \
211213
M(ParallelWithQueryThreads, "Number of threads in the threadpool for processing PARALLEL WITH queries.") \
212214
M(ParallelWithQueryActiveThreads, "Number of active threads in the threadpool for processing PARALLEL WITH queries.") \
213215
M(ParallelWithQueryScheduledThreads, "Number of queued or active jobs in the threadpool for processing PARALLEL WITH queries.") \

src/Common/FailPoint.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,8 @@ static struct InitFiu
138138
REGULAR(rmt_delay_execute_drop_range) \
139139
REGULAR(rmt_delay_commit_part) \
140140
ONCE(local_object_storage_network_error_during_remove) \
141-
ONCE(parallel_replicas_check_read_mode_always)
141+
ONCE(parallel_replicas_check_read_mode_always)\
142+
REGULAR(lightweight_show_tables)
142143

143144
namespace FailPoints
144145
{

src/Common/ProfileEvents.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@
100100
M(PrimaryIndexCacheMisses, "Number of times an entry has not been found in the primary index cache, so we had to load a index file in memory, which is a costly operation, adding to query latency.", ValueType::Number) \
101101
M(IcebergMetadataFilesCacheHits, "Number of times iceberg metadata files have been found in the cache.", ValueType::Number) \
102102
M(IcebergMetadataFilesCacheMisses, "Number of times iceberg metadata files have not been found in the iceberg metadata cache and had to be read from (remote) disk.", ValueType::Number) \
103+
M(IcebergMetadataFilesCacheStaleMisses, "Number of times iceberg metadata files have been found in the cache, but were considered stale and had to be read from (remote) disk.", ValueType::Number) \
103104
M(IcebergMetadataFilesCacheWeightLost, "Approximate number of bytes evicted from the iceberg metadata cache.", ValueType::Number) \
104105
M(IcebergMetadataReadWaitTimeMicroseconds, "Total time data readers spend waiting for iceberg metadata files to be read and parsed, summed across all reader threads.", ValueType::Microseconds) \
105106
M(IcebergIteratorInitializationMicroseconds, "Total time spent on synchronous initialization of iceberg data iterators.", ValueType::Microseconds) \

src/Common/setThreadName.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ namespace DB
6666
M(HASHED_DICT_LOAD, "HashedDictLoad") \
6767
M(HTTP_HANDLER, "HTTPHandler") \
6868
M(ICEBERG_ITERATOR, "IcebergIter") \
69+
M(ICEBERG_SCHEDULE_POOL, "IcebergSchPool") \
6970
M(INTERSERVER_HANDLER, "IntersrvHandler") \
7071
M(IO_URING_MONITOR, "IoUringMonitr") \
7172
M(KEEPER_HANDLER, "KeeperHandler") \

src/Core/ServerSettings.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1138,6 +1138,7 @@ The policy on how to perform a scheduling of CPU slots specified by `concurrent_
11381138
DECLARE(UInt64, threadpool_writer_queue_size, 10000, R"(Number of tasks which is possible to push into background pool for write requests to object storages)", 0) \
11391139
DECLARE(UInt64, iceberg_catalog_threadpool_pool_size, 50, R"(Size of background pool for iceberg catalog)", 0) \
11401140
DECLARE(UInt64, iceberg_catalog_threadpool_queue_size, 10000, R"(Number of tasks which is possible to push into iceberg catalog pool)", 0) \
1141+
DECLARE(UInt64, iceberg_background_schedule_pool_size, 10, "Size of thread pool to asynchronously fetch the latest metadata from a remote iceberg catalog; the pool is shared by all the active tables.", 0) \
11411142
DECLARE(UInt64, drop_distributed_cache_pool_size, 8, R"(The size of the threadpool used for dropping distributed cache.)", 0) \
11421143
DECLARE(UInt64, drop_distributed_cache_queue_size, 1000, R"(The queue size of the threadpool used for dropping distributed cache.)", 0) \
11431144
DECLARE(Bool, distributed_cache_apply_throttling_settings_from_client, true, R"(Whether cache server should apply throttling settings received from client.)", 0) \

src/Core/Settings.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5164,7 +5164,9 @@ Possible values:
51645164
- 0 - Disabled
51655165
- 1 - Enabled
51665166
)", 0) \
5167-
\
5167+
DECLARE(UInt64, iceberg_metadata_staleness_ms, 0, R"(
5168+
If non-zero, skip fetching iceberg metadata from the remote catalog when there is a cached metadata snapshot more recent than the given staleness window. Zero means to always fetch the latest metadata version from the remote catalog. Setting this to a non-zero value trades staleness for lower latency of read operations.
5169+
)", 0) \
51685170
DECLARE(Bool, use_query_cache, false, R"(
51695171
If turned on, `SELECT` queries may utilize the [query cache](../query-cache.md). Parameters [enable_reads_from_query_cache](#enable_reads_from_query_cache)
51705172
and [enable_writes_to_query_cache](#enable_writes_to_query_cache) control in more detail how the cache is used.

0 commit comments

Comments
 (0)