Skip to content

Commit 1523447

Browse files
committed
Implement system table blob_storage_log
1 parent 5ec6261 commit 1523447

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+782
-86
lines changed

docs/en/operations/server-configuration-parameters/settings.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2740,7 +2740,7 @@ ClickHouse will use it to form the proxy URI using the following template: `{pro
27402740
<proxy_cache_time>10</proxy_cache_time>
27412741
</resolver>
27422742
</http>
2743-
2743+
27442744
<https>
27452745
<resolver>
27462746
<endpoint>http://resolver:8080/hostname</endpoint>
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
---
2+
slug: /en/operations/system-tables/blob_storage_log
3+
---
4+
# Blob Storage Operations Log
5+
6+
Contains logging entries with information about various blob storage operations such as uploads and deletes.
7+
8+
Columns:
9+
10+
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Date of the event.
11+
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Time of the event.
12+
- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Time of the event with microseconds precision.
13+
- `event_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of the event. Possible values:
14+
- `'Upload'`
15+
- `'Delete'`
16+
- `'MultiPartUploadCreate'`
17+
- `'MultiPartUploadWrite'`
18+
- `'MultiPartUploadComplete'`
19+
- `'MultiPartUploadAbort'`
20+
- `query_id` ([String](../../sql-reference/data-types/string.md)) — Identifier of the query associated with the event, if any.
21+
- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Identifier of the thread performing the operation.
22+
- `thread_name` ([String](../../sql-reference/data-types/string.md)) — Name of the thread performing the operation.
23+
- `disk_name` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) — Name of the associated disk.
24+
- `bucket` ([String](../../sql-reference/data-types/string.md)) — Name of the bucket.
25+
- `remote_path` ([String](../../sql-reference/data-types/string.md)) — Path to the remote resource.
26+
- `local_path` ([String](../../sql-reference/data-types/string.md)) — Path to the metadata file on the local system, which references the remote resource.
27+
- `data_size` ([UInt32](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Size of the data involved in the upload event.
28+
- `error` ([String](../../sql-reference/data-types/string.md)) — Error message associated with the event, if any.
29+
30+
**Example**
31+
32+
Suppose a blob storage operation uploads a file, and an event is logged:
33+
34+
```sql
35+
SELECT * FROM system.blob_storage_log WHERE query_id = '7afe0450-504d-4e4b-9a80-cd9826047972' ORDER BY event_date, event_time_microseconds \G
36+
```
37+
38+
```text
39+
Row 1:
40+
──────
41+
event_date: 2023-10-31
42+
event_time: 2023-10-31 16:03:40
43+
event_time_microseconds: 2023-10-31 16:03:40.481437
44+
event_type: Upload
45+
query_id: 7afe0450-504d-4e4b-9a80-cd9826047972
46+
thread_id: 2381740
47+
disk_name: disk_s3
48+
bucket: bucket1
49+
remote_path: rrr/kxo/tbnqtrghgtnxkzgtcrlutwuslgawe
50+
local_path: store/654/6549e8b3-d753-4447-8047-d462df6e6dbe/tmp_insert_all_1_1_0/checksums.txt
51+
data_size: 259
52+
error:
53+
```
54+
55+
In this example, upload operation was associated with the `INSERT` query with ID `7afe0450-504d-4e4b-9a80-cd9826047972`. The local metadata file `store/654/6549e8b3-d753-4447-8047-d462df6e6dbe/tmp_insert_all_1_1_0/checksums.txt` refers to remote path `rrr/kxo/tbnqtrghgtnxkzgtcrlutwuslgawe` in bucket `bucket1` on disk `disk_s3`, with a size of 259 bytes.
56+
57+
**See Also**
58+
59+
- [External Disks for Storing Data](../../operations/storing-data.md)

programs/server/config.xml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1248,6 +1248,16 @@
12481248
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
12491249
</backup_log>
12501250

1251+
<!-- Blob storage object operations log.
1252+
-->
1253+
<blob_storage_log>
1254+
<database>system</database>
1255+
<table>blob_storage_log</table>
1256+
<partition_by>toYYYYMM(event_date)</partition_by>
1257+
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
1258+
<ttl>event_date + INTERVAL 30 DAY</ttl>
1259+
</blob_storage_log>
1260+
12511261
<!-- <top_level_domains_path>/var/lib/clickhouse/top_level_domains/</top_level_domains_path> -->
12521262
<!-- Custom TLD lists.
12531263
Format: <name>/path/to/file</name>

src/Backups/BackupIO_S3.cpp

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,9 @@ BackupReaderS3::BackupReaderS3(
127127
request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint
128128
request_settings.allow_native_copy = allow_s3_native_copy;
129129
client = makeS3Client(s3_uri_, access_key_id_, secret_access_key_, s3_settings, context_);
130+
131+
if (auto blob_storage_system_log = context_->getBlobStorageLog())
132+
blob_storage_log = std::make_shared<BlobStorageLogWriter>(blob_storage_system_log);
130133
}
131134

132135
BackupReaderS3::~BackupReaderS3() = default;
@@ -178,6 +181,7 @@ void BackupReaderS3::copyFileToDisk(const String & path_in_backup, size_t file_s
178181
/* dest_key= */ blob_path[0],
179182
s3_settings.request_settings,
180183
read_settings,
184+
blob_storage_log,
181185
object_attributes,
182186
threadPoolCallbackRunner<void>(getBackupsIOThreadPool().get(), "BackupReaderS3"),
183187
/* for_disk_s3= */ true);
@@ -214,6 +218,12 @@ BackupWriterS3::BackupWriterS3(
214218
request_settings.allow_native_copy = allow_s3_native_copy;
215219
request_settings.setStorageClassName(storage_class_name);
216220
client = makeS3Client(s3_uri_, access_key_id_, secret_access_key_, s3_settings, context_);
221+
if (auto blob_storage_system_log = context_->getBlobStorageLog())
222+
{
223+
blob_storage_log = std::make_shared<BlobStorageLogWriter>(blob_storage_system_log);
224+
if (context_->hasQueryContext())
225+
blob_storage_log->query_id = context_->getQueryContext()->getCurrentQueryId();
226+
}
217227
}
218228

219229
void BackupWriterS3::copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path,
@@ -239,6 +249,7 @@ void BackupWriterS3::copyFileFromDisk(const String & path_in_backup, DiskPtr src
239249
fs::path(s3_uri.key) / path_in_backup,
240250
s3_settings.request_settings,
241251
read_settings,
252+
blob_storage_log,
242253
{},
243254
threadPoolCallbackRunner<void>(getBackupsIOThreadPool().get(), "BackupWriterS3"));
244255
return; /// copied!
@@ -262,13 +273,15 @@ void BackupWriterS3::copyFile(const String & destination, const String & source,
262273
fs::path(s3_uri.key) / destination,
263274
s3_settings.request_settings,
264275
read_settings,
276+
blob_storage_log,
265277
{},
266278
threadPoolCallbackRunner<void>(getBackupsIOThreadPool().get(), "BackupWriterS3"));
267279
}
268280

269281
void BackupWriterS3::copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length)
270282
{
271-
copyDataToS3File(create_read_buffer, start_pos, length, client, s3_uri.bucket, fs::path(s3_uri.key) / path_in_backup, s3_settings.request_settings, {},
283+
copyDataToS3File(create_read_buffer, start_pos, length, client, s3_uri.bucket, fs::path(s3_uri.key) / path_in_backup,
284+
s3_settings.request_settings, blob_storage_log, {},
272285
threadPoolCallbackRunner<void>(getBackupsIOThreadPool().get(), "BackupWriterS3"));
273286
}
274287

@@ -302,6 +315,7 @@ std::unique_ptr<WriteBuffer> BackupWriterS3::writeFile(const String & file_name)
302315
fs::path(s3_uri.key) / file_name,
303316
DBMS_DEFAULT_BUFFER_SIZE,
304317
s3_settings.request_settings,
318+
blob_storage_log,
305319
std::nullopt,
306320
threadPoolCallbackRunner<void>(getBackupsIOThreadPool().get(), "BackupWriterS3"),
307321
write_settings);
@@ -311,8 +325,19 @@ void BackupWriterS3::removeFile(const String & file_name)
311325
{
312326
S3::DeleteObjectRequest request;
313327
request.SetBucket(s3_uri.bucket);
314-
request.SetKey(fs::path(s3_uri.key) / file_name);
328+
auto key = fs::path(s3_uri.key) / file_name;
329+
request.SetKey(key);
330+
315331
auto outcome = client->DeleteObject(request);
332+
333+
if (blob_storage_log)
334+
{
335+
blob_storage_log->addEvent(
336+
BlobStorageLogElement::EventType::Delete,
337+
s3_uri.bucket, key, /* local_path */ "", /* data_size */ 0,
338+
outcome.IsSuccess() ? nullptr : &outcome.GetError());
339+
}
340+
316341
if (!outcome.IsSuccess() && !isNotFoundError(outcome.GetError().GetErrorType()))
317342
throw S3Exception(outcome.GetError().GetMessage(), outcome.GetError().GetErrorType());
318343
}
@@ -371,6 +396,16 @@ void BackupWriterS3::removeFilesBatch(const Strings & file_names)
371396
request.SetDelete(delkeys);
372397

373398
auto outcome = client->DeleteObjects(request);
399+
400+
if (blob_storage_log)
401+
{
402+
const auto * outcome_error = outcome.IsSuccess() ? nullptr : &outcome.GetError();
403+
auto time_now = std::chrono::system_clock::now();
404+
for (const auto & obj : current_chunk)
405+
blob_storage_log->addEvent(BlobStorageLogElement::EventType::Delete, s3_uri.bucket, obj.GetKey(),
406+
/* local_path */ "", /* data_size */ 0, outcome_error, time_now);
407+
}
408+
374409
if (!outcome.IsSuccess() && !isNotFoundError(outcome.GetError().GetErrorType()))
375410
throw S3Exception(outcome.GetError().GetMessage(), outcome.GetError().GetErrorType());
376411
}

src/Backups/BackupIO_S3.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
#include <IO/S3Common.h>
99
#include <Storages/StorageS3Settings.h>
1010
#include <Interpreters/Context_fwd.h>
11-
11+
#include <IO/S3/BlobStorageLogWriter.h>
1212

1313
namespace DB
1414
{
@@ -32,6 +32,8 @@ class BackupReaderS3 : public BackupReaderDefault
3232
const DataSourceDescription data_source_description;
3333
S3Settings s3_settings;
3434
std::shared_ptr<S3::Client> client;
35+
36+
BlobStorageLogWriterPtr blob_storage_log;
3537
};
3638

3739

@@ -63,6 +65,8 @@ class BackupWriterS3 : public BackupWriterDefault
6365
S3Settings s3_settings;
6466
std::shared_ptr<S3::Client> client;
6567
std::optional<bool> supports_batch_delete;
68+
69+
BlobStorageLogWriterPtr blob_storage_log;
6670
};
6771

6872
}

src/Common/SystemLogBase.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include <Interpreters/TransactionsInfoLog.h>
1818
#include <Interpreters/AsynchronousInsertLog.h>
1919
#include <Interpreters/BackupLog.h>
20+
#include <IO/S3/BlobStorageLogWriter.h>
2021

2122
#include <Common/MemoryTrackerBlockerInThread.h>
2223
#include <Common/SystemLogBase.h>

src/Common/SystemLogBase.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@
3131
M(FilesystemCacheLogElement) \
3232
M(FilesystemReadPrefetchesLogElement) \
3333
M(AsynchronousInsertLogElement) \
34-
M(BackupLogElement)
34+
M(BackupLogElement) \
35+
M(BlobStorageLogElement)
3536

3637
namespace Poco
3738
{

src/Coordination/KeeperSnapshotManagerS3.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,12 +147,14 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const SnapshotFileInfo & snapsh
147147

148148
const auto create_writer = [&](const auto & key)
149149
{
150+
/// blob_storage_log is not used for keeper
150151
return WriteBufferFromS3(
151152
s3_client->client,
152153
s3_client->uri.bucket,
153154
key,
154155
DBMS_DEFAULT_BUFFER_SIZE,
155-
request_settings_1
156+
request_settings_1,
157+
/* blob_log */ {}
156158
);
157159
};
158160

@@ -214,6 +216,7 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const SnapshotFileInfo & snapsh
214216
delete_request.SetBucket(s3_client->uri.bucket);
215217
delete_request.SetKey(lock_file);
216218
auto delete_outcome = s3_client->client->DeleteObject(delete_request);
219+
217220
if (!delete_outcome.IsSuccess())
218221
throw S3Exception(delete_outcome.GetError().GetMessage(), delete_outcome.GetError().GetErrorType());
219222
}

src/Coordination/Standalone/Context.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,11 @@ std::shared_ptr<FilesystemReadPrefetchesLog> Context::getFilesystemReadPrefetche
235235
return nullptr;
236236
}
237237

238+
std::shared_ptr<BlobStorageLog> Context::getBlobStorageLog() const
239+
{
240+
return nullptr;
241+
}
242+
238243
void Context::setConfig(const ConfigurationPtr & config)
239244
{
240245
auto lock = getGlobalLock();

src/Coordination/Standalone/Context.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ struct ContextSharedPart;
2727
class Macros;
2828
class FilesystemCacheLog;
2929
class FilesystemReadPrefetchesLog;
30+
class BlobStorageLog;
3031

3132
/// A small class which owns ContextShared.
3233
/// We don't use something like unique_ptr directly to allow ContextShared type to be incomplete.
@@ -115,6 +116,7 @@ class Context : public ContextData, public std::enable_shared_from_this<Context>
115116

116117
std::shared_ptr<FilesystemCacheLog> getFilesystemCacheLog() const;
117118
std::shared_ptr<FilesystemReadPrefetchesLog> getFilesystemReadPrefetchesLog() const;
119+
std::shared_ptr<BlobStorageLog> getBlobStorageLog() const;
118120

119121
enum class ApplicationType
120122
{

0 commit comments

Comments
 (0)