Skip to content

Commit 2d76d9a

Browse files
authored
GH-35519: [C++][Parquet] Fixing exception handling in parquet FileSerializer (#35520)
### Rationale for this change Mentioned in #35519 ### What changes are included in this PR? Exception handling in `CheckRowsWritten` ### Are these changes tested? No, I don't know how to mock it currently ### Are there any user-facing changes? no * Closes: #35519 Authored-by: mwish <[email protected]> Signed-off-by: Antoine Pitrou <[email protected]>
1 parent e2e3a9d commit 2d76d9a

File tree

1 file changed

+10
-9
lines changed

1 file changed

+10
-9
lines changed

cpp/src/parquet/file_writer.cc

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -219,17 +219,16 @@ class RowGroupSerializer : public RowGroupWriter::Contents {
219219
closed_ = true;
220220
CheckRowsWritten();
221221

222-
for (size_t i = 0; i < column_writers_.size(); i++) {
223-
if (column_writers_[i]) {
224-
total_bytes_written_ += column_writers_[i]->Close();
222+
// Avoid invalid state if ColumnWriter::Close() throws internally.
223+
auto column_writers = std::move(column_writers_);
224+
for (size_t i = 0; i < column_writers.size(); i++) {
225+
if (column_writers[i]) {
226+
total_bytes_written_ += column_writers[i]->Close();
225227
total_compressed_bytes_written_ +=
226-
column_writers_[i]->total_compressed_bytes_written();
227-
column_writers_[i].reset();
228+
column_writers[i]->total_compressed_bytes_written();
228229
}
229230
}
230231

231-
column_writers_.clear();
232-
233232
// Ensures all columns have been written
234233
metadata_->set_num_rows(num_rows_);
235234
metadata_->Finish(total_bytes_written_, row_group_ordinal_);
@@ -261,8 +260,10 @@ class RowGroupSerializer : public RowGroupWriter::Contents {
261260
}
262261
} else if (buffered_row_group_ &&
263262
column_writers_.size() > 0) { // when buffered_row_group = true
263+
DCHECK(column_writers_[0] != nullptr);
264264
int64_t current_col_rows = column_writers_[0]->rows_written();
265265
for (int i = 1; i < static_cast<int>(column_writers_.size()); i++) {
266+
DCHECK(column_writers_[i] != nullptr);
266267
int64_t current_col_rows_i = column_writers_[i]->rows_written();
267268
if (current_col_rows != current_col_rows_i) {
268269
ThrowRowsMisMatchError(i, current_col_rows_i, current_col_rows);
@@ -380,15 +381,15 @@ class FileSerializer : public ParquetFileWriter::Contents {
380381
void AddKeyValueMetadata(
381382
const std::shared_ptr<const KeyValueMetadata>& key_value_metadata) override {
382383
if (key_value_metadata_ == nullptr) {
383-
key_value_metadata_ = std::move(key_value_metadata);
384+
key_value_metadata_ = key_value_metadata;
384385
} else if (key_value_metadata != nullptr) {
385386
key_value_metadata_ = key_value_metadata_->Merge(*key_value_metadata);
386387
}
387388
}
388389

389390
~FileSerializer() override {
390391
try {
391-
Close();
392+
FileSerializer::Close();
392393
} catch (...) {
393394
}
394395
}

0 commit comments

Comments
 (0)