@@ -399,6 +399,24 @@ IcebergSnapshot IcebergMetadata::getSnapshot(const String & manifest_list_file)
399399 return IcebergSnapshot{manifest_lists_by_name.emplace (manifest_list_file, initializeManifestList (manifest_list_file)).first };
400400}
401401
402+ std::vector<Int32>
403+ getRelevantPartitionColumnIds (const ManifestFileEntry & entry, const IcebergSchemaProcessor & schema_processor, Int32 current_schema_id)
404+ {
405+ std::vector<Int32> partition_column_ids;
406+ partition_column_ids.reserve (entry.getContent ().getPartitionColumnInfos ().size ());
407+ for (const auto & partition_column_info : entry.getContent ().getPartitionColumnInfos ())
408+ {
409+ std::optional<NameAndTypePair> name_and_type
410+ = schema_processor.tryGetFieldCharacteristics (current_schema_id, partition_column_info.source_id );
411+ if (name_and_type)
412+ {
413+ partition_column_ids.push_back (partition_column_info.source_id );
414+ }
415+ }
416+ return partition_column_ids;
417+ }
418+
419+
402420Strings IcebergMetadata::getDataFilesImpl (const ActionsDAG * filter_dag) const
403421{
404422 if (!current_snapshot)
@@ -410,43 +428,27 @@ Strings IcebergMetadata::getDataFilesImpl(const ActionsDAG * filter_dag) const
410428 Strings data_files;
411429 for (const auto & manifest_entry : current_snapshot->getManifestList ().getManifestFiles ())
412430 {
413- NamesAndTypesList partition_pruning_names_and_types;
414- std::vector<size_t > partition_pruning_indices;
415- for (size_t i = 0 ; i < manifest_entry.getContent ().getPartitionColumnInfos ().size (); ++i)
416- {
417- // Since some columns may be renamed or deleted, we need to determine the correct column names and types for partition pruning based on the current schema.
418- std::optional<NameAndTypePair> name_and_type = schema_processor.tryGetFieldCharacteristics (
419- current_schema_id, manifest_entry.getContent ().getPartitionColumnInfos ()[i].source_id );
420- if (name_and_type)
421- {
422- partition_pruning_names_and_types.push_back (name_and_type.value ());
423- partition_pruning_indices.push_back (i);
424- }
425- }
426- ExpressionActionsPtr partition_minmax_idx_expr
427- = std::make_shared<ExpressionActions>(ActionsDAG (partition_pruning_names_and_types), ExpressionActionsSettings (getContext ()));
431+ const auto & partition_columns_ids = getRelevantPartitionColumnIds (manifest_entry, schema_processor, current_schema_id);
432+ const auto & partition_pruning_columns_names_and_types
433+ = schema_processor.tryGetFieldsCharacteristics (current_schema_id, partition_columns_ids);
434+
435+ ExpressionActionsPtr partition_minmax_idx_expr = std::make_shared<ExpressionActions>(
436+ ActionsDAG (partition_pruning_columns_names_and_types), ExpressionActionsSettings (getContext ()));
428437 const KeyCondition partition_key_condition (
429- filter_dag, getContext (), partition_pruning_names_and_types .getNames (), partition_minmax_idx_expr);
438+ filter_dag, getContext (), partition_pruning_columns_names_and_types .getNames (), partition_minmax_idx_expr);
430439
431440 const auto & data_files_in_manifest = manifest_entry.getContent ().getDataFiles ();
432441 for (const auto & data_file : data_files_in_manifest)
433442 {
434443 if (data_file.status != ManifestEntryStatus::DELETED)
435444 {
436- std::vector<Range> ranges;
437- ranges.reserve (partition_pruning_indices.size ());
438- for (const auto j : partition_pruning_indices)
439- {
440- ranges.push_back (data_file.partition_ranges [j]);
441- }
442- if (partition_key_condition.checkInHyperrectangle (ranges, partition_pruning_names_and_types.getTypes ()).can_be_true )
443- {
445+ if (partition_key_condition
446+ .checkInHyperrectangle (
447+ data_file.getPartitionRanges (partition_columns_ids), partition_pruning_columns_names_and_types.getTypes ())
448+ .can_be_true )
444449 data_files.push_back (data_file.data_file_name );
445- }
446450 else
447- {
448451 ProfileEvents::increment (ProfileEvents::IcebergPartitionPrunnedFiles);
449- }
450452 }
451453 }
452454 }
0 commit comments