Skip to content
Closed
21 changes: 16 additions & 5 deletions cpp/src/arrow/compute/exec.cc
Original file line number Diff line number Diff line change
Expand Up @@ -140,18 +140,29 @@ ExecBatch ExecBatch::Slice(int64_t offset, int64_t length) const {
return out;
}

Result<ExecBatch> ExecBatch::Make(std::vector<Datum> values) {
if (values.empty()) {
// Builds a new ExecBatch from the values of this batch found at the positions
// listed in `ids`, in the order listed. The new batch keeps this batch's length.
// Fails with Status::Invalid on the first position that is negative or not less
// than the number of values in this batch.
Result<ExecBatch> ExecBatch::SelectValues(const std::vector<int>& ids) const {
  const size_t num_values = values.size();

  std::vector<Datum> picked;
  picked.reserve(ids.size());

  for (const int position : ids) {
    const bool in_range = position >= 0 && static_cast<size_t>(position) < num_values;
    if (!in_range) {
      return Status::Invalid("ExecBatch invalid value selection: ", position);
    }
    picked.push_back(values[position]);
  }

  return ExecBatch(std::move(picked), length);
}

Result<ExecBatch> ExecBatch::Make(std::vector<Datum> values, int64_t length) {
if (values.empty() && length < 0) {
return Status::Invalid("Cannot infer ExecBatch length without at least one value");
}

int64_t length = -1;
for (const auto& value : values) {
if (value.is_scalar()) {
continue;
}

if (length == -1) {
if (length < 0) {
length = value.length();
continue;
}
Expand All @@ -162,7 +173,7 @@ Result<ExecBatch> ExecBatch::Make(std::vector<Datum> values) {
}
}

if (length == -1) {
if (length < 0) {
length = 1;
}

Expand Down
10 changes: 9 additions & 1 deletion cpp/src/arrow/compute/exec.h
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,13 @@ struct ARROW_EXPORT ExecBatch {

explicit ExecBatch(const RecordBatch& batch);

static Result<ExecBatch> Make(std::vector<Datum> values);
/// \brief Creates an ExecBatch with length-validation.
///
/// Scalar values are skipped when determining the length; only non-scalar values
/// contribute, and they must all have a common length.
///
/// If the given length is negative, the length of the ExecBatch is inferred: it is
/// set to the common length of the non-scalar values, or to 1 if every value is a
/// scalar. If no values are given at all and the given length is negative, there is
/// nothing to infer the length from and Status::Invalid is returned.
///
/// If the given length is non-negative, it must equal the common length of the
/// non-scalar values, if there are any.
///
/// NOTE(review): this factory exists alongside direct construction of an ExecBatch
/// from values and a length; the difference is presumably that only Make performs
/// the length checks described above -- confirm against the constructor.
static Result<ExecBatch> Make(std::vector<Datum> values, int64_t length = -1);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At first glance, a reader might be confused about why both ExecBatch::Make and ExecBatch::ExecBatch are now needed, since both take a vector of values and a length.

Looking closer it seems the Make function does the extra work of verifying that the length of the datums match the given length.

Could you add some comments explaining this for future readers?


Result<std::shared_ptr<RecordBatch>> ToRecordBatch(
std::shared_ptr<Schema> schema, MemoryPool* pool = default_memory_pool()) const;
Expand Down Expand Up @@ -227,6 +233,8 @@ struct ARROW_EXPORT ExecBatch {

ExecBatch Slice(int64_t offset, int64_t length) const;

/// \brief Return a new ExecBatch holding only the values at the given indices.
///
/// The selected values appear in the order given by `ids` (an index may be listed
/// more than once), and the result has the same length as this batch.
///
/// \param ids indices into this batch's values
/// \return the selected batch, or Status::Invalid if any index is negative or
/// not less than the number of values in this batch
Result<ExecBatch> SelectValues(const std::vector<int>& ids) const;

/// \brief A convenience for returning the types from the batch.
std::vector<TypeHolder> GetTypes() const {
std::vector<TypeHolder> result;
Expand Down
Loading