Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion apis/r/tests/testthat/helper-test-data.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,6 @@ create_arrow_table <- function(nrows = 10L, factors = FALSE) {
soma_joinid = bit64::seq.integer64(from = 0L, to = nrows - 1L),
bar = seq(nrows) + 0.1,
baz = as.character(seq.int(nrows) + 1000L)
# schema = create_arrow_schema()
# schema = create_arrow_schema(false)
)
}
81 changes: 81 additions & 0 deletions libtiledbsoma/src/soma/soma_array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1176,6 +1176,22 @@ uint64_t SOMAArray::nnz_slow() {
}

std::vector<int64_t> SOMAArray::shape() {
// There are two reasons for this:
// * Transitional, non-monolithic, phased, careful development for the
// new-shape feature
// * Even after the new-shape feature is fully released, there will be old
// arrays on disk that were created before this feature existed.
// So this is long-term code.
auto current_domain = _get_current_domain();
return current_domain.is_empty() ? _tiledb_domain() :
_tiledb_current_domain();
}

std::vector<int64_t> SOMAArray::maxshape() {
    // The maxshape is whatever _tiledb_domain() reports; no current-domain
    // lookup is involved.
    std::vector<int64_t> domain_extents = _tiledb_domain();
    return domain_extents;
}

std::vector<int64_t> SOMAArray::_tiledb_domain() {
std::vector<int64_t> result;
auto dimensions = mq_->schema()->domain().dimensions();

Comment thread
johnkerl marked this conversation as resolved.
Expand All @@ -1202,6 +1218,71 @@ std::vector<int64_t> SOMAArray::shape() {
return result;
}

std::vector<int64_t> SOMAArray::_tiledb_current_domain() {
std::vector<int64_t> result;

auto current_domain = tiledb::ArraySchemaExperimental::current_domain(
*ctx_->tiledb_ctx(), arr_->schema());

if (current_domain.is_empty()) {
throw TileDBSOMAError(
"Internal error: current domain requested for an array which does "
"not support it");
}

auto t = current_domain.type();
if (t != TILEDB_NDRECTANGLE) {
throw TileDBSOMAError("current_domain type is not NDRECTANGLE");
}

NDRectangle ndrect = current_domain.ndrectangle();

for (auto dimension_name : dimension_names()) {
// TODO: non-int64 types for SOMADataFrame extra dims.
// This simply needs to be integrated with switch statements as in the
// legacy code below.
auto range = ndrect.range<int64_t>(dimension_name);
result.push_back(range[1] + 1);
}
return result;
}

void SOMAArray::resize(const std::vector<int64_t>& newshape) {
if (mq_->query_type() != TILEDB_WRITE) {
throw TileDBSOMAError(
"[SOMAArray::resize] array must be opened in write mode");
}

auto tctx = ctx_->tiledb_ctx();
ArraySchema schema = arr_->schema();
Domain domain = schema.domain();
ArraySchemaEvolution schema_evolution(*tctx);
Comment thread
johnkerl marked this conversation as resolved.
CurrentDomain new_current_domain(*tctx);

NDRectangle ndrect(*tctx, domain);

// TODO: non-int64 for DataFrame when it has extra index dims.
// This will be via a resize-helper.

unsigned n = domain.ndim();
if ((unsigned)newshape.size() != n) {
throw TileDBSOMAError(fmt::format(
"[SOMAArray::resize]: newshape has dimension count {}; array has "
"{} ",
newshape.size(),
n));
}

for (unsigned i = 0; i < n; i++) {
ndrect.set_range<int64_t>(
domain.dimension(i).name(), 0, newshape[i] - 1);
}

new_current_domain.set_ndrectangle(ndrect);
schema_evolution.expand_current_domain(new_current_domain);
schema_evolution.array_evolve(uri_);
}

uint64_t SOMAArray::ndim() const {
    // The dimension count is read from the domain of the array's schema.
    const auto domain = tiledb_schema()->domain();
    return domain.ndim();
}
Expand Down
69 changes: 67 additions & 2 deletions libtiledbsoma/src/soma/soma_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -571,13 +571,54 @@ class SOMAArray : public SOMAObject {
}

/**
 * @brief Get the current capacity of each dimension.
 *
 * This applies to arrays all of whose dims are of type int64_t: this
 * includes SOMASparseNDArray and SOMADenseNDArray, and default-indexed
 * SOMADataFrame.
 *
 * At the TileDB-SOMA level we call this "shape". At the TileDB Core
 * storage level this maps to "current domain". For arrays whose current
 * domain is empty, the core domain is reported instead.
 *
 * Further, we map this single n to the pair (0, n-1) since core permits a
 * doubly inclusive pair (lo, hi) on each dimension slot.
 *
 * @return A vector with length equal to the number of dimensions; each
 * value in the vector is the capacity of each dimension.
 */
std::vector<int64_t> shape();

/**
 * @brief Get the maximum resizable capacity of each dimension.
 *
 * This applies to arrays all of whose dims are of type int64_t: this
 * includes SOMASparseNDArray and SOMADenseNDArray, and default-indexed
 * SOMADataFrame.
 *
 * At the TileDB-SOMA level we call this "maxshape". At the TileDB Core
 * storage level this maps to "domain". For arrays whose current domain is
 * empty, shape() reports this same value.
 *
 * Further, we map this single n to the pair (0, n-1) since core permits a
 * doubly inclusive pair (lo, hi) on each dimension slot.
 *
 * @return A vector with length equal to the number of dimensions; each
 * value in the vector is the maximum capacity of each dimension.
 */
std::vector<int64_t> maxshape();

/**
 * @brief Resize the shape (what core calls "current domain") up to the
 * maxshape (what core calls "domain").
 *
 * This applies to arrays all of whose dims are of type int64_t: this
 * includes SOMASparseNDArray and SOMADenseNDArray, and default-indexed
 * SOMADataFrame.
 *
 * @param newshape The requested capacity of each dimension, one entry per
 * dimension; entry n is stored as the doubly inclusive pair (0, n-1).
 *
 * @return Nothing. Raises an exception if the resize would be a downsize,
 * which is not supported.
 *
 * @throws TileDBSOMAError if the array is not opened in write mode, or if
 * the length of newshape differs from the number of dimensions.
 */
void resize(const std::vector<int64_t>& newshape);

/**
* @brief Get the number of dimensions.
*
Expand Down Expand Up @@ -769,6 +810,30 @@ class SOMAArray : public SOMAObject {
*/
ArraySchemaEvolution _make_se();

/**
 * Fetches the core current domain recorded in this array's schema.
 *
 * Callers must inspect .is_empty() on the result: false means a new-style
 * array with current-domain support, true means an old-style array without
 * it. A std::optional<CurrentDomain> return value could carry the same
 * information, but it would be a redundant indicator.
 */
CurrentDomain _get_current_domain() {
    const auto schema = arr_->schema();
    return tiledb::ArraySchemaExperimental::current_domain(
        *ctx_->tiledb_ctx(), schema);
}

/**
 * With old shape: core domain mapped to tiledbsoma shape; core current
 * domain did not exist.
 *
 * With new shape: core domain maps to tiledbsoma maxshape;
 * core current_domain maps to tiledbsoma shape.
 *
 * Here we distinguish between user-side API, and core-side implementation.
 *
 * _tiledb_domain() backs maxshape(), and also backs shape() for arrays
 * whose current domain is empty.
 */
std::vector<int64_t> _tiledb_domain();

/**
 * Backs shape() for arrays whose current domain is non-empty: returns, per
 * dimension, the upper bound of the core current domain plus one.
 *
 * Throws TileDBSOMAError if the current domain is empty or is not of
 * NDRECTANGLE type.
 */
std::vector<int64_t> _tiledb_current_domain();

bool _extend_enumeration(
ArrowSchema* value_schema,
ArrowArray* value_array,
Expand Down
2 changes: 1 addition & 1 deletion libtiledbsoma/src/soma/soma_collection.h
Original file line number Diff line number Diff line change
Expand Up @@ -281,4 +281,4 @@ class SOMACollection : public SOMAGroup {
};
} // namespace tiledbsoma

#endif // SOMA_COLLECTION
#endif // SOMA_COLLECTION
Loading