@@ -1654,6 +1654,90 @@ def update_matrix(
16541654 )
16551655
16561656
1657+ def _validate_matrix_to_collection (
1658+ exp : Experiment ,
1659+ meas : Measurement ,
1660+ collection_name : str ,
1661+ matrix_name : str ,
1662+ matrix_data : Matrix | h5py .Dataset ,
1663+ ) -> None :
1664+ """Validates against the Experiment shape, as defined by the axis dataframes.
1665+ Skips validation for pre-1.15 schemas till issue is resolved
1666+ https://github.com/single-cell-data/TileDB-SOMA/pull/4258#discussion_r2429676202.
1667+
1668+ Args:
1669+ exp: The experiment object
1670+ measurement_name: Name of the measurement
1671+ collection_name: Name of the collection
1672+ matrix_name: Name of the matrix
1673+ matrix_data: The matrix data to validate
1674+ meas: The already-opened measurement object
1675+
1676+ Raises:
1677+ ValueError: If the matrix shape is incompatible.
1678+ """
1679+ if not exp .obs .tiledbsoma_has_upgraded_domain :
1680+ warnings .warn (
1681+ f"Skipped validation for arrays with pre-1.15 schema\
1682+ { matrix_name } " ,
1683+ stacklevel = 2 ,
1684+ )
1685+ return
1686+
1687+ obs_soma_joinid_domain = exp .obs ._maybe_soma_joinid_shape
1688+
1689+ # Try to access the var DataFrame directly, fallback to explicit open if needed
1690+ if meas .var .closed :
1691+ with DataFrame .open (meas .var .uri , "r" , context = meas .context , tiledb_timestamp = meas .tiledb_timestamp ) as var_df :
1692+ var_soma_joinid_domain = var_df ._maybe_soma_joinid_shape
1693+ else :
1694+ var_soma_joinid_domain = meas .var ._maybe_soma_joinid_shape
1695+
1696+ target_obs_size = obs_soma_joinid_domain # O: Observation (row) size
1697+ target_var_size = var_soma_joinid_domain # V: Variable (column) size
1698+ target_shape = (target_obs_size , target_var_size )
1699+ # Check if the dimensionality is the same
1700+ if len (matrix_data .shape ) != len (target_shape ):
1701+ raise SOMAError (
1702+ f"Matrix '{ matrix_name } ' has { len (matrix_data .shape )} dimensions, "
1703+ f"but the existing matrix has { len (target_shape )} . Shapes must match."
1704+ )
1705+
1706+ if collection_name == "X" :
1707+ # X: (O, V) - Must match both dimensions exactly
1708+ if matrix_data .shape != target_shape :
1709+ raise SOMAError (
1710+ f"Matrix 'X' shape { matrix_data .shape } is incompatible with target shape { target_shape } . "
1711+ "Both dimensions must match exactly."
1712+ )
1713+ elif collection_name == "obsm" :
1714+ if matrix_data .shape [0 ] != target_obs_size :
1715+ raise SOMAError (
1716+ f"Matrix '{ matrix_name } ' must match the observation size (O). "
1717+ f"Found { matrix_data .shape [0 ]} , expected { target_obs_size } ."
1718+ )
1719+ elif collection_name == "varm" :
1720+ if matrix_data .shape [0 ] != target_var_size :
1721+ raise SOMAError (
1722+ f"Matrix '{ matrix_name } ' must match the variable size (V). "
1723+ f"Found { matrix_data .shape [0 ]} , expected { target_var_size } ."
1724+ )
1725+ elif collection_name == "obsp" :
1726+ expected_shape = (target_obs_size , target_obs_size )
1727+ if matrix_data .shape [0 :2 ] != expected_shape :
1728+ raise SOMAError (
1729+ f"Matrix '{ matrix_name } ' must match the observation size. "
1730+ f"Found { matrix_data .shape [0 :2 ]} , expected { (target_obs_size , target_obs_size )} ."
1731+ )
1732+ elif collection_name == "varp" :
1733+ expected_shape = (target_var_size , target_var_size )
1734+ if matrix_data .shape [0 :2 ] != expected_shape :
1735+ raise SOMAError (
1736+ f"Matrix '{ matrix_name } ' must match the observation size (O). "
1737+ f"Found { matrix_data .shape [0 :2 ]} , expected { (target_var_size , target_var_size )} ."
1738+ )
1739+
1740+
16571741def add_X_layer (
16581742 exp : Experiment ,
16591743 measurement_name : str ,
@@ -1663,11 +1747,34 @@ def add_X_layer(
16631747 ingest_mode : IngestMode = "write" ,
16641748 use_relative_uri : bool | None = None ,
16651749 context : SOMATileDBContext | None = None ,
1750+ schema_validation : bool = True ,
16661751) -> None :
1667- """This is useful for adding X data, for example from
1752+ """Add a new X layer to a measurement in the experiment.
1753+
1754+ This is useful for adding X data, for example from
16681755 `Scanpy <https://scanpy.readthedocs.io/>`_'s ``scanpy.pp.normalize_total``,
16691756 ``scanpy.pp.log1p``, etc.
16701757
1758+ Args:
1759+ exp: The experiment to add the X layer to. Must be open for writing.
1760+ measurement_name: Name of the measurement within the experiment.
1761+ X_layer_name: Name for the new X layer (e.g., "normalized", "log1p").
1762+ X_layer_data: Matrix data to store in the X layer. Can be a sparse matrix
1763+ (e.g., scipy.csr_matrix) or an h5py.Dataset.
1764+ ingest_mode: Mode for ingestion. Defaults to "write".
1765+ use_relative_uri: Whether to use relative URIs when storing references.
1766+ If None, the default behavior is used.
1767+ context: TileDB context for the operation. If None, uses the default context.
1768+ schema_validation: Whether to validate the matrix schema before adding.
1769+ Defaults to True.
1770+
1771+ Returns:
1772+ None
1773+
1774+ Raises:
1775+ ValueError: If the experiment is not open for writing or if schema
1776+ validation fails.
1777+
16711778 Lifecycle:
16721779 Maturing.
16731780 """
@@ -1682,6 +1789,7 @@ def add_X_layer(
16821789 ingest_mode = ingest_mode ,
16831790 use_relative_uri = use_relative_uri ,
16841791 context = context ,
1792+ schema_validation = schema_validation ,
16851793 )
16861794
16871795
@@ -1695,10 +1803,39 @@ def add_matrix_to_collection(
16951803 ingest_mode : IngestMode = "write" ,
16961804 use_relative_uri : bool | None = None ,
16971805 context : SOMATileDBContext | None = None ,
1806+ schema_validation : bool = True ,
16981807) -> None :
1699- """This is useful for adding X/obsp/varm/etc data, for example from
1808+ """Add a matrix to a specified collection within a measurement.
1809+
1810+ This is useful for adding X/obsp/varm/etc data, for example from
17001811 Scanpy's ``scanpy.pp.normalize_total``, ``scanpy.pp.log1p``, etc.
17011812
1813+ Args:
1814+ exp: The experiment to add the matrix to. Must be open for writing.
1815+ measurement_name: Name of the measurement within the experiment.
1816+ collection_name: Name of the collection to add the matrix to
1817+ (e.g., "X", "obsp", "varm").
1818+ matrix_name: Name for the new matrix within the collection.
1819+ matrix_data: Matrix data to store. Can be a sparse matrix
1820+ (e.g., scipy.csr_matrix) or an h5py.Dataset.
1821+ ingest_mode: Mode for ingestion. Defaults to "write".
1822+ use_relative_uri: Whether to use relative URIs when storing references.
1823+ If None, the default behavior is used.
1824+ context: TileDB context for the operation. If None, uses the default context.
1825+ schema_validation: Whether to validate the matrix schema before adding.
1826+ Defaults to True.
1827+
1828+ Returns:
1829+ None
1830+
1831+ Raises:
1832+ ValueError: If schema validation fails or if the collection structure
1833+ is invalid.
1834+
1835+ Notes:
1836+ - If the collection doesn't exist, it will be created automatically.
1837+ - The matrix is stored as a SparseNDArray with identity axis mappings.
1838+
17021839 Lifecycle:
17031840 Maturing.
17041841 """
@@ -1711,13 +1848,15 @@ def add_matrix_to_collection(
17111848 # tiledb://namespace/uuid. When the caller passes a creation URI (which
17121849 # they must) via exp.uri, we need to follow that.
17131850 extend_creation_uri = exp .uri .startswith ("tiledb://" )
1714-
17151851 with exp .ms [measurement_name ] as meas :
17161852 if extend_creation_uri :
17171853 coll_uri = f"{ exp .uri } /ms/{ _util .sanitize_key (measurement_name )} /{ _util .sanitize_key (collection_name )} "
17181854 else :
17191855 coll_uri = _util .uri_joinpath (meas .uri , _util .sanitize_key (collection_name ))
17201856
1857+ if schema_validation :
1858+ _validate_matrix_to_collection (exp , meas , collection_name , matrix_name , matrix_data )
1859+
17211860 if collection_name in meas :
17221861 coll = cast ("Collection[RawHandle]" , meas [collection_name ])
17231862 else :
@@ -1728,6 +1867,7 @@ def add_matrix_to_collection(
17281867 context = context ,
17291868 )
17301869 _maybe_set (meas , collection_name , coll , use_relative_uri = use_relative_uri )
1870+
17311871 with coll :
17321872 matrix_uri = _util .uri_joinpath (coll_uri , _util .sanitize_key (matrix_name ))
17331873
0 commit comments