Skip to content

Masked Structured dtype numpy array fails to save with validation error #1909

@Cadair

Description

@Cadair

I'm trying to save a masked table of FITS headers to ASDF as a structured NumPy array (to make use of views and lighter schemas), but writing the file fails with the following validation error:

import asdf
import numpy as np
from astropy.table import Table

# Make a list of dicts
headers = [{"a": i, "b": i*10} for i in range(1, 51, 5)]
# Give one a missing value
headers[5].pop("a")

headers_arr = Table(headers).as_array()
asdf.AsdfFile({"headers": headers_arr}).write_to("/tmp/test.asdf")

ValidationError: {'shape': [10], 'source': 0, 'datatype': [{'name': 'a', 'datatype': 'int64', 'byteorder': 'little'}, {'name': 'b', 'datatype': 'int64', 'byteorder': 'little'}], 'byteorder': 'big', 'mask': {'shape': [10], 'source': 1, 'datatype': [{'name': 'a', 'datatype': 'bool8', 'byteorder': 'big'}, {'name': 'b', 'datatype': 'bool8', 'byteorder': 'big'}], 'byteorder': 'big'}} is not valid under any of the given schemas

---------------------------------------------------------------------------
ValidationError                           Traceback (most recent call last)
Cell In[27], line 1
----> 1 asdf.AsdfFile({"headers": Table(headers).as_array()}).write_to("/tmp/test.asdf")

File ~/.virtualenvs/dkist/lib/python3.13/site-packages/asdf/_asdf.py:1251, in AsdfFile.write_to(self, fd, all_array_storage, all_array_compression, compression_kwargs, pad_blocks, include_block_index, version)
   1249 try:
   1250     with generic_io.get_file(fd, mode="w") as fd:
-> 1251         self._serial_write(fd, pad_blocks, include_block_index)
   1252 finally:
   1253     if version is not None:

File ~/.virtualenvs/dkist/lib/python3.13/site-packages/asdf/_asdf.py:1018, in AsdfFile._serial_write(self, fd, pad_blocks, include_block_index)
   1015     if "history" in self._tree:
   1016         tree["history"] = copy.deepcopy(self._tree["history"])
-> 1018     self._write_tree(tree, fd, pad_blocks)
   1019     self._blocks.write(pad_blocks, include_block_index)
   1020 finally:

File ~/.virtualenvs/dkist/lib/python3.13/site-packages/asdf/_asdf.py:990, in AsdfFile._write_tree(self, tree, fd, pad_blocks)
    987         else:
    988             tagged_tree.pop("history", None)
--> 990     yamlutil.dump_tree(
    991         tree,
    992         fd,
    993         self,
    994         tree_finalizer=_tree_finalizer,
    995         _serialization_context=serialization_context,
    996     )
    998 if pad_blocks:
    999     padding = util.calculate_padding(fd.tell(), pad_blocks, fd.block_size)

File ~/.virtualenvs/dkist/lib/python3.13/site-packages/asdf/yamlutil.py:418, in dump_tree(tree, fd, ctx, tree_finalizer, _serialization_context)
    416 if tree_finalizer is not None:
    417     tree_finalizer(tree)
--> 418 schema.validate(tree, ctx)
    420 # add yaml %TAG definitions from extensions
    421 if _serialization_context:

File ~/.virtualenvs/dkist/lib/python3.13/site-packages/asdf/schema.py:637, in validate(instance, ctx, schema, validators, reading, *args, **kwargs)
    634     ctx = AsdfFile()
    636 validator = get_validator({} if schema is None else schema, ctx, validators, None, *args, **kwargs)
--> 637 validator.validate(instance)
    639 additional_validators = [_validate_large_literals]
    640 if ctx.version >= versioning.RESTRICTED_KEYS_MIN_VERSION:

File ~/.virtualenvs/dkist/lib/python3.13/site-packages/asdf/_jsonschema/validators.py:312, in create.<locals>.Validator.validate(self, *args, **kwargs)
    310 def validate(self, *args, **kwargs):
    311     for error in self.iter_errors(*args, **kwargs):
--> 312         raise error

ValidationError: {'shape': [10], 'source': 0, 'datatype': [{'name': 'a', 'datatype': 'int64', 'byteorder': 'little'}, {'name': 'b', 'datatype': 'int64', 'byteorder': 'little'}], 'byteorder': 'big', 'mask': {'shape': [10], 'source': 1, 'datatype': [{'name': 'a', 'datatype': 'bool8', 'byteorder': 'big'}, {'name': 'b', 'datatype': 'bool8', 'byteorder': 'big'}], 'byteorder': 'big'}} is not valid under any of the given schemas

Failed validating 'anyOf' in schema:
    {'$schema': 'http://stsci.edu/schemas/yaml-schema/draft-01',
     'anyOf': [{'$ref': '#/definitions/inline-data'},
               {'dependencies': {'source': ['shape',
                                            'datatype',
                                            'byteorder']},
                'oneOf': [{'required': ['source']}, {'required': ['data']}],
                'properties': {'byteorder': {'description': 'The byte '
                                                            'order (big- '
                                                            'or '
                                                            'little-endian) '
                                                            'of the array '
                                                            'data.\n',
                                             'enum': ['big', 'little'],
                                             'type': 'string'},
                               'data': {'$ref': '#/definitions/inline-data',
                                        'description': 'The data for the '
                                                       'array inline.\n'
                                                       '\n'
                                                       'If `datatype` '
                                                       'and/or `shape` are '
                                                       'also provided, '
                                                       'they must\n'
                                                       'match the data '
                                                       'here and can be '
                                                       'used as a '
                                                       'consistency '
                                                       'check.\n'
                                                       '`strides`, '
                                                       '`offset` and '
                                                       '`byteorder` are '
                                                       'meaningless when\n'
                                                       '`data` is '
                                                       'provided.\n'},
                               'datatype': {'$ref': '#/definitions/datatype',
                                            'description': 'The data '
                                                           'format of the '
                                                           'array '
                                                           'elements.\n'},
                               'mask': {'anyOf': [{'type': 'number'},
                                                  {'$ref': 'complex-1.0.0'},
                                                  {'allOf': [{'$ref': 'ndarray-1.1.0'},
                                                             {'datatype': 'bool8'}]}],
                                        'description': 'Describes how '
                                                       'missing values in '
                                                       'the array are '
                                                       'stored.  If a '
                                                       'scalar number, '
                                                       'that number is '
                                                       'used to represent '
                                                       'missing values. If '
                                                       'an ndarray, the '
                                                       'given array '
                                                       'provides a mask, '
                                                       'where non-zero '
                                                       'values represent '
                                                       'missing values in '
                                                       'this array.  The '
                                                       'mask array must be '
                                                       'broadcastable to '
                                                       'the dimensions of '
                                                       'this array.\n'},
                               'offset': {'default': 0,
                                          'description': 'The offset, in '
                                                         'bytes, within '
                                                         'the data for '
                                                         'this start of '
                                                         'this view.\n',
                                          'minimum': 0,
                                          'type': 'integer'},
                               'shape': {'description': 'The shape of the '
                                                        'array.\n'
                                                        '\n'
                                                        'The first entry '
                                                        'may be the string '
                                                        '`*`, indicating '
                                                        'that the\n'
                                                        'length of the '
                                                        'first index of '
                                                        'the array will be '
                                                        'automatically\n'
                                                        'determined from '
                                                        'the size of the '
                                                        'block.  This is '
                                                        'used for\n'
                                                        'streaming '
                                                        'support.\n',
                                         'items': {'anyOf': [{'minimum': 0,
                                                              'type': 'integer'},
                                                             {'enum': ['*']}]},
                                         'type': 'array'},
                               'source': {'anyOf': [{'type': 'integer'},
                                                    {'format': 'uri',
                                                     'type': 'string'}],
                                          'description': 'The source of '
                                                         'the data.\n'
                                                         '\n'
                                                         '- If an integer: '
                                                         'If positive, the '
                                                         'zero-based index '
                                                         'of the\n'
                                                         '  block within '
                                                         'the same file. '
                                                         'If negative, the '
                                                         'index from\n'
                                                         '  the last block '
                                                         'within the same '
                                                         'file.  For '
                                                         'example, a\n'
                                                         '  source of `-1` '
                                                         'corresponds to '
                                                         'the last block '
                                                         'in the same\n'
                                                         '  file.\n'
                                                         '\n'
                                                         '- If a string, a '
                                                         'URI to an '
                                                         'external ASDF '
                                                         'file containing '
                                                         'the\n'
                                                         '  block data.  '
                                                         'Relative URIs '
                                                         'and ``file:`` '
                                                         'and ``http:``\n'
                                                         '  protocols must '
                                                         'be supported.  '
                                                         'Other protocols '
                                                         'may be '
                                                         'supported\n'
                                                         '  by specific '
                                                         'library '
                                                         'implementations.\n'
                                                         '\n'
                                                         'The ability to '
                                                         'reference block '
                                                         'data in an '
                                                         'external ASDF '
                                                         'file\n'
                                                         'is intentionally '
                                                         'limited to the '
                                                         'first block in '
                                                         'the external\n'
                                                         'ASDF file, and '
                                                         'is intended only '
                                                         'to support the '
                                                         'needs of\n'
                                                         '[exploded](ref:exploded).  '
                                                         'For the more '
                                                         'general case of\n'
                                                         'referencing data '
                                                         'in an external '
                                                         'ASDF file, use '
                                                         'tree\n'
                                                         '[references](ref:references).\n'},
                               'strides': {'description': 'The number of '
                                                          'bytes to skip '
                                                          'in each '
                                                          'dimension.  If '
                                                          'not provided, '
                                                          'the array is '
                                                          'assumed by be '
                                                          'contiguous and '
                                                          'in C order.  If '
                                                          'provided, must '
                                                          'be the same '
                                                          'length as the '
                                                          'shape '
                                                          'property.\n',
                                           'items': {'anyOf': [{'minimum': 1,
                                                                'type': 'integer'},
                                                               {'maximum': -1,
                                                                'type': 'integer'}]},
                                           'type': 'array'}},
                'propertyOrder': ['source',
                                  'data',
                                  'mask',
                                  'datatype',
                                  'byteorder',
                                  'shape',
                                  'offset',
                                  'strides'],
                'type': 'object'}],
     'definitions': {'datatype': {'anyOf': [{'$ref': '#/definitions/scalar-datatype'},
                                            {'items': {'anyOf': [{'$ref': '#/definitions/scalar-datatype'},
                                                                 {'properties': {'byteorder': {'description': 'The '
                                                                                                              'byteorder '
                                                                                                              'for '
                                                                                                              'the '
                                                                                                              'field.  '
                                                                                                              'If '
                                                                                                              'not '
                                                                                                              'provided, '
                                                                                                              'the\n'
                                                                                                              'byteorder '
                                                                                                              'of '
                                                                                                              'the '
                                                                                                              'datatype '
                                                                                                              'as '
                                                                                                              'a '
                                                                                                              'whole '
                                                                                                              'will '
                                                                                                              'be '
                                                                                                              'used.\n',
                                                                                               'enum': ['big',
                                                                                                        'little'],
                                                                                               'type': 'string'},
                                                                                 'datatype': {'$ref': '#/definitions/datatype'},
                                                                                 'name': {'description': 'The '
                                                                                                         'name '
                                                                                                         'of '
                                                                                                         'the '
                                                                                                         'field',
                                                                                          'pattern': '[A-Za-z_][A-Za-z0-9_]*',
                                                                                          'type': 'string'},
                                                                                 'shape': {'items': {'minimum': 0,
                                                                                                     'type': 'integer'},
                                                                                           'type': 'array'}},
                                                                  'required': ['datatype'],
                                                                  'type': 'object'}]},
                                             'type': 'array'}],
                                  'description': 'The data format of the '
                                                 'array elements.  May be '
                                                 'a single scalar\n'
                                                 'datatype, or may be a '
                                                 'nested list of '
                                                 'datatypes.  When a list, '
                                                 'each field\n'
                                                 'may have a name.\n'},
                     'inline-data': {'description': 'Inline data is stored '
                                                    'in YAML format '
                                                    'directly in the tree, '
                                                    'rather than\n'
                                                    'referencing a binary '
                                                    'block.  It is made '
                                                    'out of nested lists.\n'
                                                    '\n'
                                                    'If the datatype of '
                                                    'the array is not '
                                                    'specified, it is '
                                                    'inferred from\n'
                                                    'the array contents.  '
                                                    'Type inference is '
                                                    'supported only for\n'
                                                    'homogeneous arrays, '
                                                    'not tables.\n'
                                                    '\n'
                                                    '- If any of the '
                                                    'elements in the array '
                                                    'are YAML strings, '
                                                    'the\n'
                                                    '  `datatype` of the '
                                                    'entire array is '
                                                    '`ucs4`, with the '
                                                    'width of\n'
                                                    '  the largest string '
                                                    'in the column, '
                                                    'otherwise...\n'
                                                    '\n'
                                                    '- If any of the '
                                                    'elements in the array '
                                                    'are complex numbers, '
                                                    'the\n'
                                                    '  `datatype` of the '
                                                    'entire column is '
                                                    '`complex128`, '
                                                    'otherwise...\n'
                                                    '\n'
                                                    '- If any of the types '
                                                    'in the column are '
                                                    'numbers with a '
                                                    'decimal\n'
                                                    '  point, the '
                                                    '`datatype` of the '
                                                    'entire column is '
                                                    '`float64`,\n'
                                                    '  otherwise..\n'
                                                    '\n'
                                                    '- If any of the types '
                                                    'in the column are '
                                                    'integers, the '
                                                    '`datatype`\n'
                                                    '  of the entire '
                                                    'column is `int64`, '
                                                    'otherwise...\n'
                                                    '\n'
                                                    '- The `datatype` of '
                                                    'the entire column is '
                                                    '`bool8`.\n'
                                                    '\n'
                                                    'Masked values may be '
                                                    'included in the array '
                                                    'using `null`.  If an\n'
                                                    'explicit mask array '
                                                    'is also provided, it '
                                                    'takes precedence.\n',
                                     'items': {'anyOf': [{'type': 'number'},
                                                         {'type': 'string'},
                                                         {'type': 'null'},
                                                         {'$ref': 'complex-1.0.0'},
                                                         {'$ref': '#/definitions/inline-data'},
                                                         {'type': 'boolean'}]},
                                     'type': 'array'},
                     'scalar-datatype': {'anyOf': [{'enum': ['int8',
                                                             'uint8',
                                                             'int16',
                                                             'uint16',
                                                             'int32',
                                                             'uint32',
                                                             'int64',
                                                             'uint64',
                                                             'float16',
                                                             'float32',
                                                             'float64',
                                                             'complex64',
                                                             'complex128',
                                                             'bool8'],
                                                    'type': 'string'},
                                                   {'items': [{'enum': ['ascii',
                                                                        'ucs4'],
                                                               'type': 'string'},
                                                              {'minimum': 0,
                                                               'type': 'integer'}],
                                                    'maxLength': 2,
                                                    'minLength': 2,
                                                    'type': 'array'}],
                                         'description': 'Describes the '
                                                        'type of a single '
                                                        'element.\n'
                                                        '\n'
                                                        'There is a set of '
                                                        'numeric types, '
                                                        'each with a '
                                                        'single '
                                                        'identifier:\n'
                                                        '\n'
                                                        '- `int8`, '
                                                        '`int16`, `int32`, '
                                                        '`int64`: Signed '
                                                        'integer types, '
                                                        'with\n'
                                                        '  the given bit '
                                                        'size.\n'
                                                        '\n'
                                                        '- `uint8`, '
                                                        '`uint16`, '
                                                        '`uint32`, '
                                                        '`uint64`: '
                                                        'Unsigned integer '
                                                        'types,\n'
                                                        '  with the given '
                                                        'bit size.\n'
                                                        '\n'
                                                        '- `float16`: '
                                                        'Half-precision '
                                                        'floating-point '
                                                        'type or '
                                                        '"binary16",\n'
                                                        '  as defined in '
                                                        'IEEE 754.\n'
                                                        '\n'
                                                        '- `float32`: '
                                                        'Single-precision '
                                                        'floating-point '
                                                        'type or '
                                                        '"binary32",\n'
                                                        '  as defined in '
                                                        'IEEE 754.\n'
                                                        '\n'
                                                        '- `float64`: '
                                                        'Double-precision '
                                                        'floating-point '
                                                        'type or '
                                                        '"binary64",\n'
                                                        '  as defined in '
                                                        'IEEE 754.\n'
                                                        '\n'
                                                        '- `complex64`: '
                                                        'Complex number '
                                                        'where the real '
                                                        'and imaginary '
                                                        'parts\n'
                                                        '  are each '
                                                        'single-precision '
                                                        'floating-point '
                                                        '("binary32") '
                                                        'numbers,\n'
                                                        '  as defined in '
                                                        'IEEE 754.\n'
                                                        '\n'
                                                        '- `complex128`: '
                                                        'Complex number '
                                                        'where the real '
                                                        'and imaginary\n'
                                                        '  parts are each '
                                                        'double-precision '
                                                        'floating-point '
                                                        '("binary64")\n'
                                                        '  numbers, as '
                                                        'defined in IEEE '
                                                        '754.\n'
                                                        '\n'
                                                        'There are two '
                                                        'distinct '
                                                        'fixed-length '
                                                        'string types, '
                                                        'which must\n'
                                                        'be indicated with '
                                                        'a 2-element array '
                                                        'where the first '
                                                        'element is an\n'
                                                        'identifier for '
                                                        'the string type, '
                                                        'and the second is '
                                                        'a length:\n'
                                                        '\n'
                                                        '- `ascii`: A '
                                                        'string containing '
                                                        'ASCII text (all '
                                                        'codepoints <\n'
                                                        '  128), where '
                                                        'each character is '
                                                        '1 byte.\n'
                                                        '\n'
                                                        '- `ucs4`: A '
                                                        'string containing '
                                                        'unicode text in '
                                                        'the UCS-4\n'
                                                        '  encoding, where '
                                                        'each character is '
                                                        'always 4 bytes '
                                                        'long.  Here\n'
                                                        '  the number of '
                                                        'bytes used is 4 '
                                                        'times the given '
                                                        'length.\n'}},
     'description': 'There are two ways to store the data in an ndarray.\n'
                    '\n'
                    '- Inline in the tree: This is recommended only for '
                    'small arrays.  In\n'
                    '  this case, the entire ``ndarray`` tag may be a '
                    'nested list, in\n'
                    '  which case the type of the array is inferred from '
                    'the content.\n'
                    '  (See the rules for type inference in the '
                    '``inline-data``\n'
                    '  definition below.)  The inline data may also be '
                    'given in the\n'
                    '  ``data`` property, in which case it is possible to '
                    'explicitly\n'
                    '  specify the ``datatype`` and other properties.\n'
                    '\n'
                    '- External to the tree: The data comes from a '
                    '[block](ref:block)\n'
                    '  within the same ASDF file or an external ASDF file '
                    'referenced by a\n'
                    '  URI.\n',
     'examples': [['An inline array, with implicit data type',
                   'asdf-standard-1.6.0',
                   '!core/ndarray-1.1.0\n'
                   '  [[1, 0, 0],\n'
                   '   [0, 1, 0],\n'
                   '   [0, 0, 1]]\n'],
                  ['An inline array, with an explicit data type',
                   'asdf-standard-1.6.0',
                   '!core/ndarray-1.1.0\n'
                   '  datatype: float64\n'
                   '  data:\n'
                   '    [[1, 0, 0],\n'
                   '     [0, 1, 0],\n'
                   '     [0, 0, 1]]\n'],
                  ['An inline structured array, where the types of each '
                   'column are automatically detected',
                   'asdf-standard-1.6.0',
                   '!core/ndarray-1.1.0\n'
                   '  [[M110, 110, 205, And],\n'
                   '   [ M31,  31, 224, And],\n'
                   '   [ M32,  32, 221, And],\n'
                   '   [M103, 103, 581, Cas]]\n'],
                  ['An inline structured array, where the types of each '
                   'column are explicitly specified',
                   'asdf-standard-1.6.0',
                   '!core/ndarray-1.1.0\n'
                   "  datatype: [['ascii', 4], uint16, uint16, ['ascii', "
                   '4]]\n'
                   '  data:\n'
                   '    [[M110, 110, 205, And],\n'
                   '     [ M31,  31, 224, And],\n'
                   '     [ M32,  32, 221, And],\n'
                   '     [M103, 103, 581, Cas]]\n'],
                  ['A double-precision array, in contiguous memory in a '
                   'block within the same file',
                   'asdf-standard-1.6.0',
                   '!core/ndarray-1.1.0\n'
                   '  source: 0\n'
                   '  shape: [1024, 1024]\n'
                   '  datatype: float64\n'
                   '  byteorder: little\n'],
                  ['A view of a tile in that image',
                   'asdf-standard-1.6.0',
                   '!core/ndarray-1.1.0\n'
                   '  source: 0\n'
                   '  shape: [256, 256]\n'
                   '  datatype: float64\n'
                   '  byteorder: little\n'
                   '  strides: [8192, 8]\n'
                   '  offset: 2099200\n'],
                  ['A structured datatype, with nested columns for a '
                   'coordinate in (*ra*, *dec*), and a 3x3 convolution '
                   'kernel',
                   'asdf-standard-1.6.0',
                   '!core/ndarray-1.1.0\n'
                   '  source: 0\n'
                   '  shape: [64]\n'
                   '  datatype:\n'
                   '    - name: coordinate\n'
                   '      datatype:\n'
                   '        - name: ra\n'
                   '          datatype: float64\n'
                   '        - name: dec\n'
                   '          datatype: float64\n'
                   '    - name: kernel\n'
                   '      datatype: float32\n'
                   '      shape: [3, 3]\n'
                   '  byteorder: little\n'],
                  ['An array in Fortran order',
                   'asdf-standard-1.6.0',
                   '!core/ndarray-1.1.0\n'
                   '  source: 0\n'
                   '  shape: [1024, 1024]\n'
                   '  datatype: float64\n'
                   '  byteorder: little\n'
                   '  strides: [8192, 8]\n'],
                  ['An array where values of -999 are treated as missing',
                   'asdf-standard-1.6.0',
                   '!core/ndarray-1.1.0\n'
                   '  source: 0\n'
                   '  shape: [256, 256]\n'
                   '  datatype: float64\n'
                   '  byteorder: little\n'
                   '  mask: -999\n'],
                  ['An array where another array is used as a mask',
                   'asdf-standard-1.6.0',
                   '!core/ndarray-1.1.0\n'
                   '  source: 0\n'
                   '  shape: [256, 256]\n'
                   '  datatype: float64\n'
                   '  byteorder: little\n'
                   '  mask: !core/ndarray-1.1.0\n'
                   '    source: 1\n'
                   '    shape: [256, 256]\n'
                   '    datatype: bool8\n'
                   '    byteorder: little\n'],
                  ['An array where the data is stored in the first block '
                   'in another ASDF file.',
                   'asdf-standard-1.6.0',
                   '!core/ndarray-1.1.0\n'
                   '  source: external.asdf\n'
                   '  shape: [256, 256]\n'
                   '  datatype: float64\n'
                   '  byteorder: little\n']],
     'id': 'http://stsci.edu/schemas/asdf/core/ndarray-1.1.0',
     'title': 'An *n*-dimensional array.\n'}

On instance:
    {'byteorder': 'big',
     'datatype': [{'byteorder': 'little', 'datatype': 'int64', 'name': 'a'},
                  {'byteorder': 'little',
                   'datatype': 'int64',
                   'name': 'b'}],
     'mask': {'byteorder': 'big',
              'datatype': [{'byteorder': 'big',
                            'datatype': 'bool8',
                            'name': 'a'},
                           {'byteorder': 'big',
                            'datatype': 'bool8',
                            'name': 'b'}],
              'shape': [10],
              'source': 1},
     'shape': [10],
     'source': 0}

@SolarDrew

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions