|
| 1 | +# Copyright Iris contributors |
| 2 | +# |
| 3 | +# This file is part of Iris and is released under the BSD license. |
| 4 | +# See LICENSE in the root of the repository for full licensing details. |
| 5 | +"""NetCDF attribute translations for Iris attributes with special convenience types. |
| 6 | +
|
| 7 | +These are things which are stored differently in an Iris cube attribute from how they |
| 8 | +are actually stored in a netcdf file. E.G. a STASH code is stored as a special object, |
| 9 | +but in a file it is just a string. |
| 10 | +
|
| 11 | +These conversions are intended to be automatic and lossless, like a serialization. |
| 12 | +
|
| 13 | +At present, there are 3 of these : |
| 14 | + * "STASH": records/controls the exact file encoding of data loaded from or saved to |
| 15 | + UM file formats (PP/FF). |
| 16 | + * "GRIB_PARAM": does the same for GRIB data (using iris_grib). |
| 17 | + * "ukmo__process_flags": internally a tuple of strings, but stored as a single string |
| 18 | + of space-separated flags, with spaces inside each flag replaced by underscores. |
| 19 | +
|
| 20 | +""" |
| 21 | + |
| 22 | +from abc import ABCMeta, abstractmethod |
| 23 | +from typing import Any, Dict, List, Tuple |
| 24 | + |
| 25 | + |
class AttributeCodingObject(metaclass=ABCMeta):
    """Abstract interface for a coder which translates one special Iris attribute.

    A concrete coder converts between the object held as an Iris attribute and the
    (attribute name, string value) pair which represents it in a netcdf file.
    """

    #: The user-visible attribute name used within Iris, which identifies attributes
    #: which we should attempt to encode with this coder.
    IrisIdentifyingName: str = ""
    #: The storage name(s) which identify this type of data in actual files, which thus
    #: identify attributes which we should attempt to decode with this coder.
    #: NOTES:
    #: (1) for save the attribute name is dynamically determined by the "encode" call.
    #: (2) for load, in (presumably extremely rare) case of multiples appearing, "the"
    #: internal attribute is taken from the earliest appearing name: The other values
    #: are lost, and a warning will be issued.
    NetcdfIdentifyingNames: List[str] = []

    @staticmethod
    @abstractmethod
    def encode_object(content) -> Tuple[str, str]:
        """Encode an object as an attribute name and value.

        The name is part of the result because it can differ from the Iris name :
        we already do change the name of STASH attributes to "um_stash_source" on
        save (as-of Iris 3.12). This structure also allows that we might produce
        different names for different codes.
        """

    @staticmethod
    @abstractmethod
    def decode_attribute(attr_name: str, attr_value: str) -> Any:
        """Decode an attribute name and string to an attribute object."""
| 55 | + |
| 56 | + |
class StashCoder(AttributeCodingObject):
    """Translate a STASH attribute object to/from its netcdf string attribute."""

    IrisIdentifyingName = "STASH"
    # Two in-file attribute names are recognised. The first is the one written on
    # save; the second is a 'legacy' version.
    NetcdfIdentifyingNames = ["um_stash_source", "ukmo__um_stash_source"]

    @staticmethod
    def encode_object(stash):
        """Return the storage attribute name, and the ``str()`` form of `stash`."""
        save_name = StashCoder.NetcdfIdentifyingNames[0]
        return save_name, str(stash)

    @staticmethod
    def decode_attribute(attr_name: str, attr_value: str):
        """Build a STASH object from `attr_value` ; `attr_name` is not used."""
        from iris.fileformats.pp import STASH

        stash_object = STASH.from_msi(attr_value)
        return stash_object
| 74 | + |
| 75 | + |
class UkmoProcessCoder(AttributeCodingObject):
    """Translate a ukmo__process_flags tuple to/from its netcdf string attribute.

    The stored form is a single string of space-separated flags, in which any space
    inside a flag is replaced by an underscore, and an empty flag is written as the
    placeholder "<EMPTY>".
    """

    IrisIdentifyingName = "ukmo__process_flags"
    NetcdfIdentifyingNames = ["ukmo__process_flags"]

    @staticmethod
    def encode_object(value):
        """Return the storage attribute name, and the flags joined into one string."""
        encoded_flags = []
        for flag in value:
            text = flag.replace(" ", "_")
            if text == "":
                # An empty entry needs a placeholder, as it would otherwise upset
                # the split/join process.
                text = "<EMPTY>"
            encoded_flags.append(text)

        joined = " ".join(encoded_flags)
        return UkmoProcessCoder.NetcdfIdentifyingNames[0], joined

    @staticmethod
    def decode_attribute(attr_name: str, attr_value: str):
        """Split a stored string back into a tuple of flags ; `attr_name` is unused."""
        if attr_value == "":
            # An empty string means "no flags" : splitting it would give one entry.
            return ()

        decoded_flags = []
        for token in attr_value.split(" "):
            text = token.replace("_", " ")
            if text == "<EMPTY>":
                # A placeholder flagging where the original was an empty string.
                text = ""
            decoded_flags.append(text)

        return tuple(decoded_flags)
| 111 | + |
| 112 | + |
class GribParamCoder(AttributeCodingObject):
    """Translate an iris_grib GRIB_PARAM object to/from its netcdf string attribute.

    The conversions use the mechanisms in iris_grib.
    """

    IrisIdentifyingName = "GRIB_PARAM"
    NetcdfIdentifyingNames = ["GRIB_PARAM"]

    @staticmethod
    def encode_object(grib_param):
        """Return the storage attribute name, and the ``repr()`` of `grib_param`."""
        # grib_param should be an
        # iris_grib.grib_phenom_translation._gribcode.GenericConcreteGRIBCode
        # It is left un-typed, as we need iris_grib to remain an optional import.
        save_name = GribParamCoder.NetcdfIdentifyingNames[0]
        return save_name, repr(grib_param)

    @staticmethod
    def decode_attribute(attr_name: str, attr_value: str):
        """Return the GRIB code object built from `attr_value`, or None on failure.

        `attr_name` is not used. A TypeError or ValueError from the constructor is
        treated as "not decodable" and gives a None result.
        """
        from iris_grib.grib_phenom_translation._gribcode import GRIBCode

        # Use the helper function to construct a suitable GenericConcreteGRIBCode
        # object.
        try:
            return GRIBCode(attr_value)
        except (TypeError, ValueError):
            return None
| 140 | + |
| 141 | + |
# The registry of available attribute handlers, keyed by their Iris attribute name.
ATTRIBUTE_HANDLERS: Dict[str, AttributeCodingObject] = {}


def _add_handler(handler: AttributeCodingObject):
    """Record `handler` in ATTRIBUTE_HANDLERS, under its ``IrisIdentifyingName``."""
    iris_name = handler.IrisIdentifyingName
    ATTRIBUTE_HANDLERS[iris_name] = handler
| 148 | + |
| 149 | + |
# The "STASH" and "ukmo__process_flags" handlers are always registered.
_add_handler(StashCoder())
_add_handler(UkmoProcessCoder())

# The "GRIB_PARAM" handler is only registered if iris-grib can be imported.
try:
    import iris_grib  # noqa: F401
except ImportError:
    pass
else:
    _add_handler(GribParamCoder())
| 162 | + |
| 163 | + |
| 164 | +# |
| 165 | +# Mechanism tests |
| 166 | +# |
def _decode_gribcode(grib_code: str):
    """Decode a GRIB_PARAM attribute string, via ``GribParamCoder.decode_attribute``.

    The attribute name passed to the coder is an arbitrary dummy ("x"). The result
    is whatever the coder returns for the `grib_code` string.
    """
    return GribParamCoder.decode_attribute("x", grib_code)
| 179 | + |
| 180 | + |
def make_gribcode(*args, **kwargs):
    """Return ``GRIBCode(*args, **kwargs)``, importing iris_grib only when called."""
    from iris_grib.grib_phenom_translation import _gribcode

    return _gribcode.GRIBCode(*args, **kwargs)
| 185 | + |
| 186 | + |
class TestGribDecode:
    """Check the results of decoding various GRIB_PARAM attribute strings."""

    def test_grib_1(self):
        # All four values given as keywords.
        text = "GRIBCode(edition=1, table_version=2, centre_number=3, number=4)"
        decoded = _decode_gribcode(text)
        assert decoded == make_gribcode(1, 2, 3, 4)

    def test_grib_2(self):
        # All four values given positionally.
        decoded = _decode_gribcode("GRIBCode(2,5,7,13)")
        assert decoded == make_gribcode(2, 5, 7, 13)

    def test_grib_3(self):
        # A mixture of positional values and (re-ordered) keywords.
        text = "GRIBCode(2,5, number=13, centre_number=7)"
        decoded = _decode_gribcode(text)
        assert decoded == make_gribcode(2, 5, 7, 13)

    def test_grib_4(self):
        # A string with an altered leading name is expected to decode the same.
        decoded = _decode_gribcode("GRIBxXCode(2,5,7,13)")
        assert decoded == make_gribcode(2, 5, 7, 13)

    def test_grib_5(self):
        # No values at all : expected to give None.
        decoded = _decode_gribcode("GRIBCode()")
        assert decoded is None

    def test_grib_6(self):
        # No numeric values : expected to give None.
        decoded = _decode_gribcode("GRIBCode(xxx)")
        assert decoded is None

    def test_grib_7(self):
        # Numbers mixed in with other text are expected to be picked out in order.
        text = "GRIBCode(xxx-any-junk..1, 2,qytw3dsa, 4)"
        decoded = _decode_gribcode(text)
        assert decoded == make_gribcode(1, 2, 3, 4)
| 214 | + |
| 215 | + |
def _sample_decode_rawlbproc(lbproc):
    """Return a sorted tuple of the LBPROC_MAP names selected by `lbproc`.

    A name is selected when its LBPROC_MAP key is an int which shares at least one
    set bit with `lbproc`.
    """
    from iris.fileformats._pp_lbproc_pairs import LBPROC_MAP

    selected = []
    for key, name in LBPROC_MAP.items():
        if isinstance(key, int) and lbproc & key:
            selected.append(name)

    selected.sort()
    return tuple(selected)
| 228 | + |
| 229 | + |
def _check_pf_roundtrip(contents):
    """Encode and re-decode `contents` with UkmoProcessCoder, checking the results.

    Asserts the attribute name produced, that the encoded string has one
    space-separated entry per original flag, and that decoding restores `contents`.
    The intermediate values are printed, to help interpret a failure.
    """
    print(f"original: {contents!r}")
    attr_name, encoded = UkmoProcessCoder.encode_object(contents)
    decoded = UkmoProcessCoder.decode_attribute(attr_name, encoded)
    print(f" -> encoded: {encoded!r}")
    print(f" -> reconstructed: {decoded!r}")
    assert attr_name == "ukmo__process_flags"

    # Count the encoded entries : an empty string must be treated specially, since
    # "".split(" ") gives a single (empty) entry rather than none.
    if encoded == "":
        n_encoded = 0
    else:
        n_encoded = len(encoded.split(" "))

    assert n_encoded == len(contents)
    assert decoded == contents
| 240 | + |
| 241 | + |
class TestProcessFlagsRoundtrip:
    """Check that sample process-flags tuples survive an encode + decode roundtrip."""

    def test_pf_1(self):
        # Several flags, some with spaces inside.
        _check_pf_roundtrip(("A example", "b", "another-thing with spaces"))

    def test_pf_2(self):
        # One flag only.
        _check_pf_roundtrip(("single",))

    def test_pf_3(self):
        # An empty flag in the middle.
        _check_pf_roundtrip(("nonempty", "", "nonempty2"))

    def test_pf_4(self):
        # No flags at all.
        _check_pf_roundtrip(())

    def test_pf_5(self):
        # An empty flag at the end.
        _check_pf_roundtrip(("a", ""))

    def test_pf_6(self):
        # An empty flag at the start.
        _check_pf_roundtrip(("", "b"))

    def test_pf_7(self):
        # A single empty flag.
        _check_pf_roundtrip(("",))

    def test_pf_8(self):
        # A single flag which is just a space.
        _check_pf_roundtrip((" ",))

    def test_pf_9(self):
        # Two empty flags.
        _check_pf_roundtrip(("", ""))

    def test_pf_10(self):
        # A leading space on the first flag.
        _check_pf_roundtrip((" a", "b"))

    def test_pf_11(self):
        # A trailing space on the first flag.
        _check_pf_roundtrip(("a ", "b"))

    def test_pf_12(self):
        # A leading space on the last flag.
        _check_pf_roundtrip(("a", " b"))

    def test_pf_13(self):
        # A trailing space on the last flag.
        _check_pf_roundtrip(("a", "b "))
| 294 | + |
| 295 | + |
| 296 | +# |
| 297 | +# NOTE: also need to test both encode + decode separately, as there are corner cases. |
| 298 | +# LIKE: leading+trailing, empty entries ... |
| 299 | +# |
0 commit comments