Skip to content

Commit 71c7bc4

Browse files
authored
Merge f95909f into 13e3d9f
2 parents 13e3d9f + f95909f commit 71c7bc4

File tree

2 files changed

+316
-7
lines changed

2 files changed

+316
-7
lines changed
Lines changed: 299 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,299 @@
1+
# Copyright Iris contributors
2+
#
3+
# This file is part of Iris and is released under the BSD license.
4+
# See LICENSE in the root of the repository for full licensing details.
5+
"""NetCDF attribute translations for Iris attributes with special convenience types.
6+
7+
These are things which are stored differently in an Iris cube attribute from how they
8+
are actually stored in a netcdf file. E.g. a STASH code is stored as a special object,
9+
but in a file it is just a string.
10+
11+
These conversions are intended to be automatic and lossless, like a serialization.
12+
13+
At present, there are 3 of these:
14+
* "STASH": records/controls the exact file encoding of data loaded from or saved to
15+
UM file formats (PP/FF).
16+
* "GRIB_PARAM": does the same for GRIB data (using iris_grib).
17+
* "ukmo__process_flags": internally a tuple of strings, but stored as a single string
18+
with underscore separators.
19+
20+
"""
21+
22+
from abc import ABCMeta, abstractmethod
23+
from typing import Any, Dict, List, Tuple
24+
25+
26+
class AttributeCodingObject(metaclass=ABCMeta):
    """Abstract interface for a coder of one "managed" attribute.

    A concrete coder sets the two identifying class attributes, and implements the
    static methods :meth:`encode_object` (for save) and :meth:`decode_attribute`
    (for load).
    """

    #: The user-visible attribute name used within Iris, which identifies attributes
    #: which we should attempt to encode with this coder.
    IrisIdentifyingName: str = ""

    #: The storage name(s) which identify this type of data in actual files, which thus
    #: identify attributes which we should attempt to decode with this coder.
    #: NOTES:
    #: (1) for save the attribute name is dynamically determined by the "encode" call.
    #: (2) for load, in (presumably extremely rare) case of multiples appearing, "the"
    #: internal attribute is taken from the earliest appearing name: The other values
    #: are lost, and a warning will be issued.
    # N.B. this default list object is shared by every class which does not rebind the
    # name, so subclasses should assign their own list rather than modify this one.
    NetcdfIdentifyingNames: List[str] = []

    @staticmethod
    @abstractmethod
    def encode_object(content) -> Tuple[str, str]:
        """Encode an object as an attribute name and value.

        We already do change the name of STASH attributes to "um_stash_source" on save
        (as-of Iris 3.12).  This structure also allows that we might produce different
        names for different codes.

        Parameters
        ----------
        content
            The Iris-side attribute object to encode.

        Returns
        -------
        tuple of (str, str)
            The attribute name and the string value to store in the file.
        """

    @staticmethod
    @abstractmethod
    def decode_attribute(attr_name: str, attr_value: str) -> Any:
        """Decode an attribute name and string to an attribute object.

        Parameters
        ----------
        attr_name : str
            The name of the attribute, as found in the file.
        attr_value : str
            The string content of the attribute, as found in the file.

        Returns
        -------
        Any
            The Iris-side attribute object.
        """
55+
56+
57+
class StashCoder(AttributeCodingObject):
    """Coder for the "STASH" attribute: a STASH object in Iris, a string in a file."""

    IrisIdentifyingName = "STASH"
    # The first in-file name is the one written on save; the second is a 'legacy'
    # name, which is only recognised.
    NetcdfIdentifyingNames = ["um_stash_source", "ukmo__um_stash_source"]

    @staticmethod
    def encode_object(stash):
        """Return the (attribute name, string value) pair to store for ``stash``."""
        nc_name = StashCoder.NetcdfIdentifyingNames[0]
        stash_text = str(stash)
        return nc_name, stash_text

    @staticmethod
    def decode_attribute(attr_name: str, attr_value: str):
        """Build a STASH object from the attribute string; ``attr_name`` is unused."""
        # Imported locally, not at module level.
        from iris.fileformats.pp import STASH

        stash_object = STASH.from_msi(attr_value)
        return stash_object
74+
75+
76+
class UkmoProcessCoder(AttributeCodingObject):
    """Convert ukmo__process_flags tuple attribute to/from a netcdf string attribute.

    In the file, the flags are stored as a single string: flags are separated by
    single spaces, any space within a flag is replaced by an underscore, and an empty
    flag is written as the placeholder "<EMPTY>".

    The conversion is not fully reversible : an underscore in an original flag is
    decoded as a space, and a flag which is literally "<EMPTY>" is decoded as an empty
    string.
    """

    IrisIdentifyingName = "ukmo__process_flags"
    NetcdfIdentifyingNames = ["ukmo__process_flags"]

    @staticmethod
    def encode_object(value):
        """Encode a sequence of flag strings as an attribute name and single string.

        Parameters
        ----------
        value : iterable of str
            The process flags, e.g. a tuple of strings.

        Returns
        -------
        tuple of (str, str)
            The attribute name, and the space-separated flags string.

        Raises
        ------
        TypeError
            If ``value`` is a single string, is not iterable, or contains any entry
            which is not a string.
        """
        name = UkmoProcessCoder.IrisIdentifyingName
        if isinstance(value, str):
            # A bare string would otherwise be treated as a sequence of
            # single-character flags, which silently corrupts the content.
            msg = (
                f"Invalid value for attribute {name!r} : {value!r}. "
                "Must be a sequence of strings, not a single string."
            )
            raise TypeError(msg)

        # N.B. a non-iterable value raises a TypeError here.
        flags = list(value)
        if not all(isinstance(flag, str) for flag in flags):
            msg = (
                f"Invalid value for attribute {name!r} : {value!r}. "
                "All entries must be strings."
            )
            raise TypeError(msg)

        def value_fix(flag):
            flag = flag.replace(" ", "_")
            if flag == "":
                # Special handling for an empty string entry, which otherwise upsets
                # the split/join process.
                flag = "<EMPTY>"
            return flag

        encoded = " ".join(value_fix(flag) for flag in flags)
        return UkmoProcessCoder.NetcdfIdentifyingNames[0], encoded

    @staticmethod
    def decode_attribute(attr_name: str, attr_value: str):
        """Decode a space-separated flags string into a tuple of flag strings.

        Parameters
        ----------
        attr_name : str
            The attribute name.  In this case the attribute name does not matter.
        attr_value : str
            The attribute string, as produced by :meth:`encode_object`.

        Returns
        -------
        tuple of str
            The flags.  An empty ``attr_value`` gives an empty tuple.
        """

        def value_unfix(flag):
            flag = flag.replace("_", " ")
            if flag == "<EMPTY>":
                # A placeholder flagging where the original was an empty string.
                flag = ""
            return flag

        if attr_value == "":
            # Needed because "".split(" ") returns [""], i.e. one (empty) entry.
            flags = []
        else:
            flags = [value_unfix(x) for x in attr_value.split(" ")]

        return tuple(flags)
111+
112+
113+
class GribParamCoder(AttributeCodingObject):
    """Convert iris_grib GRIB_PARAM object attribute to/from a netcdf string attribute.

    Use the mechanisms in iris_grib.

    """

    IrisIdentifyingName = "GRIB_PARAM"
    NetcdfIdentifyingNames = ["GRIB_PARAM"]

    @staticmethod
    def encode_object(grib_param):
        """Return the (attribute name, string value) pair to store for ``grib_param``.

        The stored string is the ``repr`` of the object.
        """
        # The argument is expected to be an
        #   iris_grib.grib_phenom_translation._gribcode.GenericConcreteGRIBCode
        # It is deliberately left un-annotated, so that iris_grib can remain an
        # optional import.
        nc_name = GribParamCoder.NetcdfIdentifyingNames[0]
        return nc_name, repr(grib_param)

    @staticmethod
    def decode_attribute(attr_name: str, attr_value: str):
        """Build a GRIB code object from the attribute string, or return None.

        ``attr_name`` is unused.  None is returned if constructing the code object
        raises a TypeError or ValueError.
        """
        from iris_grib.grib_phenom_translation._gribcode import GRIBCode

        try:
            # The helper function constructs a suitable GenericConcreteGRIBCode object.
            return GRIBCode(attr_value)
        except (TypeError, ValueError):
            return None
140+
141+
142+
# The registry of available attribute handlers, keyed by Iris attribute name.
ATTRIBUTE_HANDLERS: Dict[str, AttributeCodingObject] = {}


def _add_handler(handler: AttributeCodingObject):
    """Record ``handler`` in ATTRIBUTE_HANDLERS, under its Iris attribute name."""
    iris_name = handler.IrisIdentifyingName
    ATTRIBUTE_HANDLERS[iris_name] = handler


# The "STASH" and "ukmo__process_flags" handlers are always included.
_add_handler(StashCoder())
_add_handler(UkmoProcessCoder())

try:
    import iris_grib  # noqa: F401
except ImportError:
    # No iris-grib : so there is no "GRIB_PARAM" handler.
    pass
else:
    # iris-grib is available, so also include the "GRIB_PARAM" handler.
    _add_handler(GribParamCoder())
162+
163+
164+
#
165+
# Mechanism tests
166+
#
167+
def _decode_gribcode(grib_code: str):
    """Decode a GRIB_PARAM attribute string, by calling the GribParamCoder.

    The attribute name passed on, "x", is just a dummy value.

    Parameters
    ----------
    grib_code : str
        The attribute string to decode.

    Returns
    -------
    The result of ``GribParamCoder.decode_attribute``.
    """
    return GribParamCoder.decode_attribute("x", grib_code)
179+
180+
181+
def make_gribcode(*args, **kwargs):
    """Call the iris_grib ``GRIBCode`` helper with the given arguments.

    The import is local to the function, not at module level.
    """
    from iris_grib.grib_phenom_translation._gribcode import GRIBCode

    grib_code = GRIBCode(*args, **kwargs)
    return grib_code
185+
186+
187+
class TestGribDecode:
    """Check the decoding of GRIB_PARAM attribute strings into GRIB code objects."""

    def test_grib_1(self):
        # All arguments given as keywords.
        text = "GRIBCode(edition=1, table_version=2, centre_number=3, number=4)"
        decoded = _decode_gribcode(text)
        assert decoded == make_gribcode(1, 2, 3, 4)

    def test_grib_2(self):
        # All arguments positional.
        decoded = _decode_gribcode("GRIBCode(2,5,7,13)")
        assert decoded == make_gribcode(2, 5, 7, 13)

    def test_grib_3(self):
        # A mixture of positional and (re-ordered) keyword arguments.
        text = "GRIBCode(2,5, number=13, centre_number=7)"
        decoded = _decode_gribcode(text)
        assert decoded == make_gribcode(2, 5, 7, 13)

    def test_grib_4(self):
        # An unrecognised name before the brackets is expected to still decode.
        decoded = _decode_gribcode("GRIBxXCode(2,5,7,13)")
        assert decoded == make_gribcode(2, 5, 7, 13)

    def test_grib_5(self):
        # No arguments at all : expected to fail, returning None.
        decoded = _decode_gribcode("GRIBCode()")
        assert decoded is None

    def test_grib_6(self):
        # No numbers in the arguments : expected to fail, returning None.
        decoded = _decode_gribcode("GRIBCode(xxx)")
        assert decoded is None

    def test_grib_7(self):
        # Junk text around the numbers is expected to be ignored.
        text = "GRIBCode(xxx-any-junk..1, 2,qytw3dsa, 4)"
        decoded = _decode_gribcode(text)
        assert decoded == make_gribcode(1, 2, 3, 4)
214+
215+
216+
def _sample_decode_rawlbproc(lbproc):
    """Return a sorted tuple of the LBPROC_MAP names whose bits are set in ``lbproc``.

    Only the LBPROC_MAP entries with an integer key are considered.
    """
    from iris.fileformats._pp_lbproc_pairs import LBPROC_MAP

    names = []
    for value, name in LBPROC_MAP.items():
        if isinstance(value, int) and lbproc & value:
            names.append(name)
    names.sort()
    return tuple(names)
228+
229+
230+
def _check_pf_roundtrip(contents):
    """Encode then decode ``contents`` with the UkmoProcessCoder, and check results.

    Checks the attribute name, that the encoded string has one space-separated entry
    per original flag, and that decoding reproduces the original.
    The original, encoded and reconstructed values are also printed.
    """
    print(f"original: {contents!r}")
    nc_name, encoded = UkmoProcessCoder.encode_object(contents)
    decoded = UkmoProcessCoder.decode_attribute(nc_name, encoded)
    print(f" -> encoded: {encoded!r}")
    print(f" -> reconstructed: {decoded!r}")

    assert nc_name == "ukmo__process_flags"

    if encoded == "":
        # Because "".split(" ") gives [""], which would count as one entry.
        n_encoded = 0
    else:
        n_encoded = len(encoded.split(" "))
    assert n_encoded == len(contents)

    assert decoded == contents
240+
241+
242+
class TestProcessFlagsRoundtrip:
    """Check that sample process-flags tuples survive an encode + decode."""

    def test_pf_1(self):
        # Several flags, some with spaces in.
        _check_pf_roundtrip(("A example", "b", "another-thing with spaces"))

    def test_pf_2(self):
        # A single flag.
        _check_pf_roundtrip(("single",))

    def test_pf_3(self):
        # An empty flag in the middle.
        _check_pf_roundtrip(("nonempty", "", "nonempty2"))

    def test_pf_4(self):
        # No flags at all.
        _check_pf_roundtrip(())

    def test_pf_5(self):
        # An empty flag at the end.
        _check_pf_roundtrip(("a", ""))

    def test_pf_6(self):
        # An empty flag at the start.
        _check_pf_roundtrip(("", "b"))

    def test_pf_7(self):
        # Only an empty flag.
        _check_pf_roundtrip(("",))

    def test_pf_8(self):
        # Only a flag which is a single space.
        _check_pf_roundtrip((" ",))

    def test_pf_9(self):
        # Two empty flags.
        _check_pf_roundtrip(("", ""))

    def test_pf_10(self):
        # A leading space in the first flag.
        _check_pf_roundtrip((" a", "b"))

    def test_pf_11(self):
        # A trailing space in the first flag.
        _check_pf_roundtrip(("a ", "b"))

    def test_pf_12(self):
        # A leading space in the last flag.
        _check_pf_roundtrip(("a", " b"))

    def test_pf_13(self):
        # A trailing space in the last flag.
        _check_pf_roundtrip(("a", "b "))
294+
295+
296+
#
297+
# TODO: also test encode and decode separately, as each has its own corner cases.
298+
# For example: leading and trailing spaces, and empty entries.
299+
#

lib/iris/fileformats/netcdf/saver.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
import iris.exceptions
5050
import iris.fileformats.cf
5151
from iris.fileformats.netcdf import _dask_locks, _thread_safe_nc
52+
from iris.fileformats.netcdf._attribute_handlers import ATTRIBUTE_HANDLERS
5253
import iris.io
5354
import iris.util
5455
import iris.warnings
@@ -2317,14 +2318,23 @@ def set_packing_ncattrs(cfvar):
23172318

23182319
value = cube.attributes[attr_name]
23192320

2320-
if attr_name == "STASH":
2321-
# Adopting provisional Metadata Conventions for representing MO
2322-
# Scientific Data encoded in NetCDF Format.
2323-
attr_name = "um_stash_source"
2324-
value = str(value)
2321+
# Process any "managed" attributes which convert between an internal
2322+
# convenience representation and what is actually stored in files.
2323+
handler = ATTRIBUTE_HANDLERS.get(attr_name)
2324+
if handler is not None:
2325+
try:
2326+
attr_name, value = handler.encode_object(value)
2327+
except (TypeError, ValueError):
2328+
pass
2329+
2330+
# if attr_name == "STASH":
2331+
# # Adopting provisional Metadata Conventions for representing MO
2332+
# # Scientific Data encoded in NetCDF Format.
2333+
# attr_name = "um_stash_source"
2334+
# value = str(value)
23252335

2326-
if attr_name == "ukmo__process_flags":
2327-
value = " ".join([x.replace(" ", "_") for x in value])
2336+
# if attr_name == "ukmo__process_flags":
2337+
# value = " ".join([x.replace(" ", "_") for x in value])
23282338

23292339
if attr_name in _CF_GLOBAL_ATTRS:
23302340
msg = (

0 commit comments

Comments
 (0)