Skip to content

Make true/false expression JSON serializable #2521

@Fokko

Description

@Fokko

Feature Request / Improvement

Make sure that the true/false expressions (`AlwaysTrue` and `AlwaysFalse`) can be serialized to JSON:

class AlwaysTrue(BooleanExpression, Singleton):
"""TRUE expression."""
def __invert__(self) -> AlwaysFalse:
"""Transform the Expression into its negated version."""
return AlwaysFalse()
def __str__(self) -> str:
"""Return the string representation of the AlwaysTrue class."""
return "AlwaysTrue()"
def __repr__(self) -> str:
"""Return the string representation of the AlwaysTrue class."""
return "AlwaysTrue()"
class AlwaysFalse(BooleanExpression, Singleton):
"""FALSE expression."""
def __invert__(self) -> AlwaysTrue:
"""Transform the Expression into its negated version."""
return AlwaysTrue()
def __str__(self) -> str:
"""Return the string representation of the AlwaysFalse class."""
return "AlwaysFalse()"
def __repr__(self) -> str:
"""Return the string representation of the AlwaysFalse class."""
return "AlwaysFalse()"

This is a special case, since these expressions do not serialize to a JSON object, but to a plain string instead:

"true"
"false"

Because the expression doesn't convert to an object, it is a so-called root model, which needs to set the `root` field:

class DayTransform(TimeTransform[S]):
"""Transforms a datetime value into a day value.
Example:
>>> transform = DayTransform()
>>> transform.transform(DateType())(17501)
17501
"""
root: LiteralType["day"] = Field(default="day") # noqa: F821

We use Pydantic for JSON serialization, which can be enabled by deriving from the IcebergBaseModel:

class PartitionSpec(IcebergBaseModel):

Example tests can be found here:

def test_serialize_partition_spec() -> None:
partitioned = PartitionSpec(
PartitionField(source_id=1, field_id=1000, transform=TruncateTransform(width=19), name="str_truncate"),
PartitionField(source_id=2, field_id=1001, transform=BucketTransform(num_buckets=25), name="int_bucket"),
spec_id=3,
)
assert (
partitioned.model_dump_json()
== """{"spec-id":3,"fields":[{"source-id":1,"field-id":1000,"transform":"truncate[19]","name":"str_truncate"},{"source-id":2,"field-id":1001,"transform":"bucket[25]","name":"int_bucket"}]}"""
)
def test_deserialize_unpartition_spec() -> None:
json_partition_spec = """{"spec-id":0,"fields":[]}"""
spec = PartitionSpec.model_validate_json(json_partition_spec)
assert spec == PartitionSpec(spec_id=0)

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions