apache · rdblue · Aug 16, 2023 · Jul 3, 2023 · Jul 5, 2023 · Jul 5, 2023
diff --git a/python/poetry.lock b/python/poetry.lock
diff --git a/python/pyiceberg/catalog/__init__.py b/python/pyiceberg/catalog/__init__.py
@@ -537,7 +537,7 @@ def purge_table(self, identifier: Union[str, Identifier]) -> None:
         io = load_file_io(self.properties, table.metadata_location)
         metadata = table.metadata
         manifest_lists_to_delete = set()
-        manifests_to_delete = []
+        manifests_to_delete: List[ManifestFile] = []
         for snapshot in metadata.snapshots:
             manifests_to_delete += snapshot.manifests(io)
             if snapshot.manifest_list is not None:

diff --git a/python/pyiceberg/catalog/rest.py b/python/pyiceberg/catalog/rest.py
@@ -177,8 +177,8 @@ class OAuthErrorResponse(IcebergBaseModel):
     error: Literal[
         "invalid_request", "invalid_client", "invalid_grant", "unauthorized_client", "unsupported_grant_type", "invalid_scope"
     ]
-    error_description: Optional[str]
-    error_uri: Optional[str]
+    error_description: Optional[str] = None
+    error_uri: Optional[str] = None
 
 
 class RestCatalog(Catalog):
@@ -430,7 +430,7 @@ def create_table(
             write_order=sort_order,
             properties=properties,
         )
-        serialized_json = request.json()
+        serialized_json = request.model_dump_json().encode("utf-8")
         response = self._session.post(
             self.url(Endpoints.create_table, namespace=namespace_and_table["namespace"]),
             data=serialized_json,
@@ -507,7 +507,7 @@ def _commit_table(self, table_request: CommitTableRequest) -> CommitTableRespons
         """
         response = self._session.post(
             self.url(Endpoints.update_table, prefixed=True, **self._split_identifier_for_path(table_request.identifier)),
-            data=table_request.json(),
+            data=table_request.model_dump_json().encode("utf-8"),
         )
         try:
             response.raise_for_status()

diff --git a/python/pyiceberg/cli/output.py b/python/pyiceberg/cli/output.py
@@ -200,7 +200,11 @@ class FauxTable(IcebergBaseModel):
             metadata_location: str
             metadata: TableMetadata
 
-        print(FauxTable(identifier=table.identifier, metadata=table.metadata, metadata_location=table.metadata_location).json())
+        print(
+            FauxTable(
+                identifier=table.identifier, metadata=table.metadata, metadata_location=table.metadata_location
+            ).model_dump_json()
+        )
 
     def describe_properties(self, properties: Properties) -> None:
         self._out(properties)
@@ -209,13 +213,13 @@ def text(self, response: str) -> None:
         print(json.dumps(response))
 
     def schema(self, schema: Schema) -> None:
-        print(schema.json())
+        print(schema.model_dump_json())
 
     def files(self, table: Table, history: bool) -> None:
         pass
 
     def spec(self, spec: PartitionSpec) -> None:
-        print(spec.json())
+        print(spec.model_dump_json())
 
     def uuid(self, uuid: Optional[UUID]) -> None:
         self._out({"uuid": str(uuid) if uuid else "missing"})

diff --git a/python/pyiceberg/io/pyarrow.py b/python/pyiceberg/io/pyarrow.py
@@ -781,7 +781,7 @@ def _task_to_table(
             schema_raw = metadata.get(ICEBERG_SCHEMA)
         # TODO: if field_ids are not present, Name Mapping should be implemented to look them up in the table schema,
         #  see https://github.com/apache/iceberg/issues/7451
-        file_schema = Schema.parse_raw(schema_raw) if schema_raw is not None else pyarrow_to_schema(physical_schema)
+        file_schema = Schema.model_validate_json(schema_raw) if schema_raw is not None else pyarrow_to_schema(physical_schema)
 
         pyarrow_filter = None
         if bound_row_filter is not AlwaysTrue():

diff --git a/python/pyiceberg/partitioning.py b/python/pyiceberg/partitioning.py
@@ -14,6 +14,8 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
+from __future__ import annotations
+
 from functools import cached_property
 from typing import (
     Any,
@@ -23,15 +25,21 @@
     Tuple,
 )
 
-from pydantic import Field
+from pydantic import (
+    BeforeValidator,
+    Field,
+    PlainSerializer,
+    WithJsonSchema,
+)
+from typing_extensions import Annotated
 
 from pyiceberg.schema import Schema
-from pyiceberg.transforms import Transform
+from pyiceberg.transforms import Transform, parse_transform
 from pyiceberg.typedef import IcebergBaseModel
 from pyiceberg.types import NestedField, StructType
 
 INITIAL_PARTITION_SPEC_ID = 0
-_PARTITION_DATA_ID_START: int = 1000
+PARTITION_FIELD_ID_START: int = 1000
 
 
 class PartitionField(IcebergBaseModel):
@@ -46,7 +54,12 @@ class PartitionField(IcebergBaseModel):
 
     source_id: int = Field(alias="source-id")
     field_id: int = Field(alias="field-id")
-    transform: Transform[Any, Any] = Field()
+    transform: Annotated[  # type: ignore
+        Transform,
+        BeforeValidator(parse_transform),
+        PlainSerializer(lambda c: str(c), return_type=str),  # pylint: disable=W0108
+        WithJsonSchema({"type": "string"}, mode="serialization"),
+    ] = Field()
     name: str = Field()
 
     def __init__(
@@ -65,6 +78,7 @@ def __init__(
             data["transform"] = transform
         if name is not None:
             data["name"] = name
+
         super().__init__(**data)
 
     def __str__(self) -> str:
@@ -82,7 +96,7 @@ class PartitionSpec(IcebergBaseModel):
     """
 
     spec_id: int = Field(alias="spec-id", default=INITIAL_PARTITION_SPEC_ID)
-    fields: Tuple[PartitionField, ...] = Field(alias="fields", default_factory=tuple)
+    fields: Tuple[PartitionField, ...] = Field(default_factory=tuple)
 
     def __init__(
         self,
@@ -129,7 +143,7 @@ def is_unpartitioned(self) -> bool:
     def last_assigned_field_id(self) -> int:
         if self.fields:
             return max(pf.field_id for pf in self.fields)
-        return _PARTITION_DATA_ID_START
+        return PARTITION_FIELD_ID_START
 
     @cached_property
     def source_id_to_fields_map(self) -> Dict[int, List[PartitionField]]:
@@ -143,7 +157,7 @@ def source_id_to_fields_map(self) -> Dict[int, List[PartitionField]]:
     def fields_by_source_id(self, field_id: int) -> List[PartitionField]:
         return self.source_id_to_fields_map.get(field_id, [])
 
-    def compatible_with(self, other: "PartitionSpec") -> bool:
+    def compatible_with(self, other: PartitionSpec) -> bool:
         """Produce a boolean to return True if two PartitionSpec are considered compatible."""
         if self == other:
             return True
@@ -196,7 +210,7 @@ def assign_fresh_partition_spec_ids(spec: PartitionSpec, old_schema: Schema, fre
             PartitionField(
                 name=field.name,
                 source_id=fresh_field.field_id,
-                field_id=_PARTITION_DATA_ID_START + pos,
+                field_id=PARTITION_FIELD_ID_START + pos,
                 transform=field.transform,
             )
         )

diff --git a/python/pyiceberg/serializers.py b/python/pyiceberg/serializers.py
@@ -126,6 +126,6 @@ def table_metadata(metadata: TableMetadata, output_file: OutputFile, overwrite:
             overwrite (bool): Where to overwrite the file if it already exists. Defaults to `False`.
         """
         with output_file.create(overwrite=overwrite) as output_stream:
-            json_bytes = metadata.json().encode("utf-8")
+            json_bytes = metadata.model_dump_json().encode("utf-8")
             json_bytes = Compressor.get_compressor(output_file.location).bytes_compressor()(json_bytes)
             output_stream.write(json_bytes)
diff --git a/python/pyiceberg/table/__init__.py b/python/pyiceberg/table/__init__.py
@@ -37,7 +37,7 @@
     Union,
 )
 
-from pydantic import Field
+from pydantic import Field, SerializeAsAny
 from sortedcontainers import SortedList
 
 from pyiceberg.expressions import (
@@ -365,8 +365,8 @@ class AssertDefaultSortOrderId(TableRequirement):
 
 class CommitTableRequest(IcebergBaseModel):
     identifier: Identifier = Field()
-    requirements: List[TableRequirement] = Field(default_factory=list)
-    updates: List[TableUpdate] = Field(default_factory=list)
+    requirements: List[SerializeAsAny[TableRequirement]] = Field(default_factory=list)
+    updates: List[SerializeAsAny[TableUpdate]] = Field(default_factory=list)
 
 
 class CommitTableResponse(IcebergBaseModel):