Fix tests: filter rows containing None in save_table and include the table name in the warning

william-conti · william-conti · commit 10dede468c5f · 2023-10-03T19:27:14.000+02:00
diff --git a/src/databricks/labs/ucx/framework/crawlers.py b/src/databricks/labs/ucx/framework/crawlers.py
@@ -42,19 +42,24 @@ def _schema_for(cls, klass):
         return ", ".join(fields)
 
     @classmethod
-    def _filter_none_rows(cls, rows):
+    def _filter_none_rows(cls, rows, full_name):
+        if len(rows) == 0:
+            return rows
+
         results = []
-        nullable_fields = []
+        nullable_fields = set()
 
         for field in dataclasses.fields(rows[0]):
             if field.default is None:
-                nullable_fields.append(field.name)
+                nullable_fields.add(field.name)
 
         for row in rows:
+            if row is None:
+                continue
             row_contains_none = False
             for column, value in dataclasses.asdict(row).items():
                 if value is None and column not in nullable_fields:
-                    logger.debug(f"Field {column} is None, filtering row")
+                    logger.warning(f"[{full_name}] Field {column} is None, filtering row")
                     row_contains_none = True
                     break
 
@@ -81,7 +86,7 @@ def save_table(self, full_name: str, rows: list[any], mode="append"):
         if mode == "overwrite":
             msg = "Overwrite mode is not yet supported"
             raise NotImplementedError(msg)
-
+        rows = self._filter_none_rows(rows, full_name)
         if len(rows) == 0:
             return
 
@@ -136,7 +141,7 @@ def fetch(self, sql) -> Iterator[any]:
         return self._spark.sql(sql).collect()
 
     def save_table(self, full_name: str, rows: list[any], mode: str = "append"):
-        rows = self._filter_none_rows(rows)
+        rows = self._filter_none_rows(rows, full_name)
 
         if len(rows) == 0:
             return
diff --git a/tests/unit/framework/test_crawlers.py b/tests/unit/framework/test_crawlers.py
@@ -221,7 +221,7 @@ def test_runtime_backend_save_table_with_row_containing_none(mocker):
 
         rb = RuntimeBackend()
 
-        rb.save_table("a.b.c", [Foo("aaa", True), Foo("bbb", False), Foo("bbb", None)])
+        rb.save_table("a.b.c", [Foo("aaa", True), Foo("bbb", False), Foo("ccc", None)])
 
         rb._spark.createDataFrame.assert_called_with(
             [Foo(first="aaa", second=True), Foo(first="bbb", second=False)],