-
Notifications
You must be signed in to change notification settings - Fork 101
crawl_permissions run fails #359
Description
Crawl permissions task fails with the following error:
`DatabricksError: java.io.IOException: java.util.concurrent.TimeoutException: Timed out after 5 seconds
DatabricksError Traceback (most recent call last)
File ~/.ipykernel/1209/command--1-1024872113:18
15 entry = [ep for ep in metadata.distribution("databricks_labs_ucx").entry_points if ep.name == "runtime"]
16 if entry:
17 # Load and execute the entrypoint, assumes no parameters
---> 18 entry[0].load()()
19 else:
20 import databricks_labs_ucx
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/databricks/labs/ucx/runtime.py:213, in main()
212 def main():
--> 213 trigger(*sys.argv)
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/databricks/labs/ucx/framework/tasks.py:93, in trigger(*argv)
90 cfg = WorkspaceConfig.from_file(Path(args["config"]))
91 logging.getLogger("databricks").setLevel(cfg.log_level)
---> 93 current_task.fn(cfg)
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/databricks/labs/ucx/runtime.py:137, in crawl_permissions(cfg)
135 toolkit = GroupMigrationToolkit(cfg)
136 toolkit.cleanup_inventory_table()
--> 137 toolkit.inventorize_permissions()
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/databricks/labs/ucx/workspace_access/migration.py:124, in GroupMigrationToolkit.inventorize_permissions(self)
123 def inventorize_permissions(self):
--> 124 self._permissions_manager.inventorize_permissions()
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/databricks/labs/ucx/workspace_access/manager.py:26, in PermissionManager.inventorize_permissions(self)
24 crawler_tasks = list(self._get_crawler_tasks())
25 logger.info(f"Starting to crawl permissions. Total tasks: {len(crawler_tasks)}")
---> 26 results = ThreadedExecution.gather("crawl permissions", crawler_tasks)
27 items = []
28 for item in results:
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/databricks/labs/ucx/framework/parallel.py:48, in ThreadedExecution.gather(cls, name, tasks)
45 @classmethod
46 def gather(cls, name: str, tasks: list[ExecutableFunction]) -> list[ExecutableResult]:
47 reporter = ProgressReporter(len(tasks), f"{name}: ")
---> 48 return cls(tasks, num_threads=4, progress_reporter=reporter).run()
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/databricks/labs/ucx/framework/parallel.py:63, in ThreadedExecution.run(self)
60 results = concurrent.futures.wait(self._futures, return_when=ALL_COMPLETED)
62 logger.debug("Collecting the results from threaded execution")
---> 63 collected = [future.result() for future in results.done]
64 return collected
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/databricks/labs/ucx/framework/parallel.py:63, in (.0)
60 results = concurrent.futures.wait(self._futures, return_when=ALL_COMPLETED)
62 logger.debug("Collecting the results from threaded execution")
---> 63 collected = [future.result() for future in results.done]
64 return collected
File /usr/lib/python3.10/concurrent/futures/_base.py:451, in Future.result(self, timeout)
449 raise CancelledError()
450 elif self._state == FINISHED:
--> 451 return self.__get_result()
453 self._condition.wait(timeout)
455 if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
File /usr/lib/python3.10/concurrent/futures/_base.py:403, in Future.__get_result(self)
401 if self._exception:
402 try:
--> 403 raise self._exception
404 finally:
405 # Break a reference cycle with the exception in self._exception
406 self = None
File /usr/lib/python3.10/concurrent/futures/thread.py:58, in _WorkItem.run(self)
55 return
57 try:
---> 58 result = self.fn(*self.args, **self.kwargs)
59 except BaseException as exc:
60 self.future.set_exception(exc)
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/databricks/labs/ucx/mixins/hardening.py:57, in rate_limited..decorator..wrapper(*args, **kwargs)
54 @wraps(func)
55 def wrapper(*args, **kwargs):
56 rate_limiter.throttle()
---> 57 return func(*args, **kwargs)
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/databricks/labs/ucx/workspace_access/generic.py:62, in GenericPermissionsSupport._crawler_task(self, object_type, object_id)
60 @rate_limited(max_requests=100)
61 def _crawler_task(self, object_type: str, object_id: str) -> Permissions | None:
---> 62 permissions = self._safe_get_permissions(object_type, object_id)
63 if not permissions:
64 return None
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/databricks/labs/ucx/workspace_access/generic.py:84, in GenericPermissionsSupport._safe_get_permissions(self, object_type, object_id)
82 return None
83 else:
---> 84 raise e
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/databricks/labs/ucx/workspace_access/generic.py:73, in GenericPermissionsSupport._safe_get_permissions(self, object_type, object_id)
71 def _safe_get_permissions(self, object_type: str, object_id: str) -> iam.ObjectPermissions | None:
72 try:
---> 73 return self._ws.permissions.get(object_type, object_id)
74 except DatabricksError as e:
75 if e.error_code in [
76 "RESOURCE_DOES_NOT_EXIST",
77 "RESOURCE_NOT_FOUND",
78 "PERMISSION_DENIED",
79 "FEATURE_DISABLED",
80 ]:
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/databricks/sdk/service/iam.py:1919, in PermissionsAPI.get(self, request_object_type, request_object_id)
1906 """Get object permissions.
1907
1908 Gets the permissions of an object. Objects can inherit permissions from their parent objects or root
(...)
1915 :returns: :class:ObjectPermissions
1916 """
1918 headers = {'Accept': 'application/json', }
-> 1919 res = self._api.do('GET',
1920 f'/api/2.0/permissions/{request_object_type}/{request_object_id}',
1921 headers=headers)
1922 return ObjectPermissions.from_dict(res)
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/databricks/sdk/core.py:1084, in ApiClient.do(self, method, path, query, headers, body, raw, files, data)
1080 if not response.ok:
1081 # TODO: experiment with traceback pruning for better readability
1082 # See https://stackoverflow.com/a/58821552/277035
1083 payload = response.json()
-> 1084 raise self._make_nicer_error(status_code=response.status_code, **payload) from None
1085 if raw:
1086 return StreamingResponse(response)
DatabricksError: java.io.IOException: java.util.concurrent.TimeoutException: Timed out after 5 seconds`
Last few lines of log output:
21:36 WARN [d.l.ucx.workspace_access.generic] Could not get permissions for directories 4342757430346325 due to PERMISSION_DENIED
21:36 WARN [d.l.ucx.workspace_access.generic] Could not get permissions for directories 4341706793123026 due to PERMISSION_DENIED
21:36 INFO [d.l.ucx.framework.parallel] crawl permissions: 12860/13030, rps: 9.603/sec
21:36 WARN [d.l.ucx.workspace_access.generic] Could not get permissions for directories 4420770739346988 due to PERMISSION_DENIED
21:36 WARN [d.l.ucx.workspace_access.generic] Could not get permissions for directories 4481203634490252 due to PERMISSION_DENIED
21:36 INFO [d.l.ucx.framework.parallel] crawl permissions: 12870/13030, rps: 9.604/sec
21:36 INFO [d.l.ucx.framework.parallel] crawl permissions: 12880/13030, rps: 9.602/sec
21:36 INFO [d.l.ucx.framework.parallel] crawl permissions: 12890/13030, rps: 9.603/sec
21:36 INFO [d.l.ucx.framework.parallel] crawl permissions: 12900/13030, rps: 9.602/sec
21:36 INFO [d.l.ucx.framework.parallel] crawl permissions: 12910/13030, rps: 9.603/sec
21:36 INFO [d.l.ucx.framework.parallel] crawl permissions: 12920/13030, rps: 9.601/sec
21:36 INFO [d.l.ucx.framework.parallel] crawl permissions: 12930/13030, rps: 9.602/sec
21:36 INFO [d.l.ucx.framework.parallel] crawl permissions: 12940/13030, rps: 9.600/sec
21:36 WARN [d.l.ucx.workspace_access.generic] Could not get permissions for authorization passwords due to FEATURE_DISABLED
21:36 INFO [d.l.ucx.framework.parallel] crawl permissions: 12950/13030, rps: 9.601/sec
21:36 INFO [d.l.ucx.framework.parallel] crawl permissions: 12960/13030, rps: 9.600/sec
21:36 INFO [d.l.ucx.framework.parallel] crawl permissions: 12970/13030, rps: 9.603/sec
21:36 INFO [d.l.ucx.framework.parallel] crawl permissions: 12980/13030, rps: 9.606/sec
21:36 INFO [d.l.ucx.framework.parallel] crawl permissions: 12990/13030, rps: 9.610/sec
21:36 INFO [d.l.ucx.framework.parallel] crawl permissions: 13000/13030, rps: 9.613/sec
21:36 INFO [d.l.ucx.framework.parallel] crawl permissions: 13010/13030, rps: 9.615/sec
21:36 INFO [d.l.ucx.framework.parallel] crawl permissions: 13020/13030, rps: 9.619/sec
21:36 INFO [d.l.ucx.framework.parallel] crawl permissions: 13030/13030, rps: 9.624/sec