-
Notifications
You must be signed in to change notification settings - Fork 101
Added more views to assessment dashboard #474
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,7 @@ | ||
| import dataclasses | ||
| import logging | ||
| import os | ||
| import pkgutil | ||
| from abc import ABC, abstractmethod | ||
| from collections.abc import Iterator | ||
| from typing import ClassVar | ||
|
|
@@ -244,3 +245,30 @@ def _snapshot(self, fetcher, loader) -> list[any]: | |
| def _append_records(self, items): | ||
| logger.debug(f"[{self._full_name}] found {len(items)} new records for {self._table}") | ||
| self._backend.save_table(self._full_name, items, self._klass, mode="append") | ||
|
|
||
|
|
||
| class SchemaDeployer: | ||
| def __init__(self, sql_backend: SqlBackend, inventory_schema: str, mod: any): | ||
| self._sql_backend = sql_backend | ||
| self._inventory_schema = inventory_schema | ||
| self._module = mod | ||
|
|
||
| def deploy_schema(self): | ||
| logger.info(f"Ensuring {self._inventory_schema} database exists") | ||
| self._sql_backend.execute(f"CREATE SCHEMA IF NOT EXISTS hive_metastore.{self._inventory_schema}") | ||
|
|
||
| def deploy_table(self, name: str, klass: type): | ||
| logger.info(f"Ensuring {self._inventory_schema}.{name} table exists") | ||
| self._sql_backend.create_table(f"hive_metastore.{self._inventory_schema}.{name}", klass) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What happens there if we change/remove a column in a table? I assume that this is handled automatically by save_table.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it is not handled yet - see #471
nfx marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| def deploy_view(self, name: str, relative_filename: str): | ||
| query = self._load(relative_filename) | ||
| logger.info(f"Ensuring {self._inventory_schema}.{name} view matches {relative_filename} contents") | ||
| ddl = f"CREATE OR REPLACE VIEW hive_metastore.{self._inventory_schema}.{name} AS {query}" | ||
| self._sql_backend.execute(ddl) | ||
|
|
||
| def _load(self, relative_filename: str) -> str: | ||
| data = pkgutil.get_data(self._module.__name__, relative_filename) | ||
| sql = data.decode("utf-8") | ||
| sql = sql.replace("$inventory", f"hive_metastore.{self._inventory_schema}") | ||
| return sql | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,5 @@ | ||
| import logging | ||
| from collections import defaultdict | ||
| from collections.abc import Iterator | ||
| from dataclasses import dataclass | ||
| from functools import partial | ||
|
|
||
|
|
@@ -198,7 +197,7 @@ def _grants( | |
| view: str | None = None, | ||
| any_file: bool = False, | ||
| anonymous_function: bool = False, | ||
| ) -> Iterator[Grant]: | ||
| ) -> list[Grant]: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Great! I just found this as well in #477 - nice this is fixed. |
||
| """ | ||
| Fetches and yields grant information for the specified database objects. | ||
|
|
||
|
|
@@ -239,23 +238,29 @@ def _grants( | |
| anonymous_function=anonymous_function, | ||
| ) | ||
| try: | ||
| grants = [] | ||
| object_type_normalization = {"SCHEMA": "DATABASE", "CATALOG$": "CATALOG"} | ||
| for row in self._fetch(f"SHOW GRANTS ON {on_type} {key}"): | ||
| (principal, action_type, object_type, _) = row | ||
| if object_type in object_type_normalization: | ||
| object_type = object_type_normalization[object_type] | ||
| if on_type != object_type: | ||
| continue | ||
| yield Grant( | ||
| principal=principal, | ||
| action_type=action_type, | ||
| table=table, | ||
| view=view, | ||
| database=database, | ||
| catalog=catalog, | ||
| any_file=any_file, | ||
| anonymous_function=anonymous_function, | ||
| # we have to return concrete list, as with yield we're executing | ||
| # everything on the main thread. | ||
| grants.append( | ||
| Grant( | ||
| principal=principal, | ||
| action_type=action_type, | ||
| table=table, | ||
| view=view, | ||
| database=database, | ||
| catalog=catalog, | ||
| any_file=any_file, | ||
| anonymous_function=anonymous_function, | ||
| ) | ||
| ) | ||
| return grants | ||
| except Exception as e: | ||
| # TODO: https://github.com/databrickslabs/ucx/issues/406 | ||
| logger.error(f"Couldn't fetch grants for object {on_type} {key}: {e}") | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,12 +23,30 @@ | |
| from databricks.sdk.service.workspace import ImportFormat | ||
|
|
||
| from databricks.labs.ucx.__about__ import __version__ | ||
| from databricks.labs.ucx.assessment.crawlers import ( | ||
| AzureServicePrincipalInfo, | ||
| ClusterInfo, | ||
| GlobalInitScriptInfo, | ||
| JobInfo, | ||
| PipelineInfo, | ||
| ) | ||
| from databricks.labs.ucx.config import GroupsConfig, WorkspaceConfig | ||
| from databricks.labs.ucx.framework.crawlers import ( | ||
| SchemaDeployer, | ||
| SqlBackend, | ||
| StatementExecutionBackend, | ||
| ) | ||
| from databricks.labs.ucx.framework.dashboards import DashboardFromFiles | ||
| from databricks.labs.ucx.framework.install_state import InstallState | ||
| from databricks.labs.ucx.framework.tasks import _TASKS, Task | ||
| from databricks.labs.ucx.hive_metastore.data_objects import ExternalLocation | ||
| from databricks.labs.ucx.hive_metastore.grants import Grant | ||
| from databricks.labs.ucx.hive_metastore.hms_lineage import HiveMetastoreLineageEnabler | ||
| from databricks.labs.ucx.hive_metastore.mounts import Mount | ||
| from databricks.labs.ucx.hive_metastore.tables import Table, TableError | ||
| from databricks.labs.ucx.runtime import main | ||
| from databricks.labs.ucx.workspace_access.base import Permissions | ||
| from databricks.labs.ucx.workspace_access.generic import WorkspaceObjectInfo | ||
|
|
||
| TAG_STEP = "step" | ||
| TAG_APP = "App" | ||
|
|
@@ -89,12 +107,36 @@ | |
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
| def deploy_schema(sql_backend: SqlBackend, inventory_schema: str): | ||
| from databricks.labs import ucx | ||
|
|
||
| deployer = SchemaDeployer(sql_backend, inventory_schema, ucx) | ||
| deployer.deploy_schema() | ||
| deployer.deploy_table("azure_service_principals", AzureServicePrincipalInfo) | ||
| deployer.deploy_table("clusters", ClusterInfo) | ||
| deployer.deploy_table("global_init_scripts", GlobalInitScriptInfo) | ||
| deployer.deploy_table("jobs", JobInfo) | ||
| deployer.deploy_table("pipelines", PipelineInfo) | ||
| deployer.deploy_table("external_locations", ExternalLocation) | ||
| deployer.deploy_table("mounts", Mount) | ||
| deployer.deploy_table("grants", Grant) | ||
| deployer.deploy_table("tables", Table) | ||
| deployer.deploy_table("table_failures", TableError) | ||
| deployer.deploy_table("workspace_objects", WorkspaceObjectInfo) | ||
| deployer.deploy_table("permissions", Permissions) | ||
| deployer.deploy_view("objects", "queries/views/objects.sql") | ||
| deployer.deploy_view("grant_detail", "queries/views/grant_detail.sql") | ||
|
|
||
|
|
||
| class WorkspaceInstaller: | ||
| def __init__(self, ws: WorkspaceClient, *, prefix: str = "ucx", promtps: bool = True): | ||
| def __init__( | ||
| self, ws: WorkspaceClient, *, prefix: str = "ucx", promtps: bool = True, sql_backend: SqlBackend = None | ||
| ): | ||
| if "DATABRICKS_RUNTIME_VERSION" in os.environ: | ||
| msg = "WorkspaceInstaller is not supposed to be executed in Databricks Runtime" | ||
| raise SystemExit(msg) | ||
| self._ws = ws | ||
| self._sql_backend = sql_backend | ||
| self._prefix = prefix | ||
| self._prompts = promtps | ||
| self._this_file = Path(__file__) | ||
|
|
@@ -111,10 +153,16 @@ def _run_configured(self): | |
| self._install_spark_config_for_hms_lineage() | ||
| self._create_dashboards() | ||
| self._create_jobs() | ||
| self._create_database() | ||
| readme = f'{self.notebook_link(f"{self._install_folder}/README.py")}' | ||
| msg = f"Installation completed successfully! Please refer to the {readme} notebook for next steps." | ||
| logger.info(msg) | ||
|
|
||
| def _create_database(self): | ||
| if self._sql_backend is None: | ||
| self._sql_backend = StatementExecutionBackend(self._ws, self._current_config.warehouse_id) | ||
| deploy_schema(self._sql_backend, self._current_config.inventory_database) | ||
|
|
||
| def _install_spark_config_for_hms_lineage(self): | ||
| hms_lineage = HiveMetastoreLineageEnabler(ws=self._ws) | ||
| logger.info( | ||
|
|
@@ -157,10 +205,15 @@ def _install_spark_config_for_hms_lineage(self): | |
|
|
||
| @staticmethod | ||
| def run_for_config( | ||
| ws: WorkspaceClient, config: WorkspaceConfig, *, prefix="ucx", override_clusters: dict[str, str] | None = None | ||
| ws: WorkspaceClient, | ||
| config: WorkspaceConfig, | ||
| *, | ||
| prefix="ucx", | ||
| override_clusters: dict[str, str] | None = None, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why would we need this? It looks like it's only useful for testing purposes — why not override the cluster directly in the test instead?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
| sql_backend: SqlBackend = None, | ||
| ) -> "WorkspaceInstaller": | ||
| logger.info(f"Installing UCX v{__version__} on {ws.config.host}") | ||
| workspace_installer = WorkspaceInstaller(ws, prefix=prefix, promtps=False) | ||
| workspace_installer = WorkspaceInstaller(ws, prefix=prefix, promtps=False, sql_backend=sql_backend) | ||
| logger.info(f"Installing UCX v{workspace_installer._version} on {ws.config.host}") | ||
| workspace_installer._config = config | ||
| workspace_installer._write_config() | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| -- viz type=counter, name=Workspace UC readiness, counter_label=UC readiness, value_column=readiness | ||
| -- widget row=0, col=0, size_x=1, size_y=3 | ||
| WITH raw AS ( | ||
| SELECT object_type, object_id, IF(failures == '[]', 1, 0) AS ready FROM $inventory.objects | ||
| ) | ||
| SELECT CONCAT(ROUND(SUM(ready) / COUNT(*) * 100, 1), '%') AS readiness FROM raw |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,4 @@ | ||
| -- viz type=counter, name=Total Database Count, counter_label=Total Databases, value_column=count_total_databases | ||
| -- widget col=0, row=0, size_x=1, size_y=3 | ||
| -- widget row=0, col=1, size_x=1, size_y=3 | ||
| SELECT COUNT(DISTINCT `database`) AS count_total_databases | ||
| FROM $inventory.tables |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,4 @@ | ||
| -- viz type=counter, name=Storage Locations, counter_label=Storage Locations, value_column=count_total | ||
| -- widget row=0, col=4, size_x=1, size_y=3 | ||
| SELECT count(*) AS count_total | ||
| FROM $inventory.external_locations |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,4 @@ | ||
| -- viz type=counter, name=Total Table Count, counter_label=Total Tables, value_column=count_total_tables | ||
| -- widget col=1, row=0, size_x=1, size_y=3 | ||
| -- widget row=0, col=3, size_x=1, size_y=3 | ||
| SELECT count(*) AS count_total_tables | ||
| FROM $inventory.tables |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,9 @@ | ||
| -- viz type=table, name=Object Type Readiness, columns=object_type,readiness | ||
| -- widget title=Readiness, row=1, col=0, size_x=1, size_y=8 | ||
| WITH raw AS ( | ||
| SELECT object_type, object_id, IF(failures == '[]', 1, 0) AS ready FROM $inventory.objects | ||
| ) | ||
| SELECT object_type, CONCAT(ROUND(SUM(ready) / COUNT(*) * 100, 1), '%') AS readiness | ||
| FROM raw | ||
| GROUP BY object_type | ||
| ORDER BY readiness DESC |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| -- viz type=table, name=Failure Summary, search_by=failure, columns=failure,count | ||
| -- widget title=Failure Summary, row=1, col=1, size_x=2, size_y=8 | ||
| WITH raw AS ( | ||
| SELECT EXPLODE(FROM_JSON(failures, 'array<string>')) AS failure FROM $inventory.objects WHERE failures <> '[]' | ||
| ) | ||
| SELECT failure, COUNT(*) AS count FROM raw GROUP BY failure | ||
| ORDER BY count DESC, failure DESC |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,53 @@ | ||
| -- viz type=table, name=Database Summary, search_by=database, columns=database,upgrade,tables,views,dbfs_root,delta_tables,total_grants,granted_principals,database_grants,table_grants,service_principal_grants,user_grants,group_grants | ||
| -- widget title=Database Summary, col=0, row=2, size_x=6, size_y=8 | ||
| WITH table_stats AS ( | ||
| SELECT | ||
| `database`, | ||
| object_type, | ||
| UPPER(table_format) AS `format`, | ||
| `location`, | ||
| IF(object_type IN ("MANAGED", "EXTERNAL"), 1, 0) AS is_table, | ||
| IF(object_type = "VIEW", 1, 0) AS is_view, | ||
| CASE | ||
| WHEN STARTSWITH(location, "dbfs:/") AND NOT STARTSWITH(location, "dbfs:/mnt") THEN 1 | ||
| WHEN STARTSWITH(location, "/dbfs/") AND NOT STARTSWITH(location, "/dbfs/mnt") THEN 1 | ||
| ELSE 0 | ||
| END AS is_dbfs_root, | ||
| CASE | ||
| WHEN STARTSWITH(location, "wasb") THEN 1 | ||
| WHEN STARTSWITH(location, "adl") THEN 1 | ||
| ELSE 0 | ||
| END AS is_unsupported, | ||
| IF(format = "DELTA", 1, 0) AS is_delta | ||
| FROM $inventory.tables | ||
| ), database_stats AS ( | ||
| SELECT `database`, | ||
| CASE | ||
| WHEN SUM(is_table) = 0 AND SUM(is_view) > 0 THEN "View Migration Required" | ||
| WHEN SUM(is_dbfs_root)/SUM(is_table) > .3 THEN "Asset Replication Required" | ||
| WHEN SUM(is_delta)/SUM(is_table) < .7 THEN "Some Non Delta Assets" | ||
| WHEN SUM(is_unsupported)/SUM(is_table) > .7 THEN "Storage Migration Required" | ||
| ELSE "In Place Sync" | ||
| END AS upgrade, | ||
| SUM(is_table) AS tables, | ||
| SUM(is_view) AS views, | ||
| SUM(is_unsupported) AS unsupported, | ||
| SUM(is_dbfs_root) AS dbfs_root, | ||
| SUM(is_delta) AS delta_tables | ||
| FROM table_stats | ||
| GROUP BY `database` | ||
| ), grant_stats AS ( | ||
| SELECT | ||
| `database`, | ||
| COUNT(*) AS total_grants, | ||
| COUNT(DISTINCT principal) AS granted_principals, | ||
| SUM(IF(object_type == 'DATABASE', 1, 0)) AS database_grants, | ||
| SUM(IF(object_type == 'TABLE', 1, 0)) AS table_grants, | ||
| SUM(IF(principal_type == 'service-principal', 1, 0)) AS service_principal_grants, | ||
| SUM(IF(principal_type == 'user', 1, 0)) AS user_grants, | ||
| SUM(IF(principal_type == 'group', 1, 0)) AS group_grants | ||
| FROM $inventory.grant_detail | ||
| GROUP BY `database` | ||
| ) | ||
| SELECT * FROM database_stats FULL JOIN grant_stats USING (`database`) | ||
| ORDER BY tables DESC |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,4 @@ | ||
| -- viz type=table, name=External Locations, columns=location | ||
| -- widget title=External Locations, col=0, row=17, size_x=3, size_y=8 | ||
| -- widget title=External Locations, row=3, col=0, size_x=3, size_y=8 | ||
| SELECT location | ||
| FROM $inventory.external_locations |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,5 @@ | ||
| -- viz type=table, name=Mount Points, columns=name,source | ||
| -- widget title=Mount Points, col=3, row=17, size_x=3, size_y=8 | ||
| -- widget title=Mount Points, row=3, col=3, size_x=3, size_y=8 | ||
| SELECT name, | ||
| source | ||
| FROM $inventory.mounts |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,9 @@ | ||
| -- viz type=table, name=Clusters, columns=failure,cluster_name,cluster_id,creator | ||
| -- widget title=Incompatible clusters, row=4, col=0, size_x=3, size_y=8 | ||
| SELECT EXPLODE(FROM_JSON(failures, 'array<string>')) AS failure, | ||
| cluster_id, | ||
| cluster_name, | ||
| creator | ||
| FROM $inventory.clusters | ||
| WHERE NOT STARTSWITH(cluster_name, "job-") | ||
| ORDER BY cluster_id DESC |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,10 @@ | ||
| -- viz type=table, name=Jobs, columns=failure,job_id,job_name,creator | ||
| -- widget title=Incompatible jobs, row=4, col=3, size_x=3, size_y=8 | ||
| SELECT | ||
| EXPLODE(FROM_JSON(failures, 'array<string>')) AS failure, | ||
| job_id, | ||
| job_name, | ||
| creator | ||
| FROM $inventory.jobs | ||
| WHERE job_name not like '[UCX]%' | ||
| ORDER BY job_id DESC |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it filtered by the assessment tool? I recall that we do not have such a thing.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
what do you mean?