
Commit 7735a71

parent c866d42
author Vuong <[email protected]> 1709737244 +0000
committer Vuong <[email protected]> 1709739422 +0000

add trust relationship update

Fix integration tests on AWS (#978)

Update groups permissions validation to use Table ACL cluster (#979)

Renamed columns in assessment SQL queries to use actual names, not aliases (#983)

Aliases are usually not allowed in projections, as they are replaced in later query execution phases. While DBSQL is often smart enough to resolve references via aliases, on some setups this results in an error. Changing column references to use actual names fixes this.

Resolves #980

- [x] manually tested

Fixed `config.yml` upgrade from very old versions (#984)

Added `upgraded_from_workspace_id` property to migrated tables to indicate the source workspace (#987)

Added table parameter `upgraded_from_ws` to migrated tables. The parameter contains the source workspace id.

Resolves #899

- [x] manually tested
- [x] added unit tests
- [x] added integration tests
- [x] verified on staging environment (screenshot attached)

Added group members difference to the output of `validate-groups-membership` cli command (#995)

The `validate-groups-membership` command has been updated to include a comparison of group memberships at both the account and workspace levels, displaying the difference in members between the two levels in a new column. This enhancement allows for a more detailed analysis of group memberships, with the added functionality implemented in the `validate_group_membership` function in `databricks/labs/ucx/workspace_access/groups.py`. A new output field, `group_members_difference`, represents the difference in the number of members between a workspace group and its associated account group. The corresponding unit test file, `test_groups.py`, has been updated with a new test case that verifies the calculation of the `group_members_difference` value.
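The `group_members_difference` calculation described above can be sketched as follows; the dataclass and function names here are illustrative, not the actual ucx implementation:

```python
from dataclasses import dataclass


@dataclass
class GroupComparison:
    # Hypothetical container for one workspace/account group pair
    display_name: str
    workspace_members: set[str]
    account_members: set[str]


def group_members_difference(pair: GroupComparison) -> int:
    """Positive: the workspace group has more members than the account group;
    negative: the account group has more; zero: equal member counts."""
    return len(pair.workspace_members) - len(pair.account_members)
```

For example, a workspace group with three members paired with an account group of two yields `1`, flagging a discrepancy worth investigating.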
This change provides users with a more comprehensive view of their group memberships and allows them to easily identify any discrepancies between the account and workspace levels. The functionality of the other commands remains unchanged.

Improved installation integration test flakiness (#998)

- improved `_infer_error_from_job_run` and `_infer_error_from_task_run` to also catch `KeyError` and `ValueError`
- removed retries for `Unknown` errors for installation tests

Expanded end-user documentation with detailed descriptions for workflows and commands (#999)

The Databricks Labs UCX project has been updated with several new features to assist in upgrading to Unity Catalog. These include various workflows and command-line utilities, such as an assessment workflow that generates a detailed compatibility report for workspace entities and a group migration workflow to upgrade all Databricks workspace assets. Additionally, new utility commands have been added for managing cross-workspace installations, and users can now view deployed workflows' status and repair failed workflows. New end-user documentation has also been introduced, featuring comprehensive descriptions of workflows, commands, and an assessment report image. The assessment report, generated from UCX tools, now includes a more detailed summary of the assessment findings, table counts, database summaries, and external locations. Improved documentation for external Hive Metastore integration and a new debugging notebook are also included in this release. Lastly, the workspace group migration feature has been expanded to handle potential conflicts when migrating multiple workspaces with locally scoped group names.

Release v0.14.0 (#1000)

* Added `upgraded_from_workspace_id` property to migrated tables to indicate the source workspace ([#987](#987)).
In this release, updates have been made to the `_migrate_external_table`, `_migrate_dbfs_root_table`, and `_migrate_view` methods in the `table_migrate.py` file to include a new parameter `upgraded_from_ws` in the SQL commands used to alter tables, views, or managed tables. This parameter stores the source workspace ID in the migrated tables, indicating the migration origin. A new utility method `sql_alter_from` has been added to the `Table` class in `tables.py` to generate the SQL command with the new parameter, and a new class-level attribute `UPGRADED_FROM_WS_PARAM` has been added to the same class to indicate the source workspace. These changes resolve issue [#899](#899) and are covered by manual testing, unit tests, and integration tests. No new CLI commands, workflows, or tables have been added or modified, and there are no changes to user documentation.

* Added a command to create account level groups if they do not exist ([#763](#763)). This commit introduces a new feature that enables the creation of account-level groups if they do not already exist in the account. A new command, `create-account-groups`, has been added to the `databricks labs ucx` tool, which crawls all workspaces in the account and creates account-level groups if a corresponding workspace-local group is not found. The feature supports various scenarios, including creating account-level groups that exist in some workspaces but not in others, and creating multiple account-level groups with the same name but different members. Several new methods have been added to the `account.py` file to support the new feature, and the `test_account.py` file has been updated with new tests to ensure the correct behavior of the `create_account_level_groups` method.
Additionally, the `cli.py` file has been updated to include the new `create-account-groups` command. With these changes, users can easily manage account-level groups and ensure that they are consistent across all workspaces in the account, improving the overall user experience.

* Added assessment for the incompatible `RunSubmit` API usages ([#849](#849)). In this release, the assessment functionality for incompatible `RunSubmit` API usages has been significantly enhanced through various changes. The `clusters.py` file has seen improvements in clarity and consistency with the renaming of private methods `check_spark_conf` to `_check_spark_conf` and `check_cluster_failures` to `_check_cluster_failures`. The `_assess_clusters` method has been updated to call the renamed `_check_cluster_failures` method for thorough checks of cluster configurations, resulting in better assessment functionality. A new `SubmitRunsCrawler` class has been added to the `databricks.labs.ucx.assessment.jobs` module, implementing the `CrawlerBase`, `JobsMixin`, and `CheckClusterMixin` classes. This class crawls and assesses job runs based on their submitted runs, ensuring compatibility and identifying failure issues. Additionally, a new configuration attribute, `num_days_submit_runs_history`, has been introduced in the `WorkspaceConfig` class of the `config.py` module, controlling the number of days for which submission history of `RunSubmit` API calls is retained. Lastly, various new JSON files have been added for unit testing, assessing the `RunSubmit` API usages related to different scenarios like dbt task runs, Git source-based job runs, JAR file runs, and more. These tests will aid in identifying and addressing potential compatibility issues with the `RunSubmit` API.

* Added group members difference to the output of `validate-groups-membership` cli command ([#995](#995)).
The `validate-groups-membership` command has been updated to include a comparison of group memberships at both the account and workspace levels. This enhancement is implemented through the `validate_group_membership` function, which has been updated to calculate the difference in members between the two levels and display it in a new `group_members_difference` column. This allows for a more detailed analysis of group memberships and easily identifies any discrepancies between the account and workspace levels. The corresponding unit test file, `test_groups.py`, has been updated to include a new test case that verifies the calculation of the `group_members_difference` value. The functionality of the other commands remains unchanged. The new `group_members_difference` value is calculated as the difference in the number of members in the workspace group and the account group, with a positive value indicating more members in the workspace group and a negative value indicating more members in the account group. The table template in the labs.yml file has also been updated to include the new column for the group membership difference.

* Added handling for empty `directory_id` if managed identity encountered during the crawling of StoragePermissionMapping ([#986](#986)). This PR adds a `type` field to the `StoragePermissionMapping` and `Principal` dataclasses to differentiate between service principals and managed identities, allowing `None` for the `directory_id` field if the principal is not a service principal. During the migration to UC storage credentials, managed identities are currently ignored. These changes improve handling of managed identities during the crawling of `StoragePermissionMapping`, prevent errors when creating storage credentials with managed identities, and address issue [#339](#339).
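A minimal sketch of the optional `directory_id` handling described above; the field set below is an assumption based on the PR summary, not the exact ucx dataclass:

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class Principal:
    client_id: str
    display_name: str
    # "Application" for a service principal, "ManagedIdentity" otherwise
    type: str
    # Managed identities have no directory (tenant) id, so this may be None
    directory_id: Optional[str] = None


def can_migrate_to_storage_credential(principal: Principal) -> bool:
    # Managed identities are currently skipped during the migration to
    # UC storage credentials; only service principals with a directory id
    # can be migrated, so a missing directory_id is not an error here.
    return principal.type == "Application" and principal.directory_id is not None
```

Making `directory_id` optional at the type level means the crawler can record managed identities without raising, while the migration step filters them out explicitly.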
The changes are tested through unit tests, manual testing, and integration tests, and only affect the `StoragePermissionMapping` class and related methods, without introducing new commands, workflows, or tables.

* Added migration for Azure Service Principals with secrets stored in Databricks Secret to UC Storage Credentials ([#874](#874)). In this release, we have made significant updates to migrate Azure Service Principals with their secrets stored in Databricks Secret to UC Storage Credentials, enhancing security and management of storage access. The changes include:

  - a new `migrate_credentials` command in the `labs.yml` file to migrate credentials for storage access to UC storage credentials;
  - modification of `secrets.py` to handle the case where a secret has been removed from the backend and to log warning messages for secrets with invalid Base64 bytes;
  - introduction of the `StorageCredentialManager` and `ServicePrincipalMigration` classes in `credentials.py` to manage Azure Service Principals and their associated client secrets, and to migrate them to UC Storage Credentials;
  - a new `directory_id` attribute in the `Principal` class and its associated dataclass in `resources.py` to store the directory ID for creating UC storage credentials using a service principal;
  - a new pytest fixture, `make_storage_credential_spn`, in `fixtures.py` to simplify writing tests requiring Databricks Storage Credentials with Azure Service Principal auth;
  - a new test file for the Azure integration of the project, including new classes, methods, and test cases for testing the migration of Azure Service Principals to UC Storage Credentials.

  These improvements ensure better security and management of storage access using Azure Service Principals, while providing more efficient and robust testing capabilities.

* Added permission migration support for feature tables and the root permissions for models and feature tables ([#997](#997)).
This commit introduces support for migration of permissions related to feature tables and sets root permissions for models and feature tables. New functions such as `feature_store_listing`, `feature_tables_root_page`, `models_root_page`, and `tokens_and_passwords` have been added to facilitate population of a workspace access page with necessary permissions information. The `factory` function in `manager.py` has been updated to include new listings for models' root page, feature tables' root page, and the feature store for enhanced management and access control of models and feature tables. New classes and methods have been implemented to handle permissions for these resources, utilizing the `GenericPermissionsSupport`, `AccessControlRequest`, and `MigratedGroup` classes. Additionally, new test methods have been included to verify feature tables listing functionality and root page listing functionality for feature tables and registered models. The test manager method has been updated to include `feature-tables` in the list of items to be checked for permissions, ensuring comprehensive testing of permission functionality related to these new feature tables.

* Added support for serving endpoints ([#990](#990)). In this release, we have made significant enhancements to support serving endpoints in our open-source library. The `fixtures.py` file in the `databricks.labs.ucx.mixins` module has been updated with new classes and functions to create and manage serving endpoints, accompanied by integration tests to verify their functionality. We have added a new listing for serving endpoints in the assessment's permissions crawling, using the `ws.serving_endpoints.list` function and the `serving-endpoints` category. A new integration test, `test_endpoints`, has been added to verify that assessments now crawl permissions for serving endpoints. This test demonstrates the ability to migrate permissions from one group to another.
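A hedged sketch of what registering such a listing might look like; the `Listing` dataclass and factory wiring below are simplified stand-ins for the actual `GenericPermissionsSupport` machinery, not the ucx API:

```python
from dataclasses import dataclass
from typing import Any, Callable, Iterable


@dataclass
class Listing:
    # Callable that enumerates objects, e.g. ws.serving_endpoints.list
    func: Callable[[], Iterable[Any]]
    id_attribute: str   # attribute or key holding the object id
    object_type: str    # permissions category, e.g. "serving-endpoints"


def serving_endpoints_listing(ws) -> Listing:
    # ws is assumed to be a databricks.sdk WorkspaceClient; list() yields
    # the workspace's serving endpoints for permissions crawling
    return Listing(ws.serving_endpoints.list, "id", "serving-endpoints")
```

Keeping the enumeration callable and the permissions category together lets the crawler treat serving endpoints like any other generic-permissions resource.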
The test suite has been updated to ensure the proper functioning of the new feature and improve the assessment of permissions for serving endpoints, ensuring compatibility with the updated `test_manager.py` file.

* Expanded end-user documentation with detailed descriptions for workflows and commands ([#999](#999)). The Databricks Labs UCX project has been updated with several new features to assist in upgrading to Unity Catalog, including an assessment workflow that generates a detailed compatibility report for workspace entities, a group migration workflow for upgrading all Databricks workspace assets, and utility commands for managing cross-workspace installations. The assessment report now includes a more detailed summary of the assessment findings, table counts, database summaries, and external locations. Additional improvements include expanded workspace group migration to handle potential conflicts with locally scoped group names, enhanced documentation for external Hive Metastore integration, a new debugging notebook, and detailed descriptions of table upgrade considerations, data access permissions, external storage, and table crawler.

* Fixed `config.yml` upgrade from very old versions ([#984](#984)). In this release, we've introduced enhancements to the configuration upgrading process for `config.yml` in our open-source library. We've replaced the previous `v1_migrate` class method with a new implementation that specifically handles migration from version 1. The new method retrieves the `groups` field, extracts the `selected` value, and assigns it to the `include_group_names` key in the configuration. The `backup_group_prefix` value from the `groups` field is assigned to the `renamed_group_prefix` key, and the `groups` field is removed, with the version number updated to 2. These changes simplify the code and improve readability, enabling users to upgrade smoothly from version 1 of the configuration.
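The v1 migration steps summarized above reduce to a small dictionary transformation. This sketch uses the key names from the description; the surrounding `WorkspaceConfig`/installation machinery is assumed and omitted:

```python
def migrate_config_v1_to_v2(raw: dict) -> dict:
    """Upgrade a version-1 config.yml payload, as described above."""
    groups = raw.pop("groups", {})
    if "selected" in groups:
        # the selected group names become the explicit include list
        raw["include_group_names"] = groups["selected"]
    if "backup_group_prefix" in groups:
        # the backup prefix is carried over under its new name
        raw["renamed_group_prefix"] = groups["backup_group_prefix"]
    raw["version"] = 2
    return raw
```

The `groups` field is consumed entirely, so a migrated config never carries both the old and new key layouts.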
Furthermore, we've added new unit tests to the `test_config.py` file to ensure backward compatibility. Two new tests, `test_v1_migrate_zeroconf` and `test_v1_migrate_some_conf`, have been added, utilizing the `MockInstallation` class and loading the configuration using `WorkspaceConfig`. These tests enhance the robustness and reliability of the migration process for `config.yml`.

* Renamed columns in assessment SQL queries to use actual names, not aliases ([#983](#983)). In this update, we have resolved an issue where aliases used for column references in SQL queries caused errors in certain setups by renaming them to use actual names. Specifically, for assessment SQL queries, we have modified the definition of the `is_delta` column to use the actual `table_format` name instead of the alias `format`. This change improves compatibility and enhances the reliability of query execution, ensuring consistent interpretation of column references across various setups and avoiding potential errors caused by aliases. It does not introduce any new methods, but instead modifies existing functionality to use actual column names, yielding a more reliable and consistent SQL query for the `05_0_all_tables` assessment.

* Updated groups permissions validation to use Table ACL cluster ([#979](#979)). In this update, the `validate_groups_permissions` task has been modified to utilize the Table ACL cluster, as indicated by the inclusion of `job_cluster="tacl"`. This task is responsible for ensuring that all crawled permissions are accurately applied to the destination groups by calling the `permission_manager.apply_group_permissions` method during the migration state. This modification enhances the validation of group permissions by performing it on the Table ACL cluster, potentially improving performance or functionality.
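To illustrate the alias problem fixed in [#983](#983): the simplified, hypothetical before/after queries below show the `is_delta` projection referencing the aliased column versus the actual `table_format` column. The real assessment query and table names differ; this is only the shape of the fix:

```python
# Before: `is_delta` references the alias `format`. Some SQL engines reject
# this because aliases are not yet defined while projections are evaluated.
QUERY_WITH_ALIAS_REFERENCE = """
SELECT table_format AS format,
       format = 'DELTA' AS is_delta
FROM inventory.tables
"""

# After: `is_delta` references the actual column name, which is always valid
# regardless of how the engine orders projection evaluation.
QUERY_WITH_ACTUAL_NAME = """
SELECT table_format AS format,
       table_format = 'DELTA' AS is_delta
FROM inventory.tables
"""
```

The alias `format` can still be kept for display purposes; only the *references* to it inside the same projection list need to use the underlying column name.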
If you rely on this validation step, review how running it on the Table ACL cluster affects your permissions validation workflow and adjust accordingly.

Update databricks-labs-blueprint requirement from ~=0.2.4 to ~=0.3.0 (#1001)

Updates the requirements on [databricks-labs-blueprint](https://github.com/databrickslabs/blueprint) to permit the latest version.

<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/databrickslabs/blueprint/releases">databricks-labs-blueprint's releases</a>.</em></p>
<blockquote>
<h2>v0.3.0</h2>
<ul>
<li>Added automated upgrade framework (<a href="https://redirect.github.com/databrickslabs/blueprint/issues/50">#50</a>). This update introduces an automated upgrade framework for managing and applying upgrades to the product, with a new <code>upgrades.py</code> file that includes a <code>ProductInfo</code> class having methods for version handling, wheel building, and exception handling. The test code organization has been improved, and new test cases, functions, and a directory structure for fixtures and unit tests have been added for the upgrades functionality. The <code>test_wheels.py</code> file now checks the version of the Databricks SDK and handles cases where the version marker is missing or does not contain the <code>__version__</code> variable. Additionally, a new <code>Application State Migrations</code> section has been added to the README, explaining the process of seamless upgrades from version X to version Z through version Y, addressing the need for configuration or database state migrations as the application evolves. Users can apply these upgrades by following an idiomatic usage pattern involving several classes and functions.
Furthermore, improvements have been made to the <code>_trim_leading_whitespace</code> function in the <code>commands.py</code> file of the <code>databricks.labs.blueprint</code> module, ensuring accurate and consistent removal of leading whitespace for each line in the command string, leading to better overall functionality and maintainability.</li> <li>Added brute-forcing <code>SerdeError</code> with <code>as_dict()</code> and <code>from_dict()</code> (<a href="https://redirect.github.com/databrickslabs/blueprint/issues/58">#58</a>). This commit introduces a brute-forcing approach for handling <code>SerdeError</code> using <code>as_dict()</code> and <code>from_dict()</code> methods in an open-source library. The new <code>SomePolicy</code> class demonstrates the usage of these methods for manual serialization and deserialization of custom classes. The <code>as_dict()</code> method returns a dictionary representation of the class instance, and the <code>from_dict()</code> method, decorated with <code>@classmethod</code>, creates a new instance from the provided dictionary. Additionally, the GitHub Actions workflow for acceptance tests has been updated to include the <code>ready_for_review</code> event type, ensuring that tests run not only for opened and synchronized pull requests but also when marked as &quot;ready for review.&quot; These changes provide developers with more control over the deserialization process and facilitate debugging in cases where default deserialization fails, but should be used judiciously to avoid brittle code.</li> <li>Fixed nightly integration tests run as service principals (<a href="https://redirect.github.com/databrickslabs/blueprint/issues/52">#52</a>). In this release, we have enhanced the compatibility of our codebase with service principals, particularly in the context of nightly integration tests. 
The <code>Installation</code> class in the <code>databricks.labs.blueprint.installation</code> module has been refactored, deprecating the <code>current</code> method and introducing two new methods: <code>assume_global</code> and <code>assume_user_home</code>. These methods enable users to install and manage <code>blueprint</code> as either a global or user-specific installation. Additionally, the <code>existing</code> method has been updated to work with the new <code>Installation</code> methods. In the test suite, the <code>test_installation.py</code> file has been updated to correctly detect global and user-specific installations when running as a service principal. These changes improve the testability and functionality of our software, ensuring seamless operation with service principals during nightly integration tests.</li> <li>Made <code>test_existing_installations_are_detected</code> more resilient (<a href="https://redirect.github.com/databrickslabs/blueprint/issues/51">#51</a>). In this release, we have added a new test function <code>test_existing_installations_are_detected</code> that checks if existing installations are correctly detected and retries the test for up to 15 seconds if they are not. This improves the reliability of the test by making it more resilient to potential intermittent failures. We have also added an import from <code>databricks.sdk.retries</code> named <code>retried</code> which is used to retry the test function in case of an <code>AssertionError</code>. Additionally, the test function <code>test_existing</code> has been renamed to <code>test_existing_installations_are_detected</code> and the <code>xfail</code> marker has been removed. We have also renamed the test function <code>test_dataclass</code> to <code>test_loading_dataclass_from_installation</code> for better clarity. 
This change will help ensure that the library is correctly detecting existing installations and improve the overall quality of the codebase.</li>
</ul>
<p>Contributors: <a href="https://github.com/nfx"><code>@​nfx</code></a></p>
</blockquote>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/databrickslabs/blueprint/blob/main/CHANGELOG.md">databricks-labs-blueprint's changelog</a>.</em></p>
<blockquote>
<h2>0.3.0</h2>
<ul>
<li>Added automated upgrade framework (<a href="https://redirect.github.com/databrickslabs/blueprint/issues/50">#50</a>).</li>
<li>Added brute-forcing <code>SerdeError</code> with <code>as_dict()</code> and <code>from_dict()</code> (<a href="https://redirect.github.com/databrickslabs/blueprint/issues/58">#58</a>).</li>
<li>Fixed nightly integration tests run as service principals (<a href="https://redirect.github.com/databrickslabs/blueprint/issues/52">#52</a>).</li>
<li>Made <code>test_existing_installations_are_detected</code> more resilient (<a href="https://redirect.github.com/databrickslabs/blueprint/issues/51">#51</a>).</li>
</ul>
<h2>0.2.5</h2>
<ul>
<li>Automatically enable workspace filesystem if the feature is disabled (<a href="https://redirect.github.com/databrickslabs/blueprint/pull/42">#42</a>).</li>
</ul>
<h2>0.2.4</h2>
<ul>
<li>Added more integration tests for <code>Installation</code> (<a href="https://redirect.github.com/databrickslabs/blueprint/pull/39">#39</a>).</li>
<li>Fixed <code>yaml</code> optional import error (<a href="https://redirect.github.com/databrickslabs/blueprint/pull/38">#38</a>).</li>
</ul>
<h2>0.2.3</h2>
<ul>
<li>Added special handling for notebooks in <code>Installation.upload(...)</code> (<a href="https://redirect.github.com/databrickslabs/blueprint/pull/36">#36</a>).</li>
</ul>
<h2>0.2.2</h2>
<ul>
<li>Fixed issues with uploading wheels to DBFS and loading a non-existing install state (<a href="https://redirect.github.com/databrickslabs/blueprint/pull/34">#34</a>).</li>
</ul>
<h2>0.2.1</h2>
<ul>
<li>Aligned <code>Installation</code> framework with UCX project (<a href="https://redirect.github.com/databrickslabs/blueprint/pull/32">#32</a>).</li>
</ul>
<h2>0.2.0</h2>
<ul>
<li>Added common install state primitives with strong typing (<a href="https://redirect.github.com/databrickslabs/blueprint/pull/27">#27</a>).</li>
<li>Added documentation for Invoking Databricks Connect (<a
href="https://redirect.github.com/databrickslabs/blueprint/pull/28">#28</a>).</li> <li>Added more documentation for Databricks CLI command router (<a href="https://redirect.github.com/databrickslabs/blueprint/pull/30">#30</a>).</li> <li>Enforced <code>pylint</code> standards (<a href="https://redirect.github.com/databrickslabs/blueprint/pull/29">#29</a>).</li> </ul> <h2>0.1.0</h2> <ul> <li>Changed python requirement from 3.10.6 to 3.10 (<a href="https://redirect.github.com/databrickslabs/blueprint/pull/25">#25</a>).</li> </ul> <h2>0.0.6</h2> <ul> <li>Make <code>find_project_root</code> more deterministic (<a href="https://redirect.github.com/databrickslabs/blueprint/pull/23">#23</a>).</li> </ul> <!-- raw HTML omitted --> </blockquote> <p>... 
… (truncated)

Commits:
- [`905e5ff`](https://github.com/databrickslabs/blueprint/commit/905e5ff5303a005d48bc98d101a613afeda15d51) Release v0.3.0 ([#59](https://redirect.github.com/databrickslabs/blueprint/issues/59))
- [`a029f6b`](https://github.com/databrickslabs/blueprint/commit/a029f6bb1ecf807017754e298ea685326dbedf72) Added brute-forcing `SerdeError` with `as_dict()` and `from_dict()` ([#58](https://redirect.github.com/databrickslabs/blueprint/issues/58))
- [`c8a74f4`](https://github.com/databrickslabs/blueprint/commit/c8a74f4129b4592d365aac9670eb86069f3517f7) Added automated upgrade framework ([#50](https://redirect.github.com/databrickslabs/blueprint/issues/50))
- [`24e62ef`](https://github.com/databrickslabs/blueprint/commit/24e62ef4f060e43e02c92a7d082d95e8bc164317) Don't run integration tests on draft pull requests ([#55](https://redirect.github.com/databrickslabs/blueprint/issues/55))
- [`b4dd5ab`](https://github.com/databrickslabs/blueprint/commit/b4dd5abf4eaf8d022ae0b6ec7e659296ec3d2f37) Added tokei.rs badge ([#54](https://redirect.github.com/databrickslabs/blueprint/issues/54))
- [`01d9467`](https://github.com/databrickslabs/blueprint/commit/01d9467f425763ab08035001270593253bce11f0) Fixed nightly integration tests run as service principals ([#52](https://redirect.github.com/databrickslabs/blueprint/issues/52))
- [`aa57141`](https://github.com/databrickslabs/blueprint/commit/aa5714179c65be8e13f54601e1d1fcd70548342d) Made `test_existing_installations_are_detected` more resilient ([#51](https://redirect.github.com/databrickslabs/blueprint/issues/51))
- [`9cbc6f8`](https://github.com/databrickslabs/blueprint/commit/9cbc6f863d3ea06659f37939cf1b97115dd873bd) Bump `databrickslabs/sandbox/acceptance` to v0.1.0 ([#48](https://redirect.github.com/databrickslabs/blueprint/issues/48))
- [`22fc1a8`](https://github.com/databrickslabs/blueprint/commit/22fc1a8787b8e98de03048595202f88b7ddb9b94) Use `databrickslabs/sandbox/acceptance` action ([#45](https://redirect.github.com/databrickslabs/blueprint/issues/45))
- [`c7e47ab`](https://github.com/databrickslabs/blueprint/commit/c7e47abd82b2f04e95b1d91f346cc1ea6df43961) Release v0.2.5 ([#44](https://redirect.github.com/databrickslabs/blueprint/issues/44))
- Additional commits viewable in the [compare view](https://github.com/databrickslabs/blueprint/compare/v0.2.4...v0.3.0)
Signed-off-by: dependabot[bot] <[email protected]>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>

Run integration tests only for pull requests ready for review (#1002). Tested on https://github.com/databrickslabs/blueprint.

Reducing flakiness of create account groups (#1003).

Prompt user if Terraform is utilised for deploying infrastructure (#1004). Added the `is_terraform_used` prompt and stored the answer in the `WorkspaceInstaller` config. Resolves #393. Co-authored-by: Serge Smertin <[email protected]>

Update CONTRIBUTING.md (#1005). Closes #850.

Added `databricks labs ucx create-uber-principal` command to create an Azure service principal for migration (#976):
- Added the new `create-uber-principal` CLI command in labs.yml and cli.py
- Added a separate `AzureAPIClient` class to isolate Azure API calls
- Added logic to create the SPN, its secret, and the role assignment in resources, and to update the workspace config with the SPN client_id
- Added logic to grant that SPN access to every storage account used by tables and to update the UCX cluster policy with the SPN secret for each storage account
- Added unit and integration test cases

Resolves #881. Related issues: #993, #693. Manually tested; unit and integration tests added.

Fix gitguardian warning caused by "hello world" secret used in unit test (#1010). Replaced the plain encoded string with `base64.b64encode` to mitigate the GitGuardian warning.

Create UC external locations in Azure based on migrated storage credentials (#992).

Handle widget delete on upgrade platform bug (#1011).
1 parent c866d42 · commit 7735a71

29 files changed: +1851 −208 lines

labs.yml

Lines changed: 7 additions & 0 deletions

```diff
@@ -108,6 +108,13 @@ commands:
   - name: aws-profile
     description: AWS Profile to use for authentication
+  - name: create-uber-principal
+    description: For azure cloud, creates a service principal and gives STORAGE BLOB READER access on all the storage account
+      used by tables in the workspace and stores the spn info in the UCX cluster policy.
+    flags:
+      - name: subscription-id
+        description: Subscription to scan storage account in
   - name: validate-groups-membership
     description: Validate groups to check if the groups at account level and workspace level have different memberships
   table_template: |-
```

pyproject.toml

Lines changed: 1 addition & 1 deletion

```diff
@@ -27,7 +27,7 @@ classifiers = [
     "Programming Language :: Python :: Implementation :: CPython",
 ]
 dependencies = ["databricks-sdk~=0.20.0",
-                "databricks-labs-blueprint~=0.3.0",
+                "databricks-labs-blueprint~=0.3.1",
                 "PyYAML>=6.0.0,<7.0.0"]

 [project.entry-points.databricks]
```
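The bump from `~=0.3.0` to `~=0.3.1` uses PEP 440 "compatible release" matching: `~=0.3.1` accepts any `0.3.x` at or above `0.3.1`, but not `0.4.0`. A minimal sketch of that rule for plain final-release versions (the `packaging` library implements the full specifier grammar; this helper is only illustrative):

```python
# Sketch of PEP 440 compatible-release (~=) matching for final releases only,
# e.g. "databricks-labs-blueprint~=0.3.1" accepts >=0.3.1 and <0.4.
def compatible(version: str, spec: str) -> bool:
    v = tuple(int(part) for part in version.split("."))
    s = tuple(int(part) for part in spec.split("."))
    # lower bound: version must be at least the spec
    if v < s:
        return False
    # upper bound: everything except the last spec component must match exactly
    return v[: len(s) - 1] == s[: len(s) - 1]

print(compatible("0.3.2", "0.3.1"))  # patch bump within 0.3 is accepted
print(compatible("0.4.0", "0.3.1"))  # next minor is rejected
```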

src/databricks/labs/ucx/assessment/aws.py

Lines changed: 20 additions & 4 deletions

```diff
@@ -223,8 +223,8 @@ def _s3_actions(self, actions):
             s3_actions = [actions]
         return s3_actions

-    def add_uc_role(self, role_name):
-        aws_role_trust_doc = {
+    def _aws_role_trust_doc(self, external_id="0000"):
+        return {
             "Version": "2012-10-17",
             "Statement": [
                 {
@@ -233,20 +233,33 @@ def add_uc_role(self, role_name):
                         "AWS": "arn:aws:iam::414351767826:role/unity-catalog-prod-UCMasterRole-14S5ZJVKOTYTL"
                     },
                     "Action": "sts:AssumeRole",
-                    "Condition": {"StringEquals": {"sts:ExternalId": "0000"}},
+                    "Condition": {"StringEquals": {"sts:ExternalId": external_id}},
                 }
             ],
         }
+
+    def add_uc_role(self, role_name):
         # the AssumeRole condition will be modified with the external ID captured from the UC credential.
         # https://docs.databricks.com/en/connect/unity-catalog/storage-credentials.html
-        assume_role_json = self._get_json_for_cli(aws_role_trust_doc)
+        assume_role_json = self._get_json_for_cli(self._aws_role_trust_doc())
         add_role = self._run_json_command(
             f"iam create-role --role-name {role_name} --assume-role-policy-document {assume_role_json}"
         )
         if not add_role:
             return False
         return True

+    def update_uc_trust_role(self, role_name, external_id="0000"):
+        # Modify the AssumeRole condition with the external ID captured from the UC credential.
+        # https://docs.databricks.com/en/connect/unity-catalog/storage-credentials.html
+        assume_role_json = self._get_json_for_cli(self._aws_role_trust_doc(external_id))
+        update_role = self._run_json_command(
+            f"iam update-assume-role-policy --role-name {role_name} --policy-document {assume_role_json}"
+        )
+        if not update_role:
+            return False
+        return True
+
     def add_uc_role_policy(self, role_name, policy_name, s3_prefixes: set[str], account_id: str, kms_key=None):
         s3_prefixes_enriched = sorted([self.S3_PREFIX + s3_prefix for s3_prefix in s3_prefixes])
         statement = [
@@ -374,6 +387,9 @@ def create_uc_roles_cli(self, *, single_role=True, role_name="UC_ROLE", policy_n
         )
         role_id += 1

+    def update_uc_role_trust_policy(self, role_name, external_id="0000"):
+        return self._aws_resources.update_uc_trust_role(role_name, external_id)
+
     def save_uc_compatible_roles(self):
         uc_role_access = list(self._get_role_access())
         if len(uc_role_access) == 0:
```
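The two hunks above split one hard-coded trust document into a parameterized builder: the role is first created with the placeholder external ID `"0000"`, then re-rendered with the real external ID reported by the UC storage credential. A standalone sketch of that flow; the `Effect`/`Principal` keys follow the standard AWS trust-policy shape (the diff hunk elides those exact lines), and the external ID value below is an example:

```python
# Sketch of the parameterized trust-policy document from aws.py.
def aws_role_trust_doc(external_id: str = "0000") -> dict:
    return {
        "Version": "2012-10-17",
        "Statement": [
            {
                "Effect": "Allow",
                "Principal": {
                    "AWS": "arn:aws:iam::414351767826:role/unity-catalog-prod-UCMasterRole-14S5ZJVKOTYTL"
                },
                "Action": "sts:AssumeRole",
                "Condition": {"StringEquals": {"sts:ExternalId": external_id}},
            }
        ],
    }

# first pass (create-role): placeholder external ID
initial_doc = aws_role_trust_doc()
# second pass (update-assume-role-policy): ID captured from the UC credential
updated_doc = aws_role_trust_doc("12ab34cd-example")
```

Rendering the same document twice keeps create and update guaranteed to agree on everything except the external ID.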

src/databricks/labs/ucx/aws/credentials.py

Lines changed: 18 additions & 10 deletions

```diff
@@ -61,7 +61,7 @@ def create(self, role_action: AWSRoleAction) -> StorageCredentialInfo:
         return self._ws.storage_credentials.create(
             role_action.role_name,
             aws_iam_role=AwsIamRole(role_action.role_arn),
-            comment=f"Created by UCX during migration to UC using AWS instance profile: {role_action.role_name}",
+            comment=f"Created by UCX during migration to UC using AWS IAM Role: {role_action.role_name}",
         )

     def validate(self, role_action: AWSRoleAction) -> AWSStorageCredentialValidationResult:
@@ -74,7 +74,7 @@ def validate(self, role_action: AWSRoleAction) -> AWSStorageCredentialValidation
         except InvalidParameterValue:
             logger.warning(
                 "There is an existing external location overlaps with the prefix that is mapped to "
-                "the instance profile and used for validating the migrated storage credential. "
+                "the IAM Role and used for validating the migrated storage credential. "
                 "Skip the validation"
             )
             return AWSStorageCredentialValidationResult(
@@ -112,7 +112,7 @@ def validate(self, role_action: AWSRoleAction) -> AWSStorageCredentialValidation
         )


-class InstanceProfileMigration:
+class IamRoleMigration:

     def __init__(
         self,
@@ -121,7 +121,7 @@ def __init__(
         resource_permissions: AWSResourcePermissions,
         storage_credential_manager: AWSStorageCredentialManager,
     ):
-        self._output_file = "aws_instance_profile_migration_result.csv"
+        self._output_file = "aws_iam_role_migration_result.csv"
         self._installation = installation
         self._ws = ws
         self._resource_permissions = resource_permissions
@@ -135,7 +135,7 @@ def for_cli(cls, ws: WorkspaceClient, installation: Installation, aws_profile: s

         msg = (
             f"Have you reviewed the {AWSResourcePermissions.UC_ROLES_FILE_NAMES} "
-            "and confirm listed instance profiles to be migrated migration?"
+            "and confirm listed IAM roles to be migrated?"
         )
         if not prompts.confirm(msg):
             raise SystemExit()
@@ -162,7 +162,7 @@ def _generate_migration_list(self, include_names: set[str] | None = None) -> lis
         """
         Create the list of IAM roles that need to be migrated, output an action plan as a csv file for users to confirm
         """
-        # load instance profile list from aws_instance_profile_info.csv
+        # load IAM role list
         iam_list = self._resource_permissions.load_uc_compatible_roles()
         # list existing storage credentials
         sc_set = self._storage_credential_manager.list(include_names)
@@ -184,22 +184,30 @@ def run(
         iam_list = self._generate_migration_list(include_names)

         plan_confirmed = prompts.confirm(
-            "Above Instance Profiles will be migrated to UC storage credentials, please review and confirm."
+            "Above IAM roles will be migrated to UC storage credentials, please review and confirm."
         )
         if plan_confirmed is not True:
             return []

         execution_result = []
         for iam in iam_list:
-            self._storage_credential_manager.create(iam)
+            storage_credential = self._storage_credential_manager.create(iam)
+            if storage_credential.aws_iam_role is None:
+                logger.error(f"Failed to create storage credential for IAM role: {iam.role_arn}")
+                continue
+
+            self._resource_permissions.update_uc_role_trust_policy(
+                iam.role_arn, storage_credential.aws_iam_role.external_id
+            )
+
             execution_result.append(self._storage_credential_manager.validate(iam))

         if execution_result:
             results_file = self.save(execution_result)
             logger.info(
-                f"Completed migration from Instance Profile to UC Storage credentials"
+                f"Completed migration from IAM Role to UC Storage credentials"
                 f"Please check {results_file} for validation results"
             )
         else:
-            logger.info("No Instance Profile migrated to UC Storage credentials")
+            logger.info("No IAM Role migrated to UC Storage credentials")
         return execution_result
```

src/databricks/labs/ucx/azure/access.py

Lines changed: 131 additions & 2 deletions

```diff
@@ -1,11 +1,20 @@
+import json
+import uuid
 from dataclasses import dataclass

 from databricks.labs.blueprint.installation import Installation
+from databricks.labs.blueprint.tui import Prompts
 from databricks.sdk import WorkspaceClient
+from databricks.sdk.errors import NotFound, ResourceAlreadyExists
 from databricks.sdk.service.catalog import Privilege

 from databricks.labs.ucx.assessment.crawlers import logger
-from databricks.labs.ucx.azure.resources import AzureResource, AzureResources
+from databricks.labs.ucx.azure.resources import (
+    AzureAPIClient,
+    AzureResource,
+    AzureResources,
+    PrincipalSecret,
+)
 from databricks.labs.ucx.config import WorkspaceConfig
 from databricks.labs.ucx.framework.crawlers import StatementExecutionBackend
 from databricks.labs.ucx.hive_metastore.locations import ExternalLocations
@@ -46,7 +55,12 @@ def for_cli(cls, ws: WorkspaceClient, product='ucx', include_subscriptions=None)
         installation = Installation.current(ws, product)
         config = installation.load(WorkspaceConfig)
         sql_backend = StatementExecutionBackend(ws, config.warehouse_id)
-        azurerm = AzureResources(ws, include_subscriptions=include_subscriptions)
+        azure_mgmt_client = AzureAPIClient(
+            ws.config.arm_environment.resource_manager_endpoint,
+            ws.config.arm_environment.service_management_endpoint,
+        )
+        graph_client = AzureAPIClient("https://graph.microsoft.com", "https://graph.microsoft.com")
+        azurerm = AzureResources(azure_mgmt_client, graph_client, include_subscriptions)
         locations = ExternalLocations(ws, sql_backend, config.inventory_database)
         return cls(installation, ws, azurerm, locations)

@@ -91,6 +105,121 @@ def save_spn_permissions(self) -> str | None:
             return None
         return self._installation.save(storage_account_infos, filename=self._filename)

+    def _update_cluster_policy_definition(
+        self,
+        policy_definition: str,
+        storage_accounts: list[AzureResource],
+        uber_principal: PrincipalSecret,
+        inventory_database: str,
+    ) -> str:
+        policy_dict = json.loads(policy_definition)
+        tenant_id = self._azurerm.tenant_id()
+        endpoint = f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"
+        for storage in storage_accounts:
+            policy_dict[
+                f"spark_conf.fs.azure.account.oauth2.client.id.{storage.storage_account}.dfs.core.windows.net"
+            ] = self._policy_config(uber_principal.client.client_id)
+            policy_dict[
+                f"spark_conf.fs.azure.account.oauth.provider.type.{storage.storage_account}.dfs.core.windows.net"
+            ] = self._policy_config("org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider")
+            policy_dict[
+                f"spark_conf.fs.azure.account.oauth2.client.endpoint.{storage.storage_account}.dfs.core.windows.net"
+            ] = self._policy_config(endpoint)
+            policy_dict[f"spark_conf.fs.azure.account.auth.type.{storage.storage_account}.dfs.core.windows.net"] = (
+                self._policy_config("OAuth")
+            )
+            policy_dict[
+                f"spark_conf.fs.azure.account.oauth2.client.secret.{storage.storage_account}.dfs.core.windows.net"
+            ] = self._policy_config(f"{{secrets/{inventory_database}/uber_principal_secret}}")
+        return json.dumps(policy_dict)
+
+    @staticmethod
+    def _policy_config(value: str):
+        return {"type": "fixed", "value": value}
+
+    def _update_cluster_policy_with_spn(
+        self,
+        policy_id: str,
+        storage_accounts: list[AzureResource],
+        uber_principal: PrincipalSecret,
+        inventory_database: str,
+    ):
+        try:
+            policy_definition = ""
+            cluster_policy = self._ws.cluster_policies.get(policy_id)
+
+            self._installation.save(cluster_policy, filename="policy-backup.json")
+
+            if cluster_policy.definition is not None:
+                policy_definition = self._update_cluster_policy_definition(
+                    cluster_policy.definition, storage_accounts, uber_principal, inventory_database
+                )
+            if cluster_policy.name is not None:
+                self._ws.cluster_policies.edit(policy_id, cluster_policy.name, definition=policy_definition)
+        except NotFound:
+            msg = f"cluster policy {policy_id} not found, please run UCX installation to create UCX cluster policy"
+            raise NotFound(msg) from None
+
+    def create_uber_principal(self, prompts: Prompts):
+        config = self._installation.load(WorkspaceConfig)
+        inventory_database = config.inventory_database
+        display_name = f"unity-catalog-migration-{inventory_database}-{self._ws.get_workspace_id()}"
+        uber_principal_name = prompts.question(
+            "Enter a name for the uber service principal to be created", default=display_name
+        )
+        policy_id = config.policy_id
+        if policy_id is None:
+            msg = "UCX cluster policy not found in config. Please run latest UCX installation to set cluster policy"
+            logger.error(msg)
+            raise ValueError(msg) from None
+        if config.uber_spn_id is not None:
+            logger.warning("Uber service principal already created for this workspace.")
+            return
+        used_storage_accounts = self._get_storage_accounts()
+        if len(used_storage_accounts) == 0:
+            logger.warning(
+                "There are no external table present with azure storage account. "
+                "Please check if assessment job is run"
+            )
+            return
+        storage_account_info = []
+        for storage in self._azurerm.storage_accounts():
+            if storage.storage_account in used_storage_accounts:
+                storage_account_info.append(storage)
+        logger.info("Creating service principal")
+        uber_principal = self._azurerm.create_service_principal(uber_principal_name)
+        self._create_scope(uber_principal, inventory_database)
+        config.uber_spn_id = uber_principal.client.client_id
+        logger.info(
+            f"Created service principal of client_id {config.uber_spn_id}. Applying permission on storage accounts"
+        )
+        try:
+            self._apply_storage_permission(storage_account_info, uber_principal)
+            self._installation.save(config)
+            self._update_cluster_policy_with_spn(policy_id, storage_account_info, uber_principal, inventory_database)
+        except PermissionError:
+            self._azurerm.delete_service_principal(uber_principal.client.object_id)
+        logger.info(f"Update UCX cluster policy {policy_id} with spn connection details for storage accounts")
+
+    def _apply_storage_permission(self, storage_account_info: list[AzureResource], uber_principal: PrincipalSecret):
+        for storage in storage_account_info:
+            role_name = str(uuid.uuid4())
+            self._azurerm.apply_storage_permission(
+                uber_principal.client.object_id, storage, "STORAGE_BLOB_DATA_READER", role_name
+            )
+            logger.debug(
+                f"Storage Data Blob Reader permission applied for spn {uber_principal.client.client_id} "
+                f"to storage account {storage.storage_account}"
+            )
+
+    def _create_scope(self, uber_principal: PrincipalSecret, inventory_database: str):
+        logger.info(f"Creating secret scope {inventory_database}.")
+        try:
+            self._ws.secrets.create_scope(inventory_database)
+        except ResourceAlreadyExists:
+            logger.warning(f"Secret scope {inventory_database} already exists, using the same")
+        self._ws.secrets.put_secret(inventory_database, "uber_principal_secret", string_value=uber_principal.secret)
+
     def load(self):
         return self._installation.load(list[StoragePermissionMapping], filename=self._filename)
```
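The cluster-policy rewrite can be exercised in isolation. A minimal sketch of the per-storage-account Spark conf entries that `_update_cluster_policy_definition` injects; the storage account name, client ID, tenant ID, and inventory database below are all example values, not UCX defaults:

```python
# Sketch of the five spark_conf keys added to the UCX cluster policy per
# Azure storage account, mirroring access.py above.
def policy_config(value: str) -> dict:
    return {"type": "fixed", "value": value}

def add_oauth_conf(policy: dict, storage_account: str, client_id: str,
                   tenant_id: str, inventory_database: str) -> dict:
    prefix = "spark_conf.fs.azure.account"
    host = f"{storage_account}.dfs.core.windows.net"
    endpoint = f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"
    policy[f"{prefix}.auth.type.{host}"] = policy_config("OAuth")
    policy[f"{prefix}.oauth.provider.type.{host}"] = policy_config(
        "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider"
    )
    policy[f"{prefix}.oauth2.client.id.{host}"] = policy_config(client_id)
    policy[f"{prefix}.oauth2.client.endpoint.{host}"] = policy_config(endpoint)
    # the client secret stays in a Databricks secret scope; the policy only references it
    policy[f"{prefix}.oauth2.client.secret.{host}"] = policy_config(
        f"{{secrets/{inventory_database}/uber_principal_secret}}"
    )
    return policy

example_policy = add_oauth_conf({}, "ucxstore", "example-client-id", "example-tenant", "ucx")
```

Pinning every value as `{"type": "fixed", ...}` means clusters created from the policy cannot override the uber principal's OAuth settings.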

src/databricks/labs/ucx/azure/credentials.py

Lines changed: 7 additions & 2 deletions

```diff
@@ -18,7 +18,7 @@
     AzureResourcePermissions,
     StoragePermissionMapping,
 )
-from databricks.labs.ucx.azure.resources import AzureResources
+from databricks.labs.ucx.azure.resources import AzureAPIClient, AzureResources
 from databricks.labs.ucx.config import WorkspaceConfig
 from databricks.labs.ucx.framework.crawlers import StatementExecutionBackend
 from databricks.labs.ucx.hive_metastore.locations import ExternalLocations
@@ -171,7 +171,12 @@ def for_cli(cls, ws: WorkspaceClient, installation: Installation, prompts: Promp

         config = installation.load(WorkspaceConfig)
         sql_backend = StatementExecutionBackend(ws, config.warehouse_id)
-        azurerm = AzureResources(ws)
+        azure_mgmt_client = AzureAPIClient(
+            ws.config.arm_environment.resource_manager_endpoint,
+            ws.config.arm_environment.service_management_endpoint,
+        )
+        graph_client = AzureAPIClient("https://graph.microsoft.com", "https://graph.microsoft.com")
+        azurerm = AzureResources(azure_mgmt_client, graph_client)
         locations = ExternalLocations(ws, sql_backend, config.inventory_database)

         resource_permissions = AzureResourcePermissions(installation, ws, azurerm, locations)
```
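Both `for_cli` factories now construct two thin API clients instead of handing the whole `WorkspaceClient` to `AzureResources`: one scoped to Azure Resource Manager and one to Microsoft Graph. A stand-in sketch of that wiring; the ARM endpoints shown are the public-cloud defaults, whereas the real code reads them from `ws.config.arm_environment` so national clouds resolve correctly:

```python
# Stand-in for the AzureAPIClient split: each client keeps its own API host
# and the token audience used when requesting an access token for it.
class AzureAPIClient:
    def __init__(self, host_endpoint: str, token_service_endpoint: str):
        self.host = host_endpoint.rstrip("/")
        self.token_audience = token_service_endpoint

# management-plane client (example public-cloud ARM endpoints)
mgmt_client = AzureAPIClient(
    "https://management.azure.com",
    "https://management.core.windows.net/",
)
# Microsoft Graph client: host and token audience coincide
graph_client = AzureAPIClient("https://graph.microsoft.com", "https://graph.microsoft.com")
```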
