Skip to content

Commit fbaca77

Browse files
committed
Merge branch 'databrickslabs1-main'
2 parents 6d1f9d1 + edfeaf1 commit fbaca77

File tree

13 files changed

+635
-165
lines changed

13 files changed

+635
-165
lines changed

README.md

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -656,8 +656,7 @@ databricks labs ucx principal-prefix-access --aws-profile test-profile
656656

657657
Use to identify all instance profiles in the workspace, and map their access to S3 buckets.
658658
Also captures the IAM roles which have a UC ARN listed, and maps their access to S3 buckets
659-
This requires `aws` CLI to be installed and configured. It outputs uc_roles_access.csv which
660-
will be later used by migrate-credentials command to create UC storage credentials.
659+
This requires the `aws` CLI to be installed and configured.
661660

662661
Once done, proceed to the [`migrate-credentials` command](#migrate-credentials-command).
663662

@@ -701,14 +700,20 @@ This command is one of prerequisites for the [table migration workflow](#table-m
701700
databricks labs ucx migrate-credentials
702701
```
703702

704-
This command migrates Azure Service Principals or IAM roles listed in `/Users/{user_name}/.ucx/azure_storage_account_info.csv`
705-
or `/Users/{user_name}/.ucx/uc_roles_access.csv` to UC storage credentials, which has been generated
706-
by [`principal-prefix-access` command](#principal-prefix-access-command).
707-
Please review the file and delete the Service Principals you do not want to be migrated.
708-
The command will only migrate the Service Principals that have client secret stored in Databricks Secret.
709-
710-
**Warning**: Service principals used to access storage accounts behind firewalls might cause connectivity issues. We
711-
recommend to use access connectors instead.
703+
For Azure, this command prompts to confirm performing the following credential migration steps:
704+
1. [RECOMMENDED] For each storage account, create access connectors with managed identities that have the
705+
`Storage Blob Data Contributor` role on the respective storage account. A storage credential is created for each
706+
access connector.
707+
2. Migrate Azure Service Principals, which have `Storage Blob Data Contributor`,
708+
`Storage Blob Data Reader`, `Storage Blob Data Owner`, or custom roles on ADLS Gen2 locations that are being used in
709+
Databricks, to UC storage credentials. The Azure Service Principal to location mapping is listed
710+
in `/Users/{user_name}/.ucx/azure_storage_account_info.csv` which is generated by
711+
[`principal-prefix-access` command](#principal-prefix-access-command). Please review the file and delete the Service
712+
Principals you do not want to be migrated. The command will only migrate the Service Principals that have client
713+
secret stored in Databricks Secret.
714+
715+
**Warning**: Service principals used to access storage accounts behind firewalls might cause connectivity issues. We
716+
recommend using access connectors instead.
712717

713718
Once you're done with this command, run [`validate-external-locations` command](#validate-external-locations-command) after this one.
714719

src/databricks/labs/ucx/azure/access.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,7 @@ def create_uber_principal(self, prompts: Prompts):
248248

249249
def _create_access_connector_for_storage_account(
250250
self, storage_account: StorageAccount, role_name: str = "STORAGE_BLOB_DATA_READER"
251-
) -> AccessConnector:
251+
) -> tuple[AccessConnector, str]:
252252
access_connector = self._azurerm.create_or_update_access_connector(
253253
storage_account.id.subscription_id,
254254
storage_account.id.resource_group,
@@ -258,9 +258,21 @@ def _create_access_connector_for_storage_account(
258258
wait_for_provisioning=True,
259259
)
260260
self._apply_storage_permission(access_connector.principal_id, role_name, storage_account)
261-
return access_connector
262261

263-
def create_access_connectors_for_storage_accounts(self) -> list[AccessConnector]:
262+
container = next(self._azurerm.containers(storage_account.id), None)
263+
if container is None:
264+
url = f"abfss://{storage_account.name}.dfs.core.windows.net/"
265+
else:
266+
url = f"abfss://{container.container}@{container.storage_account}.dfs.core.windows.net/"
267+
268+
return access_connector, url
269+
270+
def create_access_connectors_for_storage_accounts(self) -> list[tuple[AccessConnector, str]]:
271+
"""Create access connectors for storage accounts
272+
273+
Returns:
274+
list[AccessConnector, str] : The access connectors with a storage url to which it has access.
275+
"""
264276
used_storage_accounts = self._get_storage_accounts()
265277
if len(used_storage_accounts) == 0:
266278
logger.warning(

src/databricks/labs/ucx/azure/credentials.py

Lines changed: 89 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
import logging
2-
from dataclasses import dataclass
2+
from dataclasses import dataclass, field
33

44
from databricks.labs.blueprint.installation import Installation
55
from databricks.labs.blueprint.tui import Prompts
66
from databricks.sdk import WorkspaceClient
7+
from databricks.sdk.errors import BadRequest
78
from databricks.sdk.errors.platform import InvalidParameterValue
89
from databricks.sdk.service.catalog import (
10+
AzureManagedIdentityRequest,
911
AzureServicePrincipal,
1012
Privilege,
1113
StorageCredentialInfo,
@@ -33,19 +35,42 @@ class ServicePrincipalMigrationInfo:
3335
class StorageCredentialValidationResult:
3436
name: str
3537
application_id: str
36-
read_only: bool
38+
read_only: bool | None
3739
validated_on: str
38-
directory_id: str | None = None
39-
failures: list[str] | None = None
40+
directory_id: str | None = None # str when storage credential created for AzureServicePrincipal
41+
failures: list[str] = field(default_factory=list)
4042

4143
@classmethod
42-
def from_validation(cls, permission_mapping: StoragePermissionMapping, failures: list[str] | None):
44+
def _get_application_and_directory_id(
45+
cls, storage_credential_info: StorageCredentialInfo
46+
) -> tuple[str, str | None]:
47+
if storage_credential_info.azure_service_principal is not None:
48+
application_id = storage_credential_info.azure_service_principal.application_id
49+
directory_id = storage_credential_info.azure_service_principal.directory_id
50+
return application_id, directory_id
51+
52+
if storage_credential_info.azure_managed_identity is not None:
53+
if storage_credential_info.azure_managed_identity.managed_identity_id is not None:
54+
return storage_credential_info.azure_managed_identity.managed_identity_id, None
55+
return storage_credential_info.azure_managed_identity.access_connector_id, None
56+
57+
raise KeyError("Storage credential info is missing an application id.")
58+
59+
@classmethod
60+
def from_storage_credential_info(
61+
cls,
62+
storage_credential_info: StorageCredentialInfo,
63+
validated_on: str,
64+
failures: list[str],
65+
):
66+
assert storage_credential_info.name is not None
67+
application_id, directory_id = cls._get_application_and_directory_id(storage_credential_info)
4368
return cls(
44-
permission_mapping.principal,
45-
permission_mapping.client_id,
46-
permission_mapping.privilege == Privilege.READ_FILES.value,
47-
permission_mapping.prefix,
48-
permission_mapping.directory_id,
69+
storage_credential_info.name,
70+
application_id,
71+
storage_credential_info.read_only,
72+
validated_on,
73+
directory_id,
4974
failures,
5075
)
5176

@@ -104,30 +129,31 @@ def create_with_client_secret(self, spn: ServicePrincipalMigrationInfo) -> Stora
104129
read_only=spn.permission_mapping.privilege == Privilege.READ_FILES.value,
105130
)
106131

107-
def validate(self, permission_mapping: StoragePermissionMapping) -> StorageCredentialValidationResult:
132+
def validate(self, storage_credential_info: StorageCredentialInfo, url: str) -> StorageCredentialValidationResult:
108133
try:
109134
validation = self._ws.storage_credentials.validate(
110-
storage_credential_name=permission_mapping.principal,
111-
url=permission_mapping.prefix,
112-
read_only=permission_mapping.privilege == Privilege.READ_FILES.value,
135+
storage_credential_name=storage_credential_info.name,
136+
url=url,
137+
read_only=storage_credential_info.read_only,
113138
)
114139
except InvalidParameterValue:
115140
logger.warning(
116141
"There is an existing external location overlaps with the prefix that is mapped to "
117142
"the service principal and used for validating the migrated storage credential. "
118143
"Skip the validation"
119144
)
120-
return StorageCredentialValidationResult.from_validation(
121-
permission_mapping,
145+
return StorageCredentialValidationResult.from_storage_credential_info(
146+
storage_credential_info,
147+
url,
122148
[
123149
"The validation is skipped because an existing external location overlaps "
124150
"with the location used for validation."
125151
],
126152
)
127153

128154
if not validation.results:
129-
return StorageCredentialValidationResult.from_validation(
130-
permission_mapping, ["Validation returned no results."]
155+
return StorageCredentialValidationResult.from_storage_credential_info(
156+
storage_credential_info, url, ["Validation returned no results."]
131157
)
132158

133159
failures = []
@@ -136,7 +162,8 @@ def validate(self, permission_mapping: StoragePermissionMapping) -> StorageCrede
136162
continue
137163
if result.result == ValidationResultResult.FAIL:
138164
failures.append(f"{result.operation.value} validation failed with message: {result.message}")
139-
return StorageCredentialValidationResult.from_validation(permission_mapping, None if not failures else failures)
165+
166+
return StorageCredentialValidationResult.from_storage_credential_info(storage_credential_info, url, failures)
140167

141168

142169
class ServicePrincipalMigration(SecretsMixin):
@@ -238,53 +265,69 @@ def _migrate_service_principals(
238265
f"'{spn.permission_mapping.prefix}' with non-Allow network configuration"
239266
)
240267

241-
self._storage_credential_manager.create_with_client_secret(spn)
242-
execution_result.append(self._storage_credential_manager.validate(spn.permission_mapping))
243-
268+
storage_credential_info = self._storage_credential_manager.create_with_client_secret(spn)
269+
validation_results = self._storage_credential_manager.validate(
270+
storage_credential_info,
271+
spn.permission_mapping.prefix,
272+
)
273+
execution_result.append(validation_results)
244274
return execution_result
245275

246-
def _create_access_connectors_for_storage_accounts(self) -> list[StorageCredentialValidationResult]:
247-
self._resource_permissions.create_access_connectors_for_storage_accounts()
248-
return []
276+
def _create_storage_credentials_for_storage_accounts(self) -> list[StorageCredentialValidationResult]:
277+
access_connectors = self._resource_permissions.create_access_connectors_for_storage_accounts()
278+
279+
execution_results = []
280+
for access_connector, url in access_connectors:
281+
storage_credential_info = self._ws.storage_credentials.create(
282+
access_connector.name,
283+
azure_managed_identity=AzureManagedIdentityRequest(str(access_connector.id)),
284+
comment="Created by UCX",
285+
read_only=False, # Access connectors get "STORAGE_BLOB_DATA_CONTRIBUTOR" permissions
286+
)
287+
try:
288+
validation_results = self._storage_credential_manager.validate(storage_credential_info, url)
289+
except BadRequest:
290+
logger.warning(f"Could not validate storage credential {storage_credential_info.name} for url {url}")
291+
else:
292+
execution_results.append(validation_results)
293+
294+
return execution_results
249295

250296
def run(self, prompts: Prompts, include_names: set[str] | None = None) -> list[StorageCredentialValidationResult]:
251297
plan_confirmed = prompts.confirm(
252-
"Above Azure Service Principals will be migrated to UC storage credentials, please review and confirm."
298+
"[RECOMMENDED] Please confirm to create an access connector with a managed identity for each storage "
299+
"account."
253300
)
254-
sp_results = []
301+
ac_results = []
255302
if plan_confirmed:
256-
sp_migration_infos = self._generate_migration_list(include_names)
257-
plan_confirmed = True
258-
if any(spn.permission_mapping.default_network_action != "Allow" for spn in sp_migration_infos):
259-
plan_confirmed = prompts.confirm(
260-
"At least one Azure Service Principal accesses a storage account with non-Allow default network "
261-
"configuration, which might cause connectivity issues. We recommend using Databricks Access "
262-
"Connectors instead (next prompt). Would you like to continue with migrating the service "
263-
"principals?"
264-
)
265-
if plan_confirmed:
266-
sp_results = self._migrate_service_principals(sp_migration_infos)
303+
ac_results = self._create_storage_credentials_for_storage_accounts()
267304

305+
sp_migration_infos = self._generate_migration_list(include_names)
306+
if any(spn.permission_mapping.default_network_action != "Allow" for spn in sp_migration_infos):
307+
logger.warning(
308+
"At least one Azure Service Principal accesses a storage account with non-Allow default network "
309+
"configuration, which might cause connectivity issues. We recommend using Databricks Access "
310+
"Connectors instead"
311+
)
268312
plan_confirmed = prompts.confirm(
269-
"[RECOMMENDED] Please confirm to create an access connector with a managed identity for each storage "
270-
"account."
313+
"Above Azure Service Principals will be migrated to UC storage credentials, please review and confirm."
271314
)
272-
ac_results = []
315+
sp_results = []
273316
if plan_confirmed:
274-
ac_results = self._create_access_connectors_for_storage_accounts()
317+
sp_results = self._migrate_service_principals(sp_migration_infos)
275318

276-
execution_results = sp_results + ac_results
319+
execution_results = ac_results + sp_results
277320
if execution_results:
278321
results_file = self.save(execution_results)
279322
logger.info(
280323
"Completed migration from Azure Service Principal to UC Storage credentials "
281-
"and creation of Databricks Access Connectors for storage accounts "
324+
"and creation of UC Storage credentials for storage access with Databricks Access Connectors. "
282325
f"Please check {results_file} for validation results"
283326
)
284327
else:
285328
logger.info(
286-
"No Azure Service Principal migrated to UC Storage credentials "
287-
"nor Databricks Access Connectors created for storage accounts"
329+
"No UC Storage credentials created during Azure Service Principal migration "
330+
"nor for storage access with Databricks Access Connectors."
288331
)
289332

290333
return execution_results

src/databricks/labs/ucx/azure/locations.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,33 @@ def _prefix_credential_name_mapping(self) -> tuple[dict[str, str], dict[str, str
7979
continue
8080
if permission_mapping.client_id in app_id_mapping_read:
8181
prefix_mapping_read[permission_mapping.prefix] = app_id_mapping_read[permission_mapping.client_id]
82+
83+
all_storage_accounts = list(self._azurerm.storage_accounts())
84+
for storage_credential in self._ws.storage_credentials.list():
85+
# Filter storage credentials for access connectors created by UCX
86+
if not (
87+
storage_credential.name is not None
88+
and storage_credential.name.startswith("ac-")
89+
and storage_credential.comment is not None
90+
and storage_credential.comment == "Created by UCX"
91+
):
92+
continue
93+
94+
storage_account_name = storage_credential.name.removeprefix("ac-")
95+
storage_accounts = [st for st in all_storage_accounts if st.name == storage_account_name]
96+
if len(storage_accounts) == 0:
97+
logger.warning(
98+
f"Storage account {storage_account_name} for access connector {storage_credential.name} not found, "
99+
"therefore, not able to create external locations for this storage account using the access "
100+
"connector."
101+
)
102+
continue
103+
104+
for container in self._azurerm.containers(storage_accounts[0].id):
105+
storage_url = f"abfss://{container.container}@{container.storage_account}.dfs.core.windows.net/"
106+
# UCX assigns created access connectors the "STORAGE_BLOB_DATA_CONTRIBUTOR" role on the storage account
107+
prefix_mapping_write[storage_url] = storage_credential.name
108+
82109
return prefix_mapping_write, prefix_mapping_read
83110

84111
def _create_location_name(self, location_url: str) -> str:

src/databricks/labs/ucx/cli.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -301,13 +301,15 @@ def create_missing_principals(
301301

302302
@ucx.command
303303
def migrate_credentials(w: WorkspaceClient, prompts: Prompts, ctx: WorkspaceContext | None = None, **named_parameters):
304-
"""For Azure, this command migrates Azure Service Principals, which have Storage Blob Data Contributor,
305-
Storage Blob Data Reader, Storage Blob Data Owner roles on ADLS Gen2 locations that are being used in
306-
Databricks, to UC storage credentials.
307-
The Azure Service Principals to location mapping are listed in
308-
{install_folder}/.ucx/azure_storage_account_info.csv which is generated by principal_prefix_access command.
309-
Please review the file and delete the Service Principals you do not want to be migrated.
310-
The command will only migrate the Service Principals that have client secret stored in Databricks Secret.
304+
"""For Azure, this command prompts to i) create UC storage credentials for the access connectors with a
305+
managed identity created for each storage account present in the ADLS Gen2 locations, the access connectors are
306+
granted Storage Blob Data Contributor permissions on their corresponding storage account, to prepare for adopting
307+
Databricks' best practice of using access connectors to authenticate with external storage, and ii) to migrate
308+
Azure Service Principals, which have Storage Blob Data Contributor, Storage Blob Data Reader, Storage Blob Data
309+
Owner roles on ADLS Gen2 locations that are being used in Databricks, to UC storage credentials. The Azure Service
310+
Principals to location mapping are listed in {install_folder}/.ucx/azure_storage_account_info.csv which is generated
311+
by principal_prefix_access command. Please review the file and delete the Service Principals you do not want to be
312+
migrated. The command will only migrate the Service Principals that have client secret stored in Databricks Secret.
311313
For AWS, this command migrates AWS Instance Profiles that are being used in Databricks, to UC storage credentials.
312314
The AWS Instance Profiles to location mapping are listed in
313315
{install_folder}/.ucx/aws_instance_profile_info.csv which is generated by principal_prefix_access command.
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import pytest
2+
3+
4+
def delete_ucx_created_resources(api_client):
    """Delete all resources on *api_client* that were created by UCX.

    A resource is considered UCX-created when its ``comment`` equals
    ``"Created by UCX"``. Resources without a name are skipped because
    ``delete`` requires a name.

    Args:
        api_client: An SDK sub-client exposing ``list()`` and
            ``delete(name, force=...)``, e.g. ``ws.storage_credentials``
            or ``ws.external_locations`` — TODO confirm against callers.
    """
    for resource in api_client.list():
        # `== "Created by UCX"` is already False when comment is None,
        # so no separate None check on the comment is needed.
        if resource.name is not None and resource.comment == "Created by UCX":
            api_client.delete(resource.name, force=True)
9+
10+
11+
@pytest.fixture
def clean_storage_credentials(az_cli_ctx):
    """Remove UCX-created storage credentials before and after the test."""
    credentials_api = az_cli_ctx.workspace_client.storage_credentials
    delete_ucx_created_resources(credentials_api)
    yield
    delete_ucx_created_resources(credentials_api)
17+
18+
19+
@pytest.fixture
def clean_external_locations(az_cli_ctx):
    """Remove UCX-created external locations before and after the test."""
    locations_api = az_cli_ctx.workspace_client.external_locations
    delete_ucx_created_resources(locations_api)
    yield
    delete_ucx_created_resources(locations_api)

0 commit comments

Comments
 (0)