Skip to content

Commit e779888

Browse files
elbamit and liranbg authored
[RemoteRuntime] Secret token mounting for RemoteRuntime (#9050)
### 📝 Description

<!-- A short summary of what this PR does. -->
<!-- Include any relevant context or background information. -->

Add secret token mounting for RemoteRuntime functions. Save `auth` as part of the function's spec, and use it to mount the secret into the runtime during function config compilation.

---

### 🛠️ Changes Made

<!-- - Key changes (e.g., added feature X, refactored Y, fixed Z) -->

- Added `auth` as part of `NuclioSpec`, `ApplicationSpec` and `ServingSpec`
- Enrich the auth token name on the function during deployment, then mount the secret to the runtime during function config compilation.
- Refactor `mount_secret_token_to_runtime` to remove existing auth secret volumes/mounts
- Refactor `_enrich_and_validate_auth_token_name` to `enrich_and_validate_auth_token_name` to support both `RunObject` and `RemoteRuntime`

---

### ✅ Checklist

- [ ] I updated the documentation (if applicable)
- [x] I have tested the changes in this PR
- [ ] I confirmed whether my changes are covered by system tests
  - [ ] If yes, I ran all relevant system tests and ensured they passed before submitting this PR
  - [ ] I updated existing system tests and/or added new ones if needed to cover my changes
- [ ] If I introduced a deprecation:
  - [ ] I followed the [Deprecation Guidelines](./DEPRECATION.md)
  - [ ] I updated the relevant Jira ticket for documentation

---

### 🧪 Testing

<!-- - How it was tested (unit tests, manual, integration) -->
<!-- - Any special cases covered. -->

Unit tests + manual tests of deployment of nuclio/application/serving

---

### 🔗 References

- Ticket link: https://iguazio.atlassian.net/browse/ML-11584
- Design docs links:
- External links:

---

### 🚨 Breaking Changes?

- [ ] Yes (explain below)
- [x] No

<!-- If yes, describe what needs to be changed downstream: -->

---

### 🔍️ Additional Notes

<!-- Anything else reviewers should know (follow-up tasks, known issues, affected areas etc.). -->
<!-- ### 📸 Screenshots / Logs -->

As part of https://iguazio.atlassian.net/browse/ML-11599, redeployment of a function with a different auth token name still needs to be handled.

---------

Co-authored-by: Liran BG <[email protected]>
1 parent a20a595 commit e779888

File tree

16 files changed

+440
-149
lines changed

16 files changed

+440
-149
lines changed

mlrun/common/constants.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,7 @@
4040

4141
MLRUN_JOB_AUTH_SECRET_PATH = "/var/mlrun-secrets/auth"
4242
MLRUN_JOB_AUTH_SECRET_FILE = ".igz.yml"
43+
MLRUN_JOB_AUTH_DEFAULT_TOKEN_NAME = "default"
4344

4445

4546
class MLRunInternalLabels:

mlrun/projects/operations.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -405,6 +405,7 @@ def deploy_function(
405405
:param project_object: override the project object to use, will default to the project set in the runtime context.
406406
"""
407407
engine, function = _get_engine_and_function(function, project_object)
408+
# TODO in ML-11599 need to handle redeployment with different auth token name
408409
if function.kind not in mlrun.runtimes.RuntimeKinds.nuclio_runtimes():
409410
raise mlrun.errors.MLRunInvalidArgumentError(
410411
"deploy is used with real-time functions, for other kinds use build_function()"

mlrun/runtimes/base.py

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -955,5 +955,34 @@ def doc(self):
955955
line += f", default={p['default']}"
956956
print(" " + line)
957957

958+
def remove_auth_secret_volumes(self):
959+
secret_name_prefix = (
960+
mlrun.mlconf.secret_stores.kubernetes.auth_secret_name.format(
961+
hashed_access_key=""
962+
)
963+
)
964+
volumes = self.spec.volumes or []
965+
mounts = self.spec.volume_mounts or []
966+
967+
volumes_to_remove = set()
968+
969+
# Identify volumes to remove
970+
for vol in volumes:
971+
secret_name = mlrun.utils.get_in(vol, "secret.secretName", "")
972+
973+
# Pattern of auth secret volumes
974+
if secret_name.startswith(secret_name_prefix):
975+
volumes_to_remove.add(vol["name"])
976+
977+
# Filter out only the matched volumes
978+
self.spec.volumes = [
979+
volume for volume in volumes if volume["name"] not in volumes_to_remove
980+
]
981+
982+
# Filter out matching mounts
983+
self.spec.volume_mounts = [
984+
mount for mount in mounts if mount["name"] not in volumes_to_remove
985+
]
986+
958987
def skip_image_enrichment(self):
959988
return False

mlrun/runtimes/nuclio/application/application.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -88,6 +88,7 @@ def __init__(
8888
track_models=None,
8989
internal_application_port=None,
9090
application_ports=None,
91+
auth=None,
9192
):
9293
super().__init__(
9394
command=command,
@@ -133,6 +134,7 @@ def __init__(
133134
track_models=track_models,
134135
state_thresholds=state_thresholds,
135136
disable_default_http_trigger=disable_default_http_trigger,
137+
auth=auth,
136138
)
137139

138140
# Override default min/max replicas (don't assume application is stateless)

mlrun/runtimes/nuclio/function.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -114,6 +114,7 @@ class NuclioSpec(KubeResourceSpec):
114114
"service_type",
115115
"add_templated_ingress_host_mode",
116116
"disable_default_http_trigger",
117+
"auth",
117118
]
118119

119120
def __init__(
@@ -161,6 +162,7 @@ def __init__(
161162
graph=None,
162163
parameters=None,
163164
track_models=None,
165+
auth=None,
164166
):
165167
super().__init__(
166168
command=command,
@@ -217,6 +219,7 @@ def __init__(
217219
# When True it will set Nuclio spec.noBaseImagesPull to False (negative logic)
218220
# indicate that the base image should be pulled from the container registry (not cached)
219221
self.base_image_pull = False
222+
self.auth = auth or {}
220223

221224
def generate_nuclio_volumes(self):
222225
nuclio_volumes = []

mlrun/runtimes/nuclio/serving.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -155,6 +155,7 @@ def __init__(
155155
disable_default_http_trigger=None,
156156
model_endpoint_creation_task_name=None,
157157
serving_spec=None,
158+
auth=None,
158159
):
159160
super().__init__(
160161
command=command,
@@ -196,6 +197,7 @@ def __init__(
196197
add_templated_ingress_host_mode=add_templated_ingress_host_mode,
197198
disable_default_http_trigger=disable_default_http_trigger,
198199
serving_spec=serving_spec,
200+
auth=auth,
199201
)
200202

201203
self.models = models or {}

server/py/services/api/api/endpoints/nuclio.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -476,6 +476,10 @@ def _deploy_function(
476476
# which later in Nuclio will be masked and saved to secrets
477477
raw_config = fn.mask_sensitive_data_in_config()
478478

479+
# Add auth token name in function spec
480+
# TODO in ML-11600/ML-11599 need to handle redeployment with different auth token name
481+
launcher.enrich_and_validate_auth_token_name(fn)
482+
479483
# save the function to DB
480484
fn.save(versioned=False)
481485

server/py/services/api/crud/runtimes/nuclio/function.py

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import base64
1717
import shlex
1818
import typing
19+
from typing import Optional
1920

2021
import nuclio
2122
import nuclio.utils
@@ -249,9 +250,9 @@ def _compile_function_config(
249250
250251
:return: function name, project name, nuclio function config
251252
"""
252-
253+
_enrich_config_spec(function, auth_info=auth_info)
253254
# resolve env vars before compiling the nuclio spec, as we need to set them in the spec
254-
env_dict, external_source_env_dict = _resolve_env_vars(function, auth_info)
255+
env_dict, external_source_env_dict = _resolve_env_vars(function)
255256

256257
project = function.metadata.project
257258
tag = function.metadata.tag
@@ -362,15 +363,20 @@ def _apply_escaped_config(config, parent_key, items: dict):
362363
mlrun.utils.update_in(config, f"{parent_key}.\\{key}\\", value)
363364

364365

365-
def _resolve_env_vars(function, auth_info=None):
366+
def _enrich_config_spec(
367+
function, auth_info: Optional[mlrun.common.schemas.AuthInfo] = None
368+
):
366369
# Add secret configurations to function's pod spec, if secret sources were added.
367370
# Needs to be here, since it adds env params, which are handled in the next lines.
368371
# This only needs to run if we're running within k8s context. If running in Docker, for example, skip.
369372
if framework.utils.singletons.k8s.get_k8s_helper(
370373
silent=True
371374
).is_running_inside_kubernetes_cluster():
372-
_add_secrets_config_to_function_spec(function)
375+
token_name = mlrun.utils.get_in(function.spec, "auth.token_name", None)
376+
_add_secrets_config_to_function_spec(function, token_name, auth_info)
377+
373378

379+
def _resolve_env_vars(function):
374380
env_dict, external_source_env_dict = function._get_nuclio_config_spec_env()
375381
mlrun.auth.utils.enrich_auth_env(env_dict)
376382

@@ -675,6 +681,8 @@ def _set_function_name(function, config, project, tag):
675681

676682
def _add_secrets_config_to_function_spec(
677683
function: mlrun.runtimes.nuclio.function.RemoteRuntime,
684+
token_name: str,
685+
auth_info: Optional[mlrun.common.schemas.AuthInfo] = None,
678686
):
679687
handler = services.api.runtime_handlers.BaseRuntimeHandler
680688
if function.kind in [
@@ -690,6 +698,8 @@ def _add_secrets_config_to_function_spec(
690698
function,
691699
project_name=function.metadata.project,
692700
encode_key_names=False,
701+
token_name=token_name,
702+
auth_info=auth_info,
693703
)
694704

695705
elif function.kind == mlrun.runtimes.RuntimeKinds.serving:
@@ -710,10 +720,16 @@ def _add_secrets_config_to_function_spec(
710720
function._secrets.get_k8s_secrets(),
711721
function,
712722
project_name=function.metadata.project,
723+
token_name=token_name,
724+
auth_info=auth_info,
713725
)
714726
else:
715727
handler.add_k8s_secrets_to_spec(
716-
None, function, project_name=function.metadata.project
728+
None,
729+
function,
730+
project_name=function.metadata.project,
731+
token_name=token_name,
732+
auth_info=auth_info,
717733
)
718734

719735
else:

server/py/services/api/crud/secrets.py

Lines changed: 0 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -814,31 +814,6 @@ def _is_key_map_project_secret_key(self, key: str) -> bool:
814814
def _generate_uuid() -> str:
815815
return str(uuid.uuid4())
816816

817-
@staticmethod
818-
def mount_secret_token_to_runtime(
819-
runtime: mlrun.runtimes.base.BaseRuntime, token_name: str, username: str
820-
):
821-
# Validation that the secret exists is done in the ServerSideLauncher
822-
secret = framework.utils.singletons.k8s.get_k8s_helper()._get_user_token_secret(
823-
username=username, token_name=token_name
824-
)
825-
826-
# In case the secret was not found (which should not happen because of the prior validation), we do not mount it
827-
if secret:
828-
runtime.apply(
829-
mlrun.mounts.mount_secret(
830-
secret.metadata.name,
831-
mount_path=mlrun.common.constants.MLRUN_JOB_AUTH_SECRET_PATH,
832-
items=[
833-
{
834-
"key": "tokensFile",
835-
"path": mlrun.common.constants.MLRUN_JOB_AUTH_SECRET_FILE,
836-
}
837-
],
838-
)
839-
)
840-
return runtime
841-
842817

843818
def get_project_secret_provider(project: str) -> typing.Callable:
844819
"""Implement secret provider for handle the related project secret on the API side.

server/py/services/api/launcher.py

Lines changed: 24 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -310,7 +310,7 @@ def _enrich_run(
310310

311311
self._handle_retry(run)
312312
run = self._pre_run_image_pull_secret_enrichment(run)
313-
self._enrich_and_validate_auth_token_name(run)
313+
self.enrich_and_validate_auth_token_name(run)
314314
return self._pre_run_scheduling_constraints_enrichment(runtime, run)
315315

316316
@staticmethod
@@ -697,20 +697,34 @@ def _validate_retry(runtime_kind: str, retry: Optional["mlrun.model.Retry"]):
697697
)
698698

699699
# TODO In ML-11600, implement token name resolution and validation + tests
700-
def _enrich_and_validate_auth_token_name(self, run: mlrun.run.RunObject):
701-
auth = run.spec.auth or {}
702-
703-
if auth.get("token_name"):
700+
def enrich_and_validate_auth_token_name(
701+
self, object: Union[mlrun.run.RunObject, mlrun.runtimes.RemoteRuntime]
702+
):
703+
if mlrun.mlconf.is_iguazio_v4_mode():
704+
if object.spec.auth is None:
705+
object.spec.auth = {}
706+
707+
# Get the provided token name, if any
708+
provided_token_name = object.spec.auth.get("token_name")
709+
710+
# Resolve token name and raise error only if token is explicitly provided by the user
711+
# in ML-11600, we will implement a proper resolution logic that checks all secret tokens
712+
# of the user and finds a valid one if no token name is provided
713+
raise_error_on_failure = bool(provided_token_name)
714+
token_name = (
715+
provided_token_name
716+
or mlrun.common.constants.MLRUN_JOB_AUTH_DEFAULT_TOKEN_NAME
717+
)
704718
self._validate_token_name(
705-
auth["token_name"],
706-
explicit=True,
719+
token_name, raise_error_on_failure=raise_error_on_failure
707720
)
708-
return
709721

710-
run.spec.auth["token_name"] = "default"
722+
object.spec.auth["token_name"] = token_name
711723

712724
# TODO implement validation in ML-11600
713-
def _validate_token_name(self, token_name: str, explicit: bool = False):
725+
def _validate_token_name(
726+
self, token_name: str, raise_error_on_failure: bool = False
727+
):
714728
pass
715729

716730

0 commit comments

Comments
 (0)