Skip to content

Migrate environment variables used in MLflow (e.g. MLFLOW_S3_ENDPOINT_URL) to the mlflow.environment_variables module #6379

@harupy

Description

@harupy

In #5745, we added the mlflow.environment_variables module to define environment variables used in MLflow in one place and provide a document for them. We should migrate the environment variables in the table below to this module as we did in #6375.

Name Location Assignee PR
MLFLOW_TRACKING_DIR
_TRACKING_DIR_ENV_VAR = "MLFLOW_TRACKING_DIR"
MLFLOW_ENABLE_DBFS_FUSE_ARTIFACT_REPO
USE_FUSE_ENV_VAR = "MLFLOW_ENABLE_DBFS_FUSE_ARTIFACT_REPO"
MLFLOW_DISABLE_ENV_CREATION
DISABLE_ENV_CREATION = "MLFLOW_DISABLE_ENV_CREATION"
MLFLOW_DEPLOYMENT_FLAVOR_NAME
DEPLOYMENT_CONFIG_KEY_FLAVOR_NAME = "MLFLOW_DEPLOYMENT_FLAVOR_NAME"
MLFLOW_PIPELINES_PROFILE
_PIPELINE_PROFILE_ENV_VAR = "MLFLOW_PIPELINES_PROFILE"
MLFLOW_PIPELINES_EXECUTION_DIRECTORY
_MLFLOW_PIPELINES_EXECUTION_DIRECTORY_ENV_VAR = "MLFLOW_PIPELINES_EXECUTION_DIRECTORY"
MLFLOW_SAGEMAKER_DEPLOY_IMG_URL
IMAGE_NAME_ENV_VAR = "MLFLOW_SAGEMAKER_DEPLOY_IMG_URL"
MLFLOW_EXPERIMENT_NAME
_RUN_ID_ENV_VAR = "MLFLOW_RUN_ID"
MLFLOW_EXPERIMENT_ID
_EXPERIMENT_NAME_ENV_VAR = "MLFLOW_EXPERIMENT_NAME"
MLFLOW_RUN_ID
_active_run_stack = []
MLFLOW_RUN_CONTEXT
MLFLOW_RUN_CONTEXT_ENV_VAR = "MLFLOW_RUN_CONTEXT"
MLFLOW_TRACKING_USERNAME
_TRACKING_USERNAME_ENV_VAR = "MLFLOW_TRACKING_USERNAME"
MLFLOW_TRACKING_URI
_TRACKING_URI_ENV_VAR = "MLFLOW_TRACKING_URI"
MLFLOW_TRACKING_SERVER_CERT_PATH
_TRACKING_SERVER_CERT_PATH_ENV_VAR = "MLFLOW_TRACKING_SERVER_CERT_PATH"
MLFLOW_TRACKING_INSECURE_TLS
_TRACKING_INSECURE_TLS_ENV_VAR = "MLFLOW_TRACKING_INSECURE_TLS"
MLFLOW_TRACKING_TOKEN
_TRACKING_TOKEN_ENV_VAR = "MLFLOW_TRACKING_TOKEN"
MLFLOW_TRACKING_CLIENT_CERT_PATH
_TRACKING_CLIENT_CERT_PATH_ENV_VAR = "MLFLOW_TRACKING_CLIENT_CERT_PATH"
MLFLOW_TRACKING_PASSWORD
_TRACKING_PASSWORD_ENV_VAR = "MLFLOW_TRACKING_PASSWORD"
MLFLOW_CONDA_HOME
MLFLOW_CONDA_HOME = "MLFLOW_CONDA_HOME"
MLFLOW_CONDA_CREATE_ENV_CMD
MLFLOW_CONDA_CREATE_ENV_CMD = "MLFLOW_CONDA_CREATE_ENV_CMD"
MLFLOW_ENV_ROOT
_MLFLOW_ENV_ROOT_ENV_VAR = "MLFLOW_ENV_ROOT"
MLFLOW_AUTOLOGGING_TESTING
_AUTOLOGGING_TEST_MODE_ENV_VAR = "MLFLOW_AUTOLOGGING_TESTING"
MLFLOW_AUTOLOGGING_TESTING
_AUTOLOGGING_TEST_MODE_ENV_VAR = "MLFLOW_AUTOLOGGING_TESTING"
Code to generate this table
import ast
from pathlib import Path
from typing import Iterable, Set, Dict, Tuple
import re
from collections import defaultdict
import pandas as pd


def iter_python_scripts(root: str) -> Iterable[Path]:
    """Lazily yield every path below ``root`` whose name ends with ``.py``.

    The directory tree is walked recursively and entries are selected purely
    by their file name.
    """
    yield from (
        entry for entry in Path(root).rglob("*") if entry.name.endswith(".py")
    )


def read_file(path: Path) -> str:
    """Return the full contents of ``path`` decoded as UTF-8.

    The encoding is pinned explicitly: without it ``Path.read_text`` falls
    back to the locale's preferred encoding, which can fail or mis-decode
    Python sources containing non-ASCII characters on non-UTF-8 systems.
    """
    return path.read_text(encoding="utf-8")


class Visitor(ast.NodeVisitor):
    """Collect assignments whose value looks like an MLflow env-var name.

    After visiting a tree, ``nodes`` holds one ``(value, lineno)`` pair for
    every assignment whose right-hand side is a string literal consisting
    entirely of ``MLFLOW_`` followed by upper-case letters, digits or
    underscores.
    """

    def __init__(self) -> None:
        super().__init__()
        # Each entry is (string literal, line number of the assignment).
        self.nodes: Set[Tuple[str, int]] = set()

    def visit_Assign(self, node: ast.Assign) -> None:
        """Record ``node`` if it assigns a matching string literal, then recurse."""
        value = node.value
        # ``ast.Str`` / ``.s`` are deprecated since Python 3.8 and removed in
        # 3.12; string literals are ``ast.Constant`` nodes holding a ``str``.
        if (
            isinstance(value, ast.Constant)
            and isinstance(value.value, str)
            # fullmatch: unlike ``$``, it does not tolerate a trailing newline.
            and re.fullmatch(r"MLFLOW_[A-Z0-9_]+", value.value)
        ):
            self.nodes.add((value.value, node.lineno))
        self.generic_visit(node)


def main() -> None:
    """Print a Markdown table of ``MLFLOW_*`` string assignments under ``mlflow/``.

    Every Python file below the ``mlflow`` directory (except
    ``mlflow/environment_variables.py``) is parsed and scanned with
    ``Visitor``. Each hit becomes a row with the variable name, a GitHub
    permalink to the assignment, and empty "Assignee" / "PR" columns.
    """
    commit = "1b89add428da0a4453e7523a20d13f04f6291f37"
    envs: Dict[str, Set[Tuple[str, int]]] = {}
    for d in ["mlflow"]:
        # Sort so the table is stable regardless of filesystem listing order.
        for path in sorted(iter_python_scripts(d)):
            # POSIX form keeps the comparison and the URLs correct on Windows,
            # where str(path) would contain backslashes.
            posix_path = path.as_posix()
            if posix_path == "mlflow/environment_variables.py":
                continue
            visitor = Visitor()
            src = read_file(path)
            # Pass the filename so a syntax error names the offending file.
            root = ast.parse(src, filename=str(path))
            visitor.visit(root)
            if visitor.nodes:
                envs[posix_path] = visitor.nodes

    data = [
        (
            f"`{name}`",
            f"https://github.com/mlflow/mlflow/blob/{commit}/{path}#L{lineno}",
            "",
            "",
        )
        for path, vals in envs.items()
        # Sets iterate in hash order, which varies between runs for strings;
        # order rows by line number (then name) for a reproducible table.
        for name, lineno in sorted(vals, key=lambda item: (item[1], item[0]))
    ]

    print(
        pd.DataFrame(data, columns=["Name", "Location", "Assignee", "PR"]).to_markdown(index=False)
    )


# Run the table generator only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Old table
Location Assignee PR

mlflow/mlflow/data.py

Lines 37 to 38 in c983134

endpoint_url = os.environ.get("MLFLOW_S3_ENDPOINT_URL")
ignore_tls = os.environ.get("MLFLOW_S3_IGNORE_TLS")
WON'T DO. It looks like _fetch_s3 is not used at all -
s3_file_upload_extra_args = os.environ.get("MLFLOW_S3_UPLOAD_EXTRA_ARGS")
@ahlag #6438
envs = {
"MLFLOW_KERBEROS_TICKET_CACHE": os.environ.get("MLFLOW_KERBEROS_TICKET_CACHE"),
"MLFLOW_KERBEROS_USER": os.environ.get("MLFLOW_KERBEROS_USER"),
"MLFLOW_PYARROW_EXTRA_CONF": os.environ.get("MLFLOW_PYARROW_EXTRA_CONF"),
}
@ahlag #6438
MLFLOW_SQLALCHEMYSTORE_POOL_SIZE = "MLFLOW_SQLALCHEMYSTORE_POOL_SIZE"
MLFLOW_SQLALCHEMYSTORE_POOL_RECYCLE = "MLFLOW_SQLALCHEMYSTORE_POOL_RECYCLE"
MLFLOW_SQLALCHEMYSTORE_MAX_OVERFLOW = "MLFLOW_SQLALCHEMYSTORE_MAX_OVERFLOW"
@harupy #6396

(I'll add more environment variables to the table.)

Example PR:

#6375

Metadata

Metadata

Assignees

Labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions