|
1 | 1 | from __future__ import annotations |
2 | 2 |
|
3 | 3 | import logging |
4 | | -from typing import Dict, Union |
| 4 | +from typing import Dict, Optional, Union, cast |
5 | 5 |
|
6 | 6 | logger = logging.getLogger('databricks.sdk') |
7 | 7 | is_local_implementation = True |
@@ -86,23 +86,97 @@ def inner() -> Dict[str, str]: |
86 | 86 | _globals[var] = userNamespaceGlobals[var] |
87 | 87 | is_local_implementation = False |
88 | 88 | except ImportError: |
89 | | - from typing import cast |
90 | | - |
91 | 89 | # OSS implementation |
92 | 90 | is_local_implementation = True |
93 | 91 |
|
94 | | - from databricks.sdk.dbutils import RemoteDbUtils |
| 92 | + for var in dbruntime_objects: |
| 93 | + globals()[var] = None |
95 | 94 |
|
96 | | - from . import dbutils_stub |
| 95 | + # The next few try-except blocks are for initializing globals in a best-effort |
| 96 | + # manner. We separate them to try to get as many of them working as possible |
| 97 | + try: |
| 98 | + # We expect this to fail and only do this for providing types |
| 99 | + from pyspark.sql.context import SQLContext |
| 100 | + sqlContext: SQLContext = None # type: ignore |
| 101 | + table = sqlContext.table |
| 102 | + except Exception as e: |
| 103 | + logging.debug(f"Failed to initialize globals 'sqlContext' and 'table', continuing. Cause: {e}") |
97 | 104 |
|
98 | | - dbutils_type = Union[dbutils_stub.dbutils, RemoteDbUtils] |
| 105 | + try: |
| 106 | + from pyspark.sql.functions import udf # type: ignore |
| 107 | + except ImportError as e: |
| 108 | + logging.debug(f"Failed to initialise udf global: {e}") |
99 | 109 |
|
100 | 110 | try: |
101 | | - from .stub import * |
102 | | - except (ImportError, NameError): |
103 | | - # this assumes that all environment variables are set |
104 | | - dbutils = RemoteDbUtils() |
| 111 | + from databricks.connect import DatabricksSession # type: ignore |
| 112 | + spark = DatabricksSession.builder.getOrCreate() |
| 113 | + sql = spark.sql # type: ignore |
| 114 | + except Exception as e: |
| 115 | + # We are ignoring all failures here because user might want to initialize |
| 116 | + # spark session themselves and we don't want to interfere with that |
| 117 | + logging.debug(f"Failed to initialize globals 'spark' and 'sql', continuing. Cause: {e}") |
105 | 118 |
|
| 119 | + try: |
| 120 | + # We expect this to fail locally since dbconnect does not support sparkcontext. This is just for typing |
| 121 | + sc = spark.sparkContext |
| 122 | + except Exception as e: |
| 123 | + logging.debug(f"Failed to initialize global 'sc', continuing. Cause: {e}") |
| 124 | + |
| 125 | + def display(input=None, *args, **kwargs) -> None: # type: ignore |
| 126 | + """ |
| 127 | + Display plots or data. |
| 128 | + Display plot: |
| 129 | + - display() # no-op |
| 130 | + - display(matplotlib.figure.Figure) |
| 131 | + Display dataset: |
| 132 | + - display(spark.DataFrame) |
| 133 | + - display(list) # if list can be converted to DataFrame, e.g., list of named tuples |
| 134 | + - display(pandas.DataFrame) |
| 135 | + - display(koalas.DataFrame) |
| 136 | + - display(pyspark.pandas.DataFrame) |
| 137 | + Display any other value that has a _repr_html_() method |
| 138 | + For Spark 2.0 and 2.1: |
| 139 | + - display(DataFrame, streamName='optional', trigger=optional pyspark.sql.streaming.Trigger, |
| 140 | + checkpointLocation='optional') |
| 141 | + For Spark 2.2+: |
| 142 | + - display(DataFrame, streamName='optional', trigger=optional interval like '1 second', |
| 143 | + checkpointLocation='optional') |
| 144 | + """ |
| 145 | + # Import inside the function so that imports are only triggered on usage. |
| 146 | + from IPython import display as IPDisplay |
| 147 | + return IPDisplay.display(input, *args, **kwargs) # type: ignore |
| 148 | + |
| 149 | + def displayHTML(html) -> None: # type: ignore |
| 150 | + """ |
| 151 | + Display HTML data. |
| 152 | + Parameters |
| 153 | + ---------- |
| 154 | + data : URL or HTML string |
| 155 | + If data is a URL, display the resource at that URL, the resource is loaded dynamically by the browser. |
| 156 | + Otherwise data should be the HTML to be displayed. |
| 157 | + See also: |
| 158 | + IPython.display.HTML |
| 159 | + IPython.display.display_html |
| 160 | + """ |
| 161 | + # Import inside the function so that imports are only triggered on usage. |
| 162 | + from IPython import display as IPDisplay |
| 163 | + return IPDisplay.display_html(html, raw=True) # type: ignore |
| 164 | + |
| 165 | + # We want to propagate the error in initializing dbutils because this is a core |
| 166 | + # functionality of the sdk |
| 167 | + from databricks.sdk.dbutils import RemoteDbUtils |
| 168 | + |
| 169 | + from . import dbutils_stub |
| 170 | + dbutils_type = Union[dbutils_stub.dbutils, RemoteDbUtils] |
| 171 | + |
| 172 | + dbutils = RemoteDbUtils() |
106 | 173 | dbutils = cast(dbutils_type, dbutils) |
107 | 174 |
|
108 | | -__all__ = ['dbutils'] if is_local_implementation else dbruntime_objects |
| 175 | + # We do this to prevent importing widgets implementation prematurely |
| 176 | + # The widget import should prompt users to use the implementation |
| 177 | + # which has ipywidget support. |
| 178 | + def getArgument(name: str, defaultValue: Optional[str] = None): |
| 179 | + return dbutils.widgets.getArgument(name, defaultValue) |
| 180 | + |
| 181 | + |
| 182 | +__all__ = dbruntime_objects |
0 commit comments