Skip to content

Commit a378226

Browse files
committed
Fail SQL gracefully if no python dependencies installed
1 parent e432961 commit a378226

File tree

8 files changed

+78
-44
lines changed

8 files changed

+78
-44
lines changed

python/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
<py4j.version>0.9.2</py4j.version>
3838
<python.test.exclude>
3939
**/PythonInterpreterWithPythonInstalledTest.java,
40-
**/PythonPandasSqlInterpreterTest.java
40+
**/PythonInterpreterPandasSqlTest.java
4141
</python.test.exclude>
4242
</properties>
4343

python/src/main/java/org/apache/zeppelin/python/PythonInterpreter.java

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -204,15 +204,20 @@ private Job getRunningJob(String paragraphId) {
204204
}
205205

206206

207-
private String sendCommandToPython(String cmd) {
207+
/**
208+
* Sends given text to Python interpreter, blocks and returns the output
209+
* @param cmd Python expression text
210+
* @return output
211+
*/
212+
String sendCommandToPython(String cmd) {
208213
String output = "";
209-
LOG.info("Sending : \n" + (cmd.length() > 200 ? cmd.substring(0, 200) + "..." : cmd));
214+
LOG.debug("Sending : \n" + (cmd.length() > 200 ? cmd.substring(0, 200) + "..." : cmd));
210215
try {
211216
output = process.sendAndGetResult(cmd);
212217
} catch (IOException e) {
213218
LOG.error("Error when sending commands to python process", e);
214219
}
215-
//logger.info("Got : \n" + output);
220+
LOG.debug("Got : \n" + output);
216221
return output;
217222
}
218223

@@ -243,11 +248,7 @@ public Integer getPy4jPort() {
243248

244249
public Boolean isPy4jInstalled() {
245250
String output = sendCommandToPython("\n\nimport py4j\n");
246-
if (output.contains("ImportError")) {
247-
return false;
248-
} else {
249-
return true;
250-
}
251+
return !output.contains("ImportError");
251252
}
252253

253254
private int findRandomOpenPortOnAllLocalInterfaces() {

python/src/main/java/org/apache/zeppelin/python/PythonPandasSqlInterpreter.java renamed to python/src/main/java/org/apache/zeppelin/python/PythonInterpreterPandasSql.java

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -33,16 +33,16 @@
3333
*
3434
* Match experience of %sparpk.sql over Spark DataFrame
3535
*/
36-
public class PythonPandasSqlInterpreter extends Interpreter {
37-
private static final Logger LOG = LoggerFactory.getLogger(PythonPandasSqlInterpreter.class);
36+
public class PythonInterpreterPandasSql extends Interpreter {
37+
private static final Logger LOG = LoggerFactory.getLogger(PythonInterpreterPandasSql.class);
3838

3939
private String SQL_BOOTSTRAP_FILE_PY = "/bootstrap_sql.py";
4040

41-
public PythonPandasSqlInterpreter(Properties property) {
41+
public PythonInterpreterPandasSql(Properties property) {
4242
super(property);
4343
}
4444

45-
private PythonInterpreter getPythonInterpreter() {
45+
PythonInterpreter getPythonInterpreter() {
4646
LazyOpenInterpreter lazy = null;
4747
PythonInterpreter python = null;
4848
Interpreter p = getInterpreterInTheSameSessionByClassName(PythonInterpreter.class.getName());
@@ -64,17 +64,23 @@ private PythonInterpreter getPythonInterpreter() {
6464
@Override
6565
public void open() {
6666
LOG.info("Open Python SQL interpreter instance: {}", this.toString());
67-
68-
//TODO(bzz): check i.e by importing and catching ImportError
69-
//if (py4jAndPandasAndPandasqlAreInstalled) {
7067
try {
7168
LOG.info("Bootstrap {} interpreter with {}", this.toString(), SQL_BOOTSTRAP_FILE_PY);
7269
PythonInterpreter python = getPythonInterpreter();
7370
python.bootStrapInterpreter(SQL_BOOTSTRAP_FILE_PY);
7471
} catch (IOException e) {
7572
LOG.error("Can't execute " + SQL_BOOTSTRAP_FILE_PY + " to import SQL dependencies", e);
7673
}
77-
//}
74+
}
75+
76+
/**
77+
* Checks if Python dependencies pandas and pandasql are installed
78+
* @return True if they are
79+
*/
80+
boolean isPandasAndPandasqlInstalled() {
81+
PythonInterpreter python = getPythonInterpreter();
82+
String output = python.sendCommandToPython("\n\nimport pandas\nimport pandasql\n");
83+
return !output.contains("ImportError");
7884
}
7985

8086
@Override

python/src/main/resources/bootstrap.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,8 @@ def show(self, p, **kwargs):
132132
# `isinstance(p, DataFrame)` would req `import pandas.core.frame.DataFrame`
133133
# and so a dependency on pandas
134134
self.show_dataframe(p, **kwargs)
135+
elif hasattr(p, '__call__'):
136+
p() #error reporting
135137

136138
def show_dataframe(self, df, **kwargs):
137139
"""Pretty prints DF using Table Display System

python/src/main/resources/bootstrap_sql.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,16 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515

16-
# This is for org.apache.zeppelin.python.PythonPandasSqlInterpreterTest
16+
# Setup SQL over Pandas DataFrames
1717
# It requires next dependencies to be installed:
18-
# - numpy
1918
# - pandas
2019
# - pandasql
2120

21+
from __future__ import print_function
2222

23-
import numpy as np
24-
import pandas as pd
25-
from pandasql import sqldf
26-
27-
pysqldf = lambda q: sqldf(q, globals())
23+
try:
24+
from pandasql import sqldf
25+
pysqldf = lambda q: sqldf(q, globals())
26+
except ImportError:
27+
pysqldf = lambda q: print("Can not run SQL over Pandas DataFrame" +
28+
"Make sure 'pandas' and 'pandasql' libraries are installed")

python/src/test/java/org/apache/zeppelin/python/PythonPandasSqlInterpreterTest.java renamed to python/src/test/java/org/apache/zeppelin/python/PythonInterpreterPandasSqlTest.java

Lines changed: 42 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -44,18 +44,19 @@
4444
* <ol>
4545
* - <li>Python</li>
4646
* - <li>NumPy</li>
47-
* - <li>Pandas DataFrame</li>
47+
* - <li>Pandas</li>
48+
* - <li>PandaSql</li>
4849
* <ol>
4950
*
5051
* To run manually on such environment, use:
5152
* <code>
5253
* mvn -Dpython.test.exclude='' test -pl python -am
5354
* </code>
5455
*/
55-
public class PythonPandasSqlInterpreterTest {
56+
public class PythonInterpreterPandasSqlTest {
5657

5758
private InterpreterGroup intpGroup;
58-
private PythonPandasSqlInterpreter sql;
59+
private PythonInterpreterPandasSql sql;
5960
private PythonInterpreter python;
6061

6162
private InterpreterContext context;
@@ -72,7 +73,7 @@ public void setUp() throws Exception {
7273
python.setInterpreterGroup(intpGroup);
7374
python.open();
7475

75-
sql = new PythonPandasSqlInterpreter(p);
76+
sql = new PythonInterpreterPandasSql(p);
7677
sql.setInterpreterGroup(intpGroup);
7778

7879
intpGroup.put("note", Arrays.asList(python, sql));
@@ -92,23 +93,47 @@ public void setUp() throws Exception {
9293
}
9394

9495
@Test
95-
public void sqlOverTestDataPrintsTable() {
96-
//given
97-
// `import pandas as pd` and `import numpy as np` done
98-
// DataFrame \w test data
99-
String expectedTable = "name\tage\n\nmoon\t33\n\npark\t34";
100-
python.interpret("df2 = pd.DataFrame({ 'age' : np.array([33, 51, 51, 34]), "+
101-
"'name' : pd.Categorical(['moon','jobs','gates','park'])})", context);
96+
public void dependenciesAreInstalled() {
97+
InterpreterResult ret = python.interpret("import pandas\nimport pandasql\nimport numpy\n", context);
98+
assertEquals(ret.message(), InterpreterResult.Code.SUCCESS, ret.code());
99+
}
100+
101+
@Test
102+
public void errorMessageIfDependenciesNotInstalled() {
103+
InterpreterResult ret;
104+
// given
105+
ret = python.interpret(
106+
"pysqldf = lambda q: print('Can not execute SQL as Python dependency is not installed')",
107+
context);
108+
assertEquals(ret.message(), InterpreterResult.Code.SUCCESS, ret.code());
109+
110+
// when
111+
ret = sql.interpret("SELECT * from something", context);
112+
113+
// then
114+
assertNotNull(ret);
115+
assertEquals(ret.message(), InterpreterResult.Code.SUCCESS, ret.code());
116+
assertTrue(ret.message().contains("dependency is not installed"));
117+
}
102118

119+
@Test
120+
public void sqlOverTestDataPrintsTable() {
121+
InterpreterResult ret;
122+
// given
123+
//String expectedTable = "name\tage\n\nmoon\t33\n\npark\t34";
124+
ret = python.interpret("import pandas as pd", context);
125+
ret = python.interpret("import numpy as np", context);
126+
// DataFrame df2 \w test data
127+
ret = python.interpret("df2 = pd.DataFrame({ 'age' : np.array([33, 51, 51, 34]), "+
128+
"'name' : pd.Categorical(['moon','jobs','gates','park'])})", context);
129+
assertEquals(ret.message(), InterpreterResult.Code.SUCCESS, ret.code());
103130

104131
//when
105-
InterpreterResult ret = sql.interpret("select name, age from df2 where age < 40", context);
132+
ret = sql.interpret("select name, age from df2 where age < 40", context);
106133

107134
//then
108-
assertEquals(InterpreterResult.Code.SUCCESS, ret.code());
109-
assertEquals(Type.TABLE, ret.type());
110-
//System.out.println(ret.message());
111-
//System.out.println(expectedTable);
135+
assertEquals(ret.message(), InterpreterResult.Code.SUCCESS, ret.code());
136+
assertEquals(ret.message(), Type.TABLE, ret.type());
112137
//assertEquals(expectedTable, ret.message()); //somehow it's same but not equal
113138
assertTrue(ret.message().indexOf("moon\t33") > 0);
114139
assertTrue(ret.message().indexOf("park\t34") > 0);
@@ -124,9 +149,8 @@ public void badSqlSyntaxFails() {
124149
//then
125150
assertNotNull("Interpreter returned 'null'", ret);
126151
//System.out.println("\nInterpreter response: \n" + ret.message());
127-
assertEquals(InterpreterResult.Code.ERROR, ret.code());
152+
assertEquals(ret.toString(), InterpreterResult.Code.ERROR, ret.code());
128153
assertTrue(ret.message().length() > 0);
129154
}
130155

131-
132156
}

python/src/test/java/org/apache/zeppelin/python/PythonInterpreterTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ private boolean tryToConnect(Socket s, Integer port) {
187187
s.connect(sa, 10000);
188188
connected = true;
189189
} catch (IOException e) {
190-
LOG.error("Can't open connection to " + sa, e);
190+
//LOG.warn("Can't open connection to " + sa, e);
191191
}
192192
return connected;
193193
}

python/src/test/java/org/apache/zeppelin/python/PythonInterpreterWithPythonInstalledTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
*
3737
* or
3838
* <code>
39-
* mvn -Dpython.test.exclude='' test -pl python
39+
* mvn -Dpython.test.exclude='' test -pl python -am
4040
* </code>
4141
*/
4242
public class PythonInterpreterWithPythonInstalledTest {

0 commit comments

Comments
 (0)