Skip to content

Commit 5fa270d

Browse files
committed
pull upstream master & fix some details
2 parents 294bea5 + 6e3a1d2 commit 5fa270d

File tree

56 files changed

+2383
-490
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

56 files changed

+2383
-490
lines changed

bin/interpreter.sh

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,28 @@ elif [[ "${INTERPRETER_ID}" == "hbase" ]]; then
149149
else
150150
echo "HBASE_HOME and HBASE_CONF_DIR are not set, configuration might not be loaded"
151151
fi
152+
elif [[ "${INTERPRETER_ID}" == "pig" ]]; then
153+
# autodetect HADOOP_CONF_DIR by heuristic
154+
if [[ -n "${HADOOP_HOME}" ]] && [[ -z "${HADOOP_CONF_DIR}" ]]; then
155+
if [[ -d "${HADOOP_HOME}/etc/hadoop" ]]; then
156+
export HADOOP_CONF_DIR="${HADOOP_HOME}/etc/hadoop"
157+
elif [[ -d "/etc/hadoop/conf" ]]; then
158+
export HADOOP_CONF_DIR="/etc/hadoop/conf"
159+
fi
160+
fi
161+
162+
if [[ -n "${HADOOP_CONF_DIR}" ]] && [[ -d "${HADOOP_CONF_DIR}" ]]; then
163+
ZEPPELIN_INTP_CLASSPATH+=":${HADOOP_CONF_DIR}"
164+
fi
165+
166+
# autodetect TEZ_CONF_DIR
167+
if [[ -n "${TEZ_CONF_DIR}" ]]; then
168+
ZEPPELIN_INTP_CLASSPATH+=":${TEZ_CONF_DIR}"
169+
elif [[ -d "/etc/tez/conf" ]]; then
170+
ZEPPELIN_INTP_CLASSPATH+=":/etc/tez/conf"
171+
else
172+
echo "TEZ_CONF_DIR is not set, configuration might not be loaded"
173+
fi
152174
fi
153175

154176
addJarInDirForIntp "${LOCAL_INTERPRETER_REPO}"

conf/interpreter-list

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ kylin org.apache.zeppelin:zeppelin-kylin:0.6.1 Kylin in
3232
lens org.apache.zeppelin:zeppelin-lens:0.6.1 Lens interpreter
3333
livy org.apache.zeppelin:zeppelin-livy:0.6.1 Livy interpreter
3434
md org.apache.zeppelin:zeppelin-markdown:0.6.1 Markdown support
35+
pig org.apache.zeppelin:zeppelin-pig:0.6.1 Pig interpreter
3536
postgresql org.apache.zeppelin:zeppelin-postgresql:0.6.1 Postgresql interpreter
3637
python org.apache.zeppelin:zeppelin-python:0.6.1 Python interpreter
3738
shell org.apache.zeppelin:zeppelin-shell:0.6.1 Shell command

conf/zeppelin-site.xml.template

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@
190190

191191
<property>
192192
<name>zeppelin.interpreters</name>
193-
<value>org.apache.zeppelin.spark.SparkInterpreter,org.apache.zeppelin.spark.PySparkInterpreter,org.apache.zeppelin.rinterpreter.RRepl,org.apache.zeppelin.rinterpreter.KnitR,org.apache.zeppelin.spark.SparkRInterpreter,org.apache.zeppelin.spark.SparkSqlInterpreter,org.apache.zeppelin.spark.DepInterpreter,org.apache.zeppelin.markdown.Markdown,org.apache.zeppelin.angular.AngularInterpreter,org.apache.zeppelin.shell.ShellInterpreter,org.apache.zeppelin.file.HDFSFileInterpreter,org.apache.zeppelin.flink.FlinkInterpreter,,org.apache.zeppelin.python.PythonInterpreter,org.apache.zeppelin.lens.LensInterpreter,org.apache.zeppelin.ignite.IgniteInterpreter,org.apache.zeppelin.ignite.IgniteSqlInterpreter,org.apache.zeppelin.cassandra.CassandraInterpreter,org.apache.zeppelin.geode.GeodeOqlInterpreter,org.apache.zeppelin.postgresql.PostgreSqlInterpreter,org.apache.zeppelin.jdbc.JDBCInterpreter,org.apache.zeppelin.kylin.KylinInterpreter,org.apache.zeppelin.elasticsearch.ElasticsearchInterpreter,org.apache.zeppelin.scalding.ScaldingInterpreter,org.apache.zeppelin.alluxio.AlluxioInterpreter,org.apache.zeppelin.hbase.HbaseInterpreter,org.apache.zeppelin.livy.LivySparkInterpreter,org.apache.zeppelin.livy.LivyPySparkInterpreter,org.apache.zeppelin.livy.LivySparkRInterpreter,org.apache.zeppelin.livy.LivySparkSQLInterpreter,org.apache.zeppelin.bigquery.BigQueryInterpreter,org.apache.zeppelin.beam.BeamInterpreter</value>
193+
<value>org.apache.zeppelin.spark.SparkInterpreter,org.apache.zeppelin.spark.PySparkInterpreter,org.apache.zeppelin.rinterpreter.RRepl,org.apache.zeppelin.rinterpreter.KnitR,org.apache.zeppelin.spark.SparkRInterpreter,org.apache.zeppelin.spark.SparkSqlInterpreter,org.apache.zeppelin.spark.DepInterpreter,org.apache.zeppelin.markdown.Markdown,org.apache.zeppelin.angular.AngularInterpreter,org.apache.zeppelin.shell.ShellInterpreter,org.apache.zeppelin.file.HDFSFileInterpreter,org.apache.zeppelin.flink.FlinkInterpreter,,org.apache.zeppelin.python.PythonInterpreter,org.apache.zeppelin.lens.LensInterpreter,org.apache.zeppelin.ignite.IgniteInterpreter,org.apache.zeppelin.ignite.IgniteSqlInterpreter,org.apache.zeppelin.cassandra.CassandraInterpreter,org.apache.zeppelin.geode.GeodeOqlInterpreter,org.apache.zeppelin.postgresql.PostgreSqlInterpreter,org.apache.zeppelin.jdbc.JDBCInterpreter,org.apache.zeppelin.kylin.KylinInterpreter,org.apache.zeppelin.elasticsearch.ElasticsearchInterpreter,org.apache.zeppelin.scalding.ScaldingInterpreter,org.apache.zeppelin.alluxio.AlluxioInterpreter,org.apache.zeppelin.hbase.HbaseInterpreter,org.apache.zeppelin.livy.LivySparkInterpreter,org.apache.zeppelin.livy.LivyPySparkInterpreter,org.apache.zeppelin.livy.LivySparkRInterpreter,org.apache.zeppelin.livy.LivySparkSQLInterpreter,org.apache.zeppelin.bigquery.BigQueryInterpreter,org.apache.zeppelin.beam.BeamInterpreter,org.apache.zeppelin.pig.PigInterpreter, org.apache.zeppelin.pig.PigQueryInterpreter</value>
194194
<description>Comma separated interpreter configurations. First interpreter become a default</description>
195195
</property>
196196

docs/_includes/themes/zeppelin/_navigation.html

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@
6262
<li><a href="{{BASE_PATH}}/interpreter/lens.html">Lens</a></li>
6363
<li><a href="{{BASE_PATH}}/interpreter/livy.html">Livy</a></li>
6464
<li><a href="{{BASE_PATH}}/interpreter/markdown.html">Markdown</a></li>
65+
<li><a href="{{BASE_PATH}}/interpreter/pig.html">Pig</a></li>
6566
<li><a href="{{BASE_PATH}}/interpreter/python.html">Python</a></li>
6667
<li><a href="{{BASE_PATH}}/interpreter/postgresql.html">Postgresql, HAWQ</a></li>
6768
<li><a href="{{BASE_PATH}}/interpreter/r.html">R</a></li>
@@ -118,8 +119,6 @@
118119
<li><a href="{{BASE_PATH}}/development/howtocontributewebsite.html">How to contribute (website)</a></li>
119120
</ul>
120121
</li>
121-
</ul>
122-
<ul class="nav navbar-nav">
123122
<li>
124123
<a href="{{BASE_PATH}}/search.html" class="nav-search-link">
125124
<span class="fa fa-search nav-search-icon"></span>

docs/assets/themes/zeppelin/css/style.css

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -619,6 +619,10 @@ and (max-width: 1024px) {
619619
.navbar-collapse.collapse {
620620
padding-right: 0;
621621
}
622+
623+
.navbar-fixed-top > .container {
624+
width: 800px;
625+
}
622626
}
623627

624628
/* master branch docs dropdown menu */

docs/interpreter/pig.md

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
---
2+
layout: page
3+
title: "Pig Interpreter for Apache Zeppelin"
4+
description: "Apache Pig is a platform for analyzing large data sets that consists of a high-level language for expressing data analysis programs, coupled with infrastructure for evaluating these programs."
5+
group: manual
6+
---
7+
{% include JB/setup %}
8+
9+
10+
# Pig Interpreter for Apache Zeppelin
11+
12+
<div id="toc"></div>
13+
14+
## Overview
15+
[Apache Pig](https://pig.apache.org/) is a platform for analyzing large data sets that consists of a high-level language for expressing data analysis programs, coupled with infrastructure for evaluating these programs. The salient property of Pig programs is that their structure is amenable to substantial parallelization, which in turn enables them to handle very large data sets.
16+
17+
## Supported interpreter type
18+
- `%pig.script` (default)
19+
20+
All the pig script can run in this type of interpreter, and display type is plain text.
21+
22+
- `%pig.query`
23+
24+
Almost the same as `%pig.script`. The only difference is that you don't need to add an alias in the last statement. And the display type is table.
25+
26+
## Supported runtime mode
27+
- Local
28+
- MapReduce
29+
- Tez (Only Tez 0.7 is supported)
30+
31+
## How to use
32+
33+
### How to setup Pig
34+
35+
- Local Mode
36+
37+
Nothing needs to be done for local mode
38+
39+
- MapReduce Mode
40+
41+
HADOOP\_CONF\_DIR needs to be specified in `ZEPPELIN_HOME/conf/zeppelin-env.sh`.
42+
43+
- Tez Mode
44+
45+
HADOOP\_CONF\_DIR and TEZ\_CONF\_DIR need to be specified in `ZEPPELIN_HOME/conf/zeppelin-env.sh`.
46+
47+
### How to configure interpreter
48+
49+
At the Interpreters menu, you have to create a new Pig interpreter. Pig interpreter has below properties by default.
50+
51+
<table class="table-configuration">
52+
<tr>
53+
<th>Property</th>
54+
<th>Default</th>
55+
<th>Description</th>
56+
</tr>
57+
<tr>
58+
<td>zeppelin.pig.execType</td>
59+
<td>mapreduce</td>
60+
<td>Execution mode for pig runtime. local | mapreduce | tez </td>
61+
</tr>
62+
<tr>
63+
<td>zeppelin.pig.includeJobStats</td>
64+
<td>false</td>
65+
<td>whether display jobStats info in <code>%pig.script</code></td>
66+
</tr>
67+
<tr>
68+
<td>zeppelin.pig.maxResult</td>
69+
<td>1000</td>
70+
<td>max row number displayed in <code>%pig.query</code></td>
71+
</tr>
72+
</table>
73+
74+
### Example
75+
76+
##### pig
77+
78+
```
79+
%pig
80+
81+
raw_data = load 'dataset/sf_crime/train.csv' using PigStorage(',') as (Dates,Category,Descript,DayOfWeek,PdDistrict,Resolution,Address,X,Y);
82+
b = group raw_data all;
83+
c = foreach b generate COUNT($1);
84+
dump c;
85+
```
86+
87+
##### pig.query
88+
89+
```
90+
%pig.query
91+
92+
b = foreach raw_data generate Category;
93+
c = group b by Category;
94+
foreach c generate group as category, COUNT($1) as count;
95+
```
96+
97+
Data is shared between `%pig` and `%pig.query`, so that you can do some common work in `%pig`, and do different kinds of query based on the data of `%pig`.

docs/manual/interpreters.md

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,3 +82,49 @@ interpreter.start()
8282
The above code will start interpreter thread inside your process. Once the interpreter is started you can configure zeppelin to connect to RemoteInterpreter by checking **Connect to existing process** checkbox and then provide **Host** and **Port** on which interpreter process is listening as shown in the image below:
8383

8484
<img src="../assets/themes/zeppelin/img/screenshots/existing_interpreter.png" width="450px">
85+
86+
87+
## (Experimental) Interpreter Execution Hooks
88+
89+
Zeppelin allows for users to specify additional code to be executed by an interpreter at pre and post-paragraph code execution. This is primarily useful if you need to run the same set of code for all of the paragraphs within your notebook at specific times. Currently, this feature is only available for the spark and pyspark interpreters. To specify your hook code, you may use `z.registerHook()`. For example, enter the following into one paragraph:
90+
91+
```python
92+
%pyspark
93+
z.registerHook("post_exec", "print 'This code should be executed after the paragraph code!'")
94+
z.registerHook("pre_exec", "print 'This code should be executed before the paragraph code!'")
95+
```
96+
97+
These calls will not take effect until the next time you run a paragraph. In another paragraph, enter
98+
```python
99+
%pyspark
100+
print "This code should be entered into the paragraph by the user!"
101+
```
102+
103+
The output should be:
104+
```
105+
This code should be executed before the paragraph code!
106+
This code should be entered into the paragraph by the user!
107+
This code should be executed after the paragraph code!
108+
```
109+
110+
If you ever need to know the hook code, use `z.getHook()`:
111+
```python
112+
%pyspark
113+
print z.getHook("post_exec")
114+
```
115+
```
116+
print 'This code should be executed after the paragraph code!'
117+
```
118+
Any call to `z.registerHook()` will automatically overwrite what was previously registered. To completely unregister a hook event, use `z.unregisterHook(eventCode)`. Currently only `"post_exec"` and `"pre_exec"` are valid event codes for the Zeppelin Hook Registry system.
119+
120+
Finally, the hook registry is internally shared by other interpreters in the same group. This would allow for hook code for one interpreter REPL to be set by another as follows:
121+
122+
```scala
123+
%spark
124+
z.unregisterHook("post_exec", "pyspark")
125+
```
126+
The API is identical for both the spark (scala) and pyspark (python) implementations.
127+
128+
### Caveats
129+
Calls to `z.registerHook("pre_exec", ...)` should be made with care. If there are errors in your specified hook code, this will cause the interpreter REPL to become unable to execute any code past the pre-execute stage, making it impossible for direct calls to `z.unregisterHook()` to take effect. Current workarounds include calling `z.unregisterHook()` from a different interpreter REPL in the same interpreter group (see above) or manually restarting the interpreter group in the UI.
130+

kylin/src/main/java/org/apache/zeppelin/kylin/KylinInterpreter.java

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -56,24 +56,6 @@ public class KylinInterpreter extends Interpreter {
5656
static final Pattern KYLIN_TABLE_FORMAT_REGEX_LABEL = Pattern.compile("\"label\":\"(.*?)\"");
5757
static final Pattern KYLIN_TABLE_FORMAT_REGEX = Pattern.compile("\"results\":\\[\\[\"(.*?)\"]]");
5858

59-
static {
60-
Interpreter.register(
61-
"kylin",
62-
"kylin",
63-
KylinInterpreter.class.getName(),
64-
new InterpreterPropertyBuilder()
65-
.add(KYLIN_USERNAME, "ADMIN", "username for kylin user")
66-
.add(KYLIN_PASSWORD, "KYLIN", "password for kylin user")
67-
.add(KYLIN_QUERY_API_URL, "http://<host>:<port>/kylin/api/query", "Kylin API.")
68-
.add(KYLIN_QUERY_PROJECT, "default", "kylin project name")
69-
.add(KYLIN_QUERY_OFFSET, "0", "kylin query offset")
70-
.add(KYLIN_QUERY_LIMIT, "5000", "kylin query limit")
71-
.add(KYLIN_QUERY_ACCEPT_PARTIAL, "true", "The kylin query partial flag").build());
72-
}
73-
74-
75-
76-
7759
public KylinInterpreter(Properties property) {
7860
super(property);
7961
}
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
[
2+
{
3+
"group": "kylin",
4+
"name": "kylin",
5+
"className": "org.apache.zeppelin.kylin.KylinInterpreter",
6+
"properties": {
7+
"kylin.api.url": {
8+
"envName": null,
9+
"propertyName": "kylin.api.url",
10+
"defaultValue": "http://localhost:7070/kylin/api/query",
11+
"description": "Kylin API"
12+
},
13+
"kylin.api.user": {
14+
"envName": null,
15+
"propertyName": "kylin.api.user",
16+
"defaultValue": "ADMIN",
17+
"description": "username for kylin user"
18+
},
19+
"kylin.api.password": {
20+
"envName": null,
21+
"propertyName": "kylin.api.password",
22+
"defaultValue": "KYLIN",
23+
"description": "password for kylin user"
24+
},
25+
"kylin.query.project": {
26+
"envName": null,
27+
"propertyName": "kylin.query.project",
28+
"defaultValue": "default",
29+
"description": "kylin project name"
30+
},
31+
"kylin.query.offset": {
32+
"envName": null,
33+
"propertyName": "kylin.query.offset",
34+
"defaultValue": "0",
35+
"description": "kylin query offset"
36+
},
37+
"kylin.query.limit": {
38+
"envName": null,
39+
"propertyName": "kylin.query.limit",
40+
"defaultValue": "5000",
41+
"description": "kylin query limit"
42+
},
43+
"kylin.query.ispartial": {
44+
"envName": null,
45+
"propertyName": "kylin.query.ispartial",
46+
"defaultValue": "true",
47+
"description": "The kylin query partial flag"
48+
}
49+
},
50+
"editor": {
51+
"language": "sql"
52+
}
53+
}
54+
]

kylin/src/test/java/KylinInterpreterTest.java

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,7 @@
2020
import org.apache.http.message.AbstractHttpMessage;
2121
import org.apache.zeppelin.interpreter.InterpreterResult;
2222
import org.apache.zeppelin.kylin.KylinInterpreter;
23-
import org.junit.After;
24-
import org.junit.Before;
23+
import org.junit.BeforeClass;
2524
import org.junit.Test;
2625

2726
import java.io.ByteArrayInputStream;
@@ -33,19 +32,23 @@
3332

3433
import static org.junit.Assert.assertEquals;
3534

36-
3735
public class KylinInterpreterTest {
38-
@Before
39-
public void setUp() throws Exception {
40-
}
36+
static final Properties kylinProperties = new Properties();
4137

42-
@After
43-
public void tearDown() throws Exception {
38+
@BeforeClass
39+
public static void setUpClass() {
40+
kylinProperties.put("kylin.api.url", "http://localhost:7070/kylin/api/query");
41+
kylinProperties.put("kylin.api.user", "ADMIN");
42+
kylinProperties.put("kylin.api.password", "KYLIN");
43+
kylinProperties.put("kylin.query.project", "default");
44+
kylinProperties.put("kylin.query.offset", "0");
45+
kylinProperties.put("kylin.query.limit", "5000");
46+
kylinProperties.put("kylin.query.ispartial", "true");
4447
}
4548

4649
@Test
4750
public void test(){
48-
KylinInterpreter t = new MockKylinInterpreter(new Properties());
51+
KylinInterpreter t = new MockKylinInterpreter(kylinProperties);
4952
InterpreterResult result = t.interpret(
5053
"select a.date,sum(b.measure) as measure from kylin_fact_table a " +
5154
"inner join kylin_lookup_table b on a.date=b.date group by a.date", null);
@@ -198,4 +201,4 @@ public boolean isStreaming() {
198201
public void consumeContent() throws IOException {
199202

200203
}
201-
}
204+
}

0 commit comments

Comments
 (0)