Commit d5e87fb

elbamos authored and bzz committed
R Interpreter for Zeppelin
This is the initial PR for an R Interpreter for Zeppelin. There's still some work to be done (e.g., tests), but it's usable: it brings to Zeppelin features from R such as its library of statistics and machine-learning packages, as well as advanced interactive visualizations. So I'd like to open it up for others to comment and/or become involved.

Summary:

- There are two interpreters: one emulates a REPL, the other uses knitr to weave markdown and formatted R output. The two interpreters share a single execution environment.
- Visualizations: Besides R's own graphics, this also supports interactive visualizations with googleVis and rCharts. I am working on htmlwidgets (almost done) with the author of that package, and a next-step project is to get Shiny/ggvis working. Sometimes a visualization won't load until the page is reloaded; I'm not sure why this is.
- Licensing: To talk to R, this integrates code forked from rScala. rScala was released with a BSD-license option, and the author's permission was obtained.
- Spark: Getting R to share a single Spark context with the Spark interpreter group is going to be a project. For right now, the R interpreters live in their own "r" interpreter group, and new Spark contexts are created on startup.
- Zeppelin Context: Not yet integrated, in significant part because there's no ZeppelinContext to talk to until it lives in the Spark interpreter group.
- Documentation: A notebook is included that demonstrates what the interpreter does and how to use it.
- Tests: Working on it...

P.S.: This is my first PR on a project of this size; let me know what I messed up and I'll try to fix it ASAP.

Author: Amos Elb <[email protected]>
Author: Amos B. Elberg <[email protected]>

Closes #208 from elbamos/rinterpreter and squashes the following commits:

ffc1a25 [Amos Elb] Fix rat issue
a08ec5b [Amos B. Elberg] R Interpreter
1 parent b51af33 · commit d5e87fb


60 files changed: +4572 additions, −192 deletions

.travis.yml

Lines changed: 19 additions & 6 deletions
```diff
@@ -16,23 +16,24 @@
 language: java

 sudo: false
+
 cache:
   directories:
     - .spark-dist
-
+
 matrix:
   include:
     # Test all modules
     - jdk: "oraclejdk7"
-      env: SPARK_VER="1.6.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Phadoop-2.3 -Ppyspark -Pscalding" BUILD_FLAG="package -Pbuild-distr" TEST_FLAG="verify -Pusing-packaged-distr" TEST_PROJECTS=""
+      env: SPARK_VER="1.6.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Pr -Phadoop-2.3 -Ppyspark -Pscalding" BUILD_FLAG="package -Pbuild-distr" TEST_FLAG="verify -Pusing-packaged-distr" TEST_PROJECTS=""

     # Test spark module for 1.5.2
     - jdk: "oraclejdk7"
-      env: SPARK_VER="1.5.2" HADOOP_VER="2.3" PROFILE="-Pspark-1.5 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark* -DfailIfNoTests=false"
+      env: SPARK_VER="1.5.2" HADOOP_VER="2.3" PROFILE="-Pspark-1.5 -Pr -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark,r -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark* -DfailIfNoTests=false"

     # Test spark module for 1.4.1
     - jdk: "oraclejdk7"
-      env: SPARK_VER="1.4.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.4 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark* -DfailIfNoTests=false"
+      env: SPARK_VER="1.4.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.4 -Pr -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark,r -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark* -DfailIfNoTests=false"

     # Test spark module for 1.3.1
     - jdk: "oraclejdk7"
@@ -46,12 +47,24 @@ matrix:
     - jdk: "oraclejdk7"
       env: SPARK_VER="1.1.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.1 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark* -DfailIfNoTests=false"

-    # Test selenium with spark module for 1.6.0
+    # Test selenium with spark module for 1.6.1
     - jdk: "oraclejdk7"
-      env: TEST_SELENIUM="true" SPARK_VER="1.6.0" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.AbstractFunctionalSuite -DfailIfNoTests=false"
+      env: TEST_SELENIUM="true" SPARK_VER="1.6.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.AbstractFunctionalSuite -DfailIfNoTests=false"
+
+addons:
+  apt:
+    sources:
+      - r-packages-precise
+    packages:
+      - r-base-dev
+      - r-cran-evaluate
+      - r-cran-base64enc

 before_install:
   - "ls -la .spark-dist"
+  - mkdir -p ~/R
+  - R -e "install.packages('knitr', repos = 'http://cran.us.r-project.org', lib='~/R')"
+  - export R_LIBS='~/R'
   - "export DISPLAY=:99.0"
   - "sh -e /etc/init.d/xvfb start"
```
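The new `before_install` steps can be mirrored when developing the interpreter locally. A minimal sketch: create a user-writable R library and point `R_LIBS` at it, exactly as the CI does (the `~/R` location and the CRAN mirror are just the choices the CI makes; any writable library path works, and the `install.packages` line is shown commented because it requires R on the `PATH`):

```shell
# Create a user-writable R library and point R at it, as the CI does.
mkdir -p "$HOME/R"
export R_LIBS="$HOME/R"
echo "$R_LIBS"

# With the library path in place, knitr can then be installed into it:
# R -e "install.packages('knitr', repos = 'http://cran.us.r-project.org', lib = '~/R')"
```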

LICENSE

Lines changed: 13 additions & 1 deletion
```diff
@@ -244,4 +244,16 @@ Apache licenses
 The following components are provided under the Apache License. See project link for details.
 The text of each license is also included at licenses/LICENSE-[project]-[version].txt.

-    (Apache 2.0) Bootstrap v3.0.2 (http://getbootstrap.com/) - https://github.com/twbs/bootstrap/blob/v3.0.2/LICENSE
+    (Apache 2.0) Bootstrap v3.0.2 (http://getbootstrap.com/) - https://github.com/twbs/bootstrap/blob/v3.0.2/LICENSE
+
+========================================================================
+BSD 3-Clause licenses
+========================================================================
+The following components are provided under the BSD 3-Clause license. See file headers and project links for details.
+
+    (BSD 3 Clause) portions of rscala 1.0.6 (https://dahl.byu.edu/software/rscala/) - https://cran.r-project.org/web/packages/rscala/index.html
+        r/R/rzeppelin/R/{common.R, globals.R, protocol.R, rServer.R, scalaInterpreter.R, zzz.R}
+        r/src/main/scala/org/apache/zeppelin/rinterpreter/rscala/{Package.scala, RClient.scala}
+
+    (BSD 3 Clause) portions of Scala (http://www.scala-lang.org/download) - http://www.scala-lang.org/download/#License
+        r/src/main/scala/scala/Console.scala
```

bin/interpreter.sh

Lines changed: 6 additions & 1 deletion
```diff
@@ -85,7 +85,10 @@ if [[ "${INTERPRETER_ID}" == "spark" ]]; then
   export SPARK_SUBMIT="${SPARK_HOME}/bin/spark-submit"
   SPARK_APP_JAR="$(ls ${ZEPPELIN_HOME}/interpreter/spark/zeppelin-spark*.jar)"
   # This will evantually passes SPARK_APP_JAR to classpath of SparkIMain
-  ZEPPELIN_CLASSPATH+=${SPARK_APP_JAR}
+  ZEPPELIN_CLASSPATH=${SPARK_APP_JAR}
+  # Need to add the R Interpreter
+  RZEPPELINPATH="$(ls ${ZEPPELIN_HOME}/interpreter/spark/zeppelin-zr*.jar)"
+  ZEPPELIN_CLASSPATH="${ZEPPELIN_CLASSPATH}:${RZEPPELINPATH}"

   pattern="$SPARK_HOME/python/lib/py4j-*-src.zip"
   py4j=($pattern)
@@ -130,6 +133,8 @@ if [[ "${INTERPRETER_ID}" == "spark" ]]; then
     ZEPPELIN_CLASSPATH+=":${HADOOP_CONF_DIR}"
   fi

+  RZEPPELINPATH="$(ls ${ZEPPELIN_HOME}/interpreter/spark/zeppelin-zr*.jar)"
+  ZEPPELIN_CLASSPATH="${ZEPPELIN_CLASSPATH}:${RZEPPELINPATH}"
   export SPARK_CLASSPATH+=":${ZEPPELIN_CLASSPATH}"
 fi
elif [[ "${INTERPRETER_ID}" == "hbase" ]]; then
```
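The classpath assembly in this patch reduces to simple colon-joining. A tiny sketch with placeholder jar paths (the real script derives the paths with `ls` under `${ZEPPELIN_HOME}`; the file names below are only stand-ins):

```shell
# Placeholder stand-ins for the jars the script locates with ls:
SPARK_APP_JAR="/opt/zeppelin/interpreter/spark/zeppelin-spark-0.6.0.jar"
RZEPPELINPATH="/opt/zeppelin/interpreter/spark/zeppelin-zr-0.6.0.jar"

# Same assembly as the patched script: start from the Spark app jar,
# then append the R interpreter jar with a ':' separator.
ZEPPELIN_CLASSPATH=${SPARK_APP_JAR}
ZEPPELIN_CLASSPATH="${ZEPPELIN_CLASSPATH}:${RZEPPELINPATH}"
echo "${ZEPPELIN_CLASSPATH}"
```

Note the patch also changes `+=` to `=` on the first assignment, so the Spark app jar now starts the classpath instead of being appended to whatever was there before.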

conf/zeppelin-site.xml.template

Lines changed: 1 addition & 1 deletion
```diff
@@ -144,7 +144,7 @@

 <property>
   <name>zeppelin.interpreters</name>
-  <value>org.apache.zeppelin.spark.SparkInterpreter,org.apache.zeppelin.spark.PySparkInterpreter,org.apache.zeppelin.spark.SparkSqlInterpreter,org.apache.zeppelin.spark.DepInterpreter,org.apache.zeppelin.markdown.Markdown,org.apache.zeppelin.angular.AngularInterpreter,org.apache.zeppelin.shell.ShellInterpreter,org.apache.zeppelin.hive.HiveInterpreter,org.apache.zeppelin.tajo.TajoInterpreter,org.apache.zeppelin.file.HDFSFileInterpreter,org.apache.zeppelin.flink.FlinkInterpreter,org.apache.zeppelin.lens.LensInterpreter,org.apache.zeppelin.ignite.IgniteInterpreter,org.apache.zeppelin.ignite.IgniteSqlInterpreter,org.apache.zeppelin.cassandra.CassandraInterpreter,org.apache.zeppelin.geode.GeodeOqlInterpreter,org.apache.zeppelin.postgresql.PostgreSqlInterpreter,org.apache.zeppelin.jdbc.JDBCInterpreter,org.apache.zeppelin.phoenix.PhoenixInterpreter,org.apache.zeppelin.kylin.KylinInterpreter,org.apache.zeppelin.elasticsearch.ElasticsearchInterpreter,org.apache.zeppelin.scalding.ScaldingInterpreter,org.apache.zeppelin.alluxio.AlluxioInterpreter,org.apache.zeppelin.hbase.HbaseInterpreter</value>
+  <value>org.apache.zeppelin.spark.SparkInterpreter,org.apache.zeppelin.spark.PySparkInterpreter,org.apache.zeppelin.spark.SparkSqlInterpreter,org.apache.zeppelin.spark.DepInterpreter,org.apache.zeppelin.markdown.Markdown,org.apache.zeppelin.angular.AngularInterpreter,org.apache.zeppelin.shell.ShellInterpreter,org.apache.zeppelin.hive.HiveInterpreter,org.apache.zeppelin.tajo.TajoInterpreter,org.apache.zeppelin.file.HDFSFileInterpreter,org.apache.zeppelin.flink.FlinkInterpreter,org.apache.zeppelin.lens.LensInterpreter,org.apache.zeppelin.ignite.IgniteInterpreter,org.apache.zeppelin.ignite.IgniteSqlInterpreter,org.apache.zeppelin.cassandra.CassandraInterpreter,org.apache.zeppelin.geode.GeodeOqlInterpreter,org.apache.zeppelin.postgresql.PostgreSqlInterpreter,org.apache.zeppelin.jdbc.JDBCInterpreter,org.apache.zeppelin.phoenix.PhoenixInterpreter,org.apache.zeppelin.kylin.KylinInterpreter,org.apache.zeppelin.elasticsearch.ElasticsearchInterpreter,org.apache.zeppelin.scalding.ScaldingInterpreter,org.apache.zeppelin.alluxio.AlluxioInterpreter,org.apache.zeppelin.hbase.HbaseInterpreter,org.apache.zeppelin.rinterpreter.KnitR,org.apache.zeppelin.rinterpreter.RRepl</value>
   <description>Comma separated interpreter configurations. First interpreter become a default</description>
 </property>
```
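The change appends two classes, `org.apache.zeppelin.rinterpreter.KnitR` and `org.apache.zeppelin.rinterpreter.RRepl`, to the comma-separated `zeppelin.interpreters` value. A quick sketch of how the new entries can be picked out of that list (the `INTERPRETERS` variable below is truncated to the tail of the real value for brevity):

```shell
# Tail of the comma-separated zeppelin.interpreters value; the two new
# R interpreter classes appear at the end of the list.
INTERPRETERS="org.apache.zeppelin.hbase.HbaseInterpreter,org.apache.zeppelin.rinterpreter.KnitR,org.apache.zeppelin.rinterpreter.RRepl"
echo "$INTERPRETERS" | tr ',' '\n' | grep 'rinterpreter'
```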

docs/interpreter/r.md

Lines changed: 100 additions & 0 deletions
---
layout: page
title: "R Interpreter"
description: ""
group: manual
---
{% include JB/setup %}

## R Interpreter
This is the Apache (incubating) Zeppelin project, with the addition of support for the R programming language and R-Spark integration.

### Requirements

Additional requirements for the R interpreter are:

* R 3.1 or later (earlier versions may work, but have not been tested)
* The `evaluate` R package.

For full R support, you will also need the following R packages:

* `knitr`
* `repr` -- available with `devtools::install_github("IRkernel/repr")`
* `htmltools` -- required for some interactive plotting
* `base64enc` -- required to view R base plots
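The CRAN packages above can be pulled in from a shell in one go; a sketch (the install lines are shown commented because they require R, and for `repr` the `devtools` package, to be available):

```shell
# CRAN packages needed for full support; repr comes from GitHub instead.
for pkg in evaluate knitr htmltools base64enc; do
  echo "CRAN package: $pkg"
done
# R -e "install.packages(c('evaluate', 'knitr', 'htmltools', 'base64enc'), repos = 'http://cran.us.r-project.org')"
# R -e "devtools::install_github('IRkernel/repr')"
```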
### Configuration

To run Zeppelin with the R Interpreter, the `SPARK_HOME` environment variable must be set. The best way to do this is by editing `conf/zeppelin-env.sh`. If it is not set, the R Interpreter will not be able to interface with Spark.

You should also copy `conf/zeppelin-site.xml.template` to `conf/zeppelin-site.xml`. That will ensure that Zeppelin sees the R Interpreter the first time it starts up.
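Concretely, the two configuration steps might look like this (the `/opt/spark` path is only an example; use your actual Spark installation):

```shell
# 1. Point Zeppelin at a Spark installation, e.g. in conf/zeppelin-env.sh:
export SPARK_HOME="/opt/spark"
echo "$SPARK_HOME"

# 2. Activate the site config so the R interpreters are registered
#    (run from the Zeppelin root; shown commented since it touches files):
# cp conf/zeppelin-site.xml.template conf/zeppelin-site.xml
```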
### Using the R Interpreter

By default, the R Interpreter appears as two Zeppelin Interpreters, `%r` and `%knitr`.

`%r` will behave like an ordinary REPL. You can execute commands as in the CLI.

[![2+2](screenshots/repl2plus2.png)](screenshots/repl2plus2.png)

R base plotting is fully supported:

[![replhist](screenshots/replhist.png)](screenshots/replhist.png)

If you return a data.frame, Zeppelin will attempt to display it using Zeppelin's built-in visualizations.

[![replhead](screenshots/replhead.png)](screenshots/replhead.png)

`%knitr` interfaces directly against `knitr`, with chunk options on the first line:

[![knitgeo](screenshots/knitgeo.png)](screenshots/knitgeo.png)
[![knitstock](screenshots/knitstock.png)](screenshots/knitstock.png)
[![knitmotion](screenshots/knitmotion.png)](screenshots/knitmotion.png)

The two interpreters share the same environment. If you define a variable from `%r`, it will be in scope if you then make a call using `%knitr`.

### Using SparkR & Moving Between Languages

If `SPARK_HOME` is set, the `SparkR` package will be loaded automatically:

[![sparkrfaithful](screenshots/sparkrfaithful.png)](screenshots/sparkrfaithful.png)

The Spark Context and SQL Context are created and injected into the local environment automatically, as `sc` and `sql`.

The same contexts are shared with the `%spark`, `%sql` and `%pyspark` interpreters:

[![backtoscala](screenshots/backtoscala.png)](screenshots/backtoscala.png)

You can also make an ordinary R variable accessible in Scala and Python:

[![varr1](screenshots/varr1.png)](screenshots/varr1.png)

And vice versa:

[![varscala](screenshots/varscala.png)](screenshots/varscala.png)
[![varr2](screenshots/varr2.png)](screenshots/varr2.png)
### Caveats & Troubleshooting

* Almost all issues with the R interpreter turn out to be caused by an incorrectly set `SPARK_HOME`. The R interpreter must load a version of the `SparkR` package that matches the running version of Spark, and it does this by searching `SPARK_HOME`. If Zeppelin isn't configured to interface with Spark in `SPARK_HOME`, the R interpreter will not be able to connect to Spark.

* The `knitr` environment is persistent. If you run a chunk from Zeppelin that changes a variable, then run the same chunk again, the variable will already have been changed. Use immutable variables.

* Note that `%spark.r` and `%r` are two different ways of calling the same interpreter, as are `%spark.knitr` and `%knitr`. By default, Zeppelin puts the R interpreters in the `%spark.` Interpreter Group.

* Using the `%r` interpreter, if you return a data.frame, HTML, or an image, it will dominate the result. So if you execute three commands, and one is `hist()`, all you will see is the histogram, not the results of the other commands. This is a Zeppelin limitation.

* If you return a data.frame (for instance, from calling `head()`) from the `%spark.r` interpreter, it will be parsed by Zeppelin's built-in data visualization system.

* Why `knitr` instead of `rmarkdown`? Why no `htmlwidgets`? In order to support `htmlwidgets`, which has indirect dependencies, `rmarkdown` uses `pandoc`, which requires writing to and reading from disk. This makes it many times slower than `knitr`, which can operate entirely in RAM.

* Why no `ggvis` or `shiny`? Supporting `shiny` would require integrating a reverse proxy into Zeppelin, which is a substantial task.

* Mac OS X & case-insensitive filesystems. If you try to install on a case-insensitive filesystem, which is the Mac OS X default, Maven can unintentionally delete the install directory because `r` and `R` become the same subdirectory.

* Error `unable to start device X11` with the REPL interpreter. Check your shell login scripts to see if they are adjusting the `DISPLAY` environment variable. This is common on some operating systems as a workaround for ssh issues, but it can interfere with R plotting.

* akka library version or `TTransport` errors. These can happen if you try to run Zeppelin with a `SPARK_HOME` that has a version of Spark other than the one specified with `-Pspark-1.x` when Zeppelin was compiled.
(Five binary image files, the screenshots referenced by docs/interpreter/r.md, are also added; previews omitted.)
