Fix R and Python: add solver and loss params to spark.svmLinear; update PySpark LinearSVC doctest values

YY-OnCall · YY-OnCall · commit 2ca5a7456f7d · 2017-06-13T18:01:10.000-07:00
diff --git a/R/pkg/R/mllib_classification.R b/R/pkg/R/mllib_classification.R
@@ -58,6 +58,8 @@ setClass("NaiveBayesModel", representation(jobj = "jobj"))
 #' @param regParam The regularization parameter. Only supports L2 regularization currently.
 #' @param maxIter Maximum iteration number.
 #' @param tol Convergence tolerance of iterations.
+#' @param solver The optimization solver. Supported options: "owlqn" and "l-bfgs" (default).
+#' @param loss The loss function. Supported options: "hinge" and "squared_hinge" (default).
 #' @param standardization Whether to standardize the training features before fitting the model. The coefficients
 #'                        of models will be always returned on the original scale, so it will be transparent for
 #'                        users. Note that with/without standardization, the models should be always converged
@@ -96,7 +98,8 @@ setClass("NaiveBayesModel", representation(jobj = "jobj"))
 #' @note spark.svmLinear since 2.2.0
 setMethod("spark.svmLinear", signature(data = "SparkDataFrame", formula = "formula"),
           function(data, formula, regParam = 0.0, maxIter = 100, tol = 1E-6, standardization = TRUE,
-                   threshold = 0.0, weightCol = NULL, aggregationDepth = 2) {
+                   threshold = 0.0, weightCol = NULL, aggregationDepth = 2, solver = "l-bfgs",
+                   loss = "squared_hinge") {
             formula <- paste(deparse(formula), collapse = "")
 
             if (!is.null(weightCol) && weightCol == "") {
@@ -108,7 +111,8 @@ setMethod("spark.svmLinear", signature(data = "SparkDataFrame", formula = "formu
             jobj <- callJStatic("org.apache.spark.ml.r.LinearSVCWrapper", "fit",
                                 data@sdf, formula, as.numeric(regParam), as.integer(maxIter),
                                 as.numeric(tol), as.logical(standardization), as.numeric(threshold),
-                                weightCol, as.integer(aggregationDepth))
+                                weightCol, as.integer(aggregationDepth), as.character(solver),
+                                as.character(loss))
             new("LinearSVCModel", jobj = jobj)
           })
 
diff --git a/R/pkg/tests/fulltests/test_mllib_classification.R b/R/pkg/tests/fulltests/test_mllib_classification.R
@@ -30,7 +30,8 @@ absoluteSparkPath <- function(x) {
 test_that("spark.svmLinear", {
   df <- suppressWarnings(createDataFrame(iris))
   training <- df[df$Species %in% c("versicolor", "virginica"), ]
-  model <- spark.svmLinear(training,  Species ~ ., regParam = 0.01, maxIter = 10)
+  model <- spark.svmLinear(training,  Species ~ ., regParam = 0.01, maxIter = 10,
+                           loss = "hinge", solver = "owlqn")
   summary <- summary(model)
 
   # test summary coefficients return matrix type
diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/LinearSVCWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/LinearSVCWrapper.scala
@@ -70,7 +70,7 @@ private[r] object LinearSVCWrapper
   val PREDICTED_LABEL_INDEX_COL = "pred_label_idx"
   val PREDICTED_LABEL_COL = "prediction"
 
-  def fit(
+  def fit( // scalastyle:ignore
       data: DataFrame,
       formula: String,
       regParam: Double,
@@ -79,7 +79,9 @@ private[r] object LinearSVCWrapper
       standardization: Boolean,
       threshold: Double,
       weightCol: String,
-      aggregationDepth: Int
+      aggregationDepth: Int,
+      solver: String,
+      loss: String
       ): LinearSVCWrapper = {
 
     val rFormula = new RFormula()
@@ -105,6 +107,8 @@ private[r] object LinearSVCWrapper
       .setPredictionCol(PREDICTED_LABEL_INDEX_COL)
       .setThreshold(threshold)
       .setAggregationDepth(aggregationDepth)
+      .setSolver(solver)
+      .setLoss(loss)
 
     if (weightCol != null) svc.setWeightCol(weightCol)
 
diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py
@@ -80,9 +80,9 @@ class LinearSVC(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, Ha
     >>> svm = LinearSVC(maxIter=5, regParam=0.01)
     >>> model = svm.fit(df)
     >>> model.coefficients
-    DenseVector([0.0, -0.2792, -0.1833])
+    DenseVector([0.0, 0.0759, -0.6167])
     >>> model.intercept
-    1.0206118982229047
+    1.3113904822325306
     >>> model.numClasses
     2
     >>> model.numFeatures
@@ -92,7 +92,7 @@ class LinearSVC(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, Ha
     >>> result.prediction
     1.0
     >>> result.rawPrediction
-    DenseVector([-1.4831, 1.4831])
+    DenseVector([-1.8521, 1.8521])
     >>> svm_path = temp_path + "/svm"
     >>> svm.save(svm_path)
     >>> svm2 = LinearSVC.load(svm_path)