Adapt FPGrowth examples and docs to itemsCol

YY-OnCall · YY-OnCall · commit 9fef28075137 · 2017-03-21T11:32:23.000-07:00
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaFPGrowthExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaFPGrowthExample.java
@@ -44,11 +44,12 @@ public static void main(String[] args) {
       RowFactory.create(Arrays.asList("1 2".split(" ")))
     );
     StructType schema = new StructType(new StructField[]{ new StructField(
-      "features", new ArrayType(DataTypes.StringType, true), false, Metadata.empty())
+      "items", new ArrayType(DataTypes.StringType, true), false, Metadata.empty())
     });
     Dataset<Row> itemsDF = spark.createDataFrame(data, schema);
 
     FPGrowthModel model = new FPGrowth()
+      .setItemsCol("items")
       .setMinSupport(0.5)
       .setMinConfidence(0.6)
       .fit(itemsDF);
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/FPGrowthExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/FPGrowthExample.scala
@@ -45,9 +45,9 @@ object FPGrowthExample {
       "1 2 5",
       "1 2 3 5",
       "1 2")
-    ).map(t => t.split(" ")).toDF("features")
+    ).map(t => t.split(" ")).toDF("items")
 
-    val fpgrowth = new FPGrowth().setMinSupport(0.5).setMinConfidence(0.6)
+    val fpgrowth = new FPGrowth().setItemsCol("items").setMinSupport(0.5).setMinConfidence(0.6)
     val model = fpgrowth.fit(dataset)
 
     // Display frequent itemsets.
diff --git a/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala b/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala
@@ -117,7 +117,7 @@ private[fpm] trait FPGrowthParams extends Params with HasPredictionCol {
  * Recommendation</a>. PFP distributes computation in such a way that each worker executes an
  * independent group of mining tasks. The FP-Growth algorithm is described in
  * <a href="http://dx.doi.org/10.1145/335191.335372">Han et al., Mining frequent patterns without
- * candidate generation</a>. Note null values in the feature column are ignored during fit().
+ * candidate generation</a>. Note null values in the itemsCol column are ignored during fit().
  *
  * @see <a href="http://en.wikipedia.org/wiki/Association_rule_learning">
  * Association rule learning (Wikipedia)</a>
@@ -230,7 +230,7 @@ class FPGrowthModel private[ml] (
    * Then for each association rule, it will examine the input items against antecedents and
    * summarize the consequents as prediction. The prediction column has the same data type as the
    * input column(Array[T]) and will not contain existing items in the input column. The null
-   * values in the feature columns are treated as empty sets.
+   * values in the itemsCol column are treated as empty sets.
    * WARNING: internally it collects association rules to the driver and uses broadcast for
    * efficiency. This may bring pressure to driver memory for large set of association rules.
    */