1717
1818package org .apache .spark
1919
20- import java .io ._
20+ import java .io .{ ObjectInputStream , Serializable }
2121
2222import scala .collection .mutable .Map
2323import scala .collection .generic .Growable
2424import org .apache .spark .serializer .JavaSerializer
2525
2626/**
27- * A datatype that can be accumulated, i.e. has an commutative and associative "add" operation,
27+ * A data type that can be accumulated, i.e. has a commutative and associative "add" operation,
2828 * but where the result type, `R`, may be different from the element type being added, `T`.
2929 *
30- * You must define how to add data, and how to merge two of these together. For some datatypes ,
30+ * You must define how to add data, and how to merge two of these together. For some data types,
3131 * such as a counter, these might be the same operation. In that case, you can use the simpler
3232 * [[org.apache.spark.Accumulator ]]. They won't always be the same, though -- e.g., imagine you are
3333 * accumulating a set. You will add items to the set, and you will union two sets together.
@@ -45,7 +45,7 @@ class Accumulable[R, T] (
4545 val id = Accumulators .newId
4646 @ transient private var value_ = initialValue // Current value on master
4747 val zero = param.zero(initialValue) // Zero value to be passed to workers
48- var deserialized = false
48+ private var deserialized = false
4949
5050 Accumulators .register(this , true )
5151
@@ -127,7 +127,7 @@ class Accumulable[R, T] (
127127
128128/**
129129 * Helper object defining how to accumulate values of a particular type. An implicit
130- * AccumulableParam needs to be available when you create Accumulables of a specific type.
130+ * AccumulableParam needs to be available when you create [[ Accumulable ]]s of a specific type.
131131 *
132132 * @tparam R the full accumulated data (result type)
133133 * @tparam T partial data that can be added in
@@ -185,8 +185,30 @@ class GrowableAccumulableParam[R <% Growable[T] with TraversableOnce[T] with Ser
185185}
186186
187187/**
188- * A simpler value of [[org.apache.spark.Accumulable ]] where the result type being accumulated is the same
189- * as the types of elements being merged.
188+ * A simpler version of [[Accumulable ]] where the result type being accumulated is the same
189+ * as the types of elements being merged, i.e. variables that are only "added" to through an
190+ * associative operation and can therefore be efficiently supported in parallel. They can be used
191+ * to implement counters (as in MapReduce) or sums. Spark natively supports accumulators of type
192+ * `Int` and `Double`, and programmers can add support for new types.
193+ *
194+ * An accumulator is created from an initial value `v` by calling [[SparkContext#accumulator ]].
195+ * Tasks running on the cluster can then add to it using the [[Accumulable#+= ]] operator.
196+ * However, they cannot read its value. Only the driver program can read the accumulator's value,
197+ * using its `value` method.
198+ *
199+ * The interpreter session below shows an accumulator being used to add up the elements of an array:
200+ *
201+ * {{{
202+ * scala> val accum = sc.accumulator(0)
203+ * accum: org.apache.spark.Accumulator[Int] = 0
204+ *
205+ * scala> sc.parallelize(Array(1, 2, 3, 4)).foreach(x => accum += x)
206+ * ...
207+ * 10/09/29 18:41:08 INFO SparkContext: Tasks finished in 0.317106 s
208+ *
209+ * scala> accum.value
210+ * res2: Int = 10
211+ * }}}
190212 *
191213 * @param initialValue initial value of accumulator
192214 * @param param helper object defining how to add elements of type `T`
@@ -196,9 +218,9 @@ class Accumulator[T](@transient initialValue: T, param: AccumulatorParam[T])
196218 extends Accumulable [T ,T ](initialValue, param)
197219
198220/**
199- * A simpler version of [[org.apache.spark.AccumulableParam ]] where the only datatype you can add in is the same type
200- * as the accumulated value. An implicit AccumulatorParam object needs to be available when you create
201- * Accumulators of a specific type.
221+ * A simpler version of [[org.apache.spark.AccumulableParam ]] where the only data type you can add
222+ * in is the same type as the accumulated value. An implicit AccumulatorParam object needs to be
223+ * available when you create Accumulators of a specific type.
202224 *
203225 * @tparam T type of value to accumulate
204226 */
0 commit comments