
Commit b65b041

felixcheung authored and shivaram committed
[SPARK-16508][SPARKR] doc updates and more CRAN check fixes
replace ``` ` ``` in code doc with `\code{thing}`
remove added `...` for drop(DataFrame)
fix remaining CRAN check warnings

create doc with knitr

junyangq

Author: Felix Cheung <[email protected]>

Closes #14734 from felixcheung/rdoccleanup.

(cherry picked from commit 71afeee)
Signed-off-by: Shivaram Venkataraman <[email protected]>
1 parent 01a4d69 commit b65b041
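
For readers unfamiliar with Rd markup, here is a minimal before/after sketch of the substitution the commit applies throughout the R doc comments (the `isLocal` line is taken from the DataFrame.R hunk further down). Backticks are not Rd markup, so they show up literally in the rendered help; `\code{}` renders as code:

```r
# Before: markdown-style backticks, shown literally in the generated help
#' Returns True if the `collect` and `take` methods can be run locally

# After: Rd \code{} markup, rendered as code in the help pages
#' Returns True if the \code{collect} and \code{take} methods can be run locally
```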

File tree: 12 files changed, +110 -106 lines changed


R/pkg/NAMESPACE

Lines changed: 5 additions & 1 deletion
@@ -1,5 +1,9 @@
 # Imports from base R
-importFrom(methods, setGeneric, setMethod, setOldClass)
+# Do not include stats:: "rpois", "runif" - causes error at runtime
+importFrom("methods", "setGeneric", "setMethod", "setOldClass")
+importFrom("methods", "is", "new", "signature", "show")
+importFrom("stats", "gaussian", "setNames")
+importFrom("utils", "download.file", "packageVersion", "untar")
 
 # Disable native libraries till we figure out how to package it
 # See SPARKR-7839
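
As an aside (not part of the diff), a minimal sketch of the two ways package code can reach a base-package function such as `stats::runif`. The commit keeps the random-number generators out of NAMESPACE and qualifies the call sites instead, as the RDD.R hunk below shows; the `draw` helper here is a toy:

```r
# Option 1: declare the import in NAMESPACE, then call the function unqualified.
#   importFrom("stats", "runif")
#   draw <- function(n) runif(n)

# Option 2 (what the commit uses for runif/rpois): no NAMESPACE entry,
# qualify every call site explicitly.
draw <- function(n) {
  stats::runif(n)
}

draw(3)  # three uniform draws on [0, 1]
```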

R/pkg/R/DataFrame.R

Lines changed: 34 additions & 35 deletions
@@ -150,7 +150,7 @@ setMethod("explain",
 
 #' isLocal
 #'
-#' Returns True if the `collect` and `take` methods can be run locally
+#' Returns True if the \code{collect} and \code{take} methods can be run locally
 #' (without any Spark executors).
 #'
 #' @param x A SparkDataFrame
@@ -635,10 +635,10 @@ setMethod("unpersist",
 #' The following options for repartition are possible:
 #' \itemize{
 #'  \item{1.} {Return a new SparkDataFrame partitioned by
-#'                      the given columns into `numPartitions`.}
-#'  \item{2.} {Return a new SparkDataFrame that has exactly `numPartitions`.}
+#'                      the given columns into \code{numPartitions}.}
+#'  \item{2.} {Return a new SparkDataFrame that has exactly \code{numPartitions}.}
 #'  \item{3.} {Return a new SparkDataFrame partitioned by the given column(s),
-#'                      using `spark.sql.shuffle.partitions` as number of partitions.}
+#'                      using \code{spark.sql.shuffle.partitions} as number of partitions.}
 #'}
 #' @param x a SparkDataFrame.
 #' @param numPartitions the number of partitions to use.
@@ -1125,9 +1125,8 @@ setMethod("take",
 
 #' Head
 #'
-#' Return the first NUM rows of a SparkDataFrame as a R data.frame. If NUM is NULL,
-#' then head() returns the first 6 rows in keeping with the current data.frame
-#' convention in R.
+#' Return the first \code{num} rows of a SparkDataFrame as a R data.frame. If \code{num} is not
+#' specified, then head() returns the first 6 rows as with R data.frame.
 #'
 #' @param x a SparkDataFrame.
 #' @param num the number of rows to return. Default is 6.
@@ -1399,11 +1398,11 @@ setMethod("dapplyCollect",
 #'
 #' @param cols grouping columns.
 #' @param func a function to be applied to each group partition specified by grouping
-#'             column of the SparkDataFrame. The function `func` takes as argument
+#'             column of the SparkDataFrame. The function \code{func} takes as argument
 #'             a key - grouping columns and a data frame - a local R data.frame.
-#'             The output of `func` is a local R data.frame.
+#'             The output of \code{func} is a local R data.frame.
 #' @param schema the schema of the resulting SparkDataFrame after the function is applied.
-#'               The schema must match to output of `func`. It has to be defined for each
+#'               The schema must match to output of \code{func}. It has to be defined for each
 #'               output column with preferred output column name and corresponding data type.
 #' @return A SparkDataFrame.
 #' @family SparkDataFrame functions
@@ -1490,9 +1489,9 @@ setMethod("gapply",
 #'
 #' @param cols grouping columns.
 #' @param func a function to be applied to each group partition specified by grouping
-#'             column of the SparkDataFrame. The function `func` takes as argument
+#'             column of the SparkDataFrame. The function \code{func} takes as argument
 #'             a key - grouping columns and a data frame - a local R data.frame.
-#'             The output of `func` is a local R data.frame.
+#'             The output of \code{func} is a local R data.frame.
 #' @return A data.frame.
 #' @family SparkDataFrame functions
 #' @aliases gapplyCollect,SparkDataFrame-method
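
To make the gapply/gapplyCollect contract described in the hunks above concrete, a small usage sketch (not part of the diff; assumes a running Spark session, and the column names and grouping are invented). `func` receives the grouping key and one group's rows as a local R data.frame, and its output must conform to `schema`:

```r
library(SparkR)
sparkR.session()

df <- createDataFrame(data.frame(group = c("a", "a", "b"),
                                 value = c(1, 2, 10)))

# The output schema has to be declared up front, column by column.
schema <- structType(structField("group", "string"),
                     structField("total", "double"))

result <- gapply(df, "group",
                 function(key, x) {
                   # key: list of grouping values; x: local data.frame for this group
                   data.frame(group = key[[1]], total = sum(x$value),
                              stringsAsFactors = FALSE)
                 },
                 schema)

head(result)
```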
@@ -1650,7 +1649,7 @@ setMethod("$", signature(x = "SparkDataFrame"),
             getColumn(x, name)
           })
 
-#' @param value a Column or NULL. If NULL, the specified Column is dropped.
+#' @param value a Column or \code{NULL}. If \code{NULL}, the specified Column is dropped.
 #' @rdname select
 #' @name $<-
 #' @aliases $<-,SparkDataFrame-method
@@ -1740,7 +1739,7 @@ setMethod("[", signature(x = "SparkDataFrame"),
 #' @family subsetting functions
 #' @examples
 #' \dontrun{
-#' # Columns can be selected using `[[` and `[`
+#' # Columns can be selected using [[ and [
 #' df[[2]] == df[["age"]]
 #' df[,2] == df[,"age"]
 #' df[,c("name", "age")]
@@ -1785,7 +1784,7 @@ setMethod("subset", signature(x = "SparkDataFrame"),
 #' select(df, df$name, df$age + 1)
 #' select(df, c("col1", "col2"))
 #' select(df, list(df$name, df$age + 1))
-#' # Similar to R data frames columns can also be selected using `$`
+#' # Similar to R data frames columns can also be selected using $
 #' df[,df$age]
 #' }
 #' @note select(SparkDataFrame, character) since 1.4.0
@@ -2436,7 +2435,7 @@ generateAliasesForIntersectedCols <- function (x, intersectedColNames, suffix) {
 #' Return a new SparkDataFrame containing the union of rows
 #'
 #' Return a new SparkDataFrame containing the union of rows in this SparkDataFrame
-#' and another SparkDataFrame. This is equivalent to `UNION ALL` in SQL.
+#' and another SparkDataFrame. This is equivalent to \code{UNION ALL} in SQL.
 #' Note that this does not remove duplicate rows across the two SparkDataFrames.
 #'
 #' @param x A SparkDataFrame
@@ -2479,7 +2478,7 @@ setMethod("unionAll",
 
 #' Union two or more SparkDataFrames
 #'
-#' Union two or more SparkDataFrames. This is equivalent to `UNION ALL` in SQL.
+#' Union two or more SparkDataFrames. This is equivalent to \code{UNION ALL} in SQL.
 #' Note that this does not remove duplicate rows across the two SparkDataFrames.
 #'
 #' @param x a SparkDataFrame.
@@ -2512,7 +2511,7 @@ setMethod("rbind",
 #' Intersect
 #'
 #' Return a new SparkDataFrame containing rows only in both this SparkDataFrame
-#' and another SparkDataFrame. This is equivalent to `INTERSECT` in SQL.
+#' and another SparkDataFrame. This is equivalent to \code{INTERSECT} in SQL.
 #'
 #' @param x A SparkDataFrame
 #' @param y A SparkDataFrame
@@ -2540,7 +2539,7 @@ setMethod("intersect",
 #' except
 #'
 #' Return a new SparkDataFrame containing rows in this SparkDataFrame
-#' but not in another SparkDataFrame. This is equivalent to `EXCEPT` in SQL.
+#' but not in another SparkDataFrame. This is equivalent to \code{EXCEPT} in SQL.
 #'
 #' @param x a SparkDataFrame.
 #' @param y a SparkDataFrame.
@@ -2569,8 +2568,8 @@ setMethod("except",
 
 #' Save the contents of SparkDataFrame to a data source.
 #'
-#' The data source is specified by the `source` and a set of options (...).
-#' If `source` is not specified, the default data source configured by
+#' The data source is specified by the \code{source} and a set of options (...).
+#' If \code{source} is not specified, the default data source configured by
 #' spark.sql.sources.default will be used.
 #'
 #' Additionally, mode is used to specify the behavior of the save operation when data already
@@ -2606,7 +2605,7 @@ setMethod("except",
 #' @note write.df since 1.4.0
 setMethod("write.df",
           signature(df = "SparkDataFrame", path = "character"),
-          function(df, path, source = NULL, mode = "error", ...){
+          function(df, path, source = NULL, mode = "error", ...) {
             if (is.null(source)) {
               source <- getDefaultSqlSource()
             }
@@ -2628,14 +2627,14 @@ setMethod("write.df",
 #' @note saveDF since 1.4.0
 setMethod("saveDF",
           signature(df = "SparkDataFrame", path = "character"),
-          function(df, path, source = NULL, mode = "error", ...){
+          function(df, path, source = NULL, mode = "error", ...) {
             write.df(df, path, source, mode, ...)
           })
 
 #' Save the contents of the SparkDataFrame to a data source as a table
 #'
-#' The data source is specified by the `source` and a set of options (...).
-#' If `source` is not specified, the default data source configured by
+#' The data source is specified by the \code{source} and a set of options (...).
+#' If \code{source} is not specified, the default data source configured by
 #' spark.sql.sources.default will be used.
 #'
 #' Additionally, mode is used to specify the behavior of the save operation when
@@ -2668,7 +2667,7 @@ setMethod("saveDF",
 #' @note saveAsTable since 1.4.0
 setMethod("saveAsTable",
           signature(df = "SparkDataFrame", tableName = "character"),
-          function(df, tableName, source = NULL, mode="error", ...){
+          function(df, tableName, source = NULL, mode="error", ...) {
             if (is.null(source)) {
               source <- getDefaultSqlSource()
             }
@@ -2745,11 +2744,11 @@ setMethod("summary",
 #' @param how "any" or "all".
 #'            if "any", drop a row if it contains any nulls.
 #'            if "all", drop a row only if all its values are null.
-#'            if minNonNulls is specified, how is ignored.
+#'            if \code{minNonNulls} is specified, how is ignored.
 #' @param minNonNulls if specified, drop rows that have less than
-#'                    minNonNulls non-null values.
+#'                    \code{minNonNulls} non-null values.
 #'                    This overwrites the how parameter.
-#' @param cols optional list of column names to consider. In `fillna`,
+#' @param cols optional list of column names to consider. In \code{fillna},
 #'             columns specified in cols that do not have matching data
 #'             type are ignored. For example, if value is a character, and
 #'             subset contains a non-character column, then the non-character
@@ -2872,8 +2871,8 @@ setMethod("fillna",
 #' in your system to accommodate the contents.
 #'
 #' @param x a SparkDataFrame.
-#' @param row.names NULL or a character vector giving the row names for the data frame.
-#' @param optional If `TRUE`, converting column names is optional.
+#' @param row.names \code{NULL} or a character vector giving the row names for the data frame.
+#' @param optional If \code{TRUE}, converting column names is optional.
 #' @param ... additional arguments to pass to base::as.data.frame.
 #' @return A data.frame.
 #' @family SparkDataFrame functions
@@ -3051,7 +3050,7 @@ setMethod("str",
 #' @note drop since 2.0.0
 setMethod("drop",
           signature(x = "SparkDataFrame"),
-          function(x, col, ...) {
+          function(x, col) {
             stopifnot(class(col) == "character" || class(col) == "Column")
 
             if (class(col) == "Column") {
@@ -3211,8 +3210,8 @@ setMethod("histogram",
 #' and to not change the existing data.
 #' }
 #'
-#' @param x s SparkDataFrame.
-#' @param url JDBC database url of the form `jdbc:subprotocol:subname`.
+#' @param x a SparkDataFrame.
+#' @param url JDBC database url of the form \code{jdbc:subprotocol:subname}.
 #' @param tableName yhe name of the table in the external database.
 #' @param mode one of 'append', 'overwrite', 'error', 'ignore' save mode (it is 'error' by default).
 #' @param ... additional JDBC database connection properties.
@@ -3230,7 +3229,7 @@ setMethod("histogram",
 #' @note write.jdbc since 2.0.0
 setMethod("write.jdbc",
           signature(x = "SparkDataFrame", url = "character", tableName = "character"),
-          function(x, url, tableName, mode = "error", ...){
+          function(x, url, tableName, mode = "error", ...) {
             jmode <- convertToJSaveMode(mode)
             jprops <- varargsToJProperties(...)
             write <- callJMethod(x@sdf, "write")
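
A short usage sketch (again not part of the diff) tying together a few of the SparkDataFrame methods whose docs are edited above; the output path is a placeholder and a running Spark session is assumed:

```r
library(SparkR)
sparkR.session()

df <- as.DataFrame(faithful)

# head(): when num is not given, the first 6 rows come back as a local R data.frame.
head(df)

# repartition(): option 2 from the \itemize list above - exactly numPartitions partitions.
df2 <- repartition(df, numPartitions = 4L)

# write.df(): when source is omitted, spark.sql.sources.default decides the format.
write.df(df2, path = "/tmp/faithful_parquet", source = "parquet", mode = "overwrite")
```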

R/pkg/R/RDD.R

Lines changed: 5 additions & 5 deletions
@@ -887,17 +887,17 @@ setMethod("sampleRDD",
 
       # Discards some random values to ensure each partition has a
       # different random seed.
-      runif(partIndex)
+      stats::runif(partIndex)
 
       for (elem in part) {
         if (withReplacement) {
-          count <- rpois(1, fraction)
+          count <- stats::rpois(1, fraction)
           if (count > 0) {
             res[ (len + 1) : (len + count) ] <- rep(list(elem), count)
             len <- len + count
           }
         } else {
-          if (runif(1) < fraction) {
+          if (stats::runif(1) < fraction) {
             len <- len + 1
             res[[len]] <- elem
           }
@@ -965,15 +965,15 @@ setMethod("takeSample", signature(x = "RDD", withReplacement = "logical",
 
     set.seed(seed)
     samples <- collectRDD(sampleRDD(x, withReplacement, fraction,
-                                    as.integer(ceiling(runif(1,
+                                    as.integer(ceiling(stats::runif(1,
                                                              -MAXINT,
                                                              MAXINT)))))
     # If the first sample didn't turn out large enough, keep trying to
     # take samples; this shouldn't happen often because we use a big
     # multiplier for thei initial size
     while (length(samples) < total)
       samples <- collectRDD(sampleRDD(x, withReplacement, fraction,
-                                      as.integer(ceiling(runif(1,
+                                      as.integer(ceiling(stats::runif(1,
                                                                -MAXINT,
                                                                MAXINT)))))
 
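
The RDD.R change only namespaces the random-number calls; the sampling logic is unchanged. As a standalone restatement of the loop in the first hunk (the wrapper name `sample_part` is hypothetical, plain R with no Spark dependency):

```r
sample_part <- function(part, withReplacement, fraction, partIndex) {
  res <- list()
  len <- 0
  # Discard some draws so each partition starts at a different point in the stream.
  stats::runif(partIndex)
  for (elem in part) {
    if (withReplacement) {
      # Poisson sampling: keep `count` copies of the element.
      count <- stats::rpois(1, fraction)
      if (count > 0) {
        res[(len + 1):(len + count)] <- rep(list(elem), count)
        len <- len + count
      }
    } else {
      # Bernoulli sampling: keep the element with probability `fraction`.
      if (stats::runif(1) < fraction) {
        len <- len + 1
        res[[len]] <- elem
      }
    }
  }
  res
}

sample_part(as.list(1:10), withReplacement = FALSE, fraction = 0.5, partIndex = 1L)
```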

R/pkg/R/SQLContext.R

Lines changed: 15 additions & 15 deletions
@@ -115,7 +115,7 @@ infer_type <- function(x) {
 #' Get Runtime Config from the current active SparkSession
 #'
 #' Get Runtime Config from the current active SparkSession.
-#' To change SparkSession Runtime Config, please see `sparkR.session()`.
+#' To change SparkSession Runtime Config, please see \code{sparkR.session()}.
 #'
 #' @param key (optional) The key of the config to get, if omitted, all config is returned
 #' @param defaultValue (optional) The default value of the config to return if they config is not
@@ -720,11 +720,11 @@ dropTempView <- function(viewName) {
 #'
 #' Returns the dataset in a data source as a SparkDataFrame
 #'
-#' The data source is specified by the `source` and a set of options(...).
-#' If `source` is not specified, the default data source configured by
+#' The data source is specified by the \code{source} and a set of options(...).
+#' If \code{source} is not specified, the default data source configured by
 #' "spark.sql.sources.default" will be used. \cr
-#' Similar to R read.csv, when `source` is "csv", by default, a value of "NA" will be interpreted
-#' as NA.
+#' Similar to R read.csv, when \code{source} is "csv", by default, a value of "NA" will be
+#' interpreted as NA.
 #'
 #' @param path The path of files to load
 #' @param source The name of external data source
@@ -791,8 +791,8 @@ loadDF <- function(x, ...) {
 #' Creates an external table based on the dataset in a data source,
 #' Returns a SparkDataFrame associated with the external table.
 #'
-#' The data source is specified by the `source` and a set of options(...).
-#' If `source` is not specified, the default data source configured by
+#' The data source is specified by the \code{source} and a set of options(...).
+#' If \code{source} is not specified, the default data source configured by
 #' "spark.sql.sources.default" will be used.
 #'
 #' @param tableName a name of the table.
@@ -830,22 +830,22 @@ createExternalTable <- function(x, ...) {
 #' Additional JDBC database connection properties can be set (...)
 #'
 #' Only one of partitionColumn or predicates should be set. Partitions of the table will be
-#' retrieved in parallel based on the `numPartitions` or by the predicates.
+#' retrieved in parallel based on the \code{numPartitions} or by the predicates.
 #'
 #' Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash
 #' your external database systems.
 #'
-#' @param url JDBC database url of the form `jdbc:subprotocol:subname`
+#' @param url JDBC database url of the form \code{jdbc:subprotocol:subname}
 #' @param tableName the name of the table in the external database
 #' @param partitionColumn the name of a column of integral type that will be used for partitioning
-#' @param lowerBound the minimum value of `partitionColumn` used to decide partition stride
-#' @param upperBound the maximum value of `partitionColumn` used to decide partition stride
-#' @param numPartitions the number of partitions, This, along with `lowerBound` (inclusive),
-#'                      `upperBound` (exclusive), form partition strides for generated WHERE
-#'                      clause expressions used to split the column `partitionColumn` evenly.
+#' @param lowerBound the minimum value of \code{partitionColumn} used to decide partition stride
+#' @param upperBound the maximum value of \code{partitionColumn} used to decide partition stride
+#' @param numPartitions the number of partitions, This, along with \code{lowerBound} (inclusive),
+#'                      \code{upperBound} (exclusive), form partition strides for generated WHERE
+#'                      clause expressions used to split the column \code{partitionColumn} evenly.
 #'                      This defaults to SparkContext.defaultParallelism when unset.
 #' @param predicates a list of conditions in the where clause; each one defines one partition
-#' @param ... additional JDBC database connection named propertie(s).
+#' @param ... additional JDBC database connection named properties.
 #' @return SparkDataFrame
 #' @rdname read.jdbc
 #' @name read.jdbc
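
A hedged usage sketch for the read.jdbc parameters documented above (the URL, table, credentials and bounds are invented; the matching JDBC driver must be on the Spark classpath):

```r
library(SparkR)
sparkR.session()

# lowerBound/upperBound only shape the WHERE-clause strides over partitionColumn;
# they do not filter out rows that fall outside the range.
orders <- read.jdbc("jdbc:postgresql://dbhost:5432/shop", "orders",
                    partitionColumn = "order_id",
                    lowerBound = 1, upperBound = 1000000, numPartitions = 8,
                    user = "reader", password = "secret")

head(orders)
```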

R/pkg/R/WindowSpec.R

Lines changed: 12 additions & 11 deletions
@@ -44,6 +44,7 @@ windowSpec <- function(sws) {
 }
 
 #' @rdname show
+#' @export
 #' @note show(WindowSpec) since 2.0.0
 setMethod("show", "WindowSpec",
           function(object) {
@@ -125,11 +126,11 @@ setMethod("orderBy",
 
 #' rowsBetween
 #'
-#' Defines the frame boundaries, from `start` (inclusive) to `end` (inclusive).
+#' Defines the frame boundaries, from \code{start} (inclusive) to \code{end} (inclusive).
 #'
-#' Both `start` and `end` are relative positions from the current row. For example, "0" means
-#' "current row", while "-1" means the row before the current row, and "5" means the fifth row
-#' after the current row.
+#' Both \code{start} and \code{end} are relative positions from the current row. For example,
+#' "0" means "current row", while "-1" means the row before the current row, and "5" means the
+#' fifth row after the current row.
 #'
 #' @param x a WindowSpec
 #' @param start boundary start, inclusive.
@@ -157,12 +158,12 @@ setMethod("rowsBetween",
 
 #' rangeBetween
 #'
-#' Defines the frame boundaries, from `start` (inclusive) to `end` (inclusive).
+#' Defines the frame boundaries, from \code{start} (inclusive) to \code{end} (inclusive).
+#'
+#' Both \code{start} and \code{end} are relative from the current row. For example, "0" means
+#' "current row", while "-1" means one off before the current row, and "5" means the five off
+#' after the current row.
 #'
-#' Both `start` and `end` are relative from the current row. For example, "0" means "current row",
-#' while "-1" means one off before the current row, and "5" means the five off after the
-#' current row.
-
 #' @param x a WindowSpec
 #' @param start boundary start, inclusive.
 #'              The frame is unbounded if this is the minimum long value.
@@ -195,8 +196,8 @@ setMethod("rangeBetween",
 #' Define a windowing column.
 #'
 #' @param x a Column, usually one returned by window function(s).
-#' @param window a WindowSpec object. Can be created by `windowPartitionBy` or
-#'        `windowOrderBy` and configured by other WindowSpec methods.
+#' @param window a WindowSpec object. Can be created by \code{windowPartitionBy} or
+#'        \code{windowOrderBy} and configured by other WindowSpec methods.
 #' @rdname over
 #' @name over
 #' @aliases over,Column,WindowSpec-method
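
Finally, a small sketch of the WindowSpec pieces documented above: windowPartitionBy/orderBy build the spec, rowsBetween sets the frame, and over attaches it to a column (the column names are made up; a running Spark session is assumed):

```r
library(SparkR)
sparkR.session()

df <- createDataFrame(data.frame(name  = c("a", "a", "b", "b"),
                                 score = c(1, 2, 3, 4)))

# rowsBetween(-1, 0): the previous row and the current row, per the boundary doc above.
ws <- rowsBetween(orderBy(windowPartitionBy("name"), "score"), -1, 0)

# over() attaches the window to an aggregate column.
showDF(select(df, df$name, df$score,
              alias(over(sum(df$score), ws), "running_sum")))
```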
