Skip to content

Commit 4030b94

Browse files
committed
add rbindlist support for use.names=FALSE with fill=TRUE and use that here
1 parent 8249e12 commit 4030b94

File tree

5 files changed

+57
-18
lines changed

5 files changed

+57
-18
lines changed

NEWS.md

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,51 @@
206206
# v1.9.6 18.5400 19.1800 21.5100 20.6900 23.4200 29.040 100
207207
# v1.14.4 0.4826 0.5586 0.6586 0.6329 0.7348 1.318 100
208208
```
209+
210+
31. `rbind()` and `rbindlist()` now support `fill=TRUE` with `use.names=FALSE` instead of issuing the warning `use.names= cannot be FALSE when fill is TRUE. Setting use.names=TRUE.`
211+
212+
```R
213+
DT1
214+
# A B
215+
# <int> <int>
216+
# 1: 1 5
217+
# 2: 2 6
218+
219+
DT2
220+
# foo
221+
# <int>
222+
# 1: 3
223+
# 2: 4
224+
225+
rbind(DT1, DT2, fill=TRUE) # no change
226+
# A B foo
227+
# <int> <int> <int>
228+
# 1: 1 5 NA
229+
# 2: 2 6 NA
230+
# 3: NA NA 3
231+
# 4: NA NA 4
232+
233+
rbind(DT1, DT2, fill=TRUE, use.names=FALSE)
234+
235+
# was:
236+
# A B foo
237+
# <int> <int> <int>
238+
# 1: 1 5 NA
239+
# 2: 2 6 NA
240+
# 3: NA NA 3
241+
# 4: NA NA 4
242+
# Warning message:
243+
# In rbindlist(l, use.names, fill, idcol) :
244+
# use.names= cannot be FALSE when fill is TRUE. Setting use.names=TRUE.
245+
246+
# now:
247+
# A B
248+
# <int> <int>
249+
# 1: 1 5
250+
# 2: 2 6
251+
# 3: 3 NA
252+
# 4: 4 NA
253+
```
209254

210255
## BUG FIXES
211256

R/merge.R

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -78,15 +78,7 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
7878
# Perhaps not very commonly used, so not a huge deal that the join is redone here.
7979
missingyidx = y[!x, which=TRUE, on=by, allow.cartesian=allow.cartesian]
8080
if (length(missingyidx)) {
81-
yy = y[missingyidx]
82-
othercolsx = setdiff(nm_x, by)
83-
if (length(othercolsx)) {
84-
nx = make.unique(c(names(yy), othercolsx))
85-
set(yy, NULL, tail(nx, length(othercolsx)), rep(list(NA), length(othercolsx)))
86-
}
87-
# empty data.tables (nrow=0, ncol>0) don't skip names anymore in new rbindlist
88-
# takes care of #24 without having to save names. This is how it should be, IMHO.
89-
dt = rbind(dt, yy, use.names=FALSE)
81+
dt = rbind(dt, y[missingyidx], use.names=FALSE, fill=TRUE)
9082
}
9183
}
9284
# X[Y] syntax puts JIS i columns at the end, merge likes them alongside i.

inst/tests/tests.Rraw

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1877,14 +1877,14 @@ test(630.1, merge(DT1,DT2,all.x=TRUE), setkey(adt(merge(adf(DT1),adf(DT2),by="a"
18771877

18781878
test(631, merge(DT1,DT2,all.y=TRUE), data.table(a=c(2,3,5),total.x=c(NA,1,1),total.y=c(5,1,2),key="a"))
18791879
test(631.1, merge(DT1,DT2,all.y=TRUE), setkey(adt(merge(adf(DT1),adf(DT2),by="a",all.y=TRUE)),a))
1880-
# ensure merge(x,y,all.y) does not alter input y
1881-
# i subset y with 1:nrow(y)
1880+
# ensure merge(x,y,all.y) does not alter input y ...
1881+
# .. i subset y with 1:nrow(y)
18821882
test(631.2, merge(DT1[c(1,3)],DT2,all.y=TRUE), data.table(a=c(2,3,5),total.x=NA_real_,total.y=c(5,1,2),key="a"))
18831883
test(631.3, DT2, data.table(a=c(2,3,5), total=c(5,1,2), key="a"))
1884-
# nrow(y)=1, i subset y with 1 and no match with x
1884+
# .. nrow(y)=1, i subset y with 1 and no match with x
18851885
test(631.4, merge(DT1,DT3,all.y=TRUE), data.table(a=c(2),total.x=NA_real_,total.y=c(5),key="a"))
18861886
test(631.5, DT3, data.table(a=c(2), total=c(5), key="a"))
1887-
# nrow(y)=1, i subset y with 1 and match with x
1887+
# .. nrow(y)=1, i subset y with 1 and match with x
18881888
test(631.6, merge(DT1,DT4,all.y=TRUE), data.table(a=c(3),total.x=c(1),total.y=c(1),key="a"))
18891889
test(631.7, DT4, data.table(a=c(3), total=c(1), key="a"))
18901890

@@ -14589,8 +14589,11 @@ test(2002.12, rbind(DT1, DT2, idcol='id'), data.table(id=integer(), a=logica
1458914589
test(2003.1, rbindlist(list(), use.names=1), error="use.names= should be TRUE, FALSE, or not used [(]\"check\" by default[)]")
1459014590
test(2003.2, rbindlist(list(), fill=1), error="fill= should be TRUE or FALSE")
1459114591
test(2003.3, rbindlist(list(data.table(a=1:2), data.table(b=3:4)), fill=TRUE, use.names=FALSE),
14592-
data.table(a=c(1:2,NA,NA), b=c(NA,NA,3:4)),
14593-
warning="use.names= cannot be FALSE when fill is TRUE. Setting use.names=TRUE")
14592+
data.table(a=c(1:4)))
14593+
test(2003.4, rbindlist(list(data.table(a=1:2,c=5:6), data.table(b=3:4)), fill=TRUE, use.names=FALSE),
14594+
data.table(a=c(1:4), c=INT(5,6,NA,NA)))
14595+
test(2003.5, rbindlist(list(data.table(a=1:2), data.table(b=3:4, c=5:6)), fill=TRUE, use.names=FALSE),
14596+
data.table(a=c(1:4), V1=INT(NA,NA,5,6)))
1459414597

1459514598
# chmatch coverage for two different non-ascii encodings matching; issues mentioned in comments in chmatch.c #69 #2538 #111
1459614599
x1 = "fa\xE7ile"

man/rbindlist.Rd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ rbindlist(l, use.names="check", fill=FALSE, idcol=NULL)
1313
\arguments{
1414
\item{l}{ A list containing \code{data.table}, \code{data.frame} or \code{list} objects. \code{\dots} is the same but you pass the objects by name separately. }
1515
\item{use.names}{\code{TRUE} binds by matching column name, \code{FALSE} by position. \code{"check"} (default) warns if not all items have the same names in the same order and then currently proceeds as if \code{use.names=FALSE} for backwards compatibility (\code{TRUE} in future); see news for v1.12.2.}
16-
\item{fill}{\code{TRUE} fills missing columns with NAs. By default \code{FALSE}. When \code{TRUE}, \code{use.names} is set to \code{TRUE}.}
16+
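\item{fill}{\code{TRUE} fills missing columns with NAs. By default \code{FALSE}. When \code{TRUE} and \code{use.names} is left at its default, \code{use.names} is set to \code{TRUE}; an explicit \code{use.names=FALSE} is respected and items are then bound by position.}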
1717
\item{idcol}{Creates a column in the result showing which list item those rows came from. \code{TRUE} names this column \code{".id"}. \code{idcol="file"} names this column \code{"file"}. If the input list has names, those names are the values placed in this id column, otherwise the values are an integer vector \code{1:length(l)}. See \code{examples}.}
1818
}
1919
\details{

src/rbindlist.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,7 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg)
1212
if (TYPEOF(l) != VECSXP) error(_("Input to rbindlist must be a list. This list can contain data.tables, data.frames or plain lists."));
1313
Rboolean usenames = LOGICAL(usenamesArg)[0];
1414
const bool fill = LOGICAL(fillArg)[0];
15-
if (fill && usenames!=TRUE) {
16-
if (usenames==FALSE) warning(_("use.names= cannot be FALSE when fill is TRUE. Setting use.names=TRUE.")); // else no warning if usenames==NA (default)
15+
if (fill && usenames==NA_LOGICAL) {
1716
usenames=TRUE;
1817
}
1918
const bool idcol = !isNull(idcolArg);

0 commit comments

Comments
 (0)