-
Notifications
You must be signed in to change notification settings - Fork 1k
Open
Labels
GForce (issues relating to optimized grouping calculations), benchmark, enhancement, performance, top request (one of our most-requested issues)
Description
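Using the most recent data.table. Not always, but quite often, `uniqueN(x)` by group is slower than `length(unique(x))` by group, as the benchmarks below show.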
library(data.table)
library(microbenchmark)
# Scenario 1: 1e6 rows; x is sampled with replacement from 1..1e5 and the
# grouping column y from 1..1e2.
# NOTE(review): no set.seed() call is visible, so the sampled data presumably
# differs between runs.
N <- 1e6
DT <- data.table(x = sample(1e5,N,TRUE), y = sample(1e2,N,TRUE))
# Time three ways of counting distinct x values within each y group.
# times=1L evaluates each expression once, which is why min/mean/max in the
# recorded output below are all the same number.
microbenchmark(times=1L,
DT[, length(unique(x)),y],
DT[, uniqueN(x),y],
DT[, uniqueN(.SD), by="y", .SDcols="x"])
# Recorded output: all three are close here; length(unique(x)) is fastest
# (about 86 ms vs about 93 ms and 98 ms).
# Unit: milliseconds
# expr min lq mean median uq max neval
# DT[, length(unique(x)), y] 85.58602 85.58602 85.58602 85.58602 85.58602 85.58602 1
# DT[, uniqueN(x), y] 92.71877 92.71877 92.71877 92.71877 92.71877 92.71877 1
# DT[, uniqueN(.SD), by = "y", .SDcols = "x"] 97.51024 97.51024 97.51024 97.51024 97.51024 97.51024 1
# Scenario 2: 1e7 rows; x is sampled with replacement from 1..1e5 and the
# grouping column y from 1..1e2 (same value ranges as the 1e6-row run, ten
# times the rows).
N <- 1e7
DT <- data.table(x = sample(1e5,N,TRUE), y = sample(1e2,N,TRUE))
# Single timing run per expression (times=1L), so the figures are indicative only.
microbenchmark(times=1L,
DT[, length(unique(x)),y],
DT[, uniqueN(x),y],
DT[, uniqueN(.SD), by="y", .SDcols="x"])
# Recorded output: both uniqueN variants (about 0.8 s) are roughly twice as
# fast as length(unique(x)) (about 1.6 s).
# Unit: milliseconds
# expr min lq mean median uq max neval
# DT[, length(unique(x)), y] 1642.5212 1642.5212 1642.5212 1642.5212 1642.5212 1642.5212 1
# DT[, uniqueN(x), y] 843.0670 843.0670 843.0670 843.0670 843.0670 843.0670 1
# DT[, uniqueN(.SD), by = "y", .SDcols = "x"] 804.7881 804.7881 804.7881 804.7881 804.7881 804.7881 1
# Scenario 3: 1e7 rows; x is sampled with replacement from 1..1e6 and the
# grouping column y from 1..1e5, i.e. many groups with few rows each.
N <- 1e7
DT <- data.table(x = sample(1e6,N,TRUE), y = sample(1e5,N,TRUE))
# Single timing run per expression (times=1L), so the figures are indicative only.
microbenchmark(times=1L,
DT[, length(unique(x)),y],
DT[, uniqueN(x),y],
DT[, uniqueN(.SD), by="y", .SDcols="x"])
# Recorded output: uniqueN is slower here (about 4.7 s and 5.9 s) than
# length(unique(x)) (about 3.0 s).
# Unit: seconds
# expr min lq mean median uq max neval
# DT[, length(unique(x)), y] 3.025365 3.025365 3.025365 3.025365 3.025365 3.025365 1
# DT[, uniqueN(x), y] 4.734323 4.734323 4.734323 4.734323 4.734323 4.734323 1
# DT[, uniqueN(.SD), by = "y", .SDcols = "x"] 5.905721 5.905721 5.905721 5.905721 5.905721 5.905721 1
# Scenario 4: 1e7 rows; x is sampled with replacement from 1..1e3 and the
# grouping column y from 1..1e5, i.e. many groups and a small range of x values.
N <- 1e7
DT <- data.table(x = sample(1e3,N,TRUE), y = sample(1e5,N,TRUE))
# Single timing run per expression (times=1L), so the figures are indicative only.
microbenchmark(times=1L,
DT[, length(unique(x)),y],
DT[, uniqueN(x),y],
DT[, uniqueN(.SD), by="y", .SDcols="x"])
# Recorded output: uniqueN is slower again (about 4.7 s and 7.1 s) than
# length(unique(x)) (about 2.9 s).
# Unit: seconds
# expr min lq mean median uq max neval
# DT[, length(unique(x)), y] 2.906589 2.906589 2.906589 2.906589 2.906589 2.906589 1
# DT[, uniqueN(x), y] 4.731925 4.731925 4.731925 4.731925 4.731925 4.731925 1
# DT[, uniqueN(.SD), by = "y", .SDcols = "x"] 7.084020 7.084020 7.084020 7.084020 7.084020 7.084020 1
# Scenario 5: 1e7 rows; x is sampled with replacement from 1..1e6 and the
# grouping column y from 1..1e2, i.e. few groups and a wide range of x values.
N <- 1e7
DT <- data.table(x = sample(1e6,N,TRUE), y = sample(1e2,N,TRUE))
# Single timing run per expression (times=1L), so the figures are indicative only.
microbenchmark(times=1L,
DT[, length(unique(x)),y],
DT[, uniqueN(x),y],
DT[, uniqueN(.SD), by="y", .SDcols="x"])
# Recorded output: with few groups uniqueN is faster (about 1.0 s and 1.1 s)
# than length(unique(x)) (about 1.3 s).
# Unit: milliseconds
# expr min lq mean median uq max neval
# DT[, length(unique(x)), y] 1331.244 1331.244 1331.244 1331.244 1331.244 1331.244 1
# DT[, uniqueN(x), y] 998.040 998.040 998.040 998.040 998.040 998.040 1
# DT[, uniqueN(.SD), by = "y", .SDcols = "x"] 1096.867 1096.867 1096.867 1096.867 1096.867 1096.867 1
# Scenario 6: 1e7 rows of character data; x is sampled with replacement from
# `letters` and the grouping column y from the first ten letters.
N <- 1e7
DT <- data.table(x = sample(letters,N,TRUE), y = sample(letters[1:10],N,TRUE))
# Single timing run per expression (times=1L), so the figures are indicative only.
microbenchmark(times=1L,
DT[, length(unique(x)),y],
DT[, uniqueN(x),y],
DT[, uniqueN(.SD), by="y", .SDcols="x"])
# Recorded output: uniqueN(x) (about 0.57 s) is more than twice as fast as
# length(unique(x)) (about 1.3 s) on these character columns.
# Unit: milliseconds
# expr min lq mean median uq max neval
# DT[, length(unique(x)), y] 1304.4865 1304.4865 1304.4865 1304.4865 1304.4865 1304.4865 1
# DT[, uniqueN(x), y] 573.8628 573.8628 573.8628 573.8628 573.8628 573.8628 1
# DT[, uniqueN(.SD), by = "y", .SDcols = "x"] 528.3269 528.3269 528.3269 528.3269 528.3269 528.3269 1
R version 3.1.3 (2015-03-09)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 14.04.2 LTS
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_DK.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8 LC_MESSAGES=C
[7] LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] data.table_1.9.5 microbenchmark_1.4-2
loaded via a namespace (and not attached):
[1] bitops_1.0-6 chron_2.3-45 colorspace_1.2-4 devtools_1.7.0 digest_0.6.8 evaluate_0.5.5 formatR_1.0 ggplot2_1.0.0 grid_3.1.3
[10] gtable_0.1.2 httr_0.6.1 knitr_1.8 MASS_7.3-37 munsell_0.4.2 plyr_1.8.1 proto_0.3-10 Rcpp_0.11.4 RCurl_1.95-4.5
[19] reshape2_1.4.1 scales_0.2.4 stringr_0.6.2 tools_3.1.3
RoyalTS, MichaelChirico, minemR, adamaltmejd, franknarf1 and 1 more
Metadata
Metadata
Assignees
Labels
GForce (issues relating to optimized grouping calculations), benchmark, enhancement, performance, top request (one of our most-requested issues)