Skip to content

Commit 19b7866

Browse files
authored
memrecycle no snprintf overhead (#5463)
1 parent 052f8da commit 19b7866

File tree

2 files changed

+69
-63
lines changed

2 files changed

+69
-63
lines changed

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -597,6 +597,8 @@
597597
598598
14. The options `datatable.print.class` and `datatable.print.keys` are now `TRUE` by default. They have been available since v1.9.8 (Nov 2016) and v1.11.0 (May 2018) respectively.
599599
600+
15. Thanks to @ssh352, Václav Tlapák, Cole Miller, András Svraka and Toby Dylan Hocking for reporting and bisecting a significant performance regression in dev. This was fixed before release thanks to a PR by Jan Gorecki, [#5463](https://github.com/Rdatatable/data.table/pull/5463).
601+
600602
601603
# data.table [v1.14.4](https://github.com/Rdatatable/data.table/milestone/26?closed=1)
602604

src/assign.c

Lines changed: 67 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -684,6 +684,12 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values)
684684
#define MSGSIZE 1000
685685
static char memrecycle_message[MSGSIZE+1]; // returned to rbindlist so it can prefix with which one of the list of data.table-like objects
686686

687+
const char *targetDesc(const int colnum, const char *colname) {
688+
static char str[501]; // #5463
689+
snprintf(str, 500, colnum==0 ? _("target vector") : _("column %d named '%s'"), colnum, colname);
690+
return str;
691+
}
692+
687693
const char *memrecycle(const SEXP target, const SEXP where, const int start, const int len, SEXP source, const int sourceStart, const int sourceLen, const int colnum, const char *colname)
688694
// like memcpy but recycles single-item source
689695
// 'where' a 1-based INTEGER vector subset of target to assign to, or NULL or integer()
@@ -707,8 +713,6 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con
707713
if (colname==NULL)
708714
error(_("Internal error: memrecycle has received NULL colname")); // # nocov
709715
*memrecycle_message = '\0';
710-
static char targetDesc[501]; // from 1.14.1 coerceAs reuses memrecycle for a target vector, PR#4491
711-
snprintf(targetDesc, 500, colnum==0 ? _("target vector") : _("column %d named '%s'"), colnum, colname);
712716
int protecti=0;
713717
const bool sourceIsFactor=isFactor(source), targetIsFactor=isFactor(target);
714718
const bool sourceIsI64=isReal(source) && INHERITS(source, char_integer64);
@@ -730,15 +734,15 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con
730734
for (int i=0; i<slen; ++i) {
731735
const int val = sd[i+soff];
732736
if ((val<1 && val!=NA_INTEGER) || val>nlevel) {
733-
error(_("Assigning factor numbers to %s. But %d is outside the level range [1,%d]"), targetDesc, val, nlevel);
737+
error(_("Assigning factor numbers to %s. But %d is outside the level range [1,%d]"), targetDesc(colnum, colname), val, nlevel);
734738
}
735739
}
736740
} else {
737741
const double *sd = REAL(source);
738742
for (int i=0; i<slen; ++i) {
739743
const double val = sd[i+soff];
740744
if (!ISNAN(val) && (!R_FINITE(val) || val!=(int)val || (int)val<1 || (int)val>nlevel)) {
741-
error(_("Assigning factor numbers to %s. But %f is outside the level range [1,%d], or is not a whole number."), targetDesc, val, nlevel);
745+
error(_("Assigning factor numbers to %s. But %f is outside the level range [1,%d], or is not a whole number."), targetDesc(colnum, colname), val, nlevel);
742746
}
743747
}
744748
}
@@ -830,47 +834,47 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con
830834
}
831835
}
832836
} else if (isString(source) && !isString(target) && !isNewList(target)) {
833-
warning(_("Coercing 'character' RHS to '%s' to match the type of %s."), targetIsI64?"integer64":type2char(TYPEOF(target)), targetDesc);
837+
warning(_("Coercing 'character' RHS to '%s' to match the type of %s."), targetIsI64?"integer64":type2char(TYPEOF(target)), targetDesc(colnum, colname));
834838
// this "Coercing ..." warning first to give context in case coerceVector warns 'NAs introduced by coercion'
835839
// and also because 'character' to integer/double coercion is often a user mistake (e.g. wrong target column, or wrong
836840
// variable on RHS) which they are more likely to appreciate than find inconvenient
837841
source = PROTECT(coerceVector(source, TYPEOF(target))); protecti++;
838842
} else if (isNewList(source) && !isNewList(target)) {
839843
if (targetIsI64) {
840-
error(_("Cannot coerce 'list' RHS to 'integer64' to match the type of %s."), targetDesc);
844+
error(_("Cannot coerce 'list' RHS to 'integer64' to match the type of %s."), targetDesc(colnum, colname));
841845
// because R's coerceVector doesn't know about integer64
842846
}
843847
// as in base R; e.g. let as.double(list(1,2,3)) work but not as.double(list(1,c(2,4),3))
844848
// relied on by NNS, simstudy and table.express; tests 1294.*
845-
warning(_("Coercing 'list' RHS to '%s' to match the type of %s."), type2char(TYPEOF(target)), targetDesc);
849+
warning(_("Coercing 'list' RHS to '%s' to match the type of %s."), type2char(TYPEOF(target)), targetDesc(colnum, colname));
846850
source = PROTECT(coerceVector(source, TYPEOF(target))); protecti++;
847851
} else if ((TYPEOF(target)!=TYPEOF(source) || targetIsI64!=sourceIsI64) && !isNewList(target)) {
848852
if (GetVerbose()>=3) {
849853
// only take the (small) cost of GetVerbose() (search of options() list) when types don't match
850854
Rprintf(_("Zero-copy coerce when assigning '%s' to '%s' %s.\n"),
851855
sourceIsI64 ? "integer64" : type2char(TYPEOF(source)),
852856
targetIsI64 ? "integer64" : type2char(TYPEOF(target)),
853-
targetDesc);
857+
targetDesc(colnum, colname));
854858
}
855859
// The following checks are up front here, otherwise we'd need them twice in the two branches
856860
// inside BODY that cater for 'where' or not. Maybe there's a way to merge the two macros in future.
857861
// The idea is to do these range checks without calling coerceVector() (which allocates)
858862

859-
#define CHECK_RANGE(STYPE, RFUN, COND, FMT, TO, FMTVAL) {{ \
860-
const STYPE *sd = (const STYPE *)RFUN(source); \
861-
for (int i=0; i<slen; ++i) { \
862-
const STYPE val = sd[i+soff]; \
863-
if (COND) { \
864-
const char *sType = sourceIsI64 ? "integer64" : type2char(TYPEOF(source)); \
865-
const char *tType = targetIsI64 ? "integer64" : type2char(TYPEOF(target)); \
866-
snprintf(memrecycle_message, MSGSIZE, \
867-
"%"FMT" (type '%s') at RHS position %d "TO" when assigning to type '%s' (%s)", \
868-
FMTVAL, sType, i+1, tType, targetDesc); \
869-
/* string returned so that rbindlist/dogroups can prefix it with which item of its list this refers to */ \
870-
break; \
871-
} \
872-
} \
873-
} break; }
863+
#define CHECK_RANGE(STYPE, RFUN, COND, FMT, TO, FMTVAL) {{ \
864+
const STYPE *sd = (const STYPE *)RFUN(source); \
865+
for (int i=0; i<slen; ++i) { \
866+
const STYPE val = sd[i+soff]; \
867+
if (COND) { \
868+
const char *sType = sourceIsI64 ? "integer64" : type2char(TYPEOF(source)); \
869+
const char *tType = targetIsI64 ? "integer64" : type2char(TYPEOF(target)); \
870+
snprintf(memrecycle_message, MSGSIZE, \
871+
"%"FMT" (type '%s') at RHS position %d "TO" when assigning to type '%s' (%s)", \
872+
FMTVAL, sType, i+1, tType, targetDesc(colnum, colname)); \
873+
/* string returned so that rbindlist/dogroups can prefix it with which item of its list this refers to */ \
874+
break; \
875+
} \
876+
} \
877+
} break; }
874878

875879
switch(TYPEOF(target)) {
876880
case LGLSXP:
@@ -909,47 +913,47 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con
909913
}
910914
}
911915

912-
#undef BODY
913-
#define BODY(STYPE, RFUN, CTYPE, CAST, ASSIGN) {{ \
914-
const STYPE *sd = (const STYPE *)RFUN(source); \
915-
if (length(where)) { \
916-
if (slen==1) { \
917-
const STYPE val = sd[soff]; \
918-
const CTYPE cval = CAST; \
919-
for (int wi=0; wi<len; ++wi) { \
920-
const int w = wd[wi]; \
921-
if (w<1) continue; /*0 or NA*/ \
922-
const int i = w-1; \
923-
ASSIGN; \
924-
} \
925-
} else { \
926-
for (int wi=0; wi<len; ++wi) { \
927-
const int w = wd[wi]; \
928-
if (w<1) continue; \
929-
const STYPE val = sd[wi+soff]; \
930-
const CTYPE cval = CAST; \
931-
const int i = w-1; \
932-
ASSIGN; \
933-
} \
934-
} \
935-
} else { \
936-
if (slen==1) { \
937-
const STYPE val = sd[soff]; \
938-
const CTYPE cval = CAST; \
939-
for (int i=0; i<len; ++i) { \
940-
ASSIGN; \
941-
} \
942-
} else { \
943-
for (int i=0; i<len; i++) { \
944-
const STYPE val = sd[i+soff]; \
945-
const CTYPE cval = CAST; \
946-
ASSIGN; \
947-
} \
948-
} \
949-
} \
950-
} break; }
916+
#undef BODY
917+
#define BODY(STYPE, RFUN, CTYPE, CAST, ASSIGN) {{ \
918+
const STYPE *sd = (const STYPE *)RFUN(source); \
919+
if (length(where)) { \
920+
if (slen==1) { \
921+
const STYPE val = sd[soff]; \
922+
const CTYPE cval = CAST; \
923+
for (int wi=0; wi<len; ++wi) { \
924+
const int w = wd[wi]; \
925+
if (w<1) continue; /*0 or NA*/ \
926+
const int i = w-1; \
927+
ASSIGN; \
928+
} \
929+
} else { \
930+
for (int wi=0; wi<len; ++wi) { \
931+
const int w = wd[wi]; \
932+
if (w<1) continue; \
933+
const STYPE val = sd[wi+soff]; \
934+
const CTYPE cval = CAST; \
935+
const int i = w-1; \
936+
ASSIGN; \
937+
} \
938+
} \
939+
} else { \
940+
if (slen==1) { \
941+
const STYPE val = sd[soff]; \
942+
const CTYPE cval = CAST; \
943+
for (int i=0; i<len; ++i) { \
944+
ASSIGN; \
945+
} \
946+
} else { \
947+
for (int i=0; i<len; i++) { \
948+
const STYPE val = sd[i+soff]; \
949+
const CTYPE cval = CAST; \
950+
ASSIGN; \
951+
} \
952+
} \
953+
} \
954+
} break; }
951955

952-
#define COERCE_ERROR(targetType) error(_("type '%s' cannot be coerced to '%s'"), type2char(TYPEOF(source)), targetType); // 'targetType' for integer64 vs double
956+
#define COERCE_ERROR(targetType) error(_("type '%s' cannot be coerced to '%s'"), type2char(TYPEOF(source)), targetType); // 'targetType' for integer64 vs double
953957

954958
const int off = length(where) ? 0 : start; // off = target offset; e.g. called from rbindlist with where=R_NilValue and start!=0
955959
const bool mc = length(where)==0 && slen>0 && slen==len && soff==0; // mc=memcpy; only if types match and not for single items (a single assign faster than these non-const memcpy calls)

0 commit comments

Comments
 (0)