-
-
Notifications
You must be signed in to change notification settings - Fork 100
Bottleneck: 30% time spent in genericSeqAssign in slicer and sigmoid_cross_entropy #141
Copy link
Copy link
Closed
Labels
Description
Following commit 9834ad0, the XOR benchmark now spends about 3 s out of 11 s in genericSeqAssign.
Culprits:
proc slicer[T](t: AnyTensor[T],
               slices: varargs[SteppedSlice],
               ellipsis: Ellipsis): AnyTensor[T] {.noInit,noSideEffect.}=
  ## Slice `t` with explicit leading slices followed by an ellipsis.
  ##
  ## Input:
  ##   - `t`: the tensor to slice
  ##   - `slices`: the SteppedSlices given before the ellipsis
  ##   - `ellipsis`: not read in the body; it only appears in the signature
  ## Returns:
  ##   - A copy of `t` whose offset and strides have been adjusted
  ##     by `slicerT` to achieve the desired effect.
  ##
  ## The slices are padded at the end with `span` until there are
  ## `t.rank` of them.
  result = t
  let
    missing = t.rank - slices.len
    padded = concat(@slices, newSeqWith(missing, span))
  slicerT(result, padded)
proc slicer[T](t: AnyTensor[T],
               ellipsis: Ellipsis,
               slices: varargs[SteppedSlice]
               ): AnyTensor[T] {.noInit,noSideEffect.}=
  ## Slice `t` with an ellipsis followed by explicit trailing slices.
  ##
  ## Input:
  ##   - `t`: the tensor to slice
  ##   - `ellipsis`: not read in the body; it only appears in the signature
  ##   - `slices`: the SteppedSlices given after the ellipsis
  ## Returns:
  ##   - A copy of `t` whose offset and strides have been adjusted
  ##     by `slicerT` to achieve the desired effect.
  ##
  ## The slices are padded at the front with `span` until there are
  ## `t.rank` of them.
  result = t
  let
    missing = t.rank - slices.len
    padded = concat(newSeqWith(missing, span), @slices)
  slicerT(result, padded)
proc slicer[T](t: AnyTensor[T],
               slices1: varargs[SteppedSlice],
               ellipsis: Ellipsis,
               slices2: varargs[SteppedSlice]
               ): AnyTensor[T] {.noInit,noSideEffect.}=
  ## Slice `t` with leading slices, an ellipsis, then trailing slices.
  ##
  ## Input:
  ##   - `t`: the tensor to slice
  ##   - `slices1`: the SteppedSlices given before the ellipsis
  ##   - `ellipsis`: not read in the body; it only appears in the signature
  ##   - `slices2`: the SteppedSlices given after the ellipsis
  ## Returns:
  ##   - A copy of `t` whose offset and strides have been adjusted
  ##     by `slicerT` to achieve the desired effect.
  ##
  ## `span` entries are inserted between `slices1` and `slices2` until
  ## there are `t.rank` slices in total.
  result = t
  let
    missing = t.rank - slices1.len - slices2.len
    padded = @slices1 & newSeqWith(missing, span) & @slices2
  slicerT(result, padded)

And sigmoid_cross_entropy from nn:
// C quoted in the issue for sigmoid_cross_entropy (appears to be Nim-compiler output).
// Visible steps: allocate a gate, copy `target` into it, allocate a node and append it
// to a's tape, call forward, then link the result and the node to each other.
N_NIMCALL(tyObject_VariablecolonObjectType__V9arbGi37a9bYRP6noahxnkg*, sigmoid_cross_entropy_JUee17nUtwwG9crOhwAktAw)(tyObject_VariablecolonObjectType__V9arbGi37a9bYRP6noahxnkg* a, tyObject_Tensor_YVEir6VZKk3q2MAtip9aD6w* target) {
tyObject_VariablecolonObjectType__V9arbGi37a9bYRP6noahxnkg* result;
tyObject_SigmoidCrossEntropyLosscolonObjectType__UFgVZehjkS6ZKN0TA9bM9a6Q* gate;
tyObject_NodecolonObjectType__p32yf8YodYmaGoPmH50AWw* node;
NI T1_;
tyObject_LosscolonObjectType__AuSc1kjvf0sy9bIrj0fYzpQ* T2_;
// Null-initialise result and gate before use.
result = (tyObject_VariablecolonObjectType__V9arbGi37a9bYRP6noahxnkg*)0;
gate = (tyObject_SigmoidCrossEntropyLosscolonObjectType__UFgVZehjkS6ZKN0TA9bM9a6Q*)0;
// Allocate the gate object, then set its type info and arity (1).
gate = (tyObject_SigmoidCrossEntropyLosscolonObjectType__UFgVZehjkS6ZKN0TA9bM9a6Q*) newObj((&NTI_HqKVGU8O1Eg0L2tk6CR9bTA_), sizeof(tyObject_SigmoidCrossEntropyLosscolonObjectType__UFgVZehjkS6ZKN0TA9bM9a6Q));
(*gate).Sup.Sup.m_type = (&NTI_UFgVZehjkS6ZKN0TA9bM9a6Q_);
(*gate).Sup.Sup.arity = ((NI) 1);
// Store the input variable `a` in the gate's cache field.
asgnRef((void**) (&(*gate).cache), a);
// Field-by-field copy of *target into gate->Sup.target: shape, strides and offset
// are plain assignments, while `data` goes through genericSeqAssign -- the call
// this issue reports as the bottleneck.
(*gate).Sup.target.shape = (*target).shape;
(*gate).Sup.target.strides = (*target).strides;
(*gate).Sup.target.offset = (*target).offset;
genericSeqAssign((&(*gate).Sup.target.data), (*target).data, (&NTI_4Xyxy0Om14N6K1l5e9bUPSQ_)); // <<<----------------- HERE
// Allocate the node, attach the gate to it and register `a` as parent 0.
node = (tyObject_NodecolonObjectType__p32yf8YodYmaGoPmH50AWw*)0;
node = (tyObject_NodecolonObjectType__p32yf8YodYmaGoPmH50AWw*) newObj((&NTI_u2b9cqonYlV8r9bWdpfPYhKQ_), sizeof(tyObject_NodecolonObjectType__p32yf8YodYmaGoPmH50AWw));
asgnRef((void**) (&(*node).gate), gate);
asgnRef((void**) (&(*node).parents[(((NI) 0))- 0]), a);
// Append the node to a's tape: incrSeqV2 on the nodes sequence, post-increment
// its length, then store the node at the old length index.
(*(*a).tape).nodes = (tySequence_vShYhtvHQtyhCu8g2tVy6Q*) incrSeqV2(&((*(*a).tape).nodes)->Sup, sizeof(tyObject_NodecolonObjectType__p32yf8YodYmaGoPmH50AWw*));
T1_ = (*(*a).tape).nodes->Sup.len++;
asgnRef((void**) (&(*(*a).tape).nodes->data[T1_]), node);
// Pass the gate's embedded parent part (Sup) to forward along with `a` and `target`.
T2_ = (tyObject_LosscolonObjectType__AuSc1kjvf0sy9bIrj0fYzpQ*)0;
T2_ = &gate->Sup;
result = forward_qAuDJ4FSE2zIoYPtuAdhHg(T2_, a, target);
// Link both ways: result.ancestor = node and node.child = result.
asgnRef((void**) (&(*result).ancestor), node);
asgnRef((void**) (&(*node).child), result);
return result;
}Reactions are currently unavailable
