Skip to content

Bottleneck: 30% time spent in genericSeqAssign in slicer and sigmoid_cross_entropy #141

@mratsim

Description

@mratsim

Following commit 9834ad0, the xor benchmark now spends about 3 s out of 11 s in genericSeqAssign.

2017-11-01_14-00-18

Culprits:

proc slicer[T](t: AnyTensor[T],
                slices: varargs[SteppedSlice],
                ellipsis: Ellipsis): AnyTensor[T] {.noInit,noSideEffect.}=
  ## Slice `t` with explicit leading `slices` followed by an ellipsis.
  ##
  ## The `t.rank - slices.len` dimensions not covered by `slices` are
  ## filled in at the end with `span`.
  ## `ellipsis` is not read in the body; it only selects this overload.
  ## Returns:
  ##    A copy of `t` (made by `result = t`) to which `slicerT` is then
  ##    applied with the padded slice list. The original notes describe
  ##    the effect as a change of offset and strides.

  # Value assignment: this is the copy the result starts from.
  result = t
  let
    missing = t.rank - slices.len
    padded = concat(@slices, newSeqWith(missing, span))
  slicerT(result, padded)

proc slicer[T](t: AnyTensor[T],
                ellipsis: Ellipsis,
                slices: varargs[SteppedSlice]
                ): AnyTensor[T] {.noInit,noSideEffect.}=
  ## Slice `t` with an ellipsis followed by explicit trailing `slices`.
  ##
  ## The `t.rank - slices.len` dimensions not covered by `slices` are
  ## filled in at the front with `span`.
  ## `ellipsis` is not read in the body; it only selects this overload.
  ## Returns:
  ##    A copy of `t` (made by `result = t`) to which `slicerT` is then
  ##    applied with the padded slice list. The original notes describe
  ##    the effect as a change of offset and strides.

  # Value assignment: this is the copy the result starts from.
  result = t
  let
    missing = t.rank - slices.len
    padded = concat(newSeqWith(missing, span), @slices)
  slicerT(result, padded)

proc slicer[T](t: AnyTensor[T],
                slices1: varargs[SteppedSlice],
                ellipsis: Ellipsis,
                slices2: varargs[SteppedSlice]
                ): AnyTensor[T] {.noInit,noSideEffect.}=
  ## Slice `t` with leading `slices1`, an ellipsis, then trailing `slices2`.
  ##
  ## The `t.rank - slices1.len - slices2.len` dimensions in the middle
  ## that neither slice list covers are filled in with `span`.
  ## `ellipsis` is not read in the body; it only selects this overload.
  ## Returns:
  ##    A copy of `t` (made by `result = t`) to which `slicerT` is then
  ##    applied with the padded slice list. The original notes describe
  ##    the effect as a change of offset and strides.

  # Value assignment: this is the copy the result starts from.
  result = t
  let
    missing = t.rank - slices1.len - slices2.len
    padded = @slices1 & newSeqWith(missing, span) & @slices2
  slicerT(result, padded)

And sigmoid_cross_entropy from nn (generated C code; the offending call is marked HERE):

/*
 * Nim-generated C for sigmoid_cross_entropy(a, target).
 *
 * Steps, in order:
 *   1. allocate a SigmoidCrossEntropyLoss gate and store `a` in its cache;
 *   2. copy `target` into the gate field by field (shape, strides, offset,
 *      then the data sequence via genericSeqAssign);
 *   3. allocate a Node, attach the gate and `a` as its first parent, and
 *      append the node to a's tape;
 *   4. call forward on the gate and link the returned variable and the
 *      node to each other.
 *
 * Returns the variable produced by the forward call.
 */
N_NIMCALL(tyObject_VariablecolonObjectType__V9arbGi37a9bYRP6noahxnkg*, sigmoid_cross_entropy_JUee17nUtwwG9crOhwAktAw)(tyObject_VariablecolonObjectType__V9arbGi37a9bYRP6noahxnkg* a, tyObject_Tensor_YVEir6VZKk3q2MAtip9aD6w* target) {
	tyObject_VariablecolonObjectType__V9arbGi37a9bYRP6noahxnkg* result;
	tyObject_SigmoidCrossEntropyLosscolonObjectType__UFgVZehjkS6ZKN0TA9bM9a6Q* gate;
	tyObject_NodecolonObjectType__p32yf8YodYmaGoPmH50AWw* node;
	NI T1_;
	tyObject_LosscolonObjectType__AuSc1kjvf0sy9bIrj0fYzpQ* T2_;
	result = (tyObject_VariablecolonObjectType__V9arbGi37a9bYRP6noahxnkg*)0;
	gate = (tyObject_SigmoidCrossEntropyLosscolonObjectType__UFgVZehjkS6ZKN0TA9bM9a6Q*)0;
	/* Allocate the gate object and fill in its header fields. */
	gate = (tyObject_SigmoidCrossEntropyLosscolonObjectType__UFgVZehjkS6ZKN0TA9bM9a6Q*) newObj((&NTI_HqKVGU8O1Eg0L2tk6CR9bTA_), sizeof(tyObject_SigmoidCrossEntropyLosscolonObjectType__UFgVZehjkS6ZKN0TA9bM9a6Q));
	(*gate).Sup.Sup.m_type = (&NTI_UFgVZehjkS6ZKN0TA9bM9a6Q_);
	(*gate).Sup.Sup.arity = ((NI) 1);
	asgnRef((void**) (&(*gate).cache), a);
	/* Field-by-field assignment of *target into gate->Sup.target. */
	(*gate).Sup.target.shape = (*target).shape;
	(*gate).Sup.target.strides = (*target).strides;
	(*gate).Sup.target.offset = (*target).offset;
	/* The data field is a sequence, so it is assigned through the runtime's
	   genericSeqAssign. This is the call the profile points at. */
	genericSeqAssign((&(*gate).Sup.target.data), (*target).data, (&NTI_4Xyxy0Om14N6K1l5e9bUPSQ_));   // <<<----------------- HERE
	node = (tyObject_NodecolonObjectType__p32yf8YodYmaGoPmH50AWw*)0;
	/* Allocate the node; it references the gate and has `a` as parent 0. */
	node = (tyObject_NodecolonObjectType__p32yf8YodYmaGoPmH50AWw*) newObj((&NTI_u2b9cqonYlV8r9bWdpfPYhKQ_), sizeof(tyObject_NodecolonObjectType__p32yf8YodYmaGoPmH50AWw));
	asgnRef((void**) (&(*node).gate), gate);
	asgnRef((void**) (&(*node).parents[(((NI) 0))- 0]), a);
	/* Append the node to a's tape: grow the nodes sequence by one slot,
	   bump its length, and store the node in the new last slot. */
	(*(*a).tape).nodes = (tySequence_vShYhtvHQtyhCu8g2tVy6Q*) incrSeqV2(&((*(*a).tape).nodes)->Sup, sizeof(tyObject_NodecolonObjectType__p32yf8YodYmaGoPmH50AWw*));
	T1_ = (*(*a).tape).nodes->Sup.len++;
	asgnRef((void**) (&(*(*a).tape).nodes->data[T1_]), node);
	T2_ = (tyObject_LosscolonObjectType__AuSc1kjvf0sy9bIrj0fYzpQ*)0;
	/* Pass the gate to forward as a pointer to its Sup (Loss) part. */
	T2_ = &gate->Sup;
	result = forward_qAuDJ4FSE2zIoYPtuAdhHg(T2_, a, target);
	/* Link the result and the node to each other. */
	asgnRef((void**) (&(*result).ancestor), node);
	asgnRef((void**) (&(*node).child), result);
	return result;
}

Metadata

Metadata

Assignees

No one assigned

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions