vpiSubmitConvertImageFormat CPU usage is high

cd /opt/nvidia/vpi2/samples
cat > demo.cpp << 'EOF'
#include <stdlib.h>
#include <unistd.h>
#include <opencv2/opencv.hpp>
#include <vpi/OpenCVInterop.hpp>
#include <vpi/Image.h>
#include <vpi/Status.h>
#include <vpi/Stream.h>
#include <vpi/algo/ConvertImageFormat.h>
#include <vpi/algo/Rescale.h>

#include <cassert>
#include <cstring> // for memset
#include <iostream>
#include <sstream>
#include <stdexcept> // for std::runtime_error
#include <string>

// Evaluates a VPI call and throws std::runtime_error when the returned status
// is not VPI_SUCCESS. The exception text is "<status name>: <last message>",
// built from vpiStatusGetName() and vpiGetLastStatusMessage().
//
// The expansion intentionally does NOT end with a semicolon: the caller's own
// `;` terminates the do/while, so the macro behaves like a single statement
// and remains usable in an unbraced if/else.
#define CHECK_STATUS(STMT)                                    \
    do                                                        \
    {                                                         \
        VPIStatus status = (STMT);                            \
        if (status != VPI_SUCCESS)                            \
        {                                                     \
            char buffer[VPI_MAX_STATUS_MESSAGE_LENGTH];       \
            vpiGetLastStatusMessage(buffer, sizeof(buffer));  \
            std::ostringstream ss;                            \
            ss << vpiStatusGetName(status) << ": " << buffer; \
            throw std::runtime_error(ss.str());               \
        }                                                     \
    } while (0)

// Reproducer: repeatedly converts a BGR8 VPI image to NV12_ER on the CUDA
// backend, synchronizing the stream after every submission, so that the CPU
// load of the process can be observed externally (e.g. with `top`).
//
// Usage: demo <input image>
//
// The input file is loaded with OpenCV and its width/height size the two VPI
// images. Nothing in this function copies the loaded pixels into `image`.
//
// Returns 1 after printing a message to stderr when the argument is missing,
// the image cannot be read, or a VPI call fails. On success the conversion
// loop never terminates, so the process has to be stopped externally.
int main(int argc, char *argv[])
{
    VPIImage image     = NULL;
    VPIImage imageNV12 = NULL;
    VPIStream stream   = NULL;
    int retval         = 0;

    try
    {
        // argv[1] is only valid when an argument was actually passed.
        if (argc < 2)
        {
            const std::string progName = (argc > 0 && argv[0] != NULL) ? argv[0] : "demo";
            throw std::runtime_error("Usage: " + progName + " <input image>");
        }

        const std::string strInputFileName = argv[1];
        cv::Mat cvImage = cv::imread(strInputFileName);
        if (cvImage.empty())
        {
            throw std::runtime_error("Can't open '" + strInputFileName + "'");
        }

        // Only consumed by the disabled OpenCV-wrapper variant below.
        cv::Mat rgbaImage;
        cv::cvtColor(cvImage, rgbaImage, cv::COLOR_RGB2RGBA);

        // The stream enables both VIC and CUDA so that either backend can be
        // selected in the submit call without recreating the stream.
        VPIBackend backend;
        backend = VPI_BACKEND_VIC;
        CHECK_STATUS(vpiStreamCreate(backend | VPI_BACKEND_CUDA, &stream));
        //CHECK_STATUS(vpiImageCreateWrapperOpenCVMat(rgbaImage, 0, &image));
        //CHECK_STATUS(vpiImageCreateWrapperOpenCVMat(cvImage, 0, &image));
        CHECK_STATUS(vpiImageCreate(cvImage.cols, cvImage.rows, VPI_IMAGE_FORMAT_BGR8, 0, &image));
        CHECK_STATUS(vpiImageCreate(cvImage.cols, cvImage.rows, VPI_IMAGE_FORMAT_NV12_ER, 0, &imageNV12));
        //CHECK_STATUS(vpiImageCreate(cvImage.cols, cvImage.rows, VPI_IMAGE_FORMAT_NV12, 0, &imageNV12));

        // Intentionally endless: one conversion per iteration, each followed
        // by a blocking sync. This is the pattern whose CPU usage is measured.
        while (1)
        {
            CHECK_STATUS(vpiSubmitConvertImageFormat(stream, VPI_BACKEND_CUDA, image, imageNV12, NULL));
            //CHECK_STATUS(vpiSubmitConvertImageFormat(stream, VPI_BACKEND_VIC, image, imageNV12, NULL));
            CHECK_STATUS(vpiStreamSync(stream));
        }
    }
    catch (const std::exception &e)
    {
        std::cerr << e.what() << std::endl;
        retval = 1;
    }

    // Drain pending work before destroying objects the stream may still use.
    // The stream is still NULL when the failure happened before its creation.
    if (stream != NULL)
    {
        vpiStreamSync(stream);
    }
    vpiImageDestroy(image);
    vpiImageDestroy(imageNV12);
    vpiStreamDestroy(stream);

    return retval;
}
EOF

# Compile demo.cpp against the VPI 2 headers/library and OpenCV 4.
# The rpath entry records the VPI library directory in the binary.
g++ -o demo demo.cpp \
	-I /opt/nvidia/vpi2/include \
	-L /opt/nvidia/vpi2/lib64 -lnvvpi \
	-Wl,-rpath=/opt/nvidia/vpi2/lib64 \
	$(pkg-config --cflags --libs opencv4)

# Run the reproducer from the samples directory on an image from the VPI tree.
cd /opt/nvidia/vpi2/samples
./demo ../share/backgrounds/NVIDIA_700_DarkBG.png

top - 20:16:44 up 3 days, 22:10,  0 users,  load average: 1.59, 1.41, 1.54
Tasks: 469 total,   3 running, 458 sleeping,   1 stopped,   7 zombie
%Cpu(s):  8.7 us,  8.5 sy,  0.0 ni, 82.1 id,  0.0 wa,  0.6 hi,  0.1 si,  0.0 st
MiB Mem :  30587.4 total,  14196.6 free,   5991.2 used,  10399.5 buff/cache
MiB Swap:  15293.7 total,  14898.4 free,    395.2 used.  23380.3 avail Mem

    PID USER      PR  NI    VIRT    RES    SHR S  %CPU  %MEM     TIME+ COMMAND
   7259 nvidia    20   0   47.4g 278064 153156 S  62.4   0.9   0:18.47 demo

Which Jetson platform and JetPack SW are you using?

# R35 (release), REVISION: 3.1, GCID: 32827747, BOARD: t186ref, EABI: aarch64, DATE: Sun Mar 19 15:19:21 UTC 2023

Software part of jetson-stats 4.3.2 - (c) 2024, Raffaello Bonghi
Model: Jetson AGX Orin - Jetpack 5.1.1 [L4T 35.3.1]
NV Power Mode[0]: MAXN
Serial Number: [XXX Show with: jetson_release -s XXX]
Hardware:
 - P-Number: p3701-0004
 - Module: NVIDIA Jetson AGX Orin (32GB ram)
Platform:
 - Distribution: Ubuntu 20.04 focal
 - Release: 5.10.104-tegra
jtop:
 - Version: 4.3.2
 - Service: Active
Libraries:
 - CUDA: 11.4.315
 - cuDNN: 8.6.0.166
 - TensorRT: 8.5.2.2
 - VPI: 2.2.6
 - Vulkan: 1.3.204
 - OpenCV: 4.5.4 - with CUDA: NO

Moving to AGX Orin forum.

Hi,

vpiStreamSync is a blocking call.
This means that the CPU will busy-wait for the GPU tasks to finish.

Thanks.

I would like to reduce CPU usage. What should I do?

Hi,

You can submit multiple tasks to the GPU and wait for them at the end:

	for( size_t i=0; i<ITER; i++)
	{
		CHECK_STATUS(vpiSubmitConvertImageFormat(stream, VPI_BACKEND_CUDA, 
		CHECK_STATUS(vpiStreamSync(stream));
	}

ITER is the number of GPU tasks (e.g. 32).
Thanks.

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.