Hi,
My mistake — I tested using an online YUV video, and the example works as expected without any errors.
However, when I run my own example, the application initialises the encoder correctly, with all return values equal to zero.
The error message appears when I attempt to encode each frame.
I’ve stripped out all unrelated code from my project and isolated the encoder logic. I’m using a standard webcam for input (I’m aware there are some memory leaks — this version is purely for debugging NVEnc-related issues):
#include <atomic>
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <opencv2/opencv.hpp>
#include "NvBufSurface.h"
#include "NvVideoDecoder.h"
#include "NvVideoEncoder.h"
#define MAX_PLANES 3                              // max v4l2 planes per buffer
#define BITS_PER_PIXEL 24 // for YUV444 / NV24: 3 bytes per pixel
#define CHUNK_SIZE 4000000                        // NOTE(review): unused in this file
#define ENCODER_QUEUE_SIZE 15                     // v4l2 buffers allocated per encoder plane
#define RINGBUFFER_SIZE (ENCODER_QUEUE_SIZE * 2)  // NOTE(review): unused in this file
// NOTE(review): never read anywhere in this file — leftover from debugging?
static thread_local bool hasEncodedFrame = false;
constexpr size_t MAX_ENCODER_THREAD_QUEUE_SIZE = 32;  // slots in encoder_t::compressed_queue
constexpr size_t MAX_DECODER_THREAD_QUEUE_SIZE = 32;  // NOTE(review): unused in this file
/**
 * View of one compressed frame drained from the encoder's capture plane.
 * `data` points into an encoder-owned capture buffer and is only valid
 * until that buffer is re-queued — this struct does NOT own the memory.
 */
struct encoded_frame_t {
    const void* data = nullptr;  // compressed bitstream (borrowed, not owned)
    size_t size = 0;             // number of valid bytes at `data`
    uint32_t id = 0;             // sequence id (carried in the v4l2 timestamp)
    uint32_t flags = 0;          // v4l2 buffer flags (was left uninitialized)
};
/**
 * All state for one NVENC H.265 encoding session: configuration, the
 * NvVideoEncoder handle, the capture-plane polling thread, and a
 * single-producer / single-consumer ring buffer of compressed frames.
 */
struct encoder_t {
    uint16_t encWidth = 848;
    uint16_t encHeight = 480;
    uint16_t framerate = 30;
    // Rough default bitrate derived from the raw pixel rate (bits/s / 1000).
    uint32_t bitrate = (encWidth * encHeight * framerate * BITS_PER_PIXEL) / 1000;
    uint16_t intervalIDFrame = 30;  // IDR / I-frame interval in frames
    bool lossless = false;
    NvVideoEncoder* nvEnc = 0;
    int framesInOutputQueue = 0;    // output-plane buffers handed out so far
    pthread_t pollingThread;        // runs encoderPollingThread
    std::atomic<bool> runThread = true;
    // True while the consumer holds a dequeued compressed frame that still
    // has to be re-queued to the capture plane.
    // FIX: was uninitialized — it is read before ever being written.
    bool hasCompressedFrameToEnqueue = false;
    // One ring slot: the v4l2 buffer wrapping a compressed frame, its plane
    // array, and the NvBuffer returned by dqBuffer.
    struct __attribute__((aligned(64))) queue_entry {
        struct v4l2_buffer v4l2_buf;
        struct v4l2_plane planes[MAX_PLANES];
        NvBuffer* buffer = NULL;
    };
    queue_entry compressed_queue[MAX_ENCODER_THREAD_QUEUE_SIZE];
    // Ring indices: the polling thread advances writeHead, the consumer
    // advances readHead; slot = head % MAX_ENCODER_THREAD_QUEUE_SIZE.
    // FIX: default-constructed std::atomic is uninitialized before C++20.
    std::atomic<size_t> writeHead{0};
    std::atomic<size_t> readHead{0};
    uint32_t appendSequenceID = 0;  // stamped into outgoing frame timestamps
    uint32_t readSequenceID = 0;    // expected id of the next drained frame
};
/**
 * Convert a packed 3-byte-per-pixel image into NV24 layout: a full-resolution
 * luma (Y) plane plus a full-resolution interleaved chroma (UV) plane, using
 * BT.601 limited-range integer arithmetic (8-bit fixed point).
 *
 * The three input bytes of each pixel are treated as R, G, B in that order
 * (NOTE: cv::Mat frames from VideoCapture are BGR — confirm at the caller).
 *
 * @param srcImg input pixels, 3 bytes each, width*height pixels
 * @param srcY   output Y plane, width*height bytes
 * @param srcUV  output interleaved UV plane, 2*width*height bytes
 * @param width  image width in pixels
 * @param height image height in pixels
 */
void rgbToNv24(uint8_t* srcImg, uint8_t* srcY, uint8_t* srcUV, int width, int height) {
    const uint8_t* in = srcImg;
    uint8_t* outY = srcY;
    uint8_t* outUV = srcUV;
    const int pixelCount = width * height;
    for (int p = 0; p < pixelCount; ++p) {
        const int r = in[0];
        const int g = in[1];
        const int b = in[2];
        in += 3;
        // Y in [16, 235]; Cb/Cr centered on 128.
        *outY++ = (uint8_t)(((66 * r + 129 * g + 25 * b) >> 8) + 16);
        *outUV++ = (uint8_t)(((-38 * r - 74 * g + 112 * b) >> 8) + 128);
        *outUV++ = (uint8_t)(((112 * r - 94 * g - 18 * b) >> 8) + 128);
    }
}
/**
 * Capture-plane drain thread.
 *
 * Continuously dequeues compressed-frame buffers from the encoder's capture
 * plane and publishes them into encoder_t::compressed_queue, advancing
 * writeHead. The main loop consumes entries at readHead and re-queues the
 * v4l2 buffers back to the capture plane.
 *
 * @param ctx pointer to the owning encoder_t
 * @return always 0 (the value is never inspected)
 */
void* encoderPollingThread(void* ctx) {
    encoder_t* encoderTest = (encoder_t*)ctx;
    NvVideoEncoder* nvEnc = encoderTest->nvEnc;
    while (encoderTest->runThread.load(std::memory_order_acquire)) {
        // NOTE(review): called every iteration — presumably meant to unblock
        // the blocking dqBuffer on shutdown; confirm against NvVideoEncoder docs.
        nvEnc->SetPollInterrupt();
        // Modulo maps the monotonically increasing writeHead onto the fixed-size
        // ring, wrapping every MAX_ENCODER_THREAD_QUEUE_SIZE entries.
        size_t idx = encoderTest->writeHead % MAX_ENCODER_THREAD_QUEUE_SIZE;
        encoder_t::queue_entry* entry = encoderTest->compressed_queue + idx; // slot receiving this frame
        memset(&entry->v4l2_buf, 0, sizeof(entry->v4l2_buf));
        memset(entry->planes, 0, sizeof(entry->planes));
        entry->v4l2_buf.m.planes = entry->planes;
        // The compressed H.265 bitstream occupies a single plane.
        entry->v4l2_buf.length = 1;
        // Blocking dequeue (last arg -1 = wait forever); < 0 on failure/abort.
        int ret = nvEnc->capture_plane.dqBuffer(
            entry->v4l2_buf, &entry->buffer, 0, -1);
        if (ret < 0) continue;
        // NOTE(review): nothing checks that writeHead - readHead stays below
        // MAX_ENCODER_THREAD_QUEUE_SIZE, so the producer can lap the consumer
        // and overwrite entries that were never re-queued — verify queue depth.
        encoderTest->writeHead++;
    }
    return 0;
}
/**
 * NVENC H.265 encoding demo.
 *
 * Captures frames from a V4L2 webcam via OpenCV, converts them to NV24,
 * queues them on the encoder output plane, and drains the compressed frames
 * (produced by encoderPollingThread) from a ring buffer, writing the
 * bitstream to output.h265.
 *
 * @return 0 on success, non-zero on initialization or encode failure
 */
int main() {
    printf("hello from %s!\n", "NVENC Test");
    int width = 1280;
    int height = 720;
    int framerate = 10;
    std::string label = "CamTest";
    std::cout << "Application Configuration:" << std::endl;
    std::cout << " Resolution : " << width << "x" << height << std::endl;
    std::cout << " Framerate : " << framerate << " FPS" << std::endl;
    std::cout << " Label : " << label << std::endl;

    // ------------------------------ camera ------------------------------
    cv::VideoCapture cap(0, cv::CAP_V4L2);
    if (!cap.isOpened()) {
        std::cerr << "Error: unable to open camera" << std::endl;
        return -1;
    }
    // FIX: was hard-coded 1280/720/10 — keep a single source of truth.
    cap.set(cv::CAP_PROP_FRAME_WIDTH, width);
    cap.set(cv::CAP_PROP_FRAME_HEIGHT, height);
    cap.set(cv::CAP_PROP_FPS, framerate);
    double camWidth = cap.get(cv::CAP_PROP_FRAME_WIDTH);
    double camHeight = cap.get(cv::CAP_PROP_FRAME_HEIGHT);
    double camFPS = cap.get(cv::CAP_PROP_FPS);
    std::cout << "Started webcam with config " << camWidth << "x" << camHeight << " @ " << camFPS << " FPS" << std::endl;
    std::cout << "DONE!" << std::endl;
    cv::Mat colorImage = cv::Mat::zeros(cv::Size(width, height), CV_8UC3);

    // ------------------------------ encoder -----------------------------
    printf(" %s: Creating Encoder\n", label.c_str());
    int intervalIDframe = framerate;
    int ret = 0;
    encoder_t* encoderTest = new encoder_t;
    encoderTest->encWidth = width;
    encoderTest->encHeight = height;
    encoderTest->framerate = framerate;
    encoderTest->intervalIDFrame = intervalIDframe;
    encoderTest->bitrate = (encoderTest->encWidth * encoderTest->encHeight * encoderTest->framerate * BITS_PER_PIXEL) / 1000;
    // FIX: explicit start state — these members were never initialized,
    // so hasCompressedFrameToEnqueue / readHead / writeHead were read as garbage.
    encoderTest->hasCompressedFrameToEnqueue = false;
    encoderTest->writeHead = 0;
    encoderTest->readHead = 0;
    NvVideoEncoder* nvEnc = nullptr;
    std::ofstream outFile("output.h265", std::ios::binary);
    // Staging planes for the NV24 conversion (freed at shutdown). srcImg just
    // aliases the current cv::Mat data — the old calloc for it leaked.
    uint8_t* srcImg = nullptr;
    uint8_t* srcY = (uint8_t*)calloc(encoderTest->encWidth, encoderTest->encHeight);
    uint8_t* srcUV = (uint8_t*)calloc(encoderTest->encWidth, encoderTest->encHeight * 2);
    std::string encName = "enc0";
    nvEnc = NvVideoEncoder::createVideoEncoder(encName.c_str());
    if (nvEnc == nullptr) { // FIX: was dereferenced unchecked
        printf("Error creating the encoder\n");
        return 1;
    }
    nvEnc->enableProfiling();
    // Capture plane = compressed output, output plane = raw NV24 input.
    ret = nvEnc->setCapturePlaneFormat(V4L2_PIX_FMT_H265, encoderTest->encWidth, encoderTest->encHeight, 2 * 1024 * 1024);
    if (ret != 0) printf("Error setting capture plane\n");
    ret = nvEnc->setOutputPlaneFormat(V4L2_PIX_FMT_NV24M, encoderTest->encWidth, encoderTest->encHeight);
    if (ret != 0) printf("Error setting output plane\n");
    ret = nvEnc->setChromaFactorIDC(3); // chroma_format_idc 3 == 4:4:4
    if (ret != 0) printf("Error setting the H.265 encoder Chroma Format IDC\n");
    // NOTE(review): encoder framerate is 30/1 while the camera runs at 10 FPS
    // — confirm the mismatch is intended.
    ret = nvEnc->setFrameRate(30, 1);
    if (ret != 0) printf("Error setting framerate (expressed as the fraction 30 / 1)\n");
    ret = nvEnc->setProfile(V4L2_MPEG_VIDEO_H265_PROFILE_MAIN);
    if (ret != 0) printf("Error setting profile\n");
    ret = nvEnc->setInsertSpsPpsAtIdrEnabled(true);
    if (ret != 0) printf("Error setting SPS PPS at every IDR frame flag\n");
    ret = nvEnc->setRateControlMode(V4L2_MPEG_VIDEO_BITRATE_MODE_VBR);
    if (ret != 0) printf("Error setting Rate Control Mode to V4L2_MPEG_VIDEO_BITRATE_MODE_VBR\n");
    ret = nvEnc->setHWPresetType(V4L2_ENC_HW_PRESET_ULTRAFAST);
    if (ret != 0) printf("Error setting HW Preset Type to V4L2_ENC_HW_PRESET_ULTRAFAST\n");
    ret = nvEnc->setMaxPerfMode(1);
    if (ret != 0) printf("Error setting MaxPerfMode\n");
    ret = nvEnc->setBitrate(encoderTest->bitrate);
    if (ret != 0) printf("Error setting bitrate\n");
    ret = nvEnc->setPeakBitrate(encoderTest->bitrate + 500000);
    if (ret != 0) printf("Error setting peak bitrate\n");
    ret = nvEnc->setLossless(false);
    if (ret != 0) printf("Error setting lossless\n");
    ret = nvEnc->setIDRInterval(encoderTest->intervalIDFrame);
    if (ret != 0) printf("Error setting IDR interval\n");
    ret = nvEnc->setIFrameInterval(encoderTest->intervalIDFrame);
    if (ret != 0) printf("Error setting I Frame interval\n");
    ret = nvEnc->output_plane.setupPlane(V4L2_MEMORY_MMAP, ENCODER_QUEUE_SIZE, true, false);
    if (ret != 0) printf("Error setting setup output plane\n");
    ret = nvEnc->capture_plane.setupPlane(V4L2_MEMORY_MMAP, ENCODER_QUEUE_SIZE, true, false);
    if (ret != 0) printf("Error setting setup capture plane\n");
    ret = nvEnc->subscribeEvent(V4L2_EVENT_EOS, 0, 0);
    if (ret != 0) printf("Error setting subscribe to end of stream events\n");
    printf(" %s: Starting Encoder\n", label.c_str());
    std::cout << "Starting NVENC Stream" << std::endl;
    ret = nvEnc->output_plane.setStreamStatus(true);
    if (ret != 0) printf("Error starting the output stream\n"); // FIX: message was garbled
    ret = nvEnc->capture_plane.setStreamStatus(true);
    if (ret != 0) printf("Error starting the capture stream\n"); // FIX: message was garbled
    if (nvEnc->output_plane.getNumBuffers() == 0 || nvEnc->capture_plane.getNumBuffers() == 0) {
        printf("Encoder not initialized correctly: check formats and order.\n");
        return -1;
    }
    assert(nvEnc->capture_plane.getNumBuffers() <= ENCODER_QUEUE_SIZE);
    // Prime the capture plane: queue every buffer so the encoder has
    // somewhere to write compressed frames.
    for (uint32_t i = 0; ret == 0 && i < nvEnc->capture_plane.getNumBuffers(); i++) {
        struct v4l2_buffer v4l2_buf;
        struct v4l2_plane planes[MAX_PLANES];
        memset(&v4l2_buf, 0, sizeof(v4l2_buf));
        memset(planes, 0, MAX_PLANES * sizeof(struct v4l2_plane));
        v4l2_buf.index = i;
        v4l2_buf.m.planes = planes;
        v4l2_buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE;
        v4l2_buf.memory = V4L2_MEMORY_MMAP;
        v4l2_buf.length = nvEnc->capture_plane.getNumPlanes();
        ret = nvEnc->capture_plane.qBuffer(v4l2_buf, NULL);
        if (ret != 0) printf("Error enqueuing the capture buffer\n"); // FIX: message was garbled
    }
    if (ret == 0) {
        encoderTest->nvEnc = nvEnc;
        printf("Creating %s polling thread\n", label.c_str());
        encoderTest->runThread = true;
        pthread_create(&encoderTest->pollingThread, NULL, encoderPollingThread, encoderTest);
    }
    else {
        delete nvEnc;
        nvEnc = 0;
        return 1;
    }

    // ------------------------------ main loop ---------------------------
    printf(" %s: Starting Loop\n", label.c_str());
    for (int i = 0; i < 300; i++) {
        cap >> colorImage;
        if (colorImage.empty()) continue;
        srcImg = (uint8_t*)colorImage.data;
        // NOTE(review): OpenCV delivers BGR but rgbToNv24 applies RGB
        // coefficients, so chroma channels end up swapped — confirm intent.
        rgbToNv24(srcImg, srcY, srcUV, width, height);
        struct v4l2_buffer v4l2_buf;
        struct v4l2_plane planes[MAX_PLANES];
        memset(&v4l2_buf, 0, sizeof(v4l2_buf));
        memset(planes, 0, MAX_PLANES * sizeof(struct v4l2_plane));
        v4l2_buf.m.planes = planes;
        v4l2_buf.flags |= V4L2_BUF_FLAG_TIMESTAMP_COPY;
        v4l2_buf.timestamp.tv_sec = 0;
        // tv_usec doubles as a frame sequence id; TIMESTAMP_COPY propagates
        // it to the compressed frame on the capture side.
        v4l2_buf.timestamp.tv_usec = encoderTest->appendSequenceID++;
        NvBuffer* buffer = 0;
        bool good = true;
        // While the output queue is not full yet, hand out fresh buffers;
        // afterwards recycle one by dequeuing it from the encoder.
        bool newFrame = encoderTest->framesInOutputQueue < ENCODER_QUEUE_SIZE;
        if (newFrame) {
            buffer =
                encoderTest->nvEnc->output_plane.getNthBuffer(encoderTest->framesInOutputQueue);
            v4l2_buf.index = encoderTest->framesInOutputQueue;
            encoderTest->framesInOutputQueue++;
        }
        else {
            ret = encoderTest->nvEnc->output_plane.dqBuffer(v4l2_buf, &buffer, NULL, 0);
            if (ret != 0) printf("Error dequeuing output plane buffer\n");
        }
        // FIX: on a failed dqBuffer `buffer` stayed null and was dereferenced.
        if (buffer == nullptr) continue;
        // Copy the converted planes row by row, honoring the HW strides.
        size_t dst_y_stride = buffer->planes[0].fmt.stride;
        size_t dst_uv_stride = buffer->planes[1].fmt.stride;
        buffer->planes[0].bytesused = encoderTest->encHeight * dst_y_stride;
        buffer->planes[1].bytesused = encoderTest->encHeight * dst_uv_stride;
        planes[0].bytesused = buffer->planes[0].bytesused;
        planes[1].bytesused = buffer->planes[1].bytesused;
        const uint8_t* src_y = srcY;
        const uint8_t* src_uv = srcUV;
        uint8_t* dst_y = (uint8_t*)buffer->planes[0].data;
        uint8_t* dst_uv = (uint8_t*)buffer->planes[1].data;
        for (size_t row = 0; row < encoderTest->encHeight; ++row) { // FIX: was shadowing outer `i`
            memcpy(dst_y, src_y, encoderTest->encWidth);
            memcpy(dst_uv, src_uv, 2 * encoderTest->encWidth);
            src_y += encoderTest->encWidth;
            src_uv += 2 * encoderTest->encWidth;
            dst_y += dst_y_stride;
            dst_uv += dst_uv_stride;
        }
        // Flush CPU writes so the hardware encoder sees consistent memory.
        for (uint32_t j = 0; good && j < buffer->n_planes; j++) {
            NvBufSurface* nvbuf_surf = 0;
            ret = NvBufSurfaceFromFd(buffer->planes[j].fd, (void**)(&nvbuf_surf));
            if (ret != 0) printf("Error creating NvBufSurface for plane %d\n", j);
            ret = NvBufSurfaceSyncForDevice(nvbuf_surf, 0, j);
            if (ret != 0) printf("Error syncing NvBufSurface for plane %d\n", j);
        }
        // Hand the raw frame to the encoder.
        ret = encoderTest->nvEnc->output_plane.qBuffer(v4l2_buf, NULL);
        if (ret != 0) printf("Error enqueuing output plane buffer\n");
        // FIX: was `new encoded_frame_t` leaked on every iteration.
        encoded_frame_t frame{};
        // First give the previously consumed compressed buffer back to the
        // capture plane so the encoder can reuse it.
        if (encoderTest->hasCompressedFrameToEnqueue) {
            size_t idx = encoderTest->readHead % MAX_ENCODER_THREAD_QUEUE_SIZE;
            encoder_t::queue_entry* entry = encoderTest->compressed_queue + idx;
            int qret = encoderTest->nvEnc->capture_plane.qBuffer(entry->v4l2_buf, 0);
            if (qret >= 0) {
                encoderTest->readHead++;
                encoderTest->hasCompressedFrameToEnqueue = false;
            }
            else {
                // FIX: was `return false` — which is 0, i.e. reporting success
                // on a hard failure.
                return 1;
            }
        }
        // Nothing new drained by the polling thread yet.
        if (encoderTest->readHead == encoderTest->writeHead) continue;
        size_t idx = encoderTest->readHead % MAX_ENCODER_THREAD_QUEUE_SIZE;
        encoder_t::queue_entry* entry = encoderTest->compressed_queue + idx;
        frame.data = entry->buffer->planes[0].data;
        frame.size = entry->buffer->planes[0].bytesused;
        frame.flags = entry->v4l2_buf.flags;
        encoderTest->hasCompressedFrameToEnqueue = true;
        frame.id = entry->v4l2_buf.timestamp.tv_usec;
        assert(frame.id == encoderTest->readSequenceID);
        encoderTest->readSequenceID++;
        // FIX: actually persist the bitstream — outFile was never written.
        outFile.write((const char*)frame.data, frame.size);
        printf("ID: %u\n", frame.id); // FIX: %u for uint32_t (was %d)
    }

    // ------------------------------ shutdown ----------------------------
    encoderTest->runThread.store(false, std::memory_order_release);
    // abort() should make the polling thread's blocking dqBuffer return.
    encoderTest->nvEnc->abort();
    pthread_join(encoderTest->pollingThread, NULL); // FIX: thread was never joined
    encoderTest->nvEnc->output_plane.setStreamStatus(false);
    encoderTest->nvEnc->capture_plane.setStreamStatus(false);
    // FIX: was a bare `->~NvVideoEncoder()` destructor call, leaking the allocation.
    delete encoderTest->nvEnc;
    free(srcY);
    free(srcUV);
    delete encoderTest;
    return 0;
}
I also tested with other cameras and with different resolutions and frame rates, but the result is always the same.