-
Notifications
You must be signed in to change notification settings - Fork 3.7k
Closed
Labels
ep:CUDA — issues related to the CUDA execution provider; stale — issues that have not been addressed in a while, categorized by a bot
Description
Describe the issue
During CUDA graph capture, ORT will trigger cudaStreamSynchronize, which is not allowed during CUDA graph capture. The call stack is like the following:
libonnxruntime_providers_cuda.so!onnxruntime::CudaStream::CleanUpOnRunEnd(onnxruntime::CudaStream * const this) git\onnxruntime\onnxruntime\core\providers\cuda\cuda_stream_handle.cc:141)
onnxruntime_pybind11_state.so!onnxruntime::DeviceStreamCollectionImpl::CleanUp(onnxruntime::DeviceStreamCollectionImpl * const this, bool sync_streams) git\onnxruntime\onnxruntime\core\framework\device_stream_collection.cc:30)
onnxruntime_pybind11_state.so!onnxruntime::DeviceStreamCollection::CleanUp(onnxruntime::DeviceStreamCollection * const this, bool sync_streams) git\onnxruntime\onnxruntime\core\framework\device_stream_collection.cc:113)
onnxruntime_pybind11_state.so!onnxruntime::utils::ExecuteGraph(const onnxruntime::SessionState & session_state, onnxruntime::FeedsFetchesManager & feeds_fetches_manager, gsl::span<OrtValue const, 18446744073709551615> feeds, std::vector<OrtValue, std::allocator<OrtValue> > & fetches, ExecutionMode execution_mode, const bool & terminate_flag, const onnxruntime::logging::Logger & logger, bool sync_execution_provider, bool only_execute_path_to_fetches, onnxruntime::Stream * parent_stream) git\onnxruntime\onnxruntime\core\framework\utils.cc:782)
onnxruntime_pybind11_state.so!onnxruntime::utils::ExecuteGraph(const onnxruntime::SessionState & session_state, onnxruntime::FeedsFetchesManager & feeds_fetches_manager, gsl::span<OrtValue const, 18446744073709551615> feeds, std::vector<OrtValue, std::allocator<OrtValue> > & fetches, ExecutionMode execution_mode, const onnxruntime::RunOptions & run_options, const onnxruntime::logging::Logger & logger) git\onnxruntime\onnxruntime\core\framework\utils.cc:817)
onnxruntime_pybind11_state.so!onnxruntime::InferenceSession::Run(onnxruntime::InferenceSession * const this, const onnxruntime::RunOptions & run_options, gsl::span<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, 18446744073709551615> feed_names, gsl::span<OrtValue const, 18446744073709551615> feeds, gsl::span<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, 18446744073709551615> output_names, std::vector<OrtValue, std::allocator<OrtValue> > * p_fetches, const std::vector<OrtDevice, std::allocator<OrtDevice> > * p_fetches_device_info) git\onnxruntime\onnxruntime\core\session\inference_session.cc:2001)
onnxruntime_pybind11_state.so!onnxruntime::InferenceSession::Run(onnxruntime::InferenceSession * const this, const onnxruntime::RunOptions & run_options, onnxruntime::IOBinding & io_binding) git\onnxruntime\onnxruntime\core\session\inference_session.cc:2155)
onnxruntime_pybind11_state.so!onnxruntime::InferenceSession::Run(onnxruntime::InferenceSession * const this, onnxruntime::IOBinding & io_binding) git\onnxruntime\onnxruntime\core\session\inference_session.cc:2160)
onnxruntime_pybind11_state.so!onnxruntime::python::<lambda(onnxruntime::python::PyInferenceSession*, onnxruntime::SessionIOBinding&, onnxruntime::RunOptions*)>::operator()(onnxruntime::python::PyInferenceSession *, onnxruntime::SessionIOBinding &, onnxruntime::RunOptions *) const(const onnxruntime::python::<lambda(onnxruntime::python::PyInferenceSession*, onnxruntime::SessionIOBinding&, onnxruntime::RunOptions*)> * const __closure, onnxruntime::python::PyInferenceSession * sess, onnxruntime::SessionIOBinding & io_binding, onnxruntime::RunOptions * run_options) git\onnxruntime\onnxruntime\python\onnxruntime_pybind_state.cc:1668)
onnxruntime_pybind11_state.so!pybind11::detail::argument_loader<onnxruntime::python::PyInferenceSession*, onnxruntime::SessionIOBinding&, OrtRunOptions*>::call_impl<void, onnxruntime::python::addObjectMethods(pybind11::module&, onnxruntime::Environment&, onnxruntime::python::ExecutionProviderRegistrationFn)::<lambda(onnxruntime::python::PyInferenceSession*, onnxruntime::SessionIOBinding&, onnxruntime::RunOptions*)>&, 0, 1, 2, pybind11::detail::void_type>(onnxruntime::python::<lambda(onnxruntime::python::PyInferenceSession*, onnxruntime::SessionIOBinding&, onnxruntime::RunOptions*)> &, std::index_sequence, pybind11::detail::void_type &&)
Error is like the following (I added file and line):
2023-03-10 11:24:05.061767687 [E:onnxruntime:Default, cuda_call.cc:116 CudaCall] CUDA failure 900: operation not permitted when stream is capturing ; GPU=0 ; hostname=??; file=/work/tlwu/git/onnxruntime/onnxruntime/core/providers/cuda/cuda_stream_handle.cc ; line=141 ; expr=cudaStreamSynchronize(static_cast<cudaStream_t>(GetHandle()));
To reproduce
The error is not always triggered with a small model, but with a larger model like UNet it can always be reproduced.
Urgency
No response
Platform
Linux
OS Version
Ubuntu 20.04
ONNX Runtime Installation
Released Package
ONNX Runtime Version or Commit ID
1.14.1
ONNX Runtime API
Python
Architecture
X64
Execution Provider
CUDA
Execution Provider Library Version
No response
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
ep:CUDA — issues related to the CUDA execution provider; stale — issues that have not been addressed in a while, categorized by a bot