-
Notifications
You must be signed in to change notification settings - Fork 26.3k
Description
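Here is a minimal example that reproduces the error below: a dict containing a CUDA tensor on device 1 is sent to child processes through a `torch.multiprocessing` queue. Is there any solution or workaround?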
THCudaCheck FAIL file=/py/conda-bld/pytorch_1493670682084/work/torch/csrc/generic/StorageSharing.cpp line=248 error=11 : invalid argument
Traceback (most recent call last):
File "/home/yuandong/anaconda3/lib/python3.5/multiprocessing/queues.py", line 241, in _feed
obj = ForkingPickler.dumps(obj)
File "/home/yuandong/anaconda3/lib/python3.5/multiprocessing/reduction.py", line 50, in dumps
cls(buf, protocol).dump(obj)
File "/home/yuandong/anaconda3/lib/python3.5/site-packages/torch/multiprocessing/reductions.py", line 104, in reduce_storage
metadata = storage.share_cuda()
RuntimeError: cuda runtime error (11) : invalid argument at /py/conda-bld/pytorch_1493670682084/work/torch/csrc/generic/StorageSharing.cpp:248
import torch
import torch.multiprocessing as _mp
# Build a 'spawn' multiprocessing context; the Queue, Barrier and Process
# objects used by this script are all created from it.
mp = _mp.get_context('spawn')
def process_main(idx, q, b):
    """Worker entry point: fetch the shared dict once, then print it each round.

    Args:
        idx: Worker index; used only to label the printed line.
        q: Queue to read from. The first item is the dict holding the
            tensors under keys "a" and "b"; every later item is a
            per-round token whose value is ignored.
        b: Barrier the worker waits on once after receiving the dict and
            once at the end of each of the 10 rounds.
    """
    m = q.get()
    b.wait()  # everyone has received the dict before the rounds start

    for _ in range(10):
        q.get()  # block until this round's token arrives; value is unused
        print("[%d] %f, %f" % (idx, m["a"][0, 0], m["b"][2, 3]))
        b.wait()  # end-of-round rendezvous
if __name__ == "__main__":
    # Payload shared with the workers: "a" is moved to CUDA device 1,
    # "b" is created with torch.FloatTensor and never moved.
    # NOTE(review): the traceback reported with this script is raised while
    # the queue's feeder thread pickles this dict (storage.share_cuda());
    # presumably tied to "a" living on device 1 — confirm.
    m = dict(a=torch.FloatTensor(2, 3).cuda(1), b=torch.FloatTensor(3, 4))

    total_process = 3  # barrier parties: this process plus the workers
    num_workers = total_process - 1

    q = mp.Queue()
    b = mp.Barrier(total_process)

    # Keep the Process handles so the workers can be joined at the end.
    workers = []
    for i in range(num_workers):
        proc = mp.Process(target=process_main, args=(i, q, b))
        proc.start()
        workers.append(proc)

    # Queue the dict once per worker, then wait until all have received it.
    for _ in range(num_workers):
        q.put(m)
    b.wait()

    for i in range(10):
        # Update the tensors in place; the workers print these entries.
        m["a"][0, 0] = i
        m["b"][2, 3] = 2 * i
        # One token per worker starts the round on the worker side.
        for _ in range(num_workers):
            q.put(1)
        b.wait()

    # Wait for the workers to exit before this process finishes.
    for proc in workers:
        proc.join()