Skip to content

mednist_GAN_workflow_dict has errors #150

@wyli

Description

@wyli

Describe the bug
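The CI run of the `mednist_GAN_workflow_dict` notebook fails at cell `In [15]` (`trainer.run()`): a DataLoader worker raises a `RuntimeError` while collating a batch, because tensors of size [3, 3] and [4, 4] cannot be stacked. Full log from https://github.com/Project-MONAI/MONAI/runs/2172199805?check_suite_focus=true: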

Running ./modules/mednist_GAN_workflow_dict.ipynb
Checking PEP8 compliance...
Running notebook...
Before:
    "max_epochs = 50\n",
After:
    "max_epochs = 1\n",
Before:
    "disc_train_steps = 5\n",
After:
    "disc_train_steps = 1\n",

Executing:   0%|          | 0/34 [00:00<?, ?cell/s]
Executing:   3%|▎         | 1/34 [00:00<00:32,  1.02cell/s]
Executing:   9%|▉         | 3/34 [00:05<00:43,  1.41s/cell]
Executing:  12%|█▏        | 4/34 [00:08<00:49,  1.66s/cell]
Executing:  29%|██▉       | 10/34 [00:22<00:45,  1.89s/cell]
Executing:  53%|█████▎    | 18/34 [00:29<00:25,  1.59s/cell]
Executing:  62%|██████▏   | 21/34 [00:31<00:17,  1.32s/cell]
Executing:  79%|███████▉  | 27/34 [00:32<00:06,  1.02cell/s]
Executing:  79%|███████▉  | 27/34 [00:33<00:08,  1.26s/cell]
Traceback (most recent call last):
  File "/opt/conda/bin/papermill", line 8, in <module>
    sys.exit(papermill())
  File "/opt/conda/lib/python3.8/site-packages/click/core.py", line 829, in __call__
    return self.main(*args, **kwargs)
  File "/opt/conda/lib/python3.8/site-packages/click/core.py", line 782, in main
    rv = self.invoke(ctx)
  File "/opt/conda/lib/python3.8/site-packages/click/core.py", line 1066, in invoke
    return ctx.invoke(self.callback, **ctx.params)
  File "/opt/conda/lib/python3.8/site-packages/click/core.py", line 610, in invoke
    return callback(*args, **kwargs)
  File "/opt/conda/lib/python3.8/site-packages/click/decorators.py", line 21, in new_func
    return f(get_current_context(), *args, **kwargs)
  File "/opt/conda/lib/python3.8/site-packages/papermill/cli.py", line 250, in papermill
    execute_notebook(
  File "/opt/conda/lib/python3.8/site-packages/papermill/execute.py", line 122, in execute_notebook
    raise_for_execution_errors(nb, output_path)
  File "/opt/conda/lib/python3.8/site-packages/papermill/execute.py", line 234, in raise_for_execution_errors
    raise error
papermill.exceptions.PapermillExecutionError: 
---------------------------------------------------------------------------
Exception encountered at "In [15]":
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-15-041e2033e90a> in <module>
----> 1 trainer.run()

/__w/MONAI/MONAI/monai/engines/trainer.py in run(self)
     46         """
     47         self.scaler = torch.cuda.amp.GradScaler() if self.amp else None
---> 48         super().run()
     49 
     50     def get_train_stats(self) -> Dict[str, float]:

/__w/MONAI/MONAI/monai/engines/workflow.py in run(self)
    195 
    196         """
--> 197         super().run(data=self.data_loader, max_epochs=self.state.max_epochs)
    198 
    199     def _iteration(self, engine: Engine, batchdata: Dict[str, torch.Tensor]):

/opt/conda/lib/python3.8/site-packages/ignite/engine/engine.py in run(self, data, max_epochs, epoch_length, seed)
    700 
    701         self.state.dataloader = data
--> 702         return self._internal_run()
    703 
    704     @staticmethod

/opt/conda/lib/python3.8/site-packages/ignite/engine/engine.py in _internal_run(self)
    773             self._dataloader_iter = None
    774             self.logger.error(f"Engine run is terminating due to exception: {e}")
--> 775             self._handle_exception(e)
    776 
    777         self._dataloader_iter = None

/opt/conda/lib/python3.8/site-packages/ignite/engine/engine.py in _handle_exception(self, e)
    465     def _handle_exception(self, e: BaseException) -> None:
    466         if Events.EXCEPTION_RAISED in self._event_handlers:
--> 467             self._fire_event(Events.EXCEPTION_RAISED, e)
    468         else:
    469             raise e

/opt/conda/lib/python3.8/site-packages/ignite/engine/engine.py in _fire_event(self, event_name, *event_args, **event_kwargs)
    422             kwargs.update(event_kwargs)
    423             first, others = ((args[0],), args[1:]) if (args and args[0] == self) else ((), args)
--> 424             func(*first, *(event_args + others), **kwargs)
    425 
    426     def fire_event(self, event_name: Any) -> None:

/__w/MONAI/MONAI/monai/handlers/stats_handler.py in exception_raised(self, engine, e)
    143         """
    144         self.logger.exception(f"Exception: {e}")
--> 145         raise e
    146 
    147     def _default_epoch_print(self, engine: Engine) -> None:

/opt/conda/lib/python3.8/site-packages/ignite/engine/engine.py in _internal_run(self)
    743                     self._setup_engine()
    744 
--> 745                 time_taken = self._run_once_on_dataset()
    746                 # time is available for handlers but must be update after fire
    747                 self.state.times[Events.EPOCH_COMPLETED.name] = time_taken

/opt/conda/lib/python3.8/site-packages/ignite/engine/engine.py in _run_once_on_dataset(self)
    848         except Exception as e:
    849             self.logger.error(f"Current run is terminating due to exception: {e}")
--> 850             self._handle_exception(e)
    851 
    852         return time.time() - start_time

/opt/conda/lib/python3.8/site-packages/ignite/engine/engine.py in _handle_exception(self, e)
    465     def _handle_exception(self, e: BaseException) -> None:
    466         if Events.EXCEPTION_RAISED in self._event_handlers:
--> 467             self._fire_event(Events.EXCEPTION_RAISED, e)
    468         else:
    469             raise e

/opt/conda/lib/python3.8/site-packages/ignite/engine/engine.py in _fire_event(self, event_name, *event_args, **event_kwargs)
    422             kwargs.update(event_kwargs)
    423             first, others = ((args[0],), args[1:]) if (args and args[0] == self) else ((), args)
--> 424             func(*first, *(event_args + others), **kwargs)
    425 
    426     def fire_event(self, event_name: Any) -> None:

/__w/MONAI/MONAI/monai/handlers/stats_handler.py in exception_raised(self, engine, e)
    143         """
    144         self.logger.exception(f"Exception: {e}")
--> 145         raise e
    146 
    147     def _default_epoch_print(self, engine: Engine) -> None:

/opt/conda/lib/python3.8/site-packages/ignite/engine/engine.py in _run_once_on_dataset(self)
    799                     if self.last_event_name != Events.DATALOADER_STOP_ITERATION:
    800                         self._fire_event(Events.GET_BATCH_STARTED)
--> 801                     self.state.batch = next(self._dataloader_iter)
    802                     self._fire_event(Events.GET_BATCH_COMPLETED)
    803                     iter_counter += 1

/opt/conda/lib/python3.8/site-packages/torch/utils/data/dataloader.py in __next__(self)
    515             if self._sampler_iter is None:
    516                 self._reset()
--> 517             data = self._next_data()
    518             self._num_yielded += 1
    519             if self._dataset_kind == _DatasetKind.Iterable and \

/opt/conda/lib/python3.8/site-packages/torch/utils/data/dataloader.py in _next_data(self)
   1197             else:
   1198                 del self._task_info[idx]
-> 1199                 return self._process_data(data)
   1200 
   1201     def _try_put_index(self):

/opt/conda/lib/python3.8/site-packages/torch/utils/data/dataloader.py in _process_data(self, data)
   1223         self._try_put_index()
   1224         if isinstance(data, ExceptionWrapper):
-> 1225             data.reraise()
   1226         return data
   1227 

/opt/conda/lib/python3.8/site-packages/torch/_utils.py in reraise(self)
    427             # have message field
    428             raise self.exc_type(message=msg)
--> 429         raise self.exc_type(msg)
    430 
    431 

RuntimeError: Caught RuntimeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/__w/MONAI/MONAI/monai/data/utils.py", line 257, in list_data_collate
    return default_collate(data)
  File "/opt/conda/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py", line 73, in default_collate
    return {key: default_collate([d[key] for d in batch]) for key in elem}
  File "/opt/conda/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py", line 73, in <dictcomp>
    return {key: default_collate([d[key] for d in batch]) for key in elem}
  File "/opt/conda/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py", line 83, in default_collate
    return [default_collate(samples) for samples in transposed]
  File "/opt/conda/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py", line 83, in <listcomp>
    return [default_collate(samples) for samples in transposed]
  File "/opt/conda/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py", line 73, in default_collate
    return {key: default_collate([d[key] for d in batch]) for key in elem}
  File "/opt/conda/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py", line 73, in <dictcomp>
    return {key: default_collate([d[key] for d in batch]) for key in elem}
  File "/opt/conda/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py", line 73, in default_collate
    return {key: default_collate([d[key] for d in batch]) for key in elem}
  File "/opt/conda/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py", line 73, in <dictcomp>
    return {key: default_collate([d[key] for d in batch]) for key in elem}
  File "/opt/conda/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py", line 63, in default_collate
    return default_collate([torch.as_tensor(b) for b in batch])
  File "/opt/conda/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py", line 55, in default_collate
    return torch.stack(batch, 0, out=out)
RuntimeError: stack expects each tensor to be equal size, but got [3, 3] at entry 0 and [4, 4] at entry 4

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/opt/conda/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py", line 202, in _worker_loop
    data = fetcher.fetch(index)
  File "/opt/conda/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 47, in fetch
    return self.collate_fn(data)
  File "/__w/MONAI/MONAI/monai/data/utils.py", line 266, in list_data_collate
    raise RuntimeError(re_str)
RuntimeError: stack expects each tensor to be equal size, but got [3, 3] at entry 0 and [4, 4] at entry 4
MONAI hint: if your transforms intentionally create images of different shapes, creating your `DataLoader` with `collate_fn=pad_list_data_collate` might solve this problem (check its documentation).

Metadata

Assignees

No one assigned

    Labels

    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions