-
-
Save Chris-hughes10/73628b1d8d6fc7d359b3dcbbbb8869d7 to your computer and use it in GitHub Desktop.
Hi,
I got this error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Input In [41], in <cell line: 4>()
1 from pytorch_lightning import Trainer
2 trainer = Trainer(gpus=[0], max_epochs=5, num_sanity_val_steps=1)
----> 4 trainer.fit(model, dm)
File ~\Anaconda3\envs\SiT\lib\site-packages\pytorch_lightning\trainer\trainer.py:553, in Trainer.fit(self, model, train_dataloaders, val_dataloaders, datamodule, train_dataloader)
547 self.data_connector.attach_data(
548 model, train_dataloaders=train_dataloaders, val_dataloaders=val_dataloaders, datamodule=datamodule
549 )
551 self.checkpoint_connector.resume_start()
--> 553 self._run(model)
555 assert self.state.stopped
556 self.training = False
File ~\Anaconda3\envs\SiT\lib\site-packages\pytorch_lightning\trainer\trainer.py:918, in Trainer._run(self, model)
915 self.checkpoint_connector.restore_training_state()
917 # dispatch `start_training` or `start_evaluating` or `start_predicting`
--> 918 self._dispatch()
920 # plugin will finalized fitting (e.g. ddp_spawn will load trained model)
921 self._post_dispatch()
File ~\Anaconda3\envs\SiT\lib\site-packages\pytorch_lightning\trainer\trainer.py:986, in Trainer._dispatch(self)
984 self.accelerator.start_predicting(self)
985 else:
--> 986 self.accelerator.start_training(self)
File ~\Anaconda3\envs\SiT\lib\site-packages\pytorch_lightning\accelerators\accelerator.py:92, in Accelerator.start_training(self, trainer)
91 def start_training(self, trainer: "pl.Trainer") -> None:
---> 92 self.training_type_plugin.start_training(trainer)
File ~\Anaconda3\envs\SiT\lib\site-packages\pytorch_lightning\plugins\training_type\training_type_plugin.py:161, in TrainingTypePlugin.start_training(self, trainer)
159 def start_training(self, trainer: "pl.Trainer") -> None:
160 # double dispatch to initiate the training loop
--> 161 self._results = trainer.run_stage()
File ~\Anaconda3\envs\SiT\lib\site-packages\pytorch_lightning\trainer\trainer.py:996, in Trainer.run_stage(self)
994 if self.predicting:
995 return self._run_predict()
--> 996 return self._run_train()
File ~\Anaconda3\envs\SiT\lib\site-packages\pytorch_lightning\trainer\trainer.py:1031, in Trainer._run_train(self)
1028 if not self.is_global_zero and self.progress_bar_callback is not None:
1029 self.progress_bar_callback.disable()
-> 1031 self._run_sanity_check(self.lightning_module)
1033 # enable train mode
1034 self.model.train()
File ~\Anaconda3\envs\SiT\lib\site-packages\pytorch_lightning\trainer\trainer.py:1115, in Trainer._run_sanity_check(self, ref_model)
1113 # run eval step
1114 with torch.no_grad():
-> 1115 self._evaluation_loop.run()
1117 self.on_sanity_check_end()
1119 # reset validation metrics
File ~\Anaconda3\envs\SiT\lib\site-packages\pytorch_lightning\loops\base.py:111, in Loop.run(self, *args, **kwargs)
109 try:
110 self.on_advance_start(*args, **kwargs)
--> 111 self.advance(*args, **kwargs)
112 self.on_advance_end()
113 self.iteration_count += 1
File ~\Anaconda3\envs\SiT\lib\site-packages\pytorch_lightning\loops\dataloader\evaluation_loop.py:110, in EvaluationLoop.advance(self, *args, **kwargs)
107 dataloader_iter = enumerate(dataloader)
108 dl_max_batches = self._max_batches[self.current_dataloader_idx]
--> 110 dl_outputs = self.epoch_loop.run(
111 dataloader_iter, self.current_dataloader_idx, dl_max_batches, self.num_dataloaders
112 )
114 # store batch level output per dataloader
115 if self.should_track_batch_outputs_for_epoch_end:
File ~\Anaconda3\envs\SiT\lib\site-packages\pytorch_lightning\loops\base.py:111, in Loop.run(self, *args, **kwargs)
109 try:
110 self.on_advance_start(*args, **kwargs)
--> 111 self.advance(*args, **kwargs)
112 self.on_advance_end()
113 self.iteration_count += 1
File ~\Anaconda3\envs\SiT\lib\site-packages\pytorch_lightning\loops\epoch\evaluation_epoch_loop.py:93, in EvaluationEpochLoop.advance(self, dataloader_iter, dataloader_idx, dl_max_batches, num_dataloaders)
80 """Calls the evaluation step with the corresponding hooks and updates the logger connector.
81
82 Args:
(...)
89 StopIteration: If the current batch is None
90 """
91 void(dl_max_batches, num_dataloaders)
---> 93 batch_idx, batch = next(dataloader_iter)
95 if batch is None:
96 raise StopIteration
File ~\Anaconda3\envs\SiT\lib\site-packages\torch\utils\data\dataloader.py:521, in _BaseDataLoaderIter.__next__(self)
519 if self._sampler_iter is None:
520 self._reset()
--> 521 data = self._next_data()
522 self._num_yielded += 1
523 if self._dataset_kind == _DatasetKind.Iterable and \
524 self._IterableDataset_len_called is not None and \
525 self._num_yielded > self._IterableDataset_len_called:
File ~\Anaconda3\envs\SiT\lib\site-packages\torch\utils\data\dataloader.py:561, in _SingleProcessDataLoaderIter._next_data(self)
559 def _next_data(self):
560 index = self._next_index() # may raise StopIteration
--> 561 data = self._dataset_fetcher.fetch(index) # may raise StopIteration
562 if self._pin_memory:
563 data = _utils.pin_memory.pin_memory(data)
File ~\Anaconda3\envs\SiT\lib\site-packages\torch\utils\data\_utils\fetch.py:49, in _MapDatasetFetcher.fetch(self, possibly_batched_index)
47 def fetch(self, possibly_batched_index):
48 if self.auto_collation:
---> 49 data = [self.dataset[idx] for idx in possibly_batched_index]
50 else:
51 data = self.dataset[possibly_batched_index]
File ~\Anaconda3\envs\SiT\lib\site-packages\torch\utils\data\_utils\fetch.py:49, in <listcomp>(.0)
47 def fetch(self, possibly_batched_index):
48 if self.auto_collation:
---> 49 data = [self.dataset[idx] for idx in possibly_batched_index]
50 else:
51 data = self.dataset[possibly_batched_index]
Input In [36], in EfficientDetDataset.__getitem__(self, index)
43 (
44 image,
45 pascal_bboxes,
46 class_labels,
47 image_id,
48 ) = self.ds.get_image_and_labels_by_idx(index)
50 sample = {
51 "image": np.array(image, dtype=np.float32),
52 "bboxes": pascal_bboxes,
53 "labels": class_labels,
54 }
---> 56 sample = self.transforms(**sample)
57 sample["bboxes"] = np.array(sample["bboxes"])
58 image = sample["image"]
File ~\Anaconda3\envs\SiT\lib\site-packages\albumentations\core\composition.py:182, in Compose.__call__(self, force_apply, *args, **data)
179 for p in self.processors.values():
180 p.preprocess(data)
--> 182 data = t(force_apply=force_apply, **data)
184 if dual_start_end is not None and idx == dual_start_end[1]:
185 for p in self.processors.values():
File ~\Anaconda3\envs\SiT\lib\site-packages\albumentations\core\transforms_interface.py:90, in BasicTransform.__call__(self, force_apply, *args, **kwargs)
85 warn(
86 self.get_class_fullname() + " could work incorrectly in ReplayMode for other input data"
87 " because its' params depend on targets."
88 )
89 kwargs[self.save_key][id(self)] = deepcopy(params)
---> 90 return self.apply_with_params(params, **kwargs)
92 return kwargs
File ~\Anaconda3\envs\SiT\lib\site-packages\albumentations\core\transforms_interface.py:103, in BasicTransform.apply_with_params(self, params, force_apply, **kwargs)
101 target_function = self._get_target_function(key)
102 target_dependencies = {k: kwargs[k] for k in self.target_dependence.get(key, [])}
--> 103 res[key] = target_function(arg, **dict(params, **target_dependencies))
104 else:
105 res[key] = None
File ~\Anaconda3\envs\SiT\lib\site-packages\albumentations\augmentations\transforms.py:602, in Normalize.apply(self, image, **params)
601 def apply(self, image, **params):
--> 602 return F.normalize(image, self.mean, self.std, self.max_pixel_value)
File ~\Anaconda3\envs\SiT\lib\site-packages\albumentations\augmentations\functional.py:141, in normalize(img, mean, std, max_pixel_value)
138 denominator = np.reciprocal(std, dtype=np.float32)
140 img = img.astype(np.float32)
--> 141 img -= mean
142 img *= denominator
143 return img
ValueError: operands could not be broadcast together with shapes (512,512) (3,) (512,512)
Any idea how to solve this?
Hi @Chris-hughes10, thank you for this great work.
I'm new to this field. I have a dataset in Pascal VOC format with XML annotations. How can I use it for training? Can you please help me?
Dear @Chris-hughes10, Thank you for this amazing work.
Have you done any work on converting a model saved with torch.save() to ONNX?
I am asking this question because I got stuck trying to convert the saved model. The conversion script that I am using is this:
import os
import io
import numpy as np
import pandas as pd
from functools import partial
from custom_utils import widerface_data_adaptor
from custom_utils import effdet_data_module
from custom_utils import effdet_model
import torch
import torch.onnx
from effdet import get_efficientdet_config, EfficientDet, DetBenchPredict
model_checkpoint_path = "/home/soroush.tabadkani/projects/efficientdet-pytorch/checkpoints/trained_effdet.pt"
device = torch.device('cuda')
input_shape = (1, 3, 512, 512)
dummy_input = torch.randn(input_shape, dtype=torch.float32, requires_grad=True).to(device)
net = effdet_model.EfficientDetModel(
num_classes=1,
img_size=512
)
net.load_state_dict(torch.load(model_checkpoint_path))
net.eval()
dynamic_axes = {out:{0:'batch_size'} for out in ['outputs']}
dynamic_axes.update({input: {0: 'batch_size'} for input in ['inputs']})
torch.onnx.export(net.cuda(),
(dummy_input),
'efficientdet-d0.onnx',
input_names = ['inputs'],
output_names = ['outputs'],
verbose=True,
dynamic_axes=dynamic_axes,
opset_version=12)
and the error I get is:
torch.onnx.export(net.cuda(),
File "/home/soroush.tabadkani/projects/efficientdet-pytorch/env_test/lib/python3.8/site-packages/torch/onnx/__init__.py", line 271, in export
return utils.export(model, args, f, export_params, verbose, training,
File "/home/soroush.tabadkani/projects/efficientdet-pytorch/env_test/lib/python3.8/site-packages/torch/onnx/utils.py", line 88, in export
_export(model, args, f, export_params, verbose, training, input_names, output_names,
File "/home/soroush.tabadkani/projects/efficientdet-pytorch/env_test/lib/python3.8/site-packages/torch/onnx/utils.py", line 694, in _export
_model_to_graph(model, args, verbose, input_names,
File "/home/soroush.tabadkani/projects/efficientdet-pytorch/env_test/lib/python3.8/site-packages/torch/onnx/utils.py", line 457, in _model_to_graph
graph, params, torch_out, module = _create_jit_graph(model, args,
File "/home/soroush.tabadkani/projects/efficientdet-pytorch/env_test/lib/python3.8/site-packages/torch/onnx/utils.py", line 420, in _create_jit_graph
graph, torch_out = _trace_and_get_graph_from_model(model, args)
File "/home/soroush.tabadkani/projects/efficientdet-pytorch/env_test/lib/python3.8/site-packages/torch/onnx/utils.py", line 380, in _trace_and_get_graph_from_model
torch.jit._get_trace_graph(model, args, strict=False, _force_outplace=False, _return_inputs_states=True)
File "/home/soroush.tabadkani/projects/efficientdet-pytorch/env_test/lib/python3.8/site-packages/torch/jit/_trace.py", line 1139, in _get_trace_graph
outs = ONNXTracedModule(f, strict, _force_outplace, return_inputs, _return_inputs_states)(*args, **kwargs)
File "/home/soroush.tabadkani/projects/efficientdet-pytorch/env_test/lib/python3.8/site-packages/torch/nn/modules/module.py", line 891, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/soroush.tabadkani/projects/efficientdet-pytorch/env_test/lib/python3.8/site-packages/torch/jit/_trace.py", line 125, in forward
graph, out = torch._C._create_graph_by_tracing(
File "/home/soroush.tabadkani/projects/efficientdet-pytorch/env_test/lib/python3.8/site-packages/torch/jit/_trace.py", line 116, in wrapper
outs.append(self.inner(*trace_inputs))
File "/home/soroush.tabadkani/projects/efficientdet-pytorch/env_test/lib/python3.8/site-packages/torch/nn/modules/module.py", line 889, in _call_impl
result = self._slow_forward(*input, **kwargs)
File "/home/soroush.tabadkani/projects/efficientdet-pytorch/env_test/lib/python3.8/site-packages/torch/nn/modules/module.py", line 862, in _slow_forward
result = self.forward(*input, **kwargs)
File "/home/soroush.tabadkani/projects/efficientdet-pytorch/env_test/lib/python3.8/site-packages/pytorch_lightning/core/decorators.py", line 62, in auto_transfer_args
return fn(self, *args, **kwargs)
TypeError: forward() missing 1 required positional argument: 'targets'
No matter which approach I tried to solve this problem, they all eventually resulted in the error above. Any help or guidance you can kindly provide would be deeply appreciated.
Hi @ramdhan1989, were you able to solve the operands broadcast issue? I am facing a similar error when training the model.
@Chris-hughes10
My model is predicting
The model I have trained has 15 classes. What could have possibly gone wrong?
Hi, thank you for this implementation, @Chris-hughes10.
I'm using your code for multi-class classification. My loss converges well, so I do not understand why my model predicts no bbox in every single image (whereas every image contains a bbox). Do you have any idea where this could come from?
Thanks,