message | InvalidEnvironment:
[...]InvalidEnvironment:
Traceback (most recent call last):
File "/usr/local/lib/python3.8/dist-packages/duckietown_experiment_manager/code.py", line 298, in main
robot_ci.write_topic_and_expect_zero("seed", config.seed)
File "/usr/local/lib/python3.8/dist-packages/zuper_nodes_wrapper/wrapper_outside.py", line 156, in write_topic_and_expect_zero
msgs = read_reply(self.fpout, timeout=timeout, nickname=self.nickname)
File "/usr/local/lib/python3.8/dist-packages/zuper_nodes_wrapper/wrapper_outside.py", line 338, in read_reply
raise RemoteNodeAborted(msg)
zuper_nodes.structures.RemoteNodeAborted: The remote node "ego0" aborted with the following error:
error in ego0 |Unexpected error:
|
|| Traceback (most recent call last):
|| File "/usr/local/lib/python3.8/dist-packages/zuper_nodes_wrapper/wrapper.py", line 322, in loop
|| call_if_fun_exists(node, "init", context=context_data)
|| File "/usr/local/lib/python3.8/dist-packages/zuper_nodes_wrapper/utils.py", line 21, in call_if_fun_exists
|| f(**kwargs)
|| File "solution.py", line 29, in init
|| self.model = RLlibModel(0, experiment_idx=0, checkpoint_idx=0, logger=context)
|| File "/submission/model.py", line 67, in __init__
|| self.model.restore(checkpoint_path)
|| File "/usr/local/lib/python3.8/dist-packages/ray/tune/trainable.py", line 476, in restore
|| self.load_checkpoint(checkpoint_path)
|| File "/usr/local/lib/python3.8/dist-packages/ray/rllib/agents/trainer.py", line 673, in load_checkpoint
|| self.__setstate__(extra_data)
|| File "/usr/local/lib/python3.8/dist-packages/ray/rllib/agents/trainer_template.py", line 164, in __setstate__
|| Trainer.__setstate__(self, state)
|| File "/usr/local/lib/python3.8/dist-packages/ray/rllib/agents/trainer.py", line 1168, in __setstate__
|| self.workers.local_worker().restore(state["worker"])
|| File "/usr/local/lib/python3.8/dist-packages/ray/rllib/evaluation/rollout_worker.py", line 969, in restore
|| self.policy_map[pid].set_state(state)
|| File "/usr/local/lib/python3.8/dist-packages/ray/rllib/policy/torch_policy.py", line 499, in set_state
|| optim_state_dict = convert_to_torch_tensor(
|| File "/usr/local/lib/python3.8/dist-packages/ray/rllib/utils/torch_ops.py", line 71, in convert_to_torch_tensor
|| return tree.map_structure(mapping, x)
|| File "/usr/local/lib/python3.8/dist-packages/tree/__init__.py", line 516, in map_structure
|| [func(*args) for args in zip(*map(flatten, structures))])
|| File "/usr/local/lib/python3.8/dist-packages/tree/__init__.py", line 516, in <listcomp>
|| [func(*args) for args in zip(*map(flatten, structures))])
|| File "/usr/local/lib/python3.8/dist-packages/ray/rllib/utils/torch_ops.py", line 69, in mapping
|| return tensor if device is None else tensor.to(device)
|| RuntimeError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 14.76 GiB total capacity; 35.49 MiB already allocated; 38.19 MiB free; 38.00 MiB reserved in total by PyTorch)
||
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/usr/local/lib/python3.8/dist-packages/duckietown_challenges/cie_concrete.py", line 691, in scoring_context
yield cie
File "/usr/local/lib/python3.8/dist-packages/duckietown_experiment_manager/experiment_manager.py", line 60, in go
wrap(cie)
File "/usr/local/lib/python3.8/dist-packages/duckietown_experiment_manager/experiment_manager.py", line 33, in wrap
asyncio.run(main(cie, logdir, attempts), debug=True)
File "/usr/lib/python3.8/asyncio/runners.py", line 44, in run
return loop.run_until_complete(main)
File "/usr/lib/python3.8/asyncio/base_events.py", line 616, in run_until_complete
return future.result()
File "/usr/local/lib/python3.8/dist-packages/duckietown_experiment_manager/code.py", line 306, in main
raise InvalidEnvironment(msg) from e
duckietown_challenges.exceptions.InvalidEnvironment: Detected out of CUDA memory:
Traceback (most recent call last):
File "/usr/local/lib/python3.8/dist-packages/duckietown_experiment_manager/code.py", line 298, in main
robot_ci.write_topic_and_expect_zero("seed", config.seed)
File "/usr/local/lib/python3.8/dist-packages/zuper_nodes_wrapper/wrapper_outside.py", line 156, in write_topic_and_expect_zero
msgs = read_reply(self.fpout, timeout=timeout, nickname=self.nickname)
File "/usr/local/lib/python3.8/dist-packages/zuper_nodes_wrapper/wrapper_outside.py", line 338, in read_reply
raise RemoteNodeAborted(msg)
zuper_nodes.structures.RemoteNodeAborted: The remote node "ego0" aborted with the following error:
error in ego0 |Unexpected error:
|
|| Traceback (most recent call last):
|| File "/usr/local/lib/python3.8/dist-packages/zuper_nodes_wrapper/wrapper.py", line 322, in loop
|| call_if_fun_exists(node, "init", context=context_data)
|| File "/usr/local/lib/python3.8/dist-packages/zuper_nodes_wrapper/utils.py", line 21, in call_if_fun_exists
|| f(**kwargs)
|| File "solution.py", line 29, in init
|| self.model = RLlibModel(0, experiment_idx=0, checkpoint_idx=0, logger=context)
|| File "/submission/model.py", line 67, in __init__
|| self.model.restore(checkpoint_path)
|| File "/usr/local/lib/python3.8/dist-packages/ray/tune/trainable.py", line 476, in restore
|| self.load_checkpoint(checkpoint_path)
|| File "/usr/local/lib/python3.8/dist-packages/ray/rllib/agents/trainer.py", line 673, in load_checkpoint
|| self.__setstate__(extra_data)
|| File "/usr/local/lib/python3.8/dist-packages/ray/rllib/agents/trainer_template.py", line 164, in __setstate__
|| Trainer.__setstate__(self, state)
|| File "/usr/local/lib/python3.8/dist-packages/ray/rllib/agents/trainer.py", line 1168, in __setstate__
|| self.workers.local_worker().restore(state["worker"])
|| File "/usr/local/lib/python3.8/dist-packages/ray/rllib/evaluation/rollout_worker.py", line 969, in restore
|| self.policy_map[pid].set_state(state)
|| File "/usr/local/lib/python3.8/dist-packages/ray/rllib/policy/torch_policy.py", line 499, in set_state
|| optim_state_dict = convert_to_torch_tensor(
|| File "/usr/local/lib/python3.8/dist-packages/ray/rllib/utils/torch_ops.py", line 71, in convert_to_torch_tensor
|| return tree.map_structure(mapping, x)
|| File "/usr/local/lib/python3.8/dist-packages/tree/__init__.py", line 516, in map_structure
|| [func(*args) for args in zip(*map(flatten, structures))])
|| File "/usr/local/lib/python3.8/dist-packages/tree/__init__.py", line 516, in <listcomp>
|| [func(*args) for args in zip(*map(flatten, structures))])
|| File "/usr/local/lib/python3.8/dist-packages/ray/rllib/utils/torch_ops.py", line 69, in mapping
|| return tensor if device is None else tensor.to(device)
|| RuntimeError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 14.76 GiB total capacity; 35.49 MiB already allocated; 38.19 MiB free; 38.00 MiB reserved in total by PyTorch)
||
|