Last active
January 24, 2025 00:37
-
-
Save Aphoh/88f46022bd0d881dd79b9b41d5afd103 to your computer and use it in GitHub Desktop.
Barrier crash log
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
2025-01-23T13:34:40 - 0 - tqdm_loggable.tqdm_logging - tqdm_logging.py:145 - INFO :: Progress on:train 112it/1.00kit rate:84.6s/it remaining:20:51:22 elapsed:2:16:28 postfix:loss=0.698
Traceback (most recent call last):
  File "/opt/levanter/src/levanter/main/routed_lm.py", line 290, in <module>
    levanter.config.main(main)()
  File "/opt/levanter/src/levanter/config.py", line 84, in wrapper_inner
    response = fn(cfg, *args, **kwargs)
  File "/opt/levanter/src/levanter/main/routed_lm.py", line 277, in main
    last_info = trainer.train(state, train_loader)
  File "/opt/levanter/src/levanter/trainer.py", line 428, in train
    for info in self.training_steps(state, train_loader):
  File "/opt/levanter/src/levanter/trainer.py", line 416, in training_steps
    example = next(iter_data)
  File "/opt/levanter/src/levanter/data/loader.py", line 125, in __next__
    individual_data_batch = next(self._batches)
  File "/opt/levanter/src/levanter/utils/background_iterable.py", line 66, in __next__
    batch.reraise()
  File "/opt/levanter/src/levanter/utils/background_iterable.py", line 162, in reraise
    raise self.exc.with_traceback(self.tb.as_traceback())
  File "/opt/levanter/src/levanter/utils/background_iterable.py", line 123, in _produce_batches_async
    async for batch in iterator:
  File "/opt/levanter/src/levanter/data/loader.py", line 150, in _produce_batches
    async for batch in self._retrieve_batches(next_batch_numbers):
  File "/opt/levanter/src/levanter/data/loader.py", line 166, in _retrieve_batches
    individual_datums_for_each_batch = await self._do_retrieve_batch_of_batches(batch_numbers)
  File "/opt/levanter/src/levanter/data/loader.py", line 238, in _do_retrieve_batch_of_batches
    individual_datums = await self.dl.data_store.get_batch(indices_for_this_batch_of_batches)
  File "/opt/levanter/src/levanter/data/dataset.py", line 417, in get_batch
    items = await self.dataset.get_batch(indices)
  File "/opt/levanter/src/levanter/store/cache.py", line 254, in get_batch
    return await self.store.get_batch(indices)
  File "/opt/levanter/src/levanter/store/tree_store.py", line 144, in get_batch
    awaited_leaves = await asyncio.gather(*leaves)
  File "/opt/levanter/src/levanter/store/jagged_array.py", line 389, in get_batch
    data_futs = [self.data[start:stop].read() for start, stop in all_indices]
  File "/opt/levanter/src/levanter/store/jagged_array.py", line 389, in <listcomp>
    data_futs = [self.data[start:stop].read() for start, stop in all_indices]
IndexError: Computing interval slice for dimension 0: (205526950, -1) do not specify a valid closed index interval [source locations='tensorstore/index_interval.cc:358\ntensorstore/index_interval.cc:401\ntensorstore/index_space/internal/numpy_indexing_spec.cc:501\ntensorstore/index_space/internal/numpy_indexing_spec.cc:501\ntensorstore/index_space.h:438']
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment