ooba errors
PS D:\oobabooga_windows> notepad.exe .\CMD_FLAGS.txt
PS D:\oobabooga_windows> .\start_windows.bat
bin D:\oobabooga_windows\installer_files\env\lib\site-packages\bitsandbytes\libbitsandbytes_cuda117.dll
2023-08-09 18:26:23 INFO:Loading the extension "gallery"...
Running on local URL: http://0.0.0.0:7860
To create a public link, set `share=True` in `launch()`.
2023-08-09 18:27:29 INFO:Loading TheBloke_Llama-2-13B-GGML...
ggml_init_cublas: found 1 CUDA devices:
  Device 0: NVIDIA GeForce RTX 3090, compute capability 8.6
2023-08-09 18:27:30 INFO:llama.cpp weights detected: models\TheBloke_Llama-2-13B-GGML\llama-2-13b.ggmlv3.q2_K.bin
2023-08-09 18:27:30 INFO:Cache capacity is 0 bytes
llama.cpp: loading model from models\TheBloke_Llama-2-13B-GGML\llama-2-13b.ggmlv3.q2_K.bin
llama_model_load_internal: format = ggjt v3 (latest)
llama_model_load_internal: n_vocab = 32000
llama_model_load_internal: n_ctx = 2048
llama_model_load_internal: n_embd = 5120
llama_model_load_internal: n_mult = 256
llama_model_load_internal: n_head = 40
llama_model_load_internal: n_head_kv = 40
llama_model_load_internal: n_layer = 40
llama_model_load_internal: n_rot = 128
llama_model_load_internal: n_gqa = 1
llama_model_load_internal: rnorm_eps = 5.0e-06
llama_model_load_internal: n_ff = 13824
llama_model_load_internal: freq_base = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype = 10 (mostly Q2_K)
llama_model_load_internal: model size = 13B
llama_model_load_internal: ggml ctx size = 0.11 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required = 5747.01 MB (+ 1600.00 MB per state)
llama_model_load_internal: offloading 0 repeating layers to GPU
llama_model_load_internal: offloaded 0/43 layers to GPU
llama_model_load_internal: total VRAM used: 480 MB
llama_new_context_with_model: kv self size = 1600.00 MB
AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | VSX = 0 |
2023-08-09 18:27:32 INFO:Loaded the model in 2.75 seconds.
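
(Note on the load log above: the GGML file loads with a 2048-token context and 0 of 43 layers offloaded to the RTX 3090, so inference is running almost entirely on the CPU even though CUDA was detected. The webui's llama.cpp backend uses the llama-cpp-python bindings; as a rough sketch only, not the webui's exact code, the equivalent standalone load would look something like the lines below. The path and n_ctx come from the log; n_gpu_layers=0 mirrors the "offloaded 0/43 layers" line, and raising it, e.g. via --n-gpu-layers in CMD_FLAGS.txt, is what actually moves layers into VRAM.)

from llama_cpp import Llama

# Values taken from the load log above; everything else is illustrative.
llm = Llama(
    model_path=r"models\TheBloke_Llama-2-13B-GGML\llama-2-13b.ggmlv3.q2_K.bin",
    n_ctx=2048,        # llama_model_load_internal: n_ctx = 2048
    n_gpu_layers=0,    # "offloaded 0/43 layers to GPU" -- raise this to offload layers
)
print(llm("Hello", max_tokens=16)["choices"][0]["text"])
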
Traceback (most recent call last):
  File "D:\oobabooga_windows\installer_files\env\lib\site-packages\gradio\routes.py", line 427, in run_predict
    output = await app.get_blocks().process_api(
  File "D:\oobabooga_windows\installer_files\env\lib\site-packages\gradio\blocks.py", line 1323, in process_api
    result = await self.call_function(
  File "D:\oobabooga_windows\installer_files\env\lib\site-packages\gradio\blocks.py", line 1067, in call_function
    prediction = await utils.async_iteration(iterator)
  File "D:\oobabooga_windows\installer_files\env\lib\site-packages\gradio\utils.py", line 336, in async_iteration
    return await iterator.__anext__()
  File "D:\oobabooga_windows\installer_files\env\lib\site-packages\gradio\utils.py", line 329, in __anext__
    return await anyio.to_thread.run_sync(
  File "D:\oobabooga_windows\installer_files\env\lib\site-packages\anyio\to_thread.py", line 33, in run_sync
    return await get_asynclib().run_sync_in_worker_thread(
  File "D:\oobabooga_windows\installer_files\env\lib\site-packages\anyio\_backends\_asyncio.py", line 877, in run_sync_in_worker_thread
    return await future
  File "D:\oobabooga_windows\installer_files\env\lib\site-packages\anyio\_backends\_asyncio.py", line 807, in run
    result = context.run(func, *args)
  File "D:\oobabooga_windows\installer_files\env\lib\site-packages\gradio\utils.py", line 312, in run_sync_iterator_async
    return next(iterator)
  File "D:\oobabooga_windows\text-generation-webui\modules\chat.py", line 327, in generate_chat_reply_wrapper
    for i, history in enumerate(generate_chat_reply(text, state, regenerate, _continue, loading_message=True)):
  File "D:\oobabooga_windows\text-generation-webui\modules\chat.py", line 312, in generate_chat_reply
    for history in chatbot_wrapper(text, state, regenerate=regenerate, _continue=_continue, loading_message=loading_message):
  File "D:\oobabooga_windows\text-generation-webui\modules\chat.py", line 197, in chatbot_wrapper
    stopping_strings = get_stopping_strings(state)
  File "D:\oobabooga_windows\text-generation-webui\modules\chat.py", line 160, in get_stopping_strings
    state['turn_template'].split('<|user-message|>')[1].split('<|bot|>')[0] + '<|bot|>',
IndexError: list index out of range
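
(Note on the traceback: the IndexError comes from the stopping-string expression in modules/chat.py, line 160 in this install, which assumes the turn template contains the <|user-message|> placeholder. One plausible cause, judging only from this traceback, is that the selected chat/instruction template left turn_template empty, so split() returns a single-element list and the [1] lookup fails. A minimal sketch of that failure mode follows; the state dict is a stand-in, not the webui's real object.)

state = {'turn_template': ''}   # assumption: empty template, e.g. none loaded for this model

parts = state['turn_template'].split('<|user-message|>')
print(parts)   # [''] -- only one element, so parts[1] does not exist
stopping = parts[1].split('<|bot|>')[0] + '<|bot|>'   # IndexError: list index out of range
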