Created August 22, 2023 00:33
-
-
Save opparco/b741c5cffd638f4ecce3788a6cc75a3a to your computer and use it in GitHub Desktop.
build gptneox-wip
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index d2176c9..3735e62 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -13,6 +13,7 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR})
if (EMSCRIPTEN)
else()
add_subdirectory(main)
+ add_subdirectory(gptneox-wip)
add_subdirectory(quantize)
add_subdirectory(quantize-stats)
add_subdirectory(perplexity)
diff --git a/examples/gptneox-wip/CMakeLists.txt b/examples/gptneox-wip/CMakeLists.txt
new file mode 100644
index 0000000..b43841e
--- /dev/null
+++ b/examples/gptneox-wip/CMakeLists.txt
@@ -0,0 +1,8 @@
+set(TARGET gptneox-main)
+add_executable(${TARGET} gptneox-main.cpp)
+install(TARGETS ${TARGET} RUNTIME)
+target_link_libraries(${TARGET} PRIVATE llama ${CMAKE_THREAD_LIBS_INIT})
+target_compile_features(${TARGET} PRIVATE cxx_std_11)
+if(TARGET BUILD_INFO)
+ add_dependencies(${TARGET} BUILD_INFO)
+endif()
diff --git a/examples/gptneox-wip/cmpnct_gpt2bpe.hpp b/examples/gptneox-wip/cmpnct_gpt2bpe.hpp
index 9d433f4..0ea971a 100644
--- a/examples/gptneox-wip/cmpnct_gpt2bpe.hpp
+++ b/examples/gptneox-wip/cmpnct_gpt2bpe.hpp
@@ -688,10 +688,10 @@ struct gpt2bpe_vocab {
int find_bpe_rank(const std::string& token1, const std::string& token2) const {
std::string left_token = token1;
std::string right_token = token2;
- left_token = replaceAll(left_token, " ", "Ġ");
- left_token = replaceAll(left_token, "\n", "Ċ");
- right_token = replaceAll(right_token, " ", "Ġ");
- right_token = replaceAll(right_token, "\n", "Ċ");
+ left_token = replaceAll(left_token, " ", u8"\u0120");
+ left_token = replaceAll(left_token, "\n", u8"\u010a");
+ right_token = replaceAll(right_token, " ", u8"\u0120");
+ right_token = replaceAll(right_token, "\n", u8"\u010a");
auto it = bpe_ranks.find(std::make_pair(left_token, right_token));
if (it == bpe_ranks.end()) {
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.