Created
April 2, 2025 02:04
-
-
Save cyb70289/03682cf9b47d8caccf007386d08df8c6 to your computer and use it in GitHub Desktop.
llama model interleave
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt | |
index 9a4ee49..137e3ea 100644 | |
--- a/ggml/CMakeLists.txt | |
+++ b/ggml/CMakeLists.txt | |
@@ -337,6 +337,11 @@ set(GGML_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of | |
set(GGML_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files") | |
set(GGML_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files") | |
+foreach(lib "ggml" "ggml-base") | |
+ target_link_libraries(${lib} PUBLIC numa) | |
+endforeach() | |
+ | |
+ | |
configure_package_config_file( | |
${CMAKE_CURRENT_SOURCE_DIR}/cmake/ggml-config.cmake.in | |
${CMAKE_CURRENT_BINARY_DIR}/ggml-config.cmake | |
diff --git a/src/llama-mmap.cpp b/src/llama-mmap.cpp | |
index 3970b74..ae51025 100644 | |
--- a/src/llama-mmap.cpp | |
+++ b/src/llama-mmap.cpp | |
@@ -269,6 +269,9 @@ void llama_file::write_u32(uint32_t val) const { pimpl->write_u32(val); } | |
// llama_mmap | |
+#include <numa.h> | |
+#include <numaif.h> | |
+ | |
struct llama_mmap::impl { | |
#ifdef _POSIX_MAPPED_FILES | |
std::vector<std::pair<size_t, size_t>> mapped_fragments; | |
@@ -285,6 +288,41 @@ struct llama_mmap::impl { | |
} | |
if (prefetch) { flags |= MAP_POPULATE; } | |
#endif | |
+#if 1 | |
+ // mmap the file as read-only | |
+ void *mapped = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0); | |
+ if (mapped == MAP_FAILED) { | |
+ perror("mmap"); | |
+ exit(1); | |
+ } | |
+ | |
+ // Back up the current NUMA interleave mask so it can be restored after the pages are populated | |
+ struct bitmask *old_policy = numa_get_interleave_mask(); | |
+ if (!old_policy) { | |
+ fprintf(stderr, "Failed to get current NUMA interleave mask.\n"); | |
+ exit(1); | |
+ } | |
+ | |
+ // Install a new interleave mask covering NUMA nodes 0 and 1 (node IDs are hard-coded) | |
+ struct bitmask *new_policy = numa_allocate_nodemask(); | |
+ numa_bitmask_clearall(new_policy); | |
+ numa_bitmask_setbit(new_policy, 0); | |
+ numa_bitmask_setbit(new_policy, 1); | |
+ numa_set_interleave_mask(new_policy); | |
+ | |
+ // Explicitly populate pages by reading them once | |
+ size_t pagesize = sysconf(_SC_PAGESIZE); | |
+ volatile char sum = 0; // volatile to avoid optimization | |
+ for (off_t offset = 0; offset < size; offset += pagesize) { | |
+ sum += *((volatile char *)mapped + offset); // Read a byte to trigger page fault | |
+ } | |
+ | |
+ printf("File pages populated with interleave policy on nodes 0 and 1.\n"); | |
+ | |
+ // Restore original NUMA policy | |
+ numa_set_interleave_mask(old_policy); | |
+ addr = mapped; | |
+#else | |
addr = mmap(NULL, file->size(), PROT_READ, flags, fd, 0); | |
if (addr == MAP_FAILED) { | |
throw std::runtime_error(format("mmap failed: %s", strerror(errno))); | |
@@ -304,6 +342,7 @@ struct llama_mmap::impl { | |
} | |
mapped_fragments.emplace_back(0, file->size()); | |
+#endif | |
} | |
static void align_range(size_t * first, size_t * last, size_t page_size) { | |
@@ -319,6 +358,7 @@ struct llama_mmap::impl { | |
} | |
void unmap_fragment(size_t first, size_t last) { | |
+#if 0 | |
int page_size = sysconf(_SC_PAGESIZE); | |
align_range(&first, &last, page_size); | |
size_t len = last - first; | |
@@ -352,6 +392,7 @@ struct llama_mmap::impl { | |
} | |
} | |
mapped_fragments = std::move(new_mapped_fragments); | |
+#endif | |
} | |
~impl() { |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment