Skip to content

Instantly share code, notes, and snippets.

@cyb70289
Created April 2, 2025 02:04
Show Gist options
  • Save cyb70289/03682cf9b47d8caccf007386d08df8c6 to your computer and use it in GitHub Desktop.
Save cyb70289/03682cf9b47d8caccf007386d08df8c6 to your computer and use it in GitHub Desktop.
llama model interleave
diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt
index 9a4ee49..137e3ea 100644
--- a/ggml/CMakeLists.txt
+++ b/ggml/CMakeLists.txt
@@ -337,6 +337,11 @@ set(GGML_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of
set(GGML_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
set(GGML_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")
+foreach(lib "ggml" "ggml-base")  # apply the same link dependency to both ggml targets
+ target_link_libraries(${lib} PUBLIC numa)  # NOTE(review): links libnuma unconditionally; no platform/availability guard is visible in this hunk - confirm
+endforeach()
+
+
configure_package_config_file(
${CMAKE_CURRENT_SOURCE_DIR}/cmake/ggml-config.cmake.in
${CMAKE_CURRENT_BINARY_DIR}/ggml-config.cmake
diff --git a/src/llama-mmap.cpp b/src/llama-mmap.cpp
index 3970b74..ae51025 100644
--- a/src/llama-mmap.cpp
+++ b/src/llama-mmap.cpp
@@ -269,6 +269,9 @@ void llama_file::write_u32(uint32_t val) const { pimpl->write_u32(val); }
// llama_mmap
+#include <numa.h>
+#include <numaif.h>
+
struct llama_mmap::impl {
#ifdef _POSIX_MAPPED_FILES
std::vector<std::pair<size_t, size_t>> mapped_fragments;
@@ -285,6 +288,41 @@ struct llama_mmap::impl {
}
if (prefetch) { flags |= MAP_POPULATE; }
#endif
+#if 1
+ // Experimental path: map the file read-only with a literal MAP_SHARED (the `flags` computed above, incl. MAP_POPULATE, is not used here)
+ void *mapped = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0);
+ if (mapped == MAP_FAILED) {
+ perror("mmap");
+ exit(1); // NOTE(review): terminates the process, whereas the #else path throws std::runtime_error on mmap failure
+ }
+
+ // Save the current interleave mask so it can be put back after the pages have been populated
+ struct bitmask *old_policy = numa_get_interleave_mask();
+ if (!old_policy) {
+ fprintf(stderr, "Failed to get current NUMA interleave mask.\n");
+ exit(1);
+ }
+
+ // Build a node mask holding only nodes 0 and 1 and install it as the interleave mask. NOTE(review): node IDs are hard-coded - presumably assumes a two-node machine; confirm
+ struct bitmask *new_policy = numa_allocate_nodemask();
+ numa_bitmask_clearall(new_policy);
+ numa_bitmask_setbit(new_policy, 0);
+ numa_bitmask_setbit(new_policy, 1);
+ numa_set_interleave_mask(new_policy);
+
+ // Touch one byte in every page of the mapping while the new interleave mask is in effect
+ size_t pagesize = sysconf(_SC_PAGESIZE);
+ volatile char sum = 0; // volatile so the reads below are not optimized away; the value itself is never used
+ for (off_t offset = 0; offset < size; offset += pagesize) {
+ sum += *((volatile char *)mapped + offset); // Read a byte to trigger page fault
+ }
+
+ printf("File pages populated with interleave policy on nodes 0 and 1.\n");
+
+ // Put the saved interleave mask back. NOTE(review): old_policy and new_policy are never released in this block (no numa_bitmask_free call) - confirm whether intended
+ numa_set_interleave_mask(old_policy);
+ addr = mapped;
+#else
addr = mmap(NULL, file->size(), PROT_READ, flags, fd, 0);
if (addr == MAP_FAILED) {
throw std::runtime_error(format("mmap failed: %s", strerror(errno)));
@@ -304,6 +342,7 @@ struct llama_mmap::impl {
}
mapped_fragments.emplace_back(0, file->size());
+#endif
}
static void align_range(size_t * first, size_t * last, size_t page_size) {
@@ -319,6 +358,7 @@ struct llama_mmap::impl {
}
void unmap_fragment(size_t first, size_t last) {
+#if 0
int page_size = sysconf(_SC_PAGESIZE);
align_range(&first, &last, page_size);
size_t len = last - first;
@@ -352,6 +392,7 @@ struct llama_mmap::impl {
}
}
mapped_fragments = std::move(new_mapped_fragments);
+#endif
}
~impl() {
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment