cyb70289 · April 2, 2025 02:04
diff --git a/llama-model-interleave.diff b/llama-model-interleave.diff
 diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt
 index 9a4ee49..137e3ea 100644
 --- a/ggml/CMakeLists.txt
 +++ b/ggml/CMakeLists.txt
 @@ -337,6 +337,11 @@ set(GGML_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of
 set(GGML_LIB_INSTALL_DIR     ${CMAKE_INSTALL_LIBDIR}     CACHE PATH "Location of library files")
 set(GGML_BIN_INSTALL_DIR     ${CMAKE_INSTALL_BINDIR}     CACHE PATH "Location of binary  files")
 
 +foreach(lib "ggml" "ggml-base")
 +    target_link_libraries(${lib} PUBLIC numa)
 +endforeach()
 +
 +
 configure_package_config_file(
         ${CMAKE_CURRENT_SOURCE_DIR}/cmake/ggml-config.cmake.in
         ${CMAKE_CURRENT_BINARY_DIR}/ggml-config.cmake
 diff --git a/src/llama-mmap.cpp b/src/llama-mmap.cpp
 index 3970b74..ae51025 100644
 --- a/src/llama-mmap.cpp
 +++ b/src/llama-mmap.cpp
 @@ -269,6 +269,9 @@ void llama_file::write_u32(uint32_t val) const { pimpl->write_u32(val); }
 
 // llama_mmap
 
 +#include <numa.h>
 +#include <numaif.h>
 +
 struct llama_mmap::impl {
 #ifdef _POSIX_MAPPED_FILES
     std::vector<std::pair<size_t, size_t>> mapped_fragments;
 @@ -285,6 +288,41 @@ struct llama_mmap::impl {
         }
         if (prefetch) { flags |= MAP_POPULATE; }
 #endif
 +#if 1
 +        // mmap the file as read-only
 +        void *mapped = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0);
 +        if (mapped == MAP_FAILED) {
 +            perror("mmap");
 +            exit(1);
 +        }
 +
 +        // Backup the current NUMA policy
 +        struct bitmask *old_policy = numa_get_interleave_mask();
 +        if (!old_policy) {
 +            fprintf(stderr, "Failed to get current NUMA interleave mask.\n");
 +            exit(1);
 +        }
 +
 +        // Set new interleave policy to nodes 0 and 1
 +        struct bitmask *new_policy = numa_allocate_nodemask();
 +        numa_bitmask_clearall(new_policy);
 +        numa_bitmask_setbit(new_policy, 0);
 +        numa_bitmask_setbit(new_policy, 1);
 +        numa_set_interleave_mask(new_policy);
 +
 +        // Explicitly populate pages by reading them once
 +        size_t pagesize = sysconf(_SC_PAGESIZE);
 +        volatile char sum = 0;  // volatile to avoid optimization
 +        for (off_t offset = 0; offset < size; offset += pagesize) {
 +            sum += *((volatile char *)mapped + offset); // Read a byte to trigger page fault
 +        }
 +
 +        printf("File pages populated with interleave policy on nodes 0 and 1.\n");
 +
 +        // Restore original NUMA policy
 +        numa_set_interleave_mask(old_policy);
 +        addr = mapped;
 +#else
         addr = mmap(NULL, file->size(), PROT_READ, flags, fd, 0);
         if (addr == MAP_FAILED) {
             throw std::runtime_error(format("mmap failed: %s", strerror(errno)));
 @@ -304,6 +342,7 @@ struct llama_mmap::impl {
         }
 
         mapped_fragments.emplace_back(0, file->size());
 +#endif
     }
 
     static void align_range(size_t * first, size_t * last, size_t page_size) {
 @@ -319,6 +358,7 @@ struct llama_mmap::impl {
     }
 
     void unmap_fragment(size_t first, size_t last) {
 +#if 0
         int page_size = sysconf(_SC_PAGESIZE);
         align_range(&first, &last, page_size);
         size_t len = last - first;
 @@ -352,6 +392,7 @@ struct llama_mmap::impl {
             }
         }
         mapped_fragments = std::move(new_mapped_fragments);
 +#endif
     }
 
     ~impl() {
	diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt
	index 9a4ee49..137e3ea 100644
	--- a/ggml/CMakeLists.txt
	+++ b/ggml/CMakeLists.txt
	@@ -337,6 +337,11 @@ set(GGML_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of
	set(GGML_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
	set(GGML_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")

	+foreach(lib "ggml" "ggml-base")
	+ target_link_libraries(${lib} PUBLIC numa)
	+endforeach()
	+
	+
	configure_package_config_file(
	${CMAKE_CURRENT_SOURCE_DIR}/cmake/ggml-config.cmake.in
	${CMAKE_CURRENT_BINARY_DIR}/ggml-config.cmake
	diff --git a/src/llama-mmap.cpp b/src/llama-mmap.cpp
	index 3970b74..ae51025 100644
	--- a/src/llama-mmap.cpp
	+++ b/src/llama-mmap.cpp
	@@ -269,6 +269,9 @@ void llama_file::write_u32(uint32_t val) const { pimpl->write_u32(val); }

	// llama_mmap

	+#include <numa.h>
	+#include <numaif.h>
	+
	struct llama_mmap::impl {
	#ifdef _POSIX_MAPPED_FILES
	std::vector<std::pair<size_t, size_t>> mapped_fragments;
	@@ -285,6 +288,41 @@ struct llama_mmap::impl {
	}
	if (prefetch) { flags |= MAP_POPULATE; }
	#endif
	+#if 1
	+ // mmap the file as read-only
	+ void *mapped = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0);
	+ if (mapped == MAP_FAILED) {
	+ perror("mmap");
	+ exit(1);
	+ }
	+
	+ // Backup the current NUMA policy
	+ struct bitmask *old_policy = numa_get_interleave_mask();
	+ if (!old_policy) {
	+ fprintf(stderr, "Failed to get current NUMA interleave mask.\n");
	+ exit(1);
	+ }
	+
	+ // Set new interleave policy to nodes 0 and 1
	+ struct bitmask *new_policy = numa_allocate_nodemask();
	+ numa_bitmask_clearall(new_policy);
	+ numa_bitmask_setbit(new_policy, 0);
	+ numa_bitmask_setbit(new_policy, 1);
	+ numa_set_interleave_mask(new_policy);
	+
	+ // Explicitly populate pages by reading them once
	+ size_t pagesize = sysconf(_SC_PAGESIZE);
	+ volatile char sum = 0; // volatile to avoid optimization
	+ for (off_t offset = 0; offset < size; offset += pagesize) {
	+ sum += *((volatile char *)mapped + offset); // Read a byte to trigger page fault
	+ }
	+
	+ printf("File pages populated with interleave policy on nodes 0 and 1.\n");
	+
	+ // Restore original NUMA policy
	+ numa_set_interleave_mask(old_policy);
	+ addr = mapped;
	+#else
	addr = mmap(NULL, file->size(), PROT_READ, flags, fd, 0);
	if (addr == MAP_FAILED) {
	throw std::runtime_error(format("mmap failed: %s", strerror(errno)));
	@@ -304,6 +342,7 @@ struct llama_mmap::impl {
	}

	mapped_fragments.emplace_back(0, file->size());
	+#endif
	}

	static void align_range(size_t * first, size_t * last, size_t page_size) {
	@@ -319,6 +358,7 @@ struct llama_mmap::impl {
	}

	void unmap_fragment(size_t first, size_t last) {
	+#if 0
	int page_size = sysconf(_SC_PAGESIZE);
	align_range(&first, &last, page_size);
	size_t len = last - first;
	@@ -352,6 +392,7 @@ struct llama_mmap::impl {
	}
	}
	mapped_fragments = std::move(new_mapped_fragments);
	+#endif
	}

	~impl() {