Created
January 1, 2011 20:53
-
-
Save karthick18/762003 to your computer and use it in GitHub Desktop.
A proof-of-concept contiguous allocator using mmap: it reserves a VMA for the process with a MAP_FIXED, PROT_READ mapping and then faults in pages of the reserved hole on demand through the SIGSEGV handler.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * A simple on-demand, alloc-only mmap proof of concept that checks out the possibility of having a
 * single giant MAP_FIXED allocation for a process that may never need to call free and that just works
 * with one contiguous allocation space.
 *
 * A contiguous hole in the address space is reserved with a PROT_READ mapping; individual pages are then
 * overridden with writable mappings on page faults by the SIGSEGV handler, which mmaps the faulting address range.
 *
 * As a test, a 1 GiB file is created and then read into memory handed out from the contiguous VMA by a
 * brain-dead linear allocator, as there is no requirement to free memory here.
 * There are no calls to malloc, the application address space is contiguous, and it appears as if it has
 * unbounded memory restricted only by RAM. This could be used to suck in large files pretty easily in C,
 * and for a moment I think that this is even simpler than using the alternate io_cache that I wrote some time back:
 *
 * https://github.com/karthick18/io_cache
 *
 * NOTE: If you want to compile and run this gist, please copy the list.h file from my io_cache project, whose
 * link is given above.
 *
 */
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <assert.h> | |
#include <sys/ucontext.h> | |
#include <sys/mman.h> | |
#include <errno.h> | |
#include <fcntl.h> | |
#include <unistd.h> | |
#include <signal.h> | |
#include <sys/stat.h> | |
#include "list.h" | |
/*
 * Upper bound on the contiguous VMA that is reserved and mapped on demand: 1 GiB.
 * Anything added to it (see the commented-out term) has to keep the total page aligned.
 */
#define MMAP_MAX_HOLE_SIZE ( (1<<30L) /* + (32<<10L) */ )

#undef MIN
/* Smaller of a and b. Each argument may be evaluated twice: do not pass expressions with side effects. */
#define MIN(a,b) ( (a) < (b) ? (a) : (b) )

/*
 * Linear (bump) allocator over the reserved hole.
 * Evaluates to NULL when `bytes` exceeds the space left between mem_space and vma_hole->vm_end;
 * otherwise advances mem_space by `bytes` and evaluates to the address mem_space had before the bump.
 * Relies on the file-scope `vma_hole` and `mem_space` being set up; `bytes` is evaluated more than once.
 * Every use of `bytes` is parenthesized so that an expression argument (e.g. a + b) yields the right pointer.
 */
#define __MALLOC(bytes) ( (bytes) > vma_hole->vm_end - (unsigned long)mem_space ? NULL :\
                          ( mem_space += (bytes), mem_space - (bytes) ) )
struct vm_area_struct | |
{ | |
unsigned long vm_start; | |
unsigned long vm_end; | |
struct list_head list; | |
}; | |
static DECLARE_LIST_HEAD(vm_area_list); | |
static struct sigaction oldact; | |
static struct vm_area_struct *vma_hole = NULL; /*the start of this VMA would be a contiguous VMA*/ | |
static char *mem_space; /*contiguous mem space*/ | |
static unsigned long hole_max = 0; | |
static int pagesize, pagemask; | |
static const int blocksize = 1 << 20U; | |
static int vm_addr_cmp(struct vm_area_struct *ref, struct vm_area_struct *element) | |
{ | |
if(ref->vm_start > element->vm_start) | |
return 1; | |
if(ref->vm_start < element->vm_start) | |
return -1; | |
return 0; | |
} | |
static void alloc_vma(unsigned long start, unsigned long end) | |
{ | |
struct vm_area_struct *vma = calloc(1, sizeof(*vma)); | |
assert(vma); | |
vma->vm_start = start; | |
vma->vm_end = end; | |
list_sort_add(&vma->list, &vm_area_list, struct vm_area_struct, list, vm_addr_cmp); | |
} | |
static void setup_mmap(unsigned long addr, unsigned long size, unsigned long hole_max) | |
{ | |
char *map; | |
map = mmap((void*)addr, hole_max, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); | |
if(map == MAP_FAILED) | |
{ | |
perror("mmap:"); | |
assert(0); /*crash the process as this just a test and the reserve mmap has to succeed*/ | |
} | |
map = mmap((void*)addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); | |
if(map == MAP_FAILED) | |
{ | |
perror("mmap:"); | |
} | |
else | |
{ | |
printf("Fixed mmap successful for addr [%p], map [%p], [%ld] bytes\n", (void*)addr, map, size); | |
/* | |
* Just fire a test write to the overridden mmap space. | |
*/ | |
memset(map, 0xaa, size); | |
} | |
} | |
/* | |
* Pick up the max VMA from the existing data vmas for using it as the mmap or allocation space | |
* for the process. | |
*/ | |
static void setup_mmap_hole(void) | |
{ | |
register struct list_head *iter; | |
struct vm_area_struct *last_vma = NULL; | |
struct vm_area_struct *vma = NULL; | |
list_for_each(iter, &vm_area_list) | |
{ | |
vma = list_entry(iter, struct vm_area_struct, list); | |
if(last_vma) | |
{ | |
unsigned long hole = vma->vm_start - last_vma->vm_end; | |
if(hole > hole_max) | |
{ | |
hole_max = hole; | |
vma_hole = last_vma; | |
} | |
} | |
else | |
vma_hole = vma; | |
last_vma = vma; | |
} | |
assert(vma_hole && hole_max); | |
hole_max = MIN(MMAP_MAX_HOLE_SIZE, hole_max); | |
vma = calloc(1, sizeof(*vma)); | |
assert(vma); | |
vma->vm_start = vma_hole->vm_end; | |
vma->vm_end = vma->vm_start + hole_max; | |
mem_space = (char*)vma->vm_start; | |
vma_hole = vma; | |
printf("Testing with mmapping VMAs at [%#lx - %#lx], size [%#lx] bytes\n", vma_hole->vm_start, vma_hole->vm_end, hole_max); | |
setup_mmap(vma_hole->vm_start, MIN(1<<20U, hole_max), hole_max); /*just map a max of 1 mb to start with and fault in others later*/ | |
/* | |
* Uncomment to test faults on the mmap writes to the entire address space. | |
if(hole_max > (1<<20U)) | |
memset((void*)(vma_hole->vm_start + (1<<20U)), 0xaa, hole_max-(1<<20U)); //force sigsegvs and remap from there | |
*/ | |
} | |
static void setup_vma(void) | |
{ | |
register struct list_head *iter; | |
struct vm_area_struct *vma; | |
list_for_each(iter, &vm_area_list) | |
{ | |
vma = list_entry(iter, struct vm_area_struct, list); | |
printf("VMA added [%#lx - %#lx]\n", vma->vm_start, vma->vm_end); | |
} | |
if(!LIST_EMPTY(&vm_area_list)) | |
setup_mmap_hole(); | |
} | |
/*
 * Parse /proc/<pid>/maps of the current process and record every mapping whose permissions
 * start with "rw" and whose name does not contain "stack", then hand over to setup_vma().
 * Lines without a name field are skipped, as the scan requires all four captured fields.
 *
 * Returns 0. Aborts the process if the maps file cannot be opened.
 */
static int read_maps(void)
{
    char filename[0xff+1];
    char buffer[0xff+1];
    FILE *fptr;
    unsigned long start, end;
    char perm[5], prog[40];

    snprintf(filename, sizeof(filename), "/proc/%d/maps", (int)getpid());
    fptr = fopen(filename, "r");
    if (!fptr) {
        perror(filename);
        abort();
    }

    while (fgets(buffer, sizeof(buffer), fptr)) {
        size_t len = strcspn(buffer, "\n");

        if (buffer[len] == '\n') {
            buffer[len] = 0;
        } else {
            /*
             * Line longer than the buffer: keep the head that was read and discard the rest,
             * so that the tail is not mistaken for a line of its own on the next fgets.
             */
            int c;

            while ((c = fgetc(fptr)) != EOF && c != '\n')
                ;
        }

        /*
         * Fields: start-end perms offset dev(major:minor, hexadecimal) inode name.
         * The field widths keep perm[5] and prog[40] from overflowing on long names.
         */
        if (sscanf(buffer, "%lx-%lx %4s %*s %*x:%*x %*s %39s",
                   &start, &end, perm, prog) != 4)
            continue;
        /*printf("addr start [%#lx], end [%#lx], perm [%s], prog %s\n", start, end, perm, prog);*/
        if (!strncmp(perm, "rw", 2) && !strstr(prog, "stack"))
            alloc_vma(start, end);
    }
    fclose(fptr);
    setup_vma();
    return 0;
}
/* | |
* IF our managed mmap contiguous vma hole take a page fault, | |
* then we have good control to swap out any old mmap chunks to a flat file db (berkeley/whatever) after munmapping | |
* and then mmap the faulting address to restrict the memory size of the process (if you don't believe in kernel swapping out chunks) | |
* In mobile/embedded world where there is no swap, it makes the usage of our own disk-based swap all the more feasible. | |
* Maybe this is what the greatest programmer in the WORLD: John Carmack implemented for RAGE (mega-textures) :) | |
* Got some hints from a tweet of his which is the inspiration for this proof-of-concept. | |
*/ | |
static void sigsegv_handler(int sig, siginfo_t *info, void *param) | |
{ | |
unsigned long fault_addr = (unsigned long)info->si_addr; | |
printf("Faulting instruction [%#lx]\n", fault_addr); | |
if(fault_addr >= vma_hole->vm_start | |
&& | |
fault_addr + pagesize <= vma_hole->vm_end) | |
{ | |
int chunksize = pagesize; | |
char *map; | |
fault_addr &= ~pagemask; /*page align the fault addr*/ | |
/* | |
* use chunksize mmap as a test. as this could be any contiguous chunk within our contiguous addr space. | |
* fall back to page size for the last bits. | |
*/ | |
if(fault_addr + blocksize <= vma_hole->vm_end) | |
chunksize = blocksize; | |
map = mmap((void*)fault_addr, chunksize, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); | |
if(map == MAP_FAILED) | |
{ | |
perror("mmap sigsegv:"); | |
goto out_restore; | |
} | |
printf("Faulting address [%#lx] mapped of size [%d] bytes successfully\n", fault_addr, chunksize); | |
return; /*restart the page fault that triggered the sigsegv*/ | |
} | |
out_restore: | |
sigaction(SIGSEGV, &oldact, NULL); | |
raise(sig); | |
} | |
static int test_file_read(void) | |
{ | |
int fd; | |
int err = -1; | |
char tempfile[40] = "/tmp/map_file_XXXXXX"; | |
char cmdbuf[0xff+1]; | |
unsigned long count, blocks; | |
struct stat stbuf; | |
size_t filesize; | |
const char *suffix; | |
fd = mkstemp(tempfile); | |
if(fd < 0) | |
{ | |
perror("mkstemp:"); | |
goto out; | |
} | |
close(fd); | |
if( (hole_max & (blocksize-1)) ) | |
{ | |
/* | |
* it HAS To be page aligned. | |
*/ | |
assert(!(hole_max & pagemask)); | |
count = hole_max / pagesize; | |
blocks = pagesize >> 10; | |
suffix = "K"; | |
} | |
else | |
{ | |
blocks = (blocksize >> 20); | |
count = hole_max / blocksize; | |
suffix = "M"; | |
} | |
snprintf(cmdbuf, sizeof(cmdbuf), "dd if=/dev/zero of=%s bs=%ld%s count=%ld", tempfile, | |
blocks, suffix, count); | |
printf("Running command [\"%s\"] to create a test file for reading\n", | |
cmdbuf); | |
if( (err = system(cmdbuf) ) < 0) | |
{ | |
perror("system:"); | |
goto out_unlink; | |
} | |
if(stat(tempfile, &stbuf) < 0) | |
{ | |
perror("stat:"); | |
goto out_unlink; | |
} | |
filesize = stbuf.st_size; | |
assert(filesize == hole_max); | |
fd = open(tempfile, O_RDWR, 0600); | |
if(fd < 0) | |
{ | |
perror("open:"); | |
goto out_unlink; | |
} | |
printf("Reading [%ld] blocks from file\n", count); | |
count = 0; | |
do | |
{ | |
int bytes; | |
char *buf; | |
int chunksize = blocksize; | |
if(filesize < blocksize) | |
chunksize = pagesize; | |
chunksize = MIN(chunksize, filesize); | |
buf = __MALLOC(chunksize); | |
assert(buf); | |
/* | |
* Uncomment below line to avoid the read errors with EFAULT which is a bit more descriptive in the way, | |
* the reads are faulted in, if a user is reading the code. | |
*/ | |
//*buf = 0; | |
restart_read: | |
bytes = read(fd, buf, chunksize); | |
if(bytes <= 0) | |
{ | |
if(errno == EFAULT) | |
{ | |
/* | |
* force a sigsegv. Would have been glad had it taken a pagefault directly from read syscall | |
*/ | |
*buf = 0; | |
printf("Page fault resolved at buf [%p]\n", buf); | |
goto restart_read; | |
} | |
perror("read:"); | |
goto out_close; | |
} | |
printf("Read block [%ld], bytes [%d] from file [%s]\n", (hole_max - filesize) >> 20, bytes, tempfile); | |
filesize -= bytes; | |
} while(filesize > 0); | |
err = 0; | |
out_close: | |
close(fd); | |
out_unlink: | |
printf("Unlinking temp file [%s]\n", tempfile); | |
unlink(tempfile); | |
out: | |
return err; | |
} | |
int main(int argc, char **argv) | |
{ | |
struct sigaction act; | |
int err; | |
pagesize = sysconf(_SC_PAGESIZE); | |
pagemask = pagesize-1; | |
memset(&act, 0, sizeof(act)); | |
sigemptyset(&act.sa_mask); | |
act.sa_sigaction = sigsegv_handler; | |
act.sa_flags = SA_RESTART | SA_SIGINFO; | |
err = sigaction(SIGSEGV, &act, &oldact); | |
assert(err == 0); | |
read_maps(); | |
test_file_read(); | |
return 0; | |
} | |
/* | |
* Local variables: | |
* c-file-style: "linux" | |
* compile-command: "gcc -Wall -g -o contig_alloc contig_alloc.c" | |
* tab-width: 4 | |
* c-basic-offset: 4 | |
* End: | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment