Skip to content

Instantly share code, notes, and snippets.

@karthick18
Created January 1, 2011 20:53
Show Gist options
  • Save karthick18/762003 to your computer and use it in GitHub Desktop.
Save karthick18/762003 to your computer and use it in GitHub Desktop.
A proof of concept contig alloc using mmap that reserves a vma for the process with MAP_FIXED (PROT_READ) and then faults in the vma hole space on demand through the sigsegv handler.
/*
* A simple on-demand based mmap alloc-only proof-of-concept to just check out the possibilities of having a
* single giant MAP_FIXED allocation for a process that may not need to call free at all and just work with a
* contiguous allocation space
*
* A contiguous mmap hole is reserved with PROT_READ, and individual chunks are then overridden with
* PROT_READ | PROT_WRITE on page faults by the sigsegv handler, which mmaps the faulting address range.
*
* As a test, a 1 GIG file is created and then read into the memory allocated from the contiguous vma which is a
* brain-dead linear allocator from the address space as there are no requirements to free memory here.
* There are no calls to malloc and the application address space is contiguous and it appears as if it has
* infinite memory bounds only restricted by ram. This could be used to suck in large files pretty easily in C
* and for a moment I think that this is even simpler than using an alternate io_cache that I had written sometime back:
*
* https://github.com/karthick18/io_cache
*
* NOTE: If you want to compile and run the below gist, please copy the list.h file from my io_cache project whose
* project link is mentioned above.
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <sys/ucontext.h>
#include <sys/mman.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <signal.h>
#include <sys/stat.h>
#include "list.h"
/*
* 1 gig contiguous vma to mmap on-demand + add anything extra or page aligned to the hole
*/
/* Cap on the reserved region: 1 GiB. Append e.g. "+ (32 << 10)" to exercise a tail that is not block aligned. */
#define MMAP_MAX_HOLE_SIZE (1 << 30)

/* Smaller of two values; both arguments are evaluated more than once. */
#ifdef MIN
#undef MIN
#endif
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
/*
 * Alloc-only bump allocator over the reserved region.
 *
 * Yields a pointer to `bytes` bytes carved from mem_space and advances
 * mem_space past them, or NULL when fewer than `bytes` bytes remain before
 * vma_hole->vm_end. Nothing is ever freed.
 *
 * `bytes` is evaluated more than once, so pass an expression without side
 * effects. Every use of the argument is parenthesized so that an expression
 * such as `a + b` yields the start of the allocation.
 */
#define __MALLOC(bytes) \
    ( (bytes) > vma_hole->vm_end - (unsigned long)mem_space \
        ? NULL \
        : ( mem_space += (bytes), mem_space - (bytes) ) )
/*
* One address range tracked by this program. read_maps() creates one per
* selected line of /proc/<pid>/maps, and setup_mmap_hole() creates one more
* to describe the reserved contiguous region.
*/
struct vm_area_struct
{
/* first address of the range */
unsigned long vm_start;
/* end of the range; the hole arithmetic treats it as the first address past the range */
unsigned long vm_end;
/* linkage used when the entry is inserted into vm_area_list */
struct list_head list;
};
/*
* VMAs recorded by alloc_vma(); entries are inserted with list_sort_add() using vm_addr_cmp
* (presumably ascending by vm_start - list.h is not shown here).
*/
static DECLARE_LIST_HEAD(vm_area_list);
/* SIGSEGV disposition that was installed before main() registered sigsegv_handler; the handler restores it for faults it cannot resolve */
static struct sigaction oldact;
/*
* During the scan in setup_mmap_hole(): the VMA that precedes the largest hole seen so far.
* Afterwards: the descriptor of the reserved contiguous region itself.
*/
static struct vm_area_struct *vma_hole = NULL;
/* next free byte of the contiguous region; advanced by __MALLOC */
static char *mem_space;
/* size in bytes of the largest hole found, later clamped to MMAP_MAX_HOLE_SIZE */
static unsigned long hole_max = 0;
/* set in main(): pagesize from sysconf(_SC_PAGESIZE), pagemask = pagesize-1 */
static int pagesize, pagemask;
/* 1 MiB: chunk size used when faulting memory in and when reading the test file */
static const int blocksize = 1 << 20U;
/*
 * Ordering callback for the VMA list: compares two entries by start address.
 * Returns 1 when ref starts above element, -1 when it starts below, and 0
 * when both start at the same address.
 */
static int vm_addr_cmp(struct vm_area_struct *ref, struct vm_area_struct *element)
{
    const unsigned long lhs = ref->vm_start;
    const unsigned long rhs = element->vm_start;

    /* each comparison is 0 or 1, so the difference is exactly -1, 0 or 1 */
    return (lhs > rhs) - (lhs < rhs);
}
/*
 * Record the address range [start, end) as a new entry of vm_area_list.
 *
 * The entry is heap allocated and never freed. Allocation failure aborts the
 * process explicitly, so the check survives builds with NDEBUG (an assert
 * would be compiled out and the NULL pointer dereferenced below).
 */
static void alloc_vma(unsigned long start, unsigned long end)
{
    struct vm_area_struct *vma = calloc(1, sizeof *vma);

    if (!vma) {
        perror("calloc");
        abort();
    }
    vma->vm_start = start;
    vma->vm_end = end;
    /* insert using vm_addr_cmp so that setup_mmap_hole() can walk neighbouring ranges */
    list_sort_add(&vma->list, &vm_area_list, struct vm_area_struct, list, vm_addr_cmp);
}
/*
 * Reserve the contiguous region and make its first part writable.
 *
 * addr     - start address of the region (mapped with MAP_FIXED)
 * size     - number of leading bytes to map read/write right away; clamped to
 *            hole_max so the writable mapping never extends past the
 *            reservation
 * hole_max - total number of bytes to reserve read-only
 *
 * The reservation has to succeed: on failure the process is aborted
 * explicitly rather than through assert(0), which would vanish under NDEBUG.
 * A failure of the second (read/write) mapping is only reported, as before.
 */
static void setup_mmap(unsigned long addr, unsigned long size, unsigned long hole_max)
{
    char *map;

    if (size > hole_max)
        size = hole_max;

    /* step 1: claim the whole range read-only so that writes fault into the handler */
    map = mmap((void *)addr, hole_max, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
    if (map == MAP_FAILED) {
        perror("mmap:");
        abort(); /* crash the process: this is just a test and the reserve mmap has to succeed */
    }

    /* step 2: override the head of the range with a writable mapping */
    map = mmap((void *)addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
    if (map == MAP_FAILED) {
        perror("mmap:");
        return;
    }
    printf("Fixed mmap successful for addr [%p], map [%p], [%lu] bytes\n", (void *)addr, (void *)map, size);
    /* fire a test write at the overridden mmap space */
    memset(map, 0xaa, size);
}
/*
* Pick the largest hole (gap) between the existing data VMAs and use it as the mmap/allocation
* space for the process.
*/
static void setup_mmap_hole(void)
{
register struct list_head *iter;
struct vm_area_struct *last_vma = NULL;
struct vm_area_struct *vma = NULL;
list_for_each(iter, &vm_area_list)
{
vma = list_entry(iter, struct vm_area_struct, list);
if(last_vma)
{
unsigned long hole = vma->vm_start - last_vma->vm_end;
if(hole > hole_max)
{
hole_max = hole;
vma_hole = last_vma;
}
}
else
vma_hole = vma;
last_vma = vma;
}
assert(vma_hole && hole_max);
hole_max = MIN(MMAP_MAX_HOLE_SIZE, hole_max);
vma = calloc(1, sizeof(*vma));
assert(vma);
vma->vm_start = vma_hole->vm_end;
vma->vm_end = vma->vm_start + hole_max;
mem_space = (char*)vma->vm_start;
vma_hole = vma;
printf("Testing with mmapping VMAs at [%#lx - %#lx], size [%#lx] bytes\n", vma_hole->vm_start, vma_hole->vm_end, hole_max);
setup_mmap(vma_hole->vm_start, MIN(1<<20U, hole_max), hole_max); /*just map a max of 1 mb to start with and fault in others later*/
/*
* Uncomment to test faults on the mmap writes to the entire address space.
if(hole_max > (1<<20U))
memset((void*)(vma_hole->vm_start + (1<<20U)), 0xaa, hole_max-(1<<20U)); //force sigsegvs and remap from there
*/
}
static void setup_vma(void)
{
register struct list_head *iter;
struct vm_area_struct *vma;
list_for_each(iter, &vm_area_list)
{
vma = list_entry(iter, struct vm_area_struct, list);
printf("VMA added [%#lx - %#lx]\n", vma->vm_start, vma->vm_end);
}
if(!LIST_EMPTY(&vm_area_list))
setup_mmap_hole();
}
/*
 * Parse /proc/<pid>/maps of the current process, record every named mapping
 * whose permissions start with "rw" and whose name does not contain "stack",
 * then hand over to setup_vma().
 *
 * Returns 0. Aborts if the maps file cannot be opened.
 *
 * Parsing notes:
 *  - the string conversions carry field widths matching perm[] and prog[];
 *    an unbounded %s overflowed prog[] for any path longer than 39 bytes;
 *  - the device field (major:minor) is hexadecimal, so it is skipped with %x;
 *    with %d a line such as "fd:00" failed to match and was silently dropped;
 *  - only a trailing newline is stripped, and the unread tail of a line longer
 *    than the buffer is discarded instead of being parsed as a new line.
 *
 * NOTE(review): lines without a pathname (anonymous mappings) yield fewer
 * than four conversions and are skipped, exactly as before - confirm that is
 * intended.
 */
static int read_maps(void)
{
    char filename[0xff+1];
    char buffer[0xff+1];
    FILE *fptr;
    unsigned long start, end;
    char perm[5], prog[40];

    snprintf(filename, sizeof(filename), "/proc/%d/maps", getpid());
    fptr = fopen(filename, "r");
    if (!fptr) {
        perror(filename);
        abort();
    }
    while (fgets(buffer, sizeof(buffer), fptr)) {
        size_t len = strlen(buffer);

        if (len > 0 && buffer[len - 1] == '\n') {
            buffer[len - 1] = 0;
        } else {
            /* line did not fit: drop the remainder up to the end of the line */
            int c;

            while ((c = fgetc(fptr)) != EOF && c != '\n')
                ;
        }
        if (sscanf(buffer, "%lx-%lx %4s %*s %*x:%*x %*lu %39s",
                   &start, &end, perm, prog) != 4)
            continue;
        /*printf("addr start [%#lx], end [%#lx], perm [%s], prog %s\n", start, end, perm, prog);*/
        if (!strncmp(perm, "rw", 2) && !strstr(prog, "stack"))
            alloc_vma(start, end);
    }
    fclose(fptr);
    setup_vma();
    return 0;
}
/*
* If our managed contiguous mmap vma hole takes a page fault,
* then we have good control to swap out any old mmap chunks to a flat file db (berkeley/whatever) after munmapping
* and then mmap the faulting address to restrict the memory size of the process (if you don't believe in kernel swapping out chunks)
* In mobile/embedded world where there is no swap, it makes the usage of our own disk-based swap all the more feasible.
* Maybe this is what the greatest programmer in the WORLD: John Carmack implemented for RAGE (mega-textures) :)
* Got some hints from a tweet of his which is the inspiration for this proof-of-concept.
*/
/*
 * SIGSEGV handler that populates the reserved region on demand.
 *
 * sig   - the signal number (SIGSEGV)
 * info  - siginfo; si_addr is the faulting address
 * param - ucontext pointer, unused
 *
 * When the faulting page lies inside [vma_hole->vm_start, vma_hole->vm_end)
 * the handler maps writable anonymous memory over it and returns, so the
 * faulting access is restarted. Any other fault (or a failed mmap) restores
 * the previous SIGSEGV disposition and re-raises the signal.
 *
 * Mapping granularity:
 *  - if the blocksize-aligned block (relative to vm_start) containing the
 *    fault fits inside the region, that whole block is mapped. Aligning the
 *    block matters: MAP_FIXED discards whatever it overlaps, so a block that
 *    started at an arbitrary faulting page could wipe neighbouring pages that
 *    were already faulted in and written;
 *  - otherwise (the tail shorter than one block) only the faulting page is
 *    mapped.
 *
 * The range check is done on the page-aligned address. Checking the raw
 * address plus pagesize rejected valid faults at a non-zero offset inside the
 * last page of the region.
 *
 * errno is saved and restored on the resume path because the interrupted code
 * may be about to inspect it.
 *
 * NOTE(review): printf/perror are not async-signal-safe; they are kept for
 * the diagnostic output of this proof of concept.
 */
static void sigsegv_handler(int sig, siginfo_t *info, void *param)
{
    const int saved_errno = errno;
    const unsigned long fault_addr = (unsigned long)info->si_addr;
    const unsigned long page_addr = fault_addr & ~(unsigned long)pagemask;

    (void)param;
    printf("Faulting instruction [%#lx]\n", fault_addr);
    /* vma_hole is still NULL if a fault arrives before the region is set up */
    if (vma_hole
        && page_addr >= vma_hole->vm_start
        && page_addr + pagesize <= vma_hole->vm_end) {
        unsigned long map_addr = page_addr;
        int chunksize = pagesize;
        const unsigned long block_addr =
            page_addr - (page_addr - vma_hole->vm_start) % blocksize;
        char *map;

        /* prefer a whole aligned block; fall back to page size for the last bits */
        if (block_addr + blocksize <= vma_hole->vm_end) {
            map_addr = block_addr;
            chunksize = blocksize;
        }
        map = mmap((void *)map_addr, chunksize, PROT_READ | PROT_WRITE,
                   MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
        if (map == MAP_FAILED) {
            perror("mmap sigsegv:");
            goto out_restore;
        }
        printf("Faulting address [%#lx] mapped of size [%d] bytes successfully\n", map_addr, chunksize);
        errno = saved_errno;
        return; /* restart the page fault that triggered the sigsegv */
    }
out_restore:
    sigaction(SIGSEGV, &oldact, NULL);
    raise(sig);
}
/*
 * End-to-end test: create a temp file of hole_max bytes with dd, then read it
 * chunk by chunk into memory handed out by __MALLOC, relying on the SIGSEGV
 * handler to make each chunk writable.
 *
 * Returns 0 when the whole file was read and -1 on any failure. The temp
 * file is always unlinked once it has been created.
 *
 * Fixes over the previous version:
 *  - the return value of system() is kept in its own variable; storing it in
 *    err reset err to 0, so later stat/open/read failures reported success;
 *  - a non-zero dd exit status, a size mismatch and allocator exhaustion are
 *    reported as failures instead of relying on assert;
 *  - read() returning 0 (unexpected end of file) is handled separately and no
 *    longer consults a stale errno;
 *  - the read() result is kept as ssize_t and the format specifiers match the
 *    unsigned arguments.
 */
static int test_file_read(void)
{
    int fd;
    int err = -1;
    int status;
    char tempfile[40] = "/tmp/map_file_XXXXXX";
    char cmdbuf[0xff+1];
    unsigned long count, blocks;
    struct stat stbuf;
    size_t filesize;
    const char *suffix;

    fd = mkstemp(tempfile);
    if (fd < 0) {
        perror("mkstemp:");
        goto out;
    }
    close(fd);

    /* choose a dd block size that divides hole_max exactly */
    if ((hole_max & (blocksize-1))) {
        /*
         * it HAS To be page aligned.
         */
        assert(!(hole_max & pagemask));
        count = hole_max / pagesize;
        blocks = pagesize >> 10;
        suffix = "K";
    } else {
        blocks = (blocksize >> 20);
        count = hole_max / blocksize;
        suffix = "M";
    }
    snprintf(cmdbuf, sizeof(cmdbuf), "dd if=/dev/zero of=%s bs=%lu%s count=%lu", tempfile,
             blocks, suffix, count);
    printf("Running command [\"%s\"] to create a test file for reading\n",
           cmdbuf);
    status = system(cmdbuf);
    if (status < 0) {
        perror("system:");
        goto out_unlink;
    }
    if (status != 0) {
        fprintf(stderr, "system: command failed with status [%d]\n", status);
        goto out_unlink;
    }
    if (stat(tempfile, &stbuf) < 0) {
        perror("stat:");
        goto out_unlink;
    }
    filesize = stbuf.st_size;
    if (filesize != hole_max) {
        fprintf(stderr, "Test file size [%lu] does not match the hole size [%lu]\n",
                (unsigned long)filesize, hole_max);
        goto out_unlink;
    }
    fd = open(tempfile, O_RDWR, 0600);
    if (fd < 0) {
        perror("open:");
        goto out_unlink;
    }
    printf("Reading [%lu] blocks from file\n", count);
    while (filesize > 0) {
        ssize_t bytes;
        char *buf;
        int chunksize = blocksize;

        /* whole blocks while they last, then page sized pieces for the tail */
        if (filesize < (size_t)blocksize)
            chunksize = pagesize;
        if ((size_t)chunksize > filesize)
            chunksize = (int)filesize;
        buf = __MALLOC(chunksize);
        if (!buf) {
            fprintf(stderr, "Contiguous region exhausted with [%lu] bytes left to read\n",
                    (unsigned long)filesize);
            goto out_close;
        }
        /*
         * Touching the buffer here (*buf = 0) would avoid the EFAULT round trip
         * below; it is left out to show how the reads get faulted in.
         */
restart_read:
        bytes = read(fd, buf, chunksize);
        if (bytes < 0) {
            if (errno == EFAULT) {
                /*
                 * force a sigsegv. Would have been glad had it taken a pagefault directly from read syscall
                 */
                *buf = 0;
                printf("Page fault resolved at buf [%p]\n", (void *)buf);
                goto restart_read;
            }
            perror("read:");
            goto out_close;
        }
        if (bytes == 0) {
            fprintf(stderr, "read: unexpected end of file [%s]\n", tempfile);
            goto out_close;
        }
        printf("Read block [%lu], bytes [%zd] from file [%s]\n",
               (unsigned long)((hole_max - filesize) >> 20), bytes, tempfile);
        filesize -= (size_t)bytes;
    }
    err = 0;
out_close:
    close(fd);
out_unlink:
    printf("Unlinking temp file [%s]\n", tempfile);
    unlink(tempfile);
out:
    return err;
}
int main(int argc, char **argv)
{
struct sigaction act;
int err;
pagesize = sysconf(_SC_PAGESIZE);
pagemask = pagesize-1;
memset(&act, 0, sizeof(act));
sigemptyset(&act.sa_mask);
act.sa_sigaction = sigsegv_handler;
act.sa_flags = SA_RESTART | SA_SIGINFO;
err = sigaction(SIGSEGV, &act, &oldact);
assert(err == 0);
read_maps();
test_file_read();
return 0;
}
/*
* Local variables:
* c-file-style: "linux"
* compile-command: "gcc -Wall -g -o contig_alloc contig_alloc.c"
* tab-width: 4
* c-basic-offset: 4
* End:
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment