Created
December 28, 2010 23:51
-
-
Save karthick18/757928 to your computer and use it in GitHub Desktop.
Another variant of the last gist: 757086 but using mmap to write to backing storage over write to try and create a kernel panic in __mark_inode_dirty: fs-writeback.c :978 when USB device with ext4 is removed while the writes are targeted to its backing st
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* The below code is an effort to reproduce a kernel panic in __mark_inode_dirty triggered by a USB device pull | |
* while in the middle of writes to the disk. The issue is _consistently_ reproducible with our product runtime | |
* and log directories mount-binded to the USB drive and ejecting the USB drive while they are running. | |
* My 4 and a half yr old daughter has to be credited with reproducing the issue since I first observed the | |
* panic when she mercilessly pulled the USB cable while I was working :) After that, I have been trying hard | |
* to isolate the issue outside our product code without much success. | |
* The below code is an effort to reproduce an ext4 uninterruptibe task hang by trying | |
* to create many mmapped files from child processes logging to a USB backing storage and forcefully ejecting | |
* the USB drive in the middle of such writes. This code is a variant of another unsuccessful kernel-panic test code | |
* also in my github gist: https://gist.github.com/757086 that just uses write over mmap to write to the backing storage | |
* The panic is a result of the inodes backing device marker becoming NULL for superblock type:ext4 for DIRTY inodes. | |
* The NULL bdi pointer crashes the 2.6.36-rc8 kernel in __mark_inode_dirty: fs/fs-writeback.c: 978. | |
* But this test-code successfully reproduces the hung task warnings (> 120 seconds uninterruptible task hangs) | |
* after pulling the USB drive while in the middle of the writes. Invariably 3-4 child processes writing to the backing | |
* storage end up UNINTERRUPTIBLE state with the kernel dumping the backtrace of the tasks every 120 seconds. | |
* The uninterruptible hangs show a lockup in the ext4 sync page code-path associated with the journalled commits. | |
* To be run as: ./crash_kernel -c <num_log_files_to_be_created> -s <each_log_file_size> | |
* Example run: ./crash_kernel -c 10 -s 25m , | |
* would create 10 files with 10 log flusher processes flushing from its | |
* shared memory to backing storage mount binded to USB. Then eject the USB drive, Ctrl-C the program. The files would | |
* be created in /root/crash_test where /root is expected to be mount-binded to a USB drive. You can change the | |
* USB_DIR define below to your USB mount point. | |
* Do a "ps" and check for crash_kernel child processes entering "D" or uninterruptible state FOREVER thus creating | |
* uninterruptible task hung backtraces every 120 seconds. | |
* Don't believe a git pull might resolve the issue even though I am running a slightly old kernel(2.6.36-rc8) | |
* than the current since I don't see fixes/changes to ext4/fs-writeback.c in the latest. | |
* Note that I could have just used a series of "dd" but that does reproduce anything. The effort was to try and simulate | |
* a similar timing condition that seems to be ALWAYS reproduced when running our products applications having their log | |
* and runtime directories mount-binded to the USB drive. | |
*/ | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <stdarg.h> | |
#include <assert.h> | |
#include <unistd.h> | |
#include <sys/mman.h> | |
#include <fcntl.h> | |
#include <getopt.h> | |
#include <sys/stat.h> | |
#include <pthread.h> | |
#include <ctype.h> | |
#include <string.h> | |
#include <semaphore.h> | |
#include <time.h> | |
#include <signal.h> | |
#include <sys/wait.h> | |
#include <errno.h> | |
#undef MIN | |
#define MIN(a, b) ( (a) < (b) ? (a) : (b) ) | |
#define USB_DIR "/root" | |
#define FLUSH_DIR "crash_test" | |
#define SHM_SEGMENT_SIZE (1 << 20) | |
#define SHM_SEGMENT_FLUSH_RATE (20) | |
#define LOG_FILE_SIZE_DEFAULT (50LL << 20) /* 50 mb */ | |
#define LOG_FILES_DEFAULT 0x5 | |
#define MAX_ROTATIONS (0x4) /* power of 2 */ | |
#define ROTATION_MASK (MAX_ROTATIONS - 1) | |
#define SHM_WRITE_SIZE (SHM_SEGMENT_SIZE - sizeof(struct log_shm_ctl)) | |
struct log_shm_ctl | |
{ | |
sem_t sem; | |
int read_index; | |
int write_index; | |
} __attribute__ ((aligned(8))); | |
static volatile int exitme; | |
static char crash_dir[0xff+1]; | |
static unsigned long long filesize = LOG_FILE_SIZE_DEFAULT; | |
static int verbose; | |
#define LDEBUG(fmt, arg...) do { if(__builtin_expect(verbose, 0)) printf(fmt, ##arg); } while(0) | |
#ifdef SEM_RETRY /* required to test sem_init or futex initializes with non-process shared flag used with processes :P */ | |
#define SEM_WAIT(sem) ({ \ | |
int __err = 0; \ | |
do { \ | |
struct timespec __t; \ | |
clock_gettime(CLOCK_REALTIME, &__t); \ | |
__t.tv_sec += 1; \ | |
__err = sem_timedwait((sem), &__t); \ | |
} while (__err && ( errno == EINTR || errno == ETIMEDOUT)); \ | |
__err; \ | |
}) | |
#else | |
#define SEM_WAIT(sem) sem_wait(sem) | |
#endif | |
#define SEM_POST(sem) sem_post(sem) | |
static void sigint_handler(int sig) | |
{ | |
exitme = 1; | |
} | |
/* | |
* We halt when 1 byte of space is remaining as an indicator to throttle the writer or mark space_full | |
*/ | |
static int space_free(struct log_shm_ctl *ctl) | |
{ | |
if(ctl->read_index == ctl->write_index) | |
return SHM_WRITE_SIZE-1; | |
return (( ctl->read_index + SHM_WRITE_SIZE - ctl->write_index) % SHM_WRITE_SIZE) - 1; | |
} | |
/* | |
* Called with shm lock held. | |
*/ | |
static void log_shm_write(struct log_shm_ctl *ctl, const char *fmt, ...) | |
{ | |
char buf[0xff+1]; | |
int len; | |
int space; | |
char *data; | |
va_list ptr; | |
int pid = getpid(); | |
int err; | |
va_start(ptr, fmt); | |
len = vsnprintf(buf, sizeof(buf), fmt, ptr); | |
va_end(ptr); | |
data = (char*)(ctl+1); | |
/* | |
* Block till space is available. | |
*/ | |
while(!exitme && !(space=space_free(ctl))) | |
{ | |
SEM_POST(&ctl->sem); | |
usleep(1000); | |
LDEBUG("Log writer process [%d] going to grab the lock in shm_write as its full\n", pid); | |
err = SEM_WAIT(&ctl->sem); | |
if(!err) | |
LDEBUG("Log writer process [%d] grabbed the lock in shm_write as its full\n", pid); | |
else | |
LDEBUG("Log writer process [%d] failed to grab the lock in shm_write with error [%s]\n", | |
pid, strerror(errno)); | |
} | |
if(exitme) return; | |
len = MIN(len, space); | |
if(ctl->write_index >= ctl->read_index) | |
len = MIN(len, SHM_WRITE_SIZE - ctl->write_index); | |
else | |
len = MIN(len, ctl->read_index - ctl->write_index - 1); | |
memcpy(data + ctl->write_index, buf, len); | |
data[ctl->write_index + len - 1] = '\n' ; /*truncate with a newline*/ | |
ctl->write_index += len; | |
if(ctl->write_index == ctl->read_index) | |
ctl->write_index = 0; | |
} | |
/* | |
* Called with shm lock held. | |
*/ | |
static void log_shm_read(struct log_shm_ctl *ctl, char *data, int *size) | |
{ | |
int count; | |
int max_size = *size; | |
char *map = (char*)(ctl+1); | |
pid_t pid = getpid(); | |
int err; | |
while(!exitme && ctl->read_index == ctl->write_index) | |
{ | |
SEM_POST(&ctl->sem); | |
usleep(1000); | |
LDEBUG("Log flusher task [%d] going to grab the lock in shm read as shm is empty\n", pid); | |
err = SEM_WAIT(&ctl->sem); | |
if(!err) | |
LDEBUG("Log flusher task [%d] grabbed the lock in shm read as shm is empty\n", pid); | |
else | |
LDEBUG("Log flusher task [%d] failed to grab the lock in shm read with error [%s]\n", | |
pid, strerror(errno)); | |
} | |
if(ctl->read_index <= ctl->write_index) | |
count = MIN(max_size, ctl->write_index - ctl->read_index); | |
else | |
count = MIN(max_size, SHM_WRITE_SIZE - ctl->read_index); | |
memcpy(data, map + ctl->read_index, count); | |
ctl->read_index += count; | |
if(ctl->read_index == SHM_WRITE_SIZE) | |
ctl->read_index = 0; | |
*size = count; | |
} | |
static void do_write(char *filemap, unsigned long long offset, const char *buf, int bytes) | |
{ | |
memcpy(filemap + offset, buf, bytes); | |
} | |
static int flusher_task(int tid, int fd, char *filemap, unsigned long long filesize) | |
{ | |
int shm_fd; | |
char filename[0xff+1]; | |
int rotations = 0; | |
int cur_filesize = 0; | |
char *map; | |
struct log_shm_ctl *ctl; | |
static char *shared_data; | |
int count = 0; | |
pid_t pid = getpid(); | |
int err; | |
fprintf(stderr, "Log flusher task [%d] for tid [%d]\n", pid, tid); | |
snprintf(filename, sizeof(filename), "/CRASH_%d", tid); | |
shm_fd = shm_open(filename, O_RDWR, 0777); | |
if(shm_fd < 0) | |
{ | |
perror("shm_open:"); | |
goto out; | |
} | |
map = mmap(0, SHM_SEGMENT_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0); | |
if(map == MAP_FAILED) | |
{ | |
perror("mmap:"); | |
goto out_close; | |
} | |
snprintf(filename, sizeof(filename), "%s/%d", crash_dir, tid); | |
ctl = (struct log_shm_ctl*)map; | |
shared_data = calloc(1, SHM_SEGMENT_SIZE); | |
assert(shared_data); | |
SEM_WAIT(&ctl->sem); | |
while(!exitme) | |
{ | |
count = SHM_WRITE_SIZE; | |
log_shm_read(ctl, shared_data, &count); | |
if(!count) continue; | |
SEM_POST(&ctl->sem); /* drop the lock*/ | |
if(cur_filesize + count < filesize) | |
{ | |
do_write(filemap, cur_filesize, shared_data, count); | |
cur_filesize += count; | |
} | |
else | |
{ | |
register int i; | |
int write_size; | |
char fromfile[0xff+1]; | |
char tofile[0xff+1]; | |
write_size = (int)(filesize - cur_filesize); | |
do_write(filemap, cur_filesize, shared_data, write_size); | |
msync(filemap, filesize, MS_SYNC); | |
count -= write_size; | |
for(i = rotations - 1; i >= 0; --i) | |
{ | |
snprintf(fromfile, sizeof(fromfile), "%s/%d.%d", crash_dir, tid, i); | |
snprintf(tofile, sizeof(tofile), "%s/%d.%d", crash_dir, tid, i+1); | |
unlink(tofile); | |
rename(fromfile, tofile); | |
} | |
snprintf(fromfile, sizeof(fromfile), "%s/%d", crash_dir, tid); | |
snprintf(tofile, sizeof(tofile), "%s/%d.0", crash_dir, tid); | |
unlink(tofile); | |
rename(fromfile, tofile); | |
++rotations; | |
rotations &= ROTATION_MASK; | |
close(fd); | |
fd = open(filename, O_RDWR | O_CREAT | O_TRUNC, 0777); | |
if(fd < 0) | |
{ | |
perror("rotate open:"); | |
goto out_free; | |
} | |
if(ftruncate(fd, filesize) < 0) | |
{ | |
perror("rotate truncate:"); | |
goto out_free; | |
} | |
filemap = mmap(0, filesize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); | |
if(filemap == MAP_FAILED) | |
{ | |
perror("rotate mmap:"); | |
goto out_free; | |
} | |
if(count > 0) | |
{ | |
do_write(filemap, 0, shared_data + write_size, count); | |
} | |
cur_filesize = count; | |
} | |
LDEBUG("Log flusher task [%d] going to grab the lock after shm write\n", pid); | |
err = SEM_WAIT(&ctl->sem); | |
if(!err) | |
LDEBUG("Log flusher task [%d] grabbed the lock after shm write\n", pid); | |
else | |
LDEBUG("Log flusher task [%d] failed to grab the lock after shm write with error [%s]\n", | |
pid, strerror(errno)); | |
} | |
SEM_POST(&ctl->sem); | |
out_free: | |
free(shared_data); | |
munmap(map, SHM_SEGMENT_SIZE); | |
out_close: | |
close(shm_fd); | |
out: | |
return 0; | |
} | |
static void *log_thread(void *arg) | |
{ | |
int tid = (int)(unsigned long)arg; | |
pid_t pid = getpid(); | |
int fd, target_fd; | |
char filename[0xff+1]; | |
char *map, *target_map; | |
register int i; | |
struct log_shm_ctl *ctl; | |
pid_t flusher_pid; | |
int err; | |
/* | |
* Mmap the shared segment and create a flusher thread. | |
*/ | |
snprintf(filename, sizeof(filename), "/CRASH_%d", tid); | |
fd = shm_open(filename, O_RDWR, 0777); | |
if(fd < 0) | |
{ | |
perror("shm_open:"); | |
goto out; | |
} | |
map = mmap(0, SHM_SEGMENT_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); | |
if(map == MAP_FAILED) | |
{ | |
perror("mmap:"); | |
goto out_close; | |
} | |
snprintf(filename, sizeof(filename), "%s/%d", crash_dir, tid); | |
target_fd = open(filename, O_RDWR | O_CREAT | O_TRUNC, 0777); | |
if(target_fd < 0) | |
{ | |
perror("target_fd open:"); | |
goto out_unmap; | |
} | |
if(ftruncate(target_fd, filesize) < 0) | |
{ | |
perror("target ftruncate:"); | |
goto out_close2; | |
} | |
target_map = mmap(0, filesize, PROT_READ | PROT_WRITE, MAP_SHARED, target_fd, 0); | |
if(target_map == MAP_FAILED) | |
{ | |
perror("target mmap:"); | |
goto out_close2; | |
} | |
ctl = (struct log_shm_ctl*)map; | |
err = sem_init(&ctl->sem, 1, 1); | |
if(err < 0) | |
{ | |
perror("sem_init:"); | |
assert(0); | |
} | |
ctl->read_index = ctl->write_index = 0; | |
SEM_WAIT(&ctl->sem); | |
/* | |
* test write and create flusher. | |
*/ | |
log_shm_write(ctl, "This is test write start\n"); | |
if( !(flusher_pid = fork() )) | |
{ | |
flusher_task(tid, target_fd, target_map, filesize); | |
if(target_map) | |
munmap(target_map, filesize); | |
if(target_fd >= 0) | |
close(target_fd); | |
exit(0); | |
} else signal(SIGCHLD, SIG_IGN); | |
while(!exitme) | |
{ | |
for(i = 0; i < SHM_SEGMENT_FLUSH_RATE; ++i) | |
log_shm_write(ctl, "This is test write %d to crash the kernel\n", i); | |
if(exitme) break; | |
SEM_POST(&ctl->sem); | |
usleep(1000); /* breather for the flusher to run */ | |
LDEBUG("Log writer [%d], flusher [%d] grabbing the lock before batch shm write\n", pid, flusher_pid); | |
err = SEM_WAIT(&ctl->sem); | |
if(!err) | |
LDEBUG("Log writer [%d], flusher [%d] grabbed the lock after batch shm write\n", pid, flusher_pid); | |
else | |
LDEBUG("Log writer [%d], flusher [%d] failed to grab the lock after batch shm write with error [%s]\n", | |
pid, flusher_pid, strerror(errno)); | |
} | |
SEM_POST(&ctl->sem); | |
waitpid(flusher_pid, NULL, 0); | |
sem_destroy(&ctl->sem); | |
munmap(target_map, filesize); | |
out_close2: | |
close(target_fd); | |
out_unmap: | |
munmap(map, SHM_SEGMENT_SIZE); | |
out_close: | |
close(fd); | |
out: | |
LDEBUG("Exiting log thread [%d]\n", tid); | |
return NULL; | |
} | |
static void crash_kernel(const char *usb_mnt, int files) | |
{ | |
register int i; | |
pthread_t *tids; | |
snprintf(crash_dir, sizeof(crash_dir), "%s/%s", USB_DIR, FLUSH_DIR); | |
if(!access(crash_dir, F_OK)) | |
{ | |
char cmdbuf[0xff+1]; | |
snprintf(cmdbuf, sizeof(cmdbuf), "rm -rf %s", crash_dir); | |
if(system(cmdbuf)) | |
perror("system: "); | |
} | |
if(mkdir(crash_dir, 0755)) | |
{ | |
perror("mkdir:"); | |
fprintf(stderr, "Unable to create crash test directory [%s]. Exiting ...\n", crash_dir); | |
exit(127); | |
} | |
for(i = 0; i < files; ++i) | |
{ | |
char filename[0xff+1]; | |
int fd; | |
snprintf(filename, sizeof(filename), "/CRASH_%d", i); | |
fd = shm_open(filename, O_CREAT | O_RDWR | O_TRUNC, 0777); | |
if(fd < 0) | |
{ | |
perror("shm_open:"); | |
assert(0); | |
} | |
if(ftruncate(fd, SHM_SEGMENT_SIZE) < 0) | |
{ | |
perror("ftruncate:"); | |
assert(0); | |
} | |
close(fd); | |
} | |
tids = calloc(files, sizeof(*tids)); | |
assert(tids); | |
for(i = 0; i < files; ++i) | |
{ | |
int err = pthread_create(&tids[i], NULL, log_thread, (void*)(unsigned long)i); | |
assert(err == 0); | |
} | |
for(i =0 ; i < files; ++i) | |
pthread_join(tids[i], NULL); | |
for(i = 0; i < files; ++i) | |
{ | |
char filename[0xff+1]; | |
snprintf(filename, sizeof(filename), "/CRASH_%d", i); | |
if(shm_unlink(filename)) | |
perror("shm_unlink:"); | |
} | |
} | |
static char *prog; | |
static volatile int usage(void) | |
{ | |
fprintf(stderr, "[%s] -s log_file_size -c num_log_files -v | verbose\n", prog); | |
exit(127); | |
} | |
static unsigned long long get_size(const char *s) | |
{ | |
unsigned long long size = 0; | |
int shift = 10; | |
char *last_char = NULL; | |
size = strtoull(s, &last_char, 10); | |
if(*last_char) | |
{ | |
switch( tolower(*last_char) ) | |
{ | |
case 'k': | |
shift = 10; | |
break; | |
case 'm': | |
shift = 20 ; | |
break; | |
case 'g': | |
shift = 30; | |
break; | |
default: | |
fprintf(stderr, "Invalid suffix [%c] to size string [%s]. Using KB\n", *last_char, s); | |
shift = 10; | |
break; | |
} | |
} | |
size <<= shift; | |
return size; | |
} | |
int main(int argc, char **argv) | |
{ | |
int count = LOG_FILES_DEFAULT; | |
int c; | |
if ( ( prog = strrchr(argv[0], '/') ) ) | |
++prog; | |
else prog = argv[0]; | |
opterr = 0; | |
while ( ( c = getopt(argc, argv, "vs:c:h") ) != EOF ) | |
switch(c) | |
{ | |
case 's': | |
filesize = get_size(optarg); | |
if(!filesize) filesize = LOG_FILE_SIZE_DEFAULT; | |
break; | |
case 'c': | |
{ | |
char *last_char = NULL; | |
count = strtol(optarg, &last_char, 10); | |
if(*last_char) | |
{ | |
fprintf(stderr, "Invalid count specified [%s]. Should be an integer\n", optarg); | |
exit(127); | |
} | |
if(!count) count = LOG_FILES_DEFAULT; | |
} | |
break; | |
case 'v': | |
verbose = 1; | |
break; | |
case 'h': | |
case '?': | |
default: | |
{ | |
usage(); | |
} | |
break; | |
} | |
if(optind != argc) usage(); | |
signal(SIGINT, sigint_handler); | |
printf("Testing shared memory flushes with size [%lld], count [%d]\n", filesize, count); | |
printf("Press Ctrl-C to exit\n"); | |
crash_kernel(USB_DIR, count); | |
return 0; | |
} | |
/* | |
* Local variables: | |
* c-file-style: "linux" | |
* compile-command: "gcc -Wall -g -o crash_kernel crash_kernel.c -lpthread -lrt" | |
* tab-width: 4 | |
* c-basic-offset: 4 | |
* End: | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Also raised a high-priority ext4 bug for the above issue: https://bugzilla.kernel.org/show_bug.cgi?id=25792