Last active
June 30, 2022 11:04
-
-
Save fdmanana/4b24d6b30983e956bb1784a44873c5dd to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c | |
index da41a0c371bc..1e77371035ac 100644 | |
--- a/fs/btrfs/file.c | |
+++ b/fs/btrfs/file.c | |
@@ -1846,6 +1846,29 @@ static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info, | |
return 0; | |
} | |
+static size_t dio_fault_in_size(const struct iov_iter *iov, size_t prev_left) | |
+{ | |
+ const size_t left = iov_iter_count(iov); | |
+ size_t size = PAGE_SIZE; | |
+ | |
+ /* | |
+ * Returns how many bytes to fault in before retrying a DIO operation. | |
+ * With progress since the last fault-in: 8 pages or more, capped at 1M. | |
+ * Without progress we fault in at most 1 page. Faulting in more than | |
+ * that may result in not making any progress, as by the time we retry | |
+ * the first remaining page may have been evicted under significant | |
+ * memory pressure, which could make the DIO operation loop for too | |
+ * long, fall back to a buffered write or do a partial read (not | |
+ * incorrect, but not every application is prepared to retry those). | |
+ */ | |
+ if (left != prev_left) { | |
+ size = max(current->nr_dirtied_pause - current->nr_dirtied, 8); | |
+ size = min_t(size_t, SZ_1M, size << PAGE_SHIFT); | |
+ } | |
+ | |
+ return min(left, size); | |
+} | |
+ | |
static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from) | |
{ | |
const bool is_sync_write = (iocb->ki_flags & IOCB_DSYNC); | |
@@ -1956,7 +1979,9 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from) | |
if (left == prev_left) { | |
err = -ENOTBLK; | |
} else { | |
- fault_in_iov_iter_readable(from, left); | |
+ const size_t size = dio_fault_in_size(from, prev_left); | |
+ | |
+ fault_in_iov_iter_readable(from, size); | |
prev_left = left; | |
goto again; | |
} | |
@@ -3724,25 +3749,18 @@ static ssize_t btrfs_direct_read(struct kiocb *iocb, struct iov_iter *to) | |
if (iov_iter_count(to) > 0 && (ret == -EFAULT || ret > 0)) { | |
const size_t left = iov_iter_count(to); | |
+ const size_t fault_in_size = dio_fault_in_size(to, prev_left); | |
- if (left == prev_left) { | |
- /* | |
- * We didn't make any progress since the last attempt, | |
- * fallback to a buffered read for the remainder of the | |
- * range. This is just to avoid any possibility of looping | |
- * for too long. | |
- */ | |
- ret = read; | |
- } else { | |
- /* | |
- * We made some progress since the last retry or this is | |
- * the first time we are retrying. Fault in as many pages | |
- * as possible and retry. | |
- */ | |
- fault_in_iov_iter_writeable(to, left); | |
+ if (iter_is_iovec(to)) { | |
+ fault_in_iov_iter_writeable(to, fault_in_size); | |
prev_left = left; | |
goto again; | |
} | |
+ /* | |
+ * fault_in_iov_iter_writeable() only works for iovecs, so return | |
+ * with a partial read. | |
+ */ | |
+ ret = read; | |
} | |
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED); | |
return ret < 0 ? ret : read; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment