Created
January 20, 2015 18:49
-
-
Save invisiblek/a224e99b093ce6a422d1 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c | |
index 3c382b5..d0b8f98 100644 | |
--- a/fs/ext4/ext4_jbd2.c | |
+++ b/fs/ext4/ext4_jbd2.c | |
@@ -113,15 +113,6 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, | |
if (WARN_ON_ONCE(err)) { | |
ext4_journal_abort_handle(where, line, __func__, bh, | |
handle, err); | |
- ext4_error_inode(inode, where, line, | |
- bh->b_blocknr, | |
- "journal_dirty_metadata failed: " | |
- "handle type %u started at line %u, " | |
- "credits %u/%u, errcode %d", | |
- handle->h_type, | |
- handle->h_line_no, | |
- handle->h_requested_credits, | |
- handle->h_buffer_credits, err); | |
} | |
} else { | |
if (inode) | |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c | |
index 246fc24..4296a6f 100644 | |
--- a/fs/ext4/extents.c | |
+++ b/fs/ext4/extents.c | |
@@ -2111,7 +2111,7 @@ static int | |
ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, | |
struct ext4_extent *ex) | |
{ | |
- struct ext4_ext_cache cex = {0, 0, 0}; | |
+ struct ext4_ext_cache cex; | |
int ret = 0; | |
if (ext4_ext_check_cache(inode, block, &cex)) { | |
@@ -4733,7 +4733,7 @@ static int ext4_xattr_fiemap(struct inode *inode, | |
error = ext4_get_inode_loc(inode, &iloc); | |
if (error) | |
return error; | |
- physical = iloc.bh->b_blocknr << blockbits; | |
+ physical = (__u64)iloc.bh->b_blocknr << blockbits; | |
offset = EXT4_GOOD_OLD_INODE_SIZE + | |
EXT4_I(inode)->i_extra_isize; | |
physical += offset; | |
@@ -4741,7 +4741,7 @@ static int ext4_xattr_fiemap(struct inode *inode, | |
flags |= FIEMAP_EXTENT_DATA_INLINE; | |
brelse(iloc.bh); | |
} else { /* external block */ | |
- physical = EXT4_I(inode)->i_file_acl << blockbits; | |
+ physical = (__u64)EXT4_I(inode)->i_file_acl << blockbits; | |
length = inode->i_sb->s_blocksize; | |
} | |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c | |
index e2707f0..75c4f36 100644 | |
--- a/fs/ext4/ialloc.c | |
+++ b/fs/ext4/ialloc.c | |
@@ -488,10 +488,12 @@ fallback_retry: | |
for (i = 0; i < ngroups; i++) { | |
grp = (parent_group + i) % ngroups; | |
desc = ext4_get_group_desc(sb, grp, NULL); | |
- grp_free = ext4_free_inodes_count(sb, desc); | |
- if (desc && grp_free && grp_free >= avefreei) { | |
- *group = grp; | |
- return 0; | |
+ if (desc) { | |
+ grp_free = ext4_free_inodes_count(sb, desc); | |
+ if (grp_free && grp_free >= avefreei) { | |
+ *group = grp; | |
+ return 0; | |
+ } | |
} | |
} | |
@@ -685,11 +687,8 @@ repeat_in_this_group: | |
ino = ext4_find_next_zero_bit((unsigned long *) | |
inode_bitmap_bh->b_data, | |
EXT4_INODES_PER_GROUP(sb), ino); | |
- if (ino >= EXT4_INODES_PER_GROUP(sb)) { | |
- if (++group == ngroups) | |
- group = 0; | |
- continue; | |
- } | |
+ if (ino >= EXT4_INODES_PER_GROUP(sb)) | |
+ goto next_group; | |
if (group == 0 && (ino+1) < EXT4_FIRST_INO(sb)) { | |
ext4_error(sb, "reserved inode found cleared - " | |
"inode=%lu", ino + 1); | |
@@ -707,6 +706,9 @@ repeat_in_this_group: | |
goto got; /* we grabbed the inode! */ | |
if (ino < EXT4_INODES_PER_GROUP(sb)) | |
goto repeat_in_this_group; | |
+next_group: | |
+ if (++group == ngroups) | |
+ group = 0; | |
} | |
err = -ENOSPC; | |
goto out; | |
@@ -732,7 +734,6 @@ got: | |
BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap"); | |
err = ext4_handle_dirty_metadata(handle, NULL, block_bitmap_bh); | |
- brelse(block_bitmap_bh); | |
/* recheck and clear flag under lock if we still need to */ | |
ext4_lock_group(sb, group); | |
@@ -744,6 +745,7 @@ got: | |
gdp); | |
} | |
ext4_unlock_group(sb, group); | |
+ brelse(block_bitmap_bh); | |
if (err) | |
goto fail; | |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c | |
index 756fab6..5b6dcba 100644 | |
--- a/fs/ext4/inode.c | |
+++ b/fs/ext4/inode.c | |
@@ -145,7 +145,8 @@ void ext4_evict_inode(struct inode *inode) | |
* don't use page cache. | |
*/ | |
if (ext4_should_journal_data(inode) && | |
- (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) { | |
+ (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) && | |
+ inode->i_ino != EXT4_JOURNAL_INO) { | |
journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; | |
tid_t commit_tid = EXT4_I(inode)->i_datasync_tid; | |
@@ -279,6 +280,15 @@ void ext4_da_update_reserve_space(struct inode *inode, | |
used = ei->i_reserved_data_blocks; | |
} | |
+ if (unlikely(ei->i_allocated_meta_blocks > ei->i_reserved_meta_blocks)) { | |
+ ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, allocated %d " | |
+ "with only %d reserved metadata blocks\n", __func__, | |
+ inode->i_ino, ei->i_allocated_meta_blocks, | |
+ ei->i_reserved_meta_blocks); | |
+ WARN_ON(1); | |
+ ei->i_allocated_meta_blocks = ei->i_reserved_meta_blocks; | |
+ } | |
+ | |
/* Update per-inode reservations */ | |
ei->i_reserved_data_blocks -= used; | |
ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; | |
@@ -1104,6 +1114,17 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) | |
struct ext4_inode_info *ei = EXT4_I(inode); | |
unsigned int md_needed; | |
int ret; | |
+ ext4_lblk_t save_last_lblock; | |
+ int save_len; | |
+ | |
+ /* | |
+ * We will charge metadata quota at writeout time; this saves | |
+ * us from metadata over-estimation, though we may go over by | |
+ * a small amount in the end. Here we just reserve for data. | |
+ */ | |
+ ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1)); | |
+ if (ret) | |
+ return ret; | |
/* | |
* recalculate the amount of metadata blocks to reserve | |
@@ -1112,32 +1133,31 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) | |
*/ | |
repeat: | |
spin_lock(&ei->i_block_reservation_lock); | |
+ /* | |
+ * ext4_calc_metadata_amount() has side effects, which we have | |
+ * to be prepared undo if we fail to claim space. | |
+ */ | |
+ save_len = ei->i_da_metadata_calc_len; | |
+ save_last_lblock = ei->i_da_metadata_calc_last_lblock; | |
md_needed = EXT4_NUM_B2C(sbi, | |
ext4_calc_metadata_amount(inode, lblock)); | |
trace_ext4_da_reserve_space(inode, md_needed); | |
- spin_unlock(&ei->i_block_reservation_lock); | |
/* | |
- * We will charge metadata quota at writeout time; this saves | |
- * us from metadata over-estimation, though we may go over by | |
- * a small amount in the end. Here we just reserve for data. | |
- */ | |
- ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1)); | |
- if (ret) | |
- return ret; | |
- /* | |
* We do still charge estimated metadata to the sb though; | |
* we cannot afford to run out of free blocks. | |
*/ | |
if (ext4_claim_free_clusters(sbi, md_needed + 1, 0)) { | |
- dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); | |
+ ei->i_da_metadata_calc_len = save_len; | |
+ ei->i_da_metadata_calc_last_lblock = save_last_lblock; | |
+ spin_unlock(&ei->i_block_reservation_lock); | |
if (ext4_should_retry_alloc(inode->i_sb, &retries)) { | |
yield(); | |
goto repeat; | |
} | |
+ dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); | |
return -ENOSPC; | |
} | |
- spin_lock(&ei->i_block_reservation_lock); | |
ei->i_reserved_data_blocks++; | |
ei->i_reserved_meta_blocks += md_needed; | |
spin_unlock(&ei->i_block_reservation_lock); | |
@@ -1405,6 +1425,8 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd) | |
index = mpd->first_page; | |
end = mpd->next_page - 1; | |
+ | |
+ pagevec_init(&pvec, 0); | |
while (index <= end) { | |
nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); | |
if (nr_pages == 0) | |
@@ -2367,6 +2389,16 @@ static int ext4_nonda_switch(struct super_block *sb) | |
free_blocks = EXT4_C2B(sbi, | |
percpu_counter_read_positive(&sbi->s_freeclusters_counter)); | |
dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter); | |
+ /* | |
+ * Start pushing delalloc when 1/2 of free blocks are dirty. | |
+ */ | |
+ if (dirty_blocks && (free_blocks < 2 * dirty_blocks) && | |
+ !writeback_in_progress(sb->s_bdi) && | |
+ down_read_trylock(&sb->s_umount)) { | |
+ writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE); | |
+ up_read(&sb->s_umount); | |
+ } | |
+ | |
if (2 * free_blocks < 3 * dirty_blocks || | |
free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) { | |
/* | |
@@ -2375,13 +2407,6 @@ static int ext4_nonda_switch(struct super_block *sb) | |
*/ | |
return 1; | |
} | |
- /* | |
- * Even if we don't switch but are nearing capacity, | |
- * start pushing delalloc when 1/2 of free blocks are dirty. | |
- */ | |
- if (free_blocks < 2 * dirty_blocks) | |
- writeback_inodes_sb_if_idle(sb, WB_REASON_FS_FREE_SPACE); | |
- | |
return 0; | |
} | |
@@ -3869,6 +3894,7 @@ static int ext4_do_update_inode(handle_t *handle, | |
struct ext4_inode_info *ei = EXT4_I(inode); | |
struct buffer_head *bh = iloc->bh; | |
int err = 0, rc, block; | |
+ int need_datasync = 0; | |
/* For fields not not tracking in the in-memory inode, | |
* initialise them to zero for new inodes. */ | |
@@ -3917,7 +3943,10 @@ static int ext4_do_update_inode(handle_t *handle, | |
raw_inode->i_file_acl_high = | |
cpu_to_le16(ei->i_file_acl >> 32); | |
raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl); | |
- ext4_isize_set(raw_inode, ei->i_disksize); | |
+ if (ei->i_disksize != ext4_isize(raw_inode)) { | |
+ ext4_isize_set(raw_inode, ei->i_disksize); | |
+ need_datasync = 1; | |
+ } | |
if (ei->i_disksize > 0x7fffffffULL) { | |
struct super_block *sb = inode->i_sb; | |
if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, | |
@@ -3968,7 +3997,7 @@ static int ext4_do_update_inode(handle_t *handle, | |
err = rc; | |
ext4_clear_inode_state(inode, EXT4_STATE_NEW); | |
- ext4_update_inode_fsync_trans(handle, inode, 0); | |
+ ext4_update_inode_fsync_trans(handle, inode, need_datasync); | |
out_brelse: | |
brelse(bh); | |
ext4_std_error(inode->i_sb, err); | |
@@ -4191,7 +4220,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | |
struct kstat *stat) | |
{ | |
struct inode *inode; | |
- unsigned long delalloc_blocks; | |
+ unsigned long long delalloc_blocks; | |
inode = dentry->d_inode; | |
generic_fillattr(inode, stat); | |
@@ -4208,7 +4237,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | |
*/ | |
delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks; | |
- stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9; | |
+ stat->blocks += delalloc_blocks << (inode->i_sb->s_blocksize_bits-9); | |
return 0; | |
} | |
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c | |
index 6eee255..9727522 100644 | |
--- a/fs/ext4/ioctl.c | |
+++ b/fs/ext4/ioctl.c | |
@@ -38,7 +38,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |
handle_t *handle = NULL; | |
int err, migrate = 0; | |
struct ext4_iloc iloc; | |
- unsigned int oldflags; | |
+ unsigned int oldflags, mask, i; | |
unsigned int jflag; | |
if (!inode_owner_or_capable(inode)) | |
@@ -115,9 +115,14 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |
if (err) | |
goto flags_err; | |
- flags = flags & EXT4_FL_USER_MODIFIABLE; | |
- flags |= oldflags & ~EXT4_FL_USER_MODIFIABLE; | |
- ei->i_flags = flags; | |
+ for (i = 0, mask = 1; i < 32; i++, mask <<= 1) { | |
+ if (!(mask & EXT4_FL_USER_MODIFIABLE)) | |
+ continue; | |
+ if (mask & flags) | |
+ ext4_set_inode_flag(inode, i); | |
+ else | |
+ ext4_clear_inode_flag(inode, i); | |
+ } | |
ext4_set_inode_flags(inode); | |
inode->i_ctime = ext4_current_time(inode); | |
@@ -256,7 +261,6 @@ group_extend_out: | |
err = ext4_move_extents(filp, donor_filp, me.orig_start, | |
me.donor_start, me.len, &me.moved_len); | |
mnt_drop_write_file(filp); | |
- mnt_drop_write(filp->f_path.mnt); | |
if (copy_to_user((struct move_extent __user *)arg, | |
&me, sizeof(me))) | |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c | |
index 9898edc..cdfc763 100644 | |
--- a/fs/ext4/mballoc.c | |
+++ b/fs/ext4/mballoc.c | |
@@ -1980,7 +1980,11 @@ repeat: | |
group = ac->ac_g_ex.fe_group; | |
for (i = 0; i < ngroups; group++, i++) { | |
- if (group == ngroups) | |
+ /* | |
+ * Artificially restricted ngroups for non-extent | |
+ * files makes group > ngroups possible on first loop. | |
+ */ | |
+ if (group >= ngroups) | |
group = 0; | |
/* This now checks without needing the buddy page */ | |
@@ -2517,6 +2521,9 @@ int ext4_mb_release(struct super_block *sb) | |
struct ext4_sb_info *sbi = EXT4_SB(sb); | |
struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); | |
+ if (sbi->s_proc) | |
+ remove_proc_entry("mb_groups", sbi->s_proc); | |
+ | |
if (sbi->s_group_info) { | |
for (i = 0; i < ngroups; i++) { | |
grinfo = ext4_get_group_info(sb, i); | |
@@ -2564,8 +2571,6 @@ int ext4_mb_release(struct super_block *sb) | |
} | |
free_percpu(sbi->s_locality_groups); | |
- if (sbi->s_proc) | |
- remove_proc_entry("mb_groups", sbi->s_proc); | |
return 0; | |
} | |
@@ -4130,7 +4135,7 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac) | |
/* The max size of hash table is PREALLOC_TB_SIZE */ | |
order = PREALLOC_TB_SIZE - 1; | |
/* Add the prealloc space to lg */ | |
- rcu_read_lock(); | |
+ spin_lock(&lg->lg_prealloc_lock); | |
list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order], | |
pa_inode_list) { | |
spin_lock(&tmp_pa->pa_lock); | |
@@ -4154,12 +4159,12 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac) | |
if (!added) | |
list_add_tail_rcu(&pa->pa_inode_list, | |
&lg->lg_prealloc_list[order]); | |
- rcu_read_unlock(); | |
+ spin_unlock(&lg->lg_prealloc_lock); | |
/* Now trim the list to be not more than 8 elements */ | |
if (lg_prealloc_count > 8) { | |
ext4_mb_discard_lg_preallocations(sb, lg, | |
- order, lg_prealloc_count); | |
+ order, lg_prealloc_count); | |
return; | |
} | |
return ; | |
@@ -4639,10 +4644,16 @@ do_more: | |
* blocks being freed are metadata. these blocks shouldn't | |
* be used until this transaction is committed | |
*/ | |
+ retry: | |
new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS); | |
if (!new_entry) { | |
- err = -ENOMEM; | |
- goto error_return; | |
+ /* | |
+ * We use a retry loop because | |
+ * ext4_free_blocks() is not allowed to fail. | |
+ */ | |
+ cond_resched(); | |
+ congestion_wait(BLK_RW_ASYNC, HZ/50); | |
+ goto retry; | |
} | |
new_entry->efd_start_cluster = bit; | |
new_entry->efd_group = block_group; | |
@@ -4987,8 +4998,9 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) | |
end = start + (range->len >> sb->s_blocksize_bits) - 1; | |
minlen = range->minlen >> sb->s_blocksize_bits; | |
- if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)) || | |
- unlikely(start >= max_blks)) | |
+ if (minlen > EXT4_CLUSTERS_PER_GROUP(sb) || | |
+ start >= max_blks || | |
+ range->len < sb->s_blocksize) | |
return -EINVAL; | |
if (end >= max_blks) | |
end = max_blks - 1; | |
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c | |
index df5cde5..e2016f3 100644 | |
--- a/fs/ext4/move_extent.c | |
+++ b/fs/ext4/move_extent.c | |
@@ -1142,7 +1142,12 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |
orig_inode->i_ino, donor_inode->i_ino); | |
return -EINVAL; | |
} | |
- | |
+ /* TODO: This is a non-obvious task: swapping blocks for inodes with full | |
journaling enabled */ | |
+ if (ext4_should_journal_data(orig_inode) || | |
+ ext4_should_journal_data(donor_inode)) { | |
+ return -EINVAL; | |
+ } | |
/* Protect orig and donor inodes against a truncate */ | |
mext_inode_double_lock(orig_inode, donor_inode); | |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c | |
index 85fb03f..665e55c 100644 | |
--- a/fs/ext4/namei.c | |
+++ b/fs/ext4/namei.c | |
@@ -585,11 +585,8 @@ static int htree_dirblock_to_tree(struct file *dir_file, | |
if (ext4_check_dir_entry(dir, NULL, de, bh, | |
(block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb)) | |
+ ((char *)de - bh->b_data))) { | |
- /* On error, skip the f_pos to the next block. */ | |
- dir_file->f_pos = (dir_file->f_pos | | |
- (dir->i_sb->s_blocksize - 1)) + 1; | |
- brelse(bh); | |
- return count; | |
+ /* silently ignore the rest of the block */ | |
+ break; | |
} | |
ext4fs_dirhash(de->name, de->name_len, hinfo); | |
if ((hinfo->hash < start_hash) || |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment