Created
January 20, 2015 18:36
-
-
Save invisiblek/3572d7a66d385faedd3a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c | |
index 3c382b5..d0b8f98 100644 | |
--- a/fs/ext4/ext4_jbd2.c | |
+++ b/fs/ext4/ext4_jbd2.c | |
@@ -113,15 +113,6 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, | |
if (WARN_ON_ONCE(err)) { | |
ext4_journal_abort_handle(where, line, __func__, bh, | |
handle, err); | |
- ext4_error_inode(inode, where, line, | |
- bh->b_blocknr, | |
- "journal_dirty_metadata failed: " | |
- "handle type %u started at line %u, " | |
- "credits %u/%u, errcode %d", | |
- handle->h_type, | |
- handle->h_line_no, | |
- handle->h_requested_credits, | |
- handle->h_buffer_credits, err); | |
} | |
} else { | |
if (inode) | |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c | |
index 178b159..4296a6f 100644 | |
--- a/fs/ext4/extents.c | |
+++ b/fs/ext4/extents.c | |
@@ -2111,7 +2111,7 @@ static int | |
ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, | |
struct ext4_extent *ex) | |
{ | |
- struct ext4_ext_cache cex = {0, 0, 0}; | |
+ struct ext4_ext_cache cex; | |
int ret = 0; | |
if (ext4_ext_check_cache(inode, block, &cex)) { | |
@@ -2540,10 +2540,10 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, | |
{ | |
struct super_block *sb = inode->i_sb; | |
int depth = ext_depth(inode); | |
- struct ext4_ext_path *path; | |
+ struct ext4_ext_path *path = NULL; | |
ext4_fsblk_t partial_cluster = 0; | |
handle_t *handle; | |
- int i, err; | |
+ int i = 0, err; | |
ext_debug("truncate since %u to %u\n", start, end); | |
@@ -2576,8 +2576,12 @@ again: | |
} | |
depth = ext_depth(inode); | |
ex = path[depth].p_ext; | |
- if (!ex) | |
+ if (!ex) { | |
+ ext4_ext_drop_refs(path); | |
+ kfree(path); | |
+ path = NULL; | |
goto cont; | |
+ } | |
ee_block = le32_to_cpu(ex->ee_block); | |
@@ -2607,8 +2611,6 @@ again: | |
if (err < 0) | |
goto out; | |
} | |
- ext4_ext_drop_refs(path); | |
- kfree(path); | |
} | |
cont: | |
@@ -2617,19 +2619,28 @@ cont: | |
* after i_size and walking into the tree depth-wise. | |
*/ | |
depth = ext_depth(inode); | |
- path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS); | |
- if (path == NULL) { | |
- ext4_journal_stop(handle); | |
- return -ENOMEM; | |
- } | |
- path[0].p_depth = depth; | |
- path[0].p_hdr = ext_inode_hdr(inode); | |
+ if (path) { | |
+ int k = i = depth; | |
+ while (--k > 0) | |
+ path[k].p_block = | |
+ le16_to_cpu(path[k].p_hdr->eh_entries)+1; | |
+ } else { | |
+ path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), | |
+ GFP_NOFS); | |
+ if (path == NULL) { | |
+ ext4_journal_stop(handle); | |
+ return -ENOMEM; | |
+ } | |
+ path[0].p_depth = depth; | |
+ path[0].p_hdr = ext_inode_hdr(inode); | |
+ i = 0; | |
- if (ext4_ext_check(inode, path[0].p_hdr, depth)) { | |
- err = -EIO; | |
- goto out; | |
+ if (ext4_ext_check(inode, path[0].p_hdr, depth)) { | |
+ err = -EIO; | |
+ goto out; | |
+ } | |
} | |
- i = err = 0; | |
+ err = 0; | |
while (i >= 0 && err == 0) { | |
if (i == depth) { | |
@@ -2743,8 +2754,10 @@ cont: | |
out: | |
ext4_ext_drop_refs(path); | |
kfree(path); | |
- if (err == -EAGAIN) | |
+ if (err == -EAGAIN) { | |
+ path = NULL; | |
goto again; | |
+ } | |
ext4_journal_stop(handle); | |
return err; | |
@@ -2966,6 +2979,7 @@ static int ext4_split_extent(handle_t *handle, | |
int err = 0; | |
int uninitialized; | |
int split_flag1, flags1; | |
+ int allocated = map->m_len; | |
depth = ext_depth(inode); | |
ex = path[depth].p_ext; | |
@@ -2985,6 +2999,8 @@ static int ext4_split_extent(handle_t *handle, | |
map->m_lblk + map->m_len, split_flag1, flags1); | |
if (err) | |
goto out; | |
+ } else { | |
+ allocated = ee_len - (map->m_lblk - ee_block); | |
} | |
ext4_ext_drop_refs(path); | |
@@ -3007,7 +3023,7 @@ static int ext4_split_extent(handle_t *handle, | |
ext4_ext_show_leaf(inode, path); | |
out: | |
- return err ? err : map->m_len; | |
+ return err ? err : allocated; | |
} | |
#define EXT4_EXT_ZERO_LEN 7 | |
@@ -3675,6 +3691,7 @@ out: | |
allocated - map->m_len); | |
allocated = map->m_len; | |
} | |
+ map->m_len = allocated; | |
/* | |
* If we have done fallocate with the offset that is already | |
@@ -4716,7 +4733,7 @@ static int ext4_xattr_fiemap(struct inode *inode, | |
error = ext4_get_inode_loc(inode, &iloc); | |
if (error) | |
return error; | |
- physical = iloc.bh->b_blocknr << blockbits; | |
+ physical = (__u64)iloc.bh->b_blocknr << blockbits; | |
offset = EXT4_GOOD_OLD_INODE_SIZE + | |
EXT4_I(inode)->i_extra_isize; | |
physical += offset; | |
@@ -4724,7 +4741,7 @@ static int ext4_xattr_fiemap(struct inode *inode, | |
flags |= FIEMAP_EXTENT_DATA_INLINE; | |
brelse(iloc.bh); | |
} else { /* external block */ | |
- physical = EXT4_I(inode)->i_file_acl << blockbits; | |
+ physical = (__u64)EXT4_I(inode)->i_file_acl << blockbits; | |
length = inode->i_sb->s_blocksize; | |
} | |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c | |
index e2707f0..75c4f36 100644 | |
--- a/fs/ext4/ialloc.c | |
+++ b/fs/ext4/ialloc.c | |
@@ -488,10 +488,12 @@ fallback_retry: | |
for (i = 0; i < ngroups; i++) { | |
grp = (parent_group + i) % ngroups; | |
desc = ext4_get_group_desc(sb, grp, NULL); | |
- grp_free = ext4_free_inodes_count(sb, desc); | |
- if (desc && grp_free && grp_free >= avefreei) { | |
- *group = grp; | |
- return 0; | |
+ if (desc) { | |
+ grp_free = ext4_free_inodes_count(sb, desc); | |
+ if (grp_free && grp_free >= avefreei) { | |
+ *group = grp; | |
+ return 0; | |
+ } | |
} | |
} | |
@@ -685,11 +687,8 @@ repeat_in_this_group: | |
ino = ext4_find_next_zero_bit((unsigned long *) | |
inode_bitmap_bh->b_data, | |
EXT4_INODES_PER_GROUP(sb), ino); | |
- if (ino >= EXT4_INODES_PER_GROUP(sb)) { | |
- if (++group == ngroups) | |
- group = 0; | |
- continue; | |
- } | |
+ if (ino >= EXT4_INODES_PER_GROUP(sb)) | |
+ goto next_group; | |
if (group == 0 && (ino+1) < EXT4_FIRST_INO(sb)) { | |
ext4_error(sb, "reserved inode found cleared - " | |
"inode=%lu", ino + 1); | |
@@ -707,6 +706,9 @@ repeat_in_this_group: | |
goto got; /* we grabbed the inode! */ | |
if (ino < EXT4_INODES_PER_GROUP(sb)) | |
goto repeat_in_this_group; | |
+next_group: | |
+ if (++group == ngroups) | |
+ group = 0; | |
} | |
err = -ENOSPC; | |
goto out; | |
@@ -732,7 +734,6 @@ got: | |
BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap"); | |
err = ext4_handle_dirty_metadata(handle, NULL, block_bitmap_bh); | |
- brelse(block_bitmap_bh); | |
/* recheck and clear flag under lock if we still need to */ | |
ext4_lock_group(sb, group); | |
@@ -744,6 +745,7 @@ got: | |
gdp); | |
} | |
ext4_unlock_group(sb, group); | |
+ brelse(block_bitmap_bh); | |
if (err) | |
goto fail; | |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c | |
index 756fab6..5b6dcba 100644 | |
--- a/fs/ext4/inode.c | |
+++ b/fs/ext4/inode.c | |
@@ -145,7 +145,8 @@ void ext4_evict_inode(struct inode *inode) | |
* don't use page cache. | |
*/ | |
if (ext4_should_journal_data(inode) && | |
- (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) { | |
+ (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) && | |
+ inode->i_ino != EXT4_JOURNAL_INO) { | |
journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; | |
tid_t commit_tid = EXT4_I(inode)->i_datasync_tid; | |
@@ -279,6 +280,15 @@ void ext4_da_update_reserve_space(struct inode *inode, | |
used = ei->i_reserved_data_blocks; | |
} | |
+ if (unlikely(ei->i_allocated_meta_blocks > ei->i_reserved_meta_blocks)) { | |
+ ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, allocated %d " | |
+ "with only %d reserved metadata blocks\n", __func__, | |
+ inode->i_ino, ei->i_allocated_meta_blocks, | |
+ ei->i_reserved_meta_blocks); | |
+ WARN_ON(1); | |
+ ei->i_allocated_meta_blocks = ei->i_reserved_meta_blocks; | |
+ } | |
+ | |
/* Update per-inode reservations */ | |
ei->i_reserved_data_blocks -= used; | |
ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; | |
@@ -1104,6 +1114,17 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) | |
struct ext4_inode_info *ei = EXT4_I(inode); | |
unsigned int md_needed; | |
int ret; | |
+ ext4_lblk_t save_last_lblock; | |
+ int save_len; | |
+ | |
+ /* | |
+ * We will charge metadata quota at writeout time; this saves | |
+ * us from metadata over-estimation, though we may go over by | |
+ * a small amount in the end. Here we just reserve for data. | |
+ */ | |
+ ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1)); | |
+ if (ret) | |
+ return ret; | |
/* | |
* recalculate the amount of metadata blocks to reserve | |
@@ -1112,32 +1133,31 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) | |
*/ | |
repeat: | |
spin_lock(&ei->i_block_reservation_lock); | |
+ /* | |
+ * ext4_calc_metadata_amount() has side effects, which we have | |
+ * to be prepared undo if we fail to claim space. | |
+ */ | |
+ save_len = ei->i_da_metadata_calc_len; | |
+ save_last_lblock = ei->i_da_metadata_calc_last_lblock; | |
md_needed = EXT4_NUM_B2C(sbi, | |
ext4_calc_metadata_amount(inode, lblock)); | |
trace_ext4_da_reserve_space(inode, md_needed); | |
- spin_unlock(&ei->i_block_reservation_lock); | |
/* | |
- * We will charge metadata quota at writeout time; this saves | |
- * us from metadata over-estimation, though we may go over by | |
- * a small amount in the end. Here we just reserve for data. | |
- */ | |
- ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1)); | |
- if (ret) | |
- return ret; | |
- /* | |
* We do still charge estimated metadata to the sb though; | |
* we cannot afford to run out of free blocks. | |
*/ | |
if (ext4_claim_free_clusters(sbi, md_needed + 1, 0)) { | |
- dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); | |
+ ei->i_da_metadata_calc_len = save_len; | |
+ ei->i_da_metadata_calc_last_lblock = save_last_lblock; | |
+ spin_unlock(&ei->i_block_reservation_lock); | |
if (ext4_should_retry_alloc(inode->i_sb, &retries)) { | |
yield(); | |
goto repeat; | |
} | |
+ dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); | |
return -ENOSPC; | |
} | |
- spin_lock(&ei->i_block_reservation_lock); | |
ei->i_reserved_data_blocks++; | |
ei->i_reserved_meta_blocks += md_needed; | |
spin_unlock(&ei->i_block_reservation_lock); | |
@@ -1405,6 +1425,8 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd) | |
index = mpd->first_page; | |
end = mpd->next_page - 1; | |
+ | |
+ pagevec_init(&pvec, 0); | |
while (index <= end) { | |
nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); | |
if (nr_pages == 0) | |
@@ -2367,6 +2389,16 @@ static int ext4_nonda_switch(struct super_block *sb) | |
free_blocks = EXT4_C2B(sbi, | |
percpu_counter_read_positive(&sbi->s_freeclusters_counter)); | |
dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter); | |
+ /* | |
+ * Start pushing delalloc when 1/2 of free blocks are dirty. | |
+ */ | |
+ if (dirty_blocks && (free_blocks < 2 * dirty_blocks) && | |
+ !writeback_in_progress(sb->s_bdi) && | |
+ down_read_trylock(&sb->s_umount)) { | |
+ writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE); | |
+ up_read(&sb->s_umount); | |
+ } | |
+ | |
if (2 * free_blocks < 3 * dirty_blocks || | |
free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) { | |
/* | |
@@ -2375,13 +2407,6 @@ static int ext4_nonda_switch(struct super_block *sb) | |
*/ | |
return 1; | |
} | |
- /* | |
- * Even if we don't switch but are nearing capacity, | |
- * start pushing delalloc when 1/2 of free blocks are dirty. | |
- */ | |
- if (free_blocks < 2 * dirty_blocks) | |
- writeback_inodes_sb_if_idle(sb, WB_REASON_FS_FREE_SPACE); | |
- | |
return 0; | |
} | |
@@ -3869,6 +3894,7 @@ static int ext4_do_update_inode(handle_t *handle, | |
struct ext4_inode_info *ei = EXT4_I(inode); | |
struct buffer_head *bh = iloc->bh; | |
int err = 0, rc, block; | |
+ int need_datasync = 0; | |
/* For fields not not tracking in the in-memory inode, | |
* initialise them to zero for new inodes. */ | |
@@ -3917,7 +3943,10 @@ static int ext4_do_update_inode(handle_t *handle, | |
raw_inode->i_file_acl_high = | |
cpu_to_le16(ei->i_file_acl >> 32); | |
raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl); | |
- ext4_isize_set(raw_inode, ei->i_disksize); | |
+ if (ei->i_disksize != ext4_isize(raw_inode)) { | |
+ ext4_isize_set(raw_inode, ei->i_disksize); | |
+ need_datasync = 1; | |
+ } | |
if (ei->i_disksize > 0x7fffffffULL) { | |
struct super_block *sb = inode->i_sb; | |
if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, | |
@@ -3968,7 +3997,7 @@ static int ext4_do_update_inode(handle_t *handle, | |
err = rc; | |
ext4_clear_inode_state(inode, EXT4_STATE_NEW); | |
- ext4_update_inode_fsync_trans(handle, inode, 0); | |
+ ext4_update_inode_fsync_trans(handle, inode, need_datasync); | |
out_brelse: | |
brelse(bh); | |
ext4_std_error(inode->i_sb, err); | |
@@ -4191,7 +4220,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | |
struct kstat *stat) | |
{ | |
struct inode *inode; | |
- unsigned long delalloc_blocks; | |
+ unsigned long long delalloc_blocks; | |
inode = dentry->d_inode; | |
generic_fillattr(inode, stat); | |
@@ -4208,7 +4237,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | |
*/ | |
delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks; | |
- stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9; | |
+ stat->blocks += delalloc_blocks << (inode->i_sb->s_blocksize_bits-9); | |
return 0; | |
} | |
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c | |
index 6eee255..9727522 100644 | |
--- a/fs/ext4/ioctl.c | |
+++ b/fs/ext4/ioctl.c | |
@@ -38,7 +38,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |
handle_t *handle = NULL; | |
int err, migrate = 0; | |
struct ext4_iloc iloc; | |
- unsigned int oldflags; | |
+ unsigned int oldflags, mask, i; | |
unsigned int jflag; | |
if (!inode_owner_or_capable(inode)) | |
@@ -115,9 +115,14 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |
if (err) | |
goto flags_err; | |
- flags = flags & EXT4_FL_USER_MODIFIABLE; | |
- flags |= oldflags & ~EXT4_FL_USER_MODIFIABLE; | |
- ei->i_flags = flags; | |
+ for (i = 0, mask = 1; i < 32; i++, mask <<= 1) { | |
+ if (!(mask & EXT4_FL_USER_MODIFIABLE)) | |
+ continue; | |
+ if (mask & flags) | |
+ ext4_set_inode_flag(inode, i); | |
+ else | |
+ ext4_clear_inode_flag(inode, i); | |
+ } | |
ext4_set_inode_flags(inode); | |
inode->i_ctime = ext4_current_time(inode); | |
@@ -256,7 +261,6 @@ group_extend_out: | |
err = ext4_move_extents(filp, donor_filp, me.orig_start, | |
me.donor_start, me.len, &me.moved_len); | |
mnt_drop_write_file(filp); | |
- mnt_drop_write(filp->f_path.mnt); | |
if (copy_to_user((struct move_extent __user *)arg, | |
&me, sizeof(me))) | |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c | |
index 9898edc..cdfc763 100644 | |
--- a/fs/ext4/mballoc.c | |
+++ b/fs/ext4/mballoc.c | |
@@ -1980,7 +1980,11 @@ repeat: | |
group = ac->ac_g_ex.fe_group; | |
for (i = 0; i < ngroups; group++, i++) { | |
- if (group == ngroups) | |
+ /* | |
+ * Artificially restricted ngroups for non-extent | |
+ * files makes group > ngroups possible on first loop. | |
+ */ | |
+ if (group >= ngroups) | |
group = 0; | |
/* This now checks without needing the buddy page */ | |
@@ -2517,6 +2521,9 @@ int ext4_mb_release(struct super_block *sb) | |
struct ext4_sb_info *sbi = EXT4_SB(sb); | |
struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); | |
+ if (sbi->s_proc) | |
+ remove_proc_entry("mb_groups", sbi->s_proc); | |
+ | |
if (sbi->s_group_info) { | |
for (i = 0; i < ngroups; i++) { | |
grinfo = ext4_get_group_info(sb, i); | |
@@ -2564,8 +2571,6 @@ int ext4_mb_release(struct super_block *sb) | |
} | |
free_percpu(sbi->s_locality_groups); | |
- if (sbi->s_proc) | |
- remove_proc_entry("mb_groups", sbi->s_proc); | |
return 0; | |
} | |
@@ -4130,7 +4135,7 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac) | |
/* The max size of hash table is PREALLOC_TB_SIZE */ | |
order = PREALLOC_TB_SIZE - 1; | |
/* Add the prealloc space to lg */ | |
- rcu_read_lock(); | |
+ spin_lock(&lg->lg_prealloc_lock); | |
list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order], | |
pa_inode_list) { | |
spin_lock(&tmp_pa->pa_lock); | |
@@ -4154,12 +4159,12 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac) | |
if (!added) | |
list_add_tail_rcu(&pa->pa_inode_list, | |
&lg->lg_prealloc_list[order]); | |
- rcu_read_unlock(); | |
+ spin_unlock(&lg->lg_prealloc_lock); | |
/* Now trim the list to be not more than 8 elements */ | |
if (lg_prealloc_count > 8) { | |
ext4_mb_discard_lg_preallocations(sb, lg, | |
- order, lg_prealloc_count); | |
+ order, lg_prealloc_count); | |
return; | |
} | |
return ; | |
@@ -4639,10 +4644,16 @@ do_more: | |
* blocks being freed are metadata. these blocks shouldn't | |
* be used until this transaction is committed | |
*/ | |
+ retry: | |
new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS); | |
if (!new_entry) { | |
- err = -ENOMEM; | |
- goto error_return; | |
+ /* | |
+ * We use a retry loop because | |
+ * ext4_free_blocks() is not allowed to fail. | |
+ */ | |
+ cond_resched(); | |
+ congestion_wait(BLK_RW_ASYNC, HZ/50); | |
+ goto retry; | |
} | |
new_entry->efd_start_cluster = bit; | |
new_entry->efd_group = block_group; | |
@@ -4987,8 +4998,9 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) | |
end = start + (range->len >> sb->s_blocksize_bits) - 1; | |
minlen = range->minlen >> sb->s_blocksize_bits; | |
- if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)) || | |
- unlikely(start >= max_blks)) | |
+ if (minlen > EXT4_CLUSTERS_PER_GROUP(sb) || | |
+ start >= max_blks || | |
+ range->len < sb->s_blocksize) | |
return -EINVAL; | |
if (end >= max_blks) | |
end = max_blks - 1; | |
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c | |
index c5826c6..e2016f3 100644 | |
--- a/fs/ext4/move_extent.c | |
+++ b/fs/ext4/move_extent.c | |
@@ -141,55 +141,21 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path, | |
} | |
/** | |
- * mext_check_null_inode - NULL check for two inodes | |
- * | |
- * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0. | |
- */ | |
-static int | |
-mext_check_null_inode(struct inode *inode1, struct inode *inode2, | |
- const char *function, unsigned int line) | |
-{ | |
- int ret = 0; | |
- | |
- if (inode1 == NULL) { | |
- __ext4_error(inode2->i_sb, function, line, | |
- "Both inodes should not be NULL: " | |
- "inode1 NULL inode2 %lu", inode2->i_ino); | |
- ret = -EIO; | |
- } else if (inode2 == NULL) { | |
- __ext4_error(inode1->i_sb, function, line, | |
- "Both inodes should not be NULL: " | |
- "inode1 %lu inode2 NULL", inode1->i_ino); | |
- ret = -EIO; | |
- } | |
- return ret; | |
-} | |
- | |
-/** | |
* double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem | |
* | |
- * @orig_inode: original inode structure | |
- * @donor_inode: donor inode structure | |
- * Acquire write lock of i_data_sem of the two inodes (orig and donor) by | |
- * i_ino order. | |
+ * Acquire write lock of i_data_sem of the two inodes | |
*/ | |
static void | |
-double_down_write_data_sem(struct inode *orig_inode, struct inode *donor_inode) | |
+double_down_write_data_sem(struct inode *first, struct inode *second) | |
{ | |
- struct inode *first = orig_inode, *second = donor_inode; | |
+ if (first < second) { | |
+ down_write(&EXT4_I(first)->i_data_sem); | |
+ down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING); | |
+ } else { | |
+ down_write(&EXT4_I(second)->i_data_sem); | |
+ down_write_nested(&EXT4_I(first)->i_data_sem, SINGLE_DEPTH_NESTING); | |
- /* | |
- * Use the inode number to provide the stable locking order instead | |
- * of its address, because the C language doesn't guarantee you can | |
- * compare pointers that don't come from the same array. | |
- */ | |
- if (donor_inode->i_ino < orig_inode->i_ino) { | |
- first = donor_inode; | |
- second = orig_inode; | |
} | |
- | |
- down_write(&EXT4_I(first)->i_data_sem); | |
- down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING); | |
} | |
/** | |
@@ -969,14 +935,6 @@ mext_check_arguments(struct inode *orig_inode, | |
return -EINVAL; | |
} | |
- /* Files should be in the same ext4 FS */ | |
- if (orig_inode->i_sb != donor_inode->i_sb) { | |
- ext4_debug("ext4 move extent: The argument files " | |
- "should be in same FS [ino:orig %lu, donor %lu]\n", | |
- orig_inode->i_ino, donor_inode->i_ino); | |
- return -EINVAL; | |
- } | |
- | |
/* Ext4 move extent supports only extent based file */ | |
if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) { | |
ext4_debug("ext4 move extent: orig file is not extents " | |
@@ -1072,35 +1030,19 @@ mext_check_arguments(struct inode *orig_inode, | |
* @inode1: the inode structure | |
* @inode2: the inode structure | |
* | |
- * Lock two inodes' i_mutex by i_ino order. | |
- * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0. | |
+ * Lock two inodes' i_mutex | |
*/ | |
-static int | |
+static void | |
mext_inode_double_lock(struct inode *inode1, struct inode *inode2) | |
{ | |
- int ret = 0; | |
- | |
- BUG_ON(inode1 == NULL && inode2 == NULL); | |
- | |
- ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__); | |
- if (ret < 0) | |
- goto out; | |
- | |
- if (inode1 == inode2) { | |
- mutex_lock(&inode1->i_mutex); | |
- goto out; | |
- } | |
- | |
- if (inode1->i_ino < inode2->i_ino) { | |
+ BUG_ON(inode1 == inode2); | |
+ if (inode1 < inode2) { | |
mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); | |
mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); | |
} else { | |
mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT); | |
mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD); | |
} | |
- | |
-out: | |
- return ret; | |
} | |
/** | |
@@ -1109,28 +1051,13 @@ out: | |
* @inode1: the inode that is released first | |
* @inode2: the inode that is released second | |
* | |
- * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0. | |
*/ | |
-static int | |
+static void | |
mext_inode_double_unlock(struct inode *inode1, struct inode *inode2) | |
{ | |
- int ret = 0; | |
- | |
- BUG_ON(inode1 == NULL && inode2 == NULL); | |
- | |
- ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__); | |
- if (ret < 0) | |
- goto out; | |
- | |
- if (inode1) | |
- mutex_unlock(&inode1->i_mutex); | |
- | |
- if (inode2 && inode2 != inode1) | |
- mutex_unlock(&inode2->i_mutex); | |
- | |
-out: | |
- return ret; | |
+ mutex_unlock(&inode1->i_mutex); | |
+ mutex_unlock(&inode2->i_mutex); | |
} | |
/** | |
@@ -1187,16 +1114,23 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |
ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0; | |
ext4_lblk_t rest_blocks; | |
pgoff_t orig_page_offset = 0, seq_end_page; | |
- int ret1, ret2, depth, last_extent = 0; | |
+ int ret, depth, last_extent = 0; | |
int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; | |
int data_offset_in_page; | |
int block_len_in_page; | |
int uninit; | |
- /* orig and donor should be different file */ | |
- if (orig_inode->i_ino == donor_inode->i_ino) { | |
+ if (orig_inode->i_sb != donor_inode->i_sb) { | |
+ ext4_debug("ext4 move extent: The argument files " | |
+ "should be in same FS [ino:orig %lu, donor %lu]\n", | |
+ orig_inode->i_ino, donor_inode->i_ino); | |
+ return -EINVAL; | |
+ } | |
+ | |
+ /* orig and donor should be different inodes */ | |
+ if (orig_inode == donor_inode) { | |
ext4_debug("ext4 move extent: The argument files should not " | |
- "be same file [ino:orig %lu, donor %lu]\n", | |
+ "be same inode [ino:orig %lu, donor %lu]\n", | |
orig_inode->i_ino, donor_inode->i_ino); | |
return -EINVAL; | |
} | |
@@ -1208,18 +1142,21 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |
orig_inode->i_ino, donor_inode->i_ino); | |
return -EINVAL; | |
} | |
- | |
+ /* TODO: This is a non-obvious task to swap blocks for inodes with full | |
+ journaling enabled */ | |
+ if (ext4_should_journal_data(orig_inode) || | |
+ ext4_should_journal_data(donor_inode)) { | |
+ return -EINVAL; | |
+ } | |
/* Protect orig and donor inodes against a truncate */ | |
- ret1 = mext_inode_double_lock(orig_inode, donor_inode); | |
- if (ret1 < 0) | |
- return ret1; | |
+ mext_inode_double_lock(orig_inode, donor_inode); | |
/* Protect extent tree against block allocations via delalloc */ | |
double_down_write_data_sem(orig_inode, donor_inode); | |
/* Check the filesystem environment whether move_extent can be done */ | |
- ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start, | |
+ ret = mext_check_arguments(orig_inode, donor_inode, orig_start, | |
donor_start, &len); | |
- if (ret1) | |
+ if (ret) | |
goto out; | |
file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits; | |
@@ -1227,13 +1164,13 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |
if (file_end < block_end) | |
len -= block_end - file_end; | |
- ret1 = get_ext_path(orig_inode, block_start, &orig_path); | |
- if (ret1) | |
+ ret = get_ext_path(orig_inode, block_start, &orig_path); | |
+ if (ret) | |
goto out; | |
/* Get path structure to check the hole */ | |
- ret1 = get_ext_path(orig_inode, block_start, &holecheck_path); | |
- if (ret1) | |
+ ret = get_ext_path(orig_inode, block_start, &holecheck_path); | |
+ if (ret) | |
goto out; | |
depth = ext_depth(orig_inode); | |
@@ -1252,13 +1189,13 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |
last_extent = mext_next_extent(orig_inode, | |
holecheck_path, &ext_cur); | |
if (last_extent < 0) { | |
- ret1 = last_extent; | |
+ ret = last_extent; | |
goto out; | |
} | |
last_extent = mext_next_extent(orig_inode, orig_path, | |
&ext_dummy); | |
if (last_extent < 0) { | |
- ret1 = last_extent; | |
+ ret = last_extent; | |
goto out; | |
} | |
seq_start = le32_to_cpu(ext_cur->ee_block); | |
@@ -1272,7 +1209,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |
if (le32_to_cpu(ext_cur->ee_block) > block_end) { | |
ext4_debug("ext4 move extent: The specified range of file " | |
"may be the hole\n"); | |
- ret1 = -EINVAL; | |
+ ret = -EINVAL; | |
goto out; | |
} | |
@@ -1292,7 +1229,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |
last_extent = mext_next_extent(orig_inode, holecheck_path, | |
&ext_cur); | |
if (last_extent < 0) { | |
- ret1 = last_extent; | |
+ ret = last_extent; | |
break; | |
} | |
add_blocks = ext4_ext_get_actual_len(ext_cur); | |
@@ -1349,18 +1286,18 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |
orig_page_offset, | |
data_offset_in_page, | |
block_len_in_page, uninit, | |
- &ret1); | |
+ &ret); | |
/* Count how many blocks we have exchanged */ | |
*moved_len += block_len_in_page; | |
- if (ret1 < 0) | |
+ if (ret < 0) | |
break; | |
if (*moved_len > len) { | |
EXT4_ERROR_INODE(orig_inode, | |
"We replaced blocks too much! " | |
"sum of replaced: %llu requested: %llu", | |
*moved_len, len); | |
- ret1 = -EIO; | |
+ ret = -EIO; | |
break; | |
} | |
@@ -1374,22 +1311,22 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |
} | |
double_down_write_data_sem(orig_inode, donor_inode); | |
- if (ret1 < 0) | |
+ if (ret < 0) | |
break; | |
/* Decrease buffer counter */ | |
if (holecheck_path) | |
ext4_ext_drop_refs(holecheck_path); | |
- ret1 = get_ext_path(orig_inode, seq_start, &holecheck_path); | |
- if (ret1) | |
+ ret = get_ext_path(orig_inode, seq_start, &holecheck_path); | |
+ if (ret) | |
break; | |
depth = holecheck_path->p_depth; | |
/* Decrease buffer counter */ | |
if (orig_path) | |
ext4_ext_drop_refs(orig_path); | |
- ret1 = get_ext_path(orig_inode, seq_start, &orig_path); | |
- if (ret1) | |
+ ret = get_ext_path(orig_inode, seq_start, &orig_path); | |
+ if (ret) | |
break; | |
ext_cur = holecheck_path[depth].p_ext; | |
@@ -1412,12 +1349,7 @@ out: | |
kfree(holecheck_path); | |
} | |
double_up_write_data_sem(orig_inode, donor_inode); | |
- ret2 = mext_inode_double_unlock(orig_inode, donor_inode); | |
- | |
- if (ret1) | |
- return ret1; | |
- else if (ret2) | |
- return ret2; | |
+ mext_inode_double_unlock(orig_inode, donor_inode); | |
- return 0; | |
+ return ret; | |
} | |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c | |
index 6832a51..665e55c 100644 | |
--- a/fs/ext4/namei.c | |
+++ b/fs/ext4/namei.c | |
@@ -585,11 +585,8 @@ static int htree_dirblock_to_tree(struct file *dir_file, | |
if (ext4_check_dir_entry(dir, NULL, de, bh, | |
(block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb)) | |
+ ((char *)de - bh->b_data))) { | |
- /* On error, skip the f_pos to the next block. */ | |
- dir_file->f_pos = (dir_file->f_pos | | |
- (dir->i_sb->s_blocksize - 1)) + 1; | |
- brelse(bh); | |
- return count; | |
+ /* silently ignore the rest of the block */ | |
+ break; | |
} | |
ext4fs_dirhash(de->name, de->name_len, hinfo); | |
if ((hinfo->hash < start_hash) || | |
@@ -1048,6 +1045,12 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru | |
EXT4_ERROR_INODE(dir, "bad inode number: %u", ino); | |
return ERR_PTR(-EIO); | |
} | |
+ if (unlikely(ino == dir->i_ino)) { | |
+ EXT4_ERROR_INODE(dir, "'%.*s' linked to parent dir", | |
+ dentry->d_name.len, | |
+ dentry->d_name.name); | |
+ return ERR_PTR(-EIO); | |
+ } | |
inode = ext4_iget(dir->i_sb, ino); | |
if (inode == ERR_PTR(-ESTALE)) { | |
EXT4_ERROR_INODE(dir, | |
@@ -1808,9 +1811,7 @@ retry: | |
err = PTR_ERR(inode); | |
if (!IS_ERR(inode)) { | |
init_special_inode(inode, inode->i_mode, rdev); | |
-#ifdef CONFIG_EXT4_FS_XATTR | |
inode->i_op = &ext4_special_inode_operations; | |
-#endif | |
err = ext4_add_nondir(handle, dentry, inode); | |
} | |
ext4_journal_stop(handle); | |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c | |
index 8a67054..a43e43c 100644 | |
--- a/fs/ext4/resize.c | |
+++ b/fs/ext4/resize.c | |
@@ -161,6 +161,8 @@ static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size) | |
if (flex_gd == NULL) | |
goto out3; | |
+ if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_flex_group_data)) | |
+ goto out2; | |
flex_gd->count = flexbg_size; | |
flex_gd->groups = kmalloc(sizeof(struct ext4_new_group_data) * | |
@@ -454,6 +456,9 @@ static int setup_new_flex_group_blocks(struct super_block *sb, | |
gdblocks = ext4_bg_num_gdb(sb, group); | |
start = ext4_group_first_block_no(sb, group); | |
+ if (!ext4_bg_has_super(sb, group)) | |
+ goto handle_itb; | |
+ | |
/* Copy all of the GDT blocks into the backup in this group */ | |
for (j = 0, block = start + 1; j < gdblocks; j++, block++) { | |
struct buffer_head *gdb; | |
@@ -496,6 +501,7 @@ static int setup_new_flex_group_blocks(struct super_block *sb, | |
goto out; | |
} | |
+handle_itb: | |
/* Initialize group tables of the grop @group */ | |
if (!(bg_flags[i] & EXT4_BG_INODE_ZEROED)) | |
goto handle_bb; | |
@@ -1198,6 +1204,8 @@ static void ext4_update_super(struct super_block *sb, | |
/* Update the global fs size fields */ | |
sbi->s_groups_count += flex_gd->count; | |
+ sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count, | |
+ (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); | |
/* Update the reserved block counts only once the new group is | |
* active. */ | |
@@ -1298,13 +1306,15 @@ exit_journal: | |
err = err2; | |
if (!err) { | |
- int i; | |
+ int gdb_num = group / EXT4_DESC_PER_BLOCK(sb); | |
+ int gdb_num_end = ((group + flex_gd->count - 1) / | |
+ EXT4_DESC_PER_BLOCK(sb)); | |
+ | |
update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, | |
sizeof(struct ext4_super_block)); | |
- for (i = 0; i < flex_gd->count; i++, group++) { | |
+ for (; gdb_num <= gdb_num_end; gdb_num++) { | |
struct buffer_head *gdb_bh; | |
- int gdb_num; | |
- gdb_num = group / EXT4_BLOCKS_PER_GROUP(sb); | |
+ | |
gdb_bh = sbi->s_group_desc[gdb_num]; | |
update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data, | |
gdb_bh->b_size); | |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c | |
index e90eeda..f0e4e46 100644 | |
--- a/fs/ext4/super.c | |
+++ b/fs/ext4/super.c | |
@@ -658,14 +658,6 @@ void __ext4_abort(struct super_block *sb, const char *function, | |
if (EXT4_SB(sb)->s_journal) | |
jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); | |
save_error_info(sb, function, line); | |
- #ifdef CONFIG_MACH_LGE | |
- /* LGE_CHANGE | |
- * put panic when ext4 partition is remounted as Read Only | |
- * 2014-04-15, [email protected] | |
- */ | |
- panic("EXT4-fs panic from previous error. remounted as RO \n"); | |
- #endif | |
- | |
} | |
if (test_opt(sb, ERRORS_PANIC)) | |
panic("EXT4-fs panic from previous error\n"); | |
@@ -1014,11 +1006,6 @@ static int init_inodecache(void) | |
static void destroy_inodecache(void) | |
{ | |
- /* | |
- * Make sure all delayed rcu free inodes are flushed before we | |
- * destroy cache. | |
- */ | |
- rcu_barrier(); | |
kmem_cache_destroy(ext4_inode_cachep); | |
} | |
@@ -2136,7 +2123,9 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |
__func__, inode->i_ino, inode->i_size); | |
jbd_debug(2, "truncating inode %lu to %lld bytes\n", | |
inode->i_ino, inode->i_size); | |
+ mutex_lock(&inode->i_mutex); | |
ext4_truncate(inode); | |
+ mutex_unlock(&inode->i_mutex); | |
nr_truncates++; | |
} else { | |
ext4_msg(sb, KERN_DEBUG, | |
@@ -3825,55 +3814,27 @@ no_journal: | |
cantfind_ext4: | |
if (!silent) | |
ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); | |
-#ifdef CONFIG_MACH_LGE | |
-/* LGE_CHANGE | |
- * add return code if ext4 superblock is damaged | |
- * 2014-01-16, [email protected] | |
- */ | |
- ret = -ESUPER; | |
-#endif | |
goto failed_mount; | |
failed_mount7: | |
-#ifdef CONFIG_MACH_LGE | |
- printk(KERN_ERR "EXT4-fs: failed_mount7\n"); | |
-#endif | |
ext4_unregister_li_request(sb); | |
failed_mount6: | |
-#ifdef CONFIG_MACH_LGE | |
- printk(KERN_ERR "EXT4-fs: failed_mount6\n"); | |
-#endif | |
ext4_mb_release(sb); | |
failed_mount5: | |
-#ifdef CONFIG_MACH_LGE | |
- printk(KERN_ERR "EXT4-fs: failed_mount5\n"); | |
-#endif | |
ext4_ext_release(sb); | |
ext4_release_system_zone(sb); | |
failed_mount4a: | |
-#ifdef CONFIG_MACH_LGE | |
- printk(KERN_ERR "EXT4-fs: failed_mount4a\n"); | |
-#endif | |
dput(sb->s_root); | |
sb->s_root = NULL; | |
failed_mount4: | |
-#ifdef CONFIG_MACH_LGE | |
- printk(KERN_ERR "EXT4-fs: failed_mount4\n"); | |
-#endif | |
ext4_msg(sb, KERN_ERR, "mount failed"); | |
destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq); | |
failed_mount_wq: | |
-#ifdef CONFIG_MACH_LGE | |
- printk(KERN_ERR "EXT4-fs: failed_mount_wq\n"); | |
-#endif | |
if (sbi->s_journal) { | |
jbd2_journal_destroy(sbi->s_journal); | |
sbi->s_journal = NULL; | |
} | |
failed_mount3: | |
-#ifdef CONFIG_MACH_LGE | |
- printk(KERN_ERR "EXT4-fs: failed_mount3\n"); | |
-#endif | |
del_timer(&sbi->s_err_report); | |
if (sbi->s_flex_groups) | |
ext4_kvfree(sbi->s_flex_groups); | |
@@ -3884,17 +3845,10 @@ failed_mount3: | |
if (sbi->s_mmp_tsk) | |
kthread_stop(sbi->s_mmp_tsk); | |
failed_mount2: | |
-#ifdef CONFIG_MACH_LGE | |
- printk(KERN_ERR "EXT4-fs: failed_mount2\n"); | |
- ret = -ESUPER; | |
-#endif | |
for (i = 0; i < db_count; i++) | |
brelse(sbi->s_group_desc[i]); | |
ext4_kvfree(sbi->s_group_desc); | |
failed_mount: | |
-#ifdef CONFIG_MACH_LGE | |
- printk(KERN_ERR "EXT4-fs: failed_mount\n"); | |
-#endif | |
if (sbi->s_proc) { | |
remove_proc_entry("options", sbi->s_proc); | |
remove_proc_entry(sb->s_id, ext4_proc_root); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment