Created
January 20, 2015 18:49
-
-
Save invisiblek/a224e99b093ce6a422d1 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c | |
index 3c382b5..d0b8f98 100644 | |
--- a/fs/ext4/ext4_jbd2.c | |
+++ b/fs/ext4/ext4_jbd2.c | |
@@ -113,15 +113,6 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, | |
if (WARN_ON_ONCE(err)) { | |
ext4_journal_abort_handle(where, line, __func__, bh, | |
handle, err); | |
- ext4_error_inode(inode, where, line, | |
- bh->b_blocknr, | |
- "journal_dirty_metadata failed: " | |
- "handle type %u started at line %u, " | |
- "credits %u/%u, errcode %d", | |
- handle->h_type, | |
- handle->h_line_no, | |
- handle->h_requested_credits, | |
- handle->h_buffer_credits, err); | |
} | |
} else { | |
if (inode) | |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c | |
index 246fc24..4296a6f 100644 | |
--- a/fs/ext4/extents.c | |
+++ b/fs/ext4/extents.c | |
@@ -2111,7 +2111,7 @@ static int | |
ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, | |
struct ext4_extent *ex) | |
{ | |
- struct ext4_ext_cache cex = {0, 0, 0}; | |
+ struct ext4_ext_cache cex; | |
int ret = 0; | |
if (ext4_ext_check_cache(inode, block, &cex)) { | |
@@ -4733,7 +4733,7 @@ static int ext4_xattr_fiemap(struct inode *inode, | |
error = ext4_get_inode_loc(inode, &iloc); | |
if (error) | |
return error; | |
- physical = iloc.bh->b_blocknr << blockbits; | |
+ physical = (__u64)iloc.bh->b_blocknr << blockbits; | |
offset = EXT4_GOOD_OLD_INODE_SIZE + | |
EXT4_I(inode)->i_extra_isize; | |
physical += offset; | |
@@ -4741,7 +4741,7 @@ static int ext4_xattr_fiemap(struct inode *inode, | |
flags |= FIEMAP_EXTENT_DATA_INLINE; | |
brelse(iloc.bh); | |
} else { /* external block */ | |
- physical = EXT4_I(inode)->i_file_acl << blockbits; | |
+ physical = (__u64)EXT4_I(inode)->i_file_acl << blockbits; | |
length = inode->i_sb->s_blocksize; | |
} | |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c | |
index e2707f0..75c4f36 100644 | |
--- a/fs/ext4/ialloc.c | |
+++ b/fs/ext4/ialloc.c | |
@@ -488,10 +488,12 @@ fallback_retry: | |
for (i = 0; i < ngroups; i++) { | |
grp = (parent_group + i) % ngroups; | |
desc = ext4_get_group_desc(sb, grp, NULL); | |
- grp_free = ext4_free_inodes_count(sb, desc); | |
- if (desc && grp_free && grp_free >= avefreei) { | |
- *group = grp; | |
- return 0; | |
+ if (desc) { | |
+ grp_free = ext4_free_inodes_count(sb, desc); | |
+ if (grp_free && grp_free >= avefreei) { | |
+ *group = grp; | |
+ return 0; | |
+ } | |
} | |
} | |
@@ -685,11 +687,8 @@ repeat_in_this_group: | |
ino = ext4_find_next_zero_bit((unsigned long *) | |
inode_bitmap_bh->b_data, | |
EXT4_INODES_PER_GROUP(sb), ino); | |
- if (ino >= EXT4_INODES_PER_GROUP(sb)) { | |
- if (++group == ngroups) | |
- group = 0; | |
- continue; | |
- } | |
+ if (ino >= EXT4_INODES_PER_GROUP(sb)) | |
+ goto next_group; | |
if (group == 0 && (ino+1) < EXT4_FIRST_INO(sb)) { | |
ext4_error(sb, "reserved inode found cleared - " | |
"inode=%lu", ino + 1); | |
@@ -707,6 +706,9 @@ repeat_in_this_group: | |
goto got; /* we grabbed the inode! */ | |
if (ino < EXT4_INODES_PER_GROUP(sb)) | |
goto repeat_in_this_group; | |
+next_group: | |
+ if (++group == ngroups) | |
+ group = 0; | |
} | |
err = -ENOSPC; | |
goto out; | |
@@ -732,7 +734,6 @@ got: | |
BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap"); | |
err = ext4_handle_dirty_metadata(handle, NULL, block_bitmap_bh); | |
- brelse(block_bitmap_bh); | |
/* recheck and clear flag under lock if we still need to */ | |
ext4_lock_group(sb, group); | |
@@ -744,6 +745,7 @@ got: | |
gdp); | |
} | |
ext4_unlock_group(sb, group); | |
+ brelse(block_bitmap_bh); | |
if (err) | |
goto fail; | |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c | |
index 756fab6..5b6dcba 100644 | |
--- a/fs/ext4/inode.c | |
+++ b/fs/ext4/inode.c | |
@@ -145,7 +145,8 @@ void ext4_evict_inode(struct inode *inode) | |
* don't use page cache. | |
*/ | |
if (ext4_should_journal_data(inode) && | |
- (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) { | |
+ (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) && | |
+ inode->i_ino != EXT4_JOURNAL_INO) { | |
journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; | |
tid_t commit_tid = EXT4_I(inode)->i_datasync_tid; | |
@@ -279,6 +280,15 @@ void ext4_da_update_reserve_space(struct inode *inode, | |
used = ei->i_reserved_data_blocks; | |
} | |
+ if (unlikely(ei->i_allocated_meta_blocks > ei->i_reserved_meta_blocks)) { | |
+ ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, allocated %d " | |
+ "with only %d reserved metadata blocks\n", __func__, | |
+ inode->i_ino, ei->i_allocated_meta_blocks, | |
+ ei->i_reserved_meta_blocks); | |
+ WARN_ON(1); | |
+ ei->i_allocated_meta_blocks = ei->i_reserved_meta_blocks; | |
+ } | |
+ | |
/* Update per-inode reservations */ | |
ei->i_reserved_data_blocks -= used; | |
ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; | |
@@ -1104,6 +1114,17 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) | |
struct ext4_inode_info *ei = EXT4_I(inode); | |
unsigned int md_needed; | |
int ret; | |
+ ext4_lblk_t save_last_lblock; | |
+ int save_len; | |
+ | |
+ /* | |
+ * We will charge metadata quota at writeout time; this saves | |
+ * us from metadata over-estimation, though we may go over by | |
+ * a small amount in the end. Here we just reserve for data. | |
+ */ | |
+ ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1)); | |
+ if (ret) | |
+ return ret; | |
/* | |
* recalculate the amount of metadata blocks to reserve | |
@@ -1112,32 +1133,31 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) | |
*/ | |
repeat: | |
spin_lock(&ei->i_block_reservation_lock); | |
+ /* | |
+ * ext4_calc_metadata_amount() has side effects, which we have | |
+ * to be prepared undo if we fail to claim space. | |
+ */ | |
+ save_len = ei->i_da_metadata_calc_len; | |
+ save_last_lblock = ei->i_da_metadata_calc_last_lblock; | |
md_needed = EXT4_NUM_B2C(sbi, | |
ext4_calc_metadata_amount(inode, lblock)); | |
trace_ext4_da_reserve_space(inode, md_needed); | |
- spin_unlock(&ei->i_block_reservation_lock); | |
/* | |
- * We will charge metadata quota at writeout time; this saves | |
- * us from metadata over-estimation, though we may go over by | |
- * a small amount in the end. Here we just reserve for data. | |
- */ | |
- ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1)); | |
- if (ret) | |
- return ret; | |
- /* | |
* We do still charge estimated metadata to the sb though; | |
* we cannot afford to run out of free blocks. | |
*/ | |
if (ext4_claim_free_clusters(sbi, md_needed + 1, 0)) { | |
- dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); | |
+ ei->i_da_metadata_calc_len = save_len; | |
+ ei->i_da_metadata_calc_last_lblock = save_last_lblock; | |
+ spin_unlock(&ei->i_block_reservation_lock); | |
if (ext4_should_retry_alloc(inode->i_sb, &retries)) { | |
yield(); | |
goto repeat; | |
} | |
+ dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); | |
return -ENOSPC; | |
} | |
- spin_lock(&ei->i_block_reservation_lock); | |
ei->i_reserved_data_blocks++; | |
ei->i_reserved_meta_blocks += md_needed; | |
spin_unlock(&ei->i_block_reservation_lock); | |
@@ -1405,6 +1425,8 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd) | |
index = mpd->first_page; | |
end = mpd->next_page - 1; | |
+ | |
+ pagevec_init(&pvec, 0); | |
while (index <= end) { | |
nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); | |
if (nr_pages == 0) | |
@@ -2367,6 +2389,16 @@ static int ext4_nonda_switch(struct super_block *sb) | |
free_blocks = EXT4_C2B(sbi, | |
percpu_counter_read_positive(&sbi->s_freeclusters_counter)); | |
dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter); | |
+ /* | |
+ * Start pushing delalloc when 1/2 of free blocks are dirty. | |
+ */ | |
+ if (dirty_blocks && (free_blocks < 2 * dirty_blocks) && | |
+ !writeback_in_progress(sb->s_bdi) && | |
+ down_read_trylock(&sb->s_umount)) { | |
+ writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE); | |
+ up_read(&sb->s_umount); | |
+ } | |
+ | |
if (2 * free_blocks < 3 * dirty_blocks || | |
free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) { | |
/* | |
@@ -2375,13 +2407,6 @@ static int ext4_nonda_switch(struct super_block *sb) | |
*/ | |
return 1; | |
} | |
- /* | |
- * Even if we don't switch but are nearing capacity, | |
- * start pushing delalloc when 1/2 of free blocks are dirty. | |
- */ | |
- if (free_blocks < 2 * dirty_blocks) | |
- writeback_inodes_sb_if_idle(sb, WB_REASON_FS_FREE_SPACE); | |
- | |
return 0; | |
} | |
@@ -3869,6 +3894,7 @@ static int ext4_do_update_inode(handle_t *handle, | |
struct ext4_inode_info *ei = EXT4_I(inode); | |
struct buffer_head *bh = iloc->bh; | |
int err = 0, rc, block; | |
+ int need_datasync = 0; | |
/* For fields not not tracking in the in-memory inode, | |
* initialise them to zero for new inodes. */ | |
@@ -3917,7 +3943,10 @@ static int ext4_do_update_inode(handle_t *handle, | |
raw_inode->i_file_acl_high = | |
cpu_to_le16(ei->i_file_acl >> 32); | |
raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl); | |
- ext4_isize_set(raw_inode, ei->i_disksize); | |
+ if (ei->i_disksize != ext4_isize(raw_inode)) { | |
+ ext4_isize_set(raw_inode, ei->i_disksize); | |
+ need_datasync = 1; | |
+ } | |
if (ei->i_disksize > 0x7fffffffULL) { | |
struct super_block *sb = inode->i_sb; | |
if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, | |
@@ -3968,7 +3997,7 @@ static int ext4_do_update_inode(handle_t *handle, | |
err = rc; | |
ext4_clear_inode_state(inode, EXT4_STATE_NEW); | |
- ext4_update_inode_fsync_trans(handle, inode, 0); | |
+ ext4_update_inode_fsync_trans(handle, inode, need_datasync); | |
out_brelse: | |
brelse(bh); | |
ext4_std_error(inode->i_sb, err); | |
@@ -4191,7 +4220,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | |
struct kstat *stat) | |
{ | |
struct inode *inode; | |
- unsigned long delalloc_blocks; | |
+ unsigned long long delalloc_blocks; | |
inode = dentry->d_inode; | |
generic_fillattr(inode, stat); | |
@@ -4208,7 +4237,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | |
*/ | |
delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks; | |
- stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9; | |
+ stat->blocks += delalloc_blocks << (inode->i_sb->s_blocksize_bits-9); | |
return 0; | |
} | |
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c | |
index 6eee255..9727522 100644 | |
--- a/fs/ext4/ioctl.c | |
+++ b/fs/ext4/ioctl.c | |
@@ -38,7 +38,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |
handle_t *handle = NULL; | |
int err, migrate = 0; | |
struct ext4_iloc iloc; | |
- unsigned int oldflags; | |
+ unsigned int oldflags, mask, i; | |
unsigned int jflag; | |
if (!inode_owner_or_capable(inode)) | |
@@ -115,9 +115,14 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |
if (err) | |
goto flags_err; | |
- flags = flags & EXT4_FL_USER_MODIFIABLE; | |
- flags |= oldflags & ~EXT4_FL_USER_MODIFIABLE; | |
- ei->i_flags = flags; | |
+ for (i = 0, mask = 1; i < 32; i++, mask <<= 1) { | |
+ if (!(mask & EXT4_FL_USER_MODIFIABLE)) | |
+ continue; | |
+ if (mask & flags) | |
+ ext4_set_inode_flag(inode, i); | |
+ else | |
+ ext4_clear_inode_flag(inode, i); | |
+ } | |
ext4_set_inode_flags(inode); | |
inode->i_ctime = ext4_current_time(inode); | |
@@ -256,7 +261,6 @@ group_extend_out: | |
err = ext4_move_extents(filp, donor_filp, me.orig_start, | |
me.donor_start, me.len, &me.moved_len); | |
mnt_drop_write_file(filp); | |
- mnt_drop_write(filp->f_path.mnt); | |
if (copy_to_user((struct move_extent __user *)arg, | |
&me, sizeof(me))) | |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c | |
index 9898edc..cdfc763 100644 | |
--- a/fs/ext4/mballoc.c | |
+++ b/fs/ext4/mballoc.c | |
@@ -1980,7 +1980,11 @@ repeat: | |
group = ac->ac_g_ex.fe_group; | |
for (i = 0; i < ngroups; group++, i++) { | |
- if (group == ngroups) | |
+ /* | |
+ * Artificially restricted ngroups for non-extent | |
+ * files makes group > ngroups possible on first loop. | |
+ */ | |
+ if (group >= ngroups) | |
group = 0; | |
/* This now checks without needing the buddy page */ | |
@@ -2517,6 +2521,9 @@ int ext4_mb_release(struct super_block *sb) | |
struct ext4_sb_info *sbi = EXT4_SB(sb); | |
struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); | |
+ if (sbi->s_proc) | |
+ remove_proc_entry("mb_groups", sbi->s_proc); | |
+ | |
if (sbi->s_group_info) { | |
for (i = 0; i < ngroups; i++) { | |
grinfo = ext4_get_group_info(sb, i); | |
@@ -2564,8 +2571,6 @@ int ext4_mb_release(struct super_block *sb) | |
} | |
free_percpu(sbi->s_locality_groups); | |
- if (sbi->s_proc) | |
- remove_proc_entry("mb_groups", sbi->s_proc); | |
return 0; | |
} | |
@@ -4130,7 +4135,7 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac) | |
/* The max size of hash table is PREALLOC_TB_SIZE */ | |
order = PREALLOC_TB_SIZE - 1; | |
/* Add the prealloc space to lg */ | |
- rcu_read_lock(); | |
+ spin_lock(&lg->lg_prealloc_lock); | |
list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order], | |
pa_inode_list) { | |
spin_lock(&tmp_pa->pa_lock); | |
@@ -4154,12 +4159,12 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac) | |
if (!added) | |
list_add_tail_rcu(&pa->pa_inode_list, | |
&lg->lg_prealloc_list[order]); | |
- rcu_read_unlock(); | |
+ spin_unlock(&lg->lg_prealloc_lock); | |
/* Now trim the list to be not more than 8 elements */ | |
if (lg_prealloc_count > 8) { | |
ext4_mb_discard_lg_preallocations(sb, lg, | |
- order, lg_prealloc_count); | |
+ order, lg_prealloc_count); | |
return; | |
} | |
return ; | |
@@ -4639,10 +4644,16 @@ do_more: | |
* blocks being freed are metadata. these blocks shouldn't | |
* be used until this transaction is committed | |
*/ | |
+ retry: | |
new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS); | |
if (!new_entry) { | |
- err = -ENOMEM; | |
- goto error_return; | |
+ /* | |
+ * We use a retry loop because | |
+ * ext4_free_blocks() is not allowed to fail. | |
+ */ | |
+ cond_resched(); | |
+ congestion_wait(BLK_RW_ASYNC, HZ/50); | |
+ goto retry; | |
} | |
new_entry->efd_start_cluster = bit; | |
new_entry->efd_group = block_group; | |
@@ -4987,8 +4998,9 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) | |
end = start + (range->len >> sb->s_blocksize_bits) - 1; | |
minlen = range->minlen >> sb->s_blocksize_bits; | |
- if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)) || | |
- unlikely(start >= max_blks)) | |
+ if (minlen > EXT4_CLUSTERS_PER_GROUP(sb) || | |
+ start >= max_blks || | |
+ range->len < sb->s_blocksize) | |
return -EINVAL; | |
if (end >= max_blks) | |
end = max_blks - 1; | |
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c | |
index df5cde5..e2016f3 100644 | |
--- a/fs/ext4/move_extent.c | |
+++ b/fs/ext4/move_extent.c | |
@@ -1142,7 +1142,12 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |
orig_inode->i_ino, donor_inode->i_ino); | |
return -EINVAL; | |
} | |
- | |
+ /* TODO: This is a non-obvious task: swapping blocks for inodes with full | |
journaling enabled */ | |
+ if (ext4_should_journal_data(orig_inode) || | |
+ ext4_should_journal_data(donor_inode)) { | |
+ return -EINVAL; | |
+ } | |
/* Protect orig and donor inodes against a truncate */ | |
mext_inode_double_lock(orig_inode, donor_inode); | |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c | |
index 85fb03f..665e55c 100644 | |
--- a/fs/ext4/namei.c | |
+++ b/fs/ext4/namei.c | |
@@ -585,11 +585,8 @@ static int htree_dirblock_to_tree(struct file *dir_file, | |
if (ext4_check_dir_entry(dir, NULL, de, bh, | |
(block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb)) | |
+ ((char *)de - bh->b_data))) { | |
- /* On error, skip the f_pos to the next block. */ | |
- dir_file->f_pos = (dir_file->f_pos | | |
- (dir->i_sb->s_blocksize - 1)) + 1; | |
- brelse(bh); | |
- return count; | |
+ /* silently ignore the rest of the block */ | |
+ break; | |
} | |
ext4fs_dirhash(de->name, de->name_len, hinfo); | |
if ((hinfo->hash < start_hash) || |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment