Hi Linus,
Here are some Ocfs2 patches - bug fixes, performance fixes and small
cleanups.
--Mark
Please pull from 'upstream-linus' branch of
git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2.git upstream-linus
to receive the following updates:
fs/ocfs2/alloc.c | 2 +-
fs/ocfs2/aops.c | 22 ++++++++++++++++++++++
fs/ocfs2/cluster/heartbeat.c | 2 +-
fs/ocfs2/dcache.c | 2 +-
fs/ocfs2/dir.c | 6 +++---
fs/ocfs2/dlmglue.c | 25 ++++++++++---------------
fs/ocfs2/file.c | 26 +++++++++++++++++++++++++-
fs/ocfs2/namei.c | 13 ++++++++++---
8 files changed, 73 insertions(+), 25 deletions(-)
Adrian Bunk (1):
[2.6 patch] make ocfs2_find_entry_el() static
Jan Kara (1):
Fix possibly too long write in o2hb_setup_one_bio()
Mark Fasheh (4):
ocfs2: Create locks at initially requested level
ocfs2: Re-order iput in ocfs2_drop_dentry_lock
ocfs2: Commit journal on sync writes
ocfs2: fix write() performance regression
Roel Kluin (1):
Fix priority mistakes in fs/ocfs2/{alloc.c, dlmglue.c}
Srinivas Eeda (1):
ocfs2: fix rename vs unlink race
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 4ba7f0b..ce62c15 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -3946,7 +3946,7 @@ static int __ocfs2_mark_extent_written(struct inode
*inode,
struct ocfs2_merge_ctxt ctxt;
struct ocfs2_extent_list *rightmost_el;
- if (!rec->e_flags & OCFS2_EXT_UNWRITTEN) {
+ if (!(rec->e_flags & OCFS2_EXT_UNWRITTEN)) {
ret = -EIO;
mlog_errno(ret);
goto out;
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index c69c1b3..556e34c 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -729,6 +729,27 @@ static void ocfs2_clear_page_regions(struct page *page,
}
/*
+ * Nonsparse file systems fully allocate before we get to the write
+ * code. This prevents ocfs2_write() from tagging the write as an
+ * allocating one, which means ocfs2_map_page_blocks() might try to
+ * read-in the blocks at the tail of our file. Avoid reading them by
+ * testing i_size against each block offset.
+ */
+static int ocfs2_should_read_blk(struct inode *inode, struct page *page,
+ unsigned int block_start)
+{
+ u64 offset = page_offset(page) + block_start;
+
+ if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
+ return 1;
+
+ if (i_size_read(inode) > offset)
+ return 1;
+
+ return 0;
+}
+
+/*
* Some of this taken from block_prepare_write(). We already have our
* mapping by now though, and the entire write will be allocating or
* it won't, so not much need to use BH_New.
@@ -781,6 +802,7 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
set_buffer_uptodate(bh);
} else if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
!buffer_new(bh) &&
+ ocfs2_should_read_blk(inode, page, block_start) &&
(block_start < from || block_end > to)) {
ll_rw_block(READ, 1, &bh);
*wait_bh++=bh;
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 9cc7c04..f02ccb3 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -267,7 +267,7 @@ static struct bio *o2hb_setup_one_bio(struct o2hb_region
*reg,
current_page = cs / spp;
page = reg->hr_slot_data[current_page];
- vec_len = min(PAGE_CACHE_SIZE,
+ vec_len = min(PAGE_CACHE_SIZE - vec_start,
(max_slots-cs) * (PAGE_CACHE_SIZE/spp) );
mlog(ML_HB_BIO, "page %d, vec_len = %u, vec_start = %u\n",
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 3094ddb..1957a5e 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -318,9 +318,9 @@ out_attach:
static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb,
struct ocfs2_dentry_lock *dl)
{
+ iput(dl->dl_inode);
ocfs2_simple_drop_lockres(osb, &dl->dl_lockres);
ocfs2_lock_res_free(&dl->dl_lockres);
- iput(dl->dl_inode);
kfree(dl);
}
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 6a2f143..63b28fd 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -208,9 +208,9 @@ out:
return NULL;
}
-struct buffer_head *ocfs2_find_entry_el(const char *name, int namelen,
- struct inode *dir,
- struct ocfs2_dir_entry **res_dir)
+static struct buffer_head *ocfs2_find_entry_el(const char *name, int namelen,
+ struct inode *dir,
+ struct ocfs2_dir_entry **res_dir)
{
struct super_block *sb;
struct buffer_head *bh_use[NAMEI_RA_SIZE];
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 41c76ff..4e97dcc 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -670,7 +670,7 @@ static inline void ocfs2_generic_handle_attach_action(struct
ocfs2_lock_res *loc
{
mlog_entry_void();
- BUG_ON((!lockres->l_flags & OCFS2_LOCK_BUSY));
+ BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY)));
BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
if (lockres->l_requested > LKM_NLMODE &&
@@ -980,18 +980,6 @@ again:
goto unlock;
}
- if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
- /* lock has not been created yet. */
- spin_unlock_irqrestore(&lockres->l_lock, flags);
-
- ret = ocfs2_lock_create(osb, lockres, LKM_NLMODE, 0);
- if (ret < 0) {
- mlog_errno(ret);
- goto out;
- }
- goto again;
- }
-
if (lockres->l_flags & OCFS2_LOCK_BLOCKED &&
!ocfs2_may_continue_on_blocked_lock(lockres, level)) {
/* is the lock is currently blocked on behalf of
@@ -1006,7 +994,14 @@ again:
mlog(ML_ERROR, "lockres %s has action %u pending\n",
lockres->l_name, lockres->l_action);
- lockres->l_action = OCFS2_AST_CONVERT;
+ if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
+ lockres->l_action = OCFS2_AST_ATTACH;
+ lkm_flags &= ~LKM_CONVERT;
+ } else {
+ lockres->l_action = OCFS2_AST_CONVERT;
+ lkm_flags |= LKM_CONVERT;
+ }
+
lockres->l_requested = level;
lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
spin_unlock_irqrestore(&lockres->l_lock, flags);
@@ -1021,7 +1016,7 @@ again:
status = dlmlock(osb->dlm,
level,
&lockres->l_lksb,
- lkm_flags|LKM_CONVERT,
+ lkm_flags,
lockres->l_name,
OCFS2_LOCK_ID_MAX_LEN - 1,
ocfs2_locking_ast,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index f92fe91..bbac7cd 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1891,9 +1891,11 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
ssize_t written = 0;
size_t ocount; /* original count */
size_t count; /* after file limit checks */
- loff_t *ppos = &iocb->ki_pos;
+ loff_t old_size, *ppos = &iocb->ki_pos;
+ u32 old_clusters;
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_path.dentry->d_inode;
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
mlog_entry("(0x%p, %u, '%.*s')\n", file,
(unsigned int)nr_segs,
@@ -1949,6 +1951,13 @@ relock:
goto relock;
}
+ /*
+ * To later detect whether a journal commit for sync writes is
+ * necessary, we sample i_size, and cluster count here.
+ */
+ old_size = i_size_read(inode);
+ old_clusters = OCFS2_I(inode)->ip_clusters;
+
/* communicate with ocfs2_dio_end_io */
ocfs2_iocb_set_rw_locked(iocb, rw_level);
@@ -1978,6 +1987,21 @@ out_dio:
/* buffered aio wouldn't have proper lock coverage today */
BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
+ if ((file->f_flags & O_SYNC && !direct_io) || IS_SYNC(inode)) {
+ /*
+ * The generic write paths have handled getting data
+ * to disk, but since we don't make use of the dirty
+ * inode list, a manual journal commit is necessary
+ * here.
+ */
+ if (old_size != i_size_read(inode) ||
+ old_clusters != OCFS2_I(inode)->ip_clusters) {
+ ret = journal_force_commit(osb->journal->j_journal);
+ if (ret < 0)
+ written = ret;
+ }
+ }
+
/*
* deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io
* function pointer which is called when o_direct io completes so that
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 7292590..989ac27 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -1105,9 +1105,16 @@ static int ocfs2_rename(struct inode *old_dir,
goto bail;
}
- if (!new_de && new_inode)
- mlog(ML_ERROR, "inode %lu does not exist in it's parent "
- "directory!", new_inode->i_ino);
+ if (!new_de && new_inode) {
+ /*
+ * Target was unlinked by another node while we were
+ * waiting to get to ocfs2_rename(). There isn't
+ * anything we can do here to help the situation, so
+ * bubble up the appropriate error.
+ */
+ status = -ENOENT;
+ goto bail;
+ }
/* In case we need to overwrite an existing file, we blow it
* away first */
Hi Linus,
Here's some more Ocfs2 patches for 2.6.24. This series also includes
a small performance fixup from Jan Kara. The rest of the patches are bug
fixes or very trivial cleanups.
Please pull from 'upstream-linus' branch of
git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2.git upstream-linus
to receive the following updates:
fs/Kconfig | 9 +++++++++
fs/ocfs2/aops.c | 2 +-
fs/ocfs2/cluster/masklog.h | 2 +-
fs/ocfs2/dcache.c | 20 ++++++++++++++++----
fs/ocfs2/dlm/dlmmaster.c | 4 ++--
fs/ocfs2/file.c | 19 +++++++++++++++----
fs/ocfs2/inode.c | 6 +++---
fs/ocfs2/localalloc.c | 5 +++--
fs/ocfs2/super.c | 6 +++---
9 files changed, 53 insertions(+), 20 deletions(-)
Jan Kara (1):
ocfs2: Remove expensive bitmap scanning
Joe Perches (1):
fs/ocfs2: Add missing "space"
Mark Fasheh (6):
ocfs2: Reset journal parameters after s_mount_opt update
ocfs2: Filter -ENOSPC in mlog_errno()
ocfs2: log valid inode # on bad inode
ocfs2: Remove bug statement in ocfs2_dentry_iput()
ocfs2: Fix comparison in ocfs2_size_fits_inline_data()
ocfs2: reverse inline-data truncate args
diff --git a/fs/Kconfig b/fs/Kconfig
index 429a002..635f3e2 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -459,6 +459,15 @@ config OCFS2_DEBUG_MASKLOG
This option will enlarge your kernel, but it allows debugging of
ocfs2 filesystem issues.
+config OCFS2_DEBUG_FS
+ bool "OCFS2 expensive checks"
+ depends on OCFS2_FS
+ default n
+ help
+ This option will enable expensive consistency checks. Enable
+ this option for debugging only as it is likely to decrease
+ performance of the filesystem.
+
config MINIX_FS
tristate "Minix fs support"
help
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 556e34c..56f7790 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1514,7 +1514,7 @@ int ocfs2_size_fits_inline_data(struct buffer_head *di_bh,
u64 new_size)
{
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
- if (new_size < le16_to_cpu(di->id2.i_data.id_count))
+ if (new_size <= le16_to_cpu(di->id2.i_data.id_count))
return 1;
return 0;
}
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index cd04606..597e064 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -212,7 +212,7 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits;
#define mlog_errno(st) do { \
int _st = (st); \
if (_st != -ERESTARTSYS && _st != -EINTR && \
- _st != AOP_TRUNCATED_PAGE) \
+ _st != AOP_TRUNCATED_PAGE && _st != -ENOSPC) \
mlog(ML_ERROR, "status = %lld\n", (long long)_st); \
} while (0)
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 1957a5e..9923278 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -344,12 +344,24 @@ static void ocfs2_dentry_iput(struct dentry *dentry,
struct inode *inode)
{
struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
- mlog_bug_on_msg(!dl && !(dentry->d_flags &
DCACHE_DISCONNECTED),
- "dentry: %.*s\n", dentry->d_name.len,
- dentry->d_name.name);
+ if (!dl) {
+ /*
+ * No dentry lock is ok if we're disconnected or
+ * unhashed.
+ */
+ if (!(dentry->d_flags & DCACHE_DISCONNECTED) &&
+ !d_unhashed(dentry)) {
+ unsigned long long ino = 0ULL;
+ if (inode)
+ ino = (unsigned long long)OCFS2_I(inode)->ip_blkno;
+ mlog(ML_ERROR, "Dentry is missing cluster lock. "
+ "inode: %llu, d_flags: 0x%x, d_name: %.*s\n",
+ ino, dentry->d_flags, dentry->d_name.len,
+ dentry->d_name.name);
+ }
- if (!dl)
goto out;
+ }
mlog_bug_on_msg(dl->dl_count == 0, "dentry: %.*s, count: %u\n",
dentry->d_name.len, dentry->d_name.name,
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 62e4a7d..a54d33d 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -908,7 +908,7 @@ lookup:
* but they might own this lockres. wait on them. */
bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0);
if (bit < O2NM_MAX_NODES) {
- mlog(ML_NOTICE, "%s:%.*s: at least one node (%d) to"
+ mlog(ML_NOTICE, "%s:%.*s: at least one node (%d) to "
"recover before lock mastery can begin\n",
dlm->name, namelen, (char *)lockid, bit);
wait_on_recovery = 1;
@@ -962,7 +962,7 @@ redo_request:
spin_lock(&dlm->spinlock);
bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0);
if (bit < O2NM_MAX_NODES) {
- mlog(ML_NOTICE, "%s:%.*s: at least one node (%d) to"
+ mlog(ML_NOTICE, "%s:%.*s: at least one node (%d) to "
"recover before lock mastery can begin\n",
dlm->name, namelen, (char *)lockid, bit);
wait_on_recovery = 1;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index bbac7cd..b75b2e1 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -399,7 +399,7 @@ static int ocfs2_truncate_file(struct inode *inode,
if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
status = ocfs2_truncate_inline(inode, di_bh, new_i_size,
- i_size_read(inode), 0);
+ i_size_read(inode), 1);
if (status)
mlog_errno(status);
@@ -1521,6 +1521,7 @@ static int ocfs2_remove_inode_range(struct inode *inode,
u32 trunc_start, trunc_len, cpos, phys_cpos, alloc_size;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_cached_dealloc_ctxt dealloc;
+ struct address_space *mapping = inode->i_mapping;
ocfs2_init_dealloc_ctxt(&dealloc);
@@ -1529,10 +1530,20 @@ static int ocfs2_remove_inode_range(struct inode *inode,
if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
ret = ocfs2_truncate_inline(inode, di_bh, byte_start,
- byte_start + byte_len, 1);
- if (ret)
+ byte_start + byte_len, 0);
+ if (ret) {
mlog_errno(ret);
- return ret;
+ goto out;
+ }
+ /*
+ * There's no need to get fancy with the page cache
+ * truncate of an inline-data inode. We're talking
+ * about less than a page here, which will be cached
+ * in the dinode buffer anyway.
+ */
+ unmap_mapping_range(mapping, 0, 0, 0);
+ truncate_inode_pages(mapping, 0);
+ goto out;
}
trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start);
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 1d5e0cb..ebb2bbe 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -455,8 +455,8 @@ static int ocfs2_read_locked_inode(struct inode *inode,
status = -EINVAL;
fe = (struct ocfs2_dinode *) bh->b_data;
if (!OCFS2_IS_VALID_DINODE(fe)) {
- mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n",
- (unsigned long long)le64_to_cpu(fe->i_blkno), 7,
+ mlog(0, "Invalid dinode #%llu: signature = %.*s\n",
+ (unsigned long long)args->fi_blkno, 7,
fe->i_signature);
goto bail;
}
@@ -863,7 +863,7 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
status = ocfs2_try_open_lock(inode, 1);
if (status == -EAGAIN) {
status = 0;
- mlog(0, "Skipping delete of %llu because it is in use on"
+ mlog(0, "Skipping delete of %llu because it is in use on "
"other nodes\n", (unsigned long long)oi->ip_blkno);
goto bail;
}
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index d272847..58ea88b 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -484,6 +484,7 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
+#ifdef OCFS2_DEBUG_FS
if (le32_to_cpu(alloc->id1.bitmap1.i_used) !
ocfs2_local_alloc_count_bits(alloc)) {
ocfs2_error(osb->sb, "local alloc inode %llu says it has "
@@ -494,6 +495,7 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
status = -EIO;
goto bail;
}
+#endif
free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) -
le32_to_cpu(alloc->id1.bitmap1.i_used);
@@ -712,9 +714,8 @@ static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
void *bitmap;
struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
- mlog_entry("total = %u, COUNT = %u, used = %u\n",
+ mlog_entry("total = %u, used = %u\n",
le32_to_cpu(alloc->id1.bitmap1.i_total),
- ocfs2_local_alloc_count_bits(alloc),
le32_to_cpu(alloc->id1.bitmap1.i_used));
if (!alloc->id1.bitmap1.i_total) {
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index be562ac..5ee7754 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -438,14 +438,14 @@ unlock_osb:
}
if (!ret) {
- if (!ocfs2_is_hard_readonly(osb))
- ocfs2_set_journal_params(osb);
-
/* Only save off the new mount options in case of a successful
* remount. */
osb->s_mount_opt = parsed_options.mount_opt;
osb->s_atime_quantum = parsed_options.atime_quantum;
osb->preferred_slot = parsed_options.slot;
+
+ if (!ocfs2_is_hard_readonly(osb))
+ ocfs2_set_journal_params(osb);
}
out:
return ret;