Nigel Metheringham writes:
> Has anyone tried LFS (ie >2G files support) and Ext3 together?
> Are there good reasons why this should/should not work?
I have already implemented an LFS patch for ext3, and sent it to Stephen.
I will post it here as well, but there are several other changes that I
have made to the ext3 source code which may cause the patch to not apply
cleanly on stock ext3-0.0.6b.
> I see the RH enterprise kernel patch set specifically does not attempt
> both lfs and ext3, but the lfs patches themselves touch some reasonably
> localised parts of ext2, so I would hope (without having dived in there
> to test), that the ext3 changes would mirror that reasonably well...
Can't say about the RH kernel, but the ext3+LFS code has been under testing
at TurboLinux and we have not seen any problems.
Cheers, Andreas
====================== ext3-0.0.6b-lfs.diff =============================
--- linux/fs/ext3/file.c.orig Fri Jan 5 22:11:00 2001
+++ linux/fs/ext3/file.c Mon Jan 15 13:21:44 2001
@@ -40,10 +40,6 @@
static long long ext3_file_lseek(struct file *, long long, int);
static ssize_t ext3_file_write (struct file *, const char *, size_t, loff_t *);
static int ext3_release_file (struct inode *, struct file *);
-#if BITS_PER_LONG < 64
-static int ext3_open_file (struct inode *, struct file *);
-
-#else
#define EXT3_MAX_SIZE(bits) \
(((EXT3_NDIR_BLOCKS + (1LL << (bits - 2)) + \
@@ -56,8 +52,6 @@
EXT3_MAX_SIZE(10), EXT3_MAX_SIZE(11), EXT3_MAX_SIZE(12), EXT3_MAX_SIZE(13)
};
-#endif
-
/*
* We have mostly NULL's here: the current defaults are ok for
* the ext3 filesystem.
@@ -70,11 +64,7 @@
NULL, /* poll - default */
ext3_ioctl, /* ioctl */
generic_file_mmap, /* mmap */
-#if BITS_PER_LONG == 64
NULL, /* no special open is needed */
-#else
- ext3_open_file,
-#endif
NULL, /* flush */
ext3_release_file, /* release */
ext3_sync_file, /* fsync */
@@ -122,14 +111,9 @@
case 1:
offset += file->f_pos;
}
-#if BITS_PER_LONG < 64
- if (offset >> 31)
- return -EINVAL;
-#else
if (offset < 0 ||
offset > ext3_max_sizes[EXT3_BLOCK_SIZE_BITS(inode->i_sb)])
return -EINVAL;
-#endif
if (offset != file->f_pos) {
file->f_pos = offset;
file->f_reada = 0;
@@ -157,12 +141,13 @@
size_t count, loff_t *ppos)
{
struct inode * inode = filp->f_dentry->d_inode;
- off_t pos;
- long block;
+ loff_t pos;
+ long block, needed;
int offset;
- int written, written_transaction, c, needed, err;
+ size_t written, written_transaction, c;
struct buffer_head * bh;
struct super_block * sb;
+ int err;
int write_error, new_buffer;
unsigned long limit;
handle_t *handle;
@@ -173,9 +158,9 @@
return 0;
/* This makes the bounds-checking arithmetic later on much more
* sane. */
- if (((signed) count) < 0)
- return -EINVAL;
-
+ if (((ssize_t) count) < 0)
+ return -EINVAL;
+
jfs_debug(4, "Write for %u at %ld to inode %p\n",
count, (long) *ppos, inode);
write_error = 0;
@@ -203,15 +188,45 @@
pos = *ppos;
if (pos != *ppos)
return -EINVAL;
-#if BITS_PER_LONG >= 64
- if (pos > ext3_max_sizes[EXT3_BLOCK_SIZE_BITS(sb)])
- return -EINVAL;
-#endif
}
- block = pos >> EXT3_BLOCK_SIZE_BITS(sb);
- offset = pos & (sb->s_blocksize - 1);
- c = sb->s_blocksize - offset;
+ /* Check for overflow.. */
+
+ /* L-F-S spec 2.2.1.27: */
+ if (!(filp->f_flags & O_LARGEFILE)) {
+ if (pos >= 0x7fffffffULL) /* pos@2G forbidden */
+ return -EFBIG;
+
+ if (pos + count > 0x7fffffffULL)
+ /* Write only until end of allowed region */
+ count = 0x7fffffffULL - pos;
+ }
+
+ {
+ loff_t max = ext3_max_sizes[EXT3_BLOCK_SIZE_BITS(sb)];
+
+ if (pos >= max)
+ return -EFBIG;
+
+ if (pos + count > max) {
+ count = max - pos;
+ if (!count)
+ return -EFBIG;
+ }
+ }
+
+ /* From SUS: We must generate a SIGXFSZ for file size overflow
+ * only if no bytes were actually written to the file. --sct */
+
+ limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
+ if (limit != RLIM_INFINITY) {
+ if (pos >= limit) {
+ send_sig(SIGXFSZ, current, 0);
+ return -EFBIG;
+ }
+ if (pos+count > limit)
+ count = limit - pos;
+ }
/* will_journal_data must not just check the journaling mode:
* specific files can be marked for data-journaling even with
@@ -235,76 +250,24 @@
needed = (count >> EXT3_BLOCK_SIZE_BITS(sb)) + 1;
if (needed > EXT3_MAX_TRANS_DATA)
needed = EXT3_MAX_TRANS_DATA;
- } else
+ } else
needed = 0;
-
- handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS + needed);
-
-
- /* Check for overflow.. */
-
-#if BITS_PER_LONG < 64
- /* If the fd's pos is already greater than or equal to the file
- * descriptor's offset maximum, then we need to return EFBIG for
- * any non-zero count (and we already tested for zero above). */
- if (((unsigned) pos) >= 0x7FFFFFFFUL) {
- written = -EFBIG;
- goto error_out;
- }
-
- /* If we are about to overflow the maximum file size, we also
- * need to return the error, but only if no bytes can be written
- * successfully. */
- if (((unsigned) pos + count) > 0x7FFFFFFFUL) {
- count = 0x7FFFFFFFL - pos;
- if (((signed) count) < 0) {
- written = -EFBIG;
- goto error_out;
- }
- }
-#else
- {
- off_t max = ext3_max_sizes[EXT3_BLOCK_SIZE_BITS(sb)];
- if (pos >= max) {
- written = -EFBIG;
- goto error_out;
- }
-
- if (pos + count > max) {
- count = max - pos;
- if (!count) {
- written = -EFBIG;
- goto error_out;
- }
- }
- if (((pos + count) >> 31) &&
- !EXT3_HAS_RO_COMPAT_FEATURE(sb,
- EXT3_FEATURE_RO_COMPAT_LARGE_FILE)) {
- /* If this is the first large file created, add a flag
- to the superblock */
- journal_get_write_access(handle, sb->u.ext3_sb.s_sbh);
- ext3_update_fs_rev(sb);
- EXT3_SET_RO_COMPAT_FEATURE(sb,
- EXT3_FEATURE_RO_COMPAT_LARGE_FILE);
- journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh);
- }
- }
-#endif
+ block = pos >> EXT3_BLOCK_SIZE_BITS(sb);
+ offset = pos & (sb->s_blocksize - 1);
+ c = sb->s_blocksize - offset;
- /* From SUS: We must generate a SIGXFSZ for file size overflow
- * only if no bytes were actually written to the file. --sct */
+ handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS + needed);
- limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
- if (limit < RLIM_INFINITY) {
- if (((unsigned) pos+count) >= limit) {
- count = limit - pos;
- if (((signed) count) <= 0) {
- send_sig(SIGXFSZ, current, 0);
- written = -EFBIG;
- goto error_out;
- }
- }
+ if (((pos + count) >> 31) &&
+ (!EXT3_HAS_RO_COMPAT_FEATURE(sb,EXT3_FEATURE_RO_COMPAT_LARGE_FILE)||
+ EXT3_SB(sb)->s_es->s_rev_level == cpu_to_le32(EXT3_GOOD_OLD_REV))){
+ /* If this is the first large file, add a flag to superblock */
+ journal_get_write_access(handle, sb->u.ext3_sb.s_sbh);
+ ext3_update_fs_rev(sb);
+ EXT3_SET_RO_COMPAT_FEATURE(sb,
+ EXT3_FEATURE_RO_COMPAT_LARGE_FILE);
+ journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh);
}
written = written_transaction = 0;
@@ -441,7 +399,6 @@
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
*ppos = pos;
ext3_mark_inode_dirty(handle, inode);
-error_out:
ext3_journal_stop(handle, inode);
return written;
}
@@ -458,15 +415,3 @@
return 0;
}
-#if BITS_PER_LONG < 64
-/*
- * Called when an inode is about to be open.
- * We use this to disallow opening RW large files on 32bit systems.
- */
-static int ext3_open_file (struct inode * inode, struct file * filp)
-{
- if (inode->u.ext3_i.i_high_size && (filp->f_mode & FMODE_WRITE))
- return -EFBIG;
- return 0;
-}
-#endif
--- linux/fs/ext3/inode.c.orig Fri Jan 5 22:11:00 2001
+++ linux/fs/ext3/inode.c Mon Jan 15 16:25:05 2001
@@ -626,15 +626,8 @@
inode->u.ext3_i.i_dir_acl = le32_to_cpu(iloc.raw_inode->i_dir_acl);
else {
inode->u.ext3_i.i_dir_acl = 0;
- inode->u.ext3_i.i_high_size =
- le32_to_cpu(iloc.raw_inode->i_size_high);
-#if BITS_PER_LONG < 64
- if (iloc.raw_inode->i_size_high)
- inode->i_size = (__u32)-1;
-#else
- inode->i_size |= ((__u64)le32_to_cpu(iloc.raw_inode->i_size_high))
- << 32;
-#endif
+ inode->i_size = ((__u64)(inode->i_size & 0xFFFFFFFFUL)) |
+ (((__u64)le32_to_cpu(iloc.raw_inode->i_size_high))<<32);
}
inode->u.ext3_i.i_disksize = inode->i_size;
inode->u.ext3_i.i_block_group = iloc.block_group;
@@ -725,14 +718,8 @@
raw_inode->i_file_acl = cpu_to_le32(inode->u.ext3_i.i_file_acl);
if (S_ISDIR(inode->i_mode))
raw_inode->i_dir_acl = cpu_to_le32(inode->u.ext3_i.i_dir_acl);
- else {
-#if BITS_PER_LONG < 64
- raw_inode->i_size_high =
- cpu_to_le32(inode->u.ext3_i.i_high_size);
-#else
+ else
raw_inode->i_size_high = cpu_to_le32(inode->u.ext3_i.i_disksize >> 32);
-#endif
- }
if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
raw_inode->i_block[0] = cpu_to_le32(kdev_t_to_nr(inode->i_rdev));
else for (block = 0; block < EXT3_N_BLOCKS; block++)
@@ -843,60 +829,51 @@
goto out;
if (iattr->ia_valid & ATTR_SIZE) {
- off_t size = iattr->ia_size;
+ loff_t size = iattr->ia_size;
unsigned long limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
if (size < 0)
return -EINVAL;
-#if BITS_PER_LONG == 64
if (size > ext3_max_sizes[EXT3_BLOCK_SIZE_BITS(inode->i_sb)])
return -EFBIG;
-#endif
- if (limit < RLIM_INFINITY && size > limit) {
+ if (limit != RLIM_INFINITY && size > limit) {
send_sig(SIGXFSZ, current, 0);
return -EFBIG;
}
-
-#if BITS_PER_LONG == 64
- if (size >> 33) {
- struct super_block *sb = inode->i_sb;
- struct ext3_super_block *es = sb->u.ext3_sb.s_es;
-
- if (!EXT3_HAS_RO_COMPAT_FEATURE(sb,
- EXT3_FEATURE_RO_COMPAT_LARGE_FILE)) {
- struct buffer_head *bh = sb->u.ext3_sb.s_sbh;
-
- handle = ext3_journal_start(inode, 1);
- if (IS_ERR(handle))
- return PTR_ERR(handle);
-
- /* If this is the first large file
- * created, add a flag to the superblock */
- ext3_update_fs_rev(sb);
- EXT3_SET_RO_COMPAT_FEATURE(sb,
- EXT3_FEATURE_RO_COMPAT_LARGE_FILE);
- journal_dirty_metadata(handle, bh); /*@@@err*/
- ext3_journal_stop(handle, inode);
- }
- }
-#endif
}
-
retval = inode_change_ok(inode, iattr);
if (retval != 0)
goto out;
/* Notify-change transaction. The maximum number of buffers
- * required is one. */
+ * required is two (inode and maybe superblock if a large file). */
- handle = ext3_journal_start(inode, 1);
+ handle = ext3_journal_start(inode, 2);
if (IS_ERR(handle))
return PTR_ERR(handle);
retval = ext3_reserve_inode_write(handle, inode, &iloc);
- if (retval)
+ if (retval)
goto out_stop;
-
+
+ if (iattr->ia_valid & ATTR_SIZE && iattr->ia_size >> 31) {
+ struct super_block *sb = inode->i_sb;
+
+ if (!EXT3_HAS_RO_COMPAT_FEATURE(sb,
+ EXT3_FEATURE_RO_COMPAT_LARGE_FILE) ||
+ EXT3_SB(sb)->s_es->s_rev_level ==
+ cpu_to_le32(EXT3_GOOD_OLD_REV)) {
+ /* If this is the first large file created,
+ * add a flag to the superblock */
+ /* FIXME do we need journal_write_access() on sb? */
+ ext3_update_fs_rev(sb);
+ EXT3_SET_RO_COMPAT_FEATURE(sb,
+ EXT3_FEATURE_RO_COMPAT_LARGE_FILE);
+ /*@@@err*/
+ journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh);
+ }
+ }
+
inode_setattr(inode, iattr);
if (iattr->ia_valid & ATTR_ATTR_FLAG) {
--- linux/include/linux/ext3_fs_i.h.orig Fri Jan 5 22:10:59 2001
+++ linux/include/linux/ext3_fs_i.h Mon Jan 15 13:37:50 2001
@@ -45,7 +45,7 @@
* in memory. During truncate, i_size is set to 0 by the VFS
* but the filesystem won't set i_disksize to 0 until the
* truncate is actually under way. */
- off_t i_disksize;
+ loff_t i_disksize;
};
#endif /* _LINUX_EXT3_FS_I */
--
Andreas Dilger \ "If a man ate a pound of pasta and a pound of antipasto,
\ would they cancel out, leaving him still hungry?"
http://www-mddsp.enel.ucalgary.ca/People/adilger/ -- Dogbert