diff --git a/pkgs/os-specific/linux/e3cfsprogs/default.nix b/pkgs/os-specific/linux/e3cfsprogs/default.nix index 8db3317a230b..89d5ad5158ce 100644 --- a/pkgs/os-specific/linux/e3cfsprogs/default.nix +++ b/pkgs/os-specific/linux/e3cfsprogs/default.nix @@ -7,8 +7,8 @@ stdenv.mkDerivation { patches = [ ./e3cfsprogs-1.39_bin_links.patch ./e3cfsprogs-1.39_etc.patch ]; src = fetchurl { - url = http://www.ext3cow.com/Download_files/e3cfsprogs-1.39.tgz; - sha256 = "26f535007a497d91c85d337ac67d62d42e3c8fde2ee02c5cb6b6e3e884a5d58f"; + url = http://ext3cow.com/e3cfsprogs/e3cfsprogs-1.39.tgz; + sha256 = "8dd3de546aeb1ae42fb05409aeb724a145fe9aa1dbe1115441c2297c9d48cf31"; }; configureFlags = diff --git a/pkgs/os-specific/linux/ext3cow-tools/default.nix b/pkgs/os-specific/linux/ext3cow-tools/default.nix index 71f6657bffa4..9b776de86ce5 100644 --- a/pkgs/os-specific/linux/ext3cow-tools/default.nix +++ b/pkgs/os-specific/linux/ext3cow-tools/default.nix @@ -5,7 +5,7 @@ stdenv.mkDerivation { builder = ./builder.sh; src = fetchurl { - url = http://www.ext3cow.com/Download_files/ext3cow-tools-1.tgz; + url = http://ext3cow.com/tools/ext3cow-tools.tgz; sha256 = "78f55b19c8eeaa7b8abde63c7d6547b1ac0421a46d826a8d41c049719a3081f2"; }; diff --git a/pkgs/os-specific/linux/kernel/linux-2.6.20.3-ext3cow.patch b/pkgs/os-specific/linux/kernel/linux-2.6.20.3-ext3cow.patch index 5ce03fa0434d..22704f1a1db7 100644 --- a/pkgs/os-specific/linux/kernel/linux-2.6.20.3-ext3cow.patch +++ b/pkgs/os-specific/linux/kernel/linux-2.6.20.3-ext3cow.patch @@ -1,159 +1,6 @@ -diff -ruN linux-2.6.20.3/fs/Kconfig linux-2.6.20.3-ext3cow/fs/Kconfig ---- linux-2.6.20.3/fs/Kconfig 2007-03-13 14:27:08.000000000 -0400 -+++ linux-2.6.20.3-ext3cow/fs/Kconfig 2007-04-07 14:23:46.000000000 -0400 -@@ -136,6 +136,77 @@ - If you are not using a security module that requires using - extended attributes for file security labels, say N. - -+ -+ -+config EXT3COW_FS -+ tristate "Ext3cow journalling and versioning file system support" -+ select JBD -+ help -+ This is the journalling version of the Second extended file system -+ (often called ext3), the de facto standard Linux file system -+ (method to organize files on a storage device) for hard disks. -+ -+ The journalling code included in this driver means you do not have -+ to run e2fsck (file system checker) on your file systems after a -+ crash. The journal keeps track of any changes that were being made -+ at the time the system crashed, and can ensure that your file system -+ is consistent without the need for a lengthy check. -+ -+ Other than adding the journal to the file system, the on-disk format -+ of ext3 is identical to ext2. It is possible to freely switch -+ between using the ext3 driver and the ext2 driver, as long as the -+ file system has been cleanly unmounted, or e2fsck is run on the file -+ system. -+ -+ To add a journal on an existing ext2 file system or change the -+ behavior of ext3 file systems, you can use the tune2fs utility ("man -+ tune2fs"). To modify attributes of files and directories on ext3 -+ file systems, use chattr ("man chattr"). You need to be using -+ e2fsprogs version 1.20 or later in order to create ext3 journals -+ (available at ). -+ -+ To compile this file system support as a module, choose M here: the -+ module will be called ext3. -+ -+config EXT3COW_FS_XATTR -+ bool "Ext3cow extended attributes" -+ depends on EXT3COW_FS -+ default y -+ help -+ Extended attributes are name:value pairs associated with inodes by -+ the kernel or by users (see the attr(5) manual page, or visit -+ for details). -+ -+ If unsure, say N. -+ -+ You need this for POSIX ACL support on ext3cow. -+ -+config EXT3COW_FS_POSIX_ACL -+ bool "Ext3cow POSIX Access Control Lists" -+ depends on EXT3COW_FS_XATTR -+ select FS_POSIX_ACL -+ help -+ Posix Access Control Lists (ACLs) support permissions for users and -+ groups beyond the owner/group/world scheme. -+ -+ To learn more about Access Control Lists, visit the Posix ACLs for -+ Linux website . -+ -+ If you don't know what Access Control Lists are, say N -+ -+config EXT3COW_FS_SECURITY -+ bool "Ext3cow Security Labels" -+ depends on EXT3COW_FS_XATTR -+ help -+ Security labels support alternative access control models -+ implemented by security modules like SELinux. This option -+ enables an extended attribute handler for file security -+ labels in the ext3cow filesystem. -+ -+ If you are not using a security module that requires using -+ extended attributes for file security labels, say N. -+ -+ - config EXT4DEV_FS - tristate "Ext4dev/ext4 extended fs support development (EXPERIMENTAL)" - depends on EXPERIMENTAL -@@ -205,23 +276,23 @@ - tristate - help - This is a generic journalling layer for block devices. It is -- currently used by the ext3 and OCFS2 file systems, but it could -+ currently used by the ext3, ext3cow and OCFS2 file systems, but it could - also be used to add journal support to other file systems or block - devices such as RAID or LVM. - -- If you are using the ext3 or OCFS2 file systems, you need to -+ If you are using the ext3, ext3cow or OCFS2 file systems, you need to - say Y here. If you are not using ext3 OCFS2 then you will probably - want to say N. - - To compile this device as a module, choose M here: the module will be -- called jbd. If you are compiling ext3 or OCFS2 into the kernel, -+ called jbd. If you are compiling ext3, ext3cow or OCFS2 into the kernel, - you cannot compile this code as a module. - - config JBD_DEBUG - bool "JBD (ext3) debugging support" - depends on JBD - help -- If you are using the ext3 journaled file system (or potentially any -+ If you are using the ext3 or ext3cow journaled file system (or potentially any - other file system/device using JBD), this option allows you to - enable debugging output while the system is running, in order to - help track down any problems you are having. By default the -@@ -266,11 +337,12 @@ - "echo 0 > /proc/sys/fs/jbd2-debug". - - config FS_MBCACHE --# Meta block cache for Extended Attributes (ext2/ext3/ext4) -+# Meta block cache for Extended Attributes (ext2/ext3(cow)/ext4) - tristate -- depends on EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4DEV_FS_XATTR -- default y if EXT2_FS=y || EXT3_FS=y || EXT4DEV_FS=y -- default m if EXT2_FS=m || EXT3_FS=m || EXT4DEV_FS=m -+ depends on EXT2_FS_XATTR || EXT3_FS_XATTR || EXT3COW_FS_XATTR || EXT4DEV_FS_XATTR -+ default y if EXT2_FS=y || EXT3_FS=y || EXT3COW_FS=y || EXT4DEV_FS=y -+ default m if EXT2_FS=m || EXT3_FS=m || EXT3COW_FS=m || EXT4DEV_FS=m -+ - - config REISERFS_FS - tristate "Reiserfs support" -diff -ruN linux-2.6.20.3/fs/Makefile linux-2.6.20.3-ext3cow/fs/Makefile ---- linux-2.6.20.3/fs/Makefile 2007-03-13 14:27:08.000000000 -0400 -+++ linux-2.6.20.3-ext3cow/fs/Makefile 2007-04-07 14:23:46.000000000 -0400 -@@ -63,6 +63,7 @@ - # Do not add any filesystems before this line - obj-$(CONFIG_REISERFS_FS) += reiserfs/ - obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3 -+obj-$(CONFIG_EXT3COW_FS) += ext3cow/ # Before ext2 so root fs can be ext3 - obj-$(CONFIG_EXT4DEV_FS) += ext4/ # Before ext2 so root fs can be ext4dev - obj-$(CONFIG_JBD) += jbd/ - obj-$(CONFIG_JBD2) += jbd2/ -diff -ruN linux-2.6.20.3/fs/ext3cow/Makefile linux-2.6.20.3-ext3cow/fs/ext3cow/Makefile ---- linux-2.6.20.3/fs/ext3cow/Makefile 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.20.3-ext3cow/fs/ext3cow/Makefile 2007-04-07 14:23:50.000000000 -0400 -@@ -0,0 +1,12 @@ -+# -+# Makefile for the linux ext3cow-filesystem routines. -+# -+ -+obj-$(CONFIG_EXT3COW_FS) += ext3cow.o -+ -+ext3cow-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ -+ ioctl.o namei.o super.o symlink.o hash.o resize.o ext3cow_jbd.o -+ -+ext3cow-$(CONFIG_EXT3COW_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o -+ext3cow-$(CONFIG_EXT3COW_FS_POSIX_ACL) += acl.o -+ext3cow-$(CONFIG_EXT3COW_FS_SECURITY) += xattr_security.o diff -ruN linux-2.6.20.3/fs/ext3cow/acl.c linux-2.6.20.3-ext3cow/fs/ext3cow/acl.c --- linux-2.6.20.3/fs/ext3cow/acl.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.20.3-ext3cow/fs/ext3cow/acl.c 2007-04-07 14:23:50.000000000 -0400 ++++ linux-2.6.20.3-ext3cow/fs/ext3cow/acl.c 2008-03-09 11:14:49.000000000 -0400 @@ -0,0 +1,551 @@ +/* + * linux/fs/ext3cow/acl.c @@ -708,7 +555,7 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/acl.c linux-2.6.20.3-ext3cow/fs/ext3cow/acl. +}; diff -ruN linux-2.6.20.3/fs/ext3cow/acl.h linux-2.6.20.3-ext3cow/fs/ext3cow/acl.h --- linux-2.6.20.3/fs/ext3cow/acl.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.20.3-ext3cow/fs/ext3cow/acl.h 2007-04-07 14:23:50.000000000 -0400 ++++ linux-2.6.20.3-ext3cow/fs/ext3cow/acl.h 2008-03-09 11:14:48.000000000 -0400 @@ -0,0 +1,81 @@ +/* + File: fs/ext3cow/acl.h @@ -793,7 +640,7 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/acl.h linux-2.6.20.3-ext3cow/fs/ext3cow/acl. + diff -ruN linux-2.6.20.3/fs/ext3cow/balloc.c linux-2.6.20.3-ext3cow/fs/ext3cow/balloc.c --- linux-2.6.20.3/fs/ext3cow/balloc.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.20.3-ext3cow/fs/ext3cow/balloc.c 2007-04-14 11:40:48.000000000 -0400 ++++ linux-2.6.20.3-ext3cow/fs/ext3cow/balloc.c 2008-03-09 11:14:48.000000000 -0400 @@ -0,0 +1,1823 @@ +/* + * linux/fs/ext3cow/balloc.c @@ -2620,7 +2467,7 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/balloc.c linux-2.6.20.3-ext3cow/fs/ext3cow/b +} diff -ruN linux-2.6.20.3/fs/ext3cow/bitmap.c linux-2.6.20.3-ext3cow/fs/ext3cow/bitmap.c --- linux-2.6.20.3/fs/ext3cow/bitmap.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.20.3-ext3cow/fs/ext3cow/bitmap.c 2007-04-07 14:23:50.000000000 -0400 ++++ linux-2.6.20.3-ext3cow/fs/ext3cow/bitmap.c 2008-03-09 11:14:48.000000000 -0400 @@ -0,0 +1,32 @@ +/* + * linux/fs/ext3/bitmap.c @@ -2654,9 +2501,26 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/bitmap.c linux-2.6.20.3-ext3cow/fs/ext3cow/b + +#endif /* EXT3COWFS_DEBUG */ + +diff -ruN linux-2.6.20.3/fs/ext3cow/CHANGELOG linux-2.6.20.3-ext3cow/fs/ext3cow/CHANGELOG +--- linux-2.6.20.3/fs/ext3cow/CHANGELOG 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.20.3-ext3cow/fs/ext3cow/CHANGELOG 2008-03-09 11:27:12.000000000 -0400 +@@ -0,0 +1,12 @@ ++3-9-08 ++- Fixed a bug that resulted in the first block in a newly allocated indirect block to be allocated over and over again. ++- Fixed a bug that resulted in COW bitmaps not to be reset after truncate. ++- Bug e2fsprogs that caused aborting journal fixed. ++ ++6-20-97 ++- Finished the rollback code for inode chains in case of error. ++ ++6-18-07 ++- Added support for 32-bit uid's and gid's back in again ++- Took out support for block fragmentation ++- Hopefully fixed the non-sticking uid/gid bug. +\ No newline at end of file diff -ruN linux-2.6.20.3/fs/ext3cow/dir.c linux-2.6.20.3-ext3cow/fs/ext3cow/dir.c --- linux-2.6.20.3/fs/ext3cow/dir.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.20.3-ext3cow/fs/ext3cow/dir.c 2007-04-07 14:23:50.000000000 -0400 ++++ linux-2.6.20.3-ext3cow/fs/ext3cow/dir.c 2008-03-09 11:14:49.000000000 -0400 @@ -0,0 +1,732 @@ +/* + * linux/fs/ext3cow/dir.c @@ -2769,7 +2633,7 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/dir.c linux-2.6.20.3-ext3cow/fs/ext3cow/dir. + * of recovering data when there's a bad sector + */ + if (!bh) { -+ ext3cow_error (sb, "ext3cow_readdir", ++ ext3cow_error (sb, "ext3cow_versions", + "directory #%lu contains a hole at offset %lu", + dir->i_ino, (unsigned long)filp->f_pos); + /* corrupt size? Maybe no more blocks to read */ @@ -2808,7 +2672,7 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/dir.c linux-2.6.20.3-ext3cow/fs/ext3cow/dir. + while (!error && filp->f_pos < dir->i_size + && offset < sb->s_blocksize) { + de = (struct ext3cow_dir_entry_2 *) (bh->b_data + offset); -+ if (!ext3cow_check_dir_entry ("ext3cow_readdir", dir, de, ++ if (!ext3cow_check_dir_entry ("ext3cow_readversions", dir, de, + bh, offset)) { + /* On error, skip the f_pos to the + next block. */ @@ -3392,7 +3256,7 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/dir.c linux-2.6.20.3-ext3cow/fs/ext3cow/dir. +#endif diff -ruN linux-2.6.20.3/fs/ext3cow/ext3cow_jbd.c linux-2.6.20.3-ext3cow/fs/ext3cow/ext3cow_jbd.c --- linux-2.6.20.3/fs/ext3cow/ext3cow_jbd.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.20.3-ext3cow/fs/ext3cow/ext3cow_jbd.c 2007-04-07 14:23:50.000000000 -0400 ++++ linux-2.6.20.3-ext3cow/fs/ext3cow/ext3cow_jbd.c 2008-03-09 11:14:48.000000000 -0400 @@ -0,0 +1,59 @@ +/* + * Interface between ext3cow and JBD @@ -3455,7 +3319,7 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/ext3cow_jbd.c linux-2.6.20.3-ext3cow/fs/ext3 +} diff -ruN linux-2.6.20.3/fs/ext3cow/file.c linux-2.6.20.3-ext3cow/fs/ext3cow/file.c --- linux-2.6.20.3/fs/ext3cow/file.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.20.3-ext3cow/fs/ext3cow/file.c 2007-04-07 14:23:50.000000000 -0400 ++++ linux-2.6.20.3-ext3cow/fs/ext3cow/file.c 2008-03-09 11:14:48.000000000 -0400 @@ -0,0 +1,147 @@ +/* + * linux/fs/ext3cow/file.c @@ -3606,7 +3470,7 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/file.c linux-2.6.20.3-ext3cow/fs/ext3cow/fil + diff -ruN linux-2.6.20.3/fs/ext3cow/fsync.c linux-2.6.20.3-ext3cow/fs/ext3cow/fsync.c --- linux-2.6.20.3/fs/ext3cow/fsync.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.20.3-ext3cow/fs/ext3cow/fsync.c 2007-04-07 14:23:50.000000000 -0400 ++++ linux-2.6.20.3-ext3cow/fs/ext3cow/fsync.c 2008-03-09 11:14:48.000000000 -0400 @@ -0,0 +1,88 @@ +/* + * linux/fs/ext3cow/fsync.c @@ -3698,7 +3562,7 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/fsync.c linux-2.6.20.3-ext3cow/fs/ext3cow/fs +} diff -ruN linux-2.6.20.3/fs/ext3cow/hash.c linux-2.6.20.3-ext3cow/fs/ext3cow/hash.c --- linux-2.6.20.3/fs/ext3cow/hash.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.20.3-ext3cow/fs/ext3cow/hash.c 2007-04-07 14:23:50.000000000 -0400 ++++ linux-2.6.20.3-ext3cow/fs/ext3cow/hash.c 2008-03-09 11:14:48.000000000 -0400 @@ -0,0 +1,152 @@ +/* + * linux/fs/ext3cow/hash.c @@ -3854,8 +3718,8 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/hash.c linux-2.6.20.3-ext3cow/fs/ext3cow/has +} diff -ruN linux-2.6.20.3/fs/ext3cow/ialloc.c linux-2.6.20.3-ext3cow/fs/ext3cow/ialloc.c --- linux-2.6.20.3/fs/ext3cow/ialloc.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.20.3-ext3cow/fs/ext3cow/ialloc.c 2007-04-07 14:23:50.000000000 -0400 -@@ -0,0 +1,763 @@ ++++ linux-2.6.20.3-ext3cow/fs/ext3cow/ialloc.c 2008-03-09 11:14:48.000000000 -0400 +@@ -0,0 +1,764 @@ +/* + * linux/fs/ext3cow/ialloc.c + * @@ -4436,9 +4300,10 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/ialloc.c linux-2.6.20.3-ext3cow/fs/ext3cow/i + if (!S_ISDIR(mode)) + ei->i_flags &= ~EXT3COW_DIRSYNC_FL; +#ifdef EXT3COW_FRAGMENTS -+ ei->i_faddr = 0; -+ ei->i_frag_no = 0; -+ ei->i_frag_size = 0; ++ /* Taken out for versioning -znjp */ ++ //ei->i_faddr = 0; ++ //ei->i_frag_no = 0; ++ //ei->i_frag_size = 0; +#endif + ei->i_file_acl = 0; + ei->i_dir_acl = 0; @@ -4621,8 +4486,8 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/ialloc.c linux-2.6.20.3-ext3cow/fs/ext3cow/i + diff -ruN linux-2.6.20.3/fs/ext3cow/inode.c linux-2.6.20.3-ext3cow/fs/ext3cow/inode.c --- linux-2.6.20.3/fs/ext3cow/inode.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.20.3-ext3cow/fs/ext3cow/inode.c 2007-04-17 11:34:02.000000000 -0400 -@@ -0,0 +1,3474 @@ ++++ linux-2.6.20.3-ext3cow/fs/ext3cow/inode.c 2008-03-09 11:14:48.000000000 -0400 +@@ -0,0 +1,3502 @@ +/* + * linux/fs/ext3cow/inode.c + * @@ -5012,25 +4877,28 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/inode.c linux-2.6.20.3-ext3cow/fs/ext3cow/in +{ + struct super_block *sb = inode->i_sb; + Indirect *p = chain; -+ struct buffer_head *bh; -+ u32* bitmap_w; ++ struct buffer_head *bh = NULL; ++ u32 *bitmap_w = NULL; + int ptrs = EXT3COW_ADDR_PER_BLOCK(inode->i_sb); + int nbitsperword = (sizeof(u32) * 8); + + *err = 0; + *cow = 0; ++ ++ + /* i_data is not going away, no lock needed */ + add_chain (chain, NULL, EXT3COW_I(inode)->i_data + *offsets); + if (!p->key){ + /* Set the bitmap on allocation - znjp */ -+ if(create) ++ if(create){ + EXT3COW_I(inode)->i_cow_bitmap |= (1UL << *offsets); ++ } + goto no_block; + } + -+ /* Are we COWing any direct blocks? -znjp */ ++ /* Are we writing and COWing any direct blocks? -znjp */ + if(create && !(EXT3COW_I(inode)->i_cow_bitmap & (1UL << *offsets))){ -+ printk(KERN_INFO "COWing direct block\n"); ++ //printk(KERN_INFO "COWing direct block\n"); + *(p->p) = 0; + p->key = 0; + /* Set the bitamp when COWing -znjp */ @@ -5040,6 +4908,7 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/inode.c linux-2.6.20.3-ext3cow/fs/ext3cow/in + } + + while (--depth) { ++ + bh = sb_bread(sb, le32_to_cpu(p->key)); + if (!bh) + goto failure; @@ -5049,23 +4918,23 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/inode.c linux-2.6.20.3-ext3cow/fs/ext3cow/in + goto changed; + add_chain(++p, bh, (__le32*)bh->b_data + *++offsets); + /* Reader: end */ -+ /* Find correct bitamp word */ ++ /* Find correct bitmap word */ + bitmap_w = (u32*)bh->b_data + ptrs + (*offsets/nbitsperword); + if (!p->key){ + /* Set the bitmap when allocating -znjp */ -+ if(create) -+ *bitmap_w = (u32)*bitmap_w | (u32)(1UL << (int)(*offsets%nbitsperword)); ++ if(create){ ++ *bitmap_w |= (u32)(1UL << (int)(*offsets%nbitsperword)); ++ } + goto no_block; + } + + /* Are we COWing any indirect blocks? -znjp */ -+ if(create && !((1UL << (int)(*offsets%nbitsperword)) & -+ le32_to_cpu((u32)*bitmap_w))){ -+ printk(KERN_INFO "COWing indirect block\n"); ++ if(create && !(*bitmap_w & (1UL << (int)(*offsets%nbitsperword)))){ ++ //printk(KERN_INFO "COWing indirect block\n"); + *(p->p) = 0; + p->key = 0; + /* Set the bitmap -znjp */ -+ *bitmap_w = (u32)*bitmap_w | (u32)(1UL << (int)(*offsets%nbitsperword)); ++ *bitmap_w |= (u32)(1UL << (int)(*offsets%nbitsperword)); + *cow = 1; + goto no_block; + } @@ -5297,6 +5166,10 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/inode.c linux-2.6.20.3-ext3cow/fs/ext3cow/in + ext3cow_fsblk_t new_blocks[4]; + ext3cow_fsblk_t current_block; + ++ u32 *bitmap_w = NULL; ++ int ptrs = EXT3COW_ADDR_PER_BLOCK(inode->i_sb); ++ int nbitsperword = (sizeof(u32) * 8); ++ + num = ext3cow_alloc_blocks(handle, inode, goal, indirect_blks, + *blks, new_blocks, &err); + if (err) @@ -5306,6 +5179,7 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/inode.c linux-2.6.20.3-ext3cow/fs/ext3cow/in + /* + * metadata blocks and data blocks are allocated. + */ ++ + for (n = 1; n <= indirect_blks; n++) { + /* + * Get buffer_head for parent block, zero it out @@ -5324,6 +5198,13 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/inode.c linux-2.6.20.3-ext3cow/fs/ext3cow/in + } + + memset(bh->b_data, 0, blocksize); ++ /* Mark the cow bitmap for each new indirect block allocated. ++ * We had to put this here, because get_branch was insufficient ++ * when allocating an indirect block. -znjp ++ */ ++ bitmap_w = (u32*)bh->b_data + ptrs + (offsets[n]/nbitsperword); ++ *bitmap_w |= (u32)(1UL << (int)(offsets[n]%nbitsperword)); ++ + branch[n].p = (__le32 *) bh->b_data + offsets[n]; + branch[n].key = cpu_to_le32(new_blocks[n]); + *branch[n].p = branch[n].key; @@ -6902,6 +6783,7 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/inode.c linux-2.6.20.3-ext3cow/fs/ext3cow/in + continue; + } + /* Only free the branches that have been newly allocated - znjp */ ++ /* Also, set the bits back to 0 in the bitmap -znjp */ + cur = 0; + count = 0; + bitmap_word = (u32*)bh->b_data + addr_per_block; @@ -6924,6 +6806,8 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/inode.c linux-2.6.20.3-ext3cow/fs/ext3cow/in + first_block = (u32*)bh->b_data + cur; + count = 1; + } ++ /* Set the bit in the bitmap back to 0 */ ++ *bitmap_word ^= (1UL << i); + } + } + (u32*)bitmap_word++; @@ -7135,6 +7019,9 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/inode.c linux-2.6.20.3-ext3cow/fs/ext3cow/in + + /* We only want to remove blocks that were allocated in this + * epoch, i.e., have 1 bit in the bitmap. -znjp */ ++ /* If we're going to truncate a block, we should its ++ * corresponding bit in the bitmap back to 0, meaning, ++ * it needs to be allocated - znjp */ + for(b = offsets[0]; b < EXT3COW_NDIR_BLOCKS; b++){ + if(EXT3COW_I(inode)->i_cow_bitmap & (1UL << b)){ + if(count == 0){ @@ -7148,6 +7035,8 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/inode.c linux-2.6.20.3-ext3cow/fs/ext3cow/in + block_to_free = b; + count = 1; + } ++ /* Turn off the bit in the bitmap */ ++ EXT3COW_I(inode)->i_cow_bitmap ^= (1UL << b); + } + } + if(count > 0) @@ -7198,6 +7087,8 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/inode.c linux-2.6.20.3-ext3cow/fs/ext3cow/in + ext3cow_free_branches(handle, inode, NULL, &nr, &nr+1, 1); + i_data[EXT3COW_IND_BLOCK] = 0; + } ++ /* And set bitmap back to 0 */ ++ EXT3COW_I(inode)->i_cow_bitmap ^= (1UL << EXT3COW_IND_BLOCK); + } + case EXT3COW_IND_BLOCK: + if(EXT3COW_I(inode)->i_cow_bitmap & (1UL << EXT3COW_DIND_BLOCK)){ @@ -7206,6 +7097,7 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/inode.c linux-2.6.20.3-ext3cow/fs/ext3cow/in + ext3cow_free_branches(handle, inode, NULL, &nr, &nr+1, 2); + i_data[EXT3COW_DIND_BLOCK] = 0; + } ++ EXT3COW_I(inode)->i_cow_bitmap ^= (1UL << EXT3COW_DIND_BLOCK); + } + case EXT3COW_DIND_BLOCK: + if(EXT3COW_I(inode)->i_cow_bitmap & (1UL << EXT3COW_TIND_BLOCK)){ @@ -7214,6 +7106,7 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/inode.c linux-2.6.20.3-ext3cow/fs/ext3cow/in + ext3cow_free_branches(handle, inode, NULL, &nr, &nr+1, 3); + i_data[EXT3COW_TIND_BLOCK] = 0; + } ++ EXT3COW_I(inode)->i_cow_bitmap ^= (1UL << EXT3COW_TIND_BLOCK); + } + case EXT3COW_TIND_BLOCK: + ; @@ -7450,12 +7343,10 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/inode.c linux-2.6.20.3-ext3cow/fs/ext3cow/in + inode->i_mode = le16_to_cpu(raw_inode->i_mode); + inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); + inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); -+ /* Taken out for versioning -znjp + if(!(test_opt (inode->i_sb, NO_UID32))) { + inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; + inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; + } -+ */ + inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); + inode->i_size = le32_to_cpu(raw_inode->i_size); + inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime); @@ -7491,9 +7382,10 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/inode.c linux-2.6.20.3-ext3cow/fs/ext3cow/in + ei->i_next_inode = le32_to_cpu(raw_inode->i_nxt_inode); + +#ifdef EXT3COW_FRAGMENTS -+ ei->i_faddr = le32_to_cpu(raw_inode->i_faddr); -+ ei->i_frag_no = raw_inode->i_frag; -+ ei->i_frag_size = raw_inode->i_fsize; ++ /* Taken out for versioning -znjp */ ++ //ei->i_faddr = le32_to_cpu(raw_inode->i_faddr); ++ //ei->i_frag_no = raw_inode->i_frag; ++ //ei->i_frag_size = raw_inode->i_fsize; +#endif + ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl); + if (!S_ISREG(inode->i_mode)) { @@ -7593,15 +7485,15 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/inode.c linux-2.6.20.3-ext3cow/fs/ext3cow/in + + raw_inode->i_mode = cpu_to_le16(inode->i_mode); + -+ /* Taken out for versioning -znjp ++ + if(!(test_opt(inode->i_sb, NO_UID32))) { + raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); + raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid)); -+* -+ * Fix up interoperability with old kernels. Otherwise, old inodes get -+ * re-used with the upper 16 bits of the uid/gid intact -+ * -+ ++ ++ /* Fix up interoperability with old kernels. Otherwise, old inodes get ++ * re-used with the upper 16 bits of the uid/gid intact ++ */ ++ + if(!ei->i_dtime) { + raw_inode->i_uid_high = + cpu_to_le16(high_16_bits(inode->i_uid)); @@ -7620,7 +7512,7 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/inode.c linux-2.6.20.3-ext3cow/fs/ext3cow/in + raw_inode->i_uid_high = 0; + raw_inode->i_gid_high = 0; + } -+ */ ++ + raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); + raw_inode->i_size = cpu_to_le32(ei->i_disksize); + raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); @@ -7635,9 +7527,10 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/inode.c linux-2.6.20.3-ext3cow/fs/ext3cow/in + raw_inode->i_nxt_inode = cpu_to_le32(EXT3COW_I(inode)->i_next_inode); + +#ifdef EXT3COW_FRAGMENTS -+ raw_inode->i_faddr = cpu_to_le32(ei->i_faddr); -+ raw_inode->i_frag = ei->i_frag_no; -+ raw_inode->i_fsize = ei->i_frag_size; ++ /* Taken out for versioning -znjp */ ++ //raw_inode->i_faddr = cpu_to_le32(ei->i_faddr); ++ //raw_inode->i_frag = ei->i_frag_no; ++ //raw_inode->i_fsize = ei->i_frag_size; +#endif + raw_inode->i_file_acl = cpu_to_le32(ei->i_file_acl); + if (!S_ISREG(inode->i_mode)) { @@ -8099,7 +7992,7 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/inode.c linux-2.6.20.3-ext3cow/fs/ext3cow/in +} diff -ruN linux-2.6.20.3/fs/ext3cow/ioctl.c linux-2.6.20.3-ext3cow/fs/ext3cow/ioctl.c --- linux-2.6.20.3/fs/ext3cow/ioctl.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.20.3-ext3cow/fs/ext3cow/ioctl.c 2007-04-07 14:23:50.000000000 -0400 ++++ linux-2.6.20.3-ext3cow/fs/ext3cow/ioctl.c 2008-03-09 11:14:48.000000000 -0400 @@ -0,0 +1,312 @@ +/* + * linux/fs/ext3cow/ioctl.c @@ -8413,10 +8306,26 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/ioctl.c linux-2.6.20.3-ext3cow/fs/ext3cow/io + return ret; +} +#endif +diff -ruN linux-2.6.20.3/fs/ext3cow/Makefile linux-2.6.20.3-ext3cow/fs/ext3cow/Makefile +--- linux-2.6.20.3/fs/ext3cow/Makefile 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.20.3-ext3cow/fs/ext3cow/Makefile 2008-03-09 11:14:49.000000000 -0400 +@@ -0,0 +1,12 @@ ++# ++# Makefile for the linux ext3cow-filesystem routines. ++# ++ ++obj-$(CONFIG_EXT3COW_FS) += ext3cow.o ++ ++ext3cow-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ ++ ioctl.o namei.o super.o symlink.o hash.o resize.o ext3cow_jbd.o ++ ++ext3cow-$(CONFIG_EXT3COW_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o ++ext3cow-$(CONFIG_EXT3COW_FS_POSIX_ACL) += acl.o ++ext3cow-$(CONFIG_EXT3COW_FS_SECURITY) += xattr_security.o diff -ruN linux-2.6.20.3/fs/ext3cow/namei.c linux-2.6.20.3-ext3cow/fs/ext3cow/namei.c --- linux-2.6.20.3/fs/ext3cow/namei.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.20.3-ext3cow/fs/ext3cow/namei.c 2007-04-16 22:44:05.000000000 -0400 -@@ -0,0 +1,2960 @@ ++++ linux-2.6.20.3-ext3cow/fs/ext3cow/namei.c 2008-03-09 11:14:48.000000000 -0400 +@@ -0,0 +1,2979 @@ +/* + * linux/fs/ext3cow/namei.c + * @@ -9988,7 +9897,6 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/namei.c linux-2.6.20.3-ext3cow/fs/ext3cow/na + + if(EXT3COW_S_EPOCHNUMBER(sb) > EXT3COW_I_EPOCHNUMBER(dir)){ + if(ext3cow_dup_inode(dentry->d_parent->d_parent->d_inode, dir)) -+ //if(ext3cow_dup_inode(NULL, dir)) + return -1; + } + @@ -10197,7 +10105,6 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/namei.c linux-2.6.20.3-ext3cow/fs/ext3cow/na + + if(EXT3COW_S_EPOCHNUMBER(dir->i_sb) > EXT3COW_I_EPOCHNUMBER(dir)){ + if(ext3cow_dup_inode(dentry->d_parent->d_parent->d_inode, dir)) -+ //if(ext3cow_dup_inode(NULL, dir)) + return -1; + } + @@ -11074,11 +10981,6 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/namei.c linux-2.6.20.3-ext3cow/fs/ext3cow/na + fake_inode->i_uid = inode->i_uid; + fake_inode->i_gid = inode->i_gid; + -+ /* uid_high and gid_high code would go here -znjp -+ fake_inode->i_uid_high = inode->i_uid_high; -+ fake_inode->i_gid_high = inode->i_gid_high; -+ */ -+ + atomic_set(&fake_inode->i_count, 1); + + fake_inode->i_nlink = inode->i_nlink; @@ -11097,9 +10999,10 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/namei.c linux-2.6.20.3-ext3cow/fs/ext3cow/na + fake_inode->i_blocks = inode->i_blocks; + fake_ini->i_flags = ini->i_flags; +#ifdef EXT3COW_FRAGMENTS -+ fake_ini->i_faddr = ini->i_faddr; -+ fake_ini->i_frag_no = ini->i_frag_no; -+ fake_ini->i_frag_size = ini->i_frag_size; ++ /* Taken out for versioning -znjp */ ++ //fake_ini->i_faddr = ini->i_faddr; ++ //fake_ini->i_frag_no = ini->i_frag_no; ++ //fake_ini->i_frag_size = ini->i_frag_size; +#endif + fake_ini->i_file_acl = ini->i_file_acl; + if (!S_ISREG(fake_inode->i_mode)) { @@ -11229,11 +11132,6 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/namei.c linux-2.6.20.3-ext3cow/fs/ext3cow/na + cow_inode->i_mode = inode->i_mode; + cow_inode->i_uid = inode->i_uid; + cow_inode->i_gid = inode->i_gid; -+ -+ /* uid_high and gid_high code would go here -znjp -+ cow_inode->i_uid_high = inode->i_uid_high; -+ cow_inode->i_gid_high = inode->i_gid_high; -+ */ + + cow_inode->i_nlink = inode->i_nlink; + cow_inode->i_size = inode->i_size; @@ -11251,9 +11149,10 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/namei.c linux-2.6.20.3-ext3cow/fs/ext3cow/na + cow_inode->i_blocks = inode->i_blocks; + cow_ini->i_flags = ini->i_flags; +#ifdef EXT3COW_FRAGMENTS -+ cow_ini->i_faddr = ini->i_faddr; -+ cow_ini->i_frag_no = ini->i_frag_no; -+ cow_ini->i_frag_size = ini->i_frag_size; ++ /* Taken out for versioning -znjp */ ++ //cow_ini->i_faddr = ini->i_faddr; ++ //cow_ini->i_frag_no = ini->i_frag_no; ++ //cow_ini->i_frag_size = ini->i_frag_size; +#endif + cow_ini->i_file_acl = ini->i_file_acl; + if (!S_ISREG(cow_inode->i_mode)) { @@ -11334,14 +11233,43 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/namei.c linux-2.6.20.3-ext3cow/fs/ext3cow/na + * be used for removing versions. */ +int ext3cow_reclaim_dup_inode(struct inode *dir, struct inode *inode) +{ -+ // handle_t *handle = NULL; ++ handle_t *handle = NULL; ++ int err = 0; ++ struct inode *old_inode = NULL; ++ struct inode *parent = dir; ++ ++ if(!parent) ++ parent = inode; + + if(is_bad_inode(inode)) -+ goto no_delete; ++ return -1; ++ ++ handle = ext3cow_journal_start(parent, ++ EXT3COW_DELETE_TRANS_BLOCKS(parent->i_sb)); ++ if(IS_ERR(handle)) ++ return PTR_ERR(handle); + ++ if(IS_DIRSYNC(parent)) ++ handle->h_sync = 1; ++ ++ old_inode = iget(parent->i_sb, EXT3COW_I_NEXT_INODE(inode)); ++ err = PTR_ERR(old_inode); ++ if (!IS_ERR(old_inode)){ ++ ++ EXT3COW_I(inode)->i_epoch_number = EXT3COW_I_EPOCHNUMBER(old_inode); ++ EXT3COW_I(inode)->i_cow_bitmap = EXT3COW_I(old_inode)->i_cow_bitmap; ++ EXT3COW_I(inode)->i_next_inode = EXT3COW_I(old_inode)->i_next_inode; ++ old_inode->i_nlink = 0; ++ ++ iput(old_inode); ++ ext3cow_mark_inode_dirty(handle, inode); ++ }else ++ ext3cow_error(inode->i_sb, "ext3cow_reclaim_dup_inode", ++ "Couldn't remove dup'd inode."); ++ ++ ext3cow_journal_stop(handle); ++ + return 0; -+ no_delete: -+ return -1; +} + +/* @@ -11379,7 +11307,7 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/namei.c linux-2.6.20.3-ext3cow/fs/ext3cow/na +}; diff -ruN linux-2.6.20.3/fs/ext3cow/namei.h linux-2.6.20.3-ext3cow/fs/ext3cow/namei.h --- linux-2.6.20.3/fs/ext3cow/namei.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.20.3-ext3cow/fs/ext3cow/namei.h 2007-04-07 14:23:50.000000000 -0400 ++++ linux-2.6.20.3-ext3cow/fs/ext3cow/namei.h 2008-03-09 11:14:48.000000000 -0400 @@ -0,0 +1,8 @@ +/* linux/fs/ext3cow/namei.h + * @@ -11391,7 +11319,7 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/namei.h linux-2.6.20.3-ext3cow/fs/ext3cow/na +extern struct dentry *ext3cow_get_parent(struct dentry *child); diff -ruN linux-2.6.20.3/fs/ext3cow/resize.c linux-2.6.20.3-ext3cow/fs/ext3cow/resize.c --- linux-2.6.20.3/fs/ext3cow/resize.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.20.3-ext3cow/fs/ext3cow/resize.c 2007-04-07 14:23:50.000000000 -0400 ++++ linux-2.6.20.3-ext3cow/fs/ext3cow/resize.c 2008-03-09 11:14:48.000000000 -0400 @@ -0,0 +1,1042 @@ +/* + * linux/fs/ext3cow/resize.c @@ -12437,7 +12365,7 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/resize.c linux-2.6.20.3-ext3cow/fs/ext3cow/r +} /* ext3cow_group_extend */ diff -ruN linux-2.6.20.3/fs/ext3cow/super.c linux-2.6.20.3-ext3cow/fs/ext3cow/super.c --- linux-2.6.20.3/fs/ext3cow/super.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.20.3-ext3cow/fs/ext3cow/super.c 2007-04-07 14:23:50.000000000 -0400 ++++ linux-2.6.20.3-ext3cow/fs/ext3cow/super.c 2008-03-09 11:14:49.000000000 -0400 @@ -0,0 +1,2808 @@ +/* + * linux/fs/ext3cow/super.c @@ -15249,7 +15177,7 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/super.c linux-2.6.20.3-ext3cow/fs/ext3cow/su +module_exit(exit_ext3cow_fs) diff -ruN linux-2.6.20.3/fs/ext3cow/symlink.c linux-2.6.20.3-ext3cow/fs/ext3cow/symlink.c --- linux-2.6.20.3/fs/ext3cow/symlink.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.20.3-ext3cow/fs/ext3cow/symlink.c 2007-04-07 14:23:50.000000000 -0400 ++++ linux-2.6.20.3-ext3cow/fs/ext3cow/symlink.c 2008-03-09 11:14:48.000000000 -0400 @@ -0,0 +1,54 @@ +/* + * linux/fs/ext3cow/symlink.c @@ -15307,7 +15235,7 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/symlink.c linux-2.6.20.3-ext3cow/fs/ext3cow/ +}; diff -ruN linux-2.6.20.3/fs/ext3cow/xattr.c linux-2.6.20.3-ext3cow/fs/ext3cow/xattr.c --- linux-2.6.20.3/fs/ext3cow/xattr.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.20.3-ext3cow/fs/ext3cow/xattr.c 2007-04-07 14:23:50.000000000 -0400 ++++ linux-2.6.20.3-ext3cow/fs/ext3cow/xattr.c 2008-03-09 11:14:49.000000000 -0400 @@ -0,0 +1,1314 @@ +/* + * linux/fs/ext3cow/xattr.c @@ -16625,7 +16553,7 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/xattr.c linux-2.6.20.3-ext3cow/fs/ext3cow/xa +} diff -ruN linux-2.6.20.3/fs/ext3cow/xattr.h linux-2.6.20.3-ext3cow/fs/ext3cow/xattr.h --- linux-2.6.20.3/fs/ext3cow/xattr.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.20.3-ext3cow/fs/ext3cow/xattr.h 2007-04-07 14:23:50.000000000 -0400 ++++ linux-2.6.20.3-ext3cow/fs/ext3cow/xattr.h 2008-03-09 11:14:49.000000000 -0400 @@ -0,0 +1,145 @@ +/* + File: fs/ext3cow/xattr.h @@ -16774,7 +16702,7 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/xattr.h linux-2.6.20.3-ext3cow/fs/ext3cow/xa +#endif diff -ruN linux-2.6.20.3/fs/ext3cow/xattr_security.c linux-2.6.20.3-ext3cow/fs/ext3cow/xattr_security.c --- linux-2.6.20.3/fs/ext3cow/xattr_security.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.20.3-ext3cow/fs/ext3cow/xattr_security.c 2007-04-07 14:23:50.000000000 -0400 ++++ linux-2.6.20.3-ext3cow/fs/ext3cow/xattr_security.c 2008-03-09 11:14:48.000000000 -0400 @@ -0,0 +1,77 @@ +/* + * linux/fs/ext3cow/xattr_security.c @@ -16855,7 +16783,7 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/xattr_security.c linux-2.6.20.3-ext3cow/fs/e +}; diff -ruN linux-2.6.20.3/fs/ext3cow/xattr_trusted.c linux-2.6.20.3-ext3cow/fs/ext3cow/xattr_trusted.c --- linux-2.6.20.3/fs/ext3cow/xattr_trusted.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.20.3-ext3cow/fs/ext3cow/xattr_trusted.c 2007-04-07 14:23:50.000000000 -0400 ++++ linux-2.6.20.3-ext3cow/fs/ext3cow/xattr_trusted.c 2008-03-09 11:14:48.000000000 -0400 @@ -0,0 +1,62 @@ +/* + * linux/fs/ext3cow/xattr_trusted.c @@ -16921,7 +16849,7 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/xattr_trusted.c linux-2.6.20.3-ext3cow/fs/ex +}; diff -ruN linux-2.6.20.3/fs/ext3cow/xattr_user.c linux-2.6.20.3-ext3cow/fs/ext3cow/xattr_user.c --- linux-2.6.20.3/fs/ext3cow/xattr_user.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.20.3-ext3cow/fs/ext3cow/xattr_user.c 2007-04-07 14:23:50.000000000 -0400 ++++ linux-2.6.20.3-ext3cow/fs/ext3cow/xattr_user.c 2008-03-09 11:14:48.000000000 -0400 @@ -0,0 +1,64 @@ +/* + * linux/fs/ext3cow/xattr_user.c @@ -16987,10 +16915,147 @@ diff -ruN linux-2.6.20.3/fs/ext3cow/xattr_user.c linux-2.6.20.3-ext3cow/fs/ext3c + .get = ext3cow_xattr_user_get, + .set = ext3cow_xattr_user_set, +}; +diff -ruN linux-2.6.20.3/fs/Kconfig linux-2.6.20.3-ext3cow/fs/Kconfig +--- linux-2.6.20.3/fs/Kconfig 2007-03-13 14:27:08.000000000 -0400 ++++ linux-2.6.20.3-ext3cow/fs/Kconfig 2008-03-09 11:14:25.000000000 -0400 +@@ -136,6 +136,77 @@ + If you are not using a security module that requires using + extended attributes for file security labels, say N. + ++ ++ ++config EXT3COW_FS ++ tristate "Ext3cow journalling and versioning file system support" ++ select JBD ++ help ++ This is the journalling version of the Second extended file system ++ (often called ext3), the de facto standard Linux file system ++ (method to organize files on a storage device) for hard disks. ++ ++ The journalling code included in this driver means you do not have ++ to run e2fsck (file system checker) on your file systems after a ++ crash. The journal keeps track of any changes that were being made ++ at the time the system crashed, and can ensure that your file system ++ is consistent without the need for a lengthy check. ++ ++ Other than adding the journal to the file system, the on-disk format ++ of ext3 is identical to ext2. It is possible to freely switch ++ between using the ext3 driver and the ext2 driver, as long as the ++ file system has been cleanly unmounted, or e2fsck is run on the file ++ system. ++ ++ To add a journal on an existing ext2 file system or change the ++ behavior of ext3 file systems, you can use the tune2fs utility ("man ++ tune2fs"). To modify attributes of files and directories on ext3 ++ file systems, use chattr ("man chattr"). You need to be using ++ e2fsprogs version 1.20 or later in order to create ext3 journals ++ (available at ). ++ ++ To compile this file system support as a module, choose M here: the ++ module will be called ext3. ++ ++config EXT3COW_FS_XATTR ++ bool "Ext3cow extended attributes" ++ depends on EXT3COW_FS ++ default y ++ help ++ Extended attributes are name:value pairs associated with inodes by ++ the kernel or by users (see the attr(5) manual page, or visit ++ for details). ++ ++ If unsure, say N. ++ ++ You need this for POSIX ACL support on ext3cow. ++ ++config EXT3COW_FS_POSIX_ACL ++ bool "Ext3cow POSIX Access Control Lists" ++ depends on EXT3COW_FS_XATTR ++ select FS_POSIX_ACL ++ help ++ Posix Access Control Lists (ACLs) support permissions for users and ++ groups beyond the owner/group/world scheme. ++ ++ To learn more about Access Control Lists, visit the Posix ACLs for ++ Linux website . ++ ++ If you don't know what Access Control Lists are, say N ++ ++config EXT3COW_FS_SECURITY ++ bool "Ext3cow Security Labels" ++ depends on EXT3COW_FS_XATTR ++ help ++ Security labels support alternative access control models ++ implemented by security modules like SELinux. This option ++ enables an extended attribute handler for file security ++ labels in the ext3cow filesystem. ++ ++ If you are not using a security module that requires using ++ extended attributes for file security labels, say N. ++ ++ + config EXT4DEV_FS + tristate "Ext4dev/ext4 extended fs support development (EXPERIMENTAL)" + depends on EXPERIMENTAL +@@ -205,23 +276,23 @@ + tristate + help + This is a generic journalling layer for block devices. It is +- currently used by the ext3 and OCFS2 file systems, but it could ++ currently used by the ext3, ext3cow and OCFS2 file systems, but it could + also be used to add journal support to other file systems or block + devices such as RAID or LVM. + +- If you are using the ext3 or OCFS2 file systems, you need to ++ If you are using the ext3, ext3cow or OCFS2 file systems, you need to + say Y here. If you are not using ext3 OCFS2 then you will probably + want to say N. + + To compile this device as a module, choose M here: the module will be +- called jbd. If you are compiling ext3 or OCFS2 into the kernel, ++ called jbd. If you are compiling ext3, ext3cow or OCFS2 into the kernel, + you cannot compile this code as a module. + + config JBD_DEBUG + bool "JBD (ext3) debugging support" + depends on JBD + help +- If you are using the ext3 journaled file system (or potentially any ++ If you are using the ext3 or ext3cow journaled file system (or potentially any + other file system/device using JBD), this option allows you to + enable debugging output while the system is running, in order to + help track down any problems you are having. By default the +@@ -266,11 +337,12 @@ + "echo 0 > /proc/sys/fs/jbd2-debug". + + config FS_MBCACHE +-# Meta block cache for Extended Attributes (ext2/ext3/ext4) ++# Meta block cache for Extended Attributes (ext2/ext3(cow)/ext4) + tristate +- depends on EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4DEV_FS_XATTR +- default y if EXT2_FS=y || EXT3_FS=y || EXT4DEV_FS=y +- default m if EXT2_FS=m || EXT3_FS=m || EXT4DEV_FS=m ++ depends on EXT2_FS_XATTR || EXT3_FS_XATTR || EXT3COW_FS_XATTR || EXT4DEV_FS_XATTR ++ default y if EXT2_FS=y || EXT3_FS=y || EXT3COW_FS=y || EXT4DEV_FS=y ++ default m if EXT2_FS=m || EXT3_FS=m || EXT3COW_FS=m || EXT4DEV_FS=m ++ + + config REISERFS_FS + tristate "Reiserfs support" +diff -ruN linux-2.6.20.3/fs/Makefile linux-2.6.20.3-ext3cow/fs/Makefile +--- linux-2.6.20.3/fs/Makefile 2007-03-13 14:27:08.000000000 -0400 ++++ linux-2.6.20.3-ext3cow/fs/Makefile 2008-03-09 11:14:54.000000000 -0400 +@@ -63,6 +63,7 @@ + # Do not add any filesystems before this line + obj-$(CONFIG_REISERFS_FS) += reiserfs/ + obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3 ++obj-$(CONFIG_EXT3COW_FS) += ext3cow/ # Before ext2 so root fs can be ext3 + obj-$(CONFIG_EXT4DEV_FS) += ext4/ # Before ext2 so root fs can be ext4dev + obj-$(CONFIG_JBD) += jbd/ + obj-$(CONFIG_JBD2) += jbd2/ diff -ruN linux-2.6.20.3/include/linux/ext3cow_fs.h linux-2.6.20.3-ext3cow/include/linux/ext3cow_fs.h --- linux-2.6.20.3/include/linux/ext3cow_fs.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.20.3-ext3cow/include/linux/ext3cow_fs.h 2007-04-07 15:30:04.000000000 -0400 -@@ -0,0 +1,947 @@ ++++ linux-2.6.20.3-ext3cow/include/linux/ext3cow_fs.h 2008-03-09 11:10:57.000000000 -0400 +@@ -0,0 +1,948 @@ +/* + * linux/include/linux/ext3cow_fs.h + * @@ -17021,6 +17086,7 @@ diff -ruN linux-2.6.20.3/include/linux/ext3cow_fs.h linux-2.6.20.3-ext3cow/inclu + */ +#undef EXT3COWFS_DEBUG + ++ +/* + * Define EXT3COW_RESERVATION to reserve data blocks for expanding files + */ @@ -17314,11 +17380,11 @@ diff -ruN linux-2.6.20.3/include/linux/ext3cow_fs.h linux-2.6.20.3-ext3cow/inclu + __le32 i_faddr; /* Fragment address */ + union { + struct { -+ __u8 l_i_frag; /* Fragment number */ -+ __u8 l_i_fsize; /* Fragment size */ -+ __u16 i_pad1; -+ //__le16 l_i_uid_high; /* these 2 fields */ -+ //__le16 l_i_gid_high; /* were reserved2[0] */ ++ //__u8 l_i_frag; /* Fragment number */ ++ //__u8 l_i_fsize; /* Fragment size */ ++ //__u16 i_pad1; ++ __le16 l_i_uid_high; /* these 2 fields */ ++ __le16 l_i_gid_high; /* were reserved2[0] */ + //__u32 l_i_reserved2; + /* Epoch number for versioning -znjp */ + __le32 l_i_epoch_number; @@ -17349,13 +17415,13 @@ diff -ruN linux-2.6.20.3/include/linux/ext3cow_fs.h linux-2.6.20.3-ext3cow/inclu +/* For versioning -znjp */ +//#define i_reserved1 osd1.linux1.l_i_reserved1 +#define i_cowbitmap osd1.linux1.l_i_direct_cow_bitmap -+#define i_frag osd2.linux2.l_i_frag -+#define i_fsize osd2.linux2.l_i_fsize ++//#define i_frag osd2.linux2.l_i_frag ++//#define i_fsize osd2.linux2.l_i_fsize +#define i_uid_low i_uid +#define i_gid_low i_gid +/* For versioning -znjp */ -+//#define i_uid_high osd2.linux2.l_i_uid_high -+//#define i_gid_high osd2.linux2.l_i_gid_high ++#define i_uid_high osd2.linux2.l_i_uid_high ++#define i_gid_high osd2.linux2.l_i_gid_high +//#define i_reserved2 osd2.linux2.l_i_reserved2 +#define i_epch_number osd2.linux2.l_i_epoch_number +#define i_nxt_inode osd2.linux2.l_i_next_inode @@ -17940,7 +18006,7 @@ diff -ruN linux-2.6.20.3/include/linux/ext3cow_fs.h linux-2.6.20.3-ext3cow/inclu +#endif /* _LINUX_EXT3COW_FS_H */ diff -ruN linux-2.6.20.3/include/linux/ext3cow_fs_i.h linux-2.6.20.3-ext3cow/include/linux/ext3cow_fs_i.h --- linux-2.6.20.3/include/linux/ext3cow_fs_i.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.20.3-ext3cow/include/linux/ext3cow_fs_i.h 2007-03-24 15:22:06.000000000 -0400 ++++ linux-2.6.20.3-ext3cow/include/linux/ext3cow_fs_i.h 2008-03-09 11:10:55.000000000 -0400 @@ -0,0 +1,152 @@ +/* + * linux/include/linux/ext3cow_fs_i.h @@ -18096,7 +18162,7 @@ diff -ruN linux-2.6.20.3/include/linux/ext3cow_fs_i.h linux-2.6.20.3-ext3cow/inc +#endif /* _LINUX_EXT3COW_FS_I */ diff -ruN linux-2.6.20.3/include/linux/ext3cow_fs_sb.h linux-2.6.20.3-ext3cow/include/linux/ext3cow_fs_sb.h --- linux-2.6.20.3/include/linux/ext3cow_fs_sb.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.20.3-ext3cow/include/linux/ext3cow_fs_sb.h 2007-03-24 15:22:35.000000000 -0400 ++++ linux-2.6.20.3-ext3cow/include/linux/ext3cow_fs_sb.h 2008-03-09 11:10:57.000000000 -0400 @@ -0,0 +1,86 @@ +/* + * linux/include/linux/ext3cow_fs_sb.h @@ -18186,7 +18252,7 @@ diff -ruN linux-2.6.20.3/include/linux/ext3cow_fs_sb.h linux-2.6.20.3-ext3cow/in +#endif /* _LINUX_EXT3COW_FS_SB */ diff -ruN linux-2.6.20.3/include/linux/ext3cow_jbd.h linux-2.6.20.3-ext3cow/include/linux/ext3cow_jbd.h --- linux-2.6.20.3/include/linux/ext3cow_jbd.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.20.3-ext3cow/include/linux/ext3cow_jbd.h 2007-03-24 13:58:07.000000000 -0400 ++++ linux-2.6.20.3-ext3cow/include/linux/ext3cow_jbd.h 2008-03-09 11:10:56.000000000 -0400 @@ -0,0 +1,226 @@ +/* + * linux/include/linux/ext3cow_jbd.h @@ -18416,7 +18482,7 @@ diff -ruN linux-2.6.20.3/include/linux/ext3cow_jbd.h linux-2.6.20.3-ext3cow/incl +#endif /* _LINUX_EXT3COW_JBD_H */ diff -ruN linux-2.6.20.3/include/linux/magic.h linux-2.6.20.3-ext3cow/include/linux/magic.h --- linux-2.6.20.3/include/linux/magic.h 2007-03-13 14:27:08.000000000 -0400 -+++ linux-2.6.20.3-ext3cow/include/linux/magic.h 2007-03-24 14:06:39.000000000 -0400 ++++ linux-2.6.20.3-ext3cow/include/linux/magic.h 2008-03-09 11:10:57.000000000 -0400 @@ -9,6 +9,7 @@ #define EFS_SUPER_MAGIC 0x414A53 #define EXT2_SUPER_MAGIC 0xEF53 diff --git a/pkgs/os-specific/linux/kernel/linux-2.6.21.7-ext3cow_wouter.patch b/pkgs/os-specific/linux/kernel/linux-2.6.21.7-ext3cow_wouter.patch deleted file mode 100644 index 583597158ad7..000000000000 --- a/pkgs/os-specific/linux/kernel/linux-2.6.21.7-ext3cow_wouter.patch +++ /dev/null @@ -1,18462 +0,0 @@ -diff -Naur linux-2.6.21.7/fs/ext3cow/acl.c linux-2.6.21.7_ext3cowPatched/fs/ext3cow/acl.c ---- linux-2.6.21.7/fs/ext3cow/acl.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.21.7_ext3cowPatched/fs/ext3cow/acl.c 2007-10-23 17:47:18.000000000 +0200 -@@ -0,0 +1,551 @@ -+/* -+ * linux/fs/ext3cow/acl.c -+ * -+ * Copyright (C) 2001-2003 Andreas Gruenbacher, -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "xattr.h" -+#include "acl.h" -+ -+/* -+ * Convert from filesystem to in-memory representation. -+ */ -+static struct posix_acl * -+ext3cow_acl_from_disk(const void *value, size_t size) -+{ -+ const char *end = (char *)value + size; -+ int n, count; -+ struct posix_acl *acl; -+ -+ if (!value) -+ return NULL; -+ if (size < sizeof(ext3cow_acl_header)) -+ return ERR_PTR(-EINVAL); -+ if (((ext3cow_acl_header *)value)->a_version != -+ cpu_to_le32(EXT3COW_ACL_VERSION)) -+ return ERR_PTR(-EINVAL); -+ value = (char *)value + sizeof(ext3cow_acl_header); -+ count = ext3cow_acl_count(size); -+ if (count < 0) -+ return ERR_PTR(-EINVAL); -+ if (count == 0) -+ return NULL; -+ acl = posix_acl_alloc(count, GFP_KERNEL); -+ if (!acl) -+ return ERR_PTR(-ENOMEM); -+ for (n=0; n < count; n++) { -+ ext3cow_acl_entry *entry = -+ (ext3cow_acl_entry *)value; -+ if ((char *)value + sizeof(ext3cow_acl_entry_short) > end) -+ goto fail; -+ acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag); -+ acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm); -+ switch(acl->a_entries[n].e_tag) { -+ case ACL_USER_OBJ: -+ case ACL_GROUP_OBJ: -+ case ACL_MASK: -+ case ACL_OTHER: -+ value = (char *)value + -+ sizeof(ext3cow_acl_entry_short); -+ acl->a_entries[n].e_id = ACL_UNDEFINED_ID; -+ break; -+ -+ case ACL_USER: -+ case ACL_GROUP: -+ value = (char *)value + sizeof(ext3cow_acl_entry); -+ if ((char *)value > end) -+ goto fail; -+ acl->a_entries[n].e_id = -+ le32_to_cpu(entry->e_id); -+ break; -+ -+ default: -+ goto fail; -+ } -+ } -+ if (value != end) -+ goto fail; -+ return acl; -+ -+fail: -+ posix_acl_release(acl); -+ return ERR_PTR(-EINVAL); -+} -+ -+/* -+ * Convert from in-memory to filesystem representation. -+ */ -+static void * -+ext3cow_acl_to_disk(const struct posix_acl *acl, size_t *size) -+{ -+ ext3cow_acl_header *ext_acl; -+ char *e; -+ size_t n; -+ -+ *size = ext3cow_acl_size(acl->a_count); -+ ext_acl = kmalloc(sizeof(ext3cow_acl_header) + acl->a_count * -+ sizeof(ext3cow_acl_entry), GFP_KERNEL); -+ if (!ext_acl) -+ return ERR_PTR(-ENOMEM); -+ ext_acl->a_version = cpu_to_le32(EXT3COW_ACL_VERSION); -+ e = (char *)ext_acl + sizeof(ext3cow_acl_header); -+ for (n=0; n < acl->a_count; n++) { -+ ext3cow_acl_entry *entry = (ext3cow_acl_entry *)e; -+ entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); -+ entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); -+ switch(acl->a_entries[n].e_tag) { -+ case ACL_USER: -+ case ACL_GROUP: -+ entry->e_id = -+ cpu_to_le32(acl->a_entries[n].e_id); -+ e += sizeof(ext3cow_acl_entry); -+ break; -+ -+ case ACL_USER_OBJ: -+ case ACL_GROUP_OBJ: -+ case ACL_MASK: -+ case ACL_OTHER: -+ e += sizeof(ext3cow_acl_entry_short); -+ break; -+ -+ default: -+ goto fail; -+ } -+ } -+ return (char *)ext_acl; -+ -+fail: -+ kfree(ext_acl); -+ return ERR_PTR(-EINVAL); -+} -+ -+static inline struct posix_acl * -+ext3cow_iget_acl(struct inode *inode, struct posix_acl **i_acl) -+{ -+ struct posix_acl *acl = EXT3COW_ACL_NOT_CACHED; -+ -+ spin_lock(&inode->i_lock); -+ if (*i_acl != EXT3COW_ACL_NOT_CACHED) -+ acl = posix_acl_dup(*i_acl); -+ spin_unlock(&inode->i_lock); -+ -+ return acl; -+} -+ -+static inline void -+ext3cow_iset_acl(struct inode *inode, struct posix_acl **i_acl, -+ struct posix_acl *acl) -+{ -+ spin_lock(&inode->i_lock); -+ if (*i_acl != EXT3COW_ACL_NOT_CACHED) -+ posix_acl_release(*i_acl); -+ *i_acl = posix_acl_dup(acl); -+ spin_unlock(&inode->i_lock); -+} -+ -+/* -+ * Inode operation get_posix_acl(). -+ * -+ * inode->i_mutex: don't care -+ */ -+static struct posix_acl * -+ext3cow_get_acl(struct inode *inode, int type) -+{ -+ struct ext3cow_inode_info *ei = EXT3COW_I(inode); -+ int name_index; -+ char *value = NULL; -+ struct posix_acl *acl; -+ int retval; -+ -+ if (!test_opt(inode->i_sb, POSIX_ACL)) -+ return NULL; -+ -+ switch(type) { -+ case ACL_TYPE_ACCESS: -+ acl = ext3cow_iget_acl(inode, &ei->i_acl); -+ if (acl != EXT3COW_ACL_NOT_CACHED) -+ return acl; -+ name_index = EXT3COW_XATTR_INDEX_POSIX_ACL_ACCESS; -+ break; -+ -+ case ACL_TYPE_DEFAULT: -+ acl = ext3cow_iget_acl(inode, &ei->i_default_acl); -+ if (acl != EXT3COW_ACL_NOT_CACHED) -+ return acl; -+ name_index = EXT3COW_XATTR_INDEX_POSIX_ACL_DEFAULT; -+ break; -+ -+ default: -+ return ERR_PTR(-EINVAL); -+ } -+ retval = ext3cow_xattr_get(inode, name_index, "", NULL, 0); -+ if (retval > 0) { -+ value = kmalloc(retval, GFP_KERNEL); -+ if (!value) -+ return ERR_PTR(-ENOMEM); -+ retval = ext3cow_xattr_get(inode, name_index, "", value, retval); -+ } -+ if (retval > 0) -+ acl = ext3cow_acl_from_disk(value, retval); -+ else if (retval == -ENODATA || retval == -ENOSYS) -+ acl = NULL; -+ else -+ acl = ERR_PTR(retval); -+ kfree(value); -+ -+ if (!IS_ERR(acl)) { -+ switch(type) { -+ case ACL_TYPE_ACCESS: -+ ext3cow_iset_acl(inode, &ei->i_acl, acl); -+ break; -+ -+ case ACL_TYPE_DEFAULT: -+ ext3cow_iset_acl(inode, &ei->i_default_acl, acl); -+ break; -+ } -+ } -+ return acl; -+} -+ -+/* -+ * Set the access or default ACL of an inode. -+ * -+ * inode->i_mutex: down unless called from ext3cow_new_inode -+ */ -+static int -+ext3cow_set_acl(handle_t *handle, struct inode *inode, int type, -+ struct posix_acl *acl) -+{ -+ struct ext3cow_inode_info *ei = EXT3COW_I(inode); -+ int name_index; -+ void *value = NULL; -+ size_t size = 0; -+ int error; -+ -+ if (S_ISLNK(inode->i_mode)) -+ return -EOPNOTSUPP; -+ -+ switch(type) { -+ case ACL_TYPE_ACCESS: -+ name_index = EXT3COW_XATTR_INDEX_POSIX_ACL_ACCESS; -+ if (acl) { -+ mode_t mode = inode->i_mode; -+ error = posix_acl_equiv_mode(acl, &mode); -+ if (error < 0) -+ return error; -+ else { -+ inode->i_mode = mode; -+ ext3cow_mark_inode_dirty(handle, inode); -+ if (error == 0) -+ acl = NULL; -+ } -+ } -+ break; -+ -+ case ACL_TYPE_DEFAULT: -+ name_index = EXT3COW_XATTR_INDEX_POSIX_ACL_DEFAULT; -+ if (!S_ISDIR(inode->i_mode)) -+ return acl ? -EACCES : 0; -+ break; -+ -+ default: -+ return -EINVAL; -+ } -+ if (acl) { -+ value = ext3cow_acl_to_disk(acl, &size); -+ if (IS_ERR(value)) -+ return (int)PTR_ERR(value); -+ } -+ -+ error = ext3cow_xattr_set_handle(handle, inode, name_index, "", -+ value, size, 0); -+ -+ kfree(value); -+ if (!error) { -+ switch(type) { -+ case ACL_TYPE_ACCESS: -+ ext3cow_iset_acl(inode, &ei->i_acl, acl); -+ break; -+ -+ case ACL_TYPE_DEFAULT: -+ ext3cow_iset_acl(inode, &ei->i_default_acl, acl); -+ break; -+ } -+ } -+ return error; -+} -+ -+static int -+ext3cow_check_acl(struct inode *inode, int mask) -+{ -+ struct posix_acl *acl = ext3cow_get_acl(inode, ACL_TYPE_ACCESS); -+ -+ if (IS_ERR(acl)) -+ return PTR_ERR(acl); -+ if (acl) { -+ int error = posix_acl_permission(inode, acl, mask); -+ posix_acl_release(acl); -+ return error; -+ } -+ -+ return -EAGAIN; -+} -+ -+int -+ext3cow_permission(struct inode *inode, int mask, struct nameidata *nd) -+{ -+ return generic_permission(inode, mask, ext3cow_check_acl); -+} -+ -+/* -+ * Initialize the ACLs of a new inode. Called from ext3cow_new_inode. -+ * -+ * dir->i_mutex: down -+ * inode->i_mutex: up (access to inode is still exclusive) -+ */ -+int -+ext3cow_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) -+{ -+ struct posix_acl *acl = NULL; -+ int error = 0; -+ -+ if (!S_ISLNK(inode->i_mode)) { -+ if (test_opt(dir->i_sb, POSIX_ACL)) { -+ acl = ext3cow_get_acl(dir, ACL_TYPE_DEFAULT); -+ if (IS_ERR(acl)) -+ return PTR_ERR(acl); -+ } -+ if (!acl) -+ inode->i_mode &= ~current->fs->umask; -+ } -+ if (test_opt(inode->i_sb, POSIX_ACL) && acl) { -+ struct posix_acl *clone; -+ mode_t mode; -+ -+ if (S_ISDIR(inode->i_mode)) { -+ error = ext3cow_set_acl(handle, inode, -+ ACL_TYPE_DEFAULT, acl); -+ if (error) -+ goto cleanup; -+ } -+ clone = posix_acl_clone(acl, GFP_KERNEL); -+ error = -ENOMEM; -+ if (!clone) -+ goto cleanup; -+ -+ mode = inode->i_mode; -+ error = posix_acl_create_masq(clone, &mode); -+ if (error >= 0) { -+ inode->i_mode = mode; -+ if (error > 0) { -+ /* This is an extended ACL */ -+ error = ext3cow_set_acl(handle, inode, -+ ACL_TYPE_ACCESS, clone); -+ } -+ } -+ posix_acl_release(clone); -+ } -+cleanup: -+ posix_acl_release(acl); -+ return error; -+} -+ -+/* -+ * Does chmod for an inode that may have an Access Control List. The -+ * inode->i_mode field must be updated to the desired value by the caller -+ * before calling this function. -+ * Returns 0 on success, or a negative error number. -+ * -+ * We change the ACL rather than storing some ACL entries in the file -+ * mode permission bits (which would be more efficient), because that -+ * would break once additional permissions (like ACL_APPEND, ACL_DELETE -+ * for directories) are added. There are no more bits available in the -+ * file mode. -+ * -+ * inode->i_mutex: down -+ */ -+int -+ext3cow_acl_chmod(struct inode *inode) -+{ -+ struct posix_acl *acl, *clone; -+ int error; -+ -+ if (S_ISLNK(inode->i_mode)) -+ return -EOPNOTSUPP; -+ if (!test_opt(inode->i_sb, POSIX_ACL)) -+ return 0; -+ acl = ext3cow_get_acl(inode, ACL_TYPE_ACCESS); -+ if (IS_ERR(acl) || !acl) -+ return PTR_ERR(acl); -+ clone = posix_acl_clone(acl, GFP_KERNEL); -+ posix_acl_release(acl); -+ if (!clone) -+ return -ENOMEM; -+ error = posix_acl_chmod_masq(clone, inode->i_mode); -+ if (!error) { -+ handle_t *handle; -+ int retries = 0; -+ -+ retry: -+ handle = ext3cow_journal_start(inode, -+ EXT3COW_DATA_TRANS_BLOCKS(inode->i_sb)); -+ if (IS_ERR(handle)) { -+ error = PTR_ERR(handle); -+ ext3cow_std_error(inode->i_sb, error); -+ goto out; -+ } -+ error = ext3cow_set_acl(handle, inode, ACL_TYPE_ACCESS, clone); -+ ext3cow_journal_stop(handle); -+ if (error == -ENOSPC && -+ ext3cow_should_retry_alloc(inode->i_sb, &retries)) -+ goto retry; -+ } -+out: -+ posix_acl_release(clone); -+ return error; -+} -+ -+/* -+ * Extended attribute handlers -+ */ -+static size_t -+ext3cow_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len, -+ const char *name, size_t name_len) -+{ -+ const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); -+ -+ if (!test_opt(inode->i_sb, POSIX_ACL)) -+ return 0; -+ if (list && size <= list_len) -+ memcpy(list, POSIX_ACL_XATTR_ACCESS, size); -+ return size; -+} -+ -+static size_t -+ext3cow_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len, -+ const char *name, size_t name_len) -+{ -+ const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); -+ -+ if (!test_opt(inode->i_sb, POSIX_ACL)) -+ return 0; -+ if (list && size <= list_len) -+ memcpy(list, POSIX_ACL_XATTR_DEFAULT, size); -+ return size; -+} -+ -+static int -+ext3cow_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) -+{ -+ struct posix_acl *acl; -+ int error; -+ -+ if (!test_opt(inode->i_sb, POSIX_ACL)) -+ return -EOPNOTSUPP; -+ -+ acl = ext3cow_get_acl(inode, type); -+ if (IS_ERR(acl)) -+ return PTR_ERR(acl); -+ if (acl == NULL) -+ return -ENODATA; -+ error = posix_acl_to_xattr(acl, buffer, size); -+ posix_acl_release(acl); -+ -+ return error; -+} -+ -+static int -+ext3cow_xattr_get_acl_access(struct inode *inode, const char *name, -+ void *buffer, size_t size) -+{ -+ if (strcmp(name, "") != 0) -+ return -EINVAL; -+ return ext3cow_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size); -+} -+ -+static int -+ext3cow_xattr_get_acl_default(struct inode *inode, const char *name, -+ void *buffer, size_t size) -+{ -+ if (strcmp(name, "") != 0) -+ return -EINVAL; -+ return ext3cow_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size); -+} -+ -+static int -+ext3cow_xattr_set_acl(struct inode *inode, int type, const void *value, -+ size_t size) -+{ -+ handle_t *handle; -+ struct posix_acl *acl; -+ int error, retries = 0; -+ -+ if (!test_opt(inode->i_sb, POSIX_ACL)) -+ return -EOPNOTSUPP; -+ if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) -+ return -EPERM; -+ -+ if (value) { -+ acl = posix_acl_from_xattr(value, size); -+ if (IS_ERR(acl)) -+ return PTR_ERR(acl); -+ else if (acl) { -+ error = posix_acl_valid(acl); -+ if (error) -+ goto release_and_out; -+ } -+ } else -+ acl = NULL; -+ -+retry: -+ handle = ext3cow_journal_start(inode, EXT3COW_DATA_TRANS_BLOCKS(inode->i_sb)); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ error = ext3cow_set_acl(handle, inode, type, acl); -+ ext3cow_journal_stop(handle); -+ if (error == -ENOSPC && ext3cow_should_retry_alloc(inode->i_sb, &retries)) -+ goto retry; -+ -+release_and_out: -+ posix_acl_release(acl); -+ return error; -+} -+ -+static int -+ext3cow_xattr_set_acl_access(struct inode *inode, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ if (strcmp(name, "") != 0) -+ return -EINVAL; -+ return ext3cow_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size); -+} -+ -+static int -+ext3cow_xattr_set_acl_default(struct inode *inode, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ if (strcmp(name, "") != 0) -+ return -EINVAL; -+ return ext3cow_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size); -+} -+ -+struct xattr_handler ext3cow_xattr_acl_access_handler = { -+ .prefix = POSIX_ACL_XATTR_ACCESS, -+ .list = ext3cow_xattr_list_acl_access, -+ .get = ext3cow_xattr_get_acl_access, -+ .set = ext3cow_xattr_set_acl_access, -+}; -+ -+struct xattr_handler ext3cow_xattr_acl_default_handler = { -+ .prefix = POSIX_ACL_XATTR_DEFAULT, -+ .list = ext3cow_xattr_list_acl_default, -+ .get = ext3cow_xattr_get_acl_default, -+ .set = ext3cow_xattr_set_acl_default, -+}; -diff -Naur linux-2.6.21.7/fs/ext3cow/acl.h linux-2.6.21.7_ext3cowPatched/fs/ext3cow/acl.h ---- linux-2.6.21.7/fs/ext3cow/acl.h 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.21.7_ext3cowPatched/fs/ext3cow/acl.h 2007-10-23 17:47:18.000000000 +0200 -@@ -0,0 +1,81 @@ -+/* -+ File: fs/ext3cow/acl.h -+ -+ (C) 2001 Andreas Gruenbacher, -+*/ -+ -+#include -+ -+#define EXT3COW_ACL_VERSION 0x0001 -+ -+typedef struct { -+ __le16 e_tag; -+ __le16 e_perm; -+ __le32 e_id; -+} ext3cow_acl_entry; -+ -+typedef struct { -+ __le16 e_tag; -+ __le16 e_perm; -+} ext3cow_acl_entry_short; -+ -+typedef struct { -+ __le32 a_version; -+} ext3cow_acl_header; -+ -+static inline size_t ext3cow_acl_size(int count) -+{ -+ if (count <= 4) { -+ return sizeof(ext3cow_acl_header) + -+ count * sizeof(ext3cow_acl_entry_short); -+ } else { -+ return sizeof(ext3cow_acl_header) + -+ 4 * sizeof(ext3cow_acl_entry_short) + -+ (count - 4) * sizeof(ext3cow_acl_entry); -+ } -+} -+ -+static inline int ext3cow_acl_count(size_t size) -+{ -+ ssize_t s; -+ size -= sizeof(ext3cow_acl_header); -+ s = size - 4 * sizeof(ext3cow_acl_entry_short); -+ if (s < 0) { -+ if (size % sizeof(ext3cow_acl_entry_short)) -+ return -1; -+ return size / sizeof(ext3cow_acl_entry_short); -+ } else { -+ if (s % sizeof(ext3cow_acl_entry)) -+ return -1; -+ return s / sizeof(ext3cow_acl_entry) + 4; -+ } -+} -+ -+#ifdef CONFIG_EXT3COW_FS_POSIX_ACL -+ -+/* Value for inode->u.ext3cow_i.i_acl and inode->u.ext3cow_i.i_default_acl -+ if the ACL has not been cached */ -+#define EXT3COW_ACL_NOT_CACHED ((void *)-1) -+ -+/* acl.c */ -+extern int ext3cow_permission (struct inode *, int, struct nameidata *); -+extern int ext3cow_acl_chmod (struct inode *); -+extern int ext3cow_init_acl (handle_t *, struct inode *, struct inode *); -+ -+#else /* CONFIG_EXT3COW_FS_POSIX_ACL */ -+#include -+#define ext3cow_permission NULL -+ -+static inline int -+ext3cow_acl_chmod(struct inode *inode) -+{ -+ return 0; -+} -+ -+static inline int -+ext3cow_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) -+{ -+ return 0; -+} -+#endif /* CONFIG_EXT3COW_FS_POSIX_ACL */ -+ -diff -Naur linux-2.6.21.7/fs/ext3cow/balloc.c linux-2.6.21.7_ext3cowPatched/fs/ext3cow/balloc.c ---- linux-2.6.21.7/fs/ext3cow/balloc.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.21.7_ext3cowPatched/fs/ext3cow/balloc.c 2007-10-23 17:47:18.000000000 +0200 -@@ -0,0 +1,1823 @@ -+/* -+ * linux/fs/ext3cow/balloc.c -+ * -+ * Copyright (C) 1992, 1993, 1994, 1995 -+ * Remy Card (card@masi.ibp.fr) -+ * Laboratoire MASI - Institut Blaise Pascal -+ * Universite Pierre et Marie Curie (Paris VI) -+ * -+ * Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993 -+ * Big-endian to little-endian byte-swapping/bitmaps by -+ * David S. Miller (davem@caip.rutgers.edu), 1995 -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* -+ * balloc.c contains the blocks allocation and deallocation routines -+ */ -+ -+/* -+ * The free blocks are managed by bitmaps. A file system contains several -+ * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap -+ * block for inodes, N blocks for the inode table and data blocks. -+ * -+ * The file system contains group descriptors which are located after the -+ * super block. Each descriptor contains the number of the bitmap block and -+ * the free blocks count in the block. The descriptors are loaded in memory -+ * when a file system is mounted (see ext3cow_read_super). -+ */ -+ -+ -+#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) -+ -+/** -+ * ext3cow_get_group_desc() -- load group descriptor from disk -+ * @sb: super block -+ * @block_group: given block group -+ * @bh: pointer to the buffer head to store the block -+ * group descriptor -+ */ -+struct ext3cow_group_desc * ext3cow_get_group_desc(struct super_block * sb, -+ unsigned int block_group, -+ struct buffer_head ** bh) -+{ -+ unsigned long group_desc; -+ unsigned long offset; -+ struct ext3cow_group_desc * desc; -+ struct ext3cow_sb_info *sbi = EXT3COW_SB(sb); -+ -+ if (block_group >= sbi->s_groups_count) { -+ ext3cow_error (sb, "ext3cow_get_group_desc", -+ "block_group >= groups_count - " -+ "block_group = %d, groups_count = %lu", -+ block_group, sbi->s_groups_count); -+ -+ return NULL; -+ } -+ smp_rmb(); -+ -+ group_desc = block_group >> EXT3COW_DESC_PER_BLOCK_BITS(sb); -+ offset = block_group & (EXT3COW_DESC_PER_BLOCK(sb) - 1); -+ if (!sbi->s_group_desc[group_desc]) { -+ ext3cow_error (sb, "ext3cow_get_group_desc", -+ "Group descriptor not loaded - " -+ "block_group = %d, group_desc = %lu, desc = %lu", -+ block_group, group_desc, offset); -+ return NULL; -+ } -+ -+ desc = (struct ext3cow_group_desc *) sbi->s_group_desc[group_desc]->b_data; -+ if (bh) -+ *bh = sbi->s_group_desc[group_desc]; -+ return desc + offset; -+} -+ -+/** -+ * read_block_bitmap() -+ * @sb: super block -+ * @block_group: given block group -+ * -+ * Read the bitmap for a given block_group, reading into the specified -+ * slot in the superblock's bitmap cache. -+ * -+ * Return buffer_head on success or NULL in case of failure. -+ */ -+static struct buffer_head * -+read_block_bitmap(struct super_block *sb, unsigned int block_group) -+{ -+ struct ext3cow_group_desc * desc; -+ struct buffer_head * bh = NULL; -+ -+ desc = ext3cow_get_group_desc (sb, block_group, NULL); -+ if (!desc) -+ goto error_out; -+ bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap)); -+ if (!bh) -+ ext3cow_error (sb, "read_block_bitmap", -+ "Cannot read block bitmap - " -+ "block_group = %d, block_bitmap = %u", -+ block_group, le32_to_cpu(desc->bg_block_bitmap)); -+error_out: -+ return bh; -+} -+/* -+ * The reservation window structure operations -+ * -------------------------------------------- -+ * Operations include: -+ * dump, find, add, remove, is_empty, find_next_reservable_window, etc. -+ * -+ * We use a red-black tree to represent per-filesystem reservation -+ * windows. -+ * -+ */ -+ -+/** -+ * __rsv_window_dump() -- Dump the filesystem block allocation reservation map -+ * @rb_root: root of per-filesystem reservation rb tree -+ * @verbose: verbose mode -+ * @fn: function which wishes to dump the reservation map -+ * -+ * If verbose is turned on, it will print the whole block reservation -+ * windows(start, end). Otherwise, it will only print out the "bad" windows, -+ * those windows that overlap with their immediate neighbors. -+ */ -+#if 1 -+static void __rsv_window_dump(struct rb_root *root, int verbose, -+ const char *fn) -+{ -+ struct rb_node *n; -+ struct ext3cow_reserve_window_node *rsv, *prev; -+ int bad; -+ -+restart: -+ n = rb_first(root); -+ bad = 0; -+ prev = NULL; -+ -+ printk("Block Allocation Reservation Windows Map (%s):\n", fn); -+ while (n) { -+ rsv = rb_entry(n, struct ext3cow_reserve_window_node, rsv_node); -+ if (verbose) -+ printk("reservation window 0x%p " -+ "start: %lu, end: %lu\n", -+ rsv, rsv->rsv_start, rsv->rsv_end); -+ if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) { -+ printk("Bad reservation %p (start >= end)\n", -+ rsv); -+ bad = 1; -+ } -+ if (prev && prev->rsv_end >= rsv->rsv_start) { -+ printk("Bad reservation %p (prev->end >= start)\n", -+ rsv); -+ bad = 1; -+ } -+ if (bad) { -+ if (!verbose) { -+ printk("Restarting reservation walk in verbose mode\n"); -+ verbose = 1; -+ goto restart; -+ } -+ } -+ n = rb_next(n); -+ prev = rsv; -+ } -+ printk("Window map complete.\n"); -+ if (bad) -+ BUG(); -+} -+#define rsv_window_dump(root, verbose) \ -+ __rsv_window_dump((root), (verbose), __FUNCTION__) -+#else -+#define rsv_window_dump(root, verbose) do {} while (0) -+#endif -+ -+/** -+ * goal_in_my_reservation() -+ * @rsv: inode's reservation window -+ * @grp_goal: given goal block relative to the allocation block group -+ * @group: the current allocation block group -+ * @sb: filesystem super block -+ * -+ * Test if the given goal block (group relative) is within the file's -+ * own block reservation window range. -+ * -+ * If the reservation window is outside the goal allocation group, return 0; -+ * grp_goal (given goal block) could be -1, which means no specific -+ * goal block. In this case, always return 1. -+ * If the goal block is within the reservation window, return 1; -+ * otherwise, return 0; -+ */ -+static int -+goal_in_my_reservation(struct ext3cow_reserve_window *rsv, ext3cow_grpblk_t grp_goal, -+ unsigned int group, struct super_block * sb) -+{ -+ ext3cow_fsblk_t group_first_block, group_last_block; -+ -+ group_first_block = ext3cow_group_first_block_no(sb, group); -+ group_last_block = group_first_block + (EXT3COW_BLOCKS_PER_GROUP(sb) - 1); -+ -+ if ((rsv->_rsv_start > group_last_block) || -+ (rsv->_rsv_end < group_first_block)) -+ return 0; -+ if ((grp_goal >= 0) && ((grp_goal + group_first_block < rsv->_rsv_start) -+ || (grp_goal + group_first_block > rsv->_rsv_end))) -+ return 0; -+ return 1; -+} -+ -+/** -+ * search_reserve_window() -+ * @rb_root: root of reservation tree -+ * @goal: target allocation block -+ * -+ * Find the reserved window which includes the goal, or the previous one -+ * if the goal is not in any window. -+ * Returns NULL if there are no windows or if all windows start after the goal. -+ */ -+static struct ext3cow_reserve_window_node * -+search_reserve_window(struct rb_root *root, ext3cow_fsblk_t goal) -+{ -+ struct rb_node *n = root->rb_node; -+ struct ext3cow_reserve_window_node *rsv; -+ -+ if (!n) -+ return NULL; -+ -+ do { -+ rsv = rb_entry(n, struct ext3cow_reserve_window_node, rsv_node); -+ -+ if (goal < rsv->rsv_start) -+ n = n->rb_left; -+ else if (goal > rsv->rsv_end) -+ n = n->rb_right; -+ else -+ return rsv; -+ } while (n); -+ /* -+ * We've fallen off the end of the tree: the goal wasn't inside -+ * any particular node. OK, the previous node must be to one -+ * side of the interval containing the goal. If it's the RHS, -+ * we need to back up one. -+ */ -+ if (rsv->rsv_start > goal) { -+ n = rb_prev(&rsv->rsv_node); -+ rsv = rb_entry(n, struct ext3cow_reserve_window_node, rsv_node); -+ } -+ return rsv; -+} -+ -+/** -+ * ext3cow_rsv_window_add() -- Insert a window to the block reservation rb tree. -+ * @sb: super block -+ * @rsv: reservation window to add -+ * -+ * Must be called with rsv_lock hold. -+ */ -+void ext3cow_rsv_window_add(struct super_block *sb, -+ struct ext3cow_reserve_window_node *rsv) -+{ -+ struct rb_root *root = &EXT3COW_SB(sb)->s_rsv_window_root; -+ struct rb_node *node = &rsv->rsv_node; -+ ext3cow_fsblk_t start = rsv->rsv_start; -+ -+ struct rb_node ** p = &root->rb_node; -+ struct rb_node * parent = NULL; -+ struct ext3cow_reserve_window_node *this; -+ -+ while (*p) -+ { -+ parent = *p; -+ this = rb_entry(parent, struct ext3cow_reserve_window_node, rsv_node); -+ -+ if (start < this->rsv_start) -+ p = &(*p)->rb_left; -+ else if (start > this->rsv_end) -+ p = &(*p)->rb_right; -+ else { -+ rsv_window_dump(root, 1); -+ BUG(); -+ } -+ } -+ -+ rb_link_node(node, parent, p); -+ rb_insert_color(node, root); -+} -+ -+/** -+ * ext3cow_rsv_window_remove() -- unlink a window from the reservation rb tree -+ * @sb: super block -+ * @rsv: reservation window to remove -+ * -+ * Mark the block reservation window as not allocated, and unlink it -+ * from the filesystem reservation window rb tree. Must be called with -+ * rsv_lock hold. -+ */ -+static void rsv_window_remove(struct super_block *sb, -+ struct ext3cow_reserve_window_node *rsv) -+{ -+ rsv->rsv_start = EXT3COW_RESERVE_WINDOW_NOT_ALLOCATED; -+ rsv->rsv_end = EXT3COW_RESERVE_WINDOW_NOT_ALLOCATED; -+ rsv->rsv_alloc_hit = 0; -+ rb_erase(&rsv->rsv_node, &EXT3COW_SB(sb)->s_rsv_window_root); -+} -+ -+/* -+ * rsv_is_empty() -- Check if the reservation window is allocated. -+ * @rsv: given reservation window to check -+ * -+ * returns 1 if the end block is EXT3COW_RESERVE_WINDOW_NOT_ALLOCATED. -+ */ -+static inline int rsv_is_empty(struct ext3cow_reserve_window *rsv) -+{ -+ /* a valid reservation end block could not be 0 */ -+ return rsv->_rsv_end == EXT3COW_RESERVE_WINDOW_NOT_ALLOCATED; -+} -+ -+/** -+ * ext3cow_init_block_alloc_info() -+ * @inode: file inode structure -+ * -+ * Allocate and initialize the reservation window structure, and -+ * link the window to the ext3cow inode structure at last -+ * -+ * The reservation window structure is only dynamically allocated -+ * and linked to ext3cow inode the first time the open file -+ * needs a new block. So, before every ext3cow_new_block(s) call, for -+ * regular files, we should check whether the reservation window -+ * structure exists or not. In the latter case, this function is called. -+ * Fail to do so will result in block reservation being turned off for that -+ * open file. -+ * -+ * This function is called from ext3cow_get_blocks_handle(), also called -+ * when setting the reservation window size through ioctl before the file -+ * is open for write (needs block allocation). -+ * -+ * Needs truncate_mutex protection prior to call this function. -+ */ -+void ext3cow_init_block_alloc_info(struct inode *inode) -+{ -+ struct ext3cow_inode_info *ei = EXT3COW_I(inode); -+ struct ext3cow_block_alloc_info *block_i = ei->i_block_alloc_info; -+ struct super_block *sb = inode->i_sb; -+ -+ block_i = kmalloc(sizeof(*block_i), GFP_NOFS); -+ if (block_i) { -+ struct ext3cow_reserve_window_node *rsv = &block_i->rsv_window_node; -+ -+ rsv->rsv_start = EXT3COW_RESERVE_WINDOW_NOT_ALLOCATED; -+ rsv->rsv_end = EXT3COW_RESERVE_WINDOW_NOT_ALLOCATED; -+ -+ /* -+ * if filesystem is mounted with NORESERVATION, the goal -+ * reservation window size is set to zero to indicate -+ * block reservation is off -+ */ -+ if (!test_opt(sb, RESERVATION)) -+ rsv->rsv_goal_size = 0; -+ else -+ rsv->rsv_goal_size = EXT3COW_DEFAULT_RESERVE_BLOCKS; -+ rsv->rsv_alloc_hit = 0; -+ block_i->last_alloc_logical_block = 0; -+ block_i->last_alloc_physical_block = 0; -+ } -+ ei->i_block_alloc_info = block_i; -+} -+ -+/** -+ * ext3cow_discard_reservation() -+ * @inode: inode -+ * -+ * Discard(free) block reservation window on last file close, or truncate -+ * or at last iput(). -+ * -+ * It is being called in three cases: -+ * ext3cow_release_file(): last writer close the file -+ * ext3cow_clear_inode(): last iput(), when nobody link to this file. -+ * ext3cow_truncate(): when the block indirect map is about to change. -+ * -+ */ -+void ext3cow_discard_reservation(struct inode *inode) -+{ -+ struct ext3cow_inode_info *ei = EXT3COW_I(inode); -+ struct ext3cow_block_alloc_info *block_i = ei->i_block_alloc_info; -+ struct ext3cow_reserve_window_node *rsv; -+ spinlock_t *rsv_lock = &EXT3COW_SB(inode->i_sb)->s_rsv_window_lock; -+ -+ if (!block_i) -+ return; -+ -+ rsv = &block_i->rsv_window_node; -+ if (!rsv_is_empty(&rsv->rsv_window)) { -+ spin_lock(rsv_lock); -+ if (!rsv_is_empty(&rsv->rsv_window)) -+ rsv_window_remove(inode->i_sb, rsv); -+ spin_unlock(rsv_lock); -+ } -+} -+ -+/** -+ * ext3cow_free_blocks_sb() -- Free given blocks and update quota -+ * @handle: handle to this transaction -+ * @sb: super block -+ * @block: start physcial block to free -+ * @count: number of blocks to free -+ * @pdquot_freed_blocks: pointer to quota -+ */ -+void ext3cow_free_blocks_sb(handle_t *handle, struct super_block *sb, -+ ext3cow_fsblk_t block, unsigned long count, -+ unsigned long *pdquot_freed_blocks) -+{ -+ struct buffer_head *bitmap_bh = NULL; -+ struct buffer_head *gd_bh; -+ unsigned long block_group; -+ ext3cow_grpblk_t bit; -+ unsigned long i; -+ unsigned long overflow; -+ struct ext3cow_group_desc * desc; -+ struct ext3cow_super_block * es; -+ struct ext3cow_sb_info *sbi; -+ int err = 0, ret; -+ ext3cow_grpblk_t group_freed; -+ -+ *pdquot_freed_blocks = 0; -+ sbi = EXT3COW_SB(sb); -+ es = sbi->s_es; -+ if (block < le32_to_cpu(es->s_first_data_block) || -+ block + count < block || -+ block + count > le32_to_cpu(es->s_blocks_count)) { -+ ext3cow_error (sb, "ext3cow_free_blocks", -+ "Freeing blocks not in datazone - " -+ "block = "E3FSBLK", count = %lu", block, count); -+ goto error_return; -+ } -+ -+ //TODO: Remove: -+ printk(KERN_INFO "freeing block(s) %lu-%lu\n", block, block + count - 1); -+ ext3cow_debug ("freeing block(s) %lu-%lu\n", block, block + count - 1); -+ -+do_more: -+ overflow = 0; -+ block_group = (block - le32_to_cpu(es->s_first_data_block)) / -+ EXT3COW_BLOCKS_PER_GROUP(sb); -+ bit = (block - le32_to_cpu(es->s_first_data_block)) % -+ EXT3COW_BLOCKS_PER_GROUP(sb); -+ /* -+ * Check to see if we are freeing blocks across a group -+ * boundary. -+ */ -+ if (bit + count > EXT3COW_BLOCKS_PER_GROUP(sb)) { -+ overflow = bit + count - EXT3COW_BLOCKS_PER_GROUP(sb); -+ count -= overflow; -+ } -+ brelse(bitmap_bh); -+ bitmap_bh = read_block_bitmap(sb, block_group); -+ if (!bitmap_bh) -+ goto error_return; -+ desc = ext3cow_get_group_desc (sb, block_group, &gd_bh); -+ if (!desc) -+ goto error_return; -+ -+ if (in_range (le32_to_cpu(desc->bg_block_bitmap), block, count) || -+ in_range (le32_to_cpu(desc->bg_inode_bitmap), block, count) || -+ in_range (block, le32_to_cpu(desc->bg_inode_table), -+ sbi->s_itb_per_group) || -+ in_range (block + count - 1, le32_to_cpu(desc->bg_inode_table), -+ sbi->s_itb_per_group)) -+ ext3cow_error (sb, "ext3cow_free_blocks", -+ "Freeing blocks in system zones - " -+ "Block = "E3FSBLK", count = %lu", -+ block, count); -+ -+ /* -+ * We are about to start releasing blocks in the bitmap, -+ * so we need undo access. -+ */ -+ /* @@@ check errors */ -+ BUFFER_TRACE(bitmap_bh, "getting undo access"); -+ err = ext3cow_journal_get_undo_access(handle, bitmap_bh); -+ if (err) -+ goto error_return; -+ -+ /* -+ * We are about to modify some metadata. Call the journal APIs -+ * to unshare ->b_data if a currently-committing transaction is -+ * using it -+ */ -+ BUFFER_TRACE(gd_bh, "get_write_access"); -+ err = ext3cow_journal_get_write_access(handle, gd_bh); -+ if (err) -+ goto error_return; -+ -+ jbd_lock_bh_state(bitmap_bh); -+ -+ for (i = 0, group_freed = 0; i < count; i++) { -+ /* -+ * An HJ special. This is expensive... -+ */ -+#ifdef CONFIG_JBD_DEBUG -+ jbd_unlock_bh_state(bitmap_bh); -+ { -+ struct buffer_head *debug_bh; -+ debug_bh = sb_find_get_block(sb, block + i); -+ if (debug_bh) { -+ BUFFER_TRACE(debug_bh, "Deleted!"); -+ if (!bh2jh(bitmap_bh)->b_committed_data) -+ BUFFER_TRACE(debug_bh, -+ "No commited data in bitmap"); -+ BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap"); -+ __brelse(debug_bh); -+ } -+ } -+ jbd_lock_bh_state(bitmap_bh); -+#endif -+ if (need_resched()) { -+ jbd_unlock_bh_state(bitmap_bh); -+ cond_resched(); -+ jbd_lock_bh_state(bitmap_bh); -+ } -+ /* @@@ This prevents newly-allocated data from being -+ * freed and then reallocated within the same -+ * transaction. -+ * -+ * Ideally we would want to allow that to happen, but to -+ * do so requires making journal_forget() capable of -+ * revoking the queued write of a data block, which -+ * implies blocking on the journal lock. *forget() -+ * cannot block due to truncate races. -+ * -+ * Eventually we can fix this by making journal_forget() -+ * return a status indicating whether or not it was able -+ * to revoke the buffer. On successful revoke, it is -+ * safe not to set the allocation bit in the committed -+ * bitmap, because we know that there is no outstanding -+ * activity on the buffer any more and so it is safe to -+ * reallocate it. -+ */ -+ BUFFER_TRACE(bitmap_bh, "set in b_committed_data"); -+ J_ASSERT_BH(bitmap_bh, -+ bh2jh(bitmap_bh)->b_committed_data != NULL); -+ ext3cow_set_bit_atomic(sb_bgl_lock(sbi, block_group), bit + i, -+ bh2jh(bitmap_bh)->b_committed_data); -+ -+ /* -+ * We clear the bit in the bitmap after setting the committed -+ * data bit, because this is the reverse order to that which -+ * the allocator uses. -+ */ -+ BUFFER_TRACE(bitmap_bh, "clear bit"); -+ if (!ext3cow_clear_bit_atomic(sb_bgl_lock(sbi, block_group), -+ bit + i, bitmap_bh->b_data)) { -+ jbd_unlock_bh_state(bitmap_bh); -+ ext3cow_error(sb, __FUNCTION__, -+ "bit already cleared for block "E3FSBLK, -+ block + i); -+ jbd_lock_bh_state(bitmap_bh); -+ BUFFER_TRACE(bitmap_bh, "bit already cleared"); -+ } else { -+ group_freed++; -+ } -+ } -+ jbd_unlock_bh_state(bitmap_bh); -+ -+ spin_lock(sb_bgl_lock(sbi, block_group)); -+ desc->bg_free_blocks_count = -+ cpu_to_le16(le16_to_cpu(desc->bg_free_blocks_count) + -+ group_freed); -+ spin_unlock(sb_bgl_lock(sbi, block_group)); -+ percpu_counter_mod(&sbi->s_freeblocks_counter, count); -+ -+ /* We dirtied the bitmap block */ -+ BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); -+ err = ext3cow_journal_dirty_metadata(handle, bitmap_bh); -+ -+ /* And the group descriptor block */ -+ BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); -+ ret = ext3cow_journal_dirty_metadata(handle, gd_bh); -+ if (!err) err = ret; -+ *pdquot_freed_blocks += group_freed; -+ -+ if (overflow && !err) { -+ block += count; -+ count = overflow; -+ goto do_more; -+ } -+ sb->s_dirt = 1; -+error_return: -+ brelse(bitmap_bh); -+ ext3cow_std_error(sb, err); -+ return; -+} -+ -+/** -+ * ext3cow_free_blocks() -- Free given blocks and update quota -+ * @handle: handle for this transaction -+ * @inode: inode -+ * @block: start physical block to free -+ * @count: number of blocks to count -+ */ -+void ext3cow_free_blocks(handle_t *handle, struct inode *inode, -+ ext3cow_fsblk_t block, unsigned long count) -+{ -+ struct super_block * sb; -+ unsigned long dquot_freed_blocks; -+ -+ sb = inode->i_sb; -+ if (!sb) { -+ printk ("ext3cow_free_blocks: nonexistent device"); -+ return; -+ } -+ ext3cow_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); -+ if (dquot_freed_blocks) -+ DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); -+ return; -+} -+ -+/** -+ * ext3cow_test_allocatable() -+ * @nr: given allocation block group -+ * @bh: bufferhead contains the bitmap of the given block group -+ * -+ * For ext3cow allocations, we must not reuse any blocks which are -+ * allocated in the bitmap buffer's "last committed data" copy. This -+ * prevents deletes from freeing up the page for reuse until we have -+ * committed the delete transaction. -+ * -+ * If we didn't do this, then deleting something and reallocating it as -+ * data would allow the old block to be overwritten before the -+ * transaction committed (because we force data to disk before commit). -+ * This would lead to corruption if we crashed between overwriting the -+ * data and committing the delete. -+ * -+ * @@@ We may want to make this allocation behaviour conditional on -+ * data-writes at some point, and disable it for metadata allocations or -+ * sync-data inodes. -+ */ -+static int ext3cow_test_allocatable(ext3cow_grpblk_t nr, struct buffer_head *bh) -+{ -+ int ret; -+ struct journal_head *jh = bh2jh(bh); -+ -+ if (ext3cow_test_bit(nr, bh->b_data)) -+ return 0; -+ -+ jbd_lock_bh_state(bh); -+ if (!jh->b_committed_data) -+ ret = 1; -+ else -+ ret = !ext3cow_test_bit(nr, jh->b_committed_data); -+ jbd_unlock_bh_state(bh); -+ return ret; -+} -+ -+/** -+ * bitmap_search_next_usable_block() -+ * @start: the starting block (group relative) of the search -+ * @bh: bufferhead contains the block group bitmap -+ * @maxblocks: the ending block (group relative) of the reservation -+ * -+ * The bitmap search --- search forward alternately through the actual -+ * bitmap on disk and the last-committed copy in journal, until we find a -+ * bit free in both bitmaps. -+ */ -+static ext3cow_grpblk_t -+bitmap_search_next_usable_block(ext3cow_grpblk_t start, struct buffer_head *bh, -+ ext3cow_grpblk_t maxblocks) -+{ -+ ext3cow_grpblk_t next; -+ struct journal_head *jh = bh2jh(bh); -+ -+ while (start < maxblocks) { -+ next = ext3cow_find_next_zero_bit(bh->b_data, maxblocks, start); -+ if (next >= maxblocks) -+ return -1; -+ if (ext3cow_test_allocatable(next, bh)) -+ return next; -+ jbd_lock_bh_state(bh); -+ if (jh->b_committed_data) -+ start = ext3cow_find_next_zero_bit(jh->b_committed_data, -+ maxblocks, next); -+ jbd_unlock_bh_state(bh); -+ } -+ return -1; -+} -+ -+/** -+ * find_next_usable_block() -+ * @start: the starting block (group relative) to find next -+ * allocatable block in bitmap. -+ * @bh: bufferhead contains the block group bitmap -+ * @maxblocks: the ending block (group relative) for the search -+ * -+ * Find an allocatable block in a bitmap. We honor both the bitmap and -+ * its last-committed copy (if that exists), and perform the "most -+ * appropriate allocation" algorithm of looking for a free block near -+ * the initial goal; then for a free byte somewhere in the bitmap; then -+ * for any free bit in the bitmap. -+ */ -+static ext3cow_grpblk_t -+find_next_usable_block(ext3cow_grpblk_t start, struct buffer_head *bh, -+ ext3cow_grpblk_t maxblocks) -+{ -+ ext3cow_grpblk_t here, next; -+ char *p, *r; -+ -+ if (start > 0) { -+ /* -+ * The goal was occupied; search forward for a free -+ * block within the next XX blocks. -+ * -+ * end_goal is more or less random, but it has to be -+ * less than EXT3COW_BLOCKS_PER_GROUP. Aligning up to the -+ * next 64-bit boundary is simple.. -+ */ -+ ext3cow_grpblk_t end_goal = (start + 63) & ~63; -+ if (end_goal > maxblocks) -+ end_goal = maxblocks; -+ here = ext3cow_find_next_zero_bit(bh->b_data, end_goal, start); -+ if (here < end_goal && ext3cow_test_allocatable(here, bh)) -+ return here; -+ ext3cow_debug("Bit not found near goal\n"); -+ } -+ -+ here = start; -+ if (here < 0) -+ here = 0; -+ -+ p = ((char *)bh->b_data) + (here >> 3); -+ r = memscan(p, 0, ((maxblocks + 7) >> 3) - (here >> 3)); -+ next = (r - ((char *)bh->b_data)) << 3; -+ -+ if (next < maxblocks && next >= start && ext3cow_test_allocatable(next, bh)) -+ return next; -+ -+ /* -+ * The bitmap search --- search forward alternately through the actual -+ * bitmap and the last-committed copy until we find a bit free in -+ * both -+ */ -+ here = bitmap_search_next_usable_block(here, bh, maxblocks); -+ return here; -+} -+ -+/** -+ * claim_block() -+ * @block: the free block (group relative) to allocate -+ * @bh: the bufferhead containts the block group bitmap -+ * -+ * We think we can allocate this block in this bitmap. Try to set the bit. -+ * If that succeeds then check that nobody has allocated and then freed the -+ * block since we saw that is was not marked in b_committed_data. If it _was_ -+ * allocated and freed then clear the bit in the bitmap again and return -+ * zero (failure). -+ */ -+static inline int -+claim_block(spinlock_t *lock, ext3cow_grpblk_t block, struct buffer_head *bh) -+{ -+ struct journal_head *jh = bh2jh(bh); -+ int ret; -+ -+ if (ext3cow_set_bit_atomic(lock, block, bh->b_data)) -+ return 0; -+ jbd_lock_bh_state(bh); -+ if (jh->b_committed_data && ext3cow_test_bit(block,jh->b_committed_data)) { -+ ext3cow_clear_bit_atomic(lock, block, bh->b_data); -+ ret = 0; -+ } else { -+ ret = 1; -+ } -+ jbd_unlock_bh_state(bh); -+ return ret; -+} -+ -+/** -+ * ext3cow_try_to_allocate() -+ * @sb: superblock -+ * @handle: handle to this transaction -+ * @group: given allocation block group -+ * @bitmap_bh: bufferhead holds the block bitmap -+ * @grp_goal: given target block within the group -+ * @count: target number of blocks to allocate -+ * @my_rsv: reservation window -+ * -+ * Attempt to allocate blocks within a give range. Set the range of allocation -+ * first, then find the first free bit(s) from the bitmap (within the range), -+ * and at last, allocate the blocks by claiming the found free bit as allocated. -+ * -+ * To set the range of this allocation: -+ * if there is a reservation window, only try to allocate block(s) from the -+ * file's own reservation window; -+ * Otherwise, the allocation range starts from the give goal block, ends at -+ * the block group's last block. -+ * -+ * If we failed to allocate the desired block then we may end up crossing to a -+ * new bitmap. In that case we must release write access to the old one via -+ * ext3cow_journal_release_buffer(), else we'll run out of credits. -+ */ -+static ext3cow_grpblk_t -+ext3cow_try_to_allocate(struct super_block *sb, handle_t *handle, int group, -+ struct buffer_head *bitmap_bh, ext3cow_grpblk_t grp_goal, -+ unsigned long *count, struct ext3cow_reserve_window *my_rsv) -+{ -+ ext3cow_fsblk_t group_first_block; -+ ext3cow_grpblk_t start, end; -+ unsigned long num = 0; -+ -+ /* we do allocation within the reservation window if we have a window */ -+ if (my_rsv) { -+ group_first_block = ext3cow_group_first_block_no(sb, group); -+ if (my_rsv->_rsv_start >= group_first_block) -+ start = my_rsv->_rsv_start - group_first_block; -+ else -+ /* reservation window cross group boundary */ -+ start = 0; -+ end = my_rsv->_rsv_end - group_first_block + 1; -+ if (end > EXT3COW_BLOCKS_PER_GROUP(sb)) -+ /* reservation window crosses group boundary */ -+ end = EXT3COW_BLOCKS_PER_GROUP(sb); -+ if ((start <= grp_goal) && (grp_goal < end)) -+ start = grp_goal; -+ else -+ grp_goal = -1; -+ } else { -+ if (grp_goal > 0) -+ start = grp_goal; -+ else -+ start = 0; -+ end = EXT3COW_BLOCKS_PER_GROUP(sb); -+ } -+ -+ BUG_ON(start > EXT3COW_BLOCKS_PER_GROUP(sb)); -+ -+repeat: -+ if (grp_goal < 0 || !ext3cow_test_allocatable(grp_goal, bitmap_bh)) { -+ grp_goal = find_next_usable_block(start, bitmap_bh, end); -+ if (grp_goal < 0) -+ goto fail_access; -+ if (!my_rsv) { -+ int i; -+ -+ for (i = 0; i < 7 && grp_goal > start && -+ ext3cow_test_allocatable(grp_goal - 1, -+ bitmap_bh); -+ i++, grp_goal--) -+ ; -+ } -+ } -+ start = grp_goal; -+ -+ if (!claim_block(sb_bgl_lock(EXT3COW_SB(sb), group), -+ grp_goal, bitmap_bh)) { -+ /* -+ * The block was allocated by another thread, or it was -+ * allocated and then freed by another thread -+ */ -+ start++; -+ grp_goal++; -+ if (start >= end) -+ goto fail_access; -+ goto repeat; -+ } -+ num++; -+ grp_goal++; -+ while (num < *count && grp_goal < end -+ && ext3cow_test_allocatable(grp_goal, bitmap_bh) -+ && claim_block(sb_bgl_lock(EXT3COW_SB(sb), group), -+ grp_goal, bitmap_bh)) { -+ num++; -+ grp_goal++; -+ } -+ *count = num; -+ return grp_goal - num; -+fail_access: -+ *count = num; -+ return -1; -+} -+ -+/** -+ * find_next_reservable_window(): -+ * find a reservable space within the given range. -+ * It does not allocate the reservation window for now: -+ * alloc_new_reservation() will do the work later. -+ * -+ * @search_head: the head of the searching list; -+ * This is not necessarily the list head of the whole filesystem -+ * -+ * We have both head and start_block to assist the search -+ * for the reservable space. The list starts from head, -+ * but we will shift to the place where start_block is, -+ * then start from there, when looking for a reservable space. -+ * -+ * @size: the target new reservation window size -+ * -+ * @group_first_block: the first block we consider to start -+ * the real search from -+ * -+ * @last_block: -+ * the maximum block number that our goal reservable space -+ * could start from. This is normally the last block in this -+ * group. The search will end when we found the start of next -+ * possible reservable space is out of this boundary. -+ * This could handle the cross boundary reservation window -+ * request. -+ * -+ * basically we search from the given range, rather than the whole -+ * reservation double linked list, (start_block, last_block) -+ * to find a free region that is of my size and has not -+ * been reserved. -+ * -+ */ -+static int find_next_reservable_window( -+ struct ext3cow_reserve_window_node *search_head, -+ struct ext3cow_reserve_window_node *my_rsv, -+ struct super_block * sb, -+ ext3cow_fsblk_t start_block, -+ ext3cow_fsblk_t last_block) -+{ -+ struct rb_node *next; -+ struct ext3cow_reserve_window_node *rsv, *prev; -+ ext3cow_fsblk_t cur; -+ int size = my_rsv->rsv_goal_size; -+ -+ /* TODO: make the start of the reservation window byte-aligned */ -+ /* cur = *start_block & ~7;*/ -+ cur = start_block; -+ rsv = search_head; -+ if (!rsv) -+ return -1; -+ -+ while (1) { -+ if (cur <= rsv->rsv_end) -+ cur = rsv->rsv_end + 1; -+ -+ /* TODO? -+ * in the case we could not find a reservable space -+ * that is what is expected, during the re-search, we could -+ * remember what's the largest reservable space we could have -+ * and return that one. -+ * -+ * For now it will fail if we could not find the reservable -+ * space with expected-size (or more)... -+ */ -+ if (cur > last_block) -+ return -1; /* fail */ -+ -+ prev = rsv; -+ next = rb_next(&rsv->rsv_node); -+ rsv = rb_entry(next,struct ext3cow_reserve_window_node,rsv_node); -+ -+ /* -+ * Reached the last reservation, we can just append to the -+ * previous one. -+ */ -+ if (!next) -+ break; -+ -+ if (cur + size <= rsv->rsv_start) { -+ /* -+ * Found a reserveable space big enough. We could -+ * have a reservation across the group boundary here -+ */ -+ break; -+ } -+ } -+ /* -+ * we come here either : -+ * when we reach the end of the whole list, -+ * and there is empty reservable space after last entry in the list. -+ * append it to the end of the list. -+ * -+ * or we found one reservable space in the middle of the list, -+ * return the reservation window that we could append to. -+ * succeed. -+ */ -+ -+ if ((prev != my_rsv) && (!rsv_is_empty(&my_rsv->rsv_window))) -+ rsv_window_remove(sb, my_rsv); -+ -+ /* -+ * Let's book the whole avaliable window for now. We will check the -+ * disk bitmap later and then, if there are free blocks then we adjust -+ * the window size if it's larger than requested. -+ * Otherwise, we will remove this node from the tree next time -+ * call find_next_reservable_window. -+ */ -+ my_rsv->rsv_start = cur; -+ my_rsv->rsv_end = cur + size - 1; -+ my_rsv->rsv_alloc_hit = 0; -+ -+ if (prev != my_rsv) -+ ext3cow_rsv_window_add(sb, my_rsv); -+ -+ return 0; -+} -+ -+/** -+ * alloc_new_reservation()--allocate a new reservation window -+ * -+ * To make a new reservation, we search part of the filesystem -+ * reservation list (the list that inside the group). We try to -+ * allocate a new reservation window near the allocation goal, -+ * or the beginning of the group, if there is no goal. -+ * -+ * We first find a reservable space after the goal, then from -+ * there, we check the bitmap for the first free block after -+ * it. If there is no free block until the end of group, then the -+ * whole group is full, we failed. Otherwise, check if the free -+ * block is inside the expected reservable space, if so, we -+ * succeed. -+ * If the first free block is outside the reservable space, then -+ * start from the first free block, we search for next available -+ * space, and go on. -+ * -+ * on succeed, a new reservation will be found and inserted into the list -+ * It contains at least one free block, and it does not overlap with other -+ * reservation windows. -+ * -+ * failed: we failed to find a reservation window in this group -+ * -+ * @rsv: the reservation -+ * -+ * @grp_goal: The goal (group-relative). It is where the search for a -+ * free reservable space should start from. -+ * if we have a grp_goal(grp_goal >0 ), then start from there, -+ * no grp_goal(grp_goal = -1), we start from the first block -+ * of the group. -+ * -+ * @sb: the super block -+ * @group: the group we are trying to allocate in -+ * @bitmap_bh: the block group block bitmap -+ * -+ */ -+static int alloc_new_reservation(struct ext3cow_reserve_window_node *my_rsv, -+ ext3cow_grpblk_t grp_goal, struct super_block *sb, -+ unsigned int group, struct buffer_head *bitmap_bh) -+{ -+ struct ext3cow_reserve_window_node *search_head; -+ ext3cow_fsblk_t group_first_block, group_end_block, start_block; -+ ext3cow_grpblk_t first_free_block; -+ struct rb_root *fs_rsv_root = &EXT3COW_SB(sb)->s_rsv_window_root; -+ unsigned long size; -+ int ret; -+ spinlock_t *rsv_lock = &EXT3COW_SB(sb)->s_rsv_window_lock; -+ -+ group_first_block = ext3cow_group_first_block_no(sb, group); -+ group_end_block = group_first_block + (EXT3COW_BLOCKS_PER_GROUP(sb) - 1); -+ -+ if (grp_goal < 0) -+ start_block = group_first_block; -+ else -+ start_block = grp_goal + group_first_block; -+ -+ size = my_rsv->rsv_goal_size; -+ -+ if (!rsv_is_empty(&my_rsv->rsv_window)) { -+ /* -+ * if the old reservation is cross group boundary -+ * and if the goal is inside the old reservation window, -+ * we will come here when we just failed to allocate from -+ * the first part of the window. We still have another part -+ * that belongs to the next group. In this case, there is no -+ * point to discard our window and try to allocate a new one -+ * in this group(which will fail). we should -+ * keep the reservation window, just simply move on. -+ * -+ * Maybe we could shift the start block of the reservation -+ * window to the first block of next group. -+ */ -+ -+ if ((my_rsv->rsv_start <= group_end_block) && -+ (my_rsv->rsv_end > group_end_block) && -+ (start_block >= my_rsv->rsv_start)) -+ return -1; -+ -+ if ((my_rsv->rsv_alloc_hit > -+ (my_rsv->rsv_end - my_rsv->rsv_start + 1) / 2)) { -+ /* -+ * if the previously allocation hit ratio is -+ * greater than 1/2, then we double the size of -+ * the reservation window the next time, -+ * otherwise we keep the same size window -+ */ -+ size = size * 2; -+ if (size > EXT3COW_MAX_RESERVE_BLOCKS) -+ size = EXT3COW_MAX_RESERVE_BLOCKS; -+ my_rsv->rsv_goal_size= size; -+ } -+ } -+ -+ spin_lock(rsv_lock); -+ /* -+ * shift the search start to the window near the goal block -+ */ -+ search_head = search_reserve_window(fs_rsv_root, start_block); -+ -+ /* -+ * find_next_reservable_window() simply finds a reservable window -+ * inside the given range(start_block, group_end_block). -+ * -+ * To make sure the reservation window has a free bit inside it, we -+ * need to check the bitmap after we found a reservable window. -+ */ -+retry: -+ ret = find_next_reservable_window(search_head, my_rsv, sb, -+ start_block, group_end_block); -+ -+ if (ret == -1) { -+ if (!rsv_is_empty(&my_rsv->rsv_window)) -+ rsv_window_remove(sb, my_rsv); -+ spin_unlock(rsv_lock); -+ return -1; -+ } -+ -+ /* -+ * On success, find_next_reservable_window() returns the -+ * reservation window where there is a reservable space after it. -+ * Before we reserve this reservable space, we need -+ * to make sure there is at least a free block inside this region. -+ * -+ * searching the first free bit on the block bitmap and copy of -+ * last committed bitmap alternatively, until we found a allocatable -+ * block. Search start from the start block of the reservable space -+ * we just found. -+ */ -+ spin_unlock(rsv_lock); -+ first_free_block = bitmap_search_next_usable_block( -+ my_rsv->rsv_start - group_first_block, -+ bitmap_bh, group_end_block - group_first_block + 1); -+ -+ if (first_free_block < 0) { -+ /* -+ * no free block left on the bitmap, no point -+ * to reserve the space. return failed. -+ */ -+ spin_lock(rsv_lock); -+ if (!rsv_is_empty(&my_rsv->rsv_window)) -+ rsv_window_remove(sb, my_rsv); -+ spin_unlock(rsv_lock); -+ return -1; /* failed */ -+ } -+ -+ start_block = first_free_block + group_first_block; -+ /* -+ * check if the first free block is within the -+ * free space we just reserved -+ */ -+ if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end) -+ return 0; /* success */ -+ /* -+ * if the first free bit we found is out of the reservable space -+ * continue search for next reservable space, -+ * start from where the free block is, -+ * we also shift the list head to where we stopped last time -+ */ -+ search_head = my_rsv; -+ spin_lock(rsv_lock); -+ goto retry; -+} -+ -+/** -+ * try_to_extend_reservation() -+ * @my_rsv: given reservation window -+ * @sb: super block -+ * @size: the delta to extend -+ * -+ * Attempt to expand the reservation window large enough to have -+ * required number of free blocks -+ * -+ * Since ext3cow_try_to_allocate() will always allocate blocks within -+ * the reservation window range, if the window size is too small, -+ * multiple blocks allocation has to stop at the end of the reservation -+ * window. To make this more efficient, given the total number of -+ * blocks needed and the current size of the window, we try to -+ * expand the reservation window size if necessary on a best-effort -+ * basis before ext3cow_new_blocks() tries to allocate blocks, -+ */ -+static void try_to_extend_reservation(struct ext3cow_reserve_window_node *my_rsv, -+ struct super_block *sb, int size) -+{ -+ struct ext3cow_reserve_window_node *next_rsv; -+ struct rb_node *next; -+ spinlock_t *rsv_lock = &EXT3COW_SB(sb)->s_rsv_window_lock; -+ -+ if (!spin_trylock(rsv_lock)) -+ return; -+ -+ next = rb_next(&my_rsv->rsv_node); -+ -+ if (!next) -+ my_rsv->rsv_end += size; -+ else { -+ next_rsv = rb_entry(next, struct ext3cow_reserve_window_node, rsv_node); -+ -+ if ((next_rsv->rsv_start - my_rsv->rsv_end - 1) >= size) -+ my_rsv->rsv_end += size; -+ else -+ my_rsv->rsv_end = next_rsv->rsv_start - 1; -+ } -+ spin_unlock(rsv_lock); -+} -+ -+/** -+ * ext3cow_try_to_allocate_with_rsv() -+ * @sb: superblock -+ * @handle: handle to this transaction -+ * @group: given allocation block group -+ * @bitmap_bh: bufferhead holds the block bitmap -+ * @grp_goal: given target block within the group -+ * @count: target number of blocks to allocate -+ * @my_rsv: reservation window -+ * @errp: pointer to store the error code -+ * -+ * This is the main function used to allocate a new block and its reservation -+ * window. -+ * -+ * Each time when a new block allocation is need, first try to allocate from -+ * its own reservation. If it does not have a reservation window, instead of -+ * looking for a free bit on bitmap first, then look up the reservation list to -+ * see if it is inside somebody else's reservation window, we try to allocate a -+ * reservation window for it starting from the goal first. Then do the block -+ * allocation within the reservation window. -+ * -+ * This will avoid keeping on searching the reservation list again and -+ * again when somebody is looking for a free block (without -+ * reservation), and there are lots of free blocks, but they are all -+ * being reserved. -+ * -+ * We use a red-black tree for the per-filesystem reservation list. -+ * -+ */ -+static ext3cow_grpblk_t -+ext3cow_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, -+ unsigned int group, struct buffer_head *bitmap_bh, -+ ext3cow_grpblk_t grp_goal, -+ struct ext3cow_reserve_window_node * my_rsv, -+ unsigned long *count, int *errp) -+{ -+ ext3cow_fsblk_t group_first_block, group_last_block; -+ ext3cow_grpblk_t ret = 0; -+ int fatal; -+ unsigned long num = *count; -+ -+ *errp = 0; -+ -+ /* -+ * Make sure we use undo access for the bitmap, because it is critical -+ * that we do the frozen_data COW on bitmap buffers in all cases even -+ * if the buffer is in BJ_Forget state in the committing transaction. -+ */ -+ BUFFER_TRACE(bitmap_bh, "get undo access for new block"); -+ fatal = ext3cow_journal_get_undo_access(handle, bitmap_bh); -+ if (fatal) { -+ *errp = fatal; -+ return -1; -+ } -+ -+ /* -+ * we don't deal with reservation when -+ * filesystem is mounted without reservation -+ * or the file is not a regular file -+ * or last attempt to allocate a block with reservation turned on failed -+ */ -+ if (my_rsv == NULL ) { -+ ret = ext3cow_try_to_allocate(sb, handle, group, bitmap_bh, -+ grp_goal, count, NULL); -+ goto out; -+ } -+ /* -+ * grp_goal is a group relative block number (if there is a goal) -+ * 0 <= grp_goal < EXT3COW_BLOCKS_PER_GROUP(sb) -+ * first block is a filesystem wide block number -+ * first block is the block number of the first block in this group -+ */ -+ group_first_block = ext3cow_group_first_block_no(sb, group); -+ group_last_block = group_first_block + (EXT3COW_BLOCKS_PER_GROUP(sb) - 1); -+ -+ /* -+ * Basically we will allocate a new block from inode's reservation -+ * window. -+ * -+ * We need to allocate a new reservation window, if: -+ * a) inode does not have a reservation window; or -+ * b) last attempt to allocate a block from existing reservation -+ * failed; or -+ * c) we come here with a goal and with a reservation window -+ * -+ * We do not need to allocate a new reservation window if we come here -+ * at the beginning with a goal and the goal is inside the window, or -+ * we don't have a goal but already have a reservation window. -+ * then we could go to allocate from the reservation window directly. -+ */ -+ while (1) { -+ if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) || -+ !goal_in_my_reservation(&my_rsv->rsv_window, -+ grp_goal, group, sb)) { -+ if (my_rsv->rsv_goal_size < *count) -+ my_rsv->rsv_goal_size = *count; -+ ret = alloc_new_reservation(my_rsv, grp_goal, sb, -+ group, bitmap_bh); -+ if (ret < 0) -+ break; /* failed */ -+ -+ if (!goal_in_my_reservation(&my_rsv->rsv_window, -+ grp_goal, group, sb)) -+ grp_goal = -1; -+ } else if (grp_goal >= 0) { -+ int curr = my_rsv->rsv_end - -+ (grp_goal + group_first_block) + 1; -+ -+ if (curr < *count) -+ try_to_extend_reservation(my_rsv, sb, -+ *count - curr); -+ } -+ -+ if ((my_rsv->rsv_start > group_last_block) || -+ (my_rsv->rsv_end < group_first_block)) { -+ rsv_window_dump(&EXT3COW_SB(sb)->s_rsv_window_root, 1); -+ BUG(); -+ } -+ ret = ext3cow_try_to_allocate(sb, handle, group, bitmap_bh, -+ grp_goal, &num, &my_rsv->rsv_window); -+ if (ret >= 0) { -+ my_rsv->rsv_alloc_hit += num; -+ *count = num; -+ break; /* succeed */ -+ } -+ num = *count; -+ } -+out: -+ if (ret >= 0) { -+ BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for " -+ "bitmap block"); -+ fatal = ext3cow_journal_dirty_metadata(handle, bitmap_bh); -+ if (fatal) { -+ *errp = fatal; -+ return -1; -+ } -+ return ret; -+ } -+ -+ BUFFER_TRACE(bitmap_bh, "journal_release_buffer"); -+ ext3cow_journal_release_buffer(handle, bitmap_bh); -+ return ret; -+} -+ -+/** -+ * ext3cow_has_free_blocks() -+ * @sbi: in-core super block structure. -+ * -+ * Check if filesystem has at least 1 free block available for allocation. -+ */ -+static int ext3cow_has_free_blocks(struct ext3cow_sb_info *sbi) -+{ -+ ext3cow_fsblk_t free_blocks, root_blocks; -+ -+ free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); -+ root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count); -+ if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) && -+ sbi->s_resuid != current->fsuid && -+ (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) { -+ return 0; -+ } -+ return 1; -+} -+ -+/** -+ * ext3cow_should_retry_alloc() -+ * @sb: super block -+ * @retries number of attemps has been made -+ * -+ * ext3cow_should_retry_alloc() is called when ENOSPC is returned, and if -+ * it is profitable to retry the operation, this function will wait -+ * for the current or commiting transaction to complete, and then -+ * return TRUE. -+ * -+ * if the total number of retries exceed three times, return FALSE. -+ */ -+int ext3cow_should_retry_alloc(struct super_block *sb, int *retries) -+{ -+ if (!ext3cow_has_free_blocks(EXT3COW_SB(sb)) || (*retries)++ > 3) -+ return 0; -+ -+ jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id); -+ -+ return journal_force_commit_nested(EXT3COW_SB(sb)->s_journal); -+} -+ -+/** -+ * ext3cow_new_blocks() -- core block(s) allocation function -+ * @handle: handle to this transaction -+ * @inode: file inode -+ * @goal: given target block(filesystem wide) -+ * @count: target number of blocks to allocate -+ * @errp: error code -+ * -+ * ext3cow_new_blocks uses a goal block to assist allocation. It tries to -+ * allocate block(s) from the block group contains the goal block first. If that -+ * fails, it will try to allocate block(s) from other block groups without -+ * any specific goal block. -+ * -+ */ -+ext3cow_fsblk_t ext3cow_new_blocks(handle_t *handle, struct inode *inode, -+ ext3cow_fsblk_t goal, unsigned long *count, int *errp) -+{ -+ struct buffer_head *bitmap_bh = NULL; -+ struct buffer_head *gdp_bh; -+ int group_no; -+ int goal_group; -+ ext3cow_grpblk_t grp_target_blk; /* blockgroup relative goal block */ -+ ext3cow_grpblk_t grp_alloc_blk; /* blockgroup-relative allocated block*/ -+ ext3cow_fsblk_t ret_block; /* filesyetem-wide allocated block */ -+ int bgi; /* blockgroup iteration index */ -+ int fatal = 0, err; -+ int performed_allocation = 0; -+ ext3cow_grpblk_t free_blocks; /* number of free blocks in a group */ -+ struct super_block *sb; -+ struct ext3cow_group_desc *gdp; -+ struct ext3cow_super_block *es; -+ struct ext3cow_sb_info *sbi; -+ struct ext3cow_reserve_window_node *my_rsv = NULL; -+ struct ext3cow_block_alloc_info *block_i; -+ unsigned short windowsz = 0; -+#ifdef EXT3COWFS_DEBUG -+ static int goal_hits, goal_attempts; -+#endif -+ unsigned long ngroups; -+ unsigned long num = *count; -+ -+ *errp = -ENOSPC; -+ sb = inode->i_sb; -+ if (!sb) { -+ printk("ext3cow_new_block: nonexistent device"); -+ return 0; -+ } -+ -+ /* -+ * Check quota for allocation of this block. -+ */ -+ if (DQUOT_ALLOC_BLOCK(inode, num)) { -+ *errp = -EDQUOT; -+ return 0; -+ } -+ -+ sbi = EXT3COW_SB(sb); -+ es = EXT3COW_SB(sb)->s_es; -+ ext3cow_debug("goal=%lu.\n", goal); -+ /* -+ * Allocate a block from reservation only when -+ * filesystem is mounted with reservation(default,-o reservation), and -+ * it's a regular file, and -+ * the desired window size is greater than 0 (One could use ioctl -+ * command EXT3COW_IOC_SETRSVSZ to set the window size to 0 to turn off -+ * reservation on that particular file) -+ */ -+ block_i = EXT3COW_I(inode)->i_block_alloc_info; -+ if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0)) -+ my_rsv = &block_i->rsv_window_node; -+ -+ if (!ext3cow_has_free_blocks(sbi)) { -+ *errp = -ENOSPC; -+ goto out; -+ } -+ -+ /* -+ * First, test whether the goal block is free. -+ */ -+ if (goal < le32_to_cpu(es->s_first_data_block) || -+ goal >= le32_to_cpu(es->s_blocks_count)) -+ goal = le32_to_cpu(es->s_first_data_block); -+ group_no = (goal - le32_to_cpu(es->s_first_data_block)) / -+ EXT3COW_BLOCKS_PER_GROUP(sb); -+ goal_group = group_no; -+retry_alloc: -+ gdp = ext3cow_get_group_desc(sb, group_no, &gdp_bh); -+ if (!gdp) -+ goto io_error; -+ -+ free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); -+ /* -+ * if there is not enough free blocks to make a new resevation -+ * turn off reservation for this allocation -+ */ -+ if (my_rsv && (free_blocks < windowsz) -+ && (rsv_is_empty(&my_rsv->rsv_window))) -+ my_rsv = NULL; -+ -+ if (free_blocks > 0) { -+ grp_target_blk = ((goal - le32_to_cpu(es->s_first_data_block)) % -+ EXT3COW_BLOCKS_PER_GROUP(sb)); -+ bitmap_bh = read_block_bitmap(sb, group_no); -+ if (!bitmap_bh) -+ goto io_error; -+ grp_alloc_blk = ext3cow_try_to_allocate_with_rsv(sb, handle, -+ group_no, bitmap_bh, grp_target_blk, -+ my_rsv, &num, &fatal); -+ if (fatal) -+ goto out; -+ if (grp_alloc_blk >= 0) -+ goto allocated; -+ } -+ -+ ngroups = EXT3COW_SB(sb)->s_groups_count; -+ smp_rmb(); -+ -+ /* -+ * Now search the rest of the groups. We assume that -+ * i and gdp correctly point to the last group visited. -+ */ -+ for (bgi = 0; bgi < ngroups; bgi++) { -+ group_no++; -+ if (group_no >= ngroups) -+ group_no = 0; -+ gdp = ext3cow_get_group_desc(sb, group_no, &gdp_bh); -+ if (!gdp) -+ goto io_error; -+ free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); -+ /* -+ * skip this group if the number of -+ * free blocks is less than half of the reservation -+ * window size. -+ */ -+ if (free_blocks <= (windowsz/2)) -+ continue; -+ -+ brelse(bitmap_bh); -+ bitmap_bh = read_block_bitmap(sb, group_no); -+ if (!bitmap_bh) -+ goto io_error; -+ /* -+ * try to allocate block(s) from this group, without a goal(-1). -+ */ -+ grp_alloc_blk = ext3cow_try_to_allocate_with_rsv(sb, handle, -+ group_no, bitmap_bh, -1, my_rsv, -+ &num, &fatal); -+ if (fatal) -+ goto out; -+ if (grp_alloc_blk >= 0) -+ goto allocated; -+ } -+ /* -+ * We may end up a bogus ealier ENOSPC error due to -+ * filesystem is "full" of reservations, but -+ * there maybe indeed free blocks avaliable on disk -+ * In this case, we just forget about the reservations -+ * just do block allocation as without reservations. -+ */ -+ if (my_rsv) { -+ my_rsv = NULL; -+ windowsz = 0; -+ group_no = goal_group; -+ goto retry_alloc; -+ } -+ /* No space left on the device */ -+ *errp = -ENOSPC; -+ goto out; -+ -+allocated: -+ -+ ext3cow_debug("using block group %d(%d)\n", -+ group_no, gdp->bg_free_blocks_count); -+ -+ BUFFER_TRACE(gdp_bh, "get_write_access"); -+ fatal = ext3cow_journal_get_write_access(handle, gdp_bh); -+ if (fatal) -+ goto out; -+ -+ ret_block = grp_alloc_blk + ext3cow_group_first_block_no(sb, group_no); -+ -+ if (in_range(le32_to_cpu(gdp->bg_block_bitmap), ret_block, num) || -+ in_range(le32_to_cpu(gdp->bg_inode_bitmap), ret_block, num) || -+ in_range(ret_block, le32_to_cpu(gdp->bg_inode_table), -+ EXT3COW_SB(sb)->s_itb_per_group) || -+ in_range(ret_block + num - 1, le32_to_cpu(gdp->bg_inode_table), -+ EXT3COW_SB(sb)->s_itb_per_group)) -+ ext3cow_error(sb, "ext3cow_new_block", -+ "Allocating block in system zone - " -+ "blocks from "E3FSBLK", length %lu", -+ ret_block, num); -+ -+ performed_allocation = 1; -+ -+#ifdef CONFIG_JBD_DEBUG -+ { -+ struct buffer_head *debug_bh; -+ -+ /* Record bitmap buffer state in the newly allocated block */ -+ debug_bh = sb_find_get_block(sb, ret_block); -+ if (debug_bh) { -+ BUFFER_TRACE(debug_bh, "state when allocated"); -+ BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap state"); -+ brelse(debug_bh); -+ } -+ } -+ jbd_lock_bh_state(bitmap_bh); -+ spin_lock(sb_bgl_lock(sbi, group_no)); -+ if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data) { -+ int i; -+ -+ for (i = 0; i < num; i++) { -+ if (ext3cow_test_bit(grp_alloc_blk+i, -+ bh2jh(bitmap_bh)->b_committed_data)) { -+ printk("%s: block was unexpectedly set in " -+ "b_committed_data\n", __FUNCTION__); -+ } -+ } -+ } -+ ext3cow_debug("found bit %d\n", grp_alloc_blk); -+ spin_unlock(sb_bgl_lock(sbi, group_no)); -+ jbd_unlock_bh_state(bitmap_bh); -+#endif -+ -+ if (ret_block + num - 1 >= le32_to_cpu(es->s_blocks_count)) { -+ ext3cow_error(sb, "ext3cow_new_block", -+ "block("E3FSBLK") >= blocks count(%d) - " -+ "block_group = %d, es == %p ", ret_block, -+ le32_to_cpu(es->s_blocks_count), group_no, es); -+ goto out; -+ } -+ -+ /* -+ * It is up to the caller to add the new buffer to a journal -+ * list of some description. We don't know in advance whether -+ * the caller wants to use it as metadata or data. -+ */ -+ ext3cow_debug("allocating block %lu. Goal hits %d of %d.\n", -+ ret_block, goal_hits, goal_attempts); -+ -+ spin_lock(sb_bgl_lock(sbi, group_no)); -+ gdp->bg_free_blocks_count = -+ cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)-num); -+ spin_unlock(sb_bgl_lock(sbi, group_no)); -+ percpu_counter_mod(&sbi->s_freeblocks_counter, -num); -+ -+ BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor"); -+ err = ext3cow_journal_dirty_metadata(handle, gdp_bh); -+ if (!fatal) -+ fatal = err; -+ -+ sb->s_dirt = 1; -+ if (fatal) -+ goto out; -+ -+ *errp = 0; -+ brelse(bitmap_bh); -+ DQUOT_FREE_BLOCK(inode, *count-num); -+ *count = num; -+ return ret_block; -+ -+io_error: -+ *errp = -EIO; -+out: -+ if (fatal) { -+ *errp = fatal; -+ ext3cow_std_error(sb, fatal); -+ } -+ /* -+ * Undo the block allocation -+ */ -+ if (!performed_allocation) -+ DQUOT_FREE_BLOCK(inode, *count); -+ brelse(bitmap_bh); -+ return 0; -+} -+ -+ext3cow_fsblk_t ext3cow_new_block(handle_t *handle, struct inode *inode, -+ ext3cow_fsblk_t goal, int *errp) -+{ -+ unsigned long count = 1; -+ -+ return ext3cow_new_blocks(handle, inode, goal, &count, errp); -+} -+ -+/** -+ * ext3cow_count_free_blocks() -- count filesystem free blocks -+ * @sb: superblock -+ * -+ * Adds up the number of free blocks from each block group. -+ */ -+ext3cow_fsblk_t ext3cow_count_free_blocks(struct super_block *sb) -+{ -+ ext3cow_fsblk_t desc_count; -+ struct ext3cow_group_desc *gdp; -+ int i; -+ unsigned long ngroups = EXT3COW_SB(sb)->s_groups_count; -+#ifdef EXT3COWFS_DEBUG -+ struct ext3cow_super_block *es; -+ ext3cow_fsblk_t bitmap_count; -+ unsigned long x; -+ struct buffer_head *bitmap_bh = NULL; -+ -+ es = EXT3COW_SB(sb)->s_es; -+ desc_count = 0; -+ bitmap_count = 0; -+ gdp = NULL; -+ -+ smp_rmb(); -+ for (i = 0; i < ngroups; i++) { -+ gdp = ext3cow_get_group_desc(sb, i, NULL); -+ if (!gdp) -+ continue; -+ desc_count += le16_to_cpu(gdp->bg_free_blocks_count); -+ brelse(bitmap_bh); -+ bitmap_bh = read_block_bitmap(sb, i); -+ if (bitmap_bh == NULL) -+ continue; -+ -+ x = ext3cow_count_free(bitmap_bh, sb->s_blocksize); -+ printk("group %d: stored = %d, counted = %lu\n", -+ i, le16_to_cpu(gdp->bg_free_blocks_count), x); -+ bitmap_count += x; -+ } -+ brelse(bitmap_bh); -+ printk("ext3cow_count_free_blocks: stored = "E3FSBLK -+ ", computed = "E3FSBLK", "E3FSBLK"\n", -+ le32_to_cpu(es->s_free_blocks_count), -+ desc_count, bitmap_count); -+ return bitmap_count; -+#else -+ desc_count = 0; -+ smp_rmb(); -+ for (i = 0; i < ngroups; i++) { -+ gdp = ext3cow_get_group_desc(sb, i, NULL); -+ if (!gdp) -+ continue; -+ desc_count += le16_to_cpu(gdp->bg_free_blocks_count); -+ } -+ -+ return desc_count; -+#endif -+} -+ -+static inline int -+block_in_use(ext3cow_fsblk_t block, struct super_block *sb, unsigned char *map) -+{ -+ return ext3cow_test_bit ((block - -+ le32_to_cpu(EXT3COW_SB(sb)->s_es->s_first_data_block)) % -+ EXT3COW_BLOCKS_PER_GROUP(sb), map); -+} -+ -+static inline int test_root(int a, int b) -+{ -+ int num = b; -+ -+ while (a > num) -+ num *= b; -+ return num == a; -+} -+ -+static int ext3cow_group_sparse(int group) -+{ -+ if (group <= 1) -+ return 1; -+ if (!(group & 1)) -+ return 0; -+ return (test_root(group, 7) || test_root(group, 5) || -+ test_root(group, 3)); -+} -+ -+/** -+ * ext3cow_bg_has_super - number of blocks used by the superblock in group -+ * @sb: superblock for filesystem -+ * @group: group number to check -+ * -+ * Return the number of blocks used by the superblock (primary or backup) -+ * in this group. Currently this will be only 0 or 1. -+ */ -+int ext3cow_bg_has_super(struct super_block *sb, int group) -+{ -+ if (EXT3COW_HAS_RO_COMPAT_FEATURE(sb, -+ EXT3COW_FEATURE_RO_COMPAT_SPARSE_SUPER) && -+ !ext3cow_group_sparse(group)) -+ return 0; -+ return 1; -+} -+ -+static unsigned long ext3cow_bg_num_gdb_meta(struct super_block *sb, int group) -+{ -+ unsigned long metagroup = group / EXT3COW_DESC_PER_BLOCK(sb); -+ unsigned long first = metagroup * EXT3COW_DESC_PER_BLOCK(sb); -+ unsigned long last = first + EXT3COW_DESC_PER_BLOCK(sb) - 1; -+ -+ if (group == first || group == first + 1 || group == last) -+ return 1; -+ return 0; -+} -+ -+static unsigned long ext3cow_bg_num_gdb_nometa(struct super_block *sb, int group) -+{ -+ if (EXT3COW_HAS_RO_COMPAT_FEATURE(sb, -+ EXT3COW_FEATURE_RO_COMPAT_SPARSE_SUPER) && -+ !ext3cow_group_sparse(group)) -+ return 0; -+ return EXT3COW_SB(sb)->s_gdb_count; -+} -+ -+/** -+ * ext3cow_bg_num_gdb - number of blocks used by the group table in group -+ * @sb: superblock for filesystem -+ * @group: group number to check -+ * -+ * Return the number of blocks used by the group descriptor table -+ * (primary or backup) in this group. In the future there may be a -+ * different number of descriptor blocks in each group. -+ */ -+unsigned long ext3cow_bg_num_gdb(struct super_block *sb, int group) -+{ -+ unsigned long first_meta_bg = -+ le32_to_cpu(EXT3COW_SB(sb)->s_es->s_first_meta_bg); -+ unsigned long metagroup = group / EXT3COW_DESC_PER_BLOCK(sb); -+ -+ if (!EXT3COW_HAS_INCOMPAT_FEATURE(sb,EXT3COW_FEATURE_INCOMPAT_META_BG) || -+ metagroup < first_meta_bg) -+ return ext3cow_bg_num_gdb_nometa(sb,group); -+ -+ return ext3cow_bg_num_gdb_meta(sb,group); -+ -+} -diff -Naur linux-2.6.21.7/fs/ext3cow/bitmap.c linux-2.6.21.7_ext3cowPatched/fs/ext3cow/bitmap.c ---- linux-2.6.21.7/fs/ext3cow/bitmap.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.21.7_ext3cowPatched/fs/ext3cow/bitmap.c 2007-10-23 17:47:18.000000000 +0200 -@@ -0,0 +1,32 @@ -+/* -+ * linux/fs/ext3/bitmap.c -+ * -+ * Copyright (C) 1992, 1993, 1994, 1995 -+ * Remy Card (card@masi.ibp.fr) -+ * Laboratoire MASI - Institut Blaise Pascal -+ * Universite Pierre et Marie Curie (Paris VI) -+ */ -+ -+#include -+#include -+#include -+ -+#ifdef EXT3COWFS_DEBUG -+ -+static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; -+ -+unsigned long ext3cow_count_free (struct buffer_head * map, unsigned int numchars) -+{ -+ unsigned int i; -+ unsigned long sum = 0; -+ -+ if (!map) -+ return (0); -+ for (i = 0; i < numchars; i++) -+ sum += nibblemap[map->b_data[i] & 0xf] + -+ nibblemap[(map->b_data[i] >> 4) & 0xf]; -+ return (sum); -+} -+ -+#endif /* EXT3COWFS_DEBUG */ -+ -diff -Naur linux-2.6.21.7/fs/ext3cow/CHANGELOG linux-2.6.21.7_ext3cowPatched/fs/ext3cow/CHANGELOG ---- linux-2.6.21.7/fs/ext3cow/CHANGELOG 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.21.7_ext3cowPatched/fs/ext3cow/CHANGELOG 2007-10-23 17:47:18.000000000 +0200 -@@ -0,0 +1,7 @@ -+6-20-7 -+- Finished the roll code for inode chains in case of error. -+ -+6-18-07 -+- Added support for 32-bit uid's and gid's back in again -+- Took out support for block fragmentation -+- Hopefully fixed the non-sticking uid/gid bug. -\ No newline at end of file -diff -Naur linux-2.6.21.7/fs/ext3cow/dir.c linux-2.6.21.7_ext3cowPatched/fs/ext3cow/dir.c ---- linux-2.6.21.7/fs/ext3cow/dir.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.21.7_ext3cowPatched/fs/ext3cow/dir.c 2007-10-23 17:47:18.000000000 +0200 -@@ -0,0 +1,732 @@ -+/* -+ * linux/fs/ext3cow/dir.c -+ * -+ * Copyright (C) 1992, 1993, 1994, 1995 -+ * Remy Card (card@masi.ibp.fr) -+ * Laboratoire MASI - Institut Blaise Pascal -+ * Universite Pierre et Marie Curie (Paris VI) -+ * -+ * from -+ * -+ * linux/fs/minix/dir.c -+ * -+ * Copyright (C) 1991, 1992 Linus Torvalds -+ * -+ * ext3cow directory handling functions -+ * -+ * Big-endian to little-endian byte-swapping/bitmaps by -+ * David S. Miller (davem@caip.rutgers.edu), 1995 -+ * -+ * Hash Tree Directory indexing (c) 2001 Daniel Phillips -+ * -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+static unsigned char ext3cow_filetype_table[] = { -+ DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK -+}; -+ -+static int ext3cow_readdir(struct file *, void *, filldir_t); -+static int ext3cow_dx_readdir(struct file * filp, -+ void * dirent, filldir_t filldir); -+static int ext3cow_release_dir (struct inode * inode, -+ struct file * filp); -+ -+const struct file_operations ext3cow_dir_operations = { -+ .llseek = generic_file_llseek, -+ .read = generic_read_dir, -+ .readdir = ext3cow_readdir, /* we take BKL. needed?*/ -+ .ioctl = ext3cow_ioctl, /* BKL held */ -+#ifdef CONFIG_COMPAT -+ .compat_ioctl = ext3cow_compat_ioctl, -+#endif -+ .fsync = ext3cow_sync_file, /* BKL held */ -+#ifdef CONFIG_EXT3COW_INDEX -+ .release = ext3cow_release_dir, -+#endif -+}; -+ -+ -+static unsigned char get_dtype(struct super_block *sb, int filetype) -+{ -+ if (!EXT3COW_HAS_INCOMPAT_FEATURE(sb, EXT3COW_FEATURE_INCOMPAT_FILETYPE) || -+ (filetype >= EXT3COW_FT_MAX)) -+ return DT_UNKNOWN; -+ -+ return (ext3cow_filetype_table[filetype]); -+} -+ -+static int ext3cow_readversions(struct file * filp, void * dirent, -+ filldir_t filldir) -+{ -+ int error = 0; -+ unsigned long offset; -+ int i, stored; -+ struct buffer_head *bh; -+ struct ext3cow_dir_entry_2 * de; -+ struct super_block * sb; -+ int err; -+ struct inode *dir = filp->f_dentry->d_inode; -+ char *at; -+ unsigned long ino; -+ int ref_len = filp->f_dentry->d_name.len -1; -+ -+ sb = dir->i_sb; -+ -+ stored = 0; -+ bh = NULL; -+ offset = filp->f_pos & (sb->s_blocksize - 1); -+ -+ at = strrchr(filp->f_dentry->d_name.name, EXT3COW_FLUX_TOKEN); -+ -+ while (!error && !stored && filp->f_pos < dir->i_size) { -+ unsigned long blk = (filp->f_pos) >> EXT3COW_BLOCK_SIZE_BITS(sb); -+ struct buffer_head map_bh; -+ -+ bh = NULL; -+ map_bh.b_state = 0; -+ err = ext3cow_get_blocks_handle(NULL, dir, blk, 1, -+ &map_bh, 0, 0); -+ if (err > 0) { -+ page_cache_readahead(sb->s_bdev->bd_inode->i_mapping, -+ &filp->f_ra, -+ filp, -+ map_bh.b_blocknr >> -+ (PAGE_CACHE_SHIFT - dir->i_blkbits), -+ 1); -+ bh = ext3cow_bread(NULL, dir, blk, 0, &err); -+ } -+ -+ /* -+ * We ignore I/O errors on directories so users have a chance -+ * of recovering data when there's a bad sector -+ */ -+ if (!bh) { -+ ext3cow_error (sb, "ext3cow_readdir", -+ "directory #%lu contains a hole at offset %lu", -+ dir->i_ino, (unsigned long)filp->f_pos); -+ /* corrupt size? Maybe no more blocks to read */ -+ if (filp->f_pos > dir->i_blocks << 9) -+ break; -+ filp->f_pos += sb->s_blocksize - offset; -+ continue; -+ } -+ -+ ver_revalidate: -+ /* If the dir block has changed since the last call to -+ * readdir(2), then we might be pointing to an invalid -+ * dirent right now. Scan from the start of the block -+ * to make sure. */ -+ if (filp->f_version != dir->i_version) { -+ for (i = 0; i < sb->s_blocksize && i < offset; ) { -+ de = (struct ext3cow_dir_entry_2 *) -+ (bh->b_data + i); -+ /* It's too expensive to do a full -+ * dirent test each time round this -+ * loop, but we do have to test at -+ * least that it is non-zero. A -+ * failure will be detected in the -+ * dirent test below. */ -+ if (le16_to_cpu(de->rec_len) < -+ EXT3COW_DIR_REC_LEN(1)) -+ break; -+ i += le16_to_cpu(de->rec_len); -+ } -+ offset = i; -+ filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1)) -+ | offset; -+ filp->f_version = dir->i_version; -+ } -+ -+ while (!error && filp->f_pos < dir->i_size -+ && offset < sb->s_blocksize) { -+ de = (struct ext3cow_dir_entry_2 *) (bh->b_data + offset); -+ if (!ext3cow_check_dir_entry ("ext3cow_readdir", dir, de, -+ bh, offset)) { -+ /* On error, skip the f_pos to the -+ next block. */ -+ filp->f_pos = (filp->f_pos | -+ (sb->s_blocksize - 1)) + 1; -+ brelse (bh); -+ return stored; -+ } -+ offset += le16_to_cpu(de->rec_len); -+ -+ if (le32_to_cpu(de->inode)){ -+ unsigned long version = filp->f_version; -+ unsigned char d_type = DT_UNKNOWN; -+ -+ /* We might block in the next section -+ * if the data destination is -+ * currently swapped out. So, use a -+ * version stamp to detect whether or -+ * not the directory has been modified -+ * during the copy operation. -+ */ -+ -+ if (EXT3COW_HAS_INCOMPAT_FEATURE(sb, -+ EXT3COW_FEATURE_INCOMPAT_FILETYPE) -+ && de->file_type < EXT3COW_FT_MAX) -+ d_type = -+ ext3cow_filetype_table[de->file_type]; -+ if (de->name_len == ref_len -+ && strncmp(filp->f_dentry->d_name.name, de->name, ref_len)==0) { -+ -+ struct inode * inde; -+ char * name; -+ -+ name = kmalloc(EXT3COW_NAME_LEN, GFP_KERNEL); -+ strncpy(name, de->name, de->name_len); -+ inde = iget(dir->i_sb, de->inode); -+ -+ if (de->death_epoch!=0 && de->birth_epoch!=de->death_epoch) { -+ name[de->name_len]='\0'; -+ sprintf(name,"%s@%d",name, de->death_epoch); -+ error = filldir(dirent, name, -+ strlen(name), -+ filp->f_pos, -+ le32_to_cpu(inde->i_ino), -+ d_type); -+ stored++; -+ } -+ -+ while (EXT3COW_I(inde)->i_next_inode!=0) { -+ name[de->name_len]='\0'; -+ sprintf(name,"%s@%d",name, EXT3COW_I_EPOCHNUMBER(inde)); -+ error = filldir(dirent, name, -+ strlen(name), -+ filp->f_pos, -+ le32_to_cpu(inde->i_ino), -+ d_type); -+ ino = EXT3COW_I(inde)->i_next_inode; -+ iput(inde); -+ inde = iget(dir->i_sb, ino); -+ stored++; -+ } -+ -+ kfree(name); -+ iput(inde); -+ -+ if (error) -+ break; -+ -+ if (!stored && -+ EXT3COW_IS_DIRENT_SCOPED(de, EXT3COW_I_EPOCHNUMBER(dir))) { -+ error = filldir(dirent, de->name, -+ de->name_len, -+ filp->f_pos, -+ le32_to_cpu(de->inode), -+ d_type); -+ } -+ -+ if (error) -+ break; -+ if (version != filp->f_version) -+ goto ver_revalidate; -+ stored ++; -+ } -+ } -+ -+ filp->f_pos += le16_to_cpu(de->rec_len); -+ } -+ offset = 0; -+ brelse (bh); -+ } -+ return 0; -+} -+ -+ -+int ext3cow_check_dir_entry (const char * function, struct inode * dir, -+ struct ext3cow_dir_entry_2 * de, -+ struct buffer_head * bh, -+ unsigned long offset) -+{ -+ const char * error_msg = NULL; -+ const int rlen = le16_to_cpu(de->rec_len); -+ unsigned int current_epoch = EXT3COW_S_EPOCHNUMBER(dir->i_sb); -+ -+ if (rlen < EXT3COW_DIR_REC_LEN(1)) -+ error_msg = "rec_len is smaller than minimal"; -+ else if (rlen % 4 != 0) -+ error_msg = "rec_len % 4 != 0"; -+ else if (rlen < EXT3COW_DIR_REC_LEN(de->name_len)) -+ error_msg = "rec_len is too small for name_len"; -+ else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize) -+ error_msg = "directory entry across blocks"; -+ else if (le32_to_cpu(de->inode) > -+ le32_to_cpu(EXT3COW_SB(dir->i_sb)->s_es->s_inodes_count)) -+ error_msg = "inode out of bounds"; -+ /* Some bounds on versioned entries -znjp*/ -+ else if (le32_to_cpu(de->death_epoch) != EXT3COW_DIRENT_ALIVE && -+ le32_to_cpu(de->birth_epoch) > le32_to_cpu(de->death_epoch)) -+ error_msg = "entry died before it was born"; -+ else if (le32_to_cpu(de->birth_epoch) > current_epoch) -+ error_msg = "entry was born in the future"; -+ else if (le32_to_cpu(de->death_epoch) > current_epoch) -+ error_msg = "entry has already died in the future"; -+ -+ if (error_msg != NULL) -+ ext3cow_error (dir->i_sb, function, -+ "bad entry in directory #%lu: %s - " -+ "offset=%lu, inode=%lu, rec_len=%d, name_len=%d, " -+ "birth_epoch=%d death_epoch=%d", -+ dir->i_ino, error_msg, offset, -+ (unsigned long) le32_to_cpu(de->inode), -+ rlen, de->name_len, de->birth_epoch, de->death_epoch); -+ return error_msg == NULL ? 1 : 0; -+} -+ -+static int ext3cow_readdir(struct file * filp, -+ void * dirent, filldir_t filldir) -+{ -+ int error = 0; -+ unsigned long offset; -+ int i, stored; -+ struct ext3cow_dir_entry_2 *de; -+ struct super_block *sb; -+ int err; -+ struct inode *inode = filp->f_path.dentry->d_inode; -+ int ret = 0; -+ -+ /* is this a version listing? */ -+ if (filp->f_dentry->d_name.name[filp->f_dentry->d_name.len-1] == -+ EXT3COW_FLUX_TOKEN) -+ return ext3cow_readversions(filp, dirent, filldir); -+ -+ sb = inode->i_sb; -+ -+#ifdef CONFIG_EXT3COW_INDEX -+ if (EXT3COW_HAS_COMPAT_FEATURE(inode->i_sb, -+ EXT3COW_FEATURE_COMPAT_DIR_INDEX) && -+ ((EXT3COW_I(inode)->i_flags & EXT3COW_INDEX_FL) || -+ ((inode->i_size >> sb->s_blocksize_bits) == 1))) { -+ -+ err = ext3cow_dx_readdir(filp, dirent, filldir); -+ if (err != ERR_BAD_DX_DIR) { -+ ret = err; -+ goto out; -+ } -+ /* -+ * We don't set the inode dirty flag since it's not -+ * critical that it get flushed back to the disk. -+ */ -+ EXT3COW_I(filp->f_path.dentry->d_inode)->i_flags &= ~EXT3COW_INDEX_FL; -+ } -+#endif -+ stored = 0; -+ offset = filp->f_pos & (sb->s_blocksize - 1); -+ -+ while (!error && !stored && filp->f_pos < inode->i_size) { -+ unsigned long blk = filp->f_pos >> EXT3COW_BLOCK_SIZE_BITS(sb); -+ struct buffer_head map_bh; -+ struct buffer_head *bh = NULL; -+ -+ map_bh.b_state = 0; -+ err = ext3cow_get_blocks_handle(NULL, inode, blk, 1, -+ &map_bh, 0, 0); -+ if (err > 0) { -+ page_cache_readahead(sb->s_bdev->bd_inode->i_mapping, -+ &filp->f_ra, -+ filp, -+ map_bh.b_blocknr >> -+ (PAGE_CACHE_SHIFT - inode->i_blkbits), -+ 1); -+ bh = ext3cow_bread(NULL, inode, blk, 0, &err); -+ } -+ -+ /* -+ * We ignore I/O errors on directories so users have a chance -+ * of recovering data when there's a bad sector -+ */ -+ if (!bh) { -+ ext3cow_error (sb, "ext3cow_readdir", -+ "directory #%lu contains a hole at offset %lu", -+ inode->i_ino, (unsigned long)filp->f_pos); -+ /* corrupt size? Maybe no more blocks to read */ -+ if (filp->f_pos > inode->i_blocks << 9) -+ break; -+ filp->f_pos += sb->s_blocksize - offset; -+ continue; -+ } -+ -+revalidate: -+ /* If the dir block has changed since the last call to -+ * readdir(2), then we might be pointing to an invalid -+ * dirent right now. Scan from the start of the block -+ * to make sure. */ -+ if (filp->f_version != inode->i_version) { -+ for (i = 0; i < sb->s_blocksize && i < offset; ) { -+ de = (struct ext3cow_dir_entry_2 *) -+ (bh->b_data + i); -+ /* It's too expensive to do a full -+ * dirent test each time round this -+ * loop, but we do have to test at -+ * least that it is non-zero. A -+ * failure will be detected in the -+ * dirent test below. */ -+ if (le16_to_cpu(de->rec_len) < -+ EXT3COW_DIR_REC_LEN(1)) -+ break; -+ i += le16_to_cpu(de->rec_len); -+ } -+ offset = i; -+ filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1)) -+ | offset; -+ filp->f_version = inode->i_version; -+ } -+ -+ while (!error && filp->f_pos < inode->i_size -+ && offset < sb->s_blocksize) { -+ de = (struct ext3cow_dir_entry_2 *) (bh->b_data + offset); -+ if (!ext3cow_check_dir_entry ("ext3cow_readdir", inode, de, -+ bh, offset)) { -+ /* On error, skip the f_pos to the -+ next block. */ -+ filp->f_pos = (filp->f_pos | -+ (sb->s_blocksize - 1)) + 1; -+ brelse (bh); -+ ret = stored; -+ goto out; -+ } -+ offset += le16_to_cpu(de->rec_len); -+ /* -+ printk("Inode %ld Epoch number %u: is -+ dir %d -> %s be %d de %d scoped? %d\n", -+ dir->i_ino, -+ EXT3COW_I_EPOCHNUMBER(dir), -+ de->inode, -+ de->name, -+ de->birth_epoch, -+ de->death_epoch, -+ EXT3COW_IS_DIRENT_SCOPED(de, EXT3COW_I_EPOCHNUMBER(dir))); -+ */ -+ -+ /* Only add scoped dirents - znjp */ -+ if (le32_to_cpu(de->inode) && -+ EXT3COW_IS_DIRENT_SCOPED(de, EXT3COW_I_EPOCHNUMBER(inode))) { -+ /* We might block in the next section -+ * if the data destination is -+ * currently swapped out. So, use a -+ * version stamp to detect whether or -+ * not the directory has been modified -+ * during the copy operation. -+ */ -+ unsigned long version = filp->f_version; -+ -+ error = filldir(dirent, de->name, -+ de->name_len, -+ filp->f_pos, -+ le32_to_cpu(de->inode), -+ get_dtype(sb, de->file_type)); -+ if (error) -+ break; -+ if (version != filp->f_version) -+ goto revalidate; -+ stored ++; -+ } -+ filp->f_pos += le16_to_cpu(de->rec_len); -+ } -+ offset = 0; -+ brelse (bh); -+ } -+out: -+ return ret; -+} -+ -+#ifdef CONFIG_EXT3COW_INDEX -+/* -+ * These functions convert from the major/minor hash to an f_pos -+ * value. -+ * -+ * Currently we only use major hash numer. This is unfortunate, but -+ * on 32-bit machines, the same VFS interface is used for lseek and -+ * llseek, so if we use the 64 bit offset, then the 32-bit versions of -+ * lseek/telldir/seekdir will blow out spectacularly, and from within -+ * the ext2 low-level routine, we don't know if we're being called by -+ * a 64-bit version of the system call or the 32-bit version of the -+ * system call. Worse yet, NFSv2 only allows for a 32-bit readdir -+ * cookie. Sigh. -+ */ -+#define hash2pos(major, minor) (major >> 1) -+#define pos2maj_hash(pos) ((pos << 1) & 0xffffffff) -+#define pos2min_hash(pos) (0) -+ -+/* -+ * This structure holds the nodes of the red-black tree used to store -+ * the directory entry in hash order. -+ */ -+struct fname { -+ __u32 hash; -+ __u32 minor_hash; -+ struct rb_node rb_hash; -+ struct fname *next; -+ __u32 inode; -+ __u8 name_len; -+ __u8 file_type; -+ char name[0]; -+}; -+ -+/* -+ * This functoin implements a non-recursive way of freeing all of the -+ * nodes in the red-black tree. -+ */ -+static void free_rb_tree_fname(struct rb_root *root) -+{ -+ struct rb_node *n = root->rb_node; -+ struct rb_node *parent; -+ struct fname *fname; -+ -+ while (n) { -+ /* Do the node's children first */ -+ if ((n)->rb_left) { -+ n = n->rb_left; -+ continue; -+ } -+ if (n->rb_right) { -+ n = n->rb_right; -+ continue; -+ } -+ /* -+ * The node has no children; free it, and then zero -+ * out parent's link to it. Finally go to the -+ * beginning of the loop and try to free the parent -+ * node. -+ */ -+ parent = rb_parent(n); -+ fname = rb_entry(n, struct fname, rb_hash); -+ while (fname) { -+ struct fname * old = fname; -+ fname = fname->next; -+ kfree (old); -+ } -+ if (!parent) -+ root->rb_node = NULL; -+ else if (parent->rb_left == n) -+ parent->rb_left = NULL; -+ else if (parent->rb_right == n) -+ parent->rb_right = NULL; -+ n = parent; -+ } -+ root->rb_node = NULL; -+} -+ -+ -+static struct dir_private_info *create_dir_info(loff_t pos) -+{ -+ struct dir_private_info *p; -+ -+ p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL); -+ if (!p) -+ return NULL; -+ p->root.rb_node = NULL; -+ p->curr_node = NULL; -+ p->extra_fname = NULL; -+ p->last_pos = 0; -+ p->curr_hash = pos2maj_hash(pos); -+ p->curr_minor_hash = pos2min_hash(pos); -+ p->next_hash = 0; -+ return p; -+} -+ -+void ext3cow_htree_free_dir_info(struct dir_private_info *p) -+{ -+ free_rb_tree_fname(&p->root); -+ kfree(p); -+} -+ -+/* -+ * Given a directory entry, enter it into the fname rb tree. -+ */ -+int ext3cow_htree_store_dirent(struct file *dir_file, __u32 hash, -+ __u32 minor_hash, -+ struct ext3cow_dir_entry_2 *dirent) -+{ -+ struct rb_node **p, *parent = NULL; -+ struct fname * fname, *new_fn; -+ struct dir_private_info *info; -+ int len; -+ -+ info = (struct dir_private_info *) dir_file->private_data; -+ p = &info->root.rb_node; -+ -+ /* Create and allocate the fname structure */ -+ len = sizeof(struct fname) + dirent->name_len + 1; -+ new_fn = kzalloc(len, GFP_KERNEL); -+ if (!new_fn) -+ return -ENOMEM; -+ new_fn->hash = hash; -+ new_fn->minor_hash = minor_hash; -+ new_fn->inode = le32_to_cpu(dirent->inode); -+ new_fn->name_len = dirent->name_len; -+ new_fn->file_type = dirent->file_type; -+ memcpy(new_fn->name, dirent->name, dirent->name_len); -+ new_fn->name[dirent->name_len] = 0; -+ -+ while (*p) { -+ parent = *p; -+ fname = rb_entry(parent, struct fname, rb_hash); -+ -+ /* -+ * If the hash and minor hash match up, then we put -+ * them on a linked list. This rarely happens... -+ */ -+ if ((new_fn->hash == fname->hash) && -+ (new_fn->minor_hash == fname->minor_hash)) { -+ new_fn->next = fname->next; -+ fname->next = new_fn; -+ return 0; -+ } -+ -+ if (new_fn->hash < fname->hash) -+ p = &(*p)->rb_left; -+ else if (new_fn->hash > fname->hash) -+ p = &(*p)->rb_right; -+ else if (new_fn->minor_hash < fname->minor_hash) -+ p = &(*p)->rb_left; -+ else /* if (new_fn->minor_hash > fname->minor_hash) */ -+ p = &(*p)->rb_right; -+ } -+ -+ rb_link_node(&new_fn->rb_hash, parent, p); -+ rb_insert_color(&new_fn->rb_hash, &info->root); -+ return 0; -+} -+ -+ -+ -+/* -+ * This is a helper function for ext3cow_dx_readdir. It calls filldir -+ * for all entres on the fname linked list. (Normally there is only -+ * one entry on the linked list, unless there are 62 bit hash collisions.) -+ */ -+static int call_filldir(struct file * filp, void * dirent, -+ filldir_t filldir, struct fname *fname) -+{ -+ struct dir_private_info *info = filp->private_data; -+ loff_t curr_pos; -+ struct inode *inode = filp->f_path.dentry->d_inode; -+ struct super_block * sb; -+ int error; -+ -+ sb = inode->i_sb; -+ -+ printk(KERN_INFO, "Got %s\n", filp->f_path.dentry->d_name.name); -+ -+ if (!fname) { -+ printk("call_filldir: called with null fname?!?\n"); -+ return 0; -+ } -+ curr_pos = hash2pos(fname->hash, fname->minor_hash); -+ while (fname) { -+ error = filldir(dirent, fname->name, -+ fname->name_len, curr_pos, -+ fname->inode, -+ get_dtype(sb, fname->file_type)); -+ if (error) { -+ filp->f_pos = curr_pos; -+ info->extra_fname = fname->next; -+ return error; -+ } -+ fname = fname->next; -+ } -+ return 0; -+} -+ -+static int ext3cow_dx_readdir(struct file * filp, -+ void * dirent, filldir_t filldir) -+{ -+ struct dir_private_info *info = filp->private_data; -+ struct inode *inode = filp->f_path.dentry->d_inode; -+ struct fname *fname; -+ int ret; -+ -+ if (!info) { -+ info = create_dir_info(filp->f_pos); -+ if (!info) -+ return -ENOMEM; -+ filp->private_data = info; -+ } -+ -+ if (filp->f_pos == EXT3COW_HTREE_EOF) -+ return 0; /* EOF */ -+ -+ /* Some one has messed with f_pos; reset the world */ -+ if (info->last_pos != filp->f_pos) { -+ free_rb_tree_fname(&info->root); -+ info->curr_node = NULL; -+ info->extra_fname = NULL; -+ info->curr_hash = pos2maj_hash(filp->f_pos); -+ info->curr_minor_hash = pos2min_hash(filp->f_pos); -+ } -+ -+ /* -+ * If there are any leftover names on the hash collision -+ * chain, return them first. -+ */ -+ if (info->extra_fname && -+ call_filldir(filp, dirent, filldir, info->extra_fname)) -+ goto finished; -+ -+ if (!info->curr_node) -+ info->curr_node = rb_first(&info->root); -+ -+ while (1) { -+ /* -+ * Fill the rbtree if we have no more entries, -+ * or the inode has changed since we last read in the -+ * cached entries. -+ */ -+ if ((!info->curr_node) || -+ (filp->f_version != inode->i_version)) { -+ info->curr_node = NULL; -+ free_rb_tree_fname(&info->root); -+ filp->f_version = inode->i_version; -+ ret = ext3cow_htree_fill_tree(filp, info->curr_hash, -+ info->curr_minor_hash, -+ &info->next_hash); -+ if (ret < 0) -+ return ret; -+ if (ret == 0) { -+ filp->f_pos = EXT3COW_HTREE_EOF; -+ break; -+ } -+ info->curr_node = rb_first(&info->root); -+ } -+ -+ fname = rb_entry(info->curr_node, struct fname, rb_hash); -+ info->curr_hash = fname->hash; -+ info->curr_minor_hash = fname->minor_hash; -+ if (call_filldir(filp, dirent, filldir, fname)) -+ break; -+ -+ info->curr_node = rb_next(info->curr_node); -+ if (!info->curr_node) { -+ if (info->next_hash == ~0) { -+ filp->f_pos = EXT3COW_HTREE_EOF; -+ break; -+ } -+ info->curr_hash = info->next_hash; -+ info->curr_minor_hash = 0; -+ } -+ } -+finished: -+ info->last_pos = filp->f_pos; -+ return 0; -+} -+ -+static int ext3cow_release_dir (struct inode * inode, struct file * filp) -+{ -+ if (filp->private_data) -+ ext3cow_htree_free_dir_info(filp->private_data); -+ -+ return 0; -+} -+ -+#endif -diff -Naur linux-2.6.21.7/fs/ext3cow/ext3cow_jbd.c linux-2.6.21.7_ext3cowPatched/fs/ext3cow/ext3cow_jbd.c ---- linux-2.6.21.7/fs/ext3cow/ext3cow_jbd.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.21.7_ext3cowPatched/fs/ext3cow/ext3cow_jbd.c 2007-10-23 17:47:18.000000000 +0200 -@@ -0,0 +1,59 @@ -+/* -+ * Interface between ext3cow and JBD -+ */ -+ -+#include -+ -+int __ext3cow_journal_get_undo_access(const char *where, handle_t *handle, -+ struct buffer_head *bh) -+{ -+ int err = journal_get_undo_access(handle, bh); -+ if (err) -+ ext3cow_journal_abort_handle(where, __FUNCTION__, bh, handle,err); -+ return err; -+} -+ -+int __ext3cow_journal_get_write_access(const char *where, handle_t *handle, -+ struct buffer_head *bh) -+{ -+ int err = journal_get_write_access(handle, bh); -+ if (err) -+ ext3cow_journal_abort_handle(where, __FUNCTION__, bh, handle,err); -+ return err; -+} -+ -+int __ext3cow_journal_forget(const char *where, handle_t *handle, -+ struct buffer_head *bh) -+{ -+ int err = journal_forget(handle, bh); -+ if (err) -+ ext3cow_journal_abort_handle(where, __FUNCTION__, bh, handle,err); -+ return err; -+} -+ -+int __ext3cow_journal_revoke(const char *where, handle_t *handle, -+ unsigned long blocknr, struct buffer_head *bh) -+{ -+ int err = journal_revoke(handle, blocknr, bh); -+ if (err) -+ ext3cow_journal_abort_handle(where, __FUNCTION__, bh, handle,err); -+ return err; -+} -+ -+int __ext3cow_journal_get_create_access(const char *where, -+ handle_t *handle, struct buffer_head *bh) -+{ -+ int err = journal_get_create_access(handle, bh); -+ if (err) -+ ext3cow_journal_abort_handle(where, __FUNCTION__, bh, handle,err); -+ return err; -+} -+ -+int __ext3cow_journal_dirty_metadata(const char *where, -+ handle_t *handle, struct buffer_head *bh) -+{ -+ int err = journal_dirty_metadata(handle, bh); -+ if (err) -+ ext3cow_journal_abort_handle(where, __FUNCTION__, bh, handle,err); -+ return err; -+} -diff -Naur linux-2.6.21.7/fs/ext3cow/file.c linux-2.6.21.7_ext3cowPatched/fs/ext3cow/file.c ---- linux-2.6.21.7/fs/ext3cow/file.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.21.7_ext3cowPatched/fs/ext3cow/file.c 2007-10-23 17:47:18.000000000 +0200 -@@ -0,0 +1,147 @@ -+/* -+ * linux/fs/ext3cow/file.c -+ * -+ * Copyright (C) 1992, 1993, 1994, 1995 -+ * Remy Card (card@masi.ibp.fr) -+ * Laboratoire MASI - Institut Blaise Pascal -+ * Universite Pierre et Marie Curie (Paris VI) -+ * -+ * from -+ * -+ * linux/fs/minix/file.c -+ * -+ * Copyright (C) 1991, 1992 Linus Torvalds -+ * -+ * ext3cow fs regular file handling primitives -+ * -+ * 64-bit file support on 64-bit platforms by Jakub Jelinek -+ * (jj@sunsite.ms.mff.cuni.cz) -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include "xattr.h" -+#include "acl.h" -+ -+/* -+ * Called when an inode is released. Note that this is different -+ * from ext3cow_file_open: open gets called at every open, but release -+ * gets called only when /all/ the files are closed. -+ */ -+static int ext3cow_release_file (struct inode * inode, struct file * filp) -+{ -+ /* if we are the last writer on the inode, drop the block reservation */ -+ if ((filp->f_mode & FMODE_WRITE) && -+ (atomic_read(&inode->i_writecount) == 1)) -+ { -+ mutex_lock(&EXT3COW_I(inode)->truncate_mutex); -+ ext3cow_discard_reservation(inode); -+ mutex_unlock(&EXT3COW_I(inode)->truncate_mutex); -+ } -+ if (is_dx(inode) && filp->private_data) -+ ext3cow_htree_free_dir_info(filp->private_data); -+ -+ return 0; -+} -+ -+static ssize_t -+ext3cow_file_write(struct kiocb *iocb, const struct iovec *iov, -+ unsigned long nr_segs, loff_t pos) -+{ -+ struct file *file = iocb->ki_filp; -+ struct inode *inode = file->f_path.dentry->d_inode; -+ struct inode *dir = file->f_path.dentry->d_parent->d_inode; -+ ssize_t ret = 0; -+ int err = 0; -+ -+ /* This is the place where we create a new version on write -znjp */ -+ if(EXT3COW_S_EPOCHNUMBER(inode->i_sb) > EXT3COW_I_EPOCHNUMBER(inode)){ -+ err = ext3cow_dup_inode(dir, inode); -+ if(err) -+ return err; -+ } -+ -+ ret = generic_file_aio_write(iocb, iov, nr_segs, pos); -+ -+ /* -+ * Skip flushing if there was an error, or if nothing was written. -+ */ -+ if (ret <= 0) -+ return ret; -+ -+ /* -+ * If the inode is IS_SYNC, or is O_SYNC and we are doing data -+ * journalling then we need to make sure that we force the transaction -+ * to disk to keep all metadata uptodate synchronously. -+ */ -+ if (file->f_flags & O_SYNC) { -+ /* -+ * If we are non-data-journaled, then the dirty data has -+ * already been flushed to backing store by generic_osync_inode, -+ * and the inode has been flushed too if there have been any -+ * modifications other than mere timestamp updates. -+ * -+ * Open question --- do we care about flushing timestamps too -+ * if the inode is IS_SYNC? -+ */ -+ if (!ext3cow_should_journal_data(inode)) -+ return ret; -+ -+ goto force_commit; -+ } -+ -+ /* -+ * So we know that there has been no forced data flush. If the inode -+ * is marked IS_SYNC, we need to force one ourselves. -+ */ -+ if (!IS_SYNC(inode)) -+ return ret; -+ -+ /* -+ * Open question #2 --- should we force data to disk here too? If we -+ * don't, the only impact is that data=writeback filesystems won't -+ * flush data to disk automatically on IS_SYNC, only metadata (but -+ * historically, that is what ext2 has done.) -+ */ -+ -+force_commit: -+ err = ext3cow_force_commit(inode->i_sb); -+ if (err) -+ return err; -+ return ret; -+} -+ -+const struct file_operations ext3cow_file_operations = { -+ .llseek = generic_file_llseek, -+ .read = do_sync_read, -+ .write = do_sync_write, -+ .aio_read = generic_file_aio_read, -+ .aio_write = ext3cow_file_write, -+ .ioctl = ext3cow_ioctl, -+#ifdef CONFIG_COMPAT -+ .compat_ioctl = ext3cow_compat_ioctl, -+#endif -+ .mmap = generic_file_mmap, -+ .open = generic_file_open, -+ .release = ext3cow_release_file, -+ .fsync = ext3cow_sync_file, -+ .sendfile = generic_file_sendfile, -+ .splice_read = generic_file_splice_read, -+ .splice_write = generic_file_splice_write, -+}; -+ -+struct inode_operations ext3cow_file_inode_operations = { -+ .truncate = ext3cow_truncate, -+ .setattr = ext3cow_setattr, -+#ifdef CONFIG_EXT3COW_FS_XATTR -+ .setxattr = generic_setxattr, -+ .getxattr = generic_getxattr, -+ .listxattr = ext3cow_listxattr, -+ .removexattr = generic_removexattr, -+#endif -+ .permission = ext3cow_permission, -+}; -+ -diff -Naur linux-2.6.21.7/fs/ext3cow/fsync.c linux-2.6.21.7_ext3cowPatched/fs/ext3cow/fsync.c ---- linux-2.6.21.7/fs/ext3cow/fsync.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.21.7_ext3cowPatched/fs/ext3cow/fsync.c 2007-10-23 17:47:18.000000000 +0200 -@@ -0,0 +1,88 @@ -+/* -+ * linux/fs/ext3cow/fsync.c -+ * -+ * Copyright (C) 1993 Stephen Tweedie (sct@redhat.com) -+ * from -+ * Copyright (C) 1992 Remy Card (card@masi.ibp.fr) -+ * Laboratoire MASI - Institut Blaise Pascal -+ * Universite Pierre et Marie Curie (Paris VI) -+ * from -+ * linux/fs/minix/truncate.c Copyright (C) 1991, 1992 Linus Torvalds -+ * -+ * ext3cowfs fsync primitive -+ * -+ * Big-endian to little-endian byte-swapping/bitmaps by -+ * David S. Miller (davem@caip.rutgers.edu), 1995 -+ * -+ * Removed unnecessary code duplication for little endian machines -+ * and excessive __inline__s. -+ * Andi Kleen, 1997 -+ * -+ * Major simplications and cleanup - we only need to do the metadata, because -+ * we can depend on generic_block_fdatasync() to sync the data blocks. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* -+ * akpm: A new design for ext3cow_sync_file(). -+ * -+ * This is only called from sys_fsync(), sys_fdatasync() and sys_msync(). -+ * There cannot be a transaction open by this task. -+ * Another task could have dirtied this inode. Its data can be in any -+ * state in the journalling system. -+ * -+ * What we do is just kick off a commit and wait on it. This will snapshot the -+ * inode to disk. -+ */ -+ -+int ext3cow_sync_file(struct file * file, struct dentry *dentry, int datasync) -+{ -+ struct inode *inode = dentry->d_inode; -+ int ret = 0; -+ -+ J_ASSERT(ext3cow_journal_current_handle() == 0); -+ -+ /* -+ * data=writeback: -+ * The caller's filemap_fdatawrite()/wait will sync the data. -+ * sync_inode() will sync the metadata -+ * -+ * data=ordered: -+ * The caller's filemap_fdatawrite() will write the data and -+ * sync_inode() will write the inode if it is dirty. Then the caller's -+ * filemap_fdatawait() will wait on the pages. -+ * -+ * data=journal: -+ * filemap_fdatawrite won't do anything (the buffers are clean). -+ * ext3cow_force_commit will write the file data into the journal and -+ * will wait on that. -+ * filemap_fdatawait() will encounter a ton of newly-dirtied pages -+ * (they were dirtied by commit). But that's OK - the blocks are -+ * safe in-journal, which is all fsync() needs to ensure. -+ */ -+ if (ext3cow_should_journal_data(inode)) { -+ ret = ext3cow_force_commit(inode->i_sb); -+ goto out; -+ } -+ -+ /* -+ * The VFS has written the file data. If the inode is unaltered -+ * then we need not start a commit. -+ */ -+ if (inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC)) { -+ struct writeback_control wbc = { -+ .sync_mode = WB_SYNC_ALL, -+ .nr_to_write = 0, /* sys_fsync did this */ -+ }; -+ ret = sync_inode(inode, &wbc); -+ } -+out: -+ return ret; -+} -diff -Naur linux-2.6.21.7/fs/ext3cow/hash.c linux-2.6.21.7_ext3cowPatched/fs/ext3cow/hash.c ---- linux-2.6.21.7/fs/ext3cow/hash.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.21.7_ext3cowPatched/fs/ext3cow/hash.c 2007-10-23 17:47:18.000000000 +0200 -@@ -0,0 +1,152 @@ -+/* -+ * linux/fs/ext3cow/hash.c -+ * -+ * Copyright (C) 2002 by Theodore Ts'o -+ * -+ * This file is released under the GPL v2. -+ * -+ * This file may be redistributed under the terms of the GNU Public -+ * License. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+ -+#define DELTA 0x9E3779B9 -+ -+static void TEA_transform(__u32 buf[4], __u32 const in[]) -+{ -+ __u32 sum = 0; -+ __u32 b0 = buf[0], b1 = buf[1]; -+ __u32 a = in[0], b = in[1], c = in[2], d = in[3]; -+ int n = 16; -+ -+ do { -+ sum += DELTA; -+ b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); -+ b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); -+ } while(--n); -+ -+ buf[0] += b0; -+ buf[1] += b1; -+} -+ -+ -+/* The old legacy hash */ -+static __u32 dx_hack_hash (const char *name, int len) -+{ -+ __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; -+ while (len--) { -+ __u32 hash = hash1 + (hash0 ^ (*name++ * 7152373)); -+ -+ if (hash & 0x80000000) hash -= 0x7fffffff; -+ hash1 = hash0; -+ hash0 = hash; -+ } -+ return (hash0 << 1); -+} -+ -+static void str2hashbuf(const char *msg, int len, __u32 *buf, int num) -+{ -+ __u32 pad, val; -+ int i; -+ -+ pad = (__u32)len | ((__u32)len << 8); -+ pad |= pad << 16; -+ -+ val = pad; -+ if (len > num*4) -+ len = num * 4; -+ for (i=0; i < len; i++) { -+ if ((i % 4) == 0) -+ val = pad; -+ val = msg[i] + (val << 8); -+ if ((i % 4) == 3) { -+ *buf++ = val; -+ val = pad; -+ num--; -+ } -+ } -+ if (--num >= 0) -+ *buf++ = val; -+ while (--num >= 0) -+ *buf++ = pad; -+} -+ -+/* -+ * Returns the hash of a filename. If len is 0 and name is NULL, then -+ * this function can be used to test whether or not a hash version is -+ * supported. -+ * -+ * The seed is an 4 longword (32 bits) "secret" which can be used to -+ * uniquify a hash. If the seed is all zero's, then some default seed -+ * may be used. -+ * -+ * A particular hash version specifies whether or not the seed is -+ * represented, and whether or not the returned hash is 32 bits or 64 -+ * bits. 32 bit hashes will return 0 for the minor hash. -+ */ -+int ext3cowfs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) -+{ -+ __u32 hash; -+ __u32 minor_hash = 0; -+ const char *p; -+ int i; -+ __u32 in[8], buf[4]; -+ -+ /* Initialize the default seed for the hash checksum functions */ -+ buf[0] = 0x67452301; -+ buf[1] = 0xefcdab89; -+ buf[2] = 0x98badcfe; -+ buf[3] = 0x10325476; -+ -+ /* Check to see if the seed is all zero's */ -+ if (hinfo->seed) { -+ for (i=0; i < 4; i++) { -+ if (hinfo->seed[i]) -+ break; -+ } -+ if (i < 4) -+ memcpy(buf, hinfo->seed, sizeof(buf)); -+ } -+ -+ switch (hinfo->hash_version) { -+ case DX_HASH_LEGACY: -+ hash = dx_hack_hash(name, len); -+ break; -+ case DX_HASH_HALF_MD4: -+ p = name; -+ while (len > 0) { -+ str2hashbuf(p, len, in, 8); -+ half_md4_transform(buf, in); -+ len -= 32; -+ p += 32; -+ } -+ minor_hash = buf[2]; -+ hash = buf[1]; -+ break; -+ case DX_HASH_TEA: -+ p = name; -+ while (len > 0) { -+ str2hashbuf(p, len, in, 4); -+ TEA_transform(buf, in); -+ len -= 16; -+ p += 16; -+ } -+ hash = buf[0]; -+ minor_hash = buf[1]; -+ break; -+ default: -+ hinfo->hash = 0; -+ return -1; -+ } -+ hash = hash & ~1; -+ if (hash == (EXT3COW_HTREE_EOF << 1)) -+ hash = (EXT3COW_HTREE_EOF-1) << 1; -+ hinfo->hash = hash; -+ hinfo->minor_hash = minor_hash; -+ return 0; -+} -diff -Naur linux-2.6.21.7/fs/ext3cow/ialloc.c linux-2.6.21.7_ext3cowPatched/fs/ext3cow/ialloc.c ---- linux-2.6.21.7/fs/ext3cow/ialloc.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.21.7_ext3cowPatched/fs/ext3cow/ialloc.c 2007-10-23 17:47:18.000000000 +0200 -@@ -0,0 +1,764 @@ -+/* -+ * linux/fs/ext3cow/ialloc.c -+ * -+ * Copyright (C) 1992, 1993, 1994, 1995 -+ * Remy Card (card@masi.ibp.fr) -+ * Laboratoire MASI - Institut Blaise Pascal -+ * Universite Pierre et Marie Curie (Paris VI) -+ * -+ * BSD ufs-inspired inode and directory allocation by -+ * Stephen Tweedie (sct@redhat.com), 1993 -+ * Big-endian to little-endian byte-swapping/bitmaps by -+ * David S. Miller (davem@caip.rutgers.edu), 1995 -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+#include "xattr.h" -+#include "acl.h" -+ -+/* -+ * ialloc.c contains the inodes allocation and deallocation routines -+ */ -+ -+/* -+ * The free inodes are managed by bitmaps. A file system contains several -+ * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap -+ * block for inodes, N blocks for the inode table and data blocks. -+ * -+ * The file system contains group descriptors which are located after the -+ * super block. Each descriptor contains the number of the bitmap block and -+ * the free blocks count in the block. -+ */ -+ -+ -+/* -+ * Read the inode allocation bitmap for a given block_group, reading -+ * into the specified slot in the superblock's bitmap cache. -+ * -+ * Return buffer_head of bitmap on success or NULL. -+ */ -+static struct buffer_head * -+read_inode_bitmap(struct super_block * sb, unsigned long block_group) -+{ -+ struct ext3cow_group_desc *desc; -+ struct buffer_head *bh = NULL; -+ -+ desc = ext3cow_get_group_desc(sb, block_group, NULL); -+ if (!desc) -+ goto error_out; -+ -+ bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap)); -+ if (!bh) -+ ext3cow_error(sb, "read_inode_bitmap", -+ "Cannot read inode bitmap - " -+ "block_group = %lu, inode_bitmap = %u", -+ block_group, le32_to_cpu(desc->bg_inode_bitmap)); -+error_out: -+ return bh; -+} -+ -+/* -+ * NOTE! When we get the inode, we're the only people -+ * that have access to it, and as such there are no -+ * race conditions we have to worry about. The inode -+ * is not on the hash-lists, and it cannot be reached -+ * through the filesystem because the directory entry -+ * has been deleted earlier. -+ * -+ * HOWEVER: we must make sure that we get no aliases, -+ * which means that we have to call "clear_inode()" -+ * _before_ we mark the inode not in use in the inode -+ * bitmaps. Otherwise a newly created file might use -+ * the same inode number (not actually the same pointer -+ * though), and then we'd have two inodes sharing the -+ * same inode number and space on the harddisk. -+ */ -+void ext3cow_free_inode (handle_t *handle, struct inode * inode) -+{ -+ struct super_block * sb = inode->i_sb; -+ int is_directory; -+ unsigned long ino; -+ struct buffer_head *bitmap_bh = NULL; -+ struct buffer_head *bh2; -+ unsigned long block_group; -+ unsigned long bit; -+ struct ext3cow_group_desc * gdp; -+ struct ext3cow_super_block * es; -+ struct ext3cow_sb_info *sbi; -+ int fatal = 0, err; -+ -+ if (atomic_read(&inode->i_count) > 1) { -+ printk ("ext3cow_free_inode: inode has count=%d\n", -+ atomic_read(&inode->i_count)); -+ return; -+ } -+ if (inode->i_nlink) { -+ printk ("ext3cow_free_inode: inode has nlink=%d\n", -+ inode->i_nlink); -+ return; -+ } -+ if (!sb) { -+ printk("ext3cow_free_inode: inode on nonexistent device\n"); -+ return; -+ } -+ sbi = EXT3COW_SB(sb); -+ -+ ino = inode->i_ino; -+ ext3cow_debug ("freeing inode %lu\n", ino); -+ -+ /* -+ * Note: we must free any quota before locking the superblock, -+ * as writing the quota to disk may need the lock as well. -+ */ -+ DQUOT_INIT(inode); -+ ext3cow_xattr_delete_inode(handle, inode); -+ DQUOT_FREE_INODE(inode); -+ DQUOT_DROP(inode); -+ -+ is_directory = S_ISDIR(inode->i_mode); -+ -+ /* Do this BEFORE marking the inode not in use or returning an error */ -+ clear_inode (inode); -+ -+ es = EXT3COW_SB(sb)->s_es; -+ if (ino < EXT3COW_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { -+ ext3cow_error (sb, "ext3cow_free_inode", -+ "reserved or nonexistent inode %lu", ino); -+ goto error_return; -+ } -+ block_group = (ino - 1) / EXT3COW_INODES_PER_GROUP(sb); -+ bit = (ino - 1) % EXT3COW_INODES_PER_GROUP(sb); -+ bitmap_bh = read_inode_bitmap(sb, block_group); -+ if (!bitmap_bh) -+ goto error_return; -+ -+ BUFFER_TRACE(bitmap_bh, "get_write_access"); -+ fatal = ext3cow_journal_get_write_access(handle, bitmap_bh); -+ if (fatal) -+ goto error_return; -+ -+ /* Ok, now we can actually update the inode bitmaps.. */ -+ if (!ext3cow_clear_bit_atomic(sb_bgl_lock(sbi, block_group), -+ bit, bitmap_bh->b_data)) -+ ext3cow_error (sb, "ext3cow_free_inode", -+ "bit already cleared for inode %lu", ino); -+ else { -+ gdp = ext3cow_get_group_desc (sb, block_group, &bh2); -+ -+ BUFFER_TRACE(bh2, "get_write_access"); -+ fatal = ext3cow_journal_get_write_access(handle, bh2); -+ if (fatal) goto error_return; -+ -+ if (gdp) { -+ spin_lock(sb_bgl_lock(sbi, block_group)); -+ gdp->bg_free_inodes_count = cpu_to_le16( -+ le16_to_cpu(gdp->bg_free_inodes_count) + 1); -+ if (is_directory) -+ gdp->bg_used_dirs_count = cpu_to_le16( -+ le16_to_cpu(gdp->bg_used_dirs_count) - 1); -+ spin_unlock(sb_bgl_lock(sbi, block_group)); -+ percpu_counter_inc(&sbi->s_freeinodes_counter); -+ if (is_directory) -+ percpu_counter_dec(&sbi->s_dirs_counter); -+ -+ } -+ BUFFER_TRACE(bh2, "call ext3cow_journal_dirty_metadata"); -+ err = ext3cow_journal_dirty_metadata(handle, bh2); -+ if (!fatal) fatal = err; -+ } -+ BUFFER_TRACE(bitmap_bh, "call ext3cow_journal_dirty_metadata"); -+ err = ext3cow_journal_dirty_metadata(handle, bitmap_bh); -+ if (!fatal) -+ fatal = err; -+ sb->s_dirt = 1; -+error_return: -+ brelse(bitmap_bh); -+ ext3cow_std_error(sb, fatal); -+} -+ -+/* -+ * There are two policies for allocating an inode. If the new inode is -+ * a directory, then a forward search is made for a block group with both -+ * free space and a low directory-to-inode ratio; if that fails, then of -+ * the groups with above-average free space, that group with the fewest -+ * directories already is chosen. -+ * -+ * For other inodes, search forward from the parent directory\'s block -+ * group to find a free inode. -+ */ -+static int find_group_dir(struct super_block *sb, struct inode *parent) -+{ -+ int ngroups = EXT3COW_SB(sb)->s_groups_count; -+ unsigned int freei, avefreei; -+ struct ext3cow_group_desc *desc, *best_desc = NULL; -+ struct buffer_head *bh; -+ int group, best_group = -1; -+ -+ freei = percpu_counter_read_positive(&EXT3COW_SB(sb)->s_freeinodes_counter); -+ avefreei = freei / ngroups; -+ -+ for (group = 0; group < ngroups; group++) { -+ desc = ext3cow_get_group_desc (sb, group, &bh); -+ if (!desc || !desc->bg_free_inodes_count) -+ continue; -+ if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) -+ continue; -+ if (!best_desc || -+ (le16_to_cpu(desc->bg_free_blocks_count) > -+ le16_to_cpu(best_desc->bg_free_blocks_count))) { -+ best_group = group; -+ best_desc = desc; -+ } -+ } -+ return best_group; -+} -+ -+/* -+ * Orlov's allocator for directories. -+ * -+ * We always try to spread first-level directories. -+ * -+ * If there are blockgroups with both free inodes and free blocks counts -+ * not worse than average we return one with smallest directory count. -+ * Otherwise we simply return a random group. -+ * -+ * For the rest rules look so: -+ * -+ * It's OK to put directory into a group unless -+ * it has too many directories already (max_dirs) or -+ * it has too few free inodes left (min_inodes) or -+ * it has too few free blocks left (min_blocks) or -+ * it's already running too large debt (max_debt). -+ * Parent's group is prefered, if it doesn't satisfy these -+ * conditions we search cyclically through the rest. If none -+ * of the groups look good we just look for a group with more -+ * free inodes than average (starting at parent's group). -+ * -+ * Debt is incremented each time we allocate a directory and decremented -+ * when we allocate an inode, within 0--255. -+ */ -+ -+#define INODE_COST 64 -+#define BLOCK_COST 256 -+ -+static int find_group_orlov(struct super_block *sb, struct inode *parent) -+{ -+ int parent_group = EXT3COW_I(parent)->i_block_group; -+ struct ext3cow_sb_info *sbi = EXT3COW_SB(sb); -+ struct ext3cow_super_block *es = sbi->s_es; -+ int ngroups = sbi->s_groups_count; -+ int inodes_per_group = EXT3COW_INODES_PER_GROUP(sb); -+ unsigned int freei, avefreei; -+ ext3cow_fsblk_t freeb, avefreeb; -+ ext3cow_fsblk_t blocks_per_dir; -+ unsigned int ndirs; -+ int max_debt, max_dirs, min_inodes; -+ ext3cow_grpblk_t min_blocks; -+ int group = -1, i; -+ struct ext3cow_group_desc *desc; -+ struct buffer_head *bh; -+ -+ freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter); -+ avefreei = freei / ngroups; -+ freeb = percpu_counter_read_positive(&sbi->s_freeblocks_counter); -+ avefreeb = freeb / ngroups; -+ ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter); -+ -+ if ((parent == sb->s_root->d_inode) || -+ (EXT3COW_I(parent)->i_flags & EXT3COW_TOPDIR_FL)) { -+ int best_ndir = inodes_per_group; -+ int best_group = -1; -+ -+ get_random_bytes(&group, sizeof(group)); -+ parent_group = (unsigned)group % ngroups; -+ for (i = 0; i < ngroups; i++) { -+ group = (parent_group + i) % ngroups; -+ desc = ext3cow_get_group_desc (sb, group, &bh); -+ if (!desc || !desc->bg_free_inodes_count) -+ continue; -+ if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir) -+ continue; -+ if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) -+ continue; -+ if (le16_to_cpu(desc->bg_free_blocks_count) < avefreeb) -+ continue; -+ best_group = group; -+ best_ndir = le16_to_cpu(desc->bg_used_dirs_count); -+ } -+ if (best_group >= 0) -+ return best_group; -+ goto fallback; -+ } -+ -+ blocks_per_dir = (le32_to_cpu(es->s_blocks_count) - freeb) / ndirs; -+ -+ max_dirs = ndirs / ngroups + inodes_per_group / 16; -+ min_inodes = avefreei - inodes_per_group / 4; -+ min_blocks = avefreeb - EXT3COW_BLOCKS_PER_GROUP(sb) / 4; -+ -+ max_debt = EXT3COW_BLOCKS_PER_GROUP(sb) / max(blocks_per_dir, (ext3cow_fsblk_t)BLOCK_COST); -+ if (max_debt * INODE_COST > inodes_per_group) -+ max_debt = inodes_per_group / INODE_COST; -+ if (max_debt > 255) -+ max_debt = 255; -+ if (max_debt == 0) -+ max_debt = 1; -+ -+ for (i = 0; i < ngroups; i++) { -+ group = (parent_group + i) % ngroups; -+ desc = ext3cow_get_group_desc (sb, group, &bh); -+ if (!desc || !desc->bg_free_inodes_count) -+ continue; -+ if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs) -+ continue; -+ if (le16_to_cpu(desc->bg_free_inodes_count) < min_inodes) -+ continue; -+ if (le16_to_cpu(desc->bg_free_blocks_count) < min_blocks) -+ continue; -+ return group; -+ } -+ -+fallback: -+ for (i = 0; i < ngroups; i++) { -+ group = (parent_group + i) % ngroups; -+ desc = ext3cow_get_group_desc (sb, group, &bh); -+ if (!desc || !desc->bg_free_inodes_count) -+ continue; -+ if (le16_to_cpu(desc->bg_free_inodes_count) >= avefreei) -+ return group; -+ } -+ -+ if (avefreei) { -+ /* -+ * The free-inodes counter is approximate, and for really small -+ * filesystems the above test can fail to find any blockgroups -+ */ -+ avefreei = 0; -+ goto fallback; -+ } -+ -+ return -1; -+} -+ -+static int find_group_other(struct super_block *sb, struct inode *parent) -+{ -+ int parent_group = EXT3COW_I(parent)->i_block_group; -+ int ngroups = EXT3COW_SB(sb)->s_groups_count; -+ struct ext3cow_group_desc *desc; -+ struct buffer_head *bh; -+ int group, i; -+ -+ /* -+ * Try to place the inode in its parent directory -+ */ -+ group = parent_group; -+ desc = ext3cow_get_group_desc (sb, group, &bh); -+ if (desc && le16_to_cpu(desc->bg_free_inodes_count) && -+ le16_to_cpu(desc->bg_free_blocks_count)) -+ return group; -+ -+ /* -+ * We're going to place this inode in a different blockgroup from its -+ * parent. We want to cause files in a common directory to all land in -+ * the same blockgroup. But we want files which are in a different -+ * directory which shares a blockgroup with our parent to land in a -+ * different blockgroup. -+ * -+ * So add our directory's i_ino into the starting point for the hash. -+ */ -+ group = (group + parent->i_ino) % ngroups; -+ -+ /* -+ * Use a quadratic hash to find a group with a free inode and some free -+ * blocks. -+ */ -+ for (i = 1; i < ngroups; i <<= 1) { -+ group += i; -+ if (group >= ngroups) -+ group -= ngroups; -+ desc = ext3cow_get_group_desc (sb, group, &bh); -+ if (desc && le16_to_cpu(desc->bg_free_inodes_count) && -+ le16_to_cpu(desc->bg_free_blocks_count)) -+ return group; -+ } -+ -+ /* -+ * That failed: try linear search for a free inode, even if that group -+ * has no free blocks. -+ */ -+ group = parent_group; -+ for (i = 0; i < ngroups; i++) { -+ if (++group >= ngroups) -+ group = 0; -+ desc = ext3cow_get_group_desc (sb, group, &bh); -+ if (desc && le16_to_cpu(desc->bg_free_inodes_count)) -+ return group; -+ } -+ -+ return -1; -+} -+ -+/* -+ * There are two policies for allocating an inode. If the new inode is -+ * a directory, then a forward search is made for a block group with both -+ * free space and a low directory-to-inode ratio; if that fails, then of -+ * the groups with above-average free space, that group with the fewest -+ * directories already is chosen. -+ * -+ * For other inodes, search forward from the parent directory's block -+ * group to find a free inode. -+ */ -+struct inode *ext3cow_new_inode(handle_t *handle, struct inode * dir, int mode) -+{ -+ struct super_block *sb; -+ struct buffer_head *bitmap_bh = NULL; -+ struct buffer_head *bh2; -+ int group; -+ unsigned long ino = 0; -+ struct inode * inode; -+ struct ext3cow_group_desc * gdp = NULL; -+ struct ext3cow_super_block * es; -+ struct ext3cow_inode_info *ei; -+ struct ext3cow_sb_info *sbi; -+ int err = 0; -+ struct inode *ret; -+ int i; -+ -+ /* Cannot create files in a deleted directory */ -+ if (!dir || !dir->i_nlink) -+ return ERR_PTR(-EPERM); -+ -+ sb = dir->i_sb; -+ inode = new_inode(sb); -+ if (!inode) -+ return ERR_PTR(-ENOMEM); -+ ei = EXT3COW_I(inode); -+ -+ sbi = EXT3COW_SB(sb); -+ es = sbi->s_es; -+ if (S_ISDIR(mode)) { -+ if (test_opt (sb, OLDALLOC)) -+ group = find_group_dir(sb, dir); -+ else -+ group = find_group_orlov(sb, dir); -+ } else -+ group = find_group_other(sb, dir); -+ -+ err = -ENOSPC; -+ if (group == -1) -+ goto out; -+ -+ for (i = 0; i < sbi->s_groups_count; i++) { -+ err = -EIO; -+ -+ gdp = ext3cow_get_group_desc(sb, group, &bh2); -+ if (!gdp) -+ goto fail; -+ -+ brelse(bitmap_bh); -+ bitmap_bh = read_inode_bitmap(sb, group); -+ if (!bitmap_bh) -+ goto fail; -+ -+ ino = 0; -+ -+repeat_in_this_group: -+ ino = ext3cow_find_next_zero_bit((unsigned long *) -+ bitmap_bh->b_data, EXT3COW_INODES_PER_GROUP(sb), ino); -+ if (ino < EXT3COW_INODES_PER_GROUP(sb)) { -+ -+ BUFFER_TRACE(bitmap_bh, "get_write_access"); -+ err = ext3cow_journal_get_write_access(handle, bitmap_bh); -+ if (err) -+ goto fail; -+ -+ if (!ext3cow_set_bit_atomic(sb_bgl_lock(sbi, group), -+ ino, bitmap_bh->b_data)) { -+ /* we won it */ -+ BUFFER_TRACE(bitmap_bh, -+ "call ext3cow_journal_dirty_metadata"); -+ err = ext3cow_journal_dirty_metadata(handle, -+ bitmap_bh); -+ if (err) -+ goto fail; -+ goto got; -+ } -+ /* we lost it */ -+ journal_release_buffer(handle, bitmap_bh); -+ -+ if (++ino < EXT3COW_INODES_PER_GROUP(sb)) -+ goto repeat_in_this_group; -+ } -+ -+ /* -+ * This case is possible in concurrent environment. It is very -+ * rare. We cannot repeat the find_group_xxx() call because -+ * that will simply return the same blockgroup, because the -+ * group descriptor metadata has not yet been updated. -+ * So we just go onto the next blockgroup. -+ */ -+ if (++group == sbi->s_groups_count) -+ group = 0; -+ } -+ err = -ENOSPC; -+ goto out; -+ -+got: -+ ino += group * EXT3COW_INODES_PER_GROUP(sb) + 1; -+ if (ino < EXT3COW_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { -+ ext3cow_error (sb, "ext3cow_new_inode", -+ "reserved inode or inode > inodes count - " -+ "block_group = %d, inode=%lu", group, ino); -+ err = -EIO; -+ goto fail; -+ } -+ -+ BUFFER_TRACE(bh2, "get_write_access"); -+ err = ext3cow_journal_get_write_access(handle, bh2); -+ if (err) goto fail; -+ spin_lock(sb_bgl_lock(sbi, group)); -+ gdp->bg_free_inodes_count = -+ cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1); -+ if (S_ISDIR(mode)) { -+ gdp->bg_used_dirs_count = -+ cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1); -+ } -+ spin_unlock(sb_bgl_lock(sbi, group)); -+ BUFFER_TRACE(bh2, "call ext3cow_journal_dirty_metadata"); -+ err = ext3cow_journal_dirty_metadata(handle, bh2); -+ if (err) goto fail; -+ -+ percpu_counter_dec(&sbi->s_freeinodes_counter); -+ if (S_ISDIR(mode)) -+ percpu_counter_inc(&sbi->s_dirs_counter); -+ sb->s_dirt = 1; -+ -+ inode->i_uid = current->fsuid; -+ if (test_opt (sb, GRPID)) -+ inode->i_gid = dir->i_gid; -+ else if (dir->i_mode & S_ISGID) { -+ inode->i_gid = dir->i_gid; -+ if (S_ISDIR(mode)) -+ mode |= S_ISGID; -+ } else -+ inode->i_gid = current->fsgid; -+ inode->i_mode = mode; -+ -+ inode->i_ino = ino; -+ /* This is the optimal IO size (for stat), not the fs block size */ -+ inode->i_blocks = 0; -+ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; -+ -+ /* For versioning -znjp */ -+ ei->i_cow_bitmap = 0x0000; -+ ei->i_epoch_number = EXT3COW_S_EPOCHNUMBER(dir->i_sb); -+ ei->i_next_inode = 0; -+ -+ memset(ei->i_data, 0, sizeof(ei->i_data)); -+ ei->i_dir_start_lookup = 0; -+ ei->i_disksize = 0; -+ -+ ei->i_flags = EXT3COW_I(dir)->i_flags & ~EXT3COW_INDEX_FL; -+ if (S_ISLNK(mode)) -+ ei->i_flags &= ~(EXT3COW_IMMUTABLE_FL|EXT3COW_APPEND_FL); -+ /* dirsync only applies to directories */ -+ if (!S_ISDIR(mode)) -+ ei->i_flags &= ~EXT3COW_DIRSYNC_FL; -+#ifdef EXT3COW_FRAGMENTS -+ /* Taken out for versioning -znjp */ -+ //ei->i_faddr = 0; -+ //ei->i_frag_no = 0; -+ //ei->i_frag_size = 0; -+#endif -+ ei->i_file_acl = 0; -+ ei->i_dir_acl = 0; -+ ei->i_dtime = 0; -+ ei->i_block_alloc_info = NULL; -+ ei->i_block_group = group; -+ -+ ext3cow_set_inode_flags(inode); -+ if (IS_DIRSYNC(inode)) -+ handle->h_sync = 1; -+ insert_inode_hash(inode); -+ spin_lock(&sbi->s_next_gen_lock); -+ inode->i_generation = sbi->s_next_generation++; -+ spin_unlock(&sbi->s_next_gen_lock); -+ -+ ei->i_state = EXT3COW_STATE_NEW; -+ ei->i_extra_isize = -+ (EXT3COW_INODE_SIZE(inode->i_sb) > EXT3COW_GOOD_OLD_INODE_SIZE) ? -+ sizeof(struct ext3cow_inode) - EXT3COW_GOOD_OLD_INODE_SIZE : 0; -+ -+ ret = inode; -+ if(DQUOT_ALLOC_INODE(inode)) { -+ err = -EDQUOT; -+ goto fail_drop; -+ } -+ -+ err = ext3cow_init_acl(handle, inode, dir); -+ if (err) -+ goto fail_free_drop; -+ -+ err = ext3cow_init_security(handle,inode, dir); -+ if (err) -+ goto fail_free_drop; -+ -+ err = ext3cow_mark_inode_dirty(handle, inode); -+ if (err) { -+ ext3cow_std_error(sb, err); -+ goto fail_free_drop; -+ } -+ -+ ext3cow_debug("allocating inode %lu\n", inode->i_ino); -+ goto really_out; -+fail: -+ ext3cow_std_error(sb, err); -+out: -+ iput(inode); -+ ret = ERR_PTR(err); -+really_out: -+ brelse(bitmap_bh); -+ return ret; -+ -+fail_free_drop: -+ DQUOT_FREE_INODE(inode); -+ -+fail_drop: -+ DQUOT_DROP(inode); -+ inode->i_flags |= S_NOQUOTA; -+ inode->i_nlink = 0; -+ iput(inode); -+ brelse(bitmap_bh); -+ return ERR_PTR(err); -+} -+ -+/* Verify that we are loading a valid orphan from disk */ -+struct inode *ext3cow_orphan_get(struct super_block *sb, unsigned long ino) -+{ -+ unsigned long max_ino = le32_to_cpu(EXT3COW_SB(sb)->s_es->s_inodes_count); -+ unsigned long block_group; -+ int bit; -+ struct buffer_head *bitmap_bh = NULL; -+ struct inode *inode = NULL; -+ -+ /* Error cases - e2fsck has already cleaned up for us */ -+ if (ino > max_ino) { -+ ext3cow_warning(sb, __FUNCTION__, -+ "bad orphan ino %lu! e2fsck was run?", ino); -+ goto out; -+ } -+ -+ block_group = (ino - 1) / EXT3COW_INODES_PER_GROUP(sb); -+ bit = (ino - 1) % EXT3COW_INODES_PER_GROUP(sb); -+ bitmap_bh = read_inode_bitmap(sb, block_group); -+ if (!bitmap_bh) { -+ ext3cow_warning(sb, __FUNCTION__, -+ "inode bitmap error for orphan %lu", ino); -+ goto out; -+ } -+ -+ /* Having the inode bit set should be a 100% indicator that this -+ * is a valid orphan (no e2fsck run on fs). Orphans also include -+ * inodes that were being truncated, so we can't check i_nlink==0. -+ */ -+ if (!ext3cow_test_bit(bit, bitmap_bh->b_data) || -+ !(inode = iget(sb, ino)) || is_bad_inode(inode) || -+ NEXT_ORPHAN(inode) > max_ino) { -+ ext3cow_warning(sb, __FUNCTION__, -+ "bad orphan inode %lu! e2fsck was run?", ino); -+ printk(KERN_NOTICE "ext3cow_test_bit(bit=%d, block=%llu) = %d\n", -+ bit, (unsigned long long)bitmap_bh->b_blocknr, -+ ext3cow_test_bit(bit, bitmap_bh->b_data)); -+ printk(KERN_NOTICE "inode=%p\n", inode); -+ if (inode) { -+ printk(KERN_NOTICE "is_bad_inode(inode)=%d\n", -+ is_bad_inode(inode)); -+ printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n", -+ NEXT_ORPHAN(inode)); -+ printk(KERN_NOTICE "max_ino=%lu\n", max_ino); -+ } -+ /* Avoid freeing blocks if we got a bad deleted inode */ -+ if (inode && inode->i_nlink == 0) -+ inode->i_blocks = 0; -+ iput(inode); -+ inode = NULL; -+ } -+out: -+ brelse(bitmap_bh); -+ return inode; -+} -+ -+unsigned long ext3cow_count_free_inodes (struct super_block * sb) -+{ -+ unsigned long desc_count; -+ struct ext3cow_group_desc *gdp; -+ int i; -+#ifdef EXT3COWFS_DEBUG -+ struct ext3cow_super_block *es; -+ unsigned long bitmap_count, x; -+ struct buffer_head *bitmap_bh = NULL; -+ -+ es = EXT3COW_SB(sb)->s_es; -+ desc_count = 0; -+ bitmap_count = 0; -+ gdp = NULL; -+ for (i = 0; i < EXT3COW_SB(sb)->s_groups_count; i++) { -+ gdp = ext3cow_get_group_desc (sb, i, NULL); -+ if (!gdp) -+ continue; -+ desc_count += le16_to_cpu(gdp->bg_free_inodes_count); -+ brelse(bitmap_bh); -+ bitmap_bh = read_inode_bitmap(sb, i); -+ if (!bitmap_bh) -+ continue; -+ -+ x = ext3cow_count_free(bitmap_bh, EXT3COW_INODES_PER_GROUP(sb) / 8); -+ printk("group %d: stored = %d, counted = %lu\n", -+ i, le16_to_cpu(gdp->bg_free_inodes_count), x); -+ bitmap_count += x; -+ } -+ brelse(bitmap_bh); -+ printk("ext3cow_count_free_inodes: stored = %u, computed = %lu, %lu\n", -+ le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count); -+ return desc_count; -+#else -+ desc_count = 0; -+ for (i = 0; i < EXT3COW_SB(sb)->s_groups_count; i++) { -+ gdp = ext3cow_get_group_desc (sb, i, NULL); -+ if (!gdp) -+ continue; -+ desc_count += le16_to_cpu(gdp->bg_free_inodes_count); -+ cond_resched(); -+ } -+ return desc_count; -+#endif -+} -+ -+/* Called at mount-time, super-block is locked */ -+unsigned long ext3cow_count_dirs (struct super_block * sb) -+{ -+ unsigned long count = 0; -+ int i; -+ -+ for (i = 0; i < EXT3COW_SB(sb)->s_groups_count; i++) { -+ struct ext3cow_group_desc *gdp = ext3cow_get_group_desc (sb, i, NULL); -+ if (!gdp) -+ continue; -+ count += le16_to_cpu(gdp->bg_used_dirs_count); -+ } -+ return count; -+} -+ -diff -Naur linux-2.6.21.7/fs/ext3cow/inode.c linux-2.6.21.7_ext3cowPatched/fs/ext3cow/inode.c ---- linux-2.6.21.7/fs/ext3cow/inode.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.21.7_ext3cowPatched/fs/ext3cow/inode.c 2007-10-23 17:47:18.000000000 +0200 -@@ -0,0 +1,3474 @@ -+/* -+ * linux/fs/ext3cow/inode.c -+ * -+ * Copyright (C) 1992, 1993, 1994, 1995 -+ * Remy Card (card@masi.ibp.fr) -+ * Laboratoire MASI - Institut Blaise Pascal -+ * Universite Pierre et Marie Curie (Paris VI) -+ * -+ * from -+ * -+ * linux/fs/minix/inode.c -+ * -+ * Copyright (C) 1991, 1992 Linus Torvalds -+ * -+ * Goal-directed block allocation by Stephen Tweedie -+ * (sct@redhat.com), 1993, 1998 -+ * Big-endian to little-endian byte-swapping/bitmaps by -+ * David S. Miller (davem@caip.rutgers.edu), 1995 -+ * 64-bit file support on 64-bit platforms by Jakub Jelinek -+ * (jj@sunsite.ms.mff.cuni.cz) -+ * -+ * Assorted race fixes, rewrite of ext3cow_get_block() by Al Viro, 2000 -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "xattr.h" -+#include "acl.h" -+ -+static int ext3cow_writepage_trans_blocks(struct inode *inode); -+ -+/* -+ * Test whether an inode is a fast symlink. -+ */ -+static int ext3cow_inode_is_fast_symlink(struct inode *inode) -+{ -+ int ea_blocks = EXT3COW_I(inode)->i_file_acl ? -+ (inode->i_sb->s_blocksize >> 9) : 0; -+ -+ return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0); -+} -+ -+/* -+ * The ext3cow forget function must perform a revoke if we are freeing data -+ * which has been journaled. Metadata (eg. indirect blocks) must be -+ * revoked in all cases. -+ * -+ * "bh" may be NULL: a metadata block may have been freed from memory -+ * but there may still be a record of it in the journal, and that record -+ * still needs to be revoked. -+ */ -+int ext3cow_forget(handle_t *handle, int is_metadata, struct inode *inode, -+ struct buffer_head *bh, ext3cow_fsblk_t blocknr) -+{ -+ int err; -+ -+ might_sleep(); -+ -+ BUFFER_TRACE(bh, "enter"); -+ -+ jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, " -+ "data mode %lx\n", -+ bh, is_metadata, inode->i_mode, -+ test_opt(inode->i_sb, DATA_FLAGS)); -+ -+ /* Never use the revoke function if we are doing full data -+ * journaling: there is no need to, and a V1 superblock won't -+ * support it. Otherwise, only skip the revoke on un-journaled -+ * data blocks. */ -+ -+ if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3COW_MOUNT_JOURNAL_DATA || -+ (!is_metadata && !ext3cow_should_journal_data(inode))) { -+ if (bh) { -+ BUFFER_TRACE(bh, "call journal_forget"); -+ return ext3cow_journal_forget(handle, bh); -+ } -+ return 0; -+ } -+ -+ /* -+ * data!=journal && (is_metadata || should_journal_data(inode)) -+ */ -+ BUFFER_TRACE(bh, "call ext3cow_journal_revoke"); -+ err = ext3cow_journal_revoke(handle, blocknr, bh); -+ if (err) -+ ext3cow_abort(inode->i_sb, __FUNCTION__, -+ "error %d when attempting revoke", err); -+ BUFFER_TRACE(bh, "exit"); -+ return err; -+} -+ -+/* -+ * Work out how many blocks we need to proceed with the next chunk of a -+ * truncate transaction. -+ */ -+static unsigned long blocks_for_truncate(struct inode *inode) -+{ -+ unsigned long needed; -+ -+ needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9); -+ -+ /* Give ourselves just enough room to cope with inodes in which -+ * i_blocks is corrupt: we've seen disk corruptions in the past -+ * which resulted in random data in an inode which looked enough -+ * like a regular file for ext3cow to try to delete it. Things -+ * will go a bit crazy if that happens, but at least we should -+ * try not to panic the whole kernel. */ -+ if (needed < 2) -+ needed = 2; -+ -+ /* But we need to bound the transaction so we don't overflow the -+ * journal. */ -+ if (needed > EXT3COW_MAX_TRANS_DATA) -+ needed = EXT3COW_MAX_TRANS_DATA; -+ -+ return EXT3COW_DATA_TRANS_BLOCKS(inode->i_sb) + needed; -+} -+ -+/* -+ * Truncate transactions can be complex and absolutely huge. So we need to -+ * be able to restart the transaction at a conventient checkpoint to make -+ * sure we don't overflow the journal. -+ * -+ * start_transaction gets us a new handle for a truncate transaction, -+ * and extend_transaction tries to extend the existing one a bit. If -+ * extend fails, we need to propagate the failure up and restart the -+ * transaction in the top-level truncate loop. --sct -+ */ -+static handle_t *start_transaction(struct inode *inode) -+{ -+ handle_t *result; -+ -+ result = ext3cow_journal_start(inode, blocks_for_truncate(inode)); -+ if (!IS_ERR(result)) -+ return result; -+ -+ ext3cow_std_error(inode->i_sb, PTR_ERR(result)); -+ return result; -+} -+ -+/* -+ * Try to extend this transaction for the purposes of truncation. -+ * -+ * Returns 0 if we managed to create more room. If we can't create more -+ * room, and the transaction must be restarted we return 1. -+ */ -+static int try_to_extend_transaction(handle_t *handle, struct inode *inode) -+{ -+ if (handle->h_buffer_credits > EXT3COW_RESERVE_TRANS_BLOCKS) -+ return 0; -+ if (!ext3cow_journal_extend(handle, blocks_for_truncate(inode))) -+ return 0; -+ return 1; -+} -+ -+/* -+ * Restart the transaction associated with *handle. This does a commit, -+ * so before we call here everything must be consistently dirtied against -+ * this transaction. -+ */ -+static int ext3cow_journal_test_restart(handle_t *handle, struct inode *inode) -+{ -+ jbd_debug(2, "restarting handle %p\n", handle); -+ return ext3cow_journal_restart(handle, blocks_for_truncate(inode)); -+} -+ -+/* -+ * Called at the last iput() if i_nlink is zero. -+ */ -+void ext3cow_delete_inode (struct inode * inode) -+{ -+ handle_t *handle; -+ -+ truncate_inode_pages(&inode->i_data, 0); -+ -+ if (is_bad_inode(inode)) -+ goto no_delete; -+ -+ handle = start_transaction(inode); -+ if (IS_ERR(handle)) { -+ /* -+ * If we're going to skip the normal cleanup, we still need to -+ * make sure that the in-core orphan linked list is properly -+ * cleaned up. -+ */ -+ ext3cow_orphan_del(NULL, inode); -+ goto no_delete; -+ } -+ -+ if (IS_SYNC(inode)) -+ handle->h_sync = 1; -+ inode->i_size = 0; -+ if (inode->i_blocks) -+ ext3cow_truncate(inode); -+ /* -+ * Kill off the orphan record which ext3cow_truncate created. -+ * AKPM: I think this can be inside the above `if'. -+ * Note that ext3cow_orphan_del() has to be able to cope with the -+ * deletion of a non-existent orphan - this is because we don't -+ * know if ext3cow_truncate() actually created an orphan record. -+ * (Well, we could do this if we need to, but heck - it works) -+ */ -+ ext3cow_orphan_del(handle, inode); -+ EXT3COW_I(inode)->i_dtime = get_seconds(); -+ -+ /* -+ * One subtle ordering requirement: if anything has gone wrong -+ * (transaction abort, IO errors, whatever), then we can still -+ * do these next steps (the fs will already have been marked as -+ * having errors), but we can't free the inode if the mark_dirty -+ * fails. -+ */ -+ if (ext3cow_mark_inode_dirty(handle, inode)) -+ /* If that failed, just do the required in-core inode clear. */ -+ clear_inode(inode); -+ else -+ ext3cow_free_inode(handle, inode); -+ ext3cow_journal_stop(handle); -+ return; -+no_delete: -+ clear_inode(inode); /* We must guarantee clearing of inode... */ -+} -+ -+typedef struct { -+ __le32 *p; -+ __le32 key; -+ struct buffer_head *bh; -+} Indirect; -+ -+static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v) -+{ -+ p->key = *(p->p = v); -+ p->bh = bh; -+} -+ -+static int verify_chain(Indirect *from, Indirect *to) -+{ -+ while (from <= to && from->key == *from->p) -+ from++; -+ return (from > to); -+} -+ -+//TODO: Delete at some point -+/* znjp - used for bitmap testing */ -+ -+ static void printbin(u32 val, int size) { -+ u32 mask; -+ -+ mask=(1UL << (size-1)); -+ while (mask) { -+ if (mask & val) -+ printk("1"); -+ else -+ printk("0"); -+ mask /= 2; -+ } -+ printk("\n"); -+ -+ } -+ -+ -+/** -+ * ext3cow_block_to_path - parse the block number into array of offsets -+ * @inode: inode in question (we are only interested in its superblock) -+ * @i_block: block number to be parsed -+ * @offsets: array to store the offsets in -+ * @boundary: set this non-zero if the referred-to block is likely to be -+ * followed (on disk) by an indirect block. -+ * -+ * To store the locations of file's data ext3cow uses a data structure common -+ * for UNIX filesystems - tree of pointers anchored in the inode, with -+ * data blocks at leaves and indirect blocks in intermediate nodes. -+ * This function translates the block number into path in that tree - -+ * return value is the path length and @offsets[n] is the offset of -+ * pointer to (n+1)th node in the nth one. If @block is out of range -+ * (negative or too large) warning is printed and zero returned. -+ * -+ * Note: function doesn't find node addresses, so no IO is needed. All -+ * we need to know is the capacity of indirect blocks (taken from the -+ * inode->i_sb). -+ */ -+ -+/* -+ * Portability note: the last comparison (check that we fit into triple -+ * indirect block) is spelled differently, because otherwise on an -+ * architecture with 32-bit longs and 8Kb pages we might get into trouble -+ * if our filesystem had 8Kb blocks. We might use long long, but that would -+ * kill us on x86. Oh, well, at least the sign propagation does not matter - -+ * i_block would have to be negative in the very beginning, so we would not -+ * get there at all. -+ */ -+ -+static int ext3cow_block_to_path(struct inode *inode, -+ long i_block, int offsets[4], int *boundary) -+{ -+ /* TODO: Check for efficientcy -znjp */ -+ int ptrs = EXT3COW_ADDR_PER_BLOCK(inode->i_sb); -+ const long direct_blocks = EXT3COW_NDIR_BLOCKS, -+ indirect_blocks = ptrs, -+ double_blocks = (ptrs * ptrs); -+ //double_blocks = (1 << (ptrs_bits * 2)); -+ int n = 0; -+ int final = 0; -+ -+ if (i_block < 0) { -+ ext3cow_warning (inode->i_sb, "ext3cow_block_to_path", "block < 0"); -+ } else if (i_block < direct_blocks) { -+ offsets[n++] = i_block; -+ final = direct_blocks; -+ } else if ( (i_block -= direct_blocks) < indirect_blocks) { -+ offsets[n++] = EXT3COW_IND_BLOCK; -+ offsets[n++] = i_block; -+ final = ptrs; -+ } else if ((i_block -= indirect_blocks) < double_blocks) { -+ offsets[n++] = EXT3COW_DIND_BLOCK; -+ offsets[n++] = (i_block/ptrs); //i_block >> ptrs_bits; -+ offsets[n++] = (i_block%ptrs); //i_block & (ptrs - 1); -+ final = ptrs; -+ } else if (((i_block -= double_blocks)/(double_blocks)) < ptrs) { -+ // } else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) { -+ offsets[n++] = EXT3COW_TIND_BLOCK; -+ offsets[n++] = (i_block/double_blocks); //i_block >> (ptrs_bits * 2); -+ offsets[n++] = (i_block/double_blocks)%ptrs; //(i_block >> ptrs_bits) & (ptrs - 1); -+ offsets[n++] = i_block%ptrs; //i_block & (ptrs - 1); -+ final = ptrs; -+ } else { -+ ext3cow_warning(inode->i_sb, "ext3cow_block_to_path", "block > big"); -+ } -+ if (boundary) -+ *boundary = final - 1 - (i_block & (ptrs - 1)); -+ return n; -+} -+ -+/** -+ * ext3cow_get_branch - read the chain of indirect blocks leading to data -+ * @inode: inode in question -+ * @depth: depth of the chain (1 - direct pointer, etc.) -+ * @offsets: offsets of pointers in inode/indirect blocks -+ * @chain: place to store the result -+ * @err: here we store the error value -+ * -+ * Function fills the array of triples and returns %NULL -+ * if everything went OK or the pointer to the last filled triple -+ * (incomplete one) otherwise. Upon the return chain[i].key contains -+ * the number of (i+1)-th block in the chain (as it is stored in memory, -+ * i.e. little-endian 32-bit), chain[i].p contains the address of that -+ * number (it points into struct inode for i==0 and into the bh->b_data -+ * for i>0) and chain[i].bh points to the buffer_head of i-th indirect -+ * block for i>0 and NULL for i==0. In other words, it holds the block -+ * numbers of the chain, addresses they were taken from (and where we can -+ * verify that chain did not change) and buffer_heads hosting these -+ * numbers. -+ * -+ * Function stops when it stumbles upon zero pointer (absent block) -+ * (pointer to last triple returned, *@err == 0) -+ * or when it gets an IO error reading an indirect block -+ * (ditto, *@err == -EIO) -+ * or when it notices that chain had been changed while it was reading -+ * (ditto, *@err == -EAGAIN) -+ * or when it reads all @depth-1 indirect blocks successfully and finds -+ * the whole chain, all way to the data (returns %NULL, *err == 0). -+ * If this is COW we set the cow field to 1. We know if it's COW -+ * because there will already be a key. We need this field so we -+ * zero out the data already in the buffer. -+ * The create flag let's us know if were just looking for a block -+ * to read, or a block to write. We only set the bitmap when -+ * we're looking for a block to write, either on new allocation -+ * or on COWing. -znjp -+ */ -+static Indirect *ext3cow_get_branch(struct inode *inode, int depth, -+ int *offsets, -+ Indirect chain[4], int *err, int *cow, -+ int create) -+{ -+ struct super_block *sb = inode->i_sb; -+ Indirect *p = chain; -+ struct buffer_head *bh; -+ u32* bitmap_w; -+ int ptrs = EXT3COW_ADDR_PER_BLOCK(inode->i_sb); -+ int nbitsperword = (sizeof(u32) * 8); -+ -+ *err = 0; -+ *cow = 0; -+ /* i_data is not going away, no lock needed */ -+ add_chain (chain, NULL, EXT3COW_I(inode)->i_data + *offsets); -+ if (!p->key){ -+ /* Set the bitmap on allocation - znjp */ -+ if(create) -+ EXT3COW_I(inode)->i_cow_bitmap |= (1UL << *offsets); -+ goto no_block; -+ } -+ -+ /* Are we COWing any direct blocks? -znjp */ -+ if(create && !(EXT3COW_I(inode)->i_cow_bitmap & (1UL << *offsets))){ -+ printk(KERN_INFO "COWing direct block\n"); -+ *(p->p) = 0; -+ p->key = 0; -+ /* Set the bitamp when COWing -znjp */ -+ EXT3COW_I(inode)->i_cow_bitmap |= (1UL << *offsets); -+ *cow = 1; -+ goto no_block; -+ } -+ -+ while (--depth) { -+ bh = sb_bread(sb, le32_to_cpu(p->key)); -+ if (!bh) -+ goto failure; -+ -+ /* Reader: pointers */ -+ if (!verify_chain(chain, p)) -+ goto changed; -+ add_chain(++p, bh, (__le32*)bh->b_data + *++offsets); -+ /* Reader: end */ -+ /* Find correct bitamp word */ -+ bitmap_w = (u32*)bh->b_data + ptrs + (*offsets/nbitsperword); -+ if (!p->key){ -+ /* Set the bitmap when allocating -znjp */ -+ if(create) -+ *bitmap_w = (u32)*bitmap_w | (u32)(1UL << (int)(*offsets%nbitsperword)); -+ goto no_block; -+ } -+ -+ /* Are we COWing any indirect blocks? -znjp */ -+ if(create && !((1UL << (int)(*offsets%nbitsperword)) & -+ le32_to_cpu((u32)*bitmap_w))){ -+ printk(KERN_INFO "COWing indirect block\n"); -+ *(p->p) = 0; -+ p->key = 0; -+ /* Set the bitmap -znjp */ -+ *bitmap_w = (u32)*bitmap_w | (u32)(1UL << (int)(*offsets%nbitsperword)); -+ *cow = 1; -+ goto no_block; -+ } -+ } -+ return NULL; -+ -+changed: -+ brelse(bh); -+ *err = -EAGAIN; -+ goto no_block; -+failure: -+ *err = -EIO; -+no_block: -+ return p; -+} -+ -+/** -+ * ext3cow_find_near - find a place for allocation with sufficient locality -+ * @inode: owner -+ * @ind: descriptor of indirect block. -+ * -+ * This function returns the prefered place for block allocation. -+ * It is used when heuristic for sequential allocation fails. -+ * Rules are: -+ * + if there is a block to the left of our position - allocate near it. -+ * + if pointer will live in indirect block - allocate near that block. -+ * + if pointer will live in inode - allocate in the same -+ * cylinder group. -+ * -+ * In the latter case we colour the starting block by the callers PID to -+ * prevent it from clashing with concurrent allocations for a different inode -+ * in the same block group. The PID is used here so that functionally related -+ * files will be close-by on-disk. -+ * -+ * Caller must make sure that @ind is valid and will stay that way. -+ */ -+static ext3cow_fsblk_t ext3cow_find_near(struct inode *inode, Indirect *ind) -+{ -+ struct ext3cow_inode_info *ei = EXT3COW_I(inode); -+ __le32 *start = ind->bh ? (__le32*) ind->bh->b_data : ei->i_data; -+ __le32 *p; -+ ext3cow_fsblk_t bg_start; -+ ext3cow_grpblk_t colour; -+ -+ /* Try to find previous block */ -+ for (p = ind->p - 1; p >= start; p--) { -+ if (*p) -+ return le32_to_cpu(*p); -+ } -+ -+ /* No such thing, so let's try location of indirect block */ -+ if (ind->bh) -+ return ind->bh->b_blocknr; -+ -+ /* -+ * It is going to be referred to from the inode itself? OK, just put it -+ * into the same cylinder group then. -+ */ -+ bg_start = ext3cow_group_first_block_no(inode->i_sb, ei->i_block_group); -+ colour = (current->pid % 16) * -+ (EXT3COW_BLOCKS_PER_GROUP(inode->i_sb) / 16); -+ return bg_start + colour; -+} -+ -+/** -+ * ext3cow_find_goal - find a prefered place for allocation. -+ * @inode: owner -+ * @block: block we want -+ * @chain: chain of indirect blocks -+ * @partial: pointer to the last triple within a chain -+ * @goal: place to store the result. -+ * -+ * Normally this function find the prefered place for block allocation, -+ * stores it in *@goal and returns zero. -+ */ -+ -+static ext3cow_fsblk_t ext3cow_find_goal(struct inode *inode, long block, -+ Indirect chain[4], Indirect *partial) -+{ -+ struct ext3cow_block_alloc_info *block_i; -+ -+ block_i = EXT3COW_I(inode)->i_block_alloc_info; -+ -+ /* -+ * try the heuristic for sequential allocation, -+ * failing that at least try to get decent locality. -+ */ -+ if (block_i && (block == block_i->last_alloc_logical_block + 1) -+ && (block_i->last_alloc_physical_block != 0)) { -+ return block_i->last_alloc_physical_block + 1; -+ } -+ -+ return ext3cow_find_near(inode, partial); -+} -+ -+/** -+ * ext3cow_blks_to_allocate: Look up the block map and count the number -+ * of direct blocks need to be allocated for the given branch. -+ * -+ * @branch: chain of indirect blocks -+ * @k: number of blocks need for indirect blocks -+ * @blks: number of data blocks to be mapped. -+ * @blocks_to_boundary: the offset in the indirect block -+ * -+ * return the total number of blocks to be allocate, including the -+ * direct and indirect blocks. -+ */ -+static int ext3cow_blks_to_allocate(Indirect *branch, int k, unsigned long blks, -+ int blocks_to_boundary) -+{ -+ unsigned long count = 0; -+ -+ /* -+ * Simple case, [t,d]Indirect block(s) has not allocated yet -+ * then it's clear blocks on that path have not allocated -+ */ -+ if (k > 0) { -+ /* right now we don't handle cross boundary allocation */ -+ if (blks < blocks_to_boundary + 1) -+ count += blks; -+ else -+ count += blocks_to_boundary + 1; -+ return count; -+ } -+ -+ count++; -+ while (count < blks && count <= blocks_to_boundary && -+ le32_to_cpu(*(branch[0].p + count)) == 0) { -+ count++; -+ } -+ return count; -+} -+ -+/** -+ * ext3cow_alloc_blocks: multiple allocate blocks needed for a branch -+ * @indirect_blks: the number of blocks need to allocate for indirect -+ * blocks -+ * -+ * @new_blocks: on return it will store the new block numbers for -+ * the indirect blocks(if needed) and the first direct block, -+ * @blks: on return it will store the total number of allocated -+ * direct blocks -+ */ -+static int ext3cow_alloc_blocks(handle_t *handle, struct inode *inode, -+ ext3cow_fsblk_t goal, int indirect_blks, int blks, -+ ext3cow_fsblk_t new_blocks[4], int *err) -+{ -+ int target, i; -+ unsigned long count = 0; -+ int index = 0; -+ ext3cow_fsblk_t current_block = 0; -+ int ret = 0; -+ -+ /* -+ * Here we try to allocate the requested multiple blocks at once, -+ * on a best-effort basis. -+ * To build a branch, we should allocate blocks for -+ * the indirect blocks(if not allocated yet), and at least -+ * the first direct block of this branch. That's the -+ * minimum number of blocks need to allocate(required) -+ */ -+ target = blks + indirect_blks; -+ -+ while (1) { -+ count = target; -+ /* allocating blocks for indirect blocks and direct blocks */ -+ current_block = ext3cow_new_blocks(handle,inode,goal,&count,err); -+ if (*err) -+ goto failed_out; -+ -+ target -= count; -+ /* allocate blocks for indirect blocks */ -+ while (index < indirect_blks && count) { -+ new_blocks[index++] = current_block++; -+ count--; -+ } -+ -+ if (count > 0) -+ break; -+ } -+ -+ /* save the new block number for the first direct block */ -+ new_blocks[index] = current_block; -+ -+ /* total number of blocks allocated for direct blocks */ -+ ret = count; -+ *err = 0; -+ return ret; -+failed_out: -+ for (i = 0; i key). Upon the exit we have the same -+ * picture as after the successful ext3cow_get_block(), except that in one -+ * place chain is disconnected - *branch->p is still zero (we did not -+ * set the last link), but branch->key contains the number that should -+ * be placed into *branch->p to fill that gap. -+ * -+ * If allocation fails we free all blocks we've allocated (and forget -+ * their buffer_heads) and return the error value the from failed -+ * ext3cow_alloc_block() (normally -ENOSPC). Otherwise we set the chain -+ * as described above and return 0. -+ */ -+static int ext3cow_alloc_branch(handle_t *handle, struct inode *inode, -+ int indirect_blks, int *blks, ext3cow_fsblk_t goal, -+ int *offsets, Indirect *branch) -+{ -+ int blocksize = inode->i_sb->s_blocksize; -+ int i, n = 0; -+ int err = 0; -+ struct buffer_head *bh; -+ int num; -+ ext3cow_fsblk_t new_blocks[4]; -+ ext3cow_fsblk_t current_block; -+ -+ num = ext3cow_alloc_blocks(handle, inode, goal, indirect_blks, -+ *blks, new_blocks, &err); -+ if (err) -+ return err; -+ -+ branch[0].key = cpu_to_le32(new_blocks[0]); -+ /* -+ * metadata blocks and data blocks are allocated. -+ */ -+ for (n = 1; n <= indirect_blks; n++) { -+ /* -+ * Get buffer_head for parent block, zero it out -+ * and set the pointer to new one, then send -+ * parent to disk. -+ */ -+ bh = sb_getblk(inode->i_sb, new_blocks[n-1]); -+ branch[n].bh = bh; -+ lock_buffer(bh); -+ BUFFER_TRACE(bh, "call get_create_access"); -+ err = ext3cow_journal_get_create_access(handle, bh); -+ if (err) { -+ unlock_buffer(bh); -+ brelse(bh); -+ goto failed; -+ } -+ -+ memset(bh->b_data, 0, blocksize); -+ branch[n].p = (__le32 *) bh->b_data + offsets[n]; -+ branch[n].key = cpu_to_le32(new_blocks[n]); -+ *branch[n].p = branch[n].key; -+ if ( n == indirect_blks) { -+ current_block = new_blocks[n]; -+ /* -+ * End of chain, update the last new metablock of -+ * the chain to point to the new allocated -+ * data blocks numbers -+ */ -+ for (i=1; i < num; i++) -+ *(branch[n].p + i) = cpu_to_le32(++current_block); -+ } -+ BUFFER_TRACE(bh, "marking uptodate"); -+ set_buffer_uptodate(bh); -+ unlock_buffer(bh); -+ -+ BUFFER_TRACE(bh, "call ext3cow_journal_dirty_metadata"); -+ err = ext3cow_journal_dirty_metadata(handle, bh); -+ if (err) -+ goto failed; -+ } -+ *blks = num; -+ return err; -+failed: -+ /* Allocation failed, free what we already allocated */ -+ for (i = 1; i <= n ; i++) { -+ BUFFER_TRACE(branch[i].bh, "call journal_forget"); -+ ext3cow_journal_forget(handle, branch[i].bh); -+ } -+ for (i = 0; i i_blocks, etc.). In case of success we end up with the full -+ * chain to new block and return 0. -+ */ -+static int ext3cow_splice_branch(handle_t *handle, struct inode *inode, -+ long block, Indirect *where, int num, int blks) -+{ -+ int i; -+ int err = 0; -+ struct ext3cow_block_alloc_info *block_i; -+ ext3cow_fsblk_t current_block; -+ -+ block_i = EXT3COW_I(inode)->i_block_alloc_info; -+ /* -+ * If we're splicing into a [td]indirect block (as opposed to the -+ * inode) then we need to get write access to the [td]indirect block -+ * before the splice. -+ */ -+ if (where->bh) { -+ BUFFER_TRACE(where->bh, "get_write_access"); -+ err = ext3cow_journal_get_write_access(handle, where->bh); -+ if (err) -+ goto err_out; -+ } -+ /* That's it */ -+ -+ *where->p = where->key; -+ -+ /* -+ * Update the host buffer_head or inode to point to more just allocated -+ * direct blocks blocks -+ */ -+ if (num == 0 && blks > 1) { -+ current_block = le32_to_cpu(where->key) + 1; -+ for (i = 1; i < blks; i++) -+ *(where->p + i ) = cpu_to_le32(current_block++); -+ } -+ -+ /* -+ * update the most recently allocated logical & physical block -+ * in i_block_alloc_info, to assist find the proper goal block for next -+ * allocation -+ */ -+ if (block_i) { -+ block_i->last_alloc_logical_block = block + blks - 1; -+ block_i->last_alloc_physical_block = -+ le32_to_cpu(where[num].key) + blks - 1; -+ } -+ -+ /* We are done with atomic stuff, now do the rest of housekeeping */ -+ -+ inode->i_ctime = CURRENT_TIME_SEC; -+ ext3cow_mark_inode_dirty(handle, inode); -+ -+ /* had we spliced it onto indirect block? */ -+ if (where->bh) { -+ /* -+ * If we spliced it onto an indirect block, we haven't -+ * altered the inode. Note however that if it is being spliced -+ * onto an indirect block at the very end of the file (the -+ * file is growing) then we *will* alter the inode to reflect -+ * the new i_size. But that is not done here - it is done in -+ * generic_commit_write->__mark_inode_dirty->ext3cow_dirty_inode. -+ */ -+ jbd_debug(5, "splicing indirect only\n"); -+ BUFFER_TRACE(where->bh, "call ext3cow_journal_dirty_metadata"); -+ err = ext3cow_journal_dirty_metadata(handle, where->bh); -+ if (err) -+ goto err_out; -+ } else { -+ /* -+ * OK, we spliced it into the inode itself on a direct block. -+ * Inode was dirtied above. -+ */ -+ jbd_debug(5, "splicing direct\n"); -+ } -+ return err; -+ -+err_out: -+ for (i = 1; i <= num; i++) { -+ BUFFER_TRACE(where[i].bh, "call journal_forget"); -+ ext3cow_journal_forget(handle, where[i].bh); -+ ext3cow_free_blocks(handle,inode,le32_to_cpu(where[i-1].key),1); -+ } -+ ext3cow_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks); -+ -+ return err; -+} -+ -+/* -+ * Allocation strategy is simple: if we have to allocate something, we will -+ * have to go the whole way to leaf. So let's do it before attaching anything -+ * to tree, set linkage between the newborn blocks, write them if sync is -+ * required, recheck the path, free and repeat if check fails, otherwise -+ * set the last missing link (that will protect us from any truncate-generated -+ * removals - all blocks on the path are immune now) and possibly force the -+ * write on the parent block. -+ * That has a nice additional property: no special recovery from the failed -+ * allocations is needed - we simply release blocks and do not touch anything -+ * reachable from inode. -+ * -+ * `handle' can be NULL if create == 0. -+ * -+ * The BKL may not be held on entry here. Be sure to take it early. -+ * return > 0, # of blocks mapped or allocated. -+ * return = 0, if plain lookup failed. -+ * return < 0, error case. -+ */ -+int ext3cow_get_blocks_handle(handle_t *handle, struct inode *inode, -+ sector_t iblock, unsigned long maxblocks, -+ struct buffer_head *bh_result, -+ int create, int extend_disksize) -+{ -+ int err = -EIO; -+ int offsets[4]; -+ Indirect chain[4]; -+ Indirect *partial; -+ ext3cow_fsblk_t goal; -+ int indirect_blks; -+ int blocks_to_boundary = 0; -+ int depth; -+ struct ext3cow_inode_info *ei = EXT3COW_I(inode); -+ int count = 0; -+ ext3cow_fsblk_t first_block = 0; -+ int cow = 0; /* To determine wether we clear the buffer of not -znjp */ -+ -+ -+ J_ASSERT(handle != NULL || create == 0); -+ depth = ext3cow_block_to_path(inode,iblock,offsets,&blocks_to_boundary); -+ -+ if (depth == 0) -+ goto out; -+ -+ partial = ext3cow_get_branch(inode, depth, offsets, -+ chain, &err, &cow, create); -+ -+ /* Simplest case - block found, no allocation needed */ -+ if (!partial) { -+ first_block = le32_to_cpu(chain[depth - 1].key); -+ if(!cow) /* Don't clear the buffer if it's a COW allocation -znjp */ -+ clear_buffer_new(bh_result); -+ count++; -+ /*map more blocks*/ -+ while (count < maxblocks && count <= blocks_to_boundary) { -+ ext3cow_fsblk_t blk; -+ -+ if (!verify_chain(chain, partial)) { -+ /* -+ * Indirect block might be removed by -+ * truncate while we were reading it. -+ * Handling of that case: forget what we've -+ * got now. Flag the err as EAGAIN, so it -+ * will reread. -+ */ -+ err = -EAGAIN; -+ count = 0; -+ break; -+ } -+ blk = le32_to_cpu(*(chain[depth-1].p + count)); -+ -+ if (blk == first_block + count) -+ count++; -+ else -+ break; -+ } -+ if (err != -EAGAIN) -+ goto got_it; -+ } -+ -+ /* Next simple case - plain lookup or failed read of indirect block */ -+ if (!create || err == -EIO) -+ goto cleanup; -+ -+ mutex_lock(&ei->truncate_mutex); -+ -+ /* -+ * If the indirect block is missing while we are reading -+ * the chain(ext3cow_get_branch() returns -EAGAIN err), or -+ * if the chain has been changed after we grab the semaphore, -+ * (either because another process truncated this branch, or -+ * another get_block allocated this branch) re-grab the chain to see if -+ * the request block has been allocated or not. -+ * -+ * Since we already block the truncate/other get_block -+ * at this point, we will have the current copy of the chain when we -+ * splice the branch into the tree. -+ */ -+ if (err == -EAGAIN || !verify_chain(chain, partial)) { -+ while (partial > chain) { -+ brelse(partial->bh); -+ partial--; -+ } -+ partial = ext3cow_get_branch(inode, depth, offsets, -+ chain, &err, &cow, create); -+ if (!partial) { -+ count++; -+ mutex_unlock(&ei->truncate_mutex); -+ if (err) -+ goto cleanup; -+ /* Don't clear the buffer if we're COWing it -znjp */ -+ if(!cow) -+ clear_buffer_new(bh_result); -+ goto got_it; -+ } -+ } -+ -+ /* -+ * Okay, we need to do block allocation. Lazily initialize the block -+ * allocation info here if necessary -+ */ -+ if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info)) -+ ext3cow_init_block_alloc_info(inode); -+ -+ goal = ext3cow_find_goal(inode, iblock, chain, partial); -+ -+ /* the number of blocks need to allocate for [d,t]indirect blocks */ -+ indirect_blks = (chain + depth) - partial - 1; -+ -+ /* -+ * Next look up the indirect map to count the totoal number of -+ * direct blocks to allocate for this branch. -+ */ -+ count = ext3cow_blks_to_allocate(partial, indirect_blks, -+ maxblocks, blocks_to_boundary); -+ /* -+ * Block out ext3cow_truncate while we alter the tree -+ */ -+ err = ext3cow_alloc_branch(handle, inode, indirect_blks, &count, goal, -+ offsets + (partial - chain), partial); -+ -+ /* -+ * The ext3cow_splice_branch call will free and forget any buffers -+ * on the new chain if there is a failure, but that risks using -+ * up transaction credits, especially for bitmaps where the -+ * credits cannot be returned. Can we handle this somehow? We -+ * may need to return -EAGAIN upwards in the worst case. --sct -+ */ -+ if (!err) -+ err = ext3cow_splice_branch(handle, inode, iblock, -+ partial, indirect_blks, count); -+ /* -+ * i_disksize growing is protected by truncate_mutex. Don't forget to -+ * protect it if you're about to implement concurrent -+ * ext3cow_get_block() -bzzz -+ */ -+ if (!err && extend_disksize && inode->i_size > ei->i_disksize) -+ ei->i_disksize = inode->i_size; -+ mutex_unlock(&ei->truncate_mutex); -+ if (err) -+ goto cleanup; -+ -+ set_buffer_new(bh_result); -+got_it: -+ map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); -+ if (count > blocks_to_boundary) -+ set_buffer_boundary(bh_result); -+ err = count; -+ /* Clean up and exit */ -+ partial = chain + depth - 1; /* the whole chain */ -+cleanup: -+ while (partial > chain) { -+ BUFFER_TRACE(partial->bh, "call brelse"); -+ brelse(partial->bh); -+ partial--; -+ } -+ BUFFER_TRACE(bh_result, "returned"); -+out: -+ return err; -+} -+ -+#define DIO_CREDITS (EXT3COW_RESERVE_TRANS_BLOCKS + 32) -+ -+static int ext3cow_get_block(struct inode *inode, sector_t iblock, -+ struct buffer_head *bh_result, int create) -+{ -+ handle_t *handle = journal_current_handle(); -+ int ret = 0; -+ unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; -+ -+ if (!create) -+ goto get_block; /* A read */ -+ -+ if (max_blocks == 1) -+ goto get_block; /* A single block get */ -+ -+ if (handle->h_transaction->t_state == T_LOCKED) { -+ /* -+ * Huge direct-io writes can hold off commits for long -+ * periods of time. Let this commit run. -+ */ -+ ext3cow_journal_stop(handle); -+ handle = ext3cow_journal_start(inode, DIO_CREDITS); -+ if (IS_ERR(handle)) -+ ret = PTR_ERR(handle); -+ goto get_block; -+ } -+ -+ if (handle->h_buffer_credits <= EXT3COW_RESERVE_TRANS_BLOCKS) { -+ /* -+ * Getting low on buffer credits... -+ */ -+ ret = ext3cow_journal_extend(handle, DIO_CREDITS); -+ if (ret > 0) { -+ /* -+ * Couldn't extend the transaction. Start a new one. -+ */ -+ ret = ext3cow_journal_restart(handle, DIO_CREDITS); -+ } -+ } -+ -+get_block: -+ if (ret == 0) { -+ ret = ext3cow_get_blocks_handle(handle, inode, iblock, -+ max_blocks, bh_result, create, 0); -+ if (ret > 0) { -+ bh_result->b_size = (ret << inode->i_blkbits); -+ ret = 0; -+ } -+ } -+ return ret; -+} -+ -+/* -+ * `handle' can be NULL if create is zero -+ */ -+struct buffer_head *ext3cow_getblk(handle_t *handle, struct inode *inode, -+ long block, int create, int *errp) -+{ -+ struct buffer_head dummy; -+ int fatal = 0, err; -+ -+ J_ASSERT(handle != NULL || create == 0); -+ -+ dummy.b_state = 0; -+ dummy.b_blocknr = -1000; -+ buffer_trace_init(&dummy.b_history); -+ err = ext3cow_get_blocks_handle(handle, inode, block, 1, -+ &dummy, create, 1); -+ /* -+ * ext3cow_get_blocks_handle() returns number of blocks -+ * mapped. 0 in case of a HOLE. -+ */ -+ if (err > 0) { -+ if (err > 1) -+ WARN_ON(1); -+ err = 0; -+ } -+ *errp = err; -+ if (!err && buffer_mapped(&dummy)) { -+ struct buffer_head *bh; -+ bh = sb_getblk(inode->i_sb, dummy.b_blocknr); -+ if (!bh) { -+ *errp = -EIO; -+ goto err; -+ } -+ if (buffer_new(&dummy)) { -+ J_ASSERT(create != 0); -+ J_ASSERT(handle != 0); -+ -+ /* -+ * Now that we do not always journal data, we should -+ * keep in mind whether this should always journal the -+ * new buffer as metadata. For now, regular file -+ * writes use ext3cow_get_block instead, so it's not a -+ * problem. -+ */ -+ lock_buffer(bh); -+ BUFFER_TRACE(bh, "call get_create_access"); -+ fatal = ext3cow_journal_get_create_access(handle, bh); -+ if (!fatal && !buffer_uptodate(bh)) { -+ memset(bh->b_data,0,inode->i_sb->s_blocksize); -+ set_buffer_uptodate(bh); -+ } -+ unlock_buffer(bh); -+ BUFFER_TRACE(bh, "call ext3cow_journal_dirty_metadata"); -+ err = ext3cow_journal_dirty_metadata(handle, bh); -+ if (!fatal) -+ fatal = err; -+ } else { -+ BUFFER_TRACE(bh, "not a new buffer"); -+ } -+ if (fatal) { -+ *errp = fatal; -+ brelse(bh); -+ bh = NULL; -+ } -+ return bh; -+ } -+err: -+ return NULL; -+} -+ -+struct buffer_head *ext3cow_bread(handle_t *handle, struct inode *inode, -+ int block, int create, int *err) -+{ -+ struct buffer_head * bh; -+ -+ bh = ext3cow_getblk(handle, inode, block, create, err); -+ if (!bh) -+ return bh; -+ if (buffer_uptodate(bh)) -+ return bh; -+ ll_rw_block(READ_META, 1, &bh); -+ wait_on_buffer(bh); -+ if (buffer_uptodate(bh)) -+ return bh; -+ put_bh(bh); -+ *err = -EIO; -+ return NULL; -+} -+ -+static int walk_page_buffers( handle_t *handle, -+ struct buffer_head *head, -+ unsigned from, -+ unsigned to, -+ int *partial, -+ int (*fn)( handle_t *handle, -+ struct buffer_head *bh)) -+{ -+ struct buffer_head *bh; -+ unsigned block_start, block_end; -+ unsigned blocksize = head->b_size; -+ int err, ret = 0; -+ struct buffer_head *next; -+ -+ for ( bh = head, block_start = 0; -+ ret == 0 && (bh != head || !block_start); -+ block_start = block_end, bh = next) -+ { -+ next = bh->b_this_page; -+ block_end = block_start + blocksize; -+ if (block_end <= from || block_start >= to) { -+ if (partial && !buffer_uptodate(bh)) -+ *partial = 1; -+ continue; -+ } -+ err = (*fn)(handle, bh); -+ if (!ret) -+ ret = err; -+ } -+ return ret; -+} -+ -+/* -+ * To preserve ordering, it is essential that the hole instantiation and -+ * the data write be encapsulated in a single transaction. We cannot -+ * close off a transaction and start a new one between the ext3cow_get_block() -+ * and the commit_write(). So doing the journal_start at the start of -+ * prepare_write() is the right place. -+ * -+ * Also, this function can nest inside ext3cow_writepage() -> -+ * block_write_full_page(). In that case, we *know* that ext3cow_writepage() -+ * has generated enough buffer credits to do the whole page. So we won't -+ * block on the journal in that case, which is good, because the caller may -+ * be PF_MEMALLOC. -+ * -+ * By accident, ext3cow can be reentered when a transaction is open via -+ * quota file writes. If we were to commit the transaction while thus -+ * reentered, there can be a deadlock - we would be holding a quota -+ * lock, and the commit would never complete if another thread had a -+ * transaction open and was blocking on the quota lock - a ranking -+ * violation. -+ * -+ * So what we do is to rely on the fact that journal_stop/journal_start -+ * will _not_ run commit under these circumstances because handle->h_ref -+ * is elevated. We'll still have enough credits for the tiny quotafile -+ * write. -+ */ -+static int do_journal_get_write_access(handle_t *handle, -+ struct buffer_head *bh) -+{ -+ if (!buffer_mapped(bh) || buffer_freed(bh)) -+ return 0; -+ return ext3cow_journal_get_write_access(handle, bh); -+} -+ -+/* -+ * The idea of this helper function is following: -+ * if prepare_write has allocated some blocks, but not all of them, the -+ * transaction must include the content of the newly allocated blocks. -+ * This content is expected to be set to zeroes by block_prepare_write(). -+ * 2006/10/14 SAW -+ */ -+static int ext3cow_prepare_failure(struct file *file, struct page *page, -+ unsigned from, unsigned to) -+{ -+ struct address_space *mapping; -+ struct buffer_head *bh, *head, *next; -+ unsigned block_start, block_end; -+ unsigned blocksize; -+ int ret; -+ handle_t *handle = ext3cow_journal_current_handle(); -+ -+ mapping = page->mapping; -+ if (ext3cow_should_writeback_data(mapping->host)) { -+ /* optimization: no constraints about data */ -+skip: -+ return ext3cow_journal_stop(handle); -+ } -+ -+ head = page_buffers(page); -+ blocksize = head->b_size; -+ for ( bh = head, block_start = 0; -+ bh != head || !block_start; -+ block_start = block_end, bh = next) -+ { -+ next = bh->b_this_page; -+ block_end = block_start + blocksize; -+ if (block_end <= from) -+ continue; -+ if (block_start >= to) { -+ block_start = to; -+ break; -+ } -+ if (!buffer_mapped(bh)) -+ /* prepare_write failed on this bh */ -+ break; -+ if (ext3cow_should_journal_data(mapping->host)) { -+ ret = do_journal_get_write_access(handle, bh); -+ if (ret) { -+ ext3cow_journal_stop(handle); -+ return ret; -+ } -+ } -+ /* -+ * block_start here becomes the first block where the current iteration -+ * of prepare_write failed. -+ */ -+ } -+ if (block_start <= from) -+ goto skip; -+ -+ /* commit allocated and zeroed buffers */ -+ return mapping->a_ops->commit_write(file, page, from, block_start); -+} -+ -+/* Used to quickly unmap all buffers in a page for COWing -znjp */ -+static int ext3cow_clear_buffer_mapped(handle_t *handle, -+ struct buffer_head *bh) -+{ -+ clear_buffer_mapped(bh); -+ return 0; -+} -+ -+static int ext3cow_prepare_write(struct file *file, struct page *page, -+ unsigned from, unsigned to) -+{ -+ struct inode *inode = page->mapping->host; -+ int ret, ret2; -+ int needed_blocks = ext3cow_writepage_trans_blocks(inode); -+ handle_t *handle; -+ int retries = 0; -+ -+retry: -+ handle = ext3cow_journal_start(inode, needed_blocks); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ /* Unset the BH_Mapped flag so get_block is always called -znjp */ -+ if(page_has_buffers(page)) -+ ret = walk_page_buffers(handle, page_buffers(page), -+ from, to, NULL, ext3cow_clear_buffer_mapped); -+ -+ if (test_opt(inode->i_sb, NOBH) && ext3cow_should_writeback_data(inode)) -+ ret = nobh_prepare_write(page, from, to, ext3cow_get_block); -+ else -+ ret = block_prepare_write(page, from, to, ext3cow_get_block); -+ if (ret) -+ goto failure; -+ -+ if (ext3cow_should_journal_data(inode)) { -+ ret = walk_page_buffers(handle, page_buffers(page), -+ from, to, NULL, do_journal_get_write_access); -+ if (ret) -+ /* fatal error, just put the handle and return */ -+ journal_stop(handle); -+ } -+ return ret; -+ -+failure: -+ ret2 = ext3cow_prepare_failure(file, page, from, to); -+ if (ret2 < 0) -+ return ret2; -+ if (ret == -ENOSPC && ext3cow_should_retry_alloc(inode->i_sb, &retries)) -+ goto retry; -+ /* retry number exceeded, or other error like -EDQUOT */ -+ return ret; -+} -+ -+int ext3cow_journal_dirty_data(handle_t *handle, struct buffer_head *bh) -+{ -+ int err = journal_dirty_data(handle, bh); -+ if (err) -+ ext3cow_journal_abort_handle(__FUNCTION__, __FUNCTION__, -+ bh, handle,err); -+ return err; -+} -+ -+/* For commit_write() in data=journal mode */ -+static int commit_write_fn(handle_t *handle, struct buffer_head *bh) -+{ -+ if (!buffer_mapped(bh) || buffer_freed(bh)) -+ return 0; -+ set_buffer_uptodate(bh); -+ return ext3cow_journal_dirty_metadata(handle, bh); -+} -+ -+/* -+ * We need to pick up the new inode size which generic_commit_write gave us -+ * `file' can be NULL - eg, when called from page_symlink(). -+ * -+ * ext3cow never places buffers on inode->i_mapping->private_list. metadata -+ * buffers are managed internally. -+ */ -+static int ext3cow_ordered_commit_write(struct file *file, struct page *page, -+ unsigned from, unsigned to) -+{ -+ handle_t *handle = ext3cow_journal_current_handle(); -+ struct inode *inode = page->mapping->host; -+ int ret = 0, ret2; -+ -+ ret = walk_page_buffers(handle, page_buffers(page), -+ from, to, NULL, ext3cow_journal_dirty_data); -+ -+ if (ret == 0) { -+ /* -+ * generic_commit_write() will run mark_inode_dirty() if i_size -+ * changes. So let's piggyback the i_disksize mark_inode_dirty -+ * into that. -+ */ -+ loff_t new_i_size; -+ -+ new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; -+ if (new_i_size > EXT3COW_I(inode)->i_disksize) -+ EXT3COW_I(inode)->i_disksize = new_i_size; -+ ret = generic_commit_write(file, page, from, to); -+ } -+ ret2 = ext3cow_journal_stop(handle); -+ if (!ret) -+ ret = ret2; -+ return ret; -+} -+ -+static int ext3cow_writeback_commit_write(struct file *file, struct page *page, -+ unsigned from, unsigned to) -+{ -+ handle_t *handle = ext3cow_journal_current_handle(); -+ struct inode *inode = page->mapping->host; -+ int ret = 0, ret2; -+ loff_t new_i_size; -+ -+ new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; -+ if (new_i_size > EXT3COW_I(inode)->i_disksize) -+ EXT3COW_I(inode)->i_disksize = new_i_size; -+ -+ if (test_opt(inode->i_sb, NOBH) && ext3cow_should_writeback_data(inode)) -+ ret = nobh_commit_write(file, page, from, to); -+ else -+ ret = generic_commit_write(file, page, from, to); -+ -+ ret2 = ext3cow_journal_stop(handle); -+ if (!ret) -+ ret = ret2; -+ return ret; -+} -+ -+static int ext3cow_journalled_commit_write(struct file *file, -+ struct page *page, unsigned from, unsigned to) -+{ -+ handle_t *handle = ext3cow_journal_current_handle(); -+ struct inode *inode = page->mapping->host; -+ int ret = 0, ret2; -+ int partial = 0; -+ loff_t pos; -+ -+ /* -+ * Here we duplicate the generic_commit_write() functionality -+ */ -+ pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; -+ -+ ret = walk_page_buffers(handle, page_buffers(page), from, -+ to, &partial, commit_write_fn); -+ if (!partial) -+ SetPageUptodate(page); -+ if (pos > inode->i_size) -+ i_size_write(inode, pos); -+ EXT3COW_I(inode)->i_state |= EXT3COW_STATE_JDATA; -+ if (inode->i_size > EXT3COW_I(inode)->i_disksize) { -+ EXT3COW_I(inode)->i_disksize = inode->i_size; -+ ret2 = ext3cow_mark_inode_dirty(handle, inode); -+ if (!ret) -+ ret = ret2; -+ } -+ ret2 = ext3cow_journal_stop(handle); -+ if (!ret) -+ ret = ret2; -+ return ret; -+} -+ -+/* -+ * bmap() is special. It gets used by applications such as lilo and by -+ * the swapper to find the on-disk block of a specific piece of data. -+ * -+ * Naturally, this is dangerous if the block concerned is still in the -+ * journal. If somebody makes a swapfile on an ext3cow data-journaling -+ * filesystem and enables swap, then they may get a nasty shock when the -+ * data getting swapped to that swapfile suddenly gets overwritten by -+ * the original zero's written out previously to the journal and -+ * awaiting writeback in the kernel's buffer cache. -+ * -+ * So, if we see any bmap calls here on a modified, data-journaled file, -+ * take extra steps to flush any blocks which might be in the cache. -+ */ -+static sector_t ext3cow_bmap(struct address_space *mapping, sector_t block) -+{ -+ struct inode *inode = mapping->host; -+ journal_t *journal; -+ int err; -+ -+ if (EXT3COW_I(inode)->i_state & EXT3COW_STATE_JDATA) { -+ /* -+ * This is a REALLY heavyweight approach, but the use of -+ * bmap on dirty files is expected to be extremely rare: -+ * only if we run lilo or swapon on a freshly made file -+ * do we expect this to happen. -+ * -+ * (bmap requires CAP_SYS_RAWIO so this does not -+ * represent an unprivileged user DOS attack --- we'd be -+ * in trouble if mortal users could trigger this path at -+ * will.) -+ * -+ * NB. EXT3COW_STATE_JDATA is not set on files other than -+ * regular files. If somebody wants to bmap a directory -+ * or symlink and gets confused because the buffer -+ * hasn't yet been flushed to disk, they deserve -+ * everything they get. -+ */ -+ -+ EXT3COW_I(inode)->i_state &= ~EXT3COW_STATE_JDATA; -+ journal = EXT3COW_JOURNAL(inode); -+ journal_lock_updates(journal); -+ err = journal_flush(journal); -+ journal_unlock_updates(journal); -+ -+ if (err) -+ return 0; -+ } -+ -+ return generic_block_bmap(mapping,block,ext3cow_get_block); -+} -+ -+static int bget_one(handle_t *handle, struct buffer_head *bh) -+{ -+ get_bh(bh); -+ return 0; -+} -+ -+static int bput_one(handle_t *handle, struct buffer_head *bh) -+{ -+ put_bh(bh); -+ return 0; -+} -+ -+static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh) -+{ -+ if (buffer_mapped(bh)) -+ return ext3cow_journal_dirty_data(handle, bh); -+ return 0; -+} -+ -+/* -+ * Note that we always start a transaction even if we're not journalling -+ * data. This is to preserve ordering: any hole instantiation within -+ * __block_write_full_page -> ext3cow_get_block() should be journalled -+ * along with the data so we don't crash and then get metadata which -+ * refers to old data. -+ * -+ * In all journalling modes block_write_full_page() will start the I/O. -+ * -+ * Problem: -+ * -+ * ext3cow_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() -> -+ * ext3cow_writepage() -+ * -+ * Similar for: -+ * -+ * ext3cow_file_write() -> generic_file_write() -> __alloc_pages() -> ... -+ * -+ * Same applies to ext3cow_get_block(). We will deadlock on various things like -+ * lock_journal and i_truncate_mutex. -+ * -+ * Setting PF_MEMALLOC here doesn't work - too many internal memory -+ * allocations fail. -+ * -+ * 16May01: If we're reentered then journal_current_handle() will be -+ * non-zero. We simply *return*. -+ * -+ * 1 July 2001: @@@ FIXME: -+ * In journalled data mode, a data buffer may be metadata against the -+ * current transaction. But the same file is part of a shared mapping -+ * and someone does a writepage() on it. -+ * -+ * We will move the buffer onto the async_data list, but *after* it has -+ * been dirtied. So there's a small window where we have dirty data on -+ * BJ_Metadata. -+ * -+ * Note that this only applies to the last partial page in the file. The -+ * bit which block_write_full_page() uses prepare/commit for. (That's -+ * broken code anyway: it's wrong for msync()). -+ * -+ * It's a rare case: affects the final partial page, for journalled data -+ * where the file is subject to bith write() and writepage() in the same -+ * transction. To fix it we'll need a custom block_write_full_page(). -+ * We'll probably need that anyway for journalling writepage() output. -+ * -+ * We don't honour synchronous mounts for writepage(). That would be -+ * disastrous. Any write() or metadata operation will sync the fs for -+ * us. -+ * -+ * AKPM2: if all the page's buffers are mapped to disk and !data=journal, -+ * we don't need to open a transaction here. -+ */ -+static int ext3cow_ordered_writepage(struct page *page, -+ struct writeback_control *wbc) -+{ -+ struct inode *inode = page->mapping->host; -+ struct buffer_head *page_bufs; -+ handle_t *handle = NULL; -+ int ret = 0; -+ int err; -+ -+ J_ASSERT(PageLocked(page)); -+ -+ /* -+ * We give up here if we're reentered, because it might be for a -+ * different filesystem. -+ */ -+ if (ext3cow_journal_current_handle()) -+ goto out_fail; -+ -+ handle = ext3cow_journal_start(inode, ext3cow_writepage_trans_blocks(inode)); -+ -+ if (IS_ERR(handle)) { -+ ret = PTR_ERR(handle); -+ goto out_fail; -+ } -+ -+ if (!page_has_buffers(page)) { -+ create_empty_buffers(page, inode->i_sb->s_blocksize, -+ (1 << BH_Dirty)|(1 << BH_Uptodate)); -+ } -+ page_bufs = page_buffers(page); -+ walk_page_buffers(handle, page_bufs, 0, -+ PAGE_CACHE_SIZE, NULL, bget_one); -+ -+ ret = block_write_full_page(page, ext3cow_get_block, wbc); -+ -+ /* -+ * The page can become unlocked at any point now, and -+ * truncate can then come in and change things. So we -+ * can't touch *page from now on. But *page_bufs is -+ * safe due to elevated refcount. -+ */ -+ -+ /* -+ * And attach them to the current transaction. But only if -+ * block_write_full_page() succeeded. Otherwise they are unmapped, -+ * and generally junk. -+ */ -+ if (ret == 0) { -+ err = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, -+ NULL, journal_dirty_data_fn); -+ if (!ret) -+ ret = err; -+ } -+ walk_page_buffers(handle, page_bufs, 0, -+ PAGE_CACHE_SIZE, NULL, bput_one); -+ err = ext3cow_journal_stop(handle); -+ if (!ret) -+ ret = err; -+ return ret; -+ -+out_fail: -+ redirty_page_for_writepage(wbc, page); -+ unlock_page(page); -+ return ret; -+} -+ -+static int ext3cow_writeback_writepage(struct page *page, -+ struct writeback_control *wbc) -+{ -+ struct inode *inode = page->mapping->host; -+ handle_t *handle = NULL; -+ int ret = 0; -+ int err; -+ -+ if (ext3cow_journal_current_handle()) -+ goto out_fail; -+ -+ handle = ext3cow_journal_start(inode, ext3cow_writepage_trans_blocks(inode)); -+ if (IS_ERR(handle)) { -+ ret = PTR_ERR(handle); -+ goto out_fail; -+ } -+ -+ if (test_opt(inode->i_sb, NOBH) && ext3cow_should_writeback_data(inode)) -+ ret = nobh_writepage(page, ext3cow_get_block, wbc); -+ else -+ ret = block_write_full_page(page, ext3cow_get_block, wbc); -+ -+ err = ext3cow_journal_stop(handle); -+ if (!ret) -+ ret = err; -+ return ret; -+ -+out_fail: -+ redirty_page_for_writepage(wbc, page); -+ unlock_page(page); -+ return ret; -+} -+ -+static int ext3cow_journalled_writepage(struct page *page, -+ struct writeback_control *wbc) -+{ -+ struct inode *inode = page->mapping->host; -+ handle_t *handle = NULL; -+ int ret = 0; -+ int err; -+ -+ if (ext3cow_journal_current_handle()) -+ goto no_write; -+ -+ handle = ext3cow_journal_start(inode, ext3cow_writepage_trans_blocks(inode)); -+ if (IS_ERR(handle)) { -+ ret = PTR_ERR(handle); -+ goto no_write; -+ } -+ -+ if (!page_has_buffers(page) || PageChecked(page)) { -+ /* -+ * It's mmapped pagecache. Add buffers and journal it. There -+ * doesn't seem much point in redirtying the page here. -+ */ -+ ClearPageChecked(page); -+ ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, -+ ext3cow_get_block); -+ if (ret != 0) { -+ ext3cow_journal_stop(handle); -+ goto out_unlock; -+ } -+ ret = walk_page_buffers(handle, page_buffers(page), 0, -+ PAGE_CACHE_SIZE, NULL, do_journal_get_write_access); -+ -+ err = walk_page_buffers(handle, page_buffers(page), 0, -+ PAGE_CACHE_SIZE, NULL, commit_write_fn); -+ if (ret == 0) -+ ret = err; -+ EXT3COW_I(inode)->i_state |= EXT3COW_STATE_JDATA; -+ unlock_page(page); -+ } else { -+ /* -+ * It may be a page full of checkpoint-mode buffers. We don't -+ * really know unless we go poke around in the buffer_heads. -+ * But block_write_full_page will do the right thing. -+ */ -+ ret = block_write_full_page(page, ext3cow_get_block, wbc); -+ } -+ err = ext3cow_journal_stop(handle); -+ if (!ret) -+ ret = err; -+out: -+ return ret; -+ -+no_write: -+ redirty_page_for_writepage(wbc, page); -+out_unlock: -+ unlock_page(page); -+ goto out; -+} -+ -+static int ext3cow_readpage(struct file *file, struct page *page) -+{ -+ return mpage_readpage(page, ext3cow_get_block); -+} -+ -+static int -+ext3cow_readpages(struct file *file, struct address_space *mapping, -+ struct list_head *pages, unsigned nr_pages) -+{ -+ return mpage_readpages(mapping, pages, nr_pages, ext3cow_get_block); -+} -+ -+static void ext3cow_invalidatepage(struct page *page, unsigned long offset) -+{ -+ journal_t *journal = EXT3COW_JOURNAL(page->mapping->host); -+ -+ /* -+ * If it's a full truncate we just forget about the pending dirtying -+ */ -+ if (offset == 0) -+ ClearPageChecked(page); -+ -+ journal_invalidatepage(journal, page, offset); -+} -+ -+static int ext3cow_releasepage(struct page *page, gfp_t wait) -+{ -+ journal_t *journal = EXT3COW_JOURNAL(page->mapping->host); -+ -+ WARN_ON(PageChecked(page)); -+ if (!page_has_buffers(page)) -+ return 0; -+ return journal_try_to_free_buffers(journal, page, wait); -+} -+ -+/* -+ * If the O_DIRECT write will extend the file then add this inode to the -+ * orphan list. So recovery will truncate it back to the original size -+ * if the machine crashes during the write. -+ * -+ * If the O_DIRECT write is intantiating holes inside i_size and the machine -+ * crashes then stale disk data _may_ be exposed inside the file. -+ */ -+static ssize_t ext3cow_direct_IO(int rw, struct kiocb *iocb, -+ const struct iovec *iov, loff_t offset, -+ unsigned long nr_segs) -+{ -+ struct file *file = iocb->ki_filp; -+ struct inode *inode = file->f_mapping->host; -+ struct ext3cow_inode_info *ei = EXT3COW_I(inode); -+ handle_t *handle = NULL; -+ ssize_t ret; -+ int orphan = 0; -+ size_t count = iov_length(iov, nr_segs); -+ -+ if (rw == WRITE) { -+ loff_t final_size = offset + count; -+ -+ handle = ext3cow_journal_start(inode, DIO_CREDITS); -+ if (IS_ERR(handle)) { -+ ret = PTR_ERR(handle); -+ goto out; -+ } -+ if (final_size > inode->i_size) { -+ ret = ext3cow_orphan_add(handle, inode); -+ if (ret) -+ goto out_stop; -+ orphan = 1; -+ ei->i_disksize = inode->i_size; -+ } -+ } -+ -+ ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, -+ offset, nr_segs, -+ ext3cow_get_block, NULL); -+ -+ /* -+ * Reacquire the handle: ext3cow_get_block() can restart the transaction -+ */ -+ handle = journal_current_handle(); -+ -+out_stop: -+ if (handle) { -+ int err; -+ -+ if (orphan && inode->i_nlink) -+ ext3cow_orphan_del(handle, inode); -+ if (orphan && ret > 0) { -+ loff_t end = offset + ret; -+ if (end > inode->i_size) { -+ ei->i_disksize = end; -+ i_size_write(inode, end); -+ /* -+ * We're going to return a positive `ret' -+ * here due to non-zero-length I/O, so there's -+ * no way of reporting error returns from -+ * ext3cow_mark_inode_dirty() to userspace. So -+ * ignore it. -+ */ -+ ext3cow_mark_inode_dirty(handle, inode); -+ } -+ } -+ err = ext3cow_journal_stop(handle); -+ if (ret == 0) -+ ret = err; -+ } -+out: -+ return ret; -+} -+ -+/* -+ * Pages can be marked dirty completely asynchronously from ext3cow's journalling -+ * activity. By filemap_sync_pte(), try_to_unmap_one(), etc. We cannot do -+ * much here because ->set_page_dirty is called under VFS locks. The page is -+ * not necessarily locked. -+ * -+ * We cannot just dirty the page and leave attached buffers clean, because the -+ * buffers' dirty state is "definitive". We cannot just set the buffers dirty -+ * or jbddirty because all the journalling code will explode. -+ * -+ * So what we do is to mark the page "pending dirty" and next time writepage -+ * is called, propagate that into the buffers appropriately. -+ */ -+static int ext3cow_journalled_set_page_dirty(struct page *page) -+{ -+ SetPageChecked(page); -+ return __set_page_dirty_nobuffers(page); -+} -+ -+static const struct address_space_operations ext3cow_ordered_aops = { -+ .readpage = ext3cow_readpage, -+ .readpages = ext3cow_readpages, -+ .writepage = ext3cow_ordered_writepage, -+ .sync_page = block_sync_page, -+ .prepare_write = ext3cow_prepare_write, -+ .commit_write = ext3cow_ordered_commit_write, -+ .bmap = ext3cow_bmap, -+ .invalidatepage = ext3cow_invalidatepage, -+ .releasepage = ext3cow_releasepage, -+ .direct_IO = ext3cow_direct_IO, -+ .migratepage = buffer_migrate_page, -+}; -+ -+static const struct address_space_operations ext3cow_writeback_aops = { -+ .readpage = ext3cow_readpage, -+ .readpages = ext3cow_readpages, -+ .writepage = ext3cow_writeback_writepage, -+ .sync_page = block_sync_page, -+ .prepare_write = ext3cow_prepare_write, -+ .commit_write = ext3cow_writeback_commit_write, -+ .bmap = ext3cow_bmap, -+ .invalidatepage = ext3cow_invalidatepage, -+ .releasepage = ext3cow_releasepage, -+ .direct_IO = ext3cow_direct_IO, -+ .migratepage = buffer_migrate_page, -+}; -+ -+static const struct address_space_operations ext3cow_journalled_aops = { -+ .readpage = ext3cow_readpage, -+ .readpages = ext3cow_readpages, -+ .writepage = ext3cow_journalled_writepage, -+ .sync_page = block_sync_page, -+ .prepare_write = ext3cow_prepare_write, -+ .commit_write = ext3cow_journalled_commit_write, -+ .set_page_dirty = ext3cow_journalled_set_page_dirty, -+ .bmap = ext3cow_bmap, -+ .invalidatepage = ext3cow_invalidatepage, -+ .releasepage = ext3cow_releasepage, -+}; -+ -+void ext3cow_set_aops(struct inode *inode) -+{ -+ if (ext3cow_should_order_data(inode)) -+ inode->i_mapping->a_ops = &ext3cow_ordered_aops; -+ else if (ext3cow_should_writeback_data(inode)) -+ inode->i_mapping->a_ops = &ext3cow_writeback_aops; -+ else -+ inode->i_mapping->a_ops = &ext3cow_journalled_aops; -+} -+ -+/* -+ * ext3cow_block_truncate_page() zeroes out a mapping from file offset `from' -+ * up to the end of the block which corresponds to `from'. -+ * This required during truncate. We need to physically zero the tail end -+ * of that block so it doesn't yield old data if the file is later grown. -+ */ -+static int ext3cow_block_truncate_page(handle_t *handle, struct page *page, -+ struct address_space *mapping, loff_t from) -+{ -+ ext3cow_fsblk_t index = from >> PAGE_CACHE_SHIFT; -+ unsigned offset = from & (PAGE_CACHE_SIZE-1); -+ unsigned blocksize, iblock, length, pos; -+ struct inode *inode = mapping->host; -+ struct buffer_head *bh; -+ int err = 0; -+ void *kaddr; -+ -+ blocksize = inode->i_sb->s_blocksize; -+ length = blocksize - (offset & (blocksize - 1)); -+ iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); -+ -+ /* -+ * For "nobh" option, we can only work if we don't need to -+ * read-in the page - otherwise we create buffers to do the IO. -+ */ -+ if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) && -+ ext3cow_should_writeback_data(inode) && PageUptodate(page)) { -+ kaddr = kmap_atomic(page, KM_USER0); -+ memset(kaddr + offset, 0, length); -+ flush_dcache_page(page); -+ kunmap_atomic(kaddr, KM_USER0); -+ set_page_dirty(page); -+ goto unlock; -+ } -+ -+ if (!page_has_buffers(page)) -+ create_empty_buffers(page, blocksize, 0); -+ -+ /* Find the buffer that contains "offset" */ -+ bh = page_buffers(page); -+ pos = blocksize; -+ while (offset >= pos) { -+ bh = bh->b_this_page; -+ iblock++; -+ pos += blocksize; -+ } -+ -+ err = 0; -+ if (buffer_freed(bh)) { -+ BUFFER_TRACE(bh, "freed: skip"); -+ goto unlock; -+ } -+ -+ if (!buffer_mapped(bh)) { -+ BUFFER_TRACE(bh, "unmapped"); -+ ext3cow_get_block(inode, iblock, bh, 0); -+ /* unmapped? It's a hole - nothing to do */ -+ if (!buffer_mapped(bh)) { -+ BUFFER_TRACE(bh, "still unmapped"); -+ goto unlock; -+ } -+ } -+ -+ /* Ok, it's mapped. Make sure it's up-to-date */ -+ if (PageUptodate(page)) -+ set_buffer_uptodate(bh); -+ -+ if (!buffer_uptodate(bh)) { -+ err = -EIO; -+ ll_rw_block(READ, 1, &bh); -+ wait_on_buffer(bh); -+ /* Uhhuh. Read error. Complain and punt. */ -+ if (!buffer_uptodate(bh)) -+ goto unlock; -+ } -+ -+ if (ext3cow_should_journal_data(inode)) { -+ BUFFER_TRACE(bh, "get write access"); -+ err = ext3cow_journal_get_write_access(handle, bh); -+ if (err) -+ goto unlock; -+ } -+ -+ kaddr = kmap_atomic(page, KM_USER0); -+ memset(kaddr + offset, 0, length); -+ flush_dcache_page(page); -+ kunmap_atomic(kaddr, KM_USER0); -+ -+ BUFFER_TRACE(bh, "zeroed end of block"); -+ -+ err = 0; -+ if (ext3cow_should_journal_data(inode)) { -+ err = ext3cow_journal_dirty_metadata(handle, bh); -+ } else { -+ if (ext3cow_should_order_data(inode)) -+ err = ext3cow_journal_dirty_data(handle, bh); -+ mark_buffer_dirty(bh); -+ } -+ -+unlock: -+ unlock_page(page); -+ page_cache_release(page); -+ return err; -+} -+ -+/* -+ * Probably it should be a library function... search for first non-zero word -+ * or memcmp with zero_page, whatever is better for particular architecture. -+ * Linus? -+ */ -+static inline int all_zeroes(__le32 *p, __le32 *q) -+{ -+ while (p < q) -+ if (*p++) -+ return 0; -+ return 1; -+} -+ -+/** -+ * ext3cow_find_shared - find the indirect blocks for partial truncation. -+ * @inode: inode in question -+ * @depth: depth of the affected branch -+ * @offsets: offsets of pointers in that branch (see ext3cow_block_to_path) -+ * @chain: place to store the pointers to partial indirect blocks -+ * @top: place to the (detached) top of branch -+ * -+ * This is a helper function used by ext3cow_truncate(). -+ * -+ * When we do truncate() we may have to clean the ends of several -+ * indirect blocks but leave the blocks themselves alive. Block is -+ * partially truncated if some data below the new i_size is refered -+ * from it (and it is on the path to the first completely truncated -+ * data block, indeed). We have to free the top of that path along -+ * with everything to the right of the path. Since no allocation -+ * past the truncation point is possible until ext3cow_truncate() -+ * finishes, we may safely do the latter, but top of branch may -+ * require special attention - pageout below the truncation point -+ * might try to populate it. -+ * -+ * We atomically detach the top of branch from the tree, store the -+ * block number of its root in *@top, pointers to buffer_heads of -+ * partially truncated blocks - in @chain[].bh and pointers to -+ * their last elements that should not be removed - in -+ * @chain[].p. Return value is the pointer to last filled element -+ * of @chain. -+ * -+ * The work left to caller to do the actual freeing of subtrees: -+ * a) free the subtree starting from *@top -+ * b) free the subtrees whose roots are stored in -+ * (@chain[i].p+1 .. end of @chain[i].bh->b_data) -+ * c) free the subtrees growing from the inode past the @chain[0]. -+ * (no partially truncated stuff there). */ -+ -+static Indirect *ext3cow_find_shared(struct inode *inode, int depth, -+ int offsets[4], Indirect chain[4], __le32 *top) -+{ -+ Indirect *partial, *p; -+ int k, err, cow; -+ -+ *top = 0; -+ /* Make k index the deepest non-null offest + 1 */ -+ for (k = depth; k > 1 && !offsets[k-1]; k--) -+ ; -+ partial = ext3cow_get_branch(inode, k, offsets, chain, &err, &cow, 0); -+ /* Writer: pointers */ -+ if (!partial) -+ partial = chain + k-1; -+ /* -+ * If the branch acquired continuation since we've looked at it - -+ * fine, it should all survive and (new) top doesn't belong to us. -+ */ -+ if (!partial->key && *partial->p) -+ /* Writer: end */ -+ goto no_top; -+ for (p=partial; p>chain && all_zeroes((__le32*)p->bh->b_data,p->p); p--) -+ ; -+ /* -+ * OK, we've found the last block that must survive. The rest of our -+ * branch should be detached before unlocking. However, if that rest -+ * of branch is all ours and does not grow immediately from the inode -+ * it's easier to cheat and just decrement partial->p. -+ */ -+ if (p == chain + k - 1 && p > chain) { -+ p->p--; -+ } else { -+ *top = *p->p; -+ /* Nope, don't do this in ext3cow. Must leave the tree intact */ -+#if 0 -+ *p->p = 0; -+#endif -+ } -+ /* Writer: end */ -+ -+ while(partial > p) { -+ brelse(partial->bh); -+ partial--; -+ } -+no_top: -+ return partial; -+} -+ -+/* -+ * Zero a number of block pointers in either an inode or an indirect block. -+ * If we restart the transaction we must again get write access to the -+ * indirect block for further modification. -+ * -+ * We release `count' blocks on disk, but (last - first) may be greater -+ * than `count' because there can be holes in there. -+ */ -+static void ext3cow_clear_blocks(handle_t *handle, struct inode *inode, -+ struct buffer_head *bh, ext3cow_fsblk_t block_to_free, -+ unsigned long count, __le32 *first, __le32 *last) -+{ -+ __le32 *p; -+ if (try_to_extend_transaction(handle, inode)) { -+ if (bh) { -+ BUFFER_TRACE(bh, "call ext3cow_journal_dirty_metadata"); -+ ext3cow_journal_dirty_metadata(handle, bh); -+ } -+ ext3cow_mark_inode_dirty(handle, inode); -+ ext3cow_journal_test_restart(handle, inode); -+ if (bh) { -+ BUFFER_TRACE(bh, "retaking write access"); -+ ext3cow_journal_get_write_access(handle, bh); -+ } -+ } -+ -+ /* -+ * Any buffers which are on the journal will be in memory. We find -+ * them on the hash table so journal_revoke() will run journal_forget() -+ * on them. We've already detached each block from the file, so -+ * bforget() in journal_forget() should be safe. -+ * -+ * AKPM: turn on bforget in journal_forget()!!! -+ */ -+ for (p = first; p < last; p++) { -+ u32 nr = le32_to_cpu(*p); -+ if (nr) { -+ struct buffer_head *bh; -+ -+ *p = 0; -+ bh = sb_find_get_block(inode->i_sb, nr); -+ ext3cow_forget(handle, 0, inode, bh, nr); -+ } -+ } -+ -+ ext3cow_free_blocks(handle, inode, block_to_free, count); -+} -+ -+/** -+ * ext3cow_free_data - free a list of data blocks -+ * @handle: handle for this transaction -+ * @inode: inode we are dealing with -+ * @this_bh: indirect buffer_head which contains *@first and *@last -+ * @first: array of block numbers -+ * @last: points immediately past the end of array -+ * -+ * We are freeing all blocks refered from that array (numbers are stored as -+ * little-endian 32-bit) and updating @inode->i_blocks appropriately. -+ * -+ * We accumulate contiguous runs of blocks to free. Conveniently, if these -+ * blocks are contiguous then releasing them at one time will only affect one -+ * or two bitmap blocks (+ group descriptor(s) and superblock) and we won't -+ * actually use a lot of journal space. -+ * -+ * @this_bh will be %NULL if @first and @last point into the inode's direct -+ * block pointers. -+ */ -+static void ext3cow_free_data(handle_t *handle, struct inode *inode, -+ struct buffer_head *this_bh, -+ __le32 *first, __le32 *last) -+{ -+ ext3cow_fsblk_t block_to_free = 0; /* Starting block # of a run */ -+ unsigned long count = 0; /* Number of blocks in the run */ -+ __le32 *block_to_free_p = NULL; /* Pointer into inode/ind -+ corresponding to -+ block_to_free */ -+ ext3cow_fsblk_t nr; /* Current block # */ -+ __le32 *p; /* Pointer into inode/ind -+ for current block */ -+ int err; -+ -+ if (this_bh) { /* For indirect block */ -+ BUFFER_TRACE(this_bh, "get_write_access"); -+ err = ext3cow_journal_get_write_access(handle, this_bh); -+ /* Important: if we can't update the indirect pointers -+ * to the blocks, we can't free them. */ -+ if (err) -+ return; -+ } -+ -+ for (p = first; p < last; p++) { -+ nr = le32_to_cpu(*p); -+ if (nr) { -+ /* accumulate blocks to free if they're contiguous */ -+ if (count == 0) { -+ block_to_free = nr; -+ block_to_free_p = p; -+ count = 1; -+ } else if (nr == block_to_free + count) { -+ count++; -+ } else { -+ ext3cow_clear_blocks(handle, inode, this_bh, -+ block_to_free, -+ count, block_to_free_p, p); -+ block_to_free = nr; -+ block_to_free_p = p; -+ count = 1; -+ } -+ } -+ } -+ -+ if (count > 0) -+ ext3cow_clear_blocks(handle, inode, this_bh, block_to_free, -+ count, block_to_free_p, p); -+ -+ if (this_bh) { -+ BUFFER_TRACE(this_bh, "call ext3cow_journal_dirty_metadata"); -+ ext3cow_journal_dirty_metadata(handle, this_bh); -+ } -+} -+ -+/** -+ * ext3cow_free_branches - free an array of branches -+ * @handle: JBD handle for this transaction -+ * @inode: inode we are dealing with -+ * @parent_bh: the buffer_head which contains *@first and *@last -+ * @first: array of block numbers -+ * @last: pointer immediately past the end of array -+ * @depth: depth of the branches to free -+ * -+ * We are freeing all blocks refered from these branches (numbers are -+ * stored as little-endian 32-bit) and updating @inode->i_blocks -+ * appropriately. -+ */ -+static void ext3cow_free_branches(handle_t *handle, struct inode *inode, -+ struct buffer_head *parent_bh, -+ __le32 *first, __le32 *last, int depth) -+{ -+ ext3cow_fsblk_t nr; -+ __le32 *p; -+ -+ if (is_handle_aborted(handle)) -+ return; -+ -+ if (depth--) { -+ struct buffer_head *bh; -+ int addr_per_block = EXT3COW_ADDR_PER_BLOCK(inode->i_sb); -+ u32 *bitmap_word = NULL, *first_block = NULL; -+ unsigned int count = 0, cur = 0, bcount = 0; -+ int i = 0; -+ p = last; -+ while (--p >= first) { -+ nr = le32_to_cpu(*p); -+ if (!nr) -+ continue; /* A hole */ -+ -+ /* Go read the buffer for the next level down */ -+ bh = sb_bread(inode->i_sb, nr); -+ -+ /* -+ * A read failure? Report error and clear slot -+ * (should be rare). -+ */ -+ if (!bh) { -+ ext3cow_error(inode->i_sb, "ext3cow_free_branches", -+ "Read failure, inode=%lu, block="E3FSBLK, -+ inode->i_ino, nr); -+ continue; -+ } -+ /* Only free the branches that have been newly allocated - znjp */ -+ cur = 0; -+ count = 0; -+ bitmap_word = (u32*)bh->b_data + addr_per_block; -+ -+ for(bcount = 0; bcount < EXT3COW_COWBITMAPS_PER_IBLOCK(inode->i_sb); -+ bcount++){ -+ for(i = 0; i < EXT3COW_COWBITMAP_SIZE; i++, cur++){ -+ if(cur >= addr_per_block) -+ goto free; -+ if(le32_to_cpu(*bitmap_word) & (1UL << i)){ -+ if(count == 0){ -+ first_block = (u32*)bh->b_data + cur; -+ count = 1; -+ }else if((u32*)first_block + count == (u32*)bh->b_data + cur){ -+ count++; -+ }else{ -+ BUFFER_TRACE(bh, "free child branches"); -+ ext3cow_free_branches(handle, inode, bh, (u32*)first_block, -+ (u32*)first_block + count, depth); -+ first_block = (u32*)bh->b_data + cur; -+ count = 1; -+ } -+ } -+ } -+ (u32*)bitmap_word++; -+ } -+ free: -+ if(count){ -+ BUFFER_TRACE(bh, "free child branches"); -+ ext3cow_free_branches(handle, inode, bh, (u32*)first_block, -+ (u32*)first_block + count, depth); -+ } -+ -+ /* -+ * We've probably journalled the indirect block several -+ * times during the truncate. But it's no longer -+ * needed and we now drop it from the transaction via -+ * journal_revoke(). -+ * -+ * That's easy if it's exclusively part of this -+ * transaction. But if it's part of the committing -+ * transaction then journal_forget() will simply -+ * brelse() it. That means that if the underlying -+ * block is reallocated in ext3cow_get_block(), -+ * unmap_underlying_metadata() will find this block -+ * and will try to get rid of it. damn, damn. -+ * -+ * If this block has already been committed to the -+ * journal, a revoke record will be written. And -+ * revoke records must be emitted *before* clearing -+ * this block's bit in the bitmaps. -+ */ -+ ext3cow_forget(handle, 1, inode, bh, bh->b_blocknr); -+ -+ /* -+ * Everything below this this pointer has been -+ * released. Now let this top-of-subtree go. -+ * -+ * We want the freeing of this indirect block to be -+ * atomic in the journal with the updating of the -+ * bitmap block which owns it. So make some room in -+ * the journal. -+ * -+ * We zero the parent pointer *after* freeing its -+ * pointee in the bitmaps, so if extend_transaction() -+ * for some reason fails to put the bitmap changes and -+ * the release into the same transaction, recovery -+ * will merely complain about releasing a free block, -+ * rather than leaking blocks. -+ */ -+ if (is_handle_aborted(handle)) -+ return; -+ if (try_to_extend_transaction(handle, inode)) { -+ ext3cow_mark_inode_dirty(handle, inode); -+ ext3cow_journal_test_restart(handle, inode); -+ } -+ -+ ext3cow_free_blocks(handle, inode, nr, 1); -+ -+ if (parent_bh) { -+ /* -+ * The block which we have just freed is -+ * pointed to by an indirect block: journal it -+ */ -+ BUFFER_TRACE(parent_bh, "get_write_access"); -+ if (!ext3cow_journal_get_write_access(handle, -+ parent_bh)){ -+ *p = 0; -+ BUFFER_TRACE(parent_bh, -+ "call ext3cow_journal_dirty_metadata"); -+ ext3cow_journal_dirty_metadata(handle, -+ parent_bh); -+ } -+ } -+ } -+ } else { -+ /* We have reached the bottom of the tree. */ -+ BUFFER_TRACE(parent_bh, "free data blocks"); -+ ext3cow_free_data(handle, inode, parent_bh, first, last); -+ } -+} -+ -+/* -+ * ext3cow_truncate() -+ * -+ * We block out ext3cow_get_block() block instantiations across the entire -+ * transaction, and VFS/VM ensures that ext3cow_truncate() cannot run -+ * simultaneously on behalf of the same inode. -+ * -+ * As we work through the truncate and commmit bits of it to the journal there -+ * is one core, guiding principle: the file's tree must always be consistent on -+ * disk. We must be able to restart the truncate after a crash. -+ * -+ * The file's tree may be transiently inconsistent in memory (although it -+ * probably isn't), but whenever we close off and commit a journal transaction, -+ * the contents of (the filesystem + the journal) must be consistent and -+ * restartable. It's pretty simple, really: bottom up, right to left (although -+ * left-to-right works OK too). -+ * -+ * Note that at recovery time, journal replay occurs *before* the restart of -+ * truncate against the orphan inode list. -+ * -+ * The committed inode has the new, desired i_size (which is the same as -+ * i_disksize in this case). After a crash, ext3cow_orphan_cleanup() will see -+ * that this inode's truncate did not complete and it will again call -+ * ext3cow_truncate() to have another go. So there will be instantiated blocks -+ * to the right of the truncation point in a crashed ext3cow filesystem. But -+ * that's fine - as long as they are linked from the inode, the post-crash -+ * ext3cow_truncate() run will find them and release them. -+ */ -+void ext3cow_truncate(struct inode *inode) -+{ -+ handle_t *handle; -+ struct ext3cow_inode_info *ei = EXT3COW_I(inode); -+ __le32 *i_data = ei->i_data; -+ int addr_per_block = EXT3COW_ADDR_PER_BLOCK(inode->i_sb); -+ struct address_space *mapping = inode->i_mapping; -+ int offsets[4]; -+ Indirect chain[4]; -+ Indirect *partial; -+ __le32 nr = 0; -+ int n; -+ long last_block; -+ unsigned blocksize = inode->i_sb->s_blocksize; -+ struct page *page; -+ -+ -+ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || -+ S_ISLNK(inode->i_mode))) -+ return; -+ if (ext3cow_inode_is_fast_symlink(inode)) -+ return; -+ if (IS_APPEND(inode) || IS_IMMUTABLE(inode) || -+ EXT3COW_IS_UNCHANGEABLE(inode)) /* znjp */ -+ return; -+ -+ /* If the inode needs to be dup'd, then there are no blocks -+ * to truncate; they all are part of the previous version. -+ * - znjp */ -+ if(EXT3COW_S_EPOCHNUMBER(inode->i_sb) > EXT3COW_I_EPOCHNUMBER(inode)){ -+ ext3cow_dup_inode(NULL, inode); -+ return; -+ } -+ -+ /* -+ * We have to lock the EOF page here, because lock_page() nests -+ * outside journal_start(). -+ */ -+ if ((inode->i_size & (blocksize - 1)) == 0) { -+ /* Block boundary? Nothing to do */ -+ page = NULL; -+ } else { -+ page = grab_cache_page(mapping, -+ inode->i_size >> PAGE_CACHE_SHIFT); -+ if (!page) -+ return; -+ } -+ -+ handle = start_transaction(inode); -+ if (IS_ERR(handle)) { -+ if (page) { -+ clear_highpage(page); -+ flush_dcache_page(page); -+ unlock_page(page); -+ page_cache_release(page); -+ } -+ return; /* AKPM: return what? */ -+ } -+ -+ last_block = (inode->i_size + blocksize-1) -+ >> EXT3COW_BLOCK_SIZE_BITS(inode->i_sb); -+ -+ if (page) -+ ext3cow_block_truncate_page(handle, page, mapping, inode->i_size); -+ -+ n = ext3cow_block_to_path(inode, last_block, offsets, NULL); -+ if (n == 0) -+ goto out_stop; /* error */ -+ -+ /* -+ * OK. This truncate is going to happen. We add the inode to the -+ * orphan list, so that if this truncate spans multiple transactions, -+ * and we crash, we will resume the truncate when the filesystem -+ * recovers. It also marks the inode dirty, to catch the new size. -+ * -+ * Implication: the file must always be in a sane, consistent -+ * truncatable state while each transaction commits. -+ */ -+ if (ext3cow_orphan_add(handle, inode)) -+ goto out_stop; -+ -+ /* -+ * The orphan list entry will now protect us from any crash which -+ * occurs before the truncate completes, so it is now safe to propagate -+ * the new, shorter inode size (held for now in i_size) into the -+ * on-disk inode. We do this via i_disksize, which is the value which -+ * ext3cow *really* writes onto the disk inode. -+ */ -+ ei->i_disksize = inode->i_size; -+ -+ /* -+ * From here we block out all ext3cow_get_block() callers who want to -+ * modify the block allocation tree. -+ */ -+ mutex_lock(&ei->truncate_mutex); -+ -+ if (n == 1) { /* direct blocks */ -+ unsigned int count = 0; -+ unsigned long block_to_free = 0; -+ unsigned long b = 0; -+ -+ /* We only want to remove blocks that were allocated in this -+ * epoch, i.e., have 1 bit in the bitmap. -znjp */ -+ for(b = offsets[0]; b < EXT3COW_NDIR_BLOCKS; b++){ -+ if(EXT3COW_I(inode)->i_cow_bitmap & (1UL << b)){ -+ if(count == 0){ -+ block_to_free = b; -+ count = 1; -+ }else if(b == block_to_free + count){ -+ count++; -+ }else{ -+ ext3cow_free_data(handle, inode, NULL, i_data + (int)block_to_free, -+ i_data + (int)(block_to_free + count)); -+ block_to_free = b; -+ count = 1; -+ } -+ } -+ } -+ if(count > 0) -+ ext3cow_free_data(handle, inode, NULL, i_data+(int)block_to_free, -+ i_data + (int)(block_to_free + count)); -+ goto do_indirects; -+ } -+ -+ partial = ext3cow_find_shared(inode, n, offsets, chain, &nr); -+ /* Kill the top of shared branch (not detached) */ -+ if (nr) { -+ if (partial == chain) { -+ /* Shared branch grows from the inode */ -+ ext3cow_free_branches(handle, inode, NULL, -+ &nr, &nr+1, (chain+n-1) - partial); -+ *partial->p = 0; -+ /* -+ * We mark the inode dirty prior to restart, -+ * and prior to stop. No need for it here. -+ */ -+ } else { -+ /* Shared branch grows from an indirect block */ -+ BUFFER_TRACE(partial->bh, "get_write_access"); -+ ext3cow_free_branches(handle, inode, partial->bh, -+ partial->p, -+ partial->p+1, (chain+n-1) - partial); -+ } -+ } -+ /* Clear the ends of indirect blocks on the shared branch */ -+ while (partial > chain) { -+ ext3cow_free_branches(handle, inode, partial->bh, partial->p + 1, -+ (__le32*)partial->bh->b_data+addr_per_block, -+ (chain+n-1) - partial); -+ BUFFER_TRACE(partial->bh, "call brelse"); -+ brelse (partial->bh); -+ partial--; -+ } -+do_indirects: -+ /* Kill the remaining (whole) subtrees */ -+ /* Unless we don't have to. If the indirect block has a 0 bit -+ * then all of the children do too, so we can skip the branch - znjp -+ */ -+ switch (offsets[0]) { -+ default: -+ if(EXT3COW_I(inode)->i_cow_bitmap & (1UL << EXT3COW_IND_BLOCK)){ -+ nr = i_data[EXT3COW_IND_BLOCK]; -+ if (nr) { -+ ext3cow_free_branches(handle, inode, NULL, &nr, &nr+1, 1); -+ i_data[EXT3COW_IND_BLOCK] = 0; -+ } -+ } -+ case EXT3COW_IND_BLOCK: -+ if(EXT3COW_I(inode)->i_cow_bitmap & (1UL << EXT3COW_DIND_BLOCK)){ -+ nr = i_data[EXT3COW_DIND_BLOCK]; -+ if (nr) { -+ ext3cow_free_branches(handle, inode, NULL, &nr, &nr+1, 2); -+ i_data[EXT3COW_DIND_BLOCK] = 0; -+ } -+ } -+ case EXT3COW_DIND_BLOCK: -+ if(EXT3COW_I(inode)->i_cow_bitmap & (1UL << EXT3COW_TIND_BLOCK)){ -+ nr = i_data[EXT3COW_TIND_BLOCK]; -+ if (nr) { -+ ext3cow_free_branches(handle, inode, NULL, &nr, &nr+1, 3); -+ i_data[EXT3COW_TIND_BLOCK] = 0; -+ } -+ } -+ case EXT3COW_TIND_BLOCK: -+ ; -+ } -+ -+ ext3cow_discard_reservation(inode); -+ -+ mutex_unlock(&ei->truncate_mutex); -+ inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; -+ ext3cow_mark_inode_dirty(handle, inode); -+ -+ /* -+ * In a multi-transaction truncate, we only make the final transaction -+ * synchronous -+ */ -+ if (IS_SYNC(inode)) -+ handle->h_sync = 1; -+out_stop: -+ /* -+ * If this was a simple ftruncate(), and the file will remain alive -+ * then we need to clear up the orphan record which we created above. -+ * However, if this was a real unlink then we were called by -+ * ext3cow_delete_inode(), and we allow that function to clean up the -+ * orphan info for us. -+ */ -+ if (inode->i_nlink) -+ ext3cow_orphan_del(handle, inode); -+ -+ ext3cow_journal_stop(handle); -+} -+ -+static ext3cow_fsblk_t ext3cow_get_inode_block(struct super_block *sb, -+ unsigned long ino, struct ext3cow_iloc *iloc) -+{ -+ unsigned long desc, group_desc, block_group; -+ unsigned long offset; -+ ext3cow_fsblk_t block; -+ struct buffer_head *bh; -+ struct ext3cow_group_desc * gdp; -+ -+ if (!ext3cow_valid_inum(sb, ino)) { -+ /* -+ * This error is already checked for in namei.c unless we are -+ * looking at an NFS filehandle, in which case no error -+ * report is needed -+ */ -+ return 0; -+ } -+ -+ block_group = (ino - 1) / EXT3COW_INODES_PER_GROUP(sb); -+ if (block_group >= EXT3COW_SB(sb)->s_groups_count) { -+ ext3cow_error(sb,"ext3cow_get_inode_block","group >= groups count"); -+ return 0; -+ } -+ smp_rmb(); -+ group_desc = block_group >> EXT3COW_DESC_PER_BLOCK_BITS(sb); -+ desc = block_group & (EXT3COW_DESC_PER_BLOCK(sb) - 1); -+ bh = EXT3COW_SB(sb)->s_group_desc[group_desc]; -+ if (!bh) { -+ ext3cow_error (sb, "ext3cow_get_inode_block", -+ "Descriptor not loaded"); -+ return 0; -+ } -+ -+ gdp = (struct ext3cow_group_desc *)bh->b_data; -+ /* -+ * Figure out the offset within the block group inode table -+ */ -+ offset = ((ino - 1) % EXT3COW_INODES_PER_GROUP(sb)) * -+ EXT3COW_INODE_SIZE(sb); -+ block = le32_to_cpu(gdp[desc].bg_inode_table) + -+ (offset >> EXT3COW_BLOCK_SIZE_BITS(sb)); -+ -+ iloc->block_group = block_group; -+ iloc->offset = offset & (EXT3COW_BLOCK_SIZE(sb) - 1); -+ return block; -+} -+ -+/* -+ * ext3cow_get_inode_loc returns with an extra refcount against the inode's -+ * underlying buffer_head on success. If 'in_mem' is true, we have all -+ * data in memory that is needed to recreate the on-disk version of this -+ * inode. -+ */ -+static int __ext3cow_get_inode_loc(struct inode *inode, -+ struct ext3cow_iloc *iloc, int in_mem) -+{ -+ ext3cow_fsblk_t block; -+ struct buffer_head *bh; -+ -+ block = ext3cow_get_inode_block(inode->i_sb, inode->i_ino, iloc); -+ if (!block) -+ return -EIO; -+ -+ bh = sb_getblk(inode->i_sb, block); -+ if (!bh) { -+ ext3cow_error (inode->i_sb, "ext3cow_get_inode_loc", -+ "unable to read inode block - " -+ "inode=%lu, block="E3FSBLK, -+ inode->i_ino, block); -+ return -EIO; -+ } -+ if (!buffer_uptodate(bh)) { -+ lock_buffer(bh); -+ if (buffer_uptodate(bh)) { -+ /* someone brought it uptodate while we waited */ -+ unlock_buffer(bh); -+ goto has_buffer; -+ } -+ -+ /* -+ * If we have all information of the inode in memory and this -+ * is the only valid inode in the block, we need not read the -+ * block. -+ */ -+ if (in_mem) { -+ struct buffer_head *bitmap_bh; -+ struct ext3cow_group_desc *desc; -+ int inodes_per_buffer; -+ int inode_offset, i; -+ int block_group; -+ int start; -+ -+ block_group = (inode->i_ino - 1) / -+ EXT3COW_INODES_PER_GROUP(inode->i_sb); -+ inodes_per_buffer = bh->b_size / -+ EXT3COW_INODE_SIZE(inode->i_sb); -+ inode_offset = ((inode->i_ino - 1) % -+ EXT3COW_INODES_PER_GROUP(inode->i_sb)); -+ start = inode_offset & ~(inodes_per_buffer - 1); -+ -+ /* Is the inode bitmap in cache? */ -+ desc = ext3cow_get_group_desc(inode->i_sb, -+ block_group, NULL); -+ if (!desc) -+ goto make_io; -+ -+ bitmap_bh = sb_getblk(inode->i_sb, -+ le32_to_cpu(desc->bg_inode_bitmap)); -+ if (!bitmap_bh) -+ goto make_io; -+ -+ /* -+ * If the inode bitmap isn't in cache then the -+ * optimisation may end up performing two reads instead -+ * of one, so skip it. -+ */ -+ if (!buffer_uptodate(bitmap_bh)) { -+ brelse(bitmap_bh); -+ goto make_io; -+ } -+ for (i = start; i < start + inodes_per_buffer; i++) { -+ if (i == inode_offset) -+ continue; -+ if (ext3cow_test_bit(i, bitmap_bh->b_data)) -+ break; -+ } -+ brelse(bitmap_bh); -+ if (i == start + inodes_per_buffer) { -+ /* all other inodes are free, so skip I/O */ -+ memset(bh->b_data, 0, bh->b_size); -+ set_buffer_uptodate(bh); -+ unlock_buffer(bh); -+ goto has_buffer; -+ } -+ } -+ -+make_io: -+ /* -+ * There are other valid inodes in the buffer, this inode -+ * has in-inode xattrs, or we don't have this inode in memory. -+ * Read the block from disk. -+ */ -+ get_bh(bh); -+ bh->b_end_io = end_buffer_read_sync; -+ submit_bh(READ_META, bh); -+ wait_on_buffer(bh); -+ if (!buffer_uptodate(bh)) { -+ ext3cow_error(inode->i_sb, "ext3cow_get_inode_loc", -+ "unable to read inode block - " -+ "inode=%lu, block="E3FSBLK, -+ inode->i_ino, block); -+ brelse(bh); -+ return -EIO; -+ } -+ } -+has_buffer: -+ iloc->bh = bh; -+ return 0; -+} -+ -+int ext3cow_get_inode_loc(struct inode *inode, struct ext3cow_iloc *iloc) -+{ -+ /* We have all inode data except xattrs in memory here. */ -+ return __ext3cow_get_inode_loc(inode, iloc, -+ !(EXT3COW_I(inode)->i_state & EXT3COW_STATE_XATTR)); -+} -+ -+void ext3cow_set_inode_flags(struct inode *inode) -+{ -+ unsigned int flags = EXT3COW_I(inode)->i_flags; -+ -+ inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); -+ if (flags & EXT3COW_SYNC_FL) -+ inode->i_flags |= S_SYNC; -+ if (flags & EXT3COW_APPEND_FL) -+ inode->i_flags |= S_APPEND; -+ if (flags & EXT3COW_IMMUTABLE_FL) -+ inode->i_flags |= S_IMMUTABLE; -+ if (flags & EXT3COW_NOATIME_FL) -+ inode->i_flags |= S_NOATIME; -+ if (flags & EXT3COW_DIRSYNC_FL) -+ inode->i_flags |= S_DIRSYNC; -+} -+ -+void ext3cow_read_inode(struct inode * inode) -+{ -+ struct ext3cow_iloc iloc; -+ struct ext3cow_inode *raw_inode; -+ struct ext3cow_inode_info *ei = EXT3COW_I(inode); -+ struct buffer_head *bh; -+ int block; -+ -+#ifdef CONFIG_EXT3COW_FS_POSIX_ACL -+ ei->i_acl = EXT3COW_ACL_NOT_CACHED; -+ ei->i_default_acl = EXT3COW_ACL_NOT_CACHED; -+#endif -+ ei->i_block_alloc_info = NULL; -+ -+ if (__ext3cow_get_inode_loc(inode, &iloc, 0)) -+ goto bad_inode; -+ bh = iloc.bh; -+ raw_inode = ext3cow_raw_inode(&iloc); -+ inode->i_mode = le16_to_cpu(raw_inode->i_mode); -+ inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); -+ inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); -+ if(!(test_opt (inode->i_sb, NO_UID32))) { -+ inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; -+ inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; -+ } -+ inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); -+ inode->i_size = le32_to_cpu(raw_inode->i_size); -+ inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime); -+ inode->i_ctime.tv_sec = le32_to_cpu(raw_inode->i_ctime); -+ inode->i_mtime.tv_sec = le32_to_cpu(raw_inode->i_mtime); -+ inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0; -+ -+ ei->i_state = 0; -+ ei->i_dir_start_lookup = 0; -+ ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); -+ /* We now have enough fields to check if the inode was active or not. -+ * This is needed because nfsd might try to access dead inodes -+ * the test is that same one that e2fsck uses -+ * NeilBrown 1999oct15 -+ */ -+ if (inode->i_nlink == 0) { -+ if (inode->i_mode == 0 || -+ !(EXT3COW_SB(inode->i_sb)->s_mount_state & EXT3COW_ORPHAN_FS)) { -+ /* this inode is deleted */ -+ brelse (bh); -+ goto bad_inode; -+ } -+ /* The only unlinked inodes we let through here have -+ * valid i_mode and are being read by the orphan -+ * recovery code: that's fine, we're about to complete -+ * the process of deleting those. */ -+ } -+ inode->i_blocks = le32_to_cpu(raw_inode->i_blocks); -+ ei->i_flags = le32_to_cpu(raw_inode->i_flags); -+ /* For versioning -znjp */ -+ ei->i_cow_bitmap = le32_to_cpu(raw_inode->i_cowbitmap); -+ ei->i_epoch_number = le32_to_cpu(raw_inode->i_epch_number); -+ ei->i_next_inode = le32_to_cpu(raw_inode->i_nxt_inode); -+ -+#ifdef EXT3COW_FRAGMENTS -+ /* Taken out for versioning -znjp */ -+ //ei->i_faddr = le32_to_cpu(raw_inode->i_faddr); -+ //ei->i_frag_no = raw_inode->i_frag; -+ //ei->i_frag_size = raw_inode->i_fsize; -+#endif -+ ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl); -+ if (!S_ISREG(inode->i_mode)) { -+ ei->i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl); -+ } else { -+ inode->i_size |= -+ ((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32; -+ } -+ ei->i_disksize = inode->i_size; -+ inode->i_generation = le32_to_cpu(raw_inode->i_generation); -+ ei->i_block_group = iloc.block_group; -+ /* -+ * NOTE! The in-memory inode i_data array is in little-endian order -+ * even on big-endian machines: we do NOT byteswap the block numbers! -+ */ -+ for (block = 0; block < EXT3COW_N_BLOCKS; block++) -+ ei->i_data[block] = raw_inode->i_block[block]; -+ INIT_LIST_HEAD(&ei->i_orphan); -+ -+ if (inode->i_ino >= EXT3COW_FIRST_INO(inode->i_sb) + 1 && -+ EXT3COW_INODE_SIZE(inode->i_sb) > EXT3COW_GOOD_OLD_INODE_SIZE) { -+ /* -+ * When mke2fs creates big inodes it does not zero out -+ * the unused bytes above EXT3COW_GOOD_OLD_INODE_SIZE, -+ * so ignore those first few inodes. -+ */ -+ ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); -+ if (EXT3COW_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > -+ EXT3COW_INODE_SIZE(inode->i_sb)) -+ goto bad_inode; -+ if (ei->i_extra_isize == 0) { -+ /* The extra space is currently unused. Use it. */ -+ ei->i_extra_isize = sizeof(struct ext3cow_inode) - -+ EXT3COW_GOOD_OLD_INODE_SIZE; -+ } else { -+ __le32 *magic = (void *)raw_inode + -+ EXT3COW_GOOD_OLD_INODE_SIZE + -+ ei->i_extra_isize; -+ if (*magic == cpu_to_le32(EXT3COW_XATTR_MAGIC)) -+ ei->i_state |= EXT3COW_STATE_XATTR; -+ } -+ } else -+ ei->i_extra_isize = 0; -+ -+ if (S_ISREG(inode->i_mode)) { -+ inode->i_op = &ext3cow_file_inode_operations; -+ inode->i_fop = &ext3cow_file_operations; -+ ext3cow_set_aops(inode); -+ } else if (S_ISDIR(inode->i_mode)) { -+ inode->i_op = &ext3cow_dir_inode_operations; -+ inode->i_fop = &ext3cow_dir_operations; -+ } else if (S_ISLNK(inode->i_mode)) { -+ if (ext3cow_inode_is_fast_symlink(inode)) -+ inode->i_op = &ext3cow_fast_symlink_inode_operations; -+ else { -+ inode->i_op = &ext3cow_symlink_inode_operations; -+ ext3cow_set_aops(inode); -+ } -+ } else { -+ inode->i_op = &ext3cow_special_inode_operations; -+ if (raw_inode->i_block[0]) -+ init_special_inode(inode, inode->i_mode, -+ old_decode_dev(le32_to_cpu(raw_inode->i_block[0]))); -+ else -+ init_special_inode(inode, inode->i_mode, -+ new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); -+ } -+ brelse (iloc.bh); -+ ext3cow_set_inode_flags(inode); -+ return; -+ -+bad_inode: -+ make_bad_inode(inode); -+ return; -+} -+ -+/* -+ * Post the struct inode info into an on-disk inode location in the -+ * buffer-cache. This gobbles the caller's reference to the -+ * buffer_head in the inode location struct. -+ * -+ * The caller must have write access to iloc->bh. -+ */ -+static int ext3cow_do_update_inode(handle_t *handle, -+ struct inode *inode, -+ struct ext3cow_iloc *iloc) -+{ -+ struct ext3cow_inode *raw_inode = ext3cow_raw_inode(iloc); -+ struct ext3cow_inode_info *ei = EXT3COW_I(inode); -+ struct buffer_head *bh = iloc->bh; -+ int err = 0, rc, block; -+ -+ /* For fields not not tracking in the in-memory inode, -+ * initialise them to zero for new inodes. */ -+ if (ei->i_state & EXT3COW_STATE_NEW) -+ memset(raw_inode, 0, EXT3COW_SB(inode->i_sb)->s_inode_size); -+ -+ raw_inode->i_mode = cpu_to_le16(inode->i_mode); -+ -+ -+ if(!(test_opt(inode->i_sb, NO_UID32))) { -+ raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); -+ raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid)); -+ -+ /* Fix up interoperability with old kernels. Otherwise, old inodes get -+ * re-used with the upper 16 bits of the uid/gid intact -+ */ -+ -+ if(!ei->i_dtime) { -+ raw_inode->i_uid_high = -+ cpu_to_le16(high_16_bits(inode->i_uid)); -+ raw_inode->i_gid_high = -+ cpu_to_le16(high_16_bits(inode->i_gid)); -+ } else { -+ raw_inode->i_uid_high = 0; -+ raw_inode->i_gid_high = 0; -+ } -+ -+ } else { -+ raw_inode->i_uid_low = -+ cpu_to_le16(fs_high2lowuid(inode->i_uid)); -+ raw_inode->i_gid_low = -+ cpu_to_le16(fs_high2lowgid(inode->i_gid)); -+ raw_inode->i_uid_high = 0; -+ raw_inode->i_gid_high = 0; -+ } -+ -+ raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); -+ raw_inode->i_size = cpu_to_le32(ei->i_disksize); -+ raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); -+ raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); -+ raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); -+ raw_inode->i_blocks = cpu_to_le32(inode->i_blocks); -+ raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); -+ raw_inode->i_flags = cpu_to_le32(ei->i_flags); -+ /* For versioning -znjp */ -+ raw_inode->i_cowbitmap = cpu_to_le16(EXT3COW_I(inode)->i_cow_bitmap); -+ raw_inode->i_epch_number = cpu_to_le32(EXT3COW_I(inode)->i_epoch_number); -+ raw_inode->i_nxt_inode = cpu_to_le32(EXT3COW_I(inode)->i_next_inode); -+ -+#ifdef EXT3COW_FRAGMENTS -+ /* Taken out for versioning -znjp */ -+ //raw_inode->i_faddr = cpu_to_le32(ei->i_faddr); -+ //raw_inode->i_frag = ei->i_frag_no; -+ //raw_inode->i_fsize = ei->i_frag_size; -+#endif -+ raw_inode->i_file_acl = cpu_to_le32(ei->i_file_acl); -+ if (!S_ISREG(inode->i_mode)) { -+ raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl); -+ } else { -+ raw_inode->i_size_high = -+ cpu_to_le32(ei->i_disksize >> 32); -+ if (ei->i_disksize > 0x7fffffffULL) { -+ struct super_block *sb = inode->i_sb; -+ if (!EXT3COW_HAS_RO_COMPAT_FEATURE(sb, -+ EXT3COW_FEATURE_RO_COMPAT_LARGE_FILE) || -+ EXT3COW_SB(sb)->s_es->s_rev_level == -+ cpu_to_le32(EXT3COW_GOOD_OLD_REV)) { -+ /* If this is the first large file -+ * created, add a flag to the superblock. -+ */ -+ err = ext3cow_journal_get_write_access(handle, -+ EXT3COW_SB(sb)->s_sbh); -+ if (err) -+ goto out_brelse; -+ ext3cow_update_dynamic_rev(sb); -+ EXT3COW_SET_RO_COMPAT_FEATURE(sb, -+ EXT3COW_FEATURE_RO_COMPAT_LARGE_FILE); -+ sb->s_dirt = 1; -+ handle->h_sync = 1; -+ err = ext3cow_journal_dirty_metadata(handle, -+ EXT3COW_SB(sb)->s_sbh); -+ } -+ } -+ } -+ raw_inode->i_generation = cpu_to_le32(inode->i_generation); -+ if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { -+ if (old_valid_dev(inode->i_rdev)) { -+ raw_inode->i_block[0] = -+ cpu_to_le32(old_encode_dev(inode->i_rdev)); -+ raw_inode->i_block[1] = 0; -+ } else { -+ raw_inode->i_block[0] = 0; -+ raw_inode->i_block[1] = -+ cpu_to_le32(new_encode_dev(inode->i_rdev)); -+ raw_inode->i_block[2] = 0; -+ } -+ } else for (block = 0; block < EXT3COW_N_BLOCKS; block++) -+ raw_inode->i_block[block] = ei->i_data[block]; -+ -+ if (ei->i_extra_isize) -+ raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); -+ -+ BUFFER_TRACE(bh, "call ext3cow_journal_dirty_metadata"); -+ rc = ext3cow_journal_dirty_metadata(handle, bh); -+ if (!err) -+ err = rc; -+ ei->i_state &= ~EXT3COW_STATE_NEW; -+ -+out_brelse: -+ brelse (bh); -+ ext3cow_std_error(inode->i_sb, err); -+ return err; -+} -+ -+/* -+ * ext3cow_write_inode() -+ * -+ * We are called from a few places: -+ * -+ * - Within generic_file_write() for O_SYNC files. -+ * Here, there will be no transaction running. We wait for any running -+ * trasnaction to commit. -+ * -+ * - Within sys_sync(), kupdate and such. -+ * We wait on commit, if tol to. -+ * -+ * - Within prune_icache() (PF_MEMALLOC == true) -+ * Here we simply return. We can't afford to block kswapd on the -+ * journal commit. -+ * -+ * In all cases it is actually safe for us to return without doing anything, -+ * because the inode has been copied into a raw inode buffer in -+ * ext3cow_mark_inode_dirty(). This is a correctness thing for O_SYNC and for -+ * knfsd. -+ * -+ * Note that we are absolutely dependent upon all inode dirtiers doing the -+ * right thing: they *must* call mark_inode_dirty() after dirtying info in -+ * which we are interested. -+ * -+ * It would be a bug for them to not do this. The code: -+ * -+ * mark_inode_dirty(inode) -+ * stuff(); -+ * inode->i_size = expr; -+ * -+ * is in error because a kswapd-driven write_inode() could occur while -+ * `stuff()' is running, and the new i_size will be lost. Plus the inode -+ * will no longer be on the superblock's dirty inode list. -+ */ -+int ext3cow_write_inode(struct inode *inode, int wait) -+{ -+ if (current->flags & PF_MEMALLOC) -+ return 0; -+ -+ if (ext3cow_journal_current_handle()) { -+ jbd_debug(0, "called recursively, non-PF_MEMALLOC!\n"); -+ dump_stack(); -+ return -EIO; -+ } -+ -+ if (!wait) -+ return 0; -+ -+ return ext3cow_force_commit(inode->i_sb); -+} -+ -+/* -+ * ext3cow_setattr() -+ * -+ * Called from notify_change. -+ * -+ * We want to trap VFS attempts to truncate the file as soon as -+ * possible. In particular, we want to make sure that when the VFS -+ * shrinks i_size, we put the inode on the orphan list and modify -+ * i_disksize immediately, so that during the subsequent flushing of -+ * dirty pages and freeing of disk blocks, we can guarantee that any -+ * commit will leave the blocks being flushed in an unused state on -+ * disk. (On recovery, the inode will get truncated and the blocks will -+ * be freed, so we have a strong guarantee that no future commit will -+ * leave these blocks visible to the user.) -+ * -+ * Called with inode->sem down. -+ */ -+int ext3cow_setattr(struct dentry *dentry, struct iattr *attr) -+{ -+ struct inode *inode = dentry->d_inode; -+ int error, rc = 0; -+ const unsigned int ia_valid = attr->ia_valid; -+ -+ error = inode_change_ok(inode, attr); -+ if (error) -+ return error; -+ -+ /* For versioning -znjp */ -+ if(is_unchangeable(inode, dentry)){ -+ error = -EROFS; -+ goto err_out; -+ } -+ -+ if(EXT3COW_S_EPOCHNUMBER(inode->i_sb) > EXT3COW_I_EPOCHNUMBER(inode)){ -+ error = ext3cow_dup_inode(dentry->d_parent->d_inode, inode); -+ if(error) -+ goto err_out; -+ } -+ -+ if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || -+ (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { -+ handle_t *handle; -+ -+ /* (user+group)*(old+new) structure, inode write (sb, -+ * inode block, ? - but truncate inode update has it) */ -+ handle = ext3cow_journal_start(inode, 2*(EXT3COW_QUOTA_INIT_BLOCKS(inode->i_sb)+ -+ EXT3COW_QUOTA_DEL_BLOCKS(inode->i_sb))+3); -+ if (IS_ERR(handle)) { -+ error = PTR_ERR(handle); -+ goto err_out; -+ } -+ error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0; -+ if (error) { -+ ext3cow_journal_stop(handle); -+ return error; -+ } -+ /* Update corresponding info in inode so that everything is in -+ * one transaction */ -+ if (attr->ia_valid & ATTR_UID) -+ inode->i_uid = attr->ia_uid; -+ if (attr->ia_valid & ATTR_GID) -+ inode->i_gid = attr->ia_gid; -+ error = ext3cow_mark_inode_dirty(handle, inode); -+ ext3cow_journal_stop(handle); -+ } -+ -+ if (S_ISREG(inode->i_mode) && -+ attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) { -+ handle_t *handle; -+ -+ handle = ext3cow_journal_start(inode, 3); -+ if (IS_ERR(handle)) { -+ error = PTR_ERR(handle); -+ goto err_out; -+ } -+ -+ error = ext3cow_orphan_add(handle, inode); -+ EXT3COW_I(inode)->i_disksize = attr->ia_size; -+ rc = ext3cow_mark_inode_dirty(handle, inode); -+ if (!error) -+ error = rc; -+ ext3cow_journal_stop(handle); -+ } -+ -+ rc = inode_setattr(inode, attr); -+ -+ /* If inode_setattr's call to ext3cow_truncate failed to get a -+ * transaction handle at all, we need to clean up the in-core -+ * orphan list manually. */ -+ if (inode->i_nlink) -+ ext3cow_orphan_del(NULL, inode); -+ -+ if (!rc && (ia_valid & ATTR_MODE)) -+ rc = ext3cow_acl_chmod(inode); -+ -+err_out: -+ ext3cow_std_error(inode->i_sb, error); -+ if (!error) -+ error = rc; -+ return error; -+} -+ -+ -+/* -+ * How many blocks doth make a writepage()? -+ * -+ * With N blocks per page, it may be: -+ * N data blocks -+ * 2 indirect block -+ * 2 dindirect -+ * 1 tindirect -+ * N+5 bitmap blocks (from the above) -+ * N+5 group descriptor summary blocks -+ * 1 inode block -+ * 1 superblock. -+ * 2 * EXT3COW_SINGLEDATA_TRANS_BLOCKS for the quote files -+ * -+ * 3 * (N + 5) + 2 + 2 * EXT3COW_SINGLEDATA_TRANS_BLOCKS -+ * -+ * With ordered or writeback data it's the same, less the N data blocks. -+ * -+ * If the inode's direct blocks can hold an integral number of pages then a -+ * page cannot straddle two indirect blocks, and we can only touch one indirect -+ * and dindirect block, and the "5" above becomes "3". -+ * -+ * This still overestimates under most circumstances. If we were to pass the -+ * start and end offsets in here as well we could do block_to_path() on each -+ * block and work out the exact number of indirects which are touched. Pah. -+ */ -+ -+static int ext3cow_writepage_trans_blocks(struct inode *inode) -+{ -+ int bpp = ext3cow_journal_blocks_per_page(inode); -+ int indirects = (EXT3COW_NDIR_BLOCKS % bpp) ? 5 : 3; -+ int ret; -+ -+ if (ext3cow_should_journal_data(inode)) -+ ret = 3 * (bpp + indirects) + 2; -+ else -+ ret = 2 * (bpp + indirects) + 2; -+ -+#ifdef CONFIG_QUOTA -+ /* We know that structure was already allocated during DQUOT_INIT so -+ * we will be updating only the data blocks + inodes */ -+ ret += 2*EXT3COW_QUOTA_TRANS_BLOCKS(inode->i_sb); -+#endif -+ -+ return ret; -+} -+ -+/* -+ * The caller must have previously called ext3cow_reserve_inode_write(). -+ * Give this, we know that the caller already has write access to iloc->bh. -+ */ -+int ext3cow_mark_iloc_dirty(handle_t *handle, -+ struct inode *inode, struct ext3cow_iloc *iloc) -+{ -+ int err = 0; -+ -+ /* the do_update_inode consumes one bh->b_count */ -+ get_bh(iloc->bh); -+ -+ /* ext3cow_do_update_inode() does journal_dirty_metadata */ -+ err = ext3cow_do_update_inode(handle, inode, iloc); -+ put_bh(iloc->bh); -+ return err; -+} -+ -+/* -+ * On success, We end up with an outstanding reference count against -+ * iloc->bh. This _must_ be cleaned up later. -+ */ -+ -+int -+ext3cow_reserve_inode_write(handle_t *handle, struct inode *inode, -+ struct ext3cow_iloc *iloc) -+{ -+ int err = 0; -+ if (handle) { -+ err = ext3cow_get_inode_loc(inode, iloc); -+ if (!err) { -+ BUFFER_TRACE(iloc->bh, "get_write_access"); -+ err = ext3cow_journal_get_write_access(handle, iloc->bh); -+ if (err) { -+ brelse(iloc->bh); -+ iloc->bh = NULL; -+ } -+ } -+ } -+ ext3cow_std_error(inode->i_sb, err); -+ return err; -+} -+ -+/* -+ * What we do here is to mark the in-core inode as clean with respect to inode -+ * dirtiness (it may still be data-dirty). -+ * This means that the in-core inode may be reaped by prune_icache -+ * without having to perform any I/O. This is a very good thing, -+ * because *any* task may call prune_icache - even ones which -+ * have a transaction open against a different journal. -+ * -+ * Is this cheating? Not really. Sure, we haven't written the -+ * inode out, but prune_icache isn't a user-visible syncing function. -+ * Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync) -+ * we start and wait on commits. -+ * -+ * Is this efficient/effective? Well, we're being nice to the system -+ * by cleaning up our inodes proactively so they can be reaped -+ * without I/O. But we are potentially leaving up to five seconds' -+ * worth of inodes floating about which prune_icache wants us to -+ * write out. One way to fix that would be to get prune_icache() -+ * to do a write_super() to free up some memory. It has the desired -+ * effect. -+ */ -+int ext3cow_mark_inode_dirty(handle_t *handle, struct inode *inode) -+{ -+ struct ext3cow_iloc iloc; -+ int err; -+ -+ if(EXT3COW_IS_FAKEINODE(inode)) -+ return 0; -+ -+ might_sleep(); -+ err = ext3cow_reserve_inode_write(handle, inode, &iloc); -+ if (!err) -+ err = ext3cow_mark_iloc_dirty(handle, inode, &iloc); -+ return err; -+} -+ -+/* -+ * ext3cow_dirty_inode() is called from __mark_inode_dirty() -+ * -+ * We're really interested in the case where a file is being extended. -+ * i_size has been changed by generic_commit_write() and we thus need -+ * to include the updated inode in the current transaction. -+ * -+ * Also, DQUOT_ALLOC_SPACE() will always dirty the inode when blocks -+ * are allocated to the file. -+ * -+ * If the inode is marked synchronous, we don't honour that here - doing -+ * so would cause a commit on atime updates, which we don't bother doing. -+ * We handle synchronous inodes at the highest possible level. -+ */ -+void ext3cow_dirty_inode(struct inode *inode) -+{ -+ handle_t *current_handle = ext3cow_journal_current_handle(); -+ handle_t *handle; -+ -+ handle = ext3cow_journal_start(inode, 2); -+ if (IS_ERR(handle)) -+ goto out; -+ if (current_handle && -+ current_handle->h_transaction != handle->h_transaction) { -+ /* This task has a transaction open against a different fs */ -+ printk(KERN_EMERG "%s: transactions do not match!\n", -+ __FUNCTION__); -+ } else { -+ jbd_debug(5, "marking dirty. outer handle=%p\n", -+ current_handle); -+ ext3cow_mark_inode_dirty(handle, inode); -+ } -+ ext3cow_journal_stop(handle); -+out: -+ return; -+} -+ -+#if 0 -+/* -+ * Bind an inode's backing buffer_head into this transaction, to prevent -+ * it from being flushed to disk early. Unlike -+ * ext3cow_reserve_inode_write, this leaves behind no bh reference and -+ * returns no iloc structure, so the caller needs to repeat the iloc -+ * lookup to mark the inode dirty later. -+ */ -+static int ext3cow_pin_inode(handle_t *handle, struct inode *inode) -+{ -+ struct ext3cow_iloc iloc; -+ -+ int err = 0; -+ if (handle) { -+ err = ext3cow_get_inode_loc(inode, &iloc); -+ if (!err) { -+ BUFFER_TRACE(iloc.bh, "get_write_access"); -+ err = journal_get_write_access(handle, iloc.bh); -+ if (!err) -+ err = ext3cow_journal_dirty_metadata(handle, -+ iloc.bh); -+ brelse(iloc.bh); -+ } -+ } -+ ext3cow_std_error(inode->i_sb, err); -+ return err; -+} -+#endif -+ -+int ext3cow_change_inode_journal_flag(struct inode *inode, int val) -+{ -+ journal_t *journal; -+ handle_t *handle; -+ int err; -+ -+ /* -+ * We have to be very careful here: changing a data block's -+ * journaling status dynamically is dangerous. If we write a -+ * data block to the journal, change the status and then delete -+ * that block, we risk forgetting to revoke the old log record -+ * from the journal and so a subsequent replay can corrupt data. -+ * So, first we make sure that the journal is empty and that -+ * nobody is changing anything. -+ */ -+ -+ journal = EXT3COW_JOURNAL(inode); -+ if (is_journal_aborted(journal) || IS_RDONLY(inode)) -+ return -EROFS; -+ -+ journal_lock_updates(journal); -+ journal_flush(journal); -+ -+ /* -+ * OK, there are no updates running now, and all cached data is -+ * synced to disk. We are now in a completely consistent state -+ * which doesn't have anything in the journal, and we know that -+ * no filesystem updates are running, so it is safe to modify -+ * the inode's in-core data-journaling state flag now. -+ */ -+ -+ if (val) -+ EXT3COW_I(inode)->i_flags |= EXT3COW_JOURNAL_DATA_FL; -+ else -+ EXT3COW_I(inode)->i_flags &= ~EXT3COW_JOURNAL_DATA_FL; -+ ext3cow_set_aops(inode); -+ -+ journal_unlock_updates(journal); -+ -+ /* Finally we can mark the inode as dirty. */ -+ -+ handle = ext3cow_journal_start(inode, 1); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ err = ext3cow_mark_inode_dirty(handle, inode); -+ handle->h_sync = 1; -+ ext3cow_journal_stop(handle); -+ ext3cow_std_error(inode->i_sb, err); -+ -+ return err; -+} -diff -Naur linux-2.6.21.7/fs/ext3cow/ioctl.c linux-2.6.21.7_ext3cowPatched/fs/ext3cow/ioctl.c ---- linux-2.6.21.7/fs/ext3cow/ioctl.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.21.7_ext3cowPatched/fs/ext3cow/ioctl.c 2007-10-23 17:47:18.000000000 +0200 -@@ -0,0 +1,312 @@ -+/* -+ * linux/fs/ext3cow/ioctl.c -+ * -+ * Copyright (C) 1993, 1994, 1995 -+ * Remy Card (card@masi.ibp.fr) -+ * Laboratoire MASI - Institut Blaise Pascal -+ * Universite Pierre et Marie Curie (Paris VI) -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+int ext3cow_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, -+ unsigned long arg) -+{ -+ struct ext3cow_inode_info *ei = EXT3COW_I(inode); -+ unsigned int flags; -+ unsigned short rsv_window_size; -+ -+ ext3cow_debug ("cmd = %u, arg = %lu\n", cmd, arg); -+ -+ switch (cmd) { -+ /* Some IOCTLs for version */ -+ case EXT3COW_IOC_TAKESNAPSHOT: -+ return (unsigned int)ext3cow_take_snapshot(inode->i_sb); -+ case EXT3COW_IOC_GETEPOCH: -+ return (unsigned int)EXT3COW_S_EPOCHNUMBER(inode->i_sb); -+ case EXT3COW_IOC_GETFLAGS: -+ flags = ei->i_flags & EXT3COW_FL_USER_VISIBLE; -+ return put_user(flags, (int __user *) arg); -+ case EXT3COW_IOC_SETFLAGS: { -+ handle_t *handle = NULL; -+ int err; -+ struct ext3cow_iloc iloc; -+ unsigned int oldflags; -+ unsigned int jflag; -+ -+ if (IS_RDONLY(inode)) -+ return -EROFS; -+ -+ if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) -+ return -EACCES; -+ -+ if (get_user(flags, (int __user *) arg)) -+ return -EFAULT; -+ -+ if (!S_ISDIR(inode->i_mode)) -+ flags &= ~EXT3COW_DIRSYNC_FL; -+ -+ mutex_lock(&inode->i_mutex); -+ oldflags = ei->i_flags; -+ -+ /* The JOURNAL_DATA flag is modifiable only by root */ -+ jflag = flags & EXT3COW_JOURNAL_DATA_FL; -+ -+ /* -+ * The IMMUTABLE and APPEND_ONLY flags can only be changed by -+ * the relevant capability. -+ * -+ * This test looks nicer. Thanks to Pauline Middelink -+ */ -+ if ((flags ^ oldflags) & (EXT3COW_APPEND_FL | EXT3COW_IMMUTABLE_FL)) { -+ if (!capable(CAP_LINUX_IMMUTABLE)) { -+ mutex_unlock(&inode->i_mutex); -+ return -EPERM; -+ } -+ } -+ -+ /* -+ * The JOURNAL_DATA flag can only be changed by -+ * the relevant capability. -+ */ -+ if ((jflag ^ oldflags) & (EXT3COW_JOURNAL_DATA_FL)) { -+ if (!capable(CAP_SYS_RESOURCE)) { -+ mutex_unlock(&inode->i_mutex); -+ return -EPERM; -+ } -+ } -+ -+ -+ handle = ext3cow_journal_start(inode, 1); -+ if (IS_ERR(handle)) { -+ mutex_unlock(&inode->i_mutex); -+ return PTR_ERR(handle); -+ } -+ if (IS_SYNC(inode)) -+ handle->h_sync = 1; -+ err = ext3cow_reserve_inode_write(handle, inode, &iloc); -+ if (err) -+ goto flags_err; -+ -+ flags = flags & EXT3COW_FL_USER_MODIFIABLE; -+ flags |= oldflags & ~EXT3COW_FL_USER_MODIFIABLE; -+ ei->i_flags = flags; -+ -+ ext3cow_set_inode_flags(inode); -+ inode->i_ctime = CURRENT_TIME_SEC; -+ -+ err = ext3cow_mark_iloc_dirty(handle, inode, &iloc); -+flags_err: -+ ext3cow_journal_stop(handle); -+ if (err) { -+ mutex_unlock(&inode->i_mutex); -+ return err; -+ } -+ -+ if ((jflag ^ oldflags) & (EXT3COW_JOURNAL_DATA_FL)) -+ err = ext3cow_change_inode_journal_flag(inode, jflag); -+ mutex_unlock(&inode->i_mutex); -+ return err; -+ } -+ case EXT3COW_IOC_GETVERSION: -+ case EXT3COW_IOC_GETVERSION_OLD: -+ return put_user(inode->i_generation, (int __user *) arg); -+ case EXT3COW_IOC_SETVERSION: -+ case EXT3COW_IOC_SETVERSION_OLD: { -+ handle_t *handle; -+ struct ext3cow_iloc iloc; -+ __u32 generation; -+ int err; -+ -+ if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) -+ return -EPERM; -+ if (IS_RDONLY(inode)) -+ return -EROFS; -+ if (get_user(generation, (int __user *) arg)) -+ return -EFAULT; -+ -+ handle = ext3cow_journal_start(inode, 1); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ err = ext3cow_reserve_inode_write(handle, inode, &iloc); -+ if (err == 0) { -+ inode->i_ctime = CURRENT_TIME_SEC; -+ inode->i_generation = generation; -+ err = ext3cow_mark_iloc_dirty(handle, inode, &iloc); -+ } -+ ext3cow_journal_stop(handle); -+ return err; -+ } -+#ifdef CONFIG_JBD_DEBUG -+ case EXT3COW_IOC_WAIT_FOR_READONLY: -+ /* -+ * This is racy - by the time we're woken up and running, -+ * the superblock could be released. And the module could -+ * have been unloaded. So sue me. -+ * -+ * Returns 1 if it slept, else zero. -+ */ -+ { -+ struct super_block *sb = inode->i_sb; -+ DECLARE_WAITQUEUE(wait, current); -+ int ret = 0; -+ -+ set_current_state(TASK_INTERRUPTIBLE); -+ add_wait_queue(&EXT3COW_SB(sb)->ro_wait_queue, &wait); -+ if (timer_pending(&EXT3COW_SB(sb)->turn_ro_timer)) { -+ schedule(); -+ ret = 1; -+ } -+ remove_wait_queue(&EXT3COW_SB(sb)->ro_wait_queue, &wait); -+ return ret; -+ } -+#endif -+ case EXT3COW_IOC_GETRSVSZ: -+ if (test_opt(inode->i_sb, RESERVATION) -+ && S_ISREG(inode->i_mode) -+ && ei->i_block_alloc_info) { -+ rsv_window_size = ei->i_block_alloc_info->rsv_window_node.rsv_goal_size; -+ return put_user(rsv_window_size, (int __user *)arg); -+ } -+ return -ENOTTY; -+ case EXT3COW_IOC_SETRSVSZ: { -+ -+ if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) -+ return -ENOTTY; -+ -+ if (IS_RDONLY(inode)) -+ return -EROFS; -+ -+ if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) -+ return -EACCES; -+ -+ if (get_user(rsv_window_size, (int __user *)arg)) -+ return -EFAULT; -+ -+ if (rsv_window_size > EXT3COW_MAX_RESERVE_BLOCKS) -+ rsv_window_size = EXT3COW_MAX_RESERVE_BLOCKS; -+ -+ /* -+ * need to allocate reservation structure for this inode -+ * before set the window size -+ */ -+ mutex_lock(&ei->truncate_mutex); -+ if (!ei->i_block_alloc_info) -+ ext3cow_init_block_alloc_info(inode); -+ -+ if (ei->i_block_alloc_info){ -+ struct ext3cow_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node; -+ rsv->rsv_goal_size = rsv_window_size; -+ } -+ mutex_unlock(&ei->truncate_mutex); -+ return 0; -+ } -+ case EXT3COW_IOC_GROUP_EXTEND: { -+ ext3cow_fsblk_t n_blocks_count; -+ struct super_block *sb = inode->i_sb; -+ int err; -+ -+ if (!capable(CAP_SYS_RESOURCE)) -+ return -EPERM; -+ -+ if (IS_RDONLY(inode)) -+ return -EROFS; -+ -+ if (get_user(n_blocks_count, (__u32 __user *)arg)) -+ return -EFAULT; -+ -+ err = ext3cow_group_extend(sb, EXT3COW_SB(sb)->s_es, n_blocks_count); -+ journal_lock_updates(EXT3COW_SB(sb)->s_journal); -+ journal_flush(EXT3COW_SB(sb)->s_journal); -+ journal_unlock_updates(EXT3COW_SB(sb)->s_journal); -+ -+ return err; -+ } -+ case EXT3COW_IOC_GROUP_ADD: { -+ struct ext3cow_new_group_data input; -+ struct super_block *sb = inode->i_sb; -+ int err; -+ -+ if (!capable(CAP_SYS_RESOURCE)) -+ return -EPERM; -+ -+ if (IS_RDONLY(inode)) -+ return -EROFS; -+ -+ if (copy_from_user(&input, (struct ext3cow_new_group_input __user *)arg, -+ sizeof(input))) -+ return -EFAULT; -+ -+ err = ext3cow_group_add(sb, &input); -+ journal_lock_updates(EXT3COW_SB(sb)->s_journal); -+ journal_flush(EXT3COW_SB(sb)->s_journal); -+ journal_unlock_updates(EXT3COW_SB(sb)->s_journal); -+ -+ return err; -+ } -+ -+ -+ default: -+ return -ENOTTY; -+ } -+} -+ -+#ifdef CONFIG_COMPAT -+long ext3cow_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -+{ -+ struct inode *inode = file->f_path.dentry->d_inode; -+ int ret; -+ -+ /* These are just misnamed, they actually get/put from/to user an int */ -+ switch (cmd) { -+ case EXT3COW_IOC32_GETFLAGS: -+ cmd = EXT3COW_IOC_GETFLAGS; -+ break; -+ case EXT3COW_IOC32_SETFLAGS: -+ cmd = EXT3COW_IOC_SETFLAGS; -+ break; -+ case EXT3COW_IOC32_GETVERSION: -+ cmd = EXT3COW_IOC_GETVERSION; -+ break; -+ case EXT3COW_IOC32_SETVERSION: -+ cmd = EXT3COW_IOC_SETVERSION; -+ break; -+ case EXT3COW_IOC32_GROUP_EXTEND: -+ cmd = EXT3COW_IOC_GROUP_EXTEND; -+ break; -+ case EXT3COW_IOC32_GETVERSION_OLD: -+ cmd = EXT3COW_IOC_GETVERSION_OLD; -+ break; -+ case EXT3COW_IOC32_SETVERSION_OLD: -+ cmd = EXT3COW_IOC_SETVERSION_OLD; -+ break; -+#ifdef CONFIG_JBD_DEBUG -+ case EXT3COW_IOC32_WAIT_FOR_READONLY: -+ cmd = EXT3COW_IOC_WAIT_FOR_READONLY; -+ break; -+#endif -+ case EXT3COW_IOC32_GETRSVSZ: -+ cmd = EXT3COW_IOC_GETRSVSZ; -+ break; -+ case EXT3COW_IOC32_SETRSVSZ: -+ cmd = EXT3COW_IOC_SETRSVSZ; -+ break; -+ case EXT3COW_IOC_GROUP_ADD: -+ break; -+ default: -+ return -ENOIOCTLCMD; -+ } -+ lock_kernel(); -+ ret = ext3cow_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg)); -+ unlock_kernel(); -+ return ret; -+} -+#endif -diff -Naur linux-2.6.21.7/fs/ext3cow/Makefile linux-2.6.21.7_ext3cowPatched/fs/ext3cow/Makefile ---- linux-2.6.21.7/fs/ext3cow/Makefile 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.21.7_ext3cowPatched/fs/ext3cow/Makefile 2007-10-23 17:47:18.000000000 +0200 -@@ -0,0 +1,12 @@ -+# -+# Makefile for the linux ext3cow-filesystem routines. -+# -+ -+obj-$(CONFIG_EXT3COW_FS) += ext3cow.o -+ -+ext3cow-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ -+ ioctl.o namei.o super.o symlink.o hash.o resize.o ext3cow_jbd.o -+ -+ext3cow-$(CONFIG_EXT3COW_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o -+ext3cow-$(CONFIG_EXT3COW_FS_POSIX_ACL) += acl.o -+ext3cow-$(CONFIG_EXT3COW_FS_SECURITY) += xattr_security.o -diff -Naur linux-2.6.21.7/fs/ext3cow/namei.c linux-2.6.21.7_ext3cowPatched/fs/ext3cow/namei.c ---- linux-2.6.21.7/fs/ext3cow/namei.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.21.7_ext3cowPatched/fs/ext3cow/namei.c 2007-10-23 17:47:18.000000000 +0200 -@@ -0,0 +1,2981 @@ -+/* -+ * linux/fs/ext3cow/namei.c -+ * -+ * Copyright (C) 1992, 1993, 1994, 1995 -+ * Remy Card (card@masi.ibp.fr) -+ * Laboratoire MASI - Institut Blaise Pascal -+ * Universite Pierre et Marie Curie (Paris VI) -+ * -+ * from -+ * -+ * linux/fs/minix/namei.c -+ * -+ * Copyright (C) 1991, 1992 Linus Torvalds -+ * -+ * Big-endian to little-endian byte-swapping/bitmaps by -+ * David S. Miller (davem@caip.rutgers.edu), 1995 -+ * Directory entry file type support and forward compatibility hooks -+ * for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998 -+ * Hash Tree Directory indexing (c) -+ * Daniel Phillips, 2001 -+ * Hash Tree Directory indexing porting -+ * Christopher Li, 2002 -+ * Hash Tree Directory indexing cleanup -+ * Theodore Ts'o, 2002 -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "namei.h" -+#include "xattr.h" -+#include "acl.h" -+ -+/* -+ * define how far ahead to read directories while searching them. -+ */ -+#define NAMEI_RA_CHUNKS 2 -+#define NAMEI_RA_BLOCKS 4 -+#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS) -+#define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b)) -+ -+/* is the inode marked unchangeable or does the name -+ contain an epoch less than the current system epoch -znjp */ -+int is_unchangeable(struct inode *inode, struct dentry *dentry){ -+ -+ char *at = NULL; -+ -+ if (inode && (EXT3COW_IS_UNCHANGEABLE(inode) || IS_IMMUTABLE(inode))) -+ return 1; -+ if(dentry) -+ at = strrchr(dentry->d_name.name, EXT3COW_FLUX_TOKEN); -+ if(at && (simple_strtol(&at[1], (char **)NULL, 10) > 0)) -+ return 1; -+ -+ return 0; -+} -+ -+static struct buffer_head *ext3cow_append(handle_t *handle, -+ struct inode *inode, -+ u32 *block, int *err) -+{ -+ struct buffer_head *bh; -+ -+ *block = inode->i_size >> inode->i_sb->s_blocksize_bits; -+ -+ if ((bh = ext3cow_bread(handle, inode, *block, 1, err))) { -+ inode->i_size += inode->i_sb->s_blocksize; -+ EXT3COW_I(inode)->i_disksize = inode->i_size; -+ ext3cow_journal_get_write_access(handle,bh); -+ } -+ return bh; -+} -+ -+#ifndef assert -+#define assert(test) J_ASSERT(test) -+#endif -+ -+#ifndef swap -+#define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0) -+#endif -+ -+#ifdef DX_DEBUG -+#define dxtrace(command) command -+#else -+#define dxtrace(command) -+#endif -+ -+struct fake_dirent -+{ -+ __le32 inode; -+ __le16 rec_len; -+ u8 name_len; -+ u8 file_type; -+}; -+ -+struct dx_countlimit -+{ -+ __le16 limit; -+ __le16 count; -+}; -+ -+struct dx_entry -+{ -+ __le32 hash; -+ __le32 block; -+}; -+ -+/* -+ * dx_root_info is laid out so that if it should somehow get overlaid by a -+ * dirent the two low bits of the hash version will be zero. Therefore, the -+ * hash version mod 4 should never be 0. Sincerely, the paranoia department. -+ */ -+ -+struct dx_root -+{ -+ struct fake_dirent dot; -+ char dot_name[4]; -+ struct fake_dirent dotdot; -+ char dotdot_name[4]; -+ struct dx_root_info -+ { -+ __le32 reserved_zero; -+ u8 hash_version; -+ u8 info_length; /* 8 */ -+ u8 indirect_levels; -+ u8 unused_flags; -+ } -+ info; -+ struct dx_entry entries[0]; -+}; -+ -+struct dx_node -+{ -+ struct fake_dirent fake; -+ struct dx_entry entries[0]; -+}; -+ -+ -+struct dx_frame -+{ -+ struct buffer_head *bh; -+ struct dx_entry *entries; -+ struct dx_entry *at; -+}; -+ -+struct dx_map_entry -+{ -+ u32 hash; -+ u32 offs; -+}; -+ -+#ifdef CONFIG_EXT3COW_INDEX -+static inline unsigned dx_get_block (struct dx_entry *entry); -+static void dx_set_block (struct dx_entry *entry, unsigned value); -+static inline unsigned dx_get_hash (struct dx_entry *entry); -+static void dx_set_hash (struct dx_entry *entry, unsigned value); -+static unsigned dx_get_count (struct dx_entry *entries); -+static unsigned dx_get_limit (struct dx_entry *entries); -+static void dx_set_count (struct dx_entry *entries, unsigned value); -+static void dx_set_limit (struct dx_entry *entries, unsigned value); -+static unsigned dx_root_limit (struct inode *dir, unsigned infosize); -+static unsigned dx_node_limit (struct inode *dir); -+static struct dx_frame *dx_probe(struct dentry *dentry, -+ struct inode *dir, -+ struct dx_hash_info *hinfo, -+ struct dx_frame *frame, -+ int *err); -+static void dx_release (struct dx_frame *frames); -+static int dx_make_map (struct ext3cow_dir_entry_2 *de, int size, -+ struct dx_hash_info *hinfo, struct dx_map_entry map[]); -+static void dx_sort_map(struct dx_map_entry *map, unsigned count); -+static struct ext3cow_dir_entry_2 *dx_move_dirents (char *from, char *to, -+ struct dx_map_entry *offsets, int count); -+static struct ext3cow_dir_entry_2* dx_pack_dirents (char *base, int size); -+static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block); -+static int ext3cow_htree_next_block(struct inode *dir, __u32 hash, -+ struct dx_frame *frame, -+ struct dx_frame *frames, -+ __u32 *start_hash); -+static struct buffer_head * ext3cow_dx_find_entry(struct dentry *dentry, -+ struct ext3cow_dir_entry_2 **res_dir, int *err); -+static int ext3cow_dx_add_entry(handle_t *handle, struct dentry *dentry, -+ struct inode *inode); -+ -+/* -+ * Future: use high four bits of block for coalesce-on-delete flags -+ * Mask them off for now. -+ */ -+ -+static inline unsigned dx_get_block (struct dx_entry *entry) -+{ -+ return le32_to_cpu(entry->block) & 0x00ffffff; -+} -+ -+static inline void dx_set_block (struct dx_entry *entry, unsigned value) -+{ -+ entry->block = cpu_to_le32(value); -+} -+ -+static inline unsigned dx_get_hash (struct dx_entry *entry) -+{ -+ return le32_to_cpu(entry->hash); -+} -+ -+static inline void dx_set_hash (struct dx_entry *entry, unsigned value) -+{ -+ entry->hash = cpu_to_le32(value); -+} -+ -+static inline unsigned dx_get_count (struct dx_entry *entries) -+{ -+ return le16_to_cpu(((struct dx_countlimit *) entries)->count); -+} -+ -+static inline unsigned dx_get_limit (struct dx_entry *entries) -+{ -+ return le16_to_cpu(((struct dx_countlimit *) entries)->limit); -+} -+ -+static inline void dx_set_count (struct dx_entry *entries, unsigned value) -+{ -+ ((struct dx_countlimit *) entries)->count = cpu_to_le16(value); -+} -+ -+static inline void dx_set_limit (struct dx_entry *entries, unsigned value) -+{ -+ ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value); -+} -+ -+static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize) -+{ -+ unsigned entry_space = dir->i_sb->s_blocksize - EXT3COW_DIR_REC_LEN(1) - -+ EXT3COW_DIR_REC_LEN(2) - infosize; -+ return 0? 20: entry_space / sizeof(struct dx_entry); -+} -+ -+static inline unsigned dx_node_limit (struct inode *dir) -+{ -+ unsigned entry_space = dir->i_sb->s_blocksize - EXT3COW_DIR_REC_LEN(0); -+ return 0? 22: entry_space / sizeof(struct dx_entry); -+} -+ -+/* -+ * Debug -+ */ -+#ifdef DX_DEBUG -+static void dx_show_index (char * label, struct dx_entry *entries) -+{ -+ int i, n = dx_get_count (entries); -+ printk("%s index ", label); -+ for (i = 0; i < n; i++) -+ { -+ printk("%x->%u ", i? dx_get_hash(entries + i): 0, dx_get_block(entries + i)); -+ } -+ printk("\n"); -+} -+ -+struct stats -+{ -+ unsigned names; -+ unsigned space; -+ unsigned bcount; -+}; -+ -+static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3cow_dir_entry_2 *de, -+ int size, int show_names) -+{ -+ unsigned names = 0, space = 0; -+ char *base = (char *) de; -+ struct dx_hash_info h = *hinfo; -+ -+ printk("names: "); -+ while ((char *) de < base + size) -+ { -+ if (de->inode) -+ { -+ if (show_names) -+ { -+ int len = de->name_len; -+ char *name = de->name; -+ while (len--) printk("%c", *name++); -+ ext3cowfs_dirhash(de->name, de->name_len, &h); -+ printk(":%x.%u ", h.hash, -+ ((char *) de - base)); -+ } -+ space += EXT3COW_DIR_REC_LEN(de->name_len); -+ names++; -+ } -+ de = (struct ext3cow_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); -+ } -+ printk("(%i)\n", names); -+ return (struct stats) { names, space, 1 }; -+} -+ -+struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, -+ struct dx_entry *entries, int levels) -+{ -+ unsigned blocksize = dir->i_sb->s_blocksize; -+ unsigned count = dx_get_count (entries), names = 0, space = 0, i; -+ unsigned bcount = 0; -+ struct buffer_head *bh; -+ int err; -+ printk("%i indexed blocks...\n", count); -+ for (i = 0; i < count; i++, entries++) -+ { -+ u32 block = dx_get_block(entries), hash = i? dx_get_hash(entries): 0; -+ u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash; -+ struct stats stats; -+ printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range); -+ if (!(bh = ext3cow_bread (NULL,dir, block, 0,&err))) continue; -+ stats = levels? -+ dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1): -+ dx_show_leaf(hinfo, (struct ext3cow_dir_entry_2 *) bh->b_data, blocksize, 0); -+ names += stats.names; -+ space += stats.space; -+ bcount += stats.bcount; -+ brelse (bh); -+ } -+ if (bcount) -+ printk("%snames %u, fullness %u (%u%%)\n", levels?"":" ", -+ names, space/bcount,(space/bcount)*100/blocksize); -+ return (struct stats) { names, space, bcount}; -+} -+#endif /* DX_DEBUG */ -+ -+/* -+ * Probe for a directory leaf block to search. -+ * -+ * dx_probe can return ERR_BAD_DX_DIR, which means there was a format -+ * error in the directory index, and the caller should fall back to -+ * searching the directory normally. The callers of dx_probe **MUST** -+ * check for this error code, and make sure it never gets reflected -+ * back to userspace. -+ */ -+static struct dx_frame * -+dx_probe(struct dentry *dentry, struct inode *dir, -+ struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err) -+{ -+ unsigned count, indirect; -+ struct dx_entry *at, *entries, *p, *q, *m; -+ struct dx_root *root; -+ struct buffer_head *bh; -+ struct dx_frame *frame = frame_in; -+ u32 hash; -+ -+ frame->bh = NULL; -+ if (dentry) -+ dir = dentry->d_parent->d_inode; -+ if (!(bh = ext3cow_bread (NULL,dir, 0, 0, err))) -+ goto fail; -+ root = (struct dx_root *) bh->b_data; -+ if (root->info.hash_version != DX_HASH_TEA && -+ root->info.hash_version != DX_HASH_HALF_MD4 && -+ root->info.hash_version != DX_HASH_LEGACY) { -+ ext3cow_warning(dir->i_sb, __FUNCTION__, -+ "Unrecognised inode hash code %d", -+ root->info.hash_version); -+ brelse(bh); -+ *err = ERR_BAD_DX_DIR; -+ goto fail; -+ } -+ hinfo->hash_version = root->info.hash_version; -+ hinfo->seed = EXT3COW_SB(dir->i_sb)->s_hash_seed; -+ if (dentry) -+ ext3cowfs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo); -+ hash = hinfo->hash; -+ -+ if (root->info.unused_flags & 1) { -+ ext3cow_warning(dir->i_sb, __FUNCTION__, -+ "Unimplemented inode hash flags: %#06x", -+ root->info.unused_flags); -+ brelse(bh); -+ *err = ERR_BAD_DX_DIR; -+ goto fail; -+ } -+ -+ if ((indirect = root->info.indirect_levels) > 1) { -+ ext3cow_warning(dir->i_sb, __FUNCTION__, -+ "Unimplemented inode hash depth: %#06x", -+ root->info.indirect_levels); -+ brelse(bh); -+ *err = ERR_BAD_DX_DIR; -+ goto fail; -+ } -+ -+ entries = (struct dx_entry *) (((char *)&root->info) + -+ root->info.info_length); -+ assert(dx_get_limit(entries) == dx_root_limit(dir, -+ root->info.info_length)); -+ dxtrace (printk("Look up %x", hash)); -+ while (1) -+ { -+ count = dx_get_count(entries); -+ assert (count && count <= dx_get_limit(entries)); -+ p = entries + 1; -+ q = entries + count - 1; -+ while (p <= q) -+ { -+ m = p + (q - p)/2; -+ dxtrace(printk(".")); -+ if (dx_get_hash(m) > hash) -+ q = m - 1; -+ else -+ p = m + 1; -+ } -+ -+ if (0) // linear search cross check -+ { -+ unsigned n = count - 1; -+ at = entries; -+ while (n--) -+ { -+ dxtrace(printk(",")); -+ if (dx_get_hash(++at) > hash) -+ { -+ at--; -+ break; -+ } -+ } -+ assert (at == p - 1); -+ } -+ -+ at = p - 1; -+ dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at))); -+ frame->bh = bh; -+ frame->entries = entries; -+ frame->at = at; -+ if (!indirect--) return frame; -+ if (!(bh = ext3cow_bread (NULL,dir, dx_get_block(at), 0, err))) -+ goto fail2; -+ at = entries = ((struct dx_node *) bh->b_data)->entries; -+ assert (dx_get_limit(entries) == dx_node_limit (dir)); -+ frame++; -+ } -+fail2: -+ while (frame >= frame_in) { -+ brelse(frame->bh); -+ frame--; -+ } -+fail: -+ return NULL; -+} -+ -+static void dx_release (struct dx_frame *frames) -+{ -+ if (frames[0].bh == NULL) -+ return; -+ -+ if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels) -+ brelse(frames[1].bh); -+ brelse(frames[0].bh); -+} -+ -+/* -+ * This function increments the frame pointer to search the next leaf -+ * block, and reads in the necessary intervening nodes if the search -+ * should be necessary. Whether or not the search is necessary is -+ * controlled by the hash parameter. If the hash value is even, then -+ * the search is only continued if the next block starts with that -+ * hash value. This is used if we are searching for a specific file. -+ * -+ * If the hash value is HASH_NB_ALWAYS, then always go to the next block. -+ * -+ * This function returns 1 if the caller should continue to search, -+ * or 0 if it should not. If there is an error reading one of the -+ * index blocks, it will a negative error code. -+ * -+ * If start_hash is non-null, it will be filled in with the starting -+ * hash of the next page. -+ */ -+static int ext3cow_htree_next_block(struct inode *dir, __u32 hash, -+ struct dx_frame *frame, -+ struct dx_frame *frames, -+ __u32 *start_hash) -+{ -+ struct dx_frame *p; -+ struct buffer_head *bh; -+ int err, num_frames = 0; -+ __u32 bhash; -+ -+ p = frame; -+ /* -+ * Find the next leaf page by incrementing the frame pointer. -+ * If we run out of entries in the interior node, loop around and -+ * increment pointer in the parent node. When we break out of -+ * this loop, num_frames indicates the number of interior -+ * nodes need to be read. -+ */ -+ while (1) { -+ if (++(p->at) < p->entries + dx_get_count(p->entries)) -+ break; -+ if (p == frames) -+ return 0; -+ num_frames++; -+ p--; -+ } -+ -+ /* -+ * If the hash is 1, then continue only if the next page has a -+ * continuation hash of any value. This is used for readdir -+ * handling. Otherwise, check to see if the hash matches the -+ * desired contiuation hash. If it doesn't, return since -+ * there's no point to read in the successive index pages. -+ */ -+ bhash = dx_get_hash(p->at); -+ if (start_hash) -+ *start_hash = bhash; -+ if ((hash & 1) == 0) { -+ if ((bhash & ~1) != hash) -+ return 0; -+ } -+ /* -+ * If the hash is HASH_NB_ALWAYS, we always go to the next -+ * block so no check is necessary -+ */ -+ while (num_frames--) { -+ if (!(bh = ext3cow_bread(NULL, dir, dx_get_block(p->at), -+ 0, &err))) -+ return err; /* Failure */ -+ p++; -+ brelse (p->bh); -+ p->bh = bh; -+ p->at = p->entries = ((struct dx_node *) bh->b_data)->entries; -+ } -+ return 1; -+} -+ -+ -+/* -+ * p is at least 6 bytes before the end of page -+ */ -+static inline struct ext3cow_dir_entry_2 *ext3cow_next_entry(struct ext3cow_dir_entry_2 *p) -+{ -+ return (struct ext3cow_dir_entry_2 *)((char*)p + le16_to_cpu(p->rec_len)); -+} -+ -+/* -+ * This function fills a red-black tree with information from a -+ * directory block. It returns the number directory entries loaded -+ * into the tree. If there is an error it is returned in err. -+ */ -+static int htree_dirblock_to_tree(struct file *dir_file, -+ struct inode *dir, int block, -+ struct dx_hash_info *hinfo, -+ __u32 start_hash, __u32 start_minor_hash) -+{ -+ struct buffer_head *bh; -+ struct ext3cow_dir_entry_2 *de, *top; -+ int err, count = 0; -+ -+ dxtrace(printk("In htree dirblock_to_tree: block %d\n", block)); -+ if (!(bh = ext3cow_bread (NULL, dir, block, 0, &err))) -+ return err; -+ -+ de = (struct ext3cow_dir_entry_2 *) bh->b_data; -+ top = (struct ext3cow_dir_entry_2 *) ((char *) de + -+ dir->i_sb->s_blocksize - -+ EXT3COW_DIR_REC_LEN(0)); -+ for (; de < top; de = ext3cow_next_entry(de)) { -+ if (!ext3cow_check_dir_entry("htree_dirblock_to_tree", dir, de, bh, -+ (block<i_sb)) -+ +((char *)de - bh->b_data))) { -+ /* On error, skip the f_pos to the next block. */ -+ dir_file->f_pos = (dir_file->f_pos | -+ (dir->i_sb->s_blocksize - 1)) + 1; -+ brelse (bh); -+ return count; -+ } -+ ext3cowfs_dirhash(de->name, de->name_len, hinfo); -+ if ((hinfo->hash < start_hash) || -+ ((hinfo->hash == start_hash) && -+ (hinfo->minor_hash < start_minor_hash))) -+ continue; -+ if (de->inode == 0) -+ continue; -+ if ((err = ext3cow_htree_store_dirent(dir_file, -+ hinfo->hash, hinfo->minor_hash, de)) != 0) { -+ brelse(bh); -+ return err; -+ } -+ count++; -+ } -+ brelse(bh); -+ return count; -+} -+ -+ -+/* -+ * This function fills a red-black tree with information from a -+ * directory. We start scanning the directory in hash order, starting -+ * at start_hash and start_minor_hash. -+ * -+ * This function returns the number of entries inserted into the tree, -+ * or a negative error code. -+ */ -+int ext3cow_htree_fill_tree(struct file *dir_file, __u32 start_hash, -+ __u32 start_minor_hash, __u32 *next_hash) -+{ -+ struct dx_hash_info hinfo; -+ struct ext3cow_dir_entry_2 *de; -+ struct dx_frame frames[2], *frame; -+ struct inode *dir; -+ int block, err; -+ int count = 0; -+ int ret; -+ __u32 hashval; -+ -+ dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash, -+ start_minor_hash)); -+ dir = dir_file->f_path.dentry->d_inode; -+ if (!(EXT3COW_I(dir)->i_flags & EXT3COW_INDEX_FL)) { -+ hinfo.hash_version = EXT3COW_SB(dir->i_sb)->s_def_hash_version; -+ hinfo.seed = EXT3COW_SB(dir->i_sb)->s_hash_seed; -+ count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo, -+ start_hash, start_minor_hash); -+ *next_hash = ~0; -+ return count; -+ } -+ hinfo.hash = start_hash; -+ hinfo.minor_hash = 0; -+ frame = dx_probe(NULL, dir_file->f_path.dentry->d_inode, &hinfo, frames, &err); -+ if (!frame) -+ return err; -+ -+ /* Add '.' and '..' from the htree header */ -+ if (!start_hash && !start_minor_hash) { -+ de = (struct ext3cow_dir_entry_2 *) frames[0].bh->b_data; -+ if ((err = ext3cow_htree_store_dirent(dir_file, 0, 0, de)) != 0) -+ goto errout; -+ count++; -+ } -+ if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) { -+ de = (struct ext3cow_dir_entry_2 *) frames[0].bh->b_data; -+ de = ext3cow_next_entry(de); -+ if ((err = ext3cow_htree_store_dirent(dir_file, 2, 0, de)) != 0) -+ goto errout; -+ count++; -+ } -+ -+ while (1) { -+ block = dx_get_block(frame->at); -+ ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo, -+ start_hash, start_minor_hash); -+ if (ret < 0) { -+ err = ret; -+ goto errout; -+ } -+ count += ret; -+ hashval = ~0; -+ ret = ext3cow_htree_next_block(dir, HASH_NB_ALWAYS, -+ frame, frames, &hashval); -+ *next_hash = hashval; -+ if (ret < 0) { -+ err = ret; -+ goto errout; -+ } -+ /* -+ * Stop if: (a) there are no more entries, or -+ * (b) we have inserted at least one entry and the -+ * next hash value is not a continuation -+ */ -+ if ((ret == 0) || -+ (count && ((hashval & 1) == 0))) -+ break; -+ } -+ dx_release(frames); -+ dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n", -+ count, *next_hash)); -+ return count; -+errout: -+ dx_release(frames); -+ return (err); -+} -+ -+ -+/* -+ * Directory block splitting, compacting -+ */ -+ -+static int dx_make_map (struct ext3cow_dir_entry_2 *de, int size, -+ struct dx_hash_info *hinfo, struct dx_map_entry *map_tail) -+{ -+ int count = 0; -+ char *base = (char *) de; -+ struct dx_hash_info h = *hinfo; -+ -+ while ((char *) de < base + size) -+ { -+ if (de->name_len && de->inode) { -+ ext3cowfs_dirhash(de->name, de->name_len, &h); -+ map_tail--; -+ map_tail->hash = h.hash; -+ map_tail->offs = (u32) ((char *) de - base); -+ count++; -+ cond_resched(); -+ } -+ /* XXX: do we need to check rec_len == 0 case? -Chris */ -+ de = (struct ext3cow_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); -+ } -+ return count; -+} -+ -+static void dx_sort_map (struct dx_map_entry *map, unsigned count) -+{ -+ struct dx_map_entry *p, *q, *top = map + count - 1; -+ int more; -+ /* Combsort until bubble sort doesn't suck */ -+ while (count > 2) -+ { -+ count = count*10/13; -+ if (count - 9 < 2) /* 9, 10 -> 11 */ -+ count = 11; -+ for (p = top, q = p - count; q >= map; p--, q--) -+ if (p->hash < q->hash) -+ swap(*p, *q); -+ } -+ /* Garden variety bubble sort */ -+ do { -+ more = 0; -+ q = top; -+ while (q-- > map) -+ { -+ if (q[1].hash >= q[0].hash) -+ continue; -+ swap(*(q+1), *q); -+ more = 1; -+ } -+ } while(more); -+} -+ -+static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block) -+{ -+ struct dx_entry *entries = frame->entries; -+ struct dx_entry *old = frame->at, *new = old + 1; -+ int count = dx_get_count(entries); -+ -+ assert(count < dx_get_limit(entries)); -+ assert(old < entries + count); -+ memmove(new + 1, new, (char *)(entries + count) - (char *)(new)); -+ dx_set_hash(new, hash); -+ dx_set_block(new, block); -+ dx_set_count(entries, count + 1); -+} -+#endif -+ -+ -+static void ext3cow_update_dx_flag(struct inode *inode) -+{ -+ if (!EXT3COW_HAS_COMPAT_FEATURE(inode->i_sb, -+ EXT3COW_FEATURE_COMPAT_DIR_INDEX)) -+ EXT3COW_I(inode)->i_flags &= ~EXT3COW_INDEX_FL; -+} -+ -+/* -+ * NOTE! unlike strncmp, ext3cow_match returns 1 for success, 0 for failure. -+ * -+ * `len <= EXT3COW_NAME_LEN' is guaranteed by caller. -+ * `de != NULL' is guaranteed by caller. -+ */ -+static inline int ext3cow_match (int len, const char * const name, -+ struct ext3cow_dir_entry_2 * de) -+{ -+ if (len != de->name_len) -+ return 0; -+ if (!de->inode) -+ return 0; -+ return !memcmp(name, de->name, len); -+} -+ -+/* -+ * Returns 0 if not found, -1 on failure, and 1 on success -+ */ -+/* For versioning - this is the function used when looking for -+ * names. We now handle names which include the flux token, -+ * strip it off and continue looking -znjp */ -+static inline int search_dirblock(struct buffer_head * bh, -+ struct inode *dir, -+ struct dentry *dentry, -+ unsigned long offset, -+ struct ext3cow_dir_entry_2 ** res_dir) -+{ -+ struct ext3cow_dir_entry_2 * de; -+ char * dlimit, * flux = NULL; -+ int de_len; -+ char name[EXT3COW_NAME_LEN]; -+ int namelen = dentry->d_name.len; -+ unsigned int epoch_number = EXT3COW_I_EPOCHNUMBER(dir); -+ -+ /* Get the name for the dentry */ -+ memcpy(name, dentry->d_name.name, namelen); -+ name[namelen] = '\0'; -+ -+ /* Check to see if the flux token is in the name */ -+ flux = strrchr(dentry->d_name.name, EXT3COW_FLUX_TOKEN); -+ if(NULL != flux){ -+ /* If we're here, the name we want is in the past. */ -+ int new_namelen = strlen(dentry->d_name.name) - strlen(flux); -+ /* Get the epoch number */ -+ epoch_number = simple_strtol(&flux[1], (char **)NULL, 10) - 1; -+ /* If there's a valid epoch number or if we're version listing -+ * we need the name seperately, otherwise the FLUX_TOKEN exists -+ * in the file name */ -+ if(epoch_number + 1 == 0 && (strlen(flux) > 1)){ -+ /* EXT3COW_FLUX_TOKEN exists in the file name */ -+ epoch_number = EXT3COW_S_EPOCHNUMBER(dir->i_sb); -+ }else{ -+ /* Grab the correct name and length */ -+ memcpy(name, dentry->d_name.name, new_namelen); -+ name[new_namelen] = '\0'; -+ namelen = strlen(name); -+ } -+ } -+ -+ -+ de = (struct ext3cow_dir_entry_2 *) bh->b_data; -+ dlimit = bh->b_data + dir->i_sb->s_blocksize; -+ while ((char *) de < dlimit) { -+ /* this code is executed quadratically often */ -+ /* do minimal checking `by hand' */ -+ -+ /* Can't just return first entry of something; -+ * may exist twice if died and same name appears again. - znjp -+ */ -+ if ((char *) de + namelen <= dlimit && -+ ext3cow_match (namelen, name, de) && -+ EXT3COW_IS_DIRENT_SCOPED(de, epoch_number)) { -+ /* found a match - just to be sure, do a full check */ -+ if (!ext3cow_check_dir_entry("ext3cow_find_entry", -+ dir, de, bh, offset)) -+ return -1; -+ *res_dir = de; -+ return 1; -+ } -+ /* prevent looping on a bad block */ -+ de_len = le16_to_cpu(de->rec_len); -+ if (de_len <= 0) -+ return -1; -+ offset += de_len; -+ de = (struct ext3cow_dir_entry_2 *) ((char *) de + de_len); -+ } -+ return 0; -+} -+ -+ -+/* -+ * ext3cow_find_entry() -+ * -+ * finds an entry in the specified directory with the wanted name. It -+ * returns the cache buffer in which the entry was found, and the entry -+ * itself (as a parameter - res_dir). It does NOT read the inode of the -+ * entry - you'll have to do that yourself if you want to. -+ * -+ * The returned buffer_head has ->b_count elevated. The caller is expected -+ * to brelse() it when appropriate. -+ */ -+static struct buffer_head * ext3cow_find_entry (struct dentry *dentry, -+ struct ext3cow_dir_entry_2 ** res_dir) -+{ -+ struct super_block * sb; -+ struct buffer_head * bh_use[NAMEI_RA_SIZE]; -+ struct buffer_head * bh, *ret = NULL; -+ unsigned long start, block, b; -+ int ra_max = 0; /* Number of bh's in the readahead -+ buffer, bh_use[] */ -+ int ra_ptr = 0; /* Current index into readahead -+ buffer */ -+ int num = 0; -+ int nblocks, i, err; -+ struct inode *dir = dentry->d_parent->d_inode; -+ int namelen; -+ const u8 *name; -+ unsigned blocksize; -+ -+ *res_dir = NULL; -+ sb = dir->i_sb; -+ blocksize = sb->s_blocksize; -+ namelen = dentry->d_name.len; -+ name = dentry->d_name.name; -+ if (namelen > EXT3COW_NAME_LEN) -+ return NULL; -+#ifdef CONFIG_EXT3COW_INDEX -+ if (is_dx(dir)) { -+ bh = ext3cow_dx_find_entry(dentry, res_dir, &err); -+ /* -+ * On success, or if the error was file not found, -+ * return. Otherwise, fall back to doing a search the -+ * old fashioned way. -+ */ -+ if (bh || (err != ERR_BAD_DX_DIR)) -+ return bh; -+ dxtrace(printk("ext3cow_find_entry: dx failed, falling back\n")); -+ } -+#endif -+ nblocks = dir->i_size >> EXT3COW_BLOCK_SIZE_BITS(sb); -+ start = EXT3COW_I(dir)->i_dir_start_lookup; -+ if (start >= nblocks) -+ start = 0; -+ block = start; -+restart: -+ do { -+ /* -+ * We deal with the read-ahead logic here. -+ */ -+ if (ra_ptr >= ra_max) { -+ /* Refill the readahead buffer */ -+ ra_ptr = 0; -+ b = block; -+ for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) { -+ /* -+ * Terminate if we reach the end of the -+ * directory and must wrap, or if our -+ * search has finished at this block. -+ */ -+ if (b >= nblocks || (num && block == start)) { -+ bh_use[ra_max] = NULL; -+ break; -+ } -+ num++; -+ bh = ext3cow_getblk(NULL, dir, b++, 0, &err); -+ bh_use[ra_max] = bh; -+ if (bh) -+ ll_rw_block(READ_META, 1, &bh); -+ } -+ } -+ if ((bh = bh_use[ra_ptr++]) == NULL) -+ goto next; -+ wait_on_buffer(bh); -+ if (!buffer_uptodate(bh)) { -+ /* read error, skip block & hope for the best */ -+ ext3cow_error(sb, __FUNCTION__, "reading directory #%lu " -+ "offset %lu", dir->i_ino, block); -+ brelse(bh); -+ goto next; -+ } -+ i = search_dirblock(bh, dir, dentry, -+ block << EXT3COW_BLOCK_SIZE_BITS(sb), res_dir); -+ if (i == 1) { -+ EXT3COW_I(dir)->i_dir_start_lookup = block; -+ ret = bh; -+ goto cleanup_and_exit; -+ } else { -+ brelse(bh); -+ if (i < 0) -+ goto cleanup_and_exit; -+ } -+ next: -+ if (++block >= nblocks) -+ block = 0; -+ } while (block != start); -+ -+ /* -+ * If the directory has grown while we were searching, then -+ * search the last part of the directory before giving up. -+ */ -+ block = nblocks; -+ nblocks = dir->i_size >> EXT3COW_BLOCK_SIZE_BITS(sb); -+ if (block < nblocks) { -+ start = 0; -+ goto restart; -+ } -+ -+cleanup_and_exit: -+ /* Clean up the read-ahead blocks */ -+ for (; ra_ptr < ra_max; ra_ptr++) -+ brelse (bh_use[ra_ptr]); -+ return ret; -+} -+ -+#ifdef CONFIG_EXT3COW_INDEX -+static struct buffer_head * ext3cow_dx_find_entry(struct dentry *dentry, -+ struct ext3cow_dir_entry_2 **res_dir, int *err) -+{ -+ struct super_block * sb; -+ struct dx_hash_info hinfo; -+ u32 hash; -+ struct dx_frame frames[2], *frame; -+ struct ext3cow_dir_entry_2 *de, *top; -+ struct buffer_head *bh; -+ unsigned long block; -+ int retval; -+ int namelen = dentry->d_name.len; -+ const u8 *name = dentry->d_name.name; -+ struct inode *dir = dentry->d_parent->d_inode; -+ -+ sb = dir->i_sb; -+ /* NFS may look up ".." - look at dx_root directory block */ -+ if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){ -+ if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err))) -+ return NULL; -+ } else { -+ frame = frames; -+ frame->bh = NULL; /* for dx_release() */ -+ frame->at = (struct dx_entry *)frames; /* hack for zero entry*/ -+ dx_set_block(frame->at, 0); /* dx_root block is 0 */ -+ } -+ hash = hinfo.hash; -+ do { -+ block = dx_get_block(frame->at); -+ if (!(bh = ext3cow_bread (NULL,dir, block, 0, err))) -+ goto errout; -+ de = (struct ext3cow_dir_entry_2 *) bh->b_data; -+ top = (struct ext3cow_dir_entry_2 *) ((char *) de + sb->s_blocksize - -+ EXT3COW_DIR_REC_LEN(0)); -+ for (; de < top; de = ext3cow_next_entry(de)) -+ if (ext3cow_match (namelen, name, de)) { -+ if (!ext3cow_check_dir_entry("ext3cow_find_entry", -+ dir, de, bh, -+ (block<b_data))) { -+ brelse (bh); -+ goto errout; -+ } -+ *res_dir = de; -+ dx_release (frames); -+ return bh; -+ } -+ brelse (bh); -+ /* Check to see if we should continue to search */ -+ retval = ext3cow_htree_next_block(dir, hash, frame, -+ frames, NULL); -+ if (retval < 0) { -+ ext3cow_warning(sb, __FUNCTION__, -+ "error reading index page in directory #%lu", -+ dir->i_ino); -+ *err = retval; -+ goto errout; -+ } -+ } while (retval == 1); -+ -+ *err = -ENOENT; -+errout: -+ dxtrace(printk("%s not found\n", name)); -+ dx_release (frames); -+ return NULL; -+} -+#endif -+ -+/* ext3cow_lookup: One the key functions of this versioning file sytem, -+ * allowing people to return to the past. -+ * -+ * Two policies for inode chains: -+ * 1) If it's the head of the list, it's the most current inode -+ * and always changable. The inode number is static. -+ * 2) If it's any inode in the chain that's not the head, -+ * than it's an inode in the past and unchangeable. The inode -+ * number may change. -+ */ -+static struct dentry *ext3cow_lookup(struct inode * dir, struct dentry *dentry, -+ struct nameidata *nd) -+{ -+ struct inode * inode = NULL; -+ struct ext3cow_dir_entry_2 * de = NULL; -+ struct buffer_head * bh = NULL; -+ unsigned int epoch_number = 0; -+ char * flux = NULL; -+ -+ if (dentry->d_name.len > EXT3COW_NAME_LEN) -+ return ERR_PTR(-ENAMETOOLONG); -+ -+ /* Find the epoch number to scope with -znjp -+ * if the parent is unchangeable, so is the inode -+ */ -+ if(EXT3COW_IS_UNCHANGEABLE(dir)) -+ epoch_number = EXT3COW_I_EPOCHNUMBER(dir); -+ else -+ epoch_number = EXT3COW_S_EPOCHNUMBER(dir->i_sb); -+ -+ bh = ext3cow_find_entry(dentry, &de); -+ if (bh) { -+ unsigned long ino = le32_to_cpu(de->inode); -+ brelse (bh); -+ if (!ext3cow_valid_inum(dir->i_sb, ino)) { -+ ext3cow_error(dir->i_sb, "ext3cow_lookup", -+ "bad inode number: %lu", ino); -+ inode = NULL; -+ } else -+ inode = iget(dir->i_sb, ino); -+ -+ if (!inode) -+ return ERR_PTR(-EACCES); -+ -+ /* Is this a version listing ? */ -+ if ((char)dentry->d_name.name[dentry->d_name.len - 1] == -+ EXT3COW_FLUX_TOKEN) { -+ /* prevent going round in circles */ -+ if (dentry->d_parent && -+ dentry->d_parent->d_name.name[dentry->d_parent->d_name.len - 1] == -+ EXT3COW_FLUX_TOKEN) { -+ return NULL; -+ } -+ /* we fake a directory using the directory inode instead of -+ * the file one and subsequently force a call to ext3cow_readdir */ -+ iput(inode); -+ inode = ext3cow_fake_inode(dir, EXT3COW_S_EPOCHNUMBER(dir->i_sb)); -+ EXT3COW_I(inode)->i_next_inode = EXT3COW_I(dir)->i_next_inode; -+ d_splice_alias(inode, dentry); -+ -+ return NULL; -+ } -+ -+ /* Is the user time-shifting to the past? */ -+ flux = strrchr(dentry->d_name.name, EXT3COW_FLUX_TOKEN); -+ if(NULL != flux){ -+ -+ if(strnicmp(&flux[1], "onehour", 8) == 0){ -+ epoch_number = get_seconds() - ONEHOUR; -+ printk(KERN_INFO "ONEHOUR!\n"); -+ }else if(strnicmp(&flux[1], "yesterday", 10) == 0 || -+ strnicmp(&flux[1], "oneday", 7) == 0){ -+ epoch_number = get_seconds() - YESTERDAY; -+ }else if(strnicmp(&flux[1], "oneweek", 8) == 0){ -+ epoch_number = get_seconds() - ONEWEEK; -+ }else if(strnicmp(&flux[1], "onemonth", 9) == 0){ -+ epoch_number = get_seconds() - ONEMONTH; -+ }else if(strnicmp(&flux[1], "oneyear", 8) == 0){ -+ epoch_number = get_seconds() - ONEYEAR; -+ }else -+ epoch_number = simple_strtol(&flux[1], (char **)NULL, 10) - 1; -+ -+ /* No future epochs */ -+ if(epoch_number + 1 > EXT3COW_S_EPOCHNUMBER(dir->i_sb)) -+ return ERR_PTR(-ENOENT); -+ -+ /* Move to present -+ if(epoch_number + 1 == 0) -+ epoch_number = EXT3COW_S_EPOCHNUMBER(dir->i_sb); -+ */ -+ } -+ -+ /* Find correct inode in chain */ -+ while(EXT3COW_I_EPOCHNUMBER(inode) > epoch_number){ -+ -+ printk(KERN_INFO "Looking for %u with epoch %u\n", epoch_number, -+ EXT3COW_I_EPOCHNUMBER(inode)); -+ -+ ino = EXT3COW_I(inode)->i_next_inode; -+ if(ino == 0){ -+ ext3cow_warning(dir->i_sb, "ext3cow_lookup", -+ "Next inode is 0 in lookup."); -+ iput(inode); -+ return ERR_PTR(-ENOENT); -+ } -+ iput(inode); /* for correct usage count (i_count) */ -+ inode = iget(dir->i_sb, ino); -+ -+ if (!inode){ -+ ext3cow_warning(dir->i_sb, "ext3cow_lookup", -+ "Could not access inode number %lu", -+ ino); -+ return ERR_PTR(-EACCES); -+ } -+ } -+ -+ /* If we're in the past, fake the inode for scoping and "unchangability" */ -+ if(flux || (epoch_number != EXT3COW_S_EPOCHNUMBER(dir->i_sb))){ -+ printk(KERN_INFO "Faking %s\n", dentry->d_name.name); -+ inode = ext3cow_fake_inode(inode, epoch_number); -+ } -+ -+ if (!inode) -+ return ERR_PTR(-EACCES); -+ } -+ return d_splice_alias(inode, dentry); -+} -+ -+ -+struct dentry *ext3cow_get_parent(struct dentry *child) -+{ -+ unsigned long ino; -+ struct dentry *parent; -+ struct inode *inode; -+ struct dentry dotdot; -+ struct ext3cow_dir_entry_2 * de; -+ struct buffer_head *bh; -+ -+ dotdot.d_name.name = ".."; -+ dotdot.d_name.len = 2; -+ dotdot.d_parent = child; /* confusing, isn't it! */ -+ -+ bh = ext3cow_find_entry(&dotdot, &de); -+ inode = NULL; -+ if (!bh) -+ return ERR_PTR(-ENOENT); -+ ino = le32_to_cpu(de->inode); -+ brelse(bh); -+ -+ if (!ext3cow_valid_inum(child->d_inode->i_sb, ino)) { -+ ext3cow_error(child->d_inode->i_sb, "ext3cow_get_parent", -+ "bad inode number: %lu", ino); -+ inode = NULL; -+ } else -+ inode = iget(child->d_inode->i_sb, ino); -+ -+ if (!inode) -+ return ERR_PTR(-EACCES); -+ -+ parent = d_alloc_anon(inode); -+ if (!parent) { -+ iput(inode); -+ parent = ERR_PTR(-ENOMEM); -+ } -+ return parent; -+} -+ -+#define S_SHIFT 12 -+static unsigned char ext3cow_type_by_mode[S_IFMT >> S_SHIFT] = { -+ [S_IFREG >> S_SHIFT] = EXT3COW_FT_REG_FILE, -+ [S_IFDIR >> S_SHIFT] = EXT3COW_FT_DIR, -+ [S_IFCHR >> S_SHIFT] = EXT3COW_FT_CHRDEV, -+ [S_IFBLK >> S_SHIFT] = EXT3COW_FT_BLKDEV, -+ [S_IFIFO >> S_SHIFT] = EXT3COW_FT_FIFO, -+ [S_IFSOCK >> S_SHIFT] = EXT3COW_FT_SOCK, -+ [S_IFLNK >> S_SHIFT] = EXT3COW_FT_SYMLINK, -+}; -+ -+static inline void ext3cow_set_de_type(struct super_block *sb, -+ struct ext3cow_dir_entry_2 *de, -+ umode_t mode) { -+ if (EXT3COW_HAS_INCOMPAT_FEATURE(sb, EXT3COW_FEATURE_INCOMPAT_FILETYPE)) -+ de->file_type = ext3cow_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; -+} -+ -+#ifdef CONFIG_EXT3COW_INDEX -+static struct ext3cow_dir_entry_2 * -+dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) -+{ -+ unsigned rec_len = 0; -+ -+ while (count--) { -+ struct ext3cow_dir_entry_2 *de = (struct ext3cow_dir_entry_2 *) (from + map->offs); -+ rec_len = EXT3COW_DIR_REC_LEN(de->name_len); -+ memcpy (to, de, rec_len); -+ ((struct ext3cow_dir_entry_2 *) to)->rec_len = -+ cpu_to_le16(rec_len); -+ de->inode = 0; -+ map++; -+ to += rec_len; -+ } -+ return (struct ext3cow_dir_entry_2 *) (to - rec_len); -+} -+ -+static struct ext3cow_dir_entry_2* dx_pack_dirents(char *base, int size) -+{ -+ struct ext3cow_dir_entry_2 *next, *to, *prev, *de = (struct ext3cow_dir_entry_2 *) base; -+ unsigned rec_len = 0; -+ -+ prev = to = de; -+ while ((char*)de < base + size) { -+ next = (struct ext3cow_dir_entry_2 *) ((char *) de + -+ le16_to_cpu(de->rec_len)); -+ if (de->inode && de->name_len) { -+ rec_len = EXT3COW_DIR_REC_LEN(de->name_len); -+ if (de > to) -+ memmove(to, de, rec_len); -+ to->rec_len = cpu_to_le16(rec_len); -+ prev = to; -+ to = (struct ext3cow_dir_entry_2 *) (((char *) to) + rec_len); -+ } -+ de = next; -+ } -+ return prev; -+} -+ -+static struct ext3cow_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, -+ struct buffer_head **bh,struct dx_frame *frame, -+ struct dx_hash_info *hinfo, int *error) -+{ -+ unsigned blocksize = dir->i_sb->s_blocksize; -+ unsigned count, continued; -+ struct buffer_head *bh2; -+ u32 newblock; -+ u32 hash2; -+ struct dx_map_entry *map; -+ char *data1 = (*bh)->b_data, *data2; -+ unsigned split; -+ struct ext3cow_dir_entry_2 *de = NULL, *de2; -+ int err; -+ -+ bh2 = ext3cow_append (handle, dir, &newblock, error); -+ if (!(bh2)) { -+ brelse(*bh); -+ *bh = NULL; -+ goto errout; -+ } -+ -+ BUFFER_TRACE(*bh, "get_write_access"); -+ err = ext3cow_journal_get_write_access(handle, *bh); -+ if (err) { -+ journal_error: -+ brelse(*bh); -+ brelse(bh2); -+ *bh = NULL; -+ ext3cow_std_error(dir->i_sb, err); -+ goto errout; -+ } -+ BUFFER_TRACE(frame->bh, "get_write_access"); -+ err = ext3cow_journal_get_write_access(handle, frame->bh); -+ if (err) -+ goto journal_error; -+ -+ data2 = bh2->b_data; -+ -+ /* create map in the end of data2 block */ -+ map = (struct dx_map_entry *) (data2 + blocksize); -+ count = dx_make_map ((struct ext3cow_dir_entry_2 *) data1, -+ blocksize, hinfo, map); -+ map -= count; -+ split = count/2; // need to adjust to actual middle -+ dx_sort_map (map, count); -+ hash2 = map[split].hash; -+ continued = hash2 == map[split - 1].hash; -+ dxtrace(printk("Split block %i at %x, %i/%i\n", -+ dx_get_block(frame->at), hash2, split, count-split)); -+ -+ /* Fancy dance to stay within two buffers */ -+ de2 = dx_move_dirents(data1, data2, map + split, count - split); -+ de = dx_pack_dirents(data1,blocksize); -+ de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de); -+ de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2); -+ dxtrace(dx_show_leaf (hinfo, (struct ext3cow_dir_entry_2 *) data1, blocksize, 1)); -+ dxtrace(dx_show_leaf (hinfo, (struct ext3cow_dir_entry_2 *) data2, blocksize, 1)); -+ -+ /* Which block gets the new entry? */ -+ if (hinfo->hash >= hash2) -+ { -+ swap(*bh, bh2); -+ de = de2; -+ } -+ dx_insert_block (frame, hash2 + continued, newblock); -+ err = ext3cow_journal_dirty_metadata (handle, bh2); -+ if (err) -+ goto journal_error; -+ err = ext3cow_journal_dirty_metadata (handle, frame->bh); -+ if (err) -+ goto journal_error; -+ brelse (bh2); -+ dxtrace(dx_show_index ("frame", frame->entries)); -+errout: -+ return de; -+} -+#endif -+ -+ -+/* -+ * Add a new entry into a directory (leaf) block. If de is non-NULL, -+ * it points to a directory entry which is guaranteed to be large -+ * enough for new directory entry. If de is NULL, then -+ * add_dirent_to_buf will attempt search the directory block for -+ * space. It will return -ENOSPC if no space is available, and -EIO -+ * and -EEXIST if directory entry already exists. -+ * -+ * NOTE! bh is NOT released in the case where ENOSPC is returned. In -+ * all other cases bh is released. -+ */ -+static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, -+ struct inode *inode, struct ext3cow_dir_entry_2 *de, -+ struct buffer_head * bh) -+{ -+ struct inode *dir = dentry->d_parent->d_inode; -+ const char *name = dentry->d_name.name; -+ int namelen = dentry->d_name.len; -+ unsigned long offset = 0; -+ unsigned short reclen; -+ int nlen, rlen, err; -+ char *top; -+ -+ reclen = EXT3COW_DIR_REC_LEN(namelen); -+ if (!de) { -+ de = (struct ext3cow_dir_entry_2 *)bh->b_data; -+ top = bh->b_data + dir->i_sb->s_blocksize - reclen; -+ while ((char *) de <= top) { -+ if (!ext3cow_check_dir_entry("ext3cow_add_entry", dir, de, -+ bh, offset)) { -+ brelse (bh); -+ ext3cow_reclaim_dup_inode(dentry->d_parent->d_parent->d_inode, dir); -+ return -EIO; -+ } -+ /* If name exists and it's still alive, no add. But if it's a new -+ * name in this scope, ok to add. -znjp */ -+ if (ext3cow_match (namelen, name, de) && EXT3COW_IS_DIRENT_ALIVE(de)) { -+ brelse (bh); -+ return -EEXIST; -+ } -+ nlen = EXT3COW_DIR_REC_LEN(de->name_len); -+ rlen = le16_to_cpu(de->rec_len); -+ if ((de->inode? rlen - nlen: rlen) >= reclen) -+ break; -+ de = (struct ext3cow_dir_entry_2 *)((char *)de + rlen); -+ offset += rlen; -+ } -+ if ((char *) de > top) -+ return -ENOSPC; -+ } -+ BUFFER_TRACE(bh, "get_write_access"); -+ err = ext3cow_journal_get_write_access(handle, bh); -+ if (err) { -+ ext3cow_std_error(dir->i_sb, err); -+ brelse(bh); -+ return err; -+ } -+ -+ /* By now the buffer is marked for journaling */ -+ nlen = EXT3COW_DIR_REC_LEN(de->name_len); -+ rlen = le16_to_cpu(de->rec_len); -+ if (de->inode) { -+ struct ext3cow_dir_entry_2 *de1 = (struct ext3cow_dir_entry_2 *)((char *)de + nlen); -+ de1->rec_len = cpu_to_le16(rlen - nlen); -+ de->rec_len = cpu_to_le16(nlen); -+ de = de1; -+ } -+ de->file_type = EXT3COW_FT_UNKNOWN; -+ if (inode) { -+ de->inode = cpu_to_le32(inode->i_ino); -+ ext3cow_set_de_type(dir->i_sb, de, inode->i_mode); -+ } else -+ de->inode = 0; -+ /* For versioning -znjp */ -+ de->birth_epoch = cpu_to_le32(EXT3COW_S_EPOCHNUMBER(dir->i_sb)); -+ de->death_epoch = cpu_to_le32(EXT3COW_DIRENT_ALIVE); -+ de->name_len = namelen; -+ memcpy (de->name, name, namelen); -+ /* -+ * XXX shouldn't update any times until successful -+ * completion of syscall, but too many callers depend -+ * on this. -+ * -+ * XXX similarly, too many callers depend on -+ * ext3cow_new_inode() setting the times, but error -+ * recovery deletes the inode, so the worst that can -+ * happen is that the times are slightly out of date -+ * and/or different from the directory change time. -+ */ -+ dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; -+ ext3cow_update_dx_flag(dir); -+ dir->i_version++; -+ ext3cow_mark_inode_dirty(handle, dir); -+ BUFFER_TRACE(bh, "call ext3cow_journal_dirty_metadata"); -+ err = ext3cow_journal_dirty_metadata(handle, bh); -+ if (err) -+ ext3cow_std_error(dir->i_sb, err); -+ brelse(bh); -+ return 0; -+} -+ -+#ifdef CONFIG_EXT3COW_INDEX -+/* -+ * This converts a one block unindexed directory to a 3 block indexed -+ * directory, and adds the dentry to the indexed directory. -+ */ -+static int make_indexed_dir(handle_t *handle, struct dentry *dentry, -+ struct inode *inode, struct buffer_head *bh) -+{ -+ struct inode *dir = dentry->d_parent->d_inode; -+ const char *name = dentry->d_name.name; -+ int namelen = dentry->d_name.len; -+ struct buffer_head *bh2; -+ struct dx_root *root; -+ struct dx_frame frames[2], *frame; -+ struct dx_entry *entries; -+ struct ext3cow_dir_entry_2 *de, *de2; -+ char *data1, *top; -+ unsigned len; -+ int retval; -+ unsigned blocksize; -+ struct dx_hash_info hinfo; -+ u32 block; -+ struct fake_dirent *fde; -+ -+ blocksize = dir->i_sb->s_blocksize; -+ dxtrace(printk("Creating index\n")); -+ retval = ext3cow_journal_get_write_access(handle, bh); -+ if (retval) { -+ ext3cow_std_error(dir->i_sb, retval); -+ brelse(bh); -+ return retval; -+ } -+ root = (struct dx_root *) bh->b_data; -+ -+ bh2 = ext3cow_append (handle, dir, &block, &retval); -+ if (!(bh2)) { -+ brelse(bh); -+ return retval; -+ } -+ EXT3COW_I(dir)->i_flags |= EXT3COW_INDEX_FL; -+ data1 = bh2->b_data; -+ -+ /* The 0th block becomes the root, move the dirents out */ -+ fde = &root->dotdot; -+ de = (struct ext3cow_dir_entry_2 *)((char *)fde + le16_to_cpu(fde->rec_len)); -+ len = ((char *) root) + blocksize - (char *) de; -+ memcpy (data1, de, len); -+ de = (struct ext3cow_dir_entry_2 *) data1; -+ top = data1 + len; -+ while ((char *)(de2=(void*)de+le16_to_cpu(de->rec_len)) < top) -+ de = de2; -+ de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de); -+ /* Initialize the root; the dot dirents already exist */ -+ de = (struct ext3cow_dir_entry_2 *) (&root->dotdot); -+ de->rec_len = cpu_to_le16(blocksize - EXT3COW_DIR_REC_LEN(2)); -+ memset (&root->info, 0, sizeof(root->info)); -+ root->info.info_length = sizeof(root->info); -+ root->info.hash_version = EXT3COW_SB(dir->i_sb)->s_def_hash_version; -+ entries = root->entries; -+ dx_set_block (entries, 1); -+ dx_set_count (entries, 1); -+ dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info))); -+ -+ /* Initialize as for dx_probe */ -+ hinfo.hash_version = root->info.hash_version; -+ hinfo.seed = EXT3COW_SB(dir->i_sb)->s_hash_seed; -+ ext3cowfs_dirhash(name, namelen, &hinfo); -+ frame = frames; -+ frame->entries = entries; -+ frame->at = entries; -+ frame->bh = bh; -+ bh = bh2; -+ de = do_split(handle,dir, &bh, frame, &hinfo, &retval); -+ dx_release (frames); -+ if (!(de)) -+ return retval; -+ -+ return add_dirent_to_buf(handle, dentry, inode, de, bh); -+} -+#endif -+ -+/* -+ * ext3cow_add_entry() -+ * -+ * adds a file entry to the specified directory, using the same -+ * semantics as ext3cow_find_entry(). It returns NULL if it failed. -+ * -+ * NOTE!! The inode part of 'de' is left at 0 - which means you -+ * may not sleep between calling this and putting something into -+ * the entry, as someone else might have used it while you slept. -+ */ -+static int ext3cow_add_entry (handle_t *handle, struct dentry *dentry, -+ struct inode *inode) -+{ -+ struct inode *dir = dentry->d_parent->d_inode; -+ unsigned long offset; -+ struct buffer_head * bh; -+ struct ext3cow_dir_entry_2 *de; -+ struct super_block * sb; -+ int retval; -+#ifdef CONFIG_EXT3COW_INDEX -+ int dx_fallback=0; -+#endif -+ unsigned blocksize; -+ u32 block, blocks; -+ -+ sb = dir->i_sb; -+ blocksize = sb->s_blocksize; -+ if (!dentry->d_name.len) -+ return -EINVAL; -+ /* No additions in the past -znjp */ -+ if(is_unchangeable(dir, dentry)) -+ return -EROFS; -+ -+ if(EXT3COW_S_EPOCHNUMBER(sb) > EXT3COW_I_EPOCHNUMBER(dir)){ -+ if(ext3cow_dup_inode(dentry->d_parent->d_parent->d_inode, dir)) -+ //if(ext3cow_dup_inode(NULL, dir)) -+ return -1; -+ } -+ -+#ifdef CONFIG_EXT3COW_INDEX -+ if (is_dx(dir)) { -+ retval = ext3cow_dx_add_entry(handle, dentry, inode); -+ if (!retval || (retval != ERR_BAD_DX_DIR)){ -+ ext3cow_reclaim_dup_inode(dentry->d_parent->d_parent->d_inode, dir); -+ return retval; -+ } -+ EXT3COW_I(dir)->i_flags &= ~EXT3COW_INDEX_FL; -+ dx_fallback++; -+ ext3cow_mark_inode_dirty(handle, dir); -+ } -+#endif -+ blocks = dir->i_size >> sb->s_blocksize_bits; -+ for (block = 0, offset = 0; block < blocks; block++) { -+ bh = ext3cow_bread(handle, dir, block, 0, &retval); -+ if(!bh){ -+ ext3cow_reclaim_dup_inode(dentry->d_parent->d_parent->d_inode, dir); -+ return retval; -+ } -+ retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh); -+ if (retval != -ENOSPC) -+ return retval; -+ -+#ifdef CONFIG_EXT3COW_INDEX -+ if (blocks == 1 && !dx_fallback && -+ EXT3COW_HAS_COMPAT_FEATURE(sb, EXT3COW_FEATURE_COMPAT_DIR_INDEX)) -+ return make_indexed_dir(handle, dentry, inode, bh); -+#endif -+ brelse(bh); -+ } -+ -+ bh = ext3cow_append(handle, dir, &block, &retval); -+ if (!bh){ -+ ext3cow_reclaim_dup_inode(dentry->d_parent->d_parent->d_inode, dir); -+ return retval; -+ } -+ de = (struct ext3cow_dir_entry_2 *) bh->b_data; -+ de->inode = 0; -+ de->rec_len = cpu_to_le16(blocksize); -+ return add_dirent_to_buf(handle, dentry, inode, de, bh); -+} -+ -+#ifdef CONFIG_EXT3COW_INDEX -+/* -+ * Returns 0 for success, or a negative error value -+ */ -+static int ext3cow_dx_add_entry(handle_t *handle, struct dentry *dentry, -+ struct inode *inode) -+{ -+ struct dx_frame frames[2], *frame; -+ struct dx_entry *entries, *at; -+ struct dx_hash_info hinfo; -+ struct buffer_head * bh; -+ struct inode *dir = dentry->d_parent->d_inode; -+ struct super_block * sb = dir->i_sb; -+ struct ext3cow_dir_entry_2 *de; -+ int err; -+ -+ frame = dx_probe(dentry, NULL, &hinfo, frames, &err); -+ if (!frame) -+ return err; -+ entries = frame->entries; -+ at = frame->at; -+ -+ if (!(bh = ext3cow_bread(handle,dir, dx_get_block(frame->at), 0, &err))) -+ goto cleanup; -+ -+ BUFFER_TRACE(bh, "get_write_access"); -+ err = ext3cow_journal_get_write_access(handle, bh); -+ if (err) -+ goto journal_error; -+ -+ err = add_dirent_to_buf(handle, dentry, inode, NULL, bh); -+ if (err != -ENOSPC) { -+ bh = NULL; -+ goto cleanup; -+ } -+ -+ /* Block full, should compress but for now just split */ -+ dxtrace(printk("using %u of %u node entries\n", -+ dx_get_count(entries), dx_get_limit(entries))); -+ /* Need to split index? */ -+ if (dx_get_count(entries) == dx_get_limit(entries)) { -+ u32 newblock; -+ unsigned icount = dx_get_count(entries); -+ int levels = frame - frames; -+ struct dx_entry *entries2; -+ struct dx_node *node2; -+ struct buffer_head *bh2; -+ -+ if (levels && (dx_get_count(frames->entries) == -+ dx_get_limit(frames->entries))) { -+ ext3cow_warning(sb, __FUNCTION__, -+ "Directory index full!"); -+ err = -ENOSPC; -+ goto cleanup; -+ } -+ bh2 = ext3cow_append (handle, dir, &newblock, &err); -+ if (!(bh2)) -+ goto cleanup; -+ node2 = (struct dx_node *)(bh2->b_data); -+ entries2 = node2->entries; -+ node2->fake.rec_len = cpu_to_le16(sb->s_blocksize); -+ node2->fake.inode = 0; -+ BUFFER_TRACE(frame->bh, "get_write_access"); -+ err = ext3cow_journal_get_write_access(handle, frame->bh); -+ if (err) -+ goto journal_error; -+ if (levels) { -+ unsigned icount1 = icount/2, icount2 = icount - icount1; -+ unsigned hash2 = dx_get_hash(entries + icount1); -+ dxtrace(printk("Split index %i/%i\n", icount1, icount2)); -+ -+ BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */ -+ err = ext3cow_journal_get_write_access(handle, -+ frames[0].bh); -+ if (err) -+ goto journal_error; -+ -+ memcpy ((char *) entries2, (char *) (entries + icount1), -+ icount2 * sizeof(struct dx_entry)); -+ dx_set_count (entries, icount1); -+ dx_set_count (entries2, icount2); -+ dx_set_limit (entries2, dx_node_limit(dir)); -+ -+ /* Which index block gets the new entry? */ -+ if (at - entries >= icount1) { -+ frame->at = at = at - entries - icount1 + entries2; -+ frame->entries = entries = entries2; -+ swap(frame->bh, bh2); -+ } -+ dx_insert_block (frames + 0, hash2, newblock); -+ dxtrace(dx_show_index ("node", frames[1].entries)); -+ dxtrace(dx_show_index ("node", -+ ((struct dx_node *) bh2->b_data)->entries)); -+ err = ext3cow_journal_dirty_metadata(handle, bh2); -+ if (err) -+ goto journal_error; -+ brelse (bh2); -+ } else { -+ dxtrace(printk("Creating second level index...\n")); -+ memcpy((char *) entries2, (char *) entries, -+ icount * sizeof(struct dx_entry)); -+ dx_set_limit(entries2, dx_node_limit(dir)); -+ -+ /* Set up root */ -+ dx_set_count(entries, 1); -+ dx_set_block(entries + 0, newblock); -+ ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1; -+ -+ /* Add new access path frame */ -+ frame = frames + 1; -+ frame->at = at = at - entries + entries2; -+ frame->entries = entries = entries2; -+ frame->bh = bh2; -+ err = ext3cow_journal_get_write_access(handle, -+ frame->bh); -+ if (err) -+ goto journal_error; -+ } -+ ext3cow_journal_dirty_metadata(handle, frames[0].bh); -+ } -+ de = do_split(handle, dir, &bh, frame, &hinfo, &err); -+ if (!de) -+ goto cleanup; -+ err = add_dirent_to_buf(handle, dentry, inode, de, bh); -+ bh = NULL; -+ goto cleanup; -+ -+journal_error: -+ ext3cow_std_error(dir->i_sb, err); -+cleanup: -+ if (bh) -+ brelse(bh); -+ dx_release(frames); -+ return err; -+} -+#endif -+ -+/* -+ * ext3cow_delete_entry deletes a directory entry by merging it with the -+ * previous entry -+ */ -+static int ext3cow_delete_entry (handle_t *handle, -+ struct inode * dir, -+ struct ext3cow_dir_entry_2 * de_del, -+ struct buffer_head * bh, -+ struct dentry *dentry) -+{ -+ struct ext3cow_dir_entry_2 * de, * pde; -+ int i; -+ -+ i = 0; -+ pde = NULL; -+ de = (struct ext3cow_dir_entry_2 *) bh->b_data; -+ while (i < bh->b_size) { -+ if (!ext3cow_check_dir_entry("ext3cow_delete_entry", dir, de, bh, i)) -+ return -EIO; -+ if (de == de_del) { -+ /* Can't delete an already dead entry - znjp */ -+ if(!EXT3COW_IS_DIRENT_ALIVE(de)) -+ return 0; -+ -+ if(EXT3COW_S_EPOCHNUMBER(dir->i_sb) > EXT3COW_I_EPOCHNUMBER(dir)){ -+ if(ext3cow_dup_inode(dentry->d_parent->d_parent->d_inode, dir)) -+ //if(ext3cow_dup_inode(NULL, dir)) -+ return -1; -+ } -+ -+ BUFFER_TRACE(bh, "get_write_access"); -+ ext3cow_journal_get_write_access(handle, bh); -+ /* There used to be code here to adjust the rec_len -+ * but since names really never go away, the code was deleted -+ if (pde) -+ pde->rec_len = -+ cpu_to_le16(le16_to_cpu(pde->rec_len) + -+ le16_to_cpu(de->rec_len)); -+ else -+ de->inode = 0; -+ */ -+ /* Mark it dead - znjp */ -+ de->death_epoch = cpu_to_le32(EXT3COW_I_EPOCHNUMBER(dir)); -+ dir->i_version++; -+ BUFFER_TRACE(bh, "call ext3cow_journal_dirty_metadata"); -+ ext3cow_journal_dirty_metadata(handle, bh); -+ return 0; -+ } -+ i += le16_to_cpu(de->rec_len); -+ pde = de; -+ de = (struct ext3cow_dir_entry_2 *) -+ ((char *) de + le16_to_cpu(de->rec_len)); -+ } -+ return -ENOENT; -+} -+ -+/* -+ * ext3cow_mark_inode_dirty is somewhat expensive, so unlike ext2 we -+ * do not perform it in these functions. We perform it at the call site, -+ * if it is needed. -+ */ -+static inline void ext3cow_inc_count(handle_t *handle, struct inode *inode) -+{ -+ inc_nlink(inode); -+} -+ -+static inline void ext3cow_dec_count(handle_t *handle, struct inode *inode) -+{ -+ drop_nlink(inode); -+} -+ -+static int ext3cow_add_nondir(handle_t *handle, -+ struct dentry *dentry, struct inode *inode) -+{ -+ int err = ext3cow_add_entry(handle, dentry, inode); -+ if (!err) { -+ ext3cow_mark_inode_dirty(handle, inode); -+ d_instantiate(dentry, inode); -+ return 0; -+ } -+ ext3cow_dec_count(handle, inode); -+ iput(inode); -+ return err; -+} -+ -+/* -+ * By the time this is called, we already have created -+ * the directory cache entry for the new file, but it -+ * is so far negative - it has no inode. -+ * -+ * If the create succeeds, we fill in the inode information -+ * with d_instantiate(). -+ */ -+static int ext3cow_create (struct inode * dir, struct dentry * dentry, int mode, -+ struct nameidata *nd) -+{ -+ handle_t *handle; -+ struct inode * inode; -+ int err, retries = 0; -+ -+ /* Can't create in the past -znjp */ -+ if(is_unchangeable(dir, dentry)) -+ return -EROFS; -+ -+retry: -+ handle = ext3cow_journal_start(dir, EXT3COW_DATA_TRANS_BLOCKS(dir->i_sb) + -+ EXT3COW_INDEX_EXTRA_TRANS_BLOCKS + 3 + -+ 2*EXT3COW_QUOTA_INIT_BLOCKS(dir->i_sb)); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ if (IS_DIRSYNC(dir)) -+ handle->h_sync = 1; -+ -+ inode = ext3cow_new_inode (handle, dir, mode); -+ err = PTR_ERR(inode); -+ if (!IS_ERR(inode)) { -+ inode->i_op = &ext3cow_file_inode_operations; -+ inode->i_fop = &ext3cow_file_operations; -+ ext3cow_set_aops(inode); -+ err = ext3cow_add_nondir(handle, dentry, inode); -+ } -+ ext3cow_journal_stop(handle); -+ if (err == -ENOSPC && ext3cow_should_retry_alloc(dir->i_sb, &retries)) -+ goto retry; -+ return err; -+} -+ -+static int ext3cow_mknod (struct inode * dir, struct dentry *dentry, -+ int mode, dev_t rdev) -+{ -+ handle_t *handle; -+ struct inode *inode; -+ int err, retries = 0; -+ -+ if (!new_valid_dev(rdev)) -+ return -EINVAL; -+ -+retry: -+ handle = ext3cow_journal_start(dir, EXT3COW_DATA_TRANS_BLOCKS(dir->i_sb) + -+ EXT3COW_INDEX_EXTRA_TRANS_BLOCKS + 3 + -+ 2*EXT3COW_QUOTA_INIT_BLOCKS(dir->i_sb)); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ if (IS_DIRSYNC(dir)) -+ handle->h_sync = 1; -+ -+ inode = ext3cow_new_inode (handle, dir, mode); -+ err = PTR_ERR(inode); -+ if (!IS_ERR(inode)) { -+ init_special_inode(inode, inode->i_mode, rdev); -+#ifdef CONFIG_EXT3COW_FS_XATTR -+ inode->i_op = &ext3cow_special_inode_operations; -+#endif -+ err = ext3cow_add_nondir(handle, dentry, inode); -+ } -+ ext3cow_journal_stop(handle); -+ if (err == -ENOSPC && ext3cow_should_retry_alloc(dir->i_sb, &retries)) -+ goto retry; -+ return err; -+} -+ -+static int ext3cow_mkdir(struct inode * dir, struct dentry * dentry, int mode) -+{ -+ handle_t *handle; -+ struct inode * inode; -+ struct buffer_head * dir_block; -+ struct ext3cow_dir_entry_2 * de; -+ int err, retries = 0; -+ -+ if (dir->i_nlink >= EXT3COW_LINK_MAX) -+ return -EMLINK; -+ /* No mkdirs in the past -znjp */ -+ if(is_unchangeable(dir, dentry)) -+ return -EROFS; -+ -+ -+retry: -+ handle = ext3cow_journal_start(dir, EXT3COW_DATA_TRANS_BLOCKS(dir->i_sb) + -+ EXT3COW_INDEX_EXTRA_TRANS_BLOCKS + 3 + -+ 2*EXT3COW_QUOTA_INIT_BLOCKS(dir->i_sb)); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ if (IS_DIRSYNC(dir)) -+ handle->h_sync = 1; -+ -+ inode = ext3cow_new_inode (handle, dir, S_IFDIR | mode); -+ err = PTR_ERR(inode); -+ if (IS_ERR(inode)) -+ goto out_stop; -+ -+ inode->i_op = &ext3cow_dir_inode_operations; -+ inode->i_fop = &ext3cow_dir_operations; -+ inode->i_size = EXT3COW_I(inode)->i_disksize = inode->i_sb->s_blocksize; -+ dir_block = ext3cow_bread (handle, inode, 0, 1, &err); -+ if (!dir_block) { -+ drop_nlink(inode); /* is this nlink == 0? */ -+ ext3cow_mark_inode_dirty(handle, inode); -+ iput (inode); -+ goto out_stop; -+ } -+ BUFFER_TRACE(dir_block, "get_write_access"); -+ ext3cow_journal_get_write_access(handle, dir_block); -+ de = (struct ext3cow_dir_entry_2 *) dir_block->b_data; -+ de->inode = cpu_to_le32(inode->i_ino); -+ de->name_len = 1; -+ de->rec_len = cpu_to_le16(EXT3COW_DIR_REC_LEN(de->name_len)); -+ /* For versioning -znjp */ -+ de->birth_epoch = cpu_to_le32(EXT3COW_S_EPOCHNUMBER(dir->i_sb)); -+ de->death_epoch = cpu_to_le32(EXT3COW_DIRENT_ALIVE); -+ strcpy (de->name, "."); -+ ext3cow_set_de_type(dir->i_sb, de, S_IFDIR); -+ de = (struct ext3cow_dir_entry_2 *) -+ ((char *) de + le16_to_cpu(de->rec_len)); -+ de->inode = cpu_to_le32(dir->i_ino); -+ de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize-EXT3COW_DIR_REC_LEN(1)); -+ de->name_len = 2; -+ strcpy (de->name, ".."); -+ ext3cow_set_de_type(dir->i_sb, de, S_IFDIR); -+ inode->i_nlink = 2; -+ /* For versioning -znjp */ -+ de->birth_epoch = cpu_to_le32(EXT3COW_I_EPOCHNUMBER(dir)); -+ de->death_epoch = cpu_to_le32(EXT3COW_DIRENT_ALIVE); -+ BUFFER_TRACE(dir_block, "call ext3cow_journal_dirty_metadata"); -+ ext3cow_journal_dirty_metadata(handle, dir_block); -+ brelse (dir_block); -+ ext3cow_mark_inode_dirty(handle, inode); -+ err = ext3cow_add_entry (handle, dentry, inode); -+ if (err) { -+ inode->i_nlink = 0; -+ ext3cow_mark_inode_dirty(handle, inode); -+ iput (inode); -+ goto out_stop; -+ } -+ inc_nlink(dir); -+ ext3cow_update_dx_flag(dir); -+ ext3cow_mark_inode_dirty(handle, dir); -+ d_instantiate(dentry, inode); -+out_stop: -+ ext3cow_journal_stop(handle); -+ if (err == -ENOSPC && ext3cow_should_retry_alloc(dir->i_sb, &retries)) -+ goto retry; -+ return err; -+} -+ -+/* -+ * routine to check that the specified directory is empty (for rmdir) -+ */ -+static int empty_dir (struct inode * inode) -+{ -+ unsigned long offset; -+ struct buffer_head * bh; -+ struct ext3cow_dir_entry_2 * de, * de1; -+ struct super_block * sb; -+ int err = 0; -+ -+ sb = inode->i_sb; -+ if (inode->i_size < EXT3COW_DIR_REC_LEN(1) + EXT3COW_DIR_REC_LEN(2) || -+ !(bh = ext3cow_bread (NULL, inode, 0, 0, &err))) { -+ if (err) -+ ext3cow_error(inode->i_sb, __FUNCTION__, -+ "error %d reading directory #%lu offset 0", -+ err, inode->i_ino); -+ else -+ ext3cow_warning(inode->i_sb, __FUNCTION__, -+ "bad directory (dir #%lu) - no data block", -+ inode->i_ino); -+ return 1; -+ } -+ de = (struct ext3cow_dir_entry_2 *) bh->b_data; -+ de1 = (struct ext3cow_dir_entry_2 *) -+ ((char *) de + le16_to_cpu(de->rec_len)); -+ if (le32_to_cpu(de->inode) != inode->i_ino || -+ !le32_to_cpu(de1->inode) || -+ strcmp (".", de->name) || -+ strcmp ("..", de1->name)) { -+ ext3cow_warning (inode->i_sb, "empty_dir", -+ "bad directory (dir #%lu) - no `.' or `..'", -+ inode->i_ino); -+ brelse (bh); -+ return 1; -+ } -+ offset = le16_to_cpu(de->rec_len) + le16_to_cpu(de1->rec_len); -+ de = (struct ext3cow_dir_entry_2 *) -+ ((char *) de1 + le16_to_cpu(de1->rec_len)); -+ while (offset < inode->i_size ) { -+ if (!bh || -+ (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { -+ err = 0; -+ brelse (bh); -+ bh = ext3cow_bread (NULL, inode, -+ offset >> EXT3COW_BLOCK_SIZE_BITS(sb), 0, &err); -+ if (!bh) { -+ if (err) -+ ext3cow_error(sb, __FUNCTION__, -+ "error %d reading directory" -+ " #%lu offset %lu", -+ err, inode->i_ino, offset); -+ offset += sb->s_blocksize; -+ continue; -+ } -+ de = (struct ext3cow_dir_entry_2 *) bh->b_data; -+ } -+ if (!ext3cow_check_dir_entry("empty_dir", inode, de, bh, offset)) { -+ de = (struct ext3cow_dir_entry_2 *)(bh->b_data + -+ sb->s_blocksize); -+ offset = (offset | (sb->s_blocksize - 1)) + 1; -+ continue; -+ } -+ /* Can remove a dir only if all dirents are out of scope -znjp */ -+ if (le32_to_cpu(de->inode) && -+ EXT3COW_IS_DIRENT_SCOPED(de, EXT3COW_I_EPOCHNUMBER(inode))) { -+ brelse (bh); -+ return 0; -+ } -+ offset += le16_to_cpu(de->rec_len); -+ de = (struct ext3cow_dir_entry_2 *) -+ ((char *) de + le16_to_cpu(de->rec_len)); -+ } -+ brelse (bh); -+ return 1; -+} -+ -+/* ext3cow_orphan_add() links an unlinked or truncated inode into a list of -+ * such inodes, starting at the superblock, in case we crash before the -+ * file is closed/deleted, or in case the inode truncate spans multiple -+ * transactions and the last transaction is not recovered after a crash. -+ * -+ * At filesystem recovery time, we walk this list deleting unlinked -+ * inodes and truncating linked inodes in ext3cow_orphan_cleanup(). -+ */ -+int ext3cow_orphan_add(handle_t *handle, struct inode *inode) -+{ -+ struct super_block *sb = inode->i_sb; -+ struct ext3cow_iloc iloc; -+ int err = 0, rc; -+ -+ lock_super(sb); -+ if (!list_empty(&EXT3COW_I(inode)->i_orphan)) -+ goto out_unlock; -+ -+ /* Orphan handling is only valid for files with data blocks -+ * being truncated, or files being unlinked. */ -+ -+ /* @@@ FIXME: Observation from aviro: -+ * I think I can trigger J_ASSERT in ext3cow_orphan_add(). We block -+ * here (on lock_super()), so race with ext3cow_link() which might bump -+ * ->i_nlink. For, say it, character device. Not a regular file, -+ * not a directory, not a symlink and ->i_nlink > 0. -+ */ -+ J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || -+ S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); -+ -+ BUFFER_TRACE(EXT3COW_SB(sb)->s_sbh, "get_write_access"); -+ err = ext3cow_journal_get_write_access(handle, EXT3COW_SB(sb)->s_sbh); -+ if (err) -+ goto out_unlock; -+ -+ err = ext3cow_reserve_inode_write(handle, inode, &iloc); -+ if (err) -+ goto out_unlock; -+ -+ /* Insert this inode at the head of the on-disk orphan list... */ -+ NEXT_ORPHAN(inode) = le32_to_cpu(EXT3COW_SB(sb)->s_es->s_last_orphan); -+ EXT3COW_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); -+ err = ext3cow_journal_dirty_metadata(handle, EXT3COW_SB(sb)->s_sbh); -+ rc = ext3cow_mark_iloc_dirty(handle, inode, &iloc); -+ if (!err) -+ err = rc; -+ -+ /* Only add to the head of the in-memory list if all the -+ * previous operations succeeded. If the orphan_add is going to -+ * fail (possibly taking the journal offline), we can't risk -+ * leaving the inode on the orphan list: stray orphan-list -+ * entries can cause panics at unmount time. -+ * -+ * This is safe: on error we're going to ignore the orphan list -+ * anyway on the next recovery. */ -+ if (!err) -+ list_add(&EXT3COW_I(inode)->i_orphan, &EXT3COW_SB(sb)->s_orphan); -+ -+ jbd_debug(4, "superblock will point to %lu\n", inode->i_ino); -+ jbd_debug(4, "orphan inode %lu will point to %d\n", -+ inode->i_ino, NEXT_ORPHAN(inode)); -+out_unlock: -+ unlock_super(sb); -+ ext3cow_std_error(inode->i_sb, err); -+ return err; -+} -+ -+/* -+ * ext3cow_orphan_del() removes an unlinked or truncated inode from the list -+ * of such inodes stored on disk, because it is finally being cleaned up. -+ */ -+int ext3cow_orphan_del(handle_t *handle, struct inode *inode) -+{ -+ struct list_head *prev; -+ struct ext3cow_inode_info *ei = EXT3COW_I(inode); -+ struct ext3cow_sb_info *sbi; -+ unsigned long ino_next; -+ struct ext3cow_iloc iloc; -+ int err = 0; -+ -+ lock_super(inode->i_sb); -+ if (list_empty(&ei->i_orphan)) { -+ unlock_super(inode->i_sb); -+ return 0; -+ } -+ -+ ino_next = NEXT_ORPHAN(inode); -+ prev = ei->i_orphan.prev; -+ sbi = EXT3COW_SB(inode->i_sb); -+ -+ jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino); -+ -+ list_del_init(&ei->i_orphan); -+ -+ /* If we're on an error path, we may not have a valid -+ * transaction handle with which to update the orphan list on -+ * disk, but we still need to remove the inode from the linked -+ * list in memory. */ -+ if (!handle) -+ goto out; -+ -+ err = ext3cow_reserve_inode_write(handle, inode, &iloc); -+ if (err) -+ goto out_err; -+ -+ if (prev == &sbi->s_orphan) { -+ jbd_debug(4, "superblock will point to %lu\n", ino_next); -+ BUFFER_TRACE(sbi->s_sbh, "get_write_access"); -+ err = ext3cow_journal_get_write_access(handle, sbi->s_sbh); -+ if (err) -+ goto out_brelse; -+ sbi->s_es->s_last_orphan = cpu_to_le32(ino_next); -+ err = ext3cow_journal_dirty_metadata(handle, sbi->s_sbh); -+ } else { -+ struct ext3cow_iloc iloc2; -+ struct inode *i_prev = -+ &list_entry(prev, struct ext3cow_inode_info, i_orphan)->vfs_inode; -+ -+ jbd_debug(4, "orphan inode %lu will point to %lu\n", -+ i_prev->i_ino, ino_next); -+ err = ext3cow_reserve_inode_write(handle, i_prev, &iloc2); -+ if (err) -+ goto out_brelse; -+ NEXT_ORPHAN(i_prev) = ino_next; -+ err = ext3cow_mark_iloc_dirty(handle, i_prev, &iloc2); -+ } -+ if (err) -+ goto out_brelse; -+ NEXT_ORPHAN(inode) = 0; -+ err = ext3cow_mark_iloc_dirty(handle, inode, &iloc); -+ -+out_err: -+ ext3cow_std_error(inode->i_sb, err); -+out: -+ unlock_super(inode->i_sb); -+ return err; -+ -+out_brelse: -+ brelse(iloc.bh); -+ goto out_err; -+} -+ -+static int ext3cow_rmdir (struct inode * dir, struct dentry *dentry) -+{ -+ int retval; -+ struct inode * inode; -+ struct buffer_head * bh; -+ struct ext3cow_dir_entry_2 * de; -+ handle_t *handle; -+ -+ /* Initialize quotas before so that eventual writes go in -+ * separate transaction */ -+ DQUOT_INIT(dentry->d_inode); -+ handle = ext3cow_journal_start(dir, EXT3COW_DELETE_TRANS_BLOCKS(dir->i_sb)); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ retval = -ENOENT; -+ bh = ext3cow_find_entry (dentry, &de); -+ if (!bh) -+ goto end_rmdir; -+ -+ if (IS_DIRSYNC(dir)) -+ handle->h_sync = 1; -+ -+ inode = dentry->d_inode; -+ -+ /* Can't rmdir in the past -znjp */ -+ retval = -EROFS; -+ if(is_unchangeable(inode, dentry)) -+ goto end_rmdir; -+ -+ retval = -EIO; -+ if (le32_to_cpu(de->inode) != inode->i_ino) -+ goto end_rmdir; -+ -+ retval = -ENOTEMPTY; -+ if (!empty_dir (inode)) -+ goto end_rmdir; -+ -+ retval = ext3cow_delete_entry(handle, dir, de, bh, dentry); -+ if (retval) -+ goto end_rmdir; -+ if (inode->i_nlink != 2) -+ ext3cow_warning (inode->i_sb, "ext3cow_rmdir", -+ "empty directory has nlink!=2 (%d)", -+ inode->i_nlink); -+ inode->i_version++; -+ -+ /* We only delete things that were created in the same epoch -znjp */ -+ if(de->birth_epoch == de->death_epoch){ -+ clear_nlink(inode); -+ /* There's no need to set i_disksize: the fact that i_nlink is -+ * zero will ensure that the right thing happens during any -+ * recovery. */ -+ inode->i_size = 0; -+ ext3cow_orphan_add(handle, inode); -+ drop_nlink(dir); -+ } -+ EXT3COW_I(inode)->i_flags |= EXT3COW_UNCHANGEABLE_FL; -+ inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; -+ ext3cow_mark_inode_dirty(handle, inode); -+ ext3cow_update_dx_flag(dir); -+ ext3cow_mark_inode_dirty(handle, dir); -+ -+end_rmdir: -+ ext3cow_journal_stop(handle); -+ brelse (bh); -+ return retval; -+} -+ -+static int ext3cow_unlink(struct inode * dir, struct dentry *dentry) -+{ -+ int retval; -+ struct inode * inode; -+ struct buffer_head * bh; -+ struct ext3cow_dir_entry_2 * de; -+ handle_t *handle; -+ -+ /* Initialize quotas before so that eventual writes go -+ * in separate transaction */ -+ DQUOT_INIT(dentry->d_inode); -+ handle = ext3cow_journal_start(dir, EXT3COW_DELETE_TRANS_BLOCKS(dir->i_sb)); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ if (IS_DIRSYNC(dir)) -+ handle->h_sync = 1; -+ -+ retval = -ENOENT; -+ bh = ext3cow_find_entry (dentry, &de); -+ if (!bh) -+ goto end_unlink; -+ -+ inode = dentry->d_inode; -+ -+ /* Can't unlink in the past -znjp */ -+ retval = -EROFS; -+ if(is_unchangeable(inode, dentry)) -+ goto end_unlink; -+ -+ retval = -EIO; -+ if (le32_to_cpu(de->inode) != inode->i_ino) -+ goto end_unlink; -+ -+ if (!inode->i_nlink) { -+ ext3cow_warning (inode->i_sb, "ext3cow_unlink", -+ "Deleting nonexistent file (%lu), %d", -+ inode->i_ino, inode->i_nlink); -+ inode->i_nlink = 1; -+ } -+ retval = ext3cow_delete_entry(handle, dir, de, bh, dentry); -+ if (retval) -+ goto end_unlink; -+ dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; -+ ext3cow_update_dx_flag(dir); -+ ext3cow_mark_inode_dirty(handle, dir); -+ -+ /* If the file should be deleted here, don't actually delete it -+ * but mark it unchangeable, i.e. it's now in the past. -znjp */ -+ -+ /* If file was created in this epoch, then we actually unlink it, -+ * if not, then it belongs to the past, so mark it unchangeable -znjp */ -+ if(de->birth_epoch == de->death_epoch){ -+ drop_nlink(inode); -+ if (!inode->i_nlink){ -+ ext3cow_orphan_add(handle, inode); -+ } -+ }else{ -+ if(!(inode->i_nlink - 1)) -+ EXT3COW_I(inode)->i_flags |= EXT3COW_UNCHANGEABLE_FL; -+ } -+ inode->i_ctime = dir->i_ctime; -+ ext3cow_mark_inode_dirty(handle, inode); -+ retval = 0; -+ -+end_unlink: -+ ext3cow_journal_stop(handle); -+ brelse (bh); -+ return retval; -+} -+ -+static int ext3cow_symlink (struct inode * dir, -+ struct dentry *dentry, const char * symname) -+{ -+ handle_t *handle; -+ struct inode * inode; -+ int l, err, retries = 0; -+ -+ l = strlen(symname)+1; -+ if (l > dir->i_sb->s_blocksize) -+ return -ENAMETOOLONG; -+ -+retry: -+ handle = ext3cow_journal_start(dir, EXT3COW_DATA_TRANS_BLOCKS(dir->i_sb) + -+ EXT3COW_INDEX_EXTRA_TRANS_BLOCKS + 5 + -+ 2*EXT3COW_QUOTA_INIT_BLOCKS(dir->i_sb)); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ if (IS_DIRSYNC(dir)) -+ handle->h_sync = 1; -+ -+ inode = ext3cow_new_inode (handle, dir, S_IFLNK|S_IRWXUGO); -+ err = PTR_ERR(inode); -+ if (IS_ERR(inode)) -+ goto out_stop; -+ -+ if (l > sizeof (EXT3COW_I(inode)->i_data)) { -+ inode->i_op = &ext3cow_symlink_inode_operations; -+ ext3cow_set_aops(inode); -+ /* -+ * page_symlink() calls into ext3cow_prepare/commit_write. -+ * We have a transaction open. All is sweetness. It also sets -+ * i_size in generic_commit_write(). -+ */ -+ err = __page_symlink(inode, symname, l, -+ mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); -+ if (err) { -+ ext3cow_dec_count(handle, inode); -+ ext3cow_mark_inode_dirty(handle, inode); -+ iput (inode); -+ goto out_stop; -+ } -+ } else { -+ inode->i_op = &ext3cow_fast_symlink_inode_operations; -+ memcpy((char*)&EXT3COW_I(inode)->i_data,symname,l); -+ inode->i_size = l-1; -+ } -+ EXT3COW_I(inode)->i_disksize = inode->i_size; -+ err = ext3cow_add_nondir(handle, dentry, inode); -+out_stop: -+ ext3cow_journal_stop(handle); -+ if (err == -ENOSPC && ext3cow_should_retry_alloc(dir->i_sb, &retries)) -+ goto retry; -+ return err; -+} -+ -+static int ext3cow_link (struct dentry * old_dentry, -+ struct inode * dir, struct dentry *dentry) -+{ -+ handle_t *handle; -+ struct inode *inode = old_dentry->d_inode; -+ int err, retries = 0; -+ -+ if (inode->i_nlink >= EXT3COW_LINK_MAX) -+ return -EMLINK; -+ -+retry: -+ handle = ext3cow_journal_start(dir, EXT3COW_DATA_TRANS_BLOCKS(dir->i_sb) + -+ EXT3COW_INDEX_EXTRA_TRANS_BLOCKS); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ if (IS_DIRSYNC(dir)) -+ handle->h_sync = 1; -+ -+ inode->i_ctime = CURRENT_TIME_SEC; -+ ext3cow_inc_count(handle, inode); -+ atomic_inc(&inode->i_count); -+ -+ err = ext3cow_add_nondir(handle, dentry, inode); -+ ext3cow_journal_stop(handle); -+ if (err == -ENOSPC && ext3cow_should_retry_alloc(dir->i_sb, &retries)) -+ goto retry; -+ return err; -+} -+ -+#define PARENT_INO(buffer) \ -+ ((struct ext3cow_dir_entry_2 *) ((char *) buffer + \ -+ le16_to_cpu(((struct ext3cow_dir_entry_2 *) buffer)->rec_len)))->inode -+ -+/* -+ * Anybody can rename anything with this: the permission checks are left to the -+ * higher-level routines. -+ */ -+static int ext3cow_rename (struct inode * old_dir, struct dentry *old_dentry, -+ struct inode * new_dir,struct dentry *new_dentry) -+{ -+ handle_t *handle; -+ struct inode * old_inode, * new_inode; -+ struct buffer_head * old_bh, * new_bh, * dir_bh; -+ struct ext3cow_dir_entry_2 * old_de, * new_de; -+ int retval; -+ -+ old_bh = new_bh = dir_bh = NULL; -+ -+ /* Initialize quotas before so that eventual writes go -+ * in separate transaction */ -+ if (new_dentry->d_inode) -+ DQUOT_INIT(new_dentry->d_inode); -+ handle = ext3cow_journal_start(old_dir, 2 * -+ EXT3COW_DATA_TRANS_BLOCKS(old_dir->i_sb) + -+ EXT3COW_INDEX_EXTRA_TRANS_BLOCKS + 2); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) -+ handle->h_sync = 1; -+ -+ old_bh = ext3cow_find_entry (old_dentry, &old_de); -+ /* -+ * Check for inode number is _not_ due to possible IO errors. -+ * We might rmdir the source, keep it as pwd of some process -+ * and merrily kill the link to whatever was created under the -+ * same name. Goodbye sticky bit ;-< -+ */ -+ old_inode = old_dentry->d_inode; -+ retval = -ENOENT; -+ if (!old_bh || le32_to_cpu(old_de->inode) != old_inode->i_ino) -+ goto end_rename; -+ -+ new_inode = new_dentry->d_inode; -+ new_bh = ext3cow_find_entry (new_dentry, &new_de); -+ if (new_bh) { -+ if (!new_inode) { -+ brelse (new_bh); -+ new_bh = NULL; -+ } -+ } -+ -+ /* can't move something into the past -znjp */ -+ retval = -EROFS; -+ if(is_unchangeable(new_inode, new_dentry)) -+ goto end_rename; -+ /* can't some move from the past -znjp */ -+ if(is_unchangeable(old_inode, old_dentry)) -+ goto end_rename; -+ -+ if (S_ISDIR(old_inode->i_mode)) { -+ if (new_inode) { -+ retval = -ENOTEMPTY; -+ if (!empty_dir (new_inode)) -+ goto end_rename; -+ } -+ retval = -EIO; -+ dir_bh = ext3cow_bread (handle, old_inode, 0, 0, &retval); -+ if (!dir_bh) -+ goto end_rename; -+ if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino) -+ goto end_rename; -+ retval = -EMLINK; -+ if (!new_inode && new_dir!=old_dir && -+ new_dir->i_nlink >= EXT3COW_LINK_MAX) -+ goto end_rename; -+ } -+ if (!new_bh) { -+ retval = ext3cow_add_entry (handle, new_dentry, old_inode); -+ if (retval) -+ goto end_rename; -+ } else { -+ BUFFER_TRACE(new_bh, "get write access"); -+ ext3cow_journal_get_write_access(handle, new_bh); -+ new_de->inode = cpu_to_le32(old_inode->i_ino); -+ if (EXT3COW_HAS_INCOMPAT_FEATURE(new_dir->i_sb, -+ EXT3COW_FEATURE_INCOMPAT_FILETYPE)) -+ new_de->file_type = old_de->file_type; -+ new_dir->i_version++; -+ BUFFER_TRACE(new_bh, "call ext3cow_journal_dirty_metadata"); -+ ext3cow_journal_dirty_metadata(handle, new_bh); -+ brelse(new_bh); -+ new_bh = NULL; -+ } -+ -+ /* -+ * Like most other Unix systems, set the ctime for inodes on a -+ * rename. -+ */ -+ old_inode->i_ctime = CURRENT_TIME_SEC; -+ ext3cow_mark_inode_dirty(handle, old_inode); -+ -+ /* -+ * ok, that's it -+ */ -+ if (le32_to_cpu(old_de->inode) != old_inode->i_ino || -+ old_de->name_len != old_dentry->d_name.len || -+ strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) || -+ (retval = ext3cow_delete_entry(handle, old_dir, -+ old_de, old_bh, new_dentry)) == -ENOENT) { -+ /* old_de could have moved from under us during htree split, so -+ * make sure that we are deleting the right entry. We might -+ * also be pointing to a stale entry in the unused part of -+ * old_bh so just checking inum and the name isn't enough. */ -+ struct buffer_head *old_bh2; -+ struct ext3cow_dir_entry_2 *old_de2; -+ -+ old_bh2 = ext3cow_find_entry(old_dentry, &old_de2); -+ if (old_bh2) { -+ retval = ext3cow_delete_entry(handle, old_dir, -+ old_de2, old_bh2, new_dentry); -+ brelse(old_bh2); -+ } -+ } -+ if (retval) { -+ ext3cow_warning(old_dir->i_sb, "ext3cow_rename", -+ "Deleting old file (%lu), %d, error=%d", -+ old_dir->i_ino, old_dir->i_nlink, retval); -+ } -+ -+ if (new_inode) { -+ new_inode->i_ctime = CURRENT_TIME_SEC; -+ } -+ if(!is_unchangeable(old_inode, old_dentry)) -+ old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME_SEC; -+ ext3cow_update_dx_flag(old_dir); -+ if (dir_bh) { -+ BUFFER_TRACE(dir_bh, "get_write_access"); -+ ext3cow_journal_get_write_access(handle, dir_bh); -+ PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino); -+ BUFFER_TRACE(dir_bh, "call ext3cow_journal_dirty_metadata"); -+ ext3cow_journal_dirty_metadata(handle, dir_bh); -+ if (!new_inode) { -+ inc_nlink(new_dir); -+ ext3cow_update_dx_flag(new_dir); -+ ext3cow_mark_inode_dirty(handle, new_dir); -+ } -+ } -+ ext3cow_mark_inode_dirty(handle, old_dir); -+ if (new_inode) { -+ ext3cow_mark_inode_dirty(handle, new_inode); -+ if (!new_inode->i_nlink) -+ ext3cow_orphan_add(handle, new_inode); -+ } -+ retval = 0; -+ -+end_rename: -+ brelse (dir_bh); -+ brelse (old_bh); -+ brelse (new_bh); -+ ext3cow_journal_stop(handle); -+ return retval; -+} -+ -+/* ext3cow_fake_inode: This function creates a VFS-only inode -+ * used for properly scoping views into the past file system - znjp -+ */ -+struct inode *ext3cow_fake_inode(struct inode *inode, -+ unsigned int epoch_number) -+{ -+ struct inode * fake_inode = NULL; -+ struct ext3cow_inode_info * ini = NULL; -+ struct ext3cow_inode_info * fake_ini = NULL; -+ static unsigned int last_ino = UINT_MAX; -+ int err = 0; -+ int block = -1; -+ -+ if(NULL == inode){ -+ printk(KERN_ERR "Trying to duplicate a NULL inode.\n"); -+ return NULL; -+ } -+ -+ if(EXT3COW_IS_FAKEINODE(inode)){ -+ printk(KERN_ERR "Trying to fake a fake inode.\n"); -+ return inode; -+ } -+ -+ printk(KERN_INFO "** faking inode %lu\n", inode->i_ino); -+ -+ ini = EXT3COW_I(inode); -+ -+ /* Create a new VFS-only inode */ -+ fake_inode = new_inode(inode->i_sb); -+ err = PTR_ERR(fake_inode); -+ if(!IS_ERR(fake_inode)){ -+ -+ fake_ini = EXT3COW_I(fake_inode); -+ -+ printk(KERN_INFO "** got inode %lu setting with %u\n", fake_inode->i_ino, -+ last_ino); -+ -+ /* When inode is a directory, we can fake the inode number */ -+ //if(S_ISDIR(inode->i_mode)) -+ fake_inode->i_ino = --last_ino; -+ -+ fake_inode->i_mode = inode->i_mode; -+ fake_inode->i_uid = inode->i_uid; -+ fake_inode->i_gid = inode->i_gid; -+ -+ atomic_set(&fake_inode->i_count, 1); -+ -+ fake_inode->i_nlink = inode->i_nlink; -+ fake_inode->i_size = inode->i_size; -+ fake_inode->i_atime.tv_sec = inode->i_atime.tv_sec; -+ fake_inode->i_ctime.tv_sec = inode->i_ctime.tv_sec; -+ fake_inode->i_mtime.tv_sec = inode->i_mtime.tv_sec; -+ fake_inode->i_atime.tv_nsec = inode->i_atime.tv_nsec; -+ fake_inode->i_ctime.tv_nsec = inode->i_ctime.tv_nsec; -+ fake_inode->i_mtime.tv_nsec = inode->i_mtime.tv_nsec; -+ -+ fake_ini->i_state = ini->i_state; -+ fake_ini->i_dir_start_lookup = ini->i_dir_start_lookup; -+ fake_ini->i_dtime = ini->i_dtime; -+ -+ fake_inode->i_blocks = inode->i_blocks; -+ fake_ini->i_flags = ini->i_flags; -+#ifdef EXT3COW_FRAGMENTS -+ /* Taken out for versioning -znjp */ -+ //fake_ini->i_faddr = ini->i_faddr; -+ //fake_ini->i_frag_no = ini->i_frag_no; -+ //fake_ini->i_frag_size = ini->i_frag_size; -+#endif -+ fake_ini->i_file_acl = ini->i_file_acl; -+ if (!S_ISREG(fake_inode->i_mode)) { -+ fake_ini->i_dir_acl = ini->i_dir_acl; -+ } -+ fake_ini->i_disksize = inode->i_size; -+ fake_inode->i_generation = inode->i_generation; -+ //TODO: This could be wrong. -+ //fake_ini->i_block_group = ini->i_block_group; //iloc.block_group; -+ -+ for (block = 0; block < EXT3COW_N_BLOCKS; block++) -+ fake_ini->i_data[block] = ini->i_data[block]; -+ -+ fake_ini->i_extra_isize = ini->i_extra_isize; -+ -+ /* set copy-on-write bitmap to 0 */ -+ fake_ini->i_cow_bitmap = 0x0000; -+ -+ /* Mark fake inode unchangeable, etc. */ -+ fake_ini->i_flags |= EXT3COW_UNCHANGEABLE_FL; -+ fake_ini->i_flags |= EXT3COW_UNVERSIONABLE_FL; -+ fake_ini->i_flags |= EXT3COW_FAKEINODE_FL; -+ fake_ini->i_flags |= EXT3COW_IMMUTABLE_FL; -+ -+ /* Make sure we get the right operations */ -+ if (S_ISREG(fake_inode->i_mode)) { -+ fake_inode->i_op = &ext3cow_file_inode_operations; -+ fake_inode->i_fop = &ext3cow_file_operations; -+ ext3cow_set_aops(fake_inode); -+ } else if (S_ISDIR(fake_inode->i_mode)) { -+ fake_inode->i_op = &ext3cow_dir_inode_operations; -+ fake_inode->i_fop = &ext3cow_dir_operations; -+ } else if (S_ISLNK(fake_inode->i_mode)) { -+ //if (ext3cow_inode_is_fast_symlink(cow_inode)) -+ if((S_ISLNK(fake_inode->i_mode) && fake_inode->i_blocks - -+ (EXT3COW_I(fake_inode)->i_file_acl ? -+ (fake_inode->i_sb->s_blocksize >> 9) : 0))) -+ fake_inode->i_op = &ext3cow_fast_symlink_inode_operations; -+ else { -+ fake_inode->i_op = &ext3cow_symlink_inode_operations; -+ ext3cow_set_aops(fake_inode); -+ } -+ } else { -+ fake_inode->i_op = &ext3cow_special_inode_operations; -+ } -+ -+ fake_ini->i_epoch_number = epoch_number; -+ fake_ini->i_next_inode = 0; -+ -+ iput(inode); /* dec i_count */ -+ -+ return fake_inode; -+ }else -+ ext3cow_warning(inode->i_sb, "ext3cow_fake_inode", -+ "Could not create fake inode."); -+ -+ return NULL; -+} -+ -+/* -+ * ext3cow_dup_inode: This function creates a new inode, -+ * copies all the metadata from the passed in inode, -+ * and adds it to the version chain, creating a new version. -+ * The head of the chain never changes; it is always the most current version. -+ * Similar in nature to ext3cow_creat and ext3cow_read_inode. -znjp -+ */ -+int ext3cow_dup_inode(struct inode *dir, struct inode *inode){ -+ -+ struct inode *cow_inode = NULL; -+ struct inode *parent = NULL; -+ struct ext3cow_inode_info *ini = NULL; -+ struct ext3cow_inode_info *cow_ini = NULL; -+ handle_t *handle = NULL; -+ int err = 0; -+ int block = -1; -+ unsigned int epoch_number_temp = 0; -+ int retries = 0; -+ -+ printk(KERN_INFO "** duping inode %lu\n", inode->i_ino); -+ -+ if(EXT3COW_IS_UNVERSIONABLE(inode)) -+ return 0; -+ -+ if(NULL == inode){ -+ printk(KERN_ERR "Trying to duplicate a NULL inode.\n"); -+ return -1; -+ } -+ -+ if (inode->i_nlink == 0) { -+ if (inode->i_mode == 0 || -+ !(EXT3COW_SB(inode->i_sb)->s_mount_state & EXT3COW_ORPHAN_FS)) { -+ /* this inode is deleted */ -+ return -1; -+ } -+ /* The only unlinked inodes we let through here have -+ * valid i_mode and are being read by the orphan -+ * recovery code: that's fine, we're about to complete -+ * the process of deleting those. */ -+ } -+ -+ ini = EXT3COW_I(inode); -+ -+ /* This is for truncate, which can't pass in a parent */ -+ if(NULL == dir) -+ parent = inode; -+ else -+ parent = dir; -+ -+ retry: -+ handle = ext3cow_journal_start(parent, EXT3COW_DATA_TRANS_BLOCKS(dir->i_sb) + -+ EXT3COW_INDEX_EXTRA_TRANS_BLOCKS + 3 + -+ 2*EXT3COW_QUOTA_INIT_BLOCKS(dir->i_sb)); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ if (IS_DIRSYNC(parent)) -+ handle->h_sync = 1; -+ -+ cow_inode = ext3cow_new_inode (handle, parent, inode->i_mode); -+ err = PTR_ERR(cow_inode); -+ if (!IS_ERR(cow_inode)) { -+ -+ printk(KERN_INFO " ** Allocated new inode %lu\n", cow_inode->i_ino); -+ -+ cow_ini = EXT3COW_I(cow_inode); -+ -+ cow_inode->i_mode = inode->i_mode; -+ cow_inode->i_uid = inode->i_uid; -+ cow_inode->i_gid = inode->i_gid; -+ -+ cow_inode->i_nlink = inode->i_nlink; -+ cow_inode->i_size = inode->i_size; -+ cow_inode->i_atime.tv_sec = inode->i_atime.tv_sec; -+ cow_inode->i_ctime.tv_sec = inode->i_ctime.tv_sec; -+ cow_inode->i_mtime.tv_sec = inode->i_mtime.tv_sec; -+ cow_inode->i_atime.tv_nsec = inode->i_atime.tv_nsec; -+ cow_inode->i_ctime.tv_nsec = inode->i_ctime.tv_nsec; -+ cow_inode->i_mtime.tv_nsec = inode->i_mtime.tv_nsec; -+ -+ cow_ini->i_state = ini->i_state; -+ cow_ini->i_dir_start_lookup = ini->i_dir_start_lookup; -+ cow_ini->i_dtime = ini->i_dtime; -+ -+ cow_inode->i_blocks = inode->i_blocks; -+ cow_ini->i_flags = ini->i_flags; -+#ifdef EXT3COW_FRAGMENTS -+ /* Taken out for versioning -znjp */ -+ //cow_ini->i_faddr = ini->i_faddr; -+ //cow_ini->i_frag_no = ini->i_frag_no; -+ //cow_ini->i_frag_size = ini->i_frag_size; -+#endif -+ cow_ini->i_file_acl = ini->i_file_acl; -+ if (!S_ISREG(cow_inode->i_mode)) { -+ cow_ini->i_dir_acl = ini->i_dir_acl; -+ } -+ cow_ini->i_disksize = inode->i_size; -+ cow_inode->i_generation = inode->i_generation; -+ //TODO: This could be wrong. -+ cow_ini->i_block_group = ini->i_block_group; //iloc.block_group; -+ -+ for (block = 0; block < EXT3COW_N_BLOCKS; block++) -+ cow_ini->i_data[block] = ini->i_data[block]; -+ -+ //TODO: This could be wrong -+ //cow_ini->i_orphan = NULL; //INIT_LIST_HEAD(&ei->i_orphan); -+ -+ cow_ini->i_extra_isize = ini->i_extra_isize; -+ -+ /* Make sure we get the right operations */ -+ if (S_ISREG(cow_inode->i_mode)) { -+ cow_inode->i_op = &ext3cow_file_inode_operations; -+ cow_inode->i_fop = &ext3cow_file_operations; -+ ext3cow_set_aops(cow_inode); -+ } else if (S_ISDIR(cow_inode->i_mode)) { -+ cow_inode->i_op = &ext3cow_dir_inode_operations; -+ cow_inode->i_fop = &ext3cow_dir_operations; -+ } else if (S_ISLNK(cow_inode->i_mode)) { -+ //if (ext3cow_inode_is_fast_symlink(cow_inode)) -+ if((S_ISLNK(cow_inode->i_mode) && cow_inode->i_blocks - -+ (EXT3COW_I(cow_inode)->i_file_acl ? -+ (cow_inode->i_sb->s_blocksize >> 9) : 0))) -+ cow_inode->i_op = &ext3cow_fast_symlink_inode_operations; -+ else { -+ cow_inode->i_op = &ext3cow_symlink_inode_operations; -+ ext3cow_set_aops(cow_inode); -+ } -+ } else { -+ cow_inode->i_op = &ext3cow_special_inode_operations; -+ /* -+ if (raw_inode->i_block[0]) -+ init_special_inode(inode, inode->i_mode, -+ old_decode_dev(le32_to_cpu(raw_inode->i_block[0]))); -+ else -+ init_special_inode(inode, inode->i_mode, -+ new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); -+ */ -+ } -+ -+ /* Dup in the direct cow bitmap */ -+ cow_ini->i_cow_bitmap = ini->i_cow_bitmap; -+ ini->i_cow_bitmap = 0x0000; -+ /* Mark new inode unchangeable */ -+ cow_ini->i_flags |= EXT3COW_UNCHANGEABLE_FL; -+ /* Switch epoch numbers */ -+ epoch_number_temp = ini->i_epoch_number; -+ ini->i_epoch_number = cow_ini->i_epoch_number; -+ cow_ini->i_epoch_number = epoch_number_temp; -+ /* Chain Inodes together */ -+ cow_ini->i_next_inode = ini->i_next_inode; -+ ini->i_next_inode = cow_inode->i_ino; -+ -+ ext3cow_mark_inode_dirty(handle, cow_inode); -+ ext3cow_mark_inode_dirty(handle, inode); -+ -+ iput(cow_inode); /* dec i_count */ -+ -+ err = 0; -+ } -+ ext3cow_journal_stop(handle); -+ if (err == -ENOSPC && ext3cow_should_retry_alloc(dir->i_sb, &retries)) -+ goto retry; -+ return err; -+ -+} -+ -+/* ext3cow_reclaim_dup_inode: rolls back a recently dup'd inode -+ * on error, including epoch number and bitmaps. Should not -+ * be used for removing versions. */ -+int ext3cow_reclaim_dup_inode(struct inode *dir, struct inode *inode) -+{ -+ handle_t *handle = NULL; -+ int err = 0; -+ struct inode *old_inode = NULL; -+ struct inode *parent = dir; -+ -+ if(!parent) -+ parent = inode; -+ -+ if(is_bad_inode(inode)) -+ return -1; -+ -+ handle = ext3cow_journal_start(parent, -+ EXT3COW_DELETE_TRANS_BLOCKS(parent->i_sb)); -+ if(IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ if(IS_DIRSYNC(parent)) -+ handle->h_sync = 1; -+ -+ old_inode = iget(parent->i_sb, EXT3COW_I_NEXT_INODE(inode)); -+ err = PTR_ERR(old_inode); -+ if (!IS_ERR(old_inode)){ -+ -+ EXT3COW_I(inode)->i_epoch_number = EXT3COW_I_EPOCHNUMBER(old_inode); -+ EXT3COW_I(inode)->i_cow_bitmap = EXT3COW_I(old_inode)->i_cow_bitmap; -+ EXT3COW_I(inode)->i_next_inode = EXT3COW_I(old_inode)->i_next_inode; -+ old_inode->i_nlink = 0; -+ -+ iput(old_inode); -+ ext3cow_mark_inode_dirty(handle, inode); -+ }else -+ ext3cow_error(inode->i_sb, "ext3cow_reclaim_dup_inode", -+ "Couldn't remove dup'd inode."); -+ -+ ext3cow_journal_stop(handle); -+ -+ return 0; -+} -+ -+/* -+ * directories can handle most operations... -+ */ -+struct inode_operations ext3cow_dir_inode_operations = { -+ .create = ext3cow_create, -+ .lookup = ext3cow_lookup, -+ .link = ext3cow_link, -+ .unlink = ext3cow_unlink, -+ .symlink = ext3cow_symlink, -+ .mkdir = ext3cow_mkdir, -+ .rmdir = ext3cow_rmdir, -+ .mknod = ext3cow_mknod, -+ .rename = ext3cow_rename, -+ .setattr = ext3cow_setattr, -+#ifdef CONFIG_EXT3COW_FS_XATTR -+ .setxattr = generic_setxattr, -+ .getxattr = generic_getxattr, -+ .listxattr = ext3cow_listxattr, -+ .removexattr = generic_removexattr, -+#endif -+ .permission = ext3cow_permission, -+}; -+ -+struct inode_operations ext3cow_special_inode_operations = { -+ .setattr = ext3cow_setattr, -+#ifdef CONFIG_EXT3COW_FS_XATTR -+ .setxattr = generic_setxattr, -+ .getxattr = generic_getxattr, -+ .listxattr = ext3cow_listxattr, -+ .removexattr = generic_removexattr, -+#endif -+ .permission = ext3cow_permission, -+}; -diff -Naur linux-2.6.21.7/fs/ext3cow/namei.h linux-2.6.21.7_ext3cowPatched/fs/ext3cow/namei.h ---- linux-2.6.21.7/fs/ext3cow/namei.h 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.21.7_ext3cowPatched/fs/ext3cow/namei.h 2007-10-23 17:47:18.000000000 +0200 -@@ -0,0 +1,8 @@ -+/* linux/fs/ext3cow/namei.h -+ * -+ * Copyright (C) 2005 Simtec Electronics -+ * Ben Dooks -+ * -+*/ -+ -+extern struct dentry *ext3cow_get_parent(struct dentry *child); -diff -Naur linux-2.6.21.7/fs/ext3cow/resize.c linux-2.6.21.7_ext3cowPatched/fs/ext3cow/resize.c ---- linux-2.6.21.7/fs/ext3cow/resize.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.21.7_ext3cowPatched/fs/ext3cow/resize.c 2007-10-23 17:47:18.000000000 +0200 -@@ -0,0 +1,1042 @@ -+/* -+ * linux/fs/ext3cow/resize.c -+ * -+ * Support for resizing an ext3cow filesystem while it is mounted. -+ * -+ * Copyright (C) 2001, 2002 Andreas Dilger -+ * -+ * This could probably be made into a module, because it is not often in use. -+ */ -+ -+ -+#define EXT3COWFS_DEBUG -+ -+#include -+#include -+#include -+ -+#include -+#include -+ -+ -+#define outside(b, first, last) ((b) < (first) || (b) >= (last)) -+#define inside(b, first, last) ((b) >= (first) && (b) < (last)) -+ -+static int verify_group_input(struct super_block *sb, -+ struct ext3cow_new_group_data *input) -+{ -+ struct ext3cow_sb_info *sbi = EXT3COW_SB(sb); -+ struct ext3cow_super_block *es = sbi->s_es; -+ ext3cow_fsblk_t start = le32_to_cpu(es->s_blocks_count); -+ ext3cow_fsblk_t end = start + input->blocks_count; -+ unsigned group = input->group; -+ ext3cow_fsblk_t itend = input->inode_table + sbi->s_itb_per_group; -+ unsigned overhead = ext3cow_bg_has_super(sb, group) ? -+ (1 + ext3cow_bg_num_gdb(sb, group) + -+ le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; -+ ext3cow_fsblk_t metaend = start + overhead; -+ struct buffer_head *bh = NULL; -+ ext3cow_grpblk_t free_blocks_count; -+ int err = -EINVAL; -+ -+ input->free_blocks_count = free_blocks_count = -+ input->blocks_count - 2 - overhead - sbi->s_itb_per_group; -+ -+ if (test_opt(sb, DEBUG)) -+ printk(KERN_DEBUG "EXT3COW-fs: adding %s group %u: %u blocks " -+ "(%d free, %u reserved)\n", -+ ext3cow_bg_has_super(sb, input->group) ? "normal" : -+ "no-super", input->group, input->blocks_count, -+ free_blocks_count, input->reserved_blocks); -+ -+ if (group != sbi->s_groups_count) -+ ext3cow_warning(sb, __FUNCTION__, -+ "Cannot add at group %u (only %lu groups)", -+ input->group, sbi->s_groups_count); -+ else if ((start - le32_to_cpu(es->s_first_data_block)) % -+ EXT3COW_BLOCKS_PER_GROUP(sb)) -+ ext3cow_warning(sb, __FUNCTION__, "Last group not full"); -+ else if (input->reserved_blocks > input->blocks_count / 5) -+ ext3cow_warning(sb, __FUNCTION__, "Reserved blocks too high (%u)", -+ input->reserved_blocks); -+ else if (free_blocks_count < 0) -+ ext3cow_warning(sb, __FUNCTION__, "Bad blocks count %u", -+ input->blocks_count); -+ else if (!(bh = sb_bread(sb, end - 1))) -+ ext3cow_warning(sb, __FUNCTION__, -+ "Cannot read last block ("E3FSBLK")", -+ end - 1); -+ else if (outside(input->block_bitmap, start, end)) -+ ext3cow_warning(sb, __FUNCTION__, -+ "Block bitmap not in group (block %u)", -+ input->block_bitmap); -+ else if (outside(input->inode_bitmap, start, end)) -+ ext3cow_warning(sb, __FUNCTION__, -+ "Inode bitmap not in group (block %u)", -+ input->inode_bitmap); -+ else if (outside(input->inode_table, start, end) || -+ outside(itend - 1, start, end)) -+ ext3cow_warning(sb, __FUNCTION__, -+ "Inode table not in group (blocks %u-"E3FSBLK")", -+ input->inode_table, itend - 1); -+ else if (input->inode_bitmap == input->block_bitmap) -+ ext3cow_warning(sb, __FUNCTION__, -+ "Block bitmap same as inode bitmap (%u)", -+ input->block_bitmap); -+ else if (inside(input->block_bitmap, input->inode_table, itend)) -+ ext3cow_warning(sb, __FUNCTION__, -+ "Block bitmap (%u) in inode table (%u-"E3FSBLK")", -+ input->block_bitmap, input->inode_table, itend-1); -+ else if (inside(input->inode_bitmap, input->inode_table, itend)) -+ ext3cow_warning(sb, __FUNCTION__, -+ "Inode bitmap (%u) in inode table (%u-"E3FSBLK")", -+ input->inode_bitmap, input->inode_table, itend-1); -+ else if (inside(input->block_bitmap, start, metaend)) -+ ext3cow_warning(sb, __FUNCTION__, -+ "Block bitmap (%u) in GDT table" -+ " ("E3FSBLK"-"E3FSBLK")", -+ input->block_bitmap, start, metaend - 1); -+ else if (inside(input->inode_bitmap, start, metaend)) -+ ext3cow_warning(sb, __FUNCTION__, -+ "Inode bitmap (%u) in GDT table" -+ " ("E3FSBLK"-"E3FSBLK")", -+ input->inode_bitmap, start, metaend - 1); -+ else if (inside(input->inode_table, start, metaend) || -+ inside(itend - 1, start, metaend)) -+ ext3cow_warning(sb, __FUNCTION__, -+ "Inode table (%u-"E3FSBLK") overlaps" -+ "GDT table ("E3FSBLK"-"E3FSBLK")", -+ input->inode_table, itend - 1, start, metaend - 1); -+ else -+ err = 0; -+ brelse(bh); -+ -+ return err; -+} -+ -+static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, -+ ext3cow_fsblk_t blk) -+{ -+ struct buffer_head *bh; -+ int err; -+ -+ bh = sb_getblk(sb, blk); -+ if (!bh) -+ return ERR_PTR(-EIO); -+ if ((err = ext3cow_journal_get_write_access(handle, bh))) { -+ brelse(bh); -+ bh = ERR_PTR(err); -+ } else { -+ lock_buffer(bh); -+ memset(bh->b_data, 0, sb->s_blocksize); -+ set_buffer_uptodate(bh); -+ unlock_buffer(bh); -+ } -+ -+ return bh; -+} -+ -+/* -+ * To avoid calling the atomic setbit hundreds or thousands of times, we only -+ * need to use it within a single byte (to ensure we get endianness right). -+ * We can use memset for the rest of the bitmap as there are no other users. -+ */ -+static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) -+{ -+ int i; -+ -+ if (start_bit >= end_bit) -+ return; -+ -+ ext3cow_debug("mark end bits +%d through +%d used\n", start_bit, end_bit); -+ for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++) -+ ext3cow_set_bit(i, bitmap); -+ if (i < end_bit) -+ memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3); -+} -+ -+/* -+ * Set up the block and inode bitmaps, and the inode table for the new group. -+ * This doesn't need to be part of the main transaction, since we are only -+ * changing blocks outside the actual filesystem. We still do journaling to -+ * ensure the recovery is correct in case of a failure just after resize. -+ * If any part of this fails, we simply abort the resize. -+ */ -+static int setup_new_group_blocks(struct super_block *sb, -+ struct ext3cow_new_group_data *input) -+{ -+ struct ext3cow_sb_info *sbi = EXT3COW_SB(sb); -+ ext3cow_fsblk_t start = ext3cow_group_first_block_no(sb, input->group); -+ int reserved_gdb = ext3cow_bg_has_super(sb, input->group) ? -+ le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0; -+ unsigned long gdblocks = ext3cow_bg_num_gdb(sb, input->group); -+ struct buffer_head *bh; -+ handle_t *handle; -+ ext3cow_fsblk_t block; -+ ext3cow_grpblk_t bit; -+ int i; -+ int err = 0, err2; -+ -+ handle = ext3cow_journal_start_sb(sb, reserved_gdb + gdblocks + -+ 2 + sbi->s_itb_per_group); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ lock_super(sb); -+ if (input->group != sbi->s_groups_count) { -+ err = -EBUSY; -+ goto exit_journal; -+ } -+ -+ if (IS_ERR(bh = bclean(handle, sb, input->block_bitmap))) { -+ err = PTR_ERR(bh); -+ goto exit_journal; -+ } -+ -+ if (ext3cow_bg_has_super(sb, input->group)) { -+ ext3cow_debug("mark backup superblock %#04lx (+0)\n", start); -+ ext3cow_set_bit(0, bh->b_data); -+ } -+ -+ /* Copy all of the GDT blocks into the backup in this group */ -+ for (i = 0, bit = 1, block = start + 1; -+ i < gdblocks; i++, block++, bit++) { -+ struct buffer_head *gdb; -+ -+ ext3cow_debug("update backup group %#04lx (+%d)\n", block, bit); -+ -+ gdb = sb_getblk(sb, block); -+ if (!gdb) { -+ err = -EIO; -+ goto exit_bh; -+ } -+ if ((err = ext3cow_journal_get_write_access(handle, gdb))) { -+ brelse(gdb); -+ goto exit_bh; -+ } -+ lock_buffer(bh); -+ memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, bh->b_size); -+ set_buffer_uptodate(gdb); -+ unlock_buffer(bh); -+ ext3cow_journal_dirty_metadata(handle, gdb); -+ ext3cow_set_bit(bit, bh->b_data); -+ brelse(gdb); -+ } -+ -+ /* Zero out all of the reserved backup group descriptor table blocks */ -+ for (i = 0, bit = gdblocks + 1, block = start + bit; -+ i < reserved_gdb; i++, block++, bit++) { -+ struct buffer_head *gdb; -+ -+ ext3cow_debug("clear reserved block %#04lx (+%d)\n", block, bit); -+ -+ if (IS_ERR(gdb = bclean(handle, sb, block))) { -+ err = PTR_ERR(bh); -+ goto exit_bh; -+ } -+ ext3cow_journal_dirty_metadata(handle, gdb); -+ ext3cow_set_bit(bit, bh->b_data); -+ brelse(gdb); -+ } -+ ext3cow_debug("mark block bitmap %#04x (+%ld)\n", input->block_bitmap, -+ input->block_bitmap - start); -+ ext3cow_set_bit(input->block_bitmap - start, bh->b_data); -+ ext3cow_debug("mark inode bitmap %#04x (+%ld)\n", input->inode_bitmap, -+ input->inode_bitmap - start); -+ ext3cow_set_bit(input->inode_bitmap - start, bh->b_data); -+ -+ /* Zero out all of the inode table blocks */ -+ for (i = 0, block = input->inode_table, bit = block - start; -+ i < sbi->s_itb_per_group; i++, bit++, block++) { -+ struct buffer_head *it; -+ -+ ext3cow_debug("clear inode block %#04lx (+%d)\n", block, bit); -+ if (IS_ERR(it = bclean(handle, sb, block))) { -+ err = PTR_ERR(it); -+ goto exit_bh; -+ } -+ ext3cow_journal_dirty_metadata(handle, it); -+ brelse(it); -+ ext3cow_set_bit(bit, bh->b_data); -+ } -+ mark_bitmap_end(input->blocks_count, EXT3COW_BLOCKS_PER_GROUP(sb), -+ bh->b_data); -+ ext3cow_journal_dirty_metadata(handle, bh); -+ brelse(bh); -+ -+ /* Mark unused entries in inode bitmap used */ -+ ext3cow_debug("clear inode bitmap %#04x (+%ld)\n", -+ input->inode_bitmap, input->inode_bitmap - start); -+ if (IS_ERR(bh = bclean(handle, sb, input->inode_bitmap))) { -+ err = PTR_ERR(bh); -+ goto exit_journal; -+ } -+ -+ mark_bitmap_end(EXT3COW_INODES_PER_GROUP(sb), EXT3COW_BLOCKS_PER_GROUP(sb), -+ bh->b_data); -+ ext3cow_journal_dirty_metadata(handle, bh); -+exit_bh: -+ brelse(bh); -+ -+exit_journal: -+ unlock_super(sb); -+ if ((err2 = ext3cow_journal_stop(handle)) && !err) -+ err = err2; -+ -+ return err; -+} -+ -+/* -+ * Iterate through the groups which hold BACKUP superblock/GDT copies in an -+ * ext3cow filesystem. The counters should be initialized to 1, 5, and 7 before -+ * calling this for the first time. In a sparse filesystem it will be the -+ * sequence of powers of 3, 5, and 7: 1, 3, 5, 7, 9, 25, 27, 49, 81, ... -+ * For a non-sparse filesystem it will be every group: 1, 2, 3, 4, ... -+ */ -+static unsigned ext3cow_list_backups(struct super_block *sb, unsigned *three, -+ unsigned *five, unsigned *seven) -+{ -+ unsigned *min = three; -+ int mult = 3; -+ unsigned ret; -+ -+ if (!EXT3COW_HAS_RO_COMPAT_FEATURE(sb, -+ EXT3COW_FEATURE_RO_COMPAT_SPARSE_SUPER)) { -+ ret = *min; -+ *min += 1; -+ return ret; -+ } -+ -+ if (*five < *min) { -+ min = five; -+ mult = 5; -+ } -+ if (*seven < *min) { -+ min = seven; -+ mult = 7; -+ } -+ -+ ret = *min; -+ *min *= mult; -+ -+ return ret; -+} -+ -+/* -+ * Check that all of the backup GDT blocks are held in the primary GDT block. -+ * It is assumed that they are stored in group order. Returns the number of -+ * groups in current filesystem that have BACKUPS, or -ve error code. -+ */ -+static int verify_reserved_gdb(struct super_block *sb, -+ struct buffer_head *primary) -+{ -+ const ext3cow_fsblk_t blk = primary->b_blocknr; -+ const unsigned long end = EXT3COW_SB(sb)->s_groups_count; -+ unsigned three = 1; -+ unsigned five = 5; -+ unsigned seven = 7; -+ unsigned grp; -+ __le32 *p = (__le32 *)primary->b_data; -+ int gdbackups = 0; -+ -+ while ((grp = ext3cow_list_backups(sb, &three, &five, &seven)) < end) { -+ if (le32_to_cpu(*p++) != grp * EXT3COW_BLOCKS_PER_GROUP(sb) + blk){ -+ ext3cow_warning(sb, __FUNCTION__, -+ "reserved GDT "E3FSBLK -+ " missing grp %d ("E3FSBLK")", -+ blk, grp, -+ grp * EXT3COW_BLOCKS_PER_GROUP(sb) + blk); -+ return -EINVAL; -+ } -+ if (++gdbackups > EXT3COW_ADDR_PER_BLOCK(sb)) -+ return -EFBIG; -+ } -+ -+ return gdbackups; -+} -+ -+/* -+ * Called when we need to bring a reserved group descriptor table block into -+ * use from the resize inode. The primary copy of the new GDT block currently -+ * is an indirect block (under the double indirect block in the resize inode). -+ * The new backup GDT blocks will be stored as leaf blocks in this indirect -+ * block, in group order. Even though we know all the block numbers we need, -+ * we check to ensure that the resize inode has actually reserved these blocks. -+ * -+ * Don't need to update the block bitmaps because the blocks are still in use. -+ * -+ * We get all of the error cases out of the way, so that we are sure to not -+ * fail once we start modifying the data on disk, because JBD has no rollback. -+ */ -+static int add_new_gdb(handle_t *handle, struct inode *inode, -+ struct ext3cow_new_group_data *input, -+ struct buffer_head **primary) -+{ -+ struct super_block *sb = inode->i_sb; -+ struct ext3cow_super_block *es = EXT3COW_SB(sb)->s_es; -+ unsigned long gdb_num = input->group / EXT3COW_DESC_PER_BLOCK(sb); -+ ext3cow_fsblk_t gdblock = EXT3COW_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num; -+ struct buffer_head **o_group_desc, **n_group_desc; -+ struct buffer_head *dind; -+ int gdbackups; -+ struct ext3cow_iloc iloc; -+ __le32 *data; -+ int err; -+ -+ if (test_opt(sb, DEBUG)) -+ printk(KERN_DEBUG -+ "EXT3COW-fs: ext3cow_add_new_gdb: adding group block %lu\n", -+ gdb_num); -+ -+ /* -+ * If we are not using the primary superblock/GDT copy don't resize, -+ * because the user tools have no way of handling this. Probably a -+ * bad time to do it anyways. -+ */ -+ if (EXT3COW_SB(sb)->s_sbh->b_blocknr != -+ le32_to_cpu(EXT3COW_SB(sb)->s_es->s_first_data_block)) { -+ ext3cow_warning(sb, __FUNCTION__, -+ "won't resize using backup superblock at %llu", -+ (unsigned long long)EXT3COW_SB(sb)->s_sbh->b_blocknr); -+ return -EPERM; -+ } -+ -+ *primary = sb_bread(sb, gdblock); -+ if (!*primary) -+ return -EIO; -+ -+ if ((gdbackups = verify_reserved_gdb(sb, *primary)) < 0) { -+ err = gdbackups; -+ goto exit_bh; -+ } -+ -+ data = EXT3COW_I(inode)->i_data + EXT3COW_DIND_BLOCK; -+ dind = sb_bread(sb, le32_to_cpu(*data)); -+ if (!dind) { -+ err = -EIO; -+ goto exit_bh; -+ } -+ -+ data = (__le32 *)dind->b_data; -+ if (le32_to_cpu(data[gdb_num % EXT3COW_ADDR_PER_BLOCK(sb)]) != gdblock) { -+ ext3cow_warning(sb, __FUNCTION__, -+ "new group %u GDT block "E3FSBLK" not reserved", -+ input->group, gdblock); -+ err = -EINVAL; -+ goto exit_dind; -+ } -+ -+ if ((err = ext3cow_journal_get_write_access(handle, EXT3COW_SB(sb)->s_sbh))) -+ goto exit_dind; -+ -+ if ((err = ext3cow_journal_get_write_access(handle, *primary))) -+ goto exit_sbh; -+ -+ if ((err = ext3cow_journal_get_write_access(handle, dind))) -+ goto exit_primary; -+ -+ /* ext3cow_reserve_inode_write() gets a reference on the iloc */ -+ if ((err = ext3cow_reserve_inode_write(handle, inode, &iloc))) -+ goto exit_dindj; -+ -+ n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *), -+ GFP_KERNEL); -+ if (!n_group_desc) { -+ err = -ENOMEM; -+ ext3cow_warning (sb, __FUNCTION__, -+ "not enough memory for %lu groups", gdb_num + 1); -+ goto exit_inode; -+ } -+ -+ /* -+ * Finally, we have all of the possible failures behind us... -+ * -+ * Remove new GDT block from inode double-indirect block and clear out -+ * the new GDT block for use (which also "frees" the backup GDT blocks -+ * from the reserved inode). We don't need to change the bitmaps for -+ * these blocks, because they are marked as in-use from being in the -+ * reserved inode, and will become GDT blocks (primary and backup). -+ */ -+ data[gdb_num % EXT3COW_ADDR_PER_BLOCK(sb)] = 0; -+ ext3cow_journal_dirty_metadata(handle, dind); -+ brelse(dind); -+ inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9; -+ ext3cow_mark_iloc_dirty(handle, inode, &iloc); -+ memset((*primary)->b_data, 0, sb->s_blocksize); -+ ext3cow_journal_dirty_metadata(handle, *primary); -+ -+ o_group_desc = EXT3COW_SB(sb)->s_group_desc; -+ memcpy(n_group_desc, o_group_desc, -+ EXT3COW_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); -+ n_group_desc[gdb_num] = *primary; -+ EXT3COW_SB(sb)->s_group_desc = n_group_desc; -+ EXT3COW_SB(sb)->s_gdb_count++; -+ kfree(o_group_desc); -+ -+ es->s_reserved_gdt_blocks = -+ cpu_to_le16(le16_to_cpu(es->s_reserved_gdt_blocks) - 1); -+ ext3cow_journal_dirty_metadata(handle, EXT3COW_SB(sb)->s_sbh); -+ -+ return 0; -+ -+exit_inode: -+ //ext3cow_journal_release_buffer(handle, iloc.bh); -+ brelse(iloc.bh); -+exit_dindj: -+ //ext3cow_journal_release_buffer(handle, dind); -+exit_primary: -+ //ext3cow_journal_release_buffer(handle, *primary); -+exit_sbh: -+ //ext3cow_journal_release_buffer(handle, *primary); -+exit_dind: -+ brelse(dind); -+exit_bh: -+ brelse(*primary); -+ -+ ext3cow_debug("leaving with error %d\n", err); -+ return err; -+} -+ -+/* -+ * Called when we are adding a new group which has a backup copy of each of -+ * the GDT blocks (i.e. sparse group) and there are reserved GDT blocks. -+ * We need to add these reserved backup GDT blocks to the resize inode, so -+ * that they are kept for future resizing and not allocated to files. -+ * -+ * Each reserved backup GDT block will go into a different indirect block. -+ * The indirect blocks are actually the primary reserved GDT blocks, -+ * so we know in advance what their block numbers are. We only get the -+ * double-indirect block to verify it is pointing to the primary reserved -+ * GDT blocks so we don't overwrite a data block by accident. The reserved -+ * backup GDT blocks are stored in their reserved primary GDT block. -+ */ -+static int reserve_backup_gdb(handle_t *handle, struct inode *inode, -+ struct ext3cow_new_group_data *input) -+{ -+ struct super_block *sb = inode->i_sb; -+ int reserved_gdb =le16_to_cpu(EXT3COW_SB(sb)->s_es->s_reserved_gdt_blocks); -+ struct buffer_head **primary; -+ struct buffer_head *dind; -+ struct ext3cow_iloc iloc; -+ ext3cow_fsblk_t blk; -+ __le32 *data, *end; -+ int gdbackups = 0; -+ int res, i; -+ int err; -+ -+ primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_KERNEL); -+ if (!primary) -+ return -ENOMEM; -+ -+ data = EXT3COW_I(inode)->i_data + EXT3COW_DIND_BLOCK; -+ dind = sb_bread(sb, le32_to_cpu(*data)); -+ if (!dind) { -+ err = -EIO; -+ goto exit_free; -+ } -+ -+ blk = EXT3COW_SB(sb)->s_sbh->b_blocknr + 1 + EXT3COW_SB(sb)->s_gdb_count; -+ data = (__le32 *)dind->b_data + EXT3COW_SB(sb)->s_gdb_count; -+ end = (__le32 *)dind->b_data + EXT3COW_ADDR_PER_BLOCK(sb); -+ -+ /* Get each reserved primary GDT block and verify it holds backups */ -+ for (res = 0; res < reserved_gdb; res++, blk++) { -+ if (le32_to_cpu(*data) != blk) { -+ ext3cow_warning(sb, __FUNCTION__, -+ "reserved block "E3FSBLK -+ " not at offset %ld", -+ blk, -+ (long)(data - (__le32 *)dind->b_data)); -+ err = -EINVAL; -+ goto exit_bh; -+ } -+ primary[res] = sb_bread(sb, blk); -+ if (!primary[res]) { -+ err = -EIO; -+ goto exit_bh; -+ } -+ if ((gdbackups = verify_reserved_gdb(sb, primary[res])) < 0) { -+ brelse(primary[res]); -+ err = gdbackups; -+ goto exit_bh; -+ } -+ if (++data >= end) -+ data = (__le32 *)dind->b_data; -+ } -+ -+ for (i = 0; i < reserved_gdb; i++) { -+ if ((err = ext3cow_journal_get_write_access(handle, primary[i]))) { -+ /* -+ int j; -+ for (j = 0; j < i; j++) -+ ext3cow_journal_release_buffer(handle, primary[j]); -+ */ -+ goto exit_bh; -+ } -+ } -+ -+ if ((err = ext3cow_reserve_inode_write(handle, inode, &iloc))) -+ goto exit_bh; -+ -+ /* -+ * Finally we can add each of the reserved backup GDT blocks from -+ * the new group to its reserved primary GDT block. -+ */ -+ blk = input->group * EXT3COW_BLOCKS_PER_GROUP(sb); -+ for (i = 0; i < reserved_gdb; i++) { -+ int err2; -+ data = (__le32 *)primary[i]->b_data; -+ /* printk("reserving backup %lu[%u] = %lu\n", -+ primary[i]->b_blocknr, gdbackups, -+ blk + primary[i]->b_blocknr); */ -+ data[gdbackups] = cpu_to_le32(blk + primary[i]->b_blocknr); -+ err2 = ext3cow_journal_dirty_metadata(handle, primary[i]); -+ if (!err) -+ err = err2; -+ } -+ inode->i_blocks += reserved_gdb * sb->s_blocksize >> 9; -+ ext3cow_mark_iloc_dirty(handle, inode, &iloc); -+ -+exit_bh: -+ while (--res >= 0) -+ brelse(primary[res]); -+ brelse(dind); -+ -+exit_free: -+ kfree(primary); -+ -+ return err; -+} -+ -+/* -+ * Update the backup copies of the ext3cow metadata. These don't need to be part -+ * of the main resize transaction, because e2fsck will re-write them if there -+ * is a problem (basically only OOM will cause a problem). However, we -+ * _should_ update the backups if possible, in case the primary gets trashed -+ * for some reason and we need to run e2fsck from a backup superblock. The -+ * important part is that the new block and inode counts are in the backup -+ * superblocks, and the location of the new group metadata in the GDT backups. -+ * -+ * We do not need lock_super() for this, because these blocks are not -+ * otherwise touched by the filesystem code when it is mounted. We don't -+ * need to worry about last changing from sbi->s_groups_count, because the -+ * worst that can happen is that we do not copy the full number of backups -+ * at this time. The resize which changed s_groups_count will backup again. -+ */ -+static void update_backups(struct super_block *sb, -+ int blk_off, char *data, int size) -+{ -+ struct ext3cow_sb_info *sbi = EXT3COW_SB(sb); -+ const unsigned long last = sbi->s_groups_count; -+ const int bpg = EXT3COW_BLOCKS_PER_GROUP(sb); -+ unsigned three = 1; -+ unsigned five = 5; -+ unsigned seven = 7; -+ unsigned group; -+ int rest = sb->s_blocksize - size; -+ handle_t *handle; -+ int err = 0, err2; -+ -+ handle = ext3cow_journal_start_sb(sb, EXT3COW_MAX_TRANS_DATA); -+ if (IS_ERR(handle)) { -+ group = 1; -+ err = PTR_ERR(handle); -+ goto exit_err; -+ } -+ -+ while ((group = ext3cow_list_backups(sb, &three, &five, &seven)) < last) { -+ struct buffer_head *bh; -+ -+ /* Out of journal space, and can't get more - abort - so sad */ -+ if (handle->h_buffer_credits == 0 && -+ ext3cow_journal_extend(handle, EXT3COW_MAX_TRANS_DATA) && -+ (err = ext3cow_journal_restart(handle, EXT3COW_MAX_TRANS_DATA))) -+ break; -+ -+ bh = sb_getblk(sb, group * bpg + blk_off); -+ if (!bh) { -+ err = -EIO; -+ break; -+ } -+ ext3cow_debug("update metadata backup %#04lx\n", -+ (unsigned long)bh->b_blocknr); -+ if ((err = ext3cow_journal_get_write_access(handle, bh))) -+ break; -+ lock_buffer(bh); -+ memcpy(bh->b_data, data, size); -+ if (rest) -+ memset(bh->b_data + size, 0, rest); -+ set_buffer_uptodate(bh); -+ unlock_buffer(bh); -+ ext3cow_journal_dirty_metadata(handle, bh); -+ brelse(bh); -+ } -+ if ((err2 = ext3cow_journal_stop(handle)) && !err) -+ err = err2; -+ -+ /* -+ * Ugh! Need to have e2fsck write the backup copies. It is too -+ * late to revert the resize, we shouldn't fail just because of -+ * the backup copies (they are only needed in case of corruption). -+ * -+ * However, if we got here we have a journal problem too, so we -+ * can't really start a transaction to mark the superblock. -+ * Chicken out and just set the flag on the hope it will be written -+ * to disk, and if not - we will simply wait until next fsck. -+ */ -+exit_err: -+ if (err) { -+ ext3cow_warning(sb, __FUNCTION__, -+ "can't update backup for group %d (err %d), " -+ "forcing fsck on next reboot", group, err); -+ sbi->s_mount_state &= ~EXT3COW_VALID_FS; -+ sbi->s_es->s_state &= cpu_to_le16(~EXT3COW_VALID_FS); -+ mark_buffer_dirty(sbi->s_sbh); -+ } -+} -+ -+/* Add group descriptor data to an existing or new group descriptor block. -+ * Ensure we handle all possible error conditions _before_ we start modifying -+ * the filesystem, because we cannot abort the transaction and not have it -+ * write the data to disk. -+ * -+ * If we are on a GDT block boundary, we need to get the reserved GDT block. -+ * Otherwise, we may need to add backup GDT blocks for a sparse group. -+ * -+ * We only need to hold the superblock lock while we are actually adding -+ * in the new group's counts to the superblock. Prior to that we have -+ * not really "added" the group at all. We re-check that we are still -+ * adding in the last group in case things have changed since verifying. -+ */ -+int ext3cow_group_add(struct super_block *sb, struct ext3cow_new_group_data *input) -+{ -+ struct ext3cow_sb_info *sbi = EXT3COW_SB(sb); -+ struct ext3cow_super_block *es = sbi->s_es; -+ int reserved_gdb = ext3cow_bg_has_super(sb, input->group) ? -+ le16_to_cpu(es->s_reserved_gdt_blocks) : 0; -+ struct buffer_head *primary = NULL; -+ struct ext3cow_group_desc *gdp; -+ struct inode *inode = NULL; -+ handle_t *handle; -+ int gdb_off, gdb_num; -+ int err, err2; -+ -+ gdb_num = input->group / EXT3COW_DESC_PER_BLOCK(sb); -+ gdb_off = input->group % EXT3COW_DESC_PER_BLOCK(sb); -+ -+ if (gdb_off == 0 && !EXT3COW_HAS_RO_COMPAT_FEATURE(sb, -+ EXT3COW_FEATURE_RO_COMPAT_SPARSE_SUPER)) { -+ ext3cow_warning(sb, __FUNCTION__, -+ "Can't resize non-sparse filesystem further"); -+ return -EPERM; -+ } -+ -+ if (le32_to_cpu(es->s_blocks_count) + input->blocks_count < -+ le32_to_cpu(es->s_blocks_count)) { -+ ext3cow_warning(sb, __FUNCTION__, "blocks_count overflow\n"); -+ return -EINVAL; -+ } -+ -+ if (le32_to_cpu(es->s_inodes_count) + EXT3COW_INODES_PER_GROUP(sb) < -+ le32_to_cpu(es->s_inodes_count)) { -+ ext3cow_warning(sb, __FUNCTION__, "inodes_count overflow\n"); -+ return -EINVAL; -+ } -+ -+ if (reserved_gdb || gdb_off == 0) { -+ if (!EXT3COW_HAS_COMPAT_FEATURE(sb, -+ EXT3COW_FEATURE_COMPAT_RESIZE_INODE)){ -+ ext3cow_warning(sb, __FUNCTION__, -+ "No reserved GDT blocks, can't resize"); -+ return -EPERM; -+ } -+ inode = iget(sb, EXT3COW_RESIZE_INO); -+ if (!inode || is_bad_inode(inode)) { -+ ext3cow_warning(sb, __FUNCTION__, -+ "Error opening resize inode"); -+ iput(inode); -+ return -ENOENT; -+ } -+ } -+ -+ if ((err = verify_group_input(sb, input))) -+ goto exit_put; -+ -+ if ((err = setup_new_group_blocks(sb, input))) -+ goto exit_put; -+ -+ /* -+ * We will always be modifying at least the superblock and a GDT -+ * block. If we are adding a group past the last current GDT block, -+ * we will also modify the inode and the dindirect block. If we -+ * are adding a group with superblock/GDT backups we will also -+ * modify each of the reserved GDT dindirect blocks. -+ */ -+ handle = ext3cow_journal_start_sb(sb, -+ ext3cow_bg_has_super(sb, input->group) ? -+ 3 + reserved_gdb : 4); -+ if (IS_ERR(handle)) { -+ err = PTR_ERR(handle); -+ goto exit_put; -+ } -+ -+ lock_super(sb); -+ if (input->group != sbi->s_groups_count) { -+ ext3cow_warning(sb, __FUNCTION__, -+ "multiple resizers run on filesystem!"); -+ err = -EBUSY; -+ goto exit_journal; -+ } -+ -+ if ((err = ext3cow_journal_get_write_access(handle, sbi->s_sbh))) -+ goto exit_journal; -+ -+ /* -+ * We will only either add reserved group blocks to a backup group -+ * or remove reserved blocks for the first group in a new group block. -+ * Doing both would be mean more complex code, and sane people don't -+ * use non-sparse filesystems anymore. This is already checked above. -+ */ -+ if (gdb_off) { -+ primary = sbi->s_group_desc[gdb_num]; -+ if ((err = ext3cow_journal_get_write_access(handle, primary))) -+ goto exit_journal; -+ -+ if (reserved_gdb && ext3cow_bg_num_gdb(sb, input->group) && -+ (err = reserve_backup_gdb(handle, inode, input))) -+ goto exit_journal; -+ } else if ((err = add_new_gdb(handle, inode, input, &primary))) -+ goto exit_journal; -+ -+ /* -+ * OK, now we've set up the new group. Time to make it active. -+ * -+ * Current kernels don't lock all allocations via lock_super(), -+ * so we have to be safe wrt. concurrent accesses the group -+ * data. So we need to be careful to set all of the relevant -+ * group descriptor data etc. *before* we enable the group. -+ * -+ * The key field here is sbi->s_groups_count: as long as -+ * that retains its old value, nobody is going to access the new -+ * group. -+ * -+ * So first we update all the descriptor metadata for the new -+ * group; then we update the total disk blocks count; then we -+ * update the groups count to enable the group; then finally we -+ * update the free space counts so that the system can start -+ * using the new disk blocks. -+ */ -+ -+ /* Update group descriptor block for new group */ -+ gdp = (struct ext3cow_group_desc *)primary->b_data + gdb_off; -+ -+ gdp->bg_block_bitmap = cpu_to_le32(input->block_bitmap); -+ gdp->bg_inode_bitmap = cpu_to_le32(input->inode_bitmap); -+ gdp->bg_inode_table = cpu_to_le32(input->inode_table); -+ gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count); -+ gdp->bg_free_inodes_count = cpu_to_le16(EXT3COW_INODES_PER_GROUP(sb)); -+ -+ /* -+ * Make the new blocks and inodes valid next. We do this before -+ * increasing the group count so that once the group is enabled, -+ * all of its blocks and inodes are already valid. -+ * -+ * We always allocate group-by-group, then block-by-block or -+ * inode-by-inode within a group, so enabling these -+ * blocks/inodes before the group is live won't actually let us -+ * allocate the new space yet. -+ */ -+ es->s_blocks_count = cpu_to_le32(le32_to_cpu(es->s_blocks_count) + -+ input->blocks_count); -+ es->s_inodes_count = cpu_to_le32(le32_to_cpu(es->s_inodes_count) + -+ EXT3COW_INODES_PER_GROUP(sb)); -+ -+ /* -+ * We need to protect s_groups_count against other CPUs seeing -+ * inconsistent state in the superblock. -+ * -+ * The precise rules we use are: -+ * -+ * * Writers of s_groups_count *must* hold lock_super -+ * AND -+ * * Writers must perform a smp_wmb() after updating all dependent -+ * data and before modifying the groups count -+ * -+ * * Readers must hold lock_super() over the access -+ * OR -+ * * Readers must perform an smp_rmb() after reading the groups count -+ * and before reading any dependent data. -+ * -+ * NB. These rules can be relaxed when checking the group count -+ * while freeing data, as we can only allocate from a block -+ * group after serialising against the group count, and we can -+ * only then free after serialising in turn against that -+ * allocation. -+ */ -+ smp_wmb(); -+ -+ /* Update the global fs size fields */ -+ sbi->s_groups_count++; -+ -+ ext3cow_journal_dirty_metadata(handle, primary); -+ -+ /* Update the reserved block counts only once the new group is -+ * active. */ -+ es->s_r_blocks_count = cpu_to_le32(le32_to_cpu(es->s_r_blocks_count) + -+ input->reserved_blocks); -+ -+ /* Update the free space counts */ -+ percpu_counter_mod(&sbi->s_freeblocks_counter, -+ input->free_blocks_count); -+ percpu_counter_mod(&sbi->s_freeinodes_counter, -+ EXT3COW_INODES_PER_GROUP(sb)); -+ -+ ext3cow_journal_dirty_metadata(handle, sbi->s_sbh); -+ sb->s_dirt = 1; -+ -+exit_journal: -+ unlock_super(sb); -+ if ((err2 = ext3cow_journal_stop(handle)) && !err) -+ err = err2; -+ if (!err) { -+ update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, -+ sizeof(struct ext3cow_super_block)); -+ update_backups(sb, primary->b_blocknr, primary->b_data, -+ primary->b_size); -+ } -+exit_put: -+ iput(inode); -+ return err; -+} /* ext3cow_group_add */ -+ -+/* Extend the filesystem to the new number of blocks specified. This entry -+ * point is only used to extend the current filesystem to the end of the last -+ * existing group. It can be accessed via ioctl, or by "remount,resize=" -+ * for emergencies (because it has no dependencies on reserved blocks). -+ * -+ * If we _really_ wanted, we could use default values to call ext3cow_group_add() -+ * allow the "remount" trick to work for arbitrary resizing, assuming enough -+ * GDT blocks are reserved to grow to the desired size. -+ */ -+int ext3cow_group_extend(struct super_block *sb, struct ext3cow_super_block *es, -+ ext3cow_fsblk_t n_blocks_count) -+{ -+ ext3cow_fsblk_t o_blocks_count; -+ unsigned long o_groups_count; -+ ext3cow_grpblk_t last; -+ ext3cow_grpblk_t add; -+ struct buffer_head * bh; -+ handle_t *handle; -+ int err; -+ unsigned long freed_blocks; -+ -+ /* We don't need to worry about locking wrt other resizers just -+ * yet: we're going to revalidate es->s_blocks_count after -+ * taking lock_super() below. */ -+ o_blocks_count = le32_to_cpu(es->s_blocks_count); -+ o_groups_count = EXT3COW_SB(sb)->s_groups_count; -+ -+ if (test_opt(sb, DEBUG)) -+ printk(KERN_DEBUG "EXT3COW-fs: extending last group from "E3FSBLK" uto "E3FSBLK" blocks\n", -+ o_blocks_count, n_blocks_count); -+ -+ if (n_blocks_count == 0 || n_blocks_count == o_blocks_count) -+ return 0; -+ -+ if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { -+ printk(KERN_ERR "EXT3COW-fs: filesystem on %s:" -+ " too large to resize to %lu blocks safely\n", -+ sb->s_id, n_blocks_count); -+ if (sizeof(sector_t) < 8) -+ ext3cow_warning(sb, __FUNCTION__, -+ "CONFIG_LBD not enabled\n"); -+ return -EINVAL; -+ } -+ -+ if (n_blocks_count < o_blocks_count) { -+ ext3cow_warning(sb, __FUNCTION__, -+ "can't shrink FS - resize aborted"); -+ return -EBUSY; -+ } -+ -+ /* Handle the remaining blocks in the last group only. */ -+ last = (o_blocks_count - le32_to_cpu(es->s_first_data_block)) % -+ EXT3COW_BLOCKS_PER_GROUP(sb); -+ -+ if (last == 0) { -+ ext3cow_warning(sb, __FUNCTION__, -+ "need to use ext2online to resize further"); -+ return -EPERM; -+ } -+ -+ add = EXT3COW_BLOCKS_PER_GROUP(sb) - last; -+ -+ if (o_blocks_count + add < o_blocks_count) { -+ ext3cow_warning(sb, __FUNCTION__, "blocks_count overflow"); -+ return -EINVAL; -+ } -+ -+ if (o_blocks_count + add > n_blocks_count) -+ add = n_blocks_count - o_blocks_count; -+ -+ if (o_blocks_count + add < n_blocks_count) -+ ext3cow_warning(sb, __FUNCTION__, -+ "will only finish group ("E3FSBLK -+ " blocks, %u new)", -+ o_blocks_count + add, add); -+ -+ /* See if the device is actually as big as what was requested */ -+ bh = sb_bread(sb, o_blocks_count + add -1); -+ if (!bh) { -+ ext3cow_warning(sb, __FUNCTION__, -+ "can't read last block, resize aborted"); -+ return -ENOSPC; -+ } -+ brelse(bh); -+ -+ /* We will update the superblock, one block bitmap, and -+ * one group descriptor via ext3cow_free_blocks(). -+ */ -+ handle = ext3cow_journal_start_sb(sb, 3); -+ if (IS_ERR(handle)) { -+ err = PTR_ERR(handle); -+ ext3cow_warning(sb, __FUNCTION__, "error %d on journal start",err); -+ goto exit_put; -+ } -+ -+ lock_super(sb); -+ if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) { -+ ext3cow_warning(sb, __FUNCTION__, -+ "multiple resizers run on filesystem!"); -+ unlock_super(sb); -+ err = -EBUSY; -+ goto exit_put; -+ } -+ -+ if ((err = ext3cow_journal_get_write_access(handle, -+ EXT3COW_SB(sb)->s_sbh))) { -+ ext3cow_warning(sb, __FUNCTION__, -+ "error %d on journal write access", err); -+ unlock_super(sb); -+ ext3cow_journal_stop(handle); -+ goto exit_put; -+ } -+ es->s_blocks_count = cpu_to_le32(o_blocks_count + add); -+ ext3cow_journal_dirty_metadata(handle, EXT3COW_SB(sb)->s_sbh); -+ sb->s_dirt = 1; -+ unlock_super(sb); -+ ext3cow_debug("freeing blocks %lu through "E3FSBLK"\n", o_blocks_count, -+ o_blocks_count + add); -+ ext3cow_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks); -+ ext3cow_debug("freed blocks "E3FSBLK" through "E3FSBLK"\n", o_blocks_count, -+ o_blocks_count + add); -+ if ((err = ext3cow_journal_stop(handle))) -+ goto exit_put; -+ if (test_opt(sb, DEBUG)) -+ printk(KERN_DEBUG "EXT3COW-fs: extended group to %u blocks\n", -+ le32_to_cpu(es->s_blocks_count)); -+ update_backups(sb, EXT3COW_SB(sb)->s_sbh->b_blocknr, (char *)es, -+ sizeof(struct ext3cow_super_block)); -+exit_put: -+ return err; -+} /* ext3cow_group_extend */ -diff -Naur linux-2.6.21.7/fs/ext3cow/super.c linux-2.6.21.7_ext3cowPatched/fs/ext3cow/super.c ---- linux-2.6.21.7/fs/ext3cow/super.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.21.7_ext3cowPatched/fs/ext3cow/super.c 2007-10-23 17:47:18.000000000 +0200 -@@ -0,0 +1,2808 @@ -+/* -+ * linux/fs/ext3cow/super.c -+ * -+ * Copyright (C) 1992, 1993, 1994, 1995 -+ * Remy Card (card@masi.ibp.fr) -+ * Laboratoire MASI - Institut Blaise Pascal -+ * Universite Pierre et Marie Curie (Paris VI) -+ * -+ * from -+ * -+ * linux/fs/minix/inode.c -+ * -+ * Copyright (C) 1991, 1992 Linus Torvalds -+ * -+ * Big-endian to little-endian byte-swapping/bitmaps by -+ * David S. Miller (davem@caip.rutgers.edu), 1995 -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+#include "xattr.h" -+#include "acl.h" -+#include "namei.h" -+ -+static int ext3cow_load_journal(struct super_block *, struct ext3cow_super_block *, -+ unsigned long journal_devnum); -+static int ext3cow_create_journal(struct super_block *, struct ext3cow_super_block *, -+ unsigned int); -+static void ext3cow_commit_super (struct super_block * sb, -+ struct ext3cow_super_block * es, -+ int sync); -+static void ext3cow_mark_recovery_complete(struct super_block * sb, -+ struct ext3cow_super_block * es); -+static void ext3cow_clear_journal_err(struct super_block * sb, -+ struct ext3cow_super_block * es); -+static int ext3cow_sync_fs(struct super_block *sb, int wait); -+static const char *ext3cow_decode_error(struct super_block * sb, int errno, -+ char nbuf[16]); -+static int ext3cow_remount (struct super_block * sb, int * flags, char * data); -+static int ext3cow_statfs (struct dentry * dentry, struct kstatfs * buf); -+static void ext3cow_unlockfs(struct super_block *sb); -+static void ext3cow_write_super (struct super_block * sb); -+static void ext3cow_write_super_lockfs(struct super_block *sb); -+ -+/* -+ * Wrappers for journal_start/end. -+ * -+ * The only special thing we need to do here is to make sure that all -+ * journal_end calls result in the superblock being marked dirty, so -+ * that sync() will call the filesystem's write_super callback if -+ * appropriate. -+ */ -+handle_t *ext3cow_journal_start_sb(struct super_block *sb, int nblocks) -+{ -+ journal_t *journal; -+ -+ if (sb->s_flags & MS_RDONLY) -+ return ERR_PTR(-EROFS); -+ -+ /* Special case here: if the journal has aborted behind our -+ * backs (eg. EIO in the commit thread), then we still need to -+ * take the FS itself readonly cleanly. */ -+ journal = EXT3COW_SB(sb)->s_journal; -+ if (is_journal_aborted(journal)) { -+ ext3cow_abort(sb, __FUNCTION__, -+ "Detected aborted journal"); -+ return ERR_PTR(-EROFS); -+ } -+ -+ return journal_start(journal, nblocks); -+} -+ -+/* -+ * The only special thing we need to do here is to make sure that all -+ * journal_stop calls result in the superblock being marked dirty, so -+ * that sync() will call the filesystem's write_super callback if -+ * appropriate. -+ */ -+int __ext3cow_journal_stop(const char *where, handle_t *handle) -+{ -+ struct super_block *sb; -+ int err; -+ int rc; -+ -+ sb = handle->h_transaction->t_journal->j_private; -+ err = handle->h_err; -+ rc = journal_stop(handle); -+ -+ if (!err) -+ err = rc; -+ if (err) -+ __ext3cow_std_error(sb, where, err); -+ return err; -+} -+ -+void ext3cow_journal_abort_handle(const char *caller, const char *err_fn, -+ struct buffer_head *bh, handle_t *handle, int err) -+{ -+ char nbuf[16]; -+ const char *errstr = ext3cow_decode_error(NULL, err, nbuf); -+ -+ if (bh) -+ BUFFER_TRACE(bh, "abort"); -+ -+ if (!handle->h_err) -+ handle->h_err = err; -+ -+ if (is_handle_aborted(handle)) -+ return; -+ -+ printk(KERN_ERR "%s: aborting transaction: %s in %s\n", -+ caller, errstr, err_fn); -+ -+ journal_abort_handle(handle); -+} -+ -+/* Deal with the reporting of failure conditions on a filesystem such as -+ * inconsistencies detected or read IO failures. -+ * -+ * On ext2, we can store the error state of the filesystem in the -+ * superblock. That is not possible on ext3cow, because we may have other -+ * write ordering constraints on the superblock which prevent us from -+ * writing it out straight away; and given that the journal is about to -+ * be aborted, we can't rely on the current, or future, transactions to -+ * write out the superblock safely. -+ * -+ * We'll just use the journal_abort() error code to record an error in -+ * the journal instead. On recovery, the journal will compain about -+ * that error until we've noted it down and cleared it. -+ */ -+ -+static void ext3cow_handle_error(struct super_block *sb) -+{ -+ struct ext3cow_super_block *es = EXT3COW_SB(sb)->s_es; -+ -+ EXT3COW_SB(sb)->s_mount_state |= EXT3COW_ERROR_FS; -+ es->s_state |= cpu_to_le16(EXT3COW_ERROR_FS); -+ -+ if (sb->s_flags & MS_RDONLY) -+ return; -+ -+ if (!test_opt (sb, ERRORS_CONT)) { -+ journal_t *journal = EXT3COW_SB(sb)->s_journal; -+ -+ EXT3COW_SB(sb)->s_mount_opt |= EXT3COW_MOUNT_ABORT; -+ if (journal) -+ journal_abort(journal, -EIO); -+ } -+ if (test_opt (sb, ERRORS_RO)) { -+ printk (KERN_CRIT "Remounting filesystem read-only\n"); -+ sb->s_flags |= MS_RDONLY; -+ } -+ ext3cow_commit_super(sb, es, 1); -+ if (test_opt(sb, ERRORS_PANIC)) -+ panic("EXT3COW-fs (device %s): panic forced after error\n", -+ sb->s_id); -+} -+ -+void ext3cow_error (struct super_block * sb, const char * function, -+ const char * fmt, ...) -+{ -+ va_list args; -+ -+ va_start(args, fmt); -+ printk(KERN_CRIT "EXT3COW-fs error (device %s): %s: ",sb->s_id, function); -+ vprintk(fmt, args); -+ printk("\n"); -+ va_end(args); -+ -+ ext3cow_handle_error(sb); -+} -+ -+static const char *ext3cow_decode_error(struct super_block * sb, int errno, -+ char nbuf[16]) -+{ -+ char *errstr = NULL; -+ -+ switch (errno) { -+ case -EIO: -+ errstr = "IO failure"; -+ break; -+ case -ENOMEM: -+ errstr = "Out of memory"; -+ break; -+ case -EROFS: -+ if (!sb || EXT3COW_SB(sb)->s_journal->j_flags & JFS_ABORT) -+ errstr = "Journal has aborted"; -+ else -+ errstr = "Readonly filesystem"; -+ break; -+ default: -+ /* If the caller passed in an extra buffer for unknown -+ * errors, textualise them now. Else we just return -+ * NULL. */ -+ if (nbuf) { -+ /* Check for truncated error codes... */ -+ if (snprintf(nbuf, 16, "error %d", -errno) >= 0) -+ errstr = nbuf; -+ } -+ break; -+ } -+ -+ return errstr; -+} -+ -+/* __ext3cow_std_error decodes expected errors from journaling functions -+ * automatically and invokes the appropriate error response. */ -+ -+void __ext3cow_std_error (struct super_block * sb, const char * function, -+ int errno) -+{ -+ char nbuf[16]; -+ const char *errstr; -+ -+ /* Special case: if the error is EROFS, and we're not already -+ * inside a transaction, then there's really no point in logging -+ * an error. */ -+ if (errno == -EROFS && journal_current_handle() == NULL && -+ (sb->s_flags & MS_RDONLY)) -+ return; -+ -+ errstr = ext3cow_decode_error(sb, errno, nbuf); -+ printk (KERN_CRIT "EXT3COW-fs error (device %s) in %s: %s\n", -+ sb->s_id, function, errstr); -+ -+ ext3cow_handle_error(sb); -+} -+ -+/* -+ * ext3cow_abort is a much stronger failure handler than ext3cow_error. The -+ * abort function may be used to deal with unrecoverable failures such -+ * as journal IO errors or ENOMEM at a critical moment in log management. -+ * -+ * We unconditionally force the filesystem into an ABORT|READONLY state, -+ * unless the error response on the fs has been set to panic in which -+ * case we take the easy way out and panic immediately. -+ */ -+ -+void ext3cow_abort (struct super_block * sb, const char * function, -+ const char * fmt, ...) -+{ -+ va_list args; -+ -+ printk (KERN_CRIT "ext3cow_abort called.\n"); -+ -+ va_start(args, fmt); -+ printk(KERN_CRIT "EXT3COW-fs error (device %s): %s: ",sb->s_id, function); -+ vprintk(fmt, args); -+ printk("\n"); -+ va_end(args); -+ -+ if (test_opt(sb, ERRORS_PANIC)) -+ panic("EXT3COW-fs panic from previous error\n"); -+ -+ if (sb->s_flags & MS_RDONLY) -+ return; -+ -+ printk(KERN_CRIT "Remounting filesystem read-only\n"); -+ EXT3COW_SB(sb)->s_mount_state |= EXT3COW_ERROR_FS; -+ sb->s_flags |= MS_RDONLY; -+ EXT3COW_SB(sb)->s_mount_opt |= EXT3COW_MOUNT_ABORT; -+ journal_abort(EXT3COW_SB(sb)->s_journal, -EIO); -+} -+ -+void ext3cow_warning (struct super_block * sb, const char * function, -+ const char * fmt, ...) -+{ -+ va_list args; -+ -+ va_start(args, fmt); -+ printk(KERN_WARNING "EXT3COW-fs warning (device %s): %s: ", -+ sb->s_id, function); -+ vprintk(fmt, args); -+ printk("\n"); -+ va_end(args); -+} -+ -+void ext3cow_update_dynamic_rev(struct super_block *sb) -+{ -+ struct ext3cow_super_block *es = EXT3COW_SB(sb)->s_es; -+ -+ if (le32_to_cpu(es->s_rev_level) > EXT3COW_GOOD_OLD_REV) -+ return; -+ -+ ext3cow_warning(sb, __FUNCTION__, -+ "updating to rev %d because of new feature flag, " -+ "running e2fsck is recommended", -+ EXT3COW_DYNAMIC_REV); -+ -+ es->s_first_ino = cpu_to_le32(EXT3COW_GOOD_OLD_FIRST_INO); -+ es->s_inode_size = cpu_to_le16(EXT3COW_GOOD_OLD_INODE_SIZE); -+ es->s_rev_level = cpu_to_le32(EXT3COW_DYNAMIC_REV); -+ /* leave es->s_feature_*compat flags alone */ -+ /* es->s_uuid will be set by e2fsck if empty */ -+ -+ /* -+ * The rest of the superblock fields should be zero, and if not it -+ * means they are likely already in use, so leave them alone. We -+ * can leave it up to e2fsck to clean up any inconsistencies there. -+ */ -+} -+ -+/* -+ * Open the external journal device -+ */ -+static struct block_device *ext3cow_blkdev_get(dev_t dev) -+{ -+ struct block_device *bdev; -+ char b[BDEVNAME_SIZE]; -+ -+ bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); -+ if (IS_ERR(bdev)) -+ goto fail; -+ return bdev; -+ -+fail: -+ printk(KERN_ERR "EXT3COW: failed to open journal device %s: %ld\n", -+ __bdevname(dev, b), PTR_ERR(bdev)); -+ return NULL; -+} -+ -+/* -+ * Release the journal device -+ */ -+static int ext3cow_blkdev_put(struct block_device *bdev) -+{ -+ bd_release(bdev); -+ return blkdev_put(bdev); -+} -+ -+static int ext3cow_blkdev_remove(struct ext3cow_sb_info *sbi) -+{ -+ struct block_device *bdev; -+ int ret = -ENODEV; -+ -+ bdev = sbi->journal_bdev; -+ if (bdev) { -+ ret = ext3cow_blkdev_put(bdev); -+ sbi->journal_bdev = NULL; -+ } -+ return ret; -+} -+ -+static inline struct inode *orphan_list_entry(struct list_head *l) -+{ -+ return &list_entry(l, struct ext3cow_inode_info, i_orphan)->vfs_inode; -+} -+ -+static void dump_orphan_list(struct super_block *sb, struct ext3cow_sb_info *sbi) -+{ -+ struct list_head *l; -+ -+ printk(KERN_ERR "sb orphan head is %d\n", -+ le32_to_cpu(sbi->s_es->s_last_orphan)); -+ -+ printk(KERN_ERR "sb_info orphan list:\n"); -+ list_for_each(l, &sbi->s_orphan) { -+ struct inode *inode = orphan_list_entry(l); -+ printk(KERN_ERR " " -+ "inode %s:%lu at %p: mode %o, nlink %d, next %d\n", -+ inode->i_sb->s_id, inode->i_ino, inode, -+ inode->i_mode, inode->i_nlink, -+ NEXT_ORPHAN(inode)); -+ } -+} -+ -+static void ext3cow_put_super (struct super_block * sb) -+{ -+ struct ext3cow_sb_info *sbi = EXT3COW_SB(sb); -+ struct ext3cow_super_block *es = sbi->s_es; -+ int i; -+ -+ ext3cow_xattr_put_super(sb); -+ journal_destroy(sbi->s_journal); -+ if (!(sb->s_flags & MS_RDONLY)) { -+ EXT3COW_CLEAR_INCOMPAT_FEATURE(sb, EXT3COW_FEATURE_INCOMPAT_RECOVER); -+ es->s_state = cpu_to_le16(sbi->s_mount_state); -+ BUFFER_TRACE(sbi->s_sbh, "marking dirty"); -+ mark_buffer_dirty(sbi->s_sbh); -+ ext3cow_commit_super(sb, es, 1); -+ } -+ -+ for (i = 0; i < sbi->s_gdb_count; i++) -+ brelse(sbi->s_group_desc[i]); -+ kfree(sbi->s_group_desc); -+ percpu_counter_destroy(&sbi->s_freeblocks_counter); -+ percpu_counter_destroy(&sbi->s_freeinodes_counter); -+ percpu_counter_destroy(&sbi->s_dirs_counter); -+ brelse(sbi->s_sbh); -+#ifdef CONFIG_QUOTA -+ for (i = 0; i < MAXQUOTAS; i++) -+ kfree(sbi->s_qf_names[i]); -+#endif -+ -+ /* Debugging code just in case the in-memory inode orphan list -+ * isn't empty. The on-disk one can be non-empty if we've -+ * detected an error and taken the fs readonly, but the -+ * in-memory list had better be clean by this point. */ -+ if (!list_empty(&sbi->s_orphan)) -+ dump_orphan_list(sb, sbi); -+ J_ASSERT(list_empty(&sbi->s_orphan)); -+ -+ invalidate_bdev(sb->s_bdev, 0); -+ if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) { -+ /* -+ * Invalidate the journal device's buffers. We don't want them -+ * floating about in memory - the physical journal device may -+ * hotswapped, and it breaks the `ro-after' testing code. -+ */ -+ sync_blockdev(sbi->journal_bdev); -+ invalidate_bdev(sbi->journal_bdev, 0); -+ ext3cow_blkdev_remove(sbi); -+ } -+ sb->s_fs_info = NULL; -+ kfree(sbi); -+ return; -+} -+ -+static struct kmem_cache *ext3cow_inode_cachep; -+ -+/* -+ * Called inside transaction, so use GFP_NOFS -+ */ -+static struct inode *ext3cow_alloc_inode(struct super_block *sb) -+{ -+ struct ext3cow_inode_info *ei; -+ -+ ei = kmem_cache_alloc(ext3cow_inode_cachep, GFP_NOFS); -+ if (!ei) -+ return NULL; -+#ifdef CONFIG_EXT3COW_FS_POSIX_ACL -+ ei->i_acl = EXT3COW_ACL_NOT_CACHED; -+ ei->i_default_acl = EXT3COW_ACL_NOT_CACHED; -+#endif -+ ei->i_block_alloc_info = NULL; -+ ei->vfs_inode.i_version = 1; -+ return &ei->vfs_inode; -+} -+ -+static void ext3cow_destroy_inode(struct inode *inode) -+{ -+ kmem_cache_free(ext3cow_inode_cachep, EXT3COW_I(inode)); -+} -+ -+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) -+{ -+ struct ext3cow_inode_info *ei = (struct ext3cow_inode_info *) foo; -+ -+ if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == -+ SLAB_CTOR_CONSTRUCTOR) { -+ INIT_LIST_HEAD(&ei->i_orphan); -+#ifdef CONFIG_EXT3COW_FS_XATTR -+ init_rwsem(&ei->xattr_sem); -+#endif -+ mutex_init(&ei->truncate_mutex); -+ inode_init_once(&ei->vfs_inode); -+ } -+} -+ -+static int init_inodecache(void) -+{ -+ ext3cow_inode_cachep = kmem_cache_create("ext3cow_inode_cache", -+ sizeof(struct ext3cow_inode_info), -+ 0, (SLAB_RECLAIM_ACCOUNT| -+ SLAB_MEM_SPREAD), -+ init_once, NULL); -+ if (ext3cow_inode_cachep == NULL) -+ return -ENOMEM; -+ return 0; -+} -+ -+static void destroy_inodecache(void) -+{ -+ kmem_cache_destroy(ext3cow_inode_cachep); -+} -+ -+static void ext3cow_clear_inode(struct inode *inode) -+{ -+ struct ext3cow_block_alloc_info *rsv = EXT3COW_I(inode)->i_block_alloc_info; -+#ifdef CONFIG_EXT3COW_FS_POSIX_ACL -+ if (EXT3COW_I(inode)->i_acl && -+ EXT3COW_I(inode)->i_acl != EXT3COW_ACL_NOT_CACHED) { -+ posix_acl_release(EXT3COW_I(inode)->i_acl); -+ EXT3COW_I(inode)->i_acl = EXT3COW_ACL_NOT_CACHED; -+ } -+ if (EXT3COW_I(inode)->i_default_acl && -+ EXT3COW_I(inode)->i_default_acl != EXT3COW_ACL_NOT_CACHED) { -+ posix_acl_release(EXT3COW_I(inode)->i_default_acl); -+ EXT3COW_I(inode)->i_default_acl = EXT3COW_ACL_NOT_CACHED; -+ } -+#endif -+ ext3cow_discard_reservation(inode); -+ EXT3COW_I(inode)->i_block_alloc_info = NULL; -+ if (unlikely(rsv)) -+ kfree(rsv); -+} -+ -+static inline void ext3cow_show_quota_options(struct seq_file *seq, struct super_block *sb) -+{ -+#if defined(CONFIG_QUOTA) -+ struct ext3cow_sb_info *sbi = EXT3COW_SB(sb); -+ -+ if (sbi->s_jquota_fmt) -+ seq_printf(seq, ",jqfmt=%s", -+ (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold": "vfsv0"); -+ -+ if (sbi->s_qf_names[USRQUOTA]) -+ seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); -+ -+ if (sbi->s_qf_names[GRPQUOTA]) -+ seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); -+ -+ if (sbi->s_mount_opt & EXT3COW_MOUNT_USRQUOTA) -+ seq_puts(seq, ",usrquota"); -+ -+ if (sbi->s_mount_opt & EXT3COW_MOUNT_GRPQUOTA) -+ seq_puts(seq, ",grpquota"); -+#endif -+} -+ -+static int ext3cow_show_options(struct seq_file *seq, struct vfsmount *vfs) -+{ -+ struct super_block *sb = vfs->mnt_sb; -+ -+ if (test_opt(sb, DATA_FLAGS) == EXT3COW_MOUNT_JOURNAL_DATA) -+ seq_puts(seq, ",data=journal"); -+ else if (test_opt(sb, DATA_FLAGS) == EXT3COW_MOUNT_ORDERED_DATA) -+ seq_puts(seq, ",data=ordered"); -+ else if (test_opt(sb, DATA_FLAGS) == EXT3COW_MOUNT_WRITEBACK_DATA) -+ seq_puts(seq, ",data=writeback"); -+ -+ ext3cow_show_quota_options(seq, sb); -+ -+ return 0; -+} -+ -+ -+static struct dentry *ext3cow_get_dentry(struct super_block *sb, void *vobjp) -+{ -+ __u32 *objp = vobjp; -+ unsigned long ino = objp[0]; -+ __u32 generation = objp[1]; -+ struct inode *inode; -+ struct dentry *result; -+ -+ if (ino < EXT3COW_FIRST_INO(sb) && ino != EXT3COW_ROOT_INO) -+ return ERR_PTR(-ESTALE); -+ if (ino > le32_to_cpu(EXT3COW_SB(sb)->s_es->s_inodes_count)) -+ return ERR_PTR(-ESTALE); -+ -+ /* iget isn't really right if the inode is currently unallocated!! -+ * -+ * ext3cow_read_inode will return a bad_inode if the inode had been -+ * deleted, so we should be safe. -+ * -+ * Currently we don't know the generation for parent directory, so -+ * a generation of 0 means "accept any" -+ */ -+ inode = iget(sb, ino); -+ if (inode == NULL) -+ return ERR_PTR(-ENOMEM); -+ if (is_bad_inode(inode) || -+ (generation && inode->i_generation != generation)) { -+ iput(inode); -+ return ERR_PTR(-ESTALE); -+ } -+ /* now to find a dentry. -+ * If possible, get a well-connected one -+ */ -+ result = d_alloc_anon(inode); -+ if (!result) { -+ iput(inode); -+ return ERR_PTR(-ENOMEM); -+ } -+ return result; -+} -+ -+#ifdef CONFIG_QUOTA -+#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group") -+#define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) -+ -+static int ext3cow_dquot_initialize(struct inode *inode, int type); -+static int ext3cow_dquot_drop(struct inode *inode); -+static int ext3cow_write_dquot(struct dquot *dquot); -+static int ext3cow_acquire_dquot(struct dquot *dquot); -+static int ext3cow_release_dquot(struct dquot *dquot); -+static int ext3cow_mark_dquot_dirty(struct dquot *dquot); -+static int ext3cow_write_info(struct super_block *sb, int type); -+static int ext3cow_quota_on(struct super_block *sb, int type, int format_id, char *path); -+static int ext3cow_quota_on_mount(struct super_block *sb, int type); -+static ssize_t ext3cow_quota_read(struct super_block *sb, int type, char *data, -+ size_t len, loff_t off); -+static ssize_t ext3cow_quota_write(struct super_block *sb, int type, -+ const char *data, size_t len, loff_t off); -+ -+static struct dquot_operations ext3cow_quota_operations = { -+ .initialize = ext3cow_dquot_initialize, -+ .drop = ext3cow_dquot_drop, -+ .alloc_space = dquot_alloc_space, -+ .alloc_inode = dquot_alloc_inode, -+ .free_space = dquot_free_space, -+ .free_inode = dquot_free_inode, -+ .transfer = dquot_transfer, -+ .write_dquot = ext3cow_write_dquot, -+ .acquire_dquot = ext3cow_acquire_dquot, -+ .release_dquot = ext3cow_release_dquot, -+ .mark_dirty = ext3cow_mark_dquot_dirty, -+ .write_info = ext3cow_write_info -+}; -+ -+static struct quotactl_ops ext3cow_qctl_operations = { -+ .quota_on = ext3cow_quota_on, -+ .quota_off = vfs_quota_off, -+ .quota_sync = vfs_quota_sync, -+ .get_info = vfs_get_dqinfo, -+ .set_info = vfs_set_dqinfo, -+ .get_dqblk = vfs_get_dqblk, -+ .set_dqblk = vfs_set_dqblk -+}; -+#endif -+ -+static struct super_operations ext3cow_sops = { -+ .alloc_inode = ext3cow_alloc_inode, -+ .destroy_inode = ext3cow_destroy_inode, -+ .read_inode = ext3cow_read_inode, -+ .write_inode = ext3cow_write_inode, -+ .dirty_inode = ext3cow_dirty_inode, -+ .delete_inode = ext3cow_delete_inode, -+ .put_super = ext3cow_put_super, -+ .write_super = ext3cow_write_super, -+ .sync_fs = ext3cow_sync_fs, -+ .write_super_lockfs = ext3cow_write_super_lockfs, -+ .unlockfs = ext3cow_unlockfs, -+ .statfs = ext3cow_statfs, -+ .remount_fs = ext3cow_remount, -+ .clear_inode = ext3cow_clear_inode, -+ .show_options = ext3cow_show_options, -+#ifdef CONFIG_QUOTA -+ .quota_read = ext3cow_quota_read, -+ .quota_write = ext3cow_quota_write, -+#endif -+}; -+ -+static struct export_operations ext3cow_export_ops = { -+ .get_parent = ext3cow_get_parent, -+ .get_dentry = ext3cow_get_dentry, -+}; -+ -+enum { -+ Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, -+ Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, -+ Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, -+ Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, -+ Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, -+ Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, -+ Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, -+ Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, -+ Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, -+ Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, -+ Opt_grpquota -+}; -+ -+static match_table_t tokens = { -+ {Opt_bsd_df, "bsddf"}, -+ {Opt_minix_df, "minixdf"}, -+ {Opt_grpid, "grpid"}, -+ {Opt_grpid, "bsdgroups"}, -+ {Opt_nogrpid, "nogrpid"}, -+ {Opt_nogrpid, "sysvgroups"}, -+ {Opt_resgid, "resgid=%u"}, -+ {Opt_resuid, "resuid=%u"}, -+ {Opt_sb, "sb=%u"}, -+ {Opt_err_cont, "errors=continue"}, -+ {Opt_err_panic, "errors=panic"}, -+ {Opt_err_ro, "errors=remount-ro"}, -+ {Opt_nouid32, "nouid32"}, -+ {Opt_nocheck, "nocheck"}, -+ {Opt_nocheck, "check=none"}, -+ {Opt_debug, "debug"}, -+ {Opt_oldalloc, "oldalloc"}, -+ {Opt_orlov, "orlov"}, -+ {Opt_user_xattr, "user_xattr"}, -+ {Opt_nouser_xattr, "nouser_xattr"}, -+ {Opt_acl, "acl"}, -+ {Opt_noacl, "noacl"}, -+ {Opt_reservation, "reservation"}, -+ {Opt_noreservation, "noreservation"}, -+ {Opt_noload, "noload"}, -+ {Opt_nobh, "nobh"}, -+ {Opt_bh, "bh"}, -+ {Opt_commit, "commit=%u"}, -+ {Opt_journal_update, "journal=update"}, -+ {Opt_journal_inum, "journal=%u"}, -+ {Opt_journal_dev, "journal_dev=%u"}, -+ {Opt_abort, "abort"}, -+ {Opt_data_journal, "data=journal"}, -+ {Opt_data_ordered, "data=ordered"}, -+ {Opt_data_writeback, "data=writeback"}, -+ {Opt_offusrjquota, "usrjquota="}, -+ {Opt_usrjquota, "usrjquota=%s"}, -+ {Opt_offgrpjquota, "grpjquota="}, -+ {Opt_grpjquota, "grpjquota=%s"}, -+ {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, -+ {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, -+ {Opt_grpquota, "grpquota"}, -+ {Opt_noquota, "noquota"}, -+ {Opt_quota, "quota"}, -+ {Opt_usrquota, "usrquota"}, -+ {Opt_barrier, "barrier=%u"}, -+ {Opt_err, NULL}, -+ {Opt_resize, "resize"}, -+}; -+ -+static ext3cow_fsblk_t get_sb_block(void **data) -+{ -+ ext3cow_fsblk_t sb_block; -+ char *options = (char *) *data; -+ -+ if (!options || strncmp(options, "sb=", 3) != 0) -+ return 1; /* Default location */ -+ options += 3; -+ /*todo: use simple_strtoll with >32bit ext3cow */ -+ sb_block = simple_strtoul(options, &options, 0); -+ if (*options && *options != ',') { -+ printk("EXT3COW-fs: Invalid sb specification: %s\n", -+ (char *) *data); -+ return 1; -+ } -+ if (*options == ',') -+ options++; -+ *data = (void *) options; -+ return sb_block; -+} -+ -+static int parse_options (char *options, struct super_block *sb, -+ unsigned int *inum, unsigned long *journal_devnum, -+ ext3cow_fsblk_t *n_blocks_count, int is_remount) -+{ -+ struct ext3cow_sb_info *sbi = EXT3COW_SB(sb); -+ char * p; -+ substring_t args[MAX_OPT_ARGS]; -+ int data_opt = 0; -+ int option; -+#ifdef CONFIG_QUOTA -+ int qtype; -+ char *qname; -+#endif -+ -+ if (!options) -+ return 1; -+ -+ while ((p = strsep (&options, ",")) != NULL) { -+ int token; -+ if (!*p) -+ continue; -+ -+ token = match_token(p, tokens, args); -+ switch (token) { -+ case Opt_bsd_df: -+ clear_opt (sbi->s_mount_opt, MINIX_DF); -+ break; -+ case Opt_minix_df: -+ set_opt (sbi->s_mount_opt, MINIX_DF); -+ break; -+ case Opt_grpid: -+ set_opt (sbi->s_mount_opt, GRPID); -+ break; -+ case Opt_nogrpid: -+ clear_opt (sbi->s_mount_opt, GRPID); -+ break; -+ case Opt_resuid: -+ if (match_int(&args[0], &option)) -+ return 0; -+ sbi->s_resuid = option; -+ break; -+ case Opt_resgid: -+ if (match_int(&args[0], &option)) -+ return 0; -+ sbi->s_resgid = option; -+ break; -+ case Opt_sb: -+ /* handled by get_sb_block() instead of here */ -+ /* *sb_block = match_int(&args[0]); */ -+ break; -+ case Opt_err_panic: -+ clear_opt (sbi->s_mount_opt, ERRORS_CONT); -+ clear_opt (sbi->s_mount_opt, ERRORS_RO); -+ set_opt (sbi->s_mount_opt, ERRORS_PANIC); -+ break; -+ case Opt_err_ro: -+ clear_opt (sbi->s_mount_opt, ERRORS_CONT); -+ clear_opt (sbi->s_mount_opt, ERRORS_PANIC); -+ set_opt (sbi->s_mount_opt, ERRORS_RO); -+ break; -+ case Opt_err_cont: -+ clear_opt (sbi->s_mount_opt, ERRORS_RO); -+ clear_opt (sbi->s_mount_opt, ERRORS_PANIC); -+ set_opt (sbi->s_mount_opt, ERRORS_CONT); -+ break; -+ case Opt_nouid32: -+ set_opt (sbi->s_mount_opt, NO_UID32); -+ break; -+ case Opt_nocheck: -+ clear_opt (sbi->s_mount_opt, CHECK); -+ break; -+ case Opt_debug: -+ set_opt (sbi->s_mount_opt, DEBUG); -+ break; -+ case Opt_oldalloc: -+ set_opt (sbi->s_mount_opt, OLDALLOC); -+ break; -+ case Opt_orlov: -+ clear_opt (sbi->s_mount_opt, OLDALLOC); -+ break; -+#ifdef CONFIG_EXT3COW_FS_XATTR -+ case Opt_user_xattr: -+ set_opt (sbi->s_mount_opt, XATTR_USER); -+ break; -+ case Opt_nouser_xattr: -+ clear_opt (sbi->s_mount_opt, XATTR_USER); -+ break; -+#else -+ case Opt_user_xattr: -+ case Opt_nouser_xattr: -+ printk("EXT3COW (no)user_xattr options not supported\n"); -+ break; -+#endif -+#ifdef CONFIG_EXT3COW_FS_POSIX_ACL -+ case Opt_acl: -+ set_opt(sbi->s_mount_opt, POSIX_ACL); -+ break; -+ case Opt_noacl: -+ clear_opt(sbi->s_mount_opt, POSIX_ACL); -+ break; -+#else -+ case Opt_acl: -+ case Opt_noacl: -+ printk("EXT3COW (no)acl options not supported\n"); -+ break; -+#endif -+ case Opt_reservation: -+ set_opt(sbi->s_mount_opt, RESERVATION); -+ break; -+ case Opt_noreservation: -+ clear_opt(sbi->s_mount_opt, RESERVATION); -+ break; -+ case Opt_journal_update: -+ /* @@@ FIXME */ -+ /* Eventually we will want to be able to create -+ a journal file here. For now, only allow the -+ user to specify an existing inode to be the -+ journal file. */ -+ if (is_remount) { -+ printk(KERN_ERR "EXT3COW-fs: cannot specify " -+ "journal on remount\n"); -+ return 0; -+ } -+ set_opt (sbi->s_mount_opt, UPDATE_JOURNAL); -+ break; -+ case Opt_journal_inum: -+ if (is_remount) { -+ printk(KERN_ERR "EXT3COW-fs: cannot specify " -+ "journal on remount\n"); -+ return 0; -+ } -+ if (match_int(&args[0], &option)) -+ return 0; -+ *inum = option; -+ break; -+ case Opt_journal_dev: -+ if (is_remount) { -+ printk(KERN_ERR "EXT3COW-fs: cannot specify " -+ "journal on remount\n"); -+ return 0; -+ } -+ if (match_int(&args[0], &option)) -+ return 0; -+ *journal_devnum = option; -+ break; -+ case Opt_noload: -+ set_opt (sbi->s_mount_opt, NOLOAD); -+ break; -+ case Opt_commit: -+ if (match_int(&args[0], &option)) -+ return 0; -+ if (option < 0) -+ return 0; -+ if (option == 0) -+ option = JBD_DEFAULT_MAX_COMMIT_AGE; -+ sbi->s_commit_interval = HZ * option; -+ break; -+ case Opt_data_journal: -+ data_opt = EXT3COW_MOUNT_JOURNAL_DATA; -+ goto datacheck; -+ case Opt_data_ordered: -+ data_opt = EXT3COW_MOUNT_ORDERED_DATA; -+ goto datacheck; -+ case Opt_data_writeback: -+ data_opt = EXT3COW_MOUNT_WRITEBACK_DATA; -+ datacheck: -+ if (is_remount) { -+ if ((sbi->s_mount_opt & EXT3COW_MOUNT_DATA_FLAGS) -+ != data_opt) { -+ printk(KERN_ERR -+ "EXT3COW-fs: cannot change data " -+ "mode on remount\n"); -+ return 0; -+ } -+ } else { -+ sbi->s_mount_opt &= ~EXT3COW_MOUNT_DATA_FLAGS; -+ sbi->s_mount_opt |= data_opt; -+ } -+ break; -+#ifdef CONFIG_QUOTA -+ case Opt_usrjquota: -+ qtype = USRQUOTA; -+ goto set_qf_name; -+ case Opt_grpjquota: -+ qtype = GRPQUOTA; -+set_qf_name: -+ if (sb_any_quota_enabled(sb)) { -+ printk(KERN_ERR -+ "EXT3COW-fs: Cannot change journalled " -+ "quota options when quota turned on.\n"); -+ return 0; -+ } -+ qname = match_strdup(&args[0]); -+ if (!qname) { -+ printk(KERN_ERR -+ "EXT3COW-fs: not enough memory for " -+ "storing quotafile name.\n"); -+ return 0; -+ } -+ if (sbi->s_qf_names[qtype] && -+ strcmp(sbi->s_qf_names[qtype], qname)) { -+ printk(KERN_ERR -+ "EXT3COW-fs: %s quota file already " -+ "specified.\n", QTYPE2NAME(qtype)); -+ kfree(qname); -+ return 0; -+ } -+ sbi->s_qf_names[qtype] = qname; -+ if (strchr(sbi->s_qf_names[qtype], '/')) { -+ printk(KERN_ERR -+ "EXT3COW-fs: quotafile must be on " -+ "filesystem root.\n"); -+ kfree(sbi->s_qf_names[qtype]); -+ sbi->s_qf_names[qtype] = NULL; -+ return 0; -+ } -+ set_opt(sbi->s_mount_opt, QUOTA); -+ break; -+ case Opt_offusrjquota: -+ qtype = USRQUOTA; -+ goto clear_qf_name; -+ case Opt_offgrpjquota: -+ qtype = GRPQUOTA; -+clear_qf_name: -+ if (sb_any_quota_enabled(sb)) { -+ printk(KERN_ERR "EXT3COW-fs: Cannot change " -+ "journalled quota options when " -+ "quota turned on.\n"); -+ return 0; -+ } -+ /* -+ * The space will be released later when all options -+ * are confirmed to be correct -+ */ -+ sbi->s_qf_names[qtype] = NULL; -+ break; -+ case Opt_jqfmt_vfsold: -+ sbi->s_jquota_fmt = QFMT_VFS_OLD; -+ break; -+ case Opt_jqfmt_vfsv0: -+ sbi->s_jquota_fmt = QFMT_VFS_V0; -+ break; -+ case Opt_quota: -+ case Opt_usrquota: -+ set_opt(sbi->s_mount_opt, QUOTA); -+ set_opt(sbi->s_mount_opt, USRQUOTA); -+ break; -+ case Opt_grpquota: -+ set_opt(sbi->s_mount_opt, QUOTA); -+ set_opt(sbi->s_mount_opt, GRPQUOTA); -+ break; -+ case Opt_noquota: -+ if (sb_any_quota_enabled(sb)) { -+ printk(KERN_ERR "EXT3COW-fs: Cannot change quota " -+ "options when quota turned on.\n"); -+ return 0; -+ } -+ clear_opt(sbi->s_mount_opt, QUOTA); -+ clear_opt(sbi->s_mount_opt, USRQUOTA); -+ clear_opt(sbi->s_mount_opt, GRPQUOTA); -+ break; -+#else -+ case Opt_quota: -+ case Opt_usrquota: -+ case Opt_grpquota: -+ case Opt_usrjquota: -+ case Opt_grpjquota: -+ case Opt_offusrjquota: -+ case Opt_offgrpjquota: -+ case Opt_jqfmt_vfsold: -+ case Opt_jqfmt_vfsv0: -+ printk(KERN_ERR -+ "EXT3COW-fs: journalled quota options not " -+ "supported.\n"); -+ break; -+ case Opt_noquota: -+ break; -+#endif -+ case Opt_abort: -+ set_opt(sbi->s_mount_opt, ABORT); -+ break; -+ case Opt_barrier: -+ if (match_int(&args[0], &option)) -+ return 0; -+ if (option) -+ set_opt(sbi->s_mount_opt, BARRIER); -+ else -+ clear_opt(sbi->s_mount_opt, BARRIER); -+ break; -+ case Opt_ignore: -+ break; -+ case Opt_resize: -+ if (!is_remount) { -+ printk("EXT3COW-fs: resize option only available " -+ "for remount\n"); -+ return 0; -+ } -+ if (match_int(&args[0], &option) != 0) -+ return 0; -+ *n_blocks_count = option; -+ break; -+ case Opt_nobh: -+ set_opt(sbi->s_mount_opt, NOBH); -+ break; -+ case Opt_bh: -+ clear_opt(sbi->s_mount_opt, NOBH); -+ break; -+ default: -+ printk (KERN_ERR -+ "EXT3COW-fs: Unrecognized mount option \"%s\" " -+ "or missing value\n", p); -+ return 0; -+ } -+ } -+#ifdef CONFIG_QUOTA -+ if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { -+ if ((sbi->s_mount_opt & EXT3COW_MOUNT_USRQUOTA) && -+ sbi->s_qf_names[USRQUOTA]) -+ clear_opt(sbi->s_mount_opt, USRQUOTA); -+ -+ if ((sbi->s_mount_opt & EXT3COW_MOUNT_GRPQUOTA) && -+ sbi->s_qf_names[GRPQUOTA]) -+ clear_opt(sbi->s_mount_opt, GRPQUOTA); -+ -+ if ((sbi->s_qf_names[USRQUOTA] && -+ (sbi->s_mount_opt & EXT3COW_MOUNT_GRPQUOTA)) || -+ (sbi->s_qf_names[GRPQUOTA] && -+ (sbi->s_mount_opt & EXT3COW_MOUNT_USRQUOTA))) { -+ printk(KERN_ERR "EXT3COW-fs: old and new quota " -+ "format mixing.\n"); -+ return 0; -+ } -+ -+ if (!sbi->s_jquota_fmt) { -+ printk(KERN_ERR "EXT3COW-fs: journalled quota format " -+ "not specified.\n"); -+ return 0; -+ } -+ } else { -+ if (sbi->s_jquota_fmt) { -+ printk(KERN_ERR "EXT3COW-fs: journalled quota format " -+ "specified with no journalling " -+ "enabled.\n"); -+ return 0; -+ } -+ } -+#endif -+ return 1; -+} -+ -+static int ext3cow_setup_super(struct super_block *sb, struct ext3cow_super_block *es, -+ int read_only) -+{ -+ struct ext3cow_sb_info *sbi = EXT3COW_SB(sb); -+ int res = 0; -+ -+ if (le32_to_cpu(es->s_rev_level) > EXT3COW_MAX_SUPP_REV) { -+ printk (KERN_ERR "EXT3COW-fs warning: revision level too high, " -+ "forcing read-only mode\n"); -+ res = MS_RDONLY; -+ } -+ if (read_only) -+ return res; -+ if (!(sbi->s_mount_state & EXT3COW_VALID_FS)) -+ printk (KERN_WARNING "EXT3COW-fs warning: mounting unchecked fs, " -+ "running e2fsck is recommended\n"); -+ else if ((sbi->s_mount_state & EXT3COW_ERROR_FS)) -+ printk (KERN_WARNING -+ "EXT3COW-fs warning: mounting fs with errors, " -+ "running e2fsck is recommended\n"); -+ else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && -+ le16_to_cpu(es->s_mnt_count) >= -+ (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) -+ printk (KERN_WARNING -+ "EXT3COW-fs warning: maximal mount count reached, " -+ "running e2fsck is recommended\n"); -+ else if (le32_to_cpu(es->s_checkinterval) && -+ (le32_to_cpu(es->s_lastcheck) + -+ le32_to_cpu(es->s_checkinterval) <= get_seconds())) -+ printk (KERN_WARNING -+ "EXT3COW-fs warning: checktime reached, " -+ "running e2fsck is recommended\n"); -+#if 0 -+ /* @@@ We _will_ want to clear the valid bit if we find -+ inconsistencies, to force a fsck at reboot. But for -+ a plain journaled filesystem we can keep it set as -+ valid forever! :) */ -+ es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) & ~EXT3COW_VALID_FS); -+#endif -+ if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) -+ es->s_max_mnt_count = cpu_to_le16(EXT3COW_DFL_MAX_MNT_COUNT); -+ es->s_mnt_count=cpu_to_le16(le16_to_cpu(es->s_mnt_count) + 1); -+ es->s_mtime = cpu_to_le32(get_seconds()); -+ ext3cow_update_dynamic_rev(sb); -+ EXT3COW_SET_INCOMPAT_FEATURE(sb, EXT3COW_FEATURE_INCOMPAT_RECOVER); -+ -+ ext3cow_commit_super(sb, es, 1); -+ if (test_opt(sb, DEBUG)) -+ printk(KERN_INFO "[EXT3COW FS bs=%lu, gc=%lu, " -+ "bpg=%lu, ipg=%lu, mo=%04lx]\n", -+ sb->s_blocksize, -+ sbi->s_groups_count, -+ EXT3COW_BLOCKS_PER_GROUP(sb), -+ EXT3COW_INODES_PER_GROUP(sb), -+ sbi->s_mount_opt); -+ -+ printk(KERN_INFO "EXT3COW FS on %s, ", sb->s_id); -+ if (EXT3COW_SB(sb)->s_journal->j_inode == NULL) { -+ char b[BDEVNAME_SIZE]; -+ -+ printk("external journal on %s\n", -+ bdevname(EXT3COW_SB(sb)->s_journal->j_dev, b)); -+ } else { -+ printk("internal journal\n"); -+ } -+ return res; -+} -+ -+/* Called at mount-time, super-block is locked */ -+static int ext3cow_check_descriptors (struct super_block * sb) -+{ -+ struct ext3cow_sb_info *sbi = EXT3COW_SB(sb); -+ ext3cow_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); -+ ext3cow_fsblk_t last_block; -+ struct ext3cow_group_desc * gdp = NULL; -+ int desc_block = 0; -+ int i; -+ -+ ext3cow_debug ("Checking group descriptors"); -+ -+ for (i = 0; i < sbi->s_groups_count; i++) -+ { -+ if (i == sbi->s_groups_count - 1) -+ last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1; -+ else -+ last_block = first_block + -+ (EXT3COW_BLOCKS_PER_GROUP(sb) - 1); -+ -+ if ((i % EXT3COW_DESC_PER_BLOCK(sb)) == 0) -+ gdp = (struct ext3cow_group_desc *) -+ sbi->s_group_desc[desc_block++]->b_data; -+ if (le32_to_cpu(gdp->bg_block_bitmap) < first_block || -+ le32_to_cpu(gdp->bg_block_bitmap) > last_block) -+ { -+ ext3cow_error (sb, "ext3cow_check_descriptors", -+ "Block bitmap for group %d" -+ " not in group (block %lu)!", -+ i, (unsigned long) -+ le32_to_cpu(gdp->bg_block_bitmap)); -+ return 0; -+ } -+ if (le32_to_cpu(gdp->bg_inode_bitmap) < first_block || -+ le32_to_cpu(gdp->bg_inode_bitmap) > last_block) -+ { -+ ext3cow_error (sb, "ext3cow_check_descriptors", -+ "Inode bitmap for group %d" -+ " not in group (block %lu)!", -+ i, (unsigned long) -+ le32_to_cpu(gdp->bg_inode_bitmap)); -+ return 0; -+ } -+ if (le32_to_cpu(gdp->bg_inode_table) < first_block || -+ le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group > -+ last_block) -+ { -+ ext3cow_error (sb, "ext3cow_check_descriptors", -+ "Inode table for group %d" -+ " not in group (block %lu)!", -+ i, (unsigned long) -+ le32_to_cpu(gdp->bg_inode_table)); -+ return 0; -+ } -+ first_block += EXT3COW_BLOCKS_PER_GROUP(sb); -+ gdp++; -+ } -+ -+ sbi->s_es->s_free_blocks_count=cpu_to_le32(ext3cow_count_free_blocks(sb)); -+ sbi->s_es->s_free_inodes_count=cpu_to_le32(ext3cow_count_free_inodes(sb)); -+ return 1; -+} -+ -+ -+/* ext3cow_orphan_cleanup() walks a singly-linked list of inodes (starting at -+ * the superblock) which were deleted from all directories, but held open by -+ * a process at the time of a crash. We walk the list and try to delete these -+ * inodes at recovery time (only with a read-write filesystem). -+ * -+ * In order to keep the orphan inode chain consistent during traversal (in -+ * case of crash during recovery), we link each inode into the superblock -+ * orphan list_head and handle it the same way as an inode deletion during -+ * normal operation (which journals the operations for us). -+ * -+ * We only do an iget() and an iput() on each inode, which is very safe if we -+ * accidentally point at an in-use or already deleted inode. The worst that -+ * can happen in this case is that we get a "bit already cleared" message from -+ * ext3cow_free_inode(). The only reason we would point at a wrong inode is if -+ * e2fsck was run on this filesystem, and it must have already done the orphan -+ * inode cleanup for us, so we can safely abort without any further action. -+ */ -+static void ext3cow_orphan_cleanup (struct super_block * sb, -+ struct ext3cow_super_block * es) -+{ -+ unsigned int s_flags = sb->s_flags; -+ int nr_orphans = 0, nr_truncates = 0; -+#ifdef CONFIG_QUOTA -+ int i; -+#endif -+ if (!es->s_last_orphan) { -+ jbd_debug(4, "no orphan inodes to clean up\n"); -+ return; -+ } -+ -+ if (bdev_read_only(sb->s_bdev)) { -+ printk(KERN_ERR "EXT3COW-fs: write access " -+ "unavailable, skipping orphan cleanup.\n"); -+ return; -+ } -+ -+ if (EXT3COW_SB(sb)->s_mount_state & EXT3COW_ERROR_FS) { -+ if (es->s_last_orphan) -+ jbd_debug(1, "Errors on filesystem, " -+ "clearing orphan list.\n"); -+ es->s_last_orphan = 0; -+ jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); -+ return; -+ } -+ -+ if (s_flags & MS_RDONLY) { -+ printk(KERN_INFO "EXT3COW-fs: %s: orphan cleanup on readonly fs\n", -+ sb->s_id); -+ sb->s_flags &= ~MS_RDONLY; -+ } -+#ifdef CONFIG_QUOTA -+ /* Needed for iput() to work correctly and not trash data */ -+ sb->s_flags |= MS_ACTIVE; -+ /* Turn on quotas so that they are updated correctly */ -+ for (i = 0; i < MAXQUOTAS; i++) { -+ if (EXT3COW_SB(sb)->s_qf_names[i]) { -+ int ret = ext3cow_quota_on_mount(sb, i); -+ if (ret < 0) -+ printk(KERN_ERR -+ "EXT3COW-fs: Cannot turn on journalled " -+ "quota: error %d\n", ret); -+ } -+ } -+#endif -+ -+ while (es->s_last_orphan) { -+ struct inode *inode; -+ -+ if (!(inode = -+ ext3cow_orphan_get(sb, le32_to_cpu(es->s_last_orphan)))) { -+ es->s_last_orphan = 0; -+ break; -+ } -+ -+ list_add(&EXT3COW_I(inode)->i_orphan, &EXT3COW_SB(sb)->s_orphan); -+ DQUOT_INIT(inode); -+ if (inode->i_nlink) { -+ printk(KERN_DEBUG -+ "%s: truncating inode %lu to %Ld bytes\n", -+ __FUNCTION__, inode->i_ino, inode->i_size); -+ jbd_debug(2, "truncating inode %lu to %Ld bytes\n", -+ inode->i_ino, inode->i_size); -+ ext3cow_truncate(inode); -+ nr_truncates++; -+ } else { -+ printk(KERN_DEBUG -+ "%s: deleting unreferenced inode %lu\n", -+ __FUNCTION__, inode->i_ino); -+ jbd_debug(2, "deleting unreferenced inode %lu\n", -+ inode->i_ino); -+ nr_orphans++; -+ } -+ iput(inode); /* The delete magic happens here! */ -+ } -+ -+#define PLURAL(x) (x), ((x)==1) ? "" : "s" -+ -+ if (nr_orphans) -+ printk(KERN_INFO "EXT3COW-fs: %s: %d orphan inode%s deleted\n", -+ sb->s_id, PLURAL(nr_orphans)); -+ if (nr_truncates) -+ printk(KERN_INFO "EXT3COW-fs: %s: %d truncate%s cleaned up\n", -+ sb->s_id, PLURAL(nr_truncates)); -+#ifdef CONFIG_QUOTA -+ /* Turn quotas off */ -+ for (i = 0; i < MAXQUOTAS; i++) { -+ if (sb_dqopt(sb)->files[i]) -+ vfs_quota_off(sb, i); -+ } -+#endif -+ sb->s_flags = s_flags; /* Restore MS_RDONLY status */ -+} -+ -+/* -+ * Maximal file size. There is a direct, and {,double-,triple-}indirect -+ * block limit, and also a limit of (2^32 - 1) 512-byte sectors in i_blocks. -+ * We need to be 1 filesystem block less than the 2^32 sector limit. -+ */ -+static loff_t ext3cow_max_size(int bits) -+{ -+ loff_t res = EXT3COW_NDIR_BLOCKS; -+ /* This constant is calculated to be the largest file size for a -+ * dense, 4k-blocksize file such that the total number of -+ * sectors in the file, including data and all indirect blocks, -+ * does not exceed 2^32. */ -+ const loff_t upper_limit = 0x1ff7fffd000LL; -+ -+ res += 1LL << (bits-2); -+ res += 1LL << (2*(bits-2)); -+ res += 1LL << (3*(bits-2)); -+ res <<= bits; -+ if (res > upper_limit) -+ res = upper_limit; -+ return res; -+} -+ -+static ext3cow_fsblk_t descriptor_loc(struct super_block *sb, -+ ext3cow_fsblk_t logic_sb_block, -+ int nr) -+{ -+ struct ext3cow_sb_info *sbi = EXT3COW_SB(sb); -+ unsigned long bg, first_meta_bg; -+ int has_super = 0; -+ -+ first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); -+ -+ if (!EXT3COW_HAS_INCOMPAT_FEATURE(sb, EXT3COW_FEATURE_INCOMPAT_META_BG) || -+ nr < first_meta_bg) -+ return (logic_sb_block + nr + 1); -+ bg = sbi->s_desc_per_block * nr; -+ if (ext3cow_bg_has_super(sb, bg)) -+ has_super = 1; -+ return (has_super + ext3cow_group_first_block_no(sb, bg)); -+} -+ -+ -+static int ext3cow_fill_super (struct super_block *sb, void *data, int silent) -+{ -+ struct buffer_head * bh; -+ struct ext3cow_super_block *es = NULL; -+ struct ext3cow_sb_info *sbi; -+ ext3cow_fsblk_t block; -+ ext3cow_fsblk_t sb_block = get_sb_block(&data); -+ ext3cow_fsblk_t logic_sb_block; -+ unsigned long offset = 0; -+ unsigned int journal_inum = 0; -+ unsigned long journal_devnum = 0; -+ unsigned long def_mount_opts; -+ struct inode *root; -+ int blocksize; -+ int hblock; -+ int db_count; -+ int i; -+ int needs_recovery; -+ __le32 features; -+ -+ sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); -+ if (!sbi) -+ return -ENOMEM; -+ sb->s_fs_info = sbi; -+ sbi->s_mount_opt = 0; -+ sbi->s_resuid = EXT3COW_DEF_RESUID; -+ sbi->s_resgid = EXT3COW_DEF_RESGID; -+ -+ unlock_kernel(); -+ -+ blocksize = sb_min_blocksize(sb, EXT3COW_MIN_BLOCK_SIZE); -+ if (!blocksize) { -+ printk(KERN_ERR "EXT3COW-fs: unable to set blocksize\n"); -+ goto out_fail; -+ } -+ -+ /* -+ * The ext3cow superblock will not be buffer aligned for other than 1kB -+ * block sizes. We need to calculate the offset from buffer start. -+ */ -+ if (blocksize != EXT3COW_MIN_BLOCK_SIZE) { -+ logic_sb_block = (sb_block * EXT3COW_MIN_BLOCK_SIZE) / blocksize; -+ offset = (sb_block * EXT3COW_MIN_BLOCK_SIZE) % blocksize; -+ } else { -+ logic_sb_block = sb_block; -+ } -+ -+ if (!(bh = sb_bread(sb, logic_sb_block))) { -+ printk (KERN_ERR "EXT3COW-fs: unable to read superblock\n"); -+ goto out_fail; -+ } -+ /* -+ * Note: s_es must be initialized as soon as possible because -+ * some ext3cow macro-instructions depend on its value -+ */ -+ es = (struct ext3cow_super_block *) (((char *)bh->b_data) + offset); -+ sbi->s_es = es; -+ sb->s_magic = le16_to_cpu(es->s_magic); -+ if (sb->s_magic != EXT3COW_SUPER_MAGIC) -+ goto cantfind_ext3cow; -+ -+ /* Set defaults before we parse the mount options */ -+ def_mount_opts = le32_to_cpu(es->s_default_mount_opts); -+ if (def_mount_opts & EXT3COW_DEFM_DEBUG) -+ set_opt(sbi->s_mount_opt, DEBUG); -+ if (def_mount_opts & EXT3COW_DEFM_BSDGROUPS) -+ set_opt(sbi->s_mount_opt, GRPID); -+ if (def_mount_opts & EXT3COW_DEFM_UID16) -+ set_opt(sbi->s_mount_opt, NO_UID32); -+#ifdef CONFIG_EXT3COW_FS_XATTR -+ if (def_mount_opts & EXT3COW_DEFM_XATTR_USER) -+ set_opt(sbi->s_mount_opt, XATTR_USER); -+#endif -+#ifdef CONFIG_EXT3COW_FS_POSIX_ACL -+ if (def_mount_opts & EXT3COW_DEFM_ACL) -+ set_opt(sbi->s_mount_opt, POSIX_ACL); -+#endif -+ if ((def_mount_opts & EXT3COW_DEFM_JMODE) == EXT3COW_DEFM_JMODE_DATA) -+ sbi->s_mount_opt |= EXT3COW_MOUNT_JOURNAL_DATA; -+ else if ((def_mount_opts & EXT3COW_DEFM_JMODE) == EXT3COW_DEFM_JMODE_ORDERED) -+ sbi->s_mount_opt |= EXT3COW_MOUNT_ORDERED_DATA; -+ else if ((def_mount_opts & EXT3COW_DEFM_JMODE) == EXT3COW_DEFM_JMODE_WBACK) -+ sbi->s_mount_opt |= EXT3COW_MOUNT_WRITEBACK_DATA; -+ -+ if (le16_to_cpu(sbi->s_es->s_errors) == EXT3COW_ERRORS_PANIC) -+ set_opt(sbi->s_mount_opt, ERRORS_PANIC); -+ else if (le16_to_cpu(sbi->s_es->s_errors) == EXT3COW_ERRORS_RO) -+ set_opt(sbi->s_mount_opt, ERRORS_RO); -+ else -+ set_opt(sbi->s_mount_opt, ERRORS_CONT); -+ -+ sbi->s_resuid = le16_to_cpu(es->s_def_resuid); -+ sbi->s_resgid = le16_to_cpu(es->s_def_resgid); -+ -+ set_opt(sbi->s_mount_opt, RESERVATION); -+ -+ if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum, -+ NULL, 0)) -+ goto failed_mount; -+ -+ sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | -+ ((sbi->s_mount_opt & EXT3COW_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); -+ -+ if (le32_to_cpu(es->s_rev_level) == EXT3COW_GOOD_OLD_REV && -+ (EXT3COW_HAS_COMPAT_FEATURE(sb, ~0U) || -+ EXT3COW_HAS_RO_COMPAT_FEATURE(sb, ~0U) || -+ EXT3COW_HAS_INCOMPAT_FEATURE(sb, ~0U))) -+ printk(KERN_WARNING -+ "EXT3COW-fs warning: feature flags set on rev 0 fs, " -+ "running e2fsck is recommended\n"); -+ /* -+ * Check feature flags regardless of the revision level, since we -+ * previously didn't change the revision level when setting the flags, -+ * so there is a chance incompat flags are set on a rev 0 filesystem. -+ */ -+ features = EXT3COW_HAS_INCOMPAT_FEATURE(sb, ~EXT3COW_FEATURE_INCOMPAT_SUPP); -+ if (features) { -+ printk(KERN_ERR "EXT3COW-fs: %s: couldn't mount because of " -+ "unsupported optional features (%x).\n", -+ sb->s_id, le32_to_cpu(features)); -+ goto failed_mount; -+ } -+ features = EXT3COW_HAS_RO_COMPAT_FEATURE(sb, ~EXT3COW_FEATURE_RO_COMPAT_SUPP); -+ if (!(sb->s_flags & MS_RDONLY) && features) { -+ printk(KERN_ERR "EXT3COW-fs: %s: couldn't mount RDWR because of " -+ "unsupported optional features (%x).\n", -+ sb->s_id, le32_to_cpu(features)); -+ goto failed_mount; -+ } -+ blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); -+ -+ if (blocksize < EXT3COW_MIN_BLOCK_SIZE || -+ blocksize > EXT3COW_MAX_BLOCK_SIZE) { -+ printk(KERN_ERR -+ "EXT3COW-fs: Unsupported filesystem blocksize %d on %s.\n", -+ blocksize, sb->s_id); -+ goto failed_mount; -+ } -+ -+ hblock = bdev_hardsect_size(sb->s_bdev); -+ if (sb->s_blocksize != blocksize) { -+ /* -+ * Make sure the blocksize for the filesystem is larger -+ * than the hardware sectorsize for the machine. -+ */ -+ if (blocksize < hblock) { -+ printk(KERN_ERR "EXT3COW-fs: blocksize %d too small for " -+ "device blocksize %d.\n", blocksize, hblock); -+ goto failed_mount; -+ } -+ -+ brelse (bh); -+ sb_set_blocksize(sb, blocksize); -+ logic_sb_block = (sb_block * EXT3COW_MIN_BLOCK_SIZE) / blocksize; -+ offset = (sb_block * EXT3COW_MIN_BLOCK_SIZE) % blocksize; -+ bh = sb_bread(sb, logic_sb_block); -+ if (!bh) { -+ printk(KERN_ERR -+ "EXT3COW-fs: Can't read superblock on 2nd try.\n"); -+ goto failed_mount; -+ } -+ es = (struct ext3cow_super_block *)(((char *)bh->b_data) + offset); -+ sbi->s_es = es; -+ if (es->s_magic != cpu_to_le16(EXT3COW_SUPER_MAGIC)) { -+ printk (KERN_ERR -+ "EXT3COW-fs: Magic mismatch, very weird !\n"); -+ goto failed_mount; -+ } -+ } -+ -+ sb->s_maxbytes = ext3cow_max_size(sb->s_blocksize_bits); -+ -+ if (le32_to_cpu(es->s_rev_level) == EXT3COW_GOOD_OLD_REV) { -+ sbi->s_inode_size = EXT3COW_GOOD_OLD_INODE_SIZE; -+ sbi->s_first_ino = EXT3COW_GOOD_OLD_FIRST_INO; -+ } else { -+ sbi->s_inode_size = le16_to_cpu(es->s_inode_size); -+ sbi->s_first_ino = le32_to_cpu(es->s_first_ino); -+ if ((sbi->s_inode_size < EXT3COW_GOOD_OLD_INODE_SIZE) || -+ (sbi->s_inode_size & (sbi->s_inode_size - 1)) || -+ (sbi->s_inode_size > blocksize)) { -+ printk (KERN_ERR -+ "EXT3COW-fs: unsupported inode size: %d\n", -+ sbi->s_inode_size); -+ goto failed_mount; -+ } -+ } -+ sbi->s_frag_size = EXT3COW_MIN_FRAG_SIZE << -+ le32_to_cpu(es->s_log_frag_size); -+ if (blocksize != sbi->s_frag_size) { -+ printk(KERN_ERR -+ "EXT3COW-fs: fragsize %lu != blocksize %u (unsupported)\n", -+ sbi->s_frag_size, blocksize); -+ goto failed_mount; -+ } -+ sbi->s_frags_per_block = 1; -+ sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); -+ sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group); -+ sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); -+ if (EXT3COW_INODE_SIZE(sb) == 0) -+ goto cantfind_ext3cow; -+ sbi->s_inodes_per_block = blocksize / EXT3COW_INODE_SIZE(sb); -+ if (sbi->s_inodes_per_block == 0) -+ goto cantfind_ext3cow; -+ sbi->s_itb_per_group = sbi->s_inodes_per_group / -+ sbi->s_inodes_per_block; -+ sbi->s_desc_per_block = blocksize / sizeof(struct ext3cow_group_desc); -+ sbi->s_sbh = bh; -+ sbi->s_mount_state = le16_to_cpu(es->s_state); -+ sbi->s_addr_per_block_bits = ilog2(EXT3COW_ADDR_PER_BLOCK(sb)); -+ sbi->s_desc_per_block_bits = ilog2(EXT3COW_DESC_PER_BLOCK(sb)); -+ for (i=0; i < 4; i++) -+ sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); -+ sbi->s_def_hash_version = es->s_def_hash_version; -+ -+ /* Epoch number for versioning -znjp */ -+ sbi->s_epoch_number = le32_to_cpu(es->s_epoch_number); -+ printk(KERN_INFO "EXT3COW-fs: System epoch number: %u\n", -+ sbi->s_epoch_number); -+ -+ if (sbi->s_blocks_per_group > blocksize * 8) { -+ printk (KERN_ERR -+ "EXT3COW-fs: #blocks per group too big: %lu\n", -+ sbi->s_blocks_per_group); -+ goto failed_mount; -+ } -+ if (sbi->s_frags_per_group > blocksize * 8) { -+ printk (KERN_ERR -+ "EXT3COW-fs: #fragments per group too big: %lu\n", -+ sbi->s_frags_per_group); -+ goto failed_mount; -+ } -+ if (sbi->s_inodes_per_group > blocksize * 8) { -+ printk (KERN_ERR -+ "EXT3COW-fs: #inodes per group too big: %lu\n", -+ sbi->s_inodes_per_group); -+ goto failed_mount; -+ } -+ -+ if (le32_to_cpu(es->s_blocks_count) > -+ (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { -+ printk(KERN_ERR "EXT3COW-fs: filesystem on %s:" -+ " too large to mount safely\n", sb->s_id); -+ if (sizeof(sector_t) < 8) -+ printk(KERN_WARNING "EXT3COW-fs: CONFIG_LBD not " -+ "enabled\n"); -+ goto failed_mount; -+ } -+ -+ if (EXT3COW_BLOCKS_PER_GROUP(sb) == 0) -+ goto cantfind_ext3cow; -+ sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) - -+ le32_to_cpu(es->s_first_data_block) - 1) -+ / EXT3COW_BLOCKS_PER_GROUP(sb)) + 1; -+ db_count = (sbi->s_groups_count + EXT3COW_DESC_PER_BLOCK(sb) - 1) / -+ EXT3COW_DESC_PER_BLOCK(sb); -+ sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *), -+ GFP_KERNEL); -+ if (sbi->s_group_desc == NULL) { -+ printk (KERN_ERR "EXT3COW-fs: not enough memory\n"); -+ goto failed_mount; -+ } -+ -+ bgl_lock_init(&sbi->s_blockgroup_lock); -+ -+ for (i = 0; i < db_count; i++) { -+ block = descriptor_loc(sb, logic_sb_block, i); -+ sbi->s_group_desc[i] = sb_bread(sb, block); -+ if (!sbi->s_group_desc[i]) { -+ printk (KERN_ERR "EXT3COW-fs: " -+ "can't read group descriptor %d\n", i); -+ db_count = i; -+ goto failed_mount2; -+ } -+ } -+ if (!ext3cow_check_descriptors (sb)) { -+ printk(KERN_ERR "EXT3COW-fs: group descriptors corrupted!\n"); -+ goto failed_mount2; -+ } -+ sbi->s_gdb_count = db_count; -+ get_random_bytes(&sbi->s_next_generation, sizeof(u32)); -+ spin_lock_init(&sbi->s_next_gen_lock); -+ -+ percpu_counter_init(&sbi->s_freeblocks_counter, -+ ext3cow_count_free_blocks(sb)); -+ percpu_counter_init(&sbi->s_freeinodes_counter, -+ ext3cow_count_free_inodes(sb)); -+ percpu_counter_init(&sbi->s_dirs_counter, -+ ext3cow_count_dirs(sb)); -+ -+ /* per fileystem reservation list head & lock */ -+ spin_lock_init(&sbi->s_rsv_window_lock); -+ sbi->s_rsv_window_root = RB_ROOT; -+ /* Add a single, static dummy reservation to the start of the -+ * reservation window list --- it gives us a placeholder for -+ * append-at-start-of-list which makes the allocation logic -+ * _much_ simpler. */ -+ sbi->s_rsv_window_head.rsv_start = EXT3COW_RESERVE_WINDOW_NOT_ALLOCATED; -+ sbi->s_rsv_window_head.rsv_end = EXT3COW_RESERVE_WINDOW_NOT_ALLOCATED; -+ sbi->s_rsv_window_head.rsv_alloc_hit = 0; -+ sbi->s_rsv_window_head.rsv_goal_size = 0; -+ ext3cow_rsv_window_add(sb, &sbi->s_rsv_window_head); -+ -+ /* -+ * set up enough so that it can read an inode -+ */ -+ sb->s_op = &ext3cow_sops; -+ sb->s_export_op = &ext3cow_export_ops; -+ sb->s_xattr = ext3cow_xattr_handlers; -+#ifdef CONFIG_QUOTA -+ sb->s_qcop = &ext3cow_qctl_operations; -+ sb->dq_op = &ext3cow_quota_operations; -+#endif -+ INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ -+ -+ sb->s_root = NULL; -+ -+ needs_recovery = (es->s_last_orphan != 0 || -+ EXT3COW_HAS_INCOMPAT_FEATURE(sb, -+ EXT3COW_FEATURE_INCOMPAT_RECOVER)); -+ -+ /* -+ * The first inode we look at is the journal inode. Don't try -+ * root first: it may be modified in the journal! -+ */ -+ if (!test_opt(sb, NOLOAD) && -+ EXT3COW_HAS_COMPAT_FEATURE(sb, EXT3COW_FEATURE_COMPAT_HAS_JOURNAL)) { -+ if (ext3cow_load_journal(sb, es, journal_devnum)) -+ goto failed_mount3; -+ } else if (journal_inum) { -+ if (ext3cow_create_journal(sb, es, journal_inum)) -+ goto failed_mount3; -+ } else { -+ if (!silent) -+ printk (KERN_ERR -+ "ext3cow: No journal on filesystem on %s\n", -+ sb->s_id); -+ goto failed_mount3; -+ } -+ -+ /* We have now updated the journal if required, so we can -+ * validate the data journaling mode. */ -+ switch (test_opt(sb, DATA_FLAGS)) { -+ case 0: -+ /* No mode set, assume a default based on the journal -+ capabilities: ORDERED_DATA if the journal can -+ cope, else JOURNAL_DATA */ -+ if (journal_check_available_features -+ (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) -+ set_opt(sbi->s_mount_opt, ORDERED_DATA); -+ else -+ set_opt(sbi->s_mount_opt, JOURNAL_DATA); -+ break; -+ -+ case EXT3COW_MOUNT_ORDERED_DATA: -+ case EXT3COW_MOUNT_WRITEBACK_DATA: -+ if (!journal_check_available_features -+ (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) { -+ printk(KERN_ERR "EXT3COW-fs: Journal does not support " -+ "requested data journaling mode\n"); -+ goto failed_mount4; -+ } -+ default: -+ break; -+ } -+ -+ if (test_opt(sb, NOBH)) { -+ if (!(test_opt(sb, DATA_FLAGS) == EXT3COW_MOUNT_WRITEBACK_DATA)) { -+ printk(KERN_WARNING "EXT3COW-fs: Ignoring nobh option - " -+ "its supported only with writeback mode\n"); -+ clear_opt(sbi->s_mount_opt, NOBH); -+ } -+ } -+ /* -+ * The journal_load will have done any necessary log recovery, -+ * so we can safely mount the rest of the filesystem now. -+ */ -+ -+ root = iget(sb, EXT3COW_ROOT_INO); -+ sb->s_root = d_alloc_root(root); -+ if (!sb->s_root) { -+ printk(KERN_ERR "EXT3COW-fs: get root inode failed\n"); -+ iput(root); -+ goto failed_mount4; -+ } -+ if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { -+ dput(sb->s_root); -+ sb->s_root = NULL; -+ printk(KERN_ERR "EXT3COW-fs: corrupt root inode, run e2fsck\n"); -+ goto failed_mount4; -+ } -+ -+ ext3cow_setup_super (sb, es, sb->s_flags & MS_RDONLY); -+ /* -+ * akpm: core read_super() calls in here with the superblock locked. -+ * That deadlocks, because orphan cleanup needs to lock the superblock -+ * in numerous places. Here we just pop the lock - it's relatively -+ * harmless, because we are now ready to accept write_super() requests, -+ * and aviro says that's the only reason for hanging onto the -+ * superblock lock. -+ */ -+ EXT3COW_SB(sb)->s_mount_state |= EXT3COW_ORPHAN_FS; -+ ext3cow_orphan_cleanup(sb, es); -+ EXT3COW_SB(sb)->s_mount_state &= ~EXT3COW_ORPHAN_FS; -+ if (needs_recovery) -+ printk (KERN_INFO "EXT3COW-fs: recovery complete.\n"); -+ ext3cow_mark_recovery_complete(sb, es); -+ printk (KERN_INFO "EXT3COW-fs: mounted filesystem with %s data mode.\n", -+ test_opt(sb,DATA_FLAGS) == EXT3COW_MOUNT_JOURNAL_DATA ? "journal": -+ test_opt(sb,DATA_FLAGS) == EXT3COW_MOUNT_ORDERED_DATA ? "ordered": -+ "writeback"); -+ -+ lock_kernel(); -+ return 0; -+ -+cantfind_ext3cow: -+ if (!silent) -+ printk(KERN_ERR "VFS: Can't find ext3cow filesystem on dev %s.\n", -+ sb->s_id); -+ goto failed_mount; -+ -+failed_mount4: -+ journal_destroy(sbi->s_journal); -+failed_mount3: -+ percpu_counter_destroy(&sbi->s_freeblocks_counter); -+ percpu_counter_destroy(&sbi->s_freeinodes_counter); -+ percpu_counter_destroy(&sbi->s_dirs_counter); -+failed_mount2: -+ for (i = 0; i < db_count; i++) -+ brelse(sbi->s_group_desc[i]); -+ kfree(sbi->s_group_desc); -+failed_mount: -+#ifdef CONFIG_QUOTA -+ for (i = 0; i < MAXQUOTAS; i++) -+ kfree(sbi->s_qf_names[i]); -+#endif -+ ext3cow_blkdev_remove(sbi); -+ brelse(bh); -+out_fail: -+ sb->s_fs_info = NULL; -+ kfree(sbi); -+ lock_kernel(); -+ return -EINVAL; -+} -+ -+/* -+ * Setup any per-fs journal parameters now. We'll do this both on -+ * initial mount, once the journal has been initialised but before we've -+ * done any recovery; and again on any subsequent remount. -+ */ -+static void ext3cow_init_journal_params(struct super_block *sb, journal_t *journal) -+{ -+ struct ext3cow_sb_info *sbi = EXT3COW_SB(sb); -+ -+ if (sbi->s_commit_interval) -+ journal->j_commit_interval = sbi->s_commit_interval; -+ /* We could also set up an ext3cow-specific default for the commit -+ * interval here, but for now we'll just fall back to the jbd -+ * default. */ -+ -+ spin_lock(&journal->j_state_lock); -+ if (test_opt(sb, BARRIER)) -+ journal->j_flags |= JFS_BARRIER; -+ else -+ journal->j_flags &= ~JFS_BARRIER; -+ spin_unlock(&journal->j_state_lock); -+} -+ -+static journal_t *ext3cow_get_journal(struct super_block *sb, -+ unsigned int journal_inum) -+{ -+ struct inode *journal_inode; -+ journal_t *journal; -+ -+ /* First, test for the existence of a valid inode on disk. Bad -+ * things happen if we iget() an unused inode, as the subsequent -+ * iput() will try to delete it. */ -+ -+ journal_inode = iget(sb, journal_inum); -+ if (!journal_inode) { -+ printk(KERN_ERR "EXT3COW-fs: no journal found.\n"); -+ return NULL; -+ } -+ if (!journal_inode->i_nlink) { -+ make_bad_inode(journal_inode); -+ iput(journal_inode); -+ printk(KERN_ERR "EXT3COW-fs: journal inode is deleted.\n"); -+ return NULL; -+ } -+ -+ jbd_debug(2, "Journal inode found at %p: %Ld bytes\n", -+ journal_inode, journal_inode->i_size); -+ if (is_bad_inode(journal_inode) || !S_ISREG(journal_inode->i_mode)) { -+ printk(KERN_ERR "EXT3COW-fs: invalid journal inode.\n"); -+ iput(journal_inode); -+ return NULL; -+ } -+ -+ journal = journal_init_inode(journal_inode); -+ if (!journal) { -+ printk(KERN_ERR "EXT3COW-fs: Could not load journal inode\n"); -+ iput(journal_inode); -+ return NULL; -+ } -+ /* Make sure the journal never gets versioned -znjp */ -+ EXT3COW_I(journal_inode)->i_flags |= EXT3COW_UNVERSIONABLE_FL; -+ journal->j_private = sb; -+ ext3cow_init_journal_params(sb, journal); -+ return journal; -+} -+ -+static journal_t *ext3cow_get_dev_journal(struct super_block *sb, -+ dev_t j_dev) -+{ -+ struct buffer_head * bh; -+ journal_t *journal; -+ ext3cow_fsblk_t start; -+ ext3cow_fsblk_t len; -+ int hblock, blocksize; -+ ext3cow_fsblk_t sb_block; -+ unsigned long offset; -+ struct ext3cow_super_block * es; -+ struct block_device *bdev; -+ -+ bdev = ext3cow_blkdev_get(j_dev); -+ if (bdev == NULL) -+ return NULL; -+ -+ if (bd_claim(bdev, sb)) { -+ printk(KERN_ERR -+ "EXT3COW: failed to claim external journal device.\n"); -+ blkdev_put(bdev); -+ return NULL; -+ } -+ -+ blocksize = sb->s_blocksize; -+ hblock = bdev_hardsect_size(bdev); -+ if (blocksize < hblock) { -+ printk(KERN_ERR -+ "EXT3COW-fs: blocksize too small for journal device.\n"); -+ goto out_bdev; -+ } -+ -+ sb_block = EXT3COW_MIN_BLOCK_SIZE / blocksize; -+ offset = EXT3COW_MIN_BLOCK_SIZE % blocksize; -+ set_blocksize(bdev, blocksize); -+ if (!(bh = __bread(bdev, sb_block, blocksize))) { -+ printk(KERN_ERR "EXT3COW-fs: couldn't read superblock of " -+ "external journal\n"); -+ goto out_bdev; -+ } -+ -+ es = (struct ext3cow_super_block *) (((char *)bh->b_data) + offset); -+ if ((le16_to_cpu(es->s_magic) != EXT3COW_SUPER_MAGIC) || -+ !(le32_to_cpu(es->s_feature_incompat) & -+ EXT3COW_FEATURE_INCOMPAT_JOURNAL_DEV)) { -+ printk(KERN_ERR "EXT3COW-fs: external journal has " -+ "bad superblock\n"); -+ brelse(bh); -+ goto out_bdev; -+ } -+ -+ if (memcmp(EXT3COW_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { -+ printk(KERN_ERR "EXT3COW-fs: journal UUID does not match\n"); -+ brelse(bh); -+ goto out_bdev; -+ } -+ -+ len = le32_to_cpu(es->s_blocks_count); -+ start = sb_block + 1; -+ brelse(bh); /* we're done with the superblock */ -+ -+ journal = journal_init_dev(bdev, sb->s_bdev, -+ start, len, blocksize); -+ if (!journal) { -+ printk(KERN_ERR "EXT3COW-fs: failed to create device journal\n"); -+ goto out_bdev; -+ } -+ journal->j_private = sb; -+ ll_rw_block(READ, 1, &journal->j_sb_buffer); -+ wait_on_buffer(journal->j_sb_buffer); -+ if (!buffer_uptodate(journal->j_sb_buffer)) { -+ printk(KERN_ERR "EXT3COW-fs: I/O error on journal device\n"); -+ goto out_journal; -+ } -+ if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { -+ printk(KERN_ERR "EXT3COW-fs: External journal has more than one " -+ "user (unsupported) - %d\n", -+ be32_to_cpu(journal->j_superblock->s_nr_users)); -+ goto out_journal; -+ } -+ EXT3COW_SB(sb)->journal_bdev = bdev; -+ ext3cow_init_journal_params(sb, journal); -+ return journal; -+out_journal: -+ journal_destroy(journal); -+out_bdev: -+ ext3cow_blkdev_put(bdev); -+ return NULL; -+} -+ -+static int ext3cow_load_journal(struct super_block *sb, -+ struct ext3cow_super_block *es, -+ unsigned long journal_devnum) -+{ -+ journal_t *journal; -+ unsigned int journal_inum = le32_to_cpu(es->s_journal_inum); -+ dev_t journal_dev; -+ int err = 0; -+ int really_read_only; -+ -+ if (journal_devnum && -+ journal_devnum != le32_to_cpu(es->s_journal_dev)) { -+ printk(KERN_INFO "EXT3COW-fs: external journal device major/minor " -+ "numbers have changed\n"); -+ journal_dev = new_decode_dev(journal_devnum); -+ } else -+ journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); -+ -+ really_read_only = bdev_read_only(sb->s_bdev); -+ -+ /* -+ * Are we loading a blank journal or performing recovery after a -+ * crash? For recovery, we need to check in advance whether we -+ * can get read-write access to the device. -+ */ -+ -+ if (EXT3COW_HAS_INCOMPAT_FEATURE(sb, EXT3COW_FEATURE_INCOMPAT_RECOVER)) { -+ if (sb->s_flags & MS_RDONLY) { -+ printk(KERN_INFO "EXT3COW-fs: INFO: recovery " -+ "required on readonly filesystem.\n"); -+ if (really_read_only) { -+ printk(KERN_ERR "EXT3COW-fs: write access " -+ "unavailable, cannot proceed.\n"); -+ return -EROFS; -+ } -+ printk (KERN_INFO "EXT3COW-fs: write access will " -+ "be enabled during recovery.\n"); -+ } -+ } -+ -+ if (journal_inum && journal_dev) { -+ printk(KERN_ERR "EXT3COW-fs: filesystem has both journal " -+ "and inode journals!\n"); -+ return -EINVAL; -+ } -+ -+ if (journal_inum) { -+ if (!(journal = ext3cow_get_journal(sb, journal_inum))) -+ return -EINVAL; -+ } else { -+ if (!(journal = ext3cow_get_dev_journal(sb, journal_dev))) -+ return -EINVAL; -+ } -+ -+ if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { -+ err = journal_update_format(journal); -+ if (err) { -+ printk(KERN_ERR "EXT3COW-fs: error updating journal.\n"); -+ journal_destroy(journal); -+ return err; -+ } -+ } -+ -+ if (!EXT3COW_HAS_INCOMPAT_FEATURE(sb, EXT3COW_FEATURE_INCOMPAT_RECOVER)) -+ err = journal_wipe(journal, !really_read_only); -+ if (!err) -+ err = journal_load(journal); -+ -+ if (err) { -+ printk(KERN_ERR "EXT3COW-fs: error loading journal.\n"); -+ journal_destroy(journal); -+ return err; -+ } -+ -+ EXT3COW_SB(sb)->s_journal = journal; -+ ext3cow_clear_journal_err(sb, es); -+ -+ if (journal_devnum && -+ journal_devnum != le32_to_cpu(es->s_journal_dev)) { -+ es->s_journal_dev = cpu_to_le32(journal_devnum); -+ sb->s_dirt = 1; -+ -+ /* Make sure we flush the recovery flag to disk. */ -+ ext3cow_commit_super(sb, es, 1); -+ } -+ -+ return 0; -+} -+ -+static int ext3cow_create_journal(struct super_block * sb, -+ struct ext3cow_super_block * es, -+ unsigned int journal_inum) -+{ -+ journal_t *journal; -+ -+ if (sb->s_flags & MS_RDONLY) { -+ printk(KERN_ERR "EXT3COW-fs: readonly filesystem when trying to " -+ "create journal.\n"); -+ return -EROFS; -+ } -+ -+ if (!(journal = ext3cow_get_journal(sb, journal_inum))) -+ return -EINVAL; -+ -+ printk(KERN_INFO "EXT3COW-fs: creating new journal on inode %u\n", -+ journal_inum); -+ -+ if (journal_create(journal)) { -+ printk(KERN_ERR "EXT3COW-fs: error creating journal.\n"); -+ journal_destroy(journal); -+ return -EIO; -+ } -+ -+ EXT3COW_SB(sb)->s_journal = journal; -+ -+ ext3cow_update_dynamic_rev(sb); -+ EXT3COW_SET_INCOMPAT_FEATURE(sb, EXT3COW_FEATURE_INCOMPAT_RECOVER); -+ EXT3COW_SET_COMPAT_FEATURE(sb, EXT3COW_FEATURE_COMPAT_HAS_JOURNAL); -+ -+ es->s_journal_inum = cpu_to_le32(journal_inum); -+ sb->s_dirt = 1; -+ -+ /* Make sure we flush the recovery flag to disk. */ -+ ext3cow_commit_super(sb, es, 1); -+ -+ return 0; -+} -+ -+static void ext3cow_commit_super (struct super_block * sb, -+ struct ext3cow_super_block * es, -+ int sync) -+{ -+ struct buffer_head *sbh = EXT3COW_SB(sb)->s_sbh; -+ -+ if (!sbh) -+ return; -+ es->s_wtime = cpu_to_le32(get_seconds()); -+ es->s_free_blocks_count = cpu_to_le32(ext3cow_count_free_blocks(sb)); -+ es->s_free_inodes_count = cpu_to_le32(ext3cow_count_free_inodes(sb)); -+ BUFFER_TRACE(sbh, "marking dirty"); -+ mark_buffer_dirty(sbh); -+ if (sync) -+ sync_dirty_buffer(sbh); -+} -+ -+ -+/* -+ * Have we just finished recovery? If so, and if we are mounting (or -+ * remounting) the filesystem readonly, then we will end up with a -+ * consistent fs on disk. Record that fact. -+ */ -+static void ext3cow_mark_recovery_complete(struct super_block * sb, -+ struct ext3cow_super_block * es) -+{ -+ journal_t *journal = EXT3COW_SB(sb)->s_journal; -+ -+ journal_lock_updates(journal); -+ journal_flush(journal); -+ if (EXT3COW_HAS_INCOMPAT_FEATURE(sb, EXT3COW_FEATURE_INCOMPAT_RECOVER) && -+ sb->s_flags & MS_RDONLY) { -+ EXT3COW_CLEAR_INCOMPAT_FEATURE(sb, EXT3COW_FEATURE_INCOMPAT_RECOVER); -+ sb->s_dirt = 0; -+ ext3cow_commit_super(sb, es, 1); -+ } -+ journal_unlock_updates(journal); -+} -+ -+/* -+ * If we are mounting (or read-write remounting) a filesystem whose journal -+ * has recorded an error from a previous lifetime, move that error to the -+ * main filesystem now. -+ */ -+static void ext3cow_clear_journal_err(struct super_block * sb, -+ struct ext3cow_super_block * es) -+{ -+ journal_t *journal; -+ int j_errno; -+ const char *errstr; -+ -+ journal = EXT3COW_SB(sb)->s_journal; -+ -+ /* -+ * Now check for any error status which may have been recorded in the -+ * journal by a prior ext3cow_error() or ext3cow_abort() -+ */ -+ -+ j_errno = journal_errno(journal); -+ if (j_errno) { -+ char nbuf[16]; -+ -+ errstr = ext3cow_decode_error(sb, j_errno, nbuf); -+ ext3cow_warning(sb, __FUNCTION__, "Filesystem error recorded " -+ "from previous mount: %s", errstr); -+ ext3cow_warning(sb, __FUNCTION__, "Marking fs in need of " -+ "filesystem check."); -+ -+ EXT3COW_SB(sb)->s_mount_state |= EXT3COW_ERROR_FS; -+ es->s_state |= cpu_to_le16(EXT3COW_ERROR_FS); -+ ext3cow_commit_super (sb, es, 1); -+ -+ journal_clear_err(journal); -+ } -+} -+ -+/* -+ * Force the running and committing transactions to commit, -+ * and wait on the commit. -+ */ -+int ext3cow_force_commit(struct super_block *sb) -+{ -+ journal_t *journal; -+ int ret; -+ -+ if (sb->s_flags & MS_RDONLY) -+ return 0; -+ -+ journal = EXT3COW_SB(sb)->s_journal; -+ sb->s_dirt = 0; -+ ret = ext3cow_journal_force_commit(journal); -+ return ret; -+} -+ -+/* -+ * Ext3 always journals updates to the superblock itself, so we don't -+ * have to propagate any other updates to the superblock on disk at this -+ * point. Just start an async writeback to get the buffers on their way -+ * to the disk. -+ * -+ * This implicitly triggers the writebehind on sync(). -+ */ -+ -+static void ext3cow_write_super (struct super_block * sb) -+{ -+ if (mutex_trylock(&sb->s_lock) != 0) -+ BUG(); -+ sb->s_dirt = 0; -+} -+ -+static int ext3cow_sync_fs(struct super_block *sb, int wait) -+{ -+ tid_t target; -+ -+ sb->s_dirt = 0; -+ if (journal_start_commit(EXT3COW_SB(sb)->s_journal, &target)) { -+ if (wait) -+ log_wait_commit(EXT3COW_SB(sb)->s_journal, target); -+ } -+ return 0; -+} -+ -+/* -+ * LVM calls this function before a (read-only) snapshot is created. This -+ * gives us a chance to flush the journal completely and mark the fs clean. -+ */ -+static void ext3cow_write_super_lockfs(struct super_block *sb) -+{ -+ sb->s_dirt = 0; -+ -+ if (!(sb->s_flags & MS_RDONLY)) { -+ journal_t *journal = EXT3COW_SB(sb)->s_journal; -+ -+ /* Now we set up the journal barrier. */ -+ journal_lock_updates(journal); -+ journal_flush(journal); -+ -+ /* Journal blocked and flushed, clear needs_recovery flag. */ -+ EXT3COW_CLEAR_INCOMPAT_FEATURE(sb, EXT3COW_FEATURE_INCOMPAT_RECOVER); -+ ext3cow_commit_super(sb, EXT3COW_SB(sb)->s_es, 1); -+ } -+} -+ -+/* -+ * Called by LVM after the snapshot is done. We need to reset the RECOVER -+ * flag here, even though the filesystem is not technically dirty yet. -+ */ -+static void ext3cow_unlockfs(struct super_block *sb) -+{ -+ if (!(sb->s_flags & MS_RDONLY)) { -+ lock_super(sb); -+ /* Reser the needs_recovery flag before the fs is unlocked. */ -+ EXT3COW_SET_INCOMPAT_FEATURE(sb, EXT3COW_FEATURE_INCOMPAT_RECOVER); -+ ext3cow_commit_super(sb, EXT3COW_SB(sb)->s_es, 1); -+ unlock_super(sb); -+ journal_unlock_updates(EXT3COW_SB(sb)->s_journal); -+ } -+} -+ -+static int ext3cow_remount (struct super_block * sb, int * flags, char * data) -+{ -+ struct ext3cow_super_block * es; -+ struct ext3cow_sb_info *sbi = EXT3COW_SB(sb); -+ ext3cow_fsblk_t n_blocks_count = 0; -+ unsigned long old_sb_flags; -+ struct ext3cow_mount_options old_opts; -+ int err; -+#ifdef CONFIG_QUOTA -+ int i; -+#endif -+ -+ /* Store the original options */ -+ old_sb_flags = sb->s_flags; -+ old_opts.s_mount_opt = sbi->s_mount_opt; -+ old_opts.s_resuid = sbi->s_resuid; -+ old_opts.s_resgid = sbi->s_resgid; -+ old_opts.s_commit_interval = sbi->s_commit_interval; -+#ifdef CONFIG_QUOTA -+ old_opts.s_jquota_fmt = sbi->s_jquota_fmt; -+ for (i = 0; i < MAXQUOTAS; i++) -+ old_opts.s_qf_names[i] = sbi->s_qf_names[i]; -+#endif -+ -+ /* -+ * Allow the "check" option to be passed as a remount option. -+ */ -+ if (!parse_options(data, sb, NULL, NULL, &n_blocks_count, 1)) { -+ err = -EINVAL; -+ goto restore_opts; -+ } -+ -+ if (sbi->s_mount_opt & EXT3COW_MOUNT_ABORT) -+ ext3cow_abort(sb, __FUNCTION__, "Abort forced by user"); -+ -+ sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | -+ ((sbi->s_mount_opt & EXT3COW_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); -+ -+ es = sbi->s_es; -+ -+ ext3cow_init_journal_params(sb, sbi->s_journal); -+ -+ if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || -+ n_blocks_count > le32_to_cpu(es->s_blocks_count)) { -+ if (sbi->s_mount_opt & EXT3COW_MOUNT_ABORT) { -+ err = -EROFS; -+ goto restore_opts; -+ } -+ -+ if (*flags & MS_RDONLY) { -+ /* -+ * First of all, the unconditional stuff we have to do -+ * to disable replay of the journal when we next remount -+ */ -+ sb->s_flags |= MS_RDONLY; -+ -+ /* -+ * OK, test if we are remounting a valid rw partition -+ * readonly, and if so set the rdonly flag and then -+ * mark the partition as valid again. -+ */ -+ if (!(es->s_state & cpu_to_le16(EXT3COW_VALID_FS)) && -+ (sbi->s_mount_state & EXT3COW_VALID_FS)) -+ es->s_state = cpu_to_le16(sbi->s_mount_state); -+ -+ ext3cow_mark_recovery_complete(sb, es); -+ } else { -+ __le32 ret; -+ if ((ret = EXT3COW_HAS_RO_COMPAT_FEATURE(sb, -+ ~EXT3COW_FEATURE_RO_COMPAT_SUPP))) { -+ printk(KERN_WARNING "EXT3COW-fs: %s: couldn't " -+ "remount RDWR because of unsupported " -+ "optional features (%x).\n", -+ sb->s_id, le32_to_cpu(ret)); -+ err = -EROFS; -+ goto restore_opts; -+ } -+ /* -+ * Mounting a RDONLY partition read-write, so reread -+ * and store the current valid flag. (It may have -+ * been changed by e2fsck since we originally mounted -+ * the partition.) -+ */ -+ ext3cow_clear_journal_err(sb, es); -+ sbi->s_mount_state = le16_to_cpu(es->s_state); -+ if ((err = ext3cow_group_extend(sb, es, n_blocks_count))) -+ goto restore_opts; -+ if (!ext3cow_setup_super (sb, es, 0)) -+ sb->s_flags &= ~MS_RDONLY; -+ } -+ } -+#ifdef CONFIG_QUOTA -+ /* Release old quota file names */ -+ for (i = 0; i < MAXQUOTAS; i++) -+ if (old_opts.s_qf_names[i] && -+ old_opts.s_qf_names[i] != sbi->s_qf_names[i]) -+ kfree(old_opts.s_qf_names[i]); -+#endif -+ return 0; -+restore_opts: -+ sb->s_flags = old_sb_flags; -+ sbi->s_mount_opt = old_opts.s_mount_opt; -+ sbi->s_resuid = old_opts.s_resuid; -+ sbi->s_resgid = old_opts.s_resgid; -+ sbi->s_commit_interval = old_opts.s_commit_interval; -+#ifdef CONFIG_QUOTA -+ sbi->s_jquota_fmt = old_opts.s_jquota_fmt; -+ for (i = 0; i < MAXQUOTAS; i++) { -+ if (sbi->s_qf_names[i] && -+ old_opts.s_qf_names[i] != sbi->s_qf_names[i]) -+ kfree(sbi->s_qf_names[i]); -+ sbi->s_qf_names[i] = old_opts.s_qf_names[i]; -+ } -+#endif -+ return err; -+} -+ -+static int ext3cow_statfs (struct dentry * dentry, struct kstatfs * buf) -+{ -+ struct super_block *sb = dentry->d_sb; -+ struct ext3cow_sb_info *sbi = EXT3COW_SB(sb); -+ struct ext3cow_super_block *es = sbi->s_es; -+ ext3cow_fsblk_t overhead; -+ int i; -+ u64 fsid; -+ -+ if (test_opt (sb, MINIX_DF)) -+ overhead = 0; -+ else { -+ unsigned long ngroups; -+ ngroups = EXT3COW_SB(sb)->s_groups_count; -+ smp_rmb(); -+ -+ /* -+ * Compute the overhead (FS structures) -+ */ -+ -+ /* -+ * All of the blocks before first_data_block are -+ * overhead -+ */ -+ overhead = le32_to_cpu(es->s_first_data_block); -+ -+ /* -+ * Add the overhead attributed to the superblock and -+ * block group descriptors. If the sparse superblocks -+ * feature is turned on, then not all groups have this. -+ */ -+ for (i = 0; i < ngroups; i++) { -+ overhead += ext3cow_bg_has_super(sb, i) + -+ ext3cow_bg_num_gdb(sb, i); -+ cond_resched(); -+ } -+ -+ /* -+ * Every block group has an inode bitmap, a block -+ * bitmap, and an inode table. -+ */ -+ overhead += (ngroups * (2 + EXT3COW_SB(sb)->s_itb_per_group)); -+ } -+ -+ buf->f_type = EXT3COW_SUPER_MAGIC; -+ buf->f_bsize = sb->s_blocksize; -+ buf->f_blocks = le32_to_cpu(es->s_blocks_count) - overhead; -+ buf->f_bfree = percpu_counter_sum(&sbi->s_freeblocks_counter); -+ buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count); -+ if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count)) -+ buf->f_bavail = 0; -+ buf->f_files = le32_to_cpu(es->s_inodes_count); -+ buf->f_ffree = percpu_counter_sum(&sbi->s_freeinodes_counter); -+ buf->f_namelen = EXT3COW_NAME_LEN; -+ fsid = le64_to_cpup((void *)es->s_uuid) ^ -+ le64_to_cpup((void *)es->s_uuid + sizeof(u64)); -+ buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; -+ buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; -+ return 0; -+} -+ -+/* Helper function for writing quotas on sync - we need to start transaction before quota file -+ * is locked for write. Otherwise the are possible deadlocks: -+ * Process 1 Process 2 -+ * ext3cow_create() quota_sync() -+ * journal_start() write_dquot() -+ * DQUOT_INIT() down(dqio_mutex) -+ * down(dqio_mutex) journal_start() -+ * -+ */ -+ -+#ifdef CONFIG_QUOTA -+ -+static inline struct inode *dquot_to_inode(struct dquot *dquot) -+{ -+ return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; -+} -+ -+static int ext3cow_dquot_initialize(struct inode *inode, int type) -+{ -+ handle_t *handle; -+ int ret, err; -+ -+ /* We may create quota structure so we need to reserve enough blocks */ -+ handle = ext3cow_journal_start(inode, 2*EXT3COW_QUOTA_INIT_BLOCKS(inode->i_sb)); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ ret = dquot_initialize(inode, type); -+ err = ext3cow_journal_stop(handle); -+ if (!ret) -+ ret = err; -+ return ret; -+} -+ -+static int ext3cow_dquot_drop(struct inode *inode) -+{ -+ handle_t *handle; -+ int ret, err; -+ -+ /* We may delete quota structure so we need to reserve enough blocks */ -+ handle = ext3cow_journal_start(inode, 2*EXT3COW_QUOTA_DEL_BLOCKS(inode->i_sb)); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ ret = dquot_drop(inode); -+ err = ext3cow_journal_stop(handle); -+ if (!ret) -+ ret = err; -+ return ret; -+} -+ -+static int ext3cow_write_dquot(struct dquot *dquot) -+{ -+ int ret, err; -+ handle_t *handle; -+ struct inode *inode; -+ -+ inode = dquot_to_inode(dquot); -+ handle = ext3cow_journal_start(inode, -+ EXT3COW_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ ret = dquot_commit(dquot); -+ err = ext3cow_journal_stop(handle); -+ if (!ret) -+ ret = err; -+ return ret; -+} -+ -+static int ext3cow_acquire_dquot(struct dquot *dquot) -+{ -+ int ret, err; -+ handle_t *handle; -+ -+ handle = ext3cow_journal_start(dquot_to_inode(dquot), -+ EXT3COW_QUOTA_INIT_BLOCKS(dquot->dq_sb)); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ ret = dquot_acquire(dquot); -+ err = ext3cow_journal_stop(handle); -+ if (!ret) -+ ret = err; -+ return ret; -+} -+ -+static int ext3cow_release_dquot(struct dquot *dquot) -+{ -+ int ret, err; -+ handle_t *handle; -+ -+ handle = ext3cow_journal_start(dquot_to_inode(dquot), -+ EXT3COW_QUOTA_DEL_BLOCKS(dquot->dq_sb)); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ ret = dquot_release(dquot); -+ err = ext3cow_journal_stop(handle); -+ if (!ret) -+ ret = err; -+ return ret; -+} -+ -+static int ext3cow_mark_dquot_dirty(struct dquot *dquot) -+{ -+ /* Are we journalling quotas? */ -+ if (EXT3COW_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || -+ EXT3COW_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { -+ dquot_mark_dquot_dirty(dquot); -+ return ext3cow_write_dquot(dquot); -+ } else { -+ return dquot_mark_dquot_dirty(dquot); -+ } -+} -+ -+static int ext3cow_write_info(struct super_block *sb, int type) -+{ -+ int ret, err; -+ handle_t *handle; -+ -+ /* Data block + inode block */ -+ handle = ext3cow_journal_start(sb->s_root->d_inode, 2); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ ret = dquot_commit_info(sb, type); -+ err = ext3cow_journal_stop(handle); -+ if (!ret) -+ ret = err; -+ return ret; -+} -+ -+/* -+ * Turn on quotas during mount time - we need to find -+ * the quota file and such... -+ */ -+static int ext3cow_quota_on_mount(struct super_block *sb, int type) -+{ -+ return vfs_quota_on_mount(sb, EXT3COW_SB(sb)->s_qf_names[type], -+ EXT3COW_SB(sb)->s_jquota_fmt, type); -+} -+ -+/* -+ * Standard function to be called on quota_on -+ */ -+static int ext3cow_quota_on(struct super_block *sb, int type, int format_id, -+ char *path) -+{ -+ int err; -+ struct nameidata nd; -+ -+ if (!test_opt(sb, QUOTA)) -+ return -EINVAL; -+ /* Not journalling quota? */ -+ if (!EXT3COW_SB(sb)->s_qf_names[USRQUOTA] && -+ !EXT3COW_SB(sb)->s_qf_names[GRPQUOTA]) -+ return vfs_quota_on(sb, type, format_id, path); -+ err = path_lookup(path, LOOKUP_FOLLOW, &nd); -+ if (err) -+ return err; -+ /* Quotafile not on the same filesystem? */ -+ if (nd.mnt->mnt_sb != sb) { -+ path_release(&nd); -+ return -EXDEV; -+ } -+ /* Quotafile not of fs root? */ -+ if (nd.dentry->d_parent->d_inode != sb->s_root->d_inode) -+ printk(KERN_WARNING -+ "EXT3COW-fs: Quota file not on filesystem root. " -+ "Journalled quota will not work.\n"); -+ path_release(&nd); -+ return vfs_quota_on(sb, type, format_id, path); -+} -+ -+/* Read data from quotafile - avoid pagecache and such because we cannot afford -+ * acquiring the locks... As quota files are never truncated and quota code -+ * itself serializes the operations (and noone else should touch the files) -+ * we don't have to be afraid of races */ -+static ssize_t ext3cow_quota_read(struct super_block *sb, int type, char *data, -+ size_t len, loff_t off) -+{ -+ struct inode *inode = sb_dqopt(sb)->files[type]; -+ sector_t blk = off >> EXT3COW_BLOCK_SIZE_BITS(sb); -+ int err = 0; -+ int offset = off & (sb->s_blocksize - 1); -+ int tocopy; -+ size_t toread; -+ struct buffer_head *bh; -+ loff_t i_size = i_size_read(inode); -+ -+ if (off > i_size) -+ return 0; -+ if (off+len > i_size) -+ len = i_size-off; -+ toread = len; -+ while (toread > 0) { -+ tocopy = sb->s_blocksize - offset < toread ? -+ sb->s_blocksize - offset : toread; -+ bh = ext3cow_bread(NULL, inode, blk, 0, &err); -+ if (err) -+ return err; -+ if (!bh) /* A hole? */ -+ memset(data, 0, tocopy); -+ else -+ memcpy(data, bh->b_data+offset, tocopy); -+ brelse(bh); -+ offset = 0; -+ toread -= tocopy; -+ data += tocopy; -+ blk++; -+ } -+ return len; -+} -+ -+/* Write to quotafile (we know the transaction is already started and has -+ * enough credits) */ -+static ssize_t ext3cow_quota_write(struct super_block *sb, int type, -+ const char *data, size_t len, loff_t off) -+{ -+ struct inode *inode = sb_dqopt(sb)->files[type]; -+ sector_t blk = off >> EXT3COW_BLOCK_SIZE_BITS(sb); -+ int err = 0; -+ int offset = off & (sb->s_blocksize - 1); -+ int tocopy; -+ int journal_quota = EXT3COW_SB(sb)->s_qf_names[type] != NULL; -+ size_t towrite = len; -+ struct buffer_head *bh; -+ handle_t *handle = journal_current_handle(); -+ -+ mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); -+ while (towrite > 0) { -+ tocopy = sb->s_blocksize - offset < towrite ? -+ sb->s_blocksize - offset : towrite; -+ bh = ext3cow_bread(handle, inode, blk, 1, &err); -+ if (!bh) -+ goto out; -+ if (journal_quota) { -+ err = ext3cow_journal_get_write_access(handle, bh); -+ if (err) { -+ brelse(bh); -+ goto out; -+ } -+ } -+ lock_buffer(bh); -+ memcpy(bh->b_data+offset, data, tocopy); -+ flush_dcache_page(bh->b_page); -+ unlock_buffer(bh); -+ if (journal_quota) -+ err = ext3cow_journal_dirty_metadata(handle, bh); -+ else { -+ /* Always do at least ordered writes for quotas */ -+ err = ext3cow_journal_dirty_data(handle, bh); -+ mark_buffer_dirty(bh); -+ } -+ brelse(bh); -+ if (err) -+ goto out; -+ offset = 0; -+ towrite -= tocopy; -+ data += tocopy; -+ blk++; -+ } -+out: -+ if (len == towrite) -+ return err; -+ if (inode->i_size < off+len-towrite) { -+ i_size_write(inode, off+len-towrite); -+ EXT3COW_I(inode)->i_disksize = inode->i_size; -+ } -+ inode->i_version++; -+ inode->i_mtime = inode->i_ctime = CURRENT_TIME; -+ ext3cow_mark_inode_dirty(handle, inode); -+ mutex_unlock(&inode->i_mutex); -+ return len - towrite; -+} -+ -+#endif -+ -+static int ext3cow_get_sb(struct file_system_type *fs_type, -+ int flags, const char *dev_name, void *data, struct vfsmount *mnt) -+{ -+ return get_sb_bdev(fs_type, flags, dev_name, data, ext3cow_fill_super, mnt); -+} -+ -+/* Code to update the epoch counter in the super block -znjp */ -+unsigned int ext3cow_take_snapshot(struct super_block *sb){ -+ -+ struct ext3cow_sb_info *sbi = NULL; -+ struct ext3cow_super_block *es = NULL; -+ tid_t target; -+ -+ if(NULL == sb){ -+ printk("EXT3COW-fs: superblock is NULL when taking snapshot.\n"); -+ return -1; -+ } -+ -+ sbi = EXT3COW_SB(sb); -+ es = sbi->s_es; -+ -+ /* Sync the dirty blocks */ -+ if (journal_start_commit(EXT3COW_SB(sb)->s_journal, &target)) { -+ log_wait_commit(EXT3COW_SB(sb)->s_journal, target); -+ } -+ -+ -+ sbi->s_epoch_number = cpu_to_le32(get_seconds()); -+ es->s_epoch_number = sbi->s_epoch_number; -+ sb->s_dirt = 1; -+ -+ BUFFER_TRACE(EXT3COW_SB(sb)->s_sbh, "marking dirty"); -+ mark_buffer_dirty(sbi->s_sbh); -+ ext3cow_commit_super (sb, es, 1); -+ -+ return (unsigned int)sbi->s_epoch_number; -+} -+ -+static struct file_system_type ext3cow_fs_type = { -+ .owner = THIS_MODULE, -+ .name = "ext3cow", -+ .get_sb = ext3cow_get_sb, -+ .kill_sb = kill_block_super, -+ .fs_flags = FS_REQUIRES_DEV, -+}; -+ -+static int __init init_ext3cow_fs(void) -+{ -+ int err = init_ext3cow_xattr(); -+ if (err) -+ return err; -+ err = init_inodecache(); -+ if (err) -+ goto out1; -+ err = register_filesystem(&ext3cow_fs_type); -+ if (err) -+ goto out; -+ return 0; -+out: -+ destroy_inodecache(); -+out1: -+ exit_ext3cow_xattr(); -+ return err; -+} -+ -+static void __exit exit_ext3cow_fs(void) -+{ -+ unregister_filesystem(&ext3cow_fs_type); -+ destroy_inodecache(); -+ exit_ext3cow_xattr(); -+} -+ -+MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); -+MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions"); -+MODULE_LICENSE("GPL"); -+module_init(init_ext3cow_fs) -+module_exit(exit_ext3cow_fs) -diff -Naur linux-2.6.21.7/fs/ext3cow/symlink.c linux-2.6.21.7_ext3cowPatched/fs/ext3cow/symlink.c ---- linux-2.6.21.7/fs/ext3cow/symlink.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.21.7_ext3cowPatched/fs/ext3cow/symlink.c 2007-10-23 17:47:18.000000000 +0200 -@@ -0,0 +1,54 @@ -+/* -+ * linux/fs/ext3cow/symlink.c -+ * -+ * Only fast symlinks left here - the rest is done by generic code. AV, 1999 -+ * -+ * Copyright (C) 1992, 1993, 1994, 1995 -+ * Remy Card (card@masi.ibp.fr) -+ * Laboratoire MASI - Institut Blaise Pascal -+ * Universite Pierre et Marie Curie (Paris VI) -+ * -+ * from -+ * -+ * linux/fs/minix/symlink.c -+ * -+ * Copyright (C) 1991, 1992 Linus Torvalds -+ * -+ * ext3cow symlink handling code -+ */ -+ -+#include -+#include -+#include -+#include -+#include "xattr.h" -+ -+static void * ext3cow_follow_link(struct dentry *dentry, struct nameidata *nd) -+{ -+ struct ext3cow_inode_info *ei = EXT3COW_I(dentry->d_inode); -+ nd_set_link(nd, (char*)ei->i_data); -+ return NULL; -+} -+ -+struct inode_operations ext3cow_symlink_inode_operations = { -+ .readlink = generic_readlink, -+ .follow_link = page_follow_link_light, -+ .put_link = page_put_link, -+#ifdef CONFIG_EXT3COW_FS_XATTR -+ .setxattr = generic_setxattr, -+ .getxattr = generic_getxattr, -+ .listxattr = ext3cow_listxattr, -+ .removexattr = generic_removexattr, -+#endif -+}; -+ -+struct inode_operations ext3cow_fast_symlink_inode_operations = { -+ .readlink = generic_readlink, -+ .follow_link = ext3cow_follow_link, -+#ifdef CONFIG_EXT3COW_FS_XATTR -+ .setxattr = generic_setxattr, -+ .getxattr = generic_getxattr, -+ .listxattr = ext3cow_listxattr, -+ .removexattr = generic_removexattr, -+#endif -+}; -diff -Naur linux-2.6.21.7/fs/ext3cow/xattr.c linux-2.6.21.7_ext3cowPatched/fs/ext3cow/xattr.c ---- linux-2.6.21.7/fs/ext3cow/xattr.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.21.7_ext3cowPatched/fs/ext3cow/xattr.c 2007-10-23 17:47:18.000000000 +0200 -@@ -0,0 +1,1314 @@ -+/* -+ * linux/fs/ext3cow/xattr.c -+ * -+ * Copyright (C) 2001-2003 Andreas Gruenbacher, -+ * -+ * Fix by Harrison Xing . -+ * Ext3 code with a lot of help from Eric Jarman . -+ * Extended attributes for symlinks and special files added per -+ * suggestion of Luka Renko . -+ * xattr consolidation Copyright (c) 2004 James Morris , -+ * Red Hat Inc. -+ * ea-in-inode support by Alex Tomas aka bzzz -+ * and Andreas Gruenbacher . -+ */ -+ -+/* -+ * Extended attributes are stored directly in inodes (on file systems with -+ * inodes bigger than 128 bytes) and on additional disk blocks. The i_file_acl -+ * field contains the block number if an inode uses an additional block. All -+ * attributes must fit in the inode and one additional block. Blocks that -+ * contain the identical set of attributes may be shared among several inodes. -+ * Identical blocks are detected by keeping a cache of blocks that have -+ * recently been accessed. -+ * -+ * The attributes in inodes and on blocks have a different header; the entries -+ * are stored in the same format: -+ * -+ * +------------------+ -+ * | header | -+ * | entry 1 | | -+ * | entry 2 | | growing downwards -+ * | entry 3 | v -+ * | four null bytes | -+ * | . . . | -+ * | value 1 | ^ -+ * | value 3 | | growing upwards -+ * | value 2 | | -+ * +------------------+ -+ * -+ * The header is followed by multiple entry descriptors. In disk blocks, the -+ * entry descriptors are kept sorted. In inodes, they are unsorted. The -+ * attribute values are aligned to the end of the block in no specific order. -+ * -+ * Locking strategy -+ * ---------------- -+ * EXT3COW_I(inode)->i_file_acl is protected by EXT3COW_I(inode)->xattr_sem. -+ * EA blocks are only changed if they are exclusive to an inode, so -+ * holding xattr_sem also means that nothing but the EA block's reference -+ * count can change. Multiple writers to the same block are synchronized -+ * by the buffer lock. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "xattr.h" -+#include "acl.h" -+ -+#define BHDR(bh) ((struct ext3cow_xattr_header *)((bh)->b_data)) -+#define ENTRY(ptr) ((struct ext3cow_xattr_entry *)(ptr)) -+#define BFIRST(bh) ENTRY(BHDR(bh)+1) -+#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0) -+ -+#define IHDR(inode, raw_inode) \ -+ ((struct ext3cow_xattr_ibody_header *) \ -+ ((void *)raw_inode + \ -+ EXT3COW_GOOD_OLD_INODE_SIZE + \ -+ EXT3COW_I(inode)->i_extra_isize)) -+#define IFIRST(hdr) ((struct ext3cow_xattr_entry *)((hdr)+1)) -+ -+#ifdef EXT3COW_XATTR_DEBUG -+# define ea_idebug(inode, f...) do { \ -+ printk(KERN_DEBUG "inode %s:%lu: ", \ -+ inode->i_sb->s_id, inode->i_ino); \ -+ printk(f); \ -+ printk("\n"); \ -+ } while (0) -+# define ea_bdebug(bh, f...) do { \ -+ char b[BDEVNAME_SIZE]; \ -+ printk(KERN_DEBUG "block %s:%lu: ", \ -+ bdevname(bh->b_bdev, b), \ -+ (unsigned long) bh->b_blocknr); \ -+ printk(f); \ -+ printk("\n"); \ -+ } while (0) -+#else -+# define ea_idebug(f...) -+# define ea_bdebug(f...) -+#endif -+ -+static void ext3cow_xattr_cache_insert(struct buffer_head *); -+static struct buffer_head *ext3cow_xattr_cache_find(struct inode *, -+ struct ext3cow_xattr_header *, -+ struct mb_cache_entry **); -+static void ext3cow_xattr_rehash(struct ext3cow_xattr_header *, -+ struct ext3cow_xattr_entry *); -+ -+static struct mb_cache *ext3cow_xattr_cache; -+ -+static struct xattr_handler *ext3cow_xattr_handler_map[] = { -+ [EXT3COW_XATTR_INDEX_USER] = &ext3cow_xattr_user_handler, -+#ifdef CONFIG_EXT3COW_FS_POSIX_ACL -+ [EXT3COW_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext3cow_xattr_acl_access_handler, -+ [EXT3COW_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext3cow_xattr_acl_default_handler, -+#endif -+ [EXT3COW_XATTR_INDEX_TRUSTED] = &ext3cow_xattr_trusted_handler, -+#ifdef CONFIG_EXT3COW_FS_SECURITY -+ [EXT3COW_XATTR_INDEX_SECURITY] = &ext3cow_xattr_security_handler, -+#endif -+}; -+ -+struct xattr_handler *ext3cow_xattr_handlers[] = { -+ &ext3cow_xattr_user_handler, -+ &ext3cow_xattr_trusted_handler, -+#ifdef CONFIG_EXT3COW_FS_POSIX_ACL -+ &ext3cow_xattr_acl_access_handler, -+ &ext3cow_xattr_acl_default_handler, -+#endif -+#ifdef CONFIG_EXT3COW_FS_SECURITY -+ &ext3cow_xattr_security_handler, -+#endif -+ NULL -+}; -+ -+static inline struct xattr_handler * -+ext3cow_xattr_handler(int name_index) -+{ -+ struct xattr_handler *handler = NULL; -+ -+ if (name_index > 0 && name_index < ARRAY_SIZE(ext3cow_xattr_handler_map)) -+ handler = ext3cow_xattr_handler_map[name_index]; -+ return handler; -+} -+ -+/* -+ * Inode operation listxattr() -+ * -+ * dentry->d_inode->i_mutex: don't care -+ */ -+ssize_t -+ext3cow_listxattr(struct dentry *dentry, char *buffer, size_t size) -+{ -+ return ext3cow_xattr_list(dentry->d_inode, buffer, size); -+} -+ -+static int -+ext3cow_xattr_check_names(struct ext3cow_xattr_entry *entry, void *end) -+{ -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext3cow_xattr_entry *next = EXT3COW_XATTR_NEXT(entry); -+ if ((void *)next >= end) -+ return -EIO; -+ entry = next; -+ } -+ return 0; -+} -+ -+static inline int -+ext3cow_xattr_check_block(struct buffer_head *bh) -+{ -+ int error; -+ -+ if (BHDR(bh)->h_magic != cpu_to_le32(EXT3COW_XATTR_MAGIC) || -+ BHDR(bh)->h_blocks != cpu_to_le32(1)) -+ return -EIO; -+ error = ext3cow_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size); -+ return error; -+} -+ -+static inline int -+ext3cow_xattr_check_entry(struct ext3cow_xattr_entry *entry, size_t size) -+{ -+ size_t value_size = le32_to_cpu(entry->e_value_size); -+ -+ if (entry->e_value_block != 0 || value_size > size || -+ le16_to_cpu(entry->e_value_offs) + value_size > size) -+ return -EIO; -+ return 0; -+} -+ -+static int -+ext3cow_xattr_find_entry(struct ext3cow_xattr_entry **pentry, int name_index, -+ const char *name, size_t size, int sorted) -+{ -+ struct ext3cow_xattr_entry *entry; -+ size_t name_len; -+ int cmp = 1; -+ -+ if (name == NULL) -+ return -EINVAL; -+ name_len = strlen(name); -+ entry = *pentry; -+ for (; !IS_LAST_ENTRY(entry); entry = EXT3COW_XATTR_NEXT(entry)) { -+ cmp = name_index - entry->e_name_index; -+ if (!cmp) -+ cmp = name_len - entry->e_name_len; -+ if (!cmp) -+ cmp = memcmp(name, entry->e_name, name_len); -+ if (cmp <= 0 && (sorted || cmp == 0)) -+ break; -+ } -+ *pentry = entry; -+ if (!cmp && ext3cow_xattr_check_entry(entry, size)) -+ return -EIO; -+ return cmp ? -ENODATA : 0; -+} -+ -+static int -+ext3cow_xattr_block_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t buffer_size) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext3cow_xattr_entry *entry; -+ size_t size; -+ int error; -+ -+ ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", -+ name_index, name, buffer, (long)buffer_size); -+ -+ error = -ENODATA; -+ if (!EXT3COW_I(inode)->i_file_acl) -+ goto cleanup; -+ ea_idebug(inode, "reading block %u", EXT3COW_I(inode)->i_file_acl); -+ bh = sb_bread(inode->i_sb, EXT3COW_I(inode)->i_file_acl); -+ if (!bh) -+ goto cleanup; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); -+ if (ext3cow_xattr_check_block(bh)) { -+bad_block: ext3cow_error(inode->i_sb, __FUNCTION__, -+ "inode %lu: bad block "E3FSBLK, inode->i_ino, -+ EXT3COW_I(inode)->i_file_acl); -+ error = -EIO; -+ goto cleanup; -+ } -+ ext3cow_xattr_cache_insert(bh); -+ entry = BFIRST(bh); -+ error = ext3cow_xattr_find_entry(&entry, name_index, name, bh->b_size, 1); -+ if (error == -EIO) -+ goto bad_block; -+ if (error) -+ goto cleanup; -+ size = le32_to_cpu(entry->e_value_size); -+ if (buffer) { -+ error = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs), -+ size); -+ } -+ error = size; -+ -+cleanup: -+ brelse(bh); -+ return error; -+} -+ -+static int -+ext3cow_xattr_ibody_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t buffer_size) -+{ -+ struct ext3cow_xattr_ibody_header *header; -+ struct ext3cow_xattr_entry *entry; -+ struct ext3cow_inode *raw_inode; -+ struct ext3cow_iloc iloc; -+ size_t size; -+ void *end; -+ int error; -+ -+ if (!(EXT3COW_I(inode)->i_state & EXT3COW_STATE_XATTR)) -+ return -ENODATA; -+ error = ext3cow_get_inode_loc(inode, &iloc); -+ if (error) -+ return error; -+ raw_inode = ext3cow_raw_inode(&iloc); -+ header = IHDR(inode, raw_inode); -+ entry = IFIRST(header); -+ end = (void *)raw_inode + EXT3COW_SB(inode->i_sb)->s_inode_size; -+ error = ext3cow_xattr_check_names(entry, end); -+ if (error) -+ goto cleanup; -+ error = ext3cow_xattr_find_entry(&entry, name_index, name, -+ end - (void *)entry, 0); -+ if (error) -+ goto cleanup; -+ size = le32_to_cpu(entry->e_value_size); -+ if (buffer) { -+ error = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ memcpy(buffer, (void *)IFIRST(header) + -+ le16_to_cpu(entry->e_value_offs), size); -+ } -+ error = size; -+ -+cleanup: -+ brelse(iloc.bh); -+ return error; -+} -+ -+/* -+ * ext3cow_xattr_get() -+ * -+ * Copy an extended attribute into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. -+ */ -+int -+ext3cow_xattr_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t buffer_size) -+{ -+ int error; -+ -+ down_read(&EXT3COW_I(inode)->xattr_sem); -+ error = ext3cow_xattr_ibody_get(inode, name_index, name, buffer, -+ buffer_size); -+ if (error == -ENODATA) -+ error = ext3cow_xattr_block_get(inode, name_index, name, buffer, -+ buffer_size); -+ up_read(&EXT3COW_I(inode)->xattr_sem); -+ return error; -+} -+ -+static int -+ext3cow_xattr_list_entries(struct inode *inode, struct ext3cow_xattr_entry *entry, -+ char *buffer, size_t buffer_size) -+{ -+ size_t rest = buffer_size; -+ -+ for (; !IS_LAST_ENTRY(entry); entry = EXT3COW_XATTR_NEXT(entry)) { -+ struct xattr_handler *handler = -+ ext3cow_xattr_handler(entry->e_name_index); -+ -+ if (handler) { -+ size_t size = handler->list(inode, buffer, rest, -+ entry->e_name, -+ entry->e_name_len); -+ if (buffer) { -+ if (size > rest) -+ return -ERANGE; -+ buffer += size; -+ } -+ rest -= size; -+ } -+ } -+ return buffer_size - rest; -+} -+ -+static int -+ext3cow_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size) -+{ -+ struct buffer_head *bh = NULL; -+ int error; -+ -+ ea_idebug(inode, "buffer=%p, buffer_size=%ld", -+ buffer, (long)buffer_size); -+ -+ error = 0; -+ if (!EXT3COW_I(inode)->i_file_acl) -+ goto cleanup; -+ ea_idebug(inode, "reading block %u", EXT3COW_I(inode)->i_file_acl); -+ bh = sb_bread(inode->i_sb, EXT3COW_I(inode)->i_file_acl); -+ error = -EIO; -+ if (!bh) -+ goto cleanup; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); -+ if (ext3cow_xattr_check_block(bh)) { -+ ext3cow_error(inode->i_sb, __FUNCTION__, -+ "inode %lu: bad block "E3FSBLK, inode->i_ino, -+ EXT3COW_I(inode)->i_file_acl); -+ error = -EIO; -+ goto cleanup; -+ } -+ ext3cow_xattr_cache_insert(bh); -+ error = ext3cow_xattr_list_entries(inode, BFIRST(bh), buffer, buffer_size); -+ -+cleanup: -+ brelse(bh); -+ -+ return error; -+} -+ -+static int -+ext3cow_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size) -+{ -+ struct ext3cow_xattr_ibody_header *header; -+ struct ext3cow_inode *raw_inode; -+ struct ext3cow_iloc iloc; -+ void *end; -+ int error; -+ -+ if (!(EXT3COW_I(inode)->i_state & EXT3COW_STATE_XATTR)) -+ return 0; -+ error = ext3cow_get_inode_loc(inode, &iloc); -+ if (error) -+ return error; -+ raw_inode = ext3cow_raw_inode(&iloc); -+ header = IHDR(inode, raw_inode); -+ end = (void *)raw_inode + EXT3COW_SB(inode->i_sb)->s_inode_size; -+ error = ext3cow_xattr_check_names(IFIRST(header), end); -+ if (error) -+ goto cleanup; -+ error = ext3cow_xattr_list_entries(inode, IFIRST(header), -+ buffer, buffer_size); -+ -+cleanup: -+ brelse(iloc.bh); -+ return error; -+} -+ -+/* -+ * ext3cow_xattr_list() -+ * -+ * Copy a list of attribute names into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. -+ */ -+int -+ext3cow_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) -+{ -+ int i_error, b_error; -+ -+ down_read(&EXT3COW_I(inode)->xattr_sem); -+ i_error = ext3cow_xattr_ibody_list(inode, buffer, buffer_size); -+ if (i_error < 0) { -+ b_error = 0; -+ } else { -+ if (buffer) { -+ buffer += i_error; -+ buffer_size -= i_error; -+ } -+ b_error = ext3cow_xattr_block_list(inode, buffer, buffer_size); -+ if (b_error < 0) -+ i_error = 0; -+ } -+ up_read(&EXT3COW_I(inode)->xattr_sem); -+ return i_error + b_error; -+} -+ -+/* -+ * If the EXT3COW_FEATURE_COMPAT_EXT_ATTR feature of this file system is -+ * not set, set it. -+ */ -+static void ext3cow_xattr_update_super_block(handle_t *handle, -+ struct super_block *sb) -+{ -+ if (EXT3COW_HAS_COMPAT_FEATURE(sb, EXT3COW_FEATURE_COMPAT_EXT_ATTR)) -+ return; -+ -+ if (ext3cow_journal_get_write_access(handle, EXT3COW_SB(sb)->s_sbh) == 0) { -+ EXT3COW_SET_COMPAT_FEATURE(sb, EXT3COW_FEATURE_COMPAT_EXT_ATTR); -+ sb->s_dirt = 1; -+ ext3cow_journal_dirty_metadata(handle, EXT3COW_SB(sb)->s_sbh); -+ } -+} -+ -+/* -+ * Release the xattr block BH: If the reference count is > 1, decrement -+ * it; otherwise free the block. -+ */ -+static void -+ext3cow_xattr_release_block(handle_t *handle, struct inode *inode, -+ struct buffer_head *bh) -+{ -+ struct mb_cache_entry *ce = NULL; -+ -+ ce = mb_cache_entry_get(ext3cow_xattr_cache, bh->b_bdev, bh->b_blocknr); -+ if (BHDR(bh)->h_refcount == cpu_to_le32(1)) { -+ ea_bdebug(bh, "refcount now=0; freeing"); -+ if (ce) -+ mb_cache_entry_free(ce); -+ ext3cow_free_blocks(handle, inode, bh->b_blocknr, 1); -+ get_bh(bh); -+ ext3cow_forget(handle, 1, inode, bh, bh->b_blocknr); -+ } else { -+ if (ext3cow_journal_get_write_access(handle, bh) == 0) { -+ lock_buffer(bh); -+ BHDR(bh)->h_refcount = cpu_to_le32( -+ le32_to_cpu(BHDR(bh)->h_refcount) - 1); -+ ext3cow_journal_dirty_metadata(handle, bh); -+ if (IS_SYNC(inode)) -+ handle->h_sync = 1; -+ DQUOT_FREE_BLOCK(inode, 1); -+ unlock_buffer(bh); -+ ea_bdebug(bh, "refcount now=%d; releasing", -+ le32_to_cpu(BHDR(bh)->h_refcount)); -+ } -+ if (ce) -+ mb_cache_entry_release(ce); -+ } -+} -+ -+struct ext3cow_xattr_info { -+ int name_index; -+ const char *name; -+ const void *value; -+ size_t value_len; -+}; -+ -+struct ext3cow_xattr_search { -+ struct ext3cow_xattr_entry *first; -+ void *base; -+ void *end; -+ struct ext3cow_xattr_entry *here; -+ int not_found; -+}; -+ -+static int -+ext3cow_xattr_set_entry(struct ext3cow_xattr_info *i, struct ext3cow_xattr_search *s) -+{ -+ struct ext3cow_xattr_entry *last; -+ size_t free, min_offs = s->end - s->base, name_len = strlen(i->name); -+ -+ /* Compute min_offs and last. */ -+ last = s->first; -+ for (; !IS_LAST_ENTRY(last); last = EXT3COW_XATTR_NEXT(last)) { -+ if (!last->e_value_block && last->e_value_size) { -+ size_t offs = le16_to_cpu(last->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ } -+ free = min_offs - ((void *)last - s->base) - sizeof(__u32); -+ if (!s->not_found) { -+ if (!s->here->e_value_block && s->here->e_value_size) { -+ size_t size = le32_to_cpu(s->here->e_value_size); -+ free += EXT3COW_XATTR_SIZE(size); -+ } -+ free += EXT3COW_XATTR_LEN(name_len); -+ } -+ if (i->value) { -+ if (free < EXT3COW_XATTR_SIZE(i->value_len) || -+ free < EXT3COW_XATTR_LEN(name_len) + -+ EXT3COW_XATTR_SIZE(i->value_len)) -+ return -ENOSPC; -+ } -+ -+ if (i->value && s->not_found) { -+ /* Insert the new name. */ -+ size_t size = EXT3COW_XATTR_LEN(name_len); -+ size_t rest = (void *)last - (void *)s->here + sizeof(__u32); -+ memmove((void *)s->here + size, s->here, rest); -+ memset(s->here, 0, size); -+ s->here->e_name_index = i->name_index; -+ s->here->e_name_len = name_len; -+ memcpy(s->here->e_name, i->name, name_len); -+ } else { -+ if (!s->here->e_value_block && s->here->e_value_size) { -+ void *first_val = s->base + min_offs; -+ size_t offs = le16_to_cpu(s->here->e_value_offs); -+ void *val = s->base + offs; -+ size_t size = EXT3COW_XATTR_SIZE( -+ le32_to_cpu(s->here->e_value_size)); -+ -+ if (i->value && size == EXT3COW_XATTR_SIZE(i->value_len)) { -+ /* The old and the new value have the same -+ size. Just replace. */ -+ s->here->e_value_size = -+ cpu_to_le32(i->value_len); -+ memset(val + size - EXT3COW_XATTR_PAD, 0, -+ EXT3COW_XATTR_PAD); /* Clear pad bytes. */ -+ memcpy(val, i->value, i->value_len); -+ return 0; -+ } -+ -+ /* Remove the old value. */ -+ memmove(first_val + size, first_val, val - first_val); -+ memset(first_val, 0, size); -+ s->here->e_value_size = 0; -+ s->here->e_value_offs = 0; -+ min_offs += size; -+ -+ /* Adjust all value offsets. */ -+ last = s->first; -+ while (!IS_LAST_ENTRY(last)) { -+ size_t o = le16_to_cpu(last->e_value_offs); -+ if (!last->e_value_block && -+ last->e_value_size && o < offs) -+ last->e_value_offs = -+ cpu_to_le16(o + size); -+ last = EXT3COW_XATTR_NEXT(last); -+ } -+ } -+ if (!i->value) { -+ /* Remove the old name. */ -+ size_t size = EXT3COW_XATTR_LEN(name_len); -+ last = ENTRY((void *)last - size); -+ memmove(s->here, (void *)s->here + size, -+ (void *)last - (void *)s->here + sizeof(__u32)); -+ memset(last, 0, size); -+ } -+ } -+ -+ if (i->value) { -+ /* Insert the new value. */ -+ s->here->e_value_size = cpu_to_le32(i->value_len); -+ if (i->value_len) { -+ size_t size = EXT3COW_XATTR_SIZE(i->value_len); -+ void *val = s->base + min_offs - size; -+ s->here->e_value_offs = cpu_to_le16(min_offs - size); -+ memset(val + size - EXT3COW_XATTR_PAD, 0, -+ EXT3COW_XATTR_PAD); /* Clear the pad bytes. */ -+ memcpy(val, i->value, i->value_len); -+ } -+ } -+ return 0; -+} -+ -+struct ext3cow_xattr_block_find { -+ struct ext3cow_xattr_search s; -+ struct buffer_head *bh; -+}; -+ -+static int -+ext3cow_xattr_block_find(struct inode *inode, struct ext3cow_xattr_info *i, -+ struct ext3cow_xattr_block_find *bs) -+{ -+ struct super_block *sb = inode->i_sb; -+ int error; -+ -+ ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", -+ i->name_index, i->name, i->value, (long)i->value_len); -+ -+ if (EXT3COW_I(inode)->i_file_acl) { -+ /* The inode already has an extended attribute block. */ -+ bs->bh = sb_bread(sb, EXT3COW_I(inode)->i_file_acl); -+ error = -EIO; -+ if (!bs->bh) -+ goto cleanup; -+ ea_bdebug(bs->bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bs->bh->b_count)), -+ le32_to_cpu(BHDR(bs->bh)->h_refcount)); -+ if (ext3cow_xattr_check_block(bs->bh)) { -+ ext3cow_error(sb, __FUNCTION__, -+ "inode %lu: bad block "E3FSBLK, inode->i_ino, -+ EXT3COW_I(inode)->i_file_acl); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* Find the named attribute. */ -+ bs->s.base = BHDR(bs->bh); -+ bs->s.first = BFIRST(bs->bh); -+ bs->s.end = bs->bh->b_data + bs->bh->b_size; -+ bs->s.here = bs->s.first; -+ error = ext3cow_xattr_find_entry(&bs->s.here, i->name_index, -+ i->name, bs->bh->b_size, 1); -+ if (error && error != -ENODATA) -+ goto cleanup; -+ bs->s.not_found = error; -+ } -+ error = 0; -+ -+cleanup: -+ return error; -+} -+ -+static int -+ext3cow_xattr_block_set(handle_t *handle, struct inode *inode, -+ struct ext3cow_xattr_info *i, -+ struct ext3cow_xattr_block_find *bs) -+{ -+ struct super_block *sb = inode->i_sb; -+ struct buffer_head *new_bh = NULL; -+ struct ext3cow_xattr_search *s = &bs->s; -+ struct mb_cache_entry *ce = NULL; -+ int error; -+ -+#define header(x) ((struct ext3cow_xattr_header *)(x)) -+ -+ if (i->value && i->value_len > sb->s_blocksize) -+ return -ENOSPC; -+ if (s->base) { -+ ce = mb_cache_entry_get(ext3cow_xattr_cache, bs->bh->b_bdev, -+ bs->bh->b_blocknr); -+ if (header(s->base)->h_refcount == cpu_to_le32(1)) { -+ if (ce) { -+ mb_cache_entry_free(ce); -+ ce = NULL; -+ } -+ ea_bdebug(bs->bh, "modifying in-place"); -+ error = ext3cow_journal_get_write_access(handle, bs->bh); -+ if (error) -+ goto cleanup; -+ lock_buffer(bs->bh); -+ error = ext3cow_xattr_set_entry(i, s); -+ if (!error) { -+ if (!IS_LAST_ENTRY(s->first)) -+ ext3cow_xattr_rehash(header(s->base), -+ s->here); -+ ext3cow_xattr_cache_insert(bs->bh); -+ } -+ unlock_buffer(bs->bh); -+ if (error == -EIO) -+ goto bad_block; -+ if (!error) -+ error = ext3cow_journal_dirty_metadata(handle, -+ bs->bh); -+ if (error) -+ goto cleanup; -+ goto inserted; -+ } else { -+ int offset = (char *)s->here - bs->bh->b_data; -+ -+ if (ce) { -+ mb_cache_entry_release(ce); -+ ce = NULL; -+ } -+ ea_bdebug(bs->bh, "cloning"); -+ s->base = kmalloc(bs->bh->b_size, GFP_KERNEL); -+ error = -ENOMEM; -+ if (s->base == NULL) -+ goto cleanup; -+ memcpy(s->base, BHDR(bs->bh), bs->bh->b_size); -+ s->first = ENTRY(header(s->base)+1); -+ header(s->base)->h_refcount = cpu_to_le32(1); -+ s->here = ENTRY(s->base + offset); -+ s->end = s->base + bs->bh->b_size; -+ } -+ } else { -+ /* Allocate a buffer where we construct the new block. */ -+ s->base = kmalloc(sb->s_blocksize, GFP_KERNEL); -+ /* assert(header == s->base) */ -+ error = -ENOMEM; -+ if (s->base == NULL) -+ goto cleanup; -+ memset(s->base, 0, sb->s_blocksize); -+ header(s->base)->h_magic = cpu_to_le32(EXT3COW_XATTR_MAGIC); -+ header(s->base)->h_blocks = cpu_to_le32(1); -+ header(s->base)->h_refcount = cpu_to_le32(1); -+ s->first = ENTRY(header(s->base)+1); -+ s->here = ENTRY(header(s->base)+1); -+ s->end = s->base + sb->s_blocksize; -+ } -+ -+ error = ext3cow_xattr_set_entry(i, s); -+ if (error == -EIO) -+ goto bad_block; -+ if (error) -+ goto cleanup; -+ if (!IS_LAST_ENTRY(s->first)) -+ ext3cow_xattr_rehash(header(s->base), s->here); -+ -+inserted: -+ if (!IS_LAST_ENTRY(s->first)) { -+ new_bh = ext3cow_xattr_cache_find(inode, header(s->base), &ce); -+ if (new_bh) { -+ /* We found an identical block in the cache. */ -+ if (new_bh == bs->bh) -+ ea_bdebug(new_bh, "keeping"); -+ else { -+ /* The old block is released after updating -+ the inode. */ -+ error = -EDQUOT; -+ if (DQUOT_ALLOC_BLOCK(inode, 1)) -+ goto cleanup; -+ error = ext3cow_journal_get_write_access(handle, -+ new_bh); -+ if (error) -+ goto cleanup_dquot; -+ lock_buffer(new_bh); -+ BHDR(new_bh)->h_refcount = cpu_to_le32(1 + -+ le32_to_cpu(BHDR(new_bh)->h_refcount)); -+ ea_bdebug(new_bh, "reusing; refcount now=%d", -+ le32_to_cpu(BHDR(new_bh)->h_refcount)); -+ unlock_buffer(new_bh); -+ error = ext3cow_journal_dirty_metadata(handle, -+ new_bh); -+ if (error) -+ goto cleanup_dquot; -+ } -+ mb_cache_entry_release(ce); -+ ce = NULL; -+ } else if (bs->bh && s->base == bs->bh->b_data) { -+ /* We were modifying this block in-place. */ -+ ea_bdebug(bs->bh, "keeping this block"); -+ new_bh = bs->bh; -+ get_bh(new_bh); -+ } else { -+ /* We need to allocate a new block */ -+ ext3cow_fsblk_t goal = le32_to_cpu( -+ EXT3COW_SB(sb)->s_es->s_first_data_block) + -+ (ext3cow_fsblk_t)EXT3COW_I(inode)->i_block_group * -+ EXT3COW_BLOCKS_PER_GROUP(sb); -+ ext3cow_fsblk_t block = ext3cow_new_block(handle, inode, -+ goal, &error); -+ if (error) -+ goto cleanup; -+ ea_idebug(inode, "creating block %d", block); -+ -+ new_bh = sb_getblk(sb, block); -+ if (!new_bh) { -+getblk_failed: -+ ext3cow_free_blocks(handle, inode, block, 1); -+ error = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(new_bh); -+ error = ext3cow_journal_get_create_access(handle, new_bh); -+ if (error) { -+ unlock_buffer(new_bh); -+ goto getblk_failed; -+ } -+ memcpy(new_bh->b_data, s->base, new_bh->b_size); -+ set_buffer_uptodate(new_bh); -+ unlock_buffer(new_bh); -+ ext3cow_xattr_cache_insert(new_bh); -+ error = ext3cow_journal_dirty_metadata(handle, new_bh); -+ if (error) -+ goto cleanup; -+ } -+ } -+ -+ /* Update the inode. */ -+ EXT3COW_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; -+ -+ /* Drop the previous xattr block. */ -+ if (bs->bh && bs->bh != new_bh) -+ ext3cow_xattr_release_block(handle, inode, bs->bh); -+ error = 0; -+ -+cleanup: -+ if (ce) -+ mb_cache_entry_release(ce); -+ brelse(new_bh); -+ if (!(bs->bh && s->base == bs->bh->b_data)) -+ kfree(s->base); -+ -+ return error; -+ -+cleanup_dquot: -+ DQUOT_FREE_BLOCK(inode, 1); -+ goto cleanup; -+ -+bad_block: -+ ext3cow_error(inode->i_sb, __FUNCTION__, -+ "inode %lu: bad block "E3FSBLK, inode->i_ino, -+ EXT3COW_I(inode)->i_file_acl); -+ goto cleanup; -+ -+#undef header -+} -+ -+struct ext3cow_xattr_ibody_find { -+ struct ext3cow_xattr_search s; -+ struct ext3cow_iloc iloc; -+}; -+ -+static int -+ext3cow_xattr_ibody_find(struct inode *inode, struct ext3cow_xattr_info *i, -+ struct ext3cow_xattr_ibody_find *is) -+{ -+ struct ext3cow_xattr_ibody_header *header; -+ struct ext3cow_inode *raw_inode; -+ int error; -+ -+ if (EXT3COW_I(inode)->i_extra_isize == 0) -+ return 0; -+ raw_inode = ext3cow_raw_inode(&is->iloc); -+ header = IHDR(inode, raw_inode); -+ is->s.base = is->s.first = IFIRST(header); -+ is->s.here = is->s.first; -+ is->s.end = (void *)raw_inode + EXT3COW_SB(inode->i_sb)->s_inode_size; -+ if (EXT3COW_I(inode)->i_state & EXT3COW_STATE_XATTR) { -+ error = ext3cow_xattr_check_names(IFIRST(header), is->s.end); -+ if (error) -+ return error; -+ /* Find the named attribute. */ -+ error = ext3cow_xattr_find_entry(&is->s.here, i->name_index, -+ i->name, is->s.end - -+ (void *)is->s.base, 0); -+ if (error && error != -ENODATA) -+ return error; -+ is->s.not_found = error; -+ } -+ return 0; -+} -+ -+static int -+ext3cow_xattr_ibody_set(handle_t *handle, struct inode *inode, -+ struct ext3cow_xattr_info *i, -+ struct ext3cow_xattr_ibody_find *is) -+{ -+ struct ext3cow_xattr_ibody_header *header; -+ struct ext3cow_xattr_search *s = &is->s; -+ int error; -+ -+ if (EXT3COW_I(inode)->i_extra_isize == 0) -+ return -ENOSPC; -+ error = ext3cow_xattr_set_entry(i, s); -+ if (error) -+ return error; -+ header = IHDR(inode, ext3cow_raw_inode(&is->iloc)); -+ if (!IS_LAST_ENTRY(s->first)) { -+ header->h_magic = cpu_to_le32(EXT3COW_XATTR_MAGIC); -+ EXT3COW_I(inode)->i_state |= EXT3COW_STATE_XATTR; -+ } else { -+ header->h_magic = cpu_to_le32(0); -+ EXT3COW_I(inode)->i_state &= ~EXT3COW_STATE_XATTR; -+ } -+ return 0; -+} -+ -+/* -+ * ext3cow_xattr_set_handle() -+ * -+ * Create, replace or remove an extended attribute for this inode. Buffer -+ * is NULL to remove an existing extended attribute, and non-NULL to -+ * either replace an existing extended attribute, or create a new extended -+ * attribute. The flags XATTR_REPLACE and XATTR_CREATE -+ * specify that an extended attribute must exist and must not exist -+ * previous to the call, respectively. -+ * -+ * Returns 0, or a negative error number on failure. -+ */ -+int -+ext3cow_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, const void *value, size_t value_len, -+ int flags) -+{ -+ struct ext3cow_xattr_info i = { -+ .name_index = name_index, -+ .name = name, -+ .value = value, -+ .value_len = value_len, -+ -+ }; -+ struct ext3cow_xattr_ibody_find is = { -+ .s = { .not_found = -ENODATA, }, -+ }; -+ struct ext3cow_xattr_block_find bs = { -+ .s = { .not_found = -ENODATA, }, -+ }; -+ int error; -+ -+ if (!name) -+ return -EINVAL; -+ if (strlen(name) > 255) -+ return -ERANGE; -+ down_write(&EXT3COW_I(inode)->xattr_sem); -+ error = ext3cow_get_inode_loc(inode, &is.iloc); -+ if (error) -+ goto cleanup; -+ -+ if (EXT3COW_I(inode)->i_state & EXT3COW_STATE_NEW) { -+ struct ext3cow_inode *raw_inode = ext3cow_raw_inode(&is.iloc); -+ memset(raw_inode, 0, EXT3COW_SB(inode->i_sb)->s_inode_size); -+ EXT3COW_I(inode)->i_state &= ~EXT3COW_STATE_NEW; -+ } -+ -+ error = ext3cow_xattr_ibody_find(inode, &i, &is); -+ if (error) -+ goto cleanup; -+ if (is.s.not_found) -+ error = ext3cow_xattr_block_find(inode, &i, &bs); -+ if (error) -+ goto cleanup; -+ if (is.s.not_found && bs.s.not_found) { -+ error = -ENODATA; -+ if (flags & XATTR_REPLACE) -+ goto cleanup; -+ error = 0; -+ if (!value) -+ goto cleanup; -+ } else { -+ error = -EEXIST; -+ if (flags & XATTR_CREATE) -+ goto cleanup; -+ } -+ error = ext3cow_journal_get_write_access(handle, is.iloc.bh); -+ if (error) -+ goto cleanup; -+ if (!value) { -+ if (!is.s.not_found) -+ error = ext3cow_xattr_ibody_set(handle, inode, &i, &is); -+ else if (!bs.s.not_found) -+ error = ext3cow_xattr_block_set(handle, inode, &i, &bs); -+ } else { -+ error = ext3cow_xattr_ibody_set(handle, inode, &i, &is); -+ if (!error && !bs.s.not_found) { -+ i.value = NULL; -+ error = ext3cow_xattr_block_set(handle, inode, &i, &bs); -+ } else if (error == -ENOSPC) { -+ error = ext3cow_xattr_block_set(handle, inode, &i, &bs); -+ if (error) -+ goto cleanup; -+ if (!is.s.not_found) { -+ i.value = NULL; -+ error = ext3cow_xattr_ibody_set(handle, inode, &i, -+ &is); -+ } -+ } -+ } -+ if (!error) { -+ ext3cow_xattr_update_super_block(handle, inode->i_sb); -+ inode->i_ctime = CURRENT_TIME_SEC; -+ error = ext3cow_mark_iloc_dirty(handle, inode, &is.iloc); -+ /* -+ * The bh is consumed by ext3cow_mark_iloc_dirty, even with -+ * error != 0. -+ */ -+ is.iloc.bh = NULL; -+ if (IS_SYNC(inode)) -+ handle->h_sync = 1; -+ } -+ -+cleanup: -+ brelse(is.iloc.bh); -+ brelse(bs.bh); -+ up_write(&EXT3COW_I(inode)->xattr_sem); -+ return error; -+} -+ -+/* -+ * ext3cow_xattr_set() -+ * -+ * Like ext3cow_xattr_set_handle, but start from an inode. This extended -+ * attribute modification is a filesystem transaction by itself. -+ * -+ * Returns 0, or a negative error number on failure. -+ */ -+int -+ext3cow_xattr_set(struct inode *inode, int name_index, const char *name, -+ const void *value, size_t value_len, int flags) -+{ -+ handle_t *handle; -+ int error, retries = 0; -+ -+retry: -+ handle = ext3cow_journal_start(inode, EXT3COW_DATA_TRANS_BLOCKS(inode->i_sb)); -+ if (IS_ERR(handle)) { -+ error = PTR_ERR(handle); -+ } else { -+ int error2; -+ -+ error = ext3cow_xattr_set_handle(handle, inode, name_index, name, -+ value, value_len, flags); -+ error2 = ext3cow_journal_stop(handle); -+ if (error == -ENOSPC && -+ ext3cow_should_retry_alloc(inode->i_sb, &retries)) -+ goto retry; -+ if (error == 0) -+ error = error2; -+ } -+ -+ return error; -+} -+ -+/* -+ * ext3cow_xattr_delete_inode() -+ * -+ * Free extended attribute resources associated with this inode. This -+ * is called immediately before an inode is freed. We have exclusive -+ * access to the inode. -+ */ -+void -+ext3cow_xattr_delete_inode(handle_t *handle, struct inode *inode) -+{ -+ struct buffer_head *bh = NULL; -+ -+ if (!EXT3COW_I(inode)->i_file_acl) -+ goto cleanup; -+ bh = sb_bread(inode->i_sb, EXT3COW_I(inode)->i_file_acl); -+ if (!bh) { -+ ext3cow_error(inode->i_sb, __FUNCTION__, -+ "inode %lu: block "E3FSBLK" read error", inode->i_ino, -+ EXT3COW_I(inode)->i_file_acl); -+ goto cleanup; -+ } -+ if (BHDR(bh)->h_magic != cpu_to_le32(EXT3COW_XATTR_MAGIC) || -+ BHDR(bh)->h_blocks != cpu_to_le32(1)) { -+ ext3cow_error(inode->i_sb, __FUNCTION__, -+ "inode %lu: bad block "E3FSBLK, inode->i_ino, -+ EXT3COW_I(inode)->i_file_acl); -+ goto cleanup; -+ } -+ ext3cow_xattr_release_block(handle, inode, bh); -+ EXT3COW_I(inode)->i_file_acl = 0; -+ -+cleanup: -+ brelse(bh); -+} -+ -+/* -+ * ext3cow_xattr_put_super() -+ * -+ * This is called when a file system is unmounted. -+ */ -+void -+ext3cow_xattr_put_super(struct super_block *sb) -+{ -+ mb_cache_shrink(sb->s_bdev); -+} -+ -+/* -+ * ext3cow_xattr_cache_insert() -+ * -+ * Create a new entry in the extended attribute cache, and insert -+ * it unless such an entry is already in the cache. -+ * -+ * Returns 0, or a negative error number on failure. -+ */ -+static void -+ext3cow_xattr_cache_insert(struct buffer_head *bh) -+{ -+ __u32 hash = le32_to_cpu(BHDR(bh)->h_hash); -+ struct mb_cache_entry *ce; -+ int error; -+ -+ ce = mb_cache_entry_alloc(ext3cow_xattr_cache); -+ if (!ce) { -+ ea_bdebug(bh, "out of memory"); -+ return; -+ } -+ error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash); -+ if (error) { -+ mb_cache_entry_free(ce); -+ if (error == -EBUSY) { -+ ea_bdebug(bh, "already in cache"); -+ error = 0; -+ } -+ } else { -+ ea_bdebug(bh, "inserting [%x]", (int)hash); -+ mb_cache_entry_release(ce); -+ } -+} -+ -+/* -+ * ext3cow_xattr_cmp() -+ * -+ * Compare two extended attribute blocks for equality. -+ * -+ * Returns 0 if the blocks are equal, 1 if they differ, and -+ * a negative error number on errors. -+ */ -+static int -+ext3cow_xattr_cmp(struct ext3cow_xattr_header *header1, -+ struct ext3cow_xattr_header *header2) -+{ -+ struct ext3cow_xattr_entry *entry1, *entry2; -+ -+ entry1 = ENTRY(header1+1); -+ entry2 = ENTRY(header2+1); -+ while (!IS_LAST_ENTRY(entry1)) { -+ if (IS_LAST_ENTRY(entry2)) -+ return 1; -+ if (entry1->e_hash != entry2->e_hash || -+ entry1->e_name_index != entry2->e_name_index || -+ entry1->e_name_len != entry2->e_name_len || -+ entry1->e_value_size != entry2->e_value_size || -+ memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len)) -+ return 1; -+ if (entry1->e_value_block != 0 || entry2->e_value_block != 0) -+ return -EIO; -+ if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs), -+ (char *)header2 + le16_to_cpu(entry2->e_value_offs), -+ le32_to_cpu(entry1->e_value_size))) -+ return 1; -+ -+ entry1 = EXT3COW_XATTR_NEXT(entry1); -+ entry2 = EXT3COW_XATTR_NEXT(entry2); -+ } -+ if (!IS_LAST_ENTRY(entry2)) -+ return 1; -+ return 0; -+} -+ -+/* -+ * ext3cow_xattr_cache_find() -+ * -+ * Find an identical extended attribute block. -+ * -+ * Returns a pointer to the block found, or NULL if such a block was -+ * not found or an error occurred. -+ */ -+static struct buffer_head * -+ext3cow_xattr_cache_find(struct inode *inode, struct ext3cow_xattr_header *header, -+ struct mb_cache_entry **pce) -+{ -+ __u32 hash = le32_to_cpu(header->h_hash); -+ struct mb_cache_entry *ce; -+ -+ if (!header->h_hash) -+ return NULL; /* never share */ -+ ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); -+again: -+ ce = mb_cache_entry_find_first(ext3cow_xattr_cache, 0, -+ inode->i_sb->s_bdev, hash); -+ while (ce) { -+ struct buffer_head *bh; -+ -+ if (IS_ERR(ce)) { -+ if (PTR_ERR(ce) == -EAGAIN) -+ goto again; -+ break; -+ } -+ bh = sb_bread(inode->i_sb, ce->e_block); -+ if (!bh) { -+ ext3cow_error(inode->i_sb, __FUNCTION__, -+ "inode %lu: block %lu read error", -+ inode->i_ino, (unsigned long) ce->e_block); -+ } else if (le32_to_cpu(BHDR(bh)->h_refcount) >= -+ EXT3COW_XATTR_REFCOUNT_MAX) { -+ ea_idebug(inode, "block %lu refcount %d>=%d", -+ (unsigned long) ce->e_block, -+ le32_to_cpu(BHDR(bh)->h_refcount), -+ EXT3COW_XATTR_REFCOUNT_MAX); -+ } else if (ext3cow_xattr_cmp(header, BHDR(bh)) == 0) { -+ *pce = ce; -+ return bh; -+ } -+ brelse(bh); -+ ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash); -+ } -+ return NULL; -+} -+ -+#define NAME_HASH_SHIFT 5 -+#define VALUE_HASH_SHIFT 16 -+ -+/* -+ * ext3cow_xattr_hash_entry() -+ * -+ * Compute the hash of an extended attribute. -+ */ -+static inline void ext3cow_xattr_hash_entry(struct ext3cow_xattr_header *header, -+ struct ext3cow_xattr_entry *entry) -+{ -+ __u32 hash = 0; -+ char *name = entry->e_name; -+ int n; -+ -+ for (n=0; n < entry->e_name_len; n++) { -+ hash = (hash << NAME_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ -+ *name++; -+ } -+ -+ if (entry->e_value_block == 0 && entry->e_value_size != 0) { -+ __le32 *value = (__le32 *)((char *)header + -+ le16_to_cpu(entry->e_value_offs)); -+ for (n = (le32_to_cpu(entry->e_value_size) + -+ EXT3COW_XATTR_ROUND) >> EXT3COW_XATTR_PAD_BITS; n; n--) { -+ hash = (hash << VALUE_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^ -+ le32_to_cpu(*value++); -+ } -+ } -+ entry->e_hash = cpu_to_le32(hash); -+} -+ -+#undef NAME_HASH_SHIFT -+#undef VALUE_HASH_SHIFT -+ -+#define BLOCK_HASH_SHIFT 16 -+ -+/* -+ * ext3cow_xattr_rehash() -+ * -+ * Re-compute the extended attribute hash value after an entry has changed. -+ */ -+static void ext3cow_xattr_rehash(struct ext3cow_xattr_header *header, -+ struct ext3cow_xattr_entry *entry) -+{ -+ struct ext3cow_xattr_entry *here; -+ __u32 hash = 0; -+ -+ ext3cow_xattr_hash_entry(header, entry); -+ here = ENTRY(header+1); -+ while (!IS_LAST_ENTRY(here)) { -+ if (!here->e_hash) { -+ /* Block is not shared if an entry's hash value == 0 */ -+ hash = 0; -+ break; -+ } -+ hash = (hash << BLOCK_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^ -+ le32_to_cpu(here->e_hash); -+ here = EXT3COW_XATTR_NEXT(here); -+ } -+ header->h_hash = cpu_to_le32(hash); -+} -+ -+#undef BLOCK_HASH_SHIFT -+ -+int __init -+init_ext3cow_xattr(void) -+{ -+ ext3cow_xattr_cache = mb_cache_create("ext3cow_xattr", NULL, -+ sizeof(struct mb_cache_entry) + -+ sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]), 1, 6); -+ if (!ext3cow_xattr_cache) -+ return -ENOMEM; -+ return 0; -+} -+ -+void -+exit_ext3cow_xattr(void) -+{ -+ if (ext3cow_xattr_cache) -+ mb_cache_destroy(ext3cow_xattr_cache); -+ ext3cow_xattr_cache = NULL; -+} -diff -Naur linux-2.6.21.7/fs/ext3cow/xattr.h linux-2.6.21.7_ext3cowPatched/fs/ext3cow/xattr.h ---- linux-2.6.21.7/fs/ext3cow/xattr.h 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.21.7_ext3cowPatched/fs/ext3cow/xattr.h 2007-10-23 17:47:18.000000000 +0200 -@@ -0,0 +1,145 @@ -+/* -+ File: fs/ext3cow/xattr.h -+ -+ On-disk format of extended attributes for the ext3cow filesystem. -+ -+ (C) 2001 Andreas Gruenbacher, -+*/ -+ -+#include -+ -+/* Magic value in attribute blocks */ -+#define EXT3COW_XATTR_MAGIC 0xEA020000 -+ -+/* Maximum number of references to one attribute block */ -+#define EXT3COW_XATTR_REFCOUNT_MAX 1024 -+ -+/* Name indexes */ -+#define EXT3COW_XATTR_INDEX_USER 1 -+#define EXT3COW_XATTR_INDEX_POSIX_ACL_ACCESS 2 -+#define EXT3COW_XATTR_INDEX_POSIX_ACL_DEFAULT 3 -+#define EXT3COW_XATTR_INDEX_TRUSTED 4 -+#define EXT3COW_XATTR_INDEX_LUSTRE 5 -+#define EXT3COW_XATTR_INDEX_SECURITY 6 -+ -+struct ext3cow_xattr_header { -+ __le32 h_magic; /* magic number for identification */ -+ __le32 h_refcount; /* reference count */ -+ __le32 h_blocks; /* number of disk blocks used */ -+ __le32 h_hash; /* hash value of all attributes */ -+ __u32 h_reserved[4]; /* zero right now */ -+}; -+ -+struct ext3cow_xattr_ibody_header { -+ __le32 h_magic; /* magic number for identification */ -+}; -+ -+struct ext3cow_xattr_entry { -+ __u8 e_name_len; /* length of name */ -+ __u8 e_name_index; /* attribute name index */ -+ __le16 e_value_offs; /* offset in disk block of value */ -+ __le32 e_value_block; /* disk block attribute is stored on (n/i) */ -+ __le32 e_value_size; /* size of attribute value */ -+ __le32 e_hash; /* hash value of name and value */ -+ char e_name[0]; /* attribute name */ -+}; -+ -+#define EXT3COW_XATTR_PAD_BITS 2 -+#define EXT3COW_XATTR_PAD (1<e_name_len)) ) -+#define EXT3COW_XATTR_SIZE(size) \ -+ (((size) + EXT3COW_XATTR_ROUND) & ~EXT3COW_XATTR_ROUND) -+ -+# ifdef CONFIG_EXT3COW_FS_XATTR -+ -+extern struct xattr_handler ext3cow_xattr_user_handler; -+extern struct xattr_handler ext3cow_xattr_trusted_handler; -+extern struct xattr_handler ext3cow_xattr_acl_access_handler; -+extern struct xattr_handler ext3cow_xattr_acl_default_handler; -+extern struct xattr_handler ext3cow_xattr_security_handler; -+ -+extern ssize_t ext3cow_listxattr(struct dentry *, char *, size_t); -+ -+extern int ext3cow_xattr_get(struct inode *, int, const char *, void *, size_t); -+extern int ext3cow_xattr_list(struct inode *, char *, size_t); -+extern int ext3cow_xattr_set(struct inode *, int, const char *, const void *, size_t, int); -+extern int ext3cow_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int); -+ -+extern void ext3cow_xattr_delete_inode(handle_t *, struct inode *); -+extern void ext3cow_xattr_put_super(struct super_block *); -+ -+extern int init_ext3cow_xattr(void); -+extern void exit_ext3cow_xattr(void); -+ -+extern struct xattr_handler *ext3cow_xattr_handlers[]; -+ -+# else /* CONFIG_EXT3COW_FS_XATTR */ -+ -+static inline int -+ext3cow_xattr_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t size, int flags) -+{ -+ return -EOPNOTSUPP; -+} -+ -+static inline int -+ext3cow_xattr_list(struct inode *inode, void *buffer, size_t size) -+{ -+ return -EOPNOTSUPP; -+} -+ -+static inline int -+ext3cow_xattr_set(struct inode *inode, int name_index, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ return -EOPNOTSUPP; -+} -+ -+static inline int -+ext3cow_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, const void *value, size_t size, int flags) -+{ -+ return -EOPNOTSUPP; -+} -+ -+static inline void -+ext3cow_xattr_delete_inode(handle_t *handle, struct inode *inode) -+{ -+} -+ -+static inline void -+ext3cow_xattr_put_super(struct super_block *sb) -+{ -+} -+ -+static inline int -+init_ext3cow_xattr(void) -+{ -+ return 0; -+} -+ -+static inline void -+exit_ext3cow_xattr(void) -+{ -+} -+ -+#define ext3cow_xattr_handlers NULL -+ -+# endif /* CONFIG_EXT3COW_FS_XATTR */ -+ -+#ifdef CONFIG_EXT3COW_FS_SECURITY -+extern int ext3cow_init_security(handle_t *handle, struct inode *inode, -+ struct inode *dir); -+#else -+static inline int ext3cow_init_security(handle_t *handle, struct inode *inode, -+ struct inode *dir) -+{ -+ return 0; -+} -+#endif -diff -Naur linux-2.6.21.7/fs/ext3cow/xattr_security.c linux-2.6.21.7_ext3cowPatched/fs/ext3cow/xattr_security.c ---- linux-2.6.21.7/fs/ext3cow/xattr_security.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.21.7_ext3cowPatched/fs/ext3cow/xattr_security.c 2007-10-23 17:47:18.000000000 +0200 -@@ -0,0 +1,77 @@ -+/* -+ * linux/fs/ext3cow/xattr_security.c -+ * Handler for storing security labels as extended attributes. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "xattr.h" -+ -+static size_t -+ext3cow_xattr_security_list(struct inode *inode, char *list, size_t list_size, -+ const char *name, size_t name_len) -+{ -+ const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1; -+ const size_t total_len = prefix_len + name_len + 1; -+ -+ -+ if (list && total_len <= list_size) { -+ memcpy(list, XATTR_SECURITY_PREFIX, prefix_len); -+ memcpy(list+prefix_len, name, name_len); -+ list[prefix_len + name_len] = '\0'; -+ } -+ return total_len; -+} -+ -+static int -+ext3cow_xattr_security_get(struct inode *inode, const char *name, -+ void *buffer, size_t size) -+{ -+ if (strcmp(name, "") == 0) -+ return -EINVAL; -+ return ext3cow_xattr_get(inode, EXT3COW_XATTR_INDEX_SECURITY, name, -+ buffer, size); -+} -+ -+static int -+ext3cow_xattr_security_set(struct inode *inode, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ if (strcmp(name, "") == 0) -+ return -EINVAL; -+ return ext3cow_xattr_set(inode, EXT3COW_XATTR_INDEX_SECURITY, name, -+ value, size, flags); -+} -+ -+int -+ext3cow_init_security(handle_t *handle, struct inode *inode, struct inode *dir) -+{ -+ int err; -+ size_t len; -+ void *value; -+ char *name; -+ -+ err = security_inode_init_security(inode, dir, &name, &value, &len); -+ if (err) { -+ if (err == -EOPNOTSUPP) -+ return 0; -+ return err; -+ } -+ err = ext3cow_xattr_set_handle(handle, inode, EXT3COW_XATTR_INDEX_SECURITY, -+ name, value, len, 0); -+ kfree(name); -+ kfree(value); -+ return err; -+} -+ -+struct xattr_handler ext3cow_xattr_security_handler = { -+ .prefix = XATTR_SECURITY_PREFIX, -+ .list = ext3cow_xattr_security_list, -+ .get = ext3cow_xattr_security_get, -+ .set = ext3cow_xattr_security_set, -+}; -diff -Naur linux-2.6.21.7/fs/ext3cow/xattr_trusted.c linux-2.6.21.7_ext3cowPatched/fs/ext3cow/xattr_trusted.c ---- linux-2.6.21.7/fs/ext3cow/xattr_trusted.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.21.7_ext3cowPatched/fs/ext3cow/xattr_trusted.c 2007-10-23 17:47:18.000000000 +0200 -@@ -0,0 +1,62 @@ -+/* -+ * linux/fs/ext3cow/xattr_trusted.c -+ * Handler for trusted extended attributes. -+ * -+ * Copyright (C) 2003 by Andreas Gruenbacher, -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "xattr.h" -+ -+#define XATTR_TRUSTED_PREFIX "trusted." -+ -+static size_t -+ext3cow_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, -+ const char *name, size_t name_len) -+{ -+ const size_t prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1; -+ const size_t total_len = prefix_len + name_len + 1; -+ -+ if (!capable(CAP_SYS_ADMIN)) -+ return 0; -+ -+ if (list && total_len <= list_size) { -+ memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len); -+ memcpy(list+prefix_len, name, name_len); -+ list[prefix_len + name_len] = '\0'; -+ } -+ return total_len; -+} -+ -+static int -+ext3cow_xattr_trusted_get(struct inode *inode, const char *name, -+ void *buffer, size_t size) -+{ -+ if (strcmp(name, "") == 0) -+ return -EINVAL; -+ return ext3cow_xattr_get(inode, EXT3COW_XATTR_INDEX_TRUSTED, name, -+ buffer, size); -+} -+ -+static int -+ext3cow_xattr_trusted_set(struct inode *inode, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ if (strcmp(name, "") == 0) -+ return -EINVAL; -+ return ext3cow_xattr_set(inode, EXT3COW_XATTR_INDEX_TRUSTED, name, -+ value, size, flags); -+} -+ -+struct xattr_handler ext3cow_xattr_trusted_handler = { -+ .prefix = XATTR_TRUSTED_PREFIX, -+ .list = ext3cow_xattr_trusted_list, -+ .get = ext3cow_xattr_trusted_get, -+ .set = ext3cow_xattr_trusted_set, -+}; -diff -Naur linux-2.6.21.7/fs/ext3cow/xattr_user.c linux-2.6.21.7_ext3cowPatched/fs/ext3cow/xattr_user.c ---- linux-2.6.21.7/fs/ext3cow/xattr_user.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.21.7_ext3cowPatched/fs/ext3cow/xattr_user.c 2007-10-23 17:47:18.000000000 +0200 -@@ -0,0 +1,64 @@ -+/* -+ * linux/fs/ext3cow/xattr_user.c -+ * Handler for extended user attributes. -+ * -+ * Copyright (C) 2001 by Andreas Gruenbacher, -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include "xattr.h" -+ -+#define XATTR_USER_PREFIX "user." -+ -+static size_t -+ext3cow_xattr_user_list(struct inode *inode, char *list, size_t list_size, -+ const char *name, size_t name_len) -+{ -+ const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1; -+ const size_t total_len = prefix_len + name_len + 1; -+ -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return 0; -+ -+ if (list && total_len <= list_size) { -+ memcpy(list, XATTR_USER_PREFIX, prefix_len); -+ memcpy(list+prefix_len, name, name_len); -+ list[prefix_len + name_len] = '\0'; -+ } -+ return total_len; -+} -+ -+static int -+ext3cow_xattr_user_get(struct inode *inode, const char *name, -+ void *buffer, size_t size) -+{ -+ if (strcmp(name, "") == 0) -+ return -EINVAL; -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return -EOPNOTSUPP; -+ return ext3cow_xattr_get(inode, EXT3COW_XATTR_INDEX_USER, name, buffer, size); -+} -+ -+static int -+ext3cow_xattr_user_set(struct inode *inode, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ if (strcmp(name, "") == 0) -+ return -EINVAL; -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return -EOPNOTSUPP; -+ return ext3cow_xattr_set(inode, EXT3COW_XATTR_INDEX_USER, name, -+ value, size, flags); -+} -+ -+struct xattr_handler ext3cow_xattr_user_handler = { -+ .prefix = XATTR_USER_PREFIX, -+ .list = ext3cow_xattr_user_list, -+ .get = ext3cow_xattr_user_get, -+ .set = ext3cow_xattr_user_set, -+}; -diff -Naur linux-2.6.21.7/fs/Kconfig linux-2.6.21.7_ext3cowPatched/fs/Kconfig ---- linux-2.6.21.7/fs/Kconfig 2007-08-04 18:11:13.000000000 +0200 -+++ linux-2.6.21.7_ext3cowPatched/fs/Kconfig 2007-10-23 17:46:52.000000000 +0200 -@@ -136,6 +136,77 @@ - If you are not using a security module that requires using - extended attributes for file security labels, say N. - -+ -+ -+config EXT3COW_FS -+ tristate "Ext3cow journalling and versioning file system support" -+ select JBD -+ help -+ This is the journalling version of the Second extended file system -+ (often called ext3), the de facto standard Linux file system -+ (method to organize files on a storage device) for hard disks. -+ -+ The journalling code included in this driver means you do not have -+ to run e2fsck (file system checker) on your file systems after a -+ crash. The journal keeps track of any changes that were being made -+ at the time the system crashed, and can ensure that your file system -+ is consistent without the need for a lengthy check. -+ -+ Other than adding the journal to the file system, the on-disk format -+ of ext3 is identical to ext2. It is possible to freely switch -+ between using the ext3 driver and the ext2 driver, as long as the -+ file system has been cleanly unmounted, or e2fsck is run on the file -+ system. -+ -+ To add a journal on an existing ext2 file system or change the -+ behavior of ext3 file systems, you can use the tune2fs utility ("man -+ tune2fs"). To modify attributes of files and directories on ext3 -+ file systems, use chattr ("man chattr"). You need to be using -+ e2fsprogs version 1.20 or later in order to create ext3 journals -+ (available at ). -+ -+ To compile this file system support as a module, choose M here: the -+ module will be called ext3. -+ -+config EXT3COW_FS_XATTR -+ bool "Ext3cow extended attributes" -+ depends on EXT3COW_FS -+ default y -+ help -+ Extended attributes are name:value pairs associated with inodes by -+ the kernel or by users (see the attr(5) manual page, or visit -+ for details). -+ -+ If unsure, say N. -+ -+ You need this for POSIX ACL support on ext3cow. -+ -+config EXT3COW_FS_POSIX_ACL -+ bool "Ext3cow POSIX Access Control Lists" -+ depends on EXT3COW_FS_XATTR -+ select FS_POSIX_ACL -+ help -+ Posix Access Control Lists (ACLs) support permissions for users and -+ groups beyond the owner/group/world scheme. -+ -+ To learn more about Access Control Lists, visit the Posix ACLs for -+ Linux website . -+ -+ If you don't know what Access Control Lists are, say N -+ -+config EXT3COW_FS_SECURITY -+ bool "Ext3cow Security Labels" -+ depends on EXT3COW_FS_XATTR -+ help -+ Security labels support alternative access control models -+ implemented by security modules like SELinux. This option -+ enables an extended attribute handler for file security -+ labels in the ext3cow filesystem. -+ -+ If you are not using a security module that requires using -+ extended attributes for file security labels, say N. -+ -+ - config EXT4DEV_FS - tristate "Ext4dev/ext4 extended fs support development (EXPERIMENTAL)" - depends on EXPERIMENTAL -@@ -205,23 +276,23 @@ - tristate - help - This is a generic journalling layer for block devices. It is -- currently used by the ext3 and OCFS2 file systems, but it could -+ currently used by the ext3, ext3cow and OCFS2 file systems, but it could - also be used to add journal support to other file systems or block - devices such as RAID or LVM. - -- If you are using the ext3 or OCFS2 file systems, you need to -+ If you are using the ext3, ext3cow or OCFS2 file systems, you need to - say Y here. If you are not using ext3 OCFS2 then you will probably - want to say N. - - To compile this device as a module, choose M here: the module will be -- called jbd. If you are compiling ext3 or OCFS2 into the kernel, -+ called jbd. If you are compiling ext3, ext3cow or OCFS2 into the kernel, - you cannot compile this code as a module. - - config JBD_DEBUG - bool "JBD (ext3) debugging support" - depends on JBD - help -- If you are using the ext3 journaled file system (or potentially any -+ If you are using the ext3 or ext3cow journaled file system (or potentially any - other file system/device using JBD), this option allows you to - enable debugging output while the system is running, in order to - help track down any problems you are having. By default the -@@ -266,11 +337,12 @@ - "echo 0 > /proc/sys/fs/jbd2-debug". - - config FS_MBCACHE --# Meta block cache for Extended Attributes (ext2/ext3/ext4) -+# Meta block cache for Extended Attributes (ext2/ext3(cow)/ext4) - tristate -- depends on EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4DEV_FS_XATTR -- default y if EXT2_FS=y || EXT3_FS=y || EXT4DEV_FS=y -- default m if EXT2_FS=m || EXT3_FS=m || EXT4DEV_FS=m -+ depends on EXT2_FS_XATTR || EXT3_FS_XATTR || EXT3COW_FS_XATTR || EXT4DEV_FS_XATTR -+ default y if EXT2_FS=y || EXT3_FS=y || EXT3COW_FS=y || EXT4DEV_FS=y -+ default m if EXT2_FS=m || EXT3_FS=m || EXT3COW_FS=m || EXT4DEV_FS=m -+ - - config REISERFS_FS - tristate "Reiserfs support" -diff -Naur linux-2.6.21.7/fs/Makefile linux-2.6.21.7_ext3cowPatched/fs/Makefile ---- linux-2.6.21.7/fs/Makefile 2007-08-04 18:11:13.000000000 +0200 -+++ linux-2.6.21.7_ext3cowPatched/fs/Makefile 2007-10-23 17:46:52.000000000 +0200 -@@ -63,6 +63,7 @@ - # Do not add any filesystems before this line - obj-$(CONFIG_REISERFS_FS) += reiserfs/ - obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3 -+obj-$(CONFIG_EXT3COW_FS) += ext3cow/ # Before ext2 so root fs can be ext3 - obj-$(CONFIG_EXT4DEV_FS) += ext4/ # Before ext2 so root fs can be ext4dev - obj-$(CONFIG_JBD) += jbd/ - obj-$(CONFIG_JBD2) += jbd2/ -diff -Naur linux-2.6.21.7/include/linux/ext3cow_fs.h linux-2.6.21.7_ext3cowPatched/include/linux/ext3cow_fs.h ---- linux-2.6.21.7/include/linux/ext3cow_fs.h 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.21.7_ext3cowPatched/include/linux/ext3cow_fs.h 2007-10-23 17:48:10.000000000 +0200 -@@ -0,0 +1,948 @@ -+/* -+ * linux/include/linux/ext3cow_fs.h -+ * -+ * Copyright (C) 1992, 1993, 1994, 1995 -+ * Remy Card (card@masi.ibp.fr) -+ * Laboratoire MASI - Institut Blaise Pascal -+ * Universite Pierre et Marie Curie (Paris VI) -+ * -+ * from -+ * -+ * linux/include/linux/minix_fs.h -+ * -+ * Copyright (C) 1991, 1992 Linus Torvalds -+ */ -+ -+#ifndef _LINUX_EXT3COW_FS_H -+#define _LINUX_EXT3COW_FS_H -+ -+#include -+#include -+ -+/* -+ * The second extended filesystem constants/structures -+ */ -+ -+/* -+ * Define EXT3COWFS_DEBUG to produce debug messages -+ */ -+#undef EXT3COWFS_DEBUG -+ -+ -+/* -+ * Define EXT3COW_RESERVATION to reserve data blocks for expanding files -+ */ -+#define EXT3COW_DEFAULT_RESERVE_BLOCKS 8 -+/*max window size: 1024(direct blocks) + 3([t,d]indirect blocks) */ -+#define EXT3COW_MAX_RESERVE_BLOCKS 1027 -+#define EXT3COW_RESERVE_WINDOW_NOT_ALLOCATED 0 -+/* -+ * Always enable hashed directories -+ */ -+//#define CONFIG_EXT3COW_INDEX -+ -+/* -+ * Debug code -+ */ -+#ifdef EXT3COWFS_DEBUG -+#define ext3cow_debug(f, a...) \ -+ do { \ -+ printk (KERN_DEBUG "EXT3COW-fs DEBUG (%s, %d): %s:", \ -+ __FILE__, __LINE__, __FUNCTION__); \ -+ printk (KERN_DEBUG f, ## a); \ -+ } while (0) -+#else -+#define ext3cow_debug(f, a...) do {} while (0) -+#endif -+ -+/* -+ * Special inodes numbers -+ */ -+#define EXT3COW_BAD_INO 1 /* Bad blocks inode */ -+#define EXT3COW_ROOT_INO 2 /* Root inode */ -+#define EXT3COW_BOOT_LOADER_INO 5 /* Boot loader inode */ -+#define EXT3COW_UNDEL_DIR_INO 6 /* Undelete directory inode */ -+#define EXT3COW_RESIZE_INO 7 /* Reserved group descriptors inode */ -+#define EXT3COW_JOURNAL_INO 8 /* Journal inode */ -+ -+/* First non-reserved inode for old ext3cow filesystems */ -+#define EXT3COW_GOOD_OLD_FIRST_INO 11 -+ -+/* -+ * Maximal count of links to a file -+ */ -+#define EXT3COW_LINK_MAX 32000 -+ -+/* For versioning -znjp */ -+#define EXT3COW_FLUX_TOKEN '@' -+/* Macros for scoping - in seconds -znjp */ -+#define ONEHOUR 3600 -+#define YESTERDAY 86400 -+#define ONEWEEK 604800 -+#define ONEMONTH 2419200 -+#define ONEYEAR 31449600 -+ -+/* -+ * Macro-instructions used to manage several block sizes -+ */ -+#define EXT3COW_MIN_BLOCK_SIZE 1024 -+#define EXT3COW_MAX_BLOCK_SIZE 4096 -+#define EXT3COW_MIN_BLOCK_LOG_SIZE 10 -+#ifdef __KERNEL__ -+# define EXT3COW_BLOCK_SIZE(s) ((s)->s_blocksize) -+#else -+# define EXT3COW_BLOCK_SIZE(s) (EXT3COW_MIN_BLOCK_SIZE << (s)->s_log_block_size) -+#endif -+//#define EXT3COW_ADDR_PER_BLOCK(s) (EXT3COW_BLOCK_SIZE(s) / sizeof (__u32)) -+#ifdef __KERNEL__ -+# define EXT3COW_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) -+#else -+# define EXT3COW_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10) -+#endif -+#ifdef __KERNEL__ -+#define EXT3COW_ADDR_PER_BLOCK_BITS(s) (EXT3COW_SB(s)->s_addr_per_block_bits) -+#define EXT3COW_INODE_SIZE(s) (EXT3COW_SB(s)->s_inode_size) -+#define EXT3COW_FIRST_INO(s) (EXT3COW_SB(s)->s_first_ino) -+#else -+#define EXT3COW_INODE_SIZE(s) (((s)->s_rev_level == EXT3COW_GOOD_OLD_REV) ? \ -+ EXT3COW_GOOD_OLD_INODE_SIZE : \ -+ (s)->s_inode_size) -+#define EXT3COW_FIRST_INO(s) (((s)->s_rev_level == EXT3COW_GOOD_OLD_REV) ? \ -+ EXT3COW_GOOD_OLD_FIRST_INO : \ -+ (s)->s_first_ino) -+#endif -+/* -+ * Macro-instructions for versioning support - znjp -+ */ -+#define EXT3COW_COWBITMAP_SIZE (sizeof(__u32) * 8) /* one word */ -+#define EXT3COW_COWBITMAPS_PER_IBLOCK(s) \ -+ (( (EXT3COW_BLOCK_SIZE(s) / sizeof(__u32)) / (EXT3COW_COWBITMAP_SIZE))) -+/* Accounts for COW bitmaps */ -+#define EXT3COW_ADDR_PER_BLOCK(s) ((EXT3COW_BLOCK_SIZE(s) / sizeof(__u32)) - EXT3COW_COWBITMAPS_PER_IBLOCK(s)) -+ -+/* -+ * Macro-instructions used to manage fragments -+ */ -+#define EXT3COW_MIN_FRAG_SIZE 1024 -+#define EXT3COW_MAX_FRAG_SIZE 4096 -+#define EXT3COW_MIN_FRAG_LOG_SIZE 10 -+#ifdef __KERNEL__ -+# define EXT3COW_FRAG_SIZE(s) (EXT3COW_SB(s)->s_frag_size) -+# define EXT3COW_FRAGS_PER_BLOCK(s) (EXT3COW_SB(s)->s_frags_per_block) -+#else -+# define EXT3COW_FRAG_SIZE(s) (EXT3COW_MIN_FRAG_SIZE << (s)->s_log_frag_size) -+# define EXT3COW_FRAGS_PER_BLOCK(s) (EXT3COW_BLOCK_SIZE(s) / EXT3COW_FRAG_SIZE(s)) -+#endif -+ -+/* -+ * Structure of a blocks group descriptor -+ */ -+struct ext3cow_group_desc -+{ -+ __le32 bg_block_bitmap; /* Blocks bitmap block */ -+ __le32 bg_inode_bitmap; /* Inodes bitmap block */ -+ __le32 bg_inode_table; /* Inodes table block */ -+ __le16 bg_free_blocks_count; /* Free blocks count */ -+ __le16 bg_free_inodes_count; /* Free inodes count */ -+ __le16 bg_used_dirs_count; /* Directories count */ -+ __u16 bg_pad; -+ __le32 bg_reserved[3]; -+}; -+ -+/* -+ * Macro-instructions used to manage group descriptors -+ */ -+#ifdef __KERNEL__ -+# define EXT3COW_BLOCKS_PER_GROUP(s) (EXT3COW_SB(s)->s_blocks_per_group) -+# define EXT3COW_DESC_PER_BLOCK(s) (EXT3COW_SB(s)->s_desc_per_block) -+# define EXT3COW_INODES_PER_GROUP(s) (EXT3COW_SB(s)->s_inodes_per_group) -+# define EXT3COW_DESC_PER_BLOCK_BITS(s) (EXT3COW_SB(s)->s_desc_per_block_bits) -+#else -+# define EXT3COW_BLOCKS_PER_GROUP(s) ((s)->s_blocks_per_group) -+# define EXT3COW_DESC_PER_BLOCK(s) (EXT3COW_BLOCK_SIZE(s) / sizeof (struct ext3cow_group_desc)) -+# define EXT3COW_INODES_PER_GROUP(s) ((s)->s_inodes_per_group) -+#endif -+ -+/* -+ * Constants relative to the data blocks -+ */ -+#define EXT3COW_NDIR_BLOCKS 12 -+#define EXT3COW_IND_BLOCK EXT3COW_NDIR_BLOCKS -+#define EXT3COW_DIND_BLOCK (EXT3COW_IND_BLOCK + 1) -+#define EXT3COW_TIND_BLOCK (EXT3COW_DIND_BLOCK + 1) -+#define EXT3COW_N_BLOCKS (EXT3COW_TIND_BLOCK + 1) -+ -+/* -+ * Inode flags -+ */ -+#define EXT3COW_SECRM_FL 0x00000001 /* Secure deletion */ -+#define EXT3COW_UNRM_FL 0x00000002 /* Undelete */ -+#define EXT3COW_COMPR_FL 0x00000004 /* Compress file */ -+#define EXT3COW_SYNC_FL 0x00000008 /* Synchronous updates */ -+#define EXT3COW_IMMUTABLE_FL 0x00000010 /* Immutable file */ -+#define EXT3COW_APPEND_FL 0x00000020 /* writes to file may only append */ -+#define EXT3COW_NODUMP_FL 0x00000040 /* do not dump file */ -+#define EXT3COW_NOATIME_FL 0x00000080 /* do not update atime */ -+/* Reserved for compression usage... */ -+#define EXT3COW_DIRTY_FL 0x00000100 -+#define EXT3COW_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ -+#define EXT3COW_NOCOMPR_FL 0x00000400 /* Don't compress */ -+#define EXT3COW_ECOMPR_FL 0x00000800 /* Compression error */ -+/* End compression flags --- maybe not all used */ -+#define EXT3COW_INDEX_FL 0x00001000 /* hash-indexed directory */ -+#define EXT3COW_IMAGIC_FL 0x00002000 /* AFS directory */ -+#define EXT3COW_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */ -+#define EXT3COW_NOTAIL_FL 0x00008000 /* file tail should not be merged */ -+#define EXT3COW_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ -+#define EXT3COW_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ -+/* Used for Versioning - znjp */ -+#define EXT3COW_UNCHANGEABLE_FL 0x00040000 -+#define EXT3COW_UNVERSIONABLE_FL 0x00080000 -+#define EXT3COW_FAKEINODE_FL 0x00100000 -+#define EXT3COW_RESERVED_FL 0x80000000 /* reserved for ext3cow lib */ -+ -+#define EXT3COW_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ -+#define EXT3COW_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ -+ -+/* -+ * Inode dynamic state flags -+ */ -+#define EXT3COW_STATE_JDATA 0x00000001 /* journaled data exists */ -+#define EXT3COW_STATE_NEW 0x00000002 /* inode is newly created */ -+#define EXT3COW_STATE_XATTR 0x00000004 /* has in-inode xattrs */ -+ -+/* Used to pass group descriptor data when online resize is done */ -+struct ext3cow_new_group_input { -+ __u32 group; /* Group number for this data */ -+ __u32 block_bitmap; /* Absolute block number of block bitmap */ -+ __u32 inode_bitmap; /* Absolute block number of inode bitmap */ -+ __u32 inode_table; /* Absolute block number of inode table start */ -+ __u32 blocks_count; /* Total number of blocks in this group */ -+ __u16 reserved_blocks; /* Number of reserved blocks in this group */ -+ __u16 unused; -+}; -+ -+/* The struct ext3cow_new_group_input in kernel space, with free_blocks_count */ -+struct ext3cow_new_group_data { -+ __u32 group; -+ __u32 block_bitmap; -+ __u32 inode_bitmap; -+ __u32 inode_table; -+ __u32 blocks_count; -+ __u16 reserved_blocks; -+ __u16 unused; -+ __u32 free_blocks_count; -+}; -+ -+ -+/* -+ * ioctl commands -+ */ -+#define EXT3COW_IOC_GETFLAGS FS_IOC_GETFLAGS -+#define EXT3COW_IOC_SETFLAGS FS_IOC_SETFLAGS -+#define EXT3COW_IOC_GETVERSION _IOR('f', 3, long) -+#define EXT3COW_IOC_SETVERSION _IOW('f', 4, long) -+#define EXT3COW_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) -+#define EXT3COW_IOC_GROUP_ADD _IOW('f', 8,struct ext3cow_new_group_input) -+#define EXT3COW_IOC_GETVERSION_OLD FS_IOC_GETVERSION -+#define EXT3COW_IOC_SETVERSION_OLD FS_IOC_SETVERSION -+#ifdef CONFIG_JBD_DEBUG -+#define EXT3COW_IOC_WAIT_FOR_READONLY _IOR('f', 99, long) -+#endif -+#define EXT3COW_IOC_GETRSVSZ _IOR('f', 5, long) -+#define EXT3COW_IOC_SETRSVSZ _IOW('f', 6, long) -+/* ioctls for versioning - znjp */ -+#define EXT3COW_IOC_TAKESNAPSHOT _IOR('f', 7, long) -+#define EXT3COW_IOC_GETEPOCH _IOR('f', 8, long) -+ -+/* -+ * ioctl commands in 32 bit emulation -+ */ -+#define EXT3COW_IOC32_GETFLAGS FS_IOC32_GETFLAGS -+#define EXT3COW_IOC32_SETFLAGS FS_IOC32_SETFLAGS -+#define EXT3COW_IOC32_GETVERSION _IOR('f', 3, int) -+#define EXT3COW_IOC32_SETVERSION _IOW('f', 4, int) -+#define EXT3COW_IOC32_GETRSVSZ _IOR('f', 5, int) -+#define EXT3COW_IOC32_SETRSVSZ _IOW('f', 6, int) -+#define EXT3COW_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int) -+#ifdef CONFIG_JBD_DEBUG -+#define EXT3COW_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int) -+#endif -+#define EXT3COW_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION -+#define EXT3COW_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION -+ -+ -+/* -+ * Mount options -+ */ -+struct ext3cow_mount_options { -+ unsigned long s_mount_opt; -+ uid_t s_resuid; -+ gid_t s_resgid; -+ unsigned long s_commit_interval; -+#ifdef CONFIG_QUOTA -+ int s_jquota_fmt; -+ char *s_qf_names[MAXQUOTAS]; -+#endif -+}; -+ -+/* -+ * Structure of an inode on the disk -+ */ -+struct ext3cow_inode { -+ __le16 i_mode; /* File mode */ -+ __le16 i_uid; /* Low 16 bits of Owner Uid */ -+ __le32 i_size; /* Size in bytes */ -+ __le32 i_atime; /* Access time */ -+ __le32 i_ctime; /* Creation time */ -+ __le32 i_mtime; /* Modification time */ -+ __le32 i_dtime; /* Deletion Time */ -+ __le16 i_gid; /* Low 16 bits of Group Id */ -+ __le16 i_links_count; /* Links count */ -+ __le32 i_blocks; /* Blocks count */ -+ __le32 i_flags; /* File flags */ -+ union { -+ struct { -+ //__u32 l_i_reserved1; -+ /* Direct block COW bitmap -znjp */ -+ __u16 l_i_direct_cow_bitmap; -+ __u16 l_i_pad1; -+ } linux1; -+ struct { -+ __u32 h_i_translator; -+ } hurd1; -+ struct { -+ __u32 m_i_reserved1; -+ } masix1; -+ } osd1; /* OS dependent 1 */ -+ __le32 i_block[EXT3COW_N_BLOCKS];/* Pointers to blocks */ -+ __le32 i_generation; /* File version (for NFS) */ -+ __le32 i_file_acl; /* File ACL */ -+ __le32 i_dir_acl; /* Directory ACL */ -+ __le32 i_faddr; /* Fragment address */ -+ union { -+ struct { -+ //__u8 l_i_frag; /* Fragment number */ -+ //__u8 l_i_fsize; /* Fragment size */ -+ //__u16 i_pad1; -+ __le16 l_i_uid_high; /* these 2 fields */ -+ __le16 l_i_gid_high; /* were reserved2[0] */ -+ //__u32 l_i_reserved2; -+ /* Epoch number for versioning -znjp */ -+ __le32 l_i_epoch_number; -+ __u32 l_i_next_inode; -+ } linux2; -+ struct { -+ __u8 h_i_frag; /* Fragment number */ -+ __u8 h_i_fsize; /* Fragment size */ -+ __u16 h_i_mode_high; -+ __u16 h_i_uid_high; -+ __u16 h_i_gid_high; -+ __u32 h_i_author; -+ } hurd2; -+ struct { -+ __u8 m_i_frag; /* Fragment number */ -+ __u8 m_i_fsize; /* Fragment size */ -+ __u16 m_pad1; -+ __u32 m_i_reserved2[2]; -+ } masix2; -+ } osd2; /* OS dependent 2 */ -+ __le16 i_extra_isize; -+ __le16 i_pad1; -+}; -+ -+#define i_size_high i_dir_acl -+ -+#if defined(__KERNEL__) || defined(__linux__) -+/* For versioning -znjp */ -+//#define i_reserved1 osd1.linux1.l_i_reserved1 -+#define i_cowbitmap osd1.linux1.l_i_direct_cow_bitmap -+//#define i_frag osd2.linux2.l_i_frag -+//#define i_fsize osd2.linux2.l_i_fsize -+#define i_uid_low i_uid -+#define i_gid_low i_gid -+/* For versioning -znjp */ -+#define i_uid_high osd2.linux2.l_i_uid_high -+#define i_gid_high osd2.linux2.l_i_gid_high -+//#define i_reserved2 osd2.linux2.l_i_reserved2 -+#define i_epch_number osd2.linux2.l_i_epoch_number -+#define i_nxt_inode osd2.linux2.l_i_next_inode -+ -+#elif defined(__GNU__) -+ -+#define i_translator osd1.hurd1.h_i_translator -+#define i_frag osd2.hurd2.h_i_frag; -+#define i_fsize osd2.hurd2.h_i_fsize; -+#define i_uid_high osd2.hurd2.h_i_uid_high -+#define i_gid_high osd2.hurd2.h_i_gid_high -+#define i_author osd2.hurd2.h_i_author -+ -+#elif defined(__masix__) -+ -+#define i_reserved1 osd1.masix1.m_i_reserved1 -+#define i_frag osd2.masix2.m_i_frag -+#define i_fsize osd2.masix2.m_i_fsize -+#define i_reserved2 osd2.masix2.m_i_reserved2 -+ -+#endif /* defined(__KERNEL__) || defined(__linux__) */ -+ -+/* -+ * File system states -+ */ -+#define EXT3COW_VALID_FS 0x0001 /* Unmounted cleanly */ -+#define EXT3COW_ERROR_FS 0x0002 /* Errors detected */ -+#define EXT3COW_ORPHAN_FS 0x0004 /* Orphans being recovered */ -+ -+/* -+ * Mount flags -+ */ -+#define EXT3COW_MOUNT_CHECK 0x00001 /* Do mount-time checks */ -+#define EXT3COW_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */ -+#define EXT3COW_MOUNT_GRPID 0x00004 /* Create files with directory's group */ -+#define EXT3COW_MOUNT_DEBUG 0x00008 /* Some debugging messages */ -+#define EXT3COW_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ -+#define EXT3COW_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */ -+#define EXT3COW_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */ -+#define EXT3COW_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */ -+#define EXT3COW_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/ -+#define EXT3COW_MOUNT_ABORT 0x00200 /* Fatal error detected */ -+#define EXT3COW_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */ -+#define EXT3COW_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */ -+#define EXT3COW_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */ -+#define EXT3COW_MOUNT_WRITEBACK_DATA 0x00C00 /* No data ordering */ -+#define EXT3COW_MOUNT_UPDATE_JOURNAL 0x01000 /* Update the journal format */ -+#define EXT3COW_MOUNT_NO_UID32 0x02000 /* Disable 32-bit UIDs */ -+#define EXT3COW_MOUNT_XATTR_USER 0x04000 /* Extended user attributes */ -+#define EXT3COW_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */ -+#define EXT3COW_MOUNT_RESERVATION 0x10000 /* Preallocation */ -+#define EXT3COW_MOUNT_BARRIER 0x20000 /* Use block barriers */ -+#define EXT3COW_MOUNT_NOBH 0x40000 /* No bufferheads */ -+#define EXT3COW_MOUNT_QUOTA 0x80000 /* Some quota option set */ -+#define EXT3COW_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ -+#define EXT3COW_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ -+ -+/* Compatibility, for having both ext2_fs.h and ext3cow_fs.h included at once */ -+#ifndef _LINUX_EXT2_FS_H -+#define clear_opt(o, opt) o &= ~EXT3COW_MOUNT_##opt -+#define set_opt(o, opt) o |= EXT3COW_MOUNT_##opt -+#define test_opt(sb, opt) (EXT3COW_SB(sb)->s_mount_opt & \ -+ EXT3COW_MOUNT_##opt) -+#else -+#define EXT2_MOUNT_NOLOAD EXT3COW_MOUNT_NOLOAD -+#define EXT2_MOUNT_ABORT EXT3COW_MOUNT_ABORT -+#define EXT2_MOUNT_DATA_FLAGS EXT3COW_MOUNT_DATA_FLAGS -+#endif -+ -+#define ext3cow_set_bit ext2_set_bit -+#define ext3cow_set_bit_atomic ext2_set_bit_atomic -+#define ext3cow_clear_bit ext2_clear_bit -+#define ext3cow_clear_bit_atomic ext2_clear_bit_atomic -+#define ext3cow_test_bit ext2_test_bit -+#define ext3cow_find_first_zero_bit ext2_find_first_zero_bit -+#define ext3cow_find_next_zero_bit ext2_find_next_zero_bit -+ -+/* -+ * Maximal mount counts between two filesystem checks -+ */ -+#define EXT3COW_DFL_MAX_MNT_COUNT 20 /* Allow 20 mounts */ -+#define EXT3COW_DFL_CHECKINTERVAL 0 /* Don't use interval check */ -+ -+/* -+ * Behaviour when detecting errors -+ */ -+#define EXT3COW_ERRORS_CONTINUE 1 /* Continue execution */ -+#define EXT3COW_ERRORS_RO 2 /* Remount fs read-only */ -+#define EXT3COW_ERRORS_PANIC 3 /* Panic */ -+#define EXT3COW_ERRORS_DEFAULT EXT3COW_ERRORS_CONTINUE -+ -+/* -+ * Structure of the super block -+ */ -+struct ext3cow_super_block { -+/*00*/ __le32 s_inodes_count; /* Inodes count */ -+ __le32 s_blocks_count; /* Blocks count */ -+ __le32 s_r_blocks_count; /* Reserved blocks count */ -+ __le32 s_free_blocks_count; /* Free blocks count */ -+/*10*/ __le32 s_free_inodes_count; /* Free inodes count */ -+ __le32 s_first_data_block; /* First Data Block */ -+ __le32 s_log_block_size; /* Block size */ -+ __le32 s_log_frag_size; /* Fragment size */ -+/*20*/ __le32 s_blocks_per_group; /* # Blocks per group */ -+ __le32 s_frags_per_group; /* # Fragments per group */ -+ __le32 s_inodes_per_group; /* # Inodes per group */ -+ __le32 s_mtime; /* Mount time */ -+/*30*/ __le32 s_wtime; /* Write time */ -+ __le16 s_mnt_count; /* Mount count */ -+ __le16 s_max_mnt_count; /* Maximal mount count */ -+ __le16 s_magic; /* Magic signature */ -+ __le16 s_state; /* File system state */ -+ __le16 s_errors; /* Behaviour when detecting errors */ -+ __le16 s_minor_rev_level; /* minor revision level */ -+/*40*/ __le32 s_lastcheck; /* time of last check */ -+ __le32 s_checkinterval; /* max. time between checks */ -+ __le32 s_creator_os; /* OS */ -+ __le32 s_rev_level; /* Revision level */ -+/*50*/ __le16 s_def_resuid; /* Default uid for reserved blocks */ -+ __le16 s_def_resgid; /* Default gid for reserved blocks */ -+ /* -+ * These fields are for EXT3COW_DYNAMIC_REV superblocks only. -+ * -+ * Note: the difference between the compatible feature set and -+ * the incompatible feature set is that if there is a bit set -+ * in the incompatible feature set that the kernel doesn't -+ * know about, it should refuse to mount the filesystem. -+ * -+ * e2fsck's requirements are more strict; if it doesn't know -+ * about a feature in either the compatible or incompatible -+ * feature set, it must abort and not try to meddle with -+ * things it doesn't understand... -+ */ -+ __le32 s_first_ino; /* First non-reserved inode */ -+ __le16 s_inode_size; /* size of inode structure */ -+ __le16 s_block_group_nr; /* block group # of this superblock */ -+ __le32 s_feature_compat; /* compatible feature set */ -+/*60*/ __le32 s_feature_incompat; /* incompatible feature set */ -+ __le32 s_feature_ro_compat; /* readonly-compatible feature set */ -+/*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */ -+/*78*/ char s_volume_name[16]; /* volume name */ -+/*88*/ char s_last_mounted[64]; /* directory where last mounted */ -+/*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */ -+ /* -+ * Performance hints. Directory preallocation should only -+ * happen if the EXT3COW_FEATURE_COMPAT_DIR_PREALLOC flag is on. -+ */ -+ __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate*/ -+ __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */ -+ __le16 s_reserved_gdt_blocks; /* Per group desc for online growth */ -+ /* -+ * Journaling support valid if EXT3COW_FEATURE_COMPAT_HAS_JOURNAL set. -+ */ -+/*D0*/ __u8 s_journal_uuid[16]; /* uuid of journal superblock */ -+/*E0*/ __le32 s_journal_inum; /* inode number of journal file */ -+ __le32 s_journal_dev; /* device number of journal file */ -+ __le32 s_last_orphan; /* start of list of inodes to delete */ -+ __le32 s_hash_seed[4]; /* HTREE hash seed */ -+ __u8 s_def_hash_version; /* Default hash version to use */ -+ __u8 s_reserved_char_pad; -+ __u16 s_reserved_word_pad; -+ __le32 s_default_mount_opts; -+ __le32 s_first_meta_bg; /* First metablock block group */ -+ /* Added for version - znjp */ -+ __le32 s_epoch_number; -+ __u32 s_reserved[189]; /* Padding to the end of the block */ -+}; -+ -+#ifdef __KERNEL__ -+#include -+#include -+static inline struct ext3cow_sb_info * EXT3COW_SB(struct super_block *sb) -+{ -+ return sb->s_fs_info; -+} -+static inline struct ext3cow_inode_info *EXT3COW_I(struct inode *inode) -+{ -+ return container_of(inode, struct ext3cow_inode_info, vfs_inode); -+} -+ -+static inline int ext3cow_valid_inum(struct super_block *sb, unsigned long ino) -+{ -+ return ino == EXT3COW_ROOT_INO || -+ ino == EXT3COW_JOURNAL_INO || -+ ino == EXT3COW_RESIZE_INO || -+ (ino >= EXT3COW_FIRST_INO(sb) && -+ ino <= le32_to_cpu(EXT3COW_SB(sb)->s_es->s_inodes_count)); -+} -+#else -+/* Assume that user mode programs are passing in an ext3cowfs superblock, not -+ * a kernel struct super_block. This will allow us to call the feature-test -+ * macros from user land. */ -+#define EXT3COW_SB(sb) (sb) -+#endif -+ -+#define NEXT_ORPHAN(inode) EXT3COW_I(inode)->i_dtime -+ -+/* -+ * Codes for operating systems -+ */ -+#define EXT3COW_OS_LINUX 0 -+#define EXT3COW_OS_HURD 1 -+#define EXT3COW_OS_MASIX 2 -+#define EXT3COW_OS_FREEBSD 3 -+#define EXT3COW_OS_LITES 4 -+ -+/* -+ * Revision levels -+ */ -+#define EXT3COW_GOOD_OLD_REV 0 /* The good old (original) format */ -+#define EXT3COW_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */ -+ -+#define EXT3COW_CURRENT_REV EXT3COW_GOOD_OLD_REV -+#define EXT3COW_MAX_SUPP_REV EXT3COW_DYNAMIC_REV -+ -+#define EXT3COW_GOOD_OLD_INODE_SIZE 128 -+ -+/* -+ * Feature set definitions -+ */ -+ -+#define EXT3COW_HAS_COMPAT_FEATURE(sb,mask) \ -+ ( EXT3COW_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask) ) -+#define EXT3COW_HAS_RO_COMPAT_FEATURE(sb,mask) \ -+ ( EXT3COW_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask) ) -+#define EXT3COW_HAS_INCOMPAT_FEATURE(sb,mask) \ -+ ( EXT3COW_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask) ) -+#define EXT3COW_SET_COMPAT_FEATURE(sb,mask) \ -+ EXT3COW_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask) -+#define EXT3COW_SET_RO_COMPAT_FEATURE(sb,mask) \ -+ EXT3COW_SB(sb)->s_es->s_feature_ro_compat |= cpu_to_le32(mask) -+#define EXT3COW_SET_INCOMPAT_FEATURE(sb,mask) \ -+ EXT3COW_SB(sb)->s_es->s_feature_incompat |= cpu_to_le32(mask) -+#define EXT3COW_CLEAR_COMPAT_FEATURE(sb,mask) \ -+ EXT3COW_SB(sb)->s_es->s_feature_compat &= ~cpu_to_le32(mask) -+#define EXT3COW_CLEAR_RO_COMPAT_FEATURE(sb,mask) \ -+ EXT3COW_SB(sb)->s_es->s_feature_ro_compat &= ~cpu_to_le32(mask) -+#define EXT3COW_CLEAR_INCOMPAT_FEATURE(sb,mask) \ -+ EXT3COW_SB(sb)->s_es->s_feature_incompat &= ~cpu_to_le32(mask) -+ -+#define EXT3COW_FEATURE_COMPAT_DIR_PREALLOC 0x0001 -+#define EXT3COW_FEATURE_COMPAT_IMAGIC_INODES 0x0002 -+#define EXT3COW_FEATURE_COMPAT_HAS_JOURNAL 0x0004 -+#define EXT3COW_FEATURE_COMPAT_EXT_ATTR 0x0008 -+#define EXT3COW_FEATURE_COMPAT_RESIZE_INODE 0x0010 -+#define EXT3COW_FEATURE_COMPAT_DIR_INDEX 0x0020 -+ -+#define EXT3COW_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 -+#define EXT3COW_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 -+#define EXT3COW_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 -+ -+#define EXT3COW_FEATURE_INCOMPAT_COMPRESSION 0x0001 -+#define EXT3COW_FEATURE_INCOMPAT_FILETYPE 0x0002 -+#define EXT3COW_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ -+#define EXT3COW_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ -+#define EXT3COW_FEATURE_INCOMPAT_META_BG 0x0010 -+ -+#define EXT3COW_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR -+#define EXT3COW_FEATURE_INCOMPAT_SUPP (EXT3COW_FEATURE_INCOMPAT_FILETYPE| \ -+ EXT3COW_FEATURE_INCOMPAT_RECOVER| \ -+ EXT3COW_FEATURE_INCOMPAT_META_BG) -+#define EXT3COW_FEATURE_RO_COMPAT_SUPP (EXT3COW_FEATURE_RO_COMPAT_SPARSE_SUPER| \ -+ EXT3COW_FEATURE_RO_COMPAT_LARGE_FILE| \ -+ EXT3COW_FEATURE_RO_COMPAT_BTREE_DIR) -+ -+/* -+ * Default values for user and/or group using reserved blocks -+ */ -+#define EXT3COW_DEF_RESUID 0 -+#define EXT3COW_DEF_RESGID 0 -+ -+/* -+ * Default mount options -+ */ -+#define EXT3COW_DEFM_DEBUG 0x0001 -+#define EXT3COW_DEFM_BSDGROUPS 0x0002 -+#define EXT3COW_DEFM_XATTR_USER 0x0004 -+#define EXT3COW_DEFM_ACL 0x0008 -+#define EXT3COW_DEFM_UID16 0x0010 -+#define EXT3COW_DEFM_JMODE 0x0060 -+#define EXT3COW_DEFM_JMODE_DATA 0x0020 -+#define EXT3COW_DEFM_JMODE_ORDERED 0x0040 -+#define EXT3COW_DEFM_JMODE_WBACK 0x0060 -+ -+/* -+ * Structure of a directory entry -+ */ -+#define EXT3COW_NAME_LEN 255 -+ -+struct ext3cow_dir_entry { -+ __le32 inode; /* Inode number */ -+ __le16 rec_len; /* Directory entry length */ -+ __le16 name_len; /* Name length */ -+ char name[EXT3COW_NAME_LEN]; /* File name */ -+}; -+ -+/* -+ * The new version of the directory entry. Since EXT3COW structures are -+ * stored in intel byte order, and the name_len field could never be -+ * bigger than 255 chars, it's safe to reclaim the extra byte for the -+ * file_type field. -+ */ -+struct ext3cow_dir_entry_2 { -+ __le32 inode; /* Inode number */ -+ __le16 rec_len; /* Directory entry length */ -+ __u8 name_len; /* Name length */ -+ __u8 file_type; -+ /* Added for versioning - znjp */ -+ __u32 birth_epoch; -+ __u32 death_epoch; -+ char name[EXT3COW_NAME_LEN]; /* File name */ -+}; -+ -+/* -+ * Ext3 directory file types. Only the low 3 bits are used. The -+ * other bits are reserved for now. -+ */ -+#define EXT3COW_FT_UNKNOWN 0 -+#define EXT3COW_FT_REG_FILE 1 -+#define EXT3COW_FT_DIR 2 -+#define EXT3COW_FT_CHRDEV 3 -+#define EXT3COW_FT_BLKDEV 4 -+#define EXT3COW_FT_FIFO 5 -+#define EXT3COW_FT_SOCK 6 -+#define EXT3COW_FT_SYMLINK 7 -+ -+#define EXT3COW_FT_MAX 8 -+ -+/* Versioning macros - znjp */ -+#define EXT3COW_DIRENT_ALIVE 0 -+#define EXT3COW_IS_DIRENT_ALIVE(de) ((le32_to_cpu(de->death_epoch) == EXT3COW_DIRENT_ALIVE)) -+#define EXT3COW_IS_DIRENT_SCOPED(de, epoch) \ -+((le32_to_cpu(de->birth_epoch) <= epoch) && \ -+(EXT3COW_IS_DIRENT_ALIVE(de) || (!EXT3COW_IS_DIRENT_ALIVE(de) && \ -+le32_to_cpu(de->death_epoch) > epoch))) -+#define EXT3COW_I_EPOCHNUMBER(inode) (((unsigned int)EXT3COW_I(inode)->i_epoch_number)) -+#define EXT3COW_S_EPOCHNUMBER(sb) (((unsigned int)EXT3COW_SB(sb)->s_epoch_number)) -+#define EXT3COW_I_NEXT_INODE(inode) (((unsigned int)EXT3COW_I(inode)->i_next_inode)) -+#define EXT3COW_IS_UNVERSIONABLE(inode) (((unsigned int)EXT3COW_I(inode)->i_flags & EXT3COW_UNVERSIONABLE_FL)) -+#define EXT3COW_IS_UNCHANGEABLE(inode) (((unsigned int)EXT3COW_I(inode)->i_flags & EXT3COW_UNCHANGEABLE_FL)) -+#define EXT3COW_IS_FAKEINODE(inode) (((unsigned int)EXT3COW_I(inode)->i_flags & EXT3COW_FAKEINODE_FL)) -+ -+ -+/* -+ * EXT3COW_DIR_PAD defines the directory entries boundaries -+ * -+ * NOTE: It must be a multiple of 4 -+ */ -+#define EXT3COW_DIR_PAD 4 -+#define EXT3COW_DIR_ROUND (EXT3COW_DIR_PAD - 1) -+/* Added 8 to account for birth and death epochs -znjp */ -+#define EXT3COW_DIR_REC_LEN(name_len) (((name_len) + 16 + EXT3COW_DIR_ROUND) & \ -+ ~EXT3COW_DIR_ROUND) -+/* -+ * Hash Tree Directory indexing -+ * (c) Daniel Phillips, 2001 -+ */ -+ -+#ifdef CONFIG_EXT3COW_INDEX -+ #define is_dx(dir) (EXT3COW_HAS_COMPAT_FEATURE(dir->i_sb, \ -+ EXT3COW_FEATURE_COMPAT_DIR_INDEX) && \ -+ (EXT3COW_I(dir)->i_flags & EXT3COW_INDEX_FL)) -+#define EXT3COW_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3COW_LINK_MAX) -+#define EXT3COW_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1) -+#else -+ #define is_dx(dir) 0 -+#define EXT3COW_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT3COW_LINK_MAX) -+#define EXT3COW_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2) -+#endif -+ -+/* Legal values for the dx_root hash_version field: */ -+ -+#define DX_HASH_LEGACY 0 -+#define DX_HASH_HALF_MD4 1 -+#define DX_HASH_TEA 2 -+ -+#ifdef __KERNEL__ -+ -+/* hash info structure used by the directory hash */ -+struct dx_hash_info -+{ -+ u32 hash; -+ u32 minor_hash; -+ int hash_version; -+ u32 *seed; -+}; -+ -+#define EXT3COW_HTREE_EOF 0x7fffffff -+ -+/* -+ * Control parameters used by ext3cow_htree_next_block -+ */ -+#define HASH_NB_ALWAYS 1 -+ -+ -+/* -+ * Describe an inode's exact location on disk and in memory -+ */ -+struct ext3cow_iloc -+{ -+ struct buffer_head *bh; -+ unsigned long offset; -+ unsigned long block_group; -+}; -+ -+static inline struct ext3cow_inode *ext3cow_raw_inode(struct ext3cow_iloc *iloc) -+{ -+ return (struct ext3cow_inode *) (iloc->bh->b_data + iloc->offset); -+} -+ -+/* -+ * This structure is stuffed into the struct file's private_data field -+ * for directories. It is where we put information so that we can do -+ * readdir operations in hash tree order. -+ */ -+struct dir_private_info { -+ struct rb_root root; -+ struct rb_node *curr_node; -+ struct fname *extra_fname; -+ loff_t last_pos; -+ __u32 curr_hash; -+ __u32 curr_minor_hash; -+ __u32 next_hash; -+}; -+ -+/* calculate the first block number of the group */ -+static inline ext3cow_fsblk_t -+ext3cow_group_first_block_no(struct super_block *sb, unsigned long group_no) -+{ -+ return group_no * (ext3cow_fsblk_t)EXT3COW_BLOCKS_PER_GROUP(sb) + -+ le32_to_cpu(EXT3COW_SB(sb)->s_es->s_first_data_block); -+} -+ -+/* -+ * Special error return code only used by dx_probe() and its callers. -+ */ -+#define ERR_BAD_DX_DIR -75000 -+ -+/* -+ * Function prototypes -+ */ -+ -+/* -+ * Ok, these declarations are also in but none of the -+ * ext3cow source programs needs to include it so they are duplicated here. -+ */ -+# define NORET_TYPE /**/ -+# define ATTRIB_NORET __attribute__((noreturn)) -+# define NORET_AND noreturn, -+ -+/* balloc.c */ -+extern int ext3cow_bg_has_super(struct super_block *sb, int group); -+extern unsigned long ext3cow_bg_num_gdb(struct super_block *sb, int group); -+extern ext3cow_fsblk_t ext3cow_new_block (handle_t *handle, struct inode *inode, -+ ext3cow_fsblk_t goal, int *errp); -+extern ext3cow_fsblk_t ext3cow_new_blocks (handle_t *handle, struct inode *inode, -+ ext3cow_fsblk_t goal, unsigned long *count, int *errp); -+extern void ext3cow_free_blocks (handle_t *handle, struct inode *inode, -+ ext3cow_fsblk_t block, unsigned long count); -+extern void ext3cow_free_blocks_sb (handle_t *handle, struct super_block *sb, -+ ext3cow_fsblk_t block, unsigned long count, -+ unsigned long *pdquot_freed_blocks); -+extern ext3cow_fsblk_t ext3cow_count_free_blocks (struct super_block *); -+extern void ext3cow_check_blocks_bitmap (struct super_block *); -+extern struct ext3cow_group_desc * ext3cow_get_group_desc(struct super_block * sb, -+ unsigned int block_group, -+ struct buffer_head ** bh); -+extern int ext3cow_should_retry_alloc(struct super_block *sb, int *retries); -+extern void ext3cow_init_block_alloc_info(struct inode *); -+extern void ext3cow_rsv_window_add(struct super_block *sb, struct ext3cow_reserve_window_node *rsv); -+ -+ -+/* dir.c */ -+extern int ext3cow_check_dir_entry(const char *, struct inode *, -+ struct ext3cow_dir_entry_2 *, -+ struct buffer_head *, unsigned long); -+extern int ext3cow_htree_store_dirent(struct file *dir_file, __u32 hash, -+ __u32 minor_hash, -+ struct ext3cow_dir_entry_2 *dirent); -+extern void ext3cow_htree_free_dir_info(struct dir_private_info *p); -+ -+/* fsync.c */ -+extern int ext3cow_sync_file (struct file *, struct dentry *, int); -+ -+/* hash.c */ -+extern int ext3cowfs_dirhash(const char *name, int len, struct -+ dx_hash_info *hinfo); -+ -+/* ialloc.c */ -+extern struct inode * ext3cow_new_inode (handle_t *, struct inode *, int); -+extern void ext3cow_free_inode (handle_t *, struct inode *); -+extern struct inode * ext3cow_orphan_get (struct super_block *, unsigned long); -+extern unsigned long ext3cow_count_free_inodes (struct super_block *); -+extern unsigned long ext3cow_count_dirs (struct super_block *); -+extern void ext3cow_check_inodes_bitmap (struct super_block *); -+extern unsigned long ext3cow_count_free (struct buffer_head *, unsigned); -+ -+ -+/* inode.c */ -+int ext3cow_forget(handle_t *handle, int is_metadata, struct inode *inode, -+ struct buffer_head *bh, ext3cow_fsblk_t blocknr); -+struct buffer_head * ext3cow_getblk (handle_t *, struct inode *, long, int, int *); -+struct buffer_head * ext3cow_bread (handle_t *, struct inode *, int, int, int *); -+int ext3cow_get_blocks_handle(handle_t *handle, struct inode *inode, -+ sector_t iblock, unsigned long maxblocks, struct buffer_head *bh_result, -+ int create, int extend_disksize); -+ -+extern void ext3cow_read_inode (struct inode *); -+extern int ext3cow_write_inode (struct inode *, int); -+extern int ext3cow_setattr (struct dentry *, struct iattr *); -+extern void ext3cow_delete_inode (struct inode *); -+extern int ext3cow_sync_inode (handle_t *, struct inode *); -+extern void ext3cow_discard_reservation (struct inode *); -+extern void ext3cow_dirty_inode(struct inode *); -+extern int ext3cow_change_inode_journal_flag(struct inode *, int); -+extern int ext3cow_get_inode_loc(struct inode *, struct ext3cow_iloc *); -+extern void ext3cow_truncate (struct inode *); -+extern void ext3cow_set_inode_flags(struct inode *); -+extern void ext3cow_set_aops(struct inode *inode); -+ -+/* ioctl.c */ -+extern int ext3cow_ioctl (struct inode *, struct file *, unsigned int, -+ unsigned long); -+extern long ext3cow_compat_ioctl (struct file *, unsigned int, unsigned long); -+ -+/* namei.c */ -+extern int is_unchangeable(struct inode *, struct dentry *); -+extern int ext3cow_orphan_add(handle_t *, struct inode *); -+extern int ext3cow_orphan_del(handle_t *, struct inode *); -+extern int ext3cow_htree_fill_tree(struct file *dir_file, __u32 start_hash, -+ __u32 start_minor_hash, __u32 *next_hash); -+extern struct inode *ext3cow_fake_inode(struct inode *, unsigned int); -+extern int ext3cow_dup_inode(struct inode *, struct inode *); -+extern int ext3cow_reclaim_dup_inode(struct inode *, struct inode *); -+ -+/* resize.c */ -+extern int ext3cow_group_add(struct super_block *sb, -+ struct ext3cow_new_group_data *input); -+extern int ext3cow_group_extend(struct super_block *sb, -+ struct ext3cow_super_block *es, -+ ext3cow_fsblk_t n_blocks_count); -+ -+/* super.c */ -+extern void ext3cow_error (struct super_block *, const char *, const char *, ...) -+ __attribute__ ((format (printf, 3, 4))); -+extern void __ext3cow_std_error (struct super_block *, const char *, int); -+extern void ext3cow_abort (struct super_block *, const char *, const char *, ...) -+ __attribute__ ((format (printf, 3, 4))); -+extern void ext3cow_warning (struct super_block *, const char *, const char *, ...) -+ __attribute__ ((format (printf, 3, 4))); -+extern void ext3cow_update_dynamic_rev (struct super_block *sb); -+extern unsigned int ext3cow_take_snapshot(struct super_block *sb); -+ -+#define ext3cow_std_error(sb, errno) \ -+do { \ -+ if ((errno)) \ -+ __ext3cow_std_error((sb), __FUNCTION__, (errno)); \ -+} while (0) -+ -+/* -+ * Inodes and files operations -+ */ -+ -+/* dir.c */ -+extern const struct file_operations ext3cow_dir_operations; -+ -+/* file.c */ -+extern struct inode_operations ext3cow_file_inode_operations; -+extern const struct file_operations ext3cow_file_operations; -+ -+/* namei.c */ -+extern struct inode_operations ext3cow_dir_inode_operations; -+extern struct inode_operations ext3cow_special_inode_operations; -+ -+/* symlink.c */ -+extern struct inode_operations ext3cow_symlink_inode_operations; -+extern struct inode_operations ext3cow_fast_symlink_inode_operations; -+ -+ -+#endif /* __KERNEL__ */ -+ -+#endif /* _LINUX_EXT3COW_FS_H */ -diff -Naur linux-2.6.21.7/include/linux/ext3cow_fs_i.h linux-2.6.21.7_ext3cowPatched/include/linux/ext3cow_fs_i.h ---- linux-2.6.21.7/include/linux/ext3cow_fs_i.h 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.21.7_ext3cowPatched/include/linux/ext3cow_fs_i.h 2007-10-23 17:48:10.000000000 +0200 -@@ -0,0 +1,152 @@ -+/* -+ * linux/include/linux/ext3cow_fs_i.h -+ * -+ * Copyright (C) 1992, 1993, 1994, 1995 -+ * Remy Card (card@masi.ibp.fr) -+ * Laboratoire MASI - Institut Blaise Pascal -+ * Universite Pierre et Marie Curie (Paris VI) -+ * -+ * from -+ * -+ * linux/include/linux/minix_fs_i.h -+ * -+ * Copyright (C) 1991, 1992 Linus Torvalds -+ */ -+ -+#ifndef _LINUX_EXT3COW_FS_I -+#define _LINUX_EXT3COW_FS_I -+ -+#include -+#include -+#include -+#include -+ -+/* data type for block offset of block group */ -+typedef int ext3cow_grpblk_t; -+ -+/* data type for filesystem-wide blocks number */ -+typedef unsigned long ext3cow_fsblk_t; -+ -+#define E3FSBLK "%lu" -+ -+struct ext3cow_reserve_window { -+ ext3cow_fsblk_t _rsv_start; /* First byte reserved */ -+ ext3cow_fsblk_t _rsv_end; /* Last byte reserved or 0 */ -+}; -+ -+struct ext3cow_reserve_window_node { -+ struct rb_node rsv_node; -+ __u32 rsv_goal_size; -+ __u32 rsv_alloc_hit; -+ struct ext3cow_reserve_window rsv_window; -+}; -+ -+struct ext3cow_block_alloc_info { -+ /* information about reservation window */ -+ struct ext3cow_reserve_window_node rsv_window_node; -+ /* -+ * was i_next_alloc_block in ext3cow_inode_info -+ * is the logical (file-relative) number of the -+ * most-recently-allocated block in this file. -+ * We use this for detecting linearly ascending allocation requests. -+ */ -+ __u32 last_alloc_logical_block; -+ /* -+ * Was i_next_alloc_goal in ext3cow_inode_info -+ * is the *physical* companion to i_next_alloc_block. -+ * it the the physical block number of the block which was most-recentl -+ * allocated to this file. This give us the goal (target) for the next -+ * allocation when we detect linearly ascending requests. -+ */ -+ ext3cow_fsblk_t last_alloc_physical_block; -+}; -+ -+#define rsv_start rsv_window._rsv_start -+#define rsv_end rsv_window._rsv_end -+ -+/* -+ * third extended file system inode data in memory -+ */ -+struct ext3cow_inode_info { -+ __le32 i_data[15]; /* unconverted */ -+ __u32 i_flags; -+#ifdef EXT3COW_FRAGMENTS -+ __u32 i_faddr; -+ __u8 i_frag_no; -+ __u8 i_frag_size; -+#endif -+ ext3cow_fsblk_t i_file_acl; -+ __u32 i_dir_acl; -+ __u32 i_dtime; -+ -+ /* -+ * i_block_group is the number of the block group which contains -+ * this file's inode. Constant across the lifetime of the inode, -+ * it is ued for making block allocation decisions - we try to -+ * place a file's data blocks near its inode block, and new inodes -+ * near to their parent directory's inode. -+ */ -+ __u32 i_block_group; -+ __u32 i_state; /* Dynamic state flags for ext3cow */ -+ -+ /* block reservation info */ -+ struct ext3cow_block_alloc_info *i_block_alloc_info; -+ -+ __u32 i_dir_start_lookup; -+ -+ /* For versioning -znjp */ -+ __u16 i_cow_bitmap; -+ __u32 i_epoch_number; -+ __u32 i_next_inode; -+#ifdef CONFIG_EXT3COW_FS_XATTR -+ /* -+ * Extended attributes can be read independently of the main file -+ * data. Taking i_mutex even when reading would cause contention -+ * between readers of EAs and writers of regular file data, so -+ * instead we synchronize on xattr_sem when reading or changing -+ * EAs. -+ */ -+ struct rw_semaphore xattr_sem; -+#endif -+#ifdef CONFIG_EXT3COW_FS_POSIX_ACL -+ struct posix_acl *i_acl; -+ struct posix_acl *i_default_acl; -+#endif -+ -+ struct list_head i_orphan; /* unlinked but open inodes */ -+ -+ /* -+ * i_disksize keeps track of what the inode size is ON DISK, not -+ * in memory. During truncate, i_size is set to the new size by -+ * the VFS prior to calling ext3cow_truncate(), but the filesystem won't -+ * set i_disksize to 0 until the truncate is actually under way. -+ * -+ * The intent is that i_disksize always represents the blocks which -+ * are used by this file. This allows recovery to restart truncate -+ * on orphans if we crash during truncate. We actually write i_disksize -+ * into the on-disk inode when writing inodes out, instead of i_size. -+ * -+ * The only time when i_disksize and i_size may be different is when -+ * a truncate is in progress. The only things which change i_disksize -+ * are ext3cow_get_block (growth) and ext3cow_truncate (shrinkth). -+ */ -+ loff_t i_disksize; -+ -+ /* on-disk additional length */ -+ __u16 i_extra_isize; -+ -+ /* -+ * truncate_mutex is for serialising ext3cow_truncate() against -+ * ext3cow_getblock(). In the 2.4 ext2 design, great chunks of inode's -+ * data tree are chopped off during truncate. We can't do that in -+ * ext3cow because whenever we perform intermediate commits during -+ * truncate, the inode and all the metadata blocks *must* be in a -+ * consistent state which allows truncation of the orphans to restart -+ * during recovery. Hence we must fix the get_block-vs-truncate race -+ * by other means, so we have truncate_mutex. -+ */ -+ struct mutex truncate_mutex; -+ struct inode vfs_inode; -+}; -+ -+#endif /* _LINUX_EXT3COW_FS_I */ -diff -Naur linux-2.6.21.7/include/linux/ext3cow_fs_sb.h linux-2.6.21.7_ext3cowPatched/include/linux/ext3cow_fs_sb.h ---- linux-2.6.21.7/include/linux/ext3cow_fs_sb.h 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.21.7_ext3cowPatched/include/linux/ext3cow_fs_sb.h 2007-10-23 17:48:10.000000000 +0200 -@@ -0,0 +1,86 @@ -+/* -+ * linux/include/linux/ext3cow_fs_sb.h -+ * -+ * Copyright (C) 1992, 1993, 1994, 1995 -+ * Remy Card (card@masi.ibp.fr) -+ * Laboratoire MASI - Institut Blaise Pascal -+ * Universite Pierre et Marie Curie (Paris VI) -+ * -+ * from -+ * -+ * linux/include/linux/minix_fs_sb.h -+ * -+ * Copyright (C) 1991, 1992 Linus Torvalds -+ */ -+ -+#ifndef _LINUX_EXT3COW_FS_SB -+#define _LINUX_EXT3COW_FS_SB -+ -+#ifdef __KERNEL__ -+#include -+#include -+#include -+#include -+#endif -+#include -+ -+/* -+ * third extended-fs super-block data in memory -+ */ -+struct ext3cow_sb_info { -+ unsigned long s_frag_size; /* Size of a fragment in bytes */ -+ unsigned long s_frags_per_block;/* Number of fragments per block */ -+ unsigned long s_inodes_per_block;/* Number of inodes per block */ -+ unsigned long s_frags_per_group;/* Number of fragments in a group */ -+ unsigned long s_blocks_per_group;/* Number of blocks in a group */ -+ unsigned long s_inodes_per_group;/* Number of inodes in a group */ -+ unsigned long s_itb_per_group; /* Number of inode table blocks per group */ -+ unsigned long s_gdb_count; /* Number of group descriptor blocks */ -+ unsigned long s_desc_per_block; /* Number of group descriptors per block */ -+ unsigned long s_groups_count; /* Number of groups in the fs */ -+ struct buffer_head * s_sbh; /* Buffer containing the super block */ -+ struct ext3cow_super_block * s_es; /* Pointer to the super block in the buffer */ -+ struct buffer_head ** s_group_desc; -+ unsigned long s_mount_opt; -+ uid_t s_resuid; -+ gid_t s_resgid; -+ unsigned short s_mount_state; -+ unsigned short s_pad; -+ int s_addr_per_block_bits; -+ int s_desc_per_block_bits; -+ int s_inode_size; -+ int s_first_ino; -+ spinlock_t s_next_gen_lock; -+ u32 s_next_generation; -+ u32 s_hash_seed[4]; -+ int s_def_hash_version; -+ struct percpu_counter s_freeblocks_counter; -+ struct percpu_counter s_freeinodes_counter; -+ struct percpu_counter s_dirs_counter; -+ struct blockgroup_lock s_blockgroup_lock; -+ -+ /* root of the per fs reservation window tree */ -+ spinlock_t s_rsv_window_lock; -+ struct rb_root s_rsv_window_root; -+ struct ext3cow_reserve_window_node s_rsv_window_head; -+ -+ /* For versioning -znjp */ -+ u32 s_epoch_number; -+ -+ /* Journaling */ -+ struct inode * s_journal_inode; -+ struct journal_s * s_journal; -+ struct list_head s_orphan; -+ unsigned long s_commit_interval; -+ struct block_device *journal_bdev; -+#ifdef CONFIG_JBD_DEBUG -+ struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ -+ wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */ -+#endif -+#ifdef CONFIG_QUOTA -+ char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ -+ int s_jquota_fmt; /* Format of quota to use */ -+#endif -+}; -+ -+#endif /* _LINUX_EXT3COW_FS_SB */ -diff -Naur linux-2.6.21.7/include/linux/ext3cow_jbd.h linux-2.6.21.7_ext3cowPatched/include/linux/ext3cow_jbd.h ---- linux-2.6.21.7/include/linux/ext3cow_jbd.h 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.21.7_ext3cowPatched/include/linux/ext3cow_jbd.h 2007-10-23 17:48:10.000000000 +0200 -@@ -0,0 +1,226 @@ -+/* -+ * linux/include/linux/ext3cow_jbd.h -+ * -+ * Written by Stephen C. Tweedie , 1999 -+ * -+ * Copyright 1998--1999 Red Hat corp --- All Rights Reserved -+ * -+ * This file is part of the Linux kernel and is made available under -+ * the terms of the GNU General Public License, version 2, or at your -+ * option, any later version, incorporated herein by reference. -+ * -+ * Ext3-specific journaling extensions. -+ */ -+ -+#ifndef _LINUX_EXT3COW_JBD_H -+#define _LINUX_EXT3COW_JBD_H -+ -+#include -+#include -+#include -+ -+#define EXT3COW_JOURNAL(inode) (EXT3COW_SB((inode)->i_sb)->s_journal) -+ -+/* Define the number of blocks we need to account to a transaction to -+ * modify one block of data. -+ * -+ * We may have to touch one inode, one bitmap buffer, up to three -+ * indirection blocks, the group and superblock summaries, and the data -+ * block to complete the transaction. */ -+ -+#define EXT3COW_SINGLEDATA_TRANS_BLOCKS 8U -+ -+/* Extended attribute operations touch at most two data buffers, -+ * two bitmap buffers, and two group summaries, in addition to the inode -+ * and the superblock, which are already accounted for. */ -+ -+#define EXT3COW_XATTR_TRANS_BLOCKS 6U -+ -+/* Define the minimum size for a transaction which modifies data. This -+ * needs to take into account the fact that we may end up modifying two -+ * quota files too (one for the group, one for the user quota). The -+ * superblock only gets updated once, of course, so don't bother -+ * counting that again for the quota updates. */ -+ -+#define EXT3COW_DATA_TRANS_BLOCKS(sb) (EXT3COW_SINGLEDATA_TRANS_BLOCKS + \ -+ EXT3COW_XATTR_TRANS_BLOCKS - 2 + \ -+ 2*EXT3COW_QUOTA_TRANS_BLOCKS(sb)) -+ -+/* Delete operations potentially hit one directory's namespace plus an -+ * entire inode, plus arbitrary amounts of bitmap/indirection data. Be -+ * generous. We can grow the delete transaction later if necessary. */ -+ -+#define EXT3COW_DELETE_TRANS_BLOCKS(sb) (2 * EXT3COW_DATA_TRANS_BLOCKS(sb) + 64) -+ -+/* Define an arbitrary limit for the amount of data we will anticipate -+ * writing to any given transaction. For unbounded transactions such as -+ * write(2) and truncate(2) we can write more than this, but we always -+ * start off at the maximum transaction size and grow the transaction -+ * optimistically as we go. */ -+ -+#define EXT3COW_MAX_TRANS_DATA 64U -+ -+/* We break up a large truncate or write transaction once the handle's -+ * buffer credits gets this low, we need either to extend the -+ * transaction or to start a new one. Reserve enough space here for -+ * inode, bitmap, superblock, group and indirection updates for at least -+ * one block, plus two quota updates. Quota allocations are not -+ * needed. */ -+ -+#define EXT3COW_RESERVE_TRANS_BLOCKS 12U -+ -+#define EXT3COW_INDEX_EXTRA_TRANS_BLOCKS 8 -+ -+#ifdef CONFIG_QUOTA -+/* Amount of blocks needed for quota update - we know that the structure was -+ * allocated so we need to update only inode+data */ -+#define EXT3COW_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0) -+/* Amount of blocks needed for quota insert/delete - we do some block writes -+ * but inode, sb and group updates are done only once */ -+#define EXT3COW_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\ -+ (EXT3COW_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_INIT_REWRITE) : 0) -+#define EXT3COW_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\ -+ (EXT3COW_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_DEL_REWRITE) : 0) -+#else -+#define EXT3COW_QUOTA_TRANS_BLOCKS(sb) 0 -+#define EXT3COW_QUOTA_INIT_BLOCKS(sb) 0 -+#define EXT3COW_QUOTA_DEL_BLOCKS(sb) 0 -+#endif -+ -+int -+ext3cow_mark_iloc_dirty(handle_t *handle, -+ struct inode *inode, -+ struct ext3cow_iloc *iloc); -+ -+/* -+ * On success, We end up with an outstanding reference count against -+ * iloc->bh. This _must_ be cleaned up later. -+ */ -+ -+int ext3cow_reserve_inode_write(handle_t *handle, struct inode *inode, -+ struct ext3cow_iloc *iloc); -+ -+int ext3cow_mark_inode_dirty(handle_t *handle, struct inode *inode); -+ -+/* -+ * Wrapper functions with which ext3cow calls into JBD. The intent here is -+ * to allow these to be turned into appropriate stubs so ext3cow can control -+ * ext2 filesystems, so ext2+ext3cow systems only nee one fs. This work hasn't -+ * been done yet. -+ */ -+ -+static inline void ext3cow_journal_release_buffer(handle_t *handle, -+ struct buffer_head *bh) -+{ -+ journal_release_buffer(handle, bh); -+} -+ -+void ext3cow_journal_abort_handle(const char *caller, const char *err_fn, -+ struct buffer_head *bh, handle_t *handle, int err); -+ -+int __ext3cow_journal_get_undo_access(const char *where, handle_t *handle, -+ struct buffer_head *bh); -+ -+int __ext3cow_journal_get_write_access(const char *where, handle_t *handle, -+ struct buffer_head *bh); -+ -+int __ext3cow_journal_forget(const char *where, handle_t *handle, -+ struct buffer_head *bh); -+ -+int __ext3cow_journal_revoke(const char *where, handle_t *handle, -+ unsigned long blocknr, struct buffer_head *bh); -+ -+int __ext3cow_journal_get_create_access(const char *where, -+ handle_t *handle, struct buffer_head *bh); -+ -+int __ext3cow_journal_dirty_metadata(const char *where, -+ handle_t *handle, struct buffer_head *bh); -+ -+#define ext3cow_journal_get_undo_access(handle, bh) \ -+ __ext3cow_journal_get_undo_access(__FUNCTION__, (handle), (bh)) -+#define ext3cow_journal_get_write_access(handle, bh) \ -+ __ext3cow_journal_get_write_access(__FUNCTION__, (handle), (bh)) -+#define ext3cow_journal_revoke(handle, blocknr, bh) \ -+ __ext3cow_journal_revoke(__FUNCTION__, (handle), (blocknr), (bh)) -+#define ext3cow_journal_get_create_access(handle, bh) \ -+ __ext3cow_journal_get_create_access(__FUNCTION__, (handle), (bh)) -+#define ext3cow_journal_dirty_metadata(handle, bh) \ -+ __ext3cow_journal_dirty_metadata(__FUNCTION__, (handle), (bh)) -+#define ext3cow_journal_forget(handle, bh) \ -+ __ext3cow_journal_forget(__FUNCTION__, (handle), (bh)) -+ -+int ext3cow_journal_dirty_data(handle_t *handle, struct buffer_head *bh); -+ -+handle_t *ext3cow_journal_start_sb(struct super_block *sb, int nblocks); -+int __ext3cow_journal_stop(const char *where, handle_t *handle); -+ -+static inline handle_t *ext3cow_journal_start(struct inode *inode, int nblocks) -+{ -+ return ext3cow_journal_start_sb(inode->i_sb, nblocks); -+} -+ -+#define ext3cow_journal_stop(handle) \ -+ __ext3cow_journal_stop(__FUNCTION__, (handle)) -+ -+static inline handle_t *ext3cow_journal_current_handle(void) -+{ -+ return journal_current_handle(); -+} -+ -+static inline int ext3cow_journal_extend(handle_t *handle, int nblocks) -+{ -+ return journal_extend(handle, nblocks); -+} -+ -+static inline int ext3cow_journal_restart(handle_t *handle, int nblocks) -+{ -+ return journal_restart(handle, nblocks); -+} -+ -+static inline int ext3cow_journal_blocks_per_page(struct inode *inode) -+{ -+ return journal_blocks_per_page(inode); -+} -+ -+static inline int ext3cow_journal_force_commit(journal_t *journal) -+{ -+ return journal_force_commit(journal); -+} -+ -+/* super.c */ -+int ext3cow_force_commit(struct super_block *sb); -+ -+static inline int ext3cow_should_journal_data(struct inode *inode) -+{ -+ if (!S_ISREG(inode->i_mode)) -+ return 1; -+ if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3COW_MOUNT_JOURNAL_DATA) -+ return 1; -+ if (EXT3COW_I(inode)->i_flags & EXT3COW_JOURNAL_DATA_FL) -+ return 1; -+ return 0; -+} -+ -+static inline int ext3cow_should_order_data(struct inode *inode) -+{ -+ if (!S_ISREG(inode->i_mode)) -+ return 0; -+ if (EXT3COW_I(inode)->i_flags & EXT3COW_JOURNAL_DATA_FL) -+ return 0; -+ if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3COW_MOUNT_ORDERED_DATA) -+ return 1; -+ return 0; -+} -+ -+static inline int ext3cow_should_writeback_data(struct inode *inode) -+{ -+ if (!S_ISREG(inode->i_mode)) -+ return 0; -+ if (EXT3COW_I(inode)->i_flags & EXT3COW_JOURNAL_DATA_FL) -+ return 0; -+ if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3COW_MOUNT_WRITEBACK_DATA) -+ return 1; -+ return 0; -+} -+ -+#endif /* _LINUX_EXT3COW_JBD_H */ -diff -Naur linux-2.6.21.7/include/linux/magic.h linux-2.6.21.7_ext3cowPatched/include/linux/magic.h ---- linux-2.6.21.7/include/linux/magic.h 2007-08-04 18:11:13.000000000 +0200 -+++ linux-2.6.21.7_ext3cowPatched/include/linux/magic.h 2007-10-23 17:46:52.000000000 +0200 -@@ -9,6 +9,7 @@ - #define EFS_SUPER_MAGIC 0x414A53 - #define EXT2_SUPER_MAGIC 0xEF53 - #define EXT3_SUPER_MAGIC 0xEF53 -+#define EXT3COW_SUPER_MAGIC 0xEF53 - #define EXT4_SUPER_MAGIC 0xEF53 - #define HPFS_SUPER_MAGIC 0xf995e849 - #define ISOFS_SUPER_MAGIC 0x9660