Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6

This commit is contained in:
David Woodhouse
2007-10-13 14:58:23 +01:00
4422 changed files with 315270 additions and 98702 deletions

View File

@@ -8,6 +8,7 @@
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <net/net_namespace.h>
#include "internal.h"
/*
@@ -23,7 +24,7 @@ int afs_get_MAC_address(u8 *mac, size_t maclen)
BUG();
rtnl_lock();
dev = __dev_getfirstbyhwtype(ARPHRD_ETHER);
dev = __dev_getfirstbyhwtype(&init_net, ARPHRD_ETHER);
if (dev) {
memcpy(mac, dev->dev_addr, maclen);
ret = 0;
@@ -47,7 +48,7 @@ int afs_get_ipv4_interfaces(struct afs_interface *bufs, size_t maxbufs,
ASSERT(maxbufs > 0);
rtnl_lock();
for_each_netdev(dev) {
for_each_netdev(&init_net, dev) {
if (dev->type == ARPHRD_LOOPBACK && !wantloopback)
continue;
idev = __in_dev_get_rtnl(dev);

View File

@@ -1514,9 +1514,6 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
int thread_status_size = 0;
elf_addr_t *auxv;
unsigned long mm_flags;
#ifdef ELF_CORE_WRITE_EXTRA_NOTES
int extra_notes_size;
#endif
/*
* We no longer stop all VM operations.
@@ -1645,10 +1642,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
sz += thread_status_size;
#ifdef ELF_CORE_WRITE_EXTRA_NOTES
extra_notes_size = ELF_CORE_EXTRA_NOTES_SIZE;
sz += extra_notes_size;
#endif
sz += elf_coredump_extra_notes_size();
fill_elf_note_phdr(&phdr, sz, offset);
offset += sz;
@@ -1698,10 +1692,8 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
if (!writenote(notes + i, file, &foffset))
goto end_coredump;
#ifdef ELF_CORE_WRITE_EXTRA_NOTES
ELF_CORE_WRITE_EXTRA_NOTES;
foffset += extra_notes_size;
#endif
if (elf_coredump_extra_notes_write(file, &foffset))
goto end_coredump;
/* write out the thread status notes section */
list_for_each(t, &thread_list) {

View File

@@ -798,13 +798,9 @@ void bio_unmap_user(struct bio *bio)
bio_put(bio);
}
static int bio_map_kern_endio(struct bio *bio, unsigned int bytes_done, int err)
static void bio_map_kern_endio(struct bio *bio, int err)
{
if (bio->bi_size)
return 1;
bio_put(bio);
return 0;
}
@@ -1002,34 +998,26 @@ void bio_check_pages_dirty(struct bio *bio)
/**
* bio_endio - end I/O on a bio
* @bio: bio
* @bytes_done: number of bytes completed
* @error: error, if any
*
* Description:
* bio_endio() will end I/O on @bytes_done number of bytes. This may be
* just a partial part of the bio, or it may be the whole bio. bio_endio()
* is the preferred way to end I/O on a bio, it takes care of decrementing
* bi_size and clearing BIO_UPTODATE on error. @error is 0 on success, and
* and one of the established -Exxxx (-EIO, for instance) error values in
* case something went wrong. Noone should call bi_end_io() directly on
* a bio unless they own it and thus know that it has an end_io function.
* bio_endio() will end I/O on the whole bio. bio_endio() is the
* preferred way to end I/O on a bio; it takes care of clearing
* BIO_UPTODATE on error. @error is 0 on success, and one of the
* established -Exxxx (-EIO, for instance) error values in case
* something went wrong. No one should call bi_end_io() directly on a
* bio unless they own it and thus know that it has an end_io
* function.
**/
void bio_endio(struct bio *bio, unsigned int bytes_done, int error)
void bio_endio(struct bio *bio, int error)
{
if (error)
clear_bit(BIO_UPTODATE, &bio->bi_flags);
if (unlikely(bytes_done > bio->bi_size)) {
printk("%s: want %u bytes done, only %u left\n", __FUNCTION__,
bytes_done, bio->bi_size);
bytes_done = bio->bi_size;
}
bio->bi_size -= bytes_done;
bio->bi_sector += (bytes_done >> 9);
else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
error = -EIO;
if (bio->bi_end_io)
bio->bi_end_io(bio, bytes_done, error);
bio->bi_end_io(bio, error);
}
void bio_pair_release(struct bio_pair *bp)
@@ -1037,37 +1025,29 @@ void bio_pair_release(struct bio_pair *bp)
if (atomic_dec_and_test(&bp->cnt)) {
struct bio *master = bp->bio1.bi_private;
bio_endio(master, master->bi_size, bp->error);
bio_endio(master, bp->error);
mempool_free(bp, bp->bio2.bi_private);
}
}
static int bio_pair_end_1(struct bio * bi, unsigned int done, int err)
static void bio_pair_end_1(struct bio *bi, int err)
{
struct bio_pair *bp = container_of(bi, struct bio_pair, bio1);
if (err)
bp->error = err;
if (bi->bi_size)
return 1;
bio_pair_release(bp);
return 0;
}
static int bio_pair_end_2(struct bio * bi, unsigned int done, int err)
static void bio_pair_end_2(struct bio *bi, int err)
{
struct bio_pair *bp = container_of(bi, struct bio_pair, bio2);
if (err)
bp->error = err;
if (bi->bi_size)
return 1;
bio_pair_release(bp);
return 0;
}
/*

View File

@@ -172,7 +172,7 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
}
#if 0
static int blk_end_aio(struct bio *bio, unsigned int bytes_done, int error)
static void blk_end_aio(struct bio *bio, int error)
{
struct kiocb *iocb = bio->bi_private;
atomic_t *bio_count = &iocb->ki_bio_count;

View File

@@ -2634,13 +2634,10 @@ sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
return tmp.b_blocknr;
}
static int end_bio_bh_io_sync(struct bio *bio, unsigned int bytes_done, int err)
static void end_bio_bh_io_sync(struct bio *bio, int err)
{
struct buffer_head *bh = bio->bi_private;
if (bio->bi_size)
return 1;
if (err == -EOPNOTSUPP) {
set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
set_bit(BH_Eopnotsupp, &bh->b_state);
@@ -2648,7 +2645,6 @@ static int end_bio_bh_io_sync(struct bio *bio, unsigned int bytes_done, int err)
bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags));
bio_put(bio);
return 0;
}
int submit_bh(int rw, struct buffer_head * bh)

View File

@@ -21,7 +21,6 @@
#include <linux/if.h>
#include <linux/if_bridge.h>
#include <linux/slab.h>
#include <linux/hdreg.h>
#include <linux/raid/md.h>
#include <linux/kd.h>
#include <linux/dirent.h>
@@ -33,12 +32,10 @@
#include <linux/vt.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/fd.h>
#include <linux/ppp_defs.h>
#include <linux/if_ppp.h>
#include <linux/if_pppox.h>
#include <linux/mtio.h>
#include <linux/cdrom.h>
#include <linux/auto_fs.h>
#include <linux/auto_fs4.h>
#include <linux/tty.h>
@@ -48,7 +45,6 @@
#include <linux/netdevice.h>
#include <linux/raw.h>
#include <linux/smb_fs.h>
#include <linux/blkpg.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/rtc.h>
@@ -62,7 +58,6 @@
#include <linux/i2c-dev.h>
#include <linux/wireless.h>
#include <linux/atalk.h>
#include <linux/blktrace_api.h>
#include <linux/loop.h>
#include <net/bluetooth/bluetooth.h>
@@ -324,22 +319,21 @@ struct ifconf32 {
/*
 * dev_ifname32 - compat handler that services a 32-bit interface-name
 * request by issuing SIOCGIFNAME through sys_ioctl().
 *
 * NOTE(review): this span comes from a diff rendering whose +/- markers were
 * lost, so the pre-merge lines (the ifr32/dev/dev_get_by_index variant) and
 * the post-merge lines (the uifr/copy_in_user/sys_ioctl variant) appear
 * interleaved. All lines are preserved; the only change is on the
 * copy_in_user() check below.
 *
 * Post-merge flow: stage the 32-bit ifreq in a compat_alloc_user_space()
 * buffer, call sys_ioctl(fd, SIOCGIFNAME, ...) on it, then copy the result
 * back to the caller's buffer at compat_ptr(arg).
 *
 * Returns 0 on success, -EFAULT if a user copy fails, or the error returned
 * by sys_ioctl().
 */
static int dev_ifname32(unsigned int fd, unsigned int cmd, unsigned long arg)
{
struct net_device *dev;
struct ifreq32 ifr32;
struct ifreq __user *uifr;
int err;
if (copy_from_user(&ifr32, compat_ptr(arg), sizeof(ifr32)))
uifr = compat_alloc_user_space(sizeof(struct ifreq));
/*
 * No trailing ';' after this condition: with one, the if-body is empty and
 * the -EFAULT return that follows executes unconditionally.
 */
if (copy_in_user(uifr, compat_ptr(arg), sizeof(struct ifreq32)))
return -EFAULT;
dev = dev_get_by_index(ifr32.ifr_ifindex);
if (!dev)
return -ENODEV;
err = sys_ioctl(fd, SIOCGIFNAME, (unsigned long)uifr);
if (err)
return err;
strlcpy(ifr32.ifr_name, dev->name, sizeof(ifr32.ifr_name));
dev_put(dev);
err = copy_to_user(compat_ptr(arg), &ifr32, sizeof(ifr32));
return (err ? -EFAULT : 0);
if (copy_in_user(compat_ptr(arg), uifr, sizeof(struct ifreq32)))
return -EFAULT;
return 0;
}
static int dev_ifconf(unsigned int fd, unsigned int cmd, unsigned long arg)
@@ -668,53 +662,6 @@ out:
#endif
#ifdef CONFIG_BLOCK
struct hd_geometry32 {
unsigned char heads;
unsigned char sectors;
unsigned short cylinders;
u32 start;
};
static int hdio_getgeo(unsigned int fd, unsigned int cmd, unsigned long arg)
{
mm_segment_t old_fs = get_fs();
struct hd_geometry geo;
struct hd_geometry32 __user *ugeo;
int err;
set_fs (KERNEL_DS);
err = sys_ioctl(fd, HDIO_GETGEO, (unsigned long)&geo);
set_fs (old_fs);
ugeo = compat_ptr(arg);
if (!err) {
err = copy_to_user (ugeo, &geo, 4);
err |= __put_user (geo.start, &ugeo->start);
if (err)
err = -EFAULT;
}
return err;
}
static int hdio_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
{
mm_segment_t old_fs = get_fs();
unsigned long kval;
unsigned int __user *uvp;
int error;
set_fs(KERNEL_DS);
error = sys_ioctl(fd, cmd, (long)&kval);
set_fs(old_fs);
if(error == 0) {
uvp = compat_ptr(arg);
if(put_user(kval, uvp))
error = -EFAULT;
}
return error;
}
typedef struct sg_io_hdr32 {
compat_int_t interface_id; /* [i] 'S' for SCSI generic (required) */
compat_int_t dxfer_direction; /* [i] data transfer direction */
@@ -1089,108 +1036,6 @@ static int mt_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
return err ? -EFAULT: 0;
}
struct cdrom_read_audio32 {
union cdrom_addr addr;
u8 addr_format;
compat_int_t nframes;
compat_caddr_t buf;
};
struct cdrom_generic_command32 {
unsigned char cmd[CDROM_PACKET_SIZE];
compat_caddr_t buffer;
compat_uint_t buflen;
compat_int_t stat;
compat_caddr_t sense;
unsigned char data_direction;
compat_int_t quiet;
compat_int_t timeout;
compat_caddr_t reserved[1];
};
static int cdrom_do_read_audio(unsigned int fd, unsigned int cmd, unsigned long arg)
{
struct cdrom_read_audio __user *cdread_audio;
struct cdrom_read_audio32 __user *cdread_audio32;
__u32 data;
void __user *datap;
cdread_audio = compat_alloc_user_space(sizeof(*cdread_audio));
cdread_audio32 = compat_ptr(arg);
if (copy_in_user(&cdread_audio->addr,
&cdread_audio32->addr,
(sizeof(*cdread_audio32) -
sizeof(compat_caddr_t))))
return -EFAULT;
if (get_user(data, &cdread_audio32->buf))
return -EFAULT;
datap = compat_ptr(data);
if (put_user(datap, &cdread_audio->buf))
return -EFAULT;
return sys_ioctl(fd, cmd, (unsigned long) cdread_audio);
}
static int cdrom_do_generic_command(unsigned int fd, unsigned int cmd, unsigned long arg)
{
struct cdrom_generic_command __user *cgc;
struct cdrom_generic_command32 __user *cgc32;
u32 data;
unsigned char dir;
int itmp;
cgc = compat_alloc_user_space(sizeof(*cgc));
cgc32 = compat_ptr(arg);
if (copy_in_user(&cgc->cmd, &cgc32->cmd, sizeof(cgc->cmd)) ||
get_user(data, &cgc32->buffer) ||
put_user(compat_ptr(data), &cgc->buffer) ||
copy_in_user(&cgc->buflen, &cgc32->buflen,
(sizeof(unsigned int) + sizeof(int))) ||
get_user(data, &cgc32->sense) ||
put_user(compat_ptr(data), &cgc->sense) ||
get_user(dir, &cgc32->data_direction) ||
put_user(dir, &cgc->data_direction) ||
get_user(itmp, &cgc32->quiet) ||
put_user(itmp, &cgc->quiet) ||
get_user(itmp, &cgc32->timeout) ||
put_user(itmp, &cgc->timeout) ||
get_user(data, &cgc32->reserved[0]) ||
put_user(compat_ptr(data), &cgc->reserved[0]))
return -EFAULT;
return sys_ioctl(fd, cmd, (unsigned long) cgc);
}
static int cdrom_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
{
int err;
switch(cmd) {
case CDROMREADAUDIO:
err = cdrom_do_read_audio(fd, cmd, arg);
break;
case CDROM_SEND_PACKET:
err = cdrom_do_generic_command(fd, cmd, arg);
break;
default:
do {
static int count;
if (++count <= 20)
printk("cdrom_ioctl: Unknown cmd fd(%d) "
"cmd(%08x) arg(%08x)\n",
(int)fd, (unsigned int)cmd, (unsigned int)arg);
} while(0);
err = -EINVAL;
break;
};
return err;
}
#endif /* CONFIG_BLOCK */
#ifdef CONFIG_VT
@@ -1536,71 +1381,11 @@ ret_einval(unsigned int fd, unsigned int cmd, unsigned long arg)
return -EINVAL;
}
#ifdef CONFIG_BLOCK
static int broken_blkgetsize(unsigned int fd, unsigned int cmd, unsigned long arg)
{
/* The mkswap binary hard codes it to Intel value :-((( */
return w_long(fd, BLKGETSIZE, arg);
}
struct blkpg_ioctl_arg32 {
compat_int_t op;
compat_int_t flags;
compat_int_t datalen;
compat_caddr_t data;
};
static int blkpg_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
{
struct blkpg_ioctl_arg32 __user *ua32 = compat_ptr(arg);
struct blkpg_ioctl_arg __user *a = compat_alloc_user_space(sizeof(*a));
compat_caddr_t udata;
compat_int_t n;
int err;
err = get_user(n, &ua32->op);
err |= put_user(n, &a->op);
err |= get_user(n, &ua32->flags);
err |= put_user(n, &a->flags);
err |= get_user(n, &ua32->datalen);
err |= put_user(n, &a->datalen);
err |= get_user(udata, &ua32->data);
err |= put_user(compat_ptr(udata), &a->data);
if (err)
return err;
return sys_ioctl(fd, cmd, (unsigned long)a);
}
#endif
static int ioc_settimeout(unsigned int fd, unsigned int cmd, unsigned long arg)
{
return rw_long(fd, AUTOFS_IOC_SETTIMEOUT, arg);
}
#ifdef CONFIG_BLOCK
/* Fix sizeof(sizeof()) breakage */
#define BLKBSZGET_32 _IOR(0x12,112,int)
#define BLKBSZSET_32 _IOW(0x12,113,int)
#define BLKGETSIZE64_32 _IOR(0x12,114,int)
static int do_blkbszget(unsigned int fd, unsigned int cmd, unsigned long arg)
{
return sys_ioctl(fd, BLKBSZGET, (unsigned long)compat_ptr(arg));
}
static int do_blkbszset(unsigned int fd, unsigned int cmd, unsigned long arg)
{
return sys_ioctl(fd, BLKBSZSET, (unsigned long)compat_ptr(arg));
}
static int do_blkgetsize64(unsigned int fd, unsigned int cmd,
unsigned long arg)
{
return sys_ioctl(fd, BLKGETSIZE64, (unsigned long)compat_ptr(arg));
}
#endif
/* Bluetooth ioctls */
#define HCIUARTSETPROTO _IOW('U', 200, int)
#define HCIUARTGETPROTO _IOR('U', 201, int)
@@ -1620,333 +1405,6 @@ static int do_blkgetsize64(unsigned int fd, unsigned int cmd,
#define HIDPGETCONNLIST _IOR('H', 210, int)
#define HIDPGETCONNINFO _IOR('H', 211, int)
#ifdef CONFIG_BLOCK
struct floppy_struct32 {
compat_uint_t size;
compat_uint_t sect;
compat_uint_t head;
compat_uint_t track;
compat_uint_t stretch;
unsigned char gap;
unsigned char rate;
unsigned char spec1;
unsigned char fmt_gap;
const compat_caddr_t name;
};
struct floppy_drive_params32 {
char cmos;
compat_ulong_t max_dtr;
compat_ulong_t hlt;
compat_ulong_t hut;
compat_ulong_t srt;
compat_ulong_t spinup;
compat_ulong_t spindown;
unsigned char spindown_offset;
unsigned char select_delay;
unsigned char rps;
unsigned char tracks;
compat_ulong_t timeout;
unsigned char interleave_sect;
struct floppy_max_errors max_errors;
char flags;
char read_track;
short autodetect[8];
compat_int_t checkfreq;
compat_int_t native_format;
};
struct floppy_drive_struct32 {
signed char flags;
compat_ulong_t spinup_date;
compat_ulong_t select_date;
compat_ulong_t first_read_date;
short probed_format;
short track;
short maxblock;
short maxtrack;
compat_int_t generation;
compat_int_t keep_data;
compat_int_t fd_ref;
compat_int_t fd_device;
compat_int_t last_checked;
compat_caddr_t dmabuf;
compat_int_t bufblocks;
};
struct floppy_fdc_state32 {
compat_int_t spec1;
compat_int_t spec2;
compat_int_t dtr;
unsigned char version;
unsigned char dor;
compat_ulong_t address;
unsigned int rawcmd:2;
unsigned int reset:1;
unsigned int need_configure:1;
unsigned int perp_mode:2;
unsigned int has_fifo:1;
unsigned int driver_version;
unsigned char track[4];
};
struct floppy_write_errors32 {
unsigned int write_errors;
compat_ulong_t first_error_sector;
compat_int_t first_error_generation;
compat_ulong_t last_error_sector;
compat_int_t last_error_generation;
compat_uint_t badness;
};
#define FDSETPRM32 _IOW(2, 0x42, struct floppy_struct32)
#define FDDEFPRM32 _IOW(2, 0x43, struct floppy_struct32)
#define FDGETPRM32 _IOR(2, 0x04, struct floppy_struct32)
#define FDSETDRVPRM32 _IOW(2, 0x90, struct floppy_drive_params32)
#define FDGETDRVPRM32 _IOR(2, 0x11, struct floppy_drive_params32)
#define FDGETDRVSTAT32 _IOR(2, 0x12, struct floppy_drive_struct32)
#define FDPOLLDRVSTAT32 _IOR(2, 0x13, struct floppy_drive_struct32)
#define FDGETFDCSTAT32 _IOR(2, 0x15, struct floppy_fdc_state32)
#define FDWERRORGET32 _IOR(2, 0x17, struct floppy_write_errors32)
static struct {
unsigned int cmd32;
unsigned int cmd;
} fd_ioctl_trans_table[] = {
{ FDSETPRM32, FDSETPRM },
{ FDDEFPRM32, FDDEFPRM },
{ FDGETPRM32, FDGETPRM },
{ FDSETDRVPRM32, FDSETDRVPRM },
{ FDGETDRVPRM32, FDGETDRVPRM },
{ FDGETDRVSTAT32, FDGETDRVSTAT },
{ FDPOLLDRVSTAT32, FDPOLLDRVSTAT },
{ FDGETFDCSTAT32, FDGETFDCSTAT },
{ FDWERRORGET32, FDWERRORGET }
};
#define NR_FD_IOCTL_TRANS ARRAY_SIZE(fd_ioctl_trans_table)
static int fd_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
{
mm_segment_t old_fs = get_fs();
void *karg = NULL;
unsigned int kcmd = 0;
int i, err;
for (i = 0; i < NR_FD_IOCTL_TRANS; i++)
if (cmd == fd_ioctl_trans_table[i].cmd32) {
kcmd = fd_ioctl_trans_table[i].cmd;
break;
}
if (!kcmd)
return -EINVAL;
switch (cmd) {
case FDSETPRM32:
case FDDEFPRM32:
case FDGETPRM32:
{
compat_uptr_t name;
struct floppy_struct32 __user *uf;
struct floppy_struct *f;
uf = compat_ptr(arg);
f = karg = kmalloc(sizeof(struct floppy_struct), GFP_KERNEL);
if (!karg)
return -ENOMEM;
if (cmd == FDGETPRM32)
break;
err = __get_user(f->size, &uf->size);
err |= __get_user(f->sect, &uf->sect);
err |= __get_user(f->head, &uf->head);
err |= __get_user(f->track, &uf->track);
err |= __get_user(f->stretch, &uf->stretch);
err |= __get_user(f->gap, &uf->gap);
err |= __get_user(f->rate, &uf->rate);
err |= __get_user(f->spec1, &uf->spec1);
err |= __get_user(f->fmt_gap, &uf->fmt_gap);
err |= __get_user(name, &uf->name);
f->name = compat_ptr(name);
if (err) {
err = -EFAULT;
goto out;
}
break;
}
case FDSETDRVPRM32:
case FDGETDRVPRM32:
{
struct floppy_drive_params32 __user *uf;
struct floppy_drive_params *f;
uf = compat_ptr(arg);
f = karg = kmalloc(sizeof(struct floppy_drive_params), GFP_KERNEL);
if (!karg)
return -ENOMEM;
if (cmd == FDGETDRVPRM32)
break;
err = __get_user(f->cmos, &uf->cmos);
err |= __get_user(f->max_dtr, &uf->max_dtr);
err |= __get_user(f->hlt, &uf->hlt);
err |= __get_user(f->hut, &uf->hut);
err |= __get_user(f->srt, &uf->srt);
err |= __get_user(f->spinup, &uf->spinup);
err |= __get_user(f->spindown, &uf->spindown);
err |= __get_user(f->spindown_offset, &uf->spindown_offset);
err |= __get_user(f->select_delay, &uf->select_delay);
err |= __get_user(f->rps, &uf->rps);
err |= __get_user(f->tracks, &uf->tracks);
err |= __get_user(f->timeout, &uf->timeout);
err |= __get_user(f->interleave_sect, &uf->interleave_sect);
err |= __copy_from_user(&f->max_errors, &uf->max_errors, sizeof(f->max_errors));
err |= __get_user(f->flags, &uf->flags);
err |= __get_user(f->read_track, &uf->read_track);
err |= __copy_from_user(f->autodetect, uf->autodetect, sizeof(f->autodetect));
err |= __get_user(f->checkfreq, &uf->checkfreq);
err |= __get_user(f->native_format, &uf->native_format);
if (err) {
err = -EFAULT;
goto out;
}
break;
}
case FDGETDRVSTAT32:
case FDPOLLDRVSTAT32:
karg = kmalloc(sizeof(struct floppy_drive_struct), GFP_KERNEL);
if (!karg)
return -ENOMEM;
break;
case FDGETFDCSTAT32:
karg = kmalloc(sizeof(struct floppy_fdc_state), GFP_KERNEL);
if (!karg)
return -ENOMEM;
break;
case FDWERRORGET32:
karg = kmalloc(sizeof(struct floppy_write_errors), GFP_KERNEL);
if (!karg)
return -ENOMEM;
break;
default:
return -EINVAL;
}
set_fs (KERNEL_DS);
err = sys_ioctl (fd, kcmd, (unsigned long)karg);
set_fs (old_fs);
if (err)
goto out;
switch (cmd) {
case FDGETPRM32:
{
struct floppy_struct *f = karg;
struct floppy_struct32 __user *uf = compat_ptr(arg);
err = __put_user(f->size, &uf->size);
err |= __put_user(f->sect, &uf->sect);
err |= __put_user(f->head, &uf->head);
err |= __put_user(f->track, &uf->track);
err |= __put_user(f->stretch, &uf->stretch);
err |= __put_user(f->gap, &uf->gap);
err |= __put_user(f->rate, &uf->rate);
err |= __put_user(f->spec1, &uf->spec1);
err |= __put_user(f->fmt_gap, &uf->fmt_gap);
err |= __put_user((u64)f->name, (compat_caddr_t __user *)&uf->name);
break;
}
case FDGETDRVPRM32:
{
struct floppy_drive_params32 __user *uf;
struct floppy_drive_params *f = karg;
uf = compat_ptr(arg);
err = __put_user(f->cmos, &uf->cmos);
err |= __put_user(f->max_dtr, &uf->max_dtr);
err |= __put_user(f->hlt, &uf->hlt);
err |= __put_user(f->hut, &uf->hut);
err |= __put_user(f->srt, &uf->srt);
err |= __put_user(f->spinup, &uf->spinup);
err |= __put_user(f->spindown, &uf->spindown);
err |= __put_user(f->spindown_offset, &uf->spindown_offset);
err |= __put_user(f->select_delay, &uf->select_delay);
err |= __put_user(f->rps, &uf->rps);
err |= __put_user(f->tracks, &uf->tracks);
err |= __put_user(f->timeout, &uf->timeout);
err |= __put_user(f->interleave_sect, &uf->interleave_sect);
err |= __copy_to_user(&uf->max_errors, &f->max_errors, sizeof(f->max_errors));
err |= __put_user(f->flags, &uf->flags);
err |= __put_user(f->read_track, &uf->read_track);
err |= __copy_to_user(uf->autodetect, f->autodetect, sizeof(f->autodetect));
err |= __put_user(f->checkfreq, &uf->checkfreq);
err |= __put_user(f->native_format, &uf->native_format);
break;
}
case FDGETDRVSTAT32:
case FDPOLLDRVSTAT32:
{
struct floppy_drive_struct32 __user *uf;
struct floppy_drive_struct *f = karg;
uf = compat_ptr(arg);
err = __put_user(f->flags, &uf->flags);
err |= __put_user(f->spinup_date, &uf->spinup_date);
err |= __put_user(f->select_date, &uf->select_date);
err |= __put_user(f->first_read_date, &uf->first_read_date);
err |= __put_user(f->probed_format, &uf->probed_format);
err |= __put_user(f->track, &uf->track);
err |= __put_user(f->maxblock, &uf->maxblock);
err |= __put_user(f->maxtrack, &uf->maxtrack);
err |= __put_user(f->generation, &uf->generation);
err |= __put_user(f->keep_data, &uf->keep_data);
err |= __put_user(f->fd_ref, &uf->fd_ref);
err |= __put_user(f->fd_device, &uf->fd_device);
err |= __put_user(f->last_checked, &uf->last_checked);
err |= __put_user((u64)f->dmabuf, &uf->dmabuf);
err |= __put_user((u64)f->bufblocks, &uf->bufblocks);
break;
}
case FDGETFDCSTAT32:
{
struct floppy_fdc_state32 __user *uf;
struct floppy_fdc_state *f = karg;
uf = compat_ptr(arg);
err = __put_user(f->spec1, &uf->spec1);
err |= __put_user(f->spec2, &uf->spec2);
err |= __put_user(f->dtr, &uf->dtr);
err |= __put_user(f->version, &uf->version);
err |= __put_user(f->dor, &uf->dor);
err |= __put_user(f->address, &uf->address);
err |= __copy_to_user((char __user *)&uf->address + sizeof(uf->address),
(char *)&f->address + sizeof(f->address), sizeof(int));
err |= __put_user(f->driver_version, &uf->driver_version);
err |= __copy_to_user(uf->track, f->track, sizeof(f->track));
break;
}
case FDWERRORGET32:
{
struct floppy_write_errors32 __user *uf;
struct floppy_write_errors *f = karg;
uf = compat_ptr(arg);
err = __put_user(f->write_errors, &uf->write_errors);
err |= __put_user(f->first_error_sector, &uf->first_error_sector);
err |= __put_user(f->first_error_generation, &uf->first_error_generation);
err |= __put_user(f->last_error_sector, &uf->last_error_sector);
err |= __put_user(f->last_error_generation, &uf->last_error_generation);
err |= __put_user(f->badness, &uf->badness);
break;
}
default:
break;
}
if (err)
err = -EFAULT;
out:
kfree(karg);
return err;
}
#endif
struct mtd_oob_buf32 {
u_int32_t start;
u_int32_t length;
@@ -2506,60 +1964,6 @@ COMPATIBLE_IOCTL(FIONREAD) /* This is also TIOCINQ */
/* 0x00 */
COMPATIBLE_IOCTL(FIBMAP)
COMPATIBLE_IOCTL(FIGETBSZ)
/* 0x03 -- HD/IDE ioctl's used by hdparm and friends.
* Some need translations, these do not.
*/
COMPATIBLE_IOCTL(HDIO_GET_IDENTITY)
COMPATIBLE_IOCTL(HDIO_DRIVE_TASK)
COMPATIBLE_IOCTL(HDIO_DRIVE_CMD)
ULONG_IOCTL(HDIO_SET_MULTCOUNT)
ULONG_IOCTL(HDIO_SET_UNMASKINTR)
ULONG_IOCTL(HDIO_SET_KEEPSETTINGS)
ULONG_IOCTL(HDIO_SET_32BIT)
ULONG_IOCTL(HDIO_SET_NOWERR)
ULONG_IOCTL(HDIO_SET_DMA)
ULONG_IOCTL(HDIO_SET_PIO_MODE)
ULONG_IOCTL(HDIO_SET_NICE)
ULONG_IOCTL(HDIO_SET_WCACHE)
ULONG_IOCTL(HDIO_SET_ACOUSTIC)
ULONG_IOCTL(HDIO_SET_BUSSTATE)
ULONG_IOCTL(HDIO_SET_ADDRESS)
COMPATIBLE_IOCTL(HDIO_SCAN_HWIF)
/* 0x330 is reserved -- it used to be HDIO_GETGEO_BIG */
COMPATIBLE_IOCTL(0x330)
/* 0x02 -- Floppy ioctls */
COMPATIBLE_IOCTL(FDMSGON)
COMPATIBLE_IOCTL(FDMSGOFF)
COMPATIBLE_IOCTL(FDSETEMSGTRESH)
COMPATIBLE_IOCTL(FDFLUSH)
COMPATIBLE_IOCTL(FDWERRORCLR)
COMPATIBLE_IOCTL(FDSETMAXERRS)
COMPATIBLE_IOCTL(FDGETMAXERRS)
COMPATIBLE_IOCTL(FDGETDRVTYP)
COMPATIBLE_IOCTL(FDEJECT)
COMPATIBLE_IOCTL(FDCLRPRM)
COMPATIBLE_IOCTL(FDFMTBEG)
COMPATIBLE_IOCTL(FDFMTEND)
COMPATIBLE_IOCTL(FDRESET)
COMPATIBLE_IOCTL(FDTWADDLE)
COMPATIBLE_IOCTL(FDFMTTRK)
COMPATIBLE_IOCTL(FDRAWCMD)
/* 0x12 */
#ifdef CONFIG_BLOCK
COMPATIBLE_IOCTL(BLKRASET)
COMPATIBLE_IOCTL(BLKROSET)
COMPATIBLE_IOCTL(BLKROGET)
COMPATIBLE_IOCTL(BLKRRPART)
COMPATIBLE_IOCTL(BLKFLSBUF)
COMPATIBLE_IOCTL(BLKSECTSET)
COMPATIBLE_IOCTL(BLKSSZGET)
COMPATIBLE_IOCTL(BLKTRACESTART)
COMPATIBLE_IOCTL(BLKTRACESTOP)
COMPATIBLE_IOCTL(BLKTRACESETUP)
COMPATIBLE_IOCTL(BLKTRACETEARDOWN)
ULONG_IOCTL(BLKRASET)
ULONG_IOCTL(BLKFRASET)
#endif
/* RAID */
COMPATIBLE_IOCTL(RAID_VERSION)
COMPATIBLE_IOCTL(GET_ARRAY_INFO)
@@ -2807,50 +2211,6 @@ COMPATIBLE_IOCTL(PPGETMODE)
COMPATIBLE_IOCTL(PPGETPHASE)
COMPATIBLE_IOCTL(PPGETFLAGS)
COMPATIBLE_IOCTL(PPSETFLAGS)
/* CDROM stuff */
COMPATIBLE_IOCTL(CDROMPAUSE)
COMPATIBLE_IOCTL(CDROMRESUME)
COMPATIBLE_IOCTL(CDROMPLAYMSF)
COMPATIBLE_IOCTL(CDROMPLAYTRKIND)
COMPATIBLE_IOCTL(CDROMREADTOCHDR)
COMPATIBLE_IOCTL(CDROMREADTOCENTRY)
COMPATIBLE_IOCTL(CDROMSTOP)
COMPATIBLE_IOCTL(CDROMSTART)
COMPATIBLE_IOCTL(CDROMEJECT)
COMPATIBLE_IOCTL(CDROMVOLCTRL)
COMPATIBLE_IOCTL(CDROMSUBCHNL)
ULONG_IOCTL(CDROMEJECT_SW)
COMPATIBLE_IOCTL(CDROMMULTISESSION)
COMPATIBLE_IOCTL(CDROM_GET_MCN)
COMPATIBLE_IOCTL(CDROMRESET)
COMPATIBLE_IOCTL(CDROMVOLREAD)
COMPATIBLE_IOCTL(CDROMSEEK)
COMPATIBLE_IOCTL(CDROMPLAYBLK)
COMPATIBLE_IOCTL(CDROMCLOSETRAY)
ULONG_IOCTL(CDROM_SET_OPTIONS)
ULONG_IOCTL(CDROM_CLEAR_OPTIONS)
ULONG_IOCTL(CDROM_SELECT_SPEED)
ULONG_IOCTL(CDROM_SELECT_DISC)
ULONG_IOCTL(CDROM_MEDIA_CHANGED)
ULONG_IOCTL(CDROM_DRIVE_STATUS)
COMPATIBLE_IOCTL(CDROM_DISC_STATUS)
COMPATIBLE_IOCTL(CDROM_CHANGER_NSLOTS)
ULONG_IOCTL(CDROM_LOCKDOOR)
ULONG_IOCTL(CDROM_DEBUG)
COMPATIBLE_IOCTL(CDROM_GET_CAPABILITY)
/* Ignore cdrom.h about these next 5 ioctls, they absolutely do
* not take a struct cdrom_read, instead they take a struct cdrom_msf
* which is compatible.
*/
COMPATIBLE_IOCTL(CDROMREADMODE2)
COMPATIBLE_IOCTL(CDROMREADMODE1)
COMPATIBLE_IOCTL(CDROMREADRAW)
COMPATIBLE_IOCTL(CDROMREADCOOKED)
COMPATIBLE_IOCTL(CDROMREADALL)
/* DVD ioctls */
COMPATIBLE_IOCTL(DVD_READ_STRUCT)
COMPATIBLE_IOCTL(DVD_WRITE_STRUCT)
COMPATIBLE_IOCTL(DVD_AUTH)
/* pktcdvd */
COMPATIBLE_IOCTL(PACKET_CTRL_CMD)
/* Big A */
@@ -3336,33 +2696,6 @@ HANDLE_IOCTL(SIOCGSTAMP, do_siocgstamp)
HANDLE_IOCTL(SIOCGSTAMPNS, do_siocgstampns)
#endif
#ifdef CONFIG_BLOCK
HANDLE_IOCTL(HDIO_GETGEO, hdio_getgeo)
HANDLE_IOCTL(BLKRAGET, w_long)
HANDLE_IOCTL(BLKGETSIZE, w_long)
HANDLE_IOCTL(0x1260, broken_blkgetsize)
HANDLE_IOCTL(BLKFRAGET, w_long)
HANDLE_IOCTL(BLKSECTGET, w_long)
HANDLE_IOCTL(BLKPG, blkpg_ioctl_trans)
HANDLE_IOCTL(HDIO_GET_UNMASKINTR, hdio_ioctl_trans)
HANDLE_IOCTL(HDIO_GET_MULTCOUNT, hdio_ioctl_trans)
HANDLE_IOCTL(HDIO_GET_KEEPSETTINGS, hdio_ioctl_trans)
HANDLE_IOCTL(HDIO_GET_32BIT, hdio_ioctl_trans)
HANDLE_IOCTL(HDIO_GET_NOWERR, hdio_ioctl_trans)
HANDLE_IOCTL(HDIO_GET_DMA, hdio_ioctl_trans)
HANDLE_IOCTL(HDIO_GET_NICE, hdio_ioctl_trans)
HANDLE_IOCTL(HDIO_GET_WCACHE, hdio_ioctl_trans)
HANDLE_IOCTL(HDIO_GET_ACOUSTIC, hdio_ioctl_trans)
HANDLE_IOCTL(HDIO_GET_ADDRESS, hdio_ioctl_trans)
HANDLE_IOCTL(HDIO_GET_BUSSTATE, hdio_ioctl_trans)
HANDLE_IOCTL(FDSETPRM32, fd_ioctl_trans)
HANDLE_IOCTL(FDDEFPRM32, fd_ioctl_trans)
HANDLE_IOCTL(FDGETPRM32, fd_ioctl_trans)
HANDLE_IOCTL(FDSETDRVPRM32, fd_ioctl_trans)
HANDLE_IOCTL(FDGETDRVPRM32, fd_ioctl_trans)
HANDLE_IOCTL(FDGETDRVSTAT32, fd_ioctl_trans)
HANDLE_IOCTL(FDPOLLDRVSTAT32, fd_ioctl_trans)
HANDLE_IOCTL(FDGETFDCSTAT32, fd_ioctl_trans)
HANDLE_IOCTL(FDWERRORGET32, fd_ioctl_trans)
HANDLE_IOCTL(SG_IO,sg_ioctl_trans)
HANDLE_IOCTL(SG_GET_REQUEST_TABLE, sg_grt_trans)
#endif
@@ -3373,8 +2706,6 @@ HANDLE_IOCTL(PPPIOCSACTIVE32, ppp_sock_fprog_ioctl_trans)
#ifdef CONFIG_BLOCK
HANDLE_IOCTL(MTIOCGET32, mt_ioctl_trans)
HANDLE_IOCTL(MTIOCPOS32, mt_ioctl_trans)
HANDLE_IOCTL(CDROMREADAUDIO, cdrom_ioctl_trans)
HANDLE_IOCTL(CDROM_SEND_PACKET, cdrom_ioctl_trans)
#endif
#define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93,0x64,unsigned int)
HANDLE_IOCTL(AUTOFS_IOC_SETTIMEOUT32, ioc_settimeout)
@@ -3415,9 +2746,6 @@ HANDLE_IOCTL(SONET_GETFRAMING, do_atm_ioctl)
HANDLE_IOCTL(SONET_GETFRSENSE, do_atm_ioctl)
/* block stuff */
#ifdef CONFIG_BLOCK
HANDLE_IOCTL(BLKBSZGET_32, do_blkbszget)
HANDLE_IOCTL(BLKBSZSET_32, do_blkbszset)
HANDLE_IOCTL(BLKGETSIZE64_32, do_blkgetsize64)
/* Raw devices */
HANDLE_IOCTL(RAW_SETBIND, raw_ioctl)
HANDLE_IOCTL(RAW_GETBIND, raw_ioctl)

View File

@@ -221,6 +221,42 @@ struct dentry *debugfs_create_u64(const char *name, mode_t mode,
}
EXPORT_SYMBOL_GPL(debugfs_create_u64);
DEFINE_SIMPLE_ATTRIBUTE(fops_x8, debugfs_u8_get, debugfs_u8_set, "0x%02llx\n");
DEFINE_SIMPLE_ATTRIBUTE(fops_x16, debugfs_u16_get, debugfs_u16_set, "0x%04llx\n");
DEFINE_SIMPLE_ATTRIBUTE(fops_x32, debugfs_u32_get, debugfs_u32_set, "0x%08llx\n");
/**
* debugfs_create_x8 - create a debugfs file that is used to read and write an unsigned 8-bit value
* debugfs_create_x16 - create a debugfs file that is used to read and write an unsigned 16-bit value
* debugfs_create_x32 - create a debugfs file that is used to read and write an unsigned 32-bit value
*
* These functions are exactly the same as the above functions (but use
* hex output, for the decimal challenged); for details, look at the above
* unsigned decimal functions.
*/
/*
 * debugfs_create_x8 - create a debugfs file for the u8 that @value points to.
 *
 * Thin wrapper: @name, @mode, @parent and @value are handed unchanged to
 * debugfs_create_file() together with the fops_x8 operations, which are
 * defined with the "0x%02llx\n" format string. The return value of
 * debugfs_create_file() is returned as-is.
 */
struct dentry *debugfs_create_x8(const char *name, mode_t mode,
struct dentry *parent, u8 *value)
{
return debugfs_create_file(name, mode, parent, value, &fops_x8);
}
EXPORT_SYMBOL_GPL(debugfs_create_x8);
/*
 * debugfs_create_x16 - create a debugfs file for the u16 that @value points to.
 *
 * Thin wrapper: @name, @mode, @parent and @value are handed unchanged to
 * debugfs_create_file() together with the fops_x16 operations, which are
 * defined with the "0x%04llx\n" format string. The return value of
 * debugfs_create_file() is returned as-is.
 */
struct dentry *debugfs_create_x16(const char *name, mode_t mode,
struct dentry *parent, u16 *value)
{
return debugfs_create_file(name, mode, parent, value, &fops_x16);
}
EXPORT_SYMBOL_GPL(debugfs_create_x16);
/*
 * debugfs_create_x32 - create a debugfs file for the u32 that @value points to.
 *
 * Thin wrapper: @name, @mode, @parent and @value are handed unchanged to
 * debugfs_create_file() together with the fops_x32 operations, which are
 * defined with the "0x%08llx\n" format string. The return value of
 * debugfs_create_file() is returned as-is.
 */
struct dentry *debugfs_create_x32(const char *name, mode_t mode,
struct dentry *parent, u32 *value)
{
return debugfs_create_file(name, mode, parent, value, &fops_x32);
}
EXPORT_SYMBOL_GPL(debugfs_create_x32);
static ssize_t read_file_bool(struct file *file, char __user *user_buf,
size_t count, loff_t *ppos)
{

View File

@@ -264,15 +264,12 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio);
/*
* Asynchronous IO callback.
*/
static int dio_bio_end_aio(struct bio *bio, unsigned int bytes_done, int error)
static void dio_bio_end_aio(struct bio *bio, int error)
{
struct dio *dio = bio->bi_private;
unsigned long remaining;
unsigned long flags;
if (bio->bi_size)
return 1;
/* cleanup the bio */
dio_bio_complete(dio, bio);
@@ -287,8 +284,6 @@ static int dio_bio_end_aio(struct bio *bio, unsigned int bytes_done, int error)
aio_complete(dio->iocb, ret, 0);
kfree(dio);
}
return 0;
}
/*
@@ -298,21 +293,17 @@ static int dio_bio_end_aio(struct bio *bio, unsigned int bytes_done, int error)
* During I/O bi_private points at the dio. After I/O, bi_private is used to
* implement a singly-linked list of completed BIOs, at dio->bio_list.
*/
static int dio_bio_end_io(struct bio *bio, unsigned int bytes_done, int error)
static void dio_bio_end_io(struct bio *bio, int error)
{
struct dio *dio = bio->bi_private;
unsigned long flags;
if (bio->bi_size)
return 1;
spin_lock_irqsave(&dio->bio_lock, flags);
bio->bi_private = dio->bio_list;
dio->bio_list = bio;
if (--dio->refcount == 1 && dio->waiter)
wake_up_process(dio->waiter);
spin_unlock_irqrestore(&dio->bio_lock, flags);
return 0;
}
static int

View File

@@ -491,6 +491,7 @@ struct dlm_ls {
uint64_t ls_recover_seq;
struct dlm_recover *ls_recover_args;
struct rw_semaphore ls_in_recovery; /* block local requests */
struct rw_semaphore ls_recv_active; /* block dlm_recv */
struct list_head ls_requestqueue;/* queue remote requests */
struct mutex ls_requestqueue_mutex;
char *ls_recover_buf;

View File

@@ -3638,55 +3638,8 @@ static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms)
dlm_put_lkb(lkb);
}
int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
static void _receive_message(struct dlm_ls *ls, struct dlm_message *ms)
{
struct dlm_message *ms = (struct dlm_message *) hd;
struct dlm_ls *ls;
int error = 0;
if (!recovery)
dlm_message_in(ms);
ls = dlm_find_lockspace_global(hd->h_lockspace);
if (!ls) {
log_print("drop message %d from %d for unknown lockspace %d",
ms->m_type, nodeid, hd->h_lockspace);
return -EINVAL;
}
/* recovery may have just ended leaving a bunch of backed-up requests
in the requestqueue; wait while dlm_recoverd clears them */
if (!recovery)
dlm_wait_requestqueue(ls);
/* recovery may have just started while there were a bunch of
in-flight requests -- save them in requestqueue to be processed
after recovery. we can't let dlm_recvd block on the recovery
lock. if dlm_recoverd is calling this function to clear the
requestqueue, it needs to be interrupted (-EINTR) if another
recovery operation is starting. */
while (1) {
if (dlm_locking_stopped(ls)) {
if (recovery) {
error = -EINTR;
goto out;
}
error = dlm_add_requestqueue(ls, nodeid, hd);
if (error == -EAGAIN)
continue;
else {
error = -EINTR;
goto out;
}
}
if (dlm_lock_recovery_try(ls))
break;
schedule();
}
switch (ms->m_type) {
/* messages sent to a master node */
@@ -3761,17 +3714,90 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
log_error(ls, "unknown message type %d", ms->m_type);
}
dlm_unlock_recovery(ls);
out:
dlm_put_lockspace(ls);
dlm_astd_wake();
return error;
}
/* If the lockspace is in recovery mode (locking stopped), then normal
messages are saved on the requestqueue for processing after recovery is
done. When not in recovery mode, we wait for dlm_recoverd to drain saved
messages off the requestqueue before we process new ones. This occurs right
after recovery completes when we transition from saving all messages on
requestqueue, to processing all the saved messages, to processing new
messages as they arrive. */
/*
* Recovery related
*/
/* Route one normal (DLM_MSG) message for this lockspace: while locking is
   stopped the message is parked on the requestqueue; otherwise it is handled
   immediately, once dlm_wait_requestqueue() has returned. */
static void dlm_receive_message(struct dlm_ls *ls, struct dlm_message *ms,
				int nodeid)
{
	if (!dlm_locking_stopped(ls)) {
		/* let previously saved messages go first */
		dlm_wait_requestqueue(ls);
		_receive_message(ls, ms);
		return;
	}

	/* recovery in progress: save the message for dlm_recoverd */
	dlm_add_requestqueue(ls, nodeid, (struct dlm_header *) ms);
}
/* This is called by dlm_recoverd to process messages that were saved on
the requestqueue. Unlike dlm_receive_message(), it hands the message straight
to _receive_message() without checking dlm_locking_stopped() and without
waiting on the requestqueue. */
void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms)
{
_receive_message(ls, ms);
}
/* This is called by the midcomms layer when something is received for
   the lockspace.  It could be either a MSG (normal message sent as part of
   standard locking activity) or an RCOM (recovery message sent as part of
   lockspace recovery).

   hd is the received header and nodeid is the node the buffer arrived from.
   The buffer is logged and dropped when h_cmd is neither DLM_MSG nor
   DLM_RCOM, when h_nodeid does not match nodeid, or when the lockspace named
   by h_lockspace cannot be found.  In the last case an RCOM status request is
   still answered through dlm_send_ls_not_ready(). */
void dlm_receive_buffer(struct dlm_header *hd, int nodeid)
{
	struct dlm_message *ms = (struct dlm_message *) hd;
	struct dlm_rcom *rc = (struct dlm_rcom *) hd;
	struct dlm_ls *ls;
	int type = 0;

	switch (hd->h_cmd) {
	case DLM_MSG:
		dlm_message_in(ms);
		type = ms->m_type;
		break;
	case DLM_RCOM:
		dlm_rcom_in(rc);
		type = rc->rc_type;
		break;
	default:
		/* nodeid is a signed int, so it is printed with %d here just
		   as in the other messages of this function */
		log_print("invalid h_cmd %d from %d", hd->h_cmd, nodeid);
		return;
	}

	if (hd->h_nodeid != nodeid) {
		log_print("invalid h_nodeid %d from %d lockspace %x",
			  hd->h_nodeid, nodeid, hd->h_lockspace);
		return;
	}

	ls = dlm_find_lockspace_global(hd->h_lockspace);
	if (!ls) {
		log_print("invalid h_lockspace %x from %d cmd %d type %d",
			  hd->h_lockspace, nodeid, hd->h_cmd, type);

		if (hd->h_cmd == DLM_RCOM && type == DLM_RCOM_STATUS)
			dlm_send_ls_not_ready(nodeid, rc);
		return;
	}

	/* this rwsem allows dlm_ls_stop() to wait for all dlm_recv threads to
	   be inactive (in this ls) before transitioning to recovery mode */

	down_read(&ls->ls_recv_active);
	if (hd->h_cmd == DLM_MSG)
		dlm_receive_message(ls, ms, nodeid);
	else
		dlm_receive_rcom(ls, rc, nodeid);
	up_read(&ls->ls_recv_active);

	/* pairs with dlm_find_lockspace_global() above */
	dlm_put_lockspace(ls);
}
static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb)
{
@@ -4429,7 +4455,8 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
if (lvb_in && ua->lksb.sb_lvbptr)
memcpy(ua->lksb.sb_lvbptr, lvb_in, DLM_USER_LVB_LEN);
ua->castparam = ua_tmp->castparam;
if (ua_tmp->castparam)
ua->castparam = ua_tmp->castparam;
ua->user_lksb = ua_tmp->user_lksb;
error = set_unlock_args(flags, ua, &args);
@@ -4474,7 +4501,8 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
goto out;
ua = (struct dlm_user_args *)lkb->lkb_astparam;
ua->castparam = ua_tmp->castparam;
if (ua_tmp->castparam)
ua->castparam = ua_tmp->castparam;
ua->user_lksb = ua_tmp->user_lksb;
error = set_unlock_args(flags, ua, &args);

View File

@@ -16,7 +16,8 @@
void dlm_print_rsb(struct dlm_rsb *r);
void dlm_dump_rsb(struct dlm_rsb *r);
void dlm_print_lkb(struct dlm_lkb *lkb);
int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery);
void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms);
void dlm_receive_buffer(struct dlm_header *hd, int nodeid);
int dlm_modes_compat(int mode1, int mode2);
int dlm_find_rsb(struct dlm_ls *ls, char *name, int namelen,
unsigned int flags, struct dlm_rsb **r_ret);

View File

@@ -167,7 +167,6 @@ static struct kobj_type dlm_ktype = {
};
static struct kset dlm_kset = {
.kobj = {.name = "dlm",},
.ktype = &dlm_ktype,
};
@@ -228,6 +227,7 @@ int dlm_lockspace_init(void)
INIT_LIST_HEAD(&lslist);
spin_lock_init(&lslist_lock);
kobject_set_name(&dlm_kset.kobj, "dlm");
kobj_set_kset_s(&dlm_kset, kernel_subsys);
error = kset_register(&dlm_kset);
if (error)
@@ -519,6 +519,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
ls->ls_recover_seq = 0;
ls->ls_recover_args = NULL;
init_rwsem(&ls->ls_in_recovery);
init_rwsem(&ls->ls_recv_active);
INIT_LIST_HEAD(&ls->ls_requestqueue);
mutex_init(&ls->ls_requestqueue_mutex);
mutex_init(&ls->ls_clear_proc_locks);

View File

@@ -334,18 +334,8 @@ static void close_connection(struct connection *con, bool and_other)
con->rx_page = NULL;
}
/* If we are an 'othercon' then NULL the pointer to us
from the parent and tidy ourself up */
if (test_bit(CF_IS_OTHERCON, &con->flags)) {
struct connection *parent = __nodeid2con(con->nodeid, 0);
parent->othercon = NULL;
kmem_cache_free(con_cache, con);
}
else {
/* Parent connections get reused */
con->retries = 0;
mutex_unlock(&con->sock_mutex);
}
con->retries = 0;
mutex_unlock(&con->sock_mutex);
}
/* We only send shutdown messages to nodes that are not part of the cluster */
@@ -731,6 +721,8 @@ static int tcp_accept_from_sock(struct connection *con)
INIT_WORK(&othercon->swork, process_send_sockets);
INIT_WORK(&othercon->rwork, process_recv_sockets);
set_bit(CF_IS_OTHERCON, &othercon->flags);
}
if (!othercon->sock) {
newcon->othercon = othercon;
othercon->sock = newsock;
newsock->sk->sk_user_data = othercon;
@@ -1272,14 +1264,15 @@ static void send_to_sock(struct connection *con)
if (len) {
ret = sendpage(con->sock, e->page, offset, len,
msg_flags);
if (ret == -EAGAIN || ret == 0)
if (ret == -EAGAIN || ret == 0) {
cond_resched();
goto out;
}
if (ret <= 0)
goto send_error;
} else {
}
/* Don't starve people filling buffers */
cond_resched();
}
spin_lock(&con->writequeue_lock);
e->offset += ret;

View File

@@ -18,10 +18,6 @@
#include "rcom.h"
#include "config.h"
/*
* Following called by dlm_recoverd thread
*/
static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new)
{
struct dlm_member *memb = NULL;
@@ -250,18 +246,30 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
return error;
}
/*
* Following called from lockspace.c
*/
/* Userspace guarantees that dlm_ls_stop() has completed on all nodes before
dlm_ls_start() is called on any of them to start the new recovery. */
int dlm_ls_stop(struct dlm_ls *ls)
{
int new;
/*
* A stop cancels any recovery that's in progress (see RECOVERY_STOP,
* dlm_recovery_stopped()) and prevents any new locks from being
* processed (see RUNNING, dlm_locking_stopped()).
* Prevent dlm_recv from being in the middle of something when we do
* the stop. This includes ensuring dlm_recv isn't processing a
* recovery message (rcom), while dlm_recoverd is aborting and
* resetting things from an in-progress recovery. i.e. we want
* dlm_recoverd to abort its recovery without worrying about dlm_recv
* processing an rcom at the same time. Stopping dlm_recv also makes
* it easy for dlm_receive_message() to check locking stopped and add a
* message to the requestqueue without races.
*/
down_write(&ls->ls_recv_active);
/*
* Abort any recovery that's in progress (see RECOVERY_STOP,
* dlm_recovery_stopped()) and tell any other threads running in the
* dlm to quit any processing (see RUNNING, dlm_locking_stopped()).
*/
spin_lock(&ls->ls_recover_lock);
@@ -270,9 +278,15 @@ int dlm_ls_stop(struct dlm_ls *ls)
ls->ls_recover_seq++;
spin_unlock(&ls->ls_recover_lock);
/*
* Let dlm_recv run again, now any normal messages will be saved on the
* requestqueue for later.
*/
up_write(&ls->ls_recv_active);
/*
* This in_recovery lock does two things:
*
* 1) Keeps this function from returning until all threads are out
* of locking routines and locking is truly stopped.
* 2) Keeps any new requests from being processed until it's unlocked
@@ -284,9 +298,8 @@ int dlm_ls_stop(struct dlm_ls *ls)
/*
* The recoverd suspend/resume makes sure that dlm_recoverd (if
* running) has noticed the clearing of RUNNING above and quit
* processing the previous recovery. This will be true for all nodes
* before any nodes start the new recovery.
* running) has noticed RECOVERY_STOP above and quit processing the
* previous recovery.
*/
dlm_recoverd_suspend(ls);

View File

@@ -2,7 +2,7 @@
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -27,7 +27,6 @@
#include "dlm_internal.h"
#include "lowcomms.h"
#include "config.h"
#include "rcom.h"
#include "lock.h"
#include "midcomms.h"
@@ -117,19 +116,7 @@ int dlm_process_incoming_buffer(int nodeid, const void *base,
offset &= (limit - 1);
len -= msglen;
switch (msg->h_cmd) {
case DLM_MSG:
dlm_receive_message(msg, nodeid, 0);
break;
case DLM_RCOM:
dlm_receive_rcom(msg, nodeid);
break;
default:
log_print("unknown msg type %x from %u: %u %u %u %u",
msg->h_cmd, nodeid, msglen, len, offset, ret);
}
dlm_receive_buffer(msg, nodeid);
}
if (msg != (struct dlm_header *) __tmp)

View File

@@ -2,7 +2,7 @@
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2005 Red Hat, Inc. All rights reserved.
** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -386,7 +386,10 @@ static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
dlm_recover_process_copy(ls, rc_in);
}
static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
/* If the lockspace doesn't exist then still send a status message
back; it's possible that it just doesn't have its global_id yet. */
int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
{
struct dlm_rcom *rc;
struct rcom_config *rf;
@@ -446,28 +449,11 @@ static int is_old_reply(struct dlm_ls *ls, struct dlm_rcom *rc)
return rv;
}
/* Called by dlm_recvd; corresponds to dlm_receive_message() but special
/* Called by dlm_recv; corresponds to dlm_receive_message() but special
recovery-only comms are sent through here. */
void dlm_receive_rcom(struct dlm_header *hd, int nodeid)
void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
{
struct dlm_rcom *rc = (struct dlm_rcom *) hd;
struct dlm_ls *ls;
dlm_rcom_in(rc);
/* If the lockspace doesn't exist then still send a status message
back; it's possible that it just doesn't have its global_id yet. */
ls = dlm_find_lockspace_global(hd->h_lockspace);
if (!ls) {
log_print("lockspace %x from %d type %x not found",
hd->h_lockspace, nodeid, rc->rc_type);
if (rc->rc_type == DLM_RCOM_STATUS)
send_ls_not_ready(nodeid, rc);
return;
}
if (dlm_recovery_stopped(ls) && (rc->rc_type != DLM_RCOM_STATUS)) {
log_debug(ls, "ignoring recovery message %x from %d",
rc->rc_type, nodeid);
@@ -477,12 +463,6 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid)
if (is_old_reply(ls, rc))
goto out;
if (nodeid != rc->rc_header.h_nodeid) {
log_error(ls, "bad rcom nodeid %d from %d",
rc->rc_header.h_nodeid, nodeid);
goto out;
}
switch (rc->rc_type) {
case DLM_RCOM_STATUS:
receive_rcom_status(ls, rc);
@@ -520,6 +500,6 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid)
DLM_ASSERT(0, printk("rc_type=%x\n", rc->rc_type););
}
out:
dlm_put_lockspace(ls);
return;
}

View File

@@ -2,7 +2,7 @@
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2005 Red Hat, Inc. All rights reserved.
** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -18,7 +18,8 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid);
int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name,int last_len);
int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid);
int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
void dlm_receive_rcom(struct dlm_header *hd, int nodeid);
void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid);
int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in);
#endif

View File

@@ -24,19 +24,28 @@
/* If the start for which we're re-enabling locking (seq) has been superseded
by a newer stop (ls_recover_seq), we need to leave locking disabled. */
by a newer stop (ls_recover_seq), we need to leave locking disabled.
We suspend dlm_recv threads here to avoid the race where dlm_recv a) sees
locking stopped and b) adds a message to the requestqueue, but dlm_recoverd
enables locking and clears the requestqueue between a and b. */
static int enable_locking(struct dlm_ls *ls, uint64_t seq)
{
	int rv;

	/* taken for write so that no dlm_recv thread is inside
	   dlm_receive_buffer() for this ls while RUNNING is set */
	down_write(&ls->ls_recv_active);

	spin_lock(&ls->ls_recover_lock);
	if (ls->ls_recover_seq != seq) {
		/* a different recovery sequence is current; leave locking
		   disabled and report the interruption */
		rv = -EINTR;
	} else {
		set_bit(LSFL_RUNNING, &ls->ls_flags);
		/* unblocks processes waiting to enter the dlm */
		up_write(&ls->ls_in_recovery);
		rv = 0;
	}
	spin_unlock(&ls->ls_recover_lock);

	up_write(&ls->ls_recv_active);
	return rv;
}

View File

@@ -1,7 +1,7 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) 2005 Red Hat, Inc. All rights reserved.
** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -20,7 +20,7 @@
struct rq_entry {
struct list_head list;
int nodeid;
char request[1];
char request[0];
};
/*
@@ -30,42 +30,39 @@ struct rq_entry {
* lockspace is enabled on some while still suspended on others.
*/
int dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd)
void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd)
{
struct rq_entry *e;
int length = hd->h_length;
int rv = 0;
e = kmalloc(sizeof(struct rq_entry) + length, GFP_KERNEL);
if (!e) {
log_print("dlm_add_requestqueue: out of memory\n");
return 0;
log_print("dlm_add_requestqueue: out of memory len %d", length);
return;
}
e->nodeid = nodeid;
memcpy(e->request, hd, length);
/* We need to check dlm_locking_stopped() after taking the mutex to
avoid a race where dlm_recoverd enables locking and runs
process_requestqueue between our earlier dlm_locking_stopped check
and this addition to the requestqueue. */
mutex_lock(&ls->ls_requestqueue_mutex);
if (dlm_locking_stopped(ls))
list_add_tail(&e->list, &ls->ls_requestqueue);
else {
log_debug(ls, "dlm_add_requestqueue skip from %d", nodeid);
kfree(e);
rv = -EAGAIN;
}
list_add_tail(&e->list, &ls->ls_requestqueue);
mutex_unlock(&ls->ls_requestqueue_mutex);
return rv;
}
/*
* Called by dlm_recoverd to process normal messages saved while recovery was
* happening. Normal locking has been enabled before this is called. dlm_recv,
* upon receiving a message, will wait for all saved messages to be drained
* here before processing the message it got. If a new dlm_ls_stop() arrives
* while we're processing these saved messages, it may block trying to suspend
* dlm_recv if dlm_recv is waiting for us in dlm_wait_requestqueue. In that
* case, we don't abort since locking_stopped is still 0. If dlm_recv is not
* waiting for us, then this processing may be aborted due to locking_stopped.
*/
int dlm_process_requestqueue(struct dlm_ls *ls)
{
struct rq_entry *e;
struct dlm_header *hd;
int error = 0;
mutex_lock(&ls->ls_requestqueue_mutex);
@@ -79,14 +76,7 @@ int dlm_process_requestqueue(struct dlm_ls *ls)
e = list_entry(ls->ls_requestqueue.next, struct rq_entry, list);
mutex_unlock(&ls->ls_requestqueue_mutex);
hd = (struct dlm_header *) e->request;
error = dlm_receive_message(hd, e->nodeid, 1);
if (error == -EINTR) {
/* entry is left on requestqueue */
log_debug(ls, "process_requestqueue abort eintr");
break;
}
dlm_receive_message_saved(ls, (struct dlm_message *)e->request);
mutex_lock(&ls->ls_requestqueue_mutex);
list_del(&e->list);
@@ -106,10 +96,12 @@ int dlm_process_requestqueue(struct dlm_ls *ls)
/*
* After recovery is done, locking is resumed and dlm_recoverd takes all the
* saved requests and processes them as they would have been by dlm_recvd. At
* the same time, dlm_recvd will start receiving new requests from remote
* nodes. We want to delay dlm_recvd processing new requests until
* dlm_recoverd has finished processing the old saved requests.
* saved requests and processes them as they would have been by dlm_recv. At
* the same time, dlm_recv will start receiving new requests from remote nodes.
* We want to delay dlm_recv processing new requests until dlm_recoverd has
* finished processing the old saved requests. We don't check for locking
* stopped here because dlm_ls_stop won't stop locking until it's suspended us
* (dlm_recv).
*/
void dlm_wait_requestqueue(struct dlm_ls *ls)
@@ -118,8 +110,6 @@ void dlm_wait_requestqueue(struct dlm_ls *ls)
mutex_lock(&ls->ls_requestqueue_mutex);
if (list_empty(&ls->ls_requestqueue))
break;
if (dlm_locking_stopped(ls))
break;
mutex_unlock(&ls->ls_requestqueue_mutex);
schedule();
}

View File

@@ -1,7 +1,7 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) 2005 Red Hat, Inc. All rights reserved.
** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -13,7 +13,7 @@
#ifndef __REQUESTQUEUE_DOT_H__
#define __REQUESTQUEUE_DOT_H__
int dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd);
void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd);
int dlm_process_requestqueue(struct dlm_ls *ls);
void dlm_wait_requestqueue(struct dlm_ls *ls);
void dlm_purge_requestqueue(struct dlm_ls *ls);

View File

@@ -165,22 +165,10 @@ static int ecryptfs_process_nl_quit(struct sk_buff *skb)
* it to its desired netlink context element and wake up the process
* that is waiting for a response.
*/
static void ecryptfs_receive_nl_message(struct sock *sk, int len)
static void ecryptfs_receive_nl_message(struct sk_buff *skb)
{
struct sk_buff *skb;
struct nlmsghdr *nlh;
int rc = 0; /* skb_recv_datagram requires this */
receive:
skb = skb_recv_datagram(sk, 0, 0, &rc);
if (rc == -EINTR)
goto receive;
else if (rc < 0) {
ecryptfs_printk(KERN_ERR, "Error occurred while "
"receiving eCryptfs netlink message; "
"rc = [%d]\n", rc);
return;
}
nlh = nlmsg_hdr(skb);
if (!NLMSG_OK(nlh, skb->len)) {
ecryptfs_printk(KERN_ERR, "Received corrupt netlink "
@@ -227,7 +215,7 @@ int ecryptfs_init_netlink(void)
{
int rc;
ecryptfs_nl_sock = netlink_kernel_create(NETLINK_ECRYPTFS, 0,
ecryptfs_nl_sock = netlink_kernel_create(&init_net, NETLINK_ECRYPTFS, 0,
ecryptfs_receive_nl_message,
NULL, THIS_MODULE);
if (!ecryptfs_nl_sock) {

View File

@@ -14,6 +14,7 @@
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/sched.h>
#include <linux/fs.h>

View File

@@ -93,9 +93,10 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
map_bh(bh, inode->i_sb, block);
set_buffer_uptodate(bh);
if (!gfs2_is_jdata(ip))
mark_buffer_dirty(bh);
if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
gfs2_trans_add_bh(ip->i_gl, bh, 0);
mark_buffer_dirty(bh);
if (release) {
unlock_page(page);
@@ -1085,6 +1086,33 @@ static int do_shrink(struct gfs2_inode *ip, u64 size)
return error;
}
/* Set mtime and ctime of @ip to the current time inside a RES_DINODE
   transaction and write the dinode out through its buffer.  @size is not
   used.  Returns 0, or the error from gfs2_trans_begin() or
   gfs2_meta_inode_buffer(). */
static int do_touch(struct gfs2_inode *ip, u64 size)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct buffer_head *dibh;
	int error;

	error = gfs2_trans_begin(sdp, RES_DINODE, 0);
	if (error)
		return error;

	down_write(&ip->i_rw_mutex);

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (!error) {
		ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
		gfs2_dinode_out(ip, dibh->b_data);
		brelse(dibh);
	}

	/* the transaction is ended whether or not the buffer was obtained */
	up_write(&ip->i_rw_mutex);
	gfs2_trans_end(sdp);
	return error;
}
/**
* gfs2_truncatei - make a file a given size
* @ip: the inode
@@ -1105,8 +1133,11 @@ int gfs2_truncatei(struct gfs2_inode *ip, u64 size)
if (size > ip->i_di.di_size)
error = do_grow(ip, size);
else
else if (size < ip->i_di.di_size)
error = do_shrink(ip, size);
else
/* update time stamps */
error = do_touch(ip, size);
return error;
}

View File

@@ -34,30 +34,6 @@
The kthread functions used to start these daemons block and flush signals. */
/**
 * gfs2_scand - Look for cached glocks and inodes to toss from memory
 * @data: Pointer to the GFS2 superblock, passed as an untyped thread argument
 *
 * One of these daemons runs, finding candidates to add to sd_reclaim_list.
 * See gfs2_glockd()
 *
 * Each pass calls gfs2_scand_internal() and then sleeps interruptibly for
 * the gt_scand_secs tunable (in seconds), until kthread_should_stop().
 *
 * Returns: 0
 */

int gfs2_scand(void *data)
{
	struct gfs2_sbd *sdp = data;

	for (;;) {
		unsigned long timeout;

		if (kthread_should_stop())
			break;

		gfs2_scand_internal(sdp);
		timeout = gfs2_tune_get(sdp, gt_scand_secs) * HZ;
		if (freezing(current))
			refrigerator();
		schedule_timeout_interruptible(timeout);
	}

	return 0;
}
/**
* gfs2_glockd - Reclaim unused glock structures
* @sdp: Pointer to GFS2 superblock

View File

@@ -10,7 +10,6 @@
#ifndef __DAEMON_DOT_H__
#define __DAEMON_DOT_H__
int gfs2_scand(void *data);
int gfs2_glockd(void *data);
int gfs2_recoverd(void *data);
int gfs2_logd(void *data);

View File

@@ -1043,6 +1043,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
error = gfs2_meta_inode_buffer(dip, &dibh);
if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) {
gfs2_trans_add_bh(dip->i_gl, dibh, 1);
dip->i_di.di_blocks++;
gfs2_set_inode_blocks(&dip->i_inode);
gfs2_dinode_out(dip, dibh->b_data);
@@ -1501,7 +1502,7 @@ struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name)
inode = gfs2_inode_lookup(dir->i_sb,
be16_to_cpu(dent->de_type),
be64_to_cpu(dent->de_inum.no_addr),
be64_to_cpu(dent->de_inum.no_formal_ino));
be64_to_cpu(dent->de_inum.no_formal_ino), 0);
brelse(bh);
return inode;
}

View File

@@ -200,28 +200,28 @@ static int security_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er)
return gfs2_ea_remove_i(ip, er);
}
static struct gfs2_eattr_operations gfs2_user_eaops = {
static const struct gfs2_eattr_operations gfs2_user_eaops = {
.eo_get = user_eo_get,
.eo_set = user_eo_set,
.eo_remove = user_eo_remove,
.eo_name = "user",
};
struct gfs2_eattr_operations gfs2_system_eaops = {
const struct gfs2_eattr_operations gfs2_system_eaops = {
.eo_get = system_eo_get,
.eo_set = system_eo_set,
.eo_remove = system_eo_remove,
.eo_name = "system",
};
static struct gfs2_eattr_operations gfs2_security_eaops = {
static const struct gfs2_eattr_operations gfs2_security_eaops = {
.eo_get = security_eo_get,
.eo_set = security_eo_set,
.eo_remove = security_eo_remove,
.eo_name = "security",
};
struct gfs2_eattr_operations *gfs2_ea_ops[] = {
const struct gfs2_eattr_operations *gfs2_ea_ops[] = {
NULL,
&gfs2_user_eaops,
&gfs2_system_eaops,

View File

@@ -22,9 +22,9 @@ struct gfs2_eattr_operations {
unsigned int gfs2_ea_name2type(const char *name, const char **truncated_name);
extern struct gfs2_eattr_operations gfs2_system_eaops;
extern const struct gfs2_eattr_operations gfs2_system_eaops;
extern struct gfs2_eattr_operations *gfs2_ea_ops[];
extern const struct gfs2_eattr_operations *gfs2_ea_ops[];
#endif /* __EAOPS_DOT_H__ */

View File

@@ -25,8 +25,10 @@
#include <asm/uaccess.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>
#include "gfs2.h"
#include "incore.h"
@@ -48,7 +50,6 @@ struct glock_iter {
int hash; /* hash bucket index */
struct gfs2_sbd *sdp; /* incore superblock */
struct gfs2_glock *gl; /* current glock struct */
struct hlist_head *hb_list; /* current hash bucket ptr */
struct seq_file *seq; /* sequence file for debugfs */
char string[512]; /* scratch space */
};
@@ -59,8 +60,13 @@ static int gfs2_dump_lockstate(struct gfs2_sbd *sdp);
static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl);
static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh);
static void gfs2_glock_drop_th(struct gfs2_glock *gl);
static void run_queue(struct gfs2_glock *gl);
static DECLARE_RWSEM(gfs2_umount_flush_sem);
static struct dentry *gfs2_root;
static struct task_struct *scand_process;
static unsigned int scand_secs = 5;
static struct workqueue_struct *glock_workqueue;
#define GFS2_GL_HASH_SHIFT 15
#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT)
@@ -276,6 +282,18 @@ static struct gfs2_glock *gfs2_glock_find(const struct gfs2_sbd *sdp,
return gl;
}
/* Delayed-work handler for a glock: turns a pending demote request into an
   active one, runs the glock's queue under gl_spin, and then drops one glock
   reference (callers that fail to queue the work put the reference
   themselves). */
static void glock_work_func(struct work_struct *work)
{
	struct gfs2_glock *glock =
		container_of(work, struct gfs2_glock, gl_work.work);
	int was_pending;

	spin_lock(&glock->gl_spin);
	was_pending = test_and_clear_bit(GLF_PENDING_DEMOTE, &glock->gl_flags);
	if (was_pending) {
		/* the delay is over: the demote is now due */
		set_bit(GLF_DEMOTE, &glock->gl_flags);
	}
	run_queue(glock);
	spin_unlock(&glock->gl_spin);

	gfs2_glock_put(glock);
}
/**
* gfs2_glock_get() - Get a glock, or create one if one doesn't exist
* @sdp: The GFS2 superblock
@@ -315,6 +333,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
gl->gl_name = name;
atomic_set(&gl->gl_ref, 1);
gl->gl_state = LM_ST_UNLOCKED;
gl->gl_demote_state = LM_ST_EXCLUSIVE;
gl->gl_hash = hash;
gl->gl_owner_pid = 0;
gl->gl_ip = 0;
@@ -323,10 +342,12 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
gl->gl_req_bh = NULL;
gl->gl_vn = 0;
gl->gl_stamp = jiffies;
gl->gl_tchange = jiffies;
gl->gl_object = NULL;
gl->gl_sbd = sdp;
gl->gl_aspace = NULL;
lops_init_le(&gl->gl_le, &gfs2_glock_lops);
INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
/* If this glock protects actual on-disk data or metadata blocks,
create a VFS inode to manage the pages/buffers holding them. */
@@ -440,6 +461,8 @@ static void wait_on_holder(struct gfs2_holder *gh)
static void gfs2_demote_wake(struct gfs2_glock *gl)
{
BUG_ON(!spin_is_locked(&gl->gl_spin));
gl->gl_demote_state = LM_ST_EXCLUSIVE;
clear_bit(GLF_DEMOTE, &gl->gl_flags);
smp_mb__after_clear_bit();
wake_up_bit(&gl->gl_flags, GLF_DEMOTE);
@@ -545,12 +568,14 @@ static int rq_demote(struct gfs2_glock *gl)
return 0;
}
set_bit(GLF_LOCK, &gl->gl_flags);
spin_unlock(&gl->gl_spin);
if (gl->gl_demote_state == LM_ST_UNLOCKED ||
gl->gl_state != LM_ST_EXCLUSIVE)
gl->gl_state != LM_ST_EXCLUSIVE) {
spin_unlock(&gl->gl_spin);
gfs2_glock_drop_th(gl);
else
} else {
spin_unlock(&gl->gl_spin);
gfs2_glock_xmote_th(gl, NULL);
}
spin_lock(&gl->gl_spin);
return 0;
@@ -679,24 +704,25 @@ static void gfs2_glmutex_unlock(struct gfs2_glock *gl)
* practise: LM_ST_SHARED and LM_ST_UNLOCKED
*/
static void handle_callback(struct gfs2_glock *gl, unsigned int state, int remote)
static void handle_callback(struct gfs2_glock *gl, unsigned int state,
int remote, unsigned long delay)
{
int bit = delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE;
spin_lock(&gl->gl_spin);
if (test_and_set_bit(GLF_DEMOTE, &gl->gl_flags) == 0) {
set_bit(bit, &gl->gl_flags);
if (gl->gl_demote_state == LM_ST_EXCLUSIVE) {
gl->gl_demote_state = state;
gl->gl_demote_time = jiffies;
if (remote && gl->gl_ops->go_type == LM_TYPE_IOPEN &&
gl->gl_object) {
struct inode *inode = igrab(gl->gl_object);
gfs2_glock_schedule_for_reclaim(gl);
spin_unlock(&gl->gl_spin);
if (inode) {
d_prune_aliases(inode);
iput(inode);
}
return;
}
} else if (gl->gl_demote_state != LM_ST_UNLOCKED) {
gl->gl_demote_state = state;
} else if (gl->gl_demote_state != LM_ST_UNLOCKED &&
gl->gl_demote_state != state) {
gl->gl_demote_state = LM_ST_UNLOCKED;
}
spin_unlock(&gl->gl_spin);
}
@@ -723,6 +749,7 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state)
}
gl->gl_state = new_state;
gl->gl_tchange = jiffies;
}
/**
@@ -760,10 +787,20 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
if (!gh) {
gl->gl_stamp = jiffies;
if (ret & LM_OUT_CANCELED)
if (ret & LM_OUT_CANCELED) {
op_done = 0;
else
} else {
spin_lock(&gl->gl_spin);
if (gl->gl_state != gl->gl_demote_state) {
gl->gl_req_bh = NULL;
spin_unlock(&gl->gl_spin);
gfs2_glock_drop_th(gl);
gfs2_glock_put(gl);
return;
}
gfs2_demote_wake(gl);
spin_unlock(&gl->gl_spin);
}
} else {
spin_lock(&gl->gl_spin);
list_del_init(&gh->gh_list);
@@ -799,7 +836,6 @@ out:
gl->gl_req_gh = NULL;
gl->gl_req_bh = NULL;
clear_bit(GLF_LOCK, &gl->gl_flags);
run_queue(gl);
spin_unlock(&gl->gl_spin);
}
@@ -817,7 +853,7 @@ out:
*
*/
void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh)
static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
int flags = gh ? gh->gh_flags : 0;
@@ -871,7 +907,6 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
gfs2_assert_warn(sdp, !ret);
state_change(gl, LM_ST_UNLOCKED);
gfs2_demote_wake(gl);
if (glops->go_inval)
glops->go_inval(gl, DIO_METADATA);
@@ -884,10 +919,10 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
}
spin_lock(&gl->gl_spin);
gfs2_demote_wake(gl);
gl->gl_req_gh = NULL;
gl->gl_req_bh = NULL;
clear_bit(GLF_LOCK, &gl->gl_flags);
run_queue(gl);
spin_unlock(&gl->gl_spin);
gfs2_glock_put(gl);
@@ -1067,24 +1102,31 @@ static void add_to_queue(struct gfs2_holder *gh)
if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags))
BUG();
existing = find_holder_by_owner(&gl->gl_holders, gh->gh_owner_pid);
if (existing) {
print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip);
printk(KERN_INFO "pid : %d\n", existing->gh_owner_pid);
printk(KERN_INFO "lock type : %d lock state : %d\n",
existing->gh_gl->gl_name.ln_type, existing->gh_gl->gl_state);
print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip);
printk(KERN_INFO "pid : %d\n", gh->gh_owner_pid);
printk(KERN_INFO "lock type : %d lock state : %d\n",
gl->gl_name.ln_type, gl->gl_state);
BUG();
}
existing = find_holder_by_owner(&gl->gl_waiters3, gh->gh_owner_pid);
if (existing) {
print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip);
print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip);
BUG();
if (!(gh->gh_flags & GL_FLOCK)) {
existing = find_holder_by_owner(&gl->gl_holders,
gh->gh_owner_pid);
if (existing) {
print_symbol(KERN_WARNING "original: %s\n",
existing->gh_ip);
printk(KERN_INFO "pid : %d\n", existing->gh_owner_pid);
printk(KERN_INFO "lock type : %d lock state : %d\n",
existing->gh_gl->gl_name.ln_type,
existing->gh_gl->gl_state);
print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip);
printk(KERN_INFO "pid : %d\n", gh->gh_owner_pid);
printk(KERN_INFO "lock type : %d lock state : %d\n",
gl->gl_name.ln_type, gl->gl_state);
BUG();
}
existing = find_holder_by_owner(&gl->gl_waiters3,
gh->gh_owner_pid);
if (existing) {
print_symbol(KERN_WARNING "original: %s\n",
existing->gh_ip);
print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip);
BUG();
}
}
if (gh->gh_flags & LM_FLAG_PRIORITY)
@@ -1195,9 +1237,10 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
{
struct gfs2_glock *gl = gh->gh_gl;
const struct gfs2_glock_operations *glops = gl->gl_ops;
unsigned delay = 0;
if (gh->gh_flags & GL_NOCACHE)
handle_callback(gl, LM_ST_UNLOCKED, 0);
handle_callback(gl, LM_ST_UNLOCKED, 0, 0);
gfs2_glmutex_lock(gl);
@@ -1215,8 +1258,14 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
}
clear_bit(GLF_LOCK, &gl->gl_flags);
run_queue(gl);
spin_unlock(&gl->gl_spin);
gfs2_glock_hold(gl);
if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
!test_bit(GLF_DEMOTE, &gl->gl_flags))
delay = gl->gl_ops->go_min_hold_time;
if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
gfs2_glock_put(gl);
}
void gfs2_glock_dq_wait(struct gfs2_holder *gh)
@@ -1443,18 +1492,21 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name,
unsigned int state)
{
struct gfs2_glock *gl;
unsigned long delay = 0;
unsigned long holdtime;
unsigned long now = jiffies;
gl = gfs2_glock_find(sdp, name);
if (!gl)
return;
handle_callback(gl, state, 1);
holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time;
if (time_before(now, holdtime))
delay = holdtime - now;
spin_lock(&gl->gl_spin);
run_queue(gl);
spin_unlock(&gl->gl_spin);
gfs2_glock_put(gl);
handle_callback(gl, state, 1, delay);
if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
gfs2_glock_put(gl);
}
/**
@@ -1495,7 +1547,8 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data)
return;
if (!gfs2_assert_warn(sdp, gl->gl_req_bh))
gl->gl_req_bh(gl, async->lc_ret);
gfs2_glock_put(gl);
if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
gfs2_glock_put(gl);
up_read(&gfs2_umount_flush_sem);
return;
}
@@ -1588,7 +1641,7 @@ void gfs2_reclaim_glock(struct gfs2_sbd *sdp)
if (gfs2_glmutex_trylock(gl)) {
if (list_empty(&gl->gl_holders) &&
gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl))
handle_callback(gl, LM_ST_UNLOCKED, 0);
handle_callback(gl, LM_ST_UNLOCKED, 0, 0);
gfs2_glmutex_unlock(gl);
}
@@ -1617,7 +1670,7 @@ static int examine_bucket(glock_examiner examiner, struct gfs2_sbd *sdp,
goto out;
gl = list_entry(head->first, struct gfs2_glock, gl_list);
while(1) {
if (gl->gl_sbd == sdp) {
if (!sdp || gl->gl_sbd == sdp) {
gfs2_glock_hold(gl);
read_unlock(gl_lock_addr(hash));
if (prev)
@@ -1635,6 +1688,7 @@ out:
read_unlock(gl_lock_addr(hash));
if (prev)
gfs2_glock_put(prev);
cond_resched();
return has_entries;
}
@@ -1662,20 +1716,6 @@ out_schedule:
gfs2_glock_schedule_for_reclaim(gl);
}
/**
* gfs2_scand_internal - Look for glocks and inodes to toss from memory
* @sdp: the filesystem
*
*/
void gfs2_scand_internal(struct gfs2_sbd *sdp)
{
unsigned int x;
for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
examine_bucket(scan_glock, sdp, x);
}
/**
* clear_glock - look at a glock and see if we can free it from glock cache
* @gl: the glock to look at
@@ -1701,7 +1741,7 @@ static void clear_glock(struct gfs2_glock *gl)
if (gfs2_glmutex_trylock(gl)) {
if (list_empty(&gl->gl_holders) &&
gl->gl_state != LM_ST_UNLOCKED)
handle_callback(gl, LM_ST_UNLOCKED, 0);
handle_callback(gl, LM_ST_UNLOCKED, 0, 0);
gfs2_glmutex_unlock(gl);
}
}
@@ -1843,7 +1883,7 @@ static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl)
spin_lock(&gl->gl_spin);
print_dbg(gi, "Glock 0x%p (%u, %llu)\n", gl, gl->gl_name.ln_type,
print_dbg(gi, "Glock 0x%p (%u, 0x%llx)\n", gl, gl->gl_name.ln_type,
(unsigned long long)gl->gl_name.ln_number);
print_dbg(gi, " gl_flags =");
for (x = 0; x < 32; x++) {
@@ -1963,6 +2003,35 @@ static int gfs2_dump_lockstate(struct gfs2_sbd *sdp)
return error;
}
/**
 * gfs2_scand - Look for cached glocks and inodes to toss from memory
 * @data: thread argument (not used)
 *
 * One of these daemons runs, finding candidates to add to sd_reclaim_list.
 * Each pass hands every glock hash bucket to examine_bucket() with
 * scan_glock as the examiner and a NULL superblock, then sleeps for
 * scand_secs seconds (never less than one second).
 * See gfs2_glockd()
 *
 * Returns: 0 once the thread has been asked to stop
 */
static int gfs2_scand(void *data)
{
	unsigned bucket;
	unsigned secs;

	for (;;) {
		if (kthread_should_stop())
			break;

		for (bucket = 0; bucket < GFS2_GL_HASH_SIZE; bucket++)
			examine_bucket(scan_glock, NULL, bucket);

		if (freezing(current))
			refrigerator();

		/* Sample the tunable once per pass; a value of 0 means 1 second */
		secs = scand_secs;
		if (secs == 0)
			secs = 1;
		schedule_timeout_interruptible(secs * HZ);
	}

	return 0;
}
int __init gfs2_glock_init(void)
{
unsigned i;
@@ -1974,52 +2043,69 @@ int __init gfs2_glock_init(void)
rwlock_init(&gl_hash_locks[i]);
}
#endif
scand_process = kthread_run(gfs2_scand, NULL, "gfs2_scand");
if (IS_ERR(scand_process))
return PTR_ERR(scand_process);
glock_workqueue = create_workqueue("glock_workqueue");
if (IS_ERR(glock_workqueue)) {
kthread_stop(scand_process);
return PTR_ERR(glock_workqueue);
}
return 0;
}
/**
 * gfs2_glock_exit - Tear down the module-wide glock helpers
 *
 * Destroys glock_workqueue and then stops the scand_process kernel thread.
 */
void gfs2_glock_exit(void)
{
destroy_workqueue(glock_workqueue);
kthread_stop(scand_process);
}
module_param(scand_secs, uint, S_IRUGO|S_IWUSR);
MODULE_PARM_DESC(scand_secs, "The number of seconds between scand runs");
static int gfs2_glock_iter_next(struct glock_iter *gi)
{
struct gfs2_glock *gl;
restart:
read_lock(gl_lock_addr(gi->hash));
while (1) {
if (!gi->hb_list) { /* If we don't have a hash bucket yet */
gi->hb_list = &gl_hash_table[gi->hash].hb_list;
if (hlist_empty(gi->hb_list)) {
read_unlock(gl_lock_addr(gi->hash));
gi->hash++;
read_lock(gl_lock_addr(gi->hash));
gi->hb_list = NULL;
if (gi->hash >= GFS2_GL_HASH_SIZE) {
read_unlock(gl_lock_addr(gi->hash));
return 1;
}
else
continue;
}
if (!hlist_empty(gi->hb_list)) {
gi->gl = list_entry(gi->hb_list->first,
struct gfs2_glock,
gl_list);
}
} else {
if (gi->gl->gl_list.next == NULL) {
read_unlock(gl_lock_addr(gi->hash));
gi->hash++;
read_lock(gl_lock_addr(gi->hash));
gi->hb_list = NULL;
continue;
}
gi->gl = list_entry(gi->gl->gl_list.next,
struct gfs2_glock, gl_list);
}
gl = gi->gl;
if (gl) {
gi->gl = hlist_entry(gl->gl_list.next,
struct gfs2_glock, gl_list);
if (gi->gl)
break;
gfs2_glock_hold(gi->gl);
}
read_unlock(gl_lock_addr(gi->hash));
if (gl)
gfs2_glock_put(gl);
if (gl && gi->gl == NULL)
gi->hash++;
while(gi->gl == NULL) {
if (gi->hash >= GFS2_GL_HASH_SIZE)
return 1;
read_lock(gl_lock_addr(gi->hash));
gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first,
struct gfs2_glock, gl_list);
if (gi->gl)
gfs2_glock_hold(gi->gl);
read_unlock(gl_lock_addr(gi->hash));
gi->hash++;
}
if (gi->sdp != gi->gl->gl_sbd)
goto restart;
return 0;
}
static void gfs2_glock_iter_free(struct glock_iter *gi)
{
if (gi->gl)
gfs2_glock_put(gi->gl);
kfree(gi);
}
@@ -2033,9 +2119,8 @@ static struct glock_iter *gfs2_glock_iter_init(struct gfs2_sbd *sdp)
gi->sdp = sdp;
gi->hash = 0;
gi->gl = NULL;
gi->hb_list = NULL;
gi->seq = NULL;
gi->gl = NULL;
memset(gi->string, 0, sizeof(gi->string));
if (gfs2_glock_iter_next(gi)) {
@@ -2055,7 +2140,7 @@ static void *gfs2_glock_seq_start(struct seq_file *file, loff_t *pos)
if (!gi)
return NULL;
while (n--) {
while(n--) {
if (gfs2_glock_iter_next(gi)) {
gfs2_glock_iter_free(gi);
return NULL;
@@ -2082,7 +2167,9 @@ static void *gfs2_glock_seq_next(struct seq_file *file, void *iter_ptr,
/* seq_file ->stop handler: frees the glock iterator, if one was passed in. */
static void gfs2_glock_seq_stop(struct seq_file *file, void *iter_ptr)
{
/* iter_ptr is the struct glock_iter for this walk; it may be NULL */
struct glock_iter *gi = iter_ptr;
if (gi)
gfs2_glock_iter_free(gi);
}
static int gfs2_glock_seq_show(struct seq_file *file, void *iter_ptr)
@@ -2095,7 +2182,7 @@ static int gfs2_glock_seq_show(struct seq_file *file, void *iter_ptr)
return 0;
}
static struct seq_operations gfs2_glock_seq_ops = {
static const struct seq_operations gfs2_glock_seq_ops = {
.start = gfs2_glock_seq_start,
.next = gfs2_glock_seq_next,
.stop = gfs2_glock_seq_stop,

View File

@@ -26,6 +26,7 @@
#define GL_SKIP 0x00000100
#define GL_ATIME 0x00000200
#define GL_NOCACHE 0x00000400
#define GL_FLOCK 0x00000800
#define GL_NOCANCEL 0x00001000
#define GLR_TRYFAILED 13
@@ -132,11 +133,11 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data);
void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl);
void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
void gfs2_scand_internal(struct gfs2_sbd *sdp);
void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait);
int __init gfs2_glock_init(void);
void gfs2_glock_exit(void);
int gfs2_create_debugfs_file(struct gfs2_sbd *sdp);
void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp);
int gfs2_register_debugfs(void);

View File

@@ -41,7 +41,6 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
struct list_head *head = &gl->gl_ail_list;
struct gfs2_bufdata *bd;
struct buffer_head *bh;
u64 blkno;
int error;
blocks = atomic_read(&gl->gl_ail_count);
@@ -57,19 +56,12 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
bd = list_entry(head->next, struct gfs2_bufdata,
bd_ail_gl_list);
bh = bd->bd_bh;
blkno = bh->b_blocknr;
gfs2_remove_from_ail(NULL, bd);
bd->bd_bh = NULL;
bh->b_private = NULL;
bd->bd_blkno = bh->b_blocknr;
gfs2_assert_withdraw(sdp, !buffer_busy(bh));
bd->bd_ail = NULL;
list_del(&bd->bd_ail_st_list);
list_del(&bd->bd_ail_gl_list);
atomic_dec(&gl->gl_ail_count);
brelse(bh);
gfs2_log_unlock(sdp);
gfs2_trans_add_revoke(sdp, blkno);
gfs2_log_lock(sdp);
gfs2_trans_add_revoke(sdp, bd);
}
gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
gfs2_log_unlock(sdp);
@@ -156,9 +148,11 @@ static void inode_go_sync(struct gfs2_glock *gl)
ip = NULL;
if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
if (ip)
if (ip && !gfs2_is_jdata(ip))
filemap_fdatawrite(ip->i_inode.i_mapping);
gfs2_log_flush(gl->gl_sbd, gl);
if (ip && gfs2_is_jdata(ip))
filemap_fdatawrite(ip->i_inode.i_mapping);
gfs2_meta_sync(gl);
if (ip) {
struct address_space *mapping = ip->i_inode.i_mapping;
@@ -452,6 +446,7 @@ const struct gfs2_glock_operations gfs2_inode_glops = {
.go_lock = inode_go_lock,
.go_unlock = inode_go_unlock,
.go_type = LM_TYPE_INODE,
.go_min_hold_time = HZ / 10,
};
const struct gfs2_glock_operations gfs2_rgrp_glops = {
@@ -462,6 +457,7 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = {
.go_lock = rgrp_go_lock,
.go_unlock = rgrp_go_unlock,
.go_type = LM_TYPE_RGRP,
.go_min_hold_time = HZ / 10,
};
const struct gfs2_glock_operations gfs2_trans_glops = {

View File

@@ -11,6 +11,7 @@
#define __INCORE_DOT_H__
#include <linux/fs.h>
#include <linux/workqueue.h>
#define DIO_WAIT 0x00000010
#define DIO_METADATA 0x00000020
@@ -113,7 +114,13 @@ struct gfs2_bufdata {
struct buffer_head *bd_bh;
struct gfs2_glock *bd_gl;
struct list_head bd_list_tr;
union {
struct list_head list_tr;
u64 blkno;
} u;
#define bd_list_tr u.list_tr
#define bd_blkno u.blkno
struct gfs2_log_element bd_le;
struct gfs2_ail *bd_ail;
@@ -130,6 +137,7 @@ struct gfs2_glock_operations {
int (*go_lock) (struct gfs2_holder *gh);
void (*go_unlock) (struct gfs2_holder *gh);
const int go_type;
const unsigned long go_min_hold_time;
};
enum {
@@ -161,6 +169,7 @@ enum {
GLF_LOCK = 1,
GLF_STICKY = 2,
GLF_DEMOTE = 3,
GLF_PENDING_DEMOTE = 4,
GLF_DIRTY = 5,
};
@@ -193,6 +202,7 @@ struct gfs2_glock {
u64 gl_vn;
unsigned long gl_stamp;
unsigned long gl_tchange;
void *gl_object;
struct list_head gl_reclaim;
@@ -203,6 +213,7 @@ struct gfs2_glock {
struct gfs2_log_element gl_le;
struct list_head gl_ail_list;
atomic_t gl_ail_count;
struct delayed_work gl_work;
};
struct gfs2_alloc {
@@ -293,11 +304,6 @@ struct gfs2_file {
struct gfs2_holder f_fl_gh;
};
struct gfs2_revoke {
struct gfs2_log_element rv_le;
u64 rv_blkno;
};
struct gfs2_revoke_replay {
struct list_head rr_list;
u64 rr_blkno;
@@ -335,12 +341,6 @@ struct gfs2_quota_data {
unsigned long qd_last_touched;
};
struct gfs2_log_buf {
struct list_head lb_list;
struct buffer_head *lb_bh;
struct buffer_head *lb_real;
};
struct gfs2_trans {
unsigned long tr_ip;
@@ -429,7 +429,6 @@ struct gfs2_tune {
unsigned int gt_log_flush_secs;
unsigned int gt_jindex_refresh_secs; /* Check for new journal index */
unsigned int gt_scand_secs;
unsigned int gt_recoverd_secs;
unsigned int gt_logd_secs;
unsigned int gt_quotad_secs;
@@ -574,7 +573,6 @@ struct gfs2_sbd {
/* Daemon stuff */
struct task_struct *sd_scand_process;
struct task_struct *sd_recoverd_process;
struct task_struct *sd_logd_process;
struct task_struct *sd_quotad_process;
@@ -609,13 +607,13 @@ struct gfs2_sbd {
unsigned int sd_log_num_revoke;
unsigned int sd_log_num_rg;
unsigned int sd_log_num_databuf;
unsigned int sd_log_num_jdata;
struct list_head sd_log_le_gl;
struct list_head sd_log_le_buf;
struct list_head sd_log_le_revoke;
struct list_head sd_log_le_rg;
struct list_head sd_log_le_databuf;
struct list_head sd_log_le_ordered;
unsigned int sd_log_blks_free;
struct mutex sd_log_reserve_mutex;
@@ -627,7 +625,8 @@ struct gfs2_sbd {
unsigned long sd_log_flush_time;
struct rw_semaphore sd_log_flush_lock;
struct list_head sd_log_flush_list;
atomic_t sd_log_in_flight;
wait_queue_head_t sd_log_flush_wait;
unsigned int sd_log_flush_head;
u64 sd_log_flush_wrapped;

View File

@@ -77,6 +77,49 @@ static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr)
return iget5_locked(sb, hash, iget_test, iget_set, &no_addr);
}
/* Opaque argument shared by iget_skip_test() and iget_skip_set(). */
struct gfs2_skip_data {
u64 no_addr; /* inode address being looked up */
int skipped; /* set to 1 by iget_skip_test() when a match is being freed */
};
/*
 * Test callback handed to iget5_locked() by gfs2_iget_skip().
 *
 * An inode is a candidate when its i_no_addr equals the requested address
 * and its i_private is set.  A candidate that is being torn down
 * (I_FREEING, I_CLEAR or I_WILL_FREE) is rejected and recorded in
 * data->skipped; any other candidate is accepted.
 *
 * Returns: 1 on an acceptable match, 0 otherwise
 */
static int iget_skip_test(struct inode *inode, void *opaque)
{
	struct gfs2_skip_data *data = opaque;
	struct gfs2_inode *ip = GFS2_I(inode);

	if (ip->i_no_addr != data->no_addr || inode->i_private == NULL)
		return 0;

	if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
		data->skipped = 1;
		return 0;
	}

	return 1;
}
/*
 * Set callback handed to iget5_locked() by gfs2_iget_skip().
 *
 * If the test callback flagged a matching inode as being freed, the new
 * inode is left untouched and 1 is returned.  Otherwise the inode number
 * and the GFS2 inode address are initialised from data->no_addr and 0 is
 * returned.
 */
static int iget_skip_set(struct inode *inode, void *opaque)
{
	struct gfs2_skip_data *data = opaque;
	struct gfs2_inode *ip = GFS2_I(inode);

	if (!data->skipped) {
		inode->i_ino = (unsigned long)(data->no_addr);
		ip->i_no_addr = data->no_addr;
		return 0;
	}

	return 1;
}
/*
 * Variant of gfs2_iget() whose iget5_locked() callbacks refuse to hand
 * back an inode that is currently being freed.  The inode address doubles
 * as the hash value.
 */
static struct inode *gfs2_iget_skip(struct super_block *sb,
				    u64 no_addr)
{
	struct gfs2_skip_data data = {
		.no_addr = no_addr,
		.skipped = 0,
	};

	return iget5_locked(sb, (unsigned long)no_addr,
			    iget_skip_test, iget_skip_set, &data);
}
/**
* GFS2 lookup code fills in vfs inode contents based on info obtained
* from directory entry inside gfs2_inode_lookup(). This has caused issues
@@ -112,6 +155,7 @@ void gfs2_set_iop(struct inode *inode)
* @sb: The super block
* @no_addr: The inode number
* @type: The type of the inode
* @skip_freeing: set this to not return an inode if it is currently being freed.
*
* Returns: A VFS inode, or an error
*/
@@ -119,13 +163,19 @@ void gfs2_set_iop(struct inode *inode)
struct inode *gfs2_inode_lookup(struct super_block *sb,
unsigned int type,
u64 no_addr,
u64 no_formal_ino)
u64 no_formal_ino, int skip_freeing)
{
struct inode *inode = gfs2_iget(sb, no_addr);
struct gfs2_inode *ip = GFS2_I(inode);
struct inode *inode;
struct gfs2_inode *ip;
struct gfs2_glock *io_gl;
int error;
if (skip_freeing)
inode = gfs2_iget_skip(sb, no_addr);
else
inode = gfs2_iget(sb, no_addr);
ip = GFS2_I(inode);
if (!inode)
return ERR_PTR(-ENOBUFS);
@@ -244,6 +294,11 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
return 0;
}
/*
 * Record @bh in slot 0 of the inode's i_cache array.  No buffer reference
 * is taken here; the pointer is stored as-is.
 */
static void gfs2_inode_bh(struct gfs2_inode *ip, struct buffer_head *bh)
{
ip->i_cache[0] = bh;
}
/**
* gfs2_inode_refresh - Refresh the incore copy of the dinode
* @ip: The GFS2 inode
@@ -688,7 +743,7 @@ out:
static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
const struct gfs2_inum_host *inum, unsigned int mode,
unsigned int uid, unsigned int gid,
const u64 *generation, dev_t dev)
const u64 *generation, dev_t dev, struct buffer_head **bhp)
{
struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
struct gfs2_dinode *di;
@@ -743,13 +798,15 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec);
di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec);
memset(&di->di_reserved, 0, sizeof(di->di_reserved));
set_buffer_uptodate(dibh);
brelse(dibh);
*bhp = dibh;
}
static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
unsigned int mode, const struct gfs2_inum_host *inum,
const u64 *generation, dev_t dev)
const u64 *generation, dev_t dev, struct buffer_head **bhp)
{
struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
unsigned int uid, gid;
@@ -770,7 +827,7 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
if (error)
goto out_quota;
init_dinode(dip, gl, inum, mode, uid, gid, generation, dev);
init_dinode(dip, gl, inum, mode, uid, gid, generation, dev, bhp);
gfs2_quota_change(dip, +1, uid, gid);
gfs2_trans_end(sdp);
@@ -909,6 +966,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 };
int error;
u64 generation;
struct buffer_head *bh=NULL;
if (!name->len || name->len > GFS2_FNAMESIZE)
return ERR_PTR(-ENAMETOOLONG);
@@ -935,16 +993,18 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
if (error)
goto fail_gunlock;
error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev);
error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev, &bh);
if (error)
goto fail_gunlock2;
inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode),
inum.no_addr,
inum.no_formal_ino);
inum.no_formal_ino, 0);
if (IS_ERR(inode))
goto fail_gunlock2;
gfs2_inode_bh(GFS2_I(inode), bh);
error = gfs2_inode_refresh(GFS2_I(inode));
if (error)
goto fail_gunlock2;

View File

@@ -49,7 +49,8 @@ static inline void gfs2_inum_out(const struct gfs2_inode *ip,
void gfs2_inode_attr_in(struct gfs2_inode *ip);
void gfs2_set_iop(struct inode *inode);
struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type,
u64 no_addr, u64 no_formal_ino);
u64 no_addr, u64 no_formal_ino,
int skip_freeing);
struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr);
int gfs2_inode_refresh(struct gfs2_inode *ip);

View File

@@ -13,7 +13,6 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/list.h>

View File

@@ -346,15 +346,16 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
/*
 * dev_poll - poll handler for the device file
 * @file: the file being polled
 * @wait: poll table to register on
 *
 * Registers the caller on send_wq, then checks send_list under ops_lock.
 *
 * Returns: POLLIN | POLLRDNORM when send_list is non-empty, 0 otherwise
 */
static unsigned int dev_poll(struct file *file, poll_table *wait)
{
	unsigned int mask = 0;

	poll_wait(file, &send_wq, wait);

	/* Single locked check and single exit, so the lock is dropped once */
	spin_lock(&ops_lock);
	if (!list_empty(&send_list))
		mask = POLLIN | POLLRDNORM;
	spin_unlock(&ops_lock);

	return mask;
}
static const struct file_operations dev_fops = {

View File

@@ -190,7 +190,6 @@ static struct kobj_type gdlm_ktype = {
};
static struct kset gdlm_kset = {
.kobj = {.name = "lock_dlm",},
.ktype = &gdlm_ktype,
};
@@ -224,6 +223,7 @@ int gdlm_sysfs_init(void)
{
int error;
kobject_set_name(&gdlm_kset.kobj, "lock_dlm");
kobj_set_kset_s(&gdlm_kset, kernel_subsys);
error = kset_register(&gdlm_kset);
if (error)

View File

@@ -268,20 +268,16 @@ static inline int check_drop(struct gdlm_ls *ls)
return 0;
}
static int gdlm_thread(void *data)
static int gdlm_thread(void *data, int blist)
{
struct gdlm_ls *ls = (struct gdlm_ls *) data;
struct gdlm_lock *lp = NULL;
int blist = 0;
uint8_t complete, blocking, submit, drop;
DECLARE_WAITQUEUE(wait, current);
/* Only thread1 is allowed to do blocking callbacks since gfs
may wait for a completion callback within a blocking cb. */
if (current == ls->thread1)
blist = 1;
while (!kthread_should_stop()) {
set_current_state(TASK_INTERRUPTIBLE);
add_wait_queue(&ls->thread_wait, &wait);
@@ -333,12 +329,22 @@ static int gdlm_thread(void *data)
return 0;
}
/* Thread entry point that runs gdlm_thread() with blist set to 1. */
static int gdlm_thread1(void *data)
{
return gdlm_thread(data, 1);
}
/* Thread entry point that runs gdlm_thread() with blist set to 0. */
static int gdlm_thread2(void *data)
{
return gdlm_thread(data, 0);
}
int gdlm_init_threads(struct gdlm_ls *ls)
{
struct task_struct *p;
int error;
p = kthread_run(gdlm_thread, ls, "lock_dlm1");
p = kthread_run(gdlm_thread1, ls, "lock_dlm1");
error = IS_ERR(p);
if (error) {
log_error("can't start lock_dlm1 thread %d", error);
@@ -346,7 +352,7 @@ int gdlm_init_threads(struct gdlm_ls *ls)
}
ls->thread1 = p;
p = kthread_run(gdlm_thread, ls, "lock_dlm2");
p = kthread_run(gdlm_thread2, ls, "lock_dlm2");
error = IS_ERR(p);
if (error) {
log_error("can't start lock_dlm2 thread %d", error);

View File

@@ -9,7 +9,6 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/fs.h>

View File

@@ -59,6 +59,26 @@ unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
return blks;
}
/**
 * gfs2_remove_from_ail - Remove an entry from the ail lists, updating counters
 * @mapping: The associated mapping (may be NULL)
 * @bd: The gfs2_bufdata to remove
 *
 * Clears bd->bd_ail, unlinks @bd from both of its AIL lists, decrements
 * the gl_ail_count of the glock it belongs to and releases one reference
 * on bd->bd_bh.  When @mapping is non-NULL, gfs2_meta_cache_flush() is
 * also called for the inode hosting that mapping.
 *
 * The log lock _must_ be held when calling this function
 *
 */
void gfs2_remove_from_ail(struct address_space *mapping, struct gfs2_bufdata *bd)
{
bd->bd_ail = NULL;
/* list_del_init leaves both list heads empty rather than poisoned */
list_del_init(&bd->bd_ail_st_list);
list_del_init(&bd->bd_ail_gl_list);
atomic_dec(&bd->bd_gl->gl_ail_count);
if (mapping)
gfs2_meta_cache_flush(GFS2_I(mapping->host));
brelse(bd->bd_bh);
}
/**
* gfs2_ail1_start_one - Start I/O on a part of the AIL
* @sdp: the filesystem
@@ -83,17 +103,9 @@ static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
gfs2_assert(sdp, bd->bd_ail == ai);
if (!bh){
list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
continue;
}
if (!buffer_busy(bh)) {
if (!buffer_uptodate(bh)) {
gfs2_log_unlock(sdp);
if (!buffer_uptodate(bh))
gfs2_io_error_bh(sdp, bh);
gfs2_log_lock(sdp);
}
list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
continue;
}
@@ -103,9 +115,16 @@ static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list);
get_bh(bh);
gfs2_log_unlock(sdp);
wait_on_buffer(bh);
ll_rw_block(WRITE, 1, &bh);
lock_buffer(bh);
if (test_clear_buffer_dirty(bh)) {
bh->b_end_io = end_buffer_write_sync;
submit_bh(WRITE, bh);
} else {
unlock_buffer(bh);
brelse(bh);
}
gfs2_log_lock(sdp);
retry = 1;
@@ -130,11 +149,6 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int fl
bd_ail_st_list) {
bh = bd->bd_bh;
if (!bh){
list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
continue;
}
gfs2_assert(sdp, bd->bd_ail == ai);
if (buffer_busy(bh)) {
@@ -155,13 +169,14 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int fl
static void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags)
{
struct list_head *head = &sdp->sd_ail1_list;
struct list_head *head;
u64 sync_gen;
struct list_head *first;
struct gfs2_ail *first_ai, *ai, *tmp;
int done = 0;
gfs2_log_lock(sdp);
head = &sdp->sd_ail1_list;
if (list_empty(head)) {
gfs2_log_unlock(sdp);
return;
@@ -233,11 +248,7 @@ static void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
bd = list_entry(head->prev, struct gfs2_bufdata,
bd_ail_st_list);
gfs2_assert(sdp, bd->bd_ail == ai);
bd->bd_ail = NULL;
list_del(&bd->bd_ail_st_list);
list_del(&bd->bd_ail_gl_list);
atomic_dec(&bd->bd_gl->gl_ail_count);
brelse(bd->bd_bh);
gfs2_remove_from_ail(bd->bd_bh->b_page->mapping, bd);
}
}
@@ -439,10 +450,10 @@ static unsigned int current_tail(struct gfs2_sbd *sdp)
return tail;
}
static inline void log_incr_head(struct gfs2_sbd *sdp)
void gfs2_log_incr_head(struct gfs2_sbd *sdp)
{
if (sdp->sd_log_flush_head == sdp->sd_log_tail)
gfs2_assert_withdraw(sdp, sdp->sd_log_flush_head == sdp->sd_log_head);
BUG_ON(sdp->sd_log_flush_head != sdp->sd_log_head);
if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks) {
sdp->sd_log_flush_head = 0;
@@ -450,6 +461,23 @@ static inline void log_incr_head(struct gfs2_sbd *sdp)
}
}
/**
 * gfs2_log_write_endio - End of I/O for a log buffer
 * @bh: The buffer head
 * @uptodate: I/O Status
 *
 * Completes the write via end_buffer_write_sync() and decrements
 * sd_log_in_flight, waking sd_log_flush_wait when it reaches zero.
 */
static void gfs2_log_write_endio(struct buffer_head *bh, int uptodate)
{
/* b_private carries the superblock; fetch it before it is cleared */
struct gfs2_sbd *sdp = bh->b_private;
bh->b_private = NULL;
end_buffer_write_sync(bh, uptodate);
/* Last in-flight log buffer: wake anyone sleeping on the flush wait queue */
if (atomic_dec_and_test(&sdp->sd_log_in_flight))
wake_up(&sdp->sd_log_flush_wait);
}
/**
* gfs2_log_get_buf - Get and initialize a buffer to use for log control data
* @sdp: The GFS2 superblock
@@ -460,24 +488,42 @@ static inline void log_incr_head(struct gfs2_sbd *sdp)
struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp)
{
u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head);
struct gfs2_log_buf *lb;
struct buffer_head *bh;
lb = kzalloc(sizeof(struct gfs2_log_buf), GFP_NOFS | __GFP_NOFAIL);
list_add(&lb->lb_list, &sdp->sd_log_flush_list);
bh = lb->lb_bh = sb_getblk(sdp->sd_vfs, blkno);
bh = sb_getblk(sdp->sd_vfs, blkno);
lock_buffer(bh);
memset(bh->b_data, 0, bh->b_size);
set_buffer_uptodate(bh);
clear_buffer_dirty(bh);
unlock_buffer(bh);
log_incr_head(sdp);
gfs2_log_incr_head(sdp);
atomic_inc(&sdp->sd_log_in_flight);
bh->b_private = sdp;
bh->b_end_io = gfs2_log_write_endio;
return bh;
}
/**
 * gfs2_fake_write_endio - End of I/O for a fake log buffer head
 * @bh: The buffer head
 * @uptodate: The I/O Status
 *
 * Completes the write on the fake buffer head and frees it, then unlocks
 * and releases the real buffer it stood in for.  Finally decrements
 * sd_log_in_flight, waking sd_log_flush_wait when it reaches zero.
 */
static void gfs2_fake_write_endio(struct buffer_head *bh, int uptodate)
{
/* The fake bh's b_private points at the real buffer head */
struct buffer_head *real_bh = bh->b_private;
struct gfs2_bufdata *bd = real_bh->b_private;
/* Look up the superblock now, before the real buffer is released below */
struct gfs2_sbd *sdp = bd->bd_gl->gl_sbd;
end_buffer_write_sync(bh, uptodate);
free_buffer_head(bh);
unlock_buffer(real_bh);
brelse(real_bh);
if (atomic_dec_and_test(&sdp->sd_log_in_flight))
wake_up(&sdp->sd_log_flush_wait);
}
/**
* gfs2_log_fake_buf - Build a fake buffer head to write metadata buffer to log
* @sdp: the filesystem
@@ -490,22 +536,20 @@ struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
struct buffer_head *real)
{
u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head);
struct gfs2_log_buf *lb;
struct buffer_head *bh;
lb = kzalloc(sizeof(struct gfs2_log_buf), GFP_NOFS | __GFP_NOFAIL);
list_add(&lb->lb_list, &sdp->sd_log_flush_list);
lb->lb_real = real;
bh = lb->lb_bh = alloc_buffer_head(GFP_NOFS | __GFP_NOFAIL);
bh = alloc_buffer_head(GFP_NOFS | __GFP_NOFAIL);
atomic_set(&bh->b_count, 1);
bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate);
bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate) | (1 << BH_Lock);
set_bh_page(bh, real->b_page, bh_offset(real));
bh->b_blocknr = blkno;
bh->b_size = sdp->sd_sb.sb_bsize;
bh->b_bdev = sdp->sd_vfs->s_bdev;
bh->b_private = real;
bh->b_end_io = gfs2_fake_write_endio;
log_incr_head(sdp);
gfs2_log_incr_head(sdp);
atomic_inc(&sdp->sd_log_in_flight);
return bh;
}
@@ -572,45 +616,75 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
gfs2_assert_withdraw(sdp, !pull);
sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
log_incr_head(sdp);
gfs2_log_incr_head(sdp);
}
/*
 * log_flush_commit - wait for in-flight log I/O, then write the log header
 * @sdp: the filesystem
 *
 * Sleeps uninterruptibly on sd_log_flush_wait until sd_log_in_flight has
 * dropped to zero, then writes a log header with flags 0 and pull 0.
 */
static void log_flush_commit(struct gfs2_sbd *sdp)
{
	DEFINE_WAIT(wait);

	if (atomic_read(&sdp->sd_log_in_flight)) {
		do {
			prepare_to_wait(&sdp->sd_log_flush_wait, &wait,
					TASK_UNINTERRUPTIBLE);
			/*
			 * Re-check after queueing on the wait queue so a
			 * completion that raced with us is not slept through.
			 */
			if (atomic_read(&sdp->sd_log_in_flight))
				io_schedule();
		} while (atomic_read(&sdp->sd_log_in_flight));
		finish_wait(&sdp->sd_log_flush_wait, &wait);
	}

	log_write_header(sdp, 0, 0);
}
static void gfs2_ordered_write(struct gfs2_sbd *sdp)
{
struct gfs2_bufdata *bd;
struct buffer_head *bh;
int flushcount = 0;
LIST_HEAD(written);
while (!list_empty(head)) {
lb = list_entry(head->next, struct gfs2_log_buf, lb_list);
list_del(&lb->lb_list);
bh = lb->lb_bh;
wait_on_buffer(bh);
if (!buffer_uptodate(bh))
gfs2_io_error_bh(sdp, bh);
if (lb->lb_real) {
while (atomic_read(&bh->b_count) != 1) /* Grrrr... */
schedule();
free_buffer_head(bh);
} else
gfs2_log_lock(sdp);
while (!list_empty(&sdp->sd_log_le_ordered)) {
bd = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_bufdata, bd_le.le_list);
list_move(&bd->bd_le.le_list, &written);
bh = bd->bd_bh;
if (!buffer_dirty(bh))
continue;
get_bh(bh);
gfs2_log_unlock(sdp);
lock_buffer(bh);
if (test_clear_buffer_dirty(bh)) {
bh->b_end_io = end_buffer_write_sync;
submit_bh(WRITE, bh);
} else {
unlock_buffer(bh);
brelse(bh);
kfree(lb);
flushcount++;
}
gfs2_log_lock(sdp);
}
list_splice(&written, &sdp->sd_log_le_ordered);
gfs2_log_unlock(sdp);
}
/* If nothing was journaled, the header is unplanned and unwanted. */
if (flushcount) {
log_write_header(sdp, 0, 0);
} else {
unsigned int tail;
tail = current_tail(sdp);
static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
{
struct gfs2_bufdata *bd;
struct buffer_head *bh;
gfs2_ail1_empty(sdp, 0);
if (sdp->sd_log_tail != tail)
log_pull_tail(sdp, tail);
gfs2_log_lock(sdp);
while (!list_empty(&sdp->sd_log_le_ordered)) {
bd = list_entry(sdp->sd_log_le_ordered.prev, struct gfs2_bufdata, bd_le.le_list);
bh = bd->bd_bh;
if (buffer_locked(bh)) {
get_bh(bh);
gfs2_log_unlock(sdp);
wait_on_buffer(bh);
brelse(bh);
gfs2_log_lock(sdp);
continue;
}
list_del_init(&bd->bd_le.le_list);
}
gfs2_log_unlock(sdp);
}
/**
@@ -640,10 +714,16 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
INIT_LIST_HEAD(&ai->ai_ail1_list);
INIT_LIST_HEAD(&ai->ai_ail2_list);
gfs2_assert_withdraw(sdp,
sdp->sd_log_num_buf + sdp->sd_log_num_jdata ==
sdp->sd_log_commited_buf +
sdp->sd_log_commited_databuf);
if (sdp->sd_log_num_buf != sdp->sd_log_commited_buf) {
printk(KERN_INFO "GFS2: log buf %u %u\n", sdp->sd_log_num_buf,
sdp->sd_log_commited_buf);
gfs2_assert_withdraw(sdp, 0);
}
if (sdp->sd_log_num_databuf != sdp->sd_log_commited_databuf) {
printk(KERN_INFO "GFS2: log databuf %u %u\n",
sdp->sd_log_num_databuf, sdp->sd_log_commited_databuf);
gfs2_assert_withdraw(sdp, 0);
}
gfs2_assert_withdraw(sdp,
sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke);
@@ -651,8 +731,11 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
sdp->sd_log_flush_wrapped = 0;
ai->ai_first = sdp->sd_log_flush_head;
gfs2_ordered_write(sdp);
lops_before_commit(sdp);
if (!list_empty(&sdp->sd_log_flush_list))
gfs2_ordered_wait(sdp);
if (sdp->sd_log_head != sdp->sd_log_flush_head)
log_flush_commit(sdp);
else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){
gfs2_log_lock(sdp);
@@ -744,7 +827,6 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp)
gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved);
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_gl);
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_buf);
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_jdata);
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg);
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf);

View File

@@ -52,12 +52,14 @@ int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags);
int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks);
void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks);
void gfs2_log_incr_head(struct gfs2_sbd *sdp);
struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp);
struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
struct buffer_head *real);
void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl);
void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
void gfs2_remove_from_ail(struct address_space *mapping, struct gfs2_bufdata *bd);
void gfs2_log_shutdown(struct gfs2_sbd *sdp);
void gfs2_meta_syncfs(struct gfs2_sbd *sdp);

View File

@@ -27,7 +27,104 @@
#include "trans.h"
#include "util.h"
static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
/**
 * gfs2_pin - Pin a buffer in memory
 * @sdp: The superblock
 * @bh: The buffer to be pinned
 *
 * Clears the buffer's dirty bit, sets its pinned bit and takes an extra
 * reference on it.  Withdraws if the journal is not live or the buffer
 * was already pinned; reports an I/O error if it is not uptodate.
 *
 * The log lock must be held when calling this function
 */
static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
struct gfs2_bufdata *bd;
gfs2_assert_withdraw(sdp, test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags));
clear_buffer_dirty(bh);
/* test_set returns the previous state: non-zero means it was already pinned */
if (test_set_buffer_pinned(bh))
gfs2_assert_withdraw(sdp, 0);
if (!buffer_uptodate(bh))
gfs2_io_error_bh(sdp, bh);
bd = bh->b_private;
/* If this buffer is already on an AIL entry, move it onto that
 * entry's ai_ail2_list.
 */
if (bd->bd_ail)
list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
get_bh(bh);
}
/**
 * gfs2_unpin - Unpin a buffer
 * @sdp: the filesystem the buffer belongs to
 * @bh: The buffer to unpin
 * @ai: the AIL entry whose ai_ail1_list the buffer is added to
 *
 * Marks the buffer dirty and clears its pinned bit while holding the
 * buffer lock, then, under the log lock, attaches its bufdata to @ai.
 */
static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
struct gfs2_ail *ai)
{
struct gfs2_bufdata *bd = bh->b_private;
gfs2_assert_withdraw(sdp, buffer_uptodate(bh));
if (!buffer_pinned(bh))
gfs2_assert_withdraw(sdp, 0);
lock_buffer(bh);
mark_buffer_dirty(bh);
clear_buffer_pinned(bh);
gfs2_log_lock(sdp);
if (bd->bd_ail) {
/* Already on an AIL entry: unlink from it and drop one reference.
 * NOTE(review): presumably the reference taken by gfs2_pin() - confirm.
 */
list_del(&bd->bd_ail_st_list);
brelse(bh);
} else {
/* First time on the AIL: also link into the glock's list and count it */
struct gfs2_glock *gl = bd->bd_gl;
list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list);
atomic_inc(&gl->gl_ail_count);
}
bd->bd_ail = ai;
list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
gfs2_log_unlock(sdp);
unlock_buffer(bh);
}
/*
 * bh_log_desc - interpret the start of a buffer's data as a log descriptor
 * @bh: the buffer whose b_data is to be viewed
 *
 * Returns: b_data converted to a struct gfs2_log_descriptor pointer
 */
static inline struct gfs2_log_descriptor *bh_log_desc(struct buffer_head *bh)
{
	void *start = bh->b_data;

	return start;
}
/*
 * bh_log_ptr - address of the first 64-bit slot after the log descriptor
 * @bh: the buffer holding a log descriptor at the start of its data
 *
 * Returns: pointer to the location immediately following the descriptor
 */
static inline __be64 *bh_log_ptr(struct buffer_head *bh)
{
	return (__force __be64 *)(bh_log_desc(bh) + 1);
}
static inline __be64 *bh_ptr_end(struct buffer_head *bh)
{
return (__force __be64 *)(bh->b_data + bh->b_size);
}
/**
 * gfs2_get_log_desc - get a log buffer initialised as a log descriptor
 * @sdp: the filesystem
 * @ld_type: descriptor type, stored big-endian in the ld_type field
 *
 * Obtains a buffer from gfs2_log_get_buf() and fills in the descriptor at
 * the start of its data: the metadata header (magic, type, format) and
 * @ld_type are set, while ld_length, ld_data1, ld_data2 and the reserved
 * area are zeroed for the caller to fill in as needed.
 *
 * Returns: the buffer holding the descriptor
 */
static struct buffer_head *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type)
{
	struct buffer_head *log_bh;
	struct gfs2_log_descriptor *desc;

	log_bh = gfs2_log_get_buf(sdp);
	desc = bh_log_desc(log_bh);

	/* Everything the caller may override starts out as zero. */
	memset(desc->ld_reserved, 0, sizeof(desc->ld_reserved));
	desc->ld_data2 = 0;
	desc->ld_data1 = 0;
	desc->ld_length = 0;

	/* Fixed identification fields, all stored big-endian. */
	desc->ld_type = cpu_to_be32(ld_type);
	desc->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
	desc->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
	desc->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);

	return log_bh;
}
static void __glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
struct gfs2_glock *gl;
struct gfs2_trans *tr = current->journal_info;
@@ -38,15 +135,19 @@ static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl)))
return;
gfs2_log_lock(sdp);
if (!list_empty(&le->le_list)){
gfs2_log_unlock(sdp);
if (!list_empty(&le->le_list))
return;
}
gfs2_glock_hold(gl);
set_bit(GLF_DIRTY, &gl->gl_flags);
sdp->sd_log_num_gl++;
list_add(&le->le_list, &sdp->sd_log_le_gl);
}
/*
 * glock_lo_add - locked wrapper around __glock_lo_add()
 * @sdp: the filesystem
 * @le: the log element to add
 *
 * Takes the log lock, calls __glock_lo_add() and releases the log lock
 * again, for callers that do not already hold the log lock.
 */
static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
gfs2_log_lock(sdp);
__glock_lo_add(sdp, le);
gfs2_log_unlock(sdp);
}
@@ -71,30 +172,25 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
struct gfs2_trans *tr;
lock_buffer(bd->bd_bh);
gfs2_log_lock(sdp);
if (!list_empty(&bd->bd_list_tr)) {
gfs2_log_unlock(sdp);
return;
}
if (!list_empty(&bd->bd_list_tr))
goto out;
tr = current->journal_info;
tr->tr_touched = 1;
tr->tr_num_buf++;
list_add(&bd->bd_list_tr, &tr->tr_list_buf);
gfs2_log_unlock(sdp);
if (!list_empty(&le->le_list))
return;
gfs2_trans_add_gl(bd->bd_gl);
goto out;
__glock_lo_add(sdp, &bd->bd_gl->gl_le);
gfs2_meta_check(sdp, bd->bd_bh);
gfs2_pin(sdp, bd->bd_bh);
gfs2_log_lock(sdp);
sdp->sd_log_num_buf++;
list_add(&le->le_list, &sdp->sd_log_le_buf);
gfs2_log_unlock(sdp);
tr->tr_num_buf_new++;
out:
gfs2_log_unlock(sdp);
unlock_buffer(bd->bd_bh);
}
static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
@@ -117,8 +213,7 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
struct buffer_head *bh;
struct gfs2_log_descriptor *ld;
struct gfs2_bufdata *bd1 = NULL, *bd2;
unsigned int total = sdp->sd_log_num_buf;
unsigned int offset = BUF_OFFSET;
unsigned int total;
unsigned int limit;
unsigned int num;
unsigned n;
@@ -127,22 +222,20 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
limit = buf_limit(sdp);
/* for 4k blocks, limit = 503 */
gfs2_log_lock(sdp);
total = sdp->sd_log_num_buf;
bd1 = bd2 = list_prepare_entry(bd1, &sdp->sd_log_le_buf, bd_le.le_list);
while(total) {
num = total;
if (total > limit)
num = limit;
bh = gfs2_log_get_buf(sdp);
ld = (struct gfs2_log_descriptor *)bh->b_data;
ptr = (__be64 *)(bh->b_data + offset);
ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_METADATA);
gfs2_log_unlock(sdp);
bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_METADATA);
gfs2_log_lock(sdp);
ld = bh_log_desc(bh);
ptr = bh_log_ptr(bh);
ld->ld_length = cpu_to_be32(num + 1);
ld->ld_data1 = cpu_to_be32(num);
ld->ld_data2 = cpu_to_be32(0);
memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
n = 0;
list_for_each_entry_continue(bd1, &sdp->sd_log_le_buf,
@@ -152,21 +245,27 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
break;
}
set_buffer_dirty(bh);
ll_rw_block(WRITE, 1, &bh);
gfs2_log_unlock(sdp);
submit_bh(WRITE, bh);
gfs2_log_lock(sdp);
n = 0;
list_for_each_entry_continue(bd2, &sdp->sd_log_le_buf,
bd_le.le_list) {
get_bh(bd2->bd_bh);
gfs2_log_unlock(sdp);
lock_buffer(bd2->bd_bh);
bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
set_buffer_dirty(bh);
ll_rw_block(WRITE, 1, &bh);
submit_bh(WRITE, bh);
gfs2_log_lock(sdp);
if (++n >= num)
break;
}
BUG_ON(total < num);
total -= num;
}
gfs2_log_unlock(sdp);
}
static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
@@ -270,11 +369,8 @@ static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
tr = current->journal_info;
tr->tr_touched = 1;
tr->tr_num_revoke++;
gfs2_log_lock(sdp);
sdp->sd_log_num_revoke++;
list_add(&le->le_list, &sdp->sd_log_le_revoke);
gfs2_log_unlock(sdp);
}
static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
@@ -284,32 +380,25 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
struct buffer_head *bh;
unsigned int offset;
struct list_head *head = &sdp->sd_log_le_revoke;
struct gfs2_revoke *rv;
struct gfs2_bufdata *bd;
if (!sdp->sd_log_num_revoke)
return;
bh = gfs2_log_get_buf(sdp);
ld = (struct gfs2_log_descriptor *)bh->b_data;
ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_REVOKE);
bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_REVOKE);
ld = bh_log_desc(bh);
ld->ld_length = cpu_to_be32(gfs2_struct2blk(sdp, sdp->sd_log_num_revoke,
sizeof(u64)));
ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke);
ld->ld_data2 = cpu_to_be32(0);
memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
offset = sizeof(struct gfs2_log_descriptor);
while (!list_empty(head)) {
rv = list_entry(head->next, struct gfs2_revoke, rv_le.le_list);
list_del_init(&rv->rv_le.le_list);
bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
list_del_init(&bd->bd_le.le_list);
sdp->sd_log_num_revoke--;
if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
set_buffer_dirty(bh);
ll_rw_block(WRITE, 1, &bh);
submit_bh(WRITE, bh);
bh = gfs2_log_get_buf(sdp);
mh = (struct gfs2_meta_header *)bh->b_data;
@@ -319,15 +408,14 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
offset = sizeof(struct gfs2_meta_header);
}
*(__be64 *)(bh->b_data + offset) = cpu_to_be64(rv->rv_blkno);
kfree(rv);
*(__be64 *)(bh->b_data + offset) = cpu_to_be64(bd->bd_blkno);
kmem_cache_free(gfs2_bufdata_cachep, bd);
offset += sizeof(u64);
}
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
set_buffer_dirty(bh);
ll_rw_block(WRITE, 1, &bh);
submit_bh(WRITE, bh);
}
static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
@@ -466,222 +554,136 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
struct address_space *mapping = bd->bd_bh->b_page->mapping;
struct gfs2_inode *ip = GFS2_I(mapping->host);
lock_buffer(bd->bd_bh);
gfs2_log_lock(sdp);
if (!list_empty(&bd->bd_list_tr)) {
gfs2_log_unlock(sdp);
return;
}
if (!list_empty(&bd->bd_list_tr))
goto out;
tr->tr_touched = 1;
if (gfs2_is_jdata(ip)) {
tr->tr_num_buf++;
list_add(&bd->bd_list_tr, &tr->tr_list_buf);
}
gfs2_log_unlock(sdp);
if (!list_empty(&le->le_list))
return;
goto out;
gfs2_trans_add_gl(bd->bd_gl);
__glock_lo_add(sdp, &bd->bd_gl->gl_le);
if (gfs2_is_jdata(ip)) {
sdp->sd_log_num_jdata++;
gfs2_pin(sdp, bd->bd_bh);
tr->tr_num_databuf_new++;
sdp->sd_log_num_databuf++;
list_add(&le->le_list, &sdp->sd_log_le_databuf);
} else {
list_add(&le->le_list, &sdp->sd_log_le_ordered);
}
gfs2_log_lock(sdp);
sdp->sd_log_num_databuf++;
list_add(&le->le_list, &sdp->sd_log_le_databuf);
out:
gfs2_log_unlock(sdp);
unlock_buffer(bd->bd_bh);
}
static int gfs2_check_magic(struct buffer_head *bh)
static void gfs2_check_magic(struct buffer_head *bh)
{
struct page *page = bh->b_page;
void *kaddr;
__be32 *ptr;
int rv = 0;
kaddr = kmap_atomic(page, KM_USER0);
clear_buffer_escaped(bh);
kaddr = kmap_atomic(bh->b_page, KM_USER0);
ptr = kaddr + bh_offset(bh);
if (*ptr == cpu_to_be32(GFS2_MAGIC))
rv = 1;
set_buffer_escaped(bh);
kunmap_atomic(kaddr, KM_USER0);
}
return rv;
/*
 * gfs2_write_blocks - submit a log descriptor and the buffers it lists
 * @sdp: the filesystem
 * @bh: buffer holding the log descriptor, or NULL (in which case nothing
 *      is done)
 * @list: bufdata entries (linked via bd_le.le_list) to be written
 * @done: list onto whose tail each entry is moved as it is processed
 * @n: entry count, stored in the descriptor as ld_data1 (and as n + 1 in
 *     ld_length)
 *
 * The log lock is taken and released inside this function; it is dropped
 * around the per-buffer locking and I/O submission and is not held on
 * return. One reference on @bh is taken before submission and one is
 * dropped at the end.
 */
static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh,
struct list_head *list, struct list_head *done,
unsigned int n)
{
struct buffer_head *bh1;
struct gfs2_log_descriptor *ld;
struct gfs2_bufdata *bd;
__be64 *ptr;
if (!bh)
return;
/* Finalise the descriptor now that the number of entries is known. */
ld = bh_log_desc(bh);
ld->ld_length = cpu_to_be32(n + 1);
ld->ld_data1 = cpu_to_be32(n);
/* ptr walks the entries stored after the descriptor, two 64-bit
 * words per entry, the first of which is compared with b_blocknr.
 */
ptr = bh_log_ptr(bh);
get_bh(bh);
submit_bh(WRITE, bh);
gfs2_log_lock(sdp);
while(!list_empty(list)) {
bd = list_entry(list->next, struct gfs2_bufdata, bd_le.le_list);
list_move_tail(&bd->bd_le.le_list, done);
get_bh(bd->bd_bh);
/* Skip descriptor entries that do not match this buffer, advancing
 * the log head once per skipped entry.
 * NOTE(review): presumably these are entries whose buffers left
 * the list after being recorded; there is no bound check on ptr
 * here - confirm.
 */
while (be64_to_cpu(*ptr) != bd->bd_bh->b_blocknr) {
gfs2_log_incr_head(sdp);
ptr += 2;
}
gfs2_log_unlock(sdp);
lock_buffer(bd->bd_bh);
if (buffer_escaped(bd->bd_bh)) {
/* Escaped buffer: write a copy taken from a fresh log buffer
 * with its first 32 bits zeroed, then clear the flag and
 * release the original buffer.
 */
void *kaddr;
bh1 = gfs2_log_get_buf(sdp);
kaddr = kmap_atomic(bd->bd_bh->b_page, KM_USER0);
memcpy(bh1->b_data, kaddr + bh_offset(bd->bd_bh),
bh1->b_size);
kunmap_atomic(kaddr, KM_USER0);
*(__be32 *)bh1->b_data = 0;
clear_buffer_escaped(bd->bd_bh);
unlock_buffer(bd->bd_bh);
brelse(bd->bd_bh);
} else {
/* NOTE(review): bd->bd_bh stays locked and referenced on this
 * path; presumably the fake buffer's completion handling
 * releases it - confirm against gfs2_log_fake_buf().
 */
bh1 = gfs2_log_fake_buf(sdp, bd->bd_bh);
}
submit_bh(WRITE, bh1);
gfs2_log_lock(sdp);
ptr += 2;
}
gfs2_log_unlock(sdp);
brelse(bh);
}
/**
* databuf_lo_before_commit - Scan the data buffers, writing as we go
*
* Here we scan through the lists of buffers and make the assumption
* that any buffer that's been pinned is being journaled, and that
* any unpinned buffer is an ordered write data buffer and therefore
* will be written back rather than journaled.
*/
static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
{
LIST_HEAD(started);
struct gfs2_bufdata *bd1 = NULL, *bd2, *bdt;
struct buffer_head *bh = NULL,*bh1 = NULL;
struct gfs2_log_descriptor *ld;
unsigned int limit;
unsigned int total_dbuf;
unsigned int total_jdata = sdp->sd_log_num_jdata;
unsigned int num, n;
__be64 *ptr = NULL;
struct gfs2_bufdata *bd = NULL;
struct buffer_head *bh = NULL;
unsigned int n = 0;
__be64 *ptr = NULL, *end = NULL;
LIST_HEAD(processed);
LIST_HEAD(in_progress);
limit = databuf_limit(sdp);
/*
* Start writing ordered buffers, write journaled buffers
* into the log along with a header
*/
gfs2_log_lock(sdp);
total_dbuf = sdp->sd_log_num_databuf;
bd2 = bd1 = list_prepare_entry(bd1, &sdp->sd_log_le_databuf,
bd_le.le_list);
while(total_dbuf) {
num = total_jdata;
if (num > limit)
num = limit;
n = 0;
list_for_each_entry_safe_continue(bd1, bdt,
&sdp->sd_log_le_databuf,
bd_le.le_list) {
/* store off the buffer head in a local ptr since
* gfs2_bufdata might change when we drop the log lock
*/
bh1 = bd1->bd_bh;
/* An ordered write buffer */
if (bh1 && !buffer_pinned(bh1)) {
list_move(&bd1->bd_le.le_list, &started);
if (bd1 == bd2) {
bd2 = NULL;
bd2 = list_prepare_entry(bd2,
&sdp->sd_log_le_databuf,
bd_le.le_list);
}
total_dbuf--;
if (bh1) {
if (buffer_dirty(bh1)) {
get_bh(bh1);
gfs2_log_unlock(sdp);
ll_rw_block(SWRITE, 1, &bh1);
brelse(bh1);
gfs2_log_lock(sdp);
}
continue;
}
continue;
} else if (bh1) { /* A journaled buffer */
int magic;
gfs2_log_unlock(sdp);
if (!bh) {
bh = gfs2_log_get_buf(sdp);
ld = (struct gfs2_log_descriptor *)
bh->b_data;
ptr = (__be64 *)(bh->b_data +
DATABUF_OFFSET);
ld->ld_header.mh_magic =
cpu_to_be32(GFS2_MAGIC);
ld->ld_header.mh_type =
cpu_to_be32(GFS2_METATYPE_LD);
ld->ld_header.mh_format =
cpu_to_be32(GFS2_FORMAT_LD);
ld->ld_type =
cpu_to_be32(GFS2_LOG_DESC_JDATA);
ld->ld_length = cpu_to_be32(num + 1);
ld->ld_data1 = cpu_to_be32(num);
ld->ld_data2 = cpu_to_be32(0);
memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
}
magic = gfs2_check_magic(bh1);
*ptr++ = cpu_to_be64(bh1->b_blocknr);
*ptr++ = cpu_to_be64((__u64)magic);
clear_buffer_escaped(bh1);
if (unlikely(magic != 0))
set_buffer_escaped(bh1);
gfs2_log_lock(sdp);
if (++n >= num)
break;
} else if (!bh1) {
total_dbuf--;
sdp->sd_log_num_databuf--;
list_del_init(&bd1->bd_le.le_list);
if (bd1 == bd2) {
bd2 = NULL;
bd2 = list_prepare_entry(bd2,
&sdp->sd_log_le_databuf,
bd_le.le_list);
}
kmem_cache_free(gfs2_bufdata_cachep, bd1);
}
}
gfs2_log_unlock(sdp);
if (bh) {
set_buffer_mapped(bh);
set_buffer_dirty(bh);
ll_rw_block(WRITE, 1, &bh);
bh = NULL;
}
n = 0;
gfs2_log_lock(sdp);
list_for_each_entry_continue(bd2, &sdp->sd_log_le_databuf,
bd_le.le_list) {
if (!bd2->bd_bh)
continue;
/* copy buffer if it needs escaping */
while (!list_empty(&sdp->sd_log_le_databuf)) {
if (ptr == end) {
gfs2_log_unlock(sdp);
if (unlikely(buffer_escaped(bd2->bd_bh))) {
void *kaddr;
struct page *page = bd2->bd_bh->b_page;
bh = gfs2_log_get_buf(sdp);
kaddr = kmap_atomic(page, KM_USER0);
memcpy(bh->b_data,
kaddr + bh_offset(bd2->bd_bh),
sdp->sd_sb.sb_bsize);
kunmap_atomic(kaddr, KM_USER0);
*(__be32 *)bh->b_data = 0;
} else {
bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
}
set_buffer_dirty(bh);
ll_rw_block(WRITE, 1, &bh);
gfs2_write_blocks(sdp, bh, &in_progress, &processed, n);
n = 0;
bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_JDATA);
ptr = bh_log_ptr(bh);
end = bh_ptr_end(bh) - 1;
gfs2_log_lock(sdp);
if (++n >= num)
break;
continue;
}
bh = NULL;
BUG_ON(total_dbuf < num);
total_dbuf -= num;
total_jdata -= num;
bd = list_entry(sdp->sd_log_le_databuf.next, struct gfs2_bufdata, bd_le.le_list);
list_move_tail(&bd->bd_le.le_list, &in_progress);
gfs2_check_magic(bd->bd_bh);
*ptr++ = cpu_to_be64(bd->bd_bh->b_blocknr);
*ptr++ = cpu_to_be64(buffer_escaped(bh) ? 1 : 0);
n++;
}
gfs2_log_unlock(sdp);
/* Wait on all ordered buffers */
while (!list_empty(&started)) {
gfs2_log_lock(sdp);
bd1 = list_entry(started.next, struct gfs2_bufdata,
bd_le.le_list);
list_del_init(&bd1->bd_le.le_list);
sdp->sd_log_num_databuf--;
bh = bd1->bd_bh;
if (bh) {
bh->b_private = NULL;
get_bh(bh);
gfs2_log_unlock(sdp);
wait_on_buffer(bh);
brelse(bh);
} else
gfs2_log_unlock(sdp);
kmem_cache_free(gfs2_bufdata_cachep, bd1);
}
/* We've removed all the ordered write bufs here, so only jdata left */
gfs2_assert_warn(sdp, sdp->sd_log_num_databuf == sdp->sd_log_num_jdata);
gfs2_write_blocks(sdp, bh, &in_progress, &processed, n);
gfs2_log_lock(sdp);
list_splice(&processed, &sdp->sd_log_le_databuf);
gfs2_log_unlock(sdp);
}
static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
@@ -765,11 +767,9 @@ static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
list_del_init(&bd->bd_le.le_list);
sdp->sd_log_num_databuf--;
sdp->sd_log_num_jdata--;
gfs2_unpin(sdp, bd->bd_bh, ai);
}
gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf);
gfs2_assert_warn(sdp, !sdp->sd_log_num_jdata);
}
@@ -817,10 +817,10 @@ const struct gfs2_log_operations gfs2_databuf_lops = {
const struct gfs2_log_operations *gfs2_log_ops[] = {
&gfs2_glock_lops,
&gfs2_buf_lops,
&gfs2_revoke_lops,
&gfs2_rg_lops,
&gfs2_databuf_lops,
&gfs2_buf_lops,
&gfs2_rg_lops,
&gfs2_revoke_lops,
NULL,
};

View File

@@ -107,6 +107,8 @@ static int __init init_gfs2_fs(void)
fail_unregister:
unregister_filesystem(&gfs2_fs_type);
fail:
gfs2_glock_exit();
if (gfs2_bufdata_cachep)
kmem_cache_destroy(gfs2_bufdata_cachep);
@@ -127,6 +129,7 @@ fail:
static void __exit exit_gfs2_fs(void)
{
gfs2_glock_exit();
gfs2_unregister_debugfs();
unregister_filesystem(&gfs2_fs_type);
unregister_filesystem(&gfs2meta_fs_type);

View File

@@ -297,74 +297,35 @@ void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
unlock_page(bh->b_page);
}
/**
* gfs2_pin - Pin a buffer in memory
* @sdp: the filesystem the buffer belongs to
* @bh: The buffer to be pinned
*
*/
void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int meta)
{
struct gfs2_sbd *sdp = GFS2_SB(bh->b_page->mapping->host);
struct gfs2_bufdata *bd = bh->b_private;
gfs2_assert_withdraw(sdp, test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags));
if (test_set_buffer_pinned(bh))
gfs2_assert_withdraw(sdp, 0);
wait_on_buffer(bh);
/* If this buffer is in the AIL and it has already been written
to in-place disk block, remove it from the AIL. */
gfs2_log_lock(sdp);
if (bd->bd_ail && !buffer_in_io(bh))
list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
gfs2_log_unlock(sdp);
clear_buffer_dirty(bh);
wait_on_buffer(bh);
if (!buffer_uptodate(bh))
gfs2_io_error_bh(sdp, bh);
get_bh(bh);
}
/**
* gfs2_unpin - Unpin a buffer
* @sdp: the filesystem the buffer belongs to
* @bh: The buffer to unpin
* @ai:
*
*/
void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
struct gfs2_ail *ai)
{
struct gfs2_bufdata *bd = bh->b_private;
gfs2_assert_withdraw(sdp, buffer_uptodate(bh));
if (!buffer_pinned(bh))
gfs2_assert_withdraw(sdp, 0);
mark_buffer_dirty(bh);
clear_buffer_pinned(bh);
gfs2_log_lock(sdp);
if (bd->bd_ail) {
list_del(&bd->bd_ail_st_list);
if (test_clear_buffer_pinned(bh)) {
list_del_init(&bd->bd_le.le_list);
if (meta) {
gfs2_assert_warn(sdp, sdp->sd_log_num_buf);
sdp->sd_log_num_buf--;
tr->tr_num_buf_rm++;
} else {
gfs2_assert_warn(sdp, sdp->sd_log_num_databuf);
sdp->sd_log_num_databuf--;
tr->tr_num_databuf_rm++;
}
tr->tr_touched = 1;
brelse(bh);
} else {
struct gfs2_glock *gl = bd->bd_gl;
list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list);
atomic_inc(&gl->gl_ail_count);
}
bd->bd_ail = ai;
list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
gfs2_log_unlock(sdp);
if (bd) {
if (bd->bd_ail) {
gfs2_remove_from_ail(NULL, bd);
bh->b_private = NULL;
bd->bd_bh = NULL;
bd->bd_blkno = bh->b_blocknr;
gfs2_trans_add_revoke(sdp, bd);
}
}
clear_buffer_dirty(bh);
clear_buffer_uptodate(bh);
}
/**
@@ -383,44 +344,11 @@ void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen)
while (blen) {
bh = getbuf(ip->i_gl, bstart, NO_CREATE);
if (bh) {
struct gfs2_bufdata *bd = bh->b_private;
if (test_clear_buffer_pinned(bh)) {
struct gfs2_trans *tr = current->journal_info;
struct gfs2_inode *bh_ip =
GFS2_I(bh->b_page->mapping->host);
gfs2_log_lock(sdp);
list_del_init(&bd->bd_le.le_list);
gfs2_assert_warn(sdp, sdp->sd_log_num_buf);
sdp->sd_log_num_buf--;
gfs2_log_unlock(sdp);
if (bh_ip->i_inode.i_private != NULL)
tr->tr_num_databuf_rm++;
else
tr->tr_num_buf_rm++;
brelse(bh);
}
if (bd) {
gfs2_log_lock(sdp);
if (bd->bd_ail) {
u64 blkno = bh->b_blocknr;
bd->bd_ail = NULL;
list_del(&bd->bd_ail_st_list);
list_del(&bd->bd_ail_gl_list);
atomic_dec(&bd->bd_gl->gl_ail_count);
brelse(bh);
gfs2_log_unlock(sdp);
gfs2_trans_add_revoke(sdp, blkno);
} else
gfs2_log_unlock(sdp);
}
lock_buffer(bh);
clear_buffer_dirty(bh);
clear_buffer_uptodate(bh);
gfs2_log_lock(sdp);
gfs2_remove_from_journal(bh, current->journal_info, 1);
gfs2_log_unlock(sdp);
unlock_buffer(bh);
brelse(bh);
}
@@ -446,10 +374,10 @@ void gfs2_meta_cache_flush(struct gfs2_inode *ip)
for (x = 0; x < GFS2_MAX_META_HEIGHT; x++) {
bh_slot = &ip->i_cache[x];
if (!*bh_slot)
break;
brelse(*bh_slot);
*bh_slot = NULL;
if (*bh_slot) {
brelse(*bh_slot);
*bh_slot = NULL;
}
}
spin_unlock(&ip->i_spin);

View File

@@ -50,9 +50,9 @@ int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh);
void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
int meta);
void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh);
void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
struct gfs2_ail *ai);
void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr,
int meta);
void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen);

View File

@@ -42,6 +42,7 @@ enum {
Opt_nosuiddir,
Opt_data_writeback,
Opt_data_ordered,
Opt_err,
};
static match_table_t tokens = {
@@ -64,7 +65,8 @@ static match_table_t tokens = {
{Opt_suiddir, "suiddir"},
{Opt_nosuiddir, "nosuiddir"},
{Opt_data_writeback, "data=writeback"},
{Opt_data_ordered, "data=ordered"}
{Opt_data_ordered, "data=ordered"},
{Opt_err, NULL}
};
/**
@@ -237,6 +239,7 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
case Opt_data_ordered:
args->ar_data = GFS2_DATA_ORDERED;
break;
case Opt_err:
default:
fs_info(sdp, "unknown option: %s\n", o);
error = -EINVAL;

View File

@@ -90,7 +90,7 @@ static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock,
error = gfs2_block_map(inode, lblock, 0, bh_result);
if (error)
return error;
if (bh_result->b_blocknr == 0)
if (!buffer_mapped(bh_result))
return -EIO;
return 0;
}
@@ -414,7 +414,8 @@ static int gfs2_prepare_write(struct file *file, struct page *page,
if (ind_blocks || data_blocks)
rblocks += RES_STATFS + RES_QUOTA;
error = gfs2_trans_begin(sdp, rblocks, 0);
error = gfs2_trans_begin(sdp, rblocks,
PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize);
if (error)
goto out_trans_fail;
@@ -616,58 +617,50 @@ static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock)
return dblock;
}
static void discard_buffer(struct gfs2_sbd *sdp, struct buffer_head *bh)
static void gfs2_discard(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
struct gfs2_bufdata *bd;
lock_buffer(bh);
gfs2_log_lock(sdp);
clear_buffer_dirty(bh);
bd = bh->b_private;
if (bd) {
bd->bd_bh = NULL;
bh->b_private = NULL;
if (!bd->bd_ail && list_empty(&bd->bd_le.le_list))
kmem_cache_free(gfs2_bufdata_cachep, bd);
if (!list_empty(&bd->bd_le.le_list) && !buffer_pinned(bh))
list_del_init(&bd->bd_le.le_list);
else
gfs2_remove_from_journal(bh, current->journal_info, 0);
}
gfs2_log_unlock(sdp);
lock_buffer(bh);
clear_buffer_dirty(bh);
bh->b_bdev = NULL;
clear_buffer_mapped(bh);
clear_buffer_req(bh);
clear_buffer_new(bh);
clear_buffer_delay(bh);
gfs2_log_unlock(sdp);
unlock_buffer(bh);
}
static void gfs2_invalidatepage(struct page *page, unsigned long offset)
{
struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
struct buffer_head *head, *bh, *next;
unsigned int curr_off = 0;
struct buffer_head *bh, *head;
unsigned long pos = 0;
BUG_ON(!PageLocked(page));
if (offset == 0)
ClearPageChecked(page);
if (!page_has_buffers(page))
return;
goto out;
bh = head = page_buffers(page);
do {
unsigned int next_off = curr_off + bh->b_size;
next = bh->b_this_page;
if (offset <= curr_off)
discard_buffer(sdp, bh);
curr_off = next_off;
bh = next;
if (offset <= pos)
gfs2_discard(sdp, bh);
pos += bh->b_size;
bh = bh->b_this_page;
} while (bh != head);
if (!offset)
out:
if (offset == 0)
try_to_release_page(page, 0);
return;
}
/**
@@ -735,59 +728,6 @@ out:
return rv;
}
/**
* stuck_releasepage - We're stuck in gfs2_releasepage(). Print stuff out.
* @bh: the buffer we're stuck on
*
*/
static void stuck_releasepage(struct buffer_head *bh)
{
struct inode *inode = bh->b_page->mapping->host;
struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
struct gfs2_bufdata *bd = bh->b_private;
struct gfs2_glock *gl;
static unsigned limit = 0;
if (limit > 3)
return;
limit++;
fs_warn(sdp, "stuck in gfs2_releasepage() %p\n", inode);
fs_warn(sdp, "blkno = %llu, bh->b_count = %d\n",
(unsigned long long)bh->b_blocknr, atomic_read(&bh->b_count));
fs_warn(sdp, "pinned = %u\n", buffer_pinned(bh));
fs_warn(sdp, "bh->b_private = %s\n", (bd) ? "!NULL" : "NULL");
if (!bd)
return;
gl = bd->bd_gl;
fs_warn(sdp, "gl = (%u, %llu)\n",
gl->gl_name.ln_type, (unsigned long long)gl->gl_name.ln_number);
fs_warn(sdp, "bd_list_tr = %s, bd_le.le_list = %s\n",
(list_empty(&bd->bd_list_tr)) ? "no" : "yes",
(list_empty(&bd->bd_le.le_list)) ? "no" : "yes");
if (gl->gl_ops == &gfs2_inode_glops) {
struct gfs2_inode *ip = gl->gl_object;
unsigned int x;
if (!ip)
return;
fs_warn(sdp, "ip = %llu %llu\n",
(unsigned long long)ip->i_no_formal_ino,
(unsigned long long)ip->i_no_addr);
for (x = 0; x < GFS2_MAX_META_HEIGHT; x++)
fs_warn(sdp, "ip->i_cache[%u] = %s\n",
x, (ip->i_cache[x]) ? "!NULL" : "NULL");
}
}
/**
* gfs2_releasepage - free the metadata associated with a page
* @page: the page that's being released
@@ -805,41 +745,39 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
struct gfs2_sbd *sdp = aspace->i_sb->s_fs_info;
struct buffer_head *bh, *head;
struct gfs2_bufdata *bd;
unsigned long t = jiffies + gfs2_tune_get(sdp, gt_stall_secs) * HZ;
if (!page_has_buffers(page))
goto out;
return 0;
gfs2_log_lock(sdp);
head = bh = page_buffers(page);
do {
if (atomic_read(&bh->b_count))
goto cannot_release;
bd = bh->b_private;
if (bd && bd->bd_ail)
goto cannot_release;
gfs2_assert_warn(sdp, !buffer_pinned(bh));
gfs2_assert_warn(sdp, !buffer_dirty(bh));
bh = bh->b_this_page;
} while(bh != head);
gfs2_log_unlock(sdp);
head = bh = page_buffers(page);
do {
while (atomic_read(&bh->b_count)) {
if (!atomic_read(&aspace->i_writecount))
return 0;
if (!(gfp_mask & __GFP_WAIT))
return 0;
if (time_after_eq(jiffies, t)) {
stuck_releasepage(bh);
/* should we withdraw here? */
return 0;
}
yield();
}
gfs2_assert_warn(sdp, !buffer_pinned(bh));
gfs2_assert_warn(sdp, !buffer_dirty(bh));
gfs2_log_lock(sdp);
bd = bh->b_private;
if (bd) {
gfs2_assert_warn(sdp, bd->bd_bh == bh);
gfs2_assert_warn(sdp, list_empty(&bd->bd_list_tr));
gfs2_assert_warn(sdp, !bd->bd_ail);
bd->bd_bh = NULL;
if (!list_empty(&bd->bd_le.le_list))
bd = NULL;
if (!list_empty(&bd->bd_le.le_list)) {
if (!buffer_pinned(bh))
list_del_init(&bd->bd_le.le_list);
else
bd = NULL;
}
if (bd)
bd->bd_bh = NULL;
bh->b_private = NULL;
}
gfs2_log_unlock(sdp);
@@ -849,8 +787,10 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
bh = bh->b_this_page;
} while (bh != head);
out:
return try_to_free_buffers(page);
cannot_release:
gfs2_log_unlock(sdp);
return 0;
}
const struct address_space_operations gfs2_file_aops = {

View File

@@ -237,7 +237,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
inode = gfs2_inode_lookup(sb, DT_UNKNOWN,
inum->no_addr,
0);
0, 0);
if (!inode)
goto fail;
if (IS_ERR(inode)) {

View File

@@ -571,7 +571,8 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
int error = 0;
state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE;
flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE
| GL_FLOCK;
mutex_lock(&fp->f_fl_mutex);
@@ -579,21 +580,19 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
if (gl) {
if (fl_gh->gh_state == state)
goto out;
gfs2_glock_hold(gl);
flock_lock_file_wait(file,
&(struct file_lock){.fl_type = F_UNLCK});
gfs2_glock_dq_uninit(fl_gh);
gfs2_glock_dq_wait(fl_gh);
gfs2_holder_reinit(state, flags, fl_gh);
} else {
error = gfs2_glock_get(GFS2_SB(&ip->i_inode),
ip->i_no_addr, &gfs2_flock_glops,
CREATE, &gl);
if (error)
goto out;
gfs2_holder_init(gl, state, flags, fl_gh);
gfs2_glock_put(gl);
}
gfs2_holder_init(gl, state, flags, fl_gh);
gfs2_glock_put(gl);
error = gfs2_glock_nq(fl_gh);
if (error) {
gfs2_holder_uninit(fl_gh);

View File

@@ -28,18 +28,18 @@
#include "lm.h"
#include "mount.h"
#include "ops_fstype.h"
#include "ops_dentry.h"
#include "ops_super.h"
#include "recovery.h"
#include "rgrp.h"
#include "super.h"
#include "sys.h"
#include "util.h"
#include "log.h"
#define DO 0
#define UNDO 1
extern struct dentry_operations gfs2_dops;
static struct gfs2_sbd *init_sbd(struct super_block *sb)
{
struct gfs2_sbd *sdp;
@@ -82,13 +82,15 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
INIT_LIST_HEAD(&sdp->sd_log_le_revoke);
INIT_LIST_HEAD(&sdp->sd_log_le_rg);
INIT_LIST_HEAD(&sdp->sd_log_le_databuf);
INIT_LIST_HEAD(&sdp->sd_log_le_ordered);
mutex_init(&sdp->sd_log_reserve_mutex);
INIT_LIST_HEAD(&sdp->sd_ail1_list);
INIT_LIST_HEAD(&sdp->sd_ail2_list);
init_rwsem(&sdp->sd_log_flush_lock);
INIT_LIST_HEAD(&sdp->sd_log_flush_list);
atomic_set(&sdp->sd_log_in_flight, 0);
init_waitqueue_head(&sdp->sd_log_flush_wait);
INIT_LIST_HEAD(&sdp->sd_revoke_list);
@@ -145,7 +147,8 @@ static int init_names(struct gfs2_sbd *sdp, int silent)
snprintf(sdp->sd_proto_name, GFS2_FSNAME_LEN, "%s", proto);
snprintf(sdp->sd_table_name, GFS2_FSNAME_LEN, "%s", table);
while ((table = strchr(sdp->sd_table_name, '/')))
table = sdp->sd_table_name;
while ((table = strchr(table, '/')))
*table = '_';
out:
@@ -161,14 +164,6 @@ static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh,
if (undo)
goto fail_trans;
p = kthread_run(gfs2_scand, sdp, "gfs2_scand");
error = IS_ERR(p);
if (error) {
fs_err(sdp, "can't start scand thread: %d\n", error);
return error;
}
sdp->sd_scand_process = p;
for (sdp->sd_glockd_num = 0;
sdp->sd_glockd_num < sdp->sd_args.ar_num_glockd;
sdp->sd_glockd_num++) {
@@ -229,14 +224,13 @@ fail:
while (sdp->sd_glockd_num--)
kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]);
kthread_stop(sdp->sd_scand_process);
return error;
}
static inline struct inode *gfs2_lookup_root(struct super_block *sb,
u64 no_addr)
{
return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0);
return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0);
}
static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
@@ -301,8 +295,9 @@ static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
fs_err(sdp, "can't get root dentry\n");
error = -ENOMEM;
iput(inode);
}
sb->s_root->d_op = &gfs2_dops;
} else
sb->s_root->d_op = &gfs2_dops;
out:
gfs2_glock_dq_uninit(&sb_gh);
return error;
@@ -368,7 +363,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
ip = GFS2_I(sdp->sd_jdesc->jd_inode);
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
LM_FLAG_NOEXP | GL_EXACT,
LM_FLAG_NOEXP | GL_EXACT | GL_NOCACHE,
&sdp->sd_jinode_gh);
if (error) {
fs_err(sdp, "can't acquire journal inode glock: %d\n",
@@ -818,7 +813,6 @@ static struct super_block* get_gfs2_sb(const char *dev_name)
struct nameidata nd;
struct file_system_type *fstype;
struct super_block *sb = NULL, *s;
struct list_head *l;
int error;
error = path_lookup(dev_name, LOOKUP_FOLLOW, &nd);
@@ -830,8 +824,7 @@ static struct super_block* get_gfs2_sb(const char *dev_name)
error = vfs_getattr(nd.mnt, nd.dentry, &stat);
fstype = get_fs_type("gfs2");
list_for_each(l, &fstype->fs_supers) {
s = list_entry(l, struct super_block, s_instances);
list_for_each_entry(s, &fstype->fs_supers, s_instances) {
if ((S_ISBLK(stat.mode) && s->s_dev == stat.rdev) ||
(S_ISDIR(stat.mode) && s == nd.dentry->d_inode->i_sb)) {
sb = s;
@@ -861,7 +854,7 @@ static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags,
error = -ENOENT;
goto error;
}
sdp = (struct gfs2_sbd*) sb->s_fs_info;
sdp = sb->s_fs_info;
if (sdp->sd_vfs_meta) {
printk(KERN_WARNING "GFS2: gfs2meta mount already exists\n");
error = -EBUSY;
@@ -896,7 +889,10 @@ error:
static void gfs2_kill_sb(struct super_block *sb)
{
gfs2_delete_debugfs_file(sb->s_fs_info);
if (sb->s_fs_info) {
gfs2_delete_debugfs_file(sb->s_fs_info);
gfs2_meta_syncfs(sb->s_fs_info);
}
kill_block_super(sb);
}

View File

@@ -69,7 +69,7 @@ static int gfs2_create(struct inode *dir, struct dentry *dentry,
mark_inode_dirty(inode);
break;
} else if (PTR_ERR(inode) != -EEXIST ||
(nd->intent.open.flags & O_EXCL)) {
(nd && (nd->intent.open.flags & O_EXCL))) {
gfs2_holder_uninit(ghs);
return PTR_ERR(inode);
}
@@ -278,17 +278,25 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
error = gfs2_glock_nq_m(3, ghs);
error = gfs2_glock_nq(ghs); /* parent */
if (error)
goto out;
goto out_parent;
error = gfs2_glock_nq(ghs + 1); /* child */
if (error)
goto out_child;
error = gfs2_glock_nq(ghs + 2); /* rgrp */
if (error)
goto out_rgrp;
error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
if (error)
goto out_gunlock;
goto out_rgrp;
error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0);
if (error)
goto out_gunlock;
goto out_rgrp;
error = gfs2_dir_del(dip, &dentry->d_name);
if (error)
@@ -298,12 +306,15 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
out_end_trans:
gfs2_trans_end(sdp);
out_gunlock:
gfs2_glock_dq_m(3, ghs);
out:
gfs2_holder_uninit(ghs);
gfs2_holder_uninit(ghs + 1);
gfs2_glock_dq(ghs + 2);
out_rgrp:
gfs2_holder_uninit(ghs + 2);
gfs2_glock_dq(ghs + 1);
out_child:
gfs2_holder_uninit(ghs + 1);
gfs2_glock_dq(ghs);
out_parent:
gfs2_holder_uninit(ghs);
gfs2_glock_dq_uninit(&ri_gh);
return error;
}
@@ -894,12 +905,17 @@ static int gfs2_permission(struct inode *inode, int mask, struct nameidata *nd)
static int setattr_size(struct inode *inode, struct iattr *attr)
{
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
int error;
if (attr->ia_size != ip->i_di.di_size) {
error = vmtruncate(inode, attr->ia_size);
error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
if (error)
return error;
error = vmtruncate(inode, attr->ia_size);
gfs2_trans_end(sdp);
if (error)
return error;
}
error = gfs2_truncatei(ip, attr->ia_size);

View File

@@ -92,7 +92,6 @@ static void gfs2_put_super(struct super_block *sb)
kthread_stop(sdp->sd_recoverd_process);
while (sdp->sd_glockd_num--)
kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]);
kthread_stop(sdp->sd_scand_process);
if (!(sb->s_flags & MS_RDONLY)) {
error = gfs2_make_fs_ro(sdp);
@@ -456,12 +455,15 @@ static void gfs2_delete_inode(struct inode *inode)
}
error = gfs2_dinode_dealloc(ip);
/*
* Must do this before unlock to avoid trying to write back
* potentially dirty data now that inode no longer exists
* on disk.
*/
if (error)
goto out_unlock;
error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
if (error)
goto out_unlock;
/* Needs to be done before glock release & also in a transaction */
truncate_inode_pages(&inode->i_data, 0);
gfs2_trans_end(sdp);
out_unlock:
gfs2_glock_dq(&ip->i_iopen_gh);

View File

@@ -70,6 +70,7 @@ struct gfs2_quota_host {
u64 qu_limit;
u64 qu_warn;
s64 qu_value;
u32 qu_ll_next;
};
struct gfs2_quota_change_host {
@@ -580,6 +581,7 @@ static void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf)
qu->qu_limit = be64_to_cpu(str->qu_limit);
qu->qu_warn = be64_to_cpu(str->qu_warn);
qu->qu_value = be64_to_cpu(str->qu_value);
qu->qu_ll_next = be32_to_cpu(str->qu_ll_next);
}
static void gfs2_quota_out(const struct gfs2_quota_host *qu, void *buf)
@@ -589,6 +591,7 @@ static void gfs2_quota_out(const struct gfs2_quota_host *qu, void *buf)
str->qu_limit = cpu_to_be64(qu->qu_limit);
str->qu_warn = cpu_to_be64(qu->qu_warn);
str->qu_value = cpu_to_be64(qu->qu_value);
str->qu_ll_next = cpu_to_be32(qu->qu_ll_next);
memset(&str->qu_reserved, 0, sizeof(str->qu_reserved));
}
@@ -614,6 +617,16 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
s64 value;
int err = -EIO;
if (gfs2_is_stuffed(ip)) {
struct gfs2_alloc *al = NULL;
al = gfs2_alloc_get(ip);
/* just request 1 blk */
al->al_requested = 1;
gfs2_inplace_reserve(ip);
gfs2_unstuff_dinode(ip, NULL);
gfs2_inplace_release(ip);
gfs2_alloc_put(ip);
}
page = grab_cache_page(mapping, index);
if (!page)
return -ENOMEM;

View File

@@ -469,7 +469,7 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd)
};
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
LM_FLAG_NOEXP, &ji_gh);
LM_FLAG_NOEXP | GL_NOCACHE, &ji_gh);
if (error)
goto fail_gunlock_j;
} else {

View File

@@ -31,6 +31,7 @@
#include "inode.h"
#define BFITNOENT ((u32)~0)
#define NO_BLOCK ((u64)~0)
/*
* These routines are used by the resource group routines (rgrp.c)
@@ -116,8 +117,7 @@ static unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, unsigned char *buffer,
* @buffer: the buffer that holds the bitmaps
* @buflen: the length (in bytes) of the buffer
* @goal: start search at this block's bit-pair (within @buffer)
* @old_state: GFS2_BLKST_XXX the state of the block we're looking for;
* bit 0 = alloc(1)/free(0), bit 1 = meta(1)/data(0)
* @old_state: GFS2_BLKST_XXX the state of the block we're looking for.
*
* Scope of @goal and returned block number is only within this bitmap buffer,
* not entire rgrp or filesystem. @buffer will be offset from the actual
@@ -137,9 +137,13 @@ static u32 gfs2_bitfit(struct gfs2_rgrpd *rgd, unsigned char *buffer,
byte = buffer + (goal / GFS2_NBBY);
bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE;
end = buffer + buflen;
alloc = (old_state & 1) ? 0 : 0x55;
alloc = (old_state == GFS2_BLKST_FREE) ? 0x55 : 0;
while (byte < end) {
/* If we're looking for a free block we can eliminate all
bitmap settings with 0x55, which represents four data
blocks in a row. If we're looking for a data block, we can
eliminate 0x00 which corresponds to four free blocks. */
if ((*byte & 0x55) == alloc) {
blk += (8 - bit) >> 1;
@@ -859,23 +863,28 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al)
static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked)
{
struct inode *inode;
u32 goal = 0;
u32 goal = 0, block;
u64 no_addr;
struct gfs2_sbd *sdp = rgd->rd_sbd;
for(;;) {
if (goal >= rgd->rd_data)
break;
goal = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED,
GFS2_BLKST_UNLINKED);
if (goal == BFITNOENT)
down_write(&sdp->sd_log_flush_lock);
block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED,
GFS2_BLKST_UNLINKED);
up_write(&sdp->sd_log_flush_lock);
if (block == BFITNOENT)
break;
no_addr = goal + rgd->rd_data0;
/* rgblk_search can return a block < goal, so we need to
keep it marching forward. */
no_addr = block + rgd->rd_data0;
goal++;
if (no_addr < *last_unlinked)
if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked)
continue;
*last_unlinked = no_addr;
inode = gfs2_inode_lookup(rgd->rd_sbd->sd_vfs, DT_UNKNOWN,
no_addr, -1);
no_addr, -1, 1);
if (!IS_ERR(inode))
return inode;
}
@@ -1152,7 +1161,7 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line)
struct gfs2_alloc *al = &ip->i_alloc;
struct inode *inode;
int error = 0;
u64 last_unlinked = 0;
u64 last_unlinked = NO_BLOCK;
if (gfs2_assert_warn(sdp, al->al_requested))
return -EINVAL;
@@ -1289,7 +1298,9 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
allocatable block anywhere else, we want to be able to wrap around and
search in the first part of our first-searched bit block. */
for (x = 0; x <= length; x++) {
if (bi->bi_clone)
/* The GFS2_BLKST_UNLINKED state doesn't apply to the clone
bitmaps, so we must search the originals for that. */
if (old_state != GFS2_BLKST_UNLINKED && bi->bi_clone)
blk = gfs2_bitfit(rgd, bi->bi_clone + bi->bi_offset,
bi->bi_len, goal, old_state);
else
@@ -1305,9 +1316,7 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
goal = 0;
}
if (old_state != new_state) {
gfs2_assert_withdraw(rgd->rd_sbd, blk != BFITNOENT);
if (blk != BFITNOENT && old_state != new_state) {
gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
bi->bi_len, blk, new_state);

View File

@@ -58,7 +58,6 @@ void gfs2_tune_init(struct gfs2_tune *gt)
gt->gt_incore_log_blocks = 1024;
gt->gt_log_flush_secs = 60;
gt->gt_jindex_refresh_secs = 60;
gt->gt_scand_secs = 15;
gt->gt_recoverd_secs = 60;
gt->gt_logd_secs = 1;
gt->gt_quotad_secs = 5;
@@ -160,18 +159,15 @@ int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent)
}
static int end_bio_io_page(struct bio *bio, unsigned int bytes_done, int error)
static void end_bio_io_page(struct bio *bio, int error)
{
struct page *page = bio->bi_private;
if (bio->bi_size)
return 1;
if (!error)
SetPageUptodate(page);
else
printk(KERN_WARNING "gfs2: error %d reading superblock\n", error);
unlock_page(page);
return 0;
}
static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)

View File

@@ -222,7 +222,6 @@ static struct kobj_type gfs2_ktype = {
};
static struct kset gfs2_kset = {
.kobj = {.name = "gfs2"},
.ktype = &gfs2_ktype,
};
@@ -442,7 +441,6 @@ TUNE_ATTR(quota_simul_sync, 1);
TUNE_ATTR(quota_cache_secs, 1);
TUNE_ATTR(stall_secs, 1);
TUNE_ATTR(statfs_quantum, 1);
TUNE_ATTR_DAEMON(scand_secs, scand_process);
TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process);
TUNE_ATTR_DAEMON(logd_secs, logd_process);
TUNE_ATTR_DAEMON(quotad_secs, quotad_process);
@@ -464,7 +462,6 @@ static struct attribute *tune_attrs[] = {
&tune_attr_quota_cache_secs.attr,
&tune_attr_stall_secs.attr,
&tune_attr_statfs_quantum.attr,
&tune_attr_scand_secs.attr,
&tune_attr_recoverd_secs.attr,
&tune_attr_logd_secs.attr,
&tune_attr_quotad_secs.attr,
@@ -553,6 +550,7 @@ int gfs2_sys_init(void)
{
gfs2_sys_margs = NULL;
spin_lock_init(&gfs2_sys_margs_lock);
kobject_set_name(&gfs2_kset.kobj, "gfs2");
kobj_set_kset_s(&gfs2_kset, fs_subsys);
return kset_register(&gfs2_kset);
}

View File

@@ -142,25 +142,25 @@ void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta)
lops_add(sdp, &bd->bd_le);
}
void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, u64 blkno)
void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
{
struct gfs2_revoke *rv = kmalloc(sizeof(struct gfs2_revoke),
GFP_NOFS | __GFP_NOFAIL);
lops_init_le(&rv->rv_le, &gfs2_revoke_lops);
rv->rv_blkno = blkno;
lops_add(sdp, &rv->rv_le);
BUG_ON(!list_empty(&bd->bd_le.le_list));
BUG_ON(!list_empty(&bd->bd_ail_st_list));
BUG_ON(!list_empty(&bd->bd_ail_gl_list));
lops_init_le(&bd->bd_le, &gfs2_revoke_lops);
lops_add(sdp, &bd->bd_le);
}
void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno)
{
struct gfs2_revoke *rv;
struct gfs2_bufdata *bd;
int found = 0;
gfs2_log_lock(sdp);
list_for_each_entry(rv, &sdp->sd_log_le_revoke, rv_le.le_list) {
if (rv->rv_blkno == blkno) {
list_del(&rv->rv_le.le_list);
list_for_each_entry(bd, &sdp->sd_log_le_revoke, bd_le.le_list) {
if (bd->bd_blkno == blkno) {
list_del_init(&bd->bd_le.le_list);
gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke);
sdp->sd_log_num_revoke--;
found = 1;
@@ -172,7 +172,7 @@ void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno)
if (found) {
struct gfs2_trans *tr = current->journal_info;
kfree(rv);
kmem_cache_free(gfs2_bufdata_cachep, bd);
tr->tr_num_revoke_rm++;
}
}

View File

@@ -32,7 +32,7 @@ void gfs2_trans_end(struct gfs2_sbd *sdp);
void gfs2_trans_add_gl(struct gfs2_glock *gl);
void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta);
void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, u64 blkno);
void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno);
void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd);

View File

@@ -2162,7 +2162,7 @@ static void lbmStartIO(struct lbuf * bp)
/* check if journaling to disk has been disabled */
if (log->no_integrity) {
bio->bi_size = 0;
lbmIODone(bio, 0, 0);
lbmIODone(bio, 0);
} else {
submit_bio(WRITE_SYNC, bio);
INCREMENT(lmStat.submitted);
@@ -2200,16 +2200,13 @@ static int lbmIOWait(struct lbuf * bp, int flag)
*
* executed at INTIODONE level
*/
static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
static void lbmIODone(struct bio *bio, int error)
{
struct lbuf *bp = bio->bi_private;
struct lbuf *nextbp, *tail;
struct jfs_log *log;
unsigned long flags;
if (bio->bi_size)
return 1;
/*
* get back jfs buffer bound to the i/o buffer
*/
@@ -2237,8 +2234,6 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
/* wakeup I/O initiator */
LCACHE_WAKEUP(&bp->l_ioevent);
return 0;
}
/*
@@ -2263,7 +2258,6 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
if (bp->l_flag & lbmDIRECT) {
LCACHE_WAKEUP(&bp->l_ioevent);
LCACHE_UNLOCK(flags);
return 0;
}
tail = log->wqueue;
@@ -2342,8 +2336,6 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
LCACHE_UNLOCK(flags); /* unlock+enable */
}
return 0;
}
int jfsIOWait(void *arg)

View File

@@ -280,14 +280,10 @@ static void last_read_complete(struct page *page)
unlock_page(page);
}
static int metapage_read_end_io(struct bio *bio, unsigned int bytes_done,
int err)
static void metapage_read_end_io(struct bio *bio, int err)
{
struct page *page = bio->bi_private;
if (bio->bi_size)
return 1;
if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
printk(KERN_ERR "metapage_read_end_io: I/O error\n");
SetPageError(page);
@@ -295,8 +291,6 @@ static int metapage_read_end_io(struct bio *bio, unsigned int bytes_done,
dec_io(page, last_read_complete);
bio_put(bio);
return 0;
}
static void remove_from_logsync(struct metapage *mp)
@@ -341,23 +335,18 @@ static void last_write_complete(struct page *page)
end_page_writeback(page);
}
static int metapage_write_end_io(struct bio *bio, unsigned int bytes_done,
int err)
static void metapage_write_end_io(struct bio *bio, int err)
{
struct page *page = bio->bi_private;
BUG_ON(!PagePrivate(page));
if (bio->bi_size)
return 1;
if (! test_bit(BIO_UPTODATE, &bio->bi_flags)) {
printk(KERN_ERR "metapage_write_end_io: I/O error\n");
SetPageError(page);
}
dec_io(page, last_write_complete);
bio_put(bio);
return 0;
}
static int metapage_writepage(struct page *page, struct writeback_control *wbc)

View File

@@ -39,14 +39,11 @@
* status of that page is hard. See end_buffer_async_read() for the details.
* There is no point in duplicating all that complexity.
*/
static int mpage_end_io_read(struct bio *bio, unsigned int bytes_done, int err)
static void mpage_end_io_read(struct bio *bio, int err)
{
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
if (bio->bi_size)
return 1;
do {
struct page *page = bvec->bv_page;
@@ -62,17 +59,13 @@ static int mpage_end_io_read(struct bio *bio, unsigned int bytes_done, int err)
unlock_page(page);
} while (bvec >= bio->bi_io_vec);
bio_put(bio);
return 0;
}
static int mpage_end_io_write(struct bio *bio, unsigned int bytes_done, int err)
static void mpage_end_io_write(struct bio *bio, int err)
{
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
if (bio->bi_size)
return 1;
do {
struct page *page = bvec->bv_page;
@@ -87,7 +80,6 @@ static int mpage_end_io_write(struct bio *bio, unsigned int bytes_done, int err)
end_page_writeback(page);
} while (bvec >= bio->bi_io_vec);
bio_put(bio);
return 0;
}
static struct bio *mpage_bio_submit(int rw, struct bio *bio)

View File

@@ -17,6 +17,18 @@ ToDo/Notes:
happen is unclear however so it is worth waiting until someone hits
the problem.
2.1.29 - Fix a deadlock at mount time.
- During mount the VFS holds the s_umount lock on the superblock. So
when we try to empty the journal $LogFile contents by calling
ntfs_attr_set() while the machine does not have much memory and the
journal is large, ntfs_attr_set() results in the VM trying to balance
dirty pages, which in turn tries to take the s_umount lock, and thus
we get a deadlock. The solution is to not use ntfs_attr_set() and
instead do the zeroing by hand at the block level rather than at the
page cache level.
- Fix sparse warnings.
2.1.28 - Fix a deadlock.
- Fix deadlock in fs/ntfs/inode.c::ntfs_put_inode(). Thanks to Sergey

View File

@@ -6,7 +6,7 @@ ntfs-objs := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \
index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \
unistr.o upcase.o
EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.28\"
EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.29\"
ifeq ($(CONFIG_NTFS_DEBUG),y)
EXTRA_CFLAGS += -DDEBUG

View File

@@ -2,7 +2,7 @@
* aops.c - NTFS kernel address space operations and page cache handling.
* Part of the Linux-NTFS project.
*
* Copyright (c) 2001-2006 Anton Altaparmakov
* Copyright (c) 2001-2007 Anton Altaparmakov
* Copyright (c) 2002 Richard Russon
*
* This program/include file is free software; you can redistribute it and/or
@@ -396,7 +396,7 @@ static int ntfs_readpage(struct file *file, struct page *page)
loff_t i_size;
struct inode *vi;
ntfs_inode *ni, *base_ni;
u8 *kaddr;
u8 *addr;
ntfs_attr_search_ctx *ctx;
MFT_RECORD *mrec;
unsigned long flags;
@@ -491,15 +491,15 @@ retry_readpage:
/* Race with shrinking truncate. */
attr_len = i_size;
}
kaddr = kmap_atomic(page, KM_USER0);
addr = kmap_atomic(page, KM_USER0);
/* Copy the data to the page. */
memcpy(kaddr, (u8*)ctx->attr +
memcpy(addr, (u8*)ctx->attr +
le16_to_cpu(ctx->attr->data.resident.value_offset),
attr_len);
/* Zero the remainder of the page. */
memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
memset(addr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
flush_dcache_page(page);
kunmap_atomic(kaddr, KM_USER0);
kunmap_atomic(addr, KM_USER0);
put_unm_err_out:
ntfs_attr_put_search_ctx(ctx);
unm_err_out:
@@ -1344,7 +1344,7 @@ static int ntfs_writepage(struct page *page, struct writeback_control *wbc)
loff_t i_size;
struct inode *vi = page->mapping->host;
ntfs_inode *base_ni = NULL, *ni = NTFS_I(vi);
char *kaddr;
char *addr;
ntfs_attr_search_ctx *ctx = NULL;
MFT_RECORD *m = NULL;
u32 attr_len;
@@ -1484,14 +1484,14 @@ retry_writepage:
/* Shrinking cannot fail. */
BUG_ON(err);
}
kaddr = kmap_atomic(page, KM_USER0);
addr = kmap_atomic(page, KM_USER0);
/* Copy the data from the page to the mft record. */
memcpy((u8*)ctx->attr +
le16_to_cpu(ctx->attr->data.resident.value_offset),
kaddr, attr_len);
addr, attr_len);
/* Zero out of bounds area in the page cache page. */
memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
kunmap_atomic(kaddr, KM_USER0);
memset(addr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
kunmap_atomic(addr, KM_USER0);
flush_dcache_page(page);
flush_dcache_mft_record_page(ctx->ntfs_ino);
/* We are done with the page. */

View File

@@ -1,7 +1,7 @@
/**
* attrib.c - NTFS attribute operations. Part of the Linux-NTFS project.
*
* Copyright (c) 2001-2006 Anton Altaparmakov
* Copyright (c) 2001-2007 Anton Altaparmakov
* Copyright (c) 2002 Richard Russon
*
* This program/include file is free software; you can redistribute it and/or
@@ -2500,7 +2500,7 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
struct page *page;
u8 *kaddr;
pgoff_t idx, end;
unsigned int start_ofs, end_ofs, size;
unsigned start_ofs, end_ofs, size;
ntfs_debug("Entering for ofs 0x%llx, cnt 0x%llx, val 0x%hx.",
(long long)ofs, (long long)cnt, val);
@@ -2548,6 +2548,8 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
kunmap_atomic(kaddr, KM_USER0);
set_page_dirty(page);
page_cache_release(page);
balance_dirty_pages_ratelimited(mapping);
cond_resched();
if (idx == end)
goto done;
idx++;
@@ -2604,6 +2606,8 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
kunmap_atomic(kaddr, KM_USER0);
set_page_dirty(page);
page_cache_release(page);
balance_dirty_pages_ratelimited(mapping);
cond_resched();
}
done:
ntfs_debug("Done.");

View File

@@ -1,7 +1,7 @@
/*
* file.c - NTFS kernel file operations. Part of the Linux-NTFS project.
*
* Copyright (c) 2001-2006 Anton Altaparmakov
* Copyright (c) 2001-2007 Anton Altaparmakov
*
* This program/include file is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published
@@ -26,7 +26,6 @@
#include <linux/swap.h>
#include <linux/uio.h>
#include <linux/writeback.h>
#include <linux/sched.h>
#include <asm/page.h>
#include <asm/uaccess.h>
@@ -362,7 +361,7 @@ static inline void ntfs_fault_in_pages_readable(const char __user *uaddr,
volatile char c;
/* Set @end to the first byte outside the last page we care about. */
end = (const char __user*)PAGE_ALIGN((ptrdiff_t __user)uaddr + bytes);
end = (const char __user*)PAGE_ALIGN((unsigned long)uaddr + bytes);
while (!__get_user(c, uaddr) && (uaddr += PAGE_SIZE, uaddr < end))
;
@@ -532,7 +531,8 @@ static int ntfs_prepare_pages_for_non_resident_write(struct page **pages,
blocksize_bits = vol->sb->s_blocksize_bits;
u = 0;
do {
struct page *page = pages[u];
page = pages[u];
BUG_ON(!page);
/*
* create_empty_buffers() will create uptodate/dirty buffers if
* the page is uptodate/dirty.
@@ -1291,7 +1291,7 @@ static inline size_t ntfs_copy_from_user(struct page **pages,
size_t bytes)
{
struct page **last_page = pages + nr_pages;
char *kaddr;
char *addr;
size_t total = 0;
unsigned len;
int left;
@@ -1300,13 +1300,13 @@ static inline size_t ntfs_copy_from_user(struct page **pages,
len = PAGE_CACHE_SIZE - ofs;
if (len > bytes)
len = bytes;
kaddr = kmap_atomic(*pages, KM_USER0);
left = __copy_from_user_inatomic(kaddr + ofs, buf, len);
kunmap_atomic(kaddr, KM_USER0);
addr = kmap_atomic(*pages, KM_USER0);
left = __copy_from_user_inatomic(addr + ofs, buf, len);
kunmap_atomic(addr, KM_USER0);
if (unlikely(left)) {
/* Do it the slow way. */
kaddr = kmap(*pages);
left = __copy_from_user(kaddr + ofs, buf, len);
addr = kmap(*pages);
left = __copy_from_user(addr + ofs, buf, len);
kunmap(*pages);
if (unlikely(left))
goto err_out;
@@ -1408,26 +1408,26 @@ static inline size_t ntfs_copy_from_user_iovec(struct page **pages,
size_t *iov_ofs, size_t bytes)
{
struct page **last_page = pages + nr_pages;
char *kaddr;
char *addr;
size_t copied, len, total = 0;
do {
len = PAGE_CACHE_SIZE - ofs;
if (len > bytes)
len = bytes;
kaddr = kmap_atomic(*pages, KM_USER0);
copied = __ntfs_copy_from_user_iovec_inatomic(kaddr + ofs,
addr = kmap_atomic(*pages, KM_USER0);
copied = __ntfs_copy_from_user_iovec_inatomic(addr + ofs,
*iov, *iov_ofs, len);
kunmap_atomic(kaddr, KM_USER0);
kunmap_atomic(addr, KM_USER0);
if (unlikely(copied != len)) {
/* Do it the slow way. */
kaddr = kmap(*pages);
copied = __ntfs_copy_from_user_iovec_inatomic(kaddr + ofs,
addr = kmap(*pages);
copied = __ntfs_copy_from_user_iovec_inatomic(addr + ofs,
*iov, *iov_ofs, len);
/*
* Zero the rest of the target like __copy_from_user().
*/
memset(kaddr + ofs + copied, 0, len - copied);
memset(addr + ofs + copied, 0, len - copied);
kunmap(*pages);
if (unlikely(copied != len))
goto err_out;
@@ -1735,8 +1735,6 @@ static int ntfs_commit_pages_after_write(struct page **pages,
read_unlock_irqrestore(&ni->size_lock, flags);
BUG_ON(initialized_size != i_size);
if (end > initialized_size) {
unsigned long flags;
write_lock_irqsave(&ni->size_lock, flags);
ni->initialized_size = end;
i_size_write(vi, end);

View File

@@ -34,7 +34,6 @@
#include "dir.h"
#include "debug.h"
#include "inode.h"
#include "attrib.h"
#include "lcnalloc.h"
#include "malloc.h"
#include "mft.h"
@@ -2500,8 +2499,6 @@ retry_truncate:
/* Resize the attribute record to best fit the new attribute size. */
if (new_size < vol->mft_record_size &&
!ntfs_resident_attr_value_resize(m, a, new_size)) {
unsigned long flags;
/* The resize succeeded! */
flush_dcache_mft_record_page(ctx->ntfs_ino);
mark_mft_record_dirty(ctx->ntfs_ino);

View File

@@ -1,7 +1,7 @@
/*
* logfile.c - NTFS kernel journal handling. Part of the Linux-NTFS project.
*
* Copyright (c) 2002-2005 Anton Altaparmakov
* Copyright (c) 2002-2007 Anton Altaparmakov
*
* This program/include file is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published
@@ -724,24 +724,139 @@ bool ntfs_is_logfile_clean(struct inode *log_vi, const RESTART_PAGE_HEADER *rp)
*/
bool ntfs_empty_logfile(struct inode *log_vi)
{
ntfs_volume *vol = NTFS_SB(log_vi->i_sb);
VCN vcn, end_vcn;
ntfs_inode *log_ni = NTFS_I(log_vi);
ntfs_volume *vol = log_ni->vol;
struct super_block *sb = vol->sb;
runlist_element *rl;
unsigned long flags;
unsigned block_size, block_size_bits;
int err;
bool should_wait = true;
ntfs_debug("Entering.");
if (!NVolLogFileEmpty(vol)) {
int err;
err = ntfs_attr_set(NTFS_I(log_vi), 0, i_size_read(log_vi),
0xff);
if (unlikely(err)) {
ntfs_error(vol->sb, "Failed to fill $LogFile with "
"0xff bytes (error code %i).", err);
return false;
}
/* Set the flag so we do not have to do it again on remount. */
NVolSetLogFileEmpty(vol);
if (NVolLogFileEmpty(vol)) {
ntfs_debug("Done.");
return true;
}
/*
* We cannot use ntfs_attr_set() because we may be still in the middle
* of a mount operation. Thus we do the emptying by hand by first
* zapping the page cache pages for the $LogFile/$DATA attribute and
* then emptying each of the buffers in each of the clusters specified
* by the runlist by hand.
*/
block_size = sb->s_blocksize;
block_size_bits = sb->s_blocksize_bits;
vcn = 0;
read_lock_irqsave(&log_ni->size_lock, flags);
end_vcn = (log_ni->initialized_size + vol->cluster_size_mask) >>
vol->cluster_size_bits;
read_unlock_irqrestore(&log_ni->size_lock, flags);
truncate_inode_pages(log_vi->i_mapping, 0);
down_write(&log_ni->runlist.lock);
rl = log_ni->runlist.rl;
if (unlikely(!rl || vcn < rl->vcn || !rl->length)) {
map_vcn:
err = ntfs_map_runlist_nolock(log_ni, vcn, NULL);
if (err) {
ntfs_error(sb, "Failed to map runlist fragment (error "
"%d).", -err);
goto err;
}
rl = log_ni->runlist.rl;
BUG_ON(!rl || vcn < rl->vcn || !rl->length);
}
/* Seek to the runlist element containing @vcn. */
while (rl->length && vcn >= rl[1].vcn)
rl++;
do {
LCN lcn;
sector_t block, end_block;
s64 len;
/*
* If this run is not mapped, map it now and start again, as the
* runlist will have been updated.
*/
lcn = rl->lcn;
if (unlikely(lcn == LCN_RL_NOT_MAPPED)) {
vcn = rl->vcn;
goto map_vcn;
}
/* If this run is not valid abort with an error. */
if (unlikely(!rl->length || lcn < LCN_HOLE))
goto rl_err;
/* Skip holes. */
if (lcn == LCN_HOLE)
continue;
block = lcn << vol->cluster_size_bits >> block_size_bits;
len = rl->length;
if (rl[1].vcn > end_vcn)
len = end_vcn - rl->vcn;
end_block = (lcn + len) << vol->cluster_size_bits >>
block_size_bits;
/* Iterate over the blocks in the run and empty them. */
do {
struct buffer_head *bh;
/* Obtain the buffer, possibly not uptodate. */
bh = sb_getblk(sb, block);
BUG_ON(!bh);
/* Setup buffer i/o submission. */
lock_buffer(bh);
bh->b_end_io = end_buffer_write_sync;
get_bh(bh);
/* Set the entire contents of the buffer to 0xff. */
memset(bh->b_data, -1, block_size);
if (!buffer_uptodate(bh))
set_buffer_uptodate(bh);
if (buffer_dirty(bh))
clear_buffer_dirty(bh);
/*
* Submit the buffer and wait for i/o to complete but
* only for the first buffer so we do not miss really
* serious i/o errors. Once the first buffer has
* completed ignore errors afterwards as we can assume
* that if one buffer worked all of them will work.
*/
submit_bh(WRITE, bh);
if (should_wait) {
should_wait = false;
wait_on_buffer(bh);
if (unlikely(!buffer_uptodate(bh)))
goto io_err;
}
brelse(bh);
} while (++block < end_block);
} while ((++rl)->vcn < end_vcn);
up_write(&log_ni->runlist.lock);
/*
* Zap the pages again just in case any got instantiated whilst we were
* emptying the blocks by hand. FIXME: We may not have completed
* writing to all the buffer heads yet so this may happen too early.
* We really should use a kernel thread to do the emptying
* asynchronously and then we can also set the volume dirty and output
* an error message if emptying should fail.
*/
truncate_inode_pages(log_vi->i_mapping, 0);
/* Set the flag so we do not have to do it again on remount. */
NVolSetLogFileEmpty(vol);
ntfs_debug("Done.");
return true;
io_err:
ntfs_error(sb, "Failed to write buffer. Unmount and run chkdsk.");
goto dirty_err;
rl_err:
ntfs_error(sb, "Runlist is corrupt. Unmount and run chkdsk.");
dirty_err:
NVolSetErrors(vol);
err = -EIO;
err:
up_write(&log_ni->runlist.lock);
ntfs_error(sb, "Failed to fill $LogFile with 0xff bytes (error %d).",
-err);
return false;
}
#endif /* NTFS_RW */

View File

@@ -1,7 +1,7 @@
/**
* runlist.c - NTFS runlist handling code. Part of the Linux-NTFS project.
*
* Copyright (c) 2001-2005 Anton Altaparmakov
* Copyright (c) 2001-2007 Anton Altaparmakov
* Copyright (c) 2002-2005 Richard Russon
*
* This program/include file is free software; you can redistribute it and/or
@@ -1714,7 +1714,7 @@ extend_hole:
sizeof(*rl));
/* Adjust the beginning of the tail if necessary. */
if (end > rl->vcn) {
s64 delta = end - rl->vcn;
delta = end - rl->vcn;
rl->vcn = end;
rl->length -= delta;
/* Only adjust the lcn if it is real. */

View File

@@ -354,7 +354,6 @@ struct ocfs2_insert_type {
enum ocfs2_append_type ins_appending;
enum ocfs2_contig_type ins_contig;
int ins_contig_index;
int ins_free_records;
int ins_tree_depth;
};
@@ -362,7 +361,6 @@ struct ocfs2_merge_ctxt {
enum ocfs2_contig_type c_contig_type;
int c_has_empty_extent;
int c_split_covers_rec;
int c_used_tail_recs;
};
/*
@@ -2808,36 +2806,28 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
struct ocfs2_merge_ctxt *ctxt)
{
int ret = 0, delete_tail_recs = 0;
int ret = 0;
struct ocfs2_extent_list *el = path_leaf_el(left_path);
struct ocfs2_extent_rec *rec = &el->l_recs[split_index];
BUG_ON(ctxt->c_contig_type == CONTIG_NONE);
if (ctxt->c_split_covers_rec) {
delete_tail_recs++;
if (ctxt->c_contig_type == CONTIG_LEFTRIGHT ||
ctxt->c_has_empty_extent)
delete_tail_recs++;
if (ctxt->c_has_empty_extent) {
/*
* The merge code will need to create an empty
* extent to take the place of the newly
* emptied slot. Remove any pre-existing empty
* extents - having more than one in a leaf is
* illegal.
*/
ret = ocfs2_rotate_tree_left(inode, handle, left_path,
dealloc);
if (ret) {
mlog_errno(ret);
goto out;
}
split_index--;
rec = &el->l_recs[split_index];
if (ctxt->c_split_covers_rec && ctxt->c_has_empty_extent) {
/*
* The merge code will need to create an empty
* extent to take the place of the newly
* emptied slot. Remove any pre-existing empty
* extents - having more than one in a leaf is
* illegal.
*/
ret = ocfs2_rotate_tree_left(inode, handle, left_path,
dealloc);
if (ret) {
mlog_errno(ret);
goto out;
}
split_index--;
rec = &el->l_recs[split_index];
}
if (ctxt->c_contig_type == CONTIG_LEFTRIGHT) {
@@ -3593,6 +3583,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
struct buffer_head *di_bh,
struct buffer_head **last_eb_bh,
struct ocfs2_extent_rec *insert_rec,
int *free_records,
struct ocfs2_insert_type *insert)
{
int ret;
@@ -3633,7 +3624,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
* XXX: This test is simplistic, we can search for empty
* extent records too.
*/
insert->ins_free_records = le16_to_cpu(el->l_count) -
*free_records = le16_to_cpu(el->l_count) -
le16_to_cpu(el->l_next_free_rec);
if (!insert->ins_tree_depth) {
@@ -3730,10 +3721,13 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
struct ocfs2_alloc_context *meta_ac)
{
int status;
int uninitialized_var(free_records);
struct buffer_head *last_eb_bh = NULL;
struct ocfs2_insert_type insert = {0, };
struct ocfs2_extent_rec rec;
BUG_ON(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL);
mlog(0, "add %u clusters at position %u to inode %llu\n",
new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno);
@@ -3752,7 +3746,7 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
rec.e_flags = flags;
status = ocfs2_figure_insert_type(inode, fe_bh, &last_eb_bh, &rec,
&insert);
&free_records, &insert);
if (status < 0) {
mlog_errno(status);
goto bail;
@@ -3762,9 +3756,9 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
"Insert.contig_index: %d, Insert.free_records: %d, "
"Insert.tree_depth: %d\n",
insert.ins_appending, insert.ins_contig, insert.ins_contig_index,
insert.ins_free_records, insert.ins_tree_depth);
free_records, insert.ins_tree_depth);
if (insert.ins_contig == CONTIG_NONE && insert.ins_free_records == 0) {
if (insert.ins_contig == CONTIG_NONE && free_records == 0) {
status = ocfs2_grow_tree(inode, handle, fe_bh,
&insert.ins_tree_depth, &last_eb_bh,
meta_ac);
@@ -3847,26 +3841,17 @@ leftright:
if (le16_to_cpu(rightmost_el->l_next_free_rec) ==
le16_to_cpu(rightmost_el->l_count)) {
int old_depth = depth;
ret = ocfs2_grow_tree(inode, handle, di_bh, &depth, last_eb_bh,
meta_ac);
if (ret) {
mlog_errno(ret);
goto out;
}
if (old_depth != depth) {
eb = (struct ocfs2_extent_block *)(*last_eb_bh)->b_data;
rightmost_el = &eb->h_list;
}
}
memset(&insert, 0, sizeof(struct ocfs2_insert_type));
insert.ins_appending = APPEND_NONE;
insert.ins_contig = CONTIG_NONE;
insert.ins_free_records = le16_to_cpu(rightmost_el->l_count)
- le16_to_cpu(rightmost_el->l_next_free_rec);
insert.ins_tree_depth = depth;
insert_range = le32_to_cpu(split_rec.e_cpos) +
@@ -4015,11 +4000,6 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
} else
rightmost_el = path_root_el(path);
ctxt.c_used_tail_recs = le16_to_cpu(rightmost_el->l_next_free_rec);
if (ctxt.c_used_tail_recs > 0 &&
ocfs2_is_empty_extent(&rightmost_el->l_recs[0]))
ctxt.c_used_tail_recs--;
if (rec->e_cpos == split_rec->e_cpos &&
rec->e_leaf_clusters == split_rec->e_leaf_clusters)
ctxt.c_split_covers_rec = 1;
@@ -4028,10 +4008,9 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
ctxt.c_has_empty_extent = ocfs2_is_empty_extent(&el->l_recs[0]);
mlog(0, "index: %d, contig: %u, used_tail_recs: %u, "
"has_empty: %u, split_covers: %u\n", split_index,
ctxt.c_contig_type, ctxt.c_used_tail_recs,
ctxt.c_has_empty_extent, ctxt.c_split_covers_rec);
mlog(0, "index: %d, contig: %u, has_empty: %u, split_covers: %u\n",
split_index, ctxt.c_contig_type, ctxt.c_has_empty_extent,
ctxt.c_split_covers_rec);
if (ctxt.c_contig_type == CONTIG_NONE) {
if (ctxt.c_split_covers_rec)
@@ -4180,27 +4159,18 @@ static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh,
if (le16_to_cpu(rightmost_el->l_next_free_rec) ==
le16_to_cpu(rightmost_el->l_count)) {
int old_depth = depth;
ret = ocfs2_grow_tree(inode, handle, di_bh, &depth, &last_eb_bh,
meta_ac);
if (ret) {
mlog_errno(ret);
goto out;
}
if (old_depth != depth) {
eb = (struct ocfs2_extent_block *)last_eb_bh->b_data;
rightmost_el = &eb->h_list;
}
}
memset(&insert, 0, sizeof(struct ocfs2_insert_type));
insert.ins_appending = APPEND_NONE;
insert.ins_contig = CONTIG_NONE;
insert.ins_split = SPLIT_RIGHT;
insert.ins_free_records = le16_to_cpu(rightmost_el->l_count)
- le16_to_cpu(rightmost_el->l_next_free_rec);
insert.ins_tree_depth = depth;
ret = ocfs2_do_insert_extent(inode, handle, di_bh, &split_rec, &insert);
@@ -5665,12 +5635,50 @@ static int ocfs2_ordered_zero_func(handle_t *handle, struct buffer_head *bh)
return ocfs2_journal_dirty_data(handle, bh);
}
/*
 * Map the blocks behind bytes [from, to) of @page, optionally zero that
 * byte range, then walk the page's buffers over the same range with
 * ocfs2_ordered_zero_func or ocfs2_writeback_zero_func, as selected by
 * ocfs2_should_order_data().
 *
 * @phys is passed straight through to ocfs2_map_page_blocks().  Helper
 * failures are only logged via mlog_errno(); nothing is returned.
 */
static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
				     unsigned int from, unsigned int to,
				     struct page *page, int zero, u64 *phys)
{
	int status;
	int partial = 0;

	status = ocfs2_map_page_blocks(page, phys, inode, from, to, 0);
	if (status)
		mlog_errno(status);

	if (zero)
		zero_user_page(page, from, to - from, KM_USER0);

	/*
	 * Buffers we just zeroed may not be uptodate yet, because
	 * ocfs2_map_page_blocks() might have skipped some of them.  The
	 * walk below takes care of that.
	 */
	if (ocfs2_should_order_data(inode))
		status = walk_page_buffers(handle, page_buffers(page),
					   from, to, &partial,
					   ocfs2_ordered_zero_func);
	else
		status = walk_page_buffers(handle, page_buffers(page),
					   from, to, &partial,
					   ocfs2_writeback_zero_func);
	if (status < 0)
		mlog_errno(status);

	/* Only a fully covered page may be flagged uptodate as a whole. */
	if (!partial)
		SetPageUptodate(page);

	flush_dcache_page(page);
}
static void ocfs2_zero_cluster_pages(struct inode *inode, loff_t start,
loff_t end, struct page **pages,
int numpages, u64 phys, handle_t *handle)
{
int i, ret, partial = 0;
void *kaddr;
int i;
struct page *page;
unsigned int from, to = PAGE_CACHE_SIZE;
struct super_block *sb = inode->i_sb;
@@ -5691,87 +5699,31 @@ static void ocfs2_zero_cluster_pages(struct inode *inode, loff_t start,
BUG_ON(from > PAGE_CACHE_SIZE);
BUG_ON(to > PAGE_CACHE_SIZE);
ret = ocfs2_map_page_blocks(page, &phys, inode, from, to, 0);
if (ret)
mlog_errno(ret);
kaddr = kmap_atomic(page, KM_USER0);
memset(kaddr + from, 0, to - from);
kunmap_atomic(kaddr, KM_USER0);
/*
* Need to set the buffers we zero'd into uptodate
* here if they aren't - ocfs2_map_page_blocks()
* might've skipped some
*/
if (ocfs2_should_order_data(inode)) {
ret = walk_page_buffers(handle,
page_buffers(page),
from, to, &partial,
ocfs2_ordered_zero_func);
if (ret < 0)
mlog_errno(ret);
} else {
ret = walk_page_buffers(handle, page_buffers(page),
from, to, &partial,
ocfs2_writeback_zero_func);
if (ret < 0)
mlog_errno(ret);
}
if (!partial)
SetPageUptodate(page);
flush_dcache_page(page);
ocfs2_map_and_dirty_page(inode, handle, from, to, page, 1,
&phys);
start = (page->index + 1) << PAGE_CACHE_SHIFT;
}
out:
if (pages) {
for (i = 0; i < numpages; i++) {
page = pages[i];
unlock_page(page);
mark_page_accessed(page);
page_cache_release(page);
}
}
if (pages)
ocfs2_unlock_and_free_pages(pages, numpages);
}
static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end,
struct page **pages, int *num, u64 *phys)
struct page **pages, int *num)
{
int i, numpages = 0, ret = 0;
unsigned int ext_flags;
int numpages, ret = 0;
struct super_block *sb = inode->i_sb;
struct address_space *mapping = inode->i_mapping;
unsigned long index;
loff_t last_page_bytes;
BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(sb)));
BUG_ON(start > end);
if (start == end)
goto out;
BUG_ON(start >> OCFS2_SB(sb)->s_clustersize_bits !=
(end - 1) >> OCFS2_SB(sb)->s_clustersize_bits);
ret = ocfs2_extent_map_get_blocks(inode, start >> sb->s_blocksize_bits,
phys, NULL, &ext_flags);
if (ret) {
mlog_errno(ret);
goto out;
}
/* Tail is a hole. */
if (*phys == 0)
goto out;
/* Tail is marked as unwritten, we can count on write to zero
* in that case. */
if (ext_flags & OCFS2_EXT_UNWRITTEN)
goto out;
numpages = 0;
last_page_bytes = PAGE_ALIGN(end);
index = start >> PAGE_CACHE_SHIFT;
do {
@@ -5788,14 +5740,8 @@ static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end,
out:
if (ret != 0) {
if (pages) {
for (i = 0; i < numpages; i++) {
if (pages[i]) {
unlock_page(pages[i]);
page_cache_release(pages[i]);
}
}
}
if (pages)
ocfs2_unlock_and_free_pages(pages, numpages);
numpages = 0;
}
@@ -5816,18 +5762,20 @@ out:
int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
u64 range_start, u64 range_end)
{
int ret, numpages;
int ret = 0, numpages;
struct page **pages = NULL;
u64 phys;
unsigned int ext_flags;
struct super_block *sb = inode->i_sb;
/*
* File systems which don't support sparse files zero on every
* extend.
*/
if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
if (!ocfs2_sparse_alloc(OCFS2_SB(sb)))
return 0;
pages = kcalloc(ocfs2_pages_per_cluster(inode->i_sb),
pages = kcalloc(ocfs2_pages_per_cluster(sb),
sizeof(struct page *), GFP_NOFS);
if (pages == NULL) {
ret = -ENOMEM;
@@ -5835,16 +5783,31 @@ int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
goto out;
}
ret = ocfs2_grab_eof_pages(inode, range_start, range_end, pages,
&numpages, &phys);
if (range_start == range_end)
goto out;
ret = ocfs2_extent_map_get_blocks(inode,
range_start >> sb->s_blocksize_bits,
&phys, NULL, &ext_flags);
if (ret) {
mlog_errno(ret);
goto out;
}
if (numpages == 0)
/*
* Tail is a hole, or is marked unwritten. In either case, we
* can count on read and write to return/push zero's.
*/
if (phys == 0 || ext_flags & OCFS2_EXT_UNWRITTEN)
goto out;
ret = ocfs2_grab_eof_pages(inode, range_start, range_end, pages,
&numpages);
if (ret) {
mlog_errno(ret);
goto out;
}
ocfs2_zero_cluster_pages(inode, range_start, range_end, pages,
numpages, phys, handle);
@@ -5865,6 +5828,178 @@ out:
return ret;
}
/*
 * Zero the dinode from the start of its id2 member onwards.  The length
 * is the filesystem block size minus id2's offset inside
 * struct ocfs2_dinode, i.e. it reaches the end of the block when @di
 * sits at the start of one (the callers in this file pass bh->b_data).
 */
static void ocfs2_zero_dinode_id2(struct inode *inode, struct ocfs2_dinode *di)
{
	size_t id2_offset = offsetof(struct ocfs2_dinode, id2);
	unsigned int blocksize = 1 << inode->i_sb->s_blocksize_bits;

	memset(&di->id2, 0, blocksize - id2_offset);
}
void ocfs2_dinode_new_extent_list(struct inode *inode,
struct ocfs2_dinode *di)
{
ocfs2_zero_dinode_id2(inode, di);
di->id2.i_list.l_tree_depth = 0;
di->id2.i_list.l_next_free_rec = 0;
di->id2.i_list.l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(inode->i_sb));
}
/*
 * Mark an inode as holding inline data, both in the in-memory inode info
 * and in the dinode, and initialise the inline-data header so that
 * id_count carries ocfs2_max_inline_data() for this superblock.
 */
void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di)
{
	struct super_block *sb = inode->i_sb;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);

	/* Flip the flag under ip_lock and mirror the result to the dinode. */
	spin_lock(&oi->ip_lock);
	oi->ip_dyn_features |= OCFS2_INLINE_DATA_FL;
	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
	spin_unlock(&oi->ip_lock);

	/*
	 * Wipe the whole id2 area before filling anything in, so that
	 * every i_data field starts from a known (zero) state.
	 */
	ocfs2_zero_dinode_id2(inode, di);

	di->id2.i_data.id_count = cpu_to_le16(ocfs2_max_inline_data(sb));
}
/*
 * Convert an inode that keeps its data inline in the dinode into one that
 * uses an extent list.
 *
 * When i_size is non-zero, one cluster is reserved and claimed, page cache
 * pages are grabbed for it, page 0 is filled from the inline data by
 * ocfs2_read_inline_data(), and the pages are mapped and dirtied against
 * the new cluster.  OCFS2_INLINE_DATA_FL is then cleared in memory and in
 * the dinode, the dinode gets a fresh empty extent list, and (when there
 * was data) the claimed cluster is inserted into it.
 *
 * Returns 0 on success, -ENOMEM if the page array cannot be allocated, or
 * the error returned by whichever helper failed.
 */
int ocfs2_convert_inline_data_to_extents(struct inode *inode,
struct buffer_head *di_bh)
{
int ret, i, has_data, num_pages = 0;
handle_t *handle;
u64 uninitialized_var(block);
struct ocfs2_inode_info *oi = OCFS2_I(inode);
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
struct ocfs2_alloc_context *data_ac = NULL;
struct page **pages = NULL;
loff_t end = osb->s_clustersize;
/* An empty inline file needs no cluster and no pages, only the flag flip. */
has_data = i_size_read(inode) ? 1 : 0;
if (has_data) {
pages = kcalloc(ocfs2_pages_per_cluster(osb->sb),
sizeof(struct page *), GFP_NOFS);
if (pages == NULL) {
ret = -ENOMEM;
mlog_errno(ret);
goto out;
}
ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
if (ret) {
mlog_errno(ret);
goto out;
}
}
handle = ocfs2_start_trans(osb, OCFS2_INLINE_TO_EXTENTS_CREDITS);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
mlog_errno(ret);
/* No transaction to commit, so skip out_commit. */
goto out_unlock;
}
ret = ocfs2_journal_access(handle, inode, di_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
mlog_errno(ret);
goto out_commit;
}
if (has_data) {
u32 bit_off, num;
unsigned int page_end;
u64 phys;
/*
 * NOTE(review): nothing visible in this function hands the claimed
 * cluster back if a later step fails - confirm whether that is
 * handled elsewhere.
 */
ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off,
&num);
if (ret) {
mlog_errno(ret);
goto out_commit;
}
/*
* Save two copies, one for insert, and one that can
* be changed by ocfs2_map_and_dirty_page() below.
*/
block = phys = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
/*
* Non sparse file systems zero on extend, so no need
* to do that now.
*/
if (!ocfs2_sparse_alloc(osb) &&
PAGE_CACHE_SIZE < osb->s_clustersize)
end = PAGE_CACHE_SIZE;
ret = ocfs2_grab_eof_pages(inode, 0, end, pages, &num_pages);
if (ret) {
mlog_errno(ret);
goto out_commit;
}
/*
* This should populate the 1st page for us and mark
* it up to date.
*/
ret = ocfs2_read_inline_data(inode, pages[0], di_bh);
if (ret) {
mlog_errno(ret);
goto out_commit;
}
/* Never map past the cluster when a page is larger than a cluster. */
page_end = PAGE_CACHE_SIZE;
if (PAGE_CACHE_SIZE > osb->s_clustersize)
page_end = osb->s_clustersize;
/*
 * 'i > 0' is the zero argument: page 0 already holds the inline
 * data and is left alone, every following page is zeroed.
 */
for (i = 0; i < num_pages; i++)
ocfs2_map_and_dirty_page(inode, handle, 0, page_end,
pages[i], i > 0, &phys);
}
/* Drop the inline flag in memory and on disk under ip_lock. */
spin_lock(&oi->ip_lock);
oi->ip_dyn_features &= ~OCFS2_INLINE_DATA_FL;
di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
spin_unlock(&oi->ip_lock);
/* This wipes the inline bytes from the dinode's id2 area. */
ocfs2_dinode_new_extent_list(inode, di);
ocfs2_journal_dirty(handle, di_bh);
if (has_data) {
/*
* An error at this point should be extremely rare. If
* this proves to be false, we could always re-build
* the in-inode data from our pages.
*/
/*
 * presumably: logical cluster 0, starting block 'block', one
 * cluster, no flags - confirm against ocfs2_insert_extent().
 */
ret = ocfs2_insert_extent(osb, handle, inode, di_bh,
0, block, 1, 0, NULL);
if (ret) {
mlog_errno(ret);
goto out_commit;
}
inode->i_blocks = ocfs2_inode_sector_count(inode);
}
out_commit:
ocfs2_commit_trans(osb, handle);
out_unlock:
if (data_ac)
ocfs2_free_alloc_context(data_ac);
out:
/* num_pages is still 0 if we never got as far as grabbing pages. */
if (pages) {
ocfs2_unlock_and_free_pages(pages, num_pages);
kfree(pages);
}
return ret;
}
/*
* It is expected, that by the time you call this function,
* inode->i_size and fe->i_size have been adjusted.
@@ -6090,6 +6225,81 @@ bail:
return status;
}
/*
 * Zero the byte range [start, end) of an inode's inline data.
 *
 * 'start' is inclusive, 'end' is not.  'end' is clamped to the current
 * i_size; the resulting range must be non-empty or the BUG_ON() fires.
 * When 'trunc' is non-zero, i_size is set to 'start' both in the inode
 * and in the dinode.  i_blocks and ctime/mtime are refreshed either way.
 *
 * Returns 0 on success, -EROFS if the inode is not flagged inline in
 * memory and on disk or the filesystem does not support inline data, or
 * the error from starting the transaction / getting journal access.
 */
int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
unsigned int start, unsigned int end, int trunc)
{
int ret;
unsigned int numbytes;
handle_t *handle;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
struct ocfs2_inline_data *idata = &di->id2.i_data;
if (end > i_size_read(inode))
end = i_size_read(inode);
/* Callers must pass a start that lies below the (clamped) end. */
BUG_ON(start >= end);
/* All three views of the inline-data feature have to agree. */
if (!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) ||
!(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL) ||
!ocfs2_supports_inline_data(osb)) {
ocfs2_error(inode->i_sb,
"Inline data flags for inode %llu don't agree! "
"Disk: 0x%x, Memory: 0x%x, Superblock: 0x%x\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno,
le16_to_cpu(di->i_dyn_features),
OCFS2_I(inode)->ip_dyn_features,
osb->s_feature_incompat);
ret = -EROFS;
goto out;
}
handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
mlog_errno(ret);
goto out;
}
ret = ocfs2_journal_access(handle, inode, di_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
mlog_errno(ret);
goto out_commit;
}
numbytes = end - start;
memset(idata->id_data + start, 0, numbytes);
/*
* No need to worry about the data page here - it's been
* truncated already and inline data doesn't need it for
* pushing zero's to disk, so we'll let readpage pick it up
* later.
*/
if (trunc) {
i_size_write(inode, start);
di->i_size = cpu_to_le64(start);
}
inode->i_blocks = ocfs2_inode_sector_count(inode);
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
/* The dinode gets the same timestamp for both ctime and mtime. */
di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec);
di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
ocfs2_journal_dirty(handle, di_bh);
out_commit:
ocfs2_commit_trans(osb, handle);
out:
return ret;
}
static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc)
{
/*

View File

@@ -62,6 +62,11 @@ static inline int ocfs2_extend_meta_needed(struct ocfs2_dinode *fe)
return le16_to_cpu(fe->id2.i_list.l_tree_depth) + 2;
}
void ocfs2_dinode_new_extent_list(struct inode *inode, struct ocfs2_dinode *di);
void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di);
int ocfs2_convert_inline_data_to_extents(struct inode *inode,
struct buffer_head *di_bh);
int ocfs2_truncate_log_init(struct ocfs2_super *osb);
void ocfs2_truncate_log_shutdown(struct ocfs2_super *osb);
void ocfs2_schedule_truncate_log_flush(struct ocfs2_super *osb,
@@ -115,6 +120,8 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
struct inode *inode,
struct buffer_head *fe_bh,
struct ocfs2_truncate_context *tc);
int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
unsigned int start, unsigned int end, int trunc);
int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el,
u32 cpos, struct buffer_head **leaf_bh);

View File

@@ -206,9 +206,70 @@ bail:
return err;
}
int ocfs2_read_inline_data(struct inode *inode, struct page *page,
struct buffer_head *di_bh)
{
void *kaddr;
unsigned int size;
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
if (!(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL)) {
ocfs2_error(inode->i_sb, "Inode %llu lost inline data flag",
(unsigned long long)OCFS2_I(inode)->ip_blkno);
return -EROFS;
}
size = i_size_read(inode);
if (size > PAGE_CACHE_SIZE ||
size > ocfs2_max_inline_data(inode->i_sb)) {
ocfs2_error(inode->i_sb,
"Inode %llu has with inline data has bad size: %u",
(unsigned long long)OCFS2_I(inode)->ip_blkno, size);
return -EROFS;
}
kaddr = kmap_atomic(page, KM_USER0);
if (size)
memcpy(kaddr, di->id2.i_data.id_data, size);
/* Clear the remaining part of the page */
memset(kaddr + size, 0, PAGE_CACHE_SIZE - size);
flush_dcache_page(page);
kunmap_atomic(kaddr, KM_USER0);
SetPageUptodate(page);
return 0;
}
/*
 * readpage helper for inodes with inline data: read the inode's dinode
 * block and fill @page from it via ocfs2_read_inline_data().
 *
 * @page must be locked on entry and the inode must be flagged
 * OCFS2_INLINE_DATA_FL (both enforced with BUG_ON).  The page is
 * unlocked before returning on every path.
 *
 * Returns 0 on success or the error from ocfs2_read_block() /
 * ocfs2_read_inline_data().
 */
static int ocfs2_readpage_inline(struct inode *inode, struct page *page)
{
	int ret;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	BUG_ON(!PageLocked(page));
	/*
	 * '!' binds tighter than '&', so the flag test has to be
	 * parenthesised; "!features & FLAG" tests the negated word, not
	 * the flag bit.
	 */
	BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL));

	ret = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &di_bh,
			       OCFS2_BH_CACHED, inode);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_read_inline_data(inode, page, di_bh);
out:
	unlock_page(page);

	brelse(di_bh);
	return ret;
}
static int ocfs2_readpage(struct file *file, struct page *page)
{
struct inode *inode = page->mapping->host;
struct ocfs2_inode_info *oi = OCFS2_I(inode);
loff_t start = (loff_t)page->index << PAGE_CACHE_SHIFT;
int ret, unlock = 1;
@@ -222,7 +283,7 @@ static int ocfs2_readpage(struct file *file, struct page *page)
goto out;
}
if (down_read_trylock(&OCFS2_I(inode)->ip_alloc_sem) == 0) {
if (down_read_trylock(&oi->ip_alloc_sem) == 0) {
ret = AOP_TRUNCATED_PAGE;
goto out_meta_unlock;
}
@@ -252,7 +313,10 @@ static int ocfs2_readpage(struct file *file, struct page *page)
goto out_alloc;
}
ret = block_read_full_page(page, ocfs2_get_block);
if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
ret = ocfs2_readpage_inline(inode, page);
else
ret = block_read_full_page(page, ocfs2_get_block);
unlock = 0;
ocfs2_data_unlock(inode, 0);
@@ -301,12 +365,8 @@ int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page,
{
int ret;
down_read(&OCFS2_I(inode)->ip_alloc_sem);
ret = block_prepare_write(page, from, to, ocfs2_get_block);
up_read(&OCFS2_I(inode)->ip_alloc_sem);
return ret;
}
@@ -401,7 +461,9 @@ static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
down_read(&OCFS2_I(inode)->ip_alloc_sem);
}
err = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL, NULL);
if (!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL))
err = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL,
NULL);
if (!INODE_JOURNAL(inode)) {
up_read(&OCFS2_I(inode)->ip_alloc_sem);
@@ -415,7 +477,6 @@ static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
goto bail;
}
bail:
status = err ? 0 : p_blkno;
@@ -570,6 +631,13 @@ static ssize_t ocfs2_direct_IO(int rw,
mlog_entry_void();
/*
* Fallback to buffered I/O if we see an inode without
* extents.
*/
if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
return 0;
if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) {
/*
* We get PR data locks even for O_DIRECT. This
@@ -834,18 +902,22 @@ struct ocfs2_write_ctxt {
struct ocfs2_cached_dealloc_ctxt w_dealloc;
};
static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages)
{
int i;
for(i = 0; i < wc->w_num_pages; i++) {
if (wc->w_pages[i] == NULL)
continue;
unlock_page(wc->w_pages[i]);
mark_page_accessed(wc->w_pages[i]);
page_cache_release(wc->w_pages[i]);
for(i = 0; i < num_pages; i++) {
if (pages[i]) {
unlock_page(pages[i]);
mark_page_accessed(pages[i]);
page_cache_release(pages[i]);
}
}
}
static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
{
ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages);
brelse(wc->w_di_bh);
kfree(wc);
@@ -1360,6 +1432,160 @@ out:
return ret;
}
/*
 * Set up a write that will be stored inline in the dinode.
 *
 * Looks up or creates page index 0 of @mapping and records it in the
 * write context, starts a transaction, gets journal write access to the
 * dinode buffer, flags the inode as inline if it is not already, and
 * fills the page from the inline data if the page is not uptodate.
 *
 * On success the open transaction handle is left in wc->w_handle for the
 * caller.  On failure after the transaction was started it is committed
 * here; the page stays recorded in @wc (see the comment below).
 *
 * Returns 0 on success, -ENOMEM if no page could be obtained, or the
 * error from the failing helper.
 */
static int ocfs2_write_begin_inline(struct address_space *mapping,
struct inode *inode,
struct ocfs2_write_ctxt *wc)
{
int ret;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct page *page;
handle_t *handle;
struct ocfs2_dinode *di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
page = find_or_create_page(mapping, 0, GFP_NOFS);
if (!page) {
ret = -ENOMEM;
mlog_errno(ret);
goto out;
}
/*
* If we don't set w_num_pages then this page won't get unlocked
* and freed on cleanup of the write context.
*/
wc->w_pages[0] = wc->w_target_page = page;
wc->w_num_pages = 1;
handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
mlog_errno(ret);
goto out;
}
ret = ocfs2_journal_access(handle, inode, wc->w_di_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
ocfs2_commit_trans(osb, handle);
mlog_errno(ret);
goto out;
}
/* First inline write to this inode: set the flag and init the header. */
if (!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL))
ocfs2_set_inode_data_inline(inode, di);
if (!PageUptodate(page)) {
ret = ocfs2_read_inline_data(inode, page, wc->w_di_bh);
if (ret) {
ocfs2_commit_trans(osb, handle);
goto out;
}
}
/* The handle is only published to the write context on success. */
wc->w_handle = handle;
out:
return ret;
}
/*
 * Report whether a file of @new_size bytes fits in the inline data area
 * of the dinode held by @di_bh.
 *
 * id_count is treated as the capacity of that area, so a size equal to
 * it still fits.  This is the same inclusive bound that
 * ocfs2_read_inline_data() and ocfs2_try_to_write_inline_data() apply
 * against ocfs2_max_inline_data().
 *
 * Returns 1 if the size fits, 0 otherwise.
 */
int ocfs2_size_fits_inline_data(struct buffer_head *di_bh, u64 new_size)
{
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;

	if (new_size <= le16_to_cpu(di->id2.i_data.id_count))
		return 1;
	return 0;
}
/*
 * Decide whether a write of @len bytes at @pos can be stored inline and,
 * if so, prepare it through ocfs2_write_begin_inline().
 *
 * Returns 1 when the write has been set up as an inline write, 0 when
 * the caller should continue with the regular extent-based path (this
 * includes the case where the inode was just converted from inline data
 * to extents), or a negative error.
 */
static int ocfs2_try_to_write_inline_data(struct address_space *mapping,
					  struct inode *inode, loff_t pos,
					  unsigned len, struct page *mmap_page,
					  struct ocfs2_write_ctxt *wc)
{
	int status;
	loff_t write_end = pos + len;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);

	mlog(0, "Inode %llu, write of %u bytes at off %llu. features: 0x%x\n",
	     (unsigned long long)oi->ip_blkno, len, (unsigned long long)pos,
	     oi->ip_dyn_features);

	if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		/*
		 * The inode already has inline data.  An mmap write, or a
		 * write that will not fit, forces the inode over to an
		 * inline extent list.
		 */
		if (mmap_page != NULL ||
		    !ocfs2_size_fits_inline_data(wc->w_di_bh, write_end)) {
			status = ocfs2_convert_inline_data_to_extents(inode,
								wc->w_di_bh);
			if (status)
				mlog_errno(status);
			return status;
		}
	} else {
		/* Only an empty, unallocated inode can accept inline data. */
		if (oi->ip_clusters != 0 || i_size_read(inode) != 0)
			return 0;

		/* The write itself has to fit, and must not be from mmap. */
		if (mmap_page ||
		    write_end > ocfs2_max_inline_data(inode->i_sb))
			return 0;
	}

	status = ocfs2_write_begin_inline(mapping, inode, wc);
	if (status) {
		mlog_errno(status);
		return status;
	}

	/* Tell the caller that the data can be written inline. */
	return 1;
}
/*
 * Only does work on file systems which can't handle sparse files.
 *
 * Any hole between the current end of the file and the end of this write
 * is filled in up front via ocfs2_extend_no_holes().  The rest of the
 * write path can then treat the request as a non-allocating write and
 * needs no sparse/nonsparse special casing.
 *
 * Returns 0 when nothing had to be done or the extend succeeded,
 * otherwise the error from ocfs2_extend_no_holes().
 */
static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos,
					unsigned len,
					struct ocfs2_write_ctxt *wc)
{
	int status;
	loff_t write_end = pos + len;

	/* Sparse-capable volumes and writes inside i_size need nothing. */
	if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)) ||
	    write_end <= i_size_read(inode))
		return 0;

	/* Zero only up to the start of the write (write_end - len == pos). */
	status = ocfs2_extend_no_holes(inode, write_end, write_end - len);
	if (status)
		mlog_errno(status);

	return status;
}
int ocfs2_write_begin_nolock(struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata,
@@ -1381,6 +1607,25 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
return ret;
}
if (ocfs2_supports_inline_data(osb)) {
ret = ocfs2_try_to_write_inline_data(mapping, inode, pos, len,
mmap_page, wc);
if (ret == 1) {
ret = 0;
goto success;
}
if (ret < 0) {
mlog_errno(ret);
goto out;
}
}
ret = ocfs2_expand_nonsparse_inode(inode, pos, len, wc);
if (ret) {
mlog_errno(ret);
goto out;
}
ret = ocfs2_populate_write_desc(inode, wc, &clusters_to_alloc,
&extents_to_split);
if (ret) {
@@ -1462,6 +1707,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
if (meta_ac)
ocfs2_free_alloc_context(meta_ac);
success:
*pagep = wc->w_target_page;
*fsdata = wc;
return 0;
@@ -1529,6 +1775,31 @@ out_fail:
return ret;
}
/*
 * Finish an inline write: copy the bytes the caller placed in the target
 * page into the dinode's inline data area at offset @pos.
 *
 * @copied is in/out.  If fewer than @len bytes were copied into a page
 * that is not uptodate, *copied is reset to 0 and nothing is stored.
 * Otherwise *copied bytes are transferred and the value is left as is.
 */
static void ocfs2_write_end_inline(struct inode *inode, loff_t pos,
				   unsigned len, unsigned *copied,
				   struct ocfs2_dinode *di,
				   struct ocfs2_write_ctxt *wc)
{
	void *kaddr;

	if (unlikely(*copied < len)) {
		if (!PageUptodate(wc->w_target_page)) {
			*copied = 0;
			return;
		}
	}

	kaddr = kmap_atomic(wc->w_target_page, KM_USER0);
	memcpy(di->id2.i_data.id_data + pos, kaddr + pos, *copied);
	kunmap_atomic(kaddr, KM_USER0);

	/* Argument order must follow the labels in the format string. */
	mlog(0, "Data written to inode at offset %llu. "
	     "id_count = %u, copied = %u, i_dyn_features = 0x%x\n",
	     (unsigned long long)pos,
	     le16_to_cpu(di->id2.i_data.id_count), *copied,
	     le16_to_cpu(di->i_dyn_features));
}
int ocfs2_write_end_nolock(struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
@@ -1542,6 +1813,11 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
handle_t *handle = wc->w_handle;
struct page *tmppage;
if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
ocfs2_write_end_inline(inode, pos, len, &copied, di, wc);
goto out_write_size;
}
if (unlikely(copied < len)) {
if (!PageUptodate(wc->w_target_page))
copied = 0;
@@ -1579,6 +1855,7 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
block_commit_write(tmppage, from, to);
}
out_write_size:
pos += copied;
if (pos > inode->i_size) {
i_size_write(inode, pos);

View File

@@ -34,6 +34,8 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
struct inode *inode, unsigned int from,
unsigned int to, int new);
void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages);
int walk_page_buffers( handle_t *handle,
struct buffer_head *head,
unsigned from,
@@ -59,6 +61,10 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
struct page **pagep, void **fsdata,
struct buffer_head *di_bh, struct page *mmap_page);
int ocfs2_read_inline_data(struct inode *inode, struct page *page,
struct buffer_head *di_bh);
int ocfs2_size_fits_inline_data(struct buffer_head *di_bh, u64 new_size);
/* all ocfs2_dio_end_io()'s fault */
#define ocfs2_iocb_is_rw_locked(iocb) \
test_bit(0, (unsigned long *)&iocb->private)

View File

@@ -216,8 +216,7 @@ static void o2hb_wait_on_io(struct o2hb_region *reg,
wait_for_completion(&wc->wc_io_complete);
}
static int o2hb_bio_end_io(struct bio *bio,
unsigned int bytes_done,
static void o2hb_bio_end_io(struct bio *bio,
int error)
{
struct o2hb_bio_wait_ctxt *wc = bio->bi_private;
@@ -227,12 +226,8 @@ static int o2hb_bio_end_io(struct bio *bio,
wc->wc_error = error;
}
if (bio->bi_size)
return 1;
o2hb_bio_wait_dec(wc, 1);
bio_put(bio);
return 0;
}
/* Setup a Bio to cover I/O against num_slots slots starting at

View File

@@ -143,7 +143,7 @@ static struct kobj_type mlog_ktype = {
};
static struct kset mlog_kset = {
.kobj = {.name = "logmask", .ktype = &mlog_ktype},
.kobj = {.ktype = &mlog_ktype},
};
int mlog_sys_init(struct kset *o2cb_subsys)
@@ -156,6 +156,7 @@ int mlog_sys_init(struct kset *o2cb_subsys)
}
mlog_attr_ptrs[i] = NULL;
kobject_set_name(&mlog_kset.kobj, "logmask");
kobj_set_kset_s(&mlog_kset, *o2cb_subsys);
return kset_register(&mlog_kset);
}

File diff suppressed because it is too large Load Diff

View File

@@ -26,17 +26,49 @@
#ifndef OCFS2_DIR_H
#define OCFS2_DIR_H
struct buffer_head *ocfs2_find_entry(const char *name,
int namelen,
struct inode *dir,
struct ocfs2_dir_entry **res_dir);
int ocfs2_delete_entry(handle_t *handle,
struct inode *dir,
struct ocfs2_dir_entry *de_del,
struct buffer_head *bh);
int __ocfs2_add_entry(handle_t *handle,
struct inode *dir,
const char *name, int namelen,
struct inode *inode, u64 blkno,
struct buffer_head *parent_fe_bh,
struct buffer_head *insert_bh);
/*
 * Convenience wrapper around __ocfs2_add_entry(): the directory and the
 * entry name are taken from @dentry (its parent's inode and its d_name),
 * all other arguments are forwarded unchanged.
 */
static inline int ocfs2_add_entry(handle_t *handle,
				  struct dentry *dentry,
				  struct inode *inode, u64 blkno,
				  struct buffer_head *parent_fe_bh,
				  struct buffer_head *insert_bh)
{
	struct inode *dir = dentry->d_parent->d_inode;

	return __ocfs2_add_entry(handle, dir,
				 dentry->d_name.name, dentry->d_name.len,
				 inode, blkno, parent_fe_bh, insert_bh);
}
int ocfs2_update_entry(struct inode *dir, handle_t *handle,
struct buffer_head *de_bh, struct ocfs2_dir_entry *de,
struct inode *new_entry_inode);
int ocfs2_check_dir_for_entry(struct inode *dir,
const char *name,
int namelen);
int ocfs2_empty_dir(struct inode *inode); /* FIXME: to namei.c */
int ocfs2_empty_dir(struct inode *inode);
int ocfs2_find_files_on_disk(const char *name,
int namelen,
u64 *blkno,
struct inode *inode,
struct buffer_head **dirent_bh,
struct ocfs2_dir_entry **dirent);
int ocfs2_lookup_ino_from_name(struct inode *dir, const char *name,
int namelen, u64 *blkno);
int ocfs2_readdir(struct file *filp, void *dirent, filldir_t filldir);
int ocfs2_dir_foreach(struct inode *inode, loff_t *f_pos, void *priv,
filldir_t filldir);
int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
struct inode *dir,
struct buffer_head *parent_fe_bh,
@@ -44,11 +76,11 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
int namelen,
struct buffer_head **ret_de_bh);
struct ocfs2_alloc_context;
int ocfs2_do_extend_dir(struct super_block *sb,
handle_t *handle,
struct inode *dir,
struct buffer_head *parent_fe_bh,
struct ocfs2_alloc_context *data_ac,
struct ocfs2_alloc_context *meta_ac,
struct buffer_head **new_bh);
int ocfs2_fill_new_dir(struct ocfs2_super *osb,
handle_t *handle,
struct inode *parent,
struct inode *inode,
struct buffer_head *fe_bh,
struct ocfs2_alloc_context *data_ac);
#endif /* OCFS2_DIR_H */

View File

@@ -1482,6 +1482,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
lvb->lvb_imtime_packed =
cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
lvb->lvb_iattr = cpu_to_be32(oi->ip_attr);
lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features);
lvb->lvb_igeneration = cpu_to_be32(inode->i_generation);
out:
@@ -1515,6 +1516,7 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
i_size_write(inode, be64_to_cpu(lvb->lvb_isize));
oi->ip_attr = be32_to_cpu(lvb->lvb_iattr);
oi->ip_dyn_features = be16_to_cpu(lvb->lvb_idynfeatures);
ocfs2_set_inode_flags(inode);
/* fast-symlinks are a special case */

View File

@@ -29,12 +29,12 @@
#include "dcache.h"
#define OCFS2_LVB_VERSION 4
#define OCFS2_LVB_VERSION 5
struct ocfs2_meta_lvb {
__u8 lvb_version;
__u8 lvb_reserved0;
__be16 lvb_reserved1;
__be16 lvb_idynfeatures;
__be32 lvb_iclusters;
__be32 lvb_iuid;
__be32 lvb_igid;

View File

@@ -88,8 +88,6 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
struct dentry *parent;
struct inode *inode;
struct inode *dir = child->d_inode;
struct buffer_head *dirent_bh = NULL;
struct ocfs2_dir_entry *dirent;
mlog_entry("(0x%p, '%.*s')\n", child,
child->d_name.len, child->d_name.name);
@@ -105,8 +103,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
goto bail;
}
status = ocfs2_find_files_on_disk("..", 2, &blkno, dir, &dirent_bh,
&dirent);
status = ocfs2_lookup_ino_from_name(dir, "..", 2, &blkno);
if (status < 0) {
parent = ERR_PTR(-ENOENT);
goto bail_unlock;
@@ -131,9 +128,6 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
bail_unlock:
ocfs2_meta_unlock(dir, 0);
if (dirent_bh)
brelse(dirent_bh);
bail:
mlog_exit_ptr(parent);

View File

@@ -387,6 +387,12 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
struct ocfs2_extent_rec *rec;
u32 coff;
if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
ret = -ERANGE;
mlog_errno(ret);
goto out;
}
ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster,
num_clusters, extent_flags);
if (ret == 0)

View File

@@ -397,6 +397,15 @@ static int ocfs2_truncate_file(struct inode *inode,
unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1);
truncate_inode_pages(inode->i_mapping, new_i_size);
if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
status = ocfs2_truncate_inline(inode, di_bh, new_i_size,
i_size_read(inode), 0);
if (status)
mlog_errno(status);
goto bail_unlock_data;
}
/* alright, we're going to need to do a full blown alloc size
* change. Orphan the inode so that recovery can complete the
* truncate if necessary. This does the task of marking
@@ -779,25 +788,6 @@ leave:
return status;
}
/*
 * Locked wrapper around __ocfs2_extend_allocation(): takes ip_alloc_sem
 * for writing around the call and returns its result unchanged.
 */
static int ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
u32 clusters_to_add, int mark_unwritten)
{
int ret;
/*
* The alloc sem blocks people in read/write from reading our
* allocation until we're done changing it. We depend on
* i_mutex to block other extend/truncate calls while we're
* here.
*/
down_write(&OCFS2_I(inode)->ip_alloc_sem);
ret = __ocfs2_extend_allocation(inode, logical_start, clusters_to_add,
mark_unwritten);
up_write(&OCFS2_I(inode)->ip_alloc_sem);
return ret;
}
/* Some parts of this taken from generic_cont_expand, which turned out
* to be too fragile to do exactly what we need without us having to
* worry about recursive locking in ->prepare_write() and
@@ -889,25 +879,48 @@ out:
return ret;
}
/*
* A tail_to_skip value > 0 indicates that we're being called from
* ocfs2_file_aio_write(). This has the following implications:
*
* - we don't want to update i_size
* - di_bh will be NULL, which is fine because it's only used in the
* case where we want to update i_size.
* - ocfs2_zero_extend() will then only be filling the hole created
* between i_size and the start of the write.
*/
/*
 * Make sure the inode has enough clusters allocated to hold @new_i_size
 * bytes, then zero up to @zero_to via ocfs2_zero_extend().
 *
 * Returns 0 on success, or the error from __ocfs2_extend_allocation() /
 * ocfs2_zero_extend().
 */
int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to)
{
	int status;
	u32 wanted;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);

	/* Allocate only the clusters missing beyond what we already have. */
	wanted = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size);
	if (wanted > oi->ip_clusters) {
		status = __ocfs2_extend_allocation(inode, oi->ip_clusters,
						   wanted - oi->ip_clusters,
						   0);
		if (status) {
			mlog_errno(status);
			return status;
		}
	}

	/*
	 * Done even when no clusters were added to the tree: the area
	 * between the old i_size and the new i_size still has to be
	 * zeroed.
	 */
	status = ocfs2_zero_extend(inode, zero_to);
	if (status < 0)
		mlog_errno(status);

	return status;
}
static int ocfs2_extend_file(struct inode *inode,
struct buffer_head *di_bh,
u64 new_i_size,
size_t tail_to_skip)
u64 new_i_size)
{
int ret = 0;
u32 clusters_to_add = 0;
int ret = 0, data_locked = 0;
struct ocfs2_inode_info *oi = OCFS2_I(inode);
BUG_ON(!tail_to_skip && !di_bh);
BUG_ON(!di_bh);
/* setattr sometimes calls us like this. */
if (new_i_size == 0)
@@ -917,13 +930,18 @@ static int ocfs2_extend_file(struct inode *inode,
goto out;
BUG_ON(new_i_size < i_size_read(inode));
if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) {
BUG_ON(tail_to_skip != 0);
/*
* Fall through for converting inline data, even if the fs
* supports sparse files.
*
* The check for inline data here is legal - nobody can add
* the feature since we have i_mutex. We must check it again
* after acquiring ip_alloc_sem though, as paths like mmap
* might have raced us to converting the inode to extents.
*/
if (!(oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
&& ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
goto out_update_size;
}
clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size) -
OCFS2_I(inode)->ip_clusters;
/*
* protect the pages that ocfs2_zero_extend is going to be
@@ -937,39 +955,52 @@ static int ocfs2_extend_file(struct inode *inode,
mlog_errno(ret);
goto out;
}
data_locked = 1;
/*
* The alloc sem blocks people in read/write from reading our
* allocation until we're done changing it. We depend on
* i_mutex to block other extend/truncate calls while we're
* here.
*/
down_write(&oi->ip_alloc_sem);
if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
/*
* We can optimize small extends by keeping the inodes
* inline data.
*/
if (ocfs2_size_fits_inline_data(di_bh, new_i_size)) {
up_write(&oi->ip_alloc_sem);
goto out_update_size;
}
ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
if (ret) {
up_write(&oi->ip_alloc_sem);
if (clusters_to_add) {
ret = ocfs2_extend_allocation(inode,
OCFS2_I(inode)->ip_clusters,
clusters_to_add, 0);
if (ret < 0) {
mlog_errno(ret);
goto out_unlock;
}
}
/*
* Call this even if we don't add any clusters to the tree. We
* still need to zero the area between the old i_size and the
* new i_size.
*/
ret = ocfs2_zero_extend(inode, (u64)new_i_size - tail_to_skip);
if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
ret = ocfs2_extend_no_holes(inode, new_i_size, new_i_size);
up_write(&oi->ip_alloc_sem);
if (ret < 0) {
mlog_errno(ret);
goto out_unlock;
}
out_update_size:
if (!tail_to_skip) {
/* We're being called from ocfs2_setattr() which wants
* us to update i_size */
ret = ocfs2_simple_size_update(inode, di_bh, new_i_size);
if (ret < 0)
mlog_errno(ret);
}
ret = ocfs2_simple_size_update(inode, di_bh, new_i_size);
if (ret < 0)
mlog_errno(ret);
out_unlock:
if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
if (data_locked)
ocfs2_data_unlock(inode, 1);
out:
@@ -1035,7 +1066,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
if (i_size_read(inode) > attr->ia_size)
status = ocfs2_truncate_file(inode, bh, attr->ia_size);
else
status = ocfs2_extend_file(inode, bh, attr->ia_size, 0);
status = ocfs2_extend_file(inode, bh, attr->ia_size);
if (status < 0) {
if (status != -ENOSPC)
mlog_errno(status);
@@ -1243,6 +1274,31 @@ static int ocfs2_allocate_unwritten_extents(struct inode *inode,
{
int ret;
u32 cpos, phys_cpos, clusters, alloc_size;
u64 end = start + len;
struct buffer_head *di_bh = NULL;
if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
OCFS2_I(inode)->ip_blkno, &di_bh,
OCFS2_BH_CACHED, inode);
if (ret) {
mlog_errno(ret);
goto out;
}
/*
* Nothing to do if the requested reservation range
* fits within the inode.
*/
if (ocfs2_size_fits_inline_data(di_bh, end))
goto out;
ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
if (ret) {
mlog_errno(ret);
goto out;
}
}
/*
* We consider both start and len to be inclusive.
@@ -1288,6 +1344,8 @@ next:
ret = 0;
out:
brelse(di_bh);
return ret;
}
@@ -1469,6 +1527,14 @@ static int ocfs2_remove_inode_range(struct inode *inode,
if (byte_len == 0)
return 0;
if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
ret = ocfs2_truncate_inline(inode, di_bh, byte_start,
byte_start + byte_len, 1);
if (ret)
mlog_errno(ret);
return ret;
}
trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start);
trunc_len = (byte_start + byte_len) >> osb->s_clustersize_bits;
if (trunc_len >= trunc_start)
@@ -1713,15 +1779,13 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
int appending,
int *direct_io)
{
int ret = 0, meta_level = appending;
int ret = 0, meta_level = 0;
struct inode *inode = dentry->d_inode;
u32 clusters;
loff_t newsize, saved_pos;
loff_t saved_pos, end;
/*
* We sample i_size under a read level meta lock to see if our write
* is extending the file, if it is we back off and get a write level
* meta lock.
* We start with a read level meta lock and only upgrade to an
* exclusive (write level) lock if we need to make modifications here.
*/
for(;;) {
ret = ocfs2_meta_lock(inode, NULL, meta_level);
@@ -1763,87 +1827,47 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
saved_pos = *ppos;
}
if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) {
loff_t end = saved_pos + count;
end = saved_pos + count;
/*
* Skip the O_DIRECT checks if we don't need
* them.
*/
if (!direct_io || !(*direct_io))
break;
/*
* Skip the O_DIRECT checks if we don't need
* them.
*/
if (!direct_io || !(*direct_io))
break;
/*
* Allowing concurrent direct writes means
* i_size changes wouldn't be synchronized, so
* one node could wind up truncating another
* nodes writes.
*/
if (end > i_size_read(inode)) {
*direct_io = 0;
break;
}
/*
* We don't fill holes during direct io, so
* check for them here. If any are found, the
* caller will have to retake some cluster
* locks and initiate the io as buffered.
*/
ret = ocfs2_check_range_for_holes(inode, saved_pos,
count);
if (ret == 1) {
*direct_io = 0;
ret = 0;
} else if (ret < 0)
mlog_errno(ret);
/*
* There's no sane way to do direct writes to an inode
* with inline data.
*/
if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
*direct_io = 0;
break;
}
/*
* The rest of this loop is concerned with legacy file
* systems which don't support sparse files.
* Allowing concurrent direct writes means
* i_size changes wouldn't be synchronized, so
* one node could wind up truncating another
* nodes writes.
*/
newsize = count + saved_pos;
mlog(0, "pos=%lld newsize=%lld cursize=%lld\n",
(long long) saved_pos, (long long) newsize,
(long long) i_size_read(inode));
/* No need for a higher level metadata lock if we're
* never going past i_size. */
if (newsize <= i_size_read(inode))
if (end > i_size_read(inode)) {
*direct_io = 0;
break;
if (meta_level == 0) {
ocfs2_meta_unlock(inode, meta_level);
meta_level = 1;
continue;
}
spin_lock(&OCFS2_I(inode)->ip_lock);
clusters = ocfs2_clusters_for_bytes(inode->i_sb, newsize) -
OCFS2_I(inode)->ip_clusters;
spin_unlock(&OCFS2_I(inode)->ip_lock);
mlog(0, "Writing at EOF, may need more allocation: "
"i_size = %lld, newsize = %lld, need %u clusters\n",
(long long) i_size_read(inode), (long long) newsize,
clusters);
/* We only want to continue the rest of this loop if
* our extend will actually require more
* allocation. */
if (!clusters)
break;
ret = ocfs2_extend_file(inode, NULL, newsize, count);
if (ret < 0) {
if (ret != -ENOSPC)
mlog_errno(ret);
goto out_unlock;
}
/*
* We don't fill holes during direct io, so
* check for them here. If any are found, the
* caller will have to retake some cluster
* locks and initiate the io as buffered.
*/
ret = ocfs2_check_range_for_holes(inode, saved_pos, count);
if (ret == 1) {
*direct_io = 0;
ret = 0;
} else if (ret < 0)
mlog_errno(ret);
break;
}

View File

@@ -47,6 +47,8 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
struct ocfs2_alloc_context *data_ac,
struct ocfs2_alloc_context *meta_ac,
enum ocfs2_alloc_restarted *reason_ret);
int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size,
u64 zero_to);
int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
u32 clusters_to_add, u32 extents_to_split,
struct ocfs2_alloc_context **data_ac,

View File

@@ -241,6 +241,7 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
OCFS2_I(inode)->ip_dyn_features = le16_to_cpu(fe->i_dyn_features);
inode->i_version = 1;
inode->i_generation = le32_to_cpu(fe->i_generation);
@@ -513,6 +514,10 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
fe = (struct ocfs2_dinode *) fe_bh->b_data;
/*
* This check will also skip truncate of inodes with inline
* data and fast symlinks.
*/
if (fe->i_clusters) {
handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
if (IS_ERR(handle)) {
@@ -1220,6 +1225,7 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
fe->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters);
ocfs2_get_inode_flags(OCFS2_I(inode));
fe->i_attr = cpu_to_le32(OCFS2_I(inode)->ip_attr);
fe->i_dyn_features = cpu_to_le16(OCFS2_I(inode)->ip_dyn_features);
spin_unlock(&OCFS2_I(inode)->ip_lock);
fe->i_size = cpu_to_le64(i_size_read(inode));
@@ -1257,6 +1263,7 @@ void ocfs2_refresh_inode(struct inode *inode,
OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
OCFS2_I(inode)->ip_dyn_features = le16_to_cpu(fe->i_dyn_features);
ocfs2_set_inode_flags(inode);
i_size_write(inode, le64_to_cpu(fe->i_size));
inode->i_nlink = le16_to_cpu(fe->i_links_count);

View File

@@ -51,6 +51,7 @@ struct ocfs2_inode_info
u32 ip_flags; /* see below */
u32 ip_attr; /* inode attributes */
u16 ip_dyn_features;
/* protected by recovery_lock. */
struct inode *ip_next_orphan;

View File

@@ -35,13 +35,13 @@
#include "ocfs2.h"
#include "alloc.h"
#include "dir.h"
#include "dlmglue.h"
#include "extent_map.h"
#include "heartbeat.h"
#include "inode.h"
#include "journal.h"
#include "localalloc.h"
#include "namei.h"
#include "slot_map.h"
#include "super.h"
#include "vote.h"
@@ -1213,17 +1213,49 @@ bail:
return status;
}
/*
 * Context passed to ocfs2_orphan_filldir() through its opaque priv
 * pointer while the orphan directory is being walked.
 */
struct ocfs2_orphan_filldir_priv {
/* Most recently queued orphan; the list is chained via ip_next_orphan. */
struct inode *head;
/* Super block handed to ocfs2_iget() for each directory entry. */
struct ocfs2_super *osb;
};
/*
 * Per-entry callback for the orphan directory walk.
 *
 * Looks up the inode named by @ino and pushes it onto the front of the
 * orphan list kept in the ocfs2_orphan_filldir_priv passed as @priv.
 * "." and ".." are ignored, as are inodes that ocfs2_iget() cannot
 * return. Always returns 0 so that the walk keeps going.
 */
static int ocfs2_orphan_filldir(void *priv, const char *name, int name_len,
				loff_t pos, u64 ino, unsigned type)
{
	struct ocfs2_orphan_filldir_priv *ctx = priv;
	struct inode *orphan;
	int is_dot = (name_len == 1 && name[0] == '.');
	int is_dotdot = (name_len == 2 && name[0] == '.' && name[1] == '.');

	if (is_dot || is_dotdot)
		return 0;

	/* Skip bad inodes so that recovery can continue */
	orphan = ocfs2_iget(ctx->osb, ino, OCFS2_FI_FLAG_ORPHAN_RECOVERY);
	if (!IS_ERR(orphan)) {
		mlog(0, "queue orphan %llu\n",
		     (unsigned long long)OCFS2_I(orphan)->ip_blkno);

		/*
		 * No locking is required for the next_orphan queue as
		 * there is only ever a single process doing orphan
		 * recovery.
		 */
		OCFS2_I(orphan)->ip_next_orphan = ctx->head;
		ctx->head = orphan;
	}

	return 0;
}
static int ocfs2_queue_orphans(struct ocfs2_super *osb,
int slot,
struct inode **head)
{
int status;
struct inode *orphan_dir_inode = NULL;
struct inode *iter;
unsigned long offset, blk, local;
struct buffer_head *bh = NULL;
struct ocfs2_dir_entry *de;
struct super_block *sb = osb->sb;
struct ocfs2_orphan_filldir_priv priv;
loff_t pos = 0;
priv.osb = osb;
priv.head = *head;
orphan_dir_inode = ocfs2_get_system_file_inode(osb,
ORPHAN_DIR_SYSTEM_INODE,
@@ -1241,77 +1273,15 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb,
goto out;
}
offset = 0;
iter = NULL;
while(offset < i_size_read(orphan_dir_inode)) {
blk = offset >> sb->s_blocksize_bits;
bh = ocfs2_bread(orphan_dir_inode, blk, &status, 0);
if (!bh)
status = -EINVAL;
if (status < 0) {
if (bh)
brelse(bh);
mlog_errno(status);
goto out_unlock;
}
local = 0;
while(offset < i_size_read(orphan_dir_inode)
&& local < sb->s_blocksize) {
de = (struct ocfs2_dir_entry *) (bh->b_data + local);
if (!ocfs2_check_dir_entry(orphan_dir_inode,
de, bh, local)) {
status = -EINVAL;
mlog_errno(status);
brelse(bh);
goto out_unlock;
}
local += le16_to_cpu(de->rec_len);
offset += le16_to_cpu(de->rec_len);
/* I guess we silently fail on no inode? */
if (!le64_to_cpu(de->inode))
continue;
if (de->file_type > OCFS2_FT_MAX) {
mlog(ML_ERROR,
"block %llu contains invalid de: "
"inode = %llu, rec_len = %u, "
"name_len = %u, file_type = %u, "
"name='%.*s'\n",
(unsigned long long)bh->b_blocknr,
(unsigned long long)le64_to_cpu(de->inode),
le16_to_cpu(de->rec_len),
de->name_len,
de->file_type,
de->name_len,
de->name);
continue;
}
if (de->name_len == 1 && !strncmp(".", de->name, 1))
continue;
if (de->name_len == 2 && !strncmp("..", de->name, 2))
continue;
iter = ocfs2_iget(osb, le64_to_cpu(de->inode),
OCFS2_FI_FLAG_ORPHAN_RECOVERY);
if (IS_ERR(iter))
continue;
mlog(0, "queue orphan %llu\n",
(unsigned long long)OCFS2_I(iter)->ip_blkno);
/* No locking is required for the next_orphan
* queue as there is only ever a single
* process doing orphan recovery. */
OCFS2_I(iter)->ip_next_orphan = *head;
*head = iter;
}
brelse(bh);
status = ocfs2_dir_foreach(orphan_dir_inode, &pos, &priv,
ocfs2_orphan_filldir);
if (status) {
mlog_errno(status);
goto out;
}
out_unlock:
*head = priv.head;
ocfs2_meta_unlock(orphan_dir_inode, 0);
out:
mutex_unlock(&orphan_dir_inode->i_mutex);

View File

@@ -282,6 +282,9 @@ int ocfs2_journal_dirty_data(handle_t *handle,
* prev. group desc. if we relink. */
#define OCFS2_SUBALLOC_ALLOC (3)
#define OCFS2_INLINE_TO_EXTENTS_CREDITS (OCFS2_SUBALLOC_ALLOC \
+ OCFS2_INODE_UPDATE_CREDITS)
/* dinode + group descriptor update. We don't relink on free yet. */
#define OCFS2_SUBALLOC_FREE (2)

View File

@@ -64,29 +64,6 @@
#include "buffer_head_io.h"
#define NAMEI_RA_CHUNKS 2
#define NAMEI_RA_BLOCKS 4
#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
#define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b))
static int inline ocfs2_search_dirblock(struct buffer_head *bh,
struct inode *dir,
const char *name, int namelen,
unsigned long offset,
struct ocfs2_dir_entry **res_dir);
static int ocfs2_delete_entry(handle_t *handle,
struct inode *dir,
struct ocfs2_dir_entry *de_del,
struct buffer_head *bh);
static int __ocfs2_add_entry(handle_t *handle,
struct inode *dir,
const char *name, int namelen,
struct inode *inode, u64 blkno,
struct buffer_head *parent_fe_bh,
struct buffer_head *insert_bh);
static int ocfs2_mknod_locked(struct ocfs2_super *osb,
struct inode *dir,
struct dentry *dentry, int mode,
@@ -97,13 +74,6 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
struct inode **ret_inode,
struct ocfs2_alloc_context *inode_ac);
static int ocfs2_fill_new_dir(struct ocfs2_super *osb,
handle_t *handle,
struct inode *parent,
struct inode *inode,
struct buffer_head *fe_bh,
struct ocfs2_alloc_context *data_ac);
static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
struct inode **ret_orphan_dir,
struct inode *inode,
@@ -123,17 +93,6 @@ static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
struct inode *inode,
const char *symname);
/*
 * Dentry-based front end for __ocfs2_add_entry(): the target directory
 * is the dentry's parent inode and the entry name/length come from the
 * dentry's d_name.
 */
static inline int ocfs2_add_entry(handle_t *handle,
				  struct dentry *dentry,
				  struct inode *inode, u64 blkno,
				  struct buffer_head *parent_fe_bh,
				  struct buffer_head *insert_bh)
{
	struct inode *parent_dir = dentry->d_parent->d_inode;
	const struct qstr *entry = &dentry->d_name;

	return __ocfs2_add_entry(handle, parent_dir,
				 entry->name, entry->len,
				 inode, blkno, parent_fe_bh, insert_bh);
}
/* An orphan dir name is an 8 byte value, printed as a hex string */
#define OCFS2_ORPHAN_NAMELEN ((int)(2 * sizeof(u64)))
@@ -142,10 +101,8 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
{
int status;
u64 blkno;
struct buffer_head *dirent_bh = NULL;
struct inode *inode = NULL;
struct dentry *ret;
struct ocfs2_dir_entry *dirent;
struct ocfs2_inode_info *oi;
mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry,
@@ -167,9 +124,8 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
goto bail;
}
status = ocfs2_find_files_on_disk(dentry->d_name.name,
dentry->d_name.len, &blkno,
dir, &dirent_bh, &dirent);
status = ocfs2_lookup_ino_from_name(dir, dentry->d_name.name,
dentry->d_name.len, &blkno);
if (status < 0)
goto bail_add;
@@ -224,83 +180,12 @@ bail_unlock:
ocfs2_meta_unlock(dir, 0);
bail:
if (dirent_bh)
brelse(dirent_bh);
mlog_exit_ptr(ret);
return ret;
}
/*
 * Populate a freshly created directory inode with its first block.
 *
 * Extends the directory by one block through ocfs2_do_extend_dir(),
 * zeroes that block and writes the "." entry (pointing at @inode) and
 * the ".." entry (pointing at @parent) into it. The inode's size is
 * then set to one block, its link count to 2, and the inode is marked
 * dirty against @fe_bh.
 *
 * Returns 0 on success or the negative status of the first helper that
 * failed. The new block's buffer head is released on every path.
 */
static int ocfs2_fill_new_dir(struct ocfs2_super *osb,
handle_t *handle,
struct inode *parent,
struct inode *inode,
struct buffer_head *fe_bh,
struct ocfs2_alloc_context *data_ac)
{
int status;
struct buffer_head *new_bh = NULL;
struct ocfs2_dir_entry *de = NULL;
mlog_entry_void();
status = ocfs2_do_extend_dir(osb->sb, handle, inode, fe_bh,
data_ac, NULL, &new_bh);
if (status < 0) {
mlog_errno(status);
goto bail;
}
ocfs2_set_new_buffer_uptodate(inode, new_bh);
/* Journal access is taken before the block contents are touched. */
status = ocfs2_journal_access(handle, inode, new_bh,
OCFS2_JOURNAL_ACCESS_CREATE);
if (status < 0) {
mlog_errno(status);
goto bail;
}
memset(new_bh->b_data, 0, osb->sb->s_blocksize);
/* First entry: "." refers to the new directory itself. */
de = (struct ocfs2_dir_entry *) new_bh->b_data;
de->inode = cpu_to_le64(OCFS2_I(inode)->ip_blkno);
de->name_len = 1;
de->rec_len =
cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
strcpy(de->name, ".");
ocfs2_set_de_type(de, S_IFDIR);
/*
 * Second entry: ".." refers to the parent and its rec_len covers
 * the rest of the block after the "." record.
 */
de = (struct ocfs2_dir_entry *) ((char *)de + le16_to_cpu(de->rec_len));
de->inode = cpu_to_le64(OCFS2_I(parent)->ip_blkno);
de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize -
OCFS2_DIR_REC_LEN(1));
de->name_len = 2;
strcpy(de->name, "..");
ocfs2_set_de_type(de, S_IFDIR);
status = ocfs2_journal_dirty(handle, new_bh);
if (status < 0) {
mlog_errno(status);
goto bail;
}
/* The directory now holds exactly one block and two links. */
i_size_write(inode, inode->i_sb->s_blocksize);
inode->i_nlink = 2;
inode->i_blocks = ocfs2_inode_sector_count(inode);
status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
if (status < 0) {
mlog_errno(status);
goto bail;
}
status = 0;
bail:
if (new_bh)
brelse(new_bh);
mlog_exit(status);
return status;
}
static int ocfs2_mknod(struct inode *dir,
struct dentry *dentry,
int mode,
@@ -365,9 +250,8 @@ static int ocfs2_mknod(struct inode *dir,
goto leave;
}
/* are we making a directory? If so, reserve a cluster for his
* 1st extent. */
if (S_ISDIR(mode)) {
/* Reserve a cluster if creating an extent based directory. */
if (S_ISDIR(mode) && !ocfs2_supports_inline_data(osb)) {
status = ocfs2_reserve_clusters(osb, 1, &data_ac);
if (status < 0) {
if (status != -ENOSPC)
@@ -564,10 +448,21 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
cpu_to_le32(CURRENT_TIME.tv_nsec);
fe->i_dtime = 0;
fel = &fe->id2.i_list;
fel->l_tree_depth = 0;
fel->l_next_free_rec = 0;
fel->l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(osb->sb));
/*
* If supported, directories start with inline data.
*/
if (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) {
u16 feat = le16_to_cpu(fe->i_dyn_features);
fe->i_dyn_features = cpu_to_le16(feat | OCFS2_INLINE_DATA_FL);
fe->id2.i_data.id_count = cpu_to_le16(ocfs2_max_inline_data(osb->sb));
} else {
fel = &fe->id2.i_list;
fel->l_tree_depth = 0;
fel->l_next_free_rec = 0;
fel->l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(osb->sb));
}
status = ocfs2_journal_dirty(handle, *new_fe_bh);
if (status < 0) {
@@ -1048,11 +943,6 @@ static void ocfs2_double_unlock(struct inode *inode1, struct inode *inode2)
ocfs2_meta_unlock(inode2, 1);
}
#define PARENT_INO(buffer) \
((struct ocfs2_dir_entry *) \
((char *)buffer + \
le16_to_cpu(((struct ocfs2_dir_entry *)buffer)->rec_len)))->inode
static int ocfs2_rename(struct inode *old_dir,
struct dentry *old_dentry,
struct inode *new_dir,
@@ -1070,12 +960,12 @@ static int ocfs2_rename(struct inode *old_dir,
struct buffer_head *old_inode_bh = NULL;
struct buffer_head *insert_entry_bh = NULL;
struct ocfs2_super *osb = NULL;
u64 newfe_blkno;
u64 newfe_blkno, old_de_ino;
handle_t *handle = NULL;
struct buffer_head *old_dir_bh = NULL;
struct buffer_head *new_dir_bh = NULL;
struct ocfs2_dir_entry *old_de = NULL, *new_de = NULL; // dirent for old_dentry
// and new_dentry
struct ocfs2_dir_entry *old_inode_dot_dot_de = NULL, *old_de = NULL,
*new_de = NULL;
struct buffer_head *new_de_bh = NULL, *old_de_bh = NULL; // bhs for above
struct buffer_head *old_inode_de_bh = NULL; // if old_dentry is a dir,
// this is the 1st dirent bh
@@ -1159,27 +1049,35 @@ static int ocfs2_rename(struct inode *old_dir,
}
if (S_ISDIR(old_inode->i_mode)) {
status = -EIO;
old_inode_de_bh = ocfs2_bread(old_inode, 0, &status, 0);
if (!old_inode_de_bh)
goto bail;
u64 old_inode_parent;
status = -EIO;
if (le64_to_cpu(PARENT_INO(old_inode_de_bh->b_data)) !=
OCFS2_I(old_dir)->ip_blkno)
status = ocfs2_find_files_on_disk("..", 2, &old_inode_parent,
old_inode, &old_inode_de_bh,
&old_inode_dot_dot_de);
if (status) {
status = -EIO;
goto bail;
status = -EMLINK;
if (!new_inode && new_dir!=old_dir &&
new_dir->i_nlink >= OCFS2_LINK_MAX)
}
if (old_inode_parent != OCFS2_I(old_dir)->ip_blkno) {
status = -EIO;
goto bail;
}
if (!new_inode && new_dir != old_dir &&
new_dir->i_nlink >= OCFS2_LINK_MAX) {
status = -EMLINK;
goto bail;
}
}
status = -ENOENT;
old_de_bh = ocfs2_find_entry(old_dentry->d_name.name,
old_dentry->d_name.len,
old_dir, &old_de);
if (!old_de_bh)
status = ocfs2_lookup_ino_from_name(old_dir, old_dentry->d_name.name,
old_dentry->d_name.len,
&old_de_ino);
if (status) {
status = -ENOENT;
goto bail;
}
/*
* Check for inode number is _not_ due to possible IO errors.
@@ -1187,8 +1085,10 @@ static int ocfs2_rename(struct inode *old_dir,
* and merrily kill the link to whatever was created under the
* same name. Goodbye sticky bit ;-<
*/
if (le64_to_cpu(old_de->inode) != OCFS2_I(old_inode)->ip_blkno)
if (old_de_ino != OCFS2_I(old_inode)->ip_blkno) {
status = -ENOENT;
goto bail;
}
/* check if the target already exists (in which case we need
* to delete it */
@@ -1321,20 +1221,13 @@ static int ocfs2_rename(struct inode *old_dir,
}
/* change the dirent to point to the correct inode */
status = ocfs2_journal_access(handle, new_dir, new_de_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
status = ocfs2_update_entry(new_dir, handle, new_de_bh,
new_de, old_inode);
if (status < 0) {
mlog_errno(status);
goto bail;
}
new_de->inode = cpu_to_le64(OCFS2_I(old_inode)->ip_blkno);
new_de->file_type = old_de->file_type;
new_dir->i_version++;
status = ocfs2_journal_dirty(handle, new_de_bh);
if (status < 0) {
mlog_errno(status);
goto bail;
}
if (S_ISDIR(new_inode->i_mode))
newfe->i_links_count = 0;
@@ -1370,7 +1263,21 @@ static int ocfs2_rename(struct inode *old_dir,
} else
mlog_errno(status);
/* now that the name has been added to new_dir, remove the old name */
/*
* Now that the name has been added to new_dir, remove the old name.
*
* We don't keep any directory entry context around until now
* because the insert might have changed the type of directory
* we're dealing with.
*/
old_de_bh = ocfs2_find_entry(old_dentry->d_name.name,
old_dentry->d_name.len,
old_dir, &old_de);
if (!old_de_bh) {
status = -EIO;
goto bail;
}
status = ocfs2_delete_entry(handle, old_dir, old_de, old_de_bh);
if (status < 0) {
mlog_errno(status);
@@ -1383,12 +1290,8 @@ static int ocfs2_rename(struct inode *old_dir,
}
old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
if (old_inode_de_bh) {
status = ocfs2_journal_access(handle, old_inode,
old_inode_de_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
PARENT_INO(old_inode_de_bh->b_data) =
cpu_to_le64(OCFS2_I(new_dir)->ip_blkno);
status = ocfs2_journal_dirty(handle, old_inode_de_bh);
status = ocfs2_update_entry(old_inode, handle, old_inode_de_bh,
old_inode_dot_dot_de, new_dir);
old_dir->i_nlink--;
if (new_inode) {
new_inode->i_nlink--;
@@ -1767,329 +1670,6 @@ bail:
return status;
}
/*
 * Sanity-check a single directory entry located inside @bh.
 *
 * The record length must be at least the minimal record size, a
 * multiple of four, large enough for the entry's name_len, and must
 * not run past the end of the block. @offset is only used in the log
 * message.
 *
 * Returns 1 if the entry passes every check; otherwise logs the first
 * failed check at ML_ERROR and returns 0.
 */
int ocfs2_check_dir_entry(struct inode * dir,
			  struct ocfs2_dir_entry * de,
			  struct buffer_head * bh,
			  unsigned long offset)
{
	const int rlen = le16_to_cpu(de->rec_len);
	const char *why;

	if (rlen < OCFS2_DIR_REC_LEN(1))
		why = "rec_len is smaller than minimal";
	else if (rlen % 4 != 0)
		why = "rec_len % 4 != 0";
	else if (rlen < OCFS2_DIR_REC_LEN(de->name_len))
		why = "rec_len is too small for name_len";
	else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)
		why = "directory entry across blocks";
	else
		return 1;

	mlog(ML_ERROR, "bad entry in directory #%llu: %s - "
	     "offset=%lu, inode=%llu, rec_len=%d, name_len=%d\n",
	     (unsigned long long)OCFS2_I(dir)->ip_blkno, why,
	     offset, (unsigned long long)le64_to_cpu(de->inode), rlen,
	     de->name_len);

	return 0;
}
/*
 * Insert a directory entry named @name into the block held by
 * @insert_bh.
 *
 * We don't always have a dentry for what we want to add, so people
 * like orphan dir can call this instead.
 *
 * @insert_bh is the only block examined and is dereferenced
 * unconditionally, so it must be non-NULL. The entry is placed either
 * in an unused record (inode == 0) that is large enough, or in the
 * slack space at the tail of a live record, which is split in two.
 * When @blkno is non-zero the new entry points at it and takes its
 * file type from @inode->i_mode; otherwise the entry's inode is 0.
 *
 * Returns 0 on success, -EINVAL for a zero @namelen, -ENOENT if a
 * record in the block fails ocfs2_check_dir_entry(), -EEXIST if @name
 * is already present, or the negative status of a failed inode-dirty
 * or journal call.
 */
static int __ocfs2_add_entry(handle_t *handle,
			     struct inode *dir,
			     const char *name, int namelen,
			     struct inode *inode, u64 blkno,
			     struct buffer_head *parent_fe_bh,
			     struct buffer_head *insert_bh)
{
	unsigned long offset;
	unsigned short rec_len;
	struct ocfs2_dir_entry *de, *de1;
	struct super_block *sb;
	int retval;

	mlog_entry_void();

	sb = dir->i_sb;

	/* Leave through bail so mlog_entry/mlog_exit stay paired. */
	if (!namelen) {
		retval = -EINVAL;
		goto bail;
	}

	rec_len = OCFS2_DIR_REC_LEN(namelen);
	offset = 0;
	de = (struct ocfs2_dir_entry *) insert_bh->b_data;

	/* The loop is only ever left through "goto bail". */
	while (1) {
		BUG_ON((char *)de >= sb->s_blocksize + insert_bh->b_data);

		/* These checks should've already been passed by the
		 * prepare function, but I guess we can leave them
		 * here anyway. */
		if (!ocfs2_check_dir_entry(dir, de, insert_bh, offset)) {
			retval = -ENOENT;
			goto bail;
		}
		if (ocfs2_match(namelen, name, de)) {
			retval = -EEXIST;
			goto bail;
		}

		if (((le64_to_cpu(de->inode) == 0) &&
		     (le16_to_cpu(de->rec_len) >= rec_len)) ||
		    (le16_to_cpu(de->rec_len) >=
		     (OCFS2_DIR_REC_LEN(de->name_len) + rec_len))) {
			dir->i_mtime = dir->i_ctime = CURRENT_TIME;
			retval = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh);
			if (retval < 0) {
				mlog_errno(retval);
				goto bail;
			}

			/*
			 * Do not touch the block unless the journal has
			 * actually granted write access to it.
			 */
			retval = ocfs2_journal_access(handle, dir, insert_bh,
						      OCFS2_JOURNAL_ACCESS_WRITE);
			if (retval < 0) {
				mlog_errno(retval);
				goto bail;
			}

			/*
			 * Live record with enough slack: trim it to its
			 * minimal length and carve the new record out of
			 * the remainder.
			 */
			if (le64_to_cpu(de->inode)) {
				de1 = (struct ocfs2_dir_entry *)((char *) de +
					OCFS2_DIR_REC_LEN(de->name_len));
				de1->rec_len =
					cpu_to_le16(le16_to_cpu(de->rec_len) -
					OCFS2_DIR_REC_LEN(de->name_len));
				de->rec_len = cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
				de = de1;
			}

			de->file_type = OCFS2_FT_UNKNOWN;
			if (blkno) {
				de->inode = cpu_to_le64(blkno);
				ocfs2_set_de_type(de, inode->i_mode);
			} else
				de->inode = 0;
			de->name_len = namelen;
			memcpy(de->name, name, namelen);

			dir->i_version++;

			/* Report a failed journal dirty instead of hiding it. */
			retval = ocfs2_journal_dirty(handle, insert_bh);
			if (retval < 0)
				mlog_errno(retval);
			else
				retval = 0;
			goto bail;
		}

		offset += le16_to_cpu(de->rec_len);
		de = (struct ocfs2_dir_entry *) ((char *) de + le16_to_cpu(de->rec_len));
	}

bail:
	mlog_exit(retval);
	return retval;
}
/*
 * ocfs2_delete_entry removes @de_del from the directory block in @bh.
 *
 * The block is walked from its start until @de_del is reached. If it
 * has a predecessor, the predecessor's rec_len grows to swallow it;
 * if it is the first record in the block, its inode field is cleared
 * instead.
 *
 * Returns the status of ocfs2_journal_dirty() on success, -EIO when a
 * record fails validation or journal access is refused, and -ENOENT
 * when @de_del is not found in the block.
 */
static int ocfs2_delete_entry(handle_t *handle,
			      struct inode *dir,
			      struct ocfs2_dir_entry *de_del,
			      struct buffer_head *bh)
{
	struct ocfs2_dir_entry *cur = (struct ocfs2_dir_entry *) bh->b_data;
	struct ocfs2_dir_entry *prev = NULL;
	int off = 0, found = 0, status = -ENOENT;

	mlog_entry("(0x%p, 0x%p, 0x%p, 0x%p)\n", handle, dir, de_del, bh);

	/* Locate de_del, remembering the record just before it. */
	while (off < bh->b_size) {
		if (!ocfs2_check_dir_entry(dir, cur, bh, off)) {
			status = -EIO;
			mlog_errno(status);
			goto bail;
		}
		if (cur == de_del) {
			found = 1;
			break;
		}
		off += le16_to_cpu(cur->rec_len);
		prev = cur;
		cur = (struct ocfs2_dir_entry *)((char *)cur +
						 le16_to_cpu(cur->rec_len));
	}

	if (!found)
		goto bail;

	status = ocfs2_journal_access(handle, dir, bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		status = -EIO;
		mlog_errno(status);
		goto bail;
	}

	if (!prev)
		cur->inode = 0;
	else
		prev->rec_len = cpu_to_le16(le16_to_cpu(prev->rec_len) +
					    le16_to_cpu(cur->rec_len));

	dir->i_version++;
	status = ocfs2_journal_dirty(handle, bh);

bail:
	mlog_exit(status);
	return status;
}
/*
 * Scan one directory block for an entry called @name.
 *
 * Returns 0 if not found, -1 on failure, and 1 on success. On success
 * *@res_dir points at the matching entry inside @bh. Failure means
 * either that the matching entry did not pass ocfs2_check_dir_entry()
 * or that a record with a zero rec_len was hit.
 */
static int inline ocfs2_search_dirblock(struct buffer_head *bh,
					struct inode *dir,
					const char *name, int namelen,
					unsigned long offset,
					struct ocfs2_dir_entry **res_dir)
{
	char *cursor = bh->b_data;
	char *block_end = cursor + dir->i_sb->s_blocksize;
	int result = 0;

	mlog_entry_void();

	while (cursor < block_end) {
		struct ocfs2_dir_entry *entry =
			(struct ocfs2_dir_entry *) cursor;
		int step;

		/* this code is executed quadratically often */
		/* do minimal checking `by hand' */
		if (cursor + namelen <= block_end &&
		    ocfs2_match(namelen, name, entry)) {
			/* found a match - just to be sure, do a full check */
			if (ocfs2_check_dir_entry(dir, entry, bh, offset)) {
				*res_dir = entry;
				result = 1;
			} else
				result = -1;
			break;
		}

		/* prevent looping on a bad block */
		step = le16_to_cpu(entry->rec_len);
		if (step <= 0) {
			result = -1;
			break;
		}

		cursor += step;
		offset += step;
	}

	mlog_exit(result);
	return result;
}
/*
 * Look up @name in directory @dir.
 *
 * The search starts at the block recorded in ip_dir_start_lookup (the
 * block of the previous successful lookup), wraps around at the end of
 * the directory, and stops once it is back at the starting block.
 * Blocks are fetched in batches of up to NAMEI_RA_SIZE into bh_use[].
 *
 * On a match, *@res_dir points at the entry and the buffer head that
 * contains it is returned without being released here. Otherwise
 * *@res_dir is NULL and NULL is returned; this covers both "not found"
 * and a negative result from ocfs2_search_dirblock().
 */
struct buffer_head *ocfs2_find_entry(const char *name, int namelen,
struct inode *dir,
struct ocfs2_dir_entry **res_dir)
{
struct super_block *sb;
struct buffer_head *bh_use[NAMEI_RA_SIZE];
struct buffer_head *bh, *ret = NULL;
unsigned long start, block, b;
int ra_max = 0; /* Number of bh's in the readahead
buffer, bh_use[] */
int ra_ptr = 0; /* Current index into readahead
buffer */
int num = 0;
int nblocks, i, err;
mlog_entry_void();
*res_dir = NULL;
sb = dir->i_sb;
nblocks = i_size_read(dir) >> sb->s_blocksize_bits;
/* Resume from the block of the last hit, if it is still in range. */
start = OCFS2_I(dir)->ip_dir_start_lookup;
if (start >= nblocks)
start = 0;
block = start;
restart:
do {
/*
* We deal with the read-ahead logic here.
*/
if (ra_ptr >= ra_max) {
/* Refill the readahead buffer */
ra_ptr = 0;
b = block;
for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
/*
* Terminate if we reach the end of the
* directory and must wrap, or if our
* search has finished at this block.
*/
if (b >= nblocks || (num && block == start)) {
bh_use[ra_max] = NULL;
break;
}
num++;
/*
 * NOTE(review): the final argument (1) presumably asks
 * ocfs2_bread() for readahead - confirm. err is not
 * examined; a NULL bh is skipped below.
 */
bh = ocfs2_bread(dir, b++, &err, 1);
bh_use[ra_max] = bh;
}
}
if ((bh = bh_use[ra_ptr++]) == NULL)
goto next;
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
/* read error, skip block & hope for the best */
ocfs2_error(dir->i_sb, "reading directory %llu, "
"offset %lu\n",
(unsigned long long)OCFS2_I(dir)->ip_blkno,
block);
brelse(bh);
goto next;
}
i = ocfs2_search_dirblock(bh, dir, name, namelen,
block << sb->s_blocksize_bits,
res_dir);
if (i == 1) {
/* Hit: remember this block for the next lookup and keep bh. */
OCFS2_I(dir)->ip_dir_start_lookup = block;
ret = bh;
goto cleanup_and_exit;
} else {
brelse(bh);
/* A negative result aborts the whole search. */
if (i < 0)
goto cleanup_and_exit;
}
next:
if (++block >= nblocks)
block = 0;
} while (block != start);
/*
* If the directory has grown while we were searching, then
* search the last part of the directory before giving up.
*/
block = nblocks;
nblocks = i_size_read(dir) >> sb->s_blocksize_bits;
if (block < nblocks) {
start = 0;
goto restart;
}
cleanup_and_exit:
/* Clean up the read-ahead blocks */
for (; ra_ptr < ra_max; ra_ptr++)
brelse(bh_use[ra_ptr]);
mlog_exit_ptr(ret);
return ret;
}
static int ocfs2_blkno_stringify(u64 blkno, char *name)
{
int status, namelen;

View File

@@ -30,29 +30,10 @@ extern const struct inode_operations ocfs2_dir_iops;
struct dentry *ocfs2_get_parent(struct dentry *child);
int ocfs2_check_dir_entry (struct inode *dir,
struct ocfs2_dir_entry *de,
struct buffer_head *bh,
unsigned long offset);
struct buffer_head *ocfs2_find_entry(const char *name,
int namelen,
struct inode *dir,
struct ocfs2_dir_entry **res_dir);
int ocfs2_orphan_del(struct ocfs2_super *osb,
handle_t *handle,
struct inode *orphan_dir_inode,
struct inode *inode,
struct buffer_head *orphan_dir_bh);
/*
 * Return 1 if @de is an in-use entry (non-zero inode) whose name is
 * exactly the @len bytes at @name, 0 otherwise.
 */
static inline int ocfs2_match(int len,
			      const char * const name,
			      struct ocfs2_dir_entry *de)
{
	return len == de->name_len &&
	       de->inode &&
	       !memcmp(name, de->name, len);
}
#endif /* OCFS2_NAMEI_H */

View File

@@ -319,6 +319,13 @@ static inline int ocfs2_writes_unwritten_extents(struct ocfs2_super *osb)
return 0;
}
/*
 * Return 1 if the volume has OCFS2_FEATURE_INCOMPAT_INLINE_DATA set in
 * its incompat feature mask, 0 otherwise.
 */
static inline int ocfs2_supports_inline_data(struct ocfs2_super *osb)
{
	return (osb->s_feature_incompat &
		OCFS2_FEATURE_INCOMPAT_INLINE_DATA) ? 1 : 0;
}
/* set / clear functions because cluster events can make these happen
* in parallel so we want the transitions to be atomic. this also
* means that any future flags osb_flags must be protected by spinlock

View File

@@ -87,7 +87,8 @@
#define OCFS2_FEATURE_COMPAT_SUPP OCFS2_FEATURE_COMPAT_BACKUP_SB
#define OCFS2_FEATURE_INCOMPAT_SUPP (OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT \
| OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC)
| OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC \
| OCFS2_FEATURE_INCOMPAT_INLINE_DATA)
#define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN
/*
@@ -110,6 +111,20 @@
/* Support for sparse allocation in b-trees */
#define OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC 0x0010
/*
* Tunefs sets this incompat flag before starting an operation which
* would require cleanup on abort. This is done to protect users from
* inadvertently mounting the fs after an aborted run without
* fsck-ing.
*
* s_tunefs_flag on the super block describes precisely which
* operations were in progress.
*/
#define OCFS2_FEATURE_INCOMPAT_TUNEFS_INPROG 0x0020
/* Support for data packed into inode blocks */
#define OCFS2_FEATURE_INCOMPAT_INLINE_DATA 0x0040
/*
* backup superblock flag is used to indicate that this volume
* has backup superblocks.
@@ -129,6 +144,11 @@
/* the max backup superblock nums */
#define OCFS2_MAX_BACKUP_SUPERBLOCKS 6
/*
* Flags on ocfs2_super_block.s_tunefs_flag
*/
#define OCFS2_TUNEFS_INPROG_REMOVE_SLOT 0x0001 /* Removing slots */
/*
* Flags on ocfs2_dinode.i_flags
*/
@@ -146,6 +166,17 @@
#define OCFS2_CHAIN_FL (0x00000400) /* Chain allocator */
#define OCFS2_DEALLOC_FL (0x00000800) /* Truncate log */
/*
* Flags on ocfs2_dinode.i_dyn_features
*
* These can change much more often than i_flags. When adding flags,
* keep in mind that i_dyn_features is only 16 bits wide.
*/
#define OCFS2_INLINE_DATA_FL (0x0001) /* Data stored in inode block */
#define OCFS2_HAS_XATTR_FL (0x0002)
#define OCFS2_INLINE_XATTR_FL (0x0004)
#define OCFS2_INDEXED_DIR_FL (0x0008)
/* Inode attributes, keep in sync with EXT2 */
#define OCFS2_SECRM_FL (0x00000001) /* Secure deletion */
#define OCFS2_UNRM_FL (0x00000002) /* Undelete */
@@ -447,8 +478,8 @@ struct ocfs2_super_block {
__le32 s_clustersize_bits; /* Clustersize for this fs */
/*40*/ __le16 s_max_slots; /* Max number of simultaneous mounts
before tunefs required */
__le16 s_reserved1;
__le32 s_reserved2;
__le16 s_tunefs_flag;
__le32 s_reserved1;
__le64 s_first_cluster_group; /* Block offset of 1st cluster
* group header */
/*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */
@@ -470,6 +501,19 @@ struct ocfs2_local_alloc
/*10*/ __u8 la_bitmap[0];
};
/*
* Data-in-inode header. This is only used if i_dyn_features has
* OCFS2_INLINE_DATA_FL set.
*
* The fixed fields come first and the data bytes follow immediately at
* id_data, a zero-length trailing array: it contributes no size of its
* own and only marks where the data begins.
*/
struct ocfs2_inline_data
{
/*00*/ __le16 id_count; /* Number of bytes that can be used
* for data, starting at id_data */
__le16 id_reserved0; /* Reserved; no use visible in this file */
__le32 id_reserved1; /* Reserved; no use visible in this file */
__u8 id_data[0]; /* Start of user data */
};
/*
* On disk inode for OCFS2
*/
@@ -502,7 +546,7 @@ struct ocfs2_dinode {
__le32 i_attr;
__le16 i_orphaned_slot; /* Only valid when OCFS2_ORPHANED_FL
was set in i_flags */
__le16 i_reserved1;
__le16 i_dyn_features;
/*70*/ __le64 i_reserved2[8];
/*B8*/ union {
__le64 i_pad1; /* Generic way to refer to this
@@ -528,6 +572,7 @@ struct ocfs2_dinode {
struct ocfs2_chain_list i_chain;
struct ocfs2_extent_list i_list;
struct ocfs2_truncate_log i_dealloc;
struct ocfs2_inline_data i_data;
__u8 i_symlink[0];
} id2;
/* Actual on-disk size is one block */
@@ -577,6 +622,12 @@ static inline int ocfs2_fast_symlink_chars(struct super_block *sb)
offsetof(struct ocfs2_dinode, id2.i_symlink);
}
/*
 * Number of bytes between the start of id2.i_data.id_data inside the
 * dinode and the end of a block of sb->s_blocksize bytes.
 */
static inline int ocfs2_max_inline_data(struct super_block *sb)
{
	const size_t data_start =
		offsetof(struct ocfs2_dinode, id2.i_data.id_data);

	return sb->s_blocksize - data_start;
}
static inline int ocfs2_extent_recs_per_inode(struct super_block *sb)
{
int size;
@@ -656,6 +707,11 @@ static inline int ocfs2_fast_symlink_chars(int blocksize)
return blocksize - offsetof(struct ocfs2_dinode, id2.i_symlink);
}
/*
 * Number of bytes between the start of id2.i_data.id_data inside the
 * dinode and the end of a block of @blocksize bytes.
 */
static inline int ocfs2_max_inline_data(int blocksize)
{
	const size_t data_start =
		offsetof(struct ocfs2_dinode, id2.i_data.id_data);

	return blocksize - data_start;
}
static inline int ocfs2_extent_recs_per_inode(int blocksize)
{
int size;

View File

@@ -39,6 +39,7 @@
#include <linux/parser.h>
#include <linux/crc32.h>
#include <linux/debugfs.h>
#include <linux/mount.h>
#include <cluster/nodemanager.h>
@@ -91,6 +92,7 @@ struct mount_options
static int ocfs2_parse_options(struct super_block *sb, char *options,
struct mount_options *mopt,
int is_remount);
static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt);
static void ocfs2_put_super(struct super_block *sb);
static int ocfs2_mount_volume(struct super_block *sb);
static int ocfs2_remount(struct super_block *sb, int *flags, char *data);
@@ -105,7 +107,7 @@ static int ocfs2_sync_fs(struct super_block *sb, int wait);
static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb);
static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb);
static int ocfs2_release_system_inodes(struct ocfs2_super *osb);
static void ocfs2_release_system_inodes(struct ocfs2_super *osb);
static int ocfs2_fill_local_node_info(struct ocfs2_super *osb);
static int ocfs2_check_volume(struct ocfs2_super *osb);
static int ocfs2_verify_volume(struct ocfs2_dinode *di,
@@ -133,6 +135,7 @@ static const struct super_operations ocfs2_sops = {
.write_super = ocfs2_write_super,
.put_super = ocfs2_put_super,
.remount_fs = ocfs2_remount,
.show_options = ocfs2_show_options,
};
enum {
@@ -177,7 +180,7 @@ static void ocfs2_write_super(struct super_block *sb)
static int ocfs2_sync_fs(struct super_block *sb, int wait)
{
int status = 0;
int status;
tid_t target;
struct ocfs2_super *osb = OCFS2_SB(sb);
@@ -275,9 +278,9 @@ bail:
return status;
}
static int ocfs2_release_system_inodes(struct ocfs2_super *osb)
static void ocfs2_release_system_inodes(struct ocfs2_super *osb)
{
int status = 0, i;
int i;
struct inode *inode;
mlog_entry_void();
@@ -302,8 +305,7 @@ static int ocfs2_release_system_inodes(struct ocfs2_super *osb)
osb->root_inode = NULL;
}
mlog_exit(status);
return status;
mlog_exit(0);
}
/* We're allocating fs objects, use GFP_NOFS */
@@ -453,7 +455,7 @@ static int ocfs2_sb_probe(struct super_block *sb,
struct buffer_head **bh,
int *sector_size)
{
int status = 0, tmpstat;
int status, tmpstat;
struct ocfs1_vol_disk_hdr *hdr;
struct ocfs2_dinode *di;
int blksize;
@@ -830,6 +832,41 @@ bail:
return status;
}
/*
 * super_operations.show_options hook: append the ocfs2 mount options in
 * effect for @mnt's superblock to @s as ",opt" / ",opt=value" items.
 *
 * heartbeat=, data= and errors= are always emitted (one of two values
 * each); nointr and barrier=1 only when their flag is set;
 * preferred_slot= and atime_quantum= only when they differ from
 * OCFS2_INVALID_SLOT / OCFS2_DEFAULT_ATIME_QUANTUM respectively.
 *
 * Always returns 0.
 */
static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
{
	struct ocfs2_super *osb = OCFS2_SB(mnt->mnt_sb);
	unsigned long opts = osb->s_mount_opt;

	/* Constant strings go through seq_puts(); no format parsing needed. */
	if (opts & OCFS2_MOUNT_HB_LOCAL)
		seq_puts(s, ",_netdev,heartbeat=local");
	else
		seq_puts(s, ",heartbeat=none");

	if (opts & OCFS2_MOUNT_NOINTR)
		seq_puts(s, ",nointr");

	if (opts & OCFS2_MOUNT_DATA_WRITEBACK)
		seq_puts(s, ",data=writeback");
	else
		seq_puts(s, ",data=ordered");

	if (opts & OCFS2_MOUNT_BARRIER)
		seq_puts(s, ",barrier=1");

	if (opts & OCFS2_MOUNT_ERRORS_PANIC)
		seq_puts(s, ",errors=panic");
	else
		seq_puts(s, ",errors=remount-ro");

	if (osb->preferred_slot != OCFS2_INVALID_SLOT)
		seq_printf(s, ",preferred_slot=%d", osb->preferred_slot);

	if (osb->s_atime_quantum != OCFS2_DEFAULT_ATIME_QUANTUM)
		seq_printf(s, ",atime_quantum=%u", osb->s_atime_quantum);

	return 0;
}
static int __init ocfs2_init(void)
{
int status;
@@ -1209,13 +1246,14 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
tmp = ocfs2_request_umount_vote(osb);
if (tmp < 0)
mlog_errno(tmp);
if (osb->slot_num != OCFS2_INVALID_SLOT)
ocfs2_put_slot(osb);
ocfs2_super_unlock(osb, 1);
}
if (osb->slot_num != OCFS2_INVALID_SLOT)
ocfs2_put_slot(osb);
if (osb->dlm)
ocfs2_super_unlock(osb, 1);
ocfs2_release_system_inodes(osb);
if (osb->dlm) {
@@ -1275,7 +1313,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
struct buffer_head *bh,
int sector_size)
{
int status = 0;
int status;
int i, cbits, bbits;
struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
struct inode *inode = NULL;
@@ -1596,7 +1634,7 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di,
static int ocfs2_check_volume(struct ocfs2_super *osb)
{
int status = 0;
int status;
int dirty;
int local;
struct ocfs2_dinode *local_alloc = NULL; /* only used if we

View File

@@ -100,17 +100,14 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb,
char namebuf[40];
struct inode *inode = NULL;
u64 blkno;
struct buffer_head *dirent_bh = NULL;
struct ocfs2_dir_entry *de = NULL;
int status = 0;
ocfs2_sprintf_system_inode_name(namebuf,
sizeof(namebuf),
type, slot);
status = ocfs2_find_files_on_disk(namebuf, strlen(namebuf),
&blkno, osb->sys_root_inode,
&dirent_bh, &de);
status = ocfs2_lookup_ino_from_name(osb->sys_root_inode, namebuf,
strlen(namebuf), &blkno);
if (status < 0) {
goto bail;
}
@@ -122,8 +119,7 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb,
goto bail;
}
bail:
if (dirent_bh)
brelse(dirent_bh);
return inode;
}

View File

@@ -381,10 +381,12 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len,
p->partno = part;
p->policy = disk->policy;
if (isdigit(disk->kobj.name[strlen(disk->kobj.name)-1]))
snprintf(p->kobj.name,KOBJ_NAME_LEN,"%sp%d",disk->kobj.name,part);
if (isdigit(disk->kobj.k_name[strlen(disk->kobj.k_name)-1]))
kobject_set_name(&p->kobj, "%sp%d",
kobject_name(&disk->kobj), part);
else
snprintf(p->kobj.name,KOBJ_NAME_LEN,"%s%d",disk->kobj.name,part);
kobject_set_name(&p->kobj, "%s%d",
kobject_name(&disk->kobj),part);
p->kobj.parent = &disk->kobj;
p->kobj.ktype = &ktype_part;
kobject_init(&p->kobj);
@@ -477,9 +479,9 @@ void register_disk(struct gendisk *disk)
struct hd_struct *p;
int err;
strlcpy(disk->kobj.name,disk->disk_name,KOBJ_NAME_LEN);
kobject_set_name(&disk->kobj, "%s", disk->disk_name);
/* ewww... some of these buggers have / in name... */
s = strchr(disk->kobj.name, '/');
s = strchr(disk->kobj.k_name, '/');
if (s)
*s = '!';
if ((err = kobject_add(&disk->kobj)))

View File

@@ -11,6 +11,7 @@ proc-y += inode.o root.o base.o generic.o array.o \
proc_tty.o proc_misc.o
proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o
proc-$(CONFIG_NET) += proc_net.o
proc-$(CONFIG_PROC_KCORE) += kcore.o
proc-$(CONFIG_PROC_VMCORE) += vmcore.o
proc-$(CONFIG_PROC_DEVICETREE) += proc_devtree.o

View File

@@ -16,6 +16,11 @@ extern int proc_sys_init(void);
#else
/*
 * Stub for the #else branch.  Returns int (always 0) so it has the same
 * type as the real "extern int proc_sys_init(void)" declaration.
 */
static inline int proc_sys_init(void) { return 0; }
#endif
/*
 * proc_net_init() is declared extern only when CONFIG_NET is defined;
 * otherwise an inline stub that returns 0 stands in for it.
 */
#ifdef CONFIG_NET
extern int proc_net_init(void);
#else
static inline int proc_net_init(void) { return 0; }
#endif
struct vmalloc_info {
unsigned long used;

200
fs/proc/proc_net.c Normal file
View File

@@ -0,0 +1,200 @@
/*
* linux/fs/proc/proc_net.c
*
* Copyright (C) 2007
*
* Author: Eric Biederman <ebiederm@xmission.com>
*
* proc net directory handling functions
*/
#include <asm/uaccess.h>
#include <linux/errno.h>
#include <linux/time.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/bitops.h>
#include <linux/smp_lock.h>
#include <linux/mount.h>
#include <linux/nsproxy.h>
#include <net/net_namespace.h>
#include "internal.h"
/*
 * Create an info-style proc entry called @name with permissions @mode
 * under @net's proc_net directory, using @get_info as its callback.
 * Returns whatever create_proc_info_entry() returns.
 */
struct proc_dir_entry *proc_net_create(struct net *net,
const char *name, mode_t mode, get_info_t *get_info)
{
return create_proc_info_entry(name,mode, net->proc_net, get_info);
}
EXPORT_SYMBOL_GPL(proc_net_create);
/*
 * Create proc entry @name with permissions @mode under @net's proc_net
 * directory and attach @fops as its file operations.
 *
 * Returns the new entry, or NULL if create_proc_entry() failed.
 */
struct proc_dir_entry *proc_net_fops_create(struct net *net,
	const char *name, mode_t mode, const struct file_operations *fops)
{
	struct proc_dir_entry *entry;

	entry = create_proc_entry(name, mode, net->proc_net);
	if (!entry)
		return NULL;

	entry->proc_fops = fops;
	return entry;
}
EXPORT_SYMBOL_GPL(proc_net_fops_create);
/* Remove the proc entry called @name from @net's proc_net directory. */
void proc_net_remove(struct net *net, const char *name)
{
remove_proc_entry(name, net->proc_net);
}
EXPORT_SYMBOL_GPL(proc_net_remove);
/*
 * Map a proc inode to its network namespace: take the proc_dir_entry
 * behind @inode (PDE), the struct net recorded for it (PDE_NET), and
 * pass that through maybe_get_net().  The return value is whatever
 * maybe_get_net() yields.
 * NOTE(review): presumably NULL when no reference could be taken -
 * confirm against maybe_get_net().
 */
struct net *get_proc_net(const struct inode *inode)
{
return maybe_get_net(PDE_NET(PDE(inode)));
}
EXPORT_SYMBOL_GPL(get_proc_net);
static struct proc_dir_entry *proc_net_shadow;
/*
 * Build a dentry under @parent, named after @de and backed by an inode
 * obtained from proc_get_inode() for @de.
 *
 * Returns the instantiated dentry, or NULL when @de is NULL, no inode
 * could be obtained, or the dentry could not be allocated.  On the
 * failure paths the inode (if any) is released with iput() and the
 * de_get() taken here is undone with de_put().
 */
static struct dentry *proc_net_shadow_dentry(struct dentry *parent,
struct proc_dir_entry *de)
{
struct dentry *shadow = NULL;
struct inode *inode;
if (!de)
goto out;
/* Taken before the inode lookup; dropped again at out_de_put on failure. */
de_get(de);
inode = proc_get_inode(parent->d_inode->i_sb, de->low_ino, de);
if (!inode)
goto out_de_put;
shadow = d_alloc_name(parent, de->name);
if (!shadow)
goto out_iput;
shadow->d_op = parent->d_op; /* proc_dentry_operations */
d_instantiate(shadow, inode);
out:
return shadow;
/* Error unwinding: undo in reverse order, then leave through "out". */
out_iput:
iput(inode);
out_de_put:
de_put(de);
goto out;
}
/*
 * follow_link handler for the shadow "net" directory.
 *
 * Builds a shadow dentry for the proc_net directory of the calling
 * task's network namespace (current->nsproxy->net_ns) and substitutes
 * it for nd->dentry.
 *
 * Returns NULL on success, or ERR_PTR(-ENOENT) if no shadow dentry
 * could be built (nd is left untouched in that case).
 */
static void *proc_net_follow_link(struct dentry *parent, struct nameidata *nd)
{
struct net *net = current->nsproxy->net_ns;
struct dentry *shadow;
shadow = proc_net_shadow_dentry(parent, net->proc_net);
if (!shadow)
return ERR_PTR(-ENOENT);
/* Drop the reference on the dentry being replaced. */
dput(nd->dentry);
/* My dentry count is 1 and that should be enough as the
* shadow dentry is thrown away immediately.
*/
nd->dentry = shadow;
return NULL;
}
/*
 * lookup handler for the shadow "net" directory.
 *
 * Builds a shadow dentry (parented at nd->dentry) for the proc_net
 * directory of the calling task's network namespace, substitutes it for
 * nd->dentry, and forwards the lookup of @dentry to the shadow inode's
 * own ->lookup operation.  @dir is not used.
 *
 * Returns ERR_PTR(-ENOENT) if the shadow dentry could not be built,
 * otherwise whatever the forwarded ->lookup returns.
 */
static struct dentry *proc_net_lookup(struct inode *dir, struct dentry *dentry,
struct nameidata *nd)
{
struct net *net = current->nsproxy->net_ns;
struct dentry *shadow;
shadow = proc_net_shadow_dentry(nd->dentry, net->proc_net);
if (!shadow)
return ERR_PTR(-ENOENT);
/* Drop the reference on the dentry being replaced in nd. */
dput(nd->dentry);
nd->dentry = shadow;
return shadow->d_inode->i_op->lookup(shadow->d_inode, dentry, nd);
}
/*
 * setattr handler for the shadow "net" directory.
 *
 * Builds a temporary shadow dentry (under @dentry's parent) for the
 * proc_net directory of the calling task's network namespace, applies
 * @iattr through that inode's ->setattr, and releases the shadow again.
 *
 * Returns -ENOENT if the shadow dentry could not be built, otherwise
 * the result of the forwarded ->setattr call.
 */
static int proc_net_setattr(struct dentry *dentry, struct iattr *iattr)
{
struct net *net = current->nsproxy->net_ns;
struct dentry *shadow;
int ret;
shadow = proc_net_shadow_dentry(dentry->d_parent, net->proc_net);
if (!shadow)
return -ENOENT;
ret = shadow->d_inode->i_op->setattr(shadow, iattr);
/* The shadow was only needed for this one call. */
dput(shadow);
return ret;
}
/*
 * File operations installed on the shadow "net" entry by
 * proc_net_init(); only .read is provided (generic_read_dir).
 */
static const struct file_operations proc_net_dir_operations = {
.read = generic_read_dir,
};
/*
 * Inode operations installed on the shadow "net" entry by
 * proc_net_init().  Each handler redirects to the proc_net directory of
 * the calling task's network namespace.
 */
static struct inode_operations proc_net_dir_inode_operations = {
.follow_link = proc_net_follow_link,
.lookup = proc_net_lookup,
.setattr = proc_net_setattr,
};
/*
 * Per-namespace setup: allocate a private, zeroed root proc_dir_entry
 * for @net and create "net" and "net/stat" directories beneath it.
 * All three entries get ->data pointing at @net and are recorded in
 * net->proc_net_root, net->proc_net and net->proc_net_stat.
 *
 * Returns 0 on success, -ENOMEM if the root cannot be allocated, or
 * -EEXIST if either proc_mkdir() call fails.  Anything created before
 * a failure is torn down again.
 */
static __net_init int proc_net_ns_init(struct net *net)
{
	struct proc_dir_entry *ns_root, *net_dir, *stat_dir;

	ns_root = kzalloc(sizeof(*ns_root), GFP_KERNEL);
	if (!ns_root)
		return -ENOMEM;

	net_dir = proc_mkdir("net", ns_root);
	if (!net_dir) {
		kfree(ns_root);
		return -EEXIST;
	}

	stat_dir = proc_mkdir("stat", net_dir);
	if (!stat_dir) {
		/* Undo the "net" directory before releasing its parent. */
		remove_proc_entry("net", ns_root);
		kfree(ns_root);
		return -EEXIST;
	}

	ns_root->data = net;
	net_dir->data = net;
	stat_dir->data = net;

	net->proc_net_root = ns_root;
	net->proc_net = net_dir;
	net->proc_net_stat = stat_dir;

	return 0;
}
/*
 * Per-namespace teardown: remove "stat" from @net's proc_net directory,
 * remove "net" from @net's private root, then free that root.  This is
 * the reverse of the order in which proc_net_ns_init() creates them.
 */
static __net_exit void proc_net_ns_exit(struct net *net)
{
remove_proc_entry("stat", net->proc_net);
remove_proc_entry("net", net->proc_net_root);
kfree(net->proc_net_root);
}
/*
 * Per-network-namespace init/exit hooks; registered from
 * proc_net_init() with register_pernet_subsys().
 */
struct pernet_operations __net_initdata proc_net_ns_ops = {
.init = proc_net_ns_init,
.exit = proc_net_ns_exit,
};
/*
 * Create the shadow "net" proc directory (parent NULL), install the
 * namespace-redirecting inode and file operations on it, and register
 * the per-namespace init/exit hooks.
 *
 * Returns -ENOMEM if the directory entry cannot be created, otherwise
 * the result of register_pernet_subsys().
 */
int __init proc_net_init(void)
{
	proc_net_shadow = proc_mkdir("net", NULL);
	/* proc_mkdir() can fail; do not dereference a NULL entry. */
	if (!proc_net_shadow)
		return -ENOMEM;

	proc_net_shadow->proc_iops = &proc_net_dir_inode_operations;
	proc_net_shadow->proc_fops = &proc_net_dir_operations;

	return register_pernet_subsys(&proc_net_ns_ops);
}

View File

@@ -21,7 +21,7 @@
#include "internal.h"
struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc_root_driver;
struct proc_dir_entry *proc_bus, *proc_root_fs, *proc_root_driver;
static int proc_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data, struct vfsmount *mnt)
@@ -61,8 +61,8 @@ void __init proc_root_init(void)
return;
}
proc_misc_init();
proc_net = proc_mkdir("net", NULL);
proc_net_stat = proc_mkdir("net/stat", NULL);
proc_net_init();
#ifdef CONFIG_SYSVIPC
proc_mkdir("sysvipc", NULL);
@@ -159,7 +159,5 @@ EXPORT_SYMBOL(create_proc_entry);
EXPORT_SYMBOL(remove_proc_entry);
EXPORT_SYMBOL(proc_root);
EXPORT_SYMBOL(proc_root_fs);
EXPORT_SYMBOL(proc_net);
EXPORT_SYMBOL(proc_net_stat);
EXPORT_SYMBOL(proc_bus);
EXPORT_SYMBOL(proc_root_driver);

View File

@@ -429,6 +429,39 @@ int seq_release_private(struct inode *inode, struct file *file)
}
EXPORT_SYMBOL(seq_release_private);
/*
 * Open a seq_file on @f with @ops and attach a zeroed private area of
 * @psize bytes to it (stored in seq_file->private).
 *
 * Returns the private area, or NULL if the allocation or seq_open()
 * fails.  When seq_open() fails, the private area is freed again.
 */
void *__seq_open_private(struct file *f, const struct seq_operations *ops,
		int psize)
{
	struct seq_file *m;
	void *priv;

	priv = kzalloc(psize, GFP_KERNEL);
	if (!priv)
		return NULL;

	if (seq_open(f, ops) < 0) {
		kfree(priv);
		return NULL;
	}

	/* seq_open() left the seq_file in f->private_data. */
	m = f->private_data;
	m->private = priv;

	return priv;
}
/*
 * Integer-returning wrapper around __seq_open_private(): 0 on success,
 * -ENOMEM when it returns NULL (whatever the underlying cause).
 */
int seq_open_private(struct file *filp, const struct seq_operations *ops,
		int psize)
{
	if (!__seq_open_private(filp, ops, psize))
		return -ENOMEM;

	return 0;
}
EXPORT_SYMBOL(seq_open_private);
int seq_putc(struct seq_file *m, char c)
{
if (m->count < m->size) {

Some files were not shown because too many files have changed in this diff Show More