Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

The netfilter conflicts were rather simple overlapping
changes.

However, the cls_tcindex.c stuff was a bit more complex.

On the 'net' side, Cong is fixing several races and memory
leaks.  Whilst on the 'net-next' side we have Vlad adding
the rtnl-ness support.

What I've decided to do, in order to resolve this, is revert the
conversion over to using a workqueue that Cong did, bringing us back
to pure RCU.  I did it this way because I believe that either Cong's
races don't apply with have Vlad did things, or Cong will have to
implement the race fix slightly differently.

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller
2019-02-15 12:38:38 -08:00
272 changed files with 1968 additions and 1234 deletions

View File

@@ -1436,6 +1436,7 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
if (unlikely(!req->ki_filp))
return -EBADF;
req->ki_complete = aio_complete_rw;
req->private = NULL;
req->ki_pos = iocb->aio_offset;
req->ki_flags = iocb_flags(req->ki_filp);
if (iocb->aio_flags & IOCB_FLAG_RESFD)

View File

@@ -42,14 +42,10 @@ static int load_script(struct linux_binprm *bprm)
fput(bprm->file);
bprm->file = NULL;
for (cp = bprm->buf+2;; cp++) {
if (cp >= bprm->buf + BINPRM_BUF_SIZE)
return -ENOEXEC;
if (!*cp || (*cp == '\n'))
break;
}
bprm->buf[BINPRM_BUF_SIZE - 1] = '\0';
if ((cp = strchr(bprm->buf, '\n')) == NULL)
cp = bprm->buf+BINPRM_BUF_SIZE-1;
*cp = '\0';
while (cp > bprm->buf) {
cp--;
if ((*cp == ' ') || (*cp == '\t'))

View File

@@ -200,6 +200,7 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
struct buffer_head *head;
struct page *page;
int all_mapped = 1;
static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1);
index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED);
@@ -227,15 +228,15 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
* file io on the block device and getblk. It gets dealt with
* elsewhere, don't buffer_error if we had some unmapped buffers
*/
if (all_mapped) {
printk("__find_get_block_slow() failed. "
"block=%llu, b_blocknr=%llu\n",
(unsigned long long)block,
(unsigned long long)bh->b_blocknr);
printk("b_state=0x%08lx, b_size=%zu\n",
bh->b_state, bh->b_size);
printk("device %pg blocksize: %d\n", bdev,
1 << bd_inode->i_blkbits);
ratelimit_set_flags(&last_warned, RATELIMIT_MSG_ON_RELEASE);
if (all_mapped && __ratelimit(&last_warned)) {
printk("__find_get_block_slow() failed. block=%llu, "
"b_blocknr=%llu, b_state=0x%08lx, b_size=%zu, "
"device %pg blocksize: %d\n",
(unsigned long long)block,
(unsigned long long)bh->b_blocknr,
bh->b_state, bh->b_size, bdev,
1 << bd_inode->i_blkbits);
}
out_unlock:
spin_unlock(&bd_mapping->private_lock);

View File

@@ -116,16 +116,8 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
goto out;
}
ret = file_write_and_wait_range(file, start, end);
if (ret)
return ret;
if (!journal) {
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL
};
ret = ext4_write_inode(inode, &wbc);
ret = __generic_file_fsync(file, start, end, datasync);
if (!ret)
ret = ext4_sync_parent(inode);
if (test_opt(inode->i_sb, BARRIER))
@@ -133,6 +125,9 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
goto out;
}
ret = file_write_and_wait_range(file, start, end);
if (ret)
return ret;
/*
* data=writeback,ordered:
* The caller's filemap_fdatawrite()/wait will sync the data.

View File

@@ -28,7 +28,6 @@
#include "util.h"
#include "trans.h"
#include "dir.h"
#include "lops.h"
struct workqueue_struct *gfs2_freeze_wq;

View File

@@ -733,7 +733,7 @@ void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
lh->lh_crc = cpu_to_be32(crc);
gfs2_log_write(sdp, page, sb->s_blocksize, 0, addr);
gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE | op_flags);
gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE, op_flags);
log_flush_wait(sdp);
}
@@ -810,7 +810,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
gfs2_ordered_write(sdp);
lops_before_commit(sdp, tr);
gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE);
gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE, 0);
if (sdp->sd_log_head != sdp->sd_log_flush_head) {
log_flush_wait(sdp);

View File

@@ -17,9 +17,7 @@
#include <linux/bio.h>
#include <linux/fs.h>
#include <linux/list_sort.h>
#include <linux/blkdev.h>
#include "bmap.h"
#include "dir.h"
#include "gfs2.h"
#include "incore.h"
@@ -195,6 +193,7 @@ static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, struct bio_vec *bvec,
/**
* gfs2_end_log_write - end of i/o to the log
* @bio: The bio
* @error: Status of i/o request
*
* Each bio_vec contains either data from the pagecache or data
* relating to the log itself. Here we iterate over the bio_vec
@@ -231,19 +230,20 @@ static void gfs2_end_log_write(struct bio *bio)
/**
* gfs2_log_submit_bio - Submit any pending log bio
* @biop: Address of the bio pointer
* @opf: REQ_OP | op_flags
* @op: REQ_OP
* @op_flags: req_flag_bits
*
* Submit any pending part-built or full bio to the block device. If
* there is no pending bio, then this is a no-op.
*/
void gfs2_log_submit_bio(struct bio **biop, int opf)
void gfs2_log_submit_bio(struct bio **biop, int op, int op_flags)
{
struct bio *bio = *biop;
if (bio) {
struct gfs2_sbd *sdp = bio->bi_private;
atomic_inc(&sdp->sd_log_in_flight);
bio->bi_opf = opf;
bio_set_op_attrs(bio, op, op_flags);
submit_bio(bio);
*biop = NULL;
}
@@ -304,7 +304,7 @@ static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, u64 blkno,
nblk >>= sdp->sd_fsb2bb_shift;
if (blkno == nblk && !flush)
return bio;
gfs2_log_submit_bio(biop, op);
gfs2_log_submit_bio(biop, op, 0);
}
*biop = gfs2_log_alloc_bio(sdp, blkno, end_io);
@@ -375,184 +375,6 @@ void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page)
gfs2_log_bmap(sdp));
}
/**
* gfs2_end_log_read - end I/O callback for reads from the log
* @bio: The bio
*
* Simply unlock the pages in the bio. The main thread will wait on them and
* process them in order as necessary.
*/
static void gfs2_end_log_read(struct bio *bio)
{
struct page *page;
struct bio_vec *bvec;
int i;
bio_for_each_segment_all(bvec, bio, i) {
page = bvec->bv_page;
if (bio->bi_status) {
int err = blk_status_to_errno(bio->bi_status);
SetPageError(page);
mapping_set_error(page->mapping, err);
}
unlock_page(page);
}
bio_put(bio);
}
/**
* gfs2_jhead_pg_srch - Look for the journal head in a given page.
* @jd: The journal descriptor
* @page: The page to look in
*
* Returns: 1 if found, 0 otherwise.
*/
static bool gfs2_jhead_pg_srch(struct gfs2_jdesc *jd,
struct gfs2_log_header_host *head,
struct page *page)
{
struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
struct gfs2_log_header_host uninitialized_var(lh);
void *kaddr = kmap_atomic(page);
unsigned int offset;
bool ret = false;
for (offset = 0; offset < PAGE_SIZE; offset += sdp->sd_sb.sb_bsize) {
if (!__get_log_header(sdp, kaddr + offset, 0, &lh)) {
if (lh.lh_sequence > head->lh_sequence)
*head = lh;
else {
ret = true;
break;
}
}
}
kunmap_atomic(kaddr);
return ret;
}
/**
* gfs2_jhead_process_page - Search/cleanup a page
* @jd: The journal descriptor
* @index: Index of the page to look into
* @done: If set, perform only cleanup, else search and set if found.
*
* Find the page with 'index' in the journal's mapping. Search the page for
* the journal head if requested (cleanup == false). Release refs on the
* page so the page cache can reclaim it (put_page() twice). We grabbed a
* reference on this page two times, first when we did a find_or_create_page()
* to obtain the page to add it to the bio and second when we do a
* find_get_page() here to get the page to wait on while I/O on it is being
* completed.
* This function is also used to free up a page we might've grabbed but not
* used. Maybe we added it to a bio, but not submitted it for I/O. Or we
* submitted the I/O, but we already found the jhead so we only need to drop
* our references to the page.
*/
static void gfs2_jhead_process_page(struct gfs2_jdesc *jd, unsigned long index,
struct gfs2_log_header_host *head,
bool *done)
{
struct page *page;
page = find_get_page(jd->jd_inode->i_mapping, index);
wait_on_page_locked(page);
if (PageError(page))
*done = true;
if (!*done)
*done = gfs2_jhead_pg_srch(jd, head, page);
put_page(page); /* Once for find_get_page */
put_page(page); /* Once more for find_or_create_page */
}
/**
* gfs2_find_jhead - find the head of a log
* @jd: The journal descriptor
* @head: The log descriptor for the head of the log is returned here
*
* Do a search of a journal by reading it in large chunks using bios and find
* the valid log entry with the highest sequence number. (i.e. the log head)
*
* Returns: 0 on success, errno otherwise
*/
int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
{
struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
struct address_space *mapping = jd->jd_inode->i_mapping;
struct gfs2_journal_extent *je;
u32 block, read_idx = 0, submit_idx = 0, index = 0;
int shift = PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift;
int blocks_per_page = 1 << shift, sz, ret = 0;
struct bio *bio = NULL;
struct page *page;
bool done = false;
errseq_t since;
memset(head, 0, sizeof(*head));
if (list_empty(&jd->extent_list))
gfs2_map_journal_extents(sdp, jd);
since = filemap_sample_wb_err(mapping);
list_for_each_entry(je, &jd->extent_list, list) {
for (block = 0; block < je->blocks; block += blocks_per_page) {
index = (je->lblock + block) >> shift;
page = find_or_create_page(mapping, index, GFP_NOFS);
if (!page) {
ret = -ENOMEM;
done = true;
goto out;
}
if (bio) {
sz = bio_add_page(bio, page, PAGE_SIZE, 0);
if (sz == PAGE_SIZE)
goto page_added;
submit_idx = index;
submit_bio(bio);
bio = NULL;
}
bio = gfs2_log_alloc_bio(sdp,
je->dblock + (index << shift),
gfs2_end_log_read);
bio->bi_opf = REQ_OP_READ;
sz = bio_add_page(bio, page, PAGE_SIZE, 0);
gfs2_assert_warn(sdp, sz == PAGE_SIZE);
page_added:
if (submit_idx <= read_idx + BIO_MAX_PAGES) {
/* Keep at least one bio in flight */
continue;
}
gfs2_jhead_process_page(jd, read_idx++, head, &done);
if (done)
goto out; /* found */
}
}
out:
if (bio)
submit_bio(bio);
while (read_idx <= index)
gfs2_jhead_process_page(jd, read_idx++, head, &done);
if (!ret)
ret = filemap_check_wb_err(mapping, since);
return ret;
}
static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type,
u32 ld_length, u32 ld_data1)
{

View File

@@ -30,10 +30,8 @@ extern u64 gfs2_log_bmap(struct gfs2_sbd *sdp);
extern void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page,
unsigned size, unsigned offset, u64 blkno);
extern void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page);
extern void gfs2_log_submit_bio(struct bio **biop, int opf);
extern void gfs2_log_submit_bio(struct bio **biop, int op, int op_flags);
extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh);
extern int gfs2_find_jhead(struct gfs2_jdesc *jd,
struct gfs2_log_header_host *head);
static inline unsigned int buf_limit(struct gfs2_sbd *sdp)
{

View File

@@ -41,7 +41,6 @@
#include "dir.h"
#include "meta_io.h"
#include "trace_gfs2.h"
#include "lops.h"
#define DO 0
#define UNDO 1

View File

@@ -181,6 +181,129 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
return error;
}
/**
* find_good_lh - find a good log header
* @jd: the journal
* @blk: the segment to start searching from
* @lh: the log header to fill in
* @forward: if true search forward in the log, else search backward
*
* Call get_log_header() to get a log header for a segment, but if the
* segment is bad, either scan forward or backward until we find a good one.
*
* Returns: errno
*/
static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk,
struct gfs2_log_header_host *head)
{
unsigned int orig_blk = *blk;
int error;
for (;;) {
error = get_log_header(jd, *blk, head);
if (error <= 0)
return error;
if (++*blk == jd->jd_blocks)
*blk = 0;
if (*blk == orig_blk) {
gfs2_consist_inode(GFS2_I(jd->jd_inode));
return -EIO;
}
}
}
/**
* jhead_scan - make sure we've found the head of the log
* @jd: the journal
* @head: this is filled in with the log descriptor of the head
*
* At this point, seg and lh should be either the head of the log or just
* before. Scan forward until we find the head.
*
* Returns: errno
*/
static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
{
unsigned int blk = head->lh_blkno;
struct gfs2_log_header_host lh;
int error;
for (;;) {
if (++blk == jd->jd_blocks)
blk = 0;
error = get_log_header(jd, blk, &lh);
if (error < 0)
return error;
if (error == 1)
continue;
if (lh.lh_sequence == head->lh_sequence) {
gfs2_consist_inode(GFS2_I(jd->jd_inode));
return -EIO;
}
if (lh.lh_sequence < head->lh_sequence)
break;
*head = lh;
}
return 0;
}
/**
* gfs2_find_jhead - find the head of a log
* @jd: the journal
* @head: the log descriptor for the head of the log is returned here
*
* Do a binary search of a journal and find the valid log entry with the
* highest sequence number. (i.e. the log head)
*
* Returns: errno
*/
int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
{
struct gfs2_log_header_host lh_1, lh_m;
u32 blk_1, blk_2, blk_m;
int error;
blk_1 = 0;
blk_2 = jd->jd_blocks - 1;
for (;;) {
blk_m = (blk_1 + blk_2) / 2;
error = find_good_lh(jd, &blk_1, &lh_1);
if (error)
return error;
error = find_good_lh(jd, &blk_m, &lh_m);
if (error)
return error;
if (blk_1 == blk_m || blk_m == blk_2)
break;
if (lh_1.lh_sequence <= lh_m.lh_sequence)
blk_1 = blk_m;
else
blk_2 = blk_m;
}
error = jhead_scan(jd, &lh_1);
if (error)
return error;
*head = lh_1;
return error;
}
/**
* foreach_descriptor - go through the active part of the log
* @jd: the journal

View File

@@ -27,6 +27,8 @@ extern int gfs2_revoke_add(struct gfs2_jdesc *jd, u64 blkno, unsigned int where)
extern int gfs2_revoke_check(struct gfs2_jdesc *jd, u64 blkno, unsigned int where);
extern void gfs2_revoke_clean(struct gfs2_jdesc *jd);
extern int gfs2_find_jhead(struct gfs2_jdesc *jd,
struct gfs2_log_header_host *head);
extern int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd, bool wait);
extern void gfs2_recover_func(struct work_struct *work);
extern int __get_log_header(struct gfs2_sbd *sdp,

View File

@@ -45,7 +45,6 @@
#include "util.h"
#include "sys.h"
#include "xattr.h"
#include "lops.h"
#define args_neq(a1, a2, x) ((a1)->ar_##x != (a2)->ar_##x)

View File

@@ -730,11 +730,8 @@ static enum lru_status inode_lru_isolate(struct list_head *item,
return LRU_REMOVED;
}
/*
* Recently referenced inodes and inodes with many attached pages
* get one more pass.
*/
if (inode->i_state & I_REFERENCED || inode->i_data.nrpages > 1) {
/* recently referenced inodes get one more pass */
if (inode->i_state & I_REFERENCED) {
inode->i_state &= ~I_REFERENCED;
spin_unlock(&inode->i_lock);
return LRU_ROTATE;

View File

@@ -423,7 +423,7 @@ struct mem_size_stats {
};
static void smaps_account(struct mem_size_stats *mss, struct page *page,
bool compound, bool young, bool dirty)
bool compound, bool young, bool dirty, bool locked)
{
int i, nr = compound ? 1 << compound_order(page) : 1;
unsigned long size = nr * PAGE_SIZE;
@@ -450,24 +450,31 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
else
mss->private_clean += size;
mss->pss += (u64)size << PSS_SHIFT;
if (locked)
mss->pss_locked += (u64)size << PSS_SHIFT;
return;
}
for (i = 0; i < nr; i++, page++) {
int mapcount = page_mapcount(page);
unsigned long pss = (PAGE_SIZE << PSS_SHIFT);
if (mapcount >= 2) {
if (dirty || PageDirty(page))
mss->shared_dirty += PAGE_SIZE;
else
mss->shared_clean += PAGE_SIZE;
mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount;
mss->pss += pss / mapcount;
if (locked)
mss->pss_locked += pss / mapcount;
} else {
if (dirty || PageDirty(page))
mss->private_dirty += PAGE_SIZE;
else
mss->private_clean += PAGE_SIZE;
mss->pss += PAGE_SIZE << PSS_SHIFT;
mss->pss += pss;
if (locked)
mss->pss_locked += pss;
}
}
}
@@ -490,6 +497,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
{
struct mem_size_stats *mss = walk->private;
struct vm_area_struct *vma = walk->vma;
bool locked = !!(vma->vm_flags & VM_LOCKED);
struct page *page = NULL;
if (pte_present(*pte)) {
@@ -532,7 +540,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
if (!page)
return;
smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte));
smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte), locked);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -541,6 +549,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
{
struct mem_size_stats *mss = walk->private;
struct vm_area_struct *vma = walk->vma;
bool locked = !!(vma->vm_flags & VM_LOCKED);
struct page *page;
/* FOLL_DUMP will return -EFAULT on huge zero page */
@@ -555,7 +564,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
/* pass */;
else
VM_BUG_ON_PAGE(1, page);
smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd));
smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), locked);
}
#else
static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
@@ -737,11 +746,8 @@ static void smap_gather_stats(struct vm_area_struct *vma,
}
}
#endif
/* mmap_sem is held in m_start */
walk_page_vma(vma, &smaps_walk);
if (vma->vm_flags & VM_LOCKED)
mss->pss_locked += mss->pss;
}
#define SEQ_PUT_DEC(str, val) \