Merge branch 'mvebu/drivers' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc

Merge the mvebu/drivers branch of the arm-soc tree which contains
just a single patch bfa1ce5f38 ("bus:
mvebu-mbus: add mv_mbus_dram_info_nooverlap()") that happens to be
a prerequisite of the new marvell/cesa crypto driver.
This commit is contained in:
Herbert Xu
2015-06-19 22:07:07 +08:00
4373 changed files with 173294 additions and 96730 deletions

View File

@@ -18,9 +18,8 @@ config HAVE_ARCH_BITREVERSE
default n
depends on BITREVERSE
help
This option provides an config for the architecture which have instruction
can do bitreverse operation, we use the hardware instruction if the architecture
have this capability.
This option enables the use of hardware bit-reversal instructions on
architectures which support such operations.
config RATIONAL
bool
@@ -403,10 +402,6 @@ config CPUMASK_OFFSTACK
them on the stack. This is a bit more expensive, but avoids
stack overflow.
config DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
bool "Disable obsolete cpumask functions" if DEBUG_PER_CPU_MAPS
depends on BROKEN
config CPU_RMAP
bool
depends on SMP

View File

@@ -25,7 +25,7 @@ obj-y += lockref.o
obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
bust_spinlocks.o kasprintf.o bitmap.o scatterlist.o \
gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \
bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \
bsearch.o find_bit.o llist.o memweight.o kfifo.o \
percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o
obj-y += string_helpers.o
obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
@@ -108,7 +108,7 @@ obj-$(CONFIG_AUDIT_GENERIC) += audit.o
obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o
obj-$(CONFIG_SWIOTLB) += swiotlb.o
obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o
obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o iommu-common.o
obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o
obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o
obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o

View File

@@ -42,36 +42,6 @@
* for the best explanations of this ordering.
*/
int __bitmap_empty(const unsigned long *bitmap, unsigned int bits)
{
unsigned int k, lim = bits/BITS_PER_LONG;
for (k = 0; k < lim; ++k)
if (bitmap[k])
return 0;
if (bits % BITS_PER_LONG)
if (bitmap[k] & BITMAP_LAST_WORD_MASK(bits))
return 0;
return 1;
}
EXPORT_SYMBOL(__bitmap_empty);
int __bitmap_full(const unsigned long *bitmap, unsigned int bits)
{
unsigned int k, lim = bits/BITS_PER_LONG;
for (k = 0; k < lim; ++k)
if (~bitmap[k])
return 0;
if (bits % BITS_PER_LONG)
if (~bitmap[k] & BITMAP_LAST_WORD_MASK(bits))
return 0;
return 1;
}
EXPORT_SYMBOL(__bitmap_full);
int __bitmap_equal(const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int bits)
{

View File

@@ -5,27 +5,6 @@
#include <linux/export.h>
#include <linux/bootmem.h>
int __first_cpu(const cpumask_t *srcp)
{
return min_t(int, NR_CPUS, find_first_bit(srcp->bits, NR_CPUS));
}
EXPORT_SYMBOL(__first_cpu);
int __next_cpu(int n, const cpumask_t *srcp)
{
return min_t(int, NR_CPUS, find_next_bit(srcp->bits, NR_CPUS, n+1));
}
EXPORT_SYMBOL(__next_cpu);
#if NR_CPUS > 64
int __next_cpu_nr(int n, const cpumask_t *srcp)
{
return min_t(int, nr_cpu_ids,
find_next_bit(srcp->bits, nr_cpu_ids, n+1));
}
EXPORT_SYMBOL(__next_cpu_nr);
#endif
/**
* cpumask_next_and - get the next cpu in *src1p & *src2p
* @n: the cpu prior to the place to search (ie. return will be > @n)
@@ -37,10 +16,11 @@ EXPORT_SYMBOL(__next_cpu_nr);
int cpumask_next_and(int n, const struct cpumask *src1p,
const struct cpumask *src2p)
{
while ((n = cpumask_next(n, src1p)) < nr_cpu_ids)
if (cpumask_test_cpu(n, src2p))
break;
return n;
struct cpumask tmp;
if (cpumask_and(&tmp, src1p, src2p))
return cpumask_next(n, &tmp);
return nr_cpu_ids;
}
EXPORT_SYMBOL(cpumask_next_and);
@@ -89,13 +69,6 @@ bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node)
dump_stack();
}
#endif
/* FIXME: Bandaid to save us from old primitives which go to NR_CPUS. */
if (*mask) {
unsigned char *ptr = (unsigned char *)cpumask_bits(*mask);
unsigned int tail;
tail = BITS_TO_LONGS(NR_CPUS - nr_cpumask_bits) * sizeof(long);
memset(ptr + cpumask_size() - tail, 0, tail);
}
return *mask != NULL;
}

View File

@@ -71,6 +71,34 @@ void __iomem *devm_ioremap_nocache(struct device *dev, resource_size_t offset,
}
EXPORT_SYMBOL(devm_ioremap_nocache);
/**
* devm_ioremap_wc - Managed ioremap_wc()
* @dev: Generic device to remap IO address for
* @offset: BUS offset to map
* @size: Size of map
*
* Managed ioremap_wc(). Map is automatically unmapped on driver detach.
*/
void __iomem *devm_ioremap_wc(struct device *dev, resource_size_t offset,
resource_size_t size)
{
void __iomem **ptr, *addr;
ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL);
if (!ptr)
return NULL;
addr = ioremap_wc(offset, size);
if (addr) {
*ptr = addr;
devres_add(dev, ptr);
} else
devres_free(ptr);
return addr;
}
EXPORT_SYMBOL(devm_ioremap_wc);
/**
* devm_iounmap - Managed iounmap()
* @dev: Generic device to unmap for

View File

@@ -361,7 +361,7 @@ static struct dma_debug_entry *bucket_find_contain(struct hash_bucket **bucket,
unsigned int range = 0;
while (range <= max_range) {
entry = __hash_bucket_find(*bucket, &index, containing_match);
entry = __hash_bucket_find(*bucket, ref, containing_match);
if (entry)
return entry;

193
lib/find_bit.c Normal file
View File

@@ -0,0 +1,193 @@
/* bit search implementation
*
* Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* Copyright (C) 2008 IBM Corporation
* 'find_last_bit' is written by Rusty Russell <rusty@rustcorp.com.au>
* (Inspired by David Howell's find_next_bit implementation)
*
* Rewritten by Yury Norov <yury.norov@gmail.com> to decrease
* size and improve performance, 2015.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/bitops.h>
#include <linux/bitmap.h>
#include <linux/export.h>
#include <linux/kernel.h>
#if !defined(find_next_bit) || !defined(find_next_zero_bit)
/*
* This is a common helper function for find_next_bit and
* find_next_zero_bit. The difference is the "invert" argument, which
* is XORed with each fetched word before searching it for one bits.
*/
static unsigned long _find_next_bit(const unsigned long *addr,
unsigned long nbits, unsigned long start, unsigned long invert)
{
unsigned long tmp;
if (!nbits || start >= nbits)
return nbits;
tmp = addr[start / BITS_PER_LONG] ^ invert;
/* Handle 1st word. */
tmp &= BITMAP_FIRST_WORD_MASK(start);
start = round_down(start, BITS_PER_LONG);
while (!tmp) {
start += BITS_PER_LONG;
if (start >= nbits)
return nbits;
tmp = addr[start / BITS_PER_LONG] ^ invert;
}
return min(start + __ffs(tmp), nbits);
}
#endif
#ifndef find_next_bit
/*
* Find the next set bit in a memory region.
*/
unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
unsigned long offset)
{
return _find_next_bit(addr, size, offset, 0UL);
}
EXPORT_SYMBOL(find_next_bit);
#endif
#ifndef find_next_zero_bit
unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
unsigned long offset)
{
return _find_next_bit(addr, size, offset, ~0UL);
}
EXPORT_SYMBOL(find_next_zero_bit);
#endif
#ifndef find_first_bit
/*
* Find the first set bit in a memory region.
*/
unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
{
unsigned long idx;
for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
if (addr[idx])
return min(idx * BITS_PER_LONG + __ffs(addr[idx]), size);
}
return size;
}
EXPORT_SYMBOL(find_first_bit);
#endif
#ifndef find_first_zero_bit
/*
* Find the first cleared bit in a memory region.
*/
unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
{
unsigned long idx;
for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
if (addr[idx] != ~0UL)
return min(idx * BITS_PER_LONG + ffz(addr[idx]), size);
}
return size;
}
EXPORT_SYMBOL(find_first_zero_bit);
#endif
#ifndef find_last_bit
unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
{
if (size) {
unsigned long val = BITMAP_LAST_WORD_MASK(size);
unsigned long idx = (size-1) / BITS_PER_LONG;
do {
val &= addr[idx];
if (val)
return idx * BITS_PER_LONG + __fls(val);
val = ~0ul;
} while (idx--);
}
return size;
}
EXPORT_SYMBOL(find_last_bit);
#endif
#ifdef __BIG_ENDIAN
/* include/linux/byteorder does not support "unsigned long" type */
static inline unsigned long ext2_swab(const unsigned long y)
{
#if BITS_PER_LONG == 64
return (unsigned long) __swab64((u64) y);
#elif BITS_PER_LONG == 32
return (unsigned long) __swab32((u32) y);
#else
#error BITS_PER_LONG not defined
#endif
}
#if !defined(find_next_bit_le) || !defined(find_next_zero_bit_le)
static unsigned long _find_next_bit_le(const unsigned long *addr,
unsigned long nbits, unsigned long start, unsigned long invert)
{
unsigned long tmp;
if (!nbits || start >= nbits)
return nbits;
tmp = addr[start / BITS_PER_LONG] ^ invert;
/* Handle 1st word. */
tmp &= ext2_swab(BITMAP_FIRST_WORD_MASK(start));
start = round_down(start, BITS_PER_LONG);
while (!tmp) {
start += BITS_PER_LONG;
if (start >= nbits)
return nbits;
tmp = addr[start / BITS_PER_LONG] ^ invert;
}
return min(start + __ffs(ext2_swab(tmp)), nbits);
}
#endif
#ifndef find_next_zero_bit_le
unsigned long find_next_zero_bit_le(const void *addr, unsigned
long size, unsigned long offset)
{
return _find_next_bit_le(addr, size, offset, ~0UL);
}
EXPORT_SYMBOL(find_next_zero_bit_le);
#endif
#ifndef find_next_bit_le
unsigned long find_next_bit_le(const void *addr, unsigned
long size, unsigned long offset)
{
return _find_next_bit_le(addr, size, offset, 0UL);
}
EXPORT_SYMBOL(find_next_bit_le);
#endif
#endif /* __BIG_ENDIAN */

View File

@@ -4,6 +4,9 @@
* Written by Rusty Russell <rusty@rustcorp.com.au>
* (Inspired by David Howell's find_next_bit implementation)
*
* Rewritten by Yury Norov <yury.norov@gmail.com> to decrease
* size and improve performance, 2015.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
@@ -11,37 +14,26 @@
*/
#include <linux/bitops.h>
#include <linux/bitmap.h>
#include <linux/export.h>
#include <asm/types.h>
#include <asm/byteorder.h>
#include <linux/kernel.h>
#ifndef find_last_bit
unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
{
unsigned long words;
unsigned long tmp;
if (size) {
unsigned long val = BITMAP_LAST_WORD_MASK(size);
unsigned long idx = (size-1) / BITS_PER_LONG;
/* Start at final word. */
words = size / BITS_PER_LONG;
do {
val &= addr[idx];
if (val)
return idx * BITS_PER_LONG + __fls(val);
/* Partial final word? */
if (size & (BITS_PER_LONG-1)) {
tmp = (addr[words] & (~0UL >> (BITS_PER_LONG
- (size & (BITS_PER_LONG-1)))));
if (tmp)
goto found;
val = ~0ul;
} while (idx--);
}
while (words) {
tmp = addr[--words];
if (tmp) {
found:
return words * BITS_PER_LONG + __fls(tmp);
}
}
/* Not found */
return size;
}
EXPORT_SYMBOL(find_last_bit);

View File

@@ -1,285 +0,0 @@
/* find_next_bit.c: fallback find next bit implementation
*
* Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/bitops.h>
#include <linux/export.h>
#include <asm/types.h>
#include <asm/byteorder.h>
#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
#ifndef find_next_bit
/*
* Find the next set bit in a memory region.
*/
unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
unsigned long offset)
{
const unsigned long *p = addr + BITOP_WORD(offset);
unsigned long result = offset & ~(BITS_PER_LONG-1);
unsigned long tmp;
if (offset >= size)
return size;
size -= result;
offset %= BITS_PER_LONG;
if (offset) {
tmp = *(p++);
tmp &= (~0UL << offset);
if (size < BITS_PER_LONG)
goto found_first;
if (tmp)
goto found_middle;
size -= BITS_PER_LONG;
result += BITS_PER_LONG;
}
while (size & ~(BITS_PER_LONG-1)) {
if ((tmp = *(p++)))
goto found_middle;
result += BITS_PER_LONG;
size -= BITS_PER_LONG;
}
if (!size)
return result;
tmp = *p;
found_first:
tmp &= (~0UL >> (BITS_PER_LONG - size));
if (tmp == 0UL) /* Are any bits set? */
return result + size; /* Nope. */
found_middle:
return result + __ffs(tmp);
}
EXPORT_SYMBOL(find_next_bit);
#endif
#ifndef find_next_zero_bit
/*
* This implementation of find_{first,next}_zero_bit was stolen from
* Linus' asm-alpha/bitops.h.
*/
unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
unsigned long offset)
{
const unsigned long *p = addr + BITOP_WORD(offset);
unsigned long result = offset & ~(BITS_PER_LONG-1);
unsigned long tmp;
if (offset >= size)
return size;
size -= result;
offset %= BITS_PER_LONG;
if (offset) {
tmp = *(p++);
tmp |= ~0UL >> (BITS_PER_LONG - offset);
if (size < BITS_PER_LONG)
goto found_first;
if (~tmp)
goto found_middle;
size -= BITS_PER_LONG;
result += BITS_PER_LONG;
}
while (size & ~(BITS_PER_LONG-1)) {
if (~(tmp = *(p++)))
goto found_middle;
result += BITS_PER_LONG;
size -= BITS_PER_LONG;
}
if (!size)
return result;
tmp = *p;
found_first:
tmp |= ~0UL << size;
if (tmp == ~0UL) /* Are any bits zero? */
return result + size; /* Nope. */
found_middle:
return result + ffz(tmp);
}
EXPORT_SYMBOL(find_next_zero_bit);
#endif
#ifndef find_first_bit
/*
* Find the first set bit in a memory region.
*/
unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
{
const unsigned long *p = addr;
unsigned long result = 0;
unsigned long tmp;
while (size & ~(BITS_PER_LONG-1)) {
if ((tmp = *(p++)))
goto found;
result += BITS_PER_LONG;
size -= BITS_PER_LONG;
}
if (!size)
return result;
tmp = (*p) & (~0UL >> (BITS_PER_LONG - size));
if (tmp == 0UL) /* Are any bits set? */
return result + size; /* Nope. */
found:
return result + __ffs(tmp);
}
EXPORT_SYMBOL(find_first_bit);
#endif
#ifndef find_first_zero_bit
/*
* Find the first cleared bit in a memory region.
*/
unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
{
const unsigned long *p = addr;
unsigned long result = 0;
unsigned long tmp;
while (size & ~(BITS_PER_LONG-1)) {
if (~(tmp = *(p++)))
goto found;
result += BITS_PER_LONG;
size -= BITS_PER_LONG;
}
if (!size)
return result;
tmp = (*p) | (~0UL << size);
if (tmp == ~0UL) /* Are any bits zero? */
return result + size; /* Nope. */
found:
return result + ffz(tmp);
}
EXPORT_SYMBOL(find_first_zero_bit);
#endif
#ifdef __BIG_ENDIAN
/* include/linux/byteorder does not support "unsigned long" type */
static inline unsigned long ext2_swabp(const unsigned long * x)
{
#if BITS_PER_LONG == 64
return (unsigned long) __swab64p((u64 *) x);
#elif BITS_PER_LONG == 32
return (unsigned long) __swab32p((u32 *) x);
#else
#error BITS_PER_LONG not defined
#endif
}
/* include/linux/byteorder doesn't support "unsigned long" type */
static inline unsigned long ext2_swab(const unsigned long y)
{
#if BITS_PER_LONG == 64
return (unsigned long) __swab64((u64) y);
#elif BITS_PER_LONG == 32
return (unsigned long) __swab32((u32) y);
#else
#error BITS_PER_LONG not defined
#endif
}
#ifndef find_next_zero_bit_le
unsigned long find_next_zero_bit_le(const void *addr, unsigned
long size, unsigned long offset)
{
const unsigned long *p = addr;
unsigned long result = offset & ~(BITS_PER_LONG - 1);
unsigned long tmp;
if (offset >= size)
return size;
p += BITOP_WORD(offset);
size -= result;
offset &= (BITS_PER_LONG - 1UL);
if (offset) {
tmp = ext2_swabp(p++);
tmp |= (~0UL >> (BITS_PER_LONG - offset));
if (size < BITS_PER_LONG)
goto found_first;
if (~tmp)
goto found_middle;
size -= BITS_PER_LONG;
result += BITS_PER_LONG;
}
while (size & ~(BITS_PER_LONG - 1)) {
if (~(tmp = *(p++)))
goto found_middle_swap;
result += BITS_PER_LONG;
size -= BITS_PER_LONG;
}
if (!size)
return result;
tmp = ext2_swabp(p);
found_first:
tmp |= ~0UL << size;
if (tmp == ~0UL) /* Are any bits zero? */
return result + size; /* Nope. Skip ffz */
found_middle:
return result + ffz(tmp);
found_middle_swap:
return result + ffz(ext2_swab(tmp));
}
EXPORT_SYMBOL(find_next_zero_bit_le);
#endif
#ifndef find_next_bit_le
unsigned long find_next_bit_le(const void *addr, unsigned
long size, unsigned long offset)
{
const unsigned long *p = addr;
unsigned long result = offset & ~(BITS_PER_LONG - 1);
unsigned long tmp;
if (offset >= size)
return size;
p += BITOP_WORD(offset);
size -= result;
offset &= (BITS_PER_LONG - 1UL);
if (offset) {
tmp = ext2_swabp(p++);
tmp &= (~0UL << offset);
if (size < BITS_PER_LONG)
goto found_first;
if (tmp)
goto found_middle;
size -= BITS_PER_LONG;
result += BITS_PER_LONG;
}
while (size & ~(BITS_PER_LONG - 1)) {
tmp = *(p++);
if (tmp)
goto found_middle_swap;
result += BITS_PER_LONG;
size -= BITS_PER_LONG;
}
if (!size)
return result;
tmp = ext2_swabp(p);
found_first:
tmp &= (~0UL >> (BITS_PER_LONG - size));
if (tmp == 0UL) /* Are any bits set? */
return result + size; /* Nope. */
found_middle:
return result + __ffs(tmp);
found_middle_swap:
return result + __ffs(ext2_swab(tmp));
}
EXPORT_SYMBOL(find_next_bit_le);
#endif
#endif /* __BIG_ENDIAN */

270
lib/iommu-common.c Normal file
View File

@@ -0,0 +1,270 @@
/*
* IOMMU mmap management and range allocation functions.
* Based almost entirely upon the powerpc iommu allocator.
*/
#include <linux/export.h>
#include <linux/bitmap.h>
#include <linux/bug.h>
#include <linux/iommu-helper.h>
#include <linux/iommu-common.h>
#include <linux/dma-mapping.h>
#include <linux/hash.h>
#ifndef DMA_ERROR_CODE
#define DMA_ERROR_CODE (~(dma_addr_t)0x0)
#endif
static unsigned long iommu_large_alloc = 15;
static DEFINE_PER_CPU(unsigned int, iommu_hash_common);
static inline bool need_flush(struct iommu_map_table *iommu)
{
return (iommu->lazy_flush != NULL &&
(iommu->flags & IOMMU_NEED_FLUSH) != 0);
}
static inline void set_flush(struct iommu_map_table *iommu)
{
iommu->flags |= IOMMU_NEED_FLUSH;
}
static inline void clear_flush(struct iommu_map_table *iommu)
{
iommu->flags &= ~IOMMU_NEED_FLUSH;
}
static void setup_iommu_pool_hash(void)
{
unsigned int i;
static bool do_once;
if (do_once)
return;
do_once = true;
for_each_possible_cpu(i)
per_cpu(iommu_hash_common, i) = hash_32(i, IOMMU_POOL_HASHBITS);
}
/*
* Initialize iommu_pool entries for the iommu_map_table. `num_entries'
* is the number of table entries. If `large_pool' is set to true,
* the top 1/4 of the table will be set aside for pool allocations
* of more than iommu_large_alloc pages.
*/
void iommu_tbl_pool_init(struct iommu_map_table *iommu,
unsigned long num_entries,
u32 table_shift,
void (*lazy_flush)(struct iommu_map_table *),
bool large_pool, u32 npools,
bool skip_span_boundary_check)
{
unsigned int start, i;
struct iommu_pool *p = &(iommu->large_pool);
setup_iommu_pool_hash();
if (npools == 0)
iommu->nr_pools = IOMMU_NR_POOLS;
else
iommu->nr_pools = npools;
BUG_ON(npools > IOMMU_NR_POOLS);
iommu->table_shift = table_shift;
iommu->lazy_flush = lazy_flush;
start = 0;
if (skip_span_boundary_check)
iommu->flags |= IOMMU_NO_SPAN_BOUND;
if (large_pool)
iommu->flags |= IOMMU_HAS_LARGE_POOL;
if (!large_pool)
iommu->poolsize = num_entries/iommu->nr_pools;
else
iommu->poolsize = (num_entries * 3 / 4)/iommu->nr_pools;
for (i = 0; i < iommu->nr_pools; i++) {
spin_lock_init(&(iommu->pools[i].lock));
iommu->pools[i].start = start;
iommu->pools[i].hint = start;
start += iommu->poolsize; /* start for next pool */
iommu->pools[i].end = start - 1;
}
if (!large_pool)
return;
/* initialize large_pool */
spin_lock_init(&(p->lock));
p->start = start;
p->hint = p->start;
p->end = num_entries;
}
EXPORT_SYMBOL(iommu_tbl_pool_init);
unsigned long iommu_tbl_range_alloc(struct device *dev,
struct iommu_map_table *iommu,
unsigned long npages,
unsigned long *handle,
unsigned long mask,
unsigned int align_order)
{
unsigned int pool_hash = __this_cpu_read(iommu_hash_common);
unsigned long n, end, start, limit, boundary_size;
struct iommu_pool *pool;
int pass = 0;
unsigned int pool_nr;
unsigned int npools = iommu->nr_pools;
unsigned long flags;
bool large_pool = ((iommu->flags & IOMMU_HAS_LARGE_POOL) != 0);
bool largealloc = (large_pool && npages > iommu_large_alloc);
unsigned long shift;
unsigned long align_mask = 0;
if (align_order > 0)
align_mask = 0xffffffffffffffffl >> (64 - align_order);
/* Sanity check */
if (unlikely(npages == 0)) {
WARN_ON_ONCE(1);
return DMA_ERROR_CODE;
}
if (largealloc) {
pool = &(iommu->large_pool);
pool_nr = 0; /* to keep compiler happy */
} else {
/* pick out pool_nr */
pool_nr = pool_hash & (npools - 1);
pool = &(iommu->pools[pool_nr]);
}
spin_lock_irqsave(&pool->lock, flags);
again:
if (pass == 0 && handle && *handle &&
(*handle >= pool->start) && (*handle < pool->end))
start = *handle;
else
start = pool->hint;
limit = pool->end;
/* The case below can happen if we have a small segment appended
* to a large, or when the previous alloc was at the very end of
* the available space. If so, go back to the beginning. If a
* flush is needed, it will get done based on the return value
* from iommu_area_alloc() below.
*/
if (start >= limit)
start = pool->start;
shift = iommu->table_map_base >> iommu->table_shift;
if (limit + shift > mask) {
limit = mask - shift + 1;
/* If we're constrained on address range, first try
* at the masked hint to avoid O(n) search complexity,
* but on second pass, start at 0 in pool 0.
*/
if ((start & mask) >= limit || pass > 0) {
spin_unlock(&(pool->lock));
pool = &(iommu->pools[0]);
spin_lock(&(pool->lock));
start = pool->start;
} else {
start &= mask;
}
}
if (dev)
boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
1 << iommu->table_shift);
else
boundary_size = ALIGN(1ULL << 32, 1 << iommu->table_shift);
boundary_size = boundary_size >> iommu->table_shift;
/*
* if the skip_span_boundary_check had been set during init, we set
* things up so that iommu_is_span_boundary() merely checks if the
* (index + npages) < num_tsb_entries
*/
if ((iommu->flags & IOMMU_NO_SPAN_BOUND) != 0) {
shift = 0;
boundary_size = iommu->poolsize * iommu->nr_pools;
}
n = iommu_area_alloc(iommu->map, limit, start, npages, shift,
boundary_size, align_mask);
if (n == -1) {
if (likely(pass == 0)) {
/* First failure, rescan from the beginning. */
pool->hint = pool->start;
set_flush(iommu);
pass++;
goto again;
} else if (!largealloc && pass <= iommu->nr_pools) {
spin_unlock(&(pool->lock));
pool_nr = (pool_nr + 1) & (iommu->nr_pools - 1);
pool = &(iommu->pools[pool_nr]);
spin_lock(&(pool->lock));
pool->hint = pool->start;
set_flush(iommu);
pass++;
goto again;
} else {
/* give up */
n = DMA_ERROR_CODE;
goto bail;
}
}
if (n < pool->hint || need_flush(iommu)) {
clear_flush(iommu);
iommu->lazy_flush(iommu);
}
end = n + npages;
pool->hint = end;
/* Update handle for SG allocations */
if (handle)
*handle = end;
bail:
spin_unlock_irqrestore(&(pool->lock), flags);
return n;
}
EXPORT_SYMBOL(iommu_tbl_range_alloc);
static struct iommu_pool *get_pool(struct iommu_map_table *tbl,
unsigned long entry)
{
struct iommu_pool *p;
unsigned long largepool_start = tbl->large_pool.start;
bool large_pool = ((tbl->flags & IOMMU_HAS_LARGE_POOL) != 0);
/* The large pool is the last pool at the top of the table */
if (large_pool && entry >= largepool_start) {
p = &tbl->large_pool;
} else {
unsigned int pool_nr = entry / tbl->poolsize;
BUG_ON(pool_nr >= tbl->nr_pools);
p = &tbl->pools[pool_nr];
}
return p;
}
/* Caller supplies the index of the entry into the iommu map table
* itself when the mapping from dma_addr to the entry is not the
* default addr->entry mapping below.
*/
void iommu_tbl_range_free(struct iommu_map_table *iommu, u64 dma_addr,
unsigned long npages, unsigned long entry)
{
struct iommu_pool *pool;
unsigned long flags;
unsigned long shift = iommu->table_shift;
if (entry == DMA_ERROR_CODE) /* use default addr->entry mapping */
entry = (dma_addr - iommu->table_map_base) >> shift;
pool = get_pool(iommu, entry);
spin_lock_irqsave(&(pool->lock), flags);
bitmap_clear(iommu->map, entry, npages);
spin_unlock_irqrestore(&(pool->lock), flags);
}
EXPORT_SYMBOL(iommu_tbl_range_free);

View File

@@ -131,11 +131,12 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void)
static inline const struct raid6_calls *raid6_choose_gen(
void *(*const dptrs)[(65536/PAGE_SIZE)+2], const int disks)
{
unsigned long perf, bestperf, j0, j1;
unsigned long perf, bestgenperf, bestxorperf, j0, j1;
int start = (disks>>1)-1, stop = disks-3; /* work on the second half of the disks */
const struct raid6_calls *const *algo;
const struct raid6_calls *best;
for (bestperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) {
for (bestgenperf = 0, bestxorperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) {
if (!best || (*algo)->prefer >= best->prefer) {
if ((*algo)->valid && !(*algo)->valid())
continue;
@@ -153,19 +154,45 @@ static inline const struct raid6_calls *raid6_choose_gen(
}
preempt_enable();
if (perf > bestperf) {
bestperf = perf;
if (perf > bestgenperf) {
bestgenperf = perf;
best = *algo;
}
pr_info("raid6: %-8s %5ld MB/s\n", (*algo)->name,
pr_info("raid6: %-8s gen() %5ld MB/s\n", (*algo)->name,
(perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
if (!(*algo)->xor_syndrome)
continue;
perf = 0;
preempt_disable();
j0 = jiffies;
while ((j1 = jiffies) == j0)
cpu_relax();
while (time_before(jiffies,
j1 + (1<<RAID6_TIME_JIFFIES_LG2))) {
(*algo)->xor_syndrome(disks, start, stop,
PAGE_SIZE, *dptrs);
perf++;
}
preempt_enable();
if (best == *algo)
bestxorperf = perf;
pr_info("raid6: %-8s xor() %5ld MB/s\n", (*algo)->name,
(perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1));
}
}
if (best) {
pr_info("raid6: using algorithm %s (%ld MB/s)\n",
pr_info("raid6: using algorithm %s gen() %ld MB/s\n",
best->name,
(bestperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
(bestgenperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
if (best->xor_syndrome)
pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n",
(bestxorperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1));
raid6_call = *best;
} else
pr_err("raid6: Yikes! No algorithm found!\n");

View File

@@ -119,6 +119,7 @@ int raid6_have_altivec(void)
const struct raid6_calls raid6_altivec$# = {
raid6_altivec$#_gen_syndrome,
NULL, /* XOR not yet implemented */
raid6_have_altivec,
"altivecx$#",
0

View File

@@ -89,6 +89,7 @@ static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs)
const struct raid6_calls raid6_avx2x1 = {
raid6_avx21_gen_syndrome,
NULL, /* XOR not yet implemented */
raid6_have_avx2,
"avx2x1",
1 /* Has cache hints */
@@ -150,6 +151,7 @@ static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs)
const struct raid6_calls raid6_avx2x2 = {
raid6_avx22_gen_syndrome,
NULL, /* XOR not yet implemented */
raid6_have_avx2,
"avx2x2",
1 /* Has cache hints */
@@ -242,6 +244,7 @@ static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs)
const struct raid6_calls raid6_avx2x4 = {
raid6_avx24_gen_syndrome,
NULL, /* XOR not yet implemented */
raid6_have_avx2,
"avx2x4",
1 /* Has cache hints */

View File

@@ -107,9 +107,48 @@ static void raid6_int$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
}
}
static void raid6_int$#_xor_syndrome(int disks, int start, int stop,
size_t bytes, void **ptrs)
{
u8 **dptr = (u8 **)ptrs;
u8 *p, *q;
int d, z, z0;
unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
z0 = stop; /* P/Q right side optimization */
p = dptr[disks-2]; /* XOR parity */
q = dptr[disks-1]; /* RS syndrome */
for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
/* P/Q data pages */
wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
for ( z = z0-1 ; z >= start ; z-- ) {
wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
wp$$ ^= wd$$;
w2$$ = MASK(wq$$);
w1$$ = SHLBYTE(wq$$);
w2$$ &= NBYTES(0x1d);
w1$$ ^= w2$$;
wq$$ = w1$$ ^ wd$$;
}
/* P/Q left side optimization */
for ( z = start-1 ; z >= 0 ; z-- ) {
w2$$ = MASK(wq$$);
w1$$ = SHLBYTE(wq$$);
w2$$ &= NBYTES(0x1d);
wq$$ = w1$$ ^ w2$$;
}
*(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
*(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
}
}
const struct raid6_calls raid6_intx$# = {
raid6_int$#_gen_syndrome,
NULL, /* always valid */
raid6_int$#_xor_syndrome,
NULL, /* always valid */
"int" NSTRING "x$#",
0
};

View File

@@ -76,6 +76,7 @@ static void raid6_mmx1_gen_syndrome(int disks, size_t bytes, void **ptrs)
const struct raid6_calls raid6_mmxx1 = {
raid6_mmx1_gen_syndrome,
NULL, /* XOR not yet implemented */
raid6_have_mmx,
"mmxx1",
0
@@ -134,6 +135,7 @@ static void raid6_mmx2_gen_syndrome(int disks, size_t bytes, void **ptrs)
const struct raid6_calls raid6_mmxx2 = {
raid6_mmx2_gen_syndrome,
NULL, /* XOR not yet implemented */
raid6_have_mmx,
"mmxx2",
0

View File

@@ -42,6 +42,7 @@
} \
struct raid6_calls const raid6_neonx ## _n = { \
raid6_neon ## _n ## _gen_syndrome, \
NULL, /* XOR not yet implemented */ \
raid6_have_neon, \
"neonx" #_n, \
0 \

View File

@@ -92,6 +92,7 @@ static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs)
const struct raid6_calls raid6_sse1x1 = {
raid6_sse11_gen_syndrome,
NULL, /* XOR not yet implemented */
raid6_have_sse1_or_mmxext,
"sse1x1",
1 /* Has cache hints */
@@ -154,6 +155,7 @@ static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs)
const struct raid6_calls raid6_sse1x2 = {
raid6_sse12_gen_syndrome,
NULL, /* XOR not yet implemented */
raid6_have_sse1_or_mmxext,
"sse1x2",
1 /* Has cache hints */

View File

@@ -88,8 +88,58 @@ static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs)
kernel_fpu_end();
}
static void raid6_sse21_xor_syndrome(int disks, int start, int stop,
size_t bytes, void **ptrs)
{
u8 **dptr = (u8 **)ptrs;
u8 *p, *q;
int d, z, z0;
z0 = stop; /* P/Q right side optimization */
p = dptr[disks-2]; /* XOR parity */
q = dptr[disks-1]; /* RS syndrome */
kernel_fpu_begin();
asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
for ( d = 0 ; d < bytes ; d += 16 ) {
asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
asm volatile("pxor %xmm4,%xmm2");
/* P/Q data pages */
for ( z = z0-1 ; z >= start ; z-- ) {
asm volatile("pxor %xmm5,%xmm5");
asm volatile("pcmpgtb %xmm4,%xmm5");
asm volatile("paddb %xmm4,%xmm4");
asm volatile("pand %xmm0,%xmm5");
asm volatile("pxor %xmm5,%xmm4");
asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
asm volatile("pxor %xmm5,%xmm2");
asm volatile("pxor %xmm5,%xmm4");
}
/* P/Q left side optimization */
for ( z = start-1 ; z >= 0 ; z-- ) {
asm volatile("pxor %xmm5,%xmm5");
asm volatile("pcmpgtb %xmm4,%xmm5");
asm volatile("paddb %xmm4,%xmm4");
asm volatile("pand %xmm0,%xmm5");
asm volatile("pxor %xmm5,%xmm4");
}
asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
/* Don't use movntdq for r/w memory area < cache line */
asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
}
asm volatile("sfence" : : : "memory");
kernel_fpu_end();
}
const struct raid6_calls raid6_sse2x1 = {
raid6_sse21_gen_syndrome,
raid6_sse21_xor_syndrome,
raid6_have_sse2,
"sse2x1",
1 /* Has cache hints */
@@ -150,8 +200,76 @@ static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
kernel_fpu_end();
}
static void raid6_sse22_xor_syndrome(int disks, int start, int stop,
size_t bytes, void **ptrs)
{
u8 **dptr = (u8 **)ptrs;
u8 *p, *q;
int d, z, z0;
z0 = stop; /* P/Q right side optimization */
p = dptr[disks-2]; /* XOR parity */
q = dptr[disks-1]; /* RS syndrome */
kernel_fpu_begin();
asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
for ( d = 0 ; d < bytes ; d += 32 ) {
asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
asm volatile("pxor %xmm4,%xmm2");
asm volatile("pxor %xmm6,%xmm3");
/* P/Q data pages */
for ( z = z0-1 ; z >= start ; z-- ) {
asm volatile("pxor %xmm5,%xmm5");
asm volatile("pxor %xmm7,%xmm7");
asm volatile("pcmpgtb %xmm4,%xmm5");
asm volatile("pcmpgtb %xmm6,%xmm7");
asm volatile("paddb %xmm4,%xmm4");
asm volatile("paddb %xmm6,%xmm6");
asm volatile("pand %xmm0,%xmm5");
asm volatile("pand %xmm0,%xmm7");
asm volatile("pxor %xmm5,%xmm4");
asm volatile("pxor %xmm7,%xmm6");
asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
asm volatile("pxor %xmm5,%xmm2");
asm volatile("pxor %xmm7,%xmm3");
asm volatile("pxor %xmm5,%xmm4");
asm volatile("pxor %xmm7,%xmm6");
}
/* P/Q left side optimization */
for ( z = start-1 ; z >= 0 ; z-- ) {
asm volatile("pxor %xmm5,%xmm5");
asm volatile("pxor %xmm7,%xmm7");
asm volatile("pcmpgtb %xmm4,%xmm5");
asm volatile("pcmpgtb %xmm6,%xmm7");
asm volatile("paddb %xmm4,%xmm4");
asm volatile("paddb %xmm6,%xmm6");
asm volatile("pand %xmm0,%xmm5");
asm volatile("pand %xmm0,%xmm7");
asm volatile("pxor %xmm5,%xmm4");
asm volatile("pxor %xmm7,%xmm6");
}
asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));
/* Don't use movntdq for r/w memory area < cache line */
asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
asm volatile("movdqa %%xmm6,%0" : "=m" (q[d+16]));
asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
asm volatile("movdqa %%xmm3,%0" : "=m" (p[d+16]));
}
asm volatile("sfence" : : : "memory");
kernel_fpu_end();
}
const struct raid6_calls raid6_sse2x2 = {
raid6_sse22_gen_syndrome,
raid6_sse22_xor_syndrome,
raid6_have_sse2,
"sse2x2",
1 /* Has cache hints */
@@ -248,8 +366,117 @@ static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
kernel_fpu_end();
}
static void raid6_sse24_xor_syndrome(int disks, int start, int stop,
size_t bytes, void **ptrs)
{
u8 **dptr = (u8 **)ptrs;
u8 *p, *q;
int d, z, z0;
z0 = stop; /* P/Q right side optimization */
p = dptr[disks-2]; /* XOR parity */
q = dptr[disks-1]; /* RS syndrome */
kernel_fpu_begin();
asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));
for ( d = 0 ; d < bytes ; d += 64 ) {
asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
asm volatile("movdqa %0,%%xmm12" :: "m" (dptr[z0][d+32]));
asm volatile("movdqa %0,%%xmm14" :: "m" (dptr[z0][d+48]));
asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
asm volatile("movdqa %0,%%xmm10" : : "m" (p[d+32]));
asm volatile("movdqa %0,%%xmm11" : : "m" (p[d+48]));
asm volatile("pxor %xmm4,%xmm2");
asm volatile("pxor %xmm6,%xmm3");
asm volatile("pxor %xmm12,%xmm10");
asm volatile("pxor %xmm14,%xmm11");
/* P/Q data pages */
for ( z = z0-1 ; z >= start ; z-- ) {
asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
asm volatile("pxor %xmm5,%xmm5");
asm volatile("pxor %xmm7,%xmm7");
asm volatile("pxor %xmm13,%xmm13");
asm volatile("pxor %xmm15,%xmm15");
asm volatile("pcmpgtb %xmm4,%xmm5");
asm volatile("pcmpgtb %xmm6,%xmm7");
asm volatile("pcmpgtb %xmm12,%xmm13");
asm volatile("pcmpgtb %xmm14,%xmm15");
asm volatile("paddb %xmm4,%xmm4");
asm volatile("paddb %xmm6,%xmm6");
asm volatile("paddb %xmm12,%xmm12");
asm volatile("paddb %xmm14,%xmm14");
asm volatile("pand %xmm0,%xmm5");
asm volatile("pand %xmm0,%xmm7");
asm volatile("pand %xmm0,%xmm13");
asm volatile("pand %xmm0,%xmm15");
asm volatile("pxor %xmm5,%xmm4");
asm volatile("pxor %xmm7,%xmm6");
asm volatile("pxor %xmm13,%xmm12");
asm volatile("pxor %xmm15,%xmm14");
asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
asm volatile("pxor %xmm5,%xmm2");
asm volatile("pxor %xmm7,%xmm3");
asm volatile("pxor %xmm13,%xmm10");
asm volatile("pxor %xmm15,%xmm11");
asm volatile("pxor %xmm5,%xmm4");
asm volatile("pxor %xmm7,%xmm6");
asm volatile("pxor %xmm13,%xmm12");
asm volatile("pxor %xmm15,%xmm14");
}
asm volatile("prefetchnta %0" :: "m" (q[d]));
asm volatile("prefetchnta %0" :: "m" (q[d+32]));
/* P/Q left side optimization */
for ( z = start-1 ; z >= 0 ; z-- ) {
asm volatile("pxor %xmm5,%xmm5");
asm volatile("pxor %xmm7,%xmm7");
asm volatile("pxor %xmm13,%xmm13");
asm volatile("pxor %xmm15,%xmm15");
asm volatile("pcmpgtb %xmm4,%xmm5");
asm volatile("pcmpgtb %xmm6,%xmm7");
asm volatile("pcmpgtb %xmm12,%xmm13");
asm volatile("pcmpgtb %xmm14,%xmm15");
asm volatile("paddb %xmm4,%xmm4");
asm volatile("paddb %xmm6,%xmm6");
asm volatile("paddb %xmm12,%xmm12");
asm volatile("paddb %xmm14,%xmm14");
asm volatile("pand %xmm0,%xmm5");
asm volatile("pand %xmm0,%xmm7");
asm volatile("pand %xmm0,%xmm13");
asm volatile("pand %xmm0,%xmm15");
asm volatile("pxor %xmm5,%xmm4");
asm volatile("pxor %xmm7,%xmm6");
asm volatile("pxor %xmm13,%xmm12");
asm volatile("pxor %xmm15,%xmm14");
}
asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));
asm volatile("pxor %0,%%xmm12" : : "m" (q[d+32]));
asm volatile("pxor %0,%%xmm14" : : "m" (q[d+48]));
asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
}
asm volatile("sfence" : : : "memory");
kernel_fpu_end();
}
const struct raid6_calls raid6_sse2x4 = {
raid6_sse24_gen_syndrome,
raid6_sse24_xor_syndrome,
raid6_have_sse2,
"sse2x4",
1 /* Has cache hints */

View File

@@ -28,11 +28,11 @@ char *dataptrs[NDISKS];
char data[NDISKS][PAGE_SIZE];
char recovi[PAGE_SIZE], recovj[PAGE_SIZE];
static void makedata(void)
static void makedata(int start, int stop)
{
int i, j;
for (i = 0; i < NDISKS; i++) {
for (i = start; i <= stop; i++) {
for (j = 0; j < PAGE_SIZE; j++)
data[i][j] = rand();
@@ -91,34 +91,55 @@ int main(int argc, char *argv[])
{
const struct raid6_calls *const *algo;
const struct raid6_recov_calls *const *ra;
int i, j;
int i, j, p1, p2;
int err = 0;
makedata();
makedata(0, NDISKS-1);
for (ra = raid6_recov_algos; *ra; ra++) {
if ((*ra)->valid && !(*ra)->valid())
continue;
raid6_2data_recov = (*ra)->data2;
raid6_datap_recov = (*ra)->datap;
printf("using recovery %s\n", (*ra)->name);
for (algo = raid6_algos; *algo; algo++) {
if (!(*algo)->valid || (*algo)->valid()) {
raid6_call = **algo;
if ((*algo)->valid && !(*algo)->valid())
continue;
/* Nuke syndromes */
memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE);
raid6_call = **algo;
/* Generate assumed good syndrome */
raid6_call.gen_syndrome(NDISKS, PAGE_SIZE,
(void **)&dataptrs);
/* Nuke syndromes */
memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE);
/* Generate assumed good syndrome */
raid6_call.gen_syndrome(NDISKS, PAGE_SIZE,
(void **)&dataptrs);
for (i = 0; i < NDISKS-1; i++)
for (j = i+1; j < NDISKS; j++)
err += test_disks(i, j);
if (!raid6_call.xor_syndrome)
continue;
for (p1 = 0; p1 < NDISKS-2; p1++)
for (p2 = p1; p2 < NDISKS-2; p2++) {
/* Simulate rmw run */
raid6_call.xor_syndrome(NDISKS, p1, p2, PAGE_SIZE,
(void **)&dataptrs);
makedata(p1, p2);
raid6_call.xor_syndrome(NDISKS, p1, p2, PAGE_SIZE,
(void **)&dataptrs);
for (i = 0; i < NDISKS-1; i++)
for (j = i+1; j < NDISKS; j++)
err += test_disks(i, j);
}
for (i = 0; i < NDISKS-1; i++)
for (j = i+1; j < NDISKS; j++)
err += test_disks(i, j);
}
}
printf("\n");
}

View File

@@ -80,6 +80,7 @@ void raid6_tilegx$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
const struct raid6_calls raid6_tilegx$# = {
raid6_tilegx$#_gen_syndrome,
NULL, /* XOR not yet implemented */
NULL,
"tilegx$#",
0

View File

@@ -4,6 +4,7 @@
* Copyright 31 August 2008 James Bottomley
* Copyright (C) 2013, Intel Corporation
*/
#include <linux/bug.h>
#include <linux/kernel.h>
#include <linux/math64.h>
#include <linux/export.h>
@@ -14,7 +15,8 @@
/**
* string_get_size - get the size in the specified units
* @size: The size to be converted
* @size: The size to be converted in blocks
* @blk_size: Size of the block (use 1 for size in bytes)
* @units: units to use (powers of 1000 or 1024)
* @buf: buffer to format to
* @len: length of buffer
@@ -24,14 +26,14 @@
* at least 9 bytes and will always be zero terminated.
*
*/
void string_get_size(u64 size, const enum string_size_units units,
void string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
char *buf, int len)
{
static const char *const units_10[] = {
"B", "kB", "MB", "GB", "TB", "PB", "EB"
"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"
};
static const char *const units_2[] = {
"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB"
"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"
};
static const char *const *const units_str[] = {
[STRING_UNITS_10] = units_10,
@@ -42,31 +44,57 @@ void string_get_size(u64 size, const enum string_size_units units,
[STRING_UNITS_2] = 1024,
};
int i, j;
u32 remainder = 0, sf_cap;
u32 remainder = 0, sf_cap, exp;
char tmp[8];
const char *unit;
tmp[0] = '\0';
i = 0;
if (size >= divisor[units]) {
while (size >= divisor[units]) {
remainder = do_div(size, divisor[units]);
i++;
}
if (!size)
goto out;
sf_cap = size;
for (j = 0; sf_cap*10 < 1000; j++)
sf_cap *= 10;
if (j) {
remainder *= 1000;
remainder /= divisor[units];
snprintf(tmp, sizeof(tmp), ".%03u", remainder);
tmp[j+1] = '\0';
}
while (blk_size >= divisor[units]) {
remainder = do_div(blk_size, divisor[units]);
i++;
}
exp = divisor[units] / (u32)blk_size;
if (size >= exp) {
remainder = do_div(size, divisor[units]);
remainder *= blk_size;
i++;
} else {
remainder *= size;
}
size *= blk_size;
size += remainder / divisor[units];
remainder %= divisor[units];
while (size >= divisor[units]) {
remainder = do_div(size, divisor[units]);
i++;
}
sf_cap = size;
for (j = 0; sf_cap*10 < 1000; j++)
sf_cap *= 10;
if (j) {
remainder *= 1000;
remainder /= divisor[units];
snprintf(tmp, sizeof(tmp), ".%03u", remainder);
tmp[j+1] = '\0';
}
out:
if (i >= ARRAY_SIZE(units_2))
unit = "UNK";
else
unit = units_str[units][i];
snprintf(buf, len, "%u%s %s", (u32)size,
tmp, units_str[units][i]);
tmp, unit);
}
EXPORT_SYMBOL(string_get_size);

View File

@@ -48,7 +48,7 @@ static void __init test_hexdump(size_t len, int rowsize, int groupsize,
char test[32 * 3 + 2 + 32 + 1];
char real[32 * 3 + 2 + 32 + 1];
char *p;
const char **result;
const char * const *result;
size_t l = len;
int gs = groupsize, rs = rowsize;
unsigned int i;

View File

@@ -33,6 +33,7 @@
#include <asm/page.h> /* for PAGE_SIZE */
#include <asm/sections.h> /* for dereference_function_descriptor() */
#include <asm/byteorder.h> /* cpu_to_le16 */
#include <linux/string_helpers.h>
#include "kstrtox.h"
@@ -122,142 +123,145 @@ int skip_atoi(const char **s)
return i;
}
/* Decimal conversion is by far the most typical, and is used
* for /proc and /sys data. This directly impacts e.g. top performance
* with many processes running. We optimize it for speed
* using ideas described at <http://www.cs.uiowa.edu/~jones/bcd/divide.html>
* (with permission from the author, Douglas W. Jones).
/*
* Decimal conversion is by far the most typical, and is used for
* /proc and /sys data. This directly impacts e.g. top performance
* with many processes running. We optimize it for speed by emitting
* two characters at a time, using a 200 byte lookup table. This
* roughly halves the number of multiplications compared to computing
* the digits one at a time. Implementation strongly inspired by the
* previous version, which in turn used ideas described at
* <http://www.cs.uiowa.edu/~jones/bcd/divide.html> (with permission
* from the author, Douglas W. Jones).
*
* It turns out there is precisely one 26 bit fixed-point
* approximation a of 64/100 for which x/100 == (x * (u64)a) >> 32
* holds for all x in [0, 10^8-1], namely a = 0x28f5c29. The actual
* range happens to be somewhat larger (x <= 1073741898), but that's
* irrelevant for our purpose.
*
* For dividing a number in the range [10^4, 10^6-1] by 100, we still
* need a 32x32->64 bit multiply, so we simply use the same constant.
*
* For dividing a number in the range [100, 10^4-1] by 100, there are
* several options. The simplest is (x * 0x147b) >> 19, which is valid
* for all x <= 43698.
*/
#if BITS_PER_LONG != 32 || BITS_PER_LONG_LONG != 64
/* Formats correctly any integer in [0, 999999999] */
static noinline_for_stack
char *put_dec_full9(char *buf, unsigned q)
{
unsigned r;
static const u16 decpair[100] = {
#define _(x) (__force u16) cpu_to_le16(((x % 10) | ((x / 10) << 8)) + 0x3030)
_( 0), _( 1), _( 2), _( 3), _( 4), _( 5), _( 6), _( 7), _( 8), _( 9),
_(10), _(11), _(12), _(13), _(14), _(15), _(16), _(17), _(18), _(19),
_(20), _(21), _(22), _(23), _(24), _(25), _(26), _(27), _(28), _(29),
_(30), _(31), _(32), _(33), _(34), _(35), _(36), _(37), _(38), _(39),
_(40), _(41), _(42), _(43), _(44), _(45), _(46), _(47), _(48), _(49),
_(50), _(51), _(52), _(53), _(54), _(55), _(56), _(57), _(58), _(59),
_(60), _(61), _(62), _(63), _(64), _(65), _(66), _(67), _(68), _(69),
_(70), _(71), _(72), _(73), _(74), _(75), _(76), _(77), _(78), _(79),
_(80), _(81), _(82), _(83), _(84), _(85), _(86), _(87), _(88), _(89),
_(90), _(91), _(92), _(93), _(94), _(95), _(96), _(97), _(98), _(99),
#undef _
};
/*
* Possible ways to approx. divide by 10
* (x * 0x1999999a) >> 32 x < 1073741829 (multiply must be 64-bit)
* (x * 0xcccd) >> 19 x < 81920 (x < 262149 when 64-bit mul)
* (x * 0x6667) >> 18 x < 43699
* (x * 0x3334) >> 17 x < 16389
* (x * 0x199a) >> 16 x < 16389
* (x * 0x0ccd) >> 15 x < 16389
* (x * 0x0667) >> 14 x < 2739
* (x * 0x0334) >> 13 x < 1029
* (x * 0x019a) >> 12 x < 1029
* (x * 0x00cd) >> 11 x < 1029 shorter code than * 0x67 (on i386)
* (x * 0x0067) >> 10 x < 179
* (x * 0x0034) >> 9 x < 69 same
* (x * 0x001a) >> 8 x < 69 same
* (x * 0x000d) >> 7 x < 69 same, shortest code (on i386)
* (x * 0x0007) >> 6 x < 19
* See <http://www.cs.uiowa.edu/~jones/bcd/divide.html>
*/
r = (q * (uint64_t)0x1999999a) >> 32;
*buf++ = (q - 10 * r) + '0'; /* 1 */
q = (r * (uint64_t)0x1999999a) >> 32;
*buf++ = (r - 10 * q) + '0'; /* 2 */
r = (q * (uint64_t)0x1999999a) >> 32;
*buf++ = (q - 10 * r) + '0'; /* 3 */
q = (r * (uint64_t)0x1999999a) >> 32;
*buf++ = (r - 10 * q) + '0'; /* 4 */
r = (q * (uint64_t)0x1999999a) >> 32;
*buf++ = (q - 10 * r) + '0'; /* 5 */
/* Now value is under 10000, can avoid 64-bit multiply */
q = (r * 0x199a) >> 16;
*buf++ = (r - 10 * q) + '0'; /* 6 */
r = (q * 0xcd) >> 11;
*buf++ = (q - 10 * r) + '0'; /* 7 */
q = (r * 0xcd) >> 11;
*buf++ = (r - 10 * q) + '0'; /* 8 */
*buf++ = q + '0'; /* 9 */
return buf;
}
#endif
/* Similar to above but do not pad with zeros.
* Code can be easily arranged to print 9 digits too, but our callers
* always call put_dec_full9() instead when the number has 9 decimal digits.
*/
/*
* This will print a single '0' even if r == 0, since we would
* immediately jump to out_r where two 0s would be written but only
* one of them accounted for in buf. This is needed by ip4_string
* below. All other callers pass a non-zero value of r.
*/
static noinline_for_stack
char *put_dec_trunc8(char *buf, unsigned r)
{
unsigned q;
/* Copy of previous function's body with added early returns */
while (r >= 10000) {
q = r + '0';
r = (r * (uint64_t)0x1999999a) >> 32;
*buf++ = q - 10*r;
}
/* 1 <= r < 10^8 */
if (r < 100)
goto out_r;
q = (r * 0x199a) >> 16; /* r <= 9999 */
*buf++ = (r - 10 * q) + '0';
if (q == 0)
return buf;
r = (q * 0xcd) >> 11; /* q <= 999 */
*buf++ = (q - 10 * r) + '0';
if (r == 0)
return buf;
q = (r * 0xcd) >> 11; /* r <= 99 */
*buf++ = (r - 10 * q) + '0';
if (q == 0)
return buf;
*buf++ = q + '0'; /* q <= 9 */
/* 100 <= r < 10^8 */
q = (r * (u64)0x28f5c29) >> 32;
*((u16 *)buf) = decpair[r - 100*q];
buf += 2;
/* 1 <= q < 10^6 */
if (q < 100)
goto out_q;
/* 100 <= q < 10^6 */
r = (q * (u64)0x28f5c29) >> 32;
*((u16 *)buf) = decpair[q - 100*r];
buf += 2;
/* 1 <= r < 10^4 */
if (r < 100)
goto out_r;
/* 100 <= r < 10^4 */
q = (r * 0x147b) >> 19;
*((u16 *)buf) = decpair[r - 100*q];
buf += 2;
out_q:
/* 1 <= q < 100 */
r = q;
out_r:
/* 1 <= r < 100 */
*((u16 *)buf) = decpair[r];
buf += r < 10 ? 1 : 2;
return buf;
}
/* There are two algorithms to print larger numbers.
* One is generic: divide by 1000000000 and repeatedly print
* groups of (up to) 9 digits. It's conceptually simple,
* but requires a (unsigned long long) / 1000000000 division.
*
* Second algorithm splits 64-bit unsigned long long into 16-bit chunks,
* manipulates them cleverly and generates groups of 4 decimal digits.
* It so happens that it does NOT require long long division.
*
* If long is > 32 bits, division of 64-bit values is relatively easy,
* and we will use the first algorithm.
* If long long is > 64 bits (strange architecture with VERY large long long),
* second algorithm can't be used, and we again use the first one.
*
* Else (if long is 32 bits and long long is 64 bits) we use second one.
*/
#if BITS_PER_LONG == 64 && BITS_PER_LONG_LONG == 64
static noinline_for_stack
char *put_dec_full8(char *buf, unsigned r)
{
unsigned q;
#if BITS_PER_LONG != 32 || BITS_PER_LONG_LONG != 64
/* 0 <= r < 10^8 */
q = (r * (u64)0x28f5c29) >> 32;
*((u16 *)buf) = decpair[r - 100*q];
buf += 2;
/* First algorithm: generic */
/* 0 <= q < 10^6 */
r = (q * (u64)0x28f5c29) >> 32;
*((u16 *)buf) = decpair[q - 100*r];
buf += 2;
static
/* 0 <= r < 10^4 */
q = (r * 0x147b) >> 19;
*((u16 *)buf) = decpair[r - 100*q];
buf += 2;
/* 0 <= q < 100 */
*((u16 *)buf) = decpair[q];
buf += 2;
return buf;
}
static noinline_for_stack
char *put_dec(char *buf, unsigned long long n)
{
if (n >= 100*1000*1000) {
while (n >= 1000*1000*1000)
buf = put_dec_full9(buf, do_div(n, 1000*1000*1000));
if (n >= 100*1000*1000)
return put_dec_full9(buf, n);
}
if (n >= 100*1000*1000)
buf = put_dec_full8(buf, do_div(n, 100*1000*1000));
/* 1 <= n <= 1.6e11 */
if (n >= 100*1000*1000)
buf = put_dec_full8(buf, do_div(n, 100*1000*1000));
/* 1 <= n < 1e8 */
return put_dec_trunc8(buf, n);
}
#else
#elif BITS_PER_LONG == 32 && BITS_PER_LONG_LONG == 64
/* Second algorithm: valid only for 64-bit long longs */
/* See comment in put_dec_full9 for choice of constants */
static noinline_for_stack
void put_dec_full4(char *buf, unsigned q)
static void
put_dec_full4(char *buf, unsigned r)
{
unsigned r;
r = (q * 0xccd) >> 15;
buf[0] = (q - 10 * r) + '0';
q = (r * 0xcd) >> 11;
buf[1] = (r - 10 * q) + '0';
r = (q * 0xcd) >> 11;
buf[2] = (q - 10 * r) + '0';
buf[3] = r + '0';
unsigned q;
/* 0 <= r < 10^4 */
q = (r * 0x147b) >> 19;
*((u16 *)buf) = decpair[r - 100*q];
buf += 2;
/* 0 <= q < 100 */
*((u16 *)buf) = decpair[q];
}
/*
@@ -265,9 +269,9 @@ void put_dec_full4(char *buf, unsigned q)
* The approximation x/10000 == (x * 0x346DC5D7) >> 43
* holds for all x < 1,128,869,999. The largest value this
* helper will ever be asked to convert is 1,125,520,955.
* (d1 in the put_dec code, assuming n is all-ones).
* (second call in the put_dec code, assuming n is all-ones).
*/
static
static noinline_for_stack
unsigned put_dec_helper4(char *buf, unsigned x)
{
uint32_t q = (x * (uint64_t)0x346DC5D7) >> 43;
@@ -294,6 +298,8 @@ char *put_dec(char *buf, unsigned long long n)
d2 = (h ) & 0xffff;
d3 = (h >> 16); /* implicit "& 0xffff" */
/* n = 2^48 d3 + 2^32 d2 + 2^16 d1 + d0
= 281_4749_7671_0656 d3 + 42_9496_7296 d2 + 6_5536 d1 + d0 */
q = 656 * d3 + 7296 * d2 + 5536 * d1 + ((uint32_t)n & 0xffff);
q = put_dec_helper4(buf, q);
@@ -323,7 +329,8 @@ char *put_dec(char *buf, unsigned long long n)
*/
int num_to_str(char *buf, int size, unsigned long long num)
{
char tmp[sizeof(num) * 3];
/* put_dec requires 2-byte alignment of the buffer. */
char tmp[sizeof(num) * 3] __aligned(2);
int idx, len;
/* put_dec() may work incorrectly for num = 0 (generate "", not "0") */
@@ -384,7 +391,8 @@ static noinline_for_stack
char *number(char *buf, char *end, unsigned long long num,
struct printf_spec spec)
{
char tmp[3 * sizeof(num)];
/* put_dec requires 2-byte alignment of the buffer. */
char tmp[3 * sizeof(num)] __aligned(2);
char sign;
char locase;
int need_pfx = ((spec.flags & SPECIAL) && spec.base != 10);
@@ -944,7 +952,7 @@ char *ip4_string(char *p, const u8 *addr, const char *fmt)
break;
}
for (i = 0; i < 4; i++) {
char temp[3]; /* hold each IP quad in reverse order */
char temp[4] __aligned(2); /* hold each IP quad in reverse order */
int digits = put_dec_trunc8(temp, addr[index]) - temp;
if (leading_zeros) {
if (digits < 3)