Merge branch 'mvebu/drivers' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc
Merge the mvebu/drivers branch of the arm-soc tree, which contains just a
single patch, bfa1ce5f38 ("bus: mvebu-mbus: add mv_mbus_dram_info_nooverlap()"),
that happens to be a prerequisite of the new marvell/cesa crypto driver.
lib/Kconfig
@@ -18,9 +18,8 @@ config HAVE_ARCH_BITREVERSE
 	default n
 	depends on BITREVERSE
 	help
-	  This option provides an config for the architecture which have instruction
-	  can do bitreverse operation, we use the hardware instruction if the architecture
-	  have this capability.
+	  This option enables the use of hardware bit-reversal instructions on
+	  architectures which support such operations.
 
 config RATIONAL
 	bool
@@ -403,10 +402,6 @@ config CPUMASK_OFFSTACK
 	  them on the stack.  This is a bit more expensive, but avoids
 	  stack overflow.
 
-config DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
-	bool "Disable obsolete cpumask functions" if DEBUG_PER_CPU_MAPS
-	depends on BROKEN
-
 config CPU_RMAP
 	bool
 	depends on SMP
lib/Makefile
@@ -25,7 +25,7 @@ obj-y += lockref.o
 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
	 bust_spinlocks.o kasprintf.o bitmap.o scatterlist.o \
	 gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \
-	 bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \
+	 bsearch.o find_bit.o llist.o memweight.o kfifo.o \
	 percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o
 obj-y += string_helpers.o
 obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
@@ -108,7 +108,7 @@ obj-$(CONFIG_AUDIT_GENERIC) += audit.o
 obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o
 
 obj-$(CONFIG_SWIOTLB) += swiotlb.o
-obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o
+obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o iommu-common.o
 obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o
 obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o
 obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o
lib/bitmap.c (30 changed lines)
@@ -42,36 +42,6 @@
  * for the best explanations of this ordering.
  */
 
-int __bitmap_empty(const unsigned long *bitmap, unsigned int bits)
-{
-	unsigned int k, lim = bits/BITS_PER_LONG;
-	for (k = 0; k < lim; ++k)
-		if (bitmap[k])
-			return 0;
-
-	if (bits % BITS_PER_LONG)
-		if (bitmap[k] & BITMAP_LAST_WORD_MASK(bits))
-			return 0;
-
-	return 1;
-}
-EXPORT_SYMBOL(__bitmap_empty);
-
-int __bitmap_full(const unsigned long *bitmap, unsigned int bits)
-{
-	unsigned int k, lim = bits/BITS_PER_LONG;
-	for (k = 0; k < lim; ++k)
-		if (~bitmap[k])
-			return 0;
-
-	if (bits % BITS_PER_LONG)
-		if (~bitmap[k] & BITMAP_LAST_WORD_MASK(bits))
-			return 0;
-
-	return 1;
-}
-EXPORT_SYMBOL(__bitmap_full);
-
 int __bitmap_equal(const unsigned long *bitmap1,
 		const unsigned long *bitmap2, unsigned int bits)
 {
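The two helpers removed here are subsumed by the find_*_bit() primitives added
in lib/find_bit.c later in this merge; conceptually, the replacement checks
reduce to the following (a userspace sketch of the equivalent logic, not the
exact header text):

    /* Provided by the find_bit implementation added below. */
    unsigned long find_first_bit(const unsigned long *addr, unsigned long size);
    unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size);

    /* Empty: no set bit before nbits; full: no clear bit before nbits. */
    static inline int bitmap_empty(const unsigned long *src, unsigned int nbits)
    {
    	return find_first_bit(src, nbits) == nbits;
    }

    static inline int bitmap_full(const unsigned long *src, unsigned int nbits)
    {
    	return find_first_zero_bit(src, nbits) == nbits;
    }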
lib/cpumask.c
@@ -5,27 +5,6 @@
 #include <linux/export.h>
 #include <linux/bootmem.h>
 
-int __first_cpu(const cpumask_t *srcp)
-{
-	return min_t(int, NR_CPUS, find_first_bit(srcp->bits, NR_CPUS));
-}
-EXPORT_SYMBOL(__first_cpu);
-
-int __next_cpu(int n, const cpumask_t *srcp)
-{
-	return min_t(int, NR_CPUS, find_next_bit(srcp->bits, NR_CPUS, n+1));
-}
-EXPORT_SYMBOL(__next_cpu);
-
-#if NR_CPUS > 64
-int __next_cpu_nr(int n, const cpumask_t *srcp)
-{
-	return min_t(int, nr_cpu_ids,
-		find_next_bit(srcp->bits, nr_cpu_ids, n+1));
-}
-EXPORT_SYMBOL(__next_cpu_nr);
-#endif
-
 /**
  * cpumask_next_and - get the next cpu in *src1p & *src2p
  * @n: the cpu prior to the place to search (ie. return will be > @n)
@@ -37,10 +16,11 @@ EXPORT_SYMBOL(__next_cpu_nr);
 int cpumask_next_and(int n, const struct cpumask *src1p,
		     const struct cpumask *src2p)
 {
-	while ((n = cpumask_next(n, src1p)) < nr_cpu_ids)
-		if (cpumask_test_cpu(n, src2p))
-			break;
-	return n;
+	struct cpumask tmp;
+
+	if (cpumask_and(&tmp, src1p, src2p))
+		return cpumask_next(n, &tmp);
+	return nr_cpu_ids;
 }
 EXPORT_SYMBOL(cpumask_next_and);
 
@@ -89,13 +69,6 @@ bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node)
 		dump_stack();
 	}
 #endif
-	/* FIXME: Bandaid to save us from old primitives which go to NR_CPUS. */
-	if (*mask) {
-		unsigned char *ptr = (unsigned char *)cpumask_bits(*mask);
-		unsigned int tail;
-		tail = BITS_TO_LONGS(NR_CPUS - nr_cpumask_bits) * sizeof(long);
-		memset(ptr + cpumask_size() - tail, 0, tail);
-	}
 
 	return *mask != NULL;
 }
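The rewritten cpumask_next_and() replaces the cpumask_next()/test loop with a
single AND into a stack-local mask followed by one scan. A sketch of a typical
caller (policy_mask is an invented name here; cpu_online_mask is the usual
kernel-provided mask):

    int cpu = -1;

    /* Visit CPUs that are both online and permitted by policy_mask;
     * this is the pattern behind helpers such as for_each_cpu_and(). */
    while ((cpu = cpumask_next_and(cpu, cpu_online_mask, policy_mask)) < nr_cpu_ids)
    	pr_debug("considering CPU %d\n", cpu);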
lib/devres.c (28 changed lines)
@@ -71,6 +71,34 @@ void __iomem *devm_ioremap_nocache(struct device *dev, resource_size_t offset,
 }
 EXPORT_SYMBOL(devm_ioremap_nocache);
 
+/**
+ * devm_ioremap_wc - Managed ioremap_wc()
+ * @dev: Generic device to remap IO address for
+ * @offset: BUS offset to map
+ * @size: Size of map
+ *
+ * Managed ioremap_wc().  Map is automatically unmapped on driver detach.
+ */
+void __iomem *devm_ioremap_wc(struct device *dev, resource_size_t offset,
+			      resource_size_t size)
+{
+	void __iomem **ptr, *addr;
+
+	ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return NULL;
+
+	addr = ioremap_wc(offset, size);
+	if (addr) {
+		*ptr = addr;
+		devres_add(dev, ptr);
+	} else
+		devres_free(ptr);
+
+	return addr;
+}
+EXPORT_SYMBOL(devm_ioremap_wc);
+
 /**
  * devm_iounmap - Managed iounmap()
  * @dev: Generic device to unmap for
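Usage follows the other devm_ioremap_*() variants: map once in probe and let
devres unmap on detach. A hedged sketch (the "foo" driver and the resource
index are invented for illustration):

    #include <linux/platform_device.h>
    #include <linux/io.h>

    static int foo_probe(struct platform_device *pdev)
    {
    	struct resource *res;
    	void __iomem *fb;

    	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
    	if (!res)
    		return -ENODEV;

    	/* Write-combined mapping, e.g. for a framebuffer aperture;
    	 * torn down automatically when the driver detaches. */
    	fb = devm_ioremap_wc(&pdev->dev, res->start, resource_size(res));
    	if (!fb)
    		return -ENOMEM;

    	return 0;
    }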
lib/dma-debug.c
@@ -361,7 +361,7 @@ static struct dma_debug_entry *bucket_find_contain(struct hash_bucket **bucket,
 	unsigned int range = 0;
 
 	while (range <= max_range) {
-		entry = __hash_bucket_find(*bucket, &index, containing_match);
+		entry = __hash_bucket_find(*bucket, ref, containing_match);
 
 		if (entry)
 			return entry;
lib/find_bit.c (new file, 193 lines)
@@ -0,0 +1,193 @@
+/* bit search implementation
+ *
+ * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * Copyright (C) 2008 IBM Corporation
+ * 'find_last_bit' is written by Rusty Russell <rusty@rustcorp.com.au>
+ * (Inspired by David Howell's find_next_bit implementation)
+ *
+ * Rewritten by Yury Norov <yury.norov@gmail.com> to decrease
+ * size and improve performance, 2015.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/bitops.h>
+#include <linux/bitmap.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+
+#if !defined(find_next_bit) || !defined(find_next_zero_bit)
+
+/*
+ * This is a common helper function for find_next_bit and
+ * find_next_zero_bit. The difference is the "invert" argument, which
+ * is XORed with each fetched word before searching it for one bits.
+ */
+static unsigned long _find_next_bit(const unsigned long *addr,
+		unsigned long nbits, unsigned long start, unsigned long invert)
+{
+	unsigned long tmp;
+
+	if (!nbits || start >= nbits)
+		return nbits;
+
+	tmp = addr[start / BITS_PER_LONG] ^ invert;
+
+	/* Handle 1st word. */
+	tmp &= BITMAP_FIRST_WORD_MASK(start);
+	start = round_down(start, BITS_PER_LONG);
+
+	while (!tmp) {
+		start += BITS_PER_LONG;
+		if (start >= nbits)
+			return nbits;
+
+		tmp = addr[start / BITS_PER_LONG] ^ invert;
+	}
+
+	return min(start + __ffs(tmp), nbits);
+}
+#endif
+
+#ifndef find_next_bit
+/*
+ * Find the next set bit in a memory region.
+ */
+unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
+			    unsigned long offset)
+{
+	return _find_next_bit(addr, size, offset, 0UL);
+}
+EXPORT_SYMBOL(find_next_bit);
+#endif
+
+#ifndef find_next_zero_bit
+unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
+				 unsigned long offset)
+{
+	return _find_next_bit(addr, size, offset, ~0UL);
+}
+EXPORT_SYMBOL(find_next_zero_bit);
+#endif
+
+#ifndef find_first_bit
+/*
+ * Find the first set bit in a memory region.
+ */
+unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
+{
+	unsigned long idx;
+
+	for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
+		if (addr[idx])
+			return min(idx * BITS_PER_LONG + __ffs(addr[idx]), size);
+	}
+
+	return size;
+}
+EXPORT_SYMBOL(find_first_bit);
+#endif
+
+#ifndef find_first_zero_bit
+/*
+ * Find the first cleared bit in a memory region.
+ */
+unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
+{
+	unsigned long idx;
+
+	for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
+		if (addr[idx] != ~0UL)
+			return min(idx * BITS_PER_LONG + ffz(addr[idx]), size);
+	}
+
+	return size;
+}
+EXPORT_SYMBOL(find_first_zero_bit);
+#endif
+
+#ifndef find_last_bit
+unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
+{
+	if (size) {
+		unsigned long val = BITMAP_LAST_WORD_MASK(size);
+		unsigned long idx = (size-1) / BITS_PER_LONG;
+
+		do {
+			val &= addr[idx];
+			if (val)
+				return idx * BITS_PER_LONG + __fls(val);
+
+			val = ~0ul;
+		} while (idx--);
+	}
+	return size;
+}
+EXPORT_SYMBOL(find_last_bit);
+#endif
+
+#ifdef __BIG_ENDIAN
+
+/* include/linux/byteorder does not support "unsigned long" type */
+static inline unsigned long ext2_swab(const unsigned long y)
+{
+#if BITS_PER_LONG == 64
+	return (unsigned long) __swab64((u64) y);
+#elif BITS_PER_LONG == 32
+	return (unsigned long) __swab32((u32) y);
+#else
+#error BITS_PER_LONG not defined
+#endif
+}
+
+#if !defined(find_next_bit_le) || !defined(find_next_zero_bit_le)
+static unsigned long _find_next_bit_le(const unsigned long *addr,
+		unsigned long nbits, unsigned long start, unsigned long invert)
+{
+	unsigned long tmp;
+
+	if (!nbits || start >= nbits)
+		return nbits;
+
+	tmp = addr[start / BITS_PER_LONG] ^ invert;
+
+	/* Handle 1st word. */
+	tmp &= ext2_swab(BITMAP_FIRST_WORD_MASK(start));
+	start = round_down(start, BITS_PER_LONG);
+
+	while (!tmp) {
+		start += BITS_PER_LONG;
+		if (start >= nbits)
+			return nbits;
+
+		tmp = addr[start / BITS_PER_LONG] ^ invert;
+	}
+
+	return min(start + __ffs(ext2_swab(tmp)), nbits);
+}
+#endif
+
+#ifndef find_next_zero_bit_le
+unsigned long find_next_zero_bit_le(const void *addr, unsigned
+		long size, unsigned long offset)
+{
+	return _find_next_bit_le(addr, size, offset, ~0UL);
+}
+EXPORT_SYMBOL(find_next_zero_bit_le);
+#endif
+
+#ifndef find_next_bit_le
+unsigned long find_next_bit_le(const void *addr, unsigned
+		long size, unsigned long offset)
+{
+	return _find_next_bit_le(addr, size, offset, 0UL);
+}
+EXPORT_SYMBOL(find_next_bit_le);
+#endif
+
+#endif /* __BIG_ENDIAN */
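For orientation, the canonical way callers consume these primitives is the
open-coded form of for_each_set_bit(). Below is a self-contained userspace
model of find_next_bit() — the same algorithm as _find_next_bit() above with
invert == 0, with __ffs()/round_down() replaced by their obvious builtins:

    #include <stdio.h>

    #define BITS_PER_LONG (8 * sizeof(unsigned long))

    static unsigned long find_next_bit(const unsigned long *addr,
    				   unsigned long nbits, unsigned long start)
    {
    	unsigned long tmp;

    	if (start >= nbits)
    		return nbits;
    	/* Mask off bits below 'start' in the first fetched word. */
    	tmp = addr[start / BITS_PER_LONG] & (~0UL << (start % BITS_PER_LONG));
    	while (!tmp) {
    		start = (start / BITS_PER_LONG + 1) * BITS_PER_LONG;
    		if (start >= nbits)
    			return nbits;
    		tmp = addr[start / BITS_PER_LONG];
    	}
    	start = start / BITS_PER_LONG * BITS_PER_LONG + __builtin_ctzl(tmp);
    	return start < nbits ? start : nbits;
    }

    int main(void)
    {
    	unsigned long map[2] = { 0x11UL, 0x4UL };	/* bits 0, 4 and 66 */
    	unsigned long nbits = 2 * BITS_PER_LONG, bit;

    	for (bit = find_next_bit(map, nbits, 0);
    	     bit < nbits;
    	     bit = find_next_bit(map, nbits, bit + 1))
    		printf("bit %lu is set\n", bit);
    	return 0;
    }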
lib/find_last_bit.c
@@ -4,6 +4,9 @@
  * Written by Rusty Russell <rusty@rustcorp.com.au>
  * (Inspired by David Howell's find_next_bit implementation)
  *
+ * Rewritten by Yury Norov <yury.norov@gmail.com> to decrease
+ * size and improve performance, 2015.
+ *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version
@@ -11,37 +14,26 @@
  */
 
 #include <linux/bitops.h>
+#include <linux/bitmap.h>
 #include <linux/export.h>
-#include <asm/types.h>
-#include <asm/byteorder.h>
+#include <linux/kernel.h>
 
 #ifndef find_last_bit
 
 unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
 {
-	unsigned long words;
-	unsigned long tmp;
+	if (size) {
+		unsigned long val = BITMAP_LAST_WORD_MASK(size);
+		unsigned long idx = (size-1) / BITS_PER_LONG;
 
-	/* Start at final word. */
-	words = size / BITS_PER_LONG;
+		do {
+			val &= addr[idx];
+			if (val)
+				return idx * BITS_PER_LONG + __fls(val);
 
-	/* Partial final word? */
-	if (size & (BITS_PER_LONG-1)) {
-		tmp = (addr[words] & (~0UL >> (BITS_PER_LONG
-					 - (size & (BITS_PER_LONG-1)))));
-		if (tmp)
-			goto found;
-	}
-
-	while (words) {
-		tmp = addr[--words];
-		if (tmp) {
-found:
-			return words * BITS_PER_LONG + __fls(tmp);
-		}
-	}
-
-	/* Not found */
+			val = ~0ul;
+		} while (idx--);
+	}
 	return size;
 }
 EXPORT_SYMBOL(find_last_bit);
lib/find_next_bit.c (deleted file, 285 lines)
@@ -1,285 +0,0 @@
-/* find_next_bit.c: fallback find next bit implementation
- *
- * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/bitops.h>
-#include <linux/export.h>
-#include <asm/types.h>
-#include <asm/byteorder.h>
-
-#define BITOP_WORD(nr)		((nr) / BITS_PER_LONG)
-
-#ifndef find_next_bit
-/*
- * Find the next set bit in a memory region.
- */
-unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
-			    unsigned long offset)
-{
-	const unsigned long *p = addr + BITOP_WORD(offset);
-	unsigned long result = offset & ~(BITS_PER_LONG-1);
-	unsigned long tmp;
-
-	if (offset >= size)
-		return size;
-	size -= result;
-	offset %= BITS_PER_LONG;
-	if (offset) {
-		tmp = *(p++);
-		tmp &= (~0UL << offset);
-		if (size < BITS_PER_LONG)
-			goto found_first;
-		if (tmp)
-			goto found_middle;
-		size -= BITS_PER_LONG;
-		result += BITS_PER_LONG;
-	}
-	while (size & ~(BITS_PER_LONG-1)) {
-		if ((tmp = *(p++)))
-			goto found_middle;
-		result += BITS_PER_LONG;
-		size -= BITS_PER_LONG;
-	}
-	if (!size)
-		return result;
-	tmp = *p;
-
-found_first:
-	tmp &= (~0UL >> (BITS_PER_LONG - size));
-	if (tmp == 0UL)		/* Are any bits set? */
-		return result + size;	/* Nope. */
-found_middle:
-	return result + __ffs(tmp);
-}
-EXPORT_SYMBOL(find_next_bit);
-#endif
-
-#ifndef find_next_zero_bit
-/*
- * This implementation of find_{first,next}_zero_bit was stolen from
- * Linus' asm-alpha/bitops.h.
- */
-unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
-				 unsigned long offset)
-{
-	const unsigned long *p = addr + BITOP_WORD(offset);
-	unsigned long result = offset & ~(BITS_PER_LONG-1);
-	unsigned long tmp;
-
-	if (offset >= size)
-		return size;
-	size -= result;
-	offset %= BITS_PER_LONG;
-	if (offset) {
-		tmp = *(p++);
-		tmp |= ~0UL >> (BITS_PER_LONG - offset);
-		if (size < BITS_PER_LONG)
-			goto found_first;
-		if (~tmp)
-			goto found_middle;
-		size -= BITS_PER_LONG;
-		result += BITS_PER_LONG;
-	}
-	while (size & ~(BITS_PER_LONG-1)) {
-		if (~(tmp = *(p++)))
-			goto found_middle;
-		result += BITS_PER_LONG;
-		size -= BITS_PER_LONG;
-	}
-	if (!size)
-		return result;
-	tmp = *p;
-
-found_first:
-	tmp |= ~0UL << size;
-	if (tmp == ~0UL)	/* Are any bits zero? */
-		return result + size;	/* Nope. */
-found_middle:
-	return result + ffz(tmp);
-}
-EXPORT_SYMBOL(find_next_zero_bit);
-#endif
-
-#ifndef find_first_bit
-/*
- * Find the first set bit in a memory region.
- */
-unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
-{
-	const unsigned long *p = addr;
-	unsigned long result = 0;
-	unsigned long tmp;
-
-	while (size & ~(BITS_PER_LONG-1)) {
-		if ((tmp = *(p++)))
-			goto found;
-		result += BITS_PER_LONG;
-		size -= BITS_PER_LONG;
-	}
-	if (!size)
-		return result;
-
-	tmp = (*p) & (~0UL >> (BITS_PER_LONG - size));
-	if (tmp == 0UL)		/* Are any bits set? */
-		return result + size;	/* Nope. */
-found:
-	return result + __ffs(tmp);
-}
-EXPORT_SYMBOL(find_first_bit);
-#endif
-
-#ifndef find_first_zero_bit
-/*
- * Find the first cleared bit in a memory region.
- */
-unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
-{
-	const unsigned long *p = addr;
-	unsigned long result = 0;
-	unsigned long tmp;
-
-	while (size & ~(BITS_PER_LONG-1)) {
-		if (~(tmp = *(p++)))
-			goto found;
-		result += BITS_PER_LONG;
-		size -= BITS_PER_LONG;
-	}
-	if (!size)
-		return result;
-
-	tmp = (*p) | (~0UL << size);
-	if (tmp == ~0UL)	/* Are any bits zero? */
-		return result + size;	/* Nope. */
-found:
-	return result + ffz(tmp);
-}
-EXPORT_SYMBOL(find_first_zero_bit);
-#endif
-
-#ifdef __BIG_ENDIAN
-
-/* include/linux/byteorder does not support "unsigned long" type */
-static inline unsigned long ext2_swabp(const unsigned long * x)
-{
-#if BITS_PER_LONG == 64
-	return (unsigned long) __swab64p((u64 *) x);
-#elif BITS_PER_LONG == 32
-	return (unsigned long) __swab32p((u32 *) x);
-#else
-#error BITS_PER_LONG not defined
-#endif
-}
-
-/* include/linux/byteorder doesn't support "unsigned long" type */
-static inline unsigned long ext2_swab(const unsigned long y)
-{
-#if BITS_PER_LONG == 64
-	return (unsigned long) __swab64((u64) y);
-#elif BITS_PER_LONG == 32
-	return (unsigned long) __swab32((u32) y);
-#else
-#error BITS_PER_LONG not defined
-#endif
-}
-
-#ifndef find_next_zero_bit_le
-unsigned long find_next_zero_bit_le(const void *addr, unsigned
-		long size, unsigned long offset)
-{
-	const unsigned long *p = addr;
-	unsigned long result = offset & ~(BITS_PER_LONG - 1);
-	unsigned long tmp;
-
-	if (offset >= size)
-		return size;
-	p += BITOP_WORD(offset);
-	size -= result;
-	offset &= (BITS_PER_LONG - 1UL);
-	if (offset) {
-		tmp = ext2_swabp(p++);
-		tmp |= (~0UL >> (BITS_PER_LONG - offset));
-		if (size < BITS_PER_LONG)
-			goto found_first;
-		if (~tmp)
-			goto found_middle;
-		size -= BITS_PER_LONG;
-		result += BITS_PER_LONG;
-	}
-
-	while (size & ~(BITS_PER_LONG - 1)) {
-		if (~(tmp = *(p++)))
-			goto found_middle_swap;
-		result += BITS_PER_LONG;
-		size -= BITS_PER_LONG;
-	}
-	if (!size)
-		return result;
-	tmp = ext2_swabp(p);
-found_first:
-	tmp |= ~0UL << size;
-	if (tmp == ~0UL)	/* Are any bits zero? */
-		return result + size; /* Nope. Skip ffz */
-found_middle:
-	return result + ffz(tmp);
-
-found_middle_swap:
-	return result + ffz(ext2_swab(tmp));
-}
-EXPORT_SYMBOL(find_next_zero_bit_le);
-#endif
-
-#ifndef find_next_bit_le
-unsigned long find_next_bit_le(const void *addr, unsigned
-		long size, unsigned long offset)
-{
-	const unsigned long *p = addr;
-	unsigned long result = offset & ~(BITS_PER_LONG - 1);
-	unsigned long tmp;
-
-	if (offset >= size)
-		return size;
-	p += BITOP_WORD(offset);
-	size -= result;
-	offset &= (BITS_PER_LONG - 1UL);
-	if (offset) {
-		tmp = ext2_swabp(p++);
-		tmp &= (~0UL << offset);
-		if (size < BITS_PER_LONG)
-			goto found_first;
-		if (tmp)
-			goto found_middle;
-		size -= BITS_PER_LONG;
-		result += BITS_PER_LONG;
-	}
-
-	while (size & ~(BITS_PER_LONG - 1)) {
-		tmp = *(p++);
-		if (tmp)
-			goto found_middle_swap;
-		result += BITS_PER_LONG;
-		size -= BITS_PER_LONG;
-	}
-	if (!size)
-		return result;
-	tmp = ext2_swabp(p);
-found_first:
-	tmp &= (~0UL >> (BITS_PER_LONG - size));
-	if (tmp == 0UL)		/* Are any bits set? */
-		return result + size;	/* Nope. */
-found_middle:
-	return result + __ffs(tmp);
-
-found_middle_swap:
-	return result + __ffs(ext2_swab(tmp));
-}
-EXPORT_SYMBOL(find_next_bit_le);
-#endif
-
-#endif /* __BIG_ENDIAN */
lib/iommu-common.c (new file, 270 lines)
@@ -0,0 +1,270 @@
+/*
+ * IOMMU mmap management and range allocation functions.
+ * Based almost entirely upon the powerpc iommu allocator.
+ */
+
+#include <linux/export.h>
+#include <linux/bitmap.h>
+#include <linux/bug.h>
+#include <linux/iommu-helper.h>
+#include <linux/iommu-common.h>
+#include <linux/dma-mapping.h>
+#include <linux/hash.h>
+
+#ifndef	DMA_ERROR_CODE
+#define	DMA_ERROR_CODE (~(dma_addr_t)0x0)
+#endif
+
+static unsigned long iommu_large_alloc = 15;
+
+static	DEFINE_PER_CPU(unsigned int, iommu_hash_common);
+
+static inline bool need_flush(struct iommu_map_table *iommu)
+{
+	return (iommu->lazy_flush != NULL &&
+		(iommu->flags & IOMMU_NEED_FLUSH) != 0);
+}
+
+static inline void set_flush(struct iommu_map_table *iommu)
+{
+	iommu->flags |= IOMMU_NEED_FLUSH;
+}
+
+static inline void clear_flush(struct iommu_map_table *iommu)
+{
+	iommu->flags &= ~IOMMU_NEED_FLUSH;
+}
+
+static void setup_iommu_pool_hash(void)
+{
+	unsigned int i;
+	static bool do_once;
+
+	if (do_once)
+		return;
+	do_once = true;
+	for_each_possible_cpu(i)
+		per_cpu(iommu_hash_common, i) = hash_32(i, IOMMU_POOL_HASHBITS);
+}
+
+/*
+ * Initialize iommu_pool entries for the iommu_map_table. `num_entries'
+ * is the number of table entries. If `large_pool' is set to true,
+ * the top 1/4 of the table will be set aside for pool allocations
+ * of more than iommu_large_alloc pages.
+ */
+void iommu_tbl_pool_init(struct iommu_map_table *iommu,
+			 unsigned long num_entries,
+			 u32 table_shift,
+			 void (*lazy_flush)(struct iommu_map_table *),
+			 bool large_pool, u32 npools,
+			 bool skip_span_boundary_check)
+{
+	unsigned int start, i;
+	struct iommu_pool *p = &(iommu->large_pool);
+
+	setup_iommu_pool_hash();
+	if (npools == 0)
+		iommu->nr_pools = IOMMU_NR_POOLS;
+	else
+		iommu->nr_pools = npools;
+	BUG_ON(npools > IOMMU_NR_POOLS);
+
+	iommu->table_shift = table_shift;
+	iommu->lazy_flush = lazy_flush;
+	start = 0;
+	if (skip_span_boundary_check)
+		iommu->flags |= IOMMU_NO_SPAN_BOUND;
+	if (large_pool)
+		iommu->flags |= IOMMU_HAS_LARGE_POOL;
+
+	if (!large_pool)
+		iommu->poolsize = num_entries/iommu->nr_pools;
+	else
+		iommu->poolsize = (num_entries * 3 / 4)/iommu->nr_pools;
+	for (i = 0; i < iommu->nr_pools; i++) {
+		spin_lock_init(&(iommu->pools[i].lock));
+		iommu->pools[i].start = start;
+		iommu->pools[i].hint = start;
+		start += iommu->poolsize; /* start for next pool */
+		iommu->pools[i].end = start - 1;
+	}
+	if (!large_pool)
+		return;
+	/* initialize large_pool */
+	spin_lock_init(&(p->lock));
+	p->start = start;
+	p->hint = p->start;
+	p->end = num_entries;
+}
+EXPORT_SYMBOL(iommu_tbl_pool_init);
+
+unsigned long iommu_tbl_range_alloc(struct device *dev,
+				struct iommu_map_table *iommu,
+				unsigned long npages,
+				unsigned long *handle,
+				unsigned long mask,
+				unsigned int align_order)
+{
+	unsigned int pool_hash = __this_cpu_read(iommu_hash_common);
+	unsigned long n, end, start, limit, boundary_size;
+	struct iommu_pool *pool;
+	int pass = 0;
+	unsigned int pool_nr;
+	unsigned int npools = iommu->nr_pools;
+	unsigned long flags;
+	bool large_pool = ((iommu->flags & IOMMU_HAS_LARGE_POOL) != 0);
+	bool largealloc = (large_pool && npages > iommu_large_alloc);
+	unsigned long shift;
+	unsigned long align_mask = 0;
+
+	if (align_order > 0)
+		align_mask = 0xffffffffffffffffl >> (64 - align_order);
+
+	/* Sanity check */
+	if (unlikely(npages == 0)) {
+		WARN_ON_ONCE(1);
+		return DMA_ERROR_CODE;
+	}
+
+	if (largealloc) {
+		pool = &(iommu->large_pool);
+		pool_nr = 0; /* to keep compiler happy */
+	} else {
+		/* pick out pool_nr */
+		pool_nr = pool_hash & (npools - 1);
+		pool = &(iommu->pools[pool_nr]);
+	}
+	spin_lock_irqsave(&pool->lock, flags);
+
+ again:
+	if (pass == 0 && handle && *handle &&
+	    (*handle >= pool->start) && (*handle < pool->end))
+		start = *handle;
+	else
+		start = pool->hint;
+
+	limit = pool->end;
+
+	/* The case below can happen if we have a small segment appended
+	 * to a large, or when the previous alloc was at the very end of
+	 * the available space. If so, go back to the beginning. If a
+	 * flush is needed, it will get done based on the return value
+	 * from iommu_area_alloc() below.
+	 */
+	if (start >= limit)
+		start = pool->start;
+	shift = iommu->table_map_base >> iommu->table_shift;
+	if (limit + shift > mask) {
+		limit = mask - shift + 1;
+		/* If we're constrained on address range, first try
+		 * at the masked hint to avoid O(n) search complexity,
+		 * but on second pass, start at 0 in pool 0.
+		 */
+		if ((start & mask) >= limit || pass > 0) {
+			spin_unlock(&(pool->lock));
+			pool = &(iommu->pools[0]);
+			spin_lock(&(pool->lock));
+			start = pool->start;
+		} else {
+			start &= mask;
+		}
+	}
+
+	if (dev)
+		boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
+				      1 << iommu->table_shift);
+	else
+		boundary_size = ALIGN(1ULL << 32, 1 << iommu->table_shift);
+
+	boundary_size = boundary_size >> iommu->table_shift;
+	/*
+	 * if the skip_span_boundary_check had been set during init, we set
+	 * things up so that iommu_is_span_boundary() merely checks if the
+	 * (index + npages) < num_tsb_entries
+	 */
+	if ((iommu->flags & IOMMU_NO_SPAN_BOUND) != 0) {
+		shift = 0;
+		boundary_size = iommu->poolsize * iommu->nr_pools;
+	}
+	n = iommu_area_alloc(iommu->map, limit, start, npages, shift,
+			     boundary_size, align_mask);
+	if (n == -1) {
+		if (likely(pass == 0)) {
+			/* First failure, rescan from the beginning.  */
+			pool->hint = pool->start;
+			set_flush(iommu);
+			pass++;
+			goto again;
+		} else if (!largealloc && pass <= iommu->nr_pools) {
+			spin_unlock(&(pool->lock));
+			pool_nr = (pool_nr + 1) & (iommu->nr_pools - 1);
+			pool = &(iommu->pools[pool_nr]);
+			spin_lock(&(pool->lock));
+			pool->hint = pool->start;
+			set_flush(iommu);
+			pass++;
+			goto again;
+		} else {
+			/* give up */
+			n = DMA_ERROR_CODE;
+			goto bail;
+		}
+	}
+	if (n < pool->hint || need_flush(iommu)) {
+		clear_flush(iommu);
+		iommu->lazy_flush(iommu);
+	}
+
+	end = n + npages;
+	pool->hint = end;
+
+	/* Update handle for SG allocations */
+	if (handle)
+		*handle = end;
+bail:
+	spin_unlock_irqrestore(&(pool->lock), flags);
+
+	return n;
+}
+EXPORT_SYMBOL(iommu_tbl_range_alloc);
+
+static struct iommu_pool *get_pool(struct iommu_map_table *tbl,
+				   unsigned long entry)
+{
+	struct iommu_pool *p;
+	unsigned long largepool_start = tbl->large_pool.start;
+	bool large_pool = ((tbl->flags & IOMMU_HAS_LARGE_POOL) != 0);
+
+	/* The large pool is the last pool at the top of the table */
+	if (large_pool && entry >= largepool_start) {
+		p = &tbl->large_pool;
+	} else {
+		unsigned int pool_nr = entry / tbl->poolsize;
+
+		BUG_ON(pool_nr >= tbl->nr_pools);
+		p = &tbl->pools[pool_nr];
+	}
+	return p;
+}
+
+/* Caller supplies the index of the entry into the iommu map table
+ * itself when the mapping from dma_addr to the entry is not the
+ * default addr->entry mapping below.
+ */
+void iommu_tbl_range_free(struct iommu_map_table *iommu, u64 dma_addr,
+			  unsigned long npages, unsigned long entry)
+{
+	struct iommu_pool *pool;
+	unsigned long flags;
+	unsigned long shift = iommu->table_shift;
+
+	if (entry == DMA_ERROR_CODE) /* use default addr->entry mapping */
+		entry = (dma_addr - iommu->table_map_base) >> shift;
+	pool = get_pool(iommu, entry);
+
+	spin_lock_irqsave(&(pool->lock), flags);
+	bitmap_clear(iommu->map, entry, npages);
+	spin_unlock_irqrestore(&(pool->lock), flags);
+}
+EXPORT_SYMBOL(iommu_tbl_range_free);
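A rough sketch of how an architecture might drive this allocator (the "foo"
table, sizes and flush hook are invented for illustration; see the sparc
users of iommu-common.h for real call sites — note the caller still allocates
the ->map bitmap itself):

    #include <linux/iommu-common.h>

    static struct iommu_map_table foo_tbl;	/* foo_tbl.map set up elsewhere */

    static void foo_lazy_flush(struct iommu_map_table *tbl)
    {
    	/* arch-specific IOTLB flush would go here */
    }

    static void foo_iommu_setup(void)
    {
    	foo_tbl.table_map_base = 0;	/* DMA base of the aperture */
    	/* 1 << 16 entries of 8K IO pages, default pool count, large pool on */
    	iommu_tbl_pool_init(&foo_tbl, 1UL << 16, 13, foo_lazy_flush,
    			    true, 0, false);
    }

    static u64 foo_map_pages(struct device *dev, unsigned long npages)
    {
    	unsigned long entry = iommu_tbl_range_alloc(dev, &foo_tbl, npages,
    						    NULL, ~0UL, 0);

    	if (entry == DMA_ERROR_CODE)
    		return DMA_ERROR_CODE;
    	return foo_tbl.table_map_base + (entry << foo_tbl.table_shift);
    }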
lib/raid6/algos.c
@@ -131,11 +131,12 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void)
 static inline const struct raid6_calls *raid6_choose_gen(
	void *(*const dptrs)[(65536/PAGE_SIZE)+2], const int disks)
 {
-	unsigned long perf, bestperf, j0, j1;
+	unsigned long perf, bestgenperf, bestxorperf, j0, j1;
+	int start = (disks>>1)-1, stop = disks-3;	/* work on the second half of the disks */
	const struct raid6_calls *const *algo;
	const struct raid6_calls *best;
 
-	for (bestperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) {
+	for (bestgenperf = 0, bestxorperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) {
		if (!best || (*algo)->prefer >= best->prefer) {
			if ((*algo)->valid && !(*algo)->valid())
				continue;
@@ -153,19 +154,45 @@ static inline const struct raid6_calls *raid6_choose_gen(
		}
		preempt_enable();
 
-		if (perf > bestperf) {
-			bestperf = perf;
+		if (perf > bestgenperf) {
+			bestgenperf = perf;
			best = *algo;
		}
-		pr_info("raid6: %-8s %5ld MB/s\n", (*algo)->name,
+		pr_info("raid6: %-8s gen() %5ld MB/s\n", (*algo)->name,
			(perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
+
+		if (!(*algo)->xor_syndrome)
+			continue;
+
+		perf = 0;
+
+		preempt_disable();
+		j0 = jiffies;
+		while ((j1 = jiffies) == j0)
+			cpu_relax();
+		while (time_before(jiffies,
+			    j1 + (1<<RAID6_TIME_JIFFIES_LG2))) {
+			(*algo)->xor_syndrome(disks, start, stop,
+					      PAGE_SIZE, *dptrs);
+			perf++;
+		}
+		preempt_enable();
+
+		if (best == *algo)
+			bestxorperf = perf;
+
+		pr_info("raid6: %-8s xor() %5ld MB/s\n", (*algo)->name,
+			(perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1));
		}
	}
 
	if (best) {
-		pr_info("raid6: using algorithm %s (%ld MB/s)\n",
+		pr_info("raid6: using algorithm %s gen() %ld MB/s\n",
			best->name,
-			(bestperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
+			(bestgenperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
+		if (best->xor_syndrome)
+			pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n",
+			       (bestxorperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1));
		raid6_call = *best;
	} else
		pr_err("raid6: Yikes! No algorithm found!\n");
lib/raid6/altivec.uc
@@ -119,6 +119,7 @@ int raid6_have_altivec(void)
 
 const struct raid6_calls raid6_altivec$# = {
	raid6_altivec$#_gen_syndrome,
+	NULL,			/* XOR not yet implemented */
	raid6_have_altivec,
	"altivecx$#",
	0
lib/raid6/avx2.c
@@ -89,6 +89,7 @@ static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs)
 
 const struct raid6_calls raid6_avx2x1 = {
	raid6_avx21_gen_syndrome,
+	NULL,			/* XOR not yet implemented */
	raid6_have_avx2,
	"avx2x1",
	1			/* Has cache hints */
@@ -150,6 +151,7 @@ static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs)
 
 const struct raid6_calls raid6_avx2x2 = {
	raid6_avx22_gen_syndrome,
+	NULL,			/* XOR not yet implemented */
	raid6_have_avx2,
	"avx2x2",
	1			/* Has cache hints */
@@ -242,6 +244,7 @@ static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs)
 
 const struct raid6_calls raid6_avx2x4 = {
	raid6_avx24_gen_syndrome,
+	NULL,			/* XOR not yet implemented */
	raid6_have_avx2,
	"avx2x4",
	1			/* Has cache hints */
lib/raid6/int.uc
@@ -107,9 +107,48 @@ static void raid6_int$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
	}
 }
 
+static void raid6_int$#_xor_syndrome(int disks, int start, int stop,
+				     size_t bytes, void **ptrs)
+{
+	u8 **dptr = (u8 **)ptrs;
+	u8 *p, *q;
+	int d, z, z0;
+
+	unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
+
+	z0 = stop;		/* P/Q right side optimization */
+	p = dptr[disks-2];	/* XOR parity */
+	q = dptr[disks-1];	/* RS syndrome */
+
+	for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
+		/* P/Q data pages */
+		wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
+		for ( z = z0-1 ; z >= start ; z-- ) {
+			wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
+			wp$$ ^= wd$$;
+			w2$$ = MASK(wq$$);
+			w1$$ = SHLBYTE(wq$$);
+			w2$$ &= NBYTES(0x1d);
+			w1$$ ^= w2$$;
+			wq$$ = w1$$ ^ wd$$;
+		}
+		/* P/Q left side optimization */
+		for ( z = start-1 ; z >= 0 ; z-- ) {
+			w2$$ = MASK(wq$$);
+			w1$$ = SHLBYTE(wq$$);
+			w2$$ &= NBYTES(0x1d);
+			wq$$ = w1$$ ^ w2$$;
+		}
+		*(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
+		*(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
+	}
+
+}
+
 const struct raid6_calls raid6_intx$# = {
	raid6_int$#_gen_syndrome,
+	raid6_int$#_xor_syndrome,
	NULL,			/* always valid */
	"int" NSTRING "x$#",
	0
 };
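The MASK()/SHLBYTE()/NBYTES(0x1d) sequence above is a SIMD-within-a-register
multiply-by-2 in GF(2^8) with the RAID-6 polynomial 0x11d, applied to every
syndrome byte of a machine word at once. The same step for a single byte, as
a standalone userspace illustration:

    #include <stdint.h>
    #include <stdio.h>

    /* Multiply one byte by x (i.e. 2) in GF(2^8) mod x^8+x^4+x^3+x^2+1:
     * MASK() spreads each byte's top bit to 0xff or 0x00, SHLBYTE() doubles
     * each byte, and the mask selects where the 0x1d reduction is XORed in. */
    static uint8_t gf256_mul2(uint8_t q)
    {
    	uint8_t mask = (q & 0x80) ? 0xff : 0x00;	/* MASK(wq) */

    	return (uint8_t)(q << 1) ^ (mask & 0x1d);	/* SHLBYTE ^ reduction */
    }

    int main(void)
    {
    	/* 0x45 has a clear top bit -> 0x8a; 0x80 wraps around -> 0x1d */
    	printf("%#x %#x\n", gf256_mul2(0x45), gf256_mul2(0x80));
    	return 0;
    }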
lib/raid6/mmx.c
@@ -76,6 +76,7 @@ static void raid6_mmx1_gen_syndrome(int disks, size_t bytes, void **ptrs)
 
 const struct raid6_calls raid6_mmxx1 = {
	raid6_mmx1_gen_syndrome,
+	NULL,			/* XOR not yet implemented */
	raid6_have_mmx,
	"mmxx1",
	0
@@ -134,6 +135,7 @@ static void raid6_mmx2_gen_syndrome(int disks, size_t bytes, void **ptrs)
 
 const struct raid6_calls raid6_mmxx2 = {
	raid6_mmx2_gen_syndrome,
+	NULL,			/* XOR not yet implemented */
	raid6_have_mmx,
	"mmxx2",
	0
lib/raid6/neon.c
@@ -42,6 +42,7 @@
	}							\
	struct raid6_calls const raid6_neonx ## _n = {		\
		raid6_neon ## _n ## _gen_syndrome,		\
+		NULL,		/* XOR not yet implemented */	\
		raid6_have_neon,				\
		"neonx" #_n,					\
		0						\
lib/raid6/sse1.c
@@ -92,6 +92,7 @@ static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs)
 
 const struct raid6_calls raid6_sse1x1 = {
	raid6_sse11_gen_syndrome,
+	NULL,			/* XOR not yet implemented */
	raid6_have_sse1_or_mmxext,
	"sse1x1",
	1			/* Has cache hints */
@@ -154,6 +155,7 @@ static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs)
 
 const struct raid6_calls raid6_sse1x2 = {
	raid6_sse12_gen_syndrome,
+	NULL,			/* XOR not yet implemented */
	raid6_have_sse1_or_mmxext,
	"sse1x2",
	1			/* Has cache hints */
lib/raid6/sse2.c (227 changed lines)
@@ -88,8 +88,58 @@ static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs)
	kernel_fpu_end();
 }
 
+
+static void raid6_sse21_xor_syndrome(int disks, int start, int stop,
+				     size_t bytes, void **ptrs)
+{
+	u8 **dptr = (u8 **)ptrs;
+	u8 *p, *q;
+	int d, z, z0;
+
+	z0 = stop;		/* P/Q right side optimization */
+	p = dptr[disks-2];	/* XOR parity */
+	q = dptr[disks-1];	/* RS syndrome */
+
+	kernel_fpu_begin();
+
+	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
+
+	for ( d = 0 ; d < bytes ; d += 16 ) {
+		asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
+		asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
+		asm volatile("pxor %xmm4,%xmm2");
+		/* P/Q data pages */
+		for ( z = z0-1 ; z >= start ; z-- ) {
+			asm volatile("pxor %xmm5,%xmm5");
+			asm volatile("pcmpgtb %xmm4,%xmm5");
+			asm volatile("paddb %xmm4,%xmm4");
+			asm volatile("pand %xmm0,%xmm5");
+			asm volatile("pxor %xmm5,%xmm4");
+			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
+			asm volatile("pxor %xmm5,%xmm2");
+			asm volatile("pxor %xmm5,%xmm4");
+		}
+		/* P/Q left side optimization */
+		for ( z = start-1 ; z >= 0 ; z-- ) {
+			asm volatile("pxor %xmm5,%xmm5");
+			asm volatile("pcmpgtb %xmm4,%xmm5");
+			asm volatile("paddb %xmm4,%xmm4");
+			asm volatile("pand %xmm0,%xmm5");
+			asm volatile("pxor %xmm5,%xmm4");
+		}
+		asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
+		/* Don't use movntdq for r/w memory area < cache line */
+		asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
+		asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
+	}
+
+	asm volatile("sfence" : : : "memory");
+	kernel_fpu_end();
+}
+
 const struct raid6_calls raid6_sse2x1 = {
	raid6_sse21_gen_syndrome,
+	raid6_sse21_xor_syndrome,
	raid6_have_sse2,
	"sse2x1",
	1			/* Has cache hints */
@@ -150,8 +200,76 @@ static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
	kernel_fpu_end();
 }
 
+static void raid6_sse22_xor_syndrome(int disks, int start, int stop,
+				     size_t bytes, void **ptrs)
+{
+	u8 **dptr = (u8 **)ptrs;
+	u8 *p, *q;
+	int d, z, z0;
+
+	z0 = stop;		/* P/Q right side optimization */
+	p = dptr[disks-2];	/* XOR parity */
+	q = dptr[disks-1];	/* RS syndrome */
+
+	kernel_fpu_begin();
+
+	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
+
+	for ( d = 0 ; d < bytes ; d += 32 ) {
+		asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
+		asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
+		asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
+		asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
+		asm volatile("pxor %xmm4,%xmm2");
+		asm volatile("pxor %xmm6,%xmm3");
+		/* P/Q data pages */
+		for ( z = z0-1 ; z >= start ; z-- ) {
+			asm volatile("pxor %xmm5,%xmm5");
+			asm volatile("pxor %xmm7,%xmm7");
+			asm volatile("pcmpgtb %xmm4,%xmm5");
+			asm volatile("pcmpgtb %xmm6,%xmm7");
+			asm volatile("paddb %xmm4,%xmm4");
+			asm volatile("paddb %xmm6,%xmm6");
+			asm volatile("pand %xmm0,%xmm5");
+			asm volatile("pand %xmm0,%xmm7");
+			asm volatile("pxor %xmm5,%xmm4");
+			asm volatile("pxor %xmm7,%xmm6");
+			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
+			asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
+			asm volatile("pxor %xmm5,%xmm2");
+			asm volatile("pxor %xmm7,%xmm3");
+			asm volatile("pxor %xmm5,%xmm4");
+			asm volatile("pxor %xmm7,%xmm6");
+		}
+		/* P/Q left side optimization */
+		for ( z = start-1 ; z >= 0 ; z-- ) {
+			asm volatile("pxor %xmm5,%xmm5");
+			asm volatile("pxor %xmm7,%xmm7");
+			asm volatile("pcmpgtb %xmm4,%xmm5");
+			asm volatile("pcmpgtb %xmm6,%xmm7");
+			asm volatile("paddb %xmm4,%xmm4");
+			asm volatile("paddb %xmm6,%xmm6");
+			asm volatile("pand %xmm0,%xmm5");
+			asm volatile("pand %xmm0,%xmm7");
+			asm volatile("pxor %xmm5,%xmm4");
+			asm volatile("pxor %xmm7,%xmm6");
+		}
+		asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
+		asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));
+		/* Don't use movntdq for r/w memory area < cache line */
+		asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
+		asm volatile("movdqa %%xmm6,%0" : "=m" (q[d+16]));
+		asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
+		asm volatile("movdqa %%xmm3,%0" : "=m" (p[d+16]));
+	}
+
+	asm volatile("sfence" : : : "memory");
+	kernel_fpu_end();
+}
+
 const struct raid6_calls raid6_sse2x2 = {
	raid6_sse22_gen_syndrome,
+	raid6_sse22_xor_syndrome,
	raid6_have_sse2,
	"sse2x2",
	1			/* Has cache hints */
@@ -248,8 +366,117 @@ static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
	kernel_fpu_end();
 }
 
+static void raid6_sse24_xor_syndrome(int disks, int start, int stop,
+				     size_t bytes, void **ptrs)
+{
+	u8 **dptr = (u8 **)ptrs;
+	u8 *p, *q;
+	int d, z, z0;
+
+	z0 = stop;		/* P/Q right side optimization */
+	p = dptr[disks-2];	/* XOR parity */
+	q = dptr[disks-1];	/* RS syndrome */
+
+	kernel_fpu_begin();
+
+	asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));
+
+	for ( d = 0 ; d < bytes ; d += 64 ) {
+		asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
+		asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
+		asm volatile("movdqa %0,%%xmm12" :: "m" (dptr[z0][d+32]));
+		asm volatile("movdqa %0,%%xmm14" :: "m" (dptr[z0][d+48]));
+		asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
+		asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
+		asm volatile("movdqa %0,%%xmm10" : : "m" (p[d+32]));
+		asm volatile("movdqa %0,%%xmm11" : : "m" (p[d+48]));
+		asm volatile("pxor %xmm4,%xmm2");
+		asm volatile("pxor %xmm6,%xmm3");
+		asm volatile("pxor %xmm12,%xmm10");
+		asm volatile("pxor %xmm14,%xmm11");
+		/* P/Q data pages */
+		for ( z = z0-1 ; z >= start ; z-- ) {
+			asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
+			asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
+			asm volatile("pxor %xmm5,%xmm5");
+			asm volatile("pxor %xmm7,%xmm7");
+			asm volatile("pxor %xmm13,%xmm13");
+			asm volatile("pxor %xmm15,%xmm15");
+			asm volatile("pcmpgtb %xmm4,%xmm5");
+			asm volatile("pcmpgtb %xmm6,%xmm7");
+			asm volatile("pcmpgtb %xmm12,%xmm13");
+			asm volatile("pcmpgtb %xmm14,%xmm15");
+			asm volatile("paddb %xmm4,%xmm4");
+			asm volatile("paddb %xmm6,%xmm6");
+			asm volatile("paddb %xmm12,%xmm12");
+			asm volatile("paddb %xmm14,%xmm14");
+			asm volatile("pand %xmm0,%xmm5");
+			asm volatile("pand %xmm0,%xmm7");
+			asm volatile("pand %xmm0,%xmm13");
+			asm volatile("pand %xmm0,%xmm15");
+			asm volatile("pxor %xmm5,%xmm4");
+			asm volatile("pxor %xmm7,%xmm6");
+			asm volatile("pxor %xmm13,%xmm12");
+			asm volatile("pxor %xmm15,%xmm14");
+			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
+			asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
+			asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
+			asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
+			asm volatile("pxor %xmm5,%xmm2");
+			asm volatile("pxor %xmm7,%xmm3");
+			asm volatile("pxor %xmm13,%xmm10");
+			asm volatile("pxor %xmm15,%xmm11");
+			asm volatile("pxor %xmm5,%xmm4");
+			asm volatile("pxor %xmm7,%xmm6");
+			asm volatile("pxor %xmm13,%xmm12");
+			asm volatile("pxor %xmm15,%xmm14");
+		}
+		asm volatile("prefetchnta %0" :: "m" (q[d]));
+		asm volatile("prefetchnta %0" :: "m" (q[d+32]));
+		/* P/Q left side optimization */
+		for ( z = start-1 ; z >= 0 ; z-- ) {
+			asm volatile("pxor %xmm5,%xmm5");
+			asm volatile("pxor %xmm7,%xmm7");
+			asm volatile("pxor %xmm13,%xmm13");
+			asm volatile("pxor %xmm15,%xmm15");
+			asm volatile("pcmpgtb %xmm4,%xmm5");
+			asm volatile("pcmpgtb %xmm6,%xmm7");
+			asm volatile("pcmpgtb %xmm12,%xmm13");
+			asm volatile("pcmpgtb %xmm14,%xmm15");
+			asm volatile("paddb %xmm4,%xmm4");
+			asm volatile("paddb %xmm6,%xmm6");
+			asm volatile("paddb %xmm12,%xmm12");
+			asm volatile("paddb %xmm14,%xmm14");
+			asm volatile("pand %xmm0,%xmm5");
+			asm volatile("pand %xmm0,%xmm7");
+			asm volatile("pand %xmm0,%xmm13");
+			asm volatile("pand %xmm0,%xmm15");
+			asm volatile("pxor %xmm5,%xmm4");
+			asm volatile("pxor %xmm7,%xmm6");
+			asm volatile("pxor %xmm13,%xmm12");
+			asm volatile("pxor %xmm15,%xmm14");
+		}
+		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
+		asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
+		asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
+		asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
+		asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
+		asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));
+		asm volatile("pxor %0,%%xmm12" : : "m" (q[d+32]));
+		asm volatile("pxor %0,%%xmm14" : : "m" (q[d+48]));
+		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
+		asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
+		asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
+		asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
+	}
+	asm volatile("sfence" : : : "memory");
+	kernel_fpu_end();
+}
+
+
 const struct raid6_calls raid6_sse2x4 = {
	raid6_sse24_gen_syndrome,
+	raid6_sse24_xor_syndrome,
	raid6_have_sse2,
	"sse2x4",
	1			/* Has cache hints */
lib/raid6/test/test.c
@@ -28,11 +28,11 @@ char *dataptrs[NDISKS];
 char data[NDISKS][PAGE_SIZE];
 char recovi[PAGE_SIZE], recovj[PAGE_SIZE];
 
-static void makedata(void)
+static void makedata(int start, int stop)
 {
	int i, j;
 
-	for (i = 0; i < NDISKS; i++) {
+	for (i = start; i <= stop; i++) {
		for (j = 0; j < PAGE_SIZE; j++)
			data[i][j] = rand();
 
@@ -91,34 +91,55 @@ int main(int argc, char *argv[])
 {
	const struct raid6_calls *const *algo;
	const struct raid6_recov_calls *const *ra;
-	int i, j;
+	int i, j, p1, p2;
	int err = 0;
 
-	makedata();
+	makedata(0, NDISKS-1);
 
	for (ra = raid6_recov_algos; *ra; ra++) {
		if ((*ra)->valid && !(*ra)->valid())
			continue;
 
		raid6_2data_recov = (*ra)->data2;
		raid6_datap_recov = (*ra)->datap;
 
		printf("using recovery %s\n", (*ra)->name);
 
		for (algo = raid6_algos; *algo; algo++) {
-			if (!(*algo)->valid || (*algo)->valid()) {
-				raid6_call = **algo;
+			if ((*algo)->valid && !(*algo)->valid())
+				continue;
 
-				/* Nuke syndromes */
-				memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE);
+			raid6_call = **algo;
 
-				/* Generate assumed good syndrome */
-				raid6_call.gen_syndrome(NDISKS, PAGE_SIZE,
-							(void **)&dataptrs);
+			/* Nuke syndromes */
+			memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE);
+
+			/* Generate assumed good syndrome */
+			raid6_call.gen_syndrome(NDISKS, PAGE_SIZE,
+						(void **)&dataptrs);
+
+			for (i = 0; i < NDISKS-1; i++)
+				for (j = i+1; j < NDISKS; j++)
+					err += test_disks(i, j);
+
+			if (!raid6_call.xor_syndrome)
+				continue;
+
+			for (p1 = 0; p1 < NDISKS-2; p1++)
+				for (p2 = p1; p2 < NDISKS-2; p2++) {
+
+					/* Simulate rmw run */
+					raid6_call.xor_syndrome(NDISKS, p1, p2, PAGE_SIZE,
+								(void **)&dataptrs);
+					makedata(p1, p2);
+					raid6_call.xor_syndrome(NDISKS, p1, p2, PAGE_SIZE,
+								(void **)&dataptrs);
 
-				for (i = 0; i < NDISKS-1; i++)
-					for (j = i+1; j < NDISKS; j++)
-						err += test_disks(i, j);
-			}
+					for (i = 0; i < NDISKS-1; i++)
+						for (j = i+1; j < NDISKS; j++)
+							err += test_disks(i, j);
+				}
		}
		printf("\n");
	}
lib/raid6/tilegx.uc
@@ -80,6 +80,7 @@ void raid6_tilegx$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
 
 const struct raid6_calls raid6_tilegx$# = {
	raid6_tilegx$#_gen_syndrome,
+	NULL,			/* XOR not yet implemented */
	NULL,
	"tilegx$#",
	0
lib/string_helpers.c
@@ -4,6 +4,7 @@
 * Copyright 31 August 2008 James Bottomley
 * Copyright (C) 2013, Intel Corporation
 */
+#include <linux/bug.h>
 #include <linux/kernel.h>
 #include <linux/math64.h>
 #include <linux/export.h>
@@ -14,7 +15,8 @@
 
 /**
 * string_get_size - get the size in the specified units
- * @size:	The size to be converted
+ * @size:	The size to be converted in blocks
+ * @blk_size:	Size of the block (use 1 for size in bytes)
 * @units:	units to use (powers of 1000 or 1024)
 * @buf:	buffer to format to
 * @len:	length of buffer
@@ -24,14 +26,14 @@
 * at least 9 bytes and will always be zero terminated.
 *
 */
-void string_get_size(u64 size, const enum string_size_units units,
+void string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
		     char *buf, int len)
 {
	static const char *const units_10[] = {
-		"B", "kB", "MB", "GB", "TB", "PB", "EB"
+		"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"
	};
	static const char *const units_2[] = {
-		"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB"
+		"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"
	};
	static const char *const *const units_str[] = {
		[STRING_UNITS_10] = units_10,
@@ -42,31 +44,57 @@ void string_get_size(u64 size, const enum string_size_units units,
		[STRING_UNITS_2] = 1024,
	};
	int i, j;
-	u32 remainder = 0, sf_cap;
+	u32 remainder = 0, sf_cap, exp;
	char tmp[8];
+	const char *unit;
 
	tmp[0] = '\0';
	i = 0;
-	if (size >= divisor[units]) {
-		while (size >= divisor[units]) {
-			remainder = do_div(size, divisor[units]);
-			i++;
-		}
+	if (!size)
+		goto out;
 
-		sf_cap = size;
-		for (j = 0; sf_cap*10 < 1000; j++)
-			sf_cap *= 10;
+	while (blk_size >= divisor[units]) {
+		remainder = do_div(blk_size, divisor[units]);
+		i++;
+	}
 
-		if (j) {
-			remainder *= 1000;
-			remainder /= divisor[units];
-			snprintf(tmp, sizeof(tmp), ".%03u", remainder);
-			tmp[j+1] = '\0';
-		}
+	exp = divisor[units] / (u32)blk_size;
+	if (size >= exp) {
+		remainder = do_div(size, divisor[units]);
+		remainder *= blk_size;
+		i++;
+	} else {
+		remainder *= size;
	}
 
+	size *= blk_size;
+	size += remainder / divisor[units];
+	remainder %= divisor[units];
+
+	while (size >= divisor[units]) {
+		remainder = do_div(size, divisor[units]);
+		i++;
+	}
+
+	sf_cap = size;
+	for (j = 0; sf_cap*10 < 1000; j++)
+		sf_cap *= 10;
+
+	if (j) {
+		remainder *= 1000;
+		remainder /= divisor[units];
+		snprintf(tmp, sizeof(tmp), ".%03u", remainder);
+		tmp[j+1] = '\0';
+	}
+
+ out:
+	if (i >= ARRAY_SIZE(units_2))
+		unit = "UNK";
+	else
+		unit = units_str[units][i];
+
	snprintf(buf, len, "%u%s %s", (u32)size,
-		 tmp, units_str[units][i]);
+		 tmp, unit);
 }
 EXPORT_SYMBOL(string_get_size);
 
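With the extra blk_size argument, callers can report device capacities without
first multiplying (and possibly overflowing) a 64-bit byte count. Expected
output per the conversion logic above:

    char buf[16];

    /* 4096 blocks of 512 bytes = 2,097,152 bytes -> "2.09 MB" */
    string_get_size(4096, 512, STRING_UNITS_10, buf, sizeof(buf));

    /* plain byte counts pass blk_size = 1 -> "1.00 KiB" */
    string_get_size(1024, 1, STRING_UNITS_2, buf, sizeof(buf));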
lib/test-hexdump.c
@@ -48,7 +48,7 @@ static void __init test_hexdump(size_t len, int rowsize, int groupsize,
	char test[32 * 3 + 2 + 32 + 1];
	char real[32 * 3 + 2 + 32 + 1];
	char *p;
-	const char **result;
+	const char * const *result;
	size_t l = len;
	int gs = groupsize, rs = rowsize;
	unsigned int i;
246
lib/vsprintf.c
246
lib/vsprintf.c
@@ -33,6 +33,7 @@
|
||||
|
||||
#include <asm/page.h> /* for PAGE_SIZE */
|
||||
#include <asm/sections.h> /* for dereference_function_descriptor() */
|
||||
#include <asm/byteorder.h> /* cpu_to_le16 */
|
||||
|
||||
#include <linux/string_helpers.h>
|
||||
#include "kstrtox.h"
|
||||
@@ -122,142 +123,145 @@ int skip_atoi(const char **s)
|
||||
return i;
|
||||
}
|
||||
|
||||
/* Decimal conversion is by far the most typical, and is used
|
||||
* for /proc and /sys data. This directly impacts e.g. top performance
|
||||
* with many processes running. We optimize it for speed
|
||||
* using ideas described at <http://www.cs.uiowa.edu/~jones/bcd/divide.html>
|
||||
* (with permission from the author, Douglas W. Jones).
|
||||
/*
|
||||
* Decimal conversion is by far the most typical, and is used for
|
||||
* /proc and /sys data. This directly impacts e.g. top performance
|
||||
* with many processes running. We optimize it for speed by emitting
|
||||
* two characters at a time, using a 200 byte lookup table. This
|
||||
* roughly halves the number of multiplications compared to computing
|
||||
* the digits one at a time. Implementation strongly inspired by the
|
||||
* previous version, which in turn used ideas described at
|
||||
* <http://www.cs.uiowa.edu/~jones/bcd/divide.html> (with permission
|
||||
* from the author, Douglas W. Jones).
|
||||
*
|
||||
* It turns out there is precisely one 26 bit fixed-point
|
||||
* approximation a of 64/100 for which x/100 == (x * (u64)a) >> 32
|
||||
* holds for all x in [0, 10^8-1], namely a = 0x28f5c29. The actual
|
||||
* range happens to be somewhat larger (x <= 1073741898), but that's
|
||||
* irrelevant for our purpose.
|
||||
*
|
||||
* For dividing a number in the range [10^4, 10^6-1] by 100, we still
|
||||
* need a 32x32->64 bit multiply, so we simply use the same constant.
|
||||
*
|
||||
* For dividing a number in the range [100, 10^4-1] by 100, there are
|
||||
* several options. The simplest is (x * 0x147b) >> 19, which is valid
|
||||
* for all x <= 43698.
|
||||
*/

#if BITS_PER_LONG != 32 || BITS_PER_LONG_LONG != 64
/* Formats correctly any integer in [0, 999999999] */
static noinline_for_stack
char *put_dec_full9(char *buf, unsigned q)
{
	unsigned r;
static const u16 decpair[100] = {
#define _(x) (__force u16) cpu_to_le16(((x % 10) | ((x / 10) << 8)) + 0x3030)
	_( 0), _( 1), _( 2), _( 3), _( 4), _( 5), _( 6), _( 7), _( 8), _( 9),
	_(10), _(11), _(12), _(13), _(14), _(15), _(16), _(17), _(18), _(19),
	_(20), _(21), _(22), _(23), _(24), _(25), _(26), _(27), _(28), _(29),
	_(30), _(31), _(32), _(33), _(34), _(35), _(36), _(37), _(38), _(39),
	_(40), _(41), _(42), _(43), _(44), _(45), _(46), _(47), _(48), _(49),
	_(50), _(51), _(52), _(53), _(54), _(55), _(56), _(57), _(58), _(59),
	_(60), _(61), _(62), _(63), _(64), _(65), _(66), _(67), _(68), _(69),
	_(70), _(71), _(72), _(73), _(74), _(75), _(76), _(77), _(78), _(79),
	_(80), _(81), _(82), _(83), _(84), _(85), _(86), _(87), _(88), _(89),
	_(90), _(91), _(92), _(93), _(94), _(95), _(96), _(97), _(98), _(99),
#undef _
};

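Each decpair entry packs two ASCII digits with the ones digit in the low byte, so a single 16-bit store emits a digit pair in the least-significant-first order that the put_dec_*() routines produce. A userspace model of the trick (assumes a little-endian host; in the kernel, cpu_to_le16 makes the byte layout portable):

	#include <stdio.h>
	#include <stdint.h>
	#include <string.h>

	static uint16_t decpair[100];

	int main(void)
	{
		unsigned r = 1234, q;
		char buf[8];
		int x;

		for (x = 0; x < 100; x++)
			decpair[x] = (uint16_t)(((x % 10) | ((x / 10) << 8)) + 0x3030);

		/* Emit 1234 two digits at a time, least-significant pair first,
		 * as put_dec_trunc8() does for 100 <= r < 10^4. */
		q = (r * 0x147b) >> 19;			/* q = r / 100 = 12 */
		memcpy(buf, &decpair[r - 100 * q], 2);	/* bytes "43" */
		memcpy(buf + 2, &decpair[q], 2);	/* bytes "21" */
		buf[4] = '\0';

		printf("%s\n", buf);	/* prints "4321"; callers reverse it */
		return 0;
	}
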
	/*
	 * Possible ways to approx. divide by 10
	 * (x * 0x1999999a) >> 32 x < 1073741829 (multiply must be 64-bit)
	 * (x * 0xcccd) >> 19     x < 81920 (x < 262149 when 64-bit mul)
	 * (x * 0x6667) >> 18     x < 43699
	 * (x * 0x3334) >> 17     x < 16389
	 * (x * 0x199a) >> 16     x < 16389
	 * (x * 0x0ccd) >> 15     x < 16389
	 * (x * 0x0667) >> 14     x < 2739
	 * (x * 0x0334) >> 13     x < 1029
	 * (x * 0x019a) >> 12     x < 1029
	 * (x * 0x00cd) >> 11     x < 1029 shorter code than * 0x67 (on i386)
	 * (x * 0x0067) >> 10     x < 179
	 * (x * 0x0034) >> 9      x < 69 same
	 * (x * 0x001a) >> 8      x < 69 same
	 * (x * 0x000d) >> 7      x < 69 same, shortest code (on i386)
	 * (x * 0x0007) >> 6      x < 19
	 * See <http://www.cs.uiowa.edu/~jones/bcd/divide.html>
	 */
	r = (q * (uint64_t)0x1999999a) >> 32;
	*buf++ = (q - 10 * r) + '0';	/* 1 */
	q = (r * (uint64_t)0x1999999a) >> 32;
	*buf++ = (r - 10 * q) + '0';	/* 2 */
	r = (q * (uint64_t)0x1999999a) >> 32;
	*buf++ = (q - 10 * r) + '0';	/* 3 */
	q = (r * (uint64_t)0x1999999a) >> 32;
	*buf++ = (r - 10 * q) + '0';	/* 4 */
	r = (q * (uint64_t)0x1999999a) >> 32;
	*buf++ = (q - 10 * r) + '0';	/* 5 */
	/* Now value is under 10000, can avoid 64-bit multiply */
	q = (r * 0x199a) >> 16;
	*buf++ = (r - 10 * q) + '0';	/* 6 */
	r = (q * 0xcd) >> 11;
	*buf++ = (q - 10 * r) + '0';	/* 7 */
	q = (r * 0xcd) >> 11;
	*buf++ = (r - 10 * q) + '0';	/* 8 */
	*buf++ = q + '0';		/* 9 */
	return buf;
}
#endif

/* Similar to above but do not pad with zeros.
 * Code can be easily arranged to print 9 digits too, but our callers
 * always call put_dec_full9() instead when the number has 9 decimal digits.
 */
/*
 * This will print a single '0' even if r == 0, since we would
 * immediately jump to out_r where two 0s would be written but only
 * one of them accounted for in buf. This is needed by ip4_string
 * below. All other callers pass a non-zero value of r.
*/
static noinline_for_stack
char *put_dec_trunc8(char *buf, unsigned r)
{
	unsigned q;

	/* Copy of previous function's body with added early returns */
	while (r >= 10000) {
		q = r + '0';
		r = (r * (uint64_t)0x1999999a) >> 32;
		*buf++ = q - 10*r;
	}
	/* 1 <= r < 10^8 */
	if (r < 100)
		goto out_r;

	q = (r * 0x199a) >> 16;	/* r <= 9999 */
	*buf++ = (r - 10 * q) + '0';
	if (q == 0)
		return buf;
	r = (q * 0xcd) >> 11;	/* q <= 999 */
	*buf++ = (q - 10 * r) + '0';
	if (r == 0)
		return buf;
	q = (r * 0xcd) >> 11;	/* r <= 99 */
	*buf++ = (r - 10 * q) + '0';
	if (q == 0)
		return buf;
	*buf++ = q + '0';	/* q <= 9 */
	/* 100 <= r < 10^8 */
	q = (r * (u64)0x28f5c29) >> 32;
	*((u16 *)buf) = decpair[r - 100*q];
	buf += 2;

	/* 1 <= q < 10^6 */
	if (q < 100)
		goto out_q;

	/* 100 <= q < 10^6 */
	r = (q * (u64)0x28f5c29) >> 32;
	*((u16 *)buf) = decpair[q - 100*r];
	buf += 2;

	/* 1 <= r < 10^4 */
	if (r < 100)
		goto out_r;

	/* 100 <= r < 10^4 */
	q = (r * 0x147b) >> 19;
	*((u16 *)buf) = decpair[r - 100*q];
	buf += 2;
out_q:
	/* 1 <= q < 100 */
	r = q;
out_r:
	/* 1 <= r < 100 */
	*((u16 *)buf) = decpair[r];
	buf += r < 10 ? 1 : 2;
	return buf;
}

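The r == 0 special case called out in the new comment works because decpair[0] holds the two bytes "00" while buf advances past only one of them. A small illustrative sketch of that edge case (not part of the patch):

	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		char buf[4] = "xxx";
		const char pair0[2] = { '0', '0' };	/* model of decpair[0] */
		char *p = buf;

		memcpy(p, pair0, 2);	/* both zero bytes are stored... */
		p += 1;			/* ...but only one is accounted for (r < 10) */

		printf("%.*s\n", (int)(p - buf), buf);	/* prints "0" */
		return 0;
	}
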
/* There are two algorithms to print larger numbers.
 * One is generic: divide by 1000000000 and repeatedly print
 * groups of (up to) 9 digits. It's conceptually simple,
 * but requires a (unsigned long long) / 1000000000 division.
 *
 * Second algorithm splits 64-bit unsigned long long into 16-bit chunks,
 * manipulates them cleverly and generates groups of 4 decimal digits.
 * It so happens that it does NOT require long long division.
 *
 * If long is > 32 bits, division of 64-bit values is relatively easy,
 * and we will use the first algorithm.
 * If long long is > 64 bits (strange architecture with VERY large long long),
 * second algorithm can't be used, and we again use the first one.
 *
 * Else (if long is 32 bits and long long is 64 bits) we use second one.
 */
#if BITS_PER_LONG != 32 || BITS_PER_LONG_LONG != 64
/* First algorithm: generic */
static
#if BITS_PER_LONG == 64 && BITS_PER_LONG_LONG == 64
static noinline_for_stack
char *put_dec_full8(char *buf, unsigned r)
{
	unsigned q;

	/* 0 <= r < 10^8 */
	q = (r * (u64)0x28f5c29) >> 32;
	*((u16 *)buf) = decpair[r - 100*q];
	buf += 2;

	/* 0 <= q < 10^6 */
	r = (q * (u64)0x28f5c29) >> 32;
	*((u16 *)buf) = decpair[q - 100*r];
	buf += 2;

	/* 0 <= r < 10^4 */
	q = (r * 0x147b) >> 19;
	*((u16 *)buf) = decpair[r - 100*q];
	buf += 2;

	/* 0 <= q < 100 */
	*((u16 *)buf) = decpair[q];
	buf += 2;
	return buf;
}

static noinline_for_stack
char *put_dec(char *buf, unsigned long long n)
{
	if (n >= 100*1000*1000) {
		while (n >= 1000*1000*1000)
			buf = put_dec_full9(buf, do_div(n, 1000*1000*1000));
		if (n >= 100*1000*1000)
			return put_dec_full9(buf, n);
	}
	if (n >= 100*1000*1000)
		buf = put_dec_full8(buf, do_div(n, 100*1000*1000));
	/* 1 <= n <= 1.6e11 */
	if (n >= 100*1000*1000)
		buf = put_dec_full8(buf, do_div(n, 100*1000*1000));
	/* 1 <= n < 1e8 */
	return put_dec_trunc8(buf, n);
}

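On 64-bit, the new put_dec() peels off at most two low-order 8-digit groups with do_div() and hands the rest to put_dec_trunc8(). A userspace model of that flow using plain division (illustrative, not from the patch):

	#include <stdio.h>

	int main(void)
	{
		unsigned long long n = 18446744073709551615ULL;	/* 2^64 - 1 */

		/* Each round models one do_div(): the low 8 digits go to
		 * put_dec_full8(); at most two rounds are needed for 64 bits. */
		while (n >= 100000000ULL) {
			unsigned low8 = (unsigned)(n % 100000000ULL);

			n /= 100000000ULL;
			printf("full8 group: %08u\n", low8);
		}
		printf("trunc8 tail: %llu\n", n);	/* 1844 */
		return 0;
	}
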
#else
#elif BITS_PER_LONG == 32 && BITS_PER_LONG_LONG == 64

/* Second algorithm: valid only for 64-bit long longs */

/* See comment in put_dec_full9 for choice of constants */
static noinline_for_stack
void put_dec_full4(char *buf, unsigned q)
static void
put_dec_full4(char *buf, unsigned r)
{
	unsigned r;
	r = (q * 0xccd) >> 15;
	buf[0] = (q - 10 * r) + '0';
	q = (r * 0xcd) >> 11;
	buf[1] = (r - 10 * q) + '0';
	r = (q * 0xcd) >> 11;
	buf[2] = (q - 10 * r) + '0';
	buf[3] = r + '0';
	unsigned q;

	/* 0 <= r < 10^4 */
	q = (r * 0x147b) >> 19;
	*((u16 *)buf) = decpair[r - 100*q];
	buf += 2;
	/* 0 <= q < 100 */
	*((u16 *)buf) = decpair[q];
}

/*
@@ -265,9 +269,9 @@ void put_dec_full4(char *buf, unsigned q)
 * The approximation x/10000 == (x * 0x346DC5D7) >> 43
 * holds for all x < 1,128,869,999. The largest value this
 * helper will ever be asked to convert is 1,125,520,955.
 * (d1 in the put_dec code, assuming n is all-ones).
 * (second call in the put_dec code, assuming n is all-ones).
 */
static
static noinline_for_stack
unsigned put_dec_helper4(char *buf, unsigned x)
{
	uint32_t q = (x * (uint64_t)0x346DC5D7) >> 43;
@@ -294,6 +298,8 @@ char *put_dec(char *buf, unsigned long long n)
	d2 = (h      ) & 0xffff;
	d3 = (h >> 16); /* implicit "& 0xffff" */

	/* n = 2^48 d3 + 2^32 d2 + 2^16 d1 + d0
	     = 281_4749_7671_0656 d3 + 42_9496_7296 d2 + 6_5536 d1 + d0 */
	q = 656 * d3 + 7296 * d2 + 5536 * d1 + ((uint32_t)n & 0xffff);
	q = put_dec_helper4(buf, q);

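The coefficients 656, 7296 and 5536 are just the low four-digit groups of the chunk weights written out in decimal above, and the comment's worst-case figure of 1,125,520,955 for put_dec_helper4() follows from the next digit groups (7671, 9496, 6) read off the same expansions. A quick arithmetic check, not part of the patch:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		printf("%llu %llu %llu\n",
		       (1ULL << 16) % 10000,	/* 5536 */
		       (1ULL << 32) % 10000,	/* 7296 */
		       (1ULL << 48) % 10000);	/* 656 */

		/* Worst case with n all-ones: d1 = d2 = d3 = 0xffff. */
		uint32_t d = 0xffff;
		uint32_t q = 656 * d + 7296 * d + 5536 * d + d;	/* first call */

		q /= 10000;			/* the helper returns x / 10000 */
		q += 7671 * d + 9496 * d + 6 * d;
		printf("%u\n", q);		/* 1125520955, as the comment says */
		return 0;
	}
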
@@ -323,7 +329,8 @@ char *put_dec(char *buf, unsigned long long n)
 */
int num_to_str(char *buf, int size, unsigned long long num)
{
	char tmp[sizeof(num) * 3];
	/* put_dec requires 2-byte alignment of the buffer. */
	char tmp[sizeof(num) * 3] __aligned(2);
	int idx, len;

	/* put_dec() may work incorrectly for num = 0 (generate "", not "0") */
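The new __aligned(2) is needed because the decpair path stores digit pairs through a u16 pointer, and strict-alignment architectures require such stores to hit a 2-byte-aligned address. A userspace sketch of the equivalent GCC/Clang spelling (illustrative):

	#include <stdint.h>

	static char tmp[3 * sizeof(uint64_t)] __attribute__((aligned(2)));

	static void store_pair(char *p, uint16_t pair)
	{
		*(uint16_t *)p = pair;	/* safe only when p is 2-byte aligned */
	}

	int main(void)
	{
		store_pair(tmp, 0x3030);	/* writes "00" at an aligned address */
		return 0;
	}
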
@@ -384,7 +391,8 @@ static noinline_for_stack
char *number(char *buf, char *end, unsigned long long num,
	     struct printf_spec spec)
{
	char tmp[3 * sizeof(num)];
	/* put_dec requires 2-byte alignment of the buffer. */
	char tmp[3 * sizeof(num)] __aligned(2);
	char sign;
	char locase;
	int need_pfx = ((spec.flags & SPECIAL) && spec.base != 10);
@@ -944,7 +952,7 @@ char *ip4_string(char *p, const u8 *addr, const char *fmt)
		break;
	}
	for (i = 0; i < 4; i++) {
		char temp[3];	/* hold each IP quad in reverse order */
		char temp[4] __aligned(2);	/* hold each IP quad in reverse order */
		int digits = put_dec_trunc8(temp, addr[index]) - temp;
		if (leading_zeros) {
			if (digits < 3)