Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!
Dieser Commit ist enthalten in:
Linus Torvalds
2005-04-16 15:20:36 -07:00
Commit 1da177e4c3
17291 geänderte Dateien mit 6718755 neuen und 0 gelöschten Zeilen

9
arch/parisc/lib/Makefile Normale Datei
Datei anzeigen

@@ -0,0 +1,9 @@
#
# Makefile for parisc-specific library files
#
lib-y := lusercopy.o bitops.o checksum.o io.o memset.o fixup.o memcpy.o
obj-y := iomap.o
lib-$(CONFIG_SMP) += debuglocks.o

84
arch/parisc/lib/bitops.c Normale Datei
Datei anzeigen

@@ -0,0 +1,84 @@
/*
* bitops.c: atomic operations which got too long to be inlined all over
* the place.
*
* Copyright 1999 Philipp Rumpf (prumpf@tux.org)
* Copyright 2000 Grant Grundler (grundler@cup.hp.com)
*/
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/spinlock.h>
#include <asm/system.h>
#include <asm/atomic.h>
#ifdef CONFIG_SMP
/*
 * Array of ATOMIC_HASH_SIZE spinlocks, every slot initialised unlocked
 * (GNU range designator).
 * NOTE(review): presumably the _atomic_spin_lock_irqsave() helpers used in
 * this file pick a slot by hashing the target address -- confirm in
 * asm/atomic.h.
 */
spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned = {
[0 ... (ATOMIC_HASH_SIZE-1)] = SPIN_LOCK_UNLOCKED
};
#endif
#ifdef __LP64__
/*
 * __xchg64 - store @x into *@ptr and hand back the value it replaced.
 *
 * The load and the store are bracketed by _atomic_spin_lock_irqsave() /
 * _atomic_spin_unlock_irqrestore() taken on @ptr.
 */
unsigned long __xchg64(unsigned long x, unsigned long *ptr)
{
	unsigned long irq_state;
	unsigned long previous;

	_atomic_spin_lock_irqsave(ptr, irq_state);
	previous = *ptr;
	*ptr = x;
	_atomic_spin_unlock_irqrestore(ptr, irq_state);

	return previous;
}
#endif
/*
 * __xchg32 - store @x into the int at @ptr and return the old contents.
 *
 * The old int is widened to long before being returned as unsigned long,
 * so a negative old value is sign-extended (XXX - is that wanted?).
 */
unsigned long __xchg32(int x, int *ptr)
{
	unsigned long irq_state;
	long previous;

	_atomic_spin_lock_irqsave(ptr, irq_state);
	previous = (long) *ptr;
	*ptr = x;
	_atomic_spin_unlock_irqrestore(ptr, irq_state);

	return (unsigned long)previous;
}
/*
 * __xchg8 - store @x into the char at @ptr and return the old contents.
 *
 * The old char is widened to long before being returned as unsigned long
 * (XXX - sign extension wanted?).
 */
unsigned long __xchg8(char x, char *ptr)
{
	unsigned long irq_state;
	long previous;

	_atomic_spin_lock_irqsave(ptr, irq_state);
	previous = (long) *ptr;
	*ptr = x;
	_atomic_spin_unlock_irqrestore(ptr, irq_state);

	return (unsigned long)previous;
}
#ifdef __LP64__
/*
 * __cmpxchg_u64 - compare-and-exchange on an unsigned long.
 *
 * Reads *@ptr once; if it equals @old, writes @new. Always returns the
 * value that was read, so the caller can tell success (result == @old)
 * from failure.
 */
unsigned long __cmpxchg_u64(volatile unsigned long *ptr, unsigned long old, unsigned long new)
{
	unsigned long irq_state;
	unsigned long seen;

	_atomic_spin_lock_irqsave(ptr, irq_state);
	seen = *ptr;
	if (seen == old)
		*ptr = new;
	_atomic_spin_unlock_irqrestore(ptr, irq_state);

	return seen;
}
#endif
/*
 * __cmpxchg_u32 - compare-and-exchange on an unsigned int.
 *
 * Reads *@ptr once; if it equals @old, writes @new. Always returns the
 * value that was read, widened to unsigned long.
 */
unsigned long __cmpxchg_u32(volatile unsigned int *ptr, unsigned int old, unsigned int new)
{
	unsigned long irq_state;
	unsigned int seen;

	_atomic_spin_lock_irqsave(ptr, irq_state);
	seen = *ptr;
	if (seen == old)
		*ptr = new;
	_atomic_spin_unlock_irqrestore(ptr, irq_state);

	return (unsigned long)seen;
}

148
arch/parisc/lib/checksum.c Normale Datei
Datei anzeigen

@@ -0,0 +1,148 @@
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
* interface as the means of communication with the user level.
*
* MIPS specific IP/TCP/UDP checksumming routines
*
* Authors: Ralf Baechle, <ralf@waldorf-gmbh.de>
* Lots of code moved from tcp.c and ip.c; see those files
* for more names.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* $Id: checksum.c,v 1.3 1997/12/01 17:57:34 ralf Exp $
*/
#include <linux/module.h>
#include <linux/types.h>
#include <net/checksum.h>
#include <asm/byteorder.h>
#include <asm/string.h>
#include <asm/uaccess.h>
/*
 * addc(_t, _r): _t += _r using "add", followed by "addc _t, %r0, _t".
 * NOTE(review): presumably the second instruction folds the carry out of
 * the first add back into _t (end-around carry, as a ones'-complement sum
 * needs) -- confirm against the PA-RISC instruction reference.
 * _t is both input and output ("0" constraint); _r is read only.
 * NOTE(review): the expansion already ends in ';', so "addc(a, b);" leaves
 * an extra empty statement; avoid using it as the body of an unbraced
 * if/else.
 */
#define addc(_t,_r) \
__asm__ __volatile__ ( \
" add %0, %1, %0\n" \
" addc %0, %%r0, %0\n" \
: "=r"(_t) \
: "r"(_r), "0"(_t));
/*
 * Fold a 32-bit partial sum down to 16 bits by adding the upper half into
 * the lower half twice; the second pass absorbs the carry the first pass
 * may have produced.
 */
static inline unsigned short from32to16(unsigned int x)
{
	unsigned int folded;

	/* 32 bits -> 16 bits plus a possible carry in bit 16 */
	folded = (x >> 16) + (x & 0xffff);
	/* fold that carry back in */
	folded = (folded >> 16) + (folded & 0xffff);

	return (unsigned short)folded;
}
/*
 * Sum the len bytes at buff and return the result folded to 16 bits
 * (via from32to16). Returns 0 when len <= 0.
 *
 * The buffer is consumed in stages so that the bulk of the work is done
 * with aligned 32-bit loads: an optional leading byte, an optional leading
 * 16-bit word, 32-bit words (four at a time, then singly), then an
 * optional trailing 16-bit word and trailing byte.
 */
static inline unsigned int do_csum(const unsigned char * buff, int len)
{
int odd, count;
unsigned int result = 0;
if (len <= 0)
goto out;
/* odd start address: take one byte so the rest is 16-bit aligned */
odd = 1 & (unsigned long) buff;
if (odd) {
result = be16_to_cpu(*buff);
len--;
buff++;
}
count = len >> 1; /* nr of 16-bit words.. */
if (count) {
/* not yet 32-bit aligned: take one 16-bit word */
if (2 & (unsigned long) buff) {
result += *(unsigned short *) buff;
count--;
len -= 2;
buff += 2;
}
count >>= 1; /* nr of 32-bit words.. */
if (count) {
/* main loop: four 32-bit words per iteration */
while (count >= 4) {
unsigned int r1, r2, r3, r4;
r1 = *(unsigned int *)(buff + 0);
r2 = *(unsigned int *)(buff + 4);
r3 = *(unsigned int *)(buff + 8);
r4 = *(unsigned int *)(buff + 12);
addc(result, r1);
addc(result, r2);
addc(result, r3);
addc(result, r4);
count -= 4;
buff += 16;
}
/* remaining 32-bit words, one at a time */
while (count) {
unsigned int w = *(unsigned int *) buff;
count--;
buff += 4;
addc(result, w);
}
/* fold once so the plain += below has headroom */
result = (result & 0xffff) + (result >> 16);
}
/* trailing 16-bit word */
if (len & 2) {
result += *(unsigned short *) buff;
buff += 2;
}
}
/* trailing byte */
if (len & 1)
result += le16_to_cpu(*buff);
result = from32to16(result);
/* the sum was accumulated one byte out of phase; swap it back */
if (odd)
result = swab16(result);
out:
return result;
}
/*
 * csum_partial - checksum a buffer and merge it with a running sum,
 * e.g. for TCP/UDP fragments.
 *
 * Returns do_csum(buff, len) added to @sum with the addc() macro and
 * folded to 16 bits.
 */
unsigned int csum_partial(const unsigned char *buff, int len, unsigned int sum)
{
	unsigned int acc;

	acc = do_csum(buff, len);
	addc(acc, sum);

	return from32to16(acc);
}
EXPORT_SYMBOL(csum_partial);
/*
 * csum_partial_copy_nocheck - checksum @src, then copy it to @dst.
 *
 * Same result as csum_partial(). This is not a fused copy-and-checksum
 * loop: the data is walked twice (once by csum_partial(), once by
 * memcpy()), which is a lot slower than the real thing.
 */
unsigned int csum_partial_copy_nocheck(const unsigned char *src, unsigned char *dst,
				       int len, unsigned int sum)
{
	unsigned int folded = csum_partial(src, len, sum);

	memcpy(dst, src, len);

	return folded;
}
EXPORT_SYMBOL(csum_partial_copy_nocheck);
/*
 * csum_partial_copy_from_user - copy @len bytes from user space into @dst
 * and checksum what ended up in @dst.
 *
 * If copy_from_user() reports bytes it could not copy, the uncopied tail
 * of @dst is zero-filled and *@err_ptr is set to -EFAULT. *@err_ptr is
 * left untouched on success.
 */
unsigned int csum_partial_copy_from_user(const unsigned char __user *src,
					 unsigned char *dst, int len,
					 unsigned int sum, int *err_ptr)
{
	int uncopied = copy_from_user(dst, src, len);

	if (uncopied != 0) {
		memset(dst + len - uncopied, 0, uncopied);
		*err_ptr = -EFAULT;
	}

	return csum_partial(dst, len, sum);
}
EXPORT_SYMBOL(csum_partial_copy_from_user);

277
arch/parisc/lib/debuglocks.c Normale Datei
Datei anzeigen

@@ -0,0 +1,277 @@
/*
* Debugging versions of SMP locking primitives.
*
* Copyright (C) 2004 Thibaut VARENE <varenet@parisc-linux.org>
*
* Some code stolen from alpha & sparc64 ;)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* We use pdc_printf() throughout the file for all output messages, to avoid
* losing messages because of disabled interrupts. Since we're using these
* messages for debugging purposes, it makes sense not to send them to the
* linux console.
*/
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/hardirq.h> /* in_interrupt() */
#include <asm/system.h>
#include <asm/hardirq.h> /* in_interrupt() */
#include <asm/pdc.h>
#undef INIT_STUCK
/*
 * Spin budget used by the debug lock routines before they report a lock
 * as stuck. Parenthesised so the shift cannot be re-associated with
 * neighbouring operators wherever the macro is expanded.
 */
#define INIT_STUCK (1L << 30)
#ifdef CONFIG_DEBUG_SPINLOCK
/*
 * Debug variant of spin_lock().
 *
 * Spins on the lock word with a budget of INIT_STUCK iterations. Each time
 * the budget runs out, a "stuck" report naming this caller and the
 * recorded owner is emitted through pdc_printf(), the budget is reset and
 * spinning continues. Once the lock is taken, the owner record in the lock
 * (cpu, return address, task, file, line) is updated; if a stuck report
 * was printed earlier, a "grabbed" message with the elapsed jiffies
 * follows.
 *
 * base_file/line_no identify the call site.
 */
void _dbg_spin_lock(spinlock_t * lock, const char *base_file, int line_no)
{
volatile unsigned int *a;
long stuck = INIT_STUCK;
void *inline_pc = __builtin_return_address(0);
unsigned long started = jiffies;
int printed = 0;
int cpu = smp_processor_id();
try_again:
/* Do the actual locking */
/* <T-Bone> ggg: we can't get stuck on the outer loop?
* <ggg> T-Bone: We can hit the outer loop
* a lot if multiple CPUs are constantly racing for a lock
* and the backplane is NOT fair about which CPU sees
* the update first. But it won't hang since every failed
* attempt will drop us back into the inner loop and
* decrement `stuck'.
* <ggg> K-class and some of the others are NOT fair in the HW
* implementation so we could see false positives.
* But fixing the lock contention is easier than
* fixing the HW to be fair.
* <tausq> __ldcw() returns 1 if we get the lock; otherwise we
* spin until the value of the lock changes, or we time out.
*/
mb();
a = __ldcw_align(lock);
/* outer loop: try to take the lock; inner loop: wait, burning budget */
while (stuck && (__ldcw(a) == 0))
while ((*a == 0) && --stuck);
mb();
/* budget exhausted: report, refill the budget and go round again */
if (unlikely(stuck <= 0)) {
pdc_printf(
"%s:%d: spin_lock(%s/%p) stuck in %s at %p(%d)"
" owned by %s:%d in %s at %p(%d)\n",
base_file, line_no, lock->module, lock,
current->comm, inline_pc, cpu,
lock->bfile, lock->bline, lock->task->comm,
lock->previous, lock->oncpu);
stuck = INIT_STUCK;
printed = 1;
goto try_again;
}
/* Exiting. Got the lock. */
lock->oncpu = cpu;
lock->previous = inline_pc;
lock->task = current;
lock->bfile = (char *)base_file;
lock->bline = line_no;
/* only mention the acquisition if we complained about it earlier */
if (unlikely(printed)) {
pdc_printf(
"%s:%d: spin_lock grabbed in %s at %p(%d) %ld ticks\n",
base_file, line_no, current->comm, inline_pc,
cpu, jiffies - started);
}
}
/*
 * Debug variant of spin_unlock().
 *
 * If the lock word is already non-zero (i.e. the lock does not look held)
 * and the lock still has "babble" budget, one budget unit is consumed and
 * a "not locked" message is printed. The lock word is then set to 1
 * unconditionally, between two memory barriers.
 */
void _dbg_spin_unlock(spinlock_t * lock, const char *base_file, int line_no)
{
CHECK_LOCK(lock);
volatile unsigned int *a;
mb();
a = __ldcw_align(lock);
/* babble rate-limits the complaint per lock */
if (unlikely((*a != 0) && lock->babble)) {
lock->babble--;
pdc_printf(
"%s:%d: spin_unlock(%s:%p) not locked\n",
base_file, line_no, lock->module, lock);
}
*a = 1;
mb();
}
/*
 * Debug variant of spin_trylock().
 *
 * Makes a single __ldcw() attempt on the lock word. Returns 1 if the lock
 * was taken, 0 otherwise.
 *
 * On success the complete owner record (cpu, return address, task, file,
 * line) is stored in the lock, the same fields _dbg_spin_lock() records,
 * so a later "stuck ... owned by file:line" report names this call site.
 * On failure the lock is left untouched: it belongs to somebody else, and
 * overwriting its file/line would corrupt the real owner's record.
 */
int _dbg_spin_trylock(spinlock_t * lock, const char *base_file, int line_no)
{
	int ret;
	volatile unsigned int *a;

	mb();
	a = __ldcw_align(lock);
	ret = (__ldcw(a) != 0);
	mb();

	if (ret) {
		lock->oncpu = smp_processor_id();
		lock->previous = __builtin_return_address(0);
		lock->task = current;
		lock->bfile = (char *)base_file;
		lock->bline = line_no;
	}

	return ret;
}
#endif /* CONFIG_DEBUG_SPINLOCK */
#ifdef CONFIG_DEBUG_RWLOCK
/* Interrupts trouble detailed explanation, thx Grant:
*
* o writer (wants to modify data) attempts to acquire the rwlock
* o He gets the write lock.
* o Interrupts are still enabled, we take an interrupt with the
* writer still holding the lock.
* o interrupt handler tries to acquire the rwlock for read.
* o deadlock since the writer can't release it at this point.
*
* In general, any use of spinlocks that competes between "base"
* level and interrupt level code will risk deadlock. Interrupts
* need to be disabled in the base level routines to avoid it.
* Or more precisely, only the IRQ the base level routine
* is competing with for the lock. But it's more efficient/faster
* to just disable all interrupts on that CPU to guarantee
* once it gets the lock it can release it quickly too.
*/
/*
 * Debug variant of write_lock().
 *
 * BUG()s when called from interrupt context. Otherwise loops until
 * rw->counter is observed as 0 while rw->lock is held, then sets the
 * counter to -1 and returns WITHOUT releasing rw->lock.
 *
 * Counter values as used in this file: 0 = free, -1 = write-locked (set
 * below), > 0 = number of readers (incremented by _dbg_read_lock()).
 *
 * Every failed attempt consumes one unit of the INIT_STUCK budget; when it
 * runs out a "stuck on writer" or "stuck on reader" message is printed
 * (depending on the sign of the counter) and the budget is refilled.
 */
void _dbg_write_lock(rwlock_t *rw, const char *bfile, int bline)
{
void *inline_pc = __builtin_return_address(0);
unsigned long started = jiffies;
long stuck = INIT_STUCK;
int printed = 0;
int cpu = smp_processor_id();
if(unlikely(in_interrupt())) { /* acquiring write lock in interrupt context, bad idea */
pdc_printf("write_lock caller: %s:%d, IRQs enabled,\n", bfile, bline);
BUG();
}
/* Note: if interrupts are disabled (which is most likely), the printk
will never show on the console. We might need a polling method to flush
the dmesg buffer anyhow. */
retry:
_raw_spin_lock(&rw->lock);
if(rw->counter != 0) {
/* this basically never happens */
_raw_spin_unlock(&rw->lock);
stuck--;
/* counter < 0: another writer holds it */
if ((unlikely(stuck <= 0)) && (rw->counter < 0)) {
pdc_printf(
"%s:%d: write_lock stuck on writer"
" in %s at %p(%d) %ld ticks\n",
bfile, bline, current->comm, inline_pc,
cpu, jiffies - started);
stuck = INIT_STUCK;
printed = 1;
}
/* otherwise readers are holding it */
else if (unlikely(stuck <= 0)) {
pdc_printf(
"%s:%d: write_lock stuck on reader"
" in %s at %p(%d) %ld ticks\n",
bfile, bline, current->comm, inline_pc,
cpu, jiffies - started);
stuck = INIT_STUCK;
printed = 1;
}
/* wait, without holding rw->lock, for the counter to drain */
while(rw->counter != 0);
goto retry;
}
/* got it. now leave without unlocking */
rw->counter = -1; /* remember we are locked */
if (unlikely(printed)) {
pdc_printf(
"%s:%d: write_lock grabbed in %s at %p(%d) %ld ticks\n",
bfile, bline, current->comm, inline_pc,
cpu, jiffies - started);
}
}
/*
 * Debug variant of write_trylock().
 *
 * BUG()s when called from interrupt context. Takes rw->lock; if
 * rw->counter is non-zero the lock is released again and 0 is returned.
 * Otherwise the counter is set to -1 and 1 is returned WITHOUT releasing
 * rw->lock. The "#if 0" sections are disabled tracing.
 */
int _dbg_write_trylock(rwlock_t *rw, const char *bfile, int bline)
{
#if 0
void *inline_pc = __builtin_return_address(0);
int cpu = smp_processor_id();
#endif
if(unlikely(in_interrupt())) { /* acquiring write lock in interrupt context, bad idea */
pdc_printf("write_lock caller: %s:%d, IRQs enabled,\n", bfile, bline);
BUG();
}
/* Note: if interrupts are disabled (which is most likely), the printk
will never show on the console. We might need a polling method to flush
the dmesg buffer anyhow. */
_raw_spin_lock(&rw->lock);
if(rw->counter != 0) {
/* this basically never happens */
_raw_spin_unlock(&rw->lock);
return 0;
}
/* got it. now leave without unlocking */
rw->counter = -1; /* remember we are locked */
#if 0
pdc_printf("%s:%d: try write_lock grabbed in %s at %p(%d)\n",
bfile, bline, current->comm, inline_pc, cpu);
#endif
return 1;
}
/*
 * Debug variant of read_lock().
 *
 * With local interrupts saved/disabled, takes rw->lock, increments
 * rw->counter and releases rw->lock again. The "#if 0" sections are
 * disabled tracing; bfile/bline are only used by them.
 */
void _dbg_read_lock(rwlock_t * rw, const char *bfile, int bline)
{
#if 0
void *inline_pc = __builtin_return_address(0);
unsigned long started = jiffies;
int cpu = smp_processor_id();
#endif
unsigned long flags;
local_irq_save(flags);
_raw_spin_lock(&rw->lock);
rw->counter++;
#if 0
pdc_printf(
"%s:%d: read_lock grabbed in %s at %p(%d) %ld ticks\n",
bfile, bline, current->comm, inline_pc,
cpu, jiffies - started);
#endif
_raw_spin_unlock(&rw->lock);
local_irq_restore(flags);
}
#endif /* CONFIG_DEBUG_RWLOCK */

89
arch/parisc/lib/fixup.S Normale Datei
Datei anzeigen

@@ -0,0 +1,89 @@
/*
* Linux/PA-RISC Project (http://www.parisc-linux.org/)
*
* Copyright (C) 2004 Randolph Chung <tausq@debian.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Fixup routines for kernel exception handling.
*/
#include <linux/config.h>
#include <asm/offsets.h>
#include <asm/assembly.h>
#include <asm/errno.h>
#ifdef CONFIG_SMP
/*
 * get_fault_ip t1 t2 (SMP version)
 *
 * Leaves exception_data.fault_ip of the current CPU in \t1.
 * \t2 is scratch. The addil/LDREG pairs go through %r1, so %r1 is
 * clobbered as well.
 */
.macro get_fault_ip t1 t2
/* t1 = address of the __per_cpu_offset table */
addil LT%__per_cpu_offset,%r27
LDREG RT%__per_cpu_offset(%r1),\t1
/* t2 = smp_processor_id() */
mfctl 30,\t2
ldw TI_CPU(\t2),\t2
#ifdef __LP64__
extrd,u \t2,63,32,\t2
#endif
/* t2 = __per_cpu_offset[smp_processor_id()] (scaled-index load) */
LDREG,s \t2(\t1),\t2
addil LT%per_cpu__exception_data,%r27
LDREG RT%per_cpu__exception_data(%r1),\t1
/* t1 = &__get_cpu_var(exception_data) */
add,l \t1,\t2,\t1
/* t1 = t1->fault_ip */
LDREG EXCDATA_IP(\t1), \t1
.endm
#else
/*
 * get_fault_ip t1 t2 (uniprocessor version)
 *
 * Leaves exception_data.fault_ip in \t1. \t2 is scratch and %r1 is
 * clobbered by the addil/LDREG pair.
 */
.macro get_fault_ip t1 t2
/* t2 = &__get_cpu_var(exception_data) */
addil LT%per_cpu__exception_data,%r27
LDREG RT%per_cpu__exception_data(%r1),\t2
/* t1 = t2->fault_ip */
LDREG EXCDATA_IP(\t2), \t1
.endm
#endif
.text
.section .fixup, "ax"
/* get_user() fixups, store -EFAULT in r8, and 0 in r9 */
/*
 * get_user() fixup: resume at fault_ip + 4 with %r8 = -EFAULT and
 * %r9 = 0. %r8 doubles as scratch for get_fault_ip before it receives
 * the error code.
 */
.export fixup_get_user_skip_1
fixup_get_user_skip_1:
get_fault_ip %r1,%r8
ldo 4(%r1), %r1
ldi -EFAULT, %r8
bv %r0(%r1)
/* follows the branch: expected to run in its delay slot */
copy %r0, %r9
/*
 * get_user() fixup: resume at fault_ip + 8 with %r8 = -EFAULT and
 * %r9 = 0. %r8 doubles as scratch for get_fault_ip before it receives
 * the error code.
 */
.export fixup_get_user_skip_2
fixup_get_user_skip_2:
get_fault_ip %r1,%r8
ldo 8(%r1), %r1
ldi -EFAULT, %r8
bv %r0(%r1)
/* follows the branch: expected to run in its delay slot */
copy %r0, %r9
/* put_user() fixups, store -EFAULT in r8 */
/*
 * put_user() fixup: resume at fault_ip + 4 with %r8 = -EFAULT.
 * %r8 is first used as scratch by get_fault_ip.
 */
.export fixup_put_user_skip_1
fixup_put_user_skip_1:
get_fault_ip %r1,%r8
ldo 4(%r1), %r1
bv %r0(%r1)
/* follows the branch: expected to run in its delay slot */
ldi -EFAULT, %r8
/*
 * put_user() fixup: resume at fault_ip + 8 with %r8 = -EFAULT.
 * %r8 is first used as scratch by get_fault_ip.
 */
.export fixup_put_user_skip_2
fixup_put_user_skip_2:
get_fault_ip %r1,%r8
ldo 8(%r1), %r1
bv %r0(%r1)
/* follows the branch: expected to run in its delay slot */
ldi -EFAULT, %r8

488
arch/parisc/lib/io.c Normale Datei
Datei anzeigen

@@ -0,0 +1,488 @@
/*
* arch/parisc/lib/io.c
*
* Copyright (c) Matthew Wilcox 2001 for Hewlett-Packard
* Copyright (c) Randolph Chung 2001 <tausq@debian.org>
*
* IO accessing functions which shouldn't be inlined because they're too big
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <asm/io.h>
/*
 * memcpy_toio - copy @count bytes from kernel memory to a device.
 *
 * Assumes the device can cope with 32-bit transfers. If it can't,
 * don't use this function.
 *
 * When @dst and @src share the same alignment within a 32-bit word, the
 * copy is done as: leading bytes up to 32-bit alignment, 32-bit words,
 * trailing bytes. Otherwise everything is copied bytewise.
 *
 * A @count <= 0 copies nothing. The alignment loop is bounded by @count,
 * so a request shorter than the alignment gap no longer writes past the
 * requested length (and no longer leaves a negative count for the tail
 * loop to run away with).
 */
void memcpy_toio(volatile void __iomem *dst, const void *src, int count)
{
	if (((unsigned long)dst & 3) != ((unsigned long)src & 3))
		goto bytecopy;

	/* leading bytes: at most 3, and never more than requested */
	while (count > 0 && ((unsigned long)dst & 3)) {
		writeb(*(char *)src, dst++);
		src++;
		count--;
	}

	/* aligned body */
	while (count > 3) {
		__raw_writel(*(u32 *)src, dst);
		src += 4;
		dst += 4;
		count -= 4;
	}

 bytecopy:
	while (count-- > 0) {
		writeb(*(char *)src, dst++);
		src++;
	}
}
/*
** memcpy_fromio - copy @count bytes from a device to kernel memory.
**
** Assumes the device can cope with 32-bit transfers. If it can't,
** don't use this function.
**
** CR16 counts on C3000 reading 256 bytes from Symbios 896 RAM:
**	27341/64    = 427 cyc per int
**	61311/128   = 478 cyc per short
**	122637/256  = 479 cyc per byte
** Ergo bus latencies dominant (not transfer size).
**      Minimize total number of transfers at cost of CPU cycles.
**	TODO: only look at src alignment and adjust the stores to dest.
**
** Strategy: if src and dst differ in bit 0 only byte accesses are
** possible. Otherwise a leading odd byte is consumed first, so that both
** pointers are 16-bit aligned before any 16-bit access is made; the old
** code could reach the 16-bit loop with both pointers odd. After that,
** 32-bit accesses are used when the pointers also agree in bit 1, 16-bit
** accesses otherwise. A @count <= 0 copies nothing.
*/
void memcpy_fromio(void *dst, const volatile void __iomem *src, int count)
{
	/* first compare alignment of src/dst */
	if ( (((unsigned long)dst ^ (unsigned long)src) & 1) || (count < 2) )
		goto bytecopy;

	/* same parity: get rid of an odd start address right away */
	if ((unsigned long)src & 1) {
		*(u8 *)dst = readb(src);
		src++;
		dst++;
		count--;
		if (count < 2) goto bytecopy;
	}

	/* both pointers are now 16-bit aligned */
	if ( (((unsigned long)dst ^ (unsigned long)src) & 2) || (count < 4) )
		goto shortcopy;

	/* reach 32-bit alignment */
	if ((unsigned long)src & 2) {
		*(u16 *)dst = __raw_readw(src);
		src += 2;
		dst += 2;
		count -= 2;
	}

	while (count > 3) {
		*(u32 *)dst = __raw_readl(src);
		dst += 4;
		src += 4;
		count -= 4;
	}

 shortcopy:
	while (count > 1) {
		*(u16 *)dst = __raw_readw(src);
		src += 2;
		dst += 2;
		count -= 2;
	}

 bytecopy:
	while (count-- > 0) {
		*(char *)dst = readb(src);
		src++;
		dst++;
	}
}
/*
 * memset_io - set @count bytes of device memory at @addr to @val.
 *
 * Assumes the device can cope with 32-bit transfers. If it can't,
 * don't use this function.
 *
 * Leading bytes are written singly until @addr is 32-bit aligned, the
 * body is written as 32-bit words holding four copies of @val, and the
 * tail bytewise. A @count <= 0 writes nothing, and the alignment loop is
 * bounded by @count so a short request cannot overrun.
 */
void memset_io(volatile void __iomem *addr, unsigned char val, int count)
{
	/* widen before shifting: (int)val << 24 overflows for val >= 0x80 */
	u32 val32 = ((u32)val << 24) | ((u32)val << 16) | ((u32)val << 8) | val;

	while (count > 0 && ((unsigned long)addr & 3)) {
		writeb(val, addr++);
		count--;
	}

	while (count > 3) {
		__raw_writel(val32, addr);
		addr += 4;
		count -= 4;
	}

	while (count-- > 0) {
		writeb(val, addr++);
	}
}
/*
 * Read COUNT 8-bit bytes from port PORT into memory starting at DST.
 *
 * Bytes are stored singly until DST is 32-bit aligned; after that, four
 * port reads at a time are packed (first byte read into the most
 * significant position) and stored as one 32-bit word; any remainder is
 * stored bytewise.
 */
void insb (unsigned long port, void *dst, unsigned long count)
{
	unsigned char *out = (unsigned char *)dst;

	/* head: reach 32-bit alignment */
	for (; ((unsigned long)out) & 0x3; out++) {
		if (!count)
			return;
		count--;
		*out = inb(port);
	}

	/* body: one 32-bit store per four port reads */
	for (; count >= 4; count -= 4, out += 4) {
		unsigned int word;

		word = inb(port) << 24;
		word |= inb(port) << 16;
		word |= inb(port) << 8;
		word |= inb(port);
		*(unsigned int *) out = word;
	}

	/* tail */
	for (; count; count--, out++)
		*out = inb(port);
}
/*
 * Read COUNT 16-bit words from port PORT into memory starting at
 * DST. Handles any alignment of DST (see the switch below). This is
 * used by the IDE driver to read disk sectors. Performance is important,
 * but the interfaces seems to be slow: just using the inlined version
 * of the inw() breaks things.
 *
 * Each port word is passed through cpu_to_le16() before being stored.
 */
void insw (unsigned long port, void *dst, unsigned long count)
{
unsigned int l = 0, l2;
unsigned char *p;
p = (unsigned char *)dst;
if (!count)
return;
switch (((unsigned long)p) & 0x3)
{
case 0x00: /* Buffer 32-bit aligned */
/* two port words per 32-bit store */
while (count>=2) {
count -= 2;
l = cpu_to_le16(inw(port)) << 16;
l |= cpu_to_le16(inw(port));
*(unsigned int *)p = l;
p += 4;
}
/* odd word left over */
if (count) {
*(unsigned short *)p = cpu_to_le16(inw(port));
}
break;
case 0x02: /* Buffer 16-bit aligned */
/* one 16-bit store brings p to 32-bit alignment */
*(unsigned short *)p = cpu_to_le16(inw(port));
p += 2;
count--;
while (count>=2) {
count -= 2;
l = cpu_to_le16(inw(port)) << 16;
l |= cpu_to_le16(inw(port));
*(unsigned int *)p = l;
p += 4;
}
if (count) {
*(unsigned short *)p = cpu_to_le16(inw(port));
}
break;
case 0x01: /* Buffer 8-bit aligned */
case 0x03:
/* I don't bother with 32bit transfers
* in this case, 16bit will have to do -- DE */
--count;
l = cpu_to_le16(inw(port));
/* high byte of the first word goes out alone; p becomes even */
*p = l >> 8;
p++;
/* each store: low byte of previous word + high byte of new word */
while (count--)
{
l2 = cpu_to_le16(inw(port));
*(unsigned short *)p = (l & 0xff) << 8 | (l2 >> 8);
p += 2;
l = l2;
}
/* low byte of the last word */
*p = l & 0xff;
break;
}
}
/*
 * Read COUNT 32-bit words from port PORT into memory starting at
 * DST. Now works with any alignment in DST. Performance is important,
 * but the interfaces seems to be slow: just using the inlined version
 * of the inl() breaks things.
 *
 * Each port word is passed through cpu_to_le32() before being stored.
 * For unaligned DST, the first word is split into a head that brings p
 * to 32-bit alignment, the loop stores the rest of the previous word
 * joined with the start of the next, and the tail stores what remains
 * of the last word.
 */
void insl (unsigned long port, void *dst, unsigned long count)
{
unsigned int l = 0, l2;
unsigned char *p;
p = (unsigned char *)dst;
if (!count)
return;
switch (((unsigned long) dst) & 0x3)
{
case 0x00: /* Buffer 32-bit aligned */
while (count--)
{
*(unsigned int *)p = cpu_to_le32(inl(port));
p += 4;
}
break;
case 0x02: /* Buffer 16-bit aligned */
--count;
l = cpu_to_le32(inl(port));
/* head: upper half of the first word */
*(unsigned short *)p = l >> 16;
p += 2;
while (count--)
{
l2 = cpu_to_le32(inl(port));
*(unsigned int *)p = (l & 0xffff) << 16 | (l2 >> 16);
p += 4;
l = l2;
}
/* tail: lower half of the last word */
*(unsigned short *)p = l & 0xffff;
break;
case 0x01: /* Buffer 8-bit aligned */
--count;
l = cpu_to_le32(inl(port));
/* head: top byte, then the middle two bytes (3 bytes total) */
*(unsigned char *)p = l >> 24;
p++;
*(unsigned short *)p = (l >> 8) & 0xffff;
p += 2;
while (count--)
{
l2 = cpu_to_le32(inl(port));
*(unsigned int *)p = (l & 0xff) << 24 | (l2 >> 8);
p += 4;
l = l2;
}
/* tail: lowest byte of the last word */
*p = l & 0xff;
break;
case 0x03: /* Buffer 8-bit aligned */
--count;
l = cpu_to_le32(inl(port));
/* head: top byte only */
*p = l >> 24;
p++;
while (count--)
{
l2 = cpu_to_le32(inl(port));
*(unsigned int *)p = (l & 0xffffff) << 8 | l2 >> 24;
p += 4;
l = l2;
}
/* tail: remaining three bytes of the last word */
*(unsigned short *)p = (l >> 8) & 0xffff;
p += 2;
*p = l & 0xff;
break;
}
}
/*
 * Like insb but in the opposite direction: write COUNT bytes from SRC
 * to port PORT, one outb() per byte.
 * Don't worry as much about doing aligned memory transfers:
 * doing byte reads the "slow" way isn't nearly as slow as
 * doing byte writes the slow way (no r-m-w cycle).
 */
void outsb(unsigned long port, const void * src, unsigned long count)
{
	const unsigned char *in = (const unsigned char *)src;

	for (; count; count--, in++)
		outb(*in, port);
}
/*
 * Like insw but in the opposite direction: write COUNT 16-bit words from
 * SRC to port PORT. This is used by the IDE driver to write disk
 * sectors. Performance is important, but the interfaces seems to be
 * slow: just using the inlined version of the outw() breaks things.
 *
 * Each word is passed through le16_to_cpu() before being written.
 *
 * Any alignment of SRC is handled. Both odd alignments (1 and 3) take the
 * bytewise-reassembly path, as in insw(); previously an address with
 * (SRC & 3) == 3 matched no case and nothing was written. The last word
 * on that path is built from the carried high byte and the final byte;
 * previously the final byte was shifted away and lost.
 */
void outsw (unsigned long port, const void *src, unsigned long count)
{
	unsigned int l = 0, l2;
	const unsigned char *p;

	p = (const unsigned char *)src;

	if (!count)
		return;

	switch (((unsigned long)p) & 0x3)
	{
	 case 0x00:			/* Buffer 32-bit aligned */
		/* one 32-bit load feeds two port writes */
		while (count>=2) {
			count -= 2;
			l = *(unsigned int *)p;
			p += 4;
			outw(le16_to_cpu(l >> 16), port);
			outw(le16_to_cpu(l & 0xffff), port);
		}
		if (count) {
			outw(le16_to_cpu(*(unsigned short*)p), port);
		}
		break;

	 case 0x02:			/* Buffer 16-bit aligned */
		/* one 16-bit load brings p to 32-bit alignment */
		outw(le16_to_cpu(*(unsigned short*)p), port);
		p += 2;
		count--;

		while (count>=2) {
			count -= 2;
			l = *(unsigned int *)p;
			p += 4;
			outw(le16_to_cpu(l >> 16), port);
			outw(le16_to_cpu(l & 0xffff), port);
		}
		if (count) {
			outw(le16_to_cpu(*(unsigned short *)p), port);
		}
		break;

	 case 0x01:			/* Buffer 8-bit aligned */
	 case 0x03:
		/* I don't bother with 32bit transfers
		 * in this case, 16bit will have to do -- DE */

		/* first byte is the high byte of the first word */
		l  = *p << 8;
		p++;
		count--;
		while (count)
		{
			count--;
			l2 = *(unsigned short *)p;
			p += 2;
			outw(le16_to_cpu(l | l2 >> 8), port);
			/* carry the low byte over as the next high byte */
			l = (l2 & 0xff) << 8;
		}
		/* final byte is the low byte of the last word */
		l2 = *(unsigned char *)p;
		outw (le16_to_cpu(l | l2), port);
		break;
	}
}
/*
 * Like insl but in the opposite direction: write COUNT 32-bit words from
 * SRC to port PORT. This is used by the IDE driver to write disk
 * sectors. Works with any alignment in SRC.
 * Performance is important, but the interfaces seems to be slow:
 * just using the inlined version of the outl() breaks things.
 *
 * Each word is passed through le32_to_cpu() before being written.
 *
 * For unaligned SRC, the head bytes up to 32-bit alignment are carried in
 * the top of l, each loop iteration completes a word from the carry plus
 * the start of the next aligned load, and the tail bytes complete the
 * last word. In the (SRC & 3) == 3 case the trailing halfword belongs in
 * bits 23..8; it used to be shifted by 16 and collided with the carried
 * byte. Single bytes are widened to unsigned before "<< 24" so the shift
 * cannot overflow a signed int.
 */
void outsl (unsigned long port, const void *src, unsigned long count)
{
	unsigned int l = 0, l2;
	const unsigned char *p;

	p = (const unsigned char *)src;

	if (!count)
		return;

	switch (((unsigned long)p) & 0x3)
	{
	 case 0x00:			/* Buffer 32-bit aligned */
		while (count--)
		{
			outl(le32_to_cpu(*(unsigned int *)p), port);
			p += 4;
		}
		break;

	 case 0x02:			/* Buffer 16-bit aligned */
		--count;

		/* head: two bytes, kept in the low half of l */
		l = *(unsigned short *)p;
		p += 2;

		while (count--)
		{
			l2 = *(unsigned int *)p;
			p += 4;
			outl (le32_to_cpu(l << 16 | l2 >> 16), port);
			l = l2;
		}
		/* tail: two bytes */
		l2 = *(unsigned short *)p;
		outl (le32_to_cpu(l << 16 | l2), port);
		break;

	 case 0x01:			/* Buffer 8-bit aligned */
		--count;

		/* head: three bytes, carried in bits 31..8 */
		l  = (unsigned int)*p << 24;
		p++;
		l |= *(unsigned short *)p << 8;
		p += 2;

		while (count--)
		{
			l2 = *(unsigned int *)p;
			p += 4;
			outl (le32_to_cpu(l | l2 >> 24), port);
			l = l2 << 8;
		}
		/* tail: one byte */
		l2 = *p;
		outl (le32_to_cpu(l | l2), port);
		break;

	 case 0x03:			/* Buffer 8-bit aligned */
		--count;

		/* head: one byte, carried in bits 31..24 */
		l  = (unsigned int)*p << 24;
		p++;

		while (count--)
		{
			l2 = *(unsigned int *)p;
			p += 4;
			outl (le32_to_cpu(l | l2 >> 8), port);
			l = l2 << 24;
		}
		/* tail: halfword into bits 23..8, last byte into bits 7..0 */
		l2  = *(unsigned short *)p << 8;
		p += 2;
		l2 |= *p;
		outl (le32_to_cpu(l | l2), port);
		break;
	}
}
EXPORT_SYMBOL(insb);
EXPORT_SYMBOL(insw);
EXPORT_SYMBOL(insl);
EXPORT_SYMBOL(outsb);
EXPORT_SYMBOL(outsw);
EXPORT_SYMBOL(outsl);

422
arch/parisc/lib/iomap.c Normale Datei
Datei anzeigen

@@ -0,0 +1,422 @@
/*
* iomap.c - Implement iomap interface for PA-RISC
* Copyright (c) 2004 Matthew Wilcox
*/
#include <linux/ioport.h>
#include <linux/pci.h>
#include <asm/io.h>
/*
* The iomap space on 32-bit PA-RISC is intended to look like this:
* 00000000-7fffffff virtual mapped IO
* 80000000-8fffffff ISA/EISA port space that can't be virtually mapped
* 90000000-9fffffff Dino port space
* a0000000-afffffff Astro port space
* b0000000-bfffffff PAT port space
* c0000000-cfffffff non-swapped memory IO
* f0000000-ffffffff legacy IO memory pointers
*
* For the moment, here's what it looks like:
* 80000000-8fffffff All ISA/EISA port space
* f0000000-ffffffff legacy IO memory pointers
*
* On 64-bit, everything is extended, so:
* 8000000000000000-8fffffffffffffff All ISA/EISA port space
* f000000000000000-ffffffffffffffff legacy IO memory pointers
*/
/*
* Technically, this should be 'if (VMALLOC_START < addr < VMALLOC_END),
* but that's slow and we know it'll be within the first 2GB.
*/
/*
 * INDIRECT_ADDR(addr)  - non-zero when the top address bit is set, i.e.
 *                        the cookie must be dispatched through iomap_ops[].
 * ADDR_TO_REGION(addr) - index into iomap_ops[]: the three bits below the
 *                        top bit of the cookie.
 * IOPORT_MAP_BASE      - base of the port-space cookies (region 0).
 *
 * Macro arguments are fully parenthesised so an expression argument is
 * converted as a whole rather than having the cast bind to its first
 * operand.
 */
#ifdef CONFIG_64BIT
#define INDIRECT_ADDR(addr)	(((unsigned long)(addr) & 1UL<<63) != 0)
#define ADDR_TO_REGION(addr)    (((unsigned long)(addr) >> 60) & 7)
#define IOPORT_MAP_BASE		(8UL << 60)
#else
#define INDIRECT_ADDR(addr)     (((unsigned long)(addr) & 1UL<<31) != 0)
#define ADDR_TO_REGION(addr)    (((unsigned long)(addr) >> 28) & 7)
#define IOPORT_MAP_BASE		(8UL << 28)
#endif
/*
 * Accessor table for one indirect iomap region. The initialisers in this
 * file (ioport_ops, iomem_ops) fill it positionally, so the member order
 * must not change.
 */
struct iomap_ops {
/* single reads: return the value read from the cookie */
unsigned int (*read8)(void __iomem *);
unsigned int (*read16)(void __iomem *);
unsigned int (*read32)(void __iomem *);
/* single writes: (value, cookie) */
void (*write8)(u8, void __iomem *);
void (*write16)(u16, void __iomem *);
void (*write32)(u32, void __iomem *);
/* repeated reads: (cookie, destination buffer, element count) */
void (*read8r)(void __iomem *, void *, unsigned long);
void (*read16r)(void __iomem *, void *, unsigned long);
void (*read32r)(void __iomem *, void *, unsigned long);
/* repeated writes: (cookie, source buffer, element count) */
void (*write8r)(void __iomem *, const void *, unsigned long);
void (*write16r)(void __iomem *, const void *, unsigned long);
void (*write32r)(void __iomem *, const void *, unsigned long);
};
/* Generic ioport ops. To be replaced later by specific dino/elroy/wax code */
/* ADDR2PORT: recover the port number from a cookie by keeping its low 24 bits. */
#define ADDR2PORT(addr) ((unsigned long __force)(addr) & 0xffffff)
/* Read one byte from the I/O port encoded in @addr. */
static unsigned int ioport_read8(void __iomem *addr)
{
	unsigned int value = inb(ADDR2PORT(addr));

	return value;
}
/* Read one 16-bit word from the I/O port encoded in @addr. */
static unsigned int ioport_read16(void __iomem *addr)
{
	unsigned int value = inw(ADDR2PORT(addr));

	return value;
}
/* Read one 32-bit word from the I/O port encoded in @addr. */
static unsigned int ioport_read32(void __iomem *addr)
{
	unsigned int value = inl(ADDR2PORT(addr));

	return value;
}
/* Write the byte @datum to the I/O port encoded in @addr. */
static void ioport_write8(u8 datum, void __iomem *addr)
{
	const unsigned long port = ADDR2PORT(addr);

	outb(datum, port);
}
/* Write the 16-bit @datum to the I/O port encoded in @addr. */
static void ioport_write16(u16 datum, void __iomem *addr)
{
	const unsigned long port = ADDR2PORT(addr);

	outw(datum, port);
}
/* Write the 32-bit @datum to the I/O port encoded in @addr. */
static void ioport_write32(u32 datum, void __iomem *addr)
{
	const unsigned long port = ADDR2PORT(addr);

	outl(datum, port);
}
/* Read @count bytes from the port encoded in @addr into @dst (via insb). */
static void ioport_read8r(void __iomem *addr, void *dst, unsigned long count)
{
	const unsigned long port = ADDR2PORT(addr);

	insb(port, dst, count);
}
/* Read @count 16-bit words from the port encoded in @addr into @dst (via insw). */
static void ioport_read16r(void __iomem *addr, void *dst, unsigned long count)
{
	const unsigned long port = ADDR2PORT(addr);

	insw(port, dst, count);
}
/* Read @count 32-bit words from the port encoded in @addr into @dst (via insl). */
static void ioport_read32r(void __iomem *addr, void *dst, unsigned long count)
{
	const unsigned long port = ADDR2PORT(addr);

	insl(port, dst, count);
}
/* Write @n bytes from @s to the port encoded in @addr (via outsb). */
static void ioport_write8r(void __iomem *addr, const void *s, unsigned long n)
{
	const unsigned long port = ADDR2PORT(addr);

	outsb(port, s, n);
}
/* Write @n 16-bit words from @s to the port encoded in @addr (via outsw). */
static void ioport_write16r(void __iomem *addr, const void *s, unsigned long n)
{
	const unsigned long port = ADDR2PORT(addr);

	outsw(port, s, n);
}
/* Write @n 32-bit words from @s to the port encoded in @addr (via outsl). */
static void ioport_write32r(void __iomem *addr, const void *s, unsigned long n)
{
	const unsigned long port = ADDR2PORT(addr);

	outsl(port, s, n);
}
static const struct iomap_ops ioport_ops = {
ioport_read8,
ioport_read16,
ioport_read32,
ioport_write8,
ioport_write16,
ioport_write32,
ioport_read8r,
ioport_read16r,
ioport_read32r,
ioport_write8r,
ioport_write16r,
ioport_write32r,
};
/* Legacy I/O memory ops */
/* Read one byte from legacy I/O memory with readb(). */
static unsigned int iomem_read8(void __iomem *addr)
{
	unsigned int value = readb(addr);

	return value;
}
/* Read one 16-bit word from legacy I/O memory with readw(). */
static unsigned int iomem_read16(void __iomem *addr)
{
	unsigned int value = readw(addr);

	return value;
}
/* Read one 32-bit word from legacy I/O memory with readl(). */
static unsigned int iomem_read32(void __iomem *addr)
{
	unsigned int value = readl(addr);

	return value;
}
/* Write the byte @datum to legacy I/O memory with writeb(). */
static void iomem_write8(u8 datum, void __iomem *addr)
{
	void __iomem *target = addr;

	writeb(datum, target);
}
/* Write the 16-bit @datum to legacy I/O memory with writew(). */
static void iomem_write16(u16 datum, void __iomem *addr)
{
	void __iomem *target = addr;

	writew(datum, target);
}
/* Write the 32-bit @datum to legacy I/O memory with writel(). */
static void iomem_write32(u32 datum, void __iomem *addr)
{
	void __iomem *target = addr;

	writel(datum, target);
}
/*
 * Read the same I/O memory location @count times with __raw_readb(),
 * storing the bytes consecutively at @dst.
 */
static void iomem_read8r(void __iomem *addr, void *dst, unsigned long count)
{
	u8 *out = dst;
	unsigned long i;

	for (i = 0; i < count; i++)
		out[i] = __raw_readb(addr);
}
/*
 * Read the same I/O memory location @count times with __raw_readw(),
 * storing the 16-bit values consecutively at @dst.
 */
static void iomem_read16r(void __iomem *addr, void *dst, unsigned long count)
{
	u16 *out = dst;
	unsigned long i;

	for (i = 0; i < count; i++)
		out[i] = __raw_readw(addr);
}
/*
 * Read the same I/O memory location @count times with __raw_readl(),
 * storing the 32-bit values consecutively at @dst.
 */
static void iomem_read32r(void __iomem *addr, void *dst, unsigned long count)
{
	u32 *out = dst;
	unsigned long i;

	for (i = 0; i < count; i++)
		out[i] = __raw_readl(addr);
}
/*
 * Write @n consecutive bytes from @s to the same I/O memory location
 * with __raw_writeb().
 */
static void iomem_write8r(void __iomem *addr, const void *s, unsigned long n)
{
	const u8 *in = s;
	unsigned long i;

	for (i = 0; i < n; i++)
		__raw_writeb(in[i], addr);
}
/*
 * Write @n consecutive 16-bit values from @s to the same I/O memory
 * location with __raw_writew().
 */
static void iomem_write16r(void __iomem *addr, const void *s, unsigned long n)
{
	const u16 *in = s;
	unsigned long i;

	for (i = 0; i < n; i++)
		__raw_writew(in[i], addr);
}
/*
 * Write @n consecutive 32-bit values from @s to the same I/O memory
 * location with __raw_writel().
 */
static void iomem_write32r(void __iomem *addr, const void *s, unsigned long n)
{
	const u32 *in = s;
	unsigned long i;

	for (i = 0; i < n; i++)
		__raw_writel(in[i], addr);
}
static const struct iomap_ops iomem_ops = {
iomem_read8,
iomem_read16,
iomem_read32,
iomem_write8,
iomem_write16,
iomem_write32,
iomem_read8r,
iomem_read16r,
iomem_read32r,
iomem_write8r,
iomem_write16r,
iomem_write32r,
};
/*
 * Dispatch table indexed by ADDR_TO_REGION(cookie).
 * Region 0 (cookies based at IOPORT_MAP_BASE) is port space; region 7, or
 * region 6 when CONFIG_DEBUG_IOREMAP is set, is legacy I/O memory.
 * All other slots are left NULL, so an indirect cookie from any other
 * region dereferences a NULL table in the accessors below.
 */
const struct iomap_ops *iomap_ops[8] = {
[0] = &ioport_ops,
#ifdef CONFIG_DEBUG_IOREMAP
[6] = &iomem_ops,
#else
[7] = &iomem_ops
#endif
};
/*
 * Read one byte through an iomap cookie: direct cookies are dereferenced
 * as a plain pointer, indirect ones go through the region's read8 op.
 */
unsigned int ioread8(void __iomem *addr)
{
	const struct iomap_ops *ops;

	if (!unlikely(INDIRECT_ADDR(addr)))
		return *((u8 *)addr);

	ops = iomap_ops[ADDR_TO_REGION(addr)];
	return ops->read8(addr);
}
/*
 * Read one 16-bit value through an iomap cookie: direct cookies are read
 * with le16_to_cpup(), indirect ones go through the region's read16 op.
 */
unsigned int ioread16(void __iomem *addr)
{
	const struct iomap_ops *ops;

	if (!unlikely(INDIRECT_ADDR(addr)))
		return le16_to_cpup((u16 *)addr);

	ops = iomap_ops[ADDR_TO_REGION(addr)];
	return ops->read16(addr);
}
/*
 * Read one 32-bit value through an iomap cookie: direct cookies are read
 * with le32_to_cpup(), indirect ones go through the region's read32 op.
 */
unsigned int ioread32(void __iomem *addr)
{
	const struct iomap_ops *ops;

	if (!unlikely(INDIRECT_ADDR(addr)))
		return le32_to_cpup((u32 *)addr);

	ops = iomap_ops[ADDR_TO_REGION(addr)];
	return ops->read32(addr);
}
/*
 * Write one byte through an iomap cookie: indirect cookies go through the
 * region's write8 op, direct ones are stored through a plain pointer.
 */
void iowrite8(u8 datum, void __iomem *addr)
{
	if (unlikely(INDIRECT_ADDR(addr))) {
		iomap_ops[ADDR_TO_REGION(addr)]->write8(datum, addr);
		return;
	}

	*((u8 *)addr) = datum;
}
/*
 * Write one 16-bit value through an iomap cookie: indirect cookies go
 * through the region's write16 op, direct ones store cpu_to_le16(datum).
 */
void iowrite16(u16 datum, void __iomem *addr)
{
	if (unlikely(INDIRECT_ADDR(addr))) {
		iomap_ops[ADDR_TO_REGION(addr)]->write16(datum, addr);
		return;
	}

	*((u16 *)addr) = cpu_to_le16(datum);
}
/*
 * Write one 32-bit value through an iomap cookie: indirect cookies go
 * through the region's write32 op, direct ones store cpu_to_le32(datum).
 */
void iowrite32(u32 datum, void __iomem *addr)
{
	if (unlikely(INDIRECT_ADDR(addr))) {
		iomap_ops[ADDR_TO_REGION(addr)]->write32(datum, addr);
		return;
	}

	*((u32 *)addr) = cpu_to_le32(datum);
}
/* Repeating interfaces */
/*
 * Read @count bytes from the single location @addr into consecutive bytes
 * at @dst. Indirect cookies are handed to the region's read8r op.
 */
void ioread8_rep(void __iomem *addr, void *dst, unsigned long count)
{
	u8 *out = dst;
	unsigned long i;

	if (unlikely(INDIRECT_ADDR(addr))) {
		iomap_ops[ADDR_TO_REGION(addr)]->read8r(addr, dst, count);
		return;
	}

	for (i = 0; i < count; i++)
		out[i] = *(u8 *)addr;
}
/*
 * Read count 16-bit values from the single location addr into the buffer
 * at dst, without byte swapping.  Indirect cookies are handed to the
 * region's read16r operation.
 */
void ioread16_rep(void __iomem *addr, void *dst, unsigned long count)
{
	u16 *out = dst;

	if (unlikely(INDIRECT_ADDR(addr))) {
		iomap_ops[ADDR_TO_REGION(addr)]->read16r(addr, dst, count);
		return;
	}

	for (; count != 0; count--, out++)
		*out = *(u16 *)addr;
}
/*
 * Read count 32-bit values from the single location addr into the buffer
 * at dst, without byte swapping.  Indirect cookies are handed to the
 * region's read32r operation.
 */
void ioread32_rep(void __iomem *addr, void *dst, unsigned long count)
{
	u32 *out = dst;

	if (unlikely(INDIRECT_ADDR(addr))) {
		iomap_ops[ADDR_TO_REGION(addr)]->read32r(addr, dst, count);
		return;
	}

	for (; count != 0; count--, out++)
		*out = *(u32 *)addr;
}
/*
 * Write count bytes from the buffer at src to the single location addr.
 * Indirect cookies are handed to the region's write8r operation.
 */
void iowrite8_rep(void __iomem *addr, const void *src, unsigned long count)
{
	const u8 *in = src;

	if (unlikely(INDIRECT_ADDR(addr))) {
		iomap_ops[ADDR_TO_REGION(addr)]->write8r(addr, src, count);
		return;
	}

	for (; count != 0; count--, in++)
		*(u8 *)addr = *in;
}
/*
 * Write count 16-bit values from the buffer at src to the single location
 * addr, without byte swapping.  Indirect cookies are handed to the
 * region's write16r operation.
 */
void iowrite16_rep(void __iomem *addr, const void *src, unsigned long count)
{
	const u16 *in = src;

	if (unlikely(INDIRECT_ADDR(addr))) {
		iomap_ops[ADDR_TO_REGION(addr)]->write16r(addr, src, count);
		return;
	}

	for (; count != 0; count--, in++)
		*(u16 *)addr = *in;
}
/*
 * Write count 32-bit values from the buffer at src to the single location
 * addr, without byte swapping.  Indirect cookies are handed to the
 * region's write32r operation.
 */
void iowrite32_rep(void __iomem *addr, const void *src, unsigned long count)
{
	const u32 *in = src;

	if (unlikely(INDIRECT_ADDR(addr))) {
		iomap_ops[ADDR_TO_REGION(addr)]->write32r(addr, src, count);
		return;
	}

	for (; count != 0; count--, in++)
		*(u32 *)addr = *in;
}
/* Mapping interfaces */
/*
 * Build the iomap cookie for an I/O port by OR-ing the port number into
 * IOPORT_MAP_BASE.  The nr argument is not used.
 */
void __iomem *ioport_map(unsigned long port, unsigned int nr)
{
	unsigned long cookie = IOPORT_MAP_BASE | port;

	return (void __iomem *)cookie;
}
/*
 * Release a cookie.  Indirect cookies need no teardown; any other address
 * is passed to iounmap().
 */
void ioport_unmap(void __iomem *addr)
{
	if (INDIRECT_ADDR(addr))
		return;

	iounmap(addr);
}
/*
 * Create a virtual mapping cookie for a PCI BAR (memory or IO).
 *
 * Returns NULL when the BAR has no start address or length, or when it is
 * neither an I/O nor a memory resource.  A non-zero maxlen caps the length
 * that gets mapped.
 */
void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
{
	unsigned long bar_flags = pci_resource_flags(dev, bar);
	unsigned long bar_start = pci_resource_start(dev, bar);
	unsigned long bar_len = pci_resource_len(dev, bar);

	if (!bar_len || !bar_start)
		return NULL;

	if (maxlen && bar_len > maxlen)
		bar_len = maxlen;

	if (bar_flags & IORESOURCE_IO)
		return ioport_map(bar_start, bar_len);

	if (!(bar_flags & IORESOURCE_MEM)) {
		/* What? */
		return NULL;
	}

	if (bar_flags & IORESOURCE_CACHEABLE)
		return ioremap(bar_start, bar_len);

	return ioremap_nocache(bar_start, bar_len);
}
/*
 * Undo pci_iomap().  Indirect cookies need no teardown; any other address
 * is passed to iounmap().  The dev argument is not used.
 */
void pci_iounmap(struct pci_dev *dev, void __iomem * addr)
{
	if (INDIRECT_ADDR(addr))
		return;

	iounmap(addr);
}
/* Export the iomap accessors and mapping helpers defined above. */
EXPORT_SYMBOL(ioread8);
EXPORT_SYMBOL(ioread16);
EXPORT_SYMBOL(ioread32);
EXPORT_SYMBOL(iowrite8);
EXPORT_SYMBOL(iowrite16);
EXPORT_SYMBOL(iowrite32);
EXPORT_SYMBOL(ioread8_rep);
EXPORT_SYMBOL(ioread16_rep);
EXPORT_SYMBOL(ioread32_rep);
EXPORT_SYMBOL(iowrite8_rep);
EXPORT_SYMBOL(iowrite16_rep);
EXPORT_SYMBOL(iowrite32_rep);
EXPORT_SYMBOL(ioport_map);
EXPORT_SYMBOL(ioport_unmap);
EXPORT_SYMBOL(pci_iomap);
EXPORT_SYMBOL(pci_iounmap);

193
arch/parisc/lib/lusercopy.S Normale Datei
Datei anzeigen

@@ -0,0 +1,193 @@
/*
* User Space Access Routines
*
* Copyright (C) 2000-2002 Hewlett-Packard (John Marvin)
* Copyright (C) 2000 Richard Hirst <rhirst with parisc-linux.org>
* Copyright (C) 2001 Matthieu Delahaye <delahaym at esiee.fr>
* Copyright (C) 2003 Randolph Chung <tausq with parisc-linux.org>
*
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* These routines still have plenty of room for optimization
* (word & doubleword load/store, dual issue, store hints, etc.).
*/
/*
* The following routines assume that space register 3 (sr3) contains
* the space id associated with the current users address space.
*/
.text
#include <asm/assembly.h>
#include <asm/errno.h>
/*
* get_sr gets the appropriate space value into
* sr1 for kernel/user space access, depending
* on the flag stored in the task structure.
*
* Clobbers %r1 and %r22.
*/
.macro get_sr
mfctl %cr30,%r1 /* %r1 = %cr30, the base for the TI_SEGMENT load */
ldw TI_SEGMENT(%r1),%r22 /* %r22 = segment flag */
mfsp %sr3,%r1 /* default: the space id held in %sr3 */
or,<> %r22,%r0,%r0 /* flag non-zero: nullify the next instruction */
copy %r0,%r1 /* flag zero: use space 0 instead */
mtsp %r1,%sr1
.endm
/*
* fixup_branch loads the address of the given label into %r1 and branches
* to it. The instruction that follows the macro invocation sits in the
* delay slot of the bv and is still executed.
*/
.macro fixup_branch lbl
ldil L%\lbl, %r1
ldo R%\lbl(%r1), %r1
bv %r0(%r1)
.endm
/*
* long lstrncpy_from_user(char *dst, const char *src, long n)
*
* Returns -EFAULT if exception before terminator,
* N if the entire buffer filled,
* otherwise strlen (i.e. excludes zero byte)
*
* Register use: %r26 = dst, %r25 = src, %r24 = bytes remaining,
* %r23 = original n, %r28 = return value, %r1 = current byte.
* get_sr additionally clobbers %r22.
*/
.export lstrncpy_from_user,code
lstrncpy_from_user:
.proc
.callinfo NO_CALLS
.entry
comib,= 0,%r24,$lsfu_done /* n == 0: nothing to copy */
copy %r24,%r23 /* delay slot: remember the original n */
get_sr
1: ldbs,ma 1(%sr1,%r25),%r1 /* fetch first byte, post-increment src */
$lsfu_loop:
stbs,ma %r1,1(%r26) /* store byte, post-increment dst */
comib,=,n 0,%r1,$lsfu_done /* terminator copied: stop without counting it */
addib,<>,n -1,%r24,$lsfu_loop /* count the byte, loop while any remain */
2: ldbs,ma 1(%sr1,%r25),%r1 /* delay slot: fetch the next byte */
$lsfu_done:
sub %r23,%r24,%r28 /* return n minus the bytes still remaining */
$lsfu_exit:
bv %r0(%r2)
nop
.exit
.section .fixup,"ax"
/* A fault in either load (1b or 2b) lands here and returns -EFAULT. */
3: fixup_branch $lsfu_exit
ldi -EFAULT,%r28 /* delay slot of the fixup branch */
.previous
.section __ex_table,"aw"
#ifdef __LP64__
.dword 1b,3b
.dword 2b,3b
#else
.word 1b,3b
.word 2b,3b
#endif
.previous
.procend
/*
* unsigned long lclear_user(void *to, unsigned long n)
*
* Returns 0 for success.
* otherwise, returns number of bytes not transferred.
*
* Register use: %r26 = to, %r25 = bytes remaining, %r28 = return value.
* get_sr clobbers %r1 and %r22.
*/
.export lclear_user,code
lclear_user:
.proc
.callinfo NO_CALLS
.entry
comib,=,n 0,%r25,$lclu_done /* n == 0: nothing to clear */
get_sr
$lclu_loop:
addib,<> -1,%r25,$lclu_loop /* decrement count, loop while non-zero */
1: stbs,ma %r0,1(%sr1,%r26) /* delay slot: store a zero byte, post-increment */
$lclu_done:
bv %r0(%r2)
copy %r25,%r28 /* delay slot: return the remaining count */
.exit
.section .fixup,"ax"
/*
* A faulting store lands here. The count was already decremented for the
* byte that failed, so add it back before returning.
*/
2: fixup_branch $lclu_done
ldo 1(%r25),%r25
.previous
.section __ex_table,"aw"
#ifdef __LP64__
.dword 1b,2b
#else
.word 1b,2b
#endif
.previous
.procend
/*
* long lstrnlen_user(char *s, long n)
*
* Returns 0 if exception before zero byte or reaching N,
* N+1 if N would be exceeded,
* else strlen + 1 (i.e. includes zero byte).
*
* Register use: %r26 = s (advanced as bytes are read), %r25 = bytes
* remaining, %r24 = original s, %r28 = return value, %r1 = current byte.
* get_sr additionally clobbers %r22.
*/
.export lstrnlen_user,code
lstrnlen_user:
.proc
.callinfo NO_CALLS
.entry
comib,= 0,%r25,$lslen_nzero /* n == 0 is handled separately */
copy %r26,%r24 /* delay slot: remember the start address */
get_sr
1: ldbs,ma 1(%sr1,%r26),%r1 /* fetch first byte, post-increment s */
$lslen_loop:
comib,=,n 0,%r1,$lslen_done /* zero byte found */
addib,<> -1,%r25,$lslen_loop /* count the byte, loop while any remain */
2: ldbs,ma 1(%sr1,%r26),%r1 /* delay slot: fetch the next byte */
$lslen_done:
bv %r0(%r2)
sub %r26,%r24,%r28 /* delay slot: return bytes advanced from the start */
.exit
$lslen_nzero:
b $lslen_done
ldo 1(%r26),%r26 /* special case for N == 0 */
.section .fixup,"ax"
/* A fault in either load (1b or 2b) lands here. */
3: fixup_branch $lslen_done
copy %r24,%r26 /* reset r26 so 0 is returned on fault */
.previous
.section __ex_table,"aw"
#ifdef __LP64__
.dword 1b,3b
.dword 2b,3b
#else
.word 1b,3b
.word 2b,3b
#endif
.previous
.procend
.end

522
arch/parisc/lib/memcpy.c Normale Datei
Datei anzeigen

@@ -0,0 +1,522 @@
/*
* Optimized memory copy routines.
*
* Copyright (C) 2004 Randolph Chung <tausq@debian.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Portions derived from the GNU C Library
* Copyright (C) 1991, 1997, 2003 Free Software Foundation, Inc.
*
* Several strategies are tried to try to get the best performance for various
* conditions. In the optimal case, we copy 64-bytes in an unrolled loop using
* fp regs. This is followed by loops that copy 32- or 16-bytes at a time using
* general registers. Unaligned copies are handled either by aligning the
* destination and then using shift-and-write method, or in a few cases by
* falling back to a byte-at-a-time copy.
*
* I chose to implement this in C because it is easier to maintain and debug,
* and in my experiments it appears that the C code generated by gcc (3.3/3.4
* at the time of writing) is fairly optimal. Unfortunately some of the
* semantics of the copy routine (exception handling) is difficult to express
* in C, so we have to play some tricks to get it to work.
*
* All the loads and stores are done via explicit asm() code in order to use
* the right space registers.
*
* Testing with various alignments and buffer sizes shows that this code is
* often >10x faster than a simple byte-at-a-time copy, even for strangely
* aligned operands. It is interesting to note that the glibc version
* of memcpy (written in C) is actually quite fast already. This routine is
* able to beat it by 30-40% for aligned copies because of the loop unrolling,
* but in some cases the glibc version is still slightly faster. This lends
* more credibility that gcc can generate very good code as long as we are
* careful.
*
* TODO:
* - cache prefetching needs more experimentation to get optimal settings
* - try not to use the post-increment address modifiers; they create additional
* interlocks
* - replace byte-copy loops with stybs sequences
*/
#ifdef __KERNEL__
#include <linux/config.h>
#include <linux/module.h>
#include <linux/compiler.h>
#include <asm/uaccess.h>
/* Kernel build: loads go through %sr1 and stores through %sr2. */
#define s_space "%%sr1"
#define d_space "%%sr2"
#else
#include "memcpy.h"
/* Non-kernel build: both sides use %sr0 and pa_memcpy is built as new2_copy. */
#define s_space "%%sr0"
#define d_space "%%sr0"
#define pa_memcpy new2_copy
#endif
/* Per-CPU record whose fault_addr field is read by the fault handlers below. */
DECLARE_PER_CPU(struct exception_data, exception_data);
/*
 * Emit a conditional goto to `label` so that the code at that label stays
 * reachable as far as the compiler can tell.  The test compares a volatile
 * local with itself.
 */
#define preserve_branch(label) do { \
volatile int dummy; \
/* The following branch is never taken, it's just here to */ \
/* prevent gcc from optimizing away our exception code. */ \
if (unlikely(dummy != dummy)) \
goto label; \
} while (0)
/* Space id for user accesses: 0 when get_fs() is KERNEL_DS, otherwise %sr3. */
#define get_user_space() (segment_eq(get_fs(), KERNEL_DS) ? 0 : mfsp(3))
/* Space id for kernel accesses. */
#define get_kernel_space() (0)
/*
 * Combine words w0 and w1 into one word with shrpw, using sh_2 as the
 * shift amount loaded into %sar.  sh_1 is accepted but not used.
 */
#define MERGE(w0, sh_1, w1, sh_2) ({ \
unsigned int _r; \
asm volatile ( \
"mtsar %3\n" \
"shrpw %1, %2, %%sar, %0\n" \
: "=r"(_r) \
: "r"(w0), "r"(w1), "r"(sh_2) \
); \
_r; \
})
/* Copies shorter than this many bytes go straight to the byte-copy loop. */
#define THRESHOLD 16
/* Debug trace helper; expands to nothing unless DEBUG_MEMCPY is defined. */
#ifdef DEBUG_MEMCPY
#define DPRINTF(fmt, args...) do { printk(KERN_DEBUG "%s:%d:%s ", __FILE__, __LINE__, __FUNCTION__ ); printk(KERN_DEBUG fmt, ##args ); } while (0)
#else
#define DPRINTF(fmt, args...)
#endif
/* Assembler directive used for __ex_table entries: .word, or .dword on LP64. */
#ifndef __LP64__
#define EXC_WORD ".word"
#else
#define EXC_WORD ".dword"
#endif
/*
 * Load with the ",ma" completer: read through space _s at address _a into
 * _t using displacement _sz.  _a is a read-write operand, so the updated
 * address is written back to it.  An __ex_table entry pairs the
 * instruction with the label _e.
 * NOTE(review): r8 is listed as clobbered although the template never
 * names it; presumably the fault path uses it -- confirm.
 */
#define def_load_ai_insn(_insn,_sz,_tt,_s,_a,_t,_e) \
__asm__ __volatile__ ( \
"1:\t" #_insn ",ma " #_sz "(" _s ",%1), %0\n" \
"\t.section __ex_table,\"aw\"\n" \
"\t" EXC_WORD "\t1b\n" \
"\t" EXC_WORD "\t" #_e "\n" \
"\t.previous\n" \
: _tt(_t), "+r"(_a) \
: \
: "r8")
/*
 * Store counterpart of def_load_ai_insn: write _t through space _s at
 * address _a with the ",ma" completer and displacement _sz, updating _a.
 */
#define def_store_ai_insn(_insn,_sz,_tt,_s,_a,_t,_e) \
__asm__ __volatile__ ( \
"1:\t" #_insn ",ma %1, " #_sz "(" _s ",%0)\n" \
"\t.section __ex_table,\"aw\"\n" \
"\t" EXC_WORD "\t1b\n" \
"\t" EXC_WORD "\t" #_e "\n" \
"\t.previous\n" \
: "+r"(_a) \
: _tt(_t) \
: "r8")
/* Byte, word and double-word (floating point register) variants. */
#define ldbma(_s, _a, _t, _e) def_load_ai_insn(ldbs,1,"=r",_s,_a,_t,_e)
#define stbma(_s, _t, _a, _e) def_store_ai_insn(stbs,1,"r",_s,_a,_t,_e)
#define ldwma(_s, _a, _t, _e) def_load_ai_insn(ldw,4,"=r",_s,_a,_t,_e)
#define stwma(_s, _t, _a, _e) def_store_ai_insn(stw,4,"r",_s,_a,_t,_e)
#define flddma(_s, _a, _t, _e) def_load_ai_insn(fldd,8,"=f",_s,_a,_t,_e)
#define fstdma(_s, _t, _a, _e) def_store_ai_insn(fstd,8,"f",_s,_a,_t,_e)
/*
 * Load at fixed offset _o from address _a through space _s into _t.  The
 * address operand is input-only.  An __ex_table entry pairs the
 * instruction with the label _e.
 */
#define def_load_insn(_insn,_tt,_s,_o,_a,_t,_e) \
__asm__ __volatile__ ( \
"1:\t" #_insn " " #_o "(" _s ",%1), %0\n" \
"\t.section __ex_table,\"aw\"\n" \
"\t" EXC_WORD "\t1b\n" \
"\t" EXC_WORD "\t" #_e "\n" \
"\t.previous\n" \
: _tt(_t) \
: "r"(_a) \
: "r8")
/* Store counterpart of def_load_insn: write _t at offset _o from _a. */
#define def_store_insn(_insn,_tt,_s,_t,_o,_a,_e) \
__asm__ __volatile__ ( \
"1:\t" #_insn " %0, " #_o "(" _s ",%1)\n" \
"\t.section __ex_table,\"aw\"\n" \
"\t" EXC_WORD "\t1b\n" \
"\t" EXC_WORD "\t" #_e "\n" \
"\t.previous\n" \
: \
: _tt(_t), "r"(_a) \
: "r8")
/* Word load/store at a fixed offset. */
#define ldw(_s,_o,_a,_t,_e) def_load_insn(ldw,"=r",_s,_o,_a,_t,_e)
#define stw(_s,_t,_o,_a,_e) def_store_insn(stw,"r",_s,_t,_o,_a,_e)
/*
 * Prefetch helpers.  With CONFIG_PREFETCH they issue a load into %r0
 * through the source or destination space; otherwise they expand to
 * nothing.
 */
#ifdef CONFIG_PREFETCH
extern inline void prefetch_src(const void *addr)
{
__asm__("ldw 0(" s_space ",%0), %%r0" : : "r" (addr));
}
extern inline void prefetch_dst(const void *addr)
{
__asm__("ldd 0(" d_space ",%0), %%r0" : : "r" (addr));
}
#else
#define prefetch_src(addr)
#define prefetch_dst(addr)
#endif
/* Copy from a not-aligned src to an aligned dst, using shifts. Handles 4 words
 * per loop. This code is derived from glibc.
 *
 * dst, src:  current destination and source addresses.
 * len:       number of 32-bit words to copy.
 * o_dst, o_src, o_len:  the caller's original destination, source and
 *            length in BYTES; used only to compute the return value
 *            after a fault.
 *
 * Returns 0 on success.  If a load or store faults, control arrives at the
 * cda_ldw_exc / cda_stw_exc labels through the exception table entries
 * emitted by the ldw()/stw() macros, and the number of bytes not
 * transferred is returned, computed from the recorded fault address.
 */
static inline unsigned long copy_dstaligned(unsigned long dst, unsigned long src, unsigned long len, unsigned long o_dst, unsigned long o_src, unsigned long o_len)
{
	/* gcc complains that a2 and a3 may be uninitialized, but actually
	 * they cannot be. Initialize a2/a3 to shut gcc up.
	 */
	register unsigned int a0, a1, a2 = 0, a3 = 0;
	int sh_1, sh_2;
	struct exception_data *d;

	/* prefetch_src((const void *)src); */

	/* Calculate how to shift a word read at the memory operation
	   aligned srcp to make it aligned for copy. */
	sh_1 = 8 * (src % sizeof(unsigned int));
	sh_2 = 8 * sizeof(unsigned int) - sh_1;

	/* Make src aligned by rounding it down. */
	src &= -sizeof(unsigned int);

	/* Enter the unrolled loop at the point matching len % 4. */
	switch (len % 4)
	{
		case 2:
			/* a1 = ((unsigned int *) src)[0];
			   a2 = ((unsigned int *) src)[1]; */
			ldw(s_space, 0, src, a1, cda_ldw_exc);
			ldw(s_space, 4, src, a2, cda_ldw_exc);
			src -= 1 * sizeof(unsigned int);
			dst -= 3 * sizeof(unsigned int);
			len += 2;
			goto do1;
		case 3:
			/* a0 = ((unsigned int *) src)[0];
			   a1 = ((unsigned int *) src)[1]; */
			ldw(s_space, 0, src, a0, cda_ldw_exc);
			ldw(s_space, 4, src, a1, cda_ldw_exc);
			src -= 0 * sizeof(unsigned int);
			dst -= 2 * sizeof(unsigned int);
			len += 1;
			goto do2;
		case 0:
			if (len == 0)
				return 0;
			/* a3 = ((unsigned int *) src)[0];
			   a0 = ((unsigned int *) src)[1]; */
			ldw(s_space, 0, src, a3, cda_ldw_exc);
			ldw(s_space, 4, src, a0, cda_ldw_exc);
			src -=-1 * sizeof(unsigned int);
			dst -= 1 * sizeof(unsigned int);
			len += 0;
			goto do3;
		case 1:
			/* a2 = ((unsigned int *) src)[0];
			   a3 = ((unsigned int *) src)[1]; */
			ldw(s_space, 0, src, a2, cda_ldw_exc);
			ldw(s_space, 4, src, a3, cda_ldw_exc);
			src -=-2 * sizeof(unsigned int);
			dst -= 0 * sizeof(unsigned int);
			len -= 1;
			if (len == 0)
				goto do0;
			goto do4; /* No-op. */
	}

	do
	{
		/* prefetch_src((const void *)(src + 4 * sizeof(unsigned int))); */
do4:
		/* a0 = ((unsigned int *) src)[0]; */
		ldw(s_space, 0, src, a0, cda_ldw_exc);
		/* ((unsigned int *) dst)[0] = MERGE (a2, sh_1, a3, sh_2); */
		stw(d_space, MERGE (a2, sh_1, a3, sh_2), 0, dst, cda_stw_exc);
do3:
		/* a1 = ((unsigned int *) src)[1]; */
		ldw(s_space, 4, src, a1, cda_ldw_exc);
		/* ((unsigned int *) dst)[1] = MERGE (a3, sh_1, a0, sh_2); */
		stw(d_space, MERGE (a3, sh_1, a0, sh_2), 4, dst, cda_stw_exc);
do2:
		/* a2 = ((unsigned int *) src)[2]; */
		ldw(s_space, 8, src, a2, cda_ldw_exc);
		/* ((unsigned int *) dst)[2] = MERGE (a0, sh_1, a1, sh_2); */
		stw(d_space, MERGE (a0, sh_1, a1, sh_2), 8, dst, cda_stw_exc);
do1:
		/* a3 = ((unsigned int *) src)[3]; */
		ldw(s_space, 12, src, a3, cda_ldw_exc);
		/* ((unsigned int *) dst)[3] = MERGE (a1, sh_1, a2, sh_2); */
		stw(d_space, MERGE (a1, sh_1, a2, sh_2), 12, dst, cda_stw_exc);
		src += 4 * sizeof(unsigned int);
		dst += 4 * sizeof(unsigned int);
		len -= 4;
	}
	while (len != 0);

do0:
	/* ((unsigned int *) dst)[0] = MERGE (a2, sh_1, a3, sh_2); */
	stw(d_space, MERGE (a2, sh_1, a3, sh_2), 0, dst, cda_stw_exc);

	preserve_branch(handle_load_error);
	preserve_branch(handle_store_error);

	return 0;

handle_load_error:
	__asm__ __volatile__ ("cda_ldw_exc:\n");
	d = &__get_cpu_var(exception_data);
	DPRINTF("cda_ldw_exc: o_len=%lu fault_addr=%lu o_src=%lu ret=%lu\n",
		o_len, d->fault_addr, o_src, o_len - d->fault_addr + o_src);
	/* o_len is already a byte count, so it must not be scaled by the
	 * word size; this matches the value traced just above. */
	return o_len - d->fault_addr + o_src;

handle_store_error:
	__asm__ __volatile__ ("cda_stw_exc:\n");
	d = &__get_cpu_var(exception_data);
	DPRINTF("cda_stw_exc: o_len=%lu fault_addr=%lu o_dst=%lu ret=%lu\n",
		o_len, d->fault_addr, o_dst, o_len - d->fault_addr + o_dst);
	/* See the load handler: o_len is in bytes. */
	return o_len - d->fault_addr + o_dst;
}
/*
 * Copy len bytes from srcp to dstp.  Loads go through s_space and stores
 * through d_space, which the caller must have set up beforehand.
 *
 * Strategy:
 *  - fewer than THRESHOLD bytes: plain byte loop;
 *  - src and dst equally aligned modulo sizeof(double): align with a byte
 *    loop, then copy 64 bytes per iteration through floating point
 *    registers, then 32 and 16 bytes per iteration through general
 *    registers, then the remaining bytes;
 *  - equally aligned modulo a word only: align, then join the word loops;
 *  - otherwise: align the destination and let copy_dstaligned() do the
 *    shift-and-merge copy, then finish with the byte loop.
 *
 * Returns 0 for success, otherwise, returns number of bytes not transferred.
 * Faults arrive at the pmc_load_exc / pmc_store_exc labels through the
 * exception table entries emitted by the load/store macros.
 */
unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len)
{
	register unsigned long src, dst, t1, t2, t3;
	register unsigned char *pcs, *pcd;
	register unsigned int *pws, *pwd;
	register double *pds, *pdd;
	unsigned long ret = 0;
	unsigned long o_dst, o_src, o_len;
	struct exception_data *d;

	src = (unsigned long)srcp;
	dst = (unsigned long)dstp;
	pcs = (unsigned char *)srcp;
	pcd = (unsigned char *)dstp;

	/* Keep the original arguments for the fault handlers. */
	o_dst = dst; o_src = src; o_len = len;

	/* prefetch_src((const void *)srcp); */

	if (len < THRESHOLD)
		goto byte_copy;

	/* Check alignment */
	t1 = (src ^ dst);
	if (unlikely(t1 & (sizeof(double)-1)))
		goto unaligned_copy;

	/* src and dst have same alignment. */

	/* Copy bytes till we are double-aligned. */
	t2 = src & (sizeof(double) - 1);
	if (unlikely(t2 != 0)) {
		t2 = sizeof(double) - t2;
		while (t2 && len) {
			/* *pcd++ = *pcs++; */
			ldbma(s_space, pcs, t3, pmc_load_exc);
			len--;
			stbma(d_space, t3, pcd, pmc_store_exc);
			t2--;
		}
	}

	pds = (double *)pcs;
	pdd = (double *)pcd;

	/* Copy 8 doubles at a time */
	while (len >= 8*sizeof(double)) {
		register double r1, r2, r3, r4, r5, r6, r7, r8;
		/* prefetch_src((char *)pds + L1_CACHE_BYTES); */
		flddma(s_space, pds, r1, pmc_load_exc);
		flddma(s_space, pds, r2, pmc_load_exc);
		flddma(s_space, pds, r3, pmc_load_exc);
		flddma(s_space, pds, r4, pmc_load_exc);
		fstdma(d_space, r1, pdd, pmc_store_exc);
		fstdma(d_space, r2, pdd, pmc_store_exc);
		fstdma(d_space, r3, pdd, pmc_store_exc);
		fstdma(d_space, r4, pdd, pmc_store_exc);

#if 0
		if (L1_CACHE_BYTES <= 32)
			prefetch_src((char *)pds + L1_CACHE_BYTES);
#endif
		flddma(s_space, pds, r5, pmc_load_exc);
		flddma(s_space, pds, r6, pmc_load_exc);
		flddma(s_space, pds, r7, pmc_load_exc);
		flddma(s_space, pds, r8, pmc_load_exc);
		fstdma(d_space, r5, pdd, pmc_store_exc);
		fstdma(d_space, r6, pdd, pmc_store_exc);
		fstdma(d_space, r7, pdd, pmc_store_exc);
		fstdma(d_space, r8, pdd, pmc_store_exc);
		len -= 8*sizeof(double);
	}

	pws = (unsigned int *)pds;
	pwd = (unsigned int *)pdd;

word_copy:
	/* Copy 8 words at a time */
	while (len >= 8*sizeof(unsigned int)) {
		register unsigned int r1,r2,r3,r4,r5,r6,r7,r8;
		/* prefetch_src((char *)pws + L1_CACHE_BYTES); */
		ldwma(s_space, pws, r1, pmc_load_exc);
		ldwma(s_space, pws, r2, pmc_load_exc);
		ldwma(s_space, pws, r3, pmc_load_exc);
		ldwma(s_space, pws, r4, pmc_load_exc);
		stwma(d_space, r1, pwd, pmc_store_exc);
		stwma(d_space, r2, pwd, pmc_store_exc);
		stwma(d_space, r3, pwd, pmc_store_exc);
		stwma(d_space, r4, pwd, pmc_store_exc);

		ldwma(s_space, pws, r5, pmc_load_exc);
		ldwma(s_space, pws, r6, pmc_load_exc);
		ldwma(s_space, pws, r7, pmc_load_exc);
		ldwma(s_space, pws, r8, pmc_load_exc);
		stwma(d_space, r5, pwd, pmc_store_exc);
		stwma(d_space, r6, pwd, pmc_store_exc);
		stwma(d_space, r7, pwd, pmc_store_exc);
		stwma(d_space, r8, pwd, pmc_store_exc);
		len -= 8*sizeof(unsigned int);
	}

	/* Copy 4 words at a time */
	while (len >= 4*sizeof(unsigned int)) {
		register unsigned int r1,r2,r3,r4;
		ldwma(s_space, pws, r1, pmc_load_exc);
		ldwma(s_space, pws, r2, pmc_load_exc);
		ldwma(s_space, pws, r3, pmc_load_exc);
		ldwma(s_space, pws, r4, pmc_load_exc);
		stwma(d_space, r1, pwd, pmc_store_exc);
		stwma(d_space, r2, pwd, pmc_store_exc);
		stwma(d_space, r3, pwd, pmc_store_exc);
		stwma(d_space, r4, pwd, pmc_store_exc);
		len -= 4*sizeof(unsigned int);
	}

	pcs = (unsigned char *)pws;
	pcd = (unsigned char *)pwd;

byte_copy:
	while (len) {
		/* *pcd++ = *pcs++; */
		ldbma(s_space, pcs, t3, pmc_load_exc);
		stbma(d_space, t3, pcd, pmc_store_exc);
		len--;
	}

	return 0;

unaligned_copy:
	/* possibly we are aligned on a word, but not on a double... */
	/* The whole comparison belongs inside likely(); the truth value is
	 * the same either way, but the hint now marks the word-aligned case
	 * as the expected one. */
	if (likely((t1 & (sizeof(unsigned int)-1)) == 0)) {
		t2 = src & (sizeof(unsigned int) - 1);

		if (unlikely(t2 != 0)) {
			t2 = sizeof(unsigned int) - t2;
			while (t2) {
				/* *pcd++ = *pcs++; */
				ldbma(s_space, pcs, t3, pmc_load_exc);
				stbma(d_space, t3, pcd, pmc_store_exc);
				len--;
				t2--;
			}
		}

		pws = (unsigned int *)pcs;
		pwd = (unsigned int *)pcd;
		goto word_copy;
	}

	/* Align the destination. */
	if (unlikely((dst & (sizeof(unsigned int) - 1)) != 0)) {
		t2 = sizeof(unsigned int) - (dst & (sizeof(unsigned int) - 1));
		while (t2) {
			/* *pcd++ = *pcs++; */
			ldbma(s_space, pcs, t3, pmc_load_exc);
			stbma(d_space, t3, pcd, pmc_store_exc);
			len--;
			t2--;
		}
		dst = (unsigned long)pcd;
		src = (unsigned long)pcs;
	}

	/* copy_dstaligned() takes a word count plus the original arguments. */
	ret = copy_dstaligned(dst, src, len / sizeof(unsigned int),
		o_dst, o_src, o_len);
	if (ret)
		return ret;

	/* Skip what was copied as whole words; the byte loop does the rest. */
	pcs += (len & -sizeof(unsigned int));
	pcd += (len & -sizeof(unsigned int));
	len %= sizeof(unsigned int);

	preserve_branch(handle_load_error);
	preserve_branch(handle_store_error);

	goto byte_copy;

handle_load_error:
	__asm__ __volatile__ ("pmc_load_exc:\n");
	d = &__get_cpu_var(exception_data);
	DPRINTF("pmc_load_exc: o_len=%lu fault_addr=%lu o_src=%lu ret=%lu\n",
		o_len, d->fault_addr, o_src, o_len - d->fault_addr + o_src);
	return o_len - d->fault_addr + o_src;

handle_store_error:
	__asm__ __volatile__ ("pmc_store_exc:\n");
	d = &__get_cpu_var(exception_data);
	DPRINTF("pmc_store_exc: o_len=%lu fault_addr=%lu o_dst=%lu ret=%lu\n",
		o_len, d->fault_addr, o_dst, o_len - d->fault_addr + o_dst);
	return o_len - d->fault_addr + o_dst;
}
#ifdef __KERNEL__
/*
 * Copy len bytes from a kernel buffer to user space.  Space register 1 is
 * loaded with the kernel space and space register 2 with the user space
 * before handing over to pa_memcpy().
 * Returns whatever pa_memcpy() returns.
 */
unsigned long copy_to_user(void __user *dst, const void *src, unsigned long len)
{
	void *raw_dst = (void __force *)dst;

	mtsp(get_kernel_space(), 1);
	mtsp(get_user_space(), 2);

	return pa_memcpy(raw_dst, src, len);
}
/*
 * Copy len bytes from user space to a kernel buffer.  Space register 1 is
 * loaded with the user space and space register 2 with the kernel space
 * before handing over to pa_memcpy().
 * Returns whatever pa_memcpy() returns.
 */
unsigned long copy_from_user(void *dst, const void __user *src, unsigned long len)
{
	const void *raw_src = (void __force *)src;

	mtsp(get_user_space(), 1);
	mtsp(get_kernel_space(), 2);

	return pa_memcpy(dst, raw_src, len);
}
/*
 * Copy len bytes between two user-space buffers.  Both space registers 1
 * and 2 are loaded with the user space before handing over to
 * pa_memcpy().
 * Returns whatever pa_memcpy() returns.
 */
unsigned long copy_in_user(void __user *dst, const void __user *src, unsigned long len)
{
	void *raw_dst = (void __force *)dst;
	const void *raw_src = (void __force *)src;

	mtsp(get_user_space(), 1);
	mtsp(get_user_space(), 2);

	return pa_memcpy(raw_dst, raw_src, len);
}
/*
 * Kernel memcpy: both space registers 1 and 2 are loaded with the kernel
 * space, then pa_memcpy() does the copy.  Its return value is ignored.
 * Returns dst.
 */
void *memcpy(void *dst, const void *src, size_t count)
{
	mtsp(get_kernel_space(), 1);
	mtsp(get_kernel_space(), 2);

	(void)pa_memcpy(dst, src, count);

	return dst;
}
/* Export the copy routines defined above. */
EXPORT_SYMBOL(copy_to_user);
EXPORT_SYMBOL(copy_from_user);
EXPORT_SYMBOL(copy_in_user);
EXPORT_SYMBOL(memcpy);
#endif

91
arch/parisc/lib/memset.c Normale Datei
Datei anzeigen

@@ -0,0 +1,91 @@
/* Copyright (C) 1991, 1997 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
/* Slight modifications for pa-risc linux - Paul Bame <bame@debian.org> */
#include <linux/types.h>
#include <asm/string.h>
/* Size in bytes of the word-sized store unit used by memset below. */
#define OPSIZ (BITS_PER_LONG/8)
/* Type used for the word-sized stores. */
typedef unsigned long op_t;
/* Fill LEN bytes starting at DSTPP with the byte value of SC and return
   DSTPP.  Runs of at least 8 bytes are first aligned to OPSIZ, then written
   eight op_t words at a time, then one op_t at a time; whatever is left is
   written bytewise.  */
void *
memset (void *dstpp, int sc, size_t len)
{
  const unsigned char fill = (unsigned char) sc;
  unsigned char *bytep = (unsigned char *) dstpp;

  if (len >= 8)
    {
      op_t pattern = fill;
      op_t *wordp;
      size_t todo;

      /* Replicate the fill byte into every byte of an op_t.  */
      pattern |= pattern << 8;
      pattern |= pattern << 16;
      if (OPSIZ > 4)
        /* Shift in two steps to avoid a warning if long has 32 bits.  */
        pattern |= (pattern << 16) << 16;

      /* At least 8 bytes are pending, so the alignment loop does not
         have to test for LEN == 0.  */
      for (; (unsigned long) bytep % OPSIZ != 0; len--)
        *bytep++ = fill;

      wordp = (op_t *) bytep;

      /* Eight op_t stores per iteration.  */
      for (todo = len / (OPSIZ * 8); todo > 0; todo--)
        {
          wordp[0] = pattern;
          wordp[1] = pattern;
          wordp[2] = pattern;
          wordp[3] = pattern;
          wordp[4] = pattern;
          wordp[5] = pattern;
          wordp[6] = pattern;
          wordp[7] = pattern;
          wordp += 8;
        }
      len %= OPSIZ * 8;

      /* One op_t store per iteration until fewer than OPSIZ bytes remain.  */
      for (todo = len / OPSIZ; todo > 0; todo--)
        *wordp++ = pattern;
      len %= OPSIZ;

      bytep = (unsigned char *) wordp;
    }

  /* Write the last few bytes.  */
  for (; len > 0; len--)
    *bytep++ = fill;

  return dstpp;
}