sh: Move arch/sh64/lib to arch/sh/lib64.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Author: Paul Mundt
Date: 2007-11-08 18:51:33 +09:00
parent 2c6deb5ea4
commit dd730b8ff8
12 changed files with 0 additions and 0 deletions

1
arch/sh/lib64/.gitignore Normal file

@@ -0,0 +1 @@
syscalltab.h

19
arch/sh/lib64/Makefile Normal file

@@ -0,0 +1,19 @@
#
# This file is subject to the terms and conditions of the GNU General Public
# License. See the file "COPYING" in the main directory of this archive
# for more details.
#
# Copyright (C) 2000, 2001 Paolo Alberelli
# Copyright (C) 2003 Paul Mundt
#
# Makefile for the SH-5 specific library files..
#
# Note! Dependencies are done automagically by 'make dep', which also
# removes any old dependencies. DON'T put your own dependencies here
# unless it's something special (ie not a .c file).
#
# Panic should really be compiled as PIC
lib-y := udelay.o c-checksum.o dbg.o io.o panic.o memcpy.o copy_user_memcpy.o \
page_copy.o page_clear.o iomap.o

217
arch/sh/lib64/c-checksum.c Normal file

@@ -0,0 +1,217 @@
/*
* arch/sh64/lib/c-checksum.c
*
* This file contains network checksum routines that are better done
* in an architecture-specific manner due to speed..
*/
#undef DEBUG
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <asm/byteorder.h>
#include <asm/uaccess.h>
static inline unsigned short from64to16(unsigned long long x)
{
/* add up 32-bit words for 33 bits */
x = (x & 0xffffffff) + (x >> 32);
/* add up 16-bit and 17-bit words for 17+c bits */
x = (x & 0xffff) + (x >> 16);
/* add up 16-bit and 2-bit for 16+c bit */
x = (x & 0xffff) + (x >> 16);
/* add up carry.. */
x = (x & 0xffff) + (x >> 16);
return x;
}
static inline unsigned short foldto16(unsigned long x)
{
/* add up 16-bit for 17 bits */
x = (x & 0xffff) + (x >> 16);
/* add up carry.. */
x = (x & 0xffff) + (x >> 16);
return x;
}
static inline unsigned short myfoldto16(unsigned long long x)
{
/* Fold down to 32-bits so we don't lose in the typedef-less
network stack. */
/* 64 to 33 */
x = (x & 0xffffffff) + (x >> 32);
/* 33 to 32 */
x = (x & 0xffffffff) + (x >> 32);
/* add up 16-bit for 17 bits */
x = (x & 0xffff) + (x >> 16);
/* add up carry.. */
x = (x & 0xffff) + (x >> 16);
return x;
}
#define odd(x) ((x)&1)
#define U16(x) ntohs(x)
static unsigned long do_csum(const unsigned char *buff, int len)
{
int odd, count;
unsigned long result = 0;
pr_debug("do_csum buff %p, len %d (0x%x)\n", buff, len, len);
#ifdef DEBUG
{
int i;
for (i = 0; i < len; i++) {
if ((i % 26) == 0)
printk("\n");
printk("%02X ", buff[i]);
}
}
#endif
if (len <= 0)
goto out;
odd = 1 & (unsigned long) buff;
if (odd) {
result = *buff << 8;
len--;
buff++;
}
count = len >> 1; /* nr of 16-bit words.. */
if (count) {
if (2 & (unsigned long) buff) {
result += *(unsigned short *) buff;
count--;
len -= 2;
buff += 2;
}
count >>= 1; /* nr of 32-bit words.. */
if (count) {
unsigned long carry = 0;
do {
unsigned long w = *(unsigned long *) buff;
buff += 4;
count--;
result += carry;
result += w;
carry = (w > result);
} while (count);
result += carry;
result = (result & 0xffff) + (result >> 16);
}
if (len & 2) {
result += *(unsigned short *) buff;
buff += 2;
}
}
if (len & 1)
result += *buff;
result = foldto16(result);
if (odd)
result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
pr_debug("\nCHECKSUM is 0x%lx\n", result);
out:
return result;
}
/* computes the checksum of a memory block at buff, length len,
and adds in "sum" (32-bit) */
__wsum csum_partial(const void *buff, int len, __wsum sum)
{
unsigned long long result = do_csum(buff, len);
/* add in old sum, and carry.. */
result += (__force u32)sum;
/* 32+c bits -> 32 bits */
result = (result & 0xffffffff) + (result >> 32);
pr_debug("csum_partial, buff %p len %d sum 0x%x result=0x%016Lx\n",
buff, len, sum, result);
return (__force __wsum)result;
}
/* Copy while checksumming, otherwise like csum_partial. */
__wsum
csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
{
sum = csum_partial(src, len, sum);
memcpy(dst, src, len);
return sum;
}
/* Copy from userspace and compute checksum. If we catch an exception
then zero the rest of the buffer. */
__wsum
csum_partial_copy_from_user(const void __user *src, void *dst, int len,
__wsum sum, int *err_ptr)
{
int missing;
pr_debug
("csum_partial_copy_from_user src %p, dest %p, len %d, sum %08x, err_ptr %p\n",
src, dst, len, sum, err_ptr);
missing = copy_from_user(dst, src, len);
pr_debug(" access_ok %d\n", __access_ok((unsigned long) src, len));
pr_debug(" missing %d\n", missing);
if (missing) {
memset(dst + len - missing, 0, missing);
*err_ptr = -EFAULT;
}
return csum_partial(dst, len, sum);
}
/* Copy to userspace and compute checksum. */
__wsum
csum_partial_copy_to_user(const unsigned char *src, unsigned char *dst, int len,
__wsum sum, int *err_ptr)
{
sum = csum_partial(src, len, sum);
if (copy_to_user(dst, src, len))
*err_ptr = -EFAULT;
return sum;
}
/*
* This is a version of ip_compute_csum() optimized for IP headers,
* which always checksum on 4 octet boundaries.
*/
__sum16 ip_fast_csum(const void *iph, unsigned int ihl)
{
pr_debug("ip_fast_csum %p,%d\n", iph, ihl);
return (__force __sum16)~do_csum(iph, ihl * 4);
}
__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
unsigned short len,
unsigned short proto, __wsum sum)
{
unsigned long long result;
pr_debug("ntohs(0x%x)=0x%x\n", 0xdead, ntohs(0xdead));
pr_debug("htons(0x%x)=0x%x\n", 0xdead, htons(0xdead));
result = (__force u64) saddr + (__force u64) daddr +
(__force u64) sum + ((len + proto) << 8);
/* Fold down to 32-bits so we don't lose in the typedef-less
network stack. */
/* 64 to 33 */
result = (result & 0xffffffff) + (result >> 32);
/* 33 to 32 */
result = (result & 0xffffffff) + (result >> 32);
pr_debug("%s saddr %x daddr %x len %x proto %x sum %x result %08Lx\n",
__FUNCTION__, saddr, daddr, len, proto, sum, result);
return (__wsum)result;
}
EXPORT_SYMBOL(csum_tcpudp_nofold);
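
All of the folding helpers above rely on the same trick: repeatedly add the high half of the accumulator into the low half until only 16 bits plus a possible carry remain, then absorb the carry the same way. The inner loop of do_csum() additionally detects 32-bit overflow with "carry = (w > result)", which works because an unsigned addition that wraps always yields a result smaller than either operand. A minimal host-side sketch of both ideas (standalone and illustrative, not kernel code):

#include <stdio.h>
#include <stdint.h>

/* fold a 64-bit sum down to 16 bits, mirroring from64to16() above */
static uint16_t fold64to16(uint64_t x)
{
	x = (x & 0xffffffffULL) + (x >> 32);	/* 64 -> 33 bits */
	x = (x & 0xffff) + (x >> 16);		/* 33 -> 17+c bits */
	x = (x & 0xffff) + (x >> 16);		/* 17 -> 16+c bits */
	x = (x & 0xffff) + (x >> 16);		/* absorb the last carry */
	return (uint16_t)x;
}

int main(void)
{
	uint32_t result = 0xffffffffu, w = 2;
	uint32_t sum = result + w;	/* wraps around to 1 */
	uint32_t carry = (w > sum);	/* wrap detected: carry == 1 */

	printf("fold=0x%04x carry=%u\n",
	       fold64to16(0x0123456789abcdefULL), carry);
	return 0;
}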

217
arch/sh/lib64/copy_user_memcpy.S Normal file

@@ -0,0 +1,217 @@
!
! Fast SH memcpy
!
! by Toshiyasu Morita (tm@netcom.com)
! hacked by J"orn Rennecke (joern.rennecke@superh.com) ("o for o-umlaut)
! SH5 code Copyright 2002 SuperH Ltd.
!
! Entry: ARG0: destination pointer
! ARG1: source pointer
! ARG2: byte count
!
! Exit: RESULT: destination pointer
! any other registers in the range r0-r7: trashed
!
! Notes: Usually one wants to do small reads and write a longword, but
! unfortunately it is difficult in some cases to concatenate bytes
! into a longword on the SH, so this does a longword read and small
! writes.
!
! This implementation makes two assumptions about how it is called:
!
! 1.: If the byte count is nonzero, the address of the last byte to be
! copied is unsigned greater than the address of the first byte to
! be copied. This could be easily swapped for a signed comparison,
! but the algorithm used needs some comparison.
!
! 2.: When there are two or three bytes in the last word of an 11-or-more
! bytes memory chunk to be copied, the rest of the word can be read
! without side effects.
! This could be easily changed by increasing the minimum size of
! a fast memcpy and the amount subtracted from r7 before L_2l_loop be 2,
! however, this would cost a few extra cycles on average.
! For SHmedia, the assumption is that any quadword can be read in its
! entirety if at least one byte is included in the copy.
/* Imported into Linux kernel by Richard Curnow. This is used to implement the
__copy_user function in the general case, so it has to be a distinct
function from intra-kernel memcpy to allow for exception fix-ups in the
event that the user pointer is bad somewhere in the copy (e.g. due to
running off the end of the vma).
Note, this algorithm will be slightly wasteful in the case where the source
and destination pointers are equally aligned, because the stlo/sthi pairs
could then be merged back into single stores. If there are a lot of cache
misses, this is probably offset by the stall lengths on the preloads.
*/
/* NOTE : Prefetches removed and allocos guarded by synco to avoid TAKum03020
* erratum. The first two prefetches are nop-ed out to avoid upsetting the
* instruction counts used in the jump address calculation.
* */
.section .text..SHmedia32,"ax"
.little
.balign 32
.global copy_user_memcpy
.global copy_user_memcpy_end
copy_user_memcpy:
#define LDUAQ(P,O,D0,D1) ldlo.q P,O,D0; ldhi.q P,O+7,D1
#define STUAQ(P,O,D0,D1) stlo.q P,O,D0; sthi.q P,O+7,D1
#define LDUAL(P,O,D0,D1) ldlo.l P,O,D0; ldhi.l P,O+3,D1
#define STUAL(P,O,D0,D1) stlo.l P,O,D0; sthi.l P,O+3,D1
nop ! ld.b r3,0,r63 ! TAKum03020
pta/l Large,tr0
movi 25,r0
bgeu/u r4,r0,tr0
nsb r4,r0
shlli r0,5,r0
movi (L1-L0+63*32 + 1) & 0xffff,r1
sub r1, r0, r0
L0: ptrel r0,tr0
add r2,r4,r5
ptabs r18,tr1
add r3,r4,r6
blink tr0,r63
/* Rearranged to make cut2 safe */
.balign 8
L4_7: /* 4..7 byte memcpy cntd. */
stlo.l r2, 0, r0
or r6, r7, r6
sthi.l r5, -1, r6
stlo.l r5, -4, r6
blink tr1,r63
.balign 8
L1: /* 0 byte memcpy */
nop
blink tr1,r63
nop
nop
nop
nop
L2_3: /* 2 or 3 byte memcpy cntd. */
st.b r5,-1,r6
blink tr1,r63
/* 1 byte memcpy */
ld.b r3,0,r0
st.b r2,0,r0
blink tr1,r63
L8_15: /* 8..15 byte memcpy cntd. */
stlo.q r2, 0, r0
or r6, r7, r6
sthi.q r5, -1, r6
stlo.q r5, -8, r6
blink tr1,r63
/* 2 or 3 byte memcpy */
ld.b r3,0,r0
nop ! ld.b r2,0,r63 ! TAKum03020
ld.b r3,1,r1
st.b r2,0,r0
pta/l L2_3,tr0
ld.b r6,-1,r6
st.b r2,1,r1
blink tr0, r63
/* 4 .. 7 byte memcpy */
LDUAL (r3, 0, r0, r1)
pta L4_7, tr0
ldlo.l r6, -4, r7
or r0, r1, r0
sthi.l r2, 3, r0
ldhi.l r6, -1, r6
blink tr0, r63
/* 8 .. 15 byte memcpy */
LDUAQ (r3, 0, r0, r1)
pta L8_15, tr0
ldlo.q r6, -8, r7
or r0, r1, r0
sthi.q r2, 7, r0
ldhi.q r6, -1, r6
blink tr0, r63
/* 16 .. 24 byte memcpy */
LDUAQ (r3, 0, r0, r1)
LDUAQ (r3, 8, r8, r9)
or r0, r1, r0
sthi.q r2, 7, r0
or r8, r9, r8
sthi.q r2, 15, r8
ldlo.q r6, -8, r7
ldhi.q r6, -1, r6
stlo.q r2, 8, r8
stlo.q r2, 0, r0
or r6, r7, r6
sthi.q r5, -1, r6
stlo.q r5, -8, r6
blink tr1,r63
Large:
! ld.b r2, 0, r63 ! TAKum03020
pta/l Loop_ua, tr1
ori r3, -8, r7
sub r2, r7, r22
sub r3, r2, r6
add r2, r4, r5
ldlo.q r3, 0, r0
addi r5, -16, r5
movi 64+8, r27 ! could subtract r7 from that.
stlo.q r2, 0, r0
sthi.q r2, 7, r0
ldx.q r22, r6, r0
bgtu/l r27, r4, tr1
addi r5, -48, r27
pta/l Loop_line, tr0
addi r6, 64, r36
addi r6, -24, r19
addi r6, -16, r20
addi r6, -8, r21
Loop_line:
! ldx.q r22, r36, r63 ! TAKum03020
alloco r22, 32
synco
addi r22, 32, r22
ldx.q r22, r19, r23
sthi.q r22, -25, r0
ldx.q r22, r20, r24
ldx.q r22, r21, r25
stlo.q r22, -32, r0
ldx.q r22, r6, r0
sthi.q r22, -17, r23
sthi.q r22, -9, r24
sthi.q r22, -1, r25
stlo.q r22, -24, r23
stlo.q r22, -16, r24
stlo.q r22, -8, r25
bgeu r27, r22, tr0
Loop_ua:
addi r22, 8, r22
sthi.q r22, -1, r0
stlo.q r22, -8, r0
ldx.q r22, r6, r0
bgtu/l r5, r22, tr1
add r3, r4, r7
ldlo.q r7, -8, r1
sthi.q r22, 7, r0
ldhi.q r7, -1, r7
ptabs r18,tr1
stlo.q r22, 0, r0
or r1, r7, r1
sthi.q r5, 15, r1
stlo.q r5, 8, r1
blink tr1, r63
copy_user_memcpy_end:
nop
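
The LDUAQ/STUAQ macros above synthesize an unaligned quadword access from a pair of aligned ldlo.q/ldhi.q (or stlo.q/sthi.q) operations whose useful bytes are combined. A rough C model of the load side, little-endian as the .little directive selects, and depending on the same assumption the header comment states (the aligned quadword containing any copied byte may be read in its entirety); names are illustrative:

#include <stdint.h>

/* illustrative model of the ldlo.q/ldhi.q pairing behind LDUAQ */
static uint64_t ldua_q(const unsigned char *p)
{
	uintptr_t a = (uintptr_t)p;
	unsigned off = (unsigned)(a & 7);	/* misalignment within the quadword */
	const uint64_t *base = (const uint64_t *)(a - off);
	uint64_t lo, hi;

	lo = base[0] >> (8 * off);		/* ldlo.q: bytes from the low word */
	if (off == 0)
		return lo;			/* aligned: one load suffices */
	/* ldhi.q: may touch bytes past the nominal end of the source,
	   which is exactly assumption 2 in the comment block above */
	hi = base[1] << (8 * (8 - off));
	return lo | hi;
}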

430
arch/sh/lib64/dbg.c Normal file

@@ -0,0 +1,430 @@
/*--------------------------------------------------------------------------
--
-- Identity : Linux50 Debug Functions
--
-- File : arch/sh64/lib/dbg.C
--
-- Copyright 2000, 2001 STMicroelectronics Limited.
-- Copyright 2004 Richard Curnow (evt_debug etc)
--
--------------------------------------------------------------------------*/
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <asm/mmu_context.h>
typedef u64 regType_t;
static regType_t getConfigReg(u64 id)
{
register u64 reg __asm__("r2");
asm volatile ("getcfg %1, 0, %0":"=r" (reg):"r"(id));
return (reg);
}
/* ======================================================================= */
static char *szTab[] = { "4k", "64k", "1M", "512M" };
static char *protTab[] = { "----",
"---R",
"--X-",
"--XR",
"-W--",
"-W-R",
"-WX-",
"-WXR",
"U---",
"U--R",
"U-X-",
"U-XR",
"UW--",
"UW-R",
"UWX-",
"UWXR"
};
#define ITLB_BASE 0x00000000
#define DTLB_BASE 0x00800000
#define MAX_TLBs 64
/* PTE High */
#define GET_VALID(pte) ((pte) & 0x1)
#define GET_SHARED(pte) ((pte) & 0x2)
#define GET_ASID(pte) ((pte >> 2) & 0x0ff)
#define GET_EPN(pte) ((pte) & 0xfffff000)
/* PTE Low */
#define GET_CBEHAVIOR(pte) ((pte) & 0x3)
#define GET_PAGE_SIZE(pte) szTab[((pte >> 3) & 0x3)]
#define GET_PROTECTION(pte) protTab[((pte >> 6) & 0xf)]
#define GET_PPN(pte) ((pte) & 0xfffff000)
#define PAGE_1K_MASK 0x00000000
#define PAGE_4K_MASK 0x00000010
#define PAGE_64K_MASK 0x00000080
#define MMU_PAGESIZE_MASK (PAGE_64K_MASK | PAGE_4K_MASK)
#define PAGE_1MB_MASK MMU_PAGESIZE_MASK
#define PAGE_1K (1024)
#define PAGE_4K (1024 * 4)
#define PAGE_64K (1024 * 64)
#define PAGE_1MB (1024 * 1024)
#define HOW_TO_READ_TLB_CONTENT \
"[ ID] PPN EPN ASID Share CB P.Size PROT.\n"
void print_single_tlb(unsigned long tlb, int single_print)
{
regType_t pteH;
regType_t pteL;
unsigned int valid, shared, asid, epn, cb, ppn;
char *pSize;
char *pProt;
/*
** in case of single print <single_print> is true, this implies:
** 1) print the TLB in any case also if NOT VALID
** 2) print out the header
*/
pteH = getConfigReg(tlb);
valid = GET_VALID(pteH);
if (single_print)
printk(HOW_TO_READ_TLB_CONTENT);
else if (!valid)
return;
pteL = getConfigReg(tlb + 1);
shared = GET_SHARED(pteH);
asid = GET_ASID(pteH);
epn = GET_EPN(pteH);
cb = GET_CBEHAVIOR(pteL);
pSize = GET_PAGE_SIZE(pteL);
pProt = GET_PROTECTION(pteL);
ppn = GET_PPN(pteL);
printk("[%c%2ld] 0x%08x 0x%08x %03d %02x %02x %4s %s\n",
((valid) ? ' ' : 'u'), ((tlb & 0x0ffff) / TLB_STEP),
ppn, epn, asid, shared, cb, pSize, pProt);
}
void print_dtlb(void)
{
int count;
unsigned long tlb;
printk(" ================= SH-5 D-TLBs Status ===================\n");
printk(HOW_TO_READ_TLB_CONTENT);
tlb = DTLB_BASE;
for (count = 0; count < MAX_TLBs; count++, tlb += TLB_STEP)
print_single_tlb(tlb, 0);
printk
(" =============================================================\n");
}
void print_itlb(void)
{
int count;
unsigned long tlb;
printk(" ================= SH-5 I-TLBs Status ===================\n");
printk(HOW_TO_READ_TLB_CONTENT);
tlb = ITLB_BASE;
for (count = 0; count < MAX_TLBs; count++, tlb += TLB_STEP)
print_single_tlb(tlb, 0);
printk
(" =============================================================\n");
}
/* ======================================================================= */
#ifdef CONFIG_POOR_MANS_STRACE
#include "syscalltab.h"
struct ring_node {
int evt;
int ret_addr;
int event;
int tra;
int pid;
unsigned long sp;
unsigned long pc;
};
static struct ring_node event_ring[16];
static int event_ptr = 0;
struct stored_syscall_data {
int pid;
int syscall_number;
};
#define N_STORED_SYSCALLS 16
static struct stored_syscall_data stored_syscalls[N_STORED_SYSCALLS];
static int syscall_next=0;
static int syscall_next_print=0;
void evt_debug(int evt, int ret_addr, int event, int tra, struct pt_regs *regs)
{
int syscallno = tra & 0xff;
unsigned long sp;
unsigned long stack_bottom;
int pid;
struct ring_node *rr;
pid = current->pid;
stack_bottom = (unsigned long) task_stack_page(current);
asm volatile("ori r15, 0, %0" : "=r" (sp));
rr = event_ring + event_ptr;
rr->evt = evt;
rr->ret_addr = ret_addr;
rr->event = event;
rr->tra = tra;
rr->pid = pid;
rr->sp = sp;
rr->pc = regs->pc;
if (sp < stack_bottom + 3092) {
int i, j;
printk("evt_debug : stack underflow report\n");
for (j = 0, i = event_ptr; j < 16; j++) {
rr = event_ring + i;
printk("evt=%08x event=%08x tra=%08x pid=%5d sp=%08lx pc=%08lx\n",
rr->evt, rr->event, rr->tra, rr->pid, rr->sp, rr->pc);
i--;
i &= 15;
}
panic("STACK UNDERFLOW\n");
}
event_ptr = (event_ptr + 1) & 15;
if ((event == 2) && (evt == 0x160)) {
if (syscallno < NUM_SYSCALL_INFO_ENTRIES) {
/* Store the syscall information to print later. We
* can't print this now - currently we're running with
* SR.BL=1, so we can't take a tlbmiss (which could occur
* in the console drivers under printk).
*
* Just overwrite old entries on ring overflow - this
* is only for last-hope debugging. */
stored_syscalls[syscall_next].pid = current->pid;
stored_syscalls[syscall_next].syscall_number = syscallno;
syscall_next++;
syscall_next &= (N_STORED_SYSCALLS - 1);
}
}
}
static void drain_syscalls(void) {
while (syscall_next_print != syscall_next) {
printk("Task %d: %s()\n",
stored_syscalls[syscall_next_print].pid,
syscall_info_table[stored_syscalls[syscall_next_print].syscall_number].name);
syscall_next_print++;
syscall_next_print &= (N_STORED_SYSCALLS - 1);
}
}
void evt_debug2(unsigned int ret)
{
drain_syscalls();
printk("Task %d: syscall returns %08x\n", current->pid, ret);
}
void evt_debug_ret_from_irq(struct pt_regs *regs)
{
int pid;
struct ring_node *rr;
pid = current->pid;
rr = event_ring + event_ptr;
rr->evt = 0xffff;
rr->ret_addr = 0;
rr->event = 0;
rr->tra = 0;
rr->pid = pid;
rr->pc = regs->pc;
event_ptr = (event_ptr + 1) & 15;
}
void evt_debug_ret_from_exc(struct pt_regs *regs)
{
int pid;
struct ring_node *rr;
pid = current->pid;
rr = event_ring + event_ptr;
rr->evt = 0xfffe;
rr->ret_addr = 0;
rr->event = 0;
rr->tra = 0;
rr->pid = pid;
rr->pc = regs->pc;
event_ptr = (event_ptr + 1) & 15;
}
#endif /* CONFIG_POOR_MANS_STRACE */
/* ======================================================================= */
void show_excp_regs(char *from, int trapnr, int signr, struct pt_regs *regs)
{
unsigned long long ah, al, bh, bl, ch, cl;
printk("\n");
printk("EXCEPTION - %s: task %d; Linux trap # %d; signal = %d\n",
((from) ? from : "???"), current->pid, trapnr, signr);
asm volatile ("getcon " __EXPEVT ", %0":"=r"(ah));
asm volatile ("getcon " __EXPEVT ", %0":"=r"(al));
ah = (ah) >> 32;
al = (al) & 0xffffffff;
asm volatile ("getcon " __KCR1 ", %0":"=r"(bh));
asm volatile ("getcon " __KCR1 ", %0":"=r"(bl));
bh = (bh) >> 32;
bl = (bl) & 0xffffffff;
asm volatile ("getcon " __INTEVT ", %0":"=r"(ch));
asm volatile ("getcon " __INTEVT ", %0":"=r"(cl));
ch = (ch) >> 32;
cl = (cl) & 0xffffffff;
printk("EXPE: %08Lx%08Lx KCR1: %08Lx%08Lx INTE: %08Lx%08Lx\n",
ah, al, bh, bl, ch, cl);
asm volatile ("getcon " __PEXPEVT ", %0":"=r"(ah));
asm volatile ("getcon " __PEXPEVT ", %0":"=r"(al));
ah = (ah) >> 32;
al = (al) & 0xffffffff;
asm volatile ("getcon " __PSPC ", %0":"=r"(bh));
asm volatile ("getcon " __PSPC ", %0":"=r"(bl));
bh = (bh) >> 32;
bl = (bl) & 0xffffffff;
asm volatile ("getcon " __PSSR ", %0":"=r"(ch));
asm volatile ("getcon " __PSSR ", %0":"=r"(cl));
ch = (ch) >> 32;
cl = (cl) & 0xffffffff;
printk("PEXP: %08Lx%08Lx PSPC: %08Lx%08Lx PSSR: %08Lx%08Lx\n",
ah, al, bh, bl, ch, cl);
ah = (regs->pc) >> 32;
al = (regs->pc) & 0xffffffff;
bh = (regs->regs[18]) >> 32;
bl = (regs->regs[18]) & 0xffffffff;
ch = (regs->regs[15]) >> 32;
cl = (regs->regs[15]) & 0xffffffff;
printk("PC : %08Lx%08Lx LINK: %08Lx%08Lx SP : %08Lx%08Lx\n",
ah, al, bh, bl, ch, cl);
ah = (regs->sr) >> 32;
al = (regs->sr) & 0xffffffff;
asm volatile ("getcon " __TEA ", %0":"=r"(bh));
asm volatile ("getcon " __TEA ", %0":"=r"(bl));
bh = (bh) >> 32;
bl = (bl) & 0xffffffff;
asm volatile ("getcon " __KCR0 ", %0":"=r"(ch));
asm volatile ("getcon " __KCR0 ", %0":"=r"(cl));
ch = (ch) >> 32;
cl = (cl) & 0xffffffff;
printk("SR : %08Lx%08Lx TEA : %08Lx%08Lx KCR0: %08Lx%08Lx\n",
ah, al, bh, bl, ch, cl);
ah = (regs->regs[0]) >> 32;
al = (regs->regs[0]) & 0xffffffff;
bh = (regs->regs[1]) >> 32;
bl = (regs->regs[1]) & 0xffffffff;
ch = (regs->regs[2]) >> 32;
cl = (regs->regs[2]) & 0xffffffff;
printk("R0 : %08Lx%08Lx R1 : %08Lx%08Lx R2 : %08Lx%08Lx\n",
ah, al, bh, bl, ch, cl);
ah = (regs->regs[3]) >> 32;
al = (regs->regs[3]) & 0xffffffff;
bh = (regs->regs[4]) >> 32;
bl = (regs->regs[4]) & 0xffffffff;
ch = (regs->regs[5]) >> 32;
cl = (regs->regs[5]) & 0xffffffff;
printk("R3 : %08Lx%08Lx R4 : %08Lx%08Lx R5 : %08Lx%08Lx\n",
ah, al, bh, bl, ch, cl);
ah = (regs->regs[6]) >> 32;
al = (regs->regs[6]) & 0xffffffff;
bh = (regs->regs[7]) >> 32;
bl = (regs->regs[7]) & 0xffffffff;
ch = (regs->regs[8]) >> 32;
cl = (regs->regs[8]) & 0xffffffff;
printk("R6 : %08Lx%08Lx R7 : %08Lx%08Lx R8 : %08Lx%08Lx\n",
ah, al, bh, bl, ch, cl);
ah = (regs->regs[9]) >> 32;
al = (regs->regs[9]) & 0xffffffff;
bh = (regs->regs[10]) >> 32;
bl = (regs->regs[10]) & 0xffffffff;
ch = (regs->regs[11]) >> 32;
cl = (regs->regs[11]) & 0xffffffff;
printk("R9 : %08Lx%08Lx R10 : %08Lx%08Lx R11 : %08Lx%08Lx\n",
ah, al, bh, bl, ch, cl);
printk("....\n");
ah = (regs->tregs[0]) >> 32;
al = (regs->tregs[0]) & 0xffffffff;
bh = (regs->tregs[1]) >> 32;
bl = (regs->tregs[1]) & 0xffffffff;
ch = (regs->tregs[2]) >> 32;
cl = (regs->tregs[2]) & 0xffffffff;
printk("T0 : %08Lx%08Lx T1 : %08Lx%08Lx T2 : %08Lx%08Lx\n",
ah, al, bh, bl, ch, cl);
printk("....\n");
print_dtlb();
print_itlb();
}
/* ======================================================================= */
/*
** Depending on <base> scan the MMU, Data or Instruction side
** looking for a valid mapping matching Eaddr & asid.
** Return -1 if not found or the TLB id entry otherwise.
** Note: it works only for 4k pages!
*/
static unsigned long
lookup_mmu_side(unsigned long base, unsigned long Eaddr, unsigned long asid)
{
regType_t pteH;
unsigned long epn;
int count;
epn = Eaddr & 0xfffff000;
for (count = 0; count < MAX_TLBs; count++, base += TLB_STEP) {
pteH = getConfigReg(base);
if (GET_VALID(pteH))
if ((unsigned long) GET_EPN(pteH) == epn)
if ((unsigned long) GET_ASID(pteH) == asid)
break;
}
return ((unsigned long) ((count < MAX_TLBs) ? base : -1));
}
unsigned long lookup_dtlb(unsigned long Eaddr)
{
unsigned long asid = get_asid();
return (lookup_mmu_side((u64) DTLB_BASE, Eaddr, asid));
}
unsigned long lookup_itlb(unsigned long Eaddr)
{
unsigned long asid = get_asid();
return (lookup_mmu_side((u64) ITLB_BASE, Eaddr, asid));
}
void print_page(struct page *page)
{
printk(" page[%p] -> index 0x%lx, count 0x%x, flags 0x%lx\n",
page, page->index, page_count(page), page->flags);
printk(" address_space = %p, pages =%ld\n", page->mapping,
page->mapping->nrpages);
}
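
Both event_ring and stored_syscalls above use the classic power-of-two ring idiom: the write index is masked with (size - 1) after each increment, so it wraps without a branch and the oldest entries are silently overwritten, which is exactly the behaviour a last-hope trace buffer wants. A generic sketch of the same pattern (illustrative, not taken from this file):

#define RING_SIZE 16			/* must be a power of two */

static int ring[RING_SIZE];
static unsigned int ring_next;

static void ring_push(int value)
{
	ring[ring_next] = value;	/* overwrites the oldest entry when full */
	ring_next = (ring_next + 1) & (RING_SIZE - 1);
}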

128
arch/sh/lib64/io.c Normal file

@@ -0,0 +1,128 @@
/*
* Copyright (C) 2000 David J. Mckay (david.mckay@st.com)
*
* May be copied or modified under the terms of the GNU General Public
* License. See linux/COPYING for more information.
*
* This file contains the I/O routines for use on the overdrive board
*
*/
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <asm/system.h>
#include <asm/processor.h>
#include <asm/io.h>
/* Now for the string version of these functions */
void outsb(unsigned long port, const void *addr, unsigned long count)
{
int i;
unsigned char *p = (unsigned char *) addr;
for (i = 0; i < count; i++, p++) {
outb(*p, port);
}
}
EXPORT_SYMBOL(outsb);
void insb(unsigned long port, void *addr, unsigned long count)
{
int i;
unsigned char *p = (unsigned char *) addr;
for (i = 0; i < count; i++, p++) {
*p = inb(port);
}
}
EXPORT_SYMBOL(insb);
/* For the 16 and 32 bit string functions, we have to worry about alignment.
* The SH does not do unaligned accesses, so we have to read as bytes and
* then write as a word or dword.
* This can be optimised a lot more, especially in the case where the data
* is aligned
*/
void outsw(unsigned long port, const void *addr, unsigned long count)
{
int i;
unsigned short tmp;
unsigned char *p = (unsigned char *) addr;
for (i = 0; i < count; i++, p += 2) {
tmp = (*p) | ((*(p + 1)) << 8);
outw(tmp, port);
}
}
EXPORT_SYMBOL(outsw);
void insw(unsigned long port, void *addr, unsigned long count)
{
int i;
unsigned short tmp;
unsigned char *p = (unsigned char *) addr;
for (i = 0; i < count; i++, p += 2) {
tmp = inw(port);
p[0] = tmp & 0xff;
p[1] = (tmp >> 8) & 0xff;
}
}
EXPORT_SYMBOL(insw);
void outsl(unsigned long port, const void *addr, unsigned long count)
{
int i;
unsigned tmp;
unsigned char *p = (unsigned char *) addr;
for (i = 0; i < count; i++, p += 4) {
tmp = (*p) | ((*(p + 1)) << 8) | ((*(p + 2)) << 16) |
((*(p + 3)) << 24);
outl(tmp, port);
}
}
EXPORT_SYMBOL(outsl);
void insl(unsigned long port, void *addr, unsigned long count)
{
int i;
unsigned tmp;
unsigned char *p = (unsigned char *) addr;
for (i = 0; i < count; i++, p += 4) {
tmp = inl(port);
p[0] = tmp & 0xff;
p[1] = (tmp >> 8) & 0xff;
p[2] = (tmp >> 16) & 0xff;
p[3] = (tmp >> 24) & 0xff;
}
}
EXPORT_SYMBOL(insl);
void memcpy_toio(void __iomem *to, const void *from, long count)
{
unsigned char *p = (unsigned char *) from;
while (count) {
count--;
writeb(*p++, to++);
}
}
EXPORT_SYMBOL(memcpy_toio);
void memcpy_fromio(void *to, void __iomem *from, long count)
{
int i;
unsigned char *p = (unsigned char *) to;
for (i = 0; i < count; i++) {
p[i] = readb(from);
from++;
}
}
EXPORT_SYMBOL(memcpy_fromio);
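
The word-sized string routines above avoid SH alignment faults by moving data one byte at a time and reassembling it in a register: shifts build the little-endian value before it goes out the port, and masks split it apart again on the way in. The round trip in isolation (host-side sketch, illustrative only):

#include <assert.h>

int main(void)
{
	unsigned char p[4] = { 0x78, 0x56, 0x34, 0x12 };
	unsigned char q[4];
	unsigned int tmp;

	/* pack, as outsl() does before writing the port */
	tmp = (*p) | ((*(p + 1)) << 8) | ((*(p + 2)) << 16) | ((*(p + 3)) << 24);
	assert(tmp == 0x12345678);

	/* unpack, as insl() does after reading the port */
	q[0] = tmp & 0xff;
	q[1] = (tmp >> 8) & 0xff;
	q[2] = (tmp >> 16) & 0xff;
	q[3] = (tmp >> 24) & 0xff;
	assert(q[0] == 0x78 && q[3] == 0x12);
	return 0;
}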

54
arch/sh/lib64/iomap.c Normal file

@@ -0,0 +1,54 @@
/*
* arch/sh64/lib/iomap.c
*
* Generic sh64 iomap interface
*
* Copyright (C) 2004 Paul Mundt
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*/
#include <linux/pci.h>
#include <asm/io.h>
void __iomem *__attribute__ ((weak))
ioport_map(unsigned long port, unsigned int len)
{
return (void __iomem *)port;
}
EXPORT_SYMBOL(ioport_map);
void ioport_unmap(void __iomem *addr)
{
/* Nothing .. */
}
EXPORT_SYMBOL(ioport_unmap);
#ifdef CONFIG_PCI
void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max)
{
unsigned long start = pci_resource_start(dev, bar);
unsigned long len = pci_resource_len(dev, bar);
unsigned long flags = pci_resource_flags(dev, bar);
if (!len)
return NULL;
if (max && len > max)
len = max;
if (flags & IORESOURCE_IO)
return ioport_map(start + pciio_virt, len);
if (flags & IORESOURCE_MEM)
return (void __iomem *)start;
/* What? */
return NULL;
}
EXPORT_SYMBOL(pci_iomap);
void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
{
/* Nothing .. */
}
EXPORT_SYMBOL(pci_iounmap);
#endif
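
A caller typically maps a BAR once at probe time and then accesses registers through the returned cookie; the resource-flags test in pci_iomap() above decides transparently whether that cookie refers to port or memory space. A hedged usage sketch (the driver fragment is hypothetical, and assumes the generic ioread32()/pci_iounmap() accessors are available on this platform):

/* hypothetical probe fragment */
void __iomem *regs = pci_iomap(pdev, 0, 0);	/* BAR 0, no length cap */
if (!regs)
	return -ENOMEM;
chip_id = ioread32(regs);			/* same call for I/O and MMIO BARs */
pci_iounmap(pdev, regs);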

81
arch/sh/lib64/memcpy.c Normal file

@@ -0,0 +1,81 @@
/*
* Copyright (C) 2002 Mark Debbage (Mark.Debbage@superh.com)
*
* May be copied or modified under the terms of the GNU General Public
* License. See linux/COPYING for more information.
*
*/
#include <linux/types.h>
#include <asm/string.h>
// This is a simplistic optimization of memcpy to increase the
// granularity of access beyond one byte using aligned
// loads and stores. This is not an optimal implementation
// for SH-5 (especially with regard to prefetching and the cache),
// and a better version should be provided later ...
void *memcpy(void *dest, const void *src, size_t count)
{
char *d = (char *) dest, *s = (char *) src;
if (count >= 32) {
int i = 8 - (((unsigned long) d) & 0x7);
if (i != 8)
while (i-- && count--) {
*d++ = *s++;
}
if (((((unsigned long) d) & 0x7) == 0) &&
((((unsigned long) s) & 0x7) == 0)) {
while (count >= 32) {
unsigned long long t1, t2, t3, t4;
t1 = *(unsigned long long *) (s);
t2 = *(unsigned long long *) (s + 8);
t3 = *(unsigned long long *) (s + 16);
t4 = *(unsigned long long *) (s + 24);
*(unsigned long long *) (d) = t1;
*(unsigned long long *) (d + 8) = t2;
*(unsigned long long *) (d + 16) = t3;
*(unsigned long long *) (d + 24) = t4;
d += 32;
s += 32;
count -= 32;
}
while (count >= 8) {
*(unsigned long long *) d =
*(unsigned long long *) s;
d += 8;
s += 8;
count -= 8;
}
}
if (((((unsigned long) d) & 0x3) == 0) &&
((((unsigned long) s) & 0x3) == 0)) {
while (count >= 4) {
*(unsigned long *) d = *(unsigned long *) s;
d += 4;
s += 4;
count -= 4;
}
}
if (((((unsigned long) d) & 0x1) == 0) &&
((((unsigned long) s) & 0x1) == 0)) {
while (count >= 2) {
*(unsigned short *) d = *(unsigned short *) s;
d += 2;
s += 2;
count -= 2;
}
}
}
while (count--) {
*d++ = *s++;
}
return dest; /* memcpy must return the original destination pointer */
}
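
Note that only the destination is aligned by the head loop; whether the 8-, 4- or 2-byte bulk paths engage afterwards depends on the source happening to share that alignment. A quick check of the head arithmetic (illustrative):

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uintptr_t d = 0x1003;		/* destination misaligned by 3 */
	int i = 8 - (d & 0x7);		/* bytes to the next 8-byte boundary */

	assert(i == 5 && ((d + i) & 0x7) == 0);
	return 0;
}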

54
arch/sh/lib64/page_clear.S Normal file

@@ -0,0 +1,54 @@
/*
Copyright 2003 Richard Curnow, SuperH (UK) Ltd.
This file is subject to the terms and conditions of the GNU General Public
License. See the file "COPYING" in the main directory of this archive
for more details.
Tight version of memset for the case of just clearing a page. It turns out
that having the alloco's spaced out slightly due to the increment/branch
pair causes them to contend less for access to the cache. Similarly,
keeping the stores apart from the allocos causes less contention. => Do two
separate loops. Do multiple stores per loop to amortise the
increment/branch cost a little.
Parameters:
r2 : source effective address (start of page)
Always clears 4096 bytes.
Note : alloco guarded by synco to avoid TAKum03020 erratum
*/
.section .text..SHmedia32,"ax"
.little
.balign 8
.global sh64_page_clear
sh64_page_clear:
pta/l 1f, tr1
pta/l 2f, tr2
ptabs/l r18, tr0
movi 4096, r7
add r2, r7, r7
add r2, r63, r6
1:
alloco r6, 0
synco ! TAKum03020
addi r6, 32, r6
bgt/l r7, r6, tr1
add r2, r63, r6
2:
st.q r6, 0, r63
st.q r6, 8, r63
st.q r6, 16, r63
st.q r6, 24, r63
addi r6, 32, r6
bgt/l r7, r6, tr2
blink tr0, r63

91
arch/sh/lib64/page_copy.S Normal file

@@ -0,0 +1,91 @@
/*
Copyright 2003 Richard Curnow, SuperH (UK) Ltd.
This file is subject to the terms and conditions of the GNU General Public
License. See the file "COPYING" in the main directory of this archive
for more details.
Tight version of memcpy for the case of just copying a page.
Prefetch strategy empirically optimised against RTL simulations
of SH5-101 cut2 eval chip with Cayman board DDR memory.
Parameters:
r2 : source effective address (start of page)
r3 : destination effective address (start of page)
Always copies 4096 bytes.
Points to review.
* Currently the prefetch is 4 lines ahead and the alloco is 2 lines ahead.
It seems like the prefetch needs to be at least 4 lines ahead to get
the data into the cache in time, and the allocos contend with outstanding
prefetches for the same cache set, so it's better to have the numbers
different.
*/
.section .text..SHmedia32,"ax"
.little
.balign 8
.global sh64_page_copy
sh64_page_copy:
/* Copy 4096 bytes worth of data from r2 to r3.
Do prefetches 4 lines ahead.
Do alloco 2 lines ahead */
pta 1f, tr1
pta 2f, tr2
pta 3f, tr3
ptabs r18, tr0
#if 0
/* TAKum03020 */
ld.q r2, 0x00, r63
ld.q r2, 0x20, r63
ld.q r2, 0x40, r63
ld.q r2, 0x60, r63
#endif
alloco r3, 0x00
synco ! TAKum03020
alloco r3, 0x20
synco ! TAKum03020
movi 3968, r6
add r3, r6, r6
addi r6, 64, r7
addi r7, 64, r8
sub r2, r3, r60
addi r60, 8, r61
addi r61, 8, r62
addi r62, 8, r23
addi r60, 0x80, r22
/* Minimal code size. The extra branches inside the loop don't cost much
because they overlap with the time spent waiting for prefetches to
complete. */
1:
#if 0
/* TAKum03020 */
bge/u r3, r6, tr2 ! skip prefetch for last 4 lines
ldx.q r3, r22, r63 ! prefetch 4 lines hence
#endif
2:
bge/u r3, r7, tr3 ! skip alloco for last 2 lines
alloco r3, 0x40 ! alloc destination line 2 lines ahead
synco ! TAKum03020
3:
ldx.q r3, r60, r36
ldx.q r3, r61, r37
ldx.q r3, r62, r38
ldx.q r3, r23, r39
st.q r3, 0, r36
st.q r3, 8, r37
st.q r3, 16, r38
st.q r3, 24, r39
addi r3, 32, r3
bgt/l r8, r3, tr1
blink tr0, r63 ! return

58
arch/sh/lib64/panic.c Normal file

@@ -0,0 +1,58 @@
/*
* Copyright (C) 2003 Richard Curnow, SuperH UK Limited
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*/
#include <linux/kernel.h>
#include <asm/io.h>
#include <asm/registers.h>
/* THIS IS A PHYSICAL ADDRESS */
#define HDSP2534_ADDR (0x04002100)
#ifdef CONFIG_SH_CAYMAN
static void poor_mans_delay(void)
{
int i;
for (i = 0; i < 2500000; i++) {
} /* poor man's delay */
}
static void show_value(unsigned long x)
{
int i;
unsigned nibble;
for (i = 0; i < 8; i++) {
nibble = ((x >> (i * 4)) & 0xf);
ctrl_outb(nibble + ((nibble > 9) ? 55 : 48),
HDSP2534_ADDR + 0xe0 + ((7 - i) << 2));
}
}
#endif
void
panic_handler(unsigned long panicPC, unsigned long panicSSR,
unsigned long panicEXPEVT)
{
#ifdef CONFIG_SH_CAYMAN
while (1) {
/* This piece of code displays the PC on the LED display */
show_value(panicPC);
poor_mans_delay();
show_value(panicSSR);
poor_mans_delay();
show_value(panicEXPEVT);
poor_mans_delay();
}
#endif
/* Never return from the panic handler */
for (;;) ;
}
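
show_value() converts each nibble to its ASCII hex digit purely arithmetically: 48 is '0', so values 0-9 map directly, and 55 is 'A' - 10, so values 10-15 land on 'A'..'F'. The same expression in isolation (illustrative):

#include <assert.h>

static char hex_digit(unsigned int nibble)
{
	return nibble + ((nibble > 9) ? 55 : 48);	/* 55 = 'A' - 10, 48 = '0' */
}

int main(void)
{
	assert(hex_digit(0x3) == '3' && hex_digit(0xc) == 'C');
	return 0;
}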

59
arch/sh/lib64/udelay.c Normal file

@@ -0,0 +1,59 @@
/*
* arch/sh64/lib/udelay.c
*
* Delay routines, using a pre-computed "loops_per_jiffy" value.
*
* Copyright (C) 2000, 2001 Paolo Alberelli
* Copyright (C) 2003, 2004 Paul Mundt
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*/
#include <linux/sched.h>
#include <asm/param.h>
extern unsigned long loops_per_jiffy;
/*
* Use only for very small delays (< 1 msec).
*
* The active part of our cycle counter is only 32-bits wide, and
* we're treating the difference between two marks as signed. On
* a 1GHz box, that's about 2 seconds.
*/
void __delay(int loops)
{
long long dummy;
__asm__ __volatile__("gettr tr0, %1\n\t"
"pta $+4, tr0\n\t"
"addi %0, -1, %0\n\t"
"bne %0, r63, tr0\n\t"
"ptabs %1, tr0\n\t":"=r"(loops),
"=r"(dummy)
:"0"(loops));
}
void __udelay(unsigned long long usecs, unsigned long lpj)
{
usecs *= (((unsigned long long) HZ << 32) / 1000000) * lpj;
__delay((long long) usecs >> 32);
}
void __ndelay(unsigned long long nsecs, unsigned long lpj)
{
nsecs *= (((unsigned long long) HZ << 32) / 1000000000) * lpj;
__delay((long long) nsecs >> 32);
}
void udelay(unsigned long usecs)
{
__udelay(usecs, loops_per_jiffy);
}
void ndelay(unsigned long nsecs)
{
__ndelay(nsecs, loops_per_jiffy);
}
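
__udelay() converts microseconds to delay-loop iterations in 32.32 fixed point: (HZ << 32) / 1000000 is jiffies per microsecond scaled by 2^32, multiplying by lpj (loops per jiffy) gives loops per microsecond in the same scale, and the final >> 32 truncates back to a whole loop count. A worked check of the arithmetic (host-side sketch with illustrative values):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const uint64_t hz = 100, lpj = 49920;	/* 10 ms jiffy, ~4.99 loops/us */
	uint64_t usecs = 100;

	/* loops = usecs * (hz / 1e6) * lpj, all in 32.32 fixed point */
	uint64_t loops = (usecs * ((hz << 32) / 1000000) * lpj) >> 32;

	printf("%llu loops\n", (unsigned long long)loops);	/* prints 499 */
	return 0;
}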