Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!
commit 1da177e4c3
Author: Linus Torvalds
Date: 2005-04-16 15:20:36 -07:00

17291 changed files with 6718755 additions and 0 deletions

arch/arm/mm/Kconfig (new file, 411 lines)

@@ -0,0 +1,411 @@
comment "Processor Type"
config CPU_32
bool
default y
# Select CPU types depending on the architecture selected. This selects
# which CPUs we support in the kernel image, and the compiler instruction
# optimiser behaviour.
# ARM610
config CPU_ARM610
bool "Support ARM610 processor"
depends on ARCH_RPC
select CPU_32v3
select CPU_CACHE_V3
select CPU_CACHE_VIVT
select CPU_COPY_V3
select CPU_TLB_V3
help
The ARM610 is the successor to the ARM3 processor
and was produced by VLSI Technology Inc.
Say Y if you want support for the ARM610 processor.
Otherwise, say N.
# ARM710
config CPU_ARM710
bool "Support ARM710 processor" if !ARCH_CLPS7500 && ARCH_RPC
default y if ARCH_CLPS7500
select CPU_32v3
select CPU_CACHE_V3
select CPU_CACHE_VIVT
select CPU_COPY_V3
select CPU_TLB_V3
help
A 32-bit RISC microprocessor based on the ARM7 processor core
designed by Advanced RISC Machines Ltd. The ARM710 is the
successor to the ARM610 processor. It was released in
July 1994 by VLSI Technology Inc.
Say Y if you want support for the ARM710 processor.
Otherwise, say N.
# ARM720T
config CPU_ARM720T
bool "Support ARM720T processor" if !ARCH_CLPS711X && !ARCH_L7200 && !ARCH_CDB89712 && ARCH_INTEGRATOR
default y if ARCH_CLPS711X || ARCH_L7200 || ARCH_CDB89712 || ARCH_H720X
select CPU_32v4
select CPU_ABRT_LV4T
select CPU_CACHE_V4
select CPU_CACHE_VIVT
select CPU_COPY_V4WT
select CPU_TLB_V4WT
help
A 32-bit RISC processor with 8kByte Cache, Write Buffer and
MMU built around an ARM7TDMI core.
Say Y if you want support for the ARM720T processor.
Otherwise, say N.
# ARM920T
config CPU_ARM920T
bool "Support ARM920T processor" if !ARCH_S3C2410
depends on ARCH_INTEGRATOR || ARCH_S3C2410 || ARCH_IMX
default y if ARCH_S3C2410
select CPU_32v4
select CPU_ABRT_EV4T
select CPU_CACHE_V4WT
select CPU_CACHE_VIVT
select CPU_COPY_V4WB
select CPU_TLB_V4WBI
help
The ARM920T is licensed to be produced by numerous vendors,
and is used in the Maverick EP9312 and the Samsung S3C2410.
More information on the Maverick EP9312 at
<http://linuxdevices.com/products/PD2382866068.html>.
Say Y if you want support for the ARM920T processor.
Otherwise, say N.
# ARM922T
config CPU_ARM922T
bool "Support ARM922T processor" if ARCH_INTEGRATOR
depends on ARCH_CAMELOT || ARCH_LH7A40X || ARCH_INTEGRATOR
default y if ARCH_CAMELOT || ARCH_LH7A40X
select CPU_32v4
select CPU_ABRT_EV4T
select CPU_CACHE_V4WT
select CPU_CACHE_VIVT
select CPU_COPY_V4WB
select CPU_TLB_V4WBI
help
The ARM922T is a version of the ARM920T, but with smaller
instruction and data caches. It is used in Altera's
Excalibur XA device family.
Say Y if you want support for the ARM922T processor.
Otherwise, say N.
# ARM925T
config CPU_ARM925T
bool "Support ARM925T processor" if ARCH_OMAP
depends on ARCH_OMAP1510
default y if ARCH_OMAP1510
select CPU_32v4
select CPU_ABRT_EV4T
select CPU_CACHE_V4WT
select CPU_CACHE_VIVT
select CPU_COPY_V4WB
select CPU_TLB_V4WBI
help
The ARM925T is a mix between the ARM920T and ARM926T, but with
different instruction and data caches. It is used in TI's OMAP
device family.
Say Y if you want support for the ARM925T processor.
Otherwise, say N.
# ARM926T
config CPU_ARM926T
bool "Support ARM926T processor" if ARCH_INTEGRATOR
depends on ARCH_INTEGRATOR || ARCH_VERSATILE_PB || MACH_VERSATILE_AB || ARCH_OMAP730 || ARCH_OMAP16XX
default y if ARCH_VERSATILE_PB || MACH_VERSATILE_AB || ARCH_OMAP730 || ARCH_OMAP16XX
select CPU_32v5
select CPU_ABRT_EV5TJ
select CPU_CACHE_VIVT
select CPU_COPY_V4WB
select CPU_TLB_V4WBI
help
This is a variant of the ARM920. It has slightly different
instruction sequences for cache and TLB operations. Curiously,
there is no documentation on it at the ARM corporate website.
Say Y if you want support for the ARM926T processor.
Otherwise, say N.
# ARM1020 - needs validating
config CPU_ARM1020
bool "Support ARM1020T (rev 0) processor"
depends on ARCH_INTEGRATOR
select CPU_32v5
select CPU_ABRT_EV4T
select CPU_CACHE_V4WT
select CPU_CACHE_VIVT
select CPU_COPY_V4WB
select CPU_TLB_V4WBI
help
The ARM1020 is the 32K cached version of the ARM10 processor,
with an addition of a floating-point unit.
Say Y if you want support for the ARM1020 processor.
Otherwise, say N.
# ARM1020E - needs validating
config CPU_ARM1020E
bool "Support ARM1020E processor"
depends on ARCH_INTEGRATOR
select CPU_32v5
select CPU_ABRT_EV4T
select CPU_CACHE_V4WT
select CPU_CACHE_VIVT
select CPU_COPY_V4WB
select CPU_TLB_V4WBI
depends on n
# ARM1022E
config CPU_ARM1022
bool "Support ARM1022E processor"
depends on ARCH_INTEGRATOR
select CPU_32v5
select CPU_ABRT_EV4T
select CPU_CACHE_VIVT
select CPU_COPY_V4WB # can probably do better
select CPU_TLB_V4WBI
help
The ARM1022E is an implementation of the ARMv5TE architecture
based upon the ARM10 integer core with a 16KiB L1 Harvard cache,
embedded trace macrocell, and a floating-point unit.
Say Y if you want support for the ARM1022E processor.
Otherwise, say N.
# ARM1026EJ-S
config CPU_ARM1026
bool "Support ARM1026EJ-S processor"
depends on ARCH_INTEGRATOR
select CPU_32v5
select CPU_ABRT_EV5T # Jazelle is needed, but EV5TJ ignores bit 10
select CPU_CACHE_VIVT
select CPU_COPY_V4WB # can probably do better
select CPU_TLB_V4WBI
help
The ARM1026EJ-S is an implementation of the ARMv5TEJ architecture
based upon the ARM10 integer core.
Say Y if you want support for the ARM1026EJ-S processor.
Otherwise, say N.
# SA110
config CPU_SA110
bool "Support StrongARM(R) SA-110 processor" if !ARCH_EBSA110 && !FOOTBRIDGE && !ARCH_TBOX && !ARCH_SHARK && !ARCH_NEXUSPCI && ARCH_RPC
default y if ARCH_EBSA110 || FOOTBRIDGE || ARCH_TBOX || ARCH_SHARK || ARCH_NEXUSPCI
select CPU_32v3 if ARCH_RPC
select CPU_32v4 if !ARCH_RPC
select CPU_ABRT_EV4
select CPU_CACHE_V4WB
select CPU_CACHE_VIVT
select CPU_COPY_V4WB
select CPU_TLB_V4WB
help
The Intel StrongARM(R) SA-110 is a 32-bit microprocessor and
is available at five speeds ranging from 100 MHz to 233 MHz.
More information is available at
<http://developer.intel.com/design/strong/sa110.htm>.
Say Y if you want support for the SA-110 processor.
Otherwise, say N.
# SA1100
config CPU_SA1100
bool
depends on ARCH_SA1100
default y
select CPU_32v4
select CPU_ABRT_EV4
select CPU_CACHE_V4WB
select CPU_CACHE_VIVT
select CPU_TLB_V4WB
select CPU_MINICACHE
# XScale
config CPU_XSCALE
bool
depends on ARCH_IOP3XX || ARCH_PXA || ARCH_IXP4XX || ARCH_IXP2000
default y
select CPU_32v5
select CPU_ABRT_EV5T
select CPU_CACHE_VIVT
select CPU_TLB_V4WBI
select CPU_MINICACHE
# ARMv6
config CPU_V6
bool "Support ARM V6 processor"
depends on ARCH_INTEGRATOR
select CPU_32v6
select CPU_ABRT_EV6
select CPU_CACHE_V6
select CPU_CACHE_VIPT
select CPU_COPY_V6
select CPU_TLB_V6
# Figure out what processor architecture version we should be using.
# This defines the compiler instruction set which depends on the machine type.
config CPU_32v3
bool
config CPU_32v4
bool
config CPU_32v5
bool
config CPU_32v6
bool
# The abort model
config CPU_ABRT_EV4
bool
config CPU_ABRT_EV4T
bool
config CPU_ABRT_LV4T
bool
config CPU_ABRT_EV5T
bool
config CPU_ABRT_EV5TJ
bool
config CPU_ABRT_EV6
bool
# The cache model
config CPU_CACHE_V3
bool
config CPU_CACHE_V4
bool
config CPU_CACHE_V4WT
bool
config CPU_CACHE_V4WB
bool
config CPU_CACHE_V6
bool
config CPU_CACHE_VIVT
bool
config CPU_CACHE_VIPT
bool
# The copy-page model
config CPU_COPY_V3
bool
config CPU_COPY_V4WT
bool
config CPU_COPY_V4WB
bool
config CPU_COPY_V6
bool
# This selects the TLB model
config CPU_TLB_V3
bool
help
ARM Architecture Version 3 TLB.
config CPU_TLB_V4WT
bool
help
ARM Architecture Version 4 TLB with writethrough cache.
config CPU_TLB_V4WB
bool
help
ARM Architecture Version 4 TLB with writeback cache.
config CPU_TLB_V4WBI
bool
help
ARM Architecture Version 4 TLB with writeback cache and invalidate
instruction cache entry.
config CPU_TLB_V6
bool
config CPU_MINICACHE
bool
help
Processor has a minicache.
comment "Processor Features"
config ARM_THUMB
bool "Support Thumb user binaries"
depends on CPU_ARM720T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM1020 || CPU_ARM1020E || CPU_ARM1022 || CPU_ARM1026 || CPU_XSCALE || CPU_V6
default y
help
Say Y if you want to include kernel support for running user space
Thumb binaries.
The Thumb instruction set is a compressed form of the standard ARM
instruction set resulting in smaller binaries at the expense of
slightly less efficient code.
If you don't know what this all is, saying Y is a safe choice.
config CPU_BIG_ENDIAN
bool "Build big-endian kernel"
depends on ARCH_SUPPORTS_BIG_ENDIAN
help
Say Y if you plan on running a kernel in big-endian mode.
Note that your board must be properly built and your board
port must properly enable any big-endian related features
of your chipset/board/processor.
config CPU_ICACHE_DISABLE
bool "Disable I-Cache"
depends on CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM1020
help
Say Y here to disable the processor instruction cache. Unless
you have a reason not to or are unsure, say N.
config CPU_DCACHE_DISABLE
bool "Disable D-Cache"
depends on CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM1020
help
Say Y here to disable the processor data cache. Unless
you have a reason not to or are unsure, say N.
config CPU_DCACHE_WRITETHROUGH
bool "Force write through D-cache"
depends on (CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM1020) && !CPU_DCACHE_DISABLE
default y if CPU_ARM925T
help
Say Y here to use the data cache in writethrough mode. Unless you
specifically require this or are unsure, say N.
config CPU_CACHE_ROUND_ROBIN
bool "Round robin I and D cache replacement algorithm"
depends on (CPU_ARM926T || CPU_ARM1020) && (!CPU_ICACHE_DISABLE || !CPU_DCACHE_DISABLE)
help
Say Y here to use the predictable round-robin cache replacement
policy. Unless you specifically require this or are unsure, say N.
config CPU_BPREDICT_DISABLE
bool "Disable branch prediction"
depends on CPU_ARM1020
help
Say Y here to disable branch prediction. If unsure, say N.
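
Each bool option above surfaces to C code as a CONFIG_* preprocessor symbol once enabled; alignment.c later in this commit tests CONFIG_CPU_ARM920T/CONFIG_CPU_ARM922T exactly this way. A minimal standalone sketch (hypothetical program, not part of the patch):

#include <stdio.h>

int main(void)
{
#if defined(CONFIG_CPU_ARM920T) || defined(CONFIG_CPU_ARM922T)
	puts("LDM/STM mismatch report suppressed (ARM92[02]T)");
#else
	puts("LDM/STM mismatch report enabled");
#endif
	return 0;
}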

arch/arm/mm/Makefile (new file, 56 lines)

@@ -0,0 +1,56 @@
#
# Makefile for the linux arm-specific parts of the memory manager.
#
obj-y := consistent.o extable.o fault-armv.o \
fault.o flush.o init.o ioremap.o mmap.o \
mm-armv.o
obj-$(CONFIG_MODULES) += proc-syms.o
obj-$(CONFIG_ALIGNMENT_TRAP) += alignment.o
obj-$(CONFIG_DISCONTIGMEM) += discontig.o
obj-$(CONFIG_CPU_ABRT_EV4) += abort-ev4.o
obj-$(CONFIG_CPU_ABRT_EV4T) += abort-ev4t.o
obj-$(CONFIG_CPU_ABRT_LV4T) += abort-lv4t.o
obj-$(CONFIG_CPU_ABRT_EV5T) += abort-ev5t.o
obj-$(CONFIG_CPU_ABRT_EV5TJ) += abort-ev5tj.o
obj-$(CONFIG_CPU_ABRT_EV6) += abort-ev6.o
obj-$(CONFIG_CPU_CACHE_V3) += cache-v3.o
obj-$(CONFIG_CPU_CACHE_V4) += cache-v4.o
obj-$(CONFIG_CPU_CACHE_V4WT) += cache-v4wt.o
obj-$(CONFIG_CPU_CACHE_V4WB) += cache-v4wb.o
obj-$(CONFIG_CPU_CACHE_V6) += cache-v6.o
obj-$(CONFIG_CPU_COPY_V3) += copypage-v3.o
obj-$(CONFIG_CPU_COPY_V4WT) += copypage-v4wt.o
obj-$(CONFIG_CPU_COPY_V4WB) += copypage-v4wb.o
obj-$(CONFIG_CPU_COPY_V6) += copypage-v6.o mmu.o
obj-$(CONFIG_CPU_SA1100) += copypage-v4mc.o
obj-$(CONFIG_CPU_XSCALE) += copypage-xscale.o
obj-$(CONFIG_CPU_MINICACHE) += minicache.o
obj-$(CONFIG_CPU_TLB_V3) += tlb-v3.o
obj-$(CONFIG_CPU_TLB_V4WT) += tlb-v4.o
obj-$(CONFIG_CPU_TLB_V4WB) += tlb-v4wb.o
obj-$(CONFIG_CPU_TLB_V4WBI) += tlb-v4wbi.o
obj-$(CONFIG_CPU_TLB_V6) += tlb-v6.o
obj-$(CONFIG_CPU_ARM610) += proc-arm6_7.o
obj-$(CONFIG_CPU_ARM710) += proc-arm6_7.o
obj-$(CONFIG_CPU_ARM720T) += proc-arm720.o
obj-$(CONFIG_CPU_ARM920T) += proc-arm920.o
obj-$(CONFIG_CPU_ARM922T) += proc-arm922.o
obj-$(CONFIG_CPU_ARM925T) += proc-arm925.o
obj-$(CONFIG_CPU_ARM926T) += proc-arm926.o
obj-$(CONFIG_CPU_ARM1020) += proc-arm1020.o
obj-$(CONFIG_CPU_ARM1020E) += proc-arm1020e.o
obj-$(CONFIG_CPU_ARM1022) += proc-arm1022.o
obj-$(CONFIG_CPU_ARM1026) += proc-arm1026.o
obj-$(CONFIG_CPU_SA110) += proc-sa110.o
obj-$(CONFIG_CPU_SA1100) += proc-sa1100.o
obj-$(CONFIG_CPU_XSCALE) += proc-xscale.o
obj-$(CONFIG_CPU_V6) += proc-v6.o blockops.o

arch/arm/mm/abort-ev4.S (new file, 30 lines)

@@ -0,0 +1,30 @@
#include <linux/linkage.h>
#include <asm/assembler.h>
/*
* Function: v4_early_abort
*
* Params : r2 = address of aborted instruction
* : r3 = saved SPSR
*
* Returns : r0 = address of abort
* : r1 = FSR, bit 11 = write
* : r2-r8 = corrupted
* : r9 = preserved
* : sp = pointer to registers
*
* Purpose : obtain information about current aborted instruction.
* Note: we read user space. This means we might cause a data
* abort here if the I-TLB and D-TLB aren't seeing the same
* picture. Unfortunately, this does happen. We live with it.
*/
.align 5
ENTRY(v4_early_abort)
mrc p15, 0, r1, c5, c0, 0 @ get FSR
mrc p15, 0, r0, c6, c0, 0 @ get FAR
ldr r3, [r2] @ read aborted ARM instruction
bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR
tst r3, #1 << 20 @ L = 1 -> write?
orreq r1, r1, #1 << 11 @ yes.
mov pc, lr
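
Restated in C, the fixup above clears FSR bits 10-11 and then reports a store (L bit, bit 20, clear) as a write in bit 11. fixup_fsr and the sample encodings below are illustrative only, not kernel code:

#include <stdio.h>
#include <stdint.h>

static uint32_t fixup_fsr(uint32_t fsr, uint32_t insn)
{
	fsr &= ~((1u << 11) | (1u << 10));	/* bic r1, r1, #1<<11 | 1<<10 */
	if (!(insn & (1u << 20)))		/* tst r3, #1<<20: L == 0 is a store */
		fsr |= 1u << 11;		/* orreq r1, r1, #1<<11 */
	return fsr;
}

int main(void)
{
	uint32_t ldr = 0xe5912000;	/* ldr r2, [r1]: L bit set   */
	uint32_t str = 0xe5812000;	/* str r2, [r1]: L bit clear */
	printf("ldr -> write bit %u\n", (unsigned)((fixup_fsr(0, ldr) >> 11) & 1));
	printf("str -> write bit %u\n", (unsigned)((fixup_fsr(0, str) >> 11) & 1));
	return 0;
}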

arch/arm/mm/abort-ev4t.S (new file, 30 lines)

@@ -0,0 +1,30 @@
#include <linux/linkage.h>
#include <asm/assembler.h>
#include "abort-macro.S"
/*
* Function: v4t_early_abort
*
* Params : r2 = address of aborted instruction
* : r3 = saved SPSR
*
* Returns : r0 = address of abort
* : r1 = FSR, bit 11 = write
* : r2-r8 = corrupted
* : r9 = preserved
* : sp = pointer to registers
*
* Purpose : obtain information about current aborted instruction.
* Note: we read user space. This means we might cause a data
* abort here if the I-TLB and D-TLB aren't seeing the same
* picture. Unfortunately, this does happen. We live with it.
*/
.align 5
ENTRY(v4t_early_abort)
mrc p15, 0, r1, c5, c0, 0 @ get FSR
mrc p15, 0, r0, c6, c0, 0 @ get FAR
do_thumb_abort
ldreq r3, [r2] @ read aborted ARM instruction
bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR
tst r3, #1 << 20 @ check write
orreq r1, r1, #1 << 11
mov pc, lr

arch/arm/mm/abort-ev5t.S (new file, 31 lines)

@@ -0,0 +1,31 @@
#include <linux/linkage.h>
#include <asm/assembler.h>
#include "abort-macro.S"
/*
* Function: v5t_early_abort
*
* Params : r2 = address of aborted instruction
* : r3 = saved SPSR
*
* Returns : r0 = address of abort
* : r1 = FSR, bit 11 = write
* : r2-r8 = corrupted
* : r9 = preserved
* : sp = pointer to registers
*
* Purpose : obtain information about current aborted instruction.
* Note: we read user space. This means we might cause a data
* abort here if the I-TLB and D-TLB aren't seeing the same
* picture. Unfortunately, this does happen. We live with it.
*/
.align 5
ENTRY(v5t_early_abort)
mrc p15, 0, r1, c5, c0, 0 @ get FSR
mrc p15, 0, r0, c6, c0, 0 @ get FAR
do_thumb_abort
ldreq r3, [r2] @ read aborted ARM instruction
bic r1, r1, #1 << 11 @ clear bit 11 of FSR
do_ldrd_abort
tst r3, #1 << 20 @ check write
orreq r1, r1, #1 << 11
mov pc, lr

arch/arm/mm/abort-ev5tj.S (new file, 35 lines)

@@ -0,0 +1,35 @@
#include <linux/linkage.h>
#include <asm/assembler.h>
#include "abort-macro.S"
/*
* Function: v5tj_early_abort
*
* Params : r2 = address of aborted instruction
* : r3 = saved SPSR
*
* Returns : r0 = address of abort
* : r1 = FSR, bit 11 = write
* : r2-r8 = corrupted
* : r9 = preserved
* : sp = pointer to registers
*
* Purpose : obtain information about current aborted instruction.
* Note: we read user space. This means we might cause a data
* abort here if the I-TLB and D-TLB aren't seeing the same
* picture. Unfortunately, this does happen. We live with it.
*/
.align 5
ENTRY(v5tj_early_abort)
mrc p15, 0, r1, c5, c0, 0 @ get FSR
mrc p15, 0, r0, c6, c0, 0 @ get FAR
bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR
tst r3, #PSR_J_BIT @ Java?
movne pc, lr
do_thumb_abort
ldreq r3, [r2] @ read aborted ARM instruction
do_ldrd_abort
tst r3, #1 << 20 @ L = 0 -> write
orreq r1, r1, #1 << 11 @ yes.
mov pc, lr

arch/arm/mm/abort-ev6.S (new file, 23 lines)

@@ -0,0 +1,23 @@
#include <linux/linkage.h>
#include <asm/assembler.h>
/*
* Function: v6_early_abort
*
* Params : r2 = address of aborted instruction
* : r3 = saved SPSR
*
* Returns : r0 = address of abort
* : r1 = FSR, bit 11 = write
* : r2-r8 = corrupted
* : r9 = preserved
* : sp = pointer to registers
*
* Purpose : obtain information about current aborted instruction.
*/
.align 5
ENTRY(v6_early_abort)
mrc p15, 0, r1, c5, c0, 0 @ get FSR
mrc p15, 0, r0, c6, c0, 0 @ get FAR
mov pc, lr

arch/arm/mm/abort-lv4t.S (new file, 220 lines)

@@ -0,0 +1,220 @@
#include <linux/linkage.h>
#include <asm/assembler.h>
/*
* Function: v4t_late_abort
*
* Params : r2 = address of aborted instruction
* : r3 = saved SPSR
*
* Returns : r0 = address of abort
* : r1 = FSR, bit 11 = write
* : r2-r8 = corrupted
* : r9 = preserved
* : sp = pointer to registers
*
* Purpose : obtain information about current aborted instruction.
* Note: we read user space. This means we might cause a data
* abort here if the I-TLB and D-TLB aren't seeing the same
* picture. Unfortunately, this does happen. We live with it.
*/
ENTRY(v4t_late_abort)
tst r3, #PSR_T_BIT @ check for thumb mode
mrc p15, 0, r1, c5, c0, 0 @ get FSR
mrc p15, 0, r0, c6, c0, 0 @ get FAR
bne .data_thumb_abort
ldr r8, [r2] @ read arm instruction
bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR
tst r8, #1 << 20 @ L = 1 -> write?
orreq r1, r1, #1 << 11 @ yes.
and r7, r8, #15 << 24
add pc, pc, r7, lsr #22 @ Now branch to the relevant processing routine
nop
/* 0 */ b .data_arm_lateldrhpost @ ldrh rd, [rn], #m/rm
/* 1 */ b .data_arm_lateldrhpre @ ldrh rd, [rn, #m/rm]
/* 2 */ b .data_unknown
/* 3 */ b .data_unknown
/* 4 */ b .data_arm_lateldrpostconst @ ldr rd, [rn], #m
/* 5 */ b .data_arm_lateldrpreconst @ ldr rd, [rn, #m]
/* 6 */ b .data_arm_lateldrpostreg @ ldr rd, [rn], rm
/* 7 */ b .data_arm_lateldrprereg @ ldr rd, [rn, rm]
/* 8 */ b .data_arm_ldmstm @ ldm*a rn, <rlist>
/* 9 */ b .data_arm_ldmstm @ ldm*b rn, <rlist>
/* a */ b .data_unknown
/* b */ b .data_unknown
/* c */ mov pc, lr @ ldc rd, [rn], #m @ Same as ldr rd, [rn], #m
/* d */ mov pc, lr @ ldc rd, [rn, #m]
/* e */ b .data_unknown
/* f */
.data_unknown: @ Part of jumptable
mov r0, r2
mov r1, r8
mov r2, sp
bl baddataabort
b ret_from_exception
.data_arm_ldmstm:
tst r8, #1 << 21 @ check writeback bit
moveq pc, lr @ no writeback -> no fixup
mov r7, #0x11
orr r7, r7, #0x1100
and r6, r8, r7
and r2, r8, r7, lsl #1
add r6, r6, r2, lsr #1
and r2, r8, r7, lsl #2
add r6, r6, r2, lsr #2
and r2, r8, r7, lsl #3
add r6, r6, r2, lsr #3
add r6, r6, r6, lsr #8
add r6, r6, r6, lsr #4
and r6, r6, #15 @ r6 = no. of registers to transfer.
and r5, r8, #15 << 16 @ Extract 'n' from instruction
ldr r7, [sp, r5, lsr #14] @ Get register 'Rn'
tst r8, #1 << 23 @ Check U bit
subne r7, r7, r6, lsl #2 @ Undo increment
addeq r7, r7, r6, lsl #2 @ Undo decrement
str r7, [sp, r5, lsr #14] @ Put register 'Rn'
mov pc, lr
.data_arm_lateldrhpre:
tst r8, #1 << 21 @ Check writeback bit
moveq pc, lr @ No writeback -> no fixup
.data_arm_lateldrhpost:
and r5, r8, #0x00f @ get Rm / low nibble of immediate value
tst r8, #1 << 22 @ if (immediate offset)
andne r6, r8, #0xf00 @ { immediate high nibble
orrne r6, r5, r6, lsr #4 @ combine nibbles } else
ldreq r6, [sp, r5, lsl #2] @ { load Rm value }
.data_arm_apply_r6_and_rn:
and r5, r8, #15 << 16 @ Extract 'n' from instruction
ldr r7, [sp, r5, lsr #14] @ Get register 'Rn'
tst r8, #1 << 23 @ Check U bit
subne r7, r7, r6 @ Undo increment
addeq r7, r7, r6 @ Undo decrement
str r7, [sp, r5, lsr #14] @ Put register 'Rn'
mov pc, lr
.data_arm_lateldrpreconst:
tst r8, #1 << 21 @ check writeback bit
moveq pc, lr @ no writeback -> no fixup
.data_arm_lateldrpostconst:
movs r2, r8, lsl #20 @ Get offset
moveq pc, lr @ zero -> no fixup
and r5, r8, #15 << 16 @ Extract 'n' from instruction
ldr r7, [sp, r5, lsr #14] @ Get register 'Rn'
tst r8, #1 << 23 @ Check U bit
subne r7, r7, r2, lsr #20 @ Undo increment
addeq r7, r7, r2, lsr #20 @ Undo decrement
str r7, [sp, r5, lsr #14] @ Put register 'Rn'
mov pc, lr
.data_arm_lateldrprereg:
tst r8, #1 << 21 @ check writeback bit
moveq pc, lr @ no writeback -> no fixup
.data_arm_lateldrpostreg:
and r7, r8, #15 @ Extract 'm' from instruction
ldr r6, [sp, r7, lsl #2] @ Get register 'Rm'
mov r5, r8, lsr #7 @ get shift count
ands r5, r5, #31
and r7, r8, #0x70 @ get shift type
orreq r7, r7, #8 @ shift count = 0
add pc, pc, r7
nop
mov r6, r6, lsl r5 @ 0: LSL #!0
b .data_arm_apply_r6_and_rn
b .data_arm_apply_r6_and_rn @ 1: LSL #0
nop
b .data_unknown @ 2: MUL?
nop
b .data_unknown @ 3: MUL?
nop
mov r6, r6, lsr r5 @ 4: LSR #!0
b .data_arm_apply_r6_and_rn
mov r6, r6, lsr #32 @ 5: LSR #32
b .data_arm_apply_r6_and_rn
b .data_unknown @ 6: MUL?
nop
b .data_unknown @ 7: MUL?
nop
mov r6, r6, asr r5 @ 8: ASR #!0
b .data_arm_apply_r6_and_rn
mov r6, r6, asr #32 @ 9: ASR #32
b .data_arm_apply_r6_and_rn
b .data_unknown @ A: MUL?
nop
b .data_unknown @ B: MUL?
nop
mov r6, r6, ror r5 @ C: ROR #!0
b .data_arm_apply_r6_and_rn
mov r6, r6, rrx @ D: RRX
b .data_arm_apply_r6_and_rn
b .data_unknown @ E: MUL?
nop
b .data_unknown @ F: MUL?
.data_thumb_abort:
ldrh r8, [r2] @ read instruction
tst r8, #1 << 11 @ L = 1 -> write?
orreq r1, r1, #1 << 8 @ yes
and r7, r8, #15 << 12
add pc, pc, r7, lsr #10 @ lookup in table
nop
/* 0 */ b .data_unknown
/* 1 */ b .data_unknown
/* 2 */ b .data_unknown
/* 3 */ b .data_unknown
/* 4 */ b .data_unknown
/* 5 */ b .data_thumb_reg
/* 6 */ mov pc, lr
/* 7 */ mov pc, lr
/* 8 */ mov pc, lr
/* 9 */ mov pc, lr
/* A */ b .data_unknown
/* B */ b .data_thumb_pushpop
/* C */ b .data_thumb_ldmstm
/* D */ b .data_unknown
/* E */ b .data_unknown
/* F */ b .data_unknown
.data_thumb_reg:
tst r8, #1 << 9
moveq pc, lr
tst r8, #1 << 10 @ If 'S' (signed) bit is set
movne r1, #0 @ it must be a load instr
mov pc, lr
.data_thumb_pushpop:
tst r8, #1 << 10
beq .data_unknown
and r6, r8, #0x55 @ hweight8(r8) + R bit
and r2, r8, #0xaa
add r6, r6, r2, lsr #1
and r2, r6, #0xcc
and r6, r6, #0x33
add r6, r6, r2, lsr #2
movs r7, r8, lsr #9 @ C = r8 bit 8 (R bit)
adc r6, r6, r6, lsr #4 @ high + low nibble + R bit
and r6, r6, #15 @ number of regs to transfer
ldr r7, [sp, #13 << 2]
tst r8, #1 << 11
addeq r7, r7, r6, lsl #2 @ increment SP if PUSH
subne r7, r7, r6, lsl #2 @ decrement SP if POP
str r7, [sp, #13 << 2]
mov pc, lr
.data_thumb_ldmstm:
and r6, r8, #0x55 @ hweight8(r8)
and r2, r8, #0xaa
add r6, r6, r2, lsr #1
and r2, r6, #0xcc
and r6, r6, #0x33
add r6, r6, r2, lsr #2
add r6, r6, r6, lsr #4
and r5, r8, #7 << 8
ldr r7, [sp, r5, lsr #6]
and r6, r6, #15 @ number of regs to transfer
sub r7, r7, r6, lsl #2 @ always decrement
str r7, [sp, r5, lsr #6]
mov pc, lr
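
The register-count sequence in .data_arm_ldmstm (mov r7, #0x11; orr r7, r7, #0x1100; ...) is a nibble-wise population count of the LDM/STM register list, used to undo the base register write-back. A C rendering of the same trick; regs_in_list is a name invented for this sketch:

#include <stdio.h>

static unsigned int regs_in_list(unsigned int insn)
{
	unsigned int m = insn & 0xffff;	/* the <rlist> bits */
	unsigned int n;

	n  =  m & 0x1111;
	n += (m & 0x2222) >> 1;
	n += (m & 0x4444) >> 2;
	n += (m & 0x8888) >> 3;	/* each nibble of n now counts one nibble of m */
	n += n >> 8;
	n += n >> 4;		/* fold the four counts into the low nibble */
	return n & 15;		/* mod 16, exactly as the assembly's "and r6, r6, #15" */
}

int main(void)
{
	/* ldmia r0!, {r1, r2, r7}: register-list bits 0x0086 */
	printf("registers: %u\n", regs_in_list(0xe8b00086));
	return 0;
}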

arch/arm/mm/abort-macro.S (new file, 42 lines)

@@ -0,0 +1,42 @@
/*
* The ARM LDRD and Thumb LDRSB instructions use bit 20/11 (ARM/Thumb)
* differently than every other instruction, so it is set to 0 (write)
* even though the instructions are read instructions. This means that
* during an abort the instructions will be treated as a write and the
* handler will raise a signal from unwriteable locations if they
* fault. We have to specifically check for these instructions
* from the abort handlers to treat them properly.
*
*/
.macro do_thumb_abort
tst r3, #PSR_T_BIT
beq not_thumb
ldrh r3, [r2] @ Read aborted Thumb instruction
and r3, r3, # 0xfe00 @ Mask opcode field
cmp r3, # 0x5600 @ Is it ldrsb?
orreq r3, r3, #1 << 11 @ Set L-bit if yes
tst r3, #1 << 11 @ L = 0 -> write
orreq r1, r1, #1 << 11 @ yes.
mov pc, lr
not_thumb:
.endm
/*
* We check for the following instruction encoding for LDRD.
*
* [27:25] == 0
* [7:4] == 1101
* [20] == 0
*/
.macro do_ldrd_abort
tst r3, #0x0e000000 @ [27:25] == 0
bne not_ldrd
and r2, r3, #0x000000f0 @ [7:4] == 1101
cmp r2, #0x000000d0
bne not_ldrd
tst r3, #1 << 20 @ [20] == 0
moveq pc, lr
not_ldrd:
.endm
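
The same LDRD test, written as a C predicate for readers who prefer it; is_ldrd is a name invented for this sketch:

#include <stdio.h>
#include <stdint.h>

static int is_ldrd(uint32_t insn)
{
	return (insn & 0x0e000000) == 0 &&	/* [27:25] == 0 */
	       (insn & 0x000000f0) == 0xd0 &&	/* [7:4] == 1101 */
	       (insn & (1u << 20)) == 0;	/* [20] == 0 */
}

int main(void)
{
	uint32_t ldrd = 0xe1c120d0;	/* ldrd r2, [r1] */
	uint32_t ldr  = 0xe5912000;	/* ldr  r2, [r1] */
	printf("ldrd: %d, ldr: %d\n", is_ldrd(ldrd), is_ldrd(ldr));
	return 0;
}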

arch/arm/mm/alignment.c (new file, 756 lines)

@@ -0,0 +1,756 @@
/*
* linux/arch/arm/mm/alignment.c
*
* Copyright (C) 1995 Linus Torvalds
* Modifications for ARM processor (c) 1995-2001 Russell King
* Thumb alignment fault fixups (c) 2004 MontaVista Software, Inc.
* - Adapted from gdb/sim/arm/thumbemu.c -- Thumb instruction emulation.
* Copyright (C) 1996, Cygnus Software Technologies Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/config.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/ptrace.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <asm/uaccess.h>
#include <asm/unaligned.h>
#include "fault.h"
/*
* 32-bit misaligned trap handler (c) 1998 San Mehat (CCC) - July 1998
* /proc/sys/debug/alignment, modified and integrated into
* Linux 2.1 by Russell King
*
* Speed optimisations and better fault handling by Russell King.
*
* *** NOTE ***
* This code is not portable to processors with late data abort handling.
*/
#define CODING_BITS(i) (i & 0x0e000000)
#define LDST_I_BIT(i) (i & (1 << 26)) /* Immediate constant */
#define LDST_P_BIT(i) (i & (1 << 24)) /* Preindex */
#define LDST_U_BIT(i) (i & (1 << 23)) /* Add offset */
#define LDST_W_BIT(i) (i & (1 << 21)) /* Writeback */
#define LDST_L_BIT(i) (i & (1 << 20)) /* Load */
#define LDST_P_EQ_U(i) ((((i) ^ ((i) >> 1)) & (1 << 23)) == 0)
#define LDSTH_I_BIT(i) (i & (1 << 22)) /* half-word immed */
#define LDM_S_BIT(i) (i & (1 << 22)) /* write CPSR from SPSR */
#define RN_BITS(i) ((i >> 16) & 15) /* Rn */
#define RD_BITS(i) ((i >> 12) & 15) /* Rd */
#define RM_BITS(i) (i & 15) /* Rm */
#define REGMASK_BITS(i) (i & 0xffff)
#define OFFSET_BITS(i) (i & 0x0fff)
#define IS_SHIFT(i) (i & 0x0ff0)
#define SHIFT_BITS(i) ((i >> 7) & 0x1f)
#define SHIFT_TYPE(i) (i & 0x60)
#define SHIFT_LSL 0x00
#define SHIFT_LSR 0x20
#define SHIFT_ASR 0x40
#define SHIFT_RORRRX 0x60
static unsigned long ai_user;
static unsigned long ai_sys;
static unsigned long ai_skipped;
static unsigned long ai_half;
static unsigned long ai_word;
static unsigned long ai_multi;
static int ai_usermode;
#ifdef CONFIG_PROC_FS
static const char *usermode_action[] = {
"ignored",
"warn",
"fixup",
"fixup+warn",
"signal",
"signal+warn"
};
static int
proc_alignment_read(char *page, char **start, off_t off, int count, int *eof,
void *data)
{
char *p = page;
int len;
p += sprintf(p, "User:\t\t%lu\n", ai_user);
p += sprintf(p, "System:\t\t%lu\n", ai_sys);
p += sprintf(p, "Skipped:\t%lu\n", ai_skipped);
p += sprintf(p, "Half:\t\t%lu\n", ai_half);
p += sprintf(p, "Word:\t\t%lu\n", ai_word);
p += sprintf(p, "Multi:\t\t%lu\n", ai_multi);
p += sprintf(p, "User faults:\t%i (%s)\n", ai_usermode,
usermode_action[ai_usermode]);
len = (p - page) - off;
if (len < 0)
len = 0;
*eof = (len <= count) ? 1 : 0;
*start = page + off;
return len;
}
static int proc_alignment_write(struct file *file, const char __user *buffer,
unsigned long count, void *data)
{
char mode;
if (count > 0) {
if (get_user(mode, buffer))
return -EFAULT;
if (mode >= '0' && mode <= '5')
ai_usermode = mode - '0';
}
return count;
}
#endif /* CONFIG_PROC_FS */
union offset_union {
unsigned long un;
signed long sn;
};
#define TYPE_ERROR 0
#define TYPE_FAULT 1
#define TYPE_LDST 2
#define TYPE_DONE 3
#ifdef __ARMEB__
#define BE 1
#define FIRST_BYTE_16 "mov %1, %1, ror #8\n"
#define FIRST_BYTE_32 "mov %1, %1, ror #24\n"
#define NEXT_BYTE "ror #24"
#else
#define BE 0
#define FIRST_BYTE_16
#define FIRST_BYTE_32
#define NEXT_BYTE "lsr #8"
#endif
#define __get8_unaligned_check(ins,val,addr,err) \
__asm__( \
"1: "ins" %1, [%2], #1\n" \
"2:\n" \
" .section .fixup,\"ax\"\n" \
" .align 2\n" \
"3: mov %0, #1\n" \
" b 2b\n" \
" .previous\n" \
" .section __ex_table,\"a\"\n" \
" .align 3\n" \
" .long 1b, 3b\n" \
" .previous\n" \
: "=r" (err), "=&r" (val), "=r" (addr) \
: "0" (err), "2" (addr))
#define __get16_unaligned_check(ins,val,addr) \
do { \
unsigned int err = 0, v, a = addr; \
__get8_unaligned_check(ins,v,a,err); \
val = v << ((BE) ? 8 : 0); \
__get8_unaligned_check(ins,v,a,err); \
val |= v << ((BE) ? 0 : 8); \
if (err) \
goto fault; \
} while (0)
#define get16_unaligned_check(val,addr) \
__get16_unaligned_check("ldrb",val,addr)
#define get16t_unaligned_check(val,addr) \
__get16_unaligned_check("ldrbt",val,addr)
#define __get32_unaligned_check(ins,val,addr) \
do { \
unsigned int err = 0, v, a = addr; \
__get8_unaligned_check(ins,v,a,err); \
val = v << ((BE) ? 24 : 0); \
__get8_unaligned_check(ins,v,a,err); \
val |= v << ((BE) ? 16 : 8); \
__get8_unaligned_check(ins,v,a,err); \
val |= v << ((BE) ? 8 : 16); \
__get8_unaligned_check(ins,v,a,err); \
val |= v << ((BE) ? 0 : 24); \
if (err) \
goto fault; \
} while (0)
#define get32_unaligned_check(val,addr) \
__get32_unaligned_check("ldrb",val,addr)
#define get32t_unaligned_check(val,addr) \
__get32_unaligned_check("ldrbt",val,addr)
#define __put16_unaligned_check(ins,val,addr) \
do { \
unsigned int err = 0, v = val, a = addr; \
__asm__( FIRST_BYTE_16 \
"1: "ins" %1, [%2], #1\n" \
" mov %1, %1, "NEXT_BYTE"\n" \
"2: "ins" %1, [%2]\n" \
"3:\n" \
" .section .fixup,\"ax\"\n" \
" .align 2\n" \
"4: mov %0, #1\n" \
" b 3b\n" \
" .previous\n" \
" .section __ex_table,\"a\"\n" \
" .align 3\n" \
" .long 1b, 4b\n" \
" .long 2b, 4b\n" \
" .previous\n" \
: "=r" (err), "=&r" (v), "=&r" (a) \
: "0" (err), "1" (v), "2" (a)); \
if (err) \
goto fault; \
} while (0)
#define put16_unaligned_check(val,addr) \
__put16_unaligned_check("strb",val,addr)
#define put16t_unaligned_check(val,addr) \
__put16_unaligned_check("strbt",val,addr)
#define __put32_unaligned_check(ins,val,addr) \
do { \
unsigned int err = 0, v = val, a = addr; \
__asm__( FIRST_BYTE_32 \
"1: "ins" %1, [%2], #1\n" \
" mov %1, %1, "NEXT_BYTE"\n" \
"2: "ins" %1, [%2], #1\n" \
" mov %1, %1, "NEXT_BYTE"\n" \
"3: "ins" %1, [%2], #1\n" \
" mov %1, %1, "NEXT_BYTE"\n" \
"4: "ins" %1, [%2]\n" \
"5:\n" \
" .section .fixup,\"ax\"\n" \
" .align 2\n" \
"6: mov %0, #1\n" \
" b 5b\n" \
" .previous\n" \
" .section __ex_table,\"a\"\n" \
" .align 3\n" \
" .long 1b, 6b\n" \
" .long 2b, 6b\n" \
" .long 3b, 6b\n" \
" .long 4b, 6b\n" \
" .previous\n" \
: "=r" (err), "=&r" (v), "=&r" (a) \
: "0" (err), "1" (v), "2" (a)); \
if (err) \
goto fault; \
} while (0)
#define put32_unaligned_check(val,addr) \
__put32_unaligned_check("strb", val, addr)
#define put32t_unaligned_check(val,addr) \
__put32_unaligned_check("strbt", val, addr)
static void
do_alignment_finish_ldst(unsigned long addr, unsigned long instr, struct pt_regs *regs, union offset_union offset)
{
if (!LDST_U_BIT(instr))
offset.un = -offset.un;
if (!LDST_P_BIT(instr))
addr += offset.un;
if (!LDST_P_BIT(instr) || LDST_W_BIT(instr))
regs->uregs[RN_BITS(instr)] = addr;
}
static int
do_alignment_ldrhstrh(unsigned long addr, unsigned long instr, struct pt_regs *regs)
{
unsigned int rd = RD_BITS(instr);
if ((instr & 0x01f00ff0) == 0x01000090)
goto swp;
if ((instr & 0x90) != 0x90 || (instr & 0x60) == 0)
goto bad;
ai_half += 1;
if (user_mode(regs))
goto user;
if (LDST_L_BIT(instr)) {
unsigned long val;
get16_unaligned_check(val, addr);
/* signed half-word? */
if (instr & 0x40)
val = (signed long)((signed short) val);
regs->uregs[rd] = val;
} else
put16_unaligned_check(regs->uregs[rd], addr);
return TYPE_LDST;
user:
if (LDST_L_BIT(instr)) {
unsigned long val;
get16t_unaligned_check(val, addr);
/* signed half-word? */
if (instr & 0x40)
val = (signed long)((signed short) val);
regs->uregs[rd] = val;
} else
put16t_unaligned_check(regs->uregs[rd], addr);
return TYPE_LDST;
swp:
printk(KERN_ERR "Alignment trap: not handling swp instruction\n");
bad:
return TYPE_ERROR;
fault:
return TYPE_FAULT;
}
static int
do_alignment_ldrstr(unsigned long addr, unsigned long instr, struct pt_regs *regs)
{
unsigned int rd = RD_BITS(instr);
ai_word += 1;
if ((!LDST_P_BIT(instr) && LDST_W_BIT(instr)) || user_mode(regs))
goto trans;
if (LDST_L_BIT(instr)) {
unsigned int val;
get32_unaligned_check(val, addr);
regs->uregs[rd] = val;
} else
put32_unaligned_check(regs->uregs[rd], addr);
return TYPE_LDST;
trans:
if (LDST_L_BIT(instr)) {
unsigned int val;
get32t_unaligned_check(val, addr);
regs->uregs[rd] = val;
} else
put32t_unaligned_check(regs->uregs[rd], addr);
return TYPE_LDST;
fault:
return TYPE_FAULT;
}
/*
* LDM/STM alignment handler.
*
* There are 4 variants of this instruction:
*
* B = rn pointer before instruction, A = rn pointer after instruction
* ------ increasing address ----->
* | | r0 | r1 | ... | rx | |
* PU = 01 B A
* PU = 11 B A
* PU = 00 A B
* PU = 10 A B
*/
static int
do_alignment_ldmstm(unsigned long addr, unsigned long instr, struct pt_regs *regs)
{
unsigned int rd, rn, correction, nr_regs, regbits;
unsigned long eaddr, newaddr;
if (LDM_S_BIT(instr))
goto bad;
correction = 4; /* processor implementation defined */
regs->ARM_pc += correction;
ai_multi += 1;
/* count the number of registers in the mask to be transferred */
nr_regs = hweight16(REGMASK_BITS(instr)) * 4;
rn = RN_BITS(instr);
newaddr = eaddr = regs->uregs[rn];
if (!LDST_U_BIT(instr))
nr_regs = -nr_regs;
newaddr += nr_regs;
if (!LDST_U_BIT(instr))
eaddr = newaddr;
if (LDST_P_EQ_U(instr)) /* U = P */
eaddr += 4;
/*
* For alignment faults on the ARM922T/ARM920T the MMU makes
* the FSR (and hence addr) equal to the updated base address
* of the multiple access rather than the restored value.
* Switch this message off if we've got an ARM92[02], otherwise
* [ls]dm alignment faults are noisy!
*/
#if !(defined CONFIG_CPU_ARM922T) && !(defined CONFIG_CPU_ARM920T)
/*
* This is a "hint" - we already have eaddr worked out by the
* processor for us.
*/
if (addr != eaddr) {
printk(KERN_ERR "LDMSTM: PC = %08lx, instr = %08lx, "
"addr = %08lx, eaddr = %08lx\n",
instruction_pointer(regs), instr, addr, eaddr);
show_regs(regs);
}
#endif
if (user_mode(regs)) {
for (regbits = REGMASK_BITS(instr), rd = 0; regbits;
regbits >>= 1, rd += 1)
if (regbits & 1) {
if (LDST_L_BIT(instr)) {
unsigned int val;
get32t_unaligned_check(val, eaddr);
regs->uregs[rd] = val;
} else
put32t_unaligned_check(regs->uregs[rd], eaddr);
eaddr += 4;
}
} else {
for (regbits = REGMASK_BITS(instr), rd = 0; regbits;
regbits >>= 1, rd += 1)
if (regbits & 1) {
if (LDST_L_BIT(instr)) {
unsigned int val;
get32_unaligned_check(val, eaddr);
regs->uregs[rd] = val;
} else
put32_unaligned_check(regs->uregs[rd], eaddr);
eaddr += 4;
}
}
if (LDST_W_BIT(instr))
regs->uregs[rn] = newaddr;
if (!LDST_L_BIT(instr) || !(REGMASK_BITS(instr) & (1 << 15)))
regs->ARM_pc -= correction;
return TYPE_DONE;
fault:
regs->ARM_pc -= correction;
return TYPE_FAULT;
bad:
printk(KERN_ERR "Alignment trap: not handling ldm with s-bit set\n");
return TYPE_ERROR;
}
/*
* Convert Thumb ld/st instruction forms to equivalent ARM instructions so
* we can reuse ARM userland alignment fault fixups for Thumb.
*
* This implementation was initially based on the algorithm found in
* gdb/sim/arm/thumbemu.c. It is basically just a code reduction of same
* to convert only Thumb ld/st instruction forms to equivalent ARM forms.
*
* NOTES:
* 1. Comments below refer to ARM ARM DDI0100E Thumb Instruction sections.
* 2. If for some reason we're passed a non-ld/st Thumb instruction to
* decode, we return 0xdeadc0de. This should never happen under normal
* circumstances but if it does, we've got other problems to deal with
* elsewhere and we obviously can't fix those problems here.
*/
static unsigned long
thumb2arm(u16 tinstr)
{
u32 L = (tinstr & (1<<11)) >> 11;
switch ((tinstr & 0xf800) >> 11) {
/* 6.5.1 Format 1: */
case 0x6000 >> 11: /* 7.1.52 STR(1) */
case 0x6800 >> 11: /* 7.1.26 LDR(1) */
case 0x7000 >> 11: /* 7.1.55 STRB(1) */
case 0x7800 >> 11: /* 7.1.30 LDRB(1) */
return 0xe5800000 |
((tinstr & (1<<12)) << (22-12)) | /* fixup */
(L<<20) | /* L==1? */
((tinstr & (7<<0)) << (12-0)) | /* Rd */
((tinstr & (7<<3)) << (16-3)) | /* Rn */
((tinstr & (31<<6)) >> /* immed_5 */
(6 - ((tinstr & (1<<12)) ? 0 : 2)));
case 0x8000 >> 11: /* 7.1.57 STRH(1) */
case 0x8800 >> 11: /* 7.1.32 LDRH(1) */
return 0xe1c000b0 |
(L<<20) | /* L==1? */
((tinstr & (7<<0)) << (12-0)) | /* Rd */
((tinstr & (7<<3)) << (16-3)) | /* Rn */
((tinstr & (7<<6)) >> (6-1)) | /* immed_5[2:0] */
((tinstr & (3<<9)) >> (9-8)); /* immed_5[4:3] */
/* 6.5.1 Format 2: */
case 0x5000 >> 11:
case 0x5800 >> 11:
{
static const u32 subset[8] = {
0xe7800000, /* 7.1.53 STR(2) */
0xe18000b0, /* 7.1.58 STRH(2) */
0xe7c00000, /* 7.1.56 STRB(2) */
0xe19000d0, /* 7.1.34 LDRSB */
0xe7900000, /* 7.1.27 LDR(2) */
0xe19000b0, /* 7.1.33 LDRH(2) */
0xe7d00000, /* 7.1.31 LDRB(2) */
0xe19000f0 /* 7.1.35 LDRSH */
};
return subset[(tinstr & (7<<9)) >> 9] |
((tinstr & (7<<0)) << (12-0)) | /* Rd */
((tinstr & (7<<3)) << (16-3)) | /* Rn */
((tinstr & (7<<6)) >> (6-0)); /* Rm */
}
/* 6.5.1 Format 3: */
case 0x4800 >> 11: /* 7.1.28 LDR(3) */
/* NOTE: This case is not technically possible. We're
* loading 32-bit memory data via PC relative
* addressing mode. So we can and should eliminate
* this case. But I'll leave it here for now.
*/
return 0xe59f0000 |
((tinstr & (7<<8)) << (12-8)) | /* Rd */
((tinstr & 255) << (2-0)); /* immed_8 */
/* 6.5.1 Format 4: */
case 0x9000 >> 11: /* 7.1.54 STR(3) */
case 0x9800 >> 11: /* 7.1.29 LDR(4) */
return 0xe58d0000 |
(L<<20) | /* L==1? */
((tinstr & (7<<8)) << (12-8)) | /* Rd */
((tinstr & 255) << 2); /* immed_8 */
/* 6.6.1 Format 1: */
case 0xc000 >> 11: /* 7.1.51 STMIA */
case 0xc800 >> 11: /* 7.1.25 LDMIA */
{
u32 Rn = (tinstr & (7<<8)) >> 8;
u32 W = ((L<<Rn) & (tinstr&255)) ? 0 : 1<<21;
return 0xe8800000 | W | (L<<20) | (Rn<<16) |
(tinstr&255);
}
/* 6.6.1 Format 2: */
case 0xb000 >> 11: /* 7.1.48 PUSH */
case 0xb800 >> 11: /* 7.1.47 POP */
if ((tinstr & (3 << 9)) == 0x0400) {
static const u32 subset[4] = {
0xe92d0000, /* STMDB sp!,{registers} */
0xe92d4000, /* STMDB sp!,{registers,lr} */
0xe8bd0000, /* LDMIA sp!,{registers} */
0xe8bd8000 /* LDMIA sp!,{registers,pc} */
};
return subset[(L<<1) | ((tinstr & (1<<8)) >> 8)] |
(tinstr & 255); /* register_list */
}
/* Else fall through for illegal instruction case */
default:
return 0xdeadc0de;
}
}
static int
do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
union offset_union offset;
unsigned long instr = 0, instrptr;
int (*handler)(unsigned long addr, unsigned long instr, struct pt_regs *regs);
unsigned int type;
mm_segment_t fs;
unsigned int fault;
u16 tinstr = 0;
instrptr = instruction_pointer(regs);
fs = get_fs();
set_fs(KERNEL_DS);
if (thumb_mode(regs)) {
fault = __get_user(tinstr, (u16 *)(instrptr & ~1));
if (!(fault))
instr = thumb2arm(tinstr);
} else
fault = __get_user(instr, (u32 *)instrptr);
set_fs(fs);
if (fault) {
type = TYPE_FAULT;
goto bad_or_fault;
}
if (user_mode(regs))
goto user;
ai_sys += 1;
fixup:
regs->ARM_pc += thumb_mode(regs) ? 2 : 4;
switch (CODING_BITS(instr)) {
case 0x00000000: /* ldrh or strh */
if (LDSTH_I_BIT(instr))
offset.un = (instr & 0xf00) >> 4 | (instr & 15);
else
offset.un = regs->uregs[RM_BITS(instr)];
handler = do_alignment_ldrhstrh;
break;
case 0x04000000: /* ldr or str immediate */
offset.un = OFFSET_BITS(instr);
handler = do_alignment_ldrstr;
break;
case 0x06000000: /* ldr or str register */
offset.un = regs->uregs[RM_BITS(instr)];
if (IS_SHIFT(instr)) {
unsigned int shiftval = SHIFT_BITS(instr);
switch(SHIFT_TYPE(instr)) {
case SHIFT_LSL:
offset.un <<= shiftval;
break;
case SHIFT_LSR:
offset.un >>= shiftval;
break;
case SHIFT_ASR:
offset.sn >>= shiftval;
break;
case SHIFT_RORRRX:
if (shiftval == 0) {
offset.un >>= 1;
if (regs->ARM_cpsr & PSR_C_BIT)
offset.un |= 1 << 31;
} else
offset.un = offset.un >> shiftval |
offset.un << (32 - shiftval);
break;
}
}
handler = do_alignment_ldrstr;
break;
case 0x08000000: /* ldm or stm */
handler = do_alignment_ldmstm;
break;
default:
goto bad;
}
type = handler(addr, instr, regs);
if (type == TYPE_ERROR || type == TYPE_FAULT)
goto bad_or_fault;
if (type == TYPE_LDST)
do_alignment_finish_ldst(addr, instr, regs, offset);
return 0;
bad_or_fault:
if (type == TYPE_ERROR)
goto bad;
regs->ARM_pc -= thumb_mode(regs) ? 2 : 4;
/*
* We got a fault - fix it up, or die.
*/
do_bad_area(current, current->mm, addr, fsr, regs);
return 0;
bad:
/*
* Oops, we didn't handle the instruction.
*/
printk(KERN_ERR "Alignment trap: not handling instruction "
"%0*lx at [<%08lx>]\n",
thumb_mode(regs) ? 4 : 8,
thumb_mode(regs) ? tinstr : instr, instrptr);
ai_skipped += 1;
return 1;
user:
ai_user += 1;
if (ai_usermode & 1)
printk("Alignment trap: %s (%d) PC=0x%08lx Instr=0x%0*lx "
"Address=0x%08lx FSR 0x%03x\n", current->comm,
current->pid, instrptr,
thumb_mode(regs) ? 4 : 8,
thumb_mode(regs) ? tinstr : instr,
addr, fsr);
if (ai_usermode & 2)
goto fixup;
if (ai_usermode & 4)
force_sig(SIGBUS, current);
else
set_cr(cr_no_alignment);
return 0;
}
/*
* This needs to be done after sysctl_init, otherwise sys/ will be
* overwritten. Actually, this shouldn't be in sys/ at all since
* it isn't a sysctl, and it doesn't contain sysctl information.
* We now locate it in /proc/cpu/alignment instead.
*/
static int __init alignment_init(void)
{
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *res;
res = proc_mkdir("cpu", NULL);
if (!res)
return -ENOMEM;
res = create_proc_entry("alignment", S_IWUSR | S_IRUGO, res);
if (!res)
return -ENOMEM;
res->read_proc = proc_alignment_read;
res->write_proc = proc_alignment_write;
#endif
hook_fault_code(1, do_alignment, SIGILL, "alignment exception");
hook_fault_code(3, do_alignment, SIGILL, "alignment exception");
return 0;
}
fs_initcall(alignment_init);
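
A userspace sketch of driving the /proc/cpu/alignment file that alignment_init() creates (assumes an ARM kernel with CONFIG_PROC_FS); writing a digit '0'..'5' selects the corresponding usermode_action[] mode:

#include <stdio.h>

int main(void)
{
	char line[128];
	FILE *f = fopen("/proc/cpu/alignment", "r");

	if (!f) {
		perror("/proc/cpu/alignment");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* the counters from proc_alignment_read() */
	fclose(f);

	f = fopen("/proc/cpu/alignment", "w");
	if (f) {
		fputc('2', f);		/* mode 2 = "fixup" */
		fclose(f);
	}
	return 0;
}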

arch/arm/mm/blockops.c (new file, 184 lines)

@@ -0,0 +1,184 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <asm/memory.h>
#include <asm/ptrace.h>
#include <asm/cacheflush.h>
#include <asm/traps.h>
extern struct cpu_cache_fns blk_cache_fns;
#define HARVARD_CACHE
/*
* blk_flush_kern_dcache_page(kaddr)
*
* Ensure that the data held in the page kaddr is written back
* to the page in question.
*
* - kaddr - kernel address (guaranteed to be page aligned)
*/
static void __attribute__((naked))
blk_flush_kern_dcache_page(void *kaddr)
{
asm(
"add r1, r0, %0 \n\
1: .word 0xec401f0e @ mcrr p15, 0, r0, r1, c14, 0 @ blocking \n\
mov r0, #0 \n\
mcr p15, 0, r0, c7, c5, 0 \n\
mcr p15, 0, r0, c7, c10, 4 \n\
mov pc, lr"
:
: "I" (PAGE_SIZE));
}
/*
* blk_dma_inv_range(start,end)
*
* Invalidate the data cache within the specified region; we will
* be performing a DMA operation in this region and we want to
* purge old data in the cache.
*
* - start - virtual start address of region
* - end - virtual end address of region
*/
static void __attribute__((naked))
blk_dma_inv_range_unified(unsigned long start, unsigned long end)
{
asm(
"tst r0, %0 \n\
mcrne p15, 0, r0, c7, c11, 1 @ clean unified line \n\
tst r1, %0 \n\
mcrne p15, 0, r1, c7, c15, 1 @ clean & invalidate unified line\n\
.word 0xec401f06 @ mcrr p15, 0, r1, r0, c6, 0 @ blocking \n\
mov r0, #0 \n\
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer \n\
mov pc, lr"
:
: "I" (L1_CACHE_BYTES - 1));
}
static void __attribute__((naked))
blk_dma_inv_range_harvard(unsigned long start, unsigned long end)
{
asm(
"tst r0, %0 \n\
mcrne p15, 0, r0, c7, c10, 1 @ clean D line \n\
tst r1, %0 \n\
mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D line \n\
.word 0xec401f06 @ mcrr p15, 0, r1, r0, c6, 0 @ blocking \n\
mov r0, #0 \n\
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer \n\
mov pc, lr"
:
: "I" (L1_CACHE_BYTES - 1));
}
/*
* blk_dma_clean_range(start,end)
* - start - virtual start address of region
* - end - virtual end address of region
*/
static void __attribute__((naked))
blk_dma_clean_range(unsigned long start, unsigned long end)
{
asm(
".word 0xec401f0c @ mcrr p15, 0, r1, r0, c12, 0 @ blocking \n\
mov r0, #0 \n\
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer \n\
mov pc, lr");
}
/*
* blk_dma_flush_range(start,end)
* - start - virtual start address of region
* - end - virtual end address of region
*/
static void __attribute__((naked))
blk_dma_flush_range(unsigned long start, unsigned long end)
{
asm(
".word 0xec401f0e @ mcrr p15, 0, r1, r0, c14, 0 @ blocking \n\
mov pc, lr");
}
static int blockops_trap(struct pt_regs *regs, unsigned int instr)
{
regs->ARM_r4 |= regs->ARM_r2;
regs->ARM_pc += 4;
return 0;
}
static char *func[] = {
"Prefetch data range",
"Clean+Invalidate data range",
"Clean data range",
"Invalidate data range",
"Invalidate instr range"
};
static struct undef_hook blockops_hook __initdata = {
.instr_mask = 0x0fffffd0,
.instr_val = 0x0c401f00,
.cpsr_mask = PSR_T_BIT,
.cpsr_val = 0,
.fn = blockops_trap,
};
static int __init blockops_check(void)
{
register unsigned int err asm("r4") = 0;
unsigned int err_pos = 1;
unsigned int cache_type;
int i;
asm("mrc p15, 0, %0, c0, c0, 1" : "=r" (cache_type));
printk("Checking V6 block cache operations:\n");
register_undef_hook(&blockops_hook);
__asm__ ("mov r0, %0\n\t"
"mov r1, %1\n\t"
"mov r2, #1\n\t"
".word 0xec401f2c @ mcrr p15, 0, r1, r0, c12, 2\n\t"
"mov r2, #2\n\t"
".word 0xec401f0e @ mcrr p15, 0, r1, r0, c14, 0\n\t"
"mov r2, #4\n\t"
".word 0xec401f0c @ mcrr p15, 0, r1, r0, c12, 0\n\t"
"mov r2, #8\n\t"
".word 0xec401f06 @ mcrr p15, 0, r1, r0, c6, 0\n\t"
"mov r2, #16\n\t"
".word 0xec401f05 @ mcrr p15, 0, r1, r0, c5, 0\n\t"
:
: "r" (PAGE_OFFSET), "r" (PAGE_OFFSET + 128)
: "r0", "r1", "r2");
unregister_undef_hook(&blockops_hook);
for (i = 0; i < ARRAY_SIZE(func); i++, err_pos <<= 1)
printk("%30s: %ssupported\n", func[i], err & err_pos ? "not " : "");
if ((err & 8) == 0) {
printk(" --> Using %s block cache invalidate\n",
cache_type & (1 << 24) ? "harvard" : "unified");
if (cache_type & (1 << 24))
cpu_cache.dma_inv_range = blk_dma_inv_range_harvard;
else
cpu_cache.dma_inv_range = blk_dma_inv_range_unified;
}
if ((err & 4) == 0) {
printk(" --> Using block cache clean\n");
cpu_cache.dma_clean_range = blk_dma_clean_range;
}
if ((err & 2) == 0) {
printk(" --> Using block cache clean+invalidate\n");
cpu_cache.dma_flush_range = blk_dma_flush_range;
cpu_cache.flush_kern_dcache_page = blk_flush_kern_dcache_page;
}
return 0;
}
__initcall(blockops_check);
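
How the probe's result decodes: each mcrr encoding the CPU rejects traps into blockops_trap(), which ORs that test's r2 value into err (r4), so a clear bit means the operation is supported. A sketch of the reporting loop with a made-up err value:

#include <stdio.h>

static const char *func[] = {
	"Prefetch data range",
	"Clean+Invalidate data range",
	"Clean data range",
	"Invalidate data range",
	"Invalidate instr range",
};

int main(void)
{
	unsigned int err = 0x11;	/* example: bits 0 and 4 trapped */
	unsigned int i, bit;

	for (i = 0, bit = 1; i < 5; i++, bit <<= 1)
		printf("%30s: %ssupported\n", func[i], (err & bit) ? "not " : "");
	return 0;
}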

arch/arm/mm/cache-v3.S (new file, 137 lines)

@@ -0,0 +1,137 @@
/*
* linux/arch/arm/mm/cache-v3.S
*
* Copyright (C) 1997-2002 Russell king
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/linkage.h>
#include <linux/init.h>
#include <asm/hardware.h>
#include <asm/page.h>
#include "proc-macros.S"
/*
* flush_user_cache_all()
*
* Invalidate all cache entries in a particular address
* space.
*
* - mm - mm_struct describing address space
*/
ENTRY(v3_flush_user_cache_all)
/* FALLTHROUGH */
/*
* flush_kern_cache_all()
*
* Clean and invalidate the entire cache.
*/
ENTRY(v3_flush_kern_cache_all)
/* FALLTHROUGH */
/*
* flush_user_cache_range(start, end, flags)
*
* Invalidate a range of cache entries in the specified
* address space.
*
* - start - start address (may not be aligned)
* - end - end address (exclusive, may not be aligned)
* - flags - vm_area_struct flags describing address space
*/
ENTRY(v3_flush_user_cache_range)
mov ip, #0
mcreq p15, 0, ip, c7, c0, 0 @ flush ID cache
mov pc, lr
/*
* coherent_kern_range(start, end)
*
* Ensure coherency between the Icache and the Dcache in the
* region described by start. If you have non-snooping
* Harvard caches, you need to implement this function.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(v3_coherent_kern_range)
/* FALLTHROUGH */
/*
* coherent_user_range(start, end)
*
* Ensure coherency between the Icache and the Dcache in the
* region described by start. If you have non-snooping
* Harvard caches, you need to implement this function.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(v3_coherent_user_range)
mov pc, lr
/*
* flush_kern_dcache_page(void *page)
*
* Ensure no D cache aliasing occurs, either with itself or
* the I cache
*
* - addr - page aligned address
*/
ENTRY(v3_flush_kern_dcache_page)
/* FALLTHROUGH */
/*
* dma_inv_range(start, end)
*
* Invalidate (discard) the specified virtual address range.
* May not write back any entries. If 'start' or 'end'
* are not cache line aligned, those lines must be written
* back.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(v3_dma_inv_range)
/* FALLTHROUGH */
/*
* dma_flush_range(start, end)
*
* Clean and invalidate the specified virtual address range.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(v3_dma_flush_range)
mov r0, #0
mcr p15, 0, r0, c7, c0, 0 @ flush ID cache
/* FALLTHROUGH */
/*
* dma_clean_range(start, end)
*
* Clean (write back) the specified virtual address range.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(v3_dma_clean_range)
mov pc, lr
__INITDATA
.type v3_cache_fns, #object
ENTRY(v3_cache_fns)
.long v3_flush_kern_cache_all
.long v3_flush_user_cache_all
.long v3_flush_user_cache_range
.long v3_coherent_kern_range
.long v3_coherent_user_range
.long v3_flush_kern_dcache_page
.long v3_dma_inv_range
.long v3_dma_clean_range
.long v3_dma_flush_range
.size v3_cache_fns, . - v3_cache_fns

arch/arm/mm/cache-v4.S (new file, 139 lines)

@@ -0,0 +1,139 @@
/*
* linux/arch/arm/mm/cache-v4.S
*
* Copyright (C) 1997-2002 Russell king
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/linkage.h>
#include <linux/init.h>
#include <asm/hardware.h>
#include <asm/page.h>
#include "proc-macros.S"
/*
* flush_user_cache_all()
*
* Invalidate all cache entries in a particular address
* space.
*
* - mm - mm_struct describing address space
*/
ENTRY(v4_flush_user_cache_all)
/* FALLTHROUGH */
/*
* flush_kern_cache_all()
*
* Clean and invalidate the entire cache.
*/
ENTRY(v4_flush_kern_cache_all)
mov r0, #0
mcr p15, 0, r0, c7, c7, 0 @ flush ID cache
mov pc, lr
/*
* flush_user_cache_range(start, end, flags)
*
* Invalidate a range of cache entries in the specified
* address space.
*
* - start - start address (may not be aligned)
* - end - end address (exclusive, may not be aligned)
* - flags - vm_area_struct flags describing address space
*/
ENTRY(v4_flush_user_cache_range)
mov ip, #0
mcreq p15, 0, ip, c7, c7, 0 @ flush ID cache
mov pc, lr
/*
* coherent_kern_range(start, end)
*
* Ensure coherency between the Icache and the Dcache in the
* region described by start. If you have non-snooping
* Harvard caches, you need to implement this function.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(v4_coherent_kern_range)
/* FALLTHROUGH */
/*
* coherent_user_range(start, end)
*
* Ensure coherency between the Icache and the Dcache in the
* region described by start. If you have non-snooping
* Harvard caches, you need to implement this function.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(v4_coherent_user_range)
mov pc, lr
/*
* flush_kern_dcache_page(void *page)
*
* Ensure no D cache aliasing occurs, either with itself or
* the I cache
*
* - addr - page aligned address
*/
ENTRY(v4_flush_kern_dcache_page)
/* FALLTHROUGH */
/*
* dma_inv_range(start, end)
*
* Invalidate (discard) the specified virtual address range.
* May not write back any entries. If 'start' or 'end'
* are not cache line aligned, those lines must be written
* back.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(v4_dma_inv_range)
/* FALLTHROUGH */
/*
* dma_flush_range(start, end)
*
* Clean and invalidate the specified virtual address range.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(v4_dma_flush_range)
mov r0, #0
mcr p15, 0, r0, c7, c7, 0 @ flush ID cache
/* FALLTHROUGH */
/*
* dma_clean_range(start, end)
*
* Clean (write back) the specified virtual address range.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(v4_dma_clean_range)
mov pc, lr
__INITDATA
.type v4_cache_fns, #object
ENTRY(v4_cache_fns)
.long v4_flush_kern_cache_all
.long v4_flush_user_cache_all
.long v4_flush_user_cache_range
.long v4_coherent_kern_range
.long v4_coherent_user_range
.long v4_flush_kern_dcache_page
.long v4_dma_inv_range
.long v4_dma_clean_range
.long v4_dma_flush_range
.size v4_cache_fns, . - v4_cache_fns

arch/arm/mm/cache-v4wb.S (new file, 216 lines)

@@ -0,0 +1,216 @@
/*
* linux/arch/arm/mm/cache-v4wb.S
*
* Copyright (C) 1997-2002 Russell king
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/config.h>
#include <linux/linkage.h>
#include <linux/init.h>
#include <asm/hardware.h>
#include <asm/page.h>
#include "proc-macros.S"
/*
* The size of one data cache line.
*/
#define CACHE_DLINESIZE 32
/*
* The total size of the data cache.
*/
#if defined(CONFIG_CPU_SA110)
# define CACHE_DSIZE 16384
#elif defined(CONFIG_CPU_SA1100)
# define CACHE_DSIZE 8192
#else
# error Unknown cache size
#endif
/*
* This is the size at which it becomes more efficient to
* clean the whole cache, rather than using the individual
* cache line maintenance instructions.
*
* Size Clean (ticks) Dirty (ticks)
* 4096 21 20 21 53 55 54
* 8192 40 41 40 106 100 102
* 16384 77 77 76 140 140 138
* 32768 150 149 150 214 216 212 <---
* 65536 296 297 296 351 358 361
* 131072 591 591 591 656 657 651
* Whole 132 136 132 221 217 207 <---
*/
#define CACHE_DLIMIT (CACHE_DSIZE * 4)
/*
* flush_user_cache_all()
*
* Clean and invalidate all cache entries in a particular address
* space.
*/
ENTRY(v4wb_flush_user_cache_all)
/* FALLTHROUGH */
/*
* flush_kern_cache_all()
*
* Clean and invalidate the entire cache.
*/
ENTRY(v4wb_flush_kern_cache_all)
mov ip, #0
mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache
__flush_whole_cache:
mov r0, #FLUSH_BASE
add r1, r0, #CACHE_DSIZE
1: ldr r2, [r0], #32
cmp r0, r1
blo 1b
mcr p15, 0, ip, c7, c10, 4 @ drain write buffer
mov pc, lr
/*
* flush_user_cache_range(start, end, flags)
*
* Invalidate a range of cache entries in the specified
* address space.
*
* - start - start address (inclusive, page aligned)
* - end - end address (exclusive, page aligned)
* - flags - vma_area_struct flags describing address space
*/
ENTRY(v4wb_flush_user_cache_range)
sub r3, r1, r0 @ calculate total size
tst r2, #VM_EXEC @ executable region?
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
cmp r3, #CACHE_DLIMIT @ total size >= limit?
bhs __flush_whole_cache @ flush whole D cache
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c10, 4 @ drain write buffer
mov pc, lr
/*
* flush_kern_dcache_page(void *page)
*
* Ensure no D cache aliasing occurs, either with itself or
* the I cache
*
* - addr - page aligned address
*/
ENTRY(v4wb_flush_kern_dcache_page)
add r1, r0, #PAGE_SZ
/* fall through */
/*
* coherent_kern_range(start, end)
*
* Ensure coherency between the Icache and the Dcache in the
* region described by start. If you have non-snooping
* Harvard caches, you need to implement this function.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(v4wb_coherent_kern_range)
/* fall through */
/*
* coherent_user_range(start, end)
*
* Ensure coherency between the Icache and the Dcache in the
* region described by start. If you have non-snooping
* Harvard caches, you need to implement this function.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(v4wb_coherent_user_range)
bic r0, r0, #CACHE_DLINESIZE - 1
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
mov ip, #0
mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr
/*
* dma_inv_range(start, end)
*
* Invalidate (discard) the specified virtual address range.
* May not write back any entries. If 'start' or 'end'
* are not cache line aligned, those lines must be written
* back.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(v4wb_dma_inv_range)
tst r0, #CACHE_DLINESIZE - 1
bic r0, r0, #CACHE_DLINESIZE - 1
mcrne p15, 0, r0, c7, c10, 1 @ clean D entry
tst r1, #CACHE_DLINESIZE - 1
mcrne p15, 0, r1, c7, c10, 1 @ clean D entry
1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
mov pc, lr
/*
* dma_clean_range(start, end)
*
* Clean (write back) the specified virtual address range.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(v4wb_dma_clean_range)
bic r0, r0, #CACHE_DLINESIZE - 1
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
mov pc, lr
/*
* dma_flush_range(start, end)
*
* Clean and invalidate the specified virtual address range.
*
* - start - virtual start address
* - end - virtual end address
*
* This is actually the same as v4wb_coherent_kern_range()
*/
.globl v4wb_dma_flush_range
.set v4wb_dma_flush_range, v4wb_coherent_kern_range
__INITDATA
.type v4wb_cache_fns, #object
ENTRY(v4wb_cache_fns)
.long v4wb_flush_kern_cache_all
.long v4wb_flush_user_cache_all
.long v4wb_flush_user_cache_range
.long v4wb_coherent_kern_range
.long v4wb_coherent_user_range
.long v4wb_flush_kern_dcache_page
.long v4wb_dma_inv_range
.long v4wb_dma_clean_range
.long v4wb_dma_flush_range
.size v4wb_cache_fns, . - v4wb_cache_fns
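The nine .long entries above form this CPU's cache-operations vector, and the C side reads the table through a struct whose members sit in exactly that order. A sketch of the C view (member names follow the entries; treat the exact declaration as an assumption):
/* Sketch: one function pointer per .long directive, in table order. */
struct cpu_cache_fns {
	void (*flush_kern_all)(void);
	void (*flush_user_all)(void);
	void (*flush_user_range)(unsigned long start, unsigned long end,
				 unsigned int flags);
	void (*coherent_kern_range)(unsigned long start, unsigned long end);
	void (*coherent_user_range)(unsigned long start, unsigned long end);
	void (*flush_kern_dcache_page)(void *addr);
	void (*dma_inv_range)(unsigned long start, unsigned long end);
	void (*dma_clean_range)(unsigned long start, unsigned long end);
	void (*dma_flush_range)(unsigned long start, unsigned long end);
};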

188
arch/arm/mm/cache-v4wt.S Normal file

@@ -0,0 +1,188 @@
/*
* linux/arch/arm/mm/cache-v4wt.S
*
* Copyright (C) 1997-2002 Russell King
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* ARMv4 write through cache operations support.
*
* We assume that the write buffer is not enabled.
*/
#include <linux/linkage.h>
#include <linux/init.h>
#include <asm/hardware.h>
#include <asm/page.h>
#include "proc-macros.S"
/*
* The size of one data cache line.
*/
#define CACHE_DLINESIZE 32
/*
* The number of data cache segments.
*/
#define CACHE_DSEGMENTS 8
/*
* The number of lines in a cache segment.
*/
#define CACHE_DENTRIES 64
/*
* This is the size at which it becomes more efficient to
* clean the whole cache, rather than using the individual
* cache line maintenance instructions.
*
* *** This needs benchmarking
*/
#define CACHE_DLIMIT 16384
/*
* flush_user_cache_all()
*
* Invalidate all cache entries in a particular address
* space.
*/
ENTRY(v4wt_flush_user_cache_all)
/* FALLTHROUGH */
/*
* flush_kern_cache_all()
*
* Clean and invalidate the entire cache.
*/
ENTRY(v4wt_flush_kern_cache_all)
mov r2, #VM_EXEC
mov ip, #0
__flush_whole_cache:
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache
mov pc, lr
/*
* flush_user_cache_range(start, end, flags)
*
* Clean and invalidate a range of cache entries in the specified
* address space.
*
* - start - start address (inclusive, page aligned)
* - end - end address (exclusive, page aligned)
* - flags - vm_area_struct flags describing address space
*/
ENTRY(v4wt_flush_user_cache_range)
sub r3, r1, r0 @ calculate total size
cmp r3, #CACHE_DLIMIT
bhs __flush_whole_cache
1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
tst r2, #VM_EXEC
mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
mov pc, lr
/*
* coherent_kern_range(start, end)
*
* Ensure coherency between the Icache and the Dcache in the
* region described by start and end. If you have non-snooping
* Harvard caches, you need to implement this function.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(v4wt_coherent_kern_range)
/* FALLTHROUGH */
/*
* coherent_user_range(start, end)
*
* Ensure coherency between the Icache and the Dcache in the
* region described by start and end. If you have non-snooping
* Harvard caches, you need to implement this function.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(v4wt_coherent_user_range)
bic r0, r0, #CACHE_DLINESIZE - 1
1: mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
mov pc, lr
/*
* flush_kern_dcache_page(void *page)
*
* Ensure no D cache aliasing occurs, either with itself or
* the I cache
*
* - addr - page aligned address
*/
ENTRY(v4wt_flush_kern_dcache_page)
mov r2, #0
mcr p15, 0, r2, c7, c5, 0 @ invalidate I cache
add r1, r0, #PAGE_SZ
/* fallthrough */
/*
* dma_inv_range(start, end)
*
* Invalidate (discard) the specified virtual address range.
* May not write back any entries. If 'start' or 'end'
* are not cache line aligned, those lines must be written
* back.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(v4wt_dma_inv_range)
bic r0, r0, #CACHE_DLINESIZE - 1
1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
/* FALLTHROUGH */
/*
* dma_clean_range(start, end)
*
* Clean the specified virtual address range. This is a no-op
* on a write-through cache; there is nothing to write back.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(v4wt_dma_clean_range)
mov pc, lr
/*
* dma_flush_range(start, end)
*
* Clean and invalidate the specified virtual address range.
*
* - start - virtual start address
* - end - virtual end address
*/
.globl v4wt_dma_flush_range
.equ v4wt_dma_flush_range, v4wt_dma_inv_range
__INITDATA
.type v4wt_cache_fns, #object
ENTRY(v4wt_cache_fns)
.long v4wt_flush_kern_cache_all
.long v4wt_flush_user_cache_all
.long v4wt_flush_user_cache_range
.long v4wt_coherent_kern_range
.long v4wt_coherent_user_range
.long v4wt_flush_kern_dcache_page
.long v4wt_dma_inv_range
.long v4wt_dma_clean_range
.long v4wt_dma_flush_range
.size v4wt_cache_fns, . - v4wt_cache_fns

227
arch/arm/mm/cache-v6.S Normal file

@@ -0,0 +1,227 @@
/*
* linux/arch/arm/mm/cache-v6.S
*
* Copyright (C) 2001 Deep Blue Solutions Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This is the "shell" of the ARMv6 processor support.
*/
#include <linux/linkage.h>
#include <linux/init.h>
#include <asm/assembler.h>
#include "proc-macros.S"
#define HARVARD_CACHE
#define CACHE_LINE_SIZE 32
#define D_CACHE_LINE_SIZE 32
/*
* v6_flush_kern_cache_all()
*
* Flush the entire cache.
*/
ENTRY(v6_flush_kern_cache_all)
mov r0, #0
#ifdef HARVARD_CACHE
mcr p15, 0, r0, c7, c14, 0 @ D cache clean+invalidate
mcr p15, 0, r0, c7, c5, 0 @ I+BTB cache invalidate
#else
mcr p15, 0, r0, c7, c15, 0 @ Cache clean+invalidate
#endif
mov pc, lr
/*
* v6_flush_user_cache_all()
*
* Flush all cache entries in a particular address space
*
* - mm - mm_struct describing address space
*/
ENTRY(v6_flush_user_cache_all)
/*FALLTHROUGH*/
/*
* v6_flush_user_cache_range(start, end, flags)
*
* Flush a range of cache entries in the specified address space.
*
* - start - start address (may not be aligned)
* - end - end address (exclusive, may not be aligned)
* - flags - vm_area_struct flags describing address space
*
* It is assumed that:
* - we have a VIPT cache.
*/
ENTRY(v6_flush_user_cache_range)
mov pc, lr
/*
* v6_coherent_kern_range(start,end)
*
* Ensure that the I and D caches are coherent within specified
* region. This is typically used when code has been written to
* a memory region, and will be executed.
*
* - start - virtual start address of region
* - end - virtual end address of region
*
* It is assumed that:
* - the Icache does not read data from the write buffer
*/
ENTRY(v6_coherent_kern_range)
/* FALLTHROUGH */
/*
* v6_coherent_user_range(start,end)
*
* Ensure that the I and D caches are coherent within specified
* region. This is typically used when code has been written to
* a memory region, and will be executed.
*
* - start - virtual start address of region
* - end - virtual end address of region
*
* It is assumed that:
* - the Icache does not read data from the write buffer
*/
ENTRY(v6_coherent_user_range)
bic r0, r0, #CACHE_LINE_SIZE - 1
1:
#ifdef HARVARD_CACHE
mcr p15, 0, r0, c7, c10, 1 @ clean D line
mcr p15, 0, r0, c7, c5, 1 @ invalidate I line
#endif
mcr p15, 0, r0, c7, c5, 7 @ invalidate BTB entry
add r0, r0, #CACHE_LINE_SIZE
cmp r0, r1
blo 1b
#ifdef HARVARD_CACHE
mov r0, #0
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
#endif
mov pc, lr
/*
* v6_flush_kern_dcache_page(kaddr)
*
* Ensure that the data held in the page kaddr is written back
* to the page in question.
*
* - kaddr - kernel address (guaranteed to be page aligned)
*/
ENTRY(v6_flush_kern_dcache_page)
add r1, r0, #PAGE_SZ
1:
#ifdef HARVARD_CACHE
mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line
#else
mcr p15, 0, r0, c7, c15, 1 @ clean & invalidate unified line
#endif
add r0, r0, #D_CACHE_LINE_SIZE
cmp r0, r1
blo 1b
#ifdef HARVARD_CACHE
mov r0, #0
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
#endif
mov pc, lr
/*
* v6_dma_inv_range(start,end)
*
* Invalidate the data cache within the specified region; we will
* be performing a DMA operation in this region and we want to
* purge old data in the cache.
*
* - start - virtual start address of region
* - end - virtual end address of region
*/
ENTRY(v6_dma_inv_range)
tst r0, #D_CACHE_LINE_SIZE - 1
bic r0, r0, #D_CACHE_LINE_SIZE - 1
#ifdef HARVARD_CACHE
mcrne p15, 0, r0, c7, c10, 1 @ clean D line
#else
mcrne p15, 0, r0, c7, c11, 1 @ clean unified line
#endif
tst r1, #D_CACHE_LINE_SIZE - 1
bic r1, r1, #D_CACHE_LINE_SIZE - 1
#ifdef HARVARD_CACHE
mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D line
#else
mcrne p15, 0, r1, c7, c15, 1 @ clean & invalidate unified line
#endif
1:
#ifdef HARVARD_CACHE
mcr p15, 0, r0, c7, c6, 1 @ invalidate D line
#else
mcr p15, 0, r0, c7, c7, 1 @ invalidate unified line
#endif
add r0, r0, #D_CACHE_LINE_SIZE
cmp r0, r1
blo 1b
mov r0, #0
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
mov pc, lr
/*
* v6_dma_clean_range(start,end)
* - start - virtual start address of region
* - end - virtual end address of region
*/
ENTRY(v6_dma_clean_range)
bic r0, r0, #D_CACHE_LINE_SIZE - 1
1:
#ifdef HARVARD_CACHE
mcr p15, 0, r0, c7, c10, 1 @ clean D line
#else
mcr p15, 0, r0, c7, c11, 1 @ clean unified line
#endif
add r0, r0, #D_CACHE_LINE_SIZE
cmp r0, r1
blo 1b
mov r0, #0
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
mov pc, lr
/*
* v6_dma_flush_range(start,end)
* - start - virtual start address of region
* - end - virtual end address of region
*/
ENTRY(v6_dma_flush_range)
bic r0, r0, #D_CACHE_LINE_SIZE - 1
1:
#ifdef HARVARD_CACHE
mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line
#else
mcr p15, 0, r0, c7, c15, 1 @ clean & invalidate line
#endif
add r0, r0, #D_CACHE_LINE_SIZE
cmp r0, r1
blo 1b
mov r0, #0
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
mov pc, lr
__INITDATA
.type v6_cache_fns, #object
ENTRY(v6_cache_fns)
.long v6_flush_kern_cache_all
.long v6_flush_user_cache_all
.long v6_flush_user_cache_range
.long v6_coherent_kern_range
.long v6_coherent_user_range
.long v6_flush_kern_dcache_page
.long v6_dma_inv_range
.long v6_dma_clean_range
.long v6_dma_flush_range
.size v6_cache_fns, . - v6_cache_fns

451
arch/arm/mm/consistent.c Normal file

@@ -0,0 +1,451 @@
/*
* linux/arch/arm/mm/consistent.c
*
* Copyright (C) 2000-2004 Russell King
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* DMA uncached mapping support.
*/
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/list.h>
#include <linux/init.h>
#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <asm/cacheflush.h>
#include <asm/io.h>
#include <asm/tlbflush.h>
#define CONSISTENT_BASE (0xffc00000)
#define CONSISTENT_END (0xffe00000)
#define CONSISTENT_OFFSET(x) (((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT)
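CONSISTENT_OFFSET turns a virtual address inside the 2MB window into an index into consistent_pte. A worked example, assuming 4K pages (PAGE_SHIFT == 12):
/* 0xffc42000 lies 0x42000 bytes past CONSISTENT_BASE (0xffc00000),
 * so it corresponds to consistent_pte[0x42]. */
unsigned long idx = CONSISTENT_OFFSET(0xffc42000);	/* == 0x42 */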
/*
* This is the page table (2MB) covering uncached, DMA consistent allocations
*/
static pte_t *consistent_pte;
static DEFINE_SPINLOCK(consistent_lock);
/*
* VM region handling support.
*
* This should become something generic, handling VM region allocations for
* vmalloc and similar (ioremap, module space, etc).
*
* I envisage vmalloc()'s supporting vm_struct becoming:
*
* struct vm_struct {
* struct vm_region region;
* unsigned long flags;
* struct page **pages;
* unsigned int nr_pages;
* unsigned long phys_addr;
* };
*
* get_vm_area() would then call vm_region_alloc with an appropriate
* struct vm_region head (eg):
*
* struct vm_region vmalloc_head = {
* .vm_list = LIST_HEAD_INIT(vmalloc_head.vm_list),
* .vm_start = VMALLOC_START,
* .vm_end = VMALLOC_END,
* };
*
* However, vmalloc_head.vm_start is variable (typically, it is dependent on
* the amount of RAM found at boot time.) I would imagine that get_vm_area()
* would have to initialise this each time prior to calling vm_region_alloc().
*/
struct vm_region {
struct list_head vm_list;
unsigned long vm_start;
unsigned long vm_end;
struct page *vm_pages;
};
static struct vm_region consistent_head = {
.vm_list = LIST_HEAD_INIT(consistent_head.vm_list),
.vm_start = CONSISTENT_BASE,
.vm_end = CONSISTENT_END,
};
static struct vm_region *
vm_region_alloc(struct vm_region *head, size_t size, int gfp)
{
unsigned long addr = head->vm_start, end = head->vm_end - size;
unsigned long flags;
struct vm_region *c, *new;
new = kmalloc(sizeof(struct vm_region), gfp);
if (!new)
goto out;
spin_lock_irqsave(&consistent_lock, flags);
list_for_each_entry(c, &head->vm_list, vm_list) {
if ((addr + size) < addr)
goto nospc;
if ((addr + size) <= c->vm_start)
goto found;
addr = c->vm_end;
if (addr > end)
goto nospc;
}
found:
/*
* Insert this entry _before_ the one we found.
*/
list_add_tail(&new->vm_list, &c->vm_list);
new->vm_start = addr;
new->vm_end = addr + size;
spin_unlock_irqrestore(&consistent_lock, flags);
return new;
nospc:
spin_unlock_irqrestore(&consistent_lock, flags);
kfree(new);
out:
return NULL;
}
static struct vm_region *vm_region_find(struct vm_region *head, unsigned long addr)
{
struct vm_region *c;
list_for_each_entry(c, &head->vm_list, vm_list) {
if (c->vm_start == addr)
goto out;
}
c = NULL;
out:
return c;
}
#ifdef CONFIG_HUGETLB_PAGE
#error ARM Coherent DMA allocator does not (yet) support huge TLB
#endif
static void *
__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, int gfp,
pgprot_t prot)
{
struct page *page;
struct vm_region *c;
unsigned long order;
u64 mask = ISA_DMA_THRESHOLD, limit;
if (!consistent_pte) {
printk(KERN_ERR "%s: not initialised\n", __func__);
dump_stack();
return NULL;
}
if (dev) {
mask = dev->coherent_dma_mask;
/*
* Sanity check the DMA mask - it must be non-zero, and
* must be able to be satisfied by a DMA allocation.
*/
if (mask == 0) {
dev_warn(dev, "coherent DMA mask is unset\n");
goto no_page;
}
if ((~mask) & ISA_DMA_THRESHOLD) {
dev_warn(dev, "coherent DMA mask %#llx is smaller "
"than system GFP_DMA mask %#llx\n",
mask, (unsigned long long)ISA_DMA_THRESHOLD);
goto no_page;
}
}
/*
* Sanity check the allocation size.
*/
size = PAGE_ALIGN(size);
limit = (mask + 1) & ~mask;
if ((limit && size >= limit) ||
size >= (CONSISTENT_END - CONSISTENT_BASE)) {
printk(KERN_WARNING "coherent allocation too big "
"(requested %#x mask %#llx)\n", size, mask);
goto no_page;
}
order = get_order(size);
if (mask != 0xffffffff)
gfp |= GFP_DMA;
page = alloc_pages(gfp, order);
if (!page)
goto no_page;
/*
* Invalidate any data that might be lurking in the
* kernel direct-mapped region for device DMA.
*/
{
unsigned long kaddr = (unsigned long)page_address(page);
memset(page_address(page), 0, size);
dmac_flush_range(kaddr, kaddr + size);
}
/*
* Allocate a virtual address in the consistent mapping region.
*/
c = vm_region_alloc(&consistent_head, size,
gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
if (c) {
pte_t *pte = consistent_pte + CONSISTENT_OFFSET(c->vm_start);
struct page *end = page + (1 << order);
c->vm_pages = page;
/*
* Set the "dma handle"
*/
*handle = page_to_dma(dev, page);
do {
BUG_ON(!pte_none(*pte));
set_page_count(page, 1);
/*
* x86 does not mark the pages reserved...
*/
SetPageReserved(page);
set_pte(pte, mk_pte(page, prot));
page++;
pte++;
} while (size -= PAGE_SIZE);
/*
* Free the otherwise unused pages.
*/
while (page < end) {
set_page_count(page, 1);
__free_page(page);
page++;
}
return (void *)c->vm_start;
}
if (page)
__free_pages(page, order);
no_page:
*handle = ~0;
return NULL;
}
/*
* Allocate DMA-coherent memory space and return both the kernel remapped
* virtual and bus address for that space.
*/
void *
dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, int gfp)
{
return __dma_alloc(dev, size, handle, gfp,
pgprot_noncached(pgprot_kernel));
}
EXPORT_SYMBOL(dma_alloc_coherent);
/*
* Allocate a writecombining region, in much the same way as
* dma_alloc_coherent above.
*/
void *
dma_alloc_writecombine(struct device *dev, size_t size, dma_addr_t *handle, int gfp)
{
return __dma_alloc(dev, size, handle, gfp,
pgprot_writecombine(pgprot_kernel));
}
EXPORT_SYMBOL(dma_alloc_writecombine);
static int dma_mmap(struct device *dev, struct vm_area_struct *vma,
void *cpu_addr, dma_addr_t dma_addr, size_t size)
{
unsigned long flags, user_size, kern_size;
struct vm_region *c;
int ret = -ENXIO;
user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
spin_lock_irqsave(&consistent_lock, flags);
c = vm_region_find(&consistent_head, (unsigned long)cpu_addr);
spin_unlock_irqrestore(&consistent_lock, flags);
if (c) {
unsigned long off = vma->vm_pgoff;
kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT;
if (off < kern_size &&
user_size <= (kern_size - off)) {
vma->vm_flags |= VM_RESERVED;
ret = remap_pfn_range(vma, vma->vm_start,
page_to_pfn(c->vm_pages) + off,
user_size << PAGE_SHIFT,
vma->vm_page_prot);
}
}
return ret;
}
int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
void *cpu_addr, dma_addr_t dma_addr, size_t size)
{
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
return dma_mmap(dev, vma, cpu_addr, dma_addr, size);
}
EXPORT_SYMBOL(dma_mmap_coherent);
int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma,
void *cpu_addr, dma_addr_t dma_addr, size_t size)
{
vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
return dma_mmap(dev, vma, cpu_addr, dma_addr, size);
}
EXPORT_SYMBOL(dma_mmap_writecombine);
/*
* free a page as defined by the above mapping.
*/
void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle)
{
struct vm_region *c;
unsigned long flags, addr;
pte_t *ptep;
size = PAGE_ALIGN(size);
spin_lock_irqsave(&consistent_lock, flags);
c = vm_region_find(&consistent_head, (unsigned long)cpu_addr);
if (!c)
goto no_area;
if ((c->vm_end - c->vm_start) != size) {
printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n",
__func__, c->vm_end - c->vm_start, size);
dump_stack();
size = c->vm_end - c->vm_start;
}
ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start);
addr = c->vm_start;
do {
pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep);
unsigned long pfn;
ptep++;
addr += PAGE_SIZE;
if (!pte_none(pte) && pte_present(pte)) {
pfn = pte_pfn(pte);
if (pfn_valid(pfn)) {
struct page *page = pfn_to_page(pfn);
/*
* x86 does not mark the pages reserved...
*/
ClearPageReserved(page);
__free_page(page);
continue;
}
}
printk(KERN_CRIT "%s: bad page in kernel page table\n",
__func__);
} while (size -= PAGE_SIZE);
flush_tlb_kernel_range(c->vm_start, c->vm_end);
list_del(&c->vm_list);
spin_unlock_irqrestore(&consistent_lock, flags);
kfree(c);
return;
no_area:
spin_unlock_irqrestore(&consistent_lock, flags);
printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n",
__func__, cpu_addr);
dump_stack();
}
EXPORT_SYMBOL(dma_free_coherent);
/*
* Initialise the consistent memory allocation.
*/
static int __init consistent_init(void)
{
pgd_t *pgd;
pmd_t *pmd;
pte_t *pte;
int ret = 0;
spin_lock(&init_mm.page_table_lock);
do {
pgd = pgd_offset(&init_mm, CONSISTENT_BASE);
pmd = pmd_alloc(&init_mm, pgd, CONSISTENT_BASE);
if (!pmd) {
printk(KERN_ERR "%s: no pmd tables\n", __func__);
ret = -ENOMEM;
break;
}
WARN_ON(!pmd_none(*pmd));
pte = pte_alloc_kernel(&init_mm, pmd, CONSISTENT_BASE);
if (!pte) {
printk(KERN_ERR "%s: no pte tables\n", __func__);
ret = -ENOMEM;
break;
}
consistent_pte = pte;
} while (0);
spin_unlock(&init_mm.page_table_lock);
return ret;
}
core_initcall(consistent_init);
/*
* Make an area consistent for devices.
*/
void consistent_sync(void *vaddr, size_t size, int direction)
{
unsigned long start = (unsigned long)vaddr;
unsigned long end = start + size;
switch (direction) {
case DMA_FROM_DEVICE: /* invalidate only */
dmac_inv_range(start, end);
break;
case DMA_TO_DEVICE: /* writeback only */
dmac_clean_range(start, end);
break;
case DMA_BIDIRECTIONAL: /* writeback and invalidate */
dmac_flush_range(start, end);
break;
default:
BUG();
}
}
EXPORT_SYMBOL(consistent_sync);
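For reference, a hedged sketch of how a driver consumes the allocation side of this file; the device pointer, buffer size, and register-programming step are illustrative only:
/* Sketch only: the CPU gets an uncached mapping in the 0xffc00000
 * window, the device gets the bus address. Buffers from here never
 * need consistent_sync(); that call is for streaming kernel memory. */
static void *buf;
static dma_addr_t buf_dma;
static int example_probe(struct device *dev)
{
	buf = dma_alloc_coherent(dev, PAGE_SIZE, &buf_dma, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;
	/* ... program the device with buf_dma ... */
	return 0;
}
static void example_remove(struct device *dev)
{
	dma_free_coherent(dev, PAGE_SIZE, buf, buf_dma);
}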

67
arch/arm/mm/copypage-v3.S Normal file

@@ -0,0 +1,67 @@
/*
* linux/arch/arm/lib/copypage.S
*
* Copyright (C) 1995-1999 Russell King
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* ASM optimised string functions
*/
#include <linux/linkage.h>
#include <linux/init.h>
#include <asm/assembler.h>
#include <asm/constants.h>
.text
.align 5
/*
* ARMv3 optimised copy_user_page
*
* FIXME: do we need to handle cache stuff...
*/
ENTRY(v3_copy_user_page)
stmfd sp!, {r4, lr} @ 2
mov r2, #PAGE_SZ/64 @ 1
ldmia r1!, {r3, r4, ip, lr} @ 4+1
1: stmia r0!, {r3, r4, ip, lr} @ 4
ldmia r1!, {r3, r4, ip, lr} @ 4+1
stmia r0!, {r3, r4, ip, lr} @ 4
ldmia r1!, {r3, r4, ip, lr} @ 4+1
stmia r0!, {r3, r4, ip, lr} @ 4
ldmia r1!, {r3, r4, ip, lr} @ 4
subs r2, r2, #1 @ 1
stmia r0!, {r3, r4, ip, lr} @ 4
ldmneia r1!, {r3, r4, ip, lr} @ 4
bne 1b @ 1
LOADREGS(fd, sp!, {r4, pc}) @ 3
.align 5
/*
* ARMv3 optimised clear_user_page
*
* FIXME: do we need to handle cache stuff...
*/
ENTRY(v3_clear_user_page)
str lr, [sp, #-4]!
mov r1, #PAGE_SZ/64 @ 1
mov r2, #0 @ 1
mov r3, #0 @ 1
mov ip, #0 @ 1
mov lr, #0 @ 1
1: stmia r0!, {r2, r3, ip, lr} @ 4
stmia r0!, {r2, r3, ip, lr} @ 4
stmia r0!, {r2, r3, ip, lr} @ 4
stmia r0!, {r2, r3, ip, lr} @ 4
subs r1, r1, #1 @ 1
bne 1b @ 1
ldr pc, [sp], #4
__INITDATA
.type v3_user_fns, #object
ENTRY(v3_user_fns)
.long v3_clear_user_page
.long v3_copy_user_page
.size v3_user_fns, . - v3_user_fns
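Each copypage variant ends with a two-entry table that the C side reads as a struct, mirroring the v6_user_fns initializer in copypage-v6.c later in this commit; the exact field declarations here are an assumption:
/* Sketch: C view of v3_user_fns and the other *_user_fns tables. */
struct cpu_user_fns {
	void (*cpu_clear_user_page)(void *kaddr, unsigned long vaddr);
	void (*cpu_copy_user_page)(void *kto, const void *kfrom,
				   unsigned long vaddr);
};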

80
arch/arm/mm/copypage-v4mc.S Normal file

@@ -0,0 +1,80 @@
/*
* linux/arch/arm/lib/copy_page-armv4mc.S
*
* Copyright (C) 1995-2001 Russell King
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* ASM optimised string functions
*/
#include <linux/linkage.h>
#include <linux/init.h>
#include <asm/constants.h>
.text
.align 5
/*
* ARMv4 mini-dcache optimised copy_user_page
*
* We flush the destination cache lines just before we write the data into the
* corresponding address. Since the Dcache is read-allocate, this removes the
* Dcache aliasing issue. The writes will be forwarded to the write buffer,
* and merged as appropriate.
*
* Note: We rely on all ARMv4 processors implementing the "invalidate D line"
* instruction. If your processor does not supply this, you have to write your
* own copy_user_page that does the right thing.
*/
ENTRY(v4_mc_copy_user_page)
stmfd sp!, {r4, lr} @ 2
mov r4, r0
mov r0, r1
bl map_page_minicache
mov r1, #PAGE_SZ/64 @ 1
ldmia r0!, {r2, r3, ip, lr} @ 4
1: mcr p15, 0, r4, c7, c6, 1 @ 1 invalidate D line
stmia r4!, {r2, r3, ip, lr} @ 4
ldmia r0!, {r2, r3, ip, lr} @ 4+1
stmia r4!, {r2, r3, ip, lr} @ 4
ldmia r0!, {r2, r3, ip, lr} @ 4
mcr p15, 0, r4, c7, c6, 1 @ 1 invalidate D line
stmia r4!, {r2, r3, ip, lr} @ 4
ldmia r0!, {r2, r3, ip, lr} @ 4
subs r1, r1, #1 @ 1
stmia r4!, {r2, r3, ip, lr} @ 4
ldmneia r0!, {r2, r3, ip, lr} @ 4
bne 1b @ 1
ldmfd sp!, {r4, pc} @ 3
.align 5
/*
* ARMv4 optimised clear_user_page
*
* Same story as above.
*/
ENTRY(v4_mc_clear_user_page)
str lr, [sp, #-4]!
mov r1, #PAGE_SZ/64 @ 1
mov r2, #0 @ 1
mov r3, #0 @ 1
mov ip, #0 @ 1
mov lr, #0 @ 1
1: mcr p15, 0, r0, c7, c6, 1 @ 1 invalidate D line
stmia r0!, {r2, r3, ip, lr} @ 4
stmia r0!, {r2, r3, ip, lr} @ 4
mcr p15, 0, r0, c7, c6, 1 @ 1 invalidate D line
stmia r0!, {r2, r3, ip, lr} @ 4
stmia r0!, {r2, r3, ip, lr} @ 4
subs r1, r1, #1 @ 1
bne 1b @ 1
ldr pc, [sp], #4
__INITDATA
.type v4_mc_user_fns, #object
ENTRY(v4_mc_user_fns)
.long v4_mc_clear_user_page
.long v4_mc_copy_user_page
.size v4_mc_user_fns, . - v4_mc_user_fns

79
arch/arm/mm/copypage-v4wb.S Normal file

@@ -0,0 +1,79 @@
/*
* linux/arch/arm/lib/copypage.S
*
* Copyright (C) 1995-1999 Russell King
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* ASM optimised string functions
*/
#include <linux/linkage.h>
#include <linux/init.h>
#include <asm/constants.h>
.text
.align 5
/*
* ARMv4 optimised copy_user_page
*
* We flush the destination cache lines just before we write the data into the
* corresponding address. Since the Dcache is read-allocate, this removes the
* Dcache aliasing issue. The writes will be forwarded to the write buffer,
* and merged as appropriate.
*
* Note: We rely on all ARMv4 processors implementing the "invalidate D line"
* instruction. If your processor does not supply this, you have to write your
* own copy_user_page that does the right thing.
*/
ENTRY(v4wb_copy_user_page)
stmfd sp!, {r4, lr} @ 2
mov r2, #PAGE_SZ/64 @ 1
ldmia r1!, {r3, r4, ip, lr} @ 4
1: mcr p15, 0, r0, c7, c6, 1 @ 1 invalidate D line
stmia r0!, {r3, r4, ip, lr} @ 4
ldmia r1!, {r3, r4, ip, lr} @ 4+1
stmia r0!, {r3, r4, ip, lr} @ 4
ldmia r1!, {r3, r4, ip, lr} @ 4
mcr p15, 0, r0, c7, c6, 1 @ 1 invalidate D line
stmia r0!, {r3, r4, ip, lr} @ 4
ldmia r1!, {r3, r4, ip, lr} @ 4
subs r2, r2, #1 @ 1
stmia r0!, {r3, r4, ip, lr} @ 4
ldmneia r1!, {r3, r4, ip, lr} @ 4
bne 1b @ 1
mcr p15, 0, r1, c7, c10, 4 @ 1 drain WB
ldmfd sp!, {r4, pc} @ 3
.align 5
/*
* ARMv4 optimised clear_user_page
*
* Same story as above.
*/
ENTRY(v4wb_clear_user_page)
str lr, [sp, #-4]!
mov r1, #PAGE_SZ/64 @ 1
mov r2, #0 @ 1
mov r3, #0 @ 1
mov ip, #0 @ 1
mov lr, #0 @ 1
1: mcr p15, 0, r0, c7, c6, 1 @ 1 invalidate D line
stmia r0!, {r2, r3, ip, lr} @ 4
stmia r0!, {r2, r3, ip, lr} @ 4
mcr p15, 0, r0, c7, c6, 1 @ 1 invalidate D line
stmia r0!, {r2, r3, ip, lr} @ 4
stmia r0!, {r2, r3, ip, lr} @ 4
subs r1, r1, #1 @ 1
bne 1b @ 1
mcr p15, 0, r1, c7, c10, 4 @ 1 drain WB
ldr pc, [sp], #4
__INITDATA
.type v4wb_user_fns, #object
ENTRY(v4wb_user_fns)
.long v4wb_clear_user_page
.long v4wb_copy_user_page
.size v4wb_user_fns, . - v4wb_user_fns

73
arch/arm/mm/copypage-v4wt.S Normal file

@@ -0,0 +1,73 @@
/*
* linux/arch/arm/lib/copypage-v4.S
*
* Copyright (C) 1995-1999 Russell King
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* ASM optimised string functions
*
* This is for CPUs with a writethrough cache and 'flush ID cache' is
* the only supported cache operation.
*/
#include <linux/linkage.h>
#include <linux/init.h>
#include <asm/constants.h>
.text
.align 5
/*
* ARMv4 optimised copy_user_page
*
* Since we have writethrough caches, we don't have to worry about
* dirty data in the cache. However, we do have to ensure that
* subsequent reads are up to date.
*/
ENTRY(v4wt_copy_user_page)
stmfd sp!, {r4, lr} @ 2
mov r2, #PAGE_SZ/64 @ 1
ldmia r1!, {r3, r4, ip, lr} @ 4
1: stmia r0!, {r3, r4, ip, lr} @ 4
ldmia r1!, {r3, r4, ip, lr} @ 4+1
stmia r0!, {r3, r4, ip, lr} @ 4
ldmia r1!, {r3, r4, ip, lr} @ 4
stmia r0!, {r3, r4, ip, lr} @ 4
ldmia r1!, {r3, r4, ip, lr} @ 4
subs r2, r2, #1 @ 1
stmia r0!, {r3, r4, ip, lr} @ 4
ldmneia r1!, {r3, r4, ip, lr} @ 4
bne 1b @ 1
mcr p15, 0, r2, c7, c7, 0 @ flush ID cache
ldmfd sp!, {r4, pc} @ 3
.align 5
/*
* ARMv4 optimised clear_user_page
*
* Same story as above.
*/
ENTRY(v4wt_clear_user_page)
str lr, [sp, #-4]!
mov r1, #PAGE_SZ/64 @ 1
mov r2, #0 @ 1
mov r3, #0 @ 1
mov ip, #0 @ 1
mov lr, #0 @ 1
1: stmia r0!, {r2, r3, ip, lr} @ 4
stmia r0!, {r2, r3, ip, lr} @ 4
stmia r0!, {r2, r3, ip, lr} @ 4
stmia r0!, {r2, r3, ip, lr} @ 4
subs r1, r1, #1 @ 1
bne 1b @ 1
mcr p15, 0, r2, c7, c7, 0 @ flush ID cache
ldr pc, [sp], #4
__INITDATA
.type v4wt_user_fns, #object
ENTRY(v4wt_user_fns)
.long v4wt_clear_user_page
.long v4wt_copy_user_page
.size v4wt_user_fns, . - v4wt_user_fns

155
arch/arm/mm/copypage-v6.c Normal file

@@ -0,0 +1,155 @@
/*
* linux/arch/arm/mm/copypage-v6.c
*
* Copyright (C) 2002 Deep Blue Solutions Ltd, All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/mm.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/shmparam.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
#if SHMLBA > 16384
#error FIX ME
#endif
#define from_address (0xffff8000)
#define from_pgprot PAGE_KERNEL
#define to_address (0xffffc000)
#define to_pgprot PAGE_KERNEL
static pte_t *from_pte;
static pte_t *to_pte;
static DEFINE_SPINLOCK(v6_lock);
#define DCACHE_COLOUR(vaddr) ((vaddr & (SHMLBA - 1)) >> PAGE_SHIFT)
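When SHMLBA is 16384 and pages are 4K, DCACHE_COLOUR extracts bits 13:12 of the user virtual address, i.e. one of four cache colours. A worked example:
/* vaddr 0x4000b000: 0x4000b000 & 0x3fff == 0x3000, >> 12 == 3.
 * User mappings that share a colour alias to the same cache lines. */
unsigned int colour = DCACHE_COLOUR(0x4000b000);	/* == 3 */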
/*
* Copy the user page. No aliasing to deal with so we can just
* attack the kernel's existing mapping of these pages.
*/
void v6_copy_user_page_nonaliasing(void *kto, const void *kfrom, unsigned long vaddr)
{
copy_page(kto, kfrom);
}
/*
* Clear the user page. No aliasing to deal with so we can just
* attack the kernel's existing mapping of this page.
*/
void v6_clear_user_page_nonaliasing(void *kaddr, unsigned long vaddr)
{
clear_page(kaddr);
}
/*
* Copy the page, taking account of the cache colour.
*/
void v6_copy_user_page_aliasing(void *kto, const void *kfrom, unsigned long vaddr)
{
unsigned int offset = DCACHE_COLOUR(vaddr);
unsigned long from, to;
/*
* Discard data in the kernel mapping for the new page.
* FIXME: this needs the MCRR instruction to be supported.
*/
__asm__("mcrr p15, 0, %1, %0, c6 @ 0xec401f06"
:
: "r" (kto),
"r" ((unsigned long)kto + PAGE_SIZE - L1_CACHE_BYTES)
: "cc");
/*
* Now copy the page using the same cache colour as the
* page's ultimate destination.
*/
spin_lock(&v6_lock);
set_pte(from_pte + offset, pfn_pte(__pa(kfrom) >> PAGE_SHIFT, from_pgprot));
set_pte(to_pte + offset, pfn_pte(__pa(kto) >> PAGE_SHIFT, to_pgprot));
from = from_address + (offset << PAGE_SHIFT);
to = to_address + (offset << PAGE_SHIFT);
flush_tlb_kernel_page(from);
flush_tlb_kernel_page(to);
copy_page((void *)to, (void *)from);
spin_unlock(&v6_lock);
}
/*
* Clear the user page. We need to deal with the aliasing issues,
* so remap the kernel page into the same cache colour as the user
* page.
*/
void v6_clear_user_page_aliasing(void *kaddr, unsigned long vaddr)
{
unsigned int offset = DCACHE_COLOUR(vaddr);
unsigned long to = to_address + (offset << PAGE_SHIFT);
/*
* Discard data in the kernel mapping for the new page
* FIXME: this needs the MCRR instruction to be supported.
*/
__asm__("mcrr p15, 0, %1, %0, c6 @ 0xec401f06"
:
: "r" (kaddr),
"r" ((unsigned long)kaddr + PAGE_SIZE - L1_CACHE_BYTES)
: "cc");
/*
* Now clear the page using the same cache colour as
* the page's ultimate destination.
*/
spin_lock(&v6_lock);
set_pte(to_pte + offset, pfn_pte(__pa(kaddr) >> PAGE_SHIFT, to_pgprot));
flush_tlb_kernel_page(to);
clear_page((void *)to);
spin_unlock(&v6_lock);
}
struct cpu_user_fns v6_user_fns __initdata = {
.cpu_clear_user_page = v6_clear_user_page_nonaliasing,
.cpu_copy_user_page = v6_copy_user_page_nonaliasing,
};
static int __init v6_userpage_init(void)
{
if (cache_is_vipt_aliasing()) {
pgd_t *pgd;
pmd_t *pmd;
pgd = pgd_offset_k(from_address);
pmd = pmd_alloc(&init_mm, pgd, from_address);
if (!pmd)
BUG();
from_pte = pte_alloc_kernel(&init_mm, pmd, from_address);
if (!from_pte)
BUG();
to_pte = pte_alloc_kernel(&init_mm, pmd, to_address);
if (!to_pte)
BUG();
cpu_user.cpu_clear_user_page = v6_clear_user_page_aliasing;
cpu_user.cpu_copy_user_page = v6_copy_user_page_aliasing;
}
return 0;
}
__initcall(v6_userpage_init);

113
arch/arm/mm/copypage-xscale.S Normal file

@@ -0,0 +1,113 @@
/*
* linux/arch/arm/lib/copypage-xscale.S
*
* Copyright (C) 2001 Russell King
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/linkage.h>
#include <linux/init.h>
#include <asm/constants.h>
/*
* General note:
* We don't really want write-allocate cache behaviour for these functions
* since that will just eat through 8K of the cache.
*/
.text
.align 5
/*
* XScale optimised copy_user_page
* r0 = destination
* r1 = source
* r2 = virtual user address of ultimate destination page
*
* The source page may have some clean entries in the cache already, but we
* can safely ignore them - break_cow() will flush them out of the cache
* if we eventually end up using our copied page.
*
* What we could do is use the mini-cache to buffer reads from the source
* page. We rely on the mini-cache being smaller than one page, so we'll
* cycle through the complete cache anyway.
*/
ENTRY(xscale_mc_copy_user_page)
stmfd sp!, {r4, r5, lr}
mov r5, r0
mov r0, r1
bl map_page_minicache
mov r1, r5
mov lr, #PAGE_SZ/64-1
/*
* Strangely enough, best performance is achieved
* when prefetching destination as well. (NP)
*/
pld [r0, #0]
pld [r0, #32]
pld [r1, #0]
pld [r1, #32]
1: pld [r0, #64]
pld [r0, #96]
pld [r1, #64]
pld [r1, #96]
2: ldrd r2, [r0], #8
ldrd r4, [r0], #8
mov ip, r1
strd r2, [r1], #8
ldrd r2, [r0], #8
strd r4, [r1], #8
ldrd r4, [r0], #8
strd r2, [r1], #8
strd r4, [r1], #8
mcr p15, 0, ip, c7, c10, 1 @ clean D line
ldrd r2, [r0], #8
mcr p15, 0, ip, c7, c6, 1 @ invalidate D line
ldrd r4, [r0], #8
mov ip, r1
strd r2, [r1], #8
ldrd r2, [r0], #8
strd r4, [r1], #8
ldrd r4, [r0], #8
strd r2, [r1], #8
strd r4, [r1], #8
mcr p15, 0, ip, c7, c10, 1 @ clean D line
subs lr, lr, #1
mcr p15, 0, ip, c7, c6, 1 @ invalidate D line
bgt 1b
beq 2b
ldmfd sp!, {r4, r5, pc}
.align 5
/*
* XScale optimised clear_user_page
* r0 = destination
* r1 = virtual user address of ultimate destination page
*/
ENTRY(xscale_mc_clear_user_page)
mov r1, #PAGE_SZ/32
mov r2, #0
mov r3, #0
1: mov ip, r0
strd r2, [r0], #8
strd r2, [r0], #8
strd r2, [r0], #8
strd r2, [r0], #8
mcr p15, 0, ip, c7, c10, 1 @ clean D line
subs r1, r1, #1
mcr p15, 0, ip, c7, c6, 1 @ invalidate D line
bne 1b
mov pc, lr
__INITDATA
.type xscale_mc_user_fns, #object
ENTRY(xscale_mc_user_fns)
.long xscale_mc_clear_user_page
.long xscale_mc_copy_user_page
.size xscale_mc_user_fns, . - xscale_mc_user_fns

49
arch/arm/mm/discontig.c Normal file

@@ -0,0 +1,49 @@
/*
* linux/arch/arm/mm/discontig.c
*
* Discontiguous memory support.
*
* Initial code: Copyright (C) 1999-2000 Nicolas Pitre
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#if MAX_NUMNODES != 4 && MAX_NUMNODES != 16
# error Fix Me Please
#endif
/*
* Our node_data structure for discontiguous memory.
*/
static bootmem_data_t node_bootmem_data[MAX_NUMNODES];
pg_data_t discontig_node_data[MAX_NUMNODES] = {
{ .bdata = &node_bootmem_data[0] },
{ .bdata = &node_bootmem_data[1] },
{ .bdata = &node_bootmem_data[2] },
{ .bdata = &node_bootmem_data[3] },
#if MAX_NUMNODES == 16
{ .bdata = &node_bootmem_data[4] },
{ .bdata = &node_bootmem_data[5] },
{ .bdata = &node_bootmem_data[6] },
{ .bdata = &node_bootmem_data[7] },
{ .bdata = &node_bootmem_data[8] },
{ .bdata = &node_bootmem_data[9] },
{ .bdata = &node_bootmem_data[10] },
{ .bdata = &node_bootmem_data[11] },
{ .bdata = &node_bootmem_data[12] },
{ .bdata = &node_bootmem_data[13] },
{ .bdata = &node_bootmem_data[14] },
{ .bdata = &node_bootmem_data[15] },
#endif
};
EXPORT_SYMBOL(discontig_node_data);

16
arch/arm/mm/extable.c Normal file

@@ -0,0 +1,16 @@
/*
* linux/arch/arm/mm/extable.c
*/
#include <linux/module.h>
#include <asm/uaccess.h>
int fixup_exception(struct pt_regs *regs)
{
const struct exception_table_entry *fixup;
fixup = search_exception_tables(instruction_pointer(regs));
if (fixup)
regs->ARM_pc = fixup->fixup;
return fixup != NULL;
}

223
arch/arm/mm/fault-armv.c Normal file

@@ -0,0 +1,223 @@
/*
* linux/arch/arm/mm/fault-armv.c
*
* Copyright (C) 1995 Linus Torvalds
* Modifications for ARM processor (c) 1995-2002 Russell King
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/bitops.h>
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <asm/cacheflush.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
static unsigned long shared_pte_mask = L_PTE_CACHEABLE;
/*
* We take the easy way out of this problem - we make the
* PTE uncacheable. However, we leave the write buffer on.
*/
static int adjust_pte(struct vm_area_struct *vma, unsigned long address)
{
pgd_t *pgd;
pmd_t *pmd;
pte_t *pte, entry;
int ret = 0;
pgd = pgd_offset(vma->vm_mm, address);
if (pgd_none(*pgd))
goto no_pgd;
if (pgd_bad(*pgd))
goto bad_pgd;
pmd = pmd_offset(pgd, address);
if (pmd_none(*pmd))
goto no_pmd;
if (pmd_bad(*pmd))
goto bad_pmd;
pte = pte_offset_map(pmd, address);
entry = *pte;
/*
* If this page isn't present, or is already set up to
* fault (i.e., is old), we can safely ignore any issues.
*/
if (pte_present(entry) && pte_val(entry) & shared_pte_mask) {
flush_cache_page(vma, address, pte_pfn(entry));
pte_val(entry) &= ~shared_pte_mask;
set_pte(pte, entry);
flush_tlb_page(vma, address);
ret = 1;
}
pte_unmap(pte);
return ret;
bad_pgd:
pgd_ERROR(*pgd);
pgd_clear(pgd);
no_pgd:
return 0;
bad_pmd:
pmd_ERROR(*pmd);
pmd_clear(pmd);
no_pmd:
return 0;
}
static void
make_coherent(struct vm_area_struct *vma, unsigned long addr, struct page *page, int dirty)
{
struct address_space *mapping = page_mapping(page);
struct mm_struct *mm = vma->vm_mm;
struct vm_area_struct *mpnt;
struct prio_tree_iter iter;
unsigned long offset;
pgoff_t pgoff;
int aliases = 0;
if (!mapping)
return;
pgoff = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT);
/*
* If we have any shared mappings that are in the same mm
* space, then we need to handle them specially to maintain
* cache coherency.
*/
flush_dcache_mmap_lock(mapping);
vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) {
/*
* If this VMA is not in our MM, we can ignore it.
* Note that we intentionally mask out the VMA
* that we are fixing up.
*/
if (mpnt->vm_mm != mm || mpnt == vma)
continue;
if (!(mpnt->vm_flags & VM_MAYSHARE))
continue;
offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
aliases += adjust_pte(mpnt, mpnt->vm_start + offset);
}
flush_dcache_mmap_unlock(mapping);
if (aliases)
adjust_pte(vma, addr);
else
flush_cache_page(vma, addr, page_to_pfn(page));
}
/*
* Take care of architecture specific things when placing a new PTE into
* a page table, or changing an existing PTE. Basically, there are two
* things that we need to take care of:
*
* 1. If PG_dcache_dirty is set for the page, we need to ensure
* that any cache entries for the kernel's virtual memory
* range are written back to the page.
* 2. If we have multiple shared mappings of the same space in
* an object, we need to deal with the cache aliasing issues.
*
* Note that the page_table_lock will be held.
*/
void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
{
unsigned long pfn = pte_pfn(pte);
struct page *page;
if (!pfn_valid(pfn))
return;
page = pfn_to_page(pfn);
if (page_mapping(page)) {
int dirty = test_and_clear_bit(PG_dcache_dirty, &page->flags);
if (dirty) {
/*
* This is our first userspace mapping of this page.
* Ensure that the physical page is coherent with
* the kernel mapping.
*
* FIXME: only need to do this on VIVT and aliasing
* VIPT cache architectures. We can do that
* by choosing whether to set this bit...
*/
__cpuc_flush_dcache_page(page_address(page));
}
if (cache_is_vivt())
make_coherent(vma, addr, page, dirty);
}
}
/*
* Check whether the write buffer has physical address aliasing
* issues. If it has, we need to avoid them for the case where
* we have several shared mappings of the same object in user
* space.
*/
static int __init check_writebuffer(unsigned long *p1, unsigned long *p2)
{
register unsigned long zero = 0, one = 1, val;
local_irq_disable();
mb();
*p1 = one;
mb();
*p2 = zero;
mb();
val = *p1;
mb();
local_irq_enable();
return val != zero;
}
void __init check_writebuffer_bugs(void)
{
struct page *page;
const char *reason;
unsigned long v = 1;
printk(KERN_INFO "CPU: Testing write buffer coherency: ");
page = alloc_page(GFP_KERNEL);
if (page) {
unsigned long *p1, *p2;
pgprot_t prot = __pgprot(L_PTE_PRESENT|L_PTE_YOUNG|
L_PTE_DIRTY|L_PTE_WRITE|
L_PTE_BUFFERABLE);
p1 = vmap(&page, 1, VM_IOREMAP, prot);
p2 = vmap(&page, 1, VM_IOREMAP, prot);
if (p1 && p2) {
v = check_writebuffer(p1, p2);
reason = "enabling work-around";
} else {
reason = "unable to map memory\n";
}
vunmap(p1);
vunmap(p2);
put_page(page);
} else {
reason = "unable to grab page\n";
}
if (v) {
printk("failed, %s\n", reason);
shared_pte_mask |= L_PTE_BUFFERABLE;
} else {
printk("ok\n");
}
}

462
arch/arm/mm/fault.c Normal file

@@ -0,0 +1,462 @@
/*
* linux/arch/arm/mm/fault.c
*
* Copyright (C) 1995 Linus Torvalds
* Modifications for ARM processor (c) 1995-2004 Russell King
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/config.h>
#include <linux/module.h>
#include <linux/signal.h>
#include <linux/ptrace.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/uaccess.h>
#include "fault.h"
/*
* This is useful to dump out the page tables associated with
* 'addr' in mm 'mm'.
*/
void show_pte(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
if (!mm)
mm = &init_mm;
printk(KERN_ALERT "pgd = %p\n", mm->pgd);
pgd = pgd_offset(mm, addr);
printk(KERN_ALERT "[%08lx] *pgd=%08lx", addr, pgd_val(*pgd));
do {
pmd_t *pmd;
pte_t *pte;
if (pgd_none(*pgd))
break;
if (pgd_bad(*pgd)) {
printk("(bad)");
break;
}
pmd = pmd_offset(pgd, addr);
#if PTRS_PER_PMD != 1
printk(", *pmd=%08lx", pmd_val(*pmd));
#endif
if (pmd_none(*pmd))
break;
if (pmd_bad(*pmd)) {
printk("(bad)");
break;
}
#ifndef CONFIG_HIGHMEM
/* We must not map this if we have highmem enabled */
pte = pte_offset_map(pmd, addr);
printk(", *pte=%08lx", pte_val(*pte));
printk(", *ppte=%08lx", pte_val(pte[-PTRS_PER_PTE]));
pte_unmap(pte);
#endif
} while(0);
printk("\n");
}
/*
* Oops. The kernel tried to access some page that wasn't present.
*/
static void
__do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
struct pt_regs *regs)
{
/*
* Are we prepared to handle this kernel fault?
*/
if (fixup_exception(regs))
return;
/*
* No handler, we'll have to terminate things with extreme prejudice.
*/
bust_spinlocks(1);
printk(KERN_ALERT
"Unable to handle kernel %s at virtual address %08lx\n",
(addr < PAGE_SIZE) ? "NULL pointer dereference" :
"paging request", addr);
show_pte(mm, addr);
die("Oops", regs, fsr);
bust_spinlocks(0);
do_exit(SIGKILL);
}
/*
* Something tried to access memory that isn't in our memory map..
* User mode accesses just cause a SIGSEGV
*/
static void
__do_user_fault(struct task_struct *tsk, unsigned long addr,
unsigned int fsr, int code, struct pt_regs *regs)
{
struct siginfo si;
#ifdef CONFIG_DEBUG_USER
if (user_debug & UDBG_SEGV) {
printk(KERN_DEBUG "%s: unhandled page fault at 0x%08lx, code 0x%03x\n",
tsk->comm, addr, fsr);
show_pte(tsk->mm, addr);
show_regs(regs);
}
#endif
tsk->thread.address = addr;
tsk->thread.error_code = fsr;
tsk->thread.trap_no = 14;
si.si_signo = SIGSEGV;
si.si_errno = 0;
si.si_code = code;
si.si_addr = (void __user *)addr;
force_sig_info(SIGSEGV, &si, tsk);
}
void
do_bad_area(struct task_struct *tsk, struct mm_struct *mm, unsigned long addr,
unsigned int fsr, struct pt_regs *regs)
{
/*
* If we are in kernel mode at this point, we
* have no context to handle this fault with.
*/
if (user_mode(regs))
__do_user_fault(tsk, addr, fsr, SEGV_MAPERR, regs);
else
__do_kernel_fault(mm, addr, fsr, regs);
}
#define VM_FAULT_BADMAP (-20)
#define VM_FAULT_BADACCESS (-21)
static int
__do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
struct task_struct *tsk)
{
struct vm_area_struct *vma;
int fault, mask;
vma = find_vma(mm, addr);
fault = VM_FAULT_BADMAP;
if (!vma)
goto out;
if (vma->vm_start > addr)
goto check_stack;
/*
* Ok, we have a good vm_area for this
* memory access, so we can handle it.
*/
good_area:
if (fsr & (1 << 11)) /* write? */
mask = VM_WRITE;
else
mask = VM_READ|VM_EXEC;
fault = VM_FAULT_BADACCESS;
if (!(vma->vm_flags & mask))
goto out;
/*
* If for any reason at all we couldn't handle
* the fault, make sure we exit gracefully rather
* than endlessly redo the fault.
*/
survive:
fault = handle_mm_fault(mm, vma, addr & PAGE_MASK, fsr & (1 << 11));
/*
* Handle the "normal" cases first - successful and sigbus
*/
switch (fault) {
case VM_FAULT_MAJOR:
tsk->maj_flt++;
return fault;
case VM_FAULT_MINOR:
tsk->min_flt++;
/* fall through */
case VM_FAULT_SIGBUS:
return fault;
}
if (tsk->pid != 1)
goto out;
/*
* If we are out of memory for pid 1,
* sleep for a while and retry
*/
yield();
goto survive;
check_stack:
if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr))
goto good_area;
out:
return fault;
}
static int
do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
struct task_struct *tsk;
struct mm_struct *mm;
int fault;
tsk = current;
mm = tsk->mm;
/*
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
if (in_interrupt() || !mm)
goto no_context;
down_read(&mm->mmap_sem);
fault = __do_page_fault(mm, addr, fsr, tsk);
up_read(&mm->mmap_sem);
/*
* Handle the "normal" case first
*/
if (fault > 0)
return 0;
/*
* We had some memory, but were unable to
* successfully fix up this page fault.
*/
if (fault == 0)
goto do_sigbus;
/*
* If we are in kernel mode at this point, we
* have no context to handle this fault with.
*/
if (!user_mode(regs))
goto no_context;
if (fault == VM_FAULT_OOM) {
/*
* We ran out of memory, or some other thing happened to
* us that made us unable to handle the page fault gracefully.
*/
printk("VM: killing process %s\n", tsk->comm);
do_exit(SIGKILL);
} else
__do_user_fault(tsk, addr, fsr, fault == VM_FAULT_BADACCESS ?
SEGV_ACCERR : SEGV_MAPERR, regs);
return 0;
/*
* We ran out of memory, or some other thing happened to us that made
* us unable to handle the page fault gracefully.
*/
do_sigbus:
/*
* Send a sigbus, regardless of whether we were in kernel
* or user mode.
*/
tsk->thread.address = addr;
tsk->thread.error_code = fsr;
tsk->thread.trap_no = 14;
force_sig(SIGBUS, tsk);
#ifdef CONFIG_DEBUG_USER
if (user_debug & UDBG_BUS) {
printk(KERN_DEBUG "%s: sigbus at 0x%08lx, pc=0x%08lx\n",
current->comm, addr, instruction_pointer(regs));
}
#endif
/* Kernel mode? Handle exceptions or die */
if (user_mode(regs))
return 0;
no_context:
__do_kernel_fault(mm, addr, fsr, regs);
return 0;
}
/*
* First Level Translation Fault Handler
*
* We enter here because the first level page table doesn't contain
* a valid entry for the address.
*
* If the address is in kernel space (>= TASK_SIZE), then we are
* probably faulting in the vmalloc() area.
*
* If the init_task's first level page tables contain the relevant
* entry, we copy it to this task. If not, we send the process
* a signal, fix up the exception, or oops the kernel.
*
* NOTE! We MUST NOT take any locks for this case. We may be in an
* interrupt or a critical region, and should only copy the information
* from the master page table, nothing more.
*/
static int
do_translation_fault(unsigned long addr, unsigned int fsr,
struct pt_regs *regs)
{
struct task_struct *tsk;
unsigned int index;
pgd_t *pgd, *pgd_k;
pmd_t *pmd, *pmd_k;
if (addr < TASK_SIZE)
return do_page_fault(addr, fsr, regs);
index = pgd_index(addr);
/*
* FIXME: CP15 C1 is write only on ARMv3 architectures.
*/
pgd = cpu_get_pgd() + index;
pgd_k = init_mm.pgd + index;
if (pgd_none(*pgd_k))
goto bad_area;
if (!pgd_present(*pgd))
set_pgd(pgd, *pgd_k);
pmd_k = pmd_offset(pgd_k, addr);
pmd = pmd_offset(pgd, addr);
if (pmd_none(*pmd_k))
goto bad_area;
copy_pmd(pmd, pmd_k);
return 0;
bad_area:
tsk = current;
do_bad_area(tsk, tsk->active_mm, addr, fsr, regs);
return 0;
}
/*
* Some section permission faults need to be handled gracefully.
* They can happen due to a __{get,put}_user during an oops.
*/
static int
do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
struct task_struct *tsk = current;
do_bad_area(tsk, tsk->active_mm, addr, fsr, regs);
return 0;
}
/*
* This abort handler always returns "fault".
*/
static int
do_bad(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
return 1;
}
static struct fsr_info {
int (*fn)(unsigned long addr, unsigned int fsr, struct pt_regs *regs);
int sig;
const char *name;
} fsr_info[] = {
/*
* The following are the standard ARMv3 and ARMv4 aborts. ARMv5
* defines these to be "precise" aborts.
*/
{ do_bad, SIGSEGV, "vector exception" },
{ do_bad, SIGILL, "alignment exception" },
{ do_bad, SIGKILL, "terminal exception" },
{ do_bad, SIGILL, "alignment exception" },
{ do_bad, SIGBUS, "external abort on linefetch" },
{ do_translation_fault, SIGSEGV, "section translation fault" },
{ do_bad, SIGBUS, "external abort on linefetch" },
{ do_page_fault, SIGSEGV, "page translation fault" },
{ do_bad, SIGBUS, "external abort on non-linefetch" },
{ do_bad, SIGSEGV, "section domain fault" },
{ do_bad, SIGBUS, "external abort on non-linefetch" },
{ do_bad, SIGSEGV, "page domain fault" },
{ do_bad, SIGBUS, "external abort on translation" },
{ do_sect_fault, SIGSEGV, "section permission fault" },
{ do_bad, SIGBUS, "external abort on translation" },
{ do_page_fault, SIGSEGV, "page permission fault" },
/*
* The following are "imprecise" aborts, which are signalled by bit
* 10 of the FSR, and may not be recoverable. These are only
* supported if the CPU abort handler supports bit 10.
*/
{ do_bad, SIGBUS, "unknown 16" },
{ do_bad, SIGBUS, "unknown 17" },
{ do_bad, SIGBUS, "unknown 18" },
{ do_bad, SIGBUS, "unknown 19" },
{ do_bad, SIGBUS, "lock abort" }, /* xscale */
{ do_bad, SIGBUS, "unknown 21" },
{ do_bad, SIGBUS, "imprecise external abort" }, /* xscale */
{ do_bad, SIGBUS, "unknown 23" },
{ do_bad, SIGBUS, "dcache parity error" }, /* xscale */
{ do_bad, SIGBUS, "unknown 25" },
{ do_bad, SIGBUS, "unknown 26" },
{ do_bad, SIGBUS, "unknown 27" },
{ do_bad, SIGBUS, "unknown 28" },
{ do_bad, SIGBUS, "unknown 29" },
{ do_bad, SIGBUS, "unknown 30" },
{ do_bad, SIGBUS, "unknown 31" }
};
void __init
hook_fault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *),
int sig, const char *name)
{
if (nr >= 0 && nr < ARRAY_SIZE(fsr_info)) {
fsr_info[nr].fn = fn;
fsr_info[nr].sig = sig;
fsr_info[nr].name = name;
}
}
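hook_fault_code() is how CPU-specific code replaces one of the do_bad placeholders at boot. A hedged usage sketch; the handler and the chosen slot are illustrative:
/* Sketch only: claim FSR status 22 ("imprecise external abort")
 * with a hypothetical handler that swallows the abort. */
static int my_imprecise_abort(unsigned long addr, unsigned int fsr,
			      struct pt_regs *regs)
{
	return 0;	/* 0 == handled; non-zero falls back to the die path */
}
static void __init my_cpu_fault_init(void)
{
	hook_fault_code(22, my_imprecise_abort, SIGBUS,
			"imprecise external abort");
}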
/*
* Dispatch a data abort to the relevant handler.
*/
asmlinkage void
do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
const struct fsr_info *inf = fsr_info + (fsr & 15) + ((fsr & (1 << 10)) >> 6);
if (!inf->fn(addr, fsr, regs))
return;
printk(KERN_ALERT "Unhandled fault: %s (0x%03x) at 0x%08lx\n",
inf->name, fsr, addr);
force_sig(inf->sig, current);
show_pte(current->mm, addr);
die_if_kernel("Oops", regs, 0);
}
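The index expression in do_DataAbort folds FSR bit 10 into bit 4 of the table index, since (1 << 10) >> 6 == 16: precise aborts select entries 0-15, imprecise ones 16-31. A worked example:
/* fsr == 0x408: low nibble 8, bit 10 set, so
 * index = 8 + 16 = 24 -> "dcache parity error" (XScale). */
unsigned int fsr = 0x408;
unsigned int index = (fsr & 15) + ((fsr & (1 << 10)) >> 6);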
asmlinkage void
do_PrefetchAbort(unsigned long addr, struct pt_regs *regs)
{
do_translation_fault(addr, 0, regs);
}

6
arch/arm/mm/fault.h Normal file

@@ -0,0 +1,6 @@
void do_bad_area(struct task_struct *tsk, struct mm_struct *mm,
unsigned long addr, unsigned int fsr, struct pt_regs *regs);
void show_pte(struct mm_struct *mm, unsigned long addr);
unsigned long search_exception_table(unsigned long addr);

94
arch/arm/mm/flush.c Normal file

@@ -0,0 +1,94 @@
/*
* linux/arch/arm/mm/flush.c
*
* Copyright (C) 1995-2002 Russell King
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <asm/cacheflush.h>
#include <asm/system.h>
static void __flush_dcache_page(struct address_space *mapping, struct page *page)
{
struct mm_struct *mm = current->active_mm;
struct vm_area_struct *mpnt;
struct prio_tree_iter iter;
pgoff_t pgoff;
/*
* Writeback any data associated with the kernel mapping of this
* page. This ensures that data in the physical page is mutually
* coherent with the kernel's mapping.
*/
__cpuc_flush_dcache_page(page_address(page));
/*
* If there's no mapping pointer here, then this page isn't
* visible to userspace yet, so there are no cache lines
* associated with any other aliases.
*/
if (!mapping)
return;
/*
* There are possible user space mappings of this page:
* - VIVT cache: we need to also write back and invalidate all user
* data in the current VM view associated with this page.
* - aliasing VIPT: we only need to find one mapping of this page.
*/
pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
flush_dcache_mmap_lock(mapping);
vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) {
unsigned long offset;
/*
* If this VMA is not in our MM, we can ignore it.
*/
if (mpnt->vm_mm != mm)
continue;
if (!(mpnt->vm_flags & VM_MAYSHARE))
continue;
offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
flush_cache_page(mpnt, mpnt->vm_start + offset, page_to_pfn(page));
if (cache_is_vipt())
break;
}
flush_dcache_mmap_unlock(mapping);
}
/*
* Ensure cache coherency between kernel mapping and userspace mapping
* of this page.
*
* We have three cases to consider:
* - VIPT non-aliasing cache: fully coherent so nothing required.
* - VIVT: fully aliasing, so we need to handle every alias in our
* current VM view.
* - VIPT aliasing: need to handle one alias in our current VM view.
*
* If we need to handle aliasing:
* If the page only exists in the page cache and there are no user
* space mappings, we can be lazy and remember that we may have dirty
* kernel cache lines for later. Otherwise, we assume we have
* aliasing mappings.
*/
void flush_dcache_page(struct page *page)
{
struct address_space *mapping = page_mapping(page);
if (cache_is_vipt_nonaliasing())
return;
if (mapping && !mapping_mapped(mapping))
set_bit(PG_dcache_dirty, &page->flags);
else
__flush_dcache_page(mapping, page);
}
EXPORT_SYMBOL(flush_dcache_page);

621
arch/arm/mm/init.c Normal file

@@ -0,0 +1,621 @@
/*
* linux/arch/arm/mm/init.c
*
* Copyright (C) 1995-2002 Russell King
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/ptrace.h>
#include <linux/swap.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/mman.h>
#include <linux/nodemask.h>
#include <linux/initrd.h>
#include <asm/mach-types.h>
#include <asm/hardware.h>
#include <asm/setup.h>
#include <asm/tlb.h>
#include <asm/mach/arch.h>
#include <asm/mach/map.h>
#define TABLE_SIZE (2 * PTRS_PER_PTE * sizeof(pte_t))
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
extern void _stext, _text, _etext, __data_start, _end, __init_begin, __init_end;
extern unsigned long phys_initrd_start;
extern unsigned long phys_initrd_size;
/*
* The sole use of this is to pass memory configuration
* data from paging_init to mem_init.
*/
static struct meminfo meminfo __initdata = { 0, };
/*
* empty_zero_page is a special page that is used for
* zero-initialized data and COW.
*/
struct page *empty_zero_page;
void show_mem(void)
{
int free = 0, total = 0, reserved = 0;
int shared = 0, cached = 0, slab = 0, node;
printk("Mem-info:\n");
show_free_areas();
printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
for_each_online_node(node) {
struct page *page, *end;
page = NODE_MEM_MAP(node);
end = page + NODE_DATA(node)->node_spanned_pages;
do {
total++;
if (PageReserved(page))
reserved++;
else if (PageSwapCache(page))
cached++;
else if (PageSlab(page))
slab++;
else if (!page_count(page))
free++;
else
shared += page_count(page) - 1;
page++;
} while (page < end);
}
printk("%d pages of RAM\n", total);
printk("%d free pages\n", free);
printk("%d reserved pages\n", reserved);
printk("%d slab pages\n", slab);
printk("%d pages shared\n", shared);
printk("%d pages swap cached\n", cached);
}
struct node_info {
unsigned int start;
unsigned int end;
int bootmap_pages;
};
#define O_PFN_DOWN(x) ((x) >> PAGE_SHIFT)
#define V_PFN_DOWN(x) O_PFN_DOWN(__pa(x))
#define O_PFN_UP(x) (PAGE_ALIGN(x) >> PAGE_SHIFT)
#define V_PFN_UP(x) O_PFN_UP(__pa(x))
#define PFN_SIZE(x) ((x) >> PAGE_SHIFT)
#define PFN_RANGE(s,e) PFN_SIZE(PAGE_ALIGN((unsigned long)(e)) - \
(((unsigned long)(s)) & PAGE_MASK))
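/*
 * Worked example (illustrative, assuming 4K pages, i.e. PAGE_SHIFT == 12):
 *   O_PFN_DOWN(0x5123)        == 0x5   - round down to a page frame
 *   O_PFN_UP(0x5123)          == 0x6   - round up to the next frame
 *   PFN_RANGE(0x5123, 0x7001) == 3     - frames 0x5, 0x6 and 0x7
 * The V_ variants do the same after converting a virtual address with __pa().
 */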
/*
* FIXME: We really want to avoid allocating the bootmap bitmap
* over the top of the initrd. Hopefully, this is located towards
* the start of a bank, so if we allocate the bootmap bitmap at
* the end, we won't clash.
*/
static unsigned int __init
find_bootmap_pfn(int node, struct meminfo *mi, unsigned int bootmap_pages)
{
unsigned int start_pfn, bank, bootmap_pfn;
start_pfn = V_PFN_UP(&_end);
bootmap_pfn = 0;
for (bank = 0; bank < mi->nr_banks; bank ++) {
unsigned int start, end;
if (mi->bank[bank].node != node)
continue;
start = O_PFN_UP(mi->bank[bank].start);
end = O_PFN_DOWN(mi->bank[bank].size +
mi->bank[bank].start);
if (end < start_pfn)
continue;
if (start < start_pfn)
start = start_pfn;
if (end <= start)
continue;
if (end - start >= bootmap_pages) {
bootmap_pfn = start;
break;
}
}
if (bootmap_pfn == 0)
BUG();
return bootmap_pfn;
}
/*
* Scan the memory info structure and pull out:
* - the end of memory
* - the number of nodes
* - the pfn range of each node
* - the number of bootmem bitmap pages
*/
static unsigned int __init
find_memend_and_nodes(struct meminfo *mi, struct node_info *np)
{
unsigned int i, bootmem_pages = 0, memend_pfn = 0;
for (i = 0; i < MAX_NUMNODES; i++) {
np[i].start = -1U;
np[i].end = 0;
np[i].bootmap_pages = 0;
}
for (i = 0; i < mi->nr_banks; i++) {
unsigned long start, end;
int node;
if (mi->bank[i].size == 0) {
/*
* Mark this bank with an invalid node number
*/
mi->bank[i].node = -1;
continue;
}
node = mi->bank[i].node;
/*
* Make sure we haven't exceeded the maximum number of nodes
* that we have in this configuration. If we have, we're in
* trouble. (maybe we ought to limit, instead of bugging?)
*/
if (node >= MAX_NUMNODES)
BUG();
node_set_online(node);
/*
* Get the start and end pfns for this bank
*/
start = O_PFN_UP(mi->bank[i].start);
end = O_PFN_DOWN(mi->bank[i].start + mi->bank[i].size);
if (np[node].start > start)
np[node].start = start;
if (np[node].end < end)
np[node].end = end;
if (memend_pfn < end)
memend_pfn = end;
}
/*
* Calculate the number of pages we require to
* store the bootmem bitmaps.
*/
for_each_online_node(i) {
if (np[i].end == 0)
continue;
np[i].bootmap_pages = bootmem_bootmap_pages(np[i].end -
np[i].start);
bootmem_pages += np[i].bootmap_pages;
}
high_memory = __va(memend_pfn << PAGE_SHIFT);
/*
* This doesn't seem to be used by the Linux memory
* manager any more. If we can get rid of it, we
* also get rid of some of the stuff above as well.
*/
max_low_pfn = memend_pfn - O_PFN_DOWN(PHYS_OFFSET);
max_pfn = memend_pfn - O_PFN_DOWN(PHYS_OFFSET);
return bootmem_pages;
}
static int __init check_initrd(struct meminfo *mi)
{
int initrd_node = -2;
#ifdef CONFIG_BLK_DEV_INITRD
unsigned long end = phys_initrd_start + phys_initrd_size;
/*
* Make sure that the initrd is within a valid area of
* memory.
*/
if (phys_initrd_size) {
unsigned int i;
initrd_node = -1;
for (i = 0; i < mi->nr_banks; i++) {
unsigned long bank_end;
bank_end = mi->bank[i].start + mi->bank[i].size;
if (mi->bank[i].start <= phys_initrd_start &&
end <= bank_end)
initrd_node = mi->bank[i].node;
}
}
if (initrd_node == -1) {
printk(KERN_ERR "initrd (0x%08lx - 0x%08lx) extends beyond "
"physical memory - disabling initrd\n",
phys_initrd_start, end);
phys_initrd_start = phys_initrd_size = 0;
}
#endif
return initrd_node;
}
/*
* Reserve the various regions of node 0
*/
static __init void reserve_node_zero(unsigned int bootmap_pfn, unsigned int bootmap_pages)
{
pg_data_t *pgdat = NODE_DATA(0);
unsigned long res_size = 0;
/*
* Register the kernel text and data with bootmem.
* Note that this can only be in node 0.
*/
#ifdef CONFIG_XIP_KERNEL
reserve_bootmem_node(pgdat, __pa(&__data_start), &_end - &__data_start);
#else
reserve_bootmem_node(pgdat, __pa(&_stext), &_end - &_stext);
#endif
/*
* Reserve the page tables. These are already in use,
* and can only be in node 0.
*/
reserve_bootmem_node(pgdat, __pa(swapper_pg_dir),
PTRS_PER_PGD * sizeof(pgd_t));
/*
* And don't forget to reserve the allocator bitmap,
* which will be freed later.
*/
reserve_bootmem_node(pgdat, bootmap_pfn << PAGE_SHIFT,
bootmap_pages << PAGE_SHIFT);
/*
* Hmm... This should go elsewhere, but we really really need to
* stop things allocating the low memory; ideally we need a better
* implementation of GFP_DMA which does not assume that DMA-able
* memory starts at zero.
*/
if (machine_is_integrator() || machine_is_cintegrator())
res_size = __pa(swapper_pg_dir) - PHYS_OFFSET;
/*
* These should likewise go elsewhere. They pre-reserve the
* screen memory region at the start of main system memory.
*/
if (machine_is_edb7211())
res_size = 0x00020000;
if (machine_is_p720t())
res_size = 0x00014000;
#ifdef CONFIG_SA1111
/*
* Because of the SA1111 DMA bug, we want to preserve our
* precious DMA-able memory...
*/
res_size = __pa(swapper_pg_dir) - PHYS_OFFSET;
#endif
if (res_size)
reserve_bootmem_node(pgdat, PHYS_OFFSET, res_size);
}
/*
* Register all available RAM in this node with the bootmem allocator.
*/
static inline void free_bootmem_node_bank(int node, struct meminfo *mi)
{
pg_data_t *pgdat = NODE_DATA(node);
int bank;
for (bank = 0; bank < mi->nr_banks; bank++)
if (mi->bank[bank].node == node)
free_bootmem_node(pgdat, mi->bank[bank].start,
mi->bank[bank].size);
}
/*
* Initialise the bootmem allocator for all nodes. This is called
* early during the architecture specific initialisation.
*/
static void __init bootmem_init(struct meminfo *mi)
{
struct node_info node_info[MAX_NUMNODES], *np = node_info;
unsigned int bootmap_pages, bootmap_pfn, map_pg;
int node, initrd_node;
bootmap_pages = find_memend_and_nodes(mi, np);
bootmap_pfn = find_bootmap_pfn(0, mi, bootmap_pages);
initrd_node = check_initrd(mi);
map_pg = bootmap_pfn;
/*
* Initialise the bootmem nodes.
*
* What we really want to do is:
*
* unmap_all_regions_except_kernel();
* for_each_node_in_reverse_order(node) {
* map_node(node);
* allocate_bootmem_map(node);
* init_bootmem_node(node);
* free_bootmem_node(node);
* }
*
* but this is a 2.5-type change. For now, we just set
* the nodes up in reverse order.
*
* (we could also do with rolling bootmem_init and paging_init
* into one generic "memory_init" type function).
*/
np += num_online_nodes() - 1;
for (node = num_online_nodes() - 1; node >= 0; node--, np--) {
/*
* If there are no pages in this node, ignore it.
* Note that node 0 must always have some pages.
*/
if (np->end == 0 || !node_online(node)) {
if (node == 0)
BUG();
continue;
}
/*
* Initialise the bootmem allocator.
*/
init_bootmem_node(NODE_DATA(node), map_pg, np->start, np->end);
free_bootmem_node_bank(node, mi);
map_pg += np->bootmap_pages;
/*
* If this is node 0, we need to reserve some areas ASAP -
* we may use bootmem on node 0 to setup the other nodes.
*/
if (node == 0)
reserve_node_zero(bootmap_pfn, bootmap_pages);
}
#ifdef CONFIG_BLK_DEV_INITRD
if (phys_initrd_size && initrd_node >= 0) {
reserve_bootmem_node(NODE_DATA(initrd_node), phys_initrd_start,
phys_initrd_size);
initrd_start = __phys_to_virt(phys_initrd_start);
initrd_end = initrd_start + phys_initrd_size;
}
#endif
BUG_ON(map_pg != bootmap_pfn + bootmap_pages);
}
/*
* paging_init() sets up the page tables, initialises the zone memory
* maps, and sets up the zero page, bad page and bad page tables.
*/
void __init paging_init(struct meminfo *mi, struct machine_desc *mdesc)
{
void *zero_page;
int node;
bootmem_init(mi);
memcpy(&meminfo, mi, sizeof(meminfo));
/*
* allocate the zero page. Note that we count on this going ok.
*/
zero_page = alloc_bootmem_low_pages(PAGE_SIZE);
/*
* initialise the page tables.
*/
memtable_init(mi);
if (mdesc->map_io)
mdesc->map_io();
flush_tlb_all();
/*
* initialise the zones within each node
*/
for_each_online_node(node) {
unsigned long zone_size[MAX_NR_ZONES];
unsigned long zhole_size[MAX_NR_ZONES];
struct bootmem_data *bdata;
pg_data_t *pgdat;
int i;
/*
* Initialise the zone size information.
*/
for (i = 0; i < MAX_NR_ZONES; i++) {
zone_size[i] = 0;
zhole_size[i] = 0;
}
pgdat = NODE_DATA(node);
bdata = pgdat->bdata;
/*
* The size of this node has already been determined.
* If we need to do anything fancy with the allocation
* of this memory to the zones, now is the time to do
* it.
*/
zone_size[0] = bdata->node_low_pfn -
(bdata->node_boot_start >> PAGE_SHIFT);
/*
* If this zone has zero size, skip it.
*/
if (!zone_size[0])
continue;
/*
* For each bank in this node, calculate the size of the
* holes. holes = node_size - sum(bank_sizes_in_node)
*/
zhole_size[0] = zone_size[0];
for (i = 0; i < mi->nr_banks; i++) {
if (mi->bank[i].node != node)
continue;
zhole_size[0] -= mi->bank[i].size >> PAGE_SHIFT;
}
/*
* Adjust the sizes according to any special
* requirements for this machine type.
*/
arch_adjust_zones(node, zone_size, zhole_size);
free_area_init_node(node, pgdat, zone_size,
bdata->node_boot_start >> PAGE_SHIFT, zhole_size);
}
/*
* finish off the bad pages once
* the mem_map is initialised
*/
memzero(zero_page, PAGE_SIZE);
empty_zero_page = virt_to_page(zero_page);
flush_dcache_page(empty_zero_page);
}
static inline void free_area(unsigned long addr, unsigned long end, char *s)
{
unsigned int size = (end - addr) >> 10;
for (; addr < end; addr += PAGE_SIZE) {
struct page *page = virt_to_page(addr);
ClearPageReserved(page);
set_page_count(page, 1);
free_page(addr);
totalram_pages++;
}
if (size && s)
printk(KERN_INFO "Freeing %s memory: %dK\n", s, size);
}
/*
* mem_init() marks the free areas in the mem_map and tells us how much
* memory is free. This is done after various parts of the system have
* claimed their memory after the kernel image.
*/
void __init mem_init(void)
{
unsigned int codepages, datapages, initpages;
int i, node;
codepages = &_etext - &_text;
datapages = &_end - &__data_start;
initpages = &__init_end - &__init_begin;
#ifndef CONFIG_DISCONTIGMEM
max_mapnr = virt_to_page(high_memory) - mem_map;
#endif
/*
* We may have non-contiguous memory.
*/
if (meminfo.nr_banks != 1)
create_memmap_holes(&meminfo);
/* this will put all unused low memory onto the freelists */
for_each_online_node(node) {
pg_data_t *pgdat = NODE_DATA(node);
if (pgdat->node_spanned_pages != 0)
totalram_pages += free_all_bootmem_node(pgdat);
}
#ifdef CONFIG_SA1111
/* now that our DMA memory is actually so designated, we can free it */
free_area(PAGE_OFFSET, (unsigned long)swapper_pg_dir, NULL);
#endif
/*
* Since our memory may not be contiguous, calculate the
* real number of pages we have in this system
*/
printk(KERN_INFO "Memory:");
num_physpages = 0;
for (i = 0; i < meminfo.nr_banks; i++) {
num_physpages += meminfo.bank[i].size >> PAGE_SHIFT;
printk(" %ldMB", meminfo.bank[i].size >> 20);
}
printk(" = %luMB total\n", num_physpages >> (20 - PAGE_SHIFT));
printk(KERN_NOTICE "Memory: %luKB available (%dK code, "
"%dK data, %dK init)\n",
(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
codepages >> 10, datapages >> 10, initpages >> 10);
if (PAGE_SIZE >= 16384 && num_physpages <= 128) {
extern int sysctl_overcommit_memory;
/*
* On a machine this small we won't get
* anywhere without overcommit, so turn
* it on by default.
*/
sysctl_overcommit_memory = OVERCOMMIT_ALWAYS;
}
}
void free_initmem(void)
{
if (!machine_is_integrator() && !machine_is_cintegrator()) {
free_area((unsigned long)(&__init_begin),
(unsigned long)(&__init_end),
"init");
}
}
#ifdef CONFIG_BLK_DEV_INITRD
static int keep_initrd;
void free_initrd_mem(unsigned long start, unsigned long end)
{
if (!keep_initrd)
free_area(start, end, "initrd");
}
static int __init keepinitrd_setup(char *__unused)
{
keep_initrd = 1;
return 1;
}
__setup("keepinitrd", keepinitrd_setup);
#endif

172
arch/arm/mm/ioremap.c Normal file

@@ -0,0 +1,172 @@
/*
* linux/arch/arm/mm/ioremap.c
*
* Re-map IO memory to kernel address space so that we can access it.
*
* (C) Copyright 1995 1996 Linus Torvalds
*
* Hacked for ARM by Phil Blundell <philb@gnu.org>
* Hacked to allow all architectures to build, and various cleanups
* by Russell King
*
* This allows a driver to remap an arbitrary region of bus memory into
* virtual space. One should *only* use readl, writel, memcpy_toio and
* so on with such remapped areas.
*
* Because the ARM only has a 32-bit address space we can't address the
* whole of the (physical) PCI space at once. PCI huge-mode addressing
* allows us to circumvent this restriction by splitting PCI space into
* two 2GB chunks and mapping only one at a time into processor memory.
* We use MMU protection domains to trap any attempt to access the bank
* that is not currently mapped. (This isn't fully implemented yet.)
*/
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <asm/cacheflush.h>
#include <asm/io.h>
#include <asm/tlbflush.h>
static inline void
remap_area_pte(pte_t * pte, unsigned long address, unsigned long size,
unsigned long phys_addr, pgprot_t pgprot)
{
unsigned long end;
address &= ~PMD_MASK;
end = address + size;
if (end > PMD_SIZE)
end = PMD_SIZE;
BUG_ON(address >= end);
do {
if (!pte_none(*pte))
goto bad;
set_pte(pte, pfn_pte(phys_addr >> PAGE_SHIFT, pgprot));
address += PAGE_SIZE;
phys_addr += PAGE_SIZE;
pte++;
} while (address && (address < end));
return;
bad:
printk("remap_area_pte: page already exists\n");
BUG();
}
static inline int
remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size,
unsigned long phys_addr, unsigned long flags)
{
unsigned long end;
pgprot_t pgprot;
address &= ~PGDIR_MASK;
end = address + size;
if (end > PGDIR_SIZE)
end = PGDIR_SIZE;
phys_addr -= address;
BUG_ON(address >= end);
pgprot = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | L_PTE_WRITE | flags);
do {
pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
if (!pte)
return -ENOMEM;
remap_area_pte(pte, address, end - address, address + phys_addr, pgprot);
address = (address + PMD_SIZE) & PMD_MASK;
pmd++;
} while (address && (address < end));
return 0;
}
static int
remap_area_pages(unsigned long start, unsigned long phys_addr,
unsigned long size, unsigned long flags)
{
unsigned long address = start;
unsigned long end = start + size;
int err = 0;
pgd_t * dir;
phys_addr -= address;
dir = pgd_offset(&init_mm, address);
BUG_ON(address >= end);
spin_lock(&init_mm.page_table_lock);
do {
pmd_t *pmd = pmd_alloc(&init_mm, dir, address);
if (!pmd) {
err = -ENOMEM;
break;
}
if (remap_area_pmd(pmd, address, end - address,
phys_addr + address, flags)) {
err = -ENOMEM;
break;
}
address = (address + PGDIR_SIZE) & PGDIR_MASK;
dir++;
} while (address && (address < end));
spin_unlock(&init_mm.page_table_lock);
flush_cache_vmap(start, end);
return err;
}
/*
* Remap an arbitrary physical address space into the kernel virtual
* address space. Needed when the kernel wants to access high addresses
* directly.
*
* NOTE! We need to allow non-page-aligned mappings too: we will obviously
* have to convert them into an offset in a page-aligned mapping, but the
* caller shouldn't need to know that small detail.
*
* 'flags' are the extra L_PTE_ flags that you want to specify for this
* mapping. See include/asm-arm/proc-armv/pgtable.h for more information.
*/
void __iomem *
__ioremap(unsigned long phys_addr, size_t size, unsigned long flags,
unsigned long align)
{
void * addr;
struct vm_struct * area;
unsigned long offset, last_addr;
/* Don't allow wraparound or zero size */
last_addr = phys_addr + size - 1;
if (!size || last_addr < phys_addr)
return NULL;
/*
* Mappings have to be page-aligned
*/
offset = phys_addr & ~PAGE_MASK;
phys_addr &= PAGE_MASK;
size = PAGE_ALIGN(last_addr + 1) - phys_addr;
/*
* Ok, go for it..
*/
area = get_vm_area(size, VM_IOREMAP);
if (!area)
return NULL;
addr = area->addr;
if (remap_area_pages((unsigned long) addr, phys_addr, size, flags)) {
vfree(addr);
return NULL;
}
return (void __iomem *) (offset + (char *)addr);
}
EXPORT_SYMBOL(__ioremap);
void __iounmap(void __iomem *addr)
{
vfree((void *) (PAGE_MASK & (unsigned long) addr));
}
EXPORT_SYMBOL(__iounmap);
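/*
 * Illustrative usage sketch (not part of the original file): a driver maps
 * a device register window, accesses it only via readl/writel, and unmaps
 * it when done.  EXAMPLE_PHYS_BASE and the register offset are made up;
 * the align argument is not used by __ioremap() above, so 1 is passed.
 */
#if 0	/* example only */
#define EXAMPLE_PHYS_BASE	0x10000000
#define EXAMPLE_WINDOW_SIZE	0x1000

static int example_probe(void)
{
	void __iomem *regs;

	regs = __ioremap(EXAMPLE_PHYS_BASE, EXAMPLE_WINDOW_SIZE, 0, 1);
	if (!regs)
		return -ENOMEM;

	writel(1, regs + 0x04);		/* hypothetical enable register */

	__iounmap(regs);
	return 0;
}
#endif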

73
arch/arm/mm/minicache.c Normal file

@@ -0,0 +1,73 @@
/*
* linux/arch/arm/mm/minicache.c
*
* Copyright (C) 2001 Russell King
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This handles the mini data cache, as found on SA11x0 and XScale
* processors. When we copy a user page page, we map it in such a way
* that accesses to this page will not touch the main data cache, but
* will be cached in the mini data cache. This prevents us thrashing
* the main data cache on page faults.
*/
#include <linux/init.h>
#include <linux/mm.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
/*
* 0xffff8000 to 0xffffffff is reserved for any ARM architecture
* specific hacks for copying pages efficiently.
*/
#define minicache_address (0xffff8000)
#define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \
L_PTE_CACHEABLE)
static pte_t *minicache_pte;
/*
* Note that this is intended to be called only from the copy_user_page
* asm code; anything else will require special locking to prevent the
* mini-cache space being re-used. (Note: probably preempt unsafe).
*
* We rely on the fact that the minicache is 2K, and we'll be pushing
* 4K of data through it, so we don't actually have to specifically
* flush the minicache when we change the mapping.
*
* Note also: assert(PAGE_OFFSET <= virt < high_memory).
* Unsafe: preempt, kmap.
*/
unsigned long map_page_minicache(unsigned long virt)
{
set_pte(minicache_pte, pfn_pte(__pa(virt) >> PAGE_SHIFT, minicache_pgprot));
flush_tlb_kernel_page(minicache_address);
return minicache_address;
}
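/*
 * Illustrative sketch (not part of the original file): how a copy_user_page
 * implementation might use the minicache mapping.  The real callers are in
 * assembly; example_copy_user_page() is a made-up name for exposition.
 */
#if 0	/* example only */
static void example_copy_user_page(void *kto, void *kfrom)
{
	/* reads of the source page go through the mini data cache only */
	unsigned long vfrom = map_page_minicache((unsigned long)kfrom);

	memcpy(kto, (void *)vfrom, PAGE_SIZE);
}
#endif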
static int __init minicache_init(void)
{
pgd_t *pgd;
pmd_t *pmd;
spin_lock(&init_mm.page_table_lock);
pgd = pgd_offset_k(minicache_address);
pmd = pmd_alloc(&init_mm, pgd, minicache_address);
if (!pmd)
BUG();
minicache_pte = pte_alloc_kernel(&init_mm, pmd, minicache_address);
if (!minicache_pte)
BUG();
spin_unlock(&init_mm.page_table_lock);
return 0;
}
core_initcall(minicache_init);

760
arch/arm/mm/mm-armv.c Normal file

@@ -0,0 +1,760 @@
/*
* linux/arch/arm/mm/mm-armv.c
*
* Copyright (C) 1998-2002 Russell King
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* Page table sludge for ARM v3 and v4 processor architectures.
*/
#include <linux/config.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/highmem.h>
#include <linux/nodemask.h>
#include <asm/pgalloc.h>
#include <asm/page.h>
#include <asm/io.h>
#include <asm/setup.h>
#include <asm/tlbflush.h>
#include <asm/mach/map.h>
#define CPOLICY_UNCACHED 0
#define CPOLICY_BUFFERED 1
#define CPOLICY_WRITETHROUGH 2
#define CPOLICY_WRITEBACK 3
#define CPOLICY_WRITEALLOC 4
static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK;
static unsigned int ecc_mask __initdata = 0;
pgprot_t pgprot_kernel;
EXPORT_SYMBOL(pgprot_kernel);
struct cachepolicy {
const char policy[16];
unsigned int cr_mask;
unsigned int pmd;
unsigned int pte;
};
static struct cachepolicy cache_policies[] __initdata = {
{
.policy = "uncached",
.cr_mask = CR_W|CR_C,
.pmd = PMD_SECT_UNCACHED,
.pte = 0,
}, {
.policy = "buffered",
.cr_mask = CR_C,
.pmd = PMD_SECT_BUFFERED,
.pte = PTE_BUFFERABLE,
}, {
.policy = "writethrough",
.cr_mask = 0,
.pmd = PMD_SECT_WT,
.pte = PTE_CACHEABLE,
}, {
.policy = "writeback",
.cr_mask = 0,
.pmd = PMD_SECT_WB,
.pte = PTE_BUFFERABLE|PTE_CACHEABLE,
}, {
.policy = "writealloc",
.cr_mask = 0,
.pmd = PMD_SECT_WBWA,
.pte = PTE_BUFFERABLE|PTE_CACHEABLE,
}
};
/*
* These are useful for identifying cache coherency
* problems by allowing the cache or the cache and
* writebuffer to be turned off. (Note: the write
* buffer should not be on and the cache off).
*/
static void __init early_cachepolicy(char **p)
{
int i;
for (i = 0; i < ARRAY_SIZE(cache_policies); i++) {
int len = strlen(cache_policies[i].policy);
if (memcmp(*p, cache_policies[i].policy, len) == 0) {
cachepolicy = i;
cr_alignment &= ~cache_policies[i].cr_mask;
cr_no_alignment &= ~cache_policies[i].cr_mask;
*p += len;
break;
}
}
if (i == ARRAY_SIZE(cache_policies))
printk(KERN_ERR "ERROR: unknown or unsupported cache policy\n");
flush_cache_all();
set_cr(cr_alignment);
}
static void __init early_nocache(char **__unused)
{
char *p = "buffered";
printk(KERN_WARNING "nocache is deprecated; use cachepolicy=%s\n", p);
early_cachepolicy(&p);
}
static void __init early_nowrite(char **__unused)
{
char *p = "uncached";
printk(KERN_WARNING "nowb is deprecated; use cachepolicy=%s\n", p);
early_cachepolicy(&p);
}
static void __init early_ecc(char **p)
{
if (memcmp(*p, "on", 2) == 0) {
ecc_mask = PMD_PROTECTION;
*p += 2;
} else if (memcmp(*p, "off", 3) == 0) {
ecc_mask = 0;
*p += 3;
}
}
__early_param("nocache", early_nocache);
__early_param("nowb", early_nowrite);
__early_param("cachepolicy=", early_cachepolicy);
__early_param("ecc=", early_ecc);
static int __init noalign_setup(char *__unused)
{
cr_alignment &= ~CR_A;
cr_no_alignment &= ~CR_A;
set_cr(cr_alignment);
return 1;
}
__setup("noalign", noalign_setup);
#define FIRST_KERNEL_PGD_NR (FIRST_USER_PGD_NR + USER_PTRS_PER_PGD)
/*
* need to get a 16k page for level 1
*/
pgd_t *get_pgd_slow(struct mm_struct *mm)
{
pgd_t *new_pgd, *init_pgd;
pmd_t *new_pmd, *init_pmd;
pte_t *new_pte, *init_pte;
new_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, 2);
if (!new_pgd)
goto no_pgd;
memzero(new_pgd, FIRST_KERNEL_PGD_NR * sizeof(pgd_t));
init_pgd = pgd_offset_k(0);
if (!vectors_high()) {
/*
* This lock is here just to satisfy pmd_alloc and pte_lock
*/
spin_lock(&mm->page_table_lock);
/*
* On ARM, first page must always be allocated since it
* contains the machine vectors.
*/
new_pmd = pmd_alloc(mm, new_pgd, 0);
if (!new_pmd)
goto no_pmd;
new_pte = pte_alloc_map(mm, new_pmd, 0);
if (!new_pte)
goto no_pte;
init_pmd = pmd_offset(init_pgd, 0);
init_pte = pte_offset_map_nested(init_pmd, 0);
set_pte(new_pte, *init_pte);
pte_unmap_nested(init_pte);
pte_unmap(new_pte);
spin_unlock(&mm->page_table_lock);
}
/*
* Copy over the kernel and IO PGD entries
*/
memcpy(new_pgd + FIRST_KERNEL_PGD_NR, init_pgd + FIRST_KERNEL_PGD_NR,
(PTRS_PER_PGD - FIRST_KERNEL_PGD_NR) * sizeof(pgd_t));
clean_dcache_area(new_pgd, PTRS_PER_PGD * sizeof(pgd_t));
return new_pgd;
no_pte:
spin_unlock(&mm->page_table_lock);
pmd_free(new_pmd);
free_pages((unsigned long)new_pgd, 2);
return NULL;
no_pmd:
spin_unlock(&mm->page_table_lock);
free_pages((unsigned long)new_pgd, 2);
return NULL;
no_pgd:
return NULL;
}
void free_pgd_slow(pgd_t *pgd)
{
pmd_t *pmd;
struct page *pte;
if (!pgd)
return;
/* pgd is always present and good */
pmd = (pmd_t *)pgd;
if (pmd_none(*pmd))
goto free;
if (pmd_bad(*pmd)) {
pmd_ERROR(*pmd);
pmd_clear(pmd);
goto free;
}
pte = pmd_page(*pmd);
pmd_clear(pmd);
dec_page_state(nr_page_table_pages);
pte_free(pte);
pmd_free(pmd);
free:
free_pages((unsigned long) pgd, 2);
}
/*
* Create a SECTION PGD between VIRT and PHYS in domain
* DOMAIN with protection PROT. This operates on half-
* pgdir entry increments.
*/
static inline void
alloc_init_section(unsigned long virt, unsigned long phys, int prot)
{
pmd_t *pmdp;
pmdp = pmd_offset(pgd_offset_k(virt), virt);
if (virt & (1 << 20))
pmdp++;
*pmdp = __pmd(phys | prot);
flush_pmd_entry(pmdp);
}
/*
* Create a SUPER SECTION PGD between VIRT and PHYS with protection PROT
*/
static inline void
alloc_init_supersection(unsigned long virt, unsigned long phys, int prot)
{
int i;
for (i = 0; i < 16; i += 1) {
alloc_init_section(virt, phys & SUPERSECTION_MASK,
prot | PMD_SECT_SUPER);
virt += (PGDIR_SIZE / 2);
phys += (PGDIR_SIZE / 2);
}
}
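/*
 * Worked example (illustrative): sections are 1MB and a pgdir entry covers
 * 2MB, so alloc_init_section(0xe0100000, 0x40100000, prot) writes the
 * second (odd) half of the entry for 0xe0000000, because bit 20 of the
 * virtual address is set.  A supersection spans 16MB, hence the sixteen
 * half-pgdir steps in alloc_init_supersection() above.
 */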
/*
* Add a PAGE mapping between VIRT and PHYS in domain
* DOMAIN with protection PROT. Note that due to the
* way we map the PTEs, we must allocate two PTE_SIZE'd
* blocks - one for the Linux pte table, and one for
* the hardware pte table.
*/
static inline void
alloc_init_page(unsigned long virt, unsigned long phys, unsigned int prot_l1, pgprot_t prot)
{
pmd_t *pmdp;
pte_t *ptep;
pmdp = pmd_offset(pgd_offset_k(virt), virt);
if (pmd_none(*pmdp)) {
unsigned long pmdval;
ptep = alloc_bootmem_low_pages(2 * PTRS_PER_PTE *
sizeof(pte_t));
pmdval = __pa(ptep) | prot_l1;
pmdp[0] = __pmd(pmdval);
pmdp[1] = __pmd(pmdval + 256 * sizeof(pte_t));
flush_pmd_entry(pmdp);
}
ptep = pte_offset_kernel(pmdp, virt);
set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, prot));
}
/*
* Clear any PGD mapping. On a two-level page table system,
* the clearance is done by the middle-level functions (pmd)
* rather than the top-level (pgd) functions.
*/
static inline void clear_mapping(unsigned long virt)
{
pmd_clear(pmd_offset(pgd_offset_k(virt), virt));
}
struct mem_types {
unsigned int prot_pte;
unsigned int prot_l1;
unsigned int prot_sect;
unsigned int domain;
};
static struct mem_types mem_types[] __initdata = {
[MT_DEVICE] = {
.prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
L_PTE_WRITE,
.prot_l1 = PMD_TYPE_TABLE,
.prot_sect = PMD_TYPE_SECT | PMD_SECT_UNCACHED |
PMD_SECT_AP_WRITE,
.domain = DOMAIN_IO,
},
[MT_CACHECLEAN] = {
.prot_sect = PMD_TYPE_SECT,
.domain = DOMAIN_KERNEL,
},
[MT_MINICLEAN] = {
.prot_sect = PMD_TYPE_SECT | PMD_SECT_MINICACHE,
.domain = DOMAIN_KERNEL,
},
[MT_LOW_VECTORS] = {
.prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
L_PTE_EXEC,
.prot_l1 = PMD_TYPE_TABLE,
.domain = DOMAIN_USER,
},
[MT_HIGH_VECTORS] = {
.prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
L_PTE_USER | L_PTE_EXEC,
.prot_l1 = PMD_TYPE_TABLE,
.domain = DOMAIN_USER,
},
[MT_MEMORY] = {
.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
.domain = DOMAIN_KERNEL,
},
[MT_ROM] = {
.prot_sect = PMD_TYPE_SECT,
.domain = DOMAIN_KERNEL,
},
[MT_IXP2000_DEVICE] = { /* IXP2400 requires XCB=101 for on-chip I/O */
.prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
L_PTE_WRITE,
.prot_l1 = PMD_TYPE_TABLE,
.prot_sect = PMD_TYPE_SECT | PMD_SECT_UNCACHED |
PMD_SECT_AP_WRITE | PMD_SECT_BUFFERABLE |
PMD_SECT_TEX(1),
.domain = DOMAIN_IO,
}
};
/*
* Adjust the PMD section entries according to the CPU in use.
*/
static void __init build_mem_type_table(void)
{
struct cachepolicy *cp;
unsigned int cr = get_cr();
int cpu_arch = cpu_architecture();
int i;
#if defined(CONFIG_CPU_DCACHE_DISABLE)
if (cachepolicy > CPOLICY_BUFFERED)
cachepolicy = CPOLICY_BUFFERED;
#elif defined(CONFIG_CPU_DCACHE_WRITETHROUGH)
if (cachepolicy > CPOLICY_WRITETHROUGH)
cachepolicy = CPOLICY_WRITETHROUGH;
#endif
if (cpu_arch < CPU_ARCH_ARMv5) {
if (cachepolicy >= CPOLICY_WRITEALLOC)
cachepolicy = CPOLICY_WRITEBACK;
ecc_mask = 0;
}
if (cpu_arch <= CPU_ARCH_ARMv5) {
for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
if (mem_types[i].prot_l1)
mem_types[i].prot_l1 |= PMD_BIT4;
if (mem_types[i].prot_sect)
mem_types[i].prot_sect |= PMD_BIT4;
}
}
/*
* ARMv6 and above have extended page tables.
*/
if (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP)) {
/*
* bit 4 becomes XN which we must clear for the
* kernel memory mapping.
*/
mem_types[MT_MEMORY].prot_sect &= ~PMD_BIT4;
mem_types[MT_ROM].prot_sect &= ~PMD_BIT4;
/*
* Mark cache clean areas read only from SVC mode
* and no access from userspace.
*/
mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
}
cp = &cache_policies[cachepolicy];
if (cpu_arch >= CPU_ARCH_ARMv5) {
mem_types[MT_LOW_VECTORS].prot_pte |= cp->pte & PTE_CACHEABLE;
mem_types[MT_HIGH_VECTORS].prot_pte |= cp->pte & PTE_CACHEABLE;
} else {
mem_types[MT_LOW_VECTORS].prot_pte |= cp->pte;
mem_types[MT_HIGH_VECTORS].prot_pte |= cp->pte;
mem_types[MT_MINICLEAN].prot_sect &= ~PMD_SECT_TEX(1);
}
mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd;
mem_types[MT_ROM].prot_sect |= cp->pmd;
for (i = 0; i < 16; i++) {
unsigned long v = pgprot_val(protection_map[i]);
v &= (~(PTE_BUFFERABLE|PTE_CACHEABLE)) | cp->pte;
protection_map[i] = __pgprot(v);
}
pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
L_PTE_DIRTY | L_PTE_WRITE |
L_PTE_EXEC | cp->pte);
switch (cp->pmd) {
case PMD_SECT_WT:
mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WT;
break;
case PMD_SECT_WB:
case PMD_SECT_WBWA:
mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WB;
break;
}
printk("Memory policy: ECC %sabled, Data cache %s\n",
ecc_mask ? "en" : "dis", cp->policy);
}
#define vectors_base() (vectors_high() ? 0xffff0000 : 0)
/*
* Create the page directory entries and any necessary
* page tables for the mapping specified by `md'. We
* are able to cope here with varying sizes and address
* offsets, and we take full advantage of sections and
* supersections.
*/
static void __init create_mapping(struct map_desc *md)
{
unsigned long virt, length;
int prot_sect, prot_l1, domain;
pgprot_t prot_pte;
long off;
if (md->virtual != vectors_base() && md->virtual < TASK_SIZE) {
printk(KERN_WARNING "BUG: not creating mapping for "
"0x%08lx at 0x%08lx in user region\n",
md->physical, md->virtual);
return;
}
if ((md->type == MT_DEVICE || md->type == MT_ROM) &&
md->virtual >= PAGE_OFFSET && md->virtual < VMALLOC_END) {
printk(KERN_WARNING "BUG: mapping for 0x%08lx at 0x%08lx "
"overlaps vmalloc space\n",
md->physical, md->virtual);
}
domain = mem_types[md->type].domain;
prot_pte = __pgprot(mem_types[md->type].prot_pte);
prot_l1 = mem_types[md->type].prot_l1 | PMD_DOMAIN(domain);
prot_sect = mem_types[md->type].prot_sect | PMD_DOMAIN(domain);
virt = md->virtual;
off = md->physical - virt;
length = md->length;
if (mem_types[md->type].prot_l1 == 0 &&
(virt & 0xfffff || (virt + off) & 0xfffff || (virt + length) & 0xfffff)) {
printk(KERN_WARNING "BUG: map for 0x%08lx at 0x%08lx can not "
"be mapped using pages, ignoring.\n",
md->physical, md->virtual);
return;
}
while ((virt & 0xfffff || (virt + off) & 0xfffff) && length >= PAGE_SIZE) {
alloc_init_page(virt, virt + off, prot_l1, prot_pte);
virt += PAGE_SIZE;
length -= PAGE_SIZE;
}
/* N.B. ARMv6 supersections are only defined to work with domain 0.
* Since domain assignments can in fact be arbitrary, the
* 'domain == 0' check below is required to ensure that ARMv6
* supersections are only allocated for domain 0 regardless
* of the actual domain assignments in use.
*/
if (cpu_architecture() >= CPU_ARCH_ARMv6 && domain == 0) {
/* Align to supersection boundary */
while ((virt & ~SUPERSECTION_MASK || (virt + off) &
~SUPERSECTION_MASK) && length >= (PGDIR_SIZE / 2)) {
alloc_init_section(virt, virt + off, prot_sect);
virt += (PGDIR_SIZE / 2);
length -= (PGDIR_SIZE / 2);
}
while (length >= SUPERSECTION_SIZE) {
alloc_init_supersection(virt, virt + off, prot_sect);
virt += SUPERSECTION_SIZE;
length -= SUPERSECTION_SIZE;
}
}
/*
* A section mapping covers half a "pgdir" entry.
*/
while (length >= (PGDIR_SIZE / 2)) {
alloc_init_section(virt, virt + off, prot_sect);
virt += (PGDIR_SIZE / 2);
length -= (PGDIR_SIZE / 2);
}
while (length >= PAGE_SIZE) {
alloc_init_page(virt, virt + off, prot_l1, prot_pte);
virt += PAGE_SIZE;
length -= PAGE_SIZE;
}
}
/*
* In order to soft-boot, we need to insert a 1:1 mapping in place of
* the user-mode pages. This will then ensure that we have predictable
* results when turning the mmu off
*/
void setup_mm_for_reboot(char mode)
{
unsigned long pmdval;
pgd_t *pgd;
pmd_t *pmd;
int i;
int cpu_arch = cpu_architecture();
if (current->mm && current->mm->pgd)
pgd = current->mm->pgd;
else
pgd = init_mm.pgd;
for (i = 0; i < FIRST_USER_PGD_NR + USER_PTRS_PER_PGD; i++) {
pmdval = (i << PGDIR_SHIFT) |
PMD_SECT_AP_WRITE | PMD_SECT_AP_READ |
PMD_TYPE_SECT;
if (cpu_arch <= CPU_ARCH_ARMv5)
pmdval |= PMD_BIT4;
pmd = pmd_offset(pgd + i, i << PGDIR_SHIFT);
pmd[0] = __pmd(pmdval);
pmd[1] = __pmd(pmdval + (1 << (PGDIR_SHIFT - 1)));
flush_pmd_entry(pmd);
}
}
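/*
 * Worked example (illustrative, assuming the usual ARM two-level layout
 * with PGDIR_SHIFT == 21): iteration i covers virtual address 2MB * i.
 * pmd[0] maps it 1:1 onto physical 2MB * i, and pmd[1] maps the following
 * 1MB section (the "+ 1 << (PGDIR_SHIFT - 1)"), so the reboot code runs
 * on a flat mapping of the user address range.
 */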
extern void _stext, _etext;
/*
* Setup initial mappings. We use the page we allocated for zero page to hold
* the mappings, which will get overwritten by the vectors in traps_init().
* The mappings must be in virtual address order.
*/
void __init memtable_init(struct meminfo *mi)
{
struct map_desc *init_maps, *p, *q;
unsigned long address = 0;
int i;
build_mem_type_table();
init_maps = p = alloc_bootmem_low_pages(PAGE_SIZE);
#ifdef CONFIG_XIP_KERNEL
p->physical = CONFIG_XIP_PHYS_ADDR & PMD_MASK;
p->virtual = (unsigned long)&_stext & PMD_MASK;
p->length = ((unsigned long)&_etext - p->virtual + ~PMD_MASK) & PMD_MASK;
p->type = MT_ROM;
p ++;
#endif
for (i = 0; i < mi->nr_banks; i++) {
if (mi->bank[i].size == 0)
continue;
p->physical = mi->bank[i].start;
p->virtual = __phys_to_virt(p->physical);
p->length = mi->bank[i].size;
p->type = MT_MEMORY;
p ++;
}
#ifdef FLUSH_BASE
p->physical = FLUSH_BASE_PHYS;
p->virtual = FLUSH_BASE;
p->length = PGDIR_SIZE;
p->type = MT_CACHECLEAN;
p ++;
#endif
#ifdef FLUSH_BASE_MINICACHE
p->physical = FLUSH_BASE_PHYS + PGDIR_SIZE;
p->virtual = FLUSH_BASE_MINICACHE;
p->length = PGDIR_SIZE;
p->type = MT_MINICLEAN;
p ++;
#endif
/*
* Go through the initial mappings, but clear out any
* pgdir entries that are not in the description.
*/
q = init_maps;
do {
if (address < q->virtual || q == p) {
clear_mapping(address);
address += PGDIR_SIZE;
} else {
create_mapping(q);
address = q->virtual + q->length;
address = (address + PGDIR_SIZE - 1) & PGDIR_MASK;
q ++;
}
} while (address != 0);
/*
* Create a mapping for the machine vectors at the high-vectors
* location (0xffff0000). If we aren't using high-vectors, also
* create a mapping at the low-vectors virtual address.
*/
init_maps->physical = virt_to_phys(init_maps);
init_maps->virtual = 0xffff0000;
init_maps->length = PAGE_SIZE;
init_maps->type = MT_HIGH_VECTORS;
create_mapping(init_maps);
if (!vectors_high()) {
init_maps->virtual = 0;
init_maps->type = MT_LOW_VECTORS;
create_mapping(init_maps);
}
flush_cache_all();
flush_tlb_all();
}
/*
* Create the architecture specific mappings
*/
void __init iotable_init(struct map_desc *io_desc, int nr)
{
int i;
for (i = 0; i < nr; i++)
create_mapping(io_desc + i);
}
static inline void
free_memmap(int node, unsigned long start_pfn, unsigned long end_pfn)
{
struct page *start_pg, *end_pg;
unsigned long pg, pgend;
/*
* Convert start_pfn/end_pfn to a struct page pointer.
*/
start_pg = pfn_to_page(start_pfn);
end_pg = pfn_to_page(end_pfn);
/*
* Convert to physical addresses, and
* round start upwards and end downwards.
*/
pg = PAGE_ALIGN(__pa(start_pg));
pgend = __pa(end_pg) & PAGE_MASK;
/*
* If there are free pages between these,
* free the section of the memmap array.
*/
if (pg < pgend)
free_bootmem_node(NODE_DATA(node), pg, pgend - pg);
}
static inline void free_unused_memmap_node(int node, struct meminfo *mi)
{
unsigned long bank_start, prev_bank_end = 0;
unsigned int i;
/*
* [FIXME] This relies on each bank being in address order. This
* may not be the case, especially if the user has provided the
* information on the command line.
*/
for (i = 0; i < mi->nr_banks; i++) {
if (mi->bank[i].size == 0 || mi->bank[i].node != node)
continue;
bank_start = mi->bank[i].start >> PAGE_SHIFT;
if (bank_start < prev_bank_end) {
printk(KERN_ERR "MEM: unordered memory banks. "
"Not freeing memmap.\n");
break;
}
/*
* If we had a previous bank, and there is a space
* between the current bank and the previous, free it.
*/
if (prev_bank_end && prev_bank_end != bank_start)
free_memmap(node, prev_bank_end, bank_start);
prev_bank_end = PAGE_ALIGN(mi->bank[i].start +
mi->bank[i].size) >> PAGE_SHIFT;
}
}
/*
* The mem_map array can get very big. Free
* the unused area of the memory map.
*/
void __init create_memmap_holes(struct meminfo *mi)
{
int node;
for_each_online_node(node)
free_unused_memmap_node(node, mi);
}

109
arch/arm/mm/mmap.c Normal file

@@ -0,0 +1,109 @@
/*
* linux/arch/arm/mm/mmap.c
*/
#include <linux/config.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/shm.h>
#include <asm/system.h>
#define COLOUR_ALIGN(addr,pgoff) \
((((addr)+SHMLBA-1)&~(SHMLBA-1)) + \
(((pgoff)<<PAGE_SHIFT) & (SHMLBA-1)))
/*
* We need to ensure that shared mappings are correctly aligned to
* avoid aliasing issues with VIPT caches. We need to ensure that
* a specific page of an object is always mapped at a multiple of
* SHMLBA bytes.
*
* We unconditionally provide this function for all cases, however
* in the VIVT case, we optimise out the alignment rules.
*/
unsigned long
arch_get_unmapped_area(struct file *filp, unsigned long addr,
unsigned long len, unsigned long pgoff, unsigned long flags)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned long start_addr;
#ifdef CONFIG_CPU_V6
unsigned int cache_type;
int do_align = 0, aliasing = 0;
/*
* We only need to do colour alignment if either the I or D
* caches alias. This is indicated by bits 9 and 21 of the
* cache type register.
*/
cache_type = read_cpuid(CPUID_CACHETYPE);
if (cache_type != read_cpuid(CPUID_ID)) {
aliasing = (cache_type | cache_type >> 12) & (1 << 11);
if (aliasing)
do_align = filp || flags & MAP_SHARED;
}
#else
#define do_align 0
#define aliasing 0
#endif
/*
* We should enforce the MAP_FIXED case. However, currently
* the generic kernel code doesn't allow us to handle this.
*/
if (flags & MAP_FIXED) {
if (aliasing && flags & MAP_SHARED && addr & (SHMLBA - 1))
return -EINVAL;
return addr;
}
if (len > TASK_SIZE)
return -ENOMEM;
if (addr) {
if (do_align)
addr = COLOUR_ALIGN(addr, pgoff);
else
addr = PAGE_ALIGN(addr);
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr &&
(!vma || addr + len <= vma->vm_start))
return addr;
}
start_addr = addr = mm->free_area_cache;
full_search:
if (do_align)
addr = COLOUR_ALIGN(addr, pgoff);
else
addr = PAGE_ALIGN(addr);
for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
/* At this point: (!vma || addr < vma->vm_end). */
if (TASK_SIZE - len < addr) {
/*
* Start a new search - just in case we missed
* some holes.
*/
if (start_addr != TASK_UNMAPPED_BASE) {
start_addr = addr = TASK_UNMAPPED_BASE;
goto full_search;
}
return -ENOMEM;
}
if (!vma || addr + len <= vma->vm_start) {
/*
* Remember the place where we stopped the search:
*/
mm->free_area_cache = addr + len;
return addr;
}
addr = vma->vm_end;
if (do_align)
addr = COLOUR_ALIGN(addr, pgoff);
}
}
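/*
 * Worked example (illustrative, assuming SHMLBA == 4 * PAGE_SIZE == 0x4000
 * and 4K pages):
 *   COLOUR_ALIGN(0x40001234, 3)
 *     = ((0x40001234 + 0x3fff) & ~0x3fff) + ((3 << 12) & 0x3fff)
 *     = 0x40004000 + 0x3000
 *     = 0x40007000
 * so page 3 of an object always lands on the same cache colour, whichever
 * address the search starts from.
 */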

45
arch/arm/mm/mmu.c Normal file

@@ -0,0 +1,45 @@
/*
* linux/arch/arm/mm/mmu.c
*
* Copyright (C) 2002-2003 Deep Blue Solutions Ltd, all rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
unsigned int cpu_last_asid = { 1 << ASID_BITS };
/*
* We fork()ed a process, and we need a new context for the child
* to run in. We reserve version 0 for initial tasks so we will
* always allocate an ASID.
*/
void __init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
mm->context.id = 0;
}
void __new_context(struct mm_struct *mm)
{
unsigned int asid;
asid = ++cpu_last_asid;
if (asid == 0)
asid = cpu_last_asid = 1 << ASID_BITS;
/*
* If we've used up all our ASIDs, we need
* to start a new version and flush the TLB.
*/
if ((asid & ~ASID_MASK) == 0)
flush_tlb_all();
mm->context.id = asid;
}
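/*
 * Worked example (illustrative, assuming ASID_BITS == 6): cpu_last_asid
 * starts at 64, so the first contexts receive ASIDs 65..127 without any
 * flushing.  Allocation 128 has (asid & ~ASID_MASK) == 0, so the whole
 * TLB is flushed and a new ASID "version" begins.
 */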

530
arch/arm/mm/proc-arm1020.S Normal file

@@ -0,0 +1,530 @@
/*
* linux/arch/arm/mm/proc-arm1020.S: MMU functions for ARM1020
*
* Copyright (C) 2000 ARM Limited
* Copyright (C) 2000 Deep Blue Solutions Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*
* These are the low level assembler for performing cache and TLB
* functions on the arm1020.
*
* CONFIG_CPU_ARM1020_CPU_IDLE -> nohlt
*/
#include <linux/linkage.h>
#include <linux/config.h>
#include <linux/init.h>
#include <asm/assembler.h>
#include <asm/constants.h>
#include <asm/pgtable.h>
#include <asm/procinfo.h>
#include <asm/ptrace.h>
#include <asm/hardware.h>
/*
* This is the maximum size of an area which will be invalidated
* using the single invalidate entry instructions. Anything larger
* than this, and we go for the whole cache.
*
* This value should be chosen such that we choose the cheapest
* alternative.
*/
#define MAX_AREA_SIZE 32768
/*
* The size of one data cache line.
*/
#define CACHE_DLINESIZE 32
/*
* The number of data cache segments.
*/
#define CACHE_DSEGMENTS 16
/*
* The number of lines in a cache segment.
*/
#define CACHE_DENTRIES 64
/*
* This is the size at which it becomes more efficient to
* clean the whole cache, rather than using the individual
* cache line maintenance instructions.
*/
#define CACHE_DLIMIT 32768
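/*
 * Note (illustrative): the clean+invalidate-by-index loops below build
 * the c7, c14, 2 operand with the segment number in bits 5..8 and the
 * entry (index) number in bits 26..31, matching the shifts applied to
 * r1 and r3 in arm1020_flush_kern_cache_all.
 */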
.text
/*
* cpu_arm1020_proc_init()
*/
ENTRY(cpu_arm1020_proc_init)
mov pc, lr
/*
* cpu_arm1020_proc_fin()
*/
ENTRY(cpu_arm1020_proc_fin)
stmfd sp!, {lr}
mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
msr cpsr_c, ip
bl arm1020_flush_kern_cache_all
mrc p15, 0, r0, c1, c0, 0 @ ctrl register
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
ldmfd sp!, {pc}
/*
* cpu_arm1020_reset(loc)
*
* Perform a soft reset of the system. Put the CPU into the
* same state as it would be if it had been reset, and branch
* to what would be the reset vector.
*
* loc: location to jump to for soft reset
*/
.align 5
ENTRY(cpu_arm1020_reset)
mov ip, #0
mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
mrc p15, 0, ip, c1, c0, 0 @ ctrl register
bic ip, ip, #0x000f @ ............wcam
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
mov pc, r0
/*
* cpu_arm1020_do_idle()
*/
.align 5
ENTRY(cpu_arm1020_do_idle)
mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
mov pc, lr
/* ================================= CACHE ================================ */
.align 5
/*
* flush_user_cache_all()
*
* Invalidate all cache entries in a particular address
* space.
*/
ENTRY(arm1020_flush_user_cache_all)
/* FALLTHROUGH */
/*
* flush_kern_cache_all()
*
* Clean and invalidate the entire cache.
*/
ENTRY(arm1020_flush_kern_cache_all)
mov r2, #VM_EXEC
mov ip, #0
__flush_whole_cache:
#ifndef CONFIG_CPU_DCACHE_DISABLE
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 16 segments
1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries
2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index
mcr p15, 0, ip, c7, c10, 4 @ drain WB
subs r3, r3, #1 << 26
bcs 2b @ entries 63 to 0
subs r1, r1, #1 << 5
bcs 1b @ segments 15 to 0
#endif
tst r2, #VM_EXEC
#ifndef CONFIG_CPU_ICACHE_DISABLE
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
#endif
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr
/*
* flush_user_cache_range(start, end, flags)
*
* Invalidate a range of cache entries in the specified
* address space.
*
* - start - start address (inclusive)
* - end - end address (exclusive)
* - flags - vm_flags for this space
*/
ENTRY(arm1020_flush_user_cache_range)
mov ip, #0
sub r3, r1, r0 @ calculate total size
cmp r3, #CACHE_DLIMIT
bhs __flush_whole_cache
#ifndef CONFIG_CPU_DCACHE_DISABLE
mcr p15, 0, ip, c7, c10, 4
1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry
mcr p15, 0, ip, c7, c10, 4 @ drain WB
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
#endif
tst r2, #VM_EXEC
#ifndef CONFIG_CPU_ICACHE_DISABLE
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
#endif
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr
/*
* coherent_kern_range(start, end)
*
* Ensure coherency between the Icache and the Dcache in the
* region described by start. If you have non-snooping
* Harvard caches, you need to implement this function.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(arm1020_coherent_kern_range)
/* FALLTHROUGH */
/*
* coherent_user_range(start, end)
*
* Ensure coherency between the Icache and the Dcache in the
* region described by start. If you have non-snooping
* Harvard caches, you need to implement this function.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(arm1020_coherent_user_range)
mov ip, #0
bic r0, r0, #CACHE_DLINESIZE - 1
mcr p15, 0, ip, c7, c10, 4
1:
#ifndef CONFIG_CPU_DCACHE_DISABLE
mcr p15, 0, r0, c7, c10, 1 @ clean D entry
mcr p15, 0, ip, c7, c10, 4 @ drain WB
#endif
#ifndef CONFIG_CPU_ICACHE_DISABLE
mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry
#endif
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr
/*
* flush_kern_dcache_page(void *page)
*
* Ensure no D cache aliasing occurs, either with itself or
* the I cache
*
* - page - page aligned address
*/
ENTRY(arm1020_flush_kern_dcache_page)
mov ip, #0
#ifndef CONFIG_CPU_DCACHE_DISABLE
add r1, r0, #PAGE_SZ
1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry
mcr p15, 0, ip, c7, c10, 4 @ drain WB
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr
/*
* dma_inv_range(start, end)
*
* Invalidate (discard) the specified virtual address range.
* May not write back any entries. If 'start' or 'end'
* are not cache line aligned, those lines must be written
* back.
*
* - start - virtual start address
* - end - virtual end address
*
* (same as v4wb)
*/
ENTRY(arm1020_dma_inv_range)
mov ip, #0
#ifndef CONFIG_CPU_DCACHE_DISABLE
tst r0, #CACHE_DLINESIZE - 1
bic r0, r0, #CACHE_DLINESIZE - 1
mcrne p15, 0, ip, c7, c10, 4
mcrne p15, 0, r0, c7, c10, 1 @ clean D entry
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
tst r1, #CACHE_DLINESIZE - 1
mcrne p15, 0, ip, c7, c10, 4
mcrne p15, 0, r1, c7, c10, 1 @ clean D entry
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr
/*
* dma_clean_range(start, end)
*
* Clean the specified virtual address range.
*
* - start - virtual start address
* - end - virtual end address
*
* (same as v4wb)
*/
ENTRY(arm1020_dma_clean_range)
mov ip, #0
#ifndef CONFIG_CPU_DCACHE_DISABLE
bic r0, r0, #CACHE_DLINESIZE - 1
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
mcr p15, 0, ip, c7, c10, 4 @ drain WB
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr
/*
* dma_flush_range(start, end)
*
* Clean and invalidate the specified virtual address range.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(arm1020_dma_flush_range)
mov ip, #0
#ifndef CONFIG_CPU_DCACHE_DISABLE
bic r0, r0, #CACHE_DLINESIZE - 1
mcr p15, 0, ip, c7, c10, 4
1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry
mcr p15, 0, ip, c7, c10, 4 @ drain WB
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr
ENTRY(arm1020_cache_fns)
.long arm1020_flush_kern_cache_all
.long arm1020_flush_user_cache_all
.long arm1020_flush_user_cache_range
.long arm1020_coherent_kern_range
.long arm1020_coherent_user_range
.long arm1020_flush_kern_dcache_page
.long arm1020_dma_inv_range
.long arm1020_dma_clean_range
.long arm1020_dma_flush_range
.align 5
ENTRY(cpu_arm1020_dcache_clean_area)
#ifndef CONFIG_CPU_DCACHE_DISABLE
mov ip, #0
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
mcr p15, 0, ip, c7, c10, 4 @ drain WB
add r0, r0, #CACHE_DLINESIZE
subs r1, r1, #CACHE_DLINESIZE
bhi 1b
#endif
mov pc, lr
/* =============================== PageTable ============================== */
/*
* cpu_arm1020_switch_mm(pgd)
*
* Set the translation base pointer to be as described by pgd.
*
* pgd: new page tables
*/
.align 5
ENTRY(cpu_arm1020_switch_mm)
#ifndef CONFIG_CPU_DCACHE_DISABLE
mcr p15, 0, r3, c7, c10, 4
mov r1, #0xF @ 16 segments
1: mov r3, #0x3F @ 64 entries
2: mov ip, r3, LSL #26 @ shift up entry
orr ip, ip, r1, LSL #5 @ shift in/up index
mcr p15, 0, ip, c7, c14, 2 @ Clean & Inval DCache entry
mov ip, #0
mcr p15, 0, ip, c7, c10, 4
subs r3, r3, #1
cmp r3, #0
bge 2b @ entries 3F to 0
subs r1, r1, #1
cmp r1, #0
bge 1b @ segments 15 to 0
#endif
mov r1, #0
#ifndef CONFIG_CPU_ICACHE_DISABLE
mcr p15, 0, r1, c7, c5, 0 @ invalidate I cache
#endif
mcr p15, 0, r1, c7, c10, 4 @ drain WB
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs
mov pc, lr
/*
* cpu_arm1020_set_pte(ptep, pte)
*
* Set a PTE and flush it out
*/
.align 5
ENTRY(cpu_arm1020_set_pte)
str r1, [r0], #-2048 @ linux version
eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY
bic r2, r1, #PTE_SMALL_AP_MASK
bic r2, r2, #PTE_TYPE_MASK
orr r2, r2, #PTE_TYPE_SMALL
tst r1, #L_PTE_USER @ User?
orrne r2, r2, #PTE_SMALL_AP_URO_SRW
tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty?
orreq r2, r2, #PTE_SMALL_AP_UNO_SRW
tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young?
movne r2, #0
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
eor r3, r1, #0x0a @ C & small page?
tst r3, #0x0b
biceq r2, r2, #4
#endif
str r2, [r0] @ hardware version
mov r0, r0
#ifndef CONFIG_CPU_DCACHE_DISABLE
mcr p15, 0, r0, c7, c10, 4
mcr p15, 0, r0, c7, c10, 1 @ clean D entry
#endif
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov pc, lr
__INIT
.type __arm1020_setup, #function
__arm1020_setup:
mov r0, #0
mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4
mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4
mrc p15, 0, r0, c1, c0 @ get control register v4
ldr r5, arm1020_cr1_clear
bic r0, r0, r5
ldr r5, arm1020_cr1_set
orr r0, r0, r5
#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
orr r0, r0, #0x4000 @ .R.. .... .... ....
#endif
mov pc, lr
.size __arm1020_setup, . - __arm1020_setup
/*
* R
* .RVI ZFRS BLDP WCAM
* .0.1 1001 ..11 0101 /* FIXME: why no V bit? */
*/
.type arm1020_cr1_clear, #object
.type arm1020_cr1_set, #object
arm1020_cr1_clear:
.word 0x593f
arm1020_cr1_set:
.word 0x1935
__INITDATA
/*
* Purpose : Function pointers used to access above functions - all calls
* come through these
*/
.type arm1020_processor_functions, #object
arm1020_processor_functions:
.word v4t_early_abort
.word cpu_arm1020_proc_init
.word cpu_arm1020_proc_fin
.word cpu_arm1020_reset
.word cpu_arm1020_do_idle
.word cpu_arm1020_dcache_clean_area
.word cpu_arm1020_switch_mm
.word cpu_arm1020_set_pte
.size arm1020_processor_functions, . - arm1020_processor_functions
.section ".rodata"
.type cpu_arch_name, #object
cpu_arch_name:
.asciz "armv5t"
.size cpu_arch_name, . - cpu_arch_name
.type cpu_elf_name, #object
cpu_elf_name:
.asciz "v5"
.size cpu_elf_name, . - cpu_elf_name
.type cpu_arm1020_name, #object
cpu_arm1020_name:
.ascii "ARM1020"
#ifndef CONFIG_CPU_ICACHE_DISABLE
.ascii "i"
#endif
#ifndef CONFIG_CPU_DCACHE_DISABLE
.ascii "d"
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
.ascii "(wt)"
#else
.ascii "(wb)"
#endif
#endif
#ifndef CONFIG_CPU_BPREDICT_DISABLE
.ascii "B"
#endif
#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
.ascii "RR"
#endif
.ascii "\0"
.size cpu_arm1020_name, . - cpu_arm1020_name
.align
.section ".proc.info", #alloc, #execinstr
.type __arm1020_proc_info,#object
__arm1020_proc_info:
.long 0x4104a200 @ ARM 1020T (Architecture v5T)
.long 0xff0ffff0
.long PMD_TYPE_SECT | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ
b __arm1020_setup
.long cpu_arch_name
.long cpu_elf_name
.long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB
.long cpu_arm1020_name
.long arm1020_processor_functions
.long v4wbi_tlb_fns
.long v4wb_user_fns
.long arm1020_cache_fns
.size __arm1020_proc_info, . - __arm1020_proc_info

513
arch/arm/mm/proc-arm1020e.S Normal file

@@ -0,0 +1,513 @@
/*
* linux/arch/arm/mm/proc-arm1020e.S: MMU functions for ARM1020E
*
* Copyright (C) 2000 ARM Limited
* Copyright (C) 2000 Deep Blue Solutions Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*
* These are the low level assembler for performing cache and TLB
* functions on the arm1020e.
*
* CONFIG_CPU_ARM1020_CPU_IDLE -> nohlt
*/
#include <linux/linkage.h>
#include <linux/config.h>
#include <linux/init.h>
#include <asm/assembler.h>
#include <asm/constants.h>
#include <asm/pgtable.h>
#include <asm/procinfo.h>
#include <asm/ptrace.h>
#include <asm/hardware.h>
/*
* This is the maximum size of an area which will be invalidated
* using the single invalidate entry instructions. Anything larger
* than this, and we go for the whole cache.
*
* This value should be chosen such that we choose the cheapest
* alternative.
*/
#define MAX_AREA_SIZE 32768
/*
* The size of one data cache line.
*/
#define CACHE_DLINESIZE 32
/*
* The number of data cache segments.
*/
#define CACHE_DSEGMENTS 16
/*
* The number of lines in a cache segment.
*/
#define CACHE_DENTRIES 64
/*
* This is the size at which it becomes more efficient to
* clean the whole cache, rather than using the individual
* cache line maintenance instructions.
*/
#define CACHE_DLIMIT 32768
.text
/*
* cpu_arm1020e_proc_init()
*/
ENTRY(cpu_arm1020e_proc_init)
mov pc, lr
/*
* cpu_arm1020e_proc_fin()
*/
ENTRY(cpu_arm1020e_proc_fin)
stmfd sp!, {lr}
mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
msr cpsr_c, ip
bl arm1020e_flush_kern_cache_all
mrc p15, 0, r0, c1, c0, 0 @ ctrl register
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
ldmfd sp!, {pc}
/*
* cpu_arm1020e_reset(loc)
*
* Perform a soft reset of the system. Put the CPU into the
* same state as it would be if it had been reset, and branch
* to what would be the reset vector.
*
* loc: location to jump to for soft reset
*/
.align 5
ENTRY(cpu_arm1020e_reset)
mov ip, #0
mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
mrc p15, 0, ip, c1, c0, 0 @ ctrl register
bic ip, ip, #0x000f @ ............wcam
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
mov pc, r0
/*
* cpu_arm1020e_do_idle()
*/
.align 5
ENTRY(cpu_arm1020e_do_idle)
mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
mov pc, lr
/* ================================= CACHE ================================ */
.align 5
/*
* flush_user_cache_all()
*
* Invalidate all cache entries in a particular address
* space.
*/
ENTRY(arm1020e_flush_user_cache_all)
/* FALLTHROUGH */
/*
* flush_kern_cache_all()
*
* Clean and invalidate the entire cache.
*/
ENTRY(arm1020e_flush_kern_cache_all)
mov r2, #VM_EXEC
mov ip, #0
__flush_whole_cache:
#ifndef CONFIG_CPU_DCACHE_DISABLE
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 16 segments
1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries
2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index
subs r3, r3, #1 << 26
bcs 2b @ entries 63 to 0
subs r1, r1, #1 << 5
bcs 1b @ segments 15 to 0
#endif
tst r2, #VM_EXEC
#ifndef CONFIG_CPU_ICACHE_DISABLE
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
#endif
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr
/*
* flush_user_cache_range(start, end, flags)
*
* Invalidate a range of cache entries in the specified
* address space.
*
* - start - start address (inclusive)
* - end - end address (exclusive)
* - flags - vm_flags for this space
*/
ENTRY(arm1020e_flush_user_cache_range)
mov ip, #0
sub r3, r1, r0 @ calculate total size
cmp r3, #CACHE_DLIMIT
bhs __flush_whole_cache
#ifndef CONFIG_CPU_DCACHE_DISABLE
1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
#endif
tst r2, #VM_EXEC
#ifndef CONFIG_CPU_ICACHE_DISABLE
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
#endif
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr
/*
* coherent_kern_range(start, end)
*
* Ensure coherency between the Icache and the Dcache in the
* region described by start, end. If you have non-snooping
* Harvard caches, you need to implement this function.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(arm1020e_coherent_kern_range)
/* FALLTHROUGH */
/*
* coherent_user_range(start, end)
*
* Ensure coherency between the Icache and the Dcache in the
* region described by start, end. If you have non-snooping
* Harvard caches, you need to implement this function.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(arm1020e_coherent_user_range)
mov ip, #0
bic r0, r0, #CACHE_DLINESIZE - 1
1:
#ifndef CONFIG_CPU_DCACHE_DISABLE
mcr p15, 0, r0, c7, c10, 1 @ clean D entry
#endif
#ifndef CONFIG_CPU_ICACHE_DISABLE
mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry
#endif
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr
/*
* flush_kern_dcache_page(void *page)
*
* Ensure no D cache aliasing occurs, either with itself or
* the I cache
*
* - page - page aligned address
*/
ENTRY(arm1020e_flush_kern_dcache_page)
mov ip, #0
#ifndef CONFIG_CPU_DCACHE_DISABLE
add r1, r0, #PAGE_SZ
1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr
/*
* dma_inv_range(start, end)
*
* Invalidate (discard) the specified virtual address range.
* May not write back any entries. If 'start' or 'end'
* are not cache line aligned, those lines must be written
* back.
*
* - start - virtual start address
* - end - virtual end address
*
* (same as v4wb)
*/
ENTRY(arm1020e_dma_inv_range)
mov ip, #0
#ifndef CONFIG_CPU_DCACHE_DISABLE
tst r0, #CACHE_DLINESIZE - 1
bic r0, r0, #CACHE_DLINESIZE - 1
mcrne p15, 0, r0, c7, c10, 1 @ clean D entry
tst r1, #CACHE_DLINESIZE - 1
mcrne p15, 0, r1, c7, c10, 1 @ clean D entry
1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr
/*
* dma_clean_range(start, end)
*
* Clean the specified virtual address range.
*
* - start - virtual start address
* - end - virtual end address
*
* (same as v4wb)
*/
ENTRY(arm1020e_dma_clean_range)
mov ip, #0
#ifndef CONFIG_CPU_DCACHE_DISABLE
bic r0, r0, #CACHE_DLINESIZE - 1
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr
/*
* dma_flush_range(start, end)
*
* Clean and invalidate the specified virtual address range.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(arm1020e_dma_flush_range)
mov ip, #0
#ifndef CONFIG_CPU_DCACHE_DISABLE
bic r0, r0, #CACHE_DLINESIZE - 1
1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr
ENTRY(arm1020e_cache_fns)
.long arm1020e_flush_kern_cache_all
.long arm1020e_flush_user_cache_all
.long arm1020e_flush_user_cache_range
.long arm1020e_coherent_kern_range
.long arm1020e_coherent_user_range
.long arm1020e_flush_kern_dcache_page
.long arm1020e_dma_inv_range
.long arm1020e_dma_clean_range
.long arm1020e_dma_flush_range
.align 5
ENTRY(cpu_arm1020e_dcache_clean_area)
#ifndef CONFIG_CPU_DCACHE_DISABLE
mov ip, #0
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, #CACHE_DLINESIZE
subs r1, r1, #CACHE_DLINESIZE
bhi 1b
#endif
mov pc, lr
/* =============================== PageTable ============================== */
/*
* cpu_arm1020e_switch_mm(pgd)
*
* Set the translation base pointer to be as described by pgd.
*
* pgd: new page tables
*/
.align 5
ENTRY(cpu_arm1020e_switch_mm)
#ifndef CONFIG_CPU_DCACHE_DISABLE
mcr p15, 0, r3, c7, c10, 4
mov r1, #0xF @ 16 segments
1: mov r3, #0x3F @ 64 entries
2: mov ip, r3, LSL #26 @ shift up entry
orr ip, ip, r1, LSL #5 @ shift in/up index
mcr p15, 0, ip, c7, c14, 2 @ Clean & Inval DCache entry
mov ip, #0
subs r3, r3, #1
cmp r3, #0
bge 2b @ entries 3F to 0
subs r1, r1, #1
cmp r1, #0
bge 1b @ segments 15 to 0
#endif
mov r1, #0
#ifndef CONFIG_CPU_ICACHE_DISABLE
mcr p15, 0, r1, c7, c5, 0 @ invalidate I cache
#endif
mcr p15, 0, r1, c7, c10, 4 @ drain WB
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs
mov pc, lr
/*
* cpu_arm1020e_set_pte(ptep, pte)
*
* Set a PTE and flush it out
*/
.align 5
ENTRY(cpu_arm1020e_set_pte)
str r1, [r0], #-2048 @ linux version
eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY
bic r2, r1, #PTE_SMALL_AP_MASK
bic r2, r2, #PTE_TYPE_MASK
orr r2, r2, #PTE_TYPE_SMALL
tst r1, #L_PTE_USER @ User?
orrne r2, r2, #PTE_SMALL_AP_URO_SRW
tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty?
orreq r2, r2, #PTE_SMALL_AP_UNO_SRW
tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young?
movne r2, #0
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
eor r3, r1, #0x0a @ C & small page?
tst r3, #0x0b
biceq r2, r2, #4
#endif
str r2, [r0] @ hardware version
mov r0, r0
#ifndef CONFIG_CPU_DCACHE_DISABLE
mcr p15, 0, r0, c7, c10, 1 @ clean D entry
#endif
mov pc, lr
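/*
* Note the two stores above: the Linux view of the PTE (with the
* L_PTE_* status bits) lives at the pointer passed in, and the
* hardware small-page descriptor, with access permissions folded down
* from L_PTE_USER/L_PTE_WRITE/L_PTE_DIRTY, lives 2048 bytes below it.
* As a C sketch (hw_pte_from_linux() and clean_dline() are
* illustrative names):
*
*	void set_pte(unsigned long *ptep, unsigned long pte)
*	{
*		ptep[0] = pte;				// Linux version
*		ptep = (unsigned long *)((char *)ptep - 2048);
*		ptep[0] = hw_pte_from_linux(pte);	// hardware version
*		clean_dline((unsigned long)ptep);	// push to memory
*	}
*/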
__INIT
.type __arm1020e_setup, #function
__arm1020e_setup:
mov r0, #0
mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4
mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4
mrc p15, 0, r0, c1, c0 @ get control register v4
ldr r5, arm1020e_cr1_clear
bic r0, r0, r5
ldr r5, arm1020e_cr1_set
orr r0, r0, r5
#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
orr r0, r0, #0x4000 @ .R.. .... .... ....
#endif
mov pc, lr
.size __arm1020e_setup, . - __arm1020e_setup
/*
* R
* .RVI ZFRS BLDP WCAM
* .0.1 1001 ..11 0101 /* FIXME: why no V bit? */
*/
.type arm1020e_cr1_clear, #object
.type arm1020e_cr1_set, #object
arm1020e_cr1_clear:
.word 0x5f3f
arm1020e_cr1_set:
.word 0x1935
__INITDATA
/*
* Purpose : Function pointers used to access above functions - all calls
* come through these
*/
.type arm1020e_processor_functions, #object
arm1020e_processor_functions:
.word v4t_early_abort
.word cpu_arm1020e_proc_init
.word cpu_arm1020e_proc_fin
.word cpu_arm1020e_reset
.word cpu_arm1020e_do_idle
.word cpu_arm1020e_dcache_clean_area
.word cpu_arm1020e_switch_mm
.word cpu_arm1020e_set_pte
.size arm1020e_processor_functions, . - arm1020e_processor_functions
.section ".rodata"
.type cpu_arch_name, #object
cpu_arch_name:
.asciz "armv5te"
.size cpu_arch_name, . - cpu_arch_name
.type cpu_elf_name, #object
cpu_elf_name:
.asciz "v5"
.size cpu_elf_name, . - cpu_elf_name
.type cpu_arm1020e_name, #object
cpu_arm1020e_name:
.ascii "ARM1020E"
#ifndef CONFIG_CPU_ICACHE_DISABLE
.ascii "i"
#endif
#ifndef CONFIG_CPU_DCACHE_DISABLE
.ascii "d"
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
.ascii "(wt)"
#else
.ascii "(wb)"
#endif
#endif
#ifndef CONFIG_CPU_BPREDICT_DISABLE
.ascii "B"
#endif
#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
.ascii "RR"
#endif
.ascii "\0"
.size cpu_arm1020e_name, . - cpu_arm1020e_name
.align
.section ".proc.info", #alloc, #execinstr
.type __arm1020e_proc_info,#object
__arm1020e_proc_info:
.long 0x4105a200 @ ARM 1020TE (Architecture v5TE)
.long 0xff0ffff0
.long PMD_TYPE_SECT | \
PMD_BIT4 | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ
b __arm1020e_setup
.long cpu_arch_name
.long cpu_elf_name
.long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_EDSP
.long cpu_arm1020e_name
.long arm1020e_processor_functions
.long v4wbi_tlb_fns
.long v4wb_user_fns
.long arm1020e_cache_fns
.size __arm1020e_proc_info, . - __arm1020e_proc_info

495
arch/arm/mm/proc-arm1022.S Normal file

@@ -0,0 +1,495 @@
/*
* linux/arch/arm/mm/proc-arm1022.S: MMU functions for ARM1022E
*
* Copyright (C) 2000 ARM Limited
* Copyright (C) 2000 Deep Blue Solutions Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
*
* This is the low-level assembler for performing cache and TLB
* functions on the ARM1022E.
*/
#include <linux/linkage.h>
#include <linux/config.h>
#include <linux/init.h>
#include <asm/assembler.h>
#include <asm/constants.h>
#include <asm/pgtable.h>
#include <asm/procinfo.h>
#include <asm/ptrace.h>
/*
* This is the maximum size of an area which will be invalidated
* using the single invalidate entry instructions. Anything larger
* than this, and we go for the whole cache.
*
* This value should be chosen such that we use the cheapest
* alternative.
*/
#define MAX_AREA_SIZE 32768
/*
* The size of one data cache line.
*/
#define CACHE_DLINESIZE 32
/*
* The number of data cache segments.
*/
#define CACHE_DSEGMENTS 16
/*
* The number of lines in a cache segment.
*/
#define CACHE_DENTRIES 64
/*
* This is the size at which it becomes more efficient to
* clean the whole cache, rather than using the individual
* cache line maintenance instructions.
*/
#define CACHE_DLIMIT 32768
.text
/*
* cpu_arm1022_proc_init()
*/
ENTRY(cpu_arm1022_proc_init)
mov pc, lr
/*
* cpu_arm1022_proc_fin()
*/
ENTRY(cpu_arm1022_proc_fin)
stmfd sp!, {lr}
mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
msr cpsr_c, ip
bl arm1022_flush_kern_cache_all
mrc p15, 0, r0, c1, c0, 0 @ ctrl register
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
ldmfd sp!, {pc}
/*
* cpu_arm1022_reset(loc)
*
* Perform a soft reset of the system. Put the CPU into the
* same state as it would be if it had been reset, and branch
* to what would be the reset vector.
*
* loc: location to jump to for soft reset
*/
.align 5
ENTRY(cpu_arm1022_reset)
mov ip, #0
mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
mrc p15, 0, ip, c1, c0, 0 @ ctrl register
bic ip, ip, #0x000f @ ............wcam
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
mov pc, r0
/*
* cpu_arm1022_do_idle()
*/
.align 5
ENTRY(cpu_arm1022_do_idle)
mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
mov pc, lr
/* ================================= CACHE ================================ */
.align 5
/*
* flush_user_cache_all()
*
* Invalidate all cache entries in a particular address
* space.
*/
ENTRY(arm1022_flush_user_cache_all)
/* FALLTHROUGH */
/*
* flush_kern_cache_all()
*
* Clean and invalidate the entire cache.
*/
ENTRY(arm1022_flush_kern_cache_all)
mov r2, #VM_EXEC
mov ip, #0
__flush_whole_cache:
#ifndef CONFIG_CPU_DCACHE_DISABLE
mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 16 segments
1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries
2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index
subs r3, r3, #1 << 26
bcs 2b @ entries 63 to 0
subs r1, r1, #1 << 5
bcs 1b @ segments 15 to 0
#endif
tst r2, #VM_EXEC
#ifndef CONFIG_CPU_ICACHE_DISABLE
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
#endif
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr
/*
* flush_user_cache_range(start, end, flags)
*
* Invalidate a range of cache entries in the specified
* address space.
*
* - start - start address (inclusive)
* - end - end address (exclusive)
* - flags - vm_flags for this space
*/
ENTRY(arm1022_flush_user_cache_range)
mov ip, #0
sub r3, r1, r0 @ calculate total size
cmp r3, #CACHE_DLIMIT
bhs __flush_whole_cache
#ifndef CONFIG_CPU_DCACHE_DISABLE
1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
#endif
tst r2, #VM_EXEC
#ifndef CONFIG_CPU_ICACHE_DISABLE
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
#endif
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr
/*
* coherent_kern_range(start, end)
*
* Ensure coherency between the Icache and the Dcache in the
* region described by start, end. If you have non-snooping
* Harvard caches, you need to implement this function.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(arm1022_coherent_kern_range)
/* FALLTHROUGH */
/*
* coherent_user_range(start, end)
*
* Ensure coherency between the Icache and the Dcache in the
* region described by start, end. If you have non-snooping
* Harvard caches, you need to implement this function.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(arm1022_coherent_user_range)
mov ip, #0
bic r0, r0, #CACHE_DLINESIZE - 1
1:
#ifndef CONFIG_CPU_DCACHE_DISABLE
mcr p15, 0, r0, c7, c10, 1 @ clean D entry
#endif
#ifndef CONFIG_CPU_ICACHE_DISABLE
mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry
#endif
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr
/*
* flush_kern_dcache_page(void *page)
*
* Ensure no D cache aliasing occurs, either with itself or
* the I cache
*
* - page - page aligned address
*/
ENTRY(arm1022_flush_kern_dcache_page)
mov ip, #0
#ifndef CONFIG_CPU_DCACHE_DISABLE
add r1, r0, #PAGE_SZ
1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr
/*
* dma_inv_range(start, end)
*
* Invalidate (discard) the specified virtual address range.
* May not write back any entries. If 'start' or 'end'
* are not cache line aligned, those lines must be written
* back.
*
* - start - virtual start address
* - end - virtual end address
*
* (same as v4wb)
*/
ENTRY(arm1022_dma_inv_range)
mov ip, #0
#ifndef CONFIG_CPU_DCACHE_DISABLE
tst r0, #CACHE_DLINESIZE - 1
bic r0, r0, #CACHE_DLINESIZE - 1
mcrne p15, 0, r0, c7, c10, 1 @ clean D entry
tst r1, #CACHE_DLINESIZE - 1
mcrne p15, 0, r1, c7, c10, 1 @ clean D entry
1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr
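/*
* Note the boundary handling above: a line that is only partially
* covered by [start, end) is cleaned before the invalidate loop so
* that its bytes outside the range are not discarded.  As a C sketch
* (clean_dline()/inv_dline() are illustrative names; the MCR line ops
* select the line containing the given address):
*
*	if (start & (CACHE_DLINESIZE - 1))
*		clean_dline(start);		// write back partial line
*	if (end & (CACHE_DLINESIZE - 1))
*		clean_dline(end);
*	for (addr = start & ~(CACHE_DLINESIZE - 1); addr < end;
*	     addr += CACHE_DLINESIZE)
*		inv_dline(addr);		// discard whole lines
*/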
/*
* dma_clean_range(start, end)
*
* Clean the specified virtual address range.
*
* - start - virtual start address
* - end - virtual end address
*
* (same as v4wb)
*/
ENTRY(arm1022_dma_clean_range)
mov ip, #0
#ifndef CONFIG_CPU_DCACHE_DISABLE
bic r0, r0, #CACHE_DLINESIZE - 1
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr
/*
* dma_flush_range(start, end)
*
* Clean and invalidate the specified virtual address range.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(arm1022_dma_flush_range)
mov ip, #0
#ifndef CONFIG_CPU_DCACHE_DISABLE
bic r0, r0, #CACHE_DLINESIZE - 1
1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr
ENTRY(arm1022_cache_fns)
.long arm1022_flush_kern_cache_all
.long arm1022_flush_user_cache_all
.long arm1022_flush_user_cache_range
.long arm1022_coherent_kern_range
.long arm1022_coherent_user_range
.long arm1022_flush_kern_dcache_page
.long arm1022_dma_inv_range
.long arm1022_dma_clean_range
.long arm1022_dma_flush_range
.align 5
ENTRY(cpu_arm1022_dcache_clean_area)
#ifndef CONFIG_CPU_DCACHE_DISABLE
mov ip, #0
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, #CACHE_DLINESIZE
subs r1, r1, #CACHE_DLINESIZE
bhi 1b
#endif
mov pc, lr
/* =============================== PageTable ============================== */
/*
* cpu_arm1022_switch_mm(pgd)
*
* Set the translation base pointer to be as described by pgd.
*
* pgd: new page tables
*/
.align 5
ENTRY(cpu_arm1022_switch_mm)
#ifndef CONFIG_CPU_DCACHE_DISABLE
mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 16 segments
1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries
2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index
subs r3, r3, #1 << 26
bcs 2b @ entries 63 to 0
subs r1, r1, #1 << 5
bcs 1b @ segments 15 to 0
#endif
mov r1, #0
#ifndef CONFIG_CPU_ICACHE_DISABLE
mcr p15, 0, r1, c7, c5, 0 @ invalidate I cache
#endif
mcr p15, 0, r1, c7, c10, 4 @ drain WB
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs
mov pc, lr
/*
* cpu_arm1022_set_pte(ptep, pte)
*
* Set a PTE and flush it out
*/
.align 5
ENTRY(cpu_arm1022_set_pte)
str r1, [r0], #-2048 @ linux version
eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY
bic r2, r1, #PTE_SMALL_AP_MASK
bic r2, r2, #PTE_TYPE_MASK
orr r2, r2, #PTE_TYPE_SMALL
tst r1, #L_PTE_USER @ User?
orrne r2, r2, #PTE_SMALL_AP_URO_SRW
tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty?
orreq r2, r2, #PTE_SMALL_AP_UNO_SRW
tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young?
movne r2, #0
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
eor r3, r1, #0x0a @ C & small page?
tst r3, #0x0b
biceq r2, r2, #4
#endif
str r2, [r0] @ hardware version
mov r0, r0
#ifndef CONFIG_CPU_DCACHE_DISABLE
mcr p15, 0, r0, c7, c10, 1 @ clean D entry
#endif
mov pc, lr
__INIT
.type __arm1022_setup, #function
__arm1022_setup:
mov r0, #0
mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4
mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4
mrc p15, 0, r0, c1, c0 @ get control register v4
ldr r5, arm1022_cr1_clear
bic r0, r0, r5
ldr r5, arm1022_cr1_set
orr r0, r0, r5
#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
orr r0, r0, #0x4000 @ .R..............
#endif
mov pc, lr
.size __arm1022_setup, . - __arm1022_setup
/*
* R
* .RVI ZFRS BLDP WCAM
* .011 1001 ..11 0101
*
*/
.type arm1022_cr1_clear, #object
.type arm1022_cr1_set, #object
arm1022_cr1_clear:
.word 0x7f3f
arm1022_cr1_set:
.word 0x3935
__INITDATA
/*
* Purpose : Function pointers used to access above functions - all calls
* come through these
*/
.type arm1022_processor_functions, #object
arm1022_processor_functions:
.word v4t_early_abort
.word cpu_arm1022_proc_init
.word cpu_arm1022_proc_fin
.word cpu_arm1022_reset
.word cpu_arm1022_do_idle
.word cpu_arm1022_dcache_clean_area
.word cpu_arm1022_switch_mm
.word cpu_arm1022_set_pte
.size arm1022_processor_functions, . - arm1022_processor_functions
.section ".rodata"
.type cpu_arch_name, #object
cpu_arch_name:
.asciz "armv5te"
.size cpu_arch_name, . - cpu_arch_name
.type cpu_elf_name, #object
cpu_elf_name:
.asciz "v5"
.size cpu_elf_name, . - cpu_elf_name
.type cpu_arm1022_name, #object
cpu_arm1022_name:
.ascii "arm1022"
#ifndef CONFIG_CPU_ICACHE_DISABLE
.ascii "i"
#endif
#ifndef CONFIG_CPU_DCACHE_DISABLE
.ascii "d"
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
.ascii "(wt)"
#else
.ascii "(wb)"
#endif
#endif
#ifndef CONFIG_CPU_BPREDICT_DISABLE
.ascii "B"
#endif
#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
.ascii "RR"
#endif
.ascii "\0"
.size cpu_arm1022_name, . - cpu_arm1022_name
.align
.section ".proc.info", #alloc, #execinstr
.type __arm1022_proc_info,#object
__arm1022_proc_info:
.long 0x4105a220 @ ARM 1022E (v5TE)
.long 0xff0ffff0
.long PMD_TYPE_SECT | \
PMD_BIT4 | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ
b __arm1022_setup
.long cpu_arch_name
.long cpu_elf_name
.long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_EDSP
.long cpu_arm1022_name
.long arm1022_processor_functions
.long v4wbi_tlb_fns
.long v4wb_user_fns
.long arm1022_cache_fns
.size __arm1022_proc_info, . - __arm1022_proc_info

491
arch/arm/mm/proc-arm1026.S Normal file

@@ -0,0 +1,491 @@
/*
* linux/arch/arm/mm/proc-arm1026.S: MMU functions for ARM1026EJ-S
*
* Copyright (C) 2000 ARM Limited
* Copyright (C) 2000 Deep Blue Solutions Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
*
* This is the low-level assembler for performing cache and TLB
* functions on the ARM1026EJ-S.
*/
#include <linux/linkage.h>
#include <linux/config.h>
#include <linux/init.h>
#include <asm/assembler.h>
#include <asm/constants.h>
#include <asm/pgtable.h>
#include <asm/procinfo.h>
#include <asm/ptrace.h>
/*
* This is the maximum size of an area which will be invalidated
* using the single invalidate entry instructions. Anything larger
* than this, and we go for the whole cache.
*
* This value should be chosen such that we use the cheapest
* alternative.
*/
#define MAX_AREA_SIZE 32768
/*
* The size of one data cache line.
*/
#define CACHE_DLINESIZE 32
/*
* The number of data cache segments.
*/
#define CACHE_DSEGMENTS 16
/*
* The number of lines in a cache segment.
*/
#define CACHE_DENTRIES 64
/*
* This is the size at which it becomes more efficient to
* clean the whole cache, rather than using the individual
* cache line maintenance instructions.
*/
#define CACHE_DLIMIT 32768
.text
/*
* cpu_arm1026_proc_init()
*/
ENTRY(cpu_arm1026_proc_init)
mov pc, lr
/*
* cpu_arm1026_proc_fin()
*/
ENTRY(cpu_arm1026_proc_fin)
stmfd sp!, {lr}
mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
msr cpsr_c, ip
bl arm1026_flush_kern_cache_all
mrc p15, 0, r0, c1, c0, 0 @ ctrl register
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
ldmfd sp!, {pc}
/*
* cpu_arm1026_reset(loc)
*
* Perform a soft reset of the system. Put the CPU into the
* same state as it would be if it had been reset, and branch
* to what would be the reset vector.
*
* loc: location to jump to for soft reset
*/
.align 5
ENTRY(cpu_arm1026_reset)
mov ip, #0
mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
mrc p15, 0, ip, c1, c0, 0 @ ctrl register
bic ip, ip, #0x000f @ ............wcam
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
mov pc, r0
/*
* cpu_arm1026_do_idle()
*/
.align 5
ENTRY(cpu_arm1026_do_idle)
mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
mov pc, lr
/* ================================= CACHE ================================ */
.align 5
/*
* flush_user_cache_all()
*
* Invalidate all cache entries in a particular address
* space.
*/
ENTRY(arm1026_flush_user_cache_all)
/* FALLTHROUGH */
/*
* flush_kern_cache_all()
*
* Clean and invalidate the entire cache.
*/
ENTRY(arm1026_flush_kern_cache_all)
mov r2, #VM_EXEC
mov ip, #0
__flush_whole_cache:
#ifndef CONFIG_CPU_DCACHE_DISABLE
1: mrc p15, 0, r15, c7, c14, 3 @ test, clean, invalidate
bne 1b
#endif
tst r2, #VM_EXEC
#ifndef CONFIG_CPU_ICACHE_DISABLE
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
#endif
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr
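/*
* Unlike the index walk used on the ARM1020E/ARM1022, the loop above
* relies on the "test, clean and invalidate" operation: with r15 as
* the destination, the MRC moves the cache status into the CPSR
* flags, so the branch repeats until the D-cache reports no dirty
* lines.  Roughly, as a C sketch (helper name illustrative):
*
*	while (!test_clean_inv_dcache())	// handles one dirty line,
*		;				// true once cache is clean
*/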
/*
* flush_user_cache_range(start, end, flags)
*
* Invalidate a range of cache entries in the specified
* address space.
*
* - start - start address (inclusive)
* - end - end address (exclusive)
* - flags - vm_flags for this space
*/
ENTRY(arm1026_flush_user_cache_range)
mov ip, #0
sub r3, r1, r0 @ calculate total size
cmp r3, #CACHE_DLIMIT
bhs __flush_whole_cache
#ifndef CONFIG_CPU_DCACHE_DISABLE
1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
#endif
tst r2, #VM_EXEC
#ifndef CONFIG_CPU_ICACHE_DISABLE
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
#endif
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr
/*
* coherent_kern_range(start, end)
*
* Ensure coherency between the Icache and the Dcache in the
* region described by start, end. If you have non-snooping
* Harvard caches, you need to implement this function.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(arm1026_coherent_kern_range)
/* FALLTHROUGH */
/*
* coherent_user_range(start, end)
*
* Ensure coherency between the Icache and the Dcache in the
* region described by start, end. If you have non-snooping
* Harvard caches, you need to implement this function.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(arm1026_coherent_user_range)
mov ip, #0
bic r0, r0, #CACHE_DLINESIZE - 1
1:
#ifndef CONFIG_CPU_DCACHE_DISABLE
mcr p15, 0, r0, c7, c10, 1 @ clean D entry
#endif
#ifndef CONFIG_CPU_ICACHE_DISABLE
mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry
#endif
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr
/*
* flush_kern_dcache_page(void *page)
*
* Ensure no D cache aliasing occurs, either with itself or
* the I cache
*
* - page - page aligned address
*/
ENTRY(arm1026_flush_kern_dcache_page)
mov ip, #0
#ifndef CONFIG_CPU_DCACHE_DISABLE
add r1, r0, #PAGE_SZ
1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr
/*
* dma_inv_range(start, end)
*
* Invalidate (discard) the specified virtual address range.
* May not write back any entries. If 'start' or 'end'
* are not cache line aligned, those lines must be written
* back.
*
* - start - virtual start address
* - end - virtual end address
*
* (same as v4wb)
*/
ENTRY(arm1026_dma_inv_range)
mov ip, #0
#ifndef CONFIG_CPU_DCACHE_DISABLE
tst r0, #CACHE_DLINESIZE - 1
bic r0, r0, #CACHE_DLINESIZE - 1
mcrne p15, 0, r0, c7, c10, 1 @ clean D entry
tst r1, #CACHE_DLINESIZE - 1
mcrne p15, 0, r1, c7, c10, 1 @ clean D entry
1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr
/*
* dma_clean_range(start, end)
*
* Clean the specified virtual address range.
*
* - start - virtual start address
* - end - virtual end address
*
* (same as v4wb)
*/
ENTRY(arm1026_dma_clean_range)
mov ip, #0
#ifndef CONFIG_CPU_DCACHE_DISABLE
bic r0, r0, #CACHE_DLINESIZE - 1
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr
/*
* dma_flush_range(start, end)
*
* Clean and invalidate the specified virtual address range.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(arm1026_dma_flush_range)
mov ip, #0
#ifndef CONFIG_CPU_DCACHE_DISABLE
bic r0, r0, #CACHE_DLINESIZE - 1
1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr
ENTRY(arm1026_cache_fns)
.long arm1026_flush_kern_cache_all
.long arm1026_flush_user_cache_all
.long arm1026_flush_user_cache_range
.long arm1026_coherent_kern_range
.long arm1026_coherent_user_range
.long arm1026_flush_kern_dcache_page
.long arm1026_dma_inv_range
.long arm1026_dma_clean_range
.long arm1026_dma_flush_range
.align 5
ENTRY(cpu_arm1026_dcache_clean_area)
#ifndef CONFIG_CPU_DCACHE_DISABLE
mov ip, #0
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, #CACHE_DLINESIZE
subs r1, r1, #CACHE_DLINESIZE
bhi 1b
#endif
mov pc, lr
/* =============================== PageTable ============================== */
/*
* cpu_arm1026_switch_mm(pgd)
*
* Set the translation base pointer to be as described by pgd.
*
* pgd: new page tables
*/
.align 5
ENTRY(cpu_arm1026_switch_mm)
mov r1, #0
#ifndef CONFIG_CPU_DCACHE_DISABLE
1: mrc p15, 0, r15, c7, c14, 3 @ test, clean, invalidate
bne 1b
#endif
#ifndef CONFIG_CPU_ICACHE_DISABLE
mcr p15, 0, r1, c7, c5, 0 @ invalidate I cache
#endif
mcr p15, 0, r1, c7, c10, 4 @ drain WB
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs
mov pc, lr
/*
* cpu_arm1026_set_pte(ptep, pte)
*
* Set a PTE and flush it out
*/
.align 5
ENTRY(cpu_arm1026_set_pte)
str r1, [r0], #-2048 @ linux version
eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY
bic r2, r1, #PTE_SMALL_AP_MASK
bic r2, r2, #PTE_TYPE_MASK
orr r2, r2, #PTE_TYPE_SMALL
tst r1, #L_PTE_USER @ User?
orrne r2, r2, #PTE_SMALL_AP_URO_SRW
tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty?
orreq r2, r2, #PTE_SMALL_AP_UNO_SRW
tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young?
movne r2, #0
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
eor r3, r1, #0x0a @ C & small page?
tst r3, #0x0b
biceq r2, r2, #4
#endif
str r2, [r0] @ hardware version
mov r0, r0
#ifndef CONFIG_CPU_DCACHE_DISABLE
mcr p15, 0, r0, c7, c10, 1 @ clean D entry
#endif
mov pc, lr
__INIT
.type __arm1026_setup, #function
__arm1026_setup:
mov r0, #0
mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4
mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4
mcr p15, 0, r4, c2, c0 @ load page table pointer
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
mov r0, #4 @ explicitly disable writeback
mcr p15, 7, r0, c15, c0, 0
#endif
mrc p15, 0, r0, c1, c0 @ get control register v4
ldr r5, arm1026_cr1_clear
bic r0, r0, r5
ldr r5, arm1026_cr1_set
orr r0, r0, r5
#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
orr r0, r0, #0x4000 @ .R.. .... .... ....
#endif
mov pc, lr
.size __arm1026_setup, . - __arm1026_setup
/*
* R
* .RVI ZFRS BLDP WCAM
* .011 1001 ..11 0101
*
*/
.type arm1026_cr1_clear, #object
.type arm1026_cr1_set, #object
arm1026_cr1_clear:
.word 0x7f3f
arm1026_cr1_set:
.word 0x3935
__INITDATA
/*
* Purpose : Function pointers used to access above functions - all calls
* come through these
*/
.type arm1026_processor_functions, #object
arm1026_processor_functions:
.word v5t_early_abort
.word cpu_arm1026_proc_init
.word cpu_arm1026_proc_fin
.word cpu_arm1026_reset
.word cpu_arm1026_do_idle
.word cpu_arm1026_dcache_clean_area
.word cpu_arm1026_switch_mm
.word cpu_arm1026_set_pte
.size arm1026_processor_functions, . - arm1026_processor_functions
.section .rodata
.type cpu_arch_name, #object
cpu_arch_name:
.asciz "armv5tej"
.size cpu_arch_name, . - cpu_arch_name
.type cpu_elf_name, #object
cpu_elf_name:
.asciz "v5"
.size cpu_elf_name, . - cpu_elf_name
.align
.type cpu_arm1026_name, #object
cpu_arm1026_name:
.ascii "ARM1026EJ-S"
#ifndef CONFIG_CPU_ICACHE_DISABLE
.ascii "i"
#endif
#ifndef CONFIG_CPU_DCACHE_DISABLE
.ascii "d"
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
.ascii "(wt)"
#else
.ascii "(wb)"
#endif
#endif
#ifndef CONFIG_CPU_BPREDICT_DISABLE
.ascii "B"
#endif
#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
.ascii "RR"
#endif
.ascii "\0"
.size cpu_arm1026_name, . - cpu_arm1026_name
.align
.section ".proc.info", #alloc, #execinstr
.type __arm1026_proc_info,#object
__arm1026_proc_info:
.long 0x4106a260 @ ARM 1026EJ-S (v5TEJ)
.long 0xff0ffff0
.long PMD_TYPE_SECT | \
PMD_BIT4 | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ
b __arm1026_setup
.long cpu_arch_name
.long cpu_elf_name
.long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA
.long cpu_arm1026_name
.long arm1026_processor_functions
.long v4wbi_tlb_fns
.long v4wb_user_fns
.long arm1026_cache_fns
.size __arm1026_proc_info, . - __arm1026_proc_info

404
arch/arm/mm/proc-arm6_7.S Normal file

@@ -0,0 +1,404 @@
/*
* linux/arch/arm/mm/proc-arm6,7.S
*
* Copyright (C) 1997-2000 Russell King
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This is the low-level assembler for performing cache and TLB
* functions on the ARM610 & ARM710.
*/
#include <linux/linkage.h>
#include <linux/init.h>
#include <asm/assembler.h>
#include <asm/constants.h>
#include <asm/pgtable.h>
#include <asm/procinfo.h>
#include <asm/ptrace.h>
ENTRY(cpu_arm6_dcache_clean_area)
ENTRY(cpu_arm7_dcache_clean_area)
mov pc, lr
/*
* Function: arm6_7_data_abort ()
*
* Params : r2 = address of aborted instruction
* : sp = pointer to registers
*
* Purpose : obtain information about current aborted instruction
*
* Returns : r0 = address of abort
* : r1 = FSR
*/
ENTRY(cpu_arm7_data_abort)
mrc p15, 0, r1, c5, c0, 0 @ get FSR
mrc p15, 0, r0, c6, c0, 0 @ get FAR
ldr r8, [r0] @ read arm instruction
tst r8, #1 << 20 @ L = 1 -> write?
orreq r1, r1, #1 << 8 @ yes.
and r7, r8, #15 << 24
add pc, pc, r7, lsr #22 @ Now branch to the relevant processing routine
nop
/* 0 */ b .data_unknown
/* 1 */ mov pc, lr @ swp
/* 2 */ b .data_unknown
/* 3 */ b .data_unknown
/* 4 */ b .data_arm_lateldrpostconst @ ldr rd, [rn], #m
/* 5 */ b .data_arm_lateldrpreconst @ ldr rd, [rn, #m]
/* 6 */ b .data_arm_lateldrpostreg @ ldr rd, [rn], rm
/* 7 */ b .data_arm_lateldrprereg @ ldr rd, [rn, rm]
/* 8 */ b .data_arm_ldmstm @ ldm*a rn, <rlist>
/* 9 */ b .data_arm_ldmstm @ ldm*b rn, <rlist>
/* a */ b .data_unknown
/* b */ b .data_unknown
/* c */ mov pc, lr @ ldc rd, [rn], #m @ Same as ldr rd, [rn], #m
/* d */ mov pc, lr @ ldc rd, [rn, #m]
/* e */ b .data_unknown
/* f */
.data_unknown: @ Part of jumptable
mov r0, r2
mov r1, r8
mov r2, sp
bl baddataabort
b ret_from_exception
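/*
* The "add pc, pc, r7, lsr #22" above is a computed branch into the
* 16-entry table that follows it: r7 holds bits 24-27 of the faulting
* instruction, so "r7, lsr #22" is the opcode class times 4 (one
* branch slot each), and pc reads as the address of the add plus 8,
* which is the first slot.  In C it is roughly (handler names
* illustrative):
*
*	switch ((insn >> 24) & 0xf) {
*	case 0x4: fixup_ldr_post_const(); break;
*	case 0x8: case 0x9: fixup_ldm_stm(); break;
*	default: break;
*	}
*/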
ENTRY(cpu_arm6_data_abort)
mrc p15, 0, r1, c5, c0, 0 @ get FSR
mrc p15, 0, r0, c6, c0, 0 @ get FAR
ldr r8, [r2] @ read arm instruction
tst r8, #1 << 20 @ L = 1 -> write?
orreq r1, r1, #1 << 8 @ yes.
and r7, r8, #14 << 24
teq r7, #8 << 24 @ was it ldm/stm
movne pc, lr
.data_arm_ldmstm:
tst r8, #1 << 21 @ check writeback bit
moveq pc, lr @ no writeback -> no fixup
mov r7, #0x11
orr r7, r7, #0x1100
and r6, r8, r7
and r2, r8, r7, lsl #1
add r6, r6, r2, lsr #1
and r2, r8, r7, lsl #2
add r6, r6, r2, lsr #2
and r2, r8, r7, lsl #3
add r6, r6, r2, lsr #3
add r6, r6, r6, lsr #8
add r6, r6, r6, lsr #4
and r6, r6, #15 @ r6 = no. of registers to transfer.
and r5, r8, #15 << 16 @ Extract 'n' from instruction
ldr r7, [sp, r5, lsr #14] @ Get register 'Rn'
tst r8, #1 << 23 @ Check U bit
subne r7, r7, r6, lsl #2 @ Undo increment
addeq r7, r7, r6, lsl #2 @ Undo decrement
str r7, [sp, r5, lsr #14] @ Put register 'Rn'
mov pc, lr
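/*
* The 0x1111 sequence above is a parallel population count of the
* ldm/stm register list: each nibble of r6 accumulates the bit count
* of the matching nibble of the instruction, and the two trailing
* adds fold the nibbles together.  As a C sketch:
*
*	unsigned int n = insn & 0x1111;
*	n += (insn >> 1) & 0x1111;
*	n += (insn >> 2) & 0x1111;
*	n += (insn >> 3) & 0x1111;	// each nibble now counts 0-4
*	n += n >> 8;
*	n += n >> 4;
*	n &= 15;			// registers transferred (mod 16)
*/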
.data_arm_apply_r6_and_rn:
and r5, r8, #15 << 16 @ Extract 'n' from instruction
ldr r7, [sp, r5, lsr #14] @ Get register 'Rn'
tst r8, #1 << 23 @ Check U bit
subne r7, r7, r6 @ Undo increment
addeq r7, r7, r6 @ Undo decrement
str r7, [sp, r5, lsr #14] @ Put register 'Rn'
mov pc, lr
.data_arm_lateldrpreconst:
tst r8, #1 << 21 @ check writeback bit
moveq pc, lr @ no writeback -> no fixup
.data_arm_lateldrpostconst:
movs r2, r8, lsl #20 @ Get offset
moveq pc, lr @ zero -> no fixup
and r5, r8, #15 << 16 @ Extract 'n' from instruction
ldr r7, [sp, r5, lsr #14] @ Get register 'Rn'
tst r8, #1 << 23 @ Check U bit
subne r7, r7, r2, lsr #20 @ Undo increment
addeq r7, r7, r2, lsr #20 @ Undo decrement
str r7, [sp, r5, lsr #14] @ Put register 'Rn'
mov pc, lr
.data_arm_lateldrprereg:
tst r8, #1 << 21 @ check writeback bit
moveq pc, lr @ no writeback -> no fixup
.data_arm_lateldrpostreg:
and r7, r8, #15 @ Extract 'm' from instruction
ldr r6, [sp, r7, lsl #2] @ Get register 'Rm'
mov r5, r8, lsr #7 @ get shift count
ands r5, r5, #31
and r7, r8, #0x70 @ get shift type
orreq r7, r7, #8 @ shift count = 0
add pc, pc, r7
nop
mov r6, r6, lsl r5 @ 0: LSL #!0
b .data_arm_apply_r6_and_rn
b .data_arm_apply_r6_and_rn @ 1: LSL #0
nop
b .data_unknown @ 2: MUL?
nop
b .data_unknown @ 3: MUL?
nop
mov r6, r6, lsr r5 @ 4: LSR #!0
b .data_arm_apply_r6_and_rn
mov r6, r6, lsr #32 @ 5: LSR #32
b .data_arm_apply_r6_and_rn
b .data_unknown @ 6: MUL?
nop
b .data_unknown @ 7: MUL?
nop
mov r6, r6, asr r5 @ 8: ASR #!0
b .data_arm_apply_r6_and_rn
mov r6, r6, asr #32 @ 9: ASR #32
b .data_arm_apply_r6_and_rn
b .data_unknown @ A: MUL?
nop
b .data_unknown @ B: MUL?
nop
mov r6, r6, ror r5 @ C: ROR #!0
b .data_arm_apply_r6_and_rn
mov r6, r6, rrx @ D: RRX
b .data_arm_apply_r6_and_rn
b .data_unknown @ E: MUL?
nop
b .data_unknown @ F: MUL?
/*
* Function: arm6_7_proc_init (void)
* : arm6_7_proc_fin (void)
*
* Notes : This processor does not require these
*/
ENTRY(cpu_arm6_proc_init)
ENTRY(cpu_arm7_proc_init)
mov pc, lr
ENTRY(cpu_arm6_proc_fin)
ENTRY(cpu_arm7_proc_fin)
mov r0, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
msr cpsr_c, r0
mov r0, #0x31 @ ....S..DP...M
mcr p15, 0, r0, c1, c0, 0 @ disable caches
mov pc, lr
ENTRY(cpu_arm6_do_idle)
ENTRY(cpu_arm7_do_idle)
mov pc, lr
/*
* Function: arm6_7_switch_mm(unsigned long pgd_phys)
* Params : pgd_phys Physical address of page table
* Purpose : Perform a task switch, saving the old process's state, and restoring
* the new.
*/
ENTRY(cpu_arm6_switch_mm)
ENTRY(cpu_arm7_switch_mm)
mov r1, #0
mcr p15, 0, r1, c7, c0, 0 @ flush cache
mcr p15, 0, r0, c2, c0, 0 @ update page table ptr
mcr p15, 0, r1, c5, c0, 0 @ flush TLBs
mov pc, lr
/*
* Function: arm6_7_set_pte(pte_t *ptep, pte_t pte)
* Params : r0 = Address to set
* : r1 = value to set
* Purpose : Set a PTE and flush it out of any WB cache
*/
.align 5
ENTRY(cpu_arm6_set_pte)
ENTRY(cpu_arm7_set_pte)
str r1, [r0], #-2048 @ linux version
eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY
bic r2, r1, #PTE_SMALL_AP_MASK
bic r2, r2, #PTE_TYPE_MASK
orr r2, r2, #PTE_TYPE_SMALL
tst r1, #L_PTE_USER @ User?
orrne r2, r2, #PTE_SMALL_AP_URO_SRW
tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty?
orreq r2, r2, #PTE_SMALL_AP_UNO_SRW
tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young
movne r2, #0
str r2, [r0] @ hardware version
mov pc, lr
/*
* Function: _arm6_7_reset
* Params : r0 = address to jump to
* Notes : This sets up everything for a reset
*/
ENTRY(cpu_arm6_reset)
ENTRY(cpu_arm7_reset)
mov r1, #0
mcr p15, 0, r1, c7, c0, 0 @ flush cache
mcr p15, 0, r1, c5, c0, 0 @ flush TLB
mov r1, #0x30
mcr p15, 0, r1, c1, c0, 0 @ turn off MMU etc
mov pc, r0
__INIT
.type __arm6_setup, #function
__arm6_setup: mov r0, #0
mcr p15, 0, r0, c7, c0 @ flush caches on v3
mcr p15, 0, r0, c5, c0 @ flush TLBs on v3
mov r0, #0x3d @ . ..RS BLDP WCAM
orr r0, r0, #0x100 @ . ..01 0011 1101
mov pc, lr
.size __arm6_setup, . - __arm6_setup
.type __arm7_setup, #function
__arm7_setup: mov r0, #0
mcr p15, 0, r0, c7, c0 @ flush caches on v3
mcr p15, 0, r0, c5, c0 @ flush TLBs on v3
mcr p15, 0, r0, c3, c0 @ load domain access register
mov r0, #0x7d @ . ..RS BLDP WCAM
orr r0, r0, #0x100 @ . ..01 0111 1101
mov pc, lr
.size __arm7_setup, . - __arm7_setup
__INITDATA
/*
* Purpose : Function pointers used to access above functions - all calls
* come through these
*/
.type arm6_processor_functions, #object
ENTRY(arm6_processor_functions)
.word cpu_arm6_data_abort
.word cpu_arm6_proc_init
.word cpu_arm6_proc_fin
.word cpu_arm6_reset
.word cpu_arm6_do_idle
.word cpu_arm6_dcache_clean_area
.word cpu_arm6_switch_mm
.word cpu_arm6_set_pte
.size arm6_processor_functions, . - arm6_processor_functions
/*
* Purpose : Function pointers used to access above functions - all calls
* come through these
*/
.type arm7_processor_functions, #object
ENTRY(arm7_processor_functions)
.word cpu_arm7_data_abort
.word cpu_arm7_proc_init
.word cpu_arm7_proc_fin
.word cpu_arm7_reset
.word cpu_arm7_do_idle
.word cpu_arm7_dcache_clean_area
.word cpu_arm7_switch_mm
.word cpu_arm7_set_pte
.size arm7_processor_functions, . - arm7_processor_functions
.section ".rodata"
.type cpu_arch_name, #object
cpu_arch_name: .asciz "armv3"
.size cpu_arch_name, . - cpu_arch_name
.type cpu_elf_name, #object
cpu_elf_name: .asciz "v3"
.size cpu_elf_name, . - cpu_elf_name
.type cpu_arm6_name, #object
cpu_arm6_name: .asciz "ARM6"
.size cpu_arm6_name, . - cpu_arm6_name
.type cpu_arm610_name, #object
cpu_arm610_name:
.asciz "ARM610"
.size cpu_arm610_name, . - cpu_arm610_name
.type cpu_arm7_name, #object
cpu_arm7_name: .asciz "ARM7"
.size cpu_arm7_name, . - cpu_arm7_name
.type cpu_arm710_name, #object
cpu_arm710_name:
.asciz "ARM710"
.size cpu_arm710_name, . - cpu_arm710_name
.align
.section ".proc.info", #alloc, #execinstr
.type __arm6_proc_info, #object
__arm6_proc_info:
.long 0x41560600
.long 0xfffffff0
.long 0x00000c1e
b __arm6_setup
.long cpu_arch_name
.long cpu_elf_name
.long HWCAP_SWP | HWCAP_26BIT
.long cpu_arm6_name
.long arm6_processor_functions
.long v3_tlb_fns
.long v3_user_fns
.long v3_cache_fns
.size __arm6_proc_info, . - __arm6_proc_info
.type __arm610_proc_info, #object
__arm610_proc_info:
.long 0x41560610
.long 0xfffffff0
.long 0x00000c1e
b __arm6_setup
.long cpu_arch_name
.long cpu_elf_name
.long HWCAP_SWP | HWCAP_26BIT
.long cpu_arm610_name
.long arm6_processor_functions
.long v3_tlb_fns
.long v3_user_fns
.long v3_cache_fns
.size __arm610_proc_info, . - __arm610_proc_info
.type __arm7_proc_info, #object
__arm7_proc_info:
.long 0x41007000
.long 0xffffff00
.long 0x00000c1e
b __arm7_setup
.long cpu_arch_name
.long cpu_elf_name
.long HWCAP_SWP | HWCAP_26BIT
.long cpu_arm7_name
.long arm7_processor_functions
.long v3_tlb_fns
.long v3_user_fns
.long v3_cache_fns
.size __arm7_proc_info, . - __arm7_proc_info
.type __arm710_proc_info, #object
__arm710_proc_info:
.long 0x41007100
.long 0xfff8ff00
.long PMD_TYPE_SECT | \
PMD_SECT_BUFFERABLE | \
PMD_SECT_CACHEABLE | \
PMD_BIT4 | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ
b __arm7_setup
.long cpu_arch_name
.long cpu_elf_name
.long HWCAP_SWP | HWCAP_26BIT
.long cpu_arm710_name
.long arm7_processor_functions
.long v3_tlb_fns
.long v3_user_fns
.long v3_cache_fns
.size __arm710_proc_info, . - __arm710_proc_info

267
arch/arm/mm/proc-arm720.S Normal file

@@ -0,0 +1,267 @@
/*
* linux/arch/arm/mm/proc-arm720.S: MMU functions for ARM720
*
* Copyright (C) 2000 Steve Hill (sjhill@cotw.com)
* Rob Scott (rscott@mtrob.fdns.net)
* Copyright (C) 2000 ARM Limited, Deep Blue Solutions Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*
* This is the low-level assembler for performing cache and TLB
* functions on the ARM720T. The ARM720T has a write-through IDC
* cache, so we don't need to clean it.
*
* Changelog:
* 05-09-2000 SJH Created by moving 720 specific functions
* out of 'proc-arm6,7.S' per RMK discussion
* 07-25-2000 SJH Added idle function.
* 08-25-2000 DBS Updated for integration of ARM Ltd version.
*/
#include <linux/linkage.h>
#include <linux/init.h>
#include <asm/assembler.h>
#include <asm/constants.h>
#include <asm/pgtable.h>
#include <asm/procinfo.h>
#include <asm/ptrace.h>
#include <asm/hardware.h>
/*
* Function: arm720_proc_init (void)
* : arm720_proc_fin (void)
*
* Notes : This processor does not require these
*/
ENTRY(cpu_arm720_dcache_clean_area)
ENTRY(cpu_arm720_proc_init)
mov pc, lr
ENTRY(cpu_arm720_proc_fin)
stmfd sp!, {lr}
mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
msr cpsr_c, ip
mrc p15, 0, r0, c1, c0, 0
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
mcr p15, 0, r1, c7, c7, 0 @ invalidate cache
ldmfd sp!, {pc}
/*
* Function: arm720_proc_do_idle(void)
* Params : r0 = unused
* Purpose : put the processor in the proper idle mode
*/
ENTRY(cpu_arm720_do_idle)
mov pc, lr
/*
* Function: arm720_switch_mm(unsigned long pgd_phys)
* Params : pgd_phys Physical address of page table
* Purpose : Perform a task switch, saving the old process's state and restoring
* the new.
*/
ENTRY(cpu_arm720_switch_mm)
mov r1, #0
mcr p15, 0, r1, c7, c7, 0 @ invalidate cache
mcr p15, 0, r0, c2, c0, 0 @ update page table ptr
mcr p15, 0, r1, c8, c7, 0 @ flush TLB (v4)
mov pc, lr
/*
* Function: arm720_set_pte(pte_t *ptep, pte_t pte)
* Params : r0 = Address to set
* : r1 = value to set
* Purpose : Set a PTE and flush it out of any WB cache
*/
.align 5
ENTRY(cpu_arm720_set_pte)
str r1, [r0], #-2048 @ linux version
eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY
bic r2, r1, #PTE_SMALL_AP_MASK
bic r2, r2, #PTE_TYPE_MASK
orr r2, r2, #PTE_TYPE_SMALL
tst r1, #L_PTE_USER @ User?
orrne r2, r2, #PTE_SMALL_AP_URO_SRW
tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty?
orreq r2, r2, #PTE_SMALL_AP_UNO_SRW
tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young
movne r2, #0
str r2, [r0] @ hardware version
mov pc, lr
/*
* Function: arm720_reset
* Params : r0 = address to jump to
* Notes : This sets up everything for a reset
*/
ENTRY(cpu_arm720_reset)
mov ip, #0
mcr p15, 0, ip, c7, c7, 0 @ invalidate cache
mcr p15, 0, ip, c8, c7, 0 @ flush TLB (v4)
mrc p15, 0, ip, c1, c0, 0 @ get ctrl register
bic ip, ip, #0x000f @ ............wcam
bic ip, ip, #0x2100 @ ..v....s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
mov pc, r0
__INIT
.type __arm710_setup, #function
__arm710_setup:
mov r0, #0
mcr p15, 0, r0, c7, c7, 0 @ invalidate caches
mcr p15, 0, r0, c8, c7, 0 @ flush TLB (v4)
mrc p15, 0, r0, c1, c0 @ get control register
ldr r5, arm710_cr1_clear
bic r0, r0, r5
ldr r5, arm710_cr1_set
orr r0, r0, r5
mov pc, lr @ __ret (head.S)
.size __arm710_setup, . - __arm710_setup
/*
* R
* .RVI ZFRS BLDP WCAM
* .... 0001 ..11 1101
*
*/
.type arm710_cr1_clear, #object
.type arm710_cr1_set, #object
arm710_cr1_clear:
.word 0x0f3f
arm710_cr1_set:
.word 0x013d
.type __arm720_setup, #function
__arm720_setup:
mov r0, #0
mcr p15, 0, r0, c7, c7, 0 @ invalidate caches
mcr p15, 0, r0, c8, c7, 0 @ flush TLB (v4)
mrc p15, 0, r0, c1, c0 @ get control register
ldr r5, arm720_cr1_clear
bic r0, r0, r5
ldr r5, arm720_cr1_set
orr r0, r0, r5
mov pc, lr @ __ret (head.S)
.size __arm720_setup, . - __arm720_setup
/*
* R
* .RVI ZFRS BLDP WCAM
* ..1. 1001 ..11 1101
*
*/
.type arm720_cr1_clear, #object
.type arm720_cr1_set, #object
arm720_cr1_clear:
.word 0x2f3f
arm720_cr1_set:
.word 0x213d
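/*
* Each setup routine applies this pair of masks to the CP15 control
* register: the "clear" word is removed first, then the "set" word is
* ORed in, so a bit named in both ends up set.  As a C sketch of
* __arm720_setup (read_cr() stands in for the MRC):
*
*	unsigned long cr = read_cr();	// mrc p15, 0, ..., c1, c0
*	cr &= ~0x2f3f;			// arm720_cr1_clear
*	cr |= 0x213d;			// arm720_cr1_set, per the diagram above
*	return cr;			// written to c1 by __ret in head.S
*/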
__INITDATA
/*
* Purpose : Function pointers used to access above functions - all calls
* come through these
*/
.type arm720_processor_functions, #object
ENTRY(arm720_processor_functions)
.word v4t_late_abort
.word cpu_arm720_proc_init
.word cpu_arm720_proc_fin
.word cpu_arm720_reset
.word cpu_arm720_do_idle
.word cpu_arm720_dcache_clean_area
.word cpu_arm720_switch_mm
.word cpu_arm720_set_pte
.size arm720_processor_functions, . - arm720_processor_functions
.section ".rodata"
.type cpu_arch_name, #object
cpu_arch_name: .asciz "armv4t"
.size cpu_arch_name, . - cpu_arch_name
.type cpu_elf_name, #object
cpu_elf_name: .asciz "v4"
.size cpu_elf_name, . - cpu_elf_name
.type cpu_arm710_name, #object
cpu_arm710_name:
.asciz "ARM710T"
.size cpu_arm710_name, . - cpu_arm710_name
.type cpu_arm720_name, #object
cpu_arm720_name:
.asciz "ARM720T"
.size cpu_arm720_name, . - cpu_arm720_name
.align
/*
* See linux/include/asm-arm/procinfo.h for a definition of this structure.
*/
.section ".proc.info", #alloc, #execinstr
.type __arm710_proc_info, #object
__arm710_proc_info:
.long 0x41807100 @ cpu_val
.long 0xffffff00 @ cpu_mask
.long PMD_TYPE_SECT | \
PMD_SECT_BUFFERABLE | \
PMD_SECT_CACHEABLE | \
PMD_BIT4 | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ
b __arm710_setup @ cpu_flush
.long cpu_arch_name @ arch_name
.long cpu_elf_name @ elf_name
.long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB @ elf_hwcap
.long cpu_arm710_name @ name
.long arm720_processor_functions
.long v4_tlb_fns
.long v4wt_user_fns
.long v4_cache_fns
.size __arm710_proc_info, . - __arm710_proc_info
.type __arm720_proc_info, #object
__arm720_proc_info:
.long 0x41807200 @ cpu_val
.long 0xffffff00 @ cpu_mask
.long PMD_TYPE_SECT | \
PMD_SECT_BUFFERABLE | \
PMD_SECT_CACHEABLE | \
PMD_BIT4 | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ
b __arm720_setup @ cpu_flush
.long cpu_arch_name @ arch_name
.long cpu_elf_name @ elf_name
.long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB @ elf_hwcap
.long cpu_arm720_name @ name
.long arm720_processor_functions
.long v4_tlb_fns
.long v4wt_user_fns
.long v4_cache_fns
.size __arm720_proc_info, . - __arm720_proc_info
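/*
* Read together with the field annotations above, each __*_proc_info
* record matches this C layout (a sketch inferred from the entries in
* this file, with illustrative field names; see
* linux/include/asm-arm/procinfo.h for the real definition).  At boot
* the head code picks the entry for which (cpuid & cpu_mask) == cpu_val:
*
*	struct proc_info_list {
*		unsigned int	cpu_val;
*		unsigned int	cpu_mask;
*		unsigned long	mmu_flags;	// the PMD_* word
*		unsigned long	flush;		// the "b __xxx_setup" slot
*		const char	*arch_name;
*		const char	*elf_name;
*		unsigned int	elf_hwcap;
*		const char	*cpu_name;
*		struct processor	*proc;	// *_processor_functions
*		struct cpu_tlb_fns	*tlb;	// v4_tlb_fns etc.
*		struct cpu_user_fns	*user;	// v4wt_user_fns etc.
*		struct cpu_cache_fns	*cache;	// v4_cache_fns etc.
*	};
*/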

480
arch/arm/mm/proc-arm920.S Normal file

@@ -0,0 +1,480 @@
/*
* linux/arch/arm/mm/proc-arm920.S: MMU functions for ARM920
*
* Copyright (C) 1999,2000 ARM Limited
* Copyright (C) 2000 Deep Blue Solutions Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*
* This is the low-level assembler for performing cache and TLB
* functions on the ARM920.
*
* CONFIG_CPU_ARM920_CPU_IDLE -> nohlt
*/
#include <linux/linkage.h>
#include <linux/config.h>
#include <linux/init.h>
#include <asm/assembler.h>
#include <asm/pgtable.h>
#include <asm/procinfo.h>
#include <asm/hardware.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include "proc-macros.S"
/*
* The size of one data cache line.
*/
#define CACHE_DLINESIZE 32
/*
* The number of data cache segments.
*/
#define CACHE_DSEGMENTS 8
/*
* The number of lines in a cache segment.
*/
#define CACHE_DENTRIES 64
/*
* This is the size at which it becomes more efficient to
* clean the whole cache, rather than using the individual
* cache line maintenance instructions.
*/
#define CACHE_DLIMIT 65536
.text
/*
* cpu_arm920_proc_init()
*/
ENTRY(cpu_arm920_proc_init)
mov pc, lr
/*
* cpu_arm920_proc_fin()
*/
ENTRY(cpu_arm920_proc_fin)
stmfd sp!, {lr}
mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
msr cpsr_c, ip
#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
bl arm920_flush_kern_cache_all
#else
bl v4wt_flush_kern_cache_all
#endif
mrc p15, 0, r0, c1, c0, 0 @ ctrl register
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
ldmfd sp!, {pc}
/*
* cpu_arm920_reset(loc)
*
* Perform a soft reset of the system. Put the CPU into the
* same state as it would be if it had been reset, and branch
* to what would be the reset vector.
*
* loc: location to jump to for soft reset
*/
.align 5
ENTRY(cpu_arm920_reset)
mov ip, #0
mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
mrc p15, 0, ip, c1, c0, 0 @ ctrl register
bic ip, ip, #0x000f @ ............wcam
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
mov pc, r0
/*
* cpu_arm920_do_idle()
*/
.align 5
ENTRY(cpu_arm920_do_idle)
mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
mov pc, lr
#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
/*
* flush_user_cache_all()
*
* Invalidate all cache entries in a particular address
* space.
*/
ENTRY(arm920_flush_user_cache_all)
/* FALLTHROUGH */
/*
* flush_kern_cache_all()
*
* Clean and invalidate the entire cache.
*/
ENTRY(arm920_flush_kern_cache_all)
mov r2, #VM_EXEC
mov ip, #0
__flush_whole_cache:
mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 8 segments
1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries
2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index
subs r3, r3, #1 << 26
bcs 2b @ entries 63 to 0
subs r1, r1, #1 << 5
bcs 1b @ segments 7 to 0
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr
/*
* flush_user_cache_range(start, end, flags)
*
* Invalidate a range of cache entries in the specified
* address space.
*
* - start - start address (inclusive)
* - end - end address (exclusive)
* - flags - vm_flags for address space
*/
ENTRY(arm920_flush_user_cache_range)
mov ip, #0
sub r3, r1, r0 @ calculate total size
cmp r3, #CACHE_DLIMIT
bhs __flush_whole_cache
1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry
tst r2, #VM_EXEC
mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr
/*
* coherent_kern_range(start, end)
*
* Ensure coherency between the Icache and the Dcache in the
* region described by start, end. If you have non-snooping
* Harvard caches, you need to implement this function.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(arm920_coherent_kern_range)
/* FALLTHROUGH */
/*
* coherent_user_range(start, end)
*
* Ensure coherency between the Icache and the Dcache in the
* region described by start, end. If you have non-snooping
* Harvard caches, you need to implement this function.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(arm920_coherent_user_range)
bic r0, r0, #CACHE_DLINESIZE - 1
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov pc, lr
/*
* flush_kern_dcache_page(void *page)
*
* Ensure no D cache aliasing occurs, either with itself or
* the I cache
*
* - addr - page aligned address
*/
ENTRY(arm920_flush_kern_dcache_page)
add r1, r0, #PAGE_SZ
1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov pc, lr
/*
* dma_inv_range(start, end)
*
* Invalidate (discard) the specified virtual address range.
* May not write back any entries. If 'start' or 'end'
* are not cache line aligned, those lines must be written
* back.
*
* - start - virtual start address
* - end - virtual end address
*
* (same as v4wb)
*/
ENTRY(arm920_dma_inv_range)
tst r0, #CACHE_DLINESIZE - 1
bic r0, r0, #CACHE_DLINESIZE - 1
mcrne p15, 0, r0, c7, c10, 1 @ clean D entry
tst r1, #CACHE_DLINESIZE - 1
mcrne p15, 0, r1, c7, c10, 1 @ clean D entry
1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov pc, lr
/*
* dma_clean_range(start, end)
*
* Clean the specified virtual address range.
*
* - start - virtual start address
* - end - virtual end address
*
* (same as v4wb)
*/
ENTRY(arm920_dma_clean_range)
bic r0, r0, #CACHE_DLINESIZE - 1
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov pc, lr
/*
* dma_flush_range(start, end)
*
* Clean and invalidate the specified virtual address range.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(arm920_dma_flush_range)
bic r0, r0, #CACHE_DLINESIZE - 1
1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov pc, lr
ENTRY(arm920_cache_fns)
.long arm920_flush_kern_cache_all
.long arm920_flush_user_cache_all
.long arm920_flush_user_cache_range
.long arm920_coherent_kern_range
.long arm920_coherent_user_range
.long arm920_flush_kern_dcache_page
.long arm920_dma_inv_range
.long arm920_dma_clean_range
.long arm920_dma_flush_range
#endif
ENTRY(cpu_arm920_dcache_clean_area)
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, #CACHE_DLINESIZE
subs r1, r1, #CACHE_DLINESIZE
bhi 1b
mov pc, lr
/* =============================== PageTable ============================== */
/*
* cpu_arm920_switch_mm(pgd)
*
* Set the translation base pointer to be as described by pgd.
*
* pgd: new page tables
*/
.align 5
ENTRY(cpu_arm920_switch_mm)
mov ip, #0
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache
#else
@ && 'Clean & Invalidate whole DCache'
@ && Re-written to use Index Ops.
@ && Uses registers r1, r3 and ip
mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 8 segments
1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries
2: mcr p15, 0, r3, c7, c14, 2 @ clean & invalidate D index
subs r3, r3, #1 << 26
bcs 2b @ entries 63 to 0
subs r1, r1, #1 << 5
bcs 1b @ segments 7 to 0
#endif
mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
mov pc, lr
/*
* cpu_arm920_set_pte(ptep, pte)
*
* Set a PTE and flush it out
*/
.align 5
ENTRY(cpu_arm920_set_pte)
str r1, [r0], #-2048 @ linux version
eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY
bic r2, r1, #PTE_SMALL_AP_MASK
bic r2, r2, #PTE_TYPE_MASK
orr r2, r2, #PTE_TYPE_SMALL
tst r1, #L_PTE_USER @ User?
orrne r2, r2, #PTE_SMALL_AP_URO_SRW
tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty?
orreq r2, r2, #PTE_SMALL_AP_UNO_SRW
tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young?
movne r2, #0
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
eor r3, r2, #0x0a @ C & small page?
tst r3, #0x0b
biceq r2, r2, #4
#endif
str r2, [r0] @ hardware version
mov r0, r0
mcr p15, 0, r0, c7, c10, 1 @ clean D entry
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov pc, lr
__INIT
.type __arm920_setup, #function
__arm920_setup:
mov r0, #0
mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4
mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4
mrc p15, 0, r0, c1, c0 @ get control register v4
ldr r5, arm920_cr1_clear
bic r0, r0, r5
ldr r5, arm920_cr1_set
orr r0, r0, r5
mov pc, lr
.size __arm920_setup, . - __arm920_setup
/*
* R
* .RVI ZFRS BLDP WCAM
* ..11 0001 ..11 0101
*
*/
.type arm920_cr1_clear, #object
.type arm920_cr1_set, #object
arm920_cr1_clear:
.word 0x3f3f
arm920_cr1_set:
.word 0x3135
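__arm920_setup only computes the desired control-register image and returns it in r0; writing it back, with the MMU enable folded in, is presumably left to the common startup code. The mask application reduces to:

/* Sketch; read_cr1() stands in for the mrc above. */
unsigned long arm920_setup_cr1(void)
{
        unsigned long cr = read_cr1();
        return (cr & ~0x3f3f) | 0x3135; /* arm920_cr1_clear, then _set */
}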
__INITDATA
/*
* Purpose : Function pointers used to access above functions - all calls
* come through these
*/
.type arm920_processor_functions, #object
arm920_processor_functions:
.word v4t_early_abort
.word cpu_arm920_proc_init
.word cpu_arm920_proc_fin
.word cpu_arm920_reset
.word cpu_arm920_do_idle
.word cpu_arm920_dcache_clean_area
.word cpu_arm920_switch_mm
.word cpu_arm920_set_pte
.size arm920_processor_functions, . - arm920_processor_functions
.section ".rodata"
.type cpu_arch_name, #object
cpu_arch_name:
.asciz "armv4t"
.size cpu_arch_name, . - cpu_arch_name
.type cpu_elf_name, #object
cpu_elf_name:
.asciz "v4"
.size cpu_elf_name, . - cpu_elf_name
.type cpu_arm920_name, #object
cpu_arm920_name:
.ascii "ARM920T"
#ifndef CONFIG_CPU_ICACHE_DISABLE
.ascii "i"
#endif
#ifndef CONFIG_CPU_DCACHE_DISABLE
.ascii "d"
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
.ascii "(wt)"
#else
.ascii "(wb)"
#endif
#endif
.ascii "\0"
.size cpu_arm920_name, . - cpu_arm920_name
.align
.section ".proc.info", #alloc, #execinstr
.type __arm920_proc_info,#object
__arm920_proc_info:
.long 0x41009200
.long 0xff00fff0
.long PMD_TYPE_SECT | \
PMD_SECT_BUFFERABLE | \
PMD_SECT_CACHEABLE | \
PMD_BIT4 | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ
b __arm920_setup
.long cpu_arch_name
.long cpu_elf_name
.long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB
.long cpu_arm920_name
.long arm920_processor_functions
.long v4wbi_tlb_fns
.long v4wb_user_fns
#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
.long arm920_cache_fns
#else
.long v4wt_cache_fns
#endif
.size __arm920_proc_info, . - __arm920_proc_info

484
arch/arm/mm/proc-arm922.S Normal file

@@ -0,0 +1,484 @@
/*
* linux/arch/arm/mm/proc-arm922.S: MMU functions for ARM922
*
* Copyright (C) 1999,2000 ARM Limited
* Copyright (C) 2000 Deep Blue Solutions Ltd.
* Copyright (C) 2001 Altera Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*
* This is the low-level assembler for performing the cache and TLB
* functions on the arm922.
*
* CONFIG_CPU_ARM922_CPU_IDLE -> nohlt
*/
#include <linux/linkage.h>
#include <linux/config.h>
#include <linux/init.h>
#include <asm/assembler.h>
#include <asm/pgtable.h>
#include <asm/procinfo.h>
#include <asm/hardware.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include "proc-macros.S"
/*
* The size of one data cache line.
*/
#define CACHE_DLINESIZE 32
/*
* The number of data cache segments.
*/
#define CACHE_DSEGMENTS 4
/*
* The number of lines in a cache segment.
*/
#define CACHE_DENTRIES 64
/*
* This is the size at which it becomes more efficient to
* clean the whole cache, rather than using the individual
* cache line maintenance instructions. (I think this should
* be 32768).
*/
#define CACHE_DLIMIT 8192
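These constants are mutually consistent: 4 segments of 64 lines of 32 bytes is exactly 8 KB, the ARM922 D-cache size, so CACHE_DLIMIT falls back to a whole-cache clean once a range covers one full cache worth of data:

#define DCACHE_SIZE (CACHE_DSEGMENTS * CACHE_DENTRIES * CACHE_DLINESIZE)
/* 4 * 64 * 32 == 8192 == CACHE_DLIMIT */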
.text
/*
* cpu_arm922_proc_init()
*/
ENTRY(cpu_arm922_proc_init)
mov pc, lr
/*
* cpu_arm922_proc_fin()
*/
ENTRY(cpu_arm922_proc_fin)
stmfd sp!, {lr}
mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
msr cpsr_c, ip
#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
bl arm922_flush_kern_cache_all
#else
bl v4wt_flush_kern_cache_all
#endif
mrc p15, 0, r0, c1, c0, 0 @ ctrl register
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
ldmfd sp!, {pc}
/*
* cpu_arm922_reset(loc)
*
* Perform a soft reset of the system. Put the CPU into the
* same state as it would be if it had been reset, and branch
* to what would be the reset vector.
*
* loc: location to jump to for soft reset
*/
.align 5
ENTRY(cpu_arm922_reset)
mov ip, #0
mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
mrc p15, 0, ip, c1, c0, 0 @ ctrl register
bic ip, ip, #0x000f @ ............wcam
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
mov pc, r0
/*
* cpu_arm922_do_idle()
*/
.align 5
ENTRY(cpu_arm922_do_idle)
mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
mov pc, lr
#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
/*
* flush_user_cache_all()
*
* Clean and invalidate all cache entries in a particular
* address space.
*/
ENTRY(arm922_flush_user_cache_all)
/* FALLTHROUGH */
/*
* flush_kern_cache_all()
*
* Clean and invalidate the entire cache.
*/
ENTRY(arm922_flush_kern_cache_all)
mov r2, #VM_EXEC
mov ip, #0
__flush_whole_cache:
mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 4 segments
1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries
2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index
subs r3, r3, #1 << 26
bcs 2b @ entries 63 to 0
subs r1, r1, #1 << 5
bcs 1b @ segments 3 to 0
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr
/*
* flush_user_cache_range(start, end, flags)
*
* Clean and invalidate a range of cache entries in the
* specified address range.
*
* - start - start address (inclusive)
* - end - end address (exclusive)
* - flags - vm_flags describing address space
*/
ENTRY(arm922_flush_user_cache_range)
mov ip, #0
sub r3, r1, r0 @ calculate total size
cmp r3, #CACHE_DLIMIT
bhs __flush_whole_cache
1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry
tst r2, #VM_EXEC
mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr
/*
* coherent_kern_range(start, end)
*
* Ensure coherency between the Icache and the Dcache in the
* region described by start, end. If you have non-snooping
* Harvard caches, you need to implement this function.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(arm922_coherent_kern_range)
/* FALLTHROUGH */
/*
* coherent_user_range(start, end)
*
* Ensure coherency between the Icache and the Dcache in the
* region described by start, end. If you have non-snooping
* Harvard caches, you need to implement this function.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(arm922_coherent_user_range)
bic r0, r0, #CACHE_DLINESIZE - 1
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov pc, lr
/*
* flush_kern_dcache_page(void *page)
*
* Ensure no D cache aliasing occurs, either with itself or
* the I cache
*
* - addr - page aligned address
*/
ENTRY(arm922_flush_kern_dcache_page)
add r1, r0, #PAGE_SZ
1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov pc, lr
/*
* dma_inv_range(start, end)
*
* Invalidate (discard) the specified virtual address range.
* May not write back any entries. If 'start' or 'end'
* are not cache line aligned, those lines must be written
* back.
*
* - start - virtual start address
* - end - virtual end address
*
* (same as v4wb)
*/
ENTRY(arm922_dma_inv_range)
tst r0, #CACHE_DLINESIZE - 1
bic r0, r0, #CACHE_DLINESIZE - 1
mcrne p15, 0, r0, c7, c10, 1 @ clean D entry
tst r1, #CACHE_DLINESIZE - 1
mcrne p15, 0, r1, c7, c10, 1 @ clean D entry
1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov pc, lr
/*
* dma_clean_range(start, end)
*
* Clean the specified virtual address range.
*
* - start - virtual start address
* - end - virtual end address
*
* (same as v4wb)
*/
ENTRY(arm922_dma_clean_range)
bic r0, r0, #CACHE_DLINESIZE - 1
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov pc, lr
/*
* dma_flush_range(start, end)
*
* Clean and invalidate the specified virtual address range.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(arm922_dma_flush_range)
bic r0, r0, #CACHE_DLINESIZE - 1
1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov pc, lr
ENTRY(arm922_cache_fns)
.long arm922_flush_kern_cache_all
.long arm922_flush_user_cache_all
.long arm922_flush_user_cache_range
.long arm922_coherent_kern_range
.long arm922_coherent_user_range
.long arm922_flush_kern_dcache_page
.long arm922_dma_inv_range
.long arm922_dma_clean_range
.long arm922_dma_flush_range
#endif
ENTRY(cpu_arm922_dcache_clean_area)
#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, #CACHE_DLINESIZE
subs r1, r1, #CACHE_DLINESIZE
bhi 1b
#endif
mov pc, lr
/* =============================== PageTable ============================== */
/*
* cpu_arm922_switch_mm(pgd)
*
* Set the translation base pointer to be as described by pgd.
*
* pgd: new page tables
*/
.align 5
ENTRY(cpu_arm922_switch_mm)
mov ip, #0
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache
#else
@ && 'Clean & Invalidate whole DCache'
@ && Re-written to use Index Ops.
@ && Uses registers r1, r3 and ip
mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 4 segments
1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries
2: mcr p15, 0, r3, c7, c14, 2 @ clean & invalidate D index
subs r3, r3, #1 << 26
bcs 2b @ entries 63 to 0
subs r1, r1, #1 << 5
bcs 1b @ segments 3 to 0
#endif
mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
mov pc, lr
/*
* cpu_arm922_set_pte(ptep, pte)
*
* Set a PTE and flush it out
*/
.align 5
ENTRY(cpu_arm922_set_pte)
str r1, [r0], #-2048 @ linux version
eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY
bic r2, r1, #PTE_SMALL_AP_MASK
bic r2, r2, #PTE_TYPE_MASK
orr r2, r2, #PTE_TYPE_SMALL
tst r1, #L_PTE_USER @ User?
orrne r2, r2, #PTE_SMALL_AP_URO_SRW
tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty?
orreq r2, r2, #PTE_SMALL_AP_UNO_SRW
tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young?
movne r2, #0
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
eor r3, r2, #0x0a @ C & small page?
tst r3, #0x0b
biceq r2, r2, #4
#endif
str r2, [r0] @ hardware version
mov r0, r0
mcr p15, 0, r0, c7, c10, 1 @ clean D entry
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov pc, lr
__INIT
.type __arm922_setup, #function
__arm922_setup:
mov r0, #0
mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4
mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4
mrc p15, 0, r0, c1, c0 @ get control register v4
ldr r5, arm922_cr1_clear
bic r0, r0, r5
ldr r5, arm922_cr1_set
orr r0, r0, r5
mov pc, lr
.size __arm922_setup, . - __arm922_setup
/*
* R
* .RVI ZFRS BLDP WCAM
* ..11 0001 ..11 0101
*
*/
.type arm922_cr1_clear, #object
.type arm922_cr1_set, #object
arm922_cr1_clear:
.word 0x3f3f
arm922_cr1_set:
.word 0x3135
__INITDATA
/*
* Purpose : Function pointers used to access above functions - all calls
* come through these
*/
.type arm922_processor_functions, #object
arm922_processor_functions:
.word v4t_early_abort
.word cpu_arm922_proc_init
.word cpu_arm922_proc_fin
.word cpu_arm922_reset
.word cpu_arm922_do_idle
.word cpu_arm922_dcache_clean_area
.word cpu_arm922_switch_mm
.word cpu_arm922_set_pte
.size arm922_processor_functions, . - arm922_processor_functions
.section ".rodata"
.type cpu_arch_name, #object
cpu_arch_name:
.asciz "armv4t"
.size cpu_arch_name, . - cpu_arch_name
.type cpu_elf_name, #object
cpu_elf_name:
.asciz "v4"
.size cpu_elf_name, . - cpu_elf_name
.type cpu_arm922_name, #object
cpu_arm922_name:
.ascii "ARM922T"
#ifndef CONFIG_CPU_ICACHE_DISABLE
.ascii "i"
#endif
#ifndef CONFIG_CPU_DCACHE_DISABLE
.ascii "d"
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
.ascii "(wt)"
#else
.ascii "(wb)"
#endif
#endif
.ascii "\0"
.size cpu_arm922_name, . - cpu_arm922_name
.align
.section ".proc.info", #alloc, #execinstr
.type __arm922_proc_info,#object
__arm922_proc_info:
.long 0x41009220
.long 0xff00fff0
.long PMD_TYPE_SECT | \
PMD_SECT_BUFFERABLE | \
PMD_SECT_CACHEABLE | \
PMD_BIT4 | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ
b __arm922_setup
.long cpu_arch_name
.long cpu_elf_name
.long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB
.long cpu_arm922_name
.long arm922_processor_functions
.long v4wbi_tlb_fns
.long v4wb_user_fns
#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
.long arm922_cache_fns
#else
.long v4wt_cache_fns
#endif
.size __arm922_proc_info, . - __arm922_proc_info

562
arch/arm/mm/proc-arm925.S Normal file

@@ -0,0 +1,562 @@
/*
* linux/arch/arm/mm/proc-arm925.S: MMU functions for ARM925
*
* Copyright (C) 1999,2000 ARM Limited
* Copyright (C) 2000 Deep Blue Solutions Ltd.
* Copyright (C) 2002 RidgeRun, Inc.
* Copyright (C) 2002-2003 MontaVista Software, Inc.
*
* Update for Linux-2.6 and cache flush improvements
* Copyright (C) 2004 Nokia Corporation by Tony Lindgren <tony@atomide.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*
* This is the low-level assembler for performing the cache and TLB
* functions on the arm925.
*
* CONFIG_CPU_ARM925_CPU_IDLE -> nohlt
*
* Some additional notes based on deciphering the TI TRM on OMAP-5910:
*
* NOTE1: The TI925T Configuration Register bit "D-cache clean and flush
* entry mode" must be 0 to flush the entries in both segments
* at once. This is the default value. See TRM 2-20 and 2-24 for
* more information.
*
* NOTE2: Default is the "D-cache clean and flush entry mode". It looks
* like the "Transparent mode" must be on for partial cache flushes
* to work in this mode. This mode only works with 16-bit external
* memory. See TRM 2-24 for more information.
*
* NOTE3: Write-back cache flushing seems to be flakey with devices using
* direct memory access, such as USB OHCI. The workaround is to use
* write-through cache with CONFIG_CPU_DCACHE_WRITETHROUGH (this is
* the default for OMAP-1510).
*/
#include <linux/linkage.h>
#include <linux/config.h>
#include <linux/init.h>
#include <asm/assembler.h>
#include <asm/pgtable.h>
#include <asm/procinfo.h>
#include <asm/hardware.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include "proc-macros.S"
/*
* The size of one data cache line.
*/
#define CACHE_DLINESIZE 16
/*
* The number of data cache segments.
*/
#define CACHE_DSEGMENTS 2
/*
* The number of lines in a cache segment.
*/
#define CACHE_DENTRIES 256
/*
* This is the size at which it becomes more efficient to
* clean the whole cache, rather than using the individual
* cache line maintenance instructions.
*/
#define CACHE_DLIMIT 8192
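As in the ARM922 file, the limit equals the cache geometry: 2 segments of 256 lines of 16 bytes is the full 8 KB D-cache:

#define DCACHE_SIZE (CACHE_DSEGMENTS * CACHE_DENTRIES * CACHE_DLINESIZE)
/* 2 * 256 * 16 == 8192 == CACHE_DLIMIT */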
.text
/*
* cpu_arm925_proc_init()
*/
ENTRY(cpu_arm925_proc_init)
mov pc, lr
/*
* cpu_arm925_proc_fin()
*/
ENTRY(cpu_arm925_proc_fin)
stmfd sp!, {lr}
mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
msr cpsr_c, ip
bl arm925_flush_kern_cache_all
mrc p15, 0, r0, c1, c0, 0 @ ctrl register
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
ldmfd sp!, {pc}
/*
* cpu_arm925_reset(loc)
*
* Perform a soft reset of the system. Put the CPU into the
* same state as it would be if it had been reset, and branch
* to what would be the reset vector.
*
* loc: location to jump to for soft reset
*/
.align 5
ENTRY(cpu_arm925_reset)
/* Send software reset to MPU and DSP */
mov ip, #0xff000000
orr ip, ip, #0x00fe0000
orr ip, ip, #0x0000ce00
mov r4, #1
strh r4, [ip, #0x10]
mov ip, #0
mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
mrc p15, 0, ip, c1, c0, 0 @ ctrl register
bic ip, ip, #0x000f @ ............wcam
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
mov pc, r0
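The three-instruction build of the reset address exists because an ARM data-processing immediate is an 8-bit value rotated by an even amount, so 0xfffece00 cannot be loaded with a single mov. The composition, in C:

unsigned long reset_reg = 0xff000000 | 0x00fe0000 | 0x0000ce00;
/* == 0xfffece00, the MPU/DSP software-reset register per the
 * comment above; the strh then writes 1 at offset 0x10 from it. */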
/*
* cpu_arm925_do_idle()
*
* Called with IRQs disabled
*/
.align 10
ENTRY(cpu_arm925_do_idle)
mov r0, #0
mrc p15, 0, r1, c1, c0, 0 @ Read control register
mcr p15, 0, r0, c7, c10, 4 @ Drain write buffer
bic r2, r1, #1 << 12
mcr p15, 0, r2, c1, c0, 0 @ Disable I cache
mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
mcr p15, 0, r1, c1, c0, 0 @ Restore ICache enable
mov pc, lr
/*
* flush_user_cache_all()
*
* Clean and invalidate all cache entries in a particular
* address space.
*/
ENTRY(arm925_flush_user_cache_all)
/* FALLTHROUGH */
/*
* flush_kern_cache_all()
*
* Clean and invalidate the entire cache.
*/
ENTRY(arm925_flush_kern_cache_all)
mov r2, #VM_EXEC
mov ip, #0
__flush_whole_cache:
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache
#else
/* Flush entries in both segments at once, see NOTE1 above */
mov r3, #(CACHE_DENTRIES - 1) << 4 @ 256 entries in segment
2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index
subs r3, r3, #1 << 4
bcs 2b @ entries 255 to 0
#endif
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr
/*
* flush_user_cache_range(start, end, flags)
*
* Clean and invalidate a range of cache entries in the
* specified address range.
*
* - start - start address (inclusive)
* - end - end address (exclusive)
* - flags - vm_flags describing address space
*/
ENTRY(arm925_flush_user_cache_range)
mov ip, #0
sub r3, r1, r0 @ calculate total size
cmp r3, #CACHE_DLIMIT
bgt __flush_whole_cache
1: tst r2, #VM_EXEC
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry
add r0, r0, #CACHE_DLINESIZE
mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry
add r0, r0, #CACHE_DLINESIZE
#else
mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry
mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry
add r0, r0, #CACHE_DLINESIZE
mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry
mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry
add r0, r0, #CACHE_DLINESIZE
#endif
cmp r0, r1
blo 1b
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr
/*
* coherent_kern_range(start, end)
*
* Ensure coherency between the Icache and the Dcache in the
* region described by start, end. If you have non-snooping
* Harvard caches, you need to implement this function.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(arm925_coherent_kern_range)
/* FALLTHROUGH */
/*
* coherent_user_range(start, end)
*
* Ensure coherency between the Icache and the Dcache in the
* region described by start, end. If you have non-snooping
* Harvard caches, you need to implement this function.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(arm925_coherent_user_range)
bic r0, r0, #CACHE_DLINESIZE - 1
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov pc, lr
/*
* flush_kern_dcache_page(void *page)
*
* Ensure no D cache aliasing occurs, either with itself or
* the I cache
*
* - addr - page aligned address
*/
ENTRY(arm925_flush_kern_dcache_page)
add r1, r0, #PAGE_SZ
1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov pc, lr
/*
* dma_inv_range(start, end)
*
* Invalidate (discard) the specified virtual address range.
* May not write back any entries. If 'start' or 'end'
* are not cache line aligned, those lines must be written
* back.
*
* - start - virtual start address
* - end - virtual end address
*
* (same as v4wb)
*/
ENTRY(arm925_dma_inv_range)
#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
tst r0, #CACHE_DLINESIZE - 1
mcrne p15, 0, r0, c7, c10, 1 @ clean D entry
tst r1, #CACHE_DLINESIZE - 1
mcrne p15, 0, r1, c7, c10, 1 @ clean D entry
#endif
bic r0, r0, #CACHE_DLINESIZE - 1
1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov pc, lr
/*
* dma_clean_range(start, end)
*
* Clean the specified virtual address range.
*
* - start - virtual start address
* - end - virtual end address
*
* (same as v4wb)
*/
ENTRY(arm925_dma_clean_range)
#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
bic r0, r0, #CACHE_DLINESIZE - 1
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
#endif
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov pc, lr
/*
* dma_flush_range(start, end)
*
* Clean and invalidate the specified virtual address range.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(arm925_dma_flush_range)
bic r0, r0, #CACHE_DLINESIZE - 1
1:
#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry
#else
mcr p15, 0, r0, c7, c10, 1 @ clean D entry
#endif
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov pc, lr
ENTRY(arm925_cache_fns)
.long arm925_flush_kern_cache_all
.long arm925_flush_user_cache_all
.long arm925_flush_user_cache_range
.long arm925_coherent_kern_range
.long arm925_coherent_user_range
.long arm925_flush_kern_dcache_page
.long arm925_dma_inv_range
.long arm925_dma_clean_range
.long arm925_dma_flush_range
ENTRY(cpu_arm925_dcache_clean_area)
#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, #CACHE_DLINESIZE
subs r1, r1, #CACHE_DLINESIZE
bhi 1b
#endif
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov pc, lr
/* =============================== PageTable ============================== */
/*
* cpu_arm925_switch_mm(pgd)
*
* Set the translation base pointer to be as described by pgd.
*
* pgd: new page tables
*/
.align 5
ENTRY(cpu_arm925_switch_mm)
mov ip, #0
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache
#else
/* Flush entries in both segments at once, see NOTE1 above */
mov r3, #(CACHE_DENTRIES - 1) << 4 @ 256 entries in segment
2: mcr p15, 0, r3, c7, c14, 2 @ clean & invalidate D index
subs r3, r3, #1 << 4
bcs 2b @ entries 255 to 0
#endif
mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
mov pc, lr
/*
* cpu_arm925_set_pte(ptep, pte)
*
* Set a PTE and flush it out
*/
.align 5
ENTRY(cpu_arm925_set_pte)
str r1, [r0], #-2048 @ linux version
eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY
bic r2, r1, #PTE_SMALL_AP_MASK
bic r2, r2, #PTE_TYPE_MASK
orr r2, r2, #PTE_TYPE_SMALL
tst r1, #L_PTE_USER @ User?
orrne r2, r2, #PTE_SMALL_AP_URO_SRW
tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty?
orreq r2, r2, #PTE_SMALL_AP_UNO_SRW
tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young?
movne r2, #0
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
eor r3, r2, #0x0a @ C & small page?
tst r3, #0x0b
biceq r2, r2, #4
#endif
str r2, [r0] @ hardware version
mov r0, r0
#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
mcr p15, 0, r0, c7, c10, 1 @ clean D entry
#endif
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov pc, lr
__INIT
.type __arm925_setup, #function
__arm925_setup:
mov r0, #0
#if defined(CONFIG_CPU_ICACHE_STREAMING_DISABLE)
orr r0,r0,#1 << 7
#endif
/* Transparent on, D-cache clean & flush mode. See NOTE2 above */
orr r0,r0,#1 << 1 @ transparent mode on
mcr p15, 0, r0, c15, c1, 0 @ write TI config register
mov r0, #0
mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4
mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
mov r0, #4 @ disable write-back on caches explicitly
mcr p15, 7, r0, c15, c0, 0
#endif
mrc p15, 0, r0, c1, c0 @ get control register v4
ldr r5, arm925_cr1_clear
bic r0, r0, r5
ldr r5, arm925_cr1_set
orr r0, r0, r5
#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
orr r0, r0, #0x4000 @ .1.. .... .... ....
#endif
mov pc, lr
.size __arm925_setup, . - __arm925_setup
/*
* R
* .RVI ZFRS BLDP WCAM
* .011 0001 ..11 1101
*
*/
.type arm925_cr1_clear, #object
.type arm925_cr1_set, #object
arm925_cr1_clear:
.word 0x7f3f
arm925_cr1_set:
.word 0x313d
__INITDATA
/*
* Purpose : Function pointers used to access above functions - all calls
* come through these
*/
.type arm925_processor_functions, #object
arm925_processor_functions:
.word v4t_early_abort
.word cpu_arm925_proc_init
.word cpu_arm925_proc_fin
.word cpu_arm925_reset
.word cpu_arm925_do_idle
.word cpu_arm925_dcache_clean_area
.word cpu_arm925_switch_mm
.word cpu_arm925_set_pte
.size arm925_processor_functions, . - arm925_processor_functions
.section ".rodata"
.type cpu_arch_name, #object
cpu_arch_name:
.asciz "armv4t"
.size cpu_arch_name, . - cpu_arch_name
.type cpu_elf_name, #object
cpu_elf_name:
.asciz "v4"
.size cpu_elf_name, . - cpu_elf_name
.type cpu_arm925_name, #object
cpu_arm925_name:
.ascii "ARM925T"
#ifndef CONFIG_CPU_ICACHE_DISABLE
.ascii "i"
#endif
#ifndef CONFIG_CPU_DCACHE_DISABLE
.ascii "d"
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
.ascii "(wt)"
#else
.ascii "(wb)"
#endif
#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
.ascii "RR"
#endif
#endif
.ascii "\0"
.size cpu_arm925_name, . - cpu_arm925_name
.align
.section ".proc.info", #alloc, #execinstr
.type __arm925_proc_info,#object
__arm925_proc_info:
.long 0x54029250
.long 0xfffffff0
.long PMD_TYPE_SECT | \
PMD_BIT4 | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ
b __arm925_setup
.long cpu_arch_name
.long cpu_elf_name
.long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB
.long cpu_arm925_name
.long arm925_processor_functions
.long v4wbi_tlb_fns
.long v4wb_user_fns
.long arm925_cache_fns
.size __arm925_proc_info, . - __arm925_proc_info
.type __arm915_proc_info,#object
__arm915_proc_info:
.long 0x54029150
.long 0xfffffff0
.long PMD_TYPE_SECT | \
PMD_BIT4 | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ
b __arm925_setup
.long cpu_arch_name
.long cpu_elf_name
.long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB
.long cpu_arm925_name
.long arm925_processor_functions
.long v4wbi_tlb_fns
.long v4wb_user_fns
.long arm925_cache_fns
.size __arm915_proc_info, . - __arm915_proc_info

495
arch/arm/mm/proc-arm926.S Normal file

@@ -0,0 +1,495 @@
/*
* linux/arch/arm/mm/proc-arm926.S: MMU functions for ARM926EJ-S
*
* Copyright (C) 1999-2001 ARM Limited
* Copyright (C) 2000 Deep Blue Solutions Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*
* This is the low-level assembler for performing the cache and TLB
* functions on the arm926.
*
* CONFIG_CPU_ARM926_CPU_IDLE -> nohlt
*/
#include <linux/linkage.h>
#include <linux/config.h>
#include <linux/init.h>
#include <asm/assembler.h>
#include <asm/pgtable.h>
#include <asm/procinfo.h>
#include <asm/hardware.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include "proc-macros.S"
/*
* This is the maximum size of an area which will be invalidated
* using the single invalidate entry instructions. Anything larger
* than this, and we go for the whole cache.
*
* This value should be chosen so that we take the cheapest
* alternative.
*/
#define CACHE_DLIMIT 16384
/*
* the cache line size of the I and D cache
*/
#define CACHE_DLINESIZE 32
.text
/*
* cpu_arm926_proc_init()
*/
ENTRY(cpu_arm926_proc_init)
mov pc, lr
/*
* cpu_arm926_proc_fin()
*/
ENTRY(cpu_arm926_proc_fin)
stmfd sp!, {lr}
mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
msr cpsr_c, ip
bl arm926_flush_kern_cache_all
mrc p15, 0, r0, c1, c0, 0 @ ctrl register
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
ldmfd sp!, {pc}
/*
* cpu_arm926_reset(loc)
*
* Perform a soft reset of the system. Put the CPU into the
* same state as it would be if it had been reset, and branch
* to what would be the reset vector.
*
* loc: location to jump to for soft reset
*/
.align 5
ENTRY(cpu_arm926_reset)
mov ip, #0
mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
mrc p15, 0, ip, c1, c0, 0 @ ctrl register
bic ip, ip, #0x000f @ ............wcam
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
mov pc, r0
/*
* cpu_arm926_do_idle()
*
* Called with IRQs disabled
*/
.align 10
ENTRY(cpu_arm926_do_idle)
mov r0, #0
mrc p15, 0, r1, c1, c0, 0 @ Read control register
mcr p15, 0, r0, c7, c10, 4 @ Drain write buffer
bic r2, r1, #1 << 12
mcr p15, 0, r2, c1, c0, 0 @ Disable I cache
mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
mcr p15, 0, r1, c1, c0, 0 @ Restore ICache enable
mov pc, lr
/*
* flush_user_cache_all()
*
* Clean and invalidate all cache entries in a particular
* address space.
*/
ENTRY(arm926_flush_user_cache_all)
/* FALLTHROUGH */
/*
* flush_kern_cache_all()
*
* Clean and invalidate the entire cache.
*/
ENTRY(arm926_flush_kern_cache_all)
mov r2, #VM_EXEC
mov ip, #0
__flush_whole_cache:
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache
#else
1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate
bne 1b
#endif
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr
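Unlike the 920/922 index loops, the arm926 path leans on the ARMv5 "test, clean and invalidate D-cache line" operation: each mrc processes one dirty line, and the condition flags signal (via Z) when none remain, hence the bne. A sketch, with dcache_test_clean_inv() as a hypothetical wrapper returning non-zero once the cache is clean:

while (!dcache_test_clean_inv())
        ;       /* one dirty line handled per iteration */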
/*
* flush_user_cache_range(start, end, flags)
*
* Clean and invalidate a range of cache entries in the
* specified address range.
*
* - start - start address (inclusive)
* - end - end address (exclusive)
* - flags - vm_flags describing address space
*/
ENTRY(arm926_flush_user_cache_range)
mov ip, #0
sub r3, r1, r0 @ calculate total size
cmp r3, #CACHE_DLIMIT
bgt __flush_whole_cache
1: tst r2, #VM_EXEC
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry
add r0, r0, #CACHE_DLINESIZE
mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry
add r0, r0, #CACHE_DLINESIZE
#else
mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry
mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry
add r0, r0, #CACHE_DLINESIZE
mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry
mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry
add r0, r0, #CACHE_DLINESIZE
#endif
cmp r0, r1
blo 1b
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr
/*
* coherent_kern_range(start, end)
*
* Ensure coherency between the Icache and the Dcache in the
* region described by start, end. If you have non-snooping
* Harvard caches, you need to implement this function.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(arm926_coherent_kern_range)
/* FALLTHROUGH */
/*
* coherent_user_range(start, end)
*
* Ensure coherency between the Icache and the Dcache in the
* region described by start, end. If you have non-snooping
* Harvard caches, you need to implement this function.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(arm926_coherent_user_range)
bic r0, r0, #CACHE_DLINESIZE - 1
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov pc, lr
/*
* flush_kern_dcache_page(void *page)
*
* Ensure no D cache aliasing occurs, either with itself or
* the I cache
*
* - addr - page aligned address
*/
ENTRY(arm926_flush_kern_dcache_page)
add r1, r0, #PAGE_SZ
1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov pc, lr
/*
* dma_inv_range(start, end)
*
* Invalidate (discard) the specified virtual address range.
* May not write back any entries. If 'start' or 'end'
* are not cache line aligned, those lines must be written
* back.
*
* - start - virtual start address
* - end - virtual end address
*
* (same as v4wb)
*/
ENTRY(arm926_dma_inv_range)
#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
tst r0, #CACHE_DLINESIZE - 1
mcrne p15, 0, r0, c7, c10, 1 @ clean D entry
tst r1, #CACHE_DLINESIZE - 1
mcrne p15, 0, r1, c7, c10, 1 @ clean D entry
#endif
bic r0, r0, #CACHE_DLINESIZE - 1
1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov pc, lr
/*
* dma_clean_range(start, end)
*
* Clean the specified virtual address range.
*
* - start - virtual start address
* - end - virtual end address
*
* (same as v4wb)
*/
ENTRY(arm926_dma_clean_range)
#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
bic r0, r0, #CACHE_DLINESIZE - 1
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
#endif
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov pc, lr
/*
* dma_flush_range(start, end)
*
* Clean and invalidate the specified virtual address range.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(arm926_dma_flush_range)
bic r0, r0, #CACHE_DLINESIZE - 1
1:
#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry
#else
mcr p15, 0, r0, c7, c10, 1 @ clean D entry
#endif
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov pc, lr
ENTRY(arm926_cache_fns)
.long arm926_flush_kern_cache_all
.long arm926_flush_user_cache_all
.long arm926_flush_user_cache_range
.long arm926_coherent_kern_range
.long arm926_coherent_user_range
.long arm926_flush_kern_dcache_page
.long arm926_dma_inv_range
.long arm926_dma_clean_range
.long arm926_dma_flush_range
ENTRY(cpu_arm926_dcache_clean_area)
#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, #CACHE_DLINESIZE
subs r1, r1, #CACHE_DLINESIZE
bhi 1b
#endif
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov pc, lr
/* =============================== PageTable ============================== */
/*
* cpu_arm926_switch_mm(pgd)
*
* Set the translation base pointer to be as described by pgd.
*
* pgd: new page tables
*/
.align 5
ENTRY(cpu_arm926_switch_mm)
mov ip, #0
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache
#else
@ && 'Clean & Invalidate whole DCache'
1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate
bne 1b
#endif
mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
mov pc, lr
/*
* cpu_arm926_set_pte(ptep, pte)
*
* Set a PTE and flush it out
*/
.align 5
ENTRY(cpu_arm926_set_pte)
str r1, [r0], #-2048 @ linux version
eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY
bic r2, r1, #PTE_SMALL_AP_MASK
bic r2, r2, #PTE_TYPE_MASK
orr r2, r2, #PTE_TYPE_SMALL
tst r1, #L_PTE_USER @ User?
orrne r2, r2, #PTE_SMALL_AP_URO_SRW
tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty?
orreq r2, r2, #PTE_SMALL_AP_UNO_SRW
tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young?
movne r2, #0
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
eor r3, r2, #0x0a @ C & small page?
tst r3, #0x0b
biceq r2, r2, #4
#endif
str r2, [r0] @ hardware version
mov r0, r0
#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
mcr p15, 0, r0, c7, c10, 1 @ clean D entry
#endif
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov pc, lr
__INIT
.type __arm926_setup, #function
__arm926_setup:
mov r0, #0
mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4
mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
mov r0, #4 @ disable write-back on caches explicitly
mcr p15, 7, r0, c15, c0, 0
#endif
mrc p15, 0, r0, c1, c0 @ get control register v4
ldr r5, arm926_cr1_clear
bic r0, r0, r5
ldr r5, arm926_cr1_set
orr r0, r0, r5
#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
orr r0, r0, #0x4000 @ .1.. .... .... ....
#endif
mov pc, lr
.size __arm926_setup, . - __arm926_setup
/*
* R
* .RVI ZFRS BLDP WCAM
* .011 0001 ..11 0101
*
*/
.type arm926_cr1_clear, #object
.type arm926_cr1_set, #object
arm926_cr1_clear:
.word 0x7f3f
arm926_cr1_set:
.word 0x3135
__INITDATA
/*
* Purpose : Function pointers used to access above functions - all calls
* come through these
*/
.type arm926_processor_functions, #object
arm926_processor_functions:
.word v5tj_early_abort
.word cpu_arm926_proc_init
.word cpu_arm926_proc_fin
.word cpu_arm926_reset
.word cpu_arm926_do_idle
.word cpu_arm926_dcache_clean_area
.word cpu_arm926_switch_mm
.word cpu_arm926_set_pte
.size arm926_processor_functions, . - arm926_processor_functions
.section ".rodata"
.type cpu_arch_name, #object
cpu_arch_name:
.asciz "armv5tej"
.size cpu_arch_name, . - cpu_arch_name
.type cpu_elf_name, #object
cpu_elf_name:
.asciz "v5"
.size cpu_elf_name, . - cpu_elf_name
.type cpu_arm926_name, #object
cpu_arm926_name:
.ascii "ARM926EJ-S"
#ifndef CONFIG_CPU_ICACHE_DISABLE
.ascii "i"
#endif
#ifndef CONFIG_CPU_DCACHE_DISABLE
.ascii "d"
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
.ascii "(wt)"
#else
.ascii "(wb)"
#endif
#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
.ascii "RR"
#endif
#endif
.ascii "\0"
.size cpu_arm926_name, . - cpu_arm926_name
.align
.section ".proc.info", #alloc, #execinstr
.type __arm926_proc_info,#object
__arm926_proc_info:
.long 0x41069260 @ ARM926EJ-S (v5TEJ)
.long 0xff0ffff0
.long PMD_TYPE_SECT | \
PMD_SECT_BUFFERABLE | \
PMD_SECT_CACHEABLE | \
PMD_BIT4 | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ
b __arm926_setup
.long cpu_arch_name
.long cpu_elf_name
.long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA
.long cpu_arm926_name
.long arm926_processor_functions
.long v4wbi_tlb_fns
.long v4wb_user_fns
.long arm926_cache_fns
.size __arm926_proc_info, . - __arm926_proc_info

51
arch/arm/mm/proc-macros.S Normal file

@@ -0,0 +1,51 @@
/*
* We need constants.h for:
* VMA_VM_MM
* VMA_VM_FLAGS
* VM_EXEC
*/
#include <asm/constants.h>
#include <asm/thread_info.h>
/*
* vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm)
*/
.macro vma_vm_mm, rd, rn
ldr \rd, [\rn, #VMA_VM_MM]
.endm
/*
* vma_vm_flags - get vma->vm_flags
*/
.macro vma_vm_flags, rd, rn
ldr \rd, [\rn, #VMA_VM_FLAGS]
.endm
.macro tsk_mm, rd, rn
ldr \rd, [\rn, #TI_TASK]
ldr \rd, [\rd, #TSK_ACTIVE_MM]
.endm
/*
* act_mm - get current->active_mm
*/
.macro act_mm, rd
bic \rd, sp, #8128
bic \rd, \rd, #63
ldr \rd, [\rd, #TI_TASK]
ldr \rd, [\rd, #TSK_ACTIVE_MM]
.endm
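act_mm relies on the kernel-stack layout: thread_info lives at the bottom of the 8 KB stack, so masking sp down to an 8 KB boundary locates it. The mask is split over two bics (8128 | 63 == 8191) because 8191 is not a valid ARM immediate. A sketch with minimal stand-in types (the real offsets come from TI_TASK and TSK_ACTIVE_MM):

struct mm_struct;
struct task_struct { struct mm_struct *active_mm; };    /* stand-in */
struct thread_info { struct task_struct *task; };       /* stand-in */

struct mm_struct *act_mm_sketch(unsigned long sp)
{
        struct thread_info *ti = (struct thread_info *)(sp & ~8191UL);
        return ti->task->active_mm;
}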
/*
* mmid - get context id from mm pointer (mm->context.id)
*/
.macro mmid, rd, rn
ldr \rd, [\rn, #MM_CONTEXT_ID]
.endm
/*
* asid - mask the ASID out of the context ID
*/
.macro asid, rd, rn
and \rd, \rn, #255
.endm

272
arch/arm/mm/proc-sa110.S Normal file

@@ -0,0 +1,272 @@
/*
* linux/arch/arm/mm/proc-sa110.S
*
* Copyright (C) 1997-2002 Russell King
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* MMU functions for SA110
*
* This is the low-level assembler for performing the cache and TLB
* functions on the StrongARM-110.
*/
#include <linux/linkage.h>
#include <linux/init.h>
#include <asm/assembler.h>
#include <asm/constants.h>
#include <asm/procinfo.h>
#include <asm/hardware.h>
#include <asm/pgtable.h>
#include <asm/ptrace.h>
/*
* the cache line size of the I and D cache
*/
#define DCACHELINESIZE 32
#define FLUSH_OFFSET 32768
.macro flush_110_dcache rd, ra, re
ldr \rd, =flush_base
ldr \ra, [\rd]
eor \ra, \ra, #FLUSH_OFFSET
str \ra, [\rd]
add \re, \ra, #16384 @ only necessary for 16k
1001: ldr \rd, [\ra], #DCACHELINESIZE
teq \re, \ra
bne 1001b
.endm
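The StrongARM-110 has no clean-D-cache coprocessor operation, so this macro cleans by displacement: one load per line across a dedicated 16 KB flush window forces every dirty line to be evicted, and hence written back. Toggling FLUSH_OFFSET alternates between two windows so consecutive flushes do not merely re-hit resident lines. A sketch:

void flush_by_reading(volatile unsigned char *window)   /* flush_base */
{
        unsigned int off;
        for (off = 0; off < 16384; off += DCACHELINESIZE)
                (void)window[off];      /* each load displaces a line */
}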
.data
flush_base:
.long FLUSH_BASE
.text
/*
* cpu_sa110_proc_init()
*/
ENTRY(cpu_sa110_proc_init)
mov r0, #0
mcr p15, 0, r0, c15, c1, 2 @ Enable clock switching
mov pc, lr
/*
* cpu_sa110_proc_fin()
*/
ENTRY(cpu_sa110_proc_fin)
stmfd sp!, {lr}
mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
msr cpsr_c, ip
bl v4wb_flush_kern_cache_all @ clean caches
1: mov r0, #0
mcr p15, 0, r0, c15, c2, 2 @ Disable clock switching
mrc p15, 0, r0, c1, c0, 0 @ ctrl register
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
ldmfd sp!, {pc}
/*
* cpu_sa110_reset(loc)
*
* Perform a soft reset of the system. Put the CPU into the
* same state as it would be if it had been reset, and branch
* to what would be the reset vector.
*
* loc: location to jump to for soft reset
*/
.align 5
ENTRY(cpu_sa110_reset)
mov ip, #0
mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
mrc p15, 0, ip, c1, c0, 0 @ ctrl register
bic ip, ip, #0x000f @ ............wcam
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
mov pc, r0
/*
* cpu_sa110_do_idle(type)
*
* Cause the processor to idle
*
* type: call type:
* 0 = slow idle
* 1 = fast idle
* 2 = switch to slow processor clock
* 3 = switch to fast processor clock
*/
.align 5
ENTRY(cpu_sa110_do_idle)
mcr p15, 0, ip, c15, c2, 2 @ disable clock switching
ldr r1, =UNCACHEABLE_ADDR @ load from uncacheable loc
ldr r1, [r1, #0] @ force switch to MCLK
mov r0, r0 @ safety
mov r0, r0 @ safety
mov r0, r0 @ safety
mcr p15, 0, r0, c15, c8, 2 @ Wait for interrupt, cache aligned
mov r0, r0 @ safety
mov r0, r0 @ safety
mov r0, r0 @ safety
mcr p15, 0, r0, c15, c1, 2 @ enable clock switching
mov pc, lr
/* ================================= CACHE ================================ */
/*
* cpu_sa110_dcache_clean_area(addr,sz)
*
* Clean the specified entry of any caches such that the MMU
* translation fetches will obtain correct data.
*
* addr: cache-unaligned virtual address
*/
.align 5
ENTRY(cpu_sa110_dcache_clean_area)
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, #DCACHELINESIZE
subs r1, r1, #DCACHELINESIZE
bhi 1b
mov pc, lr
/* =============================== PageTable ============================== */
/*
* cpu_sa110_switch_mm(pgd)
*
* Set the translation base pointer to be as described by pgd.
*
* pgd: new page tables
*/
.align 5
ENTRY(cpu_sa110_switch_mm)
flush_110_dcache r3, ip, r1
mov r1, #0
mcr p15, 0, r1, c7, c5, 0 @ invalidate I cache
mcr p15, 0, r1, c7, c10, 4 @ drain WB
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs
mov pc, lr
/*
* cpu_sa110_set_pte(ptep, pte)
*
* Set a PTE and flush it out
*/
.align 5
ENTRY(cpu_sa110_set_pte)
str r1, [r0], #-2048 @ linux version
eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY
bic r2, r1, #PTE_SMALL_AP_MASK
bic r2, r2, #PTE_TYPE_MASK
orr r2, r2, #PTE_TYPE_SMALL
tst r1, #L_PTE_USER @ User?
orrne r2, r2, #PTE_SMALL_AP_URO_SRW
tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty?
orreq r2, r2, #PTE_SMALL_AP_UNO_SRW
tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young?
movne r2, #0
str r2, [r0] @ hardware version
mov r0, r0
mcr p15, 0, r0, c7, c10, 1 @ clean D entry
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov pc, lr
__INIT
.type __sa110_setup, #function
__sa110_setup:
mov r10, #0
mcr p15, 0, r10, c7, c7 @ invalidate I,D caches on v4
mcr p15, 0, r10, c7, c10, 4 @ drain write buffer on v4
mcr p15, 0, r10, c8, c7 @ invalidate I,D TLBs on v4
mrc p15, 0, r0, c1, c0 @ get control register v4
ldr r5, sa110_cr1_clear
bic r0, r0, r5
ldr r5, sa110_cr1_set
orr r0, r0, r5
mov pc, lr
.size __sa110_setup, . - __sa110_setup
/*
* R
* .RVI ZFRS BLDP WCAM
* ..01 0001 ..11 1101
*
*/
.type sa110_cr1_clear, #object
.type sa110_cr1_set, #object
sa110_cr1_clear:
.word 0x3f3f
sa110_cr1_set:
.word 0x113d
__INITDATA
/*
* Purpose : Function pointers used to access above functions - all calls
* come through these
*/
.type sa110_processor_functions, #object
ENTRY(sa110_processor_functions)
.word v4_early_abort
.word cpu_sa110_proc_init
.word cpu_sa110_proc_fin
.word cpu_sa110_reset
.word cpu_sa110_do_idle
.word cpu_sa110_dcache_clean_area
.word cpu_sa110_switch_mm
.word cpu_sa110_set_pte
.size sa110_processor_functions, . - sa110_processor_functions
.section ".rodata"
.type cpu_arch_name, #object
cpu_arch_name:
.asciz "armv4"
.size cpu_arch_name, . - cpu_arch_name
.type cpu_elf_name, #object
cpu_elf_name:
.asciz "v4"
.size cpu_elf_name, . - cpu_elf_name
.type cpu_sa110_name, #object
cpu_sa110_name:
.asciz "StrongARM-110"
.size cpu_sa110_name, . - cpu_sa110_name
.align
.section ".proc.info", #alloc, #execinstr
.type __sa110_proc_info,#object
__sa110_proc_info:
.long 0x4401a100
.long 0xfffffff0
.long PMD_TYPE_SECT | \
PMD_SECT_BUFFERABLE | \
PMD_SECT_CACHEABLE | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ
b __sa110_setup
.long cpu_arch_name
.long cpu_elf_name
.long HWCAP_SWP | HWCAP_HALF | HWCAP_26BIT | HWCAP_FAST_MULT
.long cpu_sa110_name
.long sa110_processor_functions
.long v4wb_tlb_fns
.long v4wb_user_fns
.long v4wb_cache_fns
.size __sa110_proc_info, . - __sa110_proc_info

323
arch/arm/mm/proc-sa1100.S Normal file

@@ -0,0 +1,323 @@
/*
* linux/arch/arm/mm/proc-sa1100.S
*
* Copyright (C) 1997-2002 Russell King
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* MMU functions for SA1100
*
* This is the low-level assembler for performing the cache and TLB
* functions on the StrongARM-1100 and StrongARM-1110.
*
* Note that SA1100 and SA1110 share everything but their name and CPU ID.
*
* 12-jun-2000, Erik Mouw (J.A.K.Mouw@its.tudelft.nl):
* Flush the read buffer at context switches
*/
#include <linux/linkage.h>
#include <linux/init.h>
#include <asm/assembler.h>
#include <asm/constants.h>
#include <asm/procinfo.h>
#include <asm/hardware.h>
#include <asm/pgtable.h>
/*
* the cache line size of the I and D cache
*/
#define DCACHELINESIZE 32
#define FLUSH_OFFSET 32768
.macro flush_1100_dcache rd, ra, re
ldr \rd, =flush_base
ldr \ra, [\rd]
eor \ra, \ra, #FLUSH_OFFSET
str \ra, [\rd]
add \re, \ra, #8192 @ only necessary for 8k
1001: ldr \rd, [\ra], #DCACHELINESIZE
teq \re, \ra
bne 1001b
#ifdef FLUSH_BASE_MINICACHE
add \ra, \ra, #FLUSH_BASE_MINICACHE - FLUSH_BASE
add \re, \ra, #512 @ only 512 bytes
1002: ldr \rd, [\ra], #DCACHELINESIZE
teq \re, \ra
bne 1002b
#endif
.endm
.data
flush_base:
.long FLUSH_BASE
.text
__INIT
/*
* cpu_sa1100_proc_init()
*/
ENTRY(cpu_sa1100_proc_init)
mov r0, #0
mcr p15, 0, r0, c15, c1, 2 @ Enable clock switching
mcr p15, 0, r0, c9, c0, 5 @ Allow read-buffer operations from userland
mov pc, lr
.previous
/*
* cpu_sa1100_proc_fin()
*
* Prepare the CPU for reset:
* - Disable interrupts
* - Clean and turn off caches.
*/
ENTRY(cpu_sa1100_proc_fin)
stmfd sp!, {lr}
mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
msr cpsr_c, ip
flush_1100_dcache r0, r1, r2 @ clean caches
mov r0, #0
mcr p15, 0, r0, c15, c2, 2 @ Disable clock switching
mrc p15, 0, r0, c1, c0, 0 @ ctrl register
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
ldmfd sp!, {pc}
/*
* cpu_sa1100_reset(loc)
*
* Perform a soft reset of the system. Put the CPU into the
* same state as it would be if it had been reset, and branch
* to what would be the reset vector.
*
* loc: location to jump to for soft reset
*/
.align 5
ENTRY(cpu_sa1100_reset)
mov ip, #0
mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
mrc p15, 0, ip, c1, c0, 0 @ ctrl register
bic ip, ip, #0x000f @ ............wcam
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
mov pc, r0
/*
* cpu_sa1100_do_idle(type)
*
* Cause the processor to idle
*
* type: call type:
* 0 = slow idle
* 1 = fast idle
* 2 = switch to slow processor clock
* 3 = switch to fast processor clock
*/
.align 5
ENTRY(cpu_sa1100_do_idle)
mov r0, r0 @ 4 nop padding
mov r0, r0
mov r0, r0
mov r0, r0 @ 4 nop padding
mov r0, r0
mov r0, r0
mov r0, #0
ldr r1, =UNCACHEABLE_ADDR @ ptr to uncacheable address
@ --- aligned to a cache line
mcr p15, 0, r0, c15, c2, 2 @ disable clock switching
ldr r1, [r1, #0] @ force switch to MCLK
mcr p15, 0, r0, c15, c8, 2 @ wait for interrupt
mov r0, r0 @ safety
mcr p15, 0, r0, c15, c1, 2 @ enable clock switching
mov pc, lr
/* ================================= CACHE ================================ */
/*
* cpu_sa1100_dcache_clean_area(addr,sz)
*
* Clean the specified entry of any caches such that the MMU
* translation fetches will obtain correct data.
*
* addr: cache-unaligned virtual address
*/
.align 5
ENTRY(cpu_sa1100_dcache_clean_area)
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, #DCACHELINESIZE
subs r1, r1, #DCACHELINESIZE
bhi 1b
mov pc, lr
/* =============================== PageTable ============================== */
/*
* cpu_sa1100_switch_mm(pgd)
*
* Set the translation base pointer to be as described by pgd.
*
* pgd: new page tables
*/
.align 5
ENTRY(cpu_sa1100_switch_mm)
flush_1100_dcache r3, ip, r1
mov ip, #0
mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache
mcr p15, 0, ip, c9, c0, 0 @ invalidate RB
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
mov pc, lr
/*
* cpu_sa1100_set_pte(ptep, pte)
*
* Set a PTE and flush it out
*/
.align 5
ENTRY(cpu_sa1100_set_pte)
str r1, [r0], #-2048 @ linux version
eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY
bic r2, r1, #PTE_SMALL_AP_MASK
bic r2, r2, #PTE_TYPE_MASK
orr r2, r2, #PTE_TYPE_SMALL
tst r1, #L_PTE_USER @ User?
orrne r2, r2, #PTE_SMALL_AP_URO_SRW
tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty?
orreq r2, r2, #PTE_SMALL_AP_UNO_SRW
tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young?
movne r2, #0
str r2, [r0] @ hardware version
mov r0, r0
mcr p15, 0, r0, c7, c10, 1 @ clean D entry
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov pc, lr
__INIT
.type __sa1100_setup, #function
__sa1100_setup:
mov r0, #0
mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4
mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4
mrc p15, 0, r0, c1, c0 @ get control register v4
ldr r5, sa1100_cr1_clear
bic r0, r0, r5
ldr r5, sa1100_cr1_set
orr r0, r0, r5
mov pc, lr
.size __sa1100_setup, . - __sa1100_setup
/*
* R
* .RVI ZFRS BLDP WCAM
* ..11 0001 ..11 1101
*
*/
.type sa1100_cr1_clear, #object
.type sa1100_cr1_set, #object
sa1100_cr1_clear:
.word 0x3f3f
sa1100_cr1_set:
.word 0x313d
__INITDATA
/*
* Purpose : Function pointers used to access above functions - all calls
* come through these
*/
/*
* SA1100 and SA1110 share the same function calls
*/
.type sa1100_processor_functions, #object
ENTRY(sa1100_processor_functions)
.word v4_early_abort
.word cpu_sa1100_proc_init
.word cpu_sa1100_proc_fin
.word cpu_sa1100_reset
.word cpu_sa1100_do_idle
.word cpu_sa1100_dcache_clean_area
.word cpu_sa1100_switch_mm
.word cpu_sa1100_set_pte
.size sa1100_processor_functions, . - sa1100_processor_functions
.section ".rodata"
.type cpu_arch_name, #object
cpu_arch_name:
.asciz "armv4"
.size cpu_arch_name, . - cpu_arch_name
.type cpu_elf_name, #object
cpu_elf_name:
.asciz "v4"
.size cpu_elf_name, . - cpu_elf_name
.type cpu_sa1100_name, #object
cpu_sa1100_name:
.asciz "StrongARM-1100"
.size cpu_sa1100_name, . - cpu_sa1100_name
.type cpu_sa1110_name, #object
cpu_sa1110_name:
.asciz "StrongARM-1110"
.size cpu_sa1110_name, . - cpu_sa1110_name
.align
.section ".proc.info", #alloc, #execinstr
.type __sa1100_proc_info,#object
__sa1100_proc_info:
.long 0x4401a110
.long 0xfffffff0
.long PMD_TYPE_SECT | \
PMD_SECT_BUFFERABLE | \
PMD_SECT_CACHEABLE | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ
b __sa1100_setup
.long cpu_arch_name
.long cpu_elf_name
.long HWCAP_SWP | HWCAP_HALF | HWCAP_26BIT | HWCAP_FAST_MULT
.long cpu_sa1100_name
.long sa1100_processor_functions
.long v4wb_tlb_fns
.long v4_mc_user_fns
.long v4wb_cache_fns
.size __sa1100_proc_info, . - __sa1100_proc_info
.type __sa1110_proc_info,#object
__sa1110_proc_info:
.long 0x6901b110
.long 0xfffffff0
.long PMD_TYPE_SECT | \
PMD_SECT_BUFFERABLE | \
PMD_SECT_CACHEABLE | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ
b __sa1100_setup
.long cpu_arch_name
.long cpu_elf_name
.long HWCAP_SWP | HWCAP_HALF | HWCAP_26BIT | HWCAP_FAST_MULT
.long cpu_sa1110_name
.long sa1100_processor_functions
.long v4wb_tlb_fns
.long v4_mc_user_fns
.long v4wb_cache_fns
.size __sa1110_proc_info, . - __sa1110_proc_info

40
arch/arm/mm/proc-syms.c Normal file
Wyświetl plik

@@ -0,0 +1,40 @@
/*
* linux/arch/arm/mm/proc-syms.c
*
* Copyright (C) 2000-2002 Russell King
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/module.h>
#include <linux/mm.h>
#include <asm/cacheflush.h>
#include <asm/proc-fns.h>
#include <asm/tlbflush.h>
#ifndef MULTI_CPU
EXPORT_SYMBOL(cpu_dcache_clean_area);
EXPORT_SYMBOL(cpu_set_pte);
#else
EXPORT_SYMBOL(processor);
#endif
#ifndef MULTI_CACHE
EXPORT_SYMBOL(__cpuc_flush_kern_all);
EXPORT_SYMBOL(__cpuc_flush_user_all);
EXPORT_SYMBOL(__cpuc_flush_user_range);
EXPORT_SYMBOL(__cpuc_coherent_kern_range);
#else
EXPORT_SYMBOL(cpu_cache);
#endif
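The MULTI_CACHE indirection exported here works roughly as below; the struct layout is assumed from asm/cacheflush.h in this tree, so treat it as a sketch:

struct cpu_cache_fns_sketch {
        void (*flush_kern_all)(void);
        void (*flush_user_all)(void);
        /* ... one pointer per cache operation ... */
};
/* single-cache kernels call the symbols directly; MULTI_CACHE
 * kernels call through the exported 'cpu_cache' struct instead */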
/*
* No module should need to touch the TLB (and currently
* no modules do). We export this for "loadkernel" support
* (booting a new kernel from within a running kernel).
*/
#ifdef MULTI_TLB
EXPORT_SYMBOL(cpu_tlb);
#endif

272
arch/arm/mm/proc-v6.S Normal file

@@ -0,0 +1,272 @@
/*
* linux/arch/arm/mm/proc-v6.S
*
* Copyright (C) 2001 Deep Blue Solutions Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This is the "shell" of the ARMv6 processor support.
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/constants.h>
#include <asm/procinfo.h>
#include <asm/pgtable.h>
#include "proc-macros.S"
#define D_CACHE_LINE_SIZE 32
.macro cpsie, flags
.ifc \flags, f
.long 0xf1080040
.exitm
.endif
.ifc \flags, i
.long 0xf1080080
.exitm
.endif
.ifc \flags, if
.long 0xf10800c0
.exitm
.endif
.err
.endm
.macro cpsid, flags
.ifc \flags, f
.long 0xf10c0040
.exitm
.endif
.ifc \flags, i
.long 0xf10c0080
.exitm
.endif
.ifc \flags, if
.long 0xf10c00c0
.exitm
.endif
.err
.endm
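Both macros emit raw opcodes because assemblers of the day did not yet know the ARMv6 CPS mnemonics. From the constants above, the encoding is a base of 0xf1080000 (cpsie) or 0xf10c0000 (cpsid) plus F in bit 6 and I in bit 7:

unsigned long cps_opcode(int enable, int irq, int fiq)
{
        unsigned long op = enable ? 0xf1080000UL : 0xf10c0000UL;
        if (irq) op |= 0x80;    /* I flag */
        if (fiq) op |= 0x40;    /* F flag */
        return op;              /* cpsie if == 0xf10800c0 */
}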
ENTRY(cpu_v6_proc_init)
mov pc, lr
ENTRY(cpu_v6_proc_fin)
mov pc, lr
/*
* cpu_v6_reset(loc)
*
* Perform a soft reset of the system. Put the CPU into the
* same state as it would be if it had been reset, and branch
* to what would be the reset vector.
*
* - loc - location to jump to for soft reset
*
* It is assumed that:
*/
.align 5
ENTRY(cpu_v6_reset)
mov pc, r0
/*
* cpu_v6_do_idle()
*
* Idle the processor (eg, wait for interrupt).
*
* IRQs are already disabled.
*/
ENTRY(cpu_v6_do_idle)
mcr p15, 0, r1, c7, c0, 4 @ wait for interrupt
mov pc, lr
ENTRY(cpu_v6_dcache_clean_area)
#ifndef TLB_CAN_READ_FROM_L1_CACHE
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, #D_CACHE_LINE_SIZE
subs r1, r1, #D_CACHE_LINE_SIZE
bhi 1b
#endif
mov pc, lr
/*
* cpu_v6_switch_mm(pgd_phys, tsk)
*
* Set the translation table base pointer to be pgd_phys
*
* - pgd_phys - physical address of new TTB
*
* It is assumed that:
* - we are not using split page tables
*/
ENTRY(cpu_v6_switch_mm)
mov r2, #0
ldr r1, [r1, #MM_CONTEXT_ID] @ get mm->context.id
mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB
mcr p15, 0, r2, c7, c10, 4 @ drain write buffer
mcr p15, 0, r0, c2, c0, 0 @ set TTB 0
mcr p15, 0, r1, c13, c0, 1 @ set context ID
mov pc, lr
#define nG (1 << 11)
#define APX (1 << 9)
#define AP1 (1 << 5)
#define AP0 (1 << 4)
#define XN (1 << 0)
/*
* cpu_v6_set_pte(ptep, pte)
*
* Set a level 2 translation table entry.
*
* - ptep - pointer to level 2 translation table entry
* (hardware version is stored at -1024 bytes)
* - pte - PTE value to store
*
* Permissions:
* YUWD APX AP1 AP0 SVC User
* 0xxx 0 0 0 no acc no acc
* 100x 1 0 1 r/o no acc
* 10x0 1 0 1 r/o no acc
* 1011 0 0 1 r/w no acc
* 110x 1 1 0 r/o r/o
* 11x0 1 1 0 r/o r/o
* 1111 0 1 1 r/w r/w
*/
ENTRY(cpu_v6_set_pte)
str r1, [r0], #-2048 @ linux version
bic r2, r1, #0x00000ff0
bic r2, r2, #0x00000003
orr r2, r2, #AP0 | 2
tst r1, #L_PTE_WRITE
tstne r1, #L_PTE_DIRTY
orreq r2, r2, #APX
tst r1, #L_PTE_USER
orrne r2, r2, #AP1 | nG
tstne r2, #APX
eorne r2, r2, #AP0
tst r1, #L_PTE_YOUNG
biceq r2, r2, #APX | AP1 | AP0
@ tst r1, #L_PTE_EXEC
@ orreq r2, r2, #XN
tst r1, #L_PTE_PRESENT
moveq r2, #0
str r2, [r0]
mcr p15, 0, r0, c7, c10, 1 @ flush_pte
mov pc, lr
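The permission table above can be cross-checked against a small C model of the same bit manipulation. This is a reading aid only; the L_PTE_* values mirror the Linux PTE flags used elsewhere in this tree and should be treated as assumptions of the sketch:

/* Sketch: C model of the hardware-PTE derivation in cpu_v6_set_pte. */
#define nG  (1 << 11)
#define APX (1 << 9)
#define AP1 (1 << 5)
#define AP0 (1 << 4)

#define L_PTE_PRESENT (1 << 0)
#define L_PTE_YOUNG   (1 << 1)
#define L_PTE_USER    (1 << 4)
#define L_PTE_WRITE   (1 << 5)
#define L_PTE_DIRTY   (1 << 7)

static unsigned int v6_hw_pte(unsigned int pte)
{
        unsigned int hw = (pte & ~0xff3u) | AP0 | 2;    /* small page, SVC r/w */

        if (!((pte & L_PTE_WRITE) && (pte & L_PTE_DIRTY)))
                hw |= APX;                      /* clean or read-only: no writes */
        if (pte & L_PTE_USER) {
                hw |= AP1 | nG;                 /* user-visible, non-global */
                if (hw & APX)
                        hw &= ~AP0;             /* user r/o encodes as APX|AP1 */
        }
        if (!(pte & L_PTE_YOUNG))
                hw &= ~(APX | AP1 | AP0);       /* not young: no access */
        if (!(pte & L_PTE_PRESENT))
                hw = 0;                         /* not present: fault */
        return hw;
}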
cpu_v6_name:
.asciz "Some Random V6 Processor"
.align
.section ".text.init", #alloc, #execinstr
/*
* __v6_setup
*
* Initialise TLB, Caches, and MMU state ready to switch the MMU
* on. Return in r0 the new CP15 C1 control register setting.
*
* We automatically detect if we have a Harvard cache, and use the
* Harvard cache control instructions instead of the unified cache
* control instructions.
*
* This should be able to cover all ARMv6 cores.
*
* It is assumed that:
* - cache type register is implemented
*/
__v6_setup:
mov r0, #0
mcr p15, 0, r0, c7, c14, 0 @ clean+invalidate D cache
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
mcr p15, 0, r0, c7, c15, 0 @ clean+invalidate cache
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
mcr p15, 0, r0, c8, c7, 0 @ invalidate I + D TLBs
mcr p15, 0, r0, c2, c0, 2 @ TTB control register
mcr p15, 0, r4, c2, c0, 1 @ load TTB1
#ifdef CONFIG_VFP
mrc p15, 0, r0, c1, c0, 2
orr r0, r0, #(3 << 20)
mcr p15, 0, r0, c1, c0, 2 @ Enable full access to VFP
#endif
mrc p15, 0, r0, c1, c0, 0 @ read control register
ldr r5, v6_cr1_clear @ get mask for bits to clear
bic r0, r0, r5 @ clear the bits
ldr r5, v6_cr1_set @ get mask for bits to set
orr r0, r0, r5 @ set them
mov pc, lr @ return to head.S:__ret
/*
*          V X F   I D LR
* .... ...E PUI. .T.T 4RVI ZFRS BLDP WCAM
* rrrr rrrx xxx0 0101 xxxx xxxx x111 xxxx < forced
*          0 110       0011 1.00 .111 1101 < we want
*/
.type v6_cr1_clear, #object
.type v6_cr1_set, #object
v6_cr1_clear:
.word 0x01e0fb7f
v6_cr1_set:
.word 0x00c0387d
.type v6_processor_functions, #object
ENTRY(v6_processor_functions)
.word v6_early_abort
.word cpu_v6_proc_init
.word cpu_v6_proc_fin
.word cpu_v6_reset
.word cpu_v6_do_idle
.word cpu_v6_dcache_clean_area
.word cpu_v6_switch_mm
.word cpu_v6_set_pte
.size v6_processor_functions, . - v6_processor_functions
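The order of the .word entries is significant: on MULTI_CPU kernels this table sits behind the struct processor exported from proc-syms.c above, and every caller indexes it by fixed offset. A rough C view of that structure (field names follow asm/proc-fns.h in this tree; the exact prototypes and the stand-in types are assumptions of the sketch):

/* Sketch: the dispatch structure the .word table above populates. */
struct mm_struct;                       /* kernel type, opaque here */
typedef unsigned int pte_val_model;     /* stand-in for pte_t */

struct processor_model {
        void (*_data_abort)(unsigned long pc);            /* v6_early_abort */
        void (*_proc_init)(void);                         /* cpu_v6_proc_init */
        void (*_proc_fin)(void);                          /* cpu_v6_proc_fin */
        void (*reset)(unsigned long addr);                /* cpu_v6_reset */
        int  (*_do_idle)(void);                           /* cpu_v6_do_idle */
        void (*dcache_clean_area)(void *addr, int size);  /* cpu_v6_dcache_clean_area */
        void (*switch_mm)(unsigned long pgd_phys,
                          struct mm_struct *mm);          /* cpu_v6_switch_mm */
        void (*set_pte)(pte_val_model *ptep,
                        pte_val_model pte);               /* cpu_v6_set_pte */
};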
.type cpu_arch_name, #object
cpu_arch_name:
.asciz "armv6"
.size cpu_arch_name, . - cpu_arch_name
.type cpu_elf_name, #object
cpu_elf_name:
.asciz "v6"
.size cpu_elf_name, . - cpu_elf_name
.align
.section ".proc.info", #alloc, #execinstr
/*
* Match any ARMv6 processor core.
*/
.type __v6_proc_info, #object
__v6_proc_info:
.long 0x0007b000
.long 0x0007f000
.long PMD_TYPE_SECT | \
PMD_SECT_BUFFERABLE | \
PMD_SECT_CACHEABLE | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ
b __v6_setup
.long cpu_arch_name
.long cpu_elf_name
.long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_VFP|HWCAP_EDSP|HWCAP_JAVA
.long cpu_v6_name
.long v6_processor_functions
.long v6wbi_tlb_fns
.long v6_user_fns
.long v6_cache_fns
.size __v6_proc_info, . - __v6_proc_info

934
arch/arm/mm/proc-xscale.S Normal file

@@ -0,0 +1,934 @@
/*
* linux/arch/arm/mm/proc-xscale.S
*
* Author: Nicolas Pitre
* Created: November 2000
* Copyright: (C) 2000, 2001 MontaVista Software Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* MMU functions for the Intel XScale CPUs
*
* 2001 Aug 21:
* some contributions by Brett Gaines <brett.w.gaines@intel.com>
* Copyright 2001 by Intel Corp.
*
* 2001 Sep 08:
* Completely revisited, many important fixes
* Nicolas Pitre <nico@cam.org>
*/
#include <linux/linkage.h>
#include <linux/init.h>
#include <asm/assembler.h>
#include <asm/procinfo.h>
#include <asm/hardware.h>
#include <asm/pgtable.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include "proc-macros.S"
/*
* This is the maximum size of an area which will be flushed. If the area
* is larger than this, then we flush the whole cache
*/
#define MAX_AREA_SIZE 32768
/*
* the cache line size of the I and D cache
*/
#define CACHELINESIZE 32
/*
* the size of the data cache
*/
#define CACHESIZE 32768
/*
* Virtual address used to allocate the cache when flushed
*
* This must be an address range which is _never_ used. It should
* apparently have a mapping in the corresponding page table for
* compatibility with future CPUs that _could_ require it. In practice,
* we don't care.
*
* This must be aligned on a 2*CACHESIZE boundary. The code selects one of
* the 2 areas alternately each time the clean_d_cache macro is used.
* Without this the XScale core exhibits cache eviction problems and no one
* knows why.
*
* Reminder: the vector table is located at 0xffff0000-0xffff0fff.
*/
#define CLEAN_ADDR 0xfffe0000
/*
* This macro is used to wait for a CP15 write and is needed
* when we have to ensure that the last operation to the co-processor
* was completed before continuing.
*/
.macro cpwait, rd
mrc p15, 0, \rd, c2, c0, 0 @ arbitrary read of cp15
mov \rd, \rd @ wait for completion
sub pc, pc, #4 @ flush instruction pipeline
.endm
.macro cpwait_ret, lr, rd
mrc p15, 0, \rd, c2, c0, 0 @ arbitrary read of cp15
sub pc, \lr, \rd, LSR #32 @ wait for completion and
@ flush instruction pipeline
.endm
/*
* This macro cleans the entire dcache using line allocate.
* The main loop has been unrolled to reduce loop overhead.
* rd and rs are two scratch registers.
*/
.macro clean_d_cache, rd, rs
ldr \rs, =clean_addr
ldr \rd, [\rs]
eor \rd, \rd, #CACHESIZE
str \rd, [\rs]
add \rs, \rd, #CACHESIZE
1: mcr p15, 0, \rd, c7, c2, 5 @ allocate D cache line
add \rd, \rd, #CACHELINESIZE
mcr p15, 0, \rd, c7, c2, 5 @ allocate D cache line
add \rd, \rd, #CACHELINESIZE
mcr p15, 0, \rd, c7, c2, 5 @ allocate D cache line
add \rd, \rd, #CACHELINESIZE
mcr p15, 0, \rd, c7, c2, 5 @ allocate D cache line
add \rd, \rd, #CACHELINESIZE
teq \rd, \rs
bne 1b
.endm
.data
clean_addr: .word CLEAN_ADDR
.text
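Written out in C, the bookkeeping that clean_d_cache performs on the clean_addr word is just an XOR toggle between the two windows. A sketch, with names shadowing the assembly for illustration only:

/* Sketch: the address alternation done by the clean_d_cache macro. */
#define MODEL_CACHESIZE  32768u
#define MODEL_CLEAN_ADDR 0xfffe0000u

static unsigned long model_clean_addr = MODEL_CLEAN_ADDR;

static unsigned long next_clean_window(void)
{
        model_clean_addr ^= MODEL_CACHESIZE;    /* flip between the 2 areas */
        return model_clean_addr;                /* allocate lines up to +CACHESIZE */
}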
/*
* cpu_xscale_proc_init()
*
* Nothing too exciting at the moment
*/
ENTRY(cpu_xscale_proc_init)
mov pc, lr
/*
* cpu_xscale_proc_fin()
*/
ENTRY(cpu_xscale_proc_fin)
str lr, [sp, #-4]!
mov r0, #PSR_F_BIT|PSR_I_BIT|SVC_MODE
msr cpsr_c, r0
bl xscale_flush_kern_cache_all @ clean caches
mrc p15, 0, r0, c1, c0, 0 @ ctrl register
bic r0, r0, #0x1800 @ ...IZ...........
bic r0, r0, #0x0006 @ .............CA.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
ldr pc, [sp], #4
/*
* cpu_xscale_reset(loc)
*
* Perform a soft reset of the system. Put the CPU into the
* same state as it would be if it had been reset, and branch
* to what would be the reset vector.
*
* loc: location to jump to for soft reset
*/
.align 5
ENTRY(cpu_xscale_reset)
mov r1, #PSR_F_BIT|PSR_I_BIT|SVC_MODE
msr cpsr_c, r1 @ reset CPSR
mrc p15, 0, r1, c1, c0, 0 @ ctrl register
bic r1, r1, #0x0086 @ ........B....CA.
bic r1, r1, #0x3900 @ ..VIZ..S........
mcr p15, 0, r1, c1, c0, 0 @ ctrl register
mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches & BTB
bic r1, r1, #0x0001 @ ...............M
mcr p15, 0, r1, c1, c0, 0 @ ctrl register
@ CAUTION: MMU turned off from this point. We count on the pipeline
@ already containing the last two instructions so that they survive.
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
mov pc, r0
/*
* cpu_xscale_do_idle()
*
* Cause the processor to idle
*
* For now we simply go to idle mode in every case.
*
* XScale supports clock switching, but using idle mode support
* allows external hardware to react to system state changes.
*/
.align 5
ENTRY(cpu_xscale_do_idle)
mov r0, #1
mcr p14, 0, r0, c7, c0, 0 @ Go to IDLE
mov pc, lr
/* ================================= CACHE ================================ */
/*
* flush_user_cache_all()
*
* Invalidate all cache entries in a particular address
* space.
*/
ENTRY(xscale_flush_user_cache_all)
/* FALLTHROUGH */
/*
* flush_kern_cache_all()
*
* Clean and invalidate the entire cache.
*/
ENTRY(xscale_flush_kern_cache_all)
mov r2, #VM_EXEC
mov ip, #0
__flush_whole_cache:
clean_d_cache r0, r1
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c5, 0 @ Invalidate I cache & BTB
mcrne p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer
mov pc, lr
/*
* flush_user_cache_range(start, end, vm_flags)
*
* Invalidate a range of cache entries in the specified
* address space.
*
* - start - start address (may not be aligned)
* - end - end address (exclusive, may not be aligned)
* - vm_flags - vma->vm_flags of the address space
*/
.align 5
ENTRY(xscale_flush_user_cache_range)
mov ip, #0
sub r3, r1, r0 @ calculate total size
cmp r3, #MAX_AREA_SIZE
bhs __flush_whole_cache
1: tst r2, #VM_EXEC
mcrne p15, 0, r0, c7, c5, 1 @ Invalidate I cache line
mcr p15, 0, r0, c7, c10, 1 @ Clean D cache line
mcr p15, 0, r0, c7, c6, 1 @ Invalidate D cache line
add r0, r0, #CACHELINESIZE
cmp r0, r1
blo 1b
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c5, 6 @ Invalidate BTB
mcrne p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer
mov pc, lr
/*
* coherent_kern_range(start, end)
*
* Ensure coherency between the Icache and the Dcache in the
* region described by start. If you have non-snooping
* Harvard caches, you need to implement this function.
*
* - start - virtual start address
* - end - virtual end address
*
* Note: single I-cache line invalidation isn't used here since
* it also trashes the mini I-cache used by JTAG debuggers.
*/
ENTRY(xscale_coherent_kern_range)
/* FALLTHROUGH */
/*
* coherent_user_range(start, end)
*
* Ensure coherency between the Icache and the Dcache in the
* region described by start. If you have non-snooping
* Harvard caches, you need to implement this function.
*
* - start - virtual start address
* - end - virtual end address
*
* Note: single I-cache line invalidation isn't used here since
* it also trashes the mini I-cache used by JTAG debuggers.
*/
ENTRY(xscale_coherent_user_range)
bic r0, r0, #CACHELINESIZE - 1
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, #CACHELINESIZE
cmp r0, r1
blo 1b
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ Invalidate I cache & BTB
mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer
mov pc, lr
/*
* flush_kern_dcache_page(void *page)
*
* Ensure no D cache aliasing occurs, either with itself or
* the I cache
*
* - page - page aligned address
*/
ENTRY(xscale_flush_kern_dcache_page)
add r1, r0, #PAGE_SZ
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
add r0, r0, #CACHELINESIZE
cmp r0, r1
blo 1b
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ Invalidate I cache & BTB
mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer
mov pc, lr
/*
* dma_inv_range(start, end)
*
* Invalidate (discard) the specified virtual address range.
* May not write back any entries. If 'start' or 'end'
* are not cache line aligned, those lines must be written
* back.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(xscale_dma_inv_range)
mrc p15, 0, r2, c0, c0, 0 @ read processor ID
eor r2, r2, #0x69000000 @ r2 = ID ^ 0x69052000
eor r2, r2, #0x00052000 @ (80200 base ID)
bics r2, r2, #1 @ Z set for 80200 revisions 0 and 1
beq xscale_dma_flush_range @ these must clean+invalidate instead
tst r0, #CACHELINESIZE - 1
bic r0, r0, #CACHELINESIZE - 1
mcrne p15, 0, r0, c7, c10, 1 @ clean D entry
tst r1, #CACHELINESIZE - 1
mcrne p15, 0, r1, c7, c10, 1 @ clean D entry
1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
add r0, r0, #CACHELINESIZE
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer
mov pc, lr
/*
* dma_clean_range(start, end)
*
* Clean the specified virtual address range.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(xscale_dma_clean_range)
bic r0, r0, #CACHELINESIZE - 1
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, #CACHELINESIZE
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer
mov pc, lr
/*
* dma_flush_range(start, end)
*
* Clean and invalidate the specified virtual address range.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(xscale_dma_flush_range)
bic r0, r0, #CACHELINESIZE - 1
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
add r0, r0, #CACHELINESIZE
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer
mov pc, lr
ENTRY(xscale_cache_fns)
.long xscale_flush_kern_cache_all
.long xscale_flush_user_cache_all
.long xscale_flush_user_cache_range
.long xscale_coherent_kern_range
.long xscale_coherent_user_range
.long xscale_flush_kern_dcache_page
.long xscale_dma_inv_range
.long xscale_dma_clean_range
.long xscale_dma_flush_range
ENTRY(cpu_xscale_dcache_clean_area)
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, #CACHELINESIZE
subs r1, r1, #CACHELINESIZE
bhi 1b
mov pc, lr
/* ================================ CACHE LOCKING ============================
*
* The XScale MicroArchitecture implements support for locking entries into
* the data and instruction cache. The following functions implement the core
* low level instructions needed to accomplish the locking. The developer's
* manual states that the code that performs the locking must be in non-cached
* memory. To accomplish this, the code in xscale-cache-lock.c copies the
* following functions from the cache into a non-cached memory region that
* is allocated through consistent_alloc().
*
*/
.align 5
/*
* xscale_icache_lock
*
* r0: starting address to lock
* r1: end address to lock
*/
ENTRY(xscale_icache_lock)
iLockLoop:
bic r0, r0, #CACHELINESIZE - 1
mcr p15, 0, r0, c9, c1, 0 @ lock into cache
cmp r0, r1 @ are we done?
add r0, r0, #CACHELINESIZE @ advance to next cache line
bls iLockLoop
mov pc, lr
/*
* xscale_icache_unlock
*/
ENTRY(xscale_icache_unlock)
mcr p15, 0, r0, c9, c1, 1 @ Unlock icache
mov pc, lr
/*
* xscale_dcache_lock
*
* r0: starting address to lock
* r1: end address to lock
*/
ENTRY(xscale_dcache_lock)
mcr p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer
mov r2, #1
mcr p15, 0, r2, c9, c2, 0 @ Put dcache in lock mode
cpwait ip @ Wait for completion
mrs r2, cpsr
orr r3, r2, #PSR_F_BIT | PSR_I_BIT
dLockLoop:
msr cpsr_c, r3
mcr p15, 0, r0, c7, c10, 1 @ Write back line if it is dirty
mcr p15, 0, r0, c7, c6, 1 @ Flush/invalidate line
msr cpsr_c, r2
ldr ip, [r0], #CACHELINESIZE @ Preload 32 bytes into cache from
@ location [r0]. Post-increment
@ r0 to next cache line
cmp r0, r1 @ Are we done?
bls dLockLoop
mcr p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer
mov r2, #0
mcr p15, 0, r2, c9, c2, 0 @ Get out of lock mode
cpwait_ret lr, ip
/*
* xscale_dcache_unlock
*/
ENTRY(xscale_dcache_unlock)
mcr p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer
mcr p15, 0, ip, c9, c2, 1 @ Unlock cache
mov pc, lr
/*
* Needed to determine the length of the code that needs to be copied.
*/
.align 5
ENTRY(xscale_cache_dummy)
mov pc, lr
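xscale_cache_dummy exists only so the end of the locking routines has an address; the copy into non-cached memory performed by xscale-cache-lock.c then reduces to something like this sketch (the destination buffer is assumed to come from a non-cached allocation made elsewhere):

/* Sketch: copying the locking code out of cached memory. */
#include <linux/string.h>

extern char xscale_icache_lock[], xscale_cache_dummy[];

static void copy_lock_code(void *uncached_buf)
{
        memcpy(uncached_buf, xscale_icache_lock,
               (size_t)(xscale_cache_dummy - xscale_icache_lock));
}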
/* ================================ TLB LOCKING ==============================
*
* The XScale MicroArchitecture implements support for locking entries into
* the Instruction and Data TLBs. The following functions provide the
* low level support for supporting these under Linux. xscale-lock.c
* implements some higher level management code. Most of the following
* is taken straight out of the Developer's Manual.
*/
/*
* Lock I-TLB entry
*
* r0: Virtual address to translate and lock
*/
.align 5
ENTRY(xscale_itlb_lock)
mrs r2, cpsr
orr r3, r2, #PSR_F_BIT | PSR_I_BIT
msr cpsr_c, r3 @ Disable interrupts
mcr p15, 0, r0, c8, c5, 1 @ Invalidate I-TLB entry
mcr p15, 0, r0, c10, c4, 0 @ Translate and lock
msr cpsr_c, r2 @ Restore interrupts
cpwait_ret lr, ip
/*
* Lock D-TLB entry
*
* r0: Virtual address to translate and lock
*/
.align 5
ENTRY(xscale_dtlb_lock)
mrs r2, cpsr
orr r3, r2, #PSR_F_BIT | PSR_I_BIT
msr cpsr_c, r3 @ Disable interrupts
mcr p15, 0, r0, c8, c6, 1 @ Invalidate D-TLB entry
mcr p15, 0, r0, c10, c8, 0 @ Translate and lock
msr cpsr_c, r2 @ Restore interrupts
cpwait_ret lr, ip
/*
* Unlock all I-TLB entries
*/
.align 5
ENTRY(xscale_itlb_unlock)
mcr p15, 0, ip, c10, c4, 1 @ Unlock I-TLB
mcr p15, 0, ip, c8, c5, 0 @ Invalidate I-TLB
cpwait_ret lr, ip
/*
* Unlock all D-TLB entries
*/
ENTRY(xscale_dtlb_unlock)
mcr p15, 0, ip, c10, c8, 1 @ Unlock D-TLB
mcr p15, 0, ip, c8, c6, 0 @ Invalidate D-TLB
cpwait_ret lr, ip
/* =============================== PageTable ============================== */
#define PTE_CACHE_WRITE_ALLOCATE 0
/*
* cpu_xscale_switch_mm(pgd)
*
* Set the translation base pointer to be as described by pgd.
*
* pgd: new page tables
*/
.align 5
ENTRY(cpu_xscale_switch_mm)
clean_d_cache r1, r2
mcr p15, 0, ip, c7, c5, 0 @ Invalidate I cache & BTB
mcr p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
cpwait_ret lr, ip
/*
* cpu_xscale_set_pte(ptep, pte)
*
* Set a PTE and flush it out
*
* Errata 40: must set memory to write-through for user read-only pages.
*/
.align 5
ENTRY(cpu_xscale_set_pte)
str r1, [r0], #-2048 @ linux version
bic r2, r1, #0xff0
orr r2, r2, #PTE_TYPE_EXT @ extended page
eor r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY
tst r3, #L_PTE_USER @ User?
orrne r2, r2, #PTE_EXT_AP_URO_SRW @ yes -> user r/o, system r/w
tst r3, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty?
orreq r2, r2, #PTE_EXT_AP_UNO_SRW @ yes -> user n/a, system r/w
@ combined with user -> user r/w
@
@ Handle the X bit. We want to set this bit for the minicache
@ (U = E = B = W = 0, C = 1) or when write allocate is enabled,
@ and we have a writeable, cacheable region. If we ignore the
@ U and E bits, we can allow user space to use the minicache as
@ well.
@
@ X = (C & ~W & ~B) | (C & W & B & write_allocate)
@
eor ip, r1, #L_PTE_CACHEABLE
tst ip, #L_PTE_CACHEABLE | L_PTE_WRITE | L_PTE_BUFFERABLE
#if PTE_CACHE_WRITE_ALLOCATE
eorne ip, r1, #L_PTE_CACHEABLE | L_PTE_WRITE | L_PTE_BUFFERABLE
tstne ip, #L_PTE_CACHEABLE | L_PTE_WRITE | L_PTE_BUFFERABLE
#endif
orreq r2, r2, #PTE_EXT_TEX(1)
@
@ Erratum 40: The B bit must be cleared for a user read-only
@ cacheable page.
@
@ B = B & ~(U & C & ~W)
@
and ip, r1, #L_PTE_USER | L_PTE_WRITE | L_PTE_CACHEABLE
teq ip, #L_PTE_USER | L_PTE_CACHEABLE
biceq r2, r2, #PTE_BUFFERABLE
tst r3, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young?
movne r2, #0 @ no -> fault
str r2, [r0] @ hardware version
mov ip, #0
mcr p15, 0, r0, c7, c10, 1 @ Clean D cache line
mcr p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer
mov pc, lr
.ltorg
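Taken together, the X-bit and erratum-40 rules above are two boolean predicates over the Linux PTE flags. A C sketch, with flag values as assumed for this tree and PTE_CACHE_WRITE_ALLOCATE fixed at 0 as in this file:

/* Sketch: the two predicates evaluated by cpu_xscale_set_pte. */
#define PTE_CACHE_WRITE_ALLOCATE 0

#define L_PTE_BUFFERABLE (1 << 2)
#define L_PTE_CACHEABLE  (1 << 3)
#define L_PTE_USER       (1 << 4)
#define L_PTE_WRITE      (1 << 5)

static int xscale_x_bit(unsigned int pte)       /* sets TEX(1) when true */
{
        int c = !!(pte & L_PTE_CACHEABLE);
        int b = !!(pte & L_PTE_BUFFERABLE);
        int w = !!(pte & L_PTE_WRITE);

        return (c && !w && !b) ||
               (c && w && b && PTE_CACHE_WRITE_ALLOCATE);
}

static int xscale_clear_b_bit(unsigned int pte) /* erratum 40 */
{
        return (pte & (L_PTE_USER | L_PTE_WRITE | L_PTE_CACHEABLE))
                == (L_PTE_USER | L_PTE_CACHEABLE);  /* user, cacheable, r/o */
}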
.align
__INIT
.type __xscale_setup, #function
__xscale_setup:
mcr p15, 0, ip, c7, c7, 0 @ invalidate I, D caches & BTB
mcr p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer
mcr p15, 0, ip, c8, c7, 0 @ invalidate I, D TLBs
#ifdef CONFIG_IWMMXT
mov r0, #0 @ initially disallow access to CP0/CP1
#else
mov r0, #1 @ Allow access to CP0
#endif
orr r0, r0, #1 << 6 @ cp6 for IOP3xx and Bulverde
orr r0, r0, #1 << 13 @ It's undefined whether this
mcr p15, 0, r0, c15, c1, 0 @ affects USR or SVC modes
mrc p15, 0, r0, c1, c0, 0 @ get control register
ldr r5, xscale_cr1_clear
bic r0, r0, r5
ldr r5, xscale_cr1_set
orr r0, r0, r5
mov pc, lr
.size __xscale_setup, . - __xscale_setup
/*
* R
* .RVI ZFRS BLDP WCAM
* ..11 1.01 .... .101
*
*/
.type xscale_cr1_clear, #object
.type xscale_cr1_set, #object
xscale_cr1_clear:
.word 0x3b07
xscale_cr1_set:
.word 0x3905
__INITDATA
/*
* Purpose : Function pointers used to access above functions - all calls
* come through these
*/
.type xscale_processor_functions, #object
ENTRY(xscale_processor_functions)
.word v5t_early_abort
.word cpu_xscale_proc_init
.word cpu_xscale_proc_fin
.word cpu_xscale_reset
.word cpu_xscale_do_idle
.word cpu_xscale_dcache_clean_area
.word cpu_xscale_switch_mm
.word cpu_xscale_set_pte
.size xscale_processor_functions, . - xscale_processor_functions
.section ".rodata"
.type cpu_arch_name, #object
cpu_arch_name:
.asciz "armv5te"
.size cpu_arch_name, . - cpu_arch_name
.type cpu_elf_name, #object
cpu_elf_name:
.asciz "v5"
.size cpu_elf_name, . - cpu_elf_name
.type cpu_80200_name, #object
cpu_80200_name:
.asciz "XScale-80200"
.size cpu_80200_name, . - cpu_80200_name
.type cpu_8032x_name, #object
cpu_8032x_name:
.asciz "XScale-IOP8032x Family"
.size cpu_8032x_name, . - cpu_8032x_name
.type cpu_8033x_name, #object
cpu_8033x_name:
.asciz "XScale-IOP8033x Family"
.size cpu_8033x_name, . - cpu_8033x_name
.type cpu_pxa250_name, #object
cpu_pxa250_name:
.asciz "XScale-PXA250"
.size cpu_pxa250_name, . - cpu_pxa250_name
.type cpu_pxa210_name, #object
cpu_pxa210_name:
.asciz "XScale-PXA210"
.size cpu_pxa210_name, . - cpu_pxa210_name
.type cpu_ixp42x_name, #object
cpu_ixp42x_name:
.asciz "XScale-IXP42x Family"
.size cpu_ixp42x_name, . - cpu_ixp42x_name
.type cpu_ixp46x_name, #object
cpu_ixp46x_name:
.asciz "XScale-IXP46x Family"
.size cpu_ixp46x_name, . - cpu_ixp46x_name
.type cpu_ixp2400_name, #object
cpu_ixp2400_name:
.asciz "XScale-IXP2400"
.size cpu_ixp2400_name, . - cpu_ixp2400_name
.type cpu_ixp2800_name, #object
cpu_ixp2800_name:
.asciz "XScale-IXP2800"
.size cpu_ixp2800_name, . - cpu_ixp2800_name
.type cpu_pxa255_name, #object
cpu_pxa255_name:
.asciz "XScale-PXA255"
.size cpu_pxa255_name, . - cpu_pxa255_name
.type cpu_pxa270_name, #object
cpu_pxa270_name:
.asciz "XScale-PXA270"
.size cpu_pxa270_name, . - cpu_pxa270_name
.align
.section ".proc.info", #alloc, #execinstr
.type __80200_proc_info,#object
__80200_proc_info:
.long 0x69052000
.long 0xfffffff0
.long PMD_TYPE_SECT | \
PMD_SECT_BUFFERABLE | \
PMD_SECT_CACHEABLE | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ
b __xscale_setup
.long cpu_arch_name
.long cpu_elf_name
.long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
.long cpu_80200_name
.long xscale_processor_functions
.long v4wbi_tlb_fns
.long xscale_mc_user_fns
.long xscale_cache_fns
.size __80200_proc_info, . - __80200_proc_info
.type __8032x_proc_info,#object
__8032x_proc_info:
.long 0x69052420
.long 0xfffff5e0 @ mask should accommodate IOP80219 also
.long PMD_TYPE_SECT | \
PMD_SECT_BUFFERABLE | \
PMD_SECT_CACHEABLE | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ
b __xscale_setup
.long cpu_arch_name
.long cpu_elf_name
.long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
.long cpu_8032x_name
.long xscale_processor_functions
.long v4wbi_tlb_fns
.long xscale_mc_user_fns
.long xscale_cache_fns
.size __8032x_proc_info, . - __8032x_proc_info
.type __8033x_proc_info,#object
__8033x_proc_info:
.long 0x69054010
.long 0xffffff30
.long PMD_TYPE_SECT | \
PMD_SECT_BUFFERABLE | \
PMD_SECT_CACHEABLE | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ
b __xscale_setup
.long cpu_arch_name
.long cpu_elf_name
.long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
.long cpu_8033x_name
.long xscale_processor_functions
.long v4wbi_tlb_fns
.long xscale_mc_user_fns
.long xscale_cache_fns
.size __8033x_proc_info, . - __8033x_proc_info
.type __pxa250_proc_info,#object
__pxa250_proc_info:
.long 0x69052100
.long 0xfffff7f0
.long PMD_TYPE_SECT | \
PMD_SECT_BUFFERABLE | \
PMD_SECT_CACHEABLE | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ
b __xscale_setup
.long cpu_arch_name
.long cpu_elf_name
.long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
.long cpu_pxa250_name
.long xscale_processor_functions
.long v4wbi_tlb_fns
.long xscale_mc_user_fns
.long xscale_cache_fns
.size __pxa250_proc_info, . - __pxa250_proc_info
.type __pxa210_proc_info,#object
__pxa210_proc_info:
.long 0x69052120
.long 0xfffff3f0
.long PMD_TYPE_SECT | \
PMD_SECT_BUFFERABLE | \
PMD_SECT_CACHEABLE | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ
b __xscale_setup
.long cpu_arch_name
.long cpu_elf_name
.long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
.long cpu_pxa210_name
.long xscale_processor_functions
.long v4wbi_tlb_fns
.long xscale_mc_user_fns
.long xscale_cache_fns
.size __pxa210_proc_info, . - __pxa210_proc_info
.type __ixp2400_proc_info, #object
__ixp2400_proc_info:
.long 0x69054190
.long 0xfffffff0
.long PMD_TYPE_SECT | \
PMD_SECT_BUFFERABLE | \
PMD_SECT_CACHEABLE | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ
b __xscale_setup
.long cpu_arch_name
.long cpu_elf_name
.long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
.long cpu_ixp2400_name
.long xscale_processor_functions
.long v4wbi_tlb_fns
.long xscale_mc_user_fns
.long xscale_cache_fns
.size __ixp2400_proc_info, . - __ixp2400_proc_info
.type __ixp2800_proc_info, #object
__ixp2800_proc_info:
.long 0x690541a0
.long 0xfffffff0
.long PMD_TYPE_SECT | \
PMD_SECT_BUFFERABLE | \
PMD_SECT_CACHEABLE | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ
b __xscale_setup
.long cpu_arch_name
.long cpu_elf_name
.long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
.long cpu_ixp2800_name
.long xscale_processor_functions
.long v4wbi_tlb_fns
.long xscale_mc_user_fns
.long xscale_cache_fns
.size __ixp2800_proc_info, . - __ixp2800_proc_info
.type __ixp42x_proc_info, #object
__ixp42x_proc_info:
.long 0x690541c0
.long 0xffffffc0
.long PMD_TYPE_SECT | \
PMD_SECT_BUFFERABLE | \
PMD_SECT_CACHEABLE | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ
b __xscale_setup
.long cpu_arch_name
.long cpu_elf_name
.long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
.long cpu_ixp42x_name
.long xscale_processor_functions
.long v4wbi_tlb_fns
.long xscale_mc_user_fns
.long xscale_cache_fns
.size __ixp42x_proc_info, . - __ixp42x_proc_info
.type __ixp46x_proc_info, #object
__ixp46x_proc_info:
.long 0x69054200
.long 0xffffff00
.long 0x00000c0e
b __xscale_setup
.long cpu_arch_name
.long cpu_elf_name
.long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
.long cpu_ixp46x_name
.long xscale_processor_functions
.long v4wbi_tlb_fns
.long xscale_mc_user_fns
.long xscale_cache_fns
.size __ixp46x_proc_info, . - __ixp46x_proc_info
.type __pxa255_proc_info,#object
__pxa255_proc_info:
.long 0x69052d00
.long 0xfffffff0
.long PMD_TYPE_SECT | \
PMD_SECT_BUFFERABLE | \
PMD_SECT_CACHEABLE | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ
b __xscale_setup
.long cpu_arch_name
.long cpu_elf_name
.long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
.long cpu_pxa255_name
.long xscale_processor_functions
.long v4wbi_tlb_fns
.long xscale_mc_user_fns
.long xscale_cache_fns
.size __pxa255_proc_info, . - __pxa255_proc_info
.type __pxa270_proc_info,#object
__pxa270_proc_info:
.long 0x69054110
.long 0xfffffff0
.long PMD_TYPE_SECT | \
PMD_SECT_BUFFERABLE | \
PMD_SECT_CACHEABLE | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ
b __xscale_setup
.long cpu_arch_name
.long cpu_elf_name
.long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
.long cpu_pxa270_name
.long xscale_processor_functions
.long v4wbi_tlb_fns
.long xscale_mc_user_fns
.long xscale_cache_fns
.size __pxa270_proc_info, . - __pxa270_proc_info
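Every __*_proc_info record above begins with a CPU ID value and a mask; early boot code picks the first record whose masked CP15 main ID matches. The match itself is simple, as this sketch shows (the struct and function names are illustrative):

/* Sketch: matching a proc_info entry against the CP15 main ID register. */
struct proc_info_match {
        unsigned int cpu_val;   /* e.g. 0x69052420 for __8032x_proc_info */
        unsigned int cpu_mask;  /* e.g. 0xfffff5e0, wide enough for IOP80219 */
};

static int proc_info_matches(const struct proc_info_match *p, unsigned int midr)
{
        return (midr & p->cpu_mask) == p->cpu_val;
}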

52
arch/arm/mm/tlb-v3.S Normal file

@@ -0,0 +1,52 @@
/*
* linux/arch/arm/mm/tlb-v3.S
*
* Copyright (C) 1997-2002 Russell King
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* ARM architecture version 3 TLB handling functions.
*
* Processors: ARM610, ARM710.
*/
#include <linux/linkage.h>
#include <linux/init.h>
#include <asm/constants.h>
#include <asm/tlbflush.h>
#include "proc-macros.S"
.align 5
/*
* v3_flush_user_tlb_range(start, end, vma)
*
* Invalidate a range of TLB entries in the specified address space.
*
* - start - range start address
* - end - range end address
* - vma - vm_area_struct describing address space
*/
.align 5
ENTRY(v3_flush_user_tlb_range)
vma_vm_mm r2, r2
act_mm r3 @ get current->active_mm
teq r2, r3 @ == mm ?
movne pc, lr @ no, we don't do anything
ENTRY(v3_flush_kern_tlb_range)
bic r0, r0, #0x0ff
bic r0, r0, #0xf00
1: mcr p15, 0, r0, c6, c0, 0 @ invalidate TLB entry
add r0, r0, #PAGE_SZ
cmp r0, r1
blo 1b
mov pc, lr
__INITDATA
.type v3_tlb_fns, #object
ENTRY(v3_tlb_fns)
.long v3_flush_user_tlb_range
.long v3_flush_kern_tlb_range
.long v3_tlb_flags
.size v3_tlb_fns, . - v3_tlb_fns

65
arch/arm/mm/tlb-v4.S Normal file

@@ -0,0 +1,65 @@
/*
* linux/arch/arm/mm/tlb-v4.S
*
* Copyright (C) 1997-2002 Russell King
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* ARM architecture version 4 TLB handling functions.
* These assume split I/D TLBs and no write buffer.
*
* Processors: ARM720T
*/
#include <linux/linkage.h>
#include <linux/init.h>
#include <asm/constants.h>
#include <asm/tlbflush.h>
#include "proc-macros.S"
.align 5
/*
* v4_flush_user_tlb_range(start, end, vma)
*
* Invalidate a range of TLB entries in the specified user address space.
*
* - start - range start address
* - end - range end address
* - vma - vm_area_struct describing address space
*/
.align 5
ENTRY(v4_flush_user_tlb_range)
vma_vm_mm ip, r2
act_mm r3 @ get current->active_mm
eors r3, ip, r3 @ == mm ?
movne pc, lr @ no, we don't do anything
.v4_flush_kern_tlb_range:
bic r0, r0, #0x0ff
bic r0, r0, #0xf00
1: mcr p15, 0, r0, c8, c7, 1 @ invalidate TLB entry
add r0, r0, #PAGE_SZ
cmp r0, r1
blo 1b
mov pc, lr
/*
* v4_flush_kern_tlb_range(start, end)
*
* Invalidate a range of TLB entries in the specified kernel
* address range.
*
* - start - virtual address (may not be aligned)
* - end - virtual address (may not be aligned)
*/
.globl v4_flush_kern_tlb_range
.equ v4_flush_kern_tlb_range, .v4_flush_kern_tlb_range
__INITDATA
.type v4_tlb_fns, #object
ENTRY(v4_tlb_fns)
.long v4_flush_user_tlb_range
.long v4_flush_kern_tlb_range
.long v4_tlb_flags
.size v4_tlb_fns, . - v4_tlb_fns

77
arch/arm/mm/tlb-v4wb.S Normal file

@@ -0,0 +1,77 @@
/*
* linux/arch/arm/mm/tlb-v4wb.S
*
* Copyright (C) 1997-2002 Russell King
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* ARM architecture version 4 TLB handling functions.
* These assume split I/D TLBs without single-entry I TLB invalidation, and a write buffer.
*
* Processors: SA110 SA1100 SA1110
*/
#include <linux/linkage.h>
#include <linux/init.h>
#include <asm/constants.h>
#include <asm/tlbflush.h>
#include "proc-macros.S"
.align 5
/*
* v4wb_flush_user_tlb_range(start, end, vma)
*
* Invalidate a range of TLB entries in the specified address space.
*
* - start - range start address
* - end - range end address
* - vma - vm_area_struct describing address space
*/
.align 5
ENTRY(v4wb_flush_user_tlb_range)
vma_vm_mm ip, r2
act_mm r3 @ get current->active_mm
eors r3, ip, r3 @ == mm ?
movne pc, lr @ no, we don't do anything
vma_vm_flags r2, r2
mcr p15, 0, r3, c7, c10, 4 @ drain WB
tst r2, #VM_EXEC
mcrne p15, 0, r3, c8, c5, 0 @ invalidate I TLB
bic r0, r0, #0x0ff
bic r0, r0, #0xf00
1: mcr p15, 0, r0, c8, c6, 1 @ invalidate D TLB entry
add r0, r0, #PAGE_SZ
cmp r0, r1
blo 1b
mov pc, lr
/*
* v4wb_flush_kern_tlb_range(start, end)
*
* Invalidate a range of TLB entries in the specified kernel
* address range.
*
* - start - virtual address (may not be aligned)
* - end - virtual address (may not be aligned)
*/
ENTRY(v4wb_flush_kern_tlb_range)
mov r3, #0
mcr p15, 0, r3, c7, c10, 4 @ drain WB
bic r0, r0, #0x0ff
bic r0, r0, #0xf00
mcr p15, 0, r3, c8, c5, 0 @ invalidate I TLB
1: mcr p15, 0, r0, c8, c6, 1 @ invalidate D TLB entry
add r0, r0, #PAGE_SZ
cmp r0, r1
blo 1b
mov pc, lr
__INITDATA
.type v4wb_tlb_fns, #object
ENTRY(v4wb_tlb_fns)
.long v4wb_flush_user_tlb_range
.long v4wb_flush_kern_tlb_range
.long v4wb_tlb_flags
.size v4wb_tlb_fns, . - v4wb_tlb_fns

68
arch/arm/mm/tlb-v4wbi.S Normal file

@@ -0,0 +1,68 @@
/*
* linux/arch/arm/mm/tlb-v4wbi.S
*
* Copyright (C) 1997-2002 Russell King
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* ARM architecture version 4 and version 5 TLB handling functions.
* These assume split I/D TLBs, with a write buffer.
*
* Processors: ARM920 ARM922 ARM925 ARM926 XScale
*/
#include <linux/linkage.h>
#include <linux/init.h>
#include <asm/constants.h>
#include <asm/tlbflush.h>
#include "proc-macros.S"
/*
* v4wbi_flush_user_tlb_range(start, end, vma)
*
* Invalidate a range of TLB entries in the specified address space.
*
* - start - range start address
* - end - range end address
* - vma - vm_area_struct describing address space
*/
.align 5
ENTRY(v4wbi_flush_user_tlb_range)
vma_vm_mm ip, r2
act_mm r3 @ get current->active_mm
eors r3, ip, r3 @ == mm ?
movne pc, lr @ no, we don't do anything
mov r3, #0
mcr p15, 0, r3, c7, c10, 4 @ drain WB
vma_vm_flags r2, r2
bic r0, r0, #0x0ff
bic r0, r0, #0xf00
1: tst r2, #VM_EXEC
mcrne p15, 0, r0, c8, c5, 1 @ invalidate I TLB entry
mcr p15, 0, r0, c8, c6, 1 @ invalidate D TLB entry
add r0, r0, #PAGE_SZ
cmp r0, r1
blo 1b
mov pc, lr
ENTRY(v4wbi_flush_kern_tlb_range)
mov r3, #0
mcr p15, 0, r3, c7, c10, 4 @ drain WB
bic r0, r0, #0x0ff
bic r0, r0, #0xf00
1: mcr p15, 0, r0, c8, c5, 1 @ invalidate I TLB entry
mcr p15, 0, r0, c8, c6, 1 @ invalidate D TLB entry
add r0, r0, #PAGE_SZ
cmp r0, r1
blo 1b
mov pc, lr
__INITDATA
.type v4wbi_tlb_fns, #object
ENTRY(v4wbi_tlb_fns)
.long v4wbi_flush_user_tlb_range
.long v4wbi_flush_kern_tlb_range
.long v4wbi_tlb_flags
.size v4wbi_tlb_fns, . - v4wbi_tlb_fns

92
arch/arm/mm/tlb-v6.S Normal file

@@ -0,0 +1,92 @@
/*
* linux/arch/arm/mm/tlb-v6.S
*
* Copyright (C) 1997-2002 Russell King
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* ARM architecture version 6 TLB handling functions.
* These assume a split I/D TLB.
*/
#include <linux/linkage.h>
#include <asm/constants.h>
#include <asm/page.h>
#include <asm/tlbflush.h>
#include "proc-macros.S"
#define HARVARD_TLB
/*
* v6wbi_flush_user_tlb_range(start, end, vma)
*
* Invalidate a range of TLB entries in the specified address space.
*
* - start - start address (may not be aligned)
* - end - end address (exclusive, may not be aligned)
* - vma - vma_struct describing address range
*
* It is assumed that:
* - the "Invalidate single entry" instruction will invalidate
* both the I and the D TLBs on Harvard-style TLBs
*/
ENTRY(v6wbi_flush_user_tlb_range)
vma_vm_mm r3, r2 @ get vma->vm_mm
mov ip, #0
mmid r3, r3 @ get vm_mm->context.id
mcr p15, 0, ip, c7, c10, 4 @ drain write buffer
mov r0, r0, lsr #PAGE_SHIFT @ align address
mov r1, r1, lsr #PAGE_SHIFT
asid r3, r3 @ mask ASID
orr r0, r3, r0, lsl #PAGE_SHIFT @ Create initial MVA
mov r1, r1, lsl #PAGE_SHIFT
vma_vm_flags r2, r2 @ get vma->vm_flags
1:
#ifdef HARVARD_TLB
mcr p15, 0, r0, c8, c6, 1 @ TLB invalidate D MVA
tst r2, #VM_EXEC @ Executable area?
mcrne p15, 0, r0, c8, c5, 1 @ TLB invalidate I MVA
#else
mcr p15, 0, r0, c8, c7, 1 @ TLB invalidate MVA
#endif
add r0, r0, #PAGE_SZ
cmp r0, r1
blo 1b
mov pc, lr
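The value passed to the invalidate-by-MVA operations above combines the page address with the 8-bit ASID taken from mm->context.id. In C, the construction looks like this sketch (a PAGE_SHIFT of 12 and an 8-bit ASID are assumptions here):

/* Sketch: the MVA|ASID word built before the invalidate loop. */
#define MODEL_PAGE_SHIFT 12

static unsigned int mva_asid(unsigned int addr, unsigned int context_id)
{
        return (addr >> MODEL_PAGE_SHIFT << MODEL_PAGE_SHIFT)   /* page-align */
             | (context_id & 0xff);                             /* ASID bits */
}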
/*
* v6wbi_flush_kern_tlb_range(start,end)
*
* Invalidate a range of kernel TLB entries
*
* - start - start address (may not be aligned)
* - end - end address (exclusive, may not be aligned)
*/
ENTRY(v6wbi_flush_kern_tlb_range)
mov r2, #0
mcr p15, 0, r2, c7, c10, 4 @ drain write buffer
mov r0, r0, lsr #PAGE_SHIFT @ align address
mov r1, r1, lsr #PAGE_SHIFT
mov r0, r0, lsl #PAGE_SHIFT
mov r1, r1, lsl #PAGE_SHIFT
1:
#ifdef HARVARD_TLB
mcr p15, 0, r0, c8, c6, 1 @ TLB invalidate D MVA
mcr p15, 0, r0, c8, c5, 1 @ TLB invalidate I MVA
#else
mcr p15, 0, r0, c8, c7, 1 @ TLB invalidate MVA
#endif
add r0, r0, #PAGE_SZ
cmp r0, r1
blo 1b
mov pc, lr
.section ".text.init", #alloc, #execinstr
.type v6wbi_tlb_fns, #object
ENTRY(v6wbi_tlb_fns)
.long v6wbi_flush_user_tlb_range
.long v6wbi_flush_kern_tlb_range
.long v6wbi_tlb_flags
.size v6wbi_tlb_fns, . - v6wbi_tlb_fns
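All of the *_tlb_fns tables in this series share one layout, which is what lets MULTI_TLB kernels flush through a single indirection while single-TLB kernels bind the calls directly. A rough C view (field names follow asm/tlbflush.h in this tree; the exact prototypes are an assumption of the sketch):

/* Sketch: the table layout shared by v3/v4/v4wb/v4wbi/v6wbi _tlb_fns. */
struct vm_area_struct;                  /* kernel type, opaque here */

struct cpu_tlb_fns_model {
        void (*flush_user_range)(unsigned long start, unsigned long end,
                                 struct vm_area_struct *vma);
        void (*flush_kern_range)(unsigned long start, unsigned long end);
        unsigned long tlb_flags;        /* capability bits, e.g. v6wbi_tlb_flags */
};

On a kernel built for a single TLB type the indirection disappears and a call such as flush_tlb_kernel_range() compiles straight into, e.g., v6wbi_flush_kern_tlb_range.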