Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!
Linus Torvalds
2005-04-16 15:20:36 -07:00
commit 1da177e4c3
17291 changed files with 6718755 additions and 0 deletions

420
arch/ia64/Kconfig Normal file

@@ -0,0 +1,420 @@
#
# For a description of the syntax of this configuration file,
# see Documentation/kbuild/kconfig-language.txt.
#
mainmenu "IA-64 Linux Kernel Configuration"
source "init/Kconfig"
menu "Processor type and features"
config IA64
bool
default y
help
The Itanium Processor Family is Intel's 64-bit successor to
the 32-bit X86 line. The IA-64 Linux project has a home
page at <http://www.linuxia64.org/> and a mailing list at
<linux-ia64@vger.kernel.org>.
config 64BIT
bool
default y
config MMU
bool
default y
config RWSEM_XCHGADD_ALGORITHM
bool
default y
config GENERIC_CALIBRATE_DELAY
bool
default y
config TIME_INTERPOLATION
bool
default y
config EFI
bool
default y
config GENERIC_IOMAP
bool
default y
choice
prompt "System type"
default IA64_GENERIC
config IA64_GENERIC
bool "generic"
select NUMA
select ACPI_NUMA
select VIRTUAL_MEM_MAP
select DISCONTIGMEM
help
This selects the system type of your hardware. A "generic" kernel
will run on any supported IA-64 system. However, if you configure
a kernel for your specific system, it will be faster and smaller.
generic For any supported IA-64 system
DIG-compliant For DIG ("Developer's Interface Guide") compliant systems
HP-zx1/sx1000 For HP systems
HP-zx1/sx1000+swiotlb For HP systems with (broken) DMA-constrained devices.
SGI-SN2 For SGI Altix systems
Ski-simulator For the HP simulator <http://www.hpl.hp.com/research/linux/ski/>
If you don't know what to do, choose "generic".
config IA64_DIG
bool "DIG-compliant"
config IA64_HP_ZX1
bool "HP-zx1/sx1000"
help
Build a kernel that runs on HP zx1 and sx1000 systems. This adds
support for the HP I/O MMU.
config IA64_HP_ZX1_SWIOTLB
bool "HP-zx1/sx1000 with software I/O TLB"
help
Build a kernel that runs on HP zx1 and sx1000 systems even when they
have broken PCI devices which cannot DMA to full 32 bits. Apart
from support for the HP I/O MMU, this includes support for the software
I/O TLB, which allows supporting the broken devices at the expense of
wasting some kernel memory (about 2MB by default).
config IA64_SGI_SN2
bool "SGI-SN2"
help
Selecting this option will optimize the kernel for use on sn2 based
systems, but the resulting kernel binary will not run on other
types of ia64 systems. If you have an SGI Altix system, it's safe
to select this option. If in doubt, select ia64 generic support
instead.
config IA64_HP_SIM
bool "Ski-simulator"
endchoice
choice
prompt "Processor type"
default ITANIUM
config ITANIUM
bool "Itanium"
help
Select your IA-64 processor type. The default is Itanium.
This choice is safe for all IA-64 systems, but may not perform
optimally on systems with, say, Itanium 2 or newer processors.
config MCKINLEY
bool "Itanium 2"
help
Select this to configure for an Itanium 2 (McKinley) processor.
endchoice
choice
prompt "Kernel page size"
default IA64_PAGE_SIZE_16KB
config IA64_PAGE_SIZE_4KB
bool "4KB"
help
This lets you select the page size of the kernel. For best IA-64
performance, a page size of 8KB or 16KB is recommended. For best
IA-32 compatibility, a page size of 4KB should be selected (the vast
majority of IA-32 binaries work perfectly fine with a larger page
size). For Itanium 2 or newer systems, a page size of 64KB can also
be selected.
4KB For best IA-32 compatibility
8KB For best IA-64 performance
16KB For best IA-64 performance
64KB Requires Itanium 2 or newer processor.
If you don't know what to do, choose 16KB.
config IA64_PAGE_SIZE_8KB
bool "8KB"
config IA64_PAGE_SIZE_16KB
bool "16KB"
config IA64_PAGE_SIZE_64KB
depends on !ITANIUM
bool "64KB"
endchoice
config IA64_BRL_EMU
bool
depends on ITANIUM
default y
# align cache-sensitive data to 128 bytes
config IA64_L1_CACHE_SHIFT
int
default "7" if MCKINLEY
default "6" if ITANIUM
# align cache-sensitive data to 64 bytes
config NUMA
bool "NUMA support"
depends on !IA64_HP_SIM
default y if IA64_SGI_SN2
select ACPI_NUMA
help
Say Y to compile the kernel to support NUMA (Non-Uniform Memory
Access). This option is for configuring high-end multiprocessor
server systems. If in doubt, say N.
config VIRTUAL_MEM_MAP
bool "Virtual mem map"
default y if !IA64_HP_SIM
help
Say Y to compile the kernel with support for a virtual mem map.
This code also only takes effect if a memory hole of greater than
1 Gb is found during boot. You must turn this option on if you
require the DISCONTIGMEM option for your machine. If you are
unsure, say Y.
config HOLES_IN_ZONE
bool
default y if VIRTUAL_MEM_MAP
config DISCONTIGMEM
bool "Discontiguous memory support"
depends on (IA64_DIG || IA64_SGI_SN2 || IA64_GENERIC || IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB) && NUMA && VIRTUAL_MEM_MAP
default y if (IA64_SGI_SN2 || IA64_GENERIC) && NUMA
help
Say Y to support efficient handling of discontiguous physical memory,
for architectures which are either NUMA (Non-Uniform Memory Access)
or have huge holes in the physical address space for other reasons.
See <file:Documentation/vm/numa> for more.
config IA64_CYCLONE
bool "Cyclone (EXA) Time Source support"
help
Say Y here to enable support for IBM EXA Cyclone time source.
If you're unsure, answer N.
config IOSAPIC
bool
depends on !IA64_HP_SIM
default y
config IA64_SGI_SN_SIM
bool "SGI Medusa Simulator Support"
depends on IA64_SGI_SN2
help
If you are compiling a kernel that will run under SGI's IA-64
simulator (Medusa) then say Y, otherwise say N.
config FORCE_MAX_ZONEORDER
int
default "18"
config SMP
bool "Symmetric multi-processing support"
help
This enables support for systems with more than one CPU. If you have
a system with only one CPU, say N. If you have a system with more
than one CPU, say Y.
If you say N here, the kernel will run on single and multiprocessor
systems, but will use only one CPU of a multiprocessor system. If
you say Y here, the kernel will run on many, but not all,
single processor systems. On a single processor system, the kernel
will run faster if you say N here.
See also the <file:Documentation/smp.txt> and the SMP-HOWTO
available at <http://www.tldp.org/docs.html#howto>.
If you don't know what to do here, say N.
config NR_CPUS
int "Maximum number of CPUs (2-512)"
range 2 512
depends on SMP
default "64"
help
You should set this to the number of CPUs in your system, but
keep in mind that a kernel compiled for, e.g., 2 CPUs will boot but
only use 2 CPUs on a >2 CPU system. Setting this to a value larger
than 64 will cause the use of a CPU mask array, causing a small
performance hit.
config HOTPLUG_CPU
bool "Support for hot-pluggable CPUs (EXPERIMENTAL)"
depends on SMP && EXPERIMENTAL
select HOTPLUG
default n
---help---
Say Y here to experiment with turning CPUs off and on. CPUs
can be controlled through /sys/devices/system/cpu/cpu#.
Say N if you want to disable CPU hotplug.
config PREEMPT
bool "Preemptible Kernel"
help
This option reduces the latency of the kernel when reacting to
real-time or interactive events by allowing a low priority process to
be preempted even if it is in kernel mode executing a system call.
This allows applications to run more reliably even when the system is
under load.
Say Y here if you are building a kernel for a desktop, embedded
or real-time system. Say N if you are unsure.
config HAVE_DEC_LOCK
bool
depends on (SMP || PREEMPT)
default y
config IA32_SUPPORT
bool "Support for Linux/x86 binaries"
help
IA-64 processors can execute IA-32 (X86) instructions. By
saying Y here, the kernel will include IA-32 system call
emulation support which makes it possible to transparently
run IA-32 Linux binaries on an IA-64 Linux system.
If in doubt, say Y.
config COMPAT
bool
depends on IA32_SUPPORT
default y
config IA64_MCA_RECOVERY
tristate "MCA recovery from errors other than TLB."
config PERFMON
bool "Performance monitor support"
help
Selects whether support for the IA-64 performance monitor hardware
is included in the kernel. This makes some kernel data-structures a
little bigger and slows down execution a bit, but it is generally
a good idea to turn this on. If you're unsure, say Y.
config IA64_PALINFO
tristate "/proc/pal support"
help
If you say Y here, you are able to get PAL (Processor Abstraction
Layer) information in /proc/pal. This contains useful information
about the processors in your systems, such as cache and TLB sizes
and the PAL firmware version in use.
To use this option, you have to ensure that the "/proc file system
support" (CONFIG_PROC_FS) is enabled, too.
config ACPI_DEALLOCATE_IRQ
bool
depends on IOSAPIC && EXPERIMENTAL
default y
source "drivers/firmware/Kconfig"
source "fs/Kconfig.binfmt"
endmenu
menu "Power management and ACPI"
config PM
bool "Power Management support"
depends on IA64_GENERIC || IA64_DIG || IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB
default y
help
"Power Management" means that parts of your computer are shut
off or put into a power conserving "sleep" mode if they are not
being used. There are two competing standards for doing this: APM
and ACPI. If you want to use either one, say Y here and then also
to the requisite support below.
Power Management is most important for battery powered laptop
computers; if you have a laptop, check out the Linux Laptop home
page on the WWW at <http://www.linux-on-laptops.com/> and the
Battery Powered Linux mini-HOWTO, available from
<http://www.tldp.org/docs.html#howto>.
Note that, even if you say N here, Linux on the x86 architecture
will issue the hlt instruction if nothing is to be done, thereby
sending the processor to sleep and saving power.
config ACPI
bool
depends on !IA64_HP_SIM
default y
if !IA64_HP_SIM
source "drivers/acpi/Kconfig"
endif
endmenu
if !IA64_HP_SIM
menu "Bus options (PCI, PCMCIA)"
config PCI
bool "PCI support"
help
Find out whether you have a PCI motherboard. PCI is the name of a
bus system, i.e. the way the CPU talks to the other stuff inside
your box. Other bus systems are ISA, EISA, MicroChannel (MCA) or
VESA. If you have PCI, say Y, otherwise N.
The PCI-HOWTO, available from
<http://www.tldp.org/docs.html#howto>, contains valuable
information about which PCI hardware does work under Linux and which
doesn't.
config PCI_DOMAINS
bool
default PCI
source "drivers/pci/Kconfig"
source "drivers/pci/hotplug/Kconfig"
source "drivers/pcmcia/Kconfig"
endmenu
endif
source "drivers/Kconfig"
source "fs/Kconfig"
source "lib/Kconfig"
#
# Use the generic interrupt handling code in kernel/irq/:
#
config GENERIC_HARDIRQS
bool
default y
config GENERIC_IRQ_PROBE
bool
default y
source "arch/ia64/hp/sim/Kconfig"
source "arch/ia64/oprofile/Kconfig"
source "arch/ia64/Kconfig.debug"
source "security/Kconfig"
source "crypto/Kconfig"

64
arch/ia64/Kconfig.debug Normal file

@@ -0,0 +1,64 @@
menu "Kernel hacking"
source "lib/Kconfig.debug"
choice
prompt "Physical memory granularity"
default IA64_GRANULE_64MB
config IA64_GRANULE_16MB
bool "16MB"
help
IA-64 identity-mapped regions use a large page size called "granules".
Select "16MB" for a small granule size.
Select "64MB" for a large granule size. This is the current default.
config IA64_GRANULE_64MB
bool "64MB"
depends on !(IA64_GENERIC || IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_SGI_SN2)
endchoice
config IA64_PRINT_HAZARDS
bool "Print possible IA-64 dependency violations to console"
depends on DEBUG_KERNEL
help
Selecting this option prints more information for Illegal Dependency
Faults, that is, for Read-after-Write (RAW), Write-after-Write (WAW),
or Write-after-Read (WAR) violations. This option is ignored if you
are compiling for an Itanium A step processor
(CONFIG_ITANIUM_ASTEP_SPECIFIC). If you're unsure, select Y.
config DISABLE_VHPT
bool "Disable VHPT"
depends on DEBUG_KERNEL
help
The Virtual Hash Page Table (VHPT) enhances virtual address
translation performance. Normally you want the VHPT active but you
can select this option to disable the VHPT for debugging. If you're
unsure, answer N.
config IA64_DEBUG_CMPXCHG
bool "Turn on compare-and-exchange bug checking (slow!)"
depends on DEBUG_KERNEL
help
Selecting this option turns on bug checking for the IA-64
compare-and-exchange instructions. This is slow! Itaniums
from step B3 or later don't have this problem. If you're unsure,
select N.
config IA64_DEBUG_IRQ
bool "Turn on irq debug checks (slow!)"
depends on DEBUG_KERNEL
help
Selecting this option turns on bug checking for the IA-64 irq_save
and restore instructions. It's useful for tracking down spinlock
problems, but slow! If you're unsure, select N.
config SYSVIPC_COMPAT
bool
depends on COMPAT && SYSVIPC
default y
endmenu

115
arch/ia64/Makefile Normal file

@@ -0,0 +1,115 @@
#
# ia64/Makefile
#
# This file is subject to the terms and conditions of the GNU General Public
# License. See the file "COPYING" in the main directory of this archive
# for more details.
#
# Copyright (C) 1998-2004 by David Mosberger-Tang <davidm@hpl.hp.com>
#
NM := $(CROSS_COMPILE)nm -B
READELF := $(CROSS_COMPILE)readelf
export AWK
CHECKFLAGS += -m64 -D__ia64=1 -D__ia64__=1 -D_LP64 -D__LP64__
OBJCOPYFLAGS := --strip-all
LDFLAGS_vmlinux := -static
LDFLAGS_MODULE += -T $(srctree)/arch/ia64/module.lds
AFLAGS_KERNEL := -mconstant-gp
EXTRA :=
cflags-y := -pipe $(EXTRA) -ffixed-r13 -mfixed-range=f12-f15,f32-f127 \
-falign-functions=32 -frename-registers -fno-optimize-sibling-calls
CFLAGS_KERNEL := -mconstant-gp
GCC_VERSION := $(call cc-version)
GAS_STATUS = $(shell $(srctree)/arch/ia64/scripts/check-gas "$(CC)" "$(OBJDUMP)")
CPPFLAGS += $(shell $(srctree)/arch/ia64/scripts/toolchain-flags "$(CC)" "$(OBJDUMP)" "$(READELF)")
ifeq ($(GAS_STATUS),buggy)
$(error Sorry, you need a newer version of the assembler, one that is built from \
a source-tree that post-dates 18-Dec-2002. You can find a pre-compiled \
static binary of such an assembler at: \
\
ftp://ftp.hpl.hp.com/pub/linux-ia64/gas-030124.tar.gz)
endif
ifneq ($(shell if [ $(GCC_VERSION) -lt 0300 ] ; then echo "bad"; fi ;),)
$(error Sorry, your compiler is too old. GCC v2.96 is known to generate bad code.)
endif
ifeq ($(GCC_VERSION),0304)
cflags-$(CONFIG_ITANIUM) += -mtune=merced
cflags-$(CONFIG_MCKINLEY) += -mtune=mckinley
endif
CFLAGS += $(cflags-y)
head-y := arch/ia64/kernel/head.o arch/ia64/kernel/init_task.o
libs-y += arch/ia64/lib/
core-y += arch/ia64/kernel/ arch/ia64/mm/
core-$(CONFIG_IA32_SUPPORT) += arch/ia64/ia32/
core-$(CONFIG_IA64_DIG) += arch/ia64/dig/
core-$(CONFIG_IA64_GENERIC) += arch/ia64/dig/
core-$(CONFIG_IA64_HP_ZX1) += arch/ia64/dig/
core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/
core-$(CONFIG_IA64_SGI_SN2) += arch/ia64/sn/
drivers-$(CONFIG_PCI) += arch/ia64/pci/
drivers-$(CONFIG_IA64_HP_SIM) += arch/ia64/hp/sim/
drivers-$(CONFIG_IA64_HP_ZX1) += arch/ia64/hp/common/ arch/ia64/hp/zx1/
drivers-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/hp/common/ arch/ia64/hp/zx1/
drivers-$(CONFIG_IA64_GENERIC) += arch/ia64/hp/common/ arch/ia64/hp/zx1/ arch/ia64/hp/sim/ arch/ia64/sn/
drivers-$(CONFIG_OPROFILE) += arch/ia64/oprofile/
boot := arch/ia64/hp/sim/boot
.PHONY: boot compressed check
all: compressed unwcheck
compressed: vmlinux.gz
vmlinux.gz: vmlinux
$(Q)$(MAKE) $(build)=$(boot) $@
unwcheck: vmlinux
-$(Q)READELF=$(READELF) $(srctree)/arch/ia64/scripts/unwcheck.py $<
archclean:
$(Q)$(MAKE) $(clean)=$(boot)
CLEAN_FILES += include/asm-ia64/.offsets.h.stamp vmlinux.gz bootloader
MRPROPER_FILES += include/asm-ia64/offsets.h
prepare: include/asm-ia64/offsets.h
arch/ia64/kernel/asm-offsets.s: include/asm include/linux/version.h include/config/MARKER
include/asm-ia64/offsets.h: arch/ia64/kernel/asm-offsets.s
$(call filechk,gen-asm-offsets)
arch/ia64/kernel/asm-offsets.s: include/asm-ia64/.offsets.h.stamp
include/asm-ia64/.offsets.h.stamp:
mkdir -p include/asm-ia64
[ -s include/asm-ia64/offsets.h ] \
|| echo "#define IA64_TASK_SIZE 0" > include/asm-ia64/offsets.h
touch $@
boot: lib/lib.a vmlinux
$(Q)$(MAKE) $(build)=$(boot) $@
install: vmlinux.gz
sh $(srctree)/arch/ia64/install.sh $(KERNELRELEASE) $< System.map "$(INSTALL_PATH)"
define archhelp
echo '* compressed - Build compressed kernel image'
echo ' install - Install compressed kernel image'
echo ' boot - Build vmlinux and bootloader for Ski simulator'
echo '* unwcheck - Check vmlinux for invalid unwind info'
endef

Diff too large to display


@@ -0,0 +1,534 @@
#
# Automatically generated make config: don't edit
#
#
# Code maturity level options
#
CONFIG_EXPERIMENTAL=y
# CONFIG_CLEAN_COMPILE is not set
# CONFIG_STANDALONE is not set
CONFIG_BROKEN=y
CONFIG_BROKEN_ON_SMP=y
#
# General setup
#
CONFIG_SWAP=y
CONFIG_SYSVIPC=y
# CONFIG_POSIX_MQUEUE is not set
# CONFIG_BSD_PROCESS_ACCT is not set
CONFIG_SYSCTL=y
# CONFIG_AUDIT is not set
CONFIG_LOG_BUF_SHIFT=16
# CONFIG_HOTPLUG is not set
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
# CONFIG_EMBEDDED is not set
CONFIG_KALLSYMS=y
# CONFIG_KALLSYMS_ALL is not set
CONFIG_FUTEX=y
CONFIG_EPOLL=y
CONFIG_IOSCHED_NOOP=y
CONFIG_IOSCHED_AS=y
CONFIG_IOSCHED_DEADLINE=y
CONFIG_IOSCHED_CFQ=y
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
#
# Loadable module support
#
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_OBSOLETE_MODPARM=y
CONFIG_MODVERSIONS=y
CONFIG_KMOD=y
CONFIG_STOP_MACHINE=y
#
# Processor type and features
#
CONFIG_IA64=y
CONFIG_64BIT=y
CONFIG_MMU=y
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
CONFIG_TIME_INTERPOLATION=y
CONFIG_EFI=y
# CONFIG_IA64_GENERIC is not set
# CONFIG_IA64_DIG is not set
# CONFIG_IA64_HP_ZX1 is not set
# CONFIG_IA64_SGI_SN2 is not set
CONFIG_IA64_HP_SIM=y
# CONFIG_ITANIUM is not set
CONFIG_MCKINLEY=y
# CONFIG_IA64_PAGE_SIZE_4KB is not set
# CONFIG_IA64_PAGE_SIZE_8KB is not set
# CONFIG_IA64_PAGE_SIZE_16KB is not set
CONFIG_IA64_PAGE_SIZE_64KB=y
CONFIG_IA64_L1_CACHE_SHIFT=7
# CONFIG_MCKINLEY_ASTEP_SPECIFIC is not set
# CONFIG_VIRTUAL_MEM_MAP is not set
# CONFIG_IA64_CYCLONE is not set
CONFIG_FORCE_MAX_ZONEORDER=18
CONFIG_SMP=y
CONFIG_NR_CPUS=64
CONFIG_PREEMPT=y
CONFIG_HAVE_DEC_LOCK=y
CONFIG_IA32_SUPPORT=y
CONFIG_COMPAT=y
# CONFIG_PERFMON is not set
CONFIG_IA64_PALINFO=m
#
# Firmware Drivers
#
CONFIG_EFI_VARS=y
# CONFIG_SMBIOS is not set
CONFIG_BINFMT_ELF=y
CONFIG_BINFMT_MISC=y
#
# Power management and ACPI
#
#
# Device Drivers
#
#
# Generic Driver Options
#
# CONFIG_DEBUG_DRIVER is not set
#
# Memory Technology Devices (MTD)
#
# CONFIG_MTD is not set
#
# Parallel port support
#
# CONFIG_PARPORT is not set
#
# Plug and Play support
#
#
# Block devices
#
CONFIG_BLK_DEV_LOOP=y
# CONFIG_BLK_DEV_CRYPTOLOOP is not set
# CONFIG_BLK_DEV_NBD is not set
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=4096
# CONFIG_BLK_DEV_INITRD is not set
#
# ATA/ATAPI/MFM/RLL support
#
# CONFIG_IDE is not set
#
# SCSI device support
#
CONFIG_SCSI=y
CONFIG_SCSI_PROC_FS=y
#
# SCSI support type (disk, tape, CD-ROM)
#
CONFIG_BLK_DEV_SD=y
# CONFIG_CHR_DEV_ST is not set
# CONFIG_CHR_DEV_OSST is not set
# CONFIG_BLK_DEV_SR is not set
# CONFIG_CHR_DEV_SG is not set
#
# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
#
CONFIG_SCSI_MULTI_LUN=y
CONFIG_SCSI_CONSTANTS=y
CONFIG_SCSI_LOGGING=y
#
# SCSI Transport Attributes
#
CONFIG_SCSI_SPI_ATTRS=y
# CONFIG_SCSI_FC_ATTRS is not set
#
# SCSI low-level drivers
#
# CONFIG_SCSI_AIC7XXX_OLD is not set
# CONFIG_SCSI_SATA is not set
# CONFIG_SCSI_EATA_PIO is not set
# CONFIG_SCSI_DEBUG is not set
#
# Multi-device support (RAID and LVM)
#
# CONFIG_MD is not set
#
# Fusion MPT device support
#
#
# IEEE 1394 (FireWire) support
#
# CONFIG_IEEE1394 is not set
#
# I2O device support
#
#
# Networking support
#
CONFIG_NET=y
#
# Networking options
#
CONFIG_PACKET=y
# CONFIG_PACKET_MMAP is not set
# CONFIG_NETLINK_DEV is not set
# CONFIG_UNIX is not set
# CONFIG_NET_KEY is not set
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
# CONFIG_IP_ADVANCED_ROUTER is not set
# CONFIG_IP_PNP is not set
# CONFIG_NET_IPIP is not set
# CONFIG_NET_IPGRE is not set
# CONFIG_IP_MROUTE is not set
# CONFIG_ARPD is not set
# CONFIG_SYN_COOKIES is not set
# CONFIG_INET_AH is not set
# CONFIG_INET_ESP is not set
# CONFIG_INET_IPCOMP is not set
# CONFIG_IPV6 is not set
# CONFIG_NETFILTER is not set
#
# SCTP Configuration (EXPERIMENTAL)
#
# CONFIG_IP_SCTP is not set
# CONFIG_ATM is not set
# CONFIG_BRIDGE is not set
# CONFIG_VLAN_8021Q is not set
# CONFIG_DECNET is not set
# CONFIG_LLC2 is not set
# CONFIG_IPX is not set
# CONFIG_ATALK is not set
# CONFIG_X25 is not set
# CONFIG_LAPB is not set
# CONFIG_NET_DIVERT is not set
# CONFIG_ECONET is not set
# CONFIG_WAN_ROUTER is not set
# CONFIG_NET_HW_FLOWCONTROL is not set
#
# QoS and/or fair queueing
#
# CONFIG_NET_SCHED is not set
#
# Network testing
#
# CONFIG_NET_PKTGEN is not set
# CONFIG_NETPOLL is not set
# CONFIG_NET_POLL_CONTROLLER is not set
# CONFIG_HAMRADIO is not set
# CONFIG_IRDA is not set
# CONFIG_BT is not set
# CONFIG_NETDEVICES is not set
#
# ISDN subsystem
#
# CONFIG_ISDN is not set
#
# Telephony Support
#
# CONFIG_PHONE is not set
#
# Input device support
#
CONFIG_INPUT=y
#
# Userland interfaces
#
CONFIG_INPUT_MOUSEDEV=y
CONFIG_INPUT_MOUSEDEV_PSAUX=y
CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
# CONFIG_INPUT_JOYDEV is not set
# CONFIG_INPUT_TSDEV is not set
# CONFIG_INPUT_EVDEV is not set
# CONFIG_INPUT_EVBUG is not set
#
# Input I/O drivers
#
# CONFIG_GAMEPORT is not set
CONFIG_SOUND_GAMEPORT=y
CONFIG_SERIO=y
# CONFIG_SERIO_I8042 is not set
CONFIG_SERIO_SERPORT=y
# CONFIG_SERIO_CT82C710 is not set
#
# Input Device Drivers
#
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_INPUT_JOYSTICK is not set
# CONFIG_INPUT_TOUCHSCREEN is not set
# CONFIG_INPUT_MISC is not set
#
# Character devices
#
CONFIG_VT=y
CONFIG_VT_CONSOLE=y
CONFIG_HW_CONSOLE=y
# CONFIG_SERIAL_NONSTANDARD is not set
#
# Serial drivers
#
# CONFIG_SERIAL_8250 is not set
#
# Non-8250 serial port support
#
CONFIG_UNIX98_PTYS=y
# CONFIG_LEGACY_PTYS is not set
# CONFIG_QIC02_TAPE is not set
#
# IPMI
#
# CONFIG_IPMI_HANDLER is not set
#
# Watchdog Cards
#
# CONFIG_WATCHDOG is not set
CONFIG_EFI_RTC=y
# CONFIG_DTLK is not set
# CONFIG_R3964 is not set
# CONFIG_APPLICOM is not set
#
# Ftape, the floppy tape device driver
#
# CONFIG_FTAPE is not set
# CONFIG_AGP is not set
# CONFIG_DRM is not set
# CONFIG_RAW_DRIVER is not set
#
# I2C support
#
# CONFIG_I2C is not set
#
# Misc devices
#
#
# Multimedia devices
#
# CONFIG_VIDEO_DEV is not set
#
# Digital Video Broadcasting Devices
#
# CONFIG_DVB is not set
#
# Graphics support
#
# CONFIG_FB is not set
#
# Console display driver support
#
# CONFIG_VGA_CONSOLE is not set
# CONFIG_MDA_CONSOLE is not set
CONFIG_DUMMY_CONSOLE=y
#
# Sound
#
# CONFIG_SOUND is not set
#
# USB support
#
#
# USB Gadget Support
#
# CONFIG_USB_GADGET is not set
#
# File systems
#
CONFIG_EXT2_FS=y
# CONFIG_EXT2_FS_XATTR is not set
CONFIG_EXT3_FS=y
# CONFIG_EXT3_FS_XATTR is not set
CONFIG_JBD=y
# CONFIG_JBD_DEBUG is not set
# CONFIG_REISERFS_FS is not set
# CONFIG_JFS_FS is not set
# CONFIG_XFS_FS is not set
# CONFIG_MINIX_FS is not set
# CONFIG_ROMFS_FS is not set
# CONFIG_QUOTA is not set
# CONFIG_AUTOFS_FS is not set
# CONFIG_AUTOFS4_FS is not set
#
# CD-ROM/DVD Filesystems
#
# CONFIG_ISO9660_FS is not set
# CONFIG_UDF_FS is not set
#
# DOS/FAT/NT Filesystems
#
# CONFIG_FAT_FS is not set
# CONFIG_NTFS_FS is not set
#
# Pseudo filesystems
#
CONFIG_PROC_FS=y
CONFIG_PROC_KCORE=y
CONFIG_SYSFS=y
# CONFIG_DEVFS_FS is not set
# CONFIG_DEVPTS_FS_XATTR is not set
# CONFIG_TMPFS is not set
CONFIG_HUGETLBFS=y
CONFIG_HUGETLB_PAGE=y
CONFIG_RAMFS=y
#
# Miscellaneous filesystems
#
# CONFIG_ADFS_FS is not set
# CONFIG_AFFS_FS is not set
# CONFIG_HFS_FS is not set
# CONFIG_HFSPLUS_FS is not set
# CONFIG_BEFS_FS is not set
# CONFIG_BFS_FS is not set
# CONFIG_EFS_FS is not set
# CONFIG_CRAMFS is not set
# CONFIG_VXFS_FS is not set
# CONFIG_HPFS_FS is not set
# CONFIG_QNX4FS_FS is not set
# CONFIG_SYSV_FS is not set
# CONFIG_UFS_FS is not set
#
# Network File Systems
#
CONFIG_NFS_FS=y
# CONFIG_NFS_V3 is not set
# CONFIG_NFS_V4 is not set
CONFIG_NFS_DIRECTIO=y
CONFIG_NFSD=y
CONFIG_NFSD_V3=y
# CONFIG_NFSD_V4 is not set
# CONFIG_NFSD_TCP is not set
CONFIG_LOCKD=y
CONFIG_LOCKD_V4=y
CONFIG_EXPORTFS=y
CONFIG_SUNRPC=y
# CONFIG_RPCSEC_GSS_KRB5 is not set
# CONFIG_SMB_FS is not set
# CONFIG_CIFS is not set
# CONFIG_NCP_FS is not set
# CONFIG_CODA_FS is not set
# CONFIG_AFS_FS is not set
#
# Partition Types
#
CONFIG_PARTITION_ADVANCED=y
# CONFIG_ACORN_PARTITION is not set
# CONFIG_OSF_PARTITION is not set
# CONFIG_AMIGA_PARTITION is not set
# CONFIG_ATARI_PARTITION is not set
# CONFIG_MAC_PARTITION is not set
CONFIG_MSDOS_PARTITION=y
# CONFIG_BSD_DISKLABEL is not set
# CONFIG_MINIX_SUBPARTITION is not set
# CONFIG_SOLARIS_X86_PARTITION is not set
# CONFIG_UNIXWARE_DISKLABEL is not set
# CONFIG_LDM_PARTITION is not set
# CONFIG_NEC98_PARTITION is not set
# CONFIG_SGI_PARTITION is not set
# CONFIG_ULTRIX_PARTITION is not set
# CONFIG_SUN_PARTITION is not set
CONFIG_EFI_PARTITION=y
#
# Native Language Support
#
# CONFIG_NLS is not set
#
# Library routines
#
CONFIG_CRC32=y
# CONFIG_LIBCRC32C is not set
#
# HP Simulator drivers
#
CONFIG_HP_SIMETH=y
CONFIG_HP_SIMSERIAL=y
CONFIG_HP_SIMSERIAL_CONSOLE=y
CONFIG_HP_SIMSCSI=y
#
# Profiling support
#
# CONFIG_PROFILING is not set
#
# Kernel hacking
#
# CONFIG_IA64_GRANULE_16MB is not set
CONFIG_IA64_GRANULE_64MB=y
CONFIG_DEBUG_KERNEL=y
# CONFIG_IA64_PRINT_HAZARDS is not set
# CONFIG_DISABLE_VHPT is not set
# CONFIG_MAGIC_SYSRQ is not set
# CONFIG_DEBUG_SLAB is not set
# CONFIG_DEBUG_SPINLOCK is not set
# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
# CONFIG_IA64_DEBUG_CMPXCHG is not set
# CONFIG_IA64_DEBUG_IRQ is not set
CONFIG_DEBUG_INFO=y
CONFIG_SYSVIPC_COMPAT=y
#
# Security options
#
# CONFIG_SECURITY is not set
#
# Cryptographic options
#
# CONFIG_CRYPTO is not set

Diff too large to display

Diff too large to display

Diff too large to display

1199
arch/ia64/defconfig Normal file

Diff too large to display

9
arch/ia64/dig/Makefile Normal file

@@ -0,0 +1,9 @@
#
# ia64/platform/dig/Makefile
#
# Copyright (C) 1999 Silicon Graphics, Inc.
# Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com)
#
obj-y := setup.o
obj-$(CONFIG_IA64_GENERIC) += machvec.o

3
arch/ia64/dig/machvec.c Normal file

@@ -0,0 +1,3 @@
#define MACHVEC_PLATFORM_NAME dig
#define MACHVEC_PLATFORM_HEADER <asm/machvec_dig.h>
#include <asm/machvec_init.h>

86
arch/ia64/dig/setup.c Normal file

@@ -0,0 +1,86 @@
/*
* Platform dependent support for DIG64 platforms.
*
* Copyright (C) 1999 Intel Corp.
* Copyright (C) 1999, 2001 Hewlett-Packard Co
* Copyright (C) 1999, 2001, 2003 David Mosberger-Tang <davidm@hpl.hp.com>
* Copyright (C) 1999 VA Linux Systems
* Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
* Copyright (C) 1999 Vijay Chander <vijay@engr.sgi.com>
*/
#include <linux/config.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/kdev_t.h>
#include <linux/string.h>
#include <linux/tty.h>
#include <linux/console.h>
#include <linux/timex.h>
#include <linux/sched.h>
#include <linux/root_dev.h>
#include <asm/io.h>
#include <asm/machvec.h>
#include <asm/system.h>
/*
* This is here so we can use the CMOS detection in ide-probe.c to
* determine what drives are present. In theory, we don't need this
* as the auto-detection could be done via ide-probe.c:do_probe() but
* in practice that would be much slower, which is painful when
* running in the simulator. Note that passing zeroes in DRIVE_INFO
* is sufficient (the IDE driver will autodetect the drive geometry).
*/
char drive_info[4*16];
void __init
dig_setup (char **cmdline_p)
{
unsigned int orig_x, orig_y, num_cols, num_rows, font_height;
/*
* Default to /dev/sda2. This assumes that the EFI partition
* is physical disk 1 partition 1 and the Linux root disk is
* physical disk 1 partition 2.
*/
ROOT_DEV = Root_SDA2; /* default to second partition on first drive */
#ifdef CONFIG_SMP
init_smp_config();
#endif
memset(&screen_info, 0, sizeof(screen_info));
if (!ia64_boot_param->console_info.num_rows
|| !ia64_boot_param->console_info.num_cols)
{
printk(KERN_WARNING "dig_setup: warning: invalid screen-info, guessing 80x25\n");
orig_x = 0;
orig_y = 0;
num_cols = 80;
num_rows = 25;
font_height = 16;
} else {
orig_x = ia64_boot_param->console_info.orig_x;
orig_y = ia64_boot_param->console_info.orig_y;
num_cols = ia64_boot_param->console_info.num_cols;
num_rows = ia64_boot_param->console_info.num_rows;
font_height = 400 / num_rows;
}
screen_info.orig_x = orig_x;
screen_info.orig_y = orig_y;
screen_info.orig_video_cols = num_cols;
screen_info.orig_video_lines = num_rows;
screen_info.orig_video_points = font_height;
screen_info.orig_video_mode = 3; /* XXX fake */
screen_info.orig_video_isVGA = 1; /* XXX fake */
screen_info.orig_video_ega_bx = 3; /* XXX fake */
}
void __init
dig_irq_init (void)
{
}


@@ -0,0 +1,10 @@
#
# ia64/platform/hp/common/Makefile
#
# Copyright (C) 2002 Hewlett Packard
# Copyright (C) Alex Williamson (alex_williamson@hp.com)
#
obj-y := sba_iommu.o
obj-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += hwsw_iommu.o
obj-$(CONFIG_IA64_GENERIC) += hwsw_iommu.o


@@ -0,0 +1,185 @@
/*
* Copyright (c) 2004 Hewlett-Packard Development Company, L.P.
* Contributed by David Mosberger-Tang <davidm@hpl.hp.com>
*
* This is a pseudo I/O MMU which dispatches to the hardware I/O MMU
* whenever possible. We assume that the hardware I/O MMU requires
* full 32-bit addressability, as is the case, e.g., for HP zx1-based
* systems (there, the I/O MMU window is mapped at 3-4GB). If a
* device doesn't provide full 32-bit addressability, we fall back on
* the sw I/O TLB. This is good enough to let us support broken
* hardware such as soundcards which have a DMA engine that can
* address only 28 bits.
*/
#include <linux/device.h>
#include <asm/machvec.h>
/* swiotlb declarations & definitions: */
extern void swiotlb_init_with_default_size (size_t size);
extern ia64_mv_dma_alloc_coherent swiotlb_alloc_coherent;
extern ia64_mv_dma_free_coherent swiotlb_free_coherent;
extern ia64_mv_dma_map_single swiotlb_map_single;
extern ia64_mv_dma_unmap_single swiotlb_unmap_single;
extern ia64_mv_dma_map_sg swiotlb_map_sg;
extern ia64_mv_dma_unmap_sg swiotlb_unmap_sg;
extern ia64_mv_dma_supported swiotlb_dma_supported;
extern ia64_mv_dma_mapping_error swiotlb_dma_mapping_error;
/* hwiommu declarations & definitions: */
extern ia64_mv_dma_alloc_coherent sba_alloc_coherent;
extern ia64_mv_dma_free_coherent sba_free_coherent;
extern ia64_mv_dma_map_single sba_map_single;
extern ia64_mv_dma_unmap_single sba_unmap_single;
extern ia64_mv_dma_map_sg sba_map_sg;
extern ia64_mv_dma_unmap_sg sba_unmap_sg;
extern ia64_mv_dma_supported sba_dma_supported;
extern ia64_mv_dma_mapping_error sba_dma_mapping_error;
#define hwiommu_alloc_coherent sba_alloc_coherent
#define hwiommu_free_coherent sba_free_coherent
#define hwiommu_map_single sba_map_single
#define hwiommu_unmap_single sba_unmap_single
#define hwiommu_map_sg sba_map_sg
#define hwiommu_unmap_sg sba_unmap_sg
#define hwiommu_dma_supported sba_dma_supported
#define hwiommu_dma_mapping_error sba_dma_mapping_error
#define hwiommu_sync_single_for_cpu machvec_dma_sync_single
#define hwiommu_sync_sg_for_cpu machvec_dma_sync_sg
#define hwiommu_sync_single_for_device machvec_dma_sync_single
#define hwiommu_sync_sg_for_device machvec_dma_sync_sg
/*
* Note: we need to make the determination of whether or not to use
* the sw I/O TLB based purely on the device structure. Anything else
* would be unreliable or would be too intrusive.
*/
static inline int
use_swiotlb (struct device *dev)
{
return dev && dev->dma_mask && !hwiommu_dma_supported(dev, *dev->dma_mask);
}
void
hwsw_init (void)
{
/* default to a smallish 2MB sw I/O TLB */
swiotlb_init_with_default_size (2 * (1<<20));
}
void *
hwsw_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, int flags)
{
if (use_swiotlb(dev))
return swiotlb_alloc_coherent(dev, size, dma_handle, flags);
else
return hwiommu_alloc_coherent(dev, size, dma_handle, flags);
}
void
hwsw_free_coherent (struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle)
{
if (use_swiotlb(dev))
swiotlb_free_coherent(dev, size, vaddr, dma_handle);
else
hwiommu_free_coherent(dev, size, vaddr, dma_handle);
}
dma_addr_t
hwsw_map_single (struct device *dev, void *addr, size_t size, int dir)
{
if (use_swiotlb(dev))
return swiotlb_map_single(dev, addr, size, dir);
else
return hwiommu_map_single(dev, addr, size, dir);
}
void
hwsw_unmap_single (struct device *dev, dma_addr_t iova, size_t size, int dir)
{
if (use_swiotlb(dev))
return swiotlb_unmap_single(dev, iova, size, dir);
else
return hwiommu_unmap_single(dev, iova, size, dir);
}
int
hwsw_map_sg (struct device *dev, struct scatterlist *sglist, int nents, int dir)
{
if (use_swiotlb(dev))
return swiotlb_map_sg(dev, sglist, nents, dir);
else
return hwiommu_map_sg(dev, sglist, nents, dir);
}
void
hwsw_unmap_sg (struct device *dev, struct scatterlist *sglist, int nents, int dir)
{
if (use_swiotlb(dev))
return swiotlb_unmap_sg(dev, sglist, nents, dir);
else
return hwiommu_unmap_sg(dev, sglist, nents, dir);
}
void
hwsw_sync_single_for_cpu (struct device *dev, dma_addr_t addr, size_t size, int dir)
{
if (use_swiotlb(dev))
swiotlb_sync_single_for_cpu(dev, addr, size, dir);
else
hwiommu_sync_single_for_cpu(dev, addr, size, dir);
}
void
hwsw_sync_sg_for_cpu (struct device *dev, struct scatterlist *sg, int nelems, int dir)
{
if (use_swiotlb(dev))
swiotlb_sync_sg_for_cpu(dev, sg, nelems, dir);
else
hwiommu_sync_sg_for_cpu(dev, sg, nelems, dir);
}
void
hwsw_sync_single_for_device (struct device *dev, dma_addr_t addr, size_t size, int dir)
{
if (use_swiotlb(dev))
swiotlb_sync_single_for_device(dev, addr, size, dir);
else
hwiommu_sync_single_for_device(dev, addr, size, dir);
}
void
hwsw_sync_sg_for_device (struct device *dev, struct scatterlist *sg, int nelems, int dir)
{
if (use_swiotlb(dev))
swiotlb_sync_sg_for_device(dev, sg, nelems, dir);
else
hwiommu_sync_sg_for_device(dev, sg, nelems, dir);
}
int
hwsw_dma_supported (struct device *dev, u64 mask)
{
if (hwiommu_dma_supported(dev, mask))
return 1;
return swiotlb_dma_supported(dev, mask);
}
int
hwsw_dma_mapping_error (dma_addr_t dma_addr)
{
return hwiommu_dma_mapping_error (dma_addr) || swiotlb_dma_mapping_error(dma_addr);
}
EXPORT_SYMBOL(hwsw_dma_mapping_error);
EXPORT_SYMBOL(hwsw_map_single);
EXPORT_SYMBOL(hwsw_unmap_single);
EXPORT_SYMBOL(hwsw_map_sg);
EXPORT_SYMBOL(hwsw_unmap_sg);
EXPORT_SYMBOL(hwsw_dma_supported);
EXPORT_SYMBOL(hwsw_alloc_coherent);
EXPORT_SYMBOL(hwsw_free_coherent);
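A minimal, standalone sketch (not taken from the tree; the _stub names and path_for() are assumptions) of how the dispatch above plays out: a device whose DMA mask falls short of full 32-bit addressability is routed to the software I/O TLB, anything else goes to the hardware I/O MMU.
/* hwsw_dispatch_demo.c - hypothetical illustration of the use_swiotlb() test */
#include <stdio.h>
/* Stand-in for sba_dma_supported(): per the comment above, the hardware
   I/O MMU is assumed to require full 32-bit addressability. */
static int hwiommu_dma_supported_stub(unsigned long long dma_mask)
{
	return dma_mask >= 0xffffffffULL;
}
static const char *path_for(unsigned long long dma_mask)
{
	/* Mirrors use_swiotlb(): fall back to the sw I/O TLB whenever the
	   hardware I/O MMU cannot serve the device's DMA mask. */
	return hwiommu_dma_supported_stub(dma_mask) ? "hw IOMMU" : "swiotlb";
}
int main(void)
{
	printf("28-bit sound card -> %s\n", path_for(0x0fffffffULL)); /* swiotlb */
	printf("32-bit PCI device -> %s\n", path_for(0xffffffffULL)); /* hw IOMMU */
	return 0;
}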

Diff too large to display

20
arch/ia64/hp/sim/Kconfig Normal file

@@ -0,0 +1,20 @@
menu "HP Simulator drivers"
depends on IA64_HP_SIM || IA64_GENERIC
config HP_SIMETH
bool "Simulated Ethernet "
config HP_SIMSERIAL
bool "Simulated serial driver support"
config HP_SIMSERIAL_CONSOLE
bool "Console for HP simulator"
depends on HP_SIMSERIAL
config HP_SIMSCSI
tristate "Simulated SCSI disk"
depends on SCSI
endmenu

16
arch/ia64/hp/sim/Makefile Normal file

@@ -0,0 +1,16 @@
#
# ia64/platform/hp/sim/Makefile
#
# Copyright (C) 2002 Hewlett-Packard Co.
# David Mosberger-Tang <davidm@hpl.hp.com>
# Copyright (C) 1999 Silicon Graphics, Inc.
# Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com)
#
obj-y := hpsim_irq.o hpsim_setup.o hpsim.o
obj-$(CONFIG_IA64_GENERIC) += hpsim_machvec.o
obj-$(CONFIG_HP_SIMETH) += simeth.o
obj-$(CONFIG_HP_SIMSERIAL) += simserial.o
obj-$(CONFIG_HP_SIMSERIAL_CONSOLE) += hpsim_console.o
obj-$(CONFIG_HP_SIMSCSI) += simscsi.o


@@ -0,0 +1,37 @@
#
# ia64/boot/Makefile
#
# This file is subject to the terms and conditions of the GNU General Public
# License. See the file "COPYING" in the main directory of this archive
# for more details.
#
# Copyright (C) 1998, 2003 by David Mosberger-Tang <davidm@hpl.hp.com>
#
targets-$(CONFIG_IA64_HP_SIM) += bootloader
targets := vmlinux.bin vmlinux.gz $(targets-y)
quiet_cmd_cptotop = LN $@
cmd_cptotop = ln -f $< $@
vmlinux.gz: $(obj)/vmlinux.gz $(addprefix $(obj)/,$(targets-y))
$(call cmd,cptotop)
@echo ' Kernel: $@ is ready'
boot: bootloader
bootloader: $(obj)/bootloader
$(call cmd,cptotop)
$(obj)/vmlinux.gz: $(obj)/vmlinux.bin FORCE
$(call if_changed,gzip)
$(obj)/vmlinux.bin: vmlinux FORCE
$(call if_changed,objcopy)
LDFLAGS_bootloader = -static -T
$(obj)/bootloader: $(src)/bootloader.lds $(obj)/bootloader.o $(obj)/boot_head.o $(obj)/fw-emu.o \
lib/lib.a arch/ia64/lib/lib.a FORCE
$(call if_changed,ld)


@@ -0,0 +1,144 @@
/*
* Copyright (C) 1998-2003 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
*/
#include <asm/asmmacro.h>
.bss
.align 16
stack_mem:
.skip 16834
.text
/* This needs to be defined because lib/string.c:strlcat() calls it in case of error... */
GLOBAL_ENTRY(printk)
break 0
END(printk)
GLOBAL_ENTRY(_start)
.prologue
.save rp, r0
.body
movl gp = __gp
movl sp = stack_mem
bsw.1
br.call.sptk.many rp=start_bootloader
END(_start)
/*
* Set a break point on this function so that symbols are available to set breakpoints in
* the kernel being debugged.
*/
GLOBAL_ENTRY(debug_break)
br.ret.sptk.many b0
END(debug_break)
GLOBAL_ENTRY(ssc)
.regstk 5,0,0,0
mov r15=in4
break 0x80001
br.ret.sptk.many b0
END(ssc)
GLOBAL_ENTRY(jmp_to_kernel)
.regstk 2,0,0,0
mov r28=in0
mov b7=in1
br.sptk.few b7
END(jmp_to_kernel)
GLOBAL_ENTRY(pal_emulator_static)
mov r8=-1
mov r9=256
;;
cmp.gtu p6,p7=r9,r28 /* r28 <= 255? */
(p6) br.cond.sptk.few static
;;
mov r9=512
;;
cmp.gtu p6,p7=r9,r28
(p6) br.cond.sptk.few stacked
;;
static: cmp.eq p6,p7=6,r28 /* PAL_PTCE_INFO */
(p7) br.cond.sptk.few 1f
;;
mov r8=0 /* status = 0 */
movl r9=0x100000000 /* tc.base */
movl r10=0x0000000200000003 /* count[0], count[1] */
movl r11=0x1000000000002000 /* stride[0], stride[1] */
br.cond.sptk.few rp
1: cmp.eq p6,p7=14,r28 /* PAL_FREQ_RATIOS */
(p7) br.cond.sptk.few 1f
mov r8=0 /* status = 0 */
movl r9 =0x100000064 /* proc_ratio (1/100) */
movl r10=0x100000100 /* bus_ratio<<32 (1/256) */
movl r11=0x100000064 /* itc_ratio<<32 (1/100) */
;;
1: cmp.eq p6,p7=19,r28 /* PAL_RSE_INFO */
(p7) br.cond.sptk.few 1f
mov r8=0 /* status = 0 */
mov r9=96 /* num phys stacked */
mov r10=0 /* hints */
mov r11=0
br.cond.sptk.few rp
1: cmp.eq p6,p7=1,r28 /* PAL_CACHE_FLUSH */
(p7) br.cond.sptk.few 1f
mov r9=ar.lc
movl r8=524288 /* flush 512k cache lines (16MB) */
;;
mov ar.lc=r8
movl r8=0xe000000000000000
;;
.loop: fc r8
add r8=32,r8
br.cloop.sptk.few .loop
sync.i
;;
srlz.i
;;
mov ar.lc=r9
mov r8=r0
;;
1: cmp.eq p6,p7=15,r28 /* PAL_PERF_MON_INFO */
(p7) br.cond.sptk.few 1f
mov r8=0 /* status = 0 */
movl r9 =0x08122f04 /* generic=4 width=47 retired=8 cycles=18 */
mov r10=0 /* reserved */
mov r11=0 /* reserved */
mov r16=0xffff /* implemented PMC */
mov r17=0x3ffff /* implemented PMD */
add r18=8,r29 /* second index */
;;
st8 [r29]=r16,16 /* store implemented PMC */
st8 [r18]=r0,16 /* clear remaining bits */
;;
st8 [r29]=r0,16 /* clear remaining bits */
st8 [r18]=r0,16 /* clear remaining bits */
;;
st8 [r29]=r17,16 /* store implemented PMD */
st8 [r18]=r0,16 /* clear remaining bits */
mov r16=0xf0 /* cycles count capable PMC */
;;
st8 [r29]=r0,16 /* clear remaining bits */
st8 [r18]=r0,16 /* clear remaining bits */
mov r17=0xf0 /* retired bundles capable PMC */
;;
st8 [r29]=r16,16 /* store cycles capable */
st8 [r18]=r0,16 /* clear remaining bits */
;;
st8 [r29]=r0,16 /* clear remaining bits */
st8 [r18]=r0,16 /* clear remaining bits */
;;
st8 [r29]=r17,16 /* store retired bundle capable */
st8 [r18]=r0,16 /* clear remaining bits */
;;
st8 [r29]=r0,16 /* clear remaining bits */
st8 [r18]=r0,16 /* clear remaining bits */
;;
1: br.cond.sptk.few rp
stacked:
br.ret.sptk.few rp
END(pal_emulator_static)


@@ -0,0 +1,176 @@
/*
* arch/ia64/hp/sim/boot/bootloader.c
*
* Loads an ELF kernel.
*
* Copyright (C) 1998-2003 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
* Stephane Eranian <eranian@hpl.hp.com>
*
* 01/07/99 S.Eranian modified to pass command line arguments to kernel
*/
struct task_struct; /* forward declaration for elf.h */
#include <linux/config.h>
#include <linux/elf.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <asm/elf.h>
#include <asm/intrinsics.h>
#include <asm/pal.h>
#include <asm/pgtable.h>
#include <asm/sal.h>
#include <asm/system.h>
#include "ssc.h"
struct disk_req {
unsigned long addr;
unsigned len;
};
struct disk_stat {
int fd;
unsigned count;
};
extern void jmp_to_kernel (unsigned long bp, unsigned long e_entry);
extern struct ia64_boot_param *sys_fw_init (const char *args, int arglen);
extern void debug_break (void);
static void
cons_write (const char *buf)
{
unsigned long ch;
while ((ch = *buf++) != '\0') {
ssc(ch, 0, 0, 0, SSC_PUTCHAR);
if (ch == '\n')
ssc('\r', 0, 0, 0, SSC_PUTCHAR);
}
}
#define MAX_ARGS 32
void
start_bootloader (void)
{
static char mem[4096];
static char buffer[1024];
unsigned long off;
int fd, i;
struct disk_req req;
struct disk_stat stat;
struct elfhdr *elf;
struct elf_phdr *elf_phdr; /* program header */
unsigned long e_entry, e_phoff, e_phnum;
register struct ia64_boot_param *bp;
char *kpath, *args;
long arglen = 0;
ssc(0, 0, 0, 0, SSC_CONSOLE_INIT);
/*
* S.Eranian: extract the commandline argument from the simulator
*
* The expected format is as follows:
*
* kernelname args...
*
* Both are optional but you can't have the second one without the first.
*/
arglen = ssc((long) buffer, 0, 0, 0, SSC_GET_ARGS);
kpath = "vmlinux";
args = buffer;
if (arglen > 0) {
kpath = buffer;
while (*args != ' ' && *args != '\0')
++args, --arglen;
if (*args == ' ')
*args++ = '\0', --arglen;
}
if (arglen <= 0) {
args = "";
arglen = 1;
}
fd = ssc((long) kpath, 1, 0, 0, SSC_OPEN);
if (fd < 0) {
cons_write(kpath);
cons_write(": file not found, reboot now\n");
for(;;);
}
stat.fd = fd;
off = 0;
req.len = sizeof(mem);
req.addr = (long) mem;
ssc(fd, 1, (long) &req, off, SSC_READ);
ssc((long) &stat, 0, 0, 0, SSC_WAIT_COMPLETION);
elf = (struct elfhdr *) mem;
if (elf->e_ident[0] != 0x7f || strncmp(elf->e_ident + 1, "ELF", 3) != 0) {
cons_write("not an ELF file\n");
return;
}
if (elf->e_type != ET_EXEC) {
cons_write("not an ELF executable\n");
return;
}
if (!elf_check_arch(elf)) {
cons_write("kernel not for this processor\n");
return;
}
e_entry = elf->e_entry;
e_phnum = elf->e_phnum;
e_phoff = elf->e_phoff;
cons_write("loading ");
cons_write(kpath);
cons_write("...\n");
for (i = 0; i < e_phnum; ++i) {
req.len = sizeof(*elf_phdr);
req.addr = (long) mem;
ssc(fd, 1, (long) &req, e_phoff, SSC_READ);
ssc((long) &stat, 0, 0, 0, SSC_WAIT_COMPLETION);
if (stat.count != sizeof(*elf_phdr)) {
cons_write("failed to read phdr\n");
return;
}
e_phoff += sizeof(*elf_phdr);
elf_phdr = (struct elf_phdr *) mem;
if (elf_phdr->p_type != PT_LOAD)
continue;
req.len = elf_phdr->p_filesz;
req.addr = __pa(elf_phdr->p_paddr);
ssc(fd, 1, (long) &req, elf_phdr->p_offset, SSC_READ);
ssc((long) &stat, 0, 0, 0, SSC_WAIT_COMPLETION);
memset((char *)__pa(elf_phdr->p_paddr) + elf_phdr->p_filesz, 0,
elf_phdr->p_memsz - elf_phdr->p_filesz);
}
ssc(fd, 0, 0, 0, SSC_CLOSE);
cons_write("starting kernel...\n");
/* fake an I/O base address: */
ia64_setreg(_IA64_REG_AR_KR0, 0xffffc000000UL);
bp = sys_fw_init(args, arglen);
ssc(0, (long) kpath, 0, 0, SSC_LOAD_SYMBOLS);
debug_break();
jmp_to_kernel((unsigned long) bp, e_entry);
cons_write("kernel returned!\n");
ssc(-1, 0, 0, 0, SSC_EXIT);
}


@@ -0,0 +1,65 @@
OUTPUT_FORMAT("elf64-ia64-little")
OUTPUT_ARCH(ia64)
ENTRY(_start)
SECTIONS
{
/* Read-only sections, merged into text segment: */
. = 0x100000;
_text = .;
.text : { *(__ivt_section) *(.text) }
_etext = .;
/* Global data */
_data = .;
.rodata : { *(.rodata) *(.rodata.*) }
.data : { *(.data) *(.gnu.linkonce.d*) CONSTRUCTORS }
__gp = ALIGN (8) + 0x200000;
.got : { *(.got.plt) *(.got) }
/* We want the small data sections together, so single-instruction offsets
can access them all, and initialized data all before uninitialized, so
we can shorten the on-disk segment size. */
.sdata : { *(.sdata) }
_edata = .;
_bss = .;
.sbss : { *(.sbss) *(.scommon) }
.bss : { *(.bss) *(COMMON) }
. = ALIGN(64 / 8);
_end = . ;
/* Stabs debugging sections. */
.stab 0 : { *(.stab) }
.stabstr 0 : { *(.stabstr) }
.stab.excl 0 : { *(.stab.excl) }
.stab.exclstr 0 : { *(.stab.exclstr) }
.stab.index 0 : { *(.stab.index) }
.stab.indexstr 0 : { *(.stab.indexstr) }
.comment 0 : { *(.comment) }
/* DWARF debug sections.
Symbols in the DWARF debugging sections are relative to the beginning
of the section so we begin them at 0. */
/* DWARF 1 */
.debug 0 : { *(.debug) }
.line 0 : { *(.line) }
/* GNU DWARF 1 extensions */
.debug_srcinfo 0 : { *(.debug_srcinfo) }
.debug_sfnames 0 : { *(.debug_sfnames) }
/* DWARF 1.1 and DWARF 2 */
.debug_aranges 0 : { *(.debug_aranges) }
.debug_pubnames 0 : { *(.debug_pubnames) }
/* DWARF 2 */
.debug_info 0 : { *(.debug_info) }
.debug_abbrev 0 : { *(.debug_abbrev) }
.debug_line 0 : { *(.debug_line) }
.debug_frame 0 : { *(.debug_frame) }
.debug_str 0 : { *(.debug_str) }
.debug_loc 0 : { *(.debug_loc) }
.debug_macinfo 0 : { *(.debug_macinfo) }
/* SGI/MIPS DWARF 2 extensions */
.debug_weaknames 0 : { *(.debug_weaknames) }
.debug_funcnames 0 : { *(.debug_funcnames) }
.debug_typenames 0 : { *(.debug_typenames) }
.debug_varnames 0 : { *(.debug_varnames) }
/* These must appear regardless of . */
}


@@ -0,0 +1,398 @@
/*
* PAL & SAL emulation.
*
* Copyright (C) 1998-2001 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
*/
#include <linux/config.h>
#ifdef CONFIG_PCI
# include <linux/pci.h>
#endif
#include <linux/efi.h>
#include <asm/io.h>
#include <asm/pal.h>
#include <asm/sal.h>
#include "ssc.h"
#define MB (1024*1024UL)
#define SIMPLE_MEMMAP 1
#if SIMPLE_MEMMAP
# define NUM_MEM_DESCS 4
#else
# define NUM_MEM_DESCS 16
#endif
static char fw_mem[( sizeof(struct ia64_boot_param)
+ sizeof(efi_system_table_t)
+ sizeof(efi_runtime_services_t)
+ 1*sizeof(efi_config_table_t)
+ sizeof(struct ia64_sal_systab)
+ sizeof(struct ia64_sal_desc_entry_point)
+ NUM_MEM_DESCS*(sizeof(efi_memory_desc_t))
+ 1024)] __attribute__ ((aligned (8)));
#define SECS_PER_HOUR (60 * 60)
#define SECS_PER_DAY (SECS_PER_HOUR * 24)
/* Compute the `struct tm' representation of *T,
offset OFFSET seconds east of UTC,
and store year, yday, mon, mday, wday, hour, min, sec into *TP.
Return nonzero if successful. */
int
offtime (unsigned long t, efi_time_t *tp)
{
const unsigned short int __mon_yday[2][13] =
{
/* Normal years. */
{ 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 },
/* Leap years. */
{ 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 }
};
long int days, rem, y;
const unsigned short int *ip;
days = t / SECS_PER_DAY;
rem = t % SECS_PER_DAY;
while (rem < 0) {
rem += SECS_PER_DAY;
--days;
}
while (rem >= SECS_PER_DAY) {
rem -= SECS_PER_DAY;
++days;
}
tp->hour = rem / SECS_PER_HOUR;
rem %= SECS_PER_HOUR;
tp->minute = rem / 60;
tp->second = rem % 60;
/* January 1, 1970 was a Thursday. */
y = 1970;
# define DIV(a, b) ((a) / (b) - ((a) % (b) < 0))
# define LEAPS_THRU_END_OF(y) (DIV (y, 4) - DIV (y, 100) + DIV (y, 400))
# define __isleap(year) \
((year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0))
while (days < 0 || days >= (__isleap (y) ? 366 : 365)) {
/* Guess a corrected year, assuming 365 days per year. */
long int yg = y + days / 365 - (days % 365 < 0);
/* Adjust DAYS and Y to match the guessed year. */
days -= ((yg - y) * 365 + LEAPS_THRU_END_OF (yg - 1)
- LEAPS_THRU_END_OF (y - 1));
y = yg;
}
tp->year = y;
ip = __mon_yday[__isleap(y)];
for (y = 11; days < (long int) ip[y]; --y)
continue;
days -= ip[y];
tp->month = y + 1;
tp->day = days + 1;
return 1;
}
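As a rough sanity check (a standalone sketch assuming a hosted C library, not the kernel environment), offtime() computes the same calendar fields that gmtime() would: 1000000000 seconds after the epoch should come out as 2001-09-09 01:46:40 UTC.
/* offtime_check.c - hypothetical reference, not part of fw-emu.c */
#include <stdio.h>
#include <time.h>
int main(void)
{
	time_t t = 1000000000;      /* sample timestamp */
	struct tm tm;
	gmtime_r(&t, &tm);          /* host-library equivalent of offtime(t, tp) */
	printf("%04d-%02d-%02d %02d:%02d:%02d\n",
	       tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
	       tm.tm_hour, tm.tm_min, tm.tm_sec);  /* 2001-09-09 01:46:40 */
	return 0;
}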
extern void pal_emulator_static (void);
/* Macro to emulate SAL call using legacy IN and OUT calls to CF8, CFC etc.. */
#define BUILD_CMD(addr) ((0x80000000 | (addr)) & ~3)
#define REG_OFFSET(addr) (0x00000000000000FF & (addr))
#define DEVICE_FUNCTION(addr) (0x000000000000FF00 & (addr))
#define BUS_NUMBER(addr) (0x0000000000FF0000 & (addr))
static efi_status_t
fw_efi_get_time (efi_time_t *tm, efi_time_cap_t *tc)
{
#if defined(CONFIG_IA64_HP_SIM) || defined(CONFIG_IA64_GENERIC)
struct {
int tv_sec; /* must be 32bits to work */
int tv_usec;
} tv32bits;
ssc((unsigned long) &tv32bits, 0, 0, 0, SSC_GET_TOD);
memset(tm, 0, sizeof(*tm));
offtime(tv32bits.tv_sec, tm);
if (tc)
memset(tc, 0, sizeof(*tc));
#else
# error Not implemented yet...
#endif
return EFI_SUCCESS;
}
static void
efi_reset_system (int reset_type, efi_status_t status, unsigned long data_size, efi_char16_t *data)
{
#if defined(CONFIG_IA64_HP_SIM) || defined(CONFIG_IA64_GENERIC)
ssc(status, 0, 0, 0, SSC_EXIT);
#else
# error Not implemented yet...
#endif
}
static efi_status_t
efi_unimplemented (void)
{
return EFI_UNSUPPORTED;
}
static struct sal_ret_values
sal_emulator (long index, unsigned long in1, unsigned long in2,
unsigned long in3, unsigned long in4, unsigned long in5,
unsigned long in6, unsigned long in7)
{
long r9 = 0;
long r10 = 0;
long r11 = 0;
long status;
/*
* Don't do a "switch" here since that gives us code that
* isn't self-relocatable.
*/
status = 0;
if (index == SAL_FREQ_BASE) {
switch (in1) {
case SAL_FREQ_BASE_PLATFORM:
r9 = 200000000;
break;
case SAL_FREQ_BASE_INTERVAL_TIMER:
/*
* Is this supposed to be the cr.itc frequency
* or something platform specific? The SAL
* doc ain't exactly clear on this...
*/
r9 = 700000000;
break;
case SAL_FREQ_BASE_REALTIME_CLOCK:
r9 = 1;
break;
default:
status = -1;
break;
}
} else if (index == SAL_SET_VECTORS) {
;
} else if (index == SAL_GET_STATE_INFO) {
;
} else if (index == SAL_GET_STATE_INFO_SIZE) {
;
} else if (index == SAL_CLEAR_STATE_INFO) {
;
} else if (index == SAL_MC_RENDEZ) {
;
} else if (index == SAL_MC_SET_PARAMS) {
;
} else if (index == SAL_CACHE_FLUSH) {
;
} else if (index == SAL_CACHE_INIT) {
;
#ifdef CONFIG_PCI
} else if (index == SAL_PCI_CONFIG_READ) {
/*
* in1 contains the PCI configuration address and in2
* the size of the read. The value that is read is
* returned via the general register r9.
*/
outl(BUILD_CMD(in1), 0xCF8);
if (in2 == 1) /* Reading byte */
r9 = inb(0xCFC + ((REG_OFFSET(in1) & 3)));
else if (in2 == 2) /* Reading word */
r9 = inw(0xCFC + ((REG_OFFSET(in1) & 2)));
else /* Reading dword */
r9 = inl(0xCFC);
status = PCIBIOS_SUCCESSFUL;
} else if (index == SAL_PCI_CONFIG_WRITE) {
/*
* in1 contains the PCI configuration address, in2 the
* size of the write, and in3 the actual value to be
* written out.
*/
outl(BUILD_CMD(in1), 0xCF8);
if (in2 == 1) /* Writing byte */
outb(in3, 0xCFC + ((REG_OFFSET(in1) & 3)));
else if (in2 == 2) /* Writing word */
outw(in3, 0xCFC + ((REG_OFFSET(in1) & 2)));
else /* Writing dword */
outl(in3, 0xCFC);
status = PCIBIOS_SUCCESSFUL;
#endif /* CONFIG_PCI */
} else if (index == SAL_UPDATE_PAL) {
;
} else {
status = -1;
}
return ((struct sal_ret_values) {status, r9, r10, r11});
}
/*
* This is here to work around a bug in egcs-1.1.1b that causes the
* compiler to crash (seems like a bug in the new alias analysis code).
*/
void *
id (long addr)
{
return (void *) addr;
}
struct ia64_boot_param *
sys_fw_init (const char *args, int arglen)
{
efi_system_table_t *efi_systab;
efi_runtime_services_t *efi_runtime;
efi_config_table_t *efi_tables;
struct ia64_sal_systab *sal_systab;
efi_memory_desc_t *efi_memmap, *md;
unsigned long *pal_desc, *sal_desc;
struct ia64_sal_desc_entry_point *sal_ed;
struct ia64_boot_param *bp;
unsigned char checksum = 0;
char *cp, *cmd_line;
int i = 0;
# define MAKE_MD(typ, attr, start, end) \
do { \
md = efi_memmap + i++; \
md->type = typ; \
md->pad = 0; \
md->phys_addr = start; \
md->virt_addr = 0; \
md->num_pages = (end - start) >> 12; \
md->attribute = attr; \
} while (0)
memset(fw_mem, 0, sizeof(fw_mem));
pal_desc = (unsigned long *) &pal_emulator_static;
sal_desc = (unsigned long *) &sal_emulator;
cp = fw_mem;
efi_systab = (void *) cp; cp += sizeof(*efi_systab);
efi_runtime = (void *) cp; cp += sizeof(*efi_runtime);
efi_tables = (void *) cp; cp += sizeof(*efi_tables);
sal_systab = (void *) cp; cp += sizeof(*sal_systab);
sal_ed = (void *) cp; cp += sizeof(*sal_ed);
efi_memmap = (void *) cp; cp += NUM_MEM_DESCS*sizeof(*efi_memmap);
bp = (void *) cp; cp += sizeof(*bp);
cmd_line = (void *) cp;
if (args) {
if (arglen >= 1024)
arglen = 1023;
memcpy(cmd_line, args, arglen);
} else {
arglen = 0;
}
cmd_line[arglen] = '\0';
memset(efi_systab, 0, sizeof(*efi_systab));
efi_systab->hdr.signature = EFI_SYSTEM_TABLE_SIGNATURE;
efi_systab->hdr.revision = EFI_SYSTEM_TABLE_REVISION;
efi_systab->hdr.headersize = sizeof(efi_systab->hdr);
efi_systab->fw_vendor = __pa("H\0e\0w\0l\0e\0t\0t\0-\0P\0a\0c\0k\0a\0r\0d\0\0");
efi_systab->fw_revision = 1;
efi_systab->runtime = (void *) __pa(efi_runtime);
efi_systab->nr_tables = 1;
efi_systab->tables = __pa(efi_tables);
efi_runtime->hdr.signature = EFI_RUNTIME_SERVICES_SIGNATURE;
efi_runtime->hdr.revision = EFI_RUNTIME_SERVICES_REVISION;
efi_runtime->hdr.headersize = sizeof(efi_runtime->hdr);
efi_runtime->get_time = __pa(&fw_efi_get_time);
efi_runtime->set_time = __pa(&efi_unimplemented);
efi_runtime->get_wakeup_time = __pa(&efi_unimplemented);
efi_runtime->set_wakeup_time = __pa(&efi_unimplemented);
efi_runtime->set_virtual_address_map = __pa(&efi_unimplemented);
efi_runtime->get_variable = __pa(&efi_unimplemented);
efi_runtime->get_next_variable = __pa(&efi_unimplemented);
efi_runtime->set_variable = __pa(&efi_unimplemented);
efi_runtime->get_next_high_mono_count = __pa(&efi_unimplemented);
efi_runtime->reset_system = __pa(&efi_reset_system);
efi_tables->guid = SAL_SYSTEM_TABLE_GUID;
efi_tables->table = __pa(sal_systab);
/* fill in the SAL system table: */
memcpy(sal_systab->signature, "SST_", 4);
sal_systab->size = sizeof(*sal_systab);
sal_systab->sal_rev_minor = 1;
sal_systab->sal_rev_major = 0;
sal_systab->entry_count = 1;
#ifdef CONFIG_IA64_GENERIC
strcpy(sal_systab->oem_id, "Generic");
strcpy(sal_systab->product_id, "IA-64 system");
#endif
#ifdef CONFIG_IA64_HP_SIM
strcpy(sal_systab->oem_id, "Hewlett-Packard");
strcpy(sal_systab->product_id, "HP-simulator");
#endif
#ifdef CONFIG_IA64_SDV
strcpy(sal_systab->oem_id, "Intel");
strcpy(sal_systab->product_id, "SDV");
#endif
/* fill in an entry point: */
sal_ed->type = SAL_DESC_ENTRY_POINT;
sal_ed->pal_proc = __pa(pal_desc[0]);
sal_ed->sal_proc = __pa(sal_desc[0]);
sal_ed->gp = __pa(sal_desc[1]);
for (cp = (char *) sal_systab; cp < (char *) efi_memmap; ++cp)
checksum += *cp;
sal_systab->checksum = -checksum;
#if SIMPLE_MEMMAP
/* simulate free memory at physical address zero */
MAKE_MD(EFI_BOOT_SERVICES_DATA, EFI_MEMORY_WB, 0*MB, 1*MB);
MAKE_MD(EFI_PAL_CODE, EFI_MEMORY_WB, 1*MB, 2*MB);
MAKE_MD(EFI_CONVENTIONAL_MEMORY, EFI_MEMORY_WB, 2*MB, 130*MB);
MAKE_MD(EFI_CONVENTIONAL_MEMORY, EFI_MEMORY_WB, 4096*MB, 4128*MB);
#else
MAKE_MD( 4, 0x9, 0x0000000000000000, 0x0000000000001000);
MAKE_MD( 7, 0x9, 0x0000000000001000, 0x000000000008a000);
MAKE_MD( 4, 0x9, 0x000000000008a000, 0x00000000000a0000);
MAKE_MD( 5, 0x8000000000000009, 0x00000000000c0000, 0x0000000000100000);
MAKE_MD( 7, 0x9, 0x0000000000100000, 0x0000000004400000);
MAKE_MD( 2, 0x9, 0x0000000004400000, 0x0000000004be5000);
MAKE_MD( 7, 0x9, 0x0000000004be5000, 0x000000007f77e000);
MAKE_MD( 6, 0x8000000000000009, 0x000000007f77e000, 0x000000007fb94000);
MAKE_MD( 6, 0x8000000000000009, 0x000000007fb94000, 0x000000007fb95000);
MAKE_MD( 6, 0x8000000000000009, 0x000000007fb95000, 0x000000007fc00000);
MAKE_MD(13, 0x8000000000000009, 0x000000007fc00000, 0x000000007fc3a000);
MAKE_MD( 7, 0x9, 0x000000007fc3a000, 0x000000007fea0000);
MAKE_MD( 5, 0x8000000000000009, 0x000000007fea0000, 0x000000007fea8000);
MAKE_MD( 7, 0x9, 0x000000007fea8000, 0x000000007feab000);
MAKE_MD( 5, 0x8000000000000009, 0x000000007feab000, 0x000000007ffff000);
MAKE_MD( 7, 0x9, 0x00000000ff400000, 0x0000000104000000);
#endif
bp->efi_systab = __pa(&fw_mem);
bp->efi_memmap = __pa(efi_memmap);
bp->efi_memmap_size = NUM_MEM_DESCS*sizeof(efi_memory_desc_t);
bp->efi_memdesc_size = sizeof(efi_memory_desc_t);
bp->efi_memdesc_version = 1;
bp->command_line = __pa(cmd_line);
bp->console_info.num_cols = 80;
bp->console_info.num_rows = 25;
bp->console_info.orig_x = 0;
bp->console_info.orig_y = 24;
bp->fpswa = 0;
return bp;
}
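For illustration only (a standalone sketch; the sample address is an assumption, not from the commit), this is how the BUILD_CMD()/REG_OFFSET()/DEVICE_FUNCTION()/BUS_NUMBER() macros used by sal_emulator() decompose a SAL PCI configuration address into the Type-1 command written to port 0xCF8, with the data then transferred through port 0xCFC.
/* cf8_demo.c - hypothetical decoding of a SAL PCI config address */
#include <stdio.h>
#define BUILD_CMD(addr)		((0x80000000 | (addr)) & ~3)
#define REG_OFFSET(addr)	(0x00000000000000FF & (addr))
#define DEVICE_FUNCTION(addr)	(0x000000000000FF00 & (addr))
#define BUS_NUMBER(addr)	(0x0000000000FF0000 & (addr))
int main(void)
{
	/* Assumed example: bus 1, device/function 0x08, register 0x10 (BAR0). */
	unsigned long addr = 0x00010810UL;
	printf("CF8 command: 0x%08lx\n", (unsigned long) BUILD_CMD(addr));
	printf("bus        : 0x%02lx\n", BUS_NUMBER(addr) >> 16);
	printf("dev/fn     : 0x%02lx\n", DEVICE_FUNCTION(addr) >> 8);
	printf("register   : 0x%02lx\n", REG_OFFSET(addr));
	return 0;
}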


@@ -0,0 +1,35 @@
/*
* Copyright (C) 1998-2003 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
* Stephane Eranian <eranian@hpl.hp.com>
*/
#ifndef ssc_h
#define ssc_h
/* Simulator system calls: */
#define SSC_CONSOLE_INIT 20
#define SSC_GETCHAR 21
#define SSC_PUTCHAR 31
#define SSC_OPEN 50
#define SSC_CLOSE 51
#define SSC_READ 52
#define SSC_WRITE 53
#define SSC_GET_COMPLETION 54
#define SSC_WAIT_COMPLETION 55
#define SSC_CONNECT_INTERRUPT 58
#define SSC_GENERATE_INTERRUPT 59
#define SSC_SET_PERIODIC_INTERRUPT 60
#define SSC_GET_RTC 65
#define SSC_EXIT 66
#define SSC_LOAD_SYMBOLS 69
#define SSC_GET_TOD 74
#define SSC_GET_ARGS 75
/*
* Simulator system call.
*/
extern long ssc (long arg0, long arg1, long arg2, long arg3, int nr);
#endif /* ssc_h */

10
arch/ia64/hp/sim/hpsim.S Normal file

@@ -0,0 +1,10 @@
#include <asm/asmmacro.h>
/*
* Simulator system call.
*/
GLOBAL_ENTRY(ia64_ssc)
mov r15=r36
break 0x80001
br.ret.sptk.many rp
END(ia64_ssc)


@@ -0,0 +1,65 @@
/*
* Platform dependent support for HP simulator.
*
* Copyright (C) 1998, 1999, 2002 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
* Copyright (C) 1999 Vijay Chander <vijay@engr.sgi.com>
*/
#include <linux/config.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/param.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/tty.h>
#include <linux/kdev_t.h>
#include <linux/console.h>
#include <asm/delay.h>
#include <asm/irq.h>
#include <asm/pal.h>
#include <asm/machvec.h>
#include <asm/pgtable.h>
#include <asm/sal.h>
#include "hpsim_ssc.h"
static int simcons_init (struct console *, char *);
static void simcons_write (struct console *, const char *, unsigned);
static struct tty_driver *simcons_console_device (struct console *, int *);
struct console hpsim_cons = {
.name = "simcons",
.write = simcons_write,
.device = simcons_console_device,
.setup = simcons_init,
.flags = CON_PRINTBUFFER,
.index = -1,
};
static int
simcons_init (struct console *cons, char *options)
{
return 0;
}
static void
simcons_write (struct console *cons, const char *buf, unsigned count)
{
unsigned long ch;
while (count-- > 0) {
ch = *buf++;
ia64_ssc(ch, 0, 0, 0, SSC_PUTCHAR);
if (ch == '\n')
ia64_ssc('\r', 0, 0, 0, SSC_PUTCHAR);
}
}
static struct tty_driver *simcons_console_device (struct console *c, int *index)
{
extern struct tty_driver *hp_simserial_driver;
*index = c->index;
return hp_simserial_driver;
}


@@ -0,0 +1,51 @@
/*
* Platform dependent support for HP simulator.
*
* Copyright (C) 1998-2001 Hewlett-Packard Co
* Copyright (C) 1998-2001 David Mosberger-Tang <davidm@hpl.hp.com>
*/
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/irq.h>
static unsigned int
hpsim_irq_startup (unsigned int irq)
{
return 0;
}
static void
hpsim_irq_noop (unsigned int irq)
{
}
static void
hpsim_set_affinity_noop (unsigned int a, cpumask_t b)
{
}
static struct hw_interrupt_type irq_type_hp_sim = {
.typename = "hpsim",
.startup = hpsim_irq_startup,
.shutdown = hpsim_irq_noop,
.enable = hpsim_irq_noop,
.disable = hpsim_irq_noop,
.ack = hpsim_irq_noop,
.end = hpsim_irq_noop,
.set_affinity = hpsim_set_affinity_noop,
};
void __init
hpsim_irq_init (void)
{
irq_desc_t *idesc;
int i;
for (i = 0; i < NR_IRQS; ++i) {
idesc = irq_descp(i);
if (idesc->handler == &no_irq_type)
idesc->handler = &irq_type_hp_sim;
}
}


@@ -0,0 +1,3 @@
#define MACHVEC_PLATFORM_NAME hpsim
#define MACHVEC_PLATFORM_HEADER <asm/machvec_hpsim.h>
#include <asm/machvec_init.h>


@@ -0,0 +1,52 @@
/*
* Platform dependent support for HP simulator.
*
* Copyright (C) 1998, 1999, 2002 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
* Copyright (C) 1999 Vijay Chander <vijay@engr.sgi.com>
*/
#include <linux/config.h>
#include <linux/console.h>
#include <linux/init.h>
#include <linux/kdev_t.h>
#include <linux/kernel.h>
#include <linux/major.h>
#include <linux/param.h>
#include <linux/root_dev.h>
#include <linux/string.h>
#include <linux/types.h>
#include <asm/delay.h>
#include <asm/irq.h>
#include <asm/pal.h>
#include <asm/machvec.h>
#include <asm/pgtable.h>
#include <asm/sal.h>
#include "hpsim_ssc.h"
void
ia64_ssc_connect_irq (long intr, long irq)
{
ia64_ssc(intr, irq, 0, 0, SSC_CONNECT_INTERRUPT);
}
void
ia64_ctl_trace (long on)
{
ia64_ssc(on, 0, 0, 0, SSC_CTL_TRACE);
}
void __init
hpsim_setup (char **cmdline_p)
{
ROOT_DEV = Root_SDA1; /* default to first SCSI drive */
#ifdef CONFIG_HP_SIMSERIAL_CONSOLE
{
extern struct console hpsim_cons;
if (ia64_platform_is("hpsim"))
register_console(&hpsim_cons);
}
#endif
}


@@ -0,0 +1,36 @@
/*
* Platform dependent support for HP simulator.
*
* Copyright (C) 1998, 1999 Hewlett-Packard Co
* Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com>
* Copyright (C) 1999 Vijay Chander <vijay@engr.sgi.com>
*/
#ifndef _IA64_PLATFORM_HPSIM_SSC_H
#define _IA64_PLATFORM_HPSIM_SSC_H
/* Simulator system calls: */
#define SSC_CONSOLE_INIT 20
#define SSC_GETCHAR 21
#define SSC_PUTCHAR 31
#define SSC_CONNECT_INTERRUPT 58
#define SSC_GENERATE_INTERRUPT 59
#define SSC_SET_PERIODIC_INTERRUPT 60
#define SSC_GET_RTC 65
#define SSC_EXIT 66
#define SSC_LOAD_SYMBOLS 69
#define SSC_GET_TOD 74
#define SSC_CTL_TRACE 76
#define SSC_NETDEV_PROBE 100
#define SSC_NETDEV_SEND 101
#define SSC_NETDEV_RECV 102
#define SSC_NETDEV_ATTACH 103
#define SSC_NETDEV_DETACH 104
/*
* Simulator system call.
*/
extern long ia64_ssc (long arg0, long arg1, long arg2, long arg3, int nr);
#endif /* _IA64_PLATFORM_HPSIM_SSC_H */

530
arch/ia64/hp/sim/simeth.c Normal file

@@ -0,0 +1,530 @@
/*
* Simulated Ethernet Driver
*
* Copyright (C) 1999-2001, 2003 Hewlett-Packard Co
* Stephane Eranian <eranian@hpl.hp.com>
*/
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/types.h>
#include <linux/in.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/inetdevice.h>
#include <linux/if_ether.h>
#include <linux/if_arp.h>
#include <linux/skbuff.h>
#include <linux/notifier.h>
#include <linux/bitops.h>
#include <asm/system.h>
#include <asm/irq.h>
#define SIMETH_RECV_MAX 10
/*
* Maximum possible received frame for Ethernet.
* We preallocate an sk_buff of that size to avoid a costly
* memcpy from a temporary buffer into the sk_buff. We do basically
* what's done in other drivers, like eepro with a ring.
* The difference is, of course, that we don't have real DMA !!!
*/
#define SIMETH_FRAME_SIZE ETH_FRAME_LEN
#define SSC_NETDEV_PROBE 100
#define SSC_NETDEV_SEND 101
#define SSC_NETDEV_RECV 102
#define SSC_NETDEV_ATTACH 103
#define SSC_NETDEV_DETACH 104
#define NETWORK_INTR 8
struct simeth_local {
struct net_device_stats stats;
int simfd; /* descriptor in the simulator */
};
static int simeth_probe1(void);
static int simeth_open(struct net_device *dev);
static int simeth_close(struct net_device *dev);
static int simeth_tx(struct sk_buff *skb, struct net_device *dev);
static int simeth_rx(struct net_device *dev);
static struct net_device_stats *simeth_get_stats(struct net_device *dev);
static irqreturn_t simeth_interrupt(int irq, void *dev_id, struct pt_regs * regs);
static void set_multicast_list(struct net_device *dev);
static int simeth_device_event(struct notifier_block *this,unsigned long event, void *ptr);
static char *simeth_version="0.3";
/*
* This variable is used to establish a mapping between the Linux/ia64 kernel
* and the host linux kernel.
*
* As of today, we support only one card, even though most of the code
* is ready for many more. The mapping is then:
* linux/ia64 -> linux/x86
* eth0 -> eth1
*
* In the future, with some string operations, we could easily support up
* to 10 cards (0-9).
*
* The default mapping can be changed on the kernel command line by
* specifying simeth=ethX (or whatever string you want).
*/
static char *simeth_device="eth0"; /* default host interface to use */
static volatile unsigned int card_count; /* how many cards "found" so far */
static int simeth_debug; /* set to 1 to get debug information */
/*
* Used to catch IFF_UP & IFF_DOWN events
*/
static struct notifier_block simeth_dev_notifier = {
simeth_device_event,
0
};
/*
* Function invoked when the simeth= kernel command line option is used.
*
* Format: simeth=interface_name (like eth0)
*/
static int __init
simeth_setup(char *str)
{
simeth_device = str;
return 1;
}
__setup("simeth=", simeth_setup);
/*
* Function used to probe for simeth devices when not installed
* as a loadable module
*/
int __init
simeth_probe (void)
{
int r;
printk(KERN_INFO "simeth: v%s\n", simeth_version);
r = simeth_probe1();
if (r == 0) register_netdevice_notifier(&simeth_dev_notifier);
return r;
}
extern long ia64_ssc (long, long, long, long, int);
extern void ia64_ssc_connect_irq (long intr, long irq);
static inline int
netdev_probe(char *name, unsigned char *ether)
{
return ia64_ssc(__pa(name), __pa(ether), 0,0, SSC_NETDEV_PROBE);
}
static inline int
netdev_connect(int irq)
{
/* XXX Fix me
* this does not support multiple cards
* also no return value
*/
ia64_ssc_connect_irq(NETWORK_INTR, irq);
return 0;
}
static inline int
netdev_attach(int fd, int irq, unsigned int ipaddr)
{
/* this puts the host interface in the right mode (start interrupting) */
return ia64_ssc(fd, ipaddr, 0,0, SSC_NETDEV_ATTACH);
}
static inline int
netdev_detach(int fd)
{
/* deactivate the host interface (don't interrupt anymore) */
return ia64_ssc(fd, 0,0,0, SSC_NETDEV_DETACH);
}
static inline int
netdev_send(int fd, unsigned char *buf, unsigned int len)
{
return ia64_ssc(fd, __pa(buf), len, 0, SSC_NETDEV_SEND);
}
static inline int
netdev_read(int fd, unsigned char *buf, unsigned int len)
{
return ia64_ssc(fd, __pa(buf), len, 0, SSC_NETDEV_RECV);
}
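/*
 * Taken together, the wrappers above form the whole SSC network protocol:
 * probe the host interface, connect the simulator's NETWORK_INTR to an irq
 * vector, then attach to start interrupt delivery.  A minimal sketch of that
 * sequence, with error handling omitted (illustrative only -- the function
 * name is not part of the driver; simeth_probe1() below does the real work):
 */
static int __init
simeth_bringup_sketch (char *host_if, int irq, unsigned int ipaddr)
{
        unsigned char mac[ETH_ALEN];
        int fd;

        fd = netdev_probe(host_if, mac);        /* ask the simulator for the host NIC */
        if (fd == -1)
                return -ENODEV;
        netdev_connect(irq);                    /* route NETWORK_INTR to our irq vector */
        netdev_attach(fd, irq, ipaddr);         /* start interrupt delivery/filtering */
        return fd;
}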
/*
* Function shared with module code, so cannot be in init section
*
* So far this function "detects" only one card (test_&_set) but could
* be extended easily.
*
* Return:
* - -ENODEV if no device is found
* - -ENOMEM if out of memory
* - 0 otherwise
*/
static int
simeth_probe1(void)
{
unsigned char mac_addr[ETH_ALEN];
struct simeth_local *local;
struct net_device *dev;
int fd, i, err;
/*
* XXX Fix me
* let's support just one card for now
*/
if (test_and_set_bit(0, &card_count))
return -ENODEV;
/*
* check with the simulator for the device
*/
fd = netdev_probe(simeth_device, mac_addr);
if (fd == -1)
return -ENODEV;
dev = alloc_etherdev(sizeof(struct simeth_local));
if (!dev)
return -ENOMEM;
memcpy(dev->dev_addr, mac_addr, sizeof(mac_addr));
local = dev->priv;
local->simfd = fd; /* keep track of underlying file descriptor */
dev->open = simeth_open;
dev->stop = simeth_close;
dev->hard_start_xmit = simeth_tx;
dev->get_stats = simeth_get_stats;
dev->set_multicast_list = set_multicast_list; /* not yet used */
err = register_netdev(dev);
if (err) {
free_netdev(dev);
return err;
}
dev->irq = assign_irq_vector(AUTO_ASSIGN);
/*
* attach the interrupt in the simulator; this does not enable interrupts
* until a netdev_attach() is called
*/
netdev_connect(dev->irq);
printk(KERN_INFO "%s: hosteth=%s simfd=%d, HwAddr",
dev->name, simeth_device, local->simfd);
for(i = 0; i < ETH_ALEN; i++) {
printk(" %2.2x", dev->dev_addr[i]);
}
printk(", IRQ %d\n", dev->irq);
return 0;
}
/*
* actually binds the device to an interrupt vector
*/
static int
simeth_open(struct net_device *dev)
{
if (request_irq(dev->irq, simeth_interrupt, 0, "simeth", dev)) {
printk(KERN_WARNING "simeth: unable to get IRQ %d.\n", dev->irq);
return -EAGAIN;
}
netif_start_queue(dev);
return 0;
}
/* copied from lapbether.c */
static __inline__ int dev_is_ethdev(struct net_device *dev)
{
return ( dev->type == ARPHRD_ETHER && strncmp(dev->name, "dummy", 5));
}
/*
* Handler for IFF_UP or IFF_DOWN
*
* The reason for this is that we don't want to be interrupted when the
* interface is down. There is no way to disconnect in the simulator. Instead
* we use this function to shut down packet processing in the frame filter
* in the simulator, so no interrupts are generated.
*
* This is also the place where we pass the IP address of this device to the
* simulator so that we can start filtering packets for it.
*
* There may be a better way of doing this, but I don't know of one yet.
*/
static int
simeth_device_event(struct notifier_block *this,unsigned long event, void *ptr)
{
struct net_device *dev = ptr;
struct simeth_local *local;
struct in_device *in_dev;
struct in_ifaddr **ifap = NULL;
struct in_ifaddr *ifa = NULL;
int r;
if ( ! dev ) {
printk(KERN_WARNING "simeth_device_event dev=0\n");
return NOTIFY_DONE;
}
if ( event != NETDEV_UP && event != NETDEV_DOWN ) return NOTIFY_DONE;
/*
* Check whether or not it's for an ethernet device
*
* XXX Fixme: This works only as long as we support one
* type of ethernet device.
*/
if ( !dev_is_ethdev(dev) ) return NOTIFY_DONE;
if ((in_dev=dev->ip_ptr) != NULL) {
for (ifap=&in_dev->ifa_list; (ifa=*ifap) != NULL; ifap=&ifa->ifa_next)
if (strcmp(dev->name, ifa->ifa_label) == 0) break;
}
if ( ifa == NULL ) {
printk(KERN_ERR "simeth_open: can't find device %s's ifa\n", dev->name);
return NOTIFY_DONE;
}
printk(KERN_INFO "simeth_device_event: %s ipaddr=0x%x\n",
dev->name, htonl(ifa->ifa_local));
/*
* XXX Fix me
* if the device was up, and we're simply reconfiguring it, not sure
* we get DOWN then UP.
*/
local = dev->priv;
/* now do it for real */
r = event == NETDEV_UP ?
netdev_attach(local->simfd, dev->irq, htonl(ifa->ifa_local)):
netdev_detach(local->simfd);
printk(KERN_INFO "simeth: netdev_attach/detach: event=%s ->%d\n",
event == NETDEV_UP ? "attach":"detach", r);
return NOTIFY_DONE;
}
static int
simeth_close(struct net_device *dev)
{
netif_stop_queue(dev);
free_irq(dev->irq, dev);
return 0;
}
/*
* Only used for debug
*/
static void
frame_print(unsigned char *from, unsigned char *frame, int len)
{
int i;
printk("%s: (%d) %02x", from, len, frame[0] & 0xff);
for(i=1; i < 6; i++ ) {
printk(":%02x", frame[i] &0xff);
}
printk(" %2x", frame[6] &0xff);
for(i=7; i < 12; i++ ) {
printk(":%02x", frame[i] &0xff);
}
printk(" [%02x%02x]\n", frame[12], frame[13]);
for(i=14; i < len; i++ ) {
printk("%02x ", frame[i] &0xff);
if ( (i%10)==0) printk("\n");
}
printk("\n");
}
/*
* Function used to transmit a frame; the very last step on the path before
* going to the simulator.
*/
static int
simeth_tx(struct sk_buff *skb, struct net_device *dev)
{
struct simeth_local *local = dev->priv;
#if 0
/* ensure we have at least ETH_ZLEN bytes (min frame size) */
unsigned int length = ETH_ZLEN < skb->len ? skb->len : ETH_ZLEN;
/* Where do the extra padding bytes come from in the skbuff? */
#else
/* the real driver in the host system is going to take care of that
* or maybe it's the NIC itself.
*/
unsigned int length = skb->len;
#endif
local->stats.tx_bytes += skb->len;
local->stats.tx_packets++;
if (simeth_debug > 5) frame_print("simeth_tx", skb->data, length);
netdev_send(local->simfd, skb->data, length);
/*
* we are synchronous on write, so we don't simulate a
* transmit complete interrupt, thus we don't need to arm a tx
*/
dev_kfree_skb(skb);
return 0;
}
static inline struct sk_buff *
make_new_skb(struct net_device *dev)
{
struct sk_buff *nskb;
/*
* The +2 is used to make sure that the IP header is nicely
* aligned (on a 4-byte boundary: 14+2 = 16)
*/
nskb = dev_alloc_skb(SIMETH_FRAME_SIZE + 2);
if ( nskb == NULL ) {
printk(KERN_NOTICE "%s: memory squeeze. dropping packet.\n", dev->name);
return NULL;
}
nskb->dev = dev;
skb_reserve(nskb, 2); /* Align IP on 16 byte boundaries */
skb_put(nskb,SIMETH_FRAME_SIZE);
return nskb;
}
/*
* called from interrupt handler to process a received frame
*/
static int
simeth_rx(struct net_device *dev)
{
struct simeth_local *local;
struct sk_buff *skb;
int len;
int rcv_count = SIMETH_RECV_MAX;
local = dev->priv;
/*
* the loop concept has been borrowed from other drivers;
* it looks like a throttling thing to avoid pushing too many
* packets at one time into the stack, making sure we can process them
* upstream and make forward progress overall
*/
do {
if ( (skb=make_new_skb(dev)) == NULL ) {
printk(KERN_NOTICE "%s: memory squeeze. dropping packet.\n", dev->name);
local->stats.rx_dropped++;
return 0;
}
/*
* Read only one frame at a time
*/
len = netdev_read(local->simfd, skb->data, SIMETH_FRAME_SIZE);
if ( len == 0 ) {
if ( simeth_debug > 0 ) printk(KERN_WARNING "%s: count=%d netdev_read=0\n",
dev->name, SIMETH_RECV_MAX-rcv_count);
break;
}
#if 0
/*
* XXX Fix me
* Should really do a csum+copy here
*/
memcpy(skb->data, frame, len);
#endif
skb->protocol = eth_type_trans(skb, dev);
if ( simeth_debug > 6 ) frame_print("simeth_rx", skb->data, len);
/*
* push the packet up & trigger software interrupt
*/
netif_rx(skb);
local->stats.rx_packets++;
local->stats.rx_bytes += len;
} while ( --rcv_count );
return len; /* 0 = nothing left to read, otherwise, we can try again */
}
/*
* Interrupt handler (Yes, we can do it too !!!)
*/
static irqreturn_t
simeth_interrupt(int irq, void *dev_id, struct pt_regs * regs)
{
struct net_device *dev = dev_id;
if ( dev == NULL ) {
printk(KERN_WARNING "simeth: irq %d for unknown device\n", irq);
return IRQ_NONE;
}
/*
* very simple loop because we get interrupts only when receiving
*/
while (simeth_rx(dev));
return IRQ_HANDLED;
}
static struct net_device_stats *
simeth_get_stats(struct net_device *dev)
{
struct simeth_local *local = dev->priv;
return &local->stats;
}
/* fake multicast ability */
static void
set_multicast_list(struct net_device *dev)
{
printk(KERN_WARNING "%s: set_multicast_list called\n", dev->name);
}
__initcall(simeth_probe);

404
arch/ia64/hp/sim/simscsi.c Normal file

@@ -0,0 +1,404 @@
/*
* Simulated SCSI driver.
*
* Copyright (C) 1999, 2001-2003 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
* Stephane Eranian <eranian@hpl.hp.com>
*
* 02/01/15 David Mosberger Updated for v2.5.1
* 99/12/18 David Mosberger Added support for READ10/WRITE10 needed by linux v2.3.33
*/
#include <linux/blkdev.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/timer.h>
#include <asm/irq.h>
#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_host.h>
#define DEBUG_SIMSCSI 0
#define SIMSCSI_REQ_QUEUE_LEN 64
#define DEFAULT_SIMSCSI_ROOT "/var/ski-disks/sd"
/* Simulator system calls: */
#define SSC_OPEN 50
#define SSC_CLOSE 51
#define SSC_READ 52
#define SSC_WRITE 53
#define SSC_GET_COMPLETION 54
#define SSC_WAIT_COMPLETION 55
#define SSC_WRITE_ACCESS 2
#define SSC_READ_ACCESS 1
#if DEBUG_SIMSCSI
int simscsi_debug;
# define DBG simscsi_debug
#else
# define DBG 0
#endif
static struct Scsi_Host *host;
static void simscsi_interrupt (unsigned long val);
static DECLARE_TASKLET(simscsi_tasklet, simscsi_interrupt, 0);
struct disk_req {
unsigned long addr;
unsigned len;
};
struct disk_stat {
int fd;
unsigned count;
};
extern long ia64_ssc (long arg0, long arg1, long arg2, long arg3, int nr);
static int desc[16] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
static struct queue_entry {
struct scsi_cmnd *sc;
} queue[SIMSCSI_REQ_QUEUE_LEN];
static int rd, wr;
static atomic_t num_reqs = ATOMIC_INIT(0);
/* base name for default disks */
static char *simscsi_root = DEFAULT_SIMSCSI_ROOT;
#define MAX_ROOT_LEN 128
/*
* Used to set up a new base for disk images.
* To use /foo/bar/disk[a-z] as disk images,
* specify simscsi=/foo/bar/disk on the command line.
*/
static int __init
simscsi_setup (char *s)
{
/* XXX Fix me we may need to strcpy() ? */
if (strlen(s) > MAX_ROOT_LEN) {
printk(KERN_ERR "simscsi_setup: prefix too long---using default %s\n",
simscsi_root);
}
simscsi_root = s;
return 1;
}
__setup("simscsi=", simscsi_setup);
static void
simscsi_interrupt (unsigned long val)
{
struct scsi_cmnd *sc;
while ((sc = queue[rd].sc) != 0) {
atomic_dec(&num_reqs);
queue[rd].sc = 0;
if (DBG)
printk("simscsi_interrupt: done with %ld\n", sc->serial_number);
(*sc->scsi_done)(sc);
rd = (rd + 1) % SIMSCSI_REQ_QUEUE_LEN;
}
}
static int
simscsi_biosparam (struct scsi_device *sdev, struct block_device *n,
sector_t capacity, int ip[])
{
ip[0] = 64; /* heads */
ip[1] = 32; /* sectors */
ip[2] = capacity >> 11; /* cylinders */
return 0;
}
static void
simscsi_readwrite (struct scsi_cmnd *sc, int mode, unsigned long offset, unsigned long len)
{
struct disk_stat stat;
struct disk_req req;
req.addr = __pa(sc->request_buffer);
req.len = len; /* # of bytes to transfer */
if (sc->request_bufflen < req.len)
return;
stat.fd = desc[sc->device->id];
if (DBG)
printk("simscsi_%s @ %lx (off %lx)\n",
mode == SSC_READ ? "read":"write", req.addr, offset);
ia64_ssc(stat.fd, 1, __pa(&req), offset, mode);
ia64_ssc(__pa(&stat), 0, 0, 0, SSC_WAIT_COMPLETION);
if (stat.count == req.len) {
sc->result = GOOD;
} else {
sc->result = DID_ERROR << 16;
}
}
static void
simscsi_sg_readwrite (struct scsi_cmnd *sc, int mode, unsigned long offset)
{
int list_len = sc->use_sg;
struct scatterlist *sl = (struct scatterlist *)sc->buffer;
struct disk_stat stat;
struct disk_req req;
stat.fd = desc[sc->device->id];
while (list_len) {
req.addr = __pa(page_address(sl->page) + sl->offset);
req.len = sl->length;
if (DBG)
printk("simscsi_sg_%s @ %lx (off %lx) use_sg=%d len=%d\n",
mode == SSC_READ ? "read":"write", req.addr, offset,
list_len, sl->length);
ia64_ssc(stat.fd, 1, __pa(&req), offset, mode);
ia64_ssc(__pa(&stat), 0, 0, 0, SSC_WAIT_COMPLETION);
/* should not happen in our case */
if (stat.count != req.len) {
sc->result = DID_ERROR << 16;
return;
}
offset += sl->length;
sl++;
list_len--;
}
sc->result = GOOD;
}
/*
* function handling both READ_6/WRITE_6 (non-scatter/gather mode)
* commands.
* Added 02/26/99 S.Eranian
*/
static void
simscsi_readwrite6 (struct scsi_cmnd *sc, int mode)
{
unsigned long offset;
offset = (((sc->cmnd[1] & 0x1f) << 16) | (sc->cmnd[2] << 8) | sc->cmnd[3])*512;
if (sc->use_sg > 0)
simscsi_sg_readwrite(sc, mode, offset);
else
simscsi_readwrite(sc, mode, offset, sc->cmnd[4]*512);
}
static size_t
simscsi_get_disk_size (int fd)
{
struct disk_stat stat;
size_t bit, sectors = 0;
struct disk_req req;
char buf[512];
/*
* This is a bit kludgey: the simulator doesn't provide a direct way of determining
* the disk size, so we do a binary search, assuming a maximum disk size of 4GB.
*/
for (bit = (4UL << 30)/512; bit != 0; bit >>= 1) {
req.addr = __pa(&buf);
req.len = sizeof(buf);
ia64_ssc(fd, 1, __pa(&req), ((sectors | bit) - 1)*512, SSC_READ);
stat.fd = fd;
ia64_ssc(__pa(&stat), 0, 0, 0, SSC_WAIT_COMPLETION);
if (stat.count == sizeof(buf))
sectors |= bit;
}
return sectors - 1; /* return last valid sector number */
}
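/*
 * The probe above is a bit-by-bit binary search: starting from the highest
 * possible size bit, each candidate sector is read and the bit is kept only
 * if the read succeeds.  A stripped-down sketch of the same idea, mirroring
 * the loop above (try_read() is a hypothetical stand-in for the
 * SSC_READ/SSC_WAIT_COMPLETION pair; max_sectors must be a power of two):
 */
static size_t
probe_last_sector (int fd, size_t max_sectors)
{
        size_t bit, sectors = 0;

        for (bit = max_sectors; bit != 0; bit >>= 1) {
                if (try_read(fd, (sectors | bit) - 1))  /* is this sector readable? */
                        sectors |= bit;
        }
        return sectors - 1;     /* last valid sector number */
}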
static void
simscsi_readwrite10 (struct scsi_cmnd *sc, int mode)
{
unsigned long offset;
offset = ( (sc->cmnd[2] << 24) | (sc->cmnd[3] << 16)
| (sc->cmnd[4] << 8) | (sc->cmnd[5] << 0))*512;
if (sc->use_sg > 0)
simscsi_sg_readwrite(sc, mode, offset);
else
simscsi_readwrite(sc, mode, offset, ((sc->cmnd[7] << 8) | sc->cmnd[8])*512);
}
static int
simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
{
unsigned int target_id = sc->device->id;
char fname[MAX_ROOT_LEN+16];
size_t disk_size;
char *buf;
#if DEBUG_SIMSCSI
register long sp asm ("sp");
if (DBG)
printk("simscsi_queuecommand: target=%d,cmnd=%u,sc=%lu,sp=%lx,done=%p\n",
target_id, sc->cmnd[0], sc->serial_number, sp, done);
#endif
sc->result = DID_BAD_TARGET << 16;
sc->scsi_done = done;
if (target_id <= 15 && sc->device->lun == 0) {
switch (sc->cmnd[0]) {
case INQUIRY:
if (sc->request_bufflen < 35) {
break;
}
sprintf (fname, "%s%c", simscsi_root, 'a' + target_id);
desc[target_id] = ia64_ssc(__pa(fname), SSC_READ_ACCESS|SSC_WRITE_ACCESS,
0, 0, SSC_OPEN);
if (desc[target_id] < 0) {
/* disk doesn't exist... */
break;
}
buf = sc->request_buffer;
buf[0] = 0; /* magnetic disk */
buf[1] = 0; /* not a removable medium */
buf[2] = 2; /* SCSI-2 compliant device */
buf[3] = 2; /* SCSI-2 response data format */
buf[4] = 31; /* additional length (bytes) */
buf[5] = 0; /* reserved */
buf[6] = 0; /* reserved */
buf[7] = 0; /* various flags */
memcpy(buf + 8, "HP SIMULATED DISK 0.00", 28);
sc->result = GOOD;
break;
case TEST_UNIT_READY:
sc->result = GOOD;
break;
case READ_6:
if (desc[target_id] < 0 )
break;
simscsi_readwrite6(sc, SSC_READ);
break;
case READ_10:
if (desc[target_id] < 0 )
break;
simscsi_readwrite10(sc, SSC_READ);
break;
case WRITE_6:
if (desc[target_id] < 0)
break;
simscsi_readwrite6(sc, SSC_WRITE);
break;
case WRITE_10:
if (desc[target_id] < 0)
break;
simscsi_readwrite10(sc, SSC_WRITE);
break;
case READ_CAPACITY:
if (desc[target_id] < 0 || sc->request_bufflen < 8) {
break;
}
buf = sc->request_buffer;
disk_size = simscsi_get_disk_size(desc[target_id]);
/* pretend to be a 1GB disk (partition table contains real stuff): */
buf[0] = (disk_size >> 24) & 0xff;
buf[1] = (disk_size >> 16) & 0xff;
buf[2] = (disk_size >> 8) & 0xff;
buf[3] = (disk_size >> 0) & 0xff;
/* set block size of 512 bytes: */
buf[4] = 0;
buf[5] = 0;
buf[6] = 2;
buf[7] = 0;
sc->result = GOOD;
break;
case MODE_SENSE:
case MODE_SENSE_10:
/* sd.c uses this to determine whether disk does write-caching. */
memset(sc->request_buffer, 0, 128);
sc->result = GOOD;
break;
case START_STOP:
printk(KERN_ERR "START_STOP\n");
break;
default:
panic("simscsi: unknown SCSI command %u\n", sc->cmnd[0]);
}
}
if (sc->result == DID_BAD_TARGET) {
sc->result |= DRIVER_SENSE << 24;
sc->sense_buffer[0] = 0x70;
sc->sense_buffer[2] = 0x00;
}
if (atomic_read(&num_reqs) >= SIMSCSI_REQ_QUEUE_LEN) {
panic("Attempt to queue command while command is pending!!");
}
atomic_inc(&num_reqs);
queue[wr].sc = sc;
wr = (wr + 1) % SIMSCSI_REQ_QUEUE_LEN;
tasklet_schedule(&simscsi_tasklet);
return 0;
}
static int
simscsi_host_reset (struct scsi_cmnd *sc)
{
printk(KERN_ERR "simscsi_host_reset: not implemented\n");
return 0;
}
static struct scsi_host_template driver_template = {
.name = "simulated SCSI host adapter",
.proc_name = "simscsi",
.queuecommand = simscsi_queuecommand,
.eh_host_reset_handler = simscsi_host_reset,
.bios_param = simscsi_biosparam,
.can_queue = SIMSCSI_REQ_QUEUE_LEN,
.this_id = -1,
.sg_tablesize = SG_ALL,
.max_sectors = 1024,
.cmd_per_lun = SIMSCSI_REQ_QUEUE_LEN,
.use_clustering = DISABLE_CLUSTERING,
};
static int __init
simscsi_init(void)
{
int error;
host = scsi_host_alloc(&driver_template, 0);
if (!host)
return -ENOMEM;
error = scsi_add_host(host, NULL);
if (!error)
scsi_scan_host(host);
return error;
}
static void __exit
simscsi_exit(void)
{
scsi_remove_host(host);
scsi_host_put(host);
}
module_init(simscsi_init);
module_exit(simscsi_exit);

1032
arch/ia64/hp/sim/simserial.c Normal file

This diff is too large to display.


@@ -0,0 +1,8 @@
#
# ia64/hp/zx1/Makefile
#
# Copyright (C) 2002 Hewlett Packard
# Copyright (C) Alex Williamson (alex_williamson@hp.com)
#
obj-$(CONFIG_IA64_GENERIC) += hpzx1_machvec.o hpzx1_swiotlb_machvec.o


@@ -0,0 +1,3 @@
#define MACHVEC_PLATFORM_NAME hpzx1
#define MACHVEC_PLATFORM_HEADER <asm/machvec_hpzx1.h>
#include <asm/machvec_init.h>


@@ -0,0 +1,3 @@
#define MACHVEC_PLATFORM_NAME hpzx1_swiotlb
#define MACHVEC_PLATFORM_HEADER <asm/machvec_hpzx1_swiotlb.h>
#include <asm/machvec_init.h>

12
arch/ia64/ia32/Makefile Normal file

@@ -0,0 +1,12 @@
#
# Makefile for the ia32 kernel emulation subsystem.
#
obj-y := ia32_entry.o sys_ia32.o ia32_ioctl.o ia32_signal.o \
ia32_support.o ia32_traps.o binfmt_elf32.o ia32_ldt.o
CFLAGS_ia32_ioctl.o += -Ifs/
# Don't let GCC use f16-f31, so that save_ia32_fpstate_live() and
# restore_ia32_fpstate_live() can be sure the live registers contain user-level state.
CFLAGS_ia32_signal.o += -mfixed-range=f16-f31


@@ -0,0 +1,294 @@
/*
* IA-32 ELF support.
*
* Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com>
* Copyright (C) 2001 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
*
* 06/16/00 A. Mallick initialize csd/ssd/tssd/cflg for ia32_load_state
* 04/13/01 D. Mosberger dropped saving tssd in ar.k1---it's not needed
* 09/14/01 D. Mosberger fixed memory management for gdt/tss page
*/
#include <linux/config.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/security.h>
#include <asm/param.h>
#include <asm/signal.h>
#include "ia32priv.h"
#include "elfcore32.h"
/* Override some function names */
#undef start_thread
#define start_thread ia32_start_thread
#define elf_format elf32_format
#define init_elf_binfmt init_elf32_binfmt
#define exit_elf_binfmt exit_elf32_binfmt
#undef CLOCKS_PER_SEC
#define CLOCKS_PER_SEC IA32_CLOCKS_PER_SEC
extern void ia64_elf32_init (struct pt_regs *regs);
static void elf32_set_personality (void);
#define setup_arg_pages(bprm,tos,exec) ia32_setup_arg_pages(bprm,exec)
#define elf_map elf32_map
#undef SET_PERSONALITY
#define SET_PERSONALITY(ex, ibcs2) elf32_set_personality()
#define elf_read_implies_exec(ex, have_pt_gnu_stack) (!(have_pt_gnu_stack))
/* Ugly but avoids duplication */
#include "../../../fs/binfmt_elf.c"
extern struct page *ia32_shared_page[];
extern unsigned long *ia32_gdt;
extern struct page *ia32_gate_page;
struct page *
ia32_install_shared_page (struct vm_area_struct *vma, unsigned long address, int *type)
{
struct page *pg = ia32_shared_page[smp_processor_id()];
get_page(pg);
if (type)
*type = VM_FAULT_MINOR;
return pg;
}
struct page *
ia32_install_gate_page (struct vm_area_struct *vma, unsigned long address, int *type)
{
struct page *pg = ia32_gate_page;
get_page(pg);
if (type)
*type = VM_FAULT_MINOR;
return pg;
}
static struct vm_operations_struct ia32_shared_page_vm_ops = {
.nopage = ia32_install_shared_page
};
static struct vm_operations_struct ia32_gate_page_vm_ops = {
.nopage = ia32_install_gate_page
};
void
ia64_elf32_init (struct pt_regs *regs)
{
struct vm_area_struct *vma;
/*
* Map GDT below 4GB, where the processor can find it. We need to map
* it with privilege level 3 because the IVE uses non-privileged accesses to these
* tables. IA-32 segmentation is used to protect against IA-32 accesses to them.
*/
vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
if (vma) {
memset(vma, 0, sizeof(*vma));
vma->vm_mm = current->mm;
vma->vm_start = IA32_GDT_OFFSET;
vma->vm_end = vma->vm_start + PAGE_SIZE;
vma->vm_page_prot = PAGE_SHARED;
vma->vm_flags = VM_READ|VM_MAYREAD|VM_RESERVED;
vma->vm_ops = &ia32_shared_page_vm_ops;
down_write(&current->mm->mmap_sem);
{
if (insert_vm_struct(current->mm, vma)) {
kmem_cache_free(vm_area_cachep, vma);
up_write(&current->mm->mmap_sem);
BUG();
}
}
up_write(&current->mm->mmap_sem);
}
/*
* When the user stack is not executable, pushing the sigreturn code onto the
* stack would raise a segmentation fault when returning to the kernel. So the
* sigreturn code is now kept in a dedicated gate page, which pretcode points
* to when setup_frame_ia32() builds the signal frame.
*/
vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
if (vma) {
memset(vma, 0, sizeof(*vma));
vma->vm_mm = current->mm;
vma->vm_start = IA32_GATE_OFFSET;
vma->vm_end = vma->vm_start + PAGE_SIZE;
vma->vm_page_prot = PAGE_COPY_EXEC;
vma->vm_flags = VM_READ | VM_MAYREAD | VM_EXEC
| VM_MAYEXEC | VM_RESERVED;
vma->vm_ops = &ia32_gate_page_vm_ops;
down_write(&current->mm->mmap_sem);
{
if (insert_vm_struct(current->mm, vma)) {
kmem_cache_free(vm_area_cachep, vma);
up_write(&current->mm->mmap_sem);
BUG();
}
}
up_write(&current->mm->mmap_sem);
}
/*
* Install LDT as anonymous memory. This gives us all-zero segment descriptors
* until a task modifies them via modify_ldt().
*/
vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
if (vma) {
memset(vma, 0, sizeof(*vma));
vma->vm_mm = current->mm;
vma->vm_start = IA32_LDT_OFFSET;
vma->vm_end = vma->vm_start + PAGE_ALIGN(IA32_LDT_ENTRIES*IA32_LDT_ENTRY_SIZE);
vma->vm_page_prot = PAGE_SHARED;
vma->vm_flags = VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE;
down_write(&current->mm->mmap_sem);
{
if (insert_vm_struct(current->mm, vma)) {
kmem_cache_free(vm_area_cachep, vma);
up_write(&current->mm->mmap_sem);
BUG();
}
}
up_write(&current->mm->mmap_sem);
}
ia64_psr(regs)->ac = 0; /* turn off alignment checking */
regs->loadrs = 0;
/*
* According to the ABI %edx points to an `atexit' handler. Since we don't have
* one we'll set it to 0 and initialize all the other registers just to make
* things more deterministic, ala the i386 implementation.
*/
regs->r8 = 0; /* %eax */
regs->r11 = 0; /* %ebx */
regs->r9 = 0; /* %ecx */
regs->r10 = 0; /* %edx */
regs->r13 = 0; /* %ebp */
regs->r14 = 0; /* %esi */
regs->r15 = 0; /* %edi */
current->thread.eflag = IA32_EFLAG;
current->thread.fsr = IA32_FSR_DEFAULT;
current->thread.fcr = IA32_FCR_DEFAULT;
current->thread.fir = 0;
current->thread.fdr = 0;
/*
* Setup GDTD. Note: GDTD is the descrambled version of the pseudo-descriptor
* format defined by Figure 3-11 "Pseudo-Descriptor Format" in the IA-32
* architecture manual. Also note that the only fields that are not ignored are
* `base', `limit', 'G', `P' (must be 1) and `S' (must be 0).
*/
regs->r31 = IA32_SEG_UNSCRAMBLE(IA32_SEG_DESCRIPTOR(IA32_GDT_OFFSET, IA32_PAGE_SIZE - 1,
0, 0, 0, 1, 0, 0, 0));
/* Setup the segment selectors */
regs->r16 = (__USER_DS << 16) | __USER_DS; /* ES == DS, GS, FS are zero */
regs->r17 = (__USER_DS << 16) | __USER_CS; /* SS, CS; ia32_load_state() sets TSS and LDT */
ia32_load_segment_descriptors(current);
ia32_load_state(current);
}
int
ia32_setup_arg_pages (struct linux_binprm *bprm, int executable_stack)
{
unsigned long stack_base;
struct vm_area_struct *mpnt;
struct mm_struct *mm = current->mm;
int i, ret;
stack_base = IA32_STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE;
mm->arg_start = bprm->p + stack_base;
bprm->p += stack_base;
if (bprm->loader)
bprm->loader += stack_base;
bprm->exec += stack_base;
mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
if (!mpnt)
return -ENOMEM;
if (security_vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))
>> PAGE_SHIFT)) {
kmem_cache_free(vm_area_cachep, mpnt);
return -ENOMEM;
}
memset(mpnt, 0, sizeof(*mpnt));
down_write(&current->mm->mmap_sem);
{
mpnt->vm_mm = current->mm;
mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p;
mpnt->vm_end = IA32_STACK_TOP;
if (executable_stack == EXSTACK_ENABLE_X)
mpnt->vm_flags = VM_STACK_FLAGS | VM_EXEC;
else if (executable_stack == EXSTACK_DISABLE_X)
mpnt->vm_flags = VM_STACK_FLAGS & ~VM_EXEC;
else
mpnt->vm_flags = VM_STACK_FLAGS;
mpnt->vm_page_prot = (mpnt->vm_flags & VM_EXEC)?
PAGE_COPY_EXEC: PAGE_COPY;
if ((ret = insert_vm_struct(current->mm, mpnt))) {
up_write(&current->mm->mmap_sem);
kmem_cache_free(vm_area_cachep, mpnt);
return ret;
}
current->mm->stack_vm = current->mm->total_vm = vma_pages(mpnt);
}
for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
struct page *page = bprm->page[i];
if (page) {
bprm->page[i] = NULL;
install_arg_page(mpnt, page, stack_base);
}
stack_base += PAGE_SIZE;
}
up_write(&current->mm->mmap_sem);
/* Can't do it in ia64_elf32_init(). Needs to be done before calls to
elf32_map() */
current->thread.ppl = ia32_init_pp_list();
return 0;
}
static void
elf32_set_personality (void)
{
set_personality(PER_LINUX32);
current->thread.map_base = IA32_PAGE_OFFSET/3;
current->thread.task_size = IA32_PAGE_OFFSET; /* use what Linux/x86 uses... */
set_fs(USER_DS); /* set addr limit for new TASK_SIZE */
}
static unsigned long
elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type)
{
unsigned long pgoff = (eppnt->p_vaddr) & ~IA32_PAGE_MASK;
return ia32_do_mmap(filep, (addr & IA32_PAGE_MASK), eppnt->p_filesz + pgoff, prot, type,
eppnt->p_offset - pgoff);
}
#define cpu_uses_ia32el() (local_cpu_data->family > 0x1f)
static int __init check_elf32_binfmt(void)
{
if (cpu_uses_ia32el()) {
printk("Please use IA-32 EL for executing IA-32 binaries\n");
return unregister_binfmt(&elf_format);
}
return 0;
}
module_init(check_elf32_binfmt)

138
arch/ia64/ia32/elfcore32.h Normal file

@@ -0,0 +1,138 @@
/*
* IA-32 ELF core dump support.
*
* Copyright (C) 2003 Arun Sharma <arun.sharma@intel.com>
*
* Derived from the x86_64 version
*/
#ifndef _ELFCORE32_H_
#define _ELFCORE32_H_
#include <asm/intrinsics.h>
#include <asm/uaccess.h>
#define USE_ELF_CORE_DUMP 1
/* Override elfcore.h */
#define _LINUX_ELFCORE_H 1
typedef unsigned int elf_greg_t;
#define ELF_NGREG (sizeof (struct user_regs_struct32) / sizeof(elf_greg_t))
typedef elf_greg_t elf_gregset_t[ELF_NGREG];
typedef struct ia32_user_i387_struct elf_fpregset_t;
typedef struct ia32_user_fxsr_struct elf_fpxregset_t;
struct elf_siginfo
{
int si_signo; /* signal number */
int si_code; /* extra code */
int si_errno; /* errno */
};
#define jiffies_to_timeval(a,b) do { (b)->tv_usec = 0; (b)->tv_sec = (a)/HZ; }while(0)
struct elf_prstatus
{
struct elf_siginfo pr_info; /* Info associated with signal */
short pr_cursig; /* Current signal */
unsigned int pr_sigpend; /* Set of pending signals */
unsigned int pr_sighold; /* Set of held signals */
pid_t pr_pid;
pid_t pr_ppid;
pid_t pr_pgrp;
pid_t pr_sid;
struct compat_timeval pr_utime; /* User time */
struct compat_timeval pr_stime; /* System time */
struct compat_timeval pr_cutime; /* Cumulative user time */
struct compat_timeval pr_cstime; /* Cumulative system time */
elf_gregset_t pr_reg; /* GP registers */
int pr_fpvalid; /* True if math co-processor being used. */
};
#define ELF_PRARGSZ (80) /* Number of chars for args */
struct elf_prpsinfo
{
char pr_state; /* numeric process state */
char pr_sname; /* char for pr_state */
char pr_zomb; /* zombie */
char pr_nice; /* nice val */
unsigned int pr_flag; /* flags */
__u16 pr_uid;
__u16 pr_gid;
pid_t pr_pid, pr_ppid, pr_pgrp, pr_sid;
/* Lots missing */
char pr_fname[16]; /* filename of executable */
char pr_psargs[ELF_PRARGSZ]; /* initial part of arg list */
};
#define ELF_CORE_COPY_REGS(pr_reg, regs) \
pr_reg[0] = regs->r11; \
pr_reg[1] = regs->r9; \
pr_reg[2] = regs->r10; \
pr_reg[3] = regs->r14; \
pr_reg[4] = regs->r15; \
pr_reg[5] = regs->r13; \
pr_reg[6] = regs->r8; \
pr_reg[7] = regs->r16 & 0xffff; \
pr_reg[8] = (regs->r16 >> 16) & 0xffff; \
pr_reg[9] = (regs->r16 >> 32) & 0xffff; \
pr_reg[10] = (regs->r16 >> 48) & 0xffff; \
pr_reg[11] = regs->r1; \
pr_reg[12] = regs->cr_iip; \
pr_reg[13] = regs->r17 & 0xffff; \
pr_reg[14] = ia64_getreg(_IA64_REG_AR_EFLAG); \
pr_reg[15] = regs->r12; \
pr_reg[16] = (regs->r17 >> 16) & 0xffff;
static inline void elf_core_copy_regs(elf_gregset_t *elfregs,
struct pt_regs *regs)
{
ELF_CORE_COPY_REGS((*elfregs), regs)
}
static inline int elf_core_copy_task_regs(struct task_struct *t,
elf_gregset_t* elfregs)
{
struct pt_regs *pp = ia64_task_regs(t);
ELF_CORE_COPY_REGS((*elfregs), pp);
return 1;
}
static inline int
elf_core_copy_task_fpregs(struct task_struct *tsk, struct pt_regs *regs, elf_fpregset_t *fpu)
{
struct ia32_user_i387_struct *fpstate = (void*)fpu;
mm_segment_t old_fs;
if (!tsk_used_math(tsk))
return 0;
old_fs = get_fs();
set_fs(KERNEL_DS);
save_ia32_fpstate(tsk, (struct ia32_user_i387_struct __user *) fpstate);
set_fs(old_fs);
return 1;
}
#define ELF_CORE_COPY_XFPREGS 1
static inline int
elf_core_copy_task_xfpregs(struct task_struct *tsk, elf_fpxregset_t *xfpu)
{
struct ia32_user_fxsr_struct *fpxstate = (void*) xfpu;
mm_segment_t old_fs;
if (!tsk_used_math(tsk))
return 0;
old_fs = get_fs();
set_fs(KERNEL_DS);
save_ia32_fpxstate(tsk, (struct ia32_user_fxsr_struct __user *) fpxstate);
set_fs(old_fs);
return 1;
}
#endif /* _ELFCORE32_H_ */

500
arch/ia64/ia32/ia32_entry.S Normal file

@@ -0,0 +1,500 @@
#include <asm/asmmacro.h>
#include <asm/ia32.h>
#include <asm/offsets.h>
#include <asm/signal.h>
#include <asm/thread_info.h>
#include "../kernel/minstate.h"
/*
* execve() is special because in case of success, we need to
* setup a null register window frame (in case an IA-32 process
* is exec'ing an IA-64 program).
*/
ENTRY(ia32_execve)
.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(3)
alloc loc1=ar.pfs,3,2,4,0
mov loc0=rp
.body
zxt4 out0=in0 // filename
;; // stop bit between alloc and call
zxt4 out1=in1 // argv
zxt4 out2=in2 // envp
add out3=16,sp // regs
br.call.sptk.few rp=sys32_execve
1: cmp.ge p6,p0=r8,r0
mov ar.pfs=loc1 // restore ar.pfs
;;
(p6) mov ar.pfs=r0 // clear ar.pfs in case of success
sxt4 r8=r8 // return 64-bit result
mov rp=loc0
br.ret.sptk.few rp
END(ia32_execve)
ENTRY(ia32_clone)
.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5)
alloc r16=ar.pfs,5,2,6,0
DO_SAVE_SWITCH_STACK
mov loc0=rp
mov loc1=r16 // save ar.pfs across do_fork
.body
zxt4 out1=in1 // newsp
mov out3=16 // stacksize (compensates for 16-byte scratch area)
adds out2=IA64_SWITCH_STACK_SIZE+16,sp // out2 = &regs
mov out0=in0 // out0 = clone_flags
zxt4 out4=in2 // out4 = parent_tidptr
zxt4 out5=in4 // out5 = child_tidptr
br.call.sptk.many rp=do_fork
.ret0: .restore sp
adds sp=IA64_SWITCH_STACK_SIZE,sp // pop the switch stack
mov ar.pfs=loc1
mov rp=loc0
br.ret.sptk.many rp
END(ia32_clone)
ENTRY(sys32_rt_sigsuspend)
.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs
mov loc0=rp
mov out0=in0 // mask
mov out1=in1 // sigsetsize
mov out2=sp // out2 = &sigscratch
.fframe 16
adds sp=-16,sp // allocate dummy "sigscratch"
;;
.body
br.call.sptk.many rp=ia32_rt_sigsuspend
1: .restore sp
adds sp=16,sp
mov rp=loc0
mov ar.pfs=loc1
br.ret.sptk.many rp
END(sys32_rt_sigsuspend)
ENTRY(sys32_sigsuspend)
.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs
mov loc0=rp
mov out0=in2 // mask (first two args are ignored)
;;
mov out1=sp // out1 = &sigscratch
.fframe 16
adds sp=-16,sp // allocate dummy "sigscratch"
.body
br.call.sptk.many rp=ia32_sigsuspend
1: .restore sp
adds sp=16,sp
mov rp=loc0
mov ar.pfs=loc1
br.ret.sptk.many rp
END(sys32_sigsuspend)
GLOBAL_ENTRY(ia32_ret_from_clone)
PT_REGS_UNWIND_INFO(0)
{ /*
* Some versions of gas generate bad unwind info if the first instruction of a
* procedure doesn't go into the first slot of a bundle. This is a workaround.
*/
nop.m 0
nop.i 0
/*
* We need to call schedule_tail() to complete the scheduling process.
* Called by ia64_switch_to after do_fork()->copy_thread(). r8 contains the
* address of the previously executing task.
*/
br.call.sptk.many rp=ia64_invoke_schedule_tail
}
.ret1:
adds r2=TI_FLAGS+IA64_TASK_SIZE,r13
;;
ld4 r2=[r2]
;;
mov r8=0
and r2=_TIF_SYSCALL_TRACEAUDIT,r2
;;
cmp.ne p6,p0=r2,r0
(p6) br.cond.spnt .ia32_strace_check_retval
;; // prevent RAW on r8
END(ia32_ret_from_clone)
// fall through
GLOBAL_ENTRY(ia32_ret_from_syscall)
PT_REGS_UNWIND_INFO(0)
cmp.ge p6,p7=r8,r0 // syscall executed successfully?
adds r2=IA64_PT_REGS_R8_OFFSET+16,sp // r2 = &pt_regs.r8
;;
alloc r3=ar.pfs,0,0,0,0 // drop the syscall argument frame
st8 [r2]=r8 // store return value in slot for r8
br.cond.sptk.many ia64_leave_kernel
END(ia32_ret_from_syscall)
//
// Invoke a system call, but do some tracing before and after the call.
// We MUST preserve the current register frame throughout this routine
// because some system calls (such as ia64_execve) directly
// manipulate ar.pfs.
//
// Input:
// r8 = syscall number
// b6 = syscall entry point
//
GLOBAL_ENTRY(ia32_trace_syscall)
PT_REGS_UNWIND_INFO(0)
mov r3=-38
adds r2=IA64_PT_REGS_R8_OFFSET+16,sp
;;
st8 [r2]=r3 // initialize return code to -ENOSYS
br.call.sptk.few rp=syscall_trace_enter // give parent a chance to catch syscall args
.ret2: // Need to reload arguments (they may be changed by the tracing process)
adds r2=IA64_PT_REGS_R1_OFFSET+16,sp // r2 = &pt_regs.r1
adds r3=IA64_PT_REGS_R13_OFFSET+16,sp // r3 = &pt_regs.r13
mov r15=IA32_NR_syscalls
;;
ld4 r8=[r2],IA64_PT_REGS_R9_OFFSET-IA64_PT_REGS_R1_OFFSET
movl r16=ia32_syscall_table
;;
ld4 r33=[r2],8 // r9 == ecx
ld4 r37=[r3],16 // r13 == ebp
cmp.ltu.unc p6,p7=r8,r15
;;
ld4 r34=[r2],8 // r10 == edx
ld4 r36=[r3],8 // r15 == edi
(p6) shladd r16=r8,3,r16 // force ni_syscall if not valid syscall number
;;
ld8 r16=[r16]
;;
ld4 r32=[r2],8 // r11 == ebx
mov b6=r16
ld4 r35=[r3],8 // r14 == esi
br.call.sptk.few rp=b6 // do the syscall
.ia32_strace_check_retval:
cmp.lt p6,p0=r8,r0 // syscall failed?
adds r2=IA64_PT_REGS_R8_OFFSET+16,sp // r2 = &pt_regs.r8
;;
st8.spill [r2]=r8 // store return value in slot for r8
br.call.sptk.few rp=syscall_trace_leave // give parent a chance to catch return value
.ret4: alloc r2=ar.pfs,0,0,0,0 // drop the syscall argument frame
br.cond.sptk.many ia64_leave_kernel
END(ia32_trace_syscall)
GLOBAL_ENTRY(sys32_vfork)
alloc r16=ar.pfs,2,2,4,0;;
mov out0=IA64_CLONE_VFORK|IA64_CLONE_VM|SIGCHLD // out0 = clone_flags
br.cond.sptk.few .fork1 // do the work
END(sys32_vfork)
GLOBAL_ENTRY(sys32_fork)
.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2)
alloc r16=ar.pfs,2,2,4,0
mov out0=SIGCHLD // out0 = clone_flags
;;
.fork1:
mov loc0=rp
mov loc1=r16 // save ar.pfs across do_fork
DO_SAVE_SWITCH_STACK
.body
mov out1=0
mov out3=0
adds out2=IA64_SWITCH_STACK_SIZE+16,sp // out2 = &regs
br.call.sptk.few rp=do_fork
.ret5: .restore sp
adds sp=IA64_SWITCH_STACK_SIZE,sp // pop the switch stack
mov ar.pfs=loc1
mov rp=loc0
br.ret.sptk.many rp
END(sys32_fork)
.rodata
.align 8
.globl ia32_syscall_table
ia32_syscall_table:
data8 sys_ni_syscall /* 0 - old "setup()" system call */
data8 sys_exit
data8 sys32_fork
data8 sys_read
data8 sys_write
data8 sys32_open /* 5 */
data8 sys_close
data8 sys32_waitpid
data8 sys_creat
data8 sys_link
data8 sys_unlink /* 10 */
data8 ia32_execve
data8 sys_chdir
data8 compat_sys_time
data8 sys_mknod
data8 sys_chmod /* 15 */
data8 sys_lchown /* 16-bit version */
data8 sys_ni_syscall /* old break syscall holder */
data8 sys_ni_syscall
data8 sys32_lseek
data8 sys_getpid /* 20 */
data8 compat_sys_mount
data8 sys_oldumount
data8 sys_setuid /* 16-bit version */
data8 sys_getuid /* 16-bit version */
data8 compat_sys_stime /* 25 */
data8 sys32_ptrace
data8 sys32_alarm
data8 sys_ni_syscall
data8 sys32_pause
data8 compat_sys_utime /* 30 */
data8 sys_ni_syscall /* old stty syscall holder */
data8 sys_ni_syscall /* old gtty syscall holder */
data8 sys_access
data8 sys_nice
data8 sys_ni_syscall /* 35 */ /* old ftime syscall holder */
data8 sys_sync
data8 sys_kill
data8 sys_rename
data8 sys_mkdir
data8 sys_rmdir /* 40 */
data8 sys_dup
data8 sys32_pipe
data8 compat_sys_times
data8 sys_ni_syscall /* old prof syscall holder */
data8 sys32_brk /* 45 */
data8 sys_setgid /* 16-bit version */
data8 sys_getgid /* 16-bit version */
data8 sys32_signal
data8 sys_geteuid /* 16-bit version */
data8 sys_getegid /* 16-bit version */ /* 50 */
data8 sys_acct
data8 sys_umount /* recycled never used phys() */
data8 sys_ni_syscall /* old lock syscall holder */
data8 compat_sys_ioctl
data8 compat_sys_fcntl /* 55 */
data8 sys_ni_syscall /* old mpx syscall holder */
data8 sys_setpgid
data8 sys_ni_syscall /* old ulimit syscall holder */
data8 sys_ni_syscall
data8 sys_umask /* 60 */
data8 sys_chroot
data8 sys_ustat
data8 sys_dup2
data8 sys_getppid
data8 sys_getpgrp /* 65 */
data8 sys_setsid
data8 sys32_sigaction
data8 sys_ni_syscall
data8 sys_ni_syscall
data8 sys_setreuid /* 16-bit version */ /* 70 */
data8 sys_setregid /* 16-bit version */
data8 sys32_sigsuspend
data8 compat_sys_sigpending
data8 sys_sethostname
data8 compat_sys_setrlimit /* 75 */
data8 compat_sys_old_getrlimit
data8 compat_sys_getrusage
data8 sys32_gettimeofday
data8 sys32_settimeofday
data8 sys32_getgroups16 /* 80 */
data8 sys32_setgroups16
data8 sys32_old_select
data8 sys_symlink
data8 sys_ni_syscall
data8 sys_readlink /* 85 */
data8 sys_uselib
data8 sys_swapon
data8 sys_reboot
data8 sys32_readdir
data8 sys32_mmap /* 90 */
data8 sys32_munmap
data8 sys_truncate
data8 sys_ftruncate
data8 sys_fchmod
data8 sys_fchown /* 16-bit version */ /* 95 */
data8 sys_getpriority
data8 sys_setpriority
data8 sys_ni_syscall /* old profil syscall holder */
data8 compat_sys_statfs
data8 compat_sys_fstatfs /* 100 */
data8 sys_ni_syscall /* ioperm */
data8 compat_sys_socketcall
data8 sys_syslog
data8 compat_sys_setitimer
data8 compat_sys_getitimer /* 105 */
data8 compat_sys_newstat
data8 compat_sys_newlstat
data8 compat_sys_newfstat
data8 sys_ni_syscall
data8 sys_ni_syscall /* iopl */ /* 110 */
data8 sys_vhangup
data8 sys_ni_syscall /* used to be sys_idle */
data8 sys_ni_syscall
data8 compat_sys_wait4
data8 sys_swapoff /* 115 */
data8 sys32_sysinfo
data8 sys32_ipc
data8 sys_fsync
data8 sys32_sigreturn
data8 ia32_clone /* 120 */
data8 sys_setdomainname
data8 sys32_newuname
data8 sys32_modify_ldt
data8 sys_ni_syscall /* adjtimex */
data8 sys32_mprotect /* 125 */
data8 compat_sys_sigprocmask
data8 sys_ni_syscall /* create_module */
data8 sys_ni_syscall /* init_module */
data8 sys_ni_syscall /* delete_module */
data8 sys_ni_syscall /* get_kernel_syms */ /* 130 */
data8 sys_quotactl
data8 sys_getpgid
data8 sys_fchdir
data8 sys_ni_syscall /* sys_bdflush */
data8 sys_sysfs /* 135 */
data8 sys32_personality
data8 sys_ni_syscall /* for afs_syscall */
data8 sys_setfsuid /* 16-bit version */
data8 sys_setfsgid /* 16-bit version */
data8 sys_llseek /* 140 */
data8 compat_sys_getdents
data8 compat_sys_select
data8 sys_flock
data8 sys32_msync
data8 compat_sys_readv /* 145 */
data8 compat_sys_writev
data8 sys_getsid
data8 sys_fdatasync
data8 sys32_sysctl
data8 sys_mlock /* 150 */
data8 sys_munlock
data8 sys_mlockall
data8 sys_munlockall
data8 sys_sched_setparam
data8 sys_sched_getparam /* 155 */
data8 sys_sched_setscheduler
data8 sys_sched_getscheduler
data8 sys_sched_yield
data8 sys_sched_get_priority_max
data8 sys_sched_get_priority_min /* 160 */
data8 sys32_sched_rr_get_interval
data8 compat_sys_nanosleep
data8 sys32_mremap
data8 sys_setresuid /* 16-bit version */
data8 sys32_getresuid16 /* 16-bit version */ /* 165 */
data8 sys_ni_syscall /* vm86 */
data8 sys_ni_syscall /* sys_query_module */
data8 sys_poll
data8 sys_ni_syscall /* nfsservctl */
data8 sys_setresgid /* 170 */
data8 sys32_getresgid16
data8 sys_prctl
data8 sys32_rt_sigreturn
data8 sys32_rt_sigaction
data8 sys32_rt_sigprocmask /* 175 */
data8 sys_rt_sigpending
data8 compat_sys_rt_sigtimedwait
data8 sys32_rt_sigqueueinfo
data8 sys32_rt_sigsuspend
data8 sys32_pread /* 180 */
data8 sys32_pwrite
data8 sys_chown /* 16-bit version */
data8 sys_getcwd
data8 sys_capget
data8 sys_capset /* 185 */
data8 sys32_sigaltstack
data8 sys32_sendfile
data8 sys_ni_syscall /* streams1 */
data8 sys_ni_syscall /* streams2 */
data8 sys32_vfork /* 190 */
data8 compat_sys_getrlimit
data8 sys32_mmap2
data8 sys32_truncate64
data8 sys32_ftruncate64
data8 sys32_stat64 /* 195 */
data8 sys32_lstat64
data8 sys32_fstat64
data8 sys_lchown
data8 sys_getuid
data8 sys_getgid /* 200 */
data8 sys_geteuid
data8 sys_getegid
data8 sys_setreuid
data8 sys_setregid
data8 sys_getgroups /* 205 */
data8 sys_setgroups
data8 sys_fchown
data8 sys_setresuid
data8 sys_getresuid
data8 sys_setresgid /* 210 */
data8 sys_getresgid
data8 sys_chown
data8 sys_setuid
data8 sys_setgid
data8 sys_setfsuid /* 215 */
data8 sys_setfsgid
data8 sys_pivot_root
data8 sys_mincore
data8 sys_madvise
data8 compat_sys_getdents64 /* 220 */
data8 compat_sys_fcntl64
data8 sys_ni_syscall /* reserved for TUX */
data8 sys_ni_syscall /* reserved for Security */
data8 sys_gettid
data8 sys_readahead /* 225 */
data8 sys_setxattr
data8 sys_lsetxattr
data8 sys_fsetxattr
data8 sys_getxattr
data8 sys_lgetxattr /* 230 */
data8 sys_fgetxattr
data8 sys_listxattr
data8 sys_llistxattr
data8 sys_flistxattr
data8 sys_removexattr /* 235 */
data8 sys_lremovexattr
data8 sys_fremovexattr
data8 sys_tkill
data8 sys_sendfile64
data8 compat_sys_futex /* 240 */
data8 compat_sys_sched_setaffinity
data8 compat_sys_sched_getaffinity
data8 sys32_set_thread_area
data8 sys32_get_thread_area
data8 compat_sys_io_setup /* 245 */
data8 sys_io_destroy
data8 compat_sys_io_getevents
data8 compat_sys_io_submit
data8 sys_io_cancel
data8 sys_fadvise64 /* 250 */
data8 sys_ni_syscall
data8 sys_exit_group
data8 sys_lookup_dcookie
data8 sys_epoll_create
data8 sys32_epoll_ctl /* 255 */
data8 sys32_epoll_wait
data8 sys_remap_file_pages
data8 sys_set_tid_address
data8 sys32_timer_create
data8 compat_sys_timer_settime /* 260 */
data8 compat_sys_timer_gettime
data8 sys_timer_getoverrun
data8 sys_timer_delete
data8 compat_sys_clock_settime
data8 compat_sys_clock_gettime /* 265 */
data8 compat_sys_clock_getres
data8 compat_sys_clock_nanosleep
data8 compat_sys_statfs64
data8 compat_sys_fstatfs64
data8 sys_tgkill /* 270 */
data8 compat_sys_utimes
data8 sys32_fadvise64_64
data8 sys_ni_syscall
data8 sys_ni_syscall
data8 sys_ni_syscall /* 275 */
data8 sys_ni_syscall
data8 compat_sys_mq_open
data8 sys_mq_unlink
data8 compat_sys_mq_timedsend
data8 compat_sys_mq_timedreceive /* 280 */
data8 compat_sys_mq_notify
data8 compat_sys_mq_getsetattr
data8 sys_ni_syscall /* reserved for kexec */
data8 compat_sys_waitid
// guard against failures to increase IA32_NR_syscalls
.org ia32_syscall_table + 8*IA32_NR_syscalls
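// (the .org above makes the assembler error out if the table grows past
// IA32_NR_syscalls entries without IA32_NR_syscalls being increased)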


@@ -0,0 +1,48 @@
/*
* IA32 Architecture-specific ioctl shim code
*
* Copyright (C) 2000 VA Linux Co
* Copyright (C) 2000 Don Dugger <n0ano@valinux.com>
* Copyright (C) 2001-2003 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
*/
#include <linux/signal.h> /* argh, msdos_fs.h isn't self-contained... */
#include <linux/syscalls.h>
#include "ia32priv.h"
#define INCLUDES
#include "compat_ioctl.c"
#include <asm/ioctl32.h>
#define IOCTL_NR(a) ((a) & ~(_IOC_SIZEMASK << _IOC_SIZESHIFT))
#define DO_IOCTL(fd, cmd, arg) ({ \
int _ret; \
mm_segment_t _old_fs = get_fs(); \
\
set_fs(KERNEL_DS); \
_ret = sys_ioctl(fd, cmd, (unsigned long)arg); \
set_fs(_old_fs); \
_ret; \
})
#define CODE
#include "compat_ioctl.c"
typedef int (* ioctl32_handler_t)(unsigned int, unsigned int, unsigned long, struct file *);
#define COMPATIBLE_IOCTL(cmd) HANDLE_IOCTL((cmd),sys_ioctl)
#define HANDLE_IOCTL(cmd,handler) { (cmd), (ioctl32_handler_t)(handler), NULL },
#define IOCTL_TABLE_START \
struct ioctl_trans ioctl_start[] = {
#define IOCTL_TABLE_END \
};
IOCTL_TABLE_START
#define DECLARES
#include "compat_ioctl.c"
#include <linux/compat_ioctl.h>
IOCTL_TABLE_END
int ioctl_table_size = ARRAY_SIZE(ioctl_start);
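/*
 * Structure note: compat_ioctl.c is included three times above -- with
 * INCLUDES defined for its headers, with CODE defined for the handler bodies
 * (each routed through DO_IOCTL, which temporarily switches to KERNEL_DS so
 * that sys_ioctl() accepts kernel pointers), and with DECLARES defined to
 * emit the entries of the ioctl_start[] table.
 */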

147
arch/ia64/ia32/ia32_ldt.c Normal file

@@ -0,0 +1,147 @@
/*
* Copyright (C) 2001, 2004 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
*
* Adapted from arch/i386/kernel/ldt.c
*/
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/vmalloc.h>
#include <asm/uaccess.h>
#include "ia32priv.h"
/*
* read_ldt() is not really atomic - this is not a problem since synchronization of reads
* and writes done to the LDT has to be assured by user-space anyway. Writes are atomic,
* to protect the security checks done on new descriptors.
*/
static int
read_ldt (void __user *ptr, unsigned long bytecount)
{
unsigned long bytes_left, n;
char __user *src, *dst;
char buf[256]; /* temporary buffer (don't overflow kernel stack!) */
if (bytecount > IA32_LDT_ENTRIES*IA32_LDT_ENTRY_SIZE)
bytecount = IA32_LDT_ENTRIES*IA32_LDT_ENTRY_SIZE;
bytes_left = bytecount;
src = (void __user *) IA32_LDT_OFFSET;
dst = ptr;
while (bytes_left) {
n = sizeof(buf);
if (n > bytes_left)
n = bytes_left;
/*
* We know we're reading valid memory, but we still must guard against
* running out of memory.
*/
if (__copy_from_user(buf, src, n))
return -EFAULT;
if (copy_to_user(dst, buf, n))
return -EFAULT;
src += n;
dst += n;
bytes_left -= n;
}
return bytecount;
}
static int
read_default_ldt (void __user * ptr, unsigned long bytecount)
{
unsigned long size;
int err;
/* XXX fix me: should return equivalent of default_ldt[0] */
err = 0;
size = 8;
if (size > bytecount)
size = bytecount;
err = size;
if (clear_user(ptr, size))
err = -EFAULT;
return err;
}
static int
write_ldt (void __user * ptr, unsigned long bytecount, int oldmode)
{
struct ia32_user_desc ldt_info;
__u64 entry;
int ret;
if (bytecount != sizeof(ldt_info))
return -EINVAL;
if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
return -EFAULT;
if (ldt_info.entry_number >= IA32_LDT_ENTRIES)
return -EINVAL;
if (ldt_info.contents == 3) {
if (oldmode)
return -EINVAL;
if (ldt_info.seg_not_present == 0)
return -EINVAL;
}
if (ldt_info.base_addr == 0 && ldt_info.limit == 0
&& (oldmode || (ldt_info.contents == 0 && ldt_info.read_exec_only == 1
&& ldt_info.seg_32bit == 0 && ldt_info.limit_in_pages == 0
&& ldt_info.seg_not_present == 1 && ldt_info.useable == 0)))
/* allow LDTs to be cleared by the user */
entry = 0;
else
/* we must set the "Accessed" bit as IVE doesn't emulate it */
entry = IA32_SEG_DESCRIPTOR(ldt_info.base_addr, ldt_info.limit,
(((ldt_info.read_exec_only ^ 1) << 1)
| (ldt_info.contents << 2)) | 1,
1, 3, ldt_info.seg_not_present ^ 1,
(oldmode ? 0 : ldt_info.useable),
ldt_info.seg_32bit,
ldt_info.limit_in_pages);
/*
* Install the new entry. We know we're accessing valid (mapped) user-level
* memory, but we still need to guard against out-of-memory, hence we must use
* put_user().
*/
ret = __put_user(entry, (__u64 __user *) IA32_LDT_OFFSET + ldt_info.entry_number);
ia32_load_segment_descriptors(current);
return ret;
}
asmlinkage int
sys32_modify_ldt (int func, unsigned int ptr, unsigned int bytecount)
{
int ret = -ENOSYS;
switch (func) {
case 0:
ret = read_ldt(compat_ptr(ptr), bytecount);
break;
case 1:
ret = write_ldt(compat_ptr(ptr), bytecount, 1);
break;
case 2:
ret = read_default_ldt(compat_ptr(ptr), bytecount);
break;
case 0x11:
ret = write_ldt(compat_ptr(ptr), bytecount, 0);
break;
}
return ret;
}
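
A quick way to see what write_ldt() above actually stores: the descriptor "type" nibble passed to IA32_SEG_DESCRIPTOR() is built from the user's read_exec_only and contents flags, with the accessed bit forced on. The minimal userspace sketch below reproduces just that computation; the resulting values 0x3 and 0xb match the data and code segment types used for __USER_DS/__USER_CS elsewhere in this directory.

/* Hedged sketch: the type-nibble computation from write_ldt(), in isolation. */
#include <stdio.h>

static unsigned int seg_type(unsigned int read_exec_only, unsigned int contents)
{
    /* bit 0: accessed (forced on, since the execution engine does not emulate
     * the accessed-bit update), bit 1: writable/readable,
     * bits 2-3: "contents" (0 data, 1 expand-down stack, 2/3 code)           */
    return (((read_exec_only ^ 1) << 1) | (contents << 2)) | 1;
}

int main(void)
{
    /* read/write data segment: matches the 0x3 used for __USER_DS */
    printf("data segment type: %#x\n", seg_type(0, 0));
    /* readable code segment:  matches the 0xb used for __USER_CS */
    printf("code segment type: %#x\n", seg_type(0, 2));
    return 0;
}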

1036
arch/ia64/ia32/ia32_signal.c Normal file

Diff too large to display

264
arch/ia64/ia32/ia32_support.c Normal file

@@ -0,0 +1,264 @@
/*
* IA32 helper functions
*
* Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com>
* Copyright (C) 2000 Asit K. Mallick <asit.k.mallick@intel.com>
* Copyright (C) 2001-2002 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
*
* 06/16/00 A. Mallick added csd/ssd/tssd for ia32 thread context
* 02/19/01 D. Mosberger dropped tssd; it's not needed
* 09/14/01 D. Mosberger fixed memory management for gdt/tss page
* 09/29/01 D. Mosberger added ia32_load_segment_descriptors()
*/
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/personality.h>
#include <linux/sched.h>
#include <asm/intrinsics.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/processor.h>
#include <asm/uaccess.h>
#include "ia32priv.h"
extern void die_if_kernel (char *str, struct pt_regs *regs, long err);
struct exec_domain ia32_exec_domain;
struct page *ia32_shared_page[NR_CPUS];
unsigned long *ia32_boot_gdt;
unsigned long *cpu_gdt_table[NR_CPUS];
struct page *ia32_gate_page;
static unsigned long
load_desc (u16 selector)
{
unsigned long *table, limit, index;
if (!selector)
return 0;
if (selector & IA32_SEGSEL_TI) {
table = (unsigned long *) IA32_LDT_OFFSET;
limit = IA32_LDT_ENTRIES;
} else {
table = cpu_gdt_table[smp_processor_id()];
limit = IA32_PAGE_SIZE / sizeof(ia32_boot_gdt[0]);
}
index = selector >> IA32_SEGSEL_INDEX_SHIFT;
if (index >= limit)
return 0;
return IA32_SEG_UNSCRAMBLE(table[index]);
}
void
ia32_load_segment_descriptors (struct task_struct *task)
{
struct pt_regs *regs = ia64_task_regs(task);
/* Setup the segment descriptors */
regs->r24 = load_desc(regs->r16 >> 16); /* ESD */
regs->r27 = load_desc(regs->r16 >> 0); /* DSD */
regs->r28 = load_desc(regs->r16 >> 32); /* FSD */
regs->r29 = load_desc(regs->r16 >> 48); /* GSD */
regs->ar_csd = load_desc(regs->r17 >> 0); /* CSD */
regs->ar_ssd = load_desc(regs->r17 >> 16); /* SSD */
}
int
ia32_clone_tls (struct task_struct *child, struct pt_regs *childregs)
{
struct desc_struct *desc;
struct ia32_user_desc info;
int idx;
if (copy_from_user(&info, (void __user *)(childregs->r14 & 0xffffffff), sizeof(info)))
return -EFAULT;
if (LDT_empty(&info))
return -EINVAL;
idx = info.entry_number;
if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
return -EINVAL;
desc = child->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
desc->a = LDT_entry_a(&info);
desc->b = LDT_entry_b(&info);
/* XXX: can this be done in a cleaner way ? */
load_TLS(&child->thread, smp_processor_id());
ia32_load_segment_descriptors(child);
load_TLS(&current->thread, smp_processor_id());
return 0;
}
void
ia32_save_state (struct task_struct *t)
{
t->thread.eflag = ia64_getreg(_IA64_REG_AR_EFLAG);
t->thread.fsr = ia64_getreg(_IA64_REG_AR_FSR);
t->thread.fcr = ia64_getreg(_IA64_REG_AR_FCR);
t->thread.fir = ia64_getreg(_IA64_REG_AR_FIR);
t->thread.fdr = ia64_getreg(_IA64_REG_AR_FDR);
ia64_set_kr(IA64_KR_IO_BASE, t->thread.old_iob);
ia64_set_kr(IA64_KR_TSSD, t->thread.old_k1);
}
void
ia32_load_state (struct task_struct *t)
{
unsigned long eflag, fsr, fcr, fir, fdr, tssd;
struct pt_regs *regs = ia64_task_regs(t);
eflag = t->thread.eflag;
fsr = t->thread.fsr;
fcr = t->thread.fcr;
fir = t->thread.fir;
fdr = t->thread.fdr;
tssd = load_desc(_TSS); /* TSSD */
ia64_setreg(_IA64_REG_AR_EFLAG, eflag);
ia64_setreg(_IA64_REG_AR_FSR, fsr);
ia64_setreg(_IA64_REG_AR_FCR, fcr);
ia64_setreg(_IA64_REG_AR_FIR, fir);
ia64_setreg(_IA64_REG_AR_FDR, fdr);
current->thread.old_iob = ia64_get_kr(IA64_KR_IO_BASE);
current->thread.old_k1 = ia64_get_kr(IA64_KR_TSSD);
ia64_set_kr(IA64_KR_IO_BASE, IA32_IOBASE);
ia64_set_kr(IA64_KR_TSSD, tssd);
regs->r17 = (_TSS << 48) | (_LDT << 32) | (__u32) regs->r17;
regs->r30 = load_desc(_LDT); /* LDTD */
load_TLS(&t->thread, smp_processor_id());
}
/*
* Setup IA32 GDT and TSS
*/
void
ia32_gdt_init (void)
{
int cpu = smp_processor_id();
ia32_shared_page[cpu] = alloc_page(GFP_KERNEL);
if (!ia32_shared_page[cpu])
panic("failed to allocate ia32_shared_page[%d]\n", cpu);
cpu_gdt_table[cpu] = page_address(ia32_shared_page[cpu]);
/* Copy from the boot cpu's GDT */
memcpy(cpu_gdt_table[cpu], ia32_boot_gdt, PAGE_SIZE);
}
/*
* Setup IA32 GDT and TSS
*/
static void
ia32_boot_gdt_init (void)
{
unsigned long ldt_size;
ia32_shared_page[0] = alloc_page(GFP_KERNEL);
if (!ia32_shared_page[0])
panic("failed to allocate ia32_shared_page[0]\n");
ia32_boot_gdt = page_address(ia32_shared_page[0]);
cpu_gdt_table[0] = ia32_boot_gdt;
/* CS descriptor in IA-32 (scrambled) format */
ia32_boot_gdt[__USER_CS >> 3]
= IA32_SEG_DESCRIPTOR(0, (IA32_GATE_END-1) >> IA32_PAGE_SHIFT,
0xb, 1, 3, 1, 1, 1, 1);
/* DS descriptor in IA-32 (scrambled) format */
ia32_boot_gdt[__USER_DS >> 3]
= IA32_SEG_DESCRIPTOR(0, (IA32_GATE_END-1) >> IA32_PAGE_SHIFT,
0x3, 1, 3, 1, 1, 1, 1);
ldt_size = PAGE_ALIGN(IA32_LDT_ENTRIES*IA32_LDT_ENTRY_SIZE);
ia32_boot_gdt[TSS_ENTRY] = IA32_SEG_DESCRIPTOR(IA32_TSS_OFFSET, 235,
0xb, 0, 3, 1, 1, 1, 0);
ia32_boot_gdt[LDT_ENTRY] = IA32_SEG_DESCRIPTOR(IA32_LDT_OFFSET, ldt_size - 1,
0x2, 0, 3, 1, 1, 1, 0);
}
static void
ia32_gate_page_init(void)
{
unsigned long *sr;
ia32_gate_page = alloc_page(GFP_KERNEL);
sr = page_address(ia32_gate_page);
/* This is popl %eax ; movl $,%eax ; int $0x80 */
*sr++ = 0xb858 | (__IA32_NR_sigreturn << 16) | (0x80cdUL << 48);
/* This is movl $,%eax ; int $0x80 */
*sr = 0xb8 | (__IA32_NR_rt_sigreturn << 8) | (0x80cdUL << 40);
}
void
ia32_mem_init(void)
{
ia32_boot_gdt_init();
ia32_gate_page_init();
}
/*
* Handle bad IA32 interrupt via syscall
*/
void
ia32_bad_interrupt (unsigned long int_num, struct pt_regs *regs)
{
siginfo_t siginfo;
die_if_kernel("Bad IA-32 interrupt", regs, int_num);
siginfo.si_signo = SIGTRAP;
siginfo.si_errno = int_num; /* XXX is it OK to abuse si_errno like this? */
siginfo.si_flags = 0;
siginfo.si_isr = 0;
siginfo.si_addr = NULL;
siginfo.si_imm = 0;
siginfo.si_code = TRAP_BRKPT;
force_sig_info(SIGTRAP, &siginfo, current);
}
void
ia32_cpu_init (void)
{
/* initialize global ia32 state - CR0 and CR4 */
ia64_setreg(_IA64_REG_AR_CFLAG, (((ulong) IA32_CR4 << 32) | IA32_CR0));
}
static int __init
ia32_init (void)
{
ia32_exec_domain.name = "Linux/x86";
ia32_exec_domain.handler = NULL;
ia32_exec_domain.pers_low = PER_LINUX32;
ia32_exec_domain.pers_high = PER_LINUX32;
ia32_exec_domain.signal_map = default_exec_domain.signal_map;
ia32_exec_domain.signal_invmap = default_exec_domain.signal_invmap;
register_exec_domain(&ia32_exec_domain);
#if PAGE_SHIFT > IA32_PAGE_SHIFT
{
extern kmem_cache_t *partial_page_cachep;
partial_page_cachep = kmem_cache_create("partial_page_cache",
sizeof(struct partial_page), 0, 0,
NULL, NULL);
if (!partial_page_cachep)
panic("Cannot create partial page SLAB cache");
}
#endif
return 0;
}
__initcall(ia32_init);
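
The two 64-bit constants written by ia32_gate_page_init() above are tiny IA-32 instruction sequences stored as immediate data. The sketch below (userspace, little-endian assumed) unpacks them byte by byte; the syscall numbers 119 and 173 are the __IA32_NR_sigreturn/__IA32_NR_rt_sigreturn values defined in ia32priv.h.

/* Hedged sketch: byte-level view of the sigreturn trampolines built above. */
#include <stdio.h>

int main(void)
{
    unsigned long sigreturn    = 0xb858UL | (119UL << 16) | (0x80cdUL << 48);
    unsigned long rt_sigreturn = 0xb8UL   | (173UL <<  8) | (0x80cdUL << 40);
    unsigned char *p;
    int i;

    /* expected: 58 b8 77 00 00 00 cd 80  ->  popl %eax; movl $119,%eax; int $0x80 */
    p = (unsigned char *) &sigreturn;
    for (i = 0; i < 8; i++)
        printf("%02x ", p[i]);
    printf("\n");

    /* expected: b8 ad 00 00 00 cd 80 00  ->  movl $173,%eax; int $0x80 */
    p = (unsigned char *) &rt_sigreturn;
    for (i = 0; i < 8; i++)
        printf("%02x ", p[i]);
    printf("\n");
    return 0;
}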

156
arch/ia64/ia32/ia32_traps.c Normal file

@@ -0,0 +1,156 @@
/*
* IA-32 exception handlers
*
* Copyright (C) 2000 Asit K. Mallick <asit.k.mallick@intel.com>
* Copyright (C) 2001-2002 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
*
* 06/16/00 A. Mallick added siginfo for most cases (close to IA32)
* 09/29/00 D. Mosberger added ia32_intercept()
*/
#include <linux/kernel.h>
#include <linux/sched.h>
#include "ia32priv.h"
#include <asm/intrinsics.h>
#include <asm/ptrace.h>
int
ia32_intercept (struct pt_regs *regs, unsigned long isr)
{
switch ((isr >> 16) & 0xff) {
case 0: /* Instruction intercept fault */
case 4: /* Locked Data reference fault */
case 1: /* Gate intercept trap */
return -1;
case 2: /* System flag trap */
if (((isr >> 14) & 0x3) >= 2) {
/* MOV SS, POP SS instructions */
ia64_psr(regs)->id = 1;
return 0;
} else
return -1;
}
return -1;
}
int
ia32_exception (struct pt_regs *regs, unsigned long isr)
{
struct siginfo siginfo;
/* initialize these fields to avoid leaking kernel bits to user space: */
siginfo.si_errno = 0;
siginfo.si_flags = 0;
siginfo.si_isr = 0;
siginfo.si_imm = 0;
switch ((isr >> 16) & 0xff) {
case 1:
case 2:
siginfo.si_signo = SIGTRAP;
if (isr == 0)
siginfo.si_code = TRAP_TRACE;
else if (isr & 0x4)
siginfo.si_code = TRAP_BRANCH;
else
siginfo.si_code = TRAP_BRKPT;
break;
case 3:
siginfo.si_signo = SIGTRAP;
siginfo.si_code = TRAP_BRKPT;
break;
case 0: /* Divide fault */
siginfo.si_signo = SIGFPE;
siginfo.si_code = FPE_INTDIV;
break;
case 4: /* Overflow */
case 5: /* Bounds fault */
siginfo.si_signo = SIGFPE;
siginfo.si_code = 0;
break;
case 6: /* Invalid Op-code */
siginfo.si_signo = SIGILL;
siginfo.si_code = ILL_ILLOPN;
break;
case 7: /* FP DNA */
case 8: /* Double Fault */
case 9: /* Invalid TSS */
case 11: /* Segment not present */
case 12: /* Stack fault */
case 13: /* General Protection Fault */
siginfo.si_signo = SIGSEGV;
siginfo.si_code = 0;
break;
case 16: /* Pending FP error */
{
unsigned long fsr, fcr;
fsr = ia64_getreg(_IA64_REG_AR_FSR);
fcr = ia64_getreg(_IA64_REG_AR_FCR);
siginfo.si_signo = SIGFPE;
/*
* (~cwd & swd) will mask out exceptions that are not set to unmasked
* status. 0x3f is the exception bits in these regs, 0x200 is the
* C1 reg you need in case of a stack fault, 0x040 is the stack
* fault bit. We should only be taking one exception at a time,
* so if this combination doesn't produce any single exception,
* then we have a bad program that isn't synchronizing its FPU usage
* and it will suffer the consequences since we won't be able to
* fully reproduce the context of the exception
*/
siginfo.si_isr = isr;
siginfo.si_flags = __ISR_VALID;
switch(((~fcr) & (fsr & 0x3f)) | (fsr & 0x240)) {
case 0x000:
default:
siginfo.si_code = 0;
break;
case 0x001: /* Invalid Op */
case 0x040: /* Stack Fault */
case 0x240: /* Stack Fault | Direction */
siginfo.si_code = FPE_FLTINV;
break;
case 0x002: /* Denormalize */
case 0x010: /* Underflow */
siginfo.si_code = FPE_FLTUND;
break;
case 0x004: /* Zero Divide */
siginfo.si_code = FPE_FLTDIV;
break;
case 0x008: /* Overflow */
siginfo.si_code = FPE_FLTOVF;
break;
case 0x020: /* Precision */
siginfo.si_code = FPE_FLTRES;
break;
}
break;
}
case 17: /* Alignment check */
siginfo.si_signo = SIGSEGV;
siginfo.si_code = BUS_ADRALN;
break;
case 19: /* SSE Numeric error */
siginfo.si_signo = SIGFPE;
siginfo.si_code = 0;
break;
default:
return -1;
}
force_sig_info(siginfo.si_signo, &siginfo, current);
return 0;
}
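
To make the pending-FP-error decode above concrete, here is a minimal userspace sketch of the same ((~fcr) & (fsr & 0x3f)) | (fsr & 0x240) selector, fed with invented fsr/fcr values: a sticky zero-divide bit in the status word maps to FPE_FLTDIV only when the corresponding mask bit in the control word is clear (0x037f, the usual all-masked control word, yields nothing to report).

/* Hedged sketch: worked example of the FP-exception selector used above.
 * The fsr/fcr values are made up; bit 2 is the zero-divide bit in both words.
 */
#include <stdio.h>

static const char *decode(unsigned long fsr, unsigned long fcr)
{
    switch (((~fcr) & (fsr & 0x3f)) | (fsr & 0x240)) {
    case 0x004:
        return "FPE_FLTDIV";              /* zero divide          */
    case 0x001: case 0x040: case 0x240:
        return "FPE_FLTINV";              /* invalid op / stack   */
    case 0x002: case 0x010:
        return "FPE_FLTUND";              /* denormal / underflow */
    default:
        return "no single unmasked exception";
    }
}

int main(void)
{
    /* divide-by-zero pending in fsr, divide exception unmasked in fcr */
    printf("%s\n", decode(0x0004, 0x037bUL));
    /* same status word, but the exception is masked */
    printf("%s\n", decode(0x0004, 0x037fUL));
    return 0;
}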

544
arch/ia64/ia32/ia32priv.h Normal file

@@ -0,0 +1,544 @@
#ifndef _ASM_IA64_IA32_PRIV_H
#define _ASM_IA64_IA32_PRIV_H
#include <linux/config.h>
#include <asm/ia32.h>
#ifdef CONFIG_IA32_SUPPORT
#include <linux/binfmts.h>
#include <linux/compat.h>
#include <linux/rbtree.h>
#include <asm/processor.h>
/*
* 32 bit structures for IA32 support.
*/
#define IA32_PAGE_SIZE (1UL << IA32_PAGE_SHIFT)
#define IA32_PAGE_MASK (~(IA32_PAGE_SIZE - 1))
#define IA32_PAGE_ALIGN(addr) (((addr) + IA32_PAGE_SIZE - 1) & IA32_PAGE_MASK)
#define IA32_CLOCKS_PER_SEC 100 /* Cast in stone for IA32 Linux */
/*
* partially mapped pages provide precise accounting of which 4k sub pages
* are mapped and which ones are not, thereby improving IA-32 compatibility.
*/
struct partial_page {
struct partial_page *next; /* linked list, sorted by address */
struct rb_node pp_rb;
/* 64K is the largest "normal" page supported by ia64 ABI. So 4K*32
* should suffice.*/
unsigned int bitmap;
unsigned int base;
};
struct partial_page_list {
struct partial_page *pp_head; /* list head, points to the lowest
* addressed partial page */
struct rb_root ppl_rb;
struct partial_page *pp_hint; /* pp_hint->next is the last
* accessed partial page */
atomic_t pp_count; /* reference count */
};
#if PAGE_SHIFT > IA32_PAGE_SHIFT
struct partial_page_list* ia32_init_pp_list (void);
#else
# define ia32_init_pp_list() 0
#endif
/* sigcontext.h */
/*
* As documented in the iBCS2 standard..
*
* The first part of "struct _fpstate" is just the
* normal i387 hardware setup, the extra "status"
* word is used to save the coprocessor status word
* before entering the handler.
*/
struct _fpreg_ia32 {
unsigned short significand[4];
unsigned short exponent;
};
struct _fpxreg_ia32 {
unsigned short significand[4];
unsigned short exponent;
unsigned short padding[3];
};
struct _xmmreg_ia32 {
unsigned int element[4];
};
struct _fpstate_ia32 {
unsigned int cw,
sw,
tag,
ipoff,
cssel,
dataoff,
datasel;
struct _fpreg_ia32 _st[8];
unsigned short status;
unsigned short magic; /* 0xffff = regular FPU data only */
/* FXSR FPU environment */
unsigned int _fxsr_env[6]; /* FXSR FPU env is ignored */
unsigned int mxcsr;
unsigned int reserved;
struct _fpxreg_ia32 _fxsr_st[8]; /* FXSR FPU reg data is ignored */
struct _xmmreg_ia32 _xmm[8];
unsigned int padding[56];
};
struct sigcontext_ia32 {
unsigned short gs, __gsh;
unsigned short fs, __fsh;
unsigned short es, __esh;
unsigned short ds, __dsh;
unsigned int edi;
unsigned int esi;
unsigned int ebp;
unsigned int esp;
unsigned int ebx;
unsigned int edx;
unsigned int ecx;
unsigned int eax;
unsigned int trapno;
unsigned int err;
unsigned int eip;
unsigned short cs, __csh;
unsigned int eflags;
unsigned int esp_at_signal;
unsigned short ss, __ssh;
unsigned int fpstate; /* really (struct _fpstate_ia32 *) */
unsigned int oldmask;
unsigned int cr2;
};
/* user.h */
/*
* IA32 (Pentium III/4) FXSR, SSE support
*
* Provide support for the GDB 5.0+ PTRACE_{GET|SET}FPXREGS requests for
* interacting with the FXSR-format floating point environment. Floating
* point data can be accessed in the regular format in the usual manner,
* and both the standard and SIMD floating point data can be accessed via
* the new ptrace requests. In either case, changes to the FPU environment
* will be reflected in the task's state as expected.
*/
struct ia32_user_i387_struct {
int cwd;
int swd;
int twd;
int fip;
int fcs;
int foo;
int fos;
/* 8*10 bytes for each FP-reg = 80 bytes */
struct _fpreg_ia32 st_space[8];
};
struct ia32_user_fxsr_struct {
unsigned short cwd;
unsigned short swd;
unsigned short twd;
unsigned short fop;
int fip;
int fcs;
int foo;
int fos;
int mxcsr;
int reserved;
int st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */
int xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */
int padding[56];
};
/* signal.h */
#define IA32_SET_SA_HANDLER(ka,handler,restorer) \
((ka)->sa.sa_handler = (__sighandler_t) \
(((unsigned long)(restorer) << 32) \
| ((handler) & 0xffffffff)))
#define IA32_SA_HANDLER(ka) ((unsigned long) (ka)->sa.sa_handler & 0xffffffff)
#define IA32_SA_RESTORER(ka) ((unsigned long) (ka)->sa.sa_handler >> 32)
#define __IA32_NR_sigreturn 119
#define __IA32_NR_rt_sigreturn 173
struct sigaction32 {
unsigned int sa_handler; /* Really a pointer, but need to deal with 32 bits */
unsigned int sa_flags;
unsigned int sa_restorer; /* Another 32 bit pointer */
compat_sigset_t sa_mask; /* A 32 bit mask */
};
struct old_sigaction32 {
unsigned int sa_handler; /* Really a pointer, but need to deal
with 32 bits */
compat_old_sigset_t sa_mask; /* A 32 bit mask */
unsigned int sa_flags;
unsigned int sa_restorer; /* Another 32 bit pointer */
};
typedef struct sigaltstack_ia32 {
unsigned int ss_sp;
int ss_flags;
unsigned int ss_size;
} stack_ia32_t;
struct ucontext_ia32 {
unsigned int uc_flags;
unsigned int uc_link;
stack_ia32_t uc_stack;
struct sigcontext_ia32 uc_mcontext;
sigset_t uc_sigmask; /* mask last for extensibility */
};
struct stat64 {
unsigned long long st_dev;
unsigned char __pad0[4];
unsigned int __st_ino;
unsigned int st_mode;
unsigned int st_nlink;
unsigned int st_uid;
unsigned int st_gid;
unsigned long long st_rdev;
unsigned char __pad3[4];
unsigned int st_size_lo;
unsigned int st_size_hi;
unsigned int st_blksize;
unsigned int st_blocks; /* Number 512-byte blocks allocated. */
unsigned int __pad4; /* future possible st_blocks high bits */
unsigned int st_atime;
unsigned int st_atime_nsec;
unsigned int st_mtime;
unsigned int st_mtime_nsec;
unsigned int st_ctime;
unsigned int st_ctime_nsec;
unsigned int st_ino_lo;
unsigned int st_ino_hi;
};
typedef struct compat_siginfo {
int si_signo;
int si_errno;
int si_code;
union {
int _pad[((128/sizeof(int)) - 3)];
/* kill() */
struct {
unsigned int _pid; /* sender's pid */
unsigned int _uid; /* sender's uid */
} _kill;
/* POSIX.1b timers */
struct {
timer_t _tid; /* timer id */
int _overrun; /* overrun count */
char _pad[sizeof(unsigned int) - sizeof(int)];
compat_sigval_t _sigval; /* same as below */
int _sys_private; /* not to be passed to user */
} _timer;
/* POSIX.1b signals */
struct {
unsigned int _pid; /* sender's pid */
unsigned int _uid; /* sender's uid */
compat_sigval_t _sigval;
} _rt;
/* SIGCHLD */
struct {
unsigned int _pid; /* which child */
unsigned int _uid; /* sender's uid */
int _status; /* exit code */
compat_clock_t _utime;
compat_clock_t _stime;
} _sigchld;
/* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
struct {
unsigned int _addr; /* faulting insn/memory ref. */
} _sigfault;
/* SIGPOLL */
struct {
int _band; /* POLL_IN, POLL_OUT, POLL_MSG */
int _fd;
} _sigpoll;
} _sifields;
} compat_siginfo_t;
struct old_linux32_dirent {
u32 d_ino;
u32 d_offset;
u16 d_namlen;
char d_name[1];
};
/*
* IA-32 ELF specific definitions for IA-64.
*/
#define _ASM_IA64_ELF_H /* Don't include elf.h */
#include <linux/sched.h>
#include <asm/processor.h>
/*
* This is used to ensure we don't load something for the wrong architecture.
*/
#define elf_check_arch(x) ((x)->e_machine == EM_386)
/*
* These are used to set parameters in the core dumps.
*/
#define ELF_CLASS ELFCLASS32
#define ELF_DATA ELFDATA2LSB
#define ELF_ARCH EM_386
#define IA32_PAGE_OFFSET 0xc0000000
#define IA32_STACK_TOP IA32_PAGE_OFFSET
#define IA32_GATE_OFFSET IA32_PAGE_OFFSET
#define IA32_GATE_END IA32_PAGE_OFFSET + PAGE_SIZE
/*
* The system segments (GDT, TSS, LDT) have to be mapped below 4GB so the IA-32 engine can
* access them.
*/
#define IA32_GDT_OFFSET (IA32_PAGE_OFFSET + PAGE_SIZE)
#define IA32_TSS_OFFSET (IA32_PAGE_OFFSET + 2*PAGE_SIZE)
#define IA32_LDT_OFFSET (IA32_PAGE_OFFSET + 3*PAGE_SIZE)
#define ELF_EXEC_PAGESIZE IA32_PAGE_SIZE
/*
* This is the location that an ET_DYN program is loaded if exec'ed.
* Typical use of this is to invoke "./ld.so someprog" to test out a
* new version of the loader. We need to make sure that it is out of
* the way of the program that it will "exec", and that there is
* sufficient room for the brk.
*/
#define ELF_ET_DYN_BASE (IA32_PAGE_OFFSET/3 + 0x1000000)
void ia64_elf32_init(struct pt_regs *regs);
#define ELF_PLAT_INIT(_r, load_addr) ia64_elf32_init(_r)
#define elf_addr_t u32
/* This macro yields a bitmask that programs can use to figure out
what instruction set this CPU supports. */
#define ELF_HWCAP 0
/* This macro yields a string that ld.so will use to load
implementation specific libraries for optimization. Not terribly
relevant until we have real hardware to play with... */
#define ELF_PLATFORM NULL
#ifdef __KERNEL__
# define SET_PERSONALITY(EX,IBCS2) \
(current->personality = (IBCS2) ? PER_SVR4 : PER_LINUX)
#endif
#define IA32_EFLAG 0x200
/*
* IA-32 ELF specific definitions for IA-64.
*/
#define __USER_CS 0x23
#define __USER_DS 0x2B
/*
* The per-cpu GDT has 32 entries: see <asm-i386/segment.h>
*/
#define GDT_ENTRIES 32
#define GDT_SIZE (GDT_ENTRIES * 8)
#define TSS_ENTRY 14
#define LDT_ENTRY (TSS_ENTRY + 1)
#define IA32_SEGSEL_RPL (0x3 << 0)
#define IA32_SEGSEL_TI (0x1 << 2)
#define IA32_SEGSEL_INDEX_SHIFT 3
#define _TSS ((unsigned long) TSS_ENTRY << IA32_SEGSEL_INDEX_SHIFT)
#define _LDT ((unsigned long) LDT_ENTRY << IA32_SEGSEL_INDEX_SHIFT)
#define IA32_SEG_BASE 16
#define IA32_SEG_TYPE 40
#define IA32_SEG_SYS 44
#define IA32_SEG_DPL 45
#define IA32_SEG_P 47
#define IA32_SEG_HIGH_LIMIT 48
#define IA32_SEG_AVL 52
#define IA32_SEG_DB 54
#define IA32_SEG_G 55
#define IA32_SEG_HIGH_BASE 56
#define IA32_SEG_DESCRIPTOR(base, limit, segtype, nonsysseg, dpl, segpresent, avl, segdb, gran) \
(((limit) & 0xffff) \
| (((unsigned long) (base) & 0xffffff) << IA32_SEG_BASE) \
| ((unsigned long) (segtype) << IA32_SEG_TYPE) \
| ((unsigned long) (nonsysseg) << IA32_SEG_SYS) \
| ((unsigned long) (dpl) << IA32_SEG_DPL) \
| ((unsigned long) (segpresent) << IA32_SEG_P) \
| ((((unsigned long) (limit) >> 16) & 0xf) << IA32_SEG_HIGH_LIMIT) \
| ((unsigned long) (avl) << IA32_SEG_AVL) \
| ((unsigned long) (segdb) << IA32_SEG_DB) \
| ((unsigned long) (gran) << IA32_SEG_G) \
| ((((unsigned long) (base) >> 24) & 0xff) << IA32_SEG_HIGH_BASE))
#define SEG_LIM 32
#define SEG_TYPE 52
#define SEG_SYS 56
#define SEG_DPL 57
#define SEG_P 59
#define SEG_AVL 60
#define SEG_DB 62
#define SEG_G 63
/* Unscramble an IA-32 segment descriptor into the IA-64 format. */
#define IA32_SEG_UNSCRAMBLE(sd) \
( (((sd) >> IA32_SEG_BASE) & 0xffffff) | ((((sd) >> IA32_SEG_HIGH_BASE) & 0xff) << 24) \
| ((((sd) & 0xffff) | ((((sd) >> IA32_SEG_HIGH_LIMIT) & 0xf) << 16)) << SEG_LIM) \
| ((((sd) >> IA32_SEG_TYPE) & 0xf) << SEG_TYPE) \
| ((((sd) >> IA32_SEG_SYS) & 0x1) << SEG_SYS) \
| ((((sd) >> IA32_SEG_DPL) & 0x3) << SEG_DPL) \
| ((((sd) >> IA32_SEG_P) & 0x1) << SEG_P) \
| ((((sd) >> IA32_SEG_AVL) & 0x1) << SEG_AVL) \
| ((((sd) >> IA32_SEG_DB) & 0x1) << SEG_DB) \
| ((((sd) >> IA32_SEG_G) & 0x1) << SEG_G))
#define IA32_IOBASE 0x2000000000000000UL /* Virtual address for I/O space */
#define IA32_CR0 0x80000001 /* Enable PG and PE bits */
#define IA32_CR4 0x600 /* MMXEX and FXSR on */
/*
* IA32 floating point control registers starting values
*/
#define IA32_FSR_DEFAULT 0x55550000 /* set all tag bits */
#define IA32_FCR_DEFAULT 0x17800000037fUL /* extended precision, all masks */
#define IA32_PTRACE_GETREGS 12
#define IA32_PTRACE_SETREGS 13
#define IA32_PTRACE_GETFPREGS 14
#define IA32_PTRACE_SETFPREGS 15
#define IA32_PTRACE_GETFPXREGS 18
#define IA32_PTRACE_SETFPXREGS 19
#define ia32_start_thread(regs,new_ip,new_sp) do { \
set_fs(USER_DS); \
ia64_psr(regs)->cpl = 3; /* set user mode */ \
ia64_psr(regs)->ri = 0; /* clear return slot number */ \
ia64_psr(regs)->is = 1; /* IA-32 instruction set */ \
regs->cr_iip = new_ip; \
regs->ar_rsc = 0xc; /* enforced lazy mode, priv. level 3 */ \
regs->ar_rnat = 0; \
regs->loadrs = 0; \
regs->r12 = new_sp; \
} while (0)
/*
* Local Descriptor Table (LDT) related declarations.
*/
#define IA32_LDT_ENTRIES 8192 /* Maximum number of LDT entries supported. */
#define IA32_LDT_ENTRY_SIZE 8 /* The size of each LDT entry. */
#define LDT_entry_a(info) \
((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
#define LDT_entry_b(info) \
(((info)->base_addr & 0xff000000) | \
(((info)->base_addr & 0x00ff0000) >> 16) | \
((info)->limit & 0xf0000) | \
(((info)->read_exec_only ^ 1) << 9) | \
((info)->contents << 10) | \
(((info)->seg_not_present ^ 1) << 15) | \
((info)->seg_32bit << 22) | \
((info)->limit_in_pages << 23) | \
((info)->useable << 20) | \
0x7100)
#define LDT_empty(info) ( \
(info)->base_addr == 0 && \
(info)->limit == 0 && \
(info)->contents == 0 && \
(info)->read_exec_only == 1 && \
(info)->seg_32bit == 0 && \
(info)->limit_in_pages == 0 && \
(info)->seg_not_present == 1 && \
(info)->useable == 0 )
static inline void
load_TLS (struct thread_struct *t, unsigned int cpu)
{
extern unsigned long *cpu_gdt_table[NR_CPUS];
memcpy(cpu_gdt_table[cpu] + GDT_ENTRY_TLS_MIN + 0, &t->tls_array[0], sizeof(long));
memcpy(cpu_gdt_table[cpu] + GDT_ENTRY_TLS_MIN + 1, &t->tls_array[1], sizeof(long));
memcpy(cpu_gdt_table[cpu] + GDT_ENTRY_TLS_MIN + 2, &t->tls_array[2], sizeof(long));
}
struct ia32_user_desc {
unsigned int entry_number;
unsigned int base_addr;
unsigned int limit;
unsigned int seg_32bit:1;
unsigned int contents:2;
unsigned int read_exec_only:1;
unsigned int limit_in_pages:1;
unsigned int seg_not_present:1;
unsigned int useable:1;
};
struct linux_binprm;
extern void ia32_init_addr_space (struct pt_regs *regs);
extern int ia32_setup_arg_pages (struct linux_binprm *bprm, int exec_stack);
extern unsigned long ia32_do_mmap (struct file *, unsigned long, unsigned long, int, int, loff_t);
extern void ia32_load_segment_descriptors (struct task_struct *task);
#define ia32f2ia64f(dst,src) \
do { \
ia64_ldfe(6,src); \
ia64_stop(); \
ia64_stf_spill(dst, 6); \
} while(0)
#define ia64f2ia32f(dst,src) \
do { \
ia64_ldf_fill(6, src); \
ia64_stop(); \
ia64_stfe(dst, 6); \
} while(0)
struct user_regs_struct32 {
__u32 ebx, ecx, edx, esi, edi, ebp, eax;
unsigned short ds, __ds, es, __es;
unsigned short fs, __fs, gs, __gs;
__u32 orig_eax, eip;
unsigned short cs, __cs;
__u32 eflags, esp;
unsigned short ss, __ss;
};
/* Prototypes for use in elfcore32.h */
extern int save_ia32_fpstate (struct task_struct *, struct ia32_user_i387_struct __user *);
extern int save_ia32_fpxstate (struct task_struct *, struct ia32_user_fxsr_struct __user *);
#endif /* !CONFIG_IA32_SUPPORT */
#endif /* _ASM_IA64_IA32_PRIV_H */
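
The scrambled IA-32 descriptor layout defined above packs a 32-bit base and 20-bit limit around the attribute bits. The userspace sketch below rebuilds a descriptor with the same shift constants and then pulls the fields back out, the same extraction IA32_SEG_UNSCRAMBLE() performs before re-packing them into the IA-64 register layout; the sample base/limit/type are illustrative, in the spirit of the __USER_CS entry (base 0, readable code, DPL 3, 4KB granularity).

/* Hedged cross-check of the scrambled descriptor layout, as a userspace demo.
 * Shift constants are copied from ia32priv.h; sample values are illustrative.
 */
#include <stdio.h>

#define IA32_SEG_BASE        16
#define IA32_SEG_TYPE        40
#define IA32_SEG_SYS         44
#define IA32_SEG_DPL         45
#define IA32_SEG_P           47
#define IA32_SEG_HIGH_LIMIT  48
#define IA32_SEG_AVL         52
#define IA32_SEG_DB          54
#define IA32_SEG_G           55
#define IA32_SEG_HIGH_BASE   56

/* same bit placement as IA32_SEG_DESCRIPTOR(), written out as a function */
static unsigned long build_desc(unsigned long base, unsigned long limit,
                                unsigned long type, unsigned long dpl)
{
    return (limit & 0xffff)
         | ((base & 0xffffff) << IA32_SEG_BASE)
         | (type << IA32_SEG_TYPE)
         | (1UL  << IA32_SEG_SYS)           /* non-system segment */
         | (dpl  << IA32_SEG_DPL)
         | (1UL  << IA32_SEG_P)             /* present */
         | (((limit >> 16) & 0xf) << IA32_SEG_HIGH_LIMIT)
         | (1UL  << IA32_SEG_AVL)
         | (1UL  << IA32_SEG_DB)            /* 32-bit default operand size */
         | (1UL  << IA32_SEG_G)             /* 4KB granularity */
         | (((base >> 24) & 0xff) << IA32_SEG_HIGH_BASE);
}

int main(void)
{
    unsigned long sd = build_desc(0, 0xbffff, 0xb, 3);

    /* pull the fields back out of the scrambled format */
    unsigned long base  = ((sd >> IA32_SEG_BASE) & 0xffffff)
                        | (((sd >> IA32_SEG_HIGH_BASE) & 0xff) << 24);
    unsigned long limit = (sd & 0xffff)
                        | (((sd >> IA32_SEG_HIGH_LIMIT) & 0xf) << 16);
    unsigned long type  = (sd >> IA32_SEG_TYPE) & 0xf;

    printf("scrambled descriptor = %#018lx\n", sd);
    printf("recovered: base=%#lx limit=%#lx type=%#lx dpl=%lu\n",
           base, limit, type, (sd >> IA32_SEG_DPL) & 0x3);
    return 0;
}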

2747
arch/ia64/ia32/sys_ia32.c Normal file

Diff too large to display

40
arch/ia64/install.sh Normal file

@@ -0,0 +1,40 @@
#!/bin/sh
#
# arch/ia64/install.sh
#
# This file is subject to the terms and conditions of the GNU General Public
# License. See the file "COPYING" in the main directory of this archive
# for more details.
#
# Copyright (C) 1995 by Linus Torvalds
#
# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin
#
# "make install" script for ia64 architecture
#
# Arguments:
# $1 - kernel version
# $2 - kernel image file
# $3 - kernel map file
# $4 - default install path (blank if root directory)
#
# User may have a custom install script
if [ -x ~/bin/installkernel ]; then exec ~/bin/installkernel "$@"; fi
if [ -x /sbin/installkernel ]; then exec /sbin/installkernel "$@"; fi
# Default install - same as make zlilo
if [ -f $4/vmlinuz ]; then
mv $4/vmlinuz $4/vmlinuz.old
fi
if [ -f $4/System.map ]; then
mv $4/System.map $4/System.old
fi
cat $2 > $4/vmlinuz
cp $3 $4/System.map
test -x /usr/sbin/elilo && /usr/sbin/elilo

52
arch/ia64/kernel/Makefile Normal file

@@ -0,0 +1,52 @@
#
# Makefile for the linux kernel.
#
extra-y := head.o init_task.o vmlinux.lds
obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \
irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o \
salinfo.o semaphore.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
unwind.o mca.o mca_asm.o topology.o
obj-$(CONFIG_IA64_BRL_EMU) += brl_emu.o
obj-$(CONFIG_IA64_GENERIC) += acpi-ext.o
obj-$(CONFIG_IA64_HP_ZX1) += acpi-ext.o
obj-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += acpi-ext.o
obj-$(CONFIG_IA64_PALINFO) += palinfo.o
obj-$(CONFIG_IOSAPIC) += iosapic.o
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_SMP) += smp.o smpboot.o domain.o
obj-$(CONFIG_PERFMON) += perfmon_default_smpl.o
obj-$(CONFIG_IA64_CYCLONE) += cyclone.o
obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o
mca_recovery-y += mca_drv.o mca_drv_asm.o
# The gate DSO image is built using a special linker script.
targets += gate.so gate-syms.o
extra-y += gate.so gate-syms.o gate.lds gate.o
# fp_emulate() expects f2-f5,f16-f31 to contain the user-level state.
CFLAGS_traps.o += -mfixed-range=f2-f5,f16-f31
CPPFLAGS_gate.lds := -P -C -U$(ARCH)
quiet_cmd_gate = GATE $@
cmd_gate = $(CC) -nostdlib $(GATECFLAGS_$(@F)) -Wl,-T,$(filter-out FORCE,$^) -o $@
GATECFLAGS_gate.so = -shared -s -Wl,-soname=linux-gate.so.1
$(obj)/gate.so: $(obj)/gate.lds $(obj)/gate.o FORCE
$(call if_changed,gate)
$(obj)/built-in.o: $(obj)/gate-syms.o
$(obj)/built-in.o: ld_flags += -R $(obj)/gate-syms.o
GATECFLAGS_gate-syms.o = -r
$(obj)/gate-syms.o: $(obj)/gate.lds $(obj)/gate.o FORCE
$(call if_changed,gate)
# gate-data.o contains the gate DSO image as data in section .data.gate.
# We must build gate.so before we can assemble it.
# Note: kbuild does not track this dependency due to usage of .incbin
$(obj)/gate-data.o: $(obj)/gate.so

100
arch/ia64/kernel/acpi-ext.c Normal file

@@ -0,0 +1,100 @@
/*
* arch/ia64/kernel/acpi-ext.c
*
* Copyright (C) 2003 Hewlett-Packard
* Copyright (C) Alex Williamson
* Copyright (C) Bjorn Helgaas
*
* Vendor specific extensions to ACPI.
*/
#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/acpi.h>
#include <linux/efi.h>
#include <asm/acpi-ext.h>
struct acpi_vendor_descriptor {
u8 guid_id;
efi_guid_t guid;
};
struct acpi_vendor_info {
struct acpi_vendor_descriptor *descriptor;
u8 *data;
u32 length;
};
acpi_status
acpi_vendor_resource_match(struct acpi_resource *resource, void *context)
{
struct acpi_vendor_info *info = (struct acpi_vendor_info *) context;
struct acpi_resource_vendor *vendor;
struct acpi_vendor_descriptor *descriptor;
u32 length;
if (resource->id != ACPI_RSTYPE_VENDOR)
return AE_OK;
vendor = (struct acpi_resource_vendor *) &resource->data;
descriptor = (struct acpi_vendor_descriptor *) vendor->reserved;
if (vendor->length <= sizeof(*info->descriptor) ||
descriptor->guid_id != info->descriptor->guid_id ||
efi_guidcmp(descriptor->guid, info->descriptor->guid))
return AE_OK;
length = vendor->length - sizeof(struct acpi_vendor_descriptor);
info->data = acpi_os_allocate(length);
if (!info->data)
return AE_NO_MEMORY;
memcpy(info->data, vendor->reserved + sizeof(struct acpi_vendor_descriptor), length);
info->length = length;
return AE_CTRL_TERMINATE;
}
acpi_status
acpi_find_vendor_resource(acpi_handle obj, struct acpi_vendor_descriptor *id,
u8 **data, u32 *length)
{
struct acpi_vendor_info info;
info.descriptor = id;
info.data = NULL;
acpi_walk_resources(obj, METHOD_NAME__CRS, acpi_vendor_resource_match, &info);
if (!info.data)
return AE_NOT_FOUND;
*data = info.data;
*length = info.length;
return AE_OK;
}
struct acpi_vendor_descriptor hp_ccsr_descriptor = {
.guid_id = 2,
.guid = EFI_GUID(0x69e9adf9, 0x924f, 0xab5f, 0xf6, 0x4a, 0x24, 0xd2, 0x01, 0x37, 0x0e, 0xad)
};
acpi_status
hp_acpi_csr_space(acpi_handle obj, u64 *csr_base, u64 *csr_length)
{
acpi_status status;
u8 *data;
u32 length;
status = acpi_find_vendor_resource(obj, &hp_ccsr_descriptor, &data, &length);
if (ACPI_FAILURE(status) || length != 16)
return AE_NOT_FOUND;
memcpy(csr_base, data, sizeof(*csr_base));
memcpy(csr_length, data + 8, sizeof(*csr_length));
acpi_os_free(data);
return AE_OK;
}
EXPORT_SYMBOL(hp_acpi_csr_space);

841
arch/ia64/kernel/acpi.c Normal file

@@ -0,0 +1,841 @@
/*
* acpi.c - Architecture-Specific Low-Level ACPI Support
*
* Copyright (C) 1999 VA Linux Systems
* Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com>
* Copyright (C) 2000, 2002-2003 Hewlett-Packard Co.
* David Mosberger-Tang <davidm@hpl.hp.com>
* Copyright (C) 2000 Intel Corp.
* Copyright (C) 2000,2001 J.I. Lee <jung-ik.lee@intel.com>
* Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
* Copyright (C) 2001 Jenna Hall <jenna.s.hall@intel.com>
* Copyright (C) 2001 Takayoshi Kochi <t-kochi@bq.jp.nec.com>
* Copyright (C) 2002 Erich Focht <efocht@ess.nec.de>
*
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
#include <linux/config.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/irq.h>
#include <linux/acpi.h>
#include <linux/efi.h>
#include <linux/mmzone.h>
#include <linux/nodemask.h>
#include <asm/io.h>
#include <asm/iosapic.h>
#include <asm/machvec.h>
#include <asm/page.h>
#include <asm/system.h>
#include <asm/numa.h>
#include <asm/sal.h>
#include <asm/cyclone.h>
#define BAD_MADT_ENTRY(entry, end) ( \
(!entry) || (unsigned long)entry + sizeof(*entry) > end || \
((acpi_table_entry_header *)entry)->length != sizeof(*entry))
#define PREFIX "ACPI: "
void (*pm_idle) (void);
EXPORT_SYMBOL(pm_idle);
void (*pm_power_off) (void);
EXPORT_SYMBOL(pm_power_off);
unsigned char acpi_kbd_controller_present = 1;
unsigned char acpi_legacy_devices;
#define MAX_SAPICS 256
u16 ia64_acpiid_to_sapicid[MAX_SAPICS] =
{ [0 ... MAX_SAPICS - 1] = -1 };
EXPORT_SYMBOL(ia64_acpiid_to_sapicid);
const char *
acpi_get_sysname (void)
{
#ifdef CONFIG_IA64_GENERIC
unsigned long rsdp_phys;
struct acpi20_table_rsdp *rsdp;
struct acpi_table_xsdt *xsdt;
struct acpi_table_header *hdr;
rsdp_phys = acpi_find_rsdp();
if (!rsdp_phys) {
printk(KERN_ERR "ACPI 2.0 RSDP not found, default to \"dig\"\n");
return "dig";
}
rsdp = (struct acpi20_table_rsdp *) __va(rsdp_phys);
if (strncmp(rsdp->signature, RSDP_SIG, sizeof(RSDP_SIG) - 1)) {
printk(KERN_ERR "ACPI 2.0 RSDP signature incorrect, default to \"dig\"\n");
return "dig";
}
xsdt = (struct acpi_table_xsdt *) __va(rsdp->xsdt_address);
hdr = &xsdt->header;
if (strncmp(hdr->signature, XSDT_SIG, sizeof(XSDT_SIG) - 1)) {
printk(KERN_ERR "ACPI 2.0 XSDT signature incorrect, default to \"dig\"\n");
return "dig";
}
if (!strcmp(hdr->oem_id, "HP")) {
return "hpzx1";
}
else if (!strcmp(hdr->oem_id, "SGI")) {
return "sn2";
}
return "dig";
#else
# if defined (CONFIG_IA64_HP_SIM)
return "hpsim";
# elif defined (CONFIG_IA64_HP_ZX1)
return "hpzx1";
# elif defined (CONFIG_IA64_HP_ZX1_SWIOTLB)
return "hpzx1_swiotlb";
# elif defined (CONFIG_IA64_SGI_SN2)
return "sn2";
# elif defined (CONFIG_IA64_DIG)
return "dig";
# else
# error Unknown platform. Fix acpi.c.
# endif
#endif
}
#ifdef CONFIG_ACPI_BOOT
#define ACPI_MAX_PLATFORM_INTERRUPTS 256
/* Array to record platform interrupt vectors for generic interrupt routing. */
int platform_intr_list[ACPI_MAX_PLATFORM_INTERRUPTS] = {
[0 ... ACPI_MAX_PLATFORM_INTERRUPTS - 1] = -1
};
enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_IOSAPIC;
/*
* Interrupt routing API for device drivers. Provides interrupt vector for
* a generic platform event. Currently only CPEI is implemented.
*/
int
acpi_request_vector (u32 int_type)
{
int vector = -1;
if (int_type < ACPI_MAX_PLATFORM_INTERRUPTS) {
/* corrected platform error interrupt */
vector = platform_intr_list[int_type];
} else
printk(KERN_ERR "acpi_request_vector(): invalid interrupt type\n");
return vector;
}
char *
__acpi_map_table (unsigned long phys_addr, unsigned long size)
{
return __va(phys_addr);
}
/* --------------------------------------------------------------------------
Boot-time Table Parsing
-------------------------------------------------------------------------- */
static int total_cpus __initdata;
static int available_cpus __initdata;
struct acpi_table_madt * acpi_madt __initdata;
static u8 has_8259;
static int __init
acpi_parse_lapic_addr_ovr (
acpi_table_entry_header *header, const unsigned long end)
{
struct acpi_table_lapic_addr_ovr *lapic;
lapic = (struct acpi_table_lapic_addr_ovr *) header;
if (BAD_MADT_ENTRY(lapic, end))
return -EINVAL;
if (lapic->address) {
iounmap(ipi_base_addr);
ipi_base_addr = ioremap(lapic->address, 0);
}
return 0;
}
static int __init
acpi_parse_lsapic (acpi_table_entry_header *header, const unsigned long end)
{
struct acpi_table_lsapic *lsapic;
lsapic = (struct acpi_table_lsapic *) header;
if (BAD_MADT_ENTRY(lsapic, end))
return -EINVAL;
if (lsapic->flags.enabled) {
#ifdef CONFIG_SMP
smp_boot_data.cpu_phys_id[available_cpus] = (lsapic->id << 8) | lsapic->eid;
#endif
ia64_acpiid_to_sapicid[lsapic->acpi_id] = (lsapic->id << 8) | lsapic->eid;
++available_cpus;
}
total_cpus++;
return 0;
}
static int __init
acpi_parse_lapic_nmi (acpi_table_entry_header *header, const unsigned long end)
{
struct acpi_table_lapic_nmi *lacpi_nmi;
lacpi_nmi = (struct acpi_table_lapic_nmi*) header;
if (BAD_MADT_ENTRY(lacpi_nmi, end))
return -EINVAL;
/* TBD: Support lapic_nmi entries */
return 0;
}
static int __init
acpi_parse_iosapic (acpi_table_entry_header *header, const unsigned long end)
{
struct acpi_table_iosapic *iosapic;
iosapic = (struct acpi_table_iosapic *) header;
if (BAD_MADT_ENTRY(iosapic, end))
return -EINVAL;
iosapic_init(iosapic->address, iosapic->global_irq_base);
return 0;
}
static int __init
acpi_parse_plat_int_src (
acpi_table_entry_header *header, const unsigned long end)
{
struct acpi_table_plat_int_src *plintsrc;
int vector;
plintsrc = (struct acpi_table_plat_int_src *) header;
if (BAD_MADT_ENTRY(plintsrc, end))
return -EINVAL;
/*
* Get vector assignment for this interrupt, set attributes,
* and program the IOSAPIC routing table.
*/
vector = iosapic_register_platform_intr(plintsrc->type,
plintsrc->global_irq,
plintsrc->iosapic_vector,
plintsrc->eid,
plintsrc->id,
(plintsrc->flags.polarity == 1) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW,
(plintsrc->flags.trigger == 1) ? IOSAPIC_EDGE : IOSAPIC_LEVEL);
platform_intr_list[plintsrc->type] = vector;
return 0;
}
static int __init
acpi_parse_int_src_ovr (
acpi_table_entry_header *header, const unsigned long end)
{
struct acpi_table_int_src_ovr *p;
p = (struct acpi_table_int_src_ovr *) header;
if (BAD_MADT_ENTRY(p, end))
return -EINVAL;
iosapic_override_isa_irq(p->bus_irq, p->global_irq,
(p->flags.polarity == 1) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW,
(p->flags.trigger == 1) ? IOSAPIC_EDGE : IOSAPIC_LEVEL);
return 0;
}
static int __init
acpi_parse_nmi_src (acpi_table_entry_header *header, const unsigned long end)
{
struct acpi_table_nmi_src *nmi_src;
nmi_src = (struct acpi_table_nmi_src*) header;
if (BAD_MADT_ENTRY(nmi_src, end))
return -EINVAL;
/* TBD: Support nmi_src entries */
return 0;
}
static void __init
acpi_madt_oem_check (char *oem_id, char *oem_table_id)
{
if (!strncmp(oem_id, "IBM", 3) &&
(!strncmp(oem_table_id, "SERMOW", 6))) {
/*
* Unfortunately ITC_DRIFT is not yet part of the
* official SAL spec, so the ITC_DRIFT bit is not
* set by the BIOS on this hardware.
*/
sal_platform_features |= IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT;
cyclone_setup();
}
}
static int __init
acpi_parse_madt (unsigned long phys_addr, unsigned long size)
{
if (!phys_addr || !size)
return -EINVAL;
acpi_madt = (struct acpi_table_madt *) __va(phys_addr);
/* remember the value for reference after free_initmem() */
#ifdef CONFIG_ITANIUM
has_8259 = 1; /* Firmware on old Itanium systems is broken */
#else
has_8259 = acpi_madt->flags.pcat_compat;
#endif
iosapic_system_init(has_8259);
/* Get base address of IPI Message Block */
if (acpi_madt->lapic_address)
ipi_base_addr = ioremap(acpi_madt->lapic_address, 0);
printk(KERN_INFO PREFIX "Local APIC address %p\n", ipi_base_addr);
acpi_madt_oem_check(acpi_madt->header.oem_id,
acpi_madt->header.oem_table_id);
return 0;
}
#ifdef CONFIG_ACPI_NUMA
#undef SLIT_DEBUG
#define PXM_FLAG_LEN ((MAX_PXM_DOMAINS + 1)/32)
static int __initdata srat_num_cpus; /* number of cpus */
static u32 __devinitdata pxm_flag[PXM_FLAG_LEN];
#define pxm_bit_set(bit) (set_bit(bit,(void *)pxm_flag))
#define pxm_bit_test(bit) (test_bit(bit,(void *)pxm_flag))
/* maps to convert between proximity domain and logical node ID */
int __devinitdata pxm_to_nid_map[MAX_PXM_DOMAINS];
int __initdata nid_to_pxm_map[MAX_NUMNODES];
static struct acpi_table_slit __initdata *slit_table;
/*
* ACPI 2.0 SLIT (System Locality Information Table)
* http://devresource.hp.com/devresource/Docs/TechPapers/IA64/slit.pdf
*/
void __init
acpi_numa_slit_init (struct acpi_table_slit *slit)
{
u32 len;
len = sizeof(struct acpi_table_header) + 8
+ slit->localities * slit->localities;
if (slit->header.length != len) {
printk(KERN_ERR "ACPI 2.0 SLIT: size mismatch: %d expected, %d actual\n",
len, slit->header.length);
memset(numa_slit, 10, sizeof(numa_slit));
return;
}
slit_table = slit;
}
void __init
acpi_numa_processor_affinity_init (struct acpi_table_processor_affinity *pa)
{
/* record this node in proximity bitmap */
pxm_bit_set(pa->proximity_domain);
node_cpuid[srat_num_cpus].phys_id = (pa->apic_id << 8) | (pa->lsapic_eid);
/* nid should be overridden as logical node id later */
node_cpuid[srat_num_cpus].nid = pa->proximity_domain;
srat_num_cpus++;
}
void __init
acpi_numa_memory_affinity_init (struct acpi_table_memory_affinity *ma)
{
unsigned long paddr, size;
u8 pxm;
struct node_memblk_s *p, *q, *pend;
pxm = ma->proximity_domain;
/* fill node memory chunk structure */
paddr = ma->base_addr_hi;
paddr = (paddr << 32) | ma->base_addr_lo;
size = ma->length_hi;
size = (size << 32) | ma->length_lo;
/* Ignore disabled entries */
if (!ma->flags.enabled)
return;
/* record this node in proximity bitmap */
pxm_bit_set(pxm);
/* Insertion sort based on base address */
pend = &node_memblk[num_node_memblks];
for (p = &node_memblk[0]; p < pend; p++) {
if (paddr < p->start_paddr)
break;
}
if (p < pend) {
for (q = pend - 1; q >= p; q--)
*(q + 1) = *q;
}
p->start_paddr = paddr;
p->size = size;
p->nid = pxm;
num_node_memblks++;
}
void __init
acpi_numa_arch_fixup (void)
{
int i, j, node_from, node_to;
/* If there's no SRAT, fix the phys_id and mark node 0 online */
if (srat_num_cpus == 0) {
node_set_online(0);
node_cpuid[0].phys_id = hard_smp_processor_id();
return;
}
/*
* MCD - This can probably be dropped now. No need for pxm ID to node ID
* mapping with sparse node numbering iff MAX_PXM_DOMAINS <= MAX_NUMNODES.
*/
/* calculate total number of nodes in system from PXM bitmap */
memset(pxm_to_nid_map, -1, sizeof(pxm_to_nid_map));
memset(nid_to_pxm_map, -1, sizeof(nid_to_pxm_map));
nodes_clear(node_online_map);
for (i = 0; i < MAX_PXM_DOMAINS; i++) {
if (pxm_bit_test(i)) {
int nid = num_online_nodes();
pxm_to_nid_map[i] = nid;
nid_to_pxm_map[nid] = i;
node_set_online(nid);
}
}
/* set logical node id in memory chunk structure */
for (i = 0; i < num_node_memblks; i++)
node_memblk[i].nid = pxm_to_nid_map[node_memblk[i].nid];
/* assign memory bank numbers for each chunk on each node */
for_each_online_node(i) {
int bank;
bank = 0;
for (j = 0; j < num_node_memblks; j++)
if (node_memblk[j].nid == i)
node_memblk[j].bank = bank++;
}
/* set logical node id in cpu structure */
for (i = 0; i < srat_num_cpus; i++)
node_cpuid[i].nid = pxm_to_nid_map[node_cpuid[i].nid];
printk(KERN_INFO "Number of logical nodes in system = %d\n", num_online_nodes());
printk(KERN_INFO "Number of memory chunks in system = %d\n", num_node_memblks);
if (!slit_table) return;
memset(numa_slit, -1, sizeof(numa_slit));
for (i=0; i<slit_table->localities; i++) {
if (!pxm_bit_test(i))
continue;
node_from = pxm_to_nid_map[i];
for (j=0; j<slit_table->localities; j++) {
if (!pxm_bit_test(j))
continue;
node_to = pxm_to_nid_map[j];
node_distance(node_from, node_to) =
slit_table->entry[i*slit_table->localities + j];
}
}
#ifdef SLIT_DEBUG
printk("ACPI 2.0 SLIT locality table:\n");
for_each_online_node(i) {
for_each_online_node(j)
printk("%03d ", node_distance(i,j));
printk("\n");
}
#endif
}
#endif /* CONFIG_ACPI_NUMA */
unsigned int
acpi_register_gsi (u32 gsi, int edge_level, int active_high_low)
{
if (has_8259 && gsi < 16)
return isa_irq_to_vector(gsi);
return iosapic_register_intr(gsi,
(active_high_low == ACPI_ACTIVE_HIGH) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW,
(edge_level == ACPI_EDGE_SENSITIVE) ? IOSAPIC_EDGE : IOSAPIC_LEVEL);
}
EXPORT_SYMBOL(acpi_register_gsi);
#ifdef CONFIG_ACPI_DEALLOCATE_IRQ
void
acpi_unregister_gsi (u32 gsi)
{
iosapic_unregister_intr(gsi);
}
EXPORT_SYMBOL(acpi_unregister_gsi);
#endif /* CONFIG_ACPI_DEALLOCATE_IRQ */
static int __init
acpi_parse_fadt (unsigned long phys_addr, unsigned long size)
{
struct acpi_table_header *fadt_header;
struct fadt_descriptor_rev2 *fadt;
if (!phys_addr || !size)
return -EINVAL;
fadt_header = (struct acpi_table_header *) __va(phys_addr);
if (fadt_header->revision != 3)
return -ENODEV; /* Only deal with ACPI 2.0 FADT */
fadt = (struct fadt_descriptor_rev2 *) fadt_header;
if (!(fadt->iapc_boot_arch & BAF_8042_KEYBOARD_CONTROLLER))
acpi_kbd_controller_present = 0;
if (fadt->iapc_boot_arch & BAF_LEGACY_DEVICES)
acpi_legacy_devices = 1;
acpi_register_gsi(fadt->sci_int, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW);
return 0;
}
unsigned long __init
acpi_find_rsdp (void)
{
unsigned long rsdp_phys = 0;
if (efi.acpi20)
rsdp_phys = __pa(efi.acpi20);
else if (efi.acpi)
printk(KERN_WARNING PREFIX "v1.0/r0.71 tables no longer supported\n");
return rsdp_phys;
}
int __init
acpi_boot_init (void)
{
/*
* MADT
* ----
* Parse the Multiple APIC Description Table (MADT), if exists.
* Note that this table provides platform SMP configuration
* information -- the successor to MPS tables.
*/
if (acpi_table_parse(ACPI_APIC, acpi_parse_madt) < 1) {
printk(KERN_ERR PREFIX "Can't find MADT\n");
goto skip_madt;
}
/* Local APIC */
if (acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr, 0) < 0)
printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n");
if (acpi_table_parse_madt(ACPI_MADT_LSAPIC, acpi_parse_lsapic, NR_CPUS) < 1)
printk(KERN_ERR PREFIX "Error parsing MADT - no LAPIC entries\n");
if (acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0) < 0)
printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
/* I/O APIC */
if (acpi_table_parse_madt(ACPI_MADT_IOSAPIC, acpi_parse_iosapic, NR_IOSAPICS) < 1)
printk(KERN_ERR PREFIX "Error parsing MADT - no IOSAPIC entries\n");
/* System-Level Interrupt Routing */
if (acpi_table_parse_madt(ACPI_MADT_PLAT_INT_SRC, acpi_parse_plat_int_src, ACPI_MAX_PLATFORM_INTERRUPTS) < 0)
printk(KERN_ERR PREFIX "Error parsing platform interrupt source entry\n");
if (acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr, 0) < 0)
printk(KERN_ERR PREFIX "Error parsing interrupt source overrides entry\n");
if (acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src, 0) < 0)
printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
skip_madt:
/*
* FADT says whether a legacy keyboard controller is present.
* The FADT also contains an SCI_INT line, by which the system
* gets interrupts such as power and sleep buttons. If it's not
* on a Legacy interrupt, it needs to be setup.
*/
if (acpi_table_parse(ACPI_FADT, acpi_parse_fadt) < 1)
printk(KERN_ERR PREFIX "Can't find FADT\n");
#ifdef CONFIG_SMP
if (available_cpus == 0) {
printk(KERN_INFO "ACPI: Found 0 CPUS; assuming 1\n");
printk(KERN_INFO "CPU 0 (0x%04x)", hard_smp_processor_id());
smp_boot_data.cpu_phys_id[available_cpus] = hard_smp_processor_id();
available_cpus = 1; /* We've got at least one of these, no? */
}
smp_boot_data.cpu_count = available_cpus;
smp_build_cpu_map();
# ifdef CONFIG_ACPI_NUMA
if (srat_num_cpus == 0) {
int cpu, i = 1;
for (cpu = 0; cpu < smp_boot_data.cpu_count; cpu++)
if (smp_boot_data.cpu_phys_id[cpu] != hard_smp_processor_id())
node_cpuid[i++].phys_id = smp_boot_data.cpu_phys_id[cpu];
}
build_cpu_to_node_map();
# endif
#endif
/* Make boot-up look pretty */
printk(KERN_INFO "%d CPUs available, %d CPUs total\n", available_cpus, total_cpus);
return 0;
}
int
acpi_gsi_to_irq (u32 gsi, unsigned int *irq)
{
int vector;
if (has_8259 && gsi < 16)
*irq = isa_irq_to_vector(gsi);
else {
vector = gsi_to_vector(gsi);
if (vector == -1)
return -1;
*irq = vector;
}
return 0;
}
/*
* ACPI based hotplug CPU support
*/
#ifdef CONFIG_ACPI_HOTPLUG_CPU
static
int
acpi_map_cpu2node(acpi_handle handle, int cpu, long physid)
{
#ifdef CONFIG_ACPI_NUMA
int pxm_id;
pxm_id = acpi_get_pxm(handle);
/*
* Assuming that the container driver would have set the proximity
* domain and would have initialized pxm_to_nid_map[pxm_id] && pxm_flag
*/
node_cpuid[cpu].nid = (pxm_id < 0) ? 0:
pxm_to_nid_map[pxm_id];
node_cpuid[cpu].phys_id = physid;
#endif
return(0);
}
int
acpi_map_lsapic(acpi_handle handle, int *pcpu)
{
struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
union acpi_object *obj;
struct acpi_table_lsapic *lsapic;
cpumask_t tmp_map;
long physid;
int cpu;
if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
return -EINVAL;
if (!buffer.length || !buffer.pointer)
return -EINVAL;
obj = buffer.pointer;
if (obj->type != ACPI_TYPE_BUFFER ||
obj->buffer.length < sizeof(*lsapic)) {
acpi_os_free(buffer.pointer);
return -EINVAL;
}
lsapic = (struct acpi_table_lsapic *)obj->buffer.pointer;
if ((lsapic->header.type != ACPI_MADT_LSAPIC) ||
(!lsapic->flags.enabled)) {
acpi_os_free(buffer.pointer);
return -EINVAL;
}
physid = ((lsapic->id <<8) | (lsapic->eid));
acpi_os_free(buffer.pointer);
buffer.length = ACPI_ALLOCATE_BUFFER;
buffer.pointer = NULL;
cpus_complement(tmp_map, cpu_present_map);
cpu = first_cpu(tmp_map);
if(cpu >= NR_CPUS)
return -EINVAL;
acpi_map_cpu2node(handle, cpu, physid);
cpu_set(cpu, cpu_present_map);
ia64_cpu_to_sapicid[cpu] = physid;
ia64_acpiid_to_sapicid[lsapic->acpi_id] = ia64_cpu_to_sapicid[cpu];
*pcpu = cpu;
return(0);
}
EXPORT_SYMBOL(acpi_map_lsapic);
int
acpi_unmap_lsapic(int cpu)
{
int i;
for (i=0; i<MAX_SAPICS; i++) {
if (ia64_acpiid_to_sapicid[i] == ia64_cpu_to_sapicid[cpu]) {
ia64_acpiid_to_sapicid[i] = -1;
break;
}
}
ia64_cpu_to_sapicid[cpu] = -1;
cpu_clear(cpu,cpu_present_map);
#ifdef CONFIG_ACPI_NUMA
/* NUMA specific cleanup's */
#endif
return(0);
}
EXPORT_SYMBOL(acpi_unmap_lsapic);
#endif /* CONFIG_ACPI_HOTPLUG_CPU */
#ifdef CONFIG_ACPI_NUMA
acpi_status __init
acpi_map_iosapic (acpi_handle handle, u32 depth, void *context, void **ret)
{
struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
union acpi_object *obj;
struct acpi_table_iosapic *iosapic;
unsigned int gsi_base;
int node;
/* Only care about objects w/ a method that returns the MADT */
if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
return AE_OK;
if (!buffer.length || !buffer.pointer)
return AE_OK;
obj = buffer.pointer;
if (obj->type != ACPI_TYPE_BUFFER ||
obj->buffer.length < sizeof(*iosapic)) {
acpi_os_free(buffer.pointer);
return AE_OK;
}
iosapic = (struct acpi_table_iosapic *)obj->buffer.pointer;
if (iosapic->header.type != ACPI_MADT_IOSAPIC) {
acpi_os_free(buffer.pointer);
return AE_OK;
}
gsi_base = iosapic->global_irq_base;
acpi_os_free(buffer.pointer);
buffer.length = ACPI_ALLOCATE_BUFFER;
buffer.pointer = NULL;
/*
* OK, it's an IOSAPIC MADT entry, look for a _PXM method to tell
* us which node to associate this with.
*/
if (ACPI_FAILURE(acpi_evaluate_object(handle, "_PXM", NULL, &buffer)))
return AE_OK;
if (!buffer.length || !buffer.pointer)
return AE_OK;
obj = buffer.pointer;
if (obj->type != ACPI_TYPE_INTEGER ||
obj->integer.value >= MAX_PXM_DOMAINS) {
acpi_os_free(buffer.pointer);
return AE_OK;
}
node = pxm_to_nid_map[obj->integer.value];
acpi_os_free(buffer.pointer);
if (node >= MAX_NUMNODES || !node_online(node) ||
cpus_empty(node_to_cpumask(node)))
return AE_OK;
/* We know a gsi to node mapping! */
map_iosapic_to_node(gsi_base, node);
return AE_OK;
}
#endif /* CONFIG_NUMA */
#endif /* CONFIG_ACPI_BOOT */
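
The SLIT length check in acpi_numa_slit_init() above encodes the table layout: an ACPI table header, an 8-byte locality count, then an N x N byte matrix read as entry[i*N + j]. A small arithmetic sketch, assuming the standard 36-byte ACPI table header, shows the sizes the kernel expects; on a mismatch it falls back to a flat distance matrix of 10s.

/* Hedged sketch: expected SLIT sizes for small locality counts, mirroring
 * len = sizeof(struct acpi_table_header) + 8 + localities * localities.
 * The 36-byte header size is an assumption taken from the ACPI table layout.
 */
#include <stdio.h>

int main(void)
{
    const unsigned int header = 36;   /* assumed sizeof(struct acpi_table_header) */
    unsigned int n;

    for (n = 1; n <= 4; n++)
        printf("%u localities -> expected SLIT length %u bytes\n",
               n, header + 8 + n * n);
    /* e.g. a 2-node box would report 36 + 8 + 4 = 48 bytes */
    return 0;
}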

239
arch/ia64/kernel/asm-offsets.c Normal file

@@ -0,0 +1,239 @@
/*
* Generate definitions needed by assembly language modules.
* This code generates raw asm output which is post-processed
* to extract and format the required data.
*/
#include <linux/config.h>
#include <linux/sched.h>
#include <asm-ia64/processor.h>
#include <asm-ia64/ptrace.h>
#include <asm-ia64/siginfo.h>
#include <asm-ia64/sigcontext.h>
#include <asm-ia64/mca.h>
#include "../kernel/sigframe.h"
#define DEFINE(sym, val) \
asm volatile("\n->" #sym " %0 " #val : : "i" (val))
#define BLANK() asm volatile("\n->" : : )
void foo(void)
{
DEFINE(IA64_TASK_SIZE, sizeof (struct task_struct));
DEFINE(IA64_THREAD_INFO_SIZE, sizeof (struct thread_info));
DEFINE(IA64_PT_REGS_SIZE, sizeof (struct pt_regs));
DEFINE(IA64_SWITCH_STACK_SIZE, sizeof (struct switch_stack));
DEFINE(IA64_SIGINFO_SIZE, sizeof (struct siginfo));
DEFINE(IA64_CPU_SIZE, sizeof (struct cpuinfo_ia64));
DEFINE(SIGFRAME_SIZE, sizeof (struct sigframe));
DEFINE(UNW_FRAME_INFO_SIZE, sizeof (struct unw_frame_info));
BLANK();
DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count));
BLANK();
DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked));
DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid));
DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader));
DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending));
DEFINE(IA64_TASK_PID_OFFSET, offsetof (struct task_struct, pid));
DEFINE(IA64_TASK_REAL_PARENT_OFFSET, offsetof (struct task_struct, real_parent));
DEFINE(IA64_TASK_SIGHAND_OFFSET,offsetof (struct task_struct, sighand));
DEFINE(IA64_TASK_SIGNAL_OFFSET,offsetof (struct task_struct, signal));
DEFINE(IA64_TASK_TGID_OFFSET, offsetof (struct task_struct, tgid));
DEFINE(IA64_TASK_THREAD_KSP_OFFSET, offsetof (struct task_struct, thread.ksp));
DEFINE(IA64_TASK_THREAD_ON_USTACK_OFFSET, offsetof (struct task_struct, thread.on_ustack));
BLANK();
DEFINE(IA64_SIGHAND_SIGLOCK_OFFSET,offsetof (struct sighand_struct, siglock));
BLANK();
DEFINE(IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,offsetof (struct signal_struct,
group_stop_count));
DEFINE(IA64_SIGNAL_SHARED_PENDING_OFFSET,offsetof (struct signal_struct, shared_pending));
BLANK();
DEFINE(IA64_PT_REGS_B6_OFFSET, offsetof (struct pt_regs, b6));
DEFINE(IA64_PT_REGS_B7_OFFSET, offsetof (struct pt_regs, b7));
DEFINE(IA64_PT_REGS_AR_CSD_OFFSET, offsetof (struct pt_regs, ar_csd));
DEFINE(IA64_PT_REGS_AR_SSD_OFFSET, offsetof (struct pt_regs, ar_ssd));
DEFINE(IA64_PT_REGS_R8_OFFSET, offsetof (struct pt_regs, r8));
DEFINE(IA64_PT_REGS_R9_OFFSET, offsetof (struct pt_regs, r9));
DEFINE(IA64_PT_REGS_R10_OFFSET, offsetof (struct pt_regs, r10));
DEFINE(IA64_PT_REGS_R11_OFFSET, offsetof (struct pt_regs, r11));
DEFINE(IA64_PT_REGS_CR_IPSR_OFFSET, offsetof (struct pt_regs, cr_ipsr));
DEFINE(IA64_PT_REGS_CR_IIP_OFFSET, offsetof (struct pt_regs, cr_iip));
DEFINE(IA64_PT_REGS_CR_IFS_OFFSET, offsetof (struct pt_regs, cr_ifs));
DEFINE(IA64_PT_REGS_AR_UNAT_OFFSET, offsetof (struct pt_regs, ar_unat));
DEFINE(IA64_PT_REGS_AR_PFS_OFFSET, offsetof (struct pt_regs, ar_pfs));
DEFINE(IA64_PT_REGS_AR_RSC_OFFSET, offsetof (struct pt_regs, ar_rsc));
DEFINE(IA64_PT_REGS_AR_RNAT_OFFSET, offsetof (struct pt_regs, ar_rnat));
DEFINE(IA64_PT_REGS_AR_BSPSTORE_OFFSET, offsetof (struct pt_regs, ar_bspstore));
DEFINE(IA64_PT_REGS_PR_OFFSET, offsetof (struct pt_regs, pr));
DEFINE(IA64_PT_REGS_B0_OFFSET, offsetof (struct pt_regs, b0));
DEFINE(IA64_PT_REGS_LOADRS_OFFSET, offsetof (struct pt_regs, loadrs));
DEFINE(IA64_PT_REGS_R1_OFFSET, offsetof (struct pt_regs, r1));
DEFINE(IA64_PT_REGS_R12_OFFSET, offsetof (struct pt_regs, r12));
DEFINE(IA64_PT_REGS_R13_OFFSET, offsetof (struct pt_regs, r13));
DEFINE(IA64_PT_REGS_AR_FPSR_OFFSET, offsetof (struct pt_regs, ar_fpsr));
DEFINE(IA64_PT_REGS_R15_OFFSET, offsetof (struct pt_regs, r15));
DEFINE(IA64_PT_REGS_R14_OFFSET, offsetof (struct pt_regs, r14));
DEFINE(IA64_PT_REGS_R2_OFFSET, offsetof (struct pt_regs, r2));
DEFINE(IA64_PT_REGS_R3_OFFSET, offsetof (struct pt_regs, r3));
DEFINE(IA64_PT_REGS_R16_OFFSET, offsetof (struct pt_regs, r16));
DEFINE(IA64_PT_REGS_R17_OFFSET, offsetof (struct pt_regs, r17));
DEFINE(IA64_PT_REGS_R18_OFFSET, offsetof (struct pt_regs, r18));
DEFINE(IA64_PT_REGS_R19_OFFSET, offsetof (struct pt_regs, r19));
DEFINE(IA64_PT_REGS_R20_OFFSET, offsetof (struct pt_regs, r20));
DEFINE(IA64_PT_REGS_R21_OFFSET, offsetof (struct pt_regs, r21));
DEFINE(IA64_PT_REGS_R22_OFFSET, offsetof (struct pt_regs, r22));
DEFINE(IA64_PT_REGS_R23_OFFSET, offsetof (struct pt_regs, r23));
DEFINE(IA64_PT_REGS_R24_OFFSET, offsetof (struct pt_regs, r24));
DEFINE(IA64_PT_REGS_R25_OFFSET, offsetof (struct pt_regs, r25));
DEFINE(IA64_PT_REGS_R26_OFFSET, offsetof (struct pt_regs, r26));
DEFINE(IA64_PT_REGS_R27_OFFSET, offsetof (struct pt_regs, r27));
DEFINE(IA64_PT_REGS_R28_OFFSET, offsetof (struct pt_regs, r28));
DEFINE(IA64_PT_REGS_R29_OFFSET, offsetof (struct pt_regs, r29));
DEFINE(IA64_PT_REGS_R30_OFFSET, offsetof (struct pt_regs, r30));
DEFINE(IA64_PT_REGS_R31_OFFSET, offsetof (struct pt_regs, r31));
DEFINE(IA64_PT_REGS_AR_CCV_OFFSET, offsetof (struct pt_regs, ar_ccv));
DEFINE(IA64_PT_REGS_F6_OFFSET, offsetof (struct pt_regs, f6));
DEFINE(IA64_PT_REGS_F7_OFFSET, offsetof (struct pt_regs, f7));
DEFINE(IA64_PT_REGS_F8_OFFSET, offsetof (struct pt_regs, f8));
DEFINE(IA64_PT_REGS_F9_OFFSET, offsetof (struct pt_regs, f9));
DEFINE(IA64_PT_REGS_F10_OFFSET, offsetof (struct pt_regs, f10));
DEFINE(IA64_PT_REGS_F11_OFFSET, offsetof (struct pt_regs, f11));
BLANK();
DEFINE(IA64_SWITCH_STACK_CALLER_UNAT_OFFSET, offsetof (struct switch_stack, caller_unat));
DEFINE(IA64_SWITCH_STACK_AR_FPSR_OFFSET, offsetof (struct switch_stack, ar_fpsr));
DEFINE(IA64_SWITCH_STACK_F2_OFFSET, offsetof (struct switch_stack, f2));
DEFINE(IA64_SWITCH_STACK_F3_OFFSET, offsetof (struct switch_stack, f3));
DEFINE(IA64_SWITCH_STACK_F4_OFFSET, offsetof (struct switch_stack, f4));
DEFINE(IA64_SWITCH_STACK_F5_OFFSET, offsetof (struct switch_stack, f5));
DEFINE(IA64_SWITCH_STACK_F12_OFFSET, offsetof (struct switch_stack, f12));
DEFINE(IA64_SWITCH_STACK_F13_OFFSET, offsetof (struct switch_stack, f13));
DEFINE(IA64_SWITCH_STACK_F14_OFFSET, offsetof (struct switch_stack, f14));
DEFINE(IA64_SWITCH_STACK_F15_OFFSET, offsetof (struct switch_stack, f15));
DEFINE(IA64_SWITCH_STACK_F16_OFFSET, offsetof (struct switch_stack, f16));
DEFINE(IA64_SWITCH_STACK_F17_OFFSET, offsetof (struct switch_stack, f17));
DEFINE(IA64_SWITCH_STACK_F18_OFFSET, offsetof (struct switch_stack, f18));
DEFINE(IA64_SWITCH_STACK_F19_OFFSET, offsetof (struct switch_stack, f19));
DEFINE(IA64_SWITCH_STACK_F20_OFFSET, offsetof (struct switch_stack, f20));
DEFINE(IA64_SWITCH_STACK_F21_OFFSET, offsetof (struct switch_stack, f21));
DEFINE(IA64_SWITCH_STACK_F22_OFFSET, offsetof (struct switch_stack, f22));
DEFINE(IA64_SWITCH_STACK_F23_OFFSET, offsetof (struct switch_stack, f23));
DEFINE(IA64_SWITCH_STACK_F24_OFFSET, offsetof (struct switch_stack, f24));
DEFINE(IA64_SWITCH_STACK_F25_OFFSET, offsetof (struct switch_stack, f25));
DEFINE(IA64_SWITCH_STACK_F26_OFFSET, offsetof (struct switch_stack, f26));
DEFINE(IA64_SWITCH_STACK_F27_OFFSET, offsetof (struct switch_stack, f27));
DEFINE(IA64_SWITCH_STACK_F28_OFFSET, offsetof (struct switch_stack, f28));
DEFINE(IA64_SWITCH_STACK_F29_OFFSET, offsetof (struct switch_stack, f29));
DEFINE(IA64_SWITCH_STACK_F30_OFFSET, offsetof (struct switch_stack, f30));
DEFINE(IA64_SWITCH_STACK_F31_OFFSET, offsetof (struct switch_stack, f31));
DEFINE(IA64_SWITCH_STACK_R4_OFFSET, offsetof (struct switch_stack, r4));
DEFINE(IA64_SWITCH_STACK_R5_OFFSET, offsetof (struct switch_stack, r5));
DEFINE(IA64_SWITCH_STACK_R6_OFFSET, offsetof (struct switch_stack, r6));
DEFINE(IA64_SWITCH_STACK_R7_OFFSET, offsetof (struct switch_stack, r7));
DEFINE(IA64_SWITCH_STACK_B0_OFFSET, offsetof (struct switch_stack, b0));
DEFINE(IA64_SWITCH_STACK_B1_OFFSET, offsetof (struct switch_stack, b1));
DEFINE(IA64_SWITCH_STACK_B2_OFFSET, offsetof (struct switch_stack, b2));
DEFINE(IA64_SWITCH_STACK_B3_OFFSET, offsetof (struct switch_stack, b3));
DEFINE(IA64_SWITCH_STACK_B4_OFFSET, offsetof (struct switch_stack, b4));
DEFINE(IA64_SWITCH_STACK_B5_OFFSET, offsetof (struct switch_stack, b5));
DEFINE(IA64_SWITCH_STACK_AR_PFS_OFFSET, offsetof (struct switch_stack, ar_pfs));
DEFINE(IA64_SWITCH_STACK_AR_LC_OFFSET, offsetof (struct switch_stack, ar_lc));
DEFINE(IA64_SWITCH_STACK_AR_UNAT_OFFSET, offsetof (struct switch_stack, ar_unat));
DEFINE(IA64_SWITCH_STACK_AR_RNAT_OFFSET, offsetof (struct switch_stack, ar_rnat));
DEFINE(IA64_SWITCH_STACK_AR_BSPSTORE_OFFSET, offsetof (struct switch_stack, ar_bspstore));
DEFINE(IA64_SWITCH_STACK_PR_OFFSET, offsetof (struct switch_stack, pr));
BLANK();
DEFINE(IA64_SIGCONTEXT_IP_OFFSET, offsetof (struct sigcontext, sc_ip));
DEFINE(IA64_SIGCONTEXT_AR_BSP_OFFSET, offsetof (struct sigcontext, sc_ar_bsp));
DEFINE(IA64_SIGCONTEXT_AR_FPSR_OFFSET, offsetof (struct sigcontext, sc_ar_fpsr));
DEFINE(IA64_SIGCONTEXT_AR_RNAT_OFFSET, offsetof (struct sigcontext, sc_ar_rnat));
DEFINE(IA64_SIGCONTEXT_AR_UNAT_OFFSET, offsetof (struct sigcontext, sc_ar_unat));
DEFINE(IA64_SIGCONTEXT_B0_OFFSET, offsetof (struct sigcontext, sc_br[0]));
DEFINE(IA64_SIGCONTEXT_CFM_OFFSET, offsetof (struct sigcontext, sc_cfm));
DEFINE(IA64_SIGCONTEXT_FLAGS_OFFSET, offsetof (struct sigcontext, sc_flags));
DEFINE(IA64_SIGCONTEXT_FR6_OFFSET, offsetof (struct sigcontext, sc_fr[6]));
DEFINE(IA64_SIGCONTEXT_PR_OFFSET, offsetof (struct sigcontext, sc_pr));
DEFINE(IA64_SIGCONTEXT_R12_OFFSET, offsetof (struct sigcontext, sc_gr[12]));
DEFINE(IA64_SIGCONTEXT_RBS_BASE_OFFSET,offsetof (struct sigcontext, sc_rbs_base));
DEFINE(IA64_SIGCONTEXT_LOADRS_OFFSET, offsetof (struct sigcontext, sc_loadrs));
BLANK();
DEFINE(IA64_SIGPENDING_SIGNAL_OFFSET, offsetof (struct sigpending, signal));
BLANK();
DEFINE(IA64_SIGFRAME_ARG0_OFFSET, offsetof (struct sigframe, arg0));
DEFINE(IA64_SIGFRAME_ARG1_OFFSET, offsetof (struct sigframe, arg1));
DEFINE(IA64_SIGFRAME_ARG2_OFFSET, offsetof (struct sigframe, arg2));
DEFINE(IA64_SIGFRAME_HANDLER_OFFSET, offsetof (struct sigframe, handler));
DEFINE(IA64_SIGFRAME_SIGCONTEXT_OFFSET, offsetof (struct sigframe, sc));
BLANK();
/* for assembly files which can't include sched.h: */
DEFINE(IA64_CLONE_VFORK, CLONE_VFORK);
DEFINE(IA64_CLONE_VM, CLONE_VM);
BLANK();
DEFINE(IA64_CPUINFO_NSEC_PER_CYC_OFFSET,
offsetof (struct cpuinfo_ia64, nsec_per_cyc));
DEFINE(IA64_CPUINFO_PTCE_BASE_OFFSET,
offsetof (struct cpuinfo_ia64, ptce_base));
DEFINE(IA64_CPUINFO_PTCE_COUNT_OFFSET,
offsetof (struct cpuinfo_ia64, ptce_count));
DEFINE(IA64_CPUINFO_PTCE_STRIDE_OFFSET,
offsetof (struct cpuinfo_ia64, ptce_stride));
BLANK();
DEFINE(IA64_TIMESPEC_TV_NSEC_OFFSET,
offsetof (struct timespec, tv_nsec));
DEFINE(CLONE_SETTLS_BIT, 19);
#if CLONE_SETTLS != (1<<19)
# error "CLONE_SETTLS_BIT incorrect, please fix"
#endif
BLANK();
DEFINE(IA64_MCA_CPU_PROC_STATE_DUMP_OFFSET,
offsetof (struct ia64_mca_cpu, proc_state_dump));
DEFINE(IA64_MCA_CPU_STACK_OFFSET,
offsetof (struct ia64_mca_cpu, stack));
DEFINE(IA64_MCA_CPU_STACKFRAME_OFFSET,
offsetof (struct ia64_mca_cpu, stackframe));
DEFINE(IA64_MCA_CPU_RBSTORE_OFFSET,
offsetof (struct ia64_mca_cpu, rbstore));
DEFINE(IA64_MCA_CPU_INIT_STACK_OFFSET,
offsetof (struct ia64_mca_cpu, init_stack));
BLANK();
/* used by fsys_gettimeofday in arch/ia64/kernel/fsys.S */
DEFINE(IA64_TIME_INTERPOLATOR_ADDRESS_OFFSET, offsetof (struct time_interpolator, addr));
DEFINE(IA64_TIME_INTERPOLATOR_SOURCE_OFFSET, offsetof (struct time_interpolator, source));
DEFINE(IA64_TIME_INTERPOLATOR_SHIFT_OFFSET, offsetof (struct time_interpolator, shift));
DEFINE(IA64_TIME_INTERPOLATOR_NSEC_OFFSET, offsetof (struct time_interpolator, nsec_per_cyc));
DEFINE(IA64_TIME_INTERPOLATOR_OFFSET_OFFSET, offsetof (struct time_interpolator, offset));
DEFINE(IA64_TIME_INTERPOLATOR_LAST_CYCLE_OFFSET, offsetof (struct time_interpolator, last_cycle));
DEFINE(IA64_TIME_INTERPOLATOR_LAST_COUNTER_OFFSET, offsetof (struct time_interpolator, last_counter));
DEFINE(IA64_TIME_INTERPOLATOR_JITTER_OFFSET, offsetof (struct time_interpolator, jitter));
DEFINE(IA64_TIME_INTERPOLATOR_MASK_OFFSET, offsetof (struct time_interpolator, mask));
DEFINE(IA64_TIME_SOURCE_CPU, TIME_SOURCE_CPU);
DEFINE(IA64_TIME_SOURCE_MMIO64, TIME_SOURCE_MMIO64);
DEFINE(IA64_TIME_SOURCE_MMIO32, TIME_SOURCE_MMIO32);
DEFINE(IA64_TIMESPEC_TV_NSEC_OFFSET, offsetof (struct timespec, tv_nsec));
}
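The listing above is the tail of the asm-offsets generator: every DEFINE() records the offsetof() of one structure field so that the hand-written assembly in this tree (entry.S, fsys.S, mca_asm.S, ...) can address C structures without hard-coding their layout. As a hedged illustration of that mechanism only (the helper definitions below follow the usual kbuild convention and are not quoted from this commit):

/* Minimal sketch of the asm-offsets trick: the compiler emits each
 * DEFINE() as an assembler comment such as "->DEMO_R8_OFFSET 16",
 * and a build step rewrites those comments into #define lines in an
 * auto-generated header that the .S files include.                  */
#include <stddef.h>

struct demo_pt_regs { unsigned long b6, b7, r8; };

#define DEFINE(sym, val) \
	asm volatile("\n->" #sym " %0 " #val : : "i" (val))
#define BLANK() asm volatile("\n->" : :)

void demo_offsets(void)
{
	DEFINE(DEMO_R8_OFFSET, offsetof(struct demo_pt_regs, r8));
	BLANK();
}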

234
arch/ia64/kernel/brl_emu.c Normal file

@@ -0,0 +1,234 @@
/*
* Emulation of the "brl" instruction for IA64 processors that
* don't support it in hardware.
* Author: Stephan Zeisset, Intel Corp. <Stephan.Zeisset@intel.com>
*
* 02/22/02 D. Mosberger Clear si_flags, si_isr, and si_imm to avoid
* leaking kernel bits.
*/
#include <linux/kernel.h>
#include <linux/sched.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
extern char ia64_set_b1, ia64_set_b2, ia64_set_b3, ia64_set_b4, ia64_set_b5;
struct illegal_op_return {
unsigned long fkt, arg1, arg2, arg3;
};
/*
* The unimplemented bits of a virtual address must be set
* to the value of the most significant implemented bit.
* unimpl_va_mask includes all unimplemented bits and
* the most significant implemented bit, so the result
* of an and operation with the mask must be all 0's
* or all 1's for the address to be valid.
*/
#define unimplemented_virtual_address(va) ( \
((va) & local_cpu_data->unimpl_va_mask) != 0 && \
((va) & local_cpu_data->unimpl_va_mask) != local_cpu_data->unimpl_va_mask \
)
/*
* The unimplemented bits of a physical address must be 0.
* unimpl_pa_mask includes all unimplemented bits, so the result
* of an and operation with the mask must be all 0's for the
* address to be valid.
*/
#define unimplemented_physical_address(pa) ( \
((pa) & local_cpu_data->unimpl_pa_mask) != 0 \
)
/*
* Handle an illegal operation fault that was caused by an
* unimplemented "brl" instruction.
* If we are not successful (e.g. because the illegal operation
* wasn't caused by a "brl" after all), we return -1.
* If we are successful, we return either 0 or the address
* of a "fixup" function for manipulating preserved register
* state.
*/
struct illegal_op_return
ia64_emulate_brl (struct pt_regs *regs, unsigned long ar_ec)
{
unsigned long bundle[2];
unsigned long opcode, btype, qp, offset, cpl;
unsigned long next_ip;
struct siginfo siginfo;
struct illegal_op_return rv;
long tmp_taken, unimplemented_address;
rv.fkt = (unsigned long) -1;
/*
* Decode the instruction bundle.
*/
if (copy_from_user(bundle, (void *) (regs->cr_iip), sizeof(bundle)))
return rv;
next_ip = (unsigned long) regs->cr_iip + 16;
/* "brl" must be in slot 2. */
if (ia64_psr(regs)->ri != 1) return rv;
/* Must be "mlx" template */
if ((bundle[0] & 0x1e) != 0x4) return rv;
opcode = (bundle[1] >> 60);
btype = ((bundle[1] >> 29) & 0x7);
qp = ((bundle[1] >> 23) & 0x3f);
offset = ((bundle[1] & 0x0800000000000000L) << 4)
| ((bundle[1] & 0x00fffff000000000L) >> 32)
| ((bundle[1] & 0x00000000007fffffL) << 40)
| ((bundle[0] & 0xffff000000000000L) >> 24);
tmp_taken = regs->pr & (1L << qp);
switch(opcode) {
case 0xC:
/*
* Long Branch.
*/
if (btype != 0) return rv;
rv.fkt = 0;
if (!(tmp_taken)) {
/*
* Qualifying predicate is 0.
* Skip instruction.
*/
regs->cr_iip = next_ip;
ia64_psr(regs)->ri = 0;
return rv;
}
break;
case 0xD:
/*
* Long Call.
*/
rv.fkt = 0;
if (!(tmp_taken)) {
/*
* Qualifying predicate is 0.
* Skip instruction.
*/
regs->cr_iip = next_ip;
ia64_psr(regs)->ri = 0;
return rv;
}
/*
* BR[btype] = IP+16
*/
switch(btype) {
case 0:
regs->b0 = next_ip;
break;
case 1:
rv.fkt = (unsigned long) &ia64_set_b1;
break;
case 2:
rv.fkt = (unsigned long) &ia64_set_b2;
break;
case 3:
rv.fkt = (unsigned long) &ia64_set_b3;
break;
case 4:
rv.fkt = (unsigned long) &ia64_set_b4;
break;
case 5:
rv.fkt = (unsigned long) &ia64_set_b5;
break;
case 6:
regs->b6 = next_ip;
break;
case 7:
regs->b7 = next_ip;
break;
}
rv.arg1 = next_ip;
/*
* AR[PFS].pfm = CFM
* AR[PFS].pec = AR[EC]
* AR[PFS].ppl = PSR.cpl
*/
cpl = ia64_psr(regs)->cpl;
regs->ar_pfs = ((regs->cr_ifs & 0x3fffffffff)
| (ar_ec << 52) | (cpl << 62));
/*
* CFM.sof -= CFM.sol
* CFM.sol = 0
* CFM.sor = 0
* CFM.rrb.gr = 0
* CFM.rrb.fr = 0
* CFM.rrb.pr = 0
*/
regs->cr_ifs = ((regs->cr_ifs & 0xffffffc00000007f)
- ((regs->cr_ifs >> 7) & 0x7f));
break;
default:
/*
* Unknown opcode.
*/
return rv;
}
regs->cr_iip += offset;
ia64_psr(regs)->ri = 0;
if (ia64_psr(regs)->it == 0)
unimplemented_address = unimplemented_physical_address(regs->cr_iip);
else
unimplemented_address = unimplemented_virtual_address(regs->cr_iip);
if (unimplemented_address) {
/*
* The target address contains unimplemented bits.
*/
printk(KERN_DEBUG "Woah! Unimplemented Instruction Address Trap!\n");
siginfo.si_signo = SIGILL;
siginfo.si_errno = 0;
siginfo.si_flags = 0;
siginfo.si_isr = 0;
siginfo.si_imm = 0;
siginfo.si_code = ILL_BADIADDR;
force_sig_info(SIGILL, &siginfo, current);
} else if (ia64_psr(regs)->tb) {
/*
* Branch Tracing is enabled.
* Force a taken branch signal.
*/
siginfo.si_signo = SIGTRAP;
siginfo.si_errno = 0;
siginfo.si_code = TRAP_BRANCH;
siginfo.si_flags = 0;
siginfo.si_isr = 0;
siginfo.si_addr = 0;
siginfo.si_imm = 0;
force_sig_info(SIGTRAP, &siginfo, current);
} else if (ia64_psr(regs)->ss) {
/*
* Single Step is enabled.
* Force a trace signal.
*/
siginfo.si_signo = SIGTRAP;
siginfo.si_errno = 0;
siginfo.si_code = TRAP_TRACE;
siginfo.si_flags = 0;
siginfo.si_isr = 0;
siginfo.si_addr = 0;
siginfo.si_imm = 0;
force_sig_info(SIGTRAP, &siginfo, current);
}
return rv;
}
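The unimplemented_virtual_address() and unimplemented_physical_address() macros at the top of this file reduce to a mask test: a virtual address passes only if the bits selected by unimpl_va_mask are all zeros or all ones, and a physical address passes only if every bit in unimpl_pa_mask is clear. A small stand-alone sketch of the virtual-address test, using a hypothetical mask for a CPU that implements address bits 0..49 (so the mask covers the unimplemented bits 50..63 plus the most significant implemented bit 49):

/* Illustration only; the mask value is invented, not read from a CPU. */
#include <stdio.h>

static const unsigned long unimpl_va_mask = ~((1UL << 49) - 1);

static int va_is_implemented(unsigned long va)
{
	unsigned long high = va & unimpl_va_mask;
	return high == 0 || high == unimpl_va_mask;	/* all 0s or all 1s */
}

int main(void)
{
	printf("%d\n", va_is_implemented(0x0000000012345678UL));	/* 1 */
	printf("%d\n", va_is_implemented(0xffffffff80000000UL));	/* 1 */
	printf("%d\n", va_is_implemented(0x0008000000000000UL));	/* 0 */
	return 0;
}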

109
arch/ia64/kernel/cyclone.c Normal file

@@ -0,0 +1,109 @@
#include <linux/module.h>
#include <linux/smp.h>
#include <linux/time.h>
#include <linux/errno.h>
#include <asm/io.h>
/* IBM Summit (EXA) Cyclone counter code*/
#define CYCLONE_CBAR_ADDR 0xFEB00CD0
#define CYCLONE_PMCC_OFFSET 0x51A0
#define CYCLONE_MPMC_OFFSET 0x51D0
#define CYCLONE_MPCS_OFFSET 0x51A8
#define CYCLONE_TIMER_FREQ 100000000
int use_cyclone;
void __init cyclone_setup(void)
{
use_cyclone = 1;
}
struct time_interpolator cyclone_interpolator = {
.source = TIME_SOURCE_MMIO64,
.shift = 16,
.frequency = CYCLONE_TIMER_FREQ,
.drift = -100,
.mask = (1LL << 40) - 1
};
int __init init_cyclone_clock(void)
{
u64* reg;
u64 base; /* saved cyclone base address */
u64 offset; /* offset from pageaddr to cyclone_timer register */
int i;
u32* volatile cyclone_timer; /* Cyclone MPMC0 register */
if (!use_cyclone)
return -ENODEV;
printk(KERN_INFO "Summit chipset: Starting Cyclone Counter.\n");
/* find base address */
offset = (CYCLONE_CBAR_ADDR);
reg = (u64*)ioremap_nocache(offset, sizeof(u64));
if(!reg){
printk(KERN_ERR "Summit chipset: Could not find valid CBAR register.\n");
use_cyclone = 0;
return -ENODEV;
}
base = readq(reg);
if(!base){
printk(KERN_ERR "Summit chipset: Could not find valid CBAR value.\n");
use_cyclone = 0;
return -ENODEV;
}
iounmap(reg);
/* setup PMCC */
offset = (base + CYCLONE_PMCC_OFFSET);
reg = (u64*)ioremap_nocache(offset, sizeof(u64));
if(!reg){
printk(KERN_ERR "Summit chipset: Could not find valid PMCC register.\n");
use_cyclone = 0;
return -ENODEV;
}
writel(0x00000001,reg);
iounmap(reg);
/* setup MPCS */
offset = (base + CYCLONE_MPCS_OFFSET);
reg = (u64*)ioremap_nocache(offset, sizeof(u64));
if(!reg){
printk(KERN_ERR "Summit chipset: Could not find valid MPCS register.\n");
use_cyclone = 0;
return -ENODEV;
}
writel(0x00000001,reg);
iounmap(reg);
/* map in cyclone_timer */
offset = (base + CYCLONE_MPMC_OFFSET);
cyclone_timer = (u32*)ioremap_nocache(offset, sizeof(u32));
if(!cyclone_timer){
printk(KERN_ERR "Summit chipset: Could not find valid MPMC register.\n");
use_cyclone = 0;
return -ENODEV;
}
/* quick test to make sure it's ticking */
for(i=0; i<3; i++){
u32 old = readl(cyclone_timer);
int stall = 100;
while(stall--) barrier();
if(readl(cyclone_timer) == old){
printk(KERN_ERR "Summit chipset: Counter not counting! DISABLED\n");
iounmap(cyclone_timer);
cyclone_timer = 0;
use_cyclone = 0;
return -ENODEV;
}
}
/* initialize last tick */
cyclone_interpolator.addr = cyclone_timer;
register_time_interpolator(&cyclone_interpolator);
return 0;
}
__initcall(init_cyclone_clock);
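init_cyclone_clock() shows the registration pattern for a memory-mapped time source: fill in a struct time_interpolator (source type, shift, frequency, drift, counter mask and, once the register is mapped, its address) and pass it to register_time_interpolator(). A hedged sketch of the same pattern for a hypothetical 32-bit MMIO counter; the field names mirror the cyclone code above, but the device, its address, frequency and drift value are invented, and the snippet assumes the same headers cyclone.c already includes:

/* Hypothetical MMIO32 time source; values are illustrative only. */
#define MYDEV_COUNTER_PHYS	0xfe001000UL	/* invented address   */
#define MYDEV_COUNTER_FREQ	25000000	/* invented: 25 MHz   */

static struct time_interpolator mydev_interpolator = {
	.source    = TIME_SOURCE_MMIO32,
	.shift     = 16,
	.frequency = MYDEV_COUNTER_FREQ,
	.drift     = -50,			/* illustrative drift */
	.mask      = 0xffffffffLL,		/* 32-bit counter     */
};

static int __init init_mydev_clock(void)
{
	u32 *counter = (u32 *) ioremap_nocache(MYDEV_COUNTER_PHYS, sizeof(u32));

	if (!counter)
		return -ENODEV;
	mydev_interpolator.addr = counter;
	register_time_interpolator(&mydev_interpolator);
	return 0;
}
__initcall(init_mydev_clock);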

382
arch/ia64/kernel/domain.c Normal file

@@ -0,0 +1,382 @@
/*
* arch/ia64/kernel/domain.c
* Architecture specific sched-domains builder.
*
* Copyright (C) 2004 Jesse Barnes
* Copyright (C) 2004 Silicon Graphics, Inc.
*/
#include <linux/sched.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/topology.h>
#include <linux/nodemask.h>
#define SD_NODES_PER_DOMAIN 6
#ifdef CONFIG_NUMA
/**
* find_next_best_node - find the next node to include in a sched_domain
* @node: node whose sched_domain we're building
* @used_nodes: nodes already in the sched_domain
*
* Find the next node to include in a given scheduling domain. Simply
* finds the closest node not already in the @used_nodes map.
*
* Should use nodemask_t.
*/
static int __devinit find_next_best_node(int node, unsigned long *used_nodes)
{
int i, n, val, min_val, best_node = 0;
min_val = INT_MAX;
for (i = 0; i < MAX_NUMNODES; i++) {
/* Start at @node */
n = (node + i) % MAX_NUMNODES;
if (!nr_cpus_node(n))
continue;
/* Skip already used nodes */
if (test_bit(n, used_nodes))
continue;
/* Simple min distance search */
val = node_distance(node, n);
if (val < min_val) {
min_val = val;
best_node = n;
}
}
set_bit(best_node, used_nodes);
return best_node;
}
/**
* sched_domain_node_span - get a cpumask for a node's sched_domain
* @node: node whose cpumask we're constructing
* @size: number of nodes to include in this span
*
* Given a node, construct a good cpumask for its sched_domain to span. It
* should be one that prevents unnecessary balancing, but also spreads tasks
* out optimally.
*/
static cpumask_t __devinit sched_domain_node_span(int node)
{
int i;
cpumask_t span, nodemask;
DECLARE_BITMAP(used_nodes, MAX_NUMNODES);
cpus_clear(span);
bitmap_zero(used_nodes, MAX_NUMNODES);
nodemask = node_to_cpumask(node);
cpus_or(span, span, nodemask);
set_bit(node, used_nodes);
for (i = 1; i < SD_NODES_PER_DOMAIN; i++) {
int next_node = find_next_best_node(node, used_nodes);
nodemask = node_to_cpumask(next_node);
cpus_or(span, span, nodemask);
}
return span;
}
#endif
/*
* At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we
* can switch it on easily if needed.
*/
#ifdef CONFIG_SCHED_SMT
static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
static struct sched_group sched_group_cpus[NR_CPUS];
static int __devinit cpu_to_cpu_group(int cpu)
{
return cpu;
}
#endif
static DEFINE_PER_CPU(struct sched_domain, phys_domains);
static struct sched_group sched_group_phys[NR_CPUS];
static int __devinit cpu_to_phys_group(int cpu)
{
#ifdef CONFIG_SCHED_SMT
return first_cpu(cpu_sibling_map[cpu]);
#else
return cpu;
#endif
}
#ifdef CONFIG_NUMA
/*
* The init_sched_build_groups can't handle what we want to do with node
* groups, so roll our own. Now each node has its own list of groups which
* gets dynamically allocated.
*/
static DEFINE_PER_CPU(struct sched_domain, node_domains);
static struct sched_group *sched_group_nodes[MAX_NUMNODES];
static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
static struct sched_group sched_group_allnodes[MAX_NUMNODES];
static int __devinit cpu_to_allnodes_group(int cpu)
{
return cpu_to_node(cpu);
}
#endif
/*
* Set up scheduler domains and groups. Callers must hold the hotplug lock.
*/
void __devinit arch_init_sched_domains(void)
{
int i;
cpumask_t cpu_default_map;
/*
* Setup mask for cpus without special case scheduling requirements.
* For now this just excludes isolated cpus, but could be used to
* exclude other special cases in the future.
*/
cpus_complement(cpu_default_map, cpu_isolated_map);
cpus_and(cpu_default_map, cpu_default_map, cpu_online_map);
/*
* Set up domains. Isolated domains just stay on the dummy domain.
*/
for_each_cpu_mask(i, cpu_default_map) {
int group;
struct sched_domain *sd = NULL, *p;
cpumask_t nodemask = node_to_cpumask(cpu_to_node(i));
cpus_and(nodemask, nodemask, cpu_default_map);
#ifdef CONFIG_NUMA
if (num_online_cpus()
> SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) {
sd = &per_cpu(allnodes_domains, i);
*sd = SD_ALLNODES_INIT;
sd->span = cpu_default_map;
group = cpu_to_allnodes_group(i);
sd->groups = &sched_group_allnodes[group];
p = sd;
} else
p = NULL;
sd = &per_cpu(node_domains, i);
*sd = SD_NODE_INIT;
sd->span = sched_domain_node_span(cpu_to_node(i));
sd->parent = p;
cpus_and(sd->span, sd->span, cpu_default_map);
#endif
p = sd;
sd = &per_cpu(phys_domains, i);
group = cpu_to_phys_group(i);
*sd = SD_CPU_INIT;
sd->span = nodemask;
sd->parent = p;
sd->groups = &sched_group_phys[group];
#ifdef CONFIG_SCHED_SMT
p = sd;
sd = &per_cpu(cpu_domains, i);
group = cpu_to_cpu_group(i);
*sd = SD_SIBLING_INIT;
sd->span = cpu_sibling_map[i];
cpus_and(sd->span, sd->span, cpu_default_map);
sd->parent = p;
sd->groups = &sched_group_cpus[group];
#endif
}
#ifdef CONFIG_SCHED_SMT
/* Set up CPU (sibling) groups */
for_each_cpu_mask(i, cpu_default_map) {
cpumask_t this_sibling_map = cpu_sibling_map[i];
cpus_and(this_sibling_map, this_sibling_map, cpu_default_map);
if (i != first_cpu(this_sibling_map))
continue;
init_sched_build_groups(sched_group_cpus, this_sibling_map,
&cpu_to_cpu_group);
}
#endif
/* Set up physical groups */
for (i = 0; i < MAX_NUMNODES; i++) {
cpumask_t nodemask = node_to_cpumask(i);
cpus_and(nodemask, nodemask, cpu_default_map);
if (cpus_empty(nodemask))
continue;
init_sched_build_groups(sched_group_phys, nodemask,
&cpu_to_phys_group);
}
#ifdef CONFIG_NUMA
init_sched_build_groups(sched_group_allnodes, cpu_default_map,
&cpu_to_allnodes_group);
for (i = 0; i < MAX_NUMNODES; i++) {
/* Set up node groups */
struct sched_group *sg, *prev;
cpumask_t nodemask = node_to_cpumask(i);
cpumask_t domainspan;
cpumask_t covered = CPU_MASK_NONE;
int j;
cpus_and(nodemask, nodemask, cpu_default_map);
if (cpus_empty(nodemask))
continue;
domainspan = sched_domain_node_span(i);
cpus_and(domainspan, domainspan, cpu_default_map);
sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
sched_group_nodes[i] = sg;
for_each_cpu_mask(j, nodemask) {
struct sched_domain *sd;
sd = &per_cpu(node_domains, j);
sd->groups = sg;
if (sd->groups == NULL) {
/* Turn off balancing if we have no groups */
sd->flags = 0;
}
}
if (!sg) {
printk(KERN_WARNING
"Can not alloc domain group for node %d\n", i);
continue;
}
sg->cpu_power = 0;
sg->cpumask = nodemask;
cpus_or(covered, covered, nodemask);
prev = sg;
for (j = 0; j < MAX_NUMNODES; j++) {
cpumask_t tmp, notcovered;
int n = (i + j) % MAX_NUMNODES;
cpus_complement(notcovered, covered);
cpus_and(tmp, notcovered, cpu_default_map);
cpus_and(tmp, tmp, domainspan);
if (cpus_empty(tmp))
break;
nodemask = node_to_cpumask(n);
cpus_and(tmp, tmp, nodemask);
if (cpus_empty(tmp))
continue;
sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
if (!sg) {
printk(KERN_WARNING
"Can not alloc domain group for node %d\n", j);
break;
}
sg->cpu_power = 0;
sg->cpumask = tmp;
cpus_or(covered, covered, tmp);
prev->next = sg;
prev = sg;
}
prev->next = sched_group_nodes[i];
}
#endif
/* Calculate CPU power for physical packages and nodes */
for_each_cpu_mask(i, cpu_default_map) {
int power;
struct sched_domain *sd;
#ifdef CONFIG_SCHED_SMT
sd = &per_cpu(cpu_domains, i);
power = SCHED_LOAD_SCALE;
sd->groups->cpu_power = power;
#endif
sd = &per_cpu(phys_domains, i);
power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
(cpus_weight(sd->groups->cpumask)-1) / 10;
sd->groups->cpu_power = power;
#ifdef CONFIG_NUMA
sd = &per_cpu(allnodes_domains, i);
if (sd->groups) {
power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
(cpus_weight(sd->groups->cpumask)-1) / 10;
sd->groups->cpu_power = power;
}
#endif
}
#ifdef CONFIG_NUMA
for (i = 0; i < MAX_NUMNODES; i++) {
struct sched_group *sg = sched_group_nodes[i];
int j;
if (sg == NULL)
continue;
next_sg:
for_each_cpu_mask(j, sg->cpumask) {
struct sched_domain *sd;
int power;
sd = &per_cpu(phys_domains, j);
if (j != first_cpu(sd->groups->cpumask)) {
/*
* Only add "power" once for each
* physical package.
*/
continue;
}
power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
(cpus_weight(sd->groups->cpumask)-1) / 10;
sg->cpu_power += power;
}
sg = sg->next;
if (sg != sched_group_nodes[i])
goto next_sg;
}
#endif
/* Attach the domains */
for_each_online_cpu(i) {
struct sched_domain *sd;
#ifdef CONFIG_SCHED_SMT
sd = &per_cpu(cpu_domains, i);
#else
sd = &per_cpu(phys_domains, i);
#endif
cpu_attach_domain(sd, i);
}
}
void __devinit arch_destroy_sched_domains(void)
{
#ifdef CONFIG_NUMA
int i;
for (i = 0; i < MAX_NUMNODES; i++) {
struct sched_group *oldsg, *sg = sched_group_nodes[i];
if (sg == NULL)
continue;
sg = sg->next;
next_sg:
oldsg = sg;
sg = sg->next;
kfree(oldsg);
if (oldsg != sched_group_nodes[i])
goto next_sg;
sched_group_nodes[i] = NULL;
}
#endif
}
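find_next_best_node() and sched_domain_node_span() implement a greedy expansion: starting from a node, repeatedly pick the not-yet-used node with the smallest node_distance() until SD_NODES_PER_DOMAIN nodes are covered. A user-space sketch of that greedy selection over an invented 4-node distance matrix (it deliberately drops the kernel's nodemask/cpumask machinery and the nr_cpus_node() check):

/* Greedy nearest-node selection mirroring find_next_best_node();
 * the distance matrix is made up for illustration.                */
#include <stdio.h>
#include <limits.h>

#define NODES 4
static const int dist[NODES][NODES] = {
	{ 10, 20, 40, 60 },
	{ 20, 10, 20, 40 },
	{ 40, 20, 10, 20 },
	{ 60, 40, 20, 10 },
};

static int next_best_node(int node, int used[NODES])
{
	int best = 0, min = INT_MAX;

	for (int i = 0; i < NODES; i++) {
		int n = (node + i) % NODES;	/* start the scan at @node */
		if (used[n])
			continue;
		if (dist[node][n] < min) {
			min = dist[node][n];
			best = n;
		}
	}
	used[best] = 1;
	return best;
}

int main(void)
{
	int used[NODES] = { 0 };

	used[0] = 1;				/* the domain's own node */
	printf("span for node 0: 0");
	for (int i = 1; i < 3; i++)		/* span of 3 nodes, scaled down from SD_NODES_PER_DOMAIN */
		printf(" %d", next_best_node(0, used));
	printf("\n");				/* prints: span for node 0: 0 1 2 */
	return 0;
}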

832
arch/ia64/kernel/efi.c Normal file

@@ -0,0 +1,832 @@
/*
* Extensible Firmware Interface
*
* Based on Extensible Firmware Interface Specification version 0.9 April 30, 1999
*
* Copyright (C) 1999 VA Linux Systems
* Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
* Copyright (C) 1999-2003 Hewlett-Packard Co.
* David Mosberger-Tang <davidm@hpl.hp.com>
* Stephane Eranian <eranian@hpl.hp.com>
*
* Not all EFI Runtime Services are implemented yet, as EFI only
* supports physical mode addressing on SoftSDV. This is to be fixed
* in a future version. --drummond 1999-07-20
*
* Implemented EFI runtime services and virtual mode calls. --davidm
*
* Goutham Rao: <goutham.rao@intel.com>
* Skip non-WB memory and ignore empty memory ranges.
*/
#include <linux/config.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/time.h>
#include <linux/efi.h>
#include <asm/io.h>
#include <asm/kregs.h>
#include <asm/meminit.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/mca.h>
#define EFI_DEBUG 0
extern efi_status_t efi_call_phys (void *, ...);
struct efi efi;
EXPORT_SYMBOL(efi);
static efi_runtime_services_t *runtime;
static unsigned long mem_limit = ~0UL, max_addr = ~0UL;
#define efi_call_virt(f, args...) (*(f))(args)
#define STUB_GET_TIME(prefix, adjust_arg) \
static efi_status_t \
prefix##_get_time (efi_time_t *tm, efi_time_cap_t *tc) \
{ \
struct ia64_fpreg fr[6]; \
efi_time_cap_t *atc = NULL; \
efi_status_t ret; \
\
if (tc) \
atc = adjust_arg(tc); \
ia64_save_scratch_fpregs(fr); \
ret = efi_call_##prefix((efi_get_time_t *) __va(runtime->get_time), adjust_arg(tm), atc); \
ia64_load_scratch_fpregs(fr); \
return ret; \
}
#define STUB_SET_TIME(prefix, adjust_arg) \
static efi_status_t \
prefix##_set_time (efi_time_t *tm) \
{ \
struct ia64_fpreg fr[6]; \
efi_status_t ret; \
\
ia64_save_scratch_fpregs(fr); \
ret = efi_call_##prefix((efi_set_time_t *) __va(runtime->set_time), adjust_arg(tm)); \
ia64_load_scratch_fpregs(fr); \
return ret; \
}
#define STUB_GET_WAKEUP_TIME(prefix, adjust_arg) \
static efi_status_t \
prefix##_get_wakeup_time (efi_bool_t *enabled, efi_bool_t *pending, efi_time_t *tm) \
{ \
struct ia64_fpreg fr[6]; \
efi_status_t ret; \
\
ia64_save_scratch_fpregs(fr); \
ret = efi_call_##prefix((efi_get_wakeup_time_t *) __va(runtime->get_wakeup_time), \
adjust_arg(enabled), adjust_arg(pending), adjust_arg(tm)); \
ia64_load_scratch_fpregs(fr); \
return ret; \
}
#define STUB_SET_WAKEUP_TIME(prefix, adjust_arg) \
static efi_status_t \
prefix##_set_wakeup_time (efi_bool_t enabled, efi_time_t *tm) \
{ \
struct ia64_fpreg fr[6]; \
efi_time_t *atm = NULL; \
efi_status_t ret; \
\
if (tm) \
atm = adjust_arg(tm); \
ia64_save_scratch_fpregs(fr); \
ret = efi_call_##prefix((efi_set_wakeup_time_t *) __va(runtime->set_wakeup_time), \
enabled, atm); \
ia64_load_scratch_fpregs(fr); \
return ret; \
}
#define STUB_GET_VARIABLE(prefix, adjust_arg) \
static efi_status_t \
prefix##_get_variable (efi_char16_t *name, efi_guid_t *vendor, u32 *attr, \
unsigned long *data_size, void *data) \
{ \
struct ia64_fpreg fr[6]; \
u32 *aattr = NULL; \
efi_status_t ret; \
\
if (attr) \
aattr = adjust_arg(attr); \
ia64_save_scratch_fpregs(fr); \
ret = efi_call_##prefix((efi_get_variable_t *) __va(runtime->get_variable), \
adjust_arg(name), adjust_arg(vendor), aattr, \
adjust_arg(data_size), adjust_arg(data)); \
ia64_load_scratch_fpregs(fr); \
return ret; \
}
#define STUB_GET_NEXT_VARIABLE(prefix, adjust_arg) \
static efi_status_t \
prefix##_get_next_variable (unsigned long *name_size, efi_char16_t *name, efi_guid_t *vendor) \
{ \
struct ia64_fpreg fr[6]; \
efi_status_t ret; \
\
ia64_save_scratch_fpregs(fr); \
ret = efi_call_##prefix((efi_get_next_variable_t *) __va(runtime->get_next_variable), \
adjust_arg(name_size), adjust_arg(name), adjust_arg(vendor)); \
ia64_load_scratch_fpregs(fr); \
return ret; \
}
#define STUB_SET_VARIABLE(prefix, adjust_arg) \
static efi_status_t \
prefix##_set_variable (efi_char16_t *name, efi_guid_t *vendor, unsigned long attr, \
unsigned long data_size, void *data) \
{ \
struct ia64_fpreg fr[6]; \
efi_status_t ret; \
\
ia64_save_scratch_fpregs(fr); \
ret = efi_call_##prefix((efi_set_variable_t *) __va(runtime->set_variable), \
adjust_arg(name), adjust_arg(vendor), attr, data_size, \
adjust_arg(data)); \
ia64_load_scratch_fpregs(fr); \
return ret; \
}
#define STUB_GET_NEXT_HIGH_MONO_COUNT(prefix, adjust_arg) \
static efi_status_t \
prefix##_get_next_high_mono_count (u32 *count) \
{ \
struct ia64_fpreg fr[6]; \
efi_status_t ret; \
\
ia64_save_scratch_fpregs(fr); \
ret = efi_call_##prefix((efi_get_next_high_mono_count_t *) \
__va(runtime->get_next_high_mono_count), adjust_arg(count)); \
ia64_load_scratch_fpregs(fr); \
return ret; \
}
#define STUB_RESET_SYSTEM(prefix, adjust_arg) \
static void \
prefix##_reset_system (int reset_type, efi_status_t status, \
unsigned long data_size, efi_char16_t *data) \
{ \
struct ia64_fpreg fr[6]; \
efi_char16_t *adata = NULL; \
\
if (data) \
adata = adjust_arg(data); \
\
ia64_save_scratch_fpregs(fr); \
efi_call_##prefix((efi_reset_system_t *) __va(runtime->reset_system), \
reset_type, status, data_size, adata); \
/* should not return, but just in case... */ \
ia64_load_scratch_fpregs(fr); \
}
#define phys_ptr(arg) ((__typeof__(arg)) ia64_tpa(arg))
STUB_GET_TIME(phys, phys_ptr)
STUB_SET_TIME(phys, phys_ptr)
STUB_GET_WAKEUP_TIME(phys, phys_ptr)
STUB_SET_WAKEUP_TIME(phys, phys_ptr)
STUB_GET_VARIABLE(phys, phys_ptr)
STUB_GET_NEXT_VARIABLE(phys, phys_ptr)
STUB_SET_VARIABLE(phys, phys_ptr)
STUB_GET_NEXT_HIGH_MONO_COUNT(phys, phys_ptr)
STUB_RESET_SYSTEM(phys, phys_ptr)
#define id(arg) arg
STUB_GET_TIME(virt, id)
STUB_SET_TIME(virt, id)
STUB_GET_WAKEUP_TIME(virt, id)
STUB_SET_WAKEUP_TIME(virt, id)
STUB_GET_VARIABLE(virt, id)
STUB_GET_NEXT_VARIABLE(virt, id)
STUB_SET_VARIABLE(virt, id)
STUB_GET_NEXT_HIGH_MONO_COUNT(virt, id)
STUB_RESET_SYSTEM(virt, id)
void
efi_gettimeofday (struct timespec *ts)
{
efi_time_t tm;
memset(ts, 0, sizeof(*ts));	/* zero the whole timespec, not just the pointer */
if ((*efi.get_time)(&tm, NULL) != EFI_SUCCESS)
return;
ts->tv_sec = mktime(tm.year, tm.month, tm.day, tm.hour, tm.minute, tm.second);
ts->tv_nsec = tm.nanosecond;
}
static int
is_available_memory (efi_memory_desc_t *md)
{
if (!(md->attribute & EFI_MEMORY_WB))
return 0;
switch (md->type) {
case EFI_LOADER_CODE:
case EFI_LOADER_DATA:
case EFI_BOOT_SERVICES_CODE:
case EFI_BOOT_SERVICES_DATA:
case EFI_CONVENTIONAL_MEMORY:
return 1;
}
return 0;
}
/*
* Trim descriptor MD so it starts at address START_ADDR. If the descriptor covers
* memory that is normally available to the kernel, issue a warning that some memory
* is being ignored.
*/
static void
trim_bottom (efi_memory_desc_t *md, u64 start_addr)
{
u64 num_skipped_pages;
if (md->phys_addr >= start_addr || !md->num_pages)
return;
num_skipped_pages = (start_addr - md->phys_addr) >> EFI_PAGE_SHIFT;
if (num_skipped_pages > md->num_pages)
num_skipped_pages = md->num_pages;
if (is_available_memory(md))
printk(KERN_NOTICE "efi.%s: ignoring %luKB of memory at 0x%lx due to granule hole "
"at 0x%lx\n", __FUNCTION__,
(num_skipped_pages << EFI_PAGE_SHIFT) >> 10,
md->phys_addr, start_addr - IA64_GRANULE_SIZE);
/*
* NOTE: Don't set md->phys_addr to START_ADDR because that could cause the memory
* descriptor list to become unsorted. In such a case, md->num_pages will be
* zero, so the Right Thing will happen.
*/
md->phys_addr += num_skipped_pages << EFI_PAGE_SHIFT;
md->num_pages -= num_skipped_pages;
}
static void
trim_top (efi_memory_desc_t *md, u64 end_addr)
{
u64 num_dropped_pages, md_end_addr;
md_end_addr = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
if (md_end_addr <= end_addr || !md->num_pages)
return;
num_dropped_pages = (md_end_addr - end_addr) >> EFI_PAGE_SHIFT;
if (num_dropped_pages > md->num_pages)
num_dropped_pages = md->num_pages;
if (is_available_memory(md))
printk(KERN_NOTICE "efi.%s: ignoring %luKB of memory at 0x%lx due to granule hole "
"at 0x%lx\n", __FUNCTION__,
(num_dropped_pages << EFI_PAGE_SHIFT) >> 10,
md->phys_addr, end_addr);
md->num_pages -= num_dropped_pages;
}
/*
* Walks the EFI memory map and calls CALLBACK once for each EFI memory descriptor that
* has memory that is available for OS use.
*/
void
efi_memmap_walk (efi_freemem_callback_t callback, void *arg)
{
int prev_valid = 0;
struct range {
u64 start;
u64 end;
} prev, curr;
void *efi_map_start, *efi_map_end, *p, *q;
efi_memory_desc_t *md, *check_md;
u64 efi_desc_size, start, end, granule_addr, last_granule_addr, first_non_wb_addr = 0;
unsigned long total_mem = 0;
efi_map_start = __va(ia64_boot_param->efi_memmap);
efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
efi_desc_size = ia64_boot_param->efi_memdesc_size;
for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
md = p;
/* skip over non-WB memory descriptors; that's all we're interested in... */
if (!(md->attribute & EFI_MEMORY_WB))
continue;
/*
* granule_addr is the base of md's first granule.
* [granule_addr - first_non_wb_addr) is guaranteed to
* be contiguous WB memory.
*/
granule_addr = GRANULEROUNDDOWN(md->phys_addr);
first_non_wb_addr = max(first_non_wb_addr, granule_addr);
if (first_non_wb_addr < md->phys_addr) {
trim_bottom(md, granule_addr + IA64_GRANULE_SIZE);
granule_addr = GRANULEROUNDDOWN(md->phys_addr);
first_non_wb_addr = max(first_non_wb_addr, granule_addr);
}
for (q = p; q < efi_map_end; q += efi_desc_size) {
check_md = q;
if ((check_md->attribute & EFI_MEMORY_WB) &&
(check_md->phys_addr == first_non_wb_addr))
first_non_wb_addr += check_md->num_pages << EFI_PAGE_SHIFT;
else
break; /* non-WB or hole */
}
last_granule_addr = GRANULEROUNDDOWN(first_non_wb_addr);
if (last_granule_addr < md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT))
trim_top(md, last_granule_addr);
if (is_available_memory(md)) {
if (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) >= max_addr) {
if (md->phys_addr >= max_addr)
continue;
md->num_pages = (max_addr - md->phys_addr) >> EFI_PAGE_SHIFT;
first_non_wb_addr = max_addr;
}
if (total_mem >= mem_limit)
continue;
if (total_mem + (md->num_pages << EFI_PAGE_SHIFT) > mem_limit) {
unsigned long limit_addr = md->phys_addr;
limit_addr += mem_limit - total_mem;
limit_addr = GRANULEROUNDDOWN(limit_addr);
if (md->phys_addr > limit_addr)
continue;
md->num_pages = (limit_addr - md->phys_addr) >>
EFI_PAGE_SHIFT;
first_non_wb_addr = max_addr = md->phys_addr +
(md->num_pages << EFI_PAGE_SHIFT);
}
total_mem += (md->num_pages << EFI_PAGE_SHIFT);
if (md->num_pages == 0)
continue;
curr.start = PAGE_OFFSET + md->phys_addr;
curr.end = curr.start + (md->num_pages << EFI_PAGE_SHIFT);
if (!prev_valid) {
prev = curr;
prev_valid = 1;
} else {
if (curr.start < prev.start)
printk(KERN_ERR "Oops: EFI memory table not ordered!\n");
if (prev.end == curr.start) {
/* merge two consecutive memory ranges */
prev.end = curr.end;
} else {
start = PAGE_ALIGN(prev.start);
end = prev.end & PAGE_MASK;
if ((end > start) && (*callback)(start, end, arg) < 0)
return;
prev = curr;
}
}
}
}
if (prev_valid) {
start = PAGE_ALIGN(prev.start);
end = prev.end & PAGE_MASK;
if (end > start)
(*callback)(start, end, arg);
}
}
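As described in the comment above, efi_memmap_walk() invokes the callback once for each coalesced, page-aligned range of OS-usable write-back memory and stops early if the callback returns a negative value. A hedged usage sketch; the callback name is invented and its signature is inferred from the (*callback)(start, end, arg) call sites in the function above:

/* Illustrative caller of efi_memmap_walk(); not part of this file. */
static int count_free_bytes(u64 start, u64 end, void *arg)
{
	unsigned long *total = arg;

	*total += end - start;
	return 0;			/* keep walking; < 0 stops the walk */
}

static unsigned long total_usable_memory(void)
{
	unsigned long total = 0;

	efi_memmap_walk(count_free_bytes, &total);
	return total;
}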
/*
* Look for the PAL_CODE region reported by EFI and map it using an
* ITR to enable safe PAL calls in virtual mode. See IA-64 Processor
* Abstraction Layer chapter 11 in ADAG
*/
void *
efi_get_pal_addr (void)
{
void *efi_map_start, *efi_map_end, *p;
efi_memory_desc_t *md;
u64 efi_desc_size;
int pal_code_count = 0;
u64 vaddr, mask;
efi_map_start = __va(ia64_boot_param->efi_memmap);
efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
efi_desc_size = ia64_boot_param->efi_memdesc_size;
for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
md = p;
if (md->type != EFI_PAL_CODE)
continue;
if (++pal_code_count > 1) {
printk(KERN_ERR "Too many EFI Pal Code memory ranges, dropped @ %lx\n",
md->phys_addr);
continue;
}
/*
* The only ITLB entry in region 7 that is used is the one installed by
* __start(). That entry covers a 64MB range.
*/
mask = ~((1 << KERNEL_TR_PAGE_SHIFT) - 1);
vaddr = PAGE_OFFSET + md->phys_addr;
/*
* We must check that the PAL mapping won't overlap with the kernel
* mapping.
*
* PAL code is guaranteed to be aligned on a power of 2 between 4k and
* 256KB and that only one ITR is needed to map it. This implies that the
* PAL code is always aligned on its size, i.e., the closest matching page
* size supported by the TLB. Therefore PAL code is guaranteed never to
* cross a 64MB boundary unless it is bigger than 64MB (very unlikely!). So for
* now the following test is enough to determine whether or not we need a
* dedicated ITR for the PAL code.
*/
if ((vaddr & mask) == (KERNEL_START & mask)) {
printk(KERN_INFO "%s: no need to install ITR for PAL code\n",
__FUNCTION__);
continue;
}
if (md->num_pages << EFI_PAGE_SHIFT > IA64_GRANULE_SIZE)
panic("Woah! PAL code size bigger than a granule!");
#if EFI_DEBUG
mask = ~((1 << IA64_GRANULE_SHIFT) - 1);
printk(KERN_INFO "CPU %d: mapping PAL code [0x%lx-0x%lx) into [0x%lx-0x%lx)\n",
smp_processor_id(), md->phys_addr,
md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
vaddr & mask, (vaddr & mask) + IA64_GRANULE_SIZE);
#endif
return __va(md->phys_addr);
}
printk(KERN_WARNING "%s: no PAL-code memory-descriptor found",
__FUNCTION__);
return NULL;
}
void
efi_map_pal_code (void)
{
void *pal_vaddr = efi_get_pal_addr ();
u64 psr;
if (!pal_vaddr)
return;
/*
* Cannot write to CRx with PSR.ic=1
*/
psr = ia64_clear_ic();
ia64_itr(0x1, IA64_TR_PALCODE, GRANULEROUNDDOWN((unsigned long) pal_vaddr),
pte_val(pfn_pte(__pa(pal_vaddr) >> PAGE_SHIFT, PAGE_KERNEL)),
IA64_GRANULE_SHIFT);
ia64_set_psr(psr); /* restore psr */
ia64_srlz_i();
}
void __init
efi_init (void)
{
void *efi_map_start, *efi_map_end;
efi_config_table_t *config_tables;
efi_char16_t *c16;
u64 efi_desc_size;
char *cp, *end, vendor[100] = "unknown";
extern char saved_command_line[];
int i;
/* it's too early to be able to use the standard kernel command line support... */
for (cp = saved_command_line; *cp; ) {
if (memcmp(cp, "mem=", 4) == 0) {
cp += 4;
mem_limit = memparse(cp, &end);
if (end != cp)
break;
cp = end;
} else if (memcmp(cp, "max_addr=", 9) == 0) {
cp += 9;
max_addr = GRANULEROUNDDOWN(memparse(cp, &end));
if (end != cp)
break;
cp = end;
} else {
while (*cp != ' ' && *cp)
++cp;
while (*cp == ' ')
++cp;
}
}
if (max_addr != ~0UL)
printk(KERN_INFO "Ignoring memory above %luMB\n", max_addr >> 20);
efi.systab = __va(ia64_boot_param->efi_systab);
/*
* Verify the EFI Table
*/
if (efi.systab == NULL)
panic("Woah! Can't find EFI system table.\n");
if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
panic("Woah! EFI system table signature incorrect\n");
if ((efi.systab->hdr.revision ^ EFI_SYSTEM_TABLE_REVISION) >> 16 != 0)
printk(KERN_WARNING "Warning: EFI system table major version mismatch: "
"got %d.%02d, expected %d.%02d\n",
efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff,
EFI_SYSTEM_TABLE_REVISION >> 16, EFI_SYSTEM_TABLE_REVISION & 0xffff);
config_tables = __va(efi.systab->tables);
/* Show what we know for posterity */
c16 = __va(efi.systab->fw_vendor);
if (c16) {
for (i = 0;i < (int) sizeof(vendor) && *c16; ++i)
vendor[i] = *c16++;
vendor[i] = '\0';
}
printk(KERN_INFO "EFI v%u.%.02u by %s:",
efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff, vendor);
for (i = 0; i < (int) efi.systab->nr_tables; i++) {
if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) {
efi.mps = __va(config_tables[i].table);
printk(" MPS=0x%lx", config_tables[i].table);
} else if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) {
efi.acpi20 = __va(config_tables[i].table);
printk(" ACPI 2.0=0x%lx", config_tables[i].table);
} else if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) {
efi.acpi = __va(config_tables[i].table);
printk(" ACPI=0x%lx", config_tables[i].table);
} else if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) {
efi.smbios = __va(config_tables[i].table);
printk(" SMBIOS=0x%lx", config_tables[i].table);
} else if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) == 0) {
efi.sal_systab = __va(config_tables[i].table);
printk(" SALsystab=0x%lx", config_tables[i].table);
} else if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) {
efi.hcdp = __va(config_tables[i].table);
printk(" HCDP=0x%lx", config_tables[i].table);
}
}
printk("\n");
runtime = __va(efi.systab->runtime);
efi.get_time = phys_get_time;
efi.set_time = phys_set_time;
efi.get_wakeup_time = phys_get_wakeup_time;
efi.set_wakeup_time = phys_set_wakeup_time;
efi.get_variable = phys_get_variable;
efi.get_next_variable = phys_get_next_variable;
efi.set_variable = phys_set_variable;
efi.get_next_high_mono_count = phys_get_next_high_mono_count;
efi.reset_system = phys_reset_system;
efi_map_start = __va(ia64_boot_param->efi_memmap);
efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
efi_desc_size = ia64_boot_param->efi_memdesc_size;
#if EFI_DEBUG
/* print EFI memory map: */
{
efi_memory_desc_t *md;
void *p;
for (i = 0, p = efi_map_start; p < efi_map_end; ++i, p += efi_desc_size) {
md = p;
printk("mem%02u: type=%u, attr=0x%lx, range=[0x%016lx-0x%016lx) (%luMB)\n",
i, md->type, md->attribute, md->phys_addr,
md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
md->num_pages >> (20 - EFI_PAGE_SHIFT));
}
}
#endif
efi_map_pal_code();
efi_enter_virtual_mode();
}
void
efi_enter_virtual_mode (void)
{
void *efi_map_start, *efi_map_end, *p;
efi_memory_desc_t *md;
efi_status_t status;
u64 efi_desc_size;
efi_map_start = __va(ia64_boot_param->efi_memmap);
efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
efi_desc_size = ia64_boot_param->efi_memdesc_size;
for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
md = p;
if (md->attribute & EFI_MEMORY_RUNTIME) {
/*
* Some descriptors have multiple bits set, so the order of
* the tests is relevant.
*/
if (md->attribute & EFI_MEMORY_WB) {
md->virt_addr = (u64) __va(md->phys_addr);
} else if (md->attribute & EFI_MEMORY_UC) {
md->virt_addr = (u64) ioremap(md->phys_addr, 0);
} else if (md->attribute & EFI_MEMORY_WC) {
#if 0
md->virt_addr = ia64_remap(md->phys_addr, (_PAGE_A | _PAGE_P
| _PAGE_D
| _PAGE_MA_WC
| _PAGE_PL_0
| _PAGE_AR_RW));
#else
printk(KERN_INFO "EFI_MEMORY_WC mapping\n");
md->virt_addr = (u64) ioremap(md->phys_addr, 0);
#endif
} else if (md->attribute & EFI_MEMORY_WT) {
#if 0
md->virt_addr = ia64_remap(md->phys_addr, (_PAGE_A | _PAGE_P
| _PAGE_D | _PAGE_MA_WT
| _PAGE_PL_0
| _PAGE_AR_RW));
#else
printk(KERN_INFO "EFI_MEMORY_WT mapping\n");
md->virt_addr = (u64) ioremap(md->phys_addr, 0);
#endif
}
}
}
status = efi_call_phys(__va(runtime->set_virtual_address_map),
ia64_boot_param->efi_memmap_size,
efi_desc_size, ia64_boot_param->efi_memdesc_version,
ia64_boot_param->efi_memmap);
if (status != EFI_SUCCESS) {
printk(KERN_WARNING "warning: unable to switch EFI into virtual mode "
"(status=%lu)\n", status);
return;
}
/*
* Now that EFI is in virtual mode, we call the EFI functions more efficiently:
*/
efi.get_time = virt_get_time;
efi.set_time = virt_set_time;
efi.get_wakeup_time = virt_get_wakeup_time;
efi.set_wakeup_time = virt_set_wakeup_time;
efi.get_variable = virt_get_variable;
efi.get_next_variable = virt_get_next_variable;
efi.set_variable = virt_set_variable;
efi.get_next_high_mono_count = virt_get_next_high_mono_count;
efi.reset_system = virt_reset_system;
}
/*
* Walk the EFI memory map looking for the I/O port range. There can only be one entry of
* this type; other I/O port ranges should be described via ACPI.
*/
u64
efi_get_iobase (void)
{
void *efi_map_start, *efi_map_end, *p;
efi_memory_desc_t *md;
u64 efi_desc_size;
efi_map_start = __va(ia64_boot_param->efi_memmap);
efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
efi_desc_size = ia64_boot_param->efi_memdesc_size;
for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
md = p;
if (md->type == EFI_MEMORY_MAPPED_IO_PORT_SPACE) {
if (md->attribute & EFI_MEMORY_UC)
return md->phys_addr;
}
}
return 0;
}
u32
efi_mem_type (unsigned long phys_addr)
{
void *efi_map_start, *efi_map_end, *p;
efi_memory_desc_t *md;
u64 efi_desc_size;
efi_map_start = __va(ia64_boot_param->efi_memmap);
efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
efi_desc_size = ia64_boot_param->efi_memdesc_size;
for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
md = p;
if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT))
return md->type;
}
return 0;
}
u64
efi_mem_attributes (unsigned long phys_addr)
{
void *efi_map_start, *efi_map_end, *p;
efi_memory_desc_t *md;
u64 efi_desc_size;
efi_map_start = __va(ia64_boot_param->efi_memmap);
efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
efi_desc_size = ia64_boot_param->efi_memdesc_size;
for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
md = p;
if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT))
return md->attribute;
}
return 0;
}
EXPORT_SYMBOL(efi_mem_attributes);
int
valid_phys_addr_range (unsigned long phys_addr, unsigned long *size)
{
void *efi_map_start, *efi_map_end, *p;
efi_memory_desc_t *md;
u64 efi_desc_size;
efi_map_start = __va(ia64_boot_param->efi_memmap);
efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
efi_desc_size = ia64_boot_param->efi_memdesc_size;
for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
md = p;
if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT)) {
if (!(md->attribute & EFI_MEMORY_WB))
return 0;
if (*size > md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - phys_addr)
*size = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - phys_addr;
return 1;
}
}
return 0;
}
int __init
efi_uart_console_only(void)
{
efi_status_t status;
char *s, name[] = "ConOut";
efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
efi_char16_t *utf16, name_utf16[32];
unsigned char data[1024];
unsigned long size = sizeof(data);
struct efi_generic_dev_path *hdr, *end_addr;
int uart = 0;
/* Convert to UTF-16 */
utf16 = name_utf16;
s = name;
while (*s)
*utf16++ = *s++ & 0x7f;
*utf16 = 0;
status = efi.get_variable(name_utf16, &guid, NULL, &size, data);
if (status != EFI_SUCCESS) {
printk(KERN_ERR "No EFI %s variable?\n", name);
return 0;
}
hdr = (struct efi_generic_dev_path *) data;
end_addr = (struct efi_generic_dev_path *) ((u8 *) data + size);
while (hdr < end_addr) {
if (hdr->type == EFI_DEV_MSG &&
hdr->sub_type == EFI_DEV_MSG_UART)
uart = 1;
else if (hdr->type == EFI_DEV_END_PATH ||
hdr->type == EFI_DEV_END_PATH2) {
if (!uart)
return 0;
if (hdr->sub_type == EFI_DEV_END_ENTIRE)
return 1;
uart = 0;
}
hdr = (struct efi_generic_dev_path *) ((u8 *) hdr + hdr->length);
}
printk(KERN_ERR "Malformed %s value\n", name);
return 0;
}


@@ -0,0 +1,86 @@
/*
* EFI call stub.
*
* Copyright (C) 1999-2001 Hewlett-Packard Co
* David Mosberger <davidm@hpl.hp.com>
*
* This stub allows us to make EFI calls in physical mode with interrupts
* turned off. We need this because we can't call SetVirtualAddressMap() until
* the kernel has booted far enough to allow allocation of struct vma_struct
* entries (which we would need to map stuff with memory attributes other
* than uncached or writeback...). Since the GetTime() service gets called
* earlier than that, we need to be able to make physical mode EFI calls from
* the kernel.
*/
/*
* PSR settings as per SAL spec (Chapter 8 in the "IA-64 System
* Abstraction Layer Specification", revision 2.6e). Note that
* psr.dfl and psr.dfh MUST be cleared, despite what this manual says.
* Otherwise, SAL dies whenever it's trying to do an IA-32 BIOS call
* (the br.ia instruction fails unless psr.dfl and psr.dfh are
* cleared). Fortunately, SAL promises not to touch the floating
* point regs, so at least we don't have to save f2-f127.
*/
#define PSR_BITS_TO_CLEAR \
(IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT | \
IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \
IA64_PSR_DFL | IA64_PSR_DFH)
#define PSR_BITS_TO_SET \
(IA64_PSR_BN)
#include <asm/processor.h>
#include <asm/asmmacro.h>
/*
* Inputs:
* in0 = address of function descriptor of EFI routine to call
* in1..in7 = arguments to routine
*
* Outputs:
* r8 = EFI_STATUS returned by called function
*/
GLOBAL_ENTRY(efi_call_phys)
.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
alloc loc1=ar.pfs,8,7,7,0
ld8 r2=[in0],8 // load EFI function's entry point
mov loc0=rp
.body
;;
mov loc2=gp // save global pointer
mov loc4=ar.rsc // save RSE configuration
mov ar.rsc=0 // put RSE in enforced lazy, LE mode
;;
ld8 gp=[in0] // load EFI function's global pointer
movl r16=PSR_BITS_TO_CLEAR
mov loc3=psr // save processor status word
movl r17=PSR_BITS_TO_SET
;;
or loc3=loc3,r17
mov b6=r2
;;
andcm r16=loc3,r16 // get psr with IT, DT, and RT bits cleared
br.call.sptk.many rp=ia64_switch_mode_phys
.ret0: mov out4=in5
mov out0=in1
mov out1=in2
mov out2=in3
mov out3=in4
mov out5=in6
mov out6=in7
mov loc5=r19
mov loc6=r20
br.call.sptk.many rp=b6 // call the EFI function
.ret1: mov ar.rsc=0 // put RSE in enforced lazy, LE mode
mov r16=loc3
mov r19=loc5
mov r20=loc6
br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
.ret2: mov ar.rsc=loc4 // restore RSE configuration
mov ar.pfs=loc1
mov rp=loc0
mov gp=loc2
br.ret.sptk.many rp
END(efi_call_phys)
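The in0 value documented in the header of this stub is an IA-64 function descriptor rather than a raw code address: the stub first loads the entry point (ld8 r2=[in0],8) and then the callee's global pointer (ld8 gp=[in0]). Conceptually the descriptor is a two-word record; a minimal sketch of that layout, for illustration only:

/* IA-64 function descriptor as consumed by efi_call_phys above. */
struct ia64_fdesc_sketch {
	unsigned long ip;	/* code entry point, ends up in b6 */
	unsigned long gp;	/* callee's global pointer         */
};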

1587
arch/ia64/kernel/entry.S Normal file

Diff is too large to display.

82
arch/ia64/kernel/entry.h Normal file

@@ -0,0 +1,82 @@
#include <linux/config.h>
/*
* Preserved registers that are shared between code in ivt.S and
* entry.S. Be careful not to step on these!
*/
#define PRED_LEAVE_SYSCALL 1 /* TRUE iff leave from syscall */
#define PRED_KERNEL_STACK 2 /* returning to kernel-stacks? */
#define PRED_USER_STACK 3 /* returning to user-stacks? */
#define PRED_SYSCALL 4 /* inside a system call? */
#define PRED_NON_SYSCALL 5 /* complement of PRED_SYSCALL */
#ifdef __ASSEMBLY__
# define PASTE2(x,y) x##y
# define PASTE(x,y) PASTE2(x,y)
# define pLvSys PASTE(p,PRED_LEAVE_SYSCALL)
# define pKStk PASTE(p,PRED_KERNEL_STACK)
# define pUStk PASTE(p,PRED_USER_STACK)
# define pSys PASTE(p,PRED_SYSCALL)
# define pNonSys PASTE(p,PRED_NON_SYSCALL)
#endif
#define PT(f) (IA64_PT_REGS_##f##_OFFSET)
#define SW(f) (IA64_SWITCH_STACK_##f##_OFFSET)
#define PT_REGS_SAVES(off) \
.unwabi 3, 'i'; \
.fframe IA64_PT_REGS_SIZE+16+(off); \
.spillsp rp, PT(CR_IIP)+16+(off); \
.spillsp ar.pfs, PT(CR_IFS)+16+(off); \
.spillsp ar.unat, PT(AR_UNAT)+16+(off); \
.spillsp ar.fpsr, PT(AR_FPSR)+16+(off); \
.spillsp pr, PT(PR)+16+(off);
#define PT_REGS_UNWIND_INFO(off) \
.prologue; \
PT_REGS_SAVES(off); \
.body
#define SWITCH_STACK_SAVES(off) \
.savesp ar.unat,SW(CALLER_UNAT)+16+(off); \
.savesp ar.fpsr,SW(AR_FPSR)+16+(off); \
.spillsp f2,SW(F2)+16+(off); .spillsp f3,SW(F3)+16+(off); \
.spillsp f4,SW(F4)+16+(off); .spillsp f5,SW(F5)+16+(off); \
.spillsp f16,SW(F16)+16+(off); .spillsp f17,SW(F17)+16+(off); \
.spillsp f18,SW(F18)+16+(off); .spillsp f19,SW(F19)+16+(off); \
.spillsp f20,SW(F20)+16+(off); .spillsp f21,SW(F21)+16+(off); \
.spillsp f22,SW(F22)+16+(off); .spillsp f23,SW(F23)+16+(off); \
.spillsp f24,SW(F24)+16+(off); .spillsp f25,SW(F25)+16+(off); \
.spillsp f26,SW(F26)+16+(off); .spillsp f27,SW(F27)+16+(off); \
.spillsp f28,SW(F28)+16+(off); .spillsp f29,SW(F29)+16+(off); \
.spillsp f30,SW(F30)+16+(off); .spillsp f31,SW(F31)+16+(off); \
.spillsp r4,SW(R4)+16+(off); .spillsp r5,SW(R5)+16+(off); \
.spillsp r6,SW(R6)+16+(off); .spillsp r7,SW(R7)+16+(off); \
.spillsp b0,SW(B0)+16+(off); .spillsp b1,SW(B1)+16+(off); \
.spillsp b2,SW(B2)+16+(off); .spillsp b3,SW(B3)+16+(off); \
.spillsp b4,SW(B4)+16+(off); .spillsp b5,SW(B5)+16+(off); \
.spillsp ar.pfs,SW(AR_PFS)+16+(off); .spillsp ar.lc,SW(AR_LC)+16+(off); \
.spillsp @priunat,SW(AR_UNAT)+16+(off); \
.spillsp ar.rnat,SW(AR_RNAT)+16+(off); \
.spillsp ar.bspstore,SW(AR_BSPSTORE)+16+(off); \
.spillsp pr,SW(PR)+16+(off))
#define DO_SAVE_SWITCH_STACK \
movl r28=1f; \
;; \
.fframe IA64_SWITCH_STACK_SIZE; \
adds sp=-IA64_SWITCH_STACK_SIZE,sp; \
mov.ret.sptk b7=r28,1f; \
SWITCH_STACK_SAVES(0); \
br.cond.sptk.many save_switch_stack; \
1:
#define DO_LOAD_SWITCH_STACK \
movl r28=1f; \
;; \
invala; \
mov.ret.sptk b7=r28,1f; \
br.cond.sptk.many load_switch_stack; \
1: .restore sp; \
adds sp=IA64_SWITCH_STACK_SIZE,sp

884
arch/ia64/kernel/fsys.S Normal file

@@ -0,0 +1,884 @@
/*
* This file contains the light-weight system call handlers (fsyscall-handlers).
*
* Copyright (C) 2003 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
*
* 25-Sep-03 davidm Implement fsys_rt_sigprocmask().
* 18-Feb-03 louisk Implement fsys_gettimeofday().
* 28-Feb-03 davidm Fixed several bugs in fsys_gettimeofday(). Tuned it some more,
* probably broke it along the way... ;-)
* 13-Jul-04 clameter Implement fsys_clock_gettime and revise fsys_gettimeofday to make
* it capable of using memory based clocks without falling back to C code.
*/
#include <asm/asmmacro.h>
#include <asm/errno.h>
#include <asm/offsets.h>
#include <asm/percpu.h>
#include <asm/thread_info.h>
#include <asm/sal.h>
#include <asm/signal.h>
#include <asm/system.h>
#include <asm/unistd.h>
#include "entry.h"
/*
* See Documentation/ia64/fsys.txt for details on fsyscalls.
*
* On entry to an fsyscall handler:
* r10 = 0 (i.e., defaults to "successful syscall return")
* r11 = saved ar.pfs (a user-level value)
* r15 = system call number
* r16 = "current" task pointer (in normal kernel-mode, this is in r13)
* r32-r39 = system call arguments
* b6 = return address (a user-level value)
* ar.pfs = previous frame-state (a user-level value)
* PSR.be = cleared to zero (i.e., little-endian byte order is in effect)
* all other registers may contain values passed in from user-mode
*
* On return from an fsyscall handler:
* r11 = saved ar.pfs (as passed into the fsyscall handler)
* r15 = system call number (as passed into the fsyscall handler)
* r32-r39 = system call arguments (as passed into the fsyscall handler)
* b6 = return address (as passed into the fsyscall handler)
* ar.pfs = previous frame-state (as passed into the fsyscall handler)
*/
ENTRY(fsys_ni_syscall)
.prologue
.altrp b6
.body
mov r8=ENOSYS
mov r10=-1
FSYS_RETURN
END(fsys_ni_syscall)
ENTRY(fsys_getpid)
.prologue
.altrp b6
.body
add r9=TI_FLAGS+IA64_TASK_SIZE,r16
;;
ld4 r9=[r9]
add r8=IA64_TASK_TGID_OFFSET,r16
;;
and r9=TIF_ALLWORK_MASK,r9
ld4 r8=[r8] // r8 = current->tgid
;;
cmp.ne p8,p0=0,r9
(p8) br.spnt.many fsys_fallback_syscall
FSYS_RETURN
END(fsys_getpid)
ENTRY(fsys_getppid)
.prologue
.altrp b6
.body
add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
;;
ld8 r17=[r17] // r17 = current->group_leader
add r9=TI_FLAGS+IA64_TASK_SIZE,r16
;;
ld4 r9=[r9]
add r17=IA64_TASK_REAL_PARENT_OFFSET,r17 // r17 = &current->group_leader->real_parent
;;
and r9=TIF_ALLWORK_MASK,r9
1: ld8 r18=[r17] // r18 = current->group_leader->real_parent
;;
cmp.ne p8,p0=0,r9
add r8=IA64_TASK_TGID_OFFSET,r18 // r8 = &current->group_leader->real_parent->tgid
;;
/*
* The .acq is needed to ensure that the read of tgid has returned its data before
* we re-check "real_parent".
*/
ld4.acq r8=[r8] // r8 = current->group_leader->real_parent->tgid
#ifdef CONFIG_SMP
/*
* Re-read current->group_leader->real_parent.
*/
ld8 r19=[r17] // r19 = current->group_leader->real_parent
(p8) br.spnt.many fsys_fallback_syscall
;;
cmp.ne p6,p0=r18,r19 // did real_parent change?
mov r19=0 // i must not leak kernel bits...
(p6) br.cond.spnt.few 1b // yes -> redo the read of tgid and the check
;;
mov r17=0 // i must not leak kernel bits...
mov r18=0 // i must not leak kernel bits...
#else
mov r17=0 // i must not leak kernel bits...
mov r18=0 // i must not leak kernel bits...
mov r19=0 // i must not leak kernel bits...
#endif
FSYS_RETURN
END(fsys_getppid)
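fsys_getppid performs a lock-free read: it loads group_leader->real_parent, reads that task's tgid with acquire semantics (ld4.acq), then re-reads real_parent and retries if it changed in between, so a concurrent re-parenting cannot hand back a stale pid. A rough C analogue of that retry loop, written with C11 atomics purely for illustration (the structure and field names are stand-ins, not kernel definitions):

/* Illustrative C analogue of the fsys_getppid retry loop above. */
#include <stdatomic.h>

struct task_sketch {
	struct task_sketch *_Atomic real_parent;
	atomic_int tgid;
};

static int read_parent_tgid(struct task_sketch *leader)
{
	struct task_sketch *parent;
	int tgid;

	do {
		parent = atomic_load(&leader->real_parent);
		/* acquire pairs with the ld4.acq above: the tgid read
		 * must complete before real_parent is re-checked.     */
		tgid = atomic_load_explicit(&parent->tgid, memory_order_acquire);
	} while (parent != atomic_load(&leader->real_parent));

	return tgid;
}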
ENTRY(fsys_set_tid_address)
.prologue
.altrp b6
.body
add r9=TI_FLAGS+IA64_TASK_SIZE,r16
;;
ld4 r9=[r9]
tnat.z p6,p7=r32 // check argument register for being NaT
;;
and r9=TIF_ALLWORK_MASK,r9
add r8=IA64_TASK_PID_OFFSET,r16
add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16
;;
ld4 r8=[r8]
cmp.ne p8,p0=0,r9
mov r17=-1
;;
(p6) st8 [r18]=r32
(p7) st8 [r18]=r17
(p8) br.spnt.many fsys_fallback_syscall
;;
mov r17=0 // i must not leak kernel bits...
mov r18=0 // i must not leak kernel bits...
FSYS_RETURN
END(fsys_set_tid_address)
/*
* Ensure that the time interpolator structure is compatible with the asm code
*/
#if IA64_TIME_INTERPOLATOR_SOURCE_OFFSET !=0 || IA64_TIME_INTERPOLATOR_SHIFT_OFFSET != 2 \
|| IA64_TIME_INTERPOLATOR_JITTER_OFFSET != 3 || IA64_TIME_INTERPOLATOR_NSEC_OFFSET != 4
#error fsys_gettimeofday incompatible with changes to struct time_interpolator
#endif
#define CLOCK_REALTIME 0
#define CLOCK_MONOTONIC 1
#define CLOCK_DIVIDE_BY_1000 0x4000
#define CLOCK_ADD_MONOTONIC 0x8000
ENTRY(fsys_gettimeofday)
.prologue
.altrp b6
.body
mov r31 = r32
tnat.nz p6,p0 = r33 // guard against NaT argument
(p6) br.cond.spnt.few .fail_einval
mov r30 = CLOCK_DIVIDE_BY_1000
;;
.gettime:
// Register map
// Incoming r31 = pointer to address where to place result
// r30 = flags determining how time is processed
// r2,r3 = temp r4-r7 preserved
// r8 = result nanoseconds
// r9 = result seconds
// r10 = temporary storage for clock difference
// r11 = preserved: saved ar.pfs
// r12 = preserved: memory stack
// r13 = preserved: thread pointer
// r14 = address of mask / mask
// r15 = preserved: system call number
// r16 = preserved: current task pointer
// r17 = wall to monotonic use
// r18 = time_interpolator->offset
// r19 = address of wall_to_monotonic
// r20 = pointer to struct time_interpolator / pointer to time_interpolator->address
// r21 = shift factor
// r22 = address of time interpolator->last_counter
// r23 = address of time_interpolator->last_cycle
// r24 = address of time_interpolator->offset
// r25 = last_cycle value
// r26 = last_counter value
// r27 = pointer to xtime
// r28 = sequence number at the beginning of critical section
// r29 = address of seqlock
// r30 = time processing flags / memory address
// r31 = pointer to result
// Predicates
// p6,p7 short term use
// p8 = timesource ar.itc
// p9 = timesource mmio64
// p10 = timesource mmio32
// p11 = timesource not to be handled by asm code
// p12 = memory time source ( = p9 | p10)
// p13 = do cmpxchg with time_interpolator_last_cycle
// p14 = Divide by 1000
// p15 = Add monotonic
//
// Note that instructions are optimized for McKinley. McKinley can process two
// bundles simultaneously and therefore we continuously try to feed the CPU
// two bundles and then a stop.
tnat.nz p6,p0 = r31 // branch deferred since it does not fit into bundle structure
mov pr = r30,0xc000 // Set predicates according to function
add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
movl r20 = time_interpolator
;;
ld8 r20 = [r20] // get pointer to time_interpolator structure
movl r29 = xtime_lock
ld4 r2 = [r2] // process work pending flags
movl r27 = xtime
;; // only one bundle here
ld8 r21 = [r20] // first quad with control information
and r2 = TIF_ALLWORK_MASK,r2
(p6) br.cond.spnt.few .fail_einval // deferred branch
;;
add r10 = IA64_TIME_INTERPOLATOR_ADDRESS_OFFSET,r20
extr r3 = r21,32,32 // time_interpolator->nsec_per_cyc
extr r8 = r21,0,16 // time_interpolator->source
cmp.ne p6, p0 = 0, r2 // Fallback if work is scheduled
(p6) br.cond.spnt.many fsys_fallback_syscall
;;
cmp.eq p8,p12 = 0,r8 // Check for cpu timer
cmp.eq p9,p0 = 1,r8 // MMIO64 ?
extr r2 = r21,24,8 // time_interpolator->jitter
cmp.eq p10,p0 = 2,r8 // MMIO32 ?
cmp.ltu p11,p0 = 2,r8 // function or other clock
(p11) br.cond.spnt.many fsys_fallback_syscall
;;
setf.sig f7 = r3 // Setup for scaling of counter
(p15) movl r19 = wall_to_monotonic
(p12) ld8 r30 = [r10]
cmp.ne p13,p0 = r2,r0 // need jitter compensation?
extr r21 = r21,16,8 // shift factor
;;
.time_redo:
.pred.rel.mutex p8,p9,p10
ld4.acq r28 = [r29] // xtime_lock.sequence. Must come first for locking purposes
(p8) mov r2 = ar.itc // CPU_TIMER. 36 clocks latency!!!
add r22 = IA64_TIME_INTERPOLATOR_LAST_COUNTER_OFFSET,r20
(p9) ld8 r2 = [r30] // readq(ti->address). Could also have latency issues..
(p10)	ld4 r2 = [r30]		// readl(ti->address), 32-bit MMIO read
(p13) add r23 = IA64_TIME_INTERPOLATOR_LAST_CYCLE_OFFSET,r20
;; // could be removed by moving the last add upward
ld8 r26 = [r22] // time_interpolator->last_counter
(p13)	ld8 r25 = [r23]		// time_interpolator->last_cycle
add r24 = IA64_TIME_INTERPOLATOR_OFFSET_OFFSET,r20
(p15) ld8 r17 = [r19],IA64_TIMESPEC_TV_NSEC_OFFSET
ld8 r9 = [r27],IA64_TIMESPEC_TV_NSEC_OFFSET
add r14 = IA64_TIME_INTERPOLATOR_MASK_OFFSET, r20
;;
ld8 r18 = [r24] // time_interpolator->offset
ld8 r8 = [r27],-IA64_TIMESPEC_TV_NSEC_OFFSET // xtime.tv_nsec
(p13) sub r3 = r25,r2 // Diff needed before comparison (thanks davidm)
;;
ld8 r14 = [r14] // time_interpolator->mask
(p13) cmp.gt.unc p6,p7 = r3,r0 // check if it is less than last. p6,p7 cleared
sub r10 = r2,r26 // current_counter - last_counter
;;
(p6) sub r10 = r25,r26 // time we got was less than last_cycle
(p7) mov ar.ccv = r25 // more than last_cycle. Prep for cmpxchg
;;
and r10 = r10,r14 // Apply mask
;;
setf.sig f8 = r10
nop.i 123
;;
(p7) cmpxchg8.rel r3 = [r23],r2,ar.ccv
EX(.fail_efault, probe.w.fault r31, 3) // This takes 5 cycles and we have spare time
xmpy.l f8 = f8,f7 // nsec_per_cyc*(counter-last_counter)
(p15) add r9 = r9,r17 // Add wall to monotonic.secs to result secs
;;
(p15) ld8 r17 = [r19],-IA64_TIMESPEC_TV_NSEC_OFFSET
(p7) cmp.ne p7,p0 = r25,r3 // if cmpxchg not successful redo
// simulate tbit.nz.or p7,p0 = r28,0
and r28 = ~1,r28 // Make sequence even to force retry if odd
getf.sig r2 = f8
mf
add r8 = r8,r18 // Add time interpolator offset
;;
ld4 r10 = [r29] // xtime_lock.sequence
(p15) add r8 = r8, r17 // Add monotonic.nsecs to nsecs
shr.u r2 = r2,r21
;; // overloaded 3 bundles!
// End critical section.
add r8 = r8,r2 // Add xtime.nsecs
cmp4.ne.or p7,p0 = r28,r10
(p7) br.cond.dpnt.few .time_redo // sequence number changed ?
// Now r8=tv->tv_nsec and r9=tv->tv_sec
mov r10 = r0
movl r2 = 1000000000
add r23 = IA64_TIMESPEC_TV_NSEC_OFFSET, r31
(p14) movl r3 = 2361183241434822607 // Prep for / 1000 hack
;;
.time_normalize:
mov r21 = r8
cmp.ge p6,p0 = r8,r2
(p14) shr.u r20 = r8, 3 // We can repeat this if necessary just wasting some time
;;
(p14) setf.sig f8 = r20
(p6) sub r8 = r8,r2
(p6) add r9 = 1,r9 // two nops before the branch.
(p14) setf.sig f7 = r3 // Chances for repeats are 1 in 10000 for gettod
(p6) br.cond.dpnt.few .time_normalize
;;
// Divided by 8 through the shift above. Now divide by 125.
// The compiler does this with a multiply and a shift,
// and we do the same here.
EX(.fail_efault, probe.w.fault r23, 3) // This also costs 5 cycles
(p14) xmpy.hu f8 = f8, f7 // xmpy has 5 cycles latency so use it...
;;
mov r8 = r0
(p14) getf.sig r2 = f8
;;
(p14) shr.u r21 = r2, 4
;;
EX(.fail_efault, st8 [r31] = r9)
EX(.fail_efault, st8 [r23] = r21)
FSYS_RETURN
.fail_einval:
mov r8 = EINVAL
mov r10 = -1
FSYS_RETURN
.fail_efault:
mov r8 = EFAULT
mov r10 = -1
FSYS_RETURN
END(fsys_gettimeofday)
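Stripped of the seqlock retry and the last_cycle cmpxchg, the computation above amounts to an interpolation step plus, for gettimeofday, a division of the nanosecond value by 1000 done without a divide instruction. The sketch below is illustrative only; unsigned __int128 (a GCC/clang extension) stands in for xmpy.hu, and the magic constant is the one loaded above.

#include <stdint.h>

/* nanoseconds accumulated since xtime was last written */
static uint64_t interp_nsec(uint64_t counter, uint64_t last_counter,
			    uint64_t mask, uint64_t nsec_per_cyc, unsigned shift)
{
	return (((counter - last_counter) & mask) * nsec_per_cyc) >> shift;
}

/* nsec/1000 without a divide: /8 by shifting, then /125 via a multiply by
 * ceil(2^68/125) = 2361183241434822607, keeping the high 64 bits and
 * shifting right by 4 more (the shr.u/xmpy.hu/shr.u sequence above). */
static uint64_t nsec_to_usec(uint64_t nsec)
{
	uint64_t q = nsec >> 3;
	unsigned __int128 prod = (unsigned __int128)q * 2361183241434822607ULL;
	return (uint64_t)(prod >> 64) >> 4;
}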
ENTRY(fsys_clock_gettime)
.prologue
.altrp b6
.body
cmp4.ltu p6, p0 = CLOCK_MONOTONIC, r32
// Fallback if this is not CLOCK_REALTIME or CLOCK_MONOTONIC
(p6) br.spnt.few fsys_fallback_syscall
mov r31 = r33
shl r30 = r32,15
br.many .gettime
END(fsys_clock_gettime)
/*
* long fsys_rt_sigprocmask (int how, sigset_t *set, sigset_t *oset, size_t sigsetsize).
*/
#if _NSIG_WORDS != 1
# error Sorry, fsys_rt_sigprocmask() needs to be updated for _NSIG_WORDS != 1.
#endif
ENTRY(fsys_rt_sigprocmask)
.prologue
.altrp b6
.body
add r2=IA64_TASK_BLOCKED_OFFSET,r16
add r9=TI_FLAGS+IA64_TASK_SIZE,r16
cmp4.ltu p6,p0=SIG_SETMASK,r32
cmp.ne p15,p0=r0,r34 // oset != NULL?
tnat.nz p8,p0=r34
add r31=IA64_TASK_SIGHAND_OFFSET,r16
;;
ld8 r3=[r2] // read/prefetch current->blocked
ld4 r9=[r9]
tnat.nz.or p6,p0=r35
cmp.ne.or p6,p0=_NSIG_WORDS*8,r35
tnat.nz.or p6,p0=r32
(p6) br.spnt.few .fail_einval // fail with EINVAL
;;
#ifdef CONFIG_SMP
ld8 r31=[r31] // r31 <- current->sighand
#endif
and r9=TIF_ALLWORK_MASK,r9
tnat.nz.or p8,p0=r33
;;
cmp.ne p7,p0=0,r9
cmp.eq p6,p0=r0,r33 // set == NULL?
add r31=IA64_SIGHAND_SIGLOCK_OFFSET,r31 // r31 <- current->sighand->siglock
(p8) br.spnt.few .fail_efault // fail with EFAULT
(p7) br.spnt.many fsys_fallback_syscall // got pending kernel work...
(p6) br.dpnt.many .store_mask // -> short-circuit to just reading the signal mask
/* Argh, we actually have to do some work and _update_ the signal mask: */
EX(.fail_efault, probe.r.fault r33, 3) // verify user has read-access to *set
EX(.fail_efault, ld8 r14=[r33]) // r14 <- *set
mov r17=(1 << (SIGKILL - 1)) | (1 << (SIGSTOP - 1))
;;
rsm psr.i // mask interrupt delivery
mov ar.ccv=0
andcm r14=r14,r17 // filter out SIGKILL & SIGSTOP
#ifdef CONFIG_SMP
mov r17=1
;;
cmpxchg4.acq r18=[r31],r17,ar.ccv // try to acquire the lock
mov r8=EINVAL // default to EINVAL
;;
ld8 r3=[r2] // re-read current->blocked now that we hold the lock
cmp4.ne p6,p0=r18,r0
(p6) br.cond.spnt.many .lock_contention
;;
#else
ld8 r3=[r2]				// re-read current->blocked (UP: no lock needed)
mov r8=EINVAL // default to EINVAL
#endif
add r18=IA64_TASK_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r16
add r19=IA64_TASK_SIGNAL_OFFSET,r16
cmp4.eq p6,p0=SIG_BLOCK,r32
;;
ld8 r19=[r19] // r19 <- current->signal
cmp4.eq p7,p0=SIG_UNBLOCK,r32
cmp4.eq p8,p0=SIG_SETMASK,r32
;;
ld8 r18=[r18] // r18 <- current->pending.signal
.pred.rel.mutex p6,p7,p8
(p6) or r14=r3,r14 // SIG_BLOCK
(p7) andcm r14=r3,r14 // SIG_UNBLOCK
(p8) mov r14=r14 // SIG_SETMASK
(p6) mov r8=0 // clear error code
// recalc_sigpending()
add r17=IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,r19
add r19=IA64_SIGNAL_SHARED_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r19
;;
ld4 r17=[r17] // r17 <- current->signal->group_stop_count
(p7) mov r8=0 // clear error code
ld8 r19=[r19] // r19 <- current->signal->shared_pending
;;
cmp4.gt p6,p7=r17,r0 // p6/p7 <- (current->signal->group_stop_count > 0)?
(p8) mov r8=0 // clear error code
or r18=r18,r19 // r18 <- current->pending | current->signal->shared_pending
;;
// r18 <- (current->pending | current->signal->shared_pending) & ~current->blocked:
andcm r18=r18,r14
add r9=TI_FLAGS+IA64_TASK_SIZE,r16
;;
(p7) cmp.ne.or.andcm p6,p7=r18,r0 // p6/p7 <- signal pending
mov r19=0 // i must not leak kernel bits...
(p6) br.cond.dpnt.many .sig_pending
;;
1: ld4 r17=[r9] // r17 <- current->thread_info->flags
;;
mov ar.ccv=r17
and r18=~_TIF_SIGPENDING,r17 // r18 <- r17 & ~(1 << TIF_SIGPENDING)
;;
st8 [r2]=r14 // update current->blocked with new mask
cmpxchg4.acq r14=[r9],r18,ar.ccv // current->thread_info->flags <- r18
;;
cmp.ne p6,p0=r17,r14 // update failed?
(p6) br.cond.spnt.few 1b // yes -> retry
#ifdef CONFIG_SMP
st4.rel [r31]=r0 // release the lock
#endif
ssm psr.i
;;
srlz.d // ensure psr.i is set again
mov r18=0 // i must not leak kernel bits...
.store_mask:
EX(.fail_efault, (p15) probe.w.fault r34, 3) // verify user has write-access to *oset
EX(.fail_efault, (p15) st8 [r34]=r3)
mov r2=0 // i must not leak kernel bits...
mov r3=0 // i must not leak kernel bits...
mov r8=0 // return 0
mov r9=0 // i must not leak kernel bits...
mov r14=0 // i must not leak kernel bits...
mov r17=0 // i must not leak kernel bits...
mov r31=0 // i must not leak kernel bits...
FSYS_RETURN
.sig_pending:
#ifdef CONFIG_SMP
st4.rel [r31]=r0 // release the lock
#endif
ssm psr.i
;;
srlz.d
br.sptk.many fsys_fallback_syscall // with signal pending, do the heavy-weight syscall
#ifdef CONFIG_SMP
.lock_contention:
/* Rather than spinning here, fall back on doing a heavy-weight syscall. */
ssm psr.i
;;
srlz.d
br.sptk.many fsys_fallback_syscall
#endif
END(fsys_rt_sigprocmask)
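In C terms, the fast path above filters SIGKILL/SIGSTOP out of the new set, combines it with current->blocked according to "how", and bails out to the heavy syscall if a signal becomes deliverable under the new mask. A hedged sketch with a single-word sigset (as the _NSIG_WORDS check above requires); the 0/1/2 values for SIG_* follow the usual Linux convention and are defined here only so the switch is self-contained:

#define SIG_BLOCK	0
#define SIG_UNBLOCK	1
#define SIG_SETMASK	2

/* Illustrative: compute the new blocked mask. */
static unsigned long new_blocked(int how, unsigned long blocked,
				 unsigned long set, unsigned long kill_stop_bits)
{
	set &= ~kill_stop_bits;			/* never block SIGKILL/SIGSTOP */
	switch (how) {
	case SIG_BLOCK:		return blocked | set;
	case SIG_UNBLOCK:	return blocked & ~set;
	case SIG_SETMASK:	return set;
	}
	return blocked;				/* EINVAL case (p6 above) */
}

/* Illustrative: the .sig_pending condition (recalc_sigpending). */
static int signal_pending_sketch(unsigned long pending, unsigned long shared_pending,
				 unsigned long blocked)
{
	return ((pending | shared_pending) & ~blocked) != 0;
}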
ENTRY(fsys_fallback_syscall)
.prologue
.altrp b6
.body
/*
* We only get here from light-weight syscall handlers. Thus, we already
* know that r15 contains a valid syscall number. No need to re-check.
*/
adds r17=-1024,r15
movl r14=sys_call_table
;;
rsm psr.i
shladd r18=r17,3,r14
;;
ld8 r18=[r18] // load normal (heavy-weight) syscall entry-point
mov r29=psr // read psr (12 cyc load latency)
mov r27=ar.rsc
mov r21=ar.fpsr
mov r26=ar.pfs
END(fsys_fallback_syscall)
/* FALL THROUGH */
GLOBAL_ENTRY(fsys_bubble_down)
.prologue
.altrp b6
.body
/*
* We get here for syscalls that don't have a lightweight handler. For those, we
* need to bubble down into the kernel and that requires setting up a minimal
* pt_regs structure, and initializing the CPU state more or less as if an
* interruption had occurred. To make syscall restarts work, we set up pt_regs
* such that cr_iip points to the second instruction in syscall_via_break.
* Decrementing the IP therefore restarts the syscall via break, while leaving the
* IP unchanged returns us to the caller, as usual. Note that we preserve
* the value of psr.pp rather than initializing it from dcr.pp. This makes it
* possible to distinguish fsyscall execution from other privileged execution.
*
* On entry:
* - normal fsyscall handler register usage, except that we also have:
* - r18: address of syscall entry point
* - r21: ar.fpsr
* - r26: ar.pfs
* - r27: ar.rsc
* - r29: psr
*/
# define PSR_PRESERVED_BITS (IA64_PSR_UP | IA64_PSR_MFL | IA64_PSR_MFH | IA64_PSR_PK \
| IA64_PSR_DT | IA64_PSR_PP | IA64_PSR_SP | IA64_PSR_RT \
| IA64_PSR_IC)
/*
* Reading psr.l gives us only bits 0-31, psr.it, and psr.mc. The rest we have
* to synthesize.
*/
# define PSR_ONE_BITS ((3 << IA64_PSR_CPL0_BIT) | (0x1 << IA64_PSR_RI_BIT) \
| IA64_PSR_BN | IA64_PSR_I)
invala
movl r8=PSR_ONE_BITS
mov r25=ar.unat // save ar.unat (5 cyc)
movl r9=PSR_PRESERVED_BITS
mov ar.rsc=0 // set enforced lazy mode, pl 0, little-endian, loadrs=0
movl r28=__kernel_syscall_via_break
;;
mov r23=ar.bspstore // save ar.bspstore (12 cyc)
mov r31=pr // save pr (2 cyc)
mov r20=r1 // save caller's gp in r20
;;
mov r2=r16 // copy current task addr to addl-addressable register
and r9=r9,r29
mov r19=b6 // save b6 (2 cyc)
;;
mov psr.l=r9 // slam the door (17 cyc to srlz.i)
or r29=r8,r29 // construct cr.ipsr value to save
addl r22=IA64_RBS_OFFSET,r2 // compute base of RBS
;;
// GAS reports a spurious RAW hazard on the read of ar.rnat because it thinks
// we may be reading ar.itc after writing to psr.l. Avoid that message with
// this directive:
dv_serialize_data
mov.m r24=ar.rnat // read ar.rnat (5 cyc lat)
lfetch.fault.excl.nt1 [r22]
adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r2
// ensure previous insn group is issued before we stall for srlz.i:
;;
srlz.i // ensure new psr.l has been established
/////////////////////////////////////////////////////////////////////////////
////////// from this point on, execution is not interruptible anymore
/////////////////////////////////////////////////////////////////////////////
addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // compute base of memory stack
cmp.ne pKStk,pUStk=r0,r0 // set pKStk <- 0, pUStk <- 1
;;
st1 [r16]=r0 // clear current->thread.on_ustack flag
mov ar.bspstore=r22 // switch to kernel RBS
mov b6=r18 // copy syscall entry-point to b6 (7 cyc)
add r3=TI_FLAGS+IA64_TASK_SIZE,r2
;;
ld4 r3=[r3]				// r3 = current_thread_info()->flags
mov r18=ar.bsp // save (kernel) ar.bsp (12 cyc)
mov ar.rsc=0x3 // set eager mode, pl 0, little-endian, loadrs=0
br.call.sptk.many b7=ia64_syscall_setup
;;
ssm psr.i
movl r2=ia64_ret_from_syscall
;;
mov rp=r2 // set the real return addr
tbit.z p8,p0=r3,TIF_SYSCALL_TRACE
;;
(p10)	br.cond.spnt.many ia64_ret_from_syscall	// p10==true means there are more than 8 out registers
(p8) br.call.sptk.many b6=b6 // ignore this return addr
br.cond.sptk ia64_trace_syscall
END(fsys_bubble_down)
.rodata
.align 8
.globl fsyscall_table
data8 fsys_bubble_down
fsyscall_table:
data8 fsys_ni_syscall
data8 0 // exit // 1025
data8 0 // read
data8 0 // write
data8 0 // open
data8 0 // close
data8 0 // creat // 1030
data8 0 // link
data8 0 // unlink
data8 0 // execve
data8 0 // chdir
data8 0 // fchdir // 1035
data8 0 // utimes
data8 0 // mknod
data8 0 // chmod
data8 0 // chown
data8 0 // lseek // 1040
data8 fsys_getpid // getpid
data8 fsys_getppid // getppid
data8 0 // mount
data8 0 // umount
data8 0 // setuid // 1045
data8 0 // getuid
data8 0 // geteuid
data8 0 // ptrace
data8 0 // access
data8 0 // sync // 1050
data8 0 // fsync
data8 0 // fdatasync
data8 0 // kill
data8 0 // rename
data8 0 // mkdir // 1055
data8 0 // rmdir
data8 0 // dup
data8 0 // pipe
data8 0 // times
data8 0 // brk // 1060
data8 0 // setgid
data8 0 // getgid
data8 0 // getegid
data8 0 // acct
data8 0 // ioctl // 1065
data8 0 // fcntl
data8 0 // umask
data8 0 // chroot
data8 0 // ustat
data8 0 // dup2 // 1070
data8 0 // setreuid
data8 0 // setregid
data8 0 // getresuid
data8 0 // setresuid
data8 0 // getresgid // 1075
data8 0 // setresgid
data8 0 // getgroups
data8 0 // setgroups
data8 0 // getpgid
data8 0 // setpgid // 1080
data8 0 // setsid
data8 0 // getsid
data8 0 // sethostname
data8 0 // setrlimit
data8 0 // getrlimit // 1085
data8 0 // getrusage
data8 fsys_gettimeofday // gettimeofday
data8 0 // settimeofday
data8 0 // select
data8 0 // poll // 1090
data8 0 // symlink
data8 0 // readlink
data8 0 // uselib
data8 0 // swapon
data8 0 // swapoff // 1095
data8 0 // reboot
data8 0 // truncate
data8 0 // ftruncate
data8 0 // fchmod
data8 0 // fchown // 1100
data8 0 // getpriority
data8 0 // setpriority
data8 0 // statfs
data8 0 // fstatfs
data8 0 // gettid // 1105
data8 0 // semget
data8 0 // semop
data8 0 // semctl
data8 0 // msgget
data8 0 // msgsnd // 1110
data8 0 // msgrcv
data8 0 // msgctl
data8 0 // shmget
data8 0 // shmat
data8 0 // shmdt // 1115
data8 0 // shmctl
data8 0 // syslog
data8 0 // setitimer
data8 0 // getitimer
data8 0 // 1120
data8 0
data8 0
data8 0 // vhangup
data8 0 // lchown
data8 0 // remap_file_pages // 1125
data8 0 // wait4
data8 0 // sysinfo
data8 0 // clone
data8 0 // setdomainname
data8 0 // newuname // 1130
data8 0 // adjtimex
data8 0
data8 0 // init_module
data8 0 // delete_module
data8 0 // 1135
data8 0
data8 0 // quotactl
data8 0 // bdflush
data8 0 // sysfs
data8 0 // personality // 1140
data8 0 // afs_syscall
data8 0 // setfsuid
data8 0 // setfsgid
data8 0 // getdents
data8 0 // flock // 1145
data8 0 // readv
data8 0 // writev
data8 0 // pread64
data8 0 // pwrite64
data8 0 // sysctl // 1150
data8 0 // mmap
data8 0 // munmap
data8 0 // mlock
data8 0 // mlockall
data8 0 // mprotect // 1155
data8 0 // mremap
data8 0 // msync
data8 0 // munlock
data8 0 // munlockall
data8 0 // sched_getparam // 1160
data8 0 // sched_setparam
data8 0 // sched_getscheduler
data8 0 // sched_setscheduler
data8 0 // sched_yield
data8 0 // sched_get_priority_max // 1165
data8 0 // sched_get_priority_min
data8 0 // sched_rr_get_interval
data8 0 // nanosleep
data8 0 // nfsservctl
data8 0 // prctl // 1170
data8 0 // getpagesize
data8 0 // mmap2
data8 0 // pciconfig_read
data8 0 // pciconfig_write
data8 0 // perfmonctl // 1175
data8 0 // sigaltstack
data8 0 // rt_sigaction
data8 0 // rt_sigpending
data8 fsys_rt_sigprocmask // rt_sigprocmask
data8 0 // rt_sigqueueinfo // 1180
data8 0 // rt_sigreturn
data8 0 // rt_sigsuspend
data8 0 // rt_sigtimedwait
data8 0 // getcwd
data8 0 // capget // 1185
data8 0 // capset
data8 0 // sendfile
data8 0
data8 0
data8 0 // socket // 1190
data8 0 // bind
data8 0 // connect
data8 0 // listen
data8 0 // accept
data8 0 // getsockname // 1195
data8 0 // getpeername
data8 0 // socketpair
data8 0 // send
data8 0 // sendto
data8 0 // recv // 1200
data8 0 // recvfrom
data8 0 // shutdown
data8 0 // setsockopt
data8 0 // getsockopt
data8 0 // sendmsg // 1205
data8 0 // recvmsg
data8 0 // pivot_root
data8 0 // mincore
data8 0 // madvise
data8 0 // newstat // 1210
data8 0 // newlstat
data8 0 // newfstat
data8 0 // clone2
data8 0 // getdents64
data8 0 // getunwind // 1215
data8 0 // readahead
data8 0 // setxattr
data8 0 // lsetxattr
data8 0 // fsetxattr
data8 0 // getxattr // 1220
data8 0 // lgetxattr
data8 0 // fgetxattr
data8 0 // listxattr
data8 0 // llistxattr
data8 0 // flistxattr // 1225
data8 0 // removexattr
data8 0 // lremovexattr
data8 0 // fremovexattr
data8 0 // tkill
data8 0 // futex // 1230
data8 0 // sched_setaffinity
data8 0 // sched_getaffinity
data8 fsys_set_tid_address // set_tid_address
data8 0 // fadvise64_64
data8 0 // tgkill // 1235
data8 0 // exit_group
data8 0 // lookup_dcookie
data8 0 // io_setup
data8 0 // io_destroy
data8 0 // io_getevents // 1240
data8 0 // io_submit
data8 0 // io_cancel
data8 0 // epoll_create
data8 0 // epoll_ctl
data8 0 // epoll_wait // 1245
data8 0 // restart_syscall
data8 0 // semtimedop
data8 0 // timer_create
data8 0 // timer_settime
data8 0 // timer_gettime // 1250
data8 0 // timer_getoverrun
data8 0 // timer_delete
data8 0 // clock_settime
data8 fsys_clock_gettime // clock_gettime
data8 0 // clock_getres // 1255
data8 0 // clock_nanosleep
data8 0 // fstatfs64
data8 0 // statfs64
data8 0
data8 0 // 1260
data8 0
data8 0 // mq_open
data8 0 // mq_unlink
data8 0 // mq_timedsend
data8 0 // mq_timedreceive // 1265
data8 0 // mq_notify
data8 0 // mq_getsetattr
data8 0 // kexec_load
data8 0
data8 0 // 1270
data8 0
data8 0
data8 0
data8 0
data8 0 // 1275
data8 0
data8 0
data8 0
data8 0
.org fsyscall_table + 8*NR_syscalls // guard against failures to increase NR_syscalls


@@ -0,0 +1,3 @@
.section .data.gate, "aw"
.incbin "arch/ia64/kernel/gate.so"

372
arch/ia64/kernel/gate.S Normal file

@@ -0,0 +1,372 @@
/*
* This file contains the code that gets mapped at the upper end of each task's text
* region. For now, it contains the signal trampoline code only.
*
* Copyright (C) 1999-2003 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
*/
#include <linux/config.h>
#include <asm/asmmacro.h>
#include <asm/errno.h>
#include <asm/offsets.h>
#include <asm/sigcontext.h>
#include <asm/system.h>
#include <asm/unistd.h>
/*
* We can't easily refer to symbols inside the kernel. To avoid full runtime relocation,
* complications with the linker (which likes to create PLT stubs for branches
* to targets outside the shared object), and multi-phase kernel builds, we
* simply create minimalistic "patch lists" in special ELF sections.
*/
.section ".data.patch.fsyscall_table", "a"
.previous
#define LOAD_FSYSCALL_TABLE(reg) \
[1:] movl reg=0; \
.xdata4 ".data.patch.fsyscall_table", 1b-.
.section ".data.patch.brl_fsys_bubble_down", "a"
.previous
#define BRL_COND_FSYS_BUBBLE_DOWN(pr) \
[1:](pr)brl.cond.sptk 0; \
.xdata4 ".data.patch.brl_fsys_bubble_down", 1b-.
GLOBAL_ENTRY(__kernel_syscall_via_break)
.prologue
.altrp b6
.body
/*
* Note: for (fast) syscall restart to work, the break instruction must be
* the first one in the bundle addressed by syscall_via_break.
*/
{ .mib
break 0x100000
nop.i 0
br.ret.sptk.many b6
}
END(__kernel_syscall_via_break)
/*
* On entry:
* r11 = saved ar.pfs
* r15 = system call #
* b0 = saved return address
* b6 = return address
* On exit:
* r11 = saved ar.pfs
* r15 = system call #
* b0 = saved return address
* all other "scratch" registers: undefined
* all "preserved" registers: same as on entry
*/
GLOBAL_ENTRY(__kernel_syscall_via_epc)
.prologue
.altrp b6
.body
{
/*
* Note: the kernel cannot assume that the first two instructions in this
* bundle get executed. The remaining code must be safe even if
* they do not get executed.
*/
adds r17=-1024,r15
mov r10=0 // default to successful syscall execution
epc
}
;;
rsm psr.be // note: on McKinley "rsm psr.be/srlz.d" is slightly faster than "rum psr.be"
LOAD_FSYSCALL_TABLE(r14)
mov r16=IA64_KR(CURRENT) // 12 cycle read latency
tnat.nz p10,p9=r15
mov r19=NR_syscalls-1
;;
shladd r18=r17,3,r14
srlz.d
cmp.ne p8,p0=r0,r0 // p8 <- FALSE
/* Note: if r17 is a NaT, p6 will be set to zero. */
cmp.geu p6,p7=r19,r17 // (syscall > 0 && syscall < 1024+NR_syscalls)?
;;
(p6) ld8 r18=[r18]
mov r21=ar.fpsr
add r14=-8,r14 // r14 <- addr of fsys_bubble_down entry
;;
(p6) mov b7=r18
(p6) tbit.z p8,p0=r18,0
(p8) br.dptk.many b7
(p6) rsm psr.i
mov r27=ar.rsc
mov r26=ar.pfs
;;
mov r29=psr // read psr (12 cyc load latency)
/*
* brl.cond doesn't work as intended because the linker would convert this branch
* into a branch to a PLT. Perhaps there will be a way to avoid this with some
* future version of the linker. In the meantime, we just use an indirect branch
* instead.
*/
#ifdef CONFIG_ITANIUM
(p6) ld8 r14=[r14] // r14 <- fsys_bubble_down
;;
(p6) mov b7=r14
(p6) br.sptk.many b7
#else
BRL_COND_FSYS_BUBBLE_DOWN(p6)
#endif
mov r10=-1
(p10) mov r8=EINVAL
(p9) mov r8=ENOSYS
FSYS_RETURN
END(__kernel_syscall_via_epc)
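The bounds check above uses the usual unsigned-compare trick: subtracting the syscall base (1024) lets a single unsigned comparison reject both too-small and too-large numbers (a NaT argument falls out via the tnat test). A hedged C equivalent, with nr_syscalls standing in for the kernel's NR_syscalls:

#include <stdint.h>

static int fsyscall_in_range(uint64_t nr, uint64_t nr_syscalls)
{
	uint64_t idx = nr - 1024;	/* adds r17=-1024,r15 */
	return idx <= nr_syscalls - 1;	/* cmp.geu p6,p7=r19,r17 */
}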
# define ARG0_OFF (16 + IA64_SIGFRAME_ARG0_OFFSET)
# define ARG1_OFF (16 + IA64_SIGFRAME_ARG1_OFFSET)
# define ARG2_OFF (16 + IA64_SIGFRAME_ARG2_OFFSET)
# define SIGHANDLER_OFF (16 + IA64_SIGFRAME_HANDLER_OFFSET)
# define SIGCONTEXT_OFF (16 + IA64_SIGFRAME_SIGCONTEXT_OFFSET)
# define FLAGS_OFF IA64_SIGCONTEXT_FLAGS_OFFSET
# define CFM_OFF IA64_SIGCONTEXT_CFM_OFFSET
# define FR6_OFF IA64_SIGCONTEXT_FR6_OFFSET
# define BSP_OFF IA64_SIGCONTEXT_AR_BSP_OFFSET
# define RNAT_OFF IA64_SIGCONTEXT_AR_RNAT_OFFSET
# define UNAT_OFF IA64_SIGCONTEXT_AR_UNAT_OFFSET
# define FPSR_OFF IA64_SIGCONTEXT_AR_FPSR_OFFSET
# define PR_OFF IA64_SIGCONTEXT_PR_OFFSET
# define RP_OFF IA64_SIGCONTEXT_IP_OFFSET
# define SP_OFF IA64_SIGCONTEXT_R12_OFFSET
# define RBS_BASE_OFF IA64_SIGCONTEXT_RBS_BASE_OFFSET
# define LOADRS_OFF IA64_SIGCONTEXT_LOADRS_OFFSET
# define base0 r2
# define base1 r3
/*
* When we get here, the memory stack looks like this:
*
* +===============================+
* | |
* // struct sigframe //
* | |
* +-------------------------------+ <-- sp+16
* | 16 byte of scratch |
* | space |
* +-------------------------------+ <-- sp
*
* The register stack looks _exactly_ the way it looked at the time the signal
* occurred. In other words, we're treading on a potential mine-field: each
* incoming general register may be a NaT value (including sp, in which case the
* process ends up dying with a SIGSEGV).
*
* The first thing we need to do is a cover to get the registers onto the backing
* store. Once that is done, we invoke the signal handler which may modify some
* of the machine state. After returning from the signal handler, we return
* control to the previous context by executing a sigreturn system call. A signal
* handler may call the rt_sigreturn() function to directly return to a given
* sigcontext. However, the user-level sigreturn() needs to do much more than
* calling the rt_sigreturn() system call as it needs to unwind the stack to
* restore preserved registers that may have been saved on the signal handler's
* call stack.
*/
#define SIGTRAMP_SAVES \
.unwabi 3, 's'; /* mark this as a sigtramp handler (saves scratch regs) */ \
.unwabi @svr4, 's'; /* backwards compatibility with old unwinders (remove in v2.7) */ \
.savesp ar.unat, UNAT_OFF+SIGCONTEXT_OFF; \
.savesp ar.fpsr, FPSR_OFF+SIGCONTEXT_OFF; \
.savesp pr, PR_OFF+SIGCONTEXT_OFF; \
.savesp rp, RP_OFF+SIGCONTEXT_OFF; \
.savesp ar.pfs, CFM_OFF+SIGCONTEXT_OFF; \
.vframesp SP_OFF+SIGCONTEXT_OFF
GLOBAL_ENTRY(__kernel_sigtramp)
// describe the state that is active when we get here:
.prologue
SIGTRAMP_SAVES
.body
.label_state 1
adds base0=SIGHANDLER_OFF,sp
adds base1=RBS_BASE_OFF+SIGCONTEXT_OFF,sp
br.call.sptk.many rp=1f
1:
ld8 r17=[base0],(ARG0_OFF-SIGHANDLER_OFF) // get pointer to signal handler's plabel
ld8 r15=[base1] // get address of new RBS base (or NULL)
cover // push args in interrupted frame onto backing store
;;
cmp.ne p1,p0=r15,r0 // do we need to switch rbs? (note: pr is saved by kernel)
mov.m r9=ar.bsp // fetch ar.bsp
.spillsp.p p1, ar.rnat, RNAT_OFF+SIGCONTEXT_OFF
(p1) br.cond.spnt setup_rbs // yup -> (clobbers p8, r14-r16, and r18-r20)
back_from_setup_rbs:
alloc r8=ar.pfs,0,0,3,0
ld8 out0=[base0],16 // load arg0 (signum)
adds base1=(ARG1_OFF-(RBS_BASE_OFF+SIGCONTEXT_OFF)),base1
;;
ld8 out1=[base1] // load arg1 (siginfop)
ld8 r10=[r17],8 // get signal handler entry point
;;
ld8 out2=[base0] // load arg2 (sigcontextp)
ld8 gp=[r17] // get signal handler's global pointer
adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp
;;
.spillsp ar.bsp, BSP_OFF+SIGCONTEXT_OFF
st8 [base0]=r9 // save sc_ar_bsp
adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp
adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp
;;
stf.spill [base0]=f6,32
stf.spill [base1]=f7,32
;;
stf.spill [base0]=f8,32
stf.spill [base1]=f9,32
mov b6=r10
;;
stf.spill [base0]=f10,32
stf.spill [base1]=f11,32
;;
stf.spill [base0]=f12,32
stf.spill [base1]=f13,32
;;
stf.spill [base0]=f14,32
stf.spill [base1]=f15,32
br.call.sptk.many rp=b6 // call the signal handler
.ret0: adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp
;;
ld8 r15=[base0] // fetch sc_ar_bsp
mov r14=ar.bsp
;;
cmp.ne p1,p0=r14,r15 // do we need to restore the rbs?
(p1) br.cond.spnt restore_rbs // yup -> (clobbers r14-r18, f6 & f7)
;;
back_from_restore_rbs:
adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp
adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp
;;
ldf.fill f6=[base0],32
ldf.fill f7=[base1],32
;;
ldf.fill f8=[base0],32
ldf.fill f9=[base1],32
;;
ldf.fill f10=[base0],32
ldf.fill f11=[base1],32
;;
ldf.fill f12=[base0],32
ldf.fill f13=[base1],32
;;
ldf.fill f14=[base0],32
ldf.fill f15=[base1],32
mov r15=__NR_rt_sigreturn
.restore sp // pop .prologue
break __BREAK_SYSCALL
.prologue
SIGTRAMP_SAVES
setup_rbs:
mov ar.rsc=0 // put RSE into enforced lazy mode
;;
.save ar.rnat, r19
mov r19=ar.rnat // save RNaT before switching backing store area
adds r14=(RNAT_OFF+SIGCONTEXT_OFF),sp
mov r18=ar.bspstore
mov ar.bspstore=r15 // switch over to new register backing store area
;;
.spillsp ar.rnat, RNAT_OFF+SIGCONTEXT_OFF
st8 [r14]=r19 // save sc_ar_rnat
.body
mov.m r16=ar.bsp // sc_loadrs <- (new bsp - new bspstore) << 16
adds r14=(LOADRS_OFF+SIGCONTEXT_OFF),sp
;;
invala
sub r15=r16,r15
extr.u r20=r18,3,6
;;
mov ar.rsc=0xf // set RSE into eager mode, pl 3
cmp.eq p8,p0=63,r20
shl r15=r15,16
;;
st8 [r14]=r15 // save sc_loadrs
(p8) st8 [r18]=r19 // if bspstore points at RNaT slot, store RNaT there now
.restore sp // pop .prologue
br.cond.sptk back_from_setup_rbs
.prologue
SIGTRAMP_SAVES
.spillsp ar.rnat, RNAT_OFF+SIGCONTEXT_OFF
.body
restore_rbs:
// On input:
// r14 = bsp1 (bsp at the time of return from signal handler)
// r15 = bsp0 (bsp at the time the signal occurred)
//
// Here, we need to calculate bspstore0, the value that ar.bspstore needs
// to be set to, based on bsp0 and the size of the dirty partition on
// the alternate stack (sc_loadrs >> 16). This can be done with the
// following algorithm:
//
// bspstore0 = rse_skip_regs(bsp0, -rse_num_regs(bsp1 - (loadrs >> 19), bsp1));
//
// This is what the code below does.
//
alloc r2=ar.pfs,0,0,0,0 // alloc null frame
adds r16=(LOADRS_OFF+SIGCONTEXT_OFF),sp
adds r18=(RNAT_OFF+SIGCONTEXT_OFF),sp
;;
ld8 r17=[r16]
ld8 r16=[r18] // get new rnat
extr.u r18=r15,3,6 // r18 <- rse_slot_num(bsp0)
;;
mov ar.rsc=r17 // put RSE into enforced lazy mode
shr.u r17=r17,16
;;
sub r14=r14,r17 // r14 (bspstore1) <- bsp1 - (sc_loadrs >> 16)
shr.u r17=r17,3 // r17 <- (sc_loadrs >> 19)
;;
loadrs // restore dirty partition
extr.u r14=r14,3,6 // r14 <- rse_slot_num(bspstore1)
;;
add r14=r14,r17 // r14 <- rse_slot_num(bspstore1) + (sc_loadrs >> 19)
;;
shr.u r14=r14,6 // r14 <- (rse_slot_num(bspstore1) + (sc_loadrs >> 19))/0x40
;;
sub r14=r14,r17 // r14 <- -rse_num_regs(bspstore1, bsp1)
movl r17=0x8208208208208209
;;
add r18=r18,r14 // r18 (delta) <- rse_slot_num(bsp0) - rse_num_regs(bspstore1,bsp1)
setf.sig f7=r17
cmp.lt p7,p0=r14,r0 // p7 <- (r14 < 0)?
;;
(p7) adds r18=-62,r18 // delta -= 62
;;
setf.sig f6=r18
;;
xmpy.h f6=f6,f7
;;
getf.sig r17=f6
;;
add r17=r17,r18
shr r18=r18,63
;;
shr r17=r17,5
;;
sub r17=r17,r18 // r17 = delta/63
;;
add r17=r14,r17 // r17 <- delta/63 - rse_num_regs(bspstore1, bsp1)
;;
shladd r15=r17,3,r15 // r15 <- bsp0 + 8*(delta/63 - rse_num_regs(bspstore1, bsp1))
;;
mov ar.bspstore=r15 // switch back to old register backing store area
;;
mov ar.rnat=r16 // restore RNaT
mov ar.rsc=0xf // (will be restored later on from sc_ar_rsc)
// invala not necessary as that will happen when returning to user-mode
br.cond.sptk back_from_restore_rbs
END(__kernel_sigtramp)
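The restore_rbs arithmetic above exists because the register backing store contains one NaT-collection word per 64 slots, so converting "N registers" into a byte distance is not a plain multiply; the divide by 63 is done with the 0x8208208208208209 multiply-high trick because IA-64 has no integer divide instruction. The following C model is written from the comment and the code above (and loosely mirrors the kernel's RSE helpers); treat it as illustrative, not authoritative. Plain division is used for clarity.

#include <stdint.h>

static long slot_num(uint64_t addr)		/* slot index within its 64-slot group */
{
	return (addr >> 3) & 0x3f;
}

/* registers between two backing-store addresses, NaT slots excluded */
static long num_regs(uint64_t bspstore, uint64_t bsp)
{
	long slots = (long)((bsp - bspstore) >> 3);
	return slots - (slot_num(bspstore) + slots) / 0x40;
}

/* address reached by skipping nregs registers (nregs may be negative) */
static uint64_t skip_regs(uint64_t addr, long nregs)
{
	long delta = slot_num(addr) + nregs;
	if (nregs < 0)
		delta -= 0x3e;			/* the "(p7) adds r18=-62" above */
	return addr + 8 * (nregs + delta / 0x3f);
}

static uint64_t bspstore0(uint64_t bsp0, uint64_t bsp1, uint64_t sc_loadrs)
{
	uint64_t bspstore1 = bsp1 - (sc_loadrs >> 16);	/* dirty bytes below bsp1 */
	return skip_regs(bsp0, -num_regs(bspstore1, bsp1));
}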


@@ -0,0 +1,95 @@
/*
* Linker script for gate DSO. The gate pages are an ELF shared object prelinked to its
* virtual address, with only one read-only segment and one execute-only segment (both fit
* in one page). This script controls its layout.
*/
#include <linux/config.h>
#include <asm/system.h>
SECTIONS
{
. = GATE_ADDR + SIZEOF_HEADERS;
.hash : { *(.hash) } :readable
.dynsym : { *(.dynsym) }
.dynstr : { *(.dynstr) }
.gnu.version : { *(.gnu.version) }
.gnu.version_d : { *(.gnu.version_d) }
.gnu.version_r : { *(.gnu.version_r) }
.dynamic : { *(.dynamic) } :readable :dynamic
/*
* This linker script is used both with -r and with -shared. For the layouts to match,
* we need to skip more than enough space for the dynamic symbol table et al. If this
* amount is insufficient, ld -shared will barf. Just increase it here.
*/
. = GATE_ADDR + 0x500;
.data.patch : {
__start_gate_mckinley_e9_patchlist = .;
*(.data.patch.mckinley_e9)
__end_gate_mckinley_e9_patchlist = .;
__start_gate_vtop_patchlist = .;
*(.data.patch.vtop)
__end_gate_vtop_patchlist = .;
__start_gate_fsyscall_patchlist = .;
*(.data.patch.fsyscall_table)
__end_gate_fsyscall_patchlist = .;
__start_gate_brl_fsys_bubble_down_patchlist = .;
*(.data.patch.brl_fsys_bubble_down)
__end_gate_brl_fsys_bubble_down_patchlist = .;
} :readable
.IA_64.unwind_info : { *(.IA_64.unwind_info*) }
.IA_64.unwind : { *(.IA_64.unwind*) } :readable :unwind
#ifdef HAVE_BUGGY_SEGREL
.text (GATE_ADDR + PAGE_SIZE) : { *(.text) *(.text.*) } :readable
#else
. = ALIGN (PERCPU_PAGE_SIZE) + (. & (PERCPU_PAGE_SIZE - 1));
.text : { *(.text) *(.text.*) } :epc
#endif
/DISCARD/ : {
*(.got.plt) *(.got)
*(.data .data.* .gnu.linkonce.d.*)
*(.dynbss)
*(.bss .bss.* .gnu.linkonce.b.*)
*(__ex_table)
}
}
/*
* We must supply the ELF program headers explicitly to get just one
* PT_LOAD segment, and set the flags explicitly to make segments read-only.
*/
PHDRS
{
readable PT_LOAD FILEHDR PHDRS FLAGS(4); /* PF_R */
#ifndef HAVE_BUGGY_SEGREL
epc PT_LOAD FILEHDR PHDRS FLAGS(1); /* PF_X */
#endif
dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
unwind 0x70000001; /* PT_IA_64_UNWIND, but ld doesn't match the name */
}
/*
* This controls what symbols we export from the DSO.
*/
VERSION
{
LINUX_2.5 {
global:
__kernel_syscall_via_break;
__kernel_syscall_via_epc;
__kernel_sigtramp;
local: *;
};
}
/* The ELF entry point can be used to set the AT_SYSINFO value. */
ENTRY(__kernel_syscall_via_epc)

996
arch/ia64/kernel/head.S Normal file

@@ -0,0 +1,996 @@
/*
* Here is where the ball gets rolling as far as the kernel is concerned.
* When control is transferred to _start, the bootloader has already
* loaded us to the correct address. All that's left to do here is
* to set up the kernel's global pointer and jump to the kernel
* entry point.
*
* Copyright (C) 1998-2001, 2003, 2005 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
* Stephane Eranian <eranian@hpl.hp.com>
* Copyright (C) 1999 VA Linux Systems
* Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
* Copyright (C) 1999 Intel Corp.
* Copyright (C) 1999 Asit Mallick <Asit.K.Mallick@intel.com>
* Copyright (C) 1999 Don Dugger <Don.Dugger@intel.com>
* Copyright (C) 2002 Fenghua Yu <fenghua.yu@intel.com>
* -Optimize __ia64_save_fpu() and __ia64_load_fpu() for Itanium 2.
*/
#include <linux/config.h>
#include <asm/asmmacro.h>
#include <asm/fpu.h>
#include <asm/kregs.h>
#include <asm/mmu_context.h>
#include <asm/offsets.h>
#include <asm/pal.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
#include <asm/system.h>
.section __special_page_section,"ax"
.global empty_zero_page
empty_zero_page:
.skip PAGE_SIZE
.global swapper_pg_dir
swapper_pg_dir:
.skip PAGE_SIZE
.rodata
halt_msg:
stringz "Halting kernel\n"
.text
.global start_ap
/*
* Start the kernel. When the bootloader passes control to _start(), r28
* points to the address of the boot parameter area. Execution reaches
* here in physical mode.
*/
GLOBAL_ENTRY(_start)
start_ap:
.prologue
.save rp, r0 // terminate unwind chain with a NULL rp
.body
rsm psr.i | psr.ic
;;
srlz.i
;;
/*
* Initialize kernel region registers:
* rr[0]: VHPT enabled, page size = PAGE_SHIFT
* rr[1]: VHPT enabled, page size = PAGE_SHIFT
* rr[2]: VHPT enabled, page size = PAGE_SHIFT
* rr[3]: VHPT enabled, page size = PAGE_SHIFT
* rr[4]: VHPT enabled, page size = PAGE_SHIFT
* rr[5]: VHPT enabled, page size = PAGE_SHIFT
* rr[6]: VHPT disabled, page size = IA64_GRANULE_SHIFT
* rr[7]: VHPT disabled, page size = IA64_GRANULE_SHIFT
* We initialize all of them to prevent inadvertently assuming
* something about the state of address translation early in boot.
*/
mov r6=((ia64_rid(IA64_REGION_ID_KERNEL, (0<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
movl r7=(0<<61)
mov r8=((ia64_rid(IA64_REGION_ID_KERNEL, (1<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
movl r9=(1<<61)
mov r10=((ia64_rid(IA64_REGION_ID_KERNEL, (2<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
movl r11=(2<<61)
mov r12=((ia64_rid(IA64_REGION_ID_KERNEL, (3<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
movl r13=(3<<61)
mov r14=((ia64_rid(IA64_REGION_ID_KERNEL, (4<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
movl r15=(4<<61)
mov r16=((ia64_rid(IA64_REGION_ID_KERNEL, (5<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
movl r17=(5<<61)
mov r18=((ia64_rid(IA64_REGION_ID_KERNEL, (6<<61)) << 8) | (IA64_GRANULE_SHIFT << 2))
movl r19=(6<<61)
mov r20=((ia64_rid(IA64_REGION_ID_KERNEL, (7<<61)) << 8) | (IA64_GRANULE_SHIFT << 2))
movl r21=(7<<61)
;;
mov rr[r7]=r6
mov rr[r9]=r8
mov rr[r11]=r10
mov rr[r13]=r12
mov rr[r15]=r14
mov rr[r17]=r16
mov rr[r19]=r18
mov rr[r21]=r20
;;
/*
* Now pin mappings into the TLB for kernel text and data
*/
mov r18=KERNEL_TR_PAGE_SHIFT<<2
movl r17=KERNEL_START
;;
mov cr.itir=r18
mov cr.ifa=r17
mov r16=IA64_TR_KERNEL
mov r3=ip
movl r18=PAGE_KERNEL
;;
dep r2=0,r3,0,KERNEL_TR_PAGE_SHIFT
;;
or r18=r2,r18
;;
srlz.i
;;
itr.i itr[r16]=r18
;;
itr.d dtr[r16]=r18
;;
srlz.i
/*
* Switch into virtual mode:
*/
movl r16=(IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN \
|IA64_PSR_DI)
;;
mov cr.ipsr=r16
movl r17=1f
;;
mov cr.iip=r17
mov cr.ifs=r0
;;
rfi
;;
1: // now we are in virtual mode
// set IVT entry point---can't access I/O ports without it
movl r3=ia64_ivt
;;
mov cr.iva=r3
movl r2=FPSR_DEFAULT
;;
srlz.i
movl gp=__gp
mov ar.fpsr=r2
;;
#define isAP p2 // are we an Application Processor?
#define isBP p3 // are we the Bootstrap Processor?
#ifdef CONFIG_SMP
/*
* Find the init_task for the currently booting CPU. At poweron, and in
* UP mode, task_for_booting_cpu is NULL.
*/
movl r3=task_for_booting_cpu
;;
ld8 r3=[r3]
movl r2=init_task
;;
cmp.eq isBP,isAP=r3,r0
;;
(isAP) mov r2=r3
#else
movl r2=init_task
cmp.eq isBP,isAP=r0,r0
#endif
;;
tpa r3=r2 // r3 == phys addr of task struct
mov r16=-1
(isBP) br.cond.dpnt .load_current // BP stack is on region 5 --- no need to map it
// load mapping for stack (virtaddr in r2, physaddr in r3)
rsm psr.ic
movl r17=PAGE_KERNEL
;;
srlz.d
dep r18=0,r3,0,12
;;
or r18=r17,r18
dep r2=-1,r3,61,3 // IMVA of task
;;
mov r17=rr[r2]
shr.u r16=r3,IA64_GRANULE_SHIFT
;;
dep r17=0,r17,8,24
;;
mov cr.itir=r17
mov cr.ifa=r2
mov r19=IA64_TR_CURRENT_STACK
;;
itr.d dtr[r19]=r18
;;
ssm psr.ic
srlz.d
;;
.load_current:
// load the "current" pointer (r13) and ar.k6 with the current task
mov IA64_KR(CURRENT)=r2 // virtual address
mov IA64_KR(CURRENT_STACK)=r16
mov r13=r2
/*
* Reserve space at the top of the stack for "struct pt_regs". Kernel threads
* don't store interesting values in that structure, but the space still needs
* to be there because time-critical stuff such as the context switching can
* be implemented more efficiently (for example, __switch_to()
* always sets the psr.dfh bit of the task it is switching to).
*/
addl r12=IA64_STK_OFFSET-IA64_PT_REGS_SIZE-16,r2
addl r2=IA64_RBS_OFFSET,r2 // initialize the RSE
mov ar.rsc=0 // place RSE in enforced lazy mode
;;
loadrs // clear the dirty partition
;;
mov ar.bspstore=r2 // establish the new RSE stack
;;
mov ar.rsc=0x3 // place RSE in eager mode
(isBP) dep r28=-1,r28,61,3 // make address virtual
(isBP) movl r2=ia64_boot_param
;;
(isBP) st8 [r2]=r28 // save the address of the boot param area passed by the bootloader
#ifdef CONFIG_SMP
(isAP) br.call.sptk.many rp=start_secondary
.ret0:
(isAP) br.cond.sptk self
#endif
// This is executed by the bootstrap processor (bsp) only:
#ifdef CONFIG_IA64_FW_EMU
// initialize PAL & SAL emulator:
br.call.sptk.many rp=sys_fw_init
.ret1:
#endif
br.call.sptk.many rp=start_kernel
.ret2: addl r3=@ltoff(halt_msg),gp
;;
alloc r2=ar.pfs,8,0,2,0
;;
ld8 out0=[r3]
br.call.sptk.many b0=console_print
self: hint @pause
br.sptk.many self // endless loop
END(_start)
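For reference, the region-register values programmed at the top of _start are composed as (region id << 8) | (page-size shift << 2) | VHPT-enable, which is exactly what the mov/movl pairs above encode. A one-line C sketch of that encoding, illustrative only:

static unsigned long rr_value(unsigned long rid, unsigned int page_shift, int vhpt_enabled)
{
	return (rid << 8) | ((unsigned long)page_shift << 2) | (vhpt_enabled ? 1UL : 0UL);
}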
GLOBAL_ENTRY(ia64_save_debug_regs)
alloc r16=ar.pfs,1,0,0,0
mov r20=ar.lc // preserve ar.lc
mov ar.lc=IA64_NUM_DBG_REGS-1
mov r18=0
add r19=IA64_NUM_DBG_REGS*8,in0
;;
1: mov r16=dbr[r18]
#ifdef CONFIG_ITANIUM
;;
srlz.d
#endif
mov r17=ibr[r18]
add r18=1,r18
;;
st8.nta [in0]=r16,8
st8.nta [r19]=r17,8
br.cloop.sptk.many 1b
;;
mov ar.lc=r20 // restore ar.lc
br.ret.sptk.many rp
END(ia64_save_debug_regs)
GLOBAL_ENTRY(ia64_load_debug_regs)
alloc r16=ar.pfs,1,0,0,0
lfetch.nta [in0]
mov r20=ar.lc // preserve ar.lc
add r19=IA64_NUM_DBG_REGS*8,in0
mov ar.lc=IA64_NUM_DBG_REGS-1
mov r18=-1
;;
1: ld8.nta r16=[in0],8
ld8.nta r17=[r19],8
add r18=1,r18
;;
mov dbr[r18]=r16
#ifdef CONFIG_ITANIUM
;;
srlz.d // Errata 132 (NoFix status)
#endif
mov ibr[r18]=r17
br.cloop.sptk.many 1b
;;
mov ar.lc=r20 // restore ar.lc
br.ret.sptk.many rp
END(ia64_load_debug_regs)
GLOBAL_ENTRY(__ia64_save_fpu)
alloc r2=ar.pfs,1,4,0,0
adds loc0=96*16-16,in0
adds loc1=96*16-16-128,in0
;;
stf.spill.nta [loc0]=f127,-256
stf.spill.nta [loc1]=f119,-256
;;
stf.spill.nta [loc0]=f111,-256
stf.spill.nta [loc1]=f103,-256
;;
stf.spill.nta [loc0]=f95,-256
stf.spill.nta [loc1]=f87,-256
;;
stf.spill.nta [loc0]=f79,-256
stf.spill.nta [loc1]=f71,-256
;;
stf.spill.nta [loc0]=f63,-256
stf.spill.nta [loc1]=f55,-256
adds loc2=96*16-32,in0
;;
stf.spill.nta [loc0]=f47,-256
stf.spill.nta [loc1]=f39,-256
adds loc3=96*16-32-128,in0
;;
stf.spill.nta [loc2]=f126,-256
stf.spill.nta [loc3]=f118,-256
;;
stf.spill.nta [loc2]=f110,-256
stf.spill.nta [loc3]=f102,-256
;;
stf.spill.nta [loc2]=f94,-256
stf.spill.nta [loc3]=f86,-256
;;
stf.spill.nta [loc2]=f78,-256
stf.spill.nta [loc3]=f70,-256
;;
stf.spill.nta [loc2]=f62,-256
stf.spill.nta [loc3]=f54,-256
adds loc0=96*16-48,in0
;;
stf.spill.nta [loc2]=f46,-256
stf.spill.nta [loc3]=f38,-256
adds loc1=96*16-48-128,in0
;;
stf.spill.nta [loc0]=f125,-256
stf.spill.nta [loc1]=f117,-256
;;
stf.spill.nta [loc0]=f109,-256
stf.spill.nta [loc1]=f101,-256
;;
stf.spill.nta [loc0]=f93,-256
stf.spill.nta [loc1]=f85,-256
;;
stf.spill.nta [loc0]=f77,-256
stf.spill.nta [loc1]=f69,-256
;;
stf.spill.nta [loc0]=f61,-256
stf.spill.nta [loc1]=f53,-256
adds loc2=96*16-64,in0
;;
stf.spill.nta [loc0]=f45,-256
stf.spill.nta [loc1]=f37,-256
adds loc3=96*16-64-128,in0
;;
stf.spill.nta [loc2]=f124,-256
stf.spill.nta [loc3]=f116,-256
;;
stf.spill.nta [loc2]=f108,-256
stf.spill.nta [loc3]=f100,-256
;;
stf.spill.nta [loc2]=f92,-256
stf.spill.nta [loc3]=f84,-256
;;
stf.spill.nta [loc2]=f76,-256
stf.spill.nta [loc3]=f68,-256
;;
stf.spill.nta [loc2]=f60,-256
stf.spill.nta [loc3]=f52,-256
adds loc0=96*16-80,in0
;;
stf.spill.nta [loc2]=f44,-256
stf.spill.nta [loc3]=f36,-256
adds loc1=96*16-80-128,in0
;;
stf.spill.nta [loc0]=f123,-256
stf.spill.nta [loc1]=f115,-256
;;
stf.spill.nta [loc0]=f107,-256
stf.spill.nta [loc1]=f99,-256
;;
stf.spill.nta [loc0]=f91,-256
stf.spill.nta [loc1]=f83,-256
;;
stf.spill.nta [loc0]=f75,-256
stf.spill.nta [loc1]=f67,-256
;;
stf.spill.nta [loc0]=f59,-256
stf.spill.nta [loc1]=f51,-256
adds loc2=96*16-96,in0
;;
stf.spill.nta [loc0]=f43,-256
stf.spill.nta [loc1]=f35,-256
adds loc3=96*16-96-128,in0
;;
stf.spill.nta [loc2]=f122,-256
stf.spill.nta [loc3]=f114,-256
;;
stf.spill.nta [loc2]=f106,-256
stf.spill.nta [loc3]=f98,-256
;;
stf.spill.nta [loc2]=f90,-256
stf.spill.nta [loc3]=f82,-256
;;
stf.spill.nta [loc2]=f74,-256
stf.spill.nta [loc3]=f66,-256
;;
stf.spill.nta [loc2]=f58,-256
stf.spill.nta [loc3]=f50,-256
adds loc0=96*16-112,in0
;;
stf.spill.nta [loc2]=f42,-256
stf.spill.nta [loc3]=f34,-256
adds loc1=96*16-112-128,in0
;;
stf.spill.nta [loc0]=f121,-256
stf.spill.nta [loc1]=f113,-256
;;
stf.spill.nta [loc0]=f105,-256
stf.spill.nta [loc1]=f97,-256
;;
stf.spill.nta [loc0]=f89,-256
stf.spill.nta [loc1]=f81,-256
;;
stf.spill.nta [loc0]=f73,-256
stf.spill.nta [loc1]=f65,-256
;;
stf.spill.nta [loc0]=f57,-256
stf.spill.nta [loc1]=f49,-256
adds loc2=96*16-128,in0
;;
stf.spill.nta [loc0]=f41,-256
stf.spill.nta [loc1]=f33,-256
adds loc3=96*16-128-128,in0
;;
stf.spill.nta [loc2]=f120,-256
stf.spill.nta [loc3]=f112,-256
;;
stf.spill.nta [loc2]=f104,-256
stf.spill.nta [loc3]=f96,-256
;;
stf.spill.nta [loc2]=f88,-256
stf.spill.nta [loc3]=f80,-256
;;
stf.spill.nta [loc2]=f72,-256
stf.spill.nta [loc3]=f64,-256
;;
stf.spill.nta [loc2]=f56,-256
stf.spill.nta [loc3]=f48,-256
;;
stf.spill.nta [loc2]=f40
stf.spill.nta [loc3]=f32
br.ret.sptk.many rp
END(__ia64_save_fpu)
GLOBAL_ENTRY(__ia64_load_fpu)
alloc r2=ar.pfs,1,2,0,0
adds r3=128,in0
adds r14=256,in0
adds r15=384,in0
mov loc0=512
mov loc1=-1024+16
;;
ldf.fill.nta f32=[in0],loc0
ldf.fill.nta f40=[ r3],loc0
ldf.fill.nta f48=[r14],loc0
ldf.fill.nta f56=[r15],loc0
;;
ldf.fill.nta f64=[in0],loc0
ldf.fill.nta f72=[ r3],loc0
ldf.fill.nta f80=[r14],loc0
ldf.fill.nta f88=[r15],loc0
;;
ldf.fill.nta f96=[in0],loc1
ldf.fill.nta f104=[ r3],loc1
ldf.fill.nta f112=[r14],loc1
ldf.fill.nta f120=[r15],loc1
;;
ldf.fill.nta f33=[in0],loc0
ldf.fill.nta f41=[ r3],loc0
ldf.fill.nta f49=[r14],loc0
ldf.fill.nta f57=[r15],loc0
;;
ldf.fill.nta f65=[in0],loc0
ldf.fill.nta f73=[ r3],loc0
ldf.fill.nta f81=[r14],loc0
ldf.fill.nta f89=[r15],loc0
;;
ldf.fill.nta f97=[in0],loc1
ldf.fill.nta f105=[ r3],loc1
ldf.fill.nta f113=[r14],loc1
ldf.fill.nta f121=[r15],loc1
;;
ldf.fill.nta f34=[in0],loc0
ldf.fill.nta f42=[ r3],loc0
ldf.fill.nta f50=[r14],loc0
ldf.fill.nta f58=[r15],loc0
;;
ldf.fill.nta f66=[in0],loc0
ldf.fill.nta f74=[ r3],loc0
ldf.fill.nta f82=[r14],loc0
ldf.fill.nta f90=[r15],loc0
;;
ldf.fill.nta f98=[in0],loc1
ldf.fill.nta f106=[ r3],loc1
ldf.fill.nta f114=[r14],loc1
ldf.fill.nta f122=[r15],loc1
;;
ldf.fill.nta f35=[in0],loc0
ldf.fill.nta f43=[ r3],loc0
ldf.fill.nta f51=[r14],loc0
ldf.fill.nta f59=[r15],loc0
;;
ldf.fill.nta f67=[in0],loc0
ldf.fill.nta f75=[ r3],loc0
ldf.fill.nta f83=[r14],loc0
ldf.fill.nta f91=[r15],loc0
;;
ldf.fill.nta f99=[in0],loc1
ldf.fill.nta f107=[ r3],loc1
ldf.fill.nta f115=[r14],loc1
ldf.fill.nta f123=[r15],loc1
;;
ldf.fill.nta f36=[in0],loc0
ldf.fill.nta f44=[ r3],loc0
ldf.fill.nta f52=[r14],loc0
ldf.fill.nta f60=[r15],loc0
;;
ldf.fill.nta f68=[in0],loc0
ldf.fill.nta f76=[ r3],loc0
ldf.fill.nta f84=[r14],loc0
ldf.fill.nta f92=[r15],loc0
;;
ldf.fill.nta f100=[in0],loc1
ldf.fill.nta f108=[ r3],loc1
ldf.fill.nta f116=[r14],loc1
ldf.fill.nta f124=[r15],loc1
;;
ldf.fill.nta f37=[in0],loc0
ldf.fill.nta f45=[ r3],loc0
ldf.fill.nta f53=[r14],loc0
ldf.fill.nta f61=[r15],loc0
;;
ldf.fill.nta f69=[in0],loc0
ldf.fill.nta f77=[ r3],loc0
ldf.fill.nta f85=[r14],loc0
ldf.fill.nta f93=[r15],loc0
;;
ldf.fill.nta f101=[in0],loc1
ldf.fill.nta f109=[ r3],loc1
ldf.fill.nta f117=[r14],loc1
ldf.fill.nta f125=[r15],loc1
;;
ldf.fill.nta f38 =[in0],loc0
ldf.fill.nta f46 =[ r3],loc0
ldf.fill.nta f54 =[r14],loc0
ldf.fill.nta f62 =[r15],loc0
;;
ldf.fill.nta f70 =[in0],loc0
ldf.fill.nta f78 =[ r3],loc0
ldf.fill.nta f86 =[r14],loc0
ldf.fill.nta f94 =[r15],loc0
;;
ldf.fill.nta f102=[in0],loc1
ldf.fill.nta f110=[ r3],loc1
ldf.fill.nta f118=[r14],loc1
ldf.fill.nta f126=[r15],loc1
;;
ldf.fill.nta f39 =[in0],loc0
ldf.fill.nta f47 =[ r3],loc0
ldf.fill.nta f55 =[r14],loc0
ldf.fill.nta f63 =[r15],loc0
;;
ldf.fill.nta f71 =[in0],loc0
ldf.fill.nta f79 =[ r3],loc0
ldf.fill.nta f87 =[r14],loc0
ldf.fill.nta f95 =[r15],loc0
;;
ldf.fill.nta f103=[in0]
ldf.fill.nta f111=[ r3]
ldf.fill.nta f119=[r14]
ldf.fill.nta f127=[r15]
br.ret.sptk.many rp
END(__ia64_load_fpu)
GLOBAL_ENTRY(__ia64_init_fpu)
stf.spill [sp]=f0 // M3
mov f32=f0 // F
nop.b 0
ldfps f33,f34=[sp] // M0
ldfps f35,f36=[sp] // M1
mov f37=f0 // F
;;
setf.s f38=r0 // M2
setf.s f39=r0 // M3
mov f40=f0 // F
ldfps f41,f42=[sp] // M0
ldfps f43,f44=[sp] // M1
mov f45=f0 // F
setf.s f46=r0 // M2
setf.s f47=r0 // M3
mov f48=f0 // F
ldfps f49,f50=[sp] // M0
ldfps f51,f52=[sp] // M1
mov f53=f0 // F
setf.s f54=r0 // M2
setf.s f55=r0 // M3
mov f56=f0 // F
ldfps f57,f58=[sp] // M0
ldfps f59,f60=[sp] // M1
mov f61=f0 // F
setf.s f62=r0 // M2
setf.s f63=r0 // M3
mov f64=f0 // F
ldfps f65,f66=[sp] // M0
ldfps f67,f68=[sp] // M1
mov f69=f0 // F
setf.s f70=r0 // M2
setf.s f71=r0 // M3
mov f72=f0 // F
ldfps f73,f74=[sp] // M0
ldfps f75,f76=[sp] // M1
mov f77=f0 // F
setf.s f78=r0 // M2
setf.s f79=r0 // M3
mov f80=f0 // F
ldfps f81,f82=[sp] // M0
ldfps f83,f84=[sp] // M1
mov f85=f0 // F
setf.s f86=r0 // M2
setf.s f87=r0 // M3
mov f88=f0 // F
/*
* When the instructions are cached, it would be faster to initialize
* the remaining registers with simple mov instructions (F-unit).
* This gets the time down to ~29 cycles. However, this would use up
* 33 bundles, whereas continuing with the above pattern yields
* 10 bundles and ~30 cycles.
*/
ldfps f89,f90=[sp] // M0
ldfps f91,f92=[sp] // M1
mov f93=f0 // F
setf.s f94=r0 // M2
setf.s f95=r0 // M3
mov f96=f0 // F
ldfps f97,f98=[sp] // M0
ldfps f99,f100=[sp] // M1
mov f101=f0 // F
setf.s f102=r0 // M2
setf.s f103=r0 // M3
mov f104=f0 // F
ldfps f105,f106=[sp] // M0
ldfps f107,f108=[sp] // M1
mov f109=f0 // F
setf.s f110=r0 // M2
setf.s f111=r0 // M3
mov f112=f0 // F
ldfps f113,f114=[sp] // M0
ldfps f115,f116=[sp] // M1
mov f117=f0 // F
setf.s f118=r0 // M2
setf.s f119=r0 // M3
mov f120=f0 // F
ldfps f121,f122=[sp] // M0
ldfps f123,f124=[sp] // M1
mov f125=f0 // F
setf.s f126=r0 // M2
setf.s f127=r0 // M3
br.ret.sptk.many rp // F
END(__ia64_init_fpu)
/*
* Switch execution mode from virtual to physical
*
* Inputs:
* r16 = new psr to establish
* Output:
* r19 = old virtual address of ar.bsp
* r20 = old virtual address of sp
*
* Note: RSE must already be in enforced lazy mode
*/
GLOBAL_ENTRY(ia64_switch_mode_phys)
{
alloc r2=ar.pfs,0,0,0,0
rsm psr.i | psr.ic // disable interrupts and interrupt collection
mov r15=ip
}
;;
{
flushrs // must be first insn in group
srlz.i
}
;;
mov cr.ipsr=r16 // set new PSR
add r3=1f-ia64_switch_mode_phys,r15
mov r19=ar.bsp
mov r20=sp
mov r14=rp // get return address into a general register
;;
// going to physical mode, use tpa to translate virt->phys
tpa r17=r19
tpa r3=r3
tpa sp=sp
tpa r14=r14
;;
mov r18=ar.rnat // save ar.rnat
mov ar.bspstore=r17 // this steps on ar.rnat
mov cr.iip=r3
mov cr.ifs=r0
;;
mov ar.rnat=r18 // restore ar.rnat
rfi // must be last insn in group
;;
1: mov rp=r14
br.ret.sptk.many rp
END(ia64_switch_mode_phys)
/*
* Switch execution mode from physical to virtual
*
* Inputs:
* r16 = new psr to establish
* r19 = new bspstore to establish
* r20 = new sp to establish
*
* Note: RSE must already be in enforced lazy mode
*/
GLOBAL_ENTRY(ia64_switch_mode_virt)
{
alloc r2=ar.pfs,0,0,0,0
rsm psr.i | psr.ic // disable interrupts and interrupt collection
mov r15=ip
}
;;
{
flushrs // must be first insn in group
srlz.i
}
;;
mov cr.ipsr=r16 // set new PSR
add r3=1f-ia64_switch_mode_virt,r15
mov r14=rp // get return address into a general register
;;
// going to virtual
// - for code addresses, set upper bits of addr to KERNEL_START
// - for stack addresses, copy from input argument
movl r18=KERNEL_START
dep r3=0,r3,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT
dep r14=0,r14,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT
mov sp=r20
;;
or r3=r3,r18
or r14=r14,r18
;;
mov r18=ar.rnat // save ar.rnat
mov ar.bspstore=r19 // this steps on ar.rnat
mov cr.iip=r3
mov cr.ifs=r0
;;
mov ar.rnat=r18 // restore ar.rnat
rfi // must be last insn in group
;;
1: mov rp=r14
br.ret.sptk.many rp
END(ia64_switch_mode_virt)
GLOBAL_ENTRY(ia64_delay_loop)
.prologue
{ nop 0 // work around GAS unwind info generation bug...
.save ar.lc,r2
mov r2=ar.lc
.body
;;
mov ar.lc=r32
}
;;
// force loop to be 32-byte aligned (GAS bug means we cannot use .align
// inside function body without corrupting unwind info).
{ nop 0 }
1: br.cloop.sptk.few 1b
;;
mov ar.lc=r2
br.ret.sptk.many rp
END(ia64_delay_loop)
/*
* Return a CPU-local timestamp in nano-seconds. This timestamp is
* NOT synchronized across CPUs, so its return value must never be
* compared against the values returned on another CPU. The usage in
* kernel/sched.c ensures that.
*
* The return-value of sched_clock() is NOT supposed to wrap-around.
* If it did, it would cause some scheduling hiccups (at the worst).
* Fortunately, with a 64-bit cycle-counter ticking at 100GHz, even
* that would happen only once every 5+ years.
*
* The code below basically calculates:
*
* (ia64_get_itc() * local_cpu_data->nsec_per_cyc) >> IA64_NSEC_PER_CYC_SHIFT
*
* except that the multiplication and the shift are done with 128-bit
* intermediate precision so that we can produce a full 64-bit result.
*/
GLOBAL_ENTRY(sched_clock)
addl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
mov.m r9=ar.itc // fetch cycle-counter (35 cyc)
;;
ldf8 f8=[r8]
;;
setf.sig f9=r9 // certain to stall, so issue it _after_ ldf8...
;;
xmpy.lu f10=f9,f8 // calculate low 64 bits of 128-bit product (4 cyc)
xmpy.hu f11=f9,f8 // calculate high 64 bits of 128-bit product
;;
getf.sig r8=f10 // (5 cyc)
getf.sig r9=f11
;;
shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT
br.ret.sptk.many rp
END(sched_clock)
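The comment above describes a 64x64->128-bit multiply followed by a shift; in C that is a one-liner when a 128-bit intermediate is available. A hedged equivalent using the GCC/clang __int128 extension (names illustrative, not the kernel's):

#include <stdint.h>

static uint64_t sched_clock_sketch(uint64_t itc, uint64_t nsec_per_cyc, unsigned shift)
{
	unsigned __int128 prod = (unsigned __int128)itc * nsec_per_cyc;
	return (uint64_t)(prod >> shift);	/* shift = IA64_NSEC_PER_CYC_SHIFT */
}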
GLOBAL_ENTRY(start_kernel_thread)
.prologue
.save rp, r0 // this is the end of the call-chain
.body
alloc r2 = ar.pfs, 0, 0, 2, 0
mov out0 = r9
mov out1 = r11;;
br.call.sptk.many rp = kernel_thread_helper;;
mov out0 = r8
br.call.sptk.many rp = sys_exit;;
1: br.sptk.few 1b // not reached
END(start_kernel_thread)
#ifdef CONFIG_IA64_BRL_EMU
/*
* Assembly routines used by brl_emu.c to set preserved register state.
*/
#define SET_REG(reg) \
GLOBAL_ENTRY(ia64_set_##reg); \
alloc r16=ar.pfs,1,0,0,0; \
mov reg=r32; \
;; \
br.ret.sptk.many rp; \
END(ia64_set_##reg)
SET_REG(b1);
SET_REG(b2);
SET_REG(b3);
SET_REG(b4);
SET_REG(b5);
#endif /* CONFIG_IA64_BRL_EMU */
#ifdef CONFIG_SMP
/*
* This routine handles spinlock contention. It uses a non-standard calling
* convention to avoid converting leaf routines into interior routines. Because
* of this special convention, there are several restrictions:
*
* - do not use gp relative variables, this code is called from the kernel
* and from modules, r1 is undefined.
* - do not use stacked registers, the caller owns them.
* - do not use the scratch stack space, the caller owns it.
* - do not use any registers other than the ones listed below
*
* Inputs:
* ar.pfs - saved CFM of caller
* ar.ccv - 0 (and available for use)
* r27 - flags from spin_lock_irqsave or 0. Must be preserved.
* r28 - available for use.
* r29 - available for use.
* r30 - available for use.
* r31 - address of lock, available for use.
* b6 - return address
* p14 - available for use.
* p15 - used to track flag status.
*
* If you patch this code to use more registers, do not forget to update
* the clobber lists for spin_lock() in include/asm-ia64/spinlock.h.
*/
#if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
GLOBAL_ENTRY(ia64_spinlock_contention_pre3_4)
.prologue
.save ar.pfs, r0 // this code effectively has a zero frame size
.save rp, r28
.body
nop 0
tbit.nz p15,p0=r27,IA64_PSR_I_BIT
.restore sp // pop existing prologue after next insn
mov b6 = r28
.prologue
.save ar.pfs, r0
.altrp b6
.body
;;
(p15) ssm psr.i // reenable interrupts if they were on
// DavidM says that srlz.d is slow and is not required in this case
.wait:
// exponential backoff, kdb, lockmeter etc. go in here
hint @pause
ld4 r30=[r31] // don't use ld4.bias; if it's contended, we won't write the word
nop 0
;;
cmp4.ne p14,p0=r30,r0
(p14) br.cond.sptk.few .wait
(p15) rsm psr.i // disable interrupts if we reenabled them
br.cond.sptk.few b6 // lock is now free, try to acquire
.global ia64_spinlock_contention_pre3_4_end // for kernprof
ia64_spinlock_contention_pre3_4_end:
END(ia64_spinlock_contention_pre3_4)
#else
GLOBAL_ENTRY(ia64_spinlock_contention)
.prologue
.altrp b6
.body
tbit.nz p15,p0=r27,IA64_PSR_I_BIT
;;
.wait:
(p15) ssm psr.i // reenable interrupts if they were on
// DavidM says that srlz.d is slow and is not required in this case
.wait2:
// exponential backoff, kdb, lockmeter etc. go in here
hint @pause
ld4 r30=[r31] // don't use ld4.bias; if it's contended, we won't write the word
;;
cmp4.ne p14,p0=r30,r0
mov r30 = 1
(p14) br.cond.sptk.few .wait2
(p15) rsm psr.i // disable interrupts if we reenabled them
;;
cmpxchg4.acq r30=[r31], r30, ar.ccv
;;
cmp4.ne p14,p0=r0,r30
(p14) br.cond.sptk.few .wait
br.ret.sptk.many b6 // lock is now taken
END(ia64_spinlock_contention)
#endif
#endif /* CONFIG_SMP */
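The contention path above spins with plain loads (generating no write traffic on the lock line) until the lock word looks free, then retries the atomic acquire; interrupts are re-enabled while spinning if the saved flags say they were on. A hedged C11 sketch of that shape, ignoring the interrupt handling and the non-standard calling convention:

#include <stdatomic.h>

static void spin_contend(atomic_int *lock)
{
	for (;;) {
		while (atomic_load_explicit(lock, memory_order_relaxed) != 0)
			;				/* the hint @pause read loop */
		int expected = 0;
		if (atomic_compare_exchange_strong_explicit(lock, &expected, 1,
				memory_order_acquire, memory_order_relaxed))
			return;				/* lock taken */
	}
}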


@@ -0,0 +1,127 @@
/*
* Architecture-specific kernel symbols
*
* Don't put any exports here unless it's defined in an assembler file.
* All other exports should be put directly after the definition.
*/
#include <linux/config.h>
#include <linux/module.h>
#include <linux/string.h>
EXPORT_SYMBOL(memset);
EXPORT_SYMBOL(memchr);
EXPORT_SYMBOL(memcmp);
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(memmove);
EXPORT_SYMBOL(memscan);
EXPORT_SYMBOL(strcat);
EXPORT_SYMBOL(strchr);
EXPORT_SYMBOL(strcmp);
EXPORT_SYMBOL(strcpy);
EXPORT_SYMBOL(strlen);
EXPORT_SYMBOL(strncat);
EXPORT_SYMBOL(strncmp);
EXPORT_SYMBOL(strncpy);
EXPORT_SYMBOL(strnlen);
EXPORT_SYMBOL(strrchr);
EXPORT_SYMBOL(strstr);
EXPORT_SYMBOL(strpbrk);
#include <asm/checksum.h>
EXPORT_SYMBOL(ip_fast_csum); /* hand-coded assembly */
#include <asm/semaphore.h>
EXPORT_SYMBOL(__down);
EXPORT_SYMBOL(__down_interruptible);
EXPORT_SYMBOL(__down_trylock);
EXPORT_SYMBOL(__up);
#include <asm/page.h>
EXPORT_SYMBOL(clear_page);
#ifdef CONFIG_VIRTUAL_MEM_MAP
#include <linux/bootmem.h>
EXPORT_SYMBOL(max_low_pfn); /* defined by bootmem.c, but not exported by generic code */
#endif
#include <asm/processor.h>
EXPORT_SYMBOL(per_cpu__cpu_info);
#ifdef CONFIG_SMP
EXPORT_SYMBOL(per_cpu__local_per_cpu_offset);
#endif
#include <asm/uaccess.h>
EXPORT_SYMBOL(__copy_user);
EXPORT_SYMBOL(__do_clear_user);
EXPORT_SYMBOL(__strlen_user);
EXPORT_SYMBOL(__strncpy_from_user);
EXPORT_SYMBOL(__strnlen_user);
#include <asm/unistd.h>
EXPORT_SYMBOL(__ia64_syscall);
/* from arch/ia64/lib */
extern void __divsi3(void);
extern void __udivsi3(void);
extern void __modsi3(void);
extern void __umodsi3(void);
extern void __divdi3(void);
extern void __udivdi3(void);
extern void __moddi3(void);
extern void __umoddi3(void);
EXPORT_SYMBOL(__divsi3);
EXPORT_SYMBOL(__udivsi3);
EXPORT_SYMBOL(__modsi3);
EXPORT_SYMBOL(__umodsi3);
EXPORT_SYMBOL(__divdi3);
EXPORT_SYMBOL(__udivdi3);
EXPORT_SYMBOL(__moddi3);
EXPORT_SYMBOL(__umoddi3);
#if defined(CONFIG_MD_RAID5) || defined(CONFIG_MD_RAID5_MODULE)
extern void xor_ia64_2(void);
extern void xor_ia64_3(void);
extern void xor_ia64_4(void);
extern void xor_ia64_5(void);
EXPORT_SYMBOL(xor_ia64_2);
EXPORT_SYMBOL(xor_ia64_3);
EXPORT_SYMBOL(xor_ia64_4);
EXPORT_SYMBOL(xor_ia64_5);
#endif
#include <asm/pal.h>
EXPORT_SYMBOL(ia64_pal_call_phys_stacked);
EXPORT_SYMBOL(ia64_pal_call_phys_static);
EXPORT_SYMBOL(ia64_pal_call_stacked);
EXPORT_SYMBOL(ia64_pal_call_static);
EXPORT_SYMBOL(ia64_load_scratch_fpregs);
EXPORT_SYMBOL(ia64_save_scratch_fpregs);
#include <asm/unwind.h>
EXPORT_SYMBOL(unw_init_running);
#ifdef ASM_SUPPORTED
# ifdef CONFIG_SMP
# if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
/*
* This is not a normal routine and we don't want a function descriptor for it, so we use
* a fake declaration here.
*/
extern char ia64_spinlock_contention_pre3_4;
EXPORT_SYMBOL(ia64_spinlock_contention_pre3_4);
# else
/*
* This is not a normal routine and we don't want a function descriptor for it, so we use
* a fake declaration here.
*/
extern char ia64_spinlock_contention;
EXPORT_SYMBOL(ia64_spinlock_contention);
# endif
# endif
#endif
extern char ia64_ivt[];
EXPORT_SYMBOL(ia64_ivt);


@@ -0,0 +1,46 @@
/*
* This is where we statically allocate and initialize the initial
* task.
*
* Copyright (C) 1999, 2002-2003 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
*/
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/init_task.h>
#include <linux/mqueue.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
static struct fs_struct init_fs = INIT_FS;
static struct files_struct init_files = INIT_FILES;
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
struct mm_struct init_mm = INIT_MM(init_mm);
EXPORT_SYMBOL(init_mm);
/*
* Initial task structure.
*
* We need to make sure that this is properly aligned due to the way process stacks are
* handled. This is done by having a special ".data.init_task" section...
*/
#define init_thread_info init_task_mem.s.thread_info
union {
struct {
struct task_struct task;
struct thread_info thread_info;
} s;
unsigned long stack[KERNEL_STACK_SIZE/sizeof (unsigned long)];
} init_task_mem asm ("init_task") __attribute__((section(".data.init_task"))) = {{
.task = INIT_TASK(init_task_mem.s.task),
.thread_info = INIT_THREAD_INFO(init_task_mem.s.task)
}};
EXPORT_SYMBOL(init_task);

827
arch/ia64/kernel/iosapic.c Normal file

@@ -0,0 +1,827 @@
/*
* I/O SAPIC support.
*
* Copyright (C) 1999 Intel Corp.
* Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com>
* Copyright (C) 2000-2002 J.I. Lee <jung-ik.lee@intel.com>
* Copyright (C) 1999-2000, 2002-2003 Hewlett-Packard Co.
* David Mosberger-Tang <davidm@hpl.hp.com>
* Copyright (C) 1999 VA Linux Systems
* Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com>
*
* 00/04/19 D. Mosberger Rewritten to mirror more closely the x86 I/O APIC code.
* In particular, we now have separate handlers for edge
* and level triggered interrupts.
* 00/10/27 Asit Mallick, Goutham Rao <goutham.rao@intel.com> IRQ vector allocation
* PCI to vector mapping, shared PCI interrupts.
* 00/10/27 D. Mosberger Document things a bit more to make them more understandable.
* Clean up much of the old IOSAPIC cruft.
* 01/07/27 J.I. Lee PCI irq routing, Platform/Legacy interrupts and fixes for
* ACPI S5(SoftOff) support.
* 02/01/23 J.I. Lee iosapic pgm fixes for PCI irq routing from _PRT
* 02/01/07 E. Focht <efocht@ess.nec.de> Redirectable interrupt vectors in
* iosapic_set_affinity(), initializations for
* /proc/irq/#/smp_affinity
* 02/04/02 P. Diefenbaugh Cleaned up ACPI PCI IRQ routing.
* 02/04/18 J.I. Lee bug fix in iosapic_init_pci_irq
* 02/04/30 J.I. Lee bug fix in find_iosapic to fix ACPI PCI IRQ to IOSAPIC mapping
* error
* 02/07/29 T. Kochi Allocate interrupt vectors dynamically
* 02/08/04 T. Kochi Cleaned up terminology (irq, global system interrupt, vector, etc.)
* 02/09/20 D. Mosberger Simplified by taking advantage of ACPI's pci_irq code.
* 03/02/19 B. Helgaas Make pcat_compat system-wide, not per-IOSAPIC.
* Remove iosapic_address & gsi_base from external interfaces.
* Rationalize __init/__devinit attributes.
* 04/12/04 Ashok Raj <ashok.raj@intel.com> Intel Corporation 2004
* Updated to work with irq migration necessary for CPU Hotplug
*/
/*
* Here is what the interrupt logic between a PCI device and the kernel looks like:
*
* (1) A PCI device raises one of the four interrupt pins (INTA, INTB, INTC, INTD). The
 * device is uniquely identified by its bus and slot number (the function
* number does not matter here because all functions share the same interrupt
* lines).
*
 * (2) The motherboard routes the interrupt line to a pin on an IOSAPIC controller.
* Multiple interrupt lines may have to share the same IOSAPIC pin (if they're level
* triggered and use the same polarity). Each interrupt line has a unique Global
* System Interrupt (GSI) number which can be calculated as the sum of the controller's
* base GSI number and the IOSAPIC pin number to which the line connects.
*
 * (3) The IOSAPIC uses its internal routing table entries (RTEs) to map the IOSAPIC pin
 * to an IA-64 interrupt vector. This interrupt vector is then sent to the CPU.
*
 * (4) The kernel recognizes an interrupt as an IRQ. The IRQ interface is used as the
 * architecture-independent interrupt handling mechanism in Linux. Since an
 * IRQ is just a number, we need an IA-64 interrupt vector number <-> IRQ number
 * mapping. On smaller systems, we use a one-to-one mapping between IA-64 vector and
* IRQ. A platform can implement platform_irq_to_vector(irq) and
* platform_local_vector_to_irq(vector) APIs to differentiate the mapping.
* Please see also include/asm-ia64/hw_irq.h for those APIs.
*
* To sum up, there are three levels of mappings involved:
*
* PCI pin -> global system interrupt (GSI) -> IA-64 vector <-> IRQ
*
 * Note: The term "IRQ" is used loosely throughout the Linux kernel to describe interrupts.
 * Here we use "IRQ" only for Linux IRQs. ISA IRQ (isa_irq) is the only exception in this
* source code.
*/
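/*
 * A small worked example of the mapping above (the numbers are illustrative
 * only, not taken from real firmware tables): suppose an IOSAPIC was
 * registered with gsi_base 48 and a device's interrupt line is wired to pin
 * 5 of that controller.  The line's GSI is then 48 + 5 = 53,
 * assign_irq_vector() hands out a free IA-64 vector for it, and on systems
 * that use the identity vector <-> IRQ mapping, gsi_to_irq(53) simply
 * returns that vector number as the Linux IRQ.
 */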
#include <linux/config.h>
#include <linux/acpi.h>
#include <linux/init.h>
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/pci.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/string.h>
#include <asm/delay.h>
#include <asm/hw_irq.h>
#include <asm/io.h>
#include <asm/iosapic.h>
#include <asm/machvec.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
#include <asm/system.h>
#undef DEBUG_INTERRUPT_ROUTING
#ifdef DEBUG_INTERRUPT_ROUTING
#define DBG(fmt...) printk(fmt)
#else
#define DBG(fmt...)
#endif
static DEFINE_SPINLOCK(iosapic_lock);
/* These tables map IA-64 vectors to the IOSAPIC pin that generates this vector. */
static struct iosapic_intr_info {
char __iomem *addr; /* base address of IOSAPIC */
u32 low32; /* current value of low word of Redirection table entry */
unsigned int gsi_base; /* first GSI assigned to this IOSAPIC */
char rte_index; /* IOSAPIC RTE index (-1 => not an IOSAPIC interrupt) */
unsigned char dmode : 3; /* delivery mode (see iosapic.h) */
unsigned char polarity: 1; /* interrupt polarity (see iosapic.h) */
unsigned char trigger : 1; /* trigger mode (see iosapic.h) */
int refcnt; /* reference counter */
} iosapic_intr_info[IA64_NUM_VECTORS];
static struct iosapic {
char __iomem *addr; /* base address of IOSAPIC */
unsigned int gsi_base; /* first GSI assigned to this IOSAPIC */
unsigned short num_rte; /* number of RTE in this IOSAPIC */
#ifdef CONFIG_NUMA
unsigned short node; /* numa node association via pxm */
#endif
} iosapic_lists[NR_IOSAPICS];
static int num_iosapic;
static unsigned char pcat_compat __initdata; /* 8259 compatibility flag */
/*
* Find an IOSAPIC associated with a GSI
*/
static inline int
find_iosapic (unsigned int gsi)
{
int i;
for (i = 0; i < num_iosapic; i++) {
if ((unsigned) (gsi - iosapic_lists[i].gsi_base) < iosapic_lists[i].num_rte)
return i;
}
return -1;
}
static inline int
_gsi_to_vector (unsigned int gsi)
{
struct iosapic_intr_info *info;
for (info = iosapic_intr_info; info < iosapic_intr_info + IA64_NUM_VECTORS; ++info)
if (info->gsi_base + info->rte_index == gsi)
return info - iosapic_intr_info;
return -1;
}
/*
* Translate GSI number to the corresponding IA-64 interrupt vector. If no
* entry exists, return -1.
*/
inline int
gsi_to_vector (unsigned int gsi)
{
return _gsi_to_vector(gsi);
}
int
gsi_to_irq (unsigned int gsi)
{
/*
 * XXX fix me: this assumes an identity mapping between IA-64 vector and Linux irq
* numbers...
*/
return _gsi_to_vector(gsi);
}
static void
set_rte (unsigned int vector, unsigned int dest, int mask)
{
unsigned long pol, trigger, dmode;
u32 low32, high32;
char __iomem *addr;
int rte_index;
char redir;
DBG(KERN_DEBUG"IOSAPIC: routing vector %d to 0x%x\n", vector, dest);
rte_index = iosapic_intr_info[vector].rte_index;
if (rte_index < 0)
return; /* not an IOSAPIC interrupt */
addr = iosapic_intr_info[vector].addr;
pol = iosapic_intr_info[vector].polarity;
trigger = iosapic_intr_info[vector].trigger;
dmode = iosapic_intr_info[vector].dmode;
vector &= (~IA64_IRQ_REDIRECTED);
redir = (dmode == IOSAPIC_LOWEST_PRIORITY) ? 1 : 0;
#ifdef CONFIG_SMP
{
unsigned int irq;
for (irq = 0; irq < NR_IRQS; ++irq)
if (irq_to_vector(irq) == vector) {
set_irq_affinity_info(irq, (int)(dest & 0xffff), redir);
break;
}
}
#endif
low32 = ((pol << IOSAPIC_POLARITY_SHIFT) |
(trigger << IOSAPIC_TRIGGER_SHIFT) |
(dmode << IOSAPIC_DELIVERY_SHIFT) |
((mask ? 1 : 0) << IOSAPIC_MASK_SHIFT) |
vector);
/* dest contains both id and eid */
high32 = (dest << IOSAPIC_DEST_SHIFT);
iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), high32);
iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
iosapic_intr_info[vector].low32 = low32;
}
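/*
 * Worked example for the low32 word built above (the vector and attribute
 * values are assumptions for illustration; the IOSAPIC_* names are the usual
 * constants from <asm/iosapic.h>): a level-triggered, active-low interrupt
 * with fixed delivery on vector 0x30 and the mask bit clear would be encoded as
 *
 *	low32 = (IOSAPIC_POL_LOW << IOSAPIC_POLARITY_SHIFT) |
 *		(IOSAPIC_LEVEL   << IOSAPIC_TRIGGER_SHIFT)  |
 *		(IOSAPIC_FIXED   << IOSAPIC_DELIVERY_SHIFT) |
 *		(0 << IOSAPIC_MASK_SHIFT) | 0x30;
 *
 * i.e. the vector occupies the low byte and the routing attributes sit in
 * the higher bit-fields, while high32 carries only the destination id/eid.
 */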
static void
nop (unsigned int vector)
{
/* do nothing... */
}
static void
mask_irq (unsigned int irq)
{
unsigned long flags;
char __iomem *addr;
u32 low32;
int rte_index;
ia64_vector vec = irq_to_vector(irq);
addr = iosapic_intr_info[vec].addr;
rte_index = iosapic_intr_info[vec].rte_index;
if (rte_index < 0)
return; /* not an IOSAPIC interrupt! */
spin_lock_irqsave(&iosapic_lock, flags);
{
/* set only the mask bit */
low32 = iosapic_intr_info[vec].low32 |= IOSAPIC_MASK;
iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
}
spin_unlock_irqrestore(&iosapic_lock, flags);
}
static void
unmask_irq (unsigned int irq)
{
unsigned long flags;
char __iomem *addr;
u32 low32;
int rte_index;
ia64_vector vec = irq_to_vector(irq);
addr = iosapic_intr_info[vec].addr;
rte_index = iosapic_intr_info[vec].rte_index;
if (rte_index < 0)
return; /* not an IOSAPIC interrupt! */
spin_lock_irqsave(&iosapic_lock, flags);
{
low32 = iosapic_intr_info[vec].low32 &= ~IOSAPIC_MASK;
iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
}
spin_unlock_irqrestore(&iosapic_lock, flags);
}
static void
iosapic_set_affinity (unsigned int irq, cpumask_t mask)
{
#ifdef CONFIG_SMP
unsigned long flags;
u32 high32, low32;
int dest, rte_index;
char __iomem *addr;
int redir = (irq & IA64_IRQ_REDIRECTED) ? 1 : 0;
ia64_vector vec;
irq &= (~IA64_IRQ_REDIRECTED);
vec = irq_to_vector(irq);
if (cpus_empty(mask))
return;
dest = cpu_physical_id(first_cpu(mask));
rte_index = iosapic_intr_info[vec].rte_index;
addr = iosapic_intr_info[vec].addr;
if (rte_index < 0)
return; /* not an IOSAPIC interrupt */
set_irq_affinity_info(irq, dest, redir);
/* dest contains both id and eid */
high32 = dest << IOSAPIC_DEST_SHIFT;
spin_lock_irqsave(&iosapic_lock, flags);
{
low32 = iosapic_intr_info[vec].low32 & ~(7 << IOSAPIC_DELIVERY_SHIFT);
if (redir)
/* change delivery mode to lowest priority */
low32 |= (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT);
else
/* change delivery mode to fixed */
low32 |= (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT);
iosapic_intr_info[vec].low32 = low32;
iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), high32);
iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
}
spin_unlock_irqrestore(&iosapic_lock, flags);
#endif
}
/*
* Handlers for level-triggered interrupts.
*/
static unsigned int
iosapic_startup_level_irq (unsigned int irq)
{
unmask_irq(irq);
return 0;
}
static void
iosapic_end_level_irq (unsigned int irq)
{
ia64_vector vec = irq_to_vector(irq);
move_irq(irq);
iosapic_eoi(iosapic_intr_info[vec].addr, vec);
}
#define iosapic_shutdown_level_irq mask_irq
#define iosapic_enable_level_irq unmask_irq
#define iosapic_disable_level_irq mask_irq
#define iosapic_ack_level_irq nop
struct hw_interrupt_type irq_type_iosapic_level = {
.typename = "IO-SAPIC-level",
.startup = iosapic_startup_level_irq,
.shutdown = iosapic_shutdown_level_irq,
.enable = iosapic_enable_level_irq,
.disable = iosapic_disable_level_irq,
.ack = iosapic_ack_level_irq,
.end = iosapic_end_level_irq,
.set_affinity = iosapic_set_affinity
};
/*
* Handlers for edge-triggered interrupts.
*/
static unsigned int
iosapic_startup_edge_irq (unsigned int irq)
{
unmask_irq(irq);
/*
* IOSAPIC simply drops interrupts pended while the
* corresponding pin was masked, so we can't know if an
* interrupt is pending already. Let's hope not...
*/
return 0;
}
static void
iosapic_ack_edge_irq (unsigned int irq)
{
irq_desc_t *idesc = irq_descp(irq);
move_irq(irq);
/*
* Once we have recorded IRQ_PENDING already, we can mask the
* interrupt for real. This prevents IRQ storms from unhandled
* devices.
*/
if ((idesc->status & (IRQ_PENDING|IRQ_DISABLED)) == (IRQ_PENDING|IRQ_DISABLED))
mask_irq(irq);
}
#define iosapic_enable_edge_irq unmask_irq
#define iosapic_disable_edge_irq nop
#define iosapic_end_edge_irq nop
struct hw_interrupt_type irq_type_iosapic_edge = {
.typename = "IO-SAPIC-edge",
.startup = iosapic_startup_edge_irq,
.shutdown = iosapic_disable_edge_irq,
.enable = iosapic_enable_edge_irq,
.disable = iosapic_disable_edge_irq,
.ack = iosapic_ack_edge_irq,
.end = iosapic_end_edge_irq,
.set_affinity = iosapic_set_affinity
};
unsigned int
iosapic_version (char __iomem *addr)
{
/*
* IOSAPIC Version Register return 32 bit structure like:
* {
* unsigned int version : 8;
* unsigned int reserved1 : 8;
* unsigned int max_redir : 8;
* unsigned int reserved2 : 8;
* }
*/
return iosapic_read(addr, IOSAPIC_VERSION);
}
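/*
 * Minimal decoding sketch for the version word described above (using only
 * the bit positions from the comment and from iosapic_init() below):
 *
 *	version   =  ver        & 0xff;		// bits  0..7
 *	max_redir = (ver >> 16) & 0xff;		// bits 16..23
 *
 * iosapic_init() relies on exactly this when it computes
 * num_rte = ((ver >> 16) & 0xff) + 1.
 */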
/*
 * If the given vector is already owned by another interrupt source, assign a new
 * vector to that source and make the requested vector available.
*/
static void __init
iosapic_reassign_vector (int vector)
{
int new_vector;
if (iosapic_intr_info[vector].rte_index >= 0 || iosapic_intr_info[vector].addr
|| iosapic_intr_info[vector].gsi_base || iosapic_intr_info[vector].dmode
|| iosapic_intr_info[vector].polarity || iosapic_intr_info[vector].trigger)
{
new_vector = assign_irq_vector(AUTO_ASSIGN);
printk(KERN_INFO "Reassigning vector %d to %d\n", vector, new_vector);
memcpy(&iosapic_intr_info[new_vector], &iosapic_intr_info[vector],
sizeof(struct iosapic_intr_info));
memset(&iosapic_intr_info[vector], 0, sizeof(struct iosapic_intr_info));
iosapic_intr_info[vector].rte_index = -1;
}
}
static void
register_intr (unsigned int gsi, int vector, unsigned char delivery,
unsigned long polarity, unsigned long trigger)
{
irq_desc_t *idesc;
struct hw_interrupt_type *irq_type;
int rte_index;
int index;
unsigned long gsi_base;
void __iomem *iosapic_address;
index = find_iosapic(gsi);
if (index < 0) {
printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n", __FUNCTION__, gsi);
return;
}
iosapic_address = iosapic_lists[index].addr;
gsi_base = iosapic_lists[index].gsi_base;
rte_index = gsi - gsi_base;
iosapic_intr_info[vector].rte_index = rte_index;
iosapic_intr_info[vector].polarity = polarity;
iosapic_intr_info[vector].dmode = delivery;
iosapic_intr_info[vector].addr = iosapic_address;
iosapic_intr_info[vector].gsi_base = gsi_base;
iosapic_intr_info[vector].trigger = trigger;
iosapic_intr_info[vector].refcnt++;
if (trigger == IOSAPIC_EDGE)
irq_type = &irq_type_iosapic_edge;
else
irq_type = &irq_type_iosapic_level;
idesc = irq_descp(vector);
if (idesc->handler != irq_type) {
if (idesc->handler != &no_irq_type)
printk(KERN_WARNING "%s: changing vector %d from %s to %s\n",
__FUNCTION__, vector, idesc->handler->typename, irq_type->typename);
idesc->handler = irq_type;
}
}
static unsigned int
get_target_cpu (unsigned int gsi, int vector)
{
#ifdef CONFIG_SMP
static int cpu = -1;
/*
* If the platform supports redirection via XTP, let it
* distribute interrupts.
*/
if (smp_int_redirect & SMP_IRQ_REDIRECTION)
return cpu_physical_id(smp_processor_id());
/*
* Some interrupts (ACPI SCI, for instance) are registered
* before the BSP is marked as online.
*/
if (!cpu_online(smp_processor_id()))
return cpu_physical_id(smp_processor_id());
#ifdef CONFIG_NUMA
{
int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0;
cpumask_t cpu_mask;
iosapic_index = find_iosapic(gsi);
if (iosapic_index < 0 ||
iosapic_lists[iosapic_index].node == MAX_NUMNODES)
goto skip_numa_setup;
cpu_mask = node_to_cpumask(iosapic_lists[iosapic_index].node);
for_each_cpu_mask(numa_cpu, cpu_mask) {
if (!cpu_online(numa_cpu))
cpu_clear(numa_cpu, cpu_mask);
}
num_cpus = cpus_weight(cpu_mask);
if (!num_cpus)
goto skip_numa_setup;
/* Use vector assignment to distribute across cpus in node */
cpu_index = vector % num_cpus;
for (numa_cpu = first_cpu(cpu_mask) ; i < cpu_index ; i++)
numa_cpu = next_cpu(numa_cpu, cpu_mask);
if (numa_cpu != NR_CPUS)
return cpu_physical_id(numa_cpu);
}
skip_numa_setup:
#endif
/*
* Otherwise, round-robin interrupt vectors across all the
* processors. (It'd be nice if we could be smarter in the
* case of NUMA.)
*/
do {
if (++cpu >= NR_CPUS)
cpu = 0;
} while (!cpu_online(cpu));
return cpu_physical_id(cpu);
#else
return cpu_physical_id(smp_processor_id());
#endif
}
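/*
 * Distribution example for the NUMA case above (made-up numbers): with 4
 * online CPUs in the IOSAPIC's node, vectors 0x40, 0x41, 0x42 and 0x43 give
 * cpu_index values 0, 1, 2 and 3 (vector % num_cpus), so consecutive vector
 * assignments naturally spread interrupts across the node's CPUs.
 */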
/*
* ACPI can describe IOSAPIC interrupts via static tables and namespace
* methods. This provides an interface to register those interrupts and
* program the IOSAPIC RTE.
*/
int
iosapic_register_intr (unsigned int gsi,
unsigned long polarity, unsigned long trigger)
{
int vector;
unsigned int dest;
unsigned long flags;
/*
* If this GSI has already been registered (i.e., it's a
* shared interrupt, or we lost a race to register it),
* don't touch the RTE.
*/
spin_lock_irqsave(&iosapic_lock, flags);
{
vector = gsi_to_vector(gsi);
if (vector > 0) {
iosapic_intr_info[vector].refcnt++;
spin_unlock_irqrestore(&iosapic_lock, flags);
return vector;
}
vector = assign_irq_vector(AUTO_ASSIGN);
dest = get_target_cpu(gsi, vector);
register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY,
polarity, trigger);
set_rte(vector, dest, 1);
}
spin_unlock_irqrestore(&iosapic_lock, flags);
printk(KERN_INFO "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n",
gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
(polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
cpu_logical_id(dest), dest, vector);
return vector;
}
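/*
 * Example call (hypothetical GSI number; the polarity/trigger constants are
 * assumed to be the usual ones from <asm/iosapic.h>), mirroring how ACPI
 * interrupt setup is expected to use the interface above:
 *
 *	int vector = iosapic_register_intr(21, IOSAPIC_POL_LOW, IOSAPIC_LEVEL);
 *
 * A second registration of the same GSI only bumps the reference count and
 * returns the same vector, so shared PCI interrupts do not reprogram the
 * RTE twice.
 */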
#ifdef CONFIG_ACPI_DEALLOCATE_IRQ
void
iosapic_unregister_intr (unsigned int gsi)
{
unsigned long flags;
int irq, vector;
irq_desc_t *idesc;
int rte_index;
unsigned long trigger, polarity;
/*
* If the irq associated with the gsi is not found,
* iosapic_unregister_intr() is unbalanced. We need to check
* this again after getting locks.
*/
irq = gsi_to_irq(gsi);
if (irq < 0) {
printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", gsi);
WARN_ON(1);
return;
}
vector = irq_to_vector(irq);
idesc = irq_descp(irq);
spin_lock_irqsave(&idesc->lock, flags);
spin_lock(&iosapic_lock);
{
rte_index = iosapic_intr_info[vector].rte_index;
if (rte_index < 0) {
spin_unlock(&iosapic_lock);
spin_unlock_irqrestore(&idesc->lock, flags);
printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", gsi);
WARN_ON(1);
return;
}
if (--iosapic_intr_info[vector].refcnt > 0) {
spin_unlock(&iosapic_lock);
spin_unlock_irqrestore(&idesc->lock, flags);
return;
}
/*
* If interrupt handlers still exist on the irq
* associated with the gsi, don't unregister the
* interrupt.
*/
if (idesc->action) {
iosapic_intr_info[vector].refcnt++;
spin_unlock(&iosapic_lock);
spin_unlock_irqrestore(&idesc->lock, flags);
printk(KERN_WARNING "Cannot unregister GSI. IRQ %u is still in use.\n", irq);
return;
}
/* Clear the interrupt controller descriptor. */
idesc->handler = &no_irq_type;
trigger = iosapic_intr_info[vector].trigger;
polarity = iosapic_intr_info[vector].polarity;
/* Clear the interrupt information. */
memset(&iosapic_intr_info[vector], 0, sizeof(struct iosapic_intr_info));
iosapic_intr_info[vector].rte_index = -1; /* mark as unused */
}
spin_unlock(&iosapic_lock);
spin_unlock_irqrestore(&idesc->lock, flags);
/* Free the interrupt vector */
free_irq_vector(vector);
printk(KERN_INFO "GSI %u (%s, %s) -> vector %d unregisterd.\n",
gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
(polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
vector);
}
#endif /* CONFIG_ACPI_DEALLOCATE_IRQ */
/*
* ACPI calls this when it finds an entry for a platform interrupt.
 * Note that the gsi_base and IOSAPIC address must be set in iosapic_init().
*/
int __init
iosapic_register_platform_intr (u32 int_type, unsigned int gsi,
int iosapic_vector, u16 eid, u16 id,
unsigned long polarity, unsigned long trigger)
{
static const char * const name[] = {"unknown", "PMI", "INIT", "CPEI"};
unsigned char delivery;
int vector, mask = 0;
unsigned int dest = ((id << 8) | eid) & 0xffff;
switch (int_type) {
case ACPI_INTERRUPT_PMI:
vector = iosapic_vector;
/*
* since PMI vector is alloc'd by FW(ACPI) not by kernel,
* we need to make sure the vector is available
*/
iosapic_reassign_vector(vector);
delivery = IOSAPIC_PMI;
break;
case ACPI_INTERRUPT_INIT:
vector = assign_irq_vector(AUTO_ASSIGN);
delivery = IOSAPIC_INIT;
break;
case ACPI_INTERRUPT_CPEI:
vector = IA64_CPE_VECTOR;
delivery = IOSAPIC_LOWEST_PRIORITY;
mask = 1;
break;
default:
printk(KERN_ERR "iosapic_register_platform_irq(): invalid int type 0x%x\n", int_type);
return -1;
}
register_intr(gsi, vector, delivery, polarity, trigger);
printk(KERN_INFO "PLATFORM int %s (0x%x): GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n",
int_type < ARRAY_SIZE(name) ? name[int_type] : "unknown",
int_type, gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
(polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
cpu_logical_id(dest), dest, vector);
set_rte(vector, dest, mask);
return vector;
}
/*
* ACPI calls this when it finds an entry for a legacy ISA IRQ override.
* Note that the gsi_base and IOSAPIC address must be set in iosapic_init().
*/
void __init
iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi,
unsigned long polarity,
unsigned long trigger)
{
int vector;
unsigned int dest = cpu_physical_id(smp_processor_id());
vector = isa_irq_to_vector(isa_irq);
register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY, polarity, trigger);
DBG("ISA: IRQ %u -> GSI %u (%s,%s) -> CPU %d (0x%04x) vector %d\n",
isa_irq, gsi, trigger == IOSAPIC_EDGE ? "edge" : "level",
polarity == IOSAPIC_POL_HIGH ? "high" : "low",
cpu_logical_id(dest), dest, vector);
set_rte(vector, dest, 1);
}
void __init
iosapic_system_init (int system_pcat_compat)
{
int vector;
for (vector = 0; vector < IA64_NUM_VECTORS; ++vector)
iosapic_intr_info[vector].rte_index = -1; /* mark as unused */
pcat_compat = system_pcat_compat;
if (pcat_compat) {
/*
* Disable the compatibility mode interrupts (8259 style), needs IN/OUT support
* enabled.
*/
printk(KERN_INFO "%s: Disabling PC-AT compatible 8259 interrupts\n", __FUNCTION__);
outb(0xff, 0xA1);
outb(0xff, 0x21);
}
}
void __init
iosapic_init (unsigned long phys_addr, unsigned int gsi_base)
{
int num_rte;
unsigned int isa_irq, ver;
char __iomem *addr;
addr = ioremap(phys_addr, 0);
ver = iosapic_version(addr);
/*
* The MAX_REDIR register holds the highest input pin
* number (starting from 0).
* We add 1 so that we can use it for number of pins (= RTEs)
*/
num_rte = ((ver >> 16) & 0xff) + 1;
iosapic_lists[num_iosapic].addr = addr;
iosapic_lists[num_iosapic].gsi_base = gsi_base;
iosapic_lists[num_iosapic].num_rte = num_rte;
#ifdef CONFIG_NUMA
iosapic_lists[num_iosapic].node = MAX_NUMNODES;
#endif
num_iosapic++;
if ((gsi_base == 0) && pcat_compat) {
/*
* Map the legacy ISA devices into the IOSAPIC data. Some of these may
* get reprogrammed later on with data from the ACPI Interrupt Source
* Override table.
*/
for (isa_irq = 0; isa_irq < 16; ++isa_irq)
iosapic_override_isa_irq(isa_irq, isa_irq, IOSAPIC_POL_HIGH, IOSAPIC_EDGE);
}
}
#ifdef CONFIG_NUMA
void __init
map_iosapic_to_node(unsigned int gsi_base, int node)
{
int index;
index = find_iosapic(gsi_base);
if (index < 0) {
printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n",
__FUNCTION__, gsi_base);
return;
}
iosapic_lists[index].node = node;
return;
}
#endif

238
arch/ia64/kernel/irq.c Normal file

@@ -0,0 +1,238 @@
/*
* linux/arch/ia64/kernel/irq.c
*
* Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
*
* This file contains the code used by various IRQ handling routines:
* asking for different IRQ's should be done through these routines
* instead of just grabbing them. Thus setups with different IRQ numbers
* shouldn't result in any weird surprises, and installing new handlers
* should be easier.
*
* Copyright (C) Ashok Raj<ashok.raj@intel.com>, Intel Corporation 2004
*
* 4/14/2004: Added code to handle cpu migration and do safe irq
 * migration without losing interrupts for iosapic
* architecture.
*/
#include <asm/delay.h>
#include <asm/uaccess.h>
#include <linux/module.h>
#include <linux/seq_file.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
/*
* 'what should we do if we get a hw irq event on an illegal vector'.
 * Each architecture has to answer this itself.
*/
void ack_bad_irq(unsigned int irq)
{
printk(KERN_ERR "Unexpected irq vector 0x%x on CPU %u!\n", irq, smp_processor_id());
}
#ifdef CONFIG_IA64_GENERIC
unsigned int __ia64_local_vector_to_irq (ia64_vector vec)
{
return (unsigned int) vec;
}
#endif
/*
* Interrupt statistics:
*/
atomic_t irq_err_count;
/*
* /proc/interrupts printing:
*/
int show_interrupts(struct seq_file *p, void *v)
{
int i = *(loff_t *) v, j;
struct irqaction * action;
unsigned long flags;
if (i == 0) {
seq_printf(p, " ");
for (j=0; j<NR_CPUS; j++)
if (cpu_online(j))
seq_printf(p, "CPU%d ",j);
seq_putc(p, '\n');
}
if (i < NR_IRQS) {
spin_lock_irqsave(&irq_desc[i].lock, flags);
action = irq_desc[i].action;
if (!action)
goto skip;
seq_printf(p, "%3d: ",i);
#ifndef CONFIG_SMP
seq_printf(p, "%10u ", kstat_irqs(i));
#else
for (j = 0; j < NR_CPUS; j++)
if (cpu_online(j))
seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
#endif
seq_printf(p, " %14s", irq_desc[i].handler->typename);
seq_printf(p, " %s", action->name);
for (action=action->next; action; action = action->next)
seq_printf(p, ", %s", action->name);
seq_putc(p, '\n');
skip:
spin_unlock_irqrestore(&irq_desc[i].lock, flags);
} else if (i == NR_IRQS)
seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
return 0;
}
#ifdef CONFIG_SMP
/*
* This is updated when the user sets irq affinity via /proc
*/
static cpumask_t __cacheline_aligned pending_irq_cpumask[NR_IRQS];
static unsigned long pending_irq_redir[BITS_TO_LONGS(NR_IRQS)];
static char irq_redir [NR_IRQS]; // = { [0 ... NR_IRQS-1] = 1 };
/*
 * Arch-specific routine for a deferred write to the IOSAPIC RTE, used to reprogram
 * the interrupt destination.
*/
void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
{
pending_irq_cpumask[irq] = mask_val;
}
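/*
 * Sketch of the deferred-affinity flow (a summary of the code in this file,
 * with a hypothetical IRQ number): setting the affinity via /proc only
 * records the request here; the IOSAPIC RTE is rewritten later by
 * move_irq(), which the IOSAPIC ack/end handlers call from the interrupt
 * path while the descriptor lock is held:
 *
 *	proc_set_irq_affinity(33, new_mask);	// remember the request
 *	...					// next interrupt on IRQ 33
 *	move_irq(33);				// applies it via ->set_affinity()
 */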
void set_irq_affinity_info (unsigned int irq, int hwid, int redir)
{
cpumask_t mask = CPU_MASK_NONE;
cpu_set(cpu_logical_id(hwid), mask);
if (irq < NR_IRQS) {
irq_affinity[irq] = mask;
irq_redir[irq] = (char) (redir & 0xff);
}
}
void move_irq(int irq)
{
/* note - we hold desc->lock */
cpumask_t tmp;
irq_desc_t *desc = irq_descp(irq);
int redir = test_bit(irq, pending_irq_redir);
if (unlikely(!desc->handler->set_affinity))
return;
if (!cpus_empty(pending_irq_cpumask[irq])) {
cpus_and(tmp, pending_irq_cpumask[irq], cpu_online_map);
if (unlikely(!cpus_empty(tmp))) {
desc->handler->set_affinity(irq | (redir ? IA64_IRQ_REDIRECTED : 0),
pending_irq_cpumask[irq]);
}
cpus_clear(pending_irq_cpumask[irq]);
}
}
#endif /* CONFIG_SMP */
#ifdef CONFIG_HOTPLUG_CPU
unsigned int vectors_in_migration[NR_IRQS];
/*
* Since cpu_online_map is already updated, we just need to check for
 * IRQs whose affinity mask no longer contains any online CPU.
*/
static void migrate_irqs(void)
{
cpumask_t mask;
irq_desc_t *desc;
int irq, new_cpu;
for (irq=0; irq < NR_IRQS; irq++) {
desc = irq_descp(irq);
/*
* No handling for now.
 * TBD: Implement a disable function so we can
 * tell the CPU not to respond to these local interrupt
 * sources, such as ITV, CPEI, MCA, etc.
*/
if (desc->status == IRQ_PER_CPU)
continue;
cpus_and(mask, irq_affinity[irq], cpu_online_map);
if (any_online_cpu(mask) == NR_CPUS) {
/*
* Save it for phase 2 processing
*/
vectors_in_migration[irq] = irq;
new_cpu = any_online_cpu(cpu_online_map);
mask = cpumask_of_cpu(new_cpu);
/*
 * All three are essential; currently WARN_ON.. maybe panic?
*/
if (desc->handler && desc->handler->disable &&
desc->handler->enable && desc->handler->set_affinity) {
desc->handler->disable(irq);
desc->handler->set_affinity(irq, mask);
desc->handler->enable(irq);
} else {
WARN_ON((!(desc->handler) || !(desc->handler->disable) ||
!(desc->handler->enable) ||
!(desc->handler->set_affinity)));
}
}
}
}
void fixup_irqs(void)
{
unsigned int irq;
extern void ia64_process_pending_intr(void);
ia64_set_itv(1<<16);
/*
* Phase 1: Locate irq's bound to this cpu and
* relocate them for cpu removal.
*/
migrate_irqs();
/*
* Phase 2: Perform interrupt processing for all entries reported in
* local APIC.
*/
ia64_process_pending_intr();
/*
* Phase 3: Now handle any interrupts not captured in local APIC.
 * This is to account for cases where a device interrupted during the time the
 * RTE was being disabled and re-programmed.
*/
for (irq=0; irq < NR_IRQS; irq++) {
if (vectors_in_migration[irq]) {
vectors_in_migration[irq]=0;
__do_IRQ(irq, NULL);
}
}
/*
 * Now let the processor die. We do irq disable and max_xtp() to
 * ensure there are no more interrupts routed to this processor.
 * But the local timer interrupt can have 1 pending, which we
 * take care of in timer_interrupt().
*/
max_xtp();
local_irq_disable();
}
#endif

278
arch/ia64/kernel/irq_ia64.c Normal file

@@ -0,0 +1,278 @@
/*
 * linux/arch/ia64/kernel/irq_ia64.c
*
* Copyright (C) 1998-2001 Hewlett-Packard Co
* Stephane Eranian <eranian@hpl.hp.com>
* David Mosberger-Tang <davidm@hpl.hp.com>
*
* 6/10/99: Updated to bring in sync with x86 version to facilitate
* support for SMP and different interrupt controllers.
*
* 09/15/00 Goutham Rao <goutham.rao@intel.com> Implemented pci_irq_to_vector
* PCI to vector allocation routine.
* 04/14/2004 Ashok Raj <ashok.raj@intel.com>
* Added CPU Hotplug handling for IPF.
*/
#include <linux/config.h>
#include <linux/module.h>
#include <linux/jiffies.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/ioport.h>
#include <linux/kernel_stat.h>
#include <linux/slab.h>
#include <linux/ptrace.h>
#include <linux/random.h> /* for rand_initialize_irq() */
#include <linux/signal.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/threads.h>
#include <linux/bitops.h>
#include <asm/delay.h>
#include <asm/intrinsics.h>
#include <asm/io.h>
#include <asm/hw_irq.h>
#include <asm/machvec.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#ifdef CONFIG_PERFMON
# include <asm/perfmon.h>
#endif
#define IRQ_DEBUG 0
/* default base addr of IPI table */
void __iomem *ipi_base_addr = ((void __iomem *)
(__IA64_UNCACHED_OFFSET | IA64_IPI_DEFAULT_BASE_ADDR));
/*
* Legacy IRQ to IA-64 vector translation table.
*/
__u8 isa_irq_to_vector_map[16] = {
/* 8259 IRQ translation, first 16 entries */
0x2f, 0x20, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29,
0x28, 0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21
};
EXPORT_SYMBOL(isa_irq_to_vector_map);
static unsigned long ia64_vector_mask[BITS_TO_LONGS(IA64_NUM_DEVICE_VECTORS)];
int
assign_irq_vector (int irq)
{
int pos, vector;
again:
pos = find_first_zero_bit(ia64_vector_mask, IA64_NUM_DEVICE_VECTORS);
vector = IA64_FIRST_DEVICE_VECTOR + pos;
if (vector > IA64_LAST_DEVICE_VECTOR)
/* XXX could look for sharable vectors instead of panic'ing... */
panic("assign_irq_vector: out of interrupt vectors!");
if (test_and_set_bit(pos, ia64_vector_mask))
goto again;
return vector;
}
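/*
 * Typical pairing of assign_irq_vector() above with free_irq_vector() below
 * (a sketch; error handling elided, AUTO_ASSIGN is what the IOSAPIC code
 * passes since the irq argument is not used):
 *
 *	int vector = assign_irq_vector(AUTO_ASSIGN);	// grab a free device vector
 *	...						// program an RTE, request_irq(), ...
 *	free_irq_vector(vector);			// give it back on unregister
 *
 * assign_irq_vector() scans ia64_vector_mask for the first clear bit and
 * retries if another CPU claims that bit first, so no explicit lock is
 * needed.
 */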
void
free_irq_vector (int vector)
{
int pos;
if (vector < IA64_FIRST_DEVICE_VECTOR || vector > IA64_LAST_DEVICE_VECTOR)
return;
pos = vector - IA64_FIRST_DEVICE_VECTOR;
if (!test_and_clear_bit(pos, ia64_vector_mask))
printk(KERN_WARNING "%s: double free!\n", __FUNCTION__);
}
#ifdef CONFIG_SMP
# define IS_RESCHEDULE(vec) (vec == IA64_IPI_RESCHEDULE)
#else
# define IS_RESCHEDULE(vec) (0)
#endif
/*
* That's where the IVT branches when we get an external
* interrupt. This branches to the correct hardware IRQ handler via
* function ptr.
*/
void
ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
{
unsigned long saved_tpr;
#if IRQ_DEBUG
{
unsigned long bsp, sp;
/*
* Note: if the interrupt happened while executing in
* the context switch routine (ia64_switch_to), we may
* get a spurious stack overflow here. This is
* because the register and the memory stack are not
* switched atomically.
*/
bsp = ia64_getreg(_IA64_REG_AR_BSP);
sp = ia64_getreg(_IA64_REG_SP);
if ((sp - bsp) < 1024) {
static unsigned char count;
static long last_time;
if (jiffies - last_time > 5*HZ)
count = 0;
if (++count < 5) {
last_time = jiffies;
printk("ia64_handle_irq: DANGER: less than "
"1KB of free stack space!!\n"
"(bsp=0x%lx, sp=%lx)\n", bsp, sp);
}
}
}
#endif /* IRQ_DEBUG */
/*
* Always set TPR to limit maximum interrupt nesting depth to
* 16 (without this, it would be ~240, which could easily lead
* to kernel stack overflows).
*/
irq_enter();
saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
ia64_srlz_d();
while (vector != IA64_SPURIOUS_INT_VECTOR) {
if (!IS_RESCHEDULE(vector)) {
ia64_setreg(_IA64_REG_CR_TPR, vector);
ia64_srlz_d();
__do_IRQ(local_vector_to_irq(vector), regs);
/*
* Disable interrupts and send EOI:
*/
local_irq_disable();
ia64_setreg(_IA64_REG_CR_TPR, saved_tpr);
}
ia64_eoi();
vector = ia64_get_ivr();
}
/*
* This must be done *after* the ia64_eoi(). For example, the keyboard softirq
* handler needs to be able to wait for further keyboard interrupts, which can't
* come through until ia64_eoi() has been done.
*/
irq_exit();
}
#ifdef CONFIG_HOTPLUG_CPU
/*
 * This function emulates interrupt processing when a CPU is about to be
* brought down.
*/
void ia64_process_pending_intr(void)
{
ia64_vector vector;
unsigned long saved_tpr;
extern unsigned int vectors_in_migration[NR_IRQS];
vector = ia64_get_ivr();
irq_enter();
saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
ia64_srlz_d();
/*
* Perform normal interrupt style processing
*/
while (vector != IA64_SPURIOUS_INT_VECTOR) {
if (!IS_RESCHEDULE(vector)) {
ia64_setreg(_IA64_REG_CR_TPR, vector);
ia64_srlz_d();
/*
 * Now try calling normal ia64_handle_irq as it would have been called
 * from a real interrupt handler. Try passing NULL for pt_regs;
 * hopefully it will work.
 * This could probably share code with ia64_handle_irq().
*/
vectors_in_migration[local_vector_to_irq(vector)]=0;
__do_IRQ(local_vector_to_irq(vector), NULL);
/*
* Disable interrupts and send EOI
*/
local_irq_disable();
ia64_setreg(_IA64_REG_CR_TPR, saved_tpr);
}
ia64_eoi();
vector = ia64_get_ivr();
}
irq_exit();
}
#endif
#ifdef CONFIG_SMP
extern irqreturn_t handle_IPI (int irq, void *dev_id, struct pt_regs *regs);
static struct irqaction ipi_irqaction = {
.handler = handle_IPI,
.flags = SA_INTERRUPT,
.name = "IPI"
};
#endif
void
register_percpu_irq (ia64_vector vec, struct irqaction *action)
{
irq_desc_t *desc;
unsigned int irq;
for (irq = 0; irq < NR_IRQS; ++irq)
if (irq_to_vector(irq) == vec) {
desc = irq_descp(irq);
desc->status |= IRQ_PER_CPU;
desc->handler = &irq_type_ia64_lsapic;
if (action)
setup_irq(irq, action);
}
}
void __init
init_IRQ (void)
{
register_percpu_irq(IA64_SPURIOUS_INT_VECTOR, NULL);
#ifdef CONFIG_SMP
register_percpu_irq(IA64_IPI_VECTOR, &ipi_irqaction);
#endif
#ifdef CONFIG_PERFMON
pfm_init_percpu();
#endif
platform_irq_init();
}
void
ia64_send_ipi (int cpu, int vector, int delivery_mode, int redirect)
{
void __iomem *ipi_addr;
unsigned long ipi_data;
unsigned long phys_cpu_id;
#ifdef CONFIG_SMP
phys_cpu_id = cpu_physical_id(cpu);
#else
phys_cpu_id = (ia64_getreg(_IA64_REG_CR_LID) >> 16) & 0xffff;
#endif
/*
* cpu number is in 8bit ID and 8bit EID
*/
ipi_data = (delivery_mode << 8) | (vector & 0xff);
ipi_addr = ipi_base_addr + ((phys_cpu_id << 4) | ((redirect & 1) << 3));
writeq(ipi_data, ipi_addr);
}
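/*
 * Worked example for the address/data computation above (illustrative
 * numbers): sending vector 0xd0 with delivery_mode 0 and redirect 0 to a
 * CPU whose physical id is 0x0103 gives
 *
 *	ipi_data = (0 << 8) | 0xd0 = 0xd0
 *	ipi_addr = ipi_base_addr + (0x0103 << 4)
 *
 * i.e. each (id, eid) pair selects its own 16-byte slot in the processor
 * interrupt block, and a non-zero redirect would select the slot's upper
 * 8-byte half instead.
 */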


@@ -0,0 +1,37 @@
/*
* LSAPIC Interrupt Controller
*
* This takes care of interrupts that are generated by the CPU's
* internal Streamlined Advanced Programmable Interrupt Controller
* (LSAPIC), such as the ITC and IPI interrupts.
*
* Copyright (C) 1999 VA Linux Systems
* Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
* Copyright (C) 2000 Hewlett-Packard Co
* Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com>
*/
#include <linux/sched.h>
#include <linux/irq.h>
static unsigned int
lsapic_noop_startup (unsigned int irq)
{
return 0;
}
static void
lsapic_noop (unsigned int irq)
{
/* nothing to do... */
}
struct hw_interrupt_type irq_type_ia64_lsapic = {
.typename = "LSAPIC",
.startup = lsapic_noop_startup,
.shutdown = lsapic_noop,
.enable = lsapic_noop,
.disable = lsapic_noop,
.ack = lsapic_noop,
.end = lsapic_noop
};

1619
arch/ia64/kernel/ivt.S Normal file

Diff is too large to display.


@@ -0,0 +1,70 @@
#include <linux/config.h>
#include <linux/module.h>
#include <asm/machvec.h>
#include <asm/system.h>
#ifdef CONFIG_IA64_GENERIC
#include <linux/kernel.h>
#include <linux/string.h>
#include <asm/page.h>
struct ia64_machine_vector ia64_mv;
EXPORT_SYMBOL(ia64_mv);
static struct ia64_machine_vector *
lookup_machvec (const char *name)
{
extern struct ia64_machine_vector machvec_start[];
extern struct ia64_machine_vector machvec_end[];
struct ia64_machine_vector *mv;
for (mv = machvec_start; mv < machvec_end; ++mv)
if (strcmp (mv->name, name) == 0)
return mv;
return 0;
}
void
machvec_init (const char *name)
{
struct ia64_machine_vector *mv;
mv = lookup_machvec(name);
if (!mv) {
panic("generic kernel failed to find machine vector for platform %s!", name);
}
ia64_mv = *mv;
printk(KERN_INFO "booting generic kernel on platform %s\n", name);
}
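/*
 * Example (a hypothetical call site; "dig" is the name of the DIG-compliant
 * machine vector): early setup code on a generic kernel would do
 *
 *	machvec_init("dig");
 *
 * which copies the matching entry from the machvec section into ia64_mv, or
 * panics if no machine vector of that name was linked into the kernel.
 */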
#endif /* CONFIG_IA64_GENERIC */
void
machvec_setup (char **arg)
{
}
EXPORT_SYMBOL(machvec_setup);
void
machvec_timer_interrupt (int irq, void *dev_id, struct pt_regs *regs)
{
}
EXPORT_SYMBOL(machvec_timer_interrupt);
void
machvec_dma_sync_single (struct device *hwdev, dma_addr_t dma_handle, size_t size, int dir)
{
mb();
}
EXPORT_SYMBOL(machvec_dma_sync_single);
void
machvec_dma_sync_sg (struct device *hwdev, struct scatterlist *sg, int n, int dir)
{
mb();
}
EXPORT_SYMBOL(machvec_dma_sync_sg);

1470
arch/ia64/kernel/mca.c Normal file

Diff is too large to display.

928
arch/ia64/kernel/mca_asm.S Normal file

@@ -0,0 +1,928 @@
//
// assembly portion of the IA64 MCA handling
//
// Mods by cfleck to integrate into kernel build
// 00/03/15 davidm Added various stop bits to get a clean compile
//
// 00/03/29 cfleck Added code to save INIT handoff state in pt_regs format, switch to temp
// kstack, switch modes, jump to C INIT handler
//
// 02/01/04 J.Hall <jenna.s.hall@intel.com>
// Before entering virtual mode code:
// 1. Check for TLB CPU error
// 2. Restore current thread pointer to kr6
// 3. Move stack ptr 16 bytes to conform to C calling convention
//
// 04/11/12 Russ Anderson <rja@sgi.com>
// Added per cpu MCA/INIT stack save areas.
//
#include <linux/config.h>
#include <linux/threads.h>
#include <asm/asmmacro.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/mca_asm.h>
#include <asm/mca.h>
/*
* When we get a machine check, the kernel stack pointer is no longer
* valid, so we need to set a new stack pointer.
*/
#define MINSTATE_PHYS /* Make sure stack access is physical for MINSTATE */
/*
* Needed for return context to SAL
*/
#define IA64_MCA_SAME_CONTEXT 0
#define IA64_MCA_COLD_BOOT -2
#include "minstate.h"
/*
* SAL_TO_OS_MCA_HANDOFF_STATE (SAL 3.0 spec)
* 1. GR1 = OS GP
* 2. GR8 = PAL_PROC physical address
* 3. GR9 = SAL_PROC physical address
* 4. GR10 = SAL GP (physical)
* 5. GR11 = Rendez state
* 6. GR12 = Return address to location within SAL_CHECK
*/
#define SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(_tmp) \
LOAD_PHYSICAL(p0, _tmp, ia64_sal_to_os_handoff_state);; \
st8 [_tmp]=r1,0x08;; \
st8 [_tmp]=r8,0x08;; \
st8 [_tmp]=r9,0x08;; \
st8 [_tmp]=r10,0x08;; \
st8 [_tmp]=r11,0x08;; \
st8 [_tmp]=r12,0x08;; \
st8 [_tmp]=r17,0x08;; \
st8 [_tmp]=r18,0x08
/*
* OS_MCA_TO_SAL_HANDOFF_STATE (SAL 3.0 spec)
* (p6) is executed if we never entered virtual mode (TLB error)
* (p7) is executed if we entered virtual mode as expected (normal case)
* 1. GR8 = OS_MCA return status
* 2. GR9 = SAL GP (physical)
* 3. GR10 = 0/1 returning same/new context
* 4. GR22 = New min state save area pointer
* returns ptr to SAL rtn save loc in _tmp
*/
#define OS_MCA_TO_SAL_HANDOFF_STATE_RESTORE(_tmp) \
movl _tmp=ia64_os_to_sal_handoff_state;; \
DATA_VA_TO_PA(_tmp);; \
ld8 r8=[_tmp],0x08;; \
ld8 r9=[_tmp],0x08;; \
ld8 r10=[_tmp],0x08;; \
ld8 r22=[_tmp],0x08;;
// now _tmp is pointing to SAL rtn save location
/*
* COLD_BOOT_HANDOFF_STATE() sets ia64_mca_os_to_sal_state
* imots_os_status=IA64_MCA_COLD_BOOT
* imots_sal_gp=SAL GP
* imots_context=IA64_MCA_SAME_CONTEXT
* imots_new_min_state=Min state save area pointer
* imots_sal_check_ra=Return address to location within SAL_CHECK
*
*/
#define COLD_BOOT_HANDOFF_STATE(sal_to_os_handoff,os_to_sal_handoff,tmp)\
movl tmp=IA64_MCA_COLD_BOOT; \
movl sal_to_os_handoff=__pa(ia64_sal_to_os_handoff_state); \
movl os_to_sal_handoff=__pa(ia64_os_to_sal_handoff_state);; \
st8 [os_to_sal_handoff]=tmp,8;; \
ld8 tmp=[sal_to_os_handoff],48;; \
st8 [os_to_sal_handoff]=tmp,8;; \
movl tmp=IA64_MCA_SAME_CONTEXT;; \
st8 [os_to_sal_handoff]=tmp,8;; \
ld8 tmp=[sal_to_os_handoff],-8;; \
st8 [os_to_sal_handoff]=tmp,8;; \
ld8 tmp=[sal_to_os_handoff];; \
st8 [os_to_sal_handoff]=tmp;;
#define GET_IA64_MCA_DATA(reg) \
GET_THIS_PADDR(reg, ia64_mca_data) \
;; \
ld8 reg=[reg]
.global ia64_os_mca_dispatch
.global ia64_os_mca_dispatch_end
.global ia64_sal_to_os_handoff_state
.global ia64_os_to_sal_handoff_state
.text
.align 16
ia64_os_mca_dispatch:
// Serialize all MCA processing
mov r3=1;;
LOAD_PHYSICAL(p0,r2,ia64_mca_serialize);;
ia64_os_mca_spin:
xchg8 r4=[r2],r3;;
cmp.ne p6,p0=r4,r0
(p6) br ia64_os_mca_spin
// Save the SAL to OS MCA handoff state as defined
// by SAL SPEC 3.0
// NOTE : The order in which the state gets saved
// is dependent on the way the C-structure
// for ia64_mca_sal_to_os_state_t has been
// defined in include/asm/mca.h
SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(r2)
;;
// LOG PROCESSOR STATE INFO FROM HERE ON..
begin_os_mca_dump:
br ia64_os_mca_proc_state_dump;;
ia64_os_mca_done_dump:
LOAD_PHYSICAL(p0,r16,ia64_sal_to_os_handoff_state+56)
;;
ld8 r18=[r16] // Get processor state parameter on existing PALE_CHECK.
;;
tbit.nz p6,p7=r18,60
(p7) br.spnt done_tlb_purge_and_reload
// The following code purges TC and TR entries. Then reload all TC entries.
// Purge percpu data TC entries.
begin_tlb_purge_and_reload:
#define O(member) IA64_CPUINFO_##member##_OFFSET
GET_THIS_PADDR(r2, cpu_info) // load phys addr of cpu_info into r2
;;
addl r17=O(PTCE_STRIDE),r2
addl r2=O(PTCE_BASE),r2
;;
ld8 r18=[r2],(O(PTCE_COUNT)-O(PTCE_BASE));; // r18=ptce_base
ld4 r19=[r2],4 // r19=ptce_count[0]
ld4 r21=[r17],4 // r21=ptce_stride[0]
;;
ld4 r20=[r2] // r20=ptce_count[1]
ld4 r22=[r17] // r22=ptce_stride[1]
mov r24=0
;;
adds r20=-1,r20
;;
#undef O
2:
cmp.ltu p6,p7=r24,r19
(p7) br.cond.dpnt.few 4f
mov ar.lc=r20
3:
ptc.e r18
;;
add r18=r22,r18
br.cloop.sptk.few 3b
;;
add r18=r21,r18
add r24=1,r24
;;
br.sptk.few 2b
4:
srlz.i // srlz.i implies srlz.d
;;
// Now purge addresses formerly mapped by TR registers
// 1. Purge ITR&DTR for kernel.
movl r16=KERNEL_START
mov r18=KERNEL_TR_PAGE_SHIFT<<2
;;
ptr.i r16, r18
ptr.d r16, r18
;;
srlz.i
;;
srlz.d
;;
// 2. Purge DTR for PERCPU data.
movl r16=PERCPU_ADDR
mov r18=PERCPU_PAGE_SHIFT<<2
;;
ptr.d r16,r18
;;
srlz.d
;;
// 3. Purge ITR for PAL code.
GET_THIS_PADDR(r2, ia64_mca_pal_base)
;;
ld8 r16=[r2]
mov r18=IA64_GRANULE_SHIFT<<2
;;
ptr.i r16,r18
;;
srlz.i
;;
// 4. Purge DTR for stack.
mov r16=IA64_KR(CURRENT_STACK)
;;
shl r16=r16,IA64_GRANULE_SHIFT
movl r19=PAGE_OFFSET
;;
add r16=r19,r16
mov r18=IA64_GRANULE_SHIFT<<2
;;
ptr.d r16,r18
;;
srlz.i
;;
// Finally reload the TR registers.
// 1. Reload DTR/ITR registers for kernel.
mov r18=KERNEL_TR_PAGE_SHIFT<<2
movl r17=KERNEL_START
;;
mov cr.itir=r18
mov cr.ifa=r17
mov r16=IA64_TR_KERNEL
mov r19=ip
movl r18=PAGE_KERNEL
;;
dep r17=0,r19,0, KERNEL_TR_PAGE_SHIFT
;;
or r18=r17,r18
;;
itr.i itr[r16]=r18
;;
itr.d dtr[r16]=r18
;;
srlz.i
srlz.d
;;
// 2. Reload DTR register for PERCPU data.
GET_THIS_PADDR(r2, ia64_mca_per_cpu_pte)
;;
movl r16=PERCPU_ADDR // vaddr
movl r18=PERCPU_PAGE_SHIFT<<2
;;
mov cr.itir=r18
mov cr.ifa=r16
;;
ld8 r18=[r2] // load per-CPU PTE
mov r16=IA64_TR_PERCPU_DATA;
;;
itr.d dtr[r16]=r18
;;
srlz.d
;;
// 3. Reload ITR for PAL code.
GET_THIS_PADDR(r2, ia64_mca_pal_pte)
;;
ld8 r18=[r2] // load PAL PTE
;;
GET_THIS_PADDR(r2, ia64_mca_pal_base)
;;
ld8 r16=[r2] // load PAL vaddr
mov r19=IA64_GRANULE_SHIFT<<2
;;
mov cr.itir=r19
mov cr.ifa=r16
mov r20=IA64_TR_PALCODE
;;
itr.i itr[r20]=r18
;;
srlz.i
;;
// 4. Reload DTR for stack.
mov r16=IA64_KR(CURRENT_STACK)
;;
shl r16=r16,IA64_GRANULE_SHIFT
movl r19=PAGE_OFFSET
;;
add r18=r19,r16
movl r20=PAGE_KERNEL
;;
add r16=r20,r16
mov r19=IA64_GRANULE_SHIFT<<2
;;
mov cr.itir=r19
mov cr.ifa=r18
mov r20=IA64_TR_CURRENT_STACK
;;
itr.d dtr[r20]=r16
;;
srlz.d
;;
br.sptk.many done_tlb_purge_and_reload
err:
COLD_BOOT_HANDOFF_STATE(r20,r21,r22)
br.sptk.many ia64_os_mca_done_restore
done_tlb_purge_and_reload:
// Setup new stack frame for OS_MCA handling
GET_IA64_MCA_DATA(r2)
;;
add r3 = IA64_MCA_CPU_STACKFRAME_OFFSET, r2
add r2 = IA64_MCA_CPU_RBSTORE_OFFSET, r2
;;
rse_switch_context(r6,r3,r2);; // RSC management in this new context
GET_IA64_MCA_DATA(r2)
;;
add r2 = IA64_MCA_CPU_STACK_OFFSET+IA64_MCA_STACK_SIZE-16, r2
;;
mov r12=r2 // establish new stack-pointer
// Enter virtual mode from physical mode
VIRTUAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_begin, r4)
ia64_os_mca_virtual_begin:
// Call virtual mode handler
movl r2=ia64_mca_ucmc_handler;;
mov b6=r2;;
br.call.sptk.many b0=b6;;
.ret0:
// Revert back to physical mode before going back to SAL
PHYSICAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_end, r4)
ia64_os_mca_virtual_end:
// restore the original stack frame here
GET_IA64_MCA_DATA(r2)
;;
add r2 = IA64_MCA_CPU_STACKFRAME_OFFSET, r2
;;
movl r4=IA64_PSR_MC
;;
rse_return_context(r4,r3,r2) // switch from interrupt context for RSE
// let us restore all the registers from our PSI structure
mov r8=gp
;;
begin_os_mca_restore:
br ia64_os_mca_proc_state_restore;;
ia64_os_mca_done_restore:
OS_MCA_TO_SAL_HANDOFF_STATE_RESTORE(r2);;
// branch back to SALE_CHECK
ld8 r3=[r2];;
mov b0=r3;; // SAL_CHECK return address
// release lock
movl r3=ia64_mca_serialize;;
DATA_VA_TO_PA(r3);;
st8.rel [r3]=r0
br b0
;;
ia64_os_mca_dispatch_end:
//EndMain//////////////////////////////////////////////////////////////////////
//++
// Name:
// ia64_os_mca_proc_state_dump()
//
// Stub Description:
//
// This stub dumps the processor state during MCHK to a data area
//
//--
ia64_os_mca_proc_state_dump:
// Save bank 1 GRs 16-31 which will be used by c-language code when we switch
// to virtual addressing mode.
GET_IA64_MCA_DATA(r2)
;;
add r2 = IA64_MCA_CPU_PROC_STATE_DUMP_OFFSET, r2
;;
// save ar.NaT
mov r5=ar.unat // ar.unat
// save banked GRs 16-31 along with NaT bits
bsw.1;;
st8.spill [r2]=r16,8;;
st8.spill [r2]=r17,8;;
st8.spill [r2]=r18,8;;
st8.spill [r2]=r19,8;;
st8.spill [r2]=r20,8;;
st8.spill [r2]=r21,8;;
st8.spill [r2]=r22,8;;
st8.spill [r2]=r23,8;;
st8.spill [r2]=r24,8;;
st8.spill [r2]=r25,8;;
st8.spill [r2]=r26,8;;
st8.spill [r2]=r27,8;;
st8.spill [r2]=r28,8;;
st8.spill [r2]=r29,8;;
st8.spill [r2]=r30,8;;
st8.spill [r2]=r31,8;;
mov r4=ar.unat;;
st8 [r2]=r4,8 // save User NaT bits for r16-r31
mov ar.unat=r5 // restore original unat
bsw.0;;
//save BRs
add r4=8,r2 // duplicate r2 in r4
add r6=2*8,r2 // duplicate r2 in r4
mov r3=b0
mov r5=b1
mov r7=b2;;
st8 [r2]=r3,3*8
st8 [r4]=r5,3*8
st8 [r6]=r7,3*8;;
mov r3=b3
mov r5=b4
mov r7=b5;;
st8 [r2]=r3,3*8
st8 [r4]=r5,3*8
st8 [r6]=r7,3*8;;
mov r3=b6
mov r5=b7;;
st8 [r2]=r3,2*8
st8 [r4]=r5,2*8;;
cSaveCRs:
// save CRs
add r4=8,r2 // duplicate r2 in r4
add r6=2*8,r2 // duplicate r2 in r4
mov r3=cr.dcr
mov r5=cr.itm
mov r7=cr.iva;;
st8 [r2]=r3,8*8
st8 [r4]=r5,3*8
st8 [r6]=r7,3*8;; // 48 byte increments
mov r3=cr.pta;;
st8 [r2]=r3,8*8;; // 64 byte increments
// if PSR.ic=0, reading interruption registers causes an illegal operation fault
mov r3=psr;;
tbit.nz.unc p6,p0=r3,PSR_IC;; // PSI Valid Log bit pos. test
(p6) st8 [r2]=r0,9*8+160 // increment by 232 byte inc.
begin_skip_intr_regs:
(p6) br SkipIntrRegs;;
add r4=8,r2 // duplicate r2 in r4
add r6=2*8,r2 // duplicate r2 in r6
mov r3=cr.ipsr
mov r5=cr.isr
mov r7=r0;;
st8 [r2]=r3,3*8
st8 [r4]=r5,3*8
st8 [r6]=r7,3*8;;
mov r3=cr.iip
mov r5=cr.ifa
mov r7=cr.itir;;
st8 [r2]=r3,3*8
st8 [r4]=r5,3*8
st8 [r6]=r7,3*8;;
mov r3=cr.iipa
mov r5=cr.ifs
mov r7=cr.iim;;
st8 [r2]=r3,3*8
st8 [r4]=r5,3*8
st8 [r6]=r7,3*8;;
mov r3=cr25;; // cr.iha
st8 [r2]=r3,160;; // 160 byte increment
SkipIntrRegs:
st8 [r2]=r0,152;; // another 152 byte increment.
add r4=8,r2 // duplicate r2 in r4
add r6=2*8,r2 // duplicate r2 in r6
mov r3=cr.lid
// mov r5=cr.ivr // cr.ivr, don't read it
mov r7=cr.tpr;;
st8 [r2]=r3,3*8
st8 [r4]=r5,3*8
st8 [r6]=r7,3*8;;
mov r3=r0 // cr.eoi => cr67
mov r5=r0 // cr.irr0 => cr68
mov r7=r0;; // cr.irr1 => cr69
st8 [r2]=r3,3*8
st8 [r4]=r5,3*8
st8 [r6]=r7,3*8;;
mov r3=r0 // cr.irr2 => cr70
mov r5=r0 // cr.irr3 => cr71
mov r7=cr.itv;;
st8 [r2]=r3,3*8
st8 [r4]=r5,3*8
st8 [r6]=r7,3*8;;
mov r3=cr.pmv
mov r5=cr.cmcv;;
st8 [r2]=r3,7*8
st8 [r4]=r5,7*8;;
mov r3=r0 // cr.lrr0 => cr80
mov r5=r0;; // cr.lrr1 => cr81
st8 [r2]=r3,23*8
st8 [r4]=r5,23*8;;
adds r2=25*8,r2;;
cSaveARs:
// save ARs
add r4=8,r2 // duplicate r2 in r4
add r6=2*8,r2 // duplicate r2 in r6
mov r3=ar.k0
mov r5=ar.k1
mov r7=ar.k2;;
st8 [r2]=r3,3*8
st8 [r4]=r5,3*8
st8 [r6]=r7,3*8;;
mov r3=ar.k3
mov r5=ar.k4
mov r7=ar.k5;;
st8 [r2]=r3,3*8
st8 [r4]=r5,3*8
st8 [r6]=r7,3*8;;
mov r3=ar.k6
mov r5=ar.k7
mov r7=r0;; // ar.kr8
st8 [r2]=r3,10*8
st8 [r4]=r5,10*8
st8 [r6]=r7,10*8;; // increment by 72 bytes
mov r3=ar.rsc
mov ar.rsc=r0 // put RSE in enforced lazy mode
mov r5=ar.bsp
;;
mov r7=ar.bspstore;;
st8 [r2]=r3,3*8
st8 [r4]=r5,3*8
st8 [r6]=r7,3*8;;
mov r3=ar.rnat;;
st8 [r2]=r3,8*13 // increment by 13x8 bytes
mov r3=ar.ccv;;
st8 [r2]=r3,8*4
mov r3=ar.unat;;
st8 [r2]=r3,8*4
mov r3=ar.fpsr;;
st8 [r2]=r3,8*4
mov r3=ar.itc;;
st8 [r2]=r3,160 // 160
mov r3=ar.pfs;;
st8 [r2]=r3,8
mov r3=ar.lc;;
st8 [r2]=r3,8
mov r3=ar.ec;;
st8 [r2]=r3
add r2=8*62,r2 //padding
// save RRs
mov ar.lc=0x08-1
movl r4=0x00;;
cStRR:
dep.z r5=r4,61,3;;
mov r3=rr[r5];;
st8 [r2]=r3,8
add r4=1,r4
br.cloop.sptk.few cStRR
;;
end_os_mca_dump:
br ia64_os_mca_done_dump;;
//EndStub//////////////////////////////////////////////////////////////////////
//++
// Name:
// ia64_os_mca_proc_state_restore()
//
// Stub Description:
//
// This is a stub to restore the saved processor state during MCHK
//
//--
ia64_os_mca_proc_state_restore:
// Restore bank1 GR16-31
GET_IA64_MCA_DATA(r2)
;;
add r2 = IA64_MCA_CPU_PROC_STATE_DUMP_OFFSET, r2
restore_GRs: // restore bank-1 GRs 16-31
bsw.1;;
add r3=16*8,r2;; // to get to NaT of GR 16-31
ld8 r3=[r3];;
mov ar.unat=r3;; // first restore NaT
ld8.fill r16=[r2],8;;
ld8.fill r17=[r2],8;;
ld8.fill r18=[r2],8;;
ld8.fill r19=[r2],8;;
ld8.fill r20=[r2],8;;
ld8.fill r21=[r2],8;;
ld8.fill r22=[r2],8;;
ld8.fill r23=[r2],8;;
ld8.fill r24=[r2],8;;
ld8.fill r25=[r2],8;;
ld8.fill r26=[r2],8;;
ld8.fill r27=[r2],8;;
ld8.fill r28=[r2],8;;
ld8.fill r29=[r2],8;;
ld8.fill r30=[r2],8;;
ld8.fill r31=[r2],8;;
ld8 r3=[r2],8;; // increment to skip NaT
bsw.0;;
restore_BRs:
add r4=8,r2 // duplicate r2 in r4
add r6=2*8,r2;; // duplicate r2 in r4
ld8 r3=[r2],3*8
ld8 r5=[r4],3*8
ld8 r7=[r6],3*8;;
mov b0=r3
mov b1=r5
mov b2=r7;;
ld8 r3=[r2],3*8
ld8 r5=[r4],3*8
ld8 r7=[r6],3*8;;
mov b3=r3
mov b4=r5
mov b5=r7;;
ld8 r3=[r2],2*8
ld8 r5=[r4],2*8;;
mov b6=r3
mov b7=r5;;
restore_CRs:
add r4=8,r2 // duplicate r2 in r4
add r6=2*8,r2;; // duplicate r2 in r4
ld8 r3=[r2],8*8
ld8 r5=[r4],3*8
ld8 r7=[r6],3*8;; // 48 byte increments
mov cr.dcr=r3
mov cr.itm=r5
mov cr.iva=r7;;
ld8 r3=[r2],8*8;; // 64 byte increments
// mov cr.pta=r3
// if PSR.ic=1, reading interruption registers causes an illegal operation fault
mov r3=psr;;
tbit.nz.unc p6,p0=r3,PSR_IC;; // PSI Valid Log bit pos. test
(p6) st8 [r2]=r0,9*8+160 // increment by 232 byte inc.
begin_rskip_intr_regs:
(p6) br rSkipIntrRegs;;
add r4=8,r2 // duplicate r2 in r4
add r6=2*8,r2;; // duplicate r2 in r4
ld8 r3=[r2],3*8
ld8 r5=[r4],3*8
ld8 r7=[r6],3*8;;
mov cr.ipsr=r3
// mov cr.isr=r5 // cr.isr is read only
ld8 r3=[r2],3*8
ld8 r5=[r4],3*8
ld8 r7=[r6],3*8;;
mov cr.iip=r3
mov cr.ifa=r5
mov cr.itir=r7;;
ld8 r3=[r2],3*8
ld8 r5=[r4],3*8
ld8 r7=[r6],3*8;;
mov cr.iipa=r3
mov cr.ifs=r5
mov cr.iim=r7
ld8 r3=[r2],160;; // 160 byte increment
mov cr.iha=r3
rSkipIntrRegs:
ld8 r3=[r2],152;; // another 152 byte inc.
add r4=8,r2 // duplicate r2 in r4
add r6=2*8,r2;; // duplicate r2 in r6
ld8 r3=[r2],8*3
ld8 r5=[r4],8*3
ld8 r7=[r6],8*3;;
mov cr.lid=r3
// mov cr.ivr=r5 // cr.ivr is read only
mov cr.tpr=r7;;
ld8 r3=[r2],8*3
ld8 r5=[r4],8*3
ld8 r7=[r6],8*3;;
// mov cr.eoi=r3
// mov cr.irr0=r5 // cr.irr0 is read only
// mov cr.irr1=r7;; // cr.irr1 is read only
ld8 r3=[r2],8*3
ld8 r5=[r4],8*3
ld8 r7=[r6],8*3;;
// mov cr.irr2=r3 // cr.irr2 is read only
// mov cr.irr3=r5 // cr.irr3 is read only
mov cr.itv=r7;;
ld8 r3=[r2],8*7
ld8 r5=[r4],8*7;;
mov cr.pmv=r3
mov cr.cmcv=r5;;
ld8 r3=[r2],8*23
ld8 r5=[r4],8*23;;
adds r2=8*23,r2
adds r4=8*23,r4;;
// mov cr.lrr0=r3
// mov cr.lrr1=r5
adds r2=8*2,r2;;
restore_ARs:
add r4=8,r2 // duplicate r2 in r4
add r6=2*8,r2;; // duplicate r2 in r4
ld8 r3=[r2],3*8
ld8 r5=[r4],3*8
ld8 r7=[r6],3*8;;
mov ar.k0=r3
mov ar.k1=r5
mov ar.k2=r7;;
ld8 r3=[r2],3*8
ld8 r5=[r4],3*8
ld8 r7=[r6],3*8;;
mov ar.k3=r3
mov ar.k4=r5
mov ar.k5=r7;;
ld8 r3=[r2],10*8
ld8 r5=[r4],10*8
ld8 r7=[r6],10*8;;
mov ar.k6=r3
mov ar.k7=r5
;;
ld8 r3=[r2],3*8
ld8 r5=[r4],3*8
ld8 r7=[r6],3*8;;
// mov ar.rsc=r3
// mov ar.bsp=r5 // ar.bsp is read only
mov ar.rsc=r0 // make sure that RSE is in enforced lazy mode
;;
mov ar.bspstore=r7;;
ld8 r9=[r2],8*13;;
mov ar.rnat=r9
mov ar.rsc=r3
ld8 r3=[r2],8*4;;
mov ar.ccv=r3
ld8 r3=[r2],8*4;;
mov ar.unat=r3
ld8 r3=[r2],8*4;;
mov ar.fpsr=r3
ld8 r3=[r2],160;; // 160
// mov ar.itc=r3
ld8 r3=[r2],8;;
mov ar.pfs=r3
ld8 r3=[r2],8;;
mov ar.lc=r3
ld8 r3=[r2];;
mov ar.ec=r3
add r2=8*62,r2;; // padding
restore_RRs:
mov r5=ar.lc
mov ar.lc=0x08-1
movl r4=0x00;;
cStRRr:
dep.z r7=r4,61,3
ld8 r3=[r2],8;;
mov rr[r7]=r3 // what are its access privileges?
add r4=1,r4
br.cloop.sptk.few cStRRr
;;
mov ar.lc=r5
;;
end_os_mca_restore:
br ia64_os_mca_done_restore;;
//EndStub//////////////////////////////////////////////////////////////////////
// OK, the issue here is that we need to save state information so
// it can be usable by the kernel debugger and show_regs routines.
// In order to do this, our best bet is to save the current state (plus
// the state information obtained from the MIN_STATE_AREA) into a pt_regs
// format. This way we can pass it on in a usable format.
//
//
// SAL to OS entry point for INIT on the monarch processor
// This has been defined for registration purposes with SAL
// as a part of ia64_mca_init.
//
// When we get here, the following registers have been
// set by the SAL for our use
//
// 1. GR1 = OS INIT GP
// 2. GR8 = PAL_PROC physical address
// 3. GR9 = SAL_PROC physical address
// 4. GR10 = SAL GP (physical)
// 5. GR11 = Init Reason
// 0 = Received INIT for event other than crash dump switch
// 1 = Received wakeup at the end of an OS_MCA corrected machine check
// 2 = Received INIT due to CrashDump switch assertion
//
// 6. GR12 = Return address to location within SAL_INIT procedure
GLOBAL_ENTRY(ia64_monarch_init_handler)
.prologue
// stash the information the SAL passed to os
SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(r2)
;;
SAVE_MIN_WITH_COVER
;;
mov r8=cr.ifa
mov r9=cr.isr
adds r3=8,r2 // set up second base pointer
;;
SAVE_REST
// ok, enough should be saved at this point to be dangerous, and supply
// information for a dump
// We need to switch to Virtual mode before hitting the C functions.
movl r2=IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN
mov r3=psr // get the current psr, minimum enabled at this point
;;
or r2=r2,r3
;;
movl r3=IVirtual_Switch
;;
mov cr.iip=r3 // short return to set the appropriate bits
mov cr.ipsr=r2 // need to do an rfi to set appropriate bits
;;
rfi
;;
IVirtual_Switch:
//
// We should now be running virtual
//
// Let's call the C handler to get the rest of the state info
//
alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!)
;;
adds out0=16,sp // out0 = pointer to pt_regs
;;
DO_SAVE_SWITCH_STACK
.body
adds out1=16,sp // out1 = pointer to switch_stack
br.call.sptk.many rp=ia64_init_handler
.ret1:
return_from_init:
br.sptk return_from_init
END(ia64_monarch_init_handler)
//
// SAL to OS entry point for INIT on the slave processor
// This has been defined for registration purposes with SAL
// as a part of ia64_mca_init.
//
GLOBAL_ENTRY(ia64_slave_init_handler)
1: br.sptk 1b
END(ia64_slave_init_handler)

639
arch/ia64/kernel/mca_drv.c Normal file

@@ -0,0 +1,639 @@
/*
* File: mca_drv.c
* Purpose: Generic MCA handling layer
*
* Copyright (C) 2004 FUJITSU LIMITED
* Copyright (C) Hidetoshi Seto (seto.hidetoshi@jp.fujitsu.com)
*/
#include <linux/config.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kallsyms.h>
#include <linux/smp_lock.h>
#include <linux/bootmem.h>
#include <linux/acpi.h>
#include <linux/timer.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/smp.h>
#include <linux/workqueue.h>
#include <linux/mm.h>
#include <asm/delay.h>
#include <asm/machvec.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/system.h>
#include <asm/sal.h>
#include <asm/mca.h>
#include <asm/irq.h>
#include <asm/hw_irq.h>
#include "mca_drv.h"
/* max size of SAL error record (default) */
static int sal_rec_max = 10000;
/* from mca.c */
static ia64_mca_sal_to_os_state_t *sal_to_os_handoff_state;
static ia64_mca_os_to_sal_state_t *os_to_sal_handoff_state;
/* from mca_drv_asm.S */
extern void *mca_handler_bhhook(void);
static DEFINE_SPINLOCK(mca_bh_lock);
typedef enum {
MCA_IS_LOCAL = 0,
MCA_IS_GLOBAL = 1
} mca_type_t;
#define MAX_PAGE_ISOLATE 1024
static struct page *page_isolate[MAX_PAGE_ISOLATE];
static int num_page_isolate = 0;
typedef enum {
ISOLATE_NG = 0,
ISOLATE_OK = 1
} isolate_status_t;
/*
* This pool keeps pointers to the section part of SAL error record
*/
static struct {
slidx_list_t *buffer; /* section pointer list pool */
int cur_idx; /* Current index of section pointer list pool */
int max_idx; /* Maximum index of section pointer list pool */
} slidx_pool;
/**
* mca_page_isolate - isolate a poisoned page in order not to use it later
* @paddr: poisoned memory location
*
* Return value:
* ISOLATE_OK / ISOLATE_NG
*/
static isolate_status_t
mca_page_isolate(unsigned long paddr)
{
int i;
struct page *p;
/* whether physical address is valid or not */
if ( !ia64_phys_addr_valid(paddr) )
return ISOLATE_NG;
/* convert physical address to physical page number */
p = pfn_to_page(paddr>>PAGE_SHIFT);
/* check whether the page has already been registered or not */
for( i = 0; i < num_page_isolate; i++ )
if( page_isolate[i] == p )
return ISOLATE_OK; /* already listed */
/* limitation check */
if( num_page_isolate == MAX_PAGE_ISOLATE )
return ISOLATE_NG;
/* reject pages with the 'SLAB' or 'Reserved' attribute */
if( PageSlab(p) || PageReserved(p) )
return ISOLATE_NG;
/* add attribute 'Reserved' and register the page */
SetPageReserved(p);
page_isolate[num_page_isolate++] = p;
return ISOLATE_OK;
}
/**
* mca_handler_bh - Kill the process that hit a memory read error
* @paddr: poisoned address received from MCA Handler
*/
void
mca_handler_bh(unsigned long paddr)
{
printk(KERN_DEBUG "OS_MCA: process [pid: %d](%s) encounters MCA.\n",
current->pid, current->comm);
spin_lock(&mca_bh_lock);
if (mca_page_isolate(paddr) == ISOLATE_OK) {
printk(KERN_DEBUG "Page isolation: ( %lx ) success.\n", paddr);
} else {
printk(KERN_DEBUG "Page isolation: ( %lx ) failure.\n", paddr);
}
spin_unlock(&mca_bh_lock);
/* This process is about to be killed itself */
force_sig(SIGKILL, current);
schedule();
}
/**
* mca_make_peidx - Make index of processor error section
* @slpi: pointer to record of processor error section
* @peidx: pointer to index of processor error section
*/
static void
mca_make_peidx(sal_log_processor_info_t *slpi, peidx_table_t *peidx)
{
/*
* calculate the start address of
* "struct cpuid_info" and "sal_processor_static_info_t".
*/
u64 total_check_num = slpi->valid.num_cache_check
+ slpi->valid.num_tlb_check
+ slpi->valid.num_bus_check
+ slpi->valid.num_reg_file_check
+ slpi->valid.num_ms_check;
u64 head_size = sizeof(sal_log_mod_error_info_t) * total_check_num
+ sizeof(sal_log_processor_info_t);
u64 mid_size = slpi->valid.cpuid_info * sizeof(struct sal_cpuid_info);
peidx_head(peidx) = slpi;
peidx_mid(peidx) = (struct sal_cpuid_info *)
(slpi->valid.cpuid_info ? ((char*)slpi + head_size) : NULL);
peidx_bottom(peidx) = (sal_processor_static_info_t *)
(slpi->valid.psi_static_struct ?
((char*)slpi + head_size + mid_size) : NULL);
}
/**
* mca_make_slidx - Make index of SAL error record
* @buffer: pointer to SAL error record
* @slidx: pointer to index of SAL error record
*
* Return value:
* 1 if record has platform error / 0 if not
*/
#define LOG_INDEX_ADD_SECT_PTR(sect, ptr) \
{ slidx_list_t *hl = &slidx_pool.buffer[slidx_pool.cur_idx]; \
hl->hdr = ptr; \
list_add(&hl->list, &(sect)); \
slidx_pool.cur_idx = (slidx_pool.cur_idx + 1)%slidx_pool.max_idx; }
static int
mca_make_slidx(void *buffer, slidx_table_t *slidx)
{
int platform_err = 0;
int record_len = ((sal_log_record_header_t*)buffer)->len;
u32 ercd_pos;
int sects;
sal_log_section_hdr_t *sp;
/*
* Initialize the index referring to the current record
*/
INIT_LIST_HEAD(&(slidx->proc_err));
INIT_LIST_HEAD(&(slidx->mem_dev_err));
INIT_LIST_HEAD(&(slidx->sel_dev_err));
INIT_LIST_HEAD(&(slidx->pci_bus_err));
INIT_LIST_HEAD(&(slidx->smbios_dev_err));
INIT_LIST_HEAD(&(slidx->pci_comp_err));
INIT_LIST_HEAD(&(slidx->plat_specific_err));
INIT_LIST_HEAD(&(slidx->host_ctlr_err));
INIT_LIST_HEAD(&(slidx->plat_bus_err));
INIT_LIST_HEAD(&(slidx->unsupported));
/*
* Extract a Record Header
*/
slidx->header = buffer;
/*
* Extract each section record
* (arranged from "int ia64_log_platform_info_print()")
*/
for (ercd_pos = sizeof(sal_log_record_header_t), sects = 0;
ercd_pos < record_len; ercd_pos += sp->len, sects++) {
sp = (sal_log_section_hdr_t *)((char*)buffer + ercd_pos);
if (!efi_guidcmp(sp->guid, SAL_PROC_DEV_ERR_SECT_GUID)) {
LOG_INDEX_ADD_SECT_PTR(slidx->proc_err, sp);
} else if (!efi_guidcmp(sp->guid, SAL_PLAT_MEM_DEV_ERR_SECT_GUID)) {
platform_err = 1;
LOG_INDEX_ADD_SECT_PTR(slidx->mem_dev_err, sp);
} else if (!efi_guidcmp(sp->guid, SAL_PLAT_SEL_DEV_ERR_SECT_GUID)) {
platform_err = 1;
LOG_INDEX_ADD_SECT_PTR(slidx->sel_dev_err, sp);
} else if (!efi_guidcmp(sp->guid, SAL_PLAT_PCI_BUS_ERR_SECT_GUID)) {
platform_err = 1;
LOG_INDEX_ADD_SECT_PTR(slidx->pci_bus_err, sp);
} else if (!efi_guidcmp(sp->guid, SAL_PLAT_SMBIOS_DEV_ERR_SECT_GUID)) {
platform_err = 1;
LOG_INDEX_ADD_SECT_PTR(slidx->smbios_dev_err, sp);
} else if (!efi_guidcmp(sp->guid, SAL_PLAT_PCI_COMP_ERR_SECT_GUID)) {
platform_err = 1;
LOG_INDEX_ADD_SECT_PTR(slidx->pci_comp_err, sp);
} else if (!efi_guidcmp(sp->guid, SAL_PLAT_SPECIFIC_ERR_SECT_GUID)) {
platform_err = 1;
LOG_INDEX_ADD_SECT_PTR(slidx->plat_specific_err, sp);
} else if (!efi_guidcmp(sp->guid, SAL_PLAT_HOST_CTLR_ERR_SECT_GUID)) {
platform_err = 1;
LOG_INDEX_ADD_SECT_PTR(slidx->host_ctlr_err, sp);
} else if (!efi_guidcmp(sp->guid, SAL_PLAT_BUS_ERR_SECT_GUID)) {
platform_err = 1;
LOG_INDEX_ADD_SECT_PTR(slidx->plat_bus_err, sp);
} else {
LOG_INDEX_ADD_SECT_PTR(slidx->unsupported, sp);
}
}
slidx->n_sections = sects;
return platform_err;
}
/**
* init_record_index_pools - Initialize pool of lists for SAL record index
*
* Return value:
* 0 on Success / -ENOMEM on Failure
*/
static int
init_record_index_pools(void)
{
int i;
int rec_max_size; /* Maximum size of SAL error records */
int sect_min_size; /* Minimum size of SAL error sections */
/* minimum size table of each section */
static int sal_log_sect_min_sizes[] = {
sizeof(sal_log_processor_info_t) + sizeof(sal_processor_static_info_t),
sizeof(sal_log_mem_dev_err_info_t),
sizeof(sal_log_sel_dev_err_info_t),
sizeof(sal_log_pci_bus_err_info_t),
sizeof(sal_log_smbios_dev_err_info_t),
sizeof(sal_log_pci_comp_err_info_t),
sizeof(sal_log_plat_specific_err_info_t),
sizeof(sal_log_host_ctlr_err_info_t),
sizeof(sal_log_plat_bus_err_info_t),
};
/*
* MCA handler cannot allocate new memory on flight,
* so we preallocate enough memory to handle a SAL record.
*
* Initialize a handling set of slidx_pool:
* 1. Pick up the max size of SAL error records
* 2. Pick up the min size of SAL error sections
* 3. Allocate the pool as enough to 2 SAL records
* (now we can estimate the maxinum of section in a record.)
*/
/* - 1 - */
rec_max_size = sal_rec_max;
/* - 2 - */
sect_min_size = sal_log_sect_min_sizes[0];
for (i = 1; i < sizeof(sal_log_sect_min_sizes)/sizeof(sal_log_sect_min_sizes[0]); i++)
if (sect_min_size > sal_log_sect_min_sizes[i])
sect_min_size = sal_log_sect_min_sizes[i];
/* - 3 - */
slidx_pool.max_idx = (rec_max_size/sect_min_size) * 2 + 1;
slidx_pool.buffer = (slidx_list_t *) kmalloc(slidx_pool.max_idx * sizeof(slidx_list_t), GFP_KERNEL);
return slidx_pool.buffer ? 0 : -ENOMEM;
}
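/*
 * Sizing sketch (editorial, with hypothetical numbers): if the largest
 * SAL record were 8192 bytes and the smallest section 64 bytes, one
 * record could hold at most 8192/64 = 128 sections, so indexing two
 * records needs (128 * 2) + 1 = 257 slidx_list_t slots -- which is
 * exactly what the formula for slidx_pool.max_idx above computes.
 */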
/*****************************************************************************
* Recovery functions *
*****************************************************************************/
/**
* is_mca_global - Check whether this MCA is global or not
* @peidx: pointer of index of processor error section
* @pbci: pointer to pal_bus_check_info_t
*
* Return value:
* MCA_IS_LOCAL / MCA_IS_GLOBAL
*/
static mca_type_t
is_mca_global(peidx_table_t *peidx, pal_bus_check_info_t *pbci)
{
pal_processor_state_info_t *psp = (pal_processor_state_info_t*)peidx_psp(peidx);
/*
* PAL can request a rendezvous if the MCA has a global scope.
* If the "rz_always" flag is set, SAL requests an MCA rendezvous
* regardless of whether the MCA is global.
* Therefore the MCA is local when no rendezvous has been requested.
* If the rendezvous failed, the system must be brought down.
*/
switch (sal_to_os_handoff_state->imsto_rendez_state) {
case -1: /* SAL rendezvous unsuccessful */
return MCA_IS_GLOBAL;
case 0: /* SAL rendezvous not required */
return MCA_IS_LOCAL;
case 1: /* SAL rendezvous successful int */
case 2: /* SAL rendezvous successful int with init */
default:
break;
}
/*
* If one or more Cache/TLB/Reg_File/Uarch checks are present,
* this is a local MCA (i.e. a processor-internal error).
*/
if (psp->tc || psp->cc || psp->rc || psp->uc)
return MCA_IS_LOCAL;
/*
* Bus_Check structure with Bus_Check.ib (internal bus error) flag set
* would be a global MCA. (e.g. a system bus address parity error)
*/
if (!pbci || pbci->ib)
return MCA_IS_GLOBAL;
/*
* Bus_Check structure with Bus_Check.eb (external bus error) flag set
* could be either a local MCA or a global MCA.
*
* Referring Bus_Check.bsi:
* 0: Unknown/unclassified
* 1: BERR#
* 2: BINIT#
* 3: Hard Fail
* (FIXME: Are these SGI specific or generic bsi values?)
*/
if (pbci->eb)
switch (pbci->bsi) {
case 0:
/* e.g. a load from poisoned memory */
return MCA_IS_LOCAL;
case 1:
case 2:
case 3:
return MCA_IS_GLOBAL;
}
return MCA_IS_GLOBAL;
}
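/*
 * Editorial summary of the decision above (no new logic):
 *   rendezvous failed (-1)              -> global
 *   rendezvous not required (0)         -> local
 *   any cache/TLB/reg-file/uarch check  -> local
 *   no bus check, or ib set             -> global
 *   eb set and bsi == 0                 -> local  (e.g. poisoned load)
 *   eb set and bsi == 1/2/3             -> global
 *   anything else                       -> global
 */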
/**
* recover_from_read_error - Try to recover from errors whose type is "read".
* @slidx: pointer of index of SAL error record
* @peidx: pointer of index of processor error section
* @pbci: pointer of pal_bus_check_info
*
* Return value:
* 1 on Success / 0 on Failure
*/
static int
recover_from_read_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci)
{
sal_log_mod_error_info_t *smei;
pal_min_state_area_t *pmsa;
struct ia64_psr *psr1, *psr2;
ia64_fptr_t *mca_hdlr_bh = (ia64_fptr_t*)mca_handler_bhhook;
/* Is target address valid? */
if (!pbci->tv)
return 0;
/*
* cpu read or memory-mapped io read
*
* offending process affected process OS MCA do
* kernel mode kernel mode down system
* kernel mode user mode kill the process
* user mode kernel mode down system (*)
* user mode user mode kill the process
*
* (*) The offending user-mode process could be terminated
* if (pbci->pv && pbci->pl != 0) *and* if it is certain that
* the process does not hold any kernel locks.
*/
psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr);
/*
* Check the privilege level of interrupted context.
* If it is user-mode, then terminate affected process.
*/
if (psr1->cpl != 0) {
smei = peidx_bus_check(peidx, 0);
if (smei->valid.target_identifier) {
/*
* setup for resume to bottom half of MCA,
* "mca_handler_bhhook"
*/
pmsa = (pal_min_state_area_t *)(sal_to_os_handoff_state->pal_min_state | (6ul<<61));
/* pass to bhhook as 1st argument (gr8) */
pmsa->pmsa_gr[8-1] = smei->target_identifier;
/* set interrupted return address (but no use) */
pmsa->pmsa_br0 = pmsa->pmsa_iip;
/* change resume address to bottom half */
pmsa->pmsa_iip = mca_hdlr_bh->fp;
pmsa->pmsa_gr[1-1] = mca_hdlr_bh->gp;
/* set cpl with kernel mode */
psr2 = (struct ia64_psr *)&pmsa->pmsa_ipsr;
psr2->cpl = 0;
psr2->ri = 0;
return 1;
}
}
return 0;
}
/**
* recover_from_platform_error - Recover from platform error.
* @slidx: pointer of index of SAL error record
* @peidx: pointer of index of processor error section
* @pbci: pointer of pal_bus_check_info
*
* Return value:
* 1 on Success / 0 on Failure
*/
static int
recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci)
{
int status = 0;
pal_processor_state_info_t *psp = (pal_processor_state_info_t*)peidx_psp(peidx);
if (psp->bc && pbci->eb && pbci->bsi == 0) {
switch(pbci->type) {
case 1: /* partial read */
case 3: /* full line(cpu) read */
case 9: /* I/O space read */
status = recover_from_read_error(slidx, peidx, pbci);
break;
case 0: /* unknown */
case 2: /* partial write */
case 4: /* full line write */
case 5: /* implicit or explicit write-back operation */
case 6: /* snoop probe */
case 7: /* incoming or outgoing ptc.g */
case 8: /* write coalescing transactions */
case 10: /* I/O space write */
case 11: /* inter-processor interrupt message(IPI) */
case 12: /* interrupt acknowledge or external task priority cycle */
default:
break;
}
}
return status;
}
/**
* recover_from_processor_error
* @platform: whether or not a platform error section is present
* @slidx: pointer of index of SAL error record
* @peidx: pointer of index of processor error section
* @pbci: pointer of pal_bus_check_info
*
* Return value:
* 1 on Success / 0 on Failure
*/
/*
* We only try to recover when all of the conditions below are satisfied:
* 1. Only one processor error section exists.
* 2. A BUS_CHECK exists and no other checks do (except TLB_CHECK).
* 3. There is exactly one BUS_CHECK_INFO entry.
* 4. The "external bus error" flag is set and the other flags are not.
*/
static int
recover_from_processor_error(int platform, slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci)
{
pal_processor_state_info_t *psp = (pal_processor_state_info_t*)peidx_psp(peidx);
/*
* We cannot recover from errors other than bus_check.
*/
if (psp->cc || psp->rc || psp->uc)
return 0;
/*
* If there is no bus error, the record is odd, but there is nothing to recover from.
*/
if (psp->bc == 0 || pbci == NULL)
return 1;
/*
* Sorry, we cannot handle so many.
*/
if (peidx_bus_check_num(peidx) > 1)
return 0;
/*
* Well, here is only one bus error.
*/
if (pbci->ib || pbci->cc)
return 0;
if (pbci->eb && pbci->bsi > 0)
return 0;
if (psp->ci == 0)
return 0;
/*
* This is a local MCA estimated to be a recoverable external bus error
* (e.g. a load from poisoned memory), which implies that
* some platform error sections are present.
*/
if (platform)
return recover_from_platform_error(slidx, peidx, pbci);
/*
* We cannot recover from this strange SAL error record.
*/
return 0;
}
/**
* mca_try_to_recover - Try to recover from MCA
* @rec: pointer to a SAL error record
*
* Return value:
* 1 on Success / 0 on Failure
*/
static int
mca_try_to_recover(void *rec,
ia64_mca_sal_to_os_state_t *sal_to_os_state,
ia64_mca_os_to_sal_state_t *os_to_sal_state)
{
int platform_err;
int n_proc_err;
slidx_table_t slidx;
peidx_table_t peidx;
pal_bus_check_info_t pbci;
/* handoff state from/to mca.c */
sal_to_os_handoff_state = sal_to_os_state;
os_to_sal_handoff_state = os_to_sal_state;
/* Make index of SAL error record */
platform_err = mca_make_slidx(rec, &slidx);
/* Count processor error sections */
n_proc_err = slidx_count(&slidx, proc_err);
/* For now, the OS can only recover when there is a single processor error section */
if (n_proc_err > 1)
return 0;
else if (n_proc_err == 0) {
/* Odd SAL record ... nothing to recover from */
return 1;
}
/* Make index of processor error section */
mca_make_peidx((sal_log_processor_info_t*)slidx_first_entry(&slidx.proc_err)->hdr, &peidx);
/* Extract Processor BUS_CHECK[0] */
*((u64*)&pbci) = peidx_check_info(&peidx, bus_check, 0);
/* Check whether MCA is global or not */
if (is_mca_global(&peidx, &pbci))
return 0;
/* Try to recover a processor error */
return recover_from_processor_error(platform_err, &slidx, &peidx, &pbci);
}
/*
* =============================================================================
*/
int __init mca_external_handler_init(void)
{
if (init_record_index_pools())
return -ENOMEM;
/* register external mca handlers */
if (ia64_reg_MCA_extension(mca_try_to_recover)){
printk(KERN_ERR "ia64_reg_MCA_extension failed.\n");
kfree(slidx_pool.buffer);
return -EFAULT;
}
return 0;
}
void __exit mca_external_handler_exit(void)
{
/* unregister external mca handlers */
ia64_unreg_MCA_extension();
kfree(slidx_pool.buffer);
}
module_init(mca_external_handler_init);
module_exit(mca_external_handler_exit);
module_param(sal_rec_max, int, 0644);
MODULE_PARM_DESC(sal_rec_max, "Max size of SAL error record");
MODULE_DESCRIPTION("ia64 platform dependent mca handler driver");
MODULE_LICENSE("GPL");

113
arch/ia64/kernel/mca_drv.h Normal file

@@ -0,0 +1,113 @@
/*
* File: mca_drv.h
* Purpose: Define helpers for Generic MCA handling
*
* Copyright (C) 2004 FUJITSU LIMITED
* Copyright (C) Hidetoshi Seto (seto.hidetoshi@jp.fujitsu.com)
*/
/*
* Processor error section:
*
* +-sal_log_processor_info_t *info-------------+
* | sal_log_section_hdr_t header; |
* | ... |
* | sal_log_mod_error_info_t info[0]; |
* +-+----------------+-------------------------+
* | CACHE_CHECK | ^ num_cache_check v
* +----------------+
* | TLB_CHECK | ^ num_tlb_check v
* +----------------+
* | BUS_CHECK | ^ num_bus_check v
* +----------------+
* | REG_FILE_CHECK | ^ num_reg_file_check v
* +----------------+
* | MS_CHECK | ^ num_ms_check v
* +-struct cpuid_info *id----------------------+
* | regs[5]; |
* | reserved; |
* +-sal_processor_static_info_t *regs----------+
* | valid; |
* | ... |
* | fr[128]; |
* +--------------------------------------------+
*/
/* peidx: index of processor error section */
typedef struct peidx_table {
sal_log_processor_info_t *info;
struct sal_cpuid_info *id;
sal_processor_static_info_t *regs;
} peidx_table_t;
#define peidx_head(p) (((p)->info))
#define peidx_mid(p) (((p)->id))
#define peidx_bottom(p) (((p)->regs))
#define peidx_psp(p) (&(peidx_head(p)->proc_state_parameter))
#define peidx_field_valid(p) (&(peidx_head(p)->valid))
#define peidx_minstate_area(p) (&(peidx_bottom(p)->min_state_area))
#define peidx_cache_check_num(p) (peidx_head(p)->valid.num_cache_check)
#define peidx_tlb_check_num(p) (peidx_head(p)->valid.num_tlb_check)
#define peidx_bus_check_num(p) (peidx_head(p)->valid.num_bus_check)
#define peidx_reg_file_check_num(p) (peidx_head(p)->valid.num_reg_file_check)
#define peidx_ms_check_num(p) (peidx_head(p)->valid.num_ms_check)
#define peidx_cache_check_idx(p, n) (n)
#define peidx_tlb_check_idx(p, n) (peidx_cache_check_idx(p, peidx_cache_check_num(p)) + n)
#define peidx_bus_check_idx(p, n) (peidx_tlb_check_idx(p, peidx_tlb_check_num(p)) + n)
#define peidx_reg_file_check_idx(p, n) (peidx_bus_check_idx(p, peidx_bus_check_num(p)) + n)
#define peidx_ms_check_idx(p, n) (peidx_reg_file_check_idx(p, peidx_reg_file_check_num(p)) + n)
#define peidx_mod_error_info(p, name, n) \
({ int __idx = peidx_##name##_idx(p, n); \
sal_log_mod_error_info_t *__ret = NULL; \
if (peidx_##name##_num(p) > n) /*BUG*/ \
__ret = &(peidx_head(p)->info[__idx]); \
__ret; })
#define peidx_cache_check(p, n) peidx_mod_error_info(p, cache_check, n)
#define peidx_tlb_check(p, n) peidx_mod_error_info(p, tlb_check, n)
#define peidx_bus_check(p, n) peidx_mod_error_info(p, bus_check, n)
#define peidx_reg_file_check(p, n) peidx_mod_error_info(p, reg_file_check, n)
#define peidx_ms_check(p, n) peidx_mod_error_info(p, ms_check, n)
#define peidx_check_info(proc, name, n) \
({ \
sal_log_mod_error_info_t *__info = peidx_mod_error_info(proc, name, n);\
u64 __temp = __info && __info->valid.check_info \
? __info->check_info : 0; \
__temp; })
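/*
 * Editorial sketch (not part of the original header): the peidx_*_idx()
 * macros flatten the variable-length check areas into one offset into
 * info[].  For a hypothetical record with 2 cache checks, 1 TLB check
 * and 1 bus check:
 *
 *	info[0..1]  cache checks   peidx_cache_check_idx(p, n) == n
 *	info[2]     TLB check      peidx_tlb_check_idx(p, 0)   == 2
 *	info[3]     bus check      peidx_bus_check_idx(p, 0)   == 3
 *
 * so peidx_bus_check(p, 0) returns &peidx_head(p)->info[3].
 */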
/* slidx: index of SAL log error record */
typedef struct slidx_list {
struct list_head list;
sal_log_section_hdr_t *hdr;
} slidx_list_t;
typedef struct slidx_table {
sal_log_record_header_t *header;
int n_sections; /* # of section headers */
struct list_head proc_err;
struct list_head mem_dev_err;
struct list_head sel_dev_err;
struct list_head pci_bus_err;
struct list_head smbios_dev_err;
struct list_head pci_comp_err;
struct list_head plat_specific_err;
struct list_head host_ctlr_err;
struct list_head plat_bus_err;
struct list_head unsupported; /* list of unsupported sections */
} slidx_table_t;
#define slidx_foreach_entry(pos, head) \
list_for_each_entry(pos, head, list)
#define slidx_first_entry(head) \
(((head)->next != (head)) ? list_entry((head)->next, typeof(slidx_list_t), list) : NULL)
#define slidx_count(slidx, sec) \
({ int __count = 0; \
slidx_list_t *__pos; \
slidx_foreach_entry(__pos, &((slidx)->sec)) { __count++; }\
__count; })
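/*
 * Usage sketch (editorial): counting and walking the processor error
 * sections of an indexed record, assuming "slidx" was filled in by
 * mca_make_slidx() and handle_section() is a hypothetical helper:
 *
 *	slidx_list_t *pos;
 *	int n = slidx_count(&slidx, proc_err);
 *	slidx_foreach_entry(pos, &slidx.proc_err)
 *		handle_section(pos->hdr);
 */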

45
arch/ia64/kernel/mca_drv_asm.S Normal file

@@ -0,0 +1,45 @@
/*
* File: mca_drv_asm.S
* Purpose: Assembly portion of Generic MCA handling
*
* Copyright (C) 2004 FUJITSU LIMITED
* Copyright (C) Hidetoshi Seto (seto.hidetoshi@jp.fujitsu.com)
*/
#include <linux/config.h>
#include <linux/threads.h>
#include <asm/asmmacro.h>
#include <asm/processor.h>
GLOBAL_ENTRY(mca_handler_bhhook)
invala // clear RSE ?
;; //
cover //
;; //
clrrrb //
;;
alloc r16=ar.pfs,0,2,1,0 // make a new frame
;;
mov r13=IA64_KR(CURRENT) // current task pointer
;;
adds r12=IA64_TASK_THREAD_KSP_OFFSET,r13
;;
ld8 r12=[r12] // stack pointer
;;
mov loc0=r16
movl loc1=mca_handler_bh // recovery C function
;;
mov out0=r8 // poisoned address
mov b6=loc1
;;
mov loc1=rp
;;
br.call.sptk.many rp=b6 // does not return ...
;;
mov ar.pfs=loc0
mov rp=loc1
;;
mov r8=r0
br.ret.sptk.many rp
;;
END(mca_handler_bhhook)

251
arch/ia64/kernel/minstate.h Normal file

@@ -0,0 +1,251 @@
#include <linux/config.h>
#include <asm/cache.h>
#include "entry.h"
/*
* For ivt.s we want to access the stack virtually so we don't have to disable translation
* on interrupts.
*
* On entry:
* r1: pointer to current task (ar.k6)
*/
#define MINSTATE_START_SAVE_MIN_VIRT \
(pUStk) mov ar.rsc=0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \
;; \
(pUStk) mov.m r24=ar.rnat; \
(pUStk) addl r22=IA64_RBS_OFFSET,r1; /* compute base of RBS */ \
(pKStk) mov r1=sp; /* get sp */ \
;; \
(pUStk) lfetch.fault.excl.nt1 [r22]; \
(pUStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \
(pUStk) mov r23=ar.bspstore; /* save ar.bspstore */ \
;; \
(pUStk) mov ar.bspstore=r22; /* switch to kernel RBS */ \
(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \
;; \
(pUStk) mov r18=ar.bsp; \
(pUStk) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */
#define MINSTATE_END_SAVE_MIN_VIRT \
bsw.1; /* switch back to bank 1 (must be last in insn group) */ \
;;
/*
* For mca_asm.S we want to access the stack physically since the state is saved before we
* go virtual and don't want to destroy the iip or ipsr.
*/
#define MINSTATE_START_SAVE_MIN_PHYS \
(pKStk) mov r3=IA64_KR(PER_CPU_DATA);; \
(pKStk) addl r3=THIS_CPU(ia64_mca_data),r3;; \
(pKStk) ld8 r3 = [r3];; \
(pKStk) addl r3=IA64_MCA_CPU_INIT_STACK_OFFSET,r3;; \
(pKStk) addl sp=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r3; \
(pUStk) mov ar.rsc=0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \
(pUStk) addl r22=IA64_RBS_OFFSET,r1; /* compute base of register backing store */ \
;; \
(pUStk) mov r24=ar.rnat; \
(pUStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \
(pUStk) mov r23=ar.bspstore; /* save ar.bspstore */ \
(pUStk) dep r22=-1,r22,61,3; /* compute kernel virtual addr of RBS */ \
;; \
(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \
(pUStk) mov ar.bspstore=r22; /* switch to kernel RBS */ \
;; \
(pUStk) mov r18=ar.bsp; \
(pUStk) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \
#define MINSTATE_END_SAVE_MIN_PHYS \
dep r12=-1,r12,61,3; /* make sp a kernel virtual address */ \
;;
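/*
 * Editorial note: "dep rX=-1,rX,61,3" sets bits 61-63 of a physical
 * address, producing the corresponding region-7 (identity-mapped,
 * cacheable kernel) virtual address.  For example, a hypothetical
 * physical sp of 0x0000000004001000 becomes 0xe000000004001000.
 */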
#ifdef MINSTATE_VIRT
# define MINSTATE_GET_CURRENT(reg) mov reg=IA64_KR(CURRENT)
# define MINSTATE_START_SAVE_MIN MINSTATE_START_SAVE_MIN_VIRT
# define MINSTATE_END_SAVE_MIN MINSTATE_END_SAVE_MIN_VIRT
#endif
#ifdef MINSTATE_PHYS
# define MINSTATE_GET_CURRENT(reg) mov reg=IA64_KR(CURRENT);; tpa reg=reg
# define MINSTATE_START_SAVE_MIN MINSTATE_START_SAVE_MIN_PHYS
# define MINSTATE_END_SAVE_MIN MINSTATE_END_SAVE_MIN_PHYS
#endif
/*
* DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
* the minimum state necessary that allows us to turn psr.ic back
* on.
*
* Assumed state upon entry:
* psr.ic: off
* r31: contains saved predicates (pr)
*
* Upon exit, the state is as follows:
* psr.ic: off
* r2 = points to &pt_regs.r16
* r8 = contents of ar.ccv
* r9 = contents of ar.csd
* r10 = contents of ar.ssd
* r11 = FPSR_DEFAULT
* r12 = kernel sp (kernel virtual address)
* r13 = points to current task_struct (kernel virtual address)
* p15 = TRUE if psr.i is set in cr.ipsr
* predicate registers (other than p2, p3, and p15), b6, r3, r14, r15:
* preserved
*
* Note that psr.ic is NOT turned on by this macro. This is so that
* we can pass interruption state as arguments to a handler.
*/
#define DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA) \
MINSTATE_GET_CURRENT(r16); /* M (or M;;I) */ \
mov r27=ar.rsc; /* M */ \
mov r20=r1; /* A */ \
mov r25=ar.unat; /* M */ \
mov r29=cr.ipsr; /* M */ \
mov r26=ar.pfs; /* I */ \
mov r28=cr.iip; /* M */ \
mov r21=ar.fpsr; /* M */ \
COVER; /* B;; (or nothing) */ \
;; \
adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16; \
;; \
ld1 r17=[r16]; /* load current->thread.on_ustack flag */ \
st1 [r16]=r0; /* clear current->thread.on_ustack flag */ \
adds r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 \
/* switch from user to kernel RBS: */ \
;; \
invala; /* M */ \
SAVE_IFS; \
cmp.eq pKStk,pUStk=r0,r17; /* are we in kernel mode already? */ \
;; \
MINSTATE_START_SAVE_MIN \
adds r17=2*L1_CACHE_BYTES,r1; /* really: biggest cache-line size */ \
adds r16=PT(CR_IPSR),r1; \
;; \
lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES; \
st8 [r16]=r29; /* save cr.ipsr */ \
;; \
lfetch.fault.excl.nt1 [r17]; \
tbit.nz p15,p0=r29,IA64_PSR_I_BIT; \
mov r29=b0 \
;; \
adds r16=PT(R8),r1; /* initialize first base pointer */ \
adds r17=PT(R9),r1; /* initialize second base pointer */ \
(pKStk) mov r18=r0; /* make sure r18 isn't NaT */ \
;; \
.mem.offset 0,0; st8.spill [r16]=r8,16; \
.mem.offset 8,0; st8.spill [r17]=r9,16; \
;; \
.mem.offset 0,0; st8.spill [r16]=r10,24; \
.mem.offset 8,0; st8.spill [r17]=r11,24; \
;; \
st8 [r16]=r28,16; /* save cr.iip */ \
st8 [r17]=r30,16; /* save cr.ifs */ \
(pUStk) sub r18=r18,r22; /* r18=RSE.ndirty*8 */ \
mov r8=ar.ccv; \
mov r9=ar.csd; \
mov r10=ar.ssd; \
movl r11=FPSR_DEFAULT; /* L-unit */ \
;; \
st8 [r16]=r25,16; /* save ar.unat */ \
st8 [r17]=r26,16; /* save ar.pfs */ \
shl r18=r18,16; /* compute ar.rsc to be used for "loadrs" */ \
;; \
st8 [r16]=r27,16; /* save ar.rsc */ \
(pUStk) st8 [r17]=r24,16; /* save ar.rnat */ \
(pKStk) adds r17=16,r17; /* skip over ar_rnat field */ \
;; /* avoid RAW on r16 & r17 */ \
(pUStk) st8 [r16]=r23,16; /* save ar.bspstore */ \
st8 [r17]=r31,16; /* save predicates */ \
(pKStk) adds r16=16,r16; /* skip over ar_bspstore field */ \
;; \
st8 [r16]=r29,16; /* save b0 */ \
st8 [r17]=r18,16; /* save ar.rsc value for "loadrs" */ \
cmp.eq pNonSys,pSys=r0,r0 /* initialize pSys=0, pNonSys=1 */ \
;; \
.mem.offset 0,0; st8.spill [r16]=r20,16; /* save original r1 */ \
.mem.offset 8,0; st8.spill [r17]=r12,16; \
adds r12=-16,r1; /* switch to kernel memory stack (with 16 bytes of scratch) */ \
;; \
.mem.offset 0,0; st8.spill [r16]=r13,16; \
.mem.offset 8,0; st8.spill [r17]=r21,16; /* save ar.fpsr */ \
mov r13=IA64_KR(CURRENT); /* establish `current' */ \
;; \
.mem.offset 0,0; st8.spill [r16]=r15,16; \
.mem.offset 8,0; st8.spill [r17]=r14,16; \
;; \
.mem.offset 0,0; st8.spill [r16]=r2,16; \
.mem.offset 8,0; st8.spill [r17]=r3,16; \
adds r2=IA64_PT_REGS_R16_OFFSET,r1; \
;; \
EXTRA; \
movl r1=__gp; /* establish kernel global pointer */ \
;; \
MINSTATE_END_SAVE_MIN
/*
* SAVE_REST saves the remainder of pt_regs (with psr.ic on).
*
* Assumed state upon entry:
* psr.ic: on
* r2: points to &pt_regs.r16
* r3: points to &pt_regs.r17
* r8: contents of ar.ccv
* r9: contents of ar.csd
* r10: contents of ar.ssd
* r11: FPSR_DEFAULT
*
* Registers r14 and r15 are guaranteed not to be touched by SAVE_REST.
*/
#define SAVE_REST \
.mem.offset 0,0; st8.spill [r2]=r16,16; \
.mem.offset 8,0; st8.spill [r3]=r17,16; \
;; \
.mem.offset 0,0; st8.spill [r2]=r18,16; \
.mem.offset 8,0; st8.spill [r3]=r19,16; \
;; \
.mem.offset 0,0; st8.spill [r2]=r20,16; \
.mem.offset 8,0; st8.spill [r3]=r21,16; \
mov r18=b6; \
;; \
.mem.offset 0,0; st8.spill [r2]=r22,16; \
.mem.offset 8,0; st8.spill [r3]=r23,16; \
mov r19=b7; \
;; \
.mem.offset 0,0; st8.spill [r2]=r24,16; \
.mem.offset 8,0; st8.spill [r3]=r25,16; \
;; \
.mem.offset 0,0; st8.spill [r2]=r26,16; \
.mem.offset 8,0; st8.spill [r3]=r27,16; \
;; \
.mem.offset 0,0; st8.spill [r2]=r28,16; \
.mem.offset 8,0; st8.spill [r3]=r29,16; \
;; \
.mem.offset 0,0; st8.spill [r2]=r30,16; \
.mem.offset 8,0; st8.spill [r3]=r31,32; \
;; \
mov ar.fpsr=r11; /* M-unit */ \
st8 [r2]=r8,8; /* ar.ccv */ \
adds r24=PT(B6)-PT(F7),r3; \
;; \
stf.spill [r2]=f6,32; \
stf.spill [r3]=f7,32; \
;; \
stf.spill [r2]=f8,32; \
stf.spill [r3]=f9,32; \
;; \
stf.spill [r2]=f10; \
stf.spill [r3]=f11; \
adds r25=PT(B7)-PT(F11),r3; \
;; \
st8 [r24]=r18,16; /* b6 */ \
st8 [r25]=r19,16; /* b7 */ \
;; \
st8 [r24]=r9; /* ar.csd */ \
st8 [r25]=r10; /* ar.ssd */ \
;;
#define SAVE_MIN_WITH_COVER DO_SAVE_MIN(cover, mov r30=cr.ifs,)
#define SAVE_MIN_WITH_COVER_R19 DO_SAVE_MIN(cover, mov r30=cr.ifs, mov r15=r19)
#define SAVE_MIN DO_SAVE_MIN( , mov r30=r0, )

952
arch/ia64/kernel/module.c Normal file

@@ -0,0 +1,952 @@
/*
* IA-64-specific support for kernel module loader.
*
* Copyright (C) 2003 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
*
* Loosely based on patch by Rusty Russell.
*/
/* relocs tested so far:
DIR64LSB
FPTR64LSB
GPREL22
LDXMOV
LDXMOV
LTOFF22
LTOFF22X
LTOFF22X
LTOFF_FPTR22
PCREL21B (for br.call only; br.cond is not supported out of modules!)
PCREL60B (for brl.cond only; brl.call is not supported for modules!)
PCREL64LSB
SECREL32LSB
SEGREL64LSB
*/
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/elf.h>
#include <linux/moduleloader.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <asm/patch.h>
#include <asm/unaligned.h>
#define ARCH_MODULE_DEBUG 0
#if ARCH_MODULE_DEBUG
# define DEBUGP printk
# define inline
#else
# define DEBUGP(fmt , a...)
#endif
#ifdef CONFIG_ITANIUM
# define USE_BRL 0
#else
# define USE_BRL 1
#endif
#define MAX_LTOFF ((uint64_t) (1 << 22)) /* max. allowable linkage-table offset */
/* Define some relocation helper macros/types: */
#define FORMAT_SHIFT 0
#define FORMAT_BITS 3
#define FORMAT_MASK ((1 << FORMAT_BITS) - 1)
#define VALUE_SHIFT 3
#define VALUE_BITS 5
#define VALUE_MASK ((1 << VALUE_BITS) - 1)
enum reloc_target_format {
/* direct encoded formats: */
RF_NONE = 0,
RF_INSN14 = 1,
RF_INSN22 = 2,
RF_INSN64 = 3,
RF_32MSB = 4,
RF_32LSB = 5,
RF_64MSB = 6,
RF_64LSB = 7,
/* formats that cannot be directly decoded: */
RF_INSN60,
RF_INSN21B, /* imm21 form 1 */
RF_INSN21M, /* imm21 form 2 */
RF_INSN21F /* imm21 form 3 */
};
enum reloc_value_formula {
RV_DIRECT = 4, /* S + A */
RV_GPREL = 5, /* @gprel(S + A) */
RV_LTREL = 6, /* @ltoff(S + A) */
RV_PLTREL = 7, /* @pltoff(S + A) */
RV_FPTR = 8, /* @fptr(S + A) */
RV_PCREL = 9, /* S + A - P */
RV_LTREL_FPTR = 10, /* @ltoff(@fptr(S + A)) */
RV_SEGREL = 11, /* @segrel(S + A) */
RV_SECREL = 12, /* @secrel(S + A) */
RV_BDREL = 13, /* BD + A */
RV_LTV = 14, /* S + A (like RV_DIRECT, except frozen at static link-time) */
RV_PCREL2 = 15, /* S + A - P */
RV_SPECIAL = 16, /* various (see below) */
RV_RSVD17 = 17,
RV_TPREL = 18, /* @tprel(S + A) */
RV_LTREL_TPREL = 19, /* @ltoff(@tprel(S + A)) */
RV_DTPMOD = 20, /* @dtpmod(S + A) */
RV_LTREL_DTPMOD = 21, /* @ltoff(@dtpmod(S + A)) */
RV_DTPREL = 22, /* @dtprel(S + A) */
RV_LTREL_DTPREL = 23, /* @ltoff(@dtprel(S + A)) */
RV_RSVD24 = 24,
RV_RSVD25 = 25,
RV_RSVD26 = 26,
RV_RSVD27 = 27
/* 28-31 reserved for implementation-specific purposes. */
};
#define N(reloc) [R_IA64_##reloc] = #reloc
static const char *reloc_name[256] = {
N(NONE), N(IMM14), N(IMM22), N(IMM64),
N(DIR32MSB), N(DIR32LSB), N(DIR64MSB), N(DIR64LSB),
N(GPREL22), N(GPREL64I), N(GPREL32MSB), N(GPREL32LSB),
N(GPREL64MSB), N(GPREL64LSB), N(LTOFF22), N(LTOFF64I),
N(PLTOFF22), N(PLTOFF64I), N(PLTOFF64MSB), N(PLTOFF64LSB),
N(FPTR64I), N(FPTR32MSB), N(FPTR32LSB), N(FPTR64MSB),
N(FPTR64LSB), N(PCREL60B), N(PCREL21B), N(PCREL21M),
N(PCREL21F), N(PCREL32MSB), N(PCREL32LSB), N(PCREL64MSB),
N(PCREL64LSB), N(LTOFF_FPTR22), N(LTOFF_FPTR64I), N(LTOFF_FPTR32MSB),
N(LTOFF_FPTR32LSB), N(LTOFF_FPTR64MSB), N(LTOFF_FPTR64LSB), N(SEGREL32MSB),
N(SEGREL32LSB), N(SEGREL64MSB), N(SEGREL64LSB), N(SECREL32MSB),
N(SECREL32LSB), N(SECREL64MSB), N(SECREL64LSB), N(REL32MSB),
N(REL32LSB), N(REL64MSB), N(REL64LSB), N(LTV32MSB),
N(LTV32LSB), N(LTV64MSB), N(LTV64LSB), N(PCREL21BI),
N(PCREL22), N(PCREL64I), N(IPLTMSB), N(IPLTLSB),
N(COPY), N(LTOFF22X), N(LDXMOV), N(TPREL14),
N(TPREL22), N(TPREL64I), N(TPREL64MSB), N(TPREL64LSB),
N(LTOFF_TPREL22), N(DTPMOD64MSB), N(DTPMOD64LSB), N(LTOFF_DTPMOD22),
N(DTPREL14), N(DTPREL22), N(DTPREL64I), N(DTPREL32MSB),
N(DTPREL32LSB), N(DTPREL64MSB), N(DTPREL64LSB), N(LTOFF_DTPREL22)
};
#undef N
struct got_entry {
uint64_t val;
};
struct fdesc {
uint64_t ip;
uint64_t gp;
};
/* Opaque struct for insns, to protect against derefs. */
struct insn;
static inline uint64_t
bundle (const struct insn *insn)
{
return (uint64_t) insn & ~0xfUL;
}
static inline int
slot (const struct insn *insn)
{
return (uint64_t) insn & 0x3;
}
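/*
 * Editorial sketch (not in the original file): a "struct insn *" is a
 * plain byte address whose low two bits select the slot (0-2) within
 * the 16-byte-aligned bundle.  For a hypothetical pointer value
 * 0xa000000000001232:
 *
 *	bundle(insn) == 0xa000000000001230
 *	slot(insn)   == 2
 */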
static int
apply_imm64 (struct module *mod, struct insn *insn, uint64_t val)
{
if (slot(insn) != 2) {
printk(KERN_ERR "%s: invalid slot number %d for IMM64\n",
mod->name, slot(insn));
return 0;
}
ia64_patch_imm64((u64) insn, val);
return 1;
}
static int
apply_imm60 (struct module *mod, struct insn *insn, uint64_t val)
{
if (slot(insn) != 2) {
printk(KERN_ERR "%s: invalid slot number %d for IMM60\n",
mod->name, slot(insn));
return 0;
}
if (val + ((uint64_t) 1 << 59) >= (1UL << 60)) {
printk(KERN_ERR "%s: value %ld out of IMM60 range\n", mod->name, (int64_t) val);
return 0;
}
ia64_patch_imm60((u64) insn, val);
return 1;
}
static int
apply_imm22 (struct module *mod, struct insn *insn, uint64_t val)
{
if (val + (1 << 21) >= (1 << 22)) {
printk(KERN_ERR "%s: value %li out of IMM22 range\n", mod->name, (int64_t)val);
return 0;
}
ia64_patch((u64) insn, 0x01fffcfe000UL, ( ((val & 0x200000UL) << 15) /* bit 21 -> 36 */
| ((val & 0x1f0000UL) << 6) /* bit 16 -> 22 */
| ((val & 0x00ff80UL) << 20) /* bit 7 -> 27 */
| ((val & 0x00007fUL) << 13) /* bit 0 -> 13 */));
return 1;
}
static int
apply_imm21b (struct module *mod, struct insn *insn, uint64_t val)
{
if (val + (1 << 20) >= (1 << 21)) {
printk(KERN_ERR "%s: value %li out of IMM21b range\n", mod->name, (int64_t)val);
return 0;
}
ia64_patch((u64) insn, 0x11ffffe000UL, ( ((val & 0x100000UL) << 16) /* bit 20 -> 36 */
| ((val & 0x0fffffUL) << 13) /* bit 0 -> 13 */));
return 1;
}
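/*
 * Editorial note on the range checks in apply_imm22()/apply_imm21b():
 * "val + (1 << 20) >= (1 << 21)" is the usual unsigned-bias trick for
 * testing whether a signed value fits in 21 bits: adding the bias 2^20
 * (modulo 2^64) maps exactly the values in [-2^20, 2^20) onto
 * [0, 2^21), so anything at or above 2^21 after the addition is out
 * of range.
 */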
#if USE_BRL
struct plt_entry {
/* Two instruction bundles in PLT. */
unsigned char bundle[2][16];
};
static const struct plt_entry ia64_plt_template = {
{
{
0x04, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */
0x00, 0x00, 0x00, 0x00, 0x00, 0x20, /* movl gp=TARGET_GP */
0x00, 0x00, 0x00, 0x60
},
{
0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* brl.many TARGET_IP */
0x08, 0x00, 0x00, 0xc0
}
}
};
static int
patch_plt (struct module *mod, struct plt_entry *plt, long target_ip, unsigned long target_gp)
{
if (apply_imm64(mod, (struct insn *) (plt->bundle[0] + 2), target_gp)
&& apply_imm60(mod, (struct insn *) (plt->bundle[1] + 2),
(target_ip - (int64_t) plt->bundle[1]) / 16))
return 1;
return 0;
}
unsigned long
plt_target (struct plt_entry *plt)
{
uint64_t b0, b1, *b = (uint64_t *) plt->bundle[1];
long off;
b0 = b[0]; b1 = b[1];
off = ( ((b1 & 0x00fffff000000000UL) >> 36) /* imm20b -> bit 0 */
| ((b0 >> 48) << 20) | ((b1 & 0x7fffffUL) << 36) /* imm39 -> bit 20 */
| ((b1 & 0x0800000000000000UL) << 0)); /* i -> bit 59 */
return (long) plt->bundle[1] + 16*off;
}
#else /* !USE_BRL */
struct plt_entry {
/* Three instruction bundles in PLT. */
unsigned char bundle[3][16];
};
static const struct plt_entry ia64_plt_template = {
{
{
0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* movl r16=TARGET_IP */
0x02, 0x00, 0x00, 0x60
},
{
0x04, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */
0x00, 0x00, 0x00, 0x00, 0x00, 0x20, /* movl gp=TARGET_GP */
0x00, 0x00, 0x00, 0x60
},
{
0x11, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MIB] nop.m 0 */
0x60, 0x80, 0x04, 0x80, 0x03, 0x00, /* mov b6=r16 */
0x60, 0x00, 0x80, 0x00 /* br.few b6 */
}
}
};
static int
patch_plt (struct module *mod, struct plt_entry *plt, long target_ip, unsigned long target_gp)
{
if (apply_imm64(mod, (struct insn *) (plt->bundle[0] + 2), target_ip)
&& apply_imm64(mod, (struct insn *) (plt->bundle[1] + 2), target_gp))
return 1;
return 0;
}
unsigned long
plt_target (struct plt_entry *plt)
{
uint64_t b0, b1, *b = (uint64_t *) plt->bundle[0];
b0 = b[0]; b1 = b[1];
return ( ((b1 & 0x000007f000000000) >> 36) /* imm7b -> bit 0 */
| ((b1 & 0x07fc000000000000) >> 43) /* imm9d -> bit 7 */
| ((b1 & 0x0003e00000000000) >> 29) /* imm5c -> bit 16 */
| ((b1 & 0x0000100000000000) >> 23) /* ic -> bit 21 */
| ((b0 >> 46) << 22) | ((b1 & 0x7fffff) << 40) /* imm41 -> bit 22 */
| ((b1 & 0x0800000000000000) << 4)); /* i -> bit 63 */
}
#endif /* !USE_BRL */
void *
module_alloc (unsigned long size)
{
if (!size)
return NULL;
return vmalloc(size);
}
void
module_free (struct module *mod, void *module_region)
{
if (mod->arch.init_unw_table && module_region == mod->module_init) {
unw_remove_unwind_table(mod->arch.init_unw_table);
mod->arch.init_unw_table = NULL;
}
vfree(module_region);
}
/* Have we already seen one of these relocations? */
/* FIXME: we could look in other sections, too --RR */
static int
duplicate_reloc (const Elf64_Rela *rela, unsigned int num)
{
unsigned int i;
for (i = 0; i < num; i++) {
if (rela[i].r_info == rela[num].r_info && rela[i].r_addend == rela[num].r_addend)
return 1;
}
return 0;
}
/* Count how many GOT entries we may need */
static unsigned int
count_gots (const Elf64_Rela *rela, unsigned int num)
{
unsigned int i, ret = 0;
/* Sure, this is order(n^2), but it's usually short, and not
time critical */
for (i = 0; i < num; i++) {
switch (ELF64_R_TYPE(rela[i].r_info)) {
case R_IA64_LTOFF22:
case R_IA64_LTOFF22X:
case R_IA64_LTOFF64I:
case R_IA64_LTOFF_FPTR22:
case R_IA64_LTOFF_FPTR64I:
case R_IA64_LTOFF_FPTR32MSB:
case R_IA64_LTOFF_FPTR32LSB:
case R_IA64_LTOFF_FPTR64MSB:
case R_IA64_LTOFF_FPTR64LSB:
if (!duplicate_reloc(rela, i))
ret++;
break;
}
}
return ret;
}
/* Count how many PLT entries we may need */
static unsigned int
count_plts (const Elf64_Rela *rela, unsigned int num)
{
unsigned int i, ret = 0;
/* Sure, this is order(n^2), but it's usually short, and not
time critical */
for (i = 0; i < num; i++) {
switch (ELF64_R_TYPE(rela[i].r_info)) {
case R_IA64_PCREL21B:
case R_IA64_PLTOFF22:
case R_IA64_PLTOFF64I:
case R_IA64_PLTOFF64MSB:
case R_IA64_PLTOFF64LSB:
case R_IA64_IPLTMSB:
case R_IA64_IPLTLSB:
if (!duplicate_reloc(rela, i))
ret++;
break;
}
}
return ret;
}
/* We need to create a function descriptor for any internal function
which is referenced. */
static unsigned int
count_fdescs (const Elf64_Rela *rela, unsigned int num)
{
unsigned int i, ret = 0;
/* Sure, this is order(n^2), but it's usually short, and not time critical. */
for (i = 0; i < num; i++) {
switch (ELF64_R_TYPE(rela[i].r_info)) {
case R_IA64_FPTR64I:
case R_IA64_FPTR32LSB:
case R_IA64_FPTR32MSB:
case R_IA64_FPTR64LSB:
case R_IA64_FPTR64MSB:
case R_IA64_LTOFF_FPTR22:
case R_IA64_LTOFF_FPTR32LSB:
case R_IA64_LTOFF_FPTR32MSB:
case R_IA64_LTOFF_FPTR64I:
case R_IA64_LTOFF_FPTR64LSB:
case R_IA64_LTOFF_FPTR64MSB:
case R_IA64_IPLTMSB:
case R_IA64_IPLTLSB:
/*
* Jumps to static functions sometimes go straight to their
* offset. Of course, that may not be possible if the jump is
* from init -> core or vice versa, so we need to generate an
* FDESC (and PLT etc) for that.
*/
case R_IA64_PCREL21B:
if (!duplicate_reloc(rela, i))
ret++;
break;
}
}
return ret;
}
int
module_frob_arch_sections (Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings,
struct module *mod)
{
unsigned long core_plts = 0, init_plts = 0, gots = 0, fdescs = 0;
Elf64_Shdr *s, *sechdrs_end = sechdrs + ehdr->e_shnum;
/*
* To store the PLTs and function-descriptors, we expand the .text section for
* core module-code and the .init.text section for initialization code.
*/
for (s = sechdrs; s < sechdrs_end; ++s)
if (strcmp(".core.plt", secstrings + s->sh_name) == 0)
mod->arch.core_plt = s;
else if (strcmp(".init.plt", secstrings + s->sh_name) == 0)
mod->arch.init_plt = s;
else if (strcmp(".got", secstrings + s->sh_name) == 0)
mod->arch.got = s;
else if (strcmp(".opd", secstrings + s->sh_name) == 0)
mod->arch.opd = s;
else if (strcmp(".IA_64.unwind", secstrings + s->sh_name) == 0)
mod->arch.unwind = s;
if (!mod->arch.core_plt || !mod->arch.init_plt || !mod->arch.got || !mod->arch.opd) {
printk(KERN_ERR "%s: sections missing\n", mod->name);
return -ENOEXEC;
}
/* GOT and PLTs can occur in any relocated section... */
for (s = sechdrs + 1; s < sechdrs_end; ++s) {
const Elf64_Rela *rels = (void *)ehdr + s->sh_offset;
unsigned long numrels = s->sh_size/sizeof(Elf64_Rela);
if (s->sh_type != SHT_RELA)
continue;
gots += count_gots(rels, numrels);
fdescs += count_fdescs(rels, numrels);
if (strstr(secstrings + s->sh_name, ".init"))
init_plts += count_plts(rels, numrels);
else
core_plts += count_plts(rels, numrels);
}
mod->arch.core_plt->sh_type = SHT_NOBITS;
mod->arch.core_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
mod->arch.core_plt->sh_addralign = 16;
mod->arch.core_plt->sh_size = core_plts * sizeof(struct plt_entry);
mod->arch.init_plt->sh_type = SHT_NOBITS;
mod->arch.init_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
mod->arch.init_plt->sh_addralign = 16;
mod->arch.init_plt->sh_size = init_plts * sizeof(struct plt_entry);
mod->arch.got->sh_type = SHT_NOBITS;
mod->arch.got->sh_flags = ARCH_SHF_SMALL | SHF_ALLOC;
mod->arch.got->sh_addralign = 8;
mod->arch.got->sh_size = gots * sizeof(struct got_entry);
mod->arch.opd->sh_type = SHT_NOBITS;
mod->arch.opd->sh_flags = SHF_ALLOC;
mod->arch.opd->sh_addralign = 8;
mod->arch.opd->sh_size = fdescs * sizeof(struct fdesc);
DEBUGP("%s: core.plt=%lx, init.plt=%lx, got=%lx, fdesc=%lx\n",
__FUNCTION__, mod->arch.core_plt->sh_size, mod->arch.init_plt->sh_size,
mod->arch.got->sh_size, mod->arch.opd->sh_size);
return 0;
}
static inline int
in_init (const struct module *mod, uint64_t addr)
{
return addr - (uint64_t) mod->module_init < mod->init_size;
}
static inline int
in_core (const struct module *mod, uint64_t addr)
{
return addr - (uint64_t) mod->module_core < mod->core_size;
}
static inline int
is_internal (const struct module *mod, uint64_t value)
{
return in_init(mod, value) || in_core(mod, value);
}
/*
* Get gp-relative offset for the linkage-table entry of VALUE.
*/
static uint64_t
get_ltoff (struct module *mod, uint64_t value, int *okp)
{
struct got_entry *got, *e;
if (!*okp)
return 0;
got = (void *) mod->arch.got->sh_addr;
for (e = got; e < got + mod->arch.next_got_entry; ++e)
if (e->val == value)
goto found;
/* Not enough GOT entries? */
if (e >= (struct got_entry *) (mod->arch.got->sh_addr + mod->arch.got->sh_size))
BUG();
e->val = value;
++mod->arch.next_got_entry;
found:
return (uint64_t) e - mod->arch.gp;
}
static inline int
gp_addressable (struct module *mod, uint64_t value)
{
return value - mod->arch.gp + MAX_LTOFF/2 < MAX_LTOFF;
}
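/*
 * Editorial note: this is the same biased-range trick used in the
 * apply_imm*() helpers -- it tests whether "value" lies within
 * [gp - MAX_LTOFF/2, gp + MAX_LTOFF/2), i.e. whether a 22-bit
 * gp-relative offset can reach it without going through the
 * linkage table.
 */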
/* Get PC-relative PLT entry for this value. Returns 0 on failure. */
static uint64_t
get_plt (struct module *mod, const struct insn *insn, uint64_t value, int *okp)
{
struct plt_entry *plt, *plt_end;
uint64_t target_ip, target_gp;
if (!*okp)
return 0;
if (in_init(mod, (uint64_t) insn)) {
plt = (void *) mod->arch.init_plt->sh_addr;
plt_end = (void *) plt + mod->arch.init_plt->sh_size;
} else {
plt = (void *) mod->arch.core_plt->sh_addr;
plt_end = (void *) plt + mod->arch.core_plt->sh_size;
}
/* "value" is a pointer to a function-descriptor; fetch the target ip/gp from it: */
target_ip = ((uint64_t *) value)[0];
target_gp = ((uint64_t *) value)[1];
/* Look for existing PLT entry. */
while (plt->bundle[0][0]) {
if (plt_target(plt) == target_ip)
goto found;
if (++plt >= plt_end)
BUG();
}
*plt = ia64_plt_template;
if (!patch_plt(mod, plt, target_ip, target_gp)) {
*okp = 0;
return 0;
}
#if ARCH_MODULE_DEBUG
if (plt_target(plt) != target_ip) {
printk("%s: mistargeted PLT: wanted %lx, got %lx\n",
__FUNCTION__, target_ip, plt_target(plt));
*okp = 0;
return 0;
}
#endif
found:
return (uint64_t) plt;
}
/* Get function descriptor for VALUE. */
static uint64_t
get_fdesc (struct module *mod, uint64_t value, int *okp)
{
struct fdesc *fdesc = (void *) mod->arch.opd->sh_addr;
if (!*okp)
return 0;
if (!value) {
printk(KERN_ERR "%s: fdesc for zero requested!\n", mod->name);
return 0;
}
if (!is_internal(mod, value))
/*
* If it's not a module-local entry-point, "value" already points to a
* function-descriptor.
*/
return value;
/* Look for existing function descriptor. */
while (fdesc->ip) {
if (fdesc->ip == value)
return (uint64_t)fdesc;
if ((uint64_t) ++fdesc >= mod->arch.opd->sh_addr + mod->arch.opd->sh_size)
BUG();
}
/* Create new one */
fdesc->ip = value;
fdesc->gp = mod->arch.gp;
return (uint64_t) fdesc;
}
static inline int
do_reloc (struct module *mod, uint8_t r_type, Elf64_Sym *sym, uint64_t addend,
Elf64_Shdr *sec, void *location)
{
enum reloc_target_format format = (r_type >> FORMAT_SHIFT) & FORMAT_MASK;
enum reloc_value_formula formula = (r_type >> VALUE_SHIFT) & VALUE_MASK;
uint64_t val;
int ok = 1;
val = sym->st_value + addend;
switch (formula) {
case RV_SEGREL: /* segment base is arbitrarily chosen to be 0 for kernel modules */
case RV_DIRECT:
break;
case RV_GPREL: val -= mod->arch.gp; break;
case RV_LTREL: val = get_ltoff(mod, val, &ok); break;
case RV_PLTREL: val = get_plt(mod, location, val, &ok); break;
case RV_FPTR: val = get_fdesc(mod, val, &ok); break;
case RV_SECREL: val -= sec->sh_addr; break;
case RV_LTREL_FPTR: val = get_ltoff(mod, get_fdesc(mod, val, &ok), &ok); break;
case RV_PCREL:
switch (r_type) {
case R_IA64_PCREL21B:
if ((in_init(mod, val) && in_core(mod, (uint64_t)location)) ||
(in_core(mod, val) && in_init(mod, (uint64_t)location))) {
/*
* The init section may have been allocated far away from core;
* if the branch won't reach, allocate a plt for it.
*/
uint64_t delta = ((int64_t)val - (int64_t)location) / 16;
if (delta + (1 << 20) >= (1 << 21)) {
val = get_fdesc(mod, val, &ok);
val = get_plt(mod, location, val, &ok);
}
} else if (!is_internal(mod, val))
val = get_plt(mod, location, val, &ok);
/* FALL THROUGH */
default:
val -= bundle(location);
break;
case R_IA64_PCREL32MSB:
case R_IA64_PCREL32LSB:
case R_IA64_PCREL64MSB:
case R_IA64_PCREL64LSB:
val -= (uint64_t) location;
break;
}
switch (r_type) {
case R_IA64_PCREL60B: format = RF_INSN60; break;
case R_IA64_PCREL21B: format = RF_INSN21B; break;
case R_IA64_PCREL21M: format = RF_INSN21M; break;
case R_IA64_PCREL21F: format = RF_INSN21F; break;
default: break;
}
break;
case RV_BDREL:
val -= (uint64_t) (in_init(mod, val) ? mod->module_init : mod->module_core);
break;
case RV_LTV:
/* can link-time value relocs happen here? */
BUG();
break;
case RV_PCREL2:
if (r_type == R_IA64_PCREL21BI) {
if (!is_internal(mod, val)) {
printk(KERN_ERR "%s: %s reloc against non-local symbol (%lx)\n",
__FUNCTION__, reloc_name[r_type], val);
return -ENOEXEC;
}
format = RF_INSN21B;
}
val -= bundle(location);
break;
case RV_SPECIAL:
switch (r_type) {
case R_IA64_IPLTMSB:
case R_IA64_IPLTLSB:
val = get_fdesc(mod, get_plt(mod, location, val, &ok), &ok);
format = RF_64LSB;
if (r_type == R_IA64_IPLTMSB)
format = RF_64MSB;
break;
case R_IA64_SUB:
val = addend - sym->st_value;
format = RF_INSN64;
break;
case R_IA64_LTOFF22X:
if (gp_addressable(mod, val))
val -= mod->arch.gp;
else
val = get_ltoff(mod, val, &ok);
format = RF_INSN22;
break;
case R_IA64_LDXMOV:
if (gp_addressable(mod, val)) {
/* turn "ld8" into "mov": */
DEBUGP("%s: patching ld8 at %p to mov\n", __FUNCTION__, location);
ia64_patch((u64) location, 0x1fff80fe000UL, 0x10000000000UL);
}
return 0;
default:
if (reloc_name[r_type])
printk(KERN_ERR "%s: special reloc %s not supported",
mod->name, reloc_name[r_type]);
else
printk(KERN_ERR "%s: unknown special reloc %x\n",
mod->name, r_type);
return -ENOEXEC;
}
break;
case RV_TPREL:
case RV_LTREL_TPREL:
case RV_DTPMOD:
case RV_LTREL_DTPMOD:
case RV_DTPREL:
case RV_LTREL_DTPREL:
printk(KERN_ERR "%s: %s reloc not supported\n",
mod->name, reloc_name[r_type] ? reloc_name[r_type] : "?");
return -ENOEXEC;
default:
printk(KERN_ERR "%s: unknown reloc %x\n", mod->name, r_type);
return -ENOEXEC;
}
if (!ok)
return -ENOEXEC;
DEBUGP("%s: [%p]<-%016lx = %s(%lx)\n", __FUNCTION__, location, val,
reloc_name[r_type] ? reloc_name[r_type] : "?", sym->st_value + addend);
switch (format) {
case RF_INSN21B: ok = apply_imm21b(mod, location, (int64_t) val / 16); break;
case RF_INSN22: ok = apply_imm22(mod, location, val); break;
case RF_INSN64: ok = apply_imm64(mod, location, val); break;
case RF_INSN60: ok = apply_imm60(mod, location, (int64_t) val / 16); break;
case RF_32LSB: put_unaligned(val, (uint32_t *) location); break;
case RF_64LSB: put_unaligned(val, (uint64_t *) location); break;
case RF_32MSB: /* ia64 Linux is little-endian... */
case RF_64MSB: /* ia64 Linux is little-endian... */
case RF_INSN14: /* must be within-module, i.e., resolved by "ld -r" */
case RF_INSN21M: /* must be within-module, i.e., resolved by "ld -r" */
case RF_INSN21F: /* must be within-module, i.e., resolved by "ld -r" */
printk(KERN_ERR "%s: format %u needed by %s reloc is not supported\n",
mod->name, format, reloc_name[r_type] ? reloc_name[r_type] : "?");
return -ENOEXEC;
default:
printk(KERN_ERR "%s: relocation %s resulted in unknown format %u\n",
mod->name, reloc_name[r_type] ? reloc_name[r_type] : "?", format);
return -ENOEXEC;
}
return ok ? 0 : -ENOEXEC;
}
int
apply_relocate_add (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symindex,
unsigned int relsec, struct module *mod)
{
unsigned int i, n = sechdrs[relsec].sh_size / sizeof(Elf64_Rela);
Elf64_Rela *rela = (void *) sechdrs[relsec].sh_addr;
Elf64_Shdr *target_sec;
int ret;
DEBUGP("%s: applying section %u (%u relocs) to %u\n", __FUNCTION__,
relsec, n, sechdrs[relsec].sh_info);
target_sec = sechdrs + sechdrs[relsec].sh_info;
if (target_sec->sh_entsize == ~0UL)
/*
* If target section wasn't allocated, we don't need to relocate it.
* Happens, e.g., for debug sections.
*/
return 0;
if (!mod->arch.gp) {
/*
* XXX Should have an arch-hook for running this after final section
* addresses have been selected...
*/
/* See if gp can cover the entire core module: */
uint64_t gp = (uint64_t) mod->module_core + MAX_LTOFF / 2;
if (mod->core_size >= MAX_LTOFF)
/*
* This takes advantage of fact that SHF_ARCH_SMALL gets allocated
* at the end of the module.
*/
gp = (uint64_t) mod->module_core + mod->core_size - MAX_LTOFF / 2;
mod->arch.gp = gp;
DEBUGP("%s: placing gp at 0x%lx\n", __FUNCTION__, gp);
}
for (i = 0; i < n; i++) {
ret = do_reloc(mod, ELF64_R_TYPE(rela[i].r_info),
((Elf64_Sym *) sechdrs[symindex].sh_addr
+ ELF64_R_SYM(rela[i].r_info)),
rela[i].r_addend, target_sec,
(void *) target_sec->sh_addr + rela[i].r_offset);
if (ret < 0)
return ret;
}
return 0;
}
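/*
 * Editorial note on the gp placement above: MAX_LTOFF is 4MB (1 << 22),
 * so a gp at module_core + 2MB lets 22-bit gp-relative offsets reach the
 * whole module as long as it is under 4MB.  For larger modules gp is
 * biased towards the end instead, because the small (SHF_ARCH_SMALL)
 * sections that must stay gp-addressable are laid out at the end of
 * the module.
 */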
int
apply_relocate (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symindex,
unsigned int relsec, struct module *mod)
{
printk(KERN_ERR "module %s: REL relocs in section %u unsupported\n", mod->name, relsec);
return -ENOEXEC;
}
/*
* Modules contain a single unwind table which covers both the core and the init text
* sections but since the two are not contiguous, we need to split this table up such that
* we can register (and unregister) each "segment" separately.  Fortunately, this sounds
* more complicated than it really is.
*/
static void
register_unwind_table (struct module *mod)
{
struct unw_table_entry *start = (void *) mod->arch.unwind->sh_addr;
struct unw_table_entry *end = start + mod->arch.unwind->sh_size / sizeof (*start);
struct unw_table_entry tmp, *e1, *e2, *core, *init;
unsigned long num_init = 0, num_core = 0;
/* First, count how many init and core unwind-table entries there are. */
for (e1 = start; e1 < end; ++e1)
if (in_init(mod, e1->start_offset))
++num_init;
else
++num_core;
/*
* Second, sort the table such that all unwind-table entries for the init and core
* text sections are nicely separated. We do this with a stupid bubble sort
* (unwind tables don't get ridiculously huge).
*/
for (e1 = start; e1 < end; ++e1) {
for (e2 = e1 + 1; e2 < end; ++e2) {
if (e2->start_offset < e1->start_offset) {
tmp = *e1;
*e1 = *e2;
*e2 = tmp;
}
}
}
/*
* Third, locate the init and core segments in the unwind table:
*/
if (in_init(mod, start->start_offset)) {
init = start;
core = start + num_init;
} else {
core = start;
init = start + num_core;
}
DEBUGP("%s: name=%s, gp=%lx, num_init=%lu, num_core=%lu\n", __FUNCTION__,
mod->name, mod->arch.gp, num_init, num_core);
/*
* Fourth, register both tables (if not empty).
*/
if (num_core > 0) {
mod->arch.core_unw_table = unw_add_unwind_table(mod->name, 0, mod->arch.gp,
core, core + num_core);
DEBUGP("%s: core: handle=%p [%p-%p)\n", __FUNCTION__,
mod->arch.core_unw_table, core, core + num_core);
}
if (num_init > 0) {
mod->arch.init_unw_table = unw_add_unwind_table(mod->name, 0, mod->arch.gp,
init, init + num_init);
DEBUGP("%s: init: handle=%p [%p-%p)\n", __FUNCTION__,
mod->arch.init_unw_table, init, init + num_init);
}
}
int
module_finalize (const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod)
{
DEBUGP("%s: init: entry=%p\n", __FUNCTION__, mod->init);
if (mod->arch.unwind)
register_unwind_table(mod);
return 0;
}
void
module_arch_cleanup (struct module *mod)
{
if (mod->arch.init_unw_table)
unw_remove_unwind_table(mod->arch.init_unw_table);
if (mod->arch.core_unw_table)
unw_remove_unwind_table(mod->arch.core_unw_table);
}
#ifdef CONFIG_SMP
void
percpu_modcopy (void *pcpudst, const void *src, unsigned long size)
{
unsigned int i;
for (i = 0; i < NR_CPUS; i++)
if (cpu_possible(i))
memcpy(pcpudst + __per_cpu_offset[i], src, size);
}
#endif /* CONFIG_SMP */

302
arch/ia64/kernel/pal.S Normal file

@@ -0,0 +1,302 @@
/*
* PAL Firmware support
* IA-64 Processor Programmers Reference Vol 2
*
* Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
* Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
* Copyright (C) 1999-2001, 2003 Hewlett-Packard Co
* David Mosberger <davidm@hpl.hp.com>
* Stephane Eranian <eranian@hpl.hp.com>
*
* 05/22/2000 eranian Added support for stacked register calls
* 05/24/2000 eranian Added support for physical mode static calls
*/
#include <asm/asmmacro.h>
#include <asm/processor.h>
.data
pal_entry_point:
data8 ia64_pal_default_handler
.text
/*
* Set the PAL entry point address. This could be written in C code, but we do it here
* to keep it all in one module (besides, it's so trivial that it's
* not a big deal).
*
* in0 Address of the PAL entry point (text address, NOT a function descriptor).
*/
GLOBAL_ENTRY(ia64_pal_handler_init)
alloc r3=ar.pfs,1,0,0,0
movl r2=pal_entry_point
;;
st8 [r2]=in0
br.ret.sptk.many rp
END(ia64_pal_handler_init)
/*
* Default PAL call handler. This needs to be coded in assembly because it uses
* the static calling convention, i.e., the RSE may not be used and calls are
* done via "br.cond" (not "br.call").
*/
GLOBAL_ENTRY(ia64_pal_default_handler)
mov r8=-1
br.cond.sptk.many rp
END(ia64_pal_default_handler)
/*
* Make a PAL call using the static calling convention.
*
* in0 Index of PAL service
* in1 - in3 Remaining PAL arguments
* in4 1 ==> clear psr.ic, 0 ==> don't clear psr.ic
*
*/
GLOBAL_ENTRY(ia64_pal_call_static)
.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5)
alloc loc1 = ar.pfs,5,5,0,0
movl loc2 = pal_entry_point
1: {
mov r28 = in0
mov r29 = in1
mov r8 = ip
}
;;
ld8 loc2 = [loc2] // loc2 <- entry point
tbit.nz p6,p7 = in4, 0
adds r8 = 1f-1b,r8
mov loc4=ar.rsc // save RSE configuration
;;
mov ar.rsc=0 // put RSE in enforced lazy, LE mode
mov loc3 = psr
mov loc0 = rp
.body
mov r30 = in2
(p6) rsm psr.i | psr.ic
mov r31 = in3
mov b7 = loc2
(p7) rsm psr.i
;;
(p6) srlz.i
mov rp = r8
br.cond.sptk.many b7
1: mov psr.l = loc3
mov ar.rsc = loc4 // restore RSE configuration
mov ar.pfs = loc1
mov rp = loc0
;;
srlz.d // serialize restoration of psr.l
br.ret.sptk.many b0
END(ia64_pal_call_static)
/*
* Make a PAL call using the stacked registers calling convention.
*
* Inputs:
* in0 Index of PAL service
* in1 - in3 Remaining PAL arguments
*/
GLOBAL_ENTRY(ia64_pal_call_stacked)
.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4)
alloc loc1 = ar.pfs,4,4,4,0
movl loc2 = pal_entry_point
mov r28 = in0 // Index MUST be copied to r28
mov out0 = in0 // AND in0 of PAL function
mov loc0 = rp
.body
;;
ld8 loc2 = [loc2] // loc2 <- entry point
mov out1 = in1
mov out2 = in2
mov out3 = in3
mov loc3 = psr
;;
rsm psr.i
mov b7 = loc2
;;
br.call.sptk.many rp=b7 // now make the call
.ret0: mov psr.l = loc3
mov ar.pfs = loc1
mov rp = loc0
;;
srlz.d // serialize restoration of psr.l
br.ret.sptk.many b0
END(ia64_pal_call_stacked)
/*
* Make a physical mode PAL call using the static registers calling convention.
*
* Inputs:
* in0 Index of PAL service
* in1 - in3 Remaining PAL arguments
*
* PSR_LP, PSR_TB, PSR_ID, PSR_DA are never set by the kernel.
* So we don't need to clear them.
*/
#define PAL_PSR_BITS_TO_CLEAR \
(IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_DB | IA64_PSR_RT | \
IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \
IA64_PSR_DFL | IA64_PSR_DFH)
#define PAL_PSR_BITS_TO_SET \
(IA64_PSR_BN)
GLOBAL_ENTRY(ia64_pal_call_phys_static)
.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4)
alloc loc1 = ar.pfs,4,7,0,0
movl loc2 = pal_entry_point
1: {
mov r28 = in0 // copy procedure index
mov r8 = ip // save ip to compute branch
mov loc0 = rp // save rp
}
.body
;;
ld8 loc2 = [loc2] // loc2 <- entry point
mov r29 = in1 // first argument
mov r30 = in2 // copy arg2
mov r31 = in3 // copy arg3
;;
mov loc3 = psr // save psr
adds r8 = 1f-1b,r8 // calculate return address for call
;;
mov loc4=ar.rsc // save RSE configuration
dep.z loc2=loc2,0,61 // convert pal entry point to physical
tpa r8=r8 // convert rp to physical
;;
mov b7 = loc2 // install target to branch reg
mov ar.rsc=0 // put RSE in enforced lazy, LE mode
movl r16=PAL_PSR_BITS_TO_CLEAR
movl r17=PAL_PSR_BITS_TO_SET
;;
or loc3=loc3,r17 // add in psr the bits to set
;;
andcm r16=loc3,r16 // removes bits to clear from psr
br.call.sptk.many rp=ia64_switch_mode_phys
.ret1: mov rp = r8 // install return address (physical)
mov loc5 = r19
mov loc6 = r20
br.cond.sptk.many b7
1:
mov ar.rsc=0 // put RSE in enforced lazy, LE mode
mov r16=loc3 // r16= original psr
mov r19=loc5
mov r20=loc6
br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
.ret2:
mov psr.l = loc3 // restore init PSR
mov ar.pfs = loc1
mov rp = loc0
;;
mov ar.rsc=loc4 // restore RSE configuration
srlz.d // serialize restoration of psr.l
br.ret.sptk.many b0
END(ia64_pal_call_phys_static)
/*
* Make a PAL call using the stacked registers in physical mode.
*
* Inputs:
* in0 Index of PAL service
* in1 - in3 Remaining PAL arguments
*/
GLOBAL_ENTRY(ia64_pal_call_phys_stacked)
.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5)
alloc loc1 = ar.pfs,5,7,4,0
movl loc2 = pal_entry_point
1: {
mov r28 = in0 // copy procedure index
mov loc0 = rp // save rp
}
.body
;;
ld8 loc2 = [loc2] // loc2 <- entry point
mov out0 = in0 // first argument
mov out1 = in1 // copy arg2
mov out2 = in2 // copy arg3
mov out3 = in3 // copy arg4
;;
mov loc3 = psr // save psr
;;
mov loc4=ar.rsc // save RSE configuration
dep.z loc2=loc2,0,61 // convert pal entry point to physical
;;
mov ar.rsc=0 // put RSE in enforced lazy, LE mode
movl r16=PAL_PSR_BITS_TO_CLEAR
movl r17=PAL_PSR_BITS_TO_SET
;;
or loc3=loc3,r17 // add in psr the bits to set
mov b7 = loc2 // install target to branch reg
;;
andcm r16=loc3,r16 // removes bits to clear from psr
br.call.sptk.many rp=ia64_switch_mode_phys
.ret6:
mov loc5 = r19
mov loc6 = r20
br.call.sptk.many rp=b7 // now make the call
.ret7:
mov ar.rsc=0 // put RSE in enforced lazy, LE mode
mov r16=loc3 // r16= original psr
mov r19=loc5
mov r20=loc6
br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
.ret8: mov psr.l = loc3 // restore init PSR
mov ar.pfs = loc1
mov rp = loc0
;;
mov ar.rsc=loc4 // restore RSE configuration
srlz.d // serialize restoration of psr.l
br.ret.sptk.many b0
END(ia64_pal_call_phys_stacked)
/*
* Save the scratch fp regs which aren't already saved in pt_regs (fp10-fp15).
*
* NOTE: We need to do this since firmware (SAL and PAL) may use any of the scratch
* regs in the fp-low partition.
*
* Inputs:
* in0 Address of stack storage for fp regs
*/
GLOBAL_ENTRY(ia64_save_scratch_fpregs)
alloc r3=ar.pfs,1,0,0,0
add r2=16,in0
;;
stf.spill [in0] = f10,32
stf.spill [r2] = f11,32
;;
stf.spill [in0] = f12,32
stf.spill [r2] = f13,32
;;
stf.spill [in0] = f14,32
stf.spill [r2] = f15,32
br.ret.sptk.many rp
END(ia64_save_scratch_fpregs)
/*
* Load the scratch fp regs (fp10-fp15)
*
* Inputs:
* in0 Address of stack storage for fp regs
*/
GLOBAL_ENTRY(ia64_load_scratch_fpregs)
alloc r3=ar.pfs,1,0,0,0
add r2=16,in0
;;
ldf.fill f10 = [in0],32
ldf.fill f11 = [r2],32
;;
ldf.fill f12 = [in0],32
ldf.fill f13 = [r2],32
;;
ldf.fill f14 = [in0],32
ldf.fill f15 = [r2],32
br.ret.sptk.many rp
END(ia64_load_scratch_fpregs)

1023
arch/ia64/kernel/palinfo.c Normal file

Diff is too large to display.

189
arch/ia64/kernel/patch.c Normal file

@@ -0,0 +1,189 @@
/*
* Instruction-patching support.
*
* Copyright (C) 2003 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
*/
#include <linux/init.h>
#include <linux/string.h>
#include <asm/patch.h>
#include <asm/processor.h>
#include <asm/sections.h>
#include <asm/system.h>
#include <asm/unistd.h>
/*
* This was adapted from code written by Tony Luck:
*
* The 64-bit value in a "movl reg=value" is scattered between the two words of the bundle
* like this:
*
* 6 6 5 4 3 2 1
* 3210987654321098765432109876543210987654321098765432109876543210
* ABBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCDEEEEEFFFFFFFFFGGGGGGG
*
* CCCCCCCCCCCCCCCCCCxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
* xxxxAFFFFFFFFFEEEEEDxGGGGGGGxxxxxxxxxxxxxBBBBBBBBBBBBBBBBBBBBBBB
*/
static u64
get_imm64 (u64 insn_addr)
{
u64 *p = (u64 *) (insn_addr & -16); /* mask out slot number */
return ( (p[1] & 0x0800000000000000UL) << 4) | /*A*/
((p[1] & 0x00000000007fffffUL) << 40) | /*B*/
((p[0] & 0xffffc00000000000UL) >> 24) | /*C*/
((p[1] & 0x0000100000000000UL) >> 23) | /*D*/
((p[1] & 0x0003e00000000000UL) >> 29) | /*E*/
((p[1] & 0x07fc000000000000UL) >> 43) | /*F*/
((p[1] & 0x000007f000000000UL) >> 36); /*G*/
}
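/*
 * Note: get_imm64() is the read-side counterpart of ia64_patch_imm64()
 * further below: it gathers the A-G fields shown in the diagram above
 * back into the original 64-bit immediate of a "movl reg=value".
 */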
/* Patch instruction with "val" where "mask" has 1 bits. */
void
ia64_patch (u64 insn_addr, u64 mask, u64 val)
{
u64 m0, m1, v0, v1, b0, b1, *b = (u64 *) (insn_addr & -16);
# define insn_mask ((1UL << 41) - 1)
unsigned long shift;
b0 = b[0]; b1 = b[1];
shift = 5 + 41 * (insn_addr % 16); /* 5 bits of template, then 3 x 41-bit instructions */
if (shift >= 64) {
m1 = mask << (shift - 64);
v1 = val << (shift - 64);
} else {
m0 = mask << shift; m1 = mask >> (64 - shift);
v0 = val << shift; v1 = val >> (64 - shift);
b[0] = (b0 & ~m0) | (v0 & m0);
}
b[1] = (b1 & ~m1) | (v1 & m1);
}
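/*
 * Worked example for the shift computation above: the slot number is
 * encoded in the low bits of insn_addr, so insn_addr % 16 is 0, 1 or 2.
 * Slot 0 gives shift = 5: the 41-bit instruction lies entirely in b[0]
 * (bits 5..45).  Slot 2 gives shift = 87 >= 64: it lies entirely in b[1]
 * (bits 23..63).  Slot 1 gives shift = 46: the instruction straddles both
 * words, which is why the else-branch splits mask and val into a low part
 * (applied to b[0]) and a high part (applied to b[1]).
 */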
void
ia64_patch_imm64 (u64 insn_addr, u64 val)
{
ia64_patch(insn_addr,
0x01fffefe000UL, ( ((val & 0x8000000000000000UL) >> 27) /* bit 63 -> 36 */
| ((val & 0x0000000000200000UL) << 0) /* bit 21 -> 21 */
| ((val & 0x00000000001f0000UL) << 6) /* bit 16 -> 22 */
| ((val & 0x000000000000ff80UL) << 20) /* bit 7 -> 27 */
| ((val & 0x000000000000007fUL) << 13) /* bit 0 -> 13 */));
ia64_patch(insn_addr - 1, 0x1ffffffffffUL, val >> 22);
}
void
ia64_patch_imm60 (u64 insn_addr, u64 val)
{
ia64_patch(insn_addr,
0x011ffffe000UL, ( ((val & 0x0800000000000000UL) >> 23) /* bit 59 -> 36 */
| ((val & 0x00000000000fffffUL) << 13) /* bit 0 -> 13 */));
ia64_patch(insn_addr - 1, 0x1fffffffffcUL, val >> 18);
}
/*
* Sometimes we need to load the physical address of a kernel
* object.  Often we can convert the virtual address to physical
* at execution time, but sometimes (either for performance reasons
* or during error recovery) we cannot do this.  Patch the marked
* bundles to load the physical address instead; the patch list is a
* table of 32-bit self-relative offsets to the bundles that need fixing.
*/
void __init
ia64_patch_vtop (unsigned long start, unsigned long end)
{
s32 *offp = (s32 *) start;
u64 ip;
while (offp < (s32 *) end) {
ip = (u64) offp + *offp;
/* replace virtual address with corresponding physical address: */
ia64_patch_imm64(ip, ia64_tpa(get_imm64(ip)));
ia64_fc((void *) ip);
++offp;
}
ia64_sync_i();
ia64_srlz_i();
}
void
ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
{
static int first_time = 1;
int need_workaround;
s32 *offp = (s32 *) start;
u64 *wp;
need_workaround = (local_cpu_data->family == 0x1f && local_cpu_data->model == 0);
if (first_time) {
first_time = 0;
if (need_workaround)
printk(KERN_INFO "Leaving McKinley Errata 9 workaround enabled\n");
else
printk(KERN_INFO "McKinley Errata 9 workaround not needed; "
"disabling it\n");
}
if (need_workaround)
return;
while (offp < (s32 *) end) {
wp = (u64 *) ia64_imva((char *) offp + *offp);
wp[0] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */
wp[1] = 0x0004000000000200UL;
wp[2] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */
wp[3] = 0x0084006880000200UL;
ia64_fc(wp); ia64_fc(wp + 2);
++offp;
}
ia64_sync_i();
ia64_srlz_i();
}
static void
patch_fsyscall_table (unsigned long start, unsigned long end)
{
extern unsigned long fsyscall_table[NR_syscalls];
s32 *offp = (s32 *) start;
u64 ip;
while (offp < (s32 *) end) {
ip = (u64) ia64_imva((char *) offp + *offp);
ia64_patch_imm64(ip, (u64) fsyscall_table);
ia64_fc((void *) ip);
++offp;
}
ia64_sync_i();
ia64_srlz_i();
}
static void
patch_brl_fsys_bubble_down (unsigned long start, unsigned long end)
{
extern char fsys_bubble_down[];
s32 *offp = (s32 *) start;
u64 ip;
while (offp < (s32 *) end) {
ip = (u64) offp + *offp;
ia64_patch_imm60((u64) ia64_imva((void *) ip),
(u64) (fsys_bubble_down - (ip & -16)) / 16);
ia64_fc((void *) ip);
++offp;
}
ia64_sync_i();
ia64_srlz_i();
}
void
ia64_patch_gate (void)
{
# define START(name) ((unsigned long) __start_gate_##name##_patchlist)
# define END(name) ((unsigned long)__end_gate_##name##_patchlist)
patch_fsyscall_table(START(fsyscall), END(fsyscall));
patch_brl_fsys_bubble_down(START(brl_fsys_bubble_down), END(brl_fsys_bubble_down));
ia64_patch_vtop(START(vtop), END(vtop));
ia64_patch_mckinley_e9(START(mckinley_e9), END(mckinley_e9));
}

6676
arch/ia64/kernel/perfmon.c Normal file

Diff is too large to display.


@@ -0,0 +1,306 @@
/*
* Copyright (C) 2002-2003 Hewlett-Packard Co
* Stephane Eranian <eranian@hpl.hp.com>
*
* This file implements the default sampling buffer format
* for the Linux/ia64 perfmon-2 subsystem.
*/
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/module.h>
#include <linux/config.h>
#include <linux/init.h>
#include <asm/delay.h>
#include <linux/smp.h>
#include <asm/perfmon.h>
#include <asm/perfmon_default_smpl.h>
MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
MODULE_DESCRIPTION("perfmon default sampling format");
MODULE_LICENSE("GPL");
MODULE_PARM(debug, "i");
MODULE_PARM_DESC(debug, "debug");
MODULE_PARM(debug_ovfl, "i");
MODULE_PARM_DESC(debug_ovfl, "debug ovfl");
#define DEFAULT_DEBUG 1
#ifdef DEFAULT_DEBUG
#define DPRINT(a) \
do { \
if (unlikely(debug >0)) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \
} while (0)
#define DPRINT_ovfl(a) \
do { \
if (unlikely(debug_ovfl >0)) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \
} while (0)
#else
#define DPRINT(a)
#define DPRINT_ovfl(a)
#endif
static int debug, debug_ovfl;
static int
default_validate(struct task_struct *task, unsigned int flags, int cpu, void *data)
{
pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t*)data;
int ret = 0;
if (data == NULL) {
DPRINT(("[%d] no argument passed\n", task->pid));
return -EINVAL;
}
DPRINT(("[%d] validate flags=0x%x CPU%d\n", task->pid, flags, cpu));
/*
* must hold at least the buffer header + one minimally sized entry
*/
if (arg->buf_size < PFM_DEFAULT_SMPL_MIN_BUF_SIZE) return -EINVAL;
DPRINT(("buf_size=%lu\n", arg->buf_size));
return ret;
}
static int
default_get_size(struct task_struct *task, unsigned int flags, int cpu, void *data, unsigned long *size)
{
pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t *)data;
/*
* size has been validated in default_validate
*/
*size = arg->buf_size;
return 0;
}
static int
default_init(struct task_struct *task, void *buf, unsigned int flags, int cpu, void *data)
{
pfm_default_smpl_hdr_t *hdr;
pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t *)data;
hdr = (pfm_default_smpl_hdr_t *)buf;
hdr->hdr_version = PFM_DEFAULT_SMPL_VERSION;
hdr->hdr_buf_size = arg->buf_size;
hdr->hdr_cur_offs = sizeof(*hdr);
hdr->hdr_overflows = 0UL;
hdr->hdr_count = 0UL;
DPRINT(("[%d] buffer=%p buf_size=%lu hdr_size=%lu hdr_version=%u cur_offs=%lu\n",
task->pid,
buf,
hdr->hdr_buf_size,
sizeof(*hdr),
hdr->hdr_version,
hdr->hdr_cur_offs));
return 0;
}
static int
default_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, struct pt_regs *regs, unsigned long stamp)
{
pfm_default_smpl_hdr_t *hdr;
pfm_default_smpl_entry_t *ent;
void *cur, *last;
unsigned long *e, entry_size;
unsigned int npmds, i;
unsigned char ovfl_pmd;
unsigned char ovfl_notify;
if (unlikely(buf == NULL || arg == NULL || regs == NULL || task == NULL)) {
DPRINT(("[%d] invalid arguments buf=%p arg=%p\n", task->pid, buf, arg));
return -EINVAL;
}
hdr = (pfm_default_smpl_hdr_t *)buf;
cur = buf+hdr->hdr_cur_offs;
last = buf+hdr->hdr_buf_size;
ovfl_pmd = arg->ovfl_pmd;
ovfl_notify = arg->ovfl_notify;
/*
* precheck for sanity
*/
if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) goto full;
npmds = hweight64(arg->smpl_pmds[0]);
ent = (pfm_default_smpl_entry_t *)cur;
prefetch(arg->smpl_pmds_values);
entry_size = sizeof(*ent) + (npmds << 3);
/* position for first pmd */
e = (unsigned long *)(ent+1);
hdr->hdr_count++;
DPRINT_ovfl(("[%d] count=%lu cur=%p last=%p free_bytes=%lu ovfl_pmd=%d ovfl_notify=%d npmds=%u\n",
task->pid,
hdr->hdr_count,
cur, last,
last-cur,
ovfl_pmd,
ovfl_notify, npmds));
/*
* current = task running at the time of the overflow.
*
* per-task mode:
* - this is usually the task being monitored.
* Under certain conditions, it might be a different task
*
* system-wide:
* - this is not necessarily the task controlling the session
*/
ent->pid = current->pid;
ent->ovfl_pmd = ovfl_pmd;
ent->last_reset_val = arg->pmd_last_reset; //pmd[0].reg_last_reset_val;
/*
* where did the fault happen (includes slot number)
*/
ent->ip = regs->cr_iip | ((regs->cr_ipsr >> 41) & 0x3);
ent->tstamp = stamp;
ent->cpu = smp_processor_id();
ent->set = arg->active_set;
ent->tgid = current->tgid;
/*
* selectively store PMDs in increasing index number
*/
if (npmds) {
unsigned long *val = arg->smpl_pmds_values;
for(i=0; i < npmds; i++) {
*e++ = *val++;
}
}
/*
* update position for next entry
*/
hdr->hdr_cur_offs += entry_size;
cur += entry_size;
/*
* post check to avoid losing the last sample
*/
if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) goto full;
/*
* keep same ovfl_pmds, ovfl_notify
*/
arg->ovfl_ctrl.bits.notify_user = 0;
arg->ovfl_ctrl.bits.block_task = 0;
arg->ovfl_ctrl.bits.mask_monitoring = 0;
arg->ovfl_ctrl.bits.reset_ovfl_pmds = 1; /* reset before returning from interrupt handler */
return 0;
full:
DPRINT_ovfl(("sampling buffer full free=%lu, count=%lu, ovfl_notify=%d\n", last-cur, hdr->hdr_count, ovfl_notify));
/*
* increment the number of buffer overflows; this is
* important for detecting duplicate sets of samples.
*/
hdr->hdr_overflows++;
/*
* if no notification requested, then we saturate the buffer
*/
if (ovfl_notify == 0) {
arg->ovfl_ctrl.bits.notify_user = 0;
arg->ovfl_ctrl.bits.block_task = 0;
arg->ovfl_ctrl.bits.mask_monitoring = 1;
arg->ovfl_ctrl.bits.reset_ovfl_pmds = 0;
} else {
arg->ovfl_ctrl.bits.notify_user = 1;
arg->ovfl_ctrl.bits.block_task = 1; /* ignored for non-blocking context */
arg->ovfl_ctrl.bits.mask_monitoring = 1;
arg->ovfl_ctrl.bits.reset_ovfl_pmds = 0; /* no reset now */
}
return -1; /* we are full, sorry */
}
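/*
 * Resulting buffer layout: a pfm_default_smpl_hdr_t at offset 0, followed
 * by variable-size entries.  Each entry is a pfm_default_smpl_entry_t
 * immediately followed by the sampled PMD values (8 bytes each, stored in
 * increasing register-index order), so entry_size = sizeof(entry) +
 * 8 * npmds; e.g. three sampled PMDs add 24 bytes to the fixed header.
 */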
static int
default_restart(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs)
{
pfm_default_smpl_hdr_t *hdr;
hdr = (pfm_default_smpl_hdr_t *)buf;
hdr->hdr_count = 0UL;
hdr->hdr_cur_offs = sizeof(*hdr);
ctrl->bits.mask_monitoring = 0;
ctrl->bits.reset_ovfl_pmds = 1; /* uses long-reset values */
return 0;
}
static int
default_exit(struct task_struct *task, void *buf, struct pt_regs *regs)
{
DPRINT(("[%d] exit(%p)\n", task->pid, buf));
return 0;
}
static pfm_buffer_fmt_t default_fmt={
.fmt_name = "default_format",
.fmt_uuid = PFM_DEFAULT_SMPL_UUID,
.fmt_arg_size = sizeof(pfm_default_smpl_arg_t),
.fmt_validate = default_validate,
.fmt_getsize = default_get_size,
.fmt_init = default_init,
.fmt_handler = default_handler,
.fmt_restart = default_restart,
.fmt_restart_active = default_restart,
.fmt_exit = default_exit,
};
static int __init
pfm_default_smpl_init_module(void)
{
int ret;
ret = pfm_register_buffer_fmt(&default_fmt);
if (ret == 0) {
printk("perfmon_default_smpl: %s v%u.%u registered\n",
default_fmt.fmt_name,
PFM_DEFAULT_SMPL_VERSION_MAJ,
PFM_DEFAULT_SMPL_VERSION_MIN);
} else {
printk("perfmon_default_smpl: %s cannot register ret=%d\n",
default_fmt.fmt_name,
ret);
}
return ret;
}
static void __exit
pfm_default_smpl_cleanup_module(void)
{
int ret;
ret = pfm_unregister_buffer_fmt(default_fmt.fmt_uuid);
printk("perfmon_default_smpl: unregister %s=%d\n", default_fmt.fmt_name, ret);
}
module_init(pfm_default_smpl_init_module);
module_exit(pfm_default_smpl_cleanup_module);


@@ -0,0 +1,45 @@
/*
* This file contains the generic PMU register description tables
* and pmc checker used by perfmon.c.
*
* Copyright (C) 2002-2003 Hewlett Packard Co
* Stephane Eranian <eranian@hpl.hp.com>
*/
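/*
 * A hedged reading of the tables below (not spelled out in the original
 * comments): each row describes one PMC/PMD register -- its type, default
 * value, reserved-field mask, optional read/write checker functions and
 * two dependency bitmasks tying it to related PMDs/PMCs, where RDEP(n)
 * presumably expands to a single-bit mask for register n.
 */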
static pfm_reg_desc_t pfm_gen_pmc_desc[PMU_MAX_PMCS]={
/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
{ PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
};
static pfm_reg_desc_t pfm_gen_pmd_desc[PMU_MAX_PMDS]={
/* pmd0 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
/* pmd1 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
/* pmd2 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
/* pmd3 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
/* pmd4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}},
/* pmd5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}},
/* pmd6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}},
/* pmd7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}},
{ PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
};
/*
* impl_pmcs, impl_pmds are computed at runtime to minimize errors!
*/
static pmu_config_t pmu_conf_gen={
.pmu_name = "Generic",
.pmu_family = 0xff, /* any */
.ovfl_val = (1UL << 32) - 1,
.num_ibrs = 0, /* does not use */
.num_dbrs = 0, /* does not use */
.pmd_desc = pfm_gen_pmd_desc,
.pmc_desc = pfm_gen_pmc_desc
};


@@ -0,0 +1,115 @@
/*
* This file contains the Itanium PMU register description tables
* and pmc checker used by perfmon.c.
*
* Copyright (C) 2002-2003 Hewlett Packard Co
* Stephane Eranian <eranian@hpl.hp.com>
*/
static int pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
static pfm_reg_desc_t pfm_ita_pmc_desc[PMU_MAX_PMCS]={
/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc4 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc5 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc6 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc7 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc8 */ { PFM_REG_CONFIG , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc9 */ { PFM_REG_CONFIG , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc10 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc11 */ { PFM_REG_MONITOR , 6, 0x0000000010000000UL, -1UL, NULL, pfm_ita_pmc_check, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc13 */ { PFM_REG_CONFIG , 0, 0x0003ffff00000001UL, -1UL, NULL, pfm_ita_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
{ PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
};
static pfm_reg_desc_t pfm_ita_pmd_desc[PMU_MAX_PMDS]={
/* pmd0 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
/* pmd1 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
/* pmd2 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
/* pmd3 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
/* pmd4 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}},
/* pmd5 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}},
/* pmd6 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}},
/* pmd7 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}},
/* pmd8 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
/* pmd9 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
/* pmd10 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
/* pmd11 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
/* pmd12 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
/* pmd13 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
/* pmd14 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
/* pmd15 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
/* pmd16 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
/* pmd17 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
{ PFM_REG_END , 0, 0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
};
static int
pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs)
{
int ret;
int is_loaded;
/* sanity check */
if (ctx == NULL) return -EINVAL;
is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED;
/*
* we must clear the (instruction) debug registers if pmc13.ta bit is cleared
* before they are written (fl_using_dbreg==0) to avoid picking up stale information.
*/
if (cnum == 13 && is_loaded && ((*val & 0x1) == 0UL) && ctx->ctx_fl_using_dbreg == 0) {
DPRINT(("pmc[%d]=0x%lx has active pmc13.ta cleared, clearing ibr\n", cnum, *val));
/* don't mix debug with perfmon */
if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
/*
* a count of 0 will mark the debug registers as in use and also
* ensure that they are properly cleared.
*/
ret = pfm_write_ibr_dbr(1, ctx, NULL, 0, regs);
if (ret) return ret;
}
/*
* we must clear the (data) debug registers if pmc11.pt bit is cleared
* before they are written (fl_using_dbreg==0) to avoid picking up stale information.
*/
if (cnum == 11 && is_loaded && ((*val >> 28)& 0x1) == 0 && ctx->ctx_fl_using_dbreg == 0) {
DPRINT(("pmc[%d]=0x%lx has active pmc11.pt cleared, clearing dbr\n", cnum, *val));
/* don't mix debug with perfmon */
if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
/*
* a count of 0 will mark the debug registers as in use and also
* ensure that they are properly cleared.
*/
ret = pfm_write_ibr_dbr(0, ctx, NULL, 0, regs);
if (ret) return ret;
}
return 0;
}
/*
* impl_pmcs, impl_pmds are computed at runtime to minimize errors!
*/
static pmu_config_t pmu_conf_ita={
.pmu_name = "Itanium",
.pmu_family = 0x7,
.ovfl_val = (1UL << 32) - 1,
.pmd_desc = pfm_ita_pmd_desc,
.pmc_desc = pfm_ita_pmc_desc,
.num_ibrs = 8,
.num_dbrs = 8,
.use_rr_dbregs = 1, /* debug registers are used for range restrictions */
};


@@ -0,0 +1,187 @@
/*
* This file contains the McKinley PMU register description tables
* and pmc checker used by perfmon.c.
*
* Copyright (C) 2002-2003 Hewlett Packard Co
* Stephane Eranian <eranian@hpl.hp.com>
*/
static int pfm_mck_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
static pfm_reg_desc_t pfm_mck_pmc_desc[PMU_MAX_PMCS]={
/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc4 */ { PFM_REG_COUNTING, 6, 0x0000000000800000UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc5 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc6 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc7 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc8 */ { PFM_REG_CONFIG , 0, 0xffffffff3fffffffUL, 0xffffffff3ffffffbUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc9 */ { PFM_REG_CONFIG , 0, 0xffffffff3ffffffcUL, 0xffffffff3ffffffbUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc10 */ { PFM_REG_MONITOR , 4, 0x0UL, 0xffffUL, NULL, pfm_mck_pmc_check, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc11 */ { PFM_REG_MONITOR , 6, 0x0UL, 0x30f01cf, NULL, pfm_mck_pmc_check, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, 0xffffUL, NULL, pfm_mck_pmc_check, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc13 */ { PFM_REG_CONFIG , 0, 0x00002078fefefefeUL, 0x1e00018181818UL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc14 */ { PFM_REG_CONFIG , 0, 0x0db60db60db60db6UL, 0x2492UL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc15 */ { PFM_REG_CONFIG , 0, 0x00000000fffffff0UL, 0xfUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
{ PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
};
static pfm_reg_desc_t pfm_mck_pmd_desc[PMU_MAX_PMDS]={
/* pmd0 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
/* pmd1 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
/* pmd2 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
/* pmd3 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
/* pmd4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}},
/* pmd5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}},
/* pmd6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}},
/* pmd7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}},
/* pmd8 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
/* pmd9 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
/* pmd10 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
/* pmd11 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
/* pmd12 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
/* pmd13 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
/* pmd14 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
/* pmd15 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
/* pmd16 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
/* pmd17 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
{ PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
};
/*
* PMC reserved fields must have their power-up values preserved
*/
static int
pfm_mck_reserved(unsigned int cnum, unsigned long *val, struct pt_regs *regs)
{
unsigned long tmp1, tmp2, ival = *val;
/* remove reserved areas from user value */
tmp1 = ival & PMC_RSVD_MASK(cnum);
/* get reserved fields values */
tmp2 = PMC_DFL_VAL(cnum) & ~PMC_RSVD_MASK(cnum);
*val = tmp1 | tmp2;
DPRINT(("pmc[%d]=0x%lx, mask=0x%lx, reset=0x%lx, val=0x%lx\n",
cnum, ival, PMC_RSVD_MASK(cnum), PMC_DFL_VAL(cnum), *val));
return 0;
}
/*
* task can be NULL if the context is unloaded
*/
static int
pfm_mck_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs)
{
int ret = 0, check_case1 = 0;
unsigned long val8 = 0, val14 = 0, val13 = 0;
int is_loaded;
/* first preserve the reserved fields */
pfm_mck_reserved(cnum, val, regs);
/* sanity check */
if (ctx == NULL) return -EINVAL;
is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED;
/*
* we must clear the debug registers if pmc13 has a value which enables
* memory pipeline event constraints. In this case we need to clear the
* debug registers if they have not yet been accessed. This is required
* to avoid picking up stale state.
* PMC13 is "active" if:
* one of the pmc13.cfg_dbrpXX fields is different from 0x3
* AND
* the corresponding pmc13.ena_dbrpXX bit is set.
*/
DPRINT(("cnum=%u val=0x%lx, using_dbreg=%d loaded=%d\n", cnum, *val, ctx->ctx_fl_using_dbreg, is_loaded));
if (cnum == 13 && is_loaded
&& (*val & 0x1e00000000000UL) && (*val & 0x18181818UL) != 0x18181818UL && ctx->ctx_fl_using_dbreg == 0) {
DPRINT(("pmc[%d]=0x%lx has active pmc13 settings, clearing dbr\n", cnum, *val));
/* don't mix debug with perfmon */
if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
/*
* a count of 0 will mark the debug registers as in use and also
* ensure that they are properly cleared.
*/
ret = pfm_write_ibr_dbr(PFM_DATA_RR, ctx, NULL, 0, regs);
if (ret) return ret;
}
/*
* we must clear the (instruction) debug registers if any pmc14.ibrpX bit is enabled
* before they have been accessed (fl_using_dbreg==0), to avoid picking up stale information.
*/
if (cnum == 14 && is_loaded && ((*val & 0x2222UL) != 0x2222UL) && ctx->ctx_fl_using_dbreg == 0) {
DPRINT(("pmc[%d]=0x%lx has active pmc14 settings, clearing ibr\n", cnum, *val));
/* don't mix debug with perfmon */
if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
/*
* a count of 0 will mark the debug registers as in use and also
* ensure that they are properly cleared.
*/
ret = pfm_write_ibr_dbr(PFM_CODE_RR, ctx, NULL, 0, regs);
if (ret) return ret;
}
switch(cnum) {
case 4: *val |= 1UL << 23; /* force power enable bit */
break;
case 8: val8 = *val;
val13 = ctx->ctx_pmcs[13];
val14 = ctx->ctx_pmcs[14];
check_case1 = 1;
break;
case 13: val8 = ctx->ctx_pmcs[8];
val13 = *val;
val14 = ctx->ctx_pmcs[14];
check_case1 = 1;
break;
case 14: val8 = ctx->ctx_pmcs[8];
val13 = ctx->ctx_pmcs[13];
val14 = *val;
check_case1 = 1;
break;
}
/* check illegal configuration which can produce inconsistencies in tagging
* i-side events in L1D and L2 caches
*/
if (check_case1) {
ret = ((val13 >> 45) & 0xf) == 0
&& ((val8 & 0x1) == 0)
&& ((((val14>>1) & 0x3) == 0x2 || ((val14>>1) & 0x3) == 0x0)
||(((val14>>4) & 0x3) == 0x2 || ((val14>>4) & 0x3) == 0x0));
if (ret) DPRINT((KERN_DEBUG "perfmon: failure check_case1\n"));
}
return ret ? -EINVAL : 0;
}
/*
* impl_pmcs, impl_pmds are computed at runtime to minimize errors!
*/
static pmu_config_t pmu_conf_mck={
.pmu_name = "Itanium 2",
.pmu_family = 0x1f,
.flags = PFM_PMU_IRQ_RESEND,
.ovfl_val = (1UL << 47) - 1,
.pmd_desc = pfm_mck_pmd_desc,
.pmc_desc = pfm_mck_pmc_desc,
.num_ibrs = 8,
.num_dbrs = 8,
.use_rr_dbregs = 1 /* debug registers are used for range restrictions */
};
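/*
 * Note the contrast with pmu_conf_ita above: ovfl_val here is
 * (1UL << 47) - 1 because the Itanium 2 generic counters are 47 bits
 * wide, versus the 32-bit counters of the original Itanium.
 */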

800
arch/ia64/kernel/process.c Normal file

@@ -0,0 +1,800 @@
/*
* Architecture-specific setup.
*
* Copyright (C) 1998-2003 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
*/
#define __KERNEL_SYSCALLS__ /* see <asm/unistd.h> */
#include <linux/config.h>
#include <linux/cpu.h>
#include <linux/pm.h>
#include <linux/elf.h>
#include <linux/errno.h>
#include <linux/kallsyms.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/personality.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/smp_lock.h>
#include <linux/stddef.h>
#include <linux/thread_info.h>
#include <linux/unistd.h>
#include <linux/efi.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <asm/cpu.h>
#include <asm/delay.h>
#include <asm/elf.h>
#include <asm/ia32.h>
#include <asm/irq.h>
#include <asm/pgalloc.h>
#include <asm/processor.h>
#include <asm/sal.h>
#include <asm/tlbflush.h>
#include <asm/uaccess.h>
#include <asm/unwind.h>
#include <asm/user.h>
#include "entry.h"
#ifdef CONFIG_PERFMON
# include <asm/perfmon.h>
#endif
#include "sigframe.h"
void (*ia64_mark_idle)(int);
static cpumask_t cpu_idle_map;
unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);
void
ia64_do_show_stack (struct unw_frame_info *info, void *arg)
{
unsigned long ip, sp, bsp;
char buf[128]; /* don't make it so big that it overflows the stack! */
printk("\nCall Trace:\n");
do {
unw_get_ip(info, &ip);
if (ip == 0)
break;
unw_get_sp(info, &sp);
unw_get_bsp(info, &bsp);
snprintf(buf, sizeof(buf),
" [<%016lx>] %%s\n"
" sp=%016lx bsp=%016lx\n",
ip, sp, bsp);
print_symbol(buf, ip);
} while (unw_unwind(info) >= 0);
}
void
show_stack (struct task_struct *task, unsigned long *sp)
{
if (!task)
unw_init_running(ia64_do_show_stack, NULL);
else {
struct unw_frame_info info;
unw_init_from_blocked_task(&info, task);
ia64_do_show_stack(&info, NULL);
}
}
void
dump_stack (void)
{
show_stack(NULL, NULL);
}
EXPORT_SYMBOL(dump_stack);
void
show_regs (struct pt_regs *regs)
{
unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
print_modules();
printk("\nPid: %d, CPU %d, comm: %20s\n", current->pid, smp_processor_id(), current->comm);
printk("psr : %016lx ifs : %016lx ip : [<%016lx>] %s\n",
regs->cr_ipsr, regs->cr_ifs, ip, print_tainted());
print_symbol("ip is at %s\n", ip);
printk("unat: %016lx pfs : %016lx rsc : %016lx\n",
regs->ar_unat, regs->ar_pfs, regs->ar_rsc);
printk("rnat: %016lx bsps: %016lx pr : %016lx\n",
regs->ar_rnat, regs->ar_bspstore, regs->pr);
printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n",
regs->loadrs, regs->ar_ccv, regs->ar_fpsr);
printk("csd : %016lx ssd : %016lx\n", regs->ar_csd, regs->ar_ssd);
printk("b0 : %016lx b6 : %016lx b7 : %016lx\n", regs->b0, regs->b6, regs->b7);
printk("f6 : %05lx%016lx f7 : %05lx%016lx\n",
regs->f6.u.bits[1], regs->f6.u.bits[0],
regs->f7.u.bits[1], regs->f7.u.bits[0]);
printk("f8 : %05lx%016lx f9 : %05lx%016lx\n",
regs->f8.u.bits[1], regs->f8.u.bits[0],
regs->f9.u.bits[1], regs->f9.u.bits[0]);
printk("f10 : %05lx%016lx f11 : %05lx%016lx\n",
regs->f10.u.bits[1], regs->f10.u.bits[0],
regs->f11.u.bits[1], regs->f11.u.bits[0]);
printk("r1 : %016lx r2 : %016lx r3 : %016lx\n", regs->r1, regs->r2, regs->r3);
printk("r8 : %016lx r9 : %016lx r10 : %016lx\n", regs->r8, regs->r9, regs->r10);
printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11, regs->r12, regs->r13);
printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14, regs->r15, regs->r16);
printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17, regs->r18, regs->r19);
printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20, regs->r21, regs->r22);
printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23, regs->r24, regs->r25);
printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26, regs->r27, regs->r28);
printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29, regs->r30, regs->r31);
if (user_mode(regs)) {
/* print the stacked registers */
unsigned long val, *bsp, ndirty;
int i, sof, is_nat = 0;
sof = regs->cr_ifs & 0x7f; /* size of frame */
ndirty = (regs->loadrs >> 19);
bsp = ia64_rse_skip_regs((unsigned long *) regs->ar_bspstore, ndirty);
for (i = 0; i < sof; ++i) {
get_user(val, (unsigned long __user *) ia64_rse_skip_regs(bsp, i));
printk("r%-3u:%c%016lx%s", 32 + i, is_nat ? '*' : ' ', val,
((i == sof - 1) || (i % 3) == 2) ? "\n" : " ");
}
} else
show_stack(NULL, NULL);
}
void
do_notify_resume_user (sigset_t *oldset, struct sigscratch *scr, long in_syscall)
{
if (fsys_mode(current, &scr->pt)) {
/* defer signal-handling etc. until we return to privilege-level 0. */
if (!ia64_psr(&scr->pt)->lp)
ia64_psr(&scr->pt)->lp = 1;
return;
}
#ifdef CONFIG_PERFMON
if (current->thread.pfm_needs_checking)
pfm_handle_work();
#endif
/* deal with pending signal delivery */
if (test_thread_flag(TIF_SIGPENDING))
ia64_do_signal(oldset, scr, in_syscall);
}
static int pal_halt = 1;
static int __init nohalt_setup(char * str)
{
pal_halt = 0;
return 1;
}
__setup("nohalt", nohalt_setup);
/*
* We use this if we don't have any better idle routine..
*/
void
default_idle (void)
{
unsigned long pmu_active = ia64_getreg(_IA64_REG_PSR) & (IA64_PSR_PP | IA64_PSR_UP);
while (!need_resched())
if (pal_halt && !pmu_active)
safe_halt();
else
cpu_relax();
}
#ifdef CONFIG_HOTPLUG_CPU
/* We don't actually take CPU down, just spin without interrupts. */
static inline void play_dead(void)
{
extern void ia64_cpu_local_tick (void);
/* Ack it */
__get_cpu_var(cpu_state) = CPU_DEAD;
/* We shouldn't have to disable interrupts while dead, but
* some interrupts just don't seem to go away, and this makes
* it "work" for testing purposes. */
max_xtp();
local_irq_disable();
/* Death loop */
while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
cpu_relax();
/*
* Enable timer interrupts from now on
* Not required if we put processor in SAL_BOOT_RENDEZ mode.
*/
local_flush_tlb_all();
cpu_set(smp_processor_id(), cpu_online_map);
wmb();
ia64_cpu_local_tick ();
local_irq_enable();
}
#else
static inline void play_dead(void)
{
BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */
void cpu_idle_wait(void)
{
int cpu;
cpumask_t map;
for_each_online_cpu(cpu)
cpu_set(cpu, cpu_idle_map);
wmb();
do {
ssleep(1);
cpus_and(map, cpu_idle_map, cpu_online_map);
} while (!cpus_empty(map));
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);
void __attribute__((noreturn))
cpu_idle (void)
{
void (*mark_idle)(int) = ia64_mark_idle;
int cpu = smp_processor_id();
/* endless idle loop with no priority at all */
while (1) {
#ifdef CONFIG_SMP
if (!need_resched())
min_xtp();
#endif
while (!need_resched()) {
void (*idle)(void);
if (mark_idle)
(*mark_idle)(1);
if (cpu_isset(cpu, cpu_idle_map))
cpu_clear(cpu, cpu_idle_map);
rmb();
idle = pm_idle;
if (!idle)
idle = default_idle;
(*idle)();
}
if (mark_idle)
(*mark_idle)(0);
#ifdef CONFIG_SMP
normal_xtp();
#endif
schedule();
check_pgt_cache();
if (cpu_is_offline(smp_processor_id()))
play_dead();
}
}
void
ia64_save_extra (struct task_struct *task)
{
#ifdef CONFIG_PERFMON
unsigned long info;
#endif
if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0)
ia64_save_debug_regs(&task->thread.dbr[0]);
#ifdef CONFIG_PERFMON
if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0)
pfm_save_regs(task);
info = __get_cpu_var(pfm_syst_info);
if (info & PFM_CPUINFO_SYST_WIDE)
pfm_syst_wide_update_task(task, info, 0);
#endif
#ifdef CONFIG_IA32_SUPPORT
if (IS_IA32_PROCESS(ia64_task_regs(task)))
ia32_save_state(task);
#endif
}
void
ia64_load_extra (struct task_struct *task)
{
#ifdef CONFIG_PERFMON
unsigned long info;
#endif
if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0)
ia64_load_debug_regs(&task->thread.dbr[0]);
#ifdef CONFIG_PERFMON
if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0)
pfm_load_regs(task);
info = __get_cpu_var(pfm_syst_info);
if (info & PFM_CPUINFO_SYST_WIDE)
pfm_syst_wide_update_task(task, info, 1);
#endif
#ifdef CONFIG_IA32_SUPPORT
if (IS_IA32_PROCESS(ia64_task_regs(task)))
ia32_load_state(task);
#endif
}
/*
* Copy the state of an ia-64 thread.
*
* We get here through the following call chain:
*
* from user-level: from kernel:
*
* <clone syscall> <some kernel call frames>
* sys_clone :
* do_fork do_fork
* copy_thread copy_thread
*
* This means that the stack layout is as follows:
*
* +---------------------+ (highest addr)
* | struct pt_regs |
* +---------------------+
* | struct switch_stack |
* +---------------------+
* | |
* | memory stack |
* | | <-- sp (lowest addr)
* +---------------------+
*
* Observe that we copy the unat values that are in pt_regs and switch_stack. Spilling an
* integer to address X causes bit N in ar.unat to be set to the NaT bit of the register,
* with N=(X & 0x1ff)/8. Thus, copying the unat value preserves the NaT bits ONLY if the
* pt_regs structure in the parent is congruent to that of the child, modulo 512. Since
* the stack is page aligned and the page size is at least 4KB, this is always the case,
* so there is nothing to worry about.
*/
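/*
 * Concretely: spilling to address X sets unat bit N = (X & 0x1ff)/8.
 * The parent's and child's pt_regs areas sit at the same offset from a
 * page-aligned stack top, and a page is at least 4KB > 512 bytes, so
 * corresponding spill addresses are congruent modulo 512 and map to the
 * same bit N; the copied unat words therefore remain valid for the child.
 */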
int
copy_thread (int nr, unsigned long clone_flags,
unsigned long user_stack_base, unsigned long user_stack_size,
struct task_struct *p, struct pt_regs *regs)
{
extern char ia64_ret_from_clone, ia32_ret_from_clone;
struct switch_stack *child_stack, *stack;
unsigned long rbs, child_rbs, rbs_size;
struct pt_regs *child_ptregs;
int retval = 0;
#ifdef CONFIG_SMP
/*
* For SMP idle threads, fork_by_hand() calls do_fork with
* NULL regs.
*/
if (!regs)
return 0;
#endif
stack = ((struct switch_stack *) regs) - 1;
child_ptregs = (struct pt_regs *) ((unsigned long) p + IA64_STK_OFFSET) - 1;
child_stack = (struct switch_stack *) child_ptregs - 1;
/* copy parent's switch_stack & pt_regs to child: */
memcpy(child_stack, stack, sizeof(*child_ptregs) + sizeof(*child_stack));
rbs = (unsigned long) current + IA64_RBS_OFFSET;
child_rbs = (unsigned long) p + IA64_RBS_OFFSET;
rbs_size = stack->ar_bspstore - rbs;
/* copy the parent's register backing store to the child: */
memcpy((void *) child_rbs, (void *) rbs, rbs_size);
if (likely(user_mode(child_ptregs))) {
if ((clone_flags & CLONE_SETTLS) && !IS_IA32_PROCESS(regs))
child_ptregs->r13 = regs->r16; /* see sys_clone2() in entry.S */
if (user_stack_base) {
child_ptregs->r12 = user_stack_base + user_stack_size - 16;
child_ptregs->ar_bspstore = user_stack_base;
child_ptregs->ar_rnat = 0;
child_ptregs->loadrs = 0;
}
} else {
/*
* Note: we simply preserve the relative position of
* the stack pointer here. There is no need to
* allocate a scratch area here, since that will have
* been taken care of by the caller of sys_clone()
* already.
*/
child_ptregs->r12 = (unsigned long) child_ptregs - 16; /* kernel sp */
child_ptregs->r13 = (unsigned long) p; /* set `current' pointer */
}
child_stack->ar_bspstore = child_rbs + rbs_size;
if (IS_IA32_PROCESS(regs))
child_stack->b0 = (unsigned long) &ia32_ret_from_clone;
else
child_stack->b0 = (unsigned long) &ia64_ret_from_clone;
/* copy parts of thread_struct: */
p->thread.ksp = (unsigned long) child_stack - 16;
/* Stop some PSR bits from being inherited.
* The psr.up/psr.pp bits must be cleared on fork but inherited on execve();
* therefore we must specify them explicitly here and not include them in
* IA64_PSR_BITS_TO_CLEAR.
*/
child_ptregs->cr_ipsr = ((child_ptregs->cr_ipsr | IA64_PSR_BITS_TO_SET)
& ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_PP | IA64_PSR_UP));
/*
* NOTE: The calling convention considers all floating point
* registers in the high partition (fph) to be scratch. Since
* the only way to get to this point is through a system call,
* we know that the values in fph are all dead. Hence, there
* is no need to inherit the fph state from the parent to the
* child and all we have to do is to make sure that
* IA64_THREAD_FPH_VALID is cleared in the child.
*
* XXX We could push this optimization a bit further by
* clearing IA64_THREAD_FPH_VALID on ANY system call.
* However, it's not clear this is worth doing. Also, it
* would be a slight deviation from the normal Linux system
* call behavior where scratch registers are preserved across
* system calls (unless used by the system call itself).
*/
# define THREAD_FLAGS_TO_CLEAR (IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID \
| IA64_THREAD_PM_VALID)
# define THREAD_FLAGS_TO_SET 0
p->thread.flags = ((current->thread.flags & ~THREAD_FLAGS_TO_CLEAR)
| THREAD_FLAGS_TO_SET);
ia64_drop_fpu(p); /* don't pick up stale state from a CPU's fph */
#ifdef CONFIG_IA32_SUPPORT
/*
* If we're cloning an IA32 task then save the IA32 extra
* state from the current task to the new task
*/
if (IS_IA32_PROCESS(ia64_task_regs(current))) {
ia32_save_state(p);
if (clone_flags & CLONE_SETTLS)
retval = ia32_clone_tls(p, child_ptregs);
/* Copy partially mapped page list */
if (!retval)
retval = ia32_copy_partial_page_list(p, clone_flags);
}
#endif
#ifdef CONFIG_PERFMON
if (current->thread.pfm_context)
pfm_inherit(p, child_ptregs);
#endif
return retval;
}
static void
do_copy_task_regs (struct task_struct *task, struct unw_frame_info *info, void *arg)
{
unsigned long mask, sp, nat_bits = 0, ip, ar_rnat, urbs_end, cfm;
elf_greg_t *dst = arg;
struct pt_regs *pt;
char nat;
int i;
memset(dst, 0, sizeof(elf_gregset_t)); /* don't leak any kernel bits to user-level */
if (unw_unwind_to_user(info) < 0)
return;
unw_get_sp(info, &sp);
pt = (struct pt_regs *) (sp + 16);
urbs_end = ia64_get_user_rbs_end(task, pt, &cfm);
if (ia64_sync_user_rbs(task, info->sw, pt->ar_bspstore, urbs_end) < 0)
return;
ia64_peek(task, info->sw, urbs_end, (long) ia64_rse_rnat_addr((long *) urbs_end),
&ar_rnat);
/*
* coredump format:
* r0-r31
* NaT bits (for r0-r31; bit N == 1 iff rN is a NaT)
* predicate registers (p0-p63)
* b0-b7
* ip cfm user-mask
* ar.rsc ar.bsp ar.bspstore ar.rnat
* ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec
*/
/* r0 is zero */
for (i = 1, mask = (1UL << i); i < 32; ++i) {
unw_get_gr(info, i, &dst[i], &nat);
if (nat)
nat_bits |= mask;
mask <<= 1;
}
dst[32] = nat_bits;
unw_get_pr(info, &dst[33]);
for (i = 0; i < 8; ++i)
unw_get_br(info, i, &dst[34 + i]);
unw_get_rp(info, &ip);
dst[42] = ip + ia64_psr(pt)->ri;
dst[43] = cfm;
dst[44] = pt->cr_ipsr & IA64_PSR_UM;
unw_get_ar(info, UNW_AR_RSC, &dst[45]);
/*
* For bsp and bspstore, unw_get_ar() would return the kernel
* addresses, but we need the user-level addresses instead:
*/
dst[46] = urbs_end; /* note: by convention PT_AR_BSP points to the end of the urbs! */
dst[47] = pt->ar_bspstore;
dst[48] = ar_rnat;
unw_get_ar(info, UNW_AR_CCV, &dst[49]);
unw_get_ar(info, UNW_AR_UNAT, &dst[50]);
unw_get_ar(info, UNW_AR_FPSR, &dst[51]);
dst[52] = pt->ar_pfs; /* UNW_AR_PFS is == to pt->cr_ifs for interrupt frames */
unw_get_ar(info, UNW_AR_LC, &dst[53]);
unw_get_ar(info, UNW_AR_EC, &dst[54]);
unw_get_ar(info, UNW_AR_CSD, &dst[55]);
unw_get_ar(info, UNW_AR_SSD, &dst[56]);
}
void
do_dump_task_fpu (struct task_struct *task, struct unw_frame_info *info, void *arg)
{
elf_fpreg_t *dst = arg;
int i;
memset(dst, 0, sizeof(elf_fpregset_t)); /* don't leak any "random" bits */
if (unw_unwind_to_user(info) < 0)
return;
/* f0 is 0.0, f1 is 1.0 */
for (i = 2; i < 32; ++i)
unw_get_fr(info, i, dst + i);
ia64_flush_fph(task);
if ((task->thread.flags & IA64_THREAD_FPH_VALID) != 0)
memcpy(dst + 32, task->thread.fph, 96*16);
}
void
do_copy_regs (struct unw_frame_info *info, void *arg)
{
do_copy_task_regs(current, info, arg);
}
void
do_dump_fpu (struct unw_frame_info *info, void *arg)
{
do_dump_task_fpu(current, info, arg);
}
int
dump_task_regs(struct task_struct *task, elf_gregset_t *regs)
{
struct unw_frame_info tcore_info;
if (current == task) {
unw_init_running(do_copy_regs, regs);
} else {
memset(&tcore_info, 0, sizeof(tcore_info));
unw_init_from_blocked_task(&tcore_info, task);
do_copy_task_regs(task, &tcore_info, regs);
}
return 1;
}
void
ia64_elf_core_copy_regs (struct pt_regs *pt, elf_gregset_t dst)
{
unw_init_running(do_copy_regs, dst);
}
int
dump_task_fpu (struct task_struct *task, elf_fpregset_t *dst)
{
struct unw_frame_info tcore_info;
if (current == task) {
unw_init_running(do_dump_fpu, dst);
} else {
memset(&tcore_info, 0, sizeof(tcore_info));
unw_init_from_blocked_task(&tcore_info, task);
do_dump_task_fpu(task, &tcore_info, dst);
}
return 1;
}
int
dump_fpu (struct pt_regs *pt, elf_fpregset_t dst)
{
unw_init_running(do_dump_fpu, dst);
return 1; /* f0-f31 are always valid so we always return 1 */
}
long
sys_execve (char __user *filename, char __user * __user *argv, char __user * __user *envp,
struct pt_regs *regs)
{
char *fname;
int error;
fname = getname(filename);
error = PTR_ERR(fname);
if (IS_ERR(fname))
goto out;
error = do_execve(fname, argv, envp, regs);
putname(fname);
out:
return error;
}
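/*
 * On ia64, a C-level function pointer points at a function descriptor
 * (entry address followed by the gp value), which is why kernel_thread()
 * below reads helper_fptr[0] for the entry IP and helper_fptr[1] for GP.
 */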
pid_t
kernel_thread (int (*fn)(void *), void *arg, unsigned long flags)
{
extern void start_kernel_thread (void);
unsigned long *helper_fptr = (unsigned long *) &start_kernel_thread;
struct {
struct switch_stack sw;
struct pt_regs pt;
} regs;
memset(&regs, 0, sizeof(regs));
regs.pt.cr_iip = helper_fptr[0]; /* set entry point (IP) */
regs.pt.r1 = helper_fptr[1]; /* set GP */
regs.pt.r9 = (unsigned long) fn; /* 1st argument */
regs.pt.r11 = (unsigned long) arg; /* 2nd argument */
/* Preserve PSR bits, except for bits 32-34 and 37-45, which we can't read. */
regs.pt.cr_ipsr = ia64_getreg(_IA64_REG_PSR) | IA64_PSR_BN;
regs.pt.cr_ifs = 1UL << 63; /* mark as valid, empty frame */
regs.sw.ar_fpsr = regs.pt.ar_fpsr = ia64_getreg(_IA64_REG_AR_FPSR);
regs.sw.ar_bspstore = (unsigned long) current + IA64_RBS_OFFSET;
regs.sw.pr = (1 << PRED_KERNEL_STACK);
return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs.pt, 0, NULL, NULL);
}
EXPORT_SYMBOL(kernel_thread);
/* This gets called from kernel_thread() via ia64_invoke_thread_helper(). */
int
kernel_thread_helper (int (*fn)(void *), void *arg)
{
#ifdef CONFIG_IA32_SUPPORT
if (IS_IA32_PROCESS(ia64_task_regs(current))) {
/* A kernel thread is always a 64-bit process. */
current->thread.map_base = DEFAULT_MAP_BASE;
current->thread.task_size = DEFAULT_TASK_SIZE;
ia64_set_kr(IA64_KR_IO_BASE, current->thread.old_iob);
ia64_set_kr(IA64_KR_TSSD, current->thread.old_k1);
}
#endif
return (*fn)(arg);
}
/*
* Flush thread state. This is called when a thread does an execve().
*/
void
flush_thread (void)
{
/* drop floating-point and debug-register state if it exists: */
current->thread.flags &= ~(IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID);
ia64_drop_fpu(current);
if (IS_IA32_PROCESS(ia64_task_regs(current)))
ia32_drop_partial_page_list(current);
}
/*
* Clean up state associated with current thread. This is called when
* the thread calls exit().
*/
void
exit_thread (void)
{
ia64_drop_fpu(current);
#ifdef CONFIG_PERFMON
/* if needed, stop monitoring and flush state to perfmon context */
if (current->thread.pfm_context)
pfm_exit_thread(current);
/* free debug register resources */
if (current->thread.flags & IA64_THREAD_DBG_VALID)
pfm_release_debug_registers(current);
#endif
if (IS_IA32_PROCESS(ia64_task_regs(current)))
ia32_drop_partial_page_list(current);
}
unsigned long
get_wchan (struct task_struct *p)
{
struct unw_frame_info info;
unsigned long ip;
int count = 0;
/*
* Note: p may not be a blocked task (it could be current or
* another process running on some other CPU).  Rather than
* trying to determine if p is really blocked, we just assume
* it's blocked and rely on the unwind routines to fail
* gracefully if the process wasn't really blocked after all.
* --davidm 99/12/15
*/
unw_init_from_blocked_task(&info, p);
do {
if (unw_unwind(&info) < 0)
return 0;
unw_get_ip(&info, &ip);
if (!in_sched_functions(ip))
return ip;
} while (count++ < 16);
return 0;
}
void
cpu_halt (void)
{
pal_power_mgmt_info_u_t power_info[8];
unsigned long min_power;
int i, min_power_state;
if (ia64_pal_halt_info(power_info) != 0)
return;
min_power_state = 0;
min_power = power_info[0].pal_power_mgmt_info_s.power_consumption;
for (i = 1; i < 8; ++i)
if (power_info[i].pal_power_mgmt_info_s.im
&& power_info[i].pal_power_mgmt_info_s.power_consumption < min_power) {
min_power = power_info[i].pal_power_mgmt_info_s.power_consumption;
min_power_state = i;
}
while (1)
ia64_pal_halt(min_power_state);
}
void
machine_restart (char *restart_cmd)
{
(*efi.reset_system)(EFI_RESET_WARM, 0, 0, NULL);
}
EXPORT_SYMBOL(machine_restart);
void
machine_halt (void)
{
cpu_halt();
}
EXPORT_SYMBOL(machine_halt);
void
machine_power_off (void)
{
if (pm_power_off)
pm_power_off();
machine_halt();
}
EXPORT_SYMBOL(machine_power_off);

1627
arch/ia64/kernel/ptrace.c Normal file

Diff is too large to display

302
arch/ia64/kernel/sal.c Normal file

@@ -0,0 +1,302 @@
/*
* System Abstraction Layer (SAL) interface routines.
*
* Copyright (C) 1998, 1999, 2001, 2003 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
* Copyright (C) 1999 VA Linux Systems
* Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
*/
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <asm/page.h>
#include <asm/sal.h>
#include <asm/pal.h>
__cacheline_aligned DEFINE_SPINLOCK(sal_lock);
unsigned long sal_platform_features;
unsigned short sal_revision;
unsigned short sal_version;
#define SAL_MAJOR(x) ((x) >> 8)
#define SAL_MINOR(x) ((x) & 0xff)
static struct {
void *addr; /* function entry point */
void *gpval; /* gp value to use */
} pdesc;
static long
default_handler (void)
{
return -1;
}
ia64_sal_handler ia64_sal = (ia64_sal_handler) default_handler;
ia64_sal_desc_ptc_t *ia64_ptc_domain_info;
const char *
ia64_sal_strerror (long status)
{
const char *str;
switch (status) {
case 0: str = "Call completed without error"; break;
case 1: str = "Effect a warm boot of the system to complete "
"the update"; break;
case -1: str = "Not implemented"; break;
case -2: str = "Invalid argument"; break;
case -3: str = "Call completed with error"; break;
case -4: str = "Virtual address not registered"; break;
case -5: str = "No information available"; break;
case -6: str = "Insufficient space to add the entry"; break;
case -7: str = "Invalid entry_addr value"; break;
case -8: str = "Invalid interrupt vector"; break;
case -9: str = "Requested memory not available"; break;
case -10: str = "Unable to write to the NVM device"; break;
case -11: str = "Invalid partition type specified"; break;
case -12: str = "Invalid NVM_Object id specified"; break;
case -13: str = "NVM_Object already has the maximum number "
"of partitions"; break;
case -14: str = "Insufficient space in partition for the "
"requested write sub-function"; break;
case -15: str = "Insufficient data buffer space for the "
"requested read record sub-function"; break;
case -16: str = "Scratch buffer required for the write/delete "
"sub-function"; break;
case -17: str = "Insufficient space in the NVM_Object for the "
"requested create sub-function"; break;
case -18: str = "Invalid value specified in the partition_rec "
"argument"; break;
case -19: str = "Record oriented I/O not supported for this "
"partition"; break;
case -20: str = "Bad format of record to be written or "
"required keyword variable not "
"specified"; break;
default: str = "Unknown SAL status code"; break;
}
return str;
}
void __init
ia64_sal_handler_init (void *entry_point, void *gpval)
{
/* fill in the SAL procedure descriptor and point ia64_sal to it: */
pdesc.addr = entry_point;
pdesc.gpval = gpval;
ia64_sal = (ia64_sal_handler) &pdesc;
}
static void __init
check_versions (struct ia64_sal_systab *systab)
{
sal_revision = (systab->sal_rev_major << 8) | systab->sal_rev_minor;
sal_version = (systab->sal_b_rev_major << 8) | systab->sal_b_rev_minor;
/* Check for broken firmware */
if ((sal_revision == SAL_VERSION_CODE(49, 29))
&& (sal_version == SAL_VERSION_CODE(49, 29)))
{
/*
* Old firmware for zx2000 prototypes has this weird version number,
* reset it to something sane.
*/
sal_revision = SAL_VERSION_CODE(2, 8);
sal_version = SAL_VERSION_CODE(0, 0);
}
}
static void __init
sal_desc_entry_point (void *p)
{
struct ia64_sal_desc_entry_point *ep = p;
ia64_pal_handler_init(__va(ep->pal_proc));
ia64_sal_handler_init(__va(ep->sal_proc), __va(ep->gp));
}
#ifdef CONFIG_SMP
static void __init
set_smp_redirect (int flag)
{
#ifndef CONFIG_HOTPLUG_CPU
if (no_int_routing)
smp_int_redirect &= ~flag;
else
smp_int_redirect |= flag;
#else
/*
* For CPU hotplug we don't want to do any chipset-supported
* interrupt redirection. The reason is that this would require
* stopping all interrupts and hard-binding the IRQ to a CPU.
* Later, when the interrupt fires, we would need to set the
* redirection hint again in the vector. This is cumbersome for
* something that the user-mode IRQ balancer will solve anyway.
*/
no_int_routing=1;
smp_int_redirect &= ~flag;
#endif
}
#else
#define set_smp_redirect(flag) do { } while (0)
#endif
static void __init
sal_desc_platform_feature (void *p)
{
struct ia64_sal_desc_platform_feature *pf = p;
sal_platform_features = pf->feature_mask;
printk(KERN_INFO "SAL Platform features:");
if (!sal_platform_features) {
printk(" None\n");
return;
}
if (sal_platform_features & IA64_SAL_PLATFORM_FEATURE_BUS_LOCK)
printk(" BusLock");
if (sal_platform_features & IA64_SAL_PLATFORM_FEATURE_IRQ_REDIR_HINT) {
printk(" IRQ_Redirection");
set_smp_redirect(SMP_IRQ_REDIRECTION);
}
if (sal_platform_features & IA64_SAL_PLATFORM_FEATURE_IPI_REDIR_HINT) {
printk(" IPI_Redirection");
set_smp_redirect(SMP_IPI_REDIRECTION);
}
if (sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)
printk(" ITC_Drift");
printk("\n");
}
#ifdef CONFIG_SMP
static void __init
sal_desc_ap_wakeup (void *p)
{
struct ia64_sal_desc_ap_wakeup *ap = p;
switch (ap->mechanism) {
case IA64_SAL_AP_EXTERNAL_INT:
ap_wakeup_vector = ap->vector;
printk(KERN_INFO "SAL: AP wakeup using external interrupt "
"vector 0x%lx\n", ap_wakeup_vector);
break;
default:
printk(KERN_ERR "SAL: AP wakeup mechanism unsupported!\n");
break;
}
}
static void __init
chk_nointroute_opt(void)
{
char *cp;
extern char saved_command_line[];
for (cp = saved_command_line; *cp; ) {
if (memcmp(cp, "nointroute", 10) == 0) {
no_int_routing = 1;
printk ("no_int_routing on\n");
break;
} else {
while (*cp != ' ' && *cp)
++cp;
while (*cp == ' ')
++cp;
}
}
}
#else
static void __init sal_desc_ap_wakeup(void *p) { }
#endif
void __init
ia64_sal_init (struct ia64_sal_systab *systab)
{
char *p;
int i;
if (!systab) {
printk(KERN_WARNING "Hmm, no SAL System Table.\n");
return;
}
if (strncmp(systab->signature, "SST_", 4) != 0)
printk(KERN_ERR "bad signature in system table!");
check_versions(systab);
#ifdef CONFIG_SMP
chk_nointroute_opt();
#endif
/* revisions are coded in BCD, so %x does the job for us */
printk(KERN_INFO "SAL %x.%x: %.32s %.32s%sversion %x.%x\n",
SAL_MAJOR(sal_revision), SAL_MINOR(sal_revision),
systab->oem_id, systab->product_id,
systab->product_id[0] ? " " : "",
SAL_MAJOR(sal_version), SAL_MINOR(sal_version));
p = (char *) (systab + 1);
for (i = 0; i < systab->entry_count; i++) {
/*
* The first byte of each entry type contains the type
* descriptor.
*/
switch (*p) {
case SAL_DESC_ENTRY_POINT:
sal_desc_entry_point(p);
break;
case SAL_DESC_PLATFORM_FEATURE:
sal_desc_platform_feature(p);
break;
case SAL_DESC_PTC:
ia64_ptc_domain_info = (ia64_sal_desc_ptc_t *)p;
break;
case SAL_DESC_AP_WAKEUP:
sal_desc_ap_wakeup(p);
break;
}
p += SAL_DESC_SIZE(*p);
}
}
int
ia64_sal_oemcall(struct ia64_sal_retval *isrvp, u64 oemfunc, u64 arg1,
u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7)
{
if (oemfunc < IA64_SAL_OEMFUNC_MIN || oemfunc > IA64_SAL_OEMFUNC_MAX)
return -1;
SAL_CALL(*isrvp, oemfunc, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
return 0;
}
EXPORT_SYMBOL(ia64_sal_oemcall);
int
ia64_sal_oemcall_nolock(struct ia64_sal_retval *isrvp, u64 oemfunc, u64 arg1,
u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6,
u64 arg7)
{
if (oemfunc < IA64_SAL_OEMFUNC_MIN || oemfunc > IA64_SAL_OEMFUNC_MAX)
return -1;
SAL_CALL_NOLOCK(*isrvp, oemfunc, arg1, arg2, arg3, arg4, arg5, arg6,
arg7);
return 0;
}
EXPORT_SYMBOL(ia64_sal_oemcall_nolock);
int
ia64_sal_oemcall_reentrant(struct ia64_sal_retval *isrvp, u64 oemfunc,
u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5,
u64 arg6, u64 arg7)
{
if (oemfunc < IA64_SAL_OEMFUNC_MIN || oemfunc > IA64_SAL_OEMFUNC_MAX)
return -1;
SAL_CALL_REENTRANT(*isrvp, oemfunc, arg1, arg2, arg3, arg4, arg5, arg6,
arg7);
return 0;
}
EXPORT_SYMBOL(ia64_sal_oemcall_reentrant);

629
arch/ia64/kernel/salinfo.c Normal file

@@ -0,0 +1,629 @@
/*
* salinfo.c
*
* Creates entries in /proc/sal for various system features.
*
* Copyright (c) 2003 Silicon Graphics, Inc. All rights reserved.
* Copyright (c) 2003 Hewlett-Packard Co
* Bjorn Helgaas <bjorn.helgaas@hp.com>
*
* 10/30/2001 jbarnes@sgi.com copied much of Stephane's palinfo
* code to create this file
* Oct 23 2003 kaos@sgi.com
* Replace IPI with set_cpus_allowed() to read a record from the required cpu.
* Redesign salinfo log processing to separate interrupt and user space
* contexts.
* Cache the record across multi-block reads from user space.
* Support > 64 cpus.
* Delete module_exit and MOD_INC/DEC_COUNT, salinfo cannot be a module.
*
* Jan 28 2004 kaos@sgi.com
* Periodically check for outstanding MCA or INIT records.
*
* Dec 5 2004 kaos@sgi.com
* Standardize which records are cleared automatically.
*/
#include <linux/types.h>
#include <linux/proc_fs.h>
#include <linux/module.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <asm/semaphore.h>
#include <asm/sal.h>
#include <asm/uaccess.h>
MODULE_AUTHOR("Jesse Barnes <jbarnes@sgi.com>");
MODULE_DESCRIPTION("/proc interface to IA-64 SAL features");
MODULE_LICENSE("GPL");
static int salinfo_read(char *page, char **start, off_t off, int count, int *eof, void *data);
typedef struct {
const char *name; /* name of the proc entry */
unsigned long feature; /* feature bit */
struct proc_dir_entry *entry; /* registered entry (removal) */
} salinfo_entry_t;
/*
* List {name,feature} pairs for every entry in /proc/sal/<feature>
* that this module exports
*/
static salinfo_entry_t salinfo_entries[]={
{ "bus_lock", IA64_SAL_PLATFORM_FEATURE_BUS_LOCK, },
{ "irq_redirection", IA64_SAL_PLATFORM_FEATURE_IRQ_REDIR_HINT, },
{ "ipi_redirection", IA64_SAL_PLATFORM_FEATURE_IPI_REDIR_HINT, },
{ "itc_drift", IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT, },
};
#define NR_SALINFO_ENTRIES ARRAY_SIZE(salinfo_entries)
static char *salinfo_log_name[] = {
"mca",
"init",
"cmc",
"cpe",
};
static struct proc_dir_entry *salinfo_proc_entries[
ARRAY_SIZE(salinfo_entries) + /* /proc/sal/bus_lock */
ARRAY_SIZE(salinfo_log_name) + /* /proc/sal/{mca,...} */
(2 * ARRAY_SIZE(salinfo_log_name)) + /* /proc/sal/mca/{event,data} */
1]; /* /proc/sal */
/* Some records we get ourselves, some are accessed as saved data in buffers
* that are owned by mca.c.
*/
struct salinfo_data_saved {
u8* buffer;
u64 size;
u64 id;
int cpu;
};
/* State transitions. Actions are :-
* Write "read <cpunum>" to the data file.
* Write "clear <cpunum>" to the data file.
* Write "oemdata <cpunum> <offset> to the data file.
* Read from the data file.
* Close the data file.
*
* Start state is NO_DATA.
*
* NO_DATA
* write "read <cpunum>" -> NO_DATA or LOG_RECORD.
* write "clear <cpunum>" -> NO_DATA or LOG_RECORD.
* write "oemdata <cpunum> <offset> -> return -EINVAL.
* read data -> return EOF.
* close -> unchanged. Free record areas.
*
* LOG_RECORD
* write "read <cpunum>" -> NO_DATA or LOG_RECORD.
* write "clear <cpunum>" -> NO_DATA or LOG_RECORD.
* write "oemdata <cpunum> <offset> -> format the oem data, goto OEMDATA.
* read data -> return the INIT/MCA/CMC/CPE record.
* close -> unchanged. Keep record areas.
*
* OEMDATA
* write "read <cpunum>" -> NO_DATA or LOG_RECORD.
* write "clear <cpunum>" -> NO_DATA or LOG_RECORD.
* write "oemdata <cpunum> <offset> -> format the oem data, goto OEMDATA.
* read data -> return the formatted oemdata.
* close -> unchanged. Keep record areas.
*
* Closing the data file does not change the state. This allows shell scripts
* to manipulate salinfo data, each shell redirection opens the file, does one
* action then closes it again. The record areas are only freed at close when
* the state is NO_DATA.
*/
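/*
 * Illustrative walk through the states above (not part of the original
 * file); cpu 0 is assumed and the paths are the ones created by
 * salinfo_init() below:
 *
 *	cat /proc/sal/mca/event			(blocks, then prints "read 0")
 *	echo "read 0" > /proc/sal/mca/data	(NO_DATA -> LOG_RECORD)
 *	cat /proc/sal/mca/data > mca.rec	(returns the raw MCA record)
 *	echo "clear 0" > /proc/sal/mca/data	(record cleared, back to NO_DATA)
 */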
enum salinfo_state {
STATE_NO_DATA,
STATE_LOG_RECORD,
STATE_OEMDATA,
};
struct salinfo_data {
volatile cpumask_t cpu_event; /* which cpus have outstanding events */
struct semaphore sem; /* count of cpus with outstanding events (bits set in cpu_event) */
u8 *log_buffer;
u64 log_size;
u8 *oemdata; /* decoded oem data */
u64 oemdata_size;
int open; /* single-open to prevent races */
u8 type;
u8 saved_num; /* using a saved record? */
enum salinfo_state state :8; /* processing state */
u8 padding;
int cpu_check; /* next CPU to check */
struct salinfo_data_saved data_saved[5];/* save last 5 records from mca.c, must be < 255 */
};
static struct salinfo_data salinfo_data[ARRAY_SIZE(salinfo_log_name)];
static spinlock_t data_lock, data_saved_lock;
/** salinfo_platform_oemdata - optional callback to decode oemdata from an error
* record.
* @sect_header: pointer to the start of the section to decode.
* @oemdata: returns vmalloc area containing the decoded output.
* @oemdata_size: returns length of decoded output (strlen).
*
* Description: If user space asks for oem data to be decoded by the kernel
* and/or prom and the platform has set salinfo_platform_oemdata to the address
* of a platform specific routine then call that routine. salinfo_platform_oemdata
* vmalloc's and formats its output area, returning the address of the text
* and its strlen. Returns 0 for success, -ve for error. The callback is
* invoked on the cpu that generated the error record.
*/
int (*salinfo_platform_oemdata)(const u8 *sect_header, u8 **oemdata, u64 *oemdata_size);
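/*
 * Illustrative sketch only (hypothetical decoder, not part of the
 * original file): platform code could hook the callback above roughly
 * like this:
 *
 *	static int my_oemdata_decode(const u8 *sect_header, u8 **oemdata,
 *				     u64 *oemdata_size)
 *	{
 *		*oemdata = vmalloc(PAGE_SIZE);
 *		if (!*oemdata)
 *			return -ENOMEM;
 *		*oemdata_size = snprintf((char *)*oemdata, PAGE_SIZE,
 *					 "decoded OEM section\n");
 *		return 0;
 *	}
 *
 *	salinfo_platform_oemdata = my_oemdata_decode;
 */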
struct salinfo_platform_oemdata_parms {
const u8 *efi_guid;
u8 **oemdata;
u64 *oemdata_size;
int ret;
};
static void
salinfo_platform_oemdata_cpu(void *context)
{
struct salinfo_platform_oemdata_parms *parms = context;
parms->ret = salinfo_platform_oemdata(parms->efi_guid, parms->oemdata, parms->oemdata_size);
}
static void
shift1_data_saved (struct salinfo_data *data, int shift)
{
memcpy(data->data_saved+shift, data->data_saved+shift+1,
(ARRAY_SIZE(data->data_saved) - (shift+1)) * sizeof(data->data_saved[0]));
memset(data->data_saved + ARRAY_SIZE(data->data_saved) - 1, 0,
sizeof(data->data_saved[0]));
}
/* This routine is invoked in interrupt context. Note: mca.c enables
* interrupts before calling this code for CMC/CPE. MCA and INIT events are
* not irq safe; do not call any routines that use spinlocks, as they may deadlock.
* MCA and INIT records are recorded, a timer event will look for any
* outstanding events and wake up the user space code.
*
* The buffer passed from mca.c points to the output from ia64_log_get. This is
* a persistent buffer but its contents can change between the interrupt and
* when user space processes the record. Save the record id to identify
* changes.
*/
void
salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe)
{
struct salinfo_data *data = salinfo_data + type;
struct salinfo_data_saved *data_saved;
unsigned long flags = 0;
int i;
int saved_size = ARRAY_SIZE(data->data_saved);
BUG_ON(type >= ARRAY_SIZE(salinfo_log_name));
if (irqsafe)
spin_lock_irqsave(&data_saved_lock, flags);
for (i = 0, data_saved = data->data_saved; i < saved_size; ++i, ++data_saved) {
if (!data_saved->buffer)
break;
}
if (i == saved_size) {
if (!data->saved_num) {
shift1_data_saved(data, 0);
data_saved = data->data_saved + saved_size - 1;
} else
data_saved = NULL;
}
if (data_saved) {
data_saved->cpu = smp_processor_id();
data_saved->id = ((sal_log_record_header_t *)buffer)->id;
data_saved->size = size;
data_saved->buffer = buffer;
}
if (irqsafe)
spin_unlock_irqrestore(&data_saved_lock, flags);
if (!test_and_set_bit(smp_processor_id(), &data->cpu_event)) {
if (irqsafe)
up(&data->sem);
}
}
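/*
 * Illustrative only (the exact call sites live in mca.c): the routine
 * above is invoked roughly as
 *
 *	salinfo_log_wakeup(SAL_INFO_TYPE_CMC, buffer, size, 1);	(irq-safe path)
 *	salinfo_log_wakeup(SAL_INFO_TYPE_MCA, buffer, size, 0);	(MCA/INIT, no locking)
 */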
/* Check for outstanding MCA/INIT records every minute (arbitrary) */
#define SALINFO_TIMER_DELAY (60*HZ)
static struct timer_list salinfo_timer;
static void
salinfo_timeout_check(struct salinfo_data *data)
{
int i;
if (!data->open)
return;
for (i = 0; i < NR_CPUS; ++i) {
if (test_bit(i, &data->cpu_event)) {
/* double up() is not a problem, user space will see no
* records for the additional "events".
*/
up(&data->sem);
}
}
}
static void
salinfo_timeout (unsigned long arg)
{
salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_MCA);
salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_INIT);
salinfo_timer.expires = jiffies + SALINFO_TIMER_DELAY;
add_timer(&salinfo_timer);
}
static int
salinfo_event_open(struct inode *inode, struct file *file)
{
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
return 0;
}
static ssize_t
salinfo_event_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos)
{
struct inode *inode = file->f_dentry->d_inode;
struct proc_dir_entry *entry = PDE(inode);
struct salinfo_data *data = entry->data;
char cmd[32];
size_t size;
int i, n, cpu = -1;
retry:
if (down_trylock(&data->sem)) {
if (file->f_flags & O_NONBLOCK)
return -EAGAIN;
if (down_interruptible(&data->sem))
return -ERESTARTSYS;
}
n = data->cpu_check;
for (i = 0; i < NR_CPUS; i++) {
if (test_bit(n, &data->cpu_event)) {
cpu = n;
break;
}
if (++n == NR_CPUS)
n = 0;
}
if (cpu == -1)
goto retry;
/* events are sticky until the user says "clear" */
up(&data->sem);
/* for next read, start checking at next CPU */
data->cpu_check = cpu;
if (++data->cpu_check == NR_CPUS)
data->cpu_check = 0;
snprintf(cmd, sizeof(cmd), "read %d\n", cpu);
size = strlen(cmd);
if (size > count)
size = count;
if (copy_to_user(buffer, cmd, size))
return -EFAULT;
return size;
}
static struct file_operations salinfo_event_fops = {
.open = salinfo_event_open,
.read = salinfo_event_read,
};
static int
salinfo_log_open(struct inode *inode, struct file *file)
{
struct proc_dir_entry *entry = PDE(inode);
struct salinfo_data *data = entry->data;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
spin_lock(&data_lock);
if (data->open) {
spin_unlock(&data_lock);
return -EBUSY;
}
data->open = 1;
spin_unlock(&data_lock);
if (data->state == STATE_NO_DATA &&
!(data->log_buffer = vmalloc(ia64_sal_get_state_info_size(data->type)))) {
data->open = 0;
return -ENOMEM;
}
return 0;
}
static int
salinfo_log_release(struct inode *inode, struct file *file)
{
struct proc_dir_entry *entry = PDE(inode);
struct salinfo_data *data = entry->data;
if (data->state == STATE_NO_DATA) {
vfree(data->log_buffer);
vfree(data->oemdata);
data->log_buffer = NULL;
data->oemdata = NULL;
}
spin_lock(&data_lock);
data->open = 0;
spin_unlock(&data_lock);
return 0;
}
static void
call_on_cpu(int cpu, void (*fn)(void *), void *arg)
{
cpumask_t save_cpus_allowed, new_cpus_allowed;
memcpy(&save_cpus_allowed, &current->cpus_allowed, sizeof(save_cpus_allowed));
memset(&new_cpus_allowed, 0, sizeof(new_cpus_allowed));
set_bit(cpu, &new_cpus_allowed);
set_cpus_allowed(current, new_cpus_allowed);
(*fn)(arg);
set_cpus_allowed(current, save_cpus_allowed);
}
static void
salinfo_log_read_cpu(void *context)
{
struct salinfo_data *data = context;
sal_log_record_header_t *rh;
data->log_size = ia64_sal_get_state_info(data->type, (u64 *) data->log_buffer);
rh = (sal_log_record_header_t *)(data->log_buffer);
/* Clear corrected errors as they are read from SAL */
if (rh->severity == sal_log_severity_corrected)
ia64_sal_clear_state_info(data->type);
}
static void
salinfo_log_new_read(int cpu, struct salinfo_data *data)
{
struct salinfo_data_saved *data_saved;
unsigned long flags;
int i;
int saved_size = ARRAY_SIZE(data->data_saved);
data->saved_num = 0;
spin_lock_irqsave(&data_saved_lock, flags);
retry:
for (i = 0, data_saved = data->data_saved; i < saved_size; ++i, ++data_saved) {
if (data_saved->buffer && data_saved->cpu == cpu) {
sal_log_record_header_t *rh = (sal_log_record_header_t *)(data_saved->buffer);
data->log_size = data_saved->size;
memcpy(data->log_buffer, rh, data->log_size);
barrier(); /* id check must not be moved */
if (rh->id == data_saved->id) {
data->saved_num = i+1;
break;
}
/* saved record changed by mca.c since interrupt, discard it */
shift1_data_saved(data, i);
goto retry;
}
}
spin_unlock_irqrestore(&data_saved_lock, flags);
if (!data->saved_num)
call_on_cpu(cpu, salinfo_log_read_cpu, data);
if (!data->log_size) {
data->state = STATE_NO_DATA;
clear_bit(cpu, &data->cpu_event);
} else {
data->state = STATE_LOG_RECORD;
}
}
static ssize_t
salinfo_log_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos)
{
struct inode *inode = file->f_dentry->d_inode;
struct proc_dir_entry *entry = PDE(inode);
struct salinfo_data *data = entry->data;
u8 *buf;
u64 bufsize;
if (data->state == STATE_LOG_RECORD) {
buf = data->log_buffer;
bufsize = data->log_size;
} else if (data->state == STATE_OEMDATA) {
buf = data->oemdata;
bufsize = data->oemdata_size;
} else {
buf = NULL;
bufsize = 0;
}
return simple_read_from_buffer(buffer, count, ppos, buf, bufsize);
}
static void
salinfo_log_clear_cpu(void *context)
{
struct salinfo_data *data = context;
ia64_sal_clear_state_info(data->type);
}
static int
salinfo_log_clear(struct salinfo_data *data, int cpu)
{
sal_log_record_header_t *rh;
data->state = STATE_NO_DATA;
if (!test_bit(cpu, &data->cpu_event))
return 0;
down(&data->sem);
clear_bit(cpu, &data->cpu_event);
if (data->saved_num) {
unsigned long flags;
spin_lock_irqsave(&data_saved_lock, flags);
shift1_data_saved(data, data->saved_num - 1 );
data->saved_num = 0;
spin_unlock_irqrestore(&data_saved_lock, flags);
}
rh = (sal_log_record_header_t *)(data->log_buffer);
/* Corrected errors have already been cleared from SAL */
if (rh->severity != sal_log_severity_corrected)
call_on_cpu(cpu, salinfo_log_clear_cpu, data);
/* clearing a record may make a new record visible */
salinfo_log_new_read(cpu, data);
if (data->state == STATE_LOG_RECORD &&
!test_and_set_bit(cpu, &data->cpu_event))
up(&data->sem);
return 0;
}
static ssize_t
salinfo_log_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos)
{
struct inode *inode = file->f_dentry->d_inode;
struct proc_dir_entry *entry = PDE(inode);
struct salinfo_data *data = entry->data;
char cmd[32];
size_t size;
u32 offset;
int cpu;
size = sizeof(cmd);
if (count < size)
size = count;
if (copy_from_user(cmd, buffer, size))
return -EFAULT;
if (sscanf(cmd, "read %d", &cpu) == 1) {
salinfo_log_new_read(cpu, data);
} else if (sscanf(cmd, "clear %d", &cpu) == 1) {
int ret;
if ((ret = salinfo_log_clear(data, cpu)))
count = ret;
} else if (sscanf(cmd, "oemdata %d %d", &cpu, &offset) == 2) {
if (data->state != STATE_LOG_RECORD && data->state != STATE_OEMDATA)
return -EINVAL;
if (offset > data->log_size - sizeof(efi_guid_t))
return -EINVAL;
data->state = STATE_OEMDATA;
if (salinfo_platform_oemdata) {
struct salinfo_platform_oemdata_parms parms = {
.efi_guid = data->log_buffer + offset,
.oemdata = &data->oemdata,
.oemdata_size = &data->oemdata_size
};
call_on_cpu(cpu, salinfo_platform_oemdata_cpu, &parms);
if (parms.ret)
count = parms.ret;
} else
data->oemdata_size = 0;
} else
return -EINVAL;
return count;
}
static struct file_operations salinfo_data_fops = {
.open = salinfo_log_open,
.release = salinfo_log_release,
.read = salinfo_log_read,
.write = salinfo_log_write,
};
static int __init
salinfo_init(void)
{
struct proc_dir_entry *salinfo_dir; /* /proc/sal dir entry */
struct proc_dir_entry **sdir = salinfo_proc_entries; /* keeps track of every entry */
struct proc_dir_entry *dir, *entry;
struct salinfo_data *data;
int i, j, online;
salinfo_dir = proc_mkdir("sal", NULL);
if (!salinfo_dir)
return 0;
for (i=0; i < NR_SALINFO_ENTRIES; i++) {
/* pass the feature bit in question as misc data */
*sdir++ = create_proc_read_entry (salinfo_entries[i].name, 0, salinfo_dir,
salinfo_read, (void *)salinfo_entries[i].feature);
}
for (i = 0; i < ARRAY_SIZE(salinfo_log_name); i++) {
data = salinfo_data + i;
data->type = i;
sema_init(&data->sem, 0);
dir = proc_mkdir(salinfo_log_name[i], salinfo_dir);
if (!dir)
continue;
entry = create_proc_entry("event", S_IRUSR, dir);
if (!entry)
continue;
entry->data = data;
entry->proc_fops = &salinfo_event_fops;
*sdir++ = entry;
entry = create_proc_entry("data", S_IRUSR | S_IWUSR, dir);
if (!entry)
continue;
entry->data = data;
entry->proc_fops = &salinfo_data_fops;
*sdir++ = entry;
/* we missed any events before now */
online = 0;
for (j = 0; j < NR_CPUS; j++)
if (cpu_online(j)) {
set_bit(j, &data->cpu_event);
++online;
}
sema_init(&data->sem, online);
*sdir++ = dir;
}
*sdir++ = salinfo_dir;
init_timer(&salinfo_timer);
salinfo_timer.expires = jiffies + SALINFO_TIMER_DELAY;
salinfo_timer.function = &salinfo_timeout;
add_timer(&salinfo_timer);
return 0;
}
/*
* 'data' contains an integer that corresponds to the feature we're
* testing
*/
static int
salinfo_read(char *page, char **start, off_t off, int count, int *eof, void *data)
{
int len = 0;
len = sprintf(page, (sal_platform_features & (unsigned long)data) ? "1\n" : "0\n");
if (len <= off+count) *eof = 1;
*start = page + off;
len -= off;
if (len>count) len = count;
if (len<0) len = 0;
return len;
}
module_init(salinfo_init);

165
arch/ia64/kernel/semaphore.c Normal file

@@ -0,0 +1,165 @@
/*
* IA-64 semaphore implementation (derived from x86 version).
*
* Copyright (C) 1999-2000, 2002 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
*/
/*
* Semaphores are implemented using a two-way counter: The "count"
* variable is decremented for each process that tries to acquire the
* semaphore, while the "sleepers" variable is a count of such
* acquires.
*
* Notably, the inline "up()" and "down()" functions can efficiently
* test if they need to do any extra work (up needs to do something
* only if count was negative before the increment operation).
*
* "sleeping" and the contention routine ordering is protected
* by the spinlock in the semaphore's waitqueue head.
*
* Note that these functions are only called when there is contention
* on the lock, and as such all this is the "non-critical" part of the
* whole semaphore business. The critical part is the inline stuff in
* <asm/semaphore.h> where we want to avoid any extra jumps and calls.
*/
#include <linux/sched.h>
#include <linux/init.h>
#include <asm/errno.h>
#include <asm/semaphore.h>
/*
* Logic:
* - Only on a boundary condition do we need to care. When we go
* from a negative count to a non-negative, we wake people up.
* - When we go from a non-negative count to a negative do we
* (a) synchronize with the "sleepers" count and (b) make sure
* that we're on the wakeup list before we synchronize so that
* we cannot lose wakeup events.
*/
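/*
 * Worked example (illustrative only, not part of the original file):
 * with an initial count of 1, one holder and two contenders, down()
 * takes count from 1 to 0, then to -1 and -2 as each contender
 * decrements it and falls into __down().  The sleepers bookkeeping
 * below folds the extra decrements back into count, so each up() on
 * the negative boundary wakes exactly one of the waiters.
 */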
void
__up (struct semaphore *sem)
{
wake_up(&sem->wait);
}
void __sched __down (struct semaphore *sem)
{
struct task_struct *tsk = current;
DECLARE_WAITQUEUE(wait, tsk);
unsigned long flags;
tsk->state = TASK_UNINTERRUPTIBLE;
spin_lock_irqsave(&sem->wait.lock, flags);
add_wait_queue_exclusive_locked(&sem->wait, &wait);
sem->sleepers++;
for (;;) {
int sleepers = sem->sleepers;
/*
* Add "everybody else" into it. They aren't
* playing, because we own the spinlock in
* the wait_queue_head.
*/
if (!atomic_add_negative(sleepers - 1, &sem->count)) {
sem->sleepers = 0;
break;
}
sem->sleepers = 1; /* us - see -1 above */
spin_unlock_irqrestore(&sem->wait.lock, flags);
schedule();
spin_lock_irqsave(&sem->wait.lock, flags);
tsk->state = TASK_UNINTERRUPTIBLE;
}
remove_wait_queue_locked(&sem->wait, &wait);
wake_up_locked(&sem->wait);
spin_unlock_irqrestore(&sem->wait.lock, flags);
tsk->state = TASK_RUNNING;
}
int __sched __down_interruptible (struct semaphore * sem)
{
int retval = 0;
struct task_struct *tsk = current;
DECLARE_WAITQUEUE(wait, tsk);
unsigned long flags;
tsk->state = TASK_INTERRUPTIBLE;
spin_lock_irqsave(&sem->wait.lock, flags);
add_wait_queue_exclusive_locked(&sem->wait, &wait);
sem->sleepers++;
for (;;) {
int sleepers = sem->sleepers;
/*
* With signals pending, this turns into
* the trylock failure case - we won't be
* sleeping, and we can't get the lock as
* it has contention. Just correct the count
* and exit.
*/
if (signal_pending(current)) {
retval = -EINTR;
sem->sleepers = 0;
atomic_add(sleepers, &sem->count);
break;
}
/*
* Add "everybody else" into it. They aren't
* playing, because we own the spinlock in
* wait_queue_head. The "-1" is because we're
* still hoping to get the semaphore.
*/
if (!atomic_add_negative(sleepers - 1, &sem->count)) {
sem->sleepers = 0;
break;
}
sem->sleepers = 1; /* us - see -1 above */
spin_unlock_irqrestore(&sem->wait.lock, flags);
schedule();
spin_lock_irqsave(&sem->wait.lock, flags);
tsk->state = TASK_INTERRUPTIBLE;
}
remove_wait_queue_locked(&sem->wait, &wait);
wake_up_locked(&sem->wait);
spin_unlock_irqrestore(&sem->wait.lock, flags);
tsk->state = TASK_RUNNING;
return retval;
}
/*
* Trylock failed - make sure we correct for having decremented the
* count.
*/
int
__down_trylock (struct semaphore *sem)
{
unsigned long flags;
int sleepers;
spin_lock_irqsave(&sem->wait.lock, flags);
sleepers = sem->sleepers + 1;
sem->sleepers = 0;
/*
* Add "everybody else" and us into it. They aren't
* playing, because we own the spinlock in the
* wait_queue_head.
*/
if (!atomic_add_negative(sleepers, &sem->count)) {
wake_up_locked(&sem->wait);
}
spin_unlock_irqrestore(&sem->wait.lock, flags);
return 1;
}

723
arch/ia64/kernel/setup.c Normal file

@@ -0,0 +1,723 @@
/*
* Architecture-specific setup.
*
* Copyright (C) 1998-2001, 2003-2004 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
* Stephane Eranian <eranian@hpl.hp.com>
* Copyright (C) 2000, Rohit Seth <rohit.seth@intel.com>
* Copyright (C) 1999 VA Linux Systems
* Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
*
* 11/12/01 D.Mosberger Convert get_cpuinfo() to seq_file based show_cpuinfo().
* 04/04/00 D.Mosberger renamed cpu_initialized to cpu_online_map
* 03/31/00 R.Seth cpu_initialized and current->processor fixes
* 02/04/00 D.Mosberger some more get_cpuinfo fixes...
* 02/01/00 R.Seth fixed get_cpuinfo for SMP
* 01/07/99 S.Eranian added the support for command line argument
* 06/24/99 W.Drummond added boot_cpu_data.
*/
#include <linux/config.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/acpi.h>
#include <linux/bootmem.h>
#include <linux/console.h>
#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/reboot.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/string.h>
#include <linux/threads.h>
#include <linux/tty.h>
#include <linux/serial.h>
#include <linux/serial_core.h>
#include <linux/efi.h>
#include <linux/initrd.h>
#include <asm/ia32.h>
#include <asm/machvec.h>
#include <asm/mca.h>
#include <asm/meminit.h>
#include <asm/page.h>
#include <asm/patch.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/sal.h>
#include <asm/sections.h>
#include <asm/serial.h>
#include <asm/setup.h>
#include <asm/smp.h>
#include <asm/system.h>
#include <asm/unistd.h>
#if defined(CONFIG_SMP) && (IA64_CPU_SIZE > PAGE_SIZE)
# error "struct cpuinfo_ia64 too big!"
#endif
#ifdef CONFIG_SMP
unsigned long __per_cpu_offset[NR_CPUS];
EXPORT_SYMBOL(__per_cpu_offset);
#endif
DEFINE_PER_CPU(struct cpuinfo_ia64, cpu_info);
DEFINE_PER_CPU(unsigned long, local_per_cpu_offset);
DEFINE_PER_CPU(unsigned long, ia64_phys_stacked_size_p8);
unsigned long ia64_cycles_per_usec;
struct ia64_boot_param *ia64_boot_param;
struct screen_info screen_info;
unsigned long ia64_max_cacheline_size;
unsigned long ia64_iobase; /* virtual address for I/O accesses */
EXPORT_SYMBOL(ia64_iobase);
struct io_space io_space[MAX_IO_SPACES];
EXPORT_SYMBOL(io_space);
unsigned int num_io_spaces;
/*
* The merge_mask variable needs to be set to (max(iommu_page_size(iommu)) - 1). This
* mask specifies a mask of address bits that must be 0 in order for two buffers to be
* mergeable by the I/O MMU (i.e., the end address of the first buffer and the start
* address of the second buffer must be aligned to (merge_mask+1) in order to be
* mergeable). By default, we assume there is no I/O MMU which can merge physically
* discontiguous buffers, so we set the merge_mask to ~0UL, which corresponds to an iommu
* page-size of 2^64.
*/
unsigned long ia64_max_iommu_merge_mask = ~0UL;
EXPORT_SYMBOL(ia64_max_iommu_merge_mask);
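/*
 * For illustration only: an I/O MMU with a hypothetical 4KB mapping
 * page size would instead set
 *
 *	ia64_max_iommu_merge_mask = 4096UL - 1;		(i.e. 0xfff)
 *
 * so that two buffers are merge candidates only when the boundary
 * between them is 4KB-aligned.
 */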
/*
* We use a special marker for the end of memory and it uses the extra (+1) slot
*/
struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1];
int num_rsvd_regions;
/*
* Filter incoming memory segments based on the primitive map created from the boot
* parameters. Segments contained in the map are removed from the memory ranges. A
* caller-specified function is called with the memory ranges that remain after filtering.
* This routine does not assume the incoming segments are sorted.
*/
int
filter_rsvd_memory (unsigned long start, unsigned long end, void *arg)
{
unsigned long range_start, range_end, prev_start;
void (*func)(unsigned long, unsigned long, int);
int i;
#if IGNORE_PFN0
if (start == PAGE_OFFSET) {
printk(KERN_WARNING "warning: skipping physical page 0\n");
start += PAGE_SIZE;
if (start >= end) return 0;
}
#endif
/*
* lowest possible address (walker uses virtual)
*/
prev_start = PAGE_OFFSET;
func = arg;
for (i = 0; i < num_rsvd_regions; ++i) {
range_start = max(start, prev_start);
range_end = min(end, rsvd_region[i].start);
if (range_start < range_end)
call_pernode_memory(__pa(range_start), range_end - range_start, func);
/* nothing more available in this segment */
if (range_end == end) return 0;
prev_start = rsvd_region[i].end;
}
/* end of memory marker allows full processing inside loop body */
return 0;
}
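/*
 * Illustrative only: platform memory setup typically hands the routine
 * above to the EFI memory-map walker together with a per-range callback
 * (hypothetical callback name shown):
 *
 *	efi_memmap_walk(filter_rsvd_memory, count_usable_pages);
 */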
static void
sort_regions (struct rsvd_region *rsvd_region, int max)
{
int j;
/* simple bubble sorting */
while (max--) {
for (j = 0; j < max; ++j) {
if (rsvd_region[j].start > rsvd_region[j+1].start) {
struct rsvd_region tmp;
tmp = rsvd_region[j];
rsvd_region[j] = rsvd_region[j + 1];
rsvd_region[j + 1] = tmp;
}
}
}
}
/**
* reserve_memory - setup reserved memory areas
*
* Setup the reserved memory areas set aside for the boot parameters,
* initrd, etc. There are currently %IA64_MAX_RSVD_REGIONS defined,
* see include/asm-ia64/meminit.h if you need to define more.
*/
void
reserve_memory (void)
{
int n = 0;
/*
* none of the entries in this table overlap
*/
rsvd_region[n].start = (unsigned long) ia64_boot_param;
rsvd_region[n].end = rsvd_region[n].start + sizeof(*ia64_boot_param);
n++;
rsvd_region[n].start = (unsigned long) __va(ia64_boot_param->efi_memmap);
rsvd_region[n].end = rsvd_region[n].start + ia64_boot_param->efi_memmap_size;
n++;
rsvd_region[n].start = (unsigned long) __va(ia64_boot_param->command_line);
rsvd_region[n].end = (rsvd_region[n].start
+ strlen(__va(ia64_boot_param->command_line)) + 1);
n++;
rsvd_region[n].start = (unsigned long) ia64_imva((void *)KERNEL_START);
rsvd_region[n].end = (unsigned long) ia64_imva(_end);
n++;
#ifdef CONFIG_BLK_DEV_INITRD
if (ia64_boot_param->initrd_start) {
rsvd_region[n].start = (unsigned long)__va(ia64_boot_param->initrd_start);
rsvd_region[n].end = rsvd_region[n].start + ia64_boot_param->initrd_size;
n++;
}
#endif
/* end of memory marker */
rsvd_region[n].start = ~0UL;
rsvd_region[n].end = ~0UL;
n++;
num_rsvd_regions = n;
sort_regions(rsvd_region, num_rsvd_regions);
}
/**
* find_initrd - get initrd parameters from the boot parameter structure
*
* Grab the initrd start and end from the boot parameter struct given us by
* the boot loader.
*/
void
find_initrd (void)
{
#ifdef CONFIG_BLK_DEV_INITRD
if (ia64_boot_param->initrd_start) {
initrd_start = (unsigned long)__va(ia64_boot_param->initrd_start);
initrd_end = initrd_start+ia64_boot_param->initrd_size;
printk(KERN_INFO "Initial ramdisk at: 0x%lx (%lu bytes)\n",
initrd_start, ia64_boot_param->initrd_size);
}
#endif
}
static void __init
io_port_init (void)
{
extern unsigned long ia64_iobase;
unsigned long phys_iobase;
/*
* Set `iobase' to the appropriate address in region 6 (uncached access range).
*
* The EFI memory map is the "preferred" location to get the I/O port space base,
* rather than relying on AR.KR0. This should become clearer in future SAL
* specs. We'll fall back to getting it out of AR.KR0 if no appropriate entry is
* found in the memory map.
*/
phys_iobase = efi_get_iobase();
if (phys_iobase)
/* set AR.KR0 since this is all we use it for anyway */
ia64_set_kr(IA64_KR_IO_BASE, phys_iobase);
else {
phys_iobase = ia64_get_kr(IA64_KR_IO_BASE);
printk(KERN_INFO "No I/O port range found in EFI memory map, falling back "
"to AR.KR0\n");
printk(KERN_INFO "I/O port base = 0x%lx\n", phys_iobase);
}
ia64_iobase = (unsigned long) ioremap(phys_iobase, 0);
/* setup legacy IO port space */
io_space[0].mmio_base = ia64_iobase;
io_space[0].sparse = 1;
num_io_spaces = 1;
}
/**
* early_console_setup - setup debugging console
*
* Consoles started here require little enough setup that we can start using
* them very early in the boot process, either right after the machine
* vector initialization, or even before if the drivers can detect their hw.
*
* Returns non-zero if a console couldn't be set up.
*/
static inline int __init
early_console_setup (char *cmdline)
{
#ifdef CONFIG_SERIAL_SGI_L1_CONSOLE
{
extern int sn_serial_console_early_setup(void);
if (!sn_serial_console_early_setup())
return 0;
}
#endif
#ifdef CONFIG_EFI_PCDP
if (!efi_setup_pcdp_console(cmdline))
return 0;
#endif
#ifdef CONFIG_SERIAL_8250_CONSOLE
if (!early_serial_console_init(cmdline))
return 0;
#endif
return -1;
}
static inline void
mark_bsp_online (void)
{
#ifdef CONFIG_SMP
/* If we register an early console, allow CPU 0 to printk */
cpu_set(smp_processor_id(), cpu_online_map);
#endif
}
void __init
setup_arch (char **cmdline_p)
{
unw_init();
ia64_patch_vtop((u64) __start___vtop_patchlist, (u64) __end___vtop_patchlist);
*cmdline_p = __va(ia64_boot_param->command_line);
strlcpy(saved_command_line, *cmdline_p, COMMAND_LINE_SIZE);
efi_init();
io_port_init();
#ifdef CONFIG_IA64_GENERIC
{
const char *mvec_name = strstr (*cmdline_p, "machvec=");
char str[64];
if (mvec_name) {
const char *end;
size_t len;
mvec_name += 8;
end = strchr (mvec_name, ' ');
if (end)
len = end - mvec_name;
else
len = strlen (mvec_name);
len = min(len, sizeof (str) - 1);
strncpy (str, mvec_name, len);
str[len] = '\0';
mvec_name = str;
} else
mvec_name = acpi_get_sysname();
machvec_init(mvec_name);
}
#endif
if (early_console_setup(*cmdline_p) == 0)
mark_bsp_online();
#ifdef CONFIG_ACPI_BOOT
/* Initialize the ACPI boot-time table parser */
acpi_table_init();
# ifdef CONFIG_ACPI_NUMA
acpi_numa_init();
# endif
#else
# ifdef CONFIG_SMP
smp_build_cpu_map(); /* happens, e.g., with the Ski simulator */
# endif
#endif /* CONFIG_ACPI_BOOT */
find_memory();
/* process SAL system table: */
ia64_sal_init(efi.sal_systab);
#ifdef CONFIG_SMP
cpu_physical_id(0) = hard_smp_processor_id();
#endif
cpu_init(); /* initialize the bootstrap CPU */
#ifdef CONFIG_ACPI_BOOT
acpi_boot_init();
#endif
#ifdef CONFIG_VT
if (!conswitchp) {
# if defined(CONFIG_DUMMY_CONSOLE)
conswitchp = &dummy_con;
# endif
# if defined(CONFIG_VGA_CONSOLE)
/*
* Non-legacy systems may route legacy VGA MMIO range to system
* memory. vga_con probes the MMIO hole, so memory looks like
* a VGA device to it. The EFI memory map can tell us if it's
* memory so we can avoid this problem.
*/
if (efi_mem_type(0xA0000) != EFI_CONVENTIONAL_MEMORY)
conswitchp = &vga_con;
# endif
}
#endif
/* enable IA-64 Machine Check Abort Handling unless disabled */
if (!strstr(saved_command_line, "nomca"))
ia64_mca_init();
platform_setup(cmdline_p);
paging_init();
}
/*
* Display cpu info for all cpu's.
*/
static int
show_cpuinfo (struct seq_file *m, void *v)
{
#ifdef CONFIG_SMP
# define lpj c->loops_per_jiffy
# define cpunum c->cpu
#else
# define lpj loops_per_jiffy
# define cpunum 0
#endif
static struct {
unsigned long mask;
const char *feature_name;
} feature_bits[] = {
{ 1UL << 0, "branchlong" },
{ 1UL << 1, "spontaneous deferral"},
{ 1UL << 2, "16-byte atomic ops" }
};
char family[32], features[128], *cp, sep;
struct cpuinfo_ia64 *c = v;
unsigned long mask;
int i;
mask = c->features;
switch (c->family) {
case 0x07: memcpy(family, "Itanium", 8); break;
case 0x1f: memcpy(family, "Itanium 2", 10); break;
default: sprintf(family, "%u", c->family); break;
}
/* build the feature string: */
memcpy(features, " standard", 10);
cp = features;
sep = 0;
for (i = 0; i < (int) ARRAY_SIZE(feature_bits); ++i) {
if (mask & feature_bits[i].mask) {
if (sep)
*cp++ = sep;
sep = ',';
*cp++ = ' ';
strcpy(cp, feature_bits[i].feature_name);
cp += strlen(feature_bits[i].feature_name);
mask &= ~feature_bits[i].mask;
}
}
if (mask) {
/* print unknown features as a hex value: */
if (sep)
*cp++ = sep;
sprintf(cp, " 0x%lx", mask);
}
seq_printf(m,
"processor : %d\n"
"vendor : %s\n"
"arch : IA-64\n"
"family : %s\n"
"model : %u\n"
"revision : %u\n"
"archrev : %u\n"
"features :%s\n" /* don't change this---it _is_ right! */
"cpu number : %lu\n"
"cpu regs : %u\n"
"cpu MHz : %lu.%06lu\n"
"itc MHz : %lu.%06lu\n"
"BogoMIPS : %lu.%02lu\n\n",
cpunum, c->vendor, family, c->model, c->revision, c->archrev,
features, c->ppn, c->number,
c->proc_freq / 1000000, c->proc_freq % 1000000,
c->itc_freq / 1000000, c->itc_freq % 1000000,
lpj*HZ/500000, (lpj*HZ/5000) % 100);
return 0;
}
static void *
c_start (struct seq_file *m, loff_t *pos)
{
#ifdef CONFIG_SMP
while (*pos < NR_CPUS && !cpu_isset(*pos, cpu_online_map))
++*pos;
#endif
return *pos < NR_CPUS ? cpu_data(*pos) : NULL;
}
static void *
c_next (struct seq_file *m, void *v, loff_t *pos)
{
++*pos;
return c_start(m, pos);
}
static void
c_stop (struct seq_file *m, void *v)
{
}
struct seq_operations cpuinfo_op = {
.start = c_start,
.next = c_next,
.stop = c_stop,
.show = show_cpuinfo
};
void
identify_cpu (struct cpuinfo_ia64 *c)
{
union {
unsigned long bits[5];
struct {
/* id 0 & 1: */
char vendor[16];
/* id 2 */
u64 ppn; /* processor serial number */
/* id 3: */
unsigned number : 8;
unsigned revision : 8;
unsigned model : 8;
unsigned family : 8;
unsigned archrev : 8;
unsigned reserved : 24;
/* id 4: */
u64 features;
} field;
} cpuid;
pal_vm_info_1_u_t vm1;
pal_vm_info_2_u_t vm2;
pal_status_t status;
unsigned long impl_va_msb = 50, phys_addr_size = 44; /* Itanium defaults */
int i;
for (i = 0; i < 5; ++i)
cpuid.bits[i] = ia64_get_cpuid(i);
memcpy(c->vendor, cpuid.field.vendor, 16);
#ifdef CONFIG_SMP
c->cpu = smp_processor_id();
#endif
c->ppn = cpuid.field.ppn;
c->number = cpuid.field.number;
c->revision = cpuid.field.revision;
c->model = cpuid.field.model;
c->family = cpuid.field.family;
c->archrev = cpuid.field.archrev;
c->features = cpuid.field.features;
status = ia64_pal_vm_summary(&vm1, &vm2);
if (status == PAL_STATUS_SUCCESS) {
impl_va_msb = vm2.pal_vm_info_2_s.impl_va_msb;
phys_addr_size = vm1.pal_vm_info_1_s.phys_add_size;
}
c->unimpl_va_mask = ~((7L<<61) | ((1L << (impl_va_msb + 1)) - 1));
c->unimpl_pa_mask = ~((1L<<63) | ((1L << phys_addr_size) - 1));
}
void
setup_per_cpu_areas (void)
{
/* start_kernel() requires this... */
}
static void
get_max_cacheline_size (void)
{
unsigned long line_size, max = 1;
u64 l, levels, unique_caches;
pal_cache_config_info_t cci;
s64 status;
status = ia64_pal_cache_summary(&levels, &unique_caches);
if (status != 0) {
printk(KERN_ERR "%s: ia64_pal_cache_summary() failed (status=%ld)\n",
__FUNCTION__, status);
max = SMP_CACHE_BYTES;
goto out;
}
for (l = 0; l < levels; ++l) {
status = ia64_pal_cache_config_info(l, /* cache_type (data_or_unified)= */ 2,
&cci);
if (status != 0) {
printk(KERN_ERR
"%s: ia64_pal_cache_config_info(l=%lu) failed (status=%ld)\n",
__FUNCTION__, l, status);
max = SMP_CACHE_BYTES;
}
line_size = 1 << cci.pcci_line_size;
if (line_size > max)
max = line_size;
}
out:
if (max > ia64_max_cacheline_size)
ia64_max_cacheline_size = max;
}
/*
* cpu_init() initializes state that is per-CPU. This function acts
* as a 'CPU state barrier', nothing should get across.
*/
void
cpu_init (void)
{
extern void __devinit ia64_mmu_init (void *);
unsigned long num_phys_stacked;
pal_vm_info_2_u_t vmi;
unsigned int max_ctx;
struct cpuinfo_ia64 *cpu_info;
void *cpu_data;
cpu_data = per_cpu_init();
/*
* We set ar.k3 so that assembly code in MCA handler can compute
* physical addresses of per cpu variables with a simple:
* phys = ar.k3 + &per_cpu_var
*/
ia64_set_kr(IA64_KR_PER_CPU_DATA,
ia64_tpa(cpu_data) - (long) __per_cpu_start);
get_max_cacheline_size();
/*
* We can't pass "local_cpu_data" to identify_cpu() because we haven't called
* ia64_mmu_init() yet. And we can't call ia64_mmu_init() first because it
* depends on the data returned by identify_cpu(). We break the dependency by
* accessing cpu_data() through the canonical per-CPU address.
*/
cpu_info = cpu_data + ((char *) &__ia64_per_cpu_var(cpu_info) - __per_cpu_start);
identify_cpu(cpu_info);
#ifdef CONFIG_MCKINLEY
{
# define FEATURE_SET 16
struct ia64_pal_retval iprv;
if (cpu_info->family == 0x1f) {
PAL_CALL_PHYS(iprv, PAL_PROC_GET_FEATURES, 0, FEATURE_SET, 0);
if ((iprv.status == 0) && (iprv.v0 & 0x80) && (iprv.v2 & 0x80))
PAL_CALL_PHYS(iprv, PAL_PROC_SET_FEATURES,
(iprv.v1 | 0x80), FEATURE_SET, 0);
}
}
#endif
/* Clear the stack memory reserved for pt_regs: */
memset(ia64_task_regs(current), 0, sizeof(struct pt_regs));
ia64_set_kr(IA64_KR_FPU_OWNER, 0);
/*
* Initialize the page-table base register to a global
* directory with all zeroes. This ensures that we can handle
* TLB-misses to user address-space even before we have created the
* first user address-space. This may happen, e.g., due to
* aggressive use of lfetch.fault.
*/
ia64_set_kr(IA64_KR_PT_BASE, __pa(ia64_imva(empty_zero_page)));
/*
* Initialize default control register to defer all speculative faults. The
* kernel MUST NOT depend on a particular setting of these bits (in other words,
* the kernel must have recovery code for all speculative accesses). Turn on
* dcr.lc as per recommendation by the architecture team. Most IA-32 apps
* shouldn't be affected by this (moral: keep your ia32 locks aligned and you'll
* be fine).
*/
ia64_setreg(_IA64_REG_CR_DCR, ( IA64_DCR_DP | IA64_DCR_DK | IA64_DCR_DX | IA64_DCR_DR
| IA64_DCR_DA | IA64_DCR_DD | IA64_DCR_LC));
atomic_inc(&init_mm.mm_count);
current->active_mm = &init_mm;
if (current->mm)
BUG();
ia64_mmu_init(ia64_imva(cpu_data));
ia64_mca_cpu_init(ia64_imva(cpu_data));
#ifdef CONFIG_IA32_SUPPORT
ia32_cpu_init();
#endif
/* Clear ITC to eliminate sched_clock() overflows in human time. */
ia64_set_itc(0);
/* disable all local interrupt sources: */
ia64_set_itv(1 << 16);
ia64_set_lrr0(1 << 16);
ia64_set_lrr1(1 << 16);
ia64_setreg(_IA64_REG_CR_PMV, 1 << 16);
ia64_setreg(_IA64_REG_CR_CMCV, 1 << 16);
/* clear TPR & XTP to enable all interrupt classes: */
ia64_setreg(_IA64_REG_CR_TPR, 0);
#ifdef CONFIG_SMP
normal_xtp();
#endif
/* set ia64_ctx.max_rid to the maximum RID that is supported by all CPUs: */
if (ia64_pal_vm_summary(NULL, &vmi) == 0)
max_ctx = (1U << (vmi.pal_vm_info_2_s.rid_size - 3)) - 1;
else {
printk(KERN_WARNING "cpu_init: PAL VM summary failed, assuming 18 RID bits\n");
max_ctx = (1U << 15) - 1; /* use architected minimum */
}
while (max_ctx < ia64_ctx.max_ctx) {
unsigned int old = ia64_ctx.max_ctx;
if (cmpxchg(&ia64_ctx.max_ctx, old, max_ctx) == old)
break;
}
if (ia64_pal_rse_info(&num_phys_stacked, NULL) != 0) {
printk(KERN_WARNING "cpu_init: PAL RSE info failed; assuming 96 physical "
"stacked regs\n");
num_phys_stacked = 96;
}
/* size of physical stacked register partition plus 8 bytes: */
__get_cpu_var(ia64_phys_stacked_size_p8) = num_phys_stacked*8 + 8;
platform_cpu_init();
}
void
check_bugs (void)
{
ia64_patch_mckinley_e9((unsigned long) __start___mckinley_e9_bundles,
(unsigned long) __end___mckinley_e9_bundles);
}

25
arch/ia64/kernel/sigframe.h Normal file

@@ -0,0 +1,25 @@
struct sigscratch {
unsigned long scratch_unat; /* ar.unat for the general registers saved in pt */
unsigned long ar_pfs; /* for syscalls, the user-level function-state */
struct pt_regs pt;
};
struct sigframe {
/*
* Place signal handler args where user-level unwinder can find them easily.
* DO NOT MOVE THESE. They are part of the IA-64 Linux ABI and there is
* user-level code that depends on their presence!
*/
unsigned long arg0; /* signum */
unsigned long arg1; /* siginfo pointer */
unsigned long arg2; /* sigcontext pointer */
/*
* End of architected state.
*/
void __user *handler; /* pointer to the plabel of the signal handler */
struct siginfo info;
struct sigcontext sc;
};
extern long ia64_do_signal (sigset_t *, struct sigscratch *, long);

691
arch/ia64/kernel/signal.c Normal file

@@ -0,0 +1,691 @@
/*
* Architecture-specific signal handling support.
*
* Copyright (C) 1999-2004 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
*
* Derived from i386 and Alpha versions.
*/
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/ptrace.h>
#include <linux/sched.h>
#include <linux/signal.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/stddef.h>
#include <linux/tty.h>
#include <linux/binfmts.h>
#include <linux/unistd.h>
#include <linux/wait.h>
#include <asm/ia32.h>
#include <asm/intrinsics.h>
#include <asm/uaccess.h>
#include <asm/rse.h>
#include <asm/sigcontext.h>
#include "sigframe.h"
#define DEBUG_SIG 0
#define STACK_ALIGN 16 /* minimal alignment for stack pointer */
#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
#if _NSIG_WORDS > 1
# define PUT_SIGSET(k,u) __copy_to_user((u)->sig, (k)->sig, sizeof(sigset_t))
# define GET_SIGSET(k,u) __copy_from_user((k)->sig, (u)->sig, sizeof(sigset_t))
#else
# define PUT_SIGSET(k,u) __put_user((k)->sig[0], &(u)->sig[0])
# define GET_SIGSET(k,u) __get_user((k)->sig[0], &(u)->sig[0])
#endif
long
ia64_rt_sigsuspend (sigset_t __user *uset, size_t sigsetsize, struct sigscratch *scr)
{
sigset_t oldset, set;
/* XXX: Don't preclude handling different sized sigset_t's. */
if (sigsetsize != sizeof(sigset_t))
return -EINVAL;
if (!access_ok(VERIFY_READ, uset, sigsetsize))
return -EFAULT;
if (GET_SIGSET(&set, uset))
return -EFAULT;
sigdelsetmask(&set, ~_BLOCKABLE);
spin_lock_irq(&current->sighand->siglock);
{
oldset = current->blocked;
current->blocked = set;
recalc_sigpending();
}
spin_unlock_irq(&current->sighand->siglock);
/*
* The return below usually returns to the signal handler. We need to
* pre-set the correct error code here to ensure that the right values
* get saved in sigcontext by ia64_do_signal.
*/
scr->pt.r8 = EINTR;
scr->pt.r10 = -1;
while (1) {
current->state = TASK_INTERRUPTIBLE;
schedule();
if (ia64_do_signal(&oldset, scr, 1))
return -EINTR;
}
}
asmlinkage long
sys_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, long arg2,
long arg3, long arg4, long arg5, long arg6, long arg7,
struct pt_regs regs)
{
return do_sigaltstack(uss, uoss, regs.r12);
}
static long
restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr)
{
unsigned long ip, flags, nat, um, cfm;
long err;
/* Always make any pending restarted system calls return -EINTR */
current_thread_info()->restart_block.fn = do_no_restart_syscall;
/* restore scratch that always gets updated during signal delivery: */
err = __get_user(flags, &sc->sc_flags);
err |= __get_user(nat, &sc->sc_nat);
err |= __get_user(ip, &sc->sc_ip); /* instruction pointer */
err |= __get_user(cfm, &sc->sc_cfm);
err |= __get_user(um, &sc->sc_um); /* user mask */
err |= __get_user(scr->pt.ar_rsc, &sc->sc_ar_rsc);
err |= __get_user(scr->pt.ar_unat, &sc->sc_ar_unat);
err |= __get_user(scr->pt.ar_fpsr, &sc->sc_ar_fpsr);
err |= __get_user(scr->pt.ar_pfs, &sc->sc_ar_pfs);
err |= __get_user(scr->pt.pr, &sc->sc_pr); /* predicates */
err |= __get_user(scr->pt.b0, &sc->sc_br[0]); /* b0 (rp) */
err |= __get_user(scr->pt.b6, &sc->sc_br[6]); /* b6 */
err |= __copy_from_user(&scr->pt.r1, &sc->sc_gr[1], 8); /* r1 */
err |= __copy_from_user(&scr->pt.r8, &sc->sc_gr[8], 4*8); /* r8-r11 */
err |= __copy_from_user(&scr->pt.r12, &sc->sc_gr[12], 2*8); /* r12-r13 */
err |= __copy_from_user(&scr->pt.r15, &sc->sc_gr[15], 8); /* r15 */
scr->pt.cr_ifs = cfm | (1UL << 63);
/* establish new instruction pointer: */
scr->pt.cr_iip = ip & ~0x3UL;
ia64_psr(&scr->pt)->ri = ip & 0x3;
scr->pt.cr_ipsr = (scr->pt.cr_ipsr & ~IA64_PSR_UM) | (um & IA64_PSR_UM);
scr->scratch_unat = ia64_put_scratch_nat_bits(&scr->pt, nat);
if (!(flags & IA64_SC_FLAG_IN_SYSCALL)) {
/* Restore most scratch-state only when not in syscall. */
err |= __get_user(scr->pt.ar_ccv, &sc->sc_ar_ccv); /* ar.ccv */
err |= __get_user(scr->pt.b7, &sc->sc_br[7]); /* b7 */
err |= __get_user(scr->pt.r14, &sc->sc_gr[14]); /* r14 */
err |= __copy_from_user(&scr->pt.ar_csd, &sc->sc_ar25, 2*8); /* ar.csd & ar.ssd */
err |= __copy_from_user(&scr->pt.r2, &sc->sc_gr[2], 2*8); /* r2-r3 */
err |= __copy_from_user(&scr->pt.r16, &sc->sc_gr[16], 16*8); /* r16-r31 */
}
if ((flags & IA64_SC_FLAG_FPH_VALID) != 0) {
struct ia64_psr *psr = ia64_psr(&scr->pt);
__copy_from_user(current->thread.fph, &sc->sc_fr[32], 96*16);
psr->mfh = 0; /* drop signal handler's fph contents... */
if (psr->dfh)
ia64_drop_fpu(current);
else {
/* We already own the local fph, otherwise psr->dfh wouldn't be 0. */
__ia64_load_fpu(current->thread.fph);
ia64_set_local_fpu_owner(current);
}
}
return err;
}
int
copy_siginfo_to_user (siginfo_t __user *to, siginfo_t *from)
{
if (!access_ok(VERIFY_WRITE, to, sizeof(siginfo_t)))
return -EFAULT;
if (from->si_code < 0) {
if (__copy_to_user(to, from, sizeof(siginfo_t)))
return -EFAULT;
return 0;
} else {
int err;
/*
* If you change siginfo_t structure, please be sure this code is fixed
* accordingly. It should never copy any pad contained in the structure
* to avoid security leaks, but must copy the generic 3 ints plus the
* relevant union member.
*/
err = __put_user(from->si_signo, &to->si_signo);
err |= __put_user(from->si_errno, &to->si_errno);
err |= __put_user((short)from->si_code, &to->si_code);
switch (from->si_code >> 16) {
case __SI_FAULT >> 16:
err |= __put_user(from->si_flags, &to->si_flags);
err |= __put_user(from->si_isr, &to->si_isr);
case __SI_POLL >> 16:
err |= __put_user(from->si_addr, &to->si_addr);
err |= __put_user(from->si_imm, &to->si_imm);
break;
case __SI_TIMER >> 16:
err |= __put_user(from->si_tid, &to->si_tid);
err |= __put_user(from->si_overrun, &to->si_overrun);
err |= __put_user(from->si_ptr, &to->si_ptr);
break;
case __SI_RT >> 16: /* Not generated by the kernel as of now. */
case __SI_MESGQ >> 16:
err |= __put_user(from->si_uid, &to->si_uid);
err |= __put_user(from->si_pid, &to->si_pid);
err |= __put_user(from->si_ptr, &to->si_ptr);
break;
case __SI_CHLD >> 16:
err |= __put_user(from->si_utime, &to->si_utime);
err |= __put_user(from->si_stime, &to->si_stime);
err |= __put_user(from->si_status, &to->si_status);
default:
err |= __put_user(from->si_uid, &to->si_uid);
err |= __put_user(from->si_pid, &to->si_pid);
break;
}
return err;
}
}
long
ia64_rt_sigreturn (struct sigscratch *scr)
{
extern char ia64_strace_leave_kernel, ia64_leave_kernel;
struct sigcontext __user *sc;
struct siginfo si;
sigset_t set;
long retval;
sc = &((struct sigframe __user *) (scr->pt.r12 + 16))->sc;
/*
* When we return to the previously executing context, r8 and r10 have already
* been set up the way we want them. Indeed, if the signal wasn't delivered while
* in a system call, we must not touch r8 or r10 as otherwise user-level state
* could be corrupted.
*/
retval = (long) &ia64_leave_kernel;
if (test_thread_flag(TIF_SYSCALL_TRACE))
/*
* strace expects to be notified after sigreturn returns even though the
* context to which we return may not be in the middle of a syscall.
* Thus, the return-value that strace displays for sigreturn is
* meaningless.
*/
retval = (long) &ia64_strace_leave_kernel;
if (!access_ok(VERIFY_READ, sc, sizeof(*sc)))
goto give_sigsegv;
if (GET_SIGSET(&set, &sc->sc_mask))
goto give_sigsegv;
sigdelsetmask(&set, ~_BLOCKABLE);
spin_lock_irq(&current->sighand->siglock);
{
current->blocked = set;
recalc_sigpending();
}
spin_unlock_irq(&current->sighand->siglock);
if (restore_sigcontext(sc, scr))
goto give_sigsegv;
#if DEBUG_SIG
printk("SIG return (%s:%d): sp=%lx ip=%lx\n",
current->comm, current->pid, scr->pt.r12, scr->pt.cr_iip);
#endif
/*
* It is more difficult to avoid calling this function than to
* call it and ignore errors.
*/
do_sigaltstack(&sc->sc_stack, NULL, scr->pt.r12);
return retval;
give_sigsegv:
si.si_signo = SIGSEGV;
si.si_errno = 0;
si.si_code = SI_KERNEL;
si.si_pid = current->pid;
si.si_uid = current->uid;
si.si_addr = sc;
force_sig_info(SIGSEGV, &si, current);
return retval;
}
/*
* This does just the minimum required setup of sigcontext.
* Specifically, it only installs data that is either not knowable at
* the user-level or that gets modified before execution in the
* trampoline starts. Everything else is done at the user-level.
*/
static long
setup_sigcontext (struct sigcontext __user *sc, sigset_t *mask, struct sigscratch *scr)
{
unsigned long flags = 0, ifs, cfm, nat;
long err;
ifs = scr->pt.cr_ifs;
if (on_sig_stack((unsigned long) sc))
flags |= IA64_SC_FLAG_ONSTACK;
if ((ifs & (1UL << 63)) == 0)
/* if cr_ifs doesn't have the valid bit set, we got here through a syscall */
flags |= IA64_SC_FLAG_IN_SYSCALL;
cfm = ifs & ((1UL << 38) - 1);
ia64_flush_fph(current);
if ((current->thread.flags & IA64_THREAD_FPH_VALID)) {
flags |= IA64_SC_FLAG_FPH_VALID;
__copy_to_user(&sc->sc_fr[32], current->thread.fph, 96*16);
}
nat = ia64_get_scratch_nat_bits(&scr->pt, scr->scratch_unat);
err = __put_user(flags, &sc->sc_flags);
err |= __put_user(nat, &sc->sc_nat);
err |= PUT_SIGSET(mask, &sc->sc_mask);
err |= __put_user(cfm, &sc->sc_cfm);
err |= __put_user(scr->pt.cr_ipsr & IA64_PSR_UM, &sc->sc_um);
err |= __put_user(scr->pt.ar_rsc, &sc->sc_ar_rsc);
err |= __put_user(scr->pt.ar_unat, &sc->sc_ar_unat); /* ar.unat */
err |= __put_user(scr->pt.ar_fpsr, &sc->sc_ar_fpsr); /* ar.fpsr */
err |= __put_user(scr->pt.ar_pfs, &sc->sc_ar_pfs);
err |= __put_user(scr->pt.pr, &sc->sc_pr); /* predicates */
err |= __put_user(scr->pt.b0, &sc->sc_br[0]); /* b0 (rp) */
err |= __put_user(scr->pt.b6, &sc->sc_br[6]); /* b6 */
err |= __copy_to_user(&sc->sc_gr[1], &scr->pt.r1, 8); /* r1 */
err |= __copy_to_user(&sc->sc_gr[8], &scr->pt.r8, 4*8); /* r8-r11 */
err |= __copy_to_user(&sc->sc_gr[12], &scr->pt.r12, 2*8); /* r12-r13 */
err |= __copy_to_user(&sc->sc_gr[15], &scr->pt.r15, 8); /* r15 */
err |= __put_user(scr->pt.cr_iip + ia64_psr(&scr->pt)->ri, &sc->sc_ip);
if (flags & IA64_SC_FLAG_IN_SYSCALL) {
/* Clear scratch registers if the signal interrupted a system call. */
err |= __put_user(0, &sc->sc_ar_ccv); /* ar.ccv */
err |= __put_user(0, &sc->sc_br[7]); /* b7 */
err |= __put_user(0, &sc->sc_gr[14]); /* r14 */
err |= __clear_user(&sc->sc_ar25, 2*8); /* ar.csd & ar.ssd */
err |= __clear_user(&sc->sc_gr[2], 2*8); /* r2-r3 */
err |= __clear_user(&sc->sc_gr[16], 16*8); /* r16-r31 */
} else {
/* Copy scratch regs to sigcontext if the signal didn't interrupt a syscall. */
err |= __put_user(scr->pt.ar_ccv, &sc->sc_ar_ccv); /* ar.ccv */
err |= __put_user(scr->pt.b7, &sc->sc_br[7]); /* b7 */
err |= __put_user(scr->pt.r14, &sc->sc_gr[14]); /* r14 */
err |= __copy_to_user(&sc->sc_ar25, &scr->pt.ar_csd, 2*8); /* ar.csd & ar.ssd */
err |= __copy_to_user(&sc->sc_gr[2], &scr->pt.r2, 2*8); /* r2-r3 */
err |= __copy_to_user(&sc->sc_gr[16], &scr->pt.r16, 16*8); /* r16-r31 */
}
return err;
}
/*
* Check whether the register-backing store is already on the signal stack.
*/
static inline int
rbs_on_sig_stack (unsigned long bsp)
{
return (bsp - current->sas_ss_sp < current->sas_ss_size);
}
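/*
 * Illustration (not part of the original source): the single unsigned comparison
 * above is equivalent to the two-sided range check
 *
 *	sas_ss_sp <= bsp && bsp < sas_ss_sp + sas_ss_size
 *
 * because when bsp < sas_ss_sp the subtraction wraps around to a huge unsigned
 * value that is necessarily >= sas_ss_size.  For example, with sas_ss_sp = 0x1000
 * and sas_ss_size = 0x2000, bsp = 0x0800 gives 0x0800 - 0x1000 =
 * 0xfffffffffffff800, which fails the test as desired.
 */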
static long
force_sigsegv_info (int sig, void __user *addr)
{
unsigned long flags;
struct siginfo si;
if (sig == SIGSEGV) {
/*
* Acquiring siglock around the sa_handler-update is almost
* certainly overkill, but this isn't a
* performance-critical path and I'd rather play it safe
* here than having to debug a nasty race if and when
* something changes in kernel/signal.c that would make it
* no longer safe to modify sa_handler without holding the
* lock.
*/
spin_lock_irqsave(&current->sighand->siglock, flags);
current->sighand->action[sig - 1].sa.sa_handler = SIG_DFL;
spin_unlock_irqrestore(&current->sighand->siglock, flags);
}
si.si_signo = SIGSEGV;
si.si_errno = 0;
si.si_code = SI_KERNEL;
si.si_pid = current->pid;
si.si_uid = current->uid;
si.si_addr = addr;
force_sig_info(SIGSEGV, &si, current);
return 0;
}
static long
setup_frame (int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set,
struct sigscratch *scr)
{
extern char __kernel_sigtramp[];
unsigned long tramp_addr, new_rbs = 0;
struct sigframe __user *frame;
long err;
frame = (void __user *) scr->pt.r12;
tramp_addr = (unsigned long) __kernel_sigtramp;
if ((ka->sa.sa_flags & SA_ONSTACK) && sas_ss_flags((unsigned long) frame) == 0) {
frame = (void __user *) ((current->sas_ss_sp + current->sas_ss_size)
& ~(STACK_ALIGN - 1));
/*
* We need to check for the register stack being on the signal stack
* separately, because it's switched separately (memory stack is switched
* in the kernel, register stack is switched in the signal trampoline).
*/
if (!rbs_on_sig_stack(scr->pt.ar_bspstore))
new_rbs = (current->sas_ss_sp + sizeof(long) - 1) & ~(sizeof(long) - 1);
}
frame = (void __user *) frame - ((sizeof(*frame) + STACK_ALIGN - 1) & ~(STACK_ALIGN - 1));
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
return force_sigsegv_info(sig, frame);
err = __put_user(sig, &frame->arg0);
err |= __put_user(&frame->info, &frame->arg1);
err |= __put_user(&frame->sc, &frame->arg2);
err |= __put_user(new_rbs, &frame->sc.sc_rbs_base);
err |= __put_user(0, &frame->sc.sc_loadrs); /* initialize to zero */
err |= __put_user(ka->sa.sa_handler, &frame->handler);
err |= copy_siginfo_to_user(&frame->info, info);
err |= __put_user(current->sas_ss_sp, &frame->sc.sc_stack.ss_sp);
err |= __put_user(current->sas_ss_size, &frame->sc.sc_stack.ss_size);
err |= __put_user(sas_ss_flags(scr->pt.r12), &frame->sc.sc_stack.ss_flags);
err |= setup_sigcontext(&frame->sc, set, scr);
if (unlikely(err))
return force_sigsegv_info(sig, frame);
scr->pt.r12 = (unsigned long) frame - 16; /* new stack pointer */
scr->pt.ar_fpsr = FPSR_DEFAULT; /* reset fpsr for signal handler */
scr->pt.cr_iip = tramp_addr;
ia64_psr(&scr->pt)->ri = 0; /* start executing in first slot */
ia64_psr(&scr->pt)->be = 0; /* force little-endian byte-order */
/*
* Force the interruption function mask to zero. This has no effect when a
* system-call got interrupted by a signal (since, in that case, scr->pt_cr_ifs is
* ignored), but it has the desirable effect of making it possible to deliver a
* signal with an incomplete register frame (which happens when a mandatory RSE
* load faults). Furthermore, it has no negative effect on getting the user's
* dirty partition preserved, because that's governed by scr->pt.loadrs.
*/
scr->pt.cr_ifs = (1UL << 63);
/*
* Note: this affects only the NaT bits of the scratch regs (the ones saved in
* pt_regs), which is exactly what we want.
*/
scr->scratch_unat = 0; /* ensure the NaT bit of r12 is clear */
#if DEBUG_SIG
printk("SIG deliver (%s:%d): sig=%d sp=%lx ip=%lx handler=%p\n",
current->comm, current->pid, sig, scr->pt.r12, frame->sc.sc_ip, frame->handler);
#endif
return 1;
}
static long
handle_signal (unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset,
struct sigscratch *scr)
{
if (IS_IA32_PROCESS(&scr->pt)) {
/* send signal to IA-32 process */
if (!ia32_setup_frame1(sig, ka, info, oldset, &scr->pt))
return 0;
} else
/* send signal to IA-64 process */
if (!setup_frame(sig, ka, info, oldset, scr))
return 0;
if (!(ka->sa.sa_flags & SA_NODEFER)) {
spin_lock_irq(&current->sighand->siglock);
{
sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
sigaddset(&current->blocked, sig);
recalc_sigpending();
}
spin_unlock_irq(&current->sighand->siglock);
}
return 1;
}
/*
* Note that `init' is a special process: it doesn't get signals it doesn't want to
* handle. Thus you cannot kill init with a SIGKILL, even by mistake.
*/
long
ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall)
{
struct k_sigaction ka;
siginfo_t info;
long restart = in_syscall;
long errno = scr->pt.r8;
# define ERR_CODE(c) (IS_IA32_PROCESS(&scr->pt) ? -(c) : (c))
/*
* In the ia64_leave_kernel code path, we want the common case to go fast, which
* is why we may in certain cases get here from kernel mode. Just return without
* doing anything if so.
*/
if (!user_mode(&scr->pt))
return 0;
if (!oldset)
oldset = &current->blocked;
/*
* This only loops in the rare cases of handle_signal() failing, in which case we
* need to push through a forced SIGSEGV.
*/
while (1) {
int signr = get_signal_to_deliver(&info, &ka, &scr->pt, NULL);
/*
* get_signal_to_deliver() may have run a debugger (via notify_parent())
* and the debugger may have modified the state (e.g., to arrange for an
* inferior call), thus it's important to check for restarting _after_
* get_signal_to_deliver().
*/
if (IS_IA32_PROCESS(&scr->pt)) {
if (in_syscall) {
if (errno >= 0)
restart = 0;
else
errno = -errno;
}
} else if ((long) scr->pt.r10 != -1)
/*
* A system call has to be restarted only if one of the error codes
* ERESTARTNOHAND, ERESTARTSYS, or ERESTARTNOINTR is returned. If r10
* isn't -1 then r8 doesn't hold an error code and we don't need to
* restart the syscall, so we can clear the "restart" flag here.
*/
restart = 0;
if (signr <= 0)
break;
if (unlikely(restart)) {
switch (errno) {
case ERESTART_RESTARTBLOCK:
case ERESTARTNOHAND:
scr->pt.r8 = ERR_CODE(EINTR);
/* note: scr->pt.r10 is already -1 */
break;
case ERESTARTSYS:
if ((ka.sa.sa_flags & SA_RESTART) == 0) {
scr->pt.r8 = ERR_CODE(EINTR);
/* note: scr->pt.r10 is already -1 */
break;
}
case ERESTARTNOINTR:
if (IS_IA32_PROCESS(&scr->pt)) {
scr->pt.r8 = scr->pt.r1;
scr->pt.cr_iip -= 2;
} else
ia64_decrement_ip(&scr->pt);
restart = 0; /* don't restart twice if handle_signal() fails... */
}
}
/*
* Whee! Actually deliver the signal. If the delivery failed, we need to
* continue to iterate in this loop so we can deliver the SIGSEGV...
*/
if (handle_signal(signr, &ka, &info, oldset, scr))
return 1;
}
/* Did we come from a system call? */
if (restart) {
/* Restart the system call - no handlers present */
if (errno == ERESTARTNOHAND || errno == ERESTARTSYS || errno == ERESTARTNOINTR
|| errno == ERESTART_RESTARTBLOCK)
{
if (IS_IA32_PROCESS(&scr->pt)) {
scr->pt.r8 = scr->pt.r1;
scr->pt.cr_iip -= 2;
if (errno == ERESTART_RESTARTBLOCK)
scr->pt.r8 = 0; /* x86 version of __NR_restart_syscall */
} else {
/*
* Note: the syscall number is in r15 which is saved in
* pt_regs so all we need to do here is adjust ip so that
* the "break" instruction gets re-executed.
*/
ia64_decrement_ip(&scr->pt);
if (errno == ERESTART_RESTARTBLOCK)
scr->pt.r15 = __NR_restart_syscall;
}
}
}
return 0;
}
/* Set a delayed signal that was detected in MCA/INIT/NMI/PMI context where it
* could not be delivered. It is important that the target process is not
* allowed to do any more work in user space. Possible cases for the target
* process:
*
* - It is sleeping and will wake up soon. Store the data in the current task,
* the signal will be sent when the current task returns from the next
* interrupt.
*
* - It is running in user context. Store the data in the current task, the
* signal will be sent when the current task returns from the next interrupt.
*
* - It is running in kernel context on this or another cpu and will return to
* user context. Store the data in the target task, the signal will be sent
* to itself when the target task returns to user space.
*
* - It is running in kernel context on this cpu and will sleep before
* returning to user context. Because this is also the current task, the
* signal will not get delivered and the task could sleep indefinitely.
* Store the data in the idle task for this cpu, the signal will be sent
* after the idle task processes its next interrupt.
*
* To cover all cases, store the data in the target task, the current task and
* the idle task on this cpu. Whatever happens, the signal will be delivered
* to the target task before it can do any useful user space work. Multiple
* deliveries have no unwanted side effects.
*
* Note: This code is executed in MCA/INIT/NMI/PMI context, with interrupts
* disabled. It must not take any locks nor use kernel structures or services
* that require locks.
*/
/* To ensure that we get the right pid, check its start time. To avoid extra
* include files in thread_info.h, convert the task start_time to unsigned long,
* giving us a cycle time of > 580 years.
*/
static inline unsigned long
start_time_ul(const struct task_struct *t)
{
return t->start_time.tv_sec * NSEC_PER_SEC + t->start_time.tv_nsec;
}
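/*
 * Back-of-the-envelope check (illustration only): start_time is folded into a
 * single unsigned long holding nanoseconds, and
 *
 *	2^64 ns / (10^9 ns/s * 3600 * 24 * 365 s/yr)  ~=  584 years
 *
 * so the value only wraps after well over 580 years of uptime, which is what
 * makes the start-time comparison in do_sigdelayed() safe.
 */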
void
set_sigdelayed(pid_t pid, int signo, int code, void __user *addr)
{
struct task_struct *t;
unsigned long start_time = 0;
int i;
for (i = 1; i <= 3; ++i) {
switch (i) {
case 1:
t = find_task_by_pid(pid);
if (t)
start_time = start_time_ul(t);
break;
case 2:
t = current;
break;
default:
t = idle_task(smp_processor_id());
break;
}
if (!t)
return;
t->thread_info->sigdelayed.signo = signo;
t->thread_info->sigdelayed.code = code;
t->thread_info->sigdelayed.addr = addr;
t->thread_info->sigdelayed.start_time = start_time;
t->thread_info->sigdelayed.pid = pid;
wmb();
set_tsk_thread_flag(t, TIF_SIGDELAYED);
}
}
/* Called from entry.S when it detects TIF_SIGDELAYED, a delayed signal that
* was detected in MCA/INIT/NMI/PMI context where it could not be delivered.
*/
void
do_sigdelayed(void)
{
struct siginfo siginfo;
pid_t pid;
struct task_struct *t;
clear_thread_flag(TIF_SIGDELAYED);
memset(&siginfo, 0, sizeof(siginfo));
siginfo.si_signo = current_thread_info()->sigdelayed.signo;
siginfo.si_code = current_thread_info()->sigdelayed.code;
siginfo.si_addr = current_thread_info()->sigdelayed.addr;
pid = current_thread_info()->sigdelayed.pid;
t = find_task_by_pid(pid);
if (!t)
return;
if (current_thread_info()->sigdelayed.start_time != start_time_ul(t))
return;
force_sig_info(siginfo.si_signo, &siginfo, t);
}

376
arch/ia64/kernel/smp.c Normal file

@@ -0,0 +1,376 @@
/*
* SMP Support
*
* Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
* Copyright (C) 1999, 2001, 2003 David Mosberger-Tang <davidm@hpl.hp.com>
*
* Lots of stuff stolen from arch/alpha/kernel/smp.c
*
* 01/05/16 Rohit Seth <rohit.seth@intel.com> IA64-SMP functions. Reorganized
* the existing code (on the lines of x86 port).
* 00/09/11 David Mosberger <davidm@hpl.hp.com> Do loops_per_jiffy
* calibration on each CPU.
* 00/08/23 Asit Mallick <asit.k.mallick@intel.com> fixed logical processor id
* 00/03/31 Rohit Seth <rohit.seth@intel.com> Fixes for Bootstrap Processor
* & cpu_online_map now gets done here (instead of setup.c)
* 99/10/05 davidm Update to bring it in sync with new command-line processing
* scheme.
* 10/13/00 Goutham Rao <goutham.rao@intel.com> Updated smp_call_function and
* smp_call_function_single to resend IPI on timeouts
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/smp.h>
#include <linux/kernel_stat.h>
#include <linux/mm.h>
#include <linux/cache.h>
#include <linux/delay.h>
#include <linux/efi.h>
#include <linux/bitops.h>
#include <asm/atomic.h>
#include <asm/current.h>
#include <asm/delay.h>
#include <asm/machvec.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
#include <asm/sal.h>
#include <asm/system.h>
#include <asm/tlbflush.h>
#include <asm/unistd.h>
#include <asm/mca.h>
/*
* Structure and data for smp_call_function(). This is designed to minimise static memory
* requirements. It also looks cleaner.
*/
static __cacheline_aligned DEFINE_SPINLOCK(call_lock);
struct call_data_struct {
void (*func) (void *info);
void *info;
long wait;
atomic_t started;
atomic_t finished;
};
static volatile struct call_data_struct *call_data;
#define IPI_CALL_FUNC 0
#define IPI_CPU_STOP 1
/* This needs to be cacheline aligned because it is written to by *other* CPUs. */
static DEFINE_PER_CPU(u64, ipi_operation) ____cacheline_aligned;
extern void cpu_halt (void);
void
lock_ipi_calllock(void)
{
spin_lock_irq(&call_lock);
}
void
unlock_ipi_calllock(void)
{
spin_unlock_irq(&call_lock);
}
static void
stop_this_cpu (void)
{
/*
* Remove this CPU:
*/
cpu_clear(smp_processor_id(), cpu_online_map);
max_xtp();
local_irq_disable();
cpu_halt();
}
void
cpu_die(void)
{
max_xtp();
local_irq_disable();
cpu_halt();
/* Should never be here */
BUG();
for (;;);
}
irqreturn_t
handle_IPI (int irq, void *dev_id, struct pt_regs *regs)
{
int this_cpu = get_cpu();
unsigned long *pending_ipis = &__ia64_per_cpu_var(ipi_operation);
unsigned long ops;
mb(); /* Order interrupt and bit testing. */
while ((ops = xchg(pending_ipis, 0)) != 0) {
mb(); /* Order bit clearing and data access. */
do {
unsigned long which;
which = ffz(~ops);
ops &= ~(1 << which);
switch (which) {
case IPI_CALL_FUNC:
{
struct call_data_struct *data;
void (*func)(void *info);
void *info;
int wait;
/* release the 'pointer lock' */
data = (struct call_data_struct *) call_data;
func = data->func;
info = data->info;
wait = data->wait;
mb();
atomic_inc(&data->started);
/*
* At this point the structure may be gone unless
* wait is true.
*/
(*func)(info);
/* Notify the sending CPU that the task is done. */
mb();
if (wait)
atomic_inc(&data->finished);
}
break;
case IPI_CPU_STOP:
stop_this_cpu();
break;
default:
printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n", this_cpu, which);
break;
}
} while (ops);
mb(); /* Order data access and bit testing. */
}
put_cpu();
return IRQ_HANDLED;
}
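/*
 * Illustration (not part of the original source) of the bit scan used in
 * handle_IPI() above: ffz() returns the index of the first zero bit, so
 * ffz(~ops) is the index of the lowest *set* bit of ops.  With ops = 0b0011
 * (IPI_CALL_FUNC and IPI_CPU_STOP both pending):
 *
 *	ffz(~0b0011) = 0  ->  handle IPI_CALL_FUNC, ops becomes 0b0010
 *	ffz(~0b0010) = 1  ->  handle IPI_CPU_STOP,  ops becomes 0
 */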
/*
* Called with preemption disabled.
*/
static inline void
send_IPI_single (int dest_cpu, int op)
{
set_bit(op, &per_cpu(ipi_operation, dest_cpu));
platform_send_ipi(dest_cpu, IA64_IPI_VECTOR, IA64_IPI_DM_INT, 0);
}
/*
* Called with preemption disabled.
*/
static inline void
send_IPI_allbutself (int op)
{
unsigned int i;
for (i = 0; i < NR_CPUS; i++) {
if (cpu_online(i) && i != smp_processor_id())
send_IPI_single(i, op);
}
}
/*
* Called with preemption disabled.
*/
static inline void
send_IPI_all (int op)
{
int i;
for (i = 0; i < NR_CPUS; i++)
if (cpu_online(i))
send_IPI_single(i, op);
}
/*
* Called with preemption disabled.
*/
static inline void
send_IPI_self (int op)
{
send_IPI_single(smp_processor_id(), op);
}
/*
* Called with preemption disabled.
*/
void
smp_send_reschedule (int cpu)
{
platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0);
}
void
smp_flush_tlb_all (void)
{
on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1, 1);
}
void
smp_flush_tlb_mm (struct mm_struct *mm)
{
/* this happens for the common case of a single-threaded fork(): */
if (likely(mm == current->active_mm && atomic_read(&mm->mm_users) == 1))
{
local_finish_flush_tlb_mm(mm);
return;
}
/*
* We could optimize this further by using mm->cpu_vm_mask to track which CPUs
* have been running in the address space. It's not clear that this is worth the
* trouble though: to avoid races, we have to raise the IPI on the target CPU
* anyhow, and once a CPU is interrupted, the cost of local_flush_tlb_all() is
* rather trivial.
*/
on_each_cpu((void (*)(void *))local_finish_flush_tlb_mm, mm, 1, 1);
}
/*
* Run a function on another CPU
* <func> The function to run. This must be fast and non-blocking.
* <info> An arbitrary pointer to pass to the function.
* <nonatomic> Currently unused.
* <wait> If true, wait until function has completed on other CPUs.
* [RETURNS] 0 on success, else a negative status code.
*
* Does not return until the remote CPU is nearly ready to execute <func>,
* is executing it, or has already executed it.
*/
int
smp_call_function_single (int cpuid, void (*func) (void *info), void *info, int nonatomic,
int wait)
{
struct call_data_struct data;
int cpus = 1;
int me = get_cpu(); /* prevent preemption and reschedule on another processor */
if (cpuid == me) {
printk("%s: trying to call self\n", __FUNCTION__);
put_cpu();
return -EBUSY;
}
data.func = func;
data.info = info;
atomic_set(&data.started, 0);
data.wait = wait;
if (wait)
atomic_set(&data.finished, 0);
spin_lock_bh(&call_lock);
call_data = &data;
mb(); /* ensure store to call_data precedes setting of IPI_CALL_FUNC */
send_IPI_single(cpuid, IPI_CALL_FUNC);
/* Wait for response */
while (atomic_read(&data.started) != cpus)
cpu_relax();
if (wait)
while (atomic_read(&data.finished) != cpus)
cpu_relax();
call_data = NULL;
spin_unlock_bh(&call_lock);
put_cpu();
return 0;
}
EXPORT_SYMBOL(smp_call_function_single);
/*
* this function sends a 'generic call function' IPI to all other CPUs
* in the system.
*/
/*
* [SUMMARY] Run a function on all other CPUs.
* <func> The function to run. This must be fast and non-blocking.
* <info> An arbitrary pointer to pass to the function.
* <nonatomic> currently unused.
* <wait> If true, wait (atomically) until function has completed on other CPUs.
* [RETURNS] 0 on success, else a negative status code.
*
* Does not return until the remote CPUs are nearly ready to execute <func>,
* are executing it, or have already executed it.
*
* You must not call this function with disabled interrupts or from a
* hardware interrupt handler or from a bottom half handler.
*/
int
smp_call_function (void (*func) (void *info), void *info, int nonatomic, int wait)
{
struct call_data_struct data;
int cpus = num_online_cpus()-1;
if (!cpus)
return 0;
/* Can deadlock when called with interrupts disabled */
WARN_ON(irqs_disabled());
data.func = func;
data.info = info;
atomic_set(&data.started, 0);
data.wait = wait;
if (wait)
atomic_set(&data.finished, 0);
spin_lock(&call_lock);
call_data = &data;
mb(); /* ensure store to call_data precedes setting of IPI_CALL_FUNC */
send_IPI_allbutself(IPI_CALL_FUNC);
/* Wait for response */
while (atomic_read(&data.started) != cpus)
cpu_relax();
if (wait)
while (atomic_read(&data.finished) != cpus)
cpu_relax();
call_data = NULL;
spin_unlock(&call_lock);
return 0;
}
EXPORT_SYMBOL(smp_call_function);
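/*
 * Usage sketch (illustration only; the example_* names below are hypothetical
 * and not used elsewhere in this file): run a fast, non-blocking callback on
 * every other online CPU and wait for it to finish, then do the same work
 * locally, since smp_call_function() deliberately skips the calling CPU.
 */
static void example_drain_counters (void *info)
{
/* runs in interrupt context on each remote CPU; must not sleep */
}
static void __attribute__((unused))
example_drain_all_cpus (void)
{
/* third argument (nonatomic) is unused; last argument: wait for completion */
if (smp_call_function(example_drain_counters, NULL, 0, 1) < 0)
printk(KERN_WARNING "example_drain_all_cpus: IPI failed\n");
/* smp_call_function() does not run the callback here, so do it ourselves */
example_drain_counters(NULL);
}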
/*
* this function calls the 'stop' function on all other CPUs in the system.
*/
void
smp_send_stop (void)
{
send_IPI_allbutself(IPI_CPU_STOP);
}
int __init
setup_profiling_timer (unsigned int multiplier)
{
return -EINVAL;
}

692
arch/ia64/kernel/smpboot.c Normal file

@@ -0,0 +1,692 @@
/*
* SMP boot-related support
*
* Copyright (C) 1998-2003 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
*
* 01/05/16 Rohit Seth <rohit.seth@intel.com> Moved SMP booting functions from smp.c to here.
* 01/04/27 David Mosberger <davidm@hpl.hp.com> Added ITC synching code.
* 02/07/31 David Mosberger <davidm@hpl.hp.com> Switch over to hotplug-CPU boot-sequence.
* smp_boot_cpus()/smp_commence() is replaced by
* smp_prepare_cpus()/__cpu_up()/smp_cpus_done().
*/
#include <linux/config.h>
#include <linux/module.h>
#include <linux/acpi.h>
#include <linux/bootmem.h>
#include <linux/cpu.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/mm.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/spinlock.h>
#include <linux/efi.h>
#include <linux/percpu.h>
#include <linux/bitops.h>
#include <asm/atomic.h>
#include <asm/cache.h>
#include <asm/current.h>
#include <asm/delay.h>
#include <asm/ia32.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/machvec.h>
#include <asm/mca.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
#include <asm/sal.h>
#include <asm/system.h>
#include <asm/tlbflush.h>
#include <asm/unistd.h>
#define SMP_DEBUG 0
#if SMP_DEBUG
#define Dprintk(x...) printk(x)
#else
#define Dprintk(x...)
#endif
/*
* ITC synchronization related stuff:
*/
#define MASTER 0
#define SLAVE (SMP_CACHE_BYTES/8)
#define NUM_ROUNDS 64 /* magic value */
#define NUM_ITERS 5 /* likewise */
static DEFINE_SPINLOCK(itc_sync_lock);
static volatile unsigned long go[SLAVE + 1];
#define DEBUG_ITC_SYNC 0
extern void __devinit calibrate_delay (void);
extern void start_ap (void);
extern unsigned long ia64_iobase;
task_t *task_for_booting_cpu;
/*
* State for each CPU
*/
DEFINE_PER_CPU(int, cpu_state);
/* Bitmasks of currently online, and possible CPUs */
cpumask_t cpu_online_map;
EXPORT_SYMBOL(cpu_online_map);
cpumask_t cpu_possible_map;
EXPORT_SYMBOL(cpu_possible_map);
/* which logical CPU number maps to which CPU (physical APIC ID) */
volatile int ia64_cpu_to_sapicid[NR_CPUS];
EXPORT_SYMBOL(ia64_cpu_to_sapicid);
static volatile cpumask_t cpu_callin_map;
struct smp_boot_data smp_boot_data __initdata;
unsigned long ap_wakeup_vector = -1; /* External Int use to wakeup APs */
char __initdata no_int_routing;
unsigned char smp_int_redirect; /* are INT and IPI redirectable by the chipset? */
static int __init
nointroute (char *str)
{
no_int_routing = 1;
printk ("no_int_routing on\n");
return 1;
}
__setup("nointroute", nointroute);
void
sync_master (void *arg)
{
unsigned long flags, i;
go[MASTER] = 0;
local_irq_save(flags);
{
for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) {
while (!go[MASTER]);
go[MASTER] = 0;
go[SLAVE] = ia64_get_itc();
}
}
local_irq_restore(flags);
}
/*
* Return the number of cycles by which our itc differs from the itc on the master
* (time-keeper) CPU. A positive number indicates our itc is ahead of the master,
* negative that it is behind.
*/
static inline long
get_delta (long *rt, long *master)
{
unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
unsigned long tcenter, t0, t1, tm;
long i;
for (i = 0; i < NUM_ITERS; ++i) {
t0 = ia64_get_itc();
go[MASTER] = 1;
while (!(tm = go[SLAVE]));
go[SLAVE] = 0;
t1 = ia64_get_itc();
if (t1 - t0 < best_t1 - best_t0)
best_t0 = t0, best_t1 = t1, best_tm = tm;
}
*rt = best_t1 - best_t0;
*master = best_tm - best_t0;
/* average best_t0 and best_t1 without overflow: */
tcenter = (best_t0/2 + best_t1/2);
if (best_t0 % 2 + best_t1 % 2 == 2)
++tcenter;
return tcenter - best_tm;
}
/*
* Synchronize ar.itc of the current (slave) CPU with the ar.itc of the MASTER CPU
* (normally the time-keeper CPU). We use a closed loop to eliminate the possibility of
* unaccounted-for errors (such as getting a machine check in the middle of a calibration
* step). The basic idea is for the slave to ask the master what itc value it has and to
* read its own itc before and after the master responds. Each iteration gives us three
* timestamps:
*
* slave master
*
* t0 ---\
* ---\
* --->
* tm
* /---
* /---
* t1 <---
*
*
* The goal is to adjust the slave's ar.itc such that tm falls exactly half-way between t0
* and t1. If we achieve this, the clocks are synchronized provided the interconnect
* between the slave and the master is symmetric. Even if the interconnect were
* asymmetric, we would still know that the synchronization error is smaller than the
* roundtrip latency (t0 - t1).
*
* When the interconnect is quiet and symmetric, this lets us synchronize the itc to
* within one or two cycles. However, we can only *guarantee* that the synchronization is
* accurate to within a round-trip time, which is typically in the range of several
* hundred cycles (e.g., ~500 cycles). In practice, this means that the itc's are usually
* almost perfectly synchronized, but we shouldn't assume that the accuracy is much better
* than half a micro second or so.
*/
void
ia64_sync_itc (unsigned int master)
{
long i, delta, adj, adjust_latency = 0, done = 0;
unsigned long flags, rt, master_time_stamp, bound;
#if DEBUG_ITC_SYNC
struct {
long rt; /* roundtrip time */
long master; /* master's timestamp */
long diff; /* difference between midpoint and master's timestamp */
long lat; /* estimate of itc adjustment latency */
} t[NUM_ROUNDS];
#endif
/*
* Make sure local timer ticks are disabled while we sync. If
* they were enabled, we'd have to worry about nasty issues
* like setting the ITC ahead of (or a long time before) the
* next scheduled tick.
*/
BUG_ON((ia64_get_itv() & (1 << 16)) == 0);
go[MASTER] = 1;
if (smp_call_function_single(master, sync_master, NULL, 1, 0) < 0) {
printk(KERN_ERR "sync_itc: failed to get attention of CPU %u!\n", master);
return;
}
while (go[MASTER]); /* wait for master to be ready */
spin_lock_irqsave(&itc_sync_lock, flags);
{
for (i = 0; i < NUM_ROUNDS; ++i) {
delta = get_delta(&rt, &master_time_stamp);
if (delta == 0) {
done = 1; /* let's lock on to this... */
bound = rt;
}
if (!done) {
if (i > 0) {
adjust_latency += -delta;
adj = -delta + adjust_latency/4;
} else
adj = -delta;
ia64_set_itc(ia64_get_itc() + adj);
}
#if DEBUG_ITC_SYNC
t[i].rt = rt;
t[i].master = master_time_stamp;
t[i].diff = delta;
t[i].lat = adjust_latency/4;
#endif
}
}
spin_unlock_irqrestore(&itc_sync_lock, flags);
#if DEBUG_ITC_SYNC
for (i = 0; i < NUM_ROUNDS; ++i)
printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
t[i].rt, t[i].master, t[i].diff, t[i].lat);
#endif
printk(KERN_INFO "CPU %d: synchronized ITC with CPU %u (last diff %ld cycles, "
"maxerr %lu cycles)\n", smp_processor_id(), master, delta, rt);
}
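/*
 * Worked example (illustration only) of the adjustment computed by get_delta()
 * above.  Suppose one round measures
 *
 *	t0 = 1000, tm = 1600, t1 = 1100		(all in ITC cycles)
 *
 * Then rt = t1 - t0 = 100 is the round-trip time, the midpoint is
 * tcenter = (1000 + 1100)/2 = 1050, and delta = tcenter - tm = -550: the
 * slave's ITC is roughly 550 cycles behind the master, so the loop in
 * ia64_sync_itc() advances ar.itc by about that amount.  Any remaining error
 * is bounded by the round-trip time rt, as described in the comment above.
 */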
/*
* Ideally sets up per-cpu profiling hooks. Doesn't do much now...
*/
static inline void __devinit
smp_setup_percpu_timer (void)
{
}
static void __devinit
smp_callin (void)
{
int cpuid, phys_id;
extern void ia64_init_itm(void);
#ifdef CONFIG_PERFMON
extern void pfm_init_percpu(void);
#endif
cpuid = smp_processor_id();
phys_id = hard_smp_processor_id();
if (cpu_online(cpuid)) {
printk(KERN_ERR "huh, phys CPU#0x%x, CPU#0x%x already present??\n",
phys_id, cpuid);
BUG();
}
lock_ipi_calllock();
cpu_set(cpuid, cpu_online_map);
unlock_ipi_calllock();
smp_setup_percpu_timer();
ia64_mca_cmc_vector_setup(); /* Setup vector on AP */
#ifdef CONFIG_PERFMON
pfm_init_percpu();
#endif
local_irq_enable();
if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) {
/*
* Synchronize the ITC with the BP. Need to do this after irqs are
* enabled because ia64_sync_itc() calls smp_call_function_single(), which
* calls spin_unlock_bh(), which calls
* local_bh_enable(), which bugs out if irqs are not enabled...
*/
Dprintk("Going to syncup ITC with BP.\n");
ia64_sync_itc(0);
}
/*
* Get our bogomips.
*/
ia64_init_itm();
calibrate_delay();
local_cpu_data->loops_per_jiffy = loops_per_jiffy;
#ifdef CONFIG_IA32_SUPPORT
ia32_gdt_init();
#endif
/*
* Allow the master to continue.
*/
cpu_set(cpuid, cpu_callin_map);
Dprintk("Stack on CPU %d at about %p\n",cpuid, &cpuid);
}
/*
* Activate a secondary processor. head.S calls this.
*/
int __devinit
start_secondary (void *unused)
{
/* Early console may use I/O ports */
ia64_set_kr(IA64_KR_IO_BASE, __pa(ia64_iobase));
Dprintk("start_secondary: starting CPU 0x%x\n", hard_smp_processor_id());
efi_map_pal_code();
cpu_init();
smp_callin();
cpu_idle();
return 0;
}
struct pt_regs * __devinit idle_regs(struct pt_regs *regs)
{
return NULL;
}
struct create_idle {
struct task_struct *idle;
struct completion done;
int cpu;
};
void
do_fork_idle(void *_c_idle)
{
struct create_idle *c_idle = _c_idle;
c_idle->idle = fork_idle(c_idle->cpu);
complete(&c_idle->done);
}
static int __devinit
do_boot_cpu (int sapicid, int cpu)
{
int timeout;
struct create_idle c_idle = {
.cpu = cpu,
.done = COMPLETION_INITIALIZER(c_idle.done),
};
DECLARE_WORK(work, do_fork_idle, &c_idle);
/*
* We can't use kernel_thread since we must avoid rescheduling the child.
*/
if (!keventd_up() || current_is_keventd())
work.func(work.data);
else {
schedule_work(&work);
wait_for_completion(&c_idle.done);
}
if (IS_ERR(c_idle.idle))
panic("failed fork for CPU %d", cpu);
task_for_booting_cpu = c_idle.idle;
Dprintk("Sending wakeup vector %lu to AP 0x%x/0x%x.\n", ap_wakeup_vector, cpu, sapicid);
platform_send_ipi(cpu, ap_wakeup_vector, IA64_IPI_DM_INT, 0);
/*
* Wait 10s total for the AP to start
*/
Dprintk("Waiting on callin_map ...");
for (timeout = 0; timeout < 100000; timeout++) {
if (cpu_isset(cpu, cpu_callin_map))
break; /* It has booted */
udelay(100);
}
Dprintk("\n");
if (!cpu_isset(cpu, cpu_callin_map)) {
printk(KERN_ERR "Processor 0x%x/0x%x is stuck.\n", cpu, sapicid);
ia64_cpu_to_sapicid[cpu] = -1;
cpu_clear(cpu, cpu_online_map); /* was set in smp_callin() */
return -EINVAL;
}
return 0;
}
static int __init
decay (char *str)
{
int ticks;
get_option (&str, &ticks);
return 1;
}
__setup("decay=", decay);
/*
* Initialize the logical CPU number to SAPICID mapping
*/
void __init
smp_build_cpu_map (void)
{
int sapicid, cpu, i;
int boot_cpu_id = hard_smp_processor_id();
for (cpu = 0; cpu < NR_CPUS; cpu++) {
ia64_cpu_to_sapicid[cpu] = -1;
#ifdef CONFIG_HOTPLUG_CPU
cpu_set(cpu, cpu_possible_map);
#endif
}
ia64_cpu_to_sapicid[0] = boot_cpu_id;
cpus_clear(cpu_present_map);
cpu_set(0, cpu_present_map);
cpu_set(0, cpu_possible_map);
for (cpu = 1, i = 0; i < smp_boot_data.cpu_count; i++) {
sapicid = smp_boot_data.cpu_phys_id[i];
if (sapicid == boot_cpu_id)
continue;
cpu_set(cpu, cpu_present_map);
cpu_set(cpu, cpu_possible_map);
ia64_cpu_to_sapicid[cpu] = sapicid;
cpu++;
}
}
#ifdef CONFIG_NUMA
/* on which node is each logical CPU (one cacheline even for 64 CPUs) */
u8 cpu_to_node_map[NR_CPUS] __cacheline_aligned;
EXPORT_SYMBOL(cpu_to_node_map);
/* which logical CPUs are on which nodes */
cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned;
/*
* Build cpu to node mapping and initialize the per node cpu masks.
*/
void __init
build_cpu_to_node_map (void)
{
int cpu, i, node;
for(node=0; node<MAX_NUMNODES; node++)
cpus_clear(node_to_cpu_mask[node]);
for(cpu = 0; cpu < NR_CPUS; ++cpu) {
/*
* All Itanium NUMA platforms I know use ACPI, so maybe we
* can drop this ifdef completely. [EF]
*/
#ifdef CONFIG_ACPI_NUMA
node = -1;
for (i = 0; i < NR_CPUS; ++i)
if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) {
node = node_cpuid[i].nid;
break;
}
#else
# error Fixme: Dunno how to build CPU-to-node map.
#endif
cpu_to_node_map[cpu] = (node >= 0) ? node : 0;
if (node >= 0)
cpu_set(cpu, node_to_cpu_mask[node]);
}
}
#endif /* CONFIG_NUMA */
/*
* Cycle through the APs sending Wakeup IPIs to boot each.
*/
void __init
smp_prepare_cpus (unsigned int max_cpus)
{
int boot_cpu_id = hard_smp_processor_id();
/*
* Initialize the per-CPU profiling counter/multiplier
*/
smp_setup_percpu_timer();
/*
* We have the boot CPU online for sure.
*/
cpu_set(0, cpu_online_map);
cpu_set(0, cpu_callin_map);
local_cpu_data->loops_per_jiffy = loops_per_jiffy;
ia64_cpu_to_sapicid[0] = boot_cpu_id;
printk(KERN_INFO "Boot processor id 0x%x/0x%x\n", 0, boot_cpu_id);
current_thread_info()->cpu = 0;
/*
* If SMP should be disabled, then really disable it!
*/
if (!max_cpus) {
printk(KERN_INFO "SMP mode deactivated.\n");
cpus_clear(cpu_online_map);
cpus_clear(cpu_present_map);
cpus_clear(cpu_possible_map);
cpu_set(0, cpu_online_map);
cpu_set(0, cpu_present_map);
cpu_set(0, cpu_possible_map);
return;
}
}
void __devinit smp_prepare_boot_cpu(void)
{
cpu_set(smp_processor_id(), cpu_online_map);
cpu_set(smp_processor_id(), cpu_callin_map);
}
#ifdef CONFIG_HOTPLUG_CPU
extern void fixup_irqs(void);
/* must be called with cpucontrol mutex held */
static int __devinit cpu_enable(unsigned int cpu)
{
per_cpu(cpu_state,cpu) = CPU_UP_PREPARE;
wmb();
while (!cpu_online(cpu))
cpu_relax();
return 0;
}
int __cpu_disable(void)
{
int cpu = smp_processor_id();
/*
* don't permit removing the boot processor for now
*/
if (cpu == 0)
return -EBUSY;
fixup_irqs();
local_flush_tlb_all();
printk ("Disabled cpu %u\n", smp_processor_id());
return 0;
}
void __cpu_die(unsigned int cpu)
{
unsigned int i;
for (i = 0; i < 100; i++) {
/* They ack this in play_dead by setting CPU_DEAD */
if (per_cpu(cpu_state, cpu) == CPU_DEAD)
{
/*
* TBD: Enable this when physical removal
* or when the processor is put in
* SAL_BOOT_RENDEZ mode
* cpu_clear(cpu, cpu_callin_map);
*/
return;
}
msleep(100);
}
printk(KERN_ERR "CPU %u didn't die...\n", cpu);
}
#else /* !CONFIG_HOTPLUG_CPU */
static int __devinit cpu_enable(unsigned int cpu)
{
return 0;
}
int __cpu_disable(void)
{
return -ENOSYS;
}
void __cpu_die(unsigned int cpu)
{
/* We said "no" in __cpu_disable */
BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */
void
smp_cpus_done (unsigned int dummy)
{
int cpu;
unsigned long bogosum = 0;
/*
* Allow the user to impress friends.
*/
for (cpu = 0; cpu < NR_CPUS; cpu++)
if (cpu_online(cpu))
bogosum += cpu_data(cpu)->loops_per_jiffy;
printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
(int)num_online_cpus(), bogosum/(500000/HZ), (bogosum/(5000/HZ))%100);
}
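/*
 * Illustration (not part of the original source) of the BogoMIPS arithmetic in
 * the printk above: each CPU contributes loops_per_jiffy * HZ / 500000
 * BogoMIPS, so dividing the summed loops_per_jiffy values by (500000/HZ) gives
 * the integer part and dividing by (5000/HZ) modulo 100 gives two decimal
 * places.  With HZ = 1000 (chosen here only for round numbers) and two CPUs at
 * loops_per_jiffy = 1000000 each, bogosum = 2000000 and the message reports
 * "2 processors activated (4000.00 BogoMIPS)".
 */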
int __devinit
__cpu_up (unsigned int cpu)
{
int ret;
int sapicid;
sapicid = ia64_cpu_to_sapicid[cpu];
if (sapicid == -1)
return -EINVAL;
/*
* Already booted... just enable it and get out of the idle loop
*/
if (cpu_isset(cpu, cpu_callin_map))
{
cpu_enable(cpu);
local_irq_enable();
while (!cpu_isset(cpu, cpu_online_map))
mb();
return 0;
}
/* Processor goes to start_secondary(), sets online flag */
ret = do_boot_cpu(sapicid, cpu);
if (ret < 0)
return ret;
return 0;
}
/*
* Assume that CPUs have been discovered by some platform-dependent interface. For
* SoftSDV/Lion, that would be ACPI.
*
* Setup of the IPI irq handler is done in irq.c:init_IRQ_SMP().
*/
void __init
init_smp_config(void)
{
struct fptr {
unsigned long fp;
unsigned long gp;
} *ap_startup;
long sal_ret;
/* Tell SAL where to drop the AP's. */
ap_startup = (struct fptr *) start_ap;
sal_ret = ia64_sal_set_vectors(SAL_VECTOR_OS_BOOT_RENDEZ,
ia64_tpa(ap_startup->fp), ia64_tpa(ap_startup->gp), 0, 0, 0, 0);
if (sal_ret < 0)
printk(KERN_ERR "SMP: Can't set SAL AP Boot Rendezvous: %s\n",
ia64_sal_strerror(sal_ret));
}

298
arch/ia64/kernel/sys_ia64.c Normal file

@@ -0,0 +1,298 @@
/*
* This file contains various system calls that have different calling
* conventions on different platforms.
*
* Copyright (C) 1999-2000, 2002-2003, 2005 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
*/
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/sched.h>
#include <linux/shm.h>
#include <linux/file.h> /* doh, must come after sched.h... */
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/syscalls.h>
#include <linux/highuid.h>
#include <linux/hugetlb.h>
#include <asm/shmparam.h>
#include <asm/uaccess.h>
unsigned long
arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len,
unsigned long pgoff, unsigned long flags)
{
long map_shared = (flags & MAP_SHARED);
unsigned long start_addr, align_mask = PAGE_SIZE - 1;
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
if (len > RGN_MAP_LIMIT)
return -ENOMEM;
#ifdef CONFIG_HUGETLB_PAGE
if (REGION_NUMBER(addr) == REGION_HPAGE)
addr = 0;
#endif
if (!addr)
addr = mm->free_area_cache;
if (map_shared && (TASK_SIZE > 0xfffffffful))
/*
* For 64-bit tasks, align shared segments to 1MB to avoid potential
* performance penalty due to virtual aliasing (see ASDM). For 32-bit
* tasks, we prefer to avoid exhausting the address space too quickly by
* limiting alignment to a single page.
*/
align_mask = SHMLBA - 1;
full_search:
start_addr = addr = (addr + align_mask) & ~align_mask;
for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
/* At this point: (!vma || addr < vma->vm_end). */
if (TASK_SIZE - len < addr || RGN_MAP_LIMIT - len < REGION_OFFSET(addr)) {
if (start_addr != TASK_UNMAPPED_BASE) {
/* Start a new search --- just in case we missed some holes. */
addr = TASK_UNMAPPED_BASE;
goto full_search;
}
return -ENOMEM;
}
if (!vma || addr + len <= vma->vm_start) {
/* Remember the address where we stopped this search: */
mm->free_area_cache = addr + len;
return addr;
}
addr = (vma->vm_end + align_mask) & ~align_mask;
}
}
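/*
 * Illustration (not part of the original source) of the rounding used above:
 * (addr + align_mask) & ~align_mask rounds addr up to the next multiple of
 * (align_mask + 1).  For a 64-bit shared mapping, align_mask = SHMLBA - 1 =
 * 0xfffff, so e.g.
 *
 *	addr = 0x2000000012345  ->  (addr + 0xfffff) & ~0xfffff = 0x2000000100000
 *
 * i.e. the search continues at the next 1MB boundary, which avoids the
 * virtual-aliasing penalty mentioned in the comment above.
 */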
asmlinkage long
ia64_getpriority (int which, int who)
{
long prio;
prio = sys_getpriority(which, who);
if (prio >= 0) {
force_successful_syscall_return();
prio = 20 - prio;
}
return prio;
}
/* XXX obsolete, but leave it here until the old libc is gone... */
asmlinkage unsigned long
sys_getpagesize (void)
{
return PAGE_SIZE;
}
asmlinkage unsigned long
ia64_shmat (int shmid, void __user *shmaddr, int shmflg)
{
unsigned long raddr;
int retval;
retval = do_shmat(shmid, shmaddr, shmflg, &raddr);
if (retval < 0)
return retval;
force_successful_syscall_return();
return raddr;
}
asmlinkage unsigned long
ia64_brk (unsigned long brk)
{
unsigned long rlim, retval, newbrk, oldbrk;
struct mm_struct *mm = current->mm;
/*
* Most of this replicates the code in sys_brk() except for an additional safety
* check and the clearing of r8. However, we can't call sys_brk() because we need
* to acquire the mmap_sem before we can do the test...
*/
down_write(&mm->mmap_sem);
if (brk < mm->end_code)
goto out;
newbrk = PAGE_ALIGN(brk);
oldbrk = PAGE_ALIGN(mm->brk);
if (oldbrk == newbrk)
goto set_brk;
/* Always allow shrinking brk. */
if (brk <= mm->brk) {
if (!do_munmap(mm, newbrk, oldbrk-newbrk))
goto set_brk;
goto out;
}
/* Check against unimplemented/unmapped addresses: */
if ((newbrk - oldbrk) > RGN_MAP_LIMIT || REGION_OFFSET(newbrk) > RGN_MAP_LIMIT)
goto out;
/* Check against rlimit.. */
rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
goto out;
/* Check against existing mmap mappings. */
if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
goto out;
/* Ok, looks good - let it rip. */
if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
goto out;
set_brk:
mm->brk = brk;
out:
retval = mm->brk;
up_write(&mm->mmap_sem);
force_successful_syscall_return();
return retval;
}
/*
* On IA-64, we return the two file descriptors in ret0 and ret1 (r8
* and r9) as this is faster than doing a copy_to_user().
*/
asmlinkage long
sys_pipe (void)
{
struct pt_regs *regs = ia64_task_regs(current);
int fd[2];
int retval;
retval = do_pipe(fd);
if (retval)
goto out;
retval = fd[0];
regs->r9 = fd[1];
out:
return retval;
}
static inline unsigned long
do_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, unsigned long pgoff)
{
unsigned long roff;
struct file *file = NULL;
flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
if (!(flags & MAP_ANONYMOUS)) {
file = fget(fd);
if (!file)
return -EBADF;
if (!file->f_op || !file->f_op->mmap) {
addr = -ENODEV;
goto out;
}
}
/*
* A zero mmap always succeeds in Linux, independent of whether or not the
* remaining arguments are valid.
*/
if (len == 0)
goto out;
/* Careful about overflows.. */
len = PAGE_ALIGN(len);
if (!len || len > TASK_SIZE) {
addr = -EINVAL;
goto out;
}
/*
* Don't permit mappings into unmapped space, the virtual page table of a region,
* or across a region boundary. Note: RGN_MAP_LIMIT is equal to 2^n-PAGE_SIZE
* (for some integer n <= 61) and len > 0.
*/
roff = REGION_OFFSET(addr);
if ((len > RGN_MAP_LIMIT) || (roff > (RGN_MAP_LIMIT - len))) {
addr = -EINVAL;
goto out;
}
down_write(&current->mm->mmap_sem);
addr = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
up_write(&current->mm->mmap_sem);
out: if (file)
fput(file);
return addr;
}
/*
* mmap2() is like mmap() except that the offset is expressed in units
* of PAGE_SIZE (instead of bytes). This allows mmap2() to map (pieces
* of) files that are larger than the address space of the CPU.
*/
asmlinkage unsigned long
sys_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, long pgoff)
{
addr = do_mmap2(addr, len, prot, flags, fd, pgoff);
if (!IS_ERR((void *) addr))
force_successful_syscall_return();
return addr;
}
asmlinkage unsigned long
sys_mmap (unsigned long addr, unsigned long len, int prot, int flags, int fd, long off)
{
if (offset_in_page(off) != 0)
return -EINVAL;
addr = do_mmap2(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
if (!IS_ERR((void *) addr))
force_successful_syscall_return();
return addr;
}
asmlinkage unsigned long
ia64_mremap (unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags,
unsigned long new_addr)
{
extern unsigned long do_mremap (unsigned long addr,
unsigned long old_len,
unsigned long new_len,
unsigned long flags,
unsigned long new_addr);
down_write(&current->mm->mmap_sem);
{
addr = do_mremap(addr, old_len, new_len, flags, new_addr);
}
up_write(&current->mm->mmap_sem);
if (IS_ERR((void *) addr))
return addr;
force_successful_syscall_return();
return addr;
}
#ifndef CONFIG_PCI
asmlinkage long
sys_pciconfig_read (unsigned long bus, unsigned long dfn, unsigned long off, unsigned long len,
void *buf)
{
return -ENOSYS;
}
asmlinkage long
sys_pciconfig_write (unsigned long bus, unsigned long dfn, unsigned long off, unsigned long len,
void *buf)
{
return -ENOSYS;
}
#endif /* CONFIG_PCI */

255
arch/ia64/kernel/time.c Normal file

@@ -0,0 +1,255 @@
/*
* linux/arch/ia64/kernel/time.c
*
* Copyright (C) 1998-2003 Hewlett-Packard Co
* Stephane Eranian <eranian@hpl.hp.com>
* David Mosberger <davidm@hpl.hp.com>
* Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
* Copyright (C) 1999-2000 VA Linux Systems
* Copyright (C) 1999-2000 Walt Drummond <drummond@valinux.com>
*/
#include <linux/config.h>
#include <linux/cpu.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/profile.h>
#include <linux/sched.h>
#include <linux/time.h>
#include <linux/interrupt.h>
#include <linux/efi.h>
#include <linux/profile.h>
#include <linux/timex.h>
#include <asm/machvec.h>
#include <asm/delay.h>
#include <asm/hw_irq.h>
#include <asm/ptrace.h>
#include <asm/sal.h>
#include <asm/sections.h>
#include <asm/system.h>
extern unsigned long wall_jiffies;
u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;
EXPORT_SYMBOL(jiffies_64);
#define TIME_KEEPER_ID 0 /* smp_processor_id() of time-keeper */
#ifdef CONFIG_IA64_DEBUG_IRQ
unsigned long last_cli_ip;
EXPORT_SYMBOL(last_cli_ip);
#endif
static struct time_interpolator itc_interpolator = {
.shift = 16,
.mask = 0xffffffffffffffffLL,
.source = TIME_SOURCE_CPU
};
static irqreturn_t
timer_interrupt (int irq, void *dev_id, struct pt_regs *regs)
{
unsigned long new_itm;
if (unlikely(cpu_is_offline(smp_processor_id()))) {
return IRQ_HANDLED;
}
platform_timer_interrupt(irq, dev_id, regs);
new_itm = local_cpu_data->itm_next;
if (!time_after(ia64_get_itc(), new_itm))
printk(KERN_ERR "Oops: timer tick before it's due (itc=%lx,itm=%lx)\n",
ia64_get_itc(), new_itm);
profile_tick(CPU_PROFILING, regs);
while (1) {
update_process_times(user_mode(regs));
new_itm += local_cpu_data->itm_delta;
if (smp_processor_id() == TIME_KEEPER_ID) {
/*
* Here we are in the timer irq handler. We have irqs locally
* disabled, but we don't know if the timer_bh is running on
* another CPU. We need to avoid an SMP race by acquiring the
* xtime_lock.
*/
write_seqlock(&xtime_lock);
do_timer(regs);
local_cpu_data->itm_next = new_itm;
write_sequnlock(&xtime_lock);
} else
local_cpu_data->itm_next = new_itm;
if (time_after(new_itm, ia64_get_itc()))
break;
}
do {
/*
* If we're too close to the next clock tick for
* comfort, we increase the safety margin by
* intentionally dropping the next tick(s). We do NOT
* update itm.next because that would force us to call
* do_timer() which in turn would let our clock run
* too fast (with the potentially devastating effect
* of losing monotonicity of time).
*/
while (!time_after(new_itm, ia64_get_itc() + local_cpu_data->itm_delta/2))
new_itm += local_cpu_data->itm_delta;
ia64_set_itm(new_itm);
/* double check, in case we got hit by a (slow) PMI: */
} while (time_after_eq(ia64_get_itc(), new_itm));
return IRQ_HANDLED;
}
/*
* Encapsulate access to the itm structure for SMP.
*/
void
ia64_cpu_local_tick (void)
{
int cpu = smp_processor_id();
unsigned long shift = 0, delta;
/* arrange for the cycle counter to generate a timer interrupt: */
ia64_set_itv(IA64_TIMER_VECTOR);
delta = local_cpu_data->itm_delta;
/*
* Stagger the timer tick for each CPU so they don't occur all at (almost) the
* same time:
*/
if (cpu) {
unsigned long hi = 1UL << ia64_fls(cpu);
shift = (2*(cpu - hi) + 1) * delta/hi/2;
}
local_cpu_data->itm_next = ia64_get_itc() + delta + shift;
ia64_set_itm(local_cpu_data->itm_next);
}
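/*
 * Illustration (not part of the original source) of the staggering formula
 * above, with delta = itm_delta:
 *
 *	cpu 1:  hi = 1  ->  shift = delta/2
 *	cpu 2:  hi = 2  ->  shift = delta/4
 *	cpu 3:  hi = 2  ->  shift = 3*delta/4
 *	cpu 4:  hi = 4  ->  shift = delta/8
 *	cpu 5..7:           3*delta/8, 5*delta/8, 7*delta/8
 *
 * Each power-of-two "generation" of CPUs bisects the remaining gaps, so the
 * per-CPU ticks end up spread roughly evenly across one tick period instead
 * of all firing at the same instant.
 */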
static int nojitter;
static int __init nojitter_setup(char *str)
{
nojitter = 1;
printk("Jitter checking for ITC timers disabled\n");
return 1;
}
__setup("nojitter", nojitter_setup);
void __devinit
ia64_init_itm (void)
{
unsigned long platform_base_freq, itc_freq;
struct pal_freq_ratio itc_ratio, proc_ratio;
long status, platform_base_drift, itc_drift;
/*
* According to SAL v2.6, we need to use a SAL call to determine the platform base
* frequency and then a PAL call to determine the frequency ratio between the ITC
* and the base frequency.
*/
status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM,
&platform_base_freq, &platform_base_drift);
if (status != 0) {
printk(KERN_ERR "SAL_FREQ_BASE_PLATFORM failed: %s\n", ia64_sal_strerror(status));
} else {
status = ia64_pal_freq_ratios(&proc_ratio, NULL, &itc_ratio);
if (status != 0)
printk(KERN_ERR "PAL_FREQ_RATIOS failed with status=%ld\n", status);
}
if (status != 0) {
/* invent "random" values */
printk(KERN_ERR
"SAL/PAL failed to obtain frequency info---inventing reasonable values\n");
platform_base_freq = 100000000;
platform_base_drift = -1; /* no drift info */
itc_ratio.num = 3;
itc_ratio.den = 1;
}
if (platform_base_freq < 40000000) {
printk(KERN_ERR "Platform base frequency %lu bogus---resetting to 75MHz!\n",
platform_base_freq);
platform_base_freq = 75000000;
platform_base_drift = -1;
}
if (!proc_ratio.den)
proc_ratio.den = 1; /* avoid division by zero */
if (!itc_ratio.den)
itc_ratio.den = 1; /* avoid division by zero */
itc_freq = (platform_base_freq*itc_ratio.num)/itc_ratio.den;
local_cpu_data->itm_delta = (itc_freq + HZ/2) / HZ;
printk(KERN_DEBUG "CPU %d: base freq=%lu.%03luMHz, ITC ratio=%lu/%lu, "
"ITC freq=%lu.%03luMHz", smp_processor_id(),
platform_base_freq / 1000000, (platform_base_freq / 1000) % 1000,
itc_ratio.num, itc_ratio.den, itc_freq / 1000000, (itc_freq / 1000) % 1000);
if (platform_base_drift != -1) {
itc_drift = platform_base_drift*itc_ratio.num/itc_ratio.den;
printk("+/-%ldppm\n", itc_drift);
} else {
itc_drift = -1;
printk("\n");
}
local_cpu_data->proc_freq = (platform_base_freq*proc_ratio.num)/proc_ratio.den;
local_cpu_data->itc_freq = itc_freq;
local_cpu_data->cyc_per_usec = (itc_freq + USEC_PER_SEC/2) / USEC_PER_SEC;
local_cpu_data->nsec_per_cyc = ((NSEC_PER_SEC<<IA64_NSEC_PER_CYC_SHIFT)
+ itc_freq/2)/itc_freq;
if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) {
itc_interpolator.frequency = local_cpu_data->itc_freq;
itc_interpolator.drift = itc_drift;
#ifdef CONFIG_SMP
/* On IA64 in an SMP configuration ITCs are never accurately synchronized.
* Jitter compensation requires a cmpxchg which may limit
* the scalability of the syscalls for retrieving time.
* The ITC synchronization is usually successful to within a few
* ITC ticks but this is not a sure thing. If you need to improve
* timer performance in SMP situations then boot the kernel with the
* "nojitter" option. However, doing so may result in time fluctuating (maybe
* even going backward) if the ITC offsets between the individual CPUs
* are too large.
*/
if (!nojitter) itc_interpolator.jitter = 1;
#endif
register_time_interpolator(&itc_interpolator);
}
/* Setup the CPU local timer tick */
ia64_cpu_local_tick();
}
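/*
 * Illustration (not part of the original source) of the fixed-point
 * nsec_per_cyc value computed above: it is the rounded value of
 *
 *	(NSEC_PER_SEC / itc_freq) * 2^IA64_NSEC_PER_CYC_SHIFT
 *
 * i.e. nanoseconds per ITC cycle scaled up so it can be stored as an integer.
 * A cycle count can then be converted to nanoseconds with a multiply and a
 * right shift (ns ~= (cycles * nsec_per_cyc) >> IA64_NSEC_PER_CYC_SHIFT),
 * avoiding a division on every conversion.  For example, at itc_freq = 400 MHz
 * the unscaled value is 2.5 ns/cycle.
 */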
static struct irqaction timer_irqaction = {
.handler = timer_interrupt,
.flags = SA_INTERRUPT,
.name = "timer"
};
void __init
time_init (void)
{
register_percpu_irq(IA64_TIMER_VECTOR, &timer_irqaction);
efi_gettimeofday(&xtime);
ia64_init_itm();
/*
* Initialize wall_to_monotonic such that adding it to xtime will yield zero; the
* tv_nsec field must be normalized (i.e., 0 <= nsec < NSEC_PER_SEC).
*/
set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec);
}


@@ -0,0 +1,92 @@
/*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
* This file contains NUMA specific variables and functions which can
* be split away from DISCONTIGMEM and are used on NUMA machines with
* contiguous memory.
* 2002/08/07 Erich Focht <efocht@ess.nec.de>
* Populate cpu entries in sysfs for non-numa systems as well
* Intel Corporation - Ashok Raj
*/
#include <linux/config.h>
#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/node.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/nodemask.h>
#include <asm/mmzone.h>
#include <asm/numa.h>
#include <asm/cpu.h>
#ifdef CONFIG_NUMA
static struct node *sysfs_nodes;
#endif
static struct ia64_cpu *sysfs_cpus;
int arch_register_cpu(int num)
{
struct node *parent = NULL;
#ifdef CONFIG_NUMA
parent = &sysfs_nodes[cpu_to_node(num)];
#endif /* CONFIG_NUMA */
return register_cpu(&sysfs_cpus[num].cpu, num, parent);
}
#ifdef CONFIG_HOTPLUG_CPU
void arch_unregister_cpu(int num)
{
struct node *parent = NULL;
#ifdef CONFIG_NUMA
int node = cpu_to_node(num);
parent = &sysfs_nodes[node];
#endif /* CONFIG_NUMA */
return unregister_cpu(&sysfs_cpus[num].cpu, parent);
}
EXPORT_SYMBOL(arch_register_cpu);
EXPORT_SYMBOL(arch_unregister_cpu);
#endif /*CONFIG_HOTPLUG_CPU*/
static int __init topology_init(void)
{
int i, err = 0;
#ifdef CONFIG_NUMA
sysfs_nodes = kmalloc(sizeof(struct node) * MAX_NUMNODES, GFP_KERNEL);
if (!sysfs_nodes) {
err = -ENOMEM;
goto out;
}
memset(sysfs_nodes, 0, sizeof(struct node) * MAX_NUMNODES);
/* MCD - Do we want to register all ONLINE nodes, or all POSSIBLE nodes? */
for_each_online_node(i)
if ((err = register_node(&sysfs_nodes[i], i, 0)))
goto out;
#endif
sysfs_cpus = kmalloc(sizeof(struct ia64_cpu) * NR_CPUS, GFP_KERNEL);
if (!sysfs_cpus) {
err = -ENOMEM;
goto out;
}
memset(sysfs_cpus, 0, sizeof(struct ia64_cpu) * NR_CPUS);
for_each_present_cpu(i)
if((err = arch_register_cpu(i)))
goto out;
out:
return err;
}
__initcall(topology_init);

609
arch/ia64/kernel/traps.c Normal file

@@ -0,0 +1,609 @@
/*
* Architecture-specific trap handling.
*
* Copyright (C) 1998-2003 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
*
* 05/12/00 grao <goutham.rao@intel.com> : added isr in siginfo for SIGFPE
*/
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/tty.h>
#include <linux/vt_kern.h> /* For unblank_screen() */
#include <linux/module.h> /* for EXPORT_SYMBOL */
#include <linux/hardirq.h>
#include <asm/fpswa.h>
#include <asm/ia32.h>
#include <asm/intrinsics.h>
#include <asm/processor.h>
#include <asm/uaccess.h>
extern spinlock_t timerlist_lock;
fpswa_interface_t *fpswa_interface;
EXPORT_SYMBOL(fpswa_interface);
void __init
trap_init (void)
{
if (ia64_boot_param->fpswa)
/* FPSWA fixup: make the interface pointer a kernel virtual address: */
fpswa_interface = __va(ia64_boot_param->fpswa);
}
/*
* Unlock any spinlocks which will prevent us from getting the message out (timerlist_lock
* is acquired through the console unblank code)
*/
void
bust_spinlocks (int yes)
{
int loglevel_save = console_loglevel;
if (yes) {
oops_in_progress = 1;
return;
}
#ifdef CONFIG_VT
unblank_screen();
#endif
oops_in_progress = 0;
/*
* OK, the message is on the console. Now we call printk() without
* oops_in_progress set so that printk will give klogd a poke. Hold onto
* your hats...
*/
console_loglevel = 15; /* NMI oopser may have shut the console up */
printk(" ");
console_loglevel = loglevel_save;
}
void
die (const char *str, struct pt_regs *regs, long err)
{
static struct {
spinlock_t lock;
u32 lock_owner;
int lock_owner_depth;
} die = {
.lock = SPIN_LOCK_UNLOCKED,
.lock_owner = -1,
.lock_owner_depth = 0
};
static int die_counter;
if (die.lock_owner != smp_processor_id()) {
console_verbose();
spin_lock_irq(&die.lock);
die.lock_owner = smp_processor_id();
die.lock_owner_depth = 0;
bust_spinlocks(1);
}
if (++die.lock_owner_depth < 3) {
printk("%s[%d]: %s %ld [%d]\n",
current->comm, current->pid, str, err, ++die_counter);
show_regs(regs);
} else
printk(KERN_ERR "Recursive die() failure, output suppressed\n");
bust_spinlocks(0);
die.lock_owner = -1;
spin_unlock_irq(&die.lock);
do_exit(SIGSEGV);
}
void
die_if_kernel (char *str, struct pt_regs *regs, long err)
{
if (!user_mode(regs))
die(str, regs, err);
}
void
ia64_bad_break (unsigned long break_num, struct pt_regs *regs)
{
siginfo_t siginfo;
int sig, code;
/* SIGILL, SIGFPE, SIGSEGV, and SIGBUS want these field initialized: */
siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri);
siginfo.si_imm = break_num;
siginfo.si_flags = 0; /* clear __ISR_VALID */
siginfo.si_isr = 0;
switch (break_num) {
case 0: /* unknown error (used by GCC for __builtin_abort()) */
die_if_kernel("bugcheck!", regs, break_num);
sig = SIGILL; code = ILL_ILLOPC;
break;
case 1: /* integer divide by zero */
sig = SIGFPE; code = FPE_INTDIV;
break;
case 2: /* integer overflow */
sig = SIGFPE; code = FPE_INTOVF;
break;
case 3: /* range check/bounds check */
sig = SIGFPE; code = FPE_FLTSUB;
break;
case 4: /* null pointer dereference */
sig = SIGSEGV; code = SEGV_MAPERR;
break;
case 5: /* misaligned data */
sig = SIGSEGV; code = BUS_ADRALN;
break;
case 6: /* decimal overflow */
sig = SIGFPE; code = __FPE_DECOVF;
break;
case 7: /* decimal divide by zero */
sig = SIGFPE; code = __FPE_DECDIV;
break;
case 8: /* packed decimal error */
sig = SIGFPE; code = __FPE_DECERR;
break;
case 9: /* invalid ASCII digit */
sig = SIGFPE; code = __FPE_INVASC;
break;
case 10: /* invalid decimal digit */
sig = SIGFPE; code = __FPE_INVDEC;
break;
case 11: /* paragraph stack overflow */
sig = SIGSEGV; code = __SEGV_PSTKOVF;
break;
case 0x3f000 ... 0x3ffff: /* bundle-update in progress */
sig = SIGILL; code = __ILL_BNDMOD;
break;
default:
if (break_num < 0x40000 || break_num > 0x100000)
die_if_kernel("Bad break", regs, break_num);
if (break_num < 0x80000) {
sig = SIGILL; code = __ILL_BREAK;
} else {
sig = SIGTRAP; code = TRAP_BRKPT;
}
}
siginfo.si_signo = sig;
siginfo.si_errno = 0;
siginfo.si_code = code;
force_sig_info(sig, &siginfo, current);
}
/*
* disabled_fph_fault() is called when a user-level process attempts to access f32..f127
* and it doesn't own the fp-high register partition. When this happens, we save the
* current fph partition in the task_struct of the fpu-owner (if necessary) and then load
* the fp-high partition of the current task (if necessary). Note that the kernel has
* access to fph by the time we get here, as the IVT's "Disabled FP-Register" handler takes
* care of clearing psr.dfh.
*/
static inline void
disabled_fph_fault (struct pt_regs *regs)
{
struct ia64_psr *psr = ia64_psr(regs);
/* first, grant user-level access to fph partition: */
psr->dfh = 0;
#ifndef CONFIG_SMP
{
struct task_struct *fpu_owner
= (struct task_struct *)ia64_get_kr(IA64_KR_FPU_OWNER);
if (ia64_is_local_fpu_owner(current))
return;
if (fpu_owner)
ia64_flush_fph(fpu_owner);
}
#endif /* !CONFIG_SMP */
ia64_set_local_fpu_owner(current);
if ((current->thread.flags & IA64_THREAD_FPH_VALID) != 0) {
__ia64_load_fpu(current->thread.fph);
psr->mfh = 0;
} else {
__ia64_init_fpu();
/*
* Set mfh because the state in thread.fph does not match the state in
* the fph partition.
*/
psr->mfh = 1;
}
}
static inline int
fp_emulate (int fp_fault, void *bundle, long *ipsr, long *fpsr, long *isr, long *pr, long *ifs,
struct pt_regs *regs)
{
fp_state_t fp_state;
fpswa_ret_t ret;
if (!fpswa_interface)
return -1;
memset(&fp_state, 0, sizeof(fp_state_t));
/*
* compute fp_state. only FP registers f6 - f11 are used by the
* kernel, so set those bits in the mask and set the low volatile
* pointer to point to these registers.
*/
fp_state.bitmask_low64 = 0xfc0; /* bit6..bit11 */
fp_state.fp_state_low_volatile = (fp_state_low_volatile_t *) &regs->f6;
/*
* unsigned long (*EFI_FPSWA) (
* unsigned long trap_type,
* void *Bundle,
* unsigned long *pipsr,
* unsigned long *pfsr,
* unsigned long *pisr,
* unsigned long *ppreds,
* unsigned long *pifs,
* void *fp_state);
*/
ret = (*fpswa_interface->fpswa)((unsigned long) fp_fault, bundle,
(unsigned long *) ipsr, (unsigned long *) fpsr,
(unsigned long *) isr, (unsigned long *) pr,
(unsigned long *) ifs, &fp_state);
return ret.status;
}
/*
* Handle floating-point assist faults and traps.
*/
static int
handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr)
{
long exception, bundle[2];
unsigned long fault_ip;
struct siginfo siginfo;
static int fpu_swa_count = 0;
static unsigned long last_time;
fault_ip = regs->cr_iip;
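/*
 * For an fp trap (as opposed to a fault), cr_iip already points past the
 * offending instruction; if the restart slot is 0, the trapping instruction
 * was in the previous bundle, so back up 16 bytes (one bundle) to fetch it.
 */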
if (!fp_fault && (ia64_psr(regs)->ri == 0))
fault_ip -= 16;
if (copy_from_user(bundle, (void __user *) fault_ip, sizeof(bundle)))
return -1;
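/*
 * Rate-limit the warning below: at most four messages are printed, and the
 * counter is reset only once five seconds have passed since the last one
 * (unless the task asked for silence via IA64_THREAD_FPEMU_NOPRINT).
 */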
if (jiffies - last_time > 5*HZ)
fpu_swa_count = 0;
if ((fpu_swa_count < 4) && !(current->thread.flags & IA64_THREAD_FPEMU_NOPRINT)) {
last_time = jiffies;
++fpu_swa_count;
printk(KERN_WARNING
"%s(%d): floating-point assist fault at ip %016lx, isr %016lx\n",
current->comm, current->pid, regs->cr_iip + ia64_psr(regs)->ri, isr);
}
exception = fp_emulate(fp_fault, bundle, &regs->cr_ipsr, &regs->ar_fpsr, &isr, &regs->pr,
&regs->cr_ifs, regs);
if (fp_fault) {
if (exception == 0) {
/* emulation was successful */
ia64_increment_ip(regs);
} else if (exception == -1) {
printk(KERN_ERR "handle_fpu_swa: fp_emulate() returned -1\n");
return -1;
} else {
/* is next instruction a trap? */
if (exception & 2) {
ia64_increment_ip(regs);
}
siginfo.si_signo = SIGFPE;
siginfo.si_errno = 0;
siginfo.si_code = __SI_FAULT; /* default code */
siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri);
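/*
 * Choose si_code from the ISR exception-code bits.  Each mask below is
 * assumed to cover the corresponding bit for both halves of a parallel
 * FP result: invalid operation, denormal/unnormal operand, zero divide.
 */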
if (isr & 0x11) {
siginfo.si_code = FPE_FLTINV;
} else if (isr & 0x22) {
/* denormal operand gets the same si_code as underflow
* see arch/i386/kernel/traps.c:math_error() */
siginfo.si_code = FPE_FLTUND;
} else if (isr & 0x44) {
siginfo.si_code = FPE_FLTDIV;
}
siginfo.si_isr = isr;
siginfo.si_flags = __ISR_VALID;
siginfo.si_imm = 0;
force_sig_info(SIGFPE, &siginfo, current);
}
} else {
if (exception == -1) {
printk(KERN_ERR "handle_fpu_swa: fp_emulate() returned -1\n");
return -1;
} else if (exception != 0) {
/* raise exception */
siginfo.si_signo = SIGFPE;
siginfo.si_errno = 0;
siginfo.si_code = __SI_FAULT; /* default code */
siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri);
if (isr & 0x880) {
siginfo.si_code = FPE_FLTOVF;
} else if (isr & 0x1100) {
siginfo.si_code = FPE_FLTUND;
} else if (isr & 0x2200) {
siginfo.si_code = FPE_FLTRES;
}
siginfo.si_isr = isr;
siginfo.si_flags = __ISR_VALID;
siginfo.si_imm = 0;
force_sig_info(SIGFPE, &siginfo, current);
}
}
return 0;
}
struct illegal_op_return {
unsigned long fkt, arg1, arg2, arg3;
};
struct illegal_op_return
ia64_illegal_op_fault (unsigned long ec, long arg1, long arg2, long arg3,
long arg4, long arg5, long arg6, long arg7,
struct pt_regs regs)
{
struct illegal_op_return rv;
struct siginfo si;
char buf[128];
#ifdef CONFIG_IA64_BRL_EMU
{
extern struct illegal_op_return ia64_emulate_brl (struct pt_regs *, unsigned long);
rv = ia64_emulate_brl(&regs, ec);
if (rv.fkt != (unsigned long) -1)
return rv;
}
#endif
sprintf(buf, "IA-64 Illegal operation fault");
die_if_kernel(buf, &regs, 0);
memset(&si, 0, sizeof(si));
si.si_signo = SIGILL;
si.si_code = ILL_ILLOPC;
si.si_addr = (void __user *) (regs.cr_iip + ia64_psr(&regs)->ri);
force_sig_info(SIGILL, &si, current);
rv.fkt = 0;
return rv;
}
void
ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
unsigned long iim, unsigned long itir, long arg5, long arg6,
long arg7, struct pt_regs regs)
{
unsigned long code, error = isr, iip;
struct siginfo siginfo;
char buf[128];
int result, sig;
static const char *reason[] = {
"IA-64 Illegal Operation fault",
"IA-64 Privileged Operation fault",
"IA-64 Privileged Register fault",
"IA-64 Reserved Register/Field fault",
"Disabled Instruction Set Transition fault",
"Unknown fault 5", "Unknown fault 6", "Unknown fault 7", "Illegal Hazard fault",
"Unknown fault 9", "Unknown fault 10", "Unknown fault 11", "Unknown fault 12",
"Unknown fault 13", "Unknown fault 14", "Unknown fault 15"
};
if ((isr & IA64_ISR_NA) && ((isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH)) {
/*
* This fault was due to lfetch.fault, set "ed" bit in the psr to cancel
* the lfetch.
*/
ia64_psr(&regs)->ed = 1;
return;
}
iip = regs.cr_iip + ia64_psr(&regs)->ri;
switch (vector) {
case 24: /* General Exception */
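/* ISR bits 7:4 identify which flavor of General Exception this is; use them to index reason[]. */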
code = (isr >> 4) & 0xf;
sprintf(buf, "General Exception: %s%s", reason[code],
(code == 3) ? ((isr & (1UL << 37))
? " (RSE access)" : " (data access)") : "");
if (code == 8) {
# ifdef CONFIG_IA64_PRINT_HAZARDS
printk("%s[%d]: possible hazard @ ip=%016lx (pr = %016lx)\n",
current->comm, current->pid,
regs.cr_iip + ia64_psr(&regs)->ri, regs.pr);
# endif
return;
}
break;
case 25: /* Disabled FP-Register */
if (isr & 2) {
disabled_fph_fault(&regs);
return;
}
sprintf(buf, "Disabled FPL fault---not supposed to happen!");
break;
case 26: /* NaT Consumption */
if (user_mode(&regs)) {
void __user *addr;
if (((isr >> 4) & 0xf) == 2) {
/* NaT page consumption */
sig = SIGSEGV;
code = SEGV_ACCERR;
addr = (void __user *) ifa;
} else {
/* register NaT consumption */
sig = SIGILL;
code = ILL_ILLOPN;
addr = (void __user *) (regs.cr_iip
+ ia64_psr(&regs)->ri);
}
siginfo.si_signo = sig;
siginfo.si_code = code;
siginfo.si_errno = 0;
siginfo.si_addr = addr;
siginfo.si_imm = vector;
siginfo.si_flags = __ISR_VALID;
siginfo.si_isr = isr;
force_sig_info(sig, &siginfo, current);
return;
} else if (ia64_done_with_exception(&regs))
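/* kernel-mode NaT consumption covered by an exception-table fixup entry */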
return;
sprintf(buf, "NaT consumption");
break;
case 31: /* Unsupported Data Reference */
if (user_mode(&regs)) {
siginfo.si_signo = SIGILL;
siginfo.si_code = ILL_ILLOPN;
siginfo.si_errno = 0;
siginfo.si_addr = (void __user *) iip;
siginfo.si_imm = vector;
siginfo.si_flags = __ISR_VALID;
siginfo.si_isr = isr;
force_sig_info(SIGILL, &siginfo, current);
return;
}
sprintf(buf, "Unsupported data reference");
break;
case 29: /* Debug */
case 35: /* Taken Branch Trap */
case 36: /* Single Step Trap */
if (fsys_mode(current, &regs)) {
extern char __kernel_syscall_via_break[];
/*
* Got a trap in fsys-mode: Taken Branch Trap and Single Step trap
* need special handling; Debug trap is not supposed to happen.
*/
if (unlikely(vector == 29)) {
die("Got debug trap in fsys-mode---not supposed to happen!",
&regs, 0);
return;
}
/* re-do the system call via break 0x100000: */
regs.cr_iip = (unsigned long) __kernel_syscall_via_break;
ia64_psr(&regs)->ri = 0;
ia64_psr(&regs)->cpl = 3;
return;
}
switch (vector) {
case 29:
siginfo.si_code = TRAP_HWBKPT;
#ifdef CONFIG_ITANIUM
/*
* Erratum 10 (IFA may contain incorrect address) now has
* "NoFix" status. There are no plans for fixing this.
*/
if (ia64_psr(&regs)->is == 0)
ifa = regs.cr_iip;
#endif
break;
case 35: siginfo.si_code = TRAP_BRANCH; ifa = 0; break;
case 36: siginfo.si_code = TRAP_TRACE; ifa = 0; break;
}
siginfo.si_signo = SIGTRAP;
siginfo.si_errno = 0;
siginfo.si_addr = (void __user *) ifa;
siginfo.si_imm = 0;
siginfo.si_flags = __ISR_VALID;
siginfo.si_isr = isr;
force_sig_info(SIGTRAP, &siginfo, current);
return;
case 32: /* fp fault */
case 33: /* fp trap */
result = handle_fpu_swa((vector == 32) ? 1 : 0, &regs, isr);
if ((result < 0) || (current->thread.flags & IA64_THREAD_FPEMU_SIGFPE)) {
siginfo.si_signo = SIGFPE;
siginfo.si_errno = 0;
siginfo.si_code = FPE_FLTINV;
siginfo.si_addr = (void __user *) iip;
siginfo.si_flags = __ISR_VALID;
siginfo.si_isr = isr;
siginfo.si_imm = 0;
force_sig_info(SIGFPE, &siginfo, current);
}
return;
case 34:
if (isr & 0x2) {
/* Lower-Privilege Transfer Trap */
/*
* Just clear PSR.lp and then return immediately: all the
* interesting work (e.g., signal delivery) is done in the
* kernel exit path.
*/
ia64_psr(&regs)->lp = 0;
return;
} else {
/* Unimplemented Instr. Address Trap */
if (user_mode(&regs)) {
siginfo.si_signo = SIGILL;
siginfo.si_code = ILL_BADIADDR;
siginfo.si_errno = 0;
siginfo.si_flags = 0;
siginfo.si_isr = 0;
siginfo.si_imm = 0;
siginfo.si_addr = (void __user *) iip;
force_sig_info(SIGILL, &siginfo, current);
return;
}
sprintf(buf, "Unimplemented Instruction Address fault");
}
break;
case 45:
#ifdef CONFIG_IA32_SUPPORT
if (ia32_exception(&regs, isr) == 0)
return;
#endif
printk(KERN_ERR "Unexpected IA-32 exception (Trap 45)\n");
printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx\n",
iip, ifa, isr);
force_sig(SIGSEGV, current);
break;
case 46:
#ifdef CONFIG_IA32_SUPPORT
if (ia32_intercept(&regs, isr) == 0)
return;
#endif
printk(KERN_ERR "Unexpected IA-32 intercept trap (Trap 46)\n");
printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx, iim - 0x%lx\n",
iip, ifa, isr, iim);
force_sig(SIGSEGV, current);
return;
case 47:
sprintf(buf, "IA-32 Interruption Fault (int 0x%lx)", isr >> 16);
break;
default:
sprintf(buf, "Fault %lu", vector);
break;
}
die_if_kernel(buf, &regs, error);
force_sig(SIGILL, current);
}
