[IA64] Add Variable Page Size and IA64 Support in Intel IOMMU

The patch contains the IA64-specific code for the Intel IOMMU. It defines
a new machvec dig_vtd, hooks for the IOMMU, DMAR table detection, a cache
line flush function, etc.
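
The cache line flush routine itself is outside this excerpt. Purely as an
illustration, a C-level sketch of how a clflush_cache_range()-style loop could
use the new ia64_cache_stride_shift (set up in setup.c below) might look like
the following; the function name and body are assumptions, not the patch's
actual implementation, which may well be written differently (e.g. in assembly):

#include <asm/intrinsics.h>

extern unsigned long ia64_cache_stride_shift;   /* minimum cache stride, set in setup.c */

/* Illustrative sketch only: flush all cache lines covering [addr, addr + size). */
static void clflush_cache_range_sketch(void *addr, int size)
{
        unsigned long stride = 1UL << ia64_cache_stride_shift;
        unsigned long from = (unsigned long) addr & ~(stride - 1);
        unsigned long end = (unsigned long) addr + size;

        for (; from < end; from += stride)
                ia64_fc((void *) from); /* "fc" flushes the line from all cache levels */
        ia64_sync_i();                  /* wait for the flushes to complete */
        ia64_srlz_i();                  /* serialize the instruction stream */
}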

For a generic kernel with CONFIG_DMAR=y, if an Intel IOMMU is detected,
dig_vtd is used as the machine vector. Otherwise, the kernel falls back to
the dig machine vector. The kernel parameter "machvec=dig" or
"intel_iommu=off" can be used to force the kernel to boot with the dig
machine vector.
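
The dig_vtd machine vector selected here is defined in a header that is not
part of this excerpt. As a rough, hypothetical sketch of the kind of hooks such
a machine vector wires up (the macro list is abbreviated and the vtd_* names
are illustrative placeholders, not necessarily the patch's actual symbols):

/* Hypothetical machvec sketch; abbreviated, vtd_* names are illustrative. */
#define platform_name                   "dig_vtd"
#define platform_setup                  dig_setup
#define platform_dma_init               pci_iommu_alloc         /* see pci-dma.c below */
#define platform_dma_supported          iommu_dma_supported     /* see pci-dma.c below */
#define platform_dma_alloc_coherent     vtd_alloc_coherent      /* illustrative */
#define platform_dma_free_coherent      vtd_free_coherent       /* illustrative */
#define platform_dma_map_single         vtd_map_single          /* illustrative */
#define platform_dma_unmap_single       vtd_unmap_single        /* illustrative */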

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Author:     Fenghua Yu
Date:       2008-10-17 12:14:13 -07:00
Committer:  Tony Luck
Parent:     6bb7a93548
Commit:     62fdd7678a
23 changed files with 620 additions and 13 deletions

arch/ia64/kernel/Makefile

@@ -42,6 +42,10 @@ obj-$(CONFIG_IA64_ESI) += esi.o
 ifneq ($(CONFIG_IA64_ESI),)
 obj-y += esi_stub.o # must be in kernel proper
 endif
+obj-$(CONFIG_DMAR) += pci-dma.o
+ifeq ($(CONFIG_DMAR), y)
+obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
+endif
 
 # The gate DSO image is built using a special linker script.
 targets += gate.so gate-syms.o

arch/ia64/kernel/acpi.c

@@ -91,6 +91,9 @@ acpi_get_sysname(void)
         struct acpi_table_rsdp *rsdp;
         struct acpi_table_xsdt *xsdt;
         struct acpi_table_header *hdr;
+#ifdef CONFIG_DMAR
+        u64 i, nentries;
+#endif
 
         rsdp_phys = acpi_find_rsdp();
         if (!rsdp_phys) {
@@ -123,6 +126,18 @@ acpi_get_sysname(void)
return "sn2";
}
#ifdef CONFIG_DMAR
/* Look for Intel IOMMU */
nentries = (hdr->length - sizeof(*hdr)) /
sizeof(xsdt->table_offset_entry[0]);
for (i = 0; i < nentries; i++) {
hdr = __va(xsdt->table_offset_entry[i]);
if (strncmp(hdr->signature, ACPI_SIG_DMAR,
sizeof(ACPI_SIG_DMAR) - 1) == 0)
return "dig_vtd";
}
#endif
return "dig";
#else
# if defined (CONFIG_IA64_HP_SIM)
@@ -137,6 +152,8 @@ acpi_get_sysname(void)
return "uv";
# elif defined (CONFIG_IA64_DIG)
return "dig";
# elif defined(CONFIG_IA64_DIG_VTD)
return "dig_vtd";
# else
# error Unknown platform. Fix acpi.c.
# endif

arch/ia64/kernel/msi_ia64.c

@@ -5,6 +5,7 @@
 #include <linux/pci.h>
 #include <linux/irq.h>
 #include <linux/msi.h>
+#include <linux/dmar.h>
 #include <asm/smp.h>
 
 /*
@@ -162,3 +163,82 @@ void arch_teardown_msi_irq(unsigned int irq)
         return ia64_teardown_msi_irq(irq);
 }
+
+#ifdef CONFIG_DMAR
+#ifdef CONFIG_SMP
+static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
+{
+        struct irq_cfg *cfg = irq_cfg + irq;
+        struct msi_msg msg;
+        int cpu = first_cpu(mask);
+
+        if (!cpu_online(cpu))
+                return;
+
+        if (irq_prepare_move(irq, cpu))
+                return;
+
+        dmar_msi_read(irq, &msg);
+
+        msg.data &= ~MSI_DATA_VECTOR_MASK;
+        msg.data |= MSI_DATA_VECTOR(cfg->vector);
+        msg.address_lo &= ~MSI_ADDR_DESTID_MASK;
+        msg.address_lo |= MSI_ADDR_DESTID_CPU(cpu_physical_id(cpu));
+
+        dmar_msi_write(irq, &msg);
+        irq_desc[irq].affinity = mask;
+}
+#endif /* CONFIG_SMP */
+
+struct irq_chip dmar_msi_type = {
+        .name = "DMAR_MSI",
+        .unmask = dmar_msi_unmask,
+        .mask = dmar_msi_mask,
+        .ack = ia64_ack_msi_irq,
+#ifdef CONFIG_SMP
+        .set_affinity = dmar_msi_set_affinity,
+#endif
+        .retrigger = ia64_msi_retrigger_irq,
+};
+
+static int
+msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
+{
+        struct irq_cfg *cfg = irq_cfg + irq;
+        unsigned dest;
+        cpumask_t mask;
+
+        cpus_and(mask, irq_to_domain(irq), cpu_online_map);
+        dest = cpu_physical_id(first_cpu(mask));
+
+        msg->address_hi = 0;
+        msg->address_lo =
+                MSI_ADDR_HEADER |
+                MSI_ADDR_DESTMODE_PHYS |
+                MSI_ADDR_REDIRECTION_CPU |
+                MSI_ADDR_DESTID_CPU(dest);
+
+        msg->data =
+                MSI_DATA_TRIGGER_EDGE |
+                MSI_DATA_LEVEL_ASSERT |
+                MSI_DATA_DELIVERY_FIXED |
+                MSI_DATA_VECTOR(cfg->vector);
+        return 0;
+}
+
+int arch_setup_dmar_msi(unsigned int irq)
+{
+        int ret;
+        struct msi_msg msg;
+
+        ret = msi_compose_msg(NULL, irq, &msg);
+        if (ret < 0)
+                return ret;
+        dmar_msi_write(irq, &msg);
+        set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
+                "edge");
+        return 0;
+}
+#endif /* CONFIG_DMAR */

arch/ia64/kernel/pci-dma.c (new file, 129 lines)

@@ -0,0 +1,129 @@
+/*
+ * Dynamic DMA mapping support.
+ */
+
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <linux/dmar.h>
+#include <asm/iommu.h>
+#include <asm/machvec.h>
+#include <linux/dma-mapping.h>
+
+#include <asm/machvec.h>
+#include <asm/system.h>
+
+#ifdef CONFIG_DMAR
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+
+#include <asm/page.h>
+#include <asm/iommu.h>
+
+dma_addr_t bad_dma_address __read_mostly;
+EXPORT_SYMBOL(bad_dma_address);
+
+static int iommu_sac_force __read_mostly;
+int no_iommu __read_mostly;
+
+#ifdef CONFIG_IOMMU_DEBUG
+int force_iommu __read_mostly = 1;
+#else
+int force_iommu __read_mostly;
+#endif
+
+/* Set this to 1 if there is a HW IOMMU in the system */
+int iommu_detected __read_mostly;
+
+/* Dummy device used for NULL arguments (normally ISA). Better would
+   be probably a smaller DMA mask, but this is bug-to-bug compatible
+   to i386. */
+struct device fallback_dev = {
+        .bus_id = "fallback device",
+        .coherent_dma_mask = DMA_32BIT_MASK,
+        .dma_mask = &fallback_dev.coherent_dma_mask,
+};
+
+void __init pci_iommu_alloc(void)
+{
+        /*
+         * The order of these functions is important for
+         * fall-back/fail-over reasons
+         */
+        detect_intel_iommu();
+
+#ifdef CONFIG_SWIOTLB
+        pci_swiotlb_init();
+#endif
+}
+
+static int __init pci_iommu_init(void)
+{
+        if (iommu_detected)
+                intel_iommu_init();
+
+        return 0;
+}
+
+/* Must execute after PCI subsystem */
+fs_initcall(pci_iommu_init);
+
+void pci_iommu_shutdown(void)
+{
+        return;
+}
+
+void __init
+iommu_dma_init(void)
+{
+        return;
+}
+
+struct dma_mapping_ops *dma_ops;
+EXPORT_SYMBOL(dma_ops);
+
+int iommu_dma_supported(struct device *dev, u64 mask)
+{
+        struct dma_mapping_ops *ops = get_dma_ops(dev);
+
+#ifdef CONFIG_PCI
+        if (mask > 0xffffffff && forbid_dac > 0) {
+                dev_info(dev, "Disallowing DAC for device\n");
+                return 0;
+        }
+#endif
+
+        if (ops->dma_supported_op)
+                return ops->dma_supported_op(dev, mask);
+
+        /* Copied from i386. Doesn't make much sense, because it will
+           only work for pci_alloc_coherent.
+           The caller just has to use GFP_DMA in this case. */
+        if (mask < DMA_24BIT_MASK)
+                return 0;
+
+        /* Tell the device to use SAC when IOMMU force is on. This
+           allows the driver to use cheaper accesses in some cases.
+
+           Problem with this is that if we overflow the IOMMU area and
+           return DAC as fallback address the device may not handle it
+           correctly.
+
+           As a special case some controllers have a 39bit address
+           mode that is as efficient as 32bit (aic79xx). Don't force
+           SAC for these. Assume all masks <= 40 bits are of this
+           type. Normally this doesn't make any difference, but gives
+           more gentle handling of IOMMU overflow. */
+        if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) {
+                dev_info(dev, "Force SAC with mask %lx\n", mask);
+                return 0;
+        }
+
+        return 1;
+}
+EXPORT_SYMBOL(iommu_dma_supported);
+
+#endif

arch/ia64/kernel/pci-swiotlb.c (new file)

@@ -0,0 +1,46 @@
+/* Glue code to lib/swiotlb.c */
+
+#include <linux/pci.h>
+#include <linux/cache.h>
+#include <linux/module.h>
+#include <linux/dma-mapping.h>
+
+#include <asm/swiotlb.h>
+#include <asm/dma.h>
+#include <asm/iommu.h>
+#include <asm/machvec.h>
+
+int swiotlb __read_mostly;
+EXPORT_SYMBOL(swiotlb);
+
+struct dma_mapping_ops swiotlb_dma_ops = {
+        .mapping_error = swiotlb_dma_mapping_error,
+        .alloc_coherent = swiotlb_alloc_coherent,
+        .free_coherent = swiotlb_free_coherent,
+        .map_single = swiotlb_map_single,
+        .unmap_single = swiotlb_unmap_single,
+        .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
+        .sync_single_for_device = swiotlb_sync_single_for_device,
+        .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu,
+        .sync_single_range_for_device = swiotlb_sync_single_range_for_device,
+        .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
+        .sync_sg_for_device = swiotlb_sync_sg_for_device,
+        .map_sg = swiotlb_map_sg,
+        .unmap_sg = swiotlb_unmap_sg,
+        .dma_supported_op = swiotlb_dma_supported,
+};
+
+void __init pci_swiotlb_init(void)
+{
+        if (!iommu_detected) {
+#ifdef CONFIG_IA64_GENERIC
+                swiotlb = 1;
+                printk(KERN_INFO "PCI-DMA: Re-initialize machine vector.\n");
+                machvec_init("dig");
+                swiotlb_init();
+                dma_ops = &swiotlb_dma_ops;
+#else
+                panic("Unable to find Intel IOMMU");
+#endif
+        }
+}

arch/ia64/kernel/setup.c

@@ -116,6 +116,13 @@ unsigned int num_io_spaces;
  */
 #define I_CACHE_STRIDE_SHIFT 5 /* Safest way to go: 32 bytes by 32 bytes */
 unsigned long ia64_i_cache_stride_shift = ~0;
+/*
+ * "clflush_cache_range()" needs to know what processor dependent stride size to
+ * use when it flushes cache lines including both d-cache and i-cache.
+ */
+/* Safest way to go: 32 bytes by 32 bytes */
+#define CACHE_STRIDE_SHIFT 5
+unsigned long ia64_cache_stride_shift = ~0;
 
 /*
  * The merge_mask variable needs to be set to (max(iommu_page_size(iommu)) - 1). This
@@ -847,13 +854,14 @@ setup_per_cpu_areas (void)
 }
 
 /*
- * Calculate the max. cache line size.
+ * Do the following calculations:
  *
- * In addition, the minimum of the i-cache stride sizes is calculated for
- * "flush_icache_range()".
+ * 1. the max. cache line size.
+ * 2. the minimum of the i-cache stride sizes for "flush_icache_range()".
+ * 3. the minimum of the cache stride sizes for "clflush_cache_range()".
  */
 static void __cpuinit
-get_max_cacheline_size (void)
+get_cache_info(void)
 {
         unsigned long line_size, max = 1;
         u64 l, levels, unique_caches;
@@ -867,12 +875,14 @@ get_max_cacheline_size (void)
                 max = SMP_CACHE_BYTES;
                 /* Safest setup for "flush_icache_range()" */
                 ia64_i_cache_stride_shift = I_CACHE_STRIDE_SHIFT;
+                /* Safest setup for "clflush_cache_range()" */
+                ia64_cache_stride_shift = CACHE_STRIDE_SHIFT;
                 goto out;
         }
 
         for (l = 0; l < levels; ++l) {
-                status = ia64_pal_cache_config_info(l, /* cache_type (data_or_unified)= */ 2,
-                                                    &cci);
+                /* cache_type (data_or_unified)=2 */
+                status = ia64_pal_cache_config_info(l, 2, &cci);
                 if (status != 0) {
                         printk(KERN_ERR
                                "%s: ia64_pal_cache_config_info(l=%lu, 2) failed (status=%ld)\n",
@@ -880,15 +890,21 @@ get_max_cacheline_size (void)
                         max = SMP_CACHE_BYTES;
                         /* The safest setup for "flush_icache_range()" */
                         cci.pcci_stride = I_CACHE_STRIDE_SHIFT;
+                        /* The safest setup for "clflush_cache_range()" */
+                        ia64_cache_stride_shift = CACHE_STRIDE_SHIFT;
                         cci.pcci_unified = 1;
+                } else {
+                        if (cci.pcci_stride < ia64_cache_stride_shift)
+                                ia64_cache_stride_shift = cci.pcci_stride;
+
+                        line_size = 1 << cci.pcci_line_size;
+                        if (line_size > max)
+                                max = line_size;
                 }
-                line_size = 1 << cci.pcci_line_size;
-                if (line_size > max)
-                        max = line_size;
+
                 if (!cci.pcci_unified) {
-                        status = ia64_pal_cache_config_info(l,
-                                                /* cache_type (instruction)= */ 1,
-                                                &cci);
+                        /* cache_type (instruction)=1 */
+                        status = ia64_pal_cache_config_info(l, 1, &cci);
                         if (status != 0) {
                                 printk(KERN_ERR
                                        "%s: ia64_pal_cache_config_info(l=%lu, 1) failed (status=%ld)\n",
@@ -942,7 +958,7 @@ cpu_init (void)
         }
 #endif
 
-        get_max_cacheline_size();
+        get_cache_info();
 
         /*
          * We can't pass "local_cpu_data" to identify_cpu() because we haven't called