Merge tag 'powerpc-5.4-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

Pull powerpc updates from Michael Ellerman:
 "This is a bit late, partly due to me travelling, and partly due to a
  power outage knocking out some of my test systems *while* I was
  travelling.

   - Initial support for running on a system with an Ultravisor, which
     is software that runs below the hypervisor and protects guests
     against some attacks by the hypervisor.

   - Support for building the kernel to run as a "Secure Virtual
     Machine", ie. as a guest capable of running on a system with an
     Ultravisor.

   - Some changes to our DMA code on bare metal, to allow devices with
     medium sized DMA masks (> 32 && < 59 bits) to use more than 2GB of
     DMA space.

   - Support for firmware assisted crash dumps on bare metal (powernv).

   - Two series fixing bugs in and refactoring our PCI EEH code.

   - A large series refactoring our exception entry code to use gas
     macros, both to make it more readable and also enable some future
     optimisations.

  As well as many cleanups and other minor features & fixups.

  Thanks to: Adam Zerella, Alexey Kardashevskiy, Alistair Popple, Andrew
  Donnellan, Aneesh Kumar K.V, Anju T Sudhakar, Anshuman Khandual,
  Balbir Singh, Benjamin Herrenschmidt, Cédric Le Goater, Christophe
  JAILLET, Christophe Leroy, Christopher M. Riedl, Christoph Hellwig,
  Claudio Carvalho, Daniel Axtens, David Gibson, David Hildenbrand,
  Desnes A. Nunes do Rosario, Ganesh Goudar, Gautham R. Shenoy, Greg
  Kurz, Guerney Hunt, Gustavo Romero, Halil Pasic, Hari Bathini, Joakim
  Tjernlund, Jonathan Neuschafer, Jordan Niethe, Leonardo Bras, Lianbo
  Jiang, Madhavan Srinivasan, Mahesh Salgaonkar, Mahesh Salgaonkar,
  Masahiro Yamada, Maxiwell S. Garcia, Michael Anderson, Nathan
  Chancellor, Nathan Lynch, Naveen N. Rao, Nicholas Piggin, Oliver
  O'Halloran, Qian Cai, Ram Pai, Ravi Bangoria, Reza Arbab, Ryan Grimm,
  Sam Bobroff, Santosh Sivaraj, Segher Boessenkool, Sukadev Bhattiprolu,
  Thiago Bauermann, Thiago Jung Bauermann, Thomas Gleixner, Tom
  Lendacky, Vasant Hegde"

* tag 'powerpc-5.4-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (264 commits)
  powerpc/mm/mce: Keep irqs disabled during lockless page table walk
  powerpc: Use ftrace_graph_ret_addr() when unwinding
  powerpc/ftrace: Enable HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
  ftrace: Look up the address of return_to_handler() using helpers
  powerpc: dump kernel log before carrying out fadump or kdump
  docs: powerpc: Add missing documentation reference
  powerpc/xmon: Fix output of XIVE IPI
  powerpc/xmon: Improve output of XIVE interrupts
  powerpc/mm/radix: remove useless kernel messages
  powerpc/fadump: support holes in kernel boot memory area
  powerpc/fadump: remove RMA_START and RMA_END macros
  powerpc/fadump: update documentation about option to release opalcore
  powerpc/fadump: consider f/w load area
  powerpc/opalcore: provide an option to invalidate /sys/firmware/opal/core file
  powerpc/opalcore: export /sys/firmware/opal/core for analysing opal crashes
  powerpc/fadump: update documentation about CONFIG_PRESERVE_FA_DUMP
  powerpc/fadump: add support to preserve crash data on FADUMP disabled kernel
  powerpc/fadump: improve how crashed kernel's memory is reserved
  powerpc/fadump: consider reserved ranges while releasing memory
  powerpc/fadump: make crash memory ranges array allocation generic
  ...
This commit is contained in:
Linus Torvalds
2019-09-20 11:48:06 -07:00
247 changed files with 9763 additions and 5475 deletions

View File

@@ -1 +1,2 @@
prom_init_check
vmlinux.lds

View File

@@ -52,7 +52,7 @@ obj-y := cputable.o ptrace.o syscalls.o \
of_platform.o prom_parse.o
obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
signal_64.o ptrace32.o \
paca.o nvram_64.o firmware.o
paca.o nvram_64.o firmware.o note.o
obj-$(CONFIG_VDSO32) += vdso32/
obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
@@ -78,7 +78,9 @@ obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \
eeh_driver.o eeh_event.o eeh_sysfs.o
obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_FA_DUMP) += fadump.o
ifneq ($(CONFIG_FA_DUMP)$(CONFIG_PRESERVE_FA_DUMP),)
obj-y += fadump.o
endif
ifdef CONFIG_PPC32
obj-$(CONFIG_E500) += idle_e500.o
endif
@@ -155,6 +157,9 @@ endif
obj-$(CONFIG_EPAPR_PARAVIRT) += epapr_paravirt.o epapr_hcalls.o
obj-$(CONFIG_KVM_GUEST) += kvm.o kvm_emul.o
ifneq ($(CONFIG_PPC_POWERNV)$(CONFIG_PPC_SVM),)
obj-y += ucall.o
endif
# Disable GCOV, KCOV & sanitizers in odd or sensitive code
GCOV_PROFILE_prom_init.o := n
@@ -184,15 +189,13 @@ extra-$(CONFIG_ALTIVEC) += vector.o
extra-$(CONFIG_PPC64) += entry_64.o
extra-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init.o
ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE
$(obj)/built-in.a: prom_init_check
extra-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init_check
quiet_cmd_prom_init_check = CALL $<
cmd_prom_init_check = $(CONFIG_SHELL) $< "$(NM)" "$(obj)/prom_init.o"
quiet_cmd_prom_init_check = PROMCHK $@
cmd_prom_init_check = $(CONFIG_SHELL) $< "$(NM)" $(obj)/prom_init.o; touch $@
PHONY += prom_init_check
prom_init_check: $(src)/prom_init_check.sh $(obj)/prom_init.o
$(call cmd,prom_init_check)
endif
$(obj)/prom_init_check: $(src)/prom_init_check.sh $(obj)/prom_init.o FORCE
$(call if_changed,prom_init_check)
targets += prom_init_check
clean-files := vmlinux.lds

View File

@@ -506,6 +506,7 @@ int main(void)
OFFSET(KVM_VRMA_SLB_V, kvm, arch.vrma_slb_v);
OFFSET(KVM_RADIX, kvm, arch.radix);
OFFSET(KVM_FWNMI, kvm, arch.fwnmi_enabled);
OFFSET(KVM_SECURE_GUEST, kvm, arch.secure_guest);
OFFSET(VCPU_DSISR, kvm_vcpu, arch.shregs.dsisr);
OFFSET(VCPU_DAR, kvm_vcpu, arch.shregs.dar);
OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr);

View File

@@ -569,7 +569,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
#endif /* CONFIG_PPC_BOOK3S_64 */
#ifdef CONFIG_PPC32
#ifdef CONFIG_PPC_BOOK3S_32
#ifdef CONFIG_PPC_BOOK3S_601
{ /* 601 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x00010000,
@@ -583,6 +583,8 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_generic,
.platform = "ppc601",
},
#endif /* CONFIG_PPC_BOOK3S_601 */
#ifdef CONFIG_PPC_BOOK3S_6xx
{ /* 603 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x00030000,
@@ -1212,7 +1214,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_generic,
.platform = "ppc603",
},
#endif /* CONFIG_PPC_BOOK3S_32 */
#endif /* CONFIG_PPC_BOOK3S_6xx */
#ifdef CONFIG_PPC_8xx
{ /* 8xx */
.pvr_mask = 0xffff0000,

View File

@@ -122,18 +122,17 @@ int dma_iommu_dma_supported(struct device *dev, u64 mask)
{
struct iommu_table *tbl = get_iommu_table_base(dev);
if (!tbl) {
dev_info(dev, "Warning: IOMMU dma not supported: mask 0x%08llx"
", table unavailable\n", mask);
return 0;
}
if (dev_is_pci(dev) && dma_iommu_bypass_supported(dev, mask)) {
dev->archdata.iommu_bypass = true;
dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
return 1;
}
if (!tbl) {
dev_err(dev, "Warning: IOMMU dma not supported: mask 0x%08llx, table unavailable\n", mask);
return 0;
}
if (tbl->it_offset > (mask >> tbl->it_page_shift)) {
dev_info(dev, "Warning: IOMMU offset too big for device mask\n");
dev_info(dev, "mask: 0x%08llx, table offset: 0x%08lx\n",

View File

@@ -150,6 +150,16 @@ static int __init eeh_setup(char *str)
}
__setup("eeh=", eeh_setup);
void eeh_show_enabled(void)
{
if (eeh_has_flag(EEH_FORCE_DISABLED))
pr_info("EEH: Recovery disabled by kernel parameter.\n");
else if (eeh_has_flag(EEH_ENABLED))
pr_info("EEH: Capable adapter found: recovery enabled.\n");
else
pr_info("EEH: No capable adapters found: recovery disabled.\n");
}
/*
* This routine captures assorted PCI configuration space data
* for the indicated PCI device, and puts them into a buffer
@@ -410,11 +420,9 @@ static int eeh_phb_check_failure(struct eeh_pe *pe)
eeh_pe_mark_isolated(phb_pe);
eeh_serialize_unlock(flags);
pr_err("EEH: PHB#%x failure detected, location: %s\n",
pr_debug("EEH: PHB#%x failure detected, location: %s\n",
phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe));
dump_stack();
eeh_send_failure_event(phb_pe);
return 1;
out:
eeh_serialize_unlock(flags);
@@ -441,7 +449,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
unsigned long flags;
struct device_node *dn;
struct pci_dev *dev;
struct eeh_pe *pe, *parent_pe, *phb_pe;
struct eeh_pe *pe, *parent_pe;
int rc = 0;
const char *location = NULL;
@@ -460,8 +468,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
/* Access to IO BARs might get this far and still not want checking. */
if (!pe) {
eeh_stats.ignored_check++;
pr_debug("EEH: Ignored check for %s\n",
eeh_pci_name(dev));
eeh_edev_dbg(edev, "Ignored check\n");
return 0;
}
@@ -501,12 +508,11 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
if (dn)
location = of_get_property(dn, "ibm,loc-code",
NULL);
printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
"location=%s driver=%s pci addr=%s\n",
eeh_edev_err(edev, "%d reads ignored for recovering device at location=%s driver=%s\n",
pe->check_count,
location ? location : "unknown",
eeh_driver_name(dev), eeh_pci_name(dev));
printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
eeh_driver_name(dev));
eeh_edev_err(edev, "Might be infinite loop in %s driver\n",
eeh_driver_name(dev));
dump_stack();
}
@@ -573,13 +579,8 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
* a stack trace will help the device-driver authors figure
* out what happened. So print that out.
*/
phb_pe = eeh_phb_pe_get(pe->phb);
pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
pe->phb->global_number, pe->addr);
pr_err("EEH: PE location: %s, PHB location: %s\n",
eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
dump_stack();
pr_debug("EEH: %s: Frozen PHB#%x-PE#%x detected\n",
__func__, pe->phb->global_number, pe->addr);
eeh_send_failure_event(pe);
return 1;
@@ -697,7 +698,7 @@ int eeh_pci_enable(struct eeh_pe *pe, int function)
return rc;
}
static void *eeh_disable_and_save_dev_state(struct eeh_dev *edev,
static void eeh_disable_and_save_dev_state(struct eeh_dev *edev,
void *userdata)
{
struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
@@ -708,7 +709,7 @@ static void *eeh_disable_and_save_dev_state(struct eeh_dev *edev,
* state for the specified device
*/
if (!pdev || pdev == dev)
return NULL;
return;
/* Ensure we have D0 power state */
pci_set_power_state(pdev, PCI_D0);
@@ -721,18 +722,16 @@ static void *eeh_disable_and_save_dev_state(struct eeh_dev *edev,
* interrupt from the device
*/
pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);
return NULL;
}
static void *eeh_restore_dev_state(struct eeh_dev *edev, void *userdata)
static void eeh_restore_dev_state(struct eeh_dev *edev, void *userdata)
{
struct pci_dn *pdn = eeh_dev_to_pdn(edev);
struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
struct pci_dev *dev = userdata;
if (!pdev)
return NULL;
return;
/* Apply customization from firmware */
if (pdn && eeh_ops->restore_config)
@@ -741,8 +740,6 @@ static void *eeh_restore_dev_state(struct eeh_dev *edev, void *userdata)
/* The caller should restore state for the specified device */
if (pdev != dev)
pci_restore_state(pdev);
return NULL;
}
int eeh_restore_vf_config(struct pci_dn *pdn)
@@ -868,7 +865,7 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
* the indicated device and its children so that the bunch of the
* devices could be reset properly.
*/
static void *eeh_set_dev_freset(struct eeh_dev *edev, void *flag)
static void eeh_set_dev_freset(struct eeh_dev *edev, void *flag)
{
struct pci_dev *dev;
unsigned int *freset = (unsigned int *)flag;
@@ -876,8 +873,6 @@ static void *eeh_set_dev_freset(struct eeh_dev *edev, void *flag)
dev = eeh_dev_to_pci_dev(edev);
if (dev)
*freset |= dev->needs_freset;
return NULL;
}
static void eeh_pe_refreeze_passed(struct eeh_pe *root)
@@ -1063,23 +1058,6 @@ static struct notifier_block eeh_reboot_nb = {
.notifier_call = eeh_reboot_notifier,
};
void eeh_probe_devices(void)
{
struct pci_controller *hose, *tmp;
struct pci_dn *pdn;
/* Enable EEH for all adapters */
list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
pdn = hose->pci_data;
traverse_pci_dn(pdn, eeh_ops->probe, NULL);
}
if (eeh_enabled())
pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
else
pr_info("EEH: No capable adapters found\n");
}
/**
* eeh_init - EEH initialization
*
@@ -1120,6 +1098,8 @@ static int eeh_init(void)
list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
eeh_dev_phb_init_dynamic(hose);
eeh_addr_cache_init();
/* Initialize EEH event */
return eeh_event_init();
}
@@ -1190,15 +1170,14 @@ void eeh_add_device_late(struct pci_dev *dev)
struct pci_dn *pdn;
struct eeh_dev *edev;
if (!dev || !eeh_enabled())
if (!dev)
return;
pr_debug("EEH: Adding device %s\n", pci_name(dev));
pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
edev = pdn_to_eeh_dev(pdn);
eeh_edev_dbg(edev, "Adding device\n");
if (edev->pdev == dev) {
pr_debug("EEH: Already referenced !\n");
eeh_edev_dbg(edev, "Device already referenced!\n");
return;
}
@@ -1246,6 +1225,8 @@ void eeh_add_device_tree_late(struct pci_bus *bus)
{
struct pci_dev *dev;
if (eeh_has_flag(EEH_FORCE_DISABLED))
return;
list_for_each_entry(dev, &bus->devices, bus_list) {
eeh_add_device_late(dev);
if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
@@ -1299,10 +1280,10 @@ void eeh_remove_device(struct pci_dev *dev)
edev = pci_dev_to_eeh_dev(dev);
/* Unregister the device with the EEH/PCI address search system */
pr_debug("EEH: Removing device %s\n", pci_name(dev));
dev_dbg(&dev->dev, "EEH: Removing device\n");
if (!edev || !edev->pdev || !edev->pe) {
pr_debug("EEH: Not referenced !\n");
dev_dbg(&dev->dev, "EEH: Device not referenced!\n");
return;
}
@@ -1890,6 +1871,198 @@ static const struct file_operations eeh_force_recover_fops = {
.llseek = no_llseek,
.write = eeh_force_recover_write,
};
static ssize_t eeh_debugfs_dev_usage(struct file *filp,
char __user *user_buf,
size_t count, loff_t *ppos)
{
static const char usage[] = "input format: <domain>:<bus>:<dev>.<fn>\n";
return simple_read_from_buffer(user_buf, count, ppos,
usage, sizeof(usage) - 1);
}
static ssize_t eeh_dev_check_write(struct file *filp,
const char __user *user_buf,
size_t count, loff_t *ppos)
{
uint32_t domain, bus, dev, fn;
struct pci_dev *pdev;
struct eeh_dev *edev;
char buf[20];
int ret;
memset(buf, 0, sizeof(buf));
ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count);
if (!ret)
return -EFAULT;
ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn);
if (ret != 4) {
pr_err("%s: expected 4 args, got %d\n", __func__, ret);
return -EINVAL;
}
pdev = pci_get_domain_bus_and_slot(domain, bus, (dev << 3) | fn);
if (!pdev)
return -ENODEV;
edev = pci_dev_to_eeh_dev(pdev);
if (!edev) {
pci_err(pdev, "No eeh_dev for this device!\n");
pci_dev_put(pdev);
return -ENODEV;
}
ret = eeh_dev_check_failure(edev);
pci_info(pdev, "eeh_dev_check_failure(%04x:%02x:%02x.%01x) = %d\n",
domain, bus, dev, fn, ret);
pci_dev_put(pdev);
return count;
}
static const struct file_operations eeh_dev_check_fops = {
.open = simple_open,
.llseek = no_llseek,
.write = eeh_dev_check_write,
.read = eeh_debugfs_dev_usage,
};
static int eeh_debugfs_break_device(struct pci_dev *pdev)
{
struct resource *bar = NULL;
void __iomem *mapped;
u16 old, bit;
int i, pos;
/* Do we have an MMIO BAR to disable? */
for (i = 0; i <= PCI_STD_RESOURCE_END; i++) {
struct resource *r = &pdev->resource[i];
if (!r->flags || !r->start)
continue;
if (r->flags & IORESOURCE_IO)
continue;
if (r->flags & IORESOURCE_UNSET)
continue;
bar = r;
break;
}
if (!bar) {
pci_err(pdev, "Unable to find Memory BAR to cause EEH with\n");
return -ENXIO;
}
pci_err(pdev, "Going to break: %pR\n", bar);
if (pdev->is_virtfn) {
#ifndef CONFIG_IOV
return -ENXIO;
#else
/*
* VFs don't have a per-function COMMAND register, so the best
* we can do is clear the Memory Space Enable bit in the PF's
* SRIOV control reg.
*
* Unfortunately, this requires that we have a PF (i.e doesn't
* work for a passed-through VF) and it has the potential side
* effect of also causing an EEH on every other VF under the
* PF. Oh well.
*/
pdev = pdev->physfn;
if (!pdev)
return -ENXIO; /* passed through VFs have no PF */
pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
pos += PCI_SRIOV_CTRL;
bit = PCI_SRIOV_CTRL_MSE;
#endif /* !CONFIG_IOV */
} else {
bit = PCI_COMMAND_MEMORY;
pos = PCI_COMMAND;
}
/*
* Process here is:
*
* 1. Disable Memory space.
*
* 2. Perform an MMIO to the device. This should result in an error
* (CA / UR) being raised by the device which results in an EEH
* PE freeze. Using the in_8() accessor skips the eeh detection hook
* so the freeze hook so the EEH Detection machinery won't be
* triggered here. This is to match the usual behaviour of EEH
* where the HW will asyncronously freeze a PE and it's up to
* the kernel to notice and deal with it.
*
* 3. Turn Memory space back on. This is more important for VFs
* since recovery will probably fail if we don't. For normal
* the COMMAND register is reset as a part of re-initialising
* the device.
*
* Breaking stuff is the point so who cares if it's racy ;)
*/
pci_read_config_word(pdev, pos, &old);
mapped = ioremap(bar->start, PAGE_SIZE);
if (!mapped) {
pci_err(pdev, "Unable to map MMIO BAR %pR\n", bar);
return -ENXIO;
}
pci_write_config_word(pdev, pos, old & ~bit);
in_8(mapped);
pci_write_config_word(pdev, pos, old);
iounmap(mapped);
return 0;
}
static ssize_t eeh_dev_break_write(struct file *filp,
const char __user *user_buf,
size_t count, loff_t *ppos)
{
uint32_t domain, bus, dev, fn;
struct pci_dev *pdev;
char buf[20];
int ret;
memset(buf, 0, sizeof(buf));
ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count);
if (!ret)
return -EFAULT;
ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn);
if (ret != 4) {
pr_err("%s: expected 4 args, got %d\n", __func__, ret);
return -EINVAL;
}
pdev = pci_get_domain_bus_and_slot(domain, bus, (dev << 3) | fn);
if (!pdev)
return -ENODEV;
ret = eeh_debugfs_break_device(pdev);
pci_dev_put(pdev);
if (ret < 0)
return ret;
return count;
}
static const struct file_operations eeh_dev_break_fops = {
.open = simple_open,
.llseek = no_llseek,
.write = eeh_dev_break_write,
.read = eeh_debugfs_dev_usage,
};
#endif
static int __init eeh_init_proc(void)
@@ -1905,6 +2078,12 @@ static int __init eeh_init_proc(void)
debugfs_create_bool("eeh_disable_recovery", 0600,
powerpc_debugfs_root,
&eeh_debugfs_no_recover);
debugfs_create_file_unsafe("eeh_dev_check", 0600,
powerpc_debugfs_root, NULL,
&eeh_dev_check_fops);
debugfs_create_file_unsafe("eeh_dev_break", 0600,
powerpc_debugfs_root, NULL,
&eeh_dev_break_fops);
debugfs_create_file_unsafe("eeh_force_recover", 0600,
powerpc_debugfs_root, NULL,
&eeh_force_recover_fops);

View File

@@ -148,8 +148,8 @@ eeh_addr_cache_insert(struct pci_dev *dev, resource_size_t alo,
piar->pcidev = dev;
piar->flags = flags;
pr_debug("PIAR: insert range=[%pap:%pap] dev=%s\n",
&alo, &ahi, pci_name(dev));
eeh_edev_dbg(piar->edev, "PIAR: insert range=[%pap:%pap]\n",
&alo, &ahi);
rb_link_node(&piar->rb_node, parent, p);
rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root);
@@ -229,8 +229,8 @@ restart:
piar = rb_entry(n, struct pci_io_addr_range, rb_node);
if (piar->pcidev == dev) {
pr_debug("PIAR: remove range=[%pap:%pap] dev=%s\n",
&piar->addr_lo, &piar->addr_hi, pci_name(dev));
eeh_edev_dbg(piar->edev, "PIAR: remove range=[%pap:%pap]\n",
&piar->addr_lo, &piar->addr_hi);
rb_erase(n, &pci_io_addr_cache_root.rb_root);
kfree(piar);
goto restart;
@@ -258,37 +258,14 @@ void eeh_addr_cache_rmv_dev(struct pci_dev *dev)
}
/**
* eeh_addr_cache_build - Build a cache of I/O addresses
* eeh_addr_cache_init - Initialize a cache of I/O addresses
*
* Build a cache of pci i/o addresses. This cache will be used to
* Initialize a cache of pci i/o addresses. This cache will be used to
* find the pci device that corresponds to a given address.
* This routine scans all pci busses to build the cache.
* Must be run late in boot process, after the pci controllers
* have been scanned for devices (after all device resources are known).
*/
void eeh_addr_cache_build(void)
void eeh_addr_cache_init(void)
{
struct pci_dn *pdn;
struct eeh_dev *edev;
struct pci_dev *dev = NULL;
spin_lock_init(&pci_io_addr_cache_root.piar_lock);
for_each_pci_dev(dev) {
pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
if (!pdn)
continue;
edev = pdn_to_eeh_dev(pdn);
if (!edev)
continue;
dev->dev.archdata.edev = edev;
edev->pdev = dev;
eeh_addr_cache_insert_dev(dev);
eeh_sysfs_add_device(dev);
}
}
static int eeh_addr_cache_show(struct seq_file *s, void *v)

View File

@@ -47,6 +47,8 @@ struct eeh_dev *eeh_dev_init(struct pci_dn *pdn)
/* Associate EEH device with OF node */
pdn->edev = edev;
edev->pdn = pdn;
edev->bdfn = (pdn->busno << 8) | pdn->devfn;
edev->controller = pdn->phb;
return edev;
}

View File

@@ -27,6 +27,7 @@
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/pci_hotplug.h>
#include <asm/eeh.h>
#include <asm/eeh_event.h>
#include <asm/ppc-pci.h>
@@ -81,23 +82,6 @@ static const char *pci_ers_result_name(enum pci_ers_result result)
}
};
static __printf(2, 3) void eeh_edev_info(const struct eeh_dev *edev,
const char *fmt, ...)
{
struct va_format vaf;
va_list args;
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
printk(KERN_INFO "EEH: PE#%x (PCI %s): %pV\n", edev->pe_config_addr,
edev->pdev ? dev_name(&edev->pdev->dev) : "none", &vaf);
va_end(args);
}
static enum pci_ers_result pci_ers_merge_result(enum pci_ers_result old,
enum pci_ers_result new)
{
@@ -113,8 +97,16 @@ static bool eeh_dev_removed(struct eeh_dev *edev)
static bool eeh_edev_actionable(struct eeh_dev *edev)
{
return (edev->pdev && !eeh_dev_removed(edev) &&
!eeh_pe_passed(edev->pe));
if (!edev->pdev)
return false;
if (edev->pdev->error_state == pci_channel_io_perm_failure)
return false;
if (eeh_dev_removed(edev))
return false;
if (eeh_pe_passed(edev->pe))
return false;
return true;
}
/**
@@ -214,12 +206,12 @@ static void eeh_enable_irq(struct eeh_dev *edev)
}
}
static void *eeh_dev_save_state(struct eeh_dev *edev, void *userdata)
static void eeh_dev_save_state(struct eeh_dev *edev, void *userdata)
{
struct pci_dev *pdev;
if (!edev)
return NULL;
return;
/*
* We cannot access the config space on some adapters.
@@ -229,14 +221,13 @@ static void *eeh_dev_save_state(struct eeh_dev *edev, void *userdata)
* device is created.
*/
if (edev->pe && (edev->pe->state & EEH_PE_CFG_RESTRICTED))
return NULL;
return;
pdev = eeh_dev_to_pci_dev(edev);
if (!pdev)
return NULL;
return;
pci_save_state(pdev);
return NULL;
}
static void eeh_set_channel_state(struct eeh_pe *root, enum pci_channel_state s)
@@ -274,20 +265,27 @@ static void eeh_set_irq_state(struct eeh_pe *root, bool enable)
}
typedef enum pci_ers_result (*eeh_report_fn)(struct eeh_dev *,
struct pci_dev *,
struct pci_driver *);
static void eeh_pe_report_edev(struct eeh_dev *edev, eeh_report_fn fn,
enum pci_ers_result *result)
{
struct pci_dev *pdev;
struct pci_driver *driver;
enum pci_ers_result new_result;
if (!edev->pdev) {
pci_lock_rescan_remove();
pdev = edev->pdev;
if (pdev)
get_device(&pdev->dev);
pci_unlock_rescan_remove();
if (!pdev) {
eeh_edev_info(edev, "no device");
return;
}
device_lock(&edev->pdev->dev);
device_lock(&pdev->dev);
if (eeh_edev_actionable(edev)) {
driver = eeh_pcid_get(edev->pdev);
driver = eeh_pcid_get(pdev);
if (!driver)
eeh_edev_info(edev, "no driver");
@@ -296,7 +294,7 @@ static void eeh_pe_report_edev(struct eeh_dev *edev, eeh_report_fn fn,
else if (edev->mode & EEH_DEV_NO_HANDLER)
eeh_edev_info(edev, "driver bound too late");
else {
new_result = fn(edev, driver);
new_result = fn(edev, pdev, driver);
eeh_edev_info(edev, "%s driver reports: '%s'",
driver->name,
pci_ers_result_name(new_result));
@@ -305,12 +303,15 @@ static void eeh_pe_report_edev(struct eeh_dev *edev, eeh_report_fn fn,
new_result);
}
if (driver)
eeh_pcid_put(edev->pdev);
eeh_pcid_put(pdev);
} else {
eeh_edev_info(edev, "not actionable (%d,%d,%d)", !!edev->pdev,
eeh_edev_info(edev, "not actionable (%d,%d,%d)", !!pdev,
!eeh_dev_removed(edev), !eeh_pe_passed(edev->pe));
}
device_unlock(&edev->pdev->dev);
device_unlock(&pdev->dev);
if (edev->pdev != pdev)
eeh_edev_warn(edev, "Device changed during processing!\n");
put_device(&pdev->dev);
}
static void eeh_pe_report(const char *name, struct eeh_pe *root,
@@ -337,20 +338,20 @@ static void eeh_pe_report(const char *name, struct eeh_pe *root,
* Report an EEH error to each device driver.
*/
static enum pci_ers_result eeh_report_error(struct eeh_dev *edev,
struct pci_dev *pdev,
struct pci_driver *driver)
{
enum pci_ers_result rc;
struct pci_dev *dev = edev->pdev;
if (!driver->err_handler->error_detected)
return PCI_ERS_RESULT_NONE;
eeh_edev_info(edev, "Invoking %s->error_detected(IO frozen)",
driver->name);
rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen);
rc = driver->err_handler->error_detected(pdev, pci_channel_io_frozen);
edev->in_error = true;
pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
pci_uevent_ers(pdev, PCI_ERS_RESULT_NONE);
return rc;
}
@@ -363,12 +364,13 @@ static enum pci_ers_result eeh_report_error(struct eeh_dev *edev,
* are now enabled.
*/
static enum pci_ers_result eeh_report_mmio_enabled(struct eeh_dev *edev,
struct pci_dev *pdev,
struct pci_driver *driver)
{
if (!driver->err_handler->mmio_enabled)
return PCI_ERS_RESULT_NONE;
eeh_edev_info(edev, "Invoking %s->mmio_enabled()", driver->name);
return driver->err_handler->mmio_enabled(edev->pdev);
return driver->err_handler->mmio_enabled(pdev);
}
/**
@@ -382,20 +384,21 @@ static enum pci_ers_result eeh_report_mmio_enabled(struct eeh_dev *edev,
* driver can work again while the device is recovered.
*/
static enum pci_ers_result eeh_report_reset(struct eeh_dev *edev,
struct pci_dev *pdev,
struct pci_driver *driver)
{
if (!driver->err_handler->slot_reset || !edev->in_error)
return PCI_ERS_RESULT_NONE;
eeh_edev_info(edev, "Invoking %s->slot_reset()", driver->name);
return driver->err_handler->slot_reset(edev->pdev);
return driver->err_handler->slot_reset(pdev);
}
static void *eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
static void eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
{
struct pci_dev *pdev;
if (!edev)
return NULL;
return;
/*
* The content in the config space isn't saved because
@@ -407,15 +410,14 @@ static void *eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
if (list_is_last(&edev->entry, &edev->pe->edevs))
eeh_pe_restore_bars(edev->pe);
return NULL;
return;
}
pdev = eeh_dev_to_pci_dev(edev);
if (!pdev)
return NULL;
return;
pci_restore_state(pdev);
return NULL;
}
/**
@@ -428,13 +430,14 @@ static void *eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
* to make the recovered device work again.
*/
static enum pci_ers_result eeh_report_resume(struct eeh_dev *edev,
struct pci_dev *pdev,
struct pci_driver *driver)
{
if (!driver->err_handler->resume || !edev->in_error)
return PCI_ERS_RESULT_NONE;
eeh_edev_info(edev, "Invoking %s->resume()", driver->name);
driver->err_handler->resume(edev->pdev);
driver->err_handler->resume(pdev);
pci_uevent_ers(edev->pdev, PCI_ERS_RESULT_RECOVERED);
#ifdef CONFIG_PCI_IOV
@@ -453,6 +456,7 @@ static enum pci_ers_result eeh_report_resume(struct eeh_dev *edev,
* dead, and that no further recovery attempts will be made on it.
*/
static enum pci_ers_result eeh_report_failure(struct eeh_dev *edev,
struct pci_dev *pdev,
struct pci_driver *driver)
{
enum pci_ers_result rc;
@@ -462,10 +466,10 @@ static enum pci_ers_result eeh_report_failure(struct eeh_dev *edev,
eeh_edev_info(edev, "Invoking %s->error_detected(permanent failure)",
driver->name);
rc = driver->err_handler->error_detected(edev->pdev,
rc = driver->err_handler->error_detected(pdev,
pci_channel_io_perm_failure);
pci_uevent_ers(edev->pdev, PCI_ERS_RESULT_DISCONNECT);
pci_uevent_ers(pdev, PCI_ERS_RESULT_DISCONNECT);
return rc;
}
@@ -473,12 +477,9 @@ static void *eeh_add_virt_device(struct eeh_dev *edev)
{
struct pci_driver *driver;
struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
struct pci_dn *pdn = eeh_dev_to_pdn(edev);
if (!(edev->physfn)) {
pr_warn("%s: EEH dev %04x:%02x:%02x.%01x not for VF\n",
__func__, pdn->phb->global_number, pdn->busno,
PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
eeh_edev_warn(edev, "Not for VF\n");
return NULL;
}
@@ -492,12 +493,12 @@ static void *eeh_add_virt_device(struct eeh_dev *edev)
}
#ifdef CONFIG_PCI_IOV
pci_iov_add_virtfn(edev->physfn, pdn->vf_index);
pci_iov_add_virtfn(edev->physfn, eeh_dev_to_pdn(edev)->vf_index);
#endif
return NULL;
}
static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
static void eeh_rmv_device(struct eeh_dev *edev, void *userdata)
{
struct pci_driver *driver;
struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
@@ -512,7 +513,7 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
*/
if (!eeh_edev_actionable(edev) ||
(dev->hdr_type == PCI_HEADER_TYPE_BRIDGE))
return NULL;
return;
if (rmv_data) {
driver = eeh_pcid_get(dev);
@@ -521,7 +522,7 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
driver->err_handler->error_detected &&
driver->err_handler->slot_reset) {
eeh_pcid_put(dev);
return NULL;
return;
}
eeh_pcid_put(dev);
}
@@ -554,8 +555,6 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
pci_stop_and_remove_bus_device(dev);
pci_unlock_rescan_remove();
}
return NULL;
}
static void *eeh_pe_detach_dev(struct eeh_pe *pe, void *userdata)
@@ -744,6 +743,99 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
*/
#define MAX_WAIT_FOR_RECOVERY 300
/* Walks the PE tree after processing an event to remove any stale PEs.
*
* NB: This needs to be recursive to ensure the leaf PEs get removed
* before their parents do. Although this is possible to do recursively
* we don't since this is easier to read and we need to garantee
* the leaf nodes will be handled first.
*/
static void eeh_pe_cleanup(struct eeh_pe *pe)
{
struct eeh_pe *child_pe, *tmp;
list_for_each_entry_safe(child_pe, tmp, &pe->child_list, child)
eeh_pe_cleanup(child_pe);
if (pe->state & EEH_PE_KEEP)
return;
if (!(pe->state & EEH_PE_INVALID))
return;
if (list_empty(&pe->edevs) && list_empty(&pe->child_list)) {
list_del(&pe->child);
kfree(pe);
}
}
/**
* eeh_check_slot_presence - Check if a device is still present in a slot
* @pdev: pci_dev to check
*
* This function may return a false positive if we can't determine the slot's
* presence state. This might happen for for PCIe slots if the PE containing
* the upstream bridge is also frozen, or the bridge is part of the same PE
* as the device.
*
* This shouldn't happen often, but you might see it if you hotplug a PCIe
* switch.
*/
static bool eeh_slot_presence_check(struct pci_dev *pdev)
{
const struct hotplug_slot_ops *ops;
struct pci_slot *slot;
u8 state;
int rc;
if (!pdev)
return false;
if (pdev->error_state == pci_channel_io_perm_failure)
return false;
slot = pdev->slot;
if (!slot || !slot->hotplug)
return true;
ops = slot->hotplug->ops;
if (!ops || !ops->get_adapter_status)
return true;
/* set the attention indicator while we've got the slot ops */
if (ops->set_attention_status)
ops->set_attention_status(slot->hotplug, 1);
rc = ops->get_adapter_status(slot->hotplug, &state);
if (rc)
return true;
return !!state;
}
static void eeh_clear_slot_attention(struct pci_dev *pdev)
{
const struct hotplug_slot_ops *ops;
struct pci_slot *slot;
if (!pdev)
return;
if (pdev->error_state == pci_channel_io_perm_failure)
return;
slot = pdev->slot;
if (!slot || !slot->hotplug)
return;
ops = slot->hotplug->ops;
if (!ops || !ops->set_attention_status)
return;
ops->set_attention_status(slot->hotplug, 0);
}
/**
* eeh_handle_normal_event - Handle EEH events on a specific PE
* @pe: EEH PE - which should not be used after we return, as it may
@@ -774,6 +866,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
enum pci_ers_result result = PCI_ERS_RESULT_NONE;
struct eeh_rmv_data rmv_data =
{LIST_HEAD_INIT(rmv_data.removed_vf_list), 0};
int devices = 0;
bus = eeh_pe_bus_get(pe);
if (!bus) {
@@ -782,7 +875,59 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
return;
}
eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
/*
* When devices are hot-removed we might get an EEH due to
* a driver attempting to touch the MMIO space of a removed
* device. In this case we don't have a device to recover
* so suppress the event if we can't find any present devices.
*
* The hotplug driver should take care of tearing down the
* device itself.
*/
eeh_for_each_pe(pe, tmp_pe)
eeh_pe_for_each_dev(tmp_pe, edev, tmp)
if (eeh_slot_presence_check(edev->pdev))
devices++;
if (!devices) {
pr_debug("EEH: Frozen PHB#%x-PE#%x is empty!\n",
pe->phb->global_number, pe->addr);
goto out; /* nothing to recover */
}
/* Log the event */
if (pe->type & EEH_PE_PHB) {
pr_err("EEH: PHB#%x failure detected, location: %s\n",
pe->phb->global_number, eeh_pe_loc_get(pe));
} else {
struct eeh_pe *phb_pe = eeh_phb_pe_get(pe->phb);
pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
pe->phb->global_number, pe->addr);
pr_err("EEH: PE location: %s, PHB location: %s\n",
eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
}
#ifdef CONFIG_STACKTRACE
/*
* Print the saved stack trace now that we've verified there's
* something to recover.
*/
if (pe->trace_entries) {
void **ptrs = (void **) pe->stack_trace;
int i;
pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
pe->phb->global_number, pe->addr);
/* FIXME: Use the same format as dump_stack() */
pr_err("EEH: Call Trace:\n");
for (i = 0; i < pe->trace_entries; i++)
pr_err("EEH: [%pK] %pS\n", ptrs[i], ptrs[i]);
pe->trace_entries = 0;
}
#endif /* CONFIG_STACKTRACE */
eeh_pe_update_time_stamp(pe);
pe->freeze_count++;
@@ -793,6 +938,10 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
result = PCI_ERS_RESULT_DISCONNECT;
}
eeh_for_each_pe(pe, tmp_pe)
eeh_pe_for_each_dev(tmp_pe, edev, tmp)
edev->mode &= ~EEH_DEV_NO_HANDLER;
/* Walk the various device drivers attached to this slot through
* a reset sequence, giving each an opportunity to do what it needs
* to accomplish the reset. Each child gets a report of the
@@ -969,6 +1118,19 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
return;
}
}
out:
/*
* Clean up any PEs without devices. While marked as EEH_PE_RECOVERYING
* we don't want to modify the PE tree structure so we do it here.
*/
eeh_pe_cleanup(pe);
/* clear the slot attention LED for all recovered devices */
eeh_for_each_pe(pe, tmp_pe)
eeh_pe_for_each_dev(tmp_pe, edev, tmp)
eeh_clear_slot_attention(edev->pdev);
eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
}
@@ -981,7 +1143,8 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
*/
void eeh_handle_special_event(void)
{
struct eeh_pe *pe, *phb_pe;
struct eeh_pe *pe, *phb_pe, *tmp_pe;
struct eeh_dev *edev, *tmp_edev;
struct pci_bus *bus;
struct pci_controller *hose;
unsigned long flags;
@@ -1040,6 +1203,7 @@ void eeh_handle_special_event(void)
*/
if (rc == EEH_NEXT_ERR_FROZEN_PE ||
rc == EEH_NEXT_ERR_FENCED_PHB) {
eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
eeh_handle_normal_event(pe);
} else {
pci_lock_rescan_remove();
@@ -1050,6 +1214,10 @@ void eeh_handle_special_event(void)
(phb_pe->state & EEH_PE_RECOVERING))
continue;
eeh_for_each_pe(pe, tmp_pe)
eeh_pe_for_each_dev(tmp_pe, edev, tmp_edev)
edev->mode &= ~EEH_DEV_NO_HANDLER;
/* Notify all devices to be down */
eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
eeh_set_channel_state(pe, pci_channel_io_perm_failure);

View File

@@ -40,7 +40,6 @@ static int eeh_event_handler(void * dummy)
{
unsigned long flags;
struct eeh_event *event;
struct eeh_pe *pe;
while (!kthread_should_stop()) {
if (wait_for_completion_interruptible(&eeh_eventlist_event))
@@ -59,19 +58,10 @@ static int eeh_event_handler(void * dummy)
continue;
/* We might have event without binding PE */
pe = event->pe;
if (pe) {
if (pe->type & EEH_PE_PHB)
pr_info("EEH: Detected error on PHB#%x\n",
pe->phb->global_number);
else
pr_info("EEH: Detected PCI bus error on "
"PHB#%x-PE#%x\n",
pe->phb->global_number, pe->addr);
eeh_handle_normal_event(pe);
} else {
if (event->pe)
eeh_handle_normal_event(event->pe);
else
eeh_handle_special_event();
}
kfree(event);
}
@@ -121,6 +111,24 @@ int __eeh_send_failure_event(struct eeh_pe *pe)
}
event->pe = pe;
/*
* Mark the PE as recovering before inserting it in the queue.
* This prevents the PE from being free()ed by a hotplug driver
* while the PE is sitting in the event queue.
*/
if (pe) {
#ifdef CONFIG_STACKTRACE
/*
* Save the current stack trace so we can dump it from the
* event handler thread.
*/
pe->trace_entries = stack_trace_save(pe->stack_trace,
ARRAY_SIZE(pe->stack_trace), 0);
#endif /* CONFIG_STACKTRACE */
eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
}
/* We may or may not be called in an interrupt context */
spin_lock_irqsave(&eeh_eventlist_lock, flags);
list_add(&event->list, &eeh_eventlist);

View File

@@ -231,29 +231,22 @@ void *eeh_pe_traverse(struct eeh_pe *root,
* The function is used to traverse the devices of the specified
* PE and its child PEs.
*/
void *eeh_pe_dev_traverse(struct eeh_pe *root,
void eeh_pe_dev_traverse(struct eeh_pe *root,
eeh_edev_traverse_func fn, void *flag)
{
struct eeh_pe *pe;
struct eeh_dev *edev, *tmp;
void *ret;
if (!root) {
pr_warn("%s: Invalid PE %p\n",
__func__, root);
return NULL;
return;
}
/* Traverse root PE */
eeh_for_each_pe(root, pe) {
eeh_pe_for_each_dev(pe, edev, tmp) {
ret = fn(edev, flag);
if (ret)
return ret;
}
}
return NULL;
eeh_for_each_pe(root, pe)
eeh_pe_for_each_dev(pe, edev, tmp)
fn(edev, flag);
}
/**
@@ -379,8 +372,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
/* Check if the PE number is valid */
if (!eeh_has_flag(EEH_VALID_PE_ZERO) && !edev->pe_config_addr) {
pr_err("%s: Invalid PE#0 for edev 0x%x on PHB#%x\n",
__func__, config_addr, pdn->phb->global_number);
eeh_edev_err(edev, "PE#0 is invalid for this PHB!\n");
return -EINVAL;
}
@@ -391,42 +383,34 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
* components.
*/
pe = eeh_pe_get(pdn->phb, edev->pe_config_addr, config_addr);
if (pe && !(pe->type & EEH_PE_INVALID)) {
/* Mark the PE as type of PCI bus */
pe->type = EEH_PE_BUS;
edev->pe = pe;
if (pe) {
if (pe->type & EEH_PE_INVALID) {
list_add_tail(&edev->entry, &pe->edevs);
edev->pe = pe;
/*
* We're running to here because of PCI hotplug caused by
* EEH recovery. We need clear EEH_PE_INVALID until the top.
*/
parent = pe;
while (parent) {
if (!(parent->type & EEH_PE_INVALID))
break;
parent->type &= ~EEH_PE_INVALID;
parent = parent->parent;
}
/* Put the edev to PE */
list_add_tail(&edev->entry, &pe->edevs);
pr_debug("EEH: Add %04x:%02x:%02x.%01x to Bus PE#%x\n",
pdn->phb->global_number,
pdn->busno,
PCI_SLOT(pdn->devfn),
PCI_FUNC(pdn->devfn),
pe->addr);
return 0;
} else if (pe && (pe->type & EEH_PE_INVALID)) {
list_add_tail(&edev->entry, &pe->edevs);
edev->pe = pe;
/*
* We're running to here because of PCI hotplug caused by
* EEH recovery. We need clear EEH_PE_INVALID until the top.
*/
parent = pe;
while (parent) {
if (!(parent->type & EEH_PE_INVALID))
break;
parent->type &= ~EEH_PE_INVALID;
parent = parent->parent;
eeh_edev_dbg(edev,
"Added to device PE (parent: PE#%x)\n",
pe->parent->addr);
} else {
/* Mark the PE as type of PCI bus */
pe->type = EEH_PE_BUS;
edev->pe = pe;
/* Put the edev to PE */
list_add_tail(&edev->entry, &pe->edevs);
eeh_edev_dbg(edev, "Added to bus PE\n");
}
pr_debug("EEH: Add %04x:%02x:%02x.%01x to Device "
"PE#%x, Parent PE#%x\n",
pdn->phb->global_number,
pdn->busno,
PCI_SLOT(pdn->devfn),
PCI_FUNC(pdn->devfn),
pe->addr, pe->parent->addr);
return 0;
}
@@ -468,13 +452,8 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
list_add_tail(&pe->child, &parent->child_list);
list_add_tail(&edev->entry, &pe->edevs);
edev->pe = pe;
pr_debug("EEH: Add %04x:%02x:%02x.%01x to "
"Device PE#%x, Parent PE#%x\n",
pdn->phb->global_number,
pdn->busno,
PCI_SLOT(pdn->devfn),
PCI_FUNC(pdn->devfn),
pe->addr, pe->parent->addr);
eeh_edev_dbg(edev, "Added to device PE (parent: PE#%x)\n",
pe->parent->addr);
return 0;
}
@@ -491,16 +470,12 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
{
struct eeh_pe *pe, *parent, *child;
bool keep, recover;
int cnt;
struct pci_dn *pdn = eeh_dev_to_pdn(edev);
pe = eeh_dev_to_pe(edev);
if (!pe) {
pr_debug("%s: No PE found for device %04x:%02x:%02x.%01x\n",
__func__, pdn->phb->global_number,
pdn->busno,
PCI_SLOT(pdn->devfn),
PCI_FUNC(pdn->devfn));
eeh_edev_dbg(edev, "No PE found for device.\n");
return -EEXIST;
}
@@ -516,10 +491,21 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
*/
while (1) {
parent = pe->parent;
/* PHB PEs should never be removed */
if (pe->type & EEH_PE_PHB)
break;
if (!(pe->state & EEH_PE_KEEP)) {
/*
* XXX: KEEP is set while resetting a PE. I don't think it's
* ever set without RECOVERING also being set. I could
* be wrong though so catch that with a WARN.
*/
keep = !!(pe->state & EEH_PE_KEEP);
recover = !!(pe->state & EEH_PE_RECOVERING);
WARN_ON(keep && !recover);
if (!keep && !recover) {
if (list_empty(&pe->edevs) &&
list_empty(&pe->child_list)) {
list_del(&pe->child);
@@ -528,6 +514,15 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
break;
}
} else {
/*
* Mark the PE as invalid. At the end of the recovery
* process any invalid PEs will be garbage collected.
*
* We need to delay the free()ing of them since we can
* remove edev's while traversing the PE tree which
* might trigger the removal of a PE and we can't
* deal with that (yet).
*/
if (list_empty(&pe->edevs)) {
cnt = 0;
list_for_each_entry(child, &pe->child_list, child) {
@@ -623,13 +618,11 @@ void eeh_pe_mark_isolated(struct eeh_pe *root)
}
EXPORT_SYMBOL_GPL(eeh_pe_mark_isolated);
static void *__eeh_pe_dev_mode_mark(struct eeh_dev *edev, void *flag)
static void __eeh_pe_dev_mode_mark(struct eeh_dev *edev, void *flag)
{
int mode = *((int *)flag);
edev->mode |= mode;
return NULL;
}
/**
@@ -717,17 +710,13 @@ static void eeh_bridge_check_link(struct eeh_dev *edev)
if (!(edev->mode & (EEH_DEV_ROOT_PORT | EEH_DEV_DS_PORT)))
return;
pr_debug("%s: Check PCIe link for %04x:%02x:%02x.%01x ...\n",
__func__, pdn->phb->global_number,
pdn->busno,
PCI_SLOT(pdn->devfn),
PCI_FUNC(pdn->devfn));
eeh_edev_dbg(edev, "Checking PCIe link...\n");
/* Check slot status */
cap = edev->pcie_cap;
eeh_ops->read_config(pdn, cap + PCI_EXP_SLTSTA, 2, &val);
if (!(val & PCI_EXP_SLTSTA_PDS)) {
pr_debug(" No card in the slot (0x%04x) !\n", val);
eeh_edev_dbg(edev, "No card in the slot (0x%04x) !\n", val);
return;
}
@@ -736,7 +725,7 @@ static void eeh_bridge_check_link(struct eeh_dev *edev)
if (val & PCI_EXP_SLTCAP_PCP) {
eeh_ops->read_config(pdn, cap + PCI_EXP_SLTCTL, 2, &val);
if (val & PCI_EXP_SLTCTL_PCC) {
pr_debug(" In power-off state, power it on ...\n");
eeh_edev_dbg(edev, "In power-off state, power it on ...\n");
val &= ~(PCI_EXP_SLTCTL_PCC | PCI_EXP_SLTCTL_PIC);
val |= (0x0100 & PCI_EXP_SLTCTL_PIC);
eeh_ops->write_config(pdn, cap + PCI_EXP_SLTCTL, 2, val);
@@ -752,7 +741,7 @@ static void eeh_bridge_check_link(struct eeh_dev *edev)
/* Check link */
eeh_ops->read_config(pdn, cap + PCI_EXP_LNKCAP, 4, &val);
if (!(val & PCI_EXP_LNKCAP_DLLLARC)) {
pr_debug(" No link reporting capability (0x%08x) \n", val);
eeh_edev_dbg(edev, "No link reporting capability (0x%08x) \n", val);
msleep(1000);
return;
}
@@ -769,10 +758,10 @@ static void eeh_bridge_check_link(struct eeh_dev *edev)
}
if (val & PCI_EXP_LNKSTA_DLLLA)
pr_debug(" Link up (%s)\n",
eeh_edev_dbg(edev, "Link up (%s)\n",
(val & PCI_EXP_LNKSTA_CLS_2_5GB) ? "2.5GB" : "5GB");
else
pr_debug(" Link not ready (0x%04x)\n", val);
eeh_edev_dbg(edev, "Link not ready (0x%04x)\n", val);
}
#define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF))
@@ -852,7 +841,7 @@ static void eeh_restore_device_bars(struct eeh_dev *edev)
* the expansion ROM base address, the latency timer, and etc.
* from the saved values in the device node.
*/
static void *eeh_restore_one_device_bars(struct eeh_dev *edev, void *flag)
static void eeh_restore_one_device_bars(struct eeh_dev *edev, void *flag)
{
struct pci_dn *pdn = eeh_dev_to_pdn(edev);
@@ -864,8 +853,6 @@ static void *eeh_restore_one_device_bars(struct eeh_dev *edev, void *flag)
if (eeh_ops->restore_config && pdn)
eeh_ops->restore_config(pdn);
return NULL;
}
/**

View File

@@ -230,7 +230,7 @@ transfer_to_handler_cont:
*/
lis r12,reenable_mmu@h
ori r12,r12,reenable_mmu@l
LOAD_MSR_KERNEL(r0, MSR_KERNEL)
LOAD_REG_IMMEDIATE(r0, MSR_KERNEL)
mtspr SPRN_SRR0,r12
mtspr SPRN_SRR1,r0
SYNC
@@ -304,7 +304,7 @@ stack_ovf:
addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
lis r9,StackOverflow@ha
addi r9,r9,StackOverflow@l
LOAD_MSR_KERNEL(r10,MSR_KERNEL)
LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
#endif
@@ -324,7 +324,7 @@ trace_syscall_entry_irq_off:
bl trace_hardirqs_on
/* Now enable for real */
LOAD_MSR_KERNEL(r10, MSR_KERNEL | MSR_EE)
LOAD_REG_IMMEDIATE(r10, MSR_KERNEL | MSR_EE)
mtmsr r10
REST_GPR(0, r1)
@@ -394,7 +394,7 @@ ret_from_syscall:
#endif
mr r6,r3
/* disable interrupts so current_thread_info()->flags can't change */
LOAD_MSR_KERNEL(r10,MSR_KERNEL) /* doesn't include MSR_EE */
LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) /* doesn't include MSR_EE */
/* Note: We don't bother telling lockdep about it */
SYNC
MTMSRD(r10)
@@ -777,11 +777,19 @@ fast_exception_return:
1: lis r3,exc_exit_restart_end@ha
addi r3,r3,exc_exit_restart_end@l
cmplw r12,r3
#if CONFIG_PPC_BOOK3S_601
bge 2b
#else
bge 3f
#endif
lis r4,exc_exit_restart@ha
addi r4,r4,exc_exit_restart@l
cmplw r12,r4
#if CONFIG_PPC_BOOK3S_601
blt 2b
#else
blt 3f
#endif
lis r3,fee_restarts@ha
tophys(r3,r3)
lwz r5,fee_restarts@l(r3)
@@ -800,9 +808,6 @@ fee_restarts:
/* aargh, we don't know which trap this is */
/* but the 601 doesn't implement the RI bit, so assume it's OK */
3:
BEGIN_FTR_SECTION
b 2b
END_FTR_SECTION_IFSET(CPU_FTR_601)
li r10,-1
stw r10,_TRAP(r11)
addi r3,r1,STACK_FRAME_OVERHEAD
@@ -824,7 +829,7 @@ ret_from_except:
* can't change between when we test it and when we return
* from the interrupt. */
/* Note: We don't bother telling lockdep about it */
LOAD_MSR_KERNEL(r10,MSR_KERNEL)
LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
SYNC /* Some chip revs have problems here... */
MTMSRD(r10) /* disable interrupts */
@@ -991,7 +996,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
* can restart the exception exit path at the label
* exc_exit_restart below. -- paulus
*/
LOAD_MSR_KERNEL(r10,MSR_KERNEL & ~MSR_RI)
LOAD_REG_IMMEDIATE(r10,MSR_KERNEL & ~MSR_RI)
SYNC
MTMSRD(r10) /* clear the RI bit */
.globl exc_exit_restart
@@ -1066,7 +1071,7 @@ exc_exit_restart_end:
REST_NVGPRS(r1); \
lwz r3,_MSR(r1); \
andi. r3,r3,MSR_PR; \
LOAD_MSR_KERNEL(r10,MSR_KERNEL); \
LOAD_REG_IMMEDIATE(r10,MSR_KERNEL); \
bne user_exc_return; \
lwz r0,GPR0(r1); \
lwz r2,GPR2(r1); \
@@ -1236,7 +1241,7 @@ recheck:
* neither. Those disable/enable cycles used to peek at
* TI_FLAGS aren't advertised.
*/
LOAD_MSR_KERNEL(r10,MSR_KERNEL)
LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
SYNC
MTMSRD(r10) /* disable interrupts */
lwz r9,TI_FLAGS(r2)
@@ -1270,11 +1275,19 @@ nonrecoverable:
lis r10,exc_exit_restart_end@ha
addi r10,r10,exc_exit_restart_end@l
cmplw r12,r10
#ifdef CONFIG_PPC_BOOK3S_601
bgelr
#else
bge 3f
#endif
lis r11,exc_exit_restart@ha
addi r11,r11,exc_exit_restart@l
cmplw r12,r11
#ifdef CONFIG_PPC_BOOK3S_601
bltlr
#else
blt 3f
#endif
lis r10,ee_restarts@ha
lwz r12,ee_restarts@l(r10)
addi r12,r12,1
@@ -1283,9 +1296,6 @@ nonrecoverable:
blr
3: /* OK, we can't recover, kill this process */
/* but the 601 doesn't implement the RI bit, so assume it's OK */
BEGIN_FTR_SECTION
blr
END_FTR_SECTION_IFSET(CPU_FTR_601)
lwz r3,_TRAP(r1)
andi. r0,r3,1
beq 5f
@@ -1329,7 +1339,7 @@ _GLOBAL(enter_rtas)
lwz r4,RTASBASE(r4)
mfmsr r9
stw r9,8(r1)
LOAD_MSR_KERNEL(r0,MSR_KERNEL)
LOAD_REG_IMMEDIATE(r0,MSR_KERNEL)
SYNC /* disable interrupts so SRR0/1 */
MTMSRD(r0) /* don't get trashed */
li r9,MSR_KERNEL & ~(MSR_IR|MSR_DR)

View File

@@ -69,24 +69,20 @@ BEGIN_FTR_SECTION
bne .Ltabort_syscall
END_FTR_SECTION_IFSET(CPU_FTR_TM)
#endif
andi. r10,r12,MSR_PR
mr r10,r1
addi r1,r1,-INT_FRAME_SIZE
beq- 1f
ld r1,PACAKSAVE(r13)
1: std r10,0(r1)
std r10,0(r1)
std r11,_NIP(r1)
std r12,_MSR(r1)
std r0,GPR0(r1)
std r10,GPR1(r1)
beq 2f /* if from kernel mode */
#ifdef CONFIG_PPC_FSL_BOOK3E
START_BTB_FLUSH_SECTION
BTB_FLUSH(r10)
END_BTB_FLUSH_SECTION
#endif
ACCOUNT_CPU_USER_ENTRY(r13, r10, r11)
2: std r2,GPR2(r1)
std r2,GPR2(r1)
std r3,GPR3(r1)
mfcr r2
std r4,GPR4(r1)
@@ -122,14 +118,13 @@ END_BTB_FLUSH_SECTION
#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR)
BEGIN_FW_FTR_SECTION
beq 33f
/* if from user, see if there are any DTL entries to process */
/* see if there are any DTL entries to process */
ld r10,PACALPPACAPTR(r13) /* get ptr to VPA */
ld r11,PACA_DTL_RIDX(r13) /* get log read index */
addi r10,r10,LPPACA_DTLIDX
LDX_BE r10,0,r10 /* get log write index */
cmpd cr1,r11,r10
beq+ cr1,33f
cmpd r11,r10
beq+ 33f
bl accumulate_stolen_time
REST_GPR(0,r1)
REST_4GPRS(3,r1)
@@ -203,6 +198,7 @@ system_call: /* label this so stack traces look sane */
mtctr r12
bctrl /* Call handler */
/* syscall_exit can exit to kernel mode, via ret_from_kernel_thread */
.Lsyscall_exit:
std r3,RESULT(r1)
@@ -216,11 +212,6 @@ system_call: /* label this so stack traces look sane */
ld r12, PACA_THREAD_INFO(r13)
ld r8,_MSR(r1)
#ifdef CONFIG_PPC_BOOK3S
/* No MSR:RI on BookE */
andi. r10,r8,MSR_RI
beq- .Lunrecov_restore
#endif
/*
* This is a few instructions into the actual syscall exit path (which actually

View File

@@ -750,12 +750,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
ld r15,PACATOC(r13)
ld r14,interrupt_base_book3e@got(r15)
ld r15,__end_interrupts@got(r15)
#else
LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
LOAD_REG_IMMEDIATE(r15,__end_interrupts)
#endif
cmpld cr0,r10,r14
cmpld cr1,r10,r15
#else
LOAD_REG_IMMEDIATE_SYM(r14, r15, interrupt_base_book3e)
cmpld cr0, r10, r14
LOAD_REG_IMMEDIATE_SYM(r14, r15, __end_interrupts)
cmpld cr1, r10, r14
#endif
blt+ cr0,1f
bge+ cr1,1f
@@ -820,12 +822,14 @@ kernel_dbg_exc:
ld r15,PACATOC(r13)
ld r14,interrupt_base_book3e@got(r15)
ld r15,__end_interrupts@got(r15)
#else
LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
LOAD_REG_IMMEDIATE(r15,__end_interrupts)
#endif
cmpld cr0,r10,r14
cmpld cr1,r10,r15
#else
LOAD_REG_IMMEDIATE_SYM(r14, r15, interrupt_base_book3e)
cmpld cr0, r10, r14
LOAD_REG_IMMEDIATE_SYM(r14, r15,__end_interrupts)
cmpld cr1, r10, r14
#endif
blt+ cr0,1f
bge+ cr1,1f
@@ -1449,7 +1453,7 @@ a2_tlbinit_code_start:
a2_tlbinit_after_linear_map:
/* Now we branch the new virtual address mapped by this entry */
LOAD_REG_IMMEDIATE(r3,1f)
LOAD_REG_IMMEDIATE_SYM(r3, r5, 1f)
mtctr r3
bctr

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -34,7 +34,16 @@
#include "head_32.h"
/* 601 only have IBAT; cr0.eq is set on 601 when using this macro */
/* 601 only have IBAT */
#ifdef CONFIG_PPC_BOOK3S_601
#define LOAD_BAT(n, reg, RA, RB) \
li RA,0; \
mtspr SPRN_IBAT##n##U,RA; \
lwz RA,(n*16)+0(reg); \
lwz RB,(n*16)+4(reg); \
mtspr SPRN_IBAT##n##U,RA; \
mtspr SPRN_IBAT##n##L,RB
#else
#define LOAD_BAT(n, reg, RA, RB) \
/* see the comment for clear_bats() -- Cort */ \
li RA,0; \
@@ -44,12 +53,11 @@
lwz RB,(n*16)+4(reg); \
mtspr SPRN_IBAT##n##U,RA; \
mtspr SPRN_IBAT##n##L,RB; \
beq 1f; \
lwz RA,(n*16)+8(reg); \
lwz RB,(n*16)+12(reg); \
mtspr SPRN_DBAT##n##U,RA; \
mtspr SPRN_DBAT##n##L,RB; \
1:
mtspr SPRN_DBAT##n##L,RB
#endif
__HEAD
.stabs "arch/powerpc/kernel/",N_SO,0,0,0f
@@ -557,9 +565,9 @@ DataStoreTLBMiss:
cmplw 0,r1,r3
mfspr r2, SPRN_SPRG_PGDIR
#ifdef CONFIG_SWAP
li r1, _PAGE_RW | _PAGE_PRESENT | _PAGE_ACCESSED
li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED
#else
li r1, _PAGE_RW | _PAGE_PRESENT
li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT
#endif
bge- 112f
lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */
@@ -820,9 +828,6 @@ load_up_mmu:
/* Load the BAT registers with the values set up by MMU_init.
MMU_init takes care of whether we're on a 601 or not. */
mfpvr r3
srwi r3,r3,16
cmpwi r3,1
lis r3,BATS@ha
addi r3,r3,BATS@l
tophys(r3,r3)
@@ -897,9 +902,11 @@ start_here:
bl machine_init
bl __save_cpu_setup
bl MMU_init
#ifdef CONFIG_KASAN
BEGIN_MMU_FTR_SECTION
bl MMU_init_hw_patch
END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
#endif
/*
* Go back to running unmapped so we can load up new values
@@ -996,11 +1003,8 @@ EXPORT_SYMBOL(switch_mmu_context)
*/
clear_bats:
li r10,0
mfspr r9,SPRN_PVR
rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */
cmpwi r9, 1
beq 1f
#ifndef CONFIG_PPC_BOOK3S_601
mtspr SPRN_DBAT0U,r10
mtspr SPRN_DBAT0L,r10
mtspr SPRN_DBAT1U,r10
@@ -1009,7 +1013,7 @@ clear_bats:
mtspr SPRN_DBAT2L,r10
mtspr SPRN_DBAT3U,r10
mtspr SPRN_DBAT3L,r10
1:
#endif
mtspr SPRN_IBAT0U,r10
mtspr SPRN_IBAT0L,r10
mtspr SPRN_IBAT1U,r10
@@ -1104,10 +1108,7 @@ mmu_off:
*/
initial_bats:
lis r11,PAGE_OFFSET@h
mfspr r9,SPRN_PVR
rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */
cmpwi 0,r9,1
bne 4f
#ifdef CONFIG_PPC_BOOK3S_601
ori r11,r11,4 /* set up BAT registers for 601 */
li r8,0x7f /* valid, block length = 8MB */
mtspr SPRN_IBAT0U,r11 /* N.B. 601 has valid bit in */
@@ -1120,10 +1121,8 @@ initial_bats:
addis r8,r8,0x800000@h
mtspr SPRN_IBAT2U,r11
mtspr SPRN_IBAT2L,r8
isync
blr
4: tophys(r8,r11)
#else
tophys(r8,r11)
#ifdef CONFIG_SMP
ori r8,r8,0x12 /* R/W access, M=1 */
#else
@@ -1135,10 +1134,10 @@ initial_bats:
mtspr SPRN_DBAT0U,r11 /* bit in upper BAT register */
mtspr SPRN_IBAT0L,r8
mtspr SPRN_IBAT0U,r11
#endif
isync
blr
#ifdef CONFIG_BOOTX_TEXT
setup_disp_bat:
/*
@@ -1153,15 +1152,13 @@ setup_disp_bat:
beqlr
lwz r11,0(r8)
lwz r8,4(r8)
mfspr r9,SPRN_PVR
rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */
cmpwi 0,r9,1
beq 1f
#ifndef CONFIG_PPC_BOOK3S_601
mtspr SPRN_DBAT3L,r8
mtspr SPRN_DBAT3U,r11
blr
1: mtspr SPRN_IBAT3L,r8
#else
mtspr SPRN_IBAT3L,r8
mtspr SPRN_IBAT3U,r11
#endif
blr
#endif /* CONFIG_BOOTX_TEXT */

View File

@@ -4,19 +4,6 @@
#include <asm/ptrace.h> /* for STACK_FRAME_REGS_MARKER */
/*
* MSR_KERNEL is > 0x8000 on 4xx/Book-E since it include MSR_CE.
*/
.macro __LOAD_MSR_KERNEL r, x
.if \x >= 0x8000
lis \r, (\x)@h
ori \r, \r, (\x)@l
.else
li \r, (\x)
.endif
.endm
#define LOAD_MSR_KERNEL(r, x) __LOAD_MSR_KERNEL r, x
/*
* Exception entry code. This code runs with address translation
* turned off, i.e. using physical addresses.
@@ -92,7 +79,7 @@
#ifdef CONFIG_40x
rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
#else
LOAD_MSR_KERNEL(r10, MSR_KERNEL & ~(MSR_IR|MSR_DR)) /* can take exceptions */
LOAD_REG_IMMEDIATE(r10, MSR_KERNEL & ~(MSR_IR|MSR_DR)) /* can take exceptions */
MTMSRD(r10) /* (except for mach check in rtas) */
#endif
lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
@@ -140,10 +127,10 @@
* otherwise we might risk taking an interrupt before we tell lockdep
* they are enabled.
*/
LOAD_MSR_KERNEL(r10, MSR_KERNEL)
LOAD_REG_IMMEDIATE(r10, MSR_KERNEL)
rlwimi r10, r9, 0, MSR_EE
#else
LOAD_MSR_KERNEL(r10, MSR_KERNEL | MSR_EE)
LOAD_REG_IMMEDIATE(r10, MSR_KERNEL | MSR_EE)
#endif
#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
@@ -187,7 +174,7 @@ label:
#define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \
li r10,trap; \
stw r10,_TRAP(r11); \
LOAD_MSR_KERNEL(r10, msr); \
LOAD_REG_IMMEDIATE(r10, msr); \
bl tfer; \
.long hdlr; \
.long ret

View File

@@ -182,7 +182,8 @@ __secondary_hold:
isync
bctr
#else
BUG_OPCODE
0: trap
EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0
#endif
CLOSE_FIXED_SECTION(first_256B)
@@ -635,7 +636,7 @@ __after_prom_start:
sub r5,r5,r11
#else
/* just copy interrupts */
LOAD_REG_IMMEDIATE(r5, FIXED_SYMBOL_ABS_ADDR(__end_interrupts))
LOAD_REG_IMMEDIATE_SYM(r5, r11, FIXED_SYMBOL_ABS_ADDR(__end_interrupts))
#endif
b 5f
3:
@@ -998,7 +999,8 @@ start_here_common:
bl start_kernel
/* Not reached */
BUG_OPCODE
trap
EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0
/*
* We put a few things here that have to be page-aligned.

View File

@@ -15,6 +15,7 @@
*/
#include <linux/init.h>
#include <linux/magic.h>
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/mmu.h>
@@ -574,8 +575,6 @@ InstructionBreakpoint:
* by decoding the registers used by the dcbx instruction and adding them.
* DAR is set to the calculated address.
*/
/* define if you don't want to use self modifying code */
#define NO_SELF_MODIFYING_CODE
FixupDAR:/* Entry point for dcbx workaround. */
mtspr SPRN_M_TW, r10
/* fetch instruction from memory. */
@@ -639,27 +638,6 @@ FixupDAR:/* Entry point for dcbx workaround. */
rlwinm r10, r10,0,7,5 /* Clear store bit for buggy dcbst insn */
mtspr SPRN_DSISR, r10
142: /* continue, it was a dcbx, dcbi instruction. */
#ifndef NO_SELF_MODIFYING_CODE
andis. r10,r11,0x1f /* test if reg RA is r0 */
li r10,modified_instr@l
dcbtst r0,r10 /* touch for store */
rlwinm r11,r11,0,0,20 /* Zero lower 10 bits */
oris r11,r11,640 /* Transform instr. to a "add r10,RA,RB" */
ori r11,r11,532
stw r11,0(r10) /* store add/and instruction */
dcbf 0,r10 /* flush new instr. to memory. */
icbi 0,r10 /* invalidate instr. cache line */
mfspr r11, SPRN_SPRG_SCRATCH1 /* restore r11 */
mfspr r10, SPRN_SPRG_SCRATCH0 /* restore r10 */
isync /* Wait until new instr is loaded from memory */
modified_instr:
.space 4 /* this is where the add instr. is stored */
bne+ 143f
subf r10,r0,r10 /* r10=r10-r0, only if reg RA is r0 */
143: mtdar r10 /* store faulting EA in DAR */
mfspr r10,SPRN_M_TW
b DARFixed /* Go back to normal TLB handling */
#else
mfctr r10
mtdar r10 /* save ctr reg in DAR */
rlwinm r10, r11, 24, 24, 28 /* offset into jump table for reg RB */
@@ -723,7 +701,6 @@ modified_instr:
add r10, r10, r11 /* add it */
mfctr r11 /* restore r11 */
b 151b
#endif
/*
* This is where the main kernel code starts.
@@ -741,6 +718,9 @@ start_here:
/* stack */
lis r1,init_thread_union@ha
addi r1,r1,init_thread_union@l
lis r0, STACK_END_MAGIC@h
ori r0, r0, STACK_END_MAGIC@l
stw r0, 0(r1)
li r0,0
stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)

View File

@@ -195,18 +195,63 @@ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs)
tsk->thread.last_hit_ubp = NULL;
}
static bool is_larx_stcx_instr(struct pt_regs *regs, unsigned int instr)
{
int ret, type;
struct instruction_op op;
ret = analyse_instr(&op, regs, instr);
type = GETTYPE(op.type);
return (!ret && (type == LARX || type == STCX));
}
/*
* Handle debug exception notifications.
*/
static bool stepping_handler(struct pt_regs *regs, struct perf_event *bp,
unsigned long addr)
{
unsigned int instr = 0;
if (__get_user_inatomic(instr, (unsigned int *)regs->nip))
goto fail;
if (is_larx_stcx_instr(regs, instr)) {
printk_ratelimited("Breakpoint hit on instruction that can't be emulated."
" Breakpoint at 0x%lx will be disabled.\n", addr);
goto disable;
}
/* Do not emulate user-space instructions, instead single-step them */
if (user_mode(regs)) {
current->thread.last_hit_ubp = bp;
regs->msr |= MSR_SE;
return false;
}
if (!emulate_step(regs, instr))
goto fail;
return true;
fail:
/*
* We've failed in reliably handling the hw-breakpoint. Unregister
* it and throw a warning message to let the user know about it.
*/
WARN(1, "Unable to handle hardware breakpoint. Breakpoint at "
"0x%lx will be disabled.", addr);
disable:
perf_event_disable_inatomic(bp);
return false;
}
int hw_breakpoint_handler(struct die_args *args)
{
int rc = NOTIFY_STOP;
struct perf_event *bp;
struct pt_regs *regs = args->regs;
#ifndef CONFIG_PPC_8xx
int stepped = 1;
unsigned int instr;
#endif
struct arch_hw_breakpoint *info;
unsigned long dar = regs->dar;
@@ -251,31 +296,9 @@ int hw_breakpoint_handler(struct die_args *args)
(dar - bp->attr.bp_addr < bp->attr.bp_len)))
info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
#ifndef CONFIG_PPC_8xx
/* Do not emulate user-space instructions, instead single-step them */
if (user_mode(regs)) {
current->thread.last_hit_ubp = bp;
regs->msr |= MSR_SE;
if (!IS_ENABLED(CONFIG_PPC_8xx) && !stepping_handler(regs, bp, info->address))
goto out;
}
stepped = 0;
instr = 0;
if (!__get_user_inatomic(instr, (unsigned int *) regs->nip))
stepped = emulate_step(regs, instr);
/*
* emulate_step() could not execute it. We've failed in reliably
* handling the hw-breakpoint. Unregister it and throw a warning
* message to let the user know about it.
*/
if (!stepped) {
WARN(1, "Unable to handle hardware breakpoint. Breakpoint at "
"0x%lx will be disabled.", info->address);
perf_event_disable_inatomic(bp);
goto out;
}
#endif
/*
* As a policy, the callback is invoked in a 'trigger-after-execute'
* fashion

View File

@@ -149,8 +149,8 @@ static const struct ppc_pci_io iowa_pci_io = {
};
#ifdef CONFIG_PPC_INDIRECT_MMIO
static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
pgprot_t prot, void *caller)
void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
pgprot_t prot, void *caller)
{
struct iowa_bus *bus;
void __iomem *res = __ioremap_caller(addr, size, prot, caller);
@@ -163,20 +163,17 @@ static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
}
return res;
}
#else /* CONFIG_PPC_INDIRECT_MMIO */
#define iowa_ioremap NULL
#endif /* !CONFIG_PPC_INDIRECT_MMIO */
bool io_workaround_inited;
/* Enable IO workaround */
static void io_workaround_init(void)
{
static int io_workaround_inited;
if (io_workaround_inited)
return;
ppc_pci_io = iowa_pci_io;
ppc_md.ioremap = iowa_ioremap;
io_workaround_inited = 1;
io_workaround_inited = true;
}
/* Register new bus to support workaround */

View File

@@ -633,11 +633,54 @@ static void iommu_table_clear(struct iommu_table *tbl)
#endif
}
static void iommu_table_reserve_pages(struct iommu_table *tbl,
unsigned long res_start, unsigned long res_end)
{
int i;
WARN_ON_ONCE(res_end < res_start);
/*
* Reserve page 0 so it will not be used for any mappings.
* This avoids buggy drivers that consider page 0 to be invalid
* to crash the machine or even lose data.
*/
if (tbl->it_offset == 0)
set_bit(0, tbl->it_map);
tbl->it_reserved_start = res_start;
tbl->it_reserved_end = res_end;
/* Check if res_start..res_end isn't empty and overlaps the table */
if (res_start && res_end &&
(tbl->it_offset + tbl->it_size < res_start ||
res_end < tbl->it_offset))
return;
for (i = tbl->it_reserved_start; i < tbl->it_reserved_end; ++i)
set_bit(i - tbl->it_offset, tbl->it_map);
}
static void iommu_table_release_pages(struct iommu_table *tbl)
{
int i;
/*
* In case we have reserved the first bit, we should not emit
* the warning below.
*/
if (tbl->it_offset == 0)
clear_bit(0, tbl->it_map);
for (i = tbl->it_reserved_start; i < tbl->it_reserved_end; ++i)
clear_bit(i - tbl->it_offset, tbl->it_map);
}
/*
* Build a iommu_table structure. This contains a bit map which
* is used to manage allocation of the tce space.
*/
struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid,
unsigned long res_start, unsigned long res_end)
{
unsigned long sz;
static int welcomed = 0;
@@ -656,13 +699,7 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
tbl->it_map = page_address(page);
memset(tbl->it_map, 0, sz);
/*
* Reserve page 0 so it will not be used for any mappings.
* This avoids buggy drivers that consider page 0 to be invalid
* to crash the machine or even lose data.
*/
if (tbl->it_offset == 0)
set_bit(0, tbl->it_map);
iommu_table_reserve_pages(tbl, res_start, res_end);
/* We only split the IOMMU table if we have 1GB or more of space */
if ((tbl->it_size << tbl->it_page_shift) >= (1UL * 1024 * 1024 * 1024))
@@ -714,12 +751,7 @@ static void iommu_table_free(struct kref *kref)
return;
}
/*
* In case we have reserved the first bit, we should not emit
* the warning below.
*/
if (tbl->it_offset == 0)
clear_bit(0, tbl->it_map);
iommu_table_release_pages(tbl);
/* verify that table contains no entries */
if (!bitmap_empty(tbl->it_map, tbl->it_size))
@@ -981,29 +1013,32 @@ int iommu_tce_check_gpa(unsigned long page_shift, unsigned long gpa)
}
EXPORT_SYMBOL_GPL(iommu_tce_check_gpa);
long iommu_tce_xchg(struct mm_struct *mm, struct iommu_table *tbl,
extern long iommu_tce_xchg_no_kill(struct mm_struct *mm,
struct iommu_table *tbl,
unsigned long entry, unsigned long *hpa,
enum dma_data_direction *direction)
{
long ret;
unsigned long size = 0;
ret = tbl->it_ops->exchange(tbl, entry, hpa, direction);
ret = tbl->it_ops->xchg_no_kill(tbl, entry, hpa, direction, false);
if (!ret && ((*direction == DMA_FROM_DEVICE) ||
(*direction == DMA_BIDIRECTIONAL)) &&
!mm_iommu_is_devmem(mm, *hpa, tbl->it_page_shift,
&size))
SetPageDirty(pfn_to_page(*hpa >> PAGE_SHIFT));
/* if (unlikely(ret))
pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n",
__func__, hwaddr, entry << tbl->it_page_shift,
hwaddr, ret); */
return ret;
}
EXPORT_SYMBOL_GPL(iommu_tce_xchg);
EXPORT_SYMBOL_GPL(iommu_tce_xchg_no_kill);
void iommu_tce_kill(struct iommu_table *tbl,
unsigned long entry, unsigned long pages)
{
if (tbl->it_ops->tce_kill)
tbl->it_ops->tce_kill(tbl, entry, pages, false);
}
EXPORT_SYMBOL_GPL(iommu_tce_kill);
int iommu_take_ownership(struct iommu_table *tbl)
{
@@ -1017,22 +1052,21 @@ int iommu_take_ownership(struct iommu_table *tbl)
* requires exchange() callback defined so if it is not
* implemented, we disallow taking ownership over the table.
*/
if (!tbl->it_ops->exchange)
if (!tbl->it_ops->xchg_no_kill)
return -EINVAL;
spin_lock_irqsave(&tbl->large_pool.lock, flags);
for (i = 0; i < tbl->nr_pools; i++)
spin_lock(&tbl->pools[i].lock);
if (tbl->it_offset == 0)
clear_bit(0, tbl->it_map);
iommu_table_release_pages(tbl);
if (!bitmap_empty(tbl->it_map, tbl->it_size)) {
pr_err("iommu_tce: it_map is not empty");
ret = -EBUSY;
/* Restore bit#0 set by iommu_init_table() */
if (tbl->it_offset == 0)
set_bit(0, tbl->it_map);
/* Undo iommu_table_release_pages, i.e. restore bit#0, etc */
iommu_table_reserve_pages(tbl, tbl->it_reserved_start,
tbl->it_reserved_end);
} else {
memset(tbl->it_map, 0xff, sz);
}
@@ -1055,9 +1089,8 @@ void iommu_release_ownership(struct iommu_table *tbl)
memset(tbl->it_map, 0, sz);
/* Restore bit#0 set by iommu_init_table() */
if (tbl->it_offset == 0)
set_bit(0, tbl->it_map);
iommu_table_reserve_pages(tbl, tbl->it_reserved_start,
tbl->it_reserved_end);
for (i = 0; i < tbl->nr_pools; i++)
spin_unlock(&tbl->pools[i].lock);

View File

@@ -64,16 +64,17 @@
#define KVM_INST_MTSRIN 0x7c0001e4
static bool kvm_patching_worked = true;
char kvm_tmp[1024 * 1024];
extern char kvm_tmp[];
extern char kvm_tmp_end[];
static int kvm_tmp_index;
static inline void kvm_patch_ins(u32 *inst, u32 new_inst)
static void __init kvm_patch_ins(u32 *inst, u32 new_inst)
{
*inst = new_inst;
flush_icache_range((ulong)inst, (ulong)inst + 4);
}
static void kvm_patch_ins_ll(u32 *inst, long addr, u32 rt)
static void __init kvm_patch_ins_ll(u32 *inst, long addr, u32 rt)
{
#ifdef CONFIG_64BIT
kvm_patch_ins(inst, KVM_INST_LD | rt | (addr & 0x0000fffc));
@@ -82,7 +83,7 @@ static void kvm_patch_ins_ll(u32 *inst, long addr, u32 rt)
#endif
}
static void kvm_patch_ins_ld(u32 *inst, long addr, u32 rt)
static void __init kvm_patch_ins_ld(u32 *inst, long addr, u32 rt)
{
#ifdef CONFIG_64BIT
kvm_patch_ins(inst, KVM_INST_LD | rt | (addr & 0x0000fffc));
@@ -91,12 +92,12 @@ static void kvm_patch_ins_ld(u32 *inst, long addr, u32 rt)
#endif
}
static void kvm_patch_ins_lwz(u32 *inst, long addr, u32 rt)
static void __init kvm_patch_ins_lwz(u32 *inst, long addr, u32 rt)
{
kvm_patch_ins(inst, KVM_INST_LWZ | rt | (addr & 0x0000ffff));
}
static void kvm_patch_ins_std(u32 *inst, long addr, u32 rt)
static void __init kvm_patch_ins_std(u32 *inst, long addr, u32 rt)
{
#ifdef CONFIG_64BIT
kvm_patch_ins(inst, KVM_INST_STD | rt | (addr & 0x0000fffc));
@@ -105,17 +106,17 @@ static void kvm_patch_ins_std(u32 *inst, long addr, u32 rt)
#endif
}
static void kvm_patch_ins_stw(u32 *inst, long addr, u32 rt)
static void __init kvm_patch_ins_stw(u32 *inst, long addr, u32 rt)
{
kvm_patch_ins(inst, KVM_INST_STW | rt | (addr & 0x0000fffc));
}
static void kvm_patch_ins_nop(u32 *inst)
static void __init kvm_patch_ins_nop(u32 *inst)
{
kvm_patch_ins(inst, KVM_INST_NOP);
}
static void kvm_patch_ins_b(u32 *inst, int addr)
static void __init kvm_patch_ins_b(u32 *inst, int addr)
{
#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_PPC_BOOK3S)
/* On relocatable kernels interrupts handlers and our code
@@ -128,11 +129,11 @@ static void kvm_patch_ins_b(u32 *inst, int addr)
kvm_patch_ins(inst, KVM_INST_B | (addr & KVM_INST_B_MASK));
}
static u32 *kvm_alloc(int len)
static u32 * __init kvm_alloc(int len)
{
u32 *p;
if ((kvm_tmp_index + len) > ARRAY_SIZE(kvm_tmp)) {
if ((kvm_tmp_index + len) > (kvm_tmp_end - kvm_tmp)) {
printk(KERN_ERR "KVM: No more space (%d + %d)\n",
kvm_tmp_index, len);
kvm_patching_worked = false;
@@ -151,7 +152,7 @@ extern u32 kvm_emulate_mtmsrd_orig_ins_offs;
extern u32 kvm_emulate_mtmsrd_len;
extern u32 kvm_emulate_mtmsrd[];
static void kvm_patch_ins_mtmsrd(u32 *inst, u32 rt)
static void __init kvm_patch_ins_mtmsrd(u32 *inst, u32 rt)
{
u32 *p;
int distance_start;
@@ -204,7 +205,7 @@ extern u32 kvm_emulate_mtmsr_orig_ins_offs;
extern u32 kvm_emulate_mtmsr_len;
extern u32 kvm_emulate_mtmsr[];
static void kvm_patch_ins_mtmsr(u32 *inst, u32 rt)
static void __init kvm_patch_ins_mtmsr(u32 *inst, u32 rt)
{
u32 *p;
int distance_start;
@@ -265,7 +266,7 @@ extern u32 kvm_emulate_wrtee_orig_ins_offs;
extern u32 kvm_emulate_wrtee_len;
extern u32 kvm_emulate_wrtee[];
static void kvm_patch_ins_wrtee(u32 *inst, u32 rt, int imm_one)
static void __init kvm_patch_ins_wrtee(u32 *inst, u32 rt, int imm_one)
{
u32 *p;
int distance_start;
@@ -322,7 +323,7 @@ extern u32 kvm_emulate_wrteei_0_branch_offs;
extern u32 kvm_emulate_wrteei_0_len;
extern u32 kvm_emulate_wrteei_0[];
static void kvm_patch_ins_wrteei_0(u32 *inst)
static void __init kvm_patch_ins_wrteei_0(u32 *inst)
{
u32 *p;
int distance_start;
@@ -363,7 +364,7 @@ extern u32 kvm_emulate_mtsrin_orig_ins_offs;
extern u32 kvm_emulate_mtsrin_len;
extern u32 kvm_emulate_mtsrin[];
static void kvm_patch_ins_mtsrin(u32 *inst, u32 rt, u32 rb)
static void __init kvm_patch_ins_mtsrin(u32 *inst, u32 rt, u32 rb)
{
u32 *p;
int distance_start;
@@ -399,7 +400,7 @@ static void kvm_patch_ins_mtsrin(u32 *inst, u32 rt, u32 rb)
#endif
static void kvm_map_magic_page(void *data)
static void __init kvm_map_magic_page(void *data)
{
u32 *features = data;
@@ -414,7 +415,7 @@ static void kvm_map_magic_page(void *data)
*features = out[0];
}
static void kvm_check_ins(u32 *inst, u32 features)
static void __init kvm_check_ins(u32 *inst, u32 features)
{
u32 _inst = *inst;
u32 inst_no_rt = _inst & ~KVM_MASK_RT;
@@ -658,7 +659,7 @@ static void kvm_check_ins(u32 *inst, u32 features)
extern u32 kvm_template_start[];
extern u32 kvm_template_end[];
static void kvm_use_magic_page(void)
static void __init kvm_use_magic_page(void)
{
u32 *p;
u32 *start, *end;
@@ -699,25 +700,13 @@ static void kvm_use_magic_page(void)
kvm_patching_worked ? "worked" : "failed");
}
static __init void kvm_free_tmp(void)
{
/*
* Inform kmemleak about the hole in the .bss section since the
* corresponding pages will be unmapped with DEBUG_PAGEALLOC=y.
*/
kmemleak_free_part(&kvm_tmp[kvm_tmp_index],
ARRAY_SIZE(kvm_tmp) - kvm_tmp_index);
free_reserved_area(&kvm_tmp[kvm_tmp_index],
&kvm_tmp[ARRAY_SIZE(kvm_tmp)], -1, NULL);
}
static int __init kvm_guest_init(void)
{
if (!kvm_para_available())
goto free_tmp;
return 0;
if (!epapr_paravirt_enabled)
goto free_tmp;
return 0;
if (kvm_para_has_feature(KVM_FEATURE_MAGIC_PAGE))
kvm_use_magic_page();
@@ -727,9 +716,6 @@ static int __init kvm_guest_init(void)
powersave_nap = 1;
#endif
free_tmp:
kvm_free_tmp();
return 0;
}

View File

@@ -192,6 +192,8 @@ kvm_emulate_mtmsr_orig_ins_offs:
kvm_emulate_mtmsr_len:
.long (kvm_emulate_mtmsr_end - kvm_emulate_mtmsr) / 4
#ifdef CONFIG_BOOKE
/* also used for wrteei 1 */
.global kvm_emulate_wrtee
kvm_emulate_wrtee:
@@ -285,6 +287,10 @@ kvm_emulate_wrteei_0_branch_offs:
kvm_emulate_wrteei_0_len:
.long (kvm_emulate_wrteei_0_end - kvm_emulate_wrteei_0) / 4
#endif /* CONFIG_BOOKE */
#ifdef CONFIG_PPC_BOOK3S_32
.global kvm_emulate_mtsrin
kvm_emulate_mtsrin:
@@ -334,5 +340,15 @@ kvm_emulate_mtsrin_orig_ins_offs:
kvm_emulate_mtsrin_len:
.long (kvm_emulate_mtsrin_end - kvm_emulate_mtsrin) / 4
#endif /* CONFIG_PPC_BOOK3S_32 */
.balign 4
.global kvm_tmp
kvm_tmp:
.space (64 * 1024)
.global kvm_tmp_end
kvm_tmp_end:
.global kvm_template_end
kvm_template_end:

View File

@@ -29,6 +29,8 @@
#include <asm/smp.h>
#include <asm/hw_breakpoint.h>
#include <asm/asm-prototypes.h>
#include <asm/svm.h>
#include <asm/ultravisor.h>
int default_machine_kexec_prepare(struct kimage *image)
{
@@ -327,6 +329,13 @@ void default_machine_kexec(struct kimage *image)
#ifdef CONFIG_PPC_PSERIES
kexec_paca.lppaca_ptr = NULL;
#endif
if (is_secure_guest() && !(image->preserve_context ||
image->type == KEXEC_TYPE_CRASH)) {
uv_unshare_all_pages();
printk("kexec: Unshared all shared pages.\n");
}
paca_ptrs[kexec_paca.paca_index] = &kexec_paca;
setup_paca(&kexec_paca);

View File

@@ -33,13 +33,18 @@ static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
mce_ue_event_queue);
static void machine_check_process_queued_event(struct irq_work *work);
void machine_check_ue_event(struct machine_check_event *evt);
static void machine_check_ue_irq_work(struct irq_work *work);
static void machine_check_ue_event(struct machine_check_event *evt);
static void machine_process_ue_event(struct work_struct *work);
static struct irq_work mce_event_process_work = {
.func = machine_check_process_queued_event,
};
static struct irq_work mce_ue_event_irq_work = {
.func = machine_check_ue_irq_work,
};
DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
static void mce_set_error_info(struct machine_check_event *mce,
@@ -144,6 +149,7 @@ void save_mce_event(struct pt_regs *regs, long handled,
if (phys_addr != ULONG_MAX) {
mce->u.ue_error.physical_address_provided = true;
mce->u.ue_error.physical_address = phys_addr;
mce->u.ue_error.ignore_event = mce_err->ignore_event;
machine_check_ue_event(mce);
}
}
@@ -199,11 +205,15 @@ void release_mce_event(void)
get_mce_event(NULL, true);
}
static void machine_check_ue_irq_work(struct irq_work *work)
{
schedule_work(&mce_ue_event_work);
}
/*
* Queue up the MCE event which then can be handled later.
*/
void machine_check_ue_event(struct machine_check_event *evt)
static void machine_check_ue_event(struct machine_check_event *evt)
{
int index;
@@ -216,7 +226,7 @@ void machine_check_ue_event(struct machine_check_event *evt)
memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));
/* Queue work to process this event later. */
schedule_work(&mce_ue_event_work);
irq_work_queue(&mce_ue_event_irq_work);
}
/*
@@ -257,8 +267,17 @@ static void machine_process_ue_event(struct work_struct *work)
/*
* This should probably queued elsewhere, but
* oh! well
*
* Don't report this machine check because the caller has a
* asked us to ignore the event, it has a fixup handler which
* will do the appropriate error handling and reporting.
*/
if (evt->error_type == MCE_ERROR_TYPE_UE) {
if (evt->u.ue_error.ignore_event) {
__this_cpu_dec(mce_ue_count);
continue;
}
if (evt->u.ue_error.physical_address_provided) {
unsigned long pfn;
@@ -292,6 +311,12 @@ static void machine_check_process_queued_event(struct irq_work *work)
while (__this_cpu_read(mce_queue_count) > 0) {
index = __this_cpu_read(mce_queue_count) - 1;
evt = this_cpu_ptr(&mce_event_queue[index]);
if (evt->error_type == MCE_ERROR_TYPE_UE &&
evt->u.ue_error.ignore_event) {
__this_cpu_dec(mce_queue_count);
continue;
}
machine_check_print_event_info(evt, false, false);
__this_cpu_dec(mce_queue_count);
}
@@ -300,7 +325,7 @@ static void machine_check_process_queued_event(struct irq_work *work)
void machine_check_print_event_info(struct machine_check_event *evt,
bool user_mode, bool in_guest)
{
const char *level, *sevstr, *subtype, *err_type;
const char *level, *sevstr, *subtype, *err_type, *initiator;
uint64_t ea = 0, pa = 0;
int n = 0;
char dar_str[50];
@@ -385,6 +410,28 @@ void machine_check_print_event_info(struct machine_check_event *evt,
break;
}
switch(evt->initiator) {
case MCE_INITIATOR_CPU:
initiator = "CPU";
break;
case MCE_INITIATOR_PCI:
initiator = "PCI";
break;
case MCE_INITIATOR_ISA:
initiator = "ISA";
break;
case MCE_INITIATOR_MEMORY:
initiator = "Memory";
break;
case MCE_INITIATOR_POWERMGM:
initiator = "Power Management";
break;
case MCE_INITIATOR_UNKNOWN:
default:
initiator = "Unknown";
break;
}
switch (evt->error_type) {
case MCE_ERROR_TYPE_UE:
err_type = "UE";
@@ -451,6 +498,14 @@ void machine_check_print_event_info(struct machine_check_event *evt,
if (evt->u.link_error.effective_address_provided)
ea = evt->u.link_error.effective_address;
break;
case MCE_ERROR_TYPE_DCACHE:
err_type = "D-Cache";
subtype = "Unknown";
break;
case MCE_ERROR_TYPE_ICACHE:
err_type = "I-Cache";
subtype = "Unknown";
break;
default:
case MCE_ERROR_TYPE_UNKNOWN:
err_type = "Unknown";
@@ -483,9 +538,17 @@ void machine_check_print_event_info(struct machine_check_event *evt,
level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str);
}
printk("%sMCE: CPU%d: Initiator %s\n", level, evt->cpu, initiator);
subtype = evt->error_class < ARRAY_SIZE(mc_error_class) ?
mc_error_class[evt->error_class] : "Unknown";
printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype);
#ifdef CONFIG_PPC_BOOK3S_64
/* Display faulty slb contents for SLB errors. */
if (evt->error_type == MCE_ERROR_TYPE_SLB)
slb_dump_contents(local_paca->mce_faulty_slbs);
#endif
}
EXPORT_SYMBOL_GPL(machine_check_print_event_info);

View File

@@ -11,6 +11,7 @@
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/extable.h>
#include <asm/mmu.h>
#include <asm/mce.h>
#include <asm/machdep.h>
@@ -18,6 +19,7 @@
#include <asm/pte-walk.h>
#include <asm/sstep.h>
#include <asm/exception-64s.h>
#include <asm/extable.h>
/*
* Convert an address related to an mm to a PFN. NOTE: we are in real
@@ -26,7 +28,8 @@
unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
{
pte_t *ptep;
unsigned long flags;
unsigned int shift;
unsigned long pfn, flags;
struct mm_struct *mm;
if (user_mode(regs))
@@ -35,14 +38,23 @@ unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
mm = &init_mm;
local_irq_save(flags);
if (mm == current->mm)
ptep = find_current_mm_pte(mm->pgd, addr, NULL, NULL);
else
ptep = find_init_mm_pte(addr, NULL);
ptep = __find_linux_pte(mm->pgd, addr, NULL, &shift);
if (!ptep || pte_special(*ptep)) {
pfn = ULONG_MAX;
goto out;
}
if (shift <= PAGE_SHIFT)
pfn = pte_pfn(*ptep);
else {
unsigned long rpnmask = (1ul << shift) - PAGE_SIZE;
pfn = pte_pfn(__pte(pte_val(*ptep) | (addr & rpnmask)));
}
out:
local_irq_restore(flags);
if (!ptep || pte_special(*ptep))
return ULONG_MAX;
return pte_pfn(*ptep);
return pfn;
}
/* flush SLBs and reload */
@@ -344,7 +356,7 @@ static const struct mce_derror_table mce_p9_derror_table[] = {
MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0, false, 0, 0, 0, 0, 0 } };
static int mce_find_instr_ea_and_pfn(struct pt_regs *regs, uint64_t *addr,
static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr,
uint64_t *phys_addr)
{
/*
@@ -397,6 +409,8 @@ static int mce_handle_ierror(struct pt_regs *regs,
/* attempt to correct the error */
switch (table[i].error_type) {
case MCE_ERROR_TYPE_SLB:
if (local_paca->in_mce == 1)
slb_save_contents(local_paca->mce_faulty_slbs);
handled = mce_flush(MCE_FLUSH_SLB);
break;
case MCE_ERROR_TYPE_ERAT:
@@ -482,6 +496,8 @@ static int mce_handle_derror(struct pt_regs *regs,
/* attempt to correct the error */
switch (table[i].error_type) {
case MCE_ERROR_TYPE_SLB:
if (local_paca->in_mce == 1)
slb_save_contents(local_paca->mce_faulty_slbs);
if (mce_flush(MCE_FLUSH_SLB))
handled = 1;
break;
@@ -541,7 +557,8 @@ static int mce_handle_derror(struct pt_regs *regs,
* kernel/exception-64s.h
*/
if (get_paca()->in_mce < MAX_MCE_DEPTH)
mce_find_instr_ea_and_pfn(regs, addr, phys_addr);
mce_find_instr_ea_and_phys(regs, addr,
phys_addr);
}
found = 1;
}
@@ -558,9 +575,18 @@ static int mce_handle_derror(struct pt_regs *regs,
return 0;
}
static long mce_handle_ue_error(struct pt_regs *regs)
static long mce_handle_ue_error(struct pt_regs *regs,
struct mce_error_info *mce_err)
{
long handled = 0;
const struct exception_table_entry *entry;
entry = search_kernel_exception_table(regs->nip);
if (entry) {
mce_err->ignore_event = true;
regs->nip = extable_fixup(entry);
return 1;
}
/*
* On specific SCOM read via MMIO we may get a machine check
@@ -593,7 +619,7 @@ static long mce_handle_error(struct pt_regs *regs,
&phys_addr);
if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE)
handled = mce_handle_ue_error(regs);
handled = mce_handle_ue_error(regs, &mce_err);
save_mce_event(regs, handled, &mce_err, regs->nip, addr, phys_addr);

View File

@@ -292,22 +292,20 @@ _GLOBAL(flush_instruction_cache)
iccci 0,r3
#endif
#elif defined(CONFIG_FSL_BOOKE)
BEGIN_FTR_SECTION
#ifdef CONFIG_E200
mfspr r3,SPRN_L1CSR0
ori r3,r3,L1CSR0_CFI|L1CSR0_CLFC
/* msync; isync recommended here */
mtspr SPRN_L1CSR0,r3
isync
blr
END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE)
#endif
mfspr r3,SPRN_L1CSR1
ori r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR
mtspr SPRN_L1CSR1,r3
#elif defined(CONFIG_PPC_BOOK3S_601)
blr /* for 601, do nothing */
#else
mfspr r3,SPRN_PVR
rlwinm r3,r3,16,16,31
cmpwi 0,r3,1
beqlr /* for 601, do nothing */
/* 603/604 processor - use invalidate-all bit in HID0 */
mfspr r3,SPRN_HID0
ori r3,r3,HID0_ICFI
@@ -326,10 +324,10 @@ EXPORT_SYMBOL(flush_instruction_cache)
* flush_icache_range(unsigned long start, unsigned long stop)
*/
_GLOBAL(flush_icache_range)
BEGIN_FTR_SECTION
#if defined(CONFIG_PPC_BOOK3S_601) || defined(CONFIG_E200)
PURGE_PREFETCHED_INS
blr /* for 601, do nothing */
END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
blr /* for 601 and e200, do nothing */
#else
rlwinm r3,r3,0,0,31 - L1_CACHE_SHIFT
subf r4,r3,r4
addi r4,r4,L1_CACHE_BYTES - 1
@@ -355,6 +353,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
sync /* additional sync needed on g4 */
isync
blr
#endif
_ASM_NOKPROBE_SYMBOL(flush_icache_range)
EXPORT_SYMBOL(flush_icache_range)
@@ -362,15 +361,15 @@ EXPORT_SYMBOL(flush_icache_range)
* Flush a particular page from the data cache to RAM.
* Note: this is necessary because the instruction cache does *not*
* snoop from the data cache.
* This is a no-op on the 601 which has a unified cache.
* This is a no-op on the 601 and e200 which have a unified cache.
*
* void __flush_dcache_icache(void *page)
*/
_GLOBAL(__flush_dcache_icache)
BEGIN_FTR_SECTION
#if defined(CONFIG_PPC_BOOK3S_601) || defined(CONFIG_E200)
PURGE_PREFETCHED_INS
blr
END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
#else
rlwinm r3,r3,0,0,31-PAGE_SHIFT /* Get page base address */
li r4,PAGE_SIZE/L1_CACHE_BYTES /* Number of lines in a page */
mtctr r4
@@ -398,6 +397,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_44x)
sync
isync
blr
#endif
#ifndef CONFIG_BOOKE
/*
@@ -409,10 +409,10 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_44x)
* void __flush_dcache_icache_phys(unsigned long physaddr)
*/
_GLOBAL(__flush_dcache_icache_phys)
BEGIN_FTR_SECTION
#if defined(CONFIG_PPC_BOOK3S_601) || defined(CONFIG_E200)
PURGE_PREFETCHED_INS
blr /* for 601, do nothing */
END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
blr /* for 601 and e200, do nothing */
#else
mfmsr r10
rlwinm r0,r10,0,28,26 /* clear DR */
mtmsr r0
@@ -433,6 +433,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
mtmsr r10 /* restore DR */
isync
blr
#endif
#endif /* CONFIG_BOOKE */
/*
@@ -452,7 +453,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
stwu r9,16(r3)
_GLOBAL(copy_page)
rlwinm r5, r3, 0, L1_CACHE_BYTES - 1
addi r3,r3,-4
0: twnei r5, 0 /* WARN if r3 is not cache aligned */
EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
addi r4,r4,-4
li r5,4

View File

@@ -0,0 +1,40 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* PowerPC ELF notes.
*
* Copyright 2019, IBM Corporation
*/
#include <linux/elfnote.h>
#include <asm/elfnote.h>
/*
* Ultravisor-capable bit (PowerNV only).
*
* Bit 0 indicates that the powerpc kernel binary knows how to run in an
* ultravisor-enabled system.
*
* In an ultravisor-enabled system, some machine resources are now controlled
* by the ultravisor. If the kernel is not ultravisor-capable, but it ends up
* being run on a machine with ultravisor, the kernel will probably crash
* trying to access ultravisor resources. For instance, it may crash in early
* boot trying to set the partition table entry 0.
*
* In an ultravisor-enabled system, a bootloader could warn the user or prevent
* the kernel from being run if the PowerPC ultravisor capability doesn't exist
* or the Ultravisor-capable bit is not set.
*/
#ifdef CONFIG_PPC_POWERNV
#define PPCCAP_ULTRAVISOR_BIT (1 << 0)
#else
#define PPCCAP_ULTRAVISOR_BIT 0
#endif
/*
* Add the PowerPC Capabilities in the binary ELF note. It is a bitmap that
* can be used to advertise kernel capabilities to userland.
*/
#define PPC_CAPABILITIES_BITMAP (PPCCAP_ULTRAVISOR_BIT)
ELFNOTE(PowerPC, PPC_ELFNOTE_CAPABILITIES,
.long PPC_CAPABILITIES_BITMAP)

View File

@@ -14,6 +14,8 @@
#include <asm/sections.h>
#include <asm/pgtable.h>
#include <asm/kexec.h>
#include <asm/svm.h>
#include <asm/ultravisor.h>
#include "setup.h"
@@ -52,6 +54,43 @@ static void *__init alloc_paca_data(unsigned long size, unsigned long align,
#ifdef CONFIG_PPC_PSERIES
#define LPPACA_SIZE 0x400
static void *__init alloc_shared_lppaca(unsigned long size, unsigned long align,
unsigned long limit, int cpu)
{
size_t shared_lppaca_total_size = PAGE_ALIGN(nr_cpu_ids * LPPACA_SIZE);
static unsigned long shared_lppaca_size;
static void *shared_lppaca;
void *ptr;
if (!shared_lppaca) {
memblock_set_bottom_up(true);
shared_lppaca =
memblock_alloc_try_nid(shared_lppaca_total_size,
PAGE_SIZE, MEMBLOCK_LOW_LIMIT,
limit, NUMA_NO_NODE);
if (!shared_lppaca)
panic("cannot allocate shared data");
memblock_set_bottom_up(false);
uv_share_page(PHYS_PFN(__pa(shared_lppaca)),
shared_lppaca_total_size >> PAGE_SHIFT);
}
ptr = shared_lppaca + shared_lppaca_size;
shared_lppaca_size += size;
/*
* This is very early in boot, so no harm done if the kernel crashes at
* this point.
*/
BUG_ON(shared_lppaca_size >= shared_lppaca_total_size);
return ptr;
}
/*
* See asm/lppaca.h for more detail.
*
@@ -65,7 +104,7 @@ static inline void init_lppaca(struct lppaca *lppaca)
*lppaca = (struct lppaca) {
.desc = cpu_to_be32(0xd397d781), /* "LpPa" */
.size = cpu_to_be16(0x400),
.size = cpu_to_be16(LPPACA_SIZE),
.fpregs_in_use = 1,
.slb_count = cpu_to_be16(64),
.vmxregs_in_use = 0,
@@ -75,19 +114,22 @@ static inline void init_lppaca(struct lppaca *lppaca)
static struct lppaca * __init new_lppaca(int cpu, unsigned long limit)
{
struct lppaca *lp;
size_t size = 0x400;
BUILD_BUG_ON(size < sizeof(struct lppaca));
BUILD_BUG_ON(sizeof(struct lppaca) > LPPACA_SIZE);
if (early_cpu_has_feature(CPU_FTR_HVMODE))
return NULL;
lp = alloc_paca_data(size, 0x400, limit, cpu);
if (is_secure_guest())
lp = alloc_shared_lppaca(LPPACA_SIZE, 0x400, limit, cpu);
else
lp = alloc_paca_data(LPPACA_SIZE, 0x400, limit, cpu);
init_lppaca(lp);
return lp;
}
#endif /* CONFIG_PPC_BOOK3S */
#endif /* CONFIG_PPC_PSERIES */
#ifdef CONFIG_PPC_BOOK3S_64

View File

@@ -1379,10 +1379,6 @@ void __init pcibios_resource_survey(void)
pr_debug("PCI: Assigning unassigned resources...\n");
pci_assign_unassigned_resources();
}
/* Call machine dependent fixup */
if (ppc_md.pcibios_fixup)
ppc_md.pcibios_fixup();
}
/* This is used by the PCI hotplug driver to allocate resource

View File

@@ -55,11 +55,18 @@ EXPORT_SYMBOL_GPL(pci_find_bus_by_node);
void pcibios_release_device(struct pci_dev *dev)
{
struct pci_controller *phb = pci_bus_to_host(dev->bus);
struct pci_dn *pdn = pci_get_pdn(dev);
eeh_remove_device(dev);
if (phb->controller_ops.release_device)
phb->controller_ops.release_device(dev);
/* free()ing the pci_dn has been deferred to us, do it now */
if (pdn && (pdn->flags & PCI_DN_FLAG_DEAD)) {
pci_dbg(dev, "freeing dead pdn\n");
kfree(pdn);
}
}
/**

View File

@@ -263,6 +263,10 @@ static int __init pcibios_init(void)
/* Call common code to handle resource allocation */
pcibios_resource_survey();
/* Call machine dependent fixup */
if (ppc_md.pcibios_fixup)
ppc_md.pcibios_fixup();
/* Call machine dependent post-init code */
if (ppc_md.pcibios_after_init)
ppc_md.pcibios_after_init();

View File

@@ -54,14 +54,20 @@ static int __init pcibios_init(void)
pci_add_flags(PCI_ENABLE_PROC_DOMAINS | PCI_COMPAT_DOMAIN_0);
/* Scan all of the recorded PCI controllers. */
list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
pcibios_scan_phb(hose);
pci_bus_add_devices(hose->bus);
}
/* Call common code to handle resource allocation */
pcibios_resource_survey();
/* Add devices. */
list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
pci_bus_add_devices(hose->bus);
/* Call machine dependent fixup */
if (ppc_md.pcibios_fixup)
ppc_md.pcibios_fixup();
printk(KERN_DEBUG "PCI: Probing PCI hardware done\n");
return 0;

View File

@@ -323,6 +323,7 @@ void pci_remove_device_node_info(struct device_node *dn)
{
struct pci_dn *pdn = dn ? PCI_DN(dn) : NULL;
struct device_node *parent;
struct pci_dev *pdev;
#ifdef CONFIG_EEH
struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
@@ -336,12 +337,28 @@ void pci_remove_device_node_info(struct device_node *dn)
WARN_ON(!list_empty(&pdn->child_list));
list_del(&pdn->list);
/* Drop the parent pci_dn's ref to our backing dt node */
parent = of_get_parent(dn);
if (parent)
of_node_put(parent);
dn->data = NULL;
kfree(pdn);
/*
* At this point we *might* still have a pci_dev that was
* instantiated from this pci_dn. So defer free()ing it until
* the pci_dev's release function is called.
*/
pdev = pci_get_domain_bus_and_slot(pdn->phb->global_number,
pdn->busno, pdn->devfn);
if (pdev) {
/* NB: pdev has a ref to dn */
pci_dbg(pdev, "marked pdn (from %pOF) as dead\n", dn);
pdn->flags |= PCI_DN_FLAG_DEAD;
} else {
dn->data = NULL;
kfree(pdn);
}
pci_dev_put(pdev);
}
EXPORT_SYMBOL_GPL(pci_remove_device_node_info);

View File

@@ -34,31 +34,75 @@ static u32 get_int_prop(struct device_node *np, const char *name, u32 def)
* pci_parse_of_flags - Parse the flags cell of a device tree PCI address
* @addr0: value of 1st cell of a device tree PCI address.
* @bridge: Set this flag if the address is from a bridge 'ranges' property
*
* PCI Bus Binding to IEEE Std 1275-1994
*
* Bit# 33222222 22221111 11111100 00000000
* 10987654 32109876 54321098 76543210
* phys.hi cell: npt000ss bbbbbbbb dddddfff rrrrrrrr
* phys.mid cell: hhhhhhhh hhhhhhhh hhhhhhhh hhhhhhhh
* phys.lo cell: llllllll llllllll llllllll llllllll
*
* where:
* n is 0 if the address is relocatable, 1 otherwise
* p is 1 if the addressable region is "prefetchable", 0 otherwise
* t is 1 if the address is aliased (for non-relocatable I/O),
* below 1 MB (for Memory),or below 64 KB (for relocatable I/O).
* ss is the space code, denoting the address space:
* 00 denotes Configuration Space
* 01 denotes I/O Space
* 10 denotes 32-bit-address Memory Space
* 11 denotes 64-bit-address Memory Space
* bbbbbbbb is the 8-bit Bus Number
* ddddd is the 5-bit Device Number
* fff is the 3-bit Function Number
* rrrrrrrr is the 8-bit Register Number
*/
#define OF_PCI_ADDR0_SPACE(ss) (((ss)&3)<<24)
#define OF_PCI_ADDR0_SPACE_CFG OF_PCI_ADDR0_SPACE(0)
#define OF_PCI_ADDR0_SPACE_IO OF_PCI_ADDR0_SPACE(1)
#define OF_PCI_ADDR0_SPACE_MMIO32 OF_PCI_ADDR0_SPACE(2)
#define OF_PCI_ADDR0_SPACE_MMIO64 OF_PCI_ADDR0_SPACE(3)
#define OF_PCI_ADDR0_SPACE_MASK OF_PCI_ADDR0_SPACE(3)
#define OF_PCI_ADDR0_RELOC (1UL<<31)
#define OF_PCI_ADDR0_PREFETCH (1UL<<30)
#define OF_PCI_ADDR0_ALIAS (1UL<<29)
#define OF_PCI_ADDR0_BUS 0x00FF0000UL
#define OF_PCI_ADDR0_DEV 0x0000F800UL
#define OF_PCI_ADDR0_FN 0x00000700UL
#define OF_PCI_ADDR0_BARREG 0x000000FFUL
unsigned int pci_parse_of_flags(u32 addr0, int bridge)
{
unsigned int flags = 0;
unsigned int flags = 0, as = addr0 & OF_PCI_ADDR0_SPACE_MASK;
if (addr0 & 0x02000000) {
if (as == OF_PCI_ADDR0_SPACE_MMIO32 || as == OF_PCI_ADDR0_SPACE_MMIO64) {
flags = IORESOURCE_MEM | PCI_BASE_ADDRESS_SPACE_MEMORY;
flags |= (addr0 >> 22) & PCI_BASE_ADDRESS_MEM_TYPE_64;
if (flags & PCI_BASE_ADDRESS_MEM_TYPE_64)
flags |= IORESOURCE_MEM_64;
flags |= (addr0 >> 28) & PCI_BASE_ADDRESS_MEM_TYPE_1M;
if (addr0 & 0x40000000)
flags |= IORESOURCE_PREFETCH
| PCI_BASE_ADDRESS_MEM_PREFETCH;
if (as == OF_PCI_ADDR0_SPACE_MMIO64)
flags |= PCI_BASE_ADDRESS_MEM_TYPE_64 | IORESOURCE_MEM_64;
if (addr0 & OF_PCI_ADDR0_ALIAS)
flags |= PCI_BASE_ADDRESS_MEM_TYPE_1M;
if (addr0 & OF_PCI_ADDR0_PREFETCH)
flags |= IORESOURCE_PREFETCH |
PCI_BASE_ADDRESS_MEM_PREFETCH;
/* Note: We don't know whether the ROM has been left enabled
* by the firmware or not. We mark it as disabled (ie, we do
* not set the IORESOURCE_ROM_ENABLE flag) for now rather than
* do a config space read, it will be force-enabled if needed
*/
if (!bridge && (addr0 & 0xff) == 0x30)
if (!bridge && (addr0 & OF_PCI_ADDR0_BARREG) == PCI_ROM_ADDRESS)
flags |= IORESOURCE_READONLY;
} else if (addr0 & 0x01000000)
} else if (as == OF_PCI_ADDR0_SPACE_IO)
flags = IORESOURCE_IO | PCI_BASE_ADDRESS_SPACE_IO;
if (flags)
flags |= IORESOURCE_SIZEALIGN;
return flags;
}

View File

@@ -1587,8 +1587,9 @@ static void setup_ksp_vsid(struct task_struct *p, unsigned long sp)
/*
* Copy architecture-specific thread state
*/
int copy_thread(unsigned long clone_flags, unsigned long usp,
unsigned long kthread_arg, struct task_struct *p)
int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
unsigned long kthread_arg, struct task_struct *p,
unsigned long tls)
{
struct pt_regs *childregs, *kregs;
extern void ret_from_fork(void);
@@ -1629,10 +1630,10 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_PPC64
if (!is_32bit_task())
childregs->gpr[13] = childregs->gpr[6];
childregs->gpr[13] = tls;
else
#endif
childregs->gpr[2] = childregs->gpr[6];
childregs->gpr[2] = tls;
}
f = ret_from_fork;
@@ -2033,10 +2034,8 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
int count = 0;
int firstframe = 1;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
struct ftrace_ret_stack *ret_stack;
extern void return_to_handler(void);
unsigned long rth = (unsigned long)return_to_handler;
int curr_frame = 0;
unsigned long ret_addr;
int ftrace_idx = 0;
#endif
if (tsk == NULL)
@@ -2065,15 +2064,10 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
if (!firstframe || ip != lr) {
printk("["REG"] ["REG"] %pS", sp, ip, (void *)ip);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
if ((ip == rth) && curr_frame >= 0) {
ret_stack = ftrace_graph_get_ret_stack(current,
curr_frame++);
if (ret_stack)
pr_cont(" (%pS)",
(void *)ret_stack->ret);
else
curr_frame = -1;
}
ret_addr = ftrace_graph_ret_addr(current,
&ftrace_idx, ip, stack);
if (ret_addr != ip)
pr_cont(" (%pS)", (void *)ret_addr);
#endif
if (firstframe)
pr_cont(" (unreliable)");

View File

@@ -55,6 +55,7 @@
#include <asm/firmware.h>
#include <asm/dt_cpu_ftrs.h>
#include <asm/drmem.h>
#include <asm/ultravisor.h>
#include <mm/mmu_decl.h>
@@ -702,9 +703,12 @@ void __init early_init_devtree(void *params)
#ifdef CONFIG_PPC_POWERNV
/* Some machines might need OPAL info for debugging, grab it now. */
of_scan_flat_dt(early_init_dt_scan_opal, NULL);
/* Scan tree for ultravisor feature */
of_scan_flat_dt(early_init_dt_scan_ultravisor, NULL);
#endif
#ifdef CONFIG_FA_DUMP
#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP)
/* scan tree to see if dump is active during last boot */
of_scan_flat_dt(early_init_dt_scan_fw_dump, NULL);
#endif
@@ -731,7 +735,7 @@ void __init early_init_devtree(void *params)
if (PHYSICAL_START > MEMORY_START)
memblock_reserve(MEMORY_START, 0x8000);
reserve_kdump_trampoline();
#ifdef CONFIG_FA_DUMP
#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP)
/*
* If we fail to reserve memory for firmware-assisted dump then
* fallback to kexec based kdump.

View File

@@ -40,6 +40,7 @@
#include <asm/sections.h>
#include <asm/machdep.h>
#include <asm/asm-prototypes.h>
#include <asm/ultravisor-api.h>
#include <linux/linux_logo.h>
@@ -94,7 +95,7 @@ static int of_workarounds __prombss;
#define PROM_BUG() do { \
prom_printf("kernel BUG at %s line 0x%x!\n", \
__FILE__, __LINE__); \
__asm__ __volatile__(".long " BUG_ILLEGAL_INSTR); \
__builtin_trap(); \
} while (0)
#ifdef DEBUG_PROM
@@ -171,6 +172,10 @@ static bool __prombss prom_radix_disable;
static bool __prombss prom_xive_disable;
#endif
#ifdef CONFIG_PPC_SVM
static bool __prombss prom_svm_enable;
#endif
struct platform_support {
bool hash_mmu;
bool radix_mmu;
@@ -812,6 +817,17 @@ static void __init early_cmdline_parse(void)
prom_debug("XIVE disabled from cmdline\n");
}
#endif /* CONFIG_PPC_PSERIES */
#ifdef CONFIG_PPC_SVM
opt = prom_strstr(prom_cmd_line, "svm=");
if (opt) {
bool val;
opt += sizeof("svm=") - 1;
if (!prom_strtobool(opt, &val))
prom_svm_enable = val;
}
#endif /* CONFIG_PPC_SVM */
}
#ifdef CONFIG_PPC_PSERIES
@@ -1712,6 +1728,43 @@ static void __init prom_close_stdin(void)
}
}
#ifdef CONFIG_PPC_SVM
static int prom_rtas_hcall(uint64_t args)
{
register uint64_t arg1 asm("r3") = H_RTAS;
register uint64_t arg2 asm("r4") = args;
asm volatile("sc 1\n" : "=r" (arg1) :
"r" (arg1),
"r" (arg2) :);
return arg1;
}
static struct rtas_args __prombss os_term_args;
static void __init prom_rtas_os_term(char *str)
{
phandle rtas_node;
__be32 val;
u32 token;
prom_debug("%s: start...\n", __func__);
rtas_node = call_prom("finddevice", 1, 1, ADDR("/rtas"));
prom_debug("rtas_node: %x\n", rtas_node);
if (!PHANDLE_VALID(rtas_node))
return;
val = 0;
prom_getprop(rtas_node, "ibm,os-term", &val, sizeof(val));
token = be32_to_cpu(val);
prom_debug("ibm,os-term: %x\n", token);
if (token == 0)
prom_panic("Could not get token for ibm,os-term\n");
os_term_args.token = cpu_to_be32(token);
prom_rtas_hcall((uint64_t)&os_term_args);
}
#endif /* CONFIG_PPC_SVM */
/*
* Allocate room for and instantiate RTAS
*/
@@ -3168,6 +3221,46 @@ static void unreloc_toc(void)
#endif
#endif
#ifdef CONFIG_PPC_SVM
/*
* Perform the Enter Secure Mode ultracall.
*/
static int enter_secure_mode(unsigned long kbase, unsigned long fdt)
{
register unsigned long r3 asm("r3") = UV_ESM;
register unsigned long r4 asm("r4") = kbase;
register unsigned long r5 asm("r5") = fdt;
asm volatile("sc 2" : "+r"(r3) : "r"(r4), "r"(r5));
return r3;
}
/*
* Call the Ultravisor to transfer us to secure memory if we have an ESM blob.
*/
static void setup_secure_guest(unsigned long kbase, unsigned long fdt)
{
int ret;
if (!prom_svm_enable)
return;
/* Switch to secure mode. */
prom_printf("Switching to secure mode.\n");
ret = enter_secure_mode(kbase, fdt);
if (ret != U_SUCCESS) {
prom_printf("Returned %d from switching to secure mode.\n", ret);
prom_rtas_os_term("Switch to secure mode failed.\n");
}
}
#else
static void setup_secure_guest(unsigned long kbase, unsigned long fdt)
{
}
#endif /* CONFIG_PPC_SVM */
/*
* We enter here early on, when the Open Firmware prom is still
* handling exceptions and the MMU hash table for us.
@@ -3366,6 +3459,9 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
unreloc_toc();
#endif
/* Move to secure memory if we're supposed to be secure guests. */
setup_secure_guest(kbase, hdr);
__start(hdr, kbase, 0, 0, 0, 0, 0);
return 0;

View File

@@ -16,6 +16,7 @@
#include <linux/capability.h>
#include <linux/delay.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/completion.h>
#include <linux/cpumask.h>
@@ -871,15 +872,17 @@ static int rtas_cpu_state_change_mask(enum rtas_cpu_state state,
return 0;
for_each_cpu(cpu, cpus) {
struct device *dev = get_cpu_device(cpu);
switch (state) {
case DOWN:
cpuret = cpu_down(cpu);
cpuret = device_offline(dev);
break;
case UP:
cpuret = cpu_up(cpu);
cpuret = device_online(dev);
break;
}
if (cpuret) {
if (cpuret < 0) {
pr_debug("%s: cpu_%s for cpu#%d returned %d.\n",
__func__,
((state == UP) ? "up" : "down"),
@@ -896,6 +899,7 @@ static int rtas_cpu_state_change_mask(enum rtas_cpu_state state,
cpumask_clear_cpu(cpu, cpus);
}
}
cond_resched();
}
return ret;
@@ -922,13 +926,11 @@ int rtas_online_cpus_mask(cpumask_var_t cpus)
return ret;
}
EXPORT_SYMBOL(rtas_online_cpus_mask);
int rtas_offline_cpus_mask(cpumask_var_t cpus)
{
return rtas_cpu_state_change_mask(DOWN, cpus);
}
EXPORT_SYMBOL(rtas_offline_cpus_mask);
int rtas_ibm_suspend_me(u64 handle)
{
@@ -968,6 +970,8 @@ int rtas_ibm_suspend_me(u64 handle)
data.token = rtas_token("ibm,suspend-me");
data.complete = &done;
lock_device_hotplug();
/* All present CPUs must be online */
cpumask_andnot(offline_mask, cpu_present_mask, cpu_online_mask);
cpuret = rtas_online_cpus_mask(offline_mask);
@@ -1006,6 +1010,7 @@ out_hotplug_enable:
__func__);
out:
unlock_device_hotplug();
free_cpumask_var(offline_mask);
return atomic_read(&data.error);
}

View File

@@ -28,7 +28,7 @@ static enum count_cache_flush_type count_cache_flush_type = COUNT_CACHE_FLUSH_NO
bool barrier_nospec_enabled;
static bool no_nospec;
static bool btb_flush_enabled;
#ifdef CONFIG_PPC_FSL_BOOK3E
#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3S_64)
static bool no_spectrev2;
#endif
@@ -114,7 +114,7 @@ static __init int security_feature_debugfs_init(void)
device_initcall(security_feature_debugfs_init);
#endif /* CONFIG_DEBUG_FS */
#ifdef CONFIG_PPC_FSL_BOOK3E
#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3S_64)
static int __init handle_nospectre_v2(char *p)
{
no_spectrev2 = true;
@@ -122,6 +122,9 @@ static int __init handle_nospectre_v2(char *p)
return 0;
}
early_param("nospectre_v2", handle_nospectre_v2);
#endif /* CONFIG_PPC_FSL_BOOK3E || CONFIG_PPC_BOOK3S_64 */
#ifdef CONFIG_PPC_FSL_BOOK3E
void setup_spectre_v2(void)
{
if (no_spectrev2 || cpu_mitigations_off())
@@ -399,7 +402,17 @@ static void toggle_count_cache_flush(bool enable)
void setup_count_cache_flush(void)
{
toggle_count_cache_flush(true);
bool enable = true;
if (no_spectrev2 || cpu_mitigations_off()) {
if (security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED) ||
security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED))
pr_warn("Spectre v2 mitigations not under software control, can't disable\n");
enable = false;
}
toggle_count_cache_flush(enable);
}
#ifdef CONFIG_DEBUG_FS

View File

@@ -800,9 +800,15 @@ static __init void print_system_info(void)
pr_info("mmu_features = 0x%08x\n", cur_cpu_spec->mmu_features);
#ifdef CONFIG_PPC64
pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features);
#ifdef CONFIG_PPC_BOOK3S
pr_info("vmalloc start = 0x%lx\n", KERN_VIRT_START);
pr_info("IO start = 0x%lx\n", KERN_IO_START);
pr_info("vmemmap start = 0x%lx\n", (unsigned long)vmemmap);
#endif
#endif
print_system_hash_info();
if (!early_radix_enabled())
print_system_hash_info();
if (PHYSICAL_START > 0)
pr_info("physical_start = 0x%llx\n",

View File

@@ -206,6 +206,6 @@ __init void initialize_cache_info(void)
dcache_bsize = cur_cpu_spec->dcache_bsize;
icache_bsize = cur_cpu_spec->icache_bsize;
ucache_bsize = 0;
if (cpu_has_feature(CPU_FTR_UNIFIED_ID_CACHE))
if (IS_ENABLED(CONFIG_PPC_BOOK3S_601) || IS_ENABLED(CONFIG_E200))
ucache_bsize = icache_bsize = dcache_bsize;
}

View File

@@ -182,7 +182,7 @@ static int __save_stack_trace_tsk_reliable(struct task_struct *tsk,
* FIXME: IMHO these tests do not belong in
* arch-dependent code, they are generic.
*/
ip = ftrace_graph_ret_addr(tsk, &graph_idx, ip, NULL);
ip = ftrace_graph_ret_addr(tsk, &graph_idx, ip, stack);
#ifdef CONFIG_KPROBES
/*
* Mark stacktraces with kretprobed functions on them

View File

@@ -19,6 +19,7 @@
#include <asm/smp.h>
#include <asm/pmc.h>
#include <asm/firmware.h>
#include <asm/svm.h>
#include "cacheinfo.h"
#include "setup.h"
@@ -715,6 +716,23 @@ static struct device_attribute pa6t_attrs[] = {
#endif /* HAS_PPC_PMC_PA6T */
#endif /* HAS_PPC_PMC_CLASSIC */
#ifdef CONFIG_PPC_SVM
static ssize_t show_svm(struct device *dev, struct device_attribute *attr, char *buf)
{
return sprintf(buf, "%u\n", is_secure_guest());
}
static DEVICE_ATTR(svm, 0444, show_svm, NULL);
static void create_svm_file(void)
{
device_create_file(cpu_subsys.dev_root, &dev_attr_svm);
}
#else
static void create_svm_file(void)
{
}
#endif /* CONFIG_PPC_SVM */
static int register_cpu_online(unsigned int cpu)
{
struct cpu *c = &per_cpu(cpu_devices, cpu);
@@ -1058,6 +1076,8 @@ static int __init topology_init(void)
sysfs_create_dscr_default();
#endif /* CONFIG_PPC64 */
create_svm_file();
return 0;
}
subsys_initcall(topology_init);

View File

@@ -944,7 +944,8 @@ int ftrace_disable_ftrace_graph_caller(void)
* Hook the return address and push it in the stack of return addrs
* in current thread info. Return the address we want to divert to.
*/
unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip,
unsigned long sp)
{
unsigned long return_hooker;
@@ -956,7 +957,7 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
return_hooker = ppc_function_entry(return_to_handler);
if (!function_graph_enter(parent, ip, 0, NULL))
if (!function_graph_enter(parent, ip, 0, (unsigned long *)sp))
parent = return_hooker;
out:
return parent;

View File

@@ -50,6 +50,7 @@ _GLOBAL(ftrace_stub)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
_GLOBAL(ftrace_graph_caller)
addi r5, r1, 48
/* load r4 with local address */
lwz r4, 44(r1)
subi r4, r4, MCOUNT_INSN_SIZE

View File

@@ -294,6 +294,7 @@ _GLOBAL(ftrace_graph_caller)
std r2, 24(r1)
ld r2, PACATOC(r13) /* get kernel TOC in r2 */
addi r5, r1, 112
mfctr r4 /* ftrace_caller has moved local addr here */
std r4, 40(r1)
mflr r3 /* ftrace_caller has restored LR from stack */

View File

@@ -41,6 +41,7 @@ _GLOBAL(ftrace_stub)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
_GLOBAL(ftrace_graph_caller)
addi r5, r1, 112
/* load r4 with local address */
ld r4, 128(r1)
subi r4, r4, MCOUNT_INSN_SIZE

View File

@@ -472,6 +472,7 @@ void system_reset_exception(struct pt_regs *regs)
if (debugger(regs))
goto out;
kmsg_dump(KMSG_DUMP_OOPS);
/*
* A system reset is a request to dump, so we always send
* it through the crashdump code (if fadump or kdump are

View File

@@ -0,0 +1,14 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Generic code to perform an ultravisor call.
*
* Copyright 2019, IBM Corporation.
*
*/
#include <asm/ppc_asm.h>
#include <asm/export.h>
_GLOBAL(ucall_norets)
EXPORT_SYMBOL_GPL(ucall_norets)
sc 2 /* Invoke the ultravisor */
blr /* Return r3 = status */

View File

@@ -94,28 +94,6 @@ static struct vdso_patch_def vdso_patches[] = {
CPU_FTR_COHERENT_ICACHE, CPU_FTR_COHERENT_ICACHE,
"__kernel_sync_dicache", "__kernel_sync_dicache_p5"
},
#ifdef CONFIG_PPC32
{
CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
"__kernel_gettimeofday", NULL
},
{
CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
"__kernel_clock_gettime", NULL
},
{
CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
"__kernel_clock_getres", NULL
},
{
CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
"__kernel_get_tbfreq", NULL
},
{
CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
"__kernel_time", NULL
},
#endif
};
/*

View File

@@ -70,6 +70,7 @@ V_FUNCTION_END(__kernel_get_syscall_map)
*
* returns the timebase frequency in HZ
*/
#ifndef CONFIG_PPC_BOOK3S_601
V_FUNCTION_BEGIN(__kernel_get_tbfreq)
.cfi_startproc
mflr r12
@@ -82,3 +83,4 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq)
blr
.cfi_endproc
V_FUNCTION_END(__kernel_get_tbfreq)
#endif

View File

@@ -144,10 +144,13 @@ VERSION
__kernel_datapage_offset;
__kernel_get_syscall_map;
#ifndef CONFIG_PPC_BOOK3S_601
__kernel_gettimeofday;
__kernel_clock_gettime;
__kernel_clock_getres;
__kernel_time;
__kernel_get_tbfreq;
#endif
__kernel_sync_dicache;
__kernel_sync_dicache_p5;
__kernel_sigtramp32;
@@ -155,7 +158,6 @@ VERSION
#ifdef CONFIG_PPC64
__kernel_getcpu;
#endif
__kernel_time;
local: *;
};