parisc: Add alternative coding infrastructure

This patch adds the necessary code to patch a running kernel at runtime
to improve performance.

The current implementation offers a few optimizations variants:

- When running a SMP kernel on a single UP processor, unwanted assembler
  statements like locking functions are overwritten with NOPs. When
  multiple instructions shall be skipped, one branch instruction is used
  instead of multiple nop instructions.

- In the UP case, some pdtlb and pitlb instructions are patched to
  become pdtlb,l and pitlb,l which only flushes the CPU-local tlb
  entries instead of broadcasting the flush to other CPUs in the system
  and thus may improve performance.

- fic and fdc instructions are skipped if no I- or D-caches are
  installed.  This should speed up qemu emulation and cacheless systems.

- If no cache coherence is needed for IO operations, the relevant fdc
  and sync instructions in the sba and ccio drivers are replaced by
  nops.

- On systems which share I- and D-TLBs and thus don't have a seperate
  instruction TLB, the pitlb instruction is replaced by a nop.

Live-patching is done early in the boot process, just after having run
the system inventory. No drivers are running and thus no external
interrupts should arrive. So the hope is that no TLB exceptions will
occur during the patching. If this turns out to be wrong we will
probably need to do the patching in real-mode.

Signed-off-by: Helge Deller <deller@gmx.de>
This commit is contained in:
Helge Deller
2018-10-16 22:38:22 +02:00
vanhempi 34c201ae49
commit 3847dab774
14 muutettua tiedostoa jossa 233 lisäystä ja 62 poistoa

Näytä tiedosto

@@ -609,14 +609,13 @@ ccio_io_pdir_entry(u64 *pdir_ptr, space_t sid, unsigned long vba,
** PCX-T'? Don't know. (eg C110 or similar K-class)
**
** See PDC_MODEL/option 0/SW_CAP word for "Non-coherent IO-PDIR bit".
** Hopefully we can patch (NOP) these out at boot time somehow.
**
** "Since PCX-U employs an offset hash that is incompatible with
** the real mode coherence index generation of U2, the PDIR entry
** must be flushed to memory to retain coherence."
*/
asm volatile("fdc %%r0(%0)" : : "r" (pdir_ptr));
asm volatile("sync");
asm_io_fdc(pdir_ptr);
asm_io_sync();
}
/**
@@ -682,17 +681,14 @@ ccio_mark_invalid(struct ioc *ioc, dma_addr_t iova, size_t byte_cnt)
** FIXME: PCX_W platforms don't need FDC/SYNC. (eg C360)
** PCX-U/U+ do. (eg C200/C240)
** See PDC_MODEL/option 0/SW_CAP for "Non-coherent IO-PDIR bit".
**
** Hopefully someone figures out how to patch (NOP) the
** FDC/SYNC out at boot time.
*/
asm volatile("fdc %%r0(%0)" : : "r" (pdir_ptr[7]));
asm_io_fdc(pdir_ptr);
iovp += IOVP_SIZE;
byte_cnt -= IOVP_SIZE;
}
asm volatile("sync");
asm_io_sync();
ccio_clear_io_tlb(ioc, CCIO_IOVP(iova), saved_byte_cnt);
}

Näytä tiedosto

@@ -587,8 +587,7 @@ sba_io_pdir_entry(u64 *pdir_ptr, space_t sid, unsigned long vba,
* (bit #61, big endian), we have to flush and sync every time
* IO-PDIR is changed in Ike/Astro.
*/
if (ioc_needs_fdc)
asm volatile("fdc %%r0(%0)" : : "r" (pdir_ptr));
asm_io_fdc(pdir_ptr);
}
@@ -641,8 +640,8 @@ sba_mark_invalid(struct ioc *ioc, dma_addr_t iova, size_t byte_cnt)
do {
/* clear I/O Pdir entry "valid" bit first */
((u8 *) pdir_ptr)[7] = 0;
asm_io_fdc(pdir_ptr);
if (ioc_needs_fdc) {
asm volatile("fdc %%r0(%0)" : : "r" (pdir_ptr));
#if 0
entries_per_cacheline = L1_CACHE_SHIFT - 3;
#endif
@@ -661,8 +660,7 @@ sba_mark_invalid(struct ioc *ioc, dma_addr_t iova, size_t byte_cnt)
** could dump core on HPMC.
*/
((u8 *) pdir_ptr)[7] = 0;
if (ioc_needs_fdc)
asm volatile("fdc %%r0(%0)" : : "r" (pdir_ptr));
asm_io_fdc(pdir_ptr);
WRITE_REG( SBA_IOVA(ioc, iovp, 0, 0), ioc->ioc_hpa+IOC_PCOM);
}
@@ -773,8 +771,7 @@ sba_map_single(struct device *dev, void *addr, size_t size,
}
/* force FDC ops in io_pdir_entry() to be visible to IOMMU */
if (ioc_needs_fdc)
asm volatile("sync" : : );
asm_io_sync();
#ifdef ASSERT_PDIR_SANITY
sba_check_pdir(ioc,"Check after sba_map_single()");
@@ -858,8 +855,7 @@ sba_unmap_page(struct device *dev, dma_addr_t iova, size_t size,
sba_free_range(ioc, iova, size);
/* If fdc's were issued, force fdc's to be visible now */
if (ioc_needs_fdc)
asm volatile("sync" : : );
asm_io_sync();
READ_REG(ioc->ioc_hpa+IOC_PCOM); /* flush purges */
#endif /* DELAYED_RESOURCE_CNT == 0 */
@@ -1008,8 +1004,7 @@ sba_map_sg(struct device *dev, struct scatterlist *sglist, int nents,
filled = iommu_fill_pdir(ioc, sglist, nents, 0, sba_io_pdir_entry);
/* force FDC ops in io_pdir_entry() to be visible to IOMMU */
if (ioc_needs_fdc)
asm volatile("sync" : : );
asm_io_sync();
#ifdef ASSERT_PDIR_SANITY
if (sba_check_pdir(ioc,"Check after sba_map_sg()"))