Merge branch 'odp_fixes' into hmm.git
From rdma.git, Jason Gunthorpe says:

====================
This is a collection of general cleanups for ODP to clarify some of the
flows around umem creation and use of the interval tree.
====================

The branch is based on v5.3-rc5 due to dependencies, and is being taken
into hmm.git due to dependencies in the next patches.

* odp_fixes:
  RDMA/mlx5: Use odp instead of mr->umem in pagefault_mr
  RDMA/mlx5: Use ib_umem_start instead of umem.address
  RDMA/core: Make invalidate_range a device operation
  RDMA/odp: Use kvcalloc for the dma_list and page_list
  RDMA/odp: Check for overflow when computing the umem_odp end
  RDMA/odp: Provide ib_umem_odp_release() to undo the allocs
  RDMA/odp: Split creating a umem_odp from ib_umem_get
  RDMA/odp: Make the three ways to create a umem_odp clear
  RMDA/odp: Consolidate umem_odp initialization
  RDMA/odp: Make it clearer when a umem is an implicit ODP umem
  RDMA/odp: Iterate over the whole rbtree directly
  RDMA/odp: Use the common interval tree library instead of generic
  RDMA/mlx5: Fix MR npages calculation for IB_ACCESS_HUGETLB

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
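Of the fixes listed above, the overflow check is the easiest to state in code. A minimal sketch of the pattern, assuming the kernel's check_add_overflow() helper; the function and parameter names are illustrative, not the literal upstream patch:

    #include <linux/overflow.h>

    /* Hypothetical sketch: an ODP interval ends at address + length,
     * and that sum must not wrap before it is fed to the interval tree.
     */
    static int umem_odp_end_sketch(u64 address, u64 length, u64 *end)
    {
            if (check_add_overflow(address, length, end))
                    return -EOVERFLOW;
            return 0;
    }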
@@ -465,6 +465,7 @@ config PCI_ENDPOINT_TEST
 
 config XILINX_SDFEC
 	tristate "Xilinx SDFEC 16"
+	depends on HAS_IOMEM
 	help
 	  This option enables support for the Xilinx SDFEC (Soft Decision
 	  Forward Error Correction) driver. This enables a char driver
@@ -5,6 +5,7 @@ config EEPROM_AT24
 	tristate "I2C EEPROMs / RAMs / ROMs from most vendors"
 	depends on I2C && SYSFS
 	select NVMEM
+	select NVMEM_SYSFS
 	select REGMAP_I2C
 	help
 	  Enable this driver to get read/write support to most I2C EEPROMs
@@ -34,6 +35,7 @@ config EEPROM_AT25
 	tristate "SPI EEPROMs from most vendors"
 	depends on SPI && SYSFS
 	select NVMEM
+	select NVMEM_SYSFS
 	help
 	  Enable this driver to get read/write support to most SPI EEPROMs,
 	  after you configure the board init code to know about each eeprom
@@ -80,6 +82,7 @@ config EEPROM_93XX46
 	depends on SPI && SYSFS
 	select REGMAP
 	select NVMEM
+	select NVMEM_SYSFS
 	help
 	  Driver for the microwire EEPROM chipsets 93xx46x. The driver
 	  supports both read and write commands and also the command to
@@ -685,7 +685,7 @@ static int at24_probe(struct i2c_client *client)
 	nvmem_config.name = dev_name(dev);
 	nvmem_config.dev = dev;
 	nvmem_config.read_only = !writable;
-	nvmem_config.root_only = true;
+	nvmem_config.root_only = !(flags & AT24_FLAG_IRUGO);
 	nvmem_config.owner = THIS_MODULE;
 	nvmem_config.compat = true;
 	nvmem_config.base_dev = dev;
@@ -683,7 +683,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
 
 		rc = hl_poll_timeout_memory(hdev,
 			&ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
-			100, jiffies_to_usecs(hdev->timeout_jiffies));
+			100, jiffies_to_usecs(hdev->timeout_jiffies), false);
 
 		if (rc == -ETIMEDOUT) {
 			dev_err(hdev->dev,
@@ -970,7 +970,8 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 	rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
 	if (rc) {
 		dev_err(hdev->dev, "failed to initialize kernel context\n");
-		goto free_ctx;
+		kfree(hdev->kernel_ctx);
+		goto mmu_fini;
 	}
 
 	rc = hl_cb_pool_init(hdev);
@@ -1053,8 +1054,6 @@ release_ctx:
 	if (hl_ctx_put(hdev->kernel_ctx) != 1)
 		dev_err(hdev->dev,
 			"kernel ctx is still alive on initialization failure\n");
-free_ctx:
-	kfree(hdev->kernel_ctx);
 mmu_fini:
 	hl_mmu_fini(hdev);
 eq_fini:
@@ -24,7 +24,7 @@ int hl_fw_push_fw_to_device(struct hl_device *hdev, const char *fw_name,
 {
 	const struct firmware *fw;
 	const u64 *fw_data;
-	size_t fw_size, i;
+	size_t fw_size;
 	int rc;
 
 	rc = request_firmware(&fw, fw_name, hdev->dev);
@@ -45,22 +45,7 @@ int hl_fw_push_fw_to_device(struct hl_device *hdev, const char *fw_name,
 
 	fw_data = (const u64 *) fw->data;
 
-	if ((fw->size % 8) != 0)
-		fw_size -= 8;
-
-	for (i = 0 ; i < fw_size ; i += 8, fw_data++, dst += 8) {
-		if (!(i & (0x80000 - 1))) {
-			dev_dbg(hdev->dev,
-				"copied so far %zu out of %zu for %s firmware",
-				i, fw_size, fw_name);
-			usleep_range(20, 100);
-		}
-
-		writeq(*fw_data, dst);
-	}
-
-	if ((fw->size % 8) != 0)
-		writel(*(const u32 *) fw_data, dst);
+	memcpy_toio(dst, fw_data, fw_size);
 
 out:
 	release_firmware(fw);
@@ -112,7 +97,8 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
 	}
 
 	rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp,
-			(tmp == ARMCP_PACKET_FENCE_VAL), 1000, timeout);
+			(tmp == ARMCP_PACKET_FENCE_VAL), 1000,
+			timeout, true);
 
 	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
 
@@ -695,8 +695,8 @@ static int goya_sw_init(struct hl_device *hdev)
 		goto free_dma_pool;
 	}
 
-	dev_dbg(hdev->dev, "cpu accessible memory at bus address 0x%llx\n",
-		hdev->cpu_accessible_dma_address);
+	dev_dbg(hdev->dev, "cpu accessible memory at bus address %pad\n",
+		&hdev->cpu_accessible_dma_address);
 
 	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
 	if (!hdev->cpu_accessible_dma_pool) {
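The %pad change above is the portable way to print a dma_addr_t, which is 32 or 64 bits wide depending on the configuration; unlike 0x%llx, %pad takes a pointer to the value and prints it at the right width with no cast. A minimal sketch:

    dma_addr_t bus_addr = hdev->cpu_accessible_dma_address;

    /* %pad dereferences its argument, hence the & */
    dev_dbg(hdev->dev, "bus address %pad\n", &bus_addr);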
@@ -2729,9 +2729,10 @@ void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
 				GOYA_ASYNC_EVENT_ID_PI_UPDATE);
 }
 
-void goya_flush_pq_write(struct hl_device *hdev, u64 *pq, u64 exp_val)
+void goya_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
 {
-	/* Not needed in Goya */
+	/* The QMANs are on the SRAM so need to copy to IO space */
+	memcpy_toio((void __iomem *) pqe, bd, sizeof(struct hl_bd));
 }
 
 static void *goya_dma_alloc_coherent(struct hl_device *hdev, size_t size,
@@ -2864,7 +2865,8 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
 	}
 
 	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
-			(tmp == GOYA_QMAN0_FENCE_VAL), 1000, timeout);
+			(tmp == GOYA_QMAN0_FENCE_VAL), 1000,
+			timeout, true);
 
 	hl_hw_queue_inc_ci_kernel(hdev, GOYA_QUEUE_ID_DMA_0);
 
@@ -2945,7 +2947,7 @@ int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
 	}
 
 	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
-				1000, GOYA_TEST_QUEUE_WAIT_USEC);
+				1000, GOYA_TEST_QUEUE_WAIT_USEC, true);
 
 	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
 
@@ -3312,9 +3314,11 @@ static int goya_validate_dma_pkt_no_mmu(struct hl_device *hdev,
 	int rc;
 
 	dev_dbg(hdev->dev, "DMA packet details:\n");
-	dev_dbg(hdev->dev, "source == 0x%llx\n", user_dma_pkt->src_addr);
-	dev_dbg(hdev->dev, "destination == 0x%llx\n", user_dma_pkt->dst_addr);
-	dev_dbg(hdev->dev, "size == %u\n", user_dma_pkt->tsize);
+	dev_dbg(hdev->dev, "source == 0x%llx\n",
+		le64_to_cpu(user_dma_pkt->src_addr));
+	dev_dbg(hdev->dev, "destination == 0x%llx\n",
+		le64_to_cpu(user_dma_pkt->dst_addr));
+	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
 
 	ctl = le32_to_cpu(user_dma_pkt->ctl);
 	user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
@@ -3343,9 +3347,11 @@ static int goya_validate_dma_pkt_mmu(struct hl_device *hdev,
 				struct packet_lin_dma *user_dma_pkt)
 {
 	dev_dbg(hdev->dev, "DMA packet details:\n");
-	dev_dbg(hdev->dev, "source == 0x%llx\n", user_dma_pkt->src_addr);
-	dev_dbg(hdev->dev, "destination == 0x%llx\n", user_dma_pkt->dst_addr);
-	dev_dbg(hdev->dev, "size == %u\n", user_dma_pkt->tsize);
+	dev_dbg(hdev->dev, "source == 0x%llx\n",
+		le64_to_cpu(user_dma_pkt->src_addr));
+	dev_dbg(hdev->dev, "destination == 0x%llx\n",
+		le64_to_cpu(user_dma_pkt->dst_addr));
+	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
 
 	/*
 	 * WA for HW-23.
@@ -3385,7 +3391,8 @@ static int goya_validate_wreg32(struct hl_device *hdev,
 
 	dev_dbg(hdev->dev, "WREG32 packet details:\n");
 	dev_dbg(hdev->dev, "reg_offset == 0x%x\n", reg_offset);
-	dev_dbg(hdev->dev, "value == 0x%x\n", wreg_pkt->value);
+	dev_dbg(hdev->dev, "value == 0x%x\n",
+		le32_to_cpu(wreg_pkt->value));
 
 	if (reg_offset != (mmDMA_CH_0_WR_COMP_ADDR_LO & 0x1FFF)) {
 		dev_err(hdev->dev, "WREG32 packet with illegal address 0x%x\n",
@@ -3427,12 +3434,13 @@ static int goya_validate_cb(struct hl_device *hdev,
 	while (cb_parsed_length < parser->user_cb_size) {
 		enum packet_id pkt_id;
 		u16 pkt_size;
-		void *user_pkt;
+		struct goya_packet *user_pkt;
 
-		user_pkt = (void *) (uintptr_t)
+		user_pkt = (struct goya_packet *) (uintptr_t)
 			(parser->user_cb->kernel_address + cb_parsed_length);
 
-		pkt_id = (enum packet_id) (((*(u64 *) user_pkt) &
+		pkt_id = (enum packet_id) (
+				(le64_to_cpu(user_pkt->header) &
 				PACKET_HEADER_PACKET_ID_MASK) >>
 					PACKET_HEADER_PACKET_ID_SHIFT);
 
@@ -3452,7 +3460,8 @@ static int goya_validate_cb(struct hl_device *hdev,
 		 * need to validate here as well because patch_cb() is
 		 * not called in MMU path while this function is called
 		 */
-		rc = goya_validate_wreg32(hdev, parser, user_pkt);
+		rc = goya_validate_wreg32(hdev,
+			parser, (struct packet_wreg32 *) user_pkt);
 		break;
 
 	case PACKET_WREG_BULK:
@@ -3480,10 +3489,10 @@ static int goya_validate_cb(struct hl_device *hdev,
 	case PACKET_LIN_DMA:
 		if (is_mmu)
 			rc = goya_validate_dma_pkt_mmu(hdev, parser,
-					user_pkt);
+					(struct packet_lin_dma *) user_pkt);
 		else
 			rc = goya_validate_dma_pkt_no_mmu(hdev, parser,
-					user_pkt);
+					(struct packet_lin_dma *) user_pkt);
 		break;
 
 	case PACKET_MSG_LONG:
@@ -3656,15 +3665,16 @@ static int goya_patch_cb(struct hl_device *hdev,
 		enum packet_id pkt_id;
 		u16 pkt_size;
 		u32 new_pkt_size = 0;
-		void *user_pkt, *kernel_pkt;
+		struct goya_packet *user_pkt, *kernel_pkt;
 
-		user_pkt = (void *) (uintptr_t)
+		user_pkt = (struct goya_packet *) (uintptr_t)
 			(parser->user_cb->kernel_address + cb_parsed_length);
-		kernel_pkt = (void *) (uintptr_t)
+		kernel_pkt = (struct goya_packet *) (uintptr_t)
 			(parser->patched_cb->kernel_address +
 				cb_patched_cur_length);
 
-		pkt_id = (enum packet_id) (((*(u64 *) user_pkt) &
+		pkt_id = (enum packet_id) (
+				(le64_to_cpu(user_pkt->header) &
 				PACKET_HEADER_PACKET_ID_MASK) >>
 					PACKET_HEADER_PACKET_ID_SHIFT);
 
@@ -3679,15 +3689,18 @@ static int goya_patch_cb(struct hl_device *hdev,
 
 		switch (pkt_id) {
 		case PACKET_LIN_DMA:
-			rc = goya_patch_dma_packet(hdev, parser, user_pkt,
-					kernel_pkt, &new_pkt_size);
+			rc = goya_patch_dma_packet(hdev, parser,
+					(struct packet_lin_dma *) user_pkt,
+					(struct packet_lin_dma *) kernel_pkt,
+					&new_pkt_size);
 			cb_patched_cur_length += new_pkt_size;
 			break;
 
 		case PACKET_WREG_32:
 			memcpy(kernel_pkt, user_pkt, pkt_size);
 			cb_patched_cur_length += pkt_size;
-			rc = goya_validate_wreg32(hdev, parser, kernel_pkt);
+			rc = goya_validate_wreg32(hdev, parser,
+					(struct packet_wreg32 *) kernel_pkt);
 			break;
 
 		case PACKET_WREG_BULK:
@@ -4351,6 +4364,8 @@ static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
 	size_t total_pkt_size;
 	long result;
 	int rc;
+	int irq_num_entries, irq_arr_index;
+	__le32 *goya_irq_arr;
 
 	total_pkt_size = sizeof(struct armcp_unmask_irq_arr_packet) +
 			irq_arr_size;
@@ -4368,8 +4383,16 @@ static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
 	if (!pkt)
 		return -ENOMEM;
 
-	pkt->length = cpu_to_le32(irq_arr_size / sizeof(irq_arr[0]));
-	memcpy(&pkt->irqs, irq_arr, irq_arr_size);
+	irq_num_entries = irq_arr_size / sizeof(irq_arr[0]);
+	pkt->length = cpu_to_le32(irq_num_entries);
+
+	/* We must perform any necessary endianness conversation on the irq
+	 * array being passed to the goya hardware
+	 */
+	for (irq_arr_index = 0, goya_irq_arr = (__le32 *) &pkt->irqs;
+			irq_arr_index < irq_num_entries ; irq_arr_index++)
+		goya_irq_arr[irq_arr_index] =
+				cpu_to_le32(irq_arr[irq_arr_index]);
 
 	pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
 					ARMCP_PKT_CTL_OPCODE_SHIFT);
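A side note on the conversion loop above: the kernel also has an in-place helper for exactly this shape, cpu_to_le32_array(). Whether it fits here depends on being willing to convert the packet buffer in place after the copy; a hedged sketch, not the code the driver chose:

    /* Assumption: converting pkt->irqs in place is acceptable. */
    memcpy(&pkt->irqs, irq_arr, irq_arr_size);
    cpu_to_le32_array((u32 *) &pkt->irqs, irq_num_entries);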
@@ -4449,7 +4472,6 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
 	case GOYA_ASYNC_EVENT_ID_AXI_ECC:
 	case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
 	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
-	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
 		goya_print_irq_info(hdev, event_type, false);
 		hl_device_reset(hdev, true, false);
 		break;
@@ -4485,6 +4507,7 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
 		goya_unmask_irq(hdev, event_type);
 		break;
 
+	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
 	case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
 	case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
 	case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
@@ -5041,7 +5064,7 @@ static const struct hl_asic_funcs goya_funcs = {
 	.resume = goya_resume,
 	.cb_mmap = goya_cb_mmap,
 	.ring_doorbell = goya_ring_doorbell,
-	.flush_pq_write = goya_flush_pq_write,
+	.pqe_write = goya_pqe_write,
 	.asic_dma_alloc_coherent = goya_dma_alloc_coherent,
 	.asic_dma_free_coherent = goya_dma_free_coherent,
 	.get_int_queue_base = goya_get_int_queue_base,
@@ -177,7 +177,7 @@ int goya_late_init(struct hl_device *hdev);
 void goya_late_fini(struct hl_device *hdev);
 
 void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi);
-void goya_flush_pq_write(struct hl_device *hdev, u64 *pq, u64 exp_val);
+void goya_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd);
 void goya_update_eq_ci(struct hl_device *hdev, u32 val);
 void goya_restore_phase_topology(struct hl_device *hdev);
 int goya_context_switch(struct hl_device *hdev, u32 asid);
@@ -441,7 +441,11 @@ enum hl_pll_frequency {
  * @resume: handles IP specific H/W or SW changes for resume.
  * @cb_mmap: maps a CB.
  * @ring_doorbell: increment PI on a given QMAN.
- * @flush_pq_write: flush PQ entry write if necessary, WARN if flushing failed.
+ * @pqe_write: Write the PQ entry to the PQ. This is ASIC-specific
+ *             function because the PQs are located in different memory areas
+ *             per ASIC (SRAM, DRAM, Host memory) and therefore, the method of
+ *             writing the PQE must match the destination memory area
+ *             properties.
  * @asic_dma_alloc_coherent: Allocate coherent DMA memory by calling
  *                           dma_alloc_coherent(). This is ASIC function because
  *                           its implementation is not trivial when the driver
@@ -510,7 +514,8 @@ struct hl_asic_funcs {
 	int (*cb_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
 			u64 kaddress, phys_addr_t paddress, u32 size);
 	void (*ring_doorbell)(struct hl_device *hdev, u32 hw_queue_id, u32 pi);
-	void (*flush_pq_write)(struct hl_device *hdev, u64 *pq, u64 exp_val);
+	void (*pqe_write)(struct hl_device *hdev, __le64 *pqe,
+			struct hl_bd *bd);
 	void* (*asic_dma_alloc_coherent)(struct hl_device *hdev, size_t size,
 			dma_addr_t *dma_handle, gfp_t flag);
 	void (*asic_dma_free_coherent)(struct hl_device *hdev, size_t size,
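To make the @pqe_write documentation concrete: on Goya the PQs sit in SRAM, which is why goya_pqe_write() above uses memcpy_toio(). An ASIC whose PQs live in coherent host memory could satisfy the same hook with a plain copy. Hypothetical sketch, not a real driver callback:

    /* Hypothetical pqe_write for host-resident PQs: ordinary memory,
     * so no __iomem accessors are required.
     */
    static void example_pqe_write(struct hl_device *hdev, __le64 *pqe,
                                    struct hl_bd *bd)
    {
            memcpy(pqe, bd, sizeof(struct hl_bd));
    }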
@@ -1062,9 +1067,17 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
 /*
  * address in this macro points always to a memory location in the
  * host's (server's) memory. That location is updated asynchronously
- * either by the direct access of the device or by another core
+ * either by the direct access of the device or by another core.
+ *
+ * To work both in LE and BE architectures, we need to distinguish between the
+ * two states (device or another core updates the memory location). Therefore,
+ * if mem_written_by_device is true, the host memory being polled will be
+ * updated directly by the device. If false, the host memory being polled will
+ * be updated by host CPU. Required so host knows whether or not the memory
+ * might need to be byte-swapped before returning value to caller.
  */
-#define hl_poll_timeout_memory(hdev, addr, val, cond, sleep_us, timeout_us) \
+#define hl_poll_timeout_memory(hdev, addr, val, cond, sleep_us, timeout_us, \
+				mem_written_by_device) \
 ({ \
 	ktime_t __timeout; \
 	/* timeout should be longer when working with simulator */ \
@@ -1077,10 +1090,14 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
 	/* Verify we read updates done by other cores or by device */ \
 	mb(); \
 	(val) = *((u32 *) (uintptr_t) (addr)); \
+	if (mem_written_by_device) \
+		(val) = le32_to_cpu(val); \
 	if (cond) \
 		break; \
 	if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
 		(val) = *((u32 *) (uintptr_t) (addr)); \
+		if (mem_written_by_device) \
+			(val) = le32_to_cpu(val); \
 		break; \
 	} \
 	if (sleep_us) \
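Callers choose the new mem_written_by_device argument by who writes the polled location: true when the device DMA-writes it (the data is little-endian, so the macro byte-swaps on big-endian hosts), false when another host core writes it in native byte order. Both shapes, taken from the call sites patched above:

    /* Device writes the fence: little-endian data, pass true. */
    rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp,
                    (tmp == ARMCP_PACKET_FENCE_VAL), 1000,
                    timeout, true);

    /* Another host core writes the token: native order, pass false. */
    rc = hl_poll_timeout_memory(hdev,
                    &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
                    100, jiffies_to_usecs(hdev->timeout_jiffies), false);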
@@ -290,23 +290,19 @@ static void int_hw_queue_schedule_job(struct hl_cs_job *job)
 	struct hl_device *hdev = job->cs->ctx->hdev;
 	struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
 	struct hl_bd bd;
-	u64 *pi, *pbd = (u64 *) &bd;
+	__le64 *pi;
 
 	bd.ctl = 0;
-	bd.len = __cpu_to_le32(job->job_cb_size);
-	bd.ptr = __cpu_to_le64((u64) (uintptr_t) job->user_cb);
+	bd.len = cpu_to_le32(job->job_cb_size);
+	bd.ptr = cpu_to_le64((u64) (uintptr_t) job->user_cb);
 
-	pi = (u64 *) (uintptr_t) (q->kernel_address +
+	pi = (__le64 *) (uintptr_t) (q->kernel_address +
 		((q->pi & (q->int_queue_len - 1)) * sizeof(bd)));
 
-	pi[0] = pbd[0];
-	pi[1] = pbd[1];
-
 	q->pi++;
 	q->pi &= ((q->int_queue_len << 1) - 1);
 
-	/* Flush PQ entry write. Relevant only for specific ASICs */
-	hdev->asic_funcs->flush_pq_write(hdev, pi, pbd[0]);
+	hdev->asic_funcs->pqe_write(hdev, pi, &bd);
 
 	hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
 }
@@ -52,6 +52,19 @@ enum goya_dma_direction {
 #define GOYA_PKT_CTL_MB_SHIFT		31
 #define GOYA_PKT_CTL_MB_MASK		0x80000000
 
+/* All packets have, at least, an 8-byte header, which contains
+ * the packet type. The kernel driver uses the packet header for packet
+ * validation and to perform any necessary required preparation before
+ * sending them off to the hardware.
+ */
+struct goya_packet {
+	__le64 header;
+	/* The rest of the packet data follows. Use the corresponding
+	 * packet_XXX struct to deference the data, based on packet type
+	 */
+	u8 contents[0];
+};
+
 struct packet_nop {
 	__le32 reserved;
 	__le32 ctl;
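The struct comment is the whole contract: read the 8-byte header, extract the packet id, then cast to the concrete packet_XXX type. Condensed from the goya_validate_cb() hunks above (addr and handle_lin_dma() are illustrative stand-ins):

    struct goya_packet *pkt = (struct goya_packet *) (uintptr_t) addr;
    enum packet_id id = (enum packet_id) ((le64_to_cpu(pkt->header) &
                    PACKET_HEADER_PACKET_ID_MASK) >>
                            PACKET_HEADER_PACKET_ID_SHIFT);

    if (id == PACKET_LIN_DMA)
            handle_lin_dma((struct packet_lin_dma *) pkt); /* hypothetical */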
@@ -80,8 +80,7 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg)
 	struct hl_cs_job *job;
 	bool shadow_index_valid;
 	u16 shadow_index;
-	u32 *cq_entry;
-	u32 *cq_base;
+	struct hl_cq_entry *cq_entry, *cq_base;
 
 	if (hdev->disabled) {
 		dev_dbg(hdev->dev,
@@ -90,29 +89,29 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg)
 		return IRQ_HANDLED;
 	}
 
-	cq_base = (u32 *) (uintptr_t) cq->kernel_address;
+	cq_base = (struct hl_cq_entry *) (uintptr_t) cq->kernel_address;
 
 	while (1) {
-		bool entry_ready = ((cq_base[cq->ci] & CQ_ENTRY_READY_MASK)
+		bool entry_ready = ((le32_to_cpu(cq_base[cq->ci].data) &
+					CQ_ENTRY_READY_MASK)
 					>> CQ_ENTRY_READY_SHIFT);
 
 		if (!entry_ready)
 			break;
 
-		cq_entry = (u32 *) &cq_base[cq->ci];
+		cq_entry = (struct hl_cq_entry *) &cq_base[cq->ci];
 
-		/*
-		 * Make sure we read CQ entry contents after we've
+		/* Make sure we read CQ entry contents after we've
 		 * checked the ownership bit.
 		 */
 		dma_rmb();
 
-		shadow_index_valid =
-			((*cq_entry & CQ_ENTRY_SHADOW_INDEX_VALID_MASK)
+		shadow_index_valid = ((le32_to_cpu(cq_entry->data) &
+					CQ_ENTRY_SHADOW_INDEX_VALID_MASK)
 					>> CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT);
 
-		shadow_index = (u16)
-			((*cq_entry & CQ_ENTRY_SHADOW_INDEX_MASK)
+		shadow_index = (u16) ((le32_to_cpu(cq_entry->data) &
+					CQ_ENTRY_SHADOW_INDEX_MASK)
 					>> CQ_ENTRY_SHADOW_INDEX_SHIFT);
 
 		queue = &hdev->kernel_queues[cq->hw_queue_id];
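The ordering rule this hunk preserves, stated in isolation: the READY bit must be observed before any other field of the CQ entry is read, so the barrier sits between the ownership check and the payload reads. Minimal sketch (process_entry() is a hypothetical stand-in):

    if (!((le32_to_cpu(cq_entry->data) & CQ_ENTRY_READY_MASK)
                    >> CQ_ENTRY_READY_SHIFT))
            break;

    /* Entry contents are only valid once READY was seen. */
    dma_rmb();

    process_entry(cq_entry); /* hypothetical */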
@@ -122,8 +121,7 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg)
 			queue_work(hdev->cq_wq, &job->finish_work);
 		}
 
-		/*
-		 * Update ci of the context's queue. There is no
+		/* Update ci of the context's queue. There is no
 		 * need to protect it with spinlock because this update is
 		 * done only inside IRQ and there is a different IRQ per
 		 * queue
@@ -131,7 +129,8 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg)
 		queue->ci = hl_queue_inc_ptr(queue->ci);
 
 		/* Clear CQ entry ready bit */
-		cq_base[cq->ci] &= ~CQ_ENTRY_READY_MASK;
+		cq_entry->data = cpu_to_le32(le32_to_cpu(cq_entry->data) &
+						~CQ_ENTRY_READY_MASK);
 
 		cq->ci = hl_cq_inc_ptr(cq->ci);
 
@@ -1629,6 +1629,8 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
 			dev_dbg(hdev->dev,
 				"page list 0x%p of asid %d is still alive\n",
 				phys_pg_list, ctx->asid);
+			atomic64_sub(phys_pg_list->total_size,
+					&hdev->dram_used_mem);
 			free_phys_pg_pack(hdev, phys_pg_list);
 			idr_remove(&vm->phys_pg_pack_handles, i);
 		}
@@ -81,6 +81,9 @@
 
 #define MEI_DEV_ID_ICP_LP     0x34E0  /* Ice Lake Point LP */
 
+#define MEI_DEV_ID_MCC        0x4B70  /* Mule Creek Canyon (EHL) */
+#define MEI_DEV_ID_MCC_4      0x4B75  /* Mule Creek Canyon 4 (EHL) */
+
 /*
  * MEI HW Section
  */
@@ -98,6 +98,9 @@ static const struct pci_device_id mei_me_pci_tbl[] = {
 
 	{MEI_PCI_DEVICE(MEI_DEV_ID_ICP_LP, MEI_ME_PCH12_CFG)},
 
+	{MEI_PCI_DEVICE(MEI_DEV_ID_MCC, MEI_ME_PCH12_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_MCC_4, MEI_ME_PCH8_CFG)},
+
 	/* required last entry */
 	{0, }
 };