+int goya_dma_map_sg(struct hl_device *hdev, struct scatterlist *sg, int nents,
+ enum dma_data_direction dir)
+{
+ if (!dma_map_sg(&hdev->pdev->dev, sg, nents, dir))
+ return -ENOMEM;
+
+ return 0;
+}
+
+void goya_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sg,
+ int nents, enum dma_data_direction dir)
+{
+ dma_unmap_sg(&hdev->pdev->dev, sg, nents, dir);
+}
+
+u32 goya_get_dma_desc_list_size(struct hl_device *hdev,
+ struct sg_table *sgt)
+{
+ struct scatterlist *sg, *sg_next_iter;
+ u32 count, len, dma_desc_cnt, len_next;
+ dma_addr_t addr, addr_next;
+
+ dma_desc_cnt = 0;
+
+ for_each_sg(sgt->sgl, sg, sgt->nents, count) {
+
+ len = sg_dma_len(sg);
+ addr = sg_dma_address(sg);
+
+ if (len == 0)
+ break;
+
+ while ((count + 1) < sgt->nents) {
+ sg_next_iter = sg_next(sg);
+ len_next = sg_dma_len(sg_next_iter);
+ addr_next = sg_dma_address(sg_next_iter);
+
+ if (len_next == 0)
+ break;
+
+ if ((addr + len == addr_next) &&
+ (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
+ len += len_next;
+ count++;
+ sg = sg_next_iter;
+ } else {
+ break;
+ }
+ }
+
+ dma_desc_cnt++;
+ }
+
+ return dma_desc_cnt * sizeof(struct packet_lin_dma);
+}
+
+static int goya_pin_memory_before_cs(struct hl_device *hdev,
+ struct hl_cs_parser *parser,
+ struct packet_lin_dma *user_dma_pkt,
+ u64 addr, enum dma_data_direction dir)
+{
+ struct hl_userptr *userptr;
+ int rc;
+
+ if (hl_userptr_is_pinned(hdev, addr, user_dma_pkt->tsize,
+ parser->job_userptr_list, &userptr))
+ goto already_pinned;
+
+ userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
+ if (!userptr)
+ return -ENOMEM;
+
+ rc = hl_pin_host_memory(hdev, addr, user_dma_pkt->tsize, userptr);
+ if (rc)
+ goto free_userptr;
+
+ list_add_tail(&userptr->job_node, parser->job_userptr_list);
+
+ rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
+ userptr->sgt->nents, dir);
+ if (rc) {
+ dev_err(hdev->dev, "failed to map sgt with DMA region\n");
+ goto unpin_memory;
+ }
+
+ userptr->dma_mapped = true;
+ userptr->dir = dir;
+
+already_pinned:
+ parser->patched_cb_size +=
+ goya_get_dma_desc_list_size(hdev, userptr->sgt);
+
+ return 0;
+
+unpin_memory:
+ hl_unpin_host_memory(hdev, userptr);
+free_userptr:
+ kfree(userptr);
+ return rc;
+}
+
+static int goya_validate_dma_pkt_host(struct hl_device *hdev,
+ struct hl_cs_parser *parser,
+ struct packet_lin_dma *user_dma_pkt)
+{
+ u64 device_memory_addr, addr;
+ enum dma_data_direction dir;
+ enum goya_dma_direction user_dir;
+ bool sram_addr = true;
+ bool skip_host_mem_pin = false;
+ bool user_memset;
+ int rc = 0;
+
+ user_dir = (user_dma_pkt->ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
+ GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
+
+ user_memset = (user_dma_pkt->ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
+ GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
+
+ switch (user_dir) {
+ case DMA_HOST_TO_DRAM:
+ dev_dbg(hdev->dev, "DMA direction is HOST --> DRAM\n");
+ dir = DMA_TO_DEVICE;
+ sram_addr = false;
+ addr = user_dma_pkt->src_addr;
+ device_memory_addr = user_dma_pkt->dst_addr;
+ if (user_memset)
+ skip_host_mem_pin = true;
+ break;
+
+ case DMA_DRAM_TO_HOST:
+ dev_dbg(hdev->dev, "DMA direction is DRAM --> HOST\n");
+ dir = DMA_FROM_DEVICE;
+ sram_addr = false;
+ addr = user_dma_pkt->dst_addr;
+ device_memory_addr = user_dma_pkt->src_addr;
+ break;
+
+ case DMA_HOST_TO_SRAM:
+ dev_dbg(hdev->dev, "DMA direction is HOST --> SRAM\n");
+ dir = DMA_TO_DEVICE;
+ addr = user_dma_pkt->src_addr;
+ device_memory_addr = user_dma_pkt->dst_addr;
+ if (user_memset)
+ skip_host_mem_pin = true;
+ break;
+
+ case DMA_SRAM_TO_HOST:
+ dev_dbg(hdev->dev, "DMA direction is SRAM --> HOST\n");
+ dir = DMA_FROM_DEVICE;
+ addr = user_dma_pkt->dst_addr;
+ device_memory_addr = user_dma_pkt->src_addr;
+ break;
+ default:
+ dev_err(hdev->dev, "DMA direction is undefined\n");
+ return -EFAULT;
+ }
+
+ if (parser->ctx_id != HL_KERNEL_ASID_ID) {
+ if (sram_addr) {
+ if (!hl_mem_area_inside_range(device_memory_addr,
+ user_dma_pkt->tsize,
+ hdev->asic_prop.sram_user_base_address,
+ hdev->asic_prop.sram_end_address)) {
+
+ dev_err(hdev->dev,
+ "SRAM address 0x%llx + 0x%x is invalid\n",
+ device_memory_addr,
+ user_dma_pkt->tsize);
+ return -EFAULT;
+ }
+ } else {
+ if (!hl_mem_area_inside_range(device_memory_addr,
+ user_dma_pkt->tsize,
+ hdev->asic_prop.dram_user_base_address,
+ hdev->asic_prop.dram_end_address)) {
+
+ dev_err(hdev->dev,
+ "DRAM address 0x%llx + 0x%x is invalid\n",
+ device_memory_addr,
+ user_dma_pkt->tsize);
+ return -EFAULT;
+ }
+ }
+ }
+
+ if (skip_host_mem_pin)
+ parser->patched_cb_size += sizeof(*user_dma_pkt);
+ else {
+ if ((dir == DMA_TO_DEVICE) &&
+ (parser->hw_queue_id > GOYA_QUEUE_ID_DMA_1)) {
+ dev_err(hdev->dev,
+ "Can't DMA from host on queue other then 1\n");
+ return -EFAULT;
+ }
+
+ rc = goya_pin_memory_before_cs(hdev, parser, user_dma_pkt,
+ addr, dir);
+ }
+
+ return rc;
+}
+
+static int goya_validate_dma_pkt_no_host(struct hl_device *hdev,
+ struct hl_cs_parser *parser,
+ struct packet_lin_dma *user_dma_pkt)
+{
+ u64 sram_memory_addr, dram_memory_addr;
+ enum goya_dma_direction user_dir;
+
+ user_dir = (user_dma_pkt->ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
+ GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
+
+ if (user_dir == DMA_DRAM_TO_SRAM) {
+ dev_dbg(hdev->dev, "DMA direction is DRAM --> SRAM\n");
+ dram_memory_addr = user_dma_pkt->src_addr;
+ sram_memory_addr = user_dma_pkt->dst_addr;
+ } else {
+ dev_dbg(hdev->dev, "DMA direction is SRAM --> DRAM\n");
+ sram_memory_addr = user_dma_pkt->src_addr;
+ dram_memory_addr = user_dma_pkt->dst_addr;
+ }
+
+ if (!hl_mem_area_inside_range(sram_memory_addr, user_dma_pkt->tsize,
+ hdev->asic_prop.sram_user_base_address,
+ hdev->asic_prop.sram_end_address)) {
+ dev_err(hdev->dev, "SRAM address 0x%llx + 0x%x is invalid\n",
+ sram_memory_addr, user_dma_pkt->tsize);
+ return -EFAULT;
+ }
+
+ if (!hl_mem_area_inside_range(dram_memory_addr, user_dma_pkt->tsize,
+ hdev->asic_prop.dram_user_base_address,
+ hdev->asic_prop.dram_end_address)) {
+ dev_err(hdev->dev, "DRAM address 0x%llx + 0x%x is invalid\n",
+ dram_memory_addr, user_dma_pkt->tsize);
+ return -EFAULT;
+ }
+
+ parser->patched_cb_size += sizeof(*user_dma_pkt);
+
+ return 0;
+}
+
+static int goya_validate_dma_pkt_no_mmu(struct hl_device *hdev,
+ struct hl_cs_parser *parser,
+ struct packet_lin_dma *user_dma_pkt)
+{
+ enum goya_dma_direction user_dir;
+ int rc;
+
+ dev_dbg(hdev->dev, "DMA packet details:\n");
+ dev_dbg(hdev->dev, "source == 0x%llx\n", user_dma_pkt->src_addr);
+ dev_dbg(hdev->dev, "destination == 0x%llx\n", user_dma_pkt->dst_addr);
+ dev_dbg(hdev->dev, "size == %u\n", user_dma_pkt->tsize);
+
+ user_dir = (user_dma_pkt->ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
+ GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
+
+ /*
+ * Special handling for DMA with size 0. The H/W has a bug where
+ * this can cause the QMAN DMA to get stuck, so block it here.
+ */
+ if (user_dma_pkt->tsize == 0) {
+ dev_err(hdev->dev,
+ "Got DMA with size 0, might reset the device\n");
+ return -EINVAL;
+ }
+
+ if ((user_dir == DMA_DRAM_TO_SRAM) || (user_dir == DMA_SRAM_TO_DRAM))
+ rc = goya_validate_dma_pkt_no_host(hdev, parser, user_dma_pkt);
+ else
+ rc = goya_validate_dma_pkt_host(hdev, parser, user_dma_pkt);
+
+ return rc;
+}
+
+static int goya_validate_dma_pkt_mmu(struct hl_device *hdev,
+ struct hl_cs_parser *parser,
+ struct packet_lin_dma *user_dma_pkt)
+{
+ dev_dbg(hdev->dev, "DMA packet details:\n");
+ dev_dbg(hdev->dev, "source == 0x%llx\n", user_dma_pkt->src_addr);
+ dev_dbg(hdev->dev, "destination == 0x%llx\n", user_dma_pkt->dst_addr);
+ dev_dbg(hdev->dev, "size == %u\n", user_dma_pkt->tsize);
+
+ /*
+ * WA for HW-23.
+ * We can't allow user to read from Host using QMANs other than 1.
+ */
+ if (parser->hw_queue_id > GOYA_QUEUE_ID_DMA_1 &&
+ hl_mem_area_inside_range(user_dma_pkt->src_addr,
+ user_dma_pkt->tsize,
+ hdev->asic_prop.va_space_host_start_address,
+ hdev->asic_prop.va_space_host_end_address)) {
+ dev_err(hdev->dev,
+ "Can't DMA from host on queue other then 1\n");
+ return -EFAULT;
+ }
+
+ if (user_dma_pkt->tsize == 0) {
+ dev_err(hdev->dev,
+ "Got DMA with size 0, might reset the device\n");
+ return -EINVAL;
+ }
+
+ parser->patched_cb_size += sizeof(*user_dma_pkt);
+
+ return 0;
+}
+
+static int goya_validate_wreg32(struct hl_device *hdev,
+ struct hl_cs_parser *parser,
+ struct packet_wreg32 *wreg_pkt)
+{
+ struct goya_device *goya = hdev->asic_specific;
+ u32 sob_start_addr, sob_end_addr;
+ u16 reg_offset;
+
+ reg_offset = wreg_pkt->ctl & GOYA_PKT_WREG32_CTL_REG_OFFSET_MASK;
+
+ dev_dbg(hdev->dev, "WREG32 packet details:\n");
+ dev_dbg(hdev->dev, "reg_offset == 0x%x\n", reg_offset);
+ dev_dbg(hdev->dev, "value == 0x%x\n", wreg_pkt->value);
+
+ if (reg_offset != (mmDMA_CH_1_WR_COMP_ADDR_LO & 0xFFFF)) {
+ dev_err(hdev->dev, "WREG32 packet with illegal address 0x%x\n",
+ reg_offset);
+ return -EPERM;
+ }
+
+ /*
+ * With MMU, DMA channels are not secured, so it doesn't matter where
+ * the WR COMP will be written to because it will go out with
+ * non-secured property
+ */
+ if (goya->hw_cap_initialized & HW_CAP_MMU)
+ return 0;
+
+ sob_start_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
+ sob_end_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1023);
+
+ if ((wreg_pkt->value < sob_start_addr) ||
+ (wreg_pkt->value > sob_end_addr)) {
+
+ dev_err(hdev->dev, "WREG32 packet with illegal value 0x%x\n",
+ wreg_pkt->value);
+ return -EPERM;
+ }
+
+ return 0;
+}
+
+static int goya_validate_cb(struct hl_device *hdev,
+ struct hl_cs_parser *parser, bool is_mmu)
+{
+ u32 cb_parsed_length = 0;
+ int rc = 0;
+
+ parser->patched_cb_size = 0;
+
+ /* cb_user_size is more than 0 so loop will always be executed */
+ while (cb_parsed_length < parser->user_cb_size) {
+ enum packet_id pkt_id;
+ u16 pkt_size;
+ void *user_pkt;
+
+ user_pkt = (void *) (uintptr_t)
+ (parser->user_cb->kernel_address + cb_parsed_length);
+
+ pkt_id = (enum packet_id) (((*(u64 *) user_pkt) &
+ PACKET_HEADER_PACKET_ID_MASK) >>
+ PACKET_HEADER_PACKET_ID_SHIFT);
+
+ pkt_size = goya_packet_sizes[pkt_id];
+ cb_parsed_length += pkt_size;
+ if (cb_parsed_length > parser->user_cb_size) {
+ dev_err(hdev->dev,
+ "packet 0x%x is out of CB boundary\n", pkt_id);
+ rc = -EINVAL;
+ break;
+ }
+
+ switch (pkt_id) {
+ case PACKET_WREG_32:
+ /*
+ * Although it is validated after copy in patch_cb(),
+ * need to validate here as well because patch_cb() is
+ * not called in MMU path while this function is called
+ */
+ rc = goya_validate_wreg32(hdev, parser, user_pkt);
+ break;
+
+ case PACKET_WREG_BULK:
+ dev_err(hdev->dev,
+ "User not allowed to use WREG_BULK\n");
+ rc = -EPERM;
+ break;
+
+ case PACKET_MSG_PROT:
+ dev_err(hdev->dev,
+ "User not allowed to use MSG_PROT\n");
+ rc = -EPERM;
+ break;
+
+ case PACKET_CP_DMA:
+ dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
+ rc = -EPERM;
+ break;
+
+ case PACKET_STOP:
+ dev_err(hdev->dev, "User not allowed to use STOP\n");
+ rc = -EPERM;
+ break;
+
+ case PACKET_LIN_DMA:
+ if (is_mmu)
+ rc = goya_validate_dma_pkt_mmu(hdev, parser,
+ user_pkt);
+ else
+ rc = goya_validate_dma_pkt_no_mmu(hdev, parser,
+ user_pkt);
+ break;
+
+ case PACKET_MSG_LONG:
+ case PACKET_MSG_SHORT:
+ case PACKET_FENCE:
+ case PACKET_NOP:
+ parser->patched_cb_size += pkt_size;
+ break;
+
+ default:
+ dev_err(hdev->dev, "Invalid packet header 0x%x\n",
+ pkt_id);
+ rc = -EINVAL;
+ break;
+ }
+
+ if (rc)
+ break;
+ }
+
+ /*
+ * The new CB should have space at the end for two MSG_PROT packets:
+ * 1. A packet that will act as a completion packet
+ * 2. A packet that will generate MSI-X interrupt
+ */
+ parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
+
+ return rc;
+}
+
+static int goya_patch_dma_packet(struct hl_device *hdev,
+ struct hl_cs_parser *parser,
+ struct packet_lin_dma *user_dma_pkt,
+ struct packet_lin_dma *new_dma_pkt,
+ u32 *new_dma_pkt_size)
+{
+ struct hl_userptr *userptr;
+ struct scatterlist *sg, *sg_next_iter;
+ u32 count, len, dma_desc_cnt, len_next;
+ dma_addr_t dma_addr, dma_addr_next;
+ enum goya_dma_direction user_dir;
+ u64 device_memory_addr, addr;
+ enum dma_data_direction dir;
+ struct sg_table *sgt;
+ bool skip_host_mem_pin = false;
+ bool user_memset;
+ u32 user_rdcomp_mask, user_wrcomp_mask;
+
+ user_dir = (user_dma_pkt->ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
+ GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
+
+ user_memset = (user_dma_pkt->ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
+ GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
+
+ if ((user_dir == DMA_DRAM_TO_SRAM) || (user_dir == DMA_SRAM_TO_DRAM) ||
+ (user_dma_pkt->tsize == 0)) {
+ memcpy(new_dma_pkt, user_dma_pkt, sizeof(*new_dma_pkt));
+ *new_dma_pkt_size = sizeof(*new_dma_pkt);
+ return 0;
+ }
+
+ if ((user_dir == DMA_HOST_TO_DRAM) || (user_dir == DMA_HOST_TO_SRAM)) {
+ addr = user_dma_pkt->src_addr;
+ device_memory_addr = user_dma_pkt->dst_addr;
+ dir = DMA_TO_DEVICE;
+ if (user_memset)
+ skip_host_mem_pin = true;
+ } else {
+ addr = user_dma_pkt->dst_addr;
+ device_memory_addr = user_dma_pkt->src_addr;
+ dir = DMA_FROM_DEVICE;
+ }
+
+ if ((!skip_host_mem_pin) &&
+ (hl_userptr_is_pinned(hdev, addr, user_dma_pkt->tsize,
+ parser->job_userptr_list, &userptr) == false)) {
+ dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
+ addr, user_dma_pkt->tsize);
+ return -EFAULT;
+ }
+
+ if ((user_memset) && (dir == DMA_TO_DEVICE)) {
+ memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
+ *new_dma_pkt_size = sizeof(*user_dma_pkt);
+ return 0;
+ }
+
+ user_rdcomp_mask =
+ (user_dma_pkt->ctl & GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK);
+
+ user_wrcomp_mask =
+ (user_dma_pkt->ctl & GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK);
+
+ sgt = userptr->sgt;
+ dma_desc_cnt = 0;
+
+ for_each_sg(sgt->sgl, sg, sgt->nents, count) {
+ len = sg_dma_len(sg);
+ dma_addr = sg_dma_address(sg);
+
+ if (len == 0)
+ break;
+
+ while ((count + 1) < sgt->nents) {
+ sg_next_iter = sg_next(sg);
+ len_next = sg_dma_len(sg_next_iter);
+ dma_addr_next = sg_dma_address(sg_next_iter);
+
+ if (len_next == 0)
+ break;
+
+ if ((dma_addr + len == dma_addr_next) &&
+ (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
+ len += len_next;
+ count++;
+ sg = sg_next_iter;
+ } else {
+ break;
+ }
+ }
+
+ new_dma_pkt->ctl = user_dma_pkt->ctl;
+ if (likely(dma_desc_cnt))
+ new_dma_pkt->ctl &= ~GOYA_PKT_CTL_EB_MASK;
+ new_dma_pkt->ctl &= ~(GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK |
+ GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK);
+ new_dma_pkt->tsize = len;
+
+ dma_addr += hdev->asic_prop.host_phys_base_address;
+
+ if (dir == DMA_TO_DEVICE) {
+ new_dma_pkt->src_addr = dma_addr;
+ new_dma_pkt->dst_addr = device_memory_addr;
+ } else {
+ new_dma_pkt->src_addr = device_memory_addr;
+ new_dma_pkt->dst_addr = dma_addr;
+ }
+
+ if (!user_memset)
+ device_memory_addr += len;
+ dma_desc_cnt++;
+ new_dma_pkt++;
+ }
+
+ if (!dma_desc_cnt) {
+ dev_err(hdev->dev,
+ "Error of 0 SG entries when patching DMA packet\n");
+ return -EFAULT;
+ }
+
+ /* Fix the last dma packet - rdcomp/wrcomp must be as user set them */
+ new_dma_pkt--;
+ new_dma_pkt->ctl |= (user_rdcomp_mask | user_wrcomp_mask);
+
+ *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
+
+ return 0;
+}
+
+static int goya_patch_cb(struct hl_device *hdev,
+ struct hl_cs_parser *parser)
+{
+ u32 cb_parsed_length = 0;
+ u32 cb_patched_cur_length = 0;
+ int rc = 0;
+
+ /* cb_user_size is more than 0 so loop will always be executed */
+ while (cb_parsed_length < parser->user_cb_size) {
+ enum packet_id pkt_id;
+ u16 pkt_size;
+ u32 new_pkt_size = 0;
+ void *user_pkt, *kernel_pkt;
+
+ user_pkt = (void *) (uintptr_t)
+ (parser->user_cb->kernel_address + cb_parsed_length);
+ kernel_pkt = (void *) (uintptr_t)
+ (parser->patched_cb->kernel_address +
+ cb_patched_cur_length);
+
+ pkt_id = (enum packet_id) (((*(u64 *) user_pkt) &
+ PACKET_HEADER_PACKET_ID_MASK) >>
+ PACKET_HEADER_PACKET_ID_SHIFT);
+
+ pkt_size = goya_packet_sizes[pkt_id];
+ cb_parsed_length += pkt_size;
+ if (cb_parsed_length > parser->user_cb_size) {
+ dev_err(hdev->dev,
+ "packet 0x%x is out of CB boundary\n", pkt_id);
+ rc = -EINVAL;
+ break;
+ }
+
+ switch (pkt_id) {
+ case PACKET_LIN_DMA:
+ rc = goya_patch_dma_packet(hdev, parser, user_pkt,
+ kernel_pkt, &new_pkt_size);
+ cb_patched_cur_length += new_pkt_size;
+ break;
+
+ case PACKET_WREG_32:
+ memcpy(kernel_pkt, user_pkt, pkt_size);
+ cb_patched_cur_length += pkt_size;
+ rc = goya_validate_wreg32(hdev, parser, kernel_pkt);
+ break;
+
+ case PACKET_WREG_BULK:
+ dev_err(hdev->dev,
+ "User not allowed to use WREG_BULK\n");
+ rc = -EPERM;
+ break;
+
+ case PACKET_MSG_PROT:
+ dev_err(hdev->dev,
+ "User not allowed to use MSG_PROT\n");
+ rc = -EPERM;
+ break;
+
+ case PACKET_CP_DMA:
+ dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
+ rc = -EPERM;
+ break;
+
+ case PACKET_STOP:
+ dev_err(hdev->dev, "User not allowed to use STOP\n");
+ rc = -EPERM;
+ break;
+
+ case PACKET_MSG_LONG:
+ case PACKET_MSG_SHORT:
+ case PACKET_FENCE:
+ case PACKET_NOP:
+ memcpy(kernel_pkt, user_pkt, pkt_size);
+ cb_patched_cur_length += pkt_size;
+ break;
+
+ default:
+ dev_err(hdev->dev, "Invalid packet header 0x%x\n",
+ pkt_id);
+ rc = -EINVAL;
+ break;
+ }
+
+ if (rc)
+ break;
+ }
+
+ return rc;
+}
+
+static int goya_parse_cb_mmu(struct hl_device *hdev,
+ struct hl_cs_parser *parser)
+{
+ u64 patched_cb_handle;
+ u32 patched_cb_size;
+ struct hl_cb *user_cb;
+ int rc;
+
+ /*
+ * The new CB should have space at the end for two MSG_PROT pkt:
+ * 1. A packet that will act as a completion packet
+ * 2. A packet that will generate MSI-X interrupt
+ */
+ parser->patched_cb_size = parser->user_cb_size +
+ sizeof(struct packet_msg_prot) * 2;
+
+ rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
+ parser->patched_cb_size,
+ &patched_cb_handle, HL_KERNEL_ASID_ID);
+
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to allocate patched CB for DMA CS %d\n",
+ rc);
+ return rc;
+ }
+
+ patched_cb_handle >>= PAGE_SHIFT;
+ parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
+ (u32) patched_cb_handle);
+ /* hl_cb_get should never fail here so use kernel WARN */
+ WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
+ (u32) patched_cb_handle);
+ if (!parser->patched_cb) {
+ rc = -EFAULT;
+ goto out;
+ }
+
+ /*
+ * The check that parser->user_cb_size <= parser->user_cb->size was done
+ * in validate_queue_index().
+ */
+ memcpy((void *) (uintptr_t) parser->patched_cb->kernel_address,
+ (void *) (uintptr_t) parser->user_cb->kernel_address,
+ parser->user_cb_size);
+
+ patched_cb_size = parser->patched_cb_size;
+
+ /* validate patched CB instead of user CB */
+ user_cb = parser->user_cb;
+ parser->user_cb = parser->patched_cb;
+ rc = goya_validate_cb(hdev, parser, true);
+ parser->user_cb = user_cb;
+
+ if (rc) {
+ hl_cb_put(parser->patched_cb);
+ goto out;
+ }
+
+ if (patched_cb_size != parser->patched_cb_size) {
+ dev_err(hdev->dev, "user CB size mismatch\n");
+ hl_cb_put(parser->patched_cb);
+ rc = -EINVAL;
+ goto out;
+ }
+
+out:
+ /*
+ * Always call cb destroy here because we still have 1 reference
+ * to it by calling cb_get earlier. After the job will be completed,
+ * cb_put will release it, but here we want to remove it from the
+ * idr
+ */
+ hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
+ patched_cb_handle << PAGE_SHIFT);
+
+ return rc;
+}
+
+int goya_parse_cb_no_mmu(struct hl_device *hdev, struct hl_cs_parser *parser)
+{
+ u64 patched_cb_handle;
+ int rc;
+
+ rc = goya_validate_cb(hdev, parser, false);
+
+ if (rc)
+ goto free_userptr;
+
+ rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
+ parser->patched_cb_size,
+ &patched_cb_handle, HL_KERNEL_ASID_ID);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to allocate patched CB for DMA CS %d\n", rc);
+ goto free_userptr;
+ }
+
+ patched_cb_handle >>= PAGE_SHIFT;
+ parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
+ (u32) patched_cb_handle);
+ /* hl_cb_get should never fail here so use kernel WARN */
+ WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
+ (u32) patched_cb_handle);
+ if (!parser->patched_cb) {
+ rc = -EFAULT;
+ goto out;
+ }
+
+ rc = goya_patch_cb(hdev, parser);
+
+ if (rc)
+ hl_cb_put(parser->patched_cb);
+
+out:
+ /*
+ * Always call cb destroy here because we still have 1 reference
+ * to it by calling cb_get earlier. After the job will be completed,
+ * cb_put will release it, but here we want to remove it from the
+ * idr
+ */
+ hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
+ patched_cb_handle << PAGE_SHIFT);
+
+free_userptr:
+ if (rc)
+ hl_userptr_delete_list(hdev, parser->job_userptr_list);
+ return rc;
+}
+
+int goya_parse_cb_no_ext_quque(struct hl_device *hdev,
+ struct hl_cs_parser *parser)
+{
+ struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
+ struct goya_device *goya = hdev->asic_specific;
+
+ if (!(goya->hw_cap_initialized & HW_CAP_MMU)) {
+ /* For internal queue jobs, just check if cb address is valid */
+ if (hl_mem_area_inside_range(
+ (u64) (uintptr_t) parser->user_cb,
+ parser->user_cb_size,
+ asic_prop->sram_user_base_address,
+ asic_prop->sram_end_address))
+ return 0;
+
+ if (hl_mem_area_inside_range(
+ (u64) (uintptr_t) parser->user_cb,
+ parser->user_cb_size,
+ asic_prop->dram_user_base_address,
+ asic_prop->dram_end_address))
+ return 0;
+
+ dev_err(hdev->dev,
+ "Internal CB address 0x%llx + 0x%x is not in SRAM nor in DRAM\n",
+ (u64) (uintptr_t) parser->user_cb,
+ parser->user_cb_size);
+
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
+{
+ struct goya_device *goya = hdev->asic_specific;
+
+ if (!parser->ext_queue)
+ return goya_parse_cb_no_ext_quque(hdev, parser);
+
+ if ((goya->hw_cap_initialized & HW_CAP_MMU) && parser->use_virt_addr)
+ return goya_parse_cb_mmu(hdev, parser);
+ else
+ return goya_parse_cb_no_mmu(hdev, parser);
+}
+
+void goya_add_end_of_cb_packets(u64 kernel_address, u32 len, u64 cq_addr,
+ u32 cq_val, u32 msix_vec)
+{
+ struct packet_msg_prot *cq_pkt;
+
+ cq_pkt = (struct packet_msg_prot *) (uintptr_t)
+ (kernel_address + len - (sizeof(struct packet_msg_prot) * 2));
+
+ cq_pkt->ctl = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
+ (1 << GOYA_PKT_CTL_EB_SHIFT) |
+ (1 << GOYA_PKT_CTL_MB_SHIFT);
+ cq_pkt->value = cq_val;
+ cq_pkt->addr = cq_addr;
+
+ cq_pkt++;
+
+ cq_pkt->ctl = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
+ (1 << GOYA_PKT_CTL_MB_SHIFT);
+ cq_pkt->value = msix_vec & 0x7FF;
+ cq_pkt->addr = CFG_BASE + mmPCIE_DBI_MSIX_DOORBELL_OFF;
+}
+