blob: 0e3fbd2048f61942d26887c9b2db52c903e12dd9 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include <algorithm>
#include "hexagon_common.h"
#include "hexagon_user_dma_descriptors.h"
#include "hexagon_user_dma_instructions.h"
#include "hexagon_user_dma_registers.h"
namespace tvm {
namespace runtime {
namespace hexagon {
/*!
 * \brief Bring the user DMA engine into a known state before first use.
 *
 * When built for Hexagon architecture 68 or newer, this calls dmpause()
 * (described by the original author as resetting the DMA engine), masks the
 * returned value with DM0_STATUS_MASK and compares it with DM0_STATUS_IDLE.
 * On older architectures nothing is done.
 *
 * \return DMA_SUCCESS if the masked status equals DM0_STATUS_IDLE (or if the
 *         target architecture is older than 68); DMA_FAILURE otherwise.
 */
int init_hexagon_user_dma() {
#if __HEXAGON_ARCH__ >= 68
  // Only the DM0 status bits of the value returned by dmpause() matter here.
  const unsigned int engine_state = dmpause() & DM0_STATUS_MASK;
  return (engine_state == DM0_STATUS_IDLE) ? DMA_SUCCESS : DMA_FAILURE;
#else
  return DMA_SUCCESS;
#endif
}
/*!
 * \brief Perform a single 1D user DMA transfer and wait for it to finish.
 *
 * Only active when built for Hexagon architecture 68 or newer; on older
 * architectures the function always reports failure.
 *
 * \param dst Destination address; its integer value must not exceed DESC_DST_MASK.
 * \param src Source address; its integer value must not exceed DESC_SRC_MASK.
 * \param length Number of bytes to transfer; must not exceed DESC_LENGTH_MASK.
 *
 * \return DMA_SUCCESS when, after dmwait(), the masked engine status equals
 *         DM0_STATUS_IDLE and the descriptor's done field equals
 *         DESC_DONE_COMPLETE. DMA_FAILURE if engine initialization failed, an
 *         argument is out of range, the descriptor could not be allocated, or
 *         the completion check did not pass.
 */
int hexagon_user_dma_1d_sync_helper(void* dst, void* src, uint32_t length) {
#if __HEXAGON_ARCH__ >= 68
  // The engine is initialized once, on the first call; the outcome is cached.
  static int engine_ready = init_hexagon_user_dma();
  if (engine_ready != DMA_SUCCESS) {
    return DMA_FAILURE;
  }

  // The descriptor stores 32-bit addresses and a 24-bit length, so reject
  // anything that does not fit in those fields.
  const uint64_t src_addr = reinterpret_cast<uint64_t>(src);
  const uint64_t dst_addr = reinterpret_cast<uint64_t>(dst);
  if (src_addr > DESC_SRC_MASK || dst_addr > DESC_DST_MASK || length > DESC_LENGTH_MASK) {
    return DMA_FAILURE;
  }
  const uint32_t src_field = src_addr & DESC_SRC_MASK;
  const uint32_t dst_field = dst_addr & DESC_DST_MASK;

  // The descriptor buffer is both sized and aligned to DMA_DESC_2D_SIZE.
  void* descriptor = nullptr;
  const int alloc_rc = posix_memalign(&descriptor, DMA_DESC_2D_SIZE, DMA_DESC_2D_SIZE);
  if (alloc_rc != 0 || descriptor == nullptr) {
    return DMA_FAILURE;
  }

  // Fill in every descriptor field for an uncompressed, ordered 1D transfer
  // with no follow-on descriptor.
  dma_desc_set_state(descriptor, DESC_STATE_READY);
  dma_desc_set_next(descriptor, DMA_NULL_PTR);
  dma_desc_set_length(descriptor, length);
  dma_desc_set_desctype(descriptor, DESC_DESCTYPE_1D);
  dma_desc_set_dstcomp(descriptor, DESC_COMP_NONE);
  dma_desc_set_srccomp(descriptor, DESC_COMP_NONE);
  dma_desc_set_bypassdst(descriptor, DESC_BYPASS_OFF);
  dma_desc_set_bypasssrc(descriptor, DESC_BYPASS_OFF);
  dma_desc_set_order(descriptor, DESC_ORDER_ORDER);
  dma_desc_set_done(descriptor, DESC_DONE_INCOMPLETE);
  dma_desc_set_src(descriptor, src_field);
  dma_desc_set_dst(descriptor, dst_field);

  // Start the transfer, then wait and capture the engine status.
  dmstart(descriptor);
  const unsigned int engine_state = dmwait() & DM0_STATUS_MASK;

  // The done field must be read before the descriptor is released.
  const unsigned int done_flag = dma_desc_get_done(descriptor);
  free(descriptor);

  const bool transfer_ok = (engine_state == DM0_STATUS_IDLE) && (done_flag == DESC_DONE_COMPLETE);
  return transfer_ok ? DMA_SUCCESS : DMA_FAILURE;
#else
  return DMA_FAILURE;
#endif
}
/*!
 * \brief Copy `length` bytes from `src` to `dst` using 1D user DMA transfers.
 *
 * A single transfer can move at most DESC_LENGTH_MASK bytes, so larger
 * requests are broken into consecutive chunks of at most that size, each
 * handed to hexagon_user_dma_1d_sync_helper().
 *
 * \param dst Start of the destination buffer.
 * \param src Start of the source buffer.
 * \param length Total number of bytes to copy.
 *
 * \return DMA_SUCCESS if every chunk was transferred successfully; otherwise
 *         the value returned by the first helper call that did not succeed.
 */
int hexagon_user_dma_1d_sync(void* dst, void* src, uint32_t length) {
  // Fast path: the whole request fits into one transfer.
  if (length <= DESC_LENGTH_MASK) {
    return hexagon_user_dma_1d_sync_helper(dst, src, length);
  }

  // Slow path: walk both buffers in lock-step, one chunk at a time.
  char* dst_cursor = static_cast<char*>(dst);
  char* src_cursor = static_cast<char*>(src);
  uint32_t bytes_left = length;
  while (bytes_left > 0) {
    // Each chunk is capped at the maximum transfer size; since the chunk never
    // exceeds bytes_left, the subtraction below cannot wrap around.
    const uint32_t chunk = std::min<uint32_t>(bytes_left, DESC_LENGTH_MASK);
    const int rc = hexagon_user_dma_1d_sync_helper(dst_cursor, src_cursor, chunk);
    if (rc != DMA_SUCCESS) {
      return rc;
    }
    dst_cursor += chunk;
    src_cursor += chunk;
    bytes_left -= chunk;
  }
  return DMA_SUCCESS;
}
} // namespace hexagon
} // namespace runtime
} // namespace tvm