# blob: a324f3e190192840fd601aef2ed8d58f5ce367b8 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""VTA Package configuration module
This module is dependency free and can be used to configure package.
"""
from __future__ import absolute_import as _abs
import json
import glob
import os
def get_vta_hw_path():
    """Get the VTA HW path.

    Returns
    -------
    path : str
        The value of the ``VTA_HW_PATH`` environment variable when it is set
        to a non-empty string; otherwise the absolute path of the parent of
        the directory that contains this file.
    """
    curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
    vta_hw_default = os.path.abspath(os.path.join(curr_path, ".."))
    # An exported-but-empty VTA_HW_PATH is treated like an unset one, so that
    # callers never build paths such as "-I/include" from an empty prefix.
    return os.environ.get("VTA_HW_PATH") or vta_hw_default
def get_tvm_path():
    """Get the TVM path.

    Returns
    -------
    path : str
        The value of the ``TVM_PATH`` environment variable when it is set to
        a non-empty string; otherwise the absolute path three levels above
        the directory that contains this file.
    """
    curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
    tvm_default = os.path.abspath(os.path.join(curr_path, "../../.."))
    # An exported-but-empty TVM_PATH is treated like an unset one, so that
    # callers never build paths such as "-I/include" from an empty prefix.
    return os.environ.get("TVM_PATH") or tvm_default
class PkgConfig(object):
    """Simple package config tool for VTA.

    This is used to provide runtime specific configurations.

    Parameters
    ----------
    cfg : dict
        The config dictionary. Every key listed in ``cfg_keys`` is read
        during construction. The dictionary is extended in place with the
        derived ``LOG_BLOCK_IN``, ``LOG_BLOCK_OUT``, ``LOG_OUT_WIDTH`` and
        ``LOG_OUT_BUFF_SIZE`` entries, and every entry is also exposed as an
        attribute of the instance.
    """

    cfg_keys = [
        "TARGET",
        "LOG_INP_WIDTH",
        "LOG_WGT_WIDTH",
        "LOG_ACC_WIDTH",
        "LOG_BATCH",
        "LOG_BLOCK",
        "LOG_UOP_BUFF_SIZE",
        "LOG_INP_BUFF_SIZE",
        "LOG_WGT_BUFF_SIZE",
        "LOG_ACC_BUFF_SIZE",
    ]

    def __init__(self, cfg):
        # Derived parameters.
        # NOTE: these are written into the caller's dict; this in-place
        # extension is kept as-is for backward compatibility.
        cfg["LOG_BLOCK_IN"] = cfg["LOG_BLOCK"]
        cfg["LOG_BLOCK_OUT"] = cfg["LOG_BLOCK"]
        cfg["LOG_OUT_WIDTH"] = cfg["LOG_INP_WIDTH"]
        cfg["LOG_OUT_BUFF_SIZE"] = (
            cfg["LOG_ACC_BUFF_SIZE"] +
            cfg["LOG_OUT_WIDTH"] -
            cfg["LOG_ACC_WIDTH"])
        # Update cfg now that we've extended it
        self.__dict__.update(cfg)

        # Include paths, library sources and linker flags
        self._init_build_flags()

        # Derive bitstream config string.
        self.bitstream = "{}x{}_i{}w{}a{}_{}_{}_{}_{}".format(
            (1 << cfg["LOG_BATCH"]),
            (1 << cfg["LOG_BLOCK"]),
            (1 << cfg["LOG_INP_WIDTH"]),
            (1 << cfg["LOG_WGT_WIDTH"]),
            (1 << cfg["LOG_ACC_WIDTH"]),
            cfg["LOG_UOP_BUFF_SIZE"],
            cfg["LOG_INP_BUFF_SIZE"],
            cfg["LOG_WGT_BUFF_SIZE"],
            cfg["LOG_ACC_BUFF_SIZE"])

        # Derive FPGA parameters from target
        self._init_fpga_params()

        # Set coherence settings. Coherent accesses are always enabled.
        # - axi_cache_bits: ARCACHE/AWCACHE signals for the AXI bus
        #   (e.g. 1111 is write-back read and write allocate)
        self.coherent = True
        self.axi_cache_bits = "1111"

        self._init_register_offsets()
        self._init_sram_params(cfg)
        self._init_macro_defs(cfg)

    def _init_build_flags(self):
        """Set ``include_path``, ``lib_source`` and ``ldflags`` for the target."""
        # VTA_HW path and TVM_PATH
        vta_hw_path = get_vta_hw_path()
        tvm_path = get_tvm_path()
        # Boards that use the pynq driver stack (see pynq.io)
        uses_pynq_driver = self.TARGET in ("pynq", "ultra96", "zcu104")

        # Include path
        self.include_path = [
            "-I%s/include" % tvm_path,
            "-I%s/include" % vta_hw_path,
            "-I%s/3rdparty/dlpack/include" % tvm_path,
            "-I%s/3rdparty/dmlc-core/include" % tvm_path,
        ]

        # List of source files that can be used to build standalone library.
        self.lib_source = glob.glob("%s/src/*.cc" % vta_hw_path)
        if uses_pynq_driver:
            self.lib_source += glob.glob("%s/src/pynq/*.cc" % vta_hw_path)
        elif self.TARGET == "de10nano":
            self.lib_source += glob.glob("%s/src/de10nano/*.cc" % vta_hw_path)
            self.include_path += [
                "-I%s/src/de10nano" % vta_hw_path,
                "-I%s/3rdparty" % tvm_path,
            ]

        # Linker flags
        if uses_pynq_driver:
            self.ldflags = ["-L/usr/lib", "-l:libcma.so"]
        else:
            self.ldflags = []

    def _init_fpga_params(self):
        """Derive the FPGA parameters from ``self.TARGET``.

        - device: part number
        - family: fpga family
        - board / board_rev: board part and revision (``None`` when unset)
        - freq: PLL frequency
        - per: clock period to achieve in HLS
          (how aggressively design is pipelined)
        - axi_bus_width: axi bus width used for DMA transactions
          (property of FPGA memory interface)
        - axi_prot_bits: ARPROT/AWPROT signals for the AXI bus

        Any target other than de10nano, ultra96 and zcu104 gets the pynq
        parameters.
        """
        target = self.TARGET
        # Identical for every supported target
        self.ip_reg_map_range = "0x1000"
        if target == "de10nano":
            self.fpga_device = "5CSEBA6U23I7"
            self.fpga_family = "Cyclone\\ V"
            # Defined (as None) so that every target exposes the same set of
            # attributes; previously reading them raised AttributeError.
            self.fpga_board = None
            self.fpga_board_rev = None
            # TODO: The following parameters have not been propagated into
            # current Chisel-based implement of VTA hardware for DE10-Nano.
            # A future change should be made to propagate these parameters,
            # in order to avoid duplicated definition.
            self.fpga_freq = 100
            self.fpga_per = 2
            self.fpga_log_axi_bus_width = 6
            self.axi_prot_bits = "100"
            # IP register address map
            self.fetch_base_addr = "0xFF220000"
            self.load_base_addr = "0xFF221000"
            self.compute_base_addr = "0xFF222000"
            self.store_base_addr = "0xFF223000"
        elif target == "ultra96":
            self.fpga_device = "xczu3eg-sbva484-1-e"
            self.fpga_family = "zynq-ultrascale+"
            self.fpga_board = None
            self.fpga_board_rev = None
            self.fpga_freq = 333
            self.fpga_per = 2
            self.fpga_log_axi_bus_width = 7
            self.axi_prot_bits = "010"
            # IP register address map
            self.fetch_base_addr = "0xA0000000"
            self.load_base_addr = "0xA0001000"
            self.compute_base_addr = "0xA0002000"
            self.store_base_addr = "0xA0003000"
        elif target == "zcu104":
            self.fpga_device = "xczu7ev-ffvc1156-2-e"
            self.fpga_family = "zynq-ultrascale+"
            self.fpga_board = "xilinx.com:zcu104:part0"
            self.fpga_board_rev = "1.1"
            self.fpga_freq = 333
            self.fpga_per = 2
            self.fpga_log_axi_bus_width = 7
            self.axi_prot_bits = "010"
            # IP register address map
            self.fetch_base_addr = "0xA0000000"
            self.load_base_addr = "0xA0001000"
            self.compute_base_addr = "0xA0002000"
            self.store_base_addr = "0xA0003000"
        else:
            # By default, we use the pynq parameters
            self.fpga_device = "xc7z020clg484-1"
            self.fpga_family = "zynq-7000"
            self.fpga_board = None
            self.fpga_board_rev = None
            self.fpga_freq = 100
            self.fpga_per = 7
            self.fpga_log_axi_bus_width = 6
            self.axi_prot_bits = "000"
            # IP register address map
            self.fetch_base_addr = "0x43C00000"
            self.load_base_addr = "0x43C01000"
            self.compute_base_addr = "0x43C02000"
            self.store_base_addr = "0x43C03000"

    def _init_register_offsets(self):
        """Define the IP memory mapped register offsets.

        In HLS 0x00-0x0C is reserved for block-level I/O protocol.
        Consecutive registers are kept 8B apart to maintain compatibility
        with 64bit systems.
        """
        first = 0x10
        stride = 0x08
        self.fetch_insn_count_offset = first
        self.fetch_insn_addr_offset = self.fetch_insn_count_offset + stride
        self.load_inp_addr_offset = first
        self.load_wgt_addr_offset = self.load_inp_addr_offset + stride
        self.compute_done_wr_offset = first
        self.compute_done_rd_offset = self.compute_done_wr_offset + stride
        self.compute_uop_addr_offset = self.compute_done_rd_offset + stride
        self.compute_bias_addr_offset = self.compute_uop_addr_offset + stride
        self.store_out_addr_offset = first

    @staticmethod
    def _mem_layout(log_elem_width, log_size, mem_bus_width, max_bus_width):
        """Return ``(size, banks, width, depth, axi_ratio)`` for one SRAM.

        ``log_elem_width`` is the log2 of the bit width of one memory
        element and ``log_size`` the log2 of the buffer size in bytes.
        """
        elem_width = 1 << log_elem_width
        size = 1 << log_size  # bytes
        # Number of banks: element width divided by the max width, rounded up
        banks = (elem_width + max_bus_width - 1) // max_bus_width
        width = min(elem_width, max_bus_width)
        depth = size * 8 // elem_width
        axi_ratio = width // mem_bus_width
        return size, banks, width, depth, axi_ratio

    def _init_sram_params(self, cfg):
        """Derive the SRAM parameters of the input, weight and output memories.

        The goal here is to determine how many memory banks are needed,
        how deep and wide each bank needs to be. This is derived from
        the size of each memory element (result of data width, and tensor
        shape), and also how wide a memory can be as permitted by the FPGA
        tools.

        The mem axi ratio is a parameter used by HLS to resize memories
        so memory read/write ports are the same size as the design axi bus
        width.
        """
        # Max bus width allowed (property of FPGA vendor toolchain)
        max_bus_width = 1024
        # Bus width of a memory interface
        mem_bus_width = 1 << self.fpga_log_axi_bus_width

        # Input memory
        (self.inp_mem_size,
         self.inp_mem_banks,
         self.inp_mem_width,
         self.inp_mem_depth,
         self.inp_mem_axi_ratio) = self._mem_layout(
             cfg["LOG_INP_WIDTH"] + cfg["LOG_BATCH"] + cfg["LOG_BLOCK_IN"],
             cfg["LOG_INP_BUFF_SIZE"],
             mem_bus_width,
             max_bus_width)

        # Weight memory
        (self.wgt_mem_size,
         self.wgt_mem_banks,
         self.wgt_mem_width,
         self.wgt_mem_depth,
         self.wgt_mem_axi_ratio) = self._mem_layout(
             cfg["LOG_WGT_WIDTH"] + cfg["LOG_BLOCK_IN"] + cfg["LOG_BLOCK_OUT"],
             cfg["LOG_WGT_BUFF_SIZE"],
             mem_bus_width,
             max_bus_width)

        # Output memory
        (self.out_mem_size,
         self.out_mem_banks,
         self.out_mem_width,
         self.out_mem_depth,
         self.out_mem_axi_ratio) = self._mem_layout(
             cfg["LOG_OUT_WIDTH"] + cfg["LOG_BATCH"] + cfg["LOG_BLOCK_OUT"],
             cfg["LOG_OUT_BUFF_SIZE"],
             mem_bus_width,
             max_bus_width)

    def _init_macro_defs(self, cfg):
        """Build ``macro_defs`` (``-DVTA_*`` flags) and the ``cfg_dict`` snapshot."""
        # One macro per config entry, plus a copy of the (extended) config
        self.macro_defs = ["-DVTA_%s=%s" % (key, cfg[key]) for key in cfg]
        self.cfg_dict = {key: cfg[key] for key in cfg}

        extra_macros = [
            ("LOG_BUS_WIDTH", self.fpga_log_axi_bus_width),
            # Macros used by the VTA driver
            ("IP_REG_MAP_RANGE", self.ip_reg_map_range),
            ("FETCH_ADDR", self.fetch_base_addr),
            ("LOAD_ADDR", self.load_base_addr),
            ("COMPUTE_ADDR", self.compute_base_addr),
            ("STORE_ADDR", self.store_base_addr),
            # IP register offsets
            ("FETCH_INSN_COUNT_OFFSET", self.fetch_insn_count_offset),
            ("FETCH_INSN_ADDR_OFFSET", self.fetch_insn_addr_offset),
            ("LOAD_INP_ADDR_OFFSET", self.load_inp_addr_offset),
            ("LOAD_WGT_ADDR_OFFSET", self.load_wgt_addr_offset),
            ("COMPUTE_DONE_WR_OFFSET", self.compute_done_wr_offset),
            ("COMPUTE_DONE_RD_OFFSET", self.compute_done_rd_offset),
            ("COMPUTE_UOP_ADDR_OFFSET", self.compute_uop_addr_offset),
            ("COMPUTE_BIAS_ADDR_OFFSET", self.compute_bias_addr_offset),
            ("STORE_OUT_ADDR_OFFSET", self.store_out_addr_offset),
            # Coherency
            ("COHERENT_ACCESSES", "true" if self.coherent else "false"),
        ]
        for name, value in extra_macros:
            self.macro_defs.append("-DVTA_%s=%s" % (name, value))

    @property
    def cflags(self):
        """list of str: the include path flags followed by the macro definitions."""
        return self.include_path + self.macro_defs

    @property
    def cfg_json(self):
        """str: ``cfg_dict`` serialized as JSON with an indent of 2."""
        return json.dumps(self.cfg_dict, indent=2)

    def same_config(self, cfg):
        """Compare if cfg is same as current config.

        Every entry of the current config (including the derived ones) has
        to be present in ``cfg`` with an equal value; additional entries in
        ``cfg`` are ignored.

        Parameters
        ----------
        cfg : dict
            The configuration

        Returns
        -------
        equal : bool
            Whether the configuration is the same.
        """
        return all(k in cfg and cfg[k] == v for k, v in self.cfg_dict.items())