[VTA][HotFix] Relay->VTA quantization fix (#4433)
* Fix the Relay -> VTA quantization flow in the VTA scripts and tutorials
* Set opt_level to 3 during quantization so that batch norm gets folded
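The underlying issue: `relay.quantize.quantize` runs a prerequisite optimization pipeline, and the passes that fold batch norm into the preceding convolutions (SimplifyInference, then FoldScaleAxis and FoldConstant) only all fire at opt_level 3, so quantizing outside a `relay.build_config(opt_level=3)` scope left batch-norm arithmetic in the graph that the VTA flow cannot handle. Below is a minimal sketch of the fixed pattern that the three hunks converge on; the bundled ResNet-18 test workload is an illustrative stand-in, not part of this patch:

```python
import tvm
from tvm import relay
from tvm.relay import testing

# Stand-in network with conv + batch-norm blocks; any frontend-imported
# module exercises the same path (assumption: TVM 0.6-era APIs, where
# relay.build_config is the pass-level context manager).
mod, params = testing.resnet.get_workload(num_layers=18, batch_size=1)

# The fix: quantize inside an opt_level=3 build config so batch norm is
# folded into the conv weights before calibration.
with relay.build_config(opt_level=3):
    with relay.quantize.qconfig(global_scale=8.0,
                                skip_conv_layers=[0]):
        mod = relay.quantize.quantize(mod, params=params)
```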
diff --git a/vta/scripts/tune_resnet.py b/vta/scripts/tune_resnet.py
index 00fe1e8..18aee09 100644
--- a/vta/scripts/tune_resnet.py
+++ b/vta/scripts/tune_resnet.py
@@ -125,9 +125,11 @@
     dtype_dict.update({k: str(v.dtype) for k, v in params.items()})
 
     # Perform quantization in Relay
-    with relay.quantize.qconfig(global_scale=8.0,
-                                skip_conv_layers=[0]):
-        relay_prog = relay.quantize.quantize(mod["main"], params=params)
+    # Note: We set opt_level to 3 in order to fold batch norm
+    with relay.build_config(opt_level=3):
+        with relay.quantize.qconfig(global_scale=8.0,
+                                    skip_conv_layers=[0]):
+            relay_prog = relay.quantize.quantize(mod["main"], params=params)
 
     # Perform graph packing and constant folding for VTA target
     if target.device_name == "vta":
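The `qconfig` scope itself is unchanged by this patch. For readers tuning it, here is the same call with the defaults it implicitly leans on written out; the spelled-out values mirror Relay's 0.6-era defaults and are illustrative, not part of the diff:

```python
from tvm import relay

# Same configuration as the hunk above, defaults made explicit.
qconfig = relay.quantize.qconfig(
    nbit_input=8,           # activations entering quantized convs: int8
    nbit_weight=8,          # weights: int8
    nbit_activation=32,     # accumulators: int32
    global_scale=8.0,       # fixed global calibration scale
    skip_conv_layers=[0])   # leave the first conv layer unquantized
```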
diff --git a/vta/tutorials/autotvm/tune_relay_vta.py b/vta/tutorials/autotvm/tune_relay_vta.py
index 97dd742..a9ab6d7 100644
--- a/vta/tutorials/autotvm/tune_relay_vta.py
+++ b/vta/tutorials/autotvm/tune_relay_vta.py
@@ -89,15 +89,17 @@
     dtype_dict.update({k: str(v.dtype) for k, v in params.items()})
 
     # Perform quantization in Relay
-    with relay.quantize.qconfig(global_scale=8.0,
-                                skip_conv_layers=[0]):
-        relay_prog = relay.quantize.quantize(mod["main"], params=params)
+    # Note: We set opt_level to 3 in order to fold batch norm
+    with relay.build_config(opt_level=3):
+        with relay.quantize.qconfig(global_scale=8.0,
+                                    skip_conv_layers=[0]):
+            mod = relay.quantize.quantize(mod, params=params)
 
     # Perform graph packing and constant folding for VTA target
     if target.device_name == "vta":
         assert env.BLOCK_IN == env.BLOCK_OUT
         relay_prog = graph_pack(
-            relay_prog,
+            mod["main"],
             env.BATCH,
             env.BLOCK_OUT,
             env.WGT_WIDTH,
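On the `graph_pack` change just above: `relay.quantize.quantize` now consumes and returns a whole module, so the packer receives `mod["main"]` rather than a bare function. Annotated for reference (a sketch; `env`, `pack_dict`, and `model` are defined earlier in the tutorial):

```python
from vta.top import graph_pack  # packing pass shipped with VTA

relay_prog = graph_pack(
    mod["main"],                     # entry function of the quantized module
    env.BATCH,                       # batch tiling factor of VTA's GEMM core
    env.BLOCK_OUT,                   # output-channel tiling factor
    env.WGT_WIDTH,                   # weight bit width
    start_name=pack_dict[model][0],  # op where packing starts
    stop_name=pack_dict[model][1])   # op where packing stops
```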
diff --git a/vta/tutorials/frontend/deploy_vision_on_vta.py b/vta/tutorials/frontend/deploy_vision_on_vta.py
index a508fc4..a316986 100644
--- a/vta/tutorials/frontend/deploy_vision_on_vta.py
+++ b/vta/tutorials/frontend/deploy_vision_on_vta.py
@@ -168,18 +168,20 @@
 
 if target.device_name == "vta":
     # Perform quantization in Relay
-    with relay.quantize.qconfig(global_scale=8.0,
-                                skip_conv_layers=[0]):
-        relay_prog = relay.quantize.quantize(mod["main"], params=params)
-    # Perform graph packing and constant folding for VTA target
-    assert env.BLOCK_IN == env.BLOCK_OUT
-    relay_prog = graph_pack(
-        relay_prog,
-        env.BATCH,
-        env.BLOCK_OUT,
-        env.WGT_WIDTH,
-        start_name=pack_dict[model][0],
-        stop_name=pack_dict[model][1])
+    # Note: We set opt_level to 3 in order to fold batch norm
+    with relay.build_config(opt_level=3):
+        with relay.quantize.qconfig(global_scale=8.0,
+                                    skip_conv_layers=[0]):
+            mod = relay.quantize.quantize(mod, params=params)
+        # Perform graph packing and constant folding for VTA target
+        assert env.BLOCK_IN == env.BLOCK_OUT
+        relay_prog = graph_pack(
+            mod["main"],
+            env.BATCH,
+            env.BLOCK_OUT,
+            env.WGT_WIDTH,
+            start_name=pack_dict[model][0],
+            stop_name=pack_dict[model][1])
 else:
     relay_prog = mod["main"]
 
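After packing, each of these files hands `relay_prog` to the compiler; on the VTA target that happens inside `vta.build_config()`. A hedged sketch of that next step with the 0.6-era three-tuple `relay.build` return (`target`, `env`, and `relay_prog` follow the surrounding scripts):

```python
import vta
from tvm import relay

# Compile the packed program for VTA; non-VTA targets build mod["main"]
# directly instead (sketch, not part of this diff).
with vta.build_config():
    graph, lib, params = relay.build(
        relay_prog, target=target,
        params=params, target_host=env.target_host)
```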