[REFACTOR][PY][API-Change] Polish tvm.runtime, tvm.runtime.module API update (#4837)

* [REFACTOR][PY-API] Polish tvm.runtime, tvm.runtime.module API update

This PR updates tvm.runtime to use the new FFI style.

- Remove top-level tvm.module to avoid confusion between runtime.Module and IRModule
- API changes with respect to runtime.Module
  - tvm.module.load -> tvm.runtime.load_module
  - tvm.module.enabled -> tvm.runtime.enabled
  - tvm.module.system_lib -> tvm.runtime.system_lib
- Remove the dependency on api_internal from runtime.

* Update module.load usages to the latest API
diff --git a/apps/gemm/python/tsim.py b/apps/gemm/python/tsim.py
index f5e5648..c0f7b13 100644
--- a/apps/gemm/python/tsim.py
+++ b/apps/gemm/python/tsim.py
@@ -62,7 +62,7 @@
     if hw_backend in ("verilog", "chisel"):
         hw_lib = osp.join(cur_path, "..", "hardware", hw_backend, "build", hw_libname)
     load_sw()
-    m = tvm.module.load(hw_lib, "vta-tsim")
+    m = tvm.runtime.load_module(hw_lib, "vta-tsim")
     f = tvm.get_global_func("tvm.vta.tsim.init")
     f(m)
 
diff --git a/apps/tsim_example/python/tsim.py b/apps/tsim_example/python/tsim.py
index f5e5648..c0f7b13 100644
--- a/apps/tsim_example/python/tsim.py
+++ b/apps/tsim_example/python/tsim.py
@@ -62,7 +62,7 @@
     if hw_backend in ("verilog", "chisel"):
         hw_lib = osp.join(cur_path, "..", "hardware", hw_backend, "build", hw_libname)
     load_sw()
-    m = tvm.module.load(hw_lib, "vta-tsim")
+    m = tvm.runtime.load_module(hw_lib, "vta-tsim")
     f = tvm.get_global_func("tvm.vta.tsim.init")
     f(m)
 
diff --git a/python/vta/testing/simulator.py b/python/vta/testing/simulator.py
index eb2c1c2..38c9467 100644
--- a/python/vta/testing/simulator.py
+++ b/python/vta/testing/simulator.py
@@ -40,7 +40,7 @@
         assert lib_hw # make sure to build vta/hardware/chisel
         try:
             f = tvm.get_global_func("vta.tsim.init")
-            m = tvm.module.load(lib_hw[0], "vta-tsim")
+            m = tvm.runtime.load_module(lib_hw[0], "vta-tsim")
             f(m)
             return lib_hw
         except OSError:
diff --git a/scripts/tune_resnet.py b/scripts/tune_resnet.py
index 18aee09..9d8ed89 100644
--- a/scripts/tune_resnet.py
+++ b/scripts/tune_resnet.py
@@ -196,7 +196,7 @@
     opt = parse_arguments()
 
     # Make sure that TVM was compiled with RPC=1
-    assert tvm.module.enabled("rpc")
+    assert tvm.runtime.enabled("rpc")
 
     # Read in VTA environment
     env = vta.get_env()
@@ -234,7 +234,7 @@
     # VTA target and execution context
     target = env.target if opt.device == "vta" else env.target_vta_cpu
     ctx = remote.ext_dev(0) if opt.device == "vta" else remote.cpu(0)
-    
+
     # Compile Relay program
     print("Initial compile...")
     relay_prog, params = compile_network(opt, env, target)
@@ -266,7 +266,7 @@
     tune_tasks(tasks, **tuning_opt)
 
     # Compile kernels with history best records
-    with autotvm.tophub.context(target, extra_files=[opt.log_filename]): 
+    with autotvm.tophub.context(target, extra_files=[opt.log_filename]):
 
         # Compile network
         print("Compiling network with best tuning parameters...")
diff --git a/src/dpi/module.cc b/src/dpi/module.cc
index 27161c4..bb8284c 100644
--- a/src/dpi/module.cc
+++ b/src/dpi/module.cc
@@ -418,7 +418,7 @@
   return Module(n);
 }
 
-TVM_REGISTER_GLOBAL("module.loadfile_vta-tsim")
+TVM_REGISTER_GLOBAL("runtime.module.loadfile_vta-tsim")
 .set_body([](TVMArgs args, TVMRetValue* rv) {
     *rv = DPIModuleNode::Load(args[0]);
   });
diff --git a/tests/python/integration/test_benchmark_topi_conv2d.py b/tests/python/integration/test_benchmark_topi_conv2d.py
index 942776f..af71561 100644
--- a/tests/python/integration/test_benchmark_topi_conv2d.py
+++ b/tests/python/integration/test_benchmark_topi_conv2d.py
@@ -227,7 +227,7 @@
         if device == "vta":
             target = env.target
             if env.TARGET not in ["sim", "tsim"]:
-                assert tvm.module.enabled("rpc")
+                assert tvm.runtime.enabled("rpc")
                 program_fpga(remote, bitstream=None)
                 reconfig_runtime(remote)
         elif device == "arm_cpu":
diff --git a/tests/python/integration/test_benchmark_topi_conv2d_transpose.py b/tests/python/integration/test_benchmark_topi_conv2d_transpose.py
index e2601d1..d729fa5 100644
--- a/tests/python/integration/test_benchmark_topi_conv2d_transpose.py
+++ b/tests/python/integration/test_benchmark_topi_conv2d_transpose.py
@@ -215,7 +215,7 @@
         if device == "vta":
             target = env.target
             if env.TARGET not in ["sim", "tsim"]:
-                assert tvm.module.enabled("rpc")
+                assert tvm.runtime.enabled("rpc")
                 program_fpga(remote, bitstream=None)
                 reconfig_runtime(remote)
         elif device == "arm_cpu":
diff --git a/tests/python/integration/test_benchmark_topi_dense.py b/tests/python/integration/test_benchmark_topi_dense.py
index 174e966..b0ee2f5 100644
--- a/tests/python/integration/test_benchmark_topi_dense.py
+++ b/tests/python/integration/test_benchmark_topi_dense.py
@@ -178,7 +178,7 @@
         if device == "vta":
             target = env.target
             if env.TARGET not in ["sim", "tsim"]:
-                assert tvm.module.enabled("rpc")
+                assert tvm.runtime.enabled("rpc")
                 program_fpga(remote, bitstream=None)
                 reconfig_runtime(remote)
         elif device == "arm_cpu":
diff --git a/tests/python/integration/test_benchmark_topi_group_conv2d.py b/tests/python/integration/test_benchmark_topi_group_conv2d.py
index 975d5b9..7bba244 100644
--- a/tests/python/integration/test_benchmark_topi_group_conv2d.py
+++ b/tests/python/integration/test_benchmark_topi_group_conv2d.py
@@ -127,7 +127,7 @@
         w_np = np.random.randint(w_min, w_max, size=w_shape).astype(kernel.dtype)
         b_np = np.random.randint(b_min, b_max, size=b_shape).astype(env.acc_dtype)
         r_np = topi.testing.conv2d_nchw_python(
-            a_np.astype(env.acc_dtype), w_np.astype(env.acc_dtype), 
+            a_np.astype(env.acc_dtype), w_np.astype(env.acc_dtype),
             (wl.hstride, wl.wstride), wl.hpad, wl.groups).astype(env.acc_dtype)
         return a_np, w_np, b_np, r_np
 
@@ -224,7 +224,7 @@
         if device == "vta":
             target = env.target
             if env.TARGET not in ["sim", "tsim"]:
-                assert tvm.module.enabled("rpc")
+                assert tvm.runtime.enabled("rpc")
                 program_fpga(remote, bitstream=None)
                 reconfig_runtime(remote)
         elif device == "arm_cpu":
diff --git a/tests/python/pynq/test_program_rpc.py b/tests/python/pynq/test_program_rpc.py
index a7ef6f2..2d8da5a 100644
--- a/tests/python/pynq/test_program_rpc.py
+++ b/tests/python/pynq/test_program_rpc.py
@@ -29,14 +29,14 @@
     ----------
     path : path to bitstream (optional)
     """
-    assert tvm.module.enabled("rpc")
+    assert tvm.runtime.enabled("rpc")
     remote = rpc.connect(host, port)
     program_fpga(remote, path)
 
 def reconfig_rpc_runtime():
     """Reconfig the RPC server runtime
     """
-    assert tvm.module.enabled("rpc")
+    assert tvm.runtime.enabled("rpc")
     remote = rpc.connect(host, port)
     reconfig_runtime(remote)
 
diff --git a/tutorials/frontend/deploy_vision_on_vta.py b/tutorials/frontend/deploy_vision_on_vta.py
index 154eb85..c410d24 100644
--- a/tutorials/frontend/deploy_vision_on_vta.py
+++ b/tutorials/frontend/deploy_vision_on_vta.py
@@ -60,7 +60,7 @@
 from vta.top import graph_pack
 
 # Make sure that TVM was compiled with RPC=1
-assert tvm.module.enabled("rpc")
+assert tvm.runtime.enabled("rpc")
 
 ######################################################################
 # Define the platform and model targets
@@ -243,7 +243,7 @@
 m.set_input('data', image)
 
 # Perform inference and gather execution statistics
-# More on: https://docs.tvm.ai/api/python/module.html#tvm.module.Module.time_evaluator
+# More on: https://docs.tvm.ai/api/python/module.html#tvm.runtime.Module.time_evaluator
 num = 4 # number of times we run module for a single measurement
 rep = 3 # number of measurements (we derive std dev from this)
 timer = m.module.time_evaluator("run", ctx, number=num, repeat=rep)
diff --git a/tutorials/matrix_multiply.py b/tutorials/matrix_multiply.py
index 70a899b..3e46b42 100644
--- a/tutorials/matrix_multiply.py
+++ b/tutorials/matrix_multiply.py
@@ -54,7 +54,7 @@
 if env.TARGET == "pynq":
 
     # Make sure that TVM was compiled with RPC=1
-    assert tvm.module.enabled("rpc")
+    assert tvm.runtime.enabled("rpc")
     remote = rpc.connect(host, port)
 
     # Reconfigure the JIT runtime
diff --git a/tutorials/optimize/convolution_opt.py b/tutorials/optimize/convolution_opt.py
index f1e0ba3..e5cf8e5 100644
--- a/tutorials/optimize/convolution_opt.py
+++ b/tutorials/optimize/convolution_opt.py
@@ -58,7 +58,7 @@
 if env.TARGET == "pynq":
 
     # Make sure that TVM was compiled with RPC=1
-    assert tvm.module.enabled("rpc")
+    assert tvm.runtime.enabled("rpc")
     remote = rpc.connect(host, port)
 
     # Reconfigure the JIT runtime
diff --git a/tutorials/optimize/matrix_multiply_opt.py b/tutorials/optimize/matrix_multiply_opt.py
index b20094a..2d54b97 100644
--- a/tutorials/optimize/matrix_multiply_opt.py
+++ b/tutorials/optimize/matrix_multiply_opt.py
@@ -57,7 +57,7 @@
 if env.TARGET == "pynq":
 
     # Make sure that TVM was compiled with RPC=1
-    assert tvm.module.enabled("rpc")
+    assert tvm.runtime.enabled("rpc")
     remote = rpc.connect(host, port)
 
     # Reconfigure the JIT runtime
diff --git a/tutorials/vta_get_started.py b/tutorials/vta_get_started.py
index 93a0add..dd30515 100644
--- a/tutorials/vta_get_started.py
+++ b/tutorials/vta_get_started.py
@@ -78,7 +78,7 @@
 if env.TARGET == "pynq":
 
     # Make sure that TVM was compiled with RPC=1
-    assert tvm.module.enabled("rpc")
+    assert tvm.runtime.enabled("rpc")
     remote = rpc.connect(host, port)
 
     # Reconfigure the JIT runtime