improve profile performance (#69)

* improve profile performance
* support profile nested class
* support total_wasted time
* calculate totalChildrenWasted time for echarts
* only profile super class in the same package
* improve perf through local-tree,local-timer,local-stack
* improve test case
* add Stopwatch remove-child test
* fix failure to profile a parent class when the 2 classes are not in the same package
* add totalChildrenTimes
* improve LocalTimer perf
* add PerfUtil.useLightStopwatch(true)
* define behavior of switching light-stopwatch process
* ignore other threads if profileSingleThread(true) with multi-threads
* improve the empty check on the first call of useLightStopwatch()

Change-Id: Id1ae075ddffec77b95b31142d7ebcdae87371943
diff --git a/src/main/java/com/baidu/hugegraph/perf/LightStopwatch.java b/src/main/java/com/baidu/hugegraph/perf/LightStopwatch.java
new file mode 100644
index 0000000..b57ae37
--- /dev/null
+++ b/src/main/java/com/baidu/hugegraph/perf/LightStopwatch.java
@@ -0,0 +1,188 @@
+/*
+ * Copyright 2017 HugeGraph Authors
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+
+package com.baidu.hugegraph.perf;
+
+import java.util.List;
+
+import com.baidu.hugegraph.perf.PerfUtil.FastMap;
+
+public final class LightStopwatch implements Stopwatch {
+
+    private long lastStartTime = -1L;
+
+    private long times = 0L;
+    private long totalCost = 0L;
+    private long totalChildrenTimes = -1L;
+
+    private final String name;
+    private final Path parent;
+    private final Path id;
+    private final FastMap<String, Stopwatch> children;
+
+    public LightStopwatch(String name, Stopwatch parent) {
+        this(name, parent.id());
+        parent.child(name, this);
+    }
+
+    public LightStopwatch(String name, Path parent) {
+        this.name = name;
+        this.parent = parent;
+        this.id = Stopwatch.id(parent, name);
+        this.children = new FastMap<>();
+    }
+
+    @Override
+    public Path id() {
+        return this.id;
+    }
+
+    @Override
+    public String name() {
+        return this.name;
+    }
+
+    @Override
+    public Path parent() {
+        return this.parent;
+    }
+
+    @Override
+    public void lastStartTime(long startTime) {
+        this.lastStartTime = startTime;
+    }
+
+    @Override
+    public void startTime(long startTime) {
+        this.times++;
+        this.lastStartTime = startTime;
+    }
+
+    @Override
+    public void endTime(long startTime) { // NOTE: startTime is unused here
+        this.totalCost += PerfUtil.now() - this.lastStartTime;
+    }
+
+    @Override
+    public long times() {
+        return this.times;
+    }
+
+    @Override
+    public long totalTimes() {
+        if (this.totalChildrenTimes > 0L) {
+            return this.times + this.totalChildrenTimes;
+        }
+        return this.times;
+    }
+
+    @Override
+    public long totalChildrenTimes() {
+        return this.totalChildrenTimes;
+    }
+
+    @Override
+    public long totalCost() {
+        return this.totalCost;
+    }
+
+    @Override
+    public void totalCost(long totalCost) {
+        this.totalCost = totalCost;
+    }
+
+    @Override
+    public long minCost() {
+        return -1L;
+    }
+
+    @Override
+    public long maxCost() {
+        return -1L;
+    }
+
+    @Override
+    public long totalWasted() {
+        return 0L;
+    }
+
+    @Override
+    public long totalSelfWasted() {
+        return 0L;
+    }
+
+    @Override
+    public long totalChildrenWasted() {
+        return -1L;
+    }
+
+    @Override
+    public void fillChildrenTotal(List<Stopwatch> children) {
+        // Fill total times of children
+        this.totalChildrenTimes = children.stream().mapToLong(
+                                  c -> c.totalTimes()).sum();
+    }
+
+    @Override
+    public LightStopwatch copy() {
+        try {
+            return (LightStopwatch) super.clone();
+        } catch (CloneNotSupportedException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    @Override
+    public Stopwatch child(String name) {
+        return this.children.get(name);
+    }
+
+    @Override
+    public Stopwatch child(String name, Stopwatch watch) {
+        if (watch == null) {
+            return this.children.remove(name);
+        }
+        return this.children.put(name, watch);
+    }
+
+    @Override
+    public boolean empty() {
+        return this.children.size() == 0;
+    }
+
+    @Override
+    public void clear() {
+        this.lastStartTime = -1L;
+
+        this.times = 0L;
+        this.totalCost = 0L;
+        this.totalChildrenTimes = -1L;
+
+        this.children.clear();
+    }
+
+    @Override
+    public String toString() {
+        return String.format("{parent:%s,name:%s," +
+                             "times:%s,totalChildrenTimes:%s,totalCost:%s}",
+                             this.parent, this.name,
+                             this.times, this.totalChildrenTimes,
+                             this.totalCost);
+    }
+}
diff --git a/src/main/java/com/baidu/hugegraph/perf/NormalStopwatch.java b/src/main/java/com/baidu/hugegraph/perf/NormalStopwatch.java
new file mode 100644
index 0000000..c097cad
--- /dev/null
+++ b/src/main/java/com/baidu/hugegraph/perf/NormalStopwatch.java
@@ -0,0 +1,306 @@
+/*
+ * Copyright 2017 HugeGraph Authors
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+
+package com.baidu.hugegraph.perf;
+
+import java.util.List;
+import java.util.function.BiFunction;
+
+import org.slf4j.Logger;
+
+import com.baidu.hugegraph.perf.PerfUtil.FastMap;
+import com.baidu.hugegraph.perf.PerfUtil.LocalStack;
+import com.baidu.hugegraph.testutil.Whitebox;
+import com.baidu.hugegraph.util.Log;
+
+public final class NormalStopwatch implements Stopwatch {
+
+    private static final Logger LOG = Log.logger(Stopwatch.class);
+
+    private static final String MULTI_THREAD_ACCESS_ERROR =
+                         "There may be multi-threaded access, ensure " +
+                         "not call PerfUtil.profileSingleThread(true) when " +
+                         "multithreading.";
+
+    private long lastStartTime = -1L;
+
+    private long times = 0L;
+    private long totalCost = 0L;
+    private long minCost = Long.MAX_VALUE;
+    private long maxCost = 0L;
+    private long totalSelfWasted = 0L;
+    private long totalChildrenWasted = -1L;
+    private long totalChildrenTimes = -1L;
+
+    private final String name;
+    private final Path parent;
+    private final Path id;
+    private final FastMap<String, Stopwatch> children;
+
+    public NormalStopwatch(String name, Stopwatch parent) {
+        this(name, parent.id());
+        parent.child(name, this);
+    }
+
+    public NormalStopwatch(String name, Path parent) {
+        this.name = name;
+        this.parent = parent;
+        this.id = Stopwatch.id(parent, name);
+        this.children = new FastMap<>();
+    }
+
+    @Override
+    public Path id() {
+        return this.id;
+    }
+
+    @Override
+    public String name() {
+        return this.name;
+    }
+
+    @Override
+    public Path parent() {
+        return this.parent;
+    }
+
+    @Override
+    public void lastStartTime(long startTime) {
+        this.lastStartTime = startTime;
+    }
+
+    @Override
+    public void startTime(long startTime) {
+        assert this.lastStartTime == -1L : MULTI_THREAD_ACCESS_ERROR;
+        // -1 marks a stopped watch; the assert reports any other value
+        this.times++;
+        this.lastStartTime = startTime;
+        // Time elapsed since startTime was sampled is counted as overhead
+        long endTime = PerfUtil.now();
+        long wastedTime = endTime - startTime;
+        if (wastedTime <= 0L) {
+            wastedTime += eachStartWastedLost; // set by initEachWastedLost()
+        }
+
+        this.totalSelfWasted += wastedTime;
+    }
+
+    @Override
+    public void endTime(long startTime) {
+        assert startTime >= this.lastStartTime && this.lastStartTime != -1L :
+               MULTI_THREAD_ACCESS_ERROR;
+        // Here startTime is the time sampled on entry to PerfUtil.end()
+        long endTime = PerfUtil.now();
+        // The following code costs about 3ns~4ns
+        long wastedTime = endTime - startTime;
+        if (wastedTime <= 0L) {
+            wastedTime += eachEndWastedLost;
+        }
+        // Cost of the watched code: from the last recorded start until now
+        long cost = endTime - this.lastStartTime;
+        // Track the smallest and largest single-call cost
+        if (this.minCost > cost) {
+            this.minCost = cost;
+        }
+        if (this.maxCost < cost) {
+            this.maxCost = cost;
+        }
+        // Accumulate the totals, then mark the watch as stopped (-1)
+        this.totalCost += cost;
+        this.totalSelfWasted += wastedTime;
+        this.lastStartTime = -1L;
+    }
+
+    @Override
+    public long times() {
+        return this.times;
+    }
+
+    @Override
+    public long totalCost() {
+        return this.totalCost;
+    }
+
+    @Override
+    public void totalCost(long totalCost) {
+        this.totalCost = totalCost;
+    }
+
+    @Override
+    public long minCost() {
+        return this.minCost;
+    }
+
+    @Override
+    public long maxCost() {
+        return this.maxCost;
+    }
+
+    @Override
+    public long totalTimes() {
+        if (this.totalChildrenTimes > 0L) {
+            return this.times + this.totalChildrenTimes;
+        }
+        return this.times;
+    }
+
+    @Override
+    public long totalChildrenTimes() {
+        return this.totalChildrenTimes;
+    }
+
+    @Override
+    public long totalWasted() {
+        if (this.totalChildrenWasted > 0L) {
+            return this.totalSelfWasted + this.totalChildrenWasted;
+        }
+        return this.totalSelfWasted;
+    }
+
+    @Override
+    public long totalSelfWasted() {
+        return this.totalSelfWasted;
+    }
+
+    @Override
+    public long totalChildrenWasted() {
+        return this.totalChildrenWasted;
+    }
+
+    @Override
+    public void fillChildrenTotal(List<Stopwatch> children) {
+        // Fill total wasted cost of children
+        this.totalChildrenWasted = children.stream().mapToLong(
+                                   c -> c.totalWasted()).sum();
+        // Fill total times of children
+        this.totalChildrenTimes = children.stream().mapToLong(
+                                  c -> c.totalTimes()).sum();
+    }
+
+    @Override
+    public Stopwatch copy() {
+        try {
+            return (Stopwatch) super.clone();
+        } catch (CloneNotSupportedException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    @Override
+    public Stopwatch child(String name) {
+        return this.children.get(name);
+    }
+
+    @Override
+    public Stopwatch child(String name, Stopwatch watch) {
+        if (watch == null) {
+            return this.children.remove(name);
+        }
+        return this.children.put(name, watch);
+    }
+
+    @Override
+    public boolean empty() {
+        return this.children.size() == 0;
+    }
+
+    @Override
+    public void clear() {
+        this.lastStartTime = -1L;
+
+        this.times = 0L;
+        this.totalCost = 0L;
+
+        this.minCost = Long.MAX_VALUE;
+        this.maxCost = 0L;
+        this.totalSelfWasted = 0L;
+        this.totalChildrenWasted = -1L;
+        this.totalChildrenTimes = -1L;
+
+        this.children.clear();
+    }
+
+    @Override
+    public String toString() {
+        return String.format("{parent:%s,name:%s," +
+                             "times:%s,totalChildrenTimes:%s," +
+                             "totalCost:%s,minCost:%s,maxCost:%s," +
+                             "totalSelfWasted:%s,totalChildrenWasted:%s}",
+                             this.parent, this.name,
+                             this.times, this.totalChildrenTimes,
+                             this.totalCost, this.minCost, this.maxCost,
+                             this.totalSelfWasted, this.totalChildrenWasted);
+    }
+
+    private static long eachStartWastedLost = 0L;
+    private static long eachEndWastedLost = 0L;
+
+    protected static void initEachWastedLost() {
+        int times = 100000000; // iterations of each measuring loop
+        // Grab the private call stack of the current PerfUtil instance
+        LocalStack<Stopwatch> callStack = Whitebox.getInternalState(
+                                          PerfUtil.instance(), "callStack");
+        // Baseline: the loop itself plus one PerfUtil.instance() per pass
+        long baseStart = PerfUtil.now();
+        for (int i = 0; i < times; i++) {
+            PerfUtil.instance();
+        }
+        long baseCost = PerfUtil.now() - baseStart;
+        // Run a test, subtract the baseline, return the average per call
+        BiFunction<String, Runnable, Long> testEachCost = (name, test) -> {
+            long start = PerfUtil.now();
+            test.run();
+            long end = PerfUtil.now();
+            long cost = end - start - baseCost;
+            assert cost > 0;
+            long eachCost = cost / times;
+
+            LOG.info("Wasted time test: cost={}ms, base_cost={}ms, {}={}ns",
+                     cost / 1000000.0, baseCost / 1000000.0, name, eachCost);
+            return eachCost;
+        };
+
+        String startName = "each_start_cost";
+        eachStartWastedLost = testEachCost.apply(startName, () -> {
+            Stopwatch watch = PerfUtil.instance().start(startName);
+            PerfUtil.instance().end(startName);
+            for (int i = 0; i < times; i++) {
+                // Test call start()
+                PerfUtil.instance().start(startName);
+                // Mock end()
+                watch.lastStartTime(-1L);
+                callStack.pop();
+            }
+        });
+
+        String endName = "each_end_cost";
+        eachEndWastedLost = testEachCost.apply(endName, () -> {
+            Stopwatch watch = PerfUtil.instance().start(endName);
+            PerfUtil.instance().end(endName);
+            for (int i = 0; i < times; i++) {
+                // Mock start()
+                callStack.push(watch);
+                watch.lastStartTime(0L);
+                // Test call end()
+                PerfUtil.instance().end(endName);
+                watch.totalCost(0L);
+            }
+        });
+    }
+}
diff --git a/src/main/java/com/baidu/hugegraph/perf/PerfUtil.java b/src/main/java/com/baidu/hugegraph/perf/PerfUtil.java
index 1cf22ae..e8ea2ea 100644
--- a/src/main/java/com/baidu/hugegraph/perf/PerfUtil.java
+++ b/src/main/java/com/baidu/hugegraph/perf/PerfUtil.java
@@ -19,12 +19,11 @@
 
 package com.baidu.hugegraph.perf;
 
-import java.io.IOException;
 import java.lang.annotation.ElementType;
 import java.lang.annotation.Retention;
 import java.lang.annotation.RetentionPolicy;
 import java.lang.annotation.Target;
-import java.security.InvalidParameterException;
+import java.util.EmptyStackException;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
@@ -32,14 +31,17 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
-import java.util.Stack;
+import java.util.concurrent.CountDownLatch;
 import java.util.function.BiConsumer;
+import java.util.function.Function;
+import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
 import org.slf4j.Logger;
 
 import com.baidu.hugegraph.func.TriFunction;
-import com.baidu.hugegraph.util.E;
+import com.baidu.hugegraph.perf.Stopwatch.Path;
+import com.baidu.hugegraph.testutil.Assert.ThrowableConsumer;
 import com.baidu.hugegraph.util.Log;
 import com.baidu.hugegraph.util.ReflectionUtil;
 import com.google.common.reflect.ClassPath.ClassInfo;
@@ -48,22 +50,36 @@
 import javassist.ClassPool;
 import javassist.CtClass;
 import javassist.CtMethod;
-import javassist.NotFoundException;
 
-public class PerfUtil {
+public final class PerfUtil {
 
     private static final Logger LOG = Log.logger(PerfUtil.class);
+    private static final int DEFAUL_CAPATICY = 1024;
+
     private static final ThreadLocal<PerfUtil> INSTANCE = new ThreadLocal<>();
 
-    private final Map<String, Stopwatch> stopwatches;
-    private final Stack<String> callStack;
+    private static PerfUtil SINGLE_INSTANCE = null;
+    private static Thread SINGLE_THREAD = null;
+    private static LocalTimer LOCAL_TIMER = null;
+    private static boolean LIGHT_WATCH = false;
+
+    private final Map<Path, Stopwatch> stopwatches;
+    private final LocalStack<Stopwatch> callStack;
+    private final Stopwatch root;
 
     private PerfUtil() {
-        this.stopwatches = new HashMap<>();
-        this.callStack = new Stack<>();
+        this.stopwatches = new HashMap<>(DEFAUL_CAPATICY);
+        this.callStack = new LocalStack<>(DEFAUL_CAPATICY);
+        this.root = newStopwatch(Path.ROOT_NAME, Path.EMPTY);
     }
 
     public static PerfUtil instance() {
+        if (SINGLE_INSTANCE != null &&
+            SINGLE_THREAD == Thread.currentThread()) {
+            // Return the only one instance for single thread, for performance
+            return SINGLE_INSTANCE;
+        }
+
         PerfUtil p = INSTANCE.get();
         if (p == null) {
             p = new PerfUtil();
@@ -72,81 +88,182 @@
         return p;
     }
 
-    private static long now() {
+    public static void profileSingleThread(boolean yes) {
+        SINGLE_INSTANCE = yes ? PerfUtil.instance() : null;
+        SINGLE_THREAD = yes ? Thread.currentThread() : null;
+    }
+
+    public static void useLocalTimer(boolean yes) {
+        if (yes) {
+            if (LOCAL_TIMER != null) {
+                return;
+            }
+            LOCAL_TIMER = new LocalTimer();
+            try {
+                LOCAL_TIMER.startTimeUpdateLoop();
+            } catch (InterruptedException e) {
+                throw new RuntimeException(e);
+            }
+
+            if (!LIGHT_WATCH) {
+                NormalStopwatch.initEachWastedLost();
+            }
+        } else {
+            if (LOCAL_TIMER == null) {
+                return;
+            }
+            try {
+                LOCAL_TIMER.stop();
+            } catch (InterruptedException e) {
+                throw new RuntimeException(e);
+            } finally {
+                LOCAL_TIMER = null;
+            }
+        }
+    }
+
+    public static void useLightStopwatch(boolean yes) {
+        if (yes != LIGHT_WATCH) {
+            PerfUtil instance = INSTANCE.get();
+            boolean empty = instance == null || instance.empty();
+            String message = "Please call clear() before switching " +
+                             "light-stopwatch due to there is dirty watch";
+            com.baidu.hugegraph.util.E.checkArgument(empty, message);
+        }
+        LIGHT_WATCH = yes;
+    }
+
+    protected static long now() {
+        if (LOCAL_TIMER != null) {
+            return LOCAL_TIMER.now();
+        }
+        // System.nanoTime() costs about 40 ns per call
         return System.nanoTime();
     }
 
-    public boolean start(String name) {
-        String parent = this.callStack.empty() ? "" : this.callStack.peek();
-        Stopwatch item = this.stopwatches.get(Stopwatch.id(parent, name));
-        if (item == null) {
-            item = new Stopwatch(name, parent);
-            this.stopwatches.put(item.id(), item);
-        }
-        this.callStack.push(item.id());
-        item.startTime(now());
-
-        return true; // just for assert
+    protected static Stopwatch newStopwatch(String name, Path parent) {
+        return LIGHT_WATCH ? new LightStopwatch(name, parent) :
+                             new NormalStopwatch(name, parent);
     }
 
-    public boolean end(String name) {
-        long time = now();
-        String current = this.callStack.pop();
-        assert current.endsWith(name);
+    protected static Stopwatch newStopwatch(String name, Stopwatch parent) {
+        return LIGHT_WATCH ? new LightStopwatch(name, parent) :
+                             new NormalStopwatch(name, parent);
+    }
 
-        String parent = this.callStack.empty() ? "" : this.callStack.peek();
-        Stopwatch item = this.stopwatches.get(Stopwatch.id(parent, name));
-        if (item == null) {
-            throw new InvalidParameterException(name);
+    public Stopwatch start(String name) {
+        long start = now();
+
+        Stopwatch parent = this.callStack.empty() ?
+                           this.root : this.callStack.peek();
+
+        // Get watch by name from local tree
+        Stopwatch watch = parent.child(name);
+        if (watch == null) {
+            watch = newStopwatch(name, parent);
+            assert !this.stopwatches.containsKey(watch.id()) : watch;
+            this.stopwatches.put(watch.id(), watch);
         }
-        item.endTime(time);
+        this.callStack.push(watch);
 
-        return true;
+        watch.startTime(start);
+
+        return watch;
+    }
+
+    public Stopwatch start2(String name) {
+        long start = now(); // cost 70 ns with System.nanoTime()
+
+        Path parent = this.callStack.empty() ?
+                      Path.EMPTY : this.callStack.peek().id();
+        Path id = Stopwatch.id(parent, name); // cost 130
+        // Get watch by id from global map
+        Stopwatch watch = this.stopwatches.get(id); // cost 170
+        if (watch == null) {
+            watch = newStopwatch(name, parent);
+            this.stopwatches.put(watch.id(), watch); // cost 180
+        }
+        this.callStack.push(watch); // cost 190
+
+        watch.startTime(start);
+
+        return watch;
+    }
+
+    public void end(String name) {
+        // Sample the time first; LightStopwatch.endTime() ignores this value
+        long start = LIGHT_WATCH ? 0L : now();
+        Stopwatch watch = this.callStack.pop();
+        // Compare names by value: equal names may be distinct String objects
+        if (watch == null || !java.util.Objects.equals(watch.name(), name)) {
+            throw new IllegalArgumentException("Invalid watch name: " + name);
+        }
+        watch.endTime(start);
+    }
+
+    public boolean empty() {
+        return this.stopwatches.isEmpty() && this.root.empty();
     }
 
     public void clear() {
-        E.checkState(this.callStack.empty(),
-                     "Can't be cleared when the call has not ended yet");
+        String error = "Can't be cleared when the call has not ended yet";
+        com.baidu.hugegraph.util.E.checkState(this.callStack.empty(), error);
+
         this.stopwatches.clear();
+        this.root.clear();
     }
 
-    public void profilePackage(String... packages)
-                               throws NotFoundException, IOException,
-                               ClassNotFoundException, CannotCompileException {
+    public void profilePackage(String... packages) throws Throwable {
         Set<String> loadedClasses = new HashSet<>();
 
+        Function<String, Boolean> inPackage = (cls) -> {
+            for (String pkg : packages) {
+                if (cls.startsWith(pkg)) {
+                    return true;
+                }
+            }
+            return false;
+        };
+
+        ThrowableConsumer<String> profileClassIfPresent = (cls) -> {
+            if (!loadedClasses.contains(cls)) {
+                // Profile super class
+                for (String s : ReflectionUtil.superClasses(cls)) {
+                    if (!loadedClasses.contains(s) && inPackage.apply(s)) {
+                        profileClass(s);
+                        loadedClasses.add(s);
+                    }
+                }
+                // Profile self class
+                profileClass(cls);
+                loadedClasses.add(cls);
+            }
+        };
+
         Iterator<ClassInfo> classes = ReflectionUtil.classes(packages);
         while (classes.hasNext()) {
             String cls = classes.next().getName();
-            // super class first
-            for (String s : ReflectionUtil.superClasses(cls)) {
-                if (!loadedClasses.contains(s)) {
-                    profileClass(s);
-                    loadedClasses.add(s);
-                }
-            }
-            // self class
-            if (!loadedClasses.contains(cls)) {
-                profileClass(cls);
-                loadedClasses.add(cls);
+            // Profile self class
+            profileClassIfPresent.accept(cls);
+            // Profile nested class
+            for (String s : ReflectionUtil.nestedClasses(cls)) {
+                profileClassIfPresent.accept(s);
             }
         }
     }
 
-    public void profileClass(String... classes)
-                             throws NotFoundException, CannotCompileException,
-                             ClassNotFoundException {
+    public void profileClass(String... classes) throws Throwable {
         ClassPool classPool = ClassPool.getDefault();
 
         for (String cls : classes) {
             CtClass ctClass = classPool.get(cls);
             List<CtMethod> methods = ReflectionUtil.getMethodsAnnotatedWith(
-                    ctClass, Watched.class, false);
+                                     ctClass, Watched.class, false);
             for (CtMethod method : methods) {
                 profile(method);
             }
 
-            // load class and make it effective
+            // Load class and make it effective
             if (!methods.isEmpty()) {
                 ctClass.toClass();
             }
@@ -185,7 +302,7 @@
     public String toJson() {
         StringBuilder sb = new StringBuilder(8 + this.stopwatches.size() * 96);
         sb.append('{');
-        for (Map.Entry<String, Stopwatch> w : this.stopwatches.entrySet()) {
+        for (Map.Entry<Path, Stopwatch> w : this.stopwatches.entrySet()) {
             sb.append('"');
             sb.append(w.getKey());
             sb.append('"');
@@ -205,9 +322,8 @@
 
     // TODO: move toECharts() method out of this class
     public String toECharts() {
-
         TriFunction<Integer, Integer, List<Stopwatch>, String> formatLevel = (
-                totalDepth, depth, items) -> {
+                    totalDepth, depth, items) -> {
             float factor = 100.0f / (totalDepth + 1);
             float showFactor = 1 + (totalDepth - depth) / (float) depth;
 
@@ -224,29 +340,17 @@
             sb.append(String.format("radius: ['%s%%', '%s%%'],",
                                     radiusFrom, radiusTo));
             sb.append(String.format(
-                    "label: {normal: {position: 'inner', formatter:" +
-                    "function(params) {" +
-                    "  if (params.percent > %s) return params.data.name;" +
-                    "  else return '';" +
-                    "}}},", showFactor));
+                      "label: {normal: {position: 'inner', formatter:" +
+                      "function(params) {" +
+                      "  if (params.percent > %s) return params.data.name;" +
+                      "  else return '';" +
+                      "}}},", showFactor));
             sb.append("data: [");
 
             items.sort((i, j) -> i.id().compareTo(j.id()));
             for (Stopwatch w : items) {
                 sb.append('{');
 
-                sb.append("value:");
-                sb.append(w.totalCost() / 1000000.0);
-                sb.append(',');
-
-                sb.append("min:");
-                sb.append(w.minCost());
-                sb.append(',');
-
-                sb.append("max:");
-                sb.append(w.maxCost());
-                sb.append(',');
-
                 sb.append("id:'");
                 sb.append(w.id());
                 sb.append("',");
@@ -255,8 +359,36 @@
                 sb.append(w.name());
                 sb.append("',");
 
+                sb.append("value:");
+                sb.append(w.totalCost()); // w.totalCost() - w.totalWasted() ?
+                sb.append(',');
+
+                sb.append("cost:");
+                sb.append(w.totalCost() / 1000000.0);
+                sb.append(',');
+
+                sb.append("minCost:");
+                sb.append(w.minCost());
+                sb.append(',');
+
+                sb.append("maxCost:");
+                sb.append(w.maxCost());
+                sb.append(',');
+
+                sb.append("wasted:");
+                sb.append(w.totalWasted() / 1000000.0);
+                sb.append(',');
+
+                sb.append("selfWasted:");
+                sb.append(w.totalSelfWasted() / 1000000.0);
+                sb.append(',');
+
                 sb.append("times:");
                 sb.append(w.times());
+                sb.append(',');
+
+                sb.append("totalTimes:");
+                sb.append(w.totalTimes());
 
                 sb.append('}');
                 sb.append(',');
@@ -268,51 +400,88 @@
             return sb.toString();
         };
 
+        BiConsumer<List<Stopwatch>, List<Stopwatch>> fillChildrenTotal =
+                                    (itemsOfLn, itemsOfLnParent) -> {
+            for (Stopwatch parent : itemsOfLnParent) {
+                List<Stopwatch> children = itemsOfLn.stream().filter(c -> {
+                    return c.parent().equals(parent.id());
+                }).collect(Collectors.toList());
+
+                parent.fillChildrenTotal(children);
+            }
+        };
+
         BiConsumer<List<Stopwatch>, List<Stopwatch>> fillOther =
-            (itemsOfI, parents) -> {
-            for (Stopwatch parent : parents) {
-                Stream<Stopwatch> children = itemsOfI.stream().filter(c -> {
+                                    (itemsOfLn, itemsOfLnParent) -> {
+            for (Stopwatch parent : itemsOfLnParent) {
+                Stream<Stopwatch> children = itemsOfLn.stream().filter(c -> {
                     return c.parent().equals(parent.id());
                 });
-                long sum = children.mapToLong(c -> c.totalCost()).sum();
-                if (sum < parent.totalCost()) {
-                    Stopwatch other = new Stopwatch("~", parent.id());
-                    other.totalCost(parent.totalCost() - sum);
-                    itemsOfI.add(other);
+                // Fill other cost
+                long sumCost = children.mapToLong(c -> c.totalCost()).sum();
+                long otherCost = parent.totalCost() - sumCost;
+                if (otherCost > 0L) {
+                    Stopwatch other = newStopwatch("~", parent.id());
+                    other.totalCost(otherCost);
+                    itemsOfLn.add(other);
                 }
             }
         };
 
-        Map<String, Stopwatch> items = this.stopwatches;
+        Map<Path, Stopwatch> items = this.stopwatches;
         Map<Integer, List<Stopwatch>> levelItems = new HashMap<>();
-        int maxDepth = 1;
-        for (Map.Entry<String, Stopwatch> e : items.entrySet()) {
-            int depth = e.getKey().split("/").length;
-            levelItems.putIfAbsent(depth, new LinkedList<>());
-            levelItems.get(depth).add(e.getValue().copy());
+        int maxDepth = 0;
+        for (Map.Entry<Path, Stopwatch> e : items.entrySet()) {
+            int depth = e.getKey().toString().split("/").length;
+            List<Stopwatch> levelItem = levelItems.get(depth);
+            if (levelItem == null) {
+                levelItem = new LinkedList<>();
+                levelItems.putIfAbsent(depth, levelItem);
+            }
+            levelItem.add(e.getValue().copy());
             if (depth > maxDepth) {
                 maxDepth = depth;
             }
         }
 
+        // Fill wasted cost from the outermost to innermost
+        for (int i = maxDepth; i > 0; i--) {
+            assert levelItems.containsKey(i) : i;
+            List<Stopwatch> itemsOfI = levelItems.get(i);
+            List<Stopwatch> itemsOfParent = levelItems.get(i - 1);
+            if (itemsOfParent != null) {
+                // Fill total value of children
+                fillChildrenTotal.accept(itemsOfI, itemsOfParent);
+            }
+        }
+
         StringBuilder sb = new StringBuilder(8 + items.size() * 128);
+        // Output results header
         sb.append("{");
         sb.append("tooltip: {trigger: 'item', " +
             "formatter: function(params) {" +
-            "    return params.data.name + ' ' + params.percent + '% <br/>'" +
-            "        + 'cost: ' + params.data.value + ' (ms) <br/>'" +
-            "        + 'min: ' + params.data.min + ' (ns) <br/>'" +
-            "        + 'max: ' + params.data.max + ' (ns) <br/>'" +
-            "        + 'times: ' + params.data.times + '<br/>'" +
-            "       + params.data.id + '<br/>';" +
+            "  return params.data.name + ' ' + params.percent + '% <br/>'" +
+            "    + 'cost: ' + params.data.cost + ' (ms) <br/>'" +
+            "    + 'min cost: ' + params.data.minCost + ' (ns) <br/>'" +
+            "    + 'max cost: ' + params.data.maxCost + ' (ns) <br/>'" +
+            "    + 'wasted: ' + params.data.wasted + ' (ms) <br/>'" +
+            "    + 'self wasted: ' + params.data.selfWasted + ' (ms) <br/>'" +
+            "    + 'times: ' + params.data.times + '<br/>'" +
+            "    + 'total times: ' + params.data.totalTimes + '<br/>'" +
+            "    + 'path: ' + params.data.id + '<br/>';" +
             "}");
         sb.append("},");
         sb.append("series: [");
-        for (int i = 1; levelItems.containsKey(i); i++) {
+        // Output results data
+        for (int i = 1; i <= maxDepth; i++) {
+            assert levelItems.containsKey(i) : i;
             List<Stopwatch> itemsOfI = levelItems.get(i);
-            if (i > 1) {
-                fillOther.accept(itemsOfI, levelItems.get(i - 1));
+            List<Stopwatch> itemsOfParent = levelItems.get(i - 1);
+            if (itemsOfParent != null) {
+                // Fill other cost for non-root level, ignore root level (i=1)
+                fillOther.accept(itemsOfI, itemsOfParent);
             }
+            // Output items of level I
             sb.append(formatLevel.apply(maxDepth, i, itemsOfI));
             sb.append(',');
         }
@@ -324,8 +493,193 @@
         return sb.toString();
     }
 
+    /**
+     * Cached nanosecond clock. A daemon thread keeps copying
+     * System.nanoTime() into a volatile field, and {@link #now()} only
+     * reads that field.
+     */
+    public static final class LocalTimer {
+
+        /*
+         * The padding fields are only read by preventOptimizePadding().
+         * NOTE(review): they look like cache-line padding around `time`;
+         * that assumes 64-byte cache lines and that the JVM keeps the
+         * declared field order - confirm on the target JVM.
+         */
+        // Header: 4 bytes classptr + 8 bytes markword
+        private volatile long padding11 = 0L;
+        private volatile long padding12 = 0L;
+        private volatile long padding13 = 0L;
+        private volatile long padding14 = 0L;
+        private volatile long padding15 = 0L;
+        private volatile long padding16 = 0L; // the 1st 64 bytes
+
+        // Last value of System.nanoTime() stored by this timer
+        private volatile long time = 0L;
+
+        private volatile long padding21 = 0L;
+        private volatile long padding22 = 0L;
+        private volatile long padding23 = 0L;
+        private volatile long padding24 = 0L;
+        private volatile long padding25 = 0L;
+        private volatile long padding26 = 0L;
+        private volatile long padding27 = 0L; // the 2nd 64 bytes
+
+        // Loop flag of the updater thread, cleared by stop()
+        private volatile boolean running = false;
+        private Thread thread = null;
+
+        /**
+         * @return the most recently stored nanoTime value, or 0 if the
+         *         timer has never been started
+         */
+        public long now() {
+            // Read current ns time (be called frequently)
+            return this.time;
+        }
+
+        /**
+         * Start the daemon thread "LocalTimer" that refreshes the cached
+         * time, and wait until that thread is running.
+         *
+         * @throws InterruptedException if interrupted while waiting for
+         *         the updater thread to start
+         */
+        public void startTimeUpdateLoop() throws InterruptedException {
+            assert this.thread == null;
+            assert this.preventOptimizePadding() == 0L;
+            /*
+             * Store a valid time before starting: the latch below is
+             * released before the updater thread writes its first value,
+             * so now() could otherwise still return 0 after this method.
+             */
+            this.time = System.nanoTime();
+            this.running = true;
+            CountDownLatch started = new CountDownLatch(1);
+            this.thread = new Thread(() -> {
+                started.countDown();
+                while (this.running) {
+                    this.time = System.nanoTime();
+                    // Prevent frequent updates for perf (5.2s => 3.6s for 8kw)
+                    Thread.yield();
+                }
+            }, "LocalTimer");
+            this.thread.setDaemon(true);
+            this.thread.start();
+            started.await();
+        }
+
+        /**
+         * Ask the updater thread to exit and wait for it to finish.
+         * Does nothing more than clearing the flag if never started.
+         *
+         * @throws InterruptedException if interrupted while joining
+         */
+        public void stop() throws InterruptedException {
+            this.running = false;
+            if (this.thread != null) {
+                this.thread.join();
+                // Forget the dead thread so the timer can be started again
+                this.thread = null;
+            }
+        }
+
+        /**
+         * Read every padding field so that they are used somewhere.
+         *
+         * @return the sum of all padding fields (they are never modified
+         *         in this class, so the sum is 0)
+         */
+        public long preventOptimizePadding() {
+            long p1 = this.padding11 + this.padding12 + this.padding13 +
+                      this.padding14 + this.padding15 + this.padding16;
+            long p2 = this.padding21 + this.padding22 + this.padding23 +
+                      this.padding24 + this.padding25 + this.padding26 +
+                      this.padding27;
+            return p1 + p2;
+        }
+    }
+
+    /**
+     * Minimal fixed-capacity LIFO stack backed by an array. It never
+     * grows and does no synchronization.
+     *
+     * @param <E> type of the elements
+     */
+    public static final class LocalStack<E> {
+
+        private final Object[] elementData;
+        private int elementCount;
+
+        /**
+         * @param capacity maximum number of elements the stack can hold
+         */
+        public LocalStack(int capacity) {
+            this.elementData = new Object[capacity];
+            this.elementCount = 0;
+        }
+
+        int size() {
+            return this.elementCount;
+        }
+
+        boolean empty() {
+            return this.elementCount == 0;
+        }
+
+        /**
+         * Push an element onto the top of the stack.
+         *
+         * @throws ArrayIndexOutOfBoundsException if the stack is full;
+         *         the stack is left unchanged in that case
+         */
+        public void push(E elem) {
+            /*
+             * Check before touching the counter: in
+             * `elementData[elementCount++] = elem` the increment is done
+             * before the failing store, which would leave the count past
+             * the capacity and break every later pop()/peek().
+             */
+            if (this.elementCount >= this.elementData.length) {
+                throw new ArrayIndexOutOfBoundsException(
+                          "LocalStack is full, capacity: " +
+                          this.elementData.length);
+            }
+            this.elementData[this.elementCount++] = elem;
+        }
+
+        /**
+         * Remove and return the top element.
+         *
+         * @throws EmptyStackException if the stack is empty
+         */
+        public E pop() {
+            if (this.elementCount == 0) {
+                throw new EmptyStackException();
+            }
+            this.elementCount--;
+            @SuppressWarnings("unchecked")
+            E elem = (E) this.elementData[this.elementCount];
+            // Drop the reference so the array doesn't keep the element
+            this.elementData[this.elementCount] = null;
+            return elem;
+        }
+
+        /**
+         * Return the top element without removing it.
+         *
+         * @throws EmptyStackException if the stack is empty
+         */
+        public E peek() {
+            if (this.elementCount == 0) {
+                throw new EmptyStackException();
+            }
+            @SuppressWarnings("unchecked")
+            E elem = (E) this.elementData[this.elementCount - 1];
+            return elem;
+        }
+    }
+
+    /**
+     * A HashMap with three cache slots in front of it. get() first
+     * compares the key by identity (==) with the slot keys and only falls
+     * back to the HashMap on a miss. Every entry is always stored in the
+     * HashMap too, the slots just mirror up to three of them.
+     * The class does no synchronization.
+     *
+     * @param <K> type of the keys
+     * @param <V> type of the values
+     */
+    public static final class FastMap<K, V> {
+
+        private final Map<K, V> hashMap;
+
+        // Slot keys, a null key marks a free slot
+        private K key1;
+        private K key2;
+        private K key3;
+
+        private V val1;
+        private V val2;
+        private V val3;
+
+        public FastMap() {
+            this.hashMap = new HashMap<>();
+        }
+
+        public int size() {
+            return this.hashMap.size();
+        }
+
+        public boolean containsKey(Object key) {
+            return this.hashMap.containsKey(key);
+        }
+
+        /**
+         * @return the value mapped to the key, or null if there is none
+         */
+        public V get(Object key) {
+            if (key == null) {
+                // Null marks a free slot, so never match it with a slot
+                return this.hashMap.get(null);
+            }
+            if (key == this.key1) {
+                return this.val1;
+            } else if (key == this.key2) {
+                return this.val2;
+            } else if (key == this.key3) {
+                return this.val3;
+            }
+
+            return this.hashMap.get(key);
+        }
+
+        /**
+         * Map the key to the value, and mirror the entry in a slot if the
+         * key is already cached or a slot is free.
+         *
+         * @return the previous value of the key, or null if there was none
+         */
+        public V put(K key, V value) {
+            if (key == null) {
+                // Null keys are never cached, see get()
+                return this.hashMap.put(key, value);
+            }
+            /*
+             * Refresh the slot of an already cached key first. Taking a
+             * second slot for it would leave the old value in the first
+             * one, and get() would keep returning that stale value.
+             */
+            if (key.equals(this.key1)) {
+                this.val1 = value;
+            } else if (key.equals(this.key2)) {
+                this.val2 = value;
+            } else if (key.equals(this.key3)) {
+                this.val3 = value;
+            } else if (this.key1 == null) {
+                this.key1 = key;
+                this.val1 = value;
+            } else if (this.key2 == null) {
+                this.key2 = key;
+                this.val2 = value;
+            } else if (this.key3 == null) {
+                this.key3 = key;
+                this.val3 = value;
+            }
+
+            return this.hashMap.put(key, value);
+        }
+
+        /**
+         * Remove the key from the map and from its slot, if any.
+         *
+         * @return the removed value, or null if the key was not mapped
+         */
+        public V remove(Object key) {
+            if (key == null) {
+                return this.hashMap.remove(null);
+            }
+            // Use equals() like the HashMap does, so the slot of an equal
+            // but not identical key can't outlive the removed entry
+            if (key.equals(this.key1)) {
+                this.key1 = null;
+                this.val1 = null;
+            } else if (key.equals(this.key2)) {
+                this.key2 = null;
+                this.val2 = null;
+            } else if (key.equals(this.key3)) {
+                this.key3 = null;
+                this.val3 = null;
+            }
+
+            return this.hashMap.remove(key);
+        }
+
+        /**
+         * Remove all entries, from the slots and from the HashMap.
+         */
+        public void clear() {
+            this.key1 = null;
+            this.key2 = null;
+            this.key3 = null;
+
+            this.val1 = null;
+            this.val2 = null;
+            this.val3 = null;
+
+            this.hashMap.clear();
+        }
+    }
+
     @Retention(RetentionPolicy.RUNTIME)
-    @Target(ElementType.METHOD)
+    @Target({ ElementType.METHOD, ElementType.CONSTRUCTOR })
     public static @interface Watched {
         public String value() default "";
         public String prefix() default "";
diff --git a/src/main/java/com/baidu/hugegraph/perf/Stopwatch.java b/src/main/java/com/baidu/hugegraph/perf/Stopwatch.java
index 500e1e8..a015a08 100644
--- a/src/main/java/com/baidu/hugegraph/perf/Stopwatch.java
+++ b/src/main/java/com/baidu/hugegraph/perf/Stopwatch.java
@@ -19,114 +19,125 @@
 
 package com.baidu.hugegraph.perf;
 
-public class Stopwatch implements Cloneable {
+import java.util.List;
 
-    private long lastStartTime = -1L;
+public interface Stopwatch extends Cloneable {
 
-    private long totalCost = 0L;
-    private long minCost = 0L;
-    private long maxCost = 0L;
+    public Path id();
+    public String name();
+    public Path parent();
 
-    private long times = 0L;
+    public void startTime(long startTime);
+    public void endTime(long startTime);
 
-    private String name;
-    private String parent;
+    public void lastStartTime(long startTime);
 
-    public Stopwatch(String name, String parent) {
-        this.name = name;
-        this.parent = parent;
+    public long times();
+    public long totalTimes();
+    public long totalChildrenTimes();
+
+    public long totalCost();
+    public void totalCost(long otherCost);
+
+    public long minCost();
+    public long maxCost();
+
+    public long totalWasted();
+    public long totalSelfWasted();
+    public long totalChildrenWasted();
+
+    public void fillChildrenTotal(List<Stopwatch> children);
+
+    public Stopwatch copy();
+
+    public Stopwatch child(String name);
+    public Stopwatch child(String name, Stopwatch watch);
+
+    public boolean empty();
+    public void clear();
+
+    /**
+     * Serialize this stopwatch as a JSON object with the keys parent,
+     * name, times, total_cost, min_cost, max_cost, total_self_wasted,
+     * total_children_wasted and total_children_times. Parent and name
+     * are written as-is, without any JSON escaping.
+     *
+     * @return the JSON text
+     */
+    public default String toJson() {
+        // Room for the fixed keys and the numbers, plus the two strings
+        int capacity = 200 + this.name().length() + this.parent().length();
+        return new StringBuilder(capacity)
+               .append("{\"parent\":\"").append(this.parent())
+               .append("\",\"name\":\"").append(this.name())
+               .append("\",\"times\":").append(this.times())
+               .append(",\"total_cost\":").append(this.totalCost())
+               .append(",\"min_cost\":").append(this.minCost())
+               .append(",\"max_cost\":").append(this.maxCost())
+               .append(",\"total_self_wasted\":")
+               .append(this.totalSelfWasted())
+               .append(",\"total_children_wasted\":")
+               .append(this.totalChildrenWasted())
+               .append(",\"total_children_times\":")
+               .append(this.totalChildrenTimes())
+               .append("}")
+               .toString();
     }
 
-    public String id() {
-        return Stopwatch.id(this.parent, this.name);
-    }
-
-    public static String id(String parent, String name) {
-        if (parent == null || parent.isEmpty()) {
-            return name;
+    public static Path id(Path parent, String name) {
+        if (parent == Path.EMPTY && name == Path.ROOT_NAME) {
+            return Path.EMPTY;
         }
-        return parent + "/" + name;
+        return new Path(parent, name);
     }
 
-    public String name() {
-        return this.name;
-    }
+    public static final class Path implements Comparable<Path> {
 
-    public String parent() {
-        return this.parent;
-    }
+        public static final String ROOT_NAME = "root";
+        public static final Path EMPTY = new Path("");
 
-    public void startTime(long time) {
-        assert this.lastStartTime == -1L;
+        private final String path;
 
-        this.lastStartTime = time;
-        this.times++;
-    }
-
-    public void endTime(long time) {
-        assert time >= this.lastStartTime && this.lastStartTime != -1L;
-
-        long cost = time - this.lastStartTime;
-        this.totalCost += cost;
-        this.lastStartTime = -1L;
-        this.updateMinMax(cost);
-    }
-
-    protected void updateMinMax(long cost) {
-        if (this.minCost > cost || this.minCost == 0L) {
-            this.minCost = cost;
+        public Path(String self) {
+            this.path = self;
         }
-        if (this.maxCost < cost) {
-            this.maxCost = cost;
+
+        public Path(Path parent, String name) {
+            if (parent == EMPTY) {
+                this.path = name;
+            } else {
+                int len = parent.length() + 1 + name.length();
+                StringBuilder sb = new StringBuilder(len);
+                sb.append(parent.path).append('/').append(name);
+
+                this.path = sb.toString();
+            }
         }
-    }
 
-    protected void totalCost(long totalCost) {
-        this.totalCost = totalCost;
-    }
-
-    public long totalCost() {
-        return this.totalCost;
-    }
-
-    public long minCost() {
-        return this.minCost;
-    }
-
-    public long maxCost() {
-        return this.maxCost;
-    }
-
-    public long times() {
-        return this.times;
-    }
-
-    public Stopwatch copy() {
-        try {
-            return (Stopwatch) super.clone();
-        } catch (CloneNotSupportedException e) {
-            throw new RuntimeException(e);
+        public int length() {
+            return this.path.length();
         }
-    }
 
-    @Override
-    public String toString() {
-        return String.format(
-                "{totalCost:%sms, minCost:%sns, maxCost:%sns, times:%s}",
-                this.totalCost / 1000000.0F,
-                this.minCost, this.maxCost,
-                this.times);
-    }
+        @Override
+        public int hashCode() {
+            return this.path.hashCode();
+        }
 
-    public String toJson() {
-        return String.format("{\"totalCost\":%s, " +
-                "\"minCost\":%s, \"maxCost\":%s, \"times\":%s, " +
-                "\"name\":\"%s\", \"parent\":\"%s\"}",
-                this.totalCost,
-                this.minCost,
-                this.maxCost,
-                this.times,
-                this.name,
-                this.parent);
+        /**
+         * Two paths are equal when their path strings are equal.
+         *
+         * @param obj the object to compare with, may be null
+         * @return true if obj is a Path with an equal path string
+         */
+        @Override
+        public boolean equals(Object obj) {
+            if (this == obj) {
+                return true;
+            }
+            // Check the type before anything else: instanceof also rejects
+            // null, so equals(null) returns false instead of throwing
+            if (!(obj instanceof Path)) {
+                return false;
+            }
+            Path other = (Path) obj;
+            // Compare the hash codes first, then the strings themselves
+            if (this.path.hashCode() != other.path.hashCode()) {
+                return false;
+            }
+            return this.path.equals(other.path);
+        }
+
+        @Override
+        public int compareTo(Path other) {
+            return this.path.compareTo(other.path);
+        }
+
+        @Override
+        public String toString() {
+            return this.path;
+        }
+
+        public boolean endsWith(String name) {
+            return this.path.endsWith(name);
+        }
     }
 }
diff --git a/src/main/java/com/baidu/hugegraph/testutil/Assert.java b/src/main/java/com/baidu/hugegraph/testutil/Assert.java
index 535f41e..537e9d6 100644
--- a/src/main/java/com/baidu/hugegraph/testutil/Assert.java
+++ b/src/main/java/com/baidu/hugegraph/testutil/Assert.java
@@ -33,6 +33,11 @@
         void run() throws Throwable;
     }
 
+    /**
+     * A one-argument callback whose body may throw any {@link Throwable}.
+     *
+     * @param <T> type of the argument passed to {@link #accept(Object)}
+     */
+    @FunctionalInterface
+    public interface ThrowableConsumer<T> {
+        void accept(T t) throws Throwable;
+    }
+
     public static void assertThrows(Class<? extends Throwable> throwable,
                                     ThrowableRunnable runnable) {
         assertThrows(throwable, runnable, e -> {
diff --git a/src/main/java/com/baidu/hugegraph/util/ReflectionUtil.java b/src/main/java/com/baidu/hugegraph/util/ReflectionUtil.java
index 7b0eca1..c9092ff 100644
--- a/src/main/java/com/baidu/hugegraph/util/ReflectionUtil.java
+++ b/src/main/java/com/baidu/hugegraph/util/ReflectionUtil.java
@@ -98,13 +98,32 @@
     public static List<String> superClasses(String clazz)
                                             throws NotFoundException {
         CtClass klass = ClassPool.getDefault().get(clazz);
-        klass = klass.getSuperclass();
+        CtClass parent = klass.getSuperclass();
 
         List<String> results = new LinkedList<>();
-        while (klass != null) {
-            results.add(klass.getName());
-            klass = klass.getSuperclass();
+        while (parent != null) {
+            results.add(parent.getName());
+            parent = parent.getSuperclass();
         }
         return Lists.reverse(results);
     }
+
+    /**
+     * List the names of the classes that javassist reports as nested in
+     * the given class.
+     *
+     * @param clazz name of the class, looked up in the default ClassPool
+     * @return the names of the nested classes, in the order returned by
+     *         CtClass.getNestedClasses()
+     * @throws NotFoundException if javassist can't find a class
+     */
+    public static List<String> nestedClasses(String clazz)
+                                             throws NotFoundException {
+        CtClass[] nestedOnes = ClassPool.getDefault().get(clazz)
+                                        .getNestedClasses();
+
+        List<String> names = new LinkedList<>();
+        for (int i = 0; i < nestedOnes.length; i++) {
+            names.add(nestedOnes[i].getName());
+        }
+        return names;
+    }
+
+    /**
+     * Return the part of a class name in front of its last dot.
+     *
+     * @param clazz a class name like "a.b.C"
+     * @return the package part like "a.b", or "" if the name has no dot
+     *         or only a leading one
+     */
+    public static String packageName(String clazz) {
+        int lastDot = clazz.lastIndexOf('.');
+        return lastDot > 0 ? clazz.substring(0, lastDot) : "";
+    }
 }
diff --git a/src/test/java/com/baidu/hugegraph/unit/UnitTestSuite.java b/src/test/java/com/baidu/hugegraph/unit/UnitTestSuite.java
index 19b12b7..e9e0732 100644
--- a/src/test/java/com/baidu/hugegraph/unit/UnitTestSuite.java
+++ b/src/test/java/com/baidu/hugegraph/unit/UnitTestSuite.java
@@ -49,6 +49,7 @@
 import com.baidu.hugegraph.unit.license.LicenseVerifyParamTest;
 import com.baidu.hugegraph.unit.license.MachineInfoTest;
 import com.baidu.hugegraph.unit.perf.PerfUtilTest;
+import com.baidu.hugegraph.unit.perf.StopwatchTest;
 import com.baidu.hugegraph.unit.rest.RestClientTest;
 import com.baidu.hugegraph.unit.rest.RestResultTest;
 import com.baidu.hugegraph.unit.util.BytesTest;
@@ -83,6 +84,7 @@
     BarrierEventTest.class,
     EventHubTest.class,
     PerfUtilTest.class,
+    StopwatchTest.class,
     RestClientTest.class,
     RestResultTest.class,
     VersionTest.class,
diff --git a/src/test/java/com/baidu/hugegraph/unit/perf/PerfUtilTest.java b/src/test/java/com/baidu/hugegraph/unit/perf/PerfUtilTest.java
index f7a5c52..926d9c8 100644
--- a/src/test/java/com/baidu/hugegraph/unit/perf/PerfUtilTest.java
+++ b/src/test/java/com/baidu/hugegraph/unit/perf/PerfUtilTest.java
@@ -19,23 +19,25 @@
 
 package com.baidu.hugegraph.unit.perf;
 
-import java.io.IOException;
 import java.util.Map;
 
 import org.junit.After;
 import org.junit.Test;
 
 import com.baidu.hugegraph.perf.PerfUtil;
-import com.baidu.hugegraph.testclass.TestClass.Foo;
-import com.baidu.hugegraph.testclass.TestClass.Sub;
 import com.baidu.hugegraph.testutil.Assert;
 import com.baidu.hugegraph.unit.BaseUnitTest;
-import com.fasterxml.jackson.core.JsonParseException;
-import com.fasterxml.jackson.databind.JsonMappingException;
+import com.baidu.hugegraph.unit.perf.testclass.TestClass;
+import com.baidu.hugegraph.unit.perf.testclass.TestLightClass;
+import com.baidu.hugegraph.unit.perf.testclass.TestPerfClass;
+import com.baidu.hugegraph.unit.perf.testclass.TestPerfLightClass;
+import com.baidu.hugegraph.unit.perf.testclass2.TestClass4Package;
 import com.fasterxml.jackson.databind.ObjectMapper;
 
 public class PerfUtilTest extends BaseUnitTest {
 
+    private static final String prefix =
+                                "com.baidu.hugegraph.unit.perf.testclass.";
     private static final PerfUtil perf = PerfUtil.instance();
 
     @After
@@ -44,24 +46,42 @@
     }
 
     @Test
-    public void testPerfUtil() throws Exception {
-        /*
-         * TODO: call profilePackage("com.baidu.hugegraph.testclass") and
-         * remove class Foo. now exception "duplicate class definition" throws
-         * since JUnit loaded class TestClass before testPerfUtil()
-         */
-        perf.profilePackage("com.baidu.hugegraph.testclass");
-        perf.profileClass("com.baidu.hugegraph.testclass.TestClass$Foo");
+    public void testPerfUtil() throws Throwable {
+        perf.profileClass(prefix + "TestClass$Foo");
 
-        Foo obj = new Foo();
+        TestClass.Foo obj = new TestClass.Foo();
         obj.foo();
 
         perf.toString();
         perf.toECharts();
         String json = perf.toJson();
 
-        assertContains(json, "foo.times", 1);
-        assertContains(json, "foo/bar.times", 1);
+        assertContains(json, "foo.foo#times", 1);
+        assertContains(json, "foo.foo#name", "foo.foo");
+        assertContains(json, "foo.foo#parent", "");
+        assertContains(json, "foo.foo#total_cost");
+        assertContains(json, "foo.foo#min_cost");
+        assertContains(json, "foo.foo#max_cost");
+        assertContains(json, "foo.foo#total_self_wasted");
+        assertContains(json, "foo.foo#total_children_wasted", -1);
+        assertContains(json, "foo.foo#total_children_times", -1);
+
+        assertContains(json, "foo.foo/foo.bar#times", 1);
+        assertContains(json, "foo.foo/foo.bar#name", "foo.bar");
+        assertContains(json, "foo.foo/foo.bar#parent", "foo.foo");
+        assertContains(json, "foo.foo/foo.bar#total_cost");
+        assertContains(json, "foo.foo/foo.bar#min_cost");
+        assertContains(json, "foo.foo/foo.bar#max_cost");
+        assertContains(json, "foo.foo/foo.bar#total_self_wasted");
+        assertContains(json, "foo.foo/foo.bar#total_children_wasted", -1);
+        assertContains(json, "foo.foo/foo.bar#total_children_times", -1);
+
+        TestClass test = new TestClass();
+        test.test();
+        json = perf.toJson();
+        assertContains(json, "foo.bar#times", 1);
+        assertContains(json, "foo.foo#times", 1);
+        assertContains(json, "foo.foo/foo.bar#times", 1);
 
         perf.clear();
 
@@ -72,16 +92,204 @@
         perf.toECharts();
         json = perf.toJson();
 
-        assertContains(json, "foo.times", 2);
-        assertContains(json, "foo/bar.times", 2);
+        assertContains(json, "foo.foo#times", 2);
+        assertContains(json, "foo.foo/foo.bar#times", 2);
     }
 
     @Test
-    public void testPerfUtilWithProfileClass() throws Exception {
-        perf.profileClass("com.baidu.hugegraph.testclass.TestClass$Base");
-        perf.profileClass("com.baidu.hugegraph.testclass.TestClass$Sub");
+    public void testPerfUtil4LightStopwatch() throws Throwable {
+        perf.profileClass(prefix + "TestLightClass$Foo");
 
-        Sub obj = new Sub();
+        PerfUtil.useLightStopwatch(true);
+
+        TestLightClass.Foo obj = new TestLightClass.Foo();
+        obj.foo();
+
+        perf.toString();
+        perf.toECharts();
+        String json = perf.toJson();
+
+        assertContains(json, "foo.foo#times", 1);
+        assertContains(json, "foo.foo#name", "foo.foo");
+        assertContains(json, "foo.foo#parent", "");
+        assertContains(json, "foo.foo#total_cost");
+        assertContains(json, "foo.foo#min_cost");
+        assertContains(json, "foo.foo#max_cost");
+        assertContains(json, "foo.foo#total_self_wasted");
+        assertContains(json, "foo.foo#total_children_wasted", -1);
+        assertContains(json, "foo.foo#total_children_times", -1);
+
+        assertContains(json, "foo.foo/foo.bar#times", 1);
+        assertContains(json, "foo.foo/foo.bar#name", "foo.bar");
+        assertContains(json, "foo.foo/foo.bar#parent", "foo.foo");
+        assertContains(json, "foo.foo/foo.bar#total_cost");
+        assertContains(json, "foo.foo/foo.bar#min_cost");
+        assertContains(json, "foo.foo/foo.bar#max_cost");
+        assertContains(json, "foo.foo/foo.bar#total_self_wasted");
+        assertContains(json, "foo.foo/foo.bar#total_children_wasted", -1);
+        assertContains(json, "foo.foo/foo.bar#total_children_times", -1);
+
+        perf.clear();
+
+        obj.foo();
+        obj.foo();
+
+        perf.toString();
+        perf.toECharts();
+        json = perf.toJson();
+
+        assertContains(json, "foo.foo#times", 2);
+        assertContains(json, "foo.foo/foo.bar#times", 2);
+
+        perf.clear();
+        PerfUtil.useLightStopwatch(false);
+    }
+
+    /**
+     * Profile TestLightClass$Bar and check the recorded call counts while
+     * the light-stopwatch is switched on and off. A switch made while
+     * results are recorded is expected to throw IllegalArgumentException,
+     * and to succeed after perf.clear().
+     */
+    @Test
+    public void testPerfUtil4LightStopwatcAndSwitch() throws Throwable {
+        perf.profileClass(prefix + "TestLightClass$Bar");
+
+        TestLightClass.Bar bar = new TestLightClass.Bar();
+        bar.foo();
+
+        perf.toString();
+        perf.toECharts();
+        String json = perf.toJson();
+
+        assertContains(json, "bar.foo#times", 1);
+        assertContains(json, "bar.foo/bar.bar#times", 1);
+
+        // NOTE(review): no exception is expected here although results
+        // exist - presumably because the light-stopwatch is already off
+        PerfUtil.useLightStopwatch(false);
+        bar.foo();
+        json = perf.toJson();
+
+        assertContains(json, "bar.foo#times", 2);
+        assertContains(json, "bar.foo/bar.bar#times", 2);
+
+        // Switching on without clear() must be rejected
+        Assert.assertThrows(IllegalArgumentException.class, () -> {
+            PerfUtil.useLightStopwatch(true);
+        }, e -> {
+            Assert.assertContains("clear() before switching light-stopwatch",
+                                  e.getMessage());
+        });
+
+        // Test switch from normal-watch to light-watch
+        perf.clear();
+        PerfUtil.useLightStopwatch(true);
+        bar.foo();
+
+        perf.toString();
+        perf.toECharts();
+        json = perf.toJson();
+
+        assertContains(json, "bar.foo#times", 1);
+        assertContains(json, "bar.foo/bar.bar#times", 1);
+
+        bar.foo();
+        json = perf.toJson();
+
+        assertContains(json, "bar.foo#times", 2);
+        assertContains(json, "bar.foo/bar.bar#times", 2);
+
+        // Switching off without clear() must be rejected as well
+        Assert.assertThrows(IllegalArgumentException.class, () -> {
+            PerfUtil.useLightStopwatch(false);
+        }, e -> {
+            Assert.assertContains("clear() before switching light-stopwatch",
+                                  e.getMessage());
+        });
+
+        // Test switch from light-watch to normal-watch
+        perf.clear();
+        PerfUtil.useLightStopwatch(false);
+        bar.foo();
+
+        perf.toString();
+        perf.toECharts();
+        json = perf.toJson();
+
+        assertContains(json, "bar.foo#times", 1);
+        assertContains(json, "bar.foo/bar.bar#times", 1);
+    }
+
+    /**
+     * Profile TestClass$Bar with profileSingleThread(true), check the
+     * call counts before and after perf.clear(), then switch back to
+     * profileSingleThread(false) and check that the counts go on from
+     * the previous values.
+     */
+    @Test
+    public void testPerfUtilWithSingleThread() throws Throwable {
+        perf.profileClass(prefix + "TestClass$Bar");
+        PerfUtil.profileSingleThread(true);
+
+        TestClass.Bar obj = new TestClass.Bar();
+        obj.foo();
+        perf.toString();
+        perf.toECharts();
+        String json = perf.toJson();
+
+        assertContains(json, "bar_foo#times", 1);
+        assertContains(json, "bar_foo/bar_bar#times", 1);
+
+        // Counts start again from 0 after clear()
+        perf.clear();
+
+        obj.foo();
+        obj.foo();
+
+        perf.toString();
+        perf.toECharts();
+        json = perf.toJson();
+
+        assertContains(json, "bar_foo#times", 2);
+        assertContains(json, "bar_foo/bar_bar#times", 2);
+
+        // No clear() here: the 2 calls above are expected to be kept
+        PerfUtil.profileSingleThread(false);
+
+        obj.foo();
+
+        perf.toString();
+        perf.toECharts();
+        json = perf.toJson();
+
+        assertContains(json, "bar_foo#times", 3);
+        assertContains(json, "bar_foo/bar_bar#times", 3);
+    }
+
+    /**
+     * Profile the whole package testclass2 through profilePackage() and
+     * check the call counts of TestClass4Package and its nested class
+     * Foo, before and after perf.clear().
+     */
+    @Test
+    public void testPerfUtilWithProfilePackage() throws Throwable {
+        perf.profilePackage("com.baidu.hugegraph.unit.perf.testclass2");
+
+        TestClass4Package.Foo obj = new TestClass4Package.Foo();
+        obj.foo();
+
+        perf.toString();
+        perf.toECharts();
+        String json = perf.toJson();
+
+        assertContains(json, "foo#times", 1);
+        assertContains(json, "foo/bar#times", 1);
+
+        // The outer class is expected to be profiled too, and the counts
+        // of foo to stay unchanged
+        TestClass4Package test = new TestClass4Package();
+        test.test();
+        json = perf.toJson();
+        assertContains(json, "test#times", 1);
+        assertContains(json, "test/bar#times", 1);
+        assertContains(json, "foo#times", 1);
+        assertContains(json, "foo/bar#times", 1);
+
+        // Counts start again from 0 after clear()
+        perf.clear();
+
+        obj.foo();
+        obj.foo();
+
+        perf.toString();
+        perf.toECharts();
+        json = perf.toJson();
+
+        assertContains(json, "foo#times", 2);
+        assertContains(json, "foo/bar#times", 2);
+    }
+
+    @Test
+    public void testPerfUtilWithProfileParentClass() throws Throwable {
+        perf.profileClass(prefix + "TestClass$Base");
+        perf.profileClass(prefix + "TestClass$Sub");
+
+        TestClass.Sub obj = new TestClass.Sub();
         obj.func();
         obj.func1();
         obj.func2();
@@ -89,17 +297,166 @@
         obj.func3();
         obj.func3();
 
+        perf.toString();
+        perf.toECharts();
         String json = perf.toJson();
-        assertContains(json, "func.times", 1);
-        assertContains(json, "func1.times", 1);
-        assertContains(json, "func3.times", 3);
+        assertContains(json, "func#times", 1);
+        assertContains(json, "func1#times", 1);
+        assertContains(json, "func3#times", 3);
+    }
+
+    @Test
+    public void testPerfUtilWithProfileManually() throws Throwable {
+        perf.profileClass(prefix + "TestClass$ManuallyProfile");
+
+        TestClass.ManuallyProfile obj = new TestClass.ManuallyProfile();
+        // foo() wraps bar() and bar2() between start/end of "manu-foo"
+        obj.foo();
+
+        perf.toString();
+        perf.toECharts();
+        String json = perf.toJson();
+
+        assertContains(json, "manu-foo#times", 1);
+        assertContains(json, "manu-foo/manu-bar#times", 1);
+        assertContains(json, "manu-foo/manu-bar2#times", 1);
+        // A direct bar() call is expected under the top-level key "manu-bar"
+        obj.foo();
+        obj.bar();
+
+        perf.toString();
+        perf.toECharts();
+        json = perf.toJson();
+
+        assertContains(json, "manu-foo#times", 2);
+        assertContains(json, "manu-foo/manu-bar#times", 2);
+        assertContains(json, "manu-bar#times", 1);
+        // foo2()/bar2() open their stopwatch with start2() instead of start()
+        obj.foo2();
+        obj.bar2();
+
+        perf.toString();
+        perf.toECharts();
+        json = perf.toJson();
+
+        assertContains(json, "manu-foo2#times", 1);
+        assertContains(json, "manu-foo2/manu-bar#times", 1);
+        assertContains(json, "manu-foo2/manu-bar2#times", 1);
+        assertContains(json, "manu-foo#times", 2);
+        assertContains(json, "manu-foo/manu-bar#times", 2);
+        assertContains(json, "manu-bar#times", 1);
+        assertContains(json, "manu-bar2#times", 1);
+    }
+
+    @Test
+    public void testPerfUtilPerf() throws Throwable {
+        perf.profileClass(prefix + "TestPerfClass");
+        perf.profileClass(prefix + "TestPerfClass$Foo");
+        // Both switches are turned on before the first profiled call
+        PerfUtil.profileSingleThread(true);
+        PerfUtil.useLocalTimer(true);
+        // test(times) loops `times` times over the four other test* methods
+        int times = 10000000;
+        TestPerfClass test = new TestPerfClass();
+        test.test(times);
+        test.testNew();
+
+        perf.toString();
+        perf.toECharts();
+        String json = perf.toJson();
+        // The direct testNew() call is counted apart from those under test()
+        assertContains(json, "testNew#times", 1);
+        assertContains(json, "test/testNew#times", times);
+        assertContains(json, "test/testNewAndCall#times", times);
+        assertContains(json, "test/testCall#times", times);
+        assertContains(json, "test/testCallFooThenSum#times", times);
+        // sum() is reached once per iteration through each of these paths
+        assertContains(json, "test/testNewAndCall/sum#times", times);
+        assertContains(json, "test/testCall/sum#times", times);
+        assertContains(json, "test/testCallFooThenSum/foo#times", times);
+        assertContains(json, "test/testCallFooThenSum/foo/sum#times", times);
+
+        // Set each switch repeatedly, ending on false, then profile again
+        PerfUtil.profileSingleThread(true);
+        PerfUtil.profileSingleThread(true);
+        PerfUtil.profileSingleThread(false);
+        PerfUtil.profileSingleThread(false);
+        PerfUtil.useLocalTimer(true);
+        PerfUtil.useLocalTimer(true);
+        PerfUtil.useLocalTimer(false);
+        PerfUtil.useLocalTimer(false);
+        // The earlier count is expected to survive the switches: 1 + 1 = 2
+        test.testNew();
+        json = perf.toJson();
+        assertContains(json, "testNew#times", 2);
+    }
+
+    @Test
+    public void testPerfUtilPerf4LightStopwatch() throws Throwable {
+        perf.profileClass(prefix + "TestPerfLightClass");
+        perf.profileClass(prefix + "TestPerfLightClass$Foo");
+        // The light stopwatch is enabled together with the other two switches
+        PerfUtil.profileSingleThread(true);
+        PerfUtil.useLightStopwatch(true);
+        PerfUtil.useLocalTimer(true);
+        // test(times) loops `times` times over the four other test* methods
+        int times = 10000000;
+        TestPerfLightClass test = new TestPerfLightClass();
+        test.test(times);
+        test.testNew();
+
+        perf.toString();
+        perf.toECharts();
+        String json = perf.toJson();
+        // The direct testNew() call is counted apart from those under test()
+        assertContains(json, "testNew#times", 1);
+        assertContains(json, "test/testNew#times", times);
+        assertContains(json, "test/testNewAndCall#times", times);
+        assertContains(json, "test/testCall#times", times);
+        assertContains(json, "test/testCallFooThenSum#times", times);
+        // sum() is reached once per iteration through each of these paths
+        assertContains(json, "test/testNewAndCall/sum#times", times);
+        assertContains(json, "test/testCall/sum#times", times);
+        assertContains(json, "test/testCallFooThenSum/foo#times", times);
+        assertContains(json, "test/testCallFooThenSum/foo/sum#times", times);
+
+        // Set each switch repeatedly, ending on false
+        PerfUtil.profileSingleThread(true);
+        PerfUtil.profileSingleThread(true);
+        PerfUtil.profileSingleThread(false);
+        PerfUtil.profileSingleThread(false);
+
+        PerfUtil.useLocalTimer(true);
+        PerfUtil.useLocalTimer(true);
+        PerfUtil.useLocalTimer(false);
+        PerfUtil.useLocalTimer(false);
+        // Collected data is cleared before the light stopwatch is switched off
+        perf.clear();
+        PerfUtil.useLightStopwatch(false);
+        PerfUtil.useLightStopwatch(false);
+        // Only the call made after clear() is expected to be counted
+        test.testNew();
+        json = perf.toJson();
+        assertContains(json, "testNew#times", 1);
+    }
+
+    private static void assertContains(String json, String key)
+                                       throws Exception {
+        Assert.assertNotNull("Not exist key " + key, actualValue(json, key));
     }
 
     private static void assertContains(String json, String key, Object value)
-            throws JsonParseException, JsonMappingException, IOException {
+                                       throws Exception {
+        String error = String.format("not contains key '%s' with value <%s> " +
+                                     "in json: %s.\n", key, value, json);
+        Assert.assertEquals(error, value, actualValue(json, key));
+    }
+
+    private static Object actualValue(String json, String key)
+                                      throws Exception {
         ObjectMapper mapper = new ObjectMapper();
         Map<?, ?> map = mapper.readValue(json, Map.class);
-        String[] keys = key.split("\\.");
+        String[] keys = key.split("#");
         Object actual = null;
         for (String k : keys) {
             actual = map.get(k);
@@ -107,6 +464,6 @@
                 map = (Map<?, ?>) actual;
             }
         }
-        Assert.assertEquals(value, actual);
+        return actual;
     }
 }
diff --git a/src/test/java/com/baidu/hugegraph/unit/perf/StopwatchTest.java b/src/test/java/com/baidu/hugegraph/unit/perf/StopwatchTest.java
new file mode 100644
index 0000000..53d5e60
--- /dev/null
+++ b/src/test/java/com/baidu/hugegraph/unit/perf/StopwatchTest.java
@@ -0,0 +1,104 @@
+/*
+ * Copyright 2017 HugeGraph Authors
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+
+package com.baidu.hugegraph.unit.perf;
+
+import org.junit.Test;
+
+import com.baidu.hugegraph.perf.LightStopwatch;
+import com.baidu.hugegraph.perf.NormalStopwatch;
+import com.baidu.hugegraph.perf.Stopwatch;
+import com.baidu.hugegraph.perf.Stopwatch.Path;
+import com.baidu.hugegraph.testutil.Assert;
+import com.baidu.hugegraph.unit.BaseUnitTest;
+
+public class StopwatchTest extends BaseUnitTest {
+
+    @Test
+    public void testNormalStopwatchChild() {
+        Stopwatch watch1 = new NormalStopwatch("w1", Path.EMPTY);
+        // w2..w5 receive watch1 as their second constructor argument
+        Stopwatch watch2 = new NormalStopwatch("w2", watch1);
+        Stopwatch watch3 = new NormalStopwatch("w3", watch1);
+        Stopwatch watch4 = new NormalStopwatch("w4", watch1);
+        Stopwatch watch5 = new NormalStopwatch("w5", watch1);
+        // Without any further call, each is expected to be a child of watch1
+        Assert.assertEquals(watch2, watch1.child("w2"));
+        Assert.assertEquals(watch3, watch1.child("w3"));
+        Assert.assertEquals(watch4, watch1.child("w4"));
+        Assert.assertEquals(watch5, watch1.child("w5"));
+        // child(name, null) is expected to hand back the current child
+        Assert.assertEquals(watch2, watch1.child("w2", null));
+        Assert.assertEquals(watch3, watch1.child("w3", null));
+        Assert.assertEquals(watch4, watch1.child("w4", null));
+        Assert.assertEquals(watch5, watch1.child("w5", null));
+        // ... after which a lookup by name no longer finds anything
+        Assert.assertEquals(null, watch1.child("w2"));
+        Assert.assertEquals(null, watch1.child("w3"));
+        Assert.assertEquals(null, watch1.child("w4"));
+        Assert.assertEquals(null, watch1.child("w5"));
+        // child(name, watch) is expected to return null: no child was present
+        Assert.assertEquals(null, watch1.child("w2", watch2));
+        Assert.assertEquals(null, watch1.child("w3", watch3));
+        Assert.assertEquals(null, watch1.child("w4", watch4));
+        Assert.assertEquals(null, watch1.child("w5", watch5));
+
+        watch1.clear(); // no child may be found by name afterwards
+        Assert.assertEquals(null, watch1.child("w2"));
+        Assert.assertEquals(null, watch1.child("w3"));
+        Assert.assertEquals(null, watch1.child("w4"));
+        Assert.assertEquals(null, watch1.child("w5"));
+    }
+
+    @Test
+    public void testLightStopwatchChild() {
+        Stopwatch watch1 = new LightStopwatch("w1", Path.EMPTY);
+        // w2..w5 receive watch1 as their second constructor argument
+        Stopwatch watch2 = new LightStopwatch("w2", watch1);
+        Stopwatch watch3 = new LightStopwatch("w3", watch1);
+        Stopwatch watch4 = new LightStopwatch("w4", watch1);
+        Stopwatch watch5 = new LightStopwatch("w5", watch1);
+        // Without any further call, each is expected to be a child of watch1
+        Assert.assertEquals(watch2, watch1.child("w2"));
+        Assert.assertEquals(watch3, watch1.child("w3"));
+        Assert.assertEquals(watch4, watch1.child("w4"));
+        Assert.assertEquals(watch5, watch1.child("w5"));
+        // child(name, null) is expected to hand back the current child
+        Assert.assertEquals(watch2, watch1.child("w2", null));
+        Assert.assertEquals(watch3, watch1.child("w3", null));
+        Assert.assertEquals(watch4, watch1.child("w4", null));
+        Assert.assertEquals(watch5, watch1.child("w5", null));
+        // ... after which a lookup by name no longer finds anything
+        Assert.assertEquals(null, watch1.child("w2"));
+        Assert.assertEquals(null, watch1.child("w3"));
+        Assert.assertEquals(null, watch1.child("w4"));
+        Assert.assertEquals(null, watch1.child("w5"));
+        // child(name, watch) is expected to return null: no child was present
+        Assert.assertEquals(null, watch1.child("w2", watch2));
+        Assert.assertEquals(null, watch1.child("w3", watch3));
+        Assert.assertEquals(null, watch1.child("w4", watch4));
+        Assert.assertEquals(null, watch1.child("w5", watch5));
+
+        watch1.clear(); // no child may be found by name afterwards
+        Assert.assertEquals(null, watch1.child("w2"));
+        Assert.assertEquals(null, watch1.child("w3"));
+        Assert.assertEquals(null, watch1.child("w4"));
+        Assert.assertEquals(null, watch1.child("w5"));
+    }
+}
diff --git a/src/test/java/com/baidu/hugegraph/unit/perf/testclass/TestClass.java b/src/test/java/com/baidu/hugegraph/unit/perf/testclass/TestClass.java
new file mode 100644
index 0000000..4288824
--- /dev/null
+++ b/src/test/java/com/baidu/hugegraph/unit/perf/testclass/TestClass.java
@@ -0,0 +1,107 @@
+/*
+ * Copyright 2017 HugeGraph Authors
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+
+package com.baidu.hugegraph.unit.perf.testclass;
+
+import com.baidu.hugegraph.perf.PerfUtil;
+import com.baidu.hugegraph.perf.PerfUtil.Watched;
+
+public class TestClass {
+
+    @Watched
+    public void test() {
+        new Foo().bar();
+    }
+
+    public static class Foo {
+        // Both methods are watched with prefix "foo"
+        @Watched(prefix="foo")
+        public void foo() {
+            this.bar();
+        }
+
+        @Watched(prefix="foo")
+        public void bar() {}
+    }
+
+    public static class Bar {
+        // Watched under the explicit names "bar_foo" and "bar_bar"
+        @Watched("bar_foo")
+        public void foo() {
+            this.bar();
+        }
+
+        @Watched("bar_bar")
+        public void bar() {}
+    }
+
+    public static class ManuallyProfile {
+        // Profiled through explicit PerfUtil calls instead of @Watched
+        public void foo() {
+            PerfUtil.instance().start("manu-foo");
+            this.bar();
+            this.bar2();
+            PerfUtil.instance().end("manu-foo");
+        }
+
+        public void bar() {
+            PerfUtil.instance().start("manu-bar");
+            try {
+                Thread.sleep(0);
+            } catch (InterruptedException e) {
+                Thread.currentThread().interrupt(); // keep interrupt status
+            }
+            PerfUtil.instance().end("manu-bar");
+        }
+
+        public void foo2() {
+            PerfUtil.instance().start2("manu-foo2");
+            this.bar();
+            this.bar2();
+            PerfUtil.instance().end("manu-foo2");
+        }
+
+        public void bar2() {
+            PerfUtil.instance().start2("manu-bar2");
+            try {
+                Thread.sleep(0);
+            } catch (InterruptedException e) {
+                Thread.currentThread().interrupt(); // keep interrupt status
+            }
+            PerfUtil.instance().end("manu-bar2");
+        }
+    }
+
+    public static class Base {
+        // Parent of Sub; its only method is watched
+        @Watched
+        public void func() {}
+    }
+
+    public static class Sub extends Base {
+        // func1() and func3() are watched, func2() is not
+        @Watched
+        public void func1() {}
+
+        public void func2() {}
+
+        @Watched
+        public void func3() {}
+    }
+}
diff --git a/src/test/java/com/baidu/hugegraph/testclass/TestClass.java b/src/test/java/com/baidu/hugegraph/unit/perf/testclass/TestLightClass.java
similarity index 72%
copy from src/test/java/com/baidu/hugegraph/testclass/TestClass.java
copy to src/test/java/com/baidu/hugegraph/unit/perf/testclass/TestLightClass.java
index 0fe47a1..47ec3f1 100644
--- a/src/test/java/com/baidu/hugegraph/testclass/TestClass.java
+++ b/src/test/java/com/baidu/hugegraph/unit/perf/testclass/TestLightClass.java
@@ -17,37 +17,36 @@
  * under the License.
  */
 
-package com.baidu.hugegraph.testclass;
+package com.baidu.hugegraph.unit.perf.testclass;
 
 import com.baidu.hugegraph.perf.PerfUtil.Watched;
 
-public class TestClass {
+public class TestLightClass {
+
+    @Watched
+    public void test() {
+        new Foo().bar();
+    }
 
     public static class Foo {
 
-        @Watched
+        @Watched(prefix="foo")
         public void foo() {
             this.bar();
         }
 
-        @Watched
+        @Watched(prefix="foo")
         public void bar() {}
     }
 
-    public static class Base {
+    public static class Bar {
 
-        @Watched
-        public void func() {}
-    }
+        @Watched(prefix="bar")
+        public void foo() {
+            this.bar();
+        }
 
-    public static class Sub extends Base {
-
-        @Watched
-        public void func1() {}
-
-        public void func2() {}
-
-        @Watched
-        public void func3() {}
+        @Watched(prefix="bar")
+        public void bar() {}
     }
 }
diff --git a/src/test/java/com/baidu/hugegraph/unit/perf/testclass/TestPerfClass.java b/src/test/java/com/baidu/hugegraph/unit/perf/testclass/TestPerfClass.java
new file mode 100644
index 0000000..9ac381a
--- /dev/null
+++ b/src/test/java/com/baidu/hugegraph/unit/perf/testclass/TestPerfClass.java
@@ -0,0 +1,74 @@
+/*
+ * Copyright 2017 HugeGraph Authors
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+
+package com.baidu.hugegraph.unit.perf.testclass;
+
+import com.baidu.hugegraph.perf.PerfUtil.Watched;
+
+public class TestPerfClass {
+    // Shared instance used by testCall() and testCallFooThenSum()
+    private Foo foo = new Foo();
+
+    @Watched
+    public void test(int times) {
+        for (int i = 0; i < times; i++) {
+            this.testNew();
+            this.testNewAndCall();
+            this.testCall();
+            this.testCallFooThenSum();
+        }
+    }
+
+    @Watched
+    public void testNew() {
+        new Foo(); // allocation only
+    }
+
+    @Watched
+    public void testNewAndCall() {
+        new Foo().sum(1, 2); // allocation plus one watched call
+    }
+
+    @Watched
+    public void testCall() {
+        this.foo.sum(1, 2); // watched call on the shared instance
+    }
+
+    @Watched
+    public void testCallFooThenSum() {
+        this.foo.foo(); // Foo.foo() in turn calls sum()
+    }
+
+    public static class Foo {
+        // foo() only forwards to sum(), giving a two-level call path
+        @Watched
+        public void foo() {
+            this.sum(1, 2);
+        }
+
+        @Watched
+        public int sum(int a, int b) {
+            int sum = a;
+            for (int i = 0; i < 100; i++) {
+                sum += i;
+            }
+            return sum + b; // a + (0 + 1 + ... + 99) + b
+        }
+    }
+}
diff --git a/src/test/java/com/baidu/hugegraph/unit/perf/testclass/TestPerfLightClass.java b/src/test/java/com/baidu/hugegraph/unit/perf/testclass/TestPerfLightClass.java
new file mode 100644
index 0000000..389ef23
--- /dev/null
+++ b/src/test/java/com/baidu/hugegraph/unit/perf/testclass/TestPerfLightClass.java
@@ -0,0 +1,74 @@
+/*
+ * Copyright 2017 HugeGraph Authors
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+
+package com.baidu.hugegraph.unit.perf.testclass;
+
+import com.baidu.hugegraph.perf.PerfUtil.Watched;
+
+public class TestPerfLightClass {
+    // Shared instance used by testCall() and testCallFooThenSum()
+    private Foo foo = new Foo();
+
+    @Watched
+    public void test(int times) {
+        for (int i = 0; i < times; i++) {
+            this.testNew();
+            this.testNewAndCall();
+            this.testCall();
+            this.testCallFooThenSum();
+        }
+    }
+
+    @Watched
+    public void testNew() {
+        new Foo(); // allocation only
+    }
+
+    @Watched
+    public void testNewAndCall() {
+        new Foo().sum(1, 2); // allocation plus one watched call
+    }
+
+    @Watched
+    public void testCall() {
+        this.foo.sum(1, 2); // watched call on the shared instance
+    }
+
+    @Watched
+    public void testCallFooThenSum() {
+        this.foo.foo(); // Foo.foo() in turn calls sum()
+    }
+
+    public static class Foo {
+        // foo() only forwards to sum(), giving a two-level call path
+        @Watched
+        public void foo() {
+            this.sum(1, 2);
+        }
+
+        @Watched
+        public int sum(int a, int b) {
+            int sum = a;
+            for (int i = 0; i < 100; i++) {
+                sum += i;
+            }
+            return sum + b; // a + (0 + 1 + ... + 99) + b
+        }
+    }
+}
diff --git a/src/test/java/com/baidu/hugegraph/testclass/TestClass.java b/src/test/java/com/baidu/hugegraph/unit/perf/testclass2/TestClass4Package.java
similarity index 75%
rename from src/test/java/com/baidu/hugegraph/testclass/TestClass.java
rename to src/test/java/com/baidu/hugegraph/unit/perf/testclass2/TestClass4Package.java
index 0fe47a1..b868907 100644
--- a/src/test/java/com/baidu/hugegraph/testclass/TestClass.java
+++ b/src/test/java/com/baidu/hugegraph/unit/perf/testclass2/TestClass4Package.java
@@ -17,13 +17,31 @@
  * under the License.
  */
 
-package com.baidu.hugegraph.testclass;
+package com.baidu.hugegraph.unit.perf.testclass2;
 
 import com.baidu.hugegraph.perf.PerfUtil.Watched;
 
-public class TestClass {
+public class TestClass4Package {
 
-    public static class Foo {
+    @Watched
+    public void test() {
+        new Foo().bar();
+    }
+
+    public static class Foo extends FooBase {
+
+        @Watched
+        public void foo() {
+            this.bar();
+        }
+
+        @Watched
+        public void bar() {}
+    }
+
+    public static class FooBase {}
+
+    public static class Bar {
 
         @Watched
         public void foo() {
diff --git a/src/test/java/com/baidu/hugegraph/unit/util/ReflectionUtilTest.java b/src/test/java/com/baidu/hugegraph/unit/util/ReflectionUtilTest.java
index df8afcb..7031a44 100644
--- a/src/test/java/com/baidu/hugegraph/unit/util/ReflectionUtilTest.java
+++ b/src/test/java/com/baidu/hugegraph/unit/util/ReflectionUtilTest.java
@@ -27,10 +27,14 @@
 import org.junit.Test;
 
 import com.baidu.hugegraph.perf.PerfUtil.Watched;
-import com.baidu.hugegraph.testclass.TestClass.Base;
-import com.baidu.hugegraph.testclass.TestClass.Sub;
 import com.baidu.hugegraph.testutil.Assert;
 import com.baidu.hugegraph.unit.BaseUnitTest;
+import com.baidu.hugegraph.unit.perf.testclass.TestClass;
+import com.baidu.hugegraph.unit.perf.testclass.TestClass.Bar;
+import com.baidu.hugegraph.unit.perf.testclass.TestClass.Base;
+import com.baidu.hugegraph.unit.perf.testclass.TestClass.Foo;
+import com.baidu.hugegraph.unit.perf.testclass.TestClass.ManuallyProfile;
+import com.baidu.hugegraph.unit.perf.testclass.TestClass.Sub;
 import com.baidu.hugegraph.util.ReflectionUtil;
 import com.google.common.reflect.ClassPath.ClassInfo;
 
@@ -111,4 +115,48 @@
         Assert.assertEquals(Base.class.getName(), classes.get(0));
         Assert.assertEquals(Object.class.getName(), classes.get(1));
     }
+
+    @Test
+    public void testNestedClasses() throws NotFoundException {
+        List<String> classes = ReflectionUtil.nestedClasses(
+                               TestClass.class.getName());
+        Assert.assertEquals(5, classes.size());
+        classes.sort(String::compareTo); // fixed order for the checks below
+        Assert.assertEquals(Bar.class.getName(), classes.get(0));
+        Assert.assertEquals(Base.class.getName(), classes.get(1));
+        Assert.assertEquals(Foo.class.getName(), classes.get(2));
+        Assert.assertEquals(ManuallyProfile.class.getName(), classes.get(3));
+        Assert.assertEquals(Sub.class.getName(), classes.get(4));
+    }
+
+    @Test
+    public void testPackageName() {
+        String clazz = "com.baidu.hugegraph.unit.perf.testclass2.Test";
+        Assert.assertEquals("com.baidu.hugegraph.unit.perf.testclass2",
+                            ReflectionUtil.packageName(clazz));
+        // Nested class name: same package as for the outer class
+        clazz = "com.baidu.hugegraph.unit.perf.testclass2.Test$Bar";
+        Assert.assertEquals("com.baidu.hugegraph.unit.perf.testclass2",
+                            ReflectionUtil.packageName(clazz));
+
+        clazz = "com.baidu.hugegraph.unit.perf.testclass.Test$Bar";
+        Assert.assertEquals("com.baidu.hugegraph.unit.perf.testclass",
+                            ReflectionUtil.packageName(clazz));
+        // Doubled dot: only the part from the last dot onwards is cut off
+        clazz = "com.baidu.hugegraph.unit.perf.testclass..Test$Bar";
+        Assert.assertEquals("com.baidu.hugegraph.unit.perf.testclass.",
+                            ReflectionUtil.packageName(clazz));
+        // No dot at all: the package is the empty string
+        clazz = "com";
+        Assert.assertEquals("", ReflectionUtil.packageName(clazz));
+        // Trailing dot: everything before it is the package
+        clazz = "com.";
+        Assert.assertEquals("com", ReflectionUtil.packageName(clazz));
+
+        clazz = "Test";
+        Assert.assertEquals("", ReflectionUtil.packageName(clazz));
+        // Leading dot: the package is the empty string
+        clazz = ".Test";
+        Assert.assertEquals("", ReflectionUtil.packageName(clazz));
+    }
 }