tools/parsememdump.py - nuttx - Git at Google

 #!/usr/bin/env python3
 # tools/parsememdump.py
 #
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed to the Apache Software Foundation (ASF) under one or more
 # contributor license agreements.  See the NOTICE file distributed with
 # this work for additional information regarding copyright ownership.  The
 # ASF licenses this file to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance with the
 # License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
 # License for the specific language governing permissions and limitations
 # under the License.
 #
 import argparse
 import os
 import re
 from concurrent.futures import ThreadPoolExecutor

 program_description = """
 This program will help you analyze memdump log files,
 analyze the number of occurrences of backtrace,
 and output stack information
 memdump log files need this format:
 pid   size  seq  addr   mem
 """


 class dump_line:
     def __init__(self, line_str):
         self.mem = []
         self.err = False
         self.cnt = 1
         tmp = re.search("( \d+ )", line_str)
         if tmp is None:
             self.err = True
             return
         self.pid = int(tmp.group(0)[1:])
         tmp = re.search("( \d+ )", line_str[tmp.span()[1] :])
         if tmp is None:
             self.err = True
             return
         self.size = int(tmp.group(0)[1:])
         tmp = re.search("( \d+ )", line_str[tmp.span()[1] :])
         if tmp is None:
             self.err = True
             return
         self.seq = int(tmp.group(0)[1:])

         tmp = re.findall("0x([0-9a-fA-F]+)", line_str[tmp.span()[1] :])
         self.addr = tmp[0]
         for s in tmp[1:]:
             self.mem.append(s)


 class log_output:
     def __init__(self, args):
         if args.output:
             self.file = open(args.output, "w")

     def output(self, str):
         if hasattr(self, "file"):
             self.file.write(str)
         else:
             print(str, end="")

     def __del__(self):
         if hasattr(self, "file"):
             self.file.close()


 def compare_dump_line(dump_line_list, str):
     t = dump_line(str)
     if t.err:
         return

     if dump_line_list.__len__() == 0:
         dump_line_list.append(t)
         return

     find = 0
     for line in dump_line_list:
         if line.mem == t.mem and line.size == t.size and t.mem != []:
             find = 1
             line.cnt += 1
             break

     if find == 0:
         dump_line_list.append(t)


 def multi_thread_executer(cmd):
     result = ""
     p = os.popen(cmd, "r")
     while True:
         line = p.readline()
         if line == "":
             break
         result += f"    {line}"
     return result


 class addr2line_db:
     def __init__(self, mem=[], ncpu=1, prefix="", file="nuttx.elf", batch_max=1):
         self.mem = mem
         self.ncpu = ncpu
         self.db = {}
         self.prefix = prefix
         self.file = file
         self.batch_max = batch_max
         self.parse_all()

     def split_array(self, arr, num_splits):
         k, m = divmod(len(arr), num_splits)
         return [
             arr[i * k + min(i, m) : (i + 1) * k + min(i + 1, m)]
             for i in range(num_splits)
         ]

     def parse_all(self):
         cmds = []
         batch_cnt = len(self.mem) // self.ncpu
         if batch_cnt > self.batch_max:
             batch_cnt = self.batch_max
         segments = self.split_array(self.mem, batch_cnt)

         for seg in segments:
             addrs = " ".join(seg)
             cmds.append(f"{self.prefix}addr2line -Cfe {self.file} {addrs}")

         with ThreadPoolExecutor(max_workers=self.ncpu) as executor:
             for keys, v in zip(segments, executor.map(multi_thread_executer, cmds)):
                 lines = v.split("\n")
                 values = [
                     lines[i] + "\n" + lines[i + 1] + "\n"
                     for i in range(0, len(lines) - 1, 2)
                 ]
                 for i in range(len(keys)):
                     self.db[keys[i]] = values[i]

     def parse(self, mem):
         if mem in self.db.keys():
             return self.db[mem]
         else:
             return ""


 if __name__ == "__main__":
     parser = argparse.ArgumentParser(
         description=program_description, formatter_class=argparse.RawTextHelpFormatter
     )
     parser.add_argument("-f", "--file", help="dump file", nargs=1, required=True)
     parser.add_argument(
         "-p", "--prefix", help="addr2line program prefix", nargs=1, default=""
     )
     parser.add_argument(
         "-j",
         "--ncpu",
         help="multi thread count, default all",
         type=int,
         default=0,
         required=False,
     )
     parser.add_argument(
         "-e",
         "--elffile",
         default="",
         help="elf file,use it can output stack info",
         nargs=1,
     )

     parser.add_argument("-o", "--output", help="output file, default output shell")

     args = parser.parse_args()
     dump_file = open("%s" % args.file[0], "r")
     lines = []
     while 1:
         line = dump_file.readline()
         if line == "":
             break
         compare_dump_line(lines, line)
     dump_file.close()
     lines.sort(key=lambda x: x.cnt, reverse=True)

     log = log_output(args)
     total_dir = {}
     for t in lines:
         if t.pid in total_dir:
             total_dir[t.pid] += t.size * t.cnt
         else:
             total_dir.setdefault(t.pid, t.size * t.cnt)

     log.output("total memory used for ervey pid\n")
     log.output("pid       total size\n")
     total_size = 0
     for pid, size in sorted(total_dir.items(), key=lambda x: x[1]):
         log.output("%-3d       %-6d\n" % (pid, size))
         total_size += size
     log.output("all used memory %-6d\n" % (total_size))

     log.output("cnt   size   pid   addr         mem\n")

     mems = []
     for line in lines:
         if line.mem == []:
             continue
         for mem in line.mem:
             if mem not in mems:
                 mems.append(mem)

     ncpu = args.ncpu
     if ncpu == 0:
         ncpu = os.cpu_count()

     db = addr2line_db(mem=mems, ncpu=ncpu, prefix=args.prefix[0], file=args.elffile[0])
     for t in lines:
         addr2line_str = ""
         log.output("%-4d  %-6d %-3d   %s   " % (t.cnt, t.size, t.pid, t.addr))
         if t.mem == []:
             log.output("\n")
             continue
         for mem in t.mem:
             log.output("%s " % mem)
             addr2line_str += db.parse(mem)
         log.output("\n")
         if addr2line_str != "":
             log.output(addr2line_str)
         log.output("\n")
     log.__del__()
	#!/usr/bin/env python3
	# tools/parsememdump.py
	#
	# SPDX-License-Identifier: Apache-2.0
	#
	# Licensed to the Apache Software Foundation (ASF) under one or more
	# contributor license agreements. See the NOTICE file distributed with
	# this work for additional information regarding copyright ownership. The
	# ASF licenses this file to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance with the
	# License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
	# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
	# License for the specific language governing permissions and limitations
	# under the License.
	#
	import argparse
	import os
	import re
	from concurrent.futures import ThreadPoolExecutor

	program_description = """
	This program will help you analyze memdump log files,
	analyze the number of occurrences of backtrace,
	and output stack information
	memdump log files need this format:
	pid size seq addr mem
	"""


	class dump_line:
	def __init__(self, line_str):
	self.mem = []
	self.err = False
	self.cnt = 1
	tmp = re.search("( \d+ )", line_str)
	if tmp is None:
	self.err = True
	return
	self.pid = int(tmp.group(0)[1:])
	tmp = re.search("( \d+ )", line_str[tmp.span()[1] :])
	if tmp is None:
	self.err = True
	return
	self.size = int(tmp.group(0)[1:])
	tmp = re.search("( \d+ )", line_str[tmp.span()[1] :])
	if tmp is None:
	self.err = True
	return
	self.seq = int(tmp.group(0)[1:])

	tmp = re.findall("0x([0-9a-fA-F]+)", line_str[tmp.span()[1] :])
	self.addr = tmp[0]
	for s in tmp[1:]:
	self.mem.append(s)


	class log_output:
	def __init__(self, args):
	if args.output:
	self.file = open(args.output, "w")

	def output(self, str):
	if hasattr(self, "file"):
	self.file.write(str)
	else:
	print(str, end="")

	def __del__(self):
	if hasattr(self, "file"):
	self.file.close()


	def compare_dump_line(dump_line_list, str):
	t = dump_line(str)
	if t.err:
	return

	if dump_line_list.__len__() == 0:
	dump_line_list.append(t)
	return

	find = 0
	for line in dump_line_list:
	if line.mem == t.mem and line.size == t.size and t.mem != []:
	find = 1
	line.cnt += 1
	break

	if find == 0:
	dump_line_list.append(t)


	def multi_thread_executer(cmd):
	result = ""
	p = os.popen(cmd, "r")
	while True:
	line = p.readline()
	if line == "":
	break
	result += f" {line}"
	return result


	class addr2line_db:
	def __init__(self, mem=[], ncpu=1, prefix="", file="nuttx.elf", batch_max=1):
	self.mem = mem
	self.ncpu = ncpu
	self.db = {}
	self.prefix = prefix
	self.file = file
	self.batch_max = batch_max
	self.parse_all()

	def split_array(self, arr, num_splits):
	k, m = divmod(len(arr), num_splits)
	return [
	arr[i * k + min(i, m) : (i + 1) * k + min(i + 1, m)]
	for i in range(num_splits)
	]

	def parse_all(self):
	cmds = []
	batch_cnt = len(self.mem) // self.ncpu
	if batch_cnt > self.batch_max:
	batch_cnt = self.batch_max
	segments = self.split_array(self.mem, batch_cnt)

	for seg in segments:
	addrs = " ".join(seg)
	cmds.append(f"{self.prefix}addr2line -Cfe {self.file} {addrs}")

	with ThreadPoolExecutor(max_workers=self.ncpu) as executor:
	for keys, v in zip(segments, executor.map(multi_thread_executer, cmds)):
	lines = v.split("\n")
	values = [
	lines[i] + "\n" + lines[i + 1] + "\n"
	for i in range(0, len(lines) - 1, 2)
	]
	for i in range(len(keys)):
	self.db[keys[i]] = values[i]

	def parse(self, mem):
	if mem in self.db.keys():
	return self.db[mem]
	else:
	return ""


	if __name__ == "__main__":
	parser = argparse.ArgumentParser(
	description=program_description, formatter_class=argparse.RawTextHelpFormatter
	)
	parser.add_argument("-f", "--file", help="dump file", nargs=1, required=True)
	parser.add_argument(
	"-p", "--prefix", help="addr2line program prefix", nargs=1, default=""
	)
	parser.add_argument(
	"-j",
	"--ncpu",
	help="multi thread count, default all",
	type=int,
	default=0,
	required=False,
	)
	parser.add_argument(
	"-e",
	"--elffile",
	default="",
	help="elf file,use it can output stack info",
	nargs=1,
	)

	parser.add_argument("-o", "--output", help="output file, default output shell")

	args = parser.parse_args()
	dump_file = open("%s" % args.file[0], "r")
	lines = []
	while 1:
	line = dump_file.readline()
	if line == "":
	break
	compare_dump_line(lines, line)
	dump_file.close()
	lines.sort(key=lambda x: x.cnt, reverse=True)

	log = log_output(args)
	total_dir = {}
	for t in lines:
	if t.pid in total_dir:
	total_dir[t.pid] += t.size * t.cnt
	else:
	total_dir.setdefault(t.pid, t.size * t.cnt)

	log.output("total memory used for ervey pid\n")
	log.output("pid total size\n")
	total_size = 0
	for pid, size in sorted(total_dir.items(), key=lambda x: x[1]):
	log.output("%-3d %-6d\n" % (pid, size))
	total_size += size
	log.output("all used memory %-6d\n" % (total_size))

	log.output("cnt size pid addr mem\n")

	mems = []
	for line in lines:
	if line.mem == []:
	continue
	for mem in line.mem:
	if mem not in mems:
	mems.append(mem)

	ncpu = args.ncpu
	if ncpu == 0:
	ncpu = os.cpu_count()

	db = addr2line_db(mem=mems, ncpu=ncpu, prefix=args.prefix[0], file=args.elffile[0])
	for t in lines:
	addr2line_str = ""
	log.output("%-4d %-6d %-3d %s " % (t.cnt, t.size, t.pid, t.addr))
	if t.mem == []:
	log.output("\n")
	continue
	for mem in t.mem:
	log.output("%s " % mem)
	addr2line_str += db.parse(mem)
	log.output("\n")
	if addr2line_str != "":
	log.output(addr2line_str)
	log.output("\n")
	log.__del__()