# blob: d7fa54da1b4f2e539262595f9a57638de30fea13 [file] [log] [blame]
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import subprocess as sbp
import sys
try:
import pandas as pd
HAVE_PANDAS = True
except ImportError:
HAVE_PANDAS = False
# Substring -> replacement rules used to abbreviate demangled C++ symbol
# names.  The rules are applied in insertion order and each one operates on
# the output of the previous ones, so the order of the entries matters:
# e.g. 'arrow::compute::' is first rewritten to 'ac::', and only then can
# the 'ac::internal::' and 'ac::KernelContext*' rules match.
SYMBOL_FILTERS = {
    # C++ standard library names
    'std::chrono::duration': 'duration',
    'std::__cxx11::basic_string': 'std::string',

    # Core arrow classes: drop the namespace qualifier
    'arrow::ArrayData': 'ArrayData',
    'arrow::ArraySpan': 'ArraySpan',
    'arrow::Datum': 'Datum',
    'arrow::Scalar': 'Scalar',
    'arrow::Status': 'Status',
    'arrow::Type': 'Type',

    # arrow data type classes: short aliases
    'arrow::TimestampType': 'TsT',
    'arrow::BinaryType': 'BinaryT',
    'arrow::BooleanType': 'BoolT',
    'arrow::StringType': 'StringT',
    'arrow::LargeStringType': 'LStringT',
    'arrow::DoubleType': 'DoubleT',
    'arrow::FloatType': 'FloatT',
    'arrow::Int64Type': 'Int64T',
    'arrow::UInt64Type': 'UInt64T',
    'arrow::LargeListType': 'LListT',
    'arrow::ListType': 'ListT',
    'arrow::FixedSizeListType': 'FSLT',

    # Namespaces: shorten or remove
    'arrow::compute::': 'ac::',
    'ac::internal::': '',
    'arrow::internal::': 'ai::',
    '(anonymous namespace)::': '',
    'internal::applicator::': '',
    'internal::CastFunctor': 'CastFunctor',

    # Parameter types: these match names already shortened by the rules above
    'ac::KernelContext*': 'C*',
    'ArrayData const&': 'A&',
    'ArraySpan const&': 'A&',
    'ArrayData*': 'O*',
    'Scalar const&': 'S&',
    'Datum const&': 'V&',
    'Datum*': 'O*',
    'ac::ExecBatch const&': 'B&',
    'ac::ExecSpan const&': 'B&',
    'ac::ExecValue const&': 'V&',
    'ac::ExecResult*': 'O*',
    'Type::type': 'T',
}
def filter_symbol(symbol_name):
    """
    Abbreviate a demangled symbol name using the SYMBOL_FILTERS rules.

    Every key of SYMBOL_FILTERS is replaced by its value wherever it occurs
    in the name.  Rules are applied one after another in the dict's
    iteration order, each on the result of the previous one.

    Returns the abbreviated name as a new string.
    """
    shortened = symbol_name
    for pattern in SYMBOL_FILTERS:
        shortened = shortened.replace(pattern, SYMBOL_FILTERS[pattern])
    return shortened
def get_symbols_and_sizes(object_file):
    """
    Return a dict mapping abbreviated symbol names to their sizes in bytes.

    Runs ``nm --print-size --size-sort`` on *object_file*, demangles the
    listing with ``c++filt`` and shortens every name with
    ``filter_symbol``.

    Parameters
    ----------
    object_file : str
        Path of the object file or library to inspect.

    Returns
    -------
    dict
        ``{abbreviated symbol name: size}``.  When several symbols end up
        with the same abbreviated name, the one listed last wins.

    Raises
    ------
    subprocess.CalledProcessError
        If ``nm`` or ``c++filt`` exits with a non-zero status.
    OSError
        If one of the tools cannot be started.
    """
    # Run both tools directly instead of through a shell pipeline: the path
    # is then passed verbatim (spaces or shell metacharacters are harmless),
    # and a failing `nm` raises instead of being hidden behind the exit
    # status of `c++filt`, which would silently yield an empty result.
    nm_output = sbp.check_output(
        ['nm', '--print-size', '--size-sort', object_file])
    output = sbp.check_output(['c++filt'], input=nm_output).decode('utf-8')

    symbol_sizes = {}
    for line in output.splitlines():
        # Expected line layout: "<address> <hex size> <type> <symbol name>".
        # The name may itself contain spaces, hence the maxsplit of 3.
        fields = line.split(' ', 3)
        if len(fields) != 4:
            # Blank lines and anything that is not a symbol entry
            # (presumably e.g. per-member headers when given an archive).
            continue
        _, hex_size, _, symbol_name = fields
        symbol_sizes[filter_symbol(symbol_name)] = int(hex_size, 16)
    return symbol_sizes
def compute_size_diffs(base_results, contender_results):
    """
    Compare two ``{symbol name: size}`` mappings.

    Returns a list of ``(name, base_size, contender_size, diff)`` tuples,
    one per symbol present in either mapping, where
    ``diff = contender_size - base_size``.  A symbol missing from one side
    is counted with size 0 there.  Rows are sorted by increasing diff, with
    ties broken by name so that the output is reproducible between runs.
    """
    all_symbols = base_results.keys() | contender_results.keys()
    diff_table = []
    for name in all_symbols:
        base_size = base_results.get(name, 0)
        contender_size = contender_results.get(name, 0)
        diff_table.append(
            (name, base_size, contender_size, contender_size - base_size))
    diff_table.sort(key=lambda row: (row[3], row[0]))
    return diff_table


def _print_plain_table(rows):
    """Print diff rows as fixed-width text columns (no pandas required)."""
    print(f"{'base':>12} {'contender':>12} {'diff':>12}  symbol")
    for name, base_size, contender_size, diff in rows:
        print(f"{base_size:>12} {contender_size:>12} {diff:>+12}  {name}")


def _report_size_diffs(diff_table, threshold=700):
    """
    Print the symbols whose size changed by more than *threshold* bytes.

    Two tables are printed: first the symbols that shrank by more than
    *threshold*, then the symbols that grew by more than *threshold*.
    pandas is used for the display when available; otherwise the same rows
    are printed as plain text.
    """
    if HAVE_PANDAS:
        frame = pd.DataFrame.from_records(
            diff_table, columns=['symbol', 'base', 'contender', 'diff'])
        # Avoid truncating the (potentially long) listing and symbol names.
        pd.options.display.max_rows = 1000
        pd.options.display.max_colwidth = 150
        print(frame[frame['diff'] < -threshold])
        print(frame[frame['diff'] > threshold])
    else:
        _print_plain_table(
            [row for row in diff_table if row[3] < -threshold])
        _print_plain_table(
            [row for row in diff_table if row[3] > threshold])


if __name__ == '__main__':
    # Usage: <script> BASE CONTENDER
    if len(sys.argv) < 3:
        sys.exit(f"usage: {sys.argv[0]} BASE_OBJECT_FILE CONTENDER_OBJECT_FILE")
    base, contender = sys.argv[1], sys.argv[2]

    base_results = get_symbols_and_sizes(base)
    contender_results = get_symbols_and_sizes(contender)

    diff_table = compute_size_diffs(base_results, contender_results)
    _report_size_diffs(diff_table)