blob: 021941bfe65186b996c610cdd965d9ef2d369474 [file] [log] [blame]
#!/usr/bin/env impala-python
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# This script generates testdata for collect_minidumps.py. 3 text files will be created
# containing startup flags for each of the services in (conf_dir)/impalad_flags,
# (conf_dir)/state_store_flags, and (conf_dir)/catalogserver_flags. Each of those files
# will have a parameter -minidump_path. Each path will look like (minidump_dir)/impalad,
# (minidump_dir)/catalogd, (minidump_dir)/statestored. Sample minidump files will be
# generated and placed into each of those directories. It is possible to control the
# minidump file timestamps by specifying the start_time and end_time. The timestamps will
# be spaced evenly in the interval. Alternatively, duration can be specified which will
# create the files in the interval [now - duration, now]. Minidumps are simulated by
# making the files easily compressible by having some repeated data.
import errno
import os
import random
import shutil
import time
from optparse import OptionParser
# Command-line interface of the generator. The parsed values end up in the module-level
# 'options' object that the functions below read.
parser = OptionParser()

# Directory that receives the generated per-role flag files.
parser.add_option(
    '--conf_dir',
    default='/tmp/impala-conf')
# Written into the flag files as -log_dir; also the base for a relative --minidump_dir.
parser.add_option(
    '--log_dir',
    default='/tmp/impala-logs')
# Root directory of the generated minidumps; one sub-directory per role is created in it.
parser.add_option(
    '--minidump_dir',
    default='minidumps')
# Explicit bounds for the file timestamps. Both must be given for them to take effect.
parser.add_option(
    '--start_time',
    type='int',
    default=None)
parser.add_option(
    '--end_time',
    type='int',
    default=None)
parser.add_option(
    '--duration',
    type='int',
    default=3600,
    help=("if start and end time are not set, "
          "they will be calculated based on this value"))
# Number of minidump files generated for each role.
parser.add_option(
    '--num_minidumps',
    type='int',
    default=20)

options, args = parser.parse_args()
# Template for the flag file written for every role. It is rendered with str.format():
# {0} is replaced by the log directory and {1} by the minidump directory (see
# generate_conf_files). All other flags are fixed sample values.
CONFIG_FILE = '''-beeswax_port=21000
-fe_port=21000
-be_port=22000
-hs2_port=21050
-enable_webserver=true
-mem_limit=108232130560
-max_log_files=10
-webserver_port=25000
-max_result_cache_size=100000
-state_store_subscriber_port=23000
-statestore_subscriber_timeout_seconds=30
-scratch_dirs=/data/1/impala/impalad,/data/10/impala/impalad,/data/11/impala/impalad
-default_query_options
-log_filename=impalad
-audit_event_log_dir=/var/log/impalad/audit
-max_audit_event_log_file_size=5000
-abort_on_failed_audit_event=false
-lineage_event_log_dir=/var/log/impalad/lineage
-log_dir={0}
-minidump_path={1}
-max_lineage_log_file_size=5000
-hostname=host1.example.com
-state_store_host=host2.example.com
-state_store_port=24000
-catalog_service_host=host2.example.com
-catalog_service_port=26000
-local_library_dir=/var/lib/impala/udfs
-disk_spill_encryption=false
-abort_on_config_error=true'''
# Maps each role name to the name of its flag file inside the conf directory. The role
# name (the key) is also used as the name of that role's minidump sub-directory.
ROLE_NAMES = {'impalad': 'impalad_flags',
              'statestored': 'state_store_flags',
              'catalogd': 'catalogserver_flags'}
def generate_conf_files():
    '''Create options.conf_dir if needed and write one flag file per role into it.

    Every file listed in ROLE_NAMES receives the same content: CONFIG_FILE rendered with
    options.log_dir and options.minidump_dir. An OSError from creating the directory is
    re-raised unless it only reports that the directory already exists.'''
    try:
        os.makedirs(options.conf_dir)
    except OSError as e:
        # An already existing directory is fine; anything else is a real failure.
        dir_already_exists = e.errno == errno.EEXIST and os.path.isdir(options.conf_dir)
        if not dir_already_exists:
            raise e

    flags_text = CONFIG_FILE.format(options.log_dir, options.minidump_dir)
    for flags_file_name in ROLE_NAMES.values():
        flags_path = os.path.join(options.conf_dir, flags_file_name)
        with open(flags_path, 'w') as flags_file:
            flags_file.write(flags_text)
def random_bytes(num):
    '''Return a byte string of length num filled with random byte values (0-255).

    The value is assembled through a bytearray so that it is a real byte string on both
    Python 2 (where bytes is str) and Python 3. Joining chr() results yields text on
    Python 3, which cannot be written to a file opened in binary mode.'''
    return bytes(bytearray(random.randint(0, 255) for _ in range(num)))
def write_minidump(common_data, timestamp, target_dir):
    '''Generate and write the minidump into the target_dir. atime and mtime of the minidump
    will be set to timestamp.

    common_data is the byte string appended after the random prefix of the file. The file
    gets a random name of 10 lowercase letters; target_dir must already exist.'''
    # range() rather than xrange(): equivalent here and also available on Python 3.
    file_name = ''.join(random.choice('abcdefghijklmnopqrstuvwxyz') for _ in range(10))
    minidump_path = os.path.join(target_dir, file_name)
    with open(minidump_path, 'wb') as f:
        # We want the minidumps to be pretty similar to each other. The number 8192 was
        # chosen arbitrarily and seemed like a reasonable guess.
        unique_data = random_bytes(8192)
        f.write(unique_data)
        f.write(common_data)
    # Set the times only after the file is closed, so the write cannot overwrite mtime.
    os.utime(minidump_path, (timestamp, timestamp))
def generate_minidumps():
    '''Create options.num_minidumps sample minidump files for every role in ROLE_NAMES.

    The files of a role are written to (minidump_dir)/(role_name), where a relative
    options.minidump_dir is resolved against options.log_dir. An existing minidump
    directory is deleted first. File timestamps are spread evenly over
    [start_time, end_time]; if either bound is missing, the interval
    [now - duration, now] is used instead.'''
    if options.start_time is None or options.end_time is None:
        # Read the clock only once so that the window is exactly options.duration long.
        end_timestamp = time.time()
        start_timestamp = end_timestamp - options.duration
    else:
        start_timestamp = options.start_time
        end_timestamp = options.end_time

    minidump_dir = options.minidump_dir
    if not os.path.isabs(minidump_dir):
        minidump_dir = os.path.join(options.log_dir, minidump_dir)
    # Start from a clean slate so that stale files of earlier runs do not mix in.
    if os.path.exists(minidump_dir):
        shutil.rmtree(minidump_dir)

    # Spacing between consecutive timestamps. It does not depend on the role, so it is
    # computed once. A single minidump is placed at the start of the window, which also
    # avoids dividing by zero.
    if options.num_minidumps == 1:
        interval = 0
    else:
        interval = (end_timestamp - start_timestamp) // (options.num_minidumps - 1)

    for role_name in ROLE_NAMES:
        role_dir = os.path.join(minidump_dir, role_name)
        os.makedirs(role_dir)
        # We want the files to have a high compression ratio and be several megabytes in
        # size. The parameters below should accomplish this: a 256 byte token repeated
        # 40000 times is roughly 10 MB of highly repetitive data.
        repeated_token = random_bytes(256)
        common_data = repeated_token * 40000
        # range() rather than xrange(): equivalent here and also available on Python 3.
        for i in range(options.num_minidumps):
            write_minidump(common_data, start_timestamp + interval * i, role_dir)
def main():
    '''Generate the flag files first and the sample minidumps afterwards.'''
    generation_steps = (generate_conf_files, generate_minidumps)
    for run_step in generation_steps:
        run_step()
# Generate the test data only when this file is executed as a script.
if __name__ == '__main__':
    main()