| #!/usr/bin/env impala-python |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| # This script generates testdata for collect_minidumps.py. 3 text files will be created |
| # containing startup flags for each of the services in (conf_dir)/impalad_flags, |
| # (conf_dir)/state_store_flags, and (conf_dir)/catalogserver_flags. Each of those files |
| # will have a parameter -minidump_path. Each path will look like (minidump_dir)/impalad, |
| # (minidump_dir)/catalogd, (minidump_dir)/statestored. Sample minidump files will be |
| # generated and placed into each of those directories. It is possible to control the |
| # minidump file timestamps by specifying the start_time and end_time. The timestamps will |
| # be spaced evenly in the interval. Alternatively, duration can be specified which will |
| # create the files in the interval [now - duration, now]. Minidumps are simulated by |
| # making the files easily compressible by having some repeated data. |
| |
| from __future__ import absolute_import, division, print_function |
| from builtins import range |
| import errno |
| import os |
| import random |
| import shutil |
| import time |
| |
| from optparse import OptionParser |
| |
# Command-line interface of the generator. The action, value type and destination
# attribute of each option are spelled out explicitly; they are the same values optparse
# would derive on its own from the long option name.
parser = OptionParser()

# Directories: where the flag files go, the log directory written into the flag files
# (also the base for a relative minidump directory), and the minidump directory itself.
parser.add_option('--conf_dir', action='store', type='string', dest='conf_dir',
                  default='/tmp/impala-conf')
parser.add_option('--log_dir', action='store', type='string', dest='log_dir',
                  default='/tmp/impala-logs')
parser.add_option('--minidump_dir', action='store', type='string', dest='minidump_dir',
                  default='minidumps')

# Time window used for the timestamps of the generated minidump files.
parser.add_option('--start_time', action='store', type='int', dest='start_time',
                  default=None)
parser.add_option('--end_time', action='store', type='int', dest='end_time',
                  default=None)
parser.add_option(
    '--duration', action='store', type='int', dest='duration', default=3600,
    help="if start and end time are not set, they will be calculated based on this value")

# Number of minidump files created per role.
parser.add_option('--num_minidumps', action='store', type='int', dest='num_minidumps',
                  default=20)

# Parsed at import time; the functions below read the module-level 'options' object.
options, args = parser.parse_args()
| |
# Template for the contents of every generated flag file, one flag per line and without a
# trailing newline. It carries two str.format() placeholders that generate_conf_files()
# fills in: {0} receives the log directory and {1} the minidump path.
CONFIG_FILE = '\n'.join((
    '-beeswax_port=21000',
    '-fe_port=21000',
    '-hs2_port=21050',
    '-enable_webserver=true',
    '-mem_limit=108232130560',
    '-max_log_files=10',
    '-webserver_port=25000',
    '-max_result_cache_size=100000',
    '-state_store_subscriber_port=23000',
    '-statestore_subscriber_timeout_seconds=30',
    '-scratch_dirs=/data/1/impala/impalad,/data/10/impala/impalad,/data/11/impala/impalad',
    '-default_query_options',
    '-log_filename=impalad',
    '-audit_event_log_dir=/var/log/impalad/audit',
    '-max_audit_event_log_file_size=5000',
    '-abort_on_failed_audit_event=false',
    '-lineage_event_log_dir=/var/log/impalad/lineage',
    '-log_dir={0}',
    '-minidump_path={1}',
    '-max_lineage_log_file_size=5000',
    '-hostname=host1.example.com',
    '-state_store_host=host2.example.com',
    '-state_store_port=24000',
    '-catalog_service_host=host2.example.com',
    '-catalog_service_port=26000',
    '-local_library_dir=/var/lib/impala/udfs',
    '-disk_spill_encryption=false',
    '-abort_on_config_error=true',
))
| |
# Role name -> file name of the flag file generated for that role inside the conf
# directory. The role names (keys) double as the per-role minidump sub-directory names.
ROLE_NAMES = {
    'impalad': 'impalad_flags',
    'statestored': 'state_store_flags',
    'catalogd': 'catalogserver_flags',
}
| |
def generate_conf_files():
    '''Write one startup-flag file per role into options.conf_dir.

    The directory is created when it does not exist yet. Every file named in ROLE_NAMES
    receives the same contents: CONFIG_FILE with options.log_dir substituted for the
    -log_dir value and options.minidump_dir for the -minidump_path value. Files that
    already exist are overwritten.

    Raises OSError if options.conf_dir cannot be created and is not already a
    directory.'''
    try:
        os.makedirs(options.conf_dir)
    except OSError as e:
        # An already existing directory is fine; anything else is a real failure.
        if e.errno != errno.EEXIST or not os.path.isdir(options.conf_dir):
            # A bare raise re-raises the active exception with its original traceback;
            # 'raise e' would reset the traceback on Python 2.
            raise
    # The rendered contents do not depend on the role, so format the template only once.
    contents = CONFIG_FILE.format(options.log_dir, options.minidump_dir)
    for role_name in ROLE_NAMES:
        with open(os.path.join(options.conf_dir, ROLE_NAMES[role_name]), 'w') as f:
            f.write(contents)
| |
def random_bytes(num):
    '''Return a byte string of length num filled with random byte values (0-255).

    The result is a real byte string on both Python 2 (str) and Python 3 (bytes), so it
    can be written to a file opened in binary mode. Building the data with chr() and
    str.join() yields a text string on Python 3, which a binary-mode write rejects with
    a TypeError.

    num: number of bytes to generate; 0 yields an empty byte string.'''
    return bytes(bytearray(random.randint(0, 255) for _ in range(num)))
| |
def write_minidump(common_data, timestamp, target_dir):
    '''Create one simulated minidump file inside target_dir.

    The file is given a random name of ten lowercase letters. Its contents are 8192
    freshly generated random bytes followed by common_data. Once the file has been
    written, both its access time and its modification time are set to timestamp.'''
    letters = [random.choice('abcdefghijklmnopqrstuvwxyz') for _ in range(10)]
    dump_path = os.path.join(target_dir, ''.join(letters))
    with open(dump_path, 'wb') as dump_file:
        # The minidumps should be pretty similar to each other: a small unique part
        # followed by the shared payload. The size of 8192 bytes was chosen arbitrarily
        # and seemed like a reasonable guess.
        dump_file.write(random_bytes(8192))
        dump_file.write(common_data)
    times = (timestamp, timestamp)
    os.utime(dump_path, times)
| |
def generate_minidumps():
    '''Create options.num_minidumps simulated minidump files for every role.

    Time window: when both options.start_time and options.end_time are given they
    delimit the window. If either one is missing, both are ignored and the window is
    [now - options.duration, now]. The file timestamps start at the beginning of the
    window and are spaced by the floor of window / (num_minidumps - 1); a single
    minidump is placed at the start of the window.

    Location: options.minidump_dir, taken relative to options.log_dir unless it is
    absolute. An existing directory at that location is deleted first, then one
    sub-directory per key of ROLE_NAMES is created and filled.'''
    if options.start_time is None or options.end_time is None:
        # Read the clock only once so that the window is exactly options.duration long.
        end_timestamp = time.time()
        start_timestamp = end_timestamp - options.duration
    else:
        start_timestamp = options.start_time
        end_timestamp = options.end_time

    minidump_dir = options.minidump_dir
    if not os.path.isabs(minidump_dir):
        minidump_dir = os.path.join(options.log_dir, minidump_dir)
    # Start from a clean slate so that no files of an earlier run are left behind.
    if os.path.exists(minidump_dir):
        shutil.rmtree(minidump_dir)

    # The spacing between timestamps is the same for every role, so compute it once.
    # A single minidump needs no spacing (and would otherwise divide by zero).
    if options.num_minidumps == 1:
        interval = 0
    else:
        interval = (end_timestamp - start_timestamp) // (options.num_minidumps - 1)

    for role_name in ROLE_NAMES:
        role_dir = os.path.join(minidump_dir, role_name)
        os.makedirs(role_dir)
        # We want the files to have a high compression ratio and be several megabytes in
        # size: a 256-byte random token repeated 40000 times accomplishes this.
        repeated_token = random_bytes(256)
        common_data = repeated_token * 40000
        for i in range(options.num_minidumps):
            write_minidump(common_data, start_timestamp + interval * i, role_dir)
| |
def main():
    '''Run both generation steps: first the flag files, then the minidumps.'''
    for step in (generate_conf_files, generate_minidumps):
        step()


# Only generate the test data when executed as a script.
if __name__ == '__main__':
    main()