#!/usr/bin/env impala-python
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# This script generates minidump test data. 3 text files will be created
# containing startup flags for each of the services in (conf_dir)/impalad_flags,
# (conf_dir)/state_store_flags, and (conf_dir)/catalogserver_flags. Each of those files
# will have a parameter -minidump_path. Each path will look like (minidump_dir)/impalad,
# (minidump_dir)/catalogd, (minidump_dir)/statestored. Sample minidump files will be
# generated and placed into each of those directories. It is possible to control the
# minidump file timestamps by specifying the start_time and end_time. The timestamps will
# be spaced evenly in the interval. Alternatively, duration can be specified which will
# create the files in the interval [now - duration, now]. Minidumps are simulated by
# making the files easily compressible by having some repeated data.
import errno
import os
import random
import shutil
import time
from optparse import OptionParser
# Command line interface. Every option has a default, so the script can be run without
# any arguments.
parser = OptionParser()

# Directory that receives the generated flag files.
parser.add_option(
    '--conf_dir',
    default='/tmp/impala-conf')
# Log directory; a relative --minidump_dir is resolved against it.
parser.add_option(
    '--log_dir',
    default='/tmp/impala-logs')
parser.add_option(
    '--minidump_dir',
    default='minidumps')

# Explicit bounds for the timestamps of the generated files. Both are unset by default,
# in which case --duration is used instead.
parser.add_option(
    '--start_time',
    type='int',
    default=None)
parser.add_option(
    '--end_time',
    type='int',
    default=None)
parser.add_option(
    '--duration',
    type='int',
    default=3600,
    help="if start and end time are not set, they will be calculated based on this value")

# Number of minidump files to generate.
parser.add_option(
    '--num_minidumps',
    type='int',
    default=20)

options, args = parser.parse_args()
# Template for the startup flag files written by generate_conf_files(). It is filled in
# with str.format(): {0} is the log directory and {1} is the minidump directory.
# NOTE(review): only the first flag of this template was present; the -log_dir and
# -minidump_path lines were rebuilt from the format() call that consumes the template --
# confirm them against the intended flag list.
CONFIG_FILE = '''-beeswax_port=21000
-log_dir={0}
-minidump_path={1}
'''
# Maps each service role to the name of its flag file inside conf_dir. The role names
# are also used as the names of the per-role minidump subdirectories.
ROLE_NAMES = {
  'impalad': 'impalad_flags',
  'statestored': 'state_store_flags',
  'catalogd': 'catalogserver_flags',
}
def generate_conf_files():
  '''Create options.conf_dir if necessary and write one flag file per role into it.

  Each file is named after the role's entry in ROLE_NAMES and contains CONFIG_FILE
  formatted with options.log_dir and options.minidump_dir.

  Raises OSError if conf_dir cannot be created, or if the path exists but is not a
  directory.'''
  try:
    os.makedirs(options.conf_dir)
  except OSError as e:
    # An already existing directory is fine. Anything else (permission problems, a
    # regular file in the way, ...) is a real error and is passed on to the caller.
    if e.errno != errno.EEXIST or not os.path.isdir(options.conf_dir):
      raise
  contents = CONFIG_FILE.format(options.log_dir, options.minidump_dir)
  for flag_file_name in ROLE_NAMES.values():
    with open(os.path.join(options.conf_dir, flag_file_name), 'w') as f:
      f.write(contents)
def random_bytes(num):
  '''Return a byte string consisting of num random bytes.

  The result is a real byte string on both Python 2 (str) and Python 3 (bytes), so it
  can be written directly to a file opened in binary mode and repeated with the *
  operator.'''
  # Going through bytearray yields the same bytes as joining chr() values on Python 2,
  # while avoiding a text string on Python 3.
  return bytes(bytearray(random.randint(0, 255) for _ in range(num)))
def write_minidump(common_data, timestamp, target_dir):
  '''Generate a simulated minidump and write it into target_dir.

  The file gets a random name made of 10 lowercase letters. Its contents are 8192
  freshly generated random bytes followed by common_data. atime and mtime of the
  minidump will be set to timestamp.'''
  file_name = ''.join(random.choice('abcdefghijklmnopqrstuvwxyz') for _ in range(10))
  minidump_path = os.path.join(target_dir, file_name)
  with open(minidump_path, 'wb') as f:
    # We want the minidumps to be pretty similar to each other, so only a small part of
    # each file is unique. The number 8192 was chosen arbitrarily and seemed like a
    # reasonable guess.
    f.write(random_bytes(8192))
    f.write(common_data)
  # Set the times only after the file has been closed, so that the writes above cannot
  # overwrite them.
  os.utime(minidump_path, (timestamp, timestamp))
def generate_minidumps():
  '''Create options.num_minidumps simulated minidumps for every role in ROLE_NAMES.

  The files are written to (minidump_dir)/(role name), where a relative
  options.minidump_dir is resolved against options.log_dir. A minidump directory that
  already exists is deleted first. The file timestamps are spaced evenly over
  [options.start_time, options.end_time] or, if either bound is unset, over
  [now - options.duration, now].'''
  if options.start_time is None or options.end_time is None:
    # Read the clock only once so that the interval is exactly options.duration long.
    end_timestamp = time.time()
    start_timestamp = end_timestamp - options.duration
  else:
    start_timestamp = options.start_time
    end_timestamp = options.end_time

  minidump_dir = options.minidump_dir
  if not os.path.isabs(minidump_dir):
    minidump_dir = os.path.join(options.log_dir, minidump_dir)
  # Start from a clean slate: os.makedirs() below fails on directories that already
  # exist.
  if os.path.exists(minidump_dir):
    shutil.rmtree(minidump_dir)

  # We want the files to have a high compression ratio and be several megabytes in size.
  # The parameters below should accomplish this.
  repeated_token = random_bytes(256)
  common_data = repeated_token * 40000

  if options.num_minidumps > 1:
    interval = (end_timestamp - start_timestamp) // (options.num_minidumps - 1)
  else:
    # A single minidump is placed at start_timestamp. Handling this case separately also
    # avoids dividing by zero.
    interval = 0

  for role_name in ROLE_NAMES:
    role_dir = os.path.join(minidump_dir, role_name)
    os.makedirs(role_dir)
    for i in range(options.num_minidumps):
      write_minidump(common_data, start_timestamp + interval * i, role_dir)
def main():
  '''Generate the flag files first and then the simulated minidumps.'''
  generate_conf_files()
  generate_minidumps()


if __name__ == '__main__':
  main()