bin/generate_minidump_collection_testdata.py - impala - Git at Google

 #!/usr/bin/env impala-python
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.

 # This script generates testdata for collect_minidumps.py. 3 text files will be created
 # containing startup flags for each of the services in (conf_dir)/impalad_flags,
 # (conf_dir)/state_store_flags, and (conf_dir)/catalogserver_flags. Each of those files
 # will have a parameter -minidump_path. Each path will look like (minidump_dir)/impalad,
 # (minidump_dir)/catalogd, (minidump_dir)/statestored. Sample minidump files will be
 # generated and placed into each of those directories. It is possible to control the
 # minidump file timestamps by specifying both start_time and end_time. The timestamps
 # will be spaced evenly in the interval. Alternatively, duration can be specified which
 # will create the files in the interval [now - duration, now]. Minidumps are simulated
 # by making the files easily compressible by having some repeated data.

 import errno
 import os
 import random
 import shutil
 import time

 from optparse import OptionParser

 # Command-line interface. Every option has a default, so the script can be run without
 # arguments. Help texts are provided so that --help explains units and interactions.
 parser = OptionParser()
 parser.add_option('--conf_dir', default='/tmp/impala-conf',
     help="directory the generated flag files are written to [default: %default]")
 parser.add_option('--log_dir', default='/tmp/impala-logs',
     help="value of the -log_dir flag; a relative --minidump_dir is resolved against it "
     "[default: %default]")
 parser.add_option('--minidump_dir', default='minidumps',
     help="value of the -minidump_path flag and parent directory of the generated "
     "minidumps [default: %default]")
 parser.add_option('--start_time', default=None, type='int',
     help="timestamp of the first minidump in seconds since the epoch; only used if "
     "--end_time is set as well")
 parser.add_option('--end_time', default=None, type='int',
     help="timestamp of the last minidump in seconds since the epoch; only used if "
     "--start_time is set as well")
 parser.add_option('--duration', default=3600, type='int',
     help="if start and end time are not set, they will be calculated based on this value")
 parser.add_option('--num_minidumps', default=20, type='int',
     help="number of minidumps to generate for each role [default: %default]")

 # Parsed once at import time; the functions below read the result from 'options'.
 options, args = parser.parse_args()

 # Template for the flag files. generate_conf_files() fills it in with str.format():
 # {0} becomes the log directory and {1} the minidump path. All other flags are fixed
 # sample values. The same template is used for every role.
 CONFIG_FILE = '''-beeswax_port=21000
 -fe_port=21000
 -be_port=22000
 -hs2_port=21050
 -enable_webserver=true
 -mem_limit=108232130560
 -max_log_files=10
 -webserver_port=25000
 -max_result_cache_size=100000
 -state_store_subscriber_port=23000
 -statestore_subscriber_timeout_seconds=30
 -scratch_dirs=/data/1/impala/impalad,/data/10/impala/impalad,/data/11/impala/impalad
 -default_query_options
 -log_filename=impalad
 -audit_event_log_dir=/var/log/impalad/audit
 -max_audit_event_log_file_size=5000
 -abort_on_failed_audit_event=false
 -lineage_event_log_dir=/var/log/impalad/lineage
 -log_dir={0}
 -minidump_path={1}
 -max_lineage_log_file_size=5000
 -hostname=host1.example.com
 -state_store_host=host2.example.com
 -state_store_port=24000
 -catalog_service_host=host2.example.com
 -catalog_service_port=26000
 -local_library_dir=/var/lib/impala/udfs
 -disk_spill_encryption=false
 -abort_on_config_error=true'''

 # Maps each role name (also used as the name of the per-role minidump subdirectory) to
 # the name of the flag file that is generated for the role in conf_dir.
 ROLE_NAMES = {
     'impalad': 'impalad_flags',
     'statestored': 'state_store_flags',
     'catalogd': 'catalogserver_flags',
 }

 def generate_conf_files():
   '''Create conf_dir if necessary and write one flag file per role into it.

   Every role gets the same CONFIG_FILE contents with -log_dir and -minidump_path filled
   in from the command-line options. Existing flag files are overwritten.

   Raises OSError if conf_dir cannot be created, e.g. because the path exists but is not
   a directory.'''
   try:
     os.makedirs(options.conf_dir)
   except OSError as e:
     # An already existing directory is fine, everything else is a real failure. A bare
     # raise is used so that the original traceback is preserved.
     if e.errno != errno.EEXIST or not os.path.isdir(options.conf_dir):
       raise
   # The contents do not depend on the role, so format the template only once.
   contents = CONFIG_FILE.format(options.log_dir, options.minidump_dir)
   for flag_file_name in ROLE_NAMES.values():
     with open(os.path.join(options.conf_dir, flag_file_name), 'w') as f:
       f.write(contents)

 def random_bytes(num):
   '''Return a byte string of length num made of random byte values.

   The result is built through bytearray so that it is a real byte string on every
   interpreter (str on Python 2, bytes on Python 3). Joining chr() values would produce
   a text string on Python 3, which cannot be written to a file opened in binary mode.'''
   return bytes(bytearray(random.randint(0, 255) for _ in range(num)))

 def write_minidump(common_data, timestamp, target_dir):
   '''Generate and write the minidump into the target_dir. atime and mtime of the minidump
   will be set to timestamp.

   The file starts with 8192 bytes of per-file random data followed by common_data, the
   payload shared between minidumps. It gets a random name made of 10 lowercase letters.
   target_dir is not created here, so it has to exist already.'''
   # range() instead of xrange() keeps this working on both Python 2 and Python 3.
   file_name = ''.join(random.choice('abcdefghijklmnopqrstuvwxyz') for _ in range(10))
   path = os.path.join(target_dir, file_name)
   with open(path, 'wb') as f:
     # We want the minidumps to be pretty similar to each other. The number 8192 was
     # chosen arbitrarily and seemed like a reasonable guess.
     f.write(random_bytes(8192))
     f.write(common_data)
   # Set the times only after the file is closed, otherwise the final flush on close
   # would update mtime again.
   os.utime(path, (timestamp, timestamp))

 def generate_minidumps():
   '''Create num_minidumps sample minidumps for every role in ROLE_NAMES.

   The files are written to (minidump_dir)/(role name), where a relative minidump_dir is
   resolved against log_dir. An existing minidump_dir is removed first. The timestamps of
   the files are spaced evenly over [start_time, end_time] if both options are set and
   over [now - duration, now] otherwise.'''
   if options.start_time is None or options.end_time is None:
     # Read the clock only once so that both ends of the interval are derived from the
     # same instant.
     end_timestamp = time.time()
     start_timestamp = end_timestamp - options.duration
   else:
     start_timestamp = options.start_time
     end_timestamp = options.end_time
   minidump_dir = options.minidump_dir
   if not os.path.isabs(minidump_dir):
     minidump_dir = os.path.join(options.log_dir, minidump_dir)
   if os.path.exists(minidump_dir):
     shutil.rmtree(minidump_dir)
   # The spacing of the timestamps is the same for all roles, so compute it once. With
   # a single minidump there is no spacing and the file is placed at the start of the
   # interval.
   if options.num_minidumps > 1:
     interval = (end_timestamp - start_timestamp) // (options.num_minidumps - 1)
   else:
     interval = 0
   for role_name in ROLE_NAMES:
     role_dir = os.path.join(minidump_dir, role_name)
     os.makedirs(role_dir)
     # We want the files to have a high compression ratio and be several megabytes in size.
     # The parameters below should accomplish this.
     common_data = random_bytes(256) * 40000
     # range() instead of xrange() keeps this working on both Python 2 and Python 3.
     for i in range(options.num_minidumps):
       write_minidump(common_data, start_timestamp + interval * i, role_dir)

 def main():
   '''Write the flag files first and generate the sample minidumps afterwards.'''
   for step in (generate_conf_files, generate_minidumps):
     step()

 # Only generate the testdata when executed as a script, not when imported.
 if __name__ == "__main__":
   main()
	#!/usr/bin/env impala-python
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.

	# This script generates testdata for collect_minidumps.py. 3 text files will be created
	# containing statup flags for each of the services in (conf_dir)/impalad_flags,
	# (conf_dir)/state_store_flags, and (conf_dir)/catalogsever_flags. Each of those files
	# will have a parameter -minidump_path. Each path will look like (minidump_dir)/impalad,
	# (minidump_dir)/catalogd, (minidump_dir)/statestored. Sample minidump files will be
	# generated and placed into each of those directories. It is possible to control the
	# minidump file timestamps by specifying the start_time and end_time. The timestamps will
	# be spaced evenly in the interval. Alternatively, duration can be specified which will
	# create the files in the interval [now - duration, now]. Minidumps are simulated by
	# making the files easily compressible by having some repeated data.

	import errno
	import os
	import random
	import shutil
	import time

	from optparse import OptionParser

	parser = OptionParser()
	parser.add_option('--conf_dir', default='/tmp/impala-conf')
	parser.add_option('--log_dir', default='/tmp/impala-logs')
	parser.add_option('--minidump_dir', default='minidumps')
	parser.add_option('--start_time', default=None, type='int')
	parser.add_option('--end_time', default=None, type='int')
	parser.add_option('--duration', default=3600, type='int',
	help="if start and end time are not set, they will be calculated based on this value")
	parser.add_option('--num_minidumps', default=20, type='int')

	options, args = parser.parse_args()

	CONFIG_FILE = '''-beeswax_port=21000
	-fe_port=21000
	-be_port=22000
	-hs2_port=21050
	-enable_webserver=true
	-mem_limit=108232130560
	-max_log_files=10
	-webserver_port=25000
	-max_result_cache_size=100000
	-state_store_subscriber_port=23000
	-statestore_subscriber_timeout_seconds=30
	-scratch_dirs=/data/1/impala/impalad,/data/10/impala/impalad,/data/11/impala/impalad
	-default_query_options
	-log_filename=impalad
	-audit_event_log_dir=/var/log/impalad/audit
	-max_audit_event_log_file_size=5000
	-abort_on_failed_audit_event=false
	-lineage_event_log_dir=/var/log/impalad/lineage
	-log_dir={0}
	-minidump_path={1}
	-max_lineage_log_file_size=5000
	-hostname=host1.example.com
	-state_store_host=host2.example.com
	-state_store_port=24000
	-catalog_service_host=host2.example.com
	-catalog_service_port=26000
	-local_library_dir=/var/lib/impala/udfs
	-disk_spill_encryption=false
	-abort_on_config_error=true'''

	ROLE_NAMES = {'impalad': 'impalad_flags',
	'statestored': 'state_store_flags',
	'catalogd': 'catalogserver_flags'}

	def generate_conf_files():
	try:
	os.makedirs(options.conf_dir)
	except OSError as e:
	if e.errno == errno.EEXIST and os.path.isdir(options.conf_dir):
	pass
	else:
	raise e
	for role_name in ROLE_NAMES:
	with open(os.path.join(options.conf_dir, ROLE_NAMES[role_name]), 'w') as f:
	f.write(CONFIG_FILE.format(options.log_dir, options.minidump_dir))

	def random_bytes(num):
	return ''.join(chr(random.randint(0, 255)) for _ in range(num))

	def write_minidump(common_data, timestamp, target_dir):
	'''Generate and write the minidump into the target_dir. atime and mtime of the minidump
	will be set to timestamp.'''
	file_name = ''.join(random.choice('abcdefghijklmnopqrstuvwxyz') for _ in xrange(10))
	with open(os.path.join(target_dir, file_name), 'wb') as f:
	# We want the minidump to be pretty similar to each other. The number 8192 was chosen
	# arbitratily and seemed like a reasonable guess.
	unique_data = random_bytes(8192)
	f.write(unique_data)
	f.write(common_data)
	os.utime(os.path.join(target_dir, file_name), (timestamp, timestamp))

	def generate_minidumps():
	if options.start_time is None or options.end_time is None:
	start_timestamp = time.time() - options.duration
	end_timestamp = time.time()
	else:
	start_timestamp = options.start_time
	end_timestamp = options.end_time
	minidump_dir = options.minidump_dir
	if not os.path.isabs(minidump_dir):
	minidump_dir = os.path.join(options.log_dir, minidump_dir)
	if os.path.exists(minidump_dir):
	shutil.rmtree(minidump_dir)
	for role_name in ROLE_NAMES:
	os.makedirs(os.path.join(minidump_dir, role_name))
	# We want the files to have a high compression ratio and be several megabytes in size.
	# The parameters below should accomplish this.
	repeated_token = random_bytes(256)
	common_data = repeated_token * 40000
	if options.num_minidumps == 1:
	interval = 0
	else:
	interval = (end_timestamp - start_timestamp) // (options.num_minidumps - 1)
	for i in xrange(options.num_minidumps):
	write_minidump(common_data,
	start_timestamp + interval * i,
	os.path.join(minidump_dir, role_name))

	def main():
	generate_conf_files()
	generate_minidumps()

	if __name__ == '__main__':
	main()