blob: 40f29d3d2058b3a98264c24787d7c898d0644b01 [file] [log] [blame]
#!/usr/bin/env python
# -----------------------------------------------------------------------
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# -----------------------------------------------------------------------
#
# This script reads a test configuration and updates the test job descriptions
# accordingly.
#
# The job desciprions contain a collection of "elapsed times", one for each
# workitem. Each AE sleeps for this time to simulate compuing. The driver
# program, "runducc" submits the jobs accordng to a schedule generated by
# this program.
#
# Each simulated job contains a simulated "start time" based on actual
# usage in a development cluster. This is used to calculate a total elapsed
# time for the run. The test configuration specifies a comprssion rate and
# spread, used thus:
# Each actual elapsesd time for the job is divided by the compression rate.
# The set of jobs is submitted with random spacing over the time specified
# by the spread.
# Knowing the original time span for the jobs, it is possible to execute a
# compressed run somewhat similar to the original.
#
# The configuration file also allows weighted-random selection of job memories,
# job classes, and service assignments (for running service tests).
#
import os
import sys
import getopt
import random
DUCC_HOME = os.path.abspath(__file__ + '/../../..')
sys.path.append(DUCC_HOME + '/admin')
from ducc_util import DuccUtil
from properties import Properties
from ducc import Ducc
class Prepare(DuccUtil):
def usage(self, msg):
if ( msg != None ):
print msg
print 'Usage:'
print ' prepare.py <properties>'
sys.exit(1)
def error(self, *str):
print ' '.join(str)
sys.exit(1)
def toint(self, props, name, dflt=None):
val = props.get(name)
if ( val == None ):
if ( dflt == None ):
self.error("Missing required property", name)
else:
return dflt
try:
return int(val)
except:
self.error("Property", name, "is not an int:", val)
def toboolean(self, props, name, dflt=None):
if ( str == None ):
return deflt
return str in ['t', 'T', 'true', 'True', 'TRUE','y', 'Y', 'yes', 'Yes', 'YES']
def toarray(self, props, name):
'''
Read 'name' from props, whose value is blank delimeted strings. Each string is
used as an associative index 'ndx' into a map. Construct the index
'name.ndx' for each index, look up the value in the props file, and return a
map (dictionary) of the values.
e.g the properties below this map: {'37': '50', '28': '50'}
job.memory = 28 37
job.memory.28 = 50
job.memory.37 = 50
'''
stem = props.get(name)
if ( stem == None ):
self.error('No such property:', name)
vals = stem.split()
ret = {}
for val in vals:
d = props.get(name + '.' + val)
if ( d == None ):
self.error('No such property:', d)
ret[val] = d
return ret
def show(self, vals, tag):
tally = {}
for v in vals:
if ( tally.has_key(v) ):
tally[v] = tally[v] + 1
else:
tally[v] = 1
for k in tally.keys():
print '%10s tally: %12s --> %d' % (tag, k, tally[k])
def distributeParameters(self, parmset, count, tag):
'''
Randomly assign the values in the memory set to the jobs. Otherwise use the supplied values.
parmset is a map where the keys are the set of memory values to set, and the values
are the weights used to distribute the memories among the jobs.
count is the number of things we need to distribute the stuff in parmset over
'''
denom = 0
for v in parmset.values():
denom = denom + int(v)
ndx = 0
tmp = []
ovfl = []
# set up an array (list) with the target values according the their configured distribution
for k in parmset.keys():
num = float(parmset[k])
ovfl.append(k)
val = ((num / denom) * count)
bound = int(round(val))
for j in range(0, bound):
tmp.append(k)
ndx = ndx + 1
# deal with leftovers (non-integral solution to the loop above)
if ( ndx < count ):
while ( ndx < count ):
x = ovfl[ random.randint(0, (len(ovfl)-1)) ]
tmp.append(x)
ndx = ndx + 1
# gotta love python - now randomly shuffle the values
random.shuffle(tmp)
self.show(tmp, tag)
return tmp
def writeControlFile(self, allfiles):
'''
allfiles is a list of tuples. Each tuple is (submittod, filename). The list is
sorted by tod so all we have to do is bop through it picking up files in the
right order. We calculate the "spread" - the approximate elapsed time of the test
from the submit tod and compression and write the control file accordingly.
'''
size = len(allfiles)
files_per_interval = self.spread / size
range = files_per_interval * 2
total = 0
print 'Size', size, 'files-per-interval', files_per_interval, 'range', range
outf = open(self.testdir + '/job.ctl', 'w')
for (tod, fname) in allfiles:
outf.write('s -c ' + str(self.compression) + ' ' + fname + '\n')
delay = random.randint(0, range)
outf.write('[sleep ' + str(delay) + 'S]\n\n')
total += delay
outf.close()
print 'Spread:', self.spread, 'actual:', total
def run(self):
print 'Prepare starts...'
if ( self.randomseed == 'TOD' ):
random.seed()
else:
random.seed(int(self.randomseed))
files = os.listdir(self.srcdir)
count = len(files)
mem_assignments = self.distributeParameters(self.memory, count, 'memory')
class_assignments = self.distributeParameters(self.classes, count, 'class')
if ( self.services != None ):
service_assignments = self.distributeParameters(self.services, count, 'services')
if ( not os.path.exists(self.destdir) ):
os.makedirs(self.destdir)
ndx = 0
allfiles = []
for f in files:
props = Properties()
props.load(self.srcdir + '/' + f)
cls_assignment = class_assignments[ndx]
mem_override = self.maxmem[cls_assignment]
if ( mem_override == None ):
props.put('memory', mem_assignments[ndx])
else:
props.put('memory', mem_override)
props.put('class' , cls_assignment)
if ( self.services != None ):
props.put('services', service_assignments[ndx])
process_override = self.maxproc[cls_assignment]
if ( process_override != None ): # force override of max_processes
props.put('machines', process_override)
type_override = self.type[cls_assignment]
if ( type_override != None ): # force override of max_processes
props.put('type', 'reserve')
fname = self.destdir + '/' + f
allfiles.append( (int(props.get('tod')), fname) )
props.write(self.destdir + '/' + f)
ndx = ndx + 1
self.writeControlFile(sorted(allfiles))
def from_keys(self, props, base, stem):
'''
Given a base dictionary, "base", use its keyset to form a property from the
stem and look for it in props.
Assign None as the value if not found.
'''
ret = {}
for bk in base.keys():
k = stem + '.' + bk
print 'LOOKING FOR', k
if ( props.has_key(k) ):
ret[bk] = props.get(k)
else:
ret[bk] = None
return ret
def main(self, argv):
if ( len(argv) < 1 ):
self.usage("Missing 'prepare' properties.")
inprops = argv[0]
if inprops in ('-h', '-?', '--help', '-help'):
self.usage(None)
self.testdir = os.path.dirname(inprops)
props = Properties()
props.load(inprops)
self.srcdir = self.testdir + '/' + props.get('src.dir')
self.destdir = self.testdir + '/' + props.get('dest.dir')
self.spread = self.toint (props, 'submission.spread')
self.compression = self.toint (props, 'compression' , 1)
self.classes = self.toarray (props, 'scheduling.classes')
self.maxproc = self.from_keys(props, self.classes, 'scheduling.maxproc')
self.maxmem = self.from_keys(props, self.classes, 'scheduling.maxmem')
self.type = self.from_keys(props, self.classes, 'scheduling.type')
self.memory = self.toarray (props, 'job.memory')
self.randomseed = props.get('random.seed')
if ( props.get('job.services') == None ):
self.services = None
else:
self.services = self.toarray (props, 'job.services')
print 'Running with'
print ' properties :', inprops
print ' testdir :', self.testdir
print ' srcdir :', self.srcdir
print ' destdir :', self.destdir
print ' spread :', self.spread
print ' compression :', self.compression
print ' classes :', self.classes
print ' maxproc :', self.maxproc
print ' maxmem :', self.maxmem
print ' type :', self.type
print ' memory :', self.memory
print ' services :', self.services
print ' randomseed :', self.randomseed
self.run()
if __name__ == "__main__":
prepare = Prepare()
prepare.main(sys.argv[1:])