blob: 3f53d3bb9f5f6a2151eb0e21210aec4dab0b64df [file] [log] [blame]
#!/usr/bin/env python
# -----------------------------------------------------------------------
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# -----------------------------------------------------------------------
import os
import sys
import getopt
#
# This is a one-time-use utility to move your history and checkpoint files
# into the database. It will not run if the database is already populated.
#
# If you need to run it again, you must first remove or drop your existing database.
#
from ducc_util import DuccUtil
class DbLoader(DuccUtil):
    """Command-line driver that launches the Java DbLoader against a DUCC runtime.

    main() parses the command line, starts the database through the
    inherited db_start(), runs the Java class
    org.apache.uima.ducc.database.DbLoader as a child process, and then
    stops the database through the inherited db_stop().
    """

    def __init__(self):
        # The flag is handed to DuccUtil unchanged; its meaning is defined by
        # DuccUtil, which is not visible from this file.
        DuccUtil.__init__(self, True)

    def usage(self, *args):
        """Print an optional message followed by the usage text, then exit.

        args -- message fragments (strings) that are joined with single
                spaces and printed ahead of the usage text.  Pass None, or
                nothing at all, to print only the usage text.

        Never returns: always terminates the process via sys.exit(1).
        """
        # Guard against an empty argument list as well as an explicit None so
        # that a bare usage() call cannot raise IndexError.
        if args and args[0] is not None:
            print('')
            print(' '.join(args))

        usage_text = (
            '',
            'Usage:',
            '',
            'db_loader -i runtime [-t #threads] [--no-archive]',
            '',
            'Where:',
            ' -i runtime',
            ' Specifies the DUCC_HOME of the DUCC runtime to be moved into the database.',
            '',
            ' -t #threads',
            ' Specifies number of processing threads (default is 10)',
            '',
            ' --no-archive',
            ' Suppresses archival of the input files.',
            '',
            'Notes:',
            ' 1. Because this is a bootstrap script DUCC must be stopped.',
            ' The database will be started & stopped by this script.',
            ' 2. Reducing the number of processing threads will reduce the CPU load but',
            ' increase the processing time.',
            ' 3. Archival consists of renaming relevent input directories and files by appending',
            ' ".archive" to their names. It is fully non-destructive and may be reversed by',
            ' manually renaming them back to their original names.',
        )
        # Single-argument print(...) produces the same output under the
        # Python 2 print statement and the Python 3 print function.
        for line in usage_text:
            print(line)
        sys.exit(1)

    def main(self, argv):
        """Parse argv, then run the Java DbLoader with the database started.

        argv -- command-line arguments without the program name.
                Recognised options:
                  -i runtime     input DUCC_HOME (required)
                  -t #threads    number of processing threads (default '10')
                  --no-archive   adds -DDONT_ARCHIVE to the JVM options
                  -h, -?         print the usage text

        Any usage error prints the usage text and exits with status 1 (see
        usage()).  Returns None.
        """
        # Imported here because only this method needs it.
        import subprocess

        in_home = None
        archive = True
        nthreads = '10'

        try:
            opts, args = getopt.getopt(argv, 'i:t:h?', ['no-archive'])
        except getopt.GetoptError:
            # Catch only option-parsing failures; a bare except would also
            # trap KeyboardInterrupt and SystemExit.
            self.usage("Invalid arguments", ' '.join(argv))

        for (o, a) in opts:
            # Exact comparison: "o in ('-i')" is a substring test on a plain
            # string, not a tuple membership test.
            if o == '-i':
                in_home = a
            elif o == '-t':
                nthreads = a
            elif o == '--no-archive':
                archive = False
            else:
                # -h and -? end up here.
                self.usage(None)

        if in_home is None:
            self.usage("Missing input DUCC_HOME")

        # Reject a bad thread count here rather than letting the child JVM
        # fail on it after the database has already been started.
        if not nthreads.isdigit() or int(nthreads) < 1:
            self.usage("Invalid number of threads:", nthreads)

        out_url = self.ducc_properties.get('ducc.database.host')
        if out_url is None:
            self.usage("Cannot find 'ducc.database.host' in your properties file.")

        self.db_start()
        if not self.db_alive(3):
            print("Database is not running or cannot be contacted.")
            return

        # Everything from here on runs with the database started, so make
        # sure db_stop() is reached even if launching the loader fails.
        try:
            # Build an argument list instead of a shell string so that paths
            # containing spaces or shell metacharacters are passed verbatim.
            # NOTE(review): assumes self.java() returns only the JVM
            # executable path (no embedded options) -- confirm in DuccUtil.
            cmd = [self.java(), '-DDUCC_HOME=' + self.DUCC_HOME]
            if not archive:
                cmd.append('-DDONT_ARCHIVE')
            cmd.extend(['org.apache.uima.ducc.database.DbLoader',
                        in_home, out_url, nthreads])

            # Append the resources directory to the classpath, tolerating an
            # unset or empty CLASSPATH.  The child inherits os.environ.
            resources = self.DUCC_HOME + "/resources"
            classpath = os.environ.get('CLASSPATH')
            if classpath:
                os.environ['CLASSPATH'] = classpath + ':' + resources
            else:
                os.environ['CLASSPATH'] = resources

            print('CLASSPATH ' + os.environ['CLASSPATH'])
            print('Executing ' + ' '.join(cmd))
            try:
                rc = subprocess.call(cmd)
            except OSError as e:
                # Raised when the executable cannot be launched at all.
                print('Unable to execute ' + str(cmd[0]) + ': ' + str(e))
            else:
                if rc != 0:
                    print('DbLoader exited with return code ' + str(rc))
        finally:
            self.db_stop()
if __name__ == "__main__":
    # Script entry point: hand every argument after the program name to the
    # loader's command-line driver.
    cli_args = sys.argv[1:]
    DbLoader().main(cli_args)