/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.ode.scheduler.simple;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.ode.bpel.clapi.ClusterManager;
import org.apache.ode.bpel.clapi.ClusterMemberListener;
import org.apache.ode.bpel.iapi.ContextException;
import org.apache.ode.bpel.iapi.Scheduler;
import javax.transaction.*;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.concurrent.*;
import java.util.concurrent.atomic.AtomicLong;
/**
* A reliable and relatively simple scheduler that uses a database to persist information about
* scheduled tasks.
*
* The challenge is to achieve high performance in a small memory footprint without loss of reliability
* while supporting distributed/clustered configurations.
*
* The design is based around three time horizons: "immediate", "near future", and "everything else".
* Immediate jobs (i.e. jobs that are about to come due) are written to the database and kept in
* an in-memory priority queue. When they execute, they are removed from the database. Near-future
* jobs are placed in the database and assigned to the current node, but they are not held in
* memory. Periodically, jobs are "upgraded" from near-future to immediate status, at which point
* they get loaded into memory. Jobs that are further out in time are placed in the database without
* a node identifier; when they are ready to be "upgraded" to near-future jobs they are assigned to
* one of the known live nodes. Recovery is rather straightforward, with stale node identifiers
* being reassigned to known good nodes.
*
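* <p>
* A minimal construction sketch (illustrative only, not from ODE documentation; the node id,
* the {@code DatabaseDelegate}, the transaction manager, and the job processor are assumed
* to be supplied by the caller; the property names are those read by the constructor below):
* <pre>
* Properties conf = new Properties();
* conf.setProperty("ode.scheduler.queueLength", "10000");         // todo-queue cap
* conf.setProperty("ode.scheduler.immediateInterval", "30000");   // "immediate" horizon, in ms
* conf.setProperty("ode.scheduler.nearFutureInterval", "600000"); // "near future" horizon, in ms
* SimpleScheduler scheduler = new SimpleScheduler("node1", delegate, conf);
* scheduler.setTransactionManager(txm);
* scheduler.setJobProcessor(processor);
* scheduler.start();
* </pre>
*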
* @author Maciej Szefler ( m s z e f l e r @ g m a i l . c o m )
*
*/
public class SimpleScheduler implements Scheduler, TaskRunner, ClusterMemberListener {
private static final Logger __log = LoggerFactory.getLogger(SimpleScheduler.class);
private static final int DEFAULT_TRANSACTION_TIMEOUT = 60 * 1000;
/**
* Jobs scheduled with a time that is between [now, now+immediateInterval] will be assigned to the current node, and placed
* directly on the todo queue.
*/
long _immediateInterval = 30000;
/**
* Jobs scheduled with a time that is between (now+immediateInterval,now+nearFutureInterval) will be assigned to the current
* node, but will not be placed on the todo queue (the promoter will pick them up).
*/
long _nearFutureInterval = 10 * 60 * 1000;
/** After 10s of no communication a node is deemed dead. */
long _staleInterval = 10000;
/** Duration used to log a warning if a job scheduled at a date D is queued at D'>D+_warningDelay */
long _warningDelay = 5*60*1000;
/**
* Estimated sustained transactions-per-second capacity of the system,
* e.g. 100 means the system can process 100 jobs per second on average.
* This number is used to determine how many jobs to load from the database at once.
*/
int _tps = 100;
TransactionManager _txm;
ExecutorService _exec;
String _nodeId;
/** Maximum number of jobs in the "near future" / todo queue. */
int _todoLimit = 10000;
/** The object that actually handles the jobs. */
volatile JobProcessor _jobProcessor;
volatile JobProcessor _polledRunnableProcessor;
private SchedulerThread _todo;
private DatabaseDelegate _db;
private boolean _isClusterEnabled;
private ClusterManager _clusterManager;
/** All the nodes we know about */
private CopyOnWriteArraySet<String> _knownNodes = new CopyOnWriteArraySet<String>();
/** All the stale nodes */
private CopyOnWriteArraySet<String> _staleNodes = new CopyOnWriteArraySet<String>();
/** When we last heard from our nodes. */
//private ConcurrentHashMap<String, Long> _lastHeartBeat = new ConcurrentHashMap<String, Long>();
/** Set of outstanding jobs, i.e., jobs that have been enqueued but not dequeued or dispatched yet.
Used to avoid cases where a job would be dispatched twice if the server is under high load and
does not fully process a job before it is reloaded from the database. */
private ConcurrentHashMap<String, Long> _outstandingJobs = new ConcurrentHashMap<String, Long>();
/** Set of jobs processed since the last LoadImmediate task.
This prevents a race condition where a job is processed twice. This could happen if a LoadImmediate task loads a job
from the db before the job is processed but puts it in the _outstandingJobs map after the job was processed.
In such a case the job is no longer in the _outstandingJobs map, and so it's queued again. */
private ConcurrentHashMap<String, Long> _processedSinceLastLoadTask = new ConcurrentHashMap<String, Long>();
private boolean _running;
/** Time for next upgrade. */
private AtomicLong _nextUpgrade = new AtomicLong();
private Random _random = new Random();
private long _pollIntervalForPolledRunnable = Long.getLong("org.apache.ode.polledRunnable.pollInterval", 10 * 60 * 1000);
/** Number of immediate retries when the transaction fails. */
private int _immediateTransactionRetryLimit = 3;
/** Interval between immediate retries when the transaction fails. */
private long _immediateTransactionRetryInterval = 1000;
private DateFormat debugDateFormatter = new SimpleDateFormat("HH:mm:ss,SSS");
public SimpleScheduler(String nodeId, DatabaseDelegate del, Properties conf) {
this(nodeId,del,conf,false);
}
public SimpleScheduler(String nodeId, DatabaseDelegate del, Properties conf, boolean clusterState) {
_nodeId = nodeId;
_db = del;
_isClusterEnabled = clusterState;
_todoLimit = getIntProperty(conf, "ode.scheduler.queueLength", _todoLimit);
_immediateInterval = getLongProperty(conf, "ode.scheduler.immediateInterval", _immediateInterval);
_nearFutureInterval = getLongProperty(conf, "ode.scheduler.nearFutureInterval", _nearFutureInterval);
_staleInterval = getLongProperty(conf, "ode.scheduler.staleInterval", _staleInterval);
_tps = getIntProperty(conf, "ode.scheduler.transactionsPerSecond", _tps);
_warningDelay = getLongProperty(conf, "ode.scheduler.warningDelay", _warningDelay);
_immediateTransactionRetryLimit = getIntProperty(conf, "ode.scheduler.immediateTransactionRetryLimit", _immediateTransactionRetryLimit);
_immediateTransactionRetryInterval = getLongProperty(conf, "ode.scheduler.immediateTransactionRetryInterval", _immediateTransactionRetryInterval);
_todo = new SchedulerThread(this);
}
public void setPollIntervalForPolledRunnable(long pollIntervalForPolledRunnable) {
_pollIntervalForPolledRunnable = pollIntervalForPolledRunnable;
}
private int getIntProperty(Properties props, String propName, int defaultValue) {
String s = props.getProperty(propName);
if (s != null) return Integer.parseInt(s);
else return defaultValue;
}
private long getLongProperty(Properties props, String propName, long defaultValue) {
String s = props.getProperty(propName);
if (s != null) return Long.parseLong(s);
else return defaultValue;
}
public void setNodeId(String nodeId) {
_nodeId = nodeId;
}
public void setClusterManager(ClusterManager cm) {
_clusterManager = cm;
}
public void setStaleInterval(long staleInterval) {
_staleInterval = staleInterval;
}
public void setImmediateInterval(long immediateInterval) {
_immediateInterval = immediateInterval;
}
public void setNearFutureInterval(long nearFutureInterval) {
_nearFutureInterval = nearFutureInterval;
}
public void setTransactionsPerSecond(int tps) {
_tps = tps;
}
public void setTransactionManager(TransactionManager txm) {
_txm = txm;
}
public void setDatabaseDelegate(DatabaseDelegate dbd) {
_db = dbd;
}
public void setExecutorService(ExecutorService executorService) {
_exec = executorService;
}
public void setPolledRunnableProcesser(JobProcessor polledRunnableProcessor) {
_polledRunnableProcessor = polledRunnableProcessor;
}
public void cancelJob(String jobId) throws ContextException {
_todo.dequeue(new Job(0, jobId, false, null));
_outstandingJobs.remove(jobId);
try {
_db.deleteJob(jobId, _nodeId);
} catch (DatabaseException e) {
__log.debug("Job removal failed.", e);
throw new ContextException("Job removal failed.", e);
}
}
public <T> Future<T> execIsolatedTransaction(final Callable<T> transaction) throws Exception, ContextException {
return _exec.submit(new Callable<T>() {
public T call() throws Exception {
try {
return execTransaction(transaction);
} catch (Exception e) {
__log.error("An exception occured while executing an isolated transaction, " +
"the transaction is going to be abandoned.", e);
return null;
}
}
});
}
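// A usage sketch (illustrative only): wrap transactional work in a Callable and let
// execTransaction manage begin/commit/rollback and the immediate retries. The Callable
// body shown here, runSomeDbUpdate, is hypothetical:
//
//     Integer updated = execTransaction(new Callable<Integer>() {
//         public Integer call() throws Exception {
//             return runSomeDbUpdate(); // hypothetical transactional work
//         }
//     });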
public <T> T execTransaction(Callable<T> transaction) throws Exception, ContextException {
return execTransaction(transaction, 0);
}
public <T> T execTransaction(Callable<T> transaction, int timeout) throws Exception, ContextException {
TransactionManager txm = _txm;
if( txm == null ) {
throw new ContextException("Cannot locate the transaction manager; the server might be shutting down.");
}
// The value of the timeout is in seconds. If the value is zero, the transaction service restores the default value.
if (timeout < 0) {
throw new IllegalArgumentException("Timeout must be positive, received: "+timeout);
}
boolean existingTransaction = false;
try {
existingTransaction = txm.getTransaction() != null;
} catch (Exception ex) {
String errmsg = "Internal Error, could not get current transaction.";
throw new ContextException(errmsg, ex);
}
// already in transaction, execute and return directly
if (existingTransaction) {
return transaction.call();
}
// run in new transaction
Exception ex = null;
int immediateRetryCount = _immediateTransactionRetryLimit;
txm.setTransactionTimeout(timeout);
if(__log.isDebugEnabled() && timeout!=0) __log.debug("Custom transaction timeout: "+timeout);
try {
do {
try {
if (__log.isDebugEnabled()) __log.debug("Beginning a new transaction");
txm.begin();
} catch (Exception e) {
String errmsg = "Internal Error, could not begin transaction.";
throw new ContextException(errmsg, e);
}
try {
ex = null;
return transaction.call();
} catch (Exception e) {
ex = e;
} finally {
if (ex == null) {
if (__log.isDebugEnabled()) __log.debug("Commiting on " + txm + "...");
try {
txm.commit();
} catch( Exception e2 ) {
ex = e2;
}
} else {
if (__log.isDebugEnabled()) __log.debug("Rollbacking on " + txm + "...");
txm.rollback();
}
if( ex != null && immediateRetryCount > 0 ) {
if (__log.isDebugEnabled()) __log.debug("Will retry the transaction in " + _immediateTransactionRetryInterval + " msecs on " + _txm + " for error: ", ex);
Thread.sleep(_immediateTransactionRetryInterval);
}
}
} while( immediateRetryCount-- > 0 );
} finally {
// 0 restores the default value
txm.setTransactionTimeout(0);
}
throw ex;
}
public void setRollbackOnly() throws Exception {
TransactionManager txm = _txm;
if( txm == null ) {
throw new ContextException("Cannot locate the transaction manager; the server might be shutting down.");
}
txm.setRollbackOnly();
}
public void registerSynchronizer(final Synchronizer synch) throws ContextException {
TransactionManager txm = _txm;
if( txm == null ) {
throw new ContextException("Cannot locate the transaction manager; the server might be shutting down.");
}
try {
txm.getTransaction().registerSynchronization(new Synchronization() {
public void beforeCompletion() {
synch.beforeCompletion();
}
public void afterCompletion(int status) {
synch.afterCompletion(status == Status.STATUS_COMMITTED);
}
});
} catch (Exception e) {
throw new ContextException("Unable to register synchronizer.", e);
}
}
public String schedulePersistedJob(final JobDetails jobDetail, Date when) throws ContextException {
long ctime = System.currentTimeMillis();
if (when == null)
when = new Date(ctime);
if (__log.isDebugEnabled())
__log.debug("scheduling " + jobDetail + " for " + when);
return schedulePersistedJob(new Job(when.getTime(), true, jobDetail), when, ctime);
}
public String scheduleMapSerializableRunnable(MapSerializableRunnable runnable, Date when) throws ContextException {
long ctime = System.currentTimeMillis();
if (when == null)
when = new Date(ctime);
JobDetails jobDetails = new JobDetails();
jobDetails.getDetailsExt().put("runnable", runnable);
runnable.storeToDetails(jobDetails);
if (__log.isDebugEnabled())
__log.debug("scheduling " + jobDetails + " for " + when);
return schedulePersistedJob(new Job(when.getTime(), true, jobDetails), when, ctime);
}
private String schedulePersistedJob(Job job, Date when, long ctime) throws ContextException {
boolean immediate = when.getTime() <= ctime + _immediateInterval;
boolean nearfuture = !immediate && when.getTime() <= ctime + _nearFutureInterval;
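// For example, with the defaults (_immediateInterval = 30s, _nearFutureInterval = 10min):
// a job due in 10s is "immediate", one due in 5min is "near future", and one due in 2h
// falls through to the "far future" branch below.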
try {
if (immediate) {
// Immediate scheduling means we put it in the DB for safekeeping
_db.insertJob(job, _nodeId, true);
// And add it to our todo list.
if (_outstandingJobs.size() < _todoLimit) {
addTodoOnCommit(job);
}
if (__log.isDebugEnabled()) {
__log.debug("scheduled immediate job: " + job.jobId);
}
} else if (nearfuture) {
// Near future, assign the job to ourselves (why? -- this makes it very unlikely that we
// would get two nodes trying to process the same instance, which causes unsightly rollbacks).
_db.insertJob(job, _nodeId, false);
if (__log.isDebugEnabled()) {
__log.debug("scheduled near-future job: " + job.jobId);
}
} else /* far future */ {
// Not the near future: we don't assign a node id; we'll assign it later.
_db.insertJob(job, null, false);
if (__log.isDebugEnabled()) {
__log.debug("scheduled far-future job: " + job.jobId);
}
}
} catch (DatabaseException dbe) {
__log.error("Database error.", dbe);
throw new ContextException("Database error.", dbe);
}
return job.jobId;
}
public String scheduleVolatileJob(boolean transacted, JobDetails jobDetail) throws ContextException {
return scheduleVolatileJob(transacted, jobDetail, null);
}
public String scheduleVolatileJob(boolean transacted, JobDetails jobDetail, Date when) throws ContextException {
long ctime = System.currentTimeMillis();
if (when == null)
when = new Date(ctime);
Job job = new Job(when.getTime(), transacted, jobDetail);
job.persisted = false;
addTodoOnCommit(job);
return job.toString();
}
public void setJobProcessor(JobProcessor processor) throws ContextException {
_jobProcessor = processor;
}
public void shutdown() {
stop();
_jobProcessor = null;
_txm = null;
_todo = null;
}
public synchronized void start() {
if (_running)
return;
if (Boolean.parseBoolean(System.getProperty("org.apache.ode.scheduler.deleteJobsOnStart", "false"))) {
__log.debug("DeleteJobsOnStart");
try {
execTransaction(new Callable<Void>() {
public Void call() throws Exception {
_db.deleteAllJobs();
return null;
}
});
} catch (Exception ex) {
__log.error("", ex);
throw new ContextException("", ex);
}
} else {
__log.debug("no DeleteJobsOnStart");
}
if (_exec == null)
_exec = Executors.newCachedThreadPool();
_todo.clearTasks(UpgradeJobsTask.class);
_todo.clearTasks(LoadImmediateTask.class);
_todo.clearTasks(CheckStaleNodes.class);
_processedSinceLastLoadTask.clear();
_outstandingJobs.clear();
_knownNodes.clear();
_staleNodes.clear();
long now = System.currentTimeMillis();
// Pretend we got a heartbeat...
//for (String s : _knownNodes) _lastHeartBeat.put(s, now);
// schedule immediate job loading for now!
_todo.enqueue(new LoadImmediateTask(now));
if(!_isClusterEnabled) enqueueTasksReadnodeIds(now);
else {
if (_clusterManager.isMaster()) enqueueTasksReadnodeIds(now);
}
_todo.start();
_running = true;
}
private long randomMean(long mean) {
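// draws uniformly from [mean/2, 3*mean/2), so the expected value is mean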
return (long) (_random.nextDouble() * mean) + (mean / 2);
}
public synchronized void stop() {
if (!_running)
return;
_todo.stop();
_todo.clearTasks(UpgradeJobsTask.class);
_todo.clearTasks(LoadImmediateTask.class);
_todo.clearTasks(CheckStaleNodes.class);
_processedSinceLastLoadTask.clear();
_outstandingJobs.clear();
// disable because this is not the right way to do it
// will be fixed by ODE-595
// graceful shutdown; any new submits will throw RejectedExecutionExceptions
// _exec.shutdown();
_running = false;
}
public void memberAdded(String nodeId) {
_knownNodes.add(nodeId);
}
public void memberRemoved(String nodeId) {
_staleNodes.add(nodeId);
}
// Enqueue CheckStaleNodes and UpgradeJobsTask after a new master is identified.
public void memberElectedAsMaster(String masterId) {
long now = System.currentTimeMillis();
enqueueTasksReadnodeIds(now);
}
private void enqueueTasksReadnodeIds(long now) {
try {
execTransaction(new Callable<Void>() {
public Void call() throws Exception {
_knownNodes.addAll(_db.getNodeIds());
return null;
}
});
} catch (Exception ex) {
__log.error("Error retrieving node list.", ex);
throw new ContextException("Error retrieving node list.", ex);
}
// make doubly sure that all the active nodes are included in _knownNodes
if(_isClusterEnabled) _knownNodes.addAll(_clusterManager.getActiveNodes());
else _knownNodes.add(_nodeId);
// schedule check for stale nodes, make it random so that the nodes don't overlap.
_todo.enqueue(new CheckStaleNodes(now + randomMean(_staleInterval)));
// do the upgrade sometime (random) in the immediate interval.
_todo.enqueue(new UpgradeJobsTask(now + randomMean(_immediateInterval)));
}
class RunJob implements Callable<Void> {
final Job job;
final JobProcessor processor;
RunJob(Job job, JobProcessor processor) {
this.job = job;
this.processor = processor;
}
public Void call() throws Exception {
try {
final Scheduler.JobInfo jobInfo = new Scheduler.JobInfo(job.jobId, job.detail,
job.detail.getRetryCount());
if (job.transacted) {
final boolean[] needRetry = new boolean[]{true};
try {
execTransaction(new Callable<Void>() {
public Void call() throws Exception {
if (job.persisted)
if (!_db.deleteJob(job.jobId, _nodeId))
throw new JobNoLongerInDbException(job.jobId, _nodeId);
try {
processor.onScheduledJob(jobInfo);
// If the job is a "runnable" job, schedule the next job occurence
if (job.detail.getDetailsExt().get("runnable") != null && !"COMPLETED".equals(String.valueOf(jobInfo.jobDetail.getDetailsExt().get("runnable_status")))) {
// the runnable is still in progress; re-schedule the checker after the poll interval
if (_pollIntervalForPolledRunnable < 0) {
if (__log.isWarnEnabled())
__log.warn("The poll interval for polled runnables is negative; setting it to 1000ms");
_pollIntervalForPolledRunnable = 1000;
}
job.schedDate = System.currentTimeMillis() + _pollIntervalForPolledRunnable;
_db.insertJob(job, _nodeId, false);
}
} catch (JobProcessorException jpe) {
if (!jpe.retry) {
needRetry[0] = false;
}
// Let execTransaction know that something went wrong.
throw jpe;
}
return null;
}
});
} catch (JobNoLongerInDbException jde) {
// This may happen if two nodes try to do the same job... we try to avoid
// it, but the synchronization is best-effort, not perfect.
__log.debug("job no longer in db forced rollback: "+job);
} catch (final Exception ex) {
__log.error("Error while processing a "+(job.persisted?"":"non-")+"persisted job"+(needRetry[0] && job.persisted?": ":", no retry: ")+job, ex);
// We only get here if the above execTransaction fails, so that transaction
// has already been rolled back
if (job.persisted) {
execTransaction(new Callable<Void>() {
public Void call() throws Exception {
if (needRetry[0]) {
int retry = job.detail.getRetryCount() + 1;
if (retry <= 10) {
job.detail.setRetryCount(retry);
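// exponential backoff: the delay grows as 5^retry seconds (5s, 25s, 125s, ...)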
long delay = (long)(Math.pow(5, retry));
job.schedDate = System.currentTimeMillis() + delay*1000;
_db.updateJob(job);
__log.error("Error while processing job, retrying in " + delay + "s");
} else {
_db.deleteJob(job.jobId, _nodeId);
__log.error("Error while processing job after 10 retries, no more retries:" + job);
}
} else {
_db.deleteJob(job.jobId, _nodeId);
}
return null;
}
});
}
}
} else {
processor.onScheduledJob(jobInfo);
}
return null;
} finally {
// the order of these 2 actions is crucial to avoid a race condition.
_processedSinceLastLoadTask.put(job.jobId, job.schedDate);
_outstandingJobs.remove(job.jobId);
}
}
}
/**
* Run a job via the executor service.
*
* @param job job to run.
*/
protected void runJob(final Job job) {
_exec.submit(new RunJob(job, _jobProcessor));
}
/**
* Run a job from a polled runnable thread. The runnable is not persistent;
* however, the poller is persistent and wakes up at the given interval to
* check the status of the runnable.
* <ul>
* <li>1. The runnable is being scheduled; the poller persistent job dispatches
* the runnable to a runnable delegate thread and schedules itself to a later time.</li>
* <li>2. The runnable is running; the poller job re-schedules itself every time it
* sees the runnable is not completed.</li>
* <li>3. The runnable failed; the poller job passes the exception thrown on the runnable
* down, and the standard scheduler retries happen.</li>
* <li>4. The runnable completes; the poller persistent does not re-schedule itself.</li>
* <li>5. System powered off and restarts; the poller job does not know what the status
* of the runnable. This is handled just like the case #1.</li>
* </ul>
* <p/>
* There is at least one re-scheduling of the poller job. Since the runnable's state is
* not persisted, and the same runnable may be tried again after system failure,
* the runnable that's used with this polling should be repeatable.
*
* @param job job to run.
*/
protected void runPolledRunnable(final Job job) {
_exec.submit(new RunJob(job, _polledRunnableProcessor));
}
private void addTodoOnCommit(final Job job) {
registerSynchronizer(new Synchronizer() {
public void afterCompletion(boolean success) {
if (success) {
enqueue(job);
}
}
public void beforeCompletion() {
}
});
}
public boolean isTransacted() {
TransactionManager txm = _txm;
if( txm == null ) {
throw new ContextException("Cannot locate the transaction manager; the server might be shutting down.");
}
try {
Transaction tx = txm.getTransaction();
return (tx != null && tx.getStatus() != Status.STATUS_NO_TRANSACTION);
} catch (SystemException e) {
throw new ContextException("Internal Error: Could not obtain transaction status.");
}
}
public void runTask(final Task task) {
if (task instanceof Job) {
Job job = (Job)task;
if( job.detail.getDetailsExt().get("runnable") != null ) {
runPolledRunnable(job);
} else {
runJob(job);
}
} else if (task instanceof SchedulerTask) {
_exec.submit(new Callable<Void>() {
public Void call() throws Exception {
try {
((SchedulerTask) task).run();
} catch (Exception ex) {
__log.error("Error during SchedulerTask execution", ex);
}
return null;
}
});
}
}
public void updateHeartBeat(String nodeId) {
if (nodeId == null)
return;
/*if (_nodeId.equals(nodeId))
return;*/
//_lastHeartBeat.put(nodeId, System.currentTimeMillis());
_knownNodes.add(nodeId);
}
boolean doLoadImmediate() {
__log.debug("LOAD IMMEDIATE started");
// don't load anything if we're already half-full; we've got plenty to do already
if (_outstandingJobs.size() > _todoLimit/2) return true;
List<Job> jobs;
try {
// don't load more than we can chew
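// e.g., with the defaults (_immediateInterval = 30s, _tps = 100) this loads at most
// 30 * 100 = 3000 jobs, further capped by the remaining todo-queue capacity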
final int batch = Math.min((int) (_immediateInterval * _tps / 1000), _todoLimit-_outstandingJobs.size());
// jobs might have been enqueued by #addTodoOnCommit meanwhile
if (batch<=0) {
if (__log.isDebugEnabled()) __log.debug("Max capacity reached: "+_outstandingJobs.size()+" jobs dispacthed i.e. queued or being executed");
return true;
}
if (__log.isDebugEnabled()) __log.debug("loading "+batch+" jobs from db");
jobs = execTransaction(new Callable<List<Job>>() {
public List<Job> call() throws Exception {
return _db.dequeueImmediate(_nodeId, System.currentTimeMillis() + _immediateInterval, batch);
}
});
if (__log.isDebugEnabled()) __log.debug("loaded "+jobs.size()+" jobs from db");
long delayedTime = System.currentTimeMillis() - _warningDelay;
int delayedCount = 0;
boolean runningLate;
for (Job j : jobs) {
// jobs might have been enqueued by #addTodoOnCommit meanwhile
if (_outstandingJobs.size() >= _todoLimit){
if (__log.isDebugEnabled()) __log.debug("Max capacity reached: "+_outstandingJobs.size()+" jobs dispacthed i.e. queued or being executed");
break;
}
runningLate = j.schedDate <= delayedTime;
if (runningLate) {
delayedCount++;
}
if (__log.isDebugEnabled())
__log.debug("todo.enqueue job from db: " + j.jobId.trim() + " for " + j.schedDate + "(" + debugDateFormatter.format(j.schedDate)+") "+(runningLate?" delayed=true":""));
enqueue(j);
}
if (delayedCount > 0) {
__log.warn("Dispatching jobs with more than "+(_warningDelay/60000)+" minutes delay. Either the server was down for some time or the job load is greater than available capacity");
}
// clear only if the batch succeeded
_processedSinceLastLoadTask.clear();
return true;
} catch (Exception ex) {
__log.error("Error loading immediate jobs from database.", ex);
return false;
} finally {
__log.debug("LOAD IMMEDIATE complete");
}
}
void enqueue(Job job) {
if (_processedSinceLastLoadTask.get(job.jobId) == null) {
if (_outstandingJobs.putIfAbsent(job.jobId, job.schedDate) == null) {
if (job.schedDate <= System.currentTimeMillis()) {
runTask(job);
} else {
_todo.enqueue(job);
}
} else {
if (__log.isDebugEnabled()) __log.debug("Job "+job.jobId+" is being processed (outstanding job)");
}
} else {
if (__log.isDebugEnabled()) __log.debug("Job "+job.jobId+" is being processed (processed since last load)");
}
}
boolean doUpgrade() {
__log.debug("UPGRADE started");
final ArrayList<String> activeNodes;
// for cluster mode
if (_isClusterEnabled) {
if (_clusterManager.isMaster()) {
activeNodes = (ArrayList<String>) _clusterManager.getActiveNodes();
} else {
// only the master performs the upgrade; bail out instead of sorting a null list below
__log.debug("UPGRADE skipped -- not the master");
return true;
}
}
//for standalone ODE deployments
else {
activeNodes = new ArrayList<String>();
activeNodes.add(_nodeId);
}
Collections.sort(activeNodes);
// We're going to try to upgrade near-future jobs using the db only.
// We assume that the trailing digits of the scheduled times are uniformly
// distributed, and use modular division of the time by the number of nodes
// to create the node assignment.
// This can be done in a single update statement.
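// For example (assuming the delegate assigns a job to node i when
// schedTime % numNodes == i): with three sorted nodes [n0, n1, n2], jobs
// scheduled at times 100, 101 and 102 would go to n1, n2 and n0 respectively.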
final long maxtime = System.currentTimeMillis() + _nearFutureInterval;
try {
return execTransaction(new Callable<Boolean>() {
public Boolean call() throws Exception {
int numNodes = activeNodes.size();
for (int i = 0; i < numNodes; ++i) {
String node = activeNodes.get(i);
_db.updateAssignToNode(node, i, numNodes, maxtime);
}
return true;
}
});
} catch (Exception ex) {
__log.error("Database error upgrading jobs.", ex);
return false;
} finally {
__log.debug("UPGRADE complete");
}
}
/**
* Re-assign a stale node's jobs to this node.
* @param nodeId identifier of the stale node
*/
void recoverStaleNode(final String nodeId) {
if (__log.isDebugEnabled()) {
__log.debug("recovering stale node " + nodeId);
}
try {
int numrows = execTransaction(new Callable<Integer>() {
public Integer call() throws Exception {
return _db.updateReassign(nodeId, _nodeId);
}
});
if (__log.isDebugEnabled()) {
__log.debug("reassigned " + numrows + " jobs to self. ");
}
if(_isClusterEnabled) _staleNodes.remove(nodeId);
// If the stale node id is in _knownNodes, remove it.
_knownNodes.remove(nodeId);
// We can now forget about this node; if we see it again, it will be
// "new to us"
//_knownNodes.remove(nodeId);
//_lastHeartBeat.remove(nodeId);
// Force a load-immediate to catch anything new from the recovered node.
doLoadImmediate();
} catch (Exception ex) {
__log.error("Database error reassigning node.", ex);
} finally {
__log.debug("node recovery complete");
}
}
// private long doRetry(Job job) throws DatabaseException {
// int retry = job.detail.getRetryCount() + 1;
// job.detail.setRetryCount(retry);
// long delay = (long)(Math.pow(5, retry - 1));
// Job jobRetry = new Job(System.currentTimeMillis() + delay*1000, true, job.detail);
// _db.insertJob(jobRetry, _nodeId, false);
// return delay;
// }
private abstract class SchedulerTask extends Task implements Runnable {
SchedulerTask(long schedDate) {
super(schedDate);
}
}
private class LoadImmediateTask extends SchedulerTask {
LoadImmediateTask(long schedDate) {
super(schedDate);
}
public void run() {
boolean success = false;
try {
success = doLoadImmediate();
} finally {
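// re-schedule slightly inside the immediate window (90% of the interval) so
// successive loads overlap; on failure, retry quickly (1s)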
if (success)
_todo.enqueue(new LoadImmediateTask(System.currentTimeMillis() + (long) (_immediateInterval * .90)));
else
_todo.enqueue(new LoadImmediateTask(System.currentTimeMillis() + 1000));
}
}
}
/**
* Upgrade jobs from the far future to the near future (basically, assign them to a node).
* @author mszefler
*
*/
private class UpgradeJobsTask extends SchedulerTask {
UpgradeJobsTask(long schedDate) {
super(schedDate);
}
public void run() {
long ctime = System.currentTimeMillis();
long ntime = _nextUpgrade.get();
if (__log.isDebugEnabled()) {
__log.debug("UPGRADE task for " + schedDate + " fired at " + ctime);
}
// We could be too early; this can happen if the upgrade gets delayed by another
// node
if (ntime > ctime) {
if (__log.isDebugEnabled()) {
__log.debug("UPGRADE skipped -- wait another " + (ntime - ctime) + "ms");
}
_todo.enqueue(new UpgradeJobsTask(ntime));
return;
}
boolean success = false;
try {
success = doUpgrade();
} finally {
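// on success, run the next upgrade halfway into the near-future window;
// on failure, retry in 1s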
long future = System.currentTimeMillis() + (success ? (long) (_nearFutureInterval * .50) : 1000);
_nextUpgrade.set(future);
_todo.enqueue(new UpgradeJobsTask(future));
if (__log.isDebugEnabled()) {
__log.debug("UPGRADE completed, success = " + success + "; next time in " + (future - ctime) + "ms");
}
}
}
}
/**
* Check if any of the nodes in our cluster are stale.
*/
private class CheckStaleNodes extends SchedulerTask {
CheckStaleNodes(long schedDate) {
super(schedDate);
}
public void run() {
_todo.enqueue(new CheckStaleNodes(System.currentTimeMillis() + _staleInterval));
__log.debug("CHECK STALE NODES started");
ArrayList<String> knownNodes = new ArrayList<String>(_knownNodes);
// for cluster mode
if (_isClusterEnabled) {
if (_clusterManager.isMaster()) {
ArrayList<String> memberList = (ArrayList) _clusterManager.getActiveNodes();
//find stale nodes
knownNodes.removeAll(memberList);
if (knownNodes.size() != 0) {
for (String nodeId : knownNodes) {
_staleNodes.add(nodeId);
}
}
for (String nodeId : _staleNodes) {
recoverStaleNode(nodeId);
}
}
}
// for standalone ode node
else {
for (String nodeId : knownNodes) {
if (!_nodeId.equals(nodeId)) recoverStaleNode(nodeId);
}
}
/*for (String nodeId : _knownNodes) {
Long lastSeen = _lastHeartBeat.get(nodeId);
if ((lastSeen == null || (System.currentTimeMillis() - lastSeen) > _staleInterval)
&& !_nodeId.equals(nodeId))
{
recoverStaleNode(nodeId);
}
}*/
}
}
public void acquireTransactionLocks() {
_db.acquireTransactionLocks();
}
}