package org.apache.helix.integration.task;
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import java.util.Map;

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Sets;
import org.apache.helix.TestHelper;
import org.apache.helix.integration.manager.ClusterControllerManager;
import org.apache.helix.integration.manager.MockParticipantManager;
import org.apache.helix.model.ExternalView;
import org.apache.helix.model.MasterSlaveSMD;
import org.apache.helix.task.JobConfig;
import org.apache.helix.task.JobContext;
import org.apache.helix.task.TaskPartitionState;
import org.apache.helix.task.TaskState;
import org.apache.helix.task.TaskSynchronizedTestBase;
import org.apache.helix.task.TaskUtil;
import org.apache.helix.task.Workflow;
import org.apache.helix.task.WorkflowConfig;
import org.apache.helix.tools.ClusterVerifiers.BestPossibleExternalViewVerifier;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
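
/**
 * Tests job-level timeout handling: a job whose tasks are stuck, or can never be assigned,
 * should be marked TIMED_OUT without preventing the rest of the workflow from completing.
 */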
public final class TestJobTimeout extends TaskSynchronizedTestBase {
  @BeforeClass
  public void beforeClass() throws Exception {
    _numNodes = 2;
    _numPartitions = 2;
    _numReplicas = 1; // only Master, no Slave
    _numDbs = 1;
    _participants = new MockParticipantManager[_numNodes];
    _gSetupTool.addCluster(CLUSTER_NAME, true);
    setupParticipants();
    setupDBs();
    startParticipants();
    createManagers();
    _controller = new ClusterControllerManager(ZK_ADDR, CLUSTER_NAME, CONTROLLER_PREFIX);
    _controller.syncStart();
  }
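
  /**
   * The first job's tasks run indefinitely and are aborted by the job-level timeout; the
   * workflow should still schedule the second job and complete.
   */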
  @Test
  public void testTaskRunningIndefinitely() throws InterruptedException {
    // The first job runs indefinitely and times out; the second job runs successfully, and the
    // workflow succeeds.
    final String FIRST_JOB = "first_job";
    final String SECOND_JOB = "second_job";
    final String WORKFLOW_NAME = TestHelper.getTestMethodName();
    final String DB_NAME = WorkflowGenerator.DEFAULT_TGT_DB;
    JobConfig.Builder firstJobBuilder =
        new JobConfig.Builder().setWorkflow(WORKFLOW_NAME).setTargetResource(DB_NAME)
            .setTargetPartitionStates(Sets.newHashSet(MasterSlaveSMD.States.MASTER.name()))
            .setCommand(MockTask.TASK_COMMAND)
            .setJobCommandConfigMap(ImmutableMap.of(MockTask.JOB_DELAY, "99999999")) // task stuck
            .setTimeout(2000L);
    JobConfig.Builder secondJobBuilder =
        new JobConfig.Builder().setWorkflow(WORKFLOW_NAME).setTargetResource(DB_NAME)
            .setTargetPartitionStates(Sets.newHashSet(MasterSlaveSMD.States.MASTER.name()))
            .setCommand(MockTask.TASK_COMMAND)
            .setIgnoreDependentJobFailure(true); // ignore the first job's timeout
    WorkflowConfig.Builder workflowConfigBuilder =
        new WorkflowConfig.Builder(WORKFLOW_NAME).setFailureThreshold(1);
    // With a failure threshold of 1, the workflow tolerates the first job's timeout,
    // schedules the second job, and still succeeds.
    Workflow.Builder workflowBuilder = new Workflow.Builder(WORKFLOW_NAME)
        .setWorkflowConfig(workflowConfigBuilder.build()).addJob(FIRST_JOB, firstJobBuilder)
        .addJob(SECOND_JOB, secondJobBuilder).addParentChildDependency(FIRST_JOB, SECOND_JOB);
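    // Start the workflow, then wait for the first job to time out, the second job to complete,
    // and the workflow as a whole to finish.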
    _driver.start(workflowBuilder.build());
    _driver.pollForJobState(WORKFLOW_NAME, TaskUtil.getNamespacedJobName(WORKFLOW_NAME, FIRST_JOB),
        TaskState.TIMED_OUT);
    _driver.pollForJobState(WORKFLOW_NAME, TaskUtil.getNamespacedJobName(WORKFLOW_NAME, SECOND_JOB),
        TaskState.COMPLETED);
    _driver.pollForWorkflowState(WORKFLOW_NAME, TaskState.COMPLETED);
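    // Inspect the per-partition task states recorded for the timed-out job.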
    JobContext jobContext =
        _driver.getJobContext(TaskUtil.getNamespacedJobName(WORKFLOW_NAME, FIRST_JOB));
    for (int pId : jobContext.getPartitionSet()) {
      // All tasks were aborted because the job timed out.
      Assert.assertEquals(jobContext.getPartitionState(pId), TaskPartitionState.TASK_ABORTED);
    }
  }
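
  /**
   * The first job targets SLAVE partitions, but with a single replica only MASTERs exist, so its
   * tasks can never be assigned; the job should time out with no task state recorded.
   */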
  @Test
  public void testNoSlaveToRunTask() throws InterruptedException {
    // The first job cannot be assigned to any instance, gets stuck, and times out; the second job
    // runs, and the workflow succeeds.
    final String FIRST_JOB = "first_job";
    final String SECOND_JOB = "second_job";
    final String WORKFLOW_NAME = TestHelper.getTestMethodName();
    final String DB_NAME = WorkflowGenerator.DEFAULT_TGT_DB;
    JobConfig.Builder firstJobBuilder =
        new JobConfig.Builder().setWorkflow(WORKFLOW_NAME).setTargetResource(DB_NAME)
            // Since there is only one replica, every partition is MASTER and no SLAVE exists.
            // Targeting SLAVE means the tasks can never be assigned, so the job gets stuck and
            // times out.
            .setTargetPartitionStates(Sets.newHashSet(MasterSlaveSMD.States.SLAVE.name()))
            .setCommand(MockTask.TASK_COMMAND).setTimeout(1000L);
    JobConfig.Builder secondJobBuilder =
        new JobConfig.Builder().setWorkflow(WORKFLOW_NAME).setTargetResource(DB_NAME)
            .setTargetPartitionStates(Sets.newHashSet(MasterSlaveSMD.States.MASTER.name()))
            .setCommand(MockTask.TASK_COMMAND)
            .setIgnoreDependentJobFailure(true); // ignore the first job's timeout
    WorkflowConfig.Builder workflowConfigBuilder =
        new WorkflowConfig.Builder(WORKFLOW_NAME).setFailureThreshold(1);
    // With a failure threshold of 1, the workflow tolerates the first job's timeout,
    // schedules the second job, and still succeeds.
    Workflow.Builder workflowBuilder = new Workflow.Builder(WORKFLOW_NAME)
        .setWorkflowConfig(workflowConfigBuilder.build()).addJob(FIRST_JOB, firstJobBuilder)
        .addJob(SECOND_JOB, secondJobBuilder).addParentChildDependency(FIRST_JOB, SECOND_JOB);
    // Check that there are no SLAVE replicas
    BestPossibleExternalViewVerifier verifier =
        new BestPossibleExternalViewVerifier.Builder(CLUSTER_NAME).setZkClient(_gZkClient)
            .setWaitTillVerify(TestHelper.DEFAULT_REBALANCE_PROCESSING_WAIT_TIME).build();
    Assert.assertTrue(verifier.verifyByPolling());
    ExternalView view =
        _gSetupTool.getClusterManagementTool().getResourceExternalView(CLUSTER_NAME, DB_NAME);
    view.getPartitionSet().forEach(partition -> {
      for (Map.Entry<String, String> entry : view.getStateMap(partition).entrySet()) {
        // With a single replica, every assigned replica must be MASTER; a SLAVE here would
        // invalidate the premise of this test. (The original code merely slept on a non-MASTER
        // state without rechecking, which verified nothing.)
        Assert.assertEquals(entry.getValue(), "MASTER",
            "Partition " + partition + " has unexpected state " + entry.getValue());
      }
    });
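    // Start the workflow: the first job can never place its tasks and should simply time out,
    // while the second job and the workflow still complete.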
    _driver.start(workflowBuilder.build());
    _driver.pollForJobState(WORKFLOW_NAME, TaskUtil.getNamespacedJobName(WORKFLOW_NAME, FIRST_JOB),
        TaskState.TIMED_OUT);
    _driver.pollForJobState(WORKFLOW_NAME, TaskUtil.getNamespacedJobName(WORKFLOW_NAME, SECOND_JOB),
        TaskState.COMPLETED);
    _driver.pollForWorkflowState(WORKFLOW_NAME, TaskState.COMPLETED);
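    // Inspect the job context of the timed-out job.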
    JobContext jobContext =
        _driver.getJobContext(TaskUtil.getNamespacedJobName(WORKFLOW_NAME, FIRST_JOB));
    for (int pId : jobContext.getPartitionSet()) {
      // No task was ever assigned for the first job, so no partition state was recorded.
      Assert.assertNull(jobContext.getPartitionState(pId));
    }
  }
}