blob: f7b0b7608e87a356277288d9ced51b6468057c06 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nutch.util;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.mapreduce.Job;
import org.apache.nutch.metadata.Nutch;
/**
 * Base class for tools that run one or more Hadoop jobs and expose their
 * progress and status while running.
 *
 * <p>
 * Subclasses implement {@link #run(Map, String)} and are expected to maintain
 * {@link #currentJob}, {@link #numJobs} and {@link #currentJobNum}; this base
 * class only reads them.
 */
public abstract class NutchTool extends Configured {

  /** Result map for subclasses; this base class never reads or writes it. */
  protected HashMap<String, Object> results = new HashMap<>();

  /**
   * Status map returned by {@link #getStatus()}. It is a synchronized map;
   * {@link #getProgress()} stores the latest progress value in it.
   */
  protected Map<String, Object> status = Collections
      .synchronizedMap(new HashMap<String, Object>());

  /** Job queried by {@link #getProgress()} and killed by {@link #killJob()}. */
  protected Job currentJob;

  /** Total number of jobs; progress is scaled by it when greater than 1. */
  protected int numJobs;

  /**
   * Position of the current job, added to the job's own progress before
   * scaling. NOTE(review): presumably a zero-based index - confirm against
   * subclasses.
   */
  protected int currentJobNum;

  /**
   * Runs the tool, using a map of arguments. May return results, or null.
   *
   * @param args
   *          tool arguments
   * @param crawlId
   *          crawl identifier passed to the tool
   * @return results of the run, or null
   * @throws Exception
   *           if the tool fails
   */
  public abstract Map<String, Object> run(Map<String, Object> args,
      String crawlId) throws Exception;

  /**
   * Creates a tool using the given configuration.
   *
   * @param conf
   *          configuration handed to {@link Configured}
   */
  public NutchTool(Configuration conf) {
    super(conf);
  }

  /** Creates a tool with a null configuration. */
  public NutchTool() {
    super(null);
  }

  /**
   * Returns relative progress of the tool, nominally a float in range [0,1].
   *
   * <p>
   * The value is the mean of the current job's map and reduce progress, or 0
   * when there is no current job or its progress cannot be queried. When
   * {@link #numJobs} is greater than 1 the value becomes
   * {@code (currentJobNum + progress) / numJobs}. The result is also stored in
   * the status map under {@link Nutch#STAT_PROGRESS}.
   *
   * @return the computed progress
   */
  public float getProgress() {
    float res = 0;
    // Read the field once so a concurrent reassignment cannot turn the
    // null check below into a NullPointerException.
    final Job job = currentJob;
    if (job != null) {
      try {
        res = (job.mapProgress() + job.reduceProgress()) / 2.0f;
      } catch (IOException | IllegalStateException e) {
        // Progress is best-effort: report the failure and fall back to the
        // initial value of 0.
        e.printStackTrace();
      }
    }
    // take into account multiple jobs
    if (numJobs > 1) {
      res = (currentJobNum + res) / (float) numJobs;
    }
    status.put(Nutch.STAT_PROGRESS, res);
    return res;
  }

  /**
   * Returns current status of the running tool.
   *
   * @return the live status map itself, not a copy
   */
  public Map<String, Object> getStatus() {
    return status;
  }

  /**
   * Stop the job with the possibility to resume. Subclasses should override
   * this, since by default it calls {@link #killJob()}.
   *
   * @return true if succeeded, false otherwise
   * @throws Exception
   *           if {@link #killJob()} throws
   */
  public boolean stopJob() throws Exception {
    return killJob();
  }

  /**
   * Kill the job immediately. Clients should assume that any results that the
   * job produced so far are in inconsistent state or missing.
   *
   * @return true if the kill request was issued; false if there is no current
   *         job, the job is already complete, or the kill request failed
   * @throws Exception
   *           if checking whether the job is complete fails; a failure of the
   *           kill request itself is reported as {@code false} instead
   */
  public boolean killJob() throws Exception {
    // Read the field once so the null check and the calls below all refer to
    // the same job.
    final Job job = currentJob;
    if (job == null || job.isComplete()) {
      return false;
    }
    try {
      job.killJob();
      return true;
    } catch (Exception e) {
      e.printStackTrace();
      return false;
    }
  }
}