blob: 564e5dab8c63eb923d87d0c68755ac058424b601 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nutch.hostdb;
import java.lang.invoke.MethodHandles;
import java.net.InetAddress;
import java.net.UnknownHostException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer.Context;
import org.apache.hadoop.util.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Runnable that performs the DNS lookup for a single host and records the
 * outcome.
 *
 * <p>Depending on whether the lookup succeeds and on the state of the host's
 * {@link HostDatum}, the datum is updated, a counter in the
 * {@code UpdateHostDb} group is incremented and the datum is either written
 * to the context or dropped (purged).</p>
 */
public class ResolverThread implements Runnable {

  private static final Logger LOG = LoggerFactory
      .getLogger(MethodHandles.lookup().lookupClass());

  /** Counter group shared by every counter this class increments. */
  private static final String COUNTER_GROUP = "UpdateHostDb";

  protected String host = null;
  protected HostDatum datum = null;
  protected Text hostText = new Text();
  protected Context context;
  protected int purgeFailedHostsThreshold;

  /**
   * Constructor.
   *
   * @param host name of the host to resolve; also used as the output key
   * @param datum host record that is updated with the lookup result
   * @param context context used to write the record and to update counters
   * @param purgeFailedHostsThreshold number of DNS failures a previously
   *          failed host may accumulate before it is no longer written;
   *          {@code -1} disables purging
   */
  public ResolverThread(String host, HostDatum datum,
      Context context, int purgeFailedHostsThreshold) {
    hostText.set(host);
    this.host = host;
    this.datum = datum;
    this.context = context;
    this.purgeFailedHostsThreshold = purgeFailedHostsThreshold;
  }

  /**
   * Resolves the host and acts on the result.
   *
   * <p>On success the datum is marked as checked, any previous DNS failures
   * are reset and the datum is written. On {@link UnknownHostException} the
   * failure count is initialised or incremented and the datum is written
   * unless the host has more DNS failures than the purge threshold. Any other
   * exception is logged as a warning. The {@code checked_hosts} counter is
   * incremented in all cases.</p>
   */
  @Override
  public void run() {
    // Resolve the host and act appropriately
    try {
      // Called only for its side effect: throws if the host is not found
      InetAddress.getByName(host);

      final boolean rediscovered;
      final String outcome;
      if (datum.isEmpty()) {
        rediscovered = false;
        outcome = "new_known_host";
      } else if (datum.getDnsFailures() > 0) {
        rediscovered = true;
        outcome = "rediscovered_host";
      } else {
        rediscovered = false;
        outcome = "existing_known_host";
      }

      count(outcome);
      datum.setLastCheck();
      if (rediscovered) {
        // The host resolves again, forget its earlier failures
        datum.setDnsFailures(0L);
      }
      LOG.info("{}: {} {}", host, outcome, datum);

      // Write the host datum
      context.write(hostText, datum);
    } catch (UnknownHostException e) {
      try {
        if (datum.isEmpty()) {
          // First time we see this host: start with a single failure
          datum.setLastCheck();
          datum.setDnsFailures(1L);
          context.write(hostText, datum);
          count("new_unknown_host");
          LOG.info("{}: new_unknown_host {}", host, datum);
        } else {
          datum.setLastCheck();
          datum.incDnsFailures();

          // Keep the host while purging is disabled or while it has not
          // failed more often than the threshold allows; otherwise forget it
          // by not writing it out.
          if (purgeFailedHostsThreshold == -1
              || datum.getDnsFailures() <= purgeFailedHostsThreshold) {
            context.write(hostText, datum);
            count("existing_unknown_host");
            LOG.info("{}: existing_unknown_host {}", host, datum);
          } else {
            count("purged_unknown_host");
            LOG.info("{}: purged_unknown_host {}", host, datum);
          }
        }

        count(Long.toString(datum.numFailures()) + "_times_failed");
      } catch (Exception ioe) {
        logFailure(ioe);
      }
    } catch (Exception e) {
      logFailure(e);
    }

    count("checked_hosts");
  }

  /** Increments the named counter of the {@code UpdateHostDb} group by one. */
  private void count(String counterName) {
    context.getCounter(COUNTER_GROUP, counterName).increment(1);
  }

  /**
   * Logs an unexpected failure as a warning; restores the interrupt flag if
   * the failure was an interruption so the owning executor can observe it.
   */
  private void logFailure(Exception e) {
    if (e instanceof InterruptedException) {
      Thread.currentThread().interrupt();
    }
    LOG.warn(StringUtils.stringifyException(e));
  }
}