| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.nutch.hostdb; |
| |
| import java.lang.invoke.MethodHandles; |
| import java.net.InetAddress; |
| import java.net.UnknownHostException; |
| |
| import org.apache.hadoop.io.Text; |
| import org.apache.hadoop.mapreduce.Reducer.Context; |
| import org.apache.hadoop.util.StringUtils; |
| |
| import org.slf4j.Logger; |
| import org.slf4j.LoggerFactory; |
| |
| /** |
| * Simple runnable that performs DNS lookup for a single host. |
| */ |
| public class ResolverThread implements Runnable { |
| |
| private static final Logger LOG = LoggerFactory |
| .getLogger(MethodHandles.lookup().lookupClass()); |
| |
| protected String host = null; |
| protected HostDatum datum = null; |
| protected Text hostText = new Text(); |
| protected Context context; |
| protected int purgeFailedHostsThreshold; |
| |
| /** |
| * Constructor. |
| */ |
| public ResolverThread(String host, HostDatum datum, |
| Context context, int purgeFailedHostsThreshold) { |
| |
| hostText.set(host); |
| this.host = host; |
| this.datum = datum; |
| this.context = context; |
| this.purgeFailedHostsThreshold = purgeFailedHostsThreshold; |
| } |
| |
| /** |
| * |
| */ |
| public void run() { |
| // Resolve the host and act appropriatly |
| try { |
| // Throws an exception if host is not found |
| @SuppressWarnings("unused") |
| InetAddress inetAddr = InetAddress.getByName(host); |
| |
| if (datum.isEmpty()) { |
| context.getCounter("UpdateHostDb", "new_known_host").increment(1); |
| datum.setLastCheck(); |
| LOG.info(host + ": new_known_host " + datum); |
| } else if (datum.getDnsFailures() > 0) { |
| context.getCounter("UpdateHostDb", "rediscovered_host").increment(1); |
| datum.setLastCheck(); |
| datum.setDnsFailures(0l); |
| LOG.info(host + ": rediscovered_host " + datum); |
| } else { |
| context.getCounter("UpdateHostDb", "existing_known_host").increment(1); |
| datum.setLastCheck(); |
| LOG.info(host + ": existing_known_host " + datum); |
| } |
| |
| // Write the host datum |
| context.write(hostText, datum); |
| } catch (UnknownHostException e) { |
| try { |
| // If the counter is empty we'll initialize with date = today and 1 failure |
| if (datum.isEmpty()) { |
| datum.setLastCheck(); |
| datum.setDnsFailures(1l); |
| context.write(hostText, datum); |
| context.getCounter("UpdateHostDb", "new_unknown_host").increment(1); |
| LOG.info(host + ": new_unknown_host " + datum); |
| } else { |
| datum.setLastCheck(); |
| datum.incDnsFailures(); |
| |
| // Check if this host should be forgotten |
| if (purgeFailedHostsThreshold == -1 || |
| purgeFailedHostsThreshold < datum.getDnsFailures()) { |
| |
| context.write(hostText, datum); |
| context.getCounter("UpdateHostDb", "existing_unknown_host").increment(1); |
| LOG.info(host + ": existing_unknown_host " + datum); |
| } else { |
| context.getCounter("UpdateHostDb", "purged_unknown_host").increment(1); |
| LOG.info(host + ": purged_unknown_host " + datum); |
| } |
| } |
| |
| context.getCounter("UpdateHostDb", |
| Long.toString(datum.numFailures()) + "_times_failed").increment(1); |
| } catch (Exception ioe) { |
| LOG.warn(StringUtils.stringifyException(ioe)); |
| } |
| } catch (Exception e) { |
| LOG.warn(StringUtils.stringifyException(e)); |
| } |
| |
| context.getCounter("UpdateHostDb", "checked_hosts").increment(1); |
| } |
| } |