blob: 2bc9244d80dfd7c529ca4219f230d07713e84ab8 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nutch.hostdb;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Date;
import java.util.Map.Entry;
import java.text.SimpleDateFormat;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
/**
 * Hadoop {@link Writable} record describing a single host: a score, the time
 * of the last check, an optional homepage URL, DNS/connection failure
 * counters, a set of per-status record counters and a free-form
 * {@link MapWritable} of metadata.
 *
 * <p>The serialized form is defined by {@link #write(DataOutput)} and
 * {@link #readFields(DataInput)}, which must stay in the same field order.
 * Instances are mutable and this class performs no synchronization.
 */
public class HostDatum implements Writable, Cloneable {
  // Not read, written or serialized by any method of this class; kept because
  // it is protected and subclasses may rely on it.
  protected long failures = 0;
  protected float score = 0;
  // A time of 0 ms since the epoch marks the datum as "empty" (see isEmpty()).
  protected Date lastCheck = new Date(0);
  protected String homepageUrl = "";
  protected MapWritable metaData = new MapWritable();

  // Records the number of times DNS look-up failed, may indicate host no longer exists
  protected long dnsFailures = 0;

  // Records the number of connection failures, may indicate our network being blocked by firewall
  protected long connectionFailures = 0;

  // Record counters; their sum is reported by numRecords().
  protected long unfetched = 0;
  protected long fetched = 0;
  protected long notModified = 0;
  protected long redirTemp = 0;
  protected long redirPerm = 0;
  protected long gone = 0;

  /** Creates a datum with all fields at their defaults (see {@link #isEmpty()}). */
  public HostDatum() {
  }

  /**
   * Creates a datum with the given score, checked "now".
   *
   * @param score initial score
   */
  public HostDatum(float score) {
    this(score, new Date());
  }

  /**
   * Creates a datum with the given score and last-check time and an empty
   * homepage URL.
   *
   * @param score initial score
   * @param lastCheck time of the last check; stored by reference
   */
  public HostDatum(float score, Date lastCheck) {
    this(score, lastCheck, "");
  }

  /**
   * Creates a datum with the given score, last-check time and homepage URL.
   *
   * @param score initial score
   * @param lastCheck time of the last check; stored by reference
   * @param homepageUrl homepage URL of the host
   */
  public HostDatum(float score, Date lastCheck, String homepageUrl) {
    this.score = score;
    this.lastCheck = lastCheck;
    this.homepageUrl = homepageUrl;
  }

  /** Sets both the DNS and the connection failure counters back to zero. */
  public void resetFailures() {
    setDnsFailures(0L);
    setConnectionFailures(0L);
  }

  /**
   * @param dnsFailures new DNS failure count; must not be null (it is unboxed)
   */
  public void setDnsFailures(Long dnsFailures) {
    this.dnsFailures = dnsFailures;
  }

  /**
   * @param connectionFailures new connection failure count; must not be null
   *        (it is unboxed)
   */
  public void setConnectionFailures(Long connectionFailures) {
    this.connectionFailures = connectionFailures;
  }

  /** Increments the DNS failure counter by one. */
  public void incDnsFailures() {
    this.dnsFailures++;
  }

  /** Increments the connection failure counter by one. */
  public void incConnectionFailures() {
    this.connectionFailures++;
  }

  /**
   * @return sum of the DNS and connection failure counters
   */
  public Long numFailures() {
    return getDnsFailures() + getConnectionFailures();
  }

  /**
   * @return current DNS failure count
   */
  public Long getDnsFailures() {
    return dnsFailures;
  }

  /**
   * @return current connection failure count
   */
  public Long getConnectionFailures() {
    return connectionFailures;
  }

  /**
   * @param score new score
   */
  public void setScore(float score) {
    this.score = score;
  }

  /** Sets the last-check time to the current time. */
  public void setLastCheck() {
    setLastCheck(new Date());
  }

  /**
   * @param date new last-check time; stored by reference
   */
  public void setLastCheck(Date date) {
    lastCheck = date;
  }

  /**
   * @return {@code true} if the last-check time is still 0 ms since the
   *         epoch, which is the value a default-constructed datum carries
   */
  public boolean isEmpty() {
    return lastCheck.getTime() == 0;
  }

  /**
   * @return current score
   */
  public float getScore() {
    return score;
  }

  /**
   * @return sum of the unfetched, fetched, gone, redirPerm, redirTemp and
   *         notModified counters
   */
  public Long numRecords() {
    return unfetched + fetched + gone + redirPerm + redirTemp + notModified;
  }

  /**
   * @return the stored last-check time (the internal instance, not a copy)
   */
  public Date getLastCheck() {
    return lastCheck;
  }

  /**
   * @return {@code true} if a non-empty homepage URL is set; a null URL is
   *         treated as not set
   */
  public boolean hasHomepageUrl() {
    return homepageUrl != null && !homepageUrl.isEmpty();
  }

  /**
   * @return the homepage URL as stored
   */
  public String getHomepageUrl() {
    return homepageUrl;
  }

  /**
   * @param homepageUrl new homepage URL
   */
  public void setHomepageUrl(String homepageUrl) {
    this.homepageUrl = homepageUrl;
  }

  public void setUnfetched(long val) {
    unfetched = val;
  }

  public long getUnfetched() {
    return unfetched;
  }

  public void setFetched(long val) {
    fetched = val;
  }

  public long getFetched() {
    return fetched;
  }

  public void setNotModified(long val) {
    notModified = val;
  }

  public long getNotModified() {
    return notModified;
  }

  public void setRedirTemp(long val) {
    redirTemp = val;
  }

  public long getRedirTemp() {
    return redirTemp;
  }

  public void setRedirPerm(long val) {
    redirPerm = val;
  }

  public long getRedirPerm() {
    return redirPerm;
  }

  public void setGone(long val) {
    gone = val;
  }

  public long getGone() {
    return gone;
  }

  /** Sets all six record counters back to zero. */
  public void resetStatistics() {
    setUnfetched(0);
    setFetched(0);
    setGone(0);
    setRedirTemp(0);
    setRedirPerm(0);
    setNotModified(0);
  }

  /**
   * Replaces the metadata with a copy of the given map, made with the
   * {@link MapWritable} copy constructor.
   *
   * @param mapWritable metadata to copy
   */
  public void setMetaData(MapWritable mapWritable) {
    this.metaData = new MapWritable(mapWritable);
  }

  /**
   * Adds all metadata from the other HostDatum to this HostDatum. Existing
   * entries with the same key are overwritten; keys and values are put by
   * reference.
   *
   * @param other HostDatum whose metadata is added
   */
  public void putAllMetaData(HostDatum other) {
    MapWritable target = getMetaData();
    for (Entry<Writable, Writable> e : other.getMetaData().entrySet()) {
      target.put(e.getKey(), e.getValue());
    }
  }

  /**
   * Returns the metadata map, creating an empty one first if the field is
   * currently null.
   *
   * @return the internal metadata map, never null
   */
  public MapWritable getMetaData() {
    if (this.metaData == null) {
      this.metaData = new MapWritable();
    }
    return this.metaData;
  }

  /**
   * Returns a copy of this datum that shares no mutable state with it.
   *
   * <p>{@code super.clone()} already copies every primitive field and the
   * immutable homepage URL. The two mutable members, the last-check
   * {@link Date} and the metadata map, are copied explicitly so that changing
   * the clone's date or metadata does not alter this instance.
   *
   * @return an independent copy of this datum
   * @throws CloneNotSupportedException if {@code super.clone()} refuses
   */
  @Override
  public Object clone() throws CloneNotSupportedException {
    HostDatum result = (HostDatum) super.clone();
    // Date.clone() keeps the runtime type of a Date subclass.
    result.lastCheck = (lastCheck == null) ? null : (Date) lastCheck.clone();
    result.metaData = new MapWritable(getMetaData());
    return result;
  }

  /**
   * Reads all serialized fields, in the order written by
   * {@link #write(DataOutput)}, replacing the current state. The metadata is
   * read into a fresh map.
   *
   * @param in source to read from
   * @throws IOException if reading fails
   */
  @Override
  public void readFields(DataInput in) throws IOException {
    score = in.readFloat();
    lastCheck = new Date(in.readLong());
    homepageUrl = Text.readString(in);

    dnsFailures = in.readLong();
    connectionFailures = in.readLong();

    unfetched = in.readLong();
    fetched = in.readLong();
    notModified = in.readLong();
    redirTemp = in.readLong();
    redirPerm = in.readLong();
    gone = in.readLong();

    metaData = new MapWritable();
    metaData.readFields(in);
  }

  /**
   * Writes score, last-check time (as milliseconds), homepage URL, the two
   * failure counters, the six record counters (unfetched, fetched,
   * notModified, redirTemp, redirPerm, gone) and finally the metadata.
   *
   * <p>A null homepage URL is written as the empty string, and a null
   * metadata field is written as an empty map. The last-check time must be
   * non-null.
   *
   * @param out sink to write to
   * @throws IOException if writing fails
   */
  @Override
  public void write(DataOutput out) throws IOException {
    out.writeFloat(score);
    out.writeLong(lastCheck.getTime());
    Text.writeString(out, homepageUrl == null ? "" : homepageUrl);

    out.writeLong(dnsFailures);
    out.writeLong(connectionFailures);

    out.writeLong(unfetched);
    out.writeLong(fetched);
    out.writeLong(notModified);
    out.writeLong(redirTemp);
    out.writeLong(redirPerm);
    out.writeLong(gone);

    // Go through the getter so a null metadata field is handled the same way
    // everywhere instead of failing here.
    getMetaData().write(out);
  }

  /**
   * Renders the datum as one tab-separated line: unfetched, fetched, gone,
   * redirTemp, redirPerm, notModified, total records, DNS failures,
   * connection failures, total failures, score, last check formatted as
   * {@code yyyy-MM-dd HH:mm:ss}, homepage URL, and then every metadata entry
   * as {@code key:value|||}.
   *
   * @return the tab-separated representation
   */
  @Override
  public String toString() {
    StringBuilder buf = new StringBuilder();
    buf.append(getUnfetched()).append('\t');
    buf.append(getFetched()).append('\t');
    buf.append(getGone()).append('\t');
    buf.append(getRedirTemp()).append('\t');
    buf.append(getRedirPerm()).append('\t');
    buf.append(getNotModified()).append('\t');
    buf.append(numRecords().longValue()).append('\t');
    buf.append(getDnsFailures().longValue()).append('\t');
    buf.append(getConnectionFailures().longValue()).append('\t');
    buf.append(numFailures().longValue()).append('\t');
    buf.append(score).append('\t');
    // SimpleDateFormat is created per call, so no formatter is shared between callers.
    buf.append(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(lastCheck)).append('\t');
    buf.append(homepageUrl).append('\t');
    for (Entry<Writable, Writable> e : getMetaData().entrySet()) {
      buf.append(e.getKey().toString())
         .append(':')
         .append(e.getValue().toString())
         .append("|||");
    }
    return buf.toString();
  }
}