| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.nutch.parse; |
| |
| import java.io.DataInput; |
| import java.io.DataOutput; |
| import java.io.IOException; |
| |
| import org.apache.hadoop.io.VersionMismatchException; |
| import org.apache.hadoop.io.Writable; |
| import org.apache.hadoop.io.WritableUtils; |
| import org.apache.hadoop.conf.Configuration; |
| |
| import org.apache.nutch.metadata.Metadata; |
| |
| /** |
| * @author Andrzej Bialecki <ab@getopt.org> |
| */ |
| public class ParseStatus implements Writable { |
| |
| private final static byte VERSION = 2; |
| |
| // Primary status codes: |
| |
| /** Parsing was not performed. */ |
| public static final byte NOTPARSED = 0; |
| /** Parsing succeeded. */ |
| public static final byte SUCCESS = 1; |
| /** General failure. There may be a more specific error message in arguments. */ |
| public static final byte FAILED = 2; |
| |
| public static final String[] majorCodes = { "notparsed", "success", "failed" }; |
| |
| // Secondary success codes go here: |
| |
| /** |
| * Parsed content contains a directive to redirect to another URL. The target |
| * URL can be retrieved from the arguments. |
| */ |
| public static final short SUCCESS_REDIRECT = 100; |
| |
| // Secondary failure codes go here: |
| |
| /** |
| * Parsing failed. An Exception occured (which may be retrieved from the |
| * arguments). |
| */ |
| public static final short FAILED_EXCEPTION = 200; |
| /** |
| * Parsing failed. Content was truncated, but the parser cannot handle |
| * incomplete content. |
| */ |
| public static final short FAILED_TRUNCATED = 202; |
| /** |
| * Parsing failed. Invalid format - the content may be corrupted or of wrong |
| * type. |
| */ |
| public static final short FAILED_INVALID_FORMAT = 203; |
| /** |
| * Parsing failed. Other related parts of the content are needed to complete |
| * parsing. The list of URLs to missing parts may be provided in arguments. |
| * The Fetcher may decide to fetch these parts at once, then put them into |
| * Content.metadata, and supply them for re-parsing. |
| */ |
| public static final short FAILED_MISSING_PARTS = 204; |
| /** |
| * Parsing failed. There was no content to be parsed - probably caused by |
| * errors at protocol stage. |
| */ |
| public static final short FAILED_MISSING_CONTENT = 205; |
| |
| public static final ParseStatus STATUS_NOTPARSED = new ParseStatus(NOTPARSED); |
| public static final ParseStatus STATUS_SUCCESS = new ParseStatus(SUCCESS); |
| public static final ParseStatus STATUS_FAILURE = new ParseStatus(FAILED); |
| |
| private byte majorCode = 0; |
| private short minorCode = 0; |
| private String[] args = null; |
| |
| public byte getVersion() { |
| return VERSION; |
| } |
| |
| public ParseStatus() { |
| |
| } |
| |
| public ParseStatus(int majorCode, int minorCode, String[] args) { |
| this.args = args; |
| this.majorCode = (byte) majorCode; |
| this.minorCode = (short) minorCode; |
| } |
| |
| public ParseStatus(int majorCode) { |
| this(majorCode, 0, (String[]) null); |
| } |
| |
| public ParseStatus(int majorCode, String[] args) { |
| this(majorCode, 0, args); |
| } |
| |
| public ParseStatus(int majorCode, int minorCode) { |
| this(majorCode, minorCode, (String[]) null); |
| } |
| |
| /** Simplified constructor for passing just a text message. */ |
| public ParseStatus(int majorCode, int minorCode, String message) { |
| this(majorCode, minorCode, new String[] { message }); |
| } |
| |
| /** Simplified constructor for passing just a text message. */ |
| public ParseStatus(int majorCode, String message) { |
| this(majorCode, 0, new String[] { message }); |
| } |
| |
| public ParseStatus(Throwable t) { |
| this(FAILED, FAILED_EXCEPTION, new String[] { t.toString() }); |
| } |
| |
| public static ParseStatus read(DataInput in) throws IOException { |
| ParseStatus res = new ParseStatus(); |
| res.readFields(in); |
| return res; |
| } |
| |
| public void readFields(DataInput in) throws IOException { |
| byte version = in.readByte(); |
| switch (version) { |
| case 1: |
| majorCode = in.readByte(); |
| minorCode = in.readShort(); |
| args = WritableUtils.readCompressedStringArray(in); |
| break; |
| case 2: |
| majorCode = in.readByte(); |
| minorCode = in.readShort(); |
| args = WritableUtils.readStringArray(in); |
| break; |
| default: |
| throw new VersionMismatchException(VERSION, version); |
| } |
| } |
| |
| public void write(DataOutput out) throws IOException { |
| out.writeByte(VERSION); |
| out.writeByte(majorCode); |
| out.writeShort(minorCode); |
| if (args == null) { |
| out.writeInt(-1); |
| } else { |
| WritableUtils.writeStringArray(out, args); |
| } |
| } |
| |
| /** |
| * A convenience method. Returns true if majorCode is SUCCESS, false |
| * otherwise. |
| */ |
| |
| public boolean isSuccess() { |
| return majorCode == SUCCESS; |
| } |
| |
| /** |
| * A convenience method. Return a String representation of the first argument, |
| * or null. |
| */ |
| public String getMessage() { |
| if (args != null && args.length > 0 && args[0] != null) |
| return args[0]; |
| return null; |
| } |
| |
| public String[] getArgs() { |
| return args; |
| } |
| |
| public int getMajorCode() { |
| return majorCode; |
| } |
| |
| public int getMinorCode() { |
| return minorCode; |
| } |
| |
| /** |
| * A convenience method. Creates an empty Parse instance, which returns this |
| * status. |
| */ |
| public Parse getEmptyParse(Configuration conf) { |
| return new EmptyParseImpl(this, conf); |
| } |
| |
| /** |
| * A convenience method. Creates an empty ParseResult, which contains this |
| * status. |
| */ |
| public ParseResult getEmptyParseResult(String url, Configuration conf) { |
| return ParseResult.createParseResult(url, getEmptyParse(conf)); |
| } |
| |
| public String toString() { |
| StringBuffer res = new StringBuffer(); |
| String name = null; |
| if (majorCode >= 0 && majorCode < majorCodes.length) |
| name = majorCodes[majorCode]; |
| else |
| name = "UNKNOWN!"; |
| res.append(name + "(" + majorCode + "," + minorCode + ")"); |
| if (args != null) { |
| if (args.length == 1) { |
| res.append(": " + String.valueOf(args[0])); |
| } else { |
| for (int i = 0; i < args.length; i++) { |
| if (args[i] != null) |
| res.append(", args[" + i + "]=" + String.valueOf(args[i])); |
| } |
| } |
| } |
| return res.toString(); |
| } |
| |
| public void setArgs(String[] args) { |
| this.args = args; |
| } |
| |
| public void setMessage(String msg) { |
| if (args == null || args.length == 0) { |
| args = new String[1]; |
| } |
| args[0] = msg; |
| } |
| |
| public void setMajorCode(byte majorCode) { |
| this.majorCode = majorCode; |
| } |
| |
| public void setMinorCode(short minorCode) { |
| this.minorCode = minorCode; |
| } |
| |
| public boolean equals(Object o) { |
| if (o == null) |
| return false; |
| if (!(o instanceof ParseStatus)) |
| return false; |
| boolean res = true; |
| ParseStatus other = (ParseStatus) o; |
| res = res && (this.majorCode == other.majorCode) |
| && (this.minorCode == other.minorCode); |
| if (!res) |
| return res; |
| if (this.args == null) { |
| if (other.args == null) |
| return true; |
| else |
| return false; |
| } else { |
| if (other.args == null) |
| return false; |
| if (other.args.length != this.args.length) |
| return false; |
| for (int i = 0; i < this.args.length; i++) { |
| if (!this.args[i].equals(other.args[i])) |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| private static class EmptyParseImpl implements Parse { |
| |
| private ParseData data = null; |
| |
| public EmptyParseImpl(ParseStatus status, Configuration conf) { |
| data = new ParseData(status, "", new Outlink[0], new Metadata(), |
| new Metadata()); |
| } |
| |
| public ParseData getData() { |
| return data; |
| } |
| |
| public String getText() { |
| return ""; |
| } |
| |
| public boolean isCanonical() { |
| return true; |
| } |
| } |
| } |