| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.beam.sdk.io.tika; |
| |
| import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument; |
| import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkState; |
| |
| import java.io.Serializable; |
| import java.util.Arrays; |
| import java.util.Objects; |
| import javax.annotation.Nullable; |
| import org.apache.beam.sdk.util.SerializableThrowable; |
| import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.MoreObjects; |
| import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Throwables; |
| import org.apache.tika.metadata.Metadata; |
| |
| /** |
| * The result of parsing a single file with Tika: contains the file's location, metadata, extracted |
| * text, and optionally an error. If there is an error, the metadata and extracted text may be |
| * partial (i.e. not represent the entire file). |
| */ |
| public class ParseResult implements Serializable { |
| private final String fileLocation; |
| private final String content; |
| private final Metadata metadata; |
| private final String[] metadataNames; |
| @Nullable private final SerializableThrowable error; |
| |
| public static ParseResult success(String fileLocation, String content, Metadata metadata) { |
| return new ParseResult(fileLocation, content, metadata, null); |
| } |
| |
| public static ParseResult success(String fileLocation, String content) { |
| return new ParseResult(fileLocation, content, new Metadata(), null); |
| } |
| |
| public static ParseResult failure( |
| String fileLocation, String partialContent, Metadata partialMetadata, Throwable error) { |
| return new ParseResult(fileLocation, partialContent, partialMetadata, error); |
| } |
| |
| private ParseResult(String fileLocation, String content, Metadata metadata, Throwable error) { |
| checkArgument(fileLocation != null, "fileLocation can not be null"); |
| checkArgument(content != null, "content can not be null"); |
| checkArgument(metadata != null, "metadata can not be null"); |
| this.fileLocation = fileLocation; |
| this.content = content; |
| this.metadata = metadata; |
| this.metadataNames = metadata.names(); |
| this.error = (error == null) ? null : new SerializableThrowable(error); |
| } |
| |
| /** Returns the absolute path to the input file. */ |
| public String getFileLocation() { |
| return fileLocation; |
| } |
| |
| /** Returns whether this file was parsed successfully. */ |
| public boolean isSuccess() { |
| return error == null; |
| } |
| |
| /** Returns the parse error, if the file was parsed unsuccessfully. */ |
| public Throwable getError() { |
| checkState(error != null, "This is a successful ParseResult"); |
| return error.getThrowable(); |
| } |
| |
| /** |
| * Same as {@link #getError}, but returns the complete stack trace of the error as a {@link |
| * String}. |
| */ |
| public String getErrorAsString() { |
| return Throwables.getStackTraceAsString(getError()); |
| } |
| |
| /** Returns the extracted text. May be partial, if this parse result contains a failure. */ |
| public String getContent() { |
| return content; |
| } |
| |
| /** Returns the extracted metadata. May be partial, if this parse result contains a failure. */ |
| public Metadata getMetadata() { |
| return metadata; |
| } |
| |
| @Override |
| public int hashCode() { |
| return Objects.hash( |
| getFileLocation(), |
| getContent(), |
| getMetadataHashCode(), |
| isSuccess() ? "" : Throwables.getStackTraceAsString(getError())); |
| } |
| |
| @Override |
| public boolean equals(Object obj) { |
| if (!(obj instanceof ParseResult)) { |
| return false; |
| } |
| |
| ParseResult other = (ParseResult) obj; |
| return Objects.equals(getFileLocation(), other.getFileLocation()) |
| && Objects.equals(getContent(), other.getContent()) |
| && Objects.equals(getMetadata(), other.getMetadata()) |
| && (isSuccess() |
| ? other.isSuccess() |
| : (!other.isSuccess() && Objects.equals(getErrorAsString(), other.getErrorAsString()))); |
| } |
| |
| // TODO: Remove this function and use metadata.hashCode() once Apache Tika 1.17 gets released. |
| private int getMetadataHashCode() { |
| int hashCode = 0; |
| for (String name : metadataNames) { |
| hashCode += name.hashCode() ^ Arrays.hashCode(metadata.getValues(name)); |
| } |
| return hashCode; |
| } |
| |
| @Override |
| public String toString() { |
| return MoreObjects.toStringHelper(this) |
| .add("fileLocation", fileLocation) |
| .add("content", "<" + content.length() + " chars>") |
| .add("metadata", metadata) |
| .add("error", getError() == null ? null : Throwables.getStackTraceAsString(getError())) |
| .toString(); |
| } |
| } |