| /* ==================================================================== |
| Licensed to the Apache Software Foundation (ASF) under one or more |
| contributor license agreements. See the NOTICE file distributed with |
| this work for additional information regarding copyright ownership. |
| The ASF licenses this file to You under the Apache License, Version 2.0 |
| (the "License"); you may not use this file except in compliance with |
| the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| ==================================================================== */ |
| package org.apache.poi.extractor; |
| |
| import org.apache.poi.POIDocument; |
| import org.apache.poi.hpsf.DocumentSummaryInformation; |
| import org.apache.poi.hpsf.SummaryInformation; |
| import org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor; |
| import org.apache.poi.poifs.filesystem.DirectoryEntry; |
| |
| /** |
| * Common Parent for OLE2 based Text Extractors |
| * of POI Documents, such as .doc, .xls |
| * You will typically find the implementation of |
| * a given format's text extractor under |
| * org.apache.poi.[format].extractor . |
| * |
| * @see org.apache.poi.hssf.extractor.ExcelExtractor |
| * @see org.apache.poi.hdgf.extractor.VisioTextExtractor |
| * @see org.apache.poi.hwpf.extractor.WordExtractor |
| */ |
| public interface POIOLE2TextExtractor extends POITextExtractor { |
| /** |
| * Returns the document information metadata for the document |
| * |
| * @return The Document Summary Information or null |
| * if it could not be read for this document. |
| */ |
| default DocumentSummaryInformation getDocSummaryInformation() { |
| return getDocument().getDocumentSummaryInformation(); |
| } |
| |
| /** |
| * Returns the summary information metadata for the document. |
| * |
| * @return The Summary information for the document or null |
| * if it could not be read for this document. |
| */ |
| default SummaryInformation getSummaryInformation() { |
| return getDocument().getSummaryInformation(); |
| } |
| |
| /** |
| * Returns an HPSF powered text extractor for the |
| * document properties metadata, such as title and author. |
| * |
| * @return an instance of POIExtractor that can extract meta-data. |
| */ |
| @Override |
| default POITextExtractor getMetadataTextExtractor() { |
| return new HPSFPropertiesExtractor(this); |
| } |
| |
| /** |
| * Return the underlying DirectoryEntry of this document. |
| * |
| * @return the DirectoryEntry that is associated with the POIDocument of this extractor. |
| */ |
| default DirectoryEntry getRoot() { |
| return getDocument().getDirectory(); |
| } |
| |
| /** |
| * Return the underlying POIDocument |
| * |
| * @return the underlying POIDocument |
| */ |
| @Override |
| POIDocument getDocument(); |
| } |