blob: 3348eb720fe0724f6f5c59a352623416cdeb3d73 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.pipes.core.extractor;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import org.apache.tika.extractor.EmbeddedDocumentBytesHandler;
import org.apache.tika.io.FilenameUtils;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.pipes.core.extractor.EmbeddedDocumentBytesConfig;
import org.apache.tika.utils.StringUtils;
/**
 * Base class for {@link EmbeddedDocumentBytesHandler} implementations.
 * <p>
 * It keeps a list of the ids handed to {@link #add(int, Metadata, InputStream)} and
 * provides {@link #getEmitKey(String, int, EmbeddedDocumentBytesConfig, Metadata)} for
 * assembling the key under which an embedded document's bytes are emitted.
 */
public abstract class AbstractEmbeddedDocumentBytesHandler implements EmbeddedDocumentBytesHandler {

    //ids passed to add(), in the order they were added
    List<Integer> ids = new ArrayList<>();

    /**
     * Builds the emit key for a single embedded document.
     * <p>
     * The key is assembled as <code>base + embeddedIdPrefix + id [+ suffix]</code>:
     * <ul>
     *     <li>base: the configured emit key base or, if that is blank,
     *     {@code containerEmitKey + "/" + FilenameUtils.getName(containerEmitKey)}</li>
     *     <li>id: {@code embeddedId} as a decimal string, left-padded with "0" to the
     *     configured zero-pad width when that width is greater than zero</li>
     *     <li>suffix: only for the {@code EXISTING} suffix strategy, the lower-cased
     *     suffix of the resource name found in {@code metadata}</li>
     * </ul>
     *
     * @param containerEmitKey emit key of the container document
     * @param embeddedId id of the embedded document
     * @param embeddedDocumentBytesConfig supplies the zero-pad width, emit key base,
     *                                    embedded id prefix and suffix strategy
     * @param metadata metadata from which the resource name is read
     * @return the assembled emit key
     */
    public String getEmitKey(String containerEmitKey, int embeddedId,
                             EmbeddedDocumentBytesConfig embeddedDocumentBytesConfig,
                             Metadata metadata) {
        //step 1: the id portion, zero padded only if a positive width is configured
        String idPart = Integer.toString(embeddedId);
        if (embeddedDocumentBytesConfig.getZeroPadName() > 0) {
            idPart = StringUtils.leftPad(idPart,
                    embeddedDocumentBytesConfig.getZeroPadName(), "0");
        }

        //step 2: the "file" portion of the key
        StringBuilder key = new StringBuilder();
        if (!StringUtils.isBlank(embeddedDocumentBytesConfig.getEmitKeyBase())) {
            key.append(embeddedDocumentBytesConfig.getEmitKeyBase());
        } else {
            key.append(containerEmitKey)
                    .append('/')
                    .append(FilenameUtils.getName(containerEmitKey));
        }

        //step 3: embedded id prefix followed by the id itself
        key.append(embeddedDocumentBytesConfig.getEmbeddedIdPrefix());
        key.append(idPart);

        //step 4: optionally carry over the extension of the original resource name
        boolean keepExistingSuffix = embeddedDocumentBytesConfig.getSuffixStrategy().equals(
                EmbeddedDocumentBytesConfig.SUFFIX_STRATEGY.EXISTING);
        if (!keepExistingSuffix) {
            return key.toString();
        }
        String resourceName = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
        key.append(FilenameUtils.getSuffixFromPath(resourceName).toLowerCase(Locale.US));
        return key.toString();
    }

    /**
     * Records the id. This base implementation does not read {@code metadata} or
     * {@code bytes}.
     *
     * @param id id to record
     * @param metadata not used by this implementation
     * @param bytes not used by this implementation
     * @throws IOException declared by the interface; not thrown by this implementation
     */
    @Override
    public void add(int id, Metadata metadata, InputStream bytes) throws IOException {
        ids.add(id);
    }

    /**
     * @return the list of ids recorded by {@link #add(int, Metadata, InputStream)};
     * this is the handler's own list, not a copy
     */
    @Override
    public List<Integer> getIds() {
        return ids;
    }
}