blob: 6d8c3b3be03dc4324bfba7d29d787a0b7fabe941 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jackrabbit.oak.plugins.index.solr.index;
import java.io.IOException;
import java.io.InputStream;
import java.util.LinkedList;
import java.util.List;
import org.apache.jackrabbit.JcrConstants;
import org.apache.jackrabbit.oak.api.Blob;
import org.apache.jackrabbit.oak.api.CommitFailedException;
import org.apache.jackrabbit.oak.api.PropertyState;
import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.commons.PathUtils;
import org.apache.jackrabbit.oak.plugins.index.IndexEditor;
import org.apache.jackrabbit.oak.plugins.index.IndexUpdateCallback;
import org.apache.jackrabbit.oak.plugins.index.solr.configuration.OakSolrConfiguration;
import org.apache.jackrabbit.oak.spi.commit.Editor;
import org.apache.jackrabbit.oak.spi.state.NodeState;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.WriteOutContentHandler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.jackrabbit.JcrConstants.JCR_DATA;
import static org.apache.jackrabbit.oak.commons.PathUtils.concat;
import static org.apache.jackrabbit.oak.plugins.index.solr.util.SolrUtils.getSortingField;
import static org.apache.jackrabbit.oak.plugins.index.solr.util.SolrUtils.partialEscape;
/**
 * Index editor for keeping a Solr index up to date.
 * <p>
 * The root editor is created with a {@link SolrClient}, an
 * {@link OakSolrConfiguration} and an {@link IndexUpdateCallback}; one child
 * editor is spawned per changed child node and shares those three references
 * with its parent. Added or changed nodes are converted to
 * {@link SolrInputDocument}s and sent to Solr when the corresponding editor is
 * left; deleted subtrees are removed with a single path-prefix delete query.
 * When the root editor is left, pending changes are committed according to the
 * configured commit policy.
 */
class SolrIndexEditor implements IndexEditor {

    private final Logger log = LoggerFactory.getLogger(getClass());

    /**
     * Parent editor, or {@code null} if this is the root editor.
     */
    private final SolrIndexEditor parent;

    /**
     * Name of this node, or {@code null} for the root node.
     */
    private final String name;

    /**
     * Path of this editor, built lazily in {@link #getPath()}.
     */
    private String path;

    // Client used to send documents, deletes and commits to Solr;
    // shared by all editors of one traversal.
    private final SolrClient solrServer;

    // Index configuration: Solr field names, commit policy and the
    // used/ignored property filters consulted in docFromState().
    private final OakSolrConfiguration configuration;

    // Set as soon as any property of this node is added, changed or deleted;
    // leave() only sends a document when this is true or the node is new.
    private boolean propertiesChanged = false;

    // Notified once per index update (both document adds and deletes).
    private final IndexUpdateCallback updateCallback;

    // Single shared Tika parser used for binary text extraction
    // (AutoDetectParser is documented as safe for concurrent use by Tika).
    private static final Parser parser = new AutoDetectParser();

    /**
     * Creates the root editor (path {@code "/"}).
     *
     * @param solrServer    client for the target Solr index
     * @param configuration index configuration to apply
     * @param callback      callback notified on every index update
     */
    SolrIndexEditor(
            SolrClient solrServer,
            OakSolrConfiguration configuration,
            IndexUpdateCallback callback) {
        this.parent = null;
        this.name = null;
        this.path = "/";
        this.solrServer = solrServer;
        this.configuration = configuration;
        this.updateCallback = callback;
    }

    /**
     * Creates a child editor for the child node {@code name} of
     * {@code parent}, sharing the parent's client, configuration and
     * callback. The path is computed lazily in {@link #getPath()}.
     */
    private SolrIndexEditor(SolrIndexEditor parent, String name) {
        this.parent = parent;
        this.name = name;
        this.path = null;
        this.solrServer = parent.solrServer;
        this.configuration = parent.configuration;
        this.updateCallback = parent.updateCallback;
    }

    /**
     * Returns the absolute path of the node this editor is responsible for,
     * concatenating the ancestor names on first use and caching the result.
     */
    String getPath() {
        if (path == null) { // => parent != null
            path = concat(parent.getPath(), name);
        }
        return path;
    }

    @Override
    public void enter(NodeState before, NodeState after) {
        // nothing to do on enter; documents are built and sent in leave()
    }

    /**
     * Sends this node's document to Solr if any of its properties changed or
     * if the node is new ({@code !before.exists()}). The root editor
     * additionally triggers a commit according to the configured
     * {@link OakSolrConfiguration.CommitPolicy}.
     *
     * @throws CommitFailedException wrapping Solr failures (codes 2 for add,
     *         3 for commit) or transport failures (code 6)
     */
    @Override
    public void leave(NodeState before, NodeState after)
            throws CommitFailedException {
        if (propertiesChanged || !before.exists()) {
            updateCallback.indexUpdate();
            try {
                solrServer.add(docFromState(after));
            } catch (SolrServerException e) {
                throw new CommitFailedException(
                        "Solr", 2, "Failed to add a document to Solr", e);
            } catch (IOException e) {
                throw new CommitFailedException(
                        "Solr", 6, "Failed to send data to Solr", e);
            }
        }
        // only the root editor commits, once the whole diff has been applied
        if (parent == null) {
            try {
                commitByPolicy(solrServer, configuration.getCommitPolicy());
            } catch (SolrServerException e) {
                throw new CommitFailedException(
                        "Solr", 3, "Failed to commit changes to Solr", e);
            } catch (IOException e) {
                throw new CommitFailedException(
                        "Solr", 6, "Failed to send data to Solr", e);
            }
        }
    }

    /**
     * Commits pending changes using the given policy: {@code HARD} issues a
     * normal commit, {@code SOFT} issues a soft commit, and {@code AUTO}
     * sends nothing, leaving committing to the Solr server's own auto-commit
     * configuration.
     */
    private void commitByPolicy(SolrClient solrServer, OakSolrConfiguration.CommitPolicy commitPolicy) throws IOException, SolrServerException {
        switch (commitPolicy) {
            case HARD: {
                solrServer.commit();
                break;
            }
            case SOFT: {
                // commit(waitFlush=false, waitSearcher=false, softCommit=true)
                solrServer.commit(false, false, true);
                break;
            }
            case AUTO: {
                // no explicit commit; rely on server-side auto commit
                break;
            }
        }
    }

    @Override
    public void propertyAdded(PropertyState after) {
        // just mark dirty; the full document is rebuilt in leave()
        propertiesChanged = true;
    }

    @Override
    public void propertyChanged(PropertyState before, PropertyState after) {
        // just mark dirty; the full document is rebuilt in leave()
        propertiesChanged = true;
    }

    @Override
    public void propertyDeleted(PropertyState before) {
        // just mark dirty; the full document is rebuilt in leave()
        propertiesChanged = true;
    }

    @Override
    public Editor childNodeAdded(String name, NodeState after) {
        // recurse with a child editor sharing this editor's client/config
        return new SolrIndexEditor(this, name);
    }

    @Override
    public Editor childNodeChanged(
            String name, NodeState before, NodeState after) {
        // recurse with a child editor sharing this editor's client/config
        return new SolrIndexEditor(this, name);
    }

    /**
     * Removes the deleted node and its whole subtree from the index with a
     * single prefix query on the configured path field, then reports the
     * update. Returns {@code null} so the diff does not descend into the
     * removed subtree.
     * <p>
     * NOTE(review): the trailing {@code *} makes this a plain prefix match,
     * so it would also hit sibling paths sharing the same prefix (e.g.
     * deleting {@code /a/foo} matches {@code /a/foobar}) — confirm this is
     * acceptable or prevented elsewhere.
     *
     * @throws CommitFailedException wrapping Solr failures (code 5) or
     *         transport failures (code 6)
     */
    @Override
    public Editor childNodeDeleted(String name, NodeState before)
            throws CommitFailedException {
        // escape Solr query syntax characters in the path before use
        String path = partialEscape(PathUtils.concat(getPath(), name)).toString();
        try {
            String formattedQuery = String.format(
                    "%s:%s*", configuration.getPathField(), path);
            if (log.isDebugEnabled()) {
                log.debug("deleting by query {}", formattedQuery);
            }
            solrServer.deleteByQuery(formattedQuery);
            updateCallback.indexUpdate();
        } catch (SolrServerException e) {
            throw new CommitFailedException(
                    "Solr", 5, "Failed to remove documents from Solr", e);
        } catch (IOException e) {
            throw new CommitFailedException(
                    "Solr", 6, "Failed to send data to Solr", e);
        }
        return null; // no need to recurse down the removed subtree
    }

    /**
     * Builds the Solr document for the given node state: always the path and
     * path-depth fields, optionally a collapsed-path field for paths below a
     * {@code jcr:content} node, plus one field (and one sorting field,
     * truncated to 1024 characters) per indexed property. Binary properties
     * are indexed as text extracted via Tika.
     * <p>
     * A property is indexed when it appears in the configured "used"
     * properties, or when it is not listed among the "ignored" properties.
     * NOTE(review): because of the {@code ||}, a non-empty "used" list does
     * not exclude unlisted properties — they still pass unless explicitly
     * ignored; confirm that is the intended filter semantics.
     */
    private SolrInputDocument docFromState(NodeState state) {
        SolrInputDocument inputDocument = new SolrInputDocument();
        String path = getPath();
        inputDocument.addField(configuration.getPathField(), path);
        inputDocument.addField(configuration.getPathDepthField(), PathUtils.getDepth(path));
        if (configuration.collapseJcrContentNodes()) {
            // index the ancestor path up to (and including) jcr:content so
            // that all descendants of that node collapse onto one value
            int jcrContentIndex = path.lastIndexOf(JcrConstants.JCR_CONTENT);
            if (jcrContentIndex >= 0) {
                int index = jcrContentIndex + JcrConstants.JCR_CONTENT.length();
                String collapsedPath = path.substring(0, index);
                inputDocument.addField(configuration.getCollapsedPathField(), collapsedPath);
            }
        }
        for (PropertyState property : state.getProperties()) {
            if ((configuration.getUsedProperties().size() > 0 && configuration.getUsedProperties().contains(property.getName()))
                    || !configuration.getIgnoredProperties().contains(property.getName())) {
                // try to get the field to use for this property from configuration
                String fieldName = configuration.getFieldNameFor(property.getType());
                Object fieldValue;
                if (fieldName != null) {
                    // a type-mapped field exists: store the typed value as-is
                    fieldValue = property.getValue(property.getType());
                } else {
                    // no mapping: use the property name as the field name
                    fieldName = property.getName();
                    if (Type.BINARY.tag() == property.getType().tag()) {
                        // binaries are indexed as Tika-extracted text
                        fieldValue = extractTextValues(property, state);
                    } else if (property.isArray()) {
                        fieldValue = property.getValue(Type.STRINGS);
                    } else {
                        fieldValue = property.getValue(Type.STRING);
                    }
                }
                // add property field
                inputDocument.addField(fieldName, fieldValue);
                // build a single-valued sort value, capped at 1024 characters
                Object sortValue;
                if (fieldValue instanceof Iterable) {
                    // multi-valued: concatenate values, stop once over the cap
                    Iterable values = (Iterable) fieldValue;
                    StringBuilder builder = new StringBuilder();
                    String stringValue = null;
                    for (Object value : values) {
                        builder.append(value);
                        if (builder.length() > 1024) {
                            stringValue = builder.substring(0, 1024);
                            break;
                        }
                    }
                    if (stringValue == null) {
                        stringValue = builder.toString();
                    }
                    sortValue = stringValue;
                } else {
                    if (fieldValue.toString().length() > 1024) {
                        sortValue = fieldValue.toString().substring(0, 1024);
                    } else {
                        sortValue = fieldValue;
                    }
                }
                // add sort field
                inputDocument.addField(getSortingField(property.getType().tag(), property.getName()), sortValue);
            }
        }
        return inputDocument;
    }

    /**
     * Extracts full text from each binary value of the given property via
     * Tika. For {@code jcr:data} properties the sibling
     * {@code jcr:mimeType} / {@code jcr:encoding} values (when present) are
     * passed to Tika as parse hints.
     *
     * @return one extracted-text string per binary value
     */
    private List<String> extractTextValues(
            PropertyState property, NodeState state) {
        List<String> values = new LinkedList<String>();
        Metadata metadata = new Metadata();
        if (JCR_DATA.equals(property.getName())) {
            String type = state.getString(JcrConstants.JCR_MIMETYPE);
            if (type != null) { // not mandatory
                metadata.set(Metadata.CONTENT_TYPE, type);
            }
            String encoding = state.getString(JcrConstants.JCR_ENCODING);
            if (encoding != null) { // not mandatory
                metadata.set(Metadata.CONTENT_ENCODING, encoding);
            }
        }
        for (Blob v : property.getValue(Type.BINARIES)) {
            values.add(parseStringValue(v, metadata));
        }
        return values;
    }

    /**
     * Parses a single binary value into extracted text. Extraction failures
     * never fail the commit: missing parser libraries ({@link LinkageError})
     * are ignored, reaching the handler's write limit just returns the text
     * collected so far, and any other parse problem is logged at debug level
     * and replaced by the marker value {@code "TextExtractionError"}.
     */
    private String parseStringValue(Blob v, Metadata metadata) {
        WriteOutContentHandler handler = new WriteOutContentHandler();
        try {
            InputStream stream = v.getNewStream();
            try {
                parser.parse(stream, handler, metadata, new ParseContext());
            } finally {
                stream.close();
            }
        } catch (LinkageError e) {
            // Capture and ignore errors caused by extraction libraries
            // not being present. This is equivalent to disabling
            // selected media types in configuration, so we can simply
            // ignore these errors.
        } catch (Throwable t) {
            // Capture and report any other full text extraction problems.
            // The special STOP exception is used for normal termination.
            if (!handler.isWriteLimitReached(t)) {
                log.debug("Failed to extract text from a binary property: "
                        + " This is a fairly common case, and nothing to"
                        + " worry about. The stack trace is included to"
                        + " help improve the text extraction feature.", t);
                return "TextExtractionError";
            }
        }
        return handler.toString();
    }
}