blob: ac89419228d14b224b0814675e2e3ad2eae72879 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.response.transform;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Multimap;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.join.BitSetProducer;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrException;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.search.BitsFilteredPostingsEnum;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SolrReturnFields;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.solr.response.transform.ChildDocTransformerFactory.NUM_SEP_CHAR;
import static org.apache.solr.response.transform.ChildDocTransformerFactory.PATH_SEP_CHAR;
import static org.apache.solr.schema.IndexSchema.NEST_PATH_FIELD_NAME;
/**
 * A {@link DocTransformer} that attaches child (nested) documents to the document being
 * transformed.
 *
 * <p>The children of a document are looked for among the doc ids that immediately precede it,
 * starting right after the previous "root" document. That previous root is located either through
 * a parents filter (when one was supplied) or, when no filter was supplied, through the
 * {@code _root_} terms of the document's uniqueKey.
 *
 * <p>When the schema is nested, each child is attached under the label derived from its nest path;
 * otherwise all children are added as anonymous child documents.
 */
class ChildDocTransformer extends DocTransformer {
  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

  /** Path/label used for every child when the schema is not nested. */
  private static final String ANON_CHILD_KEY = "_childDocuments_";

  private final String name;
  private final BitSetProducer parentsFilter; // if null; resolve parent via uniqueKey instead
  private final DocSet childDocSet; // if null; every child is considered a match
  private final int limit; // -1 means "no limit"
  private final boolean isNestedSchema;
  private final SolrReturnFields childReturnFields;
  private final String[] extraRequestedFields;

  /**
   * @param name the transformer name, returned by {@link #getName()}
   * @param parentsFilter identifies parent documents; may be null, in which case the previous
   *     root is resolved through the uniqueKey and {@code uniqueKeyField} is requested as an
   *     extra field
   * @param childDocSet restricts which children are returned; null means no restriction
   * @param returnFields fields (and transformer) applied to each child; null falls back to a
   *     default {@link SolrReturnFields}
   * @param isNestedSchema whether nest paths should be looked up for the children
   * @param limit maximum number of matched children to load, or -1 for no limit
   * @param uniqueKeyField name of the uniqueKey field; only used when {@code parentsFilter} is
   *     null
   */
  ChildDocTransformer(String name, BitSetProducer parentsFilter, DocSet childDocSet,
                      SolrReturnFields returnFields, boolean isNestedSchema, int limit,
                      String uniqueKeyField) {
    this.name = name;
    this.parentsFilter = parentsFilter;
    this.childDocSet = childDocSet;
    this.limit = limit;
    this.isNestedSchema = isNestedSchema;
    this.childReturnFields = returnFields != null ? returnFields : new SolrReturnFields();
    this.extraRequestedFields = parentsFilter == null ? new String[]{uniqueKeyField} : null;
  }

  @Override
  public String getName() {
    return name;
  }

  @Override
  public boolean needsSolrIndexSearcher() {
    return true;
  }

  /** Returns the uniqueKey field when no parents filter was given; otherwise null. */
  @Override
  public String[] getExtraRequestFields() {
    return extraRequestedFields;
  }

  /**
   * Finds the previous root using the parents filter.
   *
   * @param leafReaderContext the segment containing the document being transformed
   * @param segRootId segment-local id of the document being transformed
   * @return the segment-local id of the closest filter match before {@code segRootId}; -1 when
   *     {@code segRootId} is 0 (and presumably when no earlier bit is set, per
   *     {@code BitSet.prevSetBit})
   * @throws SolrException (BAD_REQUEST) if the filter produces no bit set for this segment
   */
  private int getPrevRootGivenFilter(LeafReaderContext leafReaderContext, int segRootId) throws IOException {
    final BitSet segParentsBitSet = parentsFilter.getBitSet(leafReaderContext);
    if (segParentsBitSet != null) {
      return segRootId == 0 ? -1 : segParentsBitSet.prevSetBit(segRootId - 1);
    }
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
        "Parent filter '" + parentsFilter + "' doesn't match any parent documents");
  }

  /**
   * Finds the id just before the first live document whose {@code _root_} term equals
   * {@code idBytes}.
   *
   * @param leafReaderContext the segment containing the document being transformed
   * @param segRootId segment-local id of the document being transformed
   * @param idBytes indexed form of the document's uniqueKey
   * @return the segment-local id preceding the first matching live document, or
   *     {@code segRootId - 1} (meaning "no children") when nothing matches
   */
  private int getPrevRootGivenId(LeafReaderContext leafReaderContext, int segRootId,
                                 BytesRef idBytes) throws IOException {
    final LeafReader reader = leafReaderContext.reader();
    // Expected to be non-null here, but a segment without any _root_ terms cannot hold
    // children for this doc either, so treat that defensively as "no child docs".
    final Terms terms = reader.terms(IndexSchema.ROOT_FIELD_NAME);
    if (terms == null) {
      return segRootId - 1;
    }
    final TermsEnum iterator = terms.iterator();
    if (iterator.seekExact(idBytes)) {
      PostingsEnum docs = iterator.postings(null, PostingsEnum.NONE);
      docs = BitsFilteredPostingsEnum.wrap(docs, reader.getLiveDocs());
      int id = docs.nextDoc();
      if (id != DocIdSetIterator.NO_MORE_DOCS) {
        assert id <= segRootId : "integrity violation";
        return id - 1;
      }
    }
    return segRootId - 1; // thus no child docs
  }

  /**
   * Attaches the matching descendants of {@code rootDoc} to it.
   *
   * <p>If an {@link IOException} occurs, it is logged and an error message is stored on
   * {@code rootDoc} under this transformer's name instead of being rethrown.
   *
   * @param rootDoc the document being transformed; not necessarily the root of its block
   * @param rootDocId the index-wide (not segment-local) doc id of {@code rootDoc}
   * @throws SolrException (BAD_REQUEST) if no parents filter is set and {@code rootDoc} lacks its
   *     uniqueKey, or if the parents filter yields no bit set for the segment
   */
  @Override
  public void transform(SolrDocument rootDoc, int rootDocId) {
    // note: this algorithm works both if we have _nest_path_ and also if we don't!

    try {

      // lookup what the *previous* rootDocId is, and figure which segment this is
      final SolrIndexSearcher searcher = context.getSearcher();
      final List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
      final int seg = ReaderUtil.subIndex(rootDocId, leaves);
      final LeafReaderContext leafReaderContext = leaves.get(seg);
      final Bits liveDocs = leafReaderContext.reader().getLiveDocs();
      final int segBaseId = leafReaderContext.docBase;
      final int segRootId = rootDocId - segBaseId;

      // can be -1 and that's okay (happens for very first block)
      final int segPrevRootId;
      if (parentsFilter != null) {
        segPrevRootId = getPrevRootGivenFilter(leafReaderContext, segRootId);
      } else {
        final IndexSchema schema = searcher.getSchema();
        final String idStr = schema.printableUniqueKey(rootDoc);
        if (idStr == null) {
          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
              "[child] requires fl to include the ID");
        }
        final BytesRef idBytes = schema.indexableUniqueKey(idStr);
        // Must be the segment-local id: the helper's "no children" fallback and its integrity
        // assertion are both expressed in segment-local doc ids.
        segPrevRootId = getPrevRootGivenId(leafReaderContext, segRootId, idBytes);
      }

      if (segPrevRootId == (segRootId - 1)) {
        // doc has no children, return fast
        return;
      }

      // we'll need this soon...
      final SortedDocValues segPathDocValues = DocValues.getSorted(leafReaderContext.reader(), NEST_PATH_FIELD_NAME);
      // passing a different SortedDocValues obj since the child documents which come after are of smaller docIDs,
      // and the iterator can not be reversed.
      // The root doc is the input document to be transformed, and is not necessarily the root doc of the block of docs.
      final String rootDocPath = getPathByDocId(segRootId, DocValues.getSorted(leafReaderContext.reader(), NEST_PATH_FIELD_NAME));

      // the key in the Map is the document's ancestors key (one above the parent), while the key in the intermediate
      // MultiMap is the direct child document's key (of the parent document)
      final Map<String, Multimap<String, SolrDocument>> pendingParentPathsToChildren = new HashMap<>();

      final int firstChildId = segBaseId + segPrevRootId + 1;
      int matches = 0;
      // Loop each child ID up to the parent (exclusive).
      for (int docId = firstChildId; docId < rootDocId; ++docId) {
        final int segDocId = docId - segBaseId;

        // check whether doc is "live"
        if (liveDocs != null && !liveDocs.get(segDocId)) {
          // doc is not "live"; skip it
          continue;
        }

        // get the path. (note will default to ANON_CHILD_KEY if schema is not nested or empty string if blank)
        final String fullDocPath = getPathByDocId(segDocId, segPathDocValues);

        if (isNestedSchema && !fullDocPath.startsWith(rootDocPath)) {
          // is not a descendant of the transformed doc; skip it
          continue;
        }

        // Is this doc a direct ancestor of another doc we've seen?
        final boolean isAncestor = pendingParentPathsToChildren.containsKey(fullDocPath);

        // Do we need to do anything with this doc (either ancestor or matched the child query)
        if (isAncestor || childDocSet == null || childDocSet.exists(docId)) {

          // If we reached the limit, only add if it's an ancestor
          if (limit != -1 && matches >= limit && !isAncestor) {
            continue;
          }
          ++matches; // note: includes ancestors that are not necessarily in childDocSet

          // load the doc
          SolrDocument doc = searcher.getDocFetcher().solrDoc(docId, childReturnFields);
          if (childReturnFields.getTransformer() != null) {
            if (childReturnFields.getTransformer().context == null) {
              childReturnFields.getTransformer().setContext(context);
            }
            childReturnFields.getTransformer().transform(doc, docId);
          }

          if (isAncestor) {
            // if this path has pending child docs, add them.
            addChildrenToParent(doc, pendingParentPathsToChildren.remove(fullDocPath)); // no longer pending
          }

          // get parent path
          String parentDocPath = getParentPath(fullDocPath);
          String lastPath = getLastPath(fullDocPath);
          // put into pending:
          // trim path if the doc was inside array, see trimLastPoundIfArray()
          // e.g. toppings#1/ingredients#1 -> outer map key toppings#1
          // -> inner MultiMap key ingredients
          // or lonely#/lonelyGrandChild# -> outer map key lonely#
          // -> inner MultiMap key lonelyGrandChild#
          pendingParentPathsToChildren.computeIfAbsent(parentDocPath, x -> ArrayListMultimap.create())
              .put(trimLastPoundIfArray(lastPath), doc); // multimap add (won't replace)
        }
      }

      if (pendingParentPathsToChildren.isEmpty()) {
        // no child docs matched the child filter; return fast.
        return;
      }

      // only children of parent remain
      assert pendingParentPathsToChildren.size() == 1;

      // size == 1, so get the last remaining entry
      addChildrenToParent(rootDoc, pendingParentPathsToChildren.values().iterator().next());

    } catch (IOException e) {
      //TODO DWS: reconsider this unusual error handling approach; shouldn't we rethrow?
      log.warn("Could not fetch child documents", e);
      rootDoc.put(getName(), "Could not fetch child documents");
    }
  }

  /** Adds every labelled group of {@code children} to {@code parent}, one label at a time. */
  private static void addChildrenToParent(SolrDocument parent, Multimap<String, SolrDocument> children) {
    for (String childLabel : children.keySet()) {
      addChildrenToParent(parent, children.get(childLabel), childLabel);
    }
  }

  /**
   * Adds {@code children} to {@code parent} under the label derived from {@code cDocsPath}.
   *
   * <ul>
   *   <li>{@link #ANON_CHILD_KEY}: children are added as anonymous child documents.
   *   <li>A label without a pound sign (an array) that the parent does not yet contain: the
   *       field is set to a list of all children.
   *   <li>Otherwise: the field is set to the first child only.
   * </ul>
   *
   * @param children non-empty collection of children sharing the same label
   */
  private static void addChildrenToParent(SolrDocument parent, Collection<SolrDocument> children, String cDocsPath) {
    // if no paths; we do not need to add the child document's relation to its parent document.
    if (cDocsPath.equals(ANON_CHILD_KEY)) {
      parent.addChildDocuments(children);
      return;
    }
    // lookup leaf key for these children using path
    // depending on the label, add to the parent at the right key/label
    final String trimmedPath = trimLastPound(cDocsPath);
    // if the child doc's path has no pound sign, trimming is a no-op and the label denotes an array
    final boolean isArray = trimmedPath.equals(cDocsPath);
    if (!parent.containsKey(trimmedPath) && isArray) {
      List<SolrDocument> list = new ArrayList<>(children);
      parent.setField(trimmedPath, list);
      return;
    }
    // is single value
    parent.setField(trimmedPath, children.iterator().next());
  }

  /** Returns the segment of {@code path} after the last path separator, or {@code path} itself. */
  private static String getLastPath(String path) {
    int lastIndexOfPathSepChar = path.lastIndexOf(PATH_SEP_CHAR);
    if (lastIndexOfPathSepChar == -1) {
      return path;
    }
    return path.substring(lastIndexOfPathSepChar + 1);
  }

  /**
   * Removes the pound sign and array index when the doc is inside an array, e.g.
   * {@code toppings#1 -> toppings}; returns the original string when the pound sign is the last
   * character (single child), e.g. {@code ingredients# -> ingredients#}, or when there is none.
   */
  private static String trimLastPoundIfArray(String path) {
    final int indexOfSepChar = path.lastIndexOf(NUM_SEP_CHAR);
    if (indexOfSepChar == -1) {
      return path;
    }
    int lastIndex = path.length() - 1;
    boolean singleDocVal = indexOfSepChar == lastIndex;
    return singleDocVal ? path : path.substring(0, indexOfSepChar);
  }

  /**
   * Removes the last pound sign and anything after it, e.g. {@code toppings#1 -> toppings};
   * returns {@code path} unchanged when it contains no pound sign.
   */
  private static String trimLastPound(String path) {
    int lastIndex = path.lastIndexOf(NUM_SEP_CHAR);
    return lastIndex == -1 ? path : path.substring(0, lastIndex);
  }

  /**
   * Returns the *parent* path for this document.
   * Children of the root will yield null.
   */
  private static String getParentPath(String currDocPath) {
    // chop off leaf (after last '/')
    // if there is no separator the doc is a direct child of the root: return null (special value)
    int lastPathIndex = currDocPath.lastIndexOf(PATH_SEP_CHAR);
    return lastPathIndex == -1 ? null : currDocPath.substring(0, lastPathIndex);
  }

  /**
   * Looks up the nest path of a document.
   *
   * @param segDocId segment-local doc id; must not be smaller than the iterator's current doc
   * @param segPathDocValues forward-only doc values of the nest path field for the segment
   * @return {@link #ANON_CHILD_KEY} if the schema is not nested, the empty string if the document
   *     has no nest path value, otherwise the stored path
   */
  private String getPathByDocId(int segDocId, SortedDocValues segPathDocValues) throws IOException {
    if (!isNestedSchema) {
      return ANON_CHILD_KEY;
    }
    // the iterator cannot go backwards (an unpositioned iterator reports -1)
    assert segPathDocValues.docID() <= segDocId;

    boolean advanced = segPathDocValues.advanceExact(segDocId);
    return advanced ? segPathDocValues.binaryValue().utf8ToString() : "";
  }
}