blob: 6556369a27740ada9c663cb4aaf7b874db6ec161 [file] [log] [blame]
From 3f29eff2a88d35178d5977be75d0a17589bf4d61 Mon Sep 17 00:00:00 2001
From: Martijn van Groningen <martijn.v.groningen@gmail.com>
Date: Tue, 31 May 2016 18:10:43 +0200
Subject: [PATCH 1/1] LUCENE-7304
---
.../lucene/search/join/ParentChildBlock.java | 145 ++++++++++
.../join/ToParentDocValuesBlockJoinQuery.java | 306 +++++++++++++++++++++
.../lucene/search/join/TestDocValuesBlockJoin.java | 159 +++++++++++
3 files changed, 610 insertions(+)
create mode 100644 lucene/join/src/java/org/apache/lucene/search/join/ParentChildBlock.java
create mode 100644 lucene/join/src/java/org/apache/lucene/search/join/ToParentDocValuesBlockJoinQuery.java
create mode 100644 lucene/join/src/test/org/apache/lucene/search/join/TestDocValuesBlockJoin.java
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/ParentChildBlock.java b/lucene/join/src/java/org/apache/lucene/search/join/ParentChildBlock.java
new file mode 100644
index 0000000..616cc87
--- /dev/null
+++ b/lucene/join/src/java/org/apache/lucene/search/join/ParentChildBlock.java
@@ -0,0 +1,145 @@
+package org.apache.lucene.search.join;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+import java.util.Deque;
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.IndexableField;
+
+/**
+ * Helper class that arranges parent and child documents in the order required to index them as a single block.
+ */
+public class ParentChildBlock {
+  private final String typeField;
+  private final String offsetFieldToFirstChildField;
+  private final Level root = new Level(0);
+  private final Deque<Level> levels = new LinkedList<>(); // open levels, innermost last; the root is never removed
+
+  /** Starts an empty block whose top level documents get {@code parentType} as their type. */
+  public ParentChildBlock(String typeField, String offsetFieldToFirstChildField, String parentType) {
+    this.typeField = typeField;
+    this.offsetFieldToFirstChildField = offsetFieldToFirstChildField;
+    root.type(parentType);
+    levels.add(root);
+  }
+
+  /** Returns the documents in index order: every parent directly follows all of its descendants. */
+  public List<Document> flatten() {
+    List<Document> documents = new ArrayList<>();
+    root.flatten(documents);
+    return documents;
+  }
+
+  /** Adds {@code document} to the current level and opens a new level of {@code type} for its children. */
+  public void nextLevel(Iterable<? extends IndexableField> document, String type) {
+    Level level = levels.peekLast().nextLevel(document);
+    level.type(type);
+    levels.add(level);
+  }
+
+  /** Changes the type given to documents that are added to the current level from now on. */
+  public void nextType(String type) {
+    levels.peekLast().type(type);
+  }
+
+  /** Closes the current level; fails instead of removing the root, which all other methods rely on. */
+  public void previousLevel() {
+    if (levels.size() == 1) {
+      throw new IllegalStateException("previousLevel() called without a matching nextLevel()");
+    }
+    levels.removeLast();
+  }
+
+  /** Adds a single document without children to the current level. */
+  public void addLeafDocuments(Iterable<? extends IndexableField> document) {
+    levels.peekLast().addChildDocument(document);
+  }
+
+  private class Level { // the documents sharing one parent, plus the type that newly added ones receive
+    final int depth;
+    final List<DocBlock> childDocs = new ArrayList<>();
+    String type;
+
+    private Level(int depth) {
+      this.depth = depth;
+    }
+
+    public void type(String type) {
+      this.type = type;
+    }
+
+    public void addChildDocument(Iterable<? extends IndexableField> document) {
+      childDocs.add(new DocBlock(null, typedCopy(document)));
+    }
+
+    public Level nextLevel(Iterable<? extends IndexableField> document) {
+      Level nextLevel = new Level(depth + 1);
+      childDocs.add(new DocBlock(nextLevel, typedCopy(document)));
+      return nextLevel;
+    }
+
+    /** Copies the fields of {@code document} behind a type field that holds this level's current type. */
+    private Document typedCopy(Iterable<? extends IndexableField> document) {
+      Document copy = new Document();
+      copy.add(new StringField(typeField, type, Field.Store.NO));
+      document.forEach(copy::add);
+      return copy;
+    }
+
+    void flatten(List<Document> docs) {
+      for (DocBlock docBlock : childDocs) {
+        Document doc = docBlock.document;
+        if (docBlock.level != null) {
+          docBlock.level.flatten(docs); // descendants first, their parent closes the block
+          doc = new Document(); // copy, so that a second flatten() cannot add the offset field twice
+          docBlock.document.forEach(doc::add);
+          doc.add(new NumericDocValuesField(offsetFieldToFirstChildField, docBlock.level.numDocs()));
+        }
+        docs.add(doc);
+      }
+    }
+
+    int numDocs() {
+      int numDocs = 0;
+      for (DocBlock childDoc : childDocs) {
+        numDocs += childDoc.numDocs();
+      }
+      return numDocs;
+    }
+
+    private class DocBlock { // a document and, unless it is a leaf, the level that holds its children
+      final Level level;
+      final Document document;
+      private DocBlock(Level level, Document document) {
+        this.level = level;
+        this.document = document;
+      }
+
+      int numDocs() {
+        return level == null ? 1 : 1 + level.numDocs();
+      }
+    }
+  }
+}
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/ToParentDocValuesBlockJoinQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/ToParentDocValuesBlockJoinQuery.java
new file mode 100644
index 0000000..4f66767
--- /dev/null
+++ b/lucene/join/src/java/org/apache/lucene/search/join/ToParentDocValuesBlockJoinQuery.java
@@ -0,0 +1,306 @@
+package org.apache.lucene.search.join;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Objects;
+import java.util.Set;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.ConstantScoreScorer;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.Weight;
+
+public final class ToParentDocValuesBlockJoinQuery extends Query {
+
+ private final String offsetFieldToFirstChildField;
+ private final Query parentQuery;
+ private final Query childQuery;
+ private final ScoreMode scoreMode;
+
+  public ToParentDocValuesBlockJoinQuery(String offsetFieldToFirstChildField, String typeField, String parentType, String childType, Query childQuery, ScoreMode scoreMode) {
+    this.offsetFieldToFirstChildField = offsetFieldToFirstChildField;
+    this.scoreMode = scoreMode;
+    this.parentQuery = new TermQuery(new Term(typeField, parentType)); // parents are found by their type alone
+    this.childQuery = new BooleanQuery.Builder() // children must have the child type and match the given query
+        .add(new TermQuery(new Term(typeField, childType)), BooleanClause.Occur.FILTER)
+        .add(childQuery, BooleanClause.Occur.MUST)
+        .build();
+  }
+
+ private ToParentDocValuesBlockJoinQuery(String offsetFieldToFirstChildField, Query parentQuery, Query childQuery, ScoreMode scoreMode) { // takes the parent and child queries as they are; used by rewrite()
+ this.offsetFieldToFirstChildField = offsetFieldToFirstChildField;
+ this.parentQuery = parentQuery;
+ this.childQuery = childQuery;
+ this.scoreMode = scoreMode;
+ }
+
+  @Override
+  public Query rewrite(IndexReader reader) throws IOException {
+    // only the child query is rewritten here; the parent query is carried over unchanged
+    Query rewritten = childQuery.rewrite(reader);
+    if (rewritten == childQuery) {
+      return super.rewrite(reader);
+    }
+    return new ToParentDocValuesBlockJoinQuery(offsetFieldToFirstChildField, parentQuery, rewritten, scoreMode);
+  }
+
+ @Override
+ public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
+ boolean requireScores = needsScores && scoreMode != ScoreMode.None; // child scores are needed only if the caller wants scores and the score mode is not None
+ Weight parentWeight = parentQuery.createWeight(searcher, false); // the parent side is always created without scores
+ Weight childWeight = childQuery.createWeight(searcher, requireScores);
+ return new Weight(this) {
+ @Override
+ public void extractTerms(Set<Term> terms) {
+ childWeight.extractTerms(terms);
+ }
+ // NOTE(review): explanations are not implemented yet, so every document is reported as a non-match here
+ @Override
+ public Explanation explain(LeafReaderContext context, int doc) throws IOException {
+ return Explanation.noMatch("not implemented yet");
+ }
+
+ @Override
+ public float getValueForNormalization() throws IOException {
+ return childWeight.getValueForNormalization();
+ }
+
+ @Override
+ public void normalize(float norm, float boost) {
+ childWeight.normalize(norm, boost);
+ }
+ // Returns null when the offset doc values, the child scorer or the parent scorer is missing for this segment.
+ @Override
+ public Scorer scorer(LeafReaderContext context) throws IOException {
+ NumericDocValues offsetToFirstChildDV = context.reader().getNumericDocValues(offsetFieldToFirstChildField);
+ if (offsetToFirstChildDV == null) {
+ return null;
+ }
+
+ Scorer childScorer = childWeight.scorer(context);
+ if (childScorer == null) {
+ return null;
+ }
+
+ Scorer parentScorer = parentWeight.scorer(context);
+ if (parentScorer == null) {
+ return null;
+ }
+
+ DocIdSetIterator parentIterator = parentScorer.iterator();
+ // scoring join when scores are required, otherwise a constant score of 1 for every matching parent
+ if (requireScores) {
+ return new BlockJoinScorer(this, parentIterator, childScorer, offsetToFirstChildDV);
+ } else {
+ return new ConstantScoreScorer(this, 1f, new NoneScoringBlockJoinIterator(parentIterator, childScorer.iterator(), offsetToFirstChildDV));
+ }
+ }
+ };
+ }
+
+  @Override
+  public boolean equals(Object o) { // parentQuery is part of the identity: the same children joined to another parent type is another query
+    if ((o instanceof ToParentDocValuesBlockJoinQuery) == false) {
+      return false;
+    }
+    ToParentDocValuesBlockJoinQuery other = (ToParentDocValuesBlockJoinQuery) o;
+    return Objects.equals(parentQuery, other.parentQuery) && Objects.equals(childQuery, other.childQuery) &&
+        Objects.equals(offsetFieldToFirstChildField, other.offsetFieldToFirstChildField) &&
+        Objects.equals(scoreMode, other.scoreMode);
+  }
+
+  @Override
+  public int hashCode() { // hashes exactly the fields that equals compares
+    return Objects.hash(parentQuery, childQuery, offsetFieldToFirstChildField, scoreMode);
+  }
+
+  @Override
+  public String toString(String field) { // never null: enclosing queries append this string to their own description
+    return "ToParentDocValuesBlockJoinQuery(parent=" + parentQuery.toString(field) + ", child=" + childQuery.toString(field) + ", scoreMode=" + scoreMode + ")";
+  }
+
+  class BlockJoinScorer extends Scorer {
+    // Scores each matching parent by combining the scores of its matching children according to scoreMode.
+    final DocIdSetIterator parentIterator;
+    final Scorer childScorer;
+    final DocIdSetIterator childIt;
+    final NumericDocValues offsetToFirstChildDV;
+
+    int parentDocId = -1;
+    int childDocId; // the next matching child that has not been consumed yet
+    float score;
+    int freq;
+    int numChildDocs;
+
+    public BlockJoinScorer(Weight weight, DocIdSetIterator parentIterator, Scorer childScorer, NumericDocValues offsetToFirstChildDV) throws IOException {
+      super(weight);
+      this.parentIterator = parentIterator;
+      this.childScorer = childScorer;
+      this.childIt = childScorer.iterator();
+      this.childDocId = childIt.nextDoc();
+      this.offsetToFirstChildDV = offsetToFirstChildDV;
+    }
+
+    @Override
+    public int docID() {
+      return parentDocId;
+    }
+
+    @Override
+    public float score() throws IOException {
+      return score;
+    }
+
+    @Override
+    public int freq() throws IOException {
+      return freq;
+    }
+
+    @Override
+    public DocIdSetIterator iterator() {
+      return new DocIdSetIterator() {
+
+        @Override
+        public int docID() {
+          return parentDocId;
+        }
+
+        @Override
+        public int nextDoc() throws IOException {
+          if (childDocId == NO_MORE_DOCS) {
+            return parentDocId = NO_MORE_DOCS;
+          }
+          // the parent of the pending child is the first parent document at or after it
+          parentDocId = parentIterator.advance(childDocId);
+          numChildDocs = 0;
+          freq = 0;
+          switch (scoreMode) {
+            case Min:
+              score = Float.POSITIVE_INFINITY;
+              break;
+            case Max:
+              score = Float.NEGATIVE_INFINITY; // Float.MIN_VALUE is the smallest positive float, not the lowest value
+              break;
+            default:
+              score = 0f;
+              break;
+          }
+          // consume every matching child in front of the parent and fold its score into the parent's score
+          do {
+            numChildDocs++;
+            freq += childScorer.freq();
+            switch (scoreMode) {
+              case Min:
+                score = Math.min(score, childScorer.score());
+                break;
+              case Max:
+                score = Math.max(score, childScorer.score());
+                break;
+              case Total:
+              case Avg:
+                score += childScorer.score();
+                break;
+              default:
+                throw new UnsupportedOperationException("unsupported score_mode [" + scoreMode + "]");
+            }
+            childDocId = childIt.nextDoc();
+          } while (childDocId < parentDocId);
+          if (scoreMode == ScoreMode.Avg) {
+            score /= numChildDocs;
+          }
+          return parentDocId;
+        }
+
+        @Override
+        public int advance(int parentTarget) throws IOException {
+          // NO_MORE_DOCS is a legal target but not a document, so it must not be used for a doc values lookup
+          int childTarget = parentTarget == NO_MORE_DOCS ? NO_MORE_DOCS : parentTarget - (int) offsetToFirstChildDV.get(parentTarget);
+          if (childTarget > childDocId) {
+            childDocId = childIt.advance(childTarget);
+          }
+          return nextDoc();
+        }
+
+        @Override
+        public long cost() {
+          return childIt.cost();
+        }
+
+      };
+    }
+  }
+
+  class NoneScoringBlockJoinIterator extends DocIdSetIterator {
+    // Iterates over the parents that have at least one matching child, without computing any score.
+    final DocIdSetIterator parentIt;
+    final DocIdSetIterator childIt;
+    final NumericDocValues offsetToFirstChildDV;
+
+    int parentDocId = -1;
+    int childDocId; // the next matching child that has not been consumed yet
+
+    NoneScoringBlockJoinIterator(DocIdSetIterator parentIt, DocIdSetIterator childIt, NumericDocValues offsetToFirstChildDV) throws IOException {
+      this.parentIt = parentIt;
+      this.childIt = childIt;
+      this.offsetToFirstChildDV = offsetToFirstChildDV;
+      this.childDocId = childIt.nextDoc();
+    }
+
+    @Override
+    public int docID() {
+      return parentDocId;
+    }
+
+    @Override
+    public int nextDoc() throws IOException {
+      if (childDocId == NO_MORE_DOCS) {
+        return parentDocId = NO_MORE_DOCS;
+      }
+      parentDocId = parentIt.advance(childDocId); // first parent at or after the pending child
+      childDocId = childIt.advance(parentDocId); // skip the remaining children in front of that parent
+      return parentDocId;
+    }
+
+    @Override
+    public int advance(int parentTarget) throws IOException {
+      // NO_MORE_DOCS is a legal target but not a document, so it must not be used for a doc values lookup
+      int childTarget = parentTarget == NO_MORE_DOCS ? NO_MORE_DOCS : parentTarget - (int) offsetToFirstChildDV.get(parentTarget);
+      if (childTarget > childDocId) {
+        childDocId = childIt.advance(childTarget);
+      }
+      return nextDoc();
+    }
+
+    @Override
+    public long cost() {
+      return childIt.cost();
+    }
+  }
+
+}
diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestDocValuesBlockJoin.java b/lucene/join/src/test/org/apache/lucene/search/join/TestDocValuesBlockJoin.java
new file mode 100644
index 0000000..8b5bf44
--- /dev/null
+++ b/lucene/join/src/test/org/apache/lucene/search/join/TestDocValuesBlockJoin.java
@@ -0,0 +1,159 @@
+package org.apache.lucene.search.join;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.IntPoint;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+
+import static org.apache.lucene.search.BooleanClause.Occur.MUST;
+
+public class TestDocValuesBlockJoin extends LuceneTestCase {
+
+ public void testSimple() throws Exception {
+ final Directory dir = newDirectory();
+ final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+ // Lisa: two jobs, then her resume (doc 2, assuming doc ids follow insertion order as the assertions below expect)
+ ParentChildBlock block = new ParentChildBlock(
+ "_type", "offset_to_first_child", "resume"
+ );
+ block.nextLevel(makeResume("Lisa", "United Kingdom"), "job");
+ block.addLeafDocuments(makeJob("java", 2007));
+ block.addLeafDocuments(makeJob("python", 2010));
+ block.previousLevel();
+ w.addDocuments(block.flatten());
+ // Frank: two jobs, one of them java; resume expected at doc 5
+ block = new ParentChildBlock(
+ "_type", "offset_to_first_child", "resume"
+ );
+ block.nextLevel(makeResume("Frank", "United States"), "job");
+ block.addLeafDocuments(makeJob("ruby", 2005));
+ block.addLeafDocuments(makeJob("java", 2006));
+ block.previousLevel();
+ w.addDocuments(block.flatten());
+ // Frits: three jobs, none of them java, so this resume must not match
+ block = new ParentChildBlock(
+ "_type", "offset_to_first_child", "resume"
+ );
+ block.nextLevel(makeResume("Frits", "Germany"), "job");
+ block.addLeafDocuments(makeJob("ruby", 2013));
+ block.addLeafDocuments(makeJob("go", 2014));
+ block.addLeafDocuments(makeJob("rust", 2015));
+ block.previousLevel();
+ w.addDocuments(block.flatten());
+ // Jim: the java job has a nested endorsement with role ceo; resume expected at doc 14
+ block = new ParentChildBlock(
+ "_type", "offset_to_first_child", "resume"
+ );
+ block.nextLevel(makeResume("Jim", "Australia"), "job");
+ block.addLeafDocuments(makeJob("c", 1995));
+ block.addLeafDocuments(makeJob("c++", 1999));
+ block.nextLevel(makeJob("java", 2000), "endorsement");
+ block.addLeafDocuments(makeEndorsement("Rob", "ceo"));
+ block.previousLevel();
+ block.previousLevel();
+ w.addDocuments(block.flatten());
+ // Theodor: two endorsed jobs (cobol, java) and one job without endorsement; resume expected at doc 20
+ block = new ParentChildBlock(
+ "_type", "offset_to_first_child", "resume"
+ );
+ block.nextLevel(makeResume("Theodor", "Canada"), "job");
+ block.nextLevel(makeJob("cobol", 1979), "endorsement");
+ block.addLeafDocuments(makeEndorsement("Tim", "coworker"));
+ block.previousLevel();
+ block.addLeafDocuments(makeJob("c++", 1992));
+ block.nextLevel(makeJob("java", 1995), "endorsement");
+ block.addLeafDocuments(makeEndorsement("Mike", "cto"));
+ block.previousLevel();
+ block.previousLevel();
+ w.addDocuments(block.flatten());
+
+ IndexReader r = w.getReader();
+ w.close();
+ IndexSearcher s = newSearcher(r, false);
+ // one level join with a randomly picked score mode: resumes that have a java job
+ ScoreMode scoreMode = ScoreMode.values()[random().nextInt(ScoreMode.values().length)];
+ ToParentDocValuesBlockJoinQuery joinQuery =
+ new ToParentDocValuesBlockJoinQuery("offset_to_first_child", "_type", "resume", "job", new TermQuery(new Term("skill", "java")), scoreMode);
+
+ TopDocs result = s.search(joinQuery, 10);
+ assertEquals(4, result.totalHits);
+ assertEquals(2, result.scoreDocs[0].doc);
+ assertEquals(5, result.scoreDocs[1].doc);
+ assertEquals(14, result.scoreDocs[2].doc);
+ assertEquals(20, result.scoreDocs[3].doc);
+ // the join combined with a restriction on the parent documents themselves
+ BooleanQuery.Builder fullQuery = new BooleanQuery.Builder();
+ Query parentQuery = new TermQuery(new Term("country", "Canada"));
+ fullQuery.add(new BooleanClause(parentQuery, MUST));
+ fullQuery.add(new BooleanClause(joinQuery, MUST));
+ result = s.search(fullQuery.build(), 10);
+ assertEquals(1, result.totalHits);
+ assertEquals(20, result.scoreDocs[0].doc);
+ // two level join: resumes that have a java job which in turn has an endorsement with role ceo
+ Query childJoinQuery = new ToParentDocValuesBlockJoinQuery("offset_to_first_child", "_type", "job", "endorsement",
+ new TermQuery(new Term("role", "ceo")), scoreMode);
+ joinQuery = new ToParentDocValuesBlockJoinQuery("offset_to_first_child", "_type", "resume", "job",
+ new BooleanQuery.Builder()
+ .add(new TermQuery(new Term("skill", "java")), MUST)
+ .add(childJoinQuery, MUST)
+ .build(), scoreMode);
+
+ result = s.search(joinQuery, 10);
+ assertEquals(1, result.totalHits);
+ assertEquals(14, result.scoreDocs[0].doc);
+
+ r.close();
+ dir.close();
+ }
+
+ private static Document makeResume(String name, String country) { // builds a resume document with "name" and "country" string fields
+ Document resume = new Document();
+ resume.add(newStringField("name", name, Field.Store.NO));
+ resume.add(newStringField("country", country, Field.Store.NO));
+ return resume;
+ }
+
+ private static Document makeJob(String skill, int year) { // builds a job document: "skill" string field, "year" as both IntPoint and StoredField
+ Document job = new Document();
+ job.add(newStringField("skill", skill, Field.Store.NO));
+ job.add(new IntPoint("year", year));
+ job.add(new StoredField("year", year));
+ return job;
+ }
+
+ private static Document makeEndorsement(String name, String role) { // builds an endorsement document with "name" and "role" string fields
+ Document endorsement = new Document();
+ endorsement.add(newStringField("name", name, Field.Store.NO));
+ endorsement.add(newStringField("role", role, Field.Store.NO));
+ return endorsement;
+ }
+
+}
--
2.7.4 (Apple Git-66)