// blob: 3444ecab1193ebab178078a4b808746609c64f85 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.KnnVectorField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.util.VectorUtil;
/**
 * Tests for {@link KnnVectorFieldExistsQuery}: the query is checked against hand-maintained
 * marker fields and known document counts on small randomly built indexes.
 */
public class TestKnnVectorFieldExistsQuery extends LuceneTestCase {

  /**
   * Builds random indexes in which roughly half of the documents carry a vector and verifies that
   * {@link KnnVectorFieldExistsQuery} returns the same hits (and, when boosted, the same scores) as
   * a term query on a marker field that is added exactly when a vector is added.
   *
   * @throws IOException if indexing or searching fails
   */
  public void testRandom() throws IOException {
    int iters = atLeast(10);
    for (int iter = 0; iter < iters; ++iter) {
      try (Directory dir = newDirectory();
          RandomIndexWriter iw = new RandomIndexWriter(random(), dir)) {
        int numDocs = atLeast(100);
        for (int i = 0; i < numDocs; ++i) {
          Document doc = new Document();
          boolean hasValue = random().nextBoolean();
          if (hasValue) {
            doc.add(new KnnVectorField("vector", randomVector(5)));
            // Marker field: present exactly when the vector is present.
            doc.add(new StringField("has_value", "yes", Store.NO));
          }
          doc.add(new StringField("field", "value", Store.NO));
          // Random deletion marker. Without it the delete-by-query below would never match any
          // document and the test would never run against an index with deletions.
          doc.add(new StringField("f", random().nextBoolean() ? "yes" : "no", Store.NO));
          iw.addDocument(doc);
        }
        if (random().nextBoolean()) {
          iw.deleteDocuments(new TermQuery(new Term("f", "no")));
        }
        iw.commit();
        try (IndexReader reader = iw.getReader()) {
          IndexSearcher searcher = newSearcher(reader);

          // Same matching documents, scores ignored.
          assertSameMatches(
              searcher,
              new TermQuery(new Term("has_value", "yes")),
              new KnnVectorFieldExistsQuery("vector"),
              false);

          // Same documents and same scores: the expected side is wrapped in a
          // ConstantScoreQuery so that only the boost contributes to the score.
          float boost = random().nextFloat() * 10;
          assertSameMatches(
              searcher,
              new BoostQuery(
                  new ConstantScoreQuery(new TermQuery(new Term("has_value", "yes"))), boost),
              new BoostQuery(new KnnVectorFieldExistsQuery("vector"), boost),
              true);
        }
      }
    }
  }

  /**
   * Verifies that querying a field that was never indexed matches no documents.
   *
   * @throws IOException if indexing or searching fails
   */
  public void testMissingField() throws IOException {
    try (Directory dir = newDirectory();
        RandomIndexWriter iw = new RandomIndexWriter(random(), dir)) {
      iw.addDocument(new Document());
      iw.commit();
      try (IndexReader reader = iw.getReader()) {
        IndexSearcher searcher = newSearcher(reader);
        assertEquals(0, searcher.count(new KnnVectorFieldExistsQuery("f")));
      }
    }
  }

  /**
   * Verifies the count when the only indexed document has the vector field.
   *
   * @throws IOException if indexing or searching fails
   */
  public void testAllDocsHaveField() throws IOException {
    try (Directory dir = newDirectory();
        RandomIndexWriter iw = new RandomIndexWriter(random(), dir)) {
      Document doc = new Document();
      doc.add(new KnnVectorField("vector", randomVector(3)));
      iw.addDocument(doc);
      iw.commit();
      try (IndexReader reader = iw.getReader()) {
        IndexSearcher searcher = newSearcher(reader);
        assertEquals(1, searcher.count(new KnnVectorFieldExistsQuery("vector")));
      }
    }
  }

  /**
   * Verifies {@link KnnVectorFieldExistsQuery} as a FILTER clause combined with a term clause,
   * both when every document has a vector and when only a random subset does.
   *
   * @throws IOException if indexing or searching fails
   */
  public void testConjunction() throws IOException {
    try (Directory dir = newDirectory();
        RandomIndexWriter iw = new RandomIndexWriter(random(), dir)) {
      int numDocs = atLeast(100);
      int numVectors = 0;
      boolean allDocsHaveVector = random().nextBoolean();
      for (int i = 0; i < numDocs; ++i) {
        Document doc = new Document();
        if (allDocsHaveVector || random().nextBoolean()) {
          doc.add(new KnnVectorField("vector", randomVector(5)));
          numVectors++;
        }
        // Alternates between "value0" and "value1"; odd-numbered documents get "value1".
        doc.add(new StringField("field", "value" + (i % 2), Store.NO));
        iw.addDocument(doc);
      }
      try (IndexReader reader = iw.getReader()) {
        IndexSearcher searcher = newSearcher(reader);
        Occur occur = random().nextBoolean() ? Occur.MUST : Occur.FILTER;
        BooleanQuery booleanQuery =
            new BooleanQuery.Builder()
                .add(new TermQuery(new Term("field", "value1")), occur)
                .add(new KnnVectorFieldExistsQuery("vector"), Occur.FILTER)
                .build();
        int count = searcher.count(booleanQuery);
        // The conjunction can never match more documents than carry a vector.
        assertTrue(count <= numVectors);
        if (allDocsHaveVector) {
          // Every document has a vector, so only the term clause restricts the result:
          // exactly the odd-numbered documents, i.e. numDocs / 2 of them.
          assertEquals(numDocs / 2, count);
        }
      }
    }
  }

  /**
   * Verifies the count when a document with the vector field and a document without it are added
   * with a commit in between.
   *
   * @throws IOException if indexing or searching fails
   */
  public void testFieldExistsButNoDocsHaveField() throws IOException {
    try (Directory dir = newDirectory();
        RandomIndexWriter iw = new RandomIndexWriter(random(), dir)) {
      // 1st segment has the field, but 2nd one does not
      Document doc = new Document();
      doc.add(new KnnVectorField("vector", randomVector(3)));
      iw.addDocument(doc);
      iw.commit();
      iw.addDocument(new Document());
      iw.commit();
      try (IndexReader reader = iw.getReader()) {
        IndexSearcher searcher = newSearcher(reader);
        assertEquals(1, searcher.count(new KnnVectorFieldExistsQuery("vector")));
      }
    }
  }

  /**
   * Creates a random vector and passes it through {@link VectorUtil#l2normalize(float[])}.
   *
   * @param dim number of dimensions of the vector
   * @return the vector after normalization
   */
  private float[] randomVector(int dim) {
    float[] v = new float[dim];
    for (int i = 0; i < dim; i++) {
      v[i] = random().nextFloat();
    }
    VectorUtil.l2normalize(v);
    return v;
  }

  /**
   * Asserts that two queries return the same hits in the same order.
   *
   * @param searcher searcher to run both queries against
   * @param q1 query producing the expected hits
   * @param q2 query under test
   * @param scores if {@code true}, hits are sorted by relevance and their scores are compared as
   *     well; otherwise hits are sorted in index order and only doc ids are compared
   * @throws IOException if searching fails
   */
  private void assertSameMatches(IndexSearcher searcher, Query q1, Query q2, boolean scores)
      throws IOException {
    // Request at least one hit so that an empty reader (maxDoc == 0) does not make the
    // search call reject the hit count.
    final int numHits = Math.max(1, searcher.getIndexReader().maxDoc());
    final Sort sort = scores ? Sort.RELEVANCE : Sort.INDEXORDER;
    final TopDocs td1 = searcher.search(q1, numHits, sort);
    final TopDocs td2 = searcher.search(q2, numHits, sort);
    assertEquals(td1.totalHits.value, td2.totalHits.value);
    // Fail with a clear assertion instead of an ArrayIndexOutOfBoundsException in the loop.
    assertEquals(td1.scoreDocs.length, td2.scoreDocs.length);
    for (int i = 0; i < td1.scoreDocs.length; ++i) {
      assertEquals(td1.scoreDocs[i].doc, td2.scoreDocs[i].doc);
      if (scores) {
        assertEquals(td1.scoreDocs[i].score, td2.scoreDocs[i].score, 10e-7);
      }
    }
  }
}