Merge branch 'master' into jira/solr14977
diff --git a/.github/workflows/docker-test.yml b/.github/workflows/docker-test.yml
index 1cf2edf..76fa714 100644
--- a/.github/workflows/docker-test.yml
+++ b/.github/workflows/docker-test.yml
@@ -17,6 +17,10 @@
runs-on: ubuntu-latest
+ env:
+ SOLR_DOCKER_IMAGE_REPO: github-pr/solr
+ SOLR_DOCKER_IMAGE_TAG: ${{github.event.number}}
+
steps:
# Setup
- uses: actions/checkout@v2
diff --git a/build.gradle b/build.gradle
index aaf5848..59cddf5 100644
--- a/build.gradle
+++ b/build.gradle
@@ -154,6 +154,9 @@
apply from: file('gradle/generation/kuromoji.gradle')
apply from: file('gradle/generation/nori.gradle')
+// Shared configuration of subprojects containing native code.
+apply from: file('gradle/native/disable-native.gradle')
+
// Additional development aids.
apply from: file('gradle/maven/maven-local.gradle')
apply from: file('gradle/testing/per-project-summary.gradle')
diff --git a/gradle/documentation/render-javadoc.gradle b/gradle/documentation/render-javadoc.gradle
index 35f400a..ff26748 100644
--- a/gradle/documentation/render-javadoc.gradle
+++ b/gradle/documentation/render-javadoc.gradle
@@ -101,7 +101,6 @@
project.tasks.withType(RenderJavadocTask) {
// TODO: fix missing javadocs
javadocMissingLevel = "class"
- javadocMissingIgnore = [ "org.apache.lucene.analysis.classic", "org.apache.lucene.analysis.email" ]
}
}
@@ -150,16 +149,6 @@
project.tasks.withType(RenderJavadocTask) {
// TODO: fix missing @param tags
javadocMissingLevel = "method"
- // TODO: fix missing javadocs
- javadocMissingIgnore = [
- "org.apache.lucene.backward_codecs",
- "org.apache.lucene.backward_codecs.lucene50",
- "org.apache.lucene.backward_codecs.lucene60",
- "org.apache.lucene.backward_codecs.lucene80",
- "org.apache.lucene.backward_codecs.lucene84",
- "org.apache.lucene.backward_codecs.lucene86",
- "org.apache.lucene.backward_codecs.lucene87"
- ]
}
}
@@ -194,10 +183,6 @@
project.tasks.withType(RenderJavadocTask) {
// TODO: fix missing javadocs
javadocMissingLevel = "class"
- javadocMissingIgnore = [
- "org.apache.lucene.sandbox.search",
- "org.apache.lucene.sandbox.document"
- ]
}
}
@@ -205,16 +190,6 @@
project.tasks.withType(RenderJavadocTask) {
// TODO: fix missing javadocs
javadocMissingLevel = "class"
- // TODO: clean up split packages
- javadocMissingIgnore = [
- "org.apache.lucene.search",
- "org.apache.lucene.search.similarity",
- "org.apache.lucene.util",
- "org.apache.lucene.util.fst",
- "org.apache.lucene.store",
- "org.apache.lucene.document",
- "org.apache.lucene.index"
- ]
}
}
@@ -302,7 +277,8 @@
// Fix for Java 11 Javadoc tool that cannot handle split packages between modules correctly.
// (by removing all the packages which are part of lucene-core)
// See: https://issues.apache.org/jira/browse/LUCENE-8738?focusedCommentId=16818106&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-16818106
-configure(subprojects.findAll { it.path.startsWith(':lucene') && it.path != ':lucene:core' }) {
+// LUCENE-9499: This workaround should be applied only to test-framework (we have no split package in other modules).
+configure(project(":lucene:test-framework")) {
project.tasks.withType(RenderJavadocTask) {
doLast {
Set luceneCorePackages = file("${project(':lucene:core').tasks[name].outputDir}/element-list").readLines('UTF-8').toSet();
diff --git a/gradle/generation/snowball.gradle b/gradle/generation/snowball.gradle
index b7b37c4..db81a56 100644
--- a/gradle/generation/snowball.gradle
+++ b/gradle/generation/snowball.gradle
@@ -31,11 +31,11 @@
configure(project(":lucene:analysis:common")) {
ext {
// git commit hash of source code https://github.com/snowballstem/snowball/
- snowballStemmerCommit = "53739a805cfa6c77ff8496dc711dc1c106d987c1"
+ snowballStemmerCommit = "d8cf01ddf37a9c74a78ada44531c08f7952f2a39"
// git commit hash of stopwords https://github.com/snowballstem/snowball-website
- snowballWebsiteCommit = "5a8cf2451d108217585d8e32d744f8b8fd20c711"
+ snowballWebsiteCommit = "ee7cee9bc52f22802f21e94f42d887b0dfa7d2a8"
// git commit hash of test data https://github.com/snowballstem/snowball-data
- snowballDataCommit = "9145f8732ec952c8a3d1066be251da198a8bc792"
+ snowballDataCommit = "35461050d8f81e8aeac26e38f8a8dbf1afb82721"
snowballWorkDir = file("${buildDir}/snowball")
diff --git a/gradle/generation/snowball.patch b/gradle/generation/snowball.patch
index dc62267..a369acb 100644
--- a/gradle/generation/snowball.patch
+++ b/gradle/generation/snowball.patch
@@ -570,7 +570,7 @@
+ )
+)
diff --git a/compiler/generator_java.c b/compiler/generator_java.c
-index 3a18db7..5909f87 100644
+index 2958452..966adb4 100644
--- a/compiler/generator_java.c
+++ b/compiler/generator_java.c
@@ -272,7 +272,7 @@ static void generate_AE(struct generator * g, struct node * p) {
@@ -582,7 +582,7 @@
break;
}
}
-@@ -1138,6 +1138,7 @@ static void generate_class_begin(struct generator * g) {
+@@ -1140,6 +1140,7 @@ static void generate_class_begin(struct generator * g) {
w(g, " {~+~N"
"~N"
"~Mprivate static final long serialVersionUID = 1L;~N"
@@ -590,7 +590,7 @@
"~N");
}
-@@ -1184,7 +1185,7 @@ static void generate_among_table(struct generator * g, struct among * x) {
+@@ -1186,7 +1187,7 @@ static void generate_among_table(struct generator * g, struct among * x) {
if (v->function != 0) {
w(g, ", \"");
write_varname(g, v->function);
@@ -1013,7 +1013,7 @@
public abstract boolean stem();
diff --git a/libstemmer/modules.txt b/libstemmer/modules.txt
-index cb39621..9fe141e 100644
+index b8ec17a..d2c8e61 100644
--- a/libstemmer/modules.txt
+++ b/libstemmer/modules.txt
@@ -10,11 +10,13 @@
@@ -1030,7 +1030,7 @@
finnish UTF_8,ISO_8859_1 finnish,fi,fin
french UTF_8,ISO_8859_1 french,fr,fre,fra
german UTF_8,ISO_8859_1 german,de,ger,deu
-@@ -50,12 +52,12 @@ porter UTF_8,ISO_8859_1 porter english
+@@ -51,12 +53,12 @@ porter UTF_8,ISO_8859_1 porter english
# algorithms are:
#
# german2 - This is a slight modification of the german stemmer.
diff --git a/gradle/help.gradle b/gradle/help.gradle
index e03d724..4c1bf7e 100644
--- a/gradle/help.gradle
+++ b/gradle/help.gradle
@@ -29,6 +29,7 @@
["Git", "help/git.txt", "Git assistance and guides."],
["ValidateLogCalls", "help/validateLogCalls.txt", "How to use logging calls efficiently."],
["IDEs", "help/IDEs.txt", "IDE support."],
+ ["Docker", "help/docker.txt", "Building Solr Docker images."],
]
helpFiles.each { section, path, sectionInfo ->
diff --git a/gradle/native/disable-native.gradle b/gradle/native/disable-native.gradle
new file mode 100644
index 0000000..beb977a
--- /dev/null
+++ b/gradle/native/disable-native.gradle
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This is the master switch to disable all tasks that compile
+// native (cpp) code.
+rootProject.ext {
+ buildNative = propertyOrDefault("build.native", true).toBoolean()
+}
+
+// Explicitly list all projects that should be configured for native extensions.
+// We could scan for projects with a the cpp-library plugin but this is faster.
+def nativeProjects = allprojects.findAll {it.path in [
+ ":lucene:misc:native"
+]}
+
+def javaProjectsWithNativeDeps = allprojects.findAll {it.path in [
+ ":lucene:misc"
+]}
+
+// Set up defaults for projects with native dependencies.
+configure(javaProjectsWithNativeDeps, {
+ configurations {
+ nativeDeps {
+ attributes {
+ attributes.attribute(Usage.USAGE_ATTRIBUTE, objects.named(Usage.class, Usage.NATIVE_RUNTIME))
+ attributes.attribute(CppBinary.OPTIMIZED_ATTRIBUTE, false)
+ }
+ }
+ }
+
+ plugins.withType(JavaPlugin) {
+ ext {
+ testOptions += [
+ [propName: 'tests.native', value: buildNative, description: "Enable tests that require native extensions."]
+ ]
+
+ nativeDepsDir = file("${buildDir}/nativeDeps")
+ }
+
+ // Only copy and attach native deps if native build is enabled.
+ if (buildNative) {
+ task copyNativeDeps(type: Sync) {
+ from configurations.nativeDeps
+ into nativeDepsDir
+ }
+
+ tasks.withType(Test) {
+ dependsOn copyNativeDeps
+ systemProperty "java.library.path", nativeDepsDir
+ }
+ }
+ }
+})
+
+// If native build is disabled we just disable all tasks in the active task set that
+// originate from "native" projects.
+//
+// Perhaps there is a cleaner way to do it but removing their references from
+// settings.gradle would remove them from IDE detection, dependency resolution, etc.
+// This way seems better.
+if (!buildNative) {
+ gradle.taskGraph.whenReady { taskGraph ->
+ def tasks = taskGraph.getAllTasks()
+ tasks.findAll { task -> task.project in nativeProjects }.each { task ->
+ task.enabled = false
+ }
+ }
+}
diff --git a/gradle/testing/randomization/policies/tests.policy b/gradle/testing/randomization/policies/tests.policy
index c6f3f4b..66b0dea 100644
--- a/gradle/testing/randomization/policies/tests.policy
+++ b/gradle/testing/randomization/policies/tests.policy
@@ -62,6 +62,10 @@
permission java.lang.RuntimePermission "getClassLoader";
permission java.lang.RuntimePermission "setContextClassLoader";
+ // Needed for loading native library (lucene:misc:native) in lucene:misc
+ permission java.lang.RuntimePermission "loadLibrary.LuceneNativeIO";
+ permission java.lang.RuntimePermission "writeFileDescriptor";
+
// TestLockFactoriesMultiJVM opens a random port on 127.0.0.1 (port 0 = ephemeral port range):
permission java.net.SocketPermission "127.0.0.1:0", "accept,listen,resolve";
diff --git a/help/docker.txt b/help/docker.txt
new file mode 100644
index 0000000..c86572c
--- /dev/null
+++ b/help/docker.txt
@@ -0,0 +1,58 @@
+Docker Images for Solr
+======================
+
+Solr docker images are built using Palantir's Docker Gradle plugin, https://github.com/palantir/gradle-docker.
+
+Common Inputs
+-------------
+
+The docker image and its tag can be customized via the following options, all accepted via both Environment Variables and Gradle Properties.
+
+Docker Image Repository:
+ Default: "apache/solr"
+ EnvVar: SOLR_DOCKER_IMAGE_REPO
+ Gradle Property: -Psolr.docker.imageRepo
+
+Docker Image Tag:
+ Default: the Solr version, e.g. "9.0.0-SNAPSHOT"
+ EnvVar: SOLR_DOCKER_IMAGE_TAG
+ Gradle Property: -Psolr.docker.imageTag
+
+Docker Image Name: (Use this to explicitly set a whole image name. If given, the image repo and image version options above are ignored.)
+ Default: {image_repo}/{image_tag} (both options provided above, with defaults)
+ EnvVar: SOLR_DOCKER_IMAGE_NAME
+ Gradle Property: -Psolr.docker.imageName
+
+Building
+--------
+
+In order to build the Solr Docker image, run:
+
+gradlew docker
+
+The docker build task accepts the following inputs, in addition to the common inputs listed above:
+
+Base Docker Image: (The docker image used for the "FROM" in the Solr Dockerfile)
+ Default: "openjdk:11-jre-slim"
+ EnvVar: SOLR_DOCKER_BASE_IMAGE
+ Gradle Property: -Psolr.docker.baseImage
+
+Github URL or Mirror: (The URL of github or a mirror of github releases. This is of use when building the docker image behind a firewall that does not have access to external Github.)
+ Default: "github.com"
+ EnvVar: SOLR_DOCKER_GITHUB_URL
+ Gradle Property: -Psolr.docker.githubUrl
+
+Testing
+-------
+
+To test the docker image, run:
+
+gradlew dockerTest
+
+If a custom docker image name was used, via one of the common inputs described above, then the same input must be used while testing.
+
+You can also specify an explicit list of tests to run, or an explicit list of tests to ignore.
+Both inputs are optional, and by default all tests will be run.
+
+gradlew testDocker --tests create_core,demo
+gradlew testDocker --ignore demo-tini,initdb
\ No newline at end of file
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 163c09c..0fc90b4 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -9,6 +9,8 @@
* LUCENE-9322: Vector-valued fields, Lucene90 Codec (Mike Sokolov, Julie Tibshirani, Tomoko Uchida)
+* LUCENE-9004: Approximate nearest vector search via NSW graphs
+
System Requirements
* LUCENE-8738: Move to Java 11 as minimum Java version.
@@ -16,6 +18,10 @@
API Changes
+* LUCENE-8982: Separate out native code to another module to allow cpp
+ build with gradle. This also changes the name of the native "posix-support"
+ library to LuceneNativeIO. (Zachary Chen, Dawid Weiss)
+
* LUCENE-9562: All binary analysis packages (and corresponding
Maven artifacts) with names containing '-analyzers-' have been renamed
to '-analysis-'. (Dawid Weiss)
@@ -73,8 +79,8 @@
in Lucenes IndexWriter. The interface is not sufficient to efficiently
replace the functionality with reasonable efforts. (Simon Willnauer)
-* LUCENE-9317 LUCENE-9318 LUCENE-9319 LUCENE-9558 : Clean up package name conflicts between modules.
- See MIGRATE.md for details. (David Ryan, Tomoko Uchida, Uwe Schindler, Dawid Weiss)
+* LUCENE-9317 LUCENE-9318 LUCENE-9319 LUCENE-9558 LUCENE-9600 : Clean up package name conflicts
+ between modules. See MIGRATE.md for details. (David Ryan, Tomoko Uchida, Uwe Schindler, Dawid Weiss)
Improvements
@@ -156,6 +162,11 @@
* LUCENE-9531: Consolidated CharStream and FastCharStream classes: these have been moved
from each query parser package to org.apache.lucene.queryparser.charstream (Dawid Weiss).
+* LUCENE-9450: Use BinaryDocValues for the taxonomy index instead of StoredFields.
+ Add backwards compatibility tests for the taxonomy index. (Gautam Worah, Michael McCandless)
+
+* LUCENE-9605: Update snowball to d8cf01ddf37a, adds Yiddish stemmer. (Robert Muir)
+
Bug fixes
* LUCENE-8663: NRTCachingDirectory.slowFileExists may open a file while
@@ -216,18 +227,29 @@
* LUCENE-9553: New XYPoint query that accepts an array of XYGeometries. (Ignacio Vera)
+* LUCENE-9378: Doc values now allow configuring how to trade compression for
+ retrieval speed. (Adrien Grand)
+
Improvements
---------------------
* LUCENE-9455: ExitableTermsEnum should sample timeout and interruption
check before calling next(). (Zach Chen via Bruno Roustant)
+* LUCENE-9023: GlobalOrdinalsWithScore should not compute occurrences when the
+ provided min is 1. (Jim Ferenczi)
+
Optimizations
---------------------
* LUCENE-9536: Reduced memory usage for OrdinalMap when a segment has all
values. (Julie Tibshirani via Adrien Grand)
+Other
+---------------------
+
+* SOLR-14995: Update Jetty to 9.4.34 (Mike Drob)
+
======================= Lucene 8.7.0 =======================
API Changes
diff --git a/lucene/MIGRATE.md b/lucene/MIGRATE.md
index c18d418..f36b0e9 100644
--- a/lucene/MIGRATE.md
+++ b/lucene/MIGRATE.md
@@ -1,5 +1,17 @@
# Apache Lucene Migration Guide
+## Packages in misc module are renamed (LUCENE-9600)
+
+Following package names in misc module are renamed.
+
+- o.a.l.document is renamed to o.a.l.misc.document
+- o.a.l.index is renamed to o.a.l.misc.index
+- o.a.l.search is renamed to o.a.l.misc.search
+- o.a.l.store is renamed to o.a.l.misc.store
+- o.a.l.util is renamed to o.a.l.misc.util
+
+Also, o.a.l.document.InetAddressPoint and o.a.l.document.InetAddressRange are moved to core module.
+
## Packages in sandbox module are renamed (LUCENE-9319)
Following package names in sandbox module are renamed.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/classic/package-info.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/classic/package-info.java
new file mode 100644
index 0000000..4b2c471
--- /dev/null
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/classic/package-info.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Fast, general-purpose grammar-based tokenizers.
+ * {@link org.apache.lucene.analysis.classic.ClassicTokenizer ClassicTokenizer}:
+ * this class was formerly (prior to Lucene 3.1) named
+ * <code>StandardTokenizer</code>. (Its tokenization rules are not
+ * based on the Unicode Text Segmentation algorithm.)
+ * {@link org.apache.lucene.analysis.classic.ClassicAnalyzer ClassicAnalyzer} includes
+ * {@link org.apache.lucene.analysis.classic.ClassicTokenizer ClassicTokenizer},
+ * {@link org.apache.lucene.analysis.LowerCaseFilter LowerCaseFilter}
+ * and {@link org.apache.lucene.analysis.StopFilter StopFilter}.
+ */
+package org.apache.lucene.analysis.classic;
\ No newline at end of file
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/classic/package.html b/lucene/analysis/common/src/java/org/apache/lucene/analysis/classic/package.html
deleted file mode 100644
index d978878..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/classic/package.html
+++ /dev/null
@@ -1,37 +0,0 @@
-<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<!-- not a package-info.java, because we already defined this package in spatial/ -->
-<html>
-<head>
- <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-</head>
-<body>
- Fast, general-purpose grammar-based tokenizers.
- <ul>
- <li>{@link org.apache.lucene.analysis.classic.ClassicTokenizer ClassicTokenizer}:
- this class was formerly (prior to Lucene 3.1) named
- <code>StandardTokenizer</code>. (Its tokenization rules are not
- based on the Unicode Text Segmentation algorithm.)
- {@link org.apache.lucene.analysis.classic.ClassicAnalyzer ClassicAnalyzer} includes
- {@link org.apache.lucene.analysis.classic.ClassicTokenizer ClassicTokenizer},
- {@link org.apache.lucene.analysis.LowerCaseFilter LowerCaseFilter}
- and {@link org.apache.lucene.analysis.StopFilter StopFilter}.
- </li>
- </ul>
-</body>
-</html>
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/email/package-info.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/email/package-info.java
new file mode 100644
index 0000000..0e7ea11
--- /dev/null
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/email/package-info.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Fast, general-purpose URLs and email addresses tokenizers.
+ * <ul>
+ * <li>{@link org.apache.lucene.analysis.email.UAX29URLEmailTokenizer UAX29URLEmailTokenizer}:
+ * implements the Word Break rules from the Unicode Text Segmentation
+ * algorithm, as specified in
+ * <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>, except
+ * URLs and email addresses are also tokenized according to the relevant RFCs.
+ * <br>
+ * {@link org.apache.lucene.analysis.email.UAX29URLEmailAnalyzer UAX29URLEmailAnalyzer} includes
+ * {@link org.apache.lucene.analysis.email.UAX29URLEmailTokenizer UAX29URLEmailTokenizer},
+ * {@link org.apache.lucene.analysis.LowerCaseFilter LowerCaseFilter}
+ * and {@link org.apache.lucene.analysis.StopFilter StopFilter}.
+ * </li>
+ * </ul>
+ * */
+package org.apache.lucene.analysis.email;
\ No newline at end of file
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/email/package.html b/lucene/analysis/common/src/java/org/apache/lucene/analysis/email/package.html
deleted file mode 100644
index 59970c8..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/email/package.html
+++ /dev/null
@@ -1,39 +0,0 @@
-<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<!-- not a package-info.java, because we already defined this package in spatial/ -->
-<html>
-<head>
- <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-</head>
-<body>
- Fast, general-purpose URLs and email addresses tokenizers.
- <ul>
- <li>{@link org.apache.lucene.analysis.email.UAX29URLEmailTokenizer UAX29URLEmailTokenizer}:
- implements the Word Break rules from the Unicode Text Segmentation
- algorithm, as specified in
- <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>, except
- URLs and email addresses are also tokenized according to the relevant RFCs.
- <br>
- {@link org.apache.lucene.analysis.email.UAX29URLEmailAnalyzer UAX29URLEmailAnalyzer} includes
- {@link org.apache.lucene.analysis.email.UAX29URLEmailTokenizer UAX29URLEmailTokenizer},
- {@link org.apache.lucene.analysis.LowerCaseFilter LowerCaseFilter}
- and {@link org.apache.lucene.analysis.StopFilter StopFilter}.
- </li>
- </ul>
-</body>
-</html>
diff --git a/lucene/analysis/common/src/java/org/tartarus/snowball/ext/SerbianStemmer.java b/lucene/analysis/common/src/java/org/tartarus/snowball/ext/SerbianStemmer.java
index 2902b91..69daf14 100644
--- a/lucene/analysis/common/src/java/org/tartarus/snowball/ext/SerbianStemmer.java
+++ b/lucene/analysis/common/src/java/org/tartarus/snowball/ext/SerbianStemmer.java
@@ -2257,9 +2257,8 @@
private static final char g_rg[] = {1 };
-private int I_p3;
-private int I_p2;
private int I_p1;
+private boolean B_no_diacritics;
private boolean r_cyr_to_lat() {
@@ -2516,7 +2515,7 @@
}
private boolean r_mark_regions() {
- I_p3 = 0;
+ B_no_diacritics = true;
int v_1 = cursor;
lab0: {
golab1: while(true)
@@ -2534,11 +2533,10 @@
}
cursor++;
}
- I_p3 = cursor;
+ B_no_diacritics = false;
}
cursor = v_1;
I_p1 = limit;
- I_p2 = 0;
int v_3 = cursor;
lab3: {
golab4: while(true)
@@ -2557,59 +2555,55 @@
cursor++;
}
I_p1 = cursor;
- }
- cursor = v_3;
- int v_5 = cursor;
- lab6: {
- golab7: while(true)
+ if (!(I_p1 < 2))
{
- lab8: {
- if (!(eq_s("r")))
+ break lab3;
+ }
+ golab6: while(true)
+ {
+ lab7: {
+ if (!(out_grouping(g_v, 97, 117)))
{
- break lab8;
+ break lab7;
}
- break golab7;
+ break golab6;
}
if (cursor >= limit)
{
- break lab6;
+ break lab3;
}
cursor++;
}
- I_p2 = cursor;
- if (!((I_p1 - I_p2) > 1))
+ I_p1 = cursor;
+ }
+ cursor = v_3;
+ int v_6 = cursor;
+ lab8: {
+ golab9: while(true)
{
- break lab6;
- }
- I_p1 = I_p2;
- }
- cursor = v_5;
- if (!(I_p1 < 2))
- {
- return false;
- }
- lab9: {
- int v_7 = cursor;
- lab10: {
- if (!(I_p1 == I_p2))
- {
- break lab10;
- }
- golab11: while(true)
- {
- lab12: {
- if (!(eq_s("r")))
- {
- break lab12;
- }
- break golab11;
- }
- if (cursor >= limit)
+ lab10: {
+ if (!(eq_s("r")))
{
break lab10;
}
- cursor++;
+ break golab9;
}
+ if (cursor >= limit)
+ {
+ break lab8;
+ }
+ cursor++;
+ }
+ lab11: {
+ int v_8 = cursor;
+ lab12: {
+ if (!(cursor >= 2))
+ {
+ break lab12;
+ }
+ break lab11;
+ }
+ cursor = v_8;
golab13: while(true)
{
lab14: {
@@ -2621,49 +2615,18 @@
}
if (cursor >= limit)
{
- break lab10;
+ break lab8;
}
cursor++;
}
- break lab9;
}
- cursor = v_7;
- if (!(I_p1 != I_p2))
+ if (!((I_p1 - cursor) > 1))
{
- return false;
+ break lab8;
}
- golab15: while(true)
- {
- lab16: {
- if (!(in_grouping(g_v, 97, 117)))
- {
- break lab16;
- }
- break golab15;
- }
- if (cursor >= limit)
- {
- return false;
- }
- cursor++;
- }
- golab17: while(true)
- {
- lab18: {
- if (!(out_grouping(g_v, 97, 117)))
- {
- break lab18;
- }
- break golab17;
- }
- if (cursor >= limit)
- {
- return false;
- }
- cursor++;
- }
+ I_p1 = cursor;
}
- I_p1 = cursor;
+ cursor = v_6;
return true;
}
@@ -2675,14 +2638,6 @@
return true;
}
-private boolean r_R2() {
- if (!(I_p3 == 0))
- {
- return false;
- }
- return true;
-}
-
private boolean r_Step_1() {
int among_var;
ket = cursor;
@@ -2712,7 +2667,7 @@
slice_from("\u010Dajni");
break;
case 7:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
@@ -2788,7 +2743,7 @@
slice_from("du\u0161ni");
break;
case 31:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
@@ -2855,7 +2810,7 @@
slice_from("\u0161avi");
break;
case 52:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
@@ -2868,7 +2823,7 @@
slice_from("a\u010Dka");
break;
case 55:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
@@ -2878,7 +2833,7 @@
slice_from("u\u0161ka");
break;
case 57:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
@@ -2906,7 +2861,7 @@
slice_from("ti\u010Dni");
break;
case 65:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
@@ -2931,7 +2886,7 @@
slice_from("osti");
break;
case 72:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
@@ -2992,7 +2947,7 @@
slice_from("a\u0161ni");
break;
case 91:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
@@ -3377,308 +3332,308 @@
slice_from("at");
break;
case 121:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("luc");
break;
case 122:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("snj");
break;
case 123:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("os");
break;
case 124:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("ac");
break;
case 125:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("ec");
break;
case 126:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("uc");
break;
case 127:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("rosi");
break;
case 128:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("aca");
break;
case 129:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("jas");
break;
case 130:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("tas");
break;
case 131:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("gas");
break;
case 132:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("nas");
break;
case 133:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("kas");
break;
case 134:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("vas");
break;
case 135:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("bas");
break;
case 136:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("as");
break;
case 137:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("cin");
break;
case 138:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("astaj");
break;
case 139:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("istaj");
break;
case 140:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("ostaj");
break;
case 141:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("asta");
break;
case 142:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("ista");
break;
case 143:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("osta");
break;
case 144:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("ava");
break;
case 145:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("eva");
break;
case 146:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("iva");
break;
case 147:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("uva");
break;
case 148:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("ova");
break;
case 149:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("jeti");
break;
case 150:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("inj");
break;
case 151:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("ist");
break;
case 152:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("es");
break;
case 153:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("et");
break;
case 154:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("is");
break;
case 155:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("ir");
break;
case 156:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("ur");
break;
case 157:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("uj");
break;
case 158:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("ni");
break;
case 159:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("sn");
break;
case 160:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("ta");
break;
case 161:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("a");
break;
case 162:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("i");
break;
case 163:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
slice_from("e");
break;
case 164:
- if (!r_R2())
+ if (!(B_no_diacritics))
{
return false;
}
@@ -3706,9 +3661,7 @@
public boolean stem() {
r_cyr_to_lat();
r_prelude();
- int v_3 = cursor;
r_mark_regions();
- cursor = v_3;
limit_backward = cursor;
cursor = limit;
int v_4 = limit - cursor;
diff --git a/lucene/analysis/common/src/java/org/tartarus/snowball/ext/YiddishStemmer.java b/lucene/analysis/common/src/java/org/tartarus/snowball/ext/YiddishStemmer.java
new file mode 100644
index 0000000..6c50831
--- /dev/null
+++ b/lucene/analysis/common/src/java/org/tartarus/snowball/ext/YiddishStemmer.java
@@ -0,0 +1,1381 @@
+// Generated by Snowball 2.0.0 - https://snowballstem.org/
+
+package org.tartarus.snowball.ext;
+
+import org.tartarus.snowball.Among;
+
+/**
+ * This class implements the stemming algorithm defined by a snowball script.
+ * <p>
+ * Generated by Snowball 2.0.0 - https://snowballstem.org/
+ * </p>
+ */
+@SuppressWarnings("unused")
+public class YiddishStemmer extends org.tartarus.snowball.SnowballStemmer {
+
+ private static final long serialVersionUID = 1L;
+ private static final java.lang.invoke.MethodHandles.Lookup methodObject = java.lang.invoke.MethodHandles.lookup();
+
+private final static Among a_0[] = {
+ new Among("\u05D0\u05D3\u05D5\u05E8\u05DB", -1, 1),
+ new Among("\u05D0\u05D4\u05D9\u05E0", -1, 1),
+ new Among("\u05D0\u05D4\u05E2\u05E8", -1, 1),
+ new Among("\u05D0\u05D4\u05F2\u05DE", -1, 1),
+ new Among("\u05D0\u05D5\u05DE", -1, 1),
+ new Among("\u05D0\u05D5\u05E0\u05D8\u05E2\u05E8", -1, 1),
+ new Among("\u05D0\u05D9\u05D1\u05E2\u05E8", -1, 1),
+ new Among("\u05D0\u05E0", -1, 1),
+ new Among("\u05D0\u05E0\u05D8", 7, 1),
+ new Among("\u05D0\u05E0\u05D8\u05E7\u05E2\u05D2\u05E0", 8, 1),
+ new Among("\u05D0\u05E0\u05D9\u05D3\u05E2\u05E8", 7, 1),
+ new Among("\u05D0\u05E4", -1, 1),
+ new Among("\u05D0\u05E4\u05D9\u05E8", 11, 1),
+ new Among("\u05D0\u05E7\u05E2\u05D2\u05E0", -1, 1),
+ new Among("\u05D0\u05E8\u05D0\u05E4", -1, 1),
+ new Among("\u05D0\u05E8\u05D5\u05DE", -1, 1),
+ new Among("\u05D0\u05E8\u05D5\u05E0\u05D8\u05E2\u05E8", -1, 1),
+ new Among("\u05D0\u05E8\u05D9\u05D1\u05E2\u05E8", -1, 1),
+ new Among("\u05D0\u05E8\u05F1\u05E1", -1, 1),
+ new Among("\u05D0\u05E8\u05F1\u05E4", -1, 1),
+ new Among("\u05D0\u05E8\u05F2\u05E0", -1, 1),
+ new Among("\u05D0\u05F0\u05E2\u05E7", -1, 1),
+ new Among("\u05D0\u05F1\u05E1", -1, 1),
+ new Among("\u05D0\u05F1\u05E4", -1, 1),
+ new Among("\u05D0\u05F2\u05E0", -1, 1),
+ new Among("\u05D1\u05D0", -1, 1),
+ new Among("\u05D1\u05F2", -1, 1),
+ new Among("\u05D3\u05D5\u05E8\u05DB", -1, 1),
+ new Among("\u05D3\u05E2\u05E8", -1, 1),
+ new Among("\u05DE\u05D9\u05D8", -1, 1),
+ new Among("\u05E0\u05D0\u05DB", -1, 1),
+ new Among("\u05E4\u05D0\u05E8", -1, 1),
+ new Among("\u05E4\u05D0\u05E8\u05D1\u05F2", 31, 1),
+ new Among("\u05E4\u05D0\u05E8\u05F1\u05E1", 31, 1),
+ new Among("\u05E4\u05D5\u05E0\u05D0\u05E0\u05D3\u05E2\u05E8", -1, 1),
+ new Among("\u05E6\u05D5", -1, 1),
+ new Among("\u05E6\u05D5\u05D6\u05D0\u05DE\u05E2\u05E0", 35, 1),
+ new Among("\u05E6\u05D5\u05E0\u05F1\u05E4", 35, 1),
+ new Among("\u05E6\u05D5\u05E8\u05D9\u05E7", 35, 1),
+ new Among("\u05E6\u05E2", -1, 1)
+};
+
+private final static Among a_1[] = {
+ new Among("\u05D3\u05D6\u05E9", -1, -1),
+ new Among("\u05E9\u05D8\u05E8", -1, -1),
+ new Among("\u05E9\u05D8\u05E9", -1, -1),
+ new Among("\u05E9\u05E4\u05E8", -1, -1)
+};
+
+private final static Among a_2[] = {
+ new Among("\u05D5\u05E0\u05D2", -1, 1),
+ new Among("\u05E1\u05D8\u05D5", -1, 1),
+ new Among("\u05D8", -1, 1),
+ new Among("\u05D1\u05E8\u05D0\u05DB\u05D8", 2, 31),
+ new Among("\u05E1\u05D8", 2, 1),
+ new Among("\u05D9\u05E1\u05D8", 4, 1),
+ new Among("\u05D2\u05D9\u05E1\u05D8", 5, 33),
+ new Among("\u05E9\u05D9\u05E1\u05D8", 5, 33),
+ new Among("\u05E9\u05D0\u05E4\u05D8", 2, 1),
+ new Among("\u05D4\u05F2\u05D8", 2, 1),
+ new Among("\u05E7\u05F2\u05D8", 2, 1),
+ new Among("\u05D9\u05E7\u05F2\u05D8", 10, 1),
+ new Among("\u05DC\u05E2\u05DB", -1, 1),
+ new Among("\u05E2\u05DC\u05E2\u05DB", 12, 1),
+ new Among("\u05D9\u05D6\u05DE", -1, 1),
+ new Among("\u05D9\u05DE", -1, 1),
+ new Among("\u05E2\u05DE", -1, 1),
+ new Among("\u05E2\u05E0\u05E2\u05DE", 16, 3),
+ new Among("\u05D8\u05E2\u05E0\u05E2\u05DE", 17, 4),
+ new Among("\u05E0", -1, 1),
+ new Among("\u05E7\u05DC\u05D9\u05D1\u05E0", 19, 14),
+ new Among("\u05E8\u05D9\u05D1\u05E0", 19, 15),
+ new Among("\u05D8\u05E8\u05D9\u05D1\u05E0", 21, 12),
+ new Among("\u05E9\u05E8\u05D9\u05D1\u05E0", 21, 7),
+ new Among("\u05D4\u05F1\u05D1\u05E0", 19, 27),
+ new Among("\u05E9\u05F0\u05D9\u05D2\u05E0", 19, 17),
+ new Among("\u05D6\u05D5\u05E0\u05D2\u05E0", 19, 22),
+ new Among("\u05E9\u05DC\u05D5\u05E0\u05D2\u05E0", 19, 25),
+ new Among("\u05E6\u05F0\u05D5\u05E0\u05D2\u05E0", 19, 24),
+ new Among("\u05D1\u05F1\u05D2\u05E0", 19, 26),
+ new Among("\u05D1\u05D5\u05E0\u05D3\u05E0", 19, 20),
+ new Among("\u05F0\u05D9\u05D6\u05E0", 19, 11),
+ new Among("\u05D8\u05E0", 19, 4),
+ new Among("GE\u05D1\u05D9\u05D8\u05E0", 32, 9),
+ new Among("GE\u05DC\u05D9\u05D8\u05E0", 32, 13),
+ new Among("GE\u05DE\u05D9\u05D8\u05E0", 32, 8),
+ new Among("\u05E9\u05E0\u05D9\u05D8\u05E0", 32, 19),
+ new Among("\u05E1\u05D8\u05E0", 32, 1),
+ new Among("\u05D9\u05E1\u05D8\u05E0", 37, 1),
+ new Among("GE\u05D1\u05D9\u05E1\u05E0", 19, 10),
+ new Among("\u05E9\u05DE\u05D9\u05E1\u05E0", 19, 18),
+ new Among("GE\u05E8\u05D9\u05E1\u05E0", 19, 16),
+ new Among("\u05E2\u05E0", 19, 1),
+ new Among("\u05D2\u05D0\u05E0\u05D2\u05E2\u05E0", 42, 5),
+ new Among("\u05E2\u05DC\u05E2\u05E0", 42, 1),
+ new Among("\u05E0\u05D5\u05DE\u05E2\u05E0", 42, 6),
+ new Among("\u05D9\u05D6\u05DE\u05E2\u05E0", 42, 1),
+ new Among("\u05E9\u05D8\u05D0\u05E0\u05E2\u05E0", 42, 29),
+ new Among("\u05D8\u05E8\u05D5\u05E0\u05E7\u05E0", 19, 23),
+ new Among("\u05E4\u05D0\u05E8\u05DC\u05F1\u05E8\u05E0", 19, 28),
+ new Among("\u05E9\u05F0\u05F1\u05E8\u05E0", 19, 30),
+ new Among("\u05F0\u05D5\u05D8\u05E9\u05E0", 19, 21),
+ new Among("\u05D2\u05F2\u05E0", 19, 5),
+ new Among("\u05E1", -1, 1),
+ new Among("\u05D8\u05E1", 53, 4),
+ new Among("\u05E0\u05E1", 53, 1),
+ new Among("\u05D8\u05E0\u05E1", 55, 4),
+ new Among("\u05E2\u05E0\u05E1", 55, 3),
+ new Among("\u05E2\u05E1", 53, 1),
+ new Among("\u05D9\u05E2\u05E1", 58, 2),
+ new Among("\u05E2\u05DC\u05E2\u05E1", 58, 1),
+ new Among("\u05E2\u05E8\u05E1", 53, 1),
+ new Among("\u05E2\u05E0\u05E2\u05E8\u05E1", 61, 1),
+ new Among("\u05E2", -1, 1),
+ new Among("\u05D8\u05E2", 63, 4),
+ new Among("\u05E1\u05D8\u05E2", 64, 1),
+ new Among("\u05D9\u05E2", 63, -1),
+ new Among("\u05E2\u05DC\u05E2", 63, 1),
+ new Among("\u05E2\u05E0\u05E2", 63, 3),
+ new Among("\u05D8\u05E2\u05E0\u05E2", 68, 4),
+ new Among("\u05E2\u05E8", -1, 1),
+ new Among("\u05D8\u05E2\u05E8", 70, 4),
+ new Among("\u05E1\u05D8\u05E2\u05E8", 71, 1),
+ new Among("\u05E2\u05E0\u05E2\u05E8", 70, 3),
+ new Among("\u05D8\u05E2\u05E0\u05E2\u05E8", 73, 4),
+ new Among("\u05D5\u05EA", -1, 32)
+};
+
+private final static Among a_3[] = {
+ new Among("\u05D5\u05E0\u05D2", -1, 1),
+ new Among("\u05E9\u05D0\u05E4\u05D8", -1, 1),
+ new Among("\u05D4\u05F2\u05D8", -1, 1),
+ new Among("\u05E7\u05F2\u05D8", -1, 1),
+ new Among("\u05D9\u05E7\u05F2\u05D8", 3, 1),
+ new Among("\u05DC", -1, 2)
+};
+
+private final static Among a_4[] = {
+ new Among("\u05D9\u05D2", -1, 1),
+ new Among("\u05D9\u05E7", -1, 1),
+ new Among("\u05D3\u05D9\u05E7", 1, 1),
+ new Among("\u05E0\u05D3\u05D9\u05E7", 2, 1),
+ new Among("\u05E2\u05E0\u05D3\u05D9\u05E7", 3, 2),
+ new Among("\u05D1\u05DC\u05D9\u05E7", 1, -1),
+ new Among("\u05D2\u05DC\u05D9\u05E7", 1, -1),
+ new Among("\u05E0\u05D9\u05E7", 1, 1),
+ new Among("\u05D9\u05E9", -1, 1)
+};
+
+private static final char g_niked[] = {255, 155, 6 };
+
+private static final char g_vowel[] = {33, 2, 4, 0, 6 };
+
+private static final char g_consonant[] = {239, 254, 253, 131 };
+
+private int I_x;
+private int I_p1;
+
+
+private boolean r_prelude() {
+ int v_1 = cursor;
+ lab0: {
+ while(true)
+ {
+ int v_2 = cursor;
+ lab1: {
+ golab2: while(true)
+ {
+ int v_3 = cursor;
+ lab3: {
+ lab4: {
+ int v_4 = cursor;
+ lab5: {
+ bra = cursor;
+ if (!(eq_s("\u05D5\u05D5")))
+ {
+ break lab5;
+ }
+ ket = cursor;
+ {
+ int v_5 = cursor;
+ lab6: {
+ if (!(eq_s("\u05BC")))
+ {
+ break lab6;
+ }
+ break lab5;
+ }
+ cursor = v_5;
+ }
+ slice_from("\u05F0");
+ break lab4;
+ }
+ cursor = v_4;
+ lab7: {
+ bra = cursor;
+ if (!(eq_s("\u05D5\u05D9")))
+ {
+ break lab7;
+ }
+ ket = cursor;
+ {
+ int v_6 = cursor;
+ lab8: {
+ if (!(eq_s("\u05B4")))
+ {
+ break lab8;
+ }
+ break lab7;
+ }
+ cursor = v_6;
+ }
+ slice_from("\u05F1");
+ break lab4;
+ }
+ cursor = v_4;
+ lab9: {
+ bra = cursor;
+ if (!(eq_s("\u05D9\u05D9")))
+ {
+ break lab9;
+ }
+ ket = cursor;
+ {
+ int v_7 = cursor;
+ lab10: {
+ if (!(eq_s("\u05B4")))
+ {
+ break lab10;
+ }
+ break lab9;
+ }
+ cursor = v_7;
+ }
+ slice_from("\u05F2");
+ break lab4;
+ }
+ cursor = v_4;
+ lab11: {
+ bra = cursor;
+ if (!(eq_s("\u05DA")))
+ {
+ break lab11;
+ }
+ ket = cursor;
+ slice_from("\u05DB");
+ break lab4;
+ }
+ cursor = v_4;
+ lab12: {
+ bra = cursor;
+ if (!(eq_s("\u05DD")))
+ {
+ break lab12;
+ }
+ ket = cursor;
+ slice_from("\u05DE");
+ break lab4;
+ }
+ cursor = v_4;
+ lab13: {
+ bra = cursor;
+ if (!(eq_s("\u05DF")))
+ {
+ break lab13;
+ }
+ ket = cursor;
+ slice_from("\u05E0");
+ break lab4;
+ }
+ cursor = v_4;
+ lab14: {
+ bra = cursor;
+ if (!(eq_s("\u05E3")))
+ {
+ break lab14;
+ }
+ ket = cursor;
+ slice_from("\u05E4");
+ break lab4;
+ }
+ cursor = v_4;
+ bra = cursor;
+ if (!(eq_s("\u05E5")))
+ {
+ break lab3;
+ }
+ ket = cursor;
+ slice_from("\u05E6");
+ }
+ cursor = v_3;
+ break golab2;
+ }
+ cursor = v_3;
+ if (cursor >= limit)
+ {
+ break lab1;
+ }
+ cursor++;
+ }
+ continue;
+ }
+ cursor = v_2;
+ break;
+ }
+ }
+ cursor = v_1;
+ int v_8 = cursor;
+ lab15: {
+ while(true)
+ {
+ int v_9 = cursor;
+ lab16: {
+ golab17: while(true)
+ {
+ int v_10 = cursor;
+ lab18: {
+ bra = cursor;
+ if (!(in_grouping(g_niked, 1456, 1474)))
+ {
+ break lab18;
+ }
+ ket = cursor;
+ slice_del();
+ cursor = v_10;
+ break golab17;
+ }
+ cursor = v_10;
+ if (cursor >= limit)
+ {
+ break lab16;
+ }
+ cursor++;
+ }
+ continue;
+ }
+ cursor = v_9;
+ break;
+ }
+ }
+ cursor = v_8;
+ return true;
+}
+
+private boolean r_mark_regions() {
+ I_p1 = limit;
+ int v_1 = cursor;
+ lab0: {
+ bra = cursor;
+ if (!(eq_s("\u05D2\u05E2")))
+ {
+ cursor = v_1;
+ break lab0;
+ }
+ ket = cursor;
+ slice_from("GE");
+ }
+ int v_2 = cursor;
+ lab1: {
+ if (find_among(a_0) == 0)
+ {
+ cursor = v_2;
+ break lab1;
+ }
+ lab2: {
+ int v_3 = cursor;
+ lab3: {
+ lab4: {
+ int v_4 = cursor;
+ lab5: {
+ if (!(eq_s("\u05E6\u05D5\u05D2\u05E0")))
+ {
+ break lab5;
+ }
+ break lab4;
+ }
+ cursor = v_4;
+ lab6: {
+ if (!(eq_s("\u05E6\u05D5\u05E7\u05D8")))
+ {
+ break lab6;
+ }
+ break lab4;
+ }
+ cursor = v_4;
+ if (!(eq_s("\u05E6\u05D5\u05E7\u05E0")))
+ {
+ break lab3;
+ }
+ }
+ {
+ int v_5 = cursor;
+ lab7: {
+ {
+ int c = cursor + 1;
+ if (0 > c || c > limit)
+ {
+ break lab7;
+ }
+ cursor = c;
+ }
+ break lab3;
+ }
+ cursor = v_5;
+ }
+ {
+ int c = cursor + -4;
+ if (0 > c || c > limit)
+ {
+ break lab3;
+ }
+ cursor = c;
+ }
+ break lab2;
+ }
+ cursor = v_3;
+ lab8: {
+ bra = cursor;
+ if (!(eq_s("\u05D2\u05E2")))
+ {
+ break lab8;
+ }
+ ket = cursor;
+ slice_from("GE");
+ break lab2;
+ }
+ cursor = v_3;
+ bra = cursor;
+ if (!(eq_s("\u05E6\u05D5")))
+ {
+ cursor = v_2;
+ break lab1;
+ }
+ ket = cursor;
+ slice_from("TSU");
+ }
+ }
+ int v_6 = cursor;
+ {
+ int c = cursor + 3;
+ if (0 > c || c > limit)
+ {
+ return false;
+ }
+ cursor = c;
+ }
+ I_x = cursor;
+ cursor = v_6;
+ int v_7 = cursor;
+ lab9: {
+ if (find_among(a_1) == 0)
+ {
+ cursor = v_7;
+ break lab9;
+ }
+ }
+ {
+ int v_8 = cursor;
+ lab10: {
+ if (!(in_grouping(g_consonant, 1489, 1520)))
+ {
+ break lab10;
+ }
+ if (!(in_grouping(g_consonant, 1489, 1520)))
+ {
+ break lab10;
+ }
+ if (!(in_grouping(g_consonant, 1489, 1520)))
+ {
+ break lab10;
+ }
+ I_p1 = cursor;
+ return false;
+ }
+ cursor = v_8;
+ }
+ golab11: while(true)
+ {
+ int v_9 = cursor;
+ lab12: {
+ if (!(in_grouping(g_vowel, 1488, 1522)))
+ {
+ break lab12;
+ }
+ cursor = v_9;
+ break golab11;
+ }
+ cursor = v_9;
+ if (cursor >= limit)
+ {
+ return false;
+ }
+ cursor++;
+ }
+ while(true)
+ {
+ lab13: {
+ if (!(in_grouping(g_vowel, 1488, 1522)))
+ {
+ break lab13;
+ }
+ continue;
+ }
+ break;
+ }
+ I_p1 = cursor;
+ lab14: {
+ if (!(I_p1 < I_x))
+ {
+ break lab14;
+ }
+ I_p1 = I_x;
+ }
+ return true;
+}
+
+private boolean r_R1() {
+ if (!(I_p1 <= cursor))
+ {
+ return false;
+ }
+ return true;
+}
+
+private boolean r_standard_suffix() {
+ int among_var;
+ int v_1 = limit - cursor;
+ lab0: {
+ ket = cursor;
+ among_var = find_among_b(a_2);
+ if (among_var == 0)
+ {
+ break lab0;
+ }
+ bra = cursor;
+ switch (among_var) {
+ case 1:
+ if (!r_R1())
+ {
+ break lab0;
+ }
+ slice_del();
+ break;
+ case 2:
+ if (!r_R1())
+ {
+ break lab0;
+ }
+ slice_from("\u05D9\u05E2");
+ break;
+ case 3:
+ if (!r_R1())
+ {
+ break lab0;
+ }
+ slice_del();
+ {
+ int v_2 = limit - cursor;
+ lab1: {
+ ket = cursor;
+ if (!(eq_s_b("\u05D2\u05D0\u05E0\u05D2")))
+ {
+ break lab1;
+ }
+ bra = cursor;
+ slice_from("\u05D2\u05F2");
+ break lab0;
+ }
+ cursor = limit - v_2;
+ }
+ {
+ int v_3 = limit - cursor;
+ lab2: {
+ ket = cursor;
+ if (!(eq_s_b("\u05E0\u05D5\u05DE")))
+ {
+ break lab2;
+ }
+ bra = cursor;
+ slice_from("\u05E0\u05E2\u05DE");
+ break lab0;
+ }
+ cursor = limit - v_3;
+ }
+ {
+ int v_4 = limit - cursor;
+ lab3: {
+ ket = cursor;
+ if (!(eq_s_b("\u05DE\u05D9\u05D8")))
+ {
+ break lab3;
+ }
+ bra = cursor;
+ slice_from("\u05DE\u05F2\u05D3");
+ break lab0;
+ }
+ cursor = limit - v_4;
+ }
+ {
+ int v_5 = limit - cursor;
+ lab4: {
+ ket = cursor;
+ if (!(eq_s_b("\u05D1\u05D9\u05D8")))
+ {
+ break lab4;
+ }
+ bra = cursor;
+ slice_from("\u05D1\u05F2\u05D8");
+ break lab0;
+ }
+ cursor = limit - v_5;
+ }
+ {
+ int v_6 = limit - cursor;
+ lab5: {
+ ket = cursor;
+ if (!(eq_s_b("\u05D1\u05D9\u05E1")))
+ {
+ break lab5;
+ }
+ bra = cursor;
+ slice_from("\u05D1\u05F2\u05E1");
+ break lab0;
+ }
+ cursor = limit - v_6;
+ }
+ {
+ int v_7 = limit - cursor;
+ lab6: {
+ ket = cursor;
+ if (!(eq_s_b("\u05F0\u05D9\u05D6")))
+ {
+ break lab6;
+ }
+ bra = cursor;
+ slice_from("\u05F0\u05F2\u05D6");
+ break lab0;
+ }
+ cursor = limit - v_7;
+ }
+ {
+ int v_8 = limit - cursor;
+ lab7: {
+ ket = cursor;
+ if (!(eq_s_b("\u05D8\u05E8\u05D9\u05D1")))
+ {
+ break lab7;
+ }
+ bra = cursor;
+ slice_from("\u05D8\u05E8\u05F2\u05D1");
+ break lab0;
+ }
+ cursor = limit - v_8;
+ }
+ {
+ int v_9 = limit - cursor;
+ lab8: {
+ ket = cursor;
+ if (!(eq_s_b("\u05DC\u05D9\u05D8")))
+ {
+ break lab8;
+ }
+ bra = cursor;
+ slice_from("\u05DC\u05F2\u05D8");
+ break lab0;
+ }
+ cursor = limit - v_9;
+ }
+ {
+ int v_10 = limit - cursor;
+ lab9: {
+ ket = cursor;
+ if (!(eq_s_b("\u05E7\u05DC\u05D9\u05D1")))
+ {
+ break lab9;
+ }
+ bra = cursor;
+ slice_from("\u05E7\u05DC\u05F2\u05D1");
+ break lab0;
+ }
+ cursor = limit - v_10;
+ }
+ {
+ int v_11 = limit - cursor;
+ lab10: {
+ ket = cursor;
+ if (!(eq_s_b("\u05E8\u05D9\u05D1")))
+ {
+ break lab10;
+ }
+ bra = cursor;
+ slice_from("\u05E8\u05F2\u05D1");
+ break lab0;
+ }
+ cursor = limit - v_11;
+ }
+ {
+ int v_12 = limit - cursor;
+ lab11: {
+ ket = cursor;
+ if (!(eq_s_b("\u05E8\u05D9\u05E1")))
+ {
+ break lab11;
+ }
+ bra = cursor;
+ slice_from("\u05E8\u05F2\u05E1");
+ break lab0;
+ }
+ cursor = limit - v_12;
+ }
+ {
+ int v_13 = limit - cursor;
+ lab12: {
+ ket = cursor;
+ if (!(eq_s_b("\u05E9\u05F0\u05D9\u05D2")))
+ {
+ break lab12;
+ }
+ bra = cursor;
+ slice_from("\u05E9\u05F0\u05F2\u05D2");
+ break lab0;
+ }
+ cursor = limit - v_13;
+ }
+ {
+ int v_14 = limit - cursor;
+ lab13: {
+ ket = cursor;
+ if (!(eq_s_b("\u05E9\u05DE\u05D9\u05E1")))
+ {
+ break lab13;
+ }
+ bra = cursor;
+ slice_from("\u05E9\u05DE\u05F2\u05E1");
+ break lab0;
+ }
+ cursor = limit - v_14;
+ }
+ {
+ int v_15 = limit - cursor;
+ lab14: {
+ ket = cursor;
+ if (!(eq_s_b("\u05E9\u05E0\u05D9\u05D8")))
+ {
+ break lab14;
+ }
+ bra = cursor;
+ slice_from("\u05E9\u05E0\u05F2\u05D3");
+ break lab0;
+ }
+ cursor = limit - v_15;
+ }
+ {
+ int v_16 = limit - cursor;
+ lab15: {
+ ket = cursor;
+ if (!(eq_s_b("\u05E9\u05E8\u05D9\u05D1")))
+ {
+ break lab15;
+ }
+ bra = cursor;
+ slice_from("\u05E9\u05E8\u05F2\u05D1");
+ break lab0;
+ }
+ cursor = limit - v_16;
+ }
+ {
+ int v_17 = limit - cursor;
+ lab16: {
+ ket = cursor;
+ if (!(eq_s_b("\u05D1\u05D5\u05E0\u05D3")))
+ {
+ break lab16;
+ }
+ bra = cursor;
+ slice_from("\u05D1\u05D9\u05E0\u05D3");
+ break lab0;
+ }
+ cursor = limit - v_17;
+ }
+ {
+ int v_18 = limit - cursor;
+ lab17: {
+ ket = cursor;
+ if (!(eq_s_b("\u05F0\u05D5\u05D8\u05E9")))
+ {
+ break lab17;
+ }
+ bra = cursor;
+ slice_from("\u05F0\u05D9\u05D8\u05E9");
+ break lab0;
+ }
+ cursor = limit - v_18;
+ }
+ {
+ int v_19 = limit - cursor;
+ lab18: {
+ ket = cursor;
+ if (!(eq_s_b("\u05D6\u05D5\u05E0\u05D2")))
+ {
+ break lab18;
+ }
+ bra = cursor;
+ slice_from("\u05D6\u05D9\u05E0\u05D2");
+ break lab0;
+ }
+ cursor = limit - v_19;
+ }
+ {
+ int v_20 = limit - cursor;
+ lab19: {
+ ket = cursor;
+ if (!(eq_s_b("\u05D8\u05E8\u05D5\u05E0\u05E7")))
+ {
+ break lab19;
+ }
+ bra = cursor;
+ slice_from("\u05D8\u05E8\u05D9\u05E0\u05E7");
+ break lab0;
+ }
+ cursor = limit - v_20;
+ }
+ {
+ int v_21 = limit - cursor;
+ lab20: {
+ ket = cursor;
+ if (!(eq_s_b("\u05E6\u05F0\u05D5\u05E0\u05D2")))
+ {
+ break lab20;
+ }
+ bra = cursor;
+ slice_from("\u05E6\u05F0\u05D9\u05E0\u05D2");
+ break lab0;
+ }
+ cursor = limit - v_21;
+ }
+ {
+ int v_22 = limit - cursor;
+ lab21: {
+ ket = cursor;
+ if (!(eq_s_b("\u05E9\u05DC\u05D5\u05E0\u05D2")))
+ {
+ break lab21;
+ }
+ bra = cursor;
+ slice_from("\u05E9\u05DC\u05D9\u05E0\u05D2");
+ break lab0;
+ }
+ cursor = limit - v_22;
+ }
+ {
+ int v_23 = limit - cursor;
+ lab22: {
+ ket = cursor;
+ if (!(eq_s_b("\u05D1\u05F1\u05D2")))
+ {
+ break lab22;
+ }
+ bra = cursor;
+ slice_from("\u05D1\u05F2\u05D2");
+ break lab0;
+ }
+ cursor = limit - v_23;
+ }
+ {
+ int v_24 = limit - cursor;
+ lab23: {
+ ket = cursor;
+ if (!(eq_s_b("\u05D4\u05F1\u05D1")))
+ {
+ break lab23;
+ }
+ bra = cursor;
+ slice_from("\u05D4\u05F2\u05D1");
+ break lab0;
+ }
+ cursor = limit - v_24;
+ }
+ {
+ int v_25 = limit - cursor;
+ lab24: {
+ ket = cursor;
+ if (!(eq_s_b("\u05E4\u05D0\u05E8\u05DC\u05F1\u05E8")))
+ {
+ break lab24;
+ }
+ bra = cursor;
+ slice_from("\u05E4\u05D0\u05E8\u05DC\u05D9\u05E8");
+ break lab0;
+ }
+ cursor = limit - v_25;
+ }
+ {
+ int v_26 = limit - cursor;
+ lab25: {
+ ket = cursor;
+ if (!(eq_s_b("\u05E9\u05D8\u05D0\u05E0")))
+ {
+ break lab25;
+ }
+ bra = cursor;
+ slice_from("\u05E9\u05D8\u05F2");
+ break lab0;
+ }
+ cursor = limit - v_26;
+ }
+ {
+ int v_27 = limit - cursor;
+ lab26: {
+ ket = cursor;
+ if (!(eq_s_b("\u05E9\u05F0\u05F1\u05E8")))
+ {
+ break lab26;
+ }
+ bra = cursor;
+ slice_from("\u05E9\u05F0\u05E2\u05E8");
+ break lab0;
+ }
+ cursor = limit - v_27;
+ }
+ break;
+ case 4:
+ {
+ int v_28 = limit - cursor;
+ lab27: {
+ if (!r_R1())
+ {
+ break lab27;
+ }
+ slice_del();
+ {
+ int v_29 = limit - cursor;
+ lab28: {
+ ket = cursor;
+ lab29: {
+ int v_30 = limit - cursor;
+ lab30: {
+ if (!(eq_s_b("\u05D2\u05E2\u05D1\u05E8\u05D0\u05DB")))
+ {
+ break lab30;
+ }
+ break lab29;
+ }
+ cursor = limit - v_30;
+ if (!(eq_s_b("\u05D1\u05E8\u05D0\u05DB")))
+ {
+ break lab28;
+ }
+ }
+ bra = cursor;
+ slice_from("\u05D1\u05E8\u05E2\u05E0\u05D2");
+ break lab27;
+ }
+ cursor = limit - v_29;
+ }
+ break lab0;
+ }
+ cursor = limit - v_28;
+ }
+ while(true)
+ {
+ lab31: {
+ {
+ int c = cursor - -1;
+ if (limit_backward > c || c > limit)
+ {
+ break lab31;
+ }
+ cursor = c;
+ }
+ continue;
+ }
+ break;
+ }
+ ket = cursor;
+ lab32: {
+ int v_32 = limit - cursor;
+ lab33: {
+ if (!(eq_s_b("\u05E2\u05E0\u05E2\u05E8")))
+ {
+ break lab33;
+ }
+ break lab32;
+ }
+ cursor = limit - v_32;
+ lab34: {
+ if (!(eq_s_b("\u05E2\u05E0\u05E2\u05DE")))
+ {
+ break lab34;
+ }
+ break lab32;
+ }
+ cursor = limit - v_32;
+ lab35: {
+ if (!(eq_s_b("\u05E2\u05E0\u05E2")))
+ {
+ break lab35;
+ }
+ break lab32;
+ }
+ cursor = limit - v_32;
+ lab36: {
+ if (!(eq_s_b("\u05E0")))
+ {
+ break lab36;
+ }
+ break lab32;
+ }
+ cursor = limit - v_32;
+ lab37: {
+ if (!(eq_s_b("\u05E2\u05E8")))
+ {
+ break lab37;
+ }
+ break lab32;
+ }
+ cursor = limit - v_32;
+ lab38: {
+ if (!(eq_s_b("\u05E0\u05E1")))
+ {
+ break lab38;
+ }
+ break lab32;
+ }
+ cursor = limit - v_32;
+ lab39: {
+ if (!(eq_s_b("\u05E2")))
+ {
+ break lab39;
+ }
+ break lab32;
+ }
+ cursor = limit - v_32;
+ if (!(eq_s_b("\u05E1")))
+ {
+ break lab0;
+ }
+ }
+ bra = cursor;
+ if (!r_R1())
+ {
+ break lab0;
+ }
+ slice_del();
+ break;
+ case 5:
+ slice_from("\u05D2\u05F2");
+ break;
+ case 6:
+ slice_from("\u05E0\u05E2\u05DE");
+ break;
+ case 7:
+ slice_from("\u05E9\u05E8\u05F2\u05D1");
+ break;
+ case 8:
+ slice_from("\u05DE\u05F2\u05D3");
+ break;
+ case 9:
+ slice_from("\u05D1\u05F2\u05D8");
+ break;
+ case 10:
+ slice_from("\u05D1\u05F2\u05E1");
+ break;
+ case 11:
+ slice_from("\u05F0\u05F2\u05D6");
+ break;
+ case 12:
+ slice_from("\u05D8\u05E8\u05F2\u05D1");
+ break;
+ case 13:
+ slice_from("\u05DC\u05F2\u05D8");
+ break;
+ case 14:
+ slice_from("\u05E7\u05DC\u05F2\u05D1");
+ break;
+ case 15:
+ slice_from("\u05E8\u05F2\u05D1");
+ break;
+ case 16:
+ slice_from("\u05E8\u05F2\u05E1");
+ break;
+ case 17:
+ slice_from("\u05E9\u05F0\u05F2\u05D2");
+ break;
+ case 18:
+ slice_from("\u05E9\u05DE\u05F2\u05E1");
+ break;
+ case 19:
+ slice_from("\u05E9\u05E0\u05F2\u05D3");
+ break;
+ case 20:
+ slice_from("\u05D1\u05D9\u05E0\u05D3");
+ break;
+ case 21:
+ slice_from("\u05F0\u05D9\u05D8\u05E9");
+ break;
+ case 22:
+ slice_from("\u05D6\u05D9\u05E0\u05D2");
+ break;
+ case 23:
+ slice_from("\u05D8\u05E8\u05D9\u05E0\u05E7");
+ break;
+ case 24:
+ slice_from("\u05E6\u05F0\u05D9\u05E0\u05D2");
+ break;
+ case 25:
+ slice_from("\u05E9\u05DC\u05D9\u05E0\u05D2");
+ break;
+ case 26:
+ slice_from("\u05D1\u05F2\u05D2");
+ break;
+ case 27:
+ slice_from("\u05D4\u05F2\u05D1");
+ break;
+ case 28:
+ slice_from("\u05E4\u05D0\u05E8\u05DC\u05D9\u05E8");
+ break;
+ case 29:
+ slice_from("\u05E9\u05D8\u05F2");
+ break;
+ case 30:
+ slice_from("\u05E9\u05F0\u05E2\u05E8");
+ break;
+ case 31:
+ slice_from("\u05D1\u05E8\u05E2\u05E0\u05D2");
+ break;
+ case 32:
+ if (!r_R1())
+ {
+ break lab0;
+ }
+ slice_from("\u05D4");
+ break;
+ case 33:
+ {
+ int c = cursor - -4;
+ if (limit_backward > c || c > limit)
+ {
+ break lab0;
+ }
+ cursor = c;
+ }
+ ket = cursor;
+ if (!(eq_s_b("\u05D8")))
+ {
+ break lab0;
+ }
+ bra = cursor;
+ if (!r_R1())
+ {
+ break lab0;
+ }
+ slice_del();
+ break;
+ }
+ }
+ cursor = limit - v_1;
+ int v_33 = limit - cursor;
+ lab40: {
+ ket = cursor;
+ among_var = find_among_b(a_3);
+ if (among_var == 0)
+ {
+ break lab40;
+ }
+ bra = cursor;
+ switch (among_var) {
+ case 1:
+ if (!r_R1())
+ {
+ break lab40;
+ }
+ slice_del();
+ break;
+ case 2:
+ if (!r_R1())
+ {
+ break lab40;
+ }
+ if (!(in_grouping_b(g_consonant, 1489, 1520)))
+ {
+ break lab40;
+ }
+ slice_del();
+ break;
+ }
+ }
+ cursor = limit - v_33;
+ int v_34 = limit - cursor;
+ lab41: {
+ ket = cursor;
+ among_var = find_among_b(a_4);
+ if (among_var == 0)
+ {
+ break lab41;
+ }
+ bra = cursor;
+ switch (among_var) {
+ case 1:
+ if (!r_R1())
+ {
+ break lab41;
+ }
+ slice_del();
+ break;
+ case 2:
+ if (!r_R1())
+ {
+ break lab41;
+ }
+ {
+ int v_35 = limit - cursor;
+ lab42: {
+ int v_36 = limit - cursor;
+ lab43: {
+ int v_37 = limit - cursor;
+ lab44: {
+ if (!(eq_s_b("\u05E0\u05D2")))
+ {
+ break lab44;
+ }
+ break lab43;
+ }
+ cursor = limit - v_37;
+ lab45: {
+ if (!(eq_s_b("\u05E0\u05E7")))
+ {
+ break lab45;
+ }
+ break lab43;
+ }
+ cursor = limit - v_37;
+ lab46: {
+ if (!(eq_s_b("\u05E0")))
+ {
+ break lab46;
+ }
+ break lab43;
+ }
+ cursor = limit - v_37;
+ lab47: {
+ if (!(eq_s_b("\u05DE")))
+ {
+ break lab47;
+ }
+ break lab43;
+ }
+ cursor = limit - v_37;
+ lab48: {
+                         if (!(eq_s_b("\u05DC")))
+ {
+ break lab48;
+ }
+ if (!(in_grouping_b(g_consonant, 1489, 1520)))
+ {
+ break lab48;
+ }
+ break lab43;
+ }
+ cursor = limit - v_37;
+ if (!(in_grouping_b(g_vowel, 1488, 1522)))
+ {
+ break lab42;
+ }
+ }
+ cursor = limit - v_36;
+ slice_del();
+ break lab41;
+ }
+ cursor = limit - v_35;
+ }
+ {
+ int c = cursor - -5;
+ if (limit_backward > c || c > limit)
+ {
+ break lab41;
+ }
+ cursor = c;
+ }
+ if (!(eq_s_b("\u05E0\u05D3\u05D9\u05E7")))
+ {
+ break lab41;
+ }
+ slice_del();
+ break;
+ }
+ }
+ cursor = limit - v_34;
+ int v_38 = limit - cursor;
+ lab49: {
+ while(true)
+ {
+ int v_39 = limit - cursor;
+ lab50: {
+ golab51: while(true)
+ {
+ int v_40 = limit - cursor;
+ lab52: {
+ ket = cursor;
+ lab53: {
+ int v_41 = limit - cursor;
+ lab54: {
+ if (!(eq_s_b("GE")))
+ {
+ break lab54;
+ }
+ break lab53;
+ }
+ cursor = limit - v_41;
+ if (!(eq_s_b("TSU")))
+ {
+ break lab52;
+ }
+ }
+ bra = cursor;
+ slice_del();
+ cursor = limit - v_40;
+ break golab51;
+ }
+ cursor = limit - v_40;
+ if (cursor <= limit_backward)
+ {
+ break lab50;
+ }
+ cursor--;
+ }
+ continue;
+ }
+ cursor = limit - v_39;
+ break;
+ }
+ }
+ cursor = limit - v_38;
+ return true;
+}
+
+public boolean stem() {
+ r_prelude();
+ int v_2 = cursor;
+ r_mark_regions();
+ cursor = v_2;
+ limit_backward = cursor;
+ cursor = limit;
+ r_standard_suffix();
+ cursor = limit_backward;
+ return true;
+}
+
+@Override
+public boolean equals( Object o ) {
+ return o instanceof YiddishStemmer;
+}
+
+@Override
+public int hashCode() {
+ return YiddishStemmer.class.getName().hashCode();
+}
+
+
+
+}
+
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/languages.txt b/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/languages.txt
index 3526815..d60456d 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/languages.txt
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/languages.txt
@@ -30,3 +30,4 @@
Swedish
Tamil
Turkish
+Yiddish
diff --git a/lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/package-info.java
similarity index 79%
copy from lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java
copy to lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/package-info.java
index af94ced..de5befe 100644
--- a/lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/package-info.java
@@ -15,12 +15,5 @@
* limitations under the License.
*/
-package org.apache.lucene.util;
-
-/**
- * Tracks dynamic allocations/deallocations of memory for transient objects
- */
-public interface MemoryTracker {
- void updateBytes(long bytes);
- long getBytes();
-}
+/** Lucene 5.0 file format. */
+package org.apache.lucene.backward_codecs.lucene50;
\ No newline at end of file
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/package.html b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/package.html
deleted file mode 100644
index 352cef4..0000000
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/package.html
+++ /dev/null
@@ -1,25 +0,0 @@
-<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<html>
-<head>
- <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-</head>
-<body>
-Lucene 5.0 file format.
-</body>
-</html>
diff --git a/lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene60/package-info.java
similarity index 79%
copy from lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java
copy to lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene60/package-info.java
index af94ced..3fd0684 100644
--- a/lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene60/package-info.java
@@ -15,12 +15,5 @@
* limitations under the License.
*/
-package org.apache.lucene.util;
-
-/**
- * Tracks dynamic allocations/deallocations of memory for transient objects
- */
-public interface MemoryTracker {
- void updateBytes(long bytes);
- long getBytes();
-}
+/** Lucene 6.0 file format. */
+package org.apache.lucene.backward_codecs.lucene60;
\ No newline at end of file
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene60/package.html b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene60/package.html
deleted file mode 100644
index 6b4e234..0000000
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene60/package.html
+++ /dev/null
@@ -1,25 +0,0 @@
-<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<html>
-<head>
- <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-</head>
-<body>
-Lucene 6.0 file format.
-</body>
-</html>
diff --git a/lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/package-info.java
similarity index 79%
copy from lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java
copy to lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/package-info.java
index af94ced..3d4d9bd 100644
--- a/lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/package-info.java
@@ -15,12 +15,5 @@
* limitations under the License.
*/
-package org.apache.lucene.util;
-
-/**
- * Tracks dynamic allocations/deallocations of memory for transient objects
- */
-public interface MemoryTracker {
- void updateBytes(long bytes);
- long getBytes();
-}
+/** Lucene 8.0 file format. */
+package org.apache.lucene.backward_codecs.lucene80;
\ No newline at end of file
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/package.html b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/package.html
deleted file mode 100644
index 9d64473..0000000
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/package.html
+++ /dev/null
@@ -1,25 +0,0 @@
-<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<html>
-<head>
- <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-</head>
-<body>
-Lucene 8.0 file format.
-</body>
-</html>
diff --git a/lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/package-info.java
similarity index 79%
copy from lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java
copy to lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/package-info.java
index af94ced..d122fcc 100644
--- a/lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/package-info.java
@@ -15,12 +15,5 @@
* limitations under the License.
*/
-package org.apache.lucene.util;
-
-/**
- * Tracks dynamic allocations/deallocations of memory for transient objects
- */
-public interface MemoryTracker {
- void updateBytes(long bytes);
- long getBytes();
-}
+/** Lucene 8.4 file format. */
+package org.apache.lucene.backward_codecs.lucene84;
\ No newline at end of file
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/package.html b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/package.html
deleted file mode 100644
index d0ba893..0000000
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/package.html
+++ /dev/null
@@ -1,25 +0,0 @@
-<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<html>
-<head>
- <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-</head>
-<body>
-Lucene 8.4 file format.
-</body>
-</html>
diff --git a/lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene86/package-info.java
similarity index 79%
copy from lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java
copy to lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene86/package-info.java
index af94ced..2d78f09 100644
--- a/lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene86/package-info.java
@@ -15,12 +15,5 @@
* limitations under the License.
*/
-package org.apache.lucene.util;
-
-/**
- * Tracks dynamic allocations/deallocations of memory for transient objects
- */
-public interface MemoryTracker {
- void updateBytes(long bytes);
- long getBytes();
-}
+/** Lucene 8.6 file format. */
+package org.apache.lucene.backward_codecs.lucene86;
\ No newline at end of file
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene86/package.html b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene86/package.html
deleted file mode 100644
index 10560c6..0000000
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene86/package.html
+++ /dev/null
@@ -1,25 +0,0 @@
-<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<html>
-<head>
- <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-</head>
-<body>
-Lucene 8.6 file format.
-</body>
-</html>
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/Lucene87Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/Lucene87Codec.java
index 13bd32f..3280322 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/Lucene87Codec.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/Lucene87Codec.java
@@ -36,6 +36,7 @@
import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat;
import org.apache.lucene.codecs.lucene60.Lucene60FieldInfosFormat;
+import org.apache.lucene.codecs.lucene80.Lucene80DocValuesFormat;
import org.apache.lucene.codecs.lucene80.Lucene80NormsFormat;
import org.apache.lucene.codecs.lucene84.Lucene84PostingsFormat;
import org.apache.lucene.codecs.lucene86.Lucene86PointsFormat;
@@ -56,6 +57,23 @@
* @lucene.experimental
*/
public class Lucene87Codec extends Codec {
+
+ /** Configuration option for the codec. */
+ public static enum Mode {
+ /** Trade compression ratio for retrieval speed. */
+ BEST_SPEED(Lucene87StoredFieldsFormat.Mode.BEST_SPEED, Lucene80DocValuesFormat.Mode.BEST_SPEED),
+ /** Trade retrieval speed for compression ratio. */
+ BEST_COMPRESSION(Lucene87StoredFieldsFormat.Mode.BEST_COMPRESSION, Lucene80DocValuesFormat.Mode.BEST_COMPRESSION);
+
+ private final Lucene87StoredFieldsFormat.Mode storedMode;
+ private final Lucene80DocValuesFormat.Mode dvMode;
+
+ private Mode(Lucene87StoredFieldsFormat.Mode storedMode, Lucene80DocValuesFormat.Mode dvMode) {
+ this.storedMode = Objects.requireNonNull(storedMode);
+ this.dvMode = Objects.requireNonNull(dvMode);
+ }
+ }
+
private final TermVectorsFormat vectorsFormat = new Lucene50TermVectorsFormat();
private final FieldInfosFormat fieldInfosFormat = new Lucene60FieldInfosFormat();
private final SegmentInfoFormat segmentInfosFormat = new Lucene86SegmentInfoFormat();
@@ -84,19 +102,10 @@
* Instantiates a new codec.
*/
public Lucene87Codec() {
- this(Lucene87StoredFieldsFormat.Mode.BEST_SPEED);
- }
-
- /**
- * Instantiates a new codec, specifying the stored fields compression
- * mode to use.
- * @param mode stored fields compression mode to use for newly
- * flushed/merged segments.
- */
- public Lucene87Codec(Lucene87StoredFieldsFormat.Mode mode) {
super("Lucene87");
- this.storedFieldsFormat = new Lucene87StoredFieldsFormat(Objects.requireNonNull(mode));
+ this.storedFieldsFormat = new Lucene87StoredFieldsFormat();
this.defaultFormat = new Lucene84PostingsFormat();
+ this.defaultDVFormat = new Lucene80DocValuesFormat();
}
@Override
@@ -173,7 +182,7 @@
return docValuesFormat;
}
- private final DocValuesFormat defaultDVFormat = DocValuesFormat.forName("Lucene80");
+ private final DocValuesFormat defaultDVFormat;
private final NormsFormat normsFormat = new Lucene80NormsFormat();
diff --git a/lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/package-info.java
similarity index 79%
copy from lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java
copy to lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/package-info.java
index af94ced..66a8379 100644
--- a/lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/package-info.java
@@ -15,12 +15,5 @@
* limitations under the License.
*/
-package org.apache.lucene.util;
-
-/**
- * Tracks dynamic allocations/deallocations of memory for transient objects
- */
-public interface MemoryTracker {
- void updateBytes(long bytes);
- long getBytes();
-}
+/** Lucene 8.7 file format. */
+package org.apache.lucene.backward_codecs.lucene87;
\ No newline at end of file
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/package.html b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/package.html
deleted file mode 100644
index 3474ef9..0000000
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/package.html
+++ /dev/null
@@ -1,42 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- -->
-
-<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<html>
-<head>
- <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-</head>
-<body>
-Lucene 8.7 file format.
-</body>
-</html>
diff --git a/lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/package-info.java
similarity index 79%
copy from lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java
copy to lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/package-info.java
index af94ced..c2fe1f6 100644
--- a/lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/package-info.java
@@ -15,12 +15,5 @@
* limitations under the License.
*/
-package org.apache.lucene.util;
-
-/**
- * Tracks dynamic allocations/deallocations of memory for transient objects
- */
-public interface MemoryTracker {
- void updateBytes(long bytes);
- long getBytes();
-}
+/** Common APIs for use by backwards compatibility codecs. */
+package org.apache.lucene.backward_codecs;
\ No newline at end of file
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/package.html b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/package.html
deleted file mode 100644
index fe01fff..0000000
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/package.html
+++ /dev/null
@@ -1,26 +0,0 @@
-<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<!-- not a package-info.java, because we already defined this package in core/ -->
-<html>
-<head>
- <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-</head>
-<body>
-Common APIs for use by backwards compatibility codecs.
-</body>
-</html>
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/Lucene50RWStoredFieldsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/Lucene50RWStoredFieldsFormat.java
index c6ecaa8..c625dc8 100644
--- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/Lucene50RWStoredFieldsFormat.java
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/Lucene50RWStoredFieldsFormat.java
@@ -18,7 +18,6 @@
import java.io.IOException;
-import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat;
import org.apache.lucene.codecs.StoredFieldsWriter;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/TestForUtil.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/TestForUtil.java
index e550a8c..a79216d 100644
--- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/TestForUtil.java
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/TestForUtil.java
@@ -23,7 +23,6 @@
import java.io.IOException;
-import org.apache.lucene.backward_codecs.lucene50.ForUtil;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene60/Lucene60PointsWriter.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene60/Lucene60PointsWriter.java
index 072b5e3..2ca4f11 100644
--- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene60/Lucene60PointsWriter.java
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene60/Lucene60PointsWriter.java
@@ -24,8 +24,6 @@
import java.util.List;
import java.util.Map;
-import org.apache.lucene.backward_codecs.lucene60.Lucene60PointsFormat;
-import org.apache.lucene.backward_codecs.lucene60.Lucene60PointsReader;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.MutablePointValues;
import org.apache.lucene.codecs.PointsReader;
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene70/Lucene70RWSegmentInfoFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene70/Lucene70RWSegmentInfoFormat.java
index 21b10b9..90ccf4a 100644
--- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene70/Lucene70RWSegmentInfoFormat.java
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene70/Lucene70RWSegmentInfoFormat.java
@@ -20,7 +20,6 @@
import java.io.IOException;
import java.util.Set;
-import org.apache.lucene.backward_codecs.lucene70.Lucene70SegmentInfoFormat;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java
index 43ae34e..f49451a 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java
@@ -54,7 +54,7 @@
static final BytesRef STARTOFFSET = new BytesRef(" startoffset ");
static final BytesRef ENDOFFSET = new BytesRef(" endoffset ");
- static final String VECTORS_EXTENSION = "vec";
+ static final String VECTORS_EXTENSION = "tvc";
private IndexOutput out;
private int numDocsWritten = 0;
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextVectorFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextVectorFormat.java
index 3d4b5fe..b44979e 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextVectorFormat.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextVectorFormat.java
@@ -43,9 +43,9 @@
return new SimpleTextVectorReader(state);
}
- /** Extension of points data file */
+ /** Extension of vectors data file */
static final String VECTOR_EXTENSION = "vec";
- /** Extension of points index file */
+ /** Extension of vectors index file */
static final String META_EXTENSION = "gri";
}
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextVectorReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextVectorReader.java
index 4c19625..021ff53 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextVectorReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextVectorReader.java
@@ -27,6 +27,8 @@
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.RandomAccessVectorValues;
+import org.apache.lucene.index.RandomAccessVectorValuesProducer;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.VectorValues;
import org.apache.lucene.search.TopDocs;
@@ -158,7 +160,7 @@
}
}
- private static class SimpleTextVectorValues extends VectorValues implements VectorValues.RandomAccess {
+ private static class SimpleTextVectorValues extends VectorValues implements RandomAccessVectorValues, RandomAccessVectorValuesProducer {
private final BytesRefBuilder scratch = new BytesRefBuilder();
private final FieldEntry entry;
@@ -205,7 +207,7 @@
}
@Override
- public RandomAccess randomAccess() {
+ public RandomAccessVectorValues randomAccess() {
return this;
}
@@ -236,15 +238,15 @@
}
private void readAllVectors() throws IOException {
- for (int i = 0; i < values.length; i++) {
- readVector(values[i]);
+ for (float[] value : values) {
+ readVector(value);
}
}
private void readVector(float[] value) throws IOException {
SimpleTextUtil.readLine(in, scratch);
- // skip leading " [" and strip trailing "]"
- String s = new BytesRef(scratch.bytes(), 2, scratch.length() - 3).utf8ToString();
+ // skip leading "[" and strip trailing "]"
+ String s = new BytesRef(scratch.bytes(), 1, scratch.length() - 2).utf8ToString();
String[] floatStrings = s.split(",");
assert floatStrings.length == value.length : " read " + s + " when expecting " + value.length + " floats";
for (int i = 0; i < floatStrings.length; i++) {
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/VectorWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/VectorWriter.java
index a5f9e7f..7b13310 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/VectorWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/VectorWriter.java
@@ -26,6 +26,8 @@
import org.apache.lucene.index.DocIDMerger;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.MergeState;
+import org.apache.lucene.index.RandomAccessVectorValues;
+import org.apache.lucene.index.RandomAccessVectorValuesProducer;
import org.apache.lucene.index.VectorValues;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.BytesRef;
@@ -135,7 +137,7 @@
* View over multiple VectorValues supporting iterator-style access via DocIdMerger. Maintains a reverse ordinal
* mapping for documents having values in order to support random access by dense ordinal.
*/
- private static class VectorValuesMerger extends VectorValues {
+ private static class VectorValuesMerger extends VectorValues implements RandomAccessVectorValuesProducer {
private final List<VectorValuesSub> subs;
private final DocIDMerger<VectorValuesSub> docIdMerger;
private final int[] ordBase;
@@ -198,7 +200,7 @@
}
@Override
- public RandomAccess randomAccess() {
+ public RandomAccessVectorValues randomAccess() {
return new MergerRandomAccess();
}
@@ -227,14 +229,23 @@
return subs.get(0).values.searchStrategy();
}
- class MergerRandomAccess implements VectorValues.RandomAccess {
+ @Override
+ public TopDocs search(float[] target, int k, int fanout) throws IOException {
+ throw new UnsupportedOperationException();
+ }
- private final List<RandomAccess> raSubs;
+ class MergerRandomAccess implements RandomAccessVectorValues {
+
+ private final List<RandomAccessVectorValues> raSubs;
MergerRandomAccess() {
raSubs = new ArrayList<>(subs.size());
for (VectorValuesSub sub : subs) {
- raSubs.add(sub.values.randomAccess());
+ if (sub.values instanceof RandomAccessVectorValuesProducer) {
+ raSubs.add(((RandomAccessVectorValuesProducer) sub.values).randomAccess());
+ } else {
+ throw new IllegalStateException("Cannot merge VectorValues without support for random access");
+ }
}
}
@@ -272,12 +283,6 @@
public BytesRef binaryValue(int targetOrd) throws IOException {
throw new UnsupportedOperationException();
}
-
- @Override
- public TopDocs search(float[] target, int k, int fanout) throws IOException {
- throw new UnsupportedOperationException();
- }
-
}
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80DocValuesConsumer.java
index 33bf261..290812b 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80DocValuesConsumer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80DocValuesConsumer.java
@@ -64,12 +64,14 @@
/** writer for {@link Lucene80DocValuesFormat} */
final class Lucene80DocValuesConsumer extends DocValuesConsumer implements Closeable {
+ final Lucene80DocValuesFormat.Mode mode;
IndexOutput data, meta;
final int maxDoc;
private final SegmentWriteState state;
/** expert: Creates a new writer */
- public Lucene80DocValuesConsumer(SegmentWriteState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
+ public Lucene80DocValuesConsumer(SegmentWriteState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension, Lucene80DocValuesFormat.Mode mode) throws IOException {
+ this.mode = mode;
boolean success = false;
try {
this.state = state;
@@ -490,13 +492,86 @@
}
}
-
@Override
public void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
meta.writeInt(field.number);
meta.writeByte(Lucene80DocValuesFormat.BINARY);
+ switch (mode) {
+ case BEST_SPEED:
+ meta.writeByte((byte) 0);
+ doAddUncompressedBinaryField(field, valuesProducer);
+ break;
+ case BEST_COMPRESSION:
+ meta.writeByte((byte) 1);
+ doAddCompressedBinaryField(field, valuesProducer);
+ break;
+ default:
+ throw new AssertionError();
+ }
+ }
+
+ private void doAddUncompressedBinaryField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
+ BinaryDocValues values = valuesProducer.getBinary(field);
+ long start = data.getFilePointer();
+ meta.writeLong(start); // dataOffset
+ int numDocsWithField = 0;
+ int minLength = Integer.MAX_VALUE;
+ int maxLength = 0;
+ for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
+ numDocsWithField++;
+ BytesRef v = values.binaryValue();
+ int length = v.length;
+ data.writeBytes(v.bytes, v.offset, v.length);
+ minLength = Math.min(length, minLength);
+ maxLength = Math.max(length, maxLength);
+ }
+ assert numDocsWithField <= maxDoc;
+ meta.writeLong(data.getFilePointer() - start); // dataLength
+
+ if (numDocsWithField == 0) {
+ meta.writeLong(-2); // docsWithFieldOffset
+ meta.writeLong(0L); // docsWithFieldLength
+ meta.writeShort((short) -1); // jumpTableEntryCount
+ meta.writeByte((byte) -1); // denseRankPower
+ } else if (numDocsWithField == maxDoc) {
+ meta.writeLong(-1); // docsWithFieldOffset
+ meta.writeLong(0L); // docsWithFieldLength
+ meta.writeShort((short) -1); // jumpTableEntryCount
+ meta.writeByte((byte) -1); // denseRankPower
+ } else {
+ long offset = data.getFilePointer();
+ meta.writeLong(offset); // docsWithFieldOffset
+ values = valuesProducer.getBinary(field);
+ final short jumpTableEntryCount = IndexedDISI.writeBitSet(values, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER);
+ meta.writeLong(data.getFilePointer() - offset); // docsWithFieldLength
+ meta.writeShort(jumpTableEntryCount);
+ meta.writeByte(IndexedDISI.DEFAULT_DENSE_RANK_POWER);
+ }
+
+ meta.writeInt(numDocsWithField);
+ meta.writeInt(minLength);
+ meta.writeInt(maxLength);
+ if (maxLength > minLength) {
+ start = data.getFilePointer();
+ meta.writeLong(start);
+ meta.writeVInt(DIRECT_MONOTONIC_BLOCK_SHIFT);
+
+ final DirectMonotonicWriter writer = DirectMonotonicWriter.getInstance(meta, data, numDocsWithField + 1, DIRECT_MONOTONIC_BLOCK_SHIFT);
+ long addr = 0;
+ writer.add(addr);
+ values = valuesProducer.getBinary(field);
+ for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
+ addr += values.binaryValue().length;
+ writer.add(addr);
+ }
+ writer.finish();
+ meta.writeLong(data.getFilePointer() - start);
+ }
+ }
+
+ private void doAddCompressedBinaryField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
try (CompressedBinaryBlockWriter blockWriter = new CompressedBinaryBlockWriter()){
BinaryDocValues values = valuesProducer.getBinary(field);
long start = data.getFilePointer();
@@ -542,7 +617,6 @@
meta.writeInt(maxLength);
blockWriter.writeMetaData();
-
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80DocValuesFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80DocValuesFormat.java
index fa88ae9..9ca469f 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80DocValuesFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80DocValuesFormat.java
@@ -18,6 +18,7 @@
import java.io.IOException;
+import java.util.Objects;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesFormat;
@@ -131,14 +132,30 @@
*/
public final class Lucene80DocValuesFormat extends DocValuesFormat {
- /** Sole Constructor */
+ /** Configuration option for doc values. */
+ public static enum Mode {
+ /** Trade compression ratio for retrieval speed. */
+ BEST_SPEED,
+ /** Trade retrieval speed for compression ratio. */
+ BEST_COMPRESSION
+ }
+
+ private final Mode mode;
+
+ /** Default constructor. */
public Lucene80DocValuesFormat() {
+ this(Mode.BEST_SPEED);
+ }
+
+ /** Constructor */
+ public Lucene80DocValuesFormat(Mode mode) {
super("Lucene80");
+ this.mode = Objects.requireNonNull(mode);
}
@Override
public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
- return new Lucene80DocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION);
+ return new Lucene80DocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION, mode);
}
@Override
@@ -152,7 +169,8 @@
static final String META_EXTENSION = "dvm";
static final int VERSION_START = 0;
static final int VERSION_BIN_COMPRESSED = 1;
- static final int VERSION_CURRENT = VERSION_BIN_COMPRESSED;
+ static final int VERSION_CONFIGURABLE_COMPRESSION = 2;
+ static final int VERSION_CURRENT = VERSION_CONFIGURABLE_COMPRESSION;
// indicates docvalues type
static final byte NUMERIC = 0;
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80DocValuesProducer.java
index ebcc82a..bc71d10 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80DocValuesProducer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80DocValuesProducer.java
@@ -174,6 +174,20 @@
private BinaryEntry readBinary(ChecksumIndexInput meta) throws IOException {
BinaryEntry entry = new BinaryEntry();
+ if (version >= Lucene80DocValuesFormat.VERSION_CONFIGURABLE_COMPRESSION) {
+ int b = meta.readByte();
+ switch (b) {
+ case 0:
+ case 1:
+ // valid
+ break;
+ default:
+ throw new CorruptIndexException("Unexpected byte: " + b + ", expected 0 or 1", meta);
+ }
+ entry.compressed = b != 0;
+ } else {
+ entry.compressed = version >= Lucene80DocValuesFormat.VERSION_BIN_COMPRESSED;
+ }
entry.dataOffset = meta.readLong();
entry.dataLength = meta.readLong();
entry.docsWithFieldOffset = meta.readLong();
@@ -183,19 +197,19 @@
entry.numDocsWithField = meta.readInt();
entry.minLength = meta.readInt();
entry.maxLength = meta.readInt();
- if ((version >= Lucene80DocValuesFormat.VERSION_BIN_COMPRESSED && entry.numDocsWithField > 0) || entry.minLength < entry.maxLength) {
+ if ((entry.compressed && entry.numDocsWithField > 0) || entry.minLength < entry.maxLength) {
entry.addressesOffset = meta.readLong();
// Old count of uncompressed addresses
long numAddresses = entry.numDocsWithField + 1L;
// New count of compressed addresses - the number of compresseed blocks
- if (version >= Lucene80DocValuesFormat.VERSION_BIN_COMPRESSED) {
+ if (entry.compressed) {
entry.numCompressedChunks = meta.readVInt();
entry.docsPerChunkShift = meta.readVInt();
entry.maxUncompressedChunkSize = meta.readVInt();
numAddresses = entry.numCompressedChunks;
}
-
+
final int blockShift = meta.readVInt();
entry.addressesMeta = DirectMonotonicReader.loadMeta(meta, numAddresses, blockShift);
ramBytesUsed += entry.addressesMeta.ramBytesUsed();
@@ -303,6 +317,7 @@
}
private static class BinaryEntry {
+ boolean compressed;
long dataOffset;
long dataLength;
long docsWithFieldOffset;
@@ -680,9 +695,7 @@
}
}
- // BWC - old binary format
- private BinaryDocValues getUncompressedBinary(FieldInfo field) throws IOException {
- BinaryEntry entry = binaries.get(field.name);
+ private BinaryDocValues getUncompressedBinary(BinaryEntry entry) throws IOException {
if (entry.docsWithFieldOffset == -2) {
return DocValues.emptyBinary();
}
@@ -844,11 +857,16 @@
@Override
public BinaryDocValues getBinary(FieldInfo field) throws IOException {
- if (version < Lucene80DocValuesFormat.VERSION_BIN_COMPRESSED) {
- return getUncompressedBinary(field);
- }
-
BinaryEntry entry = binaries.get(field.name);
+ if (entry.compressed) {
+ return getCompressedBinary(entry);
+ } else {
+ return getUncompressedBinary(entry);
+ }
+ }
+
+ private BinaryDocValues getCompressedBinary(BinaryEntry entry) throws IOException {
+
if (entry.docsWithFieldOffset == -2) {
return DocValues.emptyBinary();
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90Codec.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90Codec.java
index fa8f7af..94608e4 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90Codec.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90Codec.java
@@ -34,6 +34,7 @@
import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat;
import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat;
+import org.apache.lucene.codecs.lucene80.Lucene80DocValuesFormat;
import org.apache.lucene.codecs.lucene80.Lucene80NormsFormat;
import org.apache.lucene.codecs.lucene84.Lucene84PostingsFormat;
import org.apache.lucene.codecs.lucene86.Lucene86PointsFormat;
@@ -53,6 +54,23 @@
* @lucene.experimental
*/
public class Lucene90Codec extends Codec {
+
+ /** Configuration option for the codec. */
+ public static enum Mode {
+ /** Trade compression ratio for retrieval speed. */
+ BEST_SPEED(Lucene87StoredFieldsFormat.Mode.BEST_SPEED, Lucene80DocValuesFormat.Mode.BEST_SPEED),
+ /** Trade retrieval speed for compression ratio. */
+ BEST_COMPRESSION(Lucene87StoredFieldsFormat.Mode.BEST_COMPRESSION, Lucene80DocValuesFormat.Mode.BEST_COMPRESSION);
+
+ private final Lucene87StoredFieldsFormat.Mode storedMode;
+ private final Lucene80DocValuesFormat.Mode dvMode;
+
+ private Mode(Lucene87StoredFieldsFormat.Mode storedMode, Lucene80DocValuesFormat.Mode dvMode) {
+ this.storedMode = Objects.requireNonNull(storedMode);
+ this.dvMode = Objects.requireNonNull(dvMode);
+ }
+ }
+
private final TermVectorsFormat vectorsFormat = new Lucene50TermVectorsFormat();
private final FieldInfosFormat fieldInfosFormat = new Lucene90FieldInfosFormat();
private final SegmentInfoFormat segmentInfosFormat = new Lucene86SegmentInfoFormat();
@@ -82,7 +100,7 @@
* Instantiates a new codec.
*/
public Lucene90Codec() {
- this(Lucene87StoredFieldsFormat.Mode.BEST_SPEED);
+ this(Mode.BEST_SPEED);
}
/**
@@ -91,10 +109,11 @@
* @param mode stored fields compression mode to use for newly
* flushed/merged segments.
*/
- public Lucene90Codec(Lucene87StoredFieldsFormat.Mode mode) {
+ public Lucene90Codec(Mode mode) {
super("Lucene90");
- this.storedFieldsFormat = new Lucene87StoredFieldsFormat(Objects.requireNonNull(mode));
+ this.storedFieldsFormat = new Lucene87StoredFieldsFormat(Objects.requireNonNull(mode).storedMode);
this.defaultFormat = new Lucene84PostingsFormat();
+ this.defaultDVFormat = new Lucene80DocValuesFormat(mode.dvMode);
}
@Override
@@ -172,7 +191,7 @@
return docValuesFormat;
}
- private final DocValuesFormat defaultDVFormat = DocValuesFormat.forName("Lucene80");
+ private final DocValuesFormat defaultDVFormat;
private final NormsFormat normsFormat = new Lucene80NormsFormat();
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90VectorFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90VectorFormat.java
index 632bc81..5363c65 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90VectorFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90VectorFormat.java
@@ -27,15 +27,17 @@
/**
* Lucene 9.0 vector format, which encodes dense numeric vector values.
- * TODO: add support for approximate KNN search.
+ *
+ * @lucene.experimental
*/
public final class Lucene90VectorFormat extends VectorFormat {
static final String META_CODEC_NAME = "Lucene90VectorFormatMeta";
static final String VECTOR_DATA_CODEC_NAME = "Lucene90VectorFormatData";
-
+ static final String VECTOR_INDEX_CODEC_NAME = "Lucene90VectorFormatIndex";
static final String META_EXTENSION = "vem";
static final String VECTOR_DATA_EXTENSION = "vec";
+ static final String VECTOR_INDEX_EXTENSION = "vex";
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90VectorReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90VectorReader.java
index 9d313d0..674959f 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90VectorReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90VectorReader.java
@@ -22,6 +22,7 @@
import java.nio.FloatBuffer;
import java.util.HashMap;
import java.util.Map;
+import java.util.Random;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.VectorReader;
@@ -29,17 +30,28 @@
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.KnnGraphValues;
+import org.apache.lucene.index.RandomAccessVectorValues;
+import org.apache.lucene.index.RandomAccessVectorValuesProducer;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.VectorValues;
+import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.TotalHits;
import org.apache.lucene.store.ChecksumIndexInput;
+import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.hnsw.HnswGraph;
+import org.apache.lucene.util.hnsw.Neighbor;
+import org.apache.lucene.util.hnsw.Neighbors;
+
+import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
/**
- * Reads vectors from the index segments.
+ * Reads vectors from the index segments along with index data structures supporting KNN search.
* @lucene.experimental
*/
public final class Lucene90VectorReader extends VectorReader {
@@ -47,13 +59,21 @@
private final FieldInfos fieldInfos;
private final Map<String, FieldEntry> fields = new HashMap<>();
private final IndexInput vectorData;
- private final int maxDoc;
+ private final IndexInput vectorIndex;
+ private final long checksumSeed;
Lucene90VectorReader(SegmentReadState state) throws IOException {
this.fieldInfos = state.fieldInfos;
- this.maxDoc = state.segmentInfo.maxDoc();
- String metaFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene90VectorFormat.META_EXTENSION);
+ int versionMeta = readMetadata(state, Lucene90VectorFormat.META_EXTENSION);
+ long[] checksumRef = new long[1];
+ vectorData = openDataInput(state, versionMeta, Lucene90VectorFormat.VECTOR_DATA_EXTENSION, Lucene90VectorFormat.VECTOR_DATA_CODEC_NAME, checksumRef);
+ vectorIndex = openDataInput(state, versionMeta, Lucene90VectorFormat.VECTOR_INDEX_EXTENSION, Lucene90VectorFormat.VECTOR_INDEX_CODEC_NAME, checksumRef);
+ checksumSeed = checksumRef[0];
+ }
+
+ private int readMetadata(SegmentReadState state, String fileExtension) throws IOException {
+ String metaFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, fileExtension);
int versionMeta = -1;
try (ChecksumIndexInput meta = state.directory.openChecksumInput(metaFileName, state.context)) {
Throwable priorE = null;
@@ -71,29 +91,32 @@
CodecUtil.checkFooter(meta, priorE);
}
}
+ return versionMeta;
+ }
+ private static IndexInput openDataInput(SegmentReadState state, int versionMeta, String fileExtension, String codecName, long[] checksumRef) throws IOException {
boolean success = false;
- String vectorDataFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene90VectorFormat.VECTOR_DATA_EXTENSION);
- this.vectorData = state.directory.openInput(vectorDataFileName, state.context);
+ String fileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, fileExtension);
+ IndexInput in = state.directory.openInput(fileName, state.context);
try {
- int versionVectorData = CodecUtil.checkIndexHeader(vectorData,
- Lucene90VectorFormat.VECTOR_DATA_CODEC_NAME,
+ int versionVectorData = CodecUtil.checkIndexHeader(in,
+ codecName,
Lucene90VectorFormat.VERSION_START,
Lucene90VectorFormat.VERSION_CURRENT,
state.segmentInfo.getId(),
state.segmentSuffix);
if (versionMeta != versionVectorData) {
- throw new CorruptIndexException("Format versions mismatch: meta=" + versionMeta + ", vector data=" + versionVectorData, vectorData);
+ throw new CorruptIndexException("Format versions mismatch: meta=" + versionMeta + ", " + codecName + "=" + versionVectorData, in);
}
- CodecUtil.retrieveChecksum(vectorData);
-
+ checksumRef[0] = CodecUtil.retrieveChecksum(in);
success = true;
} finally {
if (!success) {
- IOUtils.closeWhileHandlingException(this.vectorData);
+ IOUtils.closeWhileHandlingException(in);
}
}
+ return in;
}
private void readFields(ChecksumIndexInput meta, FieldInfos infos) throws IOException {
@@ -102,23 +125,28 @@
if (info == null) {
throw new CorruptIndexException("Invalid field number: " + fieldNumber, meta);
}
- int searchStrategyId = meta.readInt();
- if (searchStrategyId < 0 || searchStrategyId >= VectorValues.SearchStrategy.values().length) {
- throw new CorruptIndexException("Invalid search strategy id: " + searchStrategyId, meta);
- }
- VectorValues.SearchStrategy searchStrategy = VectorValues.SearchStrategy.values()[searchStrategyId];
- long vectorDataOffset = meta.readVLong();
- long vectorDataLength = meta.readVLong();
- int dimension = meta.readInt();
- int size = meta.readInt();
- int[] ordToDoc = new int[size];
- for (int i = 0; i < size; i++) {
- int doc = meta.readVInt();
- ordToDoc[i] = doc;
- }
- FieldEntry fieldEntry = new FieldEntry(dimension, searchStrategy, maxDoc, vectorDataOffset, vectorDataLength,
- ordToDoc);
- fields.put(info.name, fieldEntry);
+ fields.put(info.name, readField(meta));
+ }
+ }
+
+ private VectorValues.SearchStrategy readSearchStrategy(DataInput input) throws IOException {
+ int searchStrategyId = input.readInt();
+ if (searchStrategyId < 0 || searchStrategyId >= VectorValues.SearchStrategy.values().length) {
+ throw new CorruptIndexException("Invalid search strategy id: " + searchStrategyId, input);
+ }
+ return VectorValues.SearchStrategy.values()[searchStrategyId];
+ }
+
+ private FieldEntry readField(DataInput input) throws IOException {
+ VectorValues.SearchStrategy searchStrategy = readSearchStrategy(input);
+ switch(searchStrategy) {
+ case NONE:
+ return new FieldEntry(input, searchStrategy);
+ case DOT_PRODUCT_HNSW:
+ case EUCLIDEAN_HNSW:
+ return new HnswGraphFieldEntry(input, searchStrategy);
+ default:
+ throw new CorruptIndexException("Unknown vector search strategy: " + searchStrategy, input);
}
}
@@ -135,6 +163,7 @@
@Override
public void checkIntegrity() throws IOException {
CodecUtil.checksumEntireFile(vectorData);
+ CodecUtil.checksumEntireFile(vectorIndex);
}
@Override
@@ -165,29 +194,58 @@
return new OffHeapVectorValues(fieldEntry, bytesSlice);
}
+ public KnnGraphValues getGraphValues(String field) throws IOException {
+ FieldInfo info = fieldInfos.fieldInfo(field);
+ if (info == null) {
+ throw new IllegalArgumentException("No such field '" + field + "'");
+ }
+ FieldEntry entry = fields.get(field);
+ if (entry != null && entry.indexDataLength > 0) {
+ return getGraphValues(entry);
+ } else {
+ return KnnGraphValues.EMPTY;
+ }
+ }
+
+ private KnnGraphValues getGraphValues(FieldEntry entry) throws IOException {
+ if (entry.searchStrategy.isHnsw()) {
+ HnswGraphFieldEntry graphEntry = (HnswGraphFieldEntry) entry;
+ IndexInput bytesSlice = vectorIndex.slice("graph-data", entry.indexDataOffset, entry.indexDataLength);
+ return new IndexedKnnGraphReader(graphEntry, bytesSlice);
+ } else {
+ return KnnGraphValues.EMPTY;
+ }
+ }
+
@Override
public void close() throws IOException {
- vectorData.close();
+ IOUtils.close(vectorData, vectorIndex);
}
private static class FieldEntry {
final int dimension;
final VectorValues.SearchStrategy searchStrategy;
- final int maxDoc;
final long vectorDataOffset;
final long vectorDataLength;
+ final long indexDataOffset;
+ final long indexDataLength;
final int[] ordToDoc;
- FieldEntry(int dimension, VectorValues.SearchStrategy searchStrategy, int maxDoc,
- long vectorDataOffset, long vectorDataLength, int[] ordToDoc) {
- this.dimension = dimension;
+ FieldEntry(DataInput input, VectorValues.SearchStrategy searchStrategy) throws IOException {
this.searchStrategy = searchStrategy;
- this.maxDoc = maxDoc;
- this.vectorDataOffset = vectorDataOffset;
- this.vectorDataLength = vectorDataLength;
- this.ordToDoc = ordToDoc;
+ vectorDataOffset = input.readVLong();
+ vectorDataLength = input.readVLong();
+ indexDataOffset = input.readVLong();
+ indexDataLength = input.readVLong();
+ dimension = input.readInt();
+ int size = input.readInt();
+ ordToDoc = new int[size];
+ for (int i = 0; i < size; i++) {
+ int doc = input.readVInt();
+ ordToDoc[i] = doc;
+ }
}
int size() {
@@ -195,8 +253,23 @@
}
}
+ private static class HnswGraphFieldEntry extends FieldEntry {
+
+ final long[] ordOffsets;
+
+ HnswGraphFieldEntry(DataInput input, VectorValues.SearchStrategy searchStrategy) throws IOException {
+ super(input, searchStrategy);
+ ordOffsets = new long[size()];
+ long offset = 0;
+ for (int i = 0; i < ordOffsets.length; i++) {
+ offset += input.readVLong();
+ ordOffsets[i] = offset;
+ }
+ }
+ }
+
/** Read the vector values from the index input. This supports both iterated and random access. */
- private final static class OffHeapVectorValues extends VectorValues {
+ private final class OffHeapVectorValues extends VectorValues implements RandomAccessVectorValuesProducer {
final FieldEntry fieldEntry;
final IndexInput dataIn;
@@ -277,24 +350,46 @@
}
@Override
- public RandomAccess randomAccess() {
+ public RandomAccessVectorValues randomAccess() {
return new OffHeapRandomAccess(dataIn.clone());
}
+ @Override
+ public TopDocs search(float[] vector, int topK, int fanout) throws IOException {
+ // use a seed that is fixed for the index so we get reproducible results for the same query
+ final Random random = new Random(checksumSeed);
+ Neighbors results = HnswGraph.search(vector, topK + fanout, topK + fanout, randomAccess(), getGraphValues(fieldEntry), random);
+ while (results.size() > topK) {
+ results.pop();
+ }
+ int i = 0;
+ ScoreDoc[] scoreDocs = new ScoreDoc[Math.min(results.size(), topK)];
+ boolean reversed = searchStrategy().reversed;
+ while (results.size() > 0) {
+ Neighbor n = results.pop();
+ float score;
+ if (reversed) {
+ score = (float) Math.exp(- n.score() / vector.length);
+ } else {
+ score = n.score();
+ }
+ scoreDocs[scoreDocs.length - ++i] = new ScoreDoc(fieldEntry.ordToDoc[n.node()], score);
+ }
+ // always return >=; the case where we can assert == is only when there are fewer than topK vectors in the index
+ return new TopDocs(new TotalHits(results.visitedCount(), TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO), scoreDocs);
+ }
- class OffHeapRandomAccess implements VectorValues.RandomAccess {
+ class OffHeapRandomAccess implements RandomAccessVectorValues {
final IndexInput dataIn;
final BytesRef binaryValue;
final ByteBuffer byteBuffer;
final FloatBuffer floatBuffer;
- final int byteSize;
final float[] value;
OffHeapRandomAccess(IndexInput dataIn) {
this.dataIn = dataIn;
- byteSize = Float.BYTES * dimension();
byteBuffer = ByteBuffer.allocate(byteSize);
floatBuffer = byteBuffer.asFloatBuffer();
value = new float[dimension()];
@@ -335,11 +430,41 @@
dataIn.seek(offset);
dataIn.readBytes(byteBuffer.array(), byteBuffer.arrayOffset(), byteSize);
}
+ }
+ }
- @Override
- public TopDocs search(float[] vector, int topK, int fanout) throws IOException {
- throw new UnsupportedOperationException();
+ /** Read the nearest-neighbors graph from the index input */
+ private final class IndexedKnnGraphReader extends KnnGraphValues {
+
+ final HnswGraphFieldEntry entry;
+ final IndexInput dataIn;
+
+ int arcCount;
+ int arcUpTo;
+ int arc;
+
+ IndexedKnnGraphReader(HnswGraphFieldEntry entry, IndexInput dataIn) {
+ this.entry = entry;
+ this.dataIn = dataIn;
+ }
+
+ @Override
+ public void seek(int targetOrd) throws IOException {
+ // unsafe; no bounds checking
+ dataIn.seek(entry.ordOffsets[targetOrd]);
+ arcCount = dataIn.readInt();
+ arc = -1;
+ arcUpTo = 0;
+ }
+
+ @Override
+ public int nextNeighbor() throws IOException {
+ if (arcUpTo >= arcCount) {
+ return NO_MORE_DOCS;
}
+ ++arcUpTo;
+ arc += dataIn.readVInt();
+ return arc;
}
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90VectorWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90VectorWriter.java
index e64e061..71d103b 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90VectorWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90VectorWriter.java
@@ -18,18 +18,20 @@
package org.apache.lucene.codecs.lucene90;
import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
+import java.util.Arrays;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.VectorWriter;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.RandomAccessVectorValuesProducer;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.VectorValues;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.hnsw.HnswGraph;
+import org.apache.lucene.util.hnsw.HnswGraphBuilder;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
@@ -39,7 +41,7 @@
*/
public final class Lucene90VectorWriter extends VectorWriter {
- private final IndexOutput meta, vectorData;
+ private final IndexOutput meta, vectorData, vectorIndex;
private boolean finished;
@@ -52,6 +54,9 @@
String vectorDataFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene90VectorFormat.VECTOR_DATA_EXTENSION);
vectorData = state.directory.createOutput(vectorDataFileName, state.context);
+ String indexDataFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene90VectorFormat.VECTOR_INDEX_EXTENSION);
+ vectorIndex = state.directory.createOutput(indexDataFileName, state.context);
+
try {
CodecUtil.writeIndexHeader(meta,
Lucene90VectorFormat.META_CODEC_NAME,
@@ -61,6 +66,10 @@
Lucene90VectorFormat.VECTOR_DATA_CODEC_NAME,
Lucene90VectorFormat.VERSION_CURRENT,
state.segmentInfo.getId(), state.segmentSuffix);
+ CodecUtil.writeIndexHeader(vectorIndex,
+ Lucene90VectorFormat.VECTOR_INDEX_CODEC_NAME,
+ Lucene90VectorFormat.VERSION_CURRENT,
+ state.segmentInfo.getId(), state.segmentSuffix);
} catch (IOException e) {
IOUtils.closeWhileHandlingException(this);
}
@@ -69,17 +78,47 @@
@Override
public void writeField(FieldInfo fieldInfo, VectorValues vectors) throws IOException {
long vectorDataOffset = vectorData.getFilePointer();
+
// TODO - use a better data structure; a bitset? DocsWithFieldSet is p.p. in o.a.l.index
- List<Integer> docIds = new ArrayList<>();
- int docV, ord = 0;
- for (docV = vectors.nextDoc(); docV != NO_MORE_DOCS; docV = vectors.nextDoc(), ord++) {
+ int[] docIds = new int[vectors.size()];
+ int count = 0;
+ for (int docV = vectors.nextDoc(); docV != NO_MORE_DOCS; docV = vectors.nextDoc(), count++) {
+ // write vector
writeVectorValue(vectors);
- docIds.add(docV);
- // TODO: write knn graph value
+ docIds[count] = docV;
}
+ // count may be < vectors.size(), e.g. if some documents were deleted
+ long[] offsets = new long[count];
long vectorDataLength = vectorData.getFilePointer() - vectorDataOffset;
+ long vectorIndexOffset = vectorIndex.getFilePointer();
+ if (vectors.searchStrategy().isHnsw()) {
+ if (vectors instanceof RandomAccessVectorValuesProducer) {
+ writeGraph(vectorIndex, (RandomAccessVectorValuesProducer) vectors, vectorIndexOffset, offsets, count);
+ } else {
+ throw new IllegalArgumentException("Indexing an HNSW graph requires a random access vector values, got " + vectors);
+ }
+ }
+ long vectorIndexLength = vectorIndex.getFilePointer() - vectorIndexOffset;
if (vectorDataLength > 0) {
- writeMeta(fieldInfo, vectorDataOffset, vectorDataLength, docIds);
+ writeMeta(fieldInfo, vectorDataOffset, vectorDataLength, vectorIndexOffset, vectorIndexLength, count, docIds);
+ if (vectors.searchStrategy().isHnsw()) {
+ writeGraphOffsets(meta, offsets);
+ }
+ }
+ }
+
+ private void writeMeta(FieldInfo field, long vectorDataOffset, long vectorDataLength, long indexDataOffset, long indexDataLength, int size, int[] docIds) throws IOException {
+ meta.writeInt(field.number);
+ meta.writeInt(field.getVectorSearchStrategy().ordinal());
+ meta.writeVLong(vectorDataOffset);
+ meta.writeVLong(vectorDataLength);
+ meta.writeVLong(indexDataOffset);
+ meta.writeVLong(indexDataLength);
+ meta.writeInt(field.getVectorDimension());
+ meta.writeInt(size);
+ for (int i = 0; i < size; i ++) {
+ // TODO: delta-encode, or write as bitset
+ meta.writeVInt(docIds[i]);
}
}
@@ -90,16 +129,28 @@
vectorData.writeBytes(binaryValue.bytes, binaryValue.offset, binaryValue.length);
}
- private void writeMeta(FieldInfo field, long vectorDataOffset, long vectorDataLength, List<Integer> docIds) throws IOException {
- meta.writeInt(field.number);
- meta.writeInt(field.getVectorSearchStrategy().ordinal());
- meta.writeVLong(vectorDataOffset);
- meta.writeVLong(vectorDataLength);
- meta.writeInt(field.getVectorDimension());
- meta.writeInt(docIds.size());
- for (Integer docId : docIds) {
- // TODO: delta-encode, or write as bitset
- meta.writeVInt(docId);
+ private void writeGraphOffsets(IndexOutput out, long[] offsets) throws IOException {
+ long last = 0;
+ for (long offset : offsets) {
+ out.writeVLong(offset - last);
+ last = offset;
+ }
+ }
+
+ private void writeGraph(IndexOutput graphData, RandomAccessVectorValuesProducer vectorValues, long graphDataOffset, long[] offsets, int count) throws IOException {
+ HnswGraph graph = HnswGraphBuilder.build(vectorValues);
+ for (int ord = 0; ord < count; ord++) {
+ // write graph
+ offsets[ord] = graphData.getFilePointer() - graphDataOffset;
+ int[] arcs = graph.getNeighbors(ord);
+ Arrays.sort(arcs);
+ graphData.writeInt(arcs.length);
+ int lastArc = -1; // sentinel below any valid ord so the first delta and the ordering assert hold
+ for (int arc : arcs) {
+ assert arc > lastArc : "arcs out of order: " + lastArc + "," + arc;
+ graphData.writeVInt(arc - lastArc);
+ lastArc = arc;
+ }
}
}
@@ -117,11 +168,12 @@
}
if (vectorData != null) {
CodecUtil.writeFooter(vectorData);
+ CodecUtil.writeFooter(vectorIndex);
}
}
@Override
public void close() throws IOException {
- IOUtils.close(meta, vectorData);
+ IOUtils.close(meta, vectorData, vectorIndex);
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/document/FeatureField.java b/lucene/core/src/java/org/apache/lucene/document/FeatureField.java
index 2ca048c..dafcbf4 100644
--- a/lucene/core/src/java/org/apache/lucene/document/FeatureField.java
+++ b/lucene/core/src/java/org/apache/lucene/document/FeatureField.java
@@ -66,7 +66,7 @@
* 2<sup>-8</sup> = 0.00390625.
* <p>
* Given a scoring factor {@code S > 0} and its weight {@code w > 0}, there
- * are three ways that S can be turned into a score:
+ * are four ways that S can be turned into a score:
* <ul>
* <li>{@link #newLogQuery w * log(a + S)}, with a ≥ 1. This function
* usually makes sense because the distribution of scoring factors
@@ -82,6 +82,12 @@
* than the two above but is also harder to tune due to the fact it has
* 2 parameters. Like with {@code satu}, values are in the 0..1 range and
* 0.5 is obtained when S and k are equal.
+ * <li>{@link #newLinearQuery w * S}. Expert: This function doesn't apply
+ * any transformation to an indexed feature value, and the indexed value itself,
+ * multiplied by weight, determines the score. Thus, there is an expectation
+ * that a feature value is encoded in the index in a way that makes
+ * sense for scoring.
+ *
* </ul>
* <p>
* The constants in the above formulas typically need training in order to
@@ -217,6 +223,46 @@
FeatureFunction rewrite(IndexReader reader) throws IOException { return this; }
}
+ static final class LinearFunction extends FeatureFunction {
+ @Override
+ SimScorer scorer(float w) {
+ return new SimScorer() {
+ @Override
+ public float score(float freq, long norm) {
+ return (w * decodeFeatureValue(freq));
+ }
+ };
+ }
+
+ @Override
+ Explanation explain(String field, String feature, float w, int freq) {
+ float featureValue = decodeFeatureValue(freq);
+ float score = scorer(w).score(freq, 1L);
+ return Explanation.match(score,
+ "Linear function on the " + field + " field for the " + feature + " feature, computed as w * S from:",
+ Explanation.match(w, "w, weight of this function"),
+ Explanation.match(featureValue, "S, feature value"));
+ }
+
+ @Override
+ public String toString() {
+ return "LinearFunction";
+ }
+
+ @Override
+ public int hashCode() {
+ return getClass().hashCode();
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (obj == null || getClass() != obj.getClass()) {
+ return false;
+ }
+ return true;
+ }
+ };
+
static final class LogFunction extends FeatureFunction {
private final float scalingFactor;
@@ -406,6 +452,26 @@
*/
private static final float MAX_WEIGHT = Long.SIZE;
+
+ /**
+ * Return a new {@link Query} that will score documents as
+ * {@code weight * S} where S is the value of the static feature.
+ * @param fieldName field that stores features
+ * @param featureName name of the feature
+ * @param weight weight to give to this feature, must be in (0,64]
+ * @throws IllegalArgumentException if weight is not in (0,64]
+ */
+ public static Query newLinearQuery(String fieldName, String featureName, float weight) {
+ if (weight <= 0 || weight > MAX_WEIGHT) {
+ throw new IllegalArgumentException("weight must be in (0, " + MAX_WEIGHT + "], got: " + weight);
+ }
+ Query q = new FeatureQuery(fieldName, featureName, new LinearFunction());
+ if (weight != 1f) {
+ q = new BoostQuery(q, weight);
+ }
+ return q;
+ }
+
/**
* Return a new {@link Query} that will score documents as
* {@code weight * Math.log(scalingFactor + S)} where S is the value of the static feature.
diff --git a/lucene/misc/src/java/org/apache/lucene/document/InetAddressPoint.java b/lucene/core/src/java/org/apache/lucene/document/InetAddressPoint.java
similarity index 100%
rename from lucene/misc/src/java/org/apache/lucene/document/InetAddressPoint.java
rename to lucene/core/src/java/org/apache/lucene/document/InetAddressPoint.java
diff --git a/lucene/misc/src/java/org/apache/lucene/document/InetAddressRange.java b/lucene/core/src/java/org/apache/lucene/document/InetAddressRange.java
similarity index 100%
rename from lucene/misc/src/java/org/apache/lucene/document/InetAddressRange.java
rename to lucene/core/src/java/org/apache/lucene/document/InetAddressRange.java
diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexableFieldType.java b/lucene/core/src/java/org/apache/lucene/index/IndexableFieldType.java
index ce19472..38c1c2a 100644
--- a/lucene/core/src/java/org/apache/lucene/index/IndexableFieldType.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexableFieldType.java
@@ -28,7 +28,7 @@
public interface IndexableFieldType {
/** True if the field's value should be stored */
- public boolean stored();
+ boolean stored();
/**
* True if this field's value should be analyzed by the
@@ -39,7 +39,7 @@
*/
// TODO: shouldn't we remove this? Whether/how a field is
// tokenized is an impl detail under Field?
- public boolean tokenized();
+ boolean tokenized();
/**
* True if this field's indexed form should be also stored
@@ -52,7 +52,7 @@
* This option is illegal if {@link #indexOptions()} returns
* IndexOptions.NONE.
*/
- public boolean storeTermVectors();
+ boolean storeTermVectors();
/**
* True if this field's token character offsets should also
@@ -61,7 +61,7 @@
* This option is illegal if term vectors are not enabled for the field
* ({@link #storeTermVectors()} is false)
*/
- public boolean storeTermVectorOffsets();
+ boolean storeTermVectorOffsets();
/**
* True if this field's token positions should also be stored
@@ -70,7 +70,7 @@
* This option is illegal if term vectors are not enabled for the field
* ({@link #storeTermVectors()} is false).
*/
- public boolean storeTermVectorPositions();
+ boolean storeTermVectorPositions();
/**
* True if this field's token payloads should also be stored
@@ -79,7 +79,7 @@
* This option is illegal if term vector positions are not enabled
* for the field ({@link #storeTermVectors()} is false).
*/
- public boolean storeTermVectorPayloads();
+ boolean storeTermVectorPayloads();
/**
* True if normalization values should be omitted for the field.
@@ -87,42 +87,42 @@
* This saves memory, but at the expense of scoring quality (length normalization
* will be disabled), and if you omit norms, you cannot use index-time boosts.
*/
- public boolean omitNorms();
+ boolean omitNorms();
/** {@link IndexOptions}, describing what should be
* recorded into the inverted index */
- public IndexOptions indexOptions();
+ IndexOptions indexOptions();
/**
* DocValues {@link DocValuesType}: how the field's value will be indexed
* into docValues.
*/
- public DocValuesType docValuesType();
+ DocValuesType docValuesType();
/**
* If this is positive (representing the number of point dimensions), the field is indexed as a point.
*/
- public int pointDimensionCount();
+ int pointDimensionCount();
/**
* The number of dimensions used for the index key
*/
- public int pointIndexDimensionCount();
+ int pointIndexDimensionCount();
/**
* The number of bytes in each dimension's values.
*/
- public int pointNumBytes();
+ int pointNumBytes();
/**
* The number of dimensions of the field's vector value
*/
- public int vectorDimension();
+ int vectorDimension();
/**
* The {@link VectorValues.SearchStrategy} of the field's vector value
*/
- public VectorValues.SearchStrategy vectorSearchStrategy();
+ VectorValues.SearchStrategy vectorSearchStrategy();
/**
* Attributes for the field type.
@@ -132,5 +132,5 @@
*
* @return Map
*/
- public Map<String, String> getAttributes();
+ Map<String, String> getAttributes();
}
diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexingChain.java b/lucene/core/src/java/org/apache/lucene/index/IndexingChain.java
index c20a453..03d5aea 100644
--- a/lucene/core/src/java/org/apache/lucene/index/IndexingChain.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexingChain.java
@@ -584,17 +584,15 @@
if (fp == null) {
fp = getOrAddField(fieldName, fieldType, false);
}
- if (fieldType.stored()) {
- String value = field.stringValue();
- if (value != null && value.length() > IndexWriter.MAX_STORED_STRING_LENGTH) {
- throw new IllegalArgumentException("stored field \"" + field.name() + "\" is too large (" + value.length() + " characters) to store");
- }
- try {
- storedFieldsConsumer.writeField(fp.fieldInfo, field);
- } catch (Throwable th) {
- onAbortingException(th);
- throw th;
- }
+ String value = field.stringValue();
+ if (value != null && value.length() > IndexWriter.MAX_STORED_STRING_LENGTH) {
+ throw new IllegalArgumentException("stored field \"" + field.name() + "\" is too large (" + value.length() + " characters) to store");
+ }
+ try {
+ storedFieldsConsumer.writeField(fp.fieldInfo, field);
+ } catch (Throwable th) {
+ onAbortingException(th);
+ throw th;
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/index/KnnGraphValues.java b/lucene/core/src/java/org/apache/lucene/index/KnnGraphValues.java
new file mode 100644
index 0000000..d3ee0dc
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/index/KnnGraphValues.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.index;
+
+import java.io.IOException;
+
+import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
+
+/**
+ * Access to per-document neighbor lists in a (hierarchical) knn search graph.
+ * @lucene.experimental
+ */
+public abstract class KnnGraphValues {
+
+ /** Sole constructor */
+ protected KnnGraphValues() {}
+
+ /** Move the pointer to exactly {@code target}, the id of a node in the graph.
+ * After this method returns, call {@link #nextNeighbor()} to return successive (ordered) connected node ordinals.
+ * @param target must be a valid node in the graph, ie. ≥ 0 and < {@link VectorValues#size()}.
+ */
+ public abstract void seek(int target) throws IOException;
+
+ /**
+ * Iterates over the neighbor list. It is illegal to call this method after it returns
+ * NO_MORE_DOCS without calling {@link #seek(int)}, which resets the iterator.
+ * @return a node ordinal in the graph, or NO_MORE_DOCS if the iteration is complete.
+ */
+ public abstract int nextNeighbor() throws IOException;
+
+ /** Empty graph value */
+ public static KnnGraphValues EMPTY = new KnnGraphValues() {
+
+ @Override
+ public int nextNeighbor() {
+ return NO_MORE_DOCS;
+ }
+
+ @Override
+ public void seek(int target) {
+ }
+ };
+}
diff --git a/lucene/core/src/java/org/apache/lucene/index/OrdinalMap.java b/lucene/core/src/java/org/apache/lucene/index/OrdinalMap.java
index 1e73f1c..0284edd 100644
--- a/lucene/core/src/java/org/apache/lucene/index/OrdinalMap.java
+++ b/lucene/core/src/java/org/apache/lucene/index/OrdinalMap.java
@@ -195,6 +195,7 @@
// slow anyway
PackedLongValues.Builder globalOrdDeltas = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
PackedLongValues.Builder firstSegments = PackedLongValues.packedBuilder(PackedInts.COMPACT);
+ long firstSegmentBits = 0L;
final PackedLongValues.Builder[] ordDeltas = new PackedLongValues.Builder[subs.length];
for (int i = 0; i < ordDeltas.length; i++) {
ordDeltas[i] = PackedLongValues.monotonicBuilder(acceptableOverheadRatio);
@@ -269,6 +270,7 @@
// for each unique term, just mark the first segment index/delta where it occurs
firstSegments.add(firstSegmentIndex);
+ firstSegmentBits |= firstSegmentIndex;
globalOrdDeltas.add(globalOrdDelta);
globalOrd++;
}
@@ -278,7 +280,7 @@
// If the first segment contains all of the global ords, then we can apply a small optimization
// and hardcode the first segment indices and global ord deltas as all zeroes.
- if (ordDeltaBits.length > 0 && ordDeltaBits[0] == 0L && ordDeltas[0].size() == this.valueCount) {
+ if (ordDeltaBits.length > 0 && ordDeltaBits[0] == 0L && firstSegmentBits == 0L) {
this.firstSegments = LongValues.ZEROES;
this.globalOrdDeltas = LongValues.ZEROES;
} else {
diff --git a/lucene/core/src/java/org/apache/lucene/index/PointValues.java b/lucene/core/src/java/org/apache/lucene/index/PointValues.java
index 578a77f..5ef86af 100644
--- a/lucene/core/src/java/org/apache/lucene/index/PointValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/PointValues.java
@@ -25,6 +25,7 @@
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FloatPoint;
+import org.apache.lucene.document.InetAddressPoint;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.LatLonPoint;
import org.apache.lucene.document.LongPoint;
@@ -47,8 +48,8 @@
* <tr><td>{@code float}</td><td>{@link FloatPoint}</td></tr>
* <tr><td>{@code double}</td><td>{@link DoublePoint}</td></tr>
* <tr><td>{@code byte[]}</td><td>{@link BinaryPoint}</td></tr>
+ * <tr><td>{@link InetAddress}</td><td>{@link InetAddressPoint}</td></tr>
* <tr><td>{@link BigInteger}</td><td><a href="{@docRoot}/../sandbox/org/apache/lucene/sandbox/document/BigIntegerPoint.html">BigIntegerPoint</a>*</td></tr>
- * <tr><td>{@link InetAddress}</td><td><a href="{@docRoot}/../misc/org/apache/lucene/document/InetAddressPoint.html">InetAddressPoint</a>*</td></tr>
* </table>
* * in the <i>lucene-sandbox</i> jar<br>
* <p>
diff --git a/lucene/core/src/java/org/apache/lucene/index/RandomAccessVectorValues.java b/lucene/core/src/java/org/apache/lucene/index/RandomAccessVectorValues.java
new file mode 100644
index 0000000..f19ea33
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/index/RandomAccessVectorValues.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.index;
+
+import java.io.IOException;
+
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * Provides random access to vectors by dense ordinal.
+ *
+ * @lucene.experimental
+ */
+public interface RandomAccessVectorValues {
+
+ /**
+ * Return the number of vector values
+ */
+ int size();
+
+ /**
+ * Return the dimension of the returned vector values
+ */
+ int dimension();
+
+ /**
+ * Return the search strategy used to compare these vectors
+ */
+ VectorValues.SearchStrategy searchStrategy();
+
+ /**
+ * Return the vector value indexed at the given ordinal. The provided floating point array may
+ * be shared and overwritten by subsequent calls to this method and {@link #binaryValue(int)}.
+ * @param targetOrd a valid ordinal, ≥ 0 and < {@link #size()}.
+ */
+ float[] vectorValue(int targetOrd) throws IOException;
+
+ /**
+ * Return the vector indexed at the given ordinal value as an array of bytes in a BytesRef;
+ * these are the bytes corresponding to the float array. The provided bytes may be shared and overwritten
+ * by subsequent calls to this method and {@link #vectorValue(int)}.
+ * @param targetOrd a valid ordinal, ≥ 0 and < {@link #size()}.
+ */
+ BytesRef binaryValue(int targetOrd) throws IOException;
+}
diff --git a/lucene/core/src/java/org/apache/lucene/index/RandomAccessVectorValuesProducer.java b/lucene/core/src/java/org/apache/lucene/index/RandomAccessVectorValuesProducer.java
new file mode 100644
index 0000000..beabace
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/index/RandomAccessVectorValuesProducer.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.index;
+
+/**
+ * Something (generally a {@link VectorValues}) that provides a {@link RandomAccessVectorValues}.
+ *
+ * @lucene.experimental
+ */
+public interface RandomAccessVectorValuesProducer {
+ /**
+ * Return a random access interface over this iterator's vectors. Calling the RandomAccess methods will
+ * have no effect on the progress of the iteration or the values returned by this iterator. Successive calls
+ * will retrieve independent copies that do not overwrite each others' returned values.
+ */
+ RandomAccessVectorValues randomAccess();
+}
diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java b/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
index ed04ae0..aa61ea1 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
@@ -224,7 +224,7 @@
int numMerged = merger.merge();
if (mergeState.infoStream.isEnabled("SM")) {
long t1 = System.nanoTime();
- mergeState.infoStream.message("SM", ((t1 - t0) / 1000000) + " msec to merge vector values [" + numMerged + " docs]");
+ mergeState.infoStream.message("SM", ((t1 - t0) / 1000000) + " msec to merge " + formatName + " [" + numMerged + " docs]");
}
return numMerged;
}
@@ -237,8 +237,7 @@
merger.merge();
if (mergeState.infoStream.isEnabled("SM")) {
long t1 = System.nanoTime();
- mergeState.infoStream.message("SM", ((t1 - t0) / 1000000) + " msec to merge vector values [" + numMerged + " docs]");
+ mergeState.infoStream.message("SM", ((t1 - t0) / 1000000) + " msec to merge " + formatName + " [" + numMerged + " docs]");
}
}
-
}
diff --git a/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java b/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java
index b2ce9aa..2cb8ee3 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java
@@ -190,7 +190,7 @@
}
};
}
-
+
private static NormsProducer readerToNormsProducer(final LeafReader reader) {
return new NormsProducer() {
diff --git a/lucene/core/src/java/org/apache/lucene/index/VectorValues.java b/lucene/core/src/java/org/apache/lucene/index/VectorValues.java
index c6534f0..7ede15b 100644
--- a/lucene/core/src/java/org/apache/lucene/index/VectorValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/VectorValues.java
@@ -23,6 +23,9 @@
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.BytesRef;
+import static org.apache.lucene.util.VectorUtil.dotProduct;
+import static org.apache.lucene.util.VectorUtil.squareDistance;
+
/**
* This class provides access to per-document floating point vector values indexed as {@link
* org.apache.lucene.document.VectorField}.
@@ -75,76 +78,75 @@
}
/**
- * Return a random access interface over this iterator's vectors. Calling the RandomAccess methods will
- * have no effect on the progress of the iteration or the values returned by this iterator. Successive calls
- * will retrieve independent copies that do not overwrite each others' returned values.
+ * Return the k nearest neighbor documents as determined by comparison of their vector values
+ * for this field, to the given vector, by the field's search strategy. If the search strategy is
+ * reversed, lower values indicate nearer vectors, otherwise higher scores indicate nearer
+ * vectors. Unlike relevance scores, vector scores may be negative.
+ * @param target the vector-valued query
+ * @param k the number of docs to return
+ * @param fanout control the accuracy/speed tradeoff - larger values give better recall at higher cost
+ * @return the k nearest neighbor documents, along with their (searchStrategy-specific) scores.
*/
- public abstract RandomAccess randomAccess();
-
- /**
- * Provides random access to vectors by dense ordinal.
- *
- * @lucene.experimental
- */
- public interface RandomAccess {
-
- /**
- * Return the number of vector values
- */
- int size();
-
- /**
- * Return the dimension of the returned vector values
- */
- int dimension();
-
- /**
- * Return the search strategy used to compare these vectors
- */
- SearchStrategy searchStrategy();
-
- /**
- * Return the vector value indexed at the given ordinal. The provided floating point array may
- * be shared and overwritten by subsequent calls to this method and {@link #binaryValue(int)}.
- * @param targetOrd a valid ordinal, ≥ 0 and < {@link #size()}.
- */
- float[] vectorValue(int targetOrd) throws IOException;
-
- /**
- * Return the vector indexed at the given ordinal value as an array of bytes in a BytesRef;
- * these are the bytes corresponding to the float array. The provided bytes may be shared and overwritten
- * by subsequent calls to this method and {@link #vectorValue(int)}.
- * @param targetOrd a valid ordinal, ≥ 0 and < {@link #size()}.
- */
- BytesRef binaryValue(int targetOrd) throws IOException;
-
- /**
- * Return the k nearest neighbor documents as determined by comparison of their vector values
- * for this field, to the given vector, by the field's search strategy. If the search strategy is
- * reversed, lower values indicate nearer vectors, otherwise higher scores indicate nearer
- * vectors. Unlike relevance scores, vector scores may be negative.
- * @param target the vector-valued query
- * @param k the number of docs to return
- * @param fanout control the accuracy/speed tradeoff - larger values give better recall at higher cost
- * @return the k nearest neighbor documents, along with their (searchStrategy-specific) scores.
- */
- TopDocs search(float[] target, int k, int fanout) throws IOException;
- }
+ public abstract TopDocs search(float[] target, int k, int fanout) throws IOException;
/**
* Search strategy. This is a label describing the method used during indexing and searching of the vectors in order to
* determine the nearest neighbors.
*/
public enum SearchStrategy {
- /** No search strategy is provided. Note: {@link VectorValues.RandomAccess#search(float[], int, int)}
+
+ /** No search strategy is provided. Note: {@link VectorValues#search(float[], int, int)}
* is not supported for fields specifying this strategy. */
NONE,
/** HNSW graph built using Euclidean distance */
- EUCLIDEAN_HNSW,
+ EUCLIDEAN_HNSW(true),
/** HNSW graph buit using dot product */
- DOT_PRODUCT_HNSW
+ DOT_PRODUCT_HNSW;
+
+ /** If true, the scores associated with vector comparisons in this strategy are in reverse order; that is,
+ * lower scores represent more similar vectors. Otherwise, if false, higher scores represent more similar vectors.
+ */
+ public final boolean reversed;
+
+ SearchStrategy(boolean reversed) {
+ this.reversed = reversed;
+ }
+
+ SearchStrategy() {
+ reversed = false;
+ }
+
+ /**
+ * Calculates a similarity score between the two vectors with a specified function.
+ * @param v1 a vector
+ * @param v2 another vector, of the same dimension
+ * @return the value of the strategy's score function applied to the two vectors
+ */
+ public float compare(float[] v1, float[] v2) {
+ switch (this) {
+ case EUCLIDEAN_HNSW:
+ return squareDistance(v1, v2);
+ case DOT_PRODUCT_HNSW:
+ return dotProduct(v1, v2);
+ default:
+ throw new IllegalStateException("Incomparable search strategy: " + this);
+ }
+ }
+
+ /**
+ * Return true if vectors indexed using this strategy will be indexed using an HNSW graph
+ */
+ public boolean isHnsw() {
+ switch (this) {
+ case EUCLIDEAN_HNSW:
+ case DOT_PRODUCT_HNSW:
+ return true;
+ default:
+ return false;
+ }
+ }
}
/**
@@ -174,7 +176,7 @@
}
@Override
- public RandomAccess randomAccess() {
+ public TopDocs search(float[] target, int k, int fanout) {
throw new UnsupportedOperationException();
}
diff --git a/lucene/core/src/java/org/apache/lucene/index/VectorValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/VectorValuesWriter.java
index ae39b3a..aa78d07 100644
--- a/lucene/core/src/java/org/apache/lucene/index/VectorValuesWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/VectorValuesWriter.java
@@ -98,17 +98,17 @@
}
}
- static class SortingVectorValues extends VectorValues {
+ static class SortingVectorValues extends VectorValues implements RandomAccessVectorValuesProducer {
private final VectorValues delegate;
- private final VectorValues.RandomAccess randomAccess;
+ private final RandomAccessVectorValues randomAccess;
private final int[] docIdOffsets;
private final int[] ordMap;
private int docId = -1;
SortingVectorValues(VectorValues delegate, Sorter.DocMap sortMap) throws IOException {
this.delegate = delegate;
- randomAccess = delegate.randomAccess();
+ randomAccess = ((RandomAccessVectorValuesProducer) delegate).randomAccess();
docIdOffsets = new int[sortMap.size()];
int offset = 1; // 0 means no vector for this (field, document)
@@ -152,8 +152,8 @@
}
@Override
- public float[] vectorValue() {
- throw new UnsupportedOperationException();
+ public float[] vectorValue() throws IOException {
+ return randomAccess.vectorValue(docIdOffsets[docId] - 1);
}
@Override
@@ -177,49 +177,52 @@
}
@Override
+ public TopDocs search(float[] target, int k, int fanout) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
public long cost() {
return size();
}
@Override
- public RandomAccess randomAccess() {
- RandomAccess ra = delegate.randomAccess();
- return new RandomAccess() {
+ public RandomAccessVectorValues randomAccess() {
+
+ // Must make a new delegate randomAccess so that we have our own distinct float[]
+ final RandomAccessVectorValues delegateRA = ((RandomAccessVectorValuesProducer) SortingVectorValues.this.delegate).randomAccess();
+
+ return new RandomAccessVectorValues() {
@Override
public int size() {
- return delegate.size();
+ return delegateRA.size();
}
@Override
public int dimension() {
- return delegate.dimension();
+ return delegateRA.dimension();
}
@Override
public SearchStrategy searchStrategy() {
- return delegate.searchStrategy();
+ return delegateRA.searchStrategy();
}
@Override
public float[] vectorValue(int targetOrd) throws IOException {
- return ra.vectorValue(ordMap[targetOrd]);
+ return delegateRA.vectorValue(ordMap[targetOrd]);
}
@Override
public BytesRef binaryValue(int targetOrd) {
throw new UnsupportedOperationException();
}
-
- @Override
- public TopDocs search(float[] target, int k, int fanout) {
- throw new UnsupportedOperationException();
- }
};
}
}
- private static class BufferedVectorValues extends VectorValues implements VectorValues.RandomAccess {
+ private static class BufferedVectorValues extends VectorValues implements RandomAccessVectorValues, RandomAccessVectorValuesProducer {
final DocsWithFieldSet docsWithField;
@@ -249,8 +252,8 @@
}
@Override
- public RandomAccess randomAccess() {
- return this;
+ public RandomAccessVectorValues randomAccess() {
+ return new BufferedVectorValues(docsWithField, vectors, dimension, searchStrategy);
}
@Override
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/BoundsChecker.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/BoundsChecker.java
new file mode 100644
index 0000000..e02cc40
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/BoundsChecker.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.util.hnsw;
+
+abstract class BoundsChecker {
+
+ float bound;
+
+ /**
+ * Update the bound if sample is better
+ */
+ abstract void update(float sample);
+
+ /**
+ * Return whether the sample exceeds (is worse than) the bound
+ */
+ abstract boolean check(float sample);
+
+ static BoundsChecker create(boolean reversed) {
+ if (reversed) {
+ return new Min();
+ } else {
+ return new Max();
+ }
+ }
+
+ static class Max extends BoundsChecker {
+ Max() {
+ bound = Float.NEGATIVE_INFINITY;
+ }
+
+ void update(float sample) {
+ if (sample > bound) {
+ bound = sample;
+ }
+ }
+
+ boolean check(float sample) {
+ return sample < bound;
+ }
+ }
+
+ static class Min extends BoundsChecker {
+
+ Min() {
+ bound = Float.POSITIVE_INFINITY;
+ }
+
+ void update(float sample) {
+ if (sample < bound) {
+ bound = sample;
+ }
+ }
+
+ boolean check(float sample) {
+ return sample > bound;
+ }
+ }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraph.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraph.java
new file mode 100644
index 0000000..ed7be7d
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraph.java
@@ -0,0 +1,223 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.util.hnsw;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Random;
+import java.util.Set;
+import java.util.TreeSet;
+
+import org.apache.lucene.index.KnnGraphValues;
+import org.apache.lucene.index.RandomAccessVectorValues;
+import org.apache.lucene.index.VectorValues;
+
+import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
+
+/**
+ * Navigable Small-world graph. Provides efficient approximate nearest neighbor
+ * search for high dimensional vectors. See <a href="https://doi.org/10.1016/j.is.2013.10.006">Approximate nearest
+ * neighbor algorithm based on navigable small world graphs [2014]</a> and <a
+ * href="https://arxiv.org/abs/1603.09320">this paper [2018]</a> for details.
+ *
+ * The nomenclature is a bit different here from what's used in those papers:
+ *
+ * <h3>Hyperparameters</h3>
+ * <ul>
+ * <li><code>numSeed</code> is the equivalent of <code>m</code> in the 2012 paper; it controls the number of random entry points to sample.</li>
+ * <li><code>beamWidth</code> in {@link HnswGraphBuilder} has the same meaning as <code>efConst</code> in the 2016 paper. It is the number of
+ * nearest neighbor candidates to track while searching the graph for each newly inserted node.</li>
+ * <li><code>maxConn</code> has the same meaning as <code>M</code> in the later paper; it controls how many of the <code>efConst</code> neighbors are
+ * connected to the new node</li>
+ * <li><code>fanout</code> the fanout parameter of {@link VectorValues#search(float[], int, int)}
+ * is used to control the values of <code>numSeed</code> and <code>topK</code> that are passed to this API.
+ * Thus <code>fanout</code> is like a combination of <code>ef</code> (search beam width) from the 2016 paper and <code>m</code> from the 2014 paper.
+ * </li>
+ * </ul>
+ *
+ * <p>Note: The graph may be searched by multiple threads concurrently, but updates are not thread-safe. Also note: there is no notion of
+ * deletions. Document searching built on top of this must do its own deletion-filtering.</p>
+ */
+public final class HnswGraph {
+
+ private final int maxConn;
+ private final VectorValues.SearchStrategy searchStrategy;
+
+ // Each entry lists the top maxConn neighbors of a node. The nodes correspond to vectors added to HnswBuilder, and the
+ // node values are the ordinals of those vectors.
+ private final List<Neighbors> graph;
+
+ HnswGraph(int maxConn, VectorValues.SearchStrategy searchStrategy) {
+ graph = new ArrayList<>();
+ graph.add(Neighbors.create(maxConn, searchStrategy.reversed));
+ this.maxConn = maxConn;
+ this.searchStrategy = searchStrategy;
+ }
+
+ /**
+ * Searches for the nearest neighbors of a query vector.
+ * @param query search query vector
+ * @param topK the number of nodes to be returned
+ * @param numSeed the number of random entry points to sample
+ * @param vectors vector values
+ * @param graphValues the graph values. May represent the entire graph, or a level in a hierarchical graph.
+ * @param random a source of randomness, used for generating entry points to the graph
+ * @return a priority queue holding the neighbors found
+ */
+ public static Neighbors search(float[] query, int topK, int numSeed, RandomAccessVectorValues vectors, KnnGraphValues graphValues,
+ Random random) throws IOException {
+ VectorValues.SearchStrategy searchStrategy = vectors.searchStrategy();
+ // TODO: use unbounded priority queue
+ TreeSet<Neighbor> candidates;
+ if (searchStrategy.reversed) {
+ candidates = new TreeSet<>(Comparator.reverseOrder());
+ } else {
+ candidates = new TreeSet<>();
+ }
+ int size = vectors.size();
+ for (int i = 0; i < numSeed && i < size; i++) {
+ int entryPoint = random.nextInt(size);
+ candidates.add(new Neighbor(entryPoint, searchStrategy.compare(query, vectors.vectorValue(entryPoint))));
+ }
+ // set of ordinals that have been visited by search on this layer, used to avoid backtracking
+ Set<Integer> visited = new HashSet<>();
+ // TODO: use PriorityQueue's sentinel optimization?
+ Neighbors results = Neighbors.create(topK, searchStrategy.reversed);
+ for (Neighbor c : candidates) {
+ visited.add(c.node());
+ results.insertWithOverflow(c);
+ }
+ // Set the bound to the worst current result and below reject any newly-generated candidates failing
+ // to exceed this bound
+ BoundsChecker bound = BoundsChecker.create(searchStrategy.reversed);
+ bound.bound = results.top().score();
+ while (candidates.size() > 0) {
+ // get the best candidate (closest or best scoring)
+ Neighbor c = candidates.pollLast();
+ if (results.size() >= topK) {
+ if (bound.check(c.score())) {
+ break;
+ }
+ }
+ graphValues.seek(c.node());
+ int friendOrd;
+ while ((friendOrd = graphValues.nextNeighbor()) != NO_MORE_DOCS) {
+ if (visited.contains(friendOrd)) {
+ continue;
+ }
+ visited.add(friendOrd);
+ float score = searchStrategy.compare(query, vectors.vectorValue(friendOrd));
+ if (results.size() < topK || bound.check(score) == false) {
+ Neighbor n = new Neighbor(friendOrd, score);
+ candidates.add(n);
+ results.insertWithOverflow(n);
+ bound.bound = results.top().score();
+ }
+ }
+ }
+ results.setVisitedCount(visited.size());
+ return results;
+ }
+
+ /**
+ * Returns the nodes connected to the given node by its outgoing neighborNodes in an unpredictable order. Each node inserted
+ * by HnswGraphBuilder corresponds to a vector, and the node is the vector's ordinal.
+ * @param node the node whose friends are returned
+ */
+ public int[] getNeighbors(int node) {
+ Neighbors neighbors = graph.get(node);
+ int[] result = new int[neighbors.size()];
+ int i = 0;
+ for (Neighbor n : neighbors) {
+ result[i++] = n.node();
+ }
+ return result;
+ }
+
+ /** Connects two nodes symmetrically, limiting the maximum number of connections from either node.
+ * node1 must be less than node2 and must already have been inserted to the graph */
+ void connectNodes(int node1, int node2, float score) {
+ connect(node1, node2, score);
+ if (node2 == graph.size()) {
+ addNode();
+ }
+ connect(node2, node1, score);
+ }
+
+ KnnGraphValues getGraphValues() {
+ return new HnswGraphValues();
+ }
+
+ /**
+ * Makes a connection from the node to a neighbor, dropping the worst connection when maxConn is exceeded
+ * @param node1 node to connect *from*
+ * @param node2 node to connect *to*
+ * @param score searchStrategy.score() of the vectors associated with the two nodes
+ */
+ boolean connect(int node1, int node2, float score) {
+ //System.out.println(" HnswGraph.connect " + node1 + " -> " + node2);
+ assert node1 >= 0 && node2 >= 0;
+ Neighbors nn = graph.get(node1);
+ assert nn != null;
+ if (nn.size() == maxConn) {
+ Neighbor top = nn.top();
+ if (score < top.score() == nn.reversed()) {
+ top.update(node2, score);
+ nn.updateTop();
+ return true;
+ }
+ } else {
+ nn.add(new Neighbor(node2, score));
+ return true;
+ }
+ return false;
+ }
+
+ int addNode() {
+ graph.add(Neighbors.create(maxConn, searchStrategy.reversed));
+ return graph.size() - 1;
+ }
+
+ /**
+ * Present this graph as KnnGraphValues, used for searching while inserting new nodes.
+ */
+ private class HnswGraphValues extends KnnGraphValues {
+
+ private int arcUpTo;
+ private int[] neighborNodes;
+
+ @Override
+ public void seek(int targetNode) {
+ arcUpTo = 0;
+ neighborNodes = HnswGraph.this.getNeighbors(targetNode);
+ }
+
+ @Override
+ public int nextNeighbor() {
+ if (arcUpTo >= neighborNodes.length) {
+ return NO_MORE_DOCS;
+ }
+ return neighborNodes[arcUpTo++];
+ }
+
+ }
+
+}
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphBuilder.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphBuilder.java
new file mode 100644
index 0000000..d116179
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphBuilder.java
@@ -0,0 +1,194 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.util.hnsw;
+
+import java.io.IOException;
+import java.util.Random;
+
+import org.apache.lucene.index.KnnGraphValues;
+import org.apache.lucene.index.RandomAccessVectorValues;
+import org.apache.lucene.index.RandomAccessVectorValuesProducer;
+import org.apache.lucene.index.VectorValues;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * Builder for HNSW graph. See {@link HnswGraph} for a gloss on the algorithm and the meaning of the hyperparameters.
+ */
+public final class HnswGraphBuilder {
+
+ // default random seed for level generation
+ private static final long DEFAULT_RAND_SEED = System.currentTimeMillis();
+
+ // expose for testing.
+ public static long randSeed = DEFAULT_RAND_SEED;
+
+ // These "default" hyperparameter settings are exposed (and nonfinal) to enable performance testing
+ // since the indexing API doesn't provide any control over them.
+
+ // default max connections per node
+ public static int DEFAULT_MAX_CONN = 16;
+
+ // default candidate list size
+ static int DEFAULT_BEAM_WIDTH = 16;
+
+ private final int maxConn;
+ private final int beamWidth;
+
+ private final BoundedVectorValues boundedVectors;
+ private final VectorValues.SearchStrategy searchStrategy;
+ private final HnswGraph hnsw;
+ private final Random random;
+
+ /**
+ * Reads all the vectors from a VectorValues, builds a graph connecting them by their dense ordinals, using default
+ * hyperparameter settings, and returns the resulting graph.
+ * @param vectorValues the vectors whose relations are represented by the graph
+ */
+ public static HnswGraph build(RandomAccessVectorValuesProducer vectorValues) throws IOException {
+ HnswGraphBuilder builder = new HnswGraphBuilder(vectorValues);
+ return builder.build(vectorValues.randomAccess());
+ }
+
+ /**
+ * Reads all the vectors from a VectorValues, builds a graph connecting them by their dense ordinals, using the given
+ * hyperparameter settings, and returns the resulting graph.
+ * @param vectorValues the vectors whose relations are represented by the graph
+ * @param maxConn the number of connections to make when adding a new graph node; roughly speaking the graph fanout.
+ * @param beamWidth the size of the beam search to use when finding nearest neighbors.
+ * @param seed the seed for a random number generator used during graph construction. Provide this to ensure repeatable construction.
+ */
+ public static HnswGraph build(RandomAccessVectorValuesProducer vectorValues, int maxConn, int beamWidth, long seed) throws IOException {
+ HnswGraphBuilder builder = new HnswGraphBuilder(vectorValues, maxConn, beamWidth, seed);
+ return builder.build(vectorValues.randomAccess());
+ }
+
+ /**
+ * Reads all the vectors from two copies of a random access VectorValues. Providing two copies enables efficient retrieval
+ * without extra data copying, while avoiding collision of the returned values.
+ * @param vectors the vectors for which to build a nearest neighbors graph. Must be an independet accessor for the vectors
+ */
+ HnswGraph build(RandomAccessVectorValues vectors) throws IOException {
+ if (vectors == boundedVectors.raDelegate) {
+ throw new IllegalArgumentException("Vectors to build must be independent of the source of vectors provided to HnswGraphBuilder()");
+ }
+ for (int node = 1; node < vectors.size(); node++) {
+ insert(vectors.vectorValue(node));
+ }
+ return hnsw;
+ }
+
+ /** Construct the builder with default configurations */
+ private HnswGraphBuilder(RandomAccessVectorValuesProducer vectors) {
+ this(vectors, DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH, randSeed);
+ }
+
+ /** Full constructor */
+ HnswGraphBuilder(RandomAccessVectorValuesProducer vectors, int maxConn, int beamWidth, long seed) {
+ RandomAccessVectorValues vectorValues = vectors.randomAccess();
+ searchStrategy = vectorValues.searchStrategy();
+ if (searchStrategy == VectorValues.SearchStrategy.NONE) {
+ throw new IllegalStateException("No distance function");
+ }
+ if (maxConn <= 0) {
+ throw new IllegalArgumentException("maxConn must be positive");
+ }
+ if (beamWidth <= 0) {
+ throw new IllegalArgumentException("beamWidth must be positive");
+ }
+ this.maxConn = maxConn;
+ this.beamWidth = beamWidth;
+ boundedVectors = new BoundedVectorValues(vectorValues);
+ this.hnsw = new HnswGraph(maxConn, searchStrategy);
+ random = new Random(seed);
+ }
+
+ /** Inserts a doc with vector value to the graph */
+ private void insert(float[] value) throws IOException {
+ addGraphNode(value);
+
+ // add the vector value
+ boundedVectors.inc();
+ }
+
+ private void addGraphNode(float[] value) throws IOException {
+ KnnGraphValues graphValues = hnsw.getGraphValues();
+ Neighbors candidates = HnswGraph.search(value, beamWidth, 2 * beamWidth, boundedVectors, graphValues, random);
+
+ int node = hnsw.addNode();
+
+ // connect the nearest neighbors to the just inserted node
+ addNearestNeighbors(node, candidates);
+ }
+
+ private void addNearestNeighbors(int newNode, Neighbors neighbors) {
+ // connect the nearest neighbors, relying on the graph's Neighbors' priority queues to drop off distant neighbors
+ for (Neighbor neighbor : neighbors) {
+ if (hnsw.connect(newNode, neighbor.node(), neighbor.score())) {
+ hnsw.connect(neighbor.node(), newNode, neighbor.score());
+ }
+ }
+ }
+
+ /**
+ * Provides a random access VectorValues view over a delegate VectorValues, bounding the maximum ord.
+ * TODO: get rid of this, all it does is track a counter
+ */
+ private static class BoundedVectorValues implements RandomAccessVectorValues {
+
+ final RandomAccessVectorValues raDelegate;
+
+ int size;
+
+ BoundedVectorValues(RandomAccessVectorValues delegate) {
+ raDelegate = delegate;
+ if (delegate.size() > 0) {
+ // we implicitly add the first node
+ size = 1;
+ }
+ }
+
+ void inc() {
+ ++size;
+ }
+
+ @Override
+ public int size() {
+ return size;
+ }
+
+ @Override
+ public int dimension() { return raDelegate.dimension(); }
+
+ @Override
+ public VectorValues.SearchStrategy searchStrategy() {
+ return raDelegate.searchStrategy();
+ }
+
+ @Override
+ public float[] vectorValue(int target) throws IOException {
+ return raDelegate.vectorValue(target);
+ }
+
+ @Override
+ public BytesRef binaryValue(int targetOrd) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+ }
+
+
+}
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/Neighbor.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/Neighbor.java
new file mode 100644
index 0000000..01cf231
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/Neighbor.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.util.hnsw;
+
+/** A neighbor node in the HNSW graph; holds the node ordinal and its distance score. */
+public class Neighbor implements Comparable<Neighbor> {
+
+ private int node;
+
+ private float score;
+
+ public Neighbor(int node, float score) {
+ this.node = node;
+ this.score = score;
+ }
+
+ public int node() {
+ return node;
+ }
+
+ public float score() {
+ return score;
+ }
+
+ void update(int node, float score) {
+ this.node = node;
+ this.score = score;
+ }
+
+ @Override
+ public int compareTo(Neighbor o) {
+ if (score == o.score) {
+ return o.node - node;
+ } else {
+ assert node != o.node : "attempt to add the same node " + node + " twice with different scores: " + score + " != " + o.score;
+ return Float.compare(score, o.score);
+ }
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ return other instanceof Neighbor
+ && ((Neighbor) other).node == node;
+ }
+
+ @Override
+ public int hashCode() {
+ return 39 + 61 * node;
+ }
+
+ @Override
+ public String toString() {
+ return "(" + node + ", " + score + ")";
+ }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/Neighbors.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/Neighbors.java
new file mode 100644
index 0000000..6ca761b
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/Neighbors.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.util.hnsw;
+
+import org.apache.lucene.util.PriorityQueue;
+
+/** Neighbors queue. */
+public abstract class Neighbors extends PriorityQueue<Neighbor> {
+
+ public static Neighbors create(int maxSize, boolean reversed) {
+ if (reversed) {
+ return new ReverseNeighbors(maxSize);
+ } else {
+ return new ForwardNeighbors(maxSize);
+ }
+ }
+
+ public abstract boolean reversed();
+
+ // Used to track the number of neighbors visited during a single graph traversal
+ private int visitedCount;
+
+ private Neighbors(int maxSize) {
+ super(maxSize);
+ }
+
+ private static class ForwardNeighbors extends Neighbors {
+ ForwardNeighbors(int maxSize) {
+ super(maxSize);
+ }
+
+ @Override
+ protected boolean lessThan(Neighbor a, Neighbor b) {
+ if (a.score() == b.score()) {
+ return a.node() > b.node();
+ }
+ return a.score() < b.score();
+ }
+
+ @Override
+ public boolean reversed() { return false; }
+ }
+
+ private static class ReverseNeighbors extends Neighbors {
+ ReverseNeighbors(int maxSize) {
+ super(maxSize);
+ }
+
+ @Override
+ protected boolean lessThan(Neighbor a, Neighbor b) {
+ if (a.score() == b.score()) {
+ return a.node() > b.node();
+ }
+ return b.score() < a.score();
+ }
+
+ @Override
+ public boolean reversed() { return true; }
+ }
+
+ void setVisitedCount(int visitedCount) {
+ this.visitedCount = visitedCount;
+ }
+
+ public int visitedCount() {
+ return visitedCount;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("Neighbors=[");
+ this.iterator().forEachRemaining(sb::append);
+ sb.append("]");
+ return sb.toString();
+ }
+
+}
diff --git a/lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/package-info.java
similarity index 78%
copy from lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java
copy to lucene/core/src/java/org/apache/lucene/util/hnsw/package-info.java
index af94ced..ba95e66 100644
--- a/lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/package-info.java
@@ -15,12 +15,8 @@
* limitations under the License.
*/
-package org.apache.lucene.util;
-
/**
- * Tracks dynamic allocations/deallocations of memory for transient objects
+ * Navigable Small-World graph, nominally Hierarchical but currently only has a single
+ * layer. Provides efficient approximate nearest neighbor search for high dimensional vectors.
*/
-public interface MemoryTracker {
- void updateBytes(long bytes);
- long getBytes();
-}
+package org.apache.lucene.util.hnsw;
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene80/TestLucene80DocValuesFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene80/BaseLucene80DocValuesFormatTestCase.java
similarity index 98%
rename from lucene/core/src/test/org/apache/lucene/codecs/lucene80/TestLucene80DocValuesFormat.java
rename to lucene/core/src/test/org/apache/lucene/codecs/lucene80/BaseLucene80DocValuesFormatTestCase.java
index aa238da..35c07bf 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene80/TestLucene80DocValuesFormat.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene80/BaseLucene80DocValuesFormatTestCase.java
@@ -28,7 +28,6 @@
import java.util.function.Supplier;
import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.asserting.AssertingCodec;
@@ -70,15 +69,8 @@
/**
* Tests Lucene80DocValuesFormat
- * Copied directly from the lucene70 package for separation of codec-code
*/
-public class TestLucene80DocValuesFormat extends BaseCompressingDocValuesFormatTestCase {
- private final Codec codec = TestUtil.alwaysDocValuesFormat(new Lucene80DocValuesFormat());
-
- @Override
- protected Codec getCodec() {
- return codec;
- }
+public abstract class BaseLucene80DocValuesFormatTestCase extends BaseCompressingDocValuesFormatTestCase {
// TODO: these big methods can easily blow up some of the other ram-hungry codecs...
// for now just keep them here, as we want to test this for this format.
@@ -286,7 +278,7 @@
conf.setMergeScheduler(new SerialMergeScheduler());
// set to duel against a codec which has ordinals:
final PostingsFormat pf = TestUtil.getPostingsFormatWithOrds(random());
- final DocValuesFormat dv = new Lucene80DocValuesFormat();
+ final DocValuesFormat dv = getCodec().docValuesFormat();
conf.setCodec(new AssertingCodec() {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
diff --git a/lucene/misc/src/test/org/apache/lucene/store/TestRAFDirectory.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene80/TestBestCompressionLucene80DocValuesFormat.java
similarity index 64%
copy from lucene/misc/src/test/org/apache/lucene/store/TestRAFDirectory.java
copy to lucene/core/src/test/org/apache/lucene/codecs/lucene80/TestBestCompressionLucene80DocValuesFormat.java
index 598c123..2b1fa58 100644
--- a/lucene/misc/src/test/org/apache/lucene/store/TestRAFDirectory.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene80/TestBestCompressionLucene80DocValuesFormat.java
@@ -14,19 +14,20 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.store;
+package org.apache.lucene.codecs.lucene80;
-import java.io.IOException;
-import java.nio.file.Path;
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.util.TestUtil;
/**
- * Tests RAFDirectory
+ * Tests Lucene80DocValuesFormat
*/
-// See: https://issues.apache.org/jira/browse/SOLR-12028 Tests cannot remove files on Windows machines occasionally
-public class TestRAFDirectory extends BaseDirectoryTestCase {
+public class TestBestCompressionLucene80DocValuesFormat extends BaseLucene80DocValuesFormatTestCase {
+ private final Codec codec = TestUtil.alwaysDocValuesFormat(new Lucene80DocValuesFormat(Lucene80DocValuesFormat.Mode.BEST_COMPRESSION));
@Override
- protected Directory getDirectory(Path path) throws IOException {
- return new RAFDirectory(path);
+ protected Codec getCodec() {
+ return codec;
}
+
}
diff --git a/lucene/misc/src/test/org/apache/lucene/store/TestRAFDirectory.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene80/TestBestSpeedLucene80DocValuesFormat.java
similarity index 65%
copy from lucene/misc/src/test/org/apache/lucene/store/TestRAFDirectory.java
copy to lucene/core/src/test/org/apache/lucene/codecs/lucene80/TestBestSpeedLucene80DocValuesFormat.java
index 598c123..7d34105 100644
--- a/lucene/misc/src/test/org/apache/lucene/store/TestRAFDirectory.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene80/TestBestSpeedLucene80DocValuesFormat.java
@@ -14,19 +14,20 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.store;
+package org.apache.lucene.codecs.lucene80;
-import java.io.IOException;
-import java.nio.file.Path;
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.util.TestUtil;
/**
- * Tests RAFDirectory
+ * Tests Lucene80DocValuesFormat
*/
-// See: https://issues.apache.org/jira/browse/SOLR-12028 Tests cannot remove files on Windows machines occasionally
-public class TestRAFDirectory extends BaseDirectoryTestCase {
+public class TestBestSpeedLucene80DocValuesFormat extends BaseLucene80DocValuesFormatTestCase {
+ private final Codec codec = TestUtil.alwaysDocValuesFormat(new Lucene80DocValuesFormat(Lucene80DocValuesFormat.Mode.BEST_SPEED));
@Override
- protected Directory getDirectory(Path path) throws IOException {
- return new RAFDirectory(path);
+ protected Codec getCodec() {
+ return codec;
}
+
}
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene87/TestLucene87StoredFieldsFormatHighCompression.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene87/TestLucene87StoredFieldsFormatHighCompression.java
index f5dbf40..12dc939 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene87/TestLucene87StoredFieldsFormatHighCompression.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene87/TestLucene87StoredFieldsFormatHighCompression.java
@@ -18,8 +18,8 @@
import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.lucene87.Lucene87StoredFieldsFormat.Mode;
import org.apache.lucene.codecs.lucene90.Lucene90Codec;
+import org.apache.lucene.codecs.lucene90.Lucene90Codec.Mode;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.BaseStoredFieldsFormatTestCase;
diff --git a/lucene/core/src/test/org/apache/lucene/document/TestFeatureField.java b/lucene/core/src/test/org/apache/lucene/document/TestFeatureField.java
index 79534c8..bc63d2e 100644
--- a/lucene/core/src/test/org/apache/lucene/document/TestFeatureField.java
+++ b/lucene/core/src/test/org/apache/lucene/document/TestFeatureField.java
@@ -101,6 +101,24 @@
assertEquals(DocIdSetIterator.NO_MORE_DOCS, s.iterator().nextDoc());
+ q = FeatureField.newLinearQuery("features", "pagerank", 3f);
+ w = q.createWeight(searcher, ScoreMode.TOP_SCORES, 2);
+ s = w.scorer(context);
+
+ assertEquals(0, s.iterator().nextDoc());
+ assertEquals((float) (6.0 * 10), s.score(), 0f);
+
+ assertEquals(1, s.iterator().nextDoc());
+ assertEquals((float) (6.0 * 100), s.score(), 0f);
+
+ assertEquals(3, s.iterator().nextDoc());
+ assertEquals((float) (6.0 * 1), s.score(), 0f);
+
+ assertEquals(4, s.iterator().nextDoc());
+ assertEquals((float) (6.0 * 42), s.score(), 0f);
+
+ assertEquals(DocIdSetIterator.NO_MORE_DOCS, s.iterator().nextDoc());
+
q = FeatureField.newSaturationQuery("features", "pagerank", 3f, 4.5f);
w = q.createWeight(searcher, ScoreMode.TOP_SCORES, 2);
s = w.scorer(context);
@@ -188,16 +206,19 @@
IndexSearcher searcher = new IndexSearcher(reader);
QueryUtils.check(random(), FeatureField.newLogQuery("features", "pagerank", 1f, 4.5f), searcher);
+ QueryUtils.check(random(), FeatureField.newLinearQuery("features", "pagerank", 1f), searcher);
QueryUtils.check(random(), FeatureField.newSaturationQuery("features", "pagerank", 1f, 12f), searcher);
QueryUtils.check(random(), FeatureField.newSigmoidQuery("features", "pagerank", 1f, 12f, 0.6f), searcher);
// Test boosts that are > 1
QueryUtils.check(random(), FeatureField.newLogQuery("features", "pagerank", 3f, 4.5f), searcher);
+ QueryUtils.check(random(), FeatureField.newLinearQuery("features", "pagerank", 3f), searcher);
QueryUtils.check(random(), FeatureField.newSaturationQuery("features", "pagerank", 3f, 12f), searcher);
QueryUtils.check(random(), FeatureField.newSigmoidQuery("features", "pagerank", 3f, 12f, 0.6f), searcher);
// Test boosts that are < 1
QueryUtils.check(random(), FeatureField.newLogQuery("features", "pagerank", .2f, 4.5f), searcher);
+ QueryUtils.check(random(), FeatureField.newLinearQuery("features", "pagerank", .2f), searcher);
QueryUtils.check(random(), FeatureField.newSaturationQuery("features", "pagerank", .2f, 12f), searcher);
QueryUtils.check(random(), FeatureField.newSigmoidQuery("features", "pagerank", .2f, 12f, 0.6f), searcher);
@@ -209,6 +230,10 @@
doTestSimScorer(new FeatureField.LogFunction(4.5f).scorer(3f));
}
+ public void testLinearSimScorer() {
+ doTestSimScorer(new FeatureField.LinearFunction().scorer(1f));
+ }
+
public void testSatuSimScorer() {
doTestSimScorer(new FeatureField.SaturationFunction("foo", "bar", 20f).scorer(3f));
}
diff --git a/lucene/misc/src/test/org/apache/lucene/document/TestInetAddressPoint.java b/lucene/core/src/test/org/apache/lucene/document/TestInetAddressPoint.java
similarity index 100%
rename from lucene/misc/src/test/org/apache/lucene/document/TestInetAddressPoint.java
rename to lucene/core/src/test/org/apache/lucene/document/TestInetAddressPoint.java
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestKnnGraph.java b/lucene/core/src/test/org/apache/lucene/index/TestKnnGraph.java
new file mode 100644
index 0000000..b6b43fa
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/index/TestKnnGraph.java
@@ -0,0 +1,382 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.index;
+
+
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.lucene90.Lucene90VectorReader;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.VectorField;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+
+import org.apache.lucene.util.hnsw.HnswGraphBuilder;
+import org.junit.After;
+import org.junit.Before;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Set;
+
+import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
+import static org.apache.lucene.util.hnsw.HnswGraphBuilder.randSeed;
+
+/** Tests indexing of a knn-graph */
+public class TestKnnGraph extends LuceneTestCase {
+
+ private static final String KNN_GRAPH_FIELD = "vector";
+
+ private static int maxConn = HnswGraphBuilder.DEFAULT_MAX_CONN;
+
+ @Before
+ public void setup() {
+ randSeed = random().nextLong();
+ if (random().nextBoolean()) {
+ maxConn = HnswGraphBuilder.DEFAULT_MAX_CONN;
+ HnswGraphBuilder.DEFAULT_MAX_CONN = random().nextInt(1000) + 1;
+ }
+ }
+
+ @After
+ public void cleanup() {
+ HnswGraphBuilder.DEFAULT_MAX_CONN = maxConn;
+ }
+
+ /**
+ * Basic test of creating documents in a graph
+ */
+ public void testBasic() throws Exception {
+ try (Directory dir = newDirectory();
+ IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null).setCodec(Codec.forName("Lucene90")))) {
+ int numDoc = atLeast(10);
+ int dimension = atLeast(3);
+ float[][] values = new float[numDoc][];
+ for (int i = 0; i < numDoc; i++) {
+ if (random().nextBoolean()) {
+ values[i] = new float[dimension];
+ for (int j = 0; j < dimension; j++) {
+ values[i][j] = random().nextFloat();
+ }
+ }
+ add(iw, i, values[i]);
+ }
+ assertConsistentGraph(iw, values);
+ }
+ }
+
+ public void testSingleDocument() throws Exception {
+ try (Directory dir = newDirectory();
+ IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null).setCodec(Codec.forName("Lucene90")))) {
+ float[][] values = new float[][]{new float[]{0, 1, 2}};
+ add(iw, 0, values[0]);
+ assertConsistentGraph(iw, values);
+ iw.commit();
+ assertConsistentGraph(iw, values);
+ }
+ }
+
+ /**
+ * Verify that the graph properties are preserved when merging
+ */
+ public void testMerge() throws Exception {
+ try (Directory dir = newDirectory();
+ IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null).setCodec(Codec.forName("Lucene90")))) {
+ int numDoc = atLeast(100);
+ int dimension = atLeast(10);
+ float[][] values = new float[numDoc][];
+ for (int i = 0; i < numDoc; i++) {
+ if (random().nextBoolean()) {
+ values[i] = new float[dimension];
+ for (int j = 0; j < dimension; j++) {
+ values[i][j] = random().nextFloat();
+ }
+ }
+ add(iw, i, values[i]);
+ if (random().nextInt(10) == 3) {
+ //System.out.println("commit @" + i);
+ iw.commit();
+ }
+ }
+ if (random().nextBoolean()) {
+ iw.forceMerge(1);
+ }
+ assertConsistentGraph(iw, values);
+ }
+ }
+
+ private void dumpGraph(KnnGraphValues values, int size) throws IOException {
+ for (int node = 0; node < size; node++) {
+ int n;
+ System.out.print("" + node + ":");
+ values.seek(node);
+ while ((n = values.nextNeighbor()) != NO_MORE_DOCS) {
+ System.out.print(" " + n);
+ }
+ System.out.println();
+ }
+ }
+
+ // TODO: testSorted
+ // TODO: testDeletions
+
+ /**
+ * Verify that searching does something reasonable
+ */
+ public void testSearch() throws Exception {
+ try (Directory dir = newDirectory();
+ // don't allow random merges; they mess up the docid tie-breaking assertion
+ IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig().setCodec(Codec.forName("Lucene90")))) {
+ // Add a document for every cartesian point in an NxN square so we can
+ // easily know which are the nearest neighbors to every point. Insert by iterating
+ // using a prime number that is not a divisor of N*N so that we will hit each point once,
+ // and chosen so that points will be inserted in a deterministic
+ // but somewhat distributed pattern
+ int n = 5, stepSize = 17;
+ float[][] values = new float[n * n][];
+ int index = 0;
+ for (int i = 0; i < values.length; i++) {
+ // System.out.printf("%d: (%d, %d)\n", i, index % n, index / n);
+ values[i] = new float[]{index % n, index / n};
+ index = (index + stepSize) % (n * n);
+ add(iw, i, values[i]);
+ if (i == 13) {
+ // create 2 segments
+ iw.commit();
+ }
+ }
+ boolean forceMerge = random().nextBoolean();
+ //System.out.println("");
+ if (forceMerge) {
+ iw.forceMerge(1);
+ }
+ assertConsistentGraph(iw, values);
+ try (DirectoryReader dr = DirectoryReader.open(iw)) {
+ // results are ordered by score (descending) and docid (ascending);
+ // This is the insertion order:
+ // column major, origin at upper left
+ // 0 15 5 20 10
+ // 3 18 8 23 13
+ // 6 21 11 1 16
+ // 9 24 14 4 19
+ // 12 2 17 7 22
+
+ // For this small graph the "search" is exhaustive, so this mostly tests the APIs, the orientation of the
+ // various priority queues, the scoring function, but not so much the approximate KNN search algo
+ assertGraphSearch(new int[]{0, 15, 3, 18, 5}, new float[]{0f, 0.1f}, dr);
+ // test tiebreaking by docid
+ assertGraphSearch(new int[]{11, 1, 8, 14, 21}, new float[]{2, 2}, dr);
+ assertGraphSearch(new int[]{15, 18, 0, 3, 5},new float[]{0.3f, 0.8f}, dr);
+ }
+ }
+ }
+
+ private void assertGraphSearch(int[] expected, float[] vector, IndexReader reader) throws IOException {
+ TopDocs results = doKnnSearch(reader, vector, 5);
+ for (ScoreDoc doc : results.scoreDocs) {
+ // map docId to insertion id
+ int id = Integer.parseInt(reader.document(doc.doc).get("id"));
+ doc.doc = id;
+ }
+ assertResults(expected, results);
+ }
+
+ private static TopDocs doKnnSearch(IndexReader reader, float[] vector, int k) throws IOException {
+ TopDocs[] results = new TopDocs[reader.leaves().size()];
+ for (LeafReaderContext ctx: reader.leaves()) {
+ results[ctx.ord] = ctx.reader().getVectorValues(KNN_GRAPH_FIELD)
+ .search(vector, k, 10);
+ if (ctx.docBase > 0) {
+ for (ScoreDoc doc : results[ctx.ord].scoreDocs) {
+ doc.doc += ctx.docBase;
+ }
+ }
+ }
+ return TopDocs.merge(k, results);
+ }
+
+ private void assertResults(int[] expected, TopDocs results) {
+ assertEquals(results.toString(), expected.length, results.scoreDocs.length);
+ for (int i = expected.length - 1; i >= 0; i--) {
+ assertEquals(Arrays.toString(results.scoreDocs), expected[i], results.scoreDocs[i].doc);
+ }
+ }
+
+ // For each leaf, verify that its graph nodes are 1-1 with vectors, that the vectors are the expected values,
+ // and that the graph is fully connected and symmetric.
+ // NOTE: when we impose max-fanout on the graph it will no longer be symmetric, but should still
+ // be fully connected. Is there any other invariant we can test? Well, we can check that max fanout
+ // is respected. We can test *desirable* properties of the graph like small-world (the graph diameter
+ // should be tightly bounded).
+ private void assertConsistentGraph(IndexWriter iw, float[][] values) throws IOException {
+ int totalGraphDocs = 0;
+ try (DirectoryReader dr = DirectoryReader.open(iw)) {
+ for (LeafReaderContext ctx: dr.leaves()) {
+ LeafReader reader = ctx.reader();
+ VectorValues vectorValues = reader.getVectorValues(KNN_GRAPH_FIELD);
+ Lucene90VectorReader vectorReader = ((Lucene90VectorReader) ((CodecReader) reader).getVectorReader());
+ if (vectorReader == null) {
+ continue;
+ }
+ KnnGraphValues graphValues = vectorReader.getGraphValues(KNN_GRAPH_FIELD);
+ // vectors and graph must be present or absent together
+ assertTrue((vectorValues == null) == (graphValues == null));
+ if (vectorValues == null) {
+ continue;
+ }
+ // graph[ordinal] collects each node's neighbor ordinals; null = no neighbors recorded
+ int[][] graph = new int[reader.maxDoc()][];
+ boolean foundOrphan= false;
+ int graphSize = 0;
+ for (int i = 0; i < reader.maxDoc(); i++) {
+ int nextDocWithVectors = vectorValues.advance(i);
+ //System.out.println("advanced to " + nextDocWithVectors);
+ // docs in the gap [i, nextDocWithVectors) have no vector; verify none was expected to
+ while (i < nextDocWithVectors && i < reader.maxDoc()) {
+ int id = Integer.parseInt(reader.document(i).get("id"));
+ assertNull("document " + id + " has no vector, but was expected to", values[id]);
+ ++i;
+ }
+ if (nextDocWithVectors == NO_MORE_DOCS) {
+ break;
+ }
+ int id = Integer.parseInt(reader.document(i).get("id"));
+ // graph nodes are addressed by dense ordinal (graphSize), not by docid
+ graphValues.seek(graphSize);
+ // documents with KnnGraphValues have the expected vectors
+ float[] scratch = vectorValues.vectorValue();
+ assertArrayEquals("vector did not match for doc " + i + ", id=" + id + ": " + Arrays.toString(scratch),
+ values[id], scratch, 0f);
+ // We collect neighbors for analysis below
+ List<Integer> friends = new ArrayList<>();
+ int arc;
+ while ((arc = graphValues.nextNeighbor()) != NO_MORE_DOCS) {
+ friends.add(arc);
+ }
+ if (friends.size() == 0) {
+ //System.out.printf("knngraph @%d is singleton (advance returns %d)\n", i, nextWithNeighbors);
+ foundOrphan = true;
+ } else {
+ // NOTE: these friends are dense ordinals, not docIds.
+ int[] friendCopy = new int[friends.size()];
+ for (int j = 0; j < friends.size(); j++) {
+ friendCopy[j] = friends.get(j);
+ }
+ graph[graphSize] = friendCopy;
+ //System.out.printf("knngraph @%d => %s\n", i, Arrays.toString(graph[i]));
+ }
+ graphSize++;
+ }
+ assertEquals(NO_MORE_DOCS, vectorValues.nextDoc());
+ // an orphan node is only tolerated in a trivial single-node graph
+ if (foundOrphan) {
+ assertEquals("graph is not fully connected", 1, graphSize);
+ } else {
+ assertTrue("Graph has " + graphSize + " nodes, but one of them has no neighbors", graphSize > 1);
+ }
+ if (HnswGraphBuilder.DEFAULT_MAX_CONN > graphSize) {
+ // assert that the graph in each leaf is connected and undirected (ie links are reciprocated)
+ assertReciprocal(graph);
+ assertConnected(graph);
+ } else {
+ // assert that max-connections was respected
+ assertMaxConn(graph, HnswGraphBuilder.DEFAULT_MAX_CONN);
+ }
+ totalGraphDocs += graphSize;
+ }
+ }
+ // every non-null row of values must have produced exactly one graph node overall
+ int expectedCount = 0;
+ for (float[] friends : values) {
+ if (friends != null) {
+ ++expectedCount;
+ }
+ }
+ assertEquals(expectedCount, totalGraphDocs);
+ }
+
+ // Assert that no node has more than maxConn neighbors, and that every neighbor
+ // ordinal points at a node that itself has a neighbor list.
+ private void assertMaxConn(int[][] graph, int maxConn) {
+ for (int i = 0; i < graph.length; i++) {
+ if (graph[i] != null) {
+ // use a JUnit assertion rather than the Java assert keyword, which is a
+ // silent no-op unless the JVM runs with -ea; also gives a useful message
+ assertTrue("node " + i + " has " + graph[i].length + " neighbors, exceeding maxConn=" + maxConn,
+ graph[i].length <= maxConn);
+ for (int j = 0; j < graph[i].length; j++) {
+ int k = graph[i][j];
+ assertNotNull(graph[k]);
+ }
+ }
+ }
+ }
+
+ // Assert the graph is undirected: every arc i -> k has a matching arc k -> i.
+ private void assertReciprocal(int[][] graph) {
+ // The graph is undirected: if a -> b then b -> a.
+ for (int i = 0; i < graph.length; i++) {
+ if (graph[i] != null) {
+ for (int j = 0; j < graph[i].length; j++) {
+ int k = graph[i][j];
+ assertNotNull(graph[k]);
+ // NOTE(review): Arrays.binarySearch is only defined on sorted arrays; the
+ // neighbor arrays are filled in nextNeighbor() iteration order above —
+ // presumably ascending, but TODO confirm, otherwise this check can misfire.
+ assertTrue("" + i + "->" + k + " is not reciprocated", Arrays.binarySearch(graph[k], i) >= 0);
+ }
+ }
+ }
+ }
+
+ // Assert every node is reachable from every other node via a breadth-first walk.
+ private void assertConnected(int[][] graph) {
+ // every node in the graph is reachable from every other node
+ Set<Integer> visited = new HashSet<>();
+ List<Integer> queue = new LinkedList<>();
+ int count = 0;
+ // count the nodes and seed the BFS with the first neighbor we encounter
+ for (int[] entry : graph) {
+ if (entry != null) {
+ if (queue.isEmpty()) {
+ queue.add(entry[0]); // start from any node
+ //System.out.println("start at " + entry[0]);
+ }
+ ++count;
+ }
+ }
+ // breadth-first traversal over the neighbor lists
+ while(queue.isEmpty() == false) {
+ int i = queue.remove(0);
+ assertNotNull("expected neighbors of " + i, graph[i]);
+ visited.add(i);
+ for (int j : graph[i]) {
+ if (visited.contains(j) == false) {
+ //System.out.println(" ... " + j);
+ queue.add(j);
+ }
+ }
+ }
+ // node ids are dense ordinals (see assertConsistentGraph), so the reachable
+ // set must be exactly [0, count)
+ for (int i = 0; i < count; i++) {
+ assertTrue("Attempted to walk entire graph but never visited " + i, visited.contains(i));
+ }
+ // we visited each node exactly once
+ assertEquals("Attempted to walk entire graph but only visited " + visited.size(), count, visited.size());
+ }
+
+
+ // Index one document with the given id as a stored StringField; when vector is
+ // non-null the doc also gets a VectorField using the EUCLIDEAN_HNSW strategy.
+ private void add(IndexWriter iw, int id, float[] vector) throws IOException {
+ Document doc = new Document();
+ if (vector != null) {
+ // TODO: choose random search strategy
+ doc.add(new VectorField(KNN_GRAPH_FIELD, vector, VectorValues.SearchStrategy.EUCLIDEAN_HNSW));
+ }
+ doc.add(new StringField("id", Integer.toString(id), Field.Store.YES));
+ //System.out.println("add " + id + " " + Arrays.toString(vector));
+ iw.addDocument(doc);
+ }
+
+}
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestVectorValues.java b/lucene/core/src/test/org/apache/lucene/index/TestVectorValues.java
index 6e54b88..a9cd946 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestVectorValues.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestVectorValues.java
@@ -593,7 +593,7 @@
assertEquals(4f, vectorValues.vectorValue()[0], 0);
assertEquals(NO_MORE_DOCS, vectorValues.nextDoc());
- VectorValues.RandomAccess ra = vectorValues.randomAccess();
+ RandomAccessVectorValues ra = ((RandomAccessVectorValuesProducer) vectorValues).randomAccess();
assertEquals(1f, ra.vectorValue(0)[0], 0);
assertEquals(2f, ra.vectorValue(1)[0], 0);
assertEquals(4f, ra.vectorValue(2)[0], 0);
@@ -601,6 +601,51 @@
}
}
+ public void testIndexMultipleVectorFields() throws Exception {
+ try (Directory dir = newDirectory();
+ IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig())) {
+ Document doc = new Document();
+ float[] v = new float[]{1};
+ doc.add(new VectorField("field1", v, SearchStrategy.EUCLIDEAN_HNSW));
+ doc.add(new VectorField("field2", new float[]{1, 2, 3}, SearchStrategy.NONE));
+ iw.addDocument(doc);
+ v[0] = 2;
+ iw.addDocument(doc);
+ doc = new Document();
+ doc.add(new VectorField("field3", new float[]{1, 2, 3}, SearchStrategy.DOT_PRODUCT_HNSW));
+ iw.addDocument(doc);
+ iw.forceMerge(1);
+ try (IndexReader reader = iw.getReader()) {
+ LeafReader leaf = reader.leaves().get(0).reader();
+
+ VectorValues vectorValues = leaf.getVectorValues("field1");
+ assertEquals(1, vectorValues.dimension());
+ assertEquals(2, vectorValues.size());
+ vectorValues.nextDoc();
+ assertEquals(1f, vectorValues.vectorValue()[0], 0);
+ vectorValues.nextDoc();
+ assertEquals(2f, vectorValues.vectorValue()[0], 0);
+ assertEquals(NO_MORE_DOCS, vectorValues.nextDoc());
+
+ VectorValues vectorValues2 = leaf.getVectorValues("field2");
+ assertEquals(3, vectorValues2.dimension());
+ assertEquals(2, vectorValues2.size());
+ vectorValues2.nextDoc();
+ assertEquals(2f, vectorValues2.vectorValue()[1], 0);
+ vectorValues2.nextDoc();
+ assertEquals(2f, vectorValues2.vectorValue()[1], 0);
+ assertEquals(NO_MORE_DOCS, vectorValues2.nextDoc());
+
+ VectorValues vectorValues3 = leaf.getVectorValues("field3");
+ assertEquals(3, vectorValues3.dimension());
+ assertEquals(1, vectorValues3.size());
+ vectorValues3.nextDoc();
+ assertEquals(1f, vectorValues3.vectorValue()[0], 0);
+ assertEquals(NO_MORE_DOCS, vectorValues3.nextDoc());
+ }
+ }
+ }
+
/**
* Index random vectors, sometimes skipping documents, sometimes deleting a document,
* sometimes merging, sometimes sorting the index,
diff --git a/lucene/misc/src/test/org/apache/lucene/search/TestInetAddressRangeQueries.java b/lucene/core/src/test/org/apache/lucene/search/TestInetAddressRangeQueries.java
similarity index 100%
rename from lucene/misc/src/test/org/apache/lucene/search/TestInetAddressRangeQueries.java
rename to lucene/core/src/test/org/apache/lucene/search/TestInetAddressRangeQueries.java
diff --git a/lucene/core/src/test/org/apache/lucene/util/hnsw/KnnGraphTester.java b/lucene/core/src/test/org/apache/lucene/util/hnsw/KnnGraphTester.java
new file mode 100644
index 0000000..ce8a6ed
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/util/hnsw/KnnGraphTester.java
@@ -0,0 +1,494 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.util.hnsw;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.lang.management.ManagementFactory;
+import java.lang.management.ThreadMXBean;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.FloatBuffer;
+import java.nio.IntBuffer;
+import java.nio.channels.FileChannel;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import org.apache.lucene.codecs.lucene90.Lucene90VectorReader;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.document.VectorField;
+import org.apache.lucene.index.CodecReader;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.KnnGraphValues;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.VectorValues;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.util.PrintStreamInfoStream;
+import org.apache.lucene.util.SuppressForbidden;
+
+import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
+
+/** For testing indexing and search performance of a knn-graph
+ *
+ * java -cp .../lib/*.jar org.apache.lucene.util.hnsw.KnnGraphTester -ndoc 1000000 -search .../vectors.bin
+*/
+public class KnnGraphTester {
+
+ private final static String KNN_FIELD = "knn";
+ private final static String ID_FIELD = "id";
+ private final static VectorValues.SearchStrategy SEARCH_STRATEGY = VectorValues.SearchStrategy.DOT_PRODUCT_HNSW;
+
+ private int numDocs;
+ private int dim;
+ private int topK;
+ private int numIters;
+ private int fanout;
+ private Path indexPath;
+ private boolean quiet;
+ private boolean reindex;
+ private int reindexTimeMsec;
+
+ @SuppressForbidden(reason="uses Random()")
+ private KnnGraphTester() {
+ // set defaults
+ numDocs = 1000;
+ numIters = 1000;
+ dim = 256;
+ topK = 100;
+ fanout = topK;
+ indexPath = Paths.get("knn_test_index");
+ }
+
+ public static void main(String... args) throws Exception {
+ new KnnGraphTester().run(args);
+ }
+
+ private void run(String... args) throws Exception {
+ String operation = null, docVectorsPath = null, queryPath = null;
+ for (int iarg = 0; iarg < args.length; iarg++) {
+ String arg = args[iarg];
+ switch(arg) {
+ case "-generate":
+ case "-search":
+ case "-check":
+ case "-stats":
+ if (operation != null) {
+ throw new IllegalArgumentException("Specify only one operation, not both " + arg + " and " + operation);
+ }
+ if (iarg == args.length - 1) {
+ throw new IllegalArgumentException("Operation " + arg + " requires a following pathname");
+ }
+ operation = arg;
+ docVectorsPath = args[++iarg];
+ if (operation.equals("-search")) {
+ queryPath = args[++iarg];
+ }
+ break;
+ case "-fanout":
+ if (iarg == args.length - 1) {
+ throw new IllegalArgumentException("-fanout requires a following number");
+ }
+ fanout = Integer.parseInt(args[++iarg]);
+ break;
+ case "-beamWidthIndex":
+ if (iarg == args.length - 1) {
+ throw new IllegalArgumentException("-beamWidthIndex requires a following number");
+ }
+ HnswGraphBuilder.DEFAULT_BEAM_WIDTH = Integer.parseInt(args[++iarg]);
+ break;
+ case "-maxConn":
+ if (iarg == args.length - 1) {
+ throw new IllegalArgumentException("-maxConn requires a following number");
+ }
+ HnswGraphBuilder.DEFAULT_MAX_CONN = Integer.parseInt(args[++iarg]);
+ break;
+ case "-dim":
+ if (iarg == args.length - 1) {
+ throw new IllegalArgumentException("-dim requires a following number");
+ }
+ dim = Integer.parseInt(args[++iarg]);
+ break;
+ case "-ndoc":
+ if (iarg == args.length - 1) {
+ throw new IllegalArgumentException("-ndoc requires a following number");
+ }
+ numDocs = Integer.parseInt(args[++iarg]);
+ break;
+ case "-niter":
+ if (iarg == args.length - 1) {
+ throw new IllegalArgumentException("-niter requires a following number");
+ }
+ numIters = Integer.parseInt(args[++iarg]);
+ break;
+ case "-reindex":
+ reindex = true;
+ break;
+ case "-forceMerge":
+ operation = "-forceMerge";
+ break;
+ case "-quiet":
+ quiet = true;
+ break;
+ default:
+ throw new IllegalArgumentException("unknown argument " + arg);
+ //usage();
+ }
+ }
+ if (operation == null) {
+ usage();
+ }
+ if (reindex) {
+ reindexTimeMsec = createIndex(Paths.get(docVectorsPath), indexPath);
+ }
+ switch (operation) {
+ case "-search":
+ testSearch(indexPath, Paths.get(queryPath), getNN(Paths.get(docVectorsPath), Paths.get(queryPath)));
+ break;
+ case "-forceMerge":
+ forceMerge();
+ break;
+ case "-stats":
+ printFanoutHist(indexPath);
+ break;
+ }
+ }
+
+ @SuppressForbidden(reason="Prints stuff")
+ private void printFanoutHist(Path indexPath) throws IOException {
+ try (Directory dir = FSDirectory.open(indexPath);
+ DirectoryReader reader = DirectoryReader.open(dir)) {
+ // int[] globalHist = new int[reader.maxDoc()];
+ for (LeafReaderContext context : reader.leaves()) {
+ LeafReader leafReader = context.reader();
+ KnnGraphValues knnValues = ((Lucene90VectorReader) ((CodecReader) leafReader).getVectorReader()).getGraphValues(KNN_FIELD);
+ System.out.printf("Leaf %d has %d documents\n", context.ord, leafReader.maxDoc());
+ printGraphFanout(knnValues, leafReader.maxDoc());
+ }
+ }
+ }
+
+ @SuppressForbidden(reason="Prints stuff")
+ private void forceMerge() throws IOException {
+ IndexWriterConfig iwc = new IndexWriterConfig()
+ .setOpenMode(IndexWriterConfig.OpenMode.APPEND);
+ iwc.setInfoStream(new PrintStreamInfoStream(System.out));
+ System.out.println("Force merge index in " + indexPath);
+ try (IndexWriter iw = new IndexWriter(FSDirectory.open(indexPath), iwc)) {
+ iw.forceMerge(1);
+ }
+ }
+
+ @SuppressForbidden(reason="Prints stuff")
+ private void printGraphFanout(KnnGraphValues knnValues, int numDocs) throws IOException {
+ int min = Integer.MAX_VALUE, max = 0, total = 0;
+ int count = 0;
+ int[] leafHist = new int[numDocs];
+ for (int node = 0; node < numDocs; node++) {
+ knnValues.seek(node);
+ int n = 0;
+ while (knnValues.nextNeighbor() != NO_MORE_DOCS) {
+ ++n;
+ }
+ ++leafHist[n];
+ max = Math.max(max, n);
+ min = Math.min(min, n);
+ if (n > 0) {
+ ++count;
+ total += n;
+ }
+ }
+ System.out.printf("Graph size=%d, Fanout min=%d, mean=%.2f, max=%d\n", count, min, total / (float) count, max);
+ printHist(leafHist, max, count, 10);
+ }
+
+ @SuppressForbidden(reason="Prints stuff")
+ private void printHist(int[] hist, int max, int count, int nbuckets) {
+ System.out.print("%");
+ for (int i=0; i <= nbuckets; i ++) {
+ System.out.printf("%4d", i * 100 / nbuckets);
+ }
+ System.out.printf("\n %4d", hist[0]);
+ int total = 0, ibucket = 1;
+ for (int i = 1; i <= max && ibucket <= nbuckets; i++) {
+ total += hist[i];
+ while (total >= count * ibucket / nbuckets) {
+ System.out.printf("%4d", i);
+ ++ibucket;
+ }
+ }
+ System.out.println();
+ }
+
+ @SuppressForbidden(reason="Prints stuff")
+ private void testSearch(Path indexPath, Path queryPath, int[][] nn) throws IOException {
+ TopDocs[] results = new TopDocs[numIters];
+ long elapsed, totalCpuTime, totalVisited = 0;
+ try (FileChannel q = FileChannel.open(queryPath)) {
+ FloatBuffer targets = q.map(FileChannel.MapMode.READ_ONLY, 0, numIters * dim * Float.BYTES)
+ .order(ByteOrder.LITTLE_ENDIAN)
+ .asFloatBuffer();
+ float[] target = new float[dim];
+ if (quiet == false) {
+ System.out.println("running " + numIters + " targets; topK=" + topK + ", fanout=" + fanout);
+ }
+ long start;
+ ThreadMXBean bean = ManagementFactory.getThreadMXBean();
+ long cpuTimeStartNs;
+ try (Directory dir = FSDirectory.open(indexPath);
+ DirectoryReader reader = DirectoryReader.open(dir)) {
+
+ for (int i = 0; i < 1000; i++) {
+ // warm up
+ targets.get(target);
+ results[i] = doKnnSearch(reader, KNN_FIELD, target, topK, fanout);
+ }
+ targets.position(0);
+ start = System.nanoTime();
+ cpuTimeStartNs = bean.getCurrentThreadCpuTime();
+ for (int i = 0; i < numIters; i++) {
+ targets.get(target);
+ results[i] = doKnnSearch(reader, KNN_FIELD, target, topK, fanout);
+ }
+ totalCpuTime = (bean.getCurrentThreadCpuTime() - cpuTimeStartNs) / 1_000_000;
+ elapsed = (System.nanoTime() - start) / 1_000_000; // ns -> ms
+ for (int i = 0; i < numIters; i++) {
+ totalVisited += results[i].totalHits.value;
+ for (ScoreDoc doc : results[i].scoreDocs) {
+ doc.doc = Integer.parseInt(reader.document(doc.doc).get("id"));
+ }
+ }
+ }
+ if (quiet == false) {
+ System.out.println("completed " + numIters + " searches in " + elapsed + " ms: " + ((1000 * numIters) / elapsed) + " QPS "
+ + "CPU time=" + totalCpuTime + "ms");
+ }
+ }
+ if (quiet == false) {
+ System.out.println("checking results");
+ }
+ float recall = checkResults(results, nn);
+ totalVisited /= numIters;
+ if (quiet) {
+ System.out.printf(Locale.ROOT, "%5.3f\t%5.2f\t%d\t%d\t%d\t%d\t%d\t%d\n", recall, totalCpuTime / (float) numIters,
+ numDocs, fanout, HnswGraphBuilder.DEFAULT_MAX_CONN, HnswGraphBuilder.DEFAULT_BEAM_WIDTH, totalVisited, reindexTimeMsec);
+ }
+ }
+
+ private static TopDocs doKnnSearch(IndexReader reader, String field, float[] vector, int k, int fanout) throws IOException {
+ TopDocs[] results = new TopDocs[reader.leaves().size()];
+ for (LeafReaderContext ctx: reader.leaves()) {
+ results[ctx.ord] = ctx.reader().getVectorValues(field).search(vector, k, fanout);
+ int docBase = ctx.docBase;
+ for (ScoreDoc scoreDoc : results[ctx.ord].scoreDocs) {
+ scoreDoc.doc += docBase;
+ }
+ }
+ return TopDocs.merge(k, results);
+ }
+
+ private float checkResults(TopDocs[] results, int[][] nn) {
+ int totalMatches = 0;
+ int totalResults = 0;
+ for (int i = 0; i < results.length; i++) {
+ int n = results[i].scoreDocs.length;
+ totalResults += n;
+ //System.out.println(Arrays.toString(nn[i]));
+ //System.out.println(Arrays.toString(results[i].scoreDocs));
+ totalMatches += compareNN(nn[i], results[i]);
+ }
+ if (quiet == false) {
+ System.out.println("total matches = " + totalMatches + " out of " + totalResults);
+ System.out.printf(Locale.ROOT, "Average overlap = %.2f%%\n", ((100.0 * totalMatches) / totalResults));
+ }
+ return totalMatches / (float) totalResults;
+ }
+
+ private int compareNN(int[] expected, TopDocs results) {
+ int matched = 0;
+ /*
+ System.out.print("expected=");
+ for (int j = 0; j < expected.length; j++) {
+ System.out.print(expected[j]);
+ System.out.print(", ");
+ }
+ System.out.print('\n');
+ System.out.println("results=");
+ for (int j = 0; j < results.scoreDocs.length; j++) {
+ System.out.print("" + results.scoreDocs[j].doc + ":" + results.scoreDocs[j].score + ", ");
+ }
+ System.out.print('\n');
+ */
+ Set<Integer> expectedSet = new HashSet<>();
+ for (int i = 0; i < results.scoreDocs.length; i++) {
+ expectedSet.add(expected[i]);
+ }
+ for (ScoreDoc scoreDoc : results.scoreDocs) {
+ if (expectedSet.contains(scoreDoc.doc)) {
+ ++matched;
+ }
+ }
+ return matched;
+ }
+
+ private int[][] getNN(Path docPath, Path queryPath) throws IOException {
+ // look in working directory for cached nn file
+ String nnFileName = "nn-" + numDocs + "-" + numIters + "-" + topK + "-" + dim + ".bin";
+ Path nnPath = Paths.get(nnFileName);
+ if (Files.exists(nnPath)) {
+ return readNN(nnPath);
+ } else {
+ int[][] nn = computeNN(docPath, queryPath);
+ writeNN(nn, nnPath);
+ return nn;
+ }
+ }
+
+ private int[][] readNN(Path nnPath) throws IOException {
+ int[][] result = new int[numIters][];
+ try (FileChannel in = FileChannel.open(nnPath)) {
+ IntBuffer intBuffer = in.map(FileChannel.MapMode.READ_ONLY, 0, numIters * topK * Integer.BYTES)
+ .order(ByteOrder.LITTLE_ENDIAN)
+ .asIntBuffer();
+ for (int i = 0; i < numIters; i++) {
+ result[i] = new int[topK];
+ intBuffer.get(result[i]);
+ }
+ }
+ return result;
+ }
+
+ private void writeNN(int[][] nn, Path nnPath) throws IOException {
+ if (quiet == false) {
+ System.out.println("writing true nearest neighbors to " + nnPath);
+ }
+ ByteBuffer tmp = ByteBuffer.allocate(nn[0].length * Integer.BYTES).order(ByteOrder.LITTLE_ENDIAN);
+ try (OutputStream out = Files.newOutputStream(nnPath)) {
+ for (int i = 0; i < numIters; i++) {
+ tmp.asIntBuffer().put(nn[i]);
+ out.write(tmp.array());
+ }
+ }
+ }
+
+ private int[][] computeNN(Path docPath, Path queryPath) throws IOException {
+ int[][] result = new int[numIters][];
+ if (quiet == false) {
+ System.out.println("computing true nearest neighbors of " + numIters + " target vectors");
+ }
+ try (FileChannel in = FileChannel.open(docPath);
+ FileChannel qIn = FileChannel.open(queryPath)) {
+ FloatBuffer queries = qIn.map(FileChannel.MapMode.READ_ONLY, 0, numIters * dim * Float.BYTES)
+ .order(ByteOrder.LITTLE_ENDIAN)
+ .asFloatBuffer();
+ float[] vector = new float[dim];
+ float[] query = new float[dim];
+ for (int i = 0; i < numIters; i++) {
+ queries.get(query);
+ long totalBytes = (long) numDocs * dim * Float.BYTES;
+ int blockSize = (int) Math.min(totalBytes, (Integer.MAX_VALUE / (dim * Float.BYTES)) * (dim * Float.BYTES)), offset = 0;
+ int j = 0;
+ //System.out.println("totalBytes=" + totalBytes);
+ while (j < numDocs) {
+ FloatBuffer vectors = in.map(FileChannel.MapMode.READ_ONLY, offset, blockSize)
+ .order(ByteOrder.LITTLE_ENDIAN)
+ .asFloatBuffer();
+ offset += blockSize;
+ Neighbors queue = Neighbors.create(topK, SEARCH_STRATEGY.reversed);
+ for (; j < numDocs && vectors.hasRemaining(); j++) {
+ vectors.get(vector);
+ float d = SEARCH_STRATEGY.compare(query, vector);
+ queue.insertWithOverflow(new Neighbor(j, d));
+ }
+ result[i] = new int[topK];
+ for (int k = topK - 1; k >= 0; k--) {
+ Neighbor n = queue.pop();
+ result[i][k] = n.node();
+ //System.out.print(" " + n);
+ }
+ if (quiet == false && (i + 1) % 10 == 0) {
+ System.out.print(" " + (i + 1));
+ System.out.flush();
+ }
+ }
+ }
+ }
+ return result;
+ }
+
+ private int createIndex(Path docsPath, Path indexPath) throws IOException {
+ IndexWriterConfig iwc = new IndexWriterConfig()
+ .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
+ // iwc.setMergePolicy(NoMergePolicy.INSTANCE);
+ iwc.setRAMBufferSizeMB(1994d);
+ if (quiet == false) {
+ iwc.setInfoStream(new PrintStreamInfoStream(System.out));
+ System.out.println("creating index in " + indexPath);
+ }
+ long start = System.nanoTime();
+ long totalBytes = (long) numDocs * dim * Float.BYTES, offset = 0;
+ try (FSDirectory dir = FSDirectory.open(indexPath);
+ IndexWriter iw = new IndexWriter(dir, iwc)) {
+ int blockSize = (int) Math.min(totalBytes, (Integer.MAX_VALUE / (dim * Float.BYTES)) * (dim * Float.BYTES));
+ float[] vector = new float[dim];
+ try (FileChannel in = FileChannel.open(docsPath)) {
+ int i = 0;
+ while (i < numDocs) {
+ FloatBuffer vectors = in.map(FileChannel.MapMode.READ_ONLY, offset, blockSize)
+ .order(ByteOrder.LITTLE_ENDIAN)
+ .asFloatBuffer();
+ offset += blockSize;
+ for (; vectors.hasRemaining() && i < numDocs ; i++) {
+ vectors.get(vector);
+ Document doc = new Document();
+ //System.out.println("vector=" + vector[0] + "," + vector[1] + "...");
+ doc.add(new VectorField(KNN_FIELD, vector, VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
+ doc.add(new StoredField(ID_FIELD, i));
+ iw.addDocument(doc);
+ }
+ }
+ if (quiet == false) {
+ System.out.println("Done indexing " + numDocs + " documents; now flush");
+ }
+ }
+ }
+ long elapsed = System.nanoTime() - start;
+ if (quiet == false) {
+ System.out.println("Indexed " + numDocs + " documents in " + elapsed / 1_000_000_000 + "s");
+ }
+ return (int) (elapsed / 1_000_000);
+ }
+
+ private static void usage() {
+ String error = "Usage: TestKnnGraph -generate|-search|-stats|-check {datafile} [-beamWidth N]";
+ System.err.println(error);
+ System.exit(1);
+ }
+
+}
diff --git a/lucene/core/src/test/org/apache/lucene/util/hnsw/TestHnsw.java b/lucene/core/src/test/org/apache/lucene/util/hnsw/TestHnsw.java
new file mode 100644
index 0000000..8f50a1d
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/util/hnsw/TestHnsw.java
@@ -0,0 +1,465 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.util.hnsw;
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Random;
+import java.util.Set;
+
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.lucene90.Lucene90VectorReader;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.document.VectorField;
+import org.apache.lucene.index.CodecReader;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.KnnGraphValues;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.RandomAccessVectorValues;
+import org.apache.lucene.index.RandomAccessVectorValuesProducer;
+import org.apache.lucene.index.VectorValues;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LuceneTestCase;
+
+import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
+
+/** Tests HNSW KNN graphs */
+public class TestHnsw extends LuceneTestCase {
+
+ // test writing out and reading in a graph gives the same graph
+ public void testReadWrite() throws IOException {
+ int dim = random().nextInt(100) + 1;
+ int nDoc = random().nextInt(100) + 1;
+ RandomVectorValues vectors = new RandomVectorValues(nDoc, dim, random());
+ RandomVectorValues v2 = vectors.copy(), v3 = vectors.copy();
+ // build the reference graph in memory with a fixed seed
+ long seed = random().nextLong();
+ HnswGraphBuilder.randSeed = seed;
+ HnswGraph hnsw = HnswGraphBuilder.build((RandomAccessVectorValuesProducer) vectors);
+ // Recreate the graph while indexing with the same random seed and write it out
+ HnswGraphBuilder.randSeed = seed;
+ try (Directory dir = newDirectory()) {
+ int nVec = 0, indexedDoc = 0;
+ // Don't merge randomly, create a single segment because we rely on the docid ordering for this test
+ IndexWriterConfig iwc = new IndexWriterConfig()
+ .setCodec(Codec.forName("Lucene90"));
+ try (IndexWriter iw = new IndexWriter(dir, iwc)) {
+ while (v2.nextDoc() != NO_MORE_DOCS) {
+ while (indexedDoc < v2.docID()) {
+ // increment docId in the index by adding empty documents
+ iw.addDocument(new Document());
+ indexedDoc++;
+ }
+ Document doc = new Document();
+ doc.add(new VectorField("field", v2.vectorValue(), v2.searchStrategy));
+ doc.add(new StoredField("id", v2.docID()));
+ iw.addDocument(doc);
+ nVec++;
+ indexedDoc++;
+ }
+ }
+ // read back and verify vectors and graph round-tripped unchanged
+ try (IndexReader reader = DirectoryReader.open(dir)) {
+ for (LeafReaderContext ctx : reader.leaves()) {
+ VectorValues values = ctx.reader().getVectorValues("field");
+ assertEquals(vectors.searchStrategy, values.searchStrategy());
+ assertEquals(dim, values.dimension());
+ assertEquals(nVec, values.size());
+ assertEquals(indexedDoc, ctx.reader().maxDoc());
+ assertEquals(indexedDoc, ctx.reader().numDocs());
+ assertVectorsEqual(v3, values);
+ KnnGraphValues graphValues = ((Lucene90VectorReader) ((CodecReader) ctx.reader()).getVectorReader()).getGraphValues("field");
+ assertGraphEqual(hnsw.getGraphValues(), graphValues, nVec);
+ }
+ }
+ }
+ }
+
+ // Make sure we actually approximately find the closest k elements. Mostly this is about
+ // ensuring that we have all the distance functions, comparators, priority queues and so on
+ // oriented in the right directions
+ public void testAknn() throws IOException {
+ int nDoc = 100;
+ RandomAccessVectorValuesProducer vectors = new CircularVectorValues(nDoc);
+ HnswGraph hnsw = HnswGraphBuilder.build(vectors);
+ // run some searches
+ Neighbors nn = HnswGraph.search(new float[]{1, 0}, 10, 5, vectors.randomAccess(), hnsw.getGraphValues(), random());
+ int sum = 0;
+ for (Neighbor n : nn) {
+ sum += n.node();
+ }
+ // We expect to get approximately 100% recall; the lowest docIds are closest to zero; sum(0,9) = 45
+ // (the bound of 75 allows the approximate search a little slack)
+ assertTrue("sum(result docs)=" + sum, sum < 75);
+ }
+
+ public void testMaxConnections() throws Exception {
+ // verify that maxConnections is observed, and that the retained arcs point to the best-scoring neighbors
+ // dot-product strategy: a HIGHER score (here the third connectNodes arg) is better,
+ // so connecting 0-2 with score 2 displaces the 0-1 arc (score 1) at maxConn=1
+ HnswGraph graph = new HnswGraph(1, VectorValues.SearchStrategy.DOT_PRODUCT_HNSW);
+ graph.connectNodes(0, 1, 1);
+ assertArrayEquals(new int[]{1}, graph.getNeighbors(0));
+ assertArrayEquals(new int[]{0}, graph.getNeighbors(1));
+ graph.connectNodes(0, 2, 2);
+ assertArrayEquals(new int[]{2}, graph.getNeighbors(0));
+ assertArrayEquals(new int[]{0}, graph.getNeighbors(1));
+ assertArrayEquals(new int[]{0}, graph.getNeighbors(2));
+ graph.connectNodes(2, 3, 1);
+ assertArrayEquals(new int[]{2}, graph.getNeighbors(0));
+ assertArrayEquals(new int[]{0}, graph.getNeighbors(1));
+ assertArrayEquals(new int[]{0}, graph.getNeighbors(2));
+ assertArrayEquals(new int[]{2}, graph.getNeighbors(3));
+
+ // euclidean strategy: the scores invert (smaller distance is better), so the
+ // same sequence of connections retains the opposite arcs
+ graph = new HnswGraph(1, VectorValues.SearchStrategy.EUCLIDEAN_HNSW);
+ graph.connectNodes(0, 1, 1);
+ assertArrayEquals(new int[]{1}, graph.getNeighbors(0));
+ assertArrayEquals(new int[]{0}, graph.getNeighbors(1));
+ graph.connectNodes(0, 2, 2);
+ assertArrayEquals(new int[]{1}, graph.getNeighbors(0));
+ assertArrayEquals(new int[]{0}, graph.getNeighbors(1));
+ assertArrayEquals(new int[]{0}, graph.getNeighbors(2));
+ graph.connectNodes(2, 3, 1);
+ assertArrayEquals(new int[]{1}, graph.getNeighbors(0));
+ assertArrayEquals(new int[]{0}, graph.getNeighbors(1));
+ assertArrayEquals(new int[]{3}, graph.getNeighbors(2));
+ assertArrayEquals(new int[]{2}, graph.getNeighbors(3));
+ }
+
+ /** Returns vectors evenly distributed around the unit circle.
+ */
+ class CircularVectorValues extends VectorValues implements RandomAccessVectorValues, RandomAccessVectorValuesProducer {
+ private final int size;
+ // shared scratch array returned (and overwritten) by every vectorValue() call
+ private final float[] value;
+
+ // current iterator position; -1 before iteration starts
+ int doc = -1;
+
+ CircularVectorValues(int size) {
+ this.size = size;
+ value = new float[2];
+ }
+
+ // fresh copy with iteration state reset to the start
+ public CircularVectorValues copy() {
+ return new CircularVectorValues(size);
+ }
+
+ @Override
+ public SearchStrategy searchStrategy() {
+ return SearchStrategy.DOT_PRODUCT_HNSW;
+ }
+
+ @Override
+ public int dimension() {
+ return 2;
+ }
+
+ @Override
+ public int size() {
+ return size;
+ }
+
+ @Override
+ public float[] vectorValue() {
+ return vectorValue(doc);
+ }
+
+ @Override
+ public RandomAccessVectorValues randomAccess() {
+ // independent instance so random access does not disturb this iterator
+ return new CircularVectorValues(size);
+ }
+
+ @Override
+ public int docID() {
+ return doc;
+ }
+
+ @Override
+ public int nextDoc() {
+ return advance(doc + 1);
+ }
+
+ @Override
+ public int advance(int target) {
+ // every ordinal in [0, size) has a vector; beyond that we are exhausted
+ if (target >= 0 && target < size) {
+ doc = target;
+ } else {
+ doc = NO_MORE_DOCS;
+ }
+ return doc;
+ }
+
+ @Override
+ public long cost() {
+ return size;
+ }
+
+ @Override
+ public float[] vectorValue(int ord) {
+ // point at angle ord/size * pi on the unit circle; note the shared scratch
+ // array is overwritten on each call
+ value[0] = (float) Math.cos(Math.PI * ord / (double) size);
+ value[1] = (float) Math.sin(Math.PI * ord / (double) size);
+ return value;
+ }
+
+ @Override
+ public BytesRef binaryValue(int ord) {
+ return null;
+ }
+
+ @Override
+ public TopDocs search(float[] target, int k, int fanout) {
+ // not used by these tests
+ return null;
+ }
+
+ }
+
+ // Assert both graphs have identical neighbor sets for every node in [0, size).
+ private void assertGraphEqual(KnnGraphValues g, KnnGraphValues h, int size) throws IOException {
+ for (int node = 0; node < size; node ++) {
+ g.seek(node);
+ h.seek(node);
+ assertEquals("arcs differ for node " + node, getNeighbors(g), getNeighbors(h));
+ }
+ }
+
+ private Set<Integer> getNeighbors(KnnGraphValues g) throws IOException {
+ Set<Integer> neighbors = new HashSet<>();
+ for (int n = g.nextNeighbor(); n != NO_MORE_DOCS; n = g.nextNeighbor()) {
+ neighbors.add(n);
+ }
+ return neighbors;
+ }
+
+ private void assertVectorsEqual(VectorValues u, VectorValues v) throws IOException {
+ int uDoc, vDoc;
+ while (true) {
+ uDoc = u.nextDoc();
+ vDoc = v.nextDoc();
+ assertEquals(uDoc, vDoc);
+ if (uDoc == NO_MORE_DOCS) {
+ break;
+ }
+ assertArrayEquals("vectors do not match for doc=" + uDoc, u.vectorValue(), v.vectorValue(), 1e-4f);
+ }
+ }
+
+ public void testNeighbors() {
+ // make sure we have the sign correct
+ Neighbors nn = Neighbors.create(2, false);
+ Neighbor a = new Neighbor(1, 10);
+ Neighbor b = new Neighbor(2, 20);
+ Neighbor c = new Neighbor(3, 30);
+ assertNull(nn.insertWithOverflow(b));
+ assertNull(nn.insertWithOverflow(a));
+ assertSame(a, nn.insertWithOverflow(c));
+ assertEquals(20, (int) nn.top().score());
+ assertEquals(20, (int) nn.pop().score());
+ assertEquals(30, (int) nn.top().score());
+ assertEquals(30, (int) nn.pop().score());
+
+ Neighbors fn = Neighbors.create(2, true);
+ assertNull(fn.insertWithOverflow(b));
+ assertNull(fn.insertWithOverflow(a));
+ assertSame(c, fn.insertWithOverflow(c));
+ assertEquals(20, (int) fn.top().score());
+ assertEquals(20, (int) fn.pop().score());
+ assertEquals(10, (int) fn.top().score());
+ assertEquals(10, (int) fn.pop().score());
+ }
+
+ @SuppressWarnings("SelfComparison")
+ public void testNeighbor() {
+ Neighbor a = new Neighbor(1, 10);
+ Neighbor b = new Neighbor(2, 20);
+ Neighbor c = new Neighbor(3, 20);
+ assertEquals(0, a.compareTo(a));
+ assertEquals(-1, a.compareTo(b));
+ assertEquals(1, b.compareTo(a));
+ assertEquals(1, b.compareTo(c));
+ assertEquals(-1, c.compareTo(b));
+ }
+
+ private static float[] randomVector(Random random, int dim) {
+ float[] vec = new float[dim];
+ for (int i = 0; i < dim; i++) {
+ vec[i] = random.nextFloat();
+ }
+ return vec;
+ }
+
+ /**
+ * Produces random vectors and caches them for random-access.
+ */
+ class RandomVectorValues extends VectorValues implements RandomAccessVectorValues, RandomAccessVectorValuesProducer {
+
+ private final int dimension;
+ private final float[][] denseValues;
+ private final float[][] values;
+ private final float[] scratch;
+ private final SearchStrategy searchStrategy;
+
+ final int numVectors;
+ final int maxDoc;
+
+ private int pos = -1;
+
+ RandomVectorValues(int size, int dimension, Random random) {
+ this.dimension = dimension;
+ values = new float[size][];
+ denseValues = new float[size][];
+ scratch = new float[dimension];
+ int sz = 0;
+ int md = -1;
+ for (int offset = 0; offset < size; offset += random.nextInt(3) + 1) {
+ values[offset] = randomVector(random, dimension);
+ denseValues[sz++] = values[offset];
+ md = offset;
+ }
+ numVectors = sz;
+ maxDoc = md;
+ // get a random SearchStrategy other than NONE (0)
+ searchStrategy = SearchStrategy.values()[random.nextInt(SearchStrategy.values().length - 1) + 1];
+ }
+
+ private RandomVectorValues(int dimension, SearchStrategy searchStrategy, float[][] denseValues, float[][] values, int size) {
+ this.dimension = dimension;
+ this.searchStrategy = searchStrategy;
+ this.values = values;
+ this.denseValues = denseValues;
+ scratch = new float[dimension];
+ numVectors = size;
+ maxDoc = values.length - 1;
+ }
+
+ public RandomVectorValues copy() {
+ return new RandomVectorValues(dimension, searchStrategy, denseValues, values, numVectors);
+ }
+
+ @Override
+ public int size() {
+ return numVectors;
+ }
+
+ @Override
+ public SearchStrategy searchStrategy() {
+ return searchStrategy;
+ }
+
+ @Override
+ public int dimension() {
+ return dimension;
+ }
+
+ @Override
+ public float[] vectorValue() {
+ if(random().nextBoolean()) {
+ return values[pos];
+ } else {
+ // Sometimes use the same scratch array repeatedly, mimicking what the codec will do.
+ // This should help us catch cases of aliasing where the same VectorValues source is used twice in a
+ // single computation.
+ System.arraycopy(values[pos], 0, scratch, 0, dimension);
+ return scratch;
+ }
+ }
+
+ @Override
+ public RandomAccessVectorValues randomAccess() {
+ return copy();
+ }
+
+ @Override
+ public float[] vectorValue(int targetOrd) {
+ return denseValues[targetOrd];
+ }
+
+ @Override
+ public BytesRef binaryValue(int targetOrd) {
+ return null;
+ }
+
+ @Override
+ public TopDocs search(float[] target, int k, int fanout) {
+ return null;
+ }
+
+ private boolean seek(int target) {
+ if (target >= 0 && target < values.length && values[target] != null) {
+ pos = target;
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ @Override
+ public int docID() {
+ return pos;
+ }
+
+ @Override
+ public int nextDoc() {
+ return advance(pos + 1);
+ }
+
+ public int advance(int target) {
+ while (++pos < values.length) {
+ if (seek(pos)) {
+ return pos;
+ }
+ }
+ return NO_MORE_DOCS;
+ }
+
+ @Override
+ public long cost() {
+ return size();
+ }
+
+ }
+
+ public void testBoundsCheckerMax() {
+ BoundsChecker max = BoundsChecker.create(false);
+ float f = random().nextFloat() - 0.5f;
+ // any float > -MAX_VALUE is in bounds
+ assertFalse(max.check(f));
+ // f is now the bound (minus some delta)
+ max.update(f);
+ assertFalse(max.check(f)); // f is not out of bounds
+ assertFalse(max.check(f + 1)); // anything greater than f is in bounds
+ assertTrue(max.check(f - 1e-5f)); // delta is zero initially
+ }
+
+ public void testBoundsCheckerMin() {
+ BoundsChecker min = BoundsChecker.create(true);
+ float f = random().nextFloat() - 0.5f;
+ // any float < MAX_VALUE is in bounds
+ assertFalse(min.check(f));
+ // f is now the bound (minus some delta)
+ min.update(f);
+ assertFalse(min.check(f)); // f is not out of bounds
+ assertFalse(min.check(f - 1)); // anything less than f is in bounds
+ assertTrue(min.check(f + 1e-5f)); // delta is zero initially
+ }
+
+ public void testHnswGraphBuilderInvalid() {
+ expectThrows(NullPointerException.class, () -> new HnswGraphBuilder(null, 0, 0, 0));
+ expectThrows(IllegalArgumentException.class, () -> new HnswGraphBuilder(new RandomVectorValues(1, 1, random()), 0, 10, 0));
+ expectThrows(IllegalArgumentException.class, () -> new HnswGraphBuilder(new RandomVectorValues(1, 1, random()), 10, 0, 0));
+ }
+
+}
diff --git a/lucene/facet/build.gradle b/lucene/facet/build.gradle
index 6b6a6ef..e94d8b0 100644
--- a/lucene/facet/build.gradle
+++ b/lucene/facet/build.gradle
@@ -27,4 +27,6 @@
testImplementation project(':lucene:test-framework')
testImplementation project(':lucene:queries')
+ // Required for opening older indexes for backward compatibility tests
+ testCompile group: 'org.apache.lucene', name: 'lucene-codecs', version: '8.6.3'
}
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
index bcc4c6d..e063230 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
@@ -31,12 +31,15 @@
import org.apache.lucene.facet.taxonomy.LRUHashMap;
import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CorruptIndexException; // javadocs
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiTerms;
import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.Directory;
@@ -322,9 +325,24 @@
return res;
}
}
-
- Document doc = indexReader.document(ordinal);
- FacetLabel ret = new FacetLabel(FacetsConfig.stringToPath(doc.get(Consts.FULL)));
+
+ int readerIndex = ReaderUtil.subIndex(ordinal, indexReader.leaves());
+ LeafReader leafReader = indexReader.leaves().get(readerIndex).reader();
+ // TODO: Use LUCENE-9476 to get the bulk lookup API for extracting BinaryDocValues
+ BinaryDocValues values = leafReader.getBinaryDocValues(Consts.FULL);
+
+ FacetLabel ret;
+
+ if (values == null || values.advanceExact(ordinal-indexReader.leaves().get(readerIndex).docBase) == false) {
+ // The index uses the older StoredField format to store the mapping
+ // On recreating the index, the values will be stored using the BinaryDocValuesField format
+ Document doc = indexReader.document(ordinal);
+ ret = new FacetLabel(FacetsConfig.stringToPath(doc.get(Consts.FULL)));
+ } else {
+ // The index uses the BinaryDocValuesField format to store the mapping
+ ret = new FacetLabel(FacetsConfig.stringToPath(values.binaryValue().utf8ToString()));
+ }
+
synchronized (categoryCache) {
categoryCache.put(catIDInteger, ret);
}
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
index f8374a3..d03d32c 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
@@ -31,6 +31,7 @@
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
@@ -193,7 +194,7 @@
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
ft.setOmitNorms(true);
parentStreamField = new Field(Consts.FIELD_PAYLOADS, parentStream, ft);
- fullPathField = new StringField(Consts.FULL, "", Field.Store.YES);
+ fullPathField = new StringField(Consts.FULL, "", Field.Store.NO);
nextID = indexWriter.getDocStats().maxDoc;
@@ -492,8 +493,10 @@
Document d = new Document();
d.add(parentStreamField);
- fullPathField.setStringValue(FacetsConfig.pathToString(categoryPath.components, categoryPath.length));
+ String fieldPath = FacetsConfig.pathToString(categoryPath.components, categoryPath.length);
+ fullPathField.setStringValue(fieldPath);
d.add(fullPathField);
+ d.add(new BinaryDocValuesField(Consts.FULL, new BytesRef(fieldPath)));
// Note that we do no pass an Analyzer here because the fields that are
// added to the Document are untokenized or contains their own TokenStream.
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestBackwardsCompatibility.java b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestBackwardsCompatibility.java
new file mode 100644
index 0000000..69d975d
--- /dev/null
+++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestBackwardsCompatibility.java
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.facet.taxonomy.directory;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+import org.apache.lucene.facet.taxonomy.FacetLabel;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
+import org.junit.Ignore;
+
+/*
+ Verify we can read previous versions' taxonomy indexes, do searches
+ against them, and add documents to them.
+*/
+// See: https://issues.apache.org/jira/browse/SOLR-12028 Tests cannot remove files on Windows machines occasionally
+public class TestBackwardsCompatibility extends LuceneTestCase {
+
+ // To generate backcompat indexes with the current default codec, run the following gradle command:
+ // gradlew test -Dtestcase=TestBackwardsCompatibility -Dtests.bwcdir=/path/to/store/indexes
+ // -Dtests.codec=default -Dtests.useSecurityManager=false
+ // Also add testmethod with one of the index creation methods below, for example:
+ // -Dtestmethod=testCreateOldTaxonomy
+ //
+ // Zip up the generated indexes:
+ //
+ // cd /path/to/store/indexes/index.cfs ; zip index.<VERSION>-cfs.zip *
+ //
+ // Then move the zip file to your trunk checkout and use it in your test cases
+
+ public static final String oldTaxonomyIndexName = "taxonomy.8.6.3-cfs";
+
+ public void testCreateNewTaxonomy() throws IOException {
+ createNewTaxonomyIndex(oldTaxonomyIndexName);
+ }
+
+ // Opens up a pre-existing old taxonomy index and adds new BinaryDocValues based fields
+ private void createNewTaxonomyIndex(String dirName) throws IOException {
+ Path indexDir = createTempDir(oldTaxonomyIndexName);
+ TestUtil.unzip(getDataInputStream(dirName + ".zip"), indexDir);
+ Directory dir = newFSDirectory(indexDir);
+
+ DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
+
+ FacetLabel cp_b = new FacetLabel("b");
+ writer.addCategory(cp_b);
+ writer.getInternalIndexWriter().forceMerge(1);
+ writer.commit();
+
+ TaxonomyReader reader = new DirectoryTaxonomyReader(writer);
+
+ int ord1 = reader.getOrdinal(new FacetLabel("a"));
+ assert ord1 != TaxonomyReader.INVALID_ORDINAL;
+ // Just asserting ord1 != TaxonomyReader.INVALID_ORDINAL is not enough to check compatibility
+ assertNotNull(reader.getPath(ord1));
+
+ int ord2 = reader.getOrdinal(cp_b);
+ assert ord2 != TaxonomyReader.INVALID_ORDINAL;
+ assertNotNull(reader.getPath(ord2));
+
+ reader.close();
+ writer.close();
+ dir.close();
+ }
+
+ // Used to create a fresh taxonomy index with StoredFields
+ @Ignore
+ public void testCreateOldTaxonomy() throws IOException {
+ createOldTaxonomyIndex(oldTaxonomyIndexName);
+ }
+
+ private void createOldTaxonomyIndex(String dirName) throws IOException {
+ Path indexDir = getIndexDir().resolve(dirName);
+ Files.deleteIfExists(indexDir);
+ Directory dir = newFSDirectory(indexDir);
+
+ TaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
+
+ writer.addCategory(new FacetLabel("a"));
+ writer.commit();
+ writer.close();
+ dir.close();
+ }
+
+ private Path getIndexDir() {
+ String path = System.getProperty("tests.bwcdir");
+ assumeTrue("backcompat creation tests must be run with -Dtests.bwcdir=/path/to/write/indexes", path != null);
+ return Paths.get(path);
+ }
+}
\ No newline at end of file
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/taxonomy.8.6.3-cfs.zip b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/taxonomy.8.6.3-cfs.zip
new file mode 100644
index 0000000..d04c706
--- /dev/null
+++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/taxonomy.8.6.3-cfs.zip
Binary files differ
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreCollector.java b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreCollector.java
index ee9731f..56c6991 100644
--- a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreCollector.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreCollector.java
@@ -47,7 +47,7 @@
throw new IllegalStateException("Can't collect more than [" + Integer.MAX_VALUE + "] ids");
}
this.field = field;
- this.doMinMax = !(min <= 0 && max == Integer.MAX_VALUE);
+ this.doMinMax = min > 1 || max < Integer.MAX_VALUE;
this.min = min;
this.max = max;;
this.ordinalMap = ordinalMap;
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreQuery.java
index 183bca1..4a7ed92 100644
--- a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreQuery.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreQuery.java
@@ -86,7 +86,7 @@
if (searcher.getTopReaderContext().id() != indexReaderContextId) {
throw new IllegalStateException("Creating the weight against a different index reader than this query has been built for.");
}
- boolean doNoMinMax = min <= 0 && max == Integer.MAX_VALUE;
+ boolean doNoMinMax = min <= 1 && max == Integer.MAX_VALUE;
if (scoreMode.needsScores() == false && doNoMinMax) {
// We don't need scores then quickly change the query to not uses the scores:
GlobalOrdinalsQuery globalOrdinalsQuery = new GlobalOrdinalsQuery(collector.collectedOrds, joinField, globalOrds,
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/JoinUtil.java b/lucene/join/src/java/org/apache/lucene/search/join/JoinUtil.java
index 0e933c0..b9d0a43 100644
--- a/lucene/join/src/java/org/apache/lucene/search/join/JoinUtil.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/JoinUtil.java
@@ -496,7 +496,7 @@
globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.Avg(joinField, ordinalMap, valueCount, min, max);
break;
case None:
- if (min <= 0 && max == Integer.MAX_VALUE) {
+ if (min <= 1 && max == Integer.MAX_VALUE) {
GlobalOrdinalsCollector globalOrdinalsCollector = new GlobalOrdinalsCollector(joinField, ordinalMap, valueCount);
searcher.search(rewrittenFromQuery, globalOrdinalsCollector);
return new GlobalOrdinalsQuery(globalOrdinalsCollector.getCollectorOrdinals(), joinField, ordinalMap, rewrittenToQuery,
diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java b/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java
index 4d72b13..1803f03 100644
--- a/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java
+++ b/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java
@@ -640,7 +640,6 @@
}
assertEquals(expectedCount, collector.getTotalHits());
}
-
searcher.getIndexReader().close();
dir.close();
}
@@ -657,8 +656,19 @@
IndexReader reader = w.getReader();
IndexSearcher searcher = newSearcher(reader);
OrdinalMap ordMap = OrdinalMap.build(null, new SortedDocValues[0], 0f);
- Query joinQuery = JoinUtil.createJoinQuery("join_field", new MatchNoDocsQuery(), new MatchNoDocsQuery(), searcher, RandomPicks.randomFrom(random(), ScoreMode.values()), ordMap, 0, Integer.MAX_VALUE);
- searcher.search(joinQuery, 1); // no exception due to missing rewrites
+ {
+ Query joinQuery = JoinUtil.createJoinQuery("join_field", new MatchNoDocsQuery(),
+ new MatchNoDocsQuery(), searcher, RandomPicks.randomFrom(random(), ScoreMode.values()), ordMap, 0, Integer.MAX_VALUE);
+ searcher.search(joinQuery, 1); // no exception due to missing rewrites
+ }
+ {
+ Query joinQuery = JoinUtil.createJoinQuery("join_field", new MatchNoDocsQuery(),
+ new MatchNoDocsQuery(), searcher, ScoreMode.None, ordMap, 1, Integer.MAX_VALUE);
+ Query rewritten = searcher.rewrite(joinQuery);
+ // should simplify to GlobalOrdinalsQuery since min is set to 1
+ assertTrue(rewritten instanceof GlobalOrdinalsQuery);
+ searcher.search(joinQuery, 1); // no exception due to missing rewrites
+ }
reader.close();
w.close();
dir.close();
diff --git a/lucene/licenses/jetty-continuation-9.4.32.v20200930.jar.sha1 b/lucene/licenses/jetty-continuation-9.4.32.v20200930.jar.sha1
deleted file mode 100644
index 681e97a..0000000
--- a/lucene/licenses/jetty-continuation-9.4.32.v20200930.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-b46713a1b8b2baf951f6514dd621c5a546254d6c
diff --git a/lucene/licenses/jetty-continuation-9.4.34.v20201102.jar.sha1 b/lucene/licenses/jetty-continuation-9.4.34.v20201102.jar.sha1
new file mode 100644
index 0000000..9cfe056
--- /dev/null
+++ b/lucene/licenses/jetty-continuation-9.4.34.v20201102.jar.sha1
@@ -0,0 +1 @@
+070923c6b55dcabd4bde53971554261048844b3f
diff --git a/lucene/licenses/jetty-http-9.4.32.v20200930.jar.sha1 b/lucene/licenses/jetty-http-9.4.32.v20200930.jar.sha1
deleted file mode 100644
index 0e08fdd..0000000
--- a/lucene/licenses/jetty-http-9.4.32.v20200930.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-5fdcefd82178d11f895690f4fe6e843be69394b3
diff --git a/lucene/licenses/jetty-http-9.4.34.v20201102.jar.sha1 b/lucene/licenses/jetty-http-9.4.34.v20201102.jar.sha1
new file mode 100644
index 0000000..c44b514
--- /dev/null
+++ b/lucene/licenses/jetty-http-9.4.34.v20201102.jar.sha1
@@ -0,0 +1 @@
+7acb9ea4deaba904a49e304ac24f9b3a8ddb5881
diff --git a/lucene/licenses/jetty-io-9.4.32.v20200930.jar.sha1 b/lucene/licenses/jetty-io-9.4.32.v20200930.jar.sha1
deleted file mode 100644
index 1a59673..0000000
--- a/lucene/licenses/jetty-io-9.4.32.v20200930.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-0d0f32c3b511d6b3a542787f95ed229731588810
diff --git a/lucene/licenses/jetty-io-9.4.34.v20201102.jar.sha1 b/lucene/licenses/jetty-io-9.4.34.v20201102.jar.sha1
new file mode 100644
index 0000000..46b17d2
--- /dev/null
+++ b/lucene/licenses/jetty-io-9.4.34.v20201102.jar.sha1
@@ -0,0 +1 @@
+80693ce94fa34647e9af613ba17c443feb624590
diff --git a/lucene/licenses/jetty-server-9.4.32.v20200930.jar.sha1 b/lucene/licenses/jetty-server-9.4.32.v20200930.jar.sha1
deleted file mode 100644
index 81c447c..0000000
--- a/lucene/licenses/jetty-server-9.4.32.v20200930.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-d2d89099be5237cf68254bc943a7d800d3ee1945
diff --git a/lucene/licenses/jetty-server-9.4.34.v20201102.jar.sha1 b/lucene/licenses/jetty-server-9.4.34.v20201102.jar.sha1
new file mode 100644
index 0000000..bd9bf9f
--- /dev/null
+++ b/lucene/licenses/jetty-server-9.4.34.v20201102.jar.sha1
@@ -0,0 +1 @@
+19c5309325d5819a9e22bfee66a3c0d50750ed03
diff --git a/lucene/licenses/jetty-servlet-9.4.32.v20200930.jar.sha1 b/lucene/licenses/jetty-servlet-9.4.32.v20200930.jar.sha1
deleted file mode 100644
index e23407d..0000000
--- a/lucene/licenses/jetty-servlet-9.4.32.v20200930.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-4253dd46c099e0bca4dd763fc1e10774e10de00a
diff --git a/lucene/licenses/jetty-servlet-9.4.34.v20201102.jar.sha1 b/lucene/licenses/jetty-servlet-9.4.34.v20201102.jar.sha1
new file mode 100644
index 0000000..822f159
--- /dev/null
+++ b/lucene/licenses/jetty-servlet-9.4.34.v20201102.jar.sha1
@@ -0,0 +1 @@
+606f9724b14bf58c915ee0e37f6425c52dae7b76
diff --git a/lucene/licenses/jetty-util-9.4.32.v20200930.jar.sha1 b/lucene/licenses/jetty-util-9.4.32.v20200930.jar.sha1
deleted file mode 100644
index e59c8f4..0000000
--- a/lucene/licenses/jetty-util-9.4.32.v20200930.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-efefd29006dcc9c9960a679263504287ce4e6896
diff --git a/lucene/licenses/jetty-util-9.4.34.v20201102.jar.sha1 b/lucene/licenses/jetty-util-9.4.34.v20201102.jar.sha1
new file mode 100644
index 0000000..5b96d84
--- /dev/null
+++ b/lucene/licenses/jetty-util-9.4.34.v20201102.jar.sha1
@@ -0,0 +1 @@
+47993d1def63ca9e8bc7284716a89031f642db82
diff --git a/lucene/misc/build.gradle b/lucene/misc/build.gradle
index 11758f7..efed4f2 100644
--- a/lucene/misc/build.gradle
+++ b/lucene/misc/build.gradle
@@ -22,4 +22,6 @@
dependencies {
api project(':lucene:core')
testImplementation project(':lucene:test-framework')
-}
+
+ nativeDeps project(":lucene:misc:native")
+}
\ No newline at end of file
diff --git a/lucene/misc/native/build.gradle b/lucene/misc/native/build.gradle
new file mode 100644
index 0000000..3673660
--- /dev/null
+++ b/lucene/misc/native/build.gradle
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * This gets separated out from misc module into a native module due to incompatibility between cpp-library and java-library plugins.
+ * For details, please see https://github.com/gradle/gradle-native/issues/352#issuecomment-461724948
+ */
+import org.apache.tools.ant.taskdefs.condition.Os
+
+description = 'Module for native code'
+
+apply plugin: 'cpp-library'
+
+library {
+ baseName = 'LuceneNativeIO'
+
+ // Target machines for the native library build (Windows sources are selected below by OS family)
+ targetMachines = [
+ machines.linux.x86_64,
+ machines.macOS.x86_64,
+ machines.windows.x86_64
+ ]
+
+ // Point at platform-specific sources. Other platforms will be ignored
+ // (plugin won't find the toolchain).
+ if (Os.isFamily(Os.FAMILY_WINDOWS)) {
+ source.from file("${projectDir}/src/main/windows")
+ } else if (Os.isFamily(Os.FAMILY_UNIX) || Os.isFamily(Os.FAMILY_MAC)) {
+ source.from file("${projectDir}/src/main/posix")
+ }
+}
+
+tasks.withType(CppCompile).configureEach {
+ def javaHome = rootProject.ext.runtimeJava.getInstallationDirectory().getAsFile().getPath()
+
+ // Assume standard openjdk layout. This means only one architecture-specific include folder
+ // is present.
+ systemIncludes.from file("${javaHome}/include")
+
+ for (def path : [
+ file("${javaHome}/include/win32"),
+ file("${javaHome}/include/darwin"),
+ file("${javaHome}/include/linux"),
+ file("${javaHome}/include/solaris")]) {
+ if (path.exists()) {
+ systemIncludes.from path
+ }
+ }
+
+ compilerArgs.add '-fPIC'
+}
+
+tasks.withType(LinkSharedLibrary).configureEach {
+ linkerArgs.add '-lstdc++'
+}
diff --git a/lucene/misc/src/java/org/apache/lucene/store/NativePosixUtil.cpp b/lucene/misc/native/src/main/posix/NativePosixUtil.cpp
similarity index 87%
rename from lucene/misc/src/java/org/apache/lucene/store/NativePosixUtil.cpp
rename to lucene/misc/native/src/main/posix/NativePosixUtil.cpp
index 999e6a2..55d6ef4 100644
--- a/lucene/misc/src/java/org/apache/lucene/store/NativePosixUtil.cpp
+++ b/lucene/misc/native/src/main/posix/NativePosixUtil.cpp
@@ -38,12 +38,12 @@
#ifdef LINUX
/*
- * Class: org_apache_lucene_store_NativePosixUtil
+ * Class: org_apache_lucene_misc_store_NativePosixUtil
* Method: posix_fadvise
* Signature: (Ljava/io/FileDescriptor;JJI)V
*/
extern "C"
-JNIEXPORT jint JNICALL Java_org_apache_lucene_store_NativePosixUtil_posix_1fadvise(JNIEnv *env, jclass _ignore, jobject fileDescriptor, jlong offset, jlong len, jint advice)
+JNIEXPORT jint JNICALL Java_org_apache_lucene_misc_store_NativePosixUtil_posix_1fadvise(JNIEnv *env, jclass _ignore, jobject fileDescriptor, jlong offset, jlong len, jint advice)
{
jfieldID field_fd;
jmethodID const_fdesc;
@@ -103,12 +103,12 @@
#endif
/*
- * Class: org_apache_lucene_store_NativePosixUtil
+ * Class: org_apache_lucene_misc_store_NativePosixUtil
* Method: open_direct
* Signature: (Ljava/lang/String;Z)Ljava/io/FileDescriptor;
*/
extern "C"
-JNIEXPORT jobject JNICALL Java_org_apache_lucene_store_NativePosixUtil_open_1direct(JNIEnv *env, jclass _ignore, jstring filename, jboolean readOnly)
+JNIEXPORT jobject JNICALL Java_org_apache_lucene_misc_store_NativePosixUtil_open_1direct(JNIEnv *env, jclass _ignore, jstring filename, jboolean readOnly)
{
jfieldID field_fd;
jmethodID const_fdesc;
@@ -169,12 +169,12 @@
}
/*
- * Class: org_apache_lucene_store_NativePosixUtil
+ * Class: org_apache_lucene_misc_store_NativePosixUtil
* Method: pread
* Signature: (Ljava/io/FileDescriptor;JLjava/nio/ByteBuffer;)I
*/
extern "C"
-JNIEXPORT jlong JNICALL Java_org_apache_lucene_store_NativePosixUtil_pread(JNIEnv *env, jclass _ignore, jobject jfd, jlong pos, jobject byteBuf)
+JNIEXPORT jlong JNICALL Java_org_apache_lucene_misc_store_NativePosixUtil_pread(JNIEnv *env, jclass _ignore, jobject jfd, jlong pos, jobject byteBuf)
{
// get int fd:
jclass class_fdesc = env->FindClass("java/io/FileDescriptor");
@@ -214,12 +214,12 @@
}
/*
- * Class: org_apache_lucene_store_NativePosixUtil
+ * Class: org_apache_lucene_misc_store_NativePosixUtil
* Method: posix_madvise
* Signature: (Ljava/nio/ByteBuffer;I)I
*/
extern "C"
-JNIEXPORT jint JNICALL Java_org_apache_lucene_store_NativePosixUtil_posix_1madvise(JNIEnv *env, jclass _ignore, jobject buffer, jint advice) {
+JNIEXPORT jint JNICALL Java_org_apache_lucene_misc_store_NativePosixUtil_posix_1madvise(JNIEnv *env, jclass _ignore, jobject buffer, jint advice) {
void *p = env->GetDirectBufferAddress(buffer);
if (p == NULL) {
return -1;
@@ -280,12 +280,12 @@
/*
- * Class: org_apache_lucene_store_NativePosixUtil
+ * Class: org_apache_lucene_misc_store_NativePosixUtil
* Method: madvise
* Signature: (Ljava/nio/ByteBuffer;I)I
*/
extern "C"
-JNIEXPORT jint JNICALL Java_org_apache_lucene_store_NativePosixUtil_madvise(JNIEnv *env, jclass _ignore, jobject buffer, jint advice) {
+JNIEXPORT jint JNICALL Java_org_apache_lucene_misc_store_NativePosixUtil_madvise(JNIEnv *env, jclass _ignore, jobject buffer, jint advice) {
void *p = env->GetDirectBufferAddress(buffer);
if (p == NULL) {
return -1;
diff --git a/lucene/misc/src/java/org/apache/lucene/store/WindowsDirectory.cpp b/lucene/misc/native/src/main/windows/WindowsDirectory.cpp
similarity index 88%
rename from lucene/misc/src/java/org/apache/lucene/store/WindowsDirectory.cpp
rename to lucene/misc/native/src/main/windows/WindowsDirectory.cpp
index 416aed9..20f79fe 100644
--- a/lucene/misc/src/java/org/apache/lucene/store/WindowsDirectory.cpp
+++ b/lucene/misc/native/src/main/windows/WindowsDirectory.cpp
@@ -56,11 +56,11 @@
/**
* Opens a handle to a file.
*
- * Class: org_apache_lucene_store_WindowsDirectory
+ * Class: org_apache_lucene_misc_store_WindowsDirectory
* Method: open
* Signature: (Ljava/lang/String;)J
*/
-JNIEXPORT jlong JNICALL Java_org_apache_lucene_store_WindowsDirectory_open
+JNIEXPORT jlong JNICALL Java_org_apache_lucene_misc_store_WindowsDirectory_open
(JNIEnv *env, jclass ignored, jstring filename)
{
char *fname;
@@ -95,11 +95,11 @@
* Reads data into the byte array, starting at offset, for length characters.
* The read is positioned at pos.
*
- * Class: org_apache_lucene_store_WindowsDirectory
+ * Class: org_apache_lucene_misc_store_WindowsDirectory
* Method: read
* Signature: (J[BIIJ)I
*/
-JNIEXPORT jint JNICALL Java_org_apache_lucene_store_WindowsDirectory_read
+JNIEXPORT jint JNICALL Java_org_apache_lucene_misc_store_WindowsDirectory_read
(JNIEnv *env, jclass ignored, jlong fd, jbyteArray bytes, jint offset, jint length, jlong pos)
{
OVERLAPPED io = { 0 };
@@ -140,11 +140,11 @@
/**
* Closes a handle to a file
*
- * Class: org_apache_lucene_store_WindowsDirectory
+ * Class: org_apache_lucene_misc_store_WindowsDirectory
* Method: close
* Signature: (J)V
*/
-JNIEXPORT void JNICALL Java_org_apache_lucene_store_WindowsDirectory_close
+JNIEXPORT void JNICALL Java_org_apache_lucene_misc_store_WindowsDirectory_close
(JNIEnv *env, jclass ignored, jlong fd)
{
if (!CloseHandle((HANDLE) fd)) {
@@ -155,11 +155,11 @@
/**
* Returns the length in bytes of a file.
*
- * Class: org_apache_lucene_store_WindowsDirectory
+ * Class: org_apache_lucene_misc_store_WindowsDirectory
* Method: length
* Signature: (J)J
*/
-JNIEXPORT jlong JNICALL Java_org_apache_lucene_store_WindowsDirectory_length
+JNIEXPORT jlong JNICALL Java_org_apache_lucene_misc_store_WindowsDirectory_length
(JNIEnv *env, jclass ignored, jlong fd)
{
BY_HANDLE_FILE_INFORMATION info;
diff --git a/lucene/misc/src/java/org/apache/lucene/document/package.html b/lucene/misc/src/java/org/apache/lucene/document/package.html
deleted file mode 100644
index 3e55596..0000000
--- a/lucene/misc/src/java/org/apache/lucene/document/package.html
+++ /dev/null
@@ -1,22 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<!-- not a package-info.java, because we already defined this package in core/ -->
-<html>
-<body>
-Misc extensions of the Document/Field API.
-</body>
-</html>
diff --git a/lucene/misc/src/java/org/apache/lucene/misc/CollectorMemoryTracker.java b/lucene/misc/src/java/org/apache/lucene/misc/CollectorMemoryTracker.java
index 544b760..10d6b14 100644
--- a/lucene/misc/src/java/org/apache/lucene/misc/CollectorMemoryTracker.java
+++ b/lucene/misc/src/java/org/apache/lucene/misc/CollectorMemoryTracker.java
@@ -19,7 +19,7 @@
import java.util.concurrent.atomic.AtomicLong;
-import org.apache.lucene.util.MemoryTracker;
+import org.apache.lucene.misc.util.MemoryTracker;
/**
* Default implementation of {@code MemoryTracker} that tracks
diff --git a/lucene/misc/src/java/org/apache/lucene/misc/IndexMergeTool.java b/lucene/misc/src/java/org/apache/lucene/misc/IndexMergeTool.java
index 2a69acb..207283b 100644
--- a/lucene/misc/src/java/org/apache/lucene/misc/IndexMergeTool.java
+++ b/lucene/misc/src/java/org/apache/lucene/misc/IndexMergeTool.java
@@ -22,7 +22,7 @@
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
-import org.apache.lucene.store.HardlinkCopyDirectoryWrapper;
+import org.apache.lucene.misc.store.HardlinkCopyDirectoryWrapper;
import org.apache.lucene.util.SuppressForbidden;
import java.nio.file.Paths;
diff --git a/lucene/misc/src/java/org/apache/lucene/document/LazyDocument.java b/lucene/misc/src/java/org/apache/lucene/misc/document/LazyDocument.java
similarity index 98%
rename from lucene/misc/src/java/org/apache/lucene/document/LazyDocument.java
rename to lucene/misc/src/java/org/apache/lucene/misc/document/LazyDocument.java
index 1f2cfe8..ef1d6fe 100644
--- a/lucene/misc/src/java/org/apache/lucene/document/LazyDocument.java
+++ b/lucene/misc/src/java/org/apache/lucene/misc/document/LazyDocument.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.document;
+package org.apache.lucene.misc.document;
import java.io.IOException;
import java.io.Reader;
@@ -27,6 +27,7 @@
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.document.Document;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
diff --git a/lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java b/lucene/misc/src/java/org/apache/lucene/misc/document/package-info.java
similarity index 79%
copy from lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java
copy to lucene/misc/src/java/org/apache/lucene/misc/document/package-info.java
index af94ced..0e37415 100644
--- a/lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java
+++ b/lucene/misc/src/java/org/apache/lucene/misc/document/package-info.java
@@ -15,12 +15,5 @@
* limitations under the License.
*/
-package org.apache.lucene.util;
-
-/**
- * Tracks dynamic allocations/deallocations of memory for transient objects
- */
-public interface MemoryTracker {
- void updateBytes(long bytes);
- long getBytes();
-}
+/** Misc extensions of the Document/Field API. */
+package org.apache.lucene.misc.document;
\ No newline at end of file
diff --git a/lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java b/lucene/misc/src/java/org/apache/lucene/misc/index/IndexSplitter.java
similarity index 95%
rename from lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java
rename to lucene/misc/src/java/org/apache/lucene/misc/index/IndexSplitter.java
index efc6ba3..9388840 100644
--- a/lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java
+++ b/lucene/misc/src/java/org/apache/lucene/misc/index/IndexSplitter.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.index;
+package org.apache.lucene.misc.index;
import java.io.IOException;
import java.nio.file.Files;
@@ -28,6 +28,9 @@
import java.util.List;
import java.util.Locale;
+import org.apache.lucene.index.SegmentCommitInfo;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.SuppressForbidden;
@@ -105,14 +108,6 @@
}
}
- private int getIdx(String name) {
- for (int x = 0; x < infos.size(); x++) {
- if (name.equals(infos.info(x).info.name))
- return x;
- }
- return -1;
- }
-
private SegmentCommitInfo getInfo(String name) {
for (int x = 0; x < infos.size(); x++) {
if (name.equals(infos.info(x).info.name))
@@ -123,8 +118,8 @@
public void remove(String[] segs) throws IOException {
for (String n : segs) {
- int idx = getIdx(n);
- infos.remove(idx);
+ SegmentCommitInfo info = getInfo(n);
+ infos.remove(info);
}
infos.changed();
infos.commit(fsDir);
diff --git a/lucene/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java b/lucene/misc/src/java/org/apache/lucene/misc/index/MultiPassIndexSplitter.java
similarity index 93%
rename from lucene/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java
rename to lucene/misc/src/java/org/apache/lucene/misc/index/MultiPassIndexSplitter.java
index 6d7e4ea..2f4c001 100644
--- a/lucene/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java
+++ b/lucene/misc/src/java/org/apache/lucene/misc/index/MultiPassIndexSplitter.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.index;
+package org.apache.lucene.misc.index;
import java.io.IOException;
import java.nio.file.Files;
@@ -23,7 +23,17 @@
import java.util.ArrayList;
import java.util.List;
+import org.apache.lucene.index.BaseCompositeReader;
+import org.apache.lucene.index.CodecReader;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.FilterCodecReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.MultiReader;
+import org.apache.lucene.index.SlowCodecReaderWrapper;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.FixedBitSet;
@@ -101,7 +111,7 @@
.setOpenMode(OpenMode.CREATE));
System.err.println("Writing part " + (i + 1) + " ...");
// pass the subreaders directly, as our wrapper's numDocs/hasDeletetions are not up-to-date
- final List<? extends FakeDeleteLeafIndexReader> sr = input.getSequentialSubReaders();
+ final List<? extends FakeDeleteLeafIndexReader> sr = input.getSequentialSubReadersWrapper();
w.addIndexes(sr.toArray(new CodecReader[sr.size()])); // TODO: maybe take List<IR> here?
w.close();
}
@@ -211,6 +221,10 @@
return null;
}
+ final List<? extends FakeDeleteLeafIndexReader> getSequentialSubReadersWrapper() {
+ return getSequentialSubReaders();
+ }
+
// no need to override numDocs/hasDeletions,
// as we pass the subreaders directly to IW.addIndexes().
}
diff --git a/lucene/misc/src/java/org/apache/lucene/index/PKIndexSplitter.java b/lucene/misc/src/java/org/apache/lucene/misc/index/PKIndexSplitter.java
similarity index 94%
rename from lucene/misc/src/java/org/apache/lucene/index/PKIndexSplitter.java
rename to lucene/misc/src/java/org/apache/lucene/misc/index/PKIndexSplitter.java
index 0854268..aef5c13 100644
--- a/lucene/misc/src/java/org/apache/lucene/index/PKIndexSplitter.java
+++ b/lucene/misc/src/java/org/apache/lucene/misc/index/PKIndexSplitter.java
@@ -14,12 +14,19 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.index;
+package org.apache.lucene.misc.index;
import java.io.IOException;
import java.util.List;
+import org.apache.lucene.index.CodecReader;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.FilterCodecReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
diff --git a/lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java b/lucene/misc/src/java/org/apache/lucene/misc/index/package-info.java
similarity index 79%
copy from lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java
copy to lucene/misc/src/java/org/apache/lucene/misc/index/package-info.java
index af94ced..3db8b50 100644
--- a/lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java
+++ b/lucene/misc/src/java/org/apache/lucene/misc/index/package-info.java
@@ -15,12 +15,5 @@
* limitations under the License.
*/
-package org.apache.lucene.util;
-
-/**
- * Tracks dynamic allocations/deallocations of memory for transient objects
- */
-public interface MemoryTracker {
- void updateBytes(long bytes);
- long getBytes();
-}
+/** Misc index tools and index support. */
+package org.apache.lucene.misc.index;
\ No newline at end of file
diff --git a/lucene/misc/src/java/org/apache/lucene/search/DiversifiedTopDocsCollector.java b/lucene/misc/src/java/org/apache/lucene/misc/search/DiversifiedTopDocsCollector.java
similarity index 94%
rename from lucene/misc/src/java/org/apache/lucene/search/DiversifiedTopDocsCollector.java
rename to lucene/misc/src/java/org/apache/lucene/misc/search/DiversifiedTopDocsCollector.java
index 5228e6d..5e88cd8 100644
--- a/lucene/misc/src/java/org/apache/lucene/search/DiversifiedTopDocsCollector.java
+++ b/lucene/misc/src/java/org/apache/lucene/misc/search/DiversifiedTopDocsCollector.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.search;
+package org.apache.lucene.misc.search;
import java.io.IOException;
import java.util.HashMap;
@@ -23,7 +23,14 @@
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
-import org.apache.lucene.search.DiversifiedTopDocsCollector.ScoreDocKey;
+import org.apache.lucene.misc.search.DiversifiedTopDocsCollector.ScoreDocKey;
+import org.apache.lucene.search.LeafCollector;
+import org.apache.lucene.search.Scorable;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.ScoreMode;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.TopDocsCollector;
+import org.apache.lucene.search.TotalHits;
import org.apache.lucene.util.PriorityQueue;
/**
diff --git a/lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java b/lucene/misc/src/java/org/apache/lucene/misc/search/DocValuesStats.java
similarity index 99%
rename from lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java
rename to lucene/misc/src/java/org/apache/lucene/misc/search/DocValuesStats.java
index f3319ee..323c1e4 100644
--- a/lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java
+++ b/lucene/misc/src/java/org/apache/lucene/misc/search/DocValuesStats.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.search;
+package org.apache.lucene.misc.search;
import java.io.IOException;
diff --git a/lucene/misc/src/java/org/apache/lucene/search/DocValuesStatsCollector.java b/lucene/misc/src/java/org/apache/lucene/misc/search/DocValuesStatsCollector.java
similarity index 90%
rename from lucene/misc/src/java/org/apache/lucene/search/DocValuesStatsCollector.java
rename to lucene/misc/src/java/org/apache/lucene/misc/search/DocValuesStatsCollector.java
index 732a5f7..e429ba4 100644
--- a/lucene/misc/src/java/org/apache/lucene/search/DocValuesStatsCollector.java
+++ b/lucene/misc/src/java/org/apache/lucene/misc/search/DocValuesStatsCollector.java
@@ -14,11 +14,15 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.search;
+package org.apache.lucene.misc.search;
import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.LeafCollector;
+import org.apache.lucene.search.Scorable;
+import org.apache.lucene.search.ScoreMode;
/** A {@link Collector} which computes statistics for a DocValues field. */
public class DocValuesStatsCollector implements Collector {
diff --git a/lucene/misc/src/java/org/apache/lucene/search/MemoryAccountingBitsetCollector.java b/lucene/misc/src/java/org/apache/lucene/misc/search/MemoryAccountingBitsetCollector.java
similarity index 93%
rename from lucene/misc/src/java/org/apache/lucene/search/MemoryAccountingBitsetCollector.java
rename to lucene/misc/src/java/org/apache/lucene/misc/search/MemoryAccountingBitsetCollector.java
index 953bfaa..22b17d9 100644
--- a/lucene/misc/src/java/org/apache/lucene/search/MemoryAccountingBitsetCollector.java
+++ b/lucene/misc/src/java/org/apache/lucene/misc/search/MemoryAccountingBitsetCollector.java
@@ -15,12 +15,14 @@
* limitations under the License.
*/
-package org.apache.lucene.search;
+package org.apache.lucene.misc.search;
import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.misc.CollectorMemoryTracker;
+import org.apache.lucene.search.ScoreMode;
+import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.util.FixedBitSet;
/** Bitset collector which supports memory tracking */
diff --git a/lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java b/lucene/misc/src/java/org/apache/lucene/misc/search/package-info.java
similarity index 79%
copy from lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java
copy to lucene/misc/src/java/org/apache/lucene/misc/search/package-info.java
index af94ced..912b478 100644
--- a/lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java
+++ b/lucene/misc/src/java/org/apache/lucene/misc/search/package-info.java
@@ -15,12 +15,5 @@
* limitations under the License.
*/
-package org.apache.lucene.util;
-
-/**
- * Tracks dynamic allocations/deallocations of memory for transient objects
- */
-public interface MemoryTracker {
- void updateBytes(long bytes);
- long getBytes();
-}
+/** Misc search implementations. */
+package org.apache.lucene.misc.search;
\ No newline at end of file
diff --git a/lucene/misc/src/java/org/apache/lucene/search/similarity/LegacyBM25Similarity.java b/lucene/misc/src/java/org/apache/lucene/misc/search/similarity/LegacyBM25Similarity.java
similarity index 98%
rename from lucene/misc/src/java/org/apache/lucene/search/similarity/LegacyBM25Similarity.java
rename to lucene/misc/src/java/org/apache/lucene/misc/search/similarity/LegacyBM25Similarity.java
index 300c1c0..a005dcc 100644
--- a/lucene/misc/src/java/org/apache/lucene/search/similarity/LegacyBM25Similarity.java
+++ b/lucene/misc/src/java/org/apache/lucene/misc/search/similarity/LegacyBM25Similarity.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.search.similarity;
+package org.apache.lucene.misc.search.similarity;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.search.CollectionStatistics;
diff --git a/lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java b/lucene/misc/src/java/org/apache/lucene/misc/search/similarity/package-info.java
similarity index 79%
copy from lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java
copy to lucene/misc/src/java/org/apache/lucene/misc/search/similarity/package-info.java
index af94ced..7f72263 100644
--- a/lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java
+++ b/lucene/misc/src/java/org/apache/lucene/misc/search/similarity/package-info.java
@@ -15,12 +15,5 @@
* limitations under the License.
*/
-package org.apache.lucene.util;
-
-/**
- * Tracks dynamic allocations/deallocations of memory for transient objects
- */
-public interface MemoryTracker {
- void updateBytes(long bytes);
- long getBytes();
-}
+/** Misc similarity implementations. */
+package org.apache.lucene.misc.search.similarity;
\ No newline at end of file
diff --git a/lucene/misc/src/java/org/apache/lucene/store/HardlinkCopyDirectoryWrapper.java b/lucene/misc/src/java/org/apache/lucene/misc/store/HardlinkCopyDirectoryWrapper.java
similarity index 95%
rename from lucene/misc/src/java/org/apache/lucene/store/HardlinkCopyDirectoryWrapper.java
rename to lucene/misc/src/java/org/apache/lucene/misc/store/HardlinkCopyDirectoryWrapper.java
index c7b164a..816595b 100644
--- a/lucene/misc/src/java/org/apache/lucene/store/HardlinkCopyDirectoryWrapper.java
+++ b/lucene/misc/src/java/org/apache/lucene/misc/store/HardlinkCopyDirectoryWrapper.java
@@ -15,7 +15,12 @@
* limitations under the License.
*/
-package org.apache.lucene.store;
+package org.apache.lucene.misc.store;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.FilterDirectory;
+import org.apache.lucene.store.IOContext;
import java.io.FileNotFoundException;
import java.io.IOException;
diff --git a/lucene/misc/src/java/org/apache/lucene/store/NativePosixUtil.java b/lucene/misc/src/java/org/apache/lucene/misc/store/NativePosixUtil.java
similarity index 96%
rename from lucene/misc/src/java/org/apache/lucene/store/NativePosixUtil.java
rename to lucene/misc/src/java/org/apache/lucene/misc/store/NativePosixUtil.java
index 6397ec7..c119e52 100644
--- a/lucene/misc/src/java/org/apache/lucene/store/NativePosixUtil.java
+++ b/lucene/misc/src/java/org/apache/lucene/misc/store/NativePosixUtil.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.store;
+package org.apache.lucene.misc.store;
import java.io.IOException;
import java.io.FileDescriptor;
@@ -33,7 +33,7 @@
public final static int NOREUSE = 5;
static {
- System.loadLibrary("NativePosixUtil");
+ System.loadLibrary("LuceneNativeIO");
}
private static native int posix_fadvise(FileDescriptor fd, long offset, long len, int advise) throws IOException;
diff --git a/lucene/misc/src/java/org/apache/lucene/store/NativeUnixDirectory.java b/lucene/misc/src/java/org/apache/lucene/misc/store/NativeUnixDirectory.java
similarity index 95%
rename from lucene/misc/src/java/org/apache/lucene/store/NativeUnixDirectory.java
rename to lucene/misc/src/java/org/apache/lucene/misc/store/NativeUnixDirectory.java
index 44fe418..5057a0a 100644
--- a/lucene/misc/src/java/org/apache/lucene/store/NativeUnixDirectory.java
+++ b/lucene/misc/src/java/org/apache/lucene/misc/store/NativeUnixDirectory.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.store;
+package org.apache.lucene.misc.store;
import java.io.EOFException;
import java.io.FileDescriptor;
@@ -25,7 +25,16 @@
import java.nio.channels.FileChannel;
import java.nio.file.Path;
+import org.apache.lucene.misc.store.NativePosixUtil;
+import org.apache.lucene.store.BufferedIndexInput;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.FSLockFactory;
+import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IOContext.Context;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.LockFactory;
import org.apache.lucene.util.SuppressForbidden;
// TODO
@@ -47,10 +56,9 @@
*
* <p>To use this you must compile
* NativePosixUtil.cpp (exposes Linux-specific APIs through
- * JNI) for your platform, by running <code>ant
- * build-native-unix</code>, and then putting the resulting
- * <code>libNativePosixUtil.so</code> (from
- * <code>lucene/build/native</code>) onto your dynamic
+ * JNI) for your platform, by running <code>./gradlew build</code>, and then putting the resulting
+ * <code>libLuceneNativeIO.so</code> or <code>libLuceneNativeIO.dylib</code>
+ * (from <code>lucene/misc/native/build/lib/release/platform/</code>) onto your dynamic
* linker search path.
*
* <p><b>WARNING</b>: this code is very new and quite easily
diff --git a/lucene/misc/src/java/org/apache/lucene/store/RAFDirectory.java b/lucene/misc/src/java/org/apache/lucene/misc/store/RAFDirectory.java
similarity index 93%
rename from lucene/misc/src/java/org/apache/lucene/store/RAFDirectory.java
rename to lucene/misc/src/java/org/apache/lucene/misc/store/RAFDirectory.java
index f1643c2..2f79a93 100644
--- a/lucene/misc/src/java/org/apache/lucene/store/RAFDirectory.java
+++ b/lucene/misc/src/java/org/apache/lucene/misc/store/RAFDirectory.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.store;
+package org.apache.lucene.misc.store;
import java.io.EOFException;
import java.io.File;
@@ -23,6 +23,14 @@
import java.nio.ByteBuffer;
import java.nio.file.Path;
+import org.apache.lucene.store.BufferedIndexInput;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.FSLockFactory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.LockFactory;
+import org.apache.lucene.store.MMapDirectory;
+import org.apache.lucene.store.NIOFSDirectory;
import org.apache.lucene.util.SuppressForbidden;
/** A straightforward implementation of {@link FSDirectory}
@@ -30,7 +38,7 @@
* poor concurrent performance (multiple threads will
* bottleneck) as it synchronizes when multiple threads
* read from the same file. It's usually better to use
- * {@link NIOFSDirectory} or {@link MMapDirectory} instead.
+ * {@link NIOFSDirectory} or {@link MMapDirectory} instead.
* <p>
* NOTE: Because this uses RandomAccessFile, it will generally
* not work with non-default filesystem providers. It is only
diff --git a/lucene/misc/src/java/org/apache/lucene/store/WindowsDirectory.java b/lucene/misc/src/java/org/apache/lucene/misc/store/WindowsDirectory.java
similarity index 85%
rename from lucene/misc/src/java/org/apache/lucene/store/WindowsDirectory.java
rename to lucene/misc/src/java/org/apache/lucene/misc/store/WindowsDirectory.java
index 43b8e12..d791eca 100644
--- a/lucene/misc/src/java/org/apache/lucene/store/WindowsDirectory.java
+++ b/lucene/misc/src/java/org/apache/lucene/misc/store/WindowsDirectory.java
@@ -14,7 +14,15 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.store;
+package org.apache.lucene.misc.store;
+
+import org.apache.lucene.store.BufferedIndexInput;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.FSLockFactory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.LockFactory;
import java.io.IOException;
import java.io.EOFException;
@@ -26,14 +34,10 @@
* <p>
* Steps:
* <ol>
- * <li>Compile the source code to create WindowsDirectory.dll:
- * <blockquote>
- * c:\mingw\bin\g++ -Wall -D_JNI_IMPLEMENTATION_ -Wl,--kill-at
- * -I"%JAVA_HOME%\include" -I"%JAVA_HOME%\include\win32" -static-libgcc
- * -static-libstdc++ -shared WindowsDirectory.cpp -o WindowsDirectory.dll
- * </blockquote>
- * For 64-bit JREs, use mingw64, with the -m64 option.
- * <li>Put WindowsDirectory.dll into some directory in your windows PATH
+ * <li>Compile the source code to create libLuceneNativeIO.dll: <code>./gradlew build</code>
+ * <li>Put the resulting <code>libLuceneNativeIO.dll</code>
+ * (from <code>lucene/misc/native/build/lib/release/platform/</code>)
+ * into some directory in your windows PATH
* <li>Open indexes with WindowsDirectory and use it.
* </ol>
* @lucene.experimental
@@ -42,7 +46,7 @@
private static final int DEFAULT_BUFFERSIZE = 4096; /* default pgsize on ia32/amd64 */
static {
- System.loadLibrary("WindowsDirectory");
+ System.loadLibrary("LuceneNativeIO");
}
/** Create a new WindowsDirectory for the named location.
diff --git a/lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java b/lucene/misc/src/java/org/apache/lucene/misc/store/package-info.java
similarity index 79%
copy from lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java
copy to lucene/misc/src/java/org/apache/lucene/misc/store/package-info.java
index af94ced..a006862 100644
--- a/lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java
+++ b/lucene/misc/src/java/org/apache/lucene/misc/store/package-info.java
@@ -15,12 +15,5 @@
* limitations under the License.
*/
-package org.apache.lucene.util;
-
-/**
- * Tracks dynamic allocations/deallocations of memory for transient objects
- */
-public interface MemoryTracker {
- void updateBytes(long bytes);
- long getBytes();
-}
+/** Misc Directory implementations. */
+package org.apache.lucene.misc.store;
\ No newline at end of file
diff --git a/lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java b/lucene/misc/src/java/org/apache/lucene/misc/util/MemoryTracker.java
similarity index 96%
rename from lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java
rename to lucene/misc/src/java/org/apache/lucene/misc/util/MemoryTracker.java
index af94ced..20a45e2 100644
--- a/lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java
+++ b/lucene/misc/src/java/org/apache/lucene/misc/util/MemoryTracker.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.lucene.util;
+package org.apache.lucene.misc.util;
/**
* Tracks dynamic allocations/deallocations of memory for transient objects
diff --git a/lucene/misc/src/java/org/apache/lucene/util/fst/ListOfOutputs.java b/lucene/misc/src/java/org/apache/lucene/misc/util/fst/ListOfOutputs.java
similarity index 97%
rename from lucene/misc/src/java/org/apache/lucene/util/fst/ListOfOutputs.java
rename to lucene/misc/src/java/org/apache/lucene/misc/util/fst/ListOfOutputs.java
index da5ce4d..9184736 100644
--- a/lucene/misc/src/java/org/apache/lucene/util/fst/ListOfOutputs.java
+++ b/lucene/misc/src/java/org/apache/lucene/misc/util/fst/ListOfOutputs.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.util.fst;
+package org.apache.lucene.misc.util.fst;
import java.io.IOException;
import java.util.ArrayList;
@@ -24,6 +24,8 @@
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.IntsRef; // javadocs
import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.fst.FSTCompiler;
+import org.apache.lucene.util.fst.Outputs;
/**
* Wraps another Outputs implementation and encodes one or
diff --git a/lucene/misc/src/java/org/apache/lucene/util/fst/UpToTwoPositiveIntOutputs.java b/lucene/misc/src/java/org/apache/lucene/misc/util/fst/UpToTwoPositiveIntOutputs.java
similarity index 97%
rename from lucene/misc/src/java/org/apache/lucene/util/fst/UpToTwoPositiveIntOutputs.java
rename to lucene/misc/src/java/org/apache/lucene/misc/util/fst/UpToTwoPositiveIntOutputs.java
index a6e0a66..afa8ac5 100644
--- a/lucene/misc/src/java/org/apache/lucene/util/fst/UpToTwoPositiveIntOutputs.java
+++ b/lucene/misc/src/java/org/apache/lucene/misc/util/fst/UpToTwoPositiveIntOutputs.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.util.fst;
+package org.apache.lucene.misc.util.fst;
import java.io.IOException;
@@ -22,6 +22,8 @@
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.SuppressForbidden;
+import org.apache.lucene.util.fst.FSTCompiler;
+import org.apache.lucene.util.fst.Outputs;
/**
* An FST {@link Outputs} implementation where each output
diff --git a/lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java b/lucene/misc/src/java/org/apache/lucene/misc/util/fst/package-info.java
similarity index 79%
copy from lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java
copy to lucene/misc/src/java/org/apache/lucene/misc/util/fst/package-info.java
index af94ced..b7e79a9 100644
--- a/lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java
+++ b/lucene/misc/src/java/org/apache/lucene/misc/util/fst/package-info.java
@@ -15,12 +15,5 @@
* limitations under the License.
*/
-package org.apache.lucene.util;
-
-/**
- * Tracks dynamic allocations/deallocations of memory for transient objects
- */
-public interface MemoryTracker {
- void updateBytes(long bytes);
- long getBytes();
-}
+/** Misc FST classes. */
+package org.apache.lucene.misc.util.fst;
\ No newline at end of file
diff --git a/lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java b/lucene/misc/src/java/org/apache/lucene/misc/util/package-info.java
similarity index 79%
copy from lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java
copy to lucene/misc/src/java/org/apache/lucene/misc/util/package-info.java
index af94ced..691e8d9 100644
--- a/lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java
+++ b/lucene/misc/src/java/org/apache/lucene/misc/util/package-info.java
@@ -15,12 +15,6 @@
* limitations under the License.
*/
-package org.apache.lucene.util;
-
-/**
- * Tracks dynamic allocations/deallocations of memory for transient objects
- */
-public interface MemoryTracker {
- void updateBytes(long bytes);
- long getBytes();
-}
+/** Memory Tracker interface which allows defining custom collector
+ level memory trackers */
+package org.apache.lucene.misc.util;
\ No newline at end of file
diff --git a/lucene/misc/src/java/org/apache/lucene/search/package.html b/lucene/misc/src/java/org/apache/lucene/search/package.html
deleted file mode 100644
index 8683a16..0000000
--- a/lucene/misc/src/java/org/apache/lucene/search/package.html
+++ /dev/null
@@ -1,22 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<!-- not a package-info.java, because we already defined this package in core/ -->
-<html>
-<body>
-Misc search implementations.
-</body>
-</html>
\ No newline at end of file
diff --git a/lucene/misc/src/java/org/apache/lucene/search/similarity/package.html b/lucene/misc/src/java/org/apache/lucene/search/similarity/package.html
deleted file mode 100644
index 7f624d4..0000000
--- a/lucene/misc/src/java/org/apache/lucene/search/similarity/package.html
+++ /dev/null
@@ -1,22 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<!-- not a package-info.java, because we already defined this package in core/ -->
-<html>
-<body>
-Misc similarity implementations.
-</body>
-</html>
\ No newline at end of file
diff --git a/lucene/misc/src/java/org/apache/lucene/store/package.html b/lucene/misc/src/java/org/apache/lucene/store/package.html
deleted file mode 100644
index 0bf56af..0000000
--- a/lucene/misc/src/java/org/apache/lucene/store/package.html
+++ /dev/null
@@ -1,22 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<!-- not a package-info.java, because we already defined this package in core/ -->
-<html>
-<body>
-Misc Directory implementations.
-</body>
-</html>
diff --git a/lucene/misc/src/java/org/apache/lucene/util/fst/package.html b/lucene/misc/src/java/org/apache/lucene/util/fst/package.html
deleted file mode 100644
index 257801f..0000000
--- a/lucene/misc/src/java/org/apache/lucene/util/fst/package.html
+++ /dev/null
@@ -1,22 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<!-- not a package-info.java, because we already defined this package in core/ -->
-<html>
-<body>
-Misc FST classes.
-</body>
-</html>
diff --git a/lucene/misc/src/java/org/apache/lucene/util/package.html b/lucene/misc/src/java/org/apache/lucene/util/package.html
deleted file mode 100644
index 24aa507..0000000
--- a/lucene/misc/src/java/org/apache/lucene/util/package.html
+++ /dev/null
@@ -1,23 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<!-- not a package-info.java, because we already defined this package in core/ -->
-<html>
-<body>
-Memory Tracker interface which allows defining custom collector
-level memory trackers
-</body>
-</html>
diff --git a/lucene/misc/src/java/overview.html b/lucene/misc/src/java/overview.html
index 51181ee..5cda451 100644
--- a/lucene/misc/src/java/overview.html
+++ b/lucene/misc/src/java/overview.html
@@ -35,33 +35,32 @@
have to compile on your platform.
<p>
-{@link org.apache.lucene.store.NativeUnixDirectory} is a Directory implementation that bypasses the
+{@link org.apache.lucene.misc.store.NativeUnixDirectory} is a Directory implementation that bypasses the
OS's buffer cache (using direct IO) for any IndexInput and IndexOutput
used during merging of segments larger than a specified size (default
10 MB). This avoids evicting hot pages that are still in-use for
searching, keeping search more responsive while large merges run.
<p>
-See <a target=_top href="http://blog.mikemccandless.com/2010/06/lucene-and-fadvisemadvise.html">this blog post</a>
+See <a target="_top" href="http://blog.mikemccandless.com/2010/06/lucene-and-fadvisemadvise.html">this blog post</a>
for details.
-Steps to build:
+<p>Steps to build (from the project's root directory):
<ul>
- <li> <code>cd lucene/misc/</code>
-
- <li> To compile NativePosixUtil.cpp -> libNativePosixUtil.so, run<code> ant build-native-unix</code>.
+ <li>Compile both the native library part (<code>libLuceneNativeIO</code>) and Java sources with:
+ <code>./gradlew -p lucene/misc build</code>.</li>
- <li><code>libNativePosixUtil.so</code> will be located in the <code>lucene/build/native/</code> folder
+ <li>The native library will be located in the <code>lucene/misc/native/build/lib/main/release/<i>your-platform</i></code> folder.</li>
- <li> Make sure libNativePosixUtil.so is on your LD_LIBRARY_PATH so java can find it (something like <code>export LD_LIBRARY_PATH=/path/to/dir:$LD_LIBRARY_PATH</code>, where /path/to/dir contains libNativePosixUtil.so)
-
- <li> <code>ant jar</code> to compile the java source and put that JAR on your CLASSPATH
+ <li>On Unix-ish systems, make sure <code>libNativePosixUtil.so</code> is on your
+ <code>LD_LIBRARY_PATH</code> so java can find it (something like <code>export LD_LIBRARY_PATH=/path/to/dir:$LD_LIBRARY_PATH</code>,
+ where /path/to/dir contains <code>libLuceneNativeIO.so</code>).</li>
</ul>
<p>
-NativePosixUtil.cpp/java also expose access to the posix_madvise,
-madvise, posix_fadvise functions, which are somewhat more cross
-platform than O_DIRECT, however, in testing (see above link), these
+The native library exposes access to the <code>posix_madvise</code>,
+<code>madvise</code>, <code>posix_fadvise</code> functions, which are somewhat more cross
+platform than <code>O_DIRECT</code>, however, in testing (see above link), these
APIs did not seem to help prevent buffer cache eviction.
</body>
diff --git a/lucene/misc/src/test/org/apache/lucene/document/TestLazyDocument.java b/lucene/misc/src/test/org/apache/lucene/misc/document/TestLazyDocument.java
similarity index 97%
rename from lucene/misc/src/test/org/apache/lucene/document/TestLazyDocument.java
rename to lucene/misc/src/test/org/apache/lucene/misc/document/TestLazyDocument.java
index e76491e..c59e113 100644
--- a/lucene/misc/src/test/org/apache/lucene/document/TestLazyDocument.java
+++ b/lucene/misc/src/test/org/apache/lucene/misc/document/TestLazyDocument.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.document;
+package org.apache.lucene.misc.document;
import java.io.IOException;
import java.util.Arrays;
@@ -25,6 +25,10 @@
import java.util.Set;
import org.apache.lucene.analysis.*;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.TextField;
import org.apache.lucene.index.*;
import org.apache.lucene.search.*;
import org.apache.lucene.store.*;
diff --git a/lucene/misc/src/test/org/apache/lucene/index/TestIndexSplitter.java b/lucene/misc/src/test/org/apache/lucene/misc/index/TestIndexSplitter.java
similarity index 89%
rename from lucene/misc/src/test/org/apache/lucene/index/TestIndexSplitter.java
rename to lucene/misc/src/test/org/apache/lucene/misc/index/TestIndexSplitter.java
index bf58e53..448012c 100644
--- a/lucene/misc/src/test/org/apache/lucene/index/TestIndexSplitter.java
+++ b/lucene/misc/src/test/org/apache/lucene/misc/index/TestIndexSplitter.java
@@ -14,13 +14,20 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.index;
+package org.apache.lucene.misc.index;
import java.nio.file.Path;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.DocHelper;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.index.LogByteSizeMergePolicy;
+import org.apache.lucene.index.MergePolicy;
+import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.LuceneTestCase;
@@ -60,7 +67,7 @@
iw.addDocument(doc);
}
iw.commit();
- DirectoryReader iwReader = iw.getReader();
+ DirectoryReader iwReader = DirectoryReader.open(iw);
assertEquals(3, iwReader.leaves().size());
iwReader.close();
iw.close();
diff --git a/lucene/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java b/lucene/misc/src/test/org/apache/lucene/misc/index/TestMultiPassIndexSplitter.java
similarity index 93%
rename from lucene/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java
rename to lucene/misc/src/test/org/apache/lucene/misc/index/TestMultiPassIndexSplitter.java
index 688d7c7..0fa318a 100644
--- a/lucene/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java
+++ b/lucene/misc/src/test/org/apache/lucene/misc/index/TestMultiPassIndexSplitter.java
@@ -14,11 +14,18 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.index;
+package org.apache.lucene.misc.index;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.MultiTerms;
+import org.apache.lucene.index.NoMergePolicy;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
diff --git a/lucene/misc/src/test/org/apache/lucene/index/TestPKIndexSplitter.java b/lucene/misc/src/test/org/apache/lucene/misc/index/TestPKIndexSplitter.java
similarity index 93%
rename from lucene/misc/src/test/org/apache/lucene/index/TestPKIndexSplitter.java
rename to lucene/misc/src/test/org/apache/lucene/misc/index/TestPKIndexSplitter.java
index 01278c1..0fc8f44 100644
--- a/lucene/misc/src/test/org/apache/lucene/index/TestPKIndexSplitter.java
+++ b/lucene/misc/src/test/org/apache/lucene/misc/index/TestPKIndexSplitter.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.index;
+package org.apache.lucene.misc.index;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
@@ -25,7 +25,13 @@
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.index.MultiBits;
+import org.apache.lucene.index.NoMergePolicy;
+import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.LuceneTestCase;
diff --git a/lucene/misc/src/test/org/apache/lucene/search/TestDiversifiedTopDocsCollector.java b/lucene/misc/src/test/org/apache/lucene/misc/search/TestDiversifiedTopDocsCollector.java
similarity index 96%
rename from lucene/misc/src/test/org/apache/lucene/search/TestDiversifiedTopDocsCollector.java
rename to lucene/misc/src/test/org/apache/lucene/misc/search/TestDiversifiedTopDocsCollector.java
index 03b294d..0786071 100644
--- a/lucene/misc/src/test/org/apache/lucene/search/TestDiversifiedTopDocsCollector.java
+++ b/lucene/misc/src/test/org/apache/lucene/misc/search/TestDiversifiedTopDocsCollector.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.search;
+package org.apache.lucene.misc.search;
import java.io.IOException;
import java.util.HashMap;
@@ -34,7 +34,21 @@
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.LeafCollector;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.QueryVisitor;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.ScoreMode;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.Weight;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
diff --git a/lucene/misc/src/test/org/apache/lucene/search/TestDocValuesStatsCollector.java b/lucene/misc/src/test/org/apache/lucene/misc/search/TestDocValuesStatsCollector.java
similarity index 96%
rename from lucene/misc/src/test/org/apache/lucene/search/TestDocValuesStatsCollector.java
rename to lucene/misc/src/test/org/apache/lucene/misc/search/TestDocValuesStatsCollector.java
index aeac785..e88823a 100644
--- a/lucene/misc/src/test/org/apache/lucene/search/TestDocValuesStatsCollector.java
+++ b/lucene/misc/src/test/org/apache/lucene/misc/search/TestDocValuesStatsCollector.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.search;
+package org.apache.lucene.misc.search;
import java.io.IOException;
import java.util.Arrays;
@@ -38,12 +38,16 @@
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
-import org.apache.lucene.search.DocValuesStats.DoubleDocValuesStats;
-import org.apache.lucene.search.DocValuesStats.LongDocValuesStats;
-import org.apache.lucene.search.DocValuesStats.SortedDocValuesStats;
-import org.apache.lucene.search.DocValuesStats.SortedDoubleDocValuesStats;
-import org.apache.lucene.search.DocValuesStats.SortedLongDocValuesStats;
-import org.apache.lucene.search.DocValuesStats.SortedSetDocValuesStats;
+import org.apache.lucene.misc.search.DocValuesStats.DoubleDocValuesStats;
+import org.apache.lucene.misc.search.DocValuesStats.LongDocValuesStats;
+import org.apache.lucene.misc.search.DocValuesStats.SortedDocValuesStats;
+import org.apache.lucene.misc.search.DocValuesStats.SortedDoubleDocValuesStats;
+import org.apache.lucene.misc.search.DocValuesStats.SortedLongDocValuesStats;
+import org.apache.lucene.misc.search.DocValuesStats.SortedSetDocValuesStats;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.MultiCollector;
+import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
diff --git a/lucene/misc/src/test/org/apache/lucene/search/TestMemoryAccountingBitsetCollector.java b/lucene/misc/src/test/org/apache/lucene/misc/search/TestMemoryAccountingBitsetCollector.java
similarity index 91%
rename from lucene/misc/src/test/org/apache/lucene/search/TestMemoryAccountingBitsetCollector.java
rename to lucene/misc/src/test/org/apache/lucene/misc/search/TestMemoryAccountingBitsetCollector.java
index fdd163c..9a5c4cd 100644
--- a/lucene/misc/src/test/org/apache/lucene/search/TestMemoryAccountingBitsetCollector.java
+++ b/lucene/misc/src/test/org/apache/lucene/misc/search/TestMemoryAccountingBitsetCollector.java
@@ -15,13 +15,17 @@
* limitations under the License.
*/
-package org.apache.lucene.search;
+package org.apache.lucene.misc.search;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.MultiCollector;
+import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.misc.CollectorMemoryTracker;
diff --git a/lucene/misc/src/test/org/apache/lucene/search/similarity/TestLegacyBM25Similarity.java b/lucene/misc/src/test/org/apache/lucene/misc/search/similarity/TestLegacyBM25Similarity.java
similarity index 98%
rename from lucene/misc/src/test/org/apache/lucene/search/similarity/TestLegacyBM25Similarity.java
rename to lucene/misc/src/test/org/apache/lucene/misc/search/similarity/TestLegacyBM25Similarity.java
index 9ffdc63..9552741 100644
--- a/lucene/misc/src/test/org/apache/lucene/search/similarity/TestLegacyBM25Similarity.java
+++ b/lucene/misc/src/test/org/apache/lucene/misc/search/similarity/TestLegacyBM25Similarity.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.lucene.search.similarity;
+package org.apache.lucene.misc.search.similarity;
import java.util.Random;
diff --git a/lucene/misc/src/test/org/apache/lucene/misc/store/NativeLibEnableRule.java b/lucene/misc/src/test/org/apache/lucene/misc/store/NativeLibEnableRule.java
new file mode 100644
index 0000000..33c4b57
--- /dev/null
+++ b/lucene/misc/src/test/org/apache/lucene/misc/store/NativeLibEnableRule.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.misc.store;
+
+import com.carrotsearch.randomizedtesting.rules.TestRuleAdapter;
+import org.apache.lucene.util.Constants;
+import org.junit.Assume;
+
+import java.util.Set;
+
+public class NativeLibEnableRule extends TestRuleAdapter {
+ enum OperatingSystem {
+ LINUX(Constants.LINUX),
+ WINDOWS(Constants.WINDOWS),
+ SUN_OS(Constants.SUN_OS),
+ MAC(Constants.MAC_OS_X),
+ FREE_BSD(Constants.FREE_BSD);
+
+ public final boolean enabled;
+
+ OperatingSystem(boolean enabled) {
+ this.enabled = enabled;
+ }
+ }
+
+ private final Set<OperatingSystem> runOn;
+
+ public NativeLibEnableRule(Set<OperatingSystem> runOn) {
+ this.runOn = runOn;
+ }
+
+ @Override
+ protected void before() {
+ Assume.assumeTrue("Test ignored (tests.native is false)",
+ Boolean.parseBoolean(System.getProperty("tests.native", "false")));
+
+ Assume.assumeTrue("Test ignored, only applies to architectures: " + runOn,
+ runOn.stream().anyMatch(os -> os.enabled));
+ }
+}
diff --git a/lucene/misc/src/test/org/apache/lucene/misc/store/NativeUnixDirectoryTest.java b/lucene/misc/src/test/org/apache/lucene/misc/store/NativeUnixDirectoryTest.java
new file mode 100644
index 0000000..d275950
--- /dev/null
+++ b/lucene/misc/src/test/org/apache/lucene/misc/store/NativeUnixDirectoryTest.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.misc.store;
+
+import com.carrotsearch.randomizedtesting.LifecycleScope;
+import com.carrotsearch.randomizedtesting.RandomizedTest;
+import org.apache.lucene.store.ByteBuffersDirectory;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.MergeInfo;
+import org.apache.lucene.util.LuceneTestCase;
+import org.junit.Rule;
+import org.junit.rules.TestRule;
+
+import java.io.IOException;
+import java.util.EnumSet;
+
+public class NativeUnixDirectoryTest extends LuceneTestCase {
+ @Rule
+ public static TestRule requiresNative = new NativeLibEnableRule(
+ EnumSet.of(NativeLibEnableRule.OperatingSystem.MAC,
+ NativeLibEnableRule.OperatingSystem.FREE_BSD,
+ NativeLibEnableRule.OperatingSystem.LINUX));
+
+ public void testLibraryLoaded() throws IOException {
+ try (ByteBuffersDirectory ramDir = new ByteBuffersDirectory();
+ Directory dir = new NativeUnixDirectory(RandomizedTest.newTempDir(LifecycleScope.TEST), ramDir)) {
+ MergeInfo mergeInfo = new MergeInfo(1000, Integer.MAX_VALUE, true, 1);
+ dir.createOutput("test", new IOContext(mergeInfo)).close();
+ }
+ }
+}
\ No newline at end of file
diff --git a/lucene/misc/src/test/org/apache/lucene/store/TestHardLinkCopyDirectoryWrapper.java b/lucene/misc/src/test/org/apache/lucene/misc/store/TestHardLinkCopyDirectoryWrapper.java
similarity index 90%
rename from lucene/misc/src/test/org/apache/lucene/store/TestHardLinkCopyDirectoryWrapper.java
rename to lucene/misc/src/test/org/apache/lucene/misc/store/TestHardLinkCopyDirectoryWrapper.java
index 1fc4ccf..5550293 100644
--- a/lucene/misc/src/test/org/apache/lucene/store/TestHardLinkCopyDirectoryWrapper.java
+++ b/lucene/misc/src/test/org/apache/lucene/misc/store/TestHardLinkCopyDirectoryWrapper.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.lucene.store;
+package org.apache.lucene.misc.store;
import java.io.IOException;
import java.net.URI;
@@ -26,8 +26,18 @@
import java.util.Collections;
import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.misc.store.HardlinkCopyDirectoryWrapper;
import org.apache.lucene.mockfile.FilterPath;
import org.apache.lucene.mockfile.WindowsFS;
+import org.apache.lucene.store.BaseDirectoryTestCase;
+import org.apache.lucene.store.ByteBuffersDirectory;
+import org.apache.lucene.store.ChecksumIndexInput;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.NIOFSDirectory;
import org.apache.lucene.util.Constants;
import org.apache.lucene.util.IOUtils;
diff --git a/lucene/misc/src/test/org/apache/lucene/store/TestRAFDirectory.java b/lucene/misc/src/test/org/apache/lucene/misc/store/TestRAFDirectory.java
similarity index 89%
rename from lucene/misc/src/test/org/apache/lucene/store/TestRAFDirectory.java
rename to lucene/misc/src/test/org/apache/lucene/misc/store/TestRAFDirectory.java
index 598c123..f0ca0ec 100644
--- a/lucene/misc/src/test/org/apache/lucene/store/TestRAFDirectory.java
+++ b/lucene/misc/src/test/org/apache/lucene/misc/store/TestRAFDirectory.java
@@ -14,7 +14,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.store;
+package org.apache.lucene.misc.store;
+
+import org.apache.lucene.store.BaseDirectoryTestCase;
+import org.apache.lucene.store.Directory;
import java.io.IOException;
import java.nio.file.Path;
diff --git a/lucene/misc/src/test/org/apache/lucene/misc/store/WindowsDirectoryTest.java b/lucene/misc/src/test/org/apache/lucene/misc/store/WindowsDirectoryTest.java
new file mode 100644
index 0000000..2323c2a
--- /dev/null
+++ b/lucene/misc/src/test/org/apache/lucene/misc/store/WindowsDirectoryTest.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.misc.store;
+
+import com.carrotsearch.randomizedtesting.LifecycleScope;
+import com.carrotsearch.randomizedtesting.RandomizedTest;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.util.LuceneTestCase;
+import org.junit.Rule;
+import org.junit.rules.TestRule;
+
+import java.io.IOException;
+import java.util.EnumSet;
+
+public class WindowsDirectoryTest extends LuceneTestCase {
+ @Rule
+ public static TestRule requiresNative = new NativeLibEnableRule(
+ EnumSet.of(NativeLibEnableRule.OperatingSystem.WINDOWS));
+
+ public void testLibraryLoaded() throws IOException {
+ try (Directory dir = new WindowsDirectory(RandomizedTest.newTempDir(LifecycleScope.TEST))) {
+ dir.createOutput("test", IOContext.DEFAULT).close();
+ }
+ }
+}
\ No newline at end of file
diff --git a/lucene/misc/src/test/org/apache/lucene/util/TestCollectorMemoryTracker.java b/lucene/misc/src/test/org/apache/lucene/misc/util/TestCollectorMemoryTracker.java
similarity index 95%
rename from lucene/misc/src/test/org/apache/lucene/util/TestCollectorMemoryTracker.java
rename to lucene/misc/src/test/org/apache/lucene/misc/util/TestCollectorMemoryTracker.java
index aaa5f6d..402ce04 100644
--- a/lucene/misc/src/test/org/apache/lucene/util/TestCollectorMemoryTracker.java
+++ b/lucene/misc/src/test/org/apache/lucene/misc/util/TestCollectorMemoryTracker.java
@@ -15,9 +15,10 @@
* limitations under the License.
*/
-package org.apache.lucene.util;
+package org.apache.lucene.misc.util;
import org.apache.lucene.misc.CollectorMemoryTracker;
+import org.apache.lucene.util.LuceneTestCase;
public class TestCollectorMemoryTracker extends LuceneTestCase {
public void testAdditionsAndDeletions() {
diff --git a/lucene/misc/src/test/org/apache/lucene/util/fst/TestFSTsMisc.java b/lucene/misc/src/test/org/apache/lucene/misc/util/fst/TestFSTsMisc.java
similarity index 94%
rename from lucene/misc/src/test/org/apache/lucene/util/fst/TestFSTsMisc.java
rename to lucene/misc/src/test/org/apache/lucene/misc/util/fst/TestFSTsMisc.java
index 9911182..ba1944d 100644
--- a/lucene/misc/src/test/org/apache/lucene/util/fst/TestFSTsMisc.java
+++ b/lucene/misc/src/test/org/apache/lucene/misc/util/fst/TestFSTsMisc.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.util.fst;
+package org.apache.lucene.misc.util.fst;
import java.io.IOException;
import java.util.ArrayList;
@@ -30,10 +30,12 @@
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
-import org.apache.lucene.util.fst.UpToTwoPositiveIntOutputs.TwoLongs;
-
-import static org.apache.lucene.util.fst.FSTTester.getRandomString;
-import static org.apache.lucene.util.fst.FSTTester.toIntsRef;
+import org.apache.lucene.misc.util.fst.UpToTwoPositiveIntOutputs.TwoLongs;
+import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.fst.FSTCompiler;
+import org.apache.lucene.util.fst.FSTTester;
+import org.apache.lucene.util.fst.PositiveIntOutputs;
+import org.apache.lucene.util.fst.Util;
public class TestFSTsMisc extends LuceneTestCase {
@@ -68,8 +70,8 @@
Set<IntsRef> termsSet = new HashSet<>();
IntsRef[] terms = new IntsRef[numWords];
while(termsSet.size() < numWords) {
- final String term = getRandomString(random);
- termsSet.add(toIntsRef(term, inputMode));
+ final String term = FSTTester.getRandomString(random);
+ termsSet.add(FSTTester.toIntsRef(term, inputMode));
}
doTest(inputMode, termsSet.toArray(new IntsRef[termsSet.size()]));
}
diff --git a/lucene/packaging/build.gradle b/lucene/packaging/build.gradle
index 151e6d5..1c4aa3e 100644
--- a/lucene/packaging/build.gradle
+++ b/lucene/packaging/build.gradle
@@ -32,7 +32,9 @@
":lucene:packaging",
":lucene:documentation",
// Exclude parent container project of analysis modules (no artifacts).
- ":lucene:analysis"
+ ":lucene:analysis",
+ // Exclude native module, which requires manual copying and enabling
+ ":lucene:misc:native"
])
}
diff --git a/lucene/queries/src/java/org/apache/lucene/queries/intervals/IntervalQuery.java b/lucene/queries/src/java/org/apache/lucene/queries/intervals/IntervalQuery.java
index 10bff72..a3c4e5d 100644
--- a/lucene/queries/src/java/org/apache/lucene/queries/intervals/IntervalQuery.java
+++ b/lucene/queries/src/java/org/apache/lucene/queries/intervals/IntervalQuery.java
@@ -99,7 +99,7 @@
private IntervalQuery(String field, IntervalsSource intervalsSource, IntervalScoreFunction scoreFunction) {
Objects.requireNonNull(field, "null field aren't accepted");
Objects.requireNonNull(intervalsSource, "null intervalsSource aren't accepted");
- Objects.requireNonNull(scoreFunction, "null scoreFunction aren't accepted");
+ Objects.requireNonNull(scoreFunction, "null searchStrategy aren't accepted");
this.field = field;
this.intervalsSource = intervalsSource;
this.scoreFunction = scoreFunction;
diff --git a/lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/document/package-info.java
similarity index 70%
copy from lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java
copy to lucene/sandbox/src/java/org/apache/lucene/sandbox/document/package-info.java
index af94ced..c3e3a70 100644
--- a/lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/document/package-info.java
@@ -15,12 +15,11 @@
* limitations under the License.
*/
-package org.apache.lucene.util;
-
/**
- * Tracks dynamic allocations/deallocations of memory for transient objects
+ * This package contains several point types:
+ * <ul>
+ * <li>{@link org.apache.lucene.sandbox.document.BigIntegerPoint BigIntegerPoint} for 128-bit integers</li>
+ * <li>{@link org.apache.lucene.document.LatLonPoint LatLonPoint} for latitude/longitude geospatial points</li>
+ * </ul>
*/
-public interface MemoryTracker {
- void updateBytes(long bytes);
- long getBytes();
-}
+package org.apache.lucene.sandbox.document;
\ No newline at end of file
diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/document/package.html b/lucene/sandbox/src/java/org/apache/lucene/sandbox/document/package.html
deleted file mode 100644
index 201bc6b..0000000
--- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/document/package.html
+++ /dev/null
@@ -1,32 +0,0 @@
-<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<!-- not a package-info.java, because we already defined this package in core/ -->
-
-<html>
-<head>
- <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-</head>
-<body>
-This package contains several point types:
-<ul>
- <li>{@link org.apache.lucene.sandbox.document.BigIntegerPoint BigIntegerPoint} for 128-bit integers</li>
- <li>{@link org.apache.lucene.document.LatLonPoint LatLonPoint} for latitude/longitude geospatial points</li>
-</ul>
-</body>
-</html>
diff --git a/lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/package-info.java
similarity index 79%
copy from lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java
copy to lucene/sandbox/src/java/org/apache/lucene/sandbox/search/package-info.java
index af94ced..c21c742 100644
--- a/lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/package-info.java
@@ -15,12 +15,5 @@
* limitations under the License.
*/
-package org.apache.lucene.util;
-
-/**
- * Tracks dynamic allocations/deallocations of memory for transient objects
- */
-public interface MemoryTracker {
- void updateBytes(long bytes);
- long getBytes();
-}
+/** This package contains a flexible graph-based proximity query, TermAutomatonQuery, and geospatial queries. */
+package org.apache.lucene.sandbox.search;
\ No newline at end of file
diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/package.html b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/package.html
deleted file mode 100644
index e4a0c8e..0000000
--- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/package.html
+++ /dev/null
@@ -1,28 +0,0 @@
-<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<!-- not a package-info.java, because we already defined this package in core/ -->
-
-<html>
-<head>
- <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-</head>
-<body>
-This package contains a flexible graph-based proximity query, TermAutomatonQuery, and geospatial queries.
-</body>
-</html>
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/DocHelper.java b/lucene/test-framework/src/java/org/apache/lucene/index/DocHelper.java
index 8a7a903..418e5e6 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/DocHelper.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/DocHelper.java
@@ -35,7 +35,8 @@
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
-class DocHelper {
+/** Helper functions for tests that handles documents */
+public class DocHelper {
public static final FieldType customType;
public static final String FIELD_1_TEXT = "field one text";
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java b/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java
index 35c3af5..05cd84f 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java
@@ -40,6 +40,7 @@
import org.apache.lucene.codecs.blockterms.LuceneVarGapFixedInterval;
import org.apache.lucene.codecs.blocktreeords.BlockTreeOrdsPostingsFormat;
import org.apache.lucene.codecs.bloom.TestBloomFilteredLucenePostings;
+import org.apache.lucene.codecs.lucene80.Lucene80DocValuesFormat;
import org.apache.lucene.codecs.lucene86.Lucene86PointsReader;
import org.apache.lucene.codecs.lucene86.Lucene86PointsWriter;
import org.apache.lucene.codecs.memory.DirectPostingsFormat;
@@ -209,7 +210,7 @@
addDocValues(avoidCodecs,
TestUtil.getDefaultDocValuesFormat(),
-
+ new Lucene80DocValuesFormat(Lucene80DocValuesFormat.Mode.BEST_COMPRESSION),
new AssertingDocValuesFormat());
Collections.shuffle(formats, random);
diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java b/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java
index e15a59a..1d83913 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java
@@ -33,7 +33,6 @@
import org.apache.lucene.codecs.asserting.AssertingPostingsFormat;
import org.apache.lucene.codecs.cheapbastard.CheapBastardCodec;
import org.apache.lucene.codecs.compressing.CompressingCodec;
-import org.apache.lucene.codecs.lucene87.Lucene87StoredFieldsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90Codec;
import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat;
import org.apache.lucene.codecs.simpletext.SimpleTextCodec;
@@ -188,7 +187,7 @@
} else if ("Compressing".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && randomVal == 6 && !shouldAvoidCodec("Compressing"))) {
codec = CompressingCodec.randomInstance(random);
} else if ("Lucene90".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && randomVal == 5 && !shouldAvoidCodec("Lucene90"))) {
- codec = new Lucene90Codec(RandomPicks.randomFrom(random, Lucene87StoredFieldsFormat.Mode.values())
+ codec = new Lucene90Codec(RandomPicks.randomFrom(random, Lucene90Codec.Mode.values())
);
} else if (!"random".equals(TEST_CODEC)) {
codec = Codec.forName(TEST_CODEC);
diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java b/lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java
index e844a5e..b6a320a 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java
@@ -95,7 +95,11 @@
return br;
}
- static String getRandomString(Random random) {
+ /**
+ * [LUCENE-9600] This was made public because a misc module test depends on it.
+ * It is not recommended for generic usecase; consider {@link com.carrotsearch.randomizedtesting.generators.RandomStrings} to generate random strings.
+ */
+ public static String getRandomString(Random random) {
final String term;
if (random.nextBoolean()) {
term = TestUtil.randomRealisticUnicodeString(random);
@@ -121,7 +125,7 @@
return new String(buffer, 0, end);
}
- static IntsRef toIntsRef(String s, int inputMode) {
+ public static IntsRef toIntsRef(String s, int inputMode) {
return toIntsRef(s, inputMode, new IntsRefBuilder());
}
diff --git a/lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java b/lucene/test-framework/src/test/org/apache/lucene/util/TestPleaseFail.java
similarity index 61%
copy from lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java
copy to lucene/test-framework/src/test/org/apache/lucene/util/TestPleaseFail.java
index af94ced..b019875 100644
--- a/lucene/misc/src/java/org/apache/lucene/util/MemoryTracker.java
+++ b/lucene/test-framework/src/test/org/apache/lucene/util/TestPleaseFail.java
@@ -14,13 +14,23 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.lucene.util;
+import org.junit.Assert;
+
/**
- * Tracks dynamic allocations/deallocations of memory for transient objects
+ * This test is for manually causing assertion failures or errors (to
+ * trigger the event for occasional needs of testing the test framework
+ * itself from command line).
*/
-public interface MemoryTracker {
- void updateBytes(long bytes);
- long getBytes();
+public class TestPleaseFail extends LuceneTestCase {
+ public void testFail() {
+ Assert.assertNull("(intentional failure)", System.getProperty("please.fail"));
+ }
+
+ public void testError() {
+ if (System.getProperty("please.fail") != null) {
+ throw new RuntimeException("(intentional error)");
+ }
+ }
}
diff --git a/settings.gradle b/settings.gradle
index 61cf671..be2c09c 100644
--- a/settings.gradle
+++ b/settings.gradle
@@ -42,6 +42,7 @@
include "lucene:luke"
include "lucene:memory"
include "lucene:misc"
+include "lucene:misc:native"
include "lucene:monitor"
include "lucene:queries"
include "lucene:queryparser"
@@ -73,4 +74,4 @@
include "solr:documentation"
include "solr:packaging"
include "solr:docker"
-include "solr:docker:package"
\ No newline at end of file
+include "solr:docker:package"
diff --git a/solr/core/src/java/org/apache/solr/cloud/CloudUtil.java b/solr/core/src/java/org/apache/solr/cloud/CloudUtil.java
index fc20731..eefee37 100644
--- a/solr/core/src/java/org/apache/solr/cloud/CloudUtil.java
+++ b/solr/core/src/java/org/apache/solr/cloud/CloudUtil.java
@@ -29,6 +29,7 @@
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicReference;
+import java.util.stream.Collectors;
import org.apache.commons.io.FileUtils;
import org.apache.solr.client.solrj.cloud.SolrCloudManager;
@@ -81,7 +82,7 @@
if (thisCnn != null && thisCnn.equals(cnn)
&& !thisBaseUrl.equals(baseUrl)) {
- if (cc.getLoadedCoreNames().contains(desc.getName())) {
+ if (cc.isLoaded(desc.getName())) {
cc.unload(desc.getName());
}
@@ -285,5 +286,15 @@
};
}
-
+ /**
+ * Builds a string with sorted {@link CoreContainer#getLoadedCoreNames()} while truncating to the first 20 cores.
+ */
+ static String getLoadedCoreNamesAsString(CoreContainer coreContainer) {
+ List<String> loadedCoreNames = coreContainer.getLoadedCoreNames();
+ if (loadedCoreNames.size() <= 20) {
+ loadedCoreNames.sort(null);
+ }
+ return loadedCoreNames.stream().limit(20).collect(Collectors.toList())
+ + (loadedCoreNames.size() > 20 ? "...(truncated from " + loadedCoreNames.size() + " cores)" : "");
+ }
}
diff --git a/solr/core/src/java/org/apache/solr/cloud/ReplicateFromLeader.java b/solr/core/src/java/org/apache/solr/cloud/ReplicateFromLeader.java
index 1fa86c0..7b18ffd 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ReplicateFromLeader.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ReplicateFromLeader.java
@@ -69,7 +69,8 @@
if (cc.isShutDown()) {
return;
} else {
- throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "SolrCore not found:" + coreName + " in " + cc.getLoadedCoreNames());
+ throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "SolrCore not found:" + coreName + " in "
+ + CloudUtil.getLoadedCoreNamesAsString(cc));
}
}
SolrConfig.UpdateHandlerInfo uinfo = core.getSolrConfig().getUpdateHandlerInfo();
diff --git a/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContext.java b/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContext.java
index f6c96ca..69ccf75 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContext.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContext.java
@@ -288,7 +288,7 @@
if (core == null) {
if (log.isDebugEnabled()) {
- log.debug("SolrCore not found: {} in {}", coreName, cc.getLoadedCoreNames());
+ log.debug("SolrCore not found: {} in {}", coreName, CloudUtil.getLoadedCoreNamesAsString(cc));
}
return;
}
diff --git a/solr/core/src/java/org/apache/solr/cluster/events/impl/DefaultClusterEventProducer.java b/solr/core/src/java/org/apache/solr/cluster/events/impl/DefaultClusterEventProducer.java
index e24fb40..b91ffef 100644
--- a/solr/core/src/java/org/apache/solr/cluster/events/impl/DefaultClusterEventProducer.java
+++ b/solr/core/src/java/org/apache/solr/cluster/events/impl/DefaultClusterEventProducer.java
@@ -43,7 +43,7 @@
/**
* Implementation of {@link ClusterEventProducer}.
- * <h3>Implementation notes</h3>
+ * <h2>Implementation notes</h2>
* <p>For each cluster event relevant listeners are always invoked sequentially
* (not in parallel) and in arbitrary order. This means that if any listener blocks the
* processing other listeners may be invoked much later or not at all.</p>
diff --git a/solr/core/src/java/org/apache/solr/core/CoreContainer.java b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
index 831f81e..7331cef 100644
--- a/solr/core/src/java/org/apache/solr/core/CoreContainer.java
+++ b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
@@ -26,7 +26,6 @@
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
@@ -784,18 +783,18 @@
// initialize gauges for reporting the number of cores and disk total/free
- solrMetricsContext.gauge(() -> solrCores.getCores().size(),
+ solrMetricsContext.gauge(solrCores::getNumLoadedPermanentCores,
true, "loaded", SolrInfoBean.Category.CONTAINER.toString(), "cores");
- solrMetricsContext.gauge(() -> solrCores.getLoadedCoreNames().size() - solrCores.getCores().size(),
+ solrMetricsContext.gauge(solrCores::getNumLoadedTransientCores,
true, "lazy", SolrInfoBean.Category.CONTAINER.toString(), "cores");
- solrMetricsContext.gauge(() -> solrCores.getAllCoreNames().size() - solrCores.getLoadedCoreNames().size(),
+ solrMetricsContext.gauge(solrCores::getNumUnloadedCores,
true, "unloaded", SolrInfoBean.Category.CONTAINER.toString(), "cores");
Path dataHome = cfg.getSolrDataHome() != null ? cfg.getSolrDataHome() : cfg.getCoreRootDirectory();
solrMetricsContext.gauge(() -> dataHome.toFile().getTotalSpace(),
true, "totalSpace", SolrInfoBean.Category.CONTAINER.toString(), "fs");
solrMetricsContext.gauge(() -> dataHome.toFile().getUsableSpace(),
true, "usableSpace", SolrInfoBean.Category.CONTAINER.toString(), "fs");
- solrMetricsContext.gauge(() -> dataHome.toString(),
+ solrMetricsContext.gauge(dataHome::toString,
true, "path", SolrInfoBean.Category.CONTAINER.toString(), "fs");
solrMetricsContext.gauge(() -> cfg.getCoreRootDirectory().toFile().getTotalSpace(),
true, "totalSpace", SolrInfoBean.Category.CONTAINER.toString(), "fs", "coreRoot");
@@ -1280,13 +1279,13 @@
} else {
String msg = "Already creating a core with name '" + coreName + "', call aborted '";
log.warn(msg);
- throw new SolrException(ErrorCode.SERVER_ERROR, msg);
+ throw new SolrException(ErrorCode.CONFLICT, msg);
}
}
CoreDescriptor cd = new CoreDescriptor(coreName, instancePath, parameters, getContainerProperties(), getZkController());
// Since the core descriptor is removed when a core is unloaded, it should never be anywhere when a core is created.
- if (getAllCoreNames().contains(coreName)) {
+ if (getCoreDescriptor(coreName) != null) {
log.warn("Creating a core with existing name is not allowed: '{}'", coreName);
// TODO: Shouldn't this be a BAD_REQUEST?
throw new SolrException(ErrorCode.SERVER_ERROR, "Core with name '" + coreName + "' already exists.");
@@ -1568,34 +1567,50 @@
}
/**
- * @return a Collection of registered SolrCores
+ * Gets the permanent (non-transient) cores that are currently loaded.
+ *
+ * @return An unsorted list. This list is a new copy, it can be modified by the caller (e.g. it can be sorted).
*/
- public Collection<SolrCore> getCores() {
+ public List<SolrCore> getCores() {
return solrCores.getCores();
}
/**
- * Gets the cores that are currently loaded, i.e. cores that have
+ * Gets the permanent and transient cores that are currently loaded, i.e. cores that have
* 1: loadOnStartup=true and are either not-transient or, if transient, have been loaded and have not been aged out
* 2: loadOnStartup=false and have been loaded but are either non-transient or have not been aged out.
* <p>
* Put another way, this will not return any names of cores that are lazily loaded but have not been called for yet
* or are transient and either not loaded or have been swapped out.
+ * <p>
+ * For efficiency, prefer to check {@link #isLoaded(String)} instead of {@link #getLoadedCoreNames()}.contains(coreName).
+ *
+ * @return An unsorted list. This list is a new copy, it can be modified by the caller (e.g. it can be sorted).
*/
- public Collection<String> getLoadedCoreNames() {
+ public List<String> getLoadedCoreNames() {
return solrCores.getLoadedCoreNames();
}
/**
- * get a list of all the cores that are currently known, whether currently loaded or not
+ * Gets a collection of all the cores, permanent and transient, that are currently known, whether they are loaded or not.
+ * <p>
+ * For efficiency, prefer to check {@link #getCoreDescriptor(String)} != null instead of {@link #getAllCoreNames()}.contains(coreName).
*
- * @return a list of all the available core names in either permanent or transient cores
+ * @return An unsorted list. This list is a new copy, it can be modified by the caller (e.g. it can be sorted).
*/
- public Collection<String> getAllCoreNames() {
+ public List<String> getAllCoreNames() {
return solrCores.getAllCoreNames();
}
/**
+ * Gets the total number of cores, including permanent and transient cores, loaded and unloaded cores.
+ * Faster equivalent for {@link #getAllCoreNames()}.size().
+ */
+ public int getNumAllCores() {
+ return solrCores.getNumAllCores();
+ }
+
+ /**
* Returns an immutable Map of Exceptions that occurred when initializing
* SolrCores (either at startup, or do to runtime requests to create cores)
* keyed off of the name (String) of the SolrCore that had the Exception
diff --git a/solr/core/src/java/org/apache/solr/core/MetricsConfig.java b/solr/core/src/java/org/apache/solr/core/MetricsConfig.java
index cdaa56d..17449fd 100644
--- a/solr/core/src/java/org/apache/solr/core/MetricsConfig.java
+++ b/solr/core/src/java/org/apache/solr/core/MetricsConfig.java
@@ -32,13 +32,18 @@
private final PluginInfo timerSupplier;
private final PluginInfo histogramSupplier;
private final PluginInfo historyHandler;
+ private final Object nullNumber;
+ private final Object notANumber;
+ private final Object nullString;
+ private final Object nullObject;
private final boolean enabled;
private MetricsConfig(boolean enabled,
PluginInfo[] metricReporters, Set<String> hiddenSysProps,
PluginInfo counterSupplier, PluginInfo meterSupplier,
PluginInfo timerSupplier, PluginInfo histogramSupplier,
- PluginInfo historyHandler) {
+ PluginInfo historyHandler,
+ Object nullNumber, Object notANumber, Object nullString, Object nullObject) {
this.enabled = enabled;
this.metricReporters = metricReporters;
this.hiddenSysProps = hiddenSysProps;
@@ -47,6 +52,10 @@
this.timerSupplier = timerSupplier;
this.histogramSupplier = histogramSupplier;
this.historyHandler = historyHandler;
+ this.nullNumber = nullNumber;
+ this.notANumber = notANumber;
+ this.nullString = nullString;
+ this.nullObject = nullObject;
}
public boolean isEnabled() {
@@ -63,6 +72,22 @@
}
}
+ public Object getNullNumber() {
+ return nullNumber;
+ }
+
+ public Object getNotANumber() {
+ return notANumber;
+ }
+
+ public Object getNullString() {
+ return nullString;
+ }
+
+ public Object getNullObject() {
+ return nullObject;
+ }
+
public Set<String> getHiddenSysProps() {
if (enabled) {
return hiddenSysProps;
@@ -127,6 +152,10 @@
private PluginInfo timerSupplier;
private PluginInfo histogramSupplier;
private PluginInfo historyHandler;
+ private Object nullNumber = null;
+ private Object notANumber = null;
+ private Object nullString = null;
+ private Object nullObject = null;
// default to metrics enabled
private boolean enabled = true;
@@ -177,9 +206,30 @@
return this;
}
+ public MetricsConfigBuilder setNullNumber(Object nullNumber) {
+ this.nullNumber = nullNumber;
+ return this;
+ }
+
+ public MetricsConfigBuilder setNotANumber(Object notANumber) {
+ this.notANumber = notANumber;
+ return this;
+ }
+
+ public MetricsConfigBuilder setNullString(Object nullString) {
+ this.nullString = nullString;
+ return this;
+ }
+
+ public MetricsConfigBuilder setNullObject(Object nullObject) {
+ this.nullObject = nullObject;
+ return this;
+ }
+
public MetricsConfig build() {
return new MetricsConfig(enabled, metricReporterPlugins, hiddenSysProps, counterSupplier, meterSupplier,
- timerSupplier, histogramSupplier, historyHandler);
+ timerSupplier, histogramSupplier, historyHandler,
+ nullNumber, notANumber, nullString, nullObject);
}
}
diff --git a/solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java b/solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java
index ce0df02..a7668a2 100644
--- a/solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java
+++ b/solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java
@@ -20,11 +20,11 @@
import java.util.Arrays;
import java.util.Locale;
-import org.apache.lucene.backward_codecs.lucene87.Lucene87Codec;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.PostingsFormat;
-import org.apache.lucene.codecs.lucene87.Lucene87StoredFieldsFormat.Mode;
+import org.apache.lucene.codecs.lucene90.Lucene90Codec;
+import org.apache.lucene.codecs.lucene90.Lucene90Codec.Mode;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.util.NamedList;
@@ -92,7 +92,7 @@
compressionMode = SOLR_DEFAULT_COMPRESSION_MODE;
log.debug("Using default compressionMode: {}", compressionMode);
}
- codec = new Lucene87Codec(compressionMode) {
+ codec = new Lucene90Codec(compressionMode) {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
final SchemaField schemaField = core.getLatestSchema().getFieldOrNull(field);
diff --git a/solr/core/src/java/org/apache/solr/core/SolrCore.java b/solr/core/src/java/org/apache/solr/core/SolrCore.java
index e3489d0..a388bf3 100644
--- a/solr/core/src/java/org/apache/solr/core/SolrCore.java
+++ b/solr/core/src/java/org/apache/solr/core/SolrCore.java
@@ -1195,13 +1195,13 @@
newSearcherMaxReachedCounter = parentContext.counter("maxReached", Category.SEARCHER.toString(), "new");
newSearcherOtherErrorsCounter = parentContext.counter("errors", Category.SEARCHER.toString(), "new");
- parentContext.gauge(() -> name == null ? "(null)" : name, true, "coreName", Category.CORE.toString());
+ parentContext.gauge(() -> name == null ? parentContext.nullString() : name, true, "coreName", Category.CORE.toString());
parentContext.gauge(() -> startTime, true, "startTime", Category.CORE.toString());
parentContext.gauge(() -> getOpenCount(), true, "refCount", Category.CORE.toString());
parentContext.gauge(() -> getInstancePath().toString(), true, "instanceDir", Category.CORE.toString());
- parentContext.gauge(() -> isClosed() ? "(closed)" : getIndexDir(), true, "indexDir", Category.CORE.toString());
- parentContext.gauge(() -> isClosed() ? 0 : getIndexSize(), true, "sizeInBytes", Category.INDEX.toString());
- parentContext.gauge(() -> isClosed() ? "(closed)" : NumberUtils.readableSize(getIndexSize()), true, "size", Category.INDEX.toString());
+ parentContext.gauge(() -> isClosed() ? parentContext.nullString() : getIndexDir(), true, "indexDir", Category.CORE.toString());
+ parentContext.gauge(() -> isClosed() ? parentContext.nullNumber() : getIndexSize(), true, "sizeInBytes", Category.INDEX.toString());
+ parentContext.gauge(() -> isClosed() ? parentContext.nullString() : NumberUtils.readableSize(getIndexSize()), true, "size", Category.INDEX.toString());
if (coreContainer != null) {
final CloudDescriptor cd = getCoreDescriptor().getCloudDescriptor();
if (cd != null) {
@@ -1209,7 +1209,7 @@
if (cd.getCollectionName() != null) {
return cd.getCollectionName();
} else {
- return "_notset_";
+ return parentContext.nullString();
}
}, true, "collection", Category.CORE.toString());
@@ -1217,7 +1217,7 @@
if (cd.getShardId() != null) {
return cd.getShardId();
} else {
- return "_auto_";
+ return parentContext.nullString();
}
}, true, "shard", Category.CORE.toString());
}
diff --git a/solr/core/src/java/org/apache/solr/core/SolrCores.java b/solr/core/src/java/org/apache/solr/core/SolrCores.java
index 574bba0..4291b2e 100644
--- a/solr/core/src/java/org/apache/solr/core/SolrCores.java
+++ b/solr/core/src/java/org/apache/solr/core/SolrCores.java
@@ -16,37 +16,26 @@
*/
package org.apache.solr.core;
-import com.google.common.collect.Lists;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.ExecutorUtil;
-import org.apache.solr.logging.MDCLoggingContext;
import org.apache.solr.common.util.SolrNamedThreadFactory;
+import org.apache.solr.logging.MDCLoggingContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.lang.invoke.MethodHandles;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.TreeSet;
-import java.util.UUID;
+import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
-
class SolrCores {
private static Object modifyLock = new Object(); // for locking around manipulating any of the core maps.
private final Map<String, SolrCore> cores = new LinkedHashMap<>(); // For "permanent" cores
// These descriptors, once loaded, will _not_ be unloaded, i.e. they are not "transient".
- private final Map<String, CoreDescriptor> residentDesciptors = new LinkedHashMap<>();
+ private final Map<String, CoreDescriptor> residentDescriptors = new LinkedHashMap<>();
private final CoreContainer container;
@@ -62,10 +51,8 @@
// to essentially queue them up to be handled via pendingCoreOps.
private static final List<SolrCore> pendingCloses = new ArrayList<>();
- private TransientSolrCoreCacheFactory transientCoreCache;
+ private TransientSolrCoreCacheFactory transientSolrCoreCacheFactory;
- private TransientSolrCoreCache transientSolrCoreCache = null;
-
SolrCores(CoreContainer container) {
this.container = container;
}
@@ -73,13 +60,9 @@
protected void addCoreDescriptor(CoreDescriptor p) {
synchronized (modifyLock) {
if (p.isTransient()) {
- if (getTransientCacheHandler() != null) {
- getTransientCacheHandler().addTransientDescriptor(p.getName(), p);
- } else {
- log.warn("We encountered a core marked as transient, but there is no transient handler defined. This core will be inaccessible");
- }
+ getTransientCacheHandler().addTransientDescriptor(p.getName(), p);
} else {
- residentDesciptors.put(p.getName(), p);
+ residentDescriptors.put(p.getName(), p);
}
}
}
@@ -87,29 +70,30 @@
protected void removeCoreDescriptor(CoreDescriptor p) {
synchronized (modifyLock) {
if (p.isTransient()) {
- if (getTransientCacheHandler() != null) {
- getTransientCacheHandler().removeTransientDescriptor(p.getName());
- }
+ getTransientCacheHandler().removeTransientDescriptor(p.getName());
} else {
- residentDesciptors.remove(p.getName());
+ residentDescriptors.remove(p.getName());
}
}
}
public void load(SolrResourceLoader loader) {
- transientCoreCache = TransientSolrCoreCacheFactory.newInstance(loader, container);
+ synchronized (modifyLock) {
+ transientSolrCoreCacheFactory = TransientSolrCoreCacheFactory.newInstance(loader, container);
+ }
}
+
// We are shutting down. You can't hold the lock on the various lists of cores while they shut down, so we need to
// make a temporary copy of the names and shut them down outside the lock.
protected void close() {
waitForLoadingCoresToFinish(30*1000);
Collection<SolrCore> coreList = new ArrayList<>();
-
- TransientSolrCoreCache transientSolrCoreCache = getTransientCacheHandler();
- // Release observer
- if (transientSolrCoreCache != null) {
- transientSolrCoreCache.close();
+ // Release transient core cache.
+ synchronized (modifyLock) {
+ if (transientSolrCoreCacheFactory != null) {
+ getTransientCacheHandler().close();
+ }
}
// It might be possible for one of the cores to move from one list to another while we're closing them. So
@@ -121,8 +105,8 @@
// make a copy of the cores then clear the map so the core isn't handed out to a request again
coreList.addAll(cores.values());
cores.clear();
- if (transientSolrCoreCache != null) {
- coreList.addAll(transientSolrCoreCache.prepareForShutdown());
+ if (transientSolrCoreCacheFactory != null) {
+ coreList.addAll(getTransientCacheHandler().prepareForShutdown());
}
coreList.addAll(pendingCloses);
@@ -162,32 +146,28 @@
addCoreDescriptor(cd); // cd must always be registered if we register a core
if (cd.isTransient()) {
- if (getTransientCacheHandler() != null) {
- return getTransientCacheHandler().addCore(cd.getName(), core);
- }
+ return getTransientCacheHandler().addCore(cd.getName(), core);
} else {
return cores.put(cd.getName(), core);
}
}
- return null;
}
/**
+ * @return A list of "permanent" cores, i.e. cores that may not be swapped out and are currently loaded.
*
- * @return A list of "permanent" cores, i.e. cores that may not be swapped out and are currently loaded.
- *
* A core may be non-transient but still lazily loaded. If it is "permanent" and lazy-load _and_
* not yet loaded it will _not_ be returned by this call.
- *
- * Note: This is one of the places where SolrCloud is incompatible with Transient Cores. This call is used in
+ *
+ * This list is a new copy, it can be modified by the caller (e.g. it can be sorted).
+ *
+ * Note: This is one of the places where SolrCloud is incompatible with Transient Cores. This call is used in
* cancelRecoveries, transient cores don't participate.
*/
-
List<SolrCore> getCores() {
synchronized (modifyLock) {
- List<SolrCore> lst = new ArrayList<>(cores.values());
- return lst;
+ return new ArrayList<>(cores.values());
}
}
@@ -198,35 +178,86 @@
*
* Put another way, this will not return any names of cores that are lazily loaded but have not been called for yet
* or are transient and either not loaded or have been swapped out.
- *
- * @return List of currently loaded cores.
- */
- Set<String> getLoadedCoreNames() {
- Set<String> set;
-
- synchronized (modifyLock) {
- set = new TreeSet<>(cores.keySet());
- if (getTransientCacheHandler() != null) {
- set.addAll(getTransientCacheHandler().getLoadedCoreNames());
- }
- }
- return set;
- }
- /**
- * Gets a list of all cores, loaded and unloaded
*
- * @return all cores names, whether loaded or unloaded, transient or permanent.
+ * @return An unsorted list. This list is a new copy, it can be modified by the caller (e.g. it can be sorted).
*/
- public Collection<String> getAllCoreNames() {
- Set<String> set;
+ List<String> getLoadedCoreNames() {
synchronized (modifyLock) {
- set = new TreeSet<>(cores.keySet());
- if (getTransientCacheHandler() != null) {
- set.addAll(getTransientCacheHandler().getAllCoreNames());
- }
- set.addAll(residentDesciptors.keySet());
+ return distinctSetsUnion(cores.keySet(), getTransientCacheHandler().getLoadedCoreNames());
}
- return set;
+ }
+
+ /**
+ * Gets a collection of all cores names, loaded and unloaded.
+ * For efficiency, prefer to check {@link #getCoreDescriptor(String)} != null instead of {@link #getAllCoreNames()}.contains(String)
+ *
+ * @return An unsorted list. This list is a new copy, it can be modified by the caller (e.g. it can be sorted).
+ */
+ public List<String> getAllCoreNames() {
+ synchronized (modifyLock) {
+ return distinctSetsUnion(residentDescriptors.keySet(), getTransientCacheHandler().getAllCoreNames());
+ }
+ }
+
+ /**
+ * Makes the union of two distinct sets.
+ *
+ * @return An unsorted list. This list is a new copy, it can be modified by the caller (e.g. it can be sorted).
+ */
+ private static <T> List<T> distinctSetsUnion(Set<T> set1, Set<T> set2) {
+ assert areSetsDistinct(set1, set2);
+ List<T> union = new ArrayList<>(set1.size() + set2.size());
+ union.addAll(set1);
+ union.addAll(set2);
+ return union;
+ }
+
+ /**
+ * Indicates whether two sets are distinct (intersection is empty).
+ */
+ private static <T> boolean areSetsDistinct(Set<T> set1, Set<T> set2) {
+ return set1.stream().noneMatch(set2::contains);
+ }
+
+ /**
+ * Gets the number of currently loaded permanent (non-transient) cores.
+ * Faster equivalent for {@link #getCores()}.size().
+ */
+ int getNumLoadedPermanentCores() {
+ synchronized (modifyLock) {
+ return cores.size();
+ }
+ }
+
+ /**
+ * Gets the number of currently loaded transient cores.
+ */
+ int getNumLoadedTransientCores() {
+ synchronized (modifyLock) {
+ return getTransientCacheHandler().getLoadedCoreNames().size();
+ }
+ }
+
+ /**
+ * Gets the number of unloaded cores, including permanent and transient cores.
+ */
+ int getNumUnloadedCores() {
+ synchronized (modifyLock) {
+ assert areSetsDistinct(residentDescriptors.keySet(), getTransientCacheHandler().getAllCoreNames());
+ return getTransientCacheHandler().getAllCoreNames().size() - getTransientCacheHandler().getLoadedCoreNames().size()
+ + residentDescriptors.size() - cores.size();
+ }
+ }
+
+ /**
+ * Gets the total number of cores, including permanent and transient cores, loaded and unloaded cores.
+ * Faster equivalent for {@link #getAllCoreNames()}.size().
+ */
+ public int getNumAllCores() {
+ synchronized (modifyLock) {
+ assert areSetsDistinct(residentDescriptors.keySet(), getTransientCacheHandler().getAllCoreNames());
+ return residentDescriptors.size() + getTransientCacheHandler().getAllCoreNames().size();
+ }
}
SolrCore getCore(String name) {
@@ -276,9 +307,8 @@
SolrCore ret = cores.remove(name);
// It could have been a newly-created core. It could have been a transient core. The newly-created cores
// in particular should be checked. It could have been a dynamic core.
- TransientSolrCoreCache transientHandler = getTransientCacheHandler();
- if (ret == null && transientHandler != null) {
- ret = transientHandler.removeCore(name);
+ if (ret == null) {
+ ret = getTransientCacheHandler().removeCore(name);
}
return ret;
}
@@ -292,7 +322,7 @@
synchronized (modifyLock) {
SolrCore core = cores.get(name);
- if (core == null && getTransientCacheHandler() != null) {
+ if (core == null) {
core = getTransientCacheHandler().getCore(name);
}
if(core != null && coreId != null && coreId != core.uniqueId) return null;
@@ -314,7 +344,7 @@
if (cores.containsKey(name)) {
return true;
}
- if (getTransientCacheHandler() != null && getTransientCacheHandler().containsCore(name)) {
+ if (getTransientCacheHandler().containsCore(name)) {
// Check pending
for (SolrCore core : pendingCloses) {
if (core.getName().equals(name)) {
@@ -330,22 +360,14 @@
protected boolean isLoaded(String name) {
synchronized (modifyLock) {
- if (cores.containsKey(name)) {
- return true;
- }
- if (getTransientCacheHandler() != null && getTransientCacheHandler().containsCore(name)) {
- return true;
- }
+ return cores.containsKey(name) || getTransientCacheHandler().containsCore(name);
}
- return false;
-
}
protected CoreDescriptor getUnloadedCoreDescriptor(String cname) {
synchronized (modifyLock) {
- CoreDescriptor desc = residentDesciptors.get(cname);
+ CoreDescriptor desc = residentDescriptors.get(cname);
if (desc == null) {
- if (getTransientCacheHandler() == null) return null;
desc = getTransientCacheHandler().getTransientDescriptor(cname);
if (desc == null) {
return null;
@@ -432,28 +454,27 @@
*/
public CoreDescriptor getCoreDescriptor(String coreName) {
synchronized (modifyLock) {
- if (residentDesciptors.containsKey(coreName))
- return residentDesciptors.get(coreName);
+ CoreDescriptor coreDescriptor = residentDescriptors.get(coreName);
+ if (coreDescriptor != null) {
+ return coreDescriptor;
+ }
return getTransientCacheHandler().getTransientDescriptor(coreName);
}
}
/**
- * Get the CoreDescriptors for every SolrCore managed here
- * @return a List of CoreDescriptors
+ * Get the CoreDescriptors for every {@link SolrCore} managed here (permanent and transient, loaded and unloaded).
+ *
+ * @return An unordered list copy. This list can be modified by the caller (e.g. sorted).
*/
public List<CoreDescriptor> getCoreDescriptors() {
- List<CoreDescriptor> cds = Lists.newArrayList();
synchronized (modifyLock) {
- for (String coreName : getAllCoreNames()) {
- // TODO: This null check is a bit suspicious - it seems that
- // getAllCoreNames might return deleted cores as well?
- CoreDescriptor cd = getCoreDescriptor(coreName);
- if (cd != null)
- cds.add(cd);
- }
+ Collection<CoreDescriptor> transientCoreDescriptors = getTransientCacheHandler().getTransientDescriptors();
+ List<CoreDescriptor> coreDescriptors = new ArrayList<>(residentDescriptors.size() + transientCoreDescriptors.size());
+ coreDescriptors.addAll(residentDescriptors.values());
+ coreDescriptors.addAll(transientCoreDescriptors);
+ return coreDescriptors;
}
- return cds;
}
// cores marked as loading will block on getCore
@@ -509,10 +530,7 @@
}
public boolean isCoreLoading(String name) {
- if (currentlyLoadingCores.contains(name)) {
- return true;
- }
- return false;
+ return currentlyLoadingCores.contains(name);
}
public void queueCoreToClose(SolrCore coreToClose) {
@@ -522,14 +540,16 @@
}
}
+ /**
+ * @return the cache holding the transient cores; never null.
+ */
public TransientSolrCoreCache getTransientCacheHandler() {
-
- if (transientCoreCache == null) {
- log.error("No transient handler has been defined. Check solr.xml to see if an attempt to provide a custom {}"
- , "TransientSolrCoreCacheFactory was done incorrectly since the default should have been used otherwise.");
- return null;
+ synchronized (modifyLock) {
+ if (transientSolrCoreCacheFactory == null) {
+ throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, getClass().getName() + " not loaded; call load() before using it");
+ }
+ return transientSolrCoreCacheFactory.getTransientSolrCoreCache();
}
- return transientCoreCache.getTransientSolrCoreCache();
}
}
diff --git a/solr/core/src/java/org/apache/solr/core/SolrXmlConfig.java b/solr/core/src/java/org/apache/solr/core/SolrXmlConfig.java
index e926e6d..6952295 100644
--- a/solr/core/src/java/org/apache/solr/core/SolrXmlConfig.java
+++ b/solr/core/src/java/org/apache/solr/core/SolrXmlConfig.java
@@ -45,6 +45,7 @@
import org.apache.solr.common.util.DOMUtil;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.PropertiesUtil;
+import org.apache.solr.common.util.Utils;
import org.apache.solr.logging.LogWatcherConfig;
import org.apache.solr.metrics.reporters.SolrJmxReporter;
import org.apache.solr.update.UpdateShardHandlerConfig;
@@ -531,6 +532,15 @@
if (node != null) {
builder = builder.setHistoryHandler(new PluginInfo(node, "history", false, false));
}
+ node = config.getNode("solr/metrics/missingValues", false);
+ if (node != null) {
+ NamedList<Object> missingValues = DOMUtil.childNodesToNamedList(node);
+ builder.setNullNumber(decodeNullValue(missingValues.get("nullNumber")));
+ builder.setNotANumber(decodeNullValue(missingValues.get("notANumber")));
+ builder.setNullString(decodeNullValue(missingValues.get("nullString")));
+ builder.setNullObject(decodeNullValue(missingValues.get("nullObject")));
+ }
+
PluginInfo[] reporterPlugins = getMetricReporterPluginInfos(config);
Set<String> hiddenSysProps = getHiddenSysProps(config);
return builder
@@ -539,6 +549,20 @@
.build();
}
+ private static Object decodeNullValue(Object o) {
+ if (o instanceof String) { // check if it's a JSON object
+ String str = (String) o;
+ if (!str.isBlank() && (str.startsWith("{") || str.startsWith("["))) {
+ try {
+ o = Utils.fromJSONString((String) o);
+ } catch (Exception e) {
+ // ignore
+ }
+ }
+ }
+ return o;
+ }
+
private static PluginInfo[] getMetricReporterPluginInfos(XmlConfigFile config) {
NodeList nodes = (NodeList) config.evaluate("solr/metrics/reporter", XPathConstants.NODESET);
List<PluginInfo> configs = new ArrayList<>();
diff --git a/solr/core/src/java/org/apache/solr/core/TransientSolrCoreCache.java b/solr/core/src/java/org/apache/solr/core/TransientSolrCoreCache.java
index 0947bd7..77d5a65 100644
--- a/solr/core/src/java/org/apache/solr/core/TransientSolrCoreCache.java
+++ b/solr/core/src/java/org/apache/solr/core/TransientSolrCoreCache.java
@@ -97,6 +97,10 @@
// method and return the current core descriptor.
public abstract CoreDescriptor getTransientDescriptor(String name);
+ /**
+ * Gets the {@link CoreDescriptor} for all transient cores (loaded and unloaded).
+ */
+ public abstract Collection<CoreDescriptor> getTransientDescriptors();
// Remove the core descriptor from your list of transient descriptors.
public abstract CoreDescriptor removeTransientDescriptor(String name);
diff --git a/solr/core/src/java/org/apache/solr/core/TransientSolrCoreCacheDefault.java b/solr/core/src/java/org/apache/solr/core/TransientSolrCoreCacheDefault.java
index 2ac94b1..fcbfb0c 100644
--- a/solr/core/src/java/org/apache/solr/core/TransientSolrCoreCacheDefault.java
+++ b/solr/core/src/java/org/apache/solr/core/TransientSolrCoreCacheDefault.java
@@ -18,12 +18,7 @@
package org.apache.solr.core;
import java.lang.invoke.MethodHandles;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
+import java.util.*;
import org.apache.solr.common.util.NamedList;
import org.slf4j.Logger;
@@ -124,12 +119,12 @@
@Override
public Set<String> getAllCoreNames() {
- return transientDescriptors.keySet();
+ return Collections.unmodifiableSet(transientDescriptors.keySet());
}
@Override
public Set<String> getLoadedCoreNames() {
- return transientCores.keySet();
+ return Collections.unmodifiableSet(transientCores.keySet());
}
// Remove a core from the internal structures, presumably it
@@ -167,6 +162,11 @@
}
@Override
+ public Collection<CoreDescriptor> getTransientDescriptors() {
+ return Collections.unmodifiableCollection(transientDescriptors.values());
+ }
+
+ @Override
public CoreDescriptor removeTransientDescriptor(String name) {
return transientDescriptors.remove(name);
}
diff --git a/solr/core/src/java/org/apache/solr/core/TransientSolrCoreCacheFactory.java b/solr/core/src/java/org/apache/solr/core/TransientSolrCoreCacheFactory.java
index 981058e..5a2631f 100644
--- a/solr/core/src/java/org/apache/solr/core/TransientSolrCoreCacheFactory.java
+++ b/solr/core/src/java/org/apache/solr/core/TransientSolrCoreCacheFactory.java
@@ -20,6 +20,7 @@
import java.util.Collections;
import com.google.common.collect.ImmutableMap;
+import org.apache.solr.common.SolrException;
import org.apache.solr.util.plugin.PluginInfoInitialized;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -32,6 +33,9 @@
private volatile CoreContainer coreContainer = null;
+ /**
+ * @return the cache holding the transient cores; never null.
+ */
public abstract TransientSolrCoreCache getTransientSolrCoreCache();
/**
* Create a new TransientSolrCoreCacheFactory instance
@@ -51,19 +55,19 @@
// According to the docs, this returns a TransientSolrCoreCacheFactory with the default c'tor
TransientSolrCoreCacheFactory tccf = loader.findClass(info.className, TransientSolrCoreCacheFactory.class).getConstructor().newInstance();
- // OK, now we call it's init method.
+ // OK, now we call its init method.
if (PluginInfoInitialized.class.isAssignableFrom(tccf.getClass()))
PluginInfoInitialized.class.cast(tccf).init(info);
tccf.setCoreContainer(coreContainer);
return tccf;
} catch (Exception e) {
- // Many things could cause this, bad solrconfig, mis-typed class name, whatever. However, this should not
- // keep the enclosing coreContainer from instantiating, so log an error and continue.
- log.error("Error instantiating TransientSolrCoreCacheFactory class [{}]: ", info.className, e);
- return null;
+ // Many things could cause this, bad solrconfig, mis-typed class name, whatever.
+ // Throw an exception to stop loading here; never return null.
+ throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error instantiating "
+ + TransientSolrCoreCacheFactory.class.getName() + " class [" + info.className + "]", e);
}
-
}
+
public static final PluginInfo DEFAULT_TRANSIENT_SOLR_CACHE_INFO =
new PluginInfo("transientSolrCoreCacheFactory",
ImmutableMap.of("class", TransientSolrCoreCacheFactoryDefault.class.getName(),
diff --git a/solr/core/src/java/org/apache/solr/core/ZkContainer.java b/solr/core/src/java/org/apache/solr/core/ZkContainer.java
index 2ca62f8..eb7c850 100644
--- a/solr/core/src/java/org/apache/solr/core/ZkContainer.java
+++ b/solr/core/src/java/org/apache/solr/core/ZkContainer.java
@@ -22,13 +22,12 @@
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
-import java.util.ArrayList;
-import java.util.Collection;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeoutException;
import java.util.function.Predicate;
import java.util.function.Supplier;
+import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.apache.solr.cloud.SolrZkServer;
@@ -125,14 +124,8 @@
"A chroot was specified in ZkHost but the znode doesn't exist. " + zookeeperHost);
}
- Supplier<List<CoreDescriptor>> descriptorsSupplier = () -> {
- List<CoreDescriptor> descriptors = new ArrayList<>(cc.getLoadedCoreNames().size());
- Collection<SolrCore> cores = cc.getCores();
- for (SolrCore core : cores) {
- descriptors.add(core.getCoreDescriptor());
- }
- return descriptors;
- };
+ Supplier<List<CoreDescriptor>> descriptorsSupplier = () ->
+ cc.getCores().stream().map(SolrCore::getCoreDescriptor).collect(Collectors.toList());
ZkController zkController = new ZkController(cc, zookeeperHost, zkClientConnectTimeout, config, descriptorsSupplier);
diff --git a/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java b/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java
index e412429..9908292 100644
--- a/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java
@@ -883,13 +883,13 @@
@Override
public void initializeMetrics(SolrMetricsContext parentContext, String scope) {
super.initializeMetrics(parentContext, scope);
- solrMetricsContext.gauge(() -> (core != null && !core.isClosed() ? NumberUtils.readableSize(core.getIndexSize()) : ""),
+ solrMetricsContext.gauge(() -> (core != null && !core.isClosed() ? NumberUtils.readableSize(core.getIndexSize()) : parentContext.nullString()),
true, "indexSize", getCategory().toString(), scope);
- solrMetricsContext.gauge(() -> (core != null && !core.isClosed() ? getIndexVersion().toString() : ""),
+ solrMetricsContext.gauge(() -> (core != null && !core.isClosed() ? getIndexVersion().toString() : parentContext.nullString()),
true, "indexVersion", getCategory().toString(), scope);
- solrMetricsContext.gauge(() -> (core != null && !core.isClosed() ? getIndexVersion().generation : 0),
+ solrMetricsContext.gauge(() -> (core != null && !core.isClosed() ? getIndexVersion().generation : parentContext.nullNumber()),
true, GENERATION, getCategory().toString(), scope);
- solrMetricsContext.gauge(() -> (core != null && !core.isClosed() ? core.getIndexDir() : ""),
+ solrMetricsContext.gauge(() -> (core != null && !core.isClosed() ? core.getIndexDir() : parentContext.nullString()),
true, "indexPath", getCategory().toString(), scope);
solrMetricsContext.gauge(() -> isLeader,
true, "isLeader", getCategory().toString(), scope);
diff --git a/solr/core/src/java/org/apache/solr/handler/admin/HealthCheckHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/HealthCheckHandler.java
index 21a8d64..1365a01 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/HealthCheckHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/HealthCheckHandler.java
@@ -126,7 +126,7 @@
rsp.add(STATUS, FAILURE);
rsp.add("num_cores_unhealthy", unhealthyCores);
rsp.setException(new SolrException(SolrException.ErrorCode.SERVICE_UNAVAILABLE, unhealthyCores + " out of "
- + cores.getAllCoreNames().size() + " replicas are currently initializing or recovering"));
+ + cores.getNumAllCores() + " replicas are currently initializing or recovering"));
return;
}
rsp.add("message", "All cores are healthy");
diff --git a/solr/core/src/java/org/apache/solr/handler/admin/StatusOp.java b/solr/core/src/java/org/apache/solr/handler/admin/StatusOp.java
index f2bddbd..d1dadfc 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/StatusOp.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/StatusOp.java
@@ -19,6 +19,7 @@
import java.util.Collections;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
import org.apache.solr.common.params.CoreAdminParams;
@@ -42,7 +43,9 @@
failures.put(failure.getKey(), failure.getValue().exception);
}
if (cname == null) {
- for (String name : it.handler.coreContainer.getAllCoreNames()) {
+ List<String> nameList = it.handler.coreContainer.getAllCoreNames();
+ nameList.sort(null);
+ for (String name : nameList) {
status.add(name, CoreAdminOperation.getCoreStatus(it.handler.coreContainer, name, isIndexInfoNeeded));
}
it.rsp.add("initFailures", failures);
diff --git a/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java b/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
index 7956143..d824bd5 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
@@ -791,7 +791,7 @@
}
/**
- * Ensure we don't have {@link org.apache.lucene.document.LazyDocument.LazyField} or equivalent.
+ * Ensure we don't have {@link org.apache.lucene.misc.document.LazyDocument.LazyField} or equivalent.
* It can pose problems if the searcher is about to be closed and we haven't fetched a value yet.
*/
private static IndexableField materialize(IndexableField in) {
diff --git a/solr/core/src/java/org/apache/solr/metrics/SolrMetricManager.java b/solr/core/src/java/org/apache/solr/metrics/SolrMetricManager.java
index 7e6a933..d0ebc26 100644
--- a/solr/core/src/java/org/apache/solr/metrics/SolrMetricManager.java
+++ b/solr/core/src/java/org/apache/solr/metrics/SolrMetricManager.java
@@ -150,6 +150,35 @@
}
/**
+ * Return an object used for representing a null (missing) numeric value.
+ */
+ public Object nullNumber() {
+ return metricsConfig.getNullNumber();
+ }
+
+ /**
+ * Return an object used for representing a "Not A Number" (NaN) value.
+ */
+ public Object notANumber() {
+ return metricsConfig.getNotANumber();
+ }
+
+ /**
+ * Return an object used for representing a null (missing) string value.
+ */
+ public Object nullString() {
+ return metricsConfig.getNullString();
+ }
+
+ /**
+ * Return an object used for representing a null (missing) object value.
+ */
+ public Object nullObject() {
+ return metricsConfig.getNullObject();
+ }
+
+
+ /**
* An implementation of {@link MetricFilter} that selects metrics
* with names that start with one of prefixes.
*/
diff --git a/solr/core/src/java/org/apache/solr/metrics/SolrMetricsContext.java b/solr/core/src/java/org/apache/solr/metrics/SolrMetricsContext.java
index 6bd856e..85217b9 100644
--- a/solr/core/src/java/org/apache/solr/metrics/SolrMetricsContext.java
+++ b/solr/core/src/java/org/apache/solr/metrics/SolrMetricsContext.java
@@ -48,6 +48,34 @@
}
/**
+ * See {@link SolrMetricManager#nullNumber()}.
+ */
+ public Object nullNumber() {
+ return metricManager.nullNumber();
+ }
+
+ /**
+ * See {@link SolrMetricManager#notANumber()}.
+ */
+ public Object notANumber() {
+ return metricManager.notANumber();
+ }
+
+ /**
+ * See {@link SolrMetricManager#nullString()}.
+ */
+ public Object nullString() {
+ return metricManager.nullString();
+ }
+
+ /**
+ * See {@link SolrMetricManager#nullObject()}.
+ */
+ public Object nullObject() {
+ return metricManager.nullObject();
+ }
+
+ /**
* Metrics tag that represents objects with the same life-cycle.
*/
public String getTag() {
diff --git a/solr/core/src/java/org/apache/solr/search/SolrDocumentFetcher.java b/solr/core/src/java/org/apache/solr/search/SolrDocumentFetcher.java
index 76b1737..fccd5d8 100644
--- a/solr/core/src/java/org/apache/solr/search/SolrDocumentFetcher.java
+++ b/solr/core/src/java/org/apache/solr/search/SolrDocumentFetcher.java
@@ -41,7 +41,7 @@
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DocumentStoredFieldVisitor;
import org.apache.lucene.document.FieldType;
-import org.apache.lucene.document.LazyDocument;
+import org.apache.lucene.misc.document.LazyDocument;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.BinaryDocValues;
diff --git a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
index 5d1af6d..9a5ac70 100644
--- a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
+++ b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
@@ -1605,7 +1605,7 @@
} else {
hitsRelation = topDocs.totalHits.relation;
}
- if (cmd.getSort() != null && query instanceof RankQuery == false && (cmd.getFlags() & GET_SCORES) != 0) {
+ if (cmd.getSort() != null && cmd.getQuery() instanceof RankQuery == false && (cmd.getFlags() & GET_SCORES) != 0) {
TopFieldCollector.populateScores(topDocs.scoreDocs, this, query);
}
populateNextCursorMarkFromTopDocs(qr, cmd, topDocs);
@@ -1714,7 +1714,7 @@
assert (totalHits == set.size()) || qr.isPartialResults();
TopDocs topDocs = topCollector.topDocs(0, len);
- if (cmd.getSort() != null && query instanceof RankQuery == false && (cmd.getFlags() & GET_SCORES) != 0) {
+ if (cmd.getSort() != null && cmd.getQuery() instanceof RankQuery == false && (cmd.getFlags() & GET_SCORES) != 0) {
TopFieldCollector.populateScores(topDocs.scoreDocs, this, query);
}
populateNextCursorMarkFromTopDocs(qr, cmd, topDocs);
@@ -2277,12 +2277,12 @@
parentContext.gauge(() -> warmupTime, true, "warmupTime", Category.SEARCHER.toString(), scope);
parentContext.gauge(() -> registerTime, true, "registeredAt", Category.SEARCHER.toString(), scope);
// reader stats
- parentContext.gauge(rgauge(-1, () -> reader.numDocs()), true, "numDocs", Category.SEARCHER.toString(), scope);
- parentContext.gauge(rgauge(-1, () -> reader.maxDoc()), true, "maxDoc", Category.SEARCHER.toString(), scope);
- parentContext.gauge(rgauge(-1, () -> reader.maxDoc() - reader.numDocs()), true, "deletedDocs", Category.SEARCHER.toString(), scope);
- parentContext.gauge(rgauge(-1, () -> reader.toString()), true, "reader", Category.SEARCHER.toString(), scope);
- parentContext.gauge(rgauge("", () -> reader.directory().toString()), true, "readerDir", Category.SEARCHER.toString(), scope);
- parentContext.gauge(rgauge(-1, () -> reader.getVersion()), true, "indexVersion", Category.SEARCHER.toString(), scope);
+ parentContext.gauge(rgauge(parentContext.nullNumber(), () -> reader.numDocs()), true, "numDocs", Category.SEARCHER.toString(), scope);
+ parentContext.gauge(rgauge(parentContext.nullNumber(), () -> reader.maxDoc()), true, "maxDoc", Category.SEARCHER.toString(), scope);
+ parentContext.gauge(rgauge(parentContext.nullNumber(), () -> reader.maxDoc() - reader.numDocs()), true, "deletedDocs", Category.SEARCHER.toString(), scope);
+ parentContext.gauge(rgauge(parentContext.nullString(), () -> reader.toString()), true, "reader", Category.SEARCHER.toString(), scope);
+ parentContext.gauge(rgauge(parentContext.nullString(), () -> reader.directory().toString()), true, "readerDir", Category.SEARCHER.toString(), scope);
+ parentContext.gauge(rgauge(parentContext.nullNumber(), () -> reader.getVersion()), true, "indexVersion", Category.SEARCHER.toString(), scope);
// size of the currently opened commit
parentContext.gauge(() -> {
try {
@@ -2293,7 +2293,7 @@
}
return total;
} catch (Exception e) {
- return -1;
+ return parentContext.nullNumber();
}
}, true, "indexCommitSize", Category.SEARCHER.toString(), scope);
// statsCache metrics
diff --git a/solr/core/src/java/org/apache/solr/search/similarities/LegacyBM25SimilarityFactory.java b/solr/core/src/java/org/apache/solr/search/similarities/LegacyBM25SimilarityFactory.java
index 235a410..66dd802 100644
--- a/solr/core/src/java/org/apache/solr/search/similarities/LegacyBM25SimilarityFactory.java
+++ b/solr/core/src/java/org/apache/solr/search/similarities/LegacyBM25SimilarityFactory.java
@@ -17,7 +17,7 @@
package org.apache.solr.search.similarities;
import org.apache.lucene.search.similarities.Similarity;
-import org.apache.lucene.search.similarity.LegacyBM25Similarity;
+import org.apache.lucene.misc.search.similarity.LegacyBM25Similarity;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.schema.SimilarityFactory;
diff --git a/solr/core/src/java/org/apache/solr/search/similarities/SchemaSimilarityFactory.java b/solr/core/src/java/org/apache/solr/search/similarities/SchemaSimilarityFactory.java
index e682b9ee..0b174a6 100644
--- a/solr/core/src/java/org/apache/solr/search/similarities/SchemaSimilarityFactory.java
+++ b/solr/core/src/java/org/apache/solr/search/similarities/SchemaSimilarityFactory.java
@@ -19,7 +19,7 @@
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
import org.apache.lucene.search.similarities.Similarity;
-import org.apache.lucene.search.similarity.LegacyBM25Similarity;
+import org.apache.lucene.misc.search.similarity.LegacyBM25Similarity;
import org.apache.lucene.util.Version;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
diff --git a/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java b/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java
index 2b8b86f..559a732 100644
--- a/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java
+++ b/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java
@@ -50,7 +50,7 @@
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.HardlinkCopyDirectoryWrapper;
+import org.apache.lucene.misc.store.HardlinkCopyDirectoryWrapper;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.Lock;
import org.apache.lucene.util.BitSetIterator;
diff --git a/solr/core/src/java/org/apache/solr/update/processor/AtomicUpdateDocumentMerger.java b/solr/core/src/java/org/apache/solr/update/processor/AtomicUpdateDocumentMerger.java
index 673c4fa..bdc4a72 100644
--- a/solr/core/src/java/org/apache/solr/update/processor/AtomicUpdateDocumentMerger.java
+++ b/solr/core/src/java/org/apache/solr/update/processor/AtomicUpdateDocumentMerger.java
@@ -16,21 +16,6 @@
*/
package org.apache.solr.update.processor;
-import java.io.IOException;
-import java.lang.invoke.MethodHandles;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.LinkedHashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Set;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-import java.util.stream.Stream;
-
import org.apache.commons.lang3.tuple.Pair;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.util.BytesRef;
@@ -53,10 +38,20 @@
import org.apache.solr.schema.NumericValueFieldType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.update.AddUpdateCommand;
+import org.apache.solr.util.DateMathParser;
import org.apache.solr.util.RefCounted;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+import java.util.*;
+import java.util.Map.Entry;
+import java.util.function.BiConsumer;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.stream.Stream;
+
import static org.apache.solr.common.params.CommonParams.ID;
/**
@@ -189,7 +184,7 @@
if (fieldName.equals(uniqueKeyFieldName)
|| fieldName.equals(CommonParams.VERSION_FIELD)
|| fieldName.equals(routeFieldOrNull)) {
- if (fieldValue instanceof Map ) {
+ if (fieldValue instanceof Map) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"Updating unique key, version or route field is not allowed: " + sdoc.getField(fieldName));
} else {
@@ -450,9 +445,6 @@
final String name = sif.getName();
SolrInputField existingField = toDoc.get(name);
- // throws exception if field doesn't exist
- SchemaField sf = schema.getField(name);
-
Collection<Object> original = existingField != null ?
existingField.getValues() :
new ArrayList<>();
@@ -460,16 +452,10 @@
int initialSize = original.size();
if (fieldVal instanceof Collection) {
for (Object object : (Collection) fieldVal) {
- Object obj = sf.getType().toNativeType(object);
- if (!original.contains(obj)) {
- original.add(obj);
- }
+ addValueIfDistinct(name, original, object);
}
} else {
- Object object = sf.getType().toNativeType(fieldVal);
- if (!original.contains(object)) {
- original.add(object);
- }
+ addValueIfDistinct(name, original, fieldVal);
}
if (original.size() > initialSize) { // update only if more are added
@@ -516,7 +502,7 @@
SolrInputField existingField = toDoc.get(name);
if (existingField == null) return;
@SuppressWarnings({"rawtypes"})
- final Collection original = existingField.getValues();
+ final Collection<Object> original = existingField.getValues();
if (fieldVal instanceof Collection) {
for (Object object : (Collection) fieldVal) {
removeObj(original, object, name);
@@ -582,11 +568,11 @@
return objValues.iterator().next() instanceof SolrDocumentBase;
}
- private void removeObj(@SuppressWarnings({"rawtypes"})Collection original, Object toRemove, String fieldName) {
+ private void removeObj(Collection<Object> original, Object toRemove, String fieldName) {
if(isChildDoc(toRemove)) {
removeChildDoc(original, (SolrInputDocument) toRemove);
} else {
- original.remove(getNativeFieldValue(fieldName, toRemove));
+ removeFieldValueWithNumericFudging(fieldName, original, toRemove);
}
}
@@ -600,6 +586,81 @@
}
}
+ private void removeFieldValueWithNumericFudging(String fieldName, @SuppressWarnings({"rawtypes"}) Collection<Object> original, Object toRemove) {
+ if (original.size() == 0) {
+ return;
+ }
+
+ final BiConsumer<Collection<Object>, Object> removePredicate = (coll, existingElement) -> coll.remove(existingElement);
+ modifyCollectionBasedOnFuzzyPresence(fieldName, original, toRemove, removePredicate, null);
+ }
+
+ private void addValueIfDistinct(String fieldName, Collection<Object> original, Object toAdd) {
+ final BiConsumer<Collection<Object>, Object> addPredicate = (coll, newElement) -> coll.add(newElement);
+ modifyCollectionBasedOnFuzzyPresence(fieldName, original, toAdd, null, addPredicate);
+ }
+
+ /**
+ * Modifies a collection based on the (loosely-judged) presence or absence of a specific value
+ *
+ * Several classes of atomic update (notably 'remove' and 'add-distinct') rely on being able to identify whether an
+ * item is already present in a given list of values. Unfortunately the 'item' being checked for may be of different
+ * types based on the format of the user request and on where the existing document was pulled from (tlog vs index).
+ * As a result atomic updates needs a "fuzzy" way of checking presence and equality that is more flexible than
+ * traditional equality checks allow. This method does light type-checking to catch some of these more common cases
+ * (Long compared against Integers, String compared against Date, etc.), and calls the provided lambda to modify the
+ * field values as necessary.
+ *
+ * @param fieldName the field name involved in this atomic update operation
+ * @param original the list of values currently present in the existing document
+ * @param rawValue a value to be checked for in 'original'
+ * @param ifPresent a function to execute if rawValue was found in 'original'
+ * @param ifAbsent a function to execute if rawValue was not found in 'original'
+ */
+ private void modifyCollectionBasedOnFuzzyPresence(String fieldName, Collection<Object> original, Object rawValue,
+ BiConsumer<Collection<Object>, Object> ifPresent,
+ BiConsumer<Collection<Object>, Object> ifAbsent) {
+ Object nativeValue = getNativeFieldValue(fieldName, rawValue);
+ Optional<Object> matchingValue = findObjectWithTypeFuzziness(original, rawValue, nativeValue);
+ if (matchingValue.isPresent() && ifPresent != null) {
+ ifPresent.accept(original, matchingValue.get());
+ } else if(matchingValue.isEmpty() && ifAbsent != null) {
+ ifAbsent.accept(original, rawValue);
+ }
+ }
+
+ private Optional<Object> findObjectWithTypeFuzziness(Collection<Object> original, Object rawValue, Object nativeValue) {
+ if (nativeValue instanceof Double || nativeValue instanceof Float) {
+ final Number nativeAsNumber = (Number) nativeValue;
+ return original.stream().filter(val ->
+ val.equals(rawValue) ||
+ val.equals(nativeValue) ||
+ (val instanceof Number && ((Number) val).doubleValue() == nativeAsNumber.doubleValue()) ||
+ (val instanceof String && val.equals(nativeAsNumber.toString())))
+ .findFirst();
+ } else if (nativeValue instanceof Long || nativeValue instanceof Integer) {
+ final Number nativeAsNumber = (Number) nativeValue;
+ return original.stream().filter(val ->
+ val.equals(rawValue) ||
+ val.equals(nativeValue) ||
+ (val instanceof Number && ((Number) val).longValue() == nativeAsNumber.longValue()) ||
+ (val instanceof String && val.equals(nativeAsNumber.toString())))
+ .findFirst();
+ } else if (nativeValue instanceof Date) {
+ return original.stream().filter(val ->
+ val.equals(rawValue) ||
+ val.equals(nativeValue) ||
+ (val instanceof String && DateMathParser.parseMath(null, (String)val).equals(nativeValue)))
+ .findFirst();
+ } else if (original.contains(nativeValue)) {
+ return Optional.of(nativeValue);
+ } else if (original.contains(rawValue)) {
+ return Optional.of(rawValue);
+ } else {
+ return Optional.empty();
+ }
+ }
+
/**
*
* @param doc document to search for
diff --git a/lucene/misc/src/java/org/apache/lucene/index/package.html b/solr/core/src/test-files/solr/solr-metricsconfig1.xml
similarity index 71%
rename from lucene/misc/src/java/org/apache/lucene/index/package.html
rename to solr/core/src/test-files/solr/solr-metricsconfig1.xml
index 33ce964..4782e87e 100644
--- a/lucene/misc/src/java/org/apache/lucene/index/package.html
+++ b/solr/core/src/test-files/solr/solr-metricsconfig1.xml
@@ -1,3 +1,4 @@
+<?xml version="1.0" encoding="UTF-8" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
@@ -14,9 +15,14 @@
See the License for the specific language governing permissions and
limitations under the License.
-->
-<!-- not a package-info.java, because we already defined this package in core/ -->
-<html>
-<body>
-Misc index tools and index support.
-</body>
-</html>
+
+<solr>
+ <metrics enabled="${metricsEnabled:true}">
+ <missingValues>
+ <null name="nullNumber"/>
+ <int name="notANumber">-1</int>
+ <str name="nullString"></str>
+ <str name="nullObject">{"value":"missing"}</str>
+ </missingValues>
+ </metrics>
+</solr>
diff --git a/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java b/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java
index 0630dec..60e1607 100644
--- a/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java
+++ b/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java
@@ -32,7 +32,7 @@
import com.codahale.metrics.Metric;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
-import org.apache.lucene.document.LazyDocument;
+import org.apache.lucene.misc.document.LazyDocument;
import org.apache.lucene.index.IndexableField;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
diff --git a/solr/core/src/test/org/apache/solr/core/TestCodecSupport.java b/solr/core/src/test/org/apache/solr/core/TestCodecSupport.java
index 58e5f55..5ac9764 100644
--- a/solr/core/src/test/org/apache/solr/core/TestCodecSupport.java
+++ b/solr/core/src/test/org/apache/solr/core/TestCodecSupport.java
@@ -21,7 +21,7 @@
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.lucene87.Lucene87StoredFieldsFormat;
-import org.apache.lucene.codecs.lucene87.Lucene87StoredFieldsFormat.Mode;
+import org.apache.lucene.codecs.lucene90.Lucene90Codec.Mode;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
import org.apache.lucene.index.SegmentInfo;
diff --git a/solr/core/src/test/org/apache/solr/core/TestCoreContainer.java b/solr/core/src/test/org/apache/solr/core/TestCoreContainer.java
index 6c50932..d05a917 100644
--- a/solr/core/src/test/org/apache/solr/core/TestCoreContainer.java
+++ b/solr/core/src/test/org/apache/solr/core/TestCoreContainer.java
@@ -670,6 +670,7 @@
// check that we get null accessing a non-existent core
assertNull(cc.getCore("does_not_exist"));
+ assertFalse(cc.isLoaded("does_not_exist"));
// check that we get a 500 accessing the core with an init failure
SolrException thrown = expectThrows(SolrException.class, () -> {
SolrCore c = cc.getCore("col_bad");
@@ -691,7 +692,9 @@
assertNotNull("core names is null", cores);
assertEquals("wrong number of cores", 2, cores.size());
assertTrue("col_ok not found", cores.contains("col_ok"));
+ assertTrue(cc.isLoaded("col_ok"));
assertTrue("col_bad not found", cores.contains("col_bad"));
+ assertTrue(cc.isLoaded("col_bad"));
// check that we have the failures we expect
failures = cc.getCoreInitFailures();
diff --git a/solr/core/src/test/org/apache/solr/core/TestLazyCores.java b/solr/core/src/test/org/apache/solr/core/TestLazyCores.java
index a41b750..ff74d61 100644
--- a/solr/core/src/test/org/apache/solr/core/TestLazyCores.java
+++ b/solr/core/src/test/org/apache/solr/core/TestLazyCores.java
@@ -668,6 +668,7 @@
Collection<String> loadedNames = cc.getLoadedCoreNames();
for (String name : nameCheck) {
assertFalse("core " + name + " was found in the list of cores", loadedNames.contains(name));
+ assertFalse(cc.isLoaded(name));
}
// There was a problem at one point exacerbated by the poor naming conventions. So parallel to loaded cores, there
@@ -681,26 +682,33 @@
List<CoreDescriptor> descriptors = cc.getCoreDescriptors();
assertEquals("There should be as many coreDescriptors as coreNames", allNames.size(), descriptors.size());
+ assertEquals(allNames.size(), cc.getNumAllCores());
for (CoreDescriptor desc : descriptors) {
assertTrue("Name should have a corresponding descriptor", allNames.contains(desc.getName()));
+ assertNotNull(cc.getCoreDescriptor(desc.getName()));
}
// First check that all loaded cores are in allNames.
for (String name : loadedNames) {
assertTrue("Loaded core " + name + " should have been found in the list of all possible core names",
allNames.contains(name));
+ assertNotNull(cc.getCoreDescriptor(name));
+ assertTrue(cc.isLoaded(name));
}
- // failed cores should have had their descriptors removed.
+ // Unloaded cores should be in allNames.
for (String name : nameCheck) {
assertTrue("Not-currently-loaded core " + name + " should have been found in the list of all possible core names",
allNames.contains(name));
+ assertNotNull(cc.getCoreDescriptor(name));
}
// Failed cores should not be in coreDescriptors.
for (String name : namesBad) {
assertFalse("Failed core " + name + " should have been found in the list of all possible core names",
allNames.contains(name));
+ assertNull(cc.getCoreDescriptor(name));
+ assertFalse(cc.isLoaded(name));
}
}
diff --git a/solr/core/src/test/org/apache/solr/metrics/MetricsConfigTest.java b/solr/core/src/test/org/apache/solr/metrics/MetricsConfigTest.java
index 7ef5895..2cefedf 100644
--- a/solr/core/src/test/org/apache/solr/metrics/MetricsConfigTest.java
+++ b/solr/core/src/test/org/apache/solr/metrics/MetricsConfigTest.java
@@ -18,6 +18,7 @@
import java.io.File;
import java.io.InputStream;
+import java.util.Map;
import java.util.Properties;
import com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule;
@@ -58,7 +59,7 @@
@Test
public void testDefaults() throws Exception {
- NodeConfig cfg = loadNodeConfig();
+ NodeConfig cfg = loadNodeConfig("solr-metricsconfig.xml");
SolrMetricManager mgr = new SolrMetricManager(cfg.getSolrResourceLoader(), cfg.getMetricsConfig());
assertTrue(mgr.getCounterSupplier() instanceof MetricSuppliers.DefaultCounterSupplier);
assertTrue(mgr.getMeterSupplier() instanceof MetricSuppliers.DefaultMeterSupplier);
@@ -76,7 +77,7 @@
System.setProperty("histogram.size", "2048");
System.setProperty("histogram.window", "600");
System.setProperty("histogram.reservoir", SlidingTimeWindowReservoir.class.getName());
- NodeConfig cfg = loadNodeConfig();
+ NodeConfig cfg = loadNodeConfig("solr-metricsconfig.xml");
SolrMetricManager mgr = new SolrMetricManager(cfg.getSolrResourceLoader(), cfg.getMetricsConfig());
assertTrue(mgr.getCounterSupplier() instanceof MetricSuppliers.DefaultCounterSupplier);
assertTrue(mgr.getMeterSupplier() instanceof MetricSuppliers.DefaultMeterSupplier);
@@ -94,7 +95,7 @@
System.setProperty("meter.class", MockMeterSupplier.class.getName());
System.setProperty("timer.class", MockTimerSupplier.class.getName());
System.setProperty("histogram.class", MockHistogramSupplier.class.getName());
- NodeConfig cfg = loadNodeConfig();
+ NodeConfig cfg = loadNodeConfig("solr-metricsconfig.xml");
SolrMetricManager mgr = new SolrMetricManager(cfg.getSolrResourceLoader(), cfg.getMetricsConfig());
assertTrue(mgr.getCounterSupplier() instanceof MockCounterSupplier);
assertTrue(mgr.getMeterSupplier() instanceof MockMeterSupplier);
@@ -119,7 +120,7 @@
@Test
public void testDisabledMetrics() throws Exception {
System.setProperty("metricsEnabled", "false");
- NodeConfig cfg = loadNodeConfig();
+ NodeConfig cfg = loadNodeConfig("solr-metricsconfig.xml");
SolrMetricManager mgr = new SolrMetricManager(cfg.getSolrResourceLoader(), cfg.getMetricsConfig());
assertTrue(mgr.getCounterSupplier() instanceof MetricSuppliers.NoOpCounterSupplier);
assertTrue(mgr.getMeterSupplier() instanceof MetricSuppliers.NoOpMeterSupplier);
@@ -128,8 +129,21 @@
}
- private NodeConfig loadNodeConfig() throws Exception {
- InputStream is = MetricsConfigTest.class.getResourceAsStream("/solr/solr-metricsconfig.xml");
+ @Test
+ public void testMissingValuesConfig() throws Exception {
+ NodeConfig cfg = loadNodeConfig("solr-metricsconfig1.xml");
+ SolrMetricManager mgr = new SolrMetricManager(cfg.getSolrResourceLoader(), cfg.getMetricsConfig());
+ assertEquals("nullNumber", null, mgr.nullNumber());
+ assertEquals("notANumber", -1, mgr.notANumber());
+ assertEquals("nullNumber", "", mgr.nullString());
+ assertTrue("nullObject", mgr.nullObject() instanceof Map);
+ @SuppressWarnings("unchecked")
+ Map<String, Object> map = (Map<String, Object>) mgr.nullObject();
+ assertEquals("missing", map.get("value"));
+ }
+
+ private NodeConfig loadNodeConfig(String config) throws Exception {
+ InputStream is = MetricsConfigTest.class.getResourceAsStream("/solr/" + config);
return SolrXmlConfig.fromInputStream(TEST_PATH(), is, new Properties()); //TODO pass in props
}
}
diff --git a/solr/core/src/test/org/apache/solr/metrics/SolrMetricsIntegrationTest.java b/solr/core/src/test/org/apache/solr/metrics/SolrMetricsIntegrationTest.java
index 9677769..e0c9e11 100644
--- a/solr/core/src/test/org/apache/solr/metrics/SolrMetricsIntegrationTest.java
+++ b/solr/core/src/test/org/apache/solr/metrics/SolrMetricsIntegrationTest.java
@@ -116,6 +116,11 @@
SolrCoreMetricManager coreMetricManager = h.getCore().getCoreMetricManager();
Map<String, SolrMetricReporter> reporters = metricManager.getReporters(coreMetricManager.getRegistryName());
+ Gauge<?> gauge = (Gauge<?>) coreMetricManager.getRegistry().getMetrics().get("CORE.indexDir");
+ assertNotNull(gauge.getValue());
+ h.getCore().close();
+ assertEquals(metricManager.nullString(), gauge.getValue());
+
deleteCore();
for (String reporterName : RENAMED_REPORTERS) {
diff --git a/solr/core/src/test/org/apache/solr/search/LargeFieldTest.java b/solr/core/src/test/org/apache/solr/search/LargeFieldTest.java
index 48378f9..007b821 100644
--- a/solr/core/src/test/org/apache/solr/search/LargeFieldTest.java
+++ b/solr/core/src/test/org/apache/solr/search/LargeFieldTest.java
@@ -21,7 +21,7 @@
import java.util.Collections;
import org.apache.lucene.document.Document;
-import org.apache.lucene.document.LazyDocument;
+import org.apache.lucene.misc.document.LazyDocument;
import org.apache.lucene.index.IndexableField;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.schema.IndexSchema;
diff --git a/solr/core/src/test/org/apache/solr/search/SolrIndexSearcherTest.java b/solr/core/src/test/org/apache/solr/search/SolrIndexSearcherTest.java
index cb418e6..58b1901 100644
--- a/solr/core/src/test/org/apache/solr/search/SolrIndexSearcherTest.java
+++ b/solr/core/src/test/org/apache/solr/search/SolrIndexSearcherTest.java
@@ -19,12 +19,21 @@
import java.io.IOException;
import org.apache.lucene.index.Term;
+import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
+import org.apache.lucene.search.QueryVisitor;
+import org.apache.lucene.search.Rescorer;
+import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.ScoreMode;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.TopDocsCollector;
import org.apache.lucene.search.TotalHits;
import org.apache.lucene.search.Weight;
+import org.apache.solr.handler.component.MergeStrategy;
import org.apache.solr.SolrTestCaseJ4;
import org.junit.Before;
import org.junit.BeforeClass;
@@ -187,6 +196,120 @@
});
}
+ public void testReranking() throws Exception {
+ float fixedScore = 1.23f;
+ for (boolean doFilter : new boolean[]{ false, true }) {
+ for (boolean doSort : new boolean[]{ false, true }) {
+ for (int getDocSetFlag : new int[]{ 0, SolrIndexSearcher.GET_DOCSET }) {
+ implTestReranking(doFilter, doSort, getDocSetFlag, null); // don't fix score i.e. no re-ranking
+ implTestReranking(doFilter, doSort, getDocSetFlag, fixedScore); // fix score to be non-zero and non-one
+ fixedScore *= 2;
+ }
+ }
+ }
+ }
+
+ private void implTestReranking(boolean doFilter, boolean doSort, int getDocSetFlag, Float fixedScore) throws Exception {
+ h.getCore().withSearcher(searcher -> {
+
+ final QueryCommand cmd = new QueryCommand();
+ cmd.setFlags(SolrIndexSearcher.GET_SCORES | getDocSetFlag);
+
+ if (doSort) {
+ cmd.setSort(new Sort(SortField.FIELD_SCORE, new SortField("id", SortField.Type.STRING)));
+ }
+
+ if (doFilter) {
+ cmd.setFilterList(new TermQuery(new Term("field4_t", Integer.toString(NUM_DOCS - 1))));
+ }
+
+ cmd.setQuery(new TermQuery(new Term("field1_s", "foo")));
+
+ final float expectedScore;
+ if (fixedScore == null) {
+ expectedScore = 1f;
+ } else {
+ expectedScore = fixedScore.floatValue();
+ cmd.setQuery(new FixedScoreReRankQuery(cmd.getQuery(), expectedScore));
+ }
+
+ final QueryResult qr = new QueryResult();
+ searcher.search(qr, cmd);
+
+ // check score for the first document
+ final DocIterator iter = qr.getDocList().iterator();
+ iter.next();
+ assertEquals(expectedScore, iter.score(), 0);
+
+ return null;
+ });
+
+ }
+
+ private static final class FixedScoreReRankQuery extends RankQuery {
+
+ private Query q;
+ final private float fixedScore;
+
+ public FixedScoreReRankQuery(Query q, float fixedScore) {
+ this.q = q;
+ this.fixedScore = fixedScore;
+ }
+
+ public Weight createWeight(IndexSearcher indexSearcher, ScoreMode scoreMode, float boost) throws IOException {
+ return q.createWeight(indexSearcher, scoreMode, boost);
+ }
+
+ @Override
+ public void visit(QueryVisitor visitor) {
+ q.visit(visitor);
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ return this == obj;
+ }
+
+ @Override
+ public int hashCode() {
+ return q.hashCode();
+ }
+
+ @Override
+ public String toString(String field) {
+ return q.toString(field);
+ }
+
+ @Override
+ @SuppressWarnings({"rawtypes"})
+ public TopDocsCollector getTopDocsCollector(int len, QueryCommand cmd, IndexSearcher searcher) throws IOException {
+ return new ReRankCollector(len, len, new Rescorer() {
+ @Override
+ public TopDocs rescore(IndexSearcher searcher, TopDocs firstPassTopDocs, int topN) {
+ for (ScoreDoc scoreDoc : firstPassTopDocs.scoreDocs) {
+ scoreDoc.score = fixedScore;
+ }
+ return firstPassTopDocs;
+ }
+
+ @Override
+ public Explanation explain(IndexSearcher searcher, Explanation firstPassExplanation, int docID) {
+ return firstPassExplanation;
+ }
+ }, cmd, searcher, null);
+ }
+
+ @Override
+ public MergeStrategy getMergeStrategy() {
+ return null;
+ }
+
+ public RankQuery wrap(Query q) {
+ this.q = q;
+ return this;
+ }
+ }
+
public void testMinExactWithFilters() throws Exception {
h.getCore().withSearcher(searcher -> {
diff --git a/solr/core/src/test/org/apache/solr/search/similarities/TestLegacyBM25SimilarityFactory.java b/solr/core/src/test/org/apache/solr/search/similarities/TestLegacyBM25SimilarityFactory.java
index e4c04b5..e13e153 100644
--- a/solr/core/src/test/org/apache/solr/search/similarities/TestLegacyBM25SimilarityFactory.java
+++ b/solr/core/src/test/org/apache/solr/search/similarities/TestLegacyBM25SimilarityFactory.java
@@ -17,7 +17,7 @@
package org.apache.solr.search.similarities;
import org.apache.lucene.search.similarities.Similarity;
-import org.apache.lucene.search.similarity.LegacyBM25Similarity;
+import org.apache.lucene.misc.search.similarity.LegacyBM25Similarity;
import org.junit.BeforeClass;
/**
diff --git a/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdateJavabinTest.java b/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdateJavabinTest.java
new file mode 100644
index 0000000..758de5f
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdateJavabinTest.java
@@ -0,0 +1,370 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.update.processor;
+
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.client.solrj.request.QueryRequest;
+import org.apache.solr.client.solrj.request.UpdateRequest;
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.cloud.SolrCloudTestCase;
+import org.apache.solr.common.SolrDocument;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.util.NamedList;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.time.Instant;
+import java.util.Collection;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Tests Solr's atomic-update functionality using requests sent through SolrJ using wt=javabin
+ *
+ * {@link AtomicUpdatesTest} covers some of the same functionality, but does so by making xml-based requests. Recent
+ * changes to Solr have made it possible for the same data sent with different formats to result in different NamedLists
+ * after unmarshalling, so the test duplication is now necessary. See SOLR-13331 for an example.
+ */
+public class AtomicUpdateJavabinTest extends SolrCloudTestCase {
+ private static final String COMMITTED_DOC_ID = "1";
+ private static final String COMMITTED_DOC_STR_VALUES_ID = "1s";
+ private static final String UNCOMMITTED_DOC_ID = "2";
+ private static final String UNCOMMITTED_DOC_STR_VALUES_ID = "2s";
+ private static final String COLLECTION = "collection1";
+ private static final int NUM_SHARDS = 1;
+ private static final int NUM_REPLICAS = 1;
+ private static final Date DATE_1 = Date.from(Instant.ofEpochSecond(1554243309));
+ private static final String DATE_1_STR = "2019-04-02T22:15:09Z";
+ private static final Date DATE_2 = Date.from(Instant.ofEpochSecond(1554243609));
+ private static final String DATE_2_STR = "2019-04-02T22:20:09Z";
+ private static final Date DATE_3 = Date.from(Instant.ofEpochSecond(1554243909));
+ private static final String DATE_3_STR = "2019-04-02T22:25:09Z";
+
+
+ @BeforeClass
+ public static void setupCluster() throws Exception {
+ configureCluster(1)
+ .addConfig("conf", configset("cloud-dynamic"))
+ .configure();
+
+ CollectionAdminRequest.createCollection(COLLECTION, "conf", NUM_SHARDS, NUM_REPLICAS)
+ .process(cluster.getSolrClient());
+
+ cluster.waitForActiveCollection(COLLECTION, 1, 1);
+ }
+
+ @Before
+ public void setUp() throws Exception {
+ super.setUp();
+
+ final SolrInputDocument committedDoc = sdoc(
+ "id", COMMITTED_DOC_ID,
+ "title_s", "title_1", "title_s", "title_2",
+ "tv_mv_text", "text_1", "tv_mv_text", "text_2",
+ "count_is", 1, "count_is", 2,
+ "count_md", 1.0, "count_md", 2.0,
+ "timestamps_mdt", DATE_1, "timestamps_mdt", DATE_2);
+ final SolrInputDocument committedStrDoc = sdoc(
+ "id", COMMITTED_DOC_STR_VALUES_ID,
+ "title_s", "title_1", "title_s", "title_2",
+ "tv_mv_text", "text_1", "tv_mv_text", "text_2",
+ "count_is", "1", "count_is", "2",
+ "count_md", "1.0", "count_md", "2.0",
+ "timestamps_mdt", DATE_1_STR, "timestamps_mdt", DATE_2_STR);
+ final UpdateRequest committedRequest = new UpdateRequest()
+ .add(committedDoc)
+ .add(committedStrDoc);
+ committedRequest.commit(cluster.getSolrClient(), COLLECTION);
+
+ // Upload a copy of id:1 that's uncommitted to test how atomic-updates modify values in the tlog
+ // See SOLR-14971 for an example of why this case needs tested separately
+ final SolrInputDocument uncommittedDoc = sdoc(
+ "id", UNCOMMITTED_DOC_ID,
+ "title_s", "title_1", "title_s", "title_2",
+ "tv_mv_text", "text_1", "tv_mv_text", "text_2",
+ "count_is", 1, "count_is", 2,
+ "count_md", 1.0, "count_md", 2.0,
+ "timestamps_mdt", DATE_1, "timestamps_mdt", DATE_2);
+ final SolrInputDocument uncommittedStrDoc = sdoc(
+ "id", UNCOMMITTED_DOC_STR_VALUES_ID,
+ "title_s", "title_1", "title_s", "title_2",
+ "tv_mv_text", "text_1", "tv_mv_text", "text_2",
+ "count_is", "1", "count_is", "2",
+ "count_md", "1.0", "count_md", "2.0",
+ "timestamps_mdt", DATE_1_STR, "timestamps_mdt", DATE_2_STR);
+ final UpdateRequest uncommittedRequest = new UpdateRequest()
+ .add(uncommittedDoc)
+ .add(uncommittedStrDoc);
+ uncommittedRequest.process(cluster.getSolrClient(), COLLECTION);
+ }
+
+ @Test
+ public void testAtomicUpdateRemovalOfStrField() throws Exception {
+ ensureFieldHasValues(COMMITTED_DOC_ID, "title_s", "title_1", "title_2");
+ atomicRemoveValueFromField(COMMITTED_DOC_ID, "title_s", "title_1");
+ ensureFieldHasValues(COMMITTED_DOC_ID, "title_s", "title_2");
+
+ ensureFieldHasValues(UNCOMMITTED_DOC_ID, "title_s", "title_1", "title_2");
+ atomicRemoveValueFromField(UNCOMMITTED_DOC_ID, "title_s", "title_1");
+ ensureFieldHasValues(UNCOMMITTED_DOC_ID, "title_s", "title_2");
+ }
+
+ @Test
+ public void testAtomicUpdateRemovalOfTextField() throws Exception {
+ ensureFieldHasValues(COMMITTED_DOC_ID, "tv_mv_text", "text_1", "text_2");
+ atomicRemoveValueFromField(COMMITTED_DOC_ID, "tv_mv_text", "text_1");
+ ensureFieldHasValues(COMMITTED_DOC_ID, "tv_mv_text", "text_2");
+
+ ensureFieldHasValues(UNCOMMITTED_DOC_ID, "tv_mv_text", "text_1", "text_2");
+ atomicRemoveValueFromField(UNCOMMITTED_DOC_ID, "tv_mv_text", "text_1");
+ ensureFieldHasValues(UNCOMMITTED_DOC_ID, "tv_mv_text", "text_2");
+ }
+
+ @Test
+ public void testAtomicUpdateRemovalOfIntField() throws Exception {
+ ensureFieldHasValues(COMMITTED_DOC_ID, "count_is", 1, 2);
+ atomicRemoveValueFromField(COMMITTED_DOC_ID, "count_is", 1);
+ ensureFieldHasValues(COMMITTED_DOC_ID, "count_is", 2);
+
+ ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_is", 1, 2);
+ atomicRemoveValueFromField(UNCOMMITTED_DOC_ID, "count_is", 1);
+ ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_is", 2);
+ }
+
+ @Test
+ public void testAtomicUpdateRemovalOfDoubleField() throws Exception {
+ ensureFieldHasValues(COMMITTED_DOC_ID, "count_md", 1.0, 2.0);
+ atomicRemoveValueFromField(COMMITTED_DOC_ID, "count_md", 1.0);
+ ensureFieldHasValues(COMMITTED_DOC_ID, "count_md", 2.0);
+
+ ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_md", 1.0, 2.0);
+ atomicRemoveValueFromField(UNCOMMITTED_DOC_ID, "count_md", 1.0);
+ ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_md", 2.0);
+ }
+
+ @Test
+ public void testAtomicUpdateRemovalOfDateField() throws Exception {
+ ensureFieldHasValues(COMMITTED_DOC_ID, "timestamps_mdt", DATE_1, DATE_2);
+ atomicRemoveValueFromField(COMMITTED_DOC_ID, "timestamps_mdt", DATE_1);
+ ensureFieldHasValues(COMMITTED_DOC_ID, "timestamps_mdt", DATE_2);
+
+ ensureFieldHasValues(UNCOMMITTED_DOC_ID, "timestamps_mdt", DATE_1, DATE_2);
+ atomicRemoveValueFromField(UNCOMMITTED_DOC_ID, "timestamps_mdt", DATE_1);
+ ensureFieldHasValues(UNCOMMITTED_DOC_ID, "timestamps_mdt", DATE_2);
+ }
+
+ @Test
+ public void testAtomicUpdateAddDistinctOfDistinctValueOnStrField() throws Exception {
+ ensureFieldHasValues(COMMITTED_DOC_ID, "title_s", "title_1", "title_2");
+ atomicAddDistinctValueToField(COMMITTED_DOC_ID, "title_s", "title_3");
+ ensureFieldHasValues(COMMITTED_DOC_ID, "title_s", "title_1", "title_2", "title_3");
+
+ ensureFieldHasValues(UNCOMMITTED_DOC_ID, "title_s", "title_1", "title_2");
+ atomicAddDistinctValueToField(UNCOMMITTED_DOC_ID, "title_s", "title_3");
+ ensureFieldHasValues(UNCOMMITTED_DOC_ID, "title_s", "title_1", "title_2", "title_3");
+ }
+
+ @Test
+ public void testAtomicUpdateAddDistinctOfDuplicateValueOnStrField() throws Exception {
+ ensureFieldHasValues(COMMITTED_DOC_ID, "title_s", "title_1", "title_2");
+ atomicAddDistinctValueToField(COMMITTED_DOC_ID, "title_s", "title_2");
+ ensureFieldHasValues(COMMITTED_DOC_ID, "title_s", "title_1", "title_2");
+
+ ensureFieldHasValues(UNCOMMITTED_DOC_ID, "title_s", "title_1", "title_2");
+ atomicAddDistinctValueToField(UNCOMMITTED_DOC_ID, "title_s", "title_2");
+ ensureFieldHasValues(UNCOMMITTED_DOC_ID, "title_s", "title_1", "title_2");
+ }
+
+ @Test
+ public void testAtomicUpdateAddDistinctOfDistinctValueOnTextField() throws Exception {
+ ensureFieldHasValues(COMMITTED_DOC_ID, "tv_mv_text", "text_1", "text_2");
+ atomicAddDistinctValueToField(COMMITTED_DOC_ID, "tv_mv_text", "text_3");
+ ensureFieldHasValues(COMMITTED_DOC_ID, "tv_mv_text", "text_1", "text_2", "text_3");
+
+ ensureFieldHasValues(UNCOMMITTED_DOC_ID, "tv_mv_text", "text_1", "text_2");
+ atomicAddDistinctValueToField(UNCOMMITTED_DOC_ID, "tv_mv_text", "text_3");
+ ensureFieldHasValues(UNCOMMITTED_DOC_ID, "tv_mv_text", "text_1", "text_2", "text_3");
+ }
+
+ @Test
+ public void testAtomicUpdateAddDistinctOfDuplicateValueOnTextField() throws Exception {
+ ensureFieldHasValues(COMMITTED_DOC_ID, "tv_mv_text", "text_1", "text_2");
+ atomicAddDistinctValueToField(COMMITTED_DOC_ID, "tv_mv_text", "text_2");
+ ensureFieldHasValues(COMMITTED_DOC_ID, "tv_mv_text", "text_1", "text_2");
+
+ ensureFieldHasValues(UNCOMMITTED_DOC_ID, "tv_mv_text", "text_1", "text_2");
+ atomicAddDistinctValueToField(UNCOMMITTED_DOC_ID, "tv_mv_text", "text_2");
+ ensureFieldHasValues(UNCOMMITTED_DOC_ID, "tv_mv_text", "text_1", "text_2");
+ }
+
+ @Test
+ public void testAtomicUpdateAddDistinctOfDistinctValueOnIntField() throws Exception {
+ ensureFieldHasValues(COMMITTED_DOC_ID, "count_is", 1, 2);
+ atomicAddDistinctValueToField(COMMITTED_DOC_ID, "count_is", 3);
+ ensureFieldHasValues(COMMITTED_DOC_ID, "count_is", 1, 2, 3);
+
+ ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "count_is", 1, 2);
+ atomicAddDistinctValueToField(COMMITTED_DOC_STR_VALUES_ID, "count_is", 3);
+ ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "count_is", 1, 2, 3);
+
+ ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_is", 1, 2);
+ atomicAddDistinctValueToField(UNCOMMITTED_DOC_ID, "count_is", 3);
+ ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_is", 1, 2, 3);
+
+ ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "count_is", 1, 2);
+ atomicAddDistinctValueToField(UNCOMMITTED_DOC_STR_VALUES_ID, "count_is", 3);
+ ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "count_is", 1, 2, 3);
+ }
+
+ @Test
+ public void testAtomicUpdateAddDistinctOfDuplicateValueOnIntField() throws Exception {
+ ensureFieldHasValues(COMMITTED_DOC_ID, "count_is", 1, 2);
+ atomicAddDistinctValueToField(COMMITTED_DOC_ID, "count_is", 2);
+ ensureFieldHasValues(COMMITTED_DOC_ID, "count_is", 1, 2);
+
+ ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "count_is", 1, 2);
+ atomicAddDistinctValueToField(COMMITTED_DOC_STR_VALUES_ID, "count_is", 2);
+ ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "count_is", 1, 2);
+
+ ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_is", 1, 2);
+ atomicAddDistinctValueToField(UNCOMMITTED_DOC_ID, "count_is", 2);
+ ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_is", 1, 2);
+
+ ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "count_is", 1, 2);
+ atomicAddDistinctValueToField(UNCOMMITTED_DOC_STR_VALUES_ID, "count_is", 2);
+ ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "count_is", 1, 2);
+ }
+
+ @Test
+ public void testAtomicUpdateAddDistinctOfDistinctValueOnDoubleField() throws Exception {
+ ensureFieldHasValues(COMMITTED_DOC_ID, "count_md", 1.0, 2.0);
+ atomicAddDistinctValueToField(COMMITTED_DOC_ID, "count_md", 3.0);
+ ensureFieldHasValues(COMMITTED_DOC_ID, "count_md", 1.0, 2.0, 3.0);
+
+ ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "count_md", 1.0, 2.0);
+ atomicAddDistinctValueToField(COMMITTED_DOC_STR_VALUES_ID, "count_md", 3.0);
+ ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "count_md", 1.0, 2.0, 3.0);
+
+ ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_md", 1.0, 2.0);
+ atomicAddDistinctValueToField(UNCOMMITTED_DOC_ID, "count_md", 3.0);
+ ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_md", 1.0, 2.0, 3.0);
+
+ ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "count_md", 1.0, 2.0);
+ atomicAddDistinctValueToField(UNCOMMITTED_DOC_STR_VALUES_ID, "count_md", 3.0);
+ ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "count_md", 1.0, 2.0, 3.0);
+ }
+
+ @Test
+ public void testAtomicUpdateAddDistinctOfDuplicateValueOnDoubleField() throws Exception {
+ ensureFieldHasValues(COMMITTED_DOC_ID, "count_md", 1.0, 2.0);
+ atomicAddDistinctValueToField(COMMITTED_DOC_ID, "count_md", 2.0);
+ ensureFieldHasValues(COMMITTED_DOC_ID, "count_md", 1.0, 2.0);
+
+ ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "count_md", 1.0, 2.0);
+ atomicAddDistinctValueToField(COMMITTED_DOC_STR_VALUES_ID, "count_md", 2.0);
+ ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "count_md", 1.0, 2.0);
+
+ ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_md", 1.0, 2.0);
+ atomicAddDistinctValueToField(UNCOMMITTED_DOC_ID, "count_md", 2.0);
+ ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_md", 1.0, 2.0);
+
+ ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "count_md", 1.0, 2.0);
+ atomicAddDistinctValueToField(UNCOMMITTED_DOC_STR_VALUES_ID, "count_md", 2.0);
+ ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "count_md", 1.0, 2.0);
+ }
+
+ @Test
+ public void testAtomicUpdateAddDistinctOfDistinctValueOnDateField() throws Exception {
+ ensureFieldHasValues(COMMITTED_DOC_ID, "timestamps_mdt", DATE_1, DATE_2);
+ atomicAddDistinctValueToField(COMMITTED_DOC_ID, "timestamps_mdt", DATE_3);
+ ensureFieldHasValues(COMMITTED_DOC_ID, "timestamps_mdt", DATE_1, DATE_2, DATE_3);
+
+ ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_1, DATE_2);
+ atomicAddDistinctValueToField(COMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_3);
+ ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_1, DATE_2, DATE_3);
+
+ ensureFieldHasValues(UNCOMMITTED_DOC_ID, "timestamps_mdt", DATE_1, DATE_2);
+ atomicAddDistinctValueToField(UNCOMMITTED_DOC_ID, "timestamps_mdt", DATE_3);
+ ensureFieldHasValues(UNCOMMITTED_DOC_ID, "timestamps_mdt", DATE_1, DATE_2, DATE_3);
+
+ ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_1, DATE_2);
+ atomicAddDistinctValueToField(UNCOMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_3);
+ ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_1, DATE_2, DATE_3);
+ }
+
+ @Test
+ public void testAtomicUpdateAddDistinctOfDuplicateValueOnDateField() throws Exception {
+ ensureFieldHasValues(COMMITTED_DOC_ID, "timestamps_mdt", DATE_1, DATE_2);
+ atomicAddDistinctValueToField(COMMITTED_DOC_ID, "timestamps_mdt", DATE_2);
+ ensureFieldHasValues(COMMITTED_DOC_ID, "timestamps_mdt", DATE_1, DATE_2);
+
+ ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_1, DATE_2);
+ atomicAddDistinctValueToField(COMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_2);
+ ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_1, DATE_2);
+
+ ensureFieldHasValues(UNCOMMITTED_DOC_ID, "timestamps_mdt", DATE_1, DATE_2);
+ atomicAddDistinctValueToField(UNCOMMITTED_DOC_ID, "timestamps_mdt", DATE_2);
+ ensureFieldHasValues(UNCOMMITTED_DOC_ID, "timestamps_mdt", DATE_1, DATE_2);
+
+ ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_1, DATE_2);
+ atomicAddDistinctValueToField(UNCOMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_2);
+ ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_1, DATE_2);
+ }
+
+ private void atomicRemoveValueFromField(String docId, String fieldName, Object value) throws Exception {
+ final SolrInputDocument doc = new SolrInputDocument();
+ doc.setField("id", docId);
+ Map<String, Object> atomicUpdateRemoval = new HashMap<>(1);
+ atomicUpdateRemoval.put("remove", value);
+ doc.setField(fieldName, atomicUpdateRemoval);
+
+ cluster.getSolrClient().add(COLLECTION, doc);
+ }
+
+ private void atomicAddDistinctValueToField(String docId, String fieldName, Object value) throws Exception {
+ final SolrInputDocument doc = new SolrInputDocument();
+ doc.setField("id", docId);
+ Map<String, Object> atomicUpdateRemoval = new HashMap<>(1);
+ atomicUpdateRemoval.put("add-distinct", value);
+ doc.setField(fieldName, atomicUpdateRemoval);
+
+ cluster.getSolrClient().add(COLLECTION, doc);
+ }
+
+ private void ensureFieldHasValues(String identifyingDocId, String fieldName, Object... expectedValues) throws Exception {
+ final ModifiableSolrParams solrParams = new ModifiableSolrParams();
+ solrParams.set("id", identifyingDocId);
+ QueryRequest request = new QueryRequest(solrParams);
+ request.setPath("/get");
+ final QueryResponse response = request.process(cluster.getSolrClient(), COLLECTION);
+
+ final NamedList<Object> rawResponse = response.getResponse();
+ assertTrue(rawResponse.get("doc") != null);
+ assertTrue(rawResponse.get("doc") instanceof SolrDocument);
+ final SolrDocument doc = (SolrDocument) rawResponse.get("doc");
+ final Collection<Object> valuesAfterUpdate = doc.getFieldValues(fieldName);
+ assertEquals("Expected field to have " + expectedValues.length + " values, but found " + valuesAfterUpdate.size(),
+ expectedValues.length, valuesAfterUpdate.size());
+ for (Object expectedValue: expectedValues) {
+ assertTrue("Expected value [" + expectedValue + "] was not found in field", valuesAfterUpdate.contains(expectedValue));
+ }
+ }
+}
diff --git a/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdateRemovalJavabinTest.java b/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdateRemovalJavabinTest.java
deleted file mode 100644
index c028051..0000000
--- a/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdateRemovalJavabinTest.java
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.solr.update.processor;
-
-import java.time.Instant;
-import java.util.Collection;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.Map;
-
-import org.apache.solr.client.solrj.SolrClient;
-import org.apache.solr.client.solrj.SolrQuery;
-import org.apache.solr.client.solrj.request.CollectionAdminRequest;
-import org.apache.solr.client.solrj.request.UpdateRequest;
-import org.apache.solr.client.solrj.response.QueryResponse;
-import org.apache.solr.cloud.SolrCloudTestCase;
-import org.apache.solr.common.SolrDocumentList;
-import org.apache.solr.common.SolrInputDocument;
-import org.junit.BeforeClass;
-import org.junit.Test;
-
-/**
- * Tests Solr's atomic-update functionality using requests sent through SolrJ using wt=javabin
- *
- * {@link AtomicUpdatesTest} covers some of the same functionality, but does so by making xml-based requests. Recent
- * changes to Solr have made it possible for the same data sent with different formats to result in different NamedLists
- * after unmarshalling, so the test duplication is now necessary. See SOLR-13331 for an example.
- */
-public class AtomicUpdateRemovalJavabinTest extends SolrCloudTestCase {
- private static final String COLLECTION = "collection1";
- private static final int NUM_SHARDS = 1;
- private static final int NUM_REPLICAS = 1;
- private static final Date DATE_1 = new Date();
- private static final Date DATE_2 = Date.from(Instant.ofEpochSecond(1554243909));
-
- @BeforeClass
- public static void setupCluster() throws Exception {
- configureCluster(1)
- .addConfig("conf", configset("cloud-dynamic"))
- .configure();
-
- CollectionAdminRequest.createCollection(COLLECTION, "conf", NUM_SHARDS, NUM_REPLICAS)
- .process(cluster.getSolrClient());
-
- cluster.waitForActiveCollection(COLLECTION, 1, 1);
-
- final SolrInputDocument doc1 = sdoc(
- "id", "1",
- "title_s", "title_1", "title_s", "title_2",
- "tv_mv_text", "text_1", "tv_mv_text", "text_2",
- "count_is", 1, "count_is", 2,
- "count_md", 1.0, "count_md", 2.0,
- "timestamps_mdt", DATE_1, "timestamps_mdt", DATE_2);
- final UpdateRequest req = new UpdateRequest()
- .add(doc1);
- req.commit(cluster.getSolrClient(), COLLECTION);
- }
-
- @Test
- public void testAtomicUpdateRemovalOfStrField() throws Exception {
- ensureFieldHasValues("1", "title_s", "title_1", "title_2");
- atomicRemoveValueFromField("1", "title_s", "title_1");
- ensureFieldHasValues("1", "title_s", "title_2");
- }
-
- @Test
- public void testAtomicUpdateRemovalOfTextField() throws Exception {
- ensureFieldHasValues("1", "tv_mv_text", "text_1", "text_2");
- atomicRemoveValueFromField("1", "tv_mv_text", "text_1");
- ensureFieldHasValues("1", "tv_mv_text", "text_2");
- }
-
- @Test
- public void testAtomicUpdateRemovalOfIntField() throws Exception {
- ensureFieldHasValues("1", "count_is", 1, 2);
- atomicRemoveValueFromField("1", "count_is", 1);
- ensureFieldHasValues("1", "count_is", 2);
- }
-
- @Test
- public void testAtomicUpdateRemovalOfDoubleField() throws Exception {
- ensureFieldHasValues("1", "count_md", 1.0, 2.0);
- atomicRemoveValueFromField("1", "count_md", 1.0);
- ensureFieldHasValues("1", "count_md", 2.0);
- }
-
- @Test
- public void testAtomicUpdateRemovalOfDateField() throws Exception {
- ensureFieldHasValues("1", "timestamps_mdt", DATE_1, DATE_2);
- atomicRemoveValueFromField("1", "timestamps_mdt", DATE_1);
- ensureFieldHasValues("1", "timestamps_mdt", DATE_2);
- }
-
- private void atomicRemoveValueFromField(String docId, String fieldName, Object value) throws Exception {
- final SolrInputDocument doc = new SolrInputDocument();
- doc.setField("id", docId);
- Map<String, Object> atomicUpdateRemoval = new HashMap<>(1);
- atomicUpdateRemoval.put("remove", value);
- doc.setField(fieldName, atomicUpdateRemoval);
-
- cluster.getSolrClient().add(COLLECTION, doc);
- cluster.getSolrClient().commit(COLLECTION);
- }
-
- private void ensureFieldHasValues(String identifyingDocId, String fieldName, Object... expectedValues) throws Exception {
- final SolrClient client = cluster.getSolrClient();
-
- final QueryResponse response = client.query(COLLECTION, new SolrQuery("id:" + identifyingDocId));
- final SolrDocumentList docs = response.getResults();
- assertEquals(1, docs.getNumFound());
- final Collection<Object> valuesAfterUpdate = docs.get(0).getFieldValues(fieldName);
- assertEquals(expectedValues.length, valuesAfterUpdate.size());
- for (Object expectedValue: expectedValues) {
- assertTrue("Expected value [" + expectedValue + "] was not found in field", valuesAfterUpdate.contains(expectedValue));
- }
- }
-}
diff --git a/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdatesTest.java b/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdatesTest.java
index 5a15e10..54e9f9d 100644
--- a/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdatesTest.java
+++ b/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdatesTest.java
@@ -181,6 +181,22 @@
assertQ(req("q", "intRemove:[* TO *]", "indent", "true"), "//result[@numFound = '4']");
assertQ(req("q", "intRemove:111", "indent", "true"), "//result[@numFound = '3']");
+
+ // Test that mv int fields can have values removed prior to being committed to index (see SOLR-14971)
+ doc = new SolrInputDocument();
+ doc.setField("id", "4242");
+ doc.setField("values_is", new String[] {"111", "222", "333"});
+ assertU(adoc(doc));
+
+ doc = new SolrInputDocument();
+ doc.setField("id", "4242");
+ doc.setField("values_is", ImmutableMap.of("remove", 111));
+ assertU(adoc(doc));
+ assertU(commit());
+
+ assertQ(req("q", "values_is:111", "indent", "true"), "//result[@numFound = '0']");
+ assertQ(req("q", "values_is:222", "indent", "true"), "//result[@numFound = '1']");
+ assertQ(req("q", "values_is:333", "indent", "true"), "//result[@numFound = '1']");
}
@@ -251,6 +267,22 @@
assertQ(req("q", "intRemove:[* TO *]", "indent", "true"), "//result[@numFound = '4']");
assertQ(req("q", "intRemove:111", "indent", "true"), "//result[@numFound = '3']");
+
+ // Test that mv int fields can have values removed prior to being committed to index (see SOLR-14971)
+ doc = new SolrInputDocument();
+ doc.setField("id", "4242");
+ doc.setField("values_is", new Integer[] {111, 222, 333});
+ assertU(adoc(doc));
+
+ doc = new SolrInputDocument();
+ doc.setField("id", "4242");
+ doc.setField("values_is", ImmutableMap.of("remove", 111));
+ assertU(adoc(doc));
+ assertU(commit());
+
+ assertQ(req("q", "values_is:111", "indent", "true"), "//result[@numFound = '0']");
+ assertQ(req("q", "values_is:222", "indent", "true"), "//result[@numFound = '1']");
+ assertQ(req("q", "values_is:333", "indent", "true"), "//result[@numFound = '1']");
}
@Test
diff --git a/solr/docker/build.gradle b/solr/docker/build.gradle
index a4cdeea..d6e80ac 100644
--- a/solr/docker/build.gradle
+++ b/solr/docker/build.gradle
@@ -15,6 +15,9 @@
* limitations under the License.
*/
+import com.google.common.base.Preconditions
+import com.google.common.base.Strings
+
apply plugin: 'base'
apply plugin: 'com.palantir.docker'
@@ -31,13 +34,16 @@
docker dockerPackage
}
-def dockerImageName = "apache/solr:${version}"
-def baseDockerImage = 'openjdk:11-jre-slim'
+def dockerImageRepo = propertyOrEnvOrDefault("solr.docker.imageRepo", "SOLR_DOCKER_IMAGE_REPO", "apache/solr")
+def dockerImageTag = propertyOrEnvOrDefault("solr.docker.imageTag", "SOLR_DOCKER_IMAGE_TAG", "${version}")
+def dockerImageName = propertyOrEnvOrDefault("solr.docker.imageName", "SOLR_DOCKER_IMAGE_NAME", "${dockerImageRepo}:${dockerImageTag}")
+def baseDockerImage = propertyOrEnvOrDefault("solr.docker.baseImage", "SOLR_DOCKER_BASE_IMAGE", 'openjdk:11-jre-slim')
+def githubUrlOrMirror = propertyOrEnvOrDefault("solr.docker.githubUrl", "SOLR_DOCKER_GITHUB_URL", 'github.com')
docker {
name = dockerImageName
files file('include')
- buildArgs(['BASE_IMAGE' : baseDockerImage, 'SOLR_PACKAGE_IMAGE' : 'apache/solr-build:local-package', 'SOLR_VERSION': "${version}"])
+ buildArgs(['BASE_IMAGE' : baseDockerImage, 'SOLR_PACKAGE_IMAGE' : 'apache/solr-build:local-package', 'SOLR_VERSION': "${version}", 'GITHUB_URL': githubUrlOrMirror])
}
tasks.docker {
@@ -53,12 +59,22 @@
}
abstract class DockerTestSuite extends DefaultTask {
+ private String solrImageName = null;
private List<String> tests = new ArrayList<>();
private List<String> ignore = new ArrayList<>();
@OutputDirectory
abstract DirectoryProperty getOutputDir()
+ public void setSolrImageName(String solrImageName) {
+ this.solrImageName = solrImageName
+ }
+
+ public String getSolrImageName() {
+ Preconditions.checkArgument(!Strings.isNullOrEmpty(solrImageName), "solrImageName is a required dockerTests configuration item.")
+ return solrImageName
+ }
+
@Option(option = "tests", description = "Only run these specified tests, comma separated.")
public void setTests(List<String> tests) {
this.tests = tests;
@@ -81,6 +97,8 @@
@TaskAction
void execute() {
+ // Print information on the image before it is tested
+ project.logger.lifecycle("Testing Solr Image: $solrImageName\n")
def sourceDir = project.file("tests/cases")
sourceDir.eachFile { file ->
def testName = file.getName()
@@ -92,7 +110,7 @@
project.exec {
environment "TEST_DIR", "$file"
environment "BUILD_DIR", "$testCaseBuildDir"
- commandLine "bash", "$file/test.sh", "apache/solr:${project.version}"
+ commandLine "bash", "$file/test.sh", solrImageName
}
}
}
@@ -101,4 +119,5 @@
task testDocker(type: DockerTestSuite) {
outputDir = project.file("$buildDir/tmp/tests")
+ solrImageName = dockerImageName
}
\ No newline at end of file
diff --git a/solr/docker/tests/cases/version/test.sh b/solr/docker/tests/cases/version/test.sh
deleted file mode 100755
index 0f8c7ee..0000000
--- a/solr/docker/tests/cases/version/test.sh
+++ /dev/null
@@ -1,45 +0,0 @@
-#!/bin/bash
-#
-set -euo pipefail
-
-TEST_DIR="${TEST_DIR:-$(dirname -- "${BASH_SOURCE[0]}")}"
-source "${TEST_DIR}/../../shared.sh"
-
-echo "Running $container_name"
-docker run --name "$container_name" -d "$tag"
-
-wait_for_server_started "$container_name"
-
-echo "Checking that the OS matches the tag '$tag'"
-if echo "$tag" | grep -q -- -alpine; then
- alpine_version=$(docker exec --user=solr "$container_name" cat /etc/alpine-release || true)
- if [[ -z $alpine_version ]]; then
- echo "Could not get alpine version from container $container_name"
- container_cleanup "$container_name"
- exit 1
- fi
- echo "Alpine $alpine_version"
-else
- debian_version=$(docker exec --user=solr "$container_name" cat /etc/debian_version || true)
- if [[ -z $debian_version ]]; then
- echo "Could not get debian version from container $container_name"
- container_cleanup "$container_name"
- exit 1
- fi
- echo "Debian $debian_version"
-fi
-
-# check that the version of Solr matches the tag
-changelog_version=$(docker exec --user=solr "$container_name" bash -c "grep -E '^==========* ' /opt/solr/CHANGES.txt | head -n 1 | tr -d '= '")
-echo "Solr version $changelog_version"
-solr_version_from_tag=$(echo "$tag" | sed -e 's/^.*://' -e 's/-.*//')
-
-if [[ $changelog_version != "$solr_version_from_tag" ]]; then
- echo "Solr version mismatch"
- container_cleanup "$container_name"
- exit 1
-fi
-
-container_cleanup "$container_name"
-
-echo "Test $TEST_NAME $tag succeeded"
diff --git a/solr/licenses/http2-client-9.4.32.v20200930.jar.sha1 b/solr/licenses/http2-client-9.4.32.v20200930.jar.sha1
deleted file mode 100644
index 4635454..0000000
--- a/solr/licenses/http2-client-9.4.32.v20200930.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-2908966fd6b108bf7fbcc767edad6b92ed9fcac4
diff --git a/solr/licenses/http2-client-9.4.34.v20201102.jar.sha1 b/solr/licenses/http2-client-9.4.34.v20201102.jar.sha1
new file mode 100644
index 0000000..1c048a7
--- /dev/null
+++ b/solr/licenses/http2-client-9.4.34.v20201102.jar.sha1
@@ -0,0 +1 @@
+854ea016e9d02ca88d575c1600cf2f0629b6556f
diff --git a/solr/licenses/http2-common-9.4.32.v20200930.jar.sha1 b/solr/licenses/http2-common-9.4.32.v20200930.jar.sha1
deleted file mode 100644
index 9e9611c..0000000
--- a/solr/licenses/http2-common-9.4.32.v20200930.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-8063644e52b75d324a125323ea24d2eabbd09a5d
diff --git a/solr/licenses/http2-common-9.4.34.v20201102.jar.sha1 b/solr/licenses/http2-common-9.4.34.v20201102.jar.sha1
new file mode 100644
index 0000000..e21fb11
--- /dev/null
+++ b/solr/licenses/http2-common-9.4.34.v20201102.jar.sha1
@@ -0,0 +1 @@
+37747b3e903eacb8532d8fbc7c15540086f05835
diff --git a/solr/licenses/http2-hpack-9.4.32.v20200930.jar.sha1 b/solr/licenses/http2-hpack-9.4.32.v20200930.jar.sha1
deleted file mode 100644
index d828699..0000000
--- a/solr/licenses/http2-hpack-9.4.32.v20200930.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-7be7989699e4fb6c9ec77d49c8080564a347595d
diff --git a/solr/licenses/http2-hpack-9.4.34.v20201102.jar.sha1 b/solr/licenses/http2-hpack-9.4.34.v20201102.jar.sha1
new file mode 100644
index 0000000..1fe1dc7
--- /dev/null
+++ b/solr/licenses/http2-hpack-9.4.34.v20201102.jar.sha1
@@ -0,0 +1 @@
+4adaf9b343d17a786b5f26bf5c7067d4b0958164
diff --git a/solr/licenses/http2-http-client-transport-9.4.32.v20200930.jar.sha1 b/solr/licenses/http2-http-client-transport-9.4.32.v20200930.jar.sha1
deleted file mode 100644
index 674a1fc..0000000
--- a/solr/licenses/http2-http-client-transport-9.4.32.v20200930.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-9753d2249f0c0e95e27accfa271555e1426d8e8f
diff --git a/solr/licenses/http2-http-client-transport-9.4.34.v20201102.jar.sha1 b/solr/licenses/http2-http-client-transport-9.4.34.v20201102.jar.sha1
new file mode 100644
index 0000000..d46467c
--- /dev/null
+++ b/solr/licenses/http2-http-client-transport-9.4.34.v20201102.jar.sha1
@@ -0,0 +1 @@
+1b4f1e7d279d9fb7ac4f37def8d23832172db476
diff --git a/solr/licenses/http2-server-9.4.32.v20200930.jar.sha1 b/solr/licenses/http2-server-9.4.32.v20200930.jar.sha1
deleted file mode 100644
index 87aef14..0000000
--- a/solr/licenses/http2-server-9.4.32.v20200930.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-940412646b103231437f768a174ccde744d70171
diff --git a/solr/licenses/http2-server-9.4.34.v20201102.jar.sha1 b/solr/licenses/http2-server-9.4.34.v20201102.jar.sha1
new file mode 100644
index 0000000..2909455
--- /dev/null
+++ b/solr/licenses/http2-server-9.4.34.v20201102.jar.sha1
@@ -0,0 +1 @@
+eb2d1caf2772d0ea21d2c41aa77cb3e57d682c82
diff --git a/solr/licenses/jetty-alpn-client-9.4.32.v20200930.jar.sha1 b/solr/licenses/jetty-alpn-client-9.4.32.v20200930.jar.sha1
deleted file mode 100644
index 1fc1785..0000000
--- a/solr/licenses/jetty-alpn-client-9.4.32.v20200930.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-90ffeef16f1259f2c0384b1c2417b3a9b049d82d
diff --git a/solr/licenses/jetty-alpn-client-9.4.34.v20201102.jar.sha1 b/solr/licenses/jetty-alpn-client-9.4.34.v20201102.jar.sha1
new file mode 100644
index 0000000..516be7f
--- /dev/null
+++ b/solr/licenses/jetty-alpn-client-9.4.34.v20201102.jar.sha1
@@ -0,0 +1 @@
+2b741945279551477cd038d9acefa34190f4efad
diff --git a/solr/licenses/jetty-alpn-java-client-9.4.32.v20200930.jar.sha1 b/solr/licenses/jetty-alpn-java-client-9.4.32.v20200930.jar.sha1
deleted file mode 100644
index f5142c4..0000000
--- a/solr/licenses/jetty-alpn-java-client-9.4.32.v20200930.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-9787a29bb7677d30c0d43a3b34ad0f28d1a558a4
diff --git a/solr/licenses/jetty-alpn-java-client-9.4.34.v20201102.jar.sha1 b/solr/licenses/jetty-alpn-java-client-9.4.34.v20201102.jar.sha1
new file mode 100644
index 0000000..a2c9fa8
--- /dev/null
+++ b/solr/licenses/jetty-alpn-java-client-9.4.34.v20201102.jar.sha1
@@ -0,0 +1 @@
+2717d00a3e2e79304d3e2e620d10edab993715f9
diff --git a/solr/licenses/jetty-alpn-java-server-9.4.32.v20200930.jar.sha1 b/solr/licenses/jetty-alpn-java-server-9.4.32.v20200930.jar.sha1
deleted file mode 100644
index 7c5ee13..0000000
--- a/solr/licenses/jetty-alpn-java-server-9.4.32.v20200930.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-cd21d62e95e5c438ea47404d8f11bbfba4af0aad
diff --git a/solr/licenses/jetty-alpn-java-server-9.4.34.v20201102.jar.sha1 b/solr/licenses/jetty-alpn-java-server-9.4.34.v20201102.jar.sha1
new file mode 100644
index 0000000..22a8586
--- /dev/null
+++ b/solr/licenses/jetty-alpn-java-server-9.4.34.v20201102.jar.sha1
@@ -0,0 +1 @@
+3b11b75b11f1e3b8230a56eefb175aabed89aad7
diff --git a/solr/licenses/jetty-alpn-server-9.4.32.v20200930.jar.sha1 b/solr/licenses/jetty-alpn-server-9.4.32.v20200930.jar.sha1
deleted file mode 100644
index d7d287f..0000000
--- a/solr/licenses/jetty-alpn-server-9.4.32.v20200930.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-9b6bf18bc863d84764b3f7f542da0c3aa4467c41
diff --git a/solr/licenses/jetty-alpn-server-9.4.34.v20201102.jar.sha1 b/solr/licenses/jetty-alpn-server-9.4.34.v20201102.jar.sha1
new file mode 100644
index 0000000..46d541d
--- /dev/null
+++ b/solr/licenses/jetty-alpn-server-9.4.34.v20201102.jar.sha1
@@ -0,0 +1 @@
+ddac4f9a5c55dcd9c48a9672b5e6abac533c0e21
diff --git a/solr/licenses/jetty-client-9.4.32.v20200930.jar.sha1 b/solr/licenses/jetty-client-9.4.32.v20200930.jar.sha1
deleted file mode 100644
index b1c61d8..0000000
--- a/solr/licenses/jetty-client-9.4.32.v20200930.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-df978763f12cf7ec74ab60c9b4769b6968ee2987
diff --git a/solr/licenses/jetty-client-9.4.34.v20201102.jar.sha1 b/solr/licenses/jetty-client-9.4.34.v20201102.jar.sha1
new file mode 100644
index 0000000..8af5418
--- /dev/null
+++ b/solr/licenses/jetty-client-9.4.34.v20201102.jar.sha1
@@ -0,0 +1 @@
+b34fa346d2e3a459d4c30d533e28ccf2aee3f8a8
diff --git a/solr/licenses/jetty-continuation-9.4.32.v20200930.jar.sha1 b/solr/licenses/jetty-continuation-9.4.32.v20200930.jar.sha1
deleted file mode 100644
index 681e97a..0000000
--- a/solr/licenses/jetty-continuation-9.4.32.v20200930.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-b46713a1b8b2baf951f6514dd621c5a546254d6c
diff --git a/solr/licenses/jetty-continuation-9.4.34.v20201102.jar.sha1 b/solr/licenses/jetty-continuation-9.4.34.v20201102.jar.sha1
new file mode 100644
index 0000000..9cfe056
--- /dev/null
+++ b/solr/licenses/jetty-continuation-9.4.34.v20201102.jar.sha1
@@ -0,0 +1 @@
+070923c6b55dcabd4bde53971554261048844b3f
diff --git a/solr/licenses/jetty-deploy-9.4.32.v20200930.jar.sha1 b/solr/licenses/jetty-deploy-9.4.32.v20200930.jar.sha1
deleted file mode 100644
index 46b1e93..0000000
--- a/solr/licenses/jetty-deploy-9.4.32.v20200930.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-e63e0fcc4afb55527666662c039edd9779ec413d
diff --git a/solr/licenses/jetty-deploy-9.4.34.v20201102.jar.sha1 b/solr/licenses/jetty-deploy-9.4.34.v20201102.jar.sha1
new file mode 100644
index 0000000..224b8b6
--- /dev/null
+++ b/solr/licenses/jetty-deploy-9.4.34.v20201102.jar.sha1
@@ -0,0 +1 @@
+6d0f0eefa7f210eab166ac2ec03d3d329b622632
diff --git a/solr/licenses/jetty-http-9.4.32.v20200930.jar.sha1 b/solr/licenses/jetty-http-9.4.32.v20200930.jar.sha1
deleted file mode 100644
index 0e08fdd..0000000
--- a/solr/licenses/jetty-http-9.4.32.v20200930.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-5fdcefd82178d11f895690f4fe6e843be69394b3
diff --git a/solr/licenses/jetty-http-9.4.34.v20201102.jar.sha1 b/solr/licenses/jetty-http-9.4.34.v20201102.jar.sha1
new file mode 100644
index 0000000..c44b514
--- /dev/null
+++ b/solr/licenses/jetty-http-9.4.34.v20201102.jar.sha1
@@ -0,0 +1 @@
+7acb9ea4deaba904a49e304ac24f9b3a8ddb5881
diff --git a/solr/licenses/jetty-io-9.4.32.v20200930.jar.sha1 b/solr/licenses/jetty-io-9.4.32.v20200930.jar.sha1
deleted file mode 100644
index 1a59673..0000000
--- a/solr/licenses/jetty-io-9.4.32.v20200930.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-0d0f32c3b511d6b3a542787f95ed229731588810
diff --git a/solr/licenses/jetty-io-9.4.34.v20201102.jar.sha1 b/solr/licenses/jetty-io-9.4.34.v20201102.jar.sha1
new file mode 100644
index 0000000..46b17d2
--- /dev/null
+++ b/solr/licenses/jetty-io-9.4.34.v20201102.jar.sha1
@@ -0,0 +1 @@
+80693ce94fa34647e9af613ba17c443feb624590
diff --git a/solr/licenses/jetty-jmx-9.4.32.v20200930.jar.sha1 b/solr/licenses/jetty-jmx-9.4.32.v20200930.jar.sha1
deleted file mode 100644
index bb1d733..0000000
--- a/solr/licenses/jetty-jmx-9.4.32.v20200930.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-5e8e87a6f89b8eabf5b5b1765e3d758209001570
diff --git a/solr/licenses/jetty-jmx-9.4.34.v20201102.jar.sha1 b/solr/licenses/jetty-jmx-9.4.34.v20201102.jar.sha1
new file mode 100644
index 0000000..dd20013
--- /dev/null
+++ b/solr/licenses/jetty-jmx-9.4.34.v20201102.jar.sha1
@@ -0,0 +1 @@
+a0a065ed8acfd7a29712b22a16485c96fbdc9af8
diff --git a/solr/licenses/jetty-rewrite-9.4.32.v20200930.jar.sha1 b/solr/licenses/jetty-rewrite-9.4.32.v20200930.jar.sha1
deleted file mode 100644
index e56569f..0000000
--- a/solr/licenses/jetty-rewrite-9.4.32.v20200930.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-53cf27a75d0c850a5e90543a86f2cac70d8d8b17
diff --git a/solr/licenses/jetty-rewrite-9.4.34.v20201102.jar.sha1 b/solr/licenses/jetty-rewrite-9.4.34.v20201102.jar.sha1
new file mode 100644
index 0000000..24275d1
--- /dev/null
+++ b/solr/licenses/jetty-rewrite-9.4.34.v20201102.jar.sha1
@@ -0,0 +1 @@
+87a350e23851a8d326defc2d4259e4e6846cb490
diff --git a/solr/licenses/jetty-security-9.4.32.v20200930.jar.sha1 b/solr/licenses/jetty-security-9.4.32.v20200930.jar.sha1
deleted file mode 100644
index aeed0dc..0000000
--- a/solr/licenses/jetty-security-9.4.32.v20200930.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-16a6110fa40e49050146de5f597ab3a3a3fa83b5
diff --git a/solr/licenses/jetty-security-9.4.34.v20201102.jar.sha1 b/solr/licenses/jetty-security-9.4.34.v20201102.jar.sha1
new file mode 100644
index 0000000..d220aed
--- /dev/null
+++ b/solr/licenses/jetty-security-9.4.34.v20201102.jar.sha1
@@ -0,0 +1 @@
+bf4d5bb4deb938ed58813bcae75c6f1b82453df1
diff --git a/solr/licenses/jetty-server-9.4.32.v20200930.jar.sha1 b/solr/licenses/jetty-server-9.4.32.v20200930.jar.sha1
deleted file mode 100644
index 81c447c..0000000
--- a/solr/licenses/jetty-server-9.4.32.v20200930.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-d2d89099be5237cf68254bc943a7d800d3ee1945
diff --git a/solr/licenses/jetty-server-9.4.34.v20201102.jar.sha1 b/solr/licenses/jetty-server-9.4.34.v20201102.jar.sha1
new file mode 100644
index 0000000..bd9bf9f
--- /dev/null
+++ b/solr/licenses/jetty-server-9.4.34.v20201102.jar.sha1
@@ -0,0 +1 @@
+19c5309325d5819a9e22bfee66a3c0d50750ed03
diff --git a/solr/licenses/jetty-servlet-9.4.32.v20200930.jar.sha1 b/solr/licenses/jetty-servlet-9.4.32.v20200930.jar.sha1
deleted file mode 100644
index e23407d..0000000
--- a/solr/licenses/jetty-servlet-9.4.32.v20200930.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-4253dd46c099e0bca4dd763fc1e10774e10de00a
diff --git a/solr/licenses/jetty-servlet-9.4.34.v20201102.jar.sha1 b/solr/licenses/jetty-servlet-9.4.34.v20201102.jar.sha1
new file mode 100644
index 0000000..822f159
--- /dev/null
+++ b/solr/licenses/jetty-servlet-9.4.34.v20201102.jar.sha1
@@ -0,0 +1 @@
+606f9724b14bf58c915ee0e37f6425c52dae7b76
diff --git a/solr/licenses/jetty-servlets-9.4.32.v20200930.jar.sha1 b/solr/licenses/jetty-servlets-9.4.32.v20200930.jar.sha1
deleted file mode 100644
index 2d97555..0000000
--- a/solr/licenses/jetty-servlets-9.4.32.v20200930.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-791a484aaa29871fb8e19bf28bf55b60100f6c50
diff --git a/solr/licenses/jetty-servlets-9.4.34.v20201102.jar.sha1 b/solr/licenses/jetty-servlets-9.4.34.v20201102.jar.sha1
new file mode 100644
index 0000000..b83ff99
--- /dev/null
+++ b/solr/licenses/jetty-servlets-9.4.34.v20201102.jar.sha1
@@ -0,0 +1 @@
+60220df1900bfe624b8d5e9fad4d3da755bd5c6a
diff --git a/solr/licenses/jetty-start-9.4.27.v20200227-shaded.jar.sha1 b/solr/licenses/jetty-start-9.4.27.v20200227-shaded.jar.sha1
deleted file mode 100644
index ad1ad8d..0000000
--- a/solr/licenses/jetty-start-9.4.27.v20200227-shaded.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-d5aa7d9c9cb261a9b4c460c918fd972d1a9882ed
diff --git a/solr/licenses/jetty-start-9.4.34.v20201102-shaded.jar.sha1 b/solr/licenses/jetty-start-9.4.34.v20201102-shaded.jar.sha1
new file mode 100644
index 0000000..4d6bbe7
--- /dev/null
+++ b/solr/licenses/jetty-start-9.4.34.v20201102-shaded.jar.sha1
@@ -0,0 +1 @@
+8bcce89a62de93e4c5ff2cd77de6d461c79df83e
diff --git a/solr/licenses/jetty-util-9.4.32.v20200930.jar.sha1 b/solr/licenses/jetty-util-9.4.32.v20200930.jar.sha1
deleted file mode 100644
index e59c8f4..0000000
--- a/solr/licenses/jetty-util-9.4.32.v20200930.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-efefd29006dcc9c9960a679263504287ce4e6896
diff --git a/solr/licenses/jetty-util-9.4.34.v20201102.jar.sha1 b/solr/licenses/jetty-util-9.4.34.v20201102.jar.sha1
new file mode 100644
index 0000000..5b96d84
--- /dev/null
+++ b/solr/licenses/jetty-util-9.4.34.v20201102.jar.sha1
@@ -0,0 +1 @@
+47993d1def63ca9e8bc7284716a89031f642db82
diff --git a/solr/licenses/jetty-webapp-9.4.32.v20200930.jar.sha1 b/solr/licenses/jetty-webapp-9.4.32.v20200930.jar.sha1
deleted file mode 100644
index 7677ef7..0000000
--- a/solr/licenses/jetty-webapp-9.4.32.v20200930.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-2d97dfa12dfbf9a7151ad5f4346daf676fb242a5
diff --git a/solr/licenses/jetty-webapp-9.4.34.v20201102.jar.sha1 b/solr/licenses/jetty-webapp-9.4.34.v20201102.jar.sha1
new file mode 100644
index 0000000..aa39816
--- /dev/null
+++ b/solr/licenses/jetty-webapp-9.4.34.v20201102.jar.sha1
@@ -0,0 +1 @@
+a01524d9e59277e6849136a245056d91018a0988
diff --git a/solr/licenses/jetty-xml-9.4.32.v20200930.jar.sha1 b/solr/licenses/jetty-xml-9.4.32.v20200930.jar.sha1
deleted file mode 100644
index d601659..0000000
--- a/solr/licenses/jetty-xml-9.4.32.v20200930.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-ed0a7f3c39ce1b15573018b6c162e8612cf5d6ee
diff --git a/solr/licenses/jetty-xml-9.4.34.v20201102.jar.sha1 b/solr/licenses/jetty-xml-9.4.34.v20201102.jar.sha1
new file mode 100644
index 0000000..b02a15b
--- /dev/null
+++ b/solr/licenses/jetty-xml-9.4.34.v20201102.jar.sha1
@@ -0,0 +1 @@
+72c779fe9ae051086ddce8dc01745ffd22646f60
diff --git a/solr/solr-ref-guide/src/collection-management.adoc b/solr/solr-ref-guide/src/collection-management.adoc
index c1d3af0..14e32b6 100644
--- a/solr/solr-ref-guide/src/collection-management.adoc
+++ b/solr/solr-ref-guide/src/collection-management.adoc
@@ -90,6 +90,11 @@
`property._name_=_value_`::
Set core property _name_ to _value_. See the section <<defining-core-properties.adoc#defining-core-properties,Defining core.properties>> for details on supported properties and values.
+[WARNING]
+====
+The entries in each core.properties file are vital for Solr to function correctly. Overriding entries can result in unusable collections. Altering these entries by specifying `property._name_=_value_` is an expert-level option and should only be used if you have a thorough understanding of the consequences.
+====
+
`async`::
Request ID to track this action which will be <<collections-api.adoc#asynchronous-calls,processed asynchronously>>.
diff --git a/solr/solr-ref-guide/src/metrics-reporting.adoc b/solr/solr-ref-guide/src/metrics-reporting.adoc
index 3516475..6720359 100644
--- a/solr/solr-ref-guide/src/metrics-reporting.adoc
+++ b/solr/solr-ref-guide/src/metrics-reporting.adoc
@@ -32,6 +32,45 @@
There is also a dedicated `/admin/metrics` handler that can be queried to report all or a subset of the current metrics from multiple registries.
+=== Missing metrics
+Long-lived metrics values are still reported when the underlying value is unavailable (e.g., "INDEX.sizeInBytes" when
+IndexReader is closed). Short-lived transient metrics (such as cache entries) that are properties of complex gauges
+(internally represented as `MetricsMap`) are simply skipped when not available, and neither their names nor values
+appear in registries (or in /admin/metrics reports).
+
+When a missing value is encountered, by default it is reported as a null value, regardless of the metric type.
+This can be configured in the `solr.xml:/solr/metrics/missingValues` element, which recognizes the following child elements
+(for string elements a JSON payload is supported):
+
+`nullNumber`::
+value to use when a missing (null) numeric metric value is encountered.
+
+`notANumber`::
+value to use when an invalid numeric value is encountered.
+
+`nullString`::
+value to use when a missing (null) string metric is encountered.
+
+`nullObject`::
+value to use when a missing (null) complex object is encountered.
+
+Example configuration that returns null for missing numbers, -1 for
+invalid numeric values, empty string for missing strings, and a Map for missing
+complex objects:
+
+[source,xml]
+----
+<metrics>
+ <missingValues>
+ <null name="nullNumber"/>
+ <int name="notANumber">-1</int>
+ <str name="nullString"></str>
+ <str name="nullObject">{"value":"missing"}</str>
+ </missingValues>
+</metrics>
+----
+
+
== Metric Registries
Solr includes multiple metric registries, which group related metrics.
diff --git a/solr/solr-ref-guide/src/replica-management.adoc b/solr/solr-ref-guide/src/replica-management.adoc
index 6896995..14dc264 100644
--- a/solr/solr-ref-guide/src/replica-management.adoc
+++ b/solr/solr-ref-guide/src/replica-management.adoc
@@ -78,6 +78,11 @@
`property._name_=_value_`::
Set core property _name_ to _value_. See <<defining-core-properties.adoc#defining-core-properties,Defining core.properties>> for details about supported properties and values.
+[WARNING]
+====
+The entries in each core.properties file are vital for Solr to function correctly. Overriding entries can result in unusable collections. Altering these entries by specifying `property._name_=_value_` is an expert-level option and should only be used if you have a thorough understanding of the consequences.
+====
+
`waitForFinalState`::
If `true`, the request will complete only when all affected replicas become active. The default is `false`, which means that the API will return the status of the single action, which may be before the new replica is online and active.
diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/request/TestCoreAdmin.java b/solr/solrj/src/test/org/apache/solr/client/solrj/request/TestCoreAdmin.java
index fb0073a..1b4ecc3 100644
--- a/solr/solrj/src/test/org/apache/solr/client/solrj/request/TestCoreAdmin.java
+++ b/solr/solrj/src/test/org/apache/solr/client/solrj/request/TestCoreAdmin.java
@@ -193,6 +193,7 @@
names = cores.getAllCoreNames();
assertFalse(names.toString(), names.contains("coreRenamed"));
assertTrue(names.toString(), names.contains("core1"));
+ assertEquals(names.size(), cores.getNumAllCores());
}
@Test
diff --git a/solr/solrj/src/test/org/apache/solr/common/cloud/TestZkMaintenanceUtils.java b/solr/solrj/src/test/org/apache/solr/common/cloud/TestZkMaintenanceUtils.java
index 3f6b6d7..661844a 100644
--- a/solr/solrj/src/test/org/apache/solr/common/cloud/TestZkMaintenanceUtils.java
+++ b/solr/solrj/src/test/org/apache/solr/common/cloud/TestZkMaintenanceUtils.java
@@ -28,8 +28,6 @@
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.cloud.ZkTestServer;
-import org.apache.solr.common.cloud.SolrZkClient;
-import org.apache.solr.common.cloud.ZkMaintenanceUtils;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.junit.AfterClass;
diff --git a/versions.lock b/versions.lock
index 9108710..5727718 100644
--- a/versions.lock
+++ b/versions.lock
@@ -137,29 +137,29 @@
org.codehaus.mojo:animal-sniffer-annotations:1.14 (1 constraints: ea09d5aa)
org.codehaus.woodstox:stax2-api:3.1.4 (2 constraints: 241635f1)
org.codehaus.woodstox:woodstox-core-asl:4.4.1 (1 constraints: 0b050c36)
-org.eclipse.jetty:jetty-alpn-client:9.4.32.v20200930 (3 constraints: bc2c9ffb)
-org.eclipse.jetty:jetty-alpn-java-client:9.4.32.v20200930 (1 constraints: 7b071d7d)
-org.eclipse.jetty:jetty-alpn-java-server:9.4.32.v20200930 (1 constraints: 7b071d7d)
-org.eclipse.jetty:jetty-alpn-server:9.4.32.v20200930 (2 constraints: 161b3fdc)
-org.eclipse.jetty:jetty-client:9.4.32.v20200930 (1 constraints: c617f2ad)
-org.eclipse.jetty:jetty-continuation:9.4.32.v20200930 (2 constraints: 5018e7f9)
-org.eclipse.jetty:jetty-deploy:9.4.32.v20200930 (1 constraints: 7b071d7d)
-org.eclipse.jetty:jetty-http:9.4.32.v20200930 (5 constraints: 66490b40)
-org.eclipse.jetty:jetty-io:9.4.32.v20200930 (8 constraints: d27dea06)
-org.eclipse.jetty:jetty-jmx:9.4.32.v20200930 (1 constraints: 7b071d7d)
-org.eclipse.jetty:jetty-rewrite:9.4.32.v20200930 (1 constraints: 7b071d7d)
-org.eclipse.jetty:jetty-security:9.4.32.v20200930 (2 constraints: dd17a8dc)
-org.eclipse.jetty:jetty-server:9.4.32.v20200930 (6 constraints: 995e4fe5)
-org.eclipse.jetty:jetty-servlet:9.4.32.v20200930 (2 constraints: 57170cbe)
-org.eclipse.jetty:jetty-servlets:9.4.32.v20200930 (1 constraints: 7b071d7d)
-org.eclipse.jetty:jetty-util:9.4.32.v20200930 (7 constraints: 4264e8e9)
-org.eclipse.jetty:jetty-webapp:9.4.32.v20200930 (2 constraints: 651712bf)
-org.eclipse.jetty:jetty-xml:9.4.32.v20200930 (3 constraints: 4127f11b)
-org.eclipse.jetty.http2:http2-client:9.4.32.v20200930 (2 constraints: 3d1f223d)
-org.eclipse.jetty.http2:http2-common:9.4.32.v20200930 (3 constraints: 0c2b9619)
-org.eclipse.jetty.http2:http2-hpack:9.4.32.v20200930 (2 constraints: 4019315a)
-org.eclipse.jetty.http2:http2-http-client-transport:9.4.32.v20200930 (1 constraints: 7807027d)
-org.eclipse.jetty.http2:http2-server:9.4.32.v20200930 (1 constraints: 7807027d)
+org.eclipse.jetty:jetty-alpn-client:9.4.34.v20201102 (3 constraints: a72c1af6)
+org.eclipse.jetty:jetty-alpn-java-client:9.4.34.v20201102 (1 constraints: 7207007d)
+org.eclipse.jetty:jetty-alpn-java-server:9.4.34.v20201102 (1 constraints: 7207007d)
+org.eclipse.jetty:jetty-alpn-server:9.4.34.v20201102 (2 constraints: 071b04da)
+org.eclipse.jetty:jetty-client:9.4.34.v20201102 (1 constraints: c017f0ad)
+org.eclipse.jetty:jetty-continuation:9.4.34.v20201102 (2 constraints: 4118f4f7)
+org.eclipse.jetty:jetty-deploy:9.4.34.v20201102 (1 constraints: 7207007d)
+org.eclipse.jetty:jetty-http:9.4.34.v20201102 (5 constraints: 45492531)
+org.eclipse.jetty:jetty-io:9.4.34.v20201102 (8 constraints: 9f7d0ae0)
+org.eclipse.jetty:jetty-jmx:9.4.34.v20201102 (1 constraints: 7207007d)
+org.eclipse.jetty:jetty-rewrite:9.4.34.v20201102 (1 constraints: 7207007d)
+org.eclipse.jetty:jetty-security:9.4.34.v20201102 (2 constraints: ce17beda)
+org.eclipse.jetty:jetty-server:9.4.34.v20201102 (6 constraints: 785e1ad4)
+org.eclipse.jetty:jetty-servlet:9.4.34.v20201102 (2 constraints: 48172bbc)
+org.eclipse.jetty:jetty-servlets:9.4.34.v20201102 (1 constraints: 7207007d)
+org.eclipse.jetty:jetty-util:9.4.34.v20201102 (7 constraints: 156496cd)
+org.eclipse.jetty:jetty-webapp:9.4.34.v20201102 (2 constraints: 561731bd)
+org.eclipse.jetty:jetty-xml:9.4.34.v20201102 (3 constraints: 2c272017)
+org.eclipse.jetty.http2:http2-client:9.4.34.v20201102 (2 constraints: 311f743b)
+org.eclipse.jetty.http2:http2-common:9.4.34.v20201102 (3 constraints: fa2aa015)
+org.eclipse.jetty.http2:http2-hpack:9.4.34.v20201102 (2 constraints: 3419dd58)
+org.eclipse.jetty.http2:http2-http-client-transport:9.4.34.v20201102 (1 constraints: 7207007d)
+org.eclipse.jetty.http2:http2-server:9.4.34.v20201102 (1 constraints: 7207007d)
org.gagravarr:vorbis-java-core:0.8 (1 constraints: ac041f2c)
org.gagravarr:vorbis-java-tika:0.8 (1 constraints: ac041f2c)
org.hamcrest:hamcrest:2.2 (1 constraints: a8041f2c)
@@ -204,6 +204,8 @@
org.apache.kerby:kerb-simplekdc:1.0.1 (1 constraints: 0405f135)
org.apache.kerby:kerby-kdc:1.0.1 (1 constraints: 0405f135)
org.apache.logging.log4j:log4j-1.2-api:2.13.2 (1 constraints: 3a053a3b)
+org.apache.lucene:lucene-codecs:8.6.3 (1 constraints: 13052836)
+org.apache.lucene:lucene-core:8.6.3 (1 constraints: 7f0d022f)
org.asciidoctor:asciidoctorj:1.6.2 (1 constraints: 0b050436)
org.asciidoctor:asciidoctorj-api:1.6.2 (1 constraints: e30cfb0d)
org.hsqldb:hsqldb:2.4.0 (1 constraints: 08050136)
diff --git a/versions.props b/versions.props
index 981f095..ab51fb6f 100644
--- a/versions.props
+++ b/versions.props
@@ -82,8 +82,8 @@
org.codehaus.janino:*=3.0.9
org.codehaus.woodstox:stax2-api=3.1.4
org.codehaus.woodstox:woodstox-core-asl=4.4.1
-org.eclipse.jetty.http2:*=9.4.32.v20200930
-org.eclipse.jetty:*=9.4.27.v20200227
+org.eclipse.jetty.http2:*=9.4.34.v20201102
+org.eclipse.jetty:*=9.4.34.v20201102
org.gagravarr:*=0.8
org.hamcrest:*=2.2
org.hsqldb:hsqldb=2.4.0