Merge branch 'stabilize_benchmark'
diff --git a/buildSrc/src/main/java/org/apache/lucene/gradle/datasets/ExtractReuters.java b/buildSrc/src/main/java/org/apache/lucene/gradle/datasets/ExtractReuters.java
index b8d6735..34f046f 100644
--- a/buildSrc/src/main/java/org/apache/lucene/gradle/datasets/ExtractReuters.java
+++ b/buildSrc/src/main/java/org/apache/lucene/gradle/datasets/ExtractReuters.java
@@ -27,6 +27,7 @@
import java.nio.file.StandardCopyOption;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import java.util.stream.Stream;
/**
* Split the Reuters SGML documents into Simple Text files containing:
@@ -44,9 +45,10 @@
public void extract() throws IOException {
long count = 0;
Files.createDirectories(outputDir);
-
- if (Files.list(outputDir).count() > 0) {
- throw new IOException("The output directory must be empty: " + outputDir);
+ try(Stream<Path> files = Files.list(outputDir)) {
+ if (files.count() > 0) {
+ throw new IOException("The output directory must be empty: " + outputDir);
+ }
}
try (DirectoryStream<Path> stream = Files.newDirectoryStream(reutersDir, "*.sgm")) {
diff --git a/dev-tools/scripts/releaseWizard.py b/dev-tools/scripts/releaseWizard.py
index b57eefb..2fe72a6 100755
--- a/dev-tools/scripts/releaseWizard.py
+++ b/dev-tools/scripts/releaseWizard.py
@@ -63,7 +63,6 @@
import scriptutil
from consolemenu import ConsoleMenu
from consolemenu.items import FunctionItem, SubmenuItem, ExitItem
-from consolemenu.screen import Screen
from scriptutil import BranchType, Version, download, run
# Lucene-to-Java version mapping
@@ -654,8 +653,8 @@
return "%s%s (%d/%d)" % (prefix, self.title, self.num_done(), self.num_applies())
def get_submenu(self):
- menu = UpdatableConsoleMenu(title=self.title, subtitle=self.get_subtitle, prologue_text=self.get_description(),
- screen=MyScreen())
+ menu = ConsoleMenu(title=self.title, subtitle=self.get_subtitle, prologue_text=self.get_description(),
+ clear_screen=False)
menu.exit_item = CustomExitItem("Return")
for todo in self.get_todos():
if todo.applies(state.release_type):
@@ -663,7 +662,7 @@
return menu
def get_menu_item(self):
- item = UpdatableSubmenuItem(self.get_title, self.get_submenu())
+ item = SubmenuItem(self.get_title, self.get_submenu())
return item
def get_todos(self):
@@ -820,7 +819,7 @@
print("ERROR while executing todo %s (%s)" % (self.get_title(), e))
def get_menu_item(self):
- return UpdatableFunctionItem(self.get_title, self.display_and_confirm)
+ return FunctionItem(self.get_title, self.display_and_confirm)
def clone(self):
clone = Todo(self.id, self.title, description=self.description)
@@ -1234,104 +1233,6 @@
input("\nPress ENTER to continue...")
-# Custom classes for ConsoleMenu, to make menu texts dynamic
-# Needed until https://github.com/aegirhall/console-menu/pull/25 is released
-# See https://pypi.org/project/console-menu/ for other docs
-
-class UpdatableConsoleMenu(ConsoleMenu):
-
- def __repr__(self):
- return "%s: %s. %d items" % (self.get_title(), self.get_subtitle(), len(self.items))
-
- def draw(self):
- """
- Refreshes the screen and redraws the menu. Should be called whenever something changes that needs to be redrawn.
- """
- self.screen.printf(self.formatter.format(title=self.get_title(), subtitle=self.get_subtitle(), items=self.items,
- prologue_text=self.get_prologue_text(), epilogue_text=self.get_epilogue_text()))
-
- # Getters to get text in case method reference
- def get_title(self):
- return self.title() if callable(self.title) else self.title
-
- def get_subtitle(self):
- return self.subtitle() if callable(self.subtitle) else self.subtitle
-
- def get_prologue_text(self):
- return self.prologue_text() if callable(self.prologue_text) else self.prologue_text
-
- def get_epilogue_text(self):
- return self.epilogue_text() if callable(self.epilogue_text) else self.epilogue_text
-
-
-class UpdatableSubmenuItem(SubmenuItem):
- def __init__(self, text, submenu, menu=None, should_exit=False):
- """
- :ivar ConsoleMenu self.submenu: The submenu to be opened when this item is selected
- """
- super(UpdatableSubmenuItem, self).__init__(text=text, menu=menu, should_exit=should_exit, submenu=submenu)
-
- if menu:
- self.get_submenu().parent = menu
-
- def show(self, index):
- return "%2d - %s" % (index + 1, self.get_text())
-
- # Getters to get text in case method reference
- def get_text(self):
- return self.text() if callable(self.text) else self.text
-
- def set_menu(self, menu):
- """
- Sets the menu of this item.
- Should be used instead of directly accessing the menu attribute for this class.
-
- :param ConsoleMenu menu: the menu
- """
- self.menu = menu
- self.get_submenu().parent = menu
-
- def action(self):
- """
- This class overrides this method
- """
- self.get_submenu().start()
-
- def clean_up(self):
- """
- This class overrides this method
- """
- self.get_submenu().join()
- self.menu.clear_screen()
- self.menu.resume()
-
- def get_return(self):
- """
- :return: The returned value in the submenu
- """
- return self.get_submenu().returned_value
-
- def get_submenu(self):
- """
- We unwrap the submenu variable in case it is a reference to a method that returns a submenu
- """
- return self.submenu if not callable(self.submenu) else self.submenu()
-
-
-class UpdatableFunctionItem(FunctionItem):
- def show(self, index):
- return "%2d - %s" % (index + 1, self.get_text())
-
- # Getters to get text in case method reference
- def get_text(self):
- return self.text() if callable(self.text) else self.text
-
-
-class MyScreen(Screen):
- def clear(self):
- return
-
-
class CustomExitItem(ExitItem):
def show(self, index):
return super(CustomExitItem, self).show(index)
@@ -1346,6 +1247,13 @@
global templates
print("Lucene releaseWizard v%s" % getScriptVersion())
+
+ try:
+ ConsoleMenu(clear_screen=True)
+ except Exception as e:
+ sys.exit("You need to install 'consolemenu' package version 0.7.1 for the Wizard to function. Please run 'pip "
+ "install -r requirements.txt'")
+
c = parse_config()
if c.dry:
@@ -1402,18 +1310,18 @@
lucene_news_file = os.path.join(state.get_website_git_folder(), 'content', 'core', 'core_news',
"%s-%s-available.md" % (state.get_release_date_iso(), state.release_version.replace(".", "-")))
- main_menu = UpdatableConsoleMenu(title="Lucene ReleaseWizard",
+ main_menu = ConsoleMenu(title="Lucene ReleaseWizard",
subtitle=get_releasing_text,
prologue_text="Welcome to the release wizard. From here you can manage the process including creating new RCs. "
"All changes are persisted, so you can exit any time and continue later. Make sure to read the Help section.",
epilogue_text="® 2022 The Lucene project. Licensed under the Apache License 2.0\nScript version v%s)" % getScriptVersion(),
- screen=MyScreen())
+ clear_screen=False)
- todo_menu = UpdatableConsoleMenu(title=get_releasing_text,
+ todo_menu = ConsoleMenu(title=get_releasing_text,
subtitle=get_subtitle,
prologue_text=None,
epilogue_text=None,
- screen=MyScreen())
+ clear_screen=False)
todo_menu.exit_item = CustomExitItem("Return")
for todo_group in state.todo_groups:
@@ -1422,14 +1330,14 @@
menu_item.set_menu(todo_menu)
todo_menu.append_item(menu_item)
- main_menu.append_item(UpdatableSubmenuItem(get_todo_menuitem_title, todo_menu, menu=main_menu))
- main_menu.append_item(UpdatableFunctionItem(get_start_new_rc_menu_title, start_new_rc))
- main_menu.append_item(UpdatableFunctionItem('Clear and restart current RC', state.clear_rc))
- main_menu.append_item(UpdatableFunctionItem("Clear all state, restart the %s release" % state.release_version, reset_state))
- main_menu.append_item(UpdatableFunctionItem('Start release for a different version', release_other_version))
- main_menu.append_item(UpdatableFunctionItem('Generate Asciidoc guide for this release', generate_asciidoc))
- # main_menu.append_item(UpdatableFunctionItem('Dump YAML', dump_yaml))
- main_menu.append_item(UpdatableFunctionItem('Help', help))
+ main_menu.append_item(SubmenuItem(get_todo_menuitem_title, todo_menu, menu=main_menu))
+ main_menu.append_item(FunctionItem(get_start_new_rc_menu_title, start_new_rc))
+ main_menu.append_item(FunctionItem('Clear and restart current RC', state.clear_rc))
+ main_menu.append_item(FunctionItem("Clear all state, restart the %s release" % state.release_version, reset_state))
+ main_menu.append_item(FunctionItem('Start release for a different version', release_other_version))
+ main_menu.append_item(FunctionItem('Generate Asciidoc guide for this release', generate_asciidoc))
+ # main_menu.append_item(FunctionItem('Dump YAML', dump_yaml))
+ main_menu.append_item(FunctionItem('Help', help))
main_menu.show()
diff --git a/dev-tools/scripts/requirements.txt b/dev-tools/scripts/requirements.txt
index b8a124b..0617ad1 100644
--- a/dev-tools/scripts/requirements.txt
+++ b/dev-tools/scripts/requirements.txt
@@ -1,8 +1,8 @@
-six>=1.11.0
-Jinja2>=2.10.1
-PyYAML>=5.1
-holidays>=0.9.10
-ics>=0.4
-console-menu>=0.5.1
-PyGithub
-jira
\ No newline at end of file
+six~=1.16.0
+Jinja2~=3.1.1
+PyYAML~=6.0
+holidays~=0.16
+ics~=0.7.2
+console-menu~=0.7.1
+PyGithub~=1.56
+jira~=3.4.1
\ No newline at end of file
diff --git a/gradle/java/modules.gradle b/gradle/java/modules.gradle
index f9ebac3..cb8f7c8 100644
--- a/gradle/java/modules.gradle
+++ b/gradle/java/modules.gradle
@@ -67,6 +67,12 @@
tasks.named(sourceSet.getCompileJavaTaskName()).configure({ JavaCompile task ->
task.dependsOn modularPaths.compileModulePathConfiguration
+ // GH-12742: add the modular path as inputs so that if anything changes, the task
+ // is not up to date and is re-run. I [dw] believe this should be a @Classpath parameter
+ // on the task itself... but I don't know how to implement this on an existing class.
+ // This is a workaround, but it should work just fine.
+ task.inputs.files(modularPaths.compileModulePathConfiguration)
+
// LUCENE-10327: don't allow gradle to emit an empty sourcepath as it would break
// compilation of modules.
task.options.setSourcepath(sourceSet.java.sourceDirectories)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 3f7d9fe..953e1b1 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -62,9 +62,11 @@
* GITHUB#12599: Add RandomAccessInput#readBytes method to the RandomAccessInput interface. (Ignacio Vera)
-* GITHUB#12709 Consolidate FSTStore and BytesStore in FST. Created FSTReader which contains the common methods
+* GITHUB#12709: Consolidate FSTStore and BytesStore in FST. Created FSTReader which contains the common methods
of the two (Anh Dung Bui)
+* GITHUB#12735: Remove FSTCompiler#getTermCount() and FSTCompiler.UnCompiledNode#inputCount (Anh Dung Bui)
+
New Features
---------------------
@@ -158,6 +160,10 @@
* GITHUB#12718: Make IndexSearcher#getSlices final as it is not expected to be overridden (Luca Cavanna)
+* GITHUB#12427: Automata#makeStringUnion #makeBinaryStringUnion now accept Iterable<BytesRef> instead of
+ Collection<BytesRef>. They also now explicitly throw IllegalArgumentException if input data is not properly sorted
+ instead of relying on assert. (Shubham Chaudhary)
+
New Features
---------------------
@@ -247,6 +253,8 @@
* GITHUB#12719: Top-level conjunctions that are not sorted by score now have a
specialized bulk scorer. (Adrien Grand)
+* GITHUB#11903: Faster sort on high-cardinality string fields. (Adrien Grand)
+
Changes in runtime behavior
---------------------
@@ -269,10 +277,16 @@
* GITHUB#12727: Ensure negative scores are not returned by vector similarity functions (Ben Trent)
+* GITHUB#12736: Fix NullPointerException when Monitor.getQuery cannot find the requested queryId (Davis Cook)
+
Build
---------------------
+* GITHUB#12742: JavaCompile tasks may be in up-to-date state when modular dependencies have changed
+ leading to odd runtime errors (Chris Hostetter, Dawid Weiss)
+
* GITHUB#12612: Upgrade forbiddenapis to version 3.6 and ASM for APIJAR extraction to 9.6. (Uwe Schindler)
+
* GITHUB#12655: Upgrade to Gradle 8.4 (Kevin Risden)
Other
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/CSVUtil.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CSVUtil.java
similarity index 94%
rename from lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/CSVUtil.java
rename to lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CSVUtil.java
index e3662f2..36d6e05 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/CSVUtil.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CSVUtil.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.analysis.ja.dict;
+package org.apache.lucene.analysis.util;
import java.util.ArrayList;
import java.util.regex.Matcher;
@@ -69,7 +69,7 @@
return new String[0];
}
- return result.toArray(new String[result.size()]);
+ return result.toArray(new String[0]);
}
private static String unQuoteUnEscape(String original) {
@@ -83,7 +83,7 @@
}
// Unescape
- if (result.indexOf(ESCAPED_QUOTE) >= 0) {
+ if (result.contains(ESCAPED_QUOTE)) {
result = result.replace(ESCAPED_QUOTE, "\"");
}
}
diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestCSVUtil.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCSVUtil.java
similarity index 95%
rename from lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestCSVUtil.java
rename to lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCSVUtil.java
index 8cc6fb6..85901ca 100644
--- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestCSVUtil.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCSVUtil.java
@@ -14,10 +14,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.analysis.ja;
+package org.apache.lucene.analysis.util;
import java.io.IOException;
-import org.apache.lucene.analysis.ja.dict.CSVUtil;
import org.apache.lucene.tests.util.LuceneTestCase;
/*
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionaryBuilder.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionaryBuilder.java
index 80b1cef..5a16db6 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionaryBuilder.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionaryBuilder.java
@@ -28,6 +28,7 @@
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
+import org.apache.lucene.analysis.util.CSVUtil;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FSTCompiler;
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionaryEntryWriter.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionaryEntryWriter.java
index e5270b3..4bdfe50 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionaryEntryWriter.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionaryEntryWriter.java
@@ -20,6 +20,7 @@
import java.io.OutputStream;
import java.nio.ByteBuffer;
import org.apache.lucene.analysis.morph.DictionaryEntryWriter;
+import org.apache.lucene.analysis.util.CSVUtil;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.ArrayUtil;
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UnknownDictionaryBuilder.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UnknownDictionaryBuilder.java
index a367c49..ba5bc0e 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UnknownDictionaryBuilder.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UnknownDictionaryBuilder.java
@@ -25,6 +25,7 @@
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
+import org.apache.lucene.analysis.util.CSVUtil;
class UnknownDictionaryBuilder {
private static final String NGRAM_DICTIONARY_ENTRY = "NGRAM,5,5,-32768,記号,一般,*,*,*,*,*,*,*";
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java
index 52604c4..de69c72 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java
@@ -26,6 +26,7 @@
import java.util.Map;
import java.util.TreeMap;
import org.apache.lucene.analysis.morph.Dictionary;
+import org.apache.lucene.analysis.util.CSVUtil;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FSTCompiler;
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserMorphData.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserMorphData.java
index be895f1..6bc4dc7 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserMorphData.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserMorphData.java
@@ -19,6 +19,8 @@
import static org.apache.lucene.analysis.ja.dict.UserDictionary.CUSTOM_DICTIONARY_WORD_ID_OFFSET;
import static org.apache.lucene.analysis.ja.dict.UserDictionary.INTERNAL_SEPARATOR;
+import org.apache.lucene.analysis.util.CSVUtil;
+
/** Morphological information for user dictionary. */
final class UserMorphData implements JaMorphData {
public static final int WORD_COST = -100000;
diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/TestUnknownDictionary.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/TestUnknownDictionary.java
index 5ccdaa6..2d245c7 100644
--- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/TestUnknownDictionary.java
+++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/TestUnknownDictionary.java
@@ -16,6 +16,7 @@
*/
package org.apache.lucene.analysis.ja.dict;
+import org.apache.lucene.analysis.util.CSVUtil;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.junit.Test;
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/CSVUtil.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/CSVUtil.java
deleted file mode 100644
index b9e3ff9..0000000
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/CSVUtil.java
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.ko.dict;
-
-import java.util.ArrayList;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-/** Utility class for parsing CSV text */
-public final class CSVUtil {
- private static final char QUOTE = '"';
-
- private static final char COMMA = ',';
-
- private static final Pattern QUOTE_REPLACE_PATTERN = Pattern.compile("^\"([^\"]+)\"$");
-
- private static final String ESCAPED_QUOTE = "\"\"";
-
- private CSVUtil() {} // no instance!!!
-
- /**
- * Parse CSV line
- *
- * @param line line containing csv-encoded data
- * @return Array of values
- */
- public static String[] parse(String line) {
- boolean insideQuote = false;
- ArrayList<String> result = new ArrayList<>();
- int quoteCount = 0;
- StringBuilder sb = new StringBuilder();
- for (int i = 0; i < line.length(); i++) {
- char c = line.charAt(i);
-
- if (c == QUOTE) {
- insideQuote = !insideQuote;
- quoteCount++;
- }
-
- if (c == COMMA && !insideQuote) {
- String value = sb.toString();
- value = unQuoteUnEscape(value);
- result.add(value);
- sb.setLength(0);
- continue;
- }
-
- sb.append(c);
- }
-
- result.add(sb.toString());
-
- // Validate
- if (quoteCount % 2 != 0) {
- return new String[0];
- }
-
- return result.toArray(new String[0]);
- }
-
- private static String unQuoteUnEscape(String original) {
- String result = original;
-
- // Unquote
- if (result.indexOf('\"') >= 0) {
- Matcher m = QUOTE_REPLACE_PATTERN.matcher(original);
- if (m.matches()) {
- result = m.group(1);
- }
-
- // Unescape
- if (result.contains(ESCAPED_QUOTE)) {
- result = result.replace(ESCAPED_QUOTE, "\"");
- }
- }
-
- return result;
- }
-}
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionaryBuilder.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionaryBuilder.java
index 3726f9e..e3db26b 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionaryBuilder.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionaryBuilder.java
@@ -28,6 +28,7 @@
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
+import org.apache.lucene.analysis.util.CSVUtil;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FSTCompiler;
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionaryEntryWriter.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionaryEntryWriter.java
index f7ee696..95ce027 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionaryEntryWriter.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionaryEntryWriter.java
@@ -24,6 +24,7 @@
import java.util.List;
import org.apache.lucene.analysis.ko.POS;
import org.apache.lucene.analysis.morph.DictionaryEntryWriter;
+import org.apache.lucene.analysis.util.CSVUtil;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.ArrayUtil;
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UnknownDictionaryBuilder.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UnknownDictionaryBuilder.java
index 1004ab8..71099b2 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UnknownDictionaryBuilder.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UnknownDictionaryBuilder.java
@@ -25,6 +25,7 @@
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
+import org.apache.lucene.analysis.util.CSVUtil;
class UnknownDictionaryBuilder {
private static final String NGRAM_DICTIONARY_ENTRY = "NGRAM,1801,3559,3677,SY,*,*,*,*,*,*,*";
diff --git a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestUnknownDictionary.java b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestUnknownDictionary.java
index dbce890..13190b2 100644
--- a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestUnknownDictionary.java
+++ b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestUnknownDictionary.java
@@ -16,6 +16,7 @@
*/
package org.apache.lucene.analysis.ko.dict;
+import org.apache.lucene.analysis.util.CSVUtil;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.junit.Test;
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/PForUtil.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/PForUtil.java
index 4e99d3a..690bfa5 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/PForUtil.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/PForUtil.java
@@ -92,7 +92,7 @@
out.writeBytes(exceptions, exceptions.length);
}
- /** Decode 128 integers into {@code ints}. */
+ /** Decode 128 integers into {@code longs}. */
void decode(DataInput in, long[] longs) throws IOException {
final int token = Byte.toUnsignedInt(in.readByte());
final int bitsPerValue = token & 0x1f;
diff --git a/lucene/benchmark/conf/analyzer.alg b/lucene/benchmark/conf/analyzer.alg
index 497ec3d..4ed7779 100644
--- a/lucene/benchmark/conf/analyzer.alg
+++ b/lucene/benchmark/conf/analyzer.alg
@@ -32,8 +32,8 @@
doc.term.vector=false
log.step=500
-docs.dir=reuters-out
-#docs.dir=reuters-111
+work.dir=data
+docs.dir=reuters21578
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
diff --git a/lucene/benchmark/conf/collector-small.alg b/lucene/benchmark/conf/collector-small.alg
index 763cb04..e57ee86 100644
--- a/lucene/benchmark/conf/collector-small.alg
+++ b/lucene/benchmark/conf/collector-small.alg
@@ -21,7 +21,7 @@
# Fully Qualified Class Name of a Collector with a empty constructor
# topScoreDocOrdered - Creates a TopScoreDocCollector that requires in order docs
# topScoreDocUnordered - Like above, but allows out of order
-collector.class=coll:topScoreDocOrdered:topScoreDocUnordered:topScoreDocOrdered:topScoreDocUnordered
+collector.class=coll:topScoreDoc
analyzer=org.apache.lucene.analysis.core.WhitespaceAnalyzer
directory=FSDirectory
diff --git a/lucene/benchmark/conf/collector.alg b/lucene/benchmark/conf/collector.alg
index d85582a..e284349 100644
--- a/lucene/benchmark/conf/collector.alg
+++ b/lucene/benchmark/conf/collector.alg
@@ -21,7 +21,7 @@
# Fully Qualified Class Name of a Collector with a empty constructor
# topScoreDocOrdered - Creates a TopScoreDocCollector that requires in order docs
# topScoreDocUnordered - Like above, but allows out of order
-collector.class=coll:topScoreDocOrdered:topScoreDocUnordered:topScoreDocOrdered:topScoreDocUnordered
+collector.class=coll:topScoreDoc
analyzer=org.apache.lucene.analysis.core.WhitespaceAnalyzer
directory=FSDirectory
diff --git a/lucene/benchmark/conf/compound-penalty.alg b/lucene/benchmark/conf/compound-penalty.alg
index 06b2821..8626baa 100644
--- a/lucene/benchmark/conf/compound-penalty.alg
+++ b/lucene/benchmark/conf/compound-penalty.alg
@@ -37,8 +37,8 @@
log.step=500
log.step.DeleteDoc=100
-docs.dir=reuters-out
-#docs.dir=reuters-111
+work.dir=data
+docs.dir=reuters21578
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
diff --git a/lucene/benchmark/conf/english-porter-comparison.alg b/lucene/benchmark/conf/english-porter-comparison.alg
index e83f04a..e391c0b 100644
--- a/lucene/benchmark/conf/english-porter-comparison.alg
+++ b/lucene/benchmark/conf/english-porter-comparison.alg
@@ -20,7 +20,8 @@
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
doc.tokenized=false
doc.body.tokenized=true
-docs.dir=reuters-out
+work.dir=data
+docs.dir=reuters21578
-AnalyzerFactory(name:original-porter-stemmer,StandardTokenizer,
EnglishPossessiveFilter,LowerCaseFilter,StopFilter,
diff --git a/lucene/benchmark/conf/facets.alg b/lucene/benchmark/conf/facets.alg
index 63e7cac..32d7270 100644
--- a/lucene/benchmark/conf/facets.alg
+++ b/lucene/benchmark/conf/facets.alg
@@ -30,7 +30,8 @@
doc.term.vector=false
log.step=1000
-docs.dir=reuters-out
+work.dir=data
+docs.dir=reuters21578
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
diff --git a/lucene/benchmark/conf/highlights.alg b/lucene/benchmark/conf/highlights.alg
index 88b056e..7c5fd7d 100644
--- a/lucene/benchmark/conf/highlights.alg
+++ b/lucene/benchmark/conf/highlights.alg
@@ -30,7 +30,8 @@
doc.term.vector.positions=false
log.step=2000
-docs.dir=reuters-out
+work.dir=data
+docs.dir=reuters21578
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
diff --git a/lucene/benchmark/conf/indexing-flush-by-RAM-multithreaded.alg b/lucene/benchmark/conf/indexing-flush-by-RAM-multithreaded.alg
index 43a6c91..d86e182 100644
--- a/lucene/benchmark/conf/indexing-flush-by-RAM-multithreaded.alg
+++ b/lucene/benchmark/conf/indexing-flush-by-RAM-multithreaded.alg
@@ -32,8 +32,8 @@
doc.term.vector=false
log.step=2000
-docs.dir=reuters-out
-#docs.dir=reuters-111
+work.dir=data
+docs.dir=reuters21578
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
diff --git a/lucene/benchmark/conf/indexing-flush-by-RAM.alg b/lucene/benchmark/conf/indexing-flush-by-RAM.alg
index 0b6c797..0a911c9 100644
--- a/lucene/benchmark/conf/indexing-flush-by-RAM.alg
+++ b/lucene/benchmark/conf/indexing-flush-by-RAM.alg
@@ -32,8 +32,8 @@
doc.term.vector=false
log.step=2000
-docs.dir=reuters-out
-#docs.dir=reuters-111
+work.dir=data
+docs.dir=reuters21578
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
diff --git a/lucene/benchmark/conf/indexing-multithreaded.alg b/lucene/benchmark/conf/indexing-multithreaded.alg
index 1d2e18e..b34b826 100644
--- a/lucene/benchmark/conf/indexing-multithreaded.alg
+++ b/lucene/benchmark/conf/indexing-multithreaded.alg
@@ -32,8 +32,8 @@
doc.term.vector=false
log.step=2000
-docs.dir=reuters-out
-#docs.dir=reuters-111
+work.dir=data
+docs.dir=reuters21578
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
diff --git a/lucene/benchmark/conf/indexing.alg b/lucene/benchmark/conf/indexing.alg
index e31f871..b4a4d92 100644
--- a/lucene/benchmark/conf/indexing.alg
+++ b/lucene/benchmark/conf/indexing.alg
@@ -32,8 +32,8 @@
doc.term.vector=false
log.step=2000
-docs.dir=reuters-out
-#docs.dir=reuters-111
+work.dir=data
+docs.dir=reuters21578
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
diff --git a/lucene/benchmark/conf/micro-standard-flush-by-ram.alg b/lucene/benchmark/conf/micro-standard-flush-by-ram.alg
index 993e58a..d4a22f1 100644
--- a/lucene/benchmark/conf/micro-standard-flush-by-ram.alg
+++ b/lucene/benchmark/conf/micro-standard-flush-by-ram.alg
@@ -31,8 +31,8 @@
doc.term.vector=false
log.step=500
-docs.dir=reuters-out
-#docs.dir=reuters-111
+work.dir=data
+docs.dir=reuters21578
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
diff --git a/lucene/benchmark/conf/sample.alg b/lucene/benchmark/conf/sample.alg
index 4f93230..aa63293 100644
--- a/lucene/benchmark/conf/sample.alg
+++ b/lucene/benchmark/conf/sample.alg
@@ -42,8 +42,8 @@
doc.term.vector=false
log.step=500
-docs.dir=reuters-out
-#docs.dir=reuters-111
+work.dir=data
+docs.dir=reuters21578
content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
#content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
diff --git a/lucene/benchmark/conf/shingle.alg b/lucene/benchmark/conf/shingle.alg
index b074434..67b5130 100644
--- a/lucene/benchmark/conf/shingle.alg
+++ b/lucene/benchmark/conf/shingle.alg
@@ -16,7 +16,8 @@
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
doc.tokenized=false
doc.body.tokenized=true
-docs.dir=reuters-out
+work.dir=data
+docs.dir=reuters21578
log.step=1000
-AnalyzerFactory(name:shingle-bigrams-unigrams,
diff --git a/lucene/benchmark/conf/sloppy-phrase.alg b/lucene/benchmark/conf/sloppy-phrase.alg
index 4d06d6f..4c49ddd 100644
--- a/lucene/benchmark/conf/sloppy-phrase.alg
+++ b/lucene/benchmark/conf/sloppy-phrase.alg
@@ -30,7 +30,8 @@
doc.term.vector=false
log.step=500
-docs.dir=reuters-out
+work.dir=data
+docs.dir=reuters21578
#docs.dir=reuters-111
content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
diff --git a/lucene/benchmark/conf/sort-standard.alg b/lucene/benchmark/conf/sort-standard.alg
index 48cae96..08c7b90 100644
--- a/lucene/benchmark/conf/sort-standard.alg
+++ b/lucene/benchmark/conf/sort-standard.alg
@@ -31,7 +31,8 @@
doc.term.vector=false
log.step=100000
-docs.dir=reuters-out
+work.dir=data
+docs.dir=reuters21578
content.source=org.apache.lucene.benchmark.byTask.feeds.SortableSingleDocSource
diff --git a/lucene/benchmark/conf/standard-flush-by-RAM.alg b/lucene/benchmark/conf/standard-flush-by-RAM.alg
index 3ceed10..c3cb278 100644
--- a/lucene/benchmark/conf/standard-flush-by-RAM.alg
+++ b/lucene/benchmark/conf/standard-flush-by-RAM.alg
@@ -31,8 +31,8 @@
doc.term.vector=false
log.step=2000
-docs.dir=reuters-out
-#docs.dir=reuters-111
+work.dir=data
+docs.dir=reuters21578
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
diff --git a/lucene/benchmark/conf/standard.alg b/lucene/benchmark/conf/standard.alg
index 4d0b048..4885593 100644
--- a/lucene/benchmark/conf/standard.alg
+++ b/lucene/benchmark/conf/standard.alg
@@ -31,8 +31,8 @@
doc.term.vector=false
log.step=2000
-docs.dir=reuters-out
-#docs.dir=reuters-111
+work.dir=data
+docs.dir=reuters21578
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
diff --git a/lucene/benchmark/conf/wstok.alg b/lucene/benchmark/conf/wstok.alg
index c437590..ab6a659 100644
--- a/lucene/benchmark/conf/wstok.alg
+++ b/lucene/benchmark/conf/wstok.alg
@@ -18,7 +18,8 @@
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
doc.tokenized=false
doc.body.tokenized=true
-docs.dir=reuters-out
+work.dir=data
+docs.dir=reuters21578
-AnalyzerFactory(name:WhitespaceTokenizer, WhitespaceTokenizer(rule:java))
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewAnalyzerTask.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewAnalyzerTask.java
index 2248756..032019f 100644
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewAnalyzerTask.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewAnalyzerTask.java
@@ -23,9 +23,9 @@
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.utils.AnalyzerFactory;
-import org.apache.lucene.util.Version;
/**
* Create a new {@link org.apache.lucene.analysis.Analyzer} and set it in the getRunData() for use
@@ -42,17 +42,13 @@
public static final Analyzer createAnalyzer(String className) throws Exception {
final Class<? extends Analyzer> clazz = Class.forName(className).asSubclass(Analyzer.class);
- try {
- // first try to use a ctor with version parameter (needed for many new Analyzers that have no
- // default one anymore
- Constructor<? extends Analyzer> cnstr = clazz.getConstructor(Version.class);
- return cnstr.newInstance(Version.LATEST);
- } catch (
- @SuppressWarnings("unused")
- NoSuchMethodException nsme) {
- // otherwise use default ctor
- return clazz.getConstructor().newInstance();
+ Constructor<? extends Analyzer> cnstr;
+ if (className.equals("org.apache.lucene.analysis.core.StopAnalyzer")) {
+ cnstr = clazz.getConstructor(CharArraySet.class);
+ return cnstr.newInstance(CharArraySet.EMPTY_SET);
}
+ cnstr = clazz.getConstructor();
+ return cnstr.newInstance();
}
@Override
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/PForUtil.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/PForUtil.java
index eb735c8..2119121 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/PForUtil.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/PForUtil.java
@@ -116,7 +116,7 @@
out.writeBytes(exceptions, exceptions.length);
}
- /** Decode 128 integers into {@code ints}. */
+ /** Decode 128 integers into {@code longs}. */
void decode(DataInput in, long[] longs) throws IOException {
final int token = Byte.toUnsignedInt(in.readByte());
final int bitsPerValue = token & 0x1f;
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsWriter.java
index 5515de2..691a730 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsWriter.java
@@ -842,9 +842,6 @@
@Override
public void close() throws IOException {
IOUtils.close(meta, vectorData, vectorIndex, quantizedVectorData);
- if (mergeExec != null) {
- mergeExec.shutdownNow();
- }
}
private abstract static class FieldWriter<T> extends KnnFieldVectorsWriter<T> {
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsWriter.java
index c048581..613fa89 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsWriter.java
@@ -260,7 +260,7 @@
quantizationDataInput)));
} finally {
if (success == false) {
- IOUtils.closeWhileHandlingException(quantizationDataInput);
+ IOUtils.closeWhileHandlingException(tempQuantizedVectorData, quantizationDataInput);
IOUtils.deleteFilesIgnoringExceptions(
segmentWriteState.directory, tempQuantizedVectorData.getName());
}
diff --git a/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java b/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java
index 0f579b9..9713923 100644
--- a/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java
+++ b/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java
@@ -18,8 +18,6 @@
import java.io.IOException;
import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
@@ -555,8 +553,6 @@
FieldInfos fieldInfos = null;
boolean any = false;
for (List<DocValuesFieldUpdates> updates : pendingDVUpdates.values()) {
- // Sort by increasing delGen:
- Collections.sort(updates, Comparator.comparingLong(a -> a.delGen));
for (DocValuesFieldUpdates update : updates) {
if (update.delGen <= maxDelGen && update.any()) {
any = true;
diff --git a/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionBulkScorer.java b/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionBulkScorer.java
index be38778..04d0112 100644
--- a/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionBulkScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionBulkScorer.java
@@ -37,7 +37,8 @@
private final Scorer[] scorers;
private final DocIdSetIterator[] iterators;
- private final DocIdSetIterator lead;
+ private final DocIdSetIterator lead1, lead2;
+ private final Scorer scorer1, scorer2;
private final DocAndScore scorable = new DocAndScore();
private final double[] sumOfOtherClauses;
private final int maxDoc;
@@ -50,7 +51,10 @@
Arrays.sort(this.scorers, Comparator.comparingLong(scorer -> scorer.iterator().cost()));
this.iterators =
Arrays.stream(this.scorers).map(Scorer::iterator).toArray(DocIdSetIterator[]::new);
- lead = iterators[0];
+ lead1 = iterators[0];
+ lead2 = iterators[1];
+ scorer1 = this.scorers[0];
+ scorer2 = this.scorers[1];
this.sumOfOtherClauses = new double[this.scorers.length];
this.maxDoc = maxDoc;
}
@@ -59,7 +63,7 @@
public int score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException {
collector.setScorer(scorable);
- int windowMin = Math.max(lead.docID(), min);
+ int windowMin = Math.max(lead1.docID(), min);
while (windowMin < max) {
// Use impacts of the least costly scorer to compute windows
// NOTE: windowMax is inclusive
@@ -78,7 +82,7 @@
sumOfOtherClauses[i] += sumOfOtherClauses[i + 1];
}
scoreWindow(collector, acceptDocs, windowMin, windowMax + 1, (float) maxWindowScore);
- windowMin = Math.max(lead.docID(), windowMax + 1);
+ windowMin = Math.max(lead1.docID(), windowMax + 1);
}
return windowMin >= maxDoc ? DocIdSetIterator.NO_MORE_DOCS : windowMin;
@@ -92,13 +96,16 @@
return;
}
- if (lead.docID() < min) {
- lead.advance(min);
+ if (lead1.docID() < min) {
+ lead1.advance(min);
}
+
+ final double sumOfOtherMaxScoresAt1 = sumOfOtherClauses[1];
+
advanceHead:
- for (int doc = lead.docID(); doc < max; ) {
+ for (int doc = lead1.docID(); doc < max; ) {
if (acceptDocs != null && acceptDocs.get(doc) == false) {
- doc = lead.nextDoc();
+ doc = lead1.nextDoc();
continue;
}
@@ -109,26 +116,50 @@
final boolean hasMinCompetitiveScore = scorable.minCompetitiveScore > 0;
double currentScore;
if (hasMinCompetitiveScore) {
- currentScore = scorers[0].score();
+ currentScore = scorer1.score();
} else {
currentScore = 0;
}
- for (int i = 1; i < iterators.length; ++i) {
- // First check if we have a chance of having a match
+ // This is the same logic as in the below for loop, specialized for the 2nd least costly
+ // clause. This seems to help the JVM.
+
+ // First check if we have a chance of having a match based on max scores
+ if (hasMinCompetitiveScore
+ && (float) MathUtil.sumUpperBound(currentScore + sumOfOtherMaxScoresAt1, scorers.length)
+ < scorable.minCompetitiveScore) {
+ doc = lead1.nextDoc();
+ continue advanceHead;
+ }
+
+ // NOTE: lead2 may be on `doc` already if we `continue`d on the previous loop iteration.
+ if (lead2.docID() < doc) {
+ int next = lead2.advance(doc);
+ if (next != doc) {
+ doc = lead1.advance(next);
+ continue advanceHead;
+ }
+ }
+ assert lead2.docID() == doc;
+ if (hasMinCompetitiveScore) {
+ currentScore += scorer2.score();
+ }
+
+ for (int i = 2; i < iterators.length; ++i) {
+ // First check if we have a chance of having a match based on max scores
if (hasMinCompetitiveScore
&& (float) MathUtil.sumUpperBound(currentScore + sumOfOtherClauses[i], scorers.length)
< scorable.minCompetitiveScore) {
- doc = lead.nextDoc();
+ doc = lead1.nextDoc();
continue advanceHead;
}
- // NOTE: these iterators may already be on `doc` already if we called `continue advanceHead`
- // on the previous loop iteration.
+ // NOTE: these iterators may be on `doc` already if we called `continue advanceHead` on the
+ // previous loop iteration.
if (iterators[i].docID() < doc) {
int next = iterators[i].advance(doc);
if (next != doc) {
- doc = lead.advance(next);
+ doc = lead1.advance(next);
continue advanceHead;
}
}
@@ -151,13 +182,13 @@
return;
}
- doc = lead.nextDoc();
+ doc = lead1.nextDoc();
}
}
@Override
public long cost() {
- return lead.cost();
+ return lead1.cost();
}
private static class DocAndScore extends Scorable {
diff --git a/lucene/core/src/java/org/apache/lucene/search/comparators/TermOrdValComparator.java b/lucene/core/src/java/org/apache/lucene/search/comparators/TermOrdValComparator.java
index 548bbb4..616b8cf 100644
--- a/lucene/core/src/java/org/apache/lucene/search/comparators/TermOrdValComparator.java
+++ b/lucene/core/src/java/org/apache/lucene/search/comparators/TermOrdValComparator.java
@@ -475,7 +475,7 @@
private class CompetitiveIterator extends DocIdSetIterator {
- private static final int MAX_TERMS = 128;
+ private static final int MAX_TERMS = 1024;
private final LeafReaderContext context;
private final int maxDoc;
diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java b/lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java
index 8952278..9ecf748 100644
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java
@@ -43,8 +43,8 @@
*/
public final class Automata {
/**
- * {@link #makeStringUnion(Collection)} limits terms of this max length to ensure the stack
- * doesn't overflow while building, since our algorithm currently relies on recursion.
+ * {@link #makeStringUnion(Iterable)} limits terms of this max length to ensure the stack doesn't
+ * overflow while building, since our algorithm currently relies on recursion.
*/
public static final int MAX_STRING_UNION_TERM_LENGTH = 1000;
@@ -576,8 +576,8 @@
* @return An {@link Automaton} accepting all input strings. The resulting automaton is codepoint
* based (full unicode codepoints on transitions).
*/
- public static Automaton makeStringUnion(Collection<BytesRef> utf8Strings) {
- if (utf8Strings.isEmpty()) {
+ public static Automaton makeStringUnion(Iterable<BytesRef> utf8Strings) {
+ if (utf8Strings.iterator().hasNext() == false) {
return makeEmpty();
} else {
return StringsToAutomaton.build(utf8Strings, false);
@@ -593,8 +593,8 @@
* @return An {@link Automaton} accepting all input strings. The resulting automaton is binary
* based (UTF-8 encoded byte transition labels).
*/
- public static Automaton makeBinaryStringUnion(Collection<BytesRef> utf8Strings) {
- if (utf8Strings.isEmpty()) {
+ public static Automaton makeBinaryStringUnion(Iterable<BytesRef> utf8Strings) {
+ if (utf8Strings.iterator().hasNext() == false) {
return makeEmpty();
} else {
return StringsToAutomaton.build(utf8Strings, true);
diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java b/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java
index ed1688e..0d17a6f 100644
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java
@@ -1128,6 +1128,10 @@
if (start != pos) m = Integer.parseInt(originalString.substring(start, pos));
} else m = n;
if (!match('}')) throw new IllegalArgumentException("expected '}' at position " + pos);
+ if (m != -1 && n > m) {
+ throw new IllegalArgumentException(
+ "invalid repetition range(out of order): " + n + ".." + m);
+ }
if (m == -1) e = makeRepeat(flags, e, n);
else e = makeRepeat(flags, e, n, m);
}
diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/StringsToAutomaton.java b/lucene/core/src/java/org/apache/lucene/util/automaton/StringsToAutomaton.java
index 3cfe945..58a081f 100644
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/StringsToAutomaton.java
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/StringsToAutomaton.java
@@ -18,7 +18,6 @@
import java.io.IOException;
import java.util.Arrays;
-import java.util.Collection;
import java.util.HashMap;
import java.util.IdentityHashMap;
import org.apache.lucene.util.ArrayUtil;
@@ -35,8 +34,8 @@
* to directly build a binary {@link Automaton} representation. Users should access this
* functionality through {@link Automata} static methods.
*
- * @see Automata#makeStringUnion(Collection)
- * @see Automata#makeBinaryStringUnion(Collection)
+ * @see Automata#makeStringUnion(Iterable)
+ * @see Automata#makeBinaryStringUnion(Iterable)
* @see Automata#makeStringUnion(BytesRefIterator)
* @see Automata#makeBinaryStringUnion(BytesRefIterator)
*/
@@ -238,7 +237,7 @@
* UTF-8 codepoints as transition labels or binary (compiled) transition labels based on {@code
* asBinary}.
*/
- static Automaton build(Collection<BytesRef> input, boolean asBinary) {
+ static Automaton build(Iterable<BytesRef> input, boolean asBinary) {
final StringsToAutomaton builder = new StringsToAutomaton();
for (BytesRef b : input) {
@@ -273,9 +272,11 @@
+ current);
}
assert stateRegistry != null : "Automaton already built.";
- assert previous == null || previous.get().compareTo(current) <= 0
- : "Input must be in sorted UTF-8 order: " + previous.get() + " >= " + current;
- assert setPrevious(current);
+ if (previous != null && previous.get().compareTo(current) > 0) {
+ throw new IllegalArgumentException(
+ "Input must be in sorted UTF-8 order: " + previous.get() + " >= " + current);
+ }
+ setPrevious(current);
// Reusable codepoint information if we're building a non-binary based automaton
UnicodeUtil.UTF8CodePoint codePoint = null;
diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/FSTCompiler.java b/lucene/core/src/java/org/apache/lucene/util/fst/FSTCompiler.java
index f17c220..3af6241 100644
--- a/lucene/core/src/java/org/apache/lucene/util/fst/FSTCompiler.java
+++ b/lucene/core/src/java/org/apache/lucene/util/fst/FSTCompiler.java
@@ -270,10 +270,6 @@
return directAddressingMaxOversizingFactor;
}
- public long getTermCount() {
- return frontier[0].inputCount;
- }
-
public long getNodeCount() {
// 1+ in order to count the -1 implicit final node
return 1 + nodeCount;
@@ -749,7 +745,6 @@
// format cannot represent the empty input since
// 'finalness' is stored on the incoming arc, not on
// the node
- frontier[0].inputCount++;
frontier[0].isFinal = true;
fst.setEmptyOutput(output);
return;
@@ -760,9 +755,6 @@
int pos2 = input.offset;
final int pos1Stop = Math.min(lastInput.length(), input.length);
while (true) {
- frontier[pos1].inputCount++;
- // System.out.println(" incr " + pos1 + " ct=" + frontier[pos1].inputCount + " n=" +
- // frontier[pos1]);
if (pos1 >= pos1Stop || lastInput.intAt(pos1) != input.ints[pos2]) {
break;
}
@@ -786,7 +778,6 @@
// init tail states for current input
for (int idx = prefixLenPlus1; idx <= input.length; idx++) {
frontier[idx - 1].addArc(input.ints[input.offset + idx - 1], frontier[idx]);
- frontier[idx].inputCount++;
}
final UnCompiledNode<T> lastNode = frontier[input.length];
@@ -835,8 +826,6 @@
// save last input
lastInput.copyInts(input);
-
- // System.out.println(" count[0]=" + frontier[0].inputCount);
}
private boolean validOutput(T output) {
@@ -906,10 +895,6 @@
T output;
boolean isFinal;
- // TODO: remove this tracking? we used to use it for confusingly pruning NodeHash, but
- // we switched to LRU by RAM usage instead:
- long inputCount;
-
/** This node's depth, starting from the automaton root. */
final int depth;
@@ -935,7 +920,6 @@
numArcs = 0;
isFinal = false;
output = owner.NO_OUTPUT;
- inputCount = 0;
// We don't clear the depth here because it never changes
// for nodes on the frontier (even when reused).
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDeletionPolicy.java b/lucene/core/src/test/org/apache/lucene/index/TestDeletionPolicy.java
index f2b5686..ac2ff78 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestDeletionPolicy.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestDeletionPolicy.java
@@ -459,7 +459,8 @@
dir,
newIndexWriterConfig(new MockAnalyzer(random()))
.setIndexDeletionPolicy(policy)
- .setIndexCommit(lastCommit));
+ .setIndexCommit(lastCommit)
+ .setMergePolicy(newLogMergePolicy(10)));
assertEquals(10, writer.getDocStats().numDocs);
// Should undo our rollback:
@@ -476,12 +477,13 @@
dir,
newIndexWriterConfig(new MockAnalyzer(random()))
.setIndexDeletionPolicy(policy)
- .setIndexCommit(lastCommit));
+ .setIndexCommit(lastCommit)
+ .setMergePolicy(newLogMergePolicy(10)));
assertEquals(10, writer.getDocStats().numDocs);
// Commits the rollback:
writer.close();
- // Now 8 because we made another commit
+ // Now 7 because we made another commit
assertEquals(7, DirectoryReader.listCommits(dir).size());
r = DirectoryReader.open(dir);
@@ -507,7 +509,10 @@
// but this time keeping only the last commit:
writer =
new IndexWriter(
- dir, newIndexWriterConfig(new MockAnalyzer(random())).setIndexCommit(lastCommit));
+ dir,
+ newIndexWriterConfig(new MockAnalyzer(random()))
+ .setIndexCommit(lastCommit)
+ .setMergePolicy(newLogMergePolicy(10)));
assertEquals(10, writer.getDocStats().numDocs);
// Reader still sees fully merged index, because writer
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
index 5916ec3..1990ce9 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
@@ -2404,11 +2404,12 @@
writer.deleteDocuments(new Term("id", "xyz"));
assertTrue(writer.hasUncommittedChanges());
- // Must commit, waitForMerges, commit again, to be
- // certain that hasUncommittedChanges returns false:
- writer.commit();
- writer.waitForMerges();
- writer.commit();
+ // Must commit and wait for merges as long as the commit triggers merges to be certain that
+ // hasUncommittedChanges returns false
+ do {
+ writer.waitForMerges();
+ writer.commit();
+ } while (writer.hasPendingMerges());
assertFalse(writer.hasUncommittedChanges());
writer.close();
diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/TestRegExp.java b/lucene/core/src/test/org/apache/lucene/util/automaton/TestRegExp.java
index c960e73..8f6f765 100644
--- a/lucene/core/src/test/org/apache/lucene/util/automaton/TestRegExp.java
+++ b/lucene/core/src/test/org/apache/lucene/util/automaton/TestRegExp.java
@@ -86,6 +86,17 @@
}
}
+ public void testParseIllegalRepeatExp() {
+ // out of order
+ IllegalArgumentException expected =
+ expectThrows(
+ IllegalArgumentException.class,
+ () -> {
+ new RegExp("a{99,11}");
+ });
+ assertTrue(expected.getMessage().contains("out of order"));
+ }
+
static String randomDocValue(int minLength) {
String charPalette = "AAAaaaBbbCccc123456 \t";
StringBuilder sb = new StringBuilder();
diff --git a/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java b/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
index 927fe05..f6dd84e 100644
--- a/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
+++ b/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
@@ -568,7 +568,6 @@
System.out.println(
((tMid - tStart) / (double) TimeUnit.SECONDS.toNanos(1)) + " sec to add all terms");
- assert fstCompiler.getTermCount() == ord;
FST<T> fst = fstCompiler.compile();
long tEnd = System.nanoTime();
System.out.println(
diff --git a/lucene/monitor/src/java/org/apache/lucene/monitor/QueryIndex.java b/lucene/monitor/src/java/org/apache/lucene/monitor/QueryIndex.java
index cac6ea1..5868ec5 100644
--- a/lucene/monitor/src/java/org/apache/lucene/monitor/QueryIndex.java
+++ b/lucene/monitor/src/java/org/apache/lucene/monitor/QueryIndex.java
@@ -68,7 +68,7 @@
search(
new TermQuery(new Term(FIELDS.query_id, queryId)),
(id, query, dataValues) -> bytesHolder[0] = dataValues.mq.binaryValue());
- return serializer.deserialize(bytesHolder[0]);
+ return bytesHolder[0] != null ? serializer.deserialize(bytesHolder[0]) : null;
}
public void scan(QueryCollector matcher) throws IOException {
diff --git a/lucene/monitor/src/test/org/apache/lucene/monitor/TestMonitorPersistence.java b/lucene/monitor/src/test/org/apache/lucene/monitor/TestMonitorPersistence.java
index 945abcd..1a60c29 100644
--- a/lucene/monitor/src/test/org/apache/lucene/monitor/TestMonitorPersistence.java
+++ b/lucene/monitor/src/test/org/apache/lucene/monitor/TestMonitorPersistence.java
@@ -28,16 +28,21 @@
private Path indexDirectory = createTempDir();
- public void testCacheIsRepopulated() throws IOException {
-
- Document doc = new Document();
- doc.add(newTextField(FIELD, "test", Field.Store.NO));
+ protected Monitor newMonitorWithPersistence() throws IOException {
MonitorConfiguration config =
new MonitorConfiguration()
.setIndexPath(
indexDirectory, MonitorQuerySerializer.fromParser(MonitorTestBase::parse));
- try (Monitor monitor = new Monitor(ANALYZER, config)) {
+ return new Monitor(ANALYZER, config);
+ }
+
+ public void testCacheIsRepopulated() throws IOException {
+
+ Document doc = new Document();
+ doc.add(newTextField(FIELD, "test", Field.Store.NO));
+
+ try (Monitor monitor = newMonitorWithPersistence()) {
monitor.register(
mq("1", "test"),
mq("2", "test"),
@@ -58,7 +63,7 @@
e.getMessage());
}
- try (Monitor monitor2 = new Monitor(ANALYZER, config)) {
+ try (Monitor monitor2 = newMonitorWithPersistence()) {
assertEquals(4, monitor2.getQueryCount());
assertEquals(4, monitor2.match(doc, QueryMatch.SIMPLE_MATCHER).getMatchCount());
@@ -67,9 +72,24 @@
}
}
+ public void testGetQueryPresent() throws IOException {
+ try (Monitor monitor = newMonitorWithPersistence()) {
+ MonitorQuery monitorQuery = mq("1", "test");
+ monitor.register(monitorQuery);
+
+ assertEquals(monitorQuery, monitor.getQuery("1"));
+ }
+ }
+
+ public void testGetQueryNotPresent() throws IOException {
+ try (Monitor monitor = newMonitorWithPersistence()) {
+ assertNull(monitor.getQuery("1"));
+ }
+ }
+
public void testEphemeralMonitorDoesNotStoreQueries() throws IOException {
- try (Monitor monitor2 = new Monitor(ANALYZER)) {
+ try (Monitor monitor2 = newMonitor(ANALYZER)) {
IllegalStateException e =
expectThrows(IllegalStateException.class, () -> monitor2.getQuery("query"));
assertEquals(