Merge branch 'stabilize_benchmark'
diff --git a/buildSrc/src/main/java/org/apache/lucene/gradle/datasets/ExtractReuters.java b/buildSrc/src/main/java/org/apache/lucene/gradle/datasets/ExtractReuters.java
index b8d6735..34f046f 100644
--- a/buildSrc/src/main/java/org/apache/lucene/gradle/datasets/ExtractReuters.java
+++ b/buildSrc/src/main/java/org/apache/lucene/gradle/datasets/ExtractReuters.java
@@ -27,6 +27,7 @@
import java.nio.file.StandardCopyOption;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import java.util.stream.Stream;
/**
* Split the Reuters SGML documents into Simple Text files containing:
@@ -44,9 +45,10 @@
public void extract() throws IOException {
long count = 0;
Files.createDirectories(outputDir);
-
- if (Files.list(outputDir).count() > 0) {
- throw new IOException("The output directory must be empty: " + outputDir);
+ try(Stream<Path> files = Files.list(outputDir)) {
+ if (files.count() > 0) {
+ throw new IOException("The output directory must be empty: " + outputDir);
+ }
}
try (DirectoryStream<Path> stream = Files.newDirectoryStream(reutersDir, "*.sgm")) {
diff --git a/dev-tools/scripts/releaseWizard.py b/dev-tools/scripts/releaseWizard.py
index b57eefb..2fe72a6 100755
--- a/dev-tools/scripts/releaseWizard.py
+++ b/dev-tools/scripts/releaseWizard.py
@@ -63,7 +63,6 @@
import scriptutil
from consolemenu import ConsoleMenu
from consolemenu.items import FunctionItem, SubmenuItem, ExitItem
-from consolemenu.screen import Screen
from scriptutil import BranchType, Version, download, run
# Lucene-to-Java version mapping
@@ -654,8 +653,8 @@
return "%s%s (%d/%d)" % (prefix, self.title, self.num_done(), self.num_applies())
def get_submenu(self):
- menu = UpdatableConsoleMenu(title=self.title, subtitle=self.get_subtitle, prologue_text=self.get_description(),
- screen=MyScreen())
+ menu = ConsoleMenu(title=self.title, subtitle=self.get_subtitle, prologue_text=self.get_description(),
+ clear_screen=False)
menu.exit_item = CustomExitItem("Return")
for todo in self.get_todos():
if todo.applies(state.release_type):
@@ -663,7 +662,7 @@
return menu
def get_menu_item(self):
- item = UpdatableSubmenuItem(self.get_title, self.get_submenu())
+ item = SubmenuItem(self.get_title, self.get_submenu())
return item
def get_todos(self):
@@ -820,7 +819,7 @@
print("ERROR while executing todo %s (%s)" % (self.get_title(), e))
def get_menu_item(self):
- return UpdatableFunctionItem(self.get_title, self.display_and_confirm)
+ return FunctionItem(self.get_title, self.display_and_confirm)
def clone(self):
clone = Todo(self.id, self.title, description=self.description)
@@ -1234,104 +1233,6 @@
input("\nPress ENTER to continue...")
-# Custom classes for ConsoleMenu, to make menu texts dynamic
-# Needed until https://github.com/aegirhall/console-menu/pull/25 is released
-# See https://pypi.org/project/console-menu/ for other docs
-
-class UpdatableConsoleMenu(ConsoleMenu):
-
- def __repr__(self):
- return "%s: %s. %d items" % (self.get_title(), self.get_subtitle(), len(self.items))
-
- def draw(self):
- """
- Refreshes the screen and redraws the menu. Should be called whenever something changes that needs to be redrawn.
- """
- self.screen.printf(self.formatter.format(title=self.get_title(), subtitle=self.get_subtitle(), items=self.items,
- prologue_text=self.get_prologue_text(), epilogue_text=self.get_epilogue_text()))
-
- # Getters to get text in case method reference
- def get_title(self):
- return self.title() if callable(self.title) else self.title
-
- def get_subtitle(self):
- return self.subtitle() if callable(self.subtitle) else self.subtitle
-
- def get_prologue_text(self):
- return self.prologue_text() if callable(self.prologue_text) else self.prologue_text
-
- def get_epilogue_text(self):
- return self.epilogue_text() if callable(self.epilogue_text) else self.epilogue_text
-
-
-class UpdatableSubmenuItem(SubmenuItem):
- def __init__(self, text, submenu, menu=None, should_exit=False):
- """
- :ivar ConsoleMenu self.submenu: The submenu to be opened when this item is selected
- """
- super(UpdatableSubmenuItem, self).__init__(text=text, menu=menu, should_exit=should_exit, submenu=submenu)
-
- if menu:
- self.get_submenu().parent = menu
-
- def show(self, index):
- return "%2d - %s" % (index + 1, self.get_text())
-
- # Getters to get text in case method reference
- def get_text(self):
- return self.text() if callable(self.text) else self.text
-
- def set_menu(self, menu):
- """
- Sets the menu of this item.
- Should be used instead of directly accessing the menu attribute for this class.
-
- :param ConsoleMenu menu: the menu
- """
- self.menu = menu
- self.get_submenu().parent = menu
-
- def action(self):
- """
- This class overrides this method
- """
- self.get_submenu().start()
-
- def clean_up(self):
- """
- This class overrides this method
- """
- self.get_submenu().join()
- self.menu.clear_screen()
- self.menu.resume()
-
- def get_return(self):
- """
- :return: The returned value in the submenu
- """
- return self.get_submenu().returned_value
-
- def get_submenu(self):
- """
- We unwrap the submenu variable in case it is a reference to a method that returns a submenu
- """
- return self.submenu if not callable(self.submenu) else self.submenu()
-
-
-class UpdatableFunctionItem(FunctionItem):
- def show(self, index):
- return "%2d - %s" % (index + 1, self.get_text())
-
- # Getters to get text in case method reference
- def get_text(self):
- return self.text() if callable(self.text) else self.text
-
-
-class MyScreen(Screen):
- def clear(self):
- return
-
-
class CustomExitItem(ExitItem):
def show(self, index):
return super(CustomExitItem, self).show(index)
@@ -1346,6 +1247,13 @@
global templates
print("Lucene releaseWizard v%s" % getScriptVersion())
+
+ try:
+ ConsoleMenu(clear_screen=True)
+ except Exception as e:
+ sys.exit("You need to install 'consolemenu' package version 0.7.1 for the Wizard to function. Please run 'pip "
+ "install -r requirements.txt'")
+
c = parse_config()
if c.dry:
@@ -1402,18 +1310,18 @@
lucene_news_file = os.path.join(state.get_website_git_folder(), 'content', 'core', 'core_news',
"%s-%s-available.md" % (state.get_release_date_iso(), state.release_version.replace(".", "-")))
- main_menu = UpdatableConsoleMenu(title="Lucene ReleaseWizard",
+ main_menu = ConsoleMenu(title="Lucene ReleaseWizard",
subtitle=get_releasing_text,
prologue_text="Welcome to the release wizard. From here you can manage the process including creating new RCs. "
"All changes are persisted, so you can exit any time and continue later. Make sure to read the Help section.",
epilogue_text="® 2022 The Lucene project. Licensed under the Apache License 2.0\nScript version v%s)" % getScriptVersion(),
- screen=MyScreen())
+ clear_screen=False)
- todo_menu = UpdatableConsoleMenu(title=get_releasing_text,
+ todo_menu = ConsoleMenu(title=get_releasing_text,
subtitle=get_subtitle,
prologue_text=None,
epilogue_text=None,
- screen=MyScreen())
+ clear_screen=False)
todo_menu.exit_item = CustomExitItem("Return")
for todo_group in state.todo_groups:
@@ -1422,14 +1330,14 @@
menu_item.set_menu(todo_menu)
todo_menu.append_item(menu_item)
- main_menu.append_item(UpdatableSubmenuItem(get_todo_menuitem_title, todo_menu, menu=main_menu))
- main_menu.append_item(UpdatableFunctionItem(get_start_new_rc_menu_title, start_new_rc))
- main_menu.append_item(UpdatableFunctionItem('Clear and restart current RC', state.clear_rc))
- main_menu.append_item(UpdatableFunctionItem("Clear all state, restart the %s release" % state.release_version, reset_state))
- main_menu.append_item(UpdatableFunctionItem('Start release for a different version', release_other_version))
- main_menu.append_item(UpdatableFunctionItem('Generate Asciidoc guide for this release', generate_asciidoc))
- # main_menu.append_item(UpdatableFunctionItem('Dump YAML', dump_yaml))
- main_menu.append_item(UpdatableFunctionItem('Help', help))
+ main_menu.append_item(SubmenuItem(get_todo_menuitem_title, todo_menu, menu=main_menu))
+ main_menu.append_item(FunctionItem(get_start_new_rc_menu_title, start_new_rc))
+ main_menu.append_item(FunctionItem('Clear and restart current RC', state.clear_rc))
+ main_menu.append_item(FunctionItem("Clear all state, restart the %s release" % state.release_version, reset_state))
+ main_menu.append_item(FunctionItem('Start release for a different version', release_other_version))
+ main_menu.append_item(FunctionItem('Generate Asciidoc guide for this release', generate_asciidoc))
+ # main_menu.append_item(FunctionItem('Dump YAML', dump_yaml))
+ main_menu.append_item(FunctionItem('Help', help))
main_menu.show()
diff --git a/dev-tools/scripts/requirements.txt b/dev-tools/scripts/requirements.txt
index b8a124b..0617ad1 100644
--- a/dev-tools/scripts/requirements.txt
+++ b/dev-tools/scripts/requirements.txt
@@ -1,8 +1,8 @@
-six>=1.11.0
-Jinja2>=2.10.1
-PyYAML>=5.1
-holidays>=0.9.10
-ics>=0.4
-console-menu>=0.5.1
-PyGithub
-jira
\ No newline at end of file
+six~=1.16.0
+Jinja2~=3.1.1
+PyYAML~=6.0
+holidays~=0.16
+ics~=0.7.2
+console-menu~=0.7.1
+PyGithub~=1.56
+jira~=3.4.1
\ No newline at end of file
diff --git a/gradle/java/modules.gradle b/gradle/java/modules.gradle
index f9ebac3..cb8f7c8 100644
--- a/gradle/java/modules.gradle
+++ b/gradle/java/modules.gradle
@@ -67,6 +67,12 @@
tasks.named(sourceSet.getCompileJavaTaskName()).configure({ JavaCompile task ->
task.dependsOn modularPaths.compileModulePathConfiguration
+ // GH-12742: add the modular path as inputs so that if anything changes, the task
+ // is not up to date and is re-run. I [dw] believe this should be a @Classpath parameter
+ // on the task itself... but I don't know how to implement this on an existing class.
+ // This is a workaround, but it should work just fine.
+ task.inputs.files(modularPaths.compileModulePathConfiguration)
+
// LUCENE-10327: don't allow gradle to emit an empty sourcepath as it would break
// compilation of modules.
task.options.setSourcepath(sourceSet.java.sourceDirectories)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 3f7d9fe..953e1b1 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -62,9 +62,11 @@
* GITHUB#12599: Add RandomAccessInput#readBytes method to the RandomAccessInput interface. (Ignacio Vera)
-* GITHUB#12709 Consolidate FSTStore and BytesStore in FST. Created FSTReader which contains the common methods
+* GITHUB#12709: Consolidate FSTStore and BytesStore in FST. Created FSTReader which contains the common methods
of the two (Anh Dung Bui)
+* GITHUB#12735: Remove FSTCompiler#getTermCount() and FSTCompiler.UnCompiledNode#inputCount (Anh Dung Bui)
+
New Features
---------------------
@@ -158,6 +160,10 @@
* GITHUB#12718: Make IndexSearcher#getSlices final as it is not expected to be overridden (Luca Cavanna)
+* GITHUB#12427: Automata#makeStringUnion #makeBinaryStringUnion now accept Iterable<BytesRef> instead of
+ Collection<BytesRef>. They also now explicitly throw IllegalArgumentException if input data is not properly sorted
+ instead of relying on assert. (Shubham Chaudhary)
+
New Features
---------------------
@@ -247,6 +253,8 @@
* GITHUB#12719: Top-level conjunctions that are not sorted by score now have a
specialized bulk scorer. (Adrien Grand)
+* GITHUB#11903: Faster sort on high-cardinality string fields. (Adrien Grand)
+
Changes in runtime behavior
---------------------
@@ -269,10 +277,16 @@
* GITHUB#12727: Ensure negative scores are not returned by vector similarity functions (Ben Trent)
+* GITHUB#12736: Fix NullPointerException when Monitor.getQuery cannot find the requested queryId (Davis Cook)
+
Build
---------------------
+* GITHUB#12742: JavaCompile tasks may be in up-to-date state when modular dependencies have changed
+ leading to odd runtime errors (Chris Hostetter, Dawid Weiss)
+
* GITHUB#12612: Upgrade forbiddenapis to version 3.6 and ASM for APIJAR extraction to 9.6. (Uwe Schindler)
+
* GITHUB#12655: Upgrade to Gradle 8.4 (Kevin Risden)
Other
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/CSVUtil.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CSVUtil.java
similarity index 94%
rename from lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/CSVUtil.java
rename to lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CSVUtil.java
index e3662f2..36d6e05 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/CSVUtil.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CSVUtil.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.analysis.ja.dict;
+package org.apache.lucene.analysis.util;
import java.util.ArrayList;
import java.util.regex.Matcher;
@@ -69,7 +69,7 @@
return new String[0];
}
- return result.toArray(new String[result.size()]);
+ return result.toArray(new String[0]);
}
private static String unQuoteUnEscape(String original) {
@@ -83,7 +83,7 @@
}
// Unescape
- if (result.indexOf(ESCAPED_QUOTE) >= 0) {
+ if (result.contains(ESCAPED_QUOTE)) {
result = result.replace(ESCAPED_QUOTE, "\"");
}
}
diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestCSVUtil.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCSVUtil.java
similarity index 95%
rename from lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestCSVUtil.java
rename to lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCSVUtil.java
index 8cc6fb6..85901ca 100644
--- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestCSVUtil.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCSVUtil.java
@@ -14,10 +14,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.analysis.ja;
+package org.apache.lucene.analysis.util;
import java.io.IOException;
-import org.apache.lucene.analysis.ja.dict.CSVUtil;
import org.apache.lucene.tests.util.LuceneTestCase;
/*
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionaryBuilder.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionaryBuilder.java
index 80b1cef..5a16db6 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionaryBuilder.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionaryBuilder.java
@@ -28,6 +28,7 @@
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
+import org.apache.lucene.analysis.util.CSVUtil;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FSTCompiler;
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionaryEntryWriter.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionaryEntryWriter.java
index e5270b3..4bdfe50 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionaryEntryWriter.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionaryEntryWriter.java
@@ -20,6 +20,7 @@
import java.io.OutputStream;
import java.nio.ByteBuffer;
import org.apache.lucene.analysis.morph.DictionaryEntryWriter;
+import org.apache.lucene.analysis.util.CSVUtil;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.ArrayUtil;
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UnknownDictionaryBuilder.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UnknownDictionaryBuilder.java
index a367c49..ba5bc0e 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UnknownDictionaryBuilder.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UnknownDictionaryBuilder.java
@@ -25,6 +25,7 @@
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
+import org.apache.lucene.analysis.util.CSVUtil;
class UnknownDictionaryBuilder {
private static final String NGRAM_DICTIONARY_ENTRY = "NGRAM,5,5,-32768,記号,一般,*,*,*,*,*,*,*";
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java
index 52604c4..de69c72 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java
@@ -26,6 +26,7 @@
import java.util.Map;
import java.util.TreeMap;
import org.apache.lucene.analysis.morph.Dictionary;
+import org.apache.lucene.analysis.util.CSVUtil;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FSTCompiler;
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserMorphData.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserMorphData.java
index be895f1..6bc4dc7 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserMorphData.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserMorphData.java
@@ -19,6 +19,8 @@
import static org.apache.lucene.analysis.ja.dict.UserDictionary.CUSTOM_DICTIONARY_WORD_ID_OFFSET;
import static org.apache.lucene.analysis.ja.dict.UserDictionary.INTERNAL_SEPARATOR;
+import org.apache.lucene.analysis.util.CSVUtil;
+
/** Morphological information for user dictionary. */
final class UserMorphData implements JaMorphData {
public static final int WORD_COST = -100000;
diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/TestUnknownDictionary.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/TestUnknownDictionary.java
index 5ccdaa6..2d245c7 100644
--- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/TestUnknownDictionary.java
+++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/TestUnknownDictionary.java
@@ -16,6 +16,7 @@
*/
package org.apache.lucene.analysis.ja.dict;
+import org.apache.lucene.analysis.util.CSVUtil;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.junit.Test;
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/CSVUtil.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/CSVUtil.java
deleted file mode 100644
index b9e3ff9..0000000
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/CSVUtil.java
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.ko.dict;
-
-import java.util.ArrayList;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-/** Utility class for parsing CSV text */
-public final class CSVUtil {
- private static final char QUOTE = '"';
-
- private static final char COMMA = ',';
-
- private static final Pattern QUOTE_REPLACE_PATTERN = Pattern.compile("^\"([^\"]+)\"$");
-
- private static final String ESCAPED_QUOTE = "\"\"";
-
- private CSVUtil() {} // no instance!!!
-
- /**
- * Parse CSV line
- *
- * @param line line containing csv-encoded data
- * @return Array of values
- */
- public static String[] parse(String line) {
- boolean insideQuote = false;
- ArrayList<String> result = new ArrayList<>();
- int quoteCount = 0;
- StringBuilder sb = new StringBuilder();
- for (int i = 0; i < line.length(); i++) {
- char c = line.charAt(i);
-
- if (c == QUOTE) {
- insideQuote = !insideQuote;
- quoteCount++;
- }
-
- if (c == COMMA && !insideQuote) {
- String value = sb.toString();
- value = unQuoteUnEscape(value);
- result.add(value);
- sb.setLength(0);
- continue;
- }
-
- sb.append(c);
- }
-
- result.add(sb.toString());
-
- // Validate
- if (quoteCount % 2 != 0) {
- return new String[0];
- }
-
- return result.toArray(new String[0]);
- }
-
- private static String unQuoteUnEscape(String original) {
- String result = original;
-
- // Unquote
- if (result.indexOf('\"') >= 0) {
- Matcher m = QUOTE_REPLACE_PATTERN.matcher(original);
- if (m.matches()) {
- result = m.group(1);
- }
-
- // Unescape
- if (result.contains(ESCAPED_QUOTE)) {
- result = result.replace(ESCAPED_QUOTE, "\"");
- }
- }
-
- return result;
- }
-}
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionaryBuilder.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionaryBuilder.java
index 3726f9e..e3db26b 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionaryBuilder.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionaryBuilder.java
@@ -28,6 +28,7 @@
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
+import org.apache.lucene.analysis.util.CSVUtil;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FSTCompiler;
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionaryEntryWriter.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionaryEntryWriter.java
index f7ee696..95ce027 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionaryEntryWriter.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionaryEntryWriter.java
@@ -24,6 +24,7 @@
import java.util.List;
import org.apache.lucene.analysis.ko.POS;
import org.apache.lucene.analysis.morph.DictionaryEntryWriter;
+import org.apache.lucene.analysis.util.CSVUtil;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.ArrayUtil;
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UnknownDictionaryBuilder.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UnknownDictionaryBuilder.java
index 1004ab8..71099b2 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UnknownDictionaryBuilder.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UnknownDictionaryBuilder.java
@@ -25,6 +25,7 @@
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
+import org.apache.lucene.analysis.util.CSVUtil;
class UnknownDictionaryBuilder {
private static final String NGRAM_DICTIONARY_ENTRY = "NGRAM,1801,3559,3677,SY,*,*,*,*,*,*,*";
diff --git a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestUnknownDictionary.java b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestUnknownDictionary.java
index dbce890..13190b2 100644
--- a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestUnknownDictionary.java
+++ b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestUnknownDictionary.java
@@ -16,6 +16,7 @@
*/
package org.apache.lucene.analysis.ko.dict;
+import org.apache.lucene.analysis.util.CSVUtil;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.junit.Test;
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/PForUtil.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/PForUtil.java
index 4e99d3a..690bfa5 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/PForUtil.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/PForUtil.java
@@ -92,7 +92,7 @@
out.writeBytes(exceptions, exceptions.length);
}
- /** Decode 128 integers into {@code ints}. */
+ /** Decode 128 integers into {@code longs}. */
void decode(DataInput in, long[] longs) throws IOException {
final int token = Byte.toUnsignedInt(in.readByte());
final int bitsPerValue = token & 0x1f;
diff --git a/lucene/benchmark/conf/analyzer.alg b/lucene/benchmark/conf/analyzer.alg
index 497ec3d..4ed7779 100644
--- a/lucene/benchmark/conf/analyzer.alg
+++ b/lucene/benchmark/conf/analyzer.alg
@@ -32,8 +32,8 @@
doc.term.vector=false
log.step=500
-docs.dir=reuters-out
-#docs.dir=reuters-111
+work.dir=data
+docs.dir=reuters21578
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
diff --git a/lucene/benchmark/conf/collector-small.alg b/lucene/benchmark/conf/collector-small.alg
index 763cb04..e57ee86 100644
--- a/lucene/benchmark/conf/collector-small.alg
+++ b/lucene/benchmark/conf/collector-small.alg
@@ -21,7 +21,7 @@
# Fully Qualified Class Name of a Collector with a empty constructor
# topScoreDocOrdered - Creates a TopScoreDocCollector that requires in order docs
# topScoreDocUnordered - Like above, but allows out of order
-collector.class=coll:topScoreDocOrdered:topScoreDocUnordered:topScoreDocOrdered:topScoreDocUnordered
+collector.class=coll:topScoreDoc
analyzer=org.apache.lucene.analysis.core.WhitespaceAnalyzer
directory=FSDirectory
diff --git a/lucene/benchmark/conf/collector.alg b/lucene/benchmark/conf/collector.alg
index d85582a..e284349 100644
--- a/lucene/benchmark/conf/collector.alg
+++ b/lucene/benchmark/conf/collector.alg
@@ -21,7 +21,7 @@
# Fully Qualified Class Name of a Collector with a empty constructor
# topScoreDocOrdered - Creates a TopScoreDocCollector that requires in order docs
# topScoreDocUnordered - Like above, but allows out of order
-collector.class=coll:topScoreDocOrdered:topScoreDocUnordered:topScoreDocOrdered:topScoreDocUnordered
+collector.class=coll:topScoreDoc
analyzer=org.apache.lucene.analysis.core.WhitespaceAnalyzer
directory=FSDirectory
diff --git a/lucene/benchmark/conf/compound-penalty.alg b/lucene/benchmark/conf/compound-penalty.alg
index 06b2821..8626baa 100644
--- a/lucene/benchmark/conf/compound-penalty.alg
+++ b/lucene/benchmark/conf/compound-penalty.alg
@@ -37,8 +37,8 @@
log.step=500
log.step.DeleteDoc=100
-docs.dir=reuters-out
-#docs.dir=reuters-111
+work.dir=data
+docs.dir=reuters21578
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
diff --git a/lucene/benchmark/conf/english-porter-comparison.alg b/lucene/benchmark/conf/english-porter-comparison.alg
index e83f04a..e391c0b 100644
--- a/lucene/benchmark/conf/english-porter-comparison.alg
+++ b/lucene/benchmark/conf/english-porter-comparison.alg
@@ -20,7 +20,8 @@
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
doc.tokenized=false
doc.body.tokenized=true
-docs.dir=reuters-out
+work.dir=data
+docs.dir=reuters21578
-AnalyzerFactory(name:original-porter-stemmer,StandardTokenizer,
EnglishPossessiveFilter,LowerCaseFilter,StopFilter,
diff --git a/lucene/benchmark/conf/facets.alg b/lucene/benchmark/conf/facets.alg
index 63e7cac..32d7270 100644
--- a/lucene/benchmark/conf/facets.alg
+++ b/lucene/benchmark/conf/facets.alg
@@ -30,7 +30,8 @@
doc.term.vector=false
log.step=1000
-docs.dir=reuters-out
+work.dir=data
+docs.dir=reuters21578
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
diff --git a/lucene/benchmark/conf/highlights.alg b/lucene/benchmark/conf/highlights.alg
index 88b056e..7c5fd7d 100644
--- a/lucene/benchmark/conf/highlights.alg
+++ b/lucene/benchmark/conf/highlights.alg
@@ -30,7 +30,8 @@
doc.term.vector.positions=false
log.step=2000
-docs.dir=reuters-out
+work.dir=data
+docs.dir=reuters21578
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
diff --git a/lucene/benchmark/conf/indexing-flush-by-RAM-multithreaded.alg b/lucene/benchmark/conf/indexing-flush-by-RAM-multithreaded.alg
index 43a6c91..d86e182 100644
--- a/lucene/benchmark/conf/indexing-flush-by-RAM-multithreaded.alg
+++ b/lucene/benchmark/conf/indexing-flush-by-RAM-multithreaded.alg
@@ -32,8 +32,8 @@
doc.term.vector=false
log.step=2000
-docs.dir=reuters-out
-#docs.dir=reuters-111
+work.dir=data
+docs.dir=reuters21578
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
diff --git a/lucene/benchmark/conf/indexing-flush-by-RAM.alg b/lucene/benchmark/conf/indexing-flush-by-RAM.alg
index 0b6c797..0a911c9 100644
--- a/lucene/benchmark/conf/indexing-flush-by-RAM.alg
+++ b/lucene/benchmark/conf/indexing-flush-by-RAM.alg
@@ -32,8 +32,8 @@
doc.term.vector=false
log.step=2000
-docs.dir=reuters-out
-#docs.dir=reuters-111
+work.dir=data
+docs.dir=reuters21578
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
diff --git a/lucene/benchmark/conf/indexing-multithreaded.alg b/lucene/benchmark/conf/indexing-multithreaded.alg
index 1d2e18e..b34b826 100644
--- a/lucene/benchmark/conf/indexing-multithreaded.alg
+++ b/lucene/benchmark/conf/indexing-multithreaded.alg
@@ -32,8 +32,8 @@
doc.term.vector=false
log.step=2000
-docs.dir=reuters-out
-#docs.dir=reuters-111
+work.dir=data
+docs.dir=reuters21578
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
diff --git a/lucene/benchmark/conf/indexing.alg b/lucene/benchmark/conf/indexing.alg
index e31f871..b4a4d92 100644
--- a/lucene/benchmark/conf/indexing.alg
+++ b/lucene/benchmark/conf/indexing.alg
@@ -32,8 +32,8 @@
doc.term.vector=false
log.step=2000
-docs.dir=reuters-out
-#docs.dir=reuters-111
+work.dir=data
+docs.dir=reuters21578
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
diff --git a/lucene/benchmark/conf/micro-standard-flush-by-ram.alg b/lucene/benchmark/conf/micro-standard-flush-by-ram.alg
index 993e58a..d4a22f1 100644
--- a/lucene/benchmark/conf/micro-standard-flush-by-ram.alg
+++ b/lucene/benchmark/conf/micro-standard-flush-by-ram.alg
@@ -31,8 +31,8 @@
doc.term.vector=false
log.step=500
-docs.dir=reuters-out
-#docs.dir=reuters-111
+work.dir=data
+docs.dir=reuters21578
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
diff --git a/lucene/benchmark/conf/sample.alg b/lucene/benchmark/conf/sample.alg
index 4f93230..aa63293 100644
--- a/lucene/benchmark/conf/sample.alg
+++ b/lucene/benchmark/conf/sample.alg
@@ -42,8 +42,8 @@
doc.term.vector=false
log.step=500
-docs.dir=reuters-out
-#docs.dir=reuters-111
+work.dir=data
+docs.dir=reuters21578
content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
#content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
diff --git a/lucene/benchmark/conf/shingle.alg b/lucene/benchmark/conf/shingle.alg
index b074434..67b5130 100644
--- a/lucene/benchmark/conf/shingle.alg
+++ b/lucene/benchmark/conf/shingle.alg
@@ -16,7 +16,8 @@
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
doc.tokenized=false
doc.body.tokenized=true
-docs.dir=reuters-out
+work.dir=data
+docs.dir=reuters21578
log.step=1000
-AnalyzerFactory(name:shingle-bigrams-unigrams,
diff --git a/lucene/benchmark/conf/sloppy-phrase.alg b/lucene/benchmark/conf/sloppy-phrase.alg
index 4d06d6f..4c49ddd 100644
--- a/lucene/benchmark/conf/sloppy-phrase.alg
+++ b/lucene/benchmark/conf/sloppy-phrase.alg
@@ -30,7 +30,8 @@
doc.term.vector=false
log.step=500
-docs.dir=reuters-out
+work.dir=data
+docs.dir=reuters21578
#docs.dir=reuters-111
content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
diff --git a/lucene/benchmark/conf/sort-standard.alg b/lucene/benchmark/conf/sort-standard.alg
index 48cae96..08c7b90 100644
--- a/lucene/benchmark/conf/sort-standard.alg
+++ b/lucene/benchmark/conf/sort-standard.alg
@@ -31,7 +31,8 @@
doc.term.vector=false
log.step=100000
-docs.dir=reuters-out
+work.dir=data
+docs.dir=reuters21578
content.source=org.apache.lucene.benchmark.byTask.feeds.SortableSingleDocSource
diff --git a/lucene/benchmark/conf/standard-flush-by-RAM.alg b/lucene/benchmark/conf/standard-flush-by-RAM.alg
index 3ceed10..c3cb278 100644
--- a/lucene/benchmark/conf/standard-flush-by-RAM.alg
+++ b/lucene/benchmark/conf/standard-flush-by-RAM.alg
@@ -31,8 +31,8 @@
doc.term.vector=false
log.step=2000
-docs.dir=reuters-out
-#docs.dir=reuters-111
+work.dir=data
+docs.dir=reuters21578
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
diff --git a/lucene/benchmark/conf/standard.alg b/lucene/benchmark/conf/standard.alg
index 4d0b048..4885593 100644
--- a/lucene/benchmark/conf/standard.alg
+++ b/lucene/benchmark/conf/standard.alg
@@ -31,8 +31,8 @@
doc.term.vector=false
log.step=2000
-docs.dir=reuters-out
-#docs.dir=reuters-111
+work.dir=data
+docs.dir=reuters21578
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
diff --git a/lucene/benchmark/conf/wstok.alg b/lucene/benchmark/conf/wstok.alg
index c437590..ab6a659 100644
--- a/lucene/benchmark/conf/wstok.alg
+++ b/lucene/benchmark/conf/wstok.alg
@@ -18,7 +18,8 @@
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
doc.tokenized=false
doc.body.tokenized=true
-docs.dir=reuters-out
+work.dir=data
+docs.dir=reuters21578
-AnalyzerFactory(name:WhitespaceTokenizer, WhitespaceTokenizer(rule:java))
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewAnalyzerTask.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewAnalyzerTask.java
index 2248756..032019f 100644
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewAnalyzerTask.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewAnalyzerTask.java
@@ -23,9 +23,9 @@
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.utils.AnalyzerFactory;
-import org.apache.lucene.util.Version;
/**
* Create a new {@link org.apache.lucene.analysis.Analyzer} and set it in the getRunData() for use
@@ -42,17 +42,13 @@
public static final Analyzer createAnalyzer(String className) throws Exception {
final Class<? extends Analyzer> clazz = Class.forName(className).asSubclass(Analyzer.class);
- try {
- // first try to use a ctor with version parameter (needed for many new Analyzers that have no
- // default one anymore
- Constructor<? extends Analyzer> cnstr = clazz.getConstructor(Version.class);
- return cnstr.newInstance(Version.LATEST);
- } catch (
- @SuppressWarnings("unused")
- NoSuchMethodException nsme) {
- // otherwise use default ctor
- return clazz.getConstructor().newInstance();
+ Constructor<? extends Analyzer> cnstr;
+ if (className.equals("org.apache.lucene.analysis.core.StopAnalyzer")) {
+ cnstr = clazz.getConstructor(CharArraySet.class);
+ return cnstr.newInstance(CharArraySet.EMPTY_SET);
}
+ cnstr = clazz.getConstructor();
+ return cnstr.newInstance();
}
@Override
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/PForUtil.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/PForUtil.java
index eb735c8..2119121 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/PForUtil.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/PForUtil.java
@@ -116,7 +116,7 @@
out.writeBytes(exceptions, exceptions.length);
}
- /** Decode 128 integers into {@code ints}. */
+ /** Decode 128 integers into {@code longs}. */
void decode(DataInput in, long[] longs) throws IOException {
final int token = Byte.toUnsignedInt(in.readByte());
final int bitsPerValue = token & 0x1f;
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsWriter.java
index 5515de2..691a730 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsWriter.java
@@ -842,9 +842,6 @@
@Override
public void close() throws IOException {
IOUtils.close(meta, vectorData, vectorIndex, quantizedVectorData);
- if (mergeExec != null) {
- mergeExec.shutdownNow();
- }
}
private abstract static class FieldWriter<T> extends KnnFieldVectorsWriter<T> {
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsWriter.java
index c048581..613fa89 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsWriter.java
@@ -260,7 +260,7 @@
quantizationDataInput)));
} finally {
if (success == false) {
- IOUtils.closeWhileHandlingException(quantizationDataInput);
+ IOUtils.closeWhileHandlingException(tempQuantizedVectorData, quantizationDataInput);
IOUtils.deleteFilesIgnoringExceptions(
segmentWriteState.directory, tempQuantizedVectorData.getName());
}
diff --git a/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java b/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java
index 0f579b9..9713923 100644
--- a/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java
+++ b/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java
@@ -18,8 +18,6 @@
import java.io.IOException;
import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
@@ -555,8 +553,6 @@
FieldInfos fieldInfos = null;
boolean any = false;
for (List<DocValuesFieldUpdates> updates : pendingDVUpdates.values()) {
- // Sort by increasing delGen:
- Collections.sort(updates, Comparator.comparingLong(a -> a.delGen));
for (DocValuesFieldUpdates update : updates) {
if (update.delGen <= maxDelGen && update.any()) {
any = true;
diff --git a/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionBulkScorer.java b/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionBulkScorer.java
index be38778..04d0112 100644
--- a/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionBulkScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionBulkScorer.java
@@ -37,7 +37,8 @@
private final Scorer[] scorers;
private final DocIdSetIterator[] iterators;
- private final DocIdSetIterator lead;
+ private final DocIdSetIterator lead1, lead2;
+ private final Scorer scorer1, scorer2;
private final DocAndScore scorable = new DocAndScore();
private final double[] sumOfOtherClauses;
private final int maxDoc;
@@ -50,7 +51,10 @@
Arrays.sort(this.scorers, Comparator.comparingLong(scorer -> scorer.iterator().cost()));
this.iterators =
Arrays.stream(this.scorers).map(Scorer::iterator).toArray(DocIdSetIterator[]::new);
- lead = iterators[0];
+ lead1 = iterators[0];
+ lead2 = iterators[1];
+ scorer1 = this.scorers[0];
+ scorer2 = this.scorers[1];
this.sumOfOtherClauses = new double[this.scorers.length];
this.maxDoc = maxDoc;
}
@@ -59,7 +63,7 @@
public int score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException {
collector.setScorer(scorable);
- int windowMin = Math.max(lead.docID(), min);
+ int windowMin = Math.max(lead1.docID(), min);
while (windowMin < max) {
// Use impacts of the least costly scorer to compute windows
// NOTE: windowMax is inclusive
@@ -78,7 +82,7 @@
sumOfOtherClauses[i] += sumOfOtherClauses[i + 1];
}
scoreWindow(collector, acceptDocs, windowMin, windowMax + 1, (float) maxWindowScore);
- windowMin = Math.max(lead.docID(), windowMax + 1);
+ windowMin = Math.max(lead1.docID(), windowMax + 1);
}
return windowMin >= maxDoc ? DocIdSetIterator.NO_MORE_DOCS : windowMin;
@@ -92,13 +96,16 @@
return;
}
- if (lead.docID() < min) {
- lead.advance(min);
+ if (lead1.docID() < min) {
+ lead1.advance(min);
}
+
+ final double sumOfOtherMaxScoresAt1 = sumOfOtherClauses[1];
+
advanceHead:
- for (int doc = lead.docID(); doc < max; ) {
+ for (int doc = lead1.docID(); doc < max; ) {
if (acceptDocs != null && acceptDocs.get(doc) == false) {
- doc = lead.nextDoc();
+ doc = lead1.nextDoc();
continue;
}
@@ -109,26 +116,50 @@
final boolean hasMinCompetitiveScore = scorable.minCompetitiveScore > 0;
double currentScore;
if (hasMinCompetitiveScore) {
- currentScore = scorers[0].score();
+ currentScore = scorer1.score();
} else {
currentScore = 0;
}
- for (int i = 1; i < iterators.length; ++i) {
- // First check if we have a chance of having a match
+ // This is the same logic as in the below for loop, specialized for the 2nd least costly
+ // clause. This seems to help the JVM.
+
+ // First check if we have a chance of having a match based on max scores
+ if (hasMinCompetitiveScore
+ && (float) MathUtil.sumUpperBound(currentScore + sumOfOtherMaxScoresAt1, scorers.length)
+ < scorable.minCompetitiveScore) {
+ doc = lead1.nextDoc();
+ continue advanceHead;
+ }
+
+ // NOTE: lead2 may be on `doc` already if we `continue`d on the previous loop iteration.
+ if (lead2.docID() < doc) {
+ int next = lead2.advance(doc);
+ if (next != doc) {
+ doc = lead1.advance(next);
+ continue advanceHead;
+ }
+ }
+ assert lead2.docID() == doc;
+ if (hasMinCompetitiveScore) {
+ currentScore += scorer2.score();
+ }
+
+ for (int i = 2; i < iterators.length; ++i) {
+ // First check if we have a chance of having a match based on max scores
if (hasMinCompetitiveScore
&& (float) MathUtil.sumUpperBound(currentScore + sumOfOtherClauses[i], scorers.length)
< scorable.minCompetitiveScore) {
- doc = lead.nextDoc();
+ doc = lead1.nextDoc();
continue advanceHead;
}
- // NOTE: these iterators may already be on `doc` already if we called `continue advanceHead`
- // on the previous loop iteration.
+ // NOTE: these iterators may be on `doc` already if we called `continue advanceHead` on the
+ // previous loop iteration.
if (iterators[i].docID() < doc) {
int next = iterators[i].advance(doc);
if (next != doc) {
- doc = lead.advance(next);
+ doc = lead1.advance(next);
continue advanceHead;
}
}
@@ -151,13 +182,13 @@
return;
}
- doc = lead.nextDoc();
+ doc = lead1.nextDoc();
}
}
@Override
public long cost() {
- return lead.cost();
+ return lead1.cost();
}
private static class DocAndScore extends Scorable {
diff --git a/lucene/core/src/java/org/apache/lucene/search/comparators/TermOrdValComparator.java b/lucene/core/src/java/org/apache/lucene/search/comparators/TermOrdValComparator.java
index 548bbb4..616b8cf 100644
--- a/lucene/core/src/java/org/apache/lucene/search/comparators/TermOrdValComparator.java
+++ b/lucene/core/src/java/org/apache/lucene/search/comparators/TermOrdValComparator.java
@@ -475,7 +475,7 @@
private class CompetitiveIterator extends DocIdSetIterator {
- private static final int MAX_TERMS = 128;
+ private static final int MAX_TERMS = 1024;
private final LeafReaderContext context;
private final int maxDoc;
diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java b/lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java
index 8952278..9ecf748 100644
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java
@@ -43,8 +43,8 @@
*/
public final class Automata {
/**
- * {@link #makeStringUnion(Collection)} limits terms of this max length to ensure the stack
- * doesn't overflow while building, since our algorithm currently relies on recursion.
+ * {@link #makeStringUnion(Iterable)} limits terms of this max length to ensure the stack doesn't
+ * overflow while building, since our algorithm currently relies on recursion.
*/
public static final int MAX_STRING_UNION_TERM_LENGTH = 1000;
@@ -576,8 +576,8 @@
* @return An {@link Automaton} accepting all input strings. The resulting automaton is codepoint
* based (full unicode codepoints on transitions).
*/
- public static Automaton makeStringUnion(Collection<BytesRef> utf8Strings) {
- if (utf8Strings.isEmpty()) {
+ public static Automaton makeStringUnion(Iterable<BytesRef> utf8Strings) {
+ if (utf8Strings.iterator().hasNext() == false) {
return makeEmpty();
} else {
return StringsToAutomaton.build(utf8Strings, false);
@@ -593,8 +593,8 @@
* @return An {@link Automaton} accepting all input strings. The resulting automaton is binary
* based (UTF-8 encoded byte transition labels).
*/
- public static Automaton makeBinaryStringUnion(Collection<BytesRef> utf8Strings) {
- if (utf8Strings.isEmpty()) {
+ public static Automaton makeBinaryStringUnion(Iterable<BytesRef> utf8Strings) {
+ if (utf8Strings.iterator().hasNext() == false) {
return makeEmpty();
} else {
return StringsToAutomaton.build(utf8Strings, true);
diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java b/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java
index ed1688e..0d17a6f 100644
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java
@@ -1128,6 +1128,10 @@
if (start != pos) m = Integer.parseInt(originalString.substring(start, pos));
} else m = n;
if (!match('}')) throw new IllegalArgumentException("expected '}' at position " + pos);
+ if (m != -1 && n > m) {
+ throw new IllegalArgumentException(
+ "invalid repetition range(out of order): " + n + ".." + m);
+ }
if (m == -1) e = makeRepeat(flags, e, n);
else e = makeRepeat(flags, e, n, m);
}
diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/StringsToAutomaton.java b/lucene/core/src/java/org/apache/lucene/util/automaton/StringsToAutomaton.java
index 3cfe945..58a081f 100644
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/StringsToAutomaton.java
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/StringsToAutomaton.java
@@ -18,7 +18,6 @@
import java.io.IOException;
import java.util.Arrays;
-import java.util.Collection;
import java.util.HashMap;
import java.util.IdentityHashMap;
import org.apache.lucene.util.ArrayUtil;
@@ -35,8 +34,8 @@
* to directly build a binary {@link Automaton} representation. Users should access this
* functionality through {@link Automata} static methods.
*
- * @see Automata#makeStringUnion(Collection)
- * @see Automata#makeBinaryStringUnion(Collection)
+ * @see Automata#makeStringUnion(Iterable)
+ * @see Automata#makeBinaryStringUnion(Iterable)
* @see Automata#makeStringUnion(BytesRefIterator)
* @see Automata#makeBinaryStringUnion(BytesRefIterator)
*/
@@ -238,7 +237,7 @@
* UTF-8 codepoints as transition labels or binary (compiled) transition labels based on {@code
* asBinary}.
*/
- static Automaton build(Collection<BytesRef> input, boolean asBinary) {
+ static Automaton build(Iterable<BytesRef> input, boolean asBinary) {
final StringsToAutomaton builder = new StringsToAutomaton();
for (BytesRef b : input) {
@@ -273,9 +272,11 @@
+ current);
}
assert stateRegistry != null : "Automaton already built.";
- assert previous == null || previous.get().compareTo(current) <= 0
- : "Input must be in sorted UTF-8 order: " + previous.get() + " >= " + current;
- assert setPrevious(current);
+ if (previous != null && previous.get().compareTo(current) > 0) {
+ throw new IllegalArgumentException(
+ "Input must be in sorted UTF-8 order: " + previous.get() + " >= " + current);
+ }
+ setPrevious(current);
// Reusable codepoint information if we're building a non-binary based automaton
UnicodeUtil.UTF8CodePoint codePoint = null;
diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/FSTCompiler.java b/lucene/core/src/java/org/apache/lucene/util/fst/FSTCompiler.java
index f17c220..3af6241 100644
--- a/lucene/core/src/java/org/apache/lucene/util/fst/FSTCompiler.java
+++ b/lucene/core/src/java/org/apache/lucene/util/fst/FSTCompiler.java
@@ -270,10 +270,6 @@
return directAddressingMaxOversizingFactor;
}
- public long getTermCount() {
- return frontier[0].inputCount;
- }
-
public long getNodeCount() {
// 1+ in order to count the -1 implicit final node
return 1 + nodeCount;
@@ -749,7 +745,6 @@
// format cannot represent the empty input since
// 'finalness' is stored on the incoming arc, not on
// the node
- frontier[0].inputCount++;
frontier[0].isFinal = true;
fst.setEmptyOutput(output);
return;
@@ -760,9 +755,6 @@
int pos2 = input.offset;
final int pos1Stop = Math.min(lastInput.length(), input.length);
while (true) {
- frontier[pos1].inputCount++;
- // System.out.println(" incr " + pos1 + " ct=" + frontier[pos1].inputCount + " n=" +
- // frontier[pos1]);
if (pos1 >= pos1Stop || lastInput.intAt(pos1) != input.ints[pos2]) {
break;
}
@@ -786,7 +778,6 @@
// init tail states for current input
for (int idx = prefixLenPlus1; idx <= input.length; idx++) {
frontier[idx - 1].addArc(input.ints[input.offset + idx - 1], frontier[idx]);
- frontier[idx].inputCount++;
}
final UnCompiledNode<T> lastNode = frontier[input.length];
@@ -835,8 +826,6 @@
// save last input
lastInput.copyInts(input);
-
- // System.out.println(" count[0]=" + frontier[0].inputCount);
}
private boolean validOutput(T output) {
@@ -906,10 +895,6 @@
T output;
boolean isFinal;
- // TODO: remove this tracking? we used to use it for confusingly pruning NodeHash, but
- // we switched to LRU by RAM usage instead:
- long inputCount;
-
/** This node's depth, starting from the automaton root. */
final int depth;
@@ -935,7 +920,6 @@
numArcs = 0;
isFinal = false;
output = owner.NO_OUTPUT;
- inputCount = 0;
// We don't clear the depth here because it never changes
// for nodes on the frontier (even when reused).
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDeletionPolicy.java b/lucene/core/src/test/org/apache/lucene/index/TestDeletionPolicy.java
index f2b5686..ac2ff78 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestDeletionPolicy.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestDeletionPolicy.java
@@ -459,7 +459,8 @@
dir,
newIndexWriterConfig(new MockAnalyzer(random()))
.setIndexDeletionPolicy(policy)
- .setIndexCommit(lastCommit));
+ .setIndexCommit(lastCommit)
+ .setMergePolicy(newLogMergePolicy(10)));
assertEquals(10, writer.getDocStats().numDocs);
// Should undo our rollback:
@@ -476,12 +477,13 @@
dir,
newIndexWriterConfig(new MockAnalyzer(random()))
.setIndexDeletionPolicy(policy)
- .setIndexCommit(lastCommit));
+ .setIndexCommit(lastCommit)
+ .setMergePolicy(newLogMergePolicy(10)));
assertEquals(10, writer.getDocStats().numDocs);
// Commits the rollback:
writer.close();
- // Now 8 because we made another commit
+ // Now 7 because we made another commit
assertEquals(7, DirectoryReader.listCommits(dir).size());
r = DirectoryReader.open(dir);
@@ -507,7 +509,10 @@
// but this time keeping only the last commit:
writer =
new IndexWriter(
- dir, newIndexWriterConfig(new MockAnalyzer(random())).setIndexCommit(lastCommit));
+ dir,
+ newIndexWriterConfig(new MockAnalyzer(random()))
+ .setIndexCommit(lastCommit)
+ .setMergePolicy(newLogMergePolicy(10)));
assertEquals(10, writer.getDocStats().numDocs);
// Reader still sees fully merged index, because writer
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
index 5916ec3..1990ce9 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
@@ -2404,11 +2404,12 @@
writer.deleteDocuments(new Term("id", "xyz"));
assertTrue(writer.hasUncommittedChanges());
- // Must commit, waitForMerges, commit again, to be
- // certain that hasUncommittedChanges returns false:
- writer.commit();
- writer.waitForMerges();
- writer.commit();
+ // Must commit and wait for merges as long as the commit triggers merges to be certain that
+ // hasUncommittedChanges returns false
+ do {
+ writer.waitForMerges();
+ writer.commit();
+ } while (writer.hasPendingMerges());
assertFalse(writer.hasUncommittedChanges());
writer.close();
diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/TestRegExp.java b/lucene/core/src/test/org/apache/lucene/util/automaton/TestRegExp.java
index c960e73..8f6f765 100644
--- a/lucene/core/src/test/org/apache/lucene/util/automaton/TestRegExp.java
+++ b/lucene/core/src/test/org/apache/lucene/util/automaton/TestRegExp.java
@@ -86,6 +86,17 @@
}
}
+ public void testParseIllegalRepeatExp() {
+ // out of order
+ IllegalArgumentException expected =
+ expectThrows(
+ IllegalArgumentException.class,
+ () -> {
+ new RegExp("a{99,11}");
+ });
+ assertTrue(expected.getMessage().contains("out of order"));
+ }
+
static String randomDocValue(int minLength) {
String charPalette = "AAAaaaBbbCccc123456 \t";
StringBuilder sb = new StringBuilder();
diff --git a/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java b/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
index 927fe05..f6dd84e 100644
--- a/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
+++ b/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
@@ -568,7 +568,6 @@
System.out.println(
((tMid - tStart) / (double) TimeUnit.SECONDS.toNanos(1)) + " sec to add all terms");
- assert fstCompiler.getTermCount() == ord;
FST<T> fst = fstCompiler.compile();
long tEnd = System.nanoTime();
System.out.println(
diff --git a/lucene/monitor/src/java/org/apache/lucene/monitor/QueryIndex.java b/lucene/monitor/src/java/org/apache/lucene/monitor/QueryIndex.java
index cac6ea1..5868ec5 100644
--- a/lucene/monitor/src/java/org/apache/lucene/monitor/QueryIndex.java
+++ b/lucene/monitor/src/java/org/apache/lucene/monitor/QueryIndex.java
@@ -68,7 +68,7 @@
search(
new TermQuery(new Term(FIELDS.query_id, queryId)),
(id, query, dataValues) -> bytesHolder[0] = dataValues.mq.binaryValue());
- return serializer.deserialize(bytesHolder[0]);
+ return bytesHolder[0] != null ? serializer.deserialize(bytesHolder[0]) : null;
}
public void scan(QueryCollector matcher) throws IOException {
diff --git a/lucene/monitor/src/test/org/apache/lucene/monitor/TestMonitorPersistence.java b/lucene/monitor/src/test/org/apache/lucene/monitor/TestMonitorPersistence.java
index 945abcd..1a60c29 100644
--- a/lucene/monitor/src/test/org/apache/lucene/monitor/TestMonitorPersistence.java
+++ b/lucene/monitor/src/test/org/apache/lucene/monitor/TestMonitorPersistence.java
@@ -28,16 +28,21 @@
private Path indexDirectory = createTempDir();
- public void testCacheIsRepopulated() throws IOException {
-
- Document doc = new Document();
- doc.add(newTextField(FIELD, "test", Field.Store.NO));
+ protected Monitor newMonitorWithPersistence() throws IOException {
MonitorConfiguration config =
new MonitorConfiguration()
.setIndexPath(
indexDirectory, MonitorQuerySerializer.fromParser(MonitorTestBase::parse));
- try (Monitor monitor = new Monitor(ANALYZER, config)) {
+ return new Monitor(ANALYZER, config);
+ }
+
+ public void testCacheIsRepopulated() throws IOException {
+
+ Document doc = new Document();
+ doc.add(newTextField(FIELD, "test", Field.Store.NO));
+
+ try (Monitor monitor = newMonitorWithPersistence()) {
monitor.register(
mq("1", "test"),
mq("2", "test"),
@@ -58,7 +63,7 @@
e.getMessage());
}
- try (Monitor monitor2 = new Monitor(ANALYZER, config)) {
+ try (Monitor monitor2 = newMonitorWithPersistence()) {
assertEquals(4, monitor2.getQueryCount());
assertEquals(4, monitor2.match(doc, QueryMatch.SIMPLE_MATCHER).getMatchCount());
@@ -67,9 +72,24 @@
}
}
+ public void testGetQueryPresent() throws IOException {
+ try (Monitor monitor = newMonitorWithPersistence()) {
+ MonitorQuery monitorQuery = mq("1", "test");
+ monitor.register(monitorQuery);
+
+ assertEquals(monitorQuery, monitor.getQuery("1"));
+ }
+ }
+
+ public void testGetQueryNotPresent() throws IOException {
+ try (Monitor monitor = newMonitorWithPersistence()) {
+ assertNull(monitor.getQuery("1"));
+ }
+ }
+
public void testEphemeralMonitorDoesNotStoreQueries() throws IOException {
- try (Monitor monitor2 = new Monitor(ANALYZER)) {
+ try (Monitor monitor2 = newMonitor(ANALYZER)) {
IllegalStateException e =
expectThrows(IllegalStateException.class, () -> monitor2.getQuery("query"));
assertEquals(