Updated Apache Lucene to 8.1.1 and made the necessary code changes.
The search index rebuild task is now triggered automatically during init if the index version is incompatible.
diff --git a/app/pom.xml b/app/pom.xml
index e382560..94bf7ae 100644
--- a/app/pom.xml
+++ b/app/pom.xml
@@ -49,7 +49,7 @@
<guice.version>4.2.2</guice.version>
<log4j.version>1.2.17</log4j.version>
<log4j2.version>2.10.0</log4j2.version>
- <lucene.version>4.10.4</lucene.version>
+ <lucene.version>8.1.1</lucene.version>
<oauth-core.version>20100527</oauth-core.version>
<maven-war.version>3.1.0</maven-war.version>
<maven-surefire.version>2.17</maven-surefire.version>
diff --git a/app/src/main/java/org/apache/roller/weblogger/business/search/FieldConstants.java b/app/src/main/java/org/apache/roller/weblogger/business/search/FieldConstants.java
index fc90002..7b19361 100644
--- a/app/src/main/java/org/apache/roller/weblogger/business/search/FieldConstants.java
+++ b/app/src/main/java/org/apache/roller/weblogger/business/search/FieldConstants.java
@@ -18,7 +18,6 @@
/* Created on Jul 19, 2003 */
package org.apache.roller.weblogger.business.search;
-import org.apache.lucene.util.Version;
/**
* Field constants for indexing blog entries and comments.
@@ -27,9 +26,6 @@
*/
public final class FieldConstants {
- // Set what version we are on
- public static final Version LUCENE_VERSION = Version.LUCENE_44;
-
public static final String ANCHOR = "anchor";
public static final String UPDATED = "updated";
public static final String ID = "id";
diff --git a/app/src/main/java/org/apache/roller/weblogger/business/search/IndexManagerImpl.java b/app/src/main/java/org/apache/roller/weblogger/business/search/IndexManagerImpl.java
index e835c4f..d93fc42 100644
--- a/app/src/main/java/org/apache/roller/weblogger/business/search/IndexManagerImpl.java
+++ b/app/src/main/java/org/apache/roller/weblogger/business/search/IndexManagerImpl.java
@@ -22,6 +22,7 @@
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
+import java.nio.file.Path;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
@@ -39,7 +40,6 @@
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.RAMDirectory;
-import org.apache.lucene.util.Version;
import org.apache.roller.weblogger.WebloggerException;
import org.apache.roller.weblogger.business.InitializationException;
import org.apache.roller.weblogger.business.Weblogger;
@@ -87,7 +87,7 @@
private boolean inconsistentAtStartup = false;
- private ReadWriteLock rwl = new ReentrantReadWriteLock();
+ private final ReadWriteLock rwl = new ReentrantReadWriteLock();
// ~ Constructors
// ===========================================================
@@ -126,6 +126,7 @@
/**
* @inheritDoc
*/
+ @Override
public void initialize() throws InitializationException {
// only initialize the index if search is enabled
@@ -155,13 +156,24 @@
}
if (indexExists()) {
+ FSDirectory filesystem = getFSDirectory(false);
if (useRAMIndex) {
- Directory filesystem = getFSDirectory(false);
try {
fRAMindex = new RAMDirectory(filesystem, IOContext.DEFAULT);
} catch (IOException e) {
mLogger.error("Error creating in-memory index", e);
}
+ } else {
+ // test if the index is readable, if the version is outdated this might fail and we rebuild.
+ // TODO: we probably should just eagerly initialize the actual reader here, since we have it already
+ try {
+ DirectoryReader readerProbe = DirectoryReader.open(filesystem);
+ readerProbe.close();
+ } catch (IOException ex) {
+ mLogger.warn("Error opening search index, scheduling rebuild.", ex);
+ getFSDirectory(true);
+ inconsistentAtStartup = true;
+ }
}
} else {
mLogger.debug("Creating index");
@@ -191,27 +203,32 @@
// ~ Methods
// ================================================================
+ @Override
public void rebuildWebsiteIndex() throws WebloggerException {
scheduleIndexOperation(new RebuildWebsiteIndexOperation(roller, this,
null));
}
+ @Override
public void rebuildWebsiteIndex(Weblog website) throws WebloggerException {
scheduleIndexOperation(new RebuildWebsiteIndexOperation(roller, this,
website));
}
+ @Override
public void removeWebsiteIndex(Weblog website) throws WebloggerException {
scheduleIndexOperation(new RemoveWebsiteIndexOperation(roller, this,
website));
}
+ @Override
public void addEntryIndexOperation(WeblogEntry entry)
throws WebloggerException {
AddEntryOperation addEntry = new AddEntryOperation(roller, this, entry);
scheduleIndexOperation(addEntry);
}
+ @Override
public void addEntryReIndexOperation(WeblogEntry entry)
throws WebloggerException {
ReIndexEntryOperation reindex = new ReIndexEntryOperation(roller, this,
@@ -219,6 +236,7 @@
scheduleIndexOperation(reindex);
}
+ @Override
public void removeEntryIndexOperation(WeblogEntry entry)
throws WebloggerException {
RemoveEntryOperation removeOp = new RemoveEntryOperation(roller, this,
@@ -230,6 +248,7 @@
return rwl;
}
+ @Override
public boolean isInconsistentAtStartup() {
return inconsistentAtStartup;
}
@@ -240,25 +259,25 @@
* @return Analyzer to be used in manipulating the database.
*/
public static final Analyzer getAnalyzer() {
- return instantiateAnalyzer(FieldConstants.LUCENE_VERSION);
+ return instantiateAnalyzer();
}
- private static Analyzer instantiateAnalyzer(final Version luceneVersion) {
+ private static Analyzer instantiateAnalyzer() {
final String className = WebloggerConfig.getProperty("lucene.analyzer.class");
try {
final Class<?> clazz = Class.forName(className);
- return (Analyzer) ConstructorUtils.invokeConstructor(clazz, luceneVersion);
+ return (Analyzer) ConstructorUtils.invokeConstructor(clazz, null);
} catch (final ClassNotFoundException e) {
mLogger.error("failed to lookup analyzer class: " + className, e);
- return instantiateDefaultAnalyzer(luceneVersion);
+ return instantiateDefaultAnalyzer();
} catch (final NoSuchMethodException | InstantiationException | IllegalAccessException | InvocationTargetException e) {
mLogger.error("failed to instantiate analyzer: " + className, e);
- return instantiateDefaultAnalyzer(luceneVersion);
+ return instantiateDefaultAnalyzer();
}
}
- private static Analyzer instantiateDefaultAnalyzer(final Version luceneVersion) {
- return new StandardAnalyzer(luceneVersion);
+ private static Analyzer instantiateDefaultAnalyzer() {
+ return new StandardAnalyzer();
}
private void scheduleIndexOperation(final IndexOperation op) {
@@ -277,6 +296,7 @@
/**
* @param op
*/
+ @Override
public void executeIndexOperationNow(final IndexOperation op) {
try {
// only if search is enabled
@@ -298,7 +318,9 @@
if (reader == null) {
try {
reader = DirectoryReader.open(getIndexDirectory());
- } catch (IOException e) {
+ } catch (IOException ex) {
+ mLogger.error("Error opening DirectoryReader", ex);
+ throw new RuntimeException(ex);
}
}
return reader;
@@ -328,13 +350,13 @@
return false;
}
- private Directory getFSDirectory(boolean delete) {
+ private FSDirectory getFSDirectory(boolean delete) {
- Directory directory = null;
+ FSDirectory directory = null;
try {
- directory = FSDirectory.open(new File(indexDir));
+ directory = FSDirectory.open(Path.of(indexDir));
if (delete && directory != null) {
// clear old files
@@ -361,36 +383,37 @@
try {
IndexWriterConfig config = new IndexWriterConfig(
- FieldConstants.LUCENE_VERSION, new LimitTokenCountAnalyzer(
- IndexManagerImpl.getAnalyzer(),
- IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL));
+ new LimitTokenCountAnalyzer(
+ IndexManagerImpl.getAnalyzer(), 128));
writer = new IndexWriter(dir, config);
} catch (IOException e) {
mLogger.error("Error creating index", e);
} finally {
- try {
- if (writer != null) {
+ if (writer != null) {
+ try {
writer.close();
+ } catch (IOException ex) {
+ mLogger.warn("Unable to close IndexWriter.", ex);
}
- } catch (IOException e) {
}
}
}
private IndexOperation getSaveIndexOperation() {
return new WriteToIndexOperation(this) {
+ @Override
public void doRun() {
Directory dir = getIndexDirectory();
Directory fsdir = getFSDirectory(true);
IndexWriter writer = null;
try {
- IndexWriterConfig config = new IndexWriterConfig(FieldConstants.LUCENE_VERSION,
- new LimitTokenCountAnalyzer(IndexManagerImpl.getAnalyzer(),
- IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL));
+ IndexWriterConfig config = new IndexWriterConfig(
+ new LimitTokenCountAnalyzer(
+ IndexManagerImpl.getAnalyzer(), 128));
writer = new IndexWriter(fsdir, config);
- writer.addIndexes(new Directory[] { dir });
+ writer.addIndexes(dir);
writer.commit();
indexConsistencyMarker.delete();
} catch (IOException e) {
@@ -398,22 +421,24 @@
// Delete the directory, since there was a problem saving the RAM contents
getFSDirectory(true);
} finally {
- try {
- if (writer != null) {
+ if (writer != null) {
+ try {
writer.close();
+ } catch (IOException ex) {
+ mLogger.warn("Unable to close IndexWriter.", ex);
}
- } catch (IOException e1) {
- mLogger.warn("Unable to close IndexWriter.");
}
}
}
};
}
+ @Override
public void release() {
// no-op
}
+ @Override
public void shutdown() {
if (useRAMIndex) {
scheduleIndexOperation(getSaveIndexOperation());
@@ -421,12 +446,12 @@
indexConsistencyMarker.delete();
}
- try {
- if (reader != null) {
+ if (reader != null) {
+ try {
reader.close();
+ } catch (IOException ex) {
+ mLogger.error("Unable to close reader.", ex);
}
- } catch (IOException e) {
- // won't happen, since it was
}
}
diff --git a/app/src/main/java/org/apache/roller/weblogger/business/search/operations/IndexOperation.java b/app/src/main/java/org/apache/roller/weblogger/business/search/operations/IndexOperation.java
index a851327..22d8d7c 100644
--- a/app/src/main/java/org/apache/roller/weblogger/business/search/operations/IndexOperation.java
+++ b/app/src/main/java/org/apache/roller/weblogger/business/search/operations/IndexOperation.java
@@ -26,10 +26,12 @@
import org.apache.lucene.analysis.miscellaneous.LimitTokenCountAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.util.BytesRef;
import org.apache.roller.weblogger.business.search.FieldConstants;
import org.apache.roller.weblogger.business.search.IndexManagerImpl;
import org.apache.roller.weblogger.config.WebloggerConfig;
@@ -135,8 +137,8 @@
// keyword
if (data.getPubTime() != null) {
- doc.add(new StringField(FieldConstants.PUBLISHED, data.getPubTime()
- .toString(), Field.Store.YES));
+ // SearchOperation sorts results by date
+ doc.add(new SortedDocValuesField(FieldConstants.PUBLISHED, new BytesRef(data.getPubTime().toString())));
}
// index Category, needs to be in lower case as it is used in a term
@@ -173,8 +175,7 @@
IndexManagerImpl.getAnalyzer(),
WebloggerConfig.getIntProperty("lucene.analyzer.maxTokenCount"));
- IndexWriterConfig config = new IndexWriterConfig(
- FieldConstants.LUCENE_VERSION, analyzer);
+ IndexWriterConfig config = new IndexWriterConfig(analyzer);
writer = new IndexWriter(manager.getIndexDirectory(), config);
@@ -201,6 +202,7 @@
/**
* @see java.lang.Runnable#run()
*/
+ @Override
public void run() {
doRun();
}
diff --git a/app/src/main/java/org/apache/roller/weblogger/business/search/operations/ReadFromIndexOperation.java b/app/src/main/java/org/apache/roller/weblogger/business/search/operations/ReadFromIndexOperation.java
index 787fb4f..e45ac3b 100644
--- a/app/src/main/java/org/apache/roller/weblogger/business/search/operations/ReadFromIndexOperation.java
+++ b/app/src/main/java/org/apache/roller/weblogger/business/search/operations/ReadFromIndexOperation.java
@@ -32,13 +32,14 @@
private static Log mLogger = LogFactory.getFactory().getInstance(
ReadFromIndexOperation.class);
+ @Override
public final void run() {
try {
manager.getReadWriteLock().readLock().lock();
doRun();
} catch (Exception e) {
- mLogger.info("Error acquiring read lock on index", e);
+ mLogger.error("Error acquiring read lock on index", e);
} finally {
manager.getReadWriteLock().readLock().unlock();
}
diff --git a/app/src/main/java/org/apache/roller/weblogger/business/search/operations/SearchOperation.java b/app/src/main/java/org/apache/roller/weblogger/business/search/operations/SearchOperation.java
index 73dc2e6..165313d 100644
--- a/app/src/main/java/org/apache/roller/weblogger/business/search/operations/SearchOperation.java
+++ b/app/src/main/java/org/apache/roller/weblogger/business/search/operations/SearchOperation.java
@@ -52,11 +52,11 @@
private static Log mLogger = LogFactory.getFactory().getInstance(
SearchOperation.class);
- private static String[] SEARCH_FIELDS = new String[] {
+ private static final String[] SEARCH_FIELDS = new String[] {
FieldConstants.CONTENT, FieldConstants.TITLE,
FieldConstants.C_CONTENT };
- private static Sort SORTER = new Sort(new SortField(
+ private static final Sort SORTER = new Sort(new SortField(
FieldConstants.PUBLISHED, SortField.Type.STRING, true));
// ~ Instance fields
@@ -95,6 +95,7 @@
*
* @see java.lang.Runnable#run()
*/
+ @Override
public void doRun() {
final int docLimit = 500;
searchresults = null;
@@ -105,8 +106,7 @@
searcher = new IndexSearcher(reader);
MultiFieldQueryParser multiParser = new MultiFieldQueryParser(
- FieldConstants.LUCENE_VERSION, SEARCH_FIELDS,
- IndexManagerImpl.getAnalyzer());
+ SEARCH_FIELDS, IndexManagerImpl.getAnalyzer());
// Make it an AND by default. Comment this out for an or (default)
multiParser.setDefaultOperator(MultiFieldQueryParser.Operator.AND);
@@ -118,32 +118,30 @@
websiteHandle);
if (tUsername != null) {
- BooleanQuery bQuery = new BooleanQuery();
- bQuery.add(query, BooleanClause.Occur.MUST);
- bQuery.add(new TermQuery(tUsername), BooleanClause.Occur.MUST);
- query = bQuery;
+ query = new BooleanQuery.Builder()
+ .add(query, BooleanClause.Occur.MUST)
+ .add(new TermQuery(tUsername), BooleanClause.Occur.MUST)
+ .build();
}
if (category != null) {
Term tCategory = new Term(FieldConstants.CATEGORY, category.toLowerCase());
- BooleanQuery bQuery = new BooleanQuery();
- bQuery.add(query, BooleanClause.Occur.MUST);
- bQuery.add(new TermQuery(tCategory), BooleanClause.Occur.MUST);
- query = bQuery;
+ query = new BooleanQuery.Builder()
+ .add(query, BooleanClause.Occur.MUST)
+ .add(new TermQuery(tCategory), BooleanClause.Occur.MUST)
+ .build();
}
- Term tLocale = IndexUtil.getTerm(FieldConstants.LOCALE,
- locale);
+ Term tLocale = IndexUtil.getTerm(FieldConstants.LOCALE, locale);
if (tLocale != null) {
- BooleanQuery bQuery = new BooleanQuery();
- bQuery.add(query, BooleanClause.Occur.MUST);
- bQuery.add(new TermQuery(tLocale), BooleanClause.Occur.MUST);
- query = bQuery;
+ query = new BooleanQuery.Builder()
+ .add(query, BooleanClause.Occur.MUST)
+ .add(new TermQuery(tLocale), BooleanClause.Occur.MUST)
+ .build();
}
- searchresults = searcher.search(query, null/* Filter */, docLimit,
- SORTER);
+ searchresults = searcher.search(query, docLimit, SORTER);
} catch (IOException e) {
mLogger.error("Error searching index", e);
@@ -193,7 +191,7 @@
if (searchresults == null) {
return -1;
}
- return searchresults.totalHits;
+ return (int) searchresults.totalHits.value;
}
/**