// blob: 2b0240bc83227c7693f1b31dd4e972a5cd157fa7 [file] [log] [blame]
package org.apache.lucene.ant;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.File;
import java.io.IOException;
import java.text.ParseException;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.Vector;
import java.lang.reflect.Constructor;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.apache.tools.ant.BuildException;
import org.apache.tools.ant.DynamicConfigurator;
import org.apache.tools.ant.Project;
import org.apache.tools.ant.Task;
import org.apache.tools.ant.types.EnumeratedAttribute;
import org.apache.tools.ant.types.FileSet;
import org.apache.tools.ant.types.Resource;
import org.apache.tools.ant.types.ResourceCollection;
import org.apache.tools.ant.types.resources.FileResource;
/**
 * Ant task to index files with Lucene.
 *
 * <p>Files are supplied through nested resource collections (for example a
 * fileset). Every readable file is turned into a {@link Document} by the
 * configured {@link DocumentHandler} and written to the index together with
 * two additional, non-analyzed fields: <code>path</code> (the file path) and
 * <code>modified</code> (the file's last-modified time).
 *
 * <p>When an existing index is updated (<code>overwrite</code> is false and
 * the index can be opened), a file is skipped if the <code>modified</code>
 * value stored for its path still matches the file on disk; otherwise the
 * stored document is replaced.
 */
public class IndexTask extends Task {
  /**
   * Resource collections whose files are to be indexed.
   */
  protected Vector<ResourceCollection> rcs = new Vector<ResourceCollection>();

  /**
   * Overwrite (re-create) the index?
   */
  private boolean overwrite = false;

  /**
   * Directory in which the index is stored.
   */
  private File indexDir;

  /**
   * Document handler classname.
   */
  private String handlerClassName =
      FileExtensionDocumentHandler.class.getName();

  /**
   * Document handler instance, created in {@link #execute()}.
   */
  private DocumentHandler handler;

  /**
   * Analyzer classname.
   */
  private String analyzerClassName =
      StandardAnalyzer.class.getName();

  /**
   * Analyzer instance, created in {@link #execute()}.
   */
  private Analyzer analyzer;

  /**
   * Lucene merge factor.
   */
  private int mergeFactor = 20;

  /**
   * Optional nested configuration for a configurable document handler.
   */
  private HandlerConfig handlerConfig;

  /**
   * Use the compound file format for the index?
   */
  private boolean useCompoundIndex = true;

  /**
   * Creates new instance.
   */
  public IndexTask() {
  }

  /**
   * Specifies the directory where the index will be stored.
   *
   * @param indexDir the index directory
   */
  public void setIndex(File indexDir) {
    this.indexDir = indexDir;
  }

  /**
   * Sets the mergeFactor attribute of the IndexTask object.
   *
   * @param mergeFactor The new mergeFactor value
   */
  public void setMergeFactor(int mergeFactor) {
    this.mergeFactor = mergeFactor;
  }

  /**
   * Sets the overwrite attribute of the IndexTask object.
   *
   * @param overwrite The new overwrite value
   */
  public void setOverwrite(boolean overwrite) {
    this.overwrite = overwrite;
  }

  /**
   * If creating a new index and this is set to true, the
   * index will be created in compound format.
   *
   * @param useCompoundIndex The new useCompoundIndex value
   */
  public void setUseCompoundIndex(boolean useCompoundIndex) {
    this.useCompoundIndex = useCompoundIndex;
  }

  /**
   * Sets the documentHandler attribute of the IndexTask object.
   *
   * @param classname The new documentHandler value
   */
  public void setDocumentHandler(String classname) {
    handlerClassName = classname;
  }

  /**
   * Sets the analyzer based on the builtin Lucene analyzer types.
   *
   * TODO: Enforce analyzer and analyzerClassName to be mutually exclusive
   *
   * @param type one of the names accepted by {@link AnalyzerType}
   */
  public void setAnalyzer(AnalyzerType type) {
    analyzerClassName = type.getClassname();
  }

  /**
   * Sets the analyzer by fully qualified class name. The last call of
   * either this method or {@link #setAnalyzer(AnalyzerType)} wins.
   *
   * @param classname name of an {@link Analyzer} subclass
   */
  public void setAnalyzerClassName(String classname) {
    analyzerClassName = classname;
  }

  /**
   * Adds a set of files (nested fileset attribute).
   *
   * @param set FileSet to be added
   */
  public void addFileset(FileSet set) {
    add(set);
  }

  /**
   * Adds a collection of resources to index.
   *
   * @param res a resource collection to index.
   * @since Ant 1.7
   */
  public void add(ResourceCollection res) {
    rcs.add(res);
  }

  /**
   * Sets custom properties for a configurable document handler.
   *
   * @param config the nested config element
   * @exception BuildException if a config element was already added
   */
  public void addConfig(HandlerConfig config) throws BuildException {
    if (handlerConfig != null) {
      throw new BuildException("Only one config element allowed");
    }
    handlerConfig = config;
  }

  /**
   * Instantiates the named analyzer class reflectively.
   *
   * @param className fully qualified name of an {@link Analyzer} subclass
   * @return a new analyzer instance
   * @exception Exception if the class cannot be loaded, is not an Analyzer,
   *            or cannot be instantiated
   */
  private static Analyzer createAnalyzer(String className) throws Exception {
    final Class<? extends Analyzer> clazz = Class.forName(className).asSubclass(Analyzer.class);
    try {
      // first try to use a ctor with version parameter (needed for many
      // new Analyzers that have no default one anymore)
      Constructor<? extends Analyzer> cnstr = clazz.getConstructor(Version.class);
      return cnstr.newInstance(Version.LUCENE_CURRENT);
    } catch (NoSuchMethodException nsme) {
      // otherwise use default ctor
      return clazz.newInstance();
    }
  }

  /**
   * Begins the indexing.
   *
   * @exception BuildException If the index attribute is missing, the
   *            handler or analyzer cannot be created, or an error occurs
   *            indexing the fileset
   */
  @Override
  public void execute() throws BuildException {
    // fail with a readable message instead of a NullPointerException later
    if (indexDir == null) {
      throw new BuildException("The index attribute must be set");
    }

    // construct handler and analyzer dynamically
    try {
      handler = Class.forName(handlerClassName).asSubclass(DocumentHandler.class).newInstance();
      analyzer = IndexTask.createAnalyzer(analyzerClassName);
    } catch (Exception e) {
      throw new BuildException(e);
    }

    log("Document handler = " + handler.getClass(), Project.MSG_VERBOSE);
    log("Analyzer = " + analyzer.getClass(), Project.MSG_VERBOSE);

    if (handler instanceof ConfigurableDocumentHandler) {
      // the nested config element is optional; hand over empty properties
      // when it was not specified
      Properties props = (handlerConfig != null)
          ? handlerConfig.getProperties()
          : new Properties();
      ((ConfigurableDocumentHandler) handler).configure(props);
    }

    try {
      indexDocs();
    } catch (IOException e) {
      throw new BuildException(e);
    }
  }

  /**
   * Index the collected resources.
   *
   * @exception IOException if Lucene I/O exception
   */
  private void indexDocs() throws IOException {
    Date start = new Date();

    boolean create = overwrite;
    // If the index directory doesn't exist,
    // create it and force create mode
    if (indexDir.mkdirs() && !overwrite) {
      create = true;
    }

    FSDirectory dir = FSDirectory.open(indexDir);
    try {
      // A non-null searcher means last-modified checking is active.
      Searcher searcher = null;
      try {
        if (!create) {
          try {
            searcher = new IndexSearcher(dir, true);
          } catch (IOException ioe) {
            log("IOException: " + ioe.getMessage());
            // Ignore, which indicates to index all documents
          }
        }
        log("checkLastModified = " + (searcher != null), Project.MSG_VERBOSE);

        // CREATE_OR_APPEND (rather than APPEND) so that an existing but
        // still empty index directory does not make the writer fail.
        IndexWriterConfig conf = new IndexWriterConfig(
            Version.LUCENE_CURRENT, analyzer).setOpenMode(
            create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);
        LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy();
        lmp.setUseCompoundFile(useCompoundIndex);
        lmp.setUseCompoundDocStore(useCompoundIndex);
        lmp.setMergeFactor(mergeFactor);

        IndexWriter writer = new IndexWriter(dir, conf);
        int totalFiles = 0;
        int totalIndexed = 0;
        int totalIgnored = 0;
        try {
          for (ResourceCollection rc : rcs) {
            if (!rc.isFilesystemOnly()) {
              continue;
            }
            Iterator<?> resources = rc.iterator();
            while (resources.hasNext()) {
              Resource r = (Resource) resources.next();
              if (!r.isExists() || !(r instanceof FileResource)) {
                continue;
              }

              totalFiles++;

              File file = ((FileResource) r).getFile();
              if (!file.exists() || !file.canRead()) {
                throw new BuildException("File \"" +
                    file.getAbsolutePath()
                    + "\" does not exist or is not readable.");
              }

              Term pathTerm = new Term("path", file.getPath());

              // if document is found, compare the indexed last modified
              // time with the current file - don't index if up to date
              boolean replaceExisting = false;
              if (searcher != null) {
                Document indexed = findIndexed(searcher, pathTerm);
                if (indexed != null) {
                  if (isUpToDate(indexed, file)) {
                    continue;
                  }
                  replaceExisting = true;
                }
              }

              log("Indexing " + file.getPath(), Project.MSG_VERBOSE);
              Document doc;
              try {
                doc = handler.getDocument(file);
              } catch (DocumentHandlerException e) {
                throw new BuildException(e);
              }

              if (doc == null) {
                totalIgnored++;
                continue;
              }

              // Add the path of the file as a field named "path". Use a
              // Keyword field, so that the index stores the path, and so
              // that the path is searchable
              doc.add(new Field("path", file.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED));

              // Add the last modified date of the file a field named
              // "modified". Use a Keyword field, so that it's searchable,
              // but so that no attempt is made to tokenize the field
              // into words.
              doc.add(new Field("modified", DateTools.timeToString(file.lastModified(), DateTools.Resolution.MILLISECOND), Field.Store.YES, Field.Index.NOT_ANALYZED));

              if (replaceExisting) {
                // drop the outdated document for this path so the index
                // does not accumulate duplicates
                writer.updateDocument(pathTerm, doc);
              } else {
                writer.addDocument(doc);
              }
              totalIndexed++;
            }
          }

          writer.optimize();
        } finally {
          // always make sure the writer gets closed,
          // no matter how we exit.
          writer.close();
        }

        Date end = new Date();
        log(totalIndexed + " out of " + totalFiles + " indexed (" +
            totalIgnored + " ignored) in " + (end.getTime() - start.getTime()) +
            " milliseconds");
      } finally {
        // closed here so the searcher is released even if creating or
        // closing the writer fails
        if (searcher != null) {
          searcher.close();
        }
      }
    } finally {
      dir.close();
    }
  }

  /**
   * Looks up the document stored for the given path term.
   *
   * @param searcher searcher on the existing index
   * @param pathTerm term on the <code>path</code> field
   * @return the first matching document, or null if none is indexed
   * @exception IOException if Lucene I/O exception
   */
  private static Document findIndexed(Searcher searcher, Term pathTerm) throws IOException {
    ScoreDoc[] hits = searcher.search(new TermQuery(pathTerm), null, 1).scoreDocs;
    return hits.length > 0 ? searcher.doc(hits[0].doc) : null;
  }

  /**
   * Tells whether the indexed document still matches the file on disk.
   *
   * @param indexed document previously stored for the file
   * @param file the file being considered for indexing
   * @return true only if the stored <code>modified</code> value parses and
   *         equals the file's last-modified time; a missing or unparsable
   *         value yields false so the file is indexed again
   */
  private static boolean isUpToDate(Document indexed, File file) {
    String indexModified = indexed.get("modified");
    if (indexModified == null) {
      return false;
    }
    try {
      return DateTools.stringToTime(indexModified.trim()) == file.lastModified();
    } catch (ParseException e) {
      // stored time is not parsable: treat the entry as stale
      return false;
    }
  }

  /**
   * Nested config element: collects every attribute set on it into a
   * {@link Properties} object for a configurable document handler.
   */
  public static class HandlerConfig implements DynamicConfigurator {
    Properties props = new Properties();

    /**
     * Stores the attribute as a property.
     *
     * @param attributeName name of the attribute
     * @param value value of the attribute
     */
    public void setDynamicAttribute(String attributeName, String value) throws BuildException {
      props.setProperty(attributeName, value);
    }

    /**
     * Nested elements are not supported.
     *
     * @exception BuildException always
     */
    public Object createDynamicElement(String elementName) throws BuildException {
      throw new BuildException("Sub elements not supported");
    }

    /**
     * @return the properties collected so far
     */
    public Properties getProperties() {
      return props;
    }
  }

  /**
   * Enumerated attribute mapping the short analyzer names
   * (<code>simple</code>, <code>standard</code>, <code>stop</code>,
   * <code>whitespace</code>) to the builtin Lucene analyzer classes.
   */
  public static class AnalyzerType extends EnumeratedAttribute {
    private static final Map<String,String> analyzerLookup = new HashMap<String,String>();

    static {
      analyzerLookup.put("simple", SimpleAnalyzer.class.getName());
      analyzerLookup.put("standard", StandardAnalyzer.class.getName());
      analyzerLookup.put("stop", StopAnalyzer.class.getName());
      analyzerLookup.put("whitespace", WhitespaceAnalyzer.class.getName());
    }

    /**
     * @see EnumeratedAttribute#getValues
     */
    @Override
    public String[] getValues() {
      Set<String> keys = analyzerLookup.keySet();
      return keys.toArray(new String[0]);
    }

    /**
     * @return the analyzer class name registered for the current value
     */
    public String getClassname() {
      return analyzerLookup.get(getValue());
    }
  }
}