// blob: 5f830e29dc76e109505efde69a0d0f6e80c5fe58 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.opennlp.corpus_server.impl;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.apache.opennlp.corpus_server.search.SearchService;
import org.apache.opennlp.corpus_server.store.CorporaStore;
import org.apache.opennlp.corpus_server.store.CorpusStore;
import org.apache.uima.UIMAFramework;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.Type;
import org.apache.uima.lucas.indexer.IndexWriterProviderImpl;
import org.apache.uima.resource.FileResourceSpecifier;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.impl.FileResourceSpecifier_impl;
import org.apache.uima.resource.metadata.MetaDataObject;
import org.apache.uima.resource.metadata.TypeSystemDescription;
import org.apache.uima.util.CasCreationUtils;
import org.apache.uima.util.InvalidXMLException;
import org.apache.uima.util.XMLInputSource;
/**
 * Lucene based {@link SearchService} implementation.
 * <p>
 * CASes are indexed with a UIMA Lucas indexer Analysis Engine, one per corpus.
 * A CAS id feature structure is injected into every CAS before indexing so the
 * resulting Lucene document can be looked up (and replaced) by CAS id.
 * <p>
 * Thread-safety: all public methods are synchronized on this instance, so
 * requests are processed one at a time (see the PERFORMANCE note in
 * {@link #search(CorpusStore, String)}).
 */
public class LuceneSearchService implements SearchService {

  final static String LUCENE_ID_FIELD = "id";

  private final static Logger LOGGER = Logger.getLogger(
      LuceneSearchService.class.getName());

  private CorporaStore store;

  /**
   * Maps the corpus id to the Lucas Indexer Analysis Engine.
   */
  private Map<String, AnalysisEngine> corpusIndexerMap = new HashMap<String, AnalysisEngine>();

  /**
   * Maps the corpus id to the Index Searcher instance, if one exists, otherwise
   * it will be created on first access. Cached searchers are kept open across
   * requests and are only closed when they are refreshed or in
   * {@link #shutdown()}.
   */
  private Map<String, IndexSearcher> corpusSearcherMap = new HashMap<String, IndexSearcher>();

  /**
   * Computes the on-disk location of the Lucene index for the given corpus.
   * The path is relative to the server's working directory.
   */
  private static File getIndexDirectory(String corpusId) {
    return new File("index" + File.separator + corpusId);
  }

  /**
   * Copies all bytes from the input stream to the output stream.
   * Neither stream is closed by this method.
   */
  private static void copyStream(InputStream in, OutputStream out) throws IOException {
    byte buffer[] = new byte[1024];
    int len;
    // Use the EOF-correct != -1 test; InputStream.read may in theory
    // return 0 without having reached the end of the stream.
    while ((len = in.read(buffer)) != -1) {
      out.write(buffer, 0, len);
    }
  }

  /**
   * Creates the Lucas indexer Analysis Engine for the given corpus and
   * registers it in {@link #corpusIndexerMap}.
   * <p>
   * The indexer descriptor is loaded from the classpath and configured with
   * the corpus specific index mapping and index writer properties. Both are
   * written to temporary files because the Lucas components expect file
   * locations.
   *
   * @param corpusId the id of the corpus to be indexed
   * @param createIndex if {@code true} a new index is created on disk,
   *     otherwise an existing index is opened
   *
   * @throws IOException if the descriptor cannot be parsed, a temporary file
   *     cannot be written, or the Analysis Engine cannot be produced
   */
  private void createIndexWriter(String corpusId, boolean createIndex) throws IOException {

    // Set the index mapping file for this corpus in the analysis engine descriptor
    CorpusStore corpusStore = store.getCorpus(corpusId);

    XMLInputSource in = new XMLInputSource(LuceneSearchService.class.getResourceAsStream(
        "/org/apache/opennlp/corpus_server/impl/LuceneIndexer.xml"), new File(""));

    try {
      AnalysisEngineDescription specifier =
          (AnalysisEngineDescription) UIMAFramework.getXMLParser().parseResourceSpecifier(in);

      // TODO: How to store mapping file? Should be transmitted during corpus creation ...
      File mappingTmpFile = File.createTempFile("lucas-mapping", corpusId + ".xml");
      mappingTmpFile.deleteOnExit();

      InputStream mappingFileIn = new ByteArrayInputStream(corpusStore.getIndexMapping());
      OutputStream mappingTmpOut = null;
      try {
        mappingTmpOut = new FileOutputStream(mappingTmpFile);
        copyStream(mappingFileIn, mappingTmpOut);
      }
      finally {
        try {
          mappingFileIn.close();
        }
        catch (IOException e) {
          // closing a ByteArrayInputStream cannot fail
        }

        if (mappingTmpOut != null) {
          try {
            mappingTmpOut.close();
          }
          catch (IOException e) {
            // ignored, the copy itself already succeeded or threw
          }
        }
      }

      specifier.getAnalysisEngineMetaData().
          getConfigurationParameterSettings().setParameterValue("mappingFile",
          mappingTmpFile.getAbsolutePath());

      // Set the index writer properties file in the analysis engine
      // and replace the index path with the index location for this corpus
      Properties indexWriterProperties = new Properties();

      InputStream indexWriterPropertiesIn = null;
      try {
        // TODO: Retrieve file form somewhere for this corpus
        indexWriterPropertiesIn = LuceneSearchService.class.getResourceAsStream(
            "/org/apache/opennlp/corpus_server/impl/IndexWriter.properties");

        indexWriterProperties.load(indexWriterPropertiesIn);
      }
      finally {
        if (indexWriterPropertiesIn != null) {
          try {
            indexWriterPropertiesIn.close();
          }
          catch (IOException e) {
            // ignored, properties were already loaded or load threw
          }
        }
      }

      indexWriterProperties.setProperty(IndexWriterProviderImpl.INDEX_PATH_PROPERTY,
          getIndexDirectory(corpusId).getAbsolutePath());
      indexWriterProperties.setProperty(IndexWriterProviderImpl.CREATE_INDEX_PROPERTY,
          Boolean.toString(createIndex));

      File indexWriterTmpFile = File.createTempFile("index-writer", corpusId + ".properties");
      indexWriterTmpFile.deleteOnExit();

      OutputStream indexPropertiesOut = null;
      try {
        indexPropertiesOut = new FileOutputStream(indexWriterTmpFile);

        // write properties into a tmp file
        indexWriterProperties.store(indexPropertiesOut, null);
      }
      finally {
        if (indexPropertiesOut != null) {
          try {
            indexPropertiesOut.close();
          }
          catch (IOException e) {
            // ignored, store() already succeeded or threw
          }
        }
      }

      FileResourceSpecifier indexWriterFileSpecifier = new FileResourceSpecifier_impl();
      // File.toURL() is deprecated because it does not escape special
      // characters; go through toURI() instead.
      indexWriterFileSpecifier.setFileUrl(indexWriterTmpFile.toURI().toURL().toString());

      // TODO: This will fail ...
      specifier.getResourceManagerConfiguration().getExternalResources()[0]
          .setResourceSpecifier(indexWriterFileSpecifier);

      AnalysisEngine indexer = UIMAFramework.produceAnalysisEngine(specifier);

      corpusIndexerMap.put(corpusId, indexer);
    } catch (InvalidXMLException e) {
      throw new IOException(e);
    } catch (ResourceInitializationException e) {
      throw new IOException(e);
    }
  }

  @Override
  public synchronized void initialize(CorporaStore store) throws IOException {
    this.store = store;

    // Open an index writer for every known corpus. A single failing corpus
    // must not prevent the search service from starting, so failures are
    // logged and skipped.
    for (String corpusId : store.getCorpusIds()) {
      try {
        createIndexWriter(corpusId, false);
        LOGGER.info("Created Index Writer for " + corpusId + " corpus.");
      }
      catch (IOException e) {
        // Log the cause as well, otherwise the failure is hard to diagnose
        LOGGER.log(Level.WARNING,
            "Failed to open Index Writer for " + corpusId + " corpus.", e);
      }
    }
  }

  @Override
  public synchronized void createIndex(CorpusStore store) throws IOException {
    createIndexWriter(store.getCorpusId(), true);
    LOGGER.info("Created Index Writer for " + store.getCorpusId() + " corpus.");
  }

  /**
   * Drops the index of the given corpus.
   * <p>
   * Not implemented yet; the index files remain on disk.
   */
  public synchronized void dropIndex(CorpusStore store) throws IOException {
  }

  @Override
  public synchronized void index(CorpusStore store, String casId) throws IOException {

    // TODO: Need to take care for thread safety ..
    String corpusId = store.getCorpusId();

    AnalysisEngine indexer = corpusIndexerMap.get(corpusId);

    // Type system which declares the CAS id feature structure injected below
    InputStream indexTsIn = LuceneSearchService.class.getResourceAsStream(
        "/org/apache/opennlp/corpus_server/impl/TypeSystem.xml");

    TypeSystemDescription indexTypeDesc;
    try {
      indexTypeDesc = UimaUtil.createTypeSystemDescription(indexTsIn);
    }
    finally {
      indexTsIn.close();
    }

    List<MetaDataObject> specs = new ArrayList<MetaDataObject>();
    specs.add(indexTypeDesc);

    TypeSystemDescription tsDescription = UimaUtil.createTypeSystemDescription(
        new ByteArrayInputStream(store.getTypeSystem()));
    specs.add(tsDescription);

    // Note: Creating a fresh CAS for every request might be a performance problem
    CAS cas;
    try {
      cas = CasCreationUtils.createCas(specs);
    } catch (ResourceInitializationException e) {
      throw new IOException(e);
    }

    byte[] casBytes = store.getCAS(casId);

    if (casBytes != null) {
      UimaUtil.deserializeXmiCAS(cas, new ByteArrayInputStream(casBytes));
    }
    else {
      // The CAS is no longer in the store (removal case): index an empty
      // document which only carries the id feature structure.
      cas.setDocumentText(null);
    }

    // Inject id feature structure into the CAS so the Lucene document can be
    // looked up and replaced by CAS id
    Type casIdType = cas.getTypeSystem().getType(LuceneIndexer.CAS_ID_TYPE);
    Feature casIdFeature = casIdType.getFeatureByBaseName(LuceneIndexer.CAS_ID_FEEATURE);

    FeatureStructure casIdFS = cas.createFS(casIdType);
    casIdFS.setStringValue(casIdFeature, casId);
    cas.addFsToIndexes(casIdFS);

    try {
      indexer.process(cas);
    } catch (AnalysisEngineProcessException e) {
      LOGGER.log(Level.SEVERE, "Failed to index CAS: " + casId, e);
    }
  }

  /**
   * Removes the CAS from the index.
   * <p>
   * Implemented by re-indexing: since the CAS was deleted from the store,
   * {@link #index(CorpusStore, String)} only indexes the injected id feature
   * structure, which replaces the previously indexed document.
   */
  @Override
  public void removeFromIndex(CorpusStore store, String casId)
      throws IOException {
    index(store, casId);
  }

  @Override
  public synchronized List<String> search(CorpusStore store, String q)
      throws IOException {

    // PERFORMANCE: This method can only be executed by one thread at a time
    // when there are concurrent search requests this will result
    // in longer than necessary delays to answer them.

    IndexSearcher searcher = corpusSearcherMap.get(store.getCorpusId());

    // Opening or reopening an index might fail,
    // in this case every search request fails as well.
    if (searcher == null) {
      File indexLocation = getIndexDirectory(store.getCorpusId());

      Directory indexDirectory = FSDirectory.open(indexLocation);
      IndexReader indexReader = IndexReader.open(indexDirectory, false);

      searcher = new IndexSearcher(indexReader);
      corpusSearcherMap.put(store.getCorpusId(), searcher);
    }

    // Refresh the cached searcher if the index changed since it was opened
    if (!searcher.getIndexReader().isCurrent()) {
      IndexReader staleReader = searcher.getIndexReader();
      IndexReader freshIndexReader = staleReader.reopen();

      searcher.close();

      // BUG FIX: closing an IndexSearcher which was constructed from an
      // IndexReader does not close that reader; close the stale reader
      // explicitly, otherwise it leaks on every refresh.
      if (freshIndexReader != staleReader) {
        staleReader.close();
      }

      searcher = new IndexSearcher(freshIndexReader);
      corpusSearcherMap.put(store.getCorpusId(), searcher);
    }

    QueryParser parser = new QueryParser(Version.LUCENE_29, "text",
        new StandardAnalyzer(Version.LUCENE_29));

    Query query;
    try {
      query = parser.parse(q);
    } catch (ParseException e) {
      throw new IOException(e);
    }

    final List<String> results = new ArrayList<String>();

    final IndexSearcher finalSearcher = searcher;

    // query index and collect the CAS id of every hit
    searcher.search(query, new Collector() {

      int docBase = Integer.MIN_VALUE;

      @Override
      public void setScorer(Scorer scorer) throws IOException {
        // scores are not needed, ids are collected in index order
      }

      @Override
      public void setNextReader(IndexReader reader, int docBase) throws IOException {
        this.docBase = docBase;
      }

      @Override
      public void collect(int id) throws IOException {
        Document doc = finalSearcher.doc(docBase + id);
        String idString = doc.get(LUCENE_ID_FIELD);
        results.add(idString);
      }

      @Override
      public boolean acceptsDocsOutOfOrder() {
        return false;
      }
    });

    // BUG FIX: Do NOT close the searcher here. It stays cached in
    // corpusSearcherMap and is reused by the next request; closing it made
    // the following request fail on the already closed index reader.
    // Searchers are closed when refreshed above or in shutdown().

    return results;
  }

  @Override
  public void shutdown() throws IOException {
    // Destroy all indexer Analysis Engines
    for (AnalysisEngine indexer : corpusIndexerMap.values()) {
      if (indexer != null) {
        indexer.destroy();
      }
    }

    // Close all cached searchers together with their underlying readers
    for (Map.Entry<String, IndexSearcher> entry : corpusSearcherMap.entrySet()) {
      IndexSearcher searcher = entry.getValue();
      if (searcher != null) {
        try {
          searcher.close();
          // see search(): the reader is not closed by IndexSearcher.close()
          searcher.getIndexReader().close();
        }
        catch (IOException e) {
          LOGGER.log(Level.SEVERE, "Failed to shutdown searcher for "
              + entry.getKey() + " corpus!", e);
        }
      }
    }
  }
}