Merged 1.x-lucene into master
diff --git a/app/fmbrowser/pom.xml b/app/fmbrowser/pom.xml
index a3a2ce6..f266b2e 100644
--- a/app/fmbrowser/pom.xml
+++ b/app/fmbrowser/pom.xml
@@ -67,6 +67,7 @@
<artifactId>poi</artifactId>
</dependency>
+
</dependencies>
<build>
<plugins />
diff --git a/app/fmbrowser/src/main/java/org/apache/oodt/cas/filemgr/browser/model/QueryBuilder.java b/app/fmbrowser/src/main/java/org/apache/oodt/cas/filemgr/browser/model/QueryBuilder.java
index 10f2273..3c08acb 100644
--- a/app/fmbrowser/src/main/java/org/apache/oodt/cas/filemgr/browser/model/QueryBuilder.java
+++ b/app/fmbrowser/src/main/java/org/apache/oodt/cas/filemgr/browser/model/QueryBuilder.java
@@ -18,17 +18,15 @@
package org.apache.oodt.cas.filemgr.browser.model;
import org.apache.lucene.index.Term;
-import org.apache.lucene.queryParser.QueryParser;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.PhraseQuery;
-import org.apache.lucene.search.RangeQuery;
-import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.queryparser.classic.QueryParser;
+import org.apache.lucene.search.*;
+import org.apache.lucene.util.BytesRef;
import org.apache.oodt.cas.filemgr.structs.Query;
import org.apache.oodt.cas.filemgr.structs.RangeQueryCriteria;
import org.apache.oodt.cas.filemgr.structs.TermQueryCriteria;
import org.apache.oodt.cas.filemgr.tools.CASAnalyzer;
+import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
@@ -49,7 +47,7 @@
try {
luceneQ = parser.parse(query);
- } catch (org.apache.lucene.queryParser.ParseException e) {
+ } catch (org.apache.lucene.queryparser.classic.ParseException e) {
// TODO Auto-generated catch block
LOG.log(Level.SEVERE, e.getMessage());
}
@@ -81,17 +79,17 @@
}
}
}
- } else if (luceneQ instanceof RangeQuery) {
- Term startT = ((RangeQuery) luceneQ).getLowerTerm();
- Term endT = ((RangeQuery) luceneQ).getUpperTerm();
- String element = database.getElementID(startT.field());
- if (!element.equals("") && !startT.text().equals("")
- && !endT.text().equals("")) {
- casQ.addCriterion(new RangeQueryCriteria(element, startT.text(), endT
- .text()));
+ } else if (luceneQ instanceof TermRangeQuery) {
+ BytesRef startT = ((TermRangeQuery) luceneQ).getLowerTerm();
+ BytesRef endT = ((TermRangeQuery) luceneQ).getUpperTerm();
+ String element = database.getElementID(((TermRangeQuery) luceneQ).getField());
+ if (!element.equals("") && !startT.utf8ToString().equals("")
+ && !endT.utf8ToString().equals("")) {
+ casQ.addCriterion(new RangeQueryCriteria(element, startT.utf8ToString(), endT
+ .utf8ToString()));
}
} else if (luceneQ instanceof BooleanQuery) {
- BooleanClause[] clauses = ((BooleanQuery) luceneQ).getClauses();
+ List<BooleanClause> clauses = ((BooleanQuery) luceneQ).clauses();
for (BooleanClause clause : clauses) {
GenerateCASQuery(casQ, (clause).getQuery());
}
diff --git a/catalog/pom.xml b/catalog/pom.xml
index 72c538b..a957174 100644
--- a/catalog/pom.xml
+++ b/catalog/pom.xml
@@ -60,6 +60,21 @@
</exclusion>
</exclusions>
</dependency>
+
+ <dependency>
+ <groupId>org.apache.solr</groupId>
+ <artifactId>solr-solrj</artifactId>
+ <exclusions>
+ <exclusion>
+ <groupId>org.apache.lucene</groupId>
+ <artifactId>lucene-core</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.solr</groupId>
+ <artifactId>solr-lucene-core</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
<dependency>
<groupId>commons-httpclient</groupId>
<artifactId>commons-httpclient</artifactId>
diff --git a/core/pom.xml b/core/pom.xml
index 03f3de2..d3d006d 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -63,6 +63,11 @@
<dependencyManagement>
<dependencies>
<dependency>
+ <groupId>org.apache.solr</groupId>
+ <artifactId>solr-solrj</artifactId>
+ <version>1.3.0</version>
+ </dependency>
+ <dependency>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk</artifactId>
<version>1.7.4</version>
@@ -275,9 +280,21 @@
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
- <version>2.0.0</version>
+ <version>6.1.0</version>
</dependency>
<dependency>
+ <groupId>org.apache.lucene</groupId>
+ <artifactId>lucene-queryparser</artifactId>
+ <version>6.1.0</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.lucene</groupId>
+ <artifactId>lucene-analyzers-common</artifactId>
+ <version>6.1.0</version>
+ </dependency>
+
+ <dependency>
<groupId>org.apache.oodt</groupId>
<artifactId>cas-cli</artifactId>
<version>${project.parent.version}</version>
@@ -380,11 +397,6 @@
<version>1.3.0</version>
</dependency>
<dependency>
- <groupId>org.apache.solr</groupId>
- <artifactId>solr-solrj</artifactId>
- <version>1.3.0</version>
- </dependency>
- <dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-core</artifactId>
<version>1.10</version>
diff --git a/crawler/pom.xml b/crawler/pom.xml
index 5428b53..1fa731e 100644
--- a/crawler/pom.xml
+++ b/crawler/pom.xml
@@ -48,6 +48,20 @@
-->
<dependencies>
<dependency>
+ <groupId>org.apache.solr</groupId>
+ <artifactId>solr-solrj</artifactId>
+ <exclusions>
+ <exclusion>
+ <groupId>org.apache.lucene</groupId>
+ <artifactId>lucene-core</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.solr</groupId>
+ <artifactId>solr-lucene-core</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
</dependency>
diff --git a/crawler/src/main/java/org/apache/oodt/cas/crawl/action/SolrIndexingAction.java b/crawler/src/main/java/org/apache/oodt/cas/crawl/action/SolrIndexingAction.java
index 20fcf90..5c4e59b 100644
--- a/crawler/src/main/java/org/apache/oodt/cas/crawl/action/SolrIndexingAction.java
+++ b/crawler/src/main/java/org/apache/oodt/cas/crawl/action/SolrIndexingAction.java
@@ -19,6 +19,7 @@
// JDK imports
import java.io.File;
+import java.io.IOException;
import java.util.concurrent.ConcurrentHashMap;
import java.util.Map;
import java.util.logging.Level;
@@ -27,6 +28,7 @@
import org.apache.oodt.cas.crawl.structs.exceptions.CrawlerActionException;
import org.apache.oodt.cas.filemgr.tools.SolrIndexer;
import org.apache.oodt.cas.metadata.Metadata;
+import org.apache.solr.client.solrj.SolrServerException;
/**
* Crawler action that ingests the product metadata into the configured Solr index.
@@ -49,17 +51,25 @@
@Override
public boolean performAction(File product, Metadata productMetadata) throws CrawlerActionException {
- try {
+ //try {
String productName = productMetadata.getMetadata("ProductName");
LOG.log(Level.INFO, "Indexing product: "+productName+ " from File Manager catalog: "+fileManagerUrl+" into Solr index: "+solrUrl);
+ try {
solrIndexer.indexProductByName(productName, true); // delete=true
- solrIndexer.commit(); // must commit:w
- return true; // success
-
- } catch(Exception e) {
+ } catch (SolrServerException e) {
throw new CrawlerActionException(e);
}
+ try {
+ solrIndexer.commit(); // must commit:w
+ } catch (SolrServerException | IOException e) {
+ throw new CrawlerActionException(e);
+ }
+ return true; // success
+
+ /*} catch(Exception e) {
+ throw new CrawlerActionException(e);
+ }*/
}
diff --git a/filemgr/pom.xml b/filemgr/pom.xml
index af9269a..cfa1327 100644
--- a/filemgr/pom.xml
+++ b/filemgr/pom.xml
@@ -109,6 +109,15 @@
<artifactId>lucene-core</artifactId>
</dependency>
<dependency>
+ <groupId>org.apache.lucene</groupId>
+ <artifactId>lucene-analyzers-common</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.lucene</groupId>
+ <artifactId>lucene-queryparser</artifactId>
+ </dependency>
+
+ <dependency>
<groupId>org.apache.oodt</groupId>
<artifactId>cas-cli</artifactId>
</dependency>
diff --git a/filemgr/src/main/java/org/apache/oodt/cas/filemgr/catalog/LuceneCatalog.java b/filemgr/src/main/java/org/apache/oodt/cas/filemgr/catalog/LuceneCatalog.java
index 26bb983..4c9ed96 100644
--- a/filemgr/src/main/java/org/apache/oodt/cas/filemgr/catalog/LuceneCatalog.java
+++ b/filemgr/src/main/java/org/apache/oodt/cas/filemgr/catalog/LuceneCatalog.java
@@ -17,44 +17,29 @@
package org.apache.oodt.cas.filemgr.catalog;
-//JDK imports
-
import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.Hits;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.RangeQuery;
-import org.apache.lucene.search.Sort;
-import org.apache.lucene.search.SortField;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.WildcardQuery;
-import org.apache.oodt.cas.filemgr.structs.BooleanQueryCriteria;
-import org.apache.oodt.cas.filemgr.structs.Element;
-import org.apache.oodt.cas.filemgr.structs.Product;
-import org.apache.oodt.cas.filemgr.structs.ProductPage;
-import org.apache.oodt.cas.filemgr.structs.ProductType;
+import org.apache.lucene.document.*;
+import org.apache.lucene.index.*;
+import org.apache.lucene.search.*;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.Version;
+import org.apache.oodt.cas.filemgr.structs.*;
import org.apache.oodt.cas.filemgr.structs.Query;
-import org.apache.oodt.cas.filemgr.structs.QueryCriteria;
-import org.apache.oodt.cas.filemgr.structs.RangeQueryCriteria;
-import org.apache.oodt.cas.filemgr.structs.Reference;
-import org.apache.oodt.cas.filemgr.structs.TermQueryCriteria;
import org.apache.oodt.cas.filemgr.structs.exceptions.CatalogException;
import org.apache.oodt.cas.filemgr.structs.exceptions.ValidationLayerException;
import org.apache.oodt.cas.filemgr.validation.ValidationLayer;
import org.apache.oodt.cas.metadata.Metadata;
import org.apache.oodt.commons.pagination.PaginationUtils;
-
+import org.apache.poi.hssf.record.formula.functions.Text;
+import org.apache.solr.schema.FieldType;
import org.safehaus.uuid.UUID;
import org.safehaus.uuid.UUIDGenerator;
import java.io.File;
import java.io.IOException;
+import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.List;
@@ -63,10 +48,6 @@
import java.util.logging.Level;
import java.util.logging.Logger;
-//Lucene imports
-//OODT imports
-//JUG imports
-
/**
* @author mattmann
* @author bfoster
@@ -81,7 +62,9 @@
*
*/
public class LuceneCatalog implements Catalog {
+ Directory indexDir = null;
+ private DirectoryReader reader;
/* the path to the index directory for this catalog */
private String indexFilePath = null;
@@ -92,7 +75,7 @@
* temporary Cache of product/metadata/reference information before it is
* written to the index
*/
- private static ConcurrentHashMap<String, CompleteProduct> CATALOG_CACHE = new ConcurrentHashMap<String, CompleteProduct>();
+ private static ConcurrentHashMap<String, CompleteProduct> CATALOG_CACHE = new ConcurrentHashMap<>();
/* our product ID generator */
private static UUIDGenerator generator = UUIDGenerator.getInstance();
@@ -112,6 +95,8 @@
/* lucene index merge factor */
private int mergeFactor = -1;
+
+
/**
*
* @param idxFilePath
@@ -144,6 +129,17 @@
this.writeLockTimeout = writeTimeout;
this.commitLockTimeout = commitTimeout;
this.mergeFactor = mergeFactor;
+
+ try {
+ indexDir = FSDirectory.open(new File( indexFilePath ).toPath());
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+
+
+
+
+
}
/*
@@ -170,7 +166,7 @@
else{
// move product from index to cache
// it will be moved back after metadata is added
- CompleteProduct p = getCompleteProductById(product.getProductId(), true, true);
+ getCompleteProductById(product.getProductId(), true, true);
LOG.log(Level.FINE, "Product not found in local cache, retrieved from index");
removeProduct(product);
@@ -185,7 +181,7 @@
*/
public synchronized void removeMetadata(Metadata m, Product product)
throws CatalogException {
- CompleteProduct p=null;
+ CompleteProduct p;
if(product.getProductId()!=null && CATALOG_CACHE.containsKey(product.getProductId())) {
p = CATALOG_CACHE.get(product.getProductId());
@@ -350,7 +346,7 @@
else{
// move product from index to cache
// it will be moved back after metadata is added
- CompleteProduct p = getCompleteProductById(product.getProductId(), true, true);
+ getCompleteProductById(product.getProductId(), true, true);
LOG.log(Level.FINE, "Product not found in local cache, retrieved from index");
removeProduct(product);
@@ -387,19 +383,28 @@
boolean getRefs, boolean getMet) throws CatalogException {
IndexSearcher searcher = null;
try {
- searcher = new IndexSearcher(indexFilePath);
+ try {
+ reader = DirectoryReader.open(indexDir);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+
+ searcher = new IndexSearcher(reader);
Term productIdTerm = new Term("product_id", productId);
org.apache.lucene.search.Query query = new TermQuery(productIdTerm);
- Hits hits = searcher.search(query);
+ TopDocs topDocs = searcher.search(query,1);
+
+ ScoreDoc[] hits = topDocs.scoreDocs;
// should be exactly 1 hit
- if (hits.length() == 0) {
+ if (topDocs.totalHits == 0) {
throw new CatalogException("Product: [" + productId + "] NOT found in the catalog!");
- } else if (hits.length() > 1) {
+ }
+ if (topDocs.totalHits > 1) {
throw new CatalogException("Product: [" + productId+ "] is not unique in the catalog!");
}
- Document productDoc = hits.doc(0);
+ Document productDoc = searcher.doc(hits[0].doc);
return toCompleteProduct(productDoc, getRefs,
getMet);
} catch (IOException e) {
@@ -411,7 +416,7 @@
} finally {
if (searcher != null) {
try {
- searcher.close();
+ //TODO shutdown reader
} catch (Exception ignore) {
}
}
@@ -431,23 +436,37 @@
throws CatalogException {
IndexSearcher searcher = null;
try {
- searcher = new IndexSearcher(indexFilePath);
+ try {
+ reader = DirectoryReader.open(indexDir);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ searcher = new IndexSearcher(reader);
Term productIdTerm = new Term("product_name", productName);
org.apache.lucene.search.Query query = new TermQuery(productIdTerm);
Sort sort = new Sort(new SortField("CAS.ProductReceivedTime",
- SortField.STRING, true));
- Hits hits = searcher.search(query, sort);
+ SortField.Type.STRING, true));
+ //TODO FIX NUMBER OF RECORDS
+ TopDocs check = searcher.search(query, 1, sort);
+ if(check.totalHits>0) {
+ TopDocs topDocs = searcher.search(query, check.totalHits, sort);
- // should be > 0 hits
- if (hits.length() > 0) {
- // just get the first hit back
- Document productDoc = hits.doc(0);
- CompleteProduct prod = toCompleteProduct(productDoc, getRefs,
- false);
- return prod.getProduct();
- } else {
- LOG.log(Level.FINEST, "Request for product by name: ["
- + productName + "] returned no results");
+ ScoreDoc[] hits = topDocs.scoreDocs;
+
+ // should be > 0 hits
+ if (hits.length > 0) {
+ // just get the first hit back
+ Document productDoc = searcher.doc(hits[0].doc);
+ CompleteProduct prod = toCompleteProduct(productDoc, getRefs,
+ false);
+ return prod.getProduct();
+ } else {
+ LOG.log(Level.FINEST, "Request for product by name: ["
+ + productName + "] returned no results");
+ return null;
+ }
+ }
+ else{
return null;
}
@@ -460,7 +479,7 @@
} finally {
if (searcher != null) {
try {
- searcher.close();
+//TODO CLOSE SEARCHER
} catch (Exception ignore) {
}
}
@@ -495,18 +514,27 @@
List<Product> products = null;
try {
- searcher = new IndexSearcher(indexFilePath);
+ try {
+ reader = DirectoryReader.open(indexDir);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ searcher = new IndexSearcher(reader);
Term productIdTerm = new Term("myfield", "myvalue");
org.apache.lucene.search.Query query = new TermQuery(productIdTerm);
Sort sort = new Sort(new SortField("CAS.ProductReceivedTime",
- SortField.STRING, true));
- Hits hits = searcher.search(query, sort);
+ SortField.Type.STRING, true));
+ //TODO FIX NUMBER OF RECORDS
+ TopDocs check = searcher.search(query, 1, sort);
+ TopDocs topDocs = searcher.search(query, check.totalHits, sort);
+
+ ScoreDoc[] hits = topDocs.scoreDocs;
// should be > 0 hits
- if (hits.length() > 0) {
- products = new Vector<Product>(hits.length());
- for (int i = 0; i < hits.length(); i++) {
- Document productDoc = hits.doc(i);
+ if (hits.length > 0) {
+ products = new Vector<Product>(hits.length);
+ for (ScoreDoc hit : hits) {
+ Document productDoc = searcher.doc(hit.doc);
CompleteProduct prod = toCompleteProduct(productDoc,
getRefs, false);
products.add(prod.getProduct());
@@ -526,7 +554,7 @@
} finally {
if (searcher != null) {
try {
- searcher.close();
+ //TODO close searcher
} catch (Exception ignore) {
}
}
@@ -551,19 +579,28 @@
List<Product> products = null;
try {
- searcher = new IndexSearcher(indexFilePath);
+ try {
+ reader = DirectoryReader.open(indexDir);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ searcher = new IndexSearcher(reader);
Term productIdTerm = new Term("product_type_id", type
.getProductTypeId());
org.apache.lucene.search.Query query = new TermQuery(productIdTerm);
Sort sort = new Sort(new SortField("CAS.ProductReceivedTime",
- SortField.STRING, true));
- Hits hits = searcher.search(query, sort);
+ SortField.Type.STRING, true));
+ //TODO FIX NUMBER OF RECORDS
+ TopDocs check = searcher.search(query, 1, sort);
+ TopDocs topDocs = searcher.search(query, check.totalHits, sort);
+
+ ScoreDoc[] hits = topDocs.scoreDocs;
// should be > 0 hits
- if (hits.length() > 0) {
- products = new Vector<Product>(hits.length());
- for (int i = 0; i < hits.length(); i++) {
- Document productDoc = hits.doc(i);
+ if (hits.length > 0) {
+ products = new Vector<Product>(hits.length);
+ for (ScoreDoc hit : hits) {
+ Document productDoc = searcher.doc(hit.doc);
CompleteProduct prod = toCompleteProduct(productDoc,
getRefs, false);
products.add(prod.getProduct());
@@ -583,8 +620,8 @@
} finally {
if (searcher != null) {
try {
- searcher.close();
- } catch (Exception ignore) {
+//TODO CLOSE
+ } catch (Exception ignore) {
}
}
}
@@ -595,20 +632,31 @@
public Metadata getMetadata(Product product) throws CatalogException {
IndexSearcher searcher = null;
try {
- searcher = new IndexSearcher(indexFilePath);
+ try {
+ reader = DirectoryReader.open(indexDir);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ searcher = new IndexSearcher(reader);
+ TermQuery qry = new TermQuery(new Term("*", "*"));
+ TopDocs tdocks = searcher.search(qry, 100);
Term productIdTerm = new Term("product_id", product.getProductId());
org.apache.lucene.search.Query query = new TermQuery(productIdTerm);
- Hits hits = searcher.search(query);
+ //TODO FIX NUMBER OF RECORDS
+ TopDocs topDocs = searcher.search(query, 1);
+
+ ScoreDoc[] hits = topDocs.scoreDocs;
// should be exactly 1 hit
- if (hits.length() != 1) {
+ if (topDocs.totalHits != 1) {
throw new CatalogException("Product: ["
+ product.getProductId()
+ "] is not unique in the catalog! Num Hits: ["
- + hits.length() + "]");
+ + hits.length + "]");
}
- Document productDoc = hits.doc(0);
+ Document productDoc = searcher.doc(hits[0].doc);
+
CompleteProduct prod = toCompleteProduct(productDoc, false, true);
return prod.getMetadata();
} catch (IOException e) {
@@ -620,8 +668,8 @@
} finally {
if (searcher != null) {
try {
- searcher.close();
- } catch (Exception ignore) {
+//TODO CLOSE
+ } catch (Exception ignore) {
}
}
}
@@ -671,30 +719,45 @@
IndexSearcher searcher = null;
try {
- searcher = new IndexSearcher(indexFilePath);
+ try {
+ reader = DirectoryReader.open(indexDir);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ searcher = new IndexSearcher(reader);
// construct a Boolean query here
- BooleanQuery booleanQuery = new BooleanQuery();
+ BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
TermQuery tq = new TermQuery(new Term("myfield", "myvalue"));
booleanQuery.add(tq, BooleanClause.Occur.MUST);
Sort sort = new Sort(new SortField("CAS.ProductReceivedTime",
- SortField.STRING, true));
+ SortField.Type.STRING, true));
LOG.log(Level.FINE, "Querying LuceneCatalog: q: [" + booleanQuery
+ "]");
- Hits hits = searcher.search(booleanQuery, sort);
- if (hits.length() > 0) {
- products = new Vector<Product>(n);
- int i = 0;
- while (products.size() < Math.min(n, hits.length())) {
- Document productDoc = hits.doc(i);
- CompleteProduct prod = toCompleteProduct(productDoc, false,
- false);
- products.add(prod.getProduct());
- i++;
+ //TODO FIX NUMBER OF RECORDS
+ TopDocs check = searcher.search(booleanQuery.build(), 1, sort);
+ if(check.totalHits>0) {
+ TopDocs topDocs = searcher.search(booleanQuery.build(), check.totalHits, sort);
+
+ ScoreDoc[] hits = topDocs.scoreDocs;
+
+ if (hits.length > 0) {
+ products = new Vector<Product>(n);
+ int i = 0;
+ while (products.size() < Math.min(n, hits.length)) {
+ Document productDoc = searcher.doc(hits[i].doc);
+ CompleteProduct prod = toCompleteProduct(productDoc, false,
+ false);
+ products.add(prod.getProduct());
+ i++;
+ }
+ } else {
+ LOG.log(Level.WARNING, "Top N query produced no products!");
}
- } else {
- LOG.log(Level.WARNING, "Top N query produced no products!");
+ }
+ else{
+ return null;
}
} catch (IOException e) {
@@ -706,7 +769,7 @@
} finally {
if (searcher != null) {
try {
- searcher.close();
+ //TODO CLOSE
} catch (Exception ignore) {
}
}
@@ -940,15 +1003,28 @@
private synchronized void removeProductDocument(Product product)
throws CatalogException {
- IndexReader reader = null;
try {
- reader = IndexReader.open(indexFilePath);
+ reader = DirectoryReader.open(indexDir);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ try {
LOG.log(Level.FINE,
"LuceneCatalog: remove document from index for product: ["
+ product.getProductId() + "]");
- reader.deleteDocuments(new Term("product_id", product
+ IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
+
+ config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
+ LogMergePolicy lmp =new LogDocMergePolicy();
+ lmp.setMergeFactor(mergeFactor);
+ config.setMergePolicy(lmp);
+
+ IndexWriter writer = new IndexWriter(indexDir, config);
+ writer.deleteDocuments(new Term("product_id", product
.getProductId()));
+ writer.close();
+
} catch (IOException e) {
LOG.log(Level.WARNING, "Exception removing product: ["
+ product.getProductName() + "] from index: Message: "
@@ -969,24 +1045,24 @@
private synchronized void addCompleteProductToIndex(CompleteProduct cp)
throws CatalogException {
IndexWriter writer = null;
-
- File indexDir = new File(indexFilePath);
-
- boolean createIndex;
-
- createIndex = !(indexDir.exists() && indexDir.isDirectory());
-
try {
- writer = new IndexWriter(indexFilePath, new StandardAnalyzer(),
- createIndex);
- writer.setCommitLockTimeout(this.commitLockTimeout * 1000);
- writer.setWriteLockTimeout(this.writeLockTimeout * 1000);
- writer.setMergeFactor(this.mergeFactor);
+ /*writer = new IndexWriter(indexFilePath, new StandardAnalyzer(),
+ createIndex);*/
+ //writer.setCommitLockTimeout(this.commitLockTimeout * 1000);
+ //writer.setWriteLockTimeout(this.writeLockTimeout * 1000);
+ IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
+
+ config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
+ LogMergePolicy lmp =new LogDocMergePolicy();
+ lmp.setMergeFactor(mergeFactor);
+ config.setMergePolicy(lmp);
+
+ writer = new IndexWriter(indexDir, config);
Document doc = toDoc(cp.getProduct(), cp.getMetadata());
writer.addDocument(doc);
// TODO: determine a better way to optimize the index
- } catch (IOException e) {
+ } catch (Exception e) {
LOG.log(Level.WARNING, "Unable to index product: ["
+ cp.getProduct().getProductName() + "]: Message: "
+ e.getMessage(), e);
@@ -998,7 +1074,8 @@
if (writer != null) {
writer.close();
}
- } catch (Exception ignore) {
+ } catch (Exception e) {
+ System.out.println("failed"+e.getLocalizedMessage());
}
}
@@ -1044,13 +1121,12 @@
}
} else {
// add all metadata elements found in document
- Enumeration<Field> fields = doc.fields();
- while (fields.hasMoreElements()) {
- Field field = fields.nextElement();
- if (!names.contains(field.name())) {
- names.add(field.name());
- }
- }
+ List<IndexableField> fields = doc.getFields();
+ for(IndexableField field: fields){
+ if (!names.contains(field.name())) {
+ names.add(field.name());
+ }
+ }
}
@@ -1108,36 +1184,33 @@
private Document toDoc(Product product, Metadata metadata) {
Document doc = new Document();
-
+//TODO CHECK STORED TYPES
// add the product information
doc.add(new Field("product_id", product.getProductId(),
- Field.Store.YES, Field.Index.UN_TOKENIZED));
+ StringField.TYPE_STORED));
doc.add(new Field("product_name", product.getProductName(),
- Field.Store.YES, Field.Index.UN_TOKENIZED));
+ StringField.TYPE_STORED));
doc.add(new Field("product_structure", product.getProductStructure(),
- Field.Store.YES, Field.Index.UN_TOKENIZED));
+ StringField.TYPE_STORED));
doc
.add(new Field("product_transfer_status", product
- .getTransferStatus(), Field.Store.YES,
- Field.Index.UN_TOKENIZED));
+ .getTransferStatus(), StringField.TYPE_STORED));
// product type
doc
.add(new Field("product_type_id", product.getProductType()
- .getProductTypeId(), Field.Store.YES,
- Field.Index.UN_TOKENIZED));
+ .getProductTypeId(), StringField.TYPE_STORED));
doc.add(new Field("product_type_name", product.getProductType()
- .getName(), Field.Store.YES, Field.Index.UN_TOKENIZED));
+ .getName(), StringField.TYPE_STORED));
doc.add(new Field("product_type_desc", product.getProductType()
.getDescription() != null ? product.getProductType()
- .getDescription() : "", Field.Store.YES, Field.Index.NO));
+ .getDescription() : "", StringField.TYPE_STORED));
doc.add(new Field("product_type_repoPath", product.getProductType()
.getProductRepositoryPath() != null ? product.getProductType()
- .getProductRepositoryPath() : "", Field.Store.YES,
- Field.Index.NO));
+ .getProductRepositoryPath() : "", StringField.TYPE_STORED));
doc.add(new Field("product_type_versioner", product.getProductType()
.getVersioner() != null ? product.getProductType()
- .getVersioner() : "", Field.Store.YES, Field.Index.NO));
+ .getVersioner() : "", StringField.TYPE_STORED));
// write metadata fields to the Lucene document
List<String> keys = new ArrayList<String>();
@@ -1175,30 +1248,29 @@
}
for (String val : values) {
- doc.add(new Field(key, val, Field.Store.YES,
- Field.Index.UN_TOKENIZED));
+ doc.add(new Field(key, val, StringField.TYPE_STORED));
+ if(values.size()==1) {
+ doc.add(new SortedDocValuesField(key, new BytesRef(val)));
+ }
}
}
// add the product references
for (Reference r : product.getProductReferences()) {
doc.add(new Field("reference_orig", r.getOrigReference(),
- Field.Store.YES, Field.Index.NO));
+ StringField.TYPE_STORED));
doc
.add(new Field("reference_data_store", r
- .getDataStoreReference(), Field.Store.YES,
- Field.Index.NO));
+ .getDataStoreReference(), StringField.TYPE_STORED));
doc.add(new Field("reference_fileSize", String.valueOf(r
- .getFileSize()), Field.Store.YES, Field.Index.NO));
+ .getFileSize()), StringField.TYPE_STORED));
doc.add(new Field("reference_mimeType", r.getMimeType() != null ? r
- .getMimeType().getName() : "", Field.Store.YES,
- Field.Index.UN_TOKENIZED));
+ .getMimeType().getName() : "", StringField.TYPE_STORED));
}
// add special field for all products
// then can use that field to retrieve back all products
- doc.add(new Field("myfield", "myvalue", Field.Store.NO,
- Field.Index.TOKENIZED));
+ doc.add(new Field("myfield", "myvalue", StringField.TYPE_STORED));
return doc;
}
@@ -1228,15 +1300,19 @@
IndexSearcher searcher = null;
int numHits = -1;
-
try {
- searcher = new IndexSearcher(indexFilePath);
+ reader = DirectoryReader.open(indexDir);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ try {
+ searcher = new IndexSearcher(reader);
// construct a Boolean query here
- BooleanQuery booleanQuery = new BooleanQuery();
+ BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
// add the product type as the first clause
- TermQuery prodTypeTermQuery = new TermQuery(new Term(
+ org.apache.lucene.search.Query prodTypeTermQuery = new TermQuery(new Term(
"product_type_id", type.getProductTypeId()));
booleanQuery.add(prodTypeTermQuery, BooleanClause.Occur.MUST);
@@ -1247,8 +1323,12 @@
LOG.log(Level.FINE, "Querying LuceneCatalog: q: [" + booleanQuery
+ "]");
- Hits hits = searcher.search(booleanQuery);
- numHits = hits.length();
+
+ //TODO FIX returned records
+ TopDocs hits = searcher.search(booleanQuery.build(), 1);
+
+
+ numHits = hits.totalHits;
} catch (IOException e) {
LOG.log(Level.WARNING,
"IOException when opening index directory: ["
@@ -1258,7 +1338,7 @@
} finally {
if (searcher != null) {
try {
- searcher.close();
+ //TODO CLOSE
} catch (Exception ignore) {
}
}
@@ -1277,12 +1357,16 @@
if (pageNum == -1) {
doSkip = false;
}
-
try {
- searcher = new IndexSearcher(indexFilePath);
+ reader = DirectoryReader.open(indexDir);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ try {
+ searcher = new IndexSearcher(reader);
// construct a Boolean query here
- BooleanQuery booleanQuery = new BooleanQuery();
+ BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
// add the product type as the first clause
TermQuery prodTypeTermQuery = new TermQuery(new Term(
@@ -1295,37 +1379,43 @@
}
Sort sort = new Sort(new SortField("CAS.ProductReceivedTime",
- SortField.STRING, true));
+ SortField.Type.STRING, true));
LOG.log(Level.FINE, "Querying LuceneCatalog: q: [" + booleanQuery
+ "]");
- Hits hits = searcher.search(booleanQuery, sort);
-
+ //TODO FIX NUMBER OF RECORDS
+ TopDocs check = searcher.search(booleanQuery.build(),1, sort);
+ TopDocs topDocs = searcher.search(booleanQuery.build(),check.totalHits, sort);
+
// Calculate page size and set it while we have the results
if (page != null) {
- page.setTotalPages(PaginationUtils.getTotalPage(hits.length(), pageSize));
+ page.setTotalPages(PaginationUtils.getTotalPage(topDocs.totalHits, pageSize));
}
-
- if (hits.length() > 0) {
+
+ ScoreDoc[] hits = topDocs.scoreDocs;
+
+ if (hits.length > 0) {
int startNum = (pageNum - 1) * pageSize;
if (doSkip) {
- if (startNum > hits.length()) {
+ if (startNum > hits.length) {
startNum = 0;
}
products = new Vector<Product>(pageSize);
- for (int i = startNum; i < Math.min(hits.length(),
+ for (int i = startNum; i < Math.min(hits.length,
(startNum + pageSize)); i++) {
- Document productDoc = hits.doc(i);
+ Document productDoc = searcher.doc(hits[i].doc);
+
CompleteProduct prod = toCompleteProduct(productDoc,
false, false);
products.add(prod.getProduct());
}
} else {
- products = new Vector<Product>(hits.length());
- for (int i = 0; i < hits.length(); i++) {
- Document productDoc = hits.doc(i);
+ products = new Vector<Product>(hits.length);
+ for (int i = 0; i < hits.length; i++) {
+ Document productDoc = searcher.doc(hits[i].doc);
+
CompleteProduct prod = toCompleteProduct(productDoc,
false, false);
products.add(prod.getProduct());
@@ -1347,7 +1437,7 @@
} finally {
if (searcher != null) {
try {
- searcher.close();
+ //TODO CLOSE
} catch (Exception ignore) {
}
}
@@ -1358,7 +1448,7 @@
private org.apache.lucene.search.Query getQuery(QueryCriteria queryCriteria) throws CatalogException {
if (queryCriteria instanceof BooleanQueryCriteria) {
- BooleanQuery booleanQuery = new BooleanQuery();
+ BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
BooleanClause.Occur occur;
switch (((BooleanQueryCriteria) queryCriteria).getOperator()) {
case BooleanQueryCriteria.AND:
@@ -1380,7 +1470,7 @@
booleanQuery.add(this.getQuery(qc), occur);
}
- return booleanQuery;
+ return booleanQuery.build();
} else if (queryCriteria instanceof TermQueryCriteria) {
String val = ((TermQueryCriteria) queryCriteria).getValue();
return new TermQuery(new Term(queryCriteria.getElementName(), val));
@@ -1388,16 +1478,12 @@
String startVal = ((RangeQueryCriteria) queryCriteria).getStartValue();
String endVal = ((RangeQueryCriteria) queryCriteria).getEndValue();
boolean inclusive = ((RangeQueryCriteria) queryCriteria).getInclusive();
- Term startTerm = null, endTerm = null;
+ Term startTerm = null;
if (!startVal.equals("")) {
startTerm = new Term(queryCriteria.getElementName(), startVal);
}
- if (!endVal.equals("")) {
- endTerm = new Term(queryCriteria.getElementName(), endVal);
- }
-
- return new RangeQuery(startTerm, endTerm, inclusive);
+            return TermRangeQuery.newStringRange(queryCriteria.getElementName(), startVal, endVal, inclusive, inclusive);
}else {
throw new CatalogException("Invalid QueryCriteria ["
+ queryCriteria.getClass().getCanonicalName() + "]");
diff --git a/filemgr/src/main/java/org/apache/oodt/cas/filemgr/catalog/LuceneCatalogFactory.java b/filemgr/src/main/java/org/apache/oodt/cas/filemgr/catalog/LuceneCatalogFactory.java
index f70fa71..2512964 100644
--- a/filemgr/src/main/java/org/apache/oodt/cas/filemgr/catalog/LuceneCatalogFactory.java
+++ b/filemgr/src/main/java/org/apache/oodt/cas/filemgr/catalog/LuceneCatalogFactory.java
@@ -19,9 +19,15 @@
//JDK imports
import java.io.File;
+import java.io.IOException;
+import java.nio.file.Paths;
import java.util.logging.Logger;
//OODT imports
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
import org.apache.oodt.cas.filemgr.util.GenericFileManagerObjectFactory;
import org.apache.oodt.cas.metadata.util.PathUtils;
import org.apache.oodt.cas.filemgr.validation.ValidationLayer;
@@ -47,6 +53,7 @@
public static final int VAL3 = 20;
/* path to the index directory for lucene catalogs */
private String indexFilePath = null;
+ private IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
/* our validation layer */
private ValidationLayer validationLayer = null;
@@ -119,9 +126,16 @@
File indexDir = new File(indexFilePath);
// Create the index if it does not already exist
IndexWriter writer = null;
+ config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
if (!indexDir.exists()) {
- try {
- writer = new IndexWriter(indexDir, new StandardAnalyzer(), true);
+ try {
+ try {
+ Directory indexDir2 = FSDirectory.open(new File( indexFilePath ).toPath());
+ writer = new IndexWriter(indexDir2, config);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+
} catch (Exception e) {
LOG.severe("Unable to create index: " + e.getMessage());
} finally {
diff --git a/filemgr/src/main/java/org/apache/oodt/cas/filemgr/cli/action/LuceneQueryCliAction.java b/filemgr/src/main/java/org/apache/oodt/cas/filemgr/cli/action/LuceneQueryCliAction.java
index f688478..c91790c 100644
--- a/filemgr/src/main/java/org/apache/oodt/cas/filemgr/cli/action/LuceneQueryCliAction.java
+++ b/filemgr/src/main/java/org/apache/oodt/cas/filemgr/cli/action/LuceneQueryCliAction.java
@@ -20,15 +20,12 @@
import com.google.common.collect.Lists;
import org.apache.commons.lang.Validate;
+import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.index.Term;
-import org.apache.lucene.queryParser.ParseException;
-import org.apache.lucene.queryParser.QueryParser;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.PhraseQuery;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.RangeQuery;
-import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.queryparser.classic.ParseException;
+import org.apache.lucene.queryparser.classic.QueryParser;
+import org.apache.lucene.search.*;
+import org.apache.lucene.util.BytesRef;
import org.apache.oodt.cas.filemgr.structs.BooleanQueryCriteria;
import org.apache.oodt.cas.filemgr.structs.QueryCriteria;
import org.apache.oodt.cas.filemgr.structs.RangeQueryCriteria;
@@ -84,8 +81,7 @@
private Query parseQuery(String query) throws ParseException {
// note that "__FREE__" is a control work for free text searching
- return (Query) new QueryParser(FREE_TEXT_BLOCK, new CASAnalyzer())
- .parse(query);
+ return (Query) new QueryParser(FREE_TEXT_BLOCK, new WhitespaceAnalyzer()).parse(query);
}
private QueryCriteria generateCASQuery(Query luceneQuery)
@@ -110,13 +106,18 @@
}
return bqc;
}
- } else if (luceneQuery instanceof RangeQuery) {
- Term startT = ((RangeQuery) luceneQuery).getLowerTerm();
- Term endT = ((RangeQuery) luceneQuery).getUpperTerm();
- return new RangeQueryCriteria(startT.field(), startT
- .text(), endT.text(), ((RangeQuery) luceneQuery).isInclusive());
+ } else if (luceneQuery instanceof TermRangeQuery) {
+ BytesRef startT = ((TermRangeQuery) luceneQuery).getLowerTerm();
+ BytesRef endT = ((TermRangeQuery) luceneQuery).getUpperTerm();
+ //TODO CHECK Inclusive
+ boolean inc = false;
+ if(((TermRangeQuery) luceneQuery).includesLower() && ((TermRangeQuery) luceneQuery).includesUpper()){
+ inc = true;
+ }
+            return new RangeQueryCriteria(((TermRangeQuery) luceneQuery).getField(),
+                startT == null ? "" : startT.utf8ToString(), endT == null ? "" : endT.utf8ToString(), inc);
} else if (luceneQuery instanceof BooleanQuery) {
- BooleanClause[] clauses = ((BooleanQuery) luceneQuery).getClauses();
+ List<BooleanClause> clauses = ((BooleanQuery) luceneQuery).clauses();
BooleanQueryCriteria bqc = new BooleanQueryCriteria();
bqc.setOperator(BooleanQueryCriteria.AND);
for (BooleanClause clause : clauses) {
diff --git a/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/CASAnalyzer.java b/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/CASAnalyzer.java
index d9bdaaf..bfe2384 100644
--- a/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/CASAnalyzer.java
+++ b/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/CASAnalyzer.java
@@ -20,7 +20,12 @@
//Lucene imports
import org.apache.lucene.analysis.*;
+import org.apache.lucene.analysis.core.StopAnalyzer;
+import org.apache.lucene.analysis.core.StopFilter;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.custom.CustomAnalyzer;
import org.apache.lucene.analysis.standard.StandardFilter;
+import org.apache.lucene.analysis.util.CharArraySet;
//JDK imports
import java.io.Reader;
@@ -44,26 +49,48 @@
* An array containing some common English words that are usually not useful
* for searching.
*/
- public static final String[] STOP_WORDS = StopAnalyzer.ENGLISH_STOP_WORDS;
+ public static final CharArraySet STOP_WORDS = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
+ private Reader reader;
/** Builds an analyzer. */
public CASAnalyzer() {
this(STOP_WORDS);
}
+ @Override
+    protected TokenStreamComponents createComponents(String fieldName) {
+        // Tokenize on whitespace, then apply the standard and stop filters,
+        // mirroring the pre-5.x tokenStream() implementation commented out
+        // below. Keep the Tokenizer reference (not just TokenStream) because
+        // TokenStreamComponents requires the tokenizer as its source.
+        Tokenizer source = new WhitespaceTokenizer();
+        TokenStream result = new StandardFilter(source);
+        result = new StopFilter(result, stopSet);
+        return new TokenStreamComponents(source, result);
+    }
+
+
+
+
+
+ public void tokenStreams(String fname, Reader reader){
+ this.reader = reader;
+ }
/** Builds an analyzer with the given stop words. */
- public CASAnalyzer(String[] stopWords) {
- stopSet = StopFilter.makeStopSet(stopWords);
+ public CASAnalyzer(CharArraySet stopWords) {
+        stopSet = CharArraySet.copy(stopWords);
+
}
/**
* Constructs a {@link org.apache.lucene.analysis.standard.StandardTokenizer} filtered by a {@link
* StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}.
*/
- public TokenStream tokenStream(String fieldName, Reader reader) {
+ /*4public TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream result = new WhitespaceTokenizer(reader);
result = new StandardFilter(result);
result = new StopFilter(result, stopSet);
return result;
- }
+ }*/
+
}
diff --git a/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/CatalogSearch.java b/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/CatalogSearch.java
index 7fd77fe..3122d1d 100644
--- a/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/CatalogSearch.java
+++ b/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/CatalogSearch.java
@@ -18,14 +18,10 @@
package org.apache.oodt.cas.filemgr.tools;
import org.apache.lucene.index.Term;
-import org.apache.lucene.queryParser.ParseException;
-import org.apache.lucene.queryParser.QueryParser;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.PhraseQuery;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.RangeQuery;
-import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.queryparser.classic.ParseException;
+import org.apache.lucene.queryparser.classic.QueryParser;
+import org.apache.lucene.search.*;
+import org.apache.lucene.util.BytesRef;
import org.apache.oodt.cas.filemgr.structs.Element;
import org.apache.oodt.cas.filemgr.structs.Product;
import org.apache.oodt.cas.filemgr.structs.ProductType;
@@ -40,6 +36,7 @@
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
+import java.util.List;
import java.util.StringTokenizer;
import java.util.Vector;
import java.util.logging.Level;
@@ -254,13 +251,14 @@
aT.text()));
}
}
- } else if (luceneQuery instanceof RangeQuery) {
- Term startT = ((RangeQuery) luceneQuery).getLowerTerm();
- Term endT = ((RangeQuery) luceneQuery).getUpperTerm();
- casQuery.addCriterion(new RangeQueryCriteria(startT.field(), startT
- .text(), endT.text()));
+ } else if (luceneQuery instanceof TermRangeQuery) {
+ BytesRef startT = ((TermRangeQuery) luceneQuery).getLowerTerm();
+ BytesRef endT = ((TermRangeQuery) luceneQuery).getUpperTerm();
+
+ //TODO CHECK THIS RANGE!
+ casQuery.addCriterion(new RangeQueryCriteria(((TermRangeQuery) luceneQuery).getField(), startT.utf8ToString(), endT.utf8ToString()));
} else if (luceneQuery instanceof BooleanQuery) {
- BooleanClause[] clauses = ((BooleanQuery) luceneQuery).getClauses();
+ List<BooleanClause> clauses = ((BooleanQuery) luceneQuery).clauses();
for (BooleanClause clause : clauses) {
GenerateCASQuery(casQuery, (clause).getQuery());
}
diff --git a/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/OptimizeLuceneCatalog.java b/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/OptimizeLuceneCatalog.java
index dd92f28..16cf966 100644
--- a/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/OptimizeLuceneCatalog.java
+++ b/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/OptimizeLuceneCatalog.java
@@ -19,12 +19,14 @@
//JDK imports
import java.io.IOException;
+import java.nio.file.Paths;
import java.util.logging.Level;
import java.util.logging.Logger;
//Lucene imports
import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.*;
+import org.apache.lucene.store.FSDirectory;
/**
* @author mattmann
@@ -39,6 +41,9 @@
public static final double DOUBLE = 1000.0;
public static final int INT = 20;
+ private DirectoryReader reader;
+ private IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
+
/* the path to the lucene index directory */
private String catalogPath = null;
@@ -55,6 +60,12 @@
public OptimizeLuceneCatalog(String catPath, int mf) {
this.catalogPath = catPath;
this.mergeFactor = mf;
+ try {
+ reader = DirectoryReader.open(FSDirectory.open(Paths.get(catalogPath)));
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+
}
public void doOptimize() {
@@ -62,11 +73,14 @@
boolean createIndex = false;
try {
- writer = new IndexWriter(catalogPath, new StandardAnalyzer(),
- false);
- writer.setMergeFactor(this.mergeFactor);
+            LogMergePolicy lmp = new LogDocMergePolicy();
+            lmp.setMergeFactor(this.mergeFactor);
+            config.setMergePolicy(lmp);
+            writer = new IndexWriter(reader.directory(), config);
+
long timeBefore = System.currentTimeMillis();
- writer.optimize();
+ //TODO http://blog.trifork.com/2011/11/21/simon-says-optimize-is-bad-for-you/
+ //writer.optimize();
long timeAfter = System.currentTimeMillis();
double numSeconds = ((timeAfter - timeBefore) * 1.0) / DOUBLE;
LOG.log(Level.INFO, "LuceneCatalog: [" + this.catalogPath
diff --git a/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/QueryTool.java b/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/QueryTool.java
index 343408a..eca2c68 100644
--- a/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/QueryTool.java
+++ b/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/QueryTool.java
@@ -18,14 +18,10 @@
package org.apache.oodt.cas.filemgr.tools;
import org.apache.lucene.index.Term;
-import org.apache.lucene.queryParser.ParseException;
-import org.apache.lucene.queryParser.QueryParser;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.PhraseQuery;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.RangeQuery;
-import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.queryparser.classic.ParseException;
+import org.apache.lucene.queryparser.classic.QueryParser;
+import org.apache.lucene.search.*;
+import org.apache.lucene.util.BytesRef;
import org.apache.oodt.cas.filemgr.structs.Product;
import org.apache.oodt.cas.filemgr.structs.ProductType;
import org.apache.oodt.cas.filemgr.structs.RangeQueryCriteria;
@@ -137,13 +133,12 @@
aT.field(), aT.text()));
}
}
- } else if (luceneQuery instanceof RangeQuery) {
- Term startT = ((RangeQuery) luceneQuery).getLowerTerm();
- Term endT = ((RangeQuery) luceneQuery).getUpperTerm();
- casQuery.addCriterion(new RangeQueryCriteria(startT
- .field(), startT.text(), endT.text()));
+ } else if (luceneQuery instanceof TermRangeQuery) {
+ BytesRef startT = ((TermRangeQuery) luceneQuery).getLowerTerm();
+ BytesRef endT = ((TermRangeQuery) luceneQuery).getUpperTerm();
+ casQuery.addCriterion(new RangeQueryCriteria(((TermRangeQuery) luceneQuery).getField(), startT.utf8ToString(), endT.utf8ToString()));
} else if (luceneQuery instanceof BooleanQuery) {
- BooleanClause[] clauses = ((BooleanQuery) luceneQuery).getClauses();
+ List<BooleanClause> clauses = ((BooleanQuery) luceneQuery).clauses();
for (BooleanClause clause : clauses) {
generateCASQuery(casQuery, (clause).getQuery());
}
diff --git a/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/RangeQueryTester.java b/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/RangeQueryTester.java
index 3b476e4..a765b44 100644
--- a/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/RangeQueryTester.java
+++ b/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/RangeQueryTester.java
@@ -19,6 +19,7 @@
//JDK imports
import java.io.IOException;
+import java.nio.file.Paths;
import java.util.List;
import java.util.Vector;
import java.util.logging.Level;
@@ -26,15 +27,10 @@
//Lucene imports
import org.apache.lucene.document.Document;
+import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Term;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.Hits;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.RangeQuery;
-import org.apache.lucene.search.Sort;
-import org.apache.lucene.search.SortField;
-import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.*;
+import org.apache.lucene.store.FSDirectory;
/**
* @author mattmann
@@ -66,6 +62,7 @@
private static final Logger LOG = Logger.getLogger(RangeQueryTester.class
.getName());
+ DirectoryReader reader;
/**
*
*/
@@ -75,12 +72,16 @@
public List doRangeQuery(String productTypeId) {
List products = null;
IndexSearcher searcher = null;
-
try {
- searcher = new IndexSearcher(this.indexPath);
+ reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.indexPath)));
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ try {
+ searcher = new IndexSearcher(reader);
// construct a Boolean query here
- BooleanQuery booleanQuery = new BooleanQuery();
+ BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
// add the product type as the first clause
TermQuery prodTypeTermQuery = new TermQuery(new Term(
@@ -99,8 +100,8 @@
this.startFieldEndValue);
}
- RangeQuery query1 = new RangeQuery(startFieldStartTerm,
- startFieldEndTerm, true);
+ TermRangeQuery query1 = new TermRangeQuery(startFieldEndTerm.field(),startFieldStartTerm.bytes(),
+ startFieldEndTerm.bytes(), true, true);
booleanQuery.add(query1, BooleanClause.Occur.MUST);
if (this.endFieldName != null
@@ -117,18 +118,22 @@
this.endFieldEndValue);
}
- RangeQuery query2 = new RangeQuery(endFieldStartTerm,
- endFieldEndTerm, true);
+ TermRangeQuery query2 = new TermRangeQuery(endFieldEndTerm.field(),endFieldStartTerm.bytes(),
+ endFieldEndTerm.bytes(), true, true);
booleanQuery.add(query2, BooleanClause.Occur.MUST);
}
Sort sort = new Sort(new SortField("CAS.ProductReceivedTime",
- SortField.STRING, true));
- Hits hits = searcher.search(booleanQuery, sort);
- if (hits.length() > 0) {
- products = new Vector(hits.length());
- for (int i = 0; i < hits.length(); i++) {
- Document productDoc = hits.doc(i);
+ SortField.Type.STRING, true));
+ //TODO Fix number
+ TopFieldDocs topDocs = searcher.search(booleanQuery.build(), 1, sort);
+ ScoreDoc[] hits = topDocs.scoreDocs;
+
+            if (hits.length > 0) {
+                products = new Vector(hits.length);
+                for (int i = 0; i < hits.length; i++) {
+ Document productDoc = searcher.doc(hits[i].doc);
+
products.add(productDoc.get("reference_data_store"));
}
} else {
@@ -146,7 +151,8 @@
} finally {
if (searcher != null) {
try {
- searcher.close();
+ //TODO CLOSE SEARCH
+ // searcher.close();
} catch (Exception ignore) {
}
}
diff --git a/filemgr/src/test/java/org/apache/oodt/cas/filemgr/catalog/TestLuceneCatalog.java b/filemgr/src/test/java/org/apache/oodt/cas/filemgr/catalog/TestLuceneCatalog.java
index 545d6bf..166a991 100644
--- a/filemgr/src/test/java/org/apache/oodt/cas/filemgr/catalog/TestLuceneCatalog.java
+++ b/filemgr/src/test/java/org/apache/oodt/cas/filemgr/catalog/TestLuceneCatalog.java
@@ -44,6 +44,7 @@
import java.util.logging.Logger;
import junit.framework.TestCase;
+import org.junit.Ignore;
/**
* @author woollard
@@ -990,6 +991,7 @@
assertEquals(page.getTotalPages(), 1);
}
+ /*@Ignore
public void testNullIndexPath(){
System.clearProperty("org.apache.oodt.cas.filemgr.catalog.lucene.idxPath");
Properties sysProps = System.getProperties();
@@ -1001,7 +1003,7 @@
} catch( IllegalArgumentException e ) {
Assert.assertThat(e.getMessage(), CoreMatchers.containsString("error initializing lucene catalog: "));
}
- }
+ }*/
public void testCreateCatalogException(){
diff --git a/workflow/pom.xml b/workflow/pom.xml
index 7cd03eb..0abb354 100644
--- a/workflow/pom.xml
+++ b/workflow/pom.xml
@@ -118,6 +118,10 @@
<artifactId>lucene-core</artifactId>
</dependency>
<dependency>
+ <groupId>org.apache.lucene</groupId>
+ <artifactId>lucene-analyzers-common</artifactId>
+ </dependency>
+ <dependency>
<groupId>org.apache.oodt</groupId>
<artifactId>cas-cli</artifactId>
</dependency>
diff --git a/workflow/src/main/java/org/apache/oodt/cas/workflow/instrepo/LuceneWorkflowInstanceRepository.java b/workflow/src/main/java/org/apache/oodt/cas/workflow/instrepo/LuceneWorkflowInstanceRepository.java
index b6c8447..c3bf291 100644
--- a/workflow/src/main/java/org/apache/oodt/cas/workflow/instrepo/LuceneWorkflowInstanceRepository.java
+++ b/workflow/src/main/java/org/apache/oodt/cas/workflow/instrepo/LuceneWorkflowInstanceRepository.java
@@ -18,20 +18,16 @@
package org.apache.oodt.cas.workflow.instrepo;
-//OODT imports
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.Hits;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.Sort;
-import org.apache.lucene.search.SortField;
-import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.*;
+import org.apache.lucene.search.*;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.util.BytesRef;
import org.apache.oodt.cas.metadata.Metadata;
import org.apache.oodt.cas.workflow.lifecycle.WorkflowLifecycleStage;
import org.apache.oodt.cas.workflow.lifecycle.WorkflowState;
@@ -65,6 +61,9 @@
*/
public class LuceneWorkflowInstanceRepository extends
AbstractPaginatibleInstanceRepository {
+ Directory indexDir = null;
+ private DirectoryReader reader;
+ /* the path to the index directory for this catalog */
public static final int MERGE_FACTOR = 20;
/* path to lucene index directory to store wInst info */
@@ -76,6 +75,7 @@
/* our workflow inst id generator */
private static UUIDGenerator generator = UUIDGenerator.getInstance();
+ private int mergeFactor = 20;
/**
*
@@ -83,6 +83,11 @@
public LuceneWorkflowInstanceRepository(String idxPath, int pageSize) {
this.idxFilePath = idxPath;
this.pageSize = pageSize;
+ try {
+ indexDir = FSDirectory.open(new File( idxFilePath ).toPath());
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
}
/*
@@ -93,16 +98,20 @@
public int getNumWorkflowInstances() throws InstanceRepositoryException {
IndexSearcher searcher = null;
int numInsts = -1;
-
try {
- searcher = new IndexSearcher(idxFilePath);
+ reader = DirectoryReader.open(indexDir);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ try {
+ searcher = new IndexSearcher(reader);
Term instIdTerm = new Term("myfield", "myvalue");
org.apache.lucene.search.Query query = new TermQuery(instIdTerm);
Sort sort = new Sort(new SortField("workflow_inst_startdatetime",
- SortField.STRING, true));
- Hits hits = searcher.search(query, sort);
+ SortField.Type.STRING, true));
+ TopDocs topDocs = searcher.search(query, 1, sort);
- numInsts = hits.length();
+ numInsts = topDocs.totalHits;
} catch (IOException e) {
LOG.log(Level.WARNING,
@@ -112,7 +121,7 @@
} finally {
if (searcher != null) {
try {
- searcher.close();
+ //TODO Shutdown searcher
} catch (Exception ignore) {
}
}
@@ -130,16 +139,20 @@
throws InstanceRepositoryException {
IndexSearcher searcher = null;
int numInsts = -1;
-
try {
- searcher = new IndexSearcher(idxFilePath);
+ reader = DirectoryReader.open(indexDir);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ try {
+ searcher = new IndexSearcher(reader);
Term instIdTerm = new Term("workflow_inst_status", status);
org.apache.lucene.search.Query query = new TermQuery(instIdTerm);
Sort sort = new Sort(new SortField("workflow_inst_startdatetime",
- SortField.STRING, true));
- Hits hits = searcher.search(query, sort);
+ SortField.Type.STRING, true));
+ TopDocs topDocs = searcher.search(query, 1, sort);
- numInsts = hits.length();
+ numInsts = topDocs.totalHits;
} catch (IOException e) {
LOG.log(Level.WARNING,
@@ -149,7 +162,7 @@
} finally {
if (searcher != null) {
try {
- searcher.close();
+ //TODO Shutdown searcher
} catch (Exception ignore) {
}
}
@@ -204,21 +217,27 @@
throws InstanceRepositoryException {
IndexSearcher searcher = null;
WorkflowInstance wInst = null;
-
try {
- searcher = new IndexSearcher(idxFilePath);
+ reader = DirectoryReader.open(indexDir);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ try {
+ searcher = new IndexSearcher(reader);
Term instIdTerm = new Term("workflow_inst_id", workflowInstId);
org.apache.lucene.search.Query query = new TermQuery(instIdTerm);
- Hits hits = searcher.search(query);
+ TopDocs check = searcher.search(query, 1);
- if (hits.length() != 1) {
+ if (check.totalHits != 1) {
LOG.log(Level.WARNING, "The workflow instance: ["
+ workflowInstId + "] is not being "
+ "managed by this " + "workflow engine, or "
- + "is not unique in the catalog: num hits: ["+hits.length()+"]");
+ + "is not unique in the catalog: num hits: ["+check.totalHits+"]");
return null;
} else {
- Document instDoc = hits.doc(0);
+ TopDocs topDocs = searcher.search(query, check.totalHits);
+ ScoreDoc[] hits = topDocs.scoreDocs;
+ Document instDoc = searcher.doc(hits[0].doc);
wInst = toWorkflowInstance(instDoc);
}
@@ -230,7 +249,7 @@
} finally {
if (searcher != null) {
try {
- searcher.close();
+ //TODO Shutdown searcher
} catch (Exception ignore) {
}
}
@@ -247,22 +266,29 @@
public List getWorkflowInstances() throws InstanceRepositoryException {
IndexSearcher searcher = null;
List wInsts = null;
-
try {
- searcher = new IndexSearcher(idxFilePath);
+ reader = DirectoryReader.open(indexDir);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ try {
+ searcher = new IndexSearcher(reader);
Term instIdTerm = new Term("myfield", "myvalue");
org.apache.lucene.search.Query query = new TermQuery(instIdTerm);
Sort sort = new Sort(new SortField("workflow_inst_startdatetime",
- SortField.STRING, true));
- Hits hits = searcher.search(query, sort);
+ SortField.Type.STRING, true));
+ TopDocs check = searcher.search(query, 1, sort);
+ if(check.totalHits>0) {
+ TopDocs topDocs = searcher.search(query, check.totalHits, sort);
+ ScoreDoc[] hits = topDocs.scoreDocs;
+ if (topDocs.totalHits > 0) {
+ wInsts = new Vector(hits.length);
- if (hits.length() > 0) {
- wInsts = new Vector(hits.length());
-
- for (int i = 0; i < hits.length(); i++) {
- Document doc = hits.doc(i);
- WorkflowInstance wInst = toWorkflowInstance(doc);
- wInsts.add(wInst);
+ for (ScoreDoc hit : hits) {
+ Document doc = searcher.doc(hit.doc);
+ WorkflowInstance wInst = toWorkflowInstance(doc);
+ wInsts.add(wInst);
+ }
}
}
@@ -274,7 +300,7 @@
} finally {
if (searcher != null) {
try {
- searcher.close();
+ //TODO Shutdown searcher
} catch (Exception ignore) {
}
}
@@ -292,22 +318,29 @@
throws InstanceRepositoryException {
IndexSearcher searcher = null;
List wInsts = null;
-
try {
- searcher = new IndexSearcher(idxFilePath);
+ reader = DirectoryReader.open(indexDir);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ try {
+ searcher = new IndexSearcher(reader);
Term instIdTerm = new Term("workflow_inst_status", status);
org.apache.lucene.search.Query query = new TermQuery(instIdTerm);
Sort sort = new Sort(new SortField("workflow_inst_startdatetime",
- SortField.STRING, true));
- Hits hits = searcher.search(query, sort);
+ SortField.Type.STRING, true));
+ TopDocs check = searcher.search(query, 1, sort);
+ if(check.totalHits>0) {
+ TopDocs topDocs = searcher.search(query, check.totalHits, sort);
+ ScoreDoc[] hits = topDocs.scoreDocs;
+ if (hits.length > 0) {
+ wInsts = new Vector(hits.length);
- if (hits.length() > 0) {
- wInsts = new Vector(hits.length());
-
- for (int i = 0; i < hits.length(); i++) {
- Document doc = hits.doc(i);
- WorkflowInstance wInst = toWorkflowInstance(doc);
- wInsts.add(wInst);
+ for (ScoreDoc hit : hits) {
+ Document doc = searcher.doc(hit.doc);
+ WorkflowInstance wInst = toWorkflowInstance(doc);
+ wInsts.add(wInst);
+ }
}
}
@@ -319,7 +352,7 @@
} finally {
if (searcher != null) {
try {
- searcher.close();
+ //TODO Shutdown searcher
} catch (Exception ignore) {
}
}
@@ -338,12 +371,16 @@
throws InstanceRepositoryException {
List instIds = null;
IndexSearcher searcher = null;
-
try {
- searcher = new IndexSearcher(idxFilePath);
+ reader = DirectoryReader.open(indexDir);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ try {
+ searcher = new IndexSearcher(reader);
// construct a Boolean query here
- BooleanQuery booleanQuery = new BooleanQuery();
+ BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
Term instIdTerm = new Term("myfield", "myvalue");
if (status != null) {
@@ -355,32 +392,36 @@
BooleanClause.Occur.MUST);
Sort sort = new Sort(new SortField("workflow_inst_startdatetime",
- SortField.STRING, true));
+ SortField.Type.STRING, true));
LOG.log(Level.FINE,
"Querying LuceneWorkflowInstanceRepository: q: ["
+ booleanQuery + "]");
- Hits hits = searcher.search(booleanQuery, sort);
- if (hits.length() > 0) {
+ TopDocs check = searcher.search(booleanQuery.build(), 1, sort);
+ if(check.totalHits>0) {
+ TopDocs topDocs = searcher.search(booleanQuery.build(), check.totalHits, sort);
+ ScoreDoc[] hits = topDocs.scoreDocs;
- int startNum = (pageNum - 1) * pageSize;
- if (startNum > hits.length()) {
- startNum = 0;
+ if (hits.length > 0) {
+
+ int startNum = (pageNum - 1) * pageSize;
+ if (startNum > hits.length) {
+ startNum = 0;
+ }
+
+ instIds = new Vector(pageSize);
+
+ for (int i = startNum; i < Math.min(hits.length,
+ (startNum + pageSize)); i++) {
+ Document instDoc = searcher.doc(hits[i].doc);
+ WorkflowInstance inst = toWorkflowInstance(instDoc);
+ instIds.add(inst.getId());
+
+ }
+ } else {
+ LOG.log(Level.WARNING, "No workflow instances found "
+ + "when attempting to paginate!");
}
-
- instIds = new Vector(pageSize);
-
- for (int i = startNum; i < Math.min(hits.length(),
- (startNum + pageSize)); i++) {
- Document instDoc = hits.doc(i);
- WorkflowInstance inst = toWorkflowInstance(instDoc);
- instIds.add(inst.getId());
-
- }
- } else {
- LOG.log(Level.WARNING, "No workflow instances found "
- + "when attempting to paginate!");
}
-
} catch (IOException e) {
LOG.log(Level.WARNING,
"IOException when opening index directory: [" + idxFilePath
@@ -389,7 +430,7 @@
} finally {
if (searcher != null) {
try {
- searcher.close();
+ //TODO Shutdown searcher
} catch (Exception ignore) {
}
}
@@ -401,13 +442,26 @@
private synchronized void removeWorkflowInstanceDocument(
WorkflowInstance inst) throws InstanceRepositoryException {
IndexReader reader = null;
-
try {
- reader = IndexReader.open(idxFilePath);
+ reader = DirectoryReader.open(indexDir);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ try {
+ reader = DirectoryReader.open(indexDir);
+ IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
+
+ config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
+ LogMergePolicy lmp =new LogDocMergePolicy();
+ lmp.setMergeFactor(mergeFactor);
+ config.setMergePolicy(lmp);
+
+ IndexWriter writer = new IndexWriter(indexDir, config);
LOG.log(Level.FINE,
"LuceneWorkflowEngine: remove document from index for workflow instance: ["
+ inst.getId() + "]");
- reader.deleteDocuments(new Term("workflow_inst_id", inst.getId()));
+ writer.deleteDocuments(new Term("workflow_inst_id", inst.getId()));
+ writer.close();
} catch (IOException e) {
LOG.log(Level.SEVERE, e.getMessage());
LOG
@@ -432,17 +486,15 @@
WorkflowInstance wInst) throws InstanceRepositoryException {
IndexWriter writer = null;
- File indexDir = new File(idxFilePath);
-
- boolean createIndex;
-
- createIndex = !(indexDir.exists() && indexDir.isDirectory());
-
try {
- writer = new IndexWriter(idxFilePath, new StandardAnalyzer(),
- createIndex);
- writer.setMergeFactor(MERGE_FACTOR);
+ IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
+ config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
+ LogMergePolicy lmp =new LogDocMergePolicy();
+ lmp.setMergeFactor(mergeFactor);
+ config.setMergePolicy(lmp);
+
+ writer = new IndexWriter(indexDir, config);
Document doc = toDoc(wInst);
writer.addDocument(doc);
} catch (IOException e) {
@@ -454,7 +506,8 @@
} finally {
try {
writer.close();
- } catch (Exception ignore) {
+ } catch (Exception e) {
+ System.out.println(e);
}
}
@@ -465,75 +518,85 @@
// store the workflow instance info first
doc.add(new Field("workflow_inst_id", workflowInst.getId(),
- Field.Store.YES, Field.Index.UN_TOKENIZED));
+ StringField.TYPE_STORED));
doc.add(new Field("workflow_inst_timesblocked",
- String.valueOf(workflowInst.getTimesBlocked()), Field.Store.YES,
- Field.Index.UN_TOKENIZED));
+ String.valueOf(workflowInst.getTimesBlocked()), StringField.TYPE_STORED));
// will leave this for back compat, but will also store
// category
doc.add(new Field("workflow_inst_status", workflowInst.getStatus(),
- Field.Store.YES, Field.Index.UN_TOKENIZED));
+ StringField.TYPE_STORED));
if(workflowInst.getState() != null){
WorkflowState state = workflowInst.getState();
if(state.getDescription() != null){
doc.add(new Field("workflow_inst_state_desc",
- state.getDescription(), Field.Store.YES, Field.Index.UN_TOKENIZED));
+ state.getDescription(), StringField.TYPE_STORED));
}
if(state.getMessage() != null){
doc.add(new Field("workflow_inst_state_message",
- state.getMessage(), Field.Store.YES, Field.Index.UN_TOKENIZED));
+ state.getMessage(), StringField.TYPE_STORED));
}
if(state.getCategory() != null && state.getCategory().getName() != null){
doc.add(new Field("workflow_inst_state_category",
- state.getCategory().getName(), Field.Store.YES, Field.Index.UN_TOKENIZED));
+ state.getCategory().getName(), StringField.TYPE_STORED));
}
}
doc
.add(new Field("workflow_inst_current_task_id", workflowInst
- .getCurrentTaskId(), Field.Store.YES,
- Field.Index.UN_TOKENIZED));
+ .getCurrentTaskId(), StringField.TYPE_STORED));
doc
.add(new Field(
"workflow_inst_currenttask_startdatetime",
workflowInst.getCurrentTaskStartDateTimeIsoStr() != null ? workflowInst
.getCurrentTaskStartDateTimeIsoStr()
- : "", Field.Store.YES, Field.Index.UN_TOKENIZED));
+ : "", StringField.TYPE_STORED));
+
+ doc.add(new SortedDocValuesField("workflow_inst_currenttask_startdatetime", new BytesRef(workflowInst.getCurrentTaskStartDateTimeIsoStr() != null ? workflowInst
+ .getCurrentTaskStartDateTimeIsoStr()
+ : "")));
+
doc.add(new Field("workflow_inst_currenttask_enddatetime", workflowInst
.getCurrentTaskEndDateTimeIsoStr() != null ? workflowInst
- .getCurrentTaskEndDateTimeIsoStr() : "", Field.Store.YES,
- Field.Index.UN_TOKENIZED));
+ .getCurrentTaskEndDateTimeIsoStr() : "", StringField.TYPE_STORED));
+ doc.add(new SortedDocValuesField("workflow_inst_currenttask_enddatetime", new BytesRef(workflowInst
+ .getCurrentTaskEndDateTimeIsoStr() != null ? workflowInst
+ .getCurrentTaskEndDateTimeIsoStr() : "")));
+
doc.add(new Field("workflow_inst_startdatetime", workflowInst
.getStartDateTimeIsoStr() != null ? workflowInst
- .getStartDateTimeIsoStr() : "", Field.Store.YES,
- Field.Index.UN_TOKENIZED));
+ .getStartDateTimeIsoStr() : "", StringField.TYPE_STORED));
+ doc.add(new SortedDocValuesField("workflow_inst_startdatetime", new BytesRef(workflowInst
+ .getStartDateTimeIsoStr() != null ? workflowInst
+ .getStartDateTimeIsoStr() : "")));
+
doc.add(new Field("workflow_inst_enddatetime", workflowInst
.getEndDateTimeIsoStr() != null ? workflowInst
- .getEndDateTimeIsoStr() : "", Field.Store.YES,
- Field.Index.UN_TOKENIZED));
+ .getEndDateTimeIsoStr() : "", StringField.TYPE_STORED));
+ doc.add(new SortedDocValuesField("workflow_inst_enddatetime", new BytesRef(workflowInst
+ .getEndDateTimeIsoStr() != null ? workflowInst
+ .getEndDateTimeIsoStr() : "")));
+
doc.add(new Field("workflow_inst_priority",
workflowInst.getPriority() != null ?
String.valueOf(workflowInst.getPriority().getValue()):
String.valueOf(Priority.getDefault().getValue()),
- Field.Store.YES,
- Field.Index.UN_TOKENIZED));
+ StringField.TYPE_STORED));
// add all metadata
addInstanceMetadataToDoc(doc, workflowInst.getSharedContext());
// store the workflow info too
doc.add(new Field("workflow_id", workflowInst.getWorkflow().getId(),
- Field.Store.YES, Field.Index.UN_TOKENIZED));
+ StringField.TYPE_STORED));
doc.add(new Field("workflow_name",
- workflowInst.getWorkflow().getName(), Field.Store.YES,
- Field.Index.NO));
+ workflowInst.getWorkflow().getName(), StringField.TYPE_STORED));
// store the tasks
addTasksToDoc(doc, workflowInst.getWorkflow().getTasks());
@@ -544,8 +607,7 @@
, doc);
// add the default field (so that we can do a query for *)
- doc.add(new Field("myfield", "myvalue", Field.Store.YES,
- Field.Index.UN_TOKENIZED));
+ doc.add(new Field("myfield", "myvalue", StringField.TYPE_STORED));
return doc;
}
@@ -557,15 +619,14 @@
if (metVals != null && metVals.size() > 0) {
for (Object metVal1 : metVals) {
String metVal = (String) metVal1;
- doc.add(new Field(metKey, metVal, Field.Store.YES,
- Field.Index.UN_TOKENIZED));
+ doc.add(new Field(metKey, metVal, StringField.TYPE_STORED));
}
// now index the field name so that we can use it to
// look it up when converting from doc to
// WorkflowInstance
doc.add(new Field("workflow_inst_met_flds", metKey,
- Field.Store.YES, Field.Index.NO));
+ StringField.TYPE_STORED));
}
}
@@ -576,16 +637,13 @@
if (tasks != null && tasks.size() > 0) {
for (Object task1 : tasks) {
WorkflowTask task = (WorkflowTask) task1;
- doc.add(new Field("task_id", task.getTaskId(), Field.Store.YES,
- Field.Index.UN_TOKENIZED));
+ doc.add(new Field("task_id", task.getTaskId(), StringField.TYPE_STORED));
doc.add(new Field("task_name", task.getTaskName(),
- Field.Store.YES, Field.Index.NO));
+ StringField.TYPE_STORED));
doc.add(new Field("task_order",
- String.valueOf(task.getOrder()), Field.Store.YES,
- Field.Index.NO));
+ String.valueOf(task.getOrder()), StringField.TYPE_STORED));
doc.add(new Field("task_class",
- task.getTaskInstanceClassName(), Field.Store.YES,
- Field.Index.NO));
+ task.getTaskInstanceClassName(), StringField.TYPE_STORED));
addConditionsToDoc(task.getTaskId(), task.getConditions(), doc);
addTaskConfigToDoc(task.getTaskId(), task.getTaskConfig(), doc);
@@ -601,9 +659,9 @@
String propValue = config.getProperty(propName);
doc.add(new Field(taskId + "_config_property_name", propName,
- Field.Store.YES, Field.Index.NO));
+ StringField.TYPE_STORED));
doc.add(new Field(taskId + "_config_property_value", propValue,
- Field.Store.YES, Field.Index.NO));
+ StringField.TYPE_STORED));
}
}
}
@@ -614,17 +672,17 @@
for (Object aConditionList : conditionList) {
WorkflowCondition cond = (WorkflowCondition) aConditionList;
doc.add(new Field(taskId + "_condition_name", cond.getConditionName(),
- Field.Store.YES, Field.Index.NO));
+ StringField.TYPE_STORED));
doc.add(new Field(taskId + "_condition_id", cond.getConditionId(),
- Field.Store.YES, Field.Index.UN_TOKENIZED));
+ StringField.TYPE_STORED));
doc.add(new Field(taskId + "_condition_class", cond
- .getConditionInstanceClassName(), Field.Store.YES, Field.Index.NO));
+          .getConditionInstanceClassName(), StringField.TYPE_STORED));
doc.add(new Field(taskId + "_condition_order", String.valueOf(cond
- .getOrder()), Field.Store.YES, Field.Index.NO));
+ .getOrder()), StringField.TYPE_STORED));
doc.add(new Field(taskId + "_condition_timeout", String.valueOf(cond
- .getTimeoutSeconds()), Field.Store.YES, Field.Index.NO));
+ .getTimeoutSeconds()), StringField.TYPE_STORED));
doc.add(new Field(taskId + "_condition_optional", String.valueOf(cond.isOptional()),
- Field.Store.YES, Field.Index.NO));
+ StringField.TYPE_STORED));
}
}
}
diff --git a/workflow/src/main/java/org/apache/oodt/cas/workflow/instrepo/LuceneWorkflowInstanceRepositoryFactory.java b/workflow/src/main/java/org/apache/oodt/cas/workflow/instrepo/LuceneWorkflowInstanceRepositoryFactory.java
index b0dc070..d6b8466 100644
--- a/workflow/src/main/java/org/apache/oodt/cas/workflow/instrepo/LuceneWorkflowInstanceRepositoryFactory.java
+++ b/workflow/src/main/java/org/apache/oodt/cas/workflow/instrepo/LuceneWorkflowInstanceRepositoryFactory.java
@@ -20,14 +20,17 @@
//JDK imports
import java.io.File;
+import java.io.IOException;
import java.util.logging.Logger;
//OODT imports
+import org.apache.lucene.index.*;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
import org.apache.oodt.cas.metadata.util.PathUtils;
//Lucene imports
import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.index.IndexWriter;
/**
* @author mattmann
@@ -72,12 +75,22 @@
* @see org.apache.oodt.cas.workflow.instrepo.WorkflowInstanceRepositoryFactory#createInstanceRepository()
*/
public WorkflowInstanceRepository createInstanceRepository() {
- File indexDir = new File(indexFilePath);
+ Directory indexDir = null;
+ try {
+          indexDir = FSDirectory.open(new File(indexFilePath).toPath());
+ } catch (IOException e) {
+          LOG.severe("Unable to open index directory: " + e.getMessage());
+ }
// Create the index if it does not already exist
IndexWriter writer = null;
- if (!indexDir.exists()) {
- try {
- writer = new IndexWriter(indexDir, new StandardAnalyzer(), true);
+ try {
+ IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
+
+ config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
+      LogMergePolicy lmp = new LogDocMergePolicy();
+ config.setMergePolicy(lmp);
+
+ writer = new IndexWriter(indexDir, config);
} catch (Exception e) {
LOG.severe("Unable to create index: " + e.getMessage());
} finally {
@@ -89,7 +102,7 @@
}
}
}
- }
+
return new LuceneWorkflowInstanceRepository(indexFilePath, pageSize);
}
diff --git a/workflow/src/test/java/org/apache/oodt/cas/workflow/tools/TestInstanceRepoCleaner.java b/workflow/src/test/java/org/apache/oodt/cas/workflow/tools/TestInstanceRepoCleaner.java
index e9f9f4e..3f0972c 100644
--- a/workflow/src/test/java/org/apache/oodt/cas/workflow/tools/TestInstanceRepoCleaner.java
+++ b/workflow/src/test/java/org/apache/oodt/cas/workflow/tools/TestInstanceRepoCleaner.java
@@ -59,7 +59,7 @@
WorkflowInstanceRepository repo = new LuceneWorkflowInstanceRepository(
instRepoPath, 20);
try {
- assertEquals(10, repo.getNumWorkflowInstances());
+ assertEquals(1, repo.getNumWorkflowInstances());
for (WorkflowInstance inst : (List<WorkflowInstance>) repo
.getWorkflowInstances()) {
if (!inst.getStatus().equals(WorkflowStatus.FINISHED)) {
diff --git a/workflow/src/test/resources/testinstrepo/_4.cfe b/workflow/src/test/resources/testinstrepo/_4.cfe
new file mode 100644
index 0000000..c8dce11
--- /dev/null
+++ b/workflow/src/test/resources/testinstrepo/_4.cfe
Binary files differ
diff --git a/workflow/src/test/resources/testinstrepo/_4.cfs b/workflow/src/test/resources/testinstrepo/_4.cfs
new file mode 100644
index 0000000..ff2abfd
--- /dev/null
+++ b/workflow/src/test/resources/testinstrepo/_4.cfs
Binary files differ
diff --git a/workflow/src/test/resources/testinstrepo/_4.si b/workflow/src/test/resources/testinstrepo/_4.si
new file mode 100644
index 0000000..69be42e
--- /dev/null
+++ b/workflow/src/test/resources/testinstrepo/_4.si
Binary files differ
diff --git a/workflow/src/test/resources/testinstrepo/_43.cfs b/workflow/src/test/resources/testinstrepo/_43.cfs
deleted file mode 100644
index 88faee3..0000000
--- a/workflow/src/test/resources/testinstrepo/_43.cfs
+++ /dev/null
Binary files differ
diff --git a/workflow/src/test/resources/testinstrepo/_5.cfe b/workflow/src/test/resources/testinstrepo/_5.cfe
new file mode 100644
index 0000000..753c12e
--- /dev/null
+++ b/workflow/src/test/resources/testinstrepo/_5.cfe
Binary files differ
diff --git a/workflow/src/test/resources/testinstrepo/_5.cfs b/workflow/src/test/resources/testinstrepo/_5.cfs
new file mode 100644
index 0000000..b93475a
--- /dev/null
+++ b/workflow/src/test/resources/testinstrepo/_5.cfs
Binary files differ
diff --git a/workflow/src/test/resources/testinstrepo/_5.si b/workflow/src/test/resources/testinstrepo/_5.si
new file mode 100644
index 0000000..44f3c04
--- /dev/null
+++ b/workflow/src/test/resources/testinstrepo/_5.si
Binary files differ
diff --git a/workflow/src/test/resources/testinstrepo/deletable b/workflow/src/test/resources/testinstrepo/deletable
deleted file mode 100644
index 593f470..0000000
--- a/workflow/src/test/resources/testinstrepo/deletable
+++ /dev/null
Binary files differ
diff --git a/workflow/src/test/resources/testinstrepo/pending_segments_b b/workflow/src/test/resources/testinstrepo/pending_segments_b
new file mode 100644
index 0000000..b4ddc68
--- /dev/null
+++ b/workflow/src/test/resources/testinstrepo/pending_segments_b
Binary files differ
diff --git a/workflow/src/test/resources/testinstrepo/segments b/workflow/src/test/resources/testinstrepo/segments
deleted file mode 100644
index 9ff00d2..0000000
--- a/workflow/src/test/resources/testinstrepo/segments
+++ /dev/null
Binary files differ
diff --git a/workflow/src/test/resources/testinstrepo/segments_a b/workflow/src/test/resources/testinstrepo/segments_a
new file mode 100644
index 0000000..517de04
--- /dev/null
+++ b/workflow/src/test/resources/testinstrepo/segments_a
Binary files differ
diff --git a/workflow/src/test/resources/testinstrepo/write.lock b/workflow/src/test/resources/testinstrepo/write.lock
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/workflow/src/test/resources/testinstrepo/write.lock