0.10/catalog/src/main/java/org/apache/oodt/cas/catalog/query/FreeTextQueryExpression.java - oodt - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.oodt.cas.catalog.query;

 //JDK imports
 import java.util.Arrays;
 import java.util.HashSet;
 import java.util.List;
 import java.util.StringTokenizer;
 import java.util.Vector;

 /**
  * A free-text {@link TermQueryExpression}: free text is tokenized and
  * filtered for noise words before being added as term values.
  *
  * @author woollard
  * @author bfoster
  * @version $Revision$
  */
 public class FreeTextQueryExpression extends TermQueryExpression {

     /**
      * Common (lowercase) words that are dropped from free text by
      * {@link #addFreeText(String)}.
      */
     private HashSet<String> noiseWordHash = new HashSet<String>(
         Arrays.asList("a", "all", "am", "an", "and",
             "any", "are", "as", "at", "be", "but", "can", "did", "do", "does",
             "for", "from", "had", "has", "have", "here", "how", "i", "if",
             "in", "is", "it", "no", "not", "of", "on", "or", "so", "that",
             "the", "then", "there", "this", "to", "too", "up", "use", "what",
             "when", "where", "who", "why", "you"));

     /**
      * Adds unparsed free text to this expression's term. Punctuation
      * characters are stripped, the remaining text is split on the default
      * {@link StringTokenizer} delimiters, and noise words are dropped
      * (compared case-insensitively; kept tokens retain their original case).
      * The surviving tokens are placed ahead of the term's existing values.
      * If no token survives, the term is left untouched. To supply pre-parsed
      * keywords, set the values on the term directly instead.
      *
      * @param text
      *            The free text to be parsed and searched on; a
      *            <code>null</code> value is ignored.
      */
     public void addFreeText(String text) {
         if (text == null) {
             return;
         }

         // remove punctuation from the text
         String cleaned = text.replaceAll("\\p{Punct}+", "");

         // tokenize string using default delimiters
         StringTokenizer tok = new StringTokenizer(cleaned);

         // filter noise words and collect the remaining tokens
         List<String> values = new Vector<String>();
         while (tok.hasMoreTokens()) {
             String token = tok.nextToken();
             // the noise word list is lowercase, so normalize case for the
             // lookup only; the token itself is stored unchanged
             if (!noiseWordHash.contains(token.toLowerCase(java.util.Locale.ROOT))) {
                 values.add(token);
             }
         }

         if (!values.isEmpty()) {
             // new tokens first, followed by the values already on the term
             values.addAll(this.term.getValues());
             this.term.setValues(values);
         }
     }

     /**
      * Generates a human-readable string version of this expression in the
      * form <code>({bucketNames} termName :|+value1+value2|: )</code>.
      *
      * @return The query as a String.
      */
     @Override
     public String toString() {
         StringBuilder serial = new StringBuilder(
             "({" + this.bucketNames + "} " + this.term.getName() + " :|");
         for (String value : this.term.getValues()) {
             serial.append("+").append(value);
         }
         serial.append("|: )");
         return serial.toString();
     }

     /**
      * Creates a copy of this expression with its own noise word set, a
      * clone of the term, and the same bucket names reference.
      *
      * @return The copied expression.
      */
     @Override
     public FreeTextQueryExpression clone() {
         FreeTextQueryExpression ftQE = new FreeTextQueryExpression();
         ftQE.noiseWordHash = new HashSet<String>(this.noiseWordHash);
         ftQE.setTerm(this.term.clone());
         ftQE.setBucketNames(this.bucketNames);
         return ftQE;
     }

 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.oodt.cas.catalog.query;

	//JDK imports
	import java.util.Arrays;
	import java.util.HashSet;
	import java.util.List;
	import java.util.StringTokenizer;
	import java.util.Vector;

	/**
	* A free-text {@link TermQueryExpression}: free text is tokenized and
	* filtered for noise words before being added as term values.
	*
	* @author woollard
	* @author bfoster
	* @version $Revision$
	*/
	public class FreeTextQueryExpression extends TermQueryExpression {

	private HashSet<String> noiseWordHash = new HashSet<String>(
	Arrays.asList(new String[] { "a", "all", "am", "an", "and",
	"any", "are", "as", "at", "be", "but", "can", "did", "do", "does",
	"for", "from", "had", "has", "have", "here", "how", "i", "if",
	"in", "is", "it", "no", "not", "of", "on", "or", "so", "that",
	"the", "then", "there", "this", "to", "too", "up", "use", "what",
	"when", "where", "who", "why", "you" }));

	/**
	* A method for adding unparsed free text to the FreeTextCriteria. Free text
	* entered as a string is tokenized and punctuation and common words are
	* dropped before the values are added to the query. In order to query for
	* pre-parsed keywords, see the setValues method of this class.
	*
	* @param text
	* The free text to be parsed and searched on.
	*/
	public void addFreeText(String text) {
	// remove punctuation from the text
	text = text.replaceAll("\\p{Punct}+", "");

	// tokenize string using default delimiters
	StringTokenizer tok = new StringTokenizer(text);
	String token = null;

	// filter noise words and add to values vector
	List<String> values = new Vector<String>();
	while (tok.hasMoreElements()) {
	token = tok.nextToken();
	if (!noiseWordHash.contains(token))
	values.add(token);
	}
	if (values.size() > 0) {
	values.addAll(this.term.getValues());
	this.term.setValues(values);
	}
	}

	/**
	* Implementation of the abstract method inherited from QueryCriteria for
	* generating a human-parsable string version of the query criteria. Note
	* that the returned String follows the Lucene query language.
	*
	* @return The query as a String.
	*/
	public String toString() {
	String serial = "({" + this.bucketNames + "} " + this.term.getName() + " :\|";
	for (String value : this.term.getValues())
	serial += "+" + value;
	serial += "\|: )";
	return serial;
	}

	@Override
	public FreeTextQueryExpression clone() {
	FreeTextQueryExpression ftQE = new FreeTextQueryExpression();
	ftQE.noiseWordHash = new HashSet<String>(this.noiseWordHash);
	ftQE.setTerm(this.term.clone());
	ftQE.setBucketNames(this.bucketNames);
	return ftQE;
	}

	}