lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/query/SrndTruncQuery.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.lucene.queryparser.surround.query;
 import org.apache.lucene.index.MultiTerms;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.StringHelper;
 import org.apache.lucene.index.IndexReader;

 import java.io.IOException;

 import java.util.regex.Pattern;
 import java.util.regex.Matcher;

 /**
  * Query that matches wildcards.
  *
  * <p>The truncated term is split into a literal prefix (every character before the first
  * wildcard) and a regular expression built from the remaining characters. The
  * {@code unlimited} character is translated to {@code .*}, the {@code mask} character to
  * {@code .}, and every other character is matched literally.
  */
 public class SrndTruncQuery extends SimpleTerm {
   /**
    * Characters that carry a special meaning in {@link Pattern} syntax and therefore have to be
    * backslash-escaped when they are meant literally.
    */
   private static final String REGEX_META_CHARS = "\\^$.|?*+()[]{}";

   private final String truncated;
   private final char unlimited;
   private final char mask;

   /** Leading part of {@link #truncated} that contains no wildcard character. */
   private String prefix;
   /** {@link #prefix} as a {@link BytesRef}, used to seek in the terms dictionary. */
   private BytesRef prefixRef;
   /** Regular expression for the part of a term that follows {@link #prefix}. */
   private Pattern pattern;

   /**
    * Creates an unquoted truncated term and derives its prefix and pattern.
    *
    * @param truncated the term text, including wildcard characters
    * @param unlimited wildcard character translated to the regular expression {@code .*}
    * @param mask wildcard character translated to the regular expression {@code .}
    */
   public SrndTruncQuery(String truncated, char unlimited, char mask) {
     super(false); /* not quoted */
     this.truncated = truncated;
     this.unlimited = unlimited;
     this.mask = mask;
     truncatedToPrefixAndPattern();
   }

   /** Returns the term text exactly as passed to the constructor. */
   public String getTruncated() {
     return truncated;
   }

   @Override
   public String toStringUnquoted() {
     return getTruncated();
   }

   /**
    * Tells whether {@code c} is an ordinary character, i.e. neither of the two wildcards.
    *
    * @param c character taken from the truncated term
    * @return {@code true} when {@code c} is neither {@code unlimited} nor {@code mask}
    */
   protected boolean matchingChar(char c) {
     return (c != unlimited) && (c != mask);
   }

   /**
    * Appends the regular expression fragment for one character of the truncated term.
    *
    * @param c character taken from the truncated term
    * @param re builder receiving the regular expression
    */
   protected void appendRegExpForChar(char c, StringBuilder re) {
     if (c == unlimited) {
       re.append(".*");
     } else if (c == mask) {
       re.append(".");
     } else if (REGEX_META_CHARS.indexOf(c) >= 0) {
       // A literal metacharacter must not be interpreted as regular expression syntax:
       // unescaped it would match the wrong terms or make Pattern.compile fail.
       re.append('\\').append(c);
     } else {
       // Appended unchanged so that surrogate pairs stay adjacent in the pattern.
       re.append(c);
     }
   }

   /**
    * Splits {@link #truncated} at its first wildcard: the part before it becomes
    * {@link #prefix} / {@link #prefixRef}, the rest is compiled into {@link #pattern}.
    */
   protected void truncatedToPrefixAndPattern() {
     int i = 0;
     while ((i < truncated.length()) && matchingChar(truncated.charAt(i))) {
       i++;
     }
     prefix = truncated.substring(0, i);
     prefixRef = new BytesRef(prefix);

     StringBuilder re = new StringBuilder();
     for (; i < truncated.length(); i++) {
       appendRegExpForChar(truncated.charAt(i), re);
     }
     pattern = Pattern.compile(re.toString());
   }

   /**
    * Visits every term of {@code fieldName} that starts with the prefix and whose remainder
    * matches the pattern. Does nothing when the field has no terms.
    *
    * @param reader reader whose terms are enumerated
    * @param fieldName field whose terms are enumerated
    * @param mtv visitor called once for each matching term
    * @throws IOException if reading the terms fails
    */
   @Override
   public void visitMatchingTerms(
     IndexReader reader,
     String fieldName,
     MatchingTermVisitor mtv) throws IOException
   {
     Terms terms = MultiTerms.getTerms(reader, fieldName);
     if (terms == null) {
       return;
     }

     TermsEnum termsEnum = terms.iterator();
     if (termsEnum.seekCeil(prefixRef) == TermsEnum.SeekStatus.END) {
       return; // no term at or after the prefix
     }

     int prefixLength = prefix.length();
     Matcher matcher = pattern.matcher("");
     // Enumeration starts at the first term >= prefix and stops at the first term
     // that no longer starts with the prefix.
     for (BytesRef text = termsEnum.term();
          text != null && StringHelper.startsWith(text, prefixRef);
          text = termsEnum.next()) {
       String textString = text.utf8ToString();
       matcher.reset(textString.substring(prefixLength));
       if (matcher.matches()) {
         mtv.visitMatchingTerm(new Term(fieldName, textString));
       }
     }
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.lucene.queryparser.surround.query;
	import org.apache.lucene.index.MultiTerms;
	import org.apache.lucene.index.Term;
	import org.apache.lucene.index.TermsEnum;
	import org.apache.lucene.index.Terms;
	import org.apache.lucene.util.BytesRef;
	import org.apache.lucene.util.StringHelper;
	import org.apache.lucene.index.IndexReader;

	import java.io.IOException;

	import java.util.regex.Pattern;
	import java.util.regex.Matcher;

	/**
	 * Query that matches wildcards.
	 *
	 * <p>The truncated term is split into a literal prefix (every character before the first
	 * wildcard) and a regular expression built from the remaining characters. The
	 * {@code unlimited} character is translated to {@code .*}, the {@code mask} character to
	 * {@code .}, and every other character is matched literally.
	 */
	public class SrndTruncQuery extends SimpleTerm {
	  /**
	   * Characters that carry a special meaning in {@link Pattern} syntax and therefore have to
	   * be backslash-escaped when they are meant literally.
	   */
	  private static final String REGEX_META_CHARS = "\\^$.|?*+()[]{}";

	  private final String truncated;
	  private final char unlimited;
	  private final char mask;

	  /** Leading part of {@link #truncated} that contains no wildcard character. */
	  private String prefix;
	  /** {@link #prefix} as a {@link BytesRef}, used to seek in the terms dictionary. */
	  private BytesRef prefixRef;
	  /** Regular expression for the part of a term that follows {@link #prefix}. */
	  private Pattern pattern;

	  /**
	   * Creates an unquoted truncated term and derives its prefix and pattern.
	   *
	   * @param truncated the term text, including wildcard characters
	   * @param unlimited wildcard character translated to the regular expression {@code .*}
	   * @param mask wildcard character translated to the regular expression {@code .}
	   */
	  public SrndTruncQuery(String truncated, char unlimited, char mask) {
	    super(false); /* not quoted */
	    this.truncated = truncated;
	    this.unlimited = unlimited;
	    this.mask = mask;
	    truncatedToPrefixAndPattern();
	  }

	  /** Returns the term text exactly as passed to the constructor. */
	  public String getTruncated() {
	    return truncated;
	  }

	  @Override
	  public String toStringUnquoted() {
	    return getTruncated();
	  }

	  /**
	   * Tells whether {@code c} is an ordinary character, i.e. neither of the two wildcards.
	   *
	   * @param c character taken from the truncated term
	   * @return {@code true} when {@code c} is neither {@code unlimited} nor {@code mask}
	   */
	  protected boolean matchingChar(char c) {
	    return (c != unlimited) && (c != mask);
	  }

	  /**
	   * Appends the regular expression fragment for one character of the truncated term.
	   *
	   * @param c character taken from the truncated term
	   * @param re builder receiving the regular expression
	   */
	  protected void appendRegExpForChar(char c, StringBuilder re) {
	    if (c == unlimited) {
	      re.append(".*");
	    } else if (c == mask) {
	      re.append(".");
	    } else if (REGEX_META_CHARS.indexOf(c) >= 0) {
	      // A literal metacharacter must not be interpreted as regular expression syntax:
	      // unescaped it would match the wrong terms or make Pattern.compile fail.
	      re.append('\\').append(c);
	    } else {
	      // Appended unchanged so that surrogate pairs stay adjacent in the pattern.
	      re.append(c);
	    }
	  }

	  /**
	   * Splits {@link #truncated} at its first wildcard: the part before it becomes
	   * {@link #prefix} / {@link #prefixRef}, the rest is compiled into {@link #pattern}.
	   */
	  protected void truncatedToPrefixAndPattern() {
	    int i = 0;
	    while ((i < truncated.length()) && matchingChar(truncated.charAt(i))) {
	      i++;
	    }
	    prefix = truncated.substring(0, i);
	    prefixRef = new BytesRef(prefix);

	    StringBuilder re = new StringBuilder();
	    for (; i < truncated.length(); i++) {
	      appendRegExpForChar(truncated.charAt(i), re);
	    }
	    pattern = Pattern.compile(re.toString());
	  }

	  /**
	   * Visits every term of {@code fieldName} that starts with the prefix and whose remainder
	   * matches the pattern. Does nothing when the field has no terms.
	   *
	   * @param reader reader whose terms are enumerated
	   * @param fieldName field whose terms are enumerated
	   * @param mtv visitor called once for each matching term
	   * @throws IOException if reading the terms fails
	   */
	  @Override
	  public void visitMatchingTerms(
	    IndexReader reader,
	    String fieldName,
	    MatchingTermVisitor mtv) throws IOException
	  {
	    Terms terms = MultiTerms.getTerms(reader, fieldName);
	    if (terms == null) {
	      return;
	    }

	    TermsEnum termsEnum = terms.iterator();
	    if (termsEnum.seekCeil(prefixRef) == TermsEnum.SeekStatus.END) {
	      return; // no term at or after the prefix
	    }

	    int prefixLength = prefix.length();
	    Matcher matcher = pattern.matcher("");
	    // Enumeration starts at the first term >= prefix and stops at the first term
	    // that no longer starts with the prefix.
	    for (BytesRef text = termsEnum.term();
	         text != null && StringHelper.startsWith(text, prefixRef);
	         text = termsEnum.next()) {
	      String textString = text.utf8ToString();
	      matcher.reset(textString.substring(prefixLength));
	      if (matcher.matches()) {
	        mtv.visitMatchingTerm(new Term(fieldName, textString));
	      }
	    }
	  }
	}