package org.apache.rya.indexing.accumulo.freetext;
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import java.io.IOException;
import java.io.StringReader;
import java.util.SortedSet;
import java.util.TreeSet;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
 * A {@link Tokenizer} that delegates tokenization to Lucene's {@link StandardAnalyzer}
 * and returns the resulting terms as a sorted, de-duplicated set.
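 * <p>
 * A minimal usage sketch (the input string below is hypothetical; the exact terms
 * produced depend on the analyzer's default stop word handling):
 * <pre>{@code
 * Tokenizer tokenizer = new LuceneTokenizer();
 * SortedSet<String> tokens = tokenizer.tokenize("The Quick Brown Fox");
 * // Expected to contain lower-cased terms such as "quick", "brown", and "fox".
 * }</pre>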
*/
public class LuceneTokenizer implements Tokenizer {
    // Reuse a single analyzer instance; Lucene Analyzers are thread-safe.
    private static final Analyzer ANALYZER = new StandardAnalyzer();

    @Override
    public SortedSet<String> tokenize(String string) {
        // Collect terms in a TreeSet so the result is sorted and de-duplicated.
        SortedSet<String> set = new TreeSet<String>();
        try (final TokenStream stream = ANALYZER.tokenStream(null, new StringReader(string))) {
            stream.reset();
            while (stream.incrementToken()) {
                // CharTermAttribute holds the text of the current term.
                set.add(stream.getAttribute(CharTermAttribute.class).toString());
            }
            // Signal end-of-stream per the TokenStream consumer contract.
            stream.end();
        } catch (IOException e) {
            // Not expected: the underlying reader is an in-memory StringReader.
            throw new RuntimeException(e);
        }
        return set;
    }
}