lucene/luke/src/java/org/apache/lucene/luke/models/analysis/AnalysisImpl.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package org.apache.lucene.luke.models.analysis;

 import java.io.IOException;
 import java.io.Reader;
 import java.io.StringReader;
 import java.lang.reflect.Modifier;
 import java.net.URL;
 import java.net.URLClassLoader;
 import java.nio.file.FileSystems;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.Iterator;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Objects;
 import java.util.Set;
 import java.util.stream.Collectors;

 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.custom.CustomAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.util.CharFilterFactory;
 import org.apache.lucene.analysis.util.TokenFilterFactory;
 import org.apache.lucene.analysis.util.TokenizerFactory;
 import org.apache.lucene.luke.models.LukeException;
 import org.apache.lucene.luke.util.reflection.ClassScanner;
 import org.apache.lucene.util.AttributeImpl;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.IOUtils;

 /** Default implementation of {@link Analysis} */
 public final class AnalysisImpl implements Analysis {

   private List<Class<? extends Analyzer>> presetAnalyzerTypes;

   private Analyzer analyzer;

   /**
    * Makes analysis components packaged in the given jar files available.
    *
    * <p>Each entry must name an existing file whose path ends with {@code .jar}. A class loader
    * over all given jars is then used to reload the char filter, tokenizer and token filter
    * factories.
    *
    * @param jarFiles paths of the jar files to load
    * @throws LukeException if a path does not exist, does not end with {@code .jar}, or cannot be
    *     converted to a URL
    */
   @Override
   public void addExternalJars(List<String> jarFiles) {
     List<URL> jarUrls = new ArrayList<>();

     for (String jarFile : jarFiles) {
       Path jarPath = FileSystems.getDefault().getPath(jarFile);
       boolean usable = Files.exists(jarPath) && jarFile.endsWith(".jar");
       if (!usable) {
         throw new LukeException(String.format(Locale.ENGLISH, "Invalid jar file path: %s", jarFile));
       }
       try {
         jarUrls.add(jarPath.toUri().toURL());
       } catch (IOException e) {
         throw new LukeException(e.getMessage(), e);
       }
     }

     // the new loader delegates to this class's loader and additionally sees the external jars
     URL[] urlArray = jarUrls.toArray(new URL[0]);
     URLClassLoader classLoader = new URLClassLoader(urlArray, this.getClass().getClassLoader());

     // refresh the factory registries so that components from the jars are picked up
     CharFilterFactory.reloadCharFilters(classLoader);
     TokenizerFactory.reloadTokenizers(classLoader);
     TokenFilterFactory.reloadTokenFilters(classLoader);
   }

   /**
    * Returns the built-in analyzer classes that have a public no-args constructor.
    *
    * <p>The list is computed on the first call and the same unmodifiable list is returned
    * afterwards.
    *
    * @return unmodifiable collection of preset analyzer classes
    */
   @Override
   public Collection<Class<? extends Analyzer>> getPresetAnalyzerTypes() {
     if (presetAnalyzerTypes != null) {
       return presetAnalyzerTypes;
     }

     List<Class<? extends Analyzer>> presets = new ArrayList<>();
     for (Class<? extends Analyzer> candidate : getInstantiableSubTypesBuiltIn(Analyzer.class)) {
       try {
         candidate.getConstructor();
       } catch (NoSuchMethodException ignored) {
         // no public no-args constructor: this analyzer cannot be offered as a preset
         continue;
       }
       presets.add(candidate);
     }
     presetAnalyzerTypes = Collections.unmodifiableList(presets);
     return presetAnalyzerTypes;
   }

   /**
    * Returns the entries reported by {@link CharFilterFactory#availableCharFilters()}, sorted in
    * natural order.
    */
   @Override
   public Collection<String> getAvailableCharFilters() {
     List<String> names = new ArrayList<>(CharFilterFactory.availableCharFilters());
     Collections.sort(names);
     return names;
   }

   /**
    * Returns the entries reported by {@link TokenizerFactory#availableTokenizers()}, sorted in
    * natural order.
    */
   @Override
   public Collection<String> getAvailableTokenizers() {
     List<String> names = new ArrayList<>(TokenizerFactory.availableTokenizers());
     Collections.sort(names);
     return names;
   }

   /**
    * Returns the entries reported by {@link TokenFilterFactory#availableTokenFilters()}, sorted in
    * natural order.
    */
   @Override
   public Collection<String> getAvailableTokenFilters() {
     List<String> names = new ArrayList<>(TokenFilterFactory.availableTokenFilters());
     Collections.sort(names);
     return names;
   }

   /**
    * Scans the {@code org.apache.lucene.analysis} package for subtypes of the given type and keeps
    * those that are not abstract and whose simple name does not start with {@code Mock}.
    *
    * @param superType type whose subtypes are looked up
    * @return matching classes, ordered by fully qualified class name
    */
   private <T> List<Class<? extends T>> getInstantiableSubTypesBuiltIn(Class<T> superType) {
     ClassScanner scanner = new ClassScanner("org.apache.lucene.analysis", getClass().getClassLoader());
     List<Class<? extends T>> concreteTypes = new ArrayList<>();
     for (Class<? extends T> candidate : scanner.scanSubTypes(superType)) {
       if (Modifier.isAbstract(candidate.getModifiers())
           || candidate.getSimpleName().startsWith("Mock")) {
         continue;
       }
       concreteTypes.add(candidate);
     }
     concreteTypes.sort(Comparator.comparing(Class::getName));
     return concreteTypes;
   }

   /**
    * Analyzes the given text with the current analyzer.
    *
    * <p>The token stream is consumed following the TokenStream workflow (reset, incrementToken,
    * end, close). It is closed in every case, including when consuming it fails.
    *
    * @param text text to analyze; must not be null
    * @return one {@link Token} per token emitted by the analyzer, in emission order
    * @throws LukeException if no analyzer is set or an I/O error occurs during analysis
    */
   @Override
   public List<Token> analyze(String text) {
     Objects.requireNonNull(text);

     if (analyzer == null) {
       throw new LukeException("Analyzer is not set.");
     }

     List<Token> result = new ArrayList<>();
     // try-with-resources guarantees close() even when reset()/incrementToken() throws
     try (TokenStream stream = analyzer.tokenStream("", text)) {
       stream.reset();

       CharTermAttribute charAtt = stream.getAttribute(CharTermAttribute.class);

       // iterate tokens
       while (stream.incrementToken()) {
         List<TokenAttribute> attributes = copyAttributes(stream, charAtt);
         result.add(new Token(charAtt.toString(), attributes));
       }
       // signal end-of-stream before the stream is closed
       stream.end();
     } catch (IOException e) {
       throw new LukeException(e.getMessage(), e);
     }
     return result;
   }

   /**
    * Captures the current values of every attribute implementation of the given stream.
    *
    * @param tokenStream stream whose attribute implementations are reflected
    * @param charAtt term attribute of the stream; not read by this method
    * @return one {@link TokenAttribute} per attribute implementation, holding the implementation's
    *     simple class name and its non-null reflected values as strings
    */
   private List<TokenAttribute> copyAttributes(TokenStream tokenStream, CharTermAttribute charAtt) {
     List<TokenAttribute> copied = new ArrayList<>();
     for (Iterator<AttributeImpl> impls = tokenStream.getAttributeImplsIterator(); impls.hasNext(); ) {
       AttributeImpl impl = impls.next();
       // LinkedHashMap keeps the values in the order they are reflected
       Map<String, String> values = new LinkedHashMap<>();
       impl.reflectWith((attClass, key, value) -> {
         if (value == null) {
           return;
         }
         values.put(key, value.toString());
       });
       copied.add(new TokenAttribute(impl.getClass().getSimpleName(), values));
     }
     return copied;
   }

   /**
    * Instantiates the analyzer class with the given name through its no-args constructor and makes
    * it the current analyzer.
    *
    * @param analyzerType fully qualified class name of the analyzer; must not be null
    * @return the newly created analyzer
    * @throws LukeException if the class cannot be loaded, is not an {@link Analyzer} subclass, or
    *     cannot be instantiated
    */
   @Override
   public Analyzer createAnalyzerFromClassName(String analyzerType) {
     Objects.requireNonNull(analyzerType);

     try {
       Class<? extends Analyzer> clazz = Class.forName(analyzerType).asSubclass(Analyzer.class);
       // Class#newInstance is deprecated and rethrows constructor exceptions unwrapped;
       // Constructor#newInstance reports them as InvocationTargetException, handled below.
       this.analyzer = clazz.getDeclaredConstructor().newInstance();
       return analyzer;
     } catch (ReflectiveOperationException | ClassCastException e) {
       // ClassCastException: the named class exists but does not extend Analyzer
       throw new LukeException(String.format(Locale.ENGLISH, "Failed to instantiate class: %s", analyzerType), e);
     }
   }

   /**
    * Builds a {@link CustomAnalyzer} from the given configuration and makes it the current
    * analyzer.
    *
    * @param config tokenizer, char filter and token filter settings, plus an optional config
    *     directory; must not be null
    * @return the newly built analyzer
    * @throws LukeException if any step of building the analyzer fails
    */
   @Override
   public Analyzer buildCustomAnalyzer(CustomAnalyzerConfig config) {
     Objects.requireNonNull(config);
     try {
       // start from a builder bound to the config directory when one is given
       CustomAnalyzer.Builder builder = config.getConfigDir()
           .map(dir -> CustomAnalyzer.builder(FileSystems.getDefault().getPath(dir)))
           .orElse(CustomAnalyzer.builder());

       // tokenizer
       builder.withTokenizer(config.getTokenizerConfig().getName(), config.getTokenizerConfig().getParams());

       // char filters, in configured order
       for (CustomAnalyzerConfig.ComponentConfig charFilterConfig : config.getCharFilterConfigs()) {
         builder.addCharFilter(charFilterConfig.getName(), charFilterConfig.getParams());
       }

       // token filters, in configured order
       for (CustomAnalyzerConfig.ComponentConfig tokenFilterConfig : config.getTokenFilterConfigs()) {
         builder.addTokenFilter(tokenFilterConfig.getName(), tokenFilterConfig.getParams());
       }

       Analyzer built = builder.build();
       this.analyzer = built;
       return built;
     } catch (Exception e) {
       throw new LukeException("Failed to build custom analyzer.", e);
     }
   }

   /**
    * Returns the analyzer that was most recently created or built by this instance.
    *
    * @throws LukeException if no analyzer has been set yet
    */
   @Override
   public Analyzer currentAnalyzer() {
     if (analyzer != null) {
       return analyzer;
     }
     throw new LukeException("Analyzer is not set.");
   }

   /**
    * Analyzes the given text with the current {@link CustomAnalyzer}, recording the output of
    * every stage of the analysis chain.
    *
    * <p>The result holds the text after each char filter, the tokens produced by the tokenizer,
    * and the tokens after each token filter. Each stage is named by its SPI name.
    *
    * @param text text to analyze; must not be null
    * @return per-stage char filtered texts and tokens
    * @throws LukeException if no analyzer is set, the analyzer is not a {@link CustomAnalyzer}, or
    *     the analysis fails
    */
   @Override
   public StepByStepResult analyzeStepByStep(String text){
     Objects.requireNonNull(text);
     if (analyzer == null) {
       throw new LukeException("Analyzer is not set.");
     }

     if (!(analyzer instanceof CustomAnalyzer)) {
       throw new LukeException("Analyzer is not CustomAnalyzer.");
     }

     List<NamedTokens> namedTokens = new ArrayList<>();
     List<CharfilteredText> charfilteredTexts = new ArrayList<>();

     try {
       CustomAnalyzer customAnalyzer = (CustomAnalyzer)analyzer;
       final List<CharFilterFactory> charFilterFactories = customAnalyzer.getCharFilterFactories();
       Reader reader = new StringReader(text);
       String charFilteredSource = text;
       for (CharFilterFactory charFilterFactory : charFilterFactories) {
         // Chain the filters: each one wraps the reader produced by the previous filter, so the
         // tokenizer below reads text that went through ALL char filters, not only the last one.
         reader = charFilterFactory.create(reader);

         // Separately materialize the output of this filter for display, feeding it the
         // materialized output of the previous filter.
         Reader readerForWriteOut = new StringReader(charFilteredSource);
         readerForWriteOut = charFilterFactory.create(readerForWriteOut);
         charFilteredSource = writeCharStream(readerForWriteOut);
         charfilteredTexts.add(new CharfilteredText(CharFilterFactory.findSPIName(charFilterFactory.getClass()), charFilteredSource));
       }

       final TokenizerFactory tokenizerFactory = customAnalyzer.getTokenizerFactory();
       final List<TokenFilterFactory> tokenFilterFactories = customAnalyzer.getTokenFilterFactories();

       // tokenizer stage
       TokenStream tokenStream = tokenizerFactory.create();
       ((Tokenizer)tokenStream).setReader(reader);
       List<Token> tokens = new ArrayList<>();
       List<AttributeSource> attributeSources = analyzeTokenStream(tokenStream, tokens);
       namedTokens.add(new NamedTokens(TokenizerFactory.findSPIName(tokenizerFactory.getClass()), tokens));

       // token filter stages: each filter consumes a replay of the tokens captured from the
       // previous stage
       ListBasedTokenStream listBasedTokenStream = new ListBasedTokenStream(tokenStream, attributeSources);
       for (TokenFilterFactory tokenFilterFactory : tokenFilterFactories) {
         tokenStream = tokenFilterFactory.create(listBasedTokenStream);
         tokens = new ArrayList<>();
         attributeSources = analyzeTokenStream(tokenStream, tokens);
         namedTokens.add(new NamedTokens(TokenFilterFactory.findSPIName(tokenFilterFactory.getClass()), tokens));
         try {
           listBasedTokenStream.close();
         } catch (IOException e) {
           // do nothing;
         }
         listBasedTokenStream = new ListBasedTokenStream(listBasedTokenStream, attributeSources);
       }
       try {
         listBasedTokenStream.close();
       } catch (IOException e) {
         // do nothing.
       } finally {
         reader.close();
       }
       return new StepByStepResult(charfilteredTexts, namedTokens);
     } catch (Exception e) {
       throw new LukeException(e.getMessage(), e);
     }
   }

   /**
    * Consumes the given TokenStream, recording every token it emits. The stream is closed before
    * this method returns, whether or not consuming it succeeds.
    *
    * @param tokenStream TokenStream to consume
    * @param result list that receives one {@link Token} (term text plus attribute values) per
    *     emitted token
    *
    * @return clones of the stream's attributes, one per emitted token, in emission order
    */
   private List<AttributeSource> analyzeTokenStream(TokenStream tokenStream, List<Token> result) {
     final List<AttributeSource> snapshots = new ArrayList<>();
     try {
       tokenStream.reset();
       final CharTermAttribute termAtt = tokenStream.getAttribute(CharTermAttribute.class);
       while (tokenStream.incrementToken()) {
         // keep an independent clone so the token can be replayed to the next stage
         snapshots.add(tokenStream.cloneAttributes());
         final List<TokenAttribute> attributeValues = copyAttributes(tokenStream, termAtt);
         final Token token = new Token(termAtt.toString(), attributeValues);
         result.add(token);
       }
       tokenStream.end();
     } catch (IOException ioe) {
       throw new RuntimeException("Error occurred while iterating over TokenStream", ioe);
     } finally {
       IOUtils.closeWhileHandlingException(tokenStream);
     }
     return snapshots;
   }

   /**
    * TokenStream that replays a list of previously captured tokens.
    * see org.apache.solr.handler.AnalysisRequestHandlerBase#ListBasedTokenStream
    */
   protected static final class ListBasedTokenStream extends TokenStream {
     /** Captured tokens, replayed in list order. */
     private final List<AttributeSource> tokens;
     /** Position in {@link #tokens}; assigned by {@link #reset()}. */
     private Iterator<AttributeSource> tokenIterator;

     /**
      * Creates a new ListBasedTokenStream which replays the given tokens.
      *
      * @param attributeSource source of the attribute factory and attribute impls
      * @param tokens tokens to replay
      */
     ListBasedTokenStream(AttributeSource attributeSource, List<AttributeSource> tokens) {
       super(attributeSource.getAttributeFactory());
       this.tokens = tokens;
       // register every attribute impl of the source on this stream as well
       addAttributes(attributeSource);
     }

     /** Restarts the replay from the first token. */
     @Override
     public void reset() throws IOException {
       super.reset();
       tokenIterator = tokens.iterator();
     }

     /** Copies the next captured token into this stream's attributes, if there is one. */
     @Override
     public boolean incrementToken() {
       if (!tokenIterator.hasNext()) {
         return false;
       }
       clearAttributes();
       AttributeSource captured = tokenIterator.next();
       addAttributes(captured);
       captured.copyTo(this);
       return true;
     }

     /** Adds all attribute impls of the given source to this stream. */
     void addAttributes(AttributeSource attributeSource) {
       for (Iterator<AttributeImpl> impls = attributeSource.getAttributeImplsIterator(); impls.hasNext(); ) {
         addAttributeImpl(impls.next()); // adds both impl & interfaces
       }
     }
   }

   /**
    * Reads the given reader until end of stream and returns everything read as a string.
    *
    * <p>A read that returns fewer characters than the buffer size does not mean end of stream,
    * so reading continues until {@code read} returns -1.
    *
    * @param input reader to drain; it is not closed by this method
    * @return all characters read from {@code input}
    * @throws RuntimeException if reading fails with an {@link IOException}
    */
   private static String writeCharStream(Reader input ){
     final int BUFFER_SIZE = 1024;
     char[] buf = new char[BUFFER_SIZE];
     StringBuilder sb = new StringBuilder();
     try {
       int len;
       while ((len = input.read(buf, 0, BUFFER_SIZE)) != -1) {
         sb.append(buf, 0, len);
       }
     } catch (IOException e) {
       throw new RuntimeException("Error occurred while iterating over charfiltering", e);
     }
     return sb.toString();
   }

 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package org.apache.lucene.luke.models.analysis;

	import java.io.IOException;
	import java.io.Reader;
	import java.io.StringReader;
	import java.lang.reflect.Modifier;
	import java.net.URL;
	import java.net.URLClassLoader;
	import java.nio.file.FileSystems;
	import java.nio.file.Files;
	import java.nio.file.Path;
	import java.util.ArrayList;
	import java.util.Collection;
	import java.util.Collections;
	import java.util.Comparator;
	import java.util.Iterator;
	import java.util.LinkedHashMap;
	import java.util.List;
	import java.util.Locale;
	import java.util.Map;
	import java.util.Objects;
	import java.util.Set;
	import java.util.stream.Collectors;

	import org.apache.lucene.analysis.Analyzer;
	import org.apache.lucene.analysis.TokenStream;
	import org.apache.lucene.analysis.Tokenizer;
	import org.apache.lucene.analysis.custom.CustomAnalyzer;
	import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
	import org.apache.lucene.analysis.util.CharFilterFactory;
	import org.apache.lucene.analysis.util.TokenFilterFactory;
	import org.apache.lucene.analysis.util.TokenizerFactory;
	import org.apache.lucene.luke.models.LukeException;
	import org.apache.lucene.luke.util.reflection.ClassScanner;
	import org.apache.lucene.util.AttributeImpl;
	import org.apache.lucene.util.AttributeSource;
	import org.apache.lucene.util.IOUtils;

	/** Default implementation of {@link Analysis} */
	public final class AnalysisImpl implements Analysis {

	private List<Class<? extends Analyzer>> presetAnalyzerTypes;

	private Analyzer analyzer;

	@Override
	public void addExternalJars(List<String> jarFiles) {
	List<URL> urls = new ArrayList<>();

	for (String jarFile : jarFiles) {
	Path path = FileSystems.getDefault().getPath(jarFile);
	if (!Files.exists(path) \|\| !jarFile.endsWith(".jar")) {
	throw new LukeException(String.format(Locale.ENGLISH, "Invalid jar file path: %s", jarFile));
	}
	try {
	URL url = path.toUri().toURL();
	urls.add(url);
	} catch (IOException e) {
	throw new LukeException(e.getMessage(), e);
	}
	}

	// reload available tokenizers, charfilters, and tokenfilters
	URLClassLoader classLoader = new URLClassLoader(
	urls.toArray(new URL[0]), this.getClass().getClassLoader());
	CharFilterFactory.reloadCharFilters(classLoader);
	TokenizerFactory.reloadTokenizers(classLoader);
	TokenFilterFactory.reloadTokenFilters(classLoader);
	}

	/**
	 * Returns the built-in analyzer classes that have a public no-args constructor.
	 *
	 * <p>The list is computed on the first call and the same unmodifiable list is returned
	 * afterwards.
	 *
	 * @return unmodifiable collection of preset analyzer classes
	 */
	@Override
	public Collection<Class<? extends Analyzer>> getPresetAnalyzerTypes() {
	  if (presetAnalyzerTypes != null) {
	    return presetAnalyzerTypes;
	  }

	  List<Class<? extends Analyzer>> presets = new ArrayList<>();
	  for (Class<? extends Analyzer> candidate : getInstantiableSubTypesBuiltIn(Analyzer.class)) {
	    try {
	      candidate.getConstructor();
	    } catch (NoSuchMethodException ignored) {
	      // no public no-args constructor: this analyzer cannot be offered as a preset
	      continue;
	    }
	    presets.add(candidate);
	  }
	  presetAnalyzerTypes = Collections.unmodifiableList(presets);
	  return presetAnalyzerTypes;
	}

	/**
	 * Returns the entries reported by {@link CharFilterFactory#availableCharFilters()}, sorted in
	 * natural order.
	 */
	@Override
	public Collection<String> getAvailableCharFilters() {
	  List<String> names = new ArrayList<>(CharFilterFactory.availableCharFilters());
	  Collections.sort(names);
	  return names;
	}

	/**
	 * Returns the entries reported by {@link TokenizerFactory#availableTokenizers()}, sorted in
	 * natural order.
	 */
	@Override
	public Collection<String> getAvailableTokenizers() {
	  List<String> names = new ArrayList<>(TokenizerFactory.availableTokenizers());
	  Collections.sort(names);
	  return names;
	}

	/**
	 * Returns the entries reported by {@link TokenFilterFactory#availableTokenFilters()}, sorted in
	 * natural order.
	 */
	@Override
	public Collection<String> getAvailableTokenFilters() {
	  List<String> names = new ArrayList<>(TokenFilterFactory.availableTokenFilters());
	  Collections.sort(names);
	  return names;
	}

	/**
	 * Scans the {@code org.apache.lucene.analysis} package for subtypes of the given type and keeps
	 * those that are not abstract and whose simple name does not start with {@code Mock}.
	 *
	 * @param superType type whose subtypes are looked up
	 * @return matching classes, ordered by fully qualified class name
	 */
	private <T> List<Class<? extends T>> getInstantiableSubTypesBuiltIn(Class<T> superType) {
	  ClassScanner scanner = new ClassScanner("org.apache.lucene.analysis", getClass().getClassLoader());
	  List<Class<? extends T>> concreteTypes = new ArrayList<>();
	  for (Class<? extends T> candidate : scanner.scanSubTypes(superType)) {
	    if (Modifier.isAbstract(candidate.getModifiers())
	        || candidate.getSimpleName().startsWith("Mock")) {
	      continue;
	    }
	    concreteTypes.add(candidate);
	  }
	  concreteTypes.sort(Comparator.comparing(Class::getName));
	  return concreteTypes;
	}

	/**
	 * Analyzes the given text with the current analyzer.
	 *
	 * <p>The token stream is consumed following the TokenStream workflow (reset, incrementToken,
	 * end, close). It is closed in every case, including when consuming it fails.
	 *
	 * @param text text to analyze; must not be null
	 * @return one {@link Token} per token emitted by the analyzer, in emission order
	 * @throws LukeException if no analyzer is set or an I/O error occurs during analysis
	 */
	@Override
	public List<Token> analyze(String text) {
	  Objects.requireNonNull(text);

	  if (analyzer == null) {
	    throw new LukeException("Analyzer is not set.");
	  }

	  List<Token> result = new ArrayList<>();
	  // try-with-resources guarantees close() even when reset()/incrementToken() throws
	  try (TokenStream stream = analyzer.tokenStream("", text)) {
	    stream.reset();

	    CharTermAttribute charAtt = stream.getAttribute(CharTermAttribute.class);

	    // iterate tokens
	    while (stream.incrementToken()) {
	      List<TokenAttribute> attributes = copyAttributes(stream, charAtt);
	      result.add(new Token(charAtt.toString(), attributes));
	    }
	    // signal end-of-stream before the stream is closed
	    stream.end();
	  } catch (IOException e) {
	    throw new LukeException(e.getMessage(), e);
	  }
	  return result;
	}

	/**
	 * Captures the current values of every attribute implementation of the given stream.
	 *
	 * @param tokenStream stream whose attribute implementations are reflected
	 * @param charAtt term attribute of the stream; not read by this method
	 * @return one {@link TokenAttribute} per attribute implementation, holding the implementation's
	 *     simple class name and its non-null reflected values as strings
	 */
	private List<TokenAttribute> copyAttributes(TokenStream tokenStream, CharTermAttribute charAtt) {
	  List<TokenAttribute> copied = new ArrayList<>();
	  for (Iterator<AttributeImpl> impls = tokenStream.getAttributeImplsIterator(); impls.hasNext(); ) {
	    AttributeImpl impl = impls.next();
	    // LinkedHashMap keeps the values in the order they are reflected
	    Map<String, String> values = new LinkedHashMap<>();
	    impl.reflectWith((attClass, key, value) -> {
	      if (value == null) {
	        return;
	      }
	      values.put(key, value.toString());
	    });
	    copied.add(new TokenAttribute(impl.getClass().getSimpleName(), values));
	  }
	  return copied;
	}

	/**
	 * Instantiates the analyzer class with the given name through its no-args constructor and makes
	 * it the current analyzer.
	 *
	 * @param analyzerType fully qualified class name of the analyzer; must not be null
	 * @return the newly created analyzer
	 * @throws LukeException if the class cannot be loaded, is not an {@link Analyzer} subclass, or
	 *     cannot be instantiated
	 */
	@Override
	public Analyzer createAnalyzerFromClassName(String analyzerType) {
	  Objects.requireNonNull(analyzerType);

	  try {
	    Class<? extends Analyzer> clazz = Class.forName(analyzerType).asSubclass(Analyzer.class);
	    // Class#newInstance is deprecated and rethrows constructor exceptions unwrapped;
	    // Constructor#newInstance reports them as InvocationTargetException, handled below.
	    this.analyzer = clazz.getDeclaredConstructor().newInstance();
	    return analyzer;
	  } catch (ReflectiveOperationException | ClassCastException e) {
	    // ClassCastException: the named class exists but does not extend Analyzer
	    throw new LukeException(String.format(Locale.ENGLISH, "Failed to instantiate class: %s", analyzerType), e);
	  }
	}

	/**
	 * Builds a {@link CustomAnalyzer} from the given configuration and makes it the current
	 * analyzer.
	 *
	 * @param config tokenizer, char filter and token filter settings, plus an optional config
	 *     directory; must not be null
	 * @return the newly built analyzer
	 * @throws LukeException if any step of building the analyzer fails
	 */
	@Override
	public Analyzer buildCustomAnalyzer(CustomAnalyzerConfig config) {
	  Objects.requireNonNull(config);
	  try {
	    // start from a builder bound to the config directory when one is given
	    CustomAnalyzer.Builder builder = config.getConfigDir()
	        .map(dir -> CustomAnalyzer.builder(FileSystems.getDefault().getPath(dir)))
	        .orElse(CustomAnalyzer.builder());

	    // tokenizer
	    builder.withTokenizer(config.getTokenizerConfig().getName(), config.getTokenizerConfig().getParams());

	    // char filters, in configured order
	    for (CustomAnalyzerConfig.ComponentConfig charFilterConfig : config.getCharFilterConfigs()) {
	      builder.addCharFilter(charFilterConfig.getName(), charFilterConfig.getParams());
	    }

	    // token filters, in configured order
	    for (CustomAnalyzerConfig.ComponentConfig tokenFilterConfig : config.getTokenFilterConfigs()) {
	      builder.addTokenFilter(tokenFilterConfig.getName(), tokenFilterConfig.getParams());
	    }

	    Analyzer built = builder.build();
	    this.analyzer = built;
	    return built;
	  } catch (Exception e) {
	    throw new LukeException("Failed to build custom analyzer.", e);
	  }
	}

	/**
	 * Returns the analyzer that was most recently created or built by this instance.
	 *
	 * @throws LukeException if no analyzer has been set yet
	 */
	@Override
	public Analyzer currentAnalyzer() {
	  if (analyzer != null) {
	    return analyzer;
	  }
	  throw new LukeException("Analyzer is not set.");
	}

	/**
	 * Analyzes the given text with the current {@link CustomAnalyzer}, recording the output of
	 * every stage of the analysis chain.
	 *
	 * <p>The result holds the text after each char filter, the tokens produced by the tokenizer,
	 * and the tokens after each token filter. Each stage is named by its SPI name.
	 *
	 * @param text text to analyze; must not be null
	 * @return per-stage char filtered texts and tokens
	 * @throws LukeException if no analyzer is set, the analyzer is not a {@link CustomAnalyzer}, or
	 *     the analysis fails
	 */
	@Override
	public StepByStepResult analyzeStepByStep(String text){
	  Objects.requireNonNull(text);
	  if (analyzer == null) {
	    throw new LukeException("Analyzer is not set.");
	  }

	  if (!(analyzer instanceof CustomAnalyzer)) {
	    throw new LukeException("Analyzer is not CustomAnalyzer.");
	  }

	  List<NamedTokens> namedTokens = new ArrayList<>();
	  List<CharfilteredText> charfilteredTexts = new ArrayList<>();

	  try {
	    CustomAnalyzer customAnalyzer = (CustomAnalyzer)analyzer;
	    final List<CharFilterFactory> charFilterFactories = customAnalyzer.getCharFilterFactories();
	    Reader reader = new StringReader(text);
	    String charFilteredSource = text;
	    for (CharFilterFactory charFilterFactory : charFilterFactories) {
	      // Chain the filters: each one wraps the reader produced by the previous filter, so the
	      // tokenizer below reads text that went through ALL char filters, not only the last one.
	      reader = charFilterFactory.create(reader);

	      // Separately materialize the output of this filter for display, feeding it the
	      // materialized output of the previous filter.
	      Reader readerForWriteOut = new StringReader(charFilteredSource);
	      readerForWriteOut = charFilterFactory.create(readerForWriteOut);
	      charFilteredSource = writeCharStream(readerForWriteOut);
	      charfilteredTexts.add(new CharfilteredText(CharFilterFactory.findSPIName(charFilterFactory.getClass()), charFilteredSource));
	    }

	    final TokenizerFactory tokenizerFactory = customAnalyzer.getTokenizerFactory();
	    final List<TokenFilterFactory> tokenFilterFactories = customAnalyzer.getTokenFilterFactories();

	    // tokenizer stage
	    TokenStream tokenStream = tokenizerFactory.create();
	    ((Tokenizer)tokenStream).setReader(reader);
	    List<Token> tokens = new ArrayList<>();
	    List<AttributeSource> attributeSources = analyzeTokenStream(tokenStream, tokens);
	    namedTokens.add(new NamedTokens(TokenizerFactory.findSPIName(tokenizerFactory.getClass()), tokens));

	    // token filter stages: each filter consumes a replay of the tokens captured from the
	    // previous stage
	    ListBasedTokenStream listBasedTokenStream = new ListBasedTokenStream(tokenStream, attributeSources);
	    for (TokenFilterFactory tokenFilterFactory : tokenFilterFactories) {
	      tokenStream = tokenFilterFactory.create(listBasedTokenStream);
	      tokens = new ArrayList<>();
	      attributeSources = analyzeTokenStream(tokenStream, tokens);
	      namedTokens.add(new NamedTokens(TokenFilterFactory.findSPIName(tokenFilterFactory.getClass()), tokens));
	      try {
	        listBasedTokenStream.close();
	      } catch (IOException e) {
	        // do nothing;
	      }
	      listBasedTokenStream = new ListBasedTokenStream(listBasedTokenStream, attributeSources);
	    }
	    try {
	      listBasedTokenStream.close();
	    } catch (IOException e) {
	      // do nothing.
	    } finally {
	      reader.close();
	    }
	    return new StepByStepResult(charfilteredTexts, namedTokens);
	  } catch (Exception e) {
	    throw new LukeException(e.getMessage(), e);
	  }
	}

	/**
	 * Consumes the given TokenStream, recording every token it emits. The stream is closed before
	 * this method returns, whether or not consuming it succeeds.
	 *
	 * @param tokenStream TokenStream to consume
	 * @param result list that receives one {@link Token} (term text plus attribute values) per
	 *     emitted token
	 *
	 * @return clones of the stream's attributes, one per emitted token, in emission order
	 */
	private List<AttributeSource> analyzeTokenStream(TokenStream tokenStream, List<Token> result) {
	  final List<AttributeSource> snapshots = new ArrayList<>();
	  try {
	    tokenStream.reset();
	    final CharTermAttribute termAtt = tokenStream.getAttribute(CharTermAttribute.class);
	    while (tokenStream.incrementToken()) {
	      // keep an independent clone so the token can be replayed to the next stage
	      snapshots.add(tokenStream.cloneAttributes());
	      final List<TokenAttribute> attributeValues = copyAttributes(tokenStream, termAtt);
	      final Token token = new Token(termAtt.toString(), attributeValues);
	      result.add(token);
	    }
	    tokenStream.end();
	  } catch (IOException ioe) {
	    throw new RuntimeException("Error occurred while iterating over TokenStream", ioe);
	  } finally {
	    IOUtils.closeWhileHandlingException(tokenStream);
	  }
	  return snapshots;
	}

	/**
	 * TokenStream that replays a list of previously captured tokens.
	 * see org.apache.solr.handler.AnalysisRequestHandlerBase#ListBasedTokenStream
	 */
	protected static final class ListBasedTokenStream extends TokenStream {
	  /** Captured tokens, replayed in list order. */
	  private final List<AttributeSource> tokens;
	  /** Position in {@link #tokens}; assigned by {@link #reset()}. */
	  private Iterator<AttributeSource> tokenIterator;

	  /**
	   * Creates a new ListBasedTokenStream which replays the given tokens.
	   *
	   * @param attributeSource source of the attribute factory and attribute impls
	   * @param tokens tokens to replay
	   */
	  ListBasedTokenStream(AttributeSource attributeSource, List<AttributeSource> tokens) {
	    super(attributeSource.getAttributeFactory());
	    this.tokens = tokens;
	    // register every attribute impl of the source on this stream as well
	    addAttributes(attributeSource);
	  }

	  /** Restarts the replay from the first token. */
	  @Override
	  public void reset() throws IOException {
	    super.reset();
	    tokenIterator = tokens.iterator();
	  }

	  /** Copies the next captured token into this stream's attributes, if there is one. */
	  @Override
	  public boolean incrementToken() {
	    if (!tokenIterator.hasNext()) {
	      return false;
	    }
	    clearAttributes();
	    AttributeSource captured = tokenIterator.next();
	    addAttributes(captured);
	    captured.copyTo(this);
	    return true;
	  }

	  /** Adds all attribute impls of the given source to this stream. */
	  void addAttributes(AttributeSource attributeSource) {
	    for (Iterator<AttributeImpl> impls = attributeSource.getAttributeImplsIterator(); impls.hasNext(); ) {
	      addAttributeImpl(impls.next()); // adds both impl & interfaces
	    }
	  }
	}

	/**
	 * Reads the given reader until end of stream and returns everything read as a string.
	 *
	 * <p>A read that returns fewer characters than the buffer size does not mean end of stream,
	 * so reading continues until {@code read} returns -1.
	 *
	 * @param input reader to drain; it is not closed by this method
	 * @return all characters read from {@code input}
	 * @throws RuntimeException if reading fails with an {@link IOException}
	 */
	private static String writeCharStream(Reader input ){
	  final int BUFFER_SIZE = 1024;
	  char[] buf = new char[BUFFER_SIZE];
	  StringBuilder sb = new StringBuilder();
	  try {
	    int len;
	    while ((len = input.read(buf, 0, BUFFER_SIZE)) != -1) {
	      sb.append(buf, 0, len);
	    }
	  } catch (IOException e) {
	    throw new RuntimeException("Error occurred while iterating over charfiltering", e);
	  }
	  return sb.toString();
	}

	}