/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.schema;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.lang.invoke.MethodHandles;
import java.lang.reflect.Constructor;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.SortedSetFieldSource;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedSetSelector;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.AttributeSource.State;
import org.apache.solr.analysis.SolrAnalyzer;
import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.QParser;
import org.apache.solr.uninverting.UninvertingReader.Type;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.solr.common.params.CommonParams.JSON;
/**
* Pre-analyzed field type provides a way to index a serialized token stream,
* optionally together with an independent stored value for the field.
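*
* <p>A minimal sketch of the default JSON serialization format, assuming the
* key names used by {@link JsonPreAnalyzedParser} ("v" = format version,
* "str" = stored value, "tokens" = token list; per token, "t" = term text,
* "s"/"e" = start/end offsets, "i" = position increment):
* <pre>
* {"v":"1","str":"Apache Solr","tokens":[
*   {"t":"apache","s":0,"e":6,"i":1},
*   {"t":"solr","s":7,"e":11,"i":1}]}
* </pre>
*/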
public class PreAnalyzedField extends TextField implements HasImplicitIndexAnalyzer {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
/** Init argument name. The value is either a short name ("json" or "simple")
* or a fully-qualified class name of a parser that implements {@link PreAnalyzedParser}.
*/
public static final String PARSER_IMPL = "parserImpl";
private static final String DEFAULT_IMPL = JsonPreAnalyzedParser.class.getName();
private PreAnalyzedParser parser;
private PreAnalyzedAnalyzer preAnalyzer;
@Override
public void init(IndexSchema schema, Map<String, String> args) {
super.init(schema, args);
String implName = args.get(PARSER_IMPL);
if (implName == null) {
parser = new JsonPreAnalyzedParser();
} else {
// short name
if (JSON.equalsIgnoreCase(implName)) {
parser = new JsonPreAnalyzedParser();
} else if ("simple".equalsIgnoreCase(implName)) {
parser = new SimplePreAnalyzedParser();
} else {
try {
Class<? extends PreAnalyzedParser> implClazz = schema.getSolrClassLoader().findClass(implName, PreAnalyzedParser.class);
Constructor<?> c = implClazz.getConstructor(new Class<?>[0]);
parser = (PreAnalyzedParser) c.newInstance(new Object[0]);
} catch (Exception e) {
log.warn("Can't use the configured PreAnalyzedParser class '{}', using defualt {}"
, implName, DEFAULT_IMPL, e);
parser = new JsonPreAnalyzedParser();
}
}
args.remove(PARSER_IMPL);
}
// create Analyzer instance for reuse:
preAnalyzer = new PreAnalyzedAnalyzer(parser);
}
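// Illustrative schema configuration (a sketch; the fieldType name is arbitrary).
// The parserImpl init argument accepts the short names "json" and "simple", or a
// fully-qualified PreAnalyzedParser class name, and defaults to the JSON parser:
//
//   <fieldType name="preanalyzed" class="solr.PreAnalyzedField" parserImpl="json"/>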
/**
* Overridden to return an analyzer consisting of a {@link PreAnalyzedTokenizer}.
* NOTE: If an index analyzer is specified in the schema, it will be ignored.
*/
@Override
public Analyzer getIndexAnalyzer() {
return preAnalyzer;
}
/**
* Returns the query analyzer defined in the schema; if none is defined,
* the index-time pre-analyzer is returned instead.
*
* Note that if the schema specifies an index-time analyzer via either
* {@code <analyzer>} or {@code <analyzer type="index">}, but no query-time
* analyzer, the query analyzer returned here will be the index-time
* analyzer specified in the schema rather than the pre-analyzer.
*/
@Override
public Analyzer getQueryAnalyzer() {
Analyzer queryAnalyzer = super.getQueryAnalyzer();
return queryAnalyzer instanceof FieldType.DefaultAnalyzer ? getIndexAnalyzer() : queryAnalyzer;
}
@Override
public IndexableField createField(SchemaField field, Object value) {
IndexableField f = null;
try {
f = fromString(field, String.valueOf(value));
} catch (Exception e) {
log.warn("Error parsing pre-analyzed field '{}'", field.getName(), e);
return null;
}
return f;
}
@Override
public SortField getSortField(SchemaField field, boolean top) {
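// Sort on each document's minimum term (SortedSetSelector.Type.MIN), with
// explicit placement of documents that have no value for the field.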
return getSortedSetSortField(field, SortedSetSelector.Type.MIN, top,
SortField.STRING_FIRST, SortField.STRING_LAST);
}
@Override
public ValueSource getValueSource(SchemaField field, QParser parser) {
return new SortedSetFieldSource(field.getName());
}
@Override
public Type getUninversionType(SchemaField sf) {
return Type.SORTED_SET_BINARY;
}
@Override
public void write(TextResponseWriter writer, String name, IndexableField f)
throws IOException {
writer.writeStr(name, toExternal(f), true);
}
/** Utility method to convert a field to a string that is parseable by this
* class.
* @param f field to convert
* @return string that is compatible with the serialization format
* @throws IOException If there is a low-level I/O error.
*/
public String toFormattedString(Field f) throws IOException {
return parser.toFormattedString(f);
}
/**
* Utility method to create a {@link org.apache.lucene.document.FieldType}
* based on the {@link SchemaField}
*/
public static org.apache.lucene.document.FieldType createFieldType(SchemaField field) {
if (!field.indexed() && !field.stored()) {
log.trace("Ignoring unindexed/unstored field: {}", field);
return null;
}
org.apache.lucene.document.FieldType newType = new org.apache.lucene.document.FieldType();
newType.setTokenized(field.isTokenized());
newType.setStored(field.stored());
newType.setOmitNorms(field.omitNorms());
IndexOptions options = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
if (field.omitTermFreqAndPositions()) {
options = IndexOptions.DOCS;
} else if (field.omitPositions()) {
options = IndexOptions.DOCS_AND_FREQS;
} else if (field.storeOffsetsWithPositions()) {
options = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
}
newType.setIndexOptions(options);
newType.setStoreTermVectors(field.storeTermVector());
newType.setStoreTermVectorOffsets(field.storeTermOffsets());
newType.setStoreTermVectorPositions(field.storeTermPositions());
newType.setStoreTermVectorPayloads(field.storeTermPayloads());
return newType;
}
/**
* This is a simple holder of a stored part and the collected states (tokens with attributes).
*/
public static class ParseResult {
public String str;
public byte[] bin;
public List<State> states = new LinkedList<>();
}
/**
* Parse the input and return the stored part and the tokens with attributes.
*/
public static interface PreAnalyzedParser {
/**
* Parse input.
* @param reader input to read from
* @param parent parent that will own the resulting states (tokens with attributes)
* @return parse result, with possibly null stored and/or states fields.
* @throws IOException if a parsing error or IO error occurs
*/
public ParseResult parse(Reader reader, AttributeSource parent) throws IOException;
/**
* Format a field so that the resulting String is valid for parsing with {@link #parse(Reader, AttributeSource)}.
* @param f field instance
* @return formatted string
* @throws IOException If there is a low-level I/O error.
*/
public String toFormattedString(Field f) throws IOException;
}
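/*
 * A minimal custom parser sketch (hypothetical "WhitespaceOnlyParser" class,
 * shown for illustration only; it would be selected via the parserImpl init
 * argument). It splits the raw input on whitespace and records one captured
 * state per token on the parent AttributeSource:
 *
 *   import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 *
 *   public class WhitespaceOnlyParser implements PreAnalyzedParser {
 *     @Override
 *     public ParseResult parse(Reader reader, AttributeSource parent) throws IOException {
 *       StringBuilder sb = new StringBuilder();
 *       int ch;
 *       while ((ch = reader.read()) != -1) {
 *         sb.append((char) ch);
 *       }
 *       ParseResult res = new ParseResult();
 *       CharTermAttribute termAtt = parent.addAttribute(CharTermAttribute.class);
 *       for (String tok : sb.toString().split("\\s+")) {
 *         if (tok.isEmpty()) continue;
 *         parent.clearAttributes();
 *         termAtt.setEmpty().append(tok);
 *         res.states.add(parent.captureState());
 *       }
 *       res.str = sb.toString(); // keep the raw input as the stored value
 *       return res;
 *     }
 *     @Override
 *     public String toFormattedString(Field f) throws IOException {
 *       return f.stringValue(); // trivial inverse of parse() for this format
 *     }
 *   }
 */
/**
* Parses the given serialized value and creates the corresponding
* {@link IndexableField}, or returns null for blank input or for fields
* that are neither indexed nor stored.
*/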
public IndexableField fromString(SchemaField field, String val) throws Exception {
if (val == null || val.trim().length() == 0) {
return null;
}
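// Consume the serialized value up front so the stored value and the cached
// token states are available below; reset() then prepares them for replay.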
PreAnalyzedTokenizer parse = new PreAnalyzedTokenizer(parser);
Reader reader = new StringReader(val);
parse.setReader(reader);
parse.decodeInput(reader); // consume
parse.reset();
org.apache.lucene.document.FieldType type = createFieldType(field);
if (type == null) {
parse.close();
return null;
}
Field f = null;
if (parse.getStringValue() != null) {
if (field.stored()) {
f = new Field(field.getName(), parse.getStringValue(), type);
} else {
type.setStored(false);
}
} else if (parse.getBinaryValue() != null) {
if (field.isBinary()) {
f = new Field(field.getName(), parse.getBinaryValue(), type);
}
} else {
type.setStored(false);
}
if (parse.hasTokenStream()) {
if (field.indexed()) {
type.setTokenized(true);
if (f != null) {
f.setTokenStream(parse);
} else {
f = new Field(field.getName(), parse, type);
}
} else {
if (f != null) {
type.setIndexOptions(IndexOptions.NONE);
type.setTokenized(false);
}
}
}
return f;
}
/**
* Token stream that works from a list of saved states.
*/
private static class PreAnalyzedTokenizer extends Tokenizer {
private final List<AttributeSource.State> cachedStates = new LinkedList<>();
private Iterator<AttributeSource.State> it = null;
private String stringValue = null;
private byte[] binaryValue = null;
private PreAnalyzedParser parser;
private IOException readerConsumptionException;
private int lastEndOffset;
public PreAnalyzedTokenizer(PreAnalyzedParser parser) {
// we don't pack attributes, since this tokenizer is used for (de)serialization and we don't want the bloat.
super(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
this.parser = parser;
}
public boolean hasTokenStream() {
return !cachedStates.isEmpty();
}
public String getStringValue() {
return stringValue;
}
public byte[] getBinaryValue() {
return binaryValue;
}
@Override
public final boolean incrementToken() {
if (!it.hasNext()) {
return false;
}
AttributeSource.State state = it.next();
restoreState(state.clone());
// TODO: why can't I lookup the OffsetAttribute up in ctor instead?
lastEndOffset = addAttribute(OffsetAttribute.class).endOffset();
return true;
}
/**
* Throws a delayed exception if one was thrown from decodeInput()
* while reading from the input reader.
*/
@Override
public final void reset() throws IOException {
super.reset();
if (readerConsumptionException != null) {
IOException e = new IOException(readerConsumptionException);
readerConsumptionException = null;
throw e;
}
it = cachedStates.iterator();
}
@Override
public void end() throws IOException {
super.end();
// we must set the end offset correctly so multi-valued fields don't try to send offsets backwards:
addAttribute(OffsetAttribute.class).setOffset(lastEndOffset, lastEndOffset);
}
private void setReaderConsumptionException(IOException e) {
readerConsumptionException = e;
}
/**
* Parses the input reader, capturing the stored value and the token states specified there.
*/
private void decodeInput(Reader reader) throws IOException {
removeAllAttributes(); // reset attributes to the empty set
cachedStates.clear();
stringValue = null;
binaryValue = null;
try {
ParseResult res = parser.parse(reader, this);
if (res != null) {
stringValue = res.str;
binaryValue = res.bin;
if (res.states != null) {
cachedStates.addAll(res.states);
}
}
} catch (IOException e) {
removeAllAttributes(); // reset attributes to the empty set
throw e; // rethrow
}
}
}
private static class PreAnalyzedAnalyzer extends SolrAnalyzer {
private PreAnalyzedParser parser;
PreAnalyzedAnalyzer(PreAnalyzedParser parser) {
this.parser = parser;
}
@Override
protected TokenStreamComponents createComponents(String fieldName) {
final PreAnalyzedTokenizer tokenizer = new PreAnalyzedTokenizer(parser);
return new TokenStreamComponents(r -> {
try {
tokenizer.decodeInput(r);
} catch (IOException e) {
tokenizer.setReaderConsumptionException(e);
}
}, tokenizer);
}
}
}