lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AnalyzerFactoryTask.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.lucene.benchmark.byTask.tasks;

 import java.io.IOException;
 import java.io.StreamTokenizer;
 import java.io.StringReader;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.regex.Pattern;
 import org.apache.lucene.analysis.AbstractAnalysisFactory;
 import org.apache.lucene.analysis.CharFilterFactory;
 import org.apache.lucene.analysis.TokenFilterFactory;
 import org.apache.lucene.analysis.TokenizerFactory;
 import org.apache.lucene.analysis.util.FilesystemResourceLoader;
 import org.apache.lucene.benchmark.byTask.PerfRunData;
 import org.apache.lucene.benchmark.byTask.utils.AnalyzerFactory;
 import org.apache.lucene.util.ResourceLoaderAware;
 import org.apache.lucene.util.Version;

 /**
  * Analyzer factory construction task. The name given to the constructed factory may be given to
  * NewAnalyzerTask, which will call AnalyzerFactory.create().
  *
  * <p>Params are in the form argname:argvalue or argname:"argvalue" or argname:'argvalue'; use
  * backslashes to escape '"' or "'" inside a quoted value when it's used as the enclosing quotation
  * mark.
  *
  * <p>Specify params in a comma separated list of the following, in order:
  *
  * <ol>
  *   <li>Analyzer args:
  *       <ul>
  *         <li><b>Required</b>: <code>name:<i>analyzer-factory-name</i></code>
  *         <li>Optional: <code>positionIncrementGap:<i>int value</i></code> (default: 0)
  *         <li>Optional: <code>offsetGap:<i>int value</i></code> (default: 1)
  *       </ul>
  *   <li>zero or more CharFilterFactory's, followed by
  *   <li>exactly one TokenizerFactory, followed by
  *   <li>zero or more TokenFilterFactory's
  * </ol>
  *
  * Each component analysis factory may specify <code>luceneMatchVersion</code> (defaults to {@link
  * Version#LATEST}) and any of the args understood by the specified *Factory class, in the
  * above-described param format.
  *
  * <p>Example:
  *
  * <pre>
  *     -AnalyzerFactory(name:'strip html, fold to ascii, whitespace tokenize, max 10k tokens',
  *                      positionIncrementGap:100,
  *                      HTMLStripCharFilter,
  *                      MappingCharFilter(mapping:'mapping-FoldToASCII.txt'),
  *                      WhitespaceTokenizer(luceneMatchVersion:LUCENE_5_0_0),
  *                      TokenLimitFilter(maxTokenCount:10000, consumeAllTokens:false))
  *     [...]
  *     -NewAnalyzer('strip html, fold to ascii, whitespace tokenize, max 10k tokens')
  * </pre>
  *
  * <p>AnalyzerFactory will direct analysis component factories to look for resources under the
  * directory specified in the "work.dir" property.
  */
 public class AnalyzerFactoryTask extends PerfTask {
   private static final String LUCENE_ANALYSIS_PACKAGE_PREFIX = "org.apache.lucene.analysis.";

   /** Matches the optional component-type suffix (e.g. "TokenFilterFactory") of a short name. */
   private static final Pattern ANALYSIS_COMPONENT_SUFFIX_PATTERN =
       Pattern.compile("(?s:(?:(?:Token|Char)?Filter|Tokenizer)(?:Factory)?)$");

   private static final Pattern TRAILING_DOT_ZERO_PATTERN = Pattern.compile("\\.0$");

   /** Smallest magnitude that {@link Double#toString(double)} renders in scientific notation. */
   private static final double SCIENTIFIC_NOTATION_THRESHOLD = 1e7;

   /** Magnitudes strictly below this bound (2^63) can be cast to long without saturating. */
   private static final double LONG_RANGE_BOUND = 0x1.0p63;

   /** Parser state: which kind of top-level param is acceptable next. */
   private enum ArgType {
     ANALYZER_ARG,
     ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER,
     TOKENFILTER
   }

   String factoryName = null;
   Integer positionIncrementGap = null;
   Integer offsetGap = null;
   private final List<CharFilterFactory> charFilterFactories = new ArrayList<>();
   private TokenizerFactory tokenizerFactory = null;
   private final List<TokenFilterFactory> tokenFilterFactories = new ArrayList<>();

   public AnalyzerFactoryTask(PerfRunData runData) {
     super(runData);
   }

   /** All the work happens in {@link #setParams(String)}; running the task is a no-op. */
   @Override
   public int doLogic() {
     return 1;
   }

   /**
    * Sets the params. Analysis component factory names may optionally include the "Factory" suffix.
    *
    * <p>The parsed pipeline is registered with the run data under the given factory name.
    *
    * @param params analysis pipeline specification: name, (optional) positionIncrementGap,
    *     (optional) offsetGap, 0+ CharFilterFactory's, 1 TokenizerFactory, and 0+
    *     TokenFilterFactory's
    * @throws RuntimeException if the specification cannot be parsed; the message starts with the
    *     offending line number of the algorithm file
    */
   @Override
   public void setParams(String params) {
     super.setParams(params);
     ArgType expectedArgType = ArgType.ANALYZER_ARG;

     final StreamTokenizer stok = new StreamTokenizer(new StringReader(params));
     stok.commentChar('#');
     stok.quoteChar('"');
     stok.quoteChar('\'');
     stok.eolIsSignificant(false);
     stok.ordinaryChar('(');
     stok.ordinaryChar(')');
     stok.ordinaryChar(':');
     stok.ordinaryChar(',');
     try {
       while (stok.nextToken() != StreamTokenizer.TT_EOF) {
         if (stok.ttype == ',') {
           continue; // separators between top-level params carry no information
         }
         if (stok.ttype != StreamTokenizer.TT_WORD) {
           throw new RuntimeException(
               "Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
         }
         final String word = stok.sval;
         switch (expectedArgType) {
           case ANALYZER_ARG:
             if (parseLeadingAnalyzerArg(stok, word, params)) {
               // The name has been seen: gap args, char filters or the tokenizer may follow.
               expectedArgType = ArgType.ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER;
             }
             break;
           case ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER:
             if (isGapArg(word)) {
               parseGapArg(stok, word);
             } else if (createCharFilterOrTokenizer(stok, word)) {
               // Exactly one tokenizer is allowed; only token filters may follow it.
               expectedArgType = ArgType.TOKENFILTER;
             }
             break;
           default: // ArgType.TOKENFILTER
             createTokenFilter(stok, word);
             break;
         }
       }
     } catch (Throwable t) {
       throw withLineNumber(stok, t);
     }

     final AnalyzerFactory analyzerFactory =
         new AnalyzerFactory(charFilterFactories, tokenizerFactory, tokenFilterFactories);
     analyzerFactory.setPositionIncrementGap(positionIncrementGap);
     analyzerFactory.setOffsetGap(offsetGap);
     getRunData().getAnalyzerFactories().put(factoryName, analyzerFactory);
   }

   /**
    * Parses one analyzer-level arg while the name has not been seen yet.
    *
    * @return true if the arg was the analyzer name, false if it was one of the gap args
    */
   private boolean parseLeadingAnalyzerArg(StreamTokenizer stok, String argName, String params)
       throws IOException {
     final boolean isName = argName.equalsIgnoreCase("name");
     if (!isName && !isGapArg(argName)) {
       throw new RuntimeException(
           "Line #"
               + lineno(stok)
               + ": Missing 'name' param to AnalyzerFactory: '"
               + params
               + "'");
     }
     expectColon(stok, argName, "AnalyzerFactory");
     final String argValue = readArgValue(stok);
     if (isName) {
       factoryName = argValue;
       return true;
     }
     setGap(argName, parseIntValue(stok, argName, argValue));
     return false;
   }

   /** Parses a positionIncrementGap/offsetGap arg that appears after the analyzer name. */
   private void parseGapArg(StreamTokenizer stok, String argName) throws IOException {
     expectColon(stok, argName, "AnalyzerFactory");
     switch (stok.nextToken()) {
       case '"':
       case '\'':
       case StreamTokenizer.TT_WORD:
         setGap(argName, parseIntValue(stok, argName, stok.sval));
         break;
       case StreamTokenizer.TT_NUMBER:
         setGap(argName, (int) stok.nval);
         break;
       case StreamTokenizer.TT_EOF:
         throw new RuntimeException("Unexpected EOF: " + stok.toString());
       default:
         throw new RuntimeException(
             "Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
     }
   }

   /**
    * Resolves the given name as a CharFilterFactory or, failing that, as a TokenizerFactory, and
    * creates the component.
    *
    * <p>Every lookup failure mode triggers the fallback: a dotted name that is not a char filter
    * fails with ClassNotFoundException or ClassCastException rather than the
    * IllegalArgumentException that callers of the SPI lookup expect.
    *
    * @return true if the component is the tokenizer, false if it is a char filter
    */
   private boolean createCharFilterOrTokenizer(StreamTokenizer stok, String name) {
     Class<? extends AbstractAnalysisFactory> clazz;
     boolean isTokenizer = false;
     try {
       clazz = lookupAnalysisClass(name, CharFilterFactory.class);
     } catch (IllegalArgumentException | ClassNotFoundException | ClassCastException notCharFilter) {
       try {
         clazz = lookupAnalysisClass(name, TokenizerFactory.class);
         isTokenizer = true;
       } catch (IllegalArgumentException | ClassNotFoundException | ClassCastException e) {
         throw new RuntimeException(
             "Line #"
                 + lineno(stok)
                 + ": Can't find class '"
                 + name
                 + "' as CharFilterFactory or TokenizerFactory",
             e);
       }
     }
     // Created outside the lookup try-blocks so a creation failure is never mistaken for a
     // failed lookup.
     createAnalysisPipelineComponent(stok, clazz);
     return isTokenizer;
   }

   /** Resolves the given name as a TokenFilterFactory and creates the component. */
   private void createTokenFilter(StreamTokenizer stok, String className) {
     final Class<? extends TokenFilterFactory> clazz;
     try {
       clazz = lookupAnalysisClass(className, TokenFilterFactory.class);
     } catch (IllegalArgumentException | ClassNotFoundException | ClassCastException e) {
       throw new RuntimeException(
           "Line #" + lineno(stok) + ": Can't find class '" + className + "' as TokenFilterFactory",
           e);
     }
     createAnalysisPipelineComponent(stok, clazz);
   }

   /**
    * Instantiates the given analysis factory class after pulling params from the given stream
    * tokenizer, then stores the result in the appropriate pipeline component list.
    *
    * @param stok stream tokenizer from which to draw analysis factory params
    * @param clazz analysis factory class to instantiate
    */
   private void createAnalysisPipelineComponent(
       StreamTokenizer stok, Class<? extends AbstractAnalysisFactory> clazz) {
     try {
       final Map<String, String> argMap = readComponentArgs(stok, clazz);
       if (!argMap.containsKey("luceneMatchVersion")) {
         argMap.put("luceneMatchVersion", Version.LATEST.toString());
       }
       final AbstractAnalysisFactory instance;
       try {
         instance = clazz.getConstructor(Map.class).newInstance(argMap);
       } catch (Exception e) {
         throw new RuntimeException("Line #" + lineno(stok) + ": ", e);
       }
       if (instance instanceof ResourceLoaderAware) {
         // Resources are resolved against "work.dir", falling back to the current directory.
         Path baseDir = Paths.get(getRunData().getConfig().get("work.dir", "work"));
         if (!Files.isDirectory(baseDir)) {
           baseDir = Paths.get(".");
         }
         ((ResourceLoaderAware) instance).inform(new FilesystemResourceLoader(baseDir));
       }
       if (CharFilterFactory.class.isAssignableFrom(clazz)) {
         charFilterFactories.add((CharFilterFactory) instance);
       } else if (TokenizerFactory.class.isAssignableFrom(clazz)) {
         tokenizerFactory = (TokenizerFactory) instance;
       } else if (TokenFilterFactory.class.isAssignableFrom(clazz)) {
         tokenFilterFactories.add((TokenFilterFactory) instance);
       }
     } catch (Throwable t) {
       throw withLineNumber(stok, t);
     }
   }

   /**
    * Reads the optional parenthesized <code>argname:argvalue</code> list of one component, stopping
    * at the comma that ends the component or at the end of input.
    */
   private Map<String, String> readComponentArgs(
       StreamTokenizer stok, Class<? extends AbstractAnalysisFactory> clazz) throws IOException {
     final Map<String, String> argMap = new HashMap<>();
     boolean parenthetical = false;
     while (stok.nextToken() != StreamTokenizer.TT_EOF) {
       switch (stok.ttype) {
         case ',':
           {
             if (!parenthetical) {
               // A top-level comma finishes this analysis factory configuration
               return argMap;
             }
             break;
           }
         case '(':
           {
             if (parenthetical) {
               throw new RuntimeException(
                   "Line #" + lineno(stok) + ": Unexpected opening parenthesis.");
             }
             parenthetical = true;
             break;
           }
         case ')':
           {
             if (!parenthetical) {
               throw new RuntimeException(
                   "Line #" + lineno(stok) + ": Unexpected closing parenthesis.");
             }
             parenthetical = false;
             break;
           }
         case StreamTokenizer.TT_WORD:
           {
             if (!parenthetical) {
               throw new RuntimeException(
                   "Line #" + lineno(stok) + ": Unexpected token '" + stok.sval + "'");
             }
             final String argName = stok.sval;
             expectColon(stok, argName, clazz.getSimpleName());
             argMap.put(argName, readArgValue(stok));
             break;
           }
         default:
           {
             // Any other token is skipped.
             break;
           }
       }
     }
     return argMap;
   }

   /** Consumes the next token and fails unless it is the ':' separating an arg name and value. */
   private void expectColon(StreamTokenizer stok, String argName, String owner)
       throws IOException {
     if (stok.nextToken() != ':') {
       throw new RuntimeException(
           "Line #" + lineno(stok) + ": Missing ':' after '" + argName + "' param to " + owner);
     }
   }

   /** Consumes the next token and returns it as an arg value: a number, word or quoted string. */
   private String readArgValue(StreamTokenizer stok) throws IOException {
     switch (stok.nextToken()) {
       case StreamTokenizer.TT_NUMBER:
         return numberToString(stok.nval);
       case '"':
       case '\'':
       case StreamTokenizer.TT_WORD:
         return stok.sval;
       case StreamTokenizer.TT_EOF:
         throw new RuntimeException("Unexpected EOF: " + stok.toString());
       default:
         throw new RuntimeException(
             "Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
     }
   }

   /**
    * Renders a tokenizer number as text, dropping the ".0" from integral values.
    *
    * <p>Integral values of 10^7 and above are printed via long, because Double.toString switches to
    * scientific notation there (e.g. "1.0E7"), which integer args cannot parse.
    */
   private static String numberToString(double value) {
     final double magnitude = Math.abs(value);
     if (magnitude >= SCIENTIFIC_NOTATION_THRESHOLD
         && magnitude < LONG_RANGE_BOUND
         && value == Math.rint(value)) {
       return Long.toString((long) value);
     }
     return TRAILING_DOT_ZERO_PATTERN.matcher(Double.toString(value)).replaceFirst("");
   }

   /** Parses the text of a gap arg as an int, ignoring surrounding whitespace. */
   private int parseIntValue(StreamTokenizer stok, String argName, String text) {
     try {
       return Integer.parseInt(text.trim());
     } catch (NumberFormatException e) {
       throw new RuntimeException(
           "Line #" + lineno(stok) + ": Exception parsing " + argName + " value '" + text + "'", e);
     }
   }

   private static boolean isGapArg(String argName) {
     return argName.equalsIgnoreCase("positionIncrementGap")
         || argName.equalsIgnoreCase("offsetGap");
   }

   private void setGap(String argName, int value) {
     if (argName.equalsIgnoreCase("positionIncrementGap")) {
       positionIncrementGap = value;
     } else if (argName.equalsIgnoreCase("offsetGap")) {
       offsetGap = value;
     }
   }

   /**
    * Returns the exception to throw for a parse failure: the failure itself when it is a runtime
    * exception already carrying a "Line #" message, otherwise a wrapper that adds the line number.
    *
    * <p>The message is null-checked; exceptions without a message are wrapped like any other.
    */
   private RuntimeException withLineNumber(StreamTokenizer stok, Throwable failure) {
     if (failure instanceof RuntimeException) {
       final String message = failure.getMessage();
       if (message != null && message.startsWith("Line #")) {
         return (RuntimeException) failure;
       }
     }
     return new RuntimeException("Line #" + lineno(stok) + ": ", failure);
   }

   /**
    * This method looks up a class with its fully qualified name (FQN), or a short-name
    * class-simplename, or with a package suffix, assuming "org.apache.lucene.analysis." as the
    * package prefix (e.g. "standard.ClassicTokenizerFactory" -&gt;
    * "org.apache.lucene.analysis.standard.ClassicTokenizerFactory").
    *
    * <p>If className contains a period, the class is first looked up as-is, assuming that it is an
    * FQN. If this fails, lookup is retried after prepending the Lucene analysis package prefix to
    * the class name.
    *
    * <p>If className does not contain a period, the analysis SPI *Factory.lookupClass() methods are
    * used to find the class, after stripping any component-type suffix from the name.
    *
    * @param className The name or the short name of the class.
    * @param expectedType The superclass className is expected to extend
    * @return the loaded class.
    * @throws ClassNotFoundException if a dotted name cannot be loaded either way, or if
    *     expectedType is none of the three analysis factory types
    * @throws ClassCastException if a dotted name resolves to a class that does not extend
    *     expectedType
    * @throws IllegalArgumentException presumably when the SPI lookup does not know an undotted
    *     name; this is what the callers in this class catch
    */
   public <T> Class<? extends T> lookupAnalysisClass(String className, Class<T> expectedType)
       throws ClassNotFoundException {
     if (className.contains(".")) {
       try {
         // First, try className == FQN
         return Class.forName(className).asSubclass(expectedType);
       } catch (ClassNotFoundException e) {
         try {
           // Second, retry lookup after prepending the Lucene analysis package prefix
           return Class.forName(LUCENE_ANALYSIS_PACKAGE_PREFIX + className).asSubclass(expectedType);
         } catch (ClassNotFoundException e1) {
           throw new ClassNotFoundException(
               "Can't find class '"
                   + className
                   + "' or '"
                   + LUCENE_ANALYSIS_PACKAGE_PREFIX
                   + className
                   + "'",
               e1);
         }
       }
     }
     // No dot - use analysis SPI lookup
     final String analysisComponentName =
         ANALYSIS_COMPONENT_SUFFIX_PATTERN.matcher(className).replaceFirst("");
     if (CharFilterFactory.class.isAssignableFrom(expectedType)) {
       return CharFilterFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
     } else if (TokenizerFactory.class.isAssignableFrom(expectedType)) {
       return TokenizerFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
     } else if (TokenFilterFactory.class.isAssignableFrom(expectedType)) {
       return TokenFilterFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
     }

     throw new ClassNotFoundException("Can't find class '" + className + "'");
   }

   /* (non-Javadoc)
    * @see org.apache.lucene.benchmark.byTask.tasks.PerfTask#supportsParams()
    */
   @Override
   public boolean supportsParams() {
     return true;
   }

   /** Returns the current line in the algorithm file */
   public int lineno(StreamTokenizer stok) {
     return getAlgLineNum() + stok.lineno();
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.lucene.benchmark.byTask.tasks;

	import java.io.StreamTokenizer;
	import java.io.StringReader;
	import java.nio.file.Files;
	import java.nio.file.Path;
	import java.nio.file.Paths;
	import java.util.ArrayList;
	import java.util.HashMap;
	import java.util.List;
	import java.util.Map;
	import java.util.regex.Pattern;
	import org.apache.lucene.analysis.AbstractAnalysisFactory;
	import org.apache.lucene.analysis.CharFilterFactory;
	import org.apache.lucene.analysis.TokenFilterFactory;
	import org.apache.lucene.analysis.TokenizerFactory;
	import org.apache.lucene.analysis.util.FilesystemResourceLoader;
	import org.apache.lucene.benchmark.byTask.PerfRunData;
	import org.apache.lucene.benchmark.byTask.utils.AnalyzerFactory;
	import org.apache.lucene.util.ResourceLoaderAware;
	import org.apache.lucene.util.Version;

	/**
	* Analyzer factory construction task. The name given to the constructed factory may be given to
	* NewAnalyzerTask, which will call AnalyzerFactory.create().
	*
	* <p>Params are in the form argname:argvalue or argname:"argvalue" or argname:'argvalue'; use
	* backslashes to escape '"' or "'" inside a quoted value when it's used as the enclosing quotation
	* mark.
	*
	* <p>Specify params in a comma separated list of the following, in order:
	*
	* <ol>
	* <li>Analyzer args:
	* <ul>
	* <li><b>Required</b>: <code>name:<i>analyzer-factory-name</i></code>
	* <li>Optional: <code>positionIncrementGap:<i>int value</i></code> (default: 0)
	* <li>Optional: <code>offsetGap:<i>int value</i></code> (default: 1)
	* </ul>
	* <li>zero or more CharFilterFactory's, followed by
	* <li>exactly one TokenizerFactory, followed by
	* <li>zero or more TokenFilterFactory's
	* </ol>
	*
	* Each component analysis factory may specify <code>luceneMatchVersion</code> (defaults to {@link
	* Version#LATEST}) and any of the args understood by the specified *Factory class, in the
	* above-described param format.
	*
	* <p>Example:
	*
	* <pre>
	* -AnalyzerFactory(name:'strip html, fold to ascii, whitespace tokenize, max 10k tokens',
	* positionIncrementGap:100,
	* HTMLStripCharFilter,
	* MappingCharFilter(mapping:'mapping-FoldToASCII.txt'),
	* WhitespaceTokenizer(luceneMatchVersion:LUCENE_5_0_0),
	* TokenLimitFilter(maxTokenCount:10000, consumeAllTokens:false))
	* [...]
	* -NewAnalyzer('strip html, fold to ascii, whitespace tokenize, max 10k tokens')
	* </pre>
	*
	* <p>AnalyzerFactory will direct analysis component factories to look for resources under the
	* directory specified in the "work.dir" property.
	*/
	public class AnalyzerFactoryTask extends PerfTask {
	private static final String LUCENE_ANALYSIS_PACKAGE_PREFIX = "org.apache.lucene.analysis.";
	private static final Pattern ANALYSIS_COMPONENT_SUFFIX_PATTERN =
	Pattern.compile("(?s:(?:(?:Token\|Char)?Filter\|Tokenizer)(?:Factory)?)$");
	private static final Pattern TRAILING_DOT_ZERO_PATTERN = Pattern.compile("\\.0$");

	private enum ArgType {
	ANALYZER_ARG,
	ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER,
	TOKENFILTER
	}

	String factoryName = null;
	Integer positionIncrementGap = null;
	Integer offsetGap = null;
	private List<CharFilterFactory> charFilterFactories = new ArrayList<>();
	private TokenizerFactory tokenizerFactory = null;
	private List<TokenFilterFactory> tokenFilterFactories = new ArrayList<>();

	public AnalyzerFactoryTask(PerfRunData runData) {
	super(runData);
	}

	@Override
	public int doLogic() {
	return 1;
	}

	/**
	* Sets the params. Analysis component factory names may optionally include the "Factory" suffix.
	*
	* @param params analysis pipeline specification: name, (optional) positionIncrementGap,
	* (optional) offsetGap, 0+ CharFilterFactory's, 1 TokenizerFactory, and 0+
	* TokenFilterFactory's
	*/
	@Override
	@SuppressWarnings("fallthrough")
	public void setParams(String params) {
	super.setParams(params);
	ArgType expectedArgType = ArgType.ANALYZER_ARG;

	final StreamTokenizer stok = new StreamTokenizer(new StringReader(params));
	stok.commentChar('#');
	stok.quoteChar('"');
	stok.quoteChar('\'');
	stok.eolIsSignificant(false);
	stok.ordinaryChar('(');
	stok.ordinaryChar(')');
	stok.ordinaryChar(':');
	stok.ordinaryChar(',');
	try {
	while (stok.nextToken() != StreamTokenizer.TT_EOF) {
	switch (stok.ttype) {
	case ',':
	{
	// Do nothing
	break;
	}
	case StreamTokenizer.TT_WORD:
	{
	if (expectedArgType.equals(ArgType.ANALYZER_ARG)) {
	final String argName = stok.sval;
	if (!argName.equalsIgnoreCase("name")
	&& !argName.equalsIgnoreCase("positionIncrementGap")
	&& !argName.equalsIgnoreCase("offsetGap")) {
	throw new RuntimeException(
	"Line #"
	+ lineno(stok)
	+ ": Missing 'name' param to AnalyzerFactory: '"
	+ params
	+ "'");
	}
	stok.nextToken();
	if (stok.ttype != ':') {
	throw new RuntimeException(
	"Line #"
	+ lineno(stok)
	+ ": Missing ':' after '"
	+ argName
	+ "' param to AnalyzerFactory");
	}

	stok.nextToken();
	String argValue = stok.sval;
	switch (stok.ttype) {
	case StreamTokenizer.TT_NUMBER:
	{
	argValue = Double.toString(stok.nval);
	// Drop the ".0" from numbers, for integer arguments
	argValue = TRAILING_DOT_ZERO_PATTERN.matcher(argValue).replaceFirst("");
	// Intentional fallthrough
	}
	case '"':
	case '\'':
	case StreamTokenizer.TT_WORD:
	{
	if (argName.equalsIgnoreCase("name")) {
	factoryName = argValue;
	expectedArgType = ArgType.ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER;
	} else {
	int intArgValue = 0;
	try {
	intArgValue = Integer.parseInt(argValue);
	} catch (NumberFormatException e) {
	throw new RuntimeException(
	"Line #"
	+ lineno(stok)
	+ ": Exception parsing "
	+ argName
	+ " value '"
	+ argValue
	+ "'",
	e);
	}
	if (argName.equalsIgnoreCase("positionIncrementGap")) {
	positionIncrementGap = intArgValue;
	} else if (argName.equalsIgnoreCase("offsetGap")) {
	offsetGap = intArgValue;
	}
	}
	break;
	}
	case StreamTokenizer.TT_EOF:
	{
	throw new RuntimeException("Unexpected EOF: " + stok.toString());
	}
	default:
	{
	throw new RuntimeException(
	"Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
	}
	}
	} else if (expectedArgType.equals(ArgType.ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER)) {
	final String argName = stok.sval;

	if (argName.equalsIgnoreCase("positionIncrementGap")
	\|\| argName.equalsIgnoreCase("offsetGap")) {
	stok.nextToken();
	if (stok.ttype != ':') {
	throw new RuntimeException(
	"Line #"
	+ lineno(stok)
	+ ": Missing ':' after '"
	+ argName
	+ "' param to AnalyzerFactory");
	}
	stok.nextToken();
	int intArgValue = (int) stok.nval;
	switch (stok.ttype) {
	case '"':
	case '\'':
	case StreamTokenizer.TT_WORD:
	{
	intArgValue = 0;
	try {
	intArgValue = Integer.parseInt(stok.sval.trim());
	} catch (NumberFormatException e) {
	throw new RuntimeException(
	"Line #"
	+ lineno(stok)
	+ ": Exception parsing "
	+ argName
	+ " value '"
	+ stok.sval
	+ "'",
	e);
	}
	// Intentional fall-through
	}
	case StreamTokenizer.TT_NUMBER:
	{
	if (argName.equalsIgnoreCase("positionIncrementGap")) {
	positionIncrementGap = intArgValue;
	} else if (argName.equalsIgnoreCase("offsetGap")) {
	offsetGap = intArgValue;
	}
	break;
	}
	case StreamTokenizer.TT_EOF:
	{
	throw new RuntimeException("Unexpected EOF: " + stok.toString());
	}
	default:
	{
	throw new RuntimeException(
	"Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
	}
	}
	break;
	}
	try {
	final Class<? extends CharFilterFactory> clazz;
	clazz = lookupAnalysisClass(argName, CharFilterFactory.class);
	createAnalysisPipelineComponent(stok, clazz);
	} catch (IllegalArgumentException e) {
	try {
	final Class<? extends TokenizerFactory> clazz;
	clazz = lookupAnalysisClass(argName, TokenizerFactory.class);
	createAnalysisPipelineComponent(stok, clazz);
	expectedArgType = ArgType.TOKENFILTER;
	} catch (IllegalArgumentException e2) {
	throw new RuntimeException(
	"Line #"
	+ lineno(stok)
	+ ": Can't find class '"
	+ argName
	+ "' as CharFilterFactory or TokenizerFactory");
	}
	}
	} else { // expectedArgType = ArgType.TOKENFILTER
	final String className = stok.sval;
	final Class<? extends TokenFilterFactory> clazz;
	try {
	clazz = lookupAnalysisClass(className, TokenFilterFactory.class);
	} catch (IllegalArgumentException e) {
	throw new RuntimeException(
	"Line #"
	+ lineno(stok)
	+ ": Can't find class '"
	+ className
	+ "' as TokenFilterFactory");
	}
	createAnalysisPipelineComponent(stok, clazz);
	}
	break;
	}
	default:
	{
	throw new RuntimeException(
	"Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
	}
	}
	}
	} catch (RuntimeException e) {
	if (e.getMessage().startsWith("Line #")) {
	throw e;
	} else {
	throw new RuntimeException("Line #" + lineno(stok) + ": ", e);
	}
	} catch (Throwable t) {
	throw new RuntimeException("Line #" + lineno(stok) + ": ", t);
	}

	final AnalyzerFactory analyzerFactory =
	new AnalyzerFactory(charFilterFactories, tokenizerFactory, tokenFilterFactories);
	analyzerFactory.setPositionIncrementGap(positionIncrementGap);
	analyzerFactory.setOffsetGap(offsetGap);
	getRunData().getAnalyzerFactories().put(factoryName, analyzerFactory);
	}

	/**
	 * Instantiates the given analysis factory class after pulling params from the given stream
	 * tokenizer, then stores the result in the appropriate pipeline component list.
	 *
	 * <p>Tokens are consumed until EOF or until a comma is seen outside parentheses. Inside
	 * parentheses, each word is treated as a param name that must be followed by {@code ':'} and a
	 * value (a number, a single- or double-quoted string, or a bare word). If no {@code
	 * luceneMatchVersion} param was given, {@code Version.LATEST} is used.
	 *
	 * <p>The factory is created through its {@code Map}-taking constructor. If the instance is
	 * {@link ResourceLoaderAware}, it is informed with a {@link FilesystemResourceLoader} rooted at
	 * the {@code work.dir} config value (default {@code "work"}), falling back to {@code "."} when
	 * that path is not a directory.
	 *
	 * @param stok stream tokenizer from which to draw analysis factory params
	 * @param clazz analysis factory class to instantiate
	 * @throws RuntimeException on any failure; the message always starts with {@code "Line #"}
	 */
	@SuppressWarnings("fallthrough")
	private void createAnalysisPipelineComponent(
	    StreamTokenizer stok, Class<? extends AbstractAnalysisFactory> clazz) {
	  Map<String, String> argMap = new HashMap<>();
	  boolean parenthetical = false;
	  try {
	    WHILE_LOOP:
	    while (stok.nextToken() != StreamTokenizer.TT_EOF) {
	      switch (stok.ttype) {
	        case ',':
	          {
	            if (parenthetical) {
	              // Separator between params of the current factory: nothing to do
	              break;
	            } else {
	              // Finished reading this analysis factory configuration
	              break WHILE_LOOP;
	            }
	          }
	        case '(':
	          {
	            if (parenthetical) {
	              throw new RuntimeException(
	                  "Line #" + lineno(stok) + ": Unexpected opening parenthesis.");
	            }
	            parenthetical = true;
	            break;
	          }
	        case ')':
	          {
	            if (parenthetical) {
	              parenthetical = false;
	            } else {
	              throw new RuntimeException(
	                  "Line #" + lineno(stok) + ": Unexpected closing parenthesis.");
	            }
	            break;
	          }
	        case StreamTokenizer.TT_WORD:
	          {
	            // A bare word is only legal as a param name, i.e. inside the parentheses
	            if (!parenthetical) {
	              throw new RuntimeException(
	                  "Line #" + lineno(stok) + ": Unexpected token '" + stok.sval + "'");
	            }
	            String argName = stok.sval;
	            stok.nextToken();
	            if (stok.ttype != ':') {
	              throw new RuntimeException(
	                  "Line #"
	                      + lineno(stok)
	                      + ": Missing ':' after '"
	                      + argName
	                      + "' param to "
	                      + clazz.getSimpleName());
	            }
	            stok.nextToken();
	            String argValue = stok.sval;
	            switch (stok.ttype) {
	              case StreamTokenizer.TT_NUMBER:
	                {
	                  argValue = Double.toString(stok.nval);
	                  // Drop the ".0" from numbers, for integer arguments
	                  argValue = TRAILING_DOT_ZERO_PATTERN.matcher(argValue).replaceFirst("");
	                  // Intentional fall-through
	                }
	              case '"':
	              case '\'':
	              case StreamTokenizer.TT_WORD:
	                {
	                  argMap.put(argName, argValue);
	                  break;
	                }
	              case StreamTokenizer.TT_EOF:
	                {
	                  throw new RuntimeException("Unexpected EOF: " + stok.toString());
	                }
	              default:
	                {
	                  throw new RuntimeException(
	                      "Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
	                }
	            }
	          }
	      }
	    }
	    if (!argMap.containsKey("luceneMatchVersion")) {
	      argMap.put("luceneMatchVersion", Version.LATEST.toString());
	    }
	    final AbstractAnalysisFactory instance;
	    try {
	      instance = clazz.getConstructor(Map.class).newInstance(argMap);
	    } catch (Exception e) {
	      throw new RuntimeException("Line #" + lineno(stok) + ": ", e);
	    }
	    if (instance instanceof ResourceLoaderAware) {
	      Path baseDir = Paths.get(getRunData().getConfig().get("work.dir", "work"));
	      if (!Files.isDirectory(baseDir)) {
	        baseDir = Paths.get(".");
	      }
	      ((ResourceLoaderAware) instance).inform(new FilesystemResourceLoader(baseDir));
	    }
	    // File the new factory under the pipeline stage matching its type
	    if (CharFilterFactory.class.isAssignableFrom(clazz)) {
	      charFilterFactories.add((CharFilterFactory) instance);
	    } else if (TokenizerFactory.class.isAssignableFrom(clazz)) {
	      tokenizerFactory = (TokenizerFactory) instance;
	    } else if (TokenFilterFactory.class.isAssignableFrom(clazz)) {
	      tokenFilterFactories.add((TokenFilterFactory) instance);
	    }
	  } catch (RuntimeException e) {
	    // getMessage() may be null (e.g. exceptions created without a message); treat that the
	    // same as "not yet tagged with a line number" rather than failing with an NPE here.
	    final String message = e.getMessage();
	    if (message != null && message.startsWith("Line #")) {
	      throw e;
	    } else {
	      throw new RuntimeException("Line #" + lineno(stok) + ": ", e);
	    }
	  } catch (Throwable t) {
	    throw new RuntimeException("Line #" + lineno(stok) + ": ", t);
	  }
	}

	/**
	 * This method looks up a class with its fully qualified name (FQN), or a short-name
	 * class-simplename, or with a package suffix, assuming "org.apache.lucene.analysis." as the
	 * package prefix (e.g. "standard.ClassicTokenizerFactory" -&gt;
	 * "org.apache.lucene.analysis.standard.ClassicTokenizerFactory").
	 *
	 * <p>If className contains a period, the class is first looked up as-is, assuming that it is an
	 * FQN. If this fails, lookup is retried after prepending the Lucene analysis package prefix to
	 * the class name.
	 *
	 * <p>If className does not contain a period, the analysis SPI *Factory.lookupClass() methods are
	 * used to find the class.
	 *
	 * <p>A class that is found but is not a subclass of expectedType is reported with
	 * IllegalArgumentException, which is the exception callers in this task catch in order to retry
	 * the lookup against another factory type.
	 *
	 * @param className The name or the short name of the class.
	 * @param expectedType The superclass className is expected to extend
	 * @return the loaded class.
	 * @throws ClassNotFoundException if a period-containing className cannot be loaded either as-is
	 *     or with the Lucene analysis package prefix, or if expectedType is not one of the supported
	 *     analysis factory types
	 * @throws IllegalArgumentException if a class is found but does not extend expectedType; the SPI
	 *     lookupClass() methods may also throw this for unknown names (see their documentation)
	 */
	public <T> Class<? extends T> lookupAnalysisClass(String className, Class<T> expectedType)
	    throws ClassNotFoundException {
	  if (className.contains(".")) {
	    Class<?> found;
	    try {
	      // First, try className == FQN
	      found = Class.forName(className);
	    } catch (ClassNotFoundException e) {
	      try {
	        // Second, retry lookup after prepending the Lucene analysis package prefix
	        found = Class.forName(LUCENE_ANALYSIS_PACKAGE_PREFIX + className);
	      } catch (ClassNotFoundException e1) {
	        // Keep the underlying failure as the cause so the stack trace is not lost
	        throw new ClassNotFoundException(
	            "Can't find class '"
	                + className
	                + "' or '"
	                + LUCENE_ANALYSIS_PACKAGE_PREFIX
	                + className
	                + "'",
	            e1);
	      }
	    }
	    return requireSubclass(found, expectedType);
	  }
	  // No dot - use analysis SPI lookup
	  final String analysisComponentName =
	      ANALYSIS_COMPONENT_SUFFIX_PATTERN.matcher(className).replaceFirst("");
	  if (CharFilterFactory.class.isAssignableFrom(expectedType)) {
	    return requireSubclass(CharFilterFactory.lookupClass(analysisComponentName), expectedType);
	  } else if (TokenizerFactory.class.isAssignableFrom(expectedType)) {
	    return requireSubclass(TokenizerFactory.lookupClass(analysisComponentName), expectedType);
	  } else if (TokenFilterFactory.class.isAssignableFrom(expectedType)) {
	    return requireSubclass(TokenFilterFactory.lookupClass(analysisComponentName), expectedType);
	  }

	  throw new ClassNotFoundException("Can't find class '" + className + "'");
	}

	/**
	 * Narrows candidate to a subclass of expectedType, converting a type mismatch into
	 * IllegalArgumentException instead of ClassCastException.
	 */
	private static <T> Class<? extends T> requireSubclass(Class<?> candidate, Class<T> expectedType) {
	  try {
	    return candidate.asSubclass(expectedType);
	  } catch (ClassCastException e) {
	    throw new IllegalArgumentException(
	        "Class '" + candidate.getName() + "' is not a subclass of " + expectedType.getName(), e);
	  }
	}

	/**
	 * Always returns {@code true} for this task.
	 *
	 * @see org.apache.lucene.benchmark.byTask.tasks.PerfTask#supportsParams()
	 */
	@Override
	public boolean supportsParams() {
	  return true;
	}

	/**
	 * Returns the current line in the algorithm file.
	 *
	 * <p>The value is the sum of {@code getAlgLineNum()} and the line number reported by the given
	 * tokenizer.
	 *
	 * @param stok stream tokenizer whose current line number is added
	 * @return the computed line number
	 */
	public int lineno(StreamTokenizer stok) {
	  final int algLineNum = getAlgLineNum();
	  final int tokenizerLine = stok.lineno();
	  return algLineNum + tokenizerLine;
	}
	}