lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AnalyzerFactoryTask.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.lucene.benchmark.byTask.tasks;


 import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
 import org.apache.lucene.analysis.util.CharFilterFactory;
 import org.apache.lucene.analysis.util.FilesystemResourceLoader;
 import org.apache.lucene.analysis.util.ResourceLoaderAware;
 import org.apache.lucene.analysis.util.TokenFilterFactory;
 import org.apache.lucene.analysis.util.TokenizerFactory;
 import org.apache.lucene.benchmark.byTask.PerfRunData;
 import org.apache.lucene.benchmark.byTask.utils.AnalyzerFactory;
 import org.apache.lucene.util.Version;

 import java.io.StreamTokenizer;
 import java.io.StringReader;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.regex.Pattern;

 /**
  * Analyzer factory construction task.  The name given to the constructed factory may
  * be given to NewAnalyzerTask, which will call AnalyzerFactory.create().
  *
  * Params are in the form argname:argvalue or argname:"argvalue" or argname:'argvalue';
  * use backslashes to escape '"' or "'" inside a quoted value when it's used as the enclosing
  * quotation mark.
  *
  * Specify params in a comma separated list of the following, in order:
  * <ol>
  *   <li>Analyzer args:
  *     <ul>
  *       <li><b>Required</b>: <code>name:<i>analyzer-factory-name</i></code></li>
  *       <li>Optional: <tt>positionIncrementGap:<i>int value</i></tt> (default: 0)</li>
  *       <li>Optional: <tt>offsetGap:<i>int value</i></tt> (default: 1)</li>
  *     </ul>
  *   </li>
  *   <li>zero or more CharFilterFactory's, followed by</li>
  *   <li>exactly one TokenizerFactory, followed by</li>
  *   <li>zero or more TokenFilterFactory's</li>
  * </ol>
  *
  * Each component analysis factory may specify <tt>luceneMatchVersion</tt> (defaults to
  * {@link Version#LATEST}) and any of the args understood by the specified
  * *Factory class, in the above-described param format.
  * <p>
  * Example:
  * <pre>
  *     -AnalyzerFactory(name:'strip html, fold to ascii, whitespace tokenize, max 10k tokens',
  *                      positionIncrementGap:100,
  *                      HTMLStripCharFilter,
  *                      MappingCharFilter(mapping:'mapping-FoldToASCII.txt'),
  *                      WhitespaceTokenizer(luceneMatchVersion:LUCENE_5_0_0),
  *                      TokenLimitFilter(maxTokenCount:10000, consumeAllTokens:false))
  *     [...]
  *     -NewAnalyzer('strip html, fold to ascii, whitespace tokenize, max 10k tokens')
  * </pre>
  * <p>
  * AnalyzerFactory will direct analysis component factories to look for resources
  * under the directory specified in the "work.dir" property.
  */
 public class AnalyzerFactoryTask extends PerfTask {
   private static final String LUCENE_ANALYSIS_PACKAGE_PREFIX = "org.apache.lucene.analysis.";
   private static final Pattern ANALYSIS_COMPONENT_SUFFIX_PATTERN
       = Pattern.compile("(?s:(?:(?:Token|Char)?Filter|Tokenizer)(?:Factory)?)$");
   private static final Pattern TRAILING_DOT_ZERO_PATTERN = Pattern.compile("\\.0$");

   private enum ArgType {ANALYZER_ARG, ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER, TOKENFILTER }

   String factoryName = null;
   Integer positionIncrementGap = null;
   Integer offsetGap = null;
   private List<CharFilterFactory> charFilterFactories = new ArrayList<>();
   private TokenizerFactory tokenizerFactory = null;
   private List<TokenFilterFactory> tokenFilterFactories = new ArrayList<>();

   public AnalyzerFactoryTask(PerfRunData runData) {
     super(runData);
   }

   @Override
   public int doLogic() {
     return 1;
   }

   /**
    * Sets the params.
    * Analysis component factory names may optionally include the "Factory" suffix.
    *
    * @param params analysis pipeline specification: name, (optional) positionIncrementGap,
    *               (optional) offsetGap, 0+ CharFilterFactory's, 1 TokenizerFactory,
    *               and 0+ TokenFilterFactory's
    */
   @Override
   @SuppressWarnings("fallthrough")
   public void setParams(String params) {
     super.setParams(params);
     ArgType expectedArgType = ArgType.ANALYZER_ARG;

     final StreamTokenizer stok = new StreamTokenizer(new StringReader(params));
     stok.commentChar('#');
     stok.quoteChar('"');
     stok.quoteChar('\'');
     stok.eolIsSignificant(false);
     stok.ordinaryChar('(');
     stok.ordinaryChar(')');
     stok.ordinaryChar(':');
     stok.ordinaryChar(',');
     try {
       while (stok.nextToken() != StreamTokenizer.TT_EOF) {
         switch (stok.ttype) {
           case ',': {
             // Do nothing
             break;
           }
           case StreamTokenizer.TT_WORD: {
             if (expectedArgType.equals(ArgType.ANALYZER_ARG)) {
               final String argName = stok.sval;
               if ( ! argName.equalsIgnoreCase("name")
                   && ! argName.equalsIgnoreCase("positionIncrementGap")
                   && ! argName.equalsIgnoreCase("offsetGap")) {
                 throw new RuntimeException
                     ("Line #" + lineno(stok) + ": Missing 'name' param to AnalyzerFactory: '" + params + "'");
               }
               stok.nextToken();
               if (stok.ttype != ':') {
                 throw new RuntimeException
                     ("Line #" + lineno(stok) + ": Missing ':' after '" + argName + "' param to AnalyzerFactory");
               }

               stok.nextToken();
               String argValue = stok.sval;
               switch (stok.ttype) {
                 case StreamTokenizer.TT_NUMBER: {
                   argValue = Double.toString(stok.nval);
                   // Drop the ".0" from numbers, for integer arguments
                   argValue = TRAILING_DOT_ZERO_PATTERN.matcher(argValue).replaceFirst("");
                   // Intentional fallthrough
                 }
                 case '"':
                 case '\'':
                 case StreamTokenizer.TT_WORD: {
                   if (argName.equalsIgnoreCase("name")) {
                     factoryName = argValue;
                     expectedArgType = ArgType.ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER;
                   } else {
                     int intArgValue = 0;
                     try {
                       intArgValue = Integer.parseInt(argValue);
                     } catch (NumberFormatException e) {
                       throw new RuntimeException
                           ("Line #" + lineno(stok) + ": Exception parsing " + argName + " value '" + argValue + "'", e);
                     }
                     if (argName.equalsIgnoreCase("positionIncrementGap")) {
                       positionIncrementGap = intArgValue;
                     } else if (argName.equalsIgnoreCase("offsetGap")) {
                       offsetGap = intArgValue;
                     }
                   }
                   break;
                 }
                 case StreamTokenizer.TT_EOF: {
                   throw new RuntimeException("Unexpected EOF: " + stok.toString());
                 }
                 default: {
                   throw new RuntimeException
                       ("Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
                 }
               }
             } else if (expectedArgType.equals(ArgType.ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER)) {
               final String argName = stok.sval;

               if (argName.equalsIgnoreCase("positionIncrementGap")
                   || argName.equalsIgnoreCase("offsetGap")) {
                 stok.nextToken();
                 if (stok.ttype != ':') {
                   throw new RuntimeException
                       ("Line #" + lineno(stok) + ": Missing ':' after '" + argName + "' param to AnalyzerFactory");
                 }
                 stok.nextToken();
                 int intArgValue = (int)stok.nval;
                 switch (stok.ttype) {
                   case '"':
                   case '\'':
                   case StreamTokenizer.TT_WORD: {
                     intArgValue = 0;
                     try {
                       intArgValue = Integer.parseInt(stok.sval.trim());
                     } catch (NumberFormatException e) {
                       throw new RuntimeException
                           ("Line #" + lineno(stok) + ": Exception parsing " + argName + " value '" + stok.sval + "'", e);
                     }
                     // Intentional fall-through
                   }
                   case StreamTokenizer.TT_NUMBER: {
                     if (argName.equalsIgnoreCase("positionIncrementGap")) {
                       positionIncrementGap = intArgValue;
                     } else if (argName.equalsIgnoreCase("offsetGap")) {
                       offsetGap = intArgValue;
                     }
                     break;
                   }
                   case StreamTokenizer.TT_EOF: {
                     throw new RuntimeException("Unexpected EOF: " + stok.toString());
                   }
                   default: {
                     throw new RuntimeException
                         ("Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
                   }
                 }
                 break;
               }
               try {
                 final Class<? extends CharFilterFactory> clazz;
                 clazz = lookupAnalysisClass(argName, CharFilterFactory.class);
                 createAnalysisPipelineComponent(stok, clazz);
               } catch (IllegalArgumentException e) {
                 try {
                   final Class<? extends TokenizerFactory> clazz;
                   clazz = lookupAnalysisClass(argName, TokenizerFactory.class);
                   createAnalysisPipelineComponent(stok, clazz);
                   expectedArgType = ArgType.TOKENFILTER;
                 } catch (IllegalArgumentException e2) {
                   throw new RuntimeException("Line #" + lineno(stok) + ": Can't find class '"
                                              + argName + "' as CharFilterFactory or TokenizerFactory");
                 }
               }
             } else { // expectedArgType = ArgType.TOKENFILTER
               final String className = stok.sval;
               final Class<? extends TokenFilterFactory> clazz;
               try {
                 clazz = lookupAnalysisClass(className, TokenFilterFactory.class);
               } catch (IllegalArgumentException e) {
                   throw new RuntimeException
                       ("Line #" + lineno(stok) + ": Can't find class '" + className + "' as TokenFilterFactory");
               }
               createAnalysisPipelineComponent(stok, clazz);
             }
             break;
           }
           default: {
             throw new RuntimeException("Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
           }
         }
       }
     } catch (RuntimeException e) {
       if (e.getMessage().startsWith("Line #")) {
         throw e;
       } else {
         throw new RuntimeException("Line #" + lineno(stok) + ": ", e);
       }
     } catch (Throwable t) {
       throw new RuntimeException("Line #" + lineno(stok) + ": ", t);
     }

     final AnalyzerFactory analyzerFactory = new AnalyzerFactory
         (charFilterFactories, tokenizerFactory, tokenFilterFactories);
     analyzerFactory.setPositionIncrementGap(positionIncrementGap);
     analyzerFactory.setOffsetGap(offsetGap);
     getRunData().getAnalyzerFactories().put(factoryName, analyzerFactory);
   }

   /**
    * Instantiates the given analysis factory class after pulling params from
    * the given stream tokenizer, then stores the result in the appropriate
    * pipeline component list.
    *
    * @param stok stream tokenizer from which to draw analysis factory params
    * @param clazz analysis factory class to instantiate
    */
   @SuppressWarnings("fallthrough")
   private void createAnalysisPipelineComponent
       (StreamTokenizer stok, Class<? extends AbstractAnalysisFactory> clazz) {
     Map<String,String> argMap = new HashMap<>();
     boolean parenthetical = false;
     try {
       WHILE_LOOP: while (stok.nextToken() != StreamTokenizer.TT_EOF) {
         switch (stok.ttype) {
           case ',': {
             if (parenthetical) {
               // Do nothing
               break;
             } else {
               // Finished reading this analysis factory configuration
               break WHILE_LOOP;
             }
           }
           case '(': {
             if (parenthetical) {
               throw new RuntimeException
                   ("Line #" + lineno(stok) + ": Unexpected opening parenthesis.");
             }
             parenthetical = true;
             break;
           }
           case ')': {
             if (parenthetical) {
               parenthetical = false;
             } else {
               throw new RuntimeException
                   ("Line #" + lineno(stok) + ": Unexpected closing parenthesis.");
             }
             break;
           }
           case StreamTokenizer.TT_WORD: {
             if ( ! parenthetical) {
               throw new RuntimeException("Line #" + lineno(stok) + ": Unexpected token '" + stok.sval + "'");
             }
             String argName = stok.sval;
             stok.nextToken();
             if (stok.ttype != ':') {
               throw new RuntimeException
                   ("Line #" + lineno(stok) + ": Missing ':' after '" + argName + "' param to " + clazz.getSimpleName());
             }
             stok.nextToken();
             String argValue = stok.sval;
             switch (stok.ttype) {
               case StreamTokenizer.TT_NUMBER: {
                   argValue = Double.toString(stok.nval);
                   // Drop the ".0" from numbers, for integer arguments
                   argValue = TRAILING_DOT_ZERO_PATTERN.matcher(argValue).replaceFirst("");
                   // Intentional fall-through
               }
               case '"':
               case '\'':
               case StreamTokenizer.TT_WORD: {
                 argMap.put(argName, argValue);
                 break;
               }
               case StreamTokenizer.TT_EOF: {
                 throw new RuntimeException("Unexpected EOF: " + stok.toString());
               }
               default: {
                 throw new RuntimeException
                     ("Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
               }
             }
           }
         }
       }
       if (!argMap.containsKey("luceneMatchVersion")) {
         argMap.put("luceneMatchVersion", Version.LATEST.toString());
       }
       final AbstractAnalysisFactory instance;
       try {
         instance = clazz.getConstructor(Map.class).newInstance(argMap);
       } catch (Exception e) {
         throw new RuntimeException("Line #" + lineno(stok) + ": ", e);
       }
       if (instance instanceof ResourceLoaderAware) {
         Path baseDir = Paths.get(getRunData().getConfig().get("work.dir", "work"));
         if (!Files.isDirectory(baseDir)) {
           baseDir = Paths.get(".");
         }
         ((ResourceLoaderAware)instance).inform(new FilesystemResourceLoader(baseDir));
       }
       if (CharFilterFactory.class.isAssignableFrom(clazz)) {
         charFilterFactories.add((CharFilterFactory)instance);
       } else if (TokenizerFactory.class.isAssignableFrom(clazz)) {
         tokenizerFactory = (TokenizerFactory)instance;
       } else if (TokenFilterFactory.class.isAssignableFrom(clazz)) {
         tokenFilterFactories.add((TokenFilterFactory)instance);
       }
     } catch (RuntimeException e) {
       if (e.getMessage().startsWith("Line #")) {
         throw (e);
       } else {
         throw new RuntimeException("Line #" + lineno(stok) + ": ", e);
       }
     } catch (Throwable t) {
       throw new RuntimeException("Line #" + lineno(stok) + ": ", t);
     }
   }

   /**
    * This method looks up a class with its fully qualified name (FQN), or a short-name
    * class-simplename, or with a package suffix, assuming "org.apache.lucene.analysis."
    * as the package prefix (e.g. "standard.ClassicTokenizerFactory" -&gt;
    * "org.apache.lucene.analysis.standard.ClassicTokenizerFactory").
    *
    * If className contains a period, the class is first looked up as-is, assuming that it
    * is an FQN.  If this fails, lookup is retried after prepending the Lucene analysis
    * package prefix to the class name.
    *
    * If className does not contain a period, the analysis SPI *Factory.lookupClass()
    * methods are used to find the class.
    *
    * @param className The name or the short name of the class.
    * @param expectedType The superclass className is expected to extend
    * @return the loaded class.
    * @throws ClassNotFoundException if lookup fails
    */
   public <T> Class<? extends T> lookupAnalysisClass(String className, Class<T> expectedType)
       throws ClassNotFoundException {
     if (className.contains(".")) {
       try {
         // First, try className == FQN
         return Class.forName(className).asSubclass(expectedType);
       } catch (ClassNotFoundException e) {
         try {
           // Second, retry lookup after prepending the Lucene analysis package prefix
           return Class.forName(LUCENE_ANALYSIS_PACKAGE_PREFIX + className).asSubclass(expectedType);
         } catch (ClassNotFoundException e1) {
           throw new ClassNotFoundException("Can't find class '" + className
                                            + "' or '" + LUCENE_ANALYSIS_PACKAGE_PREFIX + className + "'");
         }
       }
     }
     // No dot - use analysis SPI lookup
     final String analysisComponentName = ANALYSIS_COMPONENT_SUFFIX_PATTERN.matcher(className).replaceFirst("");
     if (CharFilterFactory.class.isAssignableFrom(expectedType)) {
       return CharFilterFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
     } else if (TokenizerFactory.class.isAssignableFrom(expectedType)) {
       return TokenizerFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
     } else if (TokenFilterFactory.class.isAssignableFrom(expectedType)) {
       return TokenFilterFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
     }

     throw new ClassNotFoundException("Can't find class '" + className + "'");
   }


   /* (non-Javadoc)
    * @see org.apache.lucene.benchmark.byTask.tasks.PerfTask#supportsParams()
    */
   @Override
   public boolean supportsParams() {
     return true;
   }

   /** Returns the current line in the algorithm file */
   public int lineno(StreamTokenizer stok) {
     return getAlgLineNum() + stok.lineno();
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.lucene.benchmark.byTask.tasks;


	import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
	import org.apache.lucene.analysis.util.CharFilterFactory;
	import org.apache.lucene.analysis.util.FilesystemResourceLoader;
	import org.apache.lucene.analysis.util.ResourceLoaderAware;
	import org.apache.lucene.analysis.util.TokenFilterFactory;
	import org.apache.lucene.analysis.util.TokenizerFactory;
	import org.apache.lucene.benchmark.byTask.PerfRunData;
	import org.apache.lucene.benchmark.byTask.utils.AnalyzerFactory;
	import org.apache.lucene.util.Version;

	import java.io.StreamTokenizer;
	import java.io.StringReader;
	import java.nio.file.Files;
	import java.nio.file.Path;
	import java.nio.file.Paths;
	import java.util.ArrayList;
	import java.util.HashMap;
	import java.util.List;
	import java.util.Map;
	import java.util.regex.Pattern;

	/**
	* Analyzer factory construction task. The name given to the constructed factory may
	* be given to NewAnalyzerTask, which will call AnalyzerFactory.create().
	*
	* Params are in the form argname:argvalue or argname:"argvalue" or argname:'argvalue';
	* use backslashes to escape '"' or "'" inside a quoted value when it's used as the enclosing
	* quotation mark.
	*
	* Specify params in a comma separated list of the following, in order:
	* <ol>
	* <li>Analyzer args:
	* <ul>
	* <li><b>Required</b>: <code>name:<i>analyzer-factory-name</i></code></li>
	* <li>Optional: <tt>positionIncrementGap:<i>int value</i></tt> (default: 0)</li>
	* <li>Optional: <tt>offsetGap:<i>int value</i></tt> (default: 1)</li>
	* </ul>
	* </li>
	* <li>zero or more CharFilterFactory's, followed by</li>
	* <li>exactly one TokenizerFactory, followed by</li>
	* <li>zero or more TokenFilterFactory's</li>
	* </ol>
	*
	* Each component analysis factory may specify <tt>luceneMatchVersion</tt> (defaults to
	* {@link Version#LATEST}) and any of the args understood by the specified
	* *Factory class, in the above-described param format.
	* <p>
	* Example:
	* <pre>
	* -AnalyzerFactory(name:'strip html, fold to ascii, whitespace tokenize, max 10k tokens',
	* positionIncrementGap:100,
	* HTMLStripCharFilter,
	* MappingCharFilter(mapping:'mapping-FoldToASCII.txt'),
	* WhitespaceTokenizer(luceneMatchVersion:LUCENE_5_0_0),
	* TokenLimitFilter(maxTokenCount:10000, consumeAllTokens:false))
	* [...]
	* -NewAnalyzer('strip html, fold to ascii, whitespace tokenize, max 10k tokens')
	* </pre>
	* <p>
	* AnalyzerFactory will direct analysis component factories to look for resources
	* under the directory specified in the "work.dir" property.
	*/
	public class AnalyzerFactoryTask extends PerfTask {
	private static final String LUCENE_ANALYSIS_PACKAGE_PREFIX = "org.apache.lucene.analysis.";
	private static final Pattern ANALYSIS_COMPONENT_SUFFIX_PATTERN
	= Pattern.compile("(?s:(?:(?:Token\|Char)?Filter\|Tokenizer)(?:Factory)?)$");
	private static final Pattern TRAILING_DOT_ZERO_PATTERN = Pattern.compile("\\.0$");

	private enum ArgType {ANALYZER_ARG, ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER, TOKENFILTER }

	String factoryName = null;
	Integer positionIncrementGap = null;
	Integer offsetGap = null;
	private List<CharFilterFactory> charFilterFactories = new ArrayList<>();
	private TokenizerFactory tokenizerFactory = null;
	private List<TokenFilterFactory> tokenFilterFactories = new ArrayList<>();

	public AnalyzerFactoryTask(PerfRunData runData) {
	super(runData);
	}

	@Override
	public int doLogic() {
	return 1;
	}

	/**
	* Sets the params.
	* Analysis component factory names may optionally include the "Factory" suffix.
	*
	* @param params analysis pipeline specification: name, (optional) positionIncrementGap,
	* (optional) offsetGap, 0+ CharFilterFactory's, 1 TokenizerFactory,
	* and 0+ TokenFilterFactory's
	*/
	@Override
	@SuppressWarnings("fallthrough")
	public void setParams(String params) {
	super.setParams(params);
	ArgType expectedArgType = ArgType.ANALYZER_ARG;

	final StreamTokenizer stok = new StreamTokenizer(new StringReader(params));
	stok.commentChar('#');
	stok.quoteChar('"');
	stok.quoteChar('\'');
	stok.eolIsSignificant(false);
	stok.ordinaryChar('(');
	stok.ordinaryChar(')');
	stok.ordinaryChar(':');
	stok.ordinaryChar(',');
	try {
	while (stok.nextToken() != StreamTokenizer.TT_EOF) {
	switch (stok.ttype) {
	case ',': {
	// Do nothing
	break;
	}
	case StreamTokenizer.TT_WORD: {
	if (expectedArgType.equals(ArgType.ANALYZER_ARG)) {
	final String argName = stok.sval;
	if ( ! argName.equalsIgnoreCase("name")
	&& ! argName.equalsIgnoreCase("positionIncrementGap")
	&& ! argName.equalsIgnoreCase("offsetGap")) {
	throw new RuntimeException
	("Line #" + lineno(stok) + ": Missing 'name' param to AnalyzerFactory: '" + params + "'");
	}
	stok.nextToken();
	if (stok.ttype != ':') {
	throw new RuntimeException
	("Line #" + lineno(stok) + ": Missing ':' after '" + argName + "' param to AnalyzerFactory");
	}

	stok.nextToken();
	String argValue = stok.sval;
	switch (stok.ttype) {
	case StreamTokenizer.TT_NUMBER: {
	argValue = Double.toString(stok.nval);
	// Drop the ".0" from numbers, for integer arguments
	argValue = TRAILING_DOT_ZERO_PATTERN.matcher(argValue).replaceFirst("");
	// Intentional fallthrough
	}
	case '"':
	case '\'':
	case StreamTokenizer.TT_WORD: {
	if (argName.equalsIgnoreCase("name")) {
	factoryName = argValue;
	expectedArgType = ArgType.ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER;
	} else {
	int intArgValue = 0;
	try {
	intArgValue = Integer.parseInt(argValue);
	} catch (NumberFormatException e) {
	throw new RuntimeException
	("Line #" + lineno(stok) + ": Exception parsing " + argName + " value '" + argValue + "'", e);
	}
	if (argName.equalsIgnoreCase("positionIncrementGap")) {
	positionIncrementGap = intArgValue;
	} else if (argName.equalsIgnoreCase("offsetGap")) {
	offsetGap = intArgValue;
	}
	}
	break;
	}
	case StreamTokenizer.TT_EOF: {
	throw new RuntimeException("Unexpected EOF: " + stok.toString());
	}
	default: {
	throw new RuntimeException
	("Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
	}
	}
	} else if (expectedArgType.equals(ArgType.ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER)) {
	final String argName = stok.sval;

	if (argName.equalsIgnoreCase("positionIncrementGap")
	\|\| argName.equalsIgnoreCase("offsetGap")) {
	stok.nextToken();
	if (stok.ttype != ':') {
	throw new RuntimeException
	("Line #" + lineno(stok) + ": Missing ':' after '" + argName + "' param to AnalyzerFactory");
	}
	stok.nextToken();
	int intArgValue = (int)stok.nval;
	switch (stok.ttype) {
	case '"':
	case '\'':
	case StreamTokenizer.TT_WORD: {
	intArgValue = 0;
	try {
	intArgValue = Integer.parseInt(stok.sval.trim());
	} catch (NumberFormatException e) {
	throw new RuntimeException
	("Line #" + lineno(stok) + ": Exception parsing " + argName + " value '" + stok.sval + "'", e);
	}
	// Intentional fall-through
	}
	case StreamTokenizer.TT_NUMBER: {
	if (argName.equalsIgnoreCase("positionIncrementGap")) {
	positionIncrementGap = intArgValue;
	} else if (argName.equalsIgnoreCase("offsetGap")) {
	offsetGap = intArgValue;
	}
	break;
	}
	case StreamTokenizer.TT_EOF: {
	throw new RuntimeException("Unexpected EOF: " + stok.toString());
	}
	default: {
	throw new RuntimeException
	("Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
	}
	}
	break;
	}
	try {
	final Class<? extends CharFilterFactory> clazz;
	clazz = lookupAnalysisClass(argName, CharFilterFactory.class);
	createAnalysisPipelineComponent(stok, clazz);
	} catch (IllegalArgumentException e) {
	try {
	final Class<? extends TokenizerFactory> clazz;
	clazz = lookupAnalysisClass(argName, TokenizerFactory.class);
	createAnalysisPipelineComponent(stok, clazz);
	expectedArgType = ArgType.TOKENFILTER;
	} catch (IllegalArgumentException e2) {
	throw new RuntimeException("Line #" + lineno(stok) + ": Can't find class '"
	+ argName + "' as CharFilterFactory or TokenizerFactory");
	}
	}
	} else { // expectedArgType = ArgType.TOKENFILTER
	final String className = stok.sval;
	final Class<? extends TokenFilterFactory> clazz;
	try {
	clazz = lookupAnalysisClass(className, TokenFilterFactory.class);
	} catch (IllegalArgumentException e) {
	throw new RuntimeException
	("Line #" + lineno(stok) + ": Can't find class '" + className + "' as TokenFilterFactory");
	}
	createAnalysisPipelineComponent(stok, clazz);
	}
	break;
	}
	default: {
	throw new RuntimeException("Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
	}
	}
	}
	} catch (RuntimeException e) {
	if (e.getMessage().startsWith("Line #")) {
	throw e;
	} else {
	throw new RuntimeException("Line #" + lineno(stok) + ": ", e);
	}
	} catch (Throwable t) {
	throw new RuntimeException("Line #" + lineno(stok) + ": ", t);
	}

	final AnalyzerFactory analyzerFactory = new AnalyzerFactory
	(charFilterFactories, tokenizerFactory, tokenFilterFactories);
	analyzerFactory.setPositionIncrementGap(positionIncrementGap);
	analyzerFactory.setOffsetGap(offsetGap);
	getRunData().getAnalyzerFactories().put(factoryName, analyzerFactory);
	}

	/**
	* Instantiates the given analysis factory class after pulling params from
	* the given stream tokenizer, then stores the result in the appropriate
	* pipeline component list.
	*
	* @param stok stream tokenizer from which to draw analysis factory params
	* @param clazz analysis factory class to instantiate
	*/
	@SuppressWarnings("fallthrough")
	private void createAnalysisPipelineComponent
	(StreamTokenizer stok, Class<? extends AbstractAnalysisFactory> clazz) {
	Map<String,String> argMap = new HashMap<>();
	boolean parenthetical = false;
	try {
	WHILE_LOOP: while (stok.nextToken() != StreamTokenizer.TT_EOF) {
	switch (stok.ttype) {
	case ',': {
	if (parenthetical) {
	// Do nothing
	break;
	} else {
	// Finished reading this analysis factory configuration
	break WHILE_LOOP;
	}
	}
	case '(': {
	if (parenthetical) {
	throw new RuntimeException
	("Line #" + lineno(stok) + ": Unexpected opening parenthesis.");
	}
	parenthetical = true;
	break;
	}
	case ')': {
	if (parenthetical) {
	parenthetical = false;
	} else {
	throw new RuntimeException
	("Line #" + lineno(stok) + ": Unexpected closing parenthesis.");
	}
	break;
	}
	case StreamTokenizer.TT_WORD: {
	if ( ! parenthetical) {
	throw new RuntimeException("Line #" + lineno(stok) + ": Unexpected token '" + stok.sval + "'");
	}
	String argName = stok.sval;
	stok.nextToken();
	if (stok.ttype != ':') {
	throw new RuntimeException
	("Line #" + lineno(stok) + ": Missing ':' after '" + argName + "' param to " + clazz.getSimpleName());
	}
	stok.nextToken();
	String argValue = stok.sval;
	switch (stok.ttype) {
	case StreamTokenizer.TT_NUMBER: {
	argValue = Double.toString(stok.nval);
	// Drop the ".0" from numbers, for integer arguments
	argValue = TRAILING_DOT_ZERO_PATTERN.matcher(argValue).replaceFirst("");
	// Intentional fall-through
	}
	case '"':
	case '\'':
	case StreamTokenizer.TT_WORD: {
	argMap.put(argName, argValue);
	break;
	}
	case StreamTokenizer.TT_EOF: {
	throw new RuntimeException("Unexpected EOF: " + stok.toString());
	}
	default: {
	throw new RuntimeException
	("Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
	}
	}
	}
	}
	}
	if (!argMap.containsKey("luceneMatchVersion")) {
	argMap.put("luceneMatchVersion", Version.LATEST.toString());
	}
	final AbstractAnalysisFactory instance;
	try {
	instance = clazz.getConstructor(Map.class).newInstance(argMap);
	} catch (Exception e) {
	throw new RuntimeException("Line #" + lineno(stok) + ": ", e);
	}
	if (instance instanceof ResourceLoaderAware) {
	Path baseDir = Paths.get(getRunData().getConfig().get("work.dir", "work"));
	if (!Files.isDirectory(baseDir)) {
	baseDir = Paths.get(".");
	}
	((ResourceLoaderAware)instance).inform(new FilesystemResourceLoader(baseDir));
	}
	if (CharFilterFactory.class.isAssignableFrom(clazz)) {
	charFilterFactories.add((CharFilterFactory)instance);
	} else if (TokenizerFactory.class.isAssignableFrom(clazz)) {
	tokenizerFactory = (TokenizerFactory)instance;
	} else if (TokenFilterFactory.class.isAssignableFrom(clazz)) {
	tokenFilterFactories.add((TokenFilterFactory)instance);
	}
	} catch (RuntimeException e) {
	if (e.getMessage().startsWith("Line #")) {
	throw (e);
	} else {
	throw new RuntimeException("Line #" + lineno(stok) + ": ", e);
	}
	} catch (Throwable t) {
	throw new RuntimeException("Line #" + lineno(stok) + ": ", t);
	}
	}

	/**
	* This method looks up a class with its fully qualified name (FQN), or a short-name
	* class-simplename, or with a package suffix, assuming "org.apache.lucene.analysis."
	* as the package prefix (e.g. "standard.ClassicTokenizerFactory" ->
	* "org.apache.lucene.analysis.standard.ClassicTokenizerFactory").
	*
	* If className contains a period, the class is first looked up as-is, assuming that it
	* is an FQN. If this fails, lookup is retried after prepending the Lucene analysis
	* package prefix to the class name.
	*
	* If className does not contain a period, the analysis SPI *Factory.lookupClass()
	* methods are used to find the class.
	*
	* @param className The name or the short name of the class.
	* @param expectedType The superclass className is expected to extend
	* @return the loaded class.
	* @throws ClassNotFoundException if lookup fails
	*/
	public <T> Class<? extends T> lookupAnalysisClass(String className, Class<T> expectedType)
	throws ClassNotFoundException {
	if (className.contains(".")) {
	try {
	// First, try className == FQN
	return Class.forName(className).asSubclass(expectedType);
	} catch (ClassNotFoundException e) {
	try {
	// Second, retry lookup after prepending the Lucene analysis package prefix
	return Class.forName(LUCENE_ANALYSIS_PACKAGE_PREFIX + className).asSubclass(expectedType);
	} catch (ClassNotFoundException e1) {
	throw new ClassNotFoundException("Can't find class '" + className
	+ "' or '" + LUCENE_ANALYSIS_PACKAGE_PREFIX + className + "'");
	}
	}
	}
	// No dot - use analysis SPI lookup
	final String analysisComponentName = ANALYSIS_COMPONENT_SUFFIX_PATTERN.matcher(className).replaceFirst("");
	if (CharFilterFactory.class.isAssignableFrom(expectedType)) {
	return CharFilterFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
	} else if (TokenizerFactory.class.isAssignableFrom(expectedType)) {
	return TokenizerFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
	} else if (TokenFilterFactory.class.isAssignableFrom(expectedType)) {
	return TokenFilterFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
	}

	throw new ClassNotFoundException("Can't find class '" + className + "'");
	}


	/* (non-Javadoc)
	* @see org.apache.lucene.benchmark.byTask.tasks.PerfTask#supportsParams()
	*/
	@Override
	public boolean supportsParams() {
	return true;
	}

	/** Returns the current line in the algorithm file */
	public int lineno(StreamTokenizer stok) {
	return getAlgLineNum() + stok.lineno();
	}
	}