lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.lucene.benchmark.byTask.tasks;

 import java.io.Reader;
 import java.util.List;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
 import org.apache.lucene.benchmark.byTask.PerfRunData;
 import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexableField;

 /**
  * Simple task to test the performance of tokenizers. For each tokenized, indexed field of the
  * current document it creates a token stream and reads all tokens out of that stream.
  */
 public class ReadTokensTask extends PerfTask {

   public ReadTokensTask(PerfRunData runData) {
     super(runData);
   }

   /** Running total of the tokens counted by every {@link #doLogic()} call on this task. */
   private int totalTokenCount = 0;

   // Per-iteration state handed from setup() to doLogic() and cleared again in tearDown().
   // (Not "volatile" in the Java memory-model sense.)
   private Document doc = null;

   /** Obtains the next document from the run's {@link DocMaker} before the timed work starts. */
   @Override
   public void setup() throws Exception {
     super.setup();
     DocMaker docMaker = getRunData().getDocMaker();
     doc = docMaker.makeDocument();
   }

   /**
    * Builds the progress message for this task.
    *
    * @param recsCount number of records (documents) reported so far
    * @return a message containing the document count and the accumulated token count
    */
   @Override
   protected String getLogMessage(int recsCount) {
     return "read " + recsCount + " docs; " + totalTokenCount + " tokens";
   }

   /** Drops the reference to the current document once the iteration is finished. */
   @Override
   public void tearDown() throws Exception {
     doc = null;
     super.tearDown();
   }

   /**
    * Tokenizes every indexed and tokenized field of the current document and consumes all tokens.
    *
    * @return the number of tokens read from the current document
    * @throws Exception if creating or consuming a token stream fails
    */
   @Override
   public int doLogic() throws Exception {
     List<IndexableField> fields = doc.getFields();
     Analyzer analyzer = getRunData().getAnalyzer();
     int tokenCount = 0;
     for (final IndexableField field : fields) {
       // Fields that are not indexed, or indexed without tokenization, are skipped.
       if (field.fieldType().indexOptions() == IndexOptions.NONE
           || field.fieldType().tokenized() == false) {
         continue;
       }

       // try-with-resources guarantees close() even when reset(), incrementToken() or end()
       // throws; previously the stream stayed open on any failure.
       try (TokenStream stream = field.tokenStream(analyzer, null)) {
         // reset the TokenStream to the first token
         stream.reset();

         TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
         while (stream.incrementToken()) {
           // Result is intentionally discarded; presumably the call is made so that producing
           // the term bytes is part of the measured work.
           termAtt.getBytesRef();
           tokenCount++;
         }
         stream.end();
       }
     }
     totalTokenCount += tokenCount;
     return tokenCount;
   }

   /* Simple StringReader that can be reset to a new string;
    * we use this when tokenizing the string value from a
    * Field.
    * NOTE(review): nothing in this class references this field; confirm whether it is still
    * needed. */
   ReusableStringReader stringReader = new ReusableStringReader();

   /** A {@link Reader} over a String that can be re-pointed at a new String via {@code init}. */
   private static final class ReusableStringReader extends Reader {
     /** Index in {@code s} of the next character to hand out. */
     int upto;

     /** Number of characters of {@code s} not yet handed out. */
     int left;

     /** The string currently being read. */
     String s;

     /** Points this reader at {@code s} and rewinds it to the first character. */
     void init(String s) {
       this.s = s;
       left = s.length();
       this.upto = 0;
     }

     @Override
     public int read(char[] c) {
       return read(c, 0, c.length);
     }

     /**
      * Copies up to {@code len} of the remaining characters into {@code c} starting at {@code off}.
      *
      * @return the number of characters copied, or -1 once the string is exhausted
      */
     @Override
     public int read(char[] c, int off, int len) {
       if (left == 0) {
         return -1;
       }
       // Hand out either the full request or whatever is left, whichever is smaller.
       final int count = Math.min(left, len);
       s.getChars(upto, upto + count, c, off);
       upto += count;
       left -= count;
       return count;
     }

     @Override
     public void close() {}
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.lucene.benchmark.byTask.tasks;

	import java.io.Reader;
	import java.util.List;
	import org.apache.lucene.analysis.Analyzer;
	import org.apache.lucene.analysis.TokenStream;
	import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
	import org.apache.lucene.benchmark.byTask.PerfRunData;
	import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
	import org.apache.lucene.document.Document;
	import org.apache.lucene.index.IndexOptions;
	import org.apache.lucene.index.IndexableField;

	/**
	* Simple task to test performance of tokenizers. It just creates a token stream for each field of
	* the document and read all tokens out of that stream.
	*/
	public class ReadTokensTask extends PerfTask {

	public ReadTokensTask(PerfRunData runData) {
	super(runData);
	}

	private int totalTokenCount = 0;

	// volatile data passed between setup(), doLogic(), tearDown().
	private Document doc = null;

	@Override
	public void setup() throws Exception {
	super.setup();
	DocMaker docMaker = getRunData().getDocMaker();
	doc = docMaker.makeDocument();
	}

	@Override
	protected String getLogMessage(int recsCount) {
	return "read " + recsCount + " docs; " + totalTokenCount + " tokens";
	}

	@Override
	public void tearDown() throws Exception {
	doc = null;
	super.tearDown();
	}

	@Override
	public int doLogic() throws Exception {
	List<IndexableField> fields = doc.getFields();
	Analyzer analyzer = getRunData().getAnalyzer();
	int tokenCount = 0;
	for (final IndexableField field : fields) {
	if (field.fieldType().indexOptions() == IndexOptions.NONE
	\|\| field.fieldType().tokenized() == false) {
	continue;
	}

	final TokenStream stream = field.tokenStream(analyzer, null);
	// reset the TokenStream to the first token
	stream.reset();

	TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
	while (stream.incrementToken()) {
	termAtt.getBytesRef();
	tokenCount++;
	}
	stream.end();
	stream.close();
	}
	totalTokenCount += tokenCount;
	return tokenCount;
	}

	/* Simple StringReader that can be reset to a new string;
	* we use this when tokenizing the string value from a
	* Field. */
	ReusableStringReader stringReader = new ReusableStringReader();

	private static final class ReusableStringReader extends Reader {
	int upto;
	int left;
	String s;

	void init(String s) {
	this.s = s;
	left = s.length();
	this.upto = 0;
	}

	@Override
	public int read(char[] c) {
	return read(c, 0, c.length);
	}

	@Override
	public int read(char[] c, int off, int len) {
	if (left > len) {
	s.getChars(upto, upto + len, c, off);
	upto += len;
	left -= len;
	return len;
	} else if (0 == left) {
	return -1;
	} else {
	s.getChars(upto, upto + left, c, off);
	int r = left;
	left = 0;
	upto = s.length();
	return r;
	}
	}

	@Override
	public void close() {}
	}
	}