// src/java/org/apache/fulcrum/parser/DataStreamParser.java - turbine-fulcrum-parser - Git at Google

 package org.apache.fulcrum.parser;


 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */


 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.io.Reader;
 import java.io.StreamTokenizer;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
 import java.util.NoSuchElementException;

 import org.apache.avalon.framework.logger.LogEnabled;
 import org.apache.avalon.framework.logger.Logger;

 /**
  * DataStreamParser reads rows of values from a character stream with a
  * fixed format and hands each row back as a {@link ValueParser}, keyed by
  * column name, so that the values can be extracted in the desired type.
  *
  * <p>The class itself is abstract - a concrete subclass which implements
  * the {@link #initTokenizer(StreamTokenizer)} method, such as CSVParser or
  * TSVParser, is required to use the functionality.
  *
  * <p>The class implements the java.util.Iterator interface for convenience.
  * This allows simple use in a Velocity template for example:
  *
  * <pre>
  * #foreach ($row in $datastream)
  *   Name: $row.Name
  *   Description: $row.Description
  * #end
  * </pre>
  *
  * <p>Note that a single {@link ValueParser} instance is cleared and
  * refilled for every row, so a row returned earlier is overwritten by the
  * next call to {@link #nextRow()} or {@link #next()}.
  *
  * @author <a href="mailto:sean@informage.net">Sean Legassick</a>
  * @version $Id$
  */
 public abstract class DataStreamParser
     implements Iterator<ValueParser>, LogEnabled
 {
     /**
      * Encoding used when none is supplied and none can be obtained from
      * the reader.
      */
     private static final String DEFAULT_ENCODING = "US-ASCII";

     /**
      * The list of column names.
      */
     private List<String> columnNames;

     /**
      * The stream tokenizer for reading values from the input reader.
      */
     private final StreamTokenizer tokenizer;

     /**
      * The parameter parser holding the values of columns for the current
      * line. Created lazily on the first row and reused afterwards.
      */
     private ValueParser lineValues;

     /**
      * Indicates whether or not the tokenizer has read anything yet.
      */
     private boolean neverRead = true;

     /**
      * The character encoding of the input
      */
     private final String characterEncoding;

     /**
      * Logger to use. Stays <code>null</code> until
      * {@link #enableLogging(Logger)} has been called.
      */
     protected Logger log;

     /**
      * Create a new DataStreamParser instance. Requires a Reader to read the
      * values from, a list of column names and a character encoding.
      *
      * @param in the input reader.
      * @param columnNames a list of column names; may be replaced later via
      *        {@link #setColumnNames(List)} or {@link #readColumnNames()}.
      * @param characterEncoding the character encoding of the input. When
      *        <code>null</code>, the encoding reported by the reader is used
      *        if the reader is an InputStreamReader, otherwise "US-ASCII".
      */
     public DataStreamParser(Reader in, List<String> columnNames,
             String characterEncoding)
     {
         this.columnNames = columnNames;
         this.characterEncoding = resolveEncoding(in, characterEncoding);

         tokenizer = new StreamTokenizer(new BufferedReader(in));
         // Subclass hook invoked during construction: it runs before the
         // subclass constructor body, so implementations must only
         // configure the tokenizer they are handed.
         initTokenizer(tokenizer);
     }

     /**
      * Work out which character encoding to use for the row values.
      *
      * @param in the input reader passed to the constructor.
      * @param requested the explicitly requested encoding, may be null.
      * @return the requested encoding if given, else the reader's own
      *         encoding if it can report one, else the default encoding.
      */
     private static String resolveEncoding(Reader in, String requested)
     {
         if (requested != null)
         {
             return requested;
         }

         if (in instanceof InputStreamReader)
         {
             // getEncoding() may return null (e.g. for a closed stream), in
             // which case the default below is kept
             String detected = ((InputStreamReader) in).getEncoding();
             if (detected != null)
             {
                 return detected;
             }
         }

         return DEFAULT_ENCODING;
     }

     /**
      * Initialize the StreamTokenizer instance used to read the lines
      * from the input reader. This must be implemented in subclasses to
      * set up the tokenizing properties.
      *
      * @param tokenizer the StreamTokenizer to use
      */
     protected abstract void initTokenizer(StreamTokenizer tokenizer);

     /**
      * Provide a logger. The parser logs to the child logger named
      * "DataStreamParser" of the given logger.
      *
      * @param logger the parent logger.
      * @see org.apache.avalon.framework.logger.LogEnabled#enableLogging(org.apache.avalon.framework.logger.Logger)
      */
     public void enableLogging(Logger logger)
     {
         this.log = logger.getChildLogger("DataStreamParser");
     }

     /**
      * Set the list of column names explicitly.
      *
      * @param columnNames A list of column names.
      */
     public void setColumnNames(List<String> columnNames)
     {
         this.columnNames = columnNames;
     }

     /**
      * Read the list of column names from the input reader using the
      * tokenizer. Any previously set column names are discarded; tokens are
      * consumed up to and including the first one that is not a value.
      *
      * @exception IOException an IOException occurred.
      */
     public void readColumnNames()
         throws IOException
     {
         columnNames = new ArrayList<String>();

         neverRead = false;
         tokenizer.nextToken();
         while (isValueToken())
         {
             columnNames.add(tokenizer.sval);
             tokenizer.nextToken();
         }
     }

     /**
      * Determine whether a further row of values exists in the input.
      *
      * @return true if the input has more rows.
      * @exception IOException an IOException occurred.
      */
     public boolean hasNextRow()
         throws IOException
     {
         // check for end of line ensures that an empty last line doesn't
         // give a false positive for hasNextRow
         if (neverRead || tokenizer.ttype == StreamTokenizer.TT_EOL)
         {
             // peek at the next token: pushBack() makes the following
             // nextToken() return it again while ttype keeps its type
             tokenizer.nextToken();
             tokenizer.pushBack();
             neverRead = false;
         }
         return tokenizer.ttype != StreamTokenizer.TT_EOF;
     }

     /**
      * Returns a ValueParser object containing the next row of values.
      * The same ValueParser instance is reused for every row.
      *
      * @return a ValueParser object.
      * @exception IOException an IOException occurred.
      * @exception NoSuchElementException there are no more rows in the input.
      */
     public ValueParser nextRow()
         throws IOException, NoSuchElementException
     {
         if (!hasNextRow())
         {
             throw new NoSuchElementException();
         }

         if (lineValues == null)
         {
             lineValues = new BaseValueParser(characterEncoding);
         }
         else
         {
             lineValues.clear();
         }

         Iterator<String> names = columnNames.iterator();
         tokenizer.nextToken();
         while (isValueToken())
         {
             // note this means that if there are more values than
             // column names, the extra values are discarded.
             if (names.hasNext())
             {
                 String colname = names.next();
                 String colval  = tokenizer.sval;
                 // the logger is optional: enableLogging may not have run
                 if (log != null && log.isDebugEnabled())
                 {
                     log.debug("DataStreamParser.nextRow(): " +
                               colname + '=' + colval);
                 }
                 lineValues.add(colname, colval);
             }
             tokenizer.nextToken();
         }

         return lineValues;
     }

     /**
      * Determine whether a further row of values exists in the input.
      * An IOException while reading is logged (if a logger is available)
      * and reported as "no more rows".
      *
      * @return true if the input has more rows.
      */
     public boolean hasNext()
     {
         try
         {
             return hasNextRow();
         }
         catch (IOException e)
         {
             logError("IOException in DataStreamParser.hasNext", e);
             return false;
         }
     }

     /**
      * Returns a ValueParser object containing the next row of values.
      *
      * @return a ValueParser object.
      * @exception NoSuchElementException there are no more rows in the input
      *                                   or an IOException occurred; in the
      *                                   latter case the IOException is set
      *                                   as the cause.
      */
     public ValueParser next()
         throws NoSuchElementException
     {
         try
         {
             return nextRow();
         }
         catch (IOException e)
         {
             logError("IOException in DataStreamParser.next", e);
             NoSuchElementException failure = new NoSuchElementException(
                     "Could not read the next row: " + e.getMessage());
             // keep the underlying I/O failure available to the caller
             failure.initCause(e);
             throw failure;
         }
     }

     /**
      * The optional Iterator.remove method is not supported.
      *
      * @exception UnsupportedOperationException the operation is not supported.
      */
     public void remove()
         throws UnsupportedOperationException
     {
         throw new UnsupportedOperationException();
     }

     /**
      * Tell whether the tokenizer's current token is a column value, i.e.
      * a word or a double-quoted string.
      *
      * @return true if the current token carries a value in sval.
      */
     private boolean isValueToken()
     {
         return tokenizer.ttype == StreamTokenizer.TT_WORD
                || tokenizer.ttype == '"';
     }

     /**
      * Log an error if a logger has been provided, otherwise do nothing.
      *
      * @param message the message to log.
      * @param cause the exception that triggered the message.
      */
     private void logError(String message, Throwable cause)
     {
         if (log != null)
         {
             log.error(message, cause);
         }
     }
 }
	package org.apache.fulcrum.parser;


	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/


	import java.io.BufferedReader;
	import java.io.IOException;
	import java.io.InputStreamReader;
	import java.io.Reader;
	import java.io.StreamTokenizer;
	import java.util.ArrayList;
	import java.util.Iterator;
	import java.util.List;
	import java.util.NoSuchElementException;

	import org.apache.avalon.framework.logger.LogEnabled;
	import org.apache.avalon.framework.logger.Logger;

	/**
	* DataStreamParser is used to parse a stream with a fixed format and
	* generate ValueParser objects which can be used to extract the values
	* in the desired type.
	*
	* <p>The class itself is abstract - a concrete subclass which implements
	* the initTokenizer method such as CSVParser or TSVParser is required
	* to use the functionality.
	*
	* <p>The class implements the java.util.Iterator interface for convenience.
	* This allows simple use in a Velocity template for example:
	*
	* <pre>
	* #foreach ($row in $datastream)
	* Name: $row.Name
	* Description: $row.Description
	* #end
	* </pre>
	*
	* @author <a href="mailto:sean@informage.net">Sean Legassick</a>
	* @version $Id$
	*/
	public abstract class DataStreamParser
	implements Iterator<ValueParser>, LogEnabled
	{
	/**
	* The list of column names.
	*/
	private List<String> columnNames;

	/**
	* The stream tokenizer for reading values from the input reader.
	*/
	private final StreamTokenizer tokenizer;

	/**
	* The parameter parser holding the values of columns for the current line.
	*/
	private ValueParser lineValues;

	/**
	* Indicates whether or not the tokenizer has read anything yet.
	*/
	private boolean neverRead = true;

	/**
	* The character encoding of the input
	*/
	private String characterEncoding;

	/**
	* Logger to use
	*/
	protected Logger log;

	/**
	* Create a new DataStreamParser instance. Requires a Reader to read the
	* comma-separated values from, a list of column names and a
	* character encoding.
	*
	* @param in the input reader.
	* @param columnNames a list of column names.
	* @param characterEncoding the character encoding of the input.
	*/
	public DataStreamParser(Reader in, List<String> columnNames,
	String characterEncoding)
	{
	this.columnNames = columnNames;
	this.characterEncoding = characterEncoding;

	if (this.characterEncoding == null)
	{
	// try and get the characterEncoding from the reader
	this.characterEncoding = "US-ASCII";
	try
	{
	this.characterEncoding = ((InputStreamReader)in).getEncoding();
	}
	catch (ClassCastException e)
	{
	// ignore
	}
	}

	tokenizer = new StreamTokenizer(new BufferedReader(in));
	initTokenizer(tokenizer);
	}

	/**
	* Initialize the StreamTokenizer instance used to read the lines
	* from the input reader. This must be implemented in subclasses to
	* set up the tokenizing properties.
	*
	* @param tokenizer the StreamTokenizer to use
	*/
	protected abstract void initTokenizer(StreamTokenizer tokenizer);

	/**
	* Provide a logger
	*
	* @see org.apache.avalon.framework.logger.LogEnabled#enableLogging(org.apache.avalon.framework.logger.Logger)
	*/
	public void enableLogging(Logger logger)
	{
	this.log = logger.getChildLogger("DataStreamParser");
	}

	/**
	* Set the list of column names explicitly.
	*
	* @param columnNames A list of column names.
	*/
	public void setColumnNames(List<String> columnNames)
	{
	this.columnNames = columnNames;
	}

	/**
	* Read the list of column names from the input reader using the
	* tokenizer.
	*
	* @exception IOException an IOException occurred.
	*/
	public void readColumnNames()
	throws IOException
	{
	columnNames = new ArrayList<String>();

	neverRead = false;
	tokenizer.nextToken();
	while (tokenizer.ttype == StreamTokenizer.TT_WORD
	\|\| tokenizer.ttype == '"')
	{
	columnNames.add(tokenizer.sval);
	tokenizer.nextToken();
	}
	}

	/**
	* Determine whether a further row of values exists in the input.
	*
	* @return true if the input has more rows.
	* @exception IOException an IOException occurred.
	*/
	public boolean hasNextRow()
	throws IOException
	{
	// check for end of line ensures that an empty last line doesn't
	// give a false positive for hasNextRow
	if (neverRead \|\| tokenizer.ttype == StreamTokenizer.TT_EOL)
	{
	tokenizer.nextToken();
	tokenizer.pushBack();
	neverRead = false;
	}
	return tokenizer.ttype != StreamTokenizer.TT_EOF;
	}

	/**
	* Returns a ValueParser object containing the next row of values.
	*
	* @return a ValueParser object.
	* @exception IOException an IOException occurred.
	* @exception NoSuchElementException there are no more rows in the input.
	*/
	public ValueParser nextRow()
	throws IOException, NoSuchElementException
	{
	if (!hasNextRow())
	{
	throw new NoSuchElementException();
	}

	if (lineValues == null)
	{
	lineValues = new BaseValueParser(characterEncoding);
	}
	else
	{
	lineValues.clear();
	}

	Iterator<String> it = columnNames.iterator();
	tokenizer.nextToken();
	while (tokenizer.ttype == StreamTokenizer.TT_WORD
	\|\| tokenizer.ttype == '"')
	{
	// note this means that if there are more values than
	// column names, the extra values are discarded.
	if (it.hasNext())
	{
	String colname = it.next().toString();
	String colval = tokenizer.sval;
	if (log.isDebugEnabled())
	{
	log.debug("DataStreamParser.nextRow(): " +
	colname + '=' + colval);
	}
	lineValues.add(colname, colval);
	}
	tokenizer.nextToken();
	}

	return lineValues;
	}

	/**
	* Determine whether a further row of values exists in the input.
	*
	* @return true if the input has more rows.
	*/
	public boolean hasNext()
	{
	boolean hasNext = false;

	try
	{
	hasNext = hasNextRow();
	}
	catch (IOException e)
	{
	log.error("IOException in CSVParser.hasNext", e);
	}

	return hasNext;
	}

	/**
	* Returns a ValueParser object containing the next row of values.
	*
	* @return a ValueParser object as an Object.
	* @exception NoSuchElementException there are no more rows in the input
	* or an IOException occurred.
	*/
	public ValueParser next()
	throws NoSuchElementException
	{
	ValueParser nextRow = null;

	try
	{
	nextRow = nextRow();
	}
	catch (IOException e)
	{
	log.error("IOException in CSVParser.next", e);
	throw new NoSuchElementException();
	}

	return nextRow;
	}

	/**
	* The optional Iterator.remove method is not supported.
	*
	* @exception UnsupportedOperationException the operation is not supported.
	*/
	public void remove()
	throws UnsupportedOperationException
	{
	throw new UnsupportedOperationException();
	}
	}