// blob: 2bc19d9699b1309d1484ef5638be6db5bc07723a [file] [log] [blame]
package org.apache.fulcrum.parser;
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StreamTokenizer;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import org.apache.avalon.framework.logger.LogEnabled;
import org.apache.avalon.framework.logger.Logger;
/**
* DataStreamParser is used to parse a stream with a fixed format and
* generate ValueParser objects which can be used to extract the values
* in the desired type.
*
* <p>The class itself is abstract - a concrete subclass which implements
* the initTokenizer method such as CSVParser or TSVParser is required
* to use the functionality.
*
* <p>The class implements the java.util.Iterator interface for convenience.
* This allows simple use in a Velocity template for example:
*
* <pre>
* #foreach ($row in $datastream)
* Name: $row.Name
* Description: $row.Description
* #end
* </pre>
*
* @author <a href="mailto:sean@informage.net">Sean Legassick</a>
* @version $Id$
*/
public abstract class DataStreamParser
    implements Iterator<ValueParser>, LogEnabled
{
    /**
     * Encoding used when none is given and none can be obtained from the reader.
     */
    private static final String DEFAULT_ENCODING = "US-ASCII";

    /**
     * The list of column names.
     */
    private List<String> columnNames;

    /**
     * The stream tokenizer for reading values from the input reader.
     */
    private final StreamTokenizer tokenizer;

    /**
     * The parameter parser holding the values of columns for the current line.
     * The same instance is cleared and reused for every row.
     */
    private ValueParser lineValues;

    /**
     * Indicates whether or not the tokenizer has read anything yet.
     */
    private boolean neverRead = true;

    /**
     * The character encoding of the input.
     */
    private final String characterEncoding;

    /**
     * Logger to use. Stays {@code null} until {@link #enableLogging(Logger)}
     * has been called, so every use inside this class is null-guarded.
     */
    protected Logger log;

    /**
     * Create a new DataStreamParser instance. Requires a Reader to read the
     * delimited values from, a list of column names and a
     * character encoding.
     *
     * <p>If no encoding is given, the encoding reported by the reader is used
     * when the reader is an {@link InputStreamReader}; otherwise (or if the
     * reader reports none) {@code US-ASCII} is assumed.
     *
     * @param in the input reader.
     * @param columnNames a list of column names.
     * @param characterEncoding the character encoding of the input,
     *        may be {@code null}.
     */
    public DataStreamParser(Reader in, List<String> columnNames,
                            String characterEncoding)
    {
        this.columnNames = columnNames;

        String encoding = characterEncoding;
        if (encoding == null)
        {
            // try and get the characterEncoding from the reader
            if (in instanceof InputStreamReader)
            {
                // getEncoding() may return null, e.g. for a closed stream
                encoding = ((InputStreamReader) in).getEncoding();
            }
            if (encoding == null)
            {
                encoding = DEFAULT_ENCODING;
            }
        }
        this.characterEncoding = encoding;

        tokenizer = new StreamTokenizer(new BufferedReader(in));
        initTokenizer(tokenizer);
    }

    /**
     * Initialize the StreamTokenizer instance used to read the lines
     * from the input reader. This must be implemented in subclasses to
     * set up the tokenizing properties.
     *
     * <p>Note that this method is invoked from the constructor of this
     * class, i.e. before any subclass fields have been initialized.
     *
     * @param tokenizer the StreamTokenizer to use
     */
    protected abstract void initTokenizer(StreamTokenizer tokenizer);

    /**
     * Provide a logger
     *
     * @see org.apache.avalon.framework.logger.LogEnabled#enableLogging(org.apache.avalon.framework.logger.Logger)
     */
    public void enableLogging(Logger logger)
    {
        this.log = logger.getChildLogger("DataStreamParser");
    }

    /**
     * Set the list of column names explicitly.
     *
     * @param columnNames A list of column names.
     */
    public void setColumnNames(List<String> columnNames)
    {
        this.columnNames = columnNames;
    }

    /**
     * Read the list of column names from the input reader using the
     * tokenizer. Any previously set column names are replaced. Reading
     * stops at the first token that is neither a word nor a quoted string.
     *
     * @exception IOException an IOException occurred.
     */
    public void readColumnNames()
        throws IOException
    {
        columnNames = new ArrayList<String>();
        neverRead = false;

        tokenizer.nextToken();
        while (isValueToken())
        {
            columnNames.add(tokenizer.sval);
            tokenizer.nextToken();
        }
    }

    /**
     * Determine whether a further row of values exists in the input.
     *
     * @return true if the input has more rows.
     * @exception IOException an IOException occurred.
     */
    public boolean hasNextRow()
        throws IOException
    {
        // check for end of line ensures that an empty last line doesn't
        // give a false positive for hasNextRow
        if (neverRead || tokenizer.ttype == StreamTokenizer.TT_EOL)
        {
            // peek at the next token: read it to update ttype, then push it
            // back so the following nextToken() call returns it again
            tokenizer.nextToken();
            tokenizer.pushBack();
            neverRead = false;
        }
        return tokenizer.ttype != StreamTokenizer.TT_EOF;
    }

    /**
     * Returns a ValueParser object containing the next row of values.
     * The returned object is reused between calls, so its content is only
     * valid until the next row is read. Column names must have been
     * provided beforehand (constructor, {@link #setColumnNames(List)} or
     * {@link #readColumnNames()}).
     *
     * @return a ValueParser object.
     * @exception IOException an IOException occurred.
     * @exception NoSuchElementException there are no more rows in the input.
     */
    public ValueParser nextRow()
        throws IOException, NoSuchElementException
    {
        if (!hasNextRow())
        {
            throw new NoSuchElementException();
        }

        if (lineValues == null)
        {
            lineValues = new BaseValueParser(characterEncoding);
        }
        else
        {
            lineValues.clear();
        }

        Iterator<String> it = columnNames.iterator();
        tokenizer.nextToken();
        while (isValueToken())
        {
            // note this means that if there are more values than
            // column names, the extra values are discarded.
            if (it.hasNext())
            {
                String colname = it.next();
                String colval = tokenizer.sval;
                // the logger is optional, do not fail if none was provided
                if (log != null && log.isDebugEnabled())
                {
                    log.debug("DataStreamParser.nextRow(): " +
                              colname + '=' + colval);
                }
                lineValues.add(colname, colval);
            }
            tokenizer.nextToken();
        }
        return lineValues;
    }

    /**
     * Determine whether a further row of values exists in the input.
     * An IOException raised while checking is logged (if a logger is
     * available) and reported as "no more rows".
     *
     * @return true if the input has more rows.
     */
    public boolean hasNext()
    {
        boolean hasNext = false;
        try
        {
            hasNext = hasNextRow();
        }
        catch (IOException e)
        {
            logError("IOException in DataStreamParser.hasNext", e);
        }
        return hasNext;
    }

    /**
     * Returns a ValueParser object containing the next row of values.
     *
     * @return a ValueParser object.
     * @exception NoSuchElementException there are no more rows in the input
     * or an IOException occurred; in the latter case the IOException is
     * attached as the cause.
     */
    public ValueParser next()
        throws NoSuchElementException
    {
        try
        {
            return nextRow();
        }
        catch (IOException e)
        {
            logError("IOException in DataStreamParser.next", e);
            // keep the original failure reachable for the caller
            NoSuchElementException failure =
                new NoSuchElementException(e.getMessage());
            failure.initCause(e);
            throw failure;
        }
    }

    /**
     * The optional Iterator.remove method is not supported.
     *
     * @exception UnsupportedOperationException the operation is not supported.
     */
    public void remove()
        throws UnsupportedOperationException
    {
        throw new UnsupportedOperationException();
    }

    /**
     * Tells whether the current token carries a column value, i.e. is
     * either a word or a double-quoted string.
     *
     * @return true if the current token is a value token.
     */
    private boolean isValueToken()
    {
        return tokenizer.ttype == StreamTokenizer.TT_WORD
            || tokenizer.ttype == '"';
    }

    /**
     * Logs an error if a logger has been provided, otherwise does nothing.
     *
     * @param message the message to log.
     * @param cause the exception that triggered the message.
     */
    private void logError(String message, Throwable cause)
    {
        if (log != null)
        {
            log.error(message, cause);
        }
    }
}