| package org.apache.fulcrum.parser; |
| |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| |
| import java.io.BufferedReader; |
| import java.io.IOException; |
| import java.io.InputStreamReader; |
| import java.io.Reader; |
| import java.io.StreamTokenizer; |
| import java.util.ArrayList; |
| import java.util.Iterator; |
| import java.util.List; |
| import java.util.NoSuchElementException; |
| |
| import org.apache.avalon.framework.logger.LogEnabled; |
| import org.apache.avalon.framework.logger.Logger; |
| |
| /** |
| * DataStreamParser is used to parse a stream with a fixed format and |
| * generate ValueParser objects which can be used to extract the values |
| * in the desired type. |
| * |
| * <p>The class itself is abstract - a concrete subclass which implements |
| * the initTokenizer method such as CSVParser or TSVParser is required |
| * to use the functionality. |
| * |
| * <p>The class implements the java.util.Iterator interface for convenience. |
| * This allows simple use in a Velocity template for example: |
| * |
| * <pre> |
| * #foreach ($row in $datastream) |
| * Name: $row.Name |
| * Description: $row.Description |
| * #end |
| * </pre> |
| * |
| * @author <a href="mailto:sean@informage.net">Sean Legassick</a> |
| * @version $Id$ |
| */ |
| public abstract class DataStreamParser |
| implements Iterator<ValueParser>, LogEnabled |
| { |
| /** |
| * The list of column names. |
| */ |
| private List<String> columnNames; |
| |
| /** |
| * The stream tokenizer for reading values from the input reader. |
| */ |
| private final StreamTokenizer tokenizer; |
| |
| /** |
| * The parameter parser holding the values of columns for the current line. |
| */ |
| private ValueParser lineValues; |
| |
| /** |
| * Indicates whether or not the tokenizer has read anything yet. |
| */ |
| private boolean neverRead = true; |
| |
| /** |
| * The character encoding of the input |
| */ |
| private String characterEncoding; |
| |
| /** |
| * Logger to use |
| */ |
| protected Logger log; |
| |
| /** |
| * Create a new DataStreamParser instance. Requires a Reader to read the |
| * comma-separated values from, a list of column names and a |
| * character encoding. |
| * |
| * @param in the input reader. |
| * @param columnNames a list of column names. |
| * @param characterEncoding the character encoding of the input. |
| */ |
| public DataStreamParser(Reader in, List<String> columnNames, |
| String characterEncoding) |
| { |
| this.columnNames = columnNames; |
| this.characterEncoding = characterEncoding; |
| |
| if (this.characterEncoding == null) |
| { |
| // try and get the characterEncoding from the reader |
| this.characterEncoding = "US-ASCII"; |
| try |
| { |
| this.characterEncoding = ((InputStreamReader)in).getEncoding(); |
| } |
| catch (ClassCastException e) |
| { |
| // ignore |
| } |
| } |
| |
| tokenizer = new StreamTokenizer(new BufferedReader(in)); |
| initTokenizer(tokenizer); |
| } |
| |
| /** |
| * Initialize the StreamTokenizer instance used to read the lines |
| * from the input reader. This must be implemented in subclasses to |
| * set up the tokenizing properties. |
| * |
| * @param tokenizer the StreamTokenizer to use |
| */ |
| protected abstract void initTokenizer(StreamTokenizer tokenizer); |
| |
| /** |
| * Provide a logger |
| * |
| * @see org.apache.avalon.framework.logger.LogEnabled#enableLogging(org.apache.avalon.framework.logger.Logger) |
| */ |
| public void enableLogging(Logger logger) |
| { |
| this.log = logger.getChildLogger("DataStreamParser"); |
| } |
| |
| /** |
| * Set the list of column names explicitly. |
| * |
| * @param columnNames A list of column names. |
| */ |
| public void setColumnNames(List<String> columnNames) |
| { |
| this.columnNames = columnNames; |
| } |
| |
| /** |
| * Read the list of column names from the input reader using the |
| * tokenizer. |
| * |
| * @exception IOException an IOException occurred. |
| */ |
| public void readColumnNames() |
| throws IOException |
| { |
| columnNames = new ArrayList<String>(); |
| |
| neverRead = false; |
| tokenizer.nextToken(); |
| while (tokenizer.ttype == StreamTokenizer.TT_WORD |
| || tokenizer.ttype == '"') |
| { |
| columnNames.add(tokenizer.sval); |
| tokenizer.nextToken(); |
| } |
| } |
| |
| /** |
| * Determine whether a further row of values exists in the input. |
| * |
| * @return true if the input has more rows. |
| * @exception IOException an IOException occurred. |
| */ |
| public boolean hasNextRow() |
| throws IOException |
| { |
| // check for end of line ensures that an empty last line doesn't |
| // give a false positive for hasNextRow |
| if (neverRead || tokenizer.ttype == StreamTokenizer.TT_EOL) |
| { |
| tokenizer.nextToken(); |
| tokenizer.pushBack(); |
| neverRead = false; |
| } |
| return tokenizer.ttype != StreamTokenizer.TT_EOF; |
| } |
| |
| /** |
| * Returns a ValueParser object containing the next row of values. |
| * |
| * @return a ValueParser object. |
| * @exception IOException an IOException occurred. |
| * @exception NoSuchElementException there are no more rows in the input. |
| */ |
| public ValueParser nextRow() |
| throws IOException, NoSuchElementException |
| { |
| if (!hasNextRow()) |
| { |
| throw new NoSuchElementException(); |
| } |
| |
| if (lineValues == null) |
| { |
| lineValues = new BaseValueParser(characterEncoding); |
| } |
| else |
| { |
| lineValues.clear(); |
| } |
| |
| Iterator<String> it = columnNames.iterator(); |
| tokenizer.nextToken(); |
| while (tokenizer.ttype == StreamTokenizer.TT_WORD |
| || tokenizer.ttype == '"') |
| { |
| // note this means that if there are more values than |
| // column names, the extra values are discarded. |
| if (it.hasNext()) |
| { |
| String colname = it.next().toString(); |
| String colval = tokenizer.sval; |
| if (log.isDebugEnabled()) |
| { |
| log.debug("DataStreamParser.nextRow(): " + |
| colname + '=' + colval); |
| } |
| lineValues.add(colname, colval); |
| } |
| tokenizer.nextToken(); |
| } |
| |
| return lineValues; |
| } |
| |
| /** |
| * Determine whether a further row of values exists in the input. |
| * |
| * @return true if the input has more rows. |
| */ |
| public boolean hasNext() |
| { |
| boolean hasNext = false; |
| |
| try |
| { |
| hasNext = hasNextRow(); |
| } |
| catch (IOException e) |
| { |
| log.error("IOException in CSVParser.hasNext", e); |
| } |
| |
| return hasNext; |
| } |
| |
| /** |
| * Returns a ValueParser object containing the next row of values. |
| * |
| * @return a ValueParser object as an Object. |
| * @exception NoSuchElementException there are no more rows in the input |
| * or an IOException occurred. |
| */ |
| public ValueParser next() |
| throws NoSuchElementException |
| { |
| ValueParser nextRow = null; |
| |
| try |
| { |
| nextRow = nextRow(); |
| } |
| catch (IOException e) |
| { |
| log.error("IOException in CSVParser.next", e); |
| throw new NoSuchElementException(); |
| } |
| |
| return nextRow; |
| } |
| |
| /** |
| * The optional Iterator.remove method is not supported. |
| * |
| * @exception UnsupportedOperationException the operation is not supported. |
| */ |
| public void remove() |
| throws UnsupportedOperationException |
| { |
| throw new UnsupportedOperationException(); |
| } |
| } |