/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.metamodel.fixedwidth;
import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
import java.util.ArrayList;
import java.util.List;
/**
 * Reader capable of separating values based on a fixed width setting.
 *
 * <p>
 * A record is read from the underlying stream up to the next carriage return, line feed or end of stream, decoded
 * with the configured charset and then cut into values. The values either all share one constant width or follow a
 * configured sequence of widths. Every value is trimmed before it is returned.
 * </p>
 *
 * <p>
 * Record terminators are detected on the raw bytes, so this base implementation only suits charsets that encode
 * carriage return and line feed as the single bytes 0x0D and 0x0A. Sub-classes can override
 * {@link #readSingleRecordData()} to read records differently.
 * </p>
 *
 * <p>
 * Parsing state is kept in plain, unsynchronized fields; an instance must not be used by several threads at once.
 * </p>
 */
class FixedWidthReader implements Closeable {

    private static final int END_OF_STREAM = -1;
    private static final int LINE_FEED = '\n';
    private static final int CARRIAGE_RETURN = '\r';

    /**
     * Charset applied when no charset name was given. It maps every byte to the character with the same numeric
     * value.
     */
    private static final String FALLBACK_CHARSET_NAME = "ISO-8859-1";

    protected final String _charsetName;
    private final int _fixedValueWidth;
    private final int[] _valueWidths;
    private int _valueIndex = 0;
    private final boolean _failOnInconsistentLineWidth;
    private final boolean _constantWidth;
    // Number of the line currently being processed (1-based once the first line has been requested).
    private int _rowNumber;
    protected final BufferedInputStream _stream;
    protected final int _expectedLineLength;

    /**
     * Creates a reader in which every value has the same width.
     *
     * @param stream the stream to read from; it is wrapped in a {@link BufferedInputStream}
     * @param charsetName name of the charset used to decode records, or null to map bytes directly to characters
     * @param fixedValueWidth the width shared by all values
     * @param failOnInconsistentLineWidth whether a record whose length is not a multiple of the value width
     *            causes an {@link InconsistentValueWidthException}
     */
    public FixedWidthReader(InputStream stream, String charsetName, int fixedValueWidth,
            boolean failOnInconsistentLineWidth) {
        this(new BufferedInputStream(stream), charsetName, fixedValueWidth, failOnInconsistentLineWidth);
    }

    private FixedWidthReader(BufferedInputStream stream, String charsetName, int fixedValueWidth,
            boolean failOnInconsistentLineWidth) {
        _stream = stream;
        _charsetName = charsetName;
        _fixedValueWidth = fixedValueWidth;
        _failOnInconsistentLineWidth = failOnInconsistentLineWidth;
        _rowNumber = 0;
        _valueWidths = null;
        _constantWidth = true;
        _expectedLineLength = -1;
    }

    /**
     * Creates a reader in which each value has its own width.
     *
     * @param stream the stream to read from; it is wrapped in a {@link BufferedInputStream}
     * @param charsetName name of the charset used to decode records, or null to map bytes directly to characters
     * @param valueWidths the widths of the values of a record, in order
     * @param failOnInconsistentLineWidth whether a record whose length differs from the sum of the widths causes
     *            an {@link InconsistentValueWidthException}
     * @throws NullPointerException if valueWidths is null
     */
    public FixedWidthReader(InputStream stream, String charsetName, int[] valueWidths,
            boolean failOnInconsistentLineWidth) {
        this(new BufferedInputStream(stream), charsetName, valueWidths, failOnInconsistentLineWidth);
    }

    FixedWidthReader(BufferedInputStream stream, String charsetName, int[] valueWidths,
            boolean failOnInconsistentLineWidth) {
        if (valueWidths == null) {
            throw new NullPointerException("valueWidths cannot be null");
        }
        _stream = stream;
        _charsetName = charsetName;
        _fixedValueWidth = -1;
        // Copy the widths: the expected line length below is computed only once, so later changes made by the
        // caller to its own array must not affect this reader.
        _valueWidths = valueWidths.clone();
        _failOnInconsistentLineWidth = failOnInconsistentLineWidth;
        _rowNumber = 0;
        _constantWidth = false;

        int expectedLineLength = 0;
        for (final int valueWidth : _valueWidths) {
            expectedLineLength += valueWidth;
        }
        _expectedLineLength = expectedLineLength;
    }

    /**
     * This reads and returns the next record from the file. Usually, it is a line but in case the new line characters
     * are not present, the length of the content depends on the column-widths setting. Blank lines are skipped.
     *
     * @return an array of values in the next line, or null if the end of the file has been reached.
     * @throws IllegalStateException if an exception occurs while reading the file (this includes an unsupported
     *             charset name).
     */
    public String[] readLine() throws IllegalStateException {
        try {
            beforeReadLine();
            _rowNumber++;
            return getValues();
        } catch (IOException e) {
            throw new IllegalStateException(e);
        }
    }

    /**
     * Empty hook that enables special behavior in sub-classed readers (by overriding this method).
     */
    protected void beforeReadLine() {
        // intentionally empty
    }

    /**
     * Reads the next record and splits it into values.
     *
     * @return the values of the next record, or null if there is no further record
     * @throws IOException if the record cannot be read
     */
    private String[] getValues() throws IOException {
        final String recordData = readSingleRecordData();
        if (recordData == null) {
            return null;
        }

        final List<String> values = new ArrayList<>();
        processSingleRecordData(recordData, values);

        String[] result = values.toArray(new String[0]);
        if (!_failOnInconsistentLineWidth && !_constantWidth) {
            // Lenient mode with individual widths: always hand out one slot per configured width.
            result = correctResult(result);
        }

        validateConsistentValue(recordData, result, values.size());
        return result;
    }

    /**
     * Throws an {@link InconsistentValueWidthException} if strict checking is enabled and the record length does
     * not match the width configuration.
     */
    private void validateConsistentValue(String recordData, String[] result, int valuesSize) {
        if (!_failOnInconsistentLineWidth) {
            return;
        }

        final boolean inconsistent;
        if (_constantWidth) {
            inconsistent = recordData.length() % _fixedValueWidth != 0;
        } else {
            inconsistent = result.length != valuesSize || recordData.length() != _expectedLineLength;
        }

        if (inconsistent) {
            throw new InconsistentValueWidthException(result, recordData, _rowNumber);
        }
    }

    /**
     * Cuts the record into values and appends them to the given list.
     */
    private void processSingleRecordData(final String singleRecordData, final List<String> values) {
        final StringBuilder nextValue = new StringBuilder();
        _valueIndex = 0;

        // Iterate by index rather than with a CharacterIterator: the iterator's DONE sentinel is the character
        // U+FFFF, which would end the loop early if that character occurred in the data.
        for (int i = 0; i < singleRecordData.length(); i++) {
            processCharacter(singleRecordData.charAt(i), nextValue, values, singleRecordData);
        }

        // A trailing value that is shorter than its configured width is still reported.
        if (nextValue.length() > 0) {
            addNewValueIfAppropriate(values, nextValue);
        }
    }

    /**
     * Reads the raw data of the next non-empty record. A record ends at a carriage return, a line feed, a carriage
     * return directly followed by a line feed, or the end of the stream. The bytes of the record are decoded with
     * the configured charset. Blank lines are skipped and counted in the row number.
     *
     * @return the decoded record, or null if the end of the stream has been reached
     * @throws IOException if reading fails or the configured charset is not supported
     */
    String readSingleRecordData() throws IOException {
        final ByteArrayOutputStream recordBytes = new ByteArrayOutputStream();

        while (true) {
            recordBytes.reset();

            int ch = _stream.read();
            while (!isEndingCharacter(ch)) {
                recordBytes.write(ch);
                ch = _stream.read();
            }

            if (ch == CARRIAGE_RETURN) {
                readLineFeedIfFollows();
            }

            if (recordBytes.size() > 0) {
                return decode(recordBytes);
            }
            if (ch == END_OF_STREAM) {
                return null;
            }

            // A blank line in the middle of the stream is not the end of the data: skip it, but count it so that
            // reported row numbers keep matching the lines of the input.
            _rowNumber++;
        }
    }

    /**
     * Decodes the collected record bytes with the configured charset, or with ISO-8859-1 if none was configured.
     */
    private String decode(ByteArrayOutputStream recordBytes) throws IOException {
        final String charsetName = (_charsetName == null) ? FALLBACK_CHARSET_NAME : _charsetName;
        return recordBytes.toString(charsetName);
    }

    /**
     * Consumes the next byte if it is a line feed; otherwise leaves the stream position untouched.
     */
    private void readLineFeedIfFollows() throws IOException {
        _stream.mark(1);
        if (_stream.read() != LINE_FEED) {
            _stream.reset();
        }
    }

    private boolean isEndingCharacter(int ch) {
        return ch == CARRIAGE_RETURN || ch == LINE_FEED || ch == END_OF_STREAM;
    }

    /**
     * Appends the character to the value being built and completes that value once it has reached its width.
     */
    private void processCharacter(char c, StringBuilder nextValue, List<String> values, String recordData) {
        nextValue.append(c);

        final int valueWidth = getValueWidth(values, recordData);
        if (nextValue.length() == valueWidth) {
            addNewValueIfAppropriate(values, nextValue);
            nextValue.setLength(0); // clear the buffer
            if (_valueWidths != null) {
                _valueIndex = (_valueIndex + 1) % _valueWidths.length;
            }
        }
    }

    /**
     * Returns the width of the value currently being built, or -1 if no width is available and inconsistencies
     * are tolerated.
     */
    private int getValueWidth(List<String> values, String recordData) {
        if (_constantWidth) {
            return _fixedValueWidth;
        }

        // The value index wraps around in processCharacter, so it can only be out of range when no widths are
        // configured at all.
        if (_valueIndex >= _valueWidths.length) {
            if (_failOnInconsistentLineWidth) {
                final String[] result = values.toArray(new String[0]);
                // readLine() has already advanced the row number to the current row.
                throw new InconsistentValueWidthException(result, recordData, _rowNumber);
            }
            return -1; // silently ignore the inconsistency
        }
        return _valueWidths[_valueIndex];
    }

    /**
     * Adds the trimmed value to the list, unless individual widths are configured and the list already holds one
     * value per width.
     */
    private void addNewValueIfAppropriate(List<String> values, StringBuilder nextValue) {
        if (_valueWidths == null || values.size() < _valueWidths.length) {
            values.add(nextValue.toString().trim());
        }
    }

    /**
     * Returns an array with exactly one slot per configured width: missing values are left null and surplus values
     * are dropped. The given array is returned as-is if it already has that length.
     */
    private String[] correctResult(String[] result) {
        if (result.length == _valueWidths.length) {
            return result;
        }
        final String[] correctedResult = new String[_valueWidths.length];
        System.arraycopy(result, 0, correctedResult, 0, Math.min(result.length, correctedResult.length));
        return correctedResult;
    }

    /**
     * Closes the underlying stream.
     */
    @Override
    public void close() throws IOException {
        _stream.close();
    }
}