METAMODEL-250: Added support for EBCDIC files
Closes #103
diff --git a/CHANGES.md b/CHANGES.md
index f0264c6..c0b90cc 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -2,6 +2,7 @@
* [METAMODEL-1099] - Created a new DataContextFactory SPI and a extensible registry of implementations based on ServiceLoader.
* [METAMODEL-1099] - Implemented DataContextFactory SPI for connectors: JDBC, CSV, ElasticSearch
+ * [METAMODEL-250] - Added support for EBCDIC files (part of 'fixedwidth' module).
* [METAMODEL-1103] - Fixed a bug pertaining to anchoring of wildcards in LIKE operands.
* [METAMODEL-1088] - Add support for aliases in MongoDB.
* [METAMODEL-1086] - Fixed encoding issue when CsvDataContext is instantiated with InputStream.
diff --git a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/EbcdicConfiguration.java b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/EbcdicConfiguration.java
new file mode 100644
index 0000000..389a4f8
--- /dev/null
+++ b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/EbcdicConfiguration.java
@@ -0,0 +1,60 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.metamodel.fixedwidth;
+
+/**
+ * Special fixed-width configuration for EBCDIC files.
+ *
+ * On top of the regular fixed-width settings it carries two flags: whether a leading header should be skipped and
+ * whether the input contains end-of-line characters between records.
+ */
+public final class EbcdicConfiguration extends FixedWidthConfiguration {
+
+    private static final long serialVersionUID = 1L;
+
+    private final boolean _skipEbcdicHeader;
+    private final boolean _eolPresent;
+
+    /**
+     * Creates a configuration in which every value has the same width.
+     *
+     * @param columnNameLineNumber the line number (1 based) from which to get the names of the columns
+     * @param encoding the encoding used to decode the file
+     * @param fixedValueWidth the width shared by all values
+     * @param failOnInconsistentLineWidth whether an inconsistent line width should raise an exception
+     * @param skipEbcdicHeader whether the input starts with a header that should be skipped
+     * @param eolPresent whether the input contains new line characters
+     */
+    public EbcdicConfiguration(int columnNameLineNumber, String encoding, int fixedValueWidth,
+            boolean failOnInconsistentLineWidth, boolean skipEbcdicHeader, boolean eolPresent) {
+        super(columnNameLineNumber, encoding, fixedValueWidth, failOnInconsistentLineWidth);
+        _skipEbcdicHeader = skipEbcdicHeader;
+        _eolPresent = eolPresent;
+    }
+
+    /**
+     * Creates a configuration with an individual width per column.
+     *
+     * @param columnNameLineNumber the line number (1 based) from which to get the names of the columns
+     * @param encoding the encoding used to decode the file
+     * @param valueWidths the width of each column, in column order
+     * @param failOnInconsistentLineWidth whether an inconsistent line width should raise an exception
+     * @param skipEbcdicHeader whether the input starts with a header that should be skipped
+     * @param eolPresent whether the input contains new line characters
+     */
+    public EbcdicConfiguration(int columnNameLineNumber, String encoding, int[] valueWidths,
+            boolean failOnInconsistentLineWidth, boolean skipEbcdicHeader, boolean eolPresent) {
+        super(columnNameLineNumber, null, encoding, valueWidths, failOnInconsistentLineWidth);
+        _skipEbcdicHeader = skipEbcdicHeader;
+        _eolPresent = eolPresent;
+    }
+
+    /**
+     * Determines if the input file contains a header that should be skipped before reading records data.
+     *
+     * @return a boolean indicating whether or not to skip EBCDIC header.
+     */
+    public boolean isSkipEbcdicHeader() {
+        return _skipEbcdicHeader;
+    }
+
+    /**
+     * Determines if the input file contains new line characters.
+     *
+     * @return a boolean indicating whether or not the input contains new line characters.
+     */
+    public boolean isEolPresent() {
+        return _eolPresent;
+    }
+
+    /**
+     * Adds the EBCDIC-specific flags to the identity contributed by {@link FixedWidthConfiguration}, so that two
+     * configurations differing only in these flags are not considered interchangeable.
+     * NOTE(review): assumes BaseObject derives equals()/hashCode() from these identifiers - confirm.
+     */
+    @Override
+    protected void decorateIdentity(java.util.List<Object> identifiers) {
+        super.decorateIdentity(identifiers);
+        identifiers.add(_skipEbcdicHeader);
+        identifiers.add(_eolPresent);
+    }
+}
diff --git a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/EbcdicReader.java b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/EbcdicReader.java
new file mode 100644
index 0000000..a7639fc
--- /dev/null
+++ b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/EbcdicReader.java
@@ -0,0 +1,75 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.metamodel.fixedwidth;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+
+/**
+ * Fixed-width reader variant for EBCDIC input. It can skip a leading header of one record length and it can read
+ * records that are not separated by end-of-line characters.
+ */
+class EbcdicReader extends FixedWidthReader {
+
+    private final boolean _skipEbcdicHeader;
+    private final boolean _eolPresent;
+    private boolean _headerSkipped;
+
+    /**
+     * @param stream the stream to read records from
+     * @param charsetName the name of the charset used to decode record bytes
+     * @param valueWidths the width of each column, in column order
+     * @param failOnInconsistentLineWidth whether an inconsistent record width should raise an exception
+     * @param skipEbcdicHeader whether a header should be skipped before the first record
+     * @param eolPresent whether records are separated by end-of-line characters
+     */
+    public EbcdicReader(BufferedInputStream stream, String charsetName, int[] valueWidths,
+            boolean failOnInconsistentLineWidth, boolean skipEbcdicHeader, boolean eolPresent) {
+        super(stream, charsetName, valueWidths, failOnInconsistentLineWidth);
+        _skipEbcdicHeader = skipEbcdicHeader;
+        _eolPresent = eolPresent;
+    }
+
+    /**
+     * Skips the header once, before the very first record is read.
+     *
+     * @throws IllegalStateException if the underlying stream fails while skipping
+     */
+    @Override
+    protected void beforeReadLine() {
+        if (shouldSkipHeader()) {
+            try {
+                skipHeader();
+            } catch (IOException e) {
+                throw new IllegalStateException("A problem occurred while skipping the input stream. ", e);
+            }
+        }
+    }
+
+    private boolean shouldSkipHeader() {
+        return (_skipEbcdicHeader && !_headerSkipped);
+    }
+
+    /**
+     * Skips {@code _expectedLineLength} bytes. {@link java.io.InputStream#skip(long)} may skip fewer bytes than
+     * requested, so skipping is repeated until the whole header is consumed or the stream ends.
+     */
+    private void skipHeader() throws IOException {
+        _headerSkipped = true;
+        long remaining = _expectedLineLength;
+
+        while (remaining > 0) {
+            long skipped = _stream.skip(remaining);
+
+            if (skipped <= 0) {
+                // skip() made no progress: read a single byte to tell "end of stream" from "try again"
+                if (_stream.read() < 0) {
+                    return;
+                }
+                skipped = 1;
+            }
+
+            remaining -= skipped;
+        }
+    }
+
+    /**
+     * Reads the next record. With end-of-line characters present this delegates to the parent implementation;
+     * otherwise exactly {@code _expectedLineLength} bytes are consumed and decoded with the configured charset.
+     *
+     * @return the decoded record (shorter than expected if the stream ends mid-record), or null when no more bytes
+     *         are available
+     */
+    @Override
+    protected String readSingleRecordData() throws IOException {
+        if (_eolPresent) {
+            return super.readSingleRecordData();
+        }
+
+        final byte[] buffer = new byte[_expectedLineLength];
+        int totalRead = 0;
+
+        // a single read() may return fewer bytes than requested, so keep reading until the record is complete
+        while (totalRead < _expectedLineLength) {
+            final int bytesRead = _stream.read(buffer, totalRead, _expectedLineLength - totalRead);
+
+            if (bytesRead < 0) {
+                break;
+            }
+
+            totalRead += bytesRead;
+        }
+
+        // nothing read: end of stream (also covers a zero record length, which could otherwise never terminate)
+        if (totalRead == 0) {
+            return null;
+        }
+
+        // decode only the bytes actually read instead of the zero-padded remainder of the buffer
+        return new String(buffer, 0, totalRead, _charsetName);
+    }
+}
diff --git a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthColumnSpec.java b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthColumnSpec.java
index 65ec219..dedfbcd 100644
--- a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthColumnSpec.java
+++ b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthColumnSpec.java
@@ -24,7 +24,7 @@
* Represents the specification of a single column for a
* {@link FixedWidthDataContext}.
*/
-public final class FixedWidthColumnSpec implements HasName {
+final class FixedWidthColumnSpec implements HasName {
private final String name;
private final int width;
diff --git a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthConfiguration.java b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthConfiguration.java
index 2b2cae5..c53ff16 100644
--- a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthConfiguration.java
+++ b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthConfiguration.java
@@ -31,32 +31,29 @@
import org.apache.metamodel.util.HasNameMapper;
/**
- * Configuration of metadata about a fixed width values datacontext.
+ * Configuration of metadata about a fixed width values data context.
*/
-public final class FixedWidthConfiguration extends BaseObject implements
- Serializable {
+public class FixedWidthConfiguration extends BaseObject implements Serializable {
- private static final long serialVersionUID = 1L;
+ private static final long serialVersionUID = 1L;
- public static final int NO_COLUMN_NAME_LINE = 0;
- public static final int DEFAULT_COLUMN_NAME_LINE = 1;
+ public static final int NO_COLUMN_NAME_LINE = 0;
+ public static final int DEFAULT_COLUMN_NAME_LINE = 1;
- private final String encoding;
- private final int fixedValueWidth;
- private final int[] valueWidths;
- private final int columnNameLineNumber;
- private final boolean failOnInconsistentLineWidth;
- private final ColumnNamingStrategy columnNamingStrategy;
+ private final String encoding;
+ private final int fixedValueWidth;
+ private final int[] valueWidths;
+ private final int columnNameLineNumber;
+ private final boolean failOnInconsistentLineWidth;
+ private final ColumnNamingStrategy columnNamingStrategy;
- public FixedWidthConfiguration(int fixedValueWidth) {
- this(DEFAULT_COLUMN_NAME_LINE, FileHelper.DEFAULT_ENCODING,
- fixedValueWidth);
- }
+ public FixedWidthConfiguration(int fixedValueWidth) {
+ this(DEFAULT_COLUMN_NAME_LINE, FileHelper.DEFAULT_ENCODING, fixedValueWidth);
+ }
- public FixedWidthConfiguration(int[] valueWidth) {
- this(DEFAULT_COLUMN_NAME_LINE, FileHelper.DEFAULT_ENCODING, valueWidth,
- false);
- }
+ public FixedWidthConfiguration(int[] valueWidth) {
+ this(DEFAULT_COLUMN_NAME_LINE, FileHelper.DEFAULT_ENCODING, valueWidth, false);
+ }
public FixedWidthConfiguration(int columnNameLineNumber, String encoding, int fixedValueWidth) {
this(columnNameLineNumber, encoding, fixedValueWidth, false);
@@ -72,11 +69,11 @@
this.valueWidths = new int[0];
}
- public FixedWidthConfiguration(int columnNameLineNumber, String encoding,
- int[] valueWidths, boolean failOnInconsistentLineWidth) {
+ public FixedWidthConfiguration(int columnNameLineNumber, String encoding, int[] valueWidths,
+ boolean failOnInconsistentLineWidth) {
this(columnNameLineNumber, null, encoding, valueWidths, failOnInconsistentLineWidth);
}
-
+
public FixedWidthConfiguration(int columnNameLineNumber, ColumnNamingStrategy columnNamingStrategy, String encoding,
int[] valueWidths, boolean failOnInconsistentLineWidth) {
this.encoding = encoding;
@@ -86,7 +83,7 @@
this.columnNamingStrategy = columnNamingStrategy;
this.valueWidths = valueWidths;
}
-
+
public FixedWidthConfiguration(String encoding, List<FixedWidthColumnSpec> columnSpecs) {
this(encoding, columnSpecs, false);
}
@@ -106,84 +103,84 @@
}
/**
- * The line number (1 based) from which to get the names of the columns.
- *
- * @return an int representing the line number of the column headers/names.
- */
- public int getColumnNameLineNumber() {
- return columnNameLineNumber;
- }
-
- /**
- * Gets a {@link ColumnNamingStrategy} to use if needed.
- * @return
- */
- public ColumnNamingStrategy getColumnNamingStrategy() {
- if (columnNamingStrategy == null) {
- return ColumnNamingStrategies.defaultStrategy();
- }
+ * The line number (1 based) from which to get the names of the columns.
+ *
+ * @return an int representing the line number of the column headers/names.
+ */
+ public int getColumnNameLineNumber() {
+ return columnNameLineNumber;
+ }
+
+ /**
+ * Gets a {@link ColumnNamingStrategy} to use if needed.
+ * @return column naming strategy
+ */
+ public ColumnNamingStrategy getColumnNamingStrategy() {
+ if (columnNamingStrategy == null) {
+ return ColumnNamingStrategies.defaultStrategy();
+ }
return columnNamingStrategy;
}
- /**
- * Gets the file encoding to use for reading the file.
- *
- * @return the text encoding to use for reading the file.
- */
- public String getEncoding() {
- return encoding;
- }
+ /**
+ * Gets the file encoding to use for reading the file.
+ *
+ * @return the text encoding to use for reading the file.
+ */
+ public String getEncoding() {
+ return encoding;
+ }
- /**
- * Gets the width of each value within the fixed width value file.
- *
- * @return the fixed width to use when parsing the file.
- */
- public int getFixedValueWidth() {
- return fixedValueWidth;
- }
+ /**
+ * Gets the width of each value within the fixed width value file.
+ *
+ * @return the fixed width to use when parsing the file.
+ */
+ public int getFixedValueWidth() {
+ return fixedValueWidth;
+ }
- public int[] getValueWidths() {
- return valueWidths;
- }
+ public int[] getValueWidths() {
+ return valueWidths;
+ }
- /**
- * Determines if the {@link DataSet#next()} should throw an exception in
- * case of inconsistent line width in the fixed width value file.
- *
- * @return a boolean indicating whether or not to fail on inconsistent line
- * widths.
- */
- public boolean isFailOnInconsistentLineWidth() {
- return failOnInconsistentLineWidth;
- }
+ /**
+ * Determines if the {@link DataSet#next()} should throw an exception in
+ * case of inconsistent line width in the fixed width value file.
+ *
+ * @return a boolean indicating whether or not to fail on inconsistent line
+ * widths.
+ */
+ public boolean isFailOnInconsistentLineWidth() {
+ return failOnInconsistentLineWidth;
+ }
- @Override
- protected void decorateIdentity(List<Object> identifiers) {
- identifiers.add(columnNameLineNumber);
- identifiers.add(encoding);
- identifiers.add(fixedValueWidth);
- identifiers.add(valueWidths);
- identifiers.add(failOnInconsistentLineWidth);
- }
+ @Override
+ protected void decorateIdentity(List<Object> identifiers) {
+ identifiers.add(columnNameLineNumber);
+ identifiers.add(encoding);
+ identifiers.add(fixedValueWidth);
+ identifiers.add(valueWidths);
+ identifiers.add(failOnInconsistentLineWidth);
+ }
- @Override
- public String toString() {
- return "FixedWidthConfiguration[encoding=" + encoding
- + ", fixedValueWidth=" + fixedValueWidth + ", valueWidths="
- + Arrays.toString(valueWidths) + ", columnNameLineNumber="
- + columnNameLineNumber + ", failOnInconsistentLineWidth="
- + failOnInconsistentLineWidth + "]";
- }
+ @Override
+ public String toString() {
+ return "FixedWidthConfiguration[encoding=" + encoding
+ + ", fixedValueWidth=" + fixedValueWidth + ", valueWidths="
+ + Arrays.toString(valueWidths) + ", columnNameLineNumber="
+ + columnNameLineNumber + ", failOnInconsistentLineWidth="
+ + failOnInconsistentLineWidth + "]";
+ }
- public boolean isConstantValueWidth() {
- return fixedValueWidth != -1;
- }
+ public boolean isConstantValueWidth() {
+ return fixedValueWidth != -1;
+ }
- public int getValueWidth(int columnIndex) {
- if (isConstantValueWidth()) {
- return fixedValueWidth;
- }
- return valueWidths[columnIndex];
- }
+ public int getValueWidth(int columnIndex) {
+ if (isConstantValueWidth()) {
+ return fixedValueWidth;
+ }
+ return valueWidths[columnIndex];
+ }
}
diff --git a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthConfigurationReader.java b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthConfigurationReader.java
index 9154e5e..264287f 100644
--- a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthConfigurationReader.java
+++ b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthConfigurationReader.java
@@ -60,10 +60,9 @@
* "http://support.sas.com/documentation/cdl/en/etlug/67323/HTML/default/viewer.htm#p0h03yig7fp1qan1arghp3lwjqi6.htm">
* described here</a>.
*
- * @param encoding
- * @param resource
- * the format file resource
- * @param failOnInconsistentLineWidth
+ * @param encoding the format file encoding
+ * @param resource the format file resource
+ * @param failOnInconsistentLineWidth flag specifying whether inconsistent line should stop processing or not
* @return a {@link FixedWidthConfiguration} object to use
*/
public FixedWidthConfiguration readFromSasFormatFile(String encoding, Resource resource,
@@ -88,13 +87,11 @@
/**
* Reads a {@link FixedWidthConfiguration} based on a SAS INPUT declaration.
- * The reader method also optionally will look for a LABEL defintion for
- * column naming.
+ * The reader method also optionally will look for a LABEL definition for column naming.
*
- * @param encoding
- * @param resource
- * the format file resource
- * @param failOnInconsistentLineWidth
+ * @param encoding the format file encoding
+ * @param resource the format file resource
+ * @param failOnInconsistentLineWidth flag specifying whether inconsistent line should stop processing or not
* @return a {@link FixedWidthConfiguration} object to use
*/
public FixedWidthConfiguration readFromSasInputDefinition(String encoding, Resource resource,
@@ -176,5 +173,4 @@
return new FixedWidthConfiguration(encoding, columnSpecs, failOnInconsistentLineWidth);
}
-
}
diff --git a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthDataContext.java b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthDataContext.java
index d28a0b2..027cdab 100644
--- a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthDataContext.java
+++ b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthDataContext.java
@@ -18,9 +18,9 @@
*/
package org.apache.metamodel.fixedwidth;
+import java.io.BufferedInputStream;
import java.io.File;
import java.io.InputStream;
-import java.io.Reader;
import org.apache.metamodel.MetaModelException;
import org.apache.metamodel.QueryPostprocessDataContext;
@@ -106,7 +106,7 @@
/**
* Gets the resource being read
*
- * @return
+ * @return a {@link Resource} object
*/
public Resource getResource() {
return _resource;
@@ -184,16 +184,23 @@
private FixedWidthReader createReader() {
final InputStream inputStream = _resource.read();
- final Reader fileReader = FileHelper.getReader(inputStream, _configuration.getEncoding());
final FixedWidthReader reader;
- if (_configuration.isConstantValueWidth()) {
- reader = new FixedWidthReader(fileReader, _configuration.getFixedValueWidth(), _configuration
- .isFailOnInconsistentLineWidth());
+
+        if (_configuration instanceof EbcdicConfiguration) {
+            final EbcdicConfiguration ebcdicConfiguration = (EbcdicConfiguration) _configuration;
+            // The resource is only declared to return an InputStream, so wrap it when needed instead of
+            // casting blindly (a plain cast fails with ClassCastException for non-buffered streams).
+            final BufferedInputStream bufferedStream = (inputStream instanceof BufferedInputStream)
+                    ? (BufferedInputStream) inputStream : new BufferedInputStream(inputStream);
+            reader = new EbcdicReader(bufferedStream, ebcdicConfiguration.getEncoding(),
+                    ebcdicConfiguration.getValueWidths(), ebcdicConfiguration.isFailOnInconsistentLineWidth(),
+                    ebcdicConfiguration.isSkipEbcdicHeader(), ebcdicConfiguration.isEolPresent());
} else {
- reader = new FixedWidthReader(fileReader, _configuration.getValueWidths(), _configuration
- .isFailOnInconsistentLineWidth());
+            if (_configuration.isConstantValueWidth()) {
+                reader = new FixedWidthReader(inputStream, _configuration.getEncoding(),
+                        _configuration.getFixedValueWidth(), _configuration.isFailOnInconsistentLineWidth());
+            } else {
+                reader = new FixedWidthReader(inputStream, _configuration.getEncoding(),
+                        _configuration.getValueWidths(), _configuration.isFailOnInconsistentLineWidth());
+            }
}
+
return reader;
}
-
}
diff --git a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthDataSet.java b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthDataSet.java
index 44ce808..4f78bab 100644
--- a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthDataSet.java
+++ b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthDataSet.java
@@ -98,8 +98,7 @@
if (columnNumber < stringValues.length) {
rowValues[i] = stringValues[columnNumber];
} else {
- // Ticket #125: Missing values should be enterpreted as
- // null.
+ // Ticket #125: Missing values should be interpreted as null.
rowValues[i] = null;
}
}
diff --git a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthReader.java b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthReader.java
index d7a18cf..da17ff1 100644
--- a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthReader.java
+++ b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthReader.java
@@ -18,78 +18,235 @@
*/
package org.apache.metamodel.fixedwidth;
-import java.io.BufferedReader;
+import java.io.BufferedInputStream;
import java.io.Closeable;
import java.io.IOException;
-import java.io.Reader;
+import java.io.InputStream;
+import java.text.CharacterIterator;
+import java.text.StringCharacterIterator;
+import java.util.ArrayList;
+import java.util.List;
/**
* Reader capable of separating values based on a fixed width setting.
*/
-final public class FixedWidthReader implements Closeable {
+class FixedWidthReader implements Closeable {
+ private static final int END_OF_STREAM = -1;
+ private static final int LINE_FEED = '\n';
+ private static final int CARRIAGE_RETURN = '\r';
+
+ protected final String _charsetName;
+ private final int _fixedValueWidth;
+ private final int[] _valueWidths;
+ private int _valueIndex = 0;
+ private final boolean _failOnInconsistentLineWidth;
+ private final boolean _constantWidth;
+ private volatile int _rowNumber;
+ protected final BufferedInputStream _stream;
+ protected final int _expectedLineLength;
- private final BufferedReader _reader;
- private final FixedWidthLineParser _parser;
+ public FixedWidthReader(InputStream stream, String charsetName, int fixedValueWidth,
+ boolean failOnInconsistentLineWidth) {
+ this(new BufferedInputStream(stream), charsetName, fixedValueWidth, failOnInconsistentLineWidth);
+ }
- public FixedWidthReader(Reader reader, int fixedValueWidth,
- boolean failOnInconsistentLineWidth) {
- this(new BufferedReader(reader), fixedValueWidth,
- failOnInconsistentLineWidth);
- }
+ private FixedWidthReader(BufferedInputStream stream, String charsetName, int fixedValueWidth,
+ boolean failOnInconsistentLineWidth) {
+ _stream = stream;
+ _charsetName = charsetName;
+ _fixedValueWidth = fixedValueWidth;
+ _failOnInconsistentLineWidth = failOnInconsistentLineWidth;
+ _rowNumber = 0;
+ _valueWidths = null;
+ _constantWidth = true;
+ _expectedLineLength = -1;
+ }
- public FixedWidthReader(BufferedReader reader, int fixedValueWidth,
- boolean failOnInconsistentLineWidth) {
- _reader = reader;
- final FixedWidthConfiguration fixedWidthConfiguration = new FixedWidthConfiguration(
- FixedWidthConfiguration.NO_COLUMN_NAME_LINE, null, fixedValueWidth, failOnInconsistentLineWidth);
- _parser = new FixedWidthLineParser(fixedWidthConfiguration, -1, 0);
- }
+ public FixedWidthReader(InputStream stream, String charsetName, int[] valueWidths,
+ boolean failOnInconsistentLineWidth) {
+ this(new BufferedInputStream(stream), charsetName, valueWidths, failOnInconsistentLineWidth);
+ }
- public FixedWidthReader(Reader reader, int[] valueWidths,
- boolean failOnInconsistentLineWidth) {
- this(new BufferedReader(reader), valueWidths,
- failOnInconsistentLineWidth);
- }
+ FixedWidthReader(BufferedInputStream stream, String charsetName, int[] valueWidths,
+ boolean failOnInconsistentLineWidth) {
+ _stream = stream;
+ _charsetName = charsetName;
+ _fixedValueWidth = -1;
+ _valueWidths = valueWidths;
+ _failOnInconsistentLineWidth = failOnInconsistentLineWidth;
+ _rowNumber = 0;
+ _constantWidth = false;
+ int expectedLineLength = 0;
- public FixedWidthReader(BufferedReader reader, int[] valueWidths,
- boolean failOnInconsistentLineWidth) {
- _reader = reader;
- int fixedValueWidth = -1;
- int expectedLineLength = 0;
- if (fixedValueWidth == -1) {
- for (int i = 0; i < valueWidths.length; i++) {
- expectedLineLength += valueWidths[i];
- }
- }
- final FixedWidthConfiguration fixedWidthConfiguration = new FixedWidthConfiguration(
- FixedWidthConfiguration.NO_COLUMN_NAME_LINE, null, valueWidths, failOnInconsistentLineWidth);
- _parser = new FixedWidthLineParser(fixedWidthConfiguration, expectedLineLength, 0);
- }
+ for (final int _valueWidth : _valueWidths) {
+ expectedLineLength += _valueWidth;
+ }
-
- /***
- * Reads the next line in the file.
- *
- * @return an array of values in the next line, or null if the end of the
- * file has been reached.
- *
- * @throws IllegalStateException
- * if an exception occurs while reading the file.
- */
- public String[] readLine() throws IllegalStateException {
- String line;
+ _expectedLineLength = expectedLineLength;
+ }
+
+ /**
+ * This reads and returns the next record from the file. Usually, it is a line but in case the new line characters
+ * are not present, the length of the content depends on the column-widths setting.
+ *
+ * @return an array of values in the next line, or null if the end of the file has been reached.
+ * @throws IllegalStateException if an exception occurs while reading the file.
+ */
+ public String[] readLine() throws IllegalStateException {
try {
- line = _reader.readLine();
- return _parser.parseLine(line);
+ beforeReadLine();
+ _rowNumber++;
+ return getValues();
} catch (IOException e) {
throw new IllegalStateException(e);
}
- }
-
+ }
- @Override
- public void close() throws IOException {
- _reader.close();
- }
+    /**
+     * Extension point invoked by {@link #readLine()} right before a record is read. The default implementation
+     * deliberately does nothing; sub-classed readers override it to add special behavior.
+     */
+    protected void beforeReadLine() {
+        // no-op by default
+    }
+    /**
+     * Reads one record and splits it into its values.
+     *
+     * @return the values of the record, or null when no further record data is available
+     * @throws IOException if reading the record data fails
+     */
+    private String[] getValues() throws IOException {
+        final String recordData = readSingleRecordData();
+        if (recordData == null) {
+            return null;
+        }
+
+        final List<String> parsedValues = new ArrayList<>();
+        processSingleRecordData(recordData, parsedValues);
+        final String[] parsed = parsedValues.toArray(new String[parsedValues.size()]);
+
+        // only lenient, variable-width reading gets its result adjusted to the configured column count
+        final boolean adjustToColumnCount = !(_failOnInconsistentLineWidth || _constantWidth);
+        final String[] result = adjustToColumnCount ? correctResult(parsed) : parsed;
+
+        validateConsistentValue(recordData, result, parsedValues.size());
+        return result;
+    }
+
+    /**
+     * Throws an {@link InconsistentValueWidthException} when strict checking is enabled and the record does not
+     * match the configured widths. Does nothing when inconsistencies are tolerated.
+     */
+    private void validateConsistentValue(String recordData, String[] result, int valuesSize) {
+        if (!_failOnInconsistentLineWidth) {
+            return;
+        }
+
+        final boolean inconsistent;
+        if (_constantWidth) {
+            // the record must consist of whole values only
+            inconsistent = recordData.length() % _fixedValueWidth != 0;
+        } else {
+            inconsistent = result.length != valuesSize || recordData.length() != _expectedLineLength;
+        }
+
+        if (inconsistent) {
+            throw new InconsistentValueWidthException(result, recordData, _rowNumber);
+        }
+    }
+
+    /**
+     * Walks through the record character by character and collects the parsed values into {@code values}.
+     * Characters left over after the last complete value are offered as one final value.
+     */
+    private void processSingleRecordData(final String singleRecordData, final List<String> values) {
+        final CharacterIterator characters = new StringCharacterIterator(singleRecordData);
+        final StringBuilder pendingValue = new StringBuilder();
+        _valueIndex = 0;
+
+        char current = characters.first();
+        while (current != CharacterIterator.DONE) {
+            processCharacter(current, pendingValue, values, singleRecordData);
+            current = characters.next();
+        }
+
+        if (pendingValue.length() != 0) {
+            addNewValueIfAppropriate(values, pendingValue);
+        }
+    }
+
+    /**
+     * Reads the bytes of the next line (terminated by CR, LF, CR+LF or the end of the stream) and decodes them
+     * with the configured charset. Line terminators are detected on the raw bytes.
+     * NOTE(review): byte-level terminator detection suits ASCII-compatible encodings - confirm for others.
+     *
+     * @return the decoded line without its terminator (possibly empty), or null when the end of the stream is
+     *         reached and no data is left
+     * @throws IOException if reading from the stream fails or the charset is not supported
+     */
+    String readSingleRecordData() throws IOException {
+        final java.io.ByteArrayOutputStream lineBytes = new java.io.ByteArrayOutputStream();
+        int ch;
+
+        for (ch = _stream.read(); !isEndingCharacter(ch); ch = _stream.read()) {
+            lineBytes.write(ch);
+        }
+
+        if (ch == CARRIAGE_RETURN) {
+            readLineFeedIfFollows();
+        }
+
+        // Only a drained stream means "no more records"; an empty line in the middle of the file must not be
+        // reported as end of file, otherwise every record after it would be dropped silently.
+        if (ch == END_OF_STREAM && lineBytes.size() == 0) {
+            return null;
+        }
+
+        // Decode the collected bytes as a whole so that the configured charset (including multi-byte ones) is
+        // honoured; without a charset fall back to the 1:1 byte-to-char mapping.
+        return lineBytes.toString(_charsetName == null ? "ISO-8859-1" : _charsetName);
+    }
+
+    /**
+     * Consumes the next byte if it is a line feed; any other byte (or the end of the stream) is left unread by
+     * resetting the stream to the marked position.
+     */
+    private void readLineFeedIfFollows() throws IOException {
+        _stream.mark(1);
+        final int following = _stream.read();
+
+        if (following == LINE_FEED) {
+            return;
+        }
+
+        _stream.reset();
+    }
+
+    /**
+     * Tells whether the given read result terminates a line: carriage return, line feed or end of stream.
+     */
+    private boolean isEndingCharacter(int ch) {
+        switch (ch) {
+        case CARRIAGE_RETURN:
+        case LINE_FEED:
+        case END_OF_STREAM:
+            return true;
+        default:
+            return false;
+        }
+    }
+
+    /**
+     * Appends the character to the value being collected and, once that value has reached its width, stores it
+     * and moves on to the next column.
+     */
+    private void processCharacter(char c, StringBuilder nextValue, List<String> values, String recordData) {
+        nextValue.append(c);
+
+        if (nextValue.length() != getValueWidth(values, recordData)) {
+            return; // the current value is not complete yet
+        }
+
+        addNewValueIfAppropriate(values, nextValue);
+        nextValue.setLength(0); // clear the buffer
+
+        if (_valueWidths != null) {
+            _valueIndex = (_valueIndex + 1) % _valueWidths.length;
+        }
+    }
+
+    /**
+     * Resolves the width of the value that is currently being collected.
+     *
+     * @param values the values parsed so far from the current record (only used for error reporting)
+     * @param recordData the record being parsed (only used for error reporting)
+     * @return the constant width, the width configured for the current column, or -1 when the column index is
+     *         out of range and inconsistencies are tolerated
+     * @throws InconsistentValueWidthException if the column index is out of range and the reader is configured
+     *         to fail on inconsistent line widths
+     */
+    private int getValueWidth(List<String> values, String recordData) {
+        if (_constantWidth) {
+            return _fixedValueWidth;
+        }
+
+        if (_valueIndex >= _valueWidths.length) {
+            if (_failOnInconsistentLineWidth) {
+                final String[] result = values.toArray(new String[values.size()]);
+                // readLine() increments _rowNumber before parsing starts, so it already denotes the current
+                // row; adding one here would report the wrong row and disagree with validateConsistentValue().
+                throw new InconsistentValueWidthException(result, recordData, _rowNumber);
+            }
+
+            return -1; // silently ignore the inconsistency
+        }
+
+        return _valueWidths[_valueIndex];
+    }
+
+    /**
+     * Stores the trimmed value unless individual column widths are configured and all columns are already
+     * filled.
+     */
+    private void addNewValueIfAppropriate(List<String> values, StringBuilder nextValue) {
+        final boolean roomLeft = _valueWidths == null || values.size() < _valueWidths.length;
+
+        if (roomLeft) {
+            values.add(nextValue.toString().trim());
+        }
+    }
+
+    /**
+     * Returns an array with exactly one slot per configured column: missing trailing values become null and
+     * surplus values are dropped. An array that already has the right length is returned as-is.
+     */
+    private String[] correctResult(String[] result) {
+        final int columnCount = _valueWidths.length;
+
+        if (result.length == columnCount) {
+            return result;
+        }
+
+        final String[] corrected = new String[columnCount];
+        System.arraycopy(result, 0, corrected, 0, Math.min(result.length, columnCount));
+        return corrected;
+    }
+
+ /**
+ * Closes the underlying input stream.
+ *
+ * @throws IOException if closing the stream fails
+ */
+ @Override
+ public void close() throws IOException {
+ _stream.close();
+ }
}
diff --git a/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/EBCDICTest.java b/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/EBCDICTest.java
new file mode 100644
index 0000000..ea19960
--- /dev/null
+++ b/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/EBCDICTest.java
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.metamodel.fixedwidth;
+
+import java.io.File;
+
+import org.apache.metamodel.data.DataSet;
+import org.apache.metamodel.schema.Schema;
+import org.apache.metamodel.schema.Table;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Reads an EBCDIC encoded fixed-width file that has a header and no end-of-line characters.
+ */
+public class EBCDICTest {
+    private static final int[] COLUMN_WIDTHS = new int[] { 2, 7, 10, 10 };
+    private static final long EXPECTED_ROWS_COUNT = 49; // 50 lines, 1. is a header
+    private static final String ENCODING = "IBM500";
+    private static final String[] EXPECTED_ROWS = new String[] {
+            "Row[values=[01, name-01, surname-01, address-01]]",
+            "Row[values=[02, name-02, surname-02, address-02]]",
+            "Row[values=[03, name-03, surname-03, address-03]]",
+    };
+    private final FixedWidthDataContext _context;
+    private final Table _table;
+
+    public EBCDICTest() {
+        String fileName = "fixed-width-2-7-10-10.ebc";
+        FixedWidthConfiguration configuration = new EbcdicConfiguration(FixedWidthConfiguration.NO_COLUMN_NAME_LINE,
+                ENCODING, COLUMN_WIDTHS, false, true, false);
+        _context = new FixedWidthDataContext(new File("src/test/resources/" + fileName), configuration);
+        Schema schema = _context.getDefaultSchema();
+        _table = schema.getTableByName(fileName);
+    }
+
+    @Test
+    public void testRowsCount() throws Exception {
+        long rows = 0;
+
+        try (final DataSet dataSet = _context.query().from(_table).selectCount().execute()) {
+            if (dataSet.next()) {
+                Object[] values = dataSet.getRow().getValues();
+                rows = (long) values[0];
+            }
+        }
+
+        assertEquals(EXPECTED_ROWS_COUNT, rows);
+    }
+
+    @Test
+    public void testFirstRows() throws Exception {
+        int limit = EXPECTED_ROWS.length;
+        int i = 0;
+
+        try (final DataSet dataSet = _context.query().from(_table).selectAll().limit(limit).execute()) {
+            while (dataSet.next()) {
+                assertEquals(EXPECTED_ROWS[i], dataSet.getRow().toString());
+                i++;
+            }
+        }
+
+        // without this check the test would pass even if the data set returned no (or too few) rows
+        assertEquals(limit, i);
+    }
+}
diff --git a/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthConfigurationTest.java b/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthConfigurationTest.java
index 8225be0..f03d633 100644
--- a/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthConfigurationTest.java
+++ b/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthConfigurationTest.java
@@ -18,8 +18,6 @@
*/
package org.apache.metamodel.fixedwidth;
-import org.apache.metamodel.fixedwidth.FixedWidthConfiguration;
-
import junit.framework.TestCase;
public class FixedWidthConfigurationTest extends TestCase {
@@ -31,14 +29,11 @@
}
public void testEquals() throws Exception {
- FixedWidthConfiguration conf1 = new FixedWidthConfiguration(1, "UTF8",
- 10, true);
- FixedWidthConfiguration conf2 = new FixedWidthConfiguration(1, "UTF8",
- 10, true);
+ FixedWidthConfiguration conf1 = new FixedWidthConfiguration(1, "UTF8", 10, true);
+ FixedWidthConfiguration conf2 = new FixedWidthConfiguration(1, "UTF8", 10, true);
assertEquals(conf1, conf2);
- FixedWidthConfiguration conf3 = new FixedWidthConfiguration(1, "UTF8",
- 10, false);
+ FixedWidthConfiguration conf3 = new FixedWidthConfiguration(1, "UTF8", 10, false);
assertFalse(conf1.equals(conf3));
}
}
diff --git a/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthDataContextTest.java b/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthDataContextTest.java
index 2ac3680..7962cf6 100644
--- a/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthDataContextTest.java
+++ b/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthDataContextTest.java
@@ -25,9 +25,6 @@
import org.apache.metamodel.DataContext;
import org.apache.metamodel.data.DataSet;
-import org.apache.metamodel.fixedwidth.FixedWidthConfiguration;
-import org.apache.metamodel.fixedwidth.FixedWidthDataContext;
-import org.apache.metamodel.fixedwidth.InconsistentValueWidthException;
import org.apache.metamodel.query.Query;
import org.apache.metamodel.schema.Schema;
import org.apache.metamodel.schema.Table;
diff --git a/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthReaderTest.java b/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthReaderTest.java
index 4d11f0e..8f40c1d 100644
--- a/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthReaderTest.java
+++ b/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthReaderTest.java
@@ -18,11 +18,9 @@
*/
package org.apache.metamodel.fixedwidth;
-import static org.junit.Assert.assertEquals;
-
-import java.io.BufferedReader;
+import java.io.BufferedInputStream;
import java.io.File;
-import java.io.FileReader;
+import java.io.FileInputStream;
import java.io.IOException;
import java.util.Arrays;
@@ -30,7 +28,10 @@
import org.junit.Test;
import org.junit.rules.ExpectedException;
+import static org.junit.Assert.assertEquals;
+
public class FixedWidthReaderTest {
+ private static final String CHARSET = "UTF-8";
@Rule
public final ExpectedException exception = ExpectedException.none();
@@ -38,9 +39,9 @@
@Test
public void testBufferedReader1() throws IOException {
final File file = new File("src/test/resources/example_simple1.txt");
- final BufferedReader reader = new BufferedReader(new FileReader(file));
+ final BufferedInputStream stream = new BufferedInputStream(new FileInputStream(file));
int[] widths = new int[] { 8, 9 };
- try (final FixedWidthReader fixedWidthReader = new FixedWidthReader(reader, widths, false)) {
+ try (final FixedWidthReader fixedWidthReader = new FixedWidthReader(stream, CHARSET, widths, false)) {
final String[] line1 = fixedWidthReader.readLine();
assertEquals("[greeting, greeter]", Arrays.asList(line1).toString());
final String[] line2 = fixedWidthReader.readLine();
@@ -53,9 +54,9 @@
@Test
public void testBufferedReader2() throws IOException {
final File file = new File("src/test/resources/example_simple2.txt");
- final BufferedReader reader = new BufferedReader(new FileReader(file));
+ final BufferedInputStream stream = new BufferedInputStream(new FileInputStream(file));
int[] widths = new int[] {1, 8, 9 };
- try (final FixedWidthReader fixedWidthReader = new FixedWidthReader(reader, widths, false)) {
+ try (final FixedWidthReader fixedWidthReader = new FixedWidthReader(stream, CHARSET, widths, false)) {
final String[] line1 = fixedWidthReader.readLine();
assertEquals("[i, greeting, greeter]", Arrays.asList(line1).toString());
final String[] line2 = fixedWidthReader.readLine();
@@ -68,8 +69,8 @@
@Test
public void testBufferedReader3() throws IOException {
final File file = new File("src/test/resources/example_simple3.txt");
- final BufferedReader reader = new BufferedReader(new FileReader(file));
- try (final FixedWidthReader fixedWidthReader = new FixedWidthReader(reader, 5, false)) {
+ final BufferedInputStream stream = new BufferedInputStream(new FileInputStream(file));
+ try (final FixedWidthReader fixedWidthReader = new FixedWidthReader(stream, CHARSET, 5, false)) {
final String[] line1 = fixedWidthReader.readLine();
assertEquals("[hello]", Arrays.asList(line1).toString());
final String[] line2 = fixedWidthReader.readLine();
@@ -84,8 +85,8 @@
@Test
public void testBufferedReaderFailOnInconsistentRows() throws IOException {
final File file = new File("src/test/resources/example_simple3.txt");
- final BufferedReader reader = new BufferedReader(new FileReader(file));
- try (final FixedWidthReader fixedWidthReader = new FixedWidthReader(reader, 5, true)) {
+ final BufferedInputStream stream = new BufferedInputStream(new FileInputStream(file));
+ try (final FixedWidthReader fixedWidthReader = new FixedWidthReader(stream, CHARSET, 5, true)) {
final String[] line1 = fixedWidthReader.readLine();
assertEquals("[hello]", Arrays.asList(line1).toString());
final String[] line2 = fixedWidthReader.readLine();
@@ -98,6 +99,4 @@
final String[] line4 = fixedWidthReader.readLine();
}
}
-
-
}
diff --git a/fixedwidth/src/test/resources/fixed-width-2-7-10-10.ebc b/fixedwidth/src/test/resources/fixed-width-2-7-10-10.ebc
new file mode 100644
index 0000000..09fcc70
--- /dev/null
+++ b/fixedwidth/src/test/resources/fixed-width-2-7-10-10.ebc
@@ -0,0 +1 @@
+ÉÄÕÁÔÅ`ÉÄâäÙÕÁÔÅ`ÉÄÁÄÄÙÅââ`ÉÄðñ
`ðñ¢¤
`ðñ
¢¢`ðñðò
`ðò¢¤
`ðò
¢¢`ðòðó
`ðó¢¤
`ðó
¢¢`ðóðô
`ðô¢¤
`ðô
¢¢`ðôðõ
`ðõ¢¤
`ðõ
¢¢`ðõðö
`ðö¢¤
`ðö
¢¢`ðöð÷
`ð÷¢¤
`ð÷
¢¢`ð÷ðø
`ðø¢¤
`ðø
¢¢`ðøðù
`ðù¢¤
`ðù
¢¢`ðùñð
`ñð¢¤
`ñð
¢¢`ñðññ
`ññ¢¤
`ññ
¢¢`ñññò
`ñò¢¤
`ñò
¢¢`ñòñó
`ñó¢¤
`ñó
¢¢`ñóñô
`ñô¢¤
`ñô
¢¢`ñôñõ
`ñõ¢¤
`ñõ
¢¢`ñõñö
`ñö¢¤
`ñö
¢¢`ñöñ÷
`ñ÷¢¤
`ñ÷
¢¢`ñ÷ñø
`ñø¢¤
`ñø
¢¢`ñøñù
`ñù¢¤
`ñù
¢¢`ñùòð
`òð¢¤
`òð
¢¢`òðòñ
`òñ¢¤
`òñ
¢¢`òñòò
`òò¢¤
`òò
¢¢`òòòó
`òó¢¤
`òó
¢¢`òóòô
`òô¢¤
`òô
¢¢`òôòõ
`òõ¢¤
`òõ
¢¢`òõòö
`òö¢¤
`òö
¢¢`òöò÷
`ò÷¢¤
`ò÷
¢¢`ò÷òø
`òø¢¤
`òø
¢¢`òøòù
`òù¢¤
`òù
¢¢`òùóð
`óð¢¤
`óð
¢¢`óðóñ
`óñ¢¤
`óñ
¢¢`óñóò
`óò¢¤
`óò
¢¢`óòóó
`óó¢¤
`óó
¢¢`óóóô
`óô¢¤
`óô
¢¢`óôóõ
`óõ¢¤
`óõ
¢¢`óõóö
`óö¢¤
`óö
¢¢`óöó÷
`ó÷¢¤
`ó÷
¢¢`ó÷óø
`óø¢¤
`óø
¢¢`óøóù
`óù¢¤
`óù
¢¢`óùôð
`ôð¢¤
`ôð
¢¢`ôðôñ
`ôñ¢¤
`ôñ
¢¢`ôñôò
`ôò¢¤
`ôò
¢¢`ôòôó
`ôó¢¤
`ôó
¢¢`ôóôô
`ôô¢¤
`ôô
¢¢`ôôôõ
`ôõ¢¤
`ôõ
¢¢`ôõôö
`ôö¢¤
`ôö
¢¢`ôöô÷
`ô÷¢¤
`ô÷
¢¢`ô÷ôø
`ôø¢¤
`ôø
¢¢`ôøôù
`ôù¢¤
`ôù
¢¢`ôù
\ No newline at end of file