blob: 7b96d4daefd6acd78943eae3c5a55a5fa68d2438 [file] [log] [blame]
/*******************************************************************************
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*******************************************************************************/
package org.ofbiz.datafile;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.Stack;
/**
* Record Iterator for reading large files
* Note: this is memory intensive and will not handle files that exceed memory.
*
*/
public class RecordIterator {
public static final String module = RecordIterator.class.getName();
protected BufferedReader br;
protected ModelDataFile modelDataFile;
protected InputStream dataFileStream;
protected boolean closed = false;
protected String locationInfo;
protected int nextLineNum = 0;
protected String curLine = null;
protected Record curRecord = null;
protected String nextLine = null;
protected Record nextRecord = null;
protected String eof = "\u001A"; // aka ASCII char 26, aka substitute, aka 0x1A, aka CTRL-Z, aka EOF DOS character. Added because problems in some DOS file, specifically file extracted from zip archives.
public RecordIterator(URL fileUrl, ModelDataFile modelDataFile) throws DataFileException {
this.modelDataFile = modelDataFile;
InputStream urlStream = null;
try {
urlStream = fileUrl.openStream();
} catch (IOException e) {
throw new DataFileException("Error open URL: " + fileUrl.toString(), e);
}
this.setupStream(urlStream, fileUrl.toString());
}
public RecordIterator(InputStream dataFileStream, ModelDataFile modelDataFile, String locationInfo) throws DataFileException {
this.modelDataFile = modelDataFile;
this.setupStream(dataFileStream, locationInfo);
}
protected void setupStream(InputStream dataFileStream, String locationInfo) throws DataFileException {
this.locationInfo = locationInfo;
this.dataFileStream = dataFileStream;
try {
this.br = new BufferedReader(new InputStreamReader(dataFileStream, "UTF-8"));
} catch (Exception e) {
throw new DataFileException("UTF-8 is not supported");
}
// get the line seeded
this.getNextLine();
}
protected boolean getNextLine() throws DataFileException {
this.nextLine = null;
this.nextRecord = null;
boolean isFixedRecord = ModelDataFile.SEP_FIXED_RECORD.equals(modelDataFile.separatorStyle);
boolean isDelimited = ModelDataFile.SEP_DELIMITED.equals(modelDataFile.separatorStyle);
// if (Debug.infoOn()) Debug.logInfo("[DataFile.readDataFile] separatorStyle is " + modelDataFile.separatorStyle + ", isFixedRecord: " + isFixedRecord, module);
if (isFixedRecord) {
if (modelDataFile.recordLength <= 0) {
throw new DataFileException("Cannot read a fixed record length file if no record length is specified");
}
try {
char[] charData = new char[modelDataFile.recordLength + 1];
// if (Debug.infoOn()) Debug.logInfo("[DataFile.readDataFile] reading line " + lineNum + " from position " + (lineNum-1)*modelDataFile.recordLength + ", length is " + modelDataFile.recordLength, module);
if (br.read(charData, 0, modelDataFile.recordLength) == -1) {
nextLine = null;
// Debug.logInfo("[DataFile.readDataFile] found end of file, got -1", module);
} else {
nextLine = new String(charData);
// if (Debug.infoOn()) Debug.logInfo("[DataFile.readDataFile] read line " + lineNum + " line is: \"" + line + "\"", module);
}
} catch (IOException e) {
throw new DataFileException("Error reading line #" + nextLineNum + " (index " + (nextLineNum - 1) * modelDataFile.recordLength + " length " +
modelDataFile.recordLength + ") from location: " + locationInfo, e);
}
} else {
try {
nextLine = br.readLine();
//Debug.logInfo("br.readLine()=\"" + nextLine + "\"", module);
} catch (IOException e) {
throw new DataFileException("Error reading line #" + nextLineNum + " from location: " + locationInfo, e);
}
}
//if (nextLine != null && !(eof.equals(nextLine.substring(0,1)) && 1 == nextLine.length())) {
if (nextLine != null && !((nextLine.contains(eof) ) )) {
nextLineNum++;
ModelRecord modelRecord = findModelForLine(nextLine, nextLineNum, modelDataFile);
if (isDelimited) {
this.nextRecord = Record.createDelimitedRecord(nextLine, nextLineNum, modelRecord, modelDataFile.delimiter, modelDataFile.textDelimiter);
} else {
this.nextRecord = Record.createRecord(nextLine, nextLineNum, modelRecord);
}
return true;
} else {
this.close();
return false;
}
}
public int getCurrentLineNumber() {
return this.nextLineNum - 1;
}
public boolean hasNext() {
//return nextLine != null && !(eof.equals(nextLine.substring(0,1)) && 1 == nextLine.length());
return nextLine != null && !((nextLine.contains(eof) ) );
}
public Record next() throws DataFileException {
if (!hasNext()) {
return null;
}
if (ModelDataFile.SEP_DELIMITED.equals(modelDataFile.separatorStyle) || ModelDataFile.SEP_FIXED_RECORD.equals(modelDataFile.separatorStyle) || ModelDataFile.SEP_FIXED_LENGTH.equals(modelDataFile.separatorStyle)) {
boolean isFixedRecord = ModelDataFile.SEP_FIXED_RECORD.equals(modelDataFile.separatorStyle);
// if (Debug.infoOn()) Debug.logInfo("[DataFile.readDataFile] separatorStyle is " + modelDataFile.separatorStyle + ", isFixedRecord: " + isFixedRecord, module);
// advance the line (we have already checked to make sure there is a next line
this.curLine = this.nextLine;
this.curRecord = this.nextRecord;
// get a new next line
this.getNextLine();
// first check to see if the file type has a line size, and if so if this line complies
if (!isFixedRecord && modelDataFile.recordLength > 0 && curLine.length() != modelDataFile.recordLength) {
throw new DataFileException("Line number " + this.getCurrentLineNumber() + " was not the expected length; expected: " + modelDataFile.recordLength + ", got: " + curLine.length());
}
// if this record has children, put it on the parentStack and get/check the children now
if (this.curRecord.getModelRecord().childRecords.size() > 0) {
Stack<Record> parentStack = new Stack<Record>();
parentStack.push(curRecord);
while (this.nextRecord != null && this.nextRecord.getModelRecord().parentRecord != null) {
// if parent equals top parent on stack, add to that parents child list, otherwise pop off parent and try again
Record parentRecord = null;
while (parentStack.size() > 0) {
parentRecord = parentStack.peek();
if (parentRecord.recordName.equals(this.nextRecord.getModelRecord().parentName)) {
break;
} else {
parentStack.pop();
parentRecord = null;
}
}
if (parentRecord == null) {
throw new DataFileException("Expected Parent Record not found for line " + this.getCurrentLineNumber() + "; record name of expected parent is " + this.nextRecord.getModelRecord().parentName);
}
parentRecord.addChildRecord(this.nextRecord);
// if the child record we just added is also a parent, push it onto the stack
if (this.nextRecord.getModelRecord().childRecords.size() > 0) {
parentStack.push(this.nextRecord);
}
// if it can't find a next line it will nextRecord will be null and the loop will break out
this.getNextLine();
}
}
} else {
throw new DataFileException("Separator style " + modelDataFile.separatorStyle + " not recognized.");
}
return curRecord;
}
public void close() throws DataFileException {
if (this.closed) {
return;
}
try {
this.br.close(); // this should also close the stream
this.closed = true;
} catch (IOException e) {
throw new DataFileException("Error closing data file input stream", e);
}
}
/** Searches through the record models to find one with a matching type-code, if no type-code exists that model will always be used if it gets to it
* @param line
* @param lineNum
* @param modelDataFile
* @throws DataFileException Exception thown for various errors, generally has a nested exception
* @return return the ModelRecord Object found
*/
protected static ModelRecord findModelForLine(String line, int lineNum, ModelDataFile modelDataFile) throws DataFileException {
// if (Debug.infoOn()) Debug.logInfo("[DataFile.findModelForLine] line: " + line, module);
ModelRecord modelRecord = null;
for (ModelRecord curModelRecord: modelDataFile.records) {
if (curModelRecord.tcPosition < 0) {
modelRecord = curModelRecord;
break;
}
String typeCode = line.substring(curModelRecord.tcPosition, curModelRecord.tcPosition + curModelRecord.tcLength);
// try to match with a single typecode
if (curModelRecord.typeCode.length() > 0) {
// if (Debug.infoOn()) Debug.logInfo("[DataFile.findModelForLine] Doing plain typecode match - code=" + curModelRecord.typeCode + ", filelinecode=" + typeCode, module);
if (typeCode != null && typeCode.equals(curModelRecord.typeCode)) {
modelRecord = curModelRecord;
break;
}
} // try to match a ranged typecode (tcMin <= typeCode <= tcMax)
else if (curModelRecord.tcMin.length() > 0 || curModelRecord.tcMax.length() > 0) {
if (curModelRecord.tcIsNum) {
// if (Debug.infoOn()) Debug.logInfo("[DataFile.findModelForLine] Doing ranged number typecode match - minNum=" + curModelRecord.tcMinNum + ", maxNum=" + curModelRecord.tcMaxNum + ", filelinecode=" + typeCode, module);
long typeCodeNum = Long.parseLong(typeCode);
if ((curModelRecord.tcMinNum < 0 || typeCodeNum >= curModelRecord.tcMinNum) &&
(curModelRecord.tcMaxNum < 0 || typeCodeNum <= curModelRecord.tcMaxNum)) {
modelRecord = curModelRecord;
break;
}
} else {
// if (Debug.infoOn()) Debug.logInfo("[DataFile.findModelForLine] Doing ranged String typecode match - min=" + curModelRecord.tcMin + ", max=" + curModelRecord.tcMax + ", filelinecode=" + typeCode, module);
if ((typeCode.compareTo(curModelRecord.tcMin) >= 0) && (typeCode.compareTo(curModelRecord.tcMax) <= 0)) {
modelRecord = curModelRecord;
break;
}
}
}
}
if (modelRecord == null) {
throw new DataFileException("Could not find record definition for line " + lineNum + "; first bytes: " +
line.substring(0, (line.length() > 5) ? 5 : line.length()));
}
// if (Debug.infoOn()) Debug.logInfo("[DataFile.findModelForLine] Got record model named " + modelRecord.name, module);
return modelRecord;
}
}