| /******************************************************************************* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| *******************************************************************************/ |
| package org.apache.ofbiz.datafile; |
| |
| |
| import java.io.BufferedReader; |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.io.InputStreamReader; |
| import java.net.URL; |
| import java.util.Stack; |
| |
| /** |
| * Record Iterator for reading large files |
| * Note: this is a memory intensive and will not handle files that exceed memory. |
| * |
| */ |
| |
| public class RecordIterator { |
| |
| public static final String module = RecordIterator.class.getName(); |
| |
| protected BufferedReader br; |
| protected ModelDataFile modelDataFile; |
| protected InputStream dataFileStream; |
| protected boolean closed = false; |
| protected String locationInfo; |
| |
| protected int nextLineNum = 0; |
| protected String curLine = null; |
| protected Record curRecord = null; |
| protected String nextLine = null; |
| protected Record nextRecord = null; |
| protected String eof = "\u001A"; // aka ASCII char 26, aka substitute, aka 0x1A, aka CTRL-Z, aka EOF DOS character. Added because problems in some DOS file, specifically file extracted from zip archives. |
| |
| public RecordIterator(URL fileUrl, ModelDataFile modelDataFile) throws DataFileException { |
| this.modelDataFile = modelDataFile; |
| |
| InputStream urlStream = null; |
| try { |
| urlStream = fileUrl.openStream(); |
| } catch (IOException e) { |
| throw new DataFileException("Error open URL: " + fileUrl.toString(), e); |
| } |
| this.setupStream(urlStream, fileUrl.toString()); |
| } |
| |
| public RecordIterator(InputStream dataFileStream, ModelDataFile modelDataFile, String locationInfo) throws DataFileException { |
| this.modelDataFile = modelDataFile; |
| this.setupStream(dataFileStream, locationInfo); |
| } |
| |
| protected void setupStream(InputStream dataFileStream, String locationInfo) throws DataFileException { |
| this.locationInfo = locationInfo; |
| this.dataFileStream = dataFileStream; |
| try { |
| this.br = new BufferedReader(new InputStreamReader(dataFileStream, "UTF-8")); |
| } catch (Exception e) { |
| throw new DataFileException("UTF-8 is not supported"); |
| } |
| // get the line seeded |
| this.getNextLine(); |
| } |
| |
| protected boolean getNextLine() throws DataFileException { |
| this.nextLine = null; |
| this.nextRecord = null; |
| |
| boolean isFixedRecord = ModelDataFile.SEP_FIXED_RECORD.equals(modelDataFile.separatorStyle); |
| boolean isDelimited = ModelDataFile.SEP_DELIMITED.equals(modelDataFile.separatorStyle); |
| // if (Debug.infoOn()) Debug.logInfo("[DataFile.readDataFile] separatorStyle is " + modelDataFile.separatorStyle + ", isFixedRecord: " + isFixedRecord, module); |
| |
| if (isFixedRecord) { |
| if (modelDataFile.recordLength <= 0) { |
| throw new DataFileException("Cannot read a fixed record length file if no record length is specified"); |
| } |
| try { |
| char[] charData = new char[modelDataFile.recordLength + 1]; |
| |
| // if (Debug.infoOn()) Debug.logInfo("[DataFile.readDataFile] reading line " + lineNum + " from position " + (lineNum-1)*modelDataFile.recordLength + ", length is " + modelDataFile.recordLength, module); |
| if (br.read(charData, 0, modelDataFile.recordLength) == -1) { |
| nextLine = null; |
| // Debug.logInfo("[DataFile.readDataFile] found end of file, got -1", module); |
| } else { |
| nextLine = new String(charData); |
| // if (Debug.infoOn()) Debug.logInfo("[DataFile.readDataFile] read line " + lineNum + " line is: \"" + line + "\"", module); |
| } |
| } catch (IOException e) { |
| throw new DataFileException("Error reading line #" + nextLineNum + " (index " + (nextLineNum - 1) * modelDataFile.recordLength + " length " + |
| modelDataFile.recordLength + ") from location: " + locationInfo, e); |
| } |
| } else { |
| try { |
| nextLine = br.readLine(); |
| //Debug.logInfo("br.readLine()=\"" + nextLine + "\"", module); |
| } catch (IOException e) { |
| throw new DataFileException("Error reading line #" + nextLineNum + " from location: " + locationInfo, e); |
| } |
| } |
| |
| //if (nextLine != null && !(eof.equals(nextLine.substring(0,1)) && 1 == nextLine.length())) { |
| if (nextLine != null && !((nextLine.contains(eof) ) )) { |
| nextLineNum++; |
| ModelRecord modelRecord = findModelForLine(nextLine, nextLineNum, modelDataFile); |
| if (isDelimited) { |
| this.nextRecord = Record.createDelimitedRecord(nextLine, nextLineNum, modelRecord, modelDataFile.delimiter, modelDataFile.textDelimiter); |
| } else { |
| this.nextRecord = Record.createRecord(nextLine, nextLineNum, modelRecord); |
| } |
| return true; |
| } else { |
| this.close(); |
| return false; |
| } |
| } |
| |
| public int getCurrentLineNumber() { |
| return this.nextLineNum - 1; |
| } |
| |
| public boolean hasNext() { |
| //return nextLine != null && !(eof.equals(nextLine.substring(0,1)) && 1 == nextLine.length()); |
| return nextLine != null && !((nextLine.contains(eof) ) ); |
| } |
| |
| public Record next() throws DataFileException { |
| if (!hasNext()) { |
| return null; |
| } |
| |
| if (ModelDataFile.SEP_DELIMITED.equals(modelDataFile.separatorStyle) || ModelDataFile.SEP_FIXED_RECORD.equals(modelDataFile.separatorStyle) || ModelDataFile.SEP_FIXED_LENGTH.equals(modelDataFile.separatorStyle)) { |
| boolean isFixedRecord = ModelDataFile.SEP_FIXED_RECORD.equals(modelDataFile.separatorStyle); |
| // if (Debug.infoOn()) Debug.logInfo("[DataFile.readDataFile] separatorStyle is " + modelDataFile.separatorStyle + ", isFixedRecord: " + isFixedRecord, module); |
| // advance the line (we have already checked to make sure there is a next line |
| this.curLine = this.nextLine; |
| this.curRecord = this.nextRecord; |
| |
| // get a new next line |
| this.getNextLine(); |
| |
| // first check to see if the file type has a line size, and if so if this line complies |
| if (!isFixedRecord && modelDataFile.recordLength > 0 && curLine.length() != modelDataFile.recordLength) { |
| throw new DataFileException("Line number " + this.getCurrentLineNumber() + " was not the expected length; expected: " + modelDataFile.recordLength + ", got: " + curLine.length()); |
| } |
| |
| // if this record has children, put it on the parentStack and get/check the children now |
| if (this.curRecord.getModelRecord().childRecords.size() > 0) { |
| Stack<Record> parentStack = new Stack<Record>(); |
| parentStack.push(curRecord); |
| |
| while (this.nextRecord != null && this.nextRecord.getModelRecord().parentRecord != null) { |
| // if parent equals top parent on stack, add to that parents child list, otherwise pop off parent and try again |
| Record parentRecord = null; |
| |
| while (parentStack.size() > 0) { |
| parentRecord = parentStack.peek(); |
| if (parentRecord.recordName.equals(this.nextRecord.getModelRecord().parentName)) { |
| break; |
| } else { |
| parentStack.pop(); |
| parentRecord = null; |
| } |
| } |
| if (parentRecord == null) { |
| throw new DataFileException("Expected Parent Record not found for line " + this.getCurrentLineNumber() + "; record name of expected parent is " + this.nextRecord.getModelRecord().parentName); |
| } |
| parentRecord.addChildRecord(this.nextRecord); |
| |
| // if the child record we just added is also a parent, push it onto the stack |
| if (this.nextRecord.getModelRecord().childRecords.size() > 0) { |
| parentStack.push(this.nextRecord); |
| } |
| // if it can't find a next line it will nextRecord will be null and the loop will break out |
| this.getNextLine(); |
| } |
| } |
| } else { |
| throw new DataFileException("Separator style " + modelDataFile.separatorStyle + " not recognized."); |
| } |
| return curRecord; |
| } |
| |
| public void close() throws DataFileException { |
| if (this.closed) { |
| return; |
| } |
| try { |
| this.br.close(); // this should also close the stream |
| this.closed = true; |
| } catch (IOException e) { |
| throw new DataFileException("Error closing data file input stream", e); |
| } |
| } |
| |
| |
| /** Searches through the record models to find one with a matching type-code, if no type-code exists that model will always be used if it gets to it |
| * @param line |
| * @param lineNum |
| * @param modelDataFile |
| * @throws DataFileException Exception thown for various errors, generally has a nested exception |
| * @return return the ModelRecord Object found |
| */ |
| protected static ModelRecord findModelForLine(String line, int lineNum, ModelDataFile modelDataFile) throws DataFileException { |
| // if (Debug.infoOn()) Debug.logInfo("[DataFile.findModelForLine] line: " + line, module); |
| ModelRecord modelRecord = null; |
| |
| for (ModelRecord curModelRecord: modelDataFile.records) { |
| if (curModelRecord.tcPosition < 0) { |
| modelRecord = curModelRecord; |
| break; |
| } |
| String typeCode = line.substring(curModelRecord.tcPosition, curModelRecord.tcPosition + curModelRecord.tcLength); |
| |
| // try to match with a single typecode |
| if (curModelRecord.typeCode.length() > 0) { |
| // if (Debug.infoOn()) Debug.logInfo("[DataFile.findModelForLine] Doing plain typecode match - code=" + curModelRecord.typeCode + ", filelinecode=" + typeCode, module); |
| if (typeCode != null && typeCode.equals(curModelRecord.typeCode)) { |
| modelRecord = curModelRecord; |
| break; |
| } |
| } // try to match a ranged typecode (tcMin <= typeCode <= tcMax) |
| else if (curModelRecord.tcMin.length() > 0 || curModelRecord.tcMax.length() > 0) { |
| if (curModelRecord.tcIsNum) { |
| // if (Debug.infoOn()) Debug.logInfo("[DataFile.findModelForLine] Doing ranged number typecode match - minNum=" + curModelRecord.tcMinNum + ", maxNum=" + curModelRecord.tcMaxNum + ", filelinecode=" + typeCode, module); |
| long typeCodeNum = Long.parseLong(typeCode); |
| if ((curModelRecord.tcMinNum < 0 || typeCodeNum >= curModelRecord.tcMinNum) && |
| (curModelRecord.tcMaxNum < 0 || typeCodeNum <= curModelRecord.tcMaxNum)) { |
| modelRecord = curModelRecord; |
| break; |
| } |
| } else { |
| // if (Debug.infoOn()) Debug.logInfo("[DataFile.findModelForLine] Doing ranged String typecode match - min=" + curModelRecord.tcMin + ", max=" + curModelRecord.tcMax + ", filelinecode=" + typeCode, module); |
| if ((typeCode.compareTo(curModelRecord.tcMin) >= 0) && (typeCode.compareTo(curModelRecord.tcMax) <= 0)) { |
| modelRecord = curModelRecord; |
| break; |
| } |
| } |
| } |
| } |
| |
| if (modelRecord == null) { |
| throw new DataFileException("Could not find record definition for line " + lineNum + "; first bytes: " + |
| line.substring(0, (line.length() > 5) ? 5 : line.length())); |
| } |
| // if (Debug.infoOn()) Debug.logInfo("[DataFile.findModelForLine] Got record model named " + modelRecord.name, module); |
| return modelRecord; |
| } |
| } |