| /************************************************************** |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| * |
| *************************************************************/ |
| |
| |
| /* |
| * SDFReader.java |
| * |
| * |
| */ |
| |
| package com.sun.star.tooling.converter; |
| |
| import java.io.File; |
| import java.io.FileInputStream; |
| import java.io.IOException; |
| import java.io.InputStreamReader; |
| import java.text.DecimalFormat; |
| import java.util.*; |
| |
| /** |
| * |
| * SDFReader is a FileReader that knows about |
| * the content of SDFFiles |
| * |
| * An SDFBlock is read from the given file. An SDFBlock |
| * consists of all SDFLines that are translations of the |
| * same String and the SDFLine containing the source string |
| * itself. SDFFile lines are read and checked whether they |
| * have the allowed column count and don't contain illegal |
| * characters (control characters with a code below 30, except the tab character). |
| * If the given source language is not found in the first |
| * block of SDFLines a ConverterException is thrown at runtime. |
| * If the given target language is "" (that means not given) |
| * the first language that is not the given source language |
| * is taken for target language. The found values are returned in HashMaps that |
| * use the following keys: |
| * <br/> |
| * "BlockNr" originally coming from reading the sdf file, contains 'block nr in sdf file'+'-'+'hash value of the sdf id fields'.<br/> |
| * "Project" first column in sdf file format.<br/> |
| * "SourceFile" second column in sdf file format.<br/> |
| * "Dummy" third column in sdf file format.<br/> |
| * "ResType" 4. column in sdf file format.<br/> |
| * "GID" 5. column in sdf file format. <br/> |
| * "LID" 6. column in sdf file format.<br/> |
| * "HID" 7. column in sdf file format.<br/> |
| * "Platform" 8. column in sdf file format. <br/> |
| * "Width", 9. column in sdf file format.<br/> |
| * "SourceLanguageID" 10. column in sdf file format(in the line with the source language).<br/> |
| * "SourceText" 11. column in sdf file format(in the line with the source language).<br/> |
| * "SourceHText" 12. column in sdf file format(in the line with the source language).<br/> |
| * "SourceQText" 13. column in sdf file format(in the line with the source language).<br/> |
| * "SourceTitle" 14. column in sdf file format(in the line with the source language).<br/> |
| * "TargetLanguageID" 10. column in sdf file format (in the line with the target language).<br/> |
| * "TargetText" 11. column in sdf file format (in the line with the target language).<br/> |
| * "TargetHText" 12. column in sdf file format (in the line with the target language).<br/> |
| * "TargetQText" 13. column in sdf file format (in the line with the target language).<br/> |
| * "TargetTitle", 14. column in sdf file format (in the line with the target language).<br/> |
| * "TimeStamp" 15. column in sdf file format.<br/> |
| * |
| * @author Christian Schmidt 2005 |
| * |
| */ |
| |
| public class SDFReader extends DataReader { |
| |
| /** |
| * The names of the 15 columns of an SDF line, in file order. They are used |
| * as keys for the map of the line that is currently examined (SDFMap). |
| */ |
| final static String[] fieldnames = { "Project", "SourceFile", "Dummy", |
| "ResType", "GID", "LID", "HID", "Platform", "Width", "LanguageID", |
| "Text", "HText", "QText", "Title", "TimeStamp" }; |
| |
| /** |
| * The column names used as map keys for a line that holds the source |
| * language: the language dependent columns carry the prefix "Source". |
| */ |
| final static String[] sourceLineNames = { "Project", "SourceFile", "Dummy", |
| "ResType", "GID", "LID", "HID", "Platform", "Width", |
| "SourceLanguageID", "SourceText", "SourceHText", "SourceQText", |
| "SourceTitle", "TimeStamp" }; |
| |
| /** |
| * The column names used as map keys for a line that holds the target |
| * language: the language dependent columns carry the prefix "Target". |
| */ |
| final static String[] targetLineNames = { "Project", "SourceFile", "Dummy", |
| "ResType", "GID", "LID", "HID", "Platform", "Width", |
| "TargetLanguageID", "TargetText", "TargetHText", "TargetQText", |
| "TargetTitle", "TimeStamp" }; |
| |
| final static String EMPTY = new String(""); |
| |
| private int dotCount = 0; |
| |
| /** |
| * A Map meant for an SDF line with source language. In the code of this |
| * class it is never assigned and only used to initialise data[0]. |
| */ |
| private Map sourceMap; |
| |
| /** |
| * A Map meant for an SDF line with target language. In the code of this |
| * class it is never assigned and only used to initialise data[1]. |
| */ |
| private Map targetMap; |
| |
| /** |
| * A Map containing the SDF line that is currently examined, keyed by the |
| * names in fieldnames. checkLanguage() reads "LanguageID" from it. |
| */ |
| private Map SDFMap; |
| |
| /** |
| * A Map array holding the lines of the block that is being read: |
| * [0] is the source language line, [1] the target language line. |
| * Both slots start as null (sourceMap and targetMap are unassigned here) |
| * and are replaced by fresh maps at the start of every readBlock() call. |
| */ |
| private Map[] data = { sourceMap, targetMap }; |
| |
| /** |
| * The Id of the current SDFBlock, built by getSDFBlockId() from the |
| * first eight columns of a line |
| */ |
| private String CurrentBlockId; |
| |
| /** |
| * The SDF file to read from (stored by the constructor) |
| */ |
| private File sourceFile; |
| |
| /** |
| * The language in the source file that should be handled as source language |
| */ |
| protected String sourceLanguage; |
| |
| /** |
| * The language in the source file that should be handled as target language. |
| * If it is "" checkLanguage() replaces it by the first language found |
| * that is not the source language. |
| */ |
| protected String targetLanguage; |
| |
| /** |
| * A counter holding the number of blocks read so far |
| * from this file; it is also the first part of the "BlockNr" value |
| */ |
| private long blockNr = 0;// original remark: with Integer, numbers greater than 128k would be signed (NOTE(review): rationale unclear, confirm) |
| |
| /** |
| * A counter holding the number of skipped lines, that means |
| * lines that can not be worked with because they contain an error |
| */ |
| private int skippedLines = 0; |
| |
| /** |
| * This switch is set when the end of the source file is reached |
| * and the last block has been returned by readBlock(). Finding it 'true' |
| * at the end of the file means the source file is finished: |
| * readBlock() then shows the statistics and returns null. |
| */ |
| private boolean endIt = false; |
| |
| /** |
| * Indicates whether the targetLanguage is found in the current block so far |
| * (reset to false when readBlock() returns a block) |
| */ |
| private boolean foundTarget = false; |
| /** |
| * Indicates whether the sourceLanguage is found in the current block so far |
| * (reset to false when readBlock() returns a block) |
| */ |
| private boolean foundSource = false; |
| |
| /** |
| * Counts how many lines were skipped because the language is |
| * neither sourceLanguage nor targetLanguage. |
| * NOTE(review): never updated in the code of this class - confirm whether it is still needed. |
| */ |
| private int langMiss; |
| |
| /** |
| * Indicates whether there is a line in the read buffer or not. |
| * readBlock() sets it when it has read the first line of the next block. |
| */ |
| private boolean useBuffer = false; |
| |
| /** |
| * A buffer for the raw SDFLine that was read one line too far |
| */ |
| private String lineBuffer; |
| |
| /** |
| * The buffer for the already split SDFLine: the first line of the |
| * current block (set by the constructor for the first block) |
| */ |
| private String[] splittedLineBuffer; |
| |
| /** |
| * Counts how many Blocks were skipped |
| * e.g. because no sourceLanguage is found |
| * in it |
| */ |
| private int skippedBlocks; |
| |
| /** |
| * Counts the blocks without targetLanguage |
| */ |
| private int targetLangMiss; |
| |
| /** |
| * Counts the blocks without sourceLanguage |
| */ |
| private int sourceLangMiss; |
| |
| /** |
| * Counts the lines where no targetLanguage line was found |
| * and so empty lines were created |
| */ |
| private int targetLangCreate; |
| |
| |
| DecimalFormat blockNrFormatter = new DecimalFormat("000000"); |
| |
| /** |
| * The hashcode of the current block, that is CurrentBlockId.hashCode() |
| */ |
| private int CurrentBlockHash; |
| |
| private boolean skip; |
| |
| /** |
| * Create a new Instance of SDFREader |
| * |
| * @param source the file to read from |
| * @param sourceLanguage the sourceLanguage (must not be empty) |
| * @param targetLanguage the targetLanguage |
| * @param charset the charset used to read source |
| * @throws java.io.IOException |
| * @throws Exception |
| */ |
| public SDFReader(File source, String sourceLanguage, String targetLanguage, |
| String charset) throws java.io.IOException { |
| super(new InputStreamReader(new FileInputStream(source), charset)); |
| sourceFile = source; |
| this.sourceLanguage = sourceLanguage; |
| this.targetLanguage = targetLanguage; |
| String line; |
| String[] splitLine; |
| //read first line to get the first |
| //SDF block id |
| mark(16000); |
| if ((line = readLine()) != null) { |
| if ((splitLine = split(line)) != null){ |
| this.CurrentBlockId = getSDFBlockId(splitLine); |
| this.CurrentBlockHash=this.CurrentBlockId.hashCode(); |
| //found the first |
| this.blockNr++; |
| } |
| this.splittedLineBuffer = splitLine; |
| } |
| reset(); |
| |
| } |
| |
| /* (non-Javadoc) |
| * @see com.sun.star.tooling.converter.DataReader#getData() |
| */ |
| public Map getData()throws IOException { |
| |
| Map map=new HashMap(); |
| // do { |
| this.skip=false; |
| Map[] help=readBlock(); |
| if(help==null||help[1]==null||help[0]==null){ |
| return null; |
| // }else if (help[1].get("TargetLanguageID")==null||help[0].get("SourceLanguageID")==null) { |
| // OutputHandler.log("Missing Language Id in block "+blockNr+"\nthe block is skipped." ); |
| // this.skippedBlocks++; |
| // this.skip=true; |
| }else{ |
| map.putAll(help[1]); |
| map.putAll(help[0]); |
| |
| } |
| // }while(this.skip=true); |
| return map; |
| } |
| |
| |
| /** |
| * Read a Block from the sdf file and return |
| * @return a Map[] where [0] holds the source and [1] the target language data. |
| * |
| * @throws java.io.IOException |
| */ |
| public Map[] readBlock() throws java.io.IOException { |
| String line = EMPTY; |
| String blockId = EMPTY; |
| String[] splittedLine = null; |
| data[0]=new ExtMap(); |
| data[1]=new ExtMap(); |
| String help; |
| String c = null; |
| //read next line or use buffered line |
| while (useBuffer || (line = readLine()) != null) { //works because '||' is shortcut |
| |
| try { |
| // buffer used? |
| if (useBuffer) { |
| line = this.lineBuffer; |
| splittedLine = this.splittedLineBuffer; |
| this.SDFMap = new ExtMap(SDFReader.fieldnames, splittedLine); |
| |
| try { |
| checkLanguage(splittedLine); |
| } catch (ConverterException e) { |
| throw e; |
| }finally{ |
| |
| useBuffer = false; |
| } |
| } else { |
| //...are there wrong characters? |
| if ((check(line)).length() < line.length()) { |
| throw new LineErrorException(getLineNumber() |
| + " : Line contains wrong character " |
| //+ Integer.toHexString(Integer.parseInt(c)) |
| + " \n" + line); |
| } |
| //...is the number of columns ok? |
| if ((splittedLine = split(line)) == null) { |
| |
| throw new LineErrorException(super.getLineNumber() |
| + " : Line has wrong column number \n" + line); |
| //continue; |
| } else { |
| // TODO makeDot is better in Data Handler |
| makeDot(); |
| // ...is this line in a new SDF block ? |
| if ((blockId = getSDFBlockId(splittedLine)) |
| .equals(CurrentBlockId)) { |
| |
| this.SDFMap = new ExtMap(SDFReader.fieldnames, |
| splittedLine); |
| //what language is in it ? |
| checkLanguage(splittedLine); |
| |
| } else { |
| |
| /* |
| * we found the next block , but do we have the |
| * target text? |
| */ |
| |
| if (!foundTarget) { |
| createTargetLine(); |
| } |
| |
| blockNr++; |
| splittedLineBuffer = splittedLine;//read one line |
| // too much so |
| // buffer it |
| lineBuffer = line; |
| useBuffer = true;//reset(); |
| |
| this.CurrentBlockId = blockId; |
| this.CurrentBlockHash=this.CurrentBlockId.hashCode(); |
| |
| /* ...and what about the source text ? */ |
| if (!foundSource) { |
| |
| OutputHandler |
| .log("Error in Line:" |
| + getLineNumber() |
| + "Source Language is missing maybe " |
| + "previous block has an error.\nBlock " |
| + (blockNr - 1) |
| + " is skipped. before line: \n" |
| + line); |
| |
| foundTarget = false;//no target without source |
| skippedBlocks++; |
| skippedLines++; |
| sourceLangMiss++; |
| continue;// skip output of this block if no |
| // source language is found |
| |
| } |
| |
| break; |
| |
| } |
| } |
| |
| } |
| |
| } catch (LineErrorException e) { |
| |
| OutputHandler.log(e.getMessage()); |
| this.skippedLines++; |
| } catch (ConverterException e) { |
| OutputHandler.log(e.getMessage()); |
| } |
| |
| } |
| // did we read the whole stuff? |
| if (null != line) { |
| // no |
| foundSource = false; |
| foundTarget = false; |
| |
| return this.data; |
| |
| } else { |
| // ok , its the end but is everything written now? |
| if (!endIt) { |
| // there is something to write |
| // but next time we can end it |
| endIt = true; |
| if(!foundTarget){ |
| createTargetLine(); |
| } |
| // write |
| return this.data;//last lines |
| } else { |
| |
| showStat(); |
| return null; |
| } |
| } |
| // }catch(ConverterException e) { |
| // Converter.log(e.getMessage()); |
| // return null; |
| // } |
| } |
| |
| /** |
| * |
| */ |
| private void createTargetLine() { |
| targetLangMiss++; |
| // if not, create one ... |
| data[1] = new ExtMap(SDFReader.targetLineNames, |
| splittedLineBuffer); |
| data[1].put("TargetLanguageID", |
| this.targetLanguage); |
| if ((String) data[1].get("TargetText") != EMPTY) |
| data[1].put("TargetText", EMPTY); |
| if ((String) data[1].get("TargetHText") != EMPTY) |
| data[1].put("TargetHText", EMPTY); |
| if ((String) data[1].get("TargetQText") != EMPTY) |
| data[1].put("TargetQText", EMPTY); |
| if ((String) data[1].get("TargetTitle") != EMPTY) |
| data[1].put("TargetTitle", EMPTY); |
| this.data[1].put("BlockNr", blockNrFormatter.format(blockNr)+'-'+Integer.toString(this.CurrentBlockHash)); |
| targetLangCreate++; |
| } |
| |
| /** |
| * Show the statistic information got while |
| * reading the file |
| * |
| * @throws IOException |
| */ |
| private void showStat() throws IOException { |
| OutputHandler.out(EMPTY);OutputHandler.out(EMPTY); |
| // OutputHandler.out("Hashes: " + (theHashes.size()) + " "); |
| OutputHandler.out("Blocks found: " + blockNr + " "); |
| OutputHandler.out(EMPTY); |
| OutputHandler.out("Lines read: " + (getLineNumber()) + " "); |
| OutputHandler |
| .dbg("Lines created " + (targetLangCreate) + " "); |
| OutputHandler.dbg(" -------"); |
| OutputHandler.dbg("Lines total: " |
| + (getLineNumber() + targetLangCreate) + " "); |
| OutputHandler.dbg("Lines skipped: " + skippedLines + " "); |
| |
| OutputHandler.dbg("Source Language misses: " + sourceLangMiss + " "); |
| OutputHandler.dbg("Target Language misses: " + targetLangMiss + " "); |
| OutputHandler.dbg("Blocks found: " + blockNr + " "); |
| OutputHandler.dbg("Blocks skipped: " + skippedBlocks + " "); |
| if ((sourceLangMiss + skippedBlocks + skippedLines) > 0) |
| OutputHandler.out("\n---! Errors found !--- view Logfile.\n\n" |
| + "To enable logfile use -l option at command line.\n" |
| + "For help type 'convert -h {Enter}'.\n"); |
| } |
| |
| /** |
| * Check the current line whether the source language |
| * or target language is in it |
| * |
| * @throws ConverterException if a not needed language or no target language is found |
| * in this block |
| * @throws IOException |
| * |
| */ |
| final private void checkLanguage(String[] splittedLine) |
| throws ConverterException, IOException { |
| String langID = (String) SDFMap.get("LanguageID"); |
| |
| //maybe the source language is in this line |
| if (!foundSource && this.sourceLanguage.equals(langID)) { |
| // found the next source language line |
| this.data[0] = new ExtMap(SDFReader.sourceLineNames, splittedLine); |
| |
| // this.data[0].put("BlockNr", Integer.toHexString(blockNr)); |
| // this.data[0].put("BlockHash", Integer.toHexString(this.CurrentBlockHash)); |
| this.data[0].put("BlockNr", blockNrFormatter.format(blockNr)+'-'+Integer.toHexString(this.CurrentBlockHash)); |
| // this.data[0].put("BlockHash", blockHashFormatter.format(this.CurrentBlockHash)); |
| foundSource = true; |
| return; |
| |
| } else { |
| // or the target language is in this line |
| if (!foundTarget) { |
| //no target language is given at command line |
| if (this.targetLanguage.equals(EMPTY)) { |
| //try if we can use the current lines language for target |
| // language |
| if (!langID.equals(this.sourceLanguage)) { |
| //yes , we can use this lines laanguage as target |
| this.targetLanguage = langID; |
| |
| //source and target language both are known: show it |
| OutputHandler.out("Source Language is: " |
| + this.sourceLanguage + " "); |
| OutputHandler.out("Target Language is: " |
| + this.targetLanguage + " "); |
| OutputHandler.out(EMPTY); |
| System.out.println("Start"); |
| |
| } else { |
| throw new ConverterException("(" + getLineNumber() |
| + ") No target language found: " |
| + this.targetLanguage); |
| } |
| } |
| if (this.targetLanguage.equals(langID)) { |
| this.data[1] = new ExtMap(SDFReader.targetLineNames, |
| splittedLine);// found the next target language line |
| this.data[1].put("BlockNr", blockNrFormatter.format(blockNr)+'-'+Integer.toHexString(this.CurrentBlockHash)); |
| foundTarget = true; |
| |
| return; |
| } |
| }//end !foundTarget |
| } |
| //if we go here we dont need the found language... |
| throw new ConverterException("found not needed language '" |
| + this.SDFMap.get("LanguageID") + "' in Line: " |
| + getLineNumber()); |
| |
| } |
| |
| /** |
| * Make a dot on the screen to show the user that it is going on |
| */ |
| private void makeDot() { |
| int count = 0; |
| if ((count = (int) super.getLineNumber() / 1000) > this.dotCount) { |
| this.dotCount = count; |
| OutputHandler.printDot(); |
| } |
| |
| } |
| |
| /** |
| * split the SDFLine in its columns |
| * |
| * @param line the current SDFLine |
| * @return the splitted SDFLine as array of String |
| * or null if an error occours |
| * @throws IOException |
| */ |
| private String[] split(String line) throws IOException { |
| check(line); |
| String[] splitLine; |
| if ((splitLine = line.split("\t")).length == 15) |
| return splitLine; |
| else |
| //an error occured |
| return null; |
| } |
| |
| /** |
| * create a block Id from a splitted SDFLine |
| * the blockId consists of the column one to eight of an SDF File |
| * |
| * @param splitLine the line to create a block id from |
| * @return the blockId as String |
| */ |
| private String getSDFBlockId(String[] splitLine) { |
| StringBuffer BlockId = new StringBuffer(""); |
| for (int i = 0; i < 8; i++) { |
| BlockId.append(splitLine[i]); |
| } |
| return BlockId.toString(); |
| } |
| |
| // public final boolean canRead() { |
| // return this.sourceFile.canRead(); |
| // } |
| |
| /** |
| * Check if there are not allowed characters in this line |
| * |
| * @param line the SDFLine to check |
| * @return if everything, ok the original |
| * else the wrong character as String |
| * |
| * @throws java.io.IOException |
| */ |
| private String check(String line) throws java.io.IOException { |
| char c = ' '; |
| for (int i = 0; i < line.length(); i++) { |
| c = line.charAt(i); |
| if (c < 30 && c != 9) { |
| return (new Character(c)).toString(); |
| } |
| |
| } |
| return line; |
| |
| } |
| } |