blob: c6600b9efa470e7e459f4d4942c59bec465b9654 [file] [log] [blame]
/**************************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*************************************************************/
/*
* XLIFFReader.java
*
*
*/
package com.sun.star.tooling.converter;
import java.io.IOException;
import java.util.Hashtable;
import java.util.Map;
import com.sun.star.tooling.languageResolver.LanguageResolver;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.AttributesImpl;
import org.xml.sax.helpers.DefaultHandler;
/**
* Parse the given file and extract the content needed.
* <br/>
* This Reader understands the parts of the
* <a href="http://www.oasis-open.org/committees/xliff/documents/cs-xliff-core-1.1-20031031.htm">xliff</a> specification used to translate
* the strings in Star-Office and Open-Office.
* <br/>
* The given file is parsed and the content is stored in a Map with the following keys:
* <br/>
* "BlockNr" originally coming from reading the sdf file, contains 'block nr in sdf file'+'-'+'hash value of the sdf id fields'.<br/>
* "Project" first column in sdf file format.<br/>
* "SourceFile" second column in sdf file format.<br/>
* "Dummy" third column in sdf file format.<br/>
* "ResType" 4. column in sdf file format.<br/>
* "GID" 5. column in sdf file format. <br/>
* "LID" 6. column in sdf file format.<br/>
* "HID" 7. column in sdf file format.<br/>
* "Platform" 8. column in sdf file format. <br/>
* "Width", 9. column in sdf file format.<br/>
* "SourceLanguageID" 10. column in sdf file format(in the line with the source language).<br/>
* "SourceText" 11. column in sdf file format(in the line with the source language).<br/>
* "SourceHText" 12. column in sdf file format(in the line with the source language).<br/>
* "SourceQText" 13. column in sdf file format(in the line with the source language).<br/>
* "SourceTitle" 14. column in sdf file format(in the line with the source language).<br/>
* "TargetLanguageID" 10. column in sdf file format (in the line with the target language).<br/>
* "TargetText" 11. column in sdf file format (in the line with the target language).<br/>
* "TargetHText" 12. column in sdf file format (in the line with the target language).<br/>
* "TargetQText" 13. column in sdf file format (in the line with the target language).<br/>
* "TargetTitle", 14. column in sdf file format (in the line with the target language).<br/>
* "TimeStamp" 15. column in sdf file format.<br/>
*
* @author Christian Schmidt 2005
*
*/
public class XLIFFReader extends DefaultHandler {

    /**
     * The keys of the Map that {@link #moveData()} hands to the DataHandler,
     * in the order in which they are put.
     */
    private final String[] dataNames = { "BlockNr", "Project",
            "SourceFile", "Dummy", "ResType", "GID", "LID", "HID", "Platform",
            "Width", "SourceLanguageID", "SourceText", "SourceHText",
            "SourceQText", "SourceTitle", "TargetLanguageID", "TargetText",
            "TargetHText", "TargetQText", "TargetTitle", "TimeStamp" };

    /**
     * Slot of the data array into which {@link #characters(char[], int, int)}
     * writes the next text.
     * <br/>
     * NOTE(review): this field is static and therefore shared by all
     * XLIFFReader instances; it is left as it is because it is visible to
     * the whole package and other classes may reference it.
     */
    static int index = 0;

    /**
     * The Map that holds the data returned by this class
     */
    private Map moveData = new ExtMap();

    /**
     * Holds the data arrays of blocks that are not complete yet, keyed by
     * the id of the trans-unit, until all depending trans-units are found
     */
    private Hashtable dataStore = new Hashtable();

    /**
     * Qualified name of the element that was started last
     */
    private String name = "";

    /**
     * Copy of the attributes of the element that was started last
     */
    private Attributes attrs;

    /**
     * Indicates whether the next found content string should be stored
     */
    private boolean storeIt = false;

    /**
     * Number of slots in the data array
     */
    private static final int DATA_SIZE = 26;

    /**
     * data holds the information created while parsing
     */
    private String[] data = new String[DATA_SIZE];

    /**
     * The handler used by this class
     */
    private final DataHandler handler;

    /**
     * The target used by this class
     */
    private final DataWriter target;

    /**
     * counts how many dots are made
     */
    private int dotCount;

    /**
     * Counts how many Trans Units are read
     */
    private int transUnitCounter;

    /**
     * used source Language
     */
    private String sourceLanguage;

    /**
     * used target language
     */
    private String targetLanguage;

    /**
     * indicates whether no complete block has been written yet
     */
    private boolean isFirst = true;

    private static final String EMPTY = "";

    /**
     * the value {@link #index} had when characters() was called the last time
     */
    private int oldindex;

    /**
     * Index for the BlockNr in the data array
     */
    private static final int BLOCKNR_IDX = 0;
    /**
     * Index for the Project in the data array
     */
    private static final int PROJECT_IDX = 1;
    /**
     * Index for the Sourcefile name in the data array
     */
    private static final int SOURCEFILE_IDX = 2;
    /**
     * Index for the 'dummy' in the data array
     */
    private static final int DUMMY_IDX = 3;
    /**
     * Index for the Group Id in the data array
     */
    private static final int GID_IDX = 4;
    /**
     * Index for the Local Id in the data array
     */
    private static final int LID_IDX = 5;
    /**
     * Index for the Help Id in the data array
     */
    private static final int HID_IDX = 6;
    /**
     * Index for the Platform in the data array
     */
    private static final int PLATFORM_IDX = 7;
    /**
     * Index for the 'Width' in the data array
     */
    private static final int WIDTH_IDX = 8;
    /**
     * Index for the Sourcelanguage Id in the data array
     */
    private static final int SOURCE_LANGUAGE_ID_IDX = 10;
    /**
     * Index for the Source Text in the data array
     */
    private static final int SOURCE_TEXT_IDX = 11;
    /**
     * Index for the Source Helptext in the data array
     */
    private static final int SOURCE_HELPTEXT_IDX = 12;
    /**
     * Index for the Source Quickhelp Text in the data array
     */
    private static final int SOURCE_QUICK_HELPTEXT_IDX = 13;
    /**
     * Index for the Source Titletext in the data array
     */
    private static final int SOURCE_TITLETEXT_IDX = 14;
    /**
     * Index for the Timestamp in the data array
     */
    private static final int TIMESTAMP_IDX = 15;
    /**
     * Index for the res type in the data array
     */
    private static final int RESTYPE_IDX = 16;
    /**
     * Index for the Found Parts Counter in the data array
     */
    private static final int FOUND_PARTS_COUNTER_IDX = 18;
    /**
     * Index for the Target Language Id in the data array
     */
    private static final int TARGET_LANGUAGE_ID_IDX = 20;
    /**
     * Index for the Target Text in the data array
     */
    private static final int TARGET_TEXT_IDX = 21;
    /**
     * Index for the Target Helptext in the data array
     */
    private static final int TARGET_HELP_TEXT_IDX = 22;
    /**
     * Index for the Target Quickhelp Text in the data array
     */
    private static final int TARGET_QUICKHELP_TEXT_IDX = 23;
    /**
     * Index for the Target Titletext in the data array
     */
    private static final int TARGET_TITLE_TEXT_IDX = 24;

    /**
     * Returned by the index lookup helpers when nothing has to be stored
     */
    private static final int NO_INDEX = -1;

    /**
     * used to find the matching ISO or RFC3066 language code
     */
    LanguageResolver languageResolver;

    /**
     * If true, a block is only written after all of its parts were found;
     * if false, every single trans-unit is written on its own
     */
    private boolean doBlockCompleteCheck = true;

    /**
     * Create a new Instance of XLIFFReader
     *
     * @param handler the DataHandler to use
     * @param target the target used
     * @throws IOException if creating the LanguageResolver fails
     */
    public XLIFFReader(DataHandler handler, DataWriter target) throws IOException {
        this.languageResolver = new LanguageResolver();
        this.handler = handler;
        this.target = target;
    }

    /**
     * Create a new Instance of XLIFFReader
     *
     * @param handler the DataHandler to use
     * @param target the target used
     * @param doBlockCompleteCheck indicates whether every single transunit should be returned or the whole block data is to be collected
     *
     * @throws IOException if creating the LanguageResolver fails
     */
    public XLIFFReader(DataHandler handler, DataWriter target, boolean doBlockCompleteCheck) throws IOException {
        // the delegated constructor already creates the LanguageResolver
        this(handler, target);
        this.doBlockCompleteCheck = doBlockCompleteCheck;
    }

    /**
     * Delete and initialize the data content.
     * <br/>
     * Every slot is set to the empty string, except the source language id,
     * the target language id and the time stamp, which keep their values.
     * After that the dummy is set to "0" and the found parts counter to "1".
     */
    public void initData() {
        for (int i = 0; i < data.length; i++) {
            boolean keep = i == SOURCE_LANGUAGE_ID_IDX
                    || i == TIMESTAMP_IDX
                    || i == TARGET_LANGUAGE_ID_IDX;
            if (!keep) {
                data[i] = "";
            }
        }
        data[DUMMY_IDX] = "0"; // dummy
        data[FOUND_PARTS_COUNTER_IDX] = "1"; // parts found
    }

    /** (non-Javadoc)
     * @see org.xml.sax.ContentHandler#startDocument()
     */
    public void startDocument() {
        initData();
    }

    /** (non-Javadoc)
     * @see org.xml.sax.ContentHandler#endDocument()
     */
    public void endDocument() {
        try {
            showStatistic();
        } catch (IOException e) {
            OutputHandler.log(e.getMessage());
        }
    }

    /**
     * Decide from the element name and its attributes where the following
     * character data has to be stored, and read the attributes of the
     * "file" and "trans-unit" elements.
     *
     * @throws SAXException if a trans-unit element has no id attribute
     * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
     */
    public void startElement(String namespaceURI, String sName, String qName,
            Attributes attrs) throws SAXException {
        this.name = qName;
        this.attrs = new AttributesImpl(attrs);

        if (isInlineTag(qName)) {
            // the tag itself is ignored, its content is stored
            // in the slot that is currently selected
            storeIt = true;
            return;
        }
        if (qName.equals("target")) {
            startStoringAt(textIndexFor(data[RESTYPE_IDX], TARGET_TEXT_IDX,
                    TARGET_QUICKHELP_TEXT_IDX, TARGET_TITLE_TEXT_IDX));
            return;
        }
        if (qName.equals("source")) {
            startStoringAt(textIndexFor(data[RESTYPE_IDX], SOURCE_TEXT_IDX,
                    SOURCE_QUICK_HELPTEXT_IDX, SOURCE_TITLETEXT_IDX));
            return;
        }
        if (qName.equals("file")) {
            readFileAttributes(attrs);
            return;
        }
        if (qName.equals("trans-unit")) {
            readTransUnitAttributes(attrs);
            return;
        }
        if (qName.equals("context")) {
            startStoringAt(contextIndexFor(attrs.getValue("context-type")));
        }
    }

    /**
     * @return true if the element is one of bpt, ept, sub or ex
     */
    private static boolean isInlineTag(String qName) {
        return qName.equals("bpt") || qName.equals("ept")
                || qName.equals("sub") || qName.equals("ex");
    }

    /**
     * Select the given slot and switch storing on; does nothing if the
     * slot is {@link #NO_INDEX}.
     */
    private void startStoringAt(int slot) {
        if (slot != NO_INDEX) {
            index = slot;
            storeIt = true;
        }
    }

    /**
     * Map the res type of the current trans-unit to one of the given slots.
     * "res-Help" is not handled here; help texts are stored via the context
     * elements "SourceHelpText" and "TargetHelpText".
     *
     * @return the slot to store in or {@link #NO_INDEX} for null or any other res type
     */
    private static int textIndexFor(String resType, int textIdx,
            int quickHelpIdx, int titleIdx) {
        if ("res".equals(resType)) {
            return textIdx;
        }
        if ("res-QuickHelp".equals(resType)) {
            return quickHelpIdx;
        }
        if ("res-Title".equals(resType)) {
            return titleIdx;
        }
        return NO_INDEX;
    }

    /**
     * Map the context-type of a context element to the slot its content
     * has to be stored in.
     *
     * @return the slot or {@link #NO_INDEX} if the content is not stored
     *         ("DBType", null and every unknown type)
     */
    private static int contextIndexFor(String contextType) {
        if ("SourceHelpText".equals(contextType)) {
            return SOURCE_HELPTEXT_IDX;
        }
        if ("TargetHelpText".equals(contextType)) {
            return TARGET_HELP_TEXT_IDX;
        }
        if ("Project".equals(contextType)) {
            return PROJECT_IDX;
        }
        if ("Filename".equals(contextType)) {
            return SOURCEFILE_IDX;
        }
        if ("Type".equals(contextType)) {
            return RESTYPE_IDX;
        }
        if ("GID".equals(contextType)) {
            return GID_IDX;
        }
        if ("LID".equals(contextType)) {
            return LID_IDX;
        }
        if ("HID".equals(contextType)) {
            return HID_IDX;
        }
        if ("Platform".equals(contextType)) {
            return PLATFORM_IDX;
        }
        if ("Width".equals(contextType)) {
            return WIDTH_IDX;
        }
        return NO_INDEX;
    }

    /**
     * Read time stamp, source language and target language from the
     * attributes of a file element. The target language id is only
     * overwritten if the resolver returns a value for it. Anything the
     * resolver throws is logged and otherwise ignored.
     */
    private void readFileAttributes(Attributes attrs) {
        data[TIMESTAMP_IDX] = attrs.getValue("date");
        try {
            data[SOURCE_LANGUAGE_ID_IDX] = languageResolver
                    .getISOFromRFC(attrs.getValue("source-language"));
            String targetLanguageId = languageResolver
                    .getISOFromRFC(attrs.getValue("target-language"));
            if (targetLanguageId != null) {
                data[TARGET_LANGUAGE_ID_IDX] = targetLanguageId;
            }
        } catch (Exception e) {
            OutputHandler.log(e.getMessage());
        }
    }

    /**
     * Start a trans-unit: if data with the same id is waiting in the store
     * it is taken back and its found parts counter is incremented,
     * otherwise the id starts a new block. In both cases the res type is
     * taken from the restype attribute.
     *
     * @throws SAXException if the trans-unit has no id attribute
     */
    private void readTransUnitAttributes(Attributes attrs) throws SAXException {
        String id = attrs.getValue("id");
        if (id == null) {
            // a Hashtable does not accept null keys, fail with a clear message
            throw new SAXException("trans-unit element without id attribute");
        }
        String[] stored = (String[]) dataStore.get(id);
        if (stored != null) {
            System.arraycopy(stored, 0, data, 0, data.length);
            // found one more part belonging to this information
            int foundParts = Integer.parseInt(data[FOUND_PARTS_COUNTER_IDX]) + 1;
            data[FOUND_PARTS_COUNTER_IDX] = Integer.toString(foundParts);
            // showData() stores the data again if it is still incomplete
            dataStore.remove(id);
        } else {
            data[BLOCKNR_IDX] = id; // a new part
        }
        data[RESTYPE_IDX] = attrs.getValue("restype");
    }

    /**
     * Switch storing off for every element except bpt, ept, sub and ex
     * and hand the collected data over when a trans-unit ends.
     *
     * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
     */
    public void endElement(String namespaceURI, String sName, String qName)
            throws SAXException {
        if (!isInlineTag(qName)) {
            storeIt = false;
        }
        if (qName.equals("trans-unit")) {
            showData();
        }
    }

    /**
     * Store the text in the currently selected slot if storing is switched
     * on. The text is appended if the slot is the same as at the previous
     * call (the parser may deliver one text in several chunks), otherwise
     * it replaces the content of the slot.
     *
     * @see org.xml.sax.ContentHandler#characters(char[], int, int)
     */
    public void characters(char[] ch, int start, int length) {
        if (storeIt) {
            String text = new String(ch, start, length);
            // never append to an empty (null) slot, that would
            // result in a text starting with "null"
            if (index == oldindex && data[index] != null) {
                data[index] += text;
            } else {
                data[index] = text;
            }
        }
        oldindex = index;
    }

    /**
     * Log the message, the exception is not thrown again.
     *
     * @see org.xml.sax.ErrorHandler#error(org.xml.sax.SAXParseException)
     */
    public void error(SAXParseException e) throws SAXParseException {
        OutputHandler.log(e.getMessage());
    }

    /**
     * Log line number and message, the exception is not thrown again.
     *
     * @see org.xml.sax.ErrorHandler#fatalError(org.xml.sax.SAXParseException)
     */
    public void fatalError(SAXParseException e) throws SAXParseException {
        OutputHandler.log("PARSE ERROR in line " + e.getLineNumber() + ", "
                + e.getMessage());
    }

    /**
     * Log the message, the exception is not thrown again.
     *
     * @see org.xml.sax.ErrorHandler#warning(org.xml.sax.SAXParseException)
     */
    public void warning(SAXParseException e) throws SAXParseException {
        OutputHandler.log(e.getMessage());
    }

    /**
     * Put the Data to the DataHandler and tell the Writer to write it
     * if the data is complete, otherwise keep a copy of the data in the
     * store. In both cases the data is initialized afterwards.
     *
     * @throws SAXException wrapping an IOException that occurred while writing
     */
    public void showData() throws SAXException {
        transUnitCounter++;
        makeDot();
        if (isComplete()) {
            try {
                moveData();
                if (isFirst) {
                    announceLanguages();
                    isFirst = false;
                }
                target.getDataFrom(handler);
                target.writeData();
            } catch (java.io.IOException e) {
                throw new SAXException(e);
            }
        } else {
            // wait for the missing parts of this block
            dataStore.put(data[BLOCKNR_IDX], data.clone());
        }
        initData();
    }

    /**
     * Remember the languages of the first written block and print them.
     */
    private void announceLanguages() throws IOException {
        this.sourceLanguage = (String) this.moveData.get("SourceLanguageID");
        this.targetLanguage = (String) this.moveData.get("TargetLanguageID");
        OutputHandler.out(EMPTY);
        OutputHandler.out("Source Language is: " + this.sourceLanguage);
        OutputHandler.out("Target Language is: " + this.targetLanguage);
        OutputHandler.out(EMPTY);
        OutputHandler.out("Start");
        OutputHandler.out(EMPTY);
    }

    /**
     * put the data in an Map in the format that
     * DataHandler can handle it. An empty width is replaced by "0".
     */
    final public void moveData() {
        if (EMPTY.equals(data[WIDTH_IDX])) {
            data[WIDTH_IDX] = "0";
        }
        moveData.put("BlockNr", data[BLOCKNR_IDX]);
        moveData.put("Project", data[PROJECT_IDX]);
        moveData.put("SourceFile", data[SOURCEFILE_IDX]);
        moveData.put("Dummy", "0");
        moveData.put("ResType", data[RESTYPE_IDX]);
        moveData.put("GID", data[GID_IDX]);
        moveData.put("LID", data[LID_IDX]);
        moveData.put("HID", data[HID_IDX]);
        moveData.put("Platform", data[PLATFORM_IDX]);
        moveData.put("Width", data[WIDTH_IDX]);
        moveData.put("SourceLanguageID", data[SOURCE_LANGUAGE_ID_IDX]);
        moveData.put("SourceText", data[SOURCE_TEXT_IDX]);
        moveData.put("SourceHText", data[SOURCE_HELPTEXT_IDX]);
        moveData.put("SourceQText", data[SOURCE_QUICK_HELPTEXT_IDX]);
        moveData.put("SourceTitle", data[SOURCE_TITLETEXT_IDX]);
        moveData.put("TargetLanguageID", data[TARGET_LANGUAGE_ID_IDX]);
        moveData.put("TargetText", data[TARGET_TEXT_IDX]);
        moveData.put("TargetHText", data[TARGET_HELP_TEXT_IDX]);
        moveData.put("TargetQText", data[TARGET_QUICKHELP_TEXT_IDX]);
        moveData.put("TargetTitle", data[TARGET_TITLE_TEXT_IDX]);
        moveData.put("TimeStamp", data[TIMESTAMP_IDX]);
        // and give it to the data handler
        this.handler.fillDataWith(moveData);
    }

    /**
     * complete means all depending parts have been found esp. all res types
     * that belong to the same SDF Line.
     * <br/>
     * The number of parts a block consists of is read from the block nr:
     * it is the text behind the last ':'. The data is complete if the found
     * parts counter equals this text. Without block complete check the data
     * is always complete.
     *
     * @return true if the data is complete
     *
     */
    final public boolean isComplete() {
        if (!doBlockCompleteCheck) {
            return true;
        }
        String foundParts = data[FOUND_PARTS_COUNTER_IDX];
        if (foundParts == null || EMPTY.equals(foundParts)) {
            foundParts = "1"; // this is the first part
            data[FOUND_PARTS_COUNTER_IDX] = foundParts;
        }
        String blockNr = data[BLOCKNR_IDX];
        if (blockNr == null) {
            // nothing read yet, so nothing can be complete
            return false;
        }
        String expectedParts = blockNr.substring(blockNr.lastIndexOf(":") + 1);
        return foundParts.equals(expectedParts);
    }

    // TODO this belongs in OutputHandler
    /**
     * show the user that it is going
     * on by printing a dot on the screen
     * every 1000 trans-units
     */
    private void makeDot() {
        int thousands = this.transUnitCounter / 1000;
        if (thousands > this.dotCount) {
            this.dotCount = thousands;
            OutputHandler.printDot();
        }
    }

    /**
     * show the statistic data found while parse this file
     *
     * @throws IOException
     */
    final void showStatistic() throws IOException {
        OutputHandler.out(EMPTY);
        OutputHandler.out("TransUnits found: " + this.transUnitCounter);
        // every data in the store is
        // skipped 'cause its not complete
        // TODO count really every transunit not only the data (might consist of
        // more than one
        OutputHandler.dbg("TransUnits skip : " + this.dataStore.size());
    }
}