blob: c3baee0c568b28759836083c84af8c04ac9f3e31 [file] [log] [blame]
/**************************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*************************************************************/
package org.apache.openoffice.ooxml.schema;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Queue;
import java.util.Set;
import java.util.Vector;
import javax.xml.stream.XMLStreamException;
import org.apache.openoffice.ooxml.schema.automaton.FiniteAutomatonContainer;
import org.apache.openoffice.ooxml.schema.automaton.NonValidatingCreator;
import org.apache.openoffice.ooxml.schema.automaton.ValidatingCreator;
import org.apache.openoffice.ooxml.schema.generator.LogGenerator;
import org.apache.openoffice.ooxml.schema.generator.ParserTablesGenerator;
import org.apache.openoffice.ooxml.schema.generator.html.HtmlGenerator;
import org.apache.openoffice.ooxml.schema.model.schema.Schema;
import org.apache.openoffice.ooxml.schema.model.schema.SchemaBase;
import org.apache.openoffice.ooxml.schema.parser.SchemaParser;
import org.apache.openoffice.ooxml.schema.simple.SimpleTypeContainer;
public class SchemaReader
{
/** Program entry point.
 *  Expects exactly one command line argument: the name of the driver file.
 *  With any other number of arguments a usage description that lists the
 *  commands understood by ParseDriverFile() is printed to stderr and the
 *  process exits with status 1.
 *  @param aArgumentList
 *      The command line arguments.
 */
public static void main (final String ... aArgumentList)
{
    if (aArgumentList.length != 1)
    {
        // The class that is started is SchemaReader, so that is the name
        // that belongs into the usage line.
        System.err.printf("usage: SchemaReader <driver-file>\n");
        System.err.printf(" driver file can contain these lines:\n");
        System.err.printf("# Comments\n");
        System.err.printf(" are ignored\n");
        System.err.printf("schema <mark> <file-name>\n");
        System.err.printf(" specifies a top-level schema file to read\n");
        System.err.printf("output-schema <file-name>\n");
        System.err.printf(" write schema information to file\n");
        System.err.printf("output-optimized-schema <file-name>\n");
        System.err.printf(" write information about optimized schema to file\n");
        // The following commands are handled by the driver file parser
        // as well but were missing from the usage text.
        System.err.printf("output-nonvalidating-parse-tables <automaton-log-file> <simple-type-log-file> <parse-table-file>\n");
        System.err.printf(" write parse tables of the non-validating automatons and the two log files\n");
        System.err.printf("output-validating-parse-tables <automaton-log-file> <simple-type-log-file> <parse-table-file>\n");
        System.err.printf(" write parse tables of the validating automatons and the two log files\n");
        System.err.printf("output-html-page <file-name>\n");
        System.err.printf(" write HTML page about optimized schema to file\n");
        System.exit(1);
    }
    final SchemaReader aReader = new SchemaReader(new File(aArgumentList[0]));
    aReader.Run();
}
/** Create a new reader with empty containers and zeroed statistics and
 *  then process the given driver file.
 *  @param aDriverFile
 *      The driver file that is handed to ParseDriverFile().
 */
private SchemaReader (final File aDriverFile)
{
    // Statistics that are accumulated while schema files are parsed.
    this.mnTotalByteCount = 0;
    this.mnTotalLineCount = 0;

    // Containers that are filled from the driver file.
    this.maOutputOperations = new Vector<>();
    this.maMainSchemaFiles = new Vector<>();

    // Book keeping for the schema files that are parsed later.
    this.maWorkList = new LinkedList<>();
    this.maSchemaFiles = new HashSet<>();
    this.maTopLevelSchemas = new HashMap<>();
    this.maSchemaBase = new SchemaBase();

    // All members have to be set up before the driver file is read
    // because parsing it stores data in them.
    ParseDriverFile(aDriverFile);
}
/** Read and parse the driver file that specifies which schema files to read
 *  and where the output should go.
 *
 *  Lines whose first non-whitespace character is '#' and lines that contain
 *  only whitespace are skipped.  A line that ends with a backslash is joined
 *  with the line that follows it.  Every other line is split into parts by
 *  SplitLine(); the first part is the command, the remaining parts are its
 *  arguments.
 *
 *  "schema" lines are stored in maMainSchemaFiles.  For every "output-..."
 *  line an operation is appended to maOutputOperations.  The output files
 *  are checked with CreateCheckedOutputFile() while the driver file is read,
 *  not when the operation is run.
 *
 *  Every problem (unreadable driver file, unknown command, missing
 *  arguments, failed output file check, I/O error) is reported on stderr
 *  and terminates the process with exit status 1.
 *
 *  @param aDriverFile
 *      The driver file to read.
 */
private void ParseDriverFile (final File aDriverFile)
{
    if (aDriverFile == null || ! aDriverFile.exists() || ! aDriverFile.canRead())
    {
        System.err.printf("can not read driver file\n");
        System.exit(1);
    }
    // try-with-resources closes the reader also when parsing fails.
    try (final BufferedReader aIn = new BufferedReader(new FileReader(aDriverFile)))
    {
        while(true)
        {
            String sLine = aIn.readLine();
            if (sLine == null)
                break;
            // Lines starting with # are comment lines and are ignored.
            if (sLine.matches("^\\s*#.*"))
                continue;
            // Lines containing only whitespace are also ignored.
            else if (sLine.matches("^\\s*$"))
                continue;
            // Handle line continuation.
            while (sLine.endsWith("\\"))
            {
                sLine = sLine.substring(0, sLine.length()-1);
                final String sNextLine = aIn.readLine();
                // A backslash on the last line of the file has nothing to
                // continue with.  Do not append the string "null".
                if (sNextLine == null)
                    break;
                sLine += sNextLine;
            }
            final Vector<String> aParts = SplitLine(sLine);
            // A line like "" yields no parts at all.  Treat it like an empty line.
            if (aParts.isEmpty())
                continue;
            switch (aParts.get(0))
            {
                case "schema":
                    CheckArgumentCount(aParts, 2);
                    maMainSchemaFiles.add(new String[]{aParts.get(1), aParts.get(2)});
                    break;
                case "output-schema":
                    CheckArgumentCount(aParts, 1);
                    maOutputOperations.add(new Runnable()
                    {
                        final File maFile = CreateCheckedOutputFile(aParts.get(1));
                        @Override public void run()
                        {
                            WriteSchema(maFile);
                        }
                    });
                    break;
                case "output-optimized-schema":
                    CheckArgumentCount(aParts, 1);
                    maOutputOperations.add(new Runnable()
                    {
                        final File maFile = CreateCheckedOutputFile(aParts.get(1));
                        @Override public void run()
                        {
                            WriteOptimizedSchema(maFile);
                        }
                    });
                    break;
                case "output-nonvalidating-parse-tables":
                    CheckArgumentCount(aParts, 3);
                    maOutputOperations.add(new Runnable()
                    {
                        final File aAutomatonLogFile = CreateCheckedOutputFile(aParts.get(1));
                        final File aSimpleTypeLogFile = CreateCheckedOutputFile(aParts.get(2));
                        final File aParseTableFile = CreateCheckedOutputFile(aParts.get(3));
                        @Override public void run()
                        {
                            WriteNonValidatingParseTables(
                                aAutomatonLogFile,
                                aSimpleTypeLogFile,
                                aParseTableFile);
                        }
                    });
                    break;
                case "output-validating-parse-tables":
                    CheckArgumentCount(aParts, 3);
                    maOutputOperations.add(new Runnable()
                    {
                        final File aAutomatonLogFile = CreateCheckedOutputFile(aParts.get(1));
                        final File aSimpleTypeLogFile = CreateCheckedOutputFile(aParts.get(2));
                        final File aParseTableFile = CreateCheckedOutputFile(aParts.get(3));
                        @Override public void run()
                        {
                            WriteValidatingParseTables(
                                aAutomatonLogFile,
                                aSimpleTypeLogFile,
                                aParseTableFile);
                        }
                    });
                    break;
                case "output-html-page":
                    CheckArgumentCount(aParts, 1);
                    maOutputOperations.add(new Runnable()
                    {
                        final File aHTMLPageFile = CreateCheckedOutputFile(aParts.get(1));
                        @Override public void run()
                        {
                            WriteHTMLPage(aHTMLPageFile);
                        }
                    });
                    break;
                default:
                    System.err.printf("unknown command '%s' in driver file\n", aParts.get(0));
                    System.exit(1);
            }
        }
    }
    catch (final Exception aException)
    {
        // Continuing with a driver file that was read only in parts would
        // silently drop schema files or output operations.  Fail like the
        // other error paths of this method do.
        System.err.printf("error while processing driver file %s: %s\n", aDriverFile, aException);
        aException.printStackTrace();
        System.exit(1);
    }
}
/** Make sure that a command from the driver file has all the arguments it needs.
 *  @param aParts
 *      The parts of one driver file line.  The first part is the command.
 *  @param nRequiredArgumentCount
 *      The number of arguments that have to follow the command.
 *      Additional arguments are tolerated.
 *  @throws IllegalArgumentException
 *      When there are fewer arguments than required.
 */
private static void CheckArgumentCount (
    final Vector<String> aParts,
    final int nRequiredArgumentCount)
{
    final int nArgumentCount = aParts.size() - 1;
    if (nArgumentCount < nRequiredArgumentCount)
        throw new IllegalArgumentException(
            String.format(
                "command '%s' expects %d argument(s) but has only %d",
                aParts.get(0),
                nRequiredArgumentCount,
                nArgumentCount));
}
/** Do the work that was specified by the driver file:
 *  parse the schema files, create the optimized schema, replace every
 *  top level schema by its optimized variant and finally run the output
 *  operations in the order in which they were added.
 *
 *  When parsing the schema files fails then the exception is printed and
 *  the process exits with status 1.  No output is created from an
 *  incompletely parsed schema.
 */
private void Run ()
{
    try
    {
        ParseSchemaFiles();
    }
    catch (final Exception aException)
    {
        aException.printStackTrace();
        // Optimizing and writing a schema that was read only in parts would
        // produce wrong output files while the process still reports success.
        System.err.printf("parsing the schema files failed, no output is created\n");
        System.exit(1);
    }
    maOptimizedSchemaBase = maSchemaBase.GetOptimizedSchema(maTopLevelSchemas.values());
    // Replace the top level schemas by their optimized counterparts.
    for (final Entry<String, Schema> aEntry : maTopLevelSchemas.entrySet())
        aEntry.setValue(aEntry.getValue().GetOptimizedSchema(maOptimizedSchemaBase));
    System.out.printf(" optimization left %d complex types and %d simple types\n",
        maOptimizedSchemaBase.ComplexTypes.GetCount(),
        maOptimizedSchemaBase.SimpleTypes.GetCount());
    for (final Runnable aOperation : maOutputOperations)
    {
        aOperation.run();
    }
}
/** Parse the main schema files from the driver file and then all the schema
 *  files that are queued in maWorkList by ParseSchemaFile().
 *
 *  For every main schema file a Schema object is created and stored in
 *  maTopLevelSchemas under its short name.  The process exits with status 1
 *  when a main schema file does not exist or can not be read.
 *
 *  At the end some statistics are printed to stdout.
 *
 *  @throws XMLStreamException
 *      Passed on from ParseSchemaFile().
 */
private void ParseSchemaFiles ()
    throws XMLStreamException
{
    System.out.printf("parsing %d main schema files\n", maMainSchemaFiles.size());
    // Start the clock before the main schema files are parsed so that the
    // reported duration covers the same files as the reported file, line
    // and byte counts.
    final long nStartTime = System.currentTimeMillis();
    for (final String[] aEntry : maMainSchemaFiles)
    {
        final String sMainSchemaShortname = aEntry[0];
        final String sMainSchemaFile = aEntry[1];
        final File aMainSchemaFile = new File(sMainSchemaFile);
        // Name the file in the error messages so that the offending line
        // of the driver file can be found.
        if ( ! aMainSchemaFile.exists())
        {
            System.err.printf("schema file %s does not exist\n", sMainSchemaFile);
            System.exit(1);
        }
        if ( ! aMainSchemaFile.canRead())
        {
            System.err.printf("can not read schema file %s\n", sMainSchemaFile);
            System.exit(1);
        }
        final Schema aSchema = new Schema(sMainSchemaShortname, maSchemaBase);
        ParseSchemaFile(sMainSchemaFile, aSchema);
        maTopLevelSchemas.put(sMainSchemaShortname, aSchema);
    }
    // Parsing a file can add new files to the work list.
    while ( ! maWorkList.isEmpty())
    {
        ParseSchemaFile(maWorkList.poll(), null);
    }
    final long nEndTime = System.currentTimeMillis();
    System.out.printf("parsed %d schema files with a total of %d lines and %d bytes in %fs\n",
        maSchemaFiles.size(),
        mnTotalLineCount,
        mnTotalByteCount,
        (nEndTime-nStartTime)/1000.0);
    System.out.printf(" found %d complex types and %d simple types\n",
        maSchemaBase.ComplexTypes.GetCount(),
        maSchemaBase.SimpleTypes.GetCount());
    int nTopLevelElementCount = 0;
    for (final Schema aSchema : maTopLevelSchemas.values())
        nTopLevelElementCount += aSchema.TopLevelElements.GetCount();
    System.out.printf(" the %d top level schemas have %d elements\n",
        maTopLevelSchemas.size(),
        nTopLevelElementCount);
}
/** Parse a single schema file, add its line and byte counts to the totals
 *  and register the schema files that it imports.
 *  @param sSchemaFilename
 *      Name of the schema file to parse.  It is recorded in maSchemaFiles.
 *  @param aSchema
 *      Passed to the SchemaParser.  Callers pass null for files that come
 *      from the work list.
 *  @throws XMLStreamException
 *      Declared because of the call to SchemaParser.Parse().
 */
private void ParseSchemaFile (
    final String sSchemaFilename,
    final Schema aSchema)
    throws XMLStreamException
{
    System.out.printf("parsing %s\n", sSchemaFilename);

    // NOTE(review): main schema files are recorded with the name from the
    // driver file while imported files are recorded with their absolute
    // path.  Verify that the same file can not show up under both names.
    maSchemaFiles.add(sSchemaFilename);

    final File aSchemaFile = new File(sSchemaFilename);
    final SchemaParser aSchemaParser = new SchemaParser(aSchemaFile, aSchema, maSchemaBase);
    aSchemaParser.Parse();

    // Update the statistics.
    mnTotalLineCount += aSchemaParser.GetLineCount();
    mnTotalByteCount += aSchemaParser.GetByteCount();

    // Queue the imported files that are not yet known.
    for (final File aImportedFile : aSchemaParser.GetImportedSchemaFilenames())
    {
        final String sImportedPath = aImportedFile.getAbsolutePath();
        AddSchemaReference(sImportedPath);
    }
}
/** Queue a schema file for parsing unless its name is already known.
 *  @param sSchemaFilename
 *      Name of the referenced schema file.
 *  @throws RuntimeException
 *      When the name is not yet known and is null.
 */
private void AddSchemaReference (final String sSchemaFilename)
{
    // Known names have either been parsed already or wait in the work list.
    if (maSchemaFiles.contains(sSchemaFilename))
        return;

    if (sSchemaFilename == null)
        throw new RuntimeException();

    // Mark the name as known right away so that further references to the
    // same file do not put it into the work list a second time.
    maSchemaFiles.add(sSchemaFilename);
    maWorkList.add(sSchemaFilename);
}
/** Split the given string at whitespace but not at whitespace inside double quotes.
 *
 *  The line is first cut at every double quote.  The resulting segments are
 *  alternately outside (even index) and inside (odd index) of quotes.
 *  Segments inside quotes are added unchanged, segments outside quotes are
 *  split at runs of whitespace and their non-empty pieces are added.
 *
 *  @param sLine
 *      The line to split.
 *  @return
 *      The parts of the line in the order in which they occur.
 */
private Vector<String> SplitLine (final String sLine)
{
    final Vector<String> aResult = new Vector<>();
    final String[] aSegments = sLine.split("\"");
    for (int nIndex=0; nIndex<aSegments.length; ++nIndex)
    {
        final String sSegment = aSegments[nIndex];
        final boolean bIsQuoted = (nIndex % 2) == 1;
        if (bIsQuoted)
        {
            // Quoted text is taken verbatim, including whitespace.
            aResult.add(sSegment);
            continue;
        }
        for (final String sWord : sSegment.split("\\s+"))
        {
            // Leading whitespace produces an empty first piece.
            if ( ! sWord.isEmpty())
                aResult.add(sWord);
        }
    }
    return aResult;
}
/** Create a File object for a given file name.
 *  Check that the file is writable, i.e. its directory exists and that if
 *  the file already exists it can be replaced.
 *  Throws a RuntimeException when a check fails.
 *
 *  @param sFilename
 *      Name of the output file.  A name without directory part refers to
 *      a file in the current working directory.
 *  @return
 *      A File object for exactly the given name.
 */
private File CreateCheckedOutputFile (final String sFilename)
{
    final File aFile = new File(sFilename);
    // File.getParentFile() returns null when the name has no directory
    // part, like "out.log".  Resolve the name to an absolute one first so
    // that such names are checked against the current working directory
    // instead of causing a NullPointerException.
    final File aDirectory = aFile.getAbsoluteFile().getParentFile();
    // The directory is still null for a file system root, which has no
    // parent that could be missing.
    if (aDirectory != null && ! aDirectory.exists())
        throw new RuntimeException("directory of "+sFilename+" does not exist: can not create file");
    if (aFile.exists() && ! aFile.canWrite())
        throw new RuntimeException("file "+sFilename+" already exists and can not be replaced");
    return aFile;
}
/** Let the LogGenerator write the schema base as it was parsed (maSchemaBase),
 *  together with the top level schemas, to the given file.
 *  NOTE(review): this is called from an output operation, and Run() replaces
 *  the values of maTopLevelSchemas by their optimized variants before it
 *  runs the output operations.  Confirm that combining the not optimized
 *  schema base with these top level schemas is intended.
 *  @param aOutputFile
 *      The file that is passed to LogGenerator.Write().
 */
private void WriteSchema (final File aOutputFile)
{
LogGenerator.Write(aOutputFile, maSchemaBase, maTopLevelSchemas.values());
}
/** Let the LogGenerator write the optimized schema base
 *  (maOptimizedSchemaBase), together with the top level schemas, to the
 *  given file.
 *  @param aOutputFile
 *      The file that is passed to LogGenerator.Write().
 */
private void WriteOptimizedSchema (final File aOutputFile)
{
LogGenerator.Write(aOutputFile, maOptimizedSchemaBase, maTopLevelSchemas.values());
}
/** Create the non-validating automatons and the simple type descriptions
 *  for the optimized schema and generate parse tables from them.
 *  The time that each of the two creation steps takes is printed to stdout.
 *  @param aAutomatonLogFile
 *      Passed to the NonValidatingCreator.
 *  @param aSimpleTypeLogFile
 *      Passed to SimpleTypeContainer.Create().
 *  @param aParseTableFile
 *      Passed to ParserTablesGenerator.Generate().
 */
private void WriteNonValidatingParseTables (
    final File aAutomatonLogFile,
    final File aSimpleTypeLogFile,
    final File aParseTableFile)
{
    // Step 1: create the automatons for the top level schemas.
    final long nAutomatonStartTime = System.currentTimeMillis();
    final FiniteAutomatonContainer aAutomatonContainer =
        new NonValidatingCreator(maOptimizedSchemaBase, aAutomatonLogFile)
            .Create(maTopLevelSchemas.values());
    final double nAutomatonSeconds = (System.currentTimeMillis()-nAutomatonStartTime)/1000.0;
    System.out.printf(
        "created %d non-validating automatons with %d states and %d transitions in %fs\n",
        aAutomatonContainer.GetAutomatonCount(),
        aAutomatonContainer.GetStateCount(),
        aAutomatonContainer.GetTransitionCount(),
        nAutomatonSeconds);

    // Step 2: create the descriptions of the simple types.
    final long nSimpleTypeStartTime = System.currentTimeMillis();
    final SimpleTypeContainer aSimpleTypeContainer = SimpleTypeContainer.Create(
        maOptimizedSchemaBase,
        aSimpleTypeLogFile);
    final double nSimpleTypeSeconds = (System.currentTimeMillis()-nSimpleTypeStartTime)/1000.0;
    System.out.printf(
        "created %d simple type descriptions in %fs\n",
        aSimpleTypeContainer.GetSimpleTypeCount(),
        nSimpleTypeSeconds);

    // Step 3: generate the parse tables.
    final ParserTablesGenerator aGenerator = new ParserTablesGenerator(
        aAutomatonContainer,
        maOptimizedSchemaBase.Namespaces,
        aSimpleTypeContainer,
        maOptimizedSchemaBase.AttributeValueToIdMap);
    aGenerator.Generate(aParseTableFile);
}
/** Create the validating automatons for the optimized schema, turn them
 *  into DFAs, minimize the DFAs, create the simple type descriptions and
 *  generate parse tables from the result.
 *  The time that each step takes is printed to stdout.
 *  @param aAutomatonLogFile
 *      Passed to the ValidatingCreator.
 *  @param aSimpleTypeLogFile
 *      Passed to SimpleTypeContainer.Create().
 *  @param aParseTableFile
 *      Passed to ParserTablesGenerator.Generate().
 */
private void WriteValidatingParseTables (
    final File aAutomatonLogFile,
    final File aSimpleTypeLogFile,
    final File aParseTableFile)
{
    long nStepStartTime;
    double nStepSeconds;

    // The same variable is reused for every stage so that the automatons of
    // the previous stage are not referenced longer than necessary.
    FiniteAutomatonContainer aContainer;

    // Step 1: create the stack automatons.
    nStepStartTime = System.currentTimeMillis();
    aContainer = new ValidatingCreator(maOptimizedSchemaBase, aAutomatonLogFile).Create();
    nStepSeconds = (System.currentTimeMillis()-nStepStartTime)/1000.0;
    System.out.printf(
        "created %d validating stack automatons with %d states and %d transitions in %fs\n",
        aContainer.GetAutomatonCount(),
        aContainer.GetStateCount(),
        aContainer.GetTransitionCount(),
        nStepSeconds);

    // Step 2: make the automatons deterministic.
    nStepStartTime = System.currentTimeMillis();
    aContainer = aContainer.CreateDFAs();
    nStepSeconds = (System.currentTimeMillis()-nStepStartTime)/1000.0;
    System.out.printf(
        "created %d deterministic automatons with %d states and %d transitions in %fs\n",
        aContainer.GetAutomatonCount(),
        aContainer.GetStateCount(),
        aContainer.GetTransitionCount(),
        nStepSeconds);

    // Step 3: minimize the deterministic automatons.
    nStepStartTime = System.currentTimeMillis();
    aContainer = aContainer.MinimizeDFAs();
    nStepSeconds = (System.currentTimeMillis()-nStepStartTime)/1000.0;
    System.out.printf(
        "minimized automaton in %fs, there are now %d states and %d transitions\n",
        nStepSeconds,
        aContainer.GetStateCount(),
        aContainer.GetTransitionCount());

    // Step 4: create the descriptions of the simple types.
    nStepStartTime = System.currentTimeMillis();
    final SimpleTypeContainer aSimpleTypeContainer = SimpleTypeContainer.Create(
        maOptimizedSchemaBase,
        aSimpleTypeLogFile);
    nStepSeconds = (System.currentTimeMillis()-nStepStartTime)/1000.0;
    System.out.printf(
        "created %d simple type descriptions in %fs\n",
        aSimpleTypeContainer.GetSimpleTypeCount(),
        nStepSeconds);

    // Step 5: generate the parse tables.
    final ParserTablesGenerator aGenerator = new ParserTablesGenerator(
        aContainer,
        maOptimizedSchemaBase.Namespaces,
        aSimpleTypeContainer,
        maOptimizedSchemaBase.AttributeValueToIdMap);
    aGenerator.Generate(aParseTableFile);
}
/** Run the HtmlGenerator for the optimized schema and the top level schemas
 *  and print to stdout how long that took.
 *  @param aHTMLPageFile
 *      Passed to the HtmlGenerator.
 */
private void WriteHTMLPage (
    final File aHTMLPageFile)
{
    final long nGenerationStartTime = System.currentTimeMillis();
    final HtmlGenerator aGenerator = new HtmlGenerator(
        maOptimizedSchemaBase,
        maTopLevelSchemas,
        aHTMLPageFile);
    aGenerator.Generate();
    final double nGenerationSeconds = (System.currentTimeMillis()-nGenerationStartTime)/1000.0;
    System.out.printf(
        "created HTML page in %fs\n",
        nGenerationSeconds);
}
/** The schema base that is handed to every Schema and SchemaParser that is created while schema files are parsed. */
private final SchemaBase maSchemaBase;
/** The result of maSchemaBase.GetOptimizedSchema().  Set in Run(), null before that. */
private SchemaBase maOptimizedSchemaBase;
/** The top level schemas, keyed by the short name from the "schema" lines of the driver file.
 *  Run() replaces the values by their optimized variants.
 */
private final Map<String,Schema> maTopLevelSchemas;
/** One entry per "schema" line of the driver file: {short name, file name}. */
private final Vector<String[]> maMainSchemaFiles;
/** Names of referenced schema files that still have to be parsed. */
private final Queue<String> maWorkList;
/** The output operations from the driver file in the order of their definition.  Executed by Run(). */
private final Vector<Runnable> maOutputOperations;
/** Names of the schema files that have been parsed or have been added to maWorkList. */
private final Set<String> maSchemaFiles;
/** Sum of the line counts that the schema parsers reported. */
private int mnTotalLineCount;
/** Sum of the byte counts that the schema parsers reported. */
private int mnTotalByteCount;
}