| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.tika.parser.iwork; |
| |
| import org.apache.tika.metadata.Metadata; |
| import org.apache.tika.sax.XHTMLContentHandler; |
| import org.xml.sax.Attributes; |
| import org.xml.sax.SAXException; |
| import org.xml.sax.helpers.DefaultHandler; |
| |
| import java.util.HashMap; |
| import java.util.Map; |
| |
| class NumbersContentHandler extends DefaultHandler { |
| |
| private final XHTMLContentHandler xhtml; |
| private final Metadata metadata; |
| |
| private boolean inSheet = false; |
| |
| private boolean inText = false; |
| private boolean parseText = false; |
| |
| private boolean inMetadata = false; |
| private String metadataKey; |
| private String metadataPropertyQName; |
| |
| private boolean inTable = false; |
| private int numberOfSheets = 0; |
| private int numberOfColumns = -1; |
| private int currentColumn = 0; |
| |
| private Map<String, String> menuItems = new HashMap<String, String>(); |
| private String currentMenuItemId; |
| |
| NumbersContentHandler(XHTMLContentHandler xhtml, Metadata metadata) { |
| this.xhtml = xhtml; |
| this.metadata = metadata; |
| } |
| |
| @Override |
| public void endDocument() throws SAXException { |
| metadata.set(Metadata.PAGE_COUNT, String.valueOf(numberOfSheets)); |
| } |
| |
| @Override |
| public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { |
| if ("ls:workspace".equals(qName)) { |
| inSheet = true; |
| numberOfSheets++; |
| xhtml.startElement("div"); |
| String sheetName = attributes.getValue("ls:workspace-name"); |
| metadata.add("sheetNames", sheetName); |
| } |
| |
| if ("sf:text".equals(qName)) { |
| inText = true; |
| xhtml.startElement("p"); |
| } |
| |
| if ("sf:p".equals(qName)) { |
| parseText = true; |
| } |
| |
| if ("sf:metadata".equals(qName)) { |
| inMetadata = true; |
| return; |
| } |
| |
| if (inMetadata && metadataKey == null) { |
| metadataKey = resolveMetadataKey(localName); |
| metadataPropertyQName = qName; |
| } |
| |
| if (inMetadata && metadataKey != null && "sf:string".equals(qName)) { |
| metadata.add(metadataKey, attributes.getValue("sfa:string")); |
| } |
| |
| if (!inSheet) { |
| return; |
| } |
| |
| if ("sf:tabular-model".equals(qName)) { |
| inTable = true; |
| xhtml.startElement("table"); |
| xhtml.startElement("tr"); |
| currentColumn = 0; |
| // String tableName = attributes.getValue("sf:name"); |
| } |
| |
| if ("sf:menu-choices".equals(qName)) { |
| menuItems = new HashMap<String, String>(); |
| } |
| |
| if (inTable && "sf:grid".equals(qName)) { |
| numberOfColumns = Integer.parseInt(attributes.getValue("sf:numcols")); |
| } |
| |
| if (menuItems != null && "sf:t".equals(qName)) { |
| currentMenuItemId = attributes.getValue("sfa:ID"); |
| } |
| |
| if (currentMenuItemId != null && "sf:ct".equals(qName)) { |
| menuItems.put(currentMenuItemId, attributes.getValue("sfa:s")); |
| } |
| |
| if (inTable && "sf:ct".equals(qName)) { |
| if (currentColumn >= numberOfColumns) { |
| currentColumn = 0; |
| xhtml.endElement("tr"); |
| xhtml.startElement("tr"); |
| } |
| |
| xhtml.element("td", attributes.getValue("sfa:s")); |
| currentColumn++; |
| } |
| |
| if (inTable && ("sf:n".equals(qName) || "sf:rn".equals(qName))) { |
| if (currentColumn >= numberOfColumns) { |
| currentColumn = 0; |
| xhtml.endElement("tr"); |
| xhtml.startElement("tr"); |
| } |
| |
| xhtml.element("td", attributes.getValue("sf:v")); |
| currentColumn++; |
| } |
| |
| if (inTable && "sf:proxied-cell-ref".equals(qName)) { |
| if (currentColumn >= numberOfColumns) { |
| currentColumn = 0; |
| xhtml.endElement("tr"); |
| xhtml.startElement("tr"); |
| } |
| |
| xhtml.element("td", menuItems.get(attributes.getValue("sfa:IDREF"))); |
| currentColumn++; |
| } |
| } |
| |
| @Override |
| public void characters(char[] ch, int start, int length) throws SAXException { |
| if (parseText && length > 0) { |
| xhtml.characters(ch, start, length); |
| } |
| } |
| |
| @Override |
| public void endElement(String uri, String localName, String qName) throws SAXException { |
| if ("ls:workspace".equals(qName)) { |
| inSheet = false; |
| xhtml.endElement("div"); |
| } |
| |
| if ("sf:text".equals(qName)) { |
| inText = false; |
| xhtml.endElement("p"); |
| } |
| |
| if ("sf:p".equals(qName)) { |
| parseText = false; |
| } |
| |
| if ("sf:metadata".equals(qName)) { |
| inMetadata = false; |
| } |
| |
| if (inMetadata && qName.equals(metadataPropertyQName)) { |
| metadataPropertyQName = null; |
| metadataKey = null; |
| } |
| |
| if (!inSheet) { |
| return; |
| } |
| |
| if ("sf:menu-choices".equals(qName)) { |
| } |
| |
| if ("sf:tabular-model".equals(qName)) { |
| inTable = false; |
| xhtml.endElement("tr"); |
| xhtml.endElement("table"); |
| } |
| |
| if (currentMenuItemId != null && "sf:t".equals(qName)) { |
| currentMenuItemId = null; |
| } |
| } |
| |
| private String resolveMetadataKey(String localName) { |
| if ("authors".equals(localName)) { |
| return Metadata.AUTHOR; |
| } |
| |
| return localName; |
| } |
| } |