blob: 4239344b8ce7602fb38742e7dc6d7f6cc89ffb98 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/
package org.apache.vysper.xml.decoder;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Holds a particle of XML: a start element, an end element, an element that is both at once,
 * or plain text such as an element's body.
 * <p>
 * The particle is classified once, in the constructor, purely from the beginning and the end
 * of the given string. The element name is extracted lazily on the first call to
 * {@link #getElementName()} and cached afterwards.
 *
 * @author The Apache MINA Project (dev@mina.apache.org)
 */
public class XMLParticle {

    private boolean isOpeningElement = false;

    private boolean isClosingElement = false;

    private boolean isSpecialElement = false;

    /** Lazily determined element name; stays {@code null} until a name was successfully extracted. */
    String elementName = null;

    /** The raw text of this particle, exactly as passed to the constructor. */
    String content = null;

    public static final String ELEMENT_OPEN = "<";

    public static final String ELEMENT_START_CLOSING = "</";

    public static final String ELEMENT_END_START_AND_END = "/>";

    /**
     * Extracts (as group 1) the name from a closing-only element such as {@code </name>}.
     * <p>
     * Letters and digits are matched via the Unicode categories {@code \p{L}} and {@code \p{Nd}}
     * in addition to the ASCII-only {@code \w}, and {@code '.'} is accepted inside the name, so
     * that a closing tag yields the same name as {@link #parseElementName()} produces for the
     * matching opening tag. Any run of other characters directly before or after the name is
     * skipped leniently.
     */
    public static final Pattern PATTERN_NAME_FROM_CLOSINGONLY_ELEMENT = Pattern.compile(
            "\\<[\\!\\?\\/]?[^\\w\\p{L}\\p{Nd}]*([-:.\\w\\p{L}\\p{Nd}]*)[^\\w\\p{L}\\p{Nd}]*[\\!\\?\\/]?\\>");

    /**
     * Classifies the given text.
     * <ul>
     * <li>text not starting with {@code <} is plain text (no flag set)</li>
     * <li>{@code <?...} and {@code <!...} (but not {@code <!--}) are special elements, which
     * count as opening and closing at the same time</li>
     * <li>{@code </...} is closing only</li>
     * <li>anything else ending in {@code />} is opening and closing</li>
     * <li>everything remaining, including {@code <!--}, is opening only</li>
     * </ul>
     *
     * @param content the raw particle text; must not be {@code null}
     */
    public XMLParticle(String content) {
        this.content = content;
        if (!content.startsWith(ELEMENT_OPEN)) {
            return; // plain text, all flags keep their default
        }

        boolean declarationLike = content.startsWith("<!") && !content.startsWith("<!--");
        if (declarationLike || content.startsWith("<?")) {
            isSpecialElement = true;
            isOpeningElement = true;
            isClosingElement = true;
            return;
        }

        if (content.startsWith(ELEMENT_START_CLOSING)) {
            isClosingElement = true;
        } else {
            isOpeningElement = true;
            isClosingElement = content.endsWith(ELEMENT_END_START_AND_END);
        }
    }

    /** @return {@code true} if this particle opens an element (it may close it as well) */
    public boolean isOpeningElement() {
        return isOpeningElement;
    }

    /** @return {@code true} if this particle opens an element without closing it */
    public boolean isOpeningOnlyElement() {
        return isOpeningElement && !isClosingElement;
    }

    /** @return {@code true} if this particle closes an element (it may open it as well) */
    public boolean isClosingElement() {
        return isClosingElement;
    }

    /** @return {@code true} if this particle closes an element without opening it */
    public boolean isClosingOnlyElement() {
        return !isOpeningElement && isClosingElement;
    }

    /** @return {@code true} for {@code <?...} and for {@code <!...} other than {@code <!--} */
    public boolean isSpecialElement() {
        return isSpecialElement;
    }

    /** @return {@code true} if this particle neither opens nor closes an element */
    public boolean isText() {
        return !isOpeningElement && !isClosingElement;
    }

    /** @return the raw particle text as passed to the constructor */
    public String getContent() {
        return content;
    }

    /**
     * Returns everything following the first occurrence of the element name in the content,
     * e.g. {@code " id='1'>"} for {@code "<a id='1'>"}.
     *
     * @return the remainder after the element name, or {@code null} if the name does not occur
     *         in the content
     * @throws DecodingException if the element name cannot be determined
     * @throws IllegalStateException if this particle is plain text
     */
    public String getContentWithoutElement() throws DecodingException {
        String name = getElementName();
        int nameStart = content.indexOf(name);
        if (nameStart < 0) {
            return null;
        }
        return content.substring(nameStart + name.length());
    }

    /**
     * Determines the element name, caching it after the first successful call.
     * Closing-only elements are matched against
     * {@link #PATTERN_NAME_FROM_CLOSINGONLY_ELEMENT}, all other elements are scanned by
     * {@link #parseElementName()}.
     *
     * @return the element name
     * @throws DecodingException if no legitimate name can be extracted
     * @throws IllegalStateException if this particle is plain text
     */
    public String getElementName() throws DecodingException {
        if (elementName != null) {
            return elementName;
        }

        String name;
        if (isClosingOnlyElement()) {
            Matcher matcher = PATTERN_NAME_FROM_CLOSINGONLY_ELEMENT.matcher(content);
            if (!matcher.matches()) {
                throw new DecodingException("closing element name could not be determined by parser for " + content);
            }
            name = matcher.group(1);
        } else if (isOpeningElement()) {
            name = parseElementName();
            if (":".equals(name)) {
                throw new DecodingException("':' is not a legitimate XML element name");
            }
        } else {
            throw new IllegalStateException("element must be opening or closing (or both)");
        }

        // cache only after validation, so that a rejected name is rejected on every call
        elementName = name;
        return name;
    }

    /**
     * Scans the content for the element name. The content must start with {@code <}; a single
     * {@code !}, {@code ?} or {@code /} directly behind it is skipped. The name ends at the
     * first whitespace, {@code >}, {@code /}, {@code !} or {@code ?}, or at the end of the
     * content.
     * <p>
     * The scan advances by Unicode code point, so letters outside the Basic Multilingual Plane
     * are handled as one character.
     *
     * @return the element name; empty if the content ends before any name character
     * @throws DecodingException if the content does not start with {@code <} or contains a
     *         character which is not allowed at its position in the name
     */
    public String parseElementName() throws DecodingException {
        StringBuilder name = new StringBuilder();
        final int length = content.length();
        boolean beforeElement = true;

        int next = 0;
        while (next < length) {
            final int index = next;
            final int codePoint = content.codePointAt(index);
            next += Character.charCount(codePoint); // a supplementary character spans two chars

            if (index == 0) {
                if (codePoint != '<') {
                    throw new DecodingException("element does not start with '<'");
                }
                continue;
            }
            if (index == 1 && (codePoint == '!' || codePoint == '?' || codePoint == '/')) {
                continue; // TODO check, if next char is '>'
            }

            if (beforeElement) {
                if (!isLegitimateNameStartChar(codePoint)) {
                    throw new DecodingException("cannot start element name with char " + asString(codePoint));
                }
                beforeElement = false;
            } else if (!isLegitimateNameChar(codePoint)) {
                if (!endsElementName(codePoint)) {
                    throw new DecodingException("char not allowed in element name: " + asString(codePoint));
                }
                break; // name is completed
            }
            name.appendCodePoint(codePoint);
        }
        return name.toString();
    }

    /** Renders a code point as text, also if it lies outside the Basic Multilingual Plane. */
    private static String asString(int codePoint) {
        return new String(Character.toChars(codePoint));
    }

    /**
     * Tells whether the given character legitimately terminates an element name.
     * For whitespace, {@code >} and {@code /} the following character is not relevant.
     */
    private static boolean endsElementName(int c) {
        return Character.isWhitespace(c) || c == '>' || c == '/'
                || c == '!' || c == '?'; // TODO check, if next char is '>'
    }

    /**
     * NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender
     * <p>
     * CombiningChar and Extender are only accepted as far as {@link Character#isLetter(int)}
     * or {@link Character#isDigit(int)} cover them.
     */
    private static boolean isLegitimateNameChar(int c) {
        return isLegitimateNameStartChar(c) || Character.isDigit(c) || c == '.' || c == '-';
    }

    /** A name may start with a letter, {@code '_'} or {@code ':'}. */
    private static boolean isLegitimateNameStartChar(int c) {
        return Character.isLetter(c) || c == '_' || c == ':';
    }
}