/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
| package org.apache.brooklyn.util.core.xstream; |
| |
| import static org.testng.Assert.assertEquals; |
| import static org.testng.Assert.assertFalse; |
| import static org.testng.Assert.assertTrue; |
| |
| import java.util.List; |
| |
| import javax.xml.xpath.XPathConstants; |
| |
| import org.apache.brooklyn.test.Asserts; |
| import org.apache.brooklyn.util.core.xstream.XmlUtil.Escaper; |
| import org.slf4j.Logger; |
| import org.slf4j.LoggerFactory; |
| import org.testng.annotations.Test; |
| import org.w3c.dom.Node; |
| import org.w3c.dom.NodeList; |
| |
| import com.google.common.annotations.Beta; |
| import com.google.common.base.Joiner; |
| import com.google.common.base.Optional; |
| import com.google.common.collect.ImmutableList; |
| import com.google.common.collect.Lists; |
| |
| public class XmlUtilTest { |
| |
| /* |
| * For invalid unicode characters, see https://issues.apache.org/jira/browse/BROOKLYN-305 and |
| * https://en.wikipedia.org/wiki/Valid_characters_in_XML. |
| * |
| * If we include the xml version in the xml string, then more unicode characters are accepted. |
| * But if it still contains something illegal, then we can resort to calling |
| * xpathHandlingIllegalChars (to temporarily escape those characters |
| * |
| */ |
| |
| private static final Logger LOG = LoggerFactory.getLogger(XmlUtilTest.class); |
| |
| @Test |
| public void testXpath() throws Exception { |
| String xml = "<a><b>myb</b></a>"; |
| for (int i = 0; i < 2; i++) { |
| assertEquals(XmlUtil.xpath(xml, "/a/b[text()]"), "myb"); |
| } |
| } |
| |
| @Test |
| public void testXpathGetsNumber() throws Exception { |
| String xml = "<a><b>1</b></a>"; |
| assertEquals(XmlUtil.xpath(xml, "/a/b[text()]", XPathConstants.NUMBER), 1.0D); |
| } |
| |
| @Test |
| public void testXpathGetsBoolean() throws Exception { |
| String xml = "<a><b>true</b></a>"; |
| assertEquals(XmlUtil.xpath(xml, "/a/b[text()]", XPathConstants.BOOLEAN), Boolean.TRUE); |
| } |
| |
| @Test |
| public void testXpathGetsNodeSet() throws Exception { |
| String xml = "<a><b><string>v1</string><string>v2</string></b></a>"; |
| NodeList resultSet = (NodeList) XmlUtil.xpath(xml, "/a/b/string", XPathConstants.NODESET); |
| List<String> results = Lists.newArrayList(); |
| for (int i = 0; i < resultSet.getLength(); i++) { |
| Node item = resultSet.item(i); |
| results.add(item.getTextContent()); |
| } |
| assertEquals(results, ImmutableList.of("v1", "v2")); |
| } |
| |
| @Test |
| public void testXpathWithEscapedCharsAndXmlVersion1_1() throws Exception { |
| StringBuilder xml = new StringBuilder("<?xml version=\"1.1\" encoding=\"UTF-8\"?>"+"\n"+ |
| "<a><b>myb</b><c>"); |
| for (int i = 0; i < Integer.valueOf("FFFF", 16); i++) { |
| if (isValidUnicodeInXml1_1(i)) { |
| String unicode = Integer.toHexString(i); |
| xml.append("&#x"+unicode+";"); |
| } |
| } |
| xml.append("</c></a>"); |
| assertEquals(XmlUtil.xpath(xml.toString(), "/a/b[text()]"), "myb"); |
| } |
| |
| @Test |
| public void testWeirdStrings() throws Exception { |
| // should work, and shouldn't print error to stderr anymore |
| Asserts.assertEquals(XmlUtil |
| .xpathHandlingIllegalChars("<x>" + |
| // "\u001b" + |
| ""+ |
| "abc<y>a</y></x>", "/x/y[text()]"), |
| "a"); |
| } |
| |
| @Test |
| public void testXpathWithEscapedCharsAndXmlUnversioned() throws Exception { |
| StringBuilder xml = new StringBuilder("<a><b>myb</b><c>"); |
| for (int i = 0; i < Integer.valueOf("FFFF", 16); i++) { |
| if (isValidUnicodeInXml1_0(i)) { |
| String unicode = Integer.toHexString(i); |
| xml.append("&#x"+unicode+";"); |
| } |
| } |
| xml.append("</c></a>"); |
| assertEquals(XmlUtil.xpath(xml.toString(), "/a/b[text()]"), "myb"); |
| } |
| |
| @Test |
| public void testXpathHandlingIllegalChars() throws Exception { |
| StringBuilder xml = new StringBuilder("<a><b>myb</b><c>"); |
| for (int i = 0; i < Integer.valueOf("FFFF", 16); i++) { |
| String unicode = Integer.toHexString(i); |
| xml.append("&#x"+unicode+";"); |
| } |
| xml.append("</c></a>"); |
| assertEquals(XmlUtil.xpathHandlingIllegalChars(xml.toString(), "/a/b[text()]"), "myb"); |
| } |
| |
| @Test |
| public void testEscaper() throws Exception { |
| // Escapes unicode char, ignoring things around it |
| assertEscapeAndUnescape("", "BR_UNICODE_x001b;"); |
| assertEscapeAndUnescape("", "BR_UNICODE_00027;"); |
| assertEscapeAndUnescape("", "BR_UNICODE_x1b;"); |
| assertEscapeAndUnescape("SUFFIX", "BR_UNICODE_x1b;SUFFIX"); |
| assertEscapeAndUnescape("PREFIX", "PREFIXBR_UNICODE_x1b;"); |
| assertEscapeAndUnescape("PREFIXSUFFIX", "PREFIXBR_UNICODE_x1b;SUFFIX"); |
| |
| // Ignores invalid unicodes |
| assertEscapeAndUnescape("", ""); // no ";" |
| assertEscapeAndUnescape("g;", "g;"); // chars out of range |
| assertEscapeAndUnescape("ƻ", "ƻ"); // too many chars |
| |
| // Handles strings that already contain the expected escape sequence |
| assertEscapeAndUnescape("BR_UNICODE_x1b;", "NOT_BR_UNICODE_x1b;"); |
| assertEscapeAndUnescape("NOT_BR_UNICODE_x1b;", "NOT_NOT_BR_UNICODE_x1b;"); |
| assertEscapeAndUnescape("BR_UNICODE_x1b;THEN", "NOT_BR_UNICODE_x1b;THENBR_UNICODE_x1b;"); |
| } |
| |
| protected void assertEscapeAndUnescape(String val) { |
| assertEscapeAndUnescape(val, Optional.<String>absent()); |
| } |
| |
| protected void assertEscapeAndUnescape(String val, String expectedEscaped) { |
| assertEscapeAndUnescape(val, Optional.of(expectedEscaped)); |
| } |
| |
| protected void assertEscapeAndUnescape(String val, Optional<String> expectedEscaped) { |
| Escaper escaper = new XmlUtil.Escaper(); |
| String escaped = escaper.escape(val); |
| if (expectedEscaped.isPresent()) { |
| assertEquals(escaped, expectedEscaped.get()); |
| } |
| assertEquals(escaper.unescape(escaped), val); |
| } |
| |
| @Beta |
| protected boolean isValidUnicodeInXml1_1(int c) { |
| int min = 0; |
| int max = 65535; // xFFFF |
| return min <= c && c <= max && |
| c != 0 && (c <= 55295 || c >= 57344) && c != 65534; |
| } |
| |
| @Beta |
| protected boolean isValidUnicodeInXml1_0(int c) { |
| // see https://www.w3.org/TR/xml/#charsets |
| return c == 0x9 || c == 0xA || c == 0xD || (0x20 <= c && c <= 0xD7FF) |
| || (0xE000 <= c && c <= 0xFFFD) || (0x10000 <= c && c <= 0x10FFFF); |
| } |
| |
| // For checking our assumptions about what is a valid and invalid (escaped) unicode char |
| // in XML version 1.0 and 1.1. |
| @Test(groups={"WIP"}, enabled=false) |
| public void testXpathWithEachEscapeCharacterAndXmlVersion() throws Exception { |
| List<Integer> errsInXml1_1 = Lists.newArrayList(); |
| List<Integer> errsInXmlUnversioned = Lists.newArrayList(); |
| for (int i = 0; i < Integer.valueOf("FFFF", 16); i++) { |
| String unicode = Integer.toHexString(i); |
| try { |
| String xml = Joiner.on("\n").join( |
| "<?xml version=\"1.1\" encoding=\"UTF-8\"?>", |
| "<a><b>myb</b><c>&#x"+unicode+";</c></a>"); |
| assertEquals(XmlUtil.xpath(xml, "/a/b[text()]"), "myb"); |
| assertTrue(isValidUnicodeInXml1_1(i), "i="+i+"; unicode="+unicode); |
| } catch (Throwable t) { |
| LOG.debug("Failed for code "+unicode+": "+t.getMessage()); |
| errsInXml1_1.add(Integer.valueOf(unicode, 16)); |
| assertFalse(isValidUnicodeInXml1_1(i), "i="+i+"; unicode="+unicode); |
| } |
| try { |
| String xml = "<a><b>myb</b><c>&#x"+unicode+";</c></a>"; |
| assertEquals(XmlUtil.xpath(xml, "/a/b[text()]"), "myb"); |
| assertTrue(isValidUnicodeInXml1_0(i), "i="+i+"; unicode="+unicode); |
| } catch (Throwable t) { |
| LOG.debug("Failed for code "+unicode+": "+t.getMessage()); |
| errsInXmlUnversioned.add(Integer.valueOf(unicode, 16)); |
| assertFalse(isValidUnicodeInXml1_0(i), "i="+i+"; unicode="+unicode); |
| } |
| } |
| LOG.info("XML version 1.1 invalidCodes="+errsInXml1_1); |
| LOG.info("XML unversioned invalidCodes="+errsInXmlUnversioned); |
| } |
| } |