// blob: 05715c1ad14dc63211c46e0395e054e792a2082a [file] [log] [blame]
package org.apache.maven.shared.utils;
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.SequenceInputStream;
import junit.framework.ComparisonFailure;
import junit.framework.TestCase;
import org.apache.maven.shared.utils.io.IOUtil;
import org.apache.maven.shared.utils.xml.XmlStreamReader;
/**
 * Tests that {@link XmlStreamReader} reports the expected encoding and decodes the content
 * back to the original text, for XML documents serialized in various charsets, with or
 * without a byte order mark (BOM) and with or without an XML declaration.
 *
 * @author <a href="mailto:hboutemy@apache.org">Hervé Boutemy</a>
 */
public class XmlStreamReaderTest
    extends TestCase
{
    /** french */
    private static final String TEXT_LATIN1 = "eacute: \u00E9";

    /** greek */
    private static final String TEXT_LATIN7 = "alpha: \u03B1";

    /** euro support */
    private static final String TEXT_LATIN15 = "euro: \u20AC";

    /** japanese */
    private static final String TEXT_EUC_JP = "hiragana A: \u3042";

    /** Unicode: support everything */
    private static final String TEXT_UNICODE =
        TEXT_LATIN1 + ", "
        + TEXT_LATIN7 + ", "
        + TEXT_LATIN15 + ", "
        + TEXT_EUC_JP;

    /** see http://unicode.org/faq/utf_bom.html#BOM */
    private static final byte[] BOM_UTF8 = { (byte) 0xEF, (byte) 0xBB, (byte) 0xBF };

    private static final byte[] BOM_UTF16BE = { (byte) 0xFE, (byte) 0xFF };

    private static final byte[] BOM_UTF16LE = { (byte) 0xFF, (byte) 0xFE };

    /** UTF-32 big-endian BOM is 00 00 FE FF (the byte-reversed form of the little-endian one). */
    private static final byte[] BOM_UTF32BE = { (byte) 0x00, (byte) 0x00, (byte) 0xFE, (byte) 0xFF };

    private static final byte[] BOM_UTF32LE = { (byte) 0xFF, (byte) 0xFE, (byte) 0x00, (byte) 0x00 };

    /**
     * Builds a small XML document wrapping the given text in a <code>&lt;text&gt;</code> element.
     *
     * @param text the character content of the element
     * @param encoding the value for the <code>encoding</code> pseudo-attribute of the XML declaration,
     *            or <code>null</code> to emit a declaration without any encoding
     * @return the XML document as a string
     */
    private static String createXmlContent( String text, String encoding )
    {
        String xmlDecl = "<?xml version=\"1.0\"?>";
        if ( encoding != null )
        {
            xmlDecl = "<?xml version=\"1.0\" encoding=\"" + encoding + "\"?>";
        }
        return xmlDecl + "\n<text>" + text + "</text>";
    }

    /**
     * Same as {@link #checkXmlContent(String, String, byte[])} without any BOM.
     *
     * @param xml the XML document to serialize and read back
     * @param encoding the charset used to serialize, also the encoding the reader is expected to report
     * @throws IOException if reading fails or the charset is not supported
     */
    private static void checkXmlContent( String xml, String encoding )
        throws IOException
    {
        checkXmlContent( xml, encoding, null );
    }

    /**
     * Serializes the XML with the given charset, optionally prepends a BOM, reads the bytes back
     * through an {@link XmlStreamReader} and asserts both the reported encoding and the decoded text.
     *
     * @param xml the XML document to serialize and read back
     * @param encoding the charset used to serialize, also the encoding the reader is expected to report
     * @param bom the byte order mark to put in front of the content, or <code>null</code> for none
     * @throws IOException if reading fails or the charset is not supported
     */
    private static void checkXmlContent( String xml, String encoding, byte[] bom )
        throws IOException
    {
        byte[] xmlContent = xml.getBytes( encoding );
        InputStream in = new ByteArrayInputStream( xmlContent );

        if ( bom != null )
        {
            // the BOM bytes are not part of the encoded text: stream them ahead of the content
            in = new SequenceInputStream( new ByteArrayInputStream( bom ), in );
        }

        XmlStreamReader reader = new XmlStreamReader( in );
        try
        {
            assertEquals( encoding, reader.getEncoding() );
            String result = IOUtil.toString( reader );
            assertEquals( xml, result );
        }
        finally
        {
            // release the reader even when an assertion fails
            reader.close();
        }
    }

    /**
     * Checks a document declaring <code>encoding</code> but serialized as <code>effectiveEncoding</code>,
     * without BOM.
     *
     * @param text the character content of the document
     * @param encoding the encoding written in the XML declaration, may be <code>null</code>
     * @param effectiveEncoding the charset used to serialize, also the expected detected encoding
     * @throws IOException if reading fails or the charset is not supported
     */
    private static void checkXmlStreamReader( String text, String encoding, String effectiveEncoding )
        throws IOException
    {
        checkXmlStreamReader( text, encoding, effectiveEncoding, null );
    }

    /**
     * Checks a document declared and serialized with the same encoding, without BOM.
     *
     * @param text the character content of the document
     * @param encoding the encoding both declared in the XML declaration and used to serialize
     * @throws IOException if reading fails or the charset is not supported
     */
    private static void checkXmlStreamReader( String text, String encoding )
        throws IOException
    {
        checkXmlStreamReader( text, encoding, encoding, null );
    }

    /**
     * Checks a document declared and serialized with the same encoding, preceded by a BOM.
     *
     * @param text the character content of the document
     * @param encoding the encoding both declared in the XML declaration and used to serialize
     * @param bom the byte order mark to put in front of the content, or <code>null</code> for none
     * @throws IOException if reading fails or the charset is not supported
     */
    private static void checkXmlStreamReader( String text, String encoding, byte[] bom )
        throws IOException
    {
        checkXmlStreamReader( text, encoding, encoding, bom );
    }

    /**
     * Creates the XML document for the given text and declared encoding, then delegates to
     * {@link #checkXmlContent(String, String, byte[])} with the effective encoding.
     *
     * @param text the character content of the document
     * @param encoding the encoding written in the XML declaration, may be <code>null</code>
     * @param effectiveEncoding the charset used to serialize, also the expected detected encoding
     * @param bom the byte order mark to put in front of the content, or <code>null</code> for none
     * @throws IOException if reading fails or the charset is not supported
     */
    private static void checkXmlStreamReader( String text, String encoding, String effectiveEncoding, byte[] bom )
        throws IOException
    {
        String xml = createXmlContent( text, encoding );
        checkXmlContent( xml, effectiveEncoding, bom );
    }

    /** Content without any XML declaration is expected to be read as UTF-8, with or without BOM. */
    public void testNoXmlHeader()
        throws IOException
    {
        String xml = "<text>text with no XML header</text>";
        checkXmlContent( xml, "UTF-8" );
        checkXmlContent( xml, "UTF-8", BOM_UTF8 );
    }

    /** An XML declaration without encoding is expected to be read as UTF-8, with or without BOM. */
    public void testDefaultEncoding()
        throws IOException
    {
        checkXmlStreamReader( TEXT_UNICODE, null, "UTF-8" );
        checkXmlStreamReader( TEXT_UNICODE, null, "UTF-8", BOM_UTF8 );
    }

    /** Explicitly declared UTF-8, with or without BOM. */
    public void testUTF8Encoding()
        throws IOException
    {
        checkXmlStreamReader( TEXT_UNICODE, "UTF-8" );
        checkXmlStreamReader( TEXT_UNICODE, "UTF-8", BOM_UTF8 );
    }

    /**
     * Declared UTF-16: the expected byte order is big-endian when no BOM is present,
     * otherwise the one given by the BOM.
     */
    public void testUTF16Encoding()
        throws IOException
    {
        checkXmlStreamReader( TEXT_UNICODE, "UTF-16", "UTF-16BE", null );
        checkXmlStreamReader( TEXT_UNICODE, "UTF-16", "UTF-16LE", BOM_UTF16LE );
        checkXmlStreamReader( TEXT_UNICODE, "UTF-16", "UTF-16BE", BOM_UTF16BE );
    }

    /** Explicitly declared UTF-16BE without BOM. */
    public void testUTF16BEEncoding()
        throws IOException
    {
        checkXmlStreamReader( TEXT_UNICODE, "UTF-16BE" );
    }

    /** Explicitly declared UTF-16LE without BOM. */
    public void testUTF16LEEncoding()
        throws IOException
    {
        checkXmlStreamReader( TEXT_UNICODE, "UTF-16LE" );
    }

    /** Declared ISO-8859-1 with a french accented character. */
    public void testLatin1Encoding()
        throws IOException
    {
        checkXmlStreamReader( TEXT_LATIN1, "ISO-8859-1" );
    }

    /** Declared ISO-8859-7 with a greek character. */
    public void testLatin7Encoding()
        throws IOException
    {
        checkXmlStreamReader( TEXT_LATIN7, "ISO-8859-7" );
    }

    /** Declared ISO-8859-15 with the euro sign. */
    public void testLatin15Encoding()
        throws IOException
    {
        checkXmlStreamReader( TEXT_LATIN15, "ISO-8859-15" );
    }

    /** Declared EUC-JP with a japanese character. */
    public void testEUC_JPEncoding()
        throws IOException
    {
        checkXmlStreamReader( TEXT_EUC_JP, "EUC-JP" );
    }

    /** Declared CP1047 with plain text. */
    public void testEBCDICEncoding()
        throws IOException
    {
        checkXmlStreamReader( "simple text in EBCDIC", "CP1047" );
    }

    /**
     * Serializing the full Unicode sample as ISO-8859-2 cannot round-trip: the content comparison
     * done by the check helper is expected to fail.
     */
    public void testInappropriateEncoding()
        throws IOException
    {
        try
        {
            checkXmlStreamReader( TEXT_UNICODE, "ISO-8859-2" );
            fail( "Check should have failed, since some characters are not available in the specified encoding" );
        }
        catch ( ComparisonFailure cf )
        {
            // expected failure, since the encoding does not contain some characters
        }
    }

    /**
     * An <code>encoding</code> attribute on a regular element must not be taken for the encoding
     * of the XML declaration, whatever the whitespace layout of the declaration.
     */
    public void testEncodingAttribute()
        throws IOException
    {
        String xml = "<?xml version='1.0' encoding='US-ASCII'?><element encoding='attribute value'/>";
        checkXmlContent( xml, "US-ASCII" );

        xml = "<?xml version='1.0' encoding = 'US-ASCII' ?><element encoding='attribute value'/>";
        checkXmlContent( xml, "US-ASCII" );

        xml = "<?xml version='1.0'?><element encoding='attribute value'/>";
        checkXmlContent( xml, "UTF-8" );

        xml = "<?xml\nversion='1.0'\nencoding\n=\n'US-ASCII'\n?>\n<element encoding='attribute value'/>";
        checkXmlContent( xml, "US-ASCII" );

        xml = "<?xml\nversion='1.0'\n?>\n<element encoding='attribute value'/>";
        checkXmlContent( xml, "UTF-8" );

        xml = "<element encoding='attribute value'/>";
        checkXmlContent( xml, "UTF-8" );
    }
}