| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.commons.io.output; |
| |
| import java.io.File; |
| import java.io.FileNotFoundException; |
| import java.io.FileOutputStream; |
| import java.io.IOException; |
| import java.io.OutputStream; |
| import java.io.OutputStreamWriter; |
| import java.io.StringWriter; |
| import java.io.Writer; |
| import java.nio.charset.Charset; |
| import java.nio.charset.StandardCharsets; |
| import java.util.Locale; |
| import java.util.Objects; |
| import java.util.regex.Matcher; |
| |
| import org.apache.commons.io.Charsets; |
| import org.apache.commons.io.IOUtils; |
| import org.apache.commons.io.build.AbstractStreamBuilder; |
| import org.apache.commons.io.input.XmlStreamReader; |
| |
| /** |
| * Character stream that handles all the necessary work to figure out the charset encoding of the XML document written to the stream. |
| * <p> |
| * To build an instance, use {@link Builder}. |
| * </p> |
| * |
| * @see Builder |
| * @see XmlStreamReader |
| * @since 2.0 |
| */ |
| public class XmlStreamWriter extends Writer { |
| |
| // @formatter:off |
| /** |
| * Builds a new {@link XmlStreamWriter}. |
| * |
| * <p> |
| * For example: |
| * </p> |
| * <pre>{@code |
| * WriterOutputStream w = WriterOutputStream.builder() |
| * .setPath(path) |
| * .setCharset(StandardCharsets.UTF_8) |
| * .get();} |
| * </pre> |
| * |
| * @see #get() |
| * @since 2.12.0 |
| */ |
| // @formatter:off |
| public static class Builder extends AbstractStreamBuilder<XmlStreamWriter, Builder> { |
| |
| /** |
| * Constructs a new {@link Builder}. |
| */ |
| public Builder() { |
| setCharsetDefault(StandardCharsets.UTF_8); |
| setCharset(StandardCharsets.UTF_8); |
| } |
| |
| /** |
| * Builds a new {@link XmlStreamWriter}. |
| * <p> |
| * You must set input that supports {@link #getOutputStream()} on this builder, otherwise, this method throws an exception. |
| * </p> |
| * <p> |
| * This builder use the following aspects: |
| * </p> |
| * <ul> |
| * <li>{@link #getOutputStream()}</li> |
| * <li>{@link #getCharset()}</li> |
| * </ul> |
| * |
| * @return a new instance. |
| * @throws IllegalStateException if the {@code origin} is {@code null}. |
| * @throws UnsupportedOperationException if the origin cannot be converted to an {@link OutputStream}. |
| * @throws IOException if an I/O error occurs. |
| * @see #getOutputStream() |
| */ |
| @SuppressWarnings("resource") |
| @Override |
| public XmlStreamWriter get() throws IOException { |
| return new XmlStreamWriter(getOutputStream(), getCharset()); |
| } |
| |
| } |
| |
| private static final int BUFFER_SIZE = IOUtils.DEFAULT_BUFFER_SIZE; |
| |
| /** |
| * Constructs a new {@link Builder}. |
| * |
| * @return a new {@link Builder}. |
| * @since 2.12.0 |
| */ |
| public static Builder builder() { |
| return new Builder(); |
| } |
| |
| private final OutputStream out; |
| |
| private final Charset defaultCharset; |
| |
| private StringWriter prologWriter = new StringWriter(BUFFER_SIZE); |
| |
| private Writer writer; |
| |
| private Charset charset; |
| |
| /** |
| * Constructs a new XML stream writer for the specified file |
| * with a default encoding of UTF-8. |
| * |
| * @param file The file to write to |
| * @throws FileNotFoundException if there is an error creating or |
| * opening the file |
| * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} |
| */ |
| @Deprecated |
| public XmlStreamWriter(final File file) throws FileNotFoundException { |
| this(file, null); |
| } |
| |
| /** |
| * Constructs a new XML stream writer for the specified file |
| * with the specified default encoding. |
| * |
| * @param file The file to write to |
| * @param defaultEncoding The default encoding if not encoding could be detected |
| * @throws FileNotFoundException if there is an error creating or |
| * opening the file |
| * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} |
| */ |
| @Deprecated |
| @SuppressWarnings("resource") |
| public XmlStreamWriter(final File file, final String defaultEncoding) throws FileNotFoundException { |
| this(new FileOutputStream(file), defaultEncoding); |
| } |
| |
| /** |
| * Constructs a new XML stream writer for the specified output stream |
| * with a default encoding of UTF-8. |
| * |
| * @param out The output stream |
| * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} |
| */ |
| @Deprecated |
| public XmlStreamWriter(final OutputStream out) { |
| this(out, StandardCharsets.UTF_8); |
| } |
| |
| /** |
| * Constructs a new XML stream writer for the specified output stream |
| * with the specified default encoding. |
| * |
| * @param out The output stream |
| * @param defaultEncoding The default encoding if not encoding could be detected |
| */ |
| private XmlStreamWriter(final OutputStream out, final Charset defaultEncoding) { |
| this.out = out; |
| this.defaultCharset = Objects.requireNonNull(defaultEncoding); |
| } |
| |
| /** |
| * Constructs a new XML stream writer for the specified output stream |
| * with the specified default encoding. |
| * |
| * @param out The output stream |
| * @param defaultEncoding The default encoding if not encoding could be detected |
| * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} |
| */ |
| @Deprecated |
| public XmlStreamWriter(final OutputStream out, final String defaultEncoding) { |
| this(out, Charsets.toCharset(defaultEncoding, StandardCharsets.UTF_8)); |
| } |
| |
| /** |
| * Closes the underlying writer. |
| * |
| * @throws IOException if an error occurs closing the underlying writer |
| */ |
| @Override |
| public void close() throws IOException { |
| if (writer == null) { |
| charset = defaultCharset; |
| writer = new OutputStreamWriter(out, charset); |
| writer.write(prologWriter.toString()); |
| } |
| writer.close(); |
| } |
| |
| /** |
| * Detects the encoding. |
| * |
| * @param cbuf the buffer to write the characters from |
| * @param off The start offset |
| * @param len The number of characters to write |
| * @throws IOException if an error occurs detecting the encoding |
| */ |
| private void detectEncoding(final char[] cbuf, final int off, final int len) |
| throws IOException { |
| int size = len; |
| final StringBuffer xmlProlog = prologWriter.getBuffer(); |
| if (xmlProlog.length() + len > BUFFER_SIZE) { |
| size = BUFFER_SIZE - xmlProlog.length(); |
| } |
| prologWriter.write(cbuf, off, size); |
| |
| // try to determine encoding |
| if (xmlProlog.length() >= 5) { |
| if (xmlProlog.substring(0, 5).equals("<?xml")) { |
| // try to extract encoding from XML prolog |
| final int xmlPrologEnd = xmlProlog.indexOf("?>"); |
| if (xmlPrologEnd > 0) { |
| // ok, full XML prolog written: let's extract encoding |
| final Matcher m = XmlStreamReader.ENCODING_PATTERN.matcher(xmlProlog.substring(0, |
| xmlPrologEnd)); |
| if (m.find()) { |
| final String encName = m.group(1).toUpperCase(Locale.ROOT); |
| charset = Charset.forName(encName.substring(1, encName.length() - 1)); |
| } else { |
| // no encoding found in XML prolog: using default |
| // encoding |
| charset = defaultCharset; |
| } |
| } else if (xmlProlog.length() >= BUFFER_SIZE) { |
| // no encoding found in first characters: using default |
| // encoding |
| charset = defaultCharset; |
| } |
| } else { |
| // no XML prolog: using default encoding |
| charset = defaultCharset; |
| } |
| if (charset != null) { |
| // encoding has been chosen: let's do it |
| prologWriter = null; |
| writer = new OutputStreamWriter(out, charset); |
| writer.write(xmlProlog.toString()); |
| if (len > size) { |
| writer.write(cbuf, off + size, len - size); |
| } |
| } |
| } |
| } |
| |
| /** |
| * Flushes the underlying writer. |
| * |
| * @throws IOException if an error occurs flushing the underlying writer |
| */ |
| @Override |
| public void flush() throws IOException { |
| if (writer != null) { |
| writer.flush(); |
| } |
| } |
| |
| /** |
| * Returns the default encoding. |
| * |
| * @return the default encoding |
| */ |
| public String getDefaultEncoding() { |
| return defaultCharset.name(); |
| } |
| |
| /** |
| * Returns the detected encoding. |
| * |
| * @return the detected encoding |
| */ |
| public String getEncoding() { |
| return charset.name(); |
| } |
| |
| /** |
| * Writes the characters to the underlying writer, detecting encoding. |
| * |
| * @param cbuf the buffer to write the characters from |
| * @param off The start offset |
| * @param len The number of characters to write |
| * @throws IOException if an error occurs detecting the encoding |
| */ |
| @Override |
| public void write(final char[] cbuf, final int off, final int len) throws IOException { |
| if (prologWriter != null) { |
| detectEncoding(cbuf, off, len); |
| } else { |
| writer.write(cbuf, off, len); |
| } |
| } |
| } |