blob: fe680c2fe931a050bb78e81e481fafc4f6e6ed15 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. The ASF licenses this file to You
* under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License. For additional information regarding
* copyright in this work, please see the NOTICE file in the top level
* directory of this distribution.
*/
package org.apache.abdera2.common.text;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.io.Reader;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.channels.Channels;
import java.nio.channels.ReadableByteChannel;
import java.nio.channels.WritableByteChannel;
import java.nio.charset.Charset;
import java.util.Iterator;
import java.util.NoSuchElementException;
import com.ibm.icu.text.UCharacterIterator;
public abstract class CodepointIterator
implements Iterator<Integer> {
/**
* Get a CodepointIterator for the specified char array
*/
public static CodepointIterator getInstance(char[] array) {
return new CharArrayCodepointIterator(array);
}
/**
* Get a CodepointIterator for the specified CharSequence
*/
public static CodepointIterator getInstance(CharSequence seq) {
return new CharSequenceCodepointIterator(seq);
}
/**
* Get a CodepointIterator for the specified byte array, using the default charset
*/
public static CodepointIterator getInstance(byte[] array) {
return new ByteArrayCodepointIterator(array);
}
/**
* Get a CodepointIterator for the specified byte array, using the specified charset
*/
public static CodepointIterator getInstance(byte[] array, String charset) {
return new ByteArrayCodepointIterator(array, charset);
}
/**
* Get a CodepointIterator for the specified CharBuffer
*/
public static CodepointIterator getInstance(CharBuffer buffer) {
return new CharBufferCodepointIterator(buffer);
}
/**
* Get a CodepointIterator for the specified ReadableByteChannel
*/
public static CodepointIterator getInstance(ReadableByteChannel channel) {
return new ReadableByteChannelCodepointIterator(channel);
}
/**
* Get a CodepointIterator for the specified ReadableByteChannel
*/
public static CodepointIterator getInstance(ReadableByteChannel channel, String charset) {
return new ReadableByteChannelCodepointIterator(channel, charset);
}
/**
* Get a CodepointIterator for the specified InputStream
*/
public static CodepointIterator getInstance(InputStream in) {
return new ReadableByteChannelCodepointIterator(Channels.newChannel(in));
}
/**
* Get a CodepointIterator for the specified InputStream using the specified charset
*/
public static CodepointIterator getInstance(InputStream in, String charset) {
return new ReadableByteChannelCodepointIterator(Channels.newChannel(in), charset);
}
/**
* Get a CodepointIterator for the specified Reader
*/
public static CodepointIterator getInstance(Reader in) {
return new ReaderCodepointIterator(in);
}
/**
* Get the next codepoint
*/
protected abstract int get();
/**
* True if there are codepoints remaining
*/
public abstract boolean hasNext();
/**
* Return the next codepoint
*/
public Integer next() throws InvalidCharacterException {
return get();
}
public void remove() {
throw new UnsupportedOperationException();
}
static class ByteArrayCodepointIterator extends CharArrayCodepointIterator {
public ByteArrayCodepointIterator(byte[] bytes) {
this(bytes, Charset.defaultCharset());
}
public ByteArrayCodepointIterator(byte[] bytes, String charset) {
this(bytes, Charset.forName(charset));
}
public ByteArrayCodepointIterator(byte[] bytes, Charset charset) {
super(charset.decode(ByteBuffer.wrap(bytes)).array());
}
}
static class CharArrayCodepointIterator extends CodepointIterator {
protected final UCharacterIterator i;
protected int current;
public CharArrayCodepointIterator(char[] buffer) {
this.i = UCharacterIterator.getInstance(buffer);
current = i.next();
}
public CharArrayCodepointIterator(char[] buffer, int n, int e) {
this.i = UCharacterIterator.getInstance(buffer, n, e);
current = i.next();
}
protected int get() {
if (current != UCharacterIterator.DONE) {
int v = current;
current = i.nextCodePoint();
return v;
} else throw new NoSuchElementException();
}
public boolean hasNext() {
return current != UCharacterIterator.DONE;
}
}
static class CharBufferCodepointIterator extends CharArrayCodepointIterator {
public CharBufferCodepointIterator(CharBuffer cb) {
super(cb.array());
}
}
static class CharSequenceCodepointIterator extends CharArrayCodepointIterator {
public CharSequenceCodepointIterator(CharSequence buffer) {
this(buffer, 0, buffer.length());
}
public CharSequenceCodepointIterator(CharSequence buffer, int n, int e) {
super(toCharArray(buffer,n,e));
}
private static char[] toCharArray(CharSequence seq, int n, int e) {
char[] ret = new char[seq.length()];
for (int i = n; i < e; i++)
ret[i] = seq.charAt(i);
return ret;
}
}
static class ReadableByteChannelCodepointIterator extends CharArrayCodepointIterator {
public ReadableByteChannelCodepointIterator(ReadableByteChannel channel) {
this(channel, Charset.defaultCharset());
}
public ReadableByteChannelCodepointIterator(ReadableByteChannel channel, String charset) {
this(channel, Charset.forName(charset));
}
public ReadableByteChannelCodepointIterator(ReadableByteChannel channel, Charset charset) {
super(toCharArray(channel, charset));
}
private static char[] toCharArray(ReadableByteChannel channel, Charset charset) {
try {
ByteBuffer buf = ByteBuffer.allocate(1024);
ByteArrayOutputStream out = new ByteArrayOutputStream();
WritableByteChannel outc = Channels.newChannel(out);
while (channel.read(buf) > 0) {
buf.flip();
outc.write(buf);
}
CharBuffer cb = charset.decode(ByteBuffer.wrap(out.toByteArray()));
return cb.array();
} catch (Throwable t) {
throw new RuntimeException(t);
}
}
}
static class ReaderCodepointIterator extends CharArrayCodepointIterator {
public ReaderCodepointIterator(Reader reader) {
super(toCharArray(reader));
}
private static char[] toCharArray(Reader reader) {
try {
StringBuilder sb = new StringBuilder();
char[] buf = new char[1024];
int n = -1;
while ((n = reader.read(buf)) > -1) {
sb.append(buf, 0, n);
}
char[] ret = new char[sb.length()];
sb.getChars(0, sb.length(), ret, 0);
return ret;
} catch (Throwable t) {
throw new RuntimeException(t);
}
}
}
}