/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.directmemory.lightning.internal.util;

import java.io.IOException;
import java.lang.reflect.Constructor;

import org.apache.directmemory.lightning.Source;
import org.apache.directmemory.lightning.Target;

/*
 * This code is derived from the org.apache.lucene.util.UnicodeUtil class of the Apache Lucene project.
 */

/*
 * Some of this code came from the excellent Unicode
 * conversion examples from:
 *
 *   http://www.unicode.org/Public/PROGRAMS/CVTUTF
 *
 * Full Copyright for that code follows:
 */

/*
 * Copyright 2001-2004 Unicode, Inc.
 * 
 * Disclaimer
 * 
 * This source code is provided as is by Unicode, Inc. No claims are
 * made as to fitness for any particular purpose. No warranties of any
 * kind are expressed or implied. The recipient agrees to determine
 * applicability of information provided. If this file has been
 * purchased on magnetic or optical media from Unicode, Inc., the
 * sole remedy for any claim will be exchange of defective media
 * within 90 days of receipt.
 * 
 * Limitations on Rights to Redistribute This Code
 * 
 * Unicode, Inc. hereby grants the right to freely use the information
 * supplied in this file in the creation of products supporting the
 * Unicode Standard, and to make copies of this file in any form
 * for internal or external distribution as long as this notice
 * remains attached.
 */

/*
 * Additional code came from the IBM ICU library.
 *
 *  http://www.icu-project.org
 *
 * Full Copyright for that code follows.
 */

/*
 * Copyright (C) 1999-2010, International Business Machines
 * Corporation and others.  All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, and/or sell copies of the
 * Software, and to permit persons to whom the Software is furnished to do so,
 * provided that the above copyright notice(s) and this permission notice appear
 * in all copies of the Software and that both the above copyright notice(s) and
 * this permission notice appear in supporting documentation.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE
 * LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR
 * ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
 * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * Except as contained in this notice, the name of a copyright holder shall not
 * be used in advertising or otherwise to promote the sale, use or other
 * dealings in this Software without prior written authorization of the
 * copyright holder.
 */

/**
 * Utility methods that encode Java's UTF-16 strings as UTF-8 bytes written to a {@link Target} and decode them
 * again from a {@link Source}, without building an intermediate byte[] the way String.getBytes("UTF-8") does.
 * <p>
 * The layout produced by {@link #UTF16toUTF8(String, Target)} and consumed by {@link #UTF8toUTF16(Source)} is an
 * int holding the number of UTF-16 chars, followed by the UTF-8 encoded bytes of those chars.
 */
public final class UnicodeUtil
{

    public static final int UNI_SUR_HIGH_START = 0xD800;

    public static final int UNI_SUR_HIGH_END = 0xDBFF;

    public static final int UNI_SUR_LOW_START = 0xDC00;

    public static final int UNI_SUR_LOW_END = 0xDFFF;

    public static final int UNI_REPLACEMENT_CHAR = 0xFFFD;

    /** Largest code point of the Basic Multilingual Plane (representable by a single char). */
    private static final int UNI_MAX_BMP = 0x0000FFFF;

    /** Number of payload bits carried by each half of a surrogate pair. */
    private static final int HALF_SHIFT = 10;

    /** Mask selecting the payload bits of one surrogate half. */
    private static final int HALF_MASK = 0x3FF;

    /**
     * Constant folded into the surrogate-pair to code-point conversion:
     * <code>codePoint = ( high << HALF_SHIFT ) + low + SURROGATE_OFFSET</code>.
     */
    private static final int SURROGATE_OFFSET = Character.MIN_SUPPLEMENTARY_CODE_POINT
        - ( UNI_SUR_HIGH_START << HALF_SHIFT ) - UNI_SUR_LOW_START;

    /**
     * Package-private String constructor (offset, count, char[]) used to hand a char[] to a String without
     * copying it, or <code>null</code> when it is missing or cannot be made accessible on the running JVM.
     */
    private static final Constructor<String> STRING_PP_CONSTRUCTOR;

    static
    {
        Constructor<String> constructor = null;
        try
        {
            constructor = String.class.getDeclaredConstructor( int.class, int.class, char[].class );
            constructor.setAccessible( true );
        }
        catch ( NoSuchMethodException e )
        {
            // This JVM's String has no such constructor; decoding falls back to the public copying constructor.
            constructor = null;
        }
        catch ( RuntimeException e )
        {
            // SecurityException or any other unchecked refusal to open the constructor (e.g. on JVMs enforcing
            // strong encapsulation). Reset to null so an inaccessible constructor is never retried on each decode
            // and class initialization can never fail because of this optimization.
            constructor = null;
        }
        STRING_PP_CONSTRUCTOR = constructor;
    }

    private UnicodeUtil()
    {
        // static utility class, no instances
    }

    /**
     * Writes the given {@link String} to the target: first its length in UTF-16 chars (as an int), then all of
     * its chars encoded as UTF-8. Valid surrogate pairs are encoded as one 4-byte sequence; an unpaired or
     * out-of-order surrogate is replaced by the UTF-8 encoding of U+FFFD (the replacement character).
     *
     * @param value the string to encode, must not be <code>null</code>
     * @param target the target the length prefix and the encoded bytes are written to
     * @return the number of UTF-8 bytes written, not counting the length prefix
     * @throws IOException if writing to the target fails
     */
    public static int UTF16toUTF8( String value, Target target )
        throws IOException
    {
        // Read chars straight from the String; copying it into a char[] first would only add an allocation.
        final int length = value.length();

        // The prefix is the UTF-16 char count (not the byte count): the decoder sizes its char[] from it.
        target.writeInt( length );

        int writtenBytes = 0;
        int i = 0;
        while ( i < length )
        {
            final int code = value.charAt( i++ );

            if ( code < 0x80 )
            {
                // 1 byte: 0xxxxxxx
                target.writeByte( (byte) code );
                writtenBytes++;
            }
            else if ( code < 0x800 )
            {
                // 2 bytes: 110xxxxx 10xxxxxx
                target.writeByte( (byte) ( 0xC0 | ( code >> 6 ) ) );
                target.writeByte( (byte) ( 0x80 | ( code & 0x3F ) ) );
                writtenBytes += 2;
            }
            else if ( code < UNI_SUR_HIGH_START || code > UNI_SUR_LOW_END )
            {
                // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
                target.writeByte( (byte) ( 0xE0 | ( code >> 12 ) ) );
                target.writeByte( (byte) ( 0x80 | ( ( code >> 6 ) & 0x3F ) ) );
                target.writeByte( (byte) ( 0x80 | ( code & 0x3F ) ) );
                writtenBytes += 3;
            }
            else
            {
                // code is a surrogate; only a high surrogate directly followed by a low one forms a valid pair
                if ( code < UNI_SUR_LOW_START && i < length )
                {
                    final int low = value.charAt( i );
                    if ( low >= UNI_SUR_LOW_START && low <= UNI_SUR_LOW_END )
                    {
                        final int codePoint = ( code << HALF_SHIFT ) + low + SURROGATE_OFFSET;
                        i++; // the low surrogate has been consumed as part of the pair
                        // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                        target.writeByte( (byte) ( 0xF0 | ( codePoint >> 18 ) ) );
                        target.writeByte( (byte) ( 0x80 | ( ( codePoint >> 12 ) & 0x3F ) ) );
                        target.writeByte( (byte) ( 0x80 | ( ( codePoint >> 6 ) & 0x3F ) ) );
                        target.writeByte( (byte) ( 0x80 | ( codePoint & 0x3F ) ) );
                        writtenBytes += 4;
                        continue;
                    }
                }
                // Unpaired high surrogate or stray low surrogate: write U+FFFD (EF BF BD). It is a single char,
                // so the char count written in the prefix still matches what the decoder will produce.
                target.writeByte( (byte) 0xEF );
                target.writeByte( (byte) 0xBF );
                target.writeByte( (byte) 0xBD );
                writtenBytes += 3;
            }
        }
        return writtenBytes;
    }

    /**
     * Checks that every surrogate in the given sequence is part of a well-formed pair, i.e. each high surrogate
     * is immediately followed by a low surrogate and no low surrogate appears on its own.
     *
     * @param s the char sequence to check, must not be <code>null</code>
     * @return <code>true</code> if the sequence contains no unmatched surrogates
     */
    public static boolean validUTF16String( CharSequence s )
    {
        final int size = s.length();
        for ( int i = 0; i < size; i++ )
        {
            final char ch = s.charAt( i );
            if ( Character.isHighSurrogate( ch ) )
            {
                // Unmatched high surrogate: it is the last char, or the next char is not a low surrogate.
                // When the next char is inspected it is consumed as the second half of the pair.
                if ( i >= size - 1 || !Character.isLowSurrogate( s.charAt( ++i ) ) )
                {
                    return false;
                }
            }
            else if ( Character.isLowSurrogate( ch ) )
            {
                // Unmatched low surrogate
                return false;
            }
        }

        return true;
    }

    /**
     * Checks that every surrogate in the first <code>size</code> chars of the given array is part of a
     * well-formed pair, i.e. each high surrogate is immediately followed by a low surrogate and no low surrogate
     * appears on its own.
     *
     * @param s the chars to check, must not be <code>null</code>
     * @param size the number of leading chars to check; indexing fails with an
     *            {@link ArrayIndexOutOfBoundsException} if it exceeds <code>s.length</code>
     * @return <code>true</code> if the checked range contains no unmatched surrogates
     */
    public static boolean validUTF16String( char[] s, int size )
    {
        for ( int i = 0; i < size; i++ )
        {
            final char ch = s[i];
            if ( Character.isHighSurrogate( ch ) )
            {
                // Unmatched high surrogate: it is the last char, or the next char is not a low surrogate.
                // When the next char is inspected it is consumed as the second half of the pair.
                if ( i >= size - 1 || !Character.isLowSurrogate( s[++i] ) )
                {
                    return false;
                }
            }
            else if ( Character.isLowSurrogate( ch ) )
            {
                // Unmatched low surrogate
                return false;
            }
        }

        return true;
    }

    /**
     * Reads a string from the given {@link Source}: first an int holding the number of UTF-16 chars, then as many
     * UTF-8 byte sequences as are needed to produce that many chars.
     * <p>
     * NOTE: Explicit checks for well-formed UTF-8 are not performed (lead and continuation bytes are only checked
     * by assertions), so malformed input yields unspecified characters. Only a negative length prefix and a
     * supplementary character overrunning the declared length are rejected.
     *
     * @param source the source to read the length prefix and the UTF-8 bytes from
     * @return the decoded string
     * @throws IOException if reading from the source fails, the length prefix is negative or the data decodes to
     *             more chars than the length prefix announced
     */
    public static String UTF8toUTF16( Source source )
        throws IOException
    {
        final int charLength = source.readInt();
        if ( charLength < 0 )
        {
            throw new IOException( "Invalid string length prefix: " + charLength );
        }

        final char[] out = new char[charLength];
        int offset = 0;
        while ( offset < charLength )
        {
            final int b = source.readByte() & 0xff;
            if ( b < 0xc0 )
            {
                // 1 byte sequence (a continuation byte 0x80..0xbf is not a valid lead byte)
                assert b < 0x80;
                out[offset++] = (char) b;
            }
            else if ( b < 0xe0 )
            {
                // 2 byte sequence
                out[offset++] = (char) ( ( ( b & 0x1f ) << 6 ) + ( source.readByte() & 0x3f ) );
            }
            else if ( b < 0xf0 )
            {
                // 3 byte sequence
                out[offset++] =
                    (char) ( ( ( b & 0xf ) << 12 ) + ( ( source.readByte() & 0x3f ) << 6 ) + ( source.readByte() & 0x3f ) );
            }
            else
            {
                // 4 byte sequence
                assert b < 0xf8 : "b = 0x" + Integer.toHexString( b );
                final int ch =
                    ( ( b & 0x7 ) << 18 ) + ( ( source.readByte() & 0x3f ) << 12 )
                        + ( ( source.readByte() & 0x3f ) << 6 ) + ( source.readByte() & 0x3f );
                if ( ch <= UNI_MAX_BMP )
                {
                    // Only reachable for overlong encodings; U+FFFF itself still fits into a single char.
                    out[offset++] = (char) ch;
                }
                else
                {
                    // A supplementary code point needs two chars (a surrogate pair).
                    if ( offset + 1 >= charLength )
                    {
                        throw new IOException( "Encoded data exceeds the announced length of " + charLength
                            + " chars" );
                    }
                    final int chHalf = ch - Character.MIN_SUPPLEMENTARY_CODE_POINT;
                    out[offset++] = (char) ( ( chHalf >> HALF_SHIFT ) + UNI_SUR_HIGH_START );
                    out[offset++] = (char) ( ( chHalf & HALF_MASK ) + UNI_SUR_LOW_START );
                }
            }
        }

        return newString( out );
    }

    /**
     * Wraps the given chars in a String, through the non-copying constructor when it is available and through the
     * public copying constructor otherwise. The array must not be modified by the caller afterwards.
     */
    private static String newString( char[] chars )
    {
        if ( STRING_PP_CONSTRUCTOR != null )
        {
            try
            {
                return STRING_PP_CONSTRUCTOR.newInstance( 0, chars.length, chars );
            }
            catch ( Exception e )
            {
                // reflective call failed, fall through to the copying constructor
            }
        }
        return new String( chars );
    }

}
