src/main/java/org/apache/commons/lang3/CharSequenceUtils.java - commons-lang - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.commons.lang3;

 /**
  * <p>Operations on {@link CharSequence} that are
  * {@code null} safe.</p>
  *
  * <p>The package-private helpers delegate to {@link String} whenever the
  * argument already is a {@code String}; otherwise they walk the sequence
  * through {@link CharSequence#charAt(int)} and aim to return the same
  * results {@code String} would.</p>
  *
  * @see CharSequence
  * @since 3.0
  */
 public class CharSequenceUtils {

     /** Result returned by the search methods when nothing matches. */
     private static final int NOT_FOUND = -1;

     /**
      * <p>{@code CharSequenceUtils} instances should NOT be constructed in
      * standard programming. </p>
      *
      * <p>This constructor is public to permit tools that require a JavaBean
      * instance to operate.</p>
      */
     public CharSequenceUtils() {
         super();
     }

     //-----------------------------------------------------------------------
     /**
      * <p>Returns a new {@code CharSequence} that is a subsequence of this
      * sequence starting with the {@code char} value at the specified index.</p>
      *
      * <p>This provides the {@code CharSequence} equivalent to {@link String#substring(int)}.
      * The length (in {@code char}) of the returned sequence is {@code length() - start},
      * so if {@code start == length()} then an empty sequence is returned.</p>
      *
      * @param cs  the sequence to take the subsequence from, null returns null
      * @param start  the start index, inclusive, valid
      * @return a new subsequence, may be null
      * @throws IndexOutOfBoundsException if {@code start} is negative or if
      *  {@code start} is greater than {@code length()}
      */
     public static CharSequence subSequence(final CharSequence cs, final int start) {
         return cs == null ? null : cs.subSequence(start, cs.length());
     }

     //-----------------------------------------------------------------------
     /**
      * Returns the index within {@code cs} of the first occurrence of the
      * specified character, starting the search at the specified index.
      * <p>
      * If a character with value {@code searchChar} occurs in the
      * character sequence represented by the {@code cs}
      * object at an index no smaller than {@code start}, then
      * the index of the first such occurrence is returned. For values
      * of {@code searchChar} in the range from 0 to 0xFFFF (inclusive),
      * this is the smallest value <i>k</i> such that:
      * <blockquote><pre>
      * (this.charAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &gt;= start)
      * </pre></blockquote>
      * is true. For other values of {@code searchChar}, it is the
      * smallest value <i>k</i> such that:
      * <blockquote><pre>
      * (this.codePointAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &gt;= start)
      * </pre></blockquote>
      * is true. In either case, if no such character occurs in {@code cs}
      * at or after position {@code start}, then
      * {@code -1} is returned. A {@code searchChar} that is not a valid
      * code point (negative, or above {@link Character#MAX_CODE_POINT})
      * is never found, so {@code -1} is returned for it as well.
      *
      * <p>
      * There is no restriction on the value of {@code start}. If it
      * is negative, it has the same effect as if it were zero: the entire
      * {@code CharSequence} may be searched. If it is greater than
      * the length of {@code cs}, it has the same effect as if it were
      * equal to the length of {@code cs}: {@code -1} is returned.
      *
      * <p>All indices are specified in {@code char} values
      * (Unicode code units).
      *
      * @param cs  the {@code CharSequence} to be processed, not null
      * @param searchChar  the char to be searched for
      * @param start  the start index, negative starts at the string start
      * @return the index where the search char was found, -1 if not found
      * @since 3.6 updated to behave more like {@code String}
      */
     static int indexOf(final CharSequence cs, final int searchChar, int start) {
         if (cs instanceof String) {
             return ((String) cs).indexOf(searchChar, start);
         }
         final int sz = cs.length();
         if (start < 0) {
             start = 0;
         }
         if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
             for (int i = start; i < sz; i++) {
                 if (cs.charAt(i) == searchChar) {
                     return i;
                 }
             }
             // Stop here: a BMP (or negative) value can never match as a surrogate pair,
             // and Character.toChars() below would reject a negative value with an exception.
             return NOT_FOUND;
         }
         //supplementary characters (LANG1300)
         if (searchChar <= Character.MAX_CODE_POINT) {
             // for a supplementary code point this is always {high surrogate, low surrogate}
             final char[] chars = Character.toChars(searchChar);
             for (int i = start; i < sz - 1; i++) {
                 final char high = cs.charAt(i);
                 final char low = cs.charAt(i + 1);
                 if (high == chars[0] && low == chars[1]) {
                     return i;
                 }
             }
         }
         return NOT_FOUND;
     }

     /**
      * Used by the indexOf(CharSequence methods) as a green implementation of indexOf.
      *
      * <p>Both arguments are converted with {@code toString()} and the search is
      * delegated to {@link String#indexOf(String, int)}.</p>
      *
      * @param cs the {@code CharSequence} to be processed, not null
      * @param searchChar the {@code CharSequence} to be searched for, not null
      * @param start the start index
      * @return the index where the search sequence was found, -1 if not found
      */
     static int indexOf(final CharSequence cs, final CharSequence searchChar, final int start) {
         // TODO: Implement directly on CharSequence rather than converting to String
         return cs.toString().indexOf(searchChar.toString(), start);
     }

     /**
      * Returns the index within {@code cs} of the last occurrence of
      * the specified character, searching backward starting at the
      * specified index. For values of {@code searchChar} in the range
      * from 0 to 0xFFFF (inclusive), the index returned is the largest
      * value <i>k</i> such that:
      * <blockquote><pre>
      * (this.charAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &lt;= start)
      * </pre></blockquote>
      * is true. For other values of {@code searchChar}, it is the
      * largest value <i>k</i> such that:
      * <blockquote><pre>
      * (this.codePointAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &lt;= start)
      * </pre></blockquote>
      * is true. In either case, if no such character occurs in {@code cs}
      * at or before position {@code start}, then {@code -1} is returned.
      * A {@code searchChar} that is not a valid code point (negative, or above
      * {@link Character#MAX_CODE_POINT}) is never found, so {@code -1} is
      * returned for it as well.
      *
      * <p>All indices are specified in {@code char} values
      * (Unicode code units).
      *
      * @param cs  the {@code CharSequence} to be processed, not null
      * @param searchChar  the char to be searched for
      * @param start  the start index, negative returns -1, beyond length starts at end
      * @return the index where the search char was found, -1 if not found
      * @since 3.6 updated to behave more like {@code String}
      */
     static int lastIndexOf(final CharSequence cs, final int searchChar, int start) {
         if (cs instanceof String) {
             return ((String) cs).lastIndexOf(searchChar, start);
         }
         final int sz = cs.length();
         if (start < 0) {
             return NOT_FOUND;
         }
         if (start >= sz) {
             start = sz - 1;
         }
         if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
             for (int i = start; i >= 0; --i) {
                 if (cs.charAt(i) == searchChar) {
                     return i;
                 }
             }
             // Stop here: a BMP (or negative) value can never match as a surrogate pair,
             // and Character.toChars() below would reject a negative value with an exception.
             return NOT_FOUND;
         }
         //supplementary characters (LANG1300)
         if (searchChar <= Character.MAX_CODE_POINT) {
             // for a supplementary code point this is always {high surrogate, low surrogate}
             final char[] chars = Character.toChars(searchChar);
             // A pair needs two chars, so the last index a match can begin at is sz - 2.
             // Clamp instead of giving up so that a pair ending the sequence is still found,
             // which is what String.lastIndexOf(int, int) does.
             for (int i = Math.min(start, sz - 2); i >= 0; i--) {
                 final char high = cs.charAt(i);
                 final char low = cs.charAt(i + 1);
                 if (chars[0] == high && chars[1] == low) {
                     return i;
                 }
             }
         }
         return NOT_FOUND;
     }

     /**
      * Used by the lastIndexOf(CharSequence methods) as a green implementation of lastIndexOf
      *
      * <p>Both arguments are converted with {@code toString()} and the search is
      * delegated to {@link String#lastIndexOf(String, int)}.</p>
      *
      * @param cs the {@code CharSequence} to be processed, not null
      * @param searchChar the {@code CharSequence} to be searched for, not null
      * @param start the start index
      * @return the index where the search sequence was found, -1 if not found
      */
     static int lastIndexOf(final CharSequence cs, final CharSequence searchChar, final int start) {
         // TODO: Implement directly on CharSequence rather than converting to String
         return cs.toString().lastIndexOf(searchChar.toString(), start);
     }

     /**
      * Green implementation of toCharArray.
      *
      * @param cs the {@code CharSequence} to be processed, not null
      * @return a newly allocated char array holding the chars of {@code cs}
      */
     static char[] toCharArray(final CharSequence cs) {
         if (cs instanceof String) {
             return ((String) cs).toCharArray();
         }
         final int sz = cs.length();
         final char[] array = new char[sz];
         for (int i = 0; i < sz; i++) {
             array[i] = cs.charAt(i);
         }
         return array;
     }

     /**
      * Green implementation of regionMatches.
      *
      * <p>When both sequences are {@code String}s the call is delegated to
      * {@link String#regionMatches(boolean, int, String, int, int)}. Otherwise the
      * regions are compared char by char; a negative offset or length, or a
      * region that runs past the end of either sequence, yields {@code false}.</p>
      *
      * @param cs the {@code CharSequence} to be processed, not null
      * @param ignoreCase whether or not to be case insensitive
      * @param thisStart the index to start on the {@code cs} CharSequence
      * @param substring the {@code CharSequence} to be looked for, not null
      * @param start the index to start on the {@code substring} CharSequence
      * @param length character length of the region
      * @return whether the region matched
      */
     static boolean regionMatches(final CharSequence cs, final boolean ignoreCase, final int thisStart,
             final CharSequence substring, final int start, final int length)    {
         if (cs instanceof String && substring instanceof String) {
             return ((String) cs).regionMatches(ignoreCase, thisStart, (String) substring, start, length);
         }
         int index1 = thisStart;
         int index2 = start;
         int tmpLen = length;

         // Extract these first so we detect NPEs the same as the java.lang.String version
         final int srcLen = cs.length() - thisStart;
         final int otherLen = substring.length() - start;

         // Check for invalid parameters
         if (thisStart < 0 || start < 0 || length < 0) {
             return false;
         }

         // Check that the regions are long enough
         if (srcLen < length || otherLen < length) {
             return false;
         }

         while (tmpLen-- > 0) {
             final char c1 = cs.charAt(index1++);
             final char c2 = substring.charAt(index2++);

             if (c1 == c2) {
                 continue;
             }

             if (!ignoreCase) {
                 return false;
             }

             // The same check as in String.regionMatches(): the chars only differ
             // if neither their upper-case nor their lower-case forms agree.
             if (Character.toUpperCase(c1) != Character.toUpperCase(c2)
                     && Character.toLowerCase(c1) != Character.toLowerCase(c2)) {
                 return false;
             }
         }

         return true;
     }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.commons.lang3;

	/**
	 * <p>Operations on {@link CharSequence} that are
	 * {@code null} safe.</p>
	 *
	 * <p>The package-private helpers delegate to {@link String} whenever the
	 * argument already is a {@code String}; otherwise they walk the sequence
	 * through {@link CharSequence#charAt(int)} and aim to return the same
	 * results {@code String} would.</p>
	 *
	 * @see CharSequence
	 * @since 3.0
	 */
	public class CharSequenceUtils {

		/** Result returned by the search methods when nothing matches. */
		private static final int NOT_FOUND = -1;

		/**
		 * <p>{@code CharSequenceUtils} instances should NOT be constructed in
		 * standard programming. </p>
		 *
		 * <p>This constructor is public to permit tools that require a JavaBean
		 * instance to operate.</p>
		 */
		public CharSequenceUtils() {
			super();
		}

		//-----------------------------------------------------------------------
		/**
		 * <p>Returns a new {@code CharSequence} that is a subsequence of this
		 * sequence starting with the {@code char} value at the specified index.</p>
		 *
		 * <p>This provides the {@code CharSequence} equivalent to {@link String#substring(int)}.
		 * The length (in {@code char}) of the returned sequence is {@code length() - start},
		 * so if {@code start == length()} then an empty sequence is returned.</p>
		 *
		 * @param cs the sequence to take the subsequence from, null returns null
		 * @param start the start index, inclusive, valid
		 * @return a new subsequence, may be null
		 * @throws IndexOutOfBoundsException if {@code start} is negative or if
		 * {@code start} is greater than {@code length()}
		 */
		public static CharSequence subSequence(final CharSequence cs, final int start) {
			return cs == null ? null : cs.subSequence(start, cs.length());
		}

		//-----------------------------------------------------------------------
		/**
		 * Returns the index within {@code cs} of the first occurrence of the
		 * specified character, starting the search at the specified index.
		 * <p>
		 * If a character with value {@code searchChar} occurs in the
		 * character sequence represented by the {@code cs}
		 * object at an index no smaller than {@code start}, then
		 * the index of the first such occurrence is returned. For values
		 * of {@code searchChar} in the range from 0 to 0xFFFF (inclusive),
		 * this is the smallest value <i>k</i> such that:
		 * <blockquote><pre>
		 * (this.charAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &gt;= start)
		 * </pre></blockquote>
		 * is true. For other values of {@code searchChar}, it is the
		 * smallest value <i>k</i> such that:
		 * <blockquote><pre>
		 * (this.codePointAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &gt;= start)
		 * </pre></blockquote>
		 * is true. In either case, if no such character occurs in {@code cs}
		 * at or after position {@code start}, then
		 * {@code -1} is returned. A {@code searchChar} that is not a valid
		 * code point (negative, or above {@link Character#MAX_CODE_POINT})
		 * is never found, so {@code -1} is returned for it as well.
		 *
		 * <p>
		 * There is no restriction on the value of {@code start}. If it
		 * is negative, it has the same effect as if it were zero: the entire
		 * {@code CharSequence} may be searched. If it is greater than
		 * the length of {@code cs}, it has the same effect as if it were
		 * equal to the length of {@code cs}: {@code -1} is returned.
		 *
		 * <p>All indices are specified in {@code char} values
		 * (Unicode code units).
		 *
		 * @param cs the {@code CharSequence} to be processed, not null
		 * @param searchChar the char to be searched for
		 * @param start the start index, negative starts at the string start
		 * @return the index where the search char was found, -1 if not found
		 * @since 3.6 updated to behave more like {@code String}
		 */
		static int indexOf(final CharSequence cs, final int searchChar, int start) {
			if (cs instanceof String) {
				return ((String) cs).indexOf(searchChar, start);
			}
			final int sz = cs.length();
			if (start < 0) {
				start = 0;
			}
			if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
				for (int i = start; i < sz; i++) {
					if (cs.charAt(i) == searchChar) {
						return i;
					}
				}
				// Stop here: a BMP (or negative) value can never match as a surrogate pair,
				// and Character.toChars() below would reject a negative value with an exception.
				return NOT_FOUND;
			}
			//supplementary characters (LANG1300)
			if (searchChar <= Character.MAX_CODE_POINT) {
				// for a supplementary code point this is always {high surrogate, low surrogate}
				final char[] chars = Character.toChars(searchChar);
				for (int i = start; i < sz - 1; i++) {
					final char high = cs.charAt(i);
					final char low = cs.charAt(i + 1);
					if (high == chars[0] && low == chars[1]) {
						return i;
					}
				}
			}
			return NOT_FOUND;
		}

		/**
		 * Used by the indexOf(CharSequence methods) as a green implementation of indexOf.
		 *
		 * <p>Both arguments are converted with {@code toString()} and the search is
		 * delegated to {@link String#indexOf(String, int)}.</p>
		 *
		 * @param cs the {@code CharSequence} to be processed, not null
		 * @param searchChar the {@code CharSequence} to be searched for, not null
		 * @param start the start index
		 * @return the index where the search sequence was found, -1 if not found
		 */
		static int indexOf(final CharSequence cs, final CharSequence searchChar, final int start) {
			// TODO: Implement directly on CharSequence rather than converting to String
			return cs.toString().indexOf(searchChar.toString(), start);
		}

		/**
		 * Returns the index within {@code cs} of the last occurrence of
		 * the specified character, searching backward starting at the
		 * specified index. For values of {@code searchChar} in the range
		 * from 0 to 0xFFFF (inclusive), the index returned is the largest
		 * value <i>k</i> such that:
		 * <blockquote><pre>
		 * (this.charAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &lt;= start)
		 * </pre></blockquote>
		 * is true. For other values of {@code searchChar}, it is the
		 * largest value <i>k</i> such that:
		 * <blockquote><pre>
		 * (this.codePointAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &lt;= start)
		 * </pre></blockquote>
		 * is true. In either case, if no such character occurs in {@code cs}
		 * at or before position {@code start}, then {@code -1} is returned.
		 * A {@code searchChar} that is not a valid code point (negative, or above
		 * {@link Character#MAX_CODE_POINT}) is never found, so {@code -1} is
		 * returned for it as well.
		 *
		 * <p>All indices are specified in {@code char} values
		 * (Unicode code units).
		 *
		 * @param cs the {@code CharSequence} to be processed, not null
		 * @param searchChar the char to be searched for
		 * @param start the start index, negative returns -1, beyond length starts at end
		 * @return the index where the search char was found, -1 if not found
		 * @since 3.6 updated to behave more like {@code String}
		 */
		static int lastIndexOf(final CharSequence cs, final int searchChar, int start) {
			if (cs instanceof String) {
				return ((String) cs).lastIndexOf(searchChar, start);
			}
			final int sz = cs.length();
			if (start < 0) {
				return NOT_FOUND;
			}
			if (start >= sz) {
				start = sz - 1;
			}
			if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
				for (int i = start; i >= 0; --i) {
					if (cs.charAt(i) == searchChar) {
						return i;
					}
				}
				// Stop here: a BMP (or negative) value can never match as a surrogate pair,
				// and Character.toChars() below would reject a negative value with an exception.
				return NOT_FOUND;
			}
			//supplementary characters (LANG1300)
			if (searchChar <= Character.MAX_CODE_POINT) {
				// for a supplementary code point this is always {high surrogate, low surrogate}
				final char[] chars = Character.toChars(searchChar);
				// A pair needs two chars, so the last index a match can begin at is sz - 2.
				// Clamp instead of giving up so that a pair ending the sequence is still found,
				// which is what String.lastIndexOf(int, int) does.
				for (int i = Math.min(start, sz - 2); i >= 0; i--) {
					final char high = cs.charAt(i);
					final char low = cs.charAt(i + 1);
					if (chars[0] == high && chars[1] == low) {
						return i;
					}
				}
			}
			return NOT_FOUND;
		}

		/**
		 * Used by the lastIndexOf(CharSequence methods) as a green implementation of lastIndexOf
		 *
		 * <p>Both arguments are converted with {@code toString()} and the search is
		 * delegated to {@link String#lastIndexOf(String, int)}.</p>
		 *
		 * @param cs the {@code CharSequence} to be processed, not null
		 * @param searchChar the {@code CharSequence} to be searched for, not null
		 * @param start the start index
		 * @return the index where the search sequence was found, -1 if not found
		 */
		static int lastIndexOf(final CharSequence cs, final CharSequence searchChar, final int start) {
			// TODO: Implement directly on CharSequence rather than converting to String
			return cs.toString().lastIndexOf(searchChar.toString(), start);
		}

		/**
		 * Green implementation of toCharArray.
		 *
		 * @param cs the {@code CharSequence} to be processed, not null
		 * @return a newly allocated char array holding the chars of {@code cs}
		 */
		static char[] toCharArray(final CharSequence cs) {
			if (cs instanceof String) {
				return ((String) cs).toCharArray();
			}
			final int sz = cs.length();
			final char[] array = new char[sz];
			for (int i = 0; i < sz; i++) {
				array[i] = cs.charAt(i);
			}
			return array;
		}

		/**
		 * Green implementation of regionMatches.
		 *
		 * <p>When both sequences are {@code String}s the call is delegated to
		 * {@link String#regionMatches(boolean, int, String, int, int)}. Otherwise the
		 * regions are compared char by char; a negative offset or length, or a
		 * region that runs past the end of either sequence, yields {@code false}.</p>
		 *
		 * @param cs the {@code CharSequence} to be processed, not null
		 * @param ignoreCase whether or not to be case insensitive
		 * @param thisStart the index to start on the {@code cs} CharSequence
		 * @param substring the {@code CharSequence} to be looked for, not null
		 * @param start the index to start on the {@code substring} CharSequence
		 * @param length character length of the region
		 * @return whether the region matched
		 */
		static boolean regionMatches(final CharSequence cs, final boolean ignoreCase, final int thisStart,
				final CharSequence substring, final int start, final int length) {
			if (cs instanceof String && substring instanceof String) {
				return ((String) cs).regionMatches(ignoreCase, thisStart, (String) substring, start, length);
			}
			int index1 = thisStart;
			int index2 = start;
			int tmpLen = length;

			// Extract these first so we detect NPEs the same as the java.lang.String version
			final int srcLen = cs.length() - thisStart;
			final int otherLen = substring.length() - start;

			// Check for invalid parameters
			if (thisStart < 0 || start < 0 || length < 0) {
				return false;
			}

			// Check that the regions are long enough
			if (srcLen < length || otherLen < length) {
				return false;
			}

			while (tmpLen-- > 0) {
				final char c1 = cs.charAt(index1++);
				final char c2 = substring.charAt(index2++);

				if (c1 == c2) {
					continue;
				}

				if (!ignoreCase) {
					return false;
				}

				// The same check as in String.regionMatches(): the chars only differ
				// if neither their upper-case nor their lower-case forms agree.
				if (Character.toUpperCase(c1) != Character.toUpperCase(c2)
						&& Character.toLowerCase(c1) != Character.toLowerCase(c2)) {
					return false;
				}
			}

			return true;
		}
	}