solr/solrj/src/java/org/apache/solr/common/util/StrUtils.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.solr.common.util;

 import java.io.IOException;
 import java.text.MessageFormat;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
 import java.util.Locale;

 import org.apache.solr.common.SolrException;

 /**
  * Static helper methods for splitting, joining, escaping and parsing strings.
  */
 public class StrUtils {
   /**
    * Lowercase hexadecimal digit characters: the element at index {@code i}
    * (0 to 15) is the hex digit for the value {@code i}.
    */
   public static final char[] HEX_DIGITS = {'0', '1', '2', '3', '4', '5', '6',
       '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};

   /**
    * Splits {@code s} on {@code separator} by delegating to
    * {@link #splitSmart(String, char, List)} and returns the pieces in a new list.
    *
    * @param s         the string to split
    * @param separator the character to split on
    * @return a new, mutable list holding the pieces in order of appearance
    */
   public static List<String> splitSmart(String s, char separator) {
     final List<String> pieces = new ArrayList<>(4);
     splitSmart(s, separator, pieces);
     return pieces;
   }

   /**
    * Characters that {@link #split(String, char)} uses as the separator when one of them
    * is the first character of its input.
    */
   static final String DELIM_CHARS = "/:;.,%#";

   /**
    * Splits {@code s} on {@code sep}, unless {@code s} starts with one of the characters in
    * {@link #DELIM_CHARS}; in that case the leading character is used as the separator
    * instead. The actual splitting is done by {@link #splitSmart(String, char, boolean)}
    * with its {@code trimEmpty} flag set.
    *
    * @param s   the string to split; an empty string is passed straight through to
    *            {@code splitSmart} instead of being inspected for a leading delimiter
    * @param sep the separator to use when {@code s} does not start with a delimiter character
    * @return the list of pieces produced by {@code splitSmart}
    */
   public static List<String> split(String s, char sep) {
     // Guard the charAt(0) lookup: an empty string has no first character to inspect.
     if (!s.isEmpty()) {
       final char first = s.charAt(0);
       if (DELIM_CHARS.indexOf(first) > -1) {
         sep = first;
       }
     }
     return splitSmart(s, sep, true);
   }

   /**
    * Splits {@code s} via {@link #splitSmart(String, char)} and, when {@code trimEmpty}
    * is set, removes the first piece if it is an empty string. Only the first piece is
    * ever removed.
    *
    * @param s         the string to split
    * @param separator the character to split on
    * @param trimEmpty whether to drop an empty first piece
    * @return the list of pieces
    */
   public static List<String> splitSmart(String s, char separator, boolean trimEmpty) {
     final List<String> pieces = splitSmart(s, separator);
     if (trimEmpty && !pieces.isEmpty() && pieces.get(0).isEmpty()) {
       pieces.remove(0);
     }
     return pieces;
   }

   /**
    * Splits a string on a separator character without splitting inside quoted
    * sections, appending each piece to {@code lst}.
    * <p>
    * A backslash causes the character after it to be skipped, both inside and outside
    * quotes. A single or double quote opens a quoted section unless it directly follows
    * a letter or digit (as in {@code 50" TV} or {@code can't}). Pieces are added exactly
    * as they appear in {@code s}, with quotes and backslashes retained. Nothing is added
    * for the text after the last separator when that text is empty.
    *
    * @param s         the string to split
    * @param separator the character to split on
    * @param lst       the list the pieces are appended to
    */
   public static void splitSmart(String s, char separator, List<String> lst) {
     final int length = s.length();
     int tokenStart = 0;
     char openQuote = 0;   // 0 while outside a quoted section
     char current = 0;
     int next = 0;
     while (next < length) {
       final char previous = current;
       current = s.charAt(next);
       next++;
       if (current == '\\') {
         // step over the escaped character
         next++;
         continue;
       }
       if (openQuote != 0 && current == openQuote) {
         openQuote = 0;
         continue;
       }
       if (current == '\'' || current == '"') {
         // A quote glued to a preceding letter or digit is treated as plain text.
         if (!Character.isLetterOrDigit(previous)) {
           openQuote = current;
         }
         continue;
       }
       if (openQuote == 0 && current == separator) {
         lst.add(s.substring(tokenStart, next - 1));
         tokenStart = next;
       }
     }
     if (tokenStart < length) {
       lst.add(s.substring(tokenStart, length));
     }
   }

   /**
    * Splits a backslash escaped string on the separator.
    * <p>
    * Current backslash escaping supported:
    * <br> \n \t \r \b \f are escaped the same as a Java String
    * <br> Other characters following a backslash are produced verbatim (\c =&gt; c)
    * <p>
    * Empty pieces (leading, trailing or adjacent separators) are not added to the result.
    * A lone backslash at the very end of the input is dropped when decoding and kept
    * otherwise.
    *
    * @param s         the string to split
    * @param separator the separator to split on; an empty separator never matches, so the
    *                  input comes back as a single piece
    * @param decode    decode backslash escaping
    * @return not null
    */
   public static List<String> splitSmart(String s, String separator, boolean decode) {
     // An empty separator "matches" at every position without consuming any input, which
     // would stop the loop below from ever advancing; treat it as never matching.
     final boolean canSplit = !separator.isEmpty();
     ArrayList<String> lst = new ArrayList<>(2);
     StringBuilder sb = new StringBuilder();
     int pos = 0, end = s.length();
     while (pos < end) {
       if (canSplit && s.startsWith(separator, pos)) {
         if (sb.length() > 0) {
           lst.add(sb.toString());
           sb = new StringBuilder();
         }
         pos += separator.length();
         continue;
       }

       char ch = s.charAt(pos++);
       if (ch == '\\') {
         if (!decode) sb.append(ch);
         if (pos >= end) break;  // trailing backslash: nothing left to escape
         ch = s.charAt(pos++);
         if (decode) {
           switch (ch) {
             case 'n':
               ch = '\n';
               break;
             case 't':
               ch = '\t';
               break;
             case 'r':
               ch = '\r';
               break;
             case 'b':
               ch = '\b';
               break;
             case 'f':
               ch = '\f';
               break;
           }
         }
       }

       sb.append(ch);
     }

     if (sb.length() > 0) {
       lst.add(sb.toString());
     }

     return lst;
   }

   /** Matches a comma that is not immediately preceded by a backslash. */
   private static final java.util.regex.Pattern UNESCAPED_COMMA =
       java.util.regex.Pattern.compile("(?<!\\\\),");

   /** Matches a backslash that is immediately followed by a comma. */
   private static final java.util.regex.Pattern COMMA_ESCAPE =
       java.util.regex.Pattern.compile("\\\\(?=,)");

   /**
    * Splits file names separated by comma character.
    * File names can contain comma characters escaped by backslash '\'
    * <p>
    * Both regular expressions are compiled once and reused, instead of being recompiled
    * on every call and for every file name.
    *
    * @param fileNames the string containing file names
    * @return a list of file names with the escaping backslashes removed; an empty,
    *         immutable list when {@code fileNames} is null
    */
   public static List<String> splitFileNames(String fileNames) {
     if (fileNames == null) {
       return Collections.<String>emptyList();
     }

     List<String> result = new ArrayList<>();
     for (String file : UNESCAPED_COMMA.split(fileNames)) {
       result.add(COMMA_ESCAPE.matcher(file).replaceAll(""));
     }
     return result;
   }

   /**
    * Joins the string form of every item with {@code separator} between consecutive
    * items. Each item is written through {@link #appendEscapedTextToBuilder}.
    *
    * @param items     the items to join; {@code null} yields an empty string
    * @param separator the character placed between items
    * @return the joined string
    * @see #escapeTextWithSeparator
    */
   public static String join(Collection<?> items, char separator) {
     if (items == null) {
       return "";
     }
     final StringBuilder joined = new StringBuilder(items.size() * 8);
     boolean needSeparator = false;
     for (Object element : items) {
       final String text = String.valueOf(element);
       if (needSeparator) {
         joined.append(separator);
       }
       needSeparator = true;
       appendEscapedTextToBuilder(joined, text, separator);
     }
     return joined.toString();
   }


   /**
    * Splits a string on runs of whitespace, honoring backslash escapes.
    * <p>
    * A character preceded by a backslash never acts as a delimiter. When {@code decode}
    * is true the backslash is removed and \n \t \r \b \f are translated as in a Java
    * string; otherwise the backslash is kept. A lone trailing backslash is dropped when
    * decoding and kept otherwise. Empty pieces are never produced.
    *
    * @param s      the string to split
    * @param decode whether to decode backslash escaping
    * @return a new list of the pieces, never null
    */
   public static List<String> splitWS(String s, boolean decode) {
     final List<String> tokens = new ArrayList<>(2);
     final StringBuilder token = new StringBuilder();
     final int length = s.length();
     int next = 0;
     while (next < length) {
       char c = s.charAt(next++);
       if (Character.isWhitespace(c)) {
         if (token.length() != 0) {
           tokens.add(token.toString());
           token.setLength(0);
         }
         continue;
       }

       if (c == '\\') {
         if (!decode) {
           token.append('\\');
         }
         if (next >= length) {
           break;
         }
         c = s.charAt(next++);
         if (decode) {
           if (c == 'n') {
             c = '\n';
           } else if (c == 't') {
             c = '\t';
           } else if (c == 'r') {
             c = '\r';
           } else if (c == 'b') {
             c = '\b';
           } else if (c == 'f') {
             c = '\f';
           }
         }
       }
       token.append(c);
     }

     if (token.length() != 0) {
       tokens.add(token.toString());
     }
     return tokens;
   }

   /**
    * Returns a new list holding each input string lower-cased with {@link Locale#ROOT}.
    *
    * @param strings the strings to convert
    * @return a new list of the lower-cased strings, in the same order
    */
   public static List<String> toLower(List<String> strings) {
     final int count = strings.size();
     final List<String> lowered = new ArrayList<>(count);
     for (String original : strings) {
       lowered.add(original.toLowerCase(Locale.ROOT));
     }
     return lowered;
   }


   /**
    * Returns true if the string starts with '1', 't', or 'T',
    * and false otherwise (including for a null or empty string).
    *
    * @param s the string to inspect, may be null
    * @return whether the first character of {@code s} is '1', 't' or 'T'
    */
   public static boolean parseBoolean(String s) {
     if (s == null || s.isEmpty()) {
       return false;
     }
     final char first = s.charAt(0);
     return first == '1' || first == 't' || first == 'T';
   }

   /**
    * Converts a string to a boolean; more lenient than Boolean.parseBoolean() so that
    * values coming from html forms are easier to handle.
    * <p>
    * Strings starting with "true", "on" or "yes" give true. Strings starting with
    * "false" or "off", and the exact string "no", give false. Matching is case-sensitive.
    *
    * @param s the string to parse
    * @return the parsed value
    * @throws SolrException with code BAD_REQUEST if {@code s} is null or not recognized
    */
   public static boolean parseBool(String s) {
     final boolean truthy = s != null
         && (s.startsWith("true") || s.startsWith("on") || s.startsWith("yes"));
     if (truthy) {
       return true;
     }
     final boolean falsy = s != null
         && (s.startsWith("false") || s.startsWith("off") || s.equals("no"));
     if (falsy) {
       return false;
     }
     throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "invalid boolean value: " + s);
   }

   /**
    * Lenient boolean parsing that falls back to a default instead of throwing.
    * <p>
    * Strings starting with "true", "on" or "yes" give true. Strings starting with
    * "false" or "off", and the exact string "no", give false. Matching is case-sensitive.
    *
    * @param s   the string to parse, may be null
    * @param def the value returned when {@code s} is null or not recognized
    * @return parsed boolean value (or def, if s is null or invalid)
    */
   public static boolean parseBool(String s, boolean def) {
     if (s == null) {
       return def;
     }
     if (s.startsWith("true") || s.startsWith("on") || s.startsWith("yes")) {
       return true;
     }
     if (s.startsWith("false") || s.startsWith("off") || s.equals("no")) {
       return false;
     }
     return def;
   }

   /**
    * URL-encodes a value, replacing only enough characters that the URL can be pasted
    * back into a browser without ambiguity.
    * <p>
    * Characters with a numeric value below 32 are written as a two-digit lowercase
    * {@code %xx} escape. A space becomes {@code +}, and &amp;, %, = and + are
    * percent-encoded. Every other character is appended unchanged.
    *
    * @param dest the destination the encoded text is appended to
    * @param val  the value to encode
    * @throws IOException if appending to {@code dest} fails
    */
   public static void partialURLEncodeVal(Appendable dest, String val) throws IOException {
     final int length = val.length();
     for (int index = 0; index < length; index++) {
       final char c = val.charAt(index);
       if (c < 32) {
         dest.append('%');
         if (c < 0x10) {
           // pad to two hex digits
           dest.append('0');
         }
         dest.append(Integer.toHexString(c));
       } else if (c == ' ') {
         dest.append('+');
       } else if (c == '&') {
         dest.append("%26");
       } else if (c == '%') {
         dest.append("%25");
       } else if (c == '=') {
         dest.append("%3D");
       } else if (c == '+') {
         dest.append("%2B");
       } else {
         dest.append(c);
       }
     }
   }

   /**
    * Returns a copy of {@code item} written through {@link #appendEscapedTextToBuilder}
    * for the given separator.
    *
    * @param item      the text to escape
    * @param separator the separator character to escape
    * @return the escaped copy
    * @see #join
    */
   public static String escapeTextWithSeparator(String item, char separator) {
     final StringBuilder escaped = new StringBuilder(2 * item.length());
     appendEscapedTextToBuilder(escaped, item, separator);
     return escaped.toString();
   }

   /**
    * Appends the characters of {@code item} to {@code out}, putting a backslash in front
    * of every backslash and every occurrence of {@code separator}. The separator itself
    * is not appended as a delimiter.
    *
    * @param out       the builder to append to
    * @param item      the text to escape
    * @param separator the separator character that needs escaping
    */
   public static void appendEscapedTextToBuilder(StringBuilder out,
                                                 String item,
                                                 char separator) {
     for (char current : item.toCharArray()) {
       final boolean needsEscape = current == separator || current == '\\';
       if (needsEscape) {
         out.append('\\');
       }
       out.append(current);
     }
   }

   /**
    * Formats {@code args} with a {@link MessageFormat} built from {@code pattern} and
    * the ROOT locale.
    *
    * @param pattern the MessageFormat pattern
    * @param args    the arguments referenced by the pattern
    * @return the formatted string
    */
   public static String formatString(String pattern, Object... args) {
     final MessageFormat formatter = new MessageFormat(pattern, Locale.ROOT);
     return formatter.format(args);
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.solr.common.util;

	import java.io.IOException;
	import java.text.MessageFormat;
	import java.util.ArrayList;
	import java.util.Collection;
	import java.util.Collections;
	import java.util.List;
	import java.util.Locale;

	import org.apache.solr.common.SolrException;

	/**
	* Static helper methods for splitting, joining, escaping and parsing strings.
	*/
	public class StrUtils {
	/**
	* Lowercase hexadecimal digit characters: the element at index {@code i}
	* (0 to 15) is the hex digit for the value {@code i}.
	*/
	public static final char[] HEX_DIGITS = {'0', '1', '2', '3', '4', '5', '6',
	'7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};

	/**
	 * Splits {@code s} on {@code separator} by delegating to
	 * {@link #splitSmart(String, char, List)} and returns the pieces in a new list.
	 *
	 * @param s         the string to split
	 * @param separator the character to split on
	 * @return a new, mutable list holding the pieces in order of appearance
	 */
	public static List<String> splitSmart(String s, char separator) {
	  final List<String> pieces = new ArrayList<>(4);
	  splitSmart(s, separator, pieces);
	  return pieces;
	}

	/**
	* Characters that {@link #split(String, char)} uses as the separator when one of them
	* is the first character of its input.
	*/
	static final String DELIM_CHARS = "/:;.,%#";

	/**
	 * Splits {@code s} on {@code sep}, unless {@code s} starts with one of the characters in
	 * {@link #DELIM_CHARS}; in that case the leading character is used as the separator
	 * instead. The actual splitting is done by {@link #splitSmart(String, char, boolean)}
	 * with its {@code trimEmpty} flag set.
	 *
	 * @param s   the string to split; an empty string is passed straight through to
	 *            {@code splitSmart} instead of being inspected for a leading delimiter
	 * @param sep the separator to use when {@code s} does not start with a delimiter character
	 * @return the list of pieces produced by {@code splitSmart}
	 */
	public static List<String> split(String s, char sep) {
	  // Guard the charAt(0) lookup: an empty string has no first character to inspect.
	  if (!s.isEmpty()) {
	    final char first = s.charAt(0);
	    if (DELIM_CHARS.indexOf(first) > -1) {
	      sep = first;
	    }
	  }
	  return splitSmart(s, sep, true);
	}

	/**
	 * Splits {@code s} via {@link #splitSmart(String, char)} and, when {@code trimEmpty}
	 * is set, removes the first piece if it is an empty string. Only the first piece is
	 * ever removed.
	 *
	 * @param s         the string to split
	 * @param separator the character to split on
	 * @param trimEmpty whether to drop an empty first piece
	 * @return the list of pieces
	 */
	public static List<String> splitSmart(String s, char separator, boolean trimEmpty) {
	  final List<String> pieces = splitSmart(s, separator);
	  if (trimEmpty && !pieces.isEmpty() && pieces.get(0).isEmpty()) {
	    pieces.remove(0);
	  }
	  return pieces;
	}

	/**
	* Split a string based on a separator, but don't split if it's inside
	* a string. Assume '\' escapes the next char both inside and
	* outside strings.
	*/
	public static void splitSmart(String s, char separator, List<String> lst) {
	int pos = 0, start = 0, end = s.length();
	char inString = 0;
	char ch = 0;
	while (pos < end) {
	char prevChar = ch;
	ch = s.charAt(pos++);
	if (ch == '\\') { // skip escaped chars
	pos++;
	} else if (inString != 0 && ch == inString) {
	inString = 0;
	} else if (ch == '\'' \|\| ch == '"') {
	// If char is directly preceeded by a number or letter
	// then don't treat it as the start of a string.
	// Examples: 50" TV, or can't
	if (!Character.isLetterOrDigit(prevChar)) {
	inString = ch;
	}
	} else if (ch == separator && inString == 0) {
	lst.add(s.substring(start, pos - 1));
	start = pos;
	}
	}
	if (start < end) {
	lst.add(s.substring(start, end));
	}

	/***
	if (SolrCore.log.isLoggable(Level.FINEST)) {
	SolrCore.log.trace("splitCommand={}", lst);
	}
	***/

	}

	/**
	 * Splits a backslash escaped string on the separator.
	 * <p>
	 * Current backslash escaping supported:
	 * <br> \n \t \r \b \f are escaped the same as a Java String
	 * <br> Other characters following a backslash are produced verbatim (\c =&gt; c)
	 * <p>
	 * Empty pieces (leading, trailing or adjacent separators) are not added to the result.
	 * A lone backslash at the very end of the input is dropped when decoding and kept
	 * otherwise.
	 *
	 * @param s         the string to split
	 * @param separator the separator to split on; an empty separator never matches, so the
	 *                  input comes back as a single piece
	 * @param decode    decode backslash escaping
	 * @return not null
	 */
	public static List<String> splitSmart(String s, String separator, boolean decode) {
	  // An empty separator "matches" at every position without consuming any input, which
	  // would stop the loop below from ever advancing; treat it as never matching.
	  final boolean canSplit = !separator.isEmpty();
	  ArrayList<String> lst = new ArrayList<>(2);
	  StringBuilder sb = new StringBuilder();
	  int pos = 0, end = s.length();
	  while (pos < end) {
	    if (canSplit && s.startsWith(separator, pos)) {
	      if (sb.length() > 0) {
	        lst.add(sb.toString());
	        sb = new StringBuilder();
	      }
	      pos += separator.length();
	      continue;
	    }

	    char ch = s.charAt(pos++);
	    if (ch == '\\') {
	      if (!decode) sb.append(ch);
	      if (pos >= end) break;  // trailing backslash: nothing left to escape
	      ch = s.charAt(pos++);
	      if (decode) {
	        switch (ch) {
	          case 'n':
	            ch = '\n';
	            break;
	          case 't':
	            ch = '\t';
	            break;
	          case 'r':
	            ch = '\r';
	            break;
	          case 'b':
	            ch = '\b';
	            break;
	          case 'f':
	            ch = '\f';
	            break;
	        }
	      }
	    }

	    sb.append(ch);
	  }

	  if (sb.length() > 0) {
	    lst.add(sb.toString());
	  }

	  return lst;
	}

	/** Matches a comma that is not immediately preceded by a backslash. */
	private static final java.util.regex.Pattern UNESCAPED_COMMA =
	    java.util.regex.Pattern.compile("(?<!\\\\),");

	/** Matches a backslash that is immediately followed by a comma. */
	private static final java.util.regex.Pattern COMMA_ESCAPE =
	    java.util.regex.Pattern.compile("\\\\(?=,)");

	/**
	 * Splits file names separated by comma character.
	 * File names can contain comma characters escaped by backslash '\'
	 * <p>
	 * Both regular expressions are compiled once and reused, instead of being recompiled
	 * on every call and for every file name.
	 *
	 * @param fileNames the string containing file names
	 * @return a list of file names with the escaping backslashes removed; an empty,
	 *         immutable list when {@code fileNames} is null
	 */
	public static List<String> splitFileNames(String fileNames) {
	  if (fileNames == null) {
	    return Collections.<String>emptyList();
	  }

	  List<String> result = new ArrayList<>();
	  for (String file : UNESCAPED_COMMA.split(fileNames)) {
	    result.add(COMMA_ESCAPE.matcher(file).replaceAll(""));
	  }
	  return result;
	}

	/**
	 * Joins the string form of every item with {@code separator} between consecutive
	 * items. Each item is written through {@link #appendEscapedTextToBuilder}.
	 *
	 * @param items     the items to join; {@code null} yields an empty string
	 * @param separator the character placed between items
	 * @return the joined string
	 * @see #escapeTextWithSeparator
	 */
	public static String join(Collection<?> items, char separator) {
	  if (items == null) {
	    return "";
	  }
	  final StringBuilder joined = new StringBuilder(items.size() * 8);
	  boolean needSeparator = false;
	  for (Object element : items) {
	    final String text = String.valueOf(element);
	    if (needSeparator) {
	      joined.append(separator);
	    }
	    needSeparator = true;
	    appendEscapedTextToBuilder(joined, text, separator);
	  }
	  return joined.toString();
	}


	/**
	 * Splits a string on runs of whitespace, honoring backslash escapes.
	 * <p>
	 * A character preceded by a backslash never acts as a delimiter. When {@code decode}
	 * is true the backslash is removed and \n \t \r \b \f are translated as in a Java
	 * string; otherwise the backslash is kept. A lone trailing backslash is dropped when
	 * decoding and kept otherwise. Empty pieces are never produced.
	 *
	 * @param s      the string to split
	 * @param decode whether to decode backslash escaping
	 * @return a new list of the pieces, never null
	 */
	public static List<String> splitWS(String s, boolean decode) {
	  final List<String> tokens = new ArrayList<>(2);
	  final StringBuilder token = new StringBuilder();
	  final int length = s.length();
	  int next = 0;
	  while (next < length) {
	    char c = s.charAt(next++);
	    if (Character.isWhitespace(c)) {
	      if (token.length() != 0) {
	        tokens.add(token.toString());
	        token.setLength(0);
	      }
	      continue;
	    }

	    if (c == '\\') {
	      if (!decode) {
	        token.append('\\');
	      }
	      if (next >= length) {
	        break;
	      }
	      c = s.charAt(next++);
	      if (decode) {
	        if (c == 'n') {
	          c = '\n';
	        } else if (c == 't') {
	          c = '\t';
	        } else if (c == 'r') {
	          c = '\r';
	        } else if (c == 'b') {
	          c = '\b';
	        } else if (c == 'f') {
	          c = '\f';
	        }
	      }
	    }
	    token.append(c);
	  }

	  if (token.length() != 0) {
	    tokens.add(token.toString());
	  }
	  return tokens;
	}

	/**
	 * Returns a new list holding each input string lower-cased with {@link Locale#ROOT}.
	 *
	 * @param strings the strings to convert
	 * @return a new list of the lower-cased strings, in the same order
	 */
	public static List<String> toLower(List<String> strings) {
	  final int count = strings.size();
	  final List<String> lowered = new ArrayList<>(count);
	  for (String original : strings) {
	    lowered.add(original.toLowerCase(Locale.ROOT));
	  }
	  return lowered;
	}


	/**
	* Return if a string starts with '1', 't', or 'T'
	* and return false otherwise.
	*/
	public static boolean parseBoolean(String s) {
	char ch = s.length() > 0 ? s.charAt(0) : 0;
	return (ch == '1' \|\| ch == 't' \|\| ch == 'T');
	}

	/**
	* how to transform a String into a boolean... more flexible than
	* Boolean.parseBoolean() to enable easier integration with html forms.
	*/
	public static boolean parseBool(String s) {
	if (s != null) {
	if (s.startsWith("true") \|\| s.startsWith("on") \|\| s.startsWith("yes")) {
	return true;
	}
	if (s.startsWith("false") \|\| s.startsWith("off") \|\| s.equals("no")) {
	return false;
	}
	}
	throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "invalid boolean value: " + s);
	}

	/**
	* {@link NullPointerException} and {@link SolrException} free version of {@link #parseBool(String)}
	*
	* @return parsed boolean value (or def, if s is null or invalid)
	*/
	public static boolean parseBool(String s, boolean def) {
	if (s != null) {
	if (s.startsWith("true") \|\| s.startsWith("on") \|\| s.startsWith("yes")) {
	return true;
	}
	if (s.startsWith("false") \|\| s.startsWith("off") \|\| s.equals("no")) {
	return false;
	}
	}
	return def;
	}

	/**
	 * URL-encodes a value, replacing only enough characters that the URL can be pasted
	 * back into a browser without ambiguity.
	 * <p>
	 * Characters with a numeric value below 32 are written as a two-digit lowercase
	 * {@code %xx} escape. A space becomes {@code +}, and &amp;, %, = and + are
	 * percent-encoded. Every other character is appended unchanged.
	 *
	 * @param dest the destination the encoded text is appended to
	 * @param val  the value to encode
	 * @throws IOException if appending to {@code dest} fails
	 */
	public static void partialURLEncodeVal(Appendable dest, String val) throws IOException {
	  final int length = val.length();
	  for (int index = 0; index < length; index++) {
	    final char c = val.charAt(index);
	    if (c < 32) {
	      dest.append('%');
	      if (c < 0x10) {
	        // pad to two hex digits
	        dest.append('0');
	      }
	      dest.append(Integer.toHexString(c));
	    } else if (c == ' ') {
	      dest.append('+');
	    } else if (c == '&') {
	      dest.append("%26");
	    } else if (c == '%') {
	      dest.append("%25");
	    } else if (c == '=') {
	      dest.append("%3D");
	    } else if (c == '+') {
	      dest.append("%2B");
	    } else {
	      dest.append(c);
	    }
	  }
	}

	/**
	 * Returns a copy of {@code item} written through {@link #appendEscapedTextToBuilder}
	 * for the given separator.
	 *
	 * @param item      the text to escape
	 * @param separator the separator character to escape
	 * @return the escaped copy
	 * @see #join
	 */
	public static String escapeTextWithSeparator(String item, char separator) {
	  final StringBuilder escaped = new StringBuilder(2 * item.length());
	  appendEscapedTextToBuilder(escaped, item, separator);
	  return escaped.toString();
	}

	/**
	* writes chars from item to out, backslash escaping as needed based on separator --
	* but does not append the separator itself
	*/
	public static void appendEscapedTextToBuilder(StringBuilder out,
	String item,
	char separator) {
	for (int i = 0; i < item.length(); i++) {
	char ch = item.charAt(i);
	if (ch == '\\' \|\| ch == separator) {
	out.append('\\');
	}
	out.append(ch);
	}
	}

	/**
	 * Formats {@code args} with a {@link MessageFormat} built from {@code pattern} and
	 * the ROOT locale.
	 *
	 * @param pattern the MessageFormat pattern
	 * @param args    the arguments referenced by the pattern
	 * @return the formatted string
	 */
	public static String formatString(String pattern, Object... args) {
	  final MessageFormat formatter = new MessageFormat(pattern, Locale.ROOT);
	  return formatter.format(args);
	}
	}