blob: 9a68c3bacefd5c5289024698f4a6426d2f307438 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.common.util;
import java.io.IOException;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import org.apache.solr.common.SolrException;
/**
 * Static helper methods for splitting, joining, escaping and parsing strings.
 */
public class StrUtils {
/**
 * The sixteen lowercase hexadecimal digit characters, ordered so that the
 * character at index {@code i} is the hex digit for the value {@code i} (0-15).
 * NOTE(review): the array is public and therefore mutable by any caller;
 * treat it as read-only.
 */
public static final char[] HEX_DIGITS = { '0', '1', '2', '3', '4', '5', '6',
'7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
/**
 * Splits {@code s} on {@code separator} using the quote- and escape-aware
 * rules of {@link #splitSmart(String, char, List)} and returns the pieces
 * in a newly allocated list.
 *
 * @param s the string to split
 * @param separator the character to split on
 * @return a new mutable list holding the pieces in order of appearance
 */
public static List<String> splitSmart(String s, char separator) {
  final List<String> pieces = new ArrayList<>(4);
  splitSmart(s, separator, pieces);
  return pieces;
}
/**
 * Characters that {@link #split(String, char)} recognises as a self-declared
 * separator: when the input starts with one of them, that character is used
 * instead of the separator supplied by the caller.
 */
static final String DELIM_CHARS = "/:;.,%#";
/**
 * Splits a string whose first character may announce its own separator.
 * <p>
 * If {@code s} starts with one of the characters in {@link #DELIM_CHARS},
 * that character replaces {@code sep}. The string is then split with
 * {@link #splitSmart(String, char, boolean)}, dropping a leading empty element.
 * An empty {@code s} yields an empty list.
 *
 * @param s the string to split; must not be null
 * @param sep the separator to use when {@code s} does not start with a delimiter character
 * @return the pieces of {@code s}
 */
public static List<String> split(String s, char sep) {
  // Guard the charAt(0) lookup: an empty string has no first character and
  // would otherwise fail with StringIndexOutOfBoundsException.
  if (!s.isEmpty()) {
    final char first = s.charAt(0);
    if (DELIM_CHARS.indexOf(first) > -1) {
      sep = first;
    }
  }
  return splitSmart(s, sep, true);
}
/**
 * Splits {@code s} with {@link #splitSmart(String, char)} and optionally
 * discards a leading empty element (as produced when {@code s} starts with
 * the separator). Only the first element is ever removed.
 *
 * @param s the string to split
 * @param separator the character to split on
 * @param trimEmpty whether to drop the first element when it is the empty string
 * @return the pieces of {@code s}
 */
public static List<String> splitSmart(String s, char separator, boolean trimEmpty) {
  final List<String> pieces = splitSmart(s, separator);
  if (!trimEmpty) {
    return pieces;
  }
  final boolean startsWithEmpty = pieces.size() > 0 && pieces.get(0).isEmpty();
  if (startsWithEmpty) {
    pieces.remove(0);
  }
  return pieces;
}
/**
 * Splits {@code s} on {@code separator} and appends the pieces to {@code lst},
 * without splitting inside a single- or double-quoted section.
 * <p>
 * A backslash escapes the following character both inside and outside quotes:
 * the escaped character is skipped without being examined. Backslashes and
 * quotes are kept in the output pieces. A quote character that directly
 * follows a letter or digit (as in {@code 50" TV} or {@code can't}) does not
 * open a quoted section. An empty piece after the last separator is not added.
 *
 * @param s the string to split
 * @param separator the character to split on
 * @param lst the list that receives the pieces
 */
public static void splitSmart(String s, char separator, List<String> lst) {
  final int length = s.length();
  int tokenStart = 0;
  int i = 0;
  char openQuote = 0; // 0 means "not inside a quoted section"
  char current = 0;
  while (i < length) {
    final char previous = current;
    current = s.charAt(i);
    i++;
    if (current == '\\') {
      // Skip the escaped character entirely.
      i++;
      continue;
    }
    if (openQuote != 0 && current == openQuote) {
      openQuote = 0;
      continue;
    }
    if (current == '\'' || current == '"') {
      // A quote glued to a preceding letter or digit is ordinary text.
      if (!Character.isLetterOrDigit(previous)) {
        openQuote = current;
      }
      continue;
    }
    if (openQuote == 0 && current == separator) {
      lst.add(s.substring(tokenStart, i - 1));
      tokenStart = i;
    }
  }
  if (tokenStart < length) {
    lst.add(s.substring(tokenStart));
  }
}
/**
 * Splits a backslash escaped string on the separator.
 * <p>
 * Current backslash escaping supported:
 * <br> \n \t \r \b \f are escaped the same as a Java String
 * <br> Other characters following a backslash are produced verbatim (\c =&gt; c)
 * <p>
 * Empty pieces (between adjacent separators, or at either end) are dropped.
 * An empty separator never matches, so the whole input comes back as a
 * single element.
 *
 * @param s the string to split
 * @param separator the separator to split on
 * @param decode decode backslash escaping; when false the backslash and the
 *        character after it are copied through unchanged
 * @return not null
 */
public static List<String> splitSmart(String s, String separator, boolean decode) {
  final List<String> lst = new ArrayList<>(2);
  final StringBuilder sb = new StringBuilder();
  final int end = s.length();
  final int separatorLength = separator.length();
  int pos = 0;
  while (pos < end) {
    // A zero-length separator would match at every position without ever
    // advancing pos, looping forever; treat it as "no separator".
    if (separatorLength > 0 && s.startsWith(separator, pos)) {
      if (sb.length() > 0) {
        lst.add(sb.toString());
        sb.setLength(0);
      }
      pos += separatorLength;
      continue;
    }
    char ch = s.charAt(pos++);
    if (ch == '\\') {
      if (!decode) {
        sb.append(ch);
      }
      if (pos >= end) {
        break; // dangling backslash at end of input: nothing left to escape
      }
      ch = s.charAt(pos++);
      if (decode) {
        switch (ch) {
          case 'n' : ch = '\n'; break;
          case 't' : ch = '\t'; break;
          case 'r' : ch = '\r'; break;
          case 'b' : ch = '\b'; break;
          case 'f' : ch = '\f'; break;
        }
      }
    }
    sb.append(ch);
  }
  if (sb.length() > 0) {
    lst.add(sb.toString());
  }
  return lst;
}
/**
 * Splits file names separated by comma character.
 * File names can contain comma characters escaped by backslash '\'
 *
 * @param fileNames the string containing file names; may be null
 * @return a list of file names with the escaping backslashes removed,
 *         or an empty list when {@code fileNames} is null
 */
public static List<String> splitFileNames(String fileNames) {
  if (fileNames == null) {
    return Collections.emptyList();
  }
  final List<String> result = new ArrayList<>();
  // Split on every comma that is not directly preceded by a backslash.
  for (String file : fileNames.split("(?<!\\\\),")) {
    // Un-escape "\," to ",". A literal replace gives the same result as the
    // former per-item regex without recompiling a pattern for every name.
    result.add(file.replace("\\,", ","));
  }
  return result;
}
/**
 * Joins the string form of every item with {@code separator}, backslash
 * escaping any backslash or separator character that occurs inside an item.
 * A null collection produces the empty string.
 *
 * @see #escapeTextWithSeparator
 */
public static String join(Collection<?> items, char separator) {
  if (items == null) {
    return "";
  }
  final StringBuilder joined = new StringBuilder(items.size() << 3);
  boolean needsSeparator = false;
  for (final Object element : items) {
    final String text = String.valueOf(element);
    if (needsSeparator) {
      joined.append(separator);
    }
    needsSeparator = true;
    appendEscapedTextToBuilder(joined, text, separator);
  }
  return joined.toString();
}
/**
 * Splits a string on runs of whitespace, honouring backslash escapes.
 * <p>
 * A backslash makes the following character part of the current token even
 * if it is whitespace. With {@code decode} set, \n \t \r \b \f become the
 * corresponding control characters and any other escaped character is kept
 * verbatim without its backslash; without it, the backslash and the escaped
 * character are both copied through. Empty tokens are never produced.
 *
 * @param s the string to split
 * @param decode decode backslash escaping
 * @return the tokens, never null
 */
public static List<String> splitWS(String s, boolean decode) {
  final List<String> tokens = new ArrayList<>(2);
  final StringBuilder token = new StringBuilder();
  final int length = s.length();
  int i = 0;
  while (i < length) {
    char c = s.charAt(i);
    i++;
    if (Character.isWhitespace(c)) {
      if (token.length() > 0) {
        tokens.add(token.toString());
        token.setLength(0);
      }
      continue;
    }
    if (c == '\\') {
      if (!decode) {
        token.append(c);
      }
      if (i >= length) {
        break; // dangling backslash at end of input
      }
      c = s.charAt(i);
      i++;
      if (decode) {
        if (c == 'n') {
          c = '\n';
        } else if (c == 't') {
          c = '\t';
        } else if (c == 'r') {
          c = '\r';
        } else if (c == 'b') {
          c = '\b';
        } else if (c == 'f') {
          c = '\f';
        }
      }
    }
    token.append(c);
  }
  if (token.length() > 0) {
    tokens.add(token.toString());
  }
  return tokens;
}
/**
 * Returns a new list holding each input string converted to lower case
 * using {@link Locale#ROOT}. The input list is not modified.
 *
 * @param strings the strings to convert
 * @return a new list of lower-cased strings, in the same order
 */
public static List<String> toLower(List<String> strings) {
  final List<String> lowered = new ArrayList<>(strings.size());
  for (final String original : strings) {
    final String lowerCased = original.toLowerCase(Locale.ROOT);
    lowered.add(lowerCased);
  }
  return lowered;
}
/**
 * Returns true if the string starts with '1', 't', or 'T',
 * and false otherwise (including for the empty string).
 */
public static boolean parseBoolean(String s) {
  if (s.length() == 0) {
    return false;
  }
  switch (s.charAt(0)) {
    case '1':
    case 't':
    case 'T':
      return true;
    default:
      return false;
  }
}
/**
 * Transforms a String into a boolean; more flexible than
 * Boolean.parseBoolean() to enable easier integration with html forms.
 * <p>
 * Strings starting with "true", "on" or "yes" are true; strings starting
 * with "false" or "off", and the exact string "no", are false. Matching is
 * case sensitive.
 *
 * @throws SolrException with code BAD_REQUEST if {@code s} is null or matches none of the above
 */
public static boolean parseBool(String s) {
  if (s != null) {
    final boolean truthy = s.startsWith("true") || s.startsWith("on") || s.startsWith("yes");
    if (truthy) {
      return true;
    }
    final boolean falsy = s.startsWith("false") || s.startsWith("off") || s.equals("no");
    if (falsy) {
      return false;
    }
  }
  throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "invalid boolean value: "+s );
}
/**
 * {@link NullPointerException} and {@link SolrException} free version of {@link #parseBool(String)}
 * <p>
 * Strings starting with "true", "on" or "yes" are true; strings starting
 * with "false" or "off", and the exact string "no", are false. Matching is
 * case sensitive.
 *
 * @return parsed boolean value (or def, if s is null or invalid)
 */
public static boolean parseBool(String s, boolean def) {
  if (s == null) {
    return def;
  }
  final boolean truthy = s.startsWith("true") || s.startsWith("on") || s.startsWith("yes");
  if (truthy) {
    return true;
  }
  final boolean falsy = s.startsWith("false") || s.startsWith("off") || s.equals("no");
  return falsy ? false : def;
}
/**
 * URL-encodes a value, replacing only enough chars so that
 * the URL may be unambiguously pasted back into a browser.
 * <p>
 * Characters with a numeric value less than 32 are written as a two-digit
 * percent escape. A space becomes '+', and &amp;, %, = and + are percent
 * escaped. Every other character is copied unchanged.
 *
 * @param dest receives the encoded output
 * @param val the value to encode
 * @throws IOException if appending to {@code dest} fails
 */
public static void partialURLEncodeVal(Appendable dest, String val) throws IOException {
  final int length = val.length();
  for (int index = 0; index < length; index++) {
    final char c = val.charAt(index);
    if (c == ' ') {
      dest.append('+');
    } else if (c == '&') {
      dest.append("%26");
    } else if (c == '%') {
      dest.append("%25");
    } else if (c == '=') {
      dest.append("%3D");
    } else if (c == '+') {
      dest.append("%2B");
    } else if (c >= 32) {
      dest.append(c);
    } else {
      // Control character: emit a zero-padded two-digit hex escape.
      dest.append('%');
      if (c < 0x10) {
        dest.append('0');
      }
      dest.append(Integer.toHexString(c));
    }
  }
}
/**
 * Returns a copy of {@code item} in which every backslash and every
 * occurrence of {@code separator} is preceded by a backslash.
 *
 * @see #join
 */
public static String escapeTextWithSeparator(String item, char separator) {
  // Worst case every character needs an escape, doubling the length.
  final StringBuilder escaped = new StringBuilder(2 * item.length());
  appendEscapedTextToBuilder(escaped, item, separator);
  return escaped.toString();
}
/**
 * Appends the characters of {@code item} to {@code out}, inserting a
 * backslash before each backslash and each occurrence of {@code separator}.
 * The separator itself is not appended after the item.
 *
 * @param out the builder to append to
 * @param item the text to escape
 * @param separator the character that must be escaped in addition to backslash
 */
public static void appendEscapedTextToBuilder(StringBuilder out,
                                              String item,
                                              char separator) {
  final int length = item.length();
  for (int index = 0; index < length; index++) {
    final char c = item.charAt(index);
    final boolean needsEscape = (c == separator) || (c == '\\');
    if (needsEscape) {
      out.append('\\');
    }
    out.append(c);
  }
}
/**
 * Formats {@code args} with a {@link MessageFormat} built from
 * {@code pattern}, always using the ROOT locale.
 *
 * @param pattern the MessageFormat pattern
 * @param args the values substituted into the pattern
 * @return the formatted string
 */
public static String formatString(String pattern, Object... args) {
  final MessageFormat formatter = new MessageFormat(pattern, Locale.ROOT);
  return formatter.format(args);
}
}