blob: ba8d18eef78baf1574c008e38bd7e226fe27f8ec [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.lib;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Static helper class that formats field data with enclosing (quote) and
 * escape characters.
 */
public final class FieldFormatter {

  /** Matches each default Hive delimiter: \n, \r and \01. */
  private static final Pattern REPLACE_PATTERN = Pattern.compile("\\n|\\r|\01");

  /**
   * Drops all default Hive delimiters from the string and then escapes and
   * encloses the result.
   *
   * These delimiters are \n, \r and \01. This method is invoked when the
   * command line option {@code --hive-drop-delims} is provided.
   *
   * @param str the field value to clean; may be null
   * @param delimiters the DelimiterSet describing the escape and enclose
   * semantics applied after the Hive delimiters are removed
   * @return the cleaned, escaped and enclosed value, or null if 'str' is null
   */
  public static String hiveStringDropDelims(String str, DelimiterSet delimiters) {
    return hiveStringReplaceDelims(str, "", delimiters);
  }

  /**
   * Replaces the default Hive delimiters (\n, \r and \01) with a user-defined
   * string passed to the {@code --hive-delims-replacement} option, and then
   * escapes and encloses the result.
   *
   * The replacement is inserted literally: '$' and '\' characters in it have
   * no special meaning.
   *
   * @param str the field value to clean; may be null
   * @param replacement the text substituted for every Hive delimiter; must
   * not be null
   * @param delimiters the DelimiterSet describing the escape and enclose
   * semantics applied after the replacement
   * @return the cleaned, escaped and enclosed value, or null if 'str' is null
   */
  public static String hiveStringReplaceDelims(String str, String replacement,
      DelimiterSet delimiters) {
    if (null == str) {
      // Same contract as escapeAndEnclose(): a null field stays null.
      return null;
    }

    // Matcher.replaceAll() interprets '$' and '\' in its argument as group
    // references and escapes; quote the user-supplied text so that it is
    // always treated as a literal.
    String literalReplacement = Matcher.quoteReplacement(replacement);
    String droppedDelims =
        REPLACE_PATTERN.matcher(str).replaceAll(literalReplacement);
    return escapeAndEnclose(droppedDelims, delimiters);
  }

  /**
   * Takes an input string representing the value of a field, encloses it in
   * enclosing chars, and escapes any occurrences of such characters in the
   * middle. The escape character itself is also escaped if it appears in the
   * text of the field. If there is no enclosing character, then any
   * delimiters present in the field body are escaped instead.
   *
   * The field is enclosed only if:
   *   enclose != '\000', and:
   *     encloseRequired is true, or
   *     one of the fields-terminated-by or lines-terminated-by characters is
   *     present in the string.
   *
   * Escaping is not performed if the escape char is '\000'.
   *
   * @param str - The user's string to escape and enclose; may be null
   * @param delimiters - The DelimiterSet to use identifying the escape and
   * enclose semantics. If the specified escape or enclose characters are
   * '\000', those operations are not performed.
   * @return the escaped, enclosed version of 'str', or null if 'str' is null.
   */
  public static String escapeAndEnclose(String str, DelimiterSet delimiters) {
    if (null == str) {
      return null;
    }

    char escape = delimiters.getEscapedBy();
    char enclose = delimiters.getEnclosedBy();

    // true if we can use an escape character.
    boolean escapingLegal = DelimiterSet.NULL_CHAR != escape;
    String escapeStr = String.valueOf(escape);

    // When escaping is legal, first double every instance of the escape char
    // itself so that the escapes added below stay unambiguous.
    String withEscapes = escapingLegal
        ? str.replace(escapeStr, escapeStr + escapeStr)
        : str;

    if (DelimiterSet.NULL_CHAR == enclose) {
      // The enclose-with character was left unset, so we can't enclose items.
      if (escapingLegal) {
        // If the user has used the fields-terminated-by or
        // lines-terminated-by characters in the string, escape them.
        String fields = String.valueOf(delimiters.getFieldsTerminatedBy());
        String lines = String.valueOf(delimiters.getLinesTerminatedBy());
        withEscapes = withEscapes.replace(fields, escapeStr + fields);
        withEscapes = withEscapes.replace(lines, escapeStr + lines);
      }

      // No enclosing possible, so now return this.
      return withEscapes;
    }

    // If we have an enclosing character, and escaping is legal, then the
    // encloser must always be escaped.
    if (escapingLegal) {
      String encloseStr = String.valueOf(enclose);
      withEscapes = withEscapes.replace(encloseStr, escapeStr + encloseStr);
    }

    // Enclose when it is mandatory, or when the original (unescaped) value
    // contains a field or line terminator.
    boolean actuallyDoEnclose = delimiters.isEncloseRequired()
        || str.indexOf(delimiters.getFieldsTerminatedBy()) != -1
        || str.indexOf(delimiters.getLinesTerminatedBy()) != -1;

    if (actuallyDoEnclose) {
      return String.valueOf(enclose) + withEscapes + enclose;
    }
    return withEscapes;
  }

  /** Not instantiable: this class only exposes static helpers. */
  private FieldFormatter() { }
}