blob: e464f16ce564e1232ad69bc308255c53f93ea508 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jackrabbit.oak.commons;
import static java.io.File.createTempFile;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.apache.commons.io.FileUtils.forceDelete;
import static org.apache.commons.io.IOUtils.closeQuietly;
import static org.apache.commons.io.IOUtils.copyLarge;
import static org.apache.jackrabbit.guava.common.io.Closeables.close;
import static org.apache.jackrabbit.guava.common.io.FileWriteMode.APPEND;
import static org.apache.jackrabbit.guava.common.io.Files.asByteSink;
import static org.apache.jackrabbit.guava.common.io.Files.move;
import static org.apache.jackrabbit.guava.common.io.Files.newWriter;
import static org.apache.jackrabbit.oak.commons.sort.EscapeUtils.escapeLineBreak;
import static org.apache.jackrabbit.oak.commons.sort.EscapeUtils.unescapeLineBreaks;
import static org.apache.jackrabbit.oak.commons.sort.ExternalSort.mergeSortedFiles;
import static org.apache.jackrabbit.oak.commons.sort.ExternalSort.sortInBatch;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.function.Function;
import org.apache.commons.io.FileUtils;
import org.apache.jackrabbit.guava.common.base.Strings;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.slf4j.Logger;
/**
 * Static helpers for line-oriented file handling: external sorting and merging,
 * appending files, and reading/writing strings with optional escaping of line breaks.
 */
public final class FileIOUtils {

    /** Number of written lines between two progress log statements in {@code writeStrings}. */
    private static final int LOG_PROGRESS_INTERVAL = 100000;

    private FileIOUtils() {
    }

    /**
     * Comparator ordering strings by their natural order, i.e. by
     * {@link String#compareTo(String)}.
     */
    public final static Comparator<String> lexComparator = Comparator.naturalOrder();

    /**
     * Sorts the given file externally using the {@link #lexComparator} and removes duplicates.
     * The sorted content replaces the content of the given file.
     *
     * @param file file whose contents needs to be sorted
     * @throws IOException if sorting, merging or moving the result fails
     */
    public static void sort(File file) throws IOException {
        sort(file, lexComparator);
    }

    /**
     * Sorts the given file externally with the given comparator and removes duplicates.
     * The sorted content is first written to a temporary file which is then moved over
     * the given file. The temporary file is removed if any step fails.
     *
     * @param file file whose contents needs to be sorted
     * @param comparator comparator defining the line order
     * @throws IOException if sorting, merging or moving the result fails
     */
    public static void sort(File file, Comparator<String> comparator) throws IOException {
        File sorted = createTempFile("fleioutilssort", null);
        boolean moved = false;
        try {
            merge(sortInBatch(file, comparator, true), sorted, comparator);
            move(sorted, file);
            moved = true;
        } finally {
            if (!moved) {
                // Do not leave the intermediate file behind when sorting failed.
                FileUtils.deleteQuietly(sorted);
            }
        }
    }

    /**
     * Merges the given files into the output file using the {@link #lexComparator}.
     * Delegates to {@link #merge(List, File, Comparator)}.
     *
     * @param files files to merge
     * @param output merge output file
     * @throws IOException if the merge fails
     */
    public static void merge(List<File> files, File output) throws IOException {
        merge(files, output, lexComparator);
    }

    /**
     * Merges the given files into the output file using the given comparator.
     * Delegates to {@code ExternalSort.mergeSortedFiles} with its last flag set to
     * {@code true}, the same setting the {@code sort} methods use for duplicate removal.
     *
     * @param files files to merge
     * @param output merge output file
     * @param comparator comparator defining the line order
     * @throws IOException if the merge fails
     */
    public static void merge(List<File> files, File output, Comparator<String> comparator) throws IOException {
        mergeSortedFiles(files, output, comparator, true);
    }

    /**
     * Copies an input stream to a newly created temporary file.
     *
     * @param stream stream to copy
     * @return the temporary file holding the content of the stream
     * @throws IOException if the temporary file cannot be created or the copy fails
     */
    public static File copy(InputStream stream) throws IOException {
        File file = createTempFile("fleioutilscopy", null);
        copyInputStreamToFile(stream, file);
        return file;
    }

    /**
     * Appends the contents of the list of files to the given file and deletes the files
     * if the delete flag is enabled. The files are deleted regardless of whether the
     * append succeeded.
     *
     * If there is a scope for lines in the files containing line break characters it should be
     * ensured that the files are written with {@link #writeAsLine(BufferedWriter, String, boolean)}
     * with true to escape line break characters.
     *
     * @param files files whose content is appended, in list order
     * @param appendTo file to append to
     * @param delete whether to delete the given files afterwards
     * @throws IOException if reading a source file or writing to the target fails
     */
    public static void append(List<File> files, File appendTo, boolean delete) throws IOException {
        OutputStream appendStream = null;
        boolean threw = true;
        try {
            appendStream = asByteSink(appendTo, APPEND).openBufferedStream();
            for (File f : files) {
                InputStream iStream = new FileInputStream(f);
                try {
                    copyLarge(iStream, appendStream);
                } finally {
                    closeQuietly(iStream);
                }
            }
            threw = false;
        } finally {
            try {
                // Close the target first so that a failure while deleting cannot leak the stream.
                close(appendStream, threw);
            } finally {
                if (delete) {
                    for (File f : files) {
                        // Best effort: the result of delete() is intentionally ignored.
                        f.delete();
                    }
                }
            }
        }
    }

    /**
     * Writes a string as a new line into the given buffered writer and optionally
     * escapes the line for line breaks.
     *
     * @param writer to write the string
     * @param str the string to write
     * @param escape whether to escape string for line breaks
     * @throws IOException if writing fails
     */
    public static void writeAsLine(BufferedWriter writer, String str, boolean escape) throws IOException {
        writer.write(escape ? escapeLineBreak(str) : str);
        writer.newLine();
    }

    /**
     * Writes string from the given iterator to the given file and optionally
     * escape the written strings for line breaks.
     *
     * @param iterator the source of the strings
     * @param f file to write to
     * @param escape whether to escape for line breaks
     * @return the number of strings written
     * @throws IOException if writing fails
     */
    public static int writeStrings(Iterator<String> iterator, File f, boolean escape)
        throws IOException {
        return writeStrings(iterator, f, escape, null, "");
    }

    /**
     * Writes string from the given iterator to the given file and optionally
     * escape the written strings for line breaks.
     *
     * @param iterator the source of the strings
     * @param f file to write to
     * @param escape whether to escape for line breaks
     * @param logger logger to log progress, may be {@code null} to disable logging
     * @param message message prefix to log, may be {@code null}
     * @return the number of strings written
     * @throws IOException if writing fails
     */
    public static int writeStrings(Iterator<String> iterator, File f, boolean escape,
        @Nullable Logger logger, @Nullable String message) throws IOException {
        return writeStrings(iterator, f, escape, Function.identity(), logger, message);
    }

    /**
     * Writes string from the given iterator to the given file and optionally
     * escape the written strings for line breaks. The transformer is applied to each
     * string before it is (optionally) escaped and written.
     *
     * @param iterator the source of the strings
     * @param f file to write to
     * @param escape whether to escape for line breaks
     * @param transformer any transformation on the input
     * @param logger logger to log progress, may be {@code null} to disable logging
     * @param message message prefix to log, may be {@code null}
     * @return the number of strings written
     * @throws IOException if writing fails
     */
    public static int writeStrings(Iterator<String> iterator, File f, boolean escape,
        @NotNull Function<String, String> transformer, @Nullable Logger logger, @Nullable String message) throws IOException {
        int count = 0;
        try (BufferedWriter writer = newWriter(f, UTF_8)) {
            while (iterator.hasNext()) {
                writeAsLine(writer, transformer.apply(iterator.next()), escape);
                count++;
                if (logger != null && count % LOG_PROGRESS_INTERVAL == 0) {
                    logger.info(Strings.nullToEmpty(message) + count);
                }
            }
        }
        return count;
    }

    /**
     * Reads strings from the given stream into a set and optionally unescaping for line breaks.
     * The stream is read as UTF-8, one string per line, and is closed when this method returns.
     *
     * @param stream the source of the strings
     * @param unescape whether to unescape for line breaks
     * @return set of the lines read
     * @throws IOException if reading fails
     */
    public static Set<String> readStringsAsSet(InputStream stream, boolean unescape) throws IOException {
        Set<String> set = new HashSet<>();
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(stream, UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                set.add(unescape ? unescapeLineBreaks(line) : line);
            }
        }
        return set;
    }

    /**
     * Composing comparator which unescapes for line breaks and delegates to the given comparator.
     * When using this it should be ensured that the data source has been correspondingly escaped.
     *
     * @param delegate the actual comparator
     * @return comparator aware of line breaks
     */
    public static Comparator<String> lineBreakAwareComparator (Comparator<String> delegate) {
        return new TransformingComparator(delegate, input -> unescapeLineBreaks(input));
    }

    /**
     * Copy the input stream to the given file. Delete the file in case of exception.
     * The failure of the copy is always the exception that is thrown; a problem while
     * removing the partially written file is attached to it as a suppressed exception.
     *
     * @param source the input stream source
     * @param destination the file to write to
     * @throws IOException if the copy fails
     */
    public static void copyInputStreamToFile(final InputStream source, final File destination) throws IOException {
        try {
            FileUtils.copyInputStreamToFile(source, destination);
        } catch (Throwable failure) {
            try {
                // forceDelete throws for a missing file, so only call it when there is
                // something to remove; otherwise it would hide the real failure.
                if (destination != null && destination.exists()) {
                    forceDelete(destination);
                }
            } catch (IOException | RuntimeException cleanupFailure) {
                failure.addSuppressed(cleanupFailure);
            }
            throw failure;
        }
    }

    /**
     * Decorates the given comparator and applies the function before delegating to the decorated
     * comparator.
     */
    public static class TransformingComparator implements Comparator<String> {
        private final Comparator<String> delegate;
        private final Function<String, String> func;

        /**
         * @param delegate comparator receiving the transformed values
         * @param func transformation applied to both values before comparing
         */
        public TransformingComparator(Comparator<String> delegate, Function<String, String> func) {
            this.delegate = delegate;
            this.func = func;
        }

        @Override
        public int compare(String s1, String s2) {
            return delegate.compare(func.apply(s1), func.apply(s2));
        }
    }
}