blob: 5df2385e892a440099950148ec861119f72fc8f0 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jackrabbit.oak.commons;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import com.google.common.base.Charsets;
import com.google.common.base.Function;
import com.google.common.base.Strings;
import com.google.common.collect.AbstractIterator;
import com.google.common.collect.Iterators;
import com.google.common.collect.PeekingIterator;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.LineIterator;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static com.google.common.base.Charsets.UTF_8;
import static com.google.common.collect.Sets.newHashSet;
import static com.google.common.io.Closeables.close;
import static com.google.common.io.FileWriteMode.APPEND;
import static com.google.common.io.Files.asByteSink;
import static com.google.common.io.Files.move;
import static com.google.common.io.Files.newWriter;
import static java.io.File.createTempFile;
import static org.apache.commons.io.FileUtils.forceDelete;
import static org.apache.commons.io.IOUtils.closeQuietly;
import static org.apache.commons.io.IOUtils.copyLarge;
import static org.apache.commons.io.LineIterator.closeQuietly;
import static org.apache.jackrabbit.oak.commons.sort.EscapeUtils.escapeLineBreak;
import static org.apache.jackrabbit.oak.commons.sort.EscapeUtils.unescapeLineBreaks;
import static org.apache.jackrabbit.oak.commons.sort.ExternalSort.mergeSortedFiles;
import static org.apache.jackrabbit.oak.commons.sort.ExternalSort.sortInBatch;
/**
* Simple File utils
*/
public final class FileIOUtils {
private FileIOUtils() {
}
public final static Comparator<String> lexComparator = new Comparator<String>() {
@Override public int compare(String s1, String s2) {
return s1.compareTo(s2);
}
};
public final static Function<String, String> passThruTransformer = new Function<String, String>() {
@Nullable @Override public String apply(@Nullable String input) {
return input;
}
};
/**
* Sorts the given file externally using the {@link #lexComparator} and removes duplicates.
*
* @param file file whose contents needs to be sorted
*/
public static void sort(File file) throws IOException {
File sorted = createTempFile("fleioutilssort", null);
merge(sortInBatch(file, lexComparator, true), sorted);
move(sorted, file);
}
/**
* Sorts the given file externally with the given comparator and removes duplicates.
*
* @param file file whose contents needs to be sorted
* @param comparator to compare
* @throws IOException
*/
public static void sort(File file, Comparator<String> comparator) throws IOException {
File sorted = createTempFile("fleioutilssort", null);
merge(sortInBatch(file, comparator, true), sorted, comparator);
move(sorted, file);
}
/**
* Merges a list of files after sorting with the {@link #lexComparator}.
*
* @param files files to merge
* @param output merge output file
* @throws IOException
*/
public static void merge(List<File> files, File output) throws IOException {
mergeSortedFiles(
files,
output, lexComparator, true);
}
/**
* Merges a list of files after sorting with the given comparator.
*
* @param files files to merge
* @param output merge output file
* @throws IOException
*/
public static void merge(List<File> files, File output, Comparator<String> comparator) throws IOException {
mergeSortedFiles(
files,
output, comparator, true);
}
/**
* Copies an input stream to a file.
*
* @param stream steam to copy
* @return
* @throws IOException
*/
public static File copy(InputStream stream) throws IOException {
File file = createTempFile("fleioutilscopy", null);
copyInputStreamToFile(stream, file);
return file;
}
/**
* Appends the contents of the list of files to the given file and deletes the files
* if the delete flag is enabled.
*
* If there is a scope for lines in the files containing line break characters it should be
* ensured that the files are written with {@link #writeAsLine(BufferedWriter, String, boolean)}
* with true to escape line break characters.
* @param files
* @param appendTo
* @throws IOException
*/
public static void append(List<File> files, File appendTo, boolean delete) throws IOException {
OutputStream appendStream = null;
boolean threw = true;
try {
appendStream = asByteSink(appendTo, APPEND).openBufferedStream();
for (File f : files) {
InputStream iStream = new FileInputStream(f);
try {
copyLarge(iStream, appendStream);
} finally {
closeQuietly(iStream);
}
}
threw = false;
} finally {
if (delete) {
for (File f : files) {
f.delete();
}
}
close(appendStream, threw);
}
}
/**
* Writes a string as a new line into the given buffered writer and optionally
* escapes the line for line breaks.
*
* @param writer to write the string
* @param str the string to write
* @param escape whether to escape string for line breaks
* @throws IOException
*/
public static void writeAsLine(BufferedWriter writer, String str, boolean escape) throws IOException {
if (escape) {
writer.write(escapeLineBreak(str));
} else {
writer.write(str);
}
writer.newLine();
}
/**
* Writes string from the given iterator to the given file and optionally
* escape the written strings for line breaks.
*
* @param iterator the source of the strings
* @param f file to write to
* @param escape whether to escape for line breaks
* @return count
* @throws IOException
*/
public static int writeStrings(Iterator<String> iterator, File f, boolean escape)
throws IOException {
return writeStrings(iterator, f, escape, null, "");
}
/**
* Writes string from the given iterator to the given file and optionally
* escape the written strings for line breaks.
*
* @param iterator the source of the strings
* @param f file to write to
* @param escape escape whether to escape for line breaks
* @param logger logger to log progress
* @param message message to log
* @return
* @throws IOException
*/
public static int writeStrings(Iterator<String> iterator, File f, boolean escape,
@Nullable Logger logger, @Nullable String message) throws IOException {
return writeStrings(iterator, f, escape, passThruTransformer, logger, message);
}
/**
* Writes string from the given iterator to the given file and optionally
* escape the written strings for line breaks.
*
* @param iterator the source of the strings
* @param f file to write to
* @param escape escape whether to escape for line breaks
* @param transformer any transformation on the input
* @param logger logger to log progress
* @param message message to log
* @return
* @throws IOException
*/
public static int writeStrings(Iterator<String> iterator, File f, boolean escape,
@NotNull Function<String, String> transformer, @Nullable Logger logger, @Nullable String message) throws IOException {
BufferedWriter writer = newWriter(f, UTF_8);
boolean threw = true;
int count = 0;
try {
while (iterator.hasNext()) {
writeAsLine(writer, transformer.apply(iterator.next()), escape);
count++;
if (logger != null) {
if (count % 1000 == 0) {
logger.info(Strings.nullToEmpty(message) + count);
}
}
}
threw = false;
} finally {
close(writer, threw);
}
return count;
}
/**
* Reads strings from the given stream into a set and optionally unescaping for line breaks.
*
* @param stream the source of the strings
* @param unescape whether to unescape for line breaks
* @return set
* @throws IOException
*/
public static Set<String> readStringsAsSet(InputStream stream, boolean unescape) throws IOException {
BufferedReader reader = null;
Set<String> set = newHashSet();
boolean threw = true;
try {
reader = new BufferedReader(new InputStreamReader(stream, Charsets.UTF_8));
String line = null;
while ((line = reader.readLine()) != null) {
if (unescape) {
set.add(unescapeLineBreaks(line));
} else {
set.add(line);
}
}
threw = false;
} finally {
close(reader, threw);
}
return set;
}
/**
* Composing iterator which unescapes for line breaks and delegates to the given comparator.
* When using this it should be ensured that the data source has been correspondingly escaped.
*
* @param delegate the actual comparison iterator
* @return comparator aware of line breaks
*/
public static Comparator<String> lineBreakAwareComparator (Comparator<String> delegate) {
return new FileIOUtils.TransformingComparator(delegate, new Function<String, String>() {
@Nullable
@Override
public String apply(@Nullable String input) {
return unescapeLineBreaks(input);
}
});
}
/**
*
* Copy the input stream to the given file. Delete the file in case of exception.
*
* @param source the input stream source
* @param destination the file to write to
* @throws IOException
*/
public static void copyInputStreamToFile(final InputStream source, final File destination) throws IOException {
boolean success = false;
try {
FileUtils.copyInputStreamToFile(source, destination);
success = true;
} finally {
if (!success) {
forceDelete(destination);
}
}
}
/**
* Decorates the given comparator and applies the function before delegating to the decorated
* comparator.
*/
public static class TransformingComparator implements Comparator<String> {
private Comparator delegate;
private Function<String, String> func;
public TransformingComparator(Comparator delegate, Function<String, String> func) {
this.delegate = delegate;
this.func = func;
}
@Override
public int compare(String s1, String s2) {
return delegate.compare(func.apply(s1), func.apply(s2));
}
}
/**
* FileLineDifferenceIterator class which iterates over the difference of 2 files line by line.
*
* If there is a scope for lines in the files containing line break characters it should be
* ensured that both the files are written with
* {@link #writeAsLine(BufferedWriter, String, boolean)} with true to escape line break
* characters.
*/
public static class FileLineDifferenceIterator extends AbstractIterator<String> implements Closeable {
private final PeekingIterator<String> peekMarked;
private final LineIterator marked;
private final LineIterator all;
private Function<String, String> transformer = new Function<String, String>() {
@Override
public String apply(String input) {
return input;
}
};
public FileLineDifferenceIterator(LineIterator marked, LineIterator available) throws IOException {
this(marked, available, null);
}
public FileLineDifferenceIterator(File marked, File available,
@Nullable Function<String, String> transformer) throws IOException {
this(FileUtils.lineIterator(marked, UTF_8.toString()),
FileUtils.lineIterator(available, UTF_8.toString()), transformer);
}
public FileLineDifferenceIterator(LineIterator marked, LineIterator available,
@Nullable Function<String, String> transformer) throws IOException {
this.marked = marked;
this.peekMarked = Iterators.peekingIterator(marked);
this.all = available;
if (transformer != null) {
this.transformer = transformer;
}
}
@Override
protected String computeNext() {
String diff = computeNextDiff();
if (diff == null) {
close();
return endOfData();
}
return diff;
}
@Override
public void close() {
LineIterator.closeQuietly(marked);
LineIterator.closeQuietly(all);
}
private String computeNextDiff() {
if (!all.hasNext()) {
return null;
}
//Marked finish the rest of all are part of diff
if (!peekMarked.hasNext()) {
return all.next();
}
String diff = null;
while (all.hasNext() && diff == null) {
diff = all.next();
while (peekMarked.hasNext()) {
String marked = peekMarked.peek();
int comparisonResult = transformer.apply(diff).compareTo(transformer.apply((marked)));
if (comparisonResult > 0) {
//Extra entries in marked. Ignore them and move on
peekMarked.next();
} else if (comparisonResult == 0) {
//Matching entry found in marked move past it. Not a
//dif candidate
peekMarked.next();
diff = null;
break;
} else {
//This entry is not found in marked entries
//hence part of diff
return diff;
}
}
}
return diff;
}
}
/**
* Implements a {@link java.io.Closeable} wrapper over a {@link LineIterator}.
* Also has a transformer to transform the output. If the underlying file is
* provide then it deletes the file on {@link #close()}.
*
* If there is a scope for lines in the file containing line break characters it should be
* ensured that the files is written with
* {@link #writeAsLine(BufferedWriter, String, boolean)} with true to escape line break
* characters and should be properly unescaped on read.
* A custom transformer can also be provided to unescape.
*
* @param <T> the type of elements in the iterator
*/
public static class BurnOnCloseFileIterator<T> extends AbstractIterator<T> implements Closeable {
private final Logger log = LoggerFactory.getLogger(getClass());
private final LineIterator iterator;
private final Function<String, T> transformer;
private File backingFile;
public BurnOnCloseFileIterator(LineIterator iterator, Function<String, T> transformer) {
this.iterator = iterator;
this.transformer = transformer;
}
public BurnOnCloseFileIterator(LineIterator iterator, File backingFile,
Function<String, T> transformer) {
this.iterator = iterator;
this.transformer = transformer;
this.backingFile = backingFile;
}
@Override
protected T computeNext() {
if (iterator.hasNext()) {
return transformer.apply(iterator.next());
}
try {
close();
} catch (IOException e) {
log.warn("Error closing iterator", e);
}
return endOfData();
}
@Override
public void close() throws IOException {
closeQuietly(iterator);
if (backingFile != null && backingFile.exists()) {
forceDelete(backingFile);
}
}
public static BurnOnCloseFileIterator<String> wrap(LineIterator iter) {
return new BurnOnCloseFileIterator<String>(iter, new Function<String, String>() {
public String apply(String s) {
return s;
}
});
}
public static BurnOnCloseFileIterator<String> wrap(LineIterator iter, File backingFile) {
return new BurnOnCloseFileIterator<String>(iter, backingFile,
new Function<String, String>() {
public String apply(String s) {
return s;
}
});
}
}
}