| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| * |
| */ |
| |
| /* |
| * This package is based on the work done by Timothy Gerard Endres |
| * (time@ice.com) to whom the Ant project is very grateful for his great code. |
| */ |
| |
| package org.apache.commons.compress.archivers.tar; |
| |
| import java.io.ByteArrayOutputStream; |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.util.HashMap; |
| import java.util.Map; |
| |
| import org.apache.commons.compress.archivers.ArchiveEntry; |
| import org.apache.commons.compress.archivers.ArchiveInputStream; |
| import org.apache.commons.compress.archivers.zip.ZipEncoding; |
| import org.apache.commons.compress.archivers.zip.ZipEncodingHelper; |
| import org.apache.commons.compress.utils.ArchiveUtils; |
| import org.apache.commons.compress.utils.CharsetNames; |
| import org.apache.commons.compress.utils.IOUtils; |
| |
/**
 * The TarInputStream reads a UNIX tar archive as an InputStream.
 * Methods are provided to position at each successive entry in
 * the archive, and then read each entry as a normal input stream
 * using read().
 * @NotThreadSafe
 */
| public class TarArchiveInputStream extends ArchiveInputStream { |
| |
| private static final int SMALL_BUFFER_SIZE = 256; |
| |
| private final byte[] smallBuf = new byte[SMALL_BUFFER_SIZE]; |
| |
| /** The size the TAR header */ |
| private final int recordSize; |
| |
| /** The size of a block */ |
| private final int blockSize; |
| |
| /** True if file has hit EOF */ |
| private boolean hasHitEOF; |
| |
| /** Size of the current entry */ |
| private long entrySize; |
| |
| /** How far into the entry the stream is at */ |
| private long entryOffset; |
| |
| /** An input stream to read from */ |
| private final InputStream is; |
| |
| /** The meta-data about the current entry */ |
| private TarArchiveEntry currEntry; |
| |
| /** The encoding of the file */ |
| private final ZipEncoding zipEncoding; |
| |
| // the provided encoding (for unit tests) |
| final String encoding; |
| |
| // the global PAX header |
| private Map<String, String> globalPaxHeaders = new HashMap<>(); |
| |
| /** Whether the last attempt to read an ArchiveEntry failed */ |
| private boolean headerErrorOccurred = false; |
| |
/**
 * Creates a tar input stream using the default block size, the default
 * record size and a {@code null} encoding name.
 *
 * @param is the input stream to use
 */
public TarArchiveInputStream(final InputStream is) {
    this(is, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null);
}
| |
/**
 * Creates a tar input stream using the default block and record sizes and
 * the given encoding for file names.
 *
 * @param is the input stream to use
 * @param encoding name of the encoding to use for file names
 * @since 1.4
 */
public TarArchiveInputStream(final InputStream is, final String encoding) {
    this(is, TarConstants.DEFAULT_BLKSIZE, encoding);
}
| |
/**
 * Creates a tar input stream using the given block size, the default
 * record size and a {@code null} encoding name.
 *
 * @param is the input stream to use
 * @param blockSize the block size to use
 */
public TarArchiveInputStream(final InputStream is, final int blockSize) {
    this(is, blockSize, TarConstants.DEFAULT_RCDSIZE, null);
}
| |
/**
 * Creates a tar input stream using the given block size, the default
 * record size and the given encoding for file names.
 *
 * @param is the input stream to use
 * @param blockSize the block size to use
 * @param encoding name of the encoding to use for file names
 * @since 1.4
 */
public TarArchiveInputStream(final InputStream is, final int blockSize, final String encoding) {
    this(is, blockSize, TarConstants.DEFAULT_RCDSIZE, encoding);
}
| |
/**
 * Creates a tar input stream using the given block and record sizes and a
 * {@code null} encoding name.
 *
 * @param is the input stream to use
 * @param blockSize the block size to use
 * @param recordSize the record size to use
 */
public TarArchiveInputStream(final InputStream is, final int blockSize, final int recordSize) {
    this(is, blockSize, recordSize, (String) null);
}
| |
/**
 * Creates a tar input stream with every setting given explicitly. All
 * other constructors end up here.
 *
 * @param is the input stream to use
 * @param blockSize the block size to use
 * @param recordSize the record size to use
 * @param encoding name of the encoding to use for file names
 * @since 1.4
 */
public TarArchiveInputStream(final InputStream is, final int blockSize, final int recordSize,
                             final String encoding) {
    // source of the archive bytes
    this.is = is;

    // geometry of the archive
    this.blockSize = blockSize;
    this.recordSize = recordSize;

    // name decoding; the raw name is kept as well
    this.encoding = encoding;
    this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);

    // nothing has been read yet
    this.hasHitEOF = false;
}
| |
| /** |
| * Closes this stream. Calls the TarBuffer's close() method. |
| * @throws IOException on error |
| */ |
| @Override |
| public void close() throws IOException { |
| is.close(); |
| } |
| |
| /** |
| * Get the record size being used by this stream's buffer. |
| * |
| * @return The TarBuffer record size. |
| */ |
| public int getRecordSize() { |
| return recordSize; |
| } |
| |
| /** |
| * Get the available data that can be read from the current |
| * entry in the archive. This does not indicate how much data |
| * is left in the entire archive, only in the current entry. |
| * This value is determined from the entry's size header field |
| * and the amount of data already read from the current entry. |
| * Integer.MAX_VALUE is returned in case more than Integer.MAX_VALUE |
| * bytes are left in the current entry in the archive. |
| * |
| * @return The number of available bytes for the current entry. |
| * @throws IOException for signature |
| */ |
| @Override |
| public int available() throws IOException { |
| if (isDirectory()) { |
| return 0; |
| } |
| if (entrySize - entryOffset > Integer.MAX_VALUE) { |
| return Integer.MAX_VALUE; |
| } |
| return (int) (entrySize - entryOffset); |
| } |
| |
| |
| /** |
| * Skips over and discards <code>n</code> bytes of data from this input |
| * stream. The <code>skip</code> method may, for a variety of reasons, end |
| * up skipping over some smaller number of bytes, possibly <code>0</code>. |
| * This may result from any of a number of conditions; reaching end of file |
| * or end of entry before <code>n</code> bytes have been skipped; are only |
| * two possibilities. The actual number of bytes skipped is returned. If |
| * <code>n</code> is negative, no bytes are skipped. |
| * |
| * |
| * @param n |
| * the number of bytes to be skipped. |
| * @return the actual number of bytes skipped. |
| * @throws IOException |
| * if some other I/O error occurs. |
| */ |
| @Override |
| public long skip(final long n) throws IOException { |
| if (n <= 0 || isDirectory()) { |
| return 0; |
| } |
| |
| final long available = entrySize - entryOffset; |
| final long skipped = IOUtils.skip(is, Math.min(n, available)); |
| count(skipped); |
| entryOffset += skipped; |
| return skipped; |
| } |
| |
| /** |
| * Since we do not support marking just yet, we return false. |
| * |
| * @return False. |
| */ |
| @Override |
| public boolean markSupported() { |
| return false; |
| } |
| |
| /** |
| * Since we do not support marking just yet, we do nothing. |
| * |
| * @param markLimit The limit to mark. |
| */ |
| @Override |
| public void mark(final int markLimit) { |
| } |
| |
| /** |
| * Since we do not support marking just yet, we do nothing. |
| */ |
| @Override |
| public synchronized void reset() { |
| } |
| |
| /** |
| * Get the next entry in this tar archive. This will skip |
| * over any remaining data in the current entry, if there |
| * is one, and place the input stream at the header of the |
| * next entry, and read the header and instantiate a new |
| * TarEntry from the header bytes and return that entry. |
| * If there are no more entries in the archive, null will |
| * be returned to indicate that the end of the archive has |
| * been reached. |
| * |
| * @return The next TarEntry in the archive, or null. |
| * @throws IOException on error |
| * @throws InvalidTarHeaderException if the next block cannot be |
| * parsed as a tar header. In this case it may be possible to skip |
| * some corrupted blocks and process the next valid tar header by |
| * calling this method again. |
| */ |
| public TarArchiveEntry getNextTarEntry() throws IOException { |
| if (isAtEOF()) { |
| return null; |
| } |
| |
| if (currEntry != null) { |
| /* Skip will only go to the end of the current entry */ |
| IOUtils.skip(this, Long.MAX_VALUE); |
| |
| /* skip to the end of the last record */ |
| skipRecordPadding(); |
| |
| /* Set currEntry to null, to make sure we don't skip again |
| if reading the next header fails */ |
| currEntry = null; |
| entrySize = 0; |
| } |
| |
| byte[] headerBuf = getRecord(); |
| |
| if (headerErrorOccurred) { |
| while (headerBuf != null) { |
| try { |
| if (TarUtils.verifyCheckSum(headerBuf)) { |
| break; |
| } |
| } catch (IllegalArgumentException e) { //NOSONAR |
| // next record is not a valid tar header either |
| } |
| entryOffset += recordSize; |
| headerBuf = getRecord(); |
| } |
| } |
| headerErrorOccurred = false; |
| |
| if (headerBuf == null) { |
| /* hit EOF */ |
| currEntry = null; |
| return null; |
| } |
| |
| try { |
| currEntry = new TarArchiveEntry(headerBuf, zipEncoding); |
| } catch (final IllegalArgumentException e) { |
| headerErrorOccurred = true; |
| throw new InvalidTarHeaderException(e); |
| } |
| |
| entryOffset = 0; |
| entrySize = currEntry.getSize(); |
| |
| if (currEntry.isGNULongLinkEntry()) { |
| final byte[] longLinkData = getLongNameData(); |
| if (longLinkData == null) { |
| // Bugzilla: 40334 |
| // Malformed tar file - long link entry name not followed by |
| // entry |
| return null; |
| } |
| currEntry.setLinkName(zipEncoding.decode(longLinkData)); |
| } |
| |
| if (currEntry.isGNULongNameEntry()) { |
| final byte[] longNameData = getLongNameData(); |
| if (longNameData == null) { |
| // Bugzilla: 40334 |
| // Malformed tar file - long entry name not followed by |
| // entry |
| return null; |
| } |
| currEntry.setName(zipEncoding.decode(longNameData)); |
| } |
| |
| if (currEntry.isGlobalPaxHeader()){ // Process Global Pax headers |
| readGlobalPaxHeaders(); |
| } |
| |
| if (currEntry.isPaxHeader()){ // Process Pax headers |
| paxHeaders(); |
| } else if (!globalPaxHeaders.isEmpty()) { |
| applyPaxHeadersToCurrentEntry(globalPaxHeaders); |
| } |
| |
| if (currEntry.isOldGNUSparse()){ // Process sparse files |
| readOldGNUSparse(); |
| } |
| |
| // If the size of the next element in the archive has changed |
| // due to a new size being reported in the posix header |
| // information, we update entrySize here so that it contains |
| // the correct value. |
| entrySize = currEntry.getSize(); |
| |
| return currEntry; |
| } |
| |
| /** |
| * The last record block should be written at the full size, so skip any |
| * additional space used to fill a record after an entry |
| */ |
| private void skipRecordPadding() throws IOException { |
| if (!isDirectory() && this.entrySize > 0 && this.entrySize % this.recordSize != 0) { |
| final long numRecords = (this.entrySize / this.recordSize) + 1; |
| final long padding = (numRecords * this.recordSize) - this.entrySize; |
| final long skipped = IOUtils.skip(is, padding); |
| count(skipped); |
| } |
| } |
| |
| /** |
| * Get the next entry in this tar archive as longname data. |
| * |
| * @return The next entry in the archive as longname data, or null. |
| * @throws IOException on error |
| */ |
| protected byte[] getLongNameData() throws IOException { |
| // read in the name |
| final ByteArrayOutputStream longName = new ByteArrayOutputStream(); |
| int length = 0; |
| while ((length = read(smallBuf)) >= 0) { |
| longName.write(smallBuf, 0, length); |
| } |
| getNextEntry(); |
| if (currEntry == null) { |
| // Bugzilla: 40334 |
| // Malformed tar file - long entry name not followed by entry |
| return null; |
| } |
| byte[] longNameData = longName.toByteArray(); |
| // remove trailing null terminator(s) |
| length = longNameData.length; |
| while (length > 0 && longNameData[length - 1] == 0) { |
| --length; |
| } |
| if (length != longNameData.length) { |
| final byte[] l = new byte[length]; |
| System.arraycopy(longNameData, 0, l, 0, length); |
| longNameData = l; |
| } |
| return longNameData; |
| } |
| |
| /** |
| * Get the next record in this tar archive. This will skip |
| * over any remaining data in the current entry, if there |
| * is one, and place the input stream at the header of the |
| * next entry. |
| * |
| * <p>If there are no more entries in the archive, null will be |
| * returned to indicate that the end of the archive has been |
| * reached. At the same time the {@code hasHitEOF} marker will be |
| * set to true.</p> |
| * |
| * @return The next header in the archive, or null. |
| * @throws IOException on error |
| */ |
| private byte[] getRecord() throws IOException { |
| byte[] headerBuf = readRecord(); |
| setAtEOF(isEOFRecord(headerBuf)); |
| if (isAtEOF() && headerBuf != null) { |
| tryToConsumeSecondEOFRecord(); |
| consumeRemainderOfLastBlock(); |
| headerBuf = null; |
| } |
| return headerBuf; |
| } |
| |
| /** |
| * Determine if an archive record indicate End of Archive. End of |
| * archive is indicated by a record that consists entirely of null bytes. |
| * |
| * @param record The record data to check. |
| * @return true if the record data is an End of Archive |
| */ |
| protected boolean isEOFRecord(final byte[] record) { |
| return record == null || ArchiveUtils.isArrayZero(record, recordSize); |
| } |
| |
| /** |
| * Read a record from the input stream and return the data. |
| * |
| * @return The record data or null if EOF has been hit. |
| * @throws IOException on error |
| */ |
| protected byte[] readRecord() throws IOException { |
| |
| final byte[] record = new byte[recordSize]; |
| |
| final int readNow = IOUtils.readFully(is, record); |
| count(readNow); |
| if (readNow != recordSize) { |
| return null; |
| } |
| |
| return record; |
| } |
| |
| private void readGlobalPaxHeaders() throws IOException { |
| globalPaxHeaders = parsePaxHeaders(this); |
| getNextEntry(); // Get the actual file entry |
| } |
| |
| private void paxHeaders() throws IOException{ |
| final Map<String, String> headers = parsePaxHeaders(this); |
| getNextEntry(); // Get the actual file entry |
| applyPaxHeadersToCurrentEntry(headers); |
| } |
| |
| // NOTE, using a Map here makes it impossible to ever support GNU |
| // sparse files using the PAX Format 0.0, see |
| // https://www.gnu.org/software/tar/manual/html_section/tar_92.html#SEC188 |
| Map<String, String> parsePaxHeaders(final InputStream i) |
| throws IOException { |
| final Map<String, String> headers = new HashMap<>(globalPaxHeaders); |
| // Format is "length keyword=value\n"; |
| while(true){ // get length |
| int ch; |
| int len = 0; |
| int read = 0; |
| while((ch = i.read()) != -1) { |
| read++; |
| if (ch == '\n') { // blank line in header |
| break; |
| } else if (ch == ' '){ // End of length string |
| // Get keyword |
| final ByteArrayOutputStream coll = new ByteArrayOutputStream(); |
| while((ch = i.read()) != -1) { |
| read++; |
| if (ch == '='){ // end of keyword |
| final String keyword = coll.toString(CharsetNames.UTF_8); |
| // Get rest of entry |
| final int restLen = len - read; |
| if (restLen == 1) { // only NL |
| headers.remove(keyword); |
| } else { |
| final byte[] rest = new byte[restLen]; |
| final int got = IOUtils.readFully(i, rest); |
| if (got != restLen) { |
| throw new IOException("Failed to read " |
| + "Paxheader. Expected " |
| + restLen |
| + " bytes, read " |
| + got); |
| } |
| // Drop trailing NL |
| final String value = new String(rest, 0, |
| restLen - 1, CharsetNames.UTF_8); |
| headers.put(keyword, value); |
| } |
| break; |
| } |
| coll.write((byte) ch); |
| } |
| break; // Processed single header |
| } |
| len *= 10; |
| len += ch - '0'; |
| } |
| if (ch == -1){ // EOF |
| break; |
| } |
| } |
| return headers; |
| } |
| |
| private void applyPaxHeadersToCurrentEntry(final Map<String, String> headers) { |
| currEntry.updateEntryFromPaxHeaders(headers); |
| |
| } |
| |
| /** |
| * Adds the sparse chunks from the current entry to the sparse chunks, |
| * including any additional sparse entries following the current entry. |
| * |
| * @throws IOException on error |
| * |
| * @todo Sparse files get not yet really processed. |
| */ |
| private void readOldGNUSparse() throws IOException { |
| /* we do not really process sparse files yet |
| sparses = new ArrayList(); |
| sparses.addAll(currEntry.getSparses()); |
| */ |
| if (currEntry.isExtended()) { |
| TarArchiveSparseEntry entry; |
| do { |
| final byte[] headerBuf = getRecord(); |
| if (headerBuf == null) { |
| currEntry = null; |
| break; |
| } |
| entry = new TarArchiveSparseEntry(headerBuf); |
| /* we do not really process sparse files yet |
| sparses.addAll(entry.getSparses()); |
| */ |
| } while (entry.isExtended()); |
| } |
| } |
| |
| private boolean isDirectory() { |
| return currEntry != null && currEntry.isDirectory(); |
| } |
| |
| /** |
| * Returns the next Archive Entry in this Stream. |
| * |
| * @return the next entry, |
| * or {@code null} if there are no more entries |
| * @throws IOException if the next entry could not be read |
| */ |
| @Override |
| public ArchiveEntry getNextEntry() throws IOException { |
| return getNextTarEntry(); |
| } |
| |
| /** |
| * Tries to read the next record rewinding the stream if it is not a EOF record. |
| * |
| * <p>This is meant to protect against cases where a tar |
| * implementation has written only one EOF record when two are |
| * expected. Actually this won't help since a non-conforming |
| * implementation likely won't fill full blocks consisting of - by |
| * default - ten records either so we probably have already read |
| * beyond the archive anyway.</p> |
| */ |
| private void tryToConsumeSecondEOFRecord() throws IOException { |
| boolean shouldReset = true; |
| final boolean marked = is.markSupported(); |
| if (marked) { |
| is.mark(recordSize); |
| } |
| try { |
| shouldReset = !isEOFRecord(readRecord()); |
| } finally { |
| if (shouldReset && marked) { |
| pushedBackBytes(recordSize); |
| is.reset(); |
| } |
| } |
| } |
| |
| /** |
| * Reads bytes from the current tar archive entry. |
| * |
| * This method is aware of the boundaries of the current |
| * entry in the archive and will deal with them as if they |
| * were this stream's start and EOF. |
| * |
| * @param buf The buffer into which to place bytes read. |
| * @param offset The offset at which to place bytes read. |
| * @param numToRead The number of bytes to read. |
| * @return The number of bytes read, or -1 at EOF. |
| * @throws IOException on error |
| */ |
| @Override |
| public int read(final byte[] buf, final int offset, int numToRead) throws IOException { |
| int totalRead = 0; |
| |
| if (isAtEOF() || isDirectory() || entryOffset >= entrySize) { |
| return -1; |
| } |
| |
| if (currEntry == null) { |
| throw new IllegalStateException("No current tar entry"); |
| } |
| |
| numToRead = Math.min(numToRead, available()); |
| |
| totalRead = is.read(buf, offset, numToRead); |
| |
| if (totalRead == -1) { |
| if (numToRead > 0) { |
| throw new IOException("Truncated TAR archive"); |
| } |
| setAtEOF(true); |
| } else { |
| count(totalRead); |
| entryOffset += totalRead; |
| } |
| |
| return totalRead; |
| } |
| |
| /** |
| * Whether this class is able to read the given entry. |
| * |
| * <p>May return false if the current entry is a sparse file.</p> |
| */ |
| @Override |
| public boolean canReadEntryData(final ArchiveEntry ae) { |
| if (ae instanceof TarArchiveEntry) { |
| final TarArchiveEntry te = (TarArchiveEntry) ae; |
| return !te.isSparse(); |
| } |
| return false; |
| } |
| |
| /** |
| * Get the current TAR Archive Entry that this input stream is processing |
| * |
| * @return The current Archive Entry |
| */ |
| public TarArchiveEntry getCurrentEntry() { |
| return currEntry; |
| } |
| |
| protected final void setCurrentEntry(final TarArchiveEntry e) { |
| currEntry = e; |
| } |
| |
| protected final boolean isAtEOF() { |
| return hasHitEOF; |
| } |
| |
| protected final void setAtEOF(final boolean b) { |
| hasHitEOF = b; |
| } |
| |
| /** |
| * This method is invoked once the end of the archive is hit, it |
| * tries to consume the remaining bytes under the assumption that |
| * the tool creating this archive has padded the last block. |
| */ |
| private void consumeRemainderOfLastBlock() throws IOException { |
| final long bytesReadOfLastBlock = getBytesRead() % blockSize; |
| if (bytesReadOfLastBlock > 0) { |
| final long skipped = IOUtils.skip(is, blockSize - bytesReadOfLastBlock); |
| count(skipped); |
| } |
| } |
| |
| /** |
| * Checks if the signature matches what is expected for a tar file. |
| * |
| * @param signature |
| * the bytes to check |
| * @param length |
| * the number of bytes to check |
| * @return true, if this stream is a tar archive stream, false otherwise |
| */ |
| public static boolean matches(final byte[] signature, final int length) { |
| if (length < TarConstants.VERSION_OFFSET+TarConstants.VERSIONLEN) { |
| return false; |
| } |
| |
| if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_POSIX, |
| signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN) |
| && |
| ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_POSIX, |
| signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN) |
| ){ |
| return true; |
| } |
| if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_GNU, |
| signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN) |
| && |
| ( |
| ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_SPACE, |
| signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN) |
| || |
| ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_ZERO, |
| signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN) |
| ) |
| ){ |
| return true; |
| } |
| // COMPRESS-107 - recognise Ant tar files |
| return ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_ANT, |
| signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN) |
| && |
| ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_ANT, |
| signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN); |
| } |
| |
| } |