| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| * |
| */ |
| |
| /* |
| * This package is based on the work done by Timothy Gerard Endres |
| * (time@ice.com) to whom the Ant project is very grateful for his great code. |
| */ |
| |
| package org.apache.tools.tar; |
| |
| import java.io.ByteArrayOutputStream; |
| import java.io.FilterInputStream; |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.io.OutputStream; |
| import java.util.HashMap; |
| import java.util.Map; |
| import java.util.Map.Entry; |
| |
| import org.apache.tools.zip.ZipEncoding; |
| import org.apache.tools.zip.ZipEncodingHelper; |
| |
| /** |
| * The TarInputStream reads a UNIX tar archive as an InputStream. |
| * Methods are provided to position at each successive entry in |
| * the archive, and then read each entry as a normal input stream |
| * using read(). |
| * |
| */ |
| public class TarInputStream extends FilterInputStream { |
| private static final int SMALL_BUFFER_SIZE = 256; |
| private static final int BUFFER_SIZE = 8 * 1024; |
| private static final int LARGE_BUFFER_SIZE = 32 * 1024; |
| private static final int BYTE_MASK = 0xFF; |
| |
| private final byte[] SKIP_BUF = new byte[BUFFER_SIZE]; |
| private final byte[] SMALL_BUF = new byte[SMALL_BUFFER_SIZE]; |
| |
| // CheckStyle:VisibilityModifier OFF - bc |
| protected boolean debug; |
| protected boolean hasHitEOF; |
| protected long entrySize; |
| protected long entryOffset; |
| protected byte[] readBuf; |
| protected TarBuffer buffer; |
| protected TarEntry currEntry; |
| |
| /** |
| * This contents of this array is not used at all in this class, |
| * it is only here to avoid repreated object creation during calls |
| * to the no-arg read method. |
| */ |
| protected byte[] oneBuf; |
| |
| // CheckStyle:VisibilityModifier ON |
| |
| private final ZipEncoding encoding; |
| |
/**
 * Creates a TarInputStream with the default block size, the default
 * record size and no explicit encoding name.
 *
 * @param is the input stream to use
 */
public TarInputStream(InputStream is) {
    this(is, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE, null);
}

/**
 * Creates a TarInputStream with the default block and record sizes.
 *
 * @param is the input stream to use
 * @param encoding name of the encoding to use for file names
 */
public TarInputStream(InputStream is, String encoding) {
    this(is, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE, encoding);
}

/**
 * Creates a TarInputStream with the default record size and no
 * explicit encoding name.
 *
 * @param is the input stream to use
 * @param blockSize the block size to use
 */
public TarInputStream(InputStream is, int blockSize) {
    this(is, blockSize, TarBuffer.DEFAULT_RCDSIZE, null);
}

/**
 * Creates a TarInputStream with the default record size.
 *
 * @param is the input stream to use
 * @param blockSize the block size to use
 * @param encoding name of the encoding to use for file names
 */
public TarInputStream(InputStream is, int blockSize, String encoding) {
    this(is, blockSize, TarBuffer.DEFAULT_RCDSIZE, encoding);
}

/**
 * Creates a TarInputStream with no explicit encoding name.
 *
 * @param is the input stream to use
 * @param blockSize the block size to use
 * @param recordSize the record size to use
 */
public TarInputStream(InputStream is, int blockSize, int recordSize) {
    this(is, blockSize, recordSize, null);
}

/**
 * Creates a TarInputStream; every other constructor ends up here.
 *
 * @param is the input stream to use
 * @param blockSize the block size to use
 * @param recordSize the record size to use
 * @param encoding name of the encoding to use for file names; it is
 *        handed to ZipEncodingHelper.getZipEncoding as given
 */
public TarInputStream(InputStream is, int blockSize, int recordSize,
                      String encoding) {
    super(is);
    buffer = new TarBuffer(is, blockSize, recordSize);
    this.encoding = ZipEncodingHelper.getZipEncoding(encoding);
    oneBuf = new byte[1];
    readBuf = null;
    hasHitEOF = false;
    debug = false;
}
| |
| /** |
| * Sets the debugging flag. |
| * |
| * @param debug True to turn on debugging. |
| */ |
| public void setDebug(boolean debug) { |
| this.debug = debug; |
| buffer.setDebug(debug); |
| } |
| |
| /** |
| * Closes this stream. Calls the TarBuffer's close() method. |
| * @throws IOException on error |
| */ |
| @Override |
| public void close() throws IOException { |
| buffer.close(); |
| } |
| |
| /** |
| * Get the record size being used by this stream's TarBuffer. |
| * |
| * @return The TarBuffer record size. |
| */ |
| public int getRecordSize() { |
| return buffer.getRecordSize(); |
| } |
| |
| /** |
| * Get the available data that can be read from the current |
| * entry in the archive. This does not indicate how much data |
| * is left in the entire archive, only in the current entry. |
| * This value is determined from the entry's size header field |
| * and the amount of data already read from the current entry. |
| * Integer.MAX_VALUE is returned in case more than Integer.MAX_VALUE |
| * bytes are left in the current entry in the archive. |
| * |
| * @return The number of available bytes for the current entry. |
| * @throws IOException for signature |
| */ |
| @Override |
| public int available() throws IOException { |
| if (entrySize - entryOffset > Integer.MAX_VALUE) { |
| return Integer.MAX_VALUE; |
| } |
| return (int) (entrySize - entryOffset); |
| } |
| |
| /** |
| * Skip bytes in the input buffer. This skips bytes in the |
| * current entry's data, not the entire archive, and will |
| * stop at the end of the current entry's data if the number |
| * to skip extends beyond that point. |
| * |
| * @param numToSkip The number of bytes to skip. |
| * @return the number actually skipped |
| * @throws IOException on error |
| */ |
| @Override |
| public long skip(long numToSkip) throws IOException { |
| // REVIEW |
| // This is horribly inefficient, but it ensures that we |
| // properly skip over bytes via the TarBuffer... |
| // |
| long skip = numToSkip; |
| while (skip > 0) { |
| int realSkip = (int) (skip > SKIP_BUF.length |
| ? SKIP_BUF.length : skip); |
| int numRead = read(SKIP_BUF, 0, realSkip); |
| if (numRead == -1) { |
| break; |
| } |
| skip -= numRead; |
| } |
| return (numToSkip - skip); |
| } |
| |
| /** |
| * Since we do not support marking just yet, we return false. |
| * |
| * @return False. |
| */ |
| @Override |
| public boolean markSupported() { |
| return false; |
| } |
| |
| /** |
| * Since we do not support marking just yet, we do nothing. |
| * |
| * @param markLimit The limit to mark. |
| */ |
| @Override |
| public void mark(int markLimit) { |
| } |
| |
| /** |
| * Since we do not support marking just yet, we do nothing. |
| */ |
| @Override |
| public void reset() { |
| } |
| |
| /** |
| * Get the next entry in this tar archive. This will skip |
| * over any remaining data in the current entry, if there |
| * is one, and place the input stream at the header of the |
| * next entry, and read the header and instantiate a new |
| * TarEntry from the header bytes and return that entry. |
| * If there are no more entries in the archive, null will |
| * be returned to indicate that the end of the archive has |
| * been reached. |
| * |
| * @return The next TarEntry in the archive, or null. |
| * @throws IOException on error |
| */ |
| public TarEntry getNextEntry() throws IOException { |
| if (hasHitEOF) { |
| return null; |
| } |
| |
| if (currEntry != null) { |
| long numToSkip = entrySize - entryOffset; |
| |
| if (debug) { |
| System.err.println("TarInputStream: SKIP currENTRY '" |
| + currEntry.getName() + "' SZ " |
| + entrySize + " OFF " |
| + entryOffset + " skipping " |
| + numToSkip + " bytes"); |
| } |
| |
| while (numToSkip > 0) { |
| long skipped = skip(numToSkip); |
| if (skipped <= 0) { |
| throw new RuntimeException("failed to skip current tar" |
| + " entry"); |
| } |
| numToSkip -= skipped; |
| } |
| |
| readBuf = null; |
| } |
| |
| byte[] headerBuf = getRecord(); |
| |
| if (hasHitEOF) { |
| currEntry = null; |
| return null; |
| } |
| |
| try { |
| currEntry = new TarEntry(headerBuf, encoding); |
| } catch (IllegalArgumentException e) { |
| IOException ioe = new IOException("Error detected parsing the header"); |
| ioe.initCause(e); |
| throw ioe; |
| } |
| if (debug) { |
| System.err.println("TarInputStream: SET CURRENTRY '" |
| + currEntry.getName() |
| + "' size = " |
| + currEntry.getSize()); |
| } |
| |
| entryOffset = 0; |
| entrySize = currEntry.getSize(); |
| |
| if (currEntry.isGNULongLinkEntry()) { |
| byte[] longLinkData = getLongNameData(); |
| if (longLinkData == null) { |
| // Bugzilla: 40334 |
| // Malformed tar file - long link entry name not followed by |
| // entry |
| return null; |
| } |
| currEntry.setLinkName(encoding.decode(longLinkData)); |
| } |
| |
| if (currEntry.isGNULongNameEntry()) { |
| byte[] longNameData = getLongNameData(); |
| if (longNameData == null) { |
| // Bugzilla: 40334 |
| // Malformed tar file - long entry name not followed by |
| // entry |
| return null; |
| } |
| currEntry.setName(encoding.decode(longNameData)); |
| } |
| |
| if (currEntry.isPaxHeader()){ // Process Pax headers |
| paxHeaders(); |
| } |
| |
| if (currEntry.isGNUSparse()){ // Process sparse files |
| readGNUSparse(); |
| } |
| |
| // If the size of the next element in the archive has changed |
| // due to a new size being reported in the posix header |
| // information, we update entrySize here so that it contains |
| // the correct value. |
| entrySize = currEntry.getSize(); |
| return currEntry; |
| } |
| |
| /** |
| * Get the next entry in this tar archive as longname data. |
| * |
| * @return The next entry in the archive as longname data, or null. |
| * @throws IOException on error |
| */ |
| protected byte[] getLongNameData() throws IOException { |
| // read in the name |
| ByteArrayOutputStream longName = new ByteArrayOutputStream(); |
| int length = 0; |
| while ((length = read(SMALL_BUF)) >= 0) { |
| longName.write(SMALL_BUF, 0, length); |
| } |
| getNextEntry(); |
| if (currEntry == null) { |
| // Bugzilla: 40334 |
| // Malformed tar file - long entry name not followed by entry |
| return null; |
| } |
| byte[] longNameData = longName.toByteArray(); |
| // remove trailing null terminator(s) |
| length = longNameData.length; |
| while (length > 0 && longNameData[length - 1] == 0) { |
| --length; |
| } |
| if (length != longNameData.length) { |
| byte[] l = new byte[length]; |
| System.arraycopy(longNameData, 0, l, 0, length); |
| longNameData = l; |
| } |
| return longNameData; |
| } |
| |
| /** |
| * Get the next record in this tar archive. This will skip |
| * over any remaining data in the current entry, if there |
| * is one, and place the input stream at the header of the |
| * next entry. |
| * If there are no more entries in the archive, null will |
| * be returned to indicate that the end of the archive has |
| * been reached. |
| * |
| * @return The next header in the archive, or null. |
| * @throws IOException on error |
| */ |
| private byte[] getRecord() throws IOException { |
| if (hasHitEOF) { |
| return null; |
| } |
| |
| byte[] headerBuf = buffer.readRecord(); |
| |
| if (headerBuf == null) { |
| if (debug) { |
| System.err.println("READ NULL RECORD"); |
| } |
| hasHitEOF = true; |
| } else if (buffer.isEOFRecord(headerBuf)) { |
| if (debug) { |
| System.err.println("READ EOF RECORD"); |
| } |
| hasHitEOF = true; |
| } |
| |
| return hasHitEOF ? null : headerBuf; |
| } |
| |
| private void paxHeaders() throws IOException{ |
| Map<String, String> headers = parsePaxHeaders(this); |
| getNextEntry(); // Get the actual file entry |
| applyPaxHeadersToCurrentEntry(headers); |
| } |
| |
| Map<String, String> parsePaxHeaders(InputStream i) throws IOException { |
| Map<String, String> headers = new HashMap<String, String>(); |
| // Format is "length keyword=value\n"; |
| while(true){ // get length |
| int ch; |
| int len = 0; |
| int read = 0; |
| while((ch = i.read()) != -1) { |
| read++; |
| if (ch == ' '){ // End of length string |
| // Get keyword |
| ByteArrayOutputStream coll = new ByteArrayOutputStream(); |
| while((ch = i.read()) != -1) { |
| read++; |
| if (ch == '='){ // end of keyword |
| String keyword = coll.toString("UTF-8"); |
| // Get rest of entry |
| final int restLen = len - read; |
| byte[] rest = new byte[restLen]; |
| int got = 0; |
| while (got < restLen && (ch = i.read()) != -1) { |
| rest[got++] = (byte) ch; |
| } |
| if (got != restLen) { |
| throw new IOException("Failed to read " |
| + "Paxheader. Expected " |
| + restLen |
| + " bytes, read " |
| + got); |
| } |
| // Drop trailing NL |
| String value = new String(rest, 0, |
| restLen - 1, "UTF-8"); |
| headers.put(keyword, value); |
| break; |
| } |
| coll.write((byte) ch); |
| } |
| break; // Processed single header |
| } |
| len *= 10; |
| len += ch - '0'; |
| } |
| if (ch == -1){ // EOF |
| break; |
| } |
| } |
| return headers; |
| } |
| |
| private void applyPaxHeadersToCurrentEntry(Map<String, String> headers) { |
| /* |
| * The following headers are defined for Pax. |
| * atime, ctime, charset: cannot use these without changing TarEntry fields |
| * mtime |
| * comment |
| * gid, gname |
| * linkpath |
| * size |
| * uid,uname |
| * SCHILY.devminor, SCHILY.devmajor: don't have setters/getters for those |
| */ |
| for (Entry<String, String> ent : headers.entrySet()){ |
| String key = ent.getKey(); |
| String val = ent.getValue(); |
| if ("path".equals(key)){ |
| currEntry.setName(val); |
| } else if ("linkpath".equals(key)){ |
| currEntry.setLinkName(val); |
| } else if ("gid".equals(key)){ |
| currEntry.setGroupId(Long.parseLong(val)); |
| } else if ("gname".equals(key)){ |
| currEntry.setGroupName(val); |
| } else if ("uid".equals(key)){ |
| currEntry.setUserId(Long.parseLong(val)); |
| } else if ("uname".equals(key)){ |
| currEntry.setUserName(val); |
| } else if ("size".equals(key)){ |
| currEntry.setSize(Long.parseLong(val)); |
| } else if ("mtime".equals(key)){ |
| currEntry.setModTime((long) (Double.parseDouble(val) * 1000)); |
| } else if ("SCHILY.devminor".equals(key)){ |
| currEntry.setDevMinor(Integer.parseInt(val)); |
| } else if ("SCHILY.devmajor".equals(key)){ |
| currEntry.setDevMajor(Integer.parseInt(val)); |
| } |
| } |
| } |
| |
| /** |
| * Adds the sparse chunks from the current entry to the sparse chunks, |
| * including any additional sparse entries following the current entry. |
| * |
| * @throws IOException on error |
| * |
| * @todo Sparse files get not yet really processed. |
| */ |
| private void readGNUSparse() throws IOException { |
| /* we do not really process sparse files yet |
| sparses = new ArrayList(); |
| sparses.addAll(currEntry.getSparses()); |
| */ |
| if (currEntry.isExtended()) { |
| TarArchiveSparseEntry entry; |
| do { |
| byte[] headerBuf = getRecord(); |
| if (hasHitEOF) { |
| currEntry = null; |
| break; |
| } |
| entry = new TarArchiveSparseEntry(headerBuf); |
| /* we do not really process sparse files yet |
| sparses.addAll(entry.getSparses()); |
| */ |
| } while (entry.isExtended()); |
| } |
| } |
| |
| /** |
| * Reads a byte from the current tar archive entry. |
| * |
| * This method simply calls read( byte[], int, int ). |
| * |
| * @return The byte read, or -1 at EOF. |
| * @throws IOException on error |
| */ |
| @Override |
| public int read() throws IOException { |
| int num = read(oneBuf, 0, 1); |
| return num == -1 ? -1 : (oneBuf[0]) & BYTE_MASK; |
| } |
| |
| /** |
| * Reads bytes from the current tar archive entry. |
| * |
| * This method is aware of the boundaries of the current |
| * entry in the archive and will deal with them as if they |
| * were this stream's start and EOF. |
| * |
| * @param buf The buffer into which to place bytes read. |
| * @param offset The offset at which to place bytes read. |
| * @param numToRead The number of bytes to read. |
| * @return The number of bytes read, or -1 at EOF. |
| * @throws IOException on error |
| */ |
| @Override |
| public int read(byte[] buf, int offset, int numToRead) throws IOException { |
| int totalRead = 0; |
| |
| if (entryOffset >= entrySize) { |
| return -1; |
| } |
| |
| if ((numToRead + entryOffset) > entrySize) { |
| numToRead = (int) (entrySize - entryOffset); |
| } |
| |
| if (readBuf != null) { |
| int sz = (numToRead > readBuf.length) ? readBuf.length |
| : numToRead; |
| |
| System.arraycopy(readBuf, 0, buf, offset, sz); |
| |
| if (sz >= readBuf.length) { |
| readBuf = null; |
| } else { |
| int newLen = readBuf.length - sz; |
| byte[] newBuf = new byte[newLen]; |
| |
| System.arraycopy(readBuf, sz, newBuf, 0, newLen); |
| |
| readBuf = newBuf; |
| } |
| |
| totalRead += sz; |
| numToRead -= sz; |
| offset += sz; |
| } |
| |
| while (numToRead > 0) { |
| byte[] rec = buffer.readRecord(); |
| |
| if (rec == null) { |
| // Unexpected EOF! |
| throw new IOException("unexpected EOF with " + numToRead |
| + " bytes unread"); |
| } |
| |
| int sz = numToRead; |
| int recLen = rec.length; |
| |
| if (recLen > sz) { |
| System.arraycopy(rec, 0, buf, offset, sz); |
| |
| readBuf = new byte[recLen - sz]; |
| |
| System.arraycopy(rec, sz, readBuf, 0, recLen - sz); |
| } else { |
| sz = recLen; |
| |
| System.arraycopy(rec, 0, buf, offset, recLen); |
| } |
| |
| totalRead += sz; |
| numToRead -= sz; |
| offset += sz; |
| } |
| |
| entryOffset += totalRead; |
| |
| return totalRead; |
| } |
| |
| /** |
| * Copies the contents of the current tar archive entry directly into |
| * an output stream. |
| * |
| * @param out The OutputStream into which to write the entry's data. |
| * @throws IOException on error |
| */ |
| public void copyEntryContents(OutputStream out) throws IOException { |
| byte[] buf = new byte[LARGE_BUFFER_SIZE]; |
| |
| while (true) { |
| int numRead = read(buf, 0, buf.length); |
| |
| if (numRead == -1) { |
| break; |
| } |
| |
| out.write(buf, 0, numRead); |
| } |
| } |
| |
| /** |
| * Whether this class is able to read the given entry. |
| * |
| * <p>May return false if the current entry is a sparse file.</p> |
| */ |
| public boolean canReadEntryData(TarEntry te) { |
| return !te.isGNUSparse(); |
| } |
| } |
| |