| // Copyright 2014-2017 Ulrich Kunitz. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package lzma |
| |
| import ( |
| "errors" |
| "fmt" |
| "io" |
| ) |
| |
| const ( |
| // maximum size of compressed data in a chunk |
| maxCompressed = 1 << 16 |
| // maximum size of uncompressed data in a chunk |
| maxUncompressed = 1 << 21 |
| ) |
| |
| // chunkType represents the type of an LZMA2 chunk. Note that this |
| // value is an internal representation and no actual encoding of a LZMA2 |
| // chunk header. |
| type chunkType byte |
| |
| // Possible values for the chunk type. |
| const ( |
| // end of stream |
| cEOS chunkType = iota |
| // uncompressed; reset dictionary |
| cUD |
| // uncompressed; no reset of dictionary |
| cU |
| // LZMA compressed; no reset |
| cL |
| // LZMA compressed; reset state |
| cLR |
| // LZMA compressed; reset state; new property value |
| cLRN |
| // LZMA compressed; reset state; new property value; reset dictionary |
| cLRND |
| ) |
| |
| // chunkTypeStrings provide a string representation for the chunk types. |
| var chunkTypeStrings = [...]string{ |
| cEOS: "EOS", |
| cU: "U", |
| cUD: "UD", |
| cL: "L", |
| cLR: "LR", |
| cLRN: "LRN", |
| cLRND: "LRND", |
| } |
| |
| // String returns a string representation of the chunk type. |
| func (c chunkType) String() string { |
| if !(cEOS <= c && c <= cLRND) { |
| return "unknown" |
| } |
| return chunkTypeStrings[c] |
| } |
| |
| // Actual encodings for the chunk types in the value. Note that the high |
| // uncompressed size bits are stored in the header byte additionally. |
| const ( |
| hEOS = 0 |
| hUD = 1 |
| hU = 2 |
| hL = 1 << 7 |
| hLR = 1<<7 | 1<<5 |
| hLRN = 1<<7 | 1<<6 |
| hLRND = 1<<7 | 1<<6 | 1<<5 |
| ) |
| |
| // errHeaderByte indicates an unsupported value for the chunk header |
| // byte. These bytes starts the variable-length chunk header. |
| var errHeaderByte = errors.New("lzma: unsupported chunk header byte") |
| |
| // headerChunkType converts the header byte into a chunk type. It |
| // ignores the uncompressed size bits in the chunk header byte. |
| func headerChunkType(h byte) (c chunkType, err error) { |
| if h&hL == 0 { |
| // no compression |
| switch h { |
| case hEOS: |
| c = cEOS |
| case hUD: |
| c = cUD |
| case hU: |
| c = cU |
| default: |
| return 0, errHeaderByte |
| } |
| return |
| } |
| switch h & hLRND { |
| case hL: |
| c = cL |
| case hLR: |
| c = cLR |
| case hLRN: |
| c = cLRN |
| case hLRND: |
| c = cLRND |
| default: |
| return 0, errHeaderByte |
| } |
| return |
| } |
| |
| // uncompressedHeaderLen provides the length of an uncompressed header |
| const uncompressedHeaderLen = 3 |
| |
| // headerLen returns the length of the LZMA2 header for a given chunk |
| // type. |
| func headerLen(c chunkType) int { |
| switch c { |
| case cEOS: |
| return 1 |
| case cU, cUD: |
| return uncompressedHeaderLen |
| case cL, cLR: |
| return 5 |
| case cLRN, cLRND: |
| return 6 |
| } |
| panic(fmt.Errorf("unsupported chunk type %d", c)) |
| } |
| |
| // chunkHeader represents the contents of a chunk header. |
| type chunkHeader struct { |
| ctype chunkType |
| uncompressed uint32 |
| compressed uint16 |
| props Properties |
| } |
| |
| // String returns a string representation of the chunk header. |
| func (h *chunkHeader) String() string { |
| return fmt.Sprintf("%s %d %d %s", h.ctype, h.uncompressed, |
| h.compressed, &h.props) |
| } |
| |
| // UnmarshalBinary reads the content of the chunk header from the data |
| // slice. The slice must have the correct length. |
| func (h *chunkHeader) UnmarshalBinary(data []byte) error { |
| if len(data) == 0 { |
| return errors.New("no data") |
| } |
| c, err := headerChunkType(data[0]) |
| if err != nil { |
| return err |
| } |
| |
| n := headerLen(c) |
| if len(data) < n { |
| return errors.New("incomplete data") |
| } |
| if len(data) > n { |
| return errors.New("invalid data length") |
| } |
| |
| *h = chunkHeader{ctype: c} |
| if c == cEOS { |
| return nil |
| } |
| |
| h.uncompressed = uint32(uint16BE(data[1:3])) |
| if c <= cU { |
| return nil |
| } |
| h.uncompressed |= uint32(data[0]&^hLRND) << 16 |
| |
| h.compressed = uint16BE(data[3:5]) |
| if c <= cLR { |
| return nil |
| } |
| |
| h.props, err = PropertiesForCode(data[5]) |
| return err |
| } |
| |
| // MarshalBinary encodes the chunk header value. The function checks |
| // whether the content of the chunk header is correct. |
| func (h *chunkHeader) MarshalBinary() (data []byte, err error) { |
| if h.ctype > cLRND { |
| return nil, errors.New("invalid chunk type") |
| } |
| if err = h.props.verify(); err != nil { |
| return nil, err |
| } |
| |
| data = make([]byte, headerLen(h.ctype)) |
| |
| switch h.ctype { |
| case cEOS: |
| return data, nil |
| case cUD: |
| data[0] = hUD |
| case cU: |
| data[0] = hU |
| case cL: |
| data[0] = hL |
| case cLR: |
| data[0] = hLR |
| case cLRN: |
| data[0] = hLRN |
| case cLRND: |
| data[0] = hLRND |
| } |
| |
| putUint16BE(data[1:3], uint16(h.uncompressed)) |
| if h.ctype <= cU { |
| return data, nil |
| } |
| data[0] |= byte(h.uncompressed>>16) &^ hLRND |
| |
| putUint16BE(data[3:5], h.compressed) |
| if h.ctype <= cLR { |
| return data, nil |
| } |
| |
| data[5] = h.props.Code() |
| return data, nil |
| } |
| |
| // readChunkHeader reads the chunk header from the IO reader. |
| func readChunkHeader(r io.Reader) (h *chunkHeader, err error) { |
| p := make([]byte, 1, 6) |
| if _, err = io.ReadFull(r, p); err != nil { |
| return |
| } |
| c, err := headerChunkType(p[0]) |
| if err != nil { |
| return |
| } |
| p = p[:headerLen(c)] |
| if _, err = io.ReadFull(r, p[1:]); err != nil { |
| return |
| } |
| h = new(chunkHeader) |
| if err = h.UnmarshalBinary(p); err != nil { |
| return nil, err |
| } |
| return h, nil |
| } |
| |
| // uint16BE converts a big-endian uint16 representation to an uint16 |
| // value. |
| func uint16BE(p []byte) uint16 { |
| return uint16(p[0])<<8 | uint16(p[1]) |
| } |
| |
| // putUint16BE puts the big-endian uint16 presentation into the given |
| // slice. |
| func putUint16BE(p []byte, x uint16) { |
| p[0] = byte(x >> 8) |
| p[1] = byte(x) |
| } |
| |
| // chunkState is used to manage the state of the chunks |
| type chunkState byte |
| |
| // start and stop define the initial and terminating state of the chunk |
| // state |
| const ( |
| start chunkState = 'S' |
| stop = 'T' |
| ) |
| |
| // errors for the chunk state handling |
| var ( |
| errChunkType = errors.New("lzma: unexpected chunk type") |
| errState = errors.New("lzma: wrong chunk state") |
| ) |
| |
| // next transitions state based on chunk type input |
| func (c *chunkState) next(ctype chunkType) error { |
| switch *c { |
| // start state |
| case 'S': |
| switch ctype { |
| case cEOS: |
| *c = 'T' |
| case cUD: |
| *c = 'R' |
| case cLRND: |
| *c = 'L' |
| default: |
| return errChunkType |
| } |
| // normal LZMA mode |
| case 'L': |
| switch ctype { |
| case cEOS: |
| *c = 'T' |
| case cUD: |
| *c = 'R' |
| case cU: |
| *c = 'U' |
| case cL, cLR, cLRN, cLRND: |
| break |
| default: |
| return errChunkType |
| } |
| // reset required |
| case 'R': |
| switch ctype { |
| case cEOS: |
| *c = 'T' |
| case cUD, cU: |
| break |
| case cLRN, cLRND: |
| *c = 'L' |
| default: |
| return errChunkType |
| } |
| // uncompressed |
| case 'U': |
| switch ctype { |
| case cEOS: |
| *c = 'T' |
| case cUD: |
| *c = 'R' |
| case cU: |
| break |
| case cL, cLR, cLRN, cLRND: |
| *c = 'L' |
| default: |
| return errChunkType |
| } |
| // terminal state |
| case 'T': |
| return errChunkType |
| default: |
| return errState |
| } |
| return nil |
| } |
| |
| // defaultChunkType returns the default chunk type for each chunk state. |
| func (c chunkState) defaultChunkType() chunkType { |
| switch c { |
| case 'S': |
| return cLRND |
| case 'L', 'U': |
| return cL |
| case 'R': |
| return cLRN |
| default: |
| // no error |
| return cEOS |
| } |
| } |
| |
| // maxDictCap defines the maximum dictionary capacity supported by the |
| // LZMA2 dictionary capacity encoding. |
| const maxDictCap = 1<<32 - 1 |
| |
| // maxDictCapCode defines the maximum dictionary capacity code. |
| const maxDictCapCode = 40 |
| |
| // The function decodes the dictionary capacity byte, but doesn't change |
| // for the correct range of the given byte. |
| func decodeDictCap(c byte) int64 { |
| return (2 | int64(c)&1) << (11 + (c>>1)&0x1f) |
| } |
| |
| // DecodeDictCap decodes the encoded dictionary capacity. The function |
| // returns an error if the code is out of range. |
| func DecodeDictCap(c byte) (n int64, err error) { |
| if c >= maxDictCapCode { |
| if c == maxDictCapCode { |
| return maxDictCap, nil |
| } |
| return 0, errors.New("lzma: invalid dictionary size code") |
| } |
| return decodeDictCap(c), nil |
| } |
| |
| // EncodeDictCap encodes a dictionary capacity. The function returns the |
| // code for the capacity that is greater or equal n. If n exceeds the |
| // maximum support dictionary capacity, the maximum value is returned. |
| func EncodeDictCap(n int64) byte { |
| a, b := byte(0), byte(40) |
| for a < b { |
| c := a + (b-a)>>1 |
| m := decodeDictCap(c) |
| if n <= m { |
| if n == m { |
| return c |
| } |
| b = c |
| } else { |
| a = c + 1 |
| } |
| } |
| return a |
| } |