257 lines
		
	
	
		
			7.1 KiB
		
	
	
	
		
			Go
		
	
	
	
			
		
		
	
	
			257 lines
		
	
	
		
			7.1 KiB
		
	
	
	
		
			Go
		
	
	
	
| /*
 | |
|  * Package xz Go Reader API
 | |
|  *
 | |
|  * Author: Michael Cross <https://github.com/xi2>
 | |
|  *
 | |
|  * This file has been put into the public domain.
 | |
|  * You can do whatever you want with this file.
 | |
|  */
 | |
| 
 | |
| package xz
 | |
| 
 | |
| import (
 | |
| 	"errors"
 | |
| 	"io"
 | |
| )
 | |
| 
 | |
// Package specific errors.
//
// Read translates the decoder's result code into one of these values.
// Once that happens the error is stored on the Reader and returned by
// every subsequent Read until Reset is called with a non-nil
// io.Reader.
//
// ErrMemlimit in particular is what Read returns when a stream needs
// a dictionary larger than the dictMax given to NewReader.
var (
	ErrUnsupportedCheck = errors.New("xz: integrity check type not supported")
	ErrMemlimit         = errors.New("xz: LZMA2 dictionary size exceeds max")
	ErrFormat           = errors.New("xz: file format not recognized")
	ErrOptions          = errors.New("xz: compression options not supported")
	ErrData             = errors.New("xz: data is corrupt")
	ErrBuf              = errors.New("xz: data is truncated or corrupt")
)
 | |
| 
 | |
// DefaultDictMax is the default maximum dictionary size in bytes used
// by the decoder. This value is sufficient to decompress files
// created with XZ Utils "xz -9".
//
// NewReader substitutes this value when it is called with a dictMax
// of zero.
const DefaultDictMax = 1 << 26 // 64 MiB
 | |
| 
 | |
// inBufSize is the input buffer size used by the decoder. It is the
// length of the Reader's in array, and therefore the most that Read
// asks the wrapped io.Reader for in a single call.
const inBufSize = 1 << 13 // 8 KiB
 | |
| 
 | |
// A Reader is an io.Reader that can be used to retrieve uncompressed
// data from an XZ file.
//
// In general, an XZ file can be a concatenation of other XZ
// files. Reads from the Reader return the concatenation of the
// uncompressed data of each.
//
// A Reader should be obtained from NewReader, which sets up the
// decoder state and buffers that the methods rely on.
type Reader struct {
	// Header is embedded; NewReader passes its address to xzDecInit.
	// Read compares z.CheckType (presumably a field promoted from
	// Header; confirm against its declaration) with checkUnset to
	// tell whether the stream header has been read yet.
	Header
	r           io.Reader       // the wrapped io.Reader
	multistream bool            // true if reader is in multistream mode
	rEOF        bool            // true after io.EOF received on r
	dEOF        bool            // true after decoder has completed
	padding     int             // bytes of stream padding read, or -1 while decoding a real stream
	in          [inBufSize]byte // backing array for buf.in
	buf         *xzBuf          // decoder input/output buffers
	dec         *xzDec          // decoder state
	err         error           // the result of the last decoder call; replayed by later Reads
}
 | |
| 
 | |
| // NewReader creates a new Reader reading from r. The decompressor
 | |
| // will use an LZMA2 dictionary size up to dictMax bytes in
 | |
| // size. Passing a value of zero sets dictMax to DefaultDictMax.  If
 | |
| // an individual XZ stream requires a dictionary size greater than
 | |
| // dictMax in order to decompress, Read will return ErrMemlimit.
 | |
| //
 | |
| // If NewReader is passed a value of nil for r then a Reader is
 | |
| // created such that all read attempts will return io.EOF. This is
 | |
| // useful if you just want to allocate memory for a Reader which will
 | |
| // later be initialized with Reset.
 | |
| //
 | |
| // Due to internal buffering, the Reader may read more data than
 | |
| // necessary from r.
 | |
| func NewReader(r io.Reader, dictMax uint32) (*Reader, error) {
 | |
| 	if dictMax == 0 {
 | |
| 		dictMax = DefaultDictMax
 | |
| 	}
 | |
| 	z := &Reader{
 | |
| 		r:           r,
 | |
| 		multistream: true,
 | |
| 		padding:     -1,
 | |
| 		buf:         &xzBuf{},
 | |
| 	}
 | |
| 	if r == nil {
 | |
| 		z.rEOF, z.dEOF = true, true
 | |
| 	}
 | |
| 	z.dec = xzDecInit(dictMax, &z.Header)
 | |
| 	var err error
 | |
| 	if r != nil {
 | |
| 		_, err = z.Read(nil) // read stream header
 | |
| 	}
 | |
| 	return z, err
 | |
| }
 | |
| 
 | |
| // decode is a wrapper around xzDecRun that additionally handles
 | |
| // stream padding. It treats the padding as a kind of stream that
 | |
| // decodes to nothing.
 | |
| //
 | |
| // When decoding padding, z.padding >= 0
 | |
| // When decoding a real stream, z.padding == -1
 | |
| func (z *Reader) decode() (ret xzRet) {
 | |
| 	if z.padding >= 0 {
 | |
| 		// read all padding in input buffer
 | |
| 		for z.buf.inPos < len(z.buf.in) &&
 | |
| 			z.buf.in[z.buf.inPos] == 0 {
 | |
| 			z.buf.inPos++
 | |
| 			z.padding++
 | |
| 		}
 | |
| 		switch {
 | |
| 		case z.buf.inPos == len(z.buf.in) && z.rEOF:
 | |
| 			// case: out of padding. no more input data available
 | |
| 			if z.padding%4 != 0 {
 | |
| 				ret = xzDataError
 | |
| 			} else {
 | |
| 				ret = xzStreamEnd
 | |
| 			}
 | |
| 		case z.buf.inPos == len(z.buf.in):
 | |
| 			// case: read more padding next loop iteration
 | |
| 			ret = xzOK
 | |
| 		default:
 | |
| 			// case: out of padding. more input data available
 | |
| 			if z.padding%4 != 0 {
 | |
| 				ret = xzDataError
 | |
| 			} else {
 | |
| 				xzDecReset(z.dec)
 | |
| 				ret = xzStreamEnd
 | |
| 			}
 | |
| 		}
 | |
| 	} else {
 | |
| 		ret = xzDecRun(z.dec, z.buf)
 | |
| 	}
 | |
| 	return
 | |
| }
 | |
| 
 | |
| func (z *Reader) Read(p []byte) (n int, err error) {
 | |
| 	// restore err
 | |
| 	err = z.err
 | |
| 	// set decoder output buffer to p
 | |
| 	z.buf.out = p
 | |
| 	z.buf.outPos = 0
 | |
| 	for {
 | |
| 		// update n
 | |
| 		n = z.buf.outPos
 | |
| 		// if last call to decoder ended with an error, return that error
 | |
| 		if err != nil {
 | |
| 			break
 | |
| 		}
 | |
| 		// if decoder has finished, return with err == io.EOF
 | |
| 		if z.dEOF {
 | |
| 			err = io.EOF
 | |
| 			break
 | |
| 		}
 | |
| 		// if p full, return with err == nil, unless we have not yet
 | |
| 		// read the stream header with Read(nil)
 | |
| 		if n == len(p) && z.CheckType != checkUnset {
 | |
| 			break
 | |
| 		}
 | |
| 		// if needed, read more data from z.r
 | |
| 		if z.buf.inPos == len(z.buf.in) && !z.rEOF {
 | |
| 			rn, e := z.r.Read(z.in[:])
 | |
| 			if e != nil && e != io.EOF {
 | |
| 				// read error
 | |
| 				err = e
 | |
| 				break
 | |
| 			}
 | |
| 			if e == io.EOF {
 | |
| 				z.rEOF = true
 | |
| 			}
 | |
| 			// set new input buffer in z.buf
 | |
| 			z.buf.in = z.in[:rn]
 | |
| 			z.buf.inPos = 0
 | |
| 		}
 | |
| 		// decode more data
 | |
| 		ret := z.decode()
 | |
| 		switch ret {
 | |
| 		case xzOK:
 | |
| 			// no action needed
 | |
| 		case xzStreamEnd:
 | |
| 			if z.padding >= 0 {
 | |
| 				z.padding = -1
 | |
| 				if !z.multistream || z.rEOF {
 | |
| 					z.dEOF = true
 | |
| 				}
 | |
| 			} else {
 | |
| 				z.padding = 0
 | |
| 			}
 | |
| 		case xzUnsupportedCheck:
 | |
| 			err = ErrUnsupportedCheck
 | |
| 		case xzMemlimitError:
 | |
| 			err = ErrMemlimit
 | |
| 		case xzFormatError:
 | |
| 			err = ErrFormat
 | |
| 		case xzOptionsError:
 | |
| 			err = ErrOptions
 | |
| 		case xzDataError:
 | |
| 			err = ErrData
 | |
| 		case xzBufError:
 | |
| 			err = ErrBuf
 | |
| 		}
 | |
| 		// save err
 | |
| 		z.err = err
 | |
| 	}
 | |
| 	return
 | |
| }
 | |
| 
 | |
// Multistream controls whether the reader is operating in multistream
// mode.
//
// If enabled (the default), the Reader expects the input to be a
// sequence of XZ streams, possibly interspersed with stream padding,
// which it reads one after another. The effect is that the
// concatenation of a sequence of XZ streams or XZ files is
// treated as equivalent to the compressed result of the concatenation
// of the sequence. This is standard behaviour for XZ readers.
//
// Calling Multistream(false) disables this behaviour; disabling the
// behaviour can be useful when reading file formats that distinguish
// individual XZ streams. In this mode, when the Reader reaches the
// end of the stream, Read returns io.EOF. To start the next stream,
// call z.Reset(nil) followed by z.Multistream(false). If there is no
// next stream, z.Reset(nil) will return io.EOF.
//
// Reset always switches multistream mode back on, which is why
// Multistream(false) has to be called again after each Reset.
func (z *Reader) Multistream(ok bool) {
	z.multistream = ok
}
 | |
| 
 | |
| // Reset, for non-nil values of io.Reader r, discards the Reader z's
 | |
| // state and makes it equivalent to the result of its original state
 | |
| // from NewReader, but reading from r instead. This permits reusing a
 | |
| // Reader rather than allocating a new one.
 | |
| //
 | |
| // If you wish to leave r unchanged use z.Reset(nil). This keeps r
 | |
| // unchanged and ensures internal buffering is preserved. If the
 | |
| // Reader was at the end of a stream it is then ready to read any
 | |
| // follow on streams. If there are no follow on streams z.Reset(nil)
 | |
| // returns io.EOF. If the Reader was not at the end of a stream then
 | |
| // z.Reset(nil) does nothing.
 | |
| func (z *Reader) Reset(r io.Reader) error {
 | |
| 	switch {
 | |
| 	case r == nil:
 | |
| 		z.multistream = true
 | |
| 		if !z.dEOF {
 | |
| 			return nil
 | |
| 		}
 | |
| 		if z.rEOF {
 | |
| 			return io.EOF
 | |
| 		}
 | |
| 		z.dEOF = false
 | |
| 		_, err := z.Read(nil) // read stream header
 | |
| 		return err
 | |
| 	default:
 | |
| 		z.r = r
 | |
| 		z.multistream = true
 | |
| 		z.rEOF = false
 | |
| 		z.dEOF = false
 | |
| 		z.padding = -1
 | |
| 		z.buf.in = nil
 | |
| 		z.buf.inPos = 0
 | |
| 		xzDecReset(z.dec)
 | |
| 		z.err = nil
 | |
| 		_, err := z.Read(nil) // read stream header
 | |
| 		return err
 | |
| 	}
 | |
| }
 |