1#@!#!123s

: / opt / imunify360 / venv / lib / python3.11 / site-packages / pyzstd /
Filename : __init__.py
back
import _compression
import io
from sys import maxsize
try:
    from os import PathLike
except ImportError:
    # For Python 3.5
    class PathLike:
        pass

try:
    # Import C implementation
    from .c.c_pyzstd import *
    from .c.c_pyzstd import _train_dict, _finalize_dict, \
                            _ZSTD_DStreamInSize
except ImportError:
    try:
        # Import CFFI implementation
        from .cffi.cffi_pyzstd import *
        from .cffi.cffi_pyzstd import _train_dict, _finalize_dict, \
                                      _ZSTD_DStreamInSize
        CFFI_PYZSTD = True
    except ImportError:
        raise ImportError(
            "pyzstd module: Neither C implementation nor CFFI implementation "
            "can be imported. If pyzstd module is dynamically linked to zstd "
            "library, make sure not to remove zstd library, and the run-time "
            "zstd library's version can't be lower than that at compile-time.")

__version__ = '0.15.3'

__doc__ = '''\
Python bindings to Zstandard (zstd) compression library, the API is similar to
Python's bz2/lzma/zlib modules.

Documentation: https://pyzstd.readthedocs.io
GitHub: https://github.com/animalize/pyzstd
PyPI: https://pypi.org/project/pyzstd'''

__all__ = ('ZstdCompressor', 'RichMemZstdCompressor',
           'ZstdDecompressor', 'EndlessZstdDecompressor',
           'CParameter', 'DParameter', 'Strategy', 'ZstdError',
           'compress', 'richmem_compress', 'decompress',
           'compress_stream', 'decompress_stream',
           'ZstdDict', 'train_dict', 'finalize_dict',
           'get_frame_info', 'get_frame_size', 'ZstdFile', 'open',
           'zstd_version', 'zstd_version_info',
           'zstd_support_multithread', 'compressionLevel_values')


def compress(data, level_or_option=None, zstd_dict=None):
    """Compress a block of data, return a bytes object.

    Compressing b'' will get an empty content frame (9 bytes or more).

    Parameters
    data:            A bytes-like object, data to be compressed.
    level_or_option: When it's an int object, it represents compression level.
                     When it's a dict object, it contains advanced compression
                     parameters.
    zstd_dict:       A ZstdDict object, pre-trained dictionary for compression.
    """
    comp = ZstdCompressor(level_or_option, zstd_dict)
    return comp.compress(data, ZstdCompressor.FLUSH_FRAME)


def richmem_compress(data, level_or_option=None, zstd_dict=None):
    """Compress a block of data, return a bytes object.

    Use rich memory mode, it's faster than compress() in some cases, but
    allocates more memory.

    Compressing b'' will get an empty content frame (9 bytes or more).

    Parameters
    data:            A bytes-like object, data to be compressed.
    level_or_option: When it's an int object, it represents compression level.
                     When it's a dict object, it contains advanced compression
                     parameters.
    zstd_dict:       A ZstdDict object, pre-trained dictionary for compression.
    """
    comp = RichMemZstdCompressor(level_or_option, zstd_dict)
    return comp.compress(data)


def _nbytes(dat):
    if isinstance(dat, (bytes, bytearray)):
        return len(dat)
    return memoryview(dat).nbytes


def train_dict(samples, dict_size):
    """Train a zstd dictionary, return a ZstdDict object.

    Parameters
    samples:   An iterable of samples, a sample is a bytes-like object
               represents a file.
    dict_size: The dictionary's maximum size, in bytes.
    """
    # Check argument's type
    if not isinstance(dict_size, int):
        raise TypeError('dict_size argument should be an int object.')

    # Prepare data
    chunks = []
    chunk_sizes = []
    for chunk in samples:
        chunks.append(chunk)
        chunk_sizes.append(_nbytes(chunk))

    chunks = b''.join(chunks)
    if not chunks:
        raise ValueError("The samples are empty content, can't train dictionary.")

    # samples_bytes: samples be stored concatenated in a single flat buffer.
    # samples_size_list: a list of each sample's size.
    # dict_size: size of the dictionary, in bytes.
    dict_content = _train_dict(chunks, chunk_sizes, dict_size)

    return ZstdDict(dict_content)


def finalize_dict(zstd_dict, samples, dict_size, level):
    """Finalize a zstd dictionary, return a ZstdDict object.

    Given a custom content as a basis for dictionary, and a set of samples,
    finalize dictionary by adding headers and statistics according to the zstd
    dictionary format.

    You may compose an effective dictionary content by hand, which is used as
    basis dictionary, and use some samples to finalize a dictionary. The basis
    dictionary can be a "raw content" dictionary, see is_raw parameter in
    ZstdDict.__init__ method.

    Parameters
    zstd_dict: A ZstdDict object, basis dictionary.
    samples:   An iterable of samples, a sample is a bytes-like object
               represents a file.
    dict_size: The dictionary's maximum size, in bytes.
    level:     The compression level expected to use in production. The
               statistics for each compression level differ, so tuning the
               dictionary for the compression level can help quite a bit.
    """
    if zstd_version_info < (1, 4, 5):
        msg = ("This function only available when the underlying zstd "
               "library's version is greater than or equal to v1.4.5, "
               "the current underlying zstd library's version is v%s.") % zstd_version
        raise NotImplementedError(msg)

    # Check arguments' type
    if not isinstance(zstd_dict, ZstdDict):
        raise TypeError('zstd_dict argument should be a ZstdDict object.')
    if not isinstance(dict_size, int):
        raise TypeError('dict_size argument should be an int object.')
    if not isinstance(level, int):
        raise TypeError('level argument should be an int object.')

    # Prepare data
    chunks = []
    chunk_sizes = []
    for chunk in samples:
        chunks.append(chunk)
        chunk_sizes.append(_nbytes(chunk))

    chunks = b''.join(chunks)
    if not chunks:
        raise ValueError("The samples are empty content, can't finalize dictionary.")

    # custom_dict_bytes: existing dictionary.
    # samples_bytes: samples be stored concatenated in a single flat buffer.
    # samples_size_list: a list of each sample's size.
    # dict_size: maximal size of the dictionary, in bytes.
    # compression_level: compression level expected to use in production.
    dict_content = _finalize_dict(zstd_dict.dict_content,
                                  chunks, chunk_sizes,
                                  dict_size, level)

    return ZstdDict(dict_content)


# In pyzstd module's blocks output buffer, the first block is 32 KiB. It has a
# fast path for this size, if the output data is 32 KiB, it only allocates
# memory and copies data once.
_32_KiB = 32*1024

# Copied from Python stdlib (_compression.py), except:
# 1, ZstdDecompressReader.read():
#      Uses ZSTD_DStreamInSize() (131,075 in zstd v1.x) instead of
#      _compression.BUFFER_SIZE (default is 8 KiB) as read size.
# 2, ZstdDecompressReader.seek():
#      Uses 32 KiB instead of io.DEFAULT_BUFFER_SIZE (default is 8 KiB) as
#      max_length. See _32_KiB's comment for details.
class ZstdDecompressReader(_compression.DecompressReader):
    # Add .readall() method for speedup
    # https://bugs.python.org/issue41486
    def readall(self):
        chunks = []
        while True:
            # sys.maxsize means the max length of output buffer is unlimited,
            # so that the whole input buffer can be decompressed within one
            # .decompress() call.
            data = self.read(maxsize)
            if not data:
                break
            chunks.append(data)
        return b''.join(chunks)

    # Compare to super().read():
    # 1, Use ZSTD_DStreamInSize() instead of BUFFER_SIZE (default is 8 KiB)
    # 2, Use EndlessZstdDecompressor to simplify the code
    def read(self, size=-1):
        if size < 0:
            return self.readall()
        if not size or self._eof:
            return b""

        # Depending on the input data, our call to the decompressor may not
        # return any data. In this case, try again after reading another block.
        while True:
            if self._decompressor.needs_input:
                rawblock = self._fp.read(_ZSTD_DStreamInSize)
                if not rawblock:
                    if self._decompressor.at_frame_edge:
                        self._eof = True
                        self._size = self._pos
                        return b""
                    else:
                        raise EOFError("Compressed file ended before the "
                                       "end-of-stream marker was reached")
            else:
                rawblock = b""

            data = self._decompressor.decompress(rawblock, size)
            if data:
                self._pos += len(data)
                return data

    # Copied from base class, except use 32 KiB instead of
    # io.DEFAULT_BUFFER_SIZE (default is 8 KiB) as max_length.
    def seek(self, offset, whence=io.SEEK_SET):
        # Recalculate offset as an absolute file position.
        if whence == io.SEEK_SET:
            pass
        elif whence == io.SEEK_CUR:
            offset = self._pos + offset
        elif whence == io.SEEK_END:
            # Seeking relative to EOF - we need to know the file's size.
            if self._size < 0:
                while self.read(_32_KiB):
                    pass
            offset = self._size + offset
        else:
            raise ValueError("Invalid value for whence: {}".format(whence))

        # Make it so that offset is the number of bytes to skip forward.
        if offset < self._pos:
            self._rewind()
        else:
            offset -= self._pos

        # Read and discard data until we reach the desired position.
        while offset > 0:
            data = self.read(min(_32_KiB, offset))
            if not data:
                break
            offset -= len(data)

        return self._pos


_MODE_CLOSED = 0
_MODE_READ   = 1
_MODE_WRITE  = 2

# Copied from Python stdlib (lzma.py), except:
# 1, Add .readinto()/.readinto1() methods.
# 2, Implement .flush() method.
# 3, Remove BaseStream._check_*() overheads.
#    The implementation needs to ensure:
#      If in _MODE_READ mode, ._buffer is an io.BufferedReader object.
#      If in _MODE_WRITE mode, ._compressor is a ZstdCompressor object.
#      If in _MODE_CLOSED mode, they and ._fp don't exist or are None.
#    Then if not in a mode, perform the corresponding actions will raise
#    AttributeError, and ._check_mode() will raise a proper exception.
# 4, ZstdFile.__init__():
#      io.BufferedReader uses 32 KiB buffer size instead of default value
#      io.DEFAULT_BUFFER_SIZE (default is 8 KiB).
#      See _32_KiB's comment for details.
# 5, ZstdFile.read1():
#      Uses 32 KiB instead of io.DEFAULT_BUFFER_SIZE (default is 8 KiB).
#      Consistent with ZstdFile.__init__().
class ZstdFile(io.BufferedIOBase):
    """A file object providing transparent zstd (de)compression.

    A ZstdFile can act as a wrapper for an existing file object, or refer
    directly to a named file on disk.

    Note that ZstdFile provides a *binary* file interface - data read is
    returned as bytes, and data to be written must be given as bytes.
    """

    def __init__(self, filename, mode="r", *,
                 level_or_option=None, zstd_dict=None):
        """Open a zstd compressed file in binary mode.

        filename can be either an actual file name (given as a str, bytes, or
        PathLike object), in which case the named file is opened, or it can be
        an existing file object to read from or write to.

        mode can be "r" for reading (default), "w" for (over)writing, "x" for
        creating exclusively, or "a" for appending. These can equivalently be
        given as "rb", "wb", "xb" and "ab" respectively.

        Parameters
        level_or_option: When it's an int object, it represents compression
            level. When it's a dict object, it contains advanced compression
            parameters. Note, in read mode (decompression), it can only be a
            dict object, that represents decompression option. It doesn't
            support int type compression level in this case.
        zstd_dict: A ZstdDict object, pre-trained dictionary for compression /
            decompression.
        """
        self._fp = None
        self._closefp = False
        self._mode = _MODE_CLOSED

        if not isinstance(zstd_dict, (type(None), ZstdDict)):
            raise TypeError("zstd_dict argument should be a ZstdDict object.")

        # Read or write mode
        if mode in ("r", "rb"):
            if not isinstance(level_or_option, (type(None), dict)):
                msg = ("In read mode (decompression), level_or_option argument "
                       "should be a dict object, that represents decompression "
                       "option. It doesn't support int type compression level "
                       "in this case.")
                raise TypeError(msg)
            mode_code = _MODE_READ
        elif mode in ("w", "wb", "a", "ab", "x", "xb"):
            if not isinstance(level_or_option, (type(None), int, dict)):
                msg = "level_or_option argument should be int or dict object."
                raise TypeError(msg)
            mode_code = _MODE_WRITE
            self._compressor = ZstdCompressor(level_or_option, zstd_dict)
            self._pos = 0
        else:
            raise ValueError("Invalid mode: {!r}".format(mode))

        # File object
        if isinstance(filename, (str, bytes, PathLike)):
            if "b" not in mode:
                mode += "b"
            self._fp = io.open(filename, mode)
            self._closefp = True
            # Set ._mode here for ._closefp in .close(). If the following code
            # fails, IOBase's cleanup code will call .close(), so that ._fp can
            # be closed.
            self._mode = mode_code
        elif hasattr(filename, "read") or hasattr(filename, "write"):
            self._fp = filename
            self._mode = mode_code
        else:
            raise TypeError("filename must be a str, bytes, file or PathLike object")

        # ZstdDecompressReader
        if mode_code == _MODE_READ:
            raw = ZstdDecompressReader(self._fp, EndlessZstdDecompressor,
                                       trailing_error=ZstdError,
                                       zstd_dict=zstd_dict, option=level_or_option)
            self._buffer = io.BufferedReader(raw, _32_KiB)

    def close(self):
        """Flush and close the file.

        May be called more than once without error. Once the file is
        closed, any other operation on it will raise a ValueError.
        """
        if self._mode == _MODE_CLOSED:
            return

        try:
            # In .__init__ method, if fails after setting ._mode to _MODE_READ,
            # ._buffer doesn't exist.
            if hasattr(self, "_buffer"):
                try:
                    self._buffer.close()
                finally:
                    # Set to None for ._check_mode()
                    self._buffer = None
            elif self._mode == _MODE_WRITE:
                try:
                    self._fp.write(self._compressor.flush())
                finally:
                    # Set to None for ._check_mode()
                    self._compressor = None
        finally:
            try:
                if self._closefp:
                    self._fp.close()
            finally:
                self._fp = None
                self._closefp = False
                self._mode = _MODE_CLOSED

    # None argument means the file should be closed
    def _check_mode(self, expected_mode=None):
        # If closed, raise ValueError.
        if self._mode == _MODE_CLOSED:
            raise ValueError("I/O operation on closed file")

        # Check _MODE_READ/_MODE_WRITE mode
        if expected_mode == _MODE_READ:
            if self._mode != _MODE_READ:
                raise io.UnsupportedOperation("File not open for reading")
        elif expected_mode == _MODE_WRITE:
            if self._mode != _MODE_WRITE:
                raise io.UnsupportedOperation("File not open for writing")

        # Re-raise other AttributeError exception
        raise

    def write(self, data):
        """Write a bytes-like object to the file.

        Returns the number of uncompressed bytes written, which is
        always the length of data in bytes. Note that due to buffering,
        the file on disk may not reflect the data written until close()
        is called.
        """
        # Get the length of uncompressed data
        if isinstance(data, (bytes, bytearray)):
            length = len(data)
        else:
            # Accept any data that supports the buffer protocol
            data = memoryview(data)
            length = data.nbytes

        # Compress
        try:
            compressed = self._compressor.compress(data)
        except AttributeError:
            self._check_mode(_MODE_WRITE)

        # Write to file. If haven't gathered enough uncompressed data for one
        # zstd block (128 KiB at most), `compressed` is b''.
        if compressed:
            self._fp.write(compressed)

        self._pos += length
        return length

    def flush(self):
        """Flush remaining data to the underlying stream.

        It uses ZstdCompressor.FLUSH_BLOCK mode. Abuse of this method will
        reduce compression ratio, use it only when necessary.

        If the program is interrupted afterwards, all data can be recovered.
        To ensure saving to disk, also need to use os.fsync(fd).

        This method does nothing in reading mode.
        """
        # Like IOBase.flush(), do nothing in reading mode.
        # TextIOWrapper.close() relies on this behavior.
        if self._mode == _MODE_READ:
            return

        # Flush zstd block
        try:
            compressed = self._compressor.flush(ZstdCompressor.FLUSH_BLOCK)
        except AttributeError:
            # Closed, raise ValueError.
            self._check_mode()

        # Write to file
        if compressed:
            self._fp.write(compressed)

        # Flush the file. Some file-like objects don't have .flush() method.
        if hasattr(self._fp, "flush"):
            self._fp.flush()

    def read(self, size=-1):
        """Read up to size uncompressed bytes from the file.

        If size is negative or omitted, read until EOF is reached.
        Returns b"" if the file is already at EOF.
        """
        try:
            return self._buffer.read(size)
        except AttributeError:
            self._check_mode(_MODE_READ)

    def read1(self, size=-1):
        """Read up to size uncompressed bytes, while trying to avoid
        making multiple reads from the underlying stream. Reads up to a
        buffer's worth of data if size is negative.

        Returns b"" if the file is at EOF.
        """
        if size < 0:
            size = _32_KiB

        try:
            return self._buffer.read1(size)
        except AttributeError:
            self._check_mode(_MODE_READ)

    def readinto(self, b):
        """Read bytes into b.

        Returns the number of bytes read (0 for EOF).
        """
        try:
            return self._buffer.readinto(b)
        except AttributeError:
            self._check_mode(_MODE_READ)

    def readinto1(self, b):
        """Read bytes into b, while trying to avoid making multiple reads
        from the underlying stream.

        Returns the number of bytes read (0 for EOF).
        """
        try:
            return self._buffer.readinto1(b)
        except AttributeError:
            self._check_mode(_MODE_READ)

    def readline(self, size=-1):
        """Read a line of uncompressed bytes from the file.

        The terminating newline (if present) is retained. If size is
        non-negative, no more than size bytes will be read (in which
        case the line may be incomplete). Returns b'' if already at EOF.
        """
        try:
            return self._buffer.readline(size)
        except AttributeError:
            self._check_mode(_MODE_READ)

    def seek(self, offset, whence=io.SEEK_SET):
        """Change the file position.

        The new position is specified by offset, relative to the
        position indicated by whence. Possible values for whence are:

            0: start of stream (default): offset must not be negative
            1: current stream position
            2: end of stream; offset must not be positive

        Returns the new file position.

        Note that seeking is emulated, so depending on the arguments,
        this operation may be extremely slow.
        """
        try:
            # BufferedReader.seek() checks seekable
            return self._buffer.seek(offset, whence)
        except AttributeError:
            self._check_mode(_MODE_READ)

    def peek(self, size=-1):
        """Return buffered data without advancing the file position.

        Always returns at least one byte of data, unless at EOF.
        The exact number of bytes returned is unspecified.
        """
        # Relies on the undocumented fact that BufferedReader.peek() always
        # returns at least one byte (except at EOF)
        try:
            return self._buffer.peek(size)
        except AttributeError:
            self._check_mode(_MODE_READ)

    def tell(self):
        """Return the current file position."""
        if self._mode == _MODE_READ:
            return self._buffer.tell()
        elif self._mode == _MODE_WRITE:
            return self._pos

        # Closed, raise ValueError.
        self._check_mode()

    def fileno(self):
        """Return the file descriptor for the underlying file."""
        try:
            return self._fp.fileno()
        except AttributeError:
            # Closed, raise ValueError.
            self._check_mode()

    @property
    def closed(self):
        """True if this file is closed."""
        return self._mode == _MODE_CLOSED

    def writable(self):
        """Return whether the file was opened for writing."""
        if self._mode == _MODE_WRITE:
            return True
        elif self._mode == _MODE_READ:
            return False

        # Closed, raise ValueError.
        self._check_mode()

    def readable(self):
        """Return whether the file was opened for reading."""
        if self._mode == _MODE_READ:
            return True
        elif self._mode == _MODE_WRITE:
            return False

        # Closed, raise ValueError.
        self._check_mode()

    def seekable(self):
        """Return whether the file supports seeking."""
        if self._mode == _MODE_READ:
            return self._buffer.seekable()
        elif self._mode == _MODE_WRITE:
            return False

        # Closed, raise ValueError.
        self._check_mode()


# Copied from lzma module
def open(filename, mode="rb", *, level_or_option=None, zstd_dict=None,
         encoding=None, errors=None, newline=None):
    """Open a zstd compressed file in binary or text mode.

    filename can be either an actual file name (given as a str, bytes, or
    PathLike object), in which case the named file is opened, or it can be an
    existing file object to read from or write to.

    The mode parameter can be "r", "rb" (default), "w", "wb", "x", "xb", "a",
    "ab" for binary mode, or "rt", "wt", "xt", "at" for text mode.

    The level_or_option and zstd_dict parameters specify the settings, as for
    ZstdCompressor, ZstdDecompressor and ZstdFile.

    When using read mode (decompression), the level_or_option parameter can
    only be a dict object, that represents decompression option. It doesn't
    support int type compression level in this case.

    For binary mode, this function is equivalent to the ZstdFile constructor:
    ZstdFile(filename, mode, ...). In this case, the encoding, errors and
    newline parameters must not be provided.

    For text mode, an ZstdFile object is created, and wrapped in an
    io.TextIOWrapper instance with the specified encoding, error handling
    behavior, and line ending(s).
    """

    if "t" in mode:
        if "b" in mode:
            raise ValueError("Invalid mode: %r" % (mode,))
    else:
        if encoding is not None:
            raise ValueError("Argument 'encoding' not supported in binary mode")
        if errors is not None:
            raise ValueError("Argument 'errors' not supported in binary mode")
        if newline is not None:
            raise ValueError("Argument 'newline' not supported in binary mode")

    zstd_mode = mode.replace("t", "")
    binary_file = ZstdFile(filename, zstd_mode,
                           level_or_option=level_or_option, zstd_dict=zstd_dict)

    if "t" in mode:
        return io.TextIOWrapper(binary_file, encoding, errors, newline)
    else:
        return binary_file