| /*------------------------------------------------------------------------- |
| * |
| * toast_compression.c |
| * Functions for toast compression. |
| * |
| * Copyright (c) 2021-2023, PostgreSQL Global Development Group |
| * |
| * |
| * IDENTIFICATION |
| * src/backend/access/common/toast_compression.c |
| * |
| *------------------------------------------------------------------------- |
| */ |
| #include "postgres.h" |
| |
| #ifdef USE_LZ4 |
| #include <lz4.h> |
| #endif |
| |
| #include "access/detoast.h" |
| #include "access/toast_compression.h" |
| #include "common/pg_lzcompress.h" |
| #include "fmgr.h" |
| #include "utils/builtins.h" |
| #include "varatt.h" |
| |
| /* GUC */ |
| int default_toast_compression = TOAST_PGLZ_COMPRESSION; |
| |
| #define NO_LZ4_SUPPORT() \ |
| ereport(ERROR, \ |
| (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \ |
| errmsg("compression method lz4 not supported"), \ |
| errdetail("This functionality requires the server to be built with lz4 support."))) |
| |
| /* |
| * Compress a varlena using PGLZ. |
| * |
| * Returns the compressed varlena, or NULL if compression fails. |
| */ |
| struct varlena * |
| pglz_compress_datum(const struct varlena *value) |
| { |
| int32 valsize, |
| len; |
| struct varlena *tmp = NULL; |
| |
| valsize = VARSIZE_ANY_EXHDR(value); |
| |
| /* |
| * No point in wasting a palloc cycle if value size is outside the allowed |
| * range for compression. |
| */ |
| if (valsize < PGLZ_strategy_default->min_input_size || |
| valsize > PGLZ_strategy_default->max_input_size) |
| return NULL; |
| |
| /* |
| * Figure out the maximum possible size of the pglz output, add the bytes |
| * that will be needed for varlena overhead, and allocate that amount. |
| */ |
| tmp = (struct varlena *) palloc(PGLZ_MAX_OUTPUT(valsize) + |
| VARHDRSZ_COMPRESSED); |
| |
| len = pglz_compress(VARDATA_ANY(value), |
| valsize, |
| (char *) tmp + VARHDRSZ_COMPRESSED, |
| NULL); |
| if (len < 0) |
| { |
| pfree(tmp); |
| return NULL; |
| } |
| |
| SET_VARSIZE_COMPRESSED(tmp, len + VARHDRSZ_COMPRESSED); |
| |
| return tmp; |
| } |
| |
| /* |
| * Decompress a varlena that was compressed using PGLZ. |
| */ |
| struct varlena * |
| pglz_decompress_datum(const struct varlena *value) |
| { |
| struct varlena *result; |
| int32 rawsize; |
| |
| /* allocate memory for the uncompressed data */ |
| result = (struct varlena *) palloc(VARDATA_COMPRESSED_GET_EXTSIZE(value) + VARHDRSZ); |
| |
| /* decompress the data */ |
| rawsize = pglz_decompress((char *) value + VARHDRSZ_COMPRESSED, |
| VARSIZE(value) - VARHDRSZ_COMPRESSED, |
| VARDATA(result), |
| VARDATA_COMPRESSED_GET_EXTSIZE(value), true); |
| if (rawsize < 0) |
| ereport(ERROR, |
| (errcode(ERRCODE_DATA_CORRUPTED), |
| errmsg_internal("compressed pglz data is corrupt"))); |
| |
| SET_VARSIZE(result, rawsize + VARHDRSZ); |
| |
| return result; |
| } |
| |
| /* |
| * Decompress part of a varlena that was compressed using PGLZ. |
| */ |
| struct varlena * |
| pglz_decompress_datum_slice(const struct varlena *value, |
| int32 slicelength) |
| { |
| struct varlena *result; |
| int32 rawsize; |
| |
| /* allocate memory for the uncompressed data */ |
| result = (struct varlena *) palloc(slicelength + VARHDRSZ); |
| |
| /* decompress the data */ |
| rawsize = pglz_decompress((char *) value + VARHDRSZ_COMPRESSED, |
| VARSIZE(value) - VARHDRSZ_COMPRESSED, |
| VARDATA(result), |
| slicelength, false); |
| if (rawsize < 0) |
| ereport(ERROR, |
| (errcode(ERRCODE_DATA_CORRUPTED), |
| errmsg_internal("compressed pglz data is corrupt"))); |
| |
| SET_VARSIZE(result, rawsize + VARHDRSZ); |
| |
| return result; |
| } |
| |
| /* |
| * Compress a varlena using LZ4. |
| * |
| * Returns the compressed varlena, or NULL if compression fails. |
| */ |
| struct varlena * |
| lz4_compress_datum(const struct varlena *value) |
| { |
| #ifndef USE_LZ4 |
| NO_LZ4_SUPPORT(); |
| return NULL; /* keep compiler quiet */ |
| #else |
| int32 valsize; |
| int32 len; |
| int32 max_size; |
| struct varlena *tmp = NULL; |
| |
| valsize = VARSIZE_ANY_EXHDR(value); |
| |
| /* |
| * Figure out the maximum possible size of the LZ4 output, add the bytes |
| * that will be needed for varlena overhead, and allocate that amount. |
| */ |
| max_size = LZ4_compressBound(valsize); |
| tmp = (struct varlena *) palloc(max_size + VARHDRSZ_COMPRESSED); |
| |
| len = LZ4_compress_default(VARDATA_ANY(value), |
| (char *) tmp + VARHDRSZ_COMPRESSED, |
| valsize, max_size); |
| if (len <= 0) |
| elog(ERROR, "lz4 compression failed"); |
| |
| /* data is incompressible so just free the memory and return NULL */ |
| if (len > valsize) |
| { |
| pfree(tmp); |
| return NULL; |
| } |
| |
| SET_VARSIZE_COMPRESSED(tmp, len + VARHDRSZ_COMPRESSED); |
| |
| return tmp; |
| #endif |
| } |
| |
| /* |
| * Decompress a varlena that was compressed using LZ4. |
| */ |
| struct varlena * |
| lz4_decompress_datum(const struct varlena *value) |
| { |
| #ifndef USE_LZ4 |
| NO_LZ4_SUPPORT(); |
| return NULL; /* keep compiler quiet */ |
| #else |
| int32 rawsize; |
| struct varlena *result; |
| |
| /* allocate memory for the uncompressed data */ |
| result = (struct varlena *) palloc(VARDATA_COMPRESSED_GET_EXTSIZE(value) + VARHDRSZ); |
| |
| /* decompress the data */ |
| rawsize = LZ4_decompress_safe((char *) value + VARHDRSZ_COMPRESSED, |
| VARDATA(result), |
| VARSIZE(value) - VARHDRSZ_COMPRESSED, |
| VARDATA_COMPRESSED_GET_EXTSIZE(value)); |
| if (rawsize < 0) |
| ereport(ERROR, |
| (errcode(ERRCODE_DATA_CORRUPTED), |
| errmsg_internal("compressed lz4 data is corrupt"))); |
| |
| |
| SET_VARSIZE(result, rawsize + VARHDRSZ); |
| |
| return result; |
| #endif |
| } |
| |
| /* |
| * Decompress part of a varlena that was compressed using LZ4. |
| */ |
| struct varlena * |
| lz4_decompress_datum_slice(const struct varlena *value, int32 slicelength) |
| { |
| #ifndef USE_LZ4 |
| NO_LZ4_SUPPORT(); |
| return NULL; /* keep compiler quiet */ |
| #else |
| int32 rawsize; |
| struct varlena *result; |
| |
| /* slice decompression not supported prior to 1.8.3 */ |
| if (LZ4_versionNumber() < 10803) |
| return lz4_decompress_datum(value); |
| |
| /* allocate memory for the uncompressed data */ |
| result = (struct varlena *) palloc(slicelength + VARHDRSZ); |
| |
| /* decompress the data */ |
| rawsize = LZ4_decompress_safe_partial((char *) value + VARHDRSZ_COMPRESSED, |
| VARDATA(result), |
| VARSIZE(value) - VARHDRSZ_COMPRESSED, |
| slicelength, |
| slicelength); |
| if (rawsize < 0) |
| ereport(ERROR, |
| (errcode(ERRCODE_DATA_CORRUPTED), |
| errmsg_internal("compressed lz4 data is corrupt"))); |
| |
| SET_VARSIZE(result, rawsize + VARHDRSZ); |
| |
| return result; |
| #endif |
| } |
| |
| /* |
| * Extract compression ID from a varlena. |
| * |
| * Returns TOAST_INVALID_COMPRESSION_ID if the varlena is not compressed. |
| */ |
| ToastCompressionId |
| toast_get_compression_id(struct varlena *attr) |
| { |
| ToastCompressionId cmid = TOAST_INVALID_COMPRESSION_ID; |
| |
| /* |
| * If it is stored externally then fetch the compression method id from |
| * the external toast pointer. If compressed inline, fetch it from the |
| * toast compression header. |
| */ |
| if (VARATT_IS_EXTERNAL_ONDISK(attr)) |
| { |
| struct varatt_external toast_pointer; |
| |
| VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); |
| |
| if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) |
| cmid = VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer); |
| } |
| else if (VARATT_IS_COMPRESSED(attr)) |
| cmid = VARDATA_COMPRESSED_GET_COMPRESS_METHOD(attr); |
| |
| return cmid; |
| } |
| |
| /* |
| * CompressionNameToMethod - Get compression method from compression name |
| * |
| * Search in the available built-in methods. If the compression not found |
| * in the built-in methods then return InvalidCompressionMethod. |
| */ |
| char |
| CompressionNameToMethod(const char *compression) |
| { |
| if (strcmp(compression, "pglz") == 0) |
| return TOAST_PGLZ_COMPRESSION; |
| else if (strcmp(compression, "lz4") == 0) |
| { |
| #ifndef USE_LZ4 |
| NO_LZ4_SUPPORT(); |
| #endif |
| return TOAST_LZ4_COMPRESSION; |
| } |
| |
| return InvalidCompressionMethod; |
| } |
| |
| /* |
| * GetCompressionMethodName - Get compression method name |
| */ |
| const char * |
| GetCompressionMethodName(char method) |
| { |
| switch (method) |
| { |
| case TOAST_PGLZ_COMPRESSION: |
| return "pglz"; |
| case TOAST_LZ4_COMPRESSION: |
| return "lz4"; |
| default: |
| elog(ERROR, "invalid compression method %c", method); |
| return NULL; /* keep compiler quiet */ |
| } |
| } |