From abd1f1576a2775d1a8cc27162f58e9c630bd85fe Mon Sep 17 00:00:00 2001 From: Matthieu Suiche Date: Thu, 10 Jul 2008 09:31:43 +0000 Subject: [PATCH] lib/compression: Import of lzxpress decompression algorithm --- source/lib/basic.mk | 2 +- source/lib/compression/lzxpress.c | 370 +++++++++++++++++++++++++++++++++++++ source/lib/compression/lzxpress.h | 48 +++++ 3 files changed, 419 insertions(+), 1 deletions(-) create mode 100644 source/lib/compression/lzxpress.c create mode 100644 source/lib/compression/lzxpress.h diff --git a/source/lib/basic.mk b/source/lib/basic.mk index b86df5d..0c68fda 100644 --- a/source/lib/basic.mk +++ b/source/lib/basic.mk @@ -1,6 +1,6 @@ [SUBSYSTEM::LIBCOMPRESSION] -LIBCOMPRESSION_OBJ_FILES = $(libcompressionsrcdir)/mszip.o +LIBCOMPRESSION_OBJ_FILES = $(libcompressionsrcdir)/mszip.o $(libcompressionsrcdir)/lzxpress.o [SUBSYSTEM::GENCACHE] PRIVATE_DEPENDENCIES = TDB_WRAP diff --git a/source/lib/compression/lzxpress.c b/source/lib/compression/lzxpress.c new file mode 100644 index 0000000..fb79f8e --- /dev/null +++ b/source/lib/compression/lzxpress.c @@ -0,0 +1,370 @@ +/* + * Copyright (C) Matthieu Suiche 2008 + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the author nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include "includes.h"
+#include "replace.h"
+#include "lzxpress.h"
+
+
+/*
+ * Byte-wise little-endian readers. They never dereference a wider
+ * pointer, so they work on unaligned data and on any host byte order.
+ * Each PULL_* macro is only defined here if it is not already in scope.
+ */
+#define __BUF_POS_CONST(buf,ofs)(((const uint8_t *)(buf))+(ofs))
+#define __PULL_BYTE(buf,ofs) \
+	((uint8_t)((*__BUF_POS_CONST(buf,ofs)) & 0xFF))
+
+#ifndef PULL_UINT8
+#define PULL_UINT8(buf,ofs) ((uint8_t)( \
+	((uint8_t)(((uint8_t)(__PULL_BYTE(buf,(ofs)+0))) << 0)) \
+))
+#endif
+
+
+#ifndef PULL_LE_UINT16
+#define PULL_LE_UINT16(buf,ofs) ((uint16_t)( \
+	((uint16_t)(((uint16_t)(__PULL_BYTE(buf,(ofs)+0))) << 0)) | \
+	((uint16_t)(((uint16_t)(__PULL_BYTE(buf,(ofs)+1))) << 8)) \
+))
+#endif
+
+#ifndef PULL_LE_UINT32
+#define PULL_LE_UINT32(buf,ofs) ((uint32_t)( \
+	((uint32_t)(((uint32_t)(__PULL_BYTE(buf,(ofs)+0))) << 0)) | \
+	((uint32_t)(((uint32_t)(__PULL_BYTE(buf,(ofs)+1))) << 8)) | \
+	((uint32_t)(((uint32_t)(__PULL_BYTE(buf,(ofs)+2))) << 16)) | \
+	((uint32_t)(((uint32_t)(__PULL_BYTE(buf,(ofs)+3))) << 24)) \
+))
+#endif
+
+#define PULL_INT8(buf,ofs)((int8_t)PULL_UINT8(buf,ofs))
+#define PULL_LE_INT16(buf,ofs)((int16_t)PULL_LE_UINT16(buf,ofs))
+#define PULL_LE_INT32(buf,ofs)((int32_t)PULL_LE_UINT32(buf,ofs))
+
+/*
+ * Write the match token for one back-reference at output[pos].
+ *
+ * size   - match length in bytes (the caller only passes values >= 3)
+ * offset - distance back to the start of the match (>= 1)
+ * output - compressed output buffer
+ * pos    - index in output[] where the 16-bit match header is stored
+ * nibble - index of an earlier length byte whose high nibble is still
+ *          unused, or 0 when a new length byte has to be started
+ *
+ * Layout, as read back by xpress_decompress() (little-endian):
+ *   16 bits  ((offset - 1) << 3) | min(size - 3, 7)
+ *    4 bits  min(size - 10, 15)    only if the 3-bit field is 7; low
+ *                                  nibble of a new byte when nibble
+ *                                  is 0, otherwise the high nibble
+ *                                  of output[nibble]
+ *    8 bits  min(size - 25, 255)   only if the 4-bit field is 15
+ *   16 bits  size - 3              only if the 8-bit field is 255
+ *
+ * Returns the number of bytes stored starting at pos (2 to 6). The
+ * caller must have checked that this many bytes are available.
+ */
+static uint32_t xpress_write_metadata(uint32_t size,
+				      uint32_t offset,
+				      uint8_t *output,
+				      uint32_t pos,
+				      uint32_t nibble)
+{
+	uint32_t written_bytes = 0;
+	uint32_t len = size - 3;
+	uint32_t header_len = (len < 7) ? len : 7;
+	uint16_t metadata;
+	uint8_t nibble_value;
+
+	offset--;
+
+	/*
+	 * Stored byte by byte: output[pos] need not be 2-byte aligned
+	 * and the decoder always reads little-endian.
+	 */
+	metadata = (uint16_t)((offset << 3) | header_len);
+	output[pos + written_bytes] = (uint8_t)(metadata & 0xFF);
+	written_bytes += sizeof(uint8_t);
+	output[pos + written_bytes] = (uint8_t)(metadata >> 8);
+	written_bytes += sizeof(uint8_t);
+
+	if (len < 7) {
+		/* Classical meta-data: the length fits the 3-bit field */
+		return written_bytes;
+	}
+
+	/* Shared byte */
+	nibble_value = ((len - 7) < 15) ? (uint8_t)(len - 7) : 15;
+	if (!nibble) {
+		/*
+		 * A new shared byte is assigned, not OR-ed: the output
+		 * buffer may be uninitialised, and the high nibble has
+		 * to start out as zero for the next long match.
+		 */
+		output[pos + written_bytes] = nibble_value;
+		written_bytes += sizeof(uint8_t);
+	} else {
+		output[nibble] |= (uint8_t)(nibble_value << 4);
+	}
+
+	if (nibble_value < 15) {
+		return written_bytes;
+	}
+
+	/* Additional length byte */
+	if (len < (7 + 15 + 255)) {
+		output[pos + written_bytes] = (uint8_t)(len - (7 + 15));
+		written_bytes += sizeof(uint8_t);
+		return written_bytes;
+	}
+
+	output[pos + written_bytes] = 255;
+	written_bytes += sizeof(uint8_t);
+
+	/*
+	 * 16-bit length. It goes right after the bytes written so far:
+	 * that is pos + 4 when a new shared byte was started, but
+	 * pos + 3 when the nibble of an earlier byte was reused.
+	 */
+	output[pos + written_bytes] = (uint8_t)(len & 0xFF);
+	output[pos + written_bytes + 1] = (uint8_t)((len >> 8) & 0xFF);
+	written_bytes += sizeof(uint16_t);
+
+	return written_bytes;
+}
+
+/*
+ * Store the finished 32-bit flag word and open the next one.
+ *
+ * offset    - in: where the finished word is stored;
+ *             out: set to &output[*position], the slot of the next word
+ * indicator - in: the finished flag word; out: reset to 0
+ * output    - compressed output buffer
+ * position  - in/out: current output index, advanced by 4 bytes to
+ *             reserve the slot of the next flag word
+ *
+ * The word is stored little-endian byte by byte, matching the
+ * PULL_LE_UINT32() read in xpress_decompress().
+ */
+static void xpress_write_indicator(uint8_t **offset,
+				   uint32_t *indicator,
+				   uint8_t *output,
+				   uint32_t *position)
+{
+	uint8_t *slot = *offset;
+	uint32_t flags = *indicator;
+
+	slot[0] = (uint8_t)(flags & 0xFF);
+	slot[1] = (uint8_t)((flags >> 8) & 0xFF);
+	slot[2] = (uint8_t)((flags >> 16) & 0xFF);
+	slot[3] = (uint8_t)((flags >> 24) & 0xFF);
+
+	*indicator = 0;
+	*offset = &output[*position];
+	*position += sizeof(uint32_t);
+}
+
+/*
+  The Exchange team decided to compress identical strings with a minimum match length of 3 bytes.
+  Moreover, if a series of identical bytes is present, it detects it correctly.
+*/
+
+/* Shortest match that is encoded as a back-reference. */
+#define XPRESS_MIN_MATCH_LENGTH 3
+/*
+ * Farthest distance this compressor emits. The 16-bit match header
+ * keeps (distance - 1) in its upper 13 bits.
+ */
+#define XPRESS_MAX_MATCH_OFFSET 0x1FFF
+/* Longest match whose (length - 3) still fits the 16-bit length field. */
+#define XPRESS_MAX_MATCH_LENGTH (0xFFFF + 3)
+
+/*
+ * Find the longest earlier occurrence of the bytes at in[input_pos].
+ *
+ * in           - whole input buffer
+ * input_pos    - index of the first byte still to be encoded
+ * bytes_left   - number of bytes from input_pos to the end of the input
+ * match_offset - out: distance back to the best match, 0 if none
+ *
+ * Returns the length of the best match; anything below
+ * XPRESS_MIN_MATCH_LENGTH means "emit a literal". A match may overlap
+ * the position being encoded (a run of one byte is a match at
+ * distance 1) because the decoder copies one byte at a time.
+ *
+ * A match never covers the last input byte. This keeps the
+ * cur[best_length] probe inside the input and, like the original
+ * code, always ends the stream with a literal.
+ */
+static uint32_t xpress_find_match(const uint8_t *in,
+				  uint32_t input_pos,
+				  uint32_t bytes_left,
+				  uint32_t *match_offset)
+{
+	const uint8_t *cur = &in[input_pos];
+	uint32_t max_offset = input_pos;
+	uint32_t max_length;
+	uint32_t best_length = XPRESS_MIN_MATCH_LENGTH - 1;
+	uint32_t best_offset = 0;
+	uint32_t offset;
+
+	*match_offset = 0;
+	if (bytes_left == 0) {
+		return 0;
+	}
+
+	if (max_offset > XPRESS_BLOCK_SIZE) {
+		max_offset = XPRESS_BLOCK_SIZE;
+	}
+	if (max_offset > XPRESS_MAX_MATCH_OFFSET) {
+		/* farther matches could not be emitted anyway */
+		max_offset = XPRESS_MAX_MATCH_OFFSET;
+	}
+
+	max_length = bytes_left - 1;
+	if (max_length > XPRESS_MAX_MATCH_LENGTH) {
+		max_length = XPRESS_MAX_MATCH_LENGTH;
+	}
+
+	for (offset = 1;
+	     (offset <= max_offset) && (best_length < max_length);
+	     ++offset) {
+		const uint8_t *cand = cur - offset;
+		uint32_t length = 0;
+
+		/*
+		 * Cheap rejection: a candidate can only beat the
+		 * current best if it also matches at index best_length.
+		 */
+		if ((cur[0] != cand[0]) ||
+		    (cur[best_length] != cand[best_length])) {
+			continue;
+		}
+
+		while ((length < max_length) &&
+		       (cur[length] == cand[length])) {
+			++length;
+		}
+
+		if (length > best_length) {
+			best_length = length;
+			best_offset = offset;
+		}
+	}
+
+	*match_offset = best_offset;
+	return best_length;
+}
+
+/*
+ * Upper bound on the bytes xpress_write_metadata() touches at the
+ * current output position for a match of match_length bytes.
+ * nibble_index is 0 when the token has to start a new shared length
+ * byte. The bound never exceeds match_length, so a match never needs
+ * more room than the input bytes it replaces.
+ */
+static uint32_t xpress_token_room(uint32_t match_length,
+				  uint32_t nibble_index)
+{
+	uint32_t len = match_length - 3;
+	uint32_t room = sizeof(uint16_t);
+
+	if (len < 7) {
+		return room;
+	}
+	if (len >= (7 + 15 + 255)) {
+		/* header + shared byte + 255 marker + 16-bit length */
+		return 6;
+	}
+	if (nibble_index == 0) {
+		room += sizeof(uint8_t);
+	}
+	if (len >= (7 + 15)) {
+		room += sizeof(uint8_t);
+	}
+	return room;
+}
+
+/*
+ * Compress insize bytes from in[] into out[], which holds outsize bytes.
+ *
+ * The stream is a sequence of groups: one 32-bit little-endian flag
+ * word followed by up to 32 tokens. The first token of a group owns
+ * bit 31. A clear bit is a literal byte, a set bit is a match token
+ * written by xpress_write_metadata(). Unused bits of the last flag
+ * word are set to 1.
+ *
+ * Returns the number of bytes written to out[], or 0 if insize is 0
+ * or the output buffer is too small. A buffer of
+ * insize + 4 * (insize / 32 + 1) bytes is always large enough.
+ */
+static uint32_t xpress_compress(const uint8_t *in,
+				uint8_t *out,
+				uint32_t insize,
+				uint32_t outsize)
+{
+	uint32_t input_pos = 0;
+	uint32_t output_pos = sizeof(uint32_t);
+	uint32_t indicator = 0;
+	uint32_t indicator_bit = 0;
+	uint8_t *indicator_pos = &out[0];
+	uint32_t nibble_index = 0;
+	uint32_t scratch_pos;
+
+	/* the first flag word lives in out[0..3] */
+	if ((insize == 0) || (outsize < sizeof(uint32_t))) {
+		return 0;
+	}
+
+	while (input_pos < insize) {
+		uint32_t match_offset = 0;
+		uint32_t match_length;
+
+		match_length = xpress_find_match(in, input_pos,
+						 insize - input_pos,
+						 &match_offset);
+
+		if (match_length >= XPRESS_MIN_MATCH_LENGTH) {
+			uint32_t room = xpress_token_room(match_length,
+							  nibble_index);
+			uint32_t metadata_size;
+			uint32_t i;
+
+			if ((outsize - output_pos) < room) {
+				return 0;
+			}
+
+			/*
+			 * Clear the token area first so that no stale
+			 * buffer content can leak into the shared
+			 * length byte.
+			 */
+			for (i = 0; i < room; i++) {
+				out[output_pos + i] = 0;
+			}
+
+			metadata_size = xpress_write_metadata(match_length,
+							      match_offset,
+							      out,
+							      output_pos,
+							      nibble_index);
+
+			indicator |= (uint32_t)1 << (31 - indicator_bit);
+
+			/*
+			 * The shared length nibble is used as soon as
+			 * (match_length - 3) reaches 7, i.e. from
+			 * length 10. The decoder flips its own nibble
+			 * state at exactly the same point.
+			 */
+			if (match_length >= 10) {
+				if (nibble_index == 0) {
+					nibble_index = output_pos + sizeof(uint16_t);
+				} else {
+					nibble_index = 0;
+				}
+			}
+
+			output_pos += metadata_size;
+			input_pos += match_length;
+		} else {
+			if (output_pos >= outsize) {
+				return 0;
+			}
+			out[output_pos++] = in[input_pos++];
+		}
+
+		indicator_bit++;
+
+		if (indicator_bit == 32) {
+			/* group complete: store it and reserve the next word */
+			if ((outsize - output_pos) < sizeof(uint32_t)) {
+				return 0;
+			}
+			xpress_write_indicator(&indicator_pos,
+					       &indicator,
+					       out,
+					       &output_pos);
+			indicator_bit = 0;
+		}
+	}
+
+	/* pad the unused flag bits of the last word with ones */
+	if (indicator_bit == 0) {
+		indicator = 0xFFFFFFFF;
+	} else {
+		indicator |= ((uint32_t)1 << (32 - indicator_bit)) - 1;
+	}
+
+	/*
+	 * Store the last flag word without growing the output:
+	 * xpress_write_indicator() always reserves the following word,
+	 * so let it advance a scratch copy of the position.
+	 */
+	scratch_pos = output_pos;
+	xpress_write_indicator(&indicator_pos,
+			       &indicator,
+			       out,
+			       &scratch_pos);
+
+	return output_pos;
+}
+
+/*
+ * Compress in_len bytes of in_buf into out_buf (capacity out_len).
+ *
+ * Returns the compressed size in bytes, or 0 if there is nothing to
+ * compress, a buffer pointer is NULL, or out_buf is too small.
+ */
+uint32_t lzxpress_compress(const uint8_t *in_buf,
+			   uint32_t in_len,
+			   uint8_t *out_buf,
+			   uint32_t out_len)
+{
+	if ((in_buf == NULL) || (out_buf == NULL)) {
+		return 0;
+	}
+
+	return xpress_compress(in_buf, out_buf, in_len, out_len);
+}
+
+/*
+ * Decompress input_size bytes from input[] into output[].
+ *
+ * Decoding stops when output_size bytes have been produced, when the
+ * input is used up, or at the first token that is truncated or that
+ * refers to data before the start of output[].
+ *
+ * Returns the number of bytes written to output[].
+ */
+static uint32_t xpress_decompress(const uint8_t *input,
+				  uint32_t input_size,
+				  uint8_t *output,
+				  uint32_t output_size)
+{
+	uint32_t output_index = 0;
+	uint32_t input_index = 0;
+	uint32_t indicator = 0;
+	uint32_t indicator_bit = 0;
+	uint32_t nibble_index = 0;
+
+	while ((output_index < output_size) && (input_index < input_size)) {
+		uint32_t length;
+		uint32_t offset;
+
+		if (indicator_bit == 0) {
+			if ((input_size - input_index) < sizeof(uint32_t)) {
+				break;
+			}
+			indicator = PULL_LE_UINT32(input, input_index);
+			input_index += sizeof(uint32_t);
+			indicator_bit = 32;
+		}
+		indicator_bit--;
+
+		/*
+		 * Flag bits are consumed from bit 31 downwards:
+		 * 0 is a literal byte, 1 is a match token.
+		 */
+		if (((indicator >> indicator_bit) & 1) == 0) {
+			if (input_index >= input_size) {
+				break;
+			}
+			output[output_index] = input[input_index];
+			input_index += sizeof(uint8_t);
+			output_index += sizeof(uint8_t);
+			continue;
+		}
+
+		if ((input_size - input_index) < sizeof(uint16_t)) {
+			break;
+		}
+		length = PULL_LE_UINT16(input, input_index);
+		input_index += sizeof(uint16_t);
+		offset = (length / 8) + 1;	/* distance back */
+		length = length % 8;
+
+		if (length == 7) {
+			/*
+			 * Two long matches share one byte: the first
+			 * uses the low nibble and remembers where the
+			 * byte is, the second uses the high nibble.
+			 */
+			if (nibble_index == 0) {
+				if (input_index >= input_size) {
+					break;
+				}
+				nibble_index = input_index;
+				length = input[input_index] % 16;
+				input_index += sizeof(uint8_t);
+			} else {
+				length = input[nibble_index] / 16;
+				nibble_index = 0;
+			}
+
+			if (length < 15) {
+				length += 7;
+			} else {
+				if (input_index >= input_size) {
+					break;
+				}
+				length = input[input_index];
+				input_index += sizeof(uint8_t);
+
+				if (length < 255) {
+					length += 15 + 7;
+				} else {
+					/* the 16-bit field holds (match length - 3) itself */
+					if ((input_size - input_index) < sizeof(uint16_t)) {
+						break;
+					}
+					length = PULL_LE_UINT16(input, input_index);
+					input_index += sizeof(uint16_t);
+				}
+			}
+		}
+
+		length += 3;
+
+		if (offset > output_index) {
+			/* the match would start before output[0] */
+			break;
+		}
+
+		/* byte-wise copy: source and destination may overlap */
+		while ((length != 0) && (output_index < output_size)) {
+			output[output_index] = output[output_index - offset];
+			output_index += sizeof(uint8_t);
+			length -= sizeof(uint8_t);
+		}
+	}
+
+	return output_index;
+}
+
+/*
+ * Decompress inbuf into outbuf. outbuf->data must already provide
+ * outbuf->length bytes; at most that many bytes are produced.
+ *
+ * Returns the number of bytes written to outbuf->data (0 if a blob or
+ * its data pointer is NULL).
+ */
+uint32_t lzxpress_decompress(DATA_BLOB *inbuf,
+			     DATA_BLOB *outbuf)
+{
+	if ((inbuf == NULL) || (outbuf == NULL) ||
+	    (inbuf->data == NULL) || (outbuf->data == NULL)) {
+		return 0;
+	}
+
+	return xpress_decompress(inbuf->data, inbuf->length, outbuf->data, outbuf->length);
+}
diff --git a/source/lib/compression/lzxpress.h b/source/lib/compression/lzxpress.h
new file mode 100644
index 0000000..15e8444
--- /dev/null
+++ b/source/lib/compression/lzxpress.h
@@ -0,0 +1,48 @@
+/*
+ *
Copyright (C) Matthieu Suiche 2008 + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the author nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + */ + +#ifndef _LZXPRESS_H +#define _LZXPRESS_H +/* Upper bound, in bytes, on how far back the compressor looks for a match. */ +#define XPRESS_BLOCK_SIZE 0x10000 +/* LZ Xpress compression of in_buf into out_buf; the return value is the compressed size in bytes (0 if nothing was produced). */ +uint32_t lzxpress_compress(const uint8_t *in_buf, + uint32_t in_len, + uint8_t *out_buf, + uint32_t out_len); +/* LZ Xpress decompression of inbuf->data into outbuf->data, producing at most outbuf->length bytes; the return value is the number of bytes produced. */ +uint32_t lzxpress_decompress(DATA_BLOB *inbuf, + DATA_BLOB *outbuf); + +#endif /* _LZXPRESS_H */ -- 1.5.6 From 8f68773f53b1c461ef178190511dfb0b57958e2d Mon Sep 17 00:00:00 2001 From: Matthieu Suiche Date: Thu, 10 Jul 2008 09:31:43 +0000 Subject: [PATCH] librpc/ndr: add support for XPRESS decompression --- source/librpc/ndr/ndr_compression.c | 61 ++++++++++++++++++++++++++++++++--- 1 files changed, 56 insertions(+), 5 deletions(-) diff --git a/source/librpc/ndr/ndr_compression.c b/source/librpc/ndr/ndr_compression.c index 86a5a25..ffa37f6 100644 --- a/source/librpc/ndr/ndr_compression.c +++ b/source/librpc/ndr/ndr_compression.c @@ -4,6 +4,7 @@ libndr compression support Copyright (C) Stefan Metzmacher 2005 + Copyright (C) Matthieu Suiche 2008 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,6 +22,7 @@ #include "includes.h" #include "lib/compression/mszip.h" +#include "lib/compression/lzxpress.h" #include "librpc/ndr/libndr.h" #include "librpc/ndr/ndr_compression.h" @@ -158,12 +160,12 @@ static enum ndr_err_code ndr_pull_compression_xpress_chunk(struct ndr_pull *ndrp bool *last) { DATA_BLOB comp_chunk; + DATA_BLOB plain_chunk; uint32_t comp_chunk_offset; + uint32_t plain_chunk_offset; uint32_t comp_chunk_size; uint32_t plain_chunk_size; - comp_chunk_offset = ndrpull->offset; - NDR_CHECK(ndr_pull_uint32(ndrpull, NDR_SCALARS, &plain_chunk_size)); if (plain_chunk_size > 0x00010000) { return ndr_pull_error(ndrpull, NDR_ERR_COMPRESSION, "Bad XPRESS plain chunk size %08X > 0x00010000 (PULL)", @@ -172,15 +174,21 @@ static enum ndr_err_code ndr_pull_compression_xpress_chunk(struct ndr_pull *ndrp NDR_CHECK(ndr_pull_uint32(ndrpull, NDR_SCALARS, &comp_chunk_size)); + 
comp_chunk_offset = ndrpull->offset; NDR_CHECK(ndr_pull_advance(ndrpull, comp_chunk_size)); - comp_chunk.length = comp_chunk_size + 8; + comp_chunk.length = comp_chunk_size; comp_chunk.data = ndrpull->data + comp_chunk_offset; + plain_chunk_offset = ndrpush->offset; + NDR_CHECK(ndr_push_zero(ndrpush, plain_chunk_size)); + plain_chunk.length = plain_chunk_size; + plain_chunk.data = ndrpush->data + plain_chunk_offset; + DEBUG(10,("XPRESS plain_chunk_size: %08X (%u) comp_chunk_size: %08X (%u)\n", plain_chunk_size, plain_chunk_size, comp_chunk_size, comp_chunk_size)); - /* For now, we just copy over the compressed blob */ - NDR_CHECK(ndr_push_bytes(ndrpush, comp_chunk.data, comp_chunk.length)); + /* Uncompressing the buffer using LZ Xpress algorithm */ + lzxpress_decompress(&comp_chunk, &plain_chunk); if ((plain_chunk_size < 0x00010000) || (ndrpull->offset+4 >= ndrpull->data_size)) { /* this is the last chunk */ @@ -197,6 +205,10 @@ static enum ndr_err_code ndr_pull_compression_xpress(struct ndr_pull *subndr, struct ndr_push *ndrpush; struct ndr_pull *comndr; DATA_BLOB uncompressed; + uint32_t payload_header[4]; + uint32_t payload_size; + uint32_t payload_offset; + uint8_t *payload; bool last = false; ndrpush = ndr_push_init_ctx(subndr, subndr->iconv_convenience); @@ -207,6 +219,13 @@ static enum ndr_err_code ndr_pull_compression_xpress(struct ndr_pull *subndr, } uncompressed = ndr_push_blob(ndrpush); + if (uncompressed.length != decompressed_len) { + return ndr_pull_error(subndr, NDR_ERR_COMPRESSION, + "Bad XPRESS uncompressed_len [%u] != [%u](0x%08X) (PULL)", + (int)uncompressed.length, + (int)decompressed_len, + (int)decompressed_len); + } comndr = talloc_zero(subndr, struct ndr_pull); NDR_ERR_HAVE_NO_MEMORY(comndr); @@ -219,6 +238,38 @@ static enum ndr_err_code ndr_pull_compression_xpress(struct ndr_pull *subndr, comndr->iconv_convenience = talloc_reference(comndr, subndr->iconv_convenience); + NDR_CHECK(ndr_pull_uint32(comndr, NDR_SCALARS, &payload_header[0])); + 
NDR_CHECK(ndr_pull_uint32(comndr, NDR_SCALARS, &payload_header[1])); + NDR_CHECK(ndr_pull_uint32(comndr, NDR_SCALARS, &payload_header[2])); + NDR_CHECK(ndr_pull_uint32(comndr, NDR_SCALARS, &payload_header[3])); + + if (payload_header[0] != 0x00081001) { + return ndr_pull_error(subndr, NDR_ERR_COMPRESSION, + "Bad XPRESS payload_header[0] [0x%08X] != [0x00081001] (PULL)", + payload_header[0]); + } + if (payload_header[1] != 0xCCCCCCCC) { + return ndr_pull_error(subndr, NDR_ERR_COMPRESSION, + "Bad XPRESS payload_header[1] [0x%08X] != [0xCCCCCCCC] (PULL)", + payload_header[1]); + } + + payload_size = payload_header[2]; + + if (payload_header[3] != 0x00000000) { + return ndr_pull_error(subndr, NDR_ERR_COMPRESSION, + "Bad XPRESS payload_header[3] [0x%08X] != [0x00000000] (PULL)", + payload_header[3]); + } + + payload_offset = comndr->offset; + NDR_CHECK(ndr_pull_advance(comndr, payload_size)); + payload = comndr->data + payload_offset; + + comndr->data = payload; + comndr->data_size = payload_size; + comndr->offset = 0; + *_comndr = comndr; return NDR_ERR_SUCCESS; } -- 1.5.6