Thread: [lc-devel] [RFC] LZO de/compression support - take 6
Status: Beta
Brought to you by:
nitin_sf
From: Nitin G. <nit...@gm...> - 2007-05-28 14:34:57
Attachments:
patch_lzo_2.6.22-rc3.bz2
|
Hi, This is kernel port of LZO1X-1 compressor and LZO1X decompressor (safe version only). * Changes since 'take 5' (Full Changelog after this): - Added compressor and decomrpesssor as separate and hidden config options (default: n) - Cleanups: removed LZO_CHECK_MPOS_NON_DET macro - removed PTR* macros. * Benchmarks: (system: P4 3GHz, 1G RAM) (Using tester program from Daniel) Following compares this kernel port ('take 6') vs original miniLZO code: 'TinyLZO' refers to this kernel port. 10000 run averages: 'Tiny LZO': Combined: 61.2223 usec Compression: 41.8412 usec Decompression: 19.3811 usec 'miniLZO': Combined: 66.0444 usec Compression: 46.6323 usec Decompression: 19.4121 usec Result: Overall: TinyLZO is 7.3% faster Compressor: TinyLZO is 10.2% faster Decompressor: TinyLZO is 0.15% faster TODO: test 'take 6' port on archs other than x86(_32) * Changelog vs. original LZO: 1) Used standard/kernel defined data types: (this eliminated _huge_ #ifdef chunks) lzo_bytep -> unsigned char * lzo_uint -> size_t lzo_xint -> size_t lzo_uint32p -> u32 * lzo_uintptr_t -> unsigned long 2) Removed everything #ifdef'ed under COPY_DICT (this is not set for LZO1X, so removed corres. parts). 3) Removed code #ifdef'ed for LZO1Y, LZO1Z, other variants. 4) Reformatted the code to match general kernel style. 5) The only code change: (as suggested by Andrey) -#if defined(__LITTLE_ENDIAN) - m_pos = op - 1; - m_pos -= (*(const unsigned short *)ip) >> 2; -#else - m_pos = op - 1; - m_pos -= (ip[0] >> 2) + (ip[1] << 6); -#endif + m_pos = op - 1 - (cpu_to_le16(*(const u16 *)ip) >> 2); (Andrey suggested le16_to_cpu for above but I think it should be cpu_to_le16). *** Need testing on big endian machine *** Similarly: -#if defined(__LITTLE_ENDIAN) - m_pos -= (*(const unsigned short *)ip) >> 2; -#else - m_pos -= (ip[0] >> 2) + (ip[1] << 6); -#endif + m_pos -= cpu_to_le16(*(const u16 *)ip) >> 2; 6) Get rid of LZO_CHECK_MPOS_NON_DET macro and PTR* macros. I think it's now ready for mainline inclusion. include/linux/lzo1x.h | 66 +++++++++++ lib/Kconfig | 6 + lib/Makefile | 2 + lib/lzo1x/Makefile | 2 + lib/lzo1x/lzo1x_compress.c | 259 ++++++++++++++++++++++++++++++++++++++++++ lib/lzo1x/lzo1x_decompress.c | 238 ++++++++++++++++++++++++++++++++++++++ lib/lzo1x/lzo1x_int.h | 85 ++++++++++++++ 7 files changed, 658 insertions(+), 0 deletions(-) Signed-off-by: Nitin Gupta <nitingupta910 [at] gmail [dot] com> --- diff --git a/include/linux/lzo1x.h b/include/linux/lzo1x.h new file mode 100755 index 0000000..11a6f23 --- /dev/null +++ b/include/linux/lzo1x.h @@ -0,0 +1,66 @@ +/* lzo1x.h -- public interface of the LZO1X compression algorithm + + This file is part of the LZO real-time data compression library. + + Copyright (C) 1996-2005 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + The LZO library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + The LZO library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the LZO library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + Markus F.X.J. Oberhumer + <ma...@ob...> + http://www.oberhumer.com/opensource/lzo/ + + + This file is modified version of lzo1x.h found in original LZO 2.02 + code. Some additional changes have also been made to make it work + in kernel space. + + Nitin Gupta + <nit...@gm...> + */ + +#ifndef __LZO1X_H +#define __LZO1X_H + +/* LZO return codes */ +#define LZO_E_OK 0 +#define LZO_E_ERROR (-1) +#define LZO_E_OUT_OF_MEMORY (-2) /* [not used right now] */ +#define LZO_E_NOT_COMPRESSIBLE (-3) /* [not used right now] */ +#define LZO_E_INPUT_OVERRUN (-4) +#define LZO_E_OUTPUT_OVERRUN (-5) +#define LZO_E_LOOKBEHIND_OVERRUN (-6) +#define LZO_E_EOF_NOT_FOUND (-7) +#define LZO_E_INPUT_NOT_CONSUMED (-8) +#define LZO_E_NOT_YET_IMPLEMENTED (-9) /* [not used right now] */ + +/* Size of temp buffer (workmem) required by lzo1x_compress */ +#define LZO1X_WORKMEM_SIZE ((size_t) (16384L * sizeof(unsigned char *))) + +/* + * This requires 'workmem' of size LZO1X_WORKMEM_SIZE + */ +int lzo1x_compress(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len, + void *workmem); + +/* + * This decompressor will catch all compressed data violations and + * return an error code in this case. + */ +int lzo1x_decompress(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len); +#endif diff --git a/lib/Kconfig b/lib/Kconfig index 2e7ae6b..eb95eaa 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -64,6 +64,12 @@ config ZLIB_INFLATE config ZLIB_DEFLATE tristate +config LZO1X_COMPRESS + tristate + +config LZO1X_DECOMPRESS + tristate + # # Generic allocator support is selected if needed # diff --git a/lib/Makefile b/lib/Makefile index c8c8e20..448ae37 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -49,6 +49,8 @@ obj-$(CONFIG_GENERIC_ALLOCATOR) += genalloc.o obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/ obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/ obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ +obj-$(CONFIG_LZO1X_COMPRESS) += lzo1x/ +obj-$(CONFIG_LZO1X_DECOMPRESS) += lzo1x/ obj-$(CONFIG_TEXTSEARCH) += textsearch.o obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o diff --git a/lib/lzo1x/Makefile b/lib/lzo1x/Makefile new file mode 100644 index 0000000..7b56a4d --- /dev/null +++ b/lib/lzo1x/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_LZO1X_COMPRESS) += lzo1x_compress.o +obj-$(CONFIG_LZO1X_DECOMPRESS) += lzo1x_decompress.o diff --git a/lib/lzo1x/lzo1x_compress.c b/lib/lzo1x/lzo1x_compress.c new file mode 100755 index 0000000..b525be6 --- /dev/null +++ b/lib/lzo1x/lzo1x_compress.c @@ -0,0 +1,259 @@ +/* lzo1x_compress.c -- LZO1X-1 compression + + This file is part of the LZO real-time data compression library. + + Copyright (C) 1996-2005 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + The LZO library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + The LZO library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the LZO library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + Markus F.X.J. Oberhumer + <ma...@ob...> + http://www.oberhumer.com/opensource/lzo/ + + + This file is derived from lzo1x_1.c and lzo1x_c.ch found in original + LZO 2.02 code. Some additional changes have also been made to make + it work in kernel space. + + Nitin Gupta + <nit...@gm...> + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/compiler.h> +#include <linux/lzo1x.h> + +#include "lzo1x_int.h" + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZO1X Compression"); + +/* compress a block of data. */ +static noinline unsigned int +lzo1x_compress_worker(const unsigned char *in, size_t in_len, + unsigned char *out, size_t *out_len, + void *workmem) +{ + register const unsigned char *ip; + unsigned char *op; + const unsigned char * const in_end = in + in_len; + const unsigned char * const ip_end = in + in_len - M2_MAX_LEN - 5; + const unsigned char *ii; + const unsigned char ** const dict = (const unsigned char **)workmem; + + op = out; + ip = in; + ii = ip; + + ip += 4; + for (;;) { + register const unsigned char *m_pos; + size_t m_off; + size_t m_len; + size_t dindex; + + DINDEX1(dindex, ip); + m_pos = dict[dindex]; + + if ((m_pos < in) || (m_off = (size_t)(ip - m_pos)) <= 0 + || m_off > M4_MAX_OFFSET) + goto literal; + + if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3]) + goto try_match; + + DINDEX2(dindex, ip); + m_pos = dict[dindex]; + + if ((m_pos < in) || (m_off = (size_t)(ip - m_pos)) <= 0 + || m_off > M4_MAX_OFFSET) + goto literal; + + if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3]) + goto try_match; + + goto literal; + +try_match: + if (*(const unsigned short *)m_pos == + *(const unsigned short *)ip) { + if (likely(m_pos[2] == ip[2])) + goto match; + } + + /* a literal */ +literal: + dict[dindex] = ip; + ++ip; + if (unlikely(ip >= ip_end)) + break; + continue; + + /* a match */ +match: + dict[dindex] = ip; + /* store current literal run */ + if ((size_t)(ip - ii) > 0) { + register size_t t = (size_t)(ip - ii); + if (t <= 3) + op[-2] |= (unsigned char)t; + else if (t <= 18) + *op++ = (unsigned char)(t - 3); + else { + register size_t tt = t - 18; + *op++ = 0; + while (tt > 255) { + tt -= 255; + *op++ = 0; + } + *op++ = (unsigned char)tt; + } + do + *op++ = *ii++; + while (--t > 0); + } + + /* code the match */ + ip += 3; + if (m_pos[3] != *ip++ || m_pos[4] != *ip++ || + m_pos[5] != *ip++ || m_pos[6] != *ip++ || + m_pos[7] != *ip++ || m_pos[8] != *ip++) { + --ip; + m_len = (size_t)(ip - ii); + + if (m_off <= M2_MAX_OFFSET) { + m_off -= 1; + *op++ = (unsigned char)(((m_len - 1) << 5) | + ((m_off & 7) << 2)); + *op++ = (unsigned char)(m_off >> 3); + } + else if (m_off <= M3_MAX_OFFSET) { + m_off -= 1; + *op++ = (unsigned char)(M3_MARKER | + (m_len - 2)); + goto m3_m4_offset; + } else { + m_off -= 0x4000; + *op++ = (unsigned char)(M4_MARKER | + ((m_off & 0x4000) >> 11) | + (m_len - 2)); + goto m3_m4_offset; + } + } else { + const unsigned char *end = in_end; + const unsigned char *m = m_pos + M2_MAX_LEN + 1; + while (ip < end && *m == *ip) + m++, ip++; + m_len = (size_t)(ip - ii); + + if (m_off <= M3_MAX_OFFSET) { + m_off -= 1; + if (m_len <= 33) + *op++ = (unsigned char)(M3_MARKER | + (m_len - 2)); + else { + m_len -= 33; + *op++ = M3_MARKER | 0; + goto m3_m4_len; + } + } else { + m_off -= 0x4000; + if (m_len <= M4_MAX_LEN) + *op++ = (unsigned char)(M4_MARKER | + ((m_off & 0x4000) >> 11) | + (m_len - 2)); + else { + m_len -= M4_MAX_LEN; + *op++ = (unsigned char)(M4_MARKER | + ((m_off & 0x4000) >> 11)); +m3_m4_len: + while (m_len > 255) { + m_len -= 255; + *op++ = 0; + } + *op++ = (unsigned char)(m_len); + } + } + +m3_m4_offset: + *op++ = (unsigned char)((m_off & 63) << 2); + *op++ = (unsigned char)(m_off >> 6); + } + + ii = ip; + if (unlikely(ip >= ip_end)) + break; + } + + *out_len = (size_t)(op - out); + return (size_t)(in_end - ii); +} + + +/* + * This requires buffer (workmem) of size LZO1X_WORKMEM_SIZE + * (exported by lzo1x.h). + */ +int +lzo1x_compress(const unsigned char *in, size_t in_len, + unsigned char *out, size_t *out_len, + void *workmem) +{ + unsigned char *op = out; + size_t t; + + if (!workmem) + return -EINVAL; + + if (unlikely(in_len <= M2_MAX_LEN + 5)) + t = in_len; + else { + t = lzo1x_compress_worker(in, in_len, op, out_len, workmem); + op += *out_len; + } + + if (t > 0) { + const unsigned char *ii = in + in_len - t; + + if (op == out && t <= 238) + *op++ = (unsigned char)(17 + t); + else if (t <= 3) + op[-2] |= (unsigned char)t; + else if (t <= 18) + *op++ = (unsigned char)(t - 3); + else { + size_t tt = t - 18; + *op++ = 0; + while (tt > 255) { + tt -= 255; + *op++ = 0; + } + *op++ = (unsigned char)tt; + } + do + *op++ = *ii++; + while (--t > 0); + } + *op++ = M4_MARKER | 1; + *op++ = 0; + *op++ = 0; + + *out_len = (size_t)(op - out); + return LZO_E_OK; +} + +EXPORT_SYMBOL(lzo1x_compress); diff --git a/lib/lzo1x/lzo1x_decompress.c b/lib/lzo1x/lzo1x_decompress.c new file mode 100755 index 0000000..75ce294 --- /dev/null +++ b/lib/lzo1x/lzo1x_decompress.c @@ -0,0 +1,238 @@ +/* lzo1x_decompress.c -- LZO1X decompression + + This file is part of the LZO real-time data compression library. + + Copyright (C) 1996-2005 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + The LZO library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + The LZO library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the LZO library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + Markus F.X.J. Oberhumer + <ma...@ob...> + http://www.oberhumer.com/opensource/lzo/ + + + This file is derived from lzo1x_d1.c and lzo1x_d.ch found in original + LZO 2.02 code. Some additional changes have also been made to make + it work in kernel space. + + Nitin Gupta + <nit...@gm...> + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <asm/byteorder.h> +#include <linux/lzo1x.h> + +#include "lzo1x_int.h" + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZO1X Decompression"); + +int +lzo1x_decompress(const unsigned char *in, size_t in_len, + unsigned char *out, size_t *out_len) +{ + register size_t t; + register unsigned char *op = out; + register const unsigned char *ip = in, *m_pos; + const unsigned char * const ip_end = in + in_len; + unsigned char * const op_end = out + *out_len; + *out_len = 0; + + if (*ip > 17) { + t = *ip++ - 17; + if (t < 4) + goto match_next; + NEED_OP(t); + NEED_IP(t + 1); + do + *op++ = *ip++; + while (--t > 0); + goto first_literal_run; + } + + while (TEST_IP) { + t = *ip++; + if (t >= 16) + goto match; + /* a literal run */ + if (t == 0) { + NEED_IP(1); + while (*ip == 0) { + t += 255; + ip++; + NEED_IP(1); + } + t += 15 + *ip++; + } + /* copy literals */ + NEED_OP(t + 3); + NEED_IP(t + 4); + COPY4(op, ip); + op += 4; ip += 4; + if (--t > 0) { + if (t >= 4) { + do { + COPY4(op, ip); + op += 4; ip += 4; t -= 4; + } while (t >= 4); + if (t > 0) + do + *op++ = *ip++; + while (--t > 0); + } + else + do + *op++ = *ip++; + while (--t > 0); + } + +first_literal_run: + t = *ip++; + if (t >= 16) + goto match; + m_pos = op - (1 + M2_MAX_OFFSET); + m_pos -= t >> 2; + m_pos -= *ip++ << 2; + TEST_LB(m_pos); + NEED_OP(3); + *op++ = *m_pos++; + *op++ = *m_pos++; + *op++ = *m_pos; + goto match_done; + + /* handle matches */ + do { +match: + if (t >= 64) { /* a M2 match */ + m_pos = op - 1; + m_pos -= (t >> 2) & 7; + m_pos -= *ip++ << 3; + t = (t >> 5) - 1; + TEST_LB(m_pos); + NEED_OP(t + 3 - 1); + goto copy_match; + } else if (t >= 32) { /* a M3 match */ + t &= 31; + if (t == 0) { + NEED_IP(1); + while (*ip == 0) { + t += 255; + ip++; + NEED_IP(1); + } + t += 31 + *ip++; + } + m_pos = op - 1 - (cpu_to_le16( + *(const unsigned short *)ip) >> 2); + ip += 2; + } else if (t >= 16) { /* a M4 match */ + m_pos = op; + m_pos -= (t & 8) << 11; + t &= 7; + if (t == 0) { + NEED_IP(1); + while (*ip == 0) { + t += 255; + ip++; + NEED_IP(1); + } + t += 7 + *ip++; + } + m_pos -= cpu_to_le16( + *(const unsigned short *)ip) >> 2; + ip += 2; + if (m_pos == op) + goto eof_found; + m_pos -= 0x4000; + } else { /* a M1 match */ + m_pos = op - 1; + m_pos -= t >> 2; + m_pos -= *ip++ << 2; + TEST_LB(m_pos); + NEED_OP(2); + *op++ = *m_pos++; + *op++ = *m_pos; + goto match_done; + } + + /* copy match */ + TEST_LB(m_pos); + NEED_OP(t + 3 - 1); + + if (t >= 2 * 4 - (3 - 1) && (op - m_pos) >= 4) { + COPY4(op, m_pos); + op += 4; m_pos += 4; t -= 4 - (3 - 1); + do { + COPY4(op, m_pos); + op += 4; m_pos += 4; t -= 4; + } while (t >= 4); + if (t > 0) + do *op++ = *m_pos++; + while (--t > 0); + } else { +copy_match: + *op++ = *m_pos++; + *op++ = *m_pos++; + do + *op++ = *m_pos++; + while (--t > 0); + } + +match_done: + t = ip[-2] & 3; + if (t == 0) + break; + + /* copy literals */ +match_next: + NEED_OP(t); + NEED_IP(t + 1); + *op++ = *ip++; + if (t > 1) { + *op++ = *ip++; + if (t > 2) + *op++ = *ip++; + } + t = *ip++; + } while (TEST_IP); + } + + /* no EOF code was found */ + *out_len = (size_t)(op - out); + return LZO_E_EOF_NOT_FOUND; + +eof_found: + *out_len = (size_t)(op - out); + return (ip == ip_end ? LZO_E_OK : + (ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : + LZO_E_INPUT_OVERRUN)); + +input_overrun: + *out_len = (size_t)(op - out); + return LZO_E_INPUT_OVERRUN; + +output_overrun: + *out_len = (size_t)(op - out); + return LZO_E_OUTPUT_OVERRUN; + +lookbehind_overrun: + *out_len = (size_t)(op - out); + return LZO_E_LOOKBEHIND_OVERRUN; +} + +EXPORT_SYMBOL(lzo1x_decompress); diff --git a/lib/lzo1x/lzo1x_int.h b/lib/lzo1x/lzo1x_int.h new file mode 100755 index 0000000..4f0fe8d --- /dev/null +++ b/lib/lzo1x/lzo1x_int.h @@ -0,0 +1,85 @@ +/* lzo1x_int.h -- to be used internally by LZO de/compression algorithms + + This file is part of the LZO real-time data compression library. + + Copyright (C) 1996-2005 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + The LZO library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + The LZO library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the LZO library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + Markus F.X.J. Oberhumer + <ma...@ob...> + http://www.oberhumer.com/opensource/lzo/ + + + This file was derived from several header files found in original + LZO 2.02 code. Some additional changes have also been made to make + it work in kernel space. + + Nitin Gupta + <nit...@gm...> + */ + +#ifndef __LZO1X_INT_H +#define __LZO1X_INT_H + +#include <linux/types.h> + +#define D_BITS 14 +#define D_SIZE (1u << D_BITS) +#define D_MASK (D_SIZE - 1) +#define D_HIGH ((D_MASK >> 1) + 1) + +#define DX2(p,s1,s2) \ + (((((size_t)((p)[2]) << (s2)) ^ (p)[1]) << (s1)) ^ (p)[0]) +#define DX3(p,s1,s2,s3) \ + ((DX2((p) + 1, s2, s3) << (s1)) ^ (p)[0]) +#define DINDEX1(d,p) \ + d = ((size_t)(0x21 * DX3(p, 5, 5, 6)) >> 5) & D_MASK +#define DINDEX2(d,p) \ + d = (d & (D_MASK & 0x7ff)) ^ (D_HIGH | 0x1f) + +#define COPY4(dst,src) *(u32 *)(dst) = *(u32 *)(src) + +/* LZO1X Specific constants */ +#define M1_MAX_OFFSET 0x0400 +#define M2_MAX_OFFSET 0x0800 +#define M3_MAX_OFFSET 0x4000 +#define M4_MAX_OFFSET 0xbfff + +#define M1_MIN_LEN 2 +#define M1_MAX_LEN 2 +#define M2_MIN_LEN 3 +#define M2_MAX_LEN 8 +#define M3_MIN_LEN 3 +#define M3_MAX_LEN 33 +#define M4_MIN_LEN 3 +#define M4_MAX_LEN 9 + +#define M1_MARKER 0 +#define M2_MARKER 64 +#define M3_MARKER 32 +#define M4_MARKER 16 + +/* Bounds checking */ +#define TEST_IP (ip < ip_end) +#define NEED_IP(x) \ + if ((size_t)(ip_end - ip) < (size_t)(x)) goto input_overrun +#define NEED_OP(x) \ + if ((size_t)(op_end - op) < (size_t)(x)) goto output_overrun +#define TEST_LB(m_pos) \ + if (m_pos < out || m_pos >= op) goto lookbehind_overrun + +#endif |
From: Nitin G. <nit...@gm...> - 2007-05-28 14:40:41
Attachments:
lzo1x-test-4.tar.bz2
|
Hi, Attached is tester code used for testing. (developed by Daniel Hazelton -- modified slightly to now use 'take 6' version for 'TinyLZO') Cheers, Nitin On 5/28/07, Nitin Gupta <nit...@gm...> wrote: > (Using tester program from Daniel) > > Following compares this kernel port ('take 6') vs original miniLZO code: > > 'TinyLZO' refers to this kernel port. > > 10000 run averages: > 'Tiny LZO': > Combined: 61.2223 usec > Compression: 41.8412 usec > Decompression: 19.3811 usec > 'miniLZO': > Combined: 66.0444 usec > Compression: 46.6323 usec > Decompression: 19.4121 usec > > Result: > Overall: TinyLZO is 7.3% faster > Compressor: TinyLZO is 10.2% faster > Decompressor: TinyLZO is 0.15% faster > |
From: Nitin G. <nit...@gm...> - 2007-05-28 15:07:07
|
On 5/28/07, Daniel Hazelton <dha...@en...> wrote: > On Monday 28 May 2007 10:40:31 Nitin Gupta wrote: > > Hi, > > > > Attached is tester code used for testing. > > (developed by Daniel Hazelton -- modified slightly to now use 'take 6' > > version for 'TinyLZO') > > > > Cheers, > > Nitin > > > > <snip> > I haven't tested with version 6, but after removing the LZO_CHECK_MPOS_NON_DET > macro from the 'take 5' code and replacing the open-coded byte-for-byte > copies with calls to memcpy: > I did memcpy() changes in some initial post (take '2', I think). That caused some _correctness_ issue in de/compressor code -- Bret's test could not succeed on ppc machine. After going back to byte-by-byte copying, his tests were successful. So, it's better not to include such changes now or test on all supported archs if you really want to do so :) > 10000 run averages: > 'Tiny LZO': > Combined: 57.4691 usec > Compression: 39.8837 usec > Decompression: 17.5854 usec > 'miniLZO': > Combined: 64.0484 usec > Compression: 46.0604 usec > Decompression: 17.988 usec > > which means: > Overall TinyLZO is 10.2% faster > TinyLZO compresses 13.4% faster > TinyLZO decompresses 2.23% faster > > -Benchmark run a a Pentium-M 1.73GHz, 1GB Ram > With the speed-up seen with just the removal of the LZO_CHECK_MPOS_NON_DET I > wasn't sure that changing the open-coded copy to a call to memcpy() was going > to have a big impact on the code, but it does appear to have has several > percentage points of difference. Yes, memcpy() changes have potential of giving significant perf gain but I am not too sure if memcpy() will be good if we want to copy just few bytes (which is the case at many times in de/compressor). Also, at some places, memcpy() changes are not trival and have actually caused correctness issues as mentioned above. So, before this change, it will be good if it gets merged in mainline and tested, at least for correctness, on all supported achs. All the while, we will have a good feeling that there is still a good scope for perf improvement :) Cheers, Nitin |
From: Nitin G. <nit...@gm...> - 2007-05-28 15:48:42
|
On 5/28/07, Adrian Bunk <bu...@st...> wrote: > On Mon, May 28, 2007 at 08:10:31PM +0530, Nitin Gupta wrote: > > Hi, > > > > Attached is tester code used for testing. > > (developed by Daniel Hazelton -- modified slightly to now use 'take 6' > > version for 'TinyLZO') > > > > Cheers, > > Nitin > > > > On 5/28/07, Nitin Gupta <nit...@gm...> wrote: > >> (Using tester program from Daniel) > >> > >> Following compares this kernel port ('take 6') vs original miniLZO code: > >> > >> 'TinyLZO' refers to this kernel port. > >> > >> 10000 run averages: > >> 'Tiny LZO': > >> Combined: 61.2223 usec > >> Compression: 41.8412 usec > >> Decompression: 19.3811 usec > >> 'miniLZO': > >> Combined: 66.0444 usec > >> Compression: 46.6323 usec > >> Decompression: 19.4121 usec > >> > >> Result: > >> Overall: TinyLZO is 7.3% faster > >> Compressor: TinyLZO is 10.2% faster > >> Decompressor: TinyLZO is 0.15% faster > > So your the compressor of your version runs 10.2% faster than the > original version. > > That's a huge difference. > > Why exactly is it that much faster? > > cu > Adrian I am not sure on how to account for this _big_ perf. gain but from benchmarks I see that whenever I remove unncessary casting from tight loops I get perf. gain of 1-2%. For e.g. open coding LZO_CHECK_MPOS_NON_DET macro with all unnecessary casting removed, gave perf. gain of ~2%. Similarly, I found many other places where such casting was unnecessary. These changes have been tested on x86, amd64, ppc. Testing of 'take 6' version is yet to be done - this will confirm that I didn't reintroduce some error. - Nitin |
From: Nitin G. <nit...@gm...> - 2007-05-28 16:03:41
|
On 5/28/07, Adrian Bunk <bu...@st...> wrote: > On Mon, May 28, 2007 at 08:36:44PM +0530, Nitin Gupta wrote: > >... > > So, before this change, it will be good if it gets merged in mainline > > and tested, at least for correctness, on all supported achs. All the > > while, we will have a good feeling that there is still a good scope > > for perf improvement :) > > The correct order is: > - create one version with all the optimizations you have in mind Already done. One more optimization is regarding use of memcpy() in place of COPY4() macros and open byte-by-byte copying. There are some places where it's very hard to get it correct without adding additional checks on various values which casues futher overhead by iteslf - even then I could not get them correct so I decided not to go with this particular optimization by myself. > - then ensure that it works correctly on all architectures and Already tested on x86, amd64, ppc (by Bret). I do not have machines from other archs available. Bret tested 'take 3' version but no changes were introduced in further revisions that could affect correctness - but still it will be good to have this version tested too. Only with inclusion in -mm and testing by much wider user base can make it to mainline (I suppose nobody uses -mm for production use anyway). > document why your version is that much faster than the original > version and why you know your optimizations have no side effects Replied in earlier mail regarding this. > - then get it tested in -mm > This is what I am looking for :) > After these steps, you can start considering getting it into mainline. > - Nitin |
From: Nitin G. <nit...@gm...> - 2007-05-29 05:48:32
|
On 5/29/07, Bret Towe <ma...@gm...> wrote: > On 5/28/07, Nitin Gupta <nit...@gm...> wrote: > > Hi, > > > > Attached is tester code used for testing. > > (developed by Daniel Hazelton -- modified slightly to now use 'take 6' > > version for 'TinyLZO') > > > > Cheers, > > Nitin > > > > On 5/28/07, Nitin Gupta <nit...@gm...> wrote: > > > (Using tester program from Daniel) > > > > > > Following compares this kernel port ('take 6') vs original miniLZO code: > > > > > > 'TinyLZO' refers to this kernel port. > > > > > > 10000 run averages: > > > 'Tiny LZO': > > > Combined: 61.2223 usec > > > Compression: 41.8412 usec > > > Decompression: 19.3811 usec > > > 'miniLZO': > > > Combined: 66.0444 usec > > > Compression: 46.6323 usec > > > Decompression: 19.4121 usec > > > > > > Result: > > > Overall: TinyLZO is 7.3% faster > > > Compressor: TinyLZO is 10.2% faster > > > Decompressor: TinyLZO is 0.15% faster > > > > > > > > > 10000 run averages: > 'Tiny LZO': > Combined: 112.6642 usec > Compression: 56.3321 usec > Decompression: 56.3321 usec > 'miniLZO': > Combined: 77.6642 usec > Compression: 56.3416 usec > Decompression: 21.3226 usec > > now the interesting bit I thought was the following > root@mini:/usr/src/lzo1x-test-4# ./fulltest > [test_lzo.c::compress (93)] run took 42 microseconds > [test_lzo.c::decompress (127)] run took 20 microseconds > root@mini:/usr/src/lzo1x-test-4# ./tinytest > [test.c::compress (91)] run took 44 microseconds > [test.c::decompress (117)] BUG: lzo1x_decompress has failed ( t == -6 ) > [test.c::main (149)] BUG: Decompression routine failure > Did you use x86 for above test? Maybe some problem with testing script? What data did you use for this test? - Nitin |
From: Nitin G. <nit...@gm...> - 2007-05-29 05:55:54
|
On 5/28/07, Adrian Bunk <bu...@st...> wrote: > On Mon, May 28, 2007 at 09:33:32PM +0530, Nitin Gupta wrote: > > On 5/28/07, Adrian Bunk <bu...@st...> wrote: <snip> > > I have not seen any explanations: > - Why did the upstream author write the code that way? > - Why are your changes correct? > - Why do your changes allow the compiler to produce faster code? > > The upstream author of the code is available - and he might be able to > help you getting answers. No matter whether your changes are incorrect > or correct optimizations that should go upstream, in both cases > discussing these issues with upstream is the best solution. The changelog I posted along with patch mentions all the changes I made. I thought we will find all problems with this changelog in hand and considering that its just ~500 LOC. But still, ok, asking author himself will be good if he replies. I will mail him detailed changelog and seek his feedback on this. This should answer all of your questions. > > And testing is nice, but if you broke some case that's outside of your > tests you'll never notice. > Yes. We cannot come up with exhaustive set of test cases to cover all cases. But assuming that _original_ version is right and taking the chagelog we should be able to verify if the porting is correct. - Nitin |
From: Nitin G. <nit...@gm...> - 2007-05-29 05:58:57
|
On 5/29/07, Bret Towe <ma...@gm...> wrote: > > tested this on ppc and its still good > is there any reason to bother with a test on amd64? > if there is I might be able to get to it tonight > Yes, this test is desired on 'take 6' (In future I will append version to patch bz2) Thanks, Nitin |
From: Nitin G. <nit...@gm...> - 2007-05-29 12:03:56
|
On 5/29/07, Adrian Bunk <bu...@st...> wrote: > On Tue, May 29, 2007 at 09:08:27AM +0100, Michael-Luke Jones wrote: > > On 28 May 2007, at 18:11, Adrian Bunk wrote: > > > >> I have not seen any explanations: > >> - Why did the upstream author write the code that way? > > > > Apparently due to his requirement for extreme portability. The original > > code was designed to work on everything from 16-bit DOS through CRAY > > supercomputers through Windows, Unices and Linux. > > Sure, this could be the reason in some or all cases. > > The upstream author knows the code best, and discussing such issues with > him will in many cases be a win: > > It could be that there was in some cases no good reason, and the > upstream code that gets used by many other projects could become faster. > > Or there was a good reason that applies also to the in-kernel version > and a change breaks some corner case. > I have mailed the author with detailed changelog - waiting for reply. > > The author has stated on the thread that it's a good idea to remove > > unnecessary ifdefs when porting the code into the kernel, given that the > > portability requirements are obviously no longer needed. > > "remove unnecessary ifdefs" implies "generated code is identical". > > That's quite different from "code is 10% faster". > Daniel made some changes to his testing code and now the perf gain is just 1.6%. - Nitin |
From: Nitin G. <nit...@gm...> - 2007-05-30 05:19:47
|
On 5/30/07, Daniel Hazelton <dha...@en...> wrote: > I just noticed a bug in my testbed/benchmarking code. It's fixed, but I > decided to compare version 6 of the code against the *unsafe* decompressor > again. The results of the three runs I've put it through after changing it to > compare against the unsafe decompressor were startling. `Tiny's` compressor > is still faster - I've seen it be rated up to 3% faster. The decompressor, > OTOH, when compared to the unsafe version (which is the comparison that > started me on this binge of hacking), is more than 7% worse. About 11% slower > on the original test against a C source file, and about 6% slower for random > data. Unsafe vs safe is within 10%. Its okay. > However, looking at the numbers involved, I can't see a reason to keep > the unsafe version around - the percentages look worse than they are - from 1 > to 3 microseconds. Not just numbers. Most of applications cannot afford to use unsafe versions anyway (like fs people). (well, the compressed-cache people might want those extra > usecs - but the difference will never be noticeable anywhere outside the > kernel) > > DRH > compressed cache people require every single percent of that performance. For now, ccaching is not ready for mainline (many things need to be done). So, till then I will keep off the unsafe version. If ever compressed caching is on its way to mainline _then_ I will try and add back the unsafe version. But I see no other project that really cares about unsafe version so it's okay to keep it off. - Nitin |
From: Nitin G. <nit...@gm...> - 2007-05-30 05:55:00
|
On 5/30/07, Jan Engelhardt <je...@li...> wrote: > > On May 28 2007 20:04, Nitin Gupta wrote: > > > > * Changelog vs. original LZO: > > 1) Used standard/kernel defined data types: (this eliminated _huge_ > > #ifdef chunks) > > lzo_bytep -> unsigned char * > > lzo_uint -> size_t > > lzo_xint -> size_t > > Is this safe (as far as compressed LZO stream is concerned) -- > or is it even needed (could it be unsigned int)? > > > - m_pos -= (*(const unsigned short *)ip) >> 2; > > -#else > > - m_pos = op - 1; > > - m_pos -= (ip[0] >> 2) + (ip[1] << 6); > > -#endif > > > > + m_pos = op - 1 - (cpu_to_le16(*(const u16 *)ip) >> 2); > > > > (Andrey suggested le16_to_cpu for above but I think it should be cpu_to_le16). > > *** Need testing on big endian machine *** > > On i386, both cpu_to_le16 and le16_to_cpu do nothing. > On sparc for example, cpu_to_leXX and leXX_to_cpu do 'the same' ;-) > they swap 1234<->4321. > > It is the bytestream (ip) that is reinterpreted as uint16_t. > And I really doubt that the LZO author has a big-endian machine, > given the days of ubiquitous x86. > le16_to_cpu it is. > I just missed the point that leXX_to_cpu() and cpu_to_leXX() are identical on BE machine anyway. But then why you think it should be le_16_cpu() -- how will this make any difference? For your ref (from big_endian.h): #define __cpu_to_le16(x) ((__force __le16)__swab16((x))) #define __le16_to_cpu(x) __swab16((__force __u16)(__le16)(x)) - Nitin |
From: Nitin G. <nit...@gm...> - 2007-05-30 05:32:01
|
On 5/30/07, Adrian Bunk <bu...@st...> wrote: > On Tue, May 29, 2007 at 11:10:05PM +0200, Jan Engelhardt wrote: > > > > On May 28 2007 19:11, Adrian Bunk wrote: > You completely miss the point of my question. > > It's about the performance improvements of the modified code that were > mentioned. > > What you are talking about shouldn't have any effect on the generated > code. > After Daniel refined this testing program, we can see that perf gain is < 2% which can surely be accounted to cleanups like unnecessary castings in tight loops. Again, all the original code has been retained _as-is_. Whatever was changed, has been mentioned in that detailed changelog that I post along with patch. If someone want to review these 500 lines with this changelog in hand, it should not take more than couple of hours. If no-one can see any problem in code by now and considering that it's tested on x86(_32), amd64, ppc and giving somewhat better perf. than original then I believe it is unnecessarily hanging outside of -mm tree. I also contacted author (Markus Oberhumer) regarding above changes but he seems not be responding right now. But still, if it gets into -mm and gets used by various related projects then it should be good enough for mainline also. Cheers, Nitin |
From: Nitin G. <nit...@gm...> - 2007-05-30 10:47:56
|
On 5/30/07, Jan Engelhardt <je...@li...> wrote: > > On May 30 2007 11:24, Nitin Gupta wrote: > >> > >> It is the bytestream (ip) that is reinterpreted as uint16_t. > >> And I really doubt that the LZO author has a big-endian machine, > >> given the days of ubiquitous x86. > > > >> le16_to_cpu it is. > > > > But then why you think it should be > > le_16_cpu() -- how will this make any difference? > > Like I said, we are reinterpreting the byte stream as a uint16_t > (= reading from bytestream to CPU). While writing out an uint16_t > as a bytestream is cpu_to_le16. > I didn't properly understand this. But you seem to be correct - now I ran sparse check: * using cpu_to_le16() lib/lzo1x/lzo1x_decompress.c:141:35: error: incompatible types for operation (>>) lib/lzo1x/lzo1x_decompress.c:141:35: left side has type restricted unsigned short [usertype] [force] <noident> lib/lzo1x/lzo1x_decompress.c:141:35: right side has type int lib/lzo1x/lzo1x_decompress.c:140:20: error: incompatible types for operation (-) lib/lzo1x/lzo1x_decompress.c:140:20: left side has type unsigned char *register [assigned] op lib/lzo1x/lzo1x_decompress.c:140:20: right side has type bad type lib/lzo1x/lzo1x_decompress.c:157:35: error: incompatible types for operation (>>) lib/lzo1x/lzo1x_decompress.c:157:35: left side has type restricted unsigned short [usertype] [force] <noident> lib/lzo1x/lzo1x_decompress.c:157:35: right side has type int lib/lzo1x/lzo1x_decompress.c:156:11: error: incompatible types for operation (-=) * using le16_to_cpu() lib/lzo1x/lzo1x_decompress.c:140:23: warning: cast to restricted type lib/lzo1x/lzo1x_decompress.c:156:14: warning: cast to restricted type But still, how do I get rid of these two warnings? or, do these warning suggest some real problem that still exist in code? - Nitin > > For your ref (from big_endian.h): > > # define __cpu_to_le16(x) ((__force __le16)__swab16((x))) > > # define __le16_to_cpu(x) __swab16((__force __u16)(__le16)(x)) > > > Jan > -- > |
From: Nitin G. <nit...@gm...> - 2007-05-31 12:34:45
|
Hi, FYI... The author (Markus Oberhumer) of LZO provided these comments for this patch: --- I've only briefly looked over it, but it's obvious that your version does not work on architechtures which do not allow unaligned access (arm, mips, ...). As for further quality assurance, your version should generate byte-identical object code to LZO 2.02 when using the same compiler & flags. So you could do some semi-automatic testing by compiling with -ffunction-sections and use "objcopy --only-section .text.XXX" to compare the md5sum of all generated functions. This also works fine with crosscompilers. Finally I'm not too happy that you renamed the functions and #defines like LZO1X_WORKMEM_SIZE - please stay compatible with the official library version. ---- Still a lot to do... - Nitin On 5/28/07, Nitin Gupta <nit...@gm...> wrote: > This is kernel port of LZO1X-1 compressor and LZO1X decompressor (safe > version only). > > * Changes since 'take 5' (Full Changelog after this): > - Added compressor and decomrpesssor as separate and hidden config > options (default: n) > - Cleanups: removed LZO_CHECK_MPOS_NON_DET macro > - removed PTR* macros. > > * Benchmarks: (system: P4 3GHz, 1G RAM) > (Using tester program from Daniel) > > Following compares this kernel port ('take 6') vs original miniLZO code: > > 'TinyLZO' refers to this kernel port. > > 10000 run averages: > 'Tiny LZO': > Combined: 61.2223 usec > Compression: 41.8412 usec > Decompression: 19.3811 usec > 'miniLZO': > Combined: 66.0444 usec > Compression: 46.6323 usec > Decompression: 19.4121 usec > > Result: > Overall: TinyLZO is 7.3% faster > Compressor: TinyLZO is 10.2% faster > Decompressor: TinyLZO is 0.15% faster > > TODO: test 'take 6' port on archs other than x86(_32) > > * Changelog vs. original LZO: > 1) Used standard/kernel defined data types: (this eliminated _huge_ > #ifdef chunks) > lzo_bytep -> unsigned char * > lzo_uint -> size_t > lzo_xint -> size_t > lzo_uint32p -> u32 * > lzo_uintptr_t -> unsigned long > 2) Removed everything #ifdef'ed under COPY_DICT (this is not set for > LZO1X, so removed corres. parts). > 3) Removed code #ifdef'ed for LZO1Y, LZO1Z, other variants. > 4) Reformatted the code to match general kernel style. > 5) The only code change: (as suggested by Andrey) > -#if defined(__LITTLE_ENDIAN) > - m_pos = op - 1; > - m_pos -= (*(const unsigned short *)ip) >> 2; > -#else > - m_pos = op - 1; > - m_pos -= (ip[0] >> 2) + (ip[1] << 6); > -#endif > > + m_pos = op - 1 - (cpu_to_le16(*(const u16 *)ip) >> 2); > > (Andrey suggested le16_to_cpu for above but I think it should be cpu_to_le16). > *** Need testing on big endian machine *** > > Similarly: > -#if defined(__LITTLE_ENDIAN) > - m_pos -= (*(const unsigned short *)ip) >> 2; > -#else > - m_pos -= (ip[0] >> 2) + (ip[1] << 6); > -#endif > > + m_pos -= cpu_to_le16(*(const u16 *)ip) >> 2; > > 6) Get rid of LZO_CHECK_MPOS_NON_DET macro and PTR* macros. > > > I think it's now ready for mainline inclusion. > > > include/linux/lzo1x.h | 66 +++++++++++ > lib/Kconfig | 6 + > lib/Makefile | 2 + > lib/lzo1x/Makefile | 2 + > lib/lzo1x/lzo1x_compress.c | 259 ++++++++++++++++++++++++++++++++++++++++++ > lib/lzo1x/lzo1x_decompress.c | 238 ++++++++++++++++++++++++++++++++++++++ > lib/lzo1x/lzo1x_int.h | 85 ++++++++++++++ > 7 files changed, 658 insertions(+), 0 deletions(-) > > Signed-off-by: Nitin Gupta <nitingupta910 [at] gmail [dot] com> > --- > diff --git a/include/linux/lzo1x.h b/include/linux/lzo1x.h > new file mode 100755 > index 0000000..11a6f23 > --- /dev/null > +++ b/include/linux/lzo1x.h > @@ -0,0 +1,66 @@ > +/* lzo1x.h -- public interface of the LZO1X compression algorithm > + > + This file is part of the LZO real-time data compression library. > + > + Copyright (C) 1996-2005 Markus Franz Xaver Johannes Oberhumer > + All Rights Reserved. > + > + The LZO library is free software; you can redistribute it and/or > + modify it under the terms of the GNU General Public License, > + version 2, as published by the Free Software Foundation. > + > + The LZO library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + GNU General Public License for more details. > + > + You should have received a copy of the GNU General Public License > + along with the LZO library; see the file COPYING. > + If not, write to the Free Software Foundation, Inc., > + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. > + > + Markus F.X.J. Oberhumer > + <ma...@ob...> > + http://www.oberhumer.com/opensource/lzo/ > + > + > + This file is modified version of lzo1x.h found in original LZO 2.02 > + code. Some additional changes have also been made to make it work > + in kernel space. > + > + Nitin Gupta > + <nit...@gm...> > + */ > + > +#ifndef __LZO1X_H > +#define __LZO1X_H > + > +/* LZO return codes */ > +#define LZO_E_OK 0 > +#define LZO_E_ERROR (-1) > +#define LZO_E_OUT_OF_MEMORY (-2) /* [not used right now] */ > +#define LZO_E_NOT_COMPRESSIBLE (-3) /* [not used right now] */ > +#define LZO_E_INPUT_OVERRUN (-4) > +#define LZO_E_OUTPUT_OVERRUN (-5) > +#define LZO_E_LOOKBEHIND_OVERRUN (-6) > +#define LZO_E_EOF_NOT_FOUND (-7) > +#define LZO_E_INPUT_NOT_CONSUMED (-8) > +#define LZO_E_NOT_YET_IMPLEMENTED (-9) /* [not used right now] */ > + > +/* Size of temp buffer (workmem) required by lzo1x_compress */ > +#define LZO1X_WORKMEM_SIZE ((size_t) (16384L * sizeof(unsigned char *))) > + > +/* > + * This requires 'workmem' of size LZO1X_WORKMEM_SIZE > + */ > +int lzo1x_compress(const unsigned char *src, size_t src_len, > + unsigned char *dst, size_t *dst_len, > + void *workmem); > + > +/* > + * This decompressor will catch all compressed data violations and > + * return an error code in this case. > + */ > +int lzo1x_decompress(const unsigned char *src, size_t src_len, > + unsigned char *dst, size_t *dst_len); > +#endif > diff --git a/lib/Kconfig b/lib/Kconfig > index 2e7ae6b..eb95eaa 100644 > --- a/lib/Kconfig > +++ b/lib/Kconfig > @@ -64,6 +64,12 @@ config ZLIB_INFLATE > config ZLIB_DEFLATE > tristate > > +config LZO1X_COMPRESS > + tristate > + > +config LZO1X_DECOMPRESS > + tristate > + > # > # Generic allocator support is selected if needed > # > diff --git a/lib/Makefile b/lib/Makefile > index c8c8e20..448ae37 100644 > --- a/lib/Makefile > +++ b/lib/Makefile > @@ -49,6 +49,8 @@ obj-$(CONFIG_GENERIC_ALLOCATOR) += genalloc.o > obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/ > obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/ > obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ > +obj-$(CONFIG_LZO1X_COMPRESS) += lzo1x/ > +obj-$(CONFIG_LZO1X_DECOMPRESS) += lzo1x/ > > obj-$(CONFIG_TEXTSEARCH) += textsearch.o > obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o > diff --git a/lib/lzo1x/Makefile b/lib/lzo1x/Makefile > new file mode 100644 > index 0000000..7b56a4d > --- /dev/null > +++ b/lib/lzo1x/Makefile > @@ -0,0 +1,2 @@ > +obj-$(CONFIG_LZO1X_COMPRESS) += lzo1x_compress.o > +obj-$(CONFIG_LZO1X_DECOMPRESS) += lzo1x_decompress.o > diff --git a/lib/lzo1x/lzo1x_compress.c b/lib/lzo1x/lzo1x_compress.c > new file mode 100755 > index 0000000..b525be6 > --- /dev/null > +++ b/lib/lzo1x/lzo1x_compress.c > @@ -0,0 +1,259 @@ > +/* lzo1x_compress.c -- LZO1X-1 compression > + > + This file is part of the LZO real-time data compression library. > + > + Copyright (C) 1996-2005 Markus Franz Xaver Johannes Oberhumer > + All Rights Reserved. > + > + The LZO library is free software; you can redistribute it and/or > + modify it under the terms of the GNU General Public License, > + version 2, as published by the Free Software Foundation. > + > + The LZO library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + GNU General Public License for more details. > + > + You should have received a copy of the GNU General Public License > + along with the LZO library; see the file COPYING. > + If not, write to the Free Software Foundation, Inc., > + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. > + > + Markus F.X.J. Oberhumer > + <ma...@ob...> > + http://www.oberhumer.com/opensource/lzo/ > + > + > + This file is derived from lzo1x_1.c and lzo1x_c.ch found in original > + LZO 2.02 code. Some additional changes have also been made to make > + it work in kernel space. > + > + Nitin Gupta > + <nit...@gm...> > + */ > + > +#include <linux/kernel.h> > +#include <linux/module.h> > +#include <linux/compiler.h> > +#include <linux/lzo1x.h> > + > +#include "lzo1x_int.h" > + > +MODULE_LICENSE("GPL"); > +MODULE_DESCRIPTION("LZO1X Compression"); > + > +/* compress a block of data. */ > +static noinline unsigned int > +lzo1x_compress_worker(const unsigned char *in, size_t in_len, > + unsigned char *out, size_t *out_len, > + void *workmem) > +{ > + register const unsigned char *ip; > + unsigned char *op; > + const unsigned char * const in_end = in + in_len; > + const unsigned char * const ip_end = in + in_len - M2_MAX_LEN - 5; > + const unsigned char *ii; > + const unsigned char ** const dict = (const unsigned char **)workmem; > + > + op = out; > + ip = in; > + ii = ip; > + > + ip += 4; > + for (;;) { > + register const unsigned char *m_pos; > + size_t m_off; > + size_t m_len; > + size_t dindex; > + > + DINDEX1(dindex, ip); > + m_pos = dict[dindex]; > + > + if ((m_pos < in) || (m_off = (size_t)(ip - m_pos)) <= 0 > + || m_off > M4_MAX_OFFSET) > + goto literal; > + > + if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3]) > + goto try_match; > + > + DINDEX2(dindex, ip); > + m_pos = dict[dindex]; > + > + if ((m_pos < in) || (m_off = (size_t)(ip - m_pos)) <= 0 > + || m_off > M4_MAX_OFFSET) > + goto literal; > + > + if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3]) > + goto try_match; > + > + goto literal; > + > +try_match: > + if (*(const unsigned short *)m_pos == > + *(const unsigned short *)ip) { > + if (likely(m_pos[2] == ip[2])) > + goto match; > + } > + > + /* a literal */ > +literal: > + dict[dindex] = ip; > + ++ip; > + if (unlikely(ip >= ip_end)) > + break; > + continue; > + > + /* a match */ > +match: > + dict[dindex] = ip; > + /* store current literal run */ > + if ((size_t)(ip - ii) > 0) { > + register size_t t = (size_t)(ip - ii); > + if (t <= 3) > + op[-2] |= (unsigned char)t; > + else if (t <= 18) > + *op++ = (unsigned char)(t - 3); > + else { > + register size_t tt = t - 18; > + *op++ = 0; > + while (tt > 255) { > + tt -= 255; > + *op++ = 0; > + } > + *op++ = (unsigned char)tt; > + } > + do > + *op++ = *ii++; > + while (--t > 0); > + } > + > + /* code the match */ > + ip += 3; > + if (m_pos[3] != *ip++ || m_pos[4] != *ip++ || > + m_pos[5] != *ip++ || m_pos[6] != *ip++ || > + m_pos[7] != *ip++ || m_pos[8] != *ip++) { > + --ip; > + m_len = (size_t)(ip - ii); > + > + if (m_off <= M2_MAX_OFFSET) { > + m_off -= 1; > + *op++ = (unsigned char)(((m_len - 1) << 5) | > + ((m_off & 7) << 2)); > + *op++ = (unsigned char)(m_off >> 3); > + } > + else if (m_off <= M3_MAX_OFFSET) { > + m_off -= 1; > + *op++ = (unsigned char)(M3_MARKER | > + (m_len - 2)); > + goto m3_m4_offset; > + } else { > + m_off -= 0x4000; > + *op++ = (unsigned char)(M4_MARKER | > + ((m_off & 0x4000) >> 11) | > + (m_len - 2)); > + goto m3_m4_offset; > + } > + } else { > + const unsigned char *end = in_end; > + const unsigned char *m = m_pos + M2_MAX_LEN + 1; > + while (ip < end && *m == *ip) > + m++, ip++; > + m_len = (size_t)(ip - ii); > + > + if (m_off <= M3_MAX_OFFSET) { > + m_off -= 1; > + if (m_len <= 33) > + *op++ = (unsigned char)(M3_MARKER | > + (m_len - 2)); > + else { > + m_len -= 33; > + *op++ = M3_MARKER | 0; > + goto m3_m4_len; > + } > + } else { > + m_off -= 0x4000; > + if (m_len <= M4_MAX_LEN) > + *op++ = (unsigned char)(M4_MARKER | > + ((m_off & 0x4000) >> 11) | > + (m_len - 2)); > + else { > + m_len -= M4_MAX_LEN; > + *op++ = (unsigned char)(M4_MARKER | > + ((m_off & 0x4000) >> 11)); > +m3_m4_len: > + while (m_len > 255) { > + m_len -= 255; > + *op++ = 0; > + } > + *op++ = (unsigned char)(m_len); > + } > + } > + > +m3_m4_offset: > + *op++ = (unsigned char)((m_off & 63) << 2); > + *op++ = (unsigned char)(m_off >> 6); > + } > + > + ii = ip; > + if (unlikely(ip >= ip_end)) > + break; > + } > + > + *out_len = (size_t)(op - out); > + return (size_t)(in_end - ii); > +} > + > + > +/* > + * This requires buffer (workmem) of size LZO1X_WORKMEM_SIZE > + * (exported by lzo1x.h). > + */ > +int > +lzo1x_compress(const unsigned char *in, size_t in_len, > + unsigned char *out, size_t *out_len, > + void *workmem) > +{ > + unsigned char *op = out; > + size_t t; > + > + if (!workmem) > + return -EINVAL; > + > + if (unlikely(in_len <= M2_MAX_LEN + 5)) > + t = in_len; > + else { > + t = lzo1x_compress_worker(in, in_len, op, out_len, workmem); > + op += *out_len; > + } > + > + if (t > 0) { > + const unsigned char *ii = in + in_len - t; > + > + if (op == out && t <= 238) > + *op++ = (unsigned char)(17 + t); > + else if (t <= 3) > + op[-2] |= (unsigned char)t; > + else if (t <= 18) > + *op++ = (unsigned char)(t - 3); > + else { > + size_t tt = t - 18; > + *op++ = 0; > + while (tt > 255) { > + tt -= 255; > + *op++ = 0; > + } > + *op++ = (unsigned char)tt; > + } > + do > + *op++ = *ii++; > + while (--t > 0); > + } > + *op++ = M4_MARKER | 1; > + *op++ = 0; > + *op++ = 0; > + > + *out_len = (size_t)(op - out); > + return LZO_E_OK; > +} > + > +EXPORT_SYMBOL(lzo1x_compress); > diff --git a/lib/lzo1x/lzo1x_decompress.c b/lib/lzo1x/lzo1x_decompress.c > new file mode 100755 > index 0000000..75ce294 > --- /dev/null > +++ b/lib/lzo1x/lzo1x_decompress.c > @@ -0,0 +1,238 @@ > +/* lzo1x_decompress.c -- LZO1X decompression > + > + This file is part of the LZO real-time data compression library. > + > + Copyright (C) 1996-2005 Markus Franz Xaver Johannes Oberhumer > + All Rights Reserved. > + > + The LZO library is free software; you can redistribute it and/or > + modify it under the terms of the GNU General Public License, > + version 2, as published by the Free Software Foundation. > + > + The LZO library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + GNU General Public License for more details. > + > + You should have received a copy of the GNU General Public License > + along with the LZO library; see the file COPYING. > + If not, write to the Free Software Foundation, Inc., > + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. > + > + Markus F.X.J. Oberhumer > + <ma...@ob...> > + http://www.oberhumer.com/opensource/lzo/ > + > + > + This file is derived from lzo1x_d1.c and lzo1x_d.ch found in original > + LZO 2.02 code. Some additional changes have also been made to make > + it work in kernel space. > + > + Nitin Gupta > + <nit...@gm...> > + */ > + > +#include <linux/kernel.h> > +#include <linux/module.h> > +#include <asm/byteorder.h> > +#include <linux/lzo1x.h> > + > +#include "lzo1x_int.h" > + > +MODULE_LICENSE("GPL"); > +MODULE_DESCRIPTION("LZO1X Decompression"); > + > +int > +lzo1x_decompress(const unsigned char *in, size_t in_len, > + unsigned char *out, size_t *out_len) > +{ > + register size_t t; > + register unsigned char *op = out; > + register const unsigned char *ip = in, *m_pos; > + const unsigned char * const ip_end = in + in_len; > + unsigned char * const op_end = out + *out_len; > + *out_len = 0; > + > + if (*ip > 17) { > + t = *ip++ - 17; > + if (t < 4) > + goto match_next; > + NEED_OP(t); > + NEED_IP(t + 1); > + do > + *op++ = *ip++; > + while (--t > 0); > + goto first_literal_run; > + } > + > + while (TEST_IP) { > + t = *ip++; > + if (t >= 16) > + goto match; > + /* a literal run */ > + if (t == 0) { > + NEED_IP(1); > + while (*ip == 0) { > + t += 255; > + ip++; > + NEED_IP(1); > + } > + t += 15 + *ip++; > + } > + /* copy literals */ > + NEED_OP(t + 3); > + NEED_IP(t + 4); > + COPY4(op, ip); > + op += 4; ip += 4; > + if (--t > 0) { > + if (t >= 4) { > + do { > + COPY4(op, ip); > + op += 4; ip += 4; t -= 4; > + } while (t >= 4); > + if (t > 0) > + do > + *op++ = *ip++; > + while (--t > 0); > + } > + else > + do > + *op++ = *ip++; > + while (--t > 0); > + } > + > +first_literal_run: > + t = *ip++; > + if (t >= 16) > + goto match; > + m_pos = op - (1 + M2_MAX_OFFSET); > + m_pos -= t >> 2; > + m_pos -= *ip++ << 2; > + TEST_LB(m_pos); > + NEED_OP(3); > + *op++ = *m_pos++; > + *op++ = *m_pos++; > + *op++ = *m_pos; > + goto match_done; > + > + /* handle matches */ > + do { > +match: > + if (t >= 64) { /* a M2 match */ > + m_pos = op - 1; > + m_pos -= (t >> 2) & 7; > + m_pos -= *ip++ << 3; > + t = (t >> 5) - 1; > + TEST_LB(m_pos); > + NEED_OP(t + 3 - 1); > + goto copy_match; > + } else if (t >= 32) { /* a M3 match */ > + t &= 31; > + if (t == 0) { > + NEED_IP(1); > + while (*ip == 0) { > + t += 255; > + ip++; > + NEED_IP(1); > + } > + t += 31 + *ip++; > + } > + m_pos = op - 1 - (cpu_to_le16( > + *(const unsigned short *)ip) >> 2); > + ip += 2; > + } else if (t >= 16) { /* a M4 match */ > + m_pos = op; > + m_pos -= (t & 8) << 11; > + t &= 7; > + if (t == 0) { > + NEED_IP(1); > + while (*ip == 0) { > + t += 255; > + ip++; > + NEED_IP(1); > + } > + t += 7 + *ip++; > + } > + m_pos -= cpu_to_le16( > + *(const unsigned short *)ip) >> 2; > + ip += 2; > + if (m_pos == op) > + goto eof_found; > + m_pos -= 0x4000; > + } else { /* a M1 match */ > + m_pos = op - 1; > + m_pos -= t >> 2; > + m_pos -= *ip++ << 2; > + TEST_LB(m_pos); > + NEED_OP(2); > + *op++ = *m_pos++; > + *op++ = *m_pos; > + goto match_done; > + } > + > + /* copy match */ > + TEST_LB(m_pos); > + NEED_OP(t + 3 - 1); > + > + if (t >= 2 * 4 - (3 - 1) && (op - m_pos) >= 4) { > + COPY4(op, m_pos); > + op += 4; m_pos += 4; t -= 4 - (3 - 1); > + do { > + COPY4(op, m_pos); > + op += 4; m_pos += 4; t -= 4; > + } while (t >= 4); > + if (t > 0) > + do *op++ = *m_pos++; > + while (--t > 0); > + } else { > +copy_match: > + *op++ = *m_pos++; > + *op++ = *m_pos++; > + do > + *op++ = *m_pos++; > + while (--t > 0); > + } > + > +match_done: > + t = ip[-2] & 3; > + if (t == 0) > + break; > + > + /* copy literals */ > +match_next: > + NEED_OP(t); > + NEED_IP(t + 1); > + *op++ = *ip++; > + if (t > 1) { > + *op++ = *ip++; > + if (t > 2) > + *op++ = *ip++; > + } > + t = *ip++; > + } while (TEST_IP); > + } > + > + /* no EOF code was found */ > + *out_len = (size_t)(op - out); > + return LZO_E_EOF_NOT_FOUND; > + > +eof_found: > + *out_len = (size_t)(op - out); > + return (ip == ip_end ? LZO_E_OK : > + (ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : > + LZO_E_INPUT_OVERRUN)); > + > +input_overrun: > + *out_len = (size_t)(op - out); > + return LZO_E_INPUT_OVERRUN; > + > +output_overrun: > + *out_len = (size_t)(op - out); > + return LZO_E_OUTPUT_OVERRUN; > + > +lookbehind_overrun: > + *out_len = (size_t)(op - out); > + return LZO_E_LOOKBEHIND_OVERRUN; > +} > + > +EXPORT_SYMBOL(lzo1x_decompress); > diff --git a/lib/lzo1x/lzo1x_int.h b/lib/lzo1x/lzo1x_int.h > new file mode 100755 > index 0000000..4f0fe8d > --- /dev/null > +++ b/lib/lzo1x/lzo1x_int.h > @@ -0,0 +1,85 @@ > +/* lzo1x_int.h -- to be used internally by LZO de/compression algorithms > + > + This file is part of the LZO real-time data compression library. > + > + Copyright (C) 1996-2005 Markus Franz Xaver Johannes Oberhumer > + All Rights Reserved. > + > + The LZO library is free software; you can redistribute it and/or > + modify it under the terms of the GNU General Public License, > + version 2, as published by the Free Software Foundation. > + > + The LZO library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + GNU General Public License for more details. > + > + You should have received a copy of the GNU General Public License > + along with the LZO library; see the file COPYING. > + If not, write to the Free Software Foundation, Inc., > + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. > + > + Markus F.X.J. Oberhumer > + <ma...@ob...> > + http://www.oberhumer.com/opensource/lzo/ > + > + > + This file was derived from several header files found in original > + LZO 2.02 code. Some additional changes have also been made to make > + it work in kernel space. > + > + Nitin Gupta > + <nit...@gm...> > + */ > + > +#ifndef __LZO1X_INT_H > +#define __LZO1X_INT_H > + > +#include <linux/types.h> > + > +#define D_BITS 14 > +#define D_SIZE (1u << D_BITS) > +#define D_MASK (D_SIZE - 1) > +#define D_HIGH ((D_MASK >> 1) + 1) > + > +#define DX2(p,s1,s2) \ > + (((((size_t)((p)[2]) << (s2)) ^ (p)[1]) << (s1)) ^ (p)[0]) > +#define DX3(p,s1,s2,s3) \ > + ((DX2((p) + 1, s2, s3) << (s1)) ^ (p)[0]) > +#define DINDEX1(d,p) \ > + d = ((size_t)(0x21 * DX3(p, 5, 5, 6)) >> 5) & D_MASK > +#define DINDEX2(d,p) \ > + d = (d & (D_MASK & 0x7ff)) ^ (D_HIGH | 0x1f) > + > +#define COPY4(dst,src) *(u32 *)(dst) = *(u32 *)(src) > + > +/* LZO1X Specific constants */ > +#define M1_MAX_OFFSET 0x0400 > +#define M2_MAX_OFFSET 0x0800 > +#define M3_MAX_OFFSET 0x4000 > +#define M4_MAX_OFFSET 0xbfff > + > +#define M1_MIN_LEN 2 > +#define M1_MAX_LEN 2 > +#define M2_MIN_LEN 3 > +#define M2_MAX_LEN 8 > +#define M3_MIN_LEN 3 > +#define M3_MAX_LEN 33 > +#define M4_MIN_LEN 3 > +#define M4_MAX_LEN 9 > + > +#define M1_MARKER 0 > +#define M2_MARKER 64 > +#define M3_MARKER 32 > +#define M4_MARKER 16 > + > +/* Bounds checking */ > +#define TEST_IP (ip < ip_end) > +#define NEED_IP(x) \ > + if ((size_t)(ip_end - ip) < (size_t)(x)) goto input_overrun > +#define NEED_OP(x) \ > + if ((size_t)(op_end - op) < (size_t)(x)) goto output_overrun > +#define TEST_LB(m_pos) \ > + if (m_pos < out || m_pos >= op) goto lookbehind_overrun > + > +#endif > > |