WEB开发网
开发学院软件开发C++ lzw压缩算法的c语言实现 阅读

lzw压缩算法的c语言实现

 2008-03-08 12:31:28 来源:WEB开发网   
核心提示: 1 程序由五个模块组成,(1)lzw.h定义了一些基本的数据结构,lzw压缩算法的c语言实现,常量,还有变量的初始化等,从理解原理到代码的实现花费了不少时间与精力,但是真正的快乐也就在这里,#ifndef __LZW_H__#define __LZW_H__//--#

1 程序由五个模块组成。

(1) lzw.h   定义了一些基本的数据结构,常量,还有变量的初始化等。

#ifndef __LZW_H__
#define __LZW_H__
//------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <windows.h>
#include <memory.h>
//------------------------------------------------------------------------------
#define LZW_BASE  0x102// The code base
#define CODE_LEN    12  // Max code length
#define TABLE_LEN   4099 // It must be PRime number and bigger than 2^CODE_LEN=4096.
    // SUCh as 5051 is also ok.
#define BUFFERSIZE   1024
//------------------------------------------------------------------------------
typedef struct
{
  HANDLE   h_sour; // Source file handle.
  HANDLE   h_dest; // Destination file handle.
  
  HANDLE   h_suffix; // Suffix table handle.
  HANDLE   h_prefix; // Prefix table handle.
  HANDLE   h_code; // Code table handle.
  
  LPWord   lp_prefix; // Prefix table head pointer.
  LPBYTE   lp_suffix; // Suffix table head pointer.
  LPWORD   lp_code; // Code table head pointer.

  WORD    code;
  WORD    prefix;
  BYTE    suffix;

  BYTE    cur_code_len; // Current code length.[ used in Dynamic-Code-Length mode ]

}LZW_DATA,*PLZW_DATA;


typedef struct
{
  WORD    top;
  WORD    index;

  LPBYTE   lp_buffer;
  HANDLE   h_buffer;
  
  BYTE    by_left;
  DWORD    dw_buffer;

  BOOL    end_flag;

}BUFFER_DATA,*PBUFFER_DATA;


typedef struct         //Stack used in decode
{
WORD    index;
HANDLE   h_stack;
LPBYTE   lp_stack;

}STACK_DATA,*PSTACK_DATA;
//------------------------------------------------------------------------------
VOID stack_create( PSTACK_DATA stack )
{
stack->h_stack = GlobalAlloc( GHND , TABLE_LEN*sizeof(BYTE) );
stack->lp_stack = GlobalLock( stack->h_stack );
stack->index = 0;
}
//------------------------------------------------------------------------------
VOID stack_destory( PSTACK_DATA stack )
{
GlobalUnlock( stack->h_stack );
  GlobalFree ( stack->h_stack );
}
//------------------------------------------------------------------------------
VOID buffer_create( PBUFFER_DATA  buffer )
{
  buffer->h_buffer  = GlobalAlloc( GHND, BUFFERSIZE*sizeof(BYTE) );
  buffer->lp_buffer = GlobalLock( buffer->h_buffer );
  buffer->top    = 0;
  buffer->index   = 0;
  buffer->by_left  = 0;
  buffer->dw_buffer = 0;
  buffer->end_flag  = FALSE;
}
//------------------------------------------------------------------------------
VOID buffer_destory( PBUFFER_DATA  buffer )
{
  GlobalUnlock( buffer->h_buffer );
  GlobalFree ( buffer->h_buffer );
}
//------------------------------------------------------------------------------
VOID re_init_lzw( PLZW_DATA lzw )  //When code table reached its top it should
{                  //be reinitialized.   
  memset( lzw->lp_code, 0xFFFF, TABLE_LEN*sizeof(WORD) );
  lzw->code     = LZW_BASE;
  lzw->cur_code_len = 9;
}
//------------------------------------------------------------------------------
VOID lzw_create(PLZW_DATA  lzw,  HANDLE h_sour,  HANDLE h_dest)
{
WORD i;
  lzw->h_code    = GlobalAlloc( GHND, TABLE_LEN*sizeof(WORD) );
  lzw->h_prefix   = GlobalAlloc( GHND, TABLE_LEN*sizeof(WORD) );
  lzw->h_suffix   = GlobalAlloc( GHND, TABLE_LEN*sizeof(BYTE) );
  lzw->lp_code    = GlobalLock( lzw->h_code  );
  lzw->lp_prefix   = GlobalLock( lzw->h_prefix );
  lzw->lp_suffix   = GlobalLock( lzw->h_suffix );
  lzw->code     = LZW_BASE;
  lzw->cur_code_len = 9;
  lzw->h_sour    = h_sour;
  lzw->h_dest    = h_dest;
  memset( lzw->lp_code, 0xFFFF, TABLE_LEN*sizeof(WORD) );

}
//------------------------------------------------------------------------------
VOID lzw_destory(PLZW_DATA  lzw)
{ 
  GlobalUnlock( lzw->h_code  );
  GlobalUnlock( lzw->h_prefix );
  GlobalUnlock( lzw->h_suffix );

GlobalFree( lzw->h_code );
  GlobalFree( lzw->h_prefix );
  GlobalFree( lzw->h_suffix );  
}
//------------------------------------------------------------------------------
#endif

(2) fileio.h  定义了一些文件操作

#ifndef __FILEIO_H__
#define __FILEIO_H__
//------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <windows.h>
//------------------------------------------------------------------------------
HANDLE file_handle(CHAR* file_name)
{
  HANDLE h_file;
  h_file = CreateFile(file_name,
            GENERIC_READGENERIC_WRITE,
            FILE_SHARE_READFILE_SHARE_WRITE,
            NULL,
            OPEN_ALWAYS,
            0,
            NULL
            );
  return h_file;
}
//------------------------------------------------------------------------------
WORD load_buffer(HANDLE h_sour, PBUFFER_DATA buffer) // Load file to buffer
{
  DWORD ret;
  ReadFile(h_sour,buffer->lp_buffer,BUFFERSIZE,&ret,NULL);
  buffer->index = 0;
  buffer->top = (WORD)ret;
  return (WORD)ret;
}
//------------------------------------------------------------------------------
WORD empty_buffer( PLZW_DATA lzw, PBUFFER_DATA buffer)// Output buffer to file
{
  
  DWORD ret;
  if(buffer->end_flag) // The flag mark the end of decode
{
 if( buffer->by_left )
 {
  buffer->lp_buffer[ buffer->index++ ] = (BYTE)( buffer->dw_buffer >> 32-buffer->by_left )<<(8-buffer->by_left);
 }
}
WriteFile(lzw->h_dest, buffer->lp_buffer,buffer->index,&ret,NULL);
  buffer->index = 0;
  buffer->top = ret;
  return (WORD)ret;
}
//------------------------------------------------------------------------------
#endif

(3) hash.h 定义了压缩时所用的码表操作函数,为了快速查找使用了hash算法,还有处理hash冲突的函数

#ifndef __HASH_H__
#define __HASH_H__
//------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <windows.h>
//------------------------------------------------------------------------------
#define  DIV    TABLE_LEN
#define  HASHSTEP 13     // It should bigger than 0.
//------------------------------------------------------------------------------
WORD get_hash_index( PLZW_DATA lzw )
{
  DWORD tmp;
  WORD result;
  DWORD prefix;
  DWORD suffix;
  prefix = lzw->prefix;
  suffix = lzw->suffix;
  tmp = prefix<<8 suffix;
  result = tmp % DIV;
  return result;
}
//------------------------------------------------------------------------------
WORD re_hash_index( WORD hash ) // If hash conflict occured we must recalculate
{                // hash index .
  WORD result;
  result = hash + HASHSTEP;
  result = result % DIV;
  return result;
}
//------------------------------------------------------------------------------
BOOL in_table( PLZW_DATA lzw ) // To find whether current code is already in table.
{
  BOOL result;
  WORD hash;

  hash = get_hash_index( lzw );
  if( lzw->lp_code[ hash ] == 0xFFFF )
  {
    result = FALSE;  
  }
  else
  {
    if( lzw->lp_prefix[ hash ] == lzw->prefix &&
      lzw->lp_suffix[ hash ] == lzw->suffix )
    {
      result = TRUE;
    }
    else
    {
      result = FALSE;
      while( lzw->lp_code[ hash ] != 0xFFFF )
      {
        if( lzw->lp_prefix[ hash ] == lzw->prefix &&
          lzw->lp_suffix[ hash ] == lzw->suffix )
        {
            result = TRUE;
            break;  
        }
        hash = re_hash_index( hash );
      }
    }
  }
  return result;
}
//------------------------------------------------------------------------------
WORD get_code( PLZW_DATA lzw )
{
  WORD hash;
  WORD code;
  hash = get_hash_index( lzw );
  if( lzw->lp_prefix[ hash ] == lzw->prefix &&
    lzw->lp_suffix[ hash ] == lzw->suffix )
  {
    code = lzw->lp_code[ hash ];
  }
  else
  {
    while( lzw->lp_prefix[ hash ] != lzw->prefix
        lzw->lp_suffix[ hash ] != lzw->suffix )
    {
        hash = re_hash_index( hash );  
    }
    code = lzw->lp_code[ hash ];
  }
  return code;
}
//------------------------------------------------------------------------------
VOID insert_table( PLZW_DATA lzw )
{

  WORD hash;
  hash = get_hash_index( lzw );
  if( lzw->lp_code[ hash ] == 0xFFFF )
  {
    lzw->lp_prefix[ hash ] = lzw->prefix;
    lzw->lp_suffix[ hash ] = lzw->suffix;
    lzw->lp_code[ hash ]  = lzw->code;
  }
  else
  {
    while( lzw->lp_code[ hash ] != 0xFFFF )
    {
        hash = re_hash_index( hash );  
    }
    lzw->lp_prefix[ hash ] = lzw->prefix;
    lzw->lp_suffix[ hash ] = lzw->suffix;
    lzw->lp_code[ hash ]  = lzw->code;
  }

}
//------------------------------------------------------------------------------


#endif

(4) encode.h 压缩程序主函数

#ifndef __ENCODE_H__
#define __ENCODE_H__
//------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <windows.h>

//------------------------------------------------------------------------------
VOID output_code( DWORD code ,PBUFFER_DATA out, PLZW_DATA lzw)
{
  out->dw_buffer = code << ( 32 - out->by_left - lzw->cur_code_len );
  out->by_left += lzw->cur_code_len;

  while( out->by_left >= 8 )
  {
    if( out->index == BUFFERSIZE )
    {
      empty_buffer( lzw,out);
    }

    out->lp_buffer[ out->index++ ] = (BYTE)( out->dw_buffer >> 24 );
    out->dw_buffer <<= 8;
    out->by_left -= 8;
  }
}
//------------------------------------------------------------------------------
VOID do_encode( PBUFFER_DATA in, PBUFFER_DATA out, PLZW_DATA lzw)
{
  WORD prefix;
  while( in->index != in->top )
  {
    if( !in_table(lzw) )
    {
        // current code not in code table
        // then add it to table and output prefix


        insert_table(lzw);
        prefix = lzw->suffix;
        output_code( lzw->prefix ,out ,lzw );
        lzw->code++;

        if( lzw->code == (WORD)1<< lzw->cur_code_len )
        {
          // code reached current code top(1<<cur_code_len)
          // then current code length add one
   lzw->cur_code_len++;
   if( lzw->cur_code_len == CODE_LEN + 1 )
   {
   re_init_lzw( lzw );
   }

        }
    }
    else
    {
        // current code already in code table
        // then output nothing
        prefix = get_code(lzw);

    }
    lzw->prefix = prefix;
    lzw->suffix = in->lp_buffer[ in->index++ ];
  }
}

//------------------------------------------------------------------------------
VOID encode(HANDLE h_sour,HANDLE h_dest)
{
  LZW_DATA    lzw;
  BUFFER_DATA   in ;
  BUFFER_DATA   out;
  
  BOOL first_run = TRUE;

  lzw_create( &lzw ,h_sour,h_dest );
  buffer_create( &in );
  buffer_create( &out );


  while( load_buffer( h_sour, &in ) )
  {
    if( first_run )
    {// File length should be considered but here we simply
     // believe file length bigger than 2 bytes.
        lzw.prefix = in.lp_buffer[ in.index++ ];
        lzw.suffix = in.lp_buffer[ in.index++ ];
        first_run = FALSE;
    }
    do_encode(&in , &out, &lzw);
  }
  
output_code(lzw.prefix, &out , &lzw);
output_code(lzw.suffix, &out , &lzw);
out.end_flag = TRUE;
  empty_buffer( &lzw,&out);

  lzw_destory( &lzw );
  buffer_destory( &in );
  buffer_destory( &out );
}

//------------------------------------------------------------------------------

#endif

(5) decode.h 解压函数主函数

#ifndef __DECODE_H__
#define __DECODE_H__
//------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <windows.h>
//------------------------------------------------------------------------------
VOID out_code( WORD code ,PBUFFER_DATA buffer,PLZW_DATA lzw,PSTACK_DATA stack)
{
WORD tmp;
if( code < 0x100 )
{
 stack->lp_stack[ stack->index++ ] = code;
}
else
{
  stack->lp_stack[ stack->index++ ] = lzw->lp_suffix[ code ];
  tmp = lzw->lp_prefix[ code ];
  while( tmp > 0x100 )
  {
  stack->lp_stack[ stack->index++ ] = lzw->lp_suffix[ tmp ];
  tmp = lzw->lp_prefix[ tmp ];
  }
  stack->lp_stack[ stack->index++ ] = (BYTE)tmp;

}


while( stack->index )
{
 if( buffer->index == BUFFERSIZE )
 {
  empty_buffer(lzw,buffer);
 }
 buffer->lp_buffer[ buffer->index++ ] = stack->lp_stack[ --stack->index ] ;
}
}
//------------------------------------------------------------------------------
VOID insert_2_table(PLZW_DATA lzw )
{

lzw->lp_code[ lzw->code ]  = lzw->code;
lzw->lp_prefix[ lzw->code ] = lzw->prefix;
lzw->lp_suffix[ lzw->code ] = lzw->suffix;
lzw->code++;

if( lzw->code == ((WORD)1<<lzw->cur_code_len)-1 )
{
 lzw->cur_code_len++;
 if( lzw->cur_code_len == CODE_LEN+1 )
   lzw->cur_code_len = 9;
}
if(lzw->code >= 1<<CODE_LEN )
{
 re_init_lzw(lzw);
}

}
//------------------------------------------------------------------------------
WORD get_next_code( PBUFFER_DATA buffer , PLZW_DATA lzw )
{

BYTE next;
WORD code;
while( buffer->by_left < lzw->cur_code_len )
{
 if( buffer->index == BUFFERSIZE )
 {
  load_buffer( lzw->h_sour, buffer );
 }
 next = buffer->lp_buffer[ buffer->index++ ];
 buffer->dw_buffer = (DWORD)next << (24-buffer->by_left);
 buffer->by_left  += 8;
}
code = buffer->dw_buffer >> ( 32 - lzw->cur_code_len );
buffer->dw_buffer <<= lzw->cur_code_len;
buffer->by_left  -= lzw->cur_code_len;

return code;
}
//------------------------------------------------------------------------------
VOID do_decode( PBUFFER_DATA in, PBUFFER_DATA out, PLZW_DATA lzw, PSTACK_DATA stack)
{
WORD code;
WORD tmp;
while( in->index != in->top )
{
 code = get_next_code( in ,lzw );

 if( code < 0x100 )
 {
  // code already in table
  // then simply output the code
  lzw->suffix = (BYTE)code;
 }
 else
 {
  if( code < lzw->code )
  {
  // code also in table
  // then output code chain
  
  tmp = lzw->lp_prefix[ code ];
  while( tmp > 0x100 )
  {
   tmp = lzw->lp_prefix[ tmp ];
  }
  lzw->suffix = (BYTE)tmp;
  }
  else
  {
  // code == lzw->code
  // code not in table
  // add code into table
  // and out put code
  tmp = lzw->prefix;
  while( tmp > 0x100 )
  {
   tmp = lzw->lp_prefix[ tmp ];
  }
  lzw->suffix = (BYTE)tmp;
  }
 }
 insert_2_table( lzw );
 out_code(code,out,lzw,stack);

 lzw->prefix = code;

}

}
//------------------------------------------------------------------------------
VOID decode( HANDLE h_sour, HANDLE h_dest )
{
  LZW_DATA    lzw;
  BUFFER_DATA   in ;
  BUFFER_DATA   out;
STACK_DATA   stack;
BOOL  first_run;

first_run = TRUE;


  lzw_create( &lzw ,h_sour,h_dest );
  buffer_create( &in );
  buffer_create( &out );
stack_create(&stack );

  while( load_buffer( h_sour, &in ) )
  {
 if( first_run )
 {
  lzw.prefix = get_next_code( &in, &lzw );
  lzw.suffix = lzw.prefix;
  out_code(lzw.prefix, &out, &lzw , &stack);
  first_run = FALSE;
 }
    do_decode(&in , &out, &lzw, &stack);
  }

  empty_buffer( &lzw,&out);

  lzw_destory( &lzw );
  buffer_destory( &in );
  buffer_destory( &out );
stack_destory( &stack);
}

#endif

2 下面给出一个应用上面模块的简单例子

#include <stdio.h>
#include <stdlib.h>
//------------------------------------------------------------------------------

#include "lzw.h"
#include "hash.h"
#include "fileio.h"
#include "encode.h"
#include "decode.h"

//------------------------------------------------------------------------------
HANDLE h_file_sour; 
HANDLE h_file_dest;
HANDLE h_file;
CHAR* file_name_in = "d:\\code.c";
CHAR* file_name_out= "d:\\encode.e";
CHAR* file_name  = "d:\\decode.d";


//------------------------------------------------------------------------------
int main(int argc, char *argv[])
{
  h_file_sour = file_handle(file_name_in);
  h_file_dest = file_handle(file_name_out);
  h_file   = file_handle(file_name);


 encode(h_file_sour, h_file_dest); 
// decode(h_file_dest,h_file);


  CloseHandle(h_file_sour);
  CloseHandle(h_file_dest); 
  CloseHandle(h_file);

 return 0;   
}  

3 后语

 之前研究gif文件格式时偶然接触了lzw压缩算法,于是就想自己动手实现。从一开始看人家的原码,然后跟着模拟,到现在用自己的语言表达出来,从理解原理到代码的实现花费了不少时间与精力,但是真正的快乐也就在这里,现在把她拿出来跟大家分享也就是分享快乐。

Tags:lzw 压缩 算法

编辑录入:爽爽 [复制链接] [打 印]
赞助商链接