HOWTO decompress an LZ4 archive

From SEGGER Wiki
Jump to: navigation, search

emCompress-Pro provides the capability to decompress streams that are compressed in various formats, but it does not provide out-of-the-box container support.

Recognizing that customers may want to deal with encapsulated streams, this code can be used with emCompress-Pro to check the integrity of an LZ4 file containing an LZ4 frame (which in turn contains multiple LZ4 blocks):

/*********************************************************************
*                   (c) SEGGER Microcontroller GmbH                  *
*                        The Embedded Experts                        *
*                           www.segger.com                           *
**********************************************************************

-------------------------- END-OF-HEADER -----------------------------

File        : CX_LZ4.c
Purpose     : Decode an LZ4-compressed file and report its size and checksum status.

References  : LZ4 frame format - https://github.com/lz4/lz4/blob/master/doc/lz4_Frame_format.md
              LZ4 block format - https://github.com/lz4/lz4/blob/master/doc/lz4_Block_format.md
              xxHash Algorithm - https://github.com/Cyan4973/xxHash/blob/dev/doc/xxhash_spec.md

*/

/*********************************************************************
*
*       #include Section
*
**********************************************************************
*/

#include "CX_Int.h"
#include "SEGGER_MEM.h"
#include "SEGGER_UTIL.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*********************************************************************
*
*       Defines, fixed
*
**********************************************************************
*/

//
// Flag byte (FLG) of the LZ4 frame descriptor.
//
// Version bits; _DecodeFrame() accepts only the value 0x40 in this field.
#define FLG_VERSION 0xC0
// Block independence; selects the flush mode at the end of each block and the
// "Block independent"/"Block dependent" text in the final status line.
#define FLG_BINDEP  0x20
// Block checksum flag according to the LZ4 frame format.
#define FLG_BCHK    0x10
// Content size field is present in the frame descriptor.
#define FLG_CSIZE   0x08
// Content checksum is stored after the last block.
#define FLG_CCHK    0x04
// Dictionary ID field is present in the frame descriptor.
#define FLG_DICTID  0x01

//
// Block descriptor (BD) byte: mask of the block maximum size field
// according to the LZ4 frame format.
//
#define BD_BSIZE    0x70

//
// xxHash: the five 32-bit prime constants used by the _XXHASH_* functions.
//
#define PRIME32_1   2654435761u
#define PRIME32_2   2246822519u
#define PRIME32_3   3266489917u
#define PRIME32_4    668265263u
#define PRIME32_5    374761393u

//
// 32-bit rotate left of X by N bits.
//
// X must be an unsigned 32-bit value and N must be in the range 1..31
// (a count of 0 or 32 would shift by the full type width).
// Both arguments are parenthesized everywhere they are used so that
// arbitrary expressions, e.g. ROL(x, a+b), expand correctly.
//
#define ROL(X, N)   (((X) << (N)) | ((X) >> (32-(N))))

/*********************************************************************
*
*       Local types
*
**********************************************************************
*/

//
// Running state of the octet-wise xxHash computation.
//
typedef struct {
  // Four accumulators; seeded by _XXHASH_Init() and updated by
  // _XXHASH_AddByte() each time 16 octets have been buffered.
  U32      aAcc[4];
  // Octets added since the accumulators were last updated.
  U8       aData[16];
  // Number of valid octets in aData; reset to 0 when it reaches 16.
  unsigned DataLen;
  // Total number of octets added since _XXHASH_Init().
  unsigned TotalLen;
} XXHASH_CONTEXT;

//
// Everything read from, and computed for, the LZ4 frame being checked.
//
typedef struct {
  U8             FLG;              // Flag byte read from the frame descriptor (see FLG_* masks).
  U8             BD;               // Block descriptor byte read from the frame descriptor.
  U8             HC;               // Header checksum byte stored in the file.
  U8             aMagic     [4];   // Magic number octets at the start of the file (little endian).
  U8             aCSize     [8];   // Stored content size, decoded with SEGGER_RdU64LE().
  U8             aCRC       [8];   // Stored content checksum; decoded with SEGGER_RdU32LE().
  U8             aDictID    [4];   // Stored dictionary ID; only fed into the header checksum.
  U8             aBlockSize [4];   // Raw block size field of the block currently processed.
  XXHASH_CONTEXT Cksum;            // xxHash over all decompressed octets.
  U32            BlockSize;        // Decoded value of aBlockSize; 0 ends the block loop.
  U64            CalcCSize;        // Number of decompressed octets produced so far.
} FRAME_INFO;

/*********************************************************************
*
*       Static data
*
**********************************************************************
*/

//
// Allocator context; set up by main() through SEGGER_MEM_SYSTEM_HEAP_Init()
// and passed to CX_DECODE_Init() when the decoder is created.
//
static SEGGER_MEM_CONTEXT _StaticAllocator;

/*********************************************************************
*
*       Static code
*
**********************************************************************
*/

/*********************************************************************
*
*       _XXHASH_Init()
*
*  Function description
*    Prepare an xxHash context for a new computation: seed the four
*    accumulators and clear the buffered and total octet counts.
*
*  Parameters
*    pSelf - Pointer to the xxHash context to set up.
*    Seed  - Seed value mixed into every accumulator.
*/
static void _XXHASH_Init(XXHASH_CONTEXT *pSelf, U32 Seed) {
  U32 *pAcc;
  //
  pAcc    = pSelf->aAcc;
  pAcc[0] = Seed + PRIME32_1 + PRIME32_2;
  pAcc[1] = Seed + PRIME32_2;
  pAcc[2] = Seed + 0;
  pAcc[3] = Seed - PRIME32_1;
  //
  // Nothing buffered, nothing hashed yet.
  //
  pSelf->TotalLen = 0;
  pSelf->DataLen  = 0;
}

/*********************************************************************
*
*       _XXHASH_AddByte()
*
*  Function description
*    Feed a single octet into the xxHash computation. The octet is
*    buffered; once 16 octets have been collected, each of the four
*    32-bit little-endian lanes is folded into its accumulator and
*    the buffer is emptied.
*
*  Parameters
*    pSelf - Pointer to xxHash context.
*    Data  - Octet to add.
*/
static void _XXHASH_AddByte(XXHASH_CONTEXT *pSelf, U8 Data) {
  unsigned Lane;
  U32      Acc;
  //
  pSelf->aData[pSelf->DataLen] = Data;
  pSelf->DataLen  += 1;
  pSelf->TotalLen += 1;
  if (pSelf->DataLen != 16) {
    return;                                // Stripe not complete yet.
  }
  //
  // A complete 16-octet stripe is buffered: run one round per lane.
  //
  for (Lane = 0; Lane < 4; ++Lane) {
    Acc  = pSelf->aAcc[Lane];
    Acc += SEGGER_RdU32LE(&pSelf->aData[4*Lane]) * PRIME32_2;
    Acc  = ROL(Acc, 13);
    Acc *= PRIME32_1;
    pSelf->aAcc[Lane] = Acc;
  }
  pSelf->DataLen = 0;
}

/*********************************************************************
*
*       _XXHASH_Add()
*
*  Function description
*    Feed an octet string into the xxHash computation, one octet at
*    a time and in order.
*
*  Parameters
*    pSelf   - Pointer to xxHash context.
*    pData   - Pointer to the first octet of the string.
*    DataLen - Number of octets to add; may be zero.
*/
static void _XXHASH_Add(XXHASH_CONTEXT *pSelf, U8 *pData, unsigned DataLen) {
  unsigned i;
  //
  for (i = 0; i < DataLen; ++i) {
    _XXHASH_AddByte(pSelf, pData[i]);
  }
}

/*********************************************************************
*
*       _XXHASH_Get()
*
*  Function description
*    Compute the xxHash value of all octets added so far. The context
*    itself is only read, not modified.
*
*  Parameters
*    pSelf - Pointer to xxHash context.
*
*  Return value
*    Computed xxHash value over input data.
*/
static U32 _XXHASH_Get(XXHASH_CONTEXT *pSelf) {
  U32      Hash;
  unsigned Off;
  unsigned Left;
  //
  // Starting value: merge the four accumulators when at least one
  // full stripe was processed, otherwise start from the third
  // accumulator, which has not been touched since initialization.
  //
  if (pSelf->TotalLen >= 16) {
    Hash = ROL(pSelf->aAcc[0], 1) +
           ROL(pSelf->aAcc[1], 7) +
           ROL(pSelf->aAcc[2], 12) +
           ROL(pSelf->aAcc[3], 18);
  } else {
    Hash = pSelf->aAcc[2] + PRIME32_5;
  }
  Hash += pSelf->TotalLen;
  //
  // Consume the buffered remainder: first in 32-bit little-endian
  // words, then octet by octet.
  //
  Off  = 0;
  Left = pSelf->DataLen;
  for (; Left >= 4; Left -= 4, Off += 4) {
    Hash += SEGGER_RdU32LE(&pSelf->aData[Off]) * PRIME32_3;
    Hash  = ROL(Hash, 17) * PRIME32_4;
  }
  for (; Left > 0; Left -= 1, Off += 1) {
    Hash += pSelf->aData[Off] * PRIME32_5;
    Hash  = ROL(Hash, 11) * PRIME32_1;
  }
  //
  // Final mix.
  //
  Hash  = (Hash ^ (Hash >> 15)) * PRIME32_2;
  Hash  = (Hash ^ (Hash >> 13)) * PRIME32_3;
  Hash ^= Hash >> 16;
  //
  return Hash;
}

/*********************************************************************
*
*       _PrintSignOn()
*
*  Function description
*    Print the application's sign-on banner (product name, library
*    version, build date/time and copyright text) to standard output.
*/
static void _PrintSignOn(void) {
  putchar('\n');
  printf("emCompress-Flex LZ4 Checker V%s " "compiled " __DATE__ " " __TIME__ "\n",
         CX_GetVersionText());
  printf("%s    www.segger.com\n\n", CX_GetCopyrightText());
}

/*********************************************************************
*
*       _Decompress()
*
*  Function description
*    Decompress one LZ4 block. The compressed octets are read from
*    the file one at a time and handed to the decoder; every octet
*    the decoder produces is counted in pInfo->CalcCSize and added
*    to the content checksum pInfo->Cksum.
*
*  Parameters
*    pContext - Pointer to the initialized decoder context.
*    pFile    - Pointer to input file, positioned at the first octet
*               of the block data.
*    pInfo    - Pointer to frame information; BlockSize gives the
*               number of compressed octets in this block.
*
*  Additional information
*    The application is terminated with exit status 100 if the file
*    ends before the block is complete, on a read error, or if the
*    decoder reports an error.
*/
static void _Decompress(CX_DECODE_CONTEXT *pContext, FILE *pFile, FRAME_INFO *pInfo) {
  CX_STREAM         Stream;
  int               Status;
  U8                ByteIn;
  U8                ByteOut;
  unsigned          AvailIn;
  //
  Stream.AvailIn  = 0;
  Stream.AvailOut = 0;
  //
  AvailIn = pInfo->BlockSize;
  //
  for (;;) {
    //
    // Refill the one-octet input window once the decoder has consumed it.
    //
    if (Stream.AvailIn == 0) {
      if (AvailIn != 0) {
        //
        // A short read must not go unnoticed: ByteIn would otherwise hold
        // a stale or uninitialized value that is fed to the decoder.
        //
        if (fread(&ByteIn, 1, 1, pFile) != 1) {
          if (ferror(pFile)) {
            perror("Can't read input file");
          } else {
            printf("Unexpected end of input file\n");
          }
          exit(100);
        }
        Stream.pIn = &ByteIn;
        Stream.AvailIn = 1;
        --AvailIn;
      }
    }
    //
    // Always offer room for exactly one output octet.
    //
    Stream.pOut = &ByteOut;
    Stream.AvailOut = 1;
    //
    // Once the last octet of the block has been fetched, ask the decoder
    // to flush; the flush mode depends on whether blocks are independent.
    //
    if (AvailIn == 0) {
      Status = CX_DECODE_Process(pContext, &Stream, pInfo->FLG & FLG_BINDEP ? CX_FLUSH_ALL : CX_FLUSH_SYNC);
    } else {
      Status = CX_DECODE_Process(pContext, &Stream, CX_FLUSH_NONE);
    }
    if (Status < 0) {
      printf("%s\n", CX_GetErrorText(Status));
      exit(100);
    }
    //
    // AvailOut dropping to zero means the decoder wrote one octet to ByteOut.
    //
    if (Stream.AvailOut == 0) {
      pInfo->CalcCSize++;
      _XXHASH_AddByte(&pInfo->Cksum, ByteOut);
    }
    //
    // NOTE(review): a status of 1 presumably signals that the flushed
    // block is complete - confirm against the emCompress documentation.
    //
    if (Status == 1) {
      break;
    }
  }
}

/*********************************************************************
*
*       _DecodeFrame()
*
*  Function description
*    Decode the content of an LZ4 frame, ensures compressed data
*    can be decompressed.
*
*  Parameters
*    pFile - Pointer to input file.
*
*  Return value
*    Application exit status, zero when no error.
*/
static int _DecodeFrame(FILE *pFile) {
  CX_DECODE_CONTEXT Context;
  CX_PARAS          Paras;
  FRAME_INFO        Info;
  XXHASH_CONTEXT    Cksum;
  U32               CRC;
  char              aStatus[128];
  //
  printf("Size               CRC\n");
  printf("Computed   Stored  Computed   Stored  Status\n");
  printf("-------- --------  -------- --------  -----------------------\n");
  //         
  fread(Info.aMagic, 1, sizeof(Info.aMagic), pFile);
  if (SEGGER_RdU32LE(Info.aMagic) != 0x184D2204) {
    printf("Incorrect ID bytes for an LZ4 file\n");
    exit(100);
  }
  //
  _XXHASH_Init(&Info.Cksum, 0);
  _XXHASH_Init(&Cksum, 0);
  fread(&Info.FLG, 1, 1, pFile);
  fread(&Info.BD, 1, 1, pFile);
  if (feof(pFile) || ferror(pFile)) {
    perror("Can't read input file");
    exit(100);
  }
  _XXHASH_Add(&Cksum, &Info.FLG, 1);
  _XXHASH_Add(&Cksum, &Info.BD, 1);
  //
  if ((Info.FLG & FLG_VERSION) != 0x40) {
    printf("LZ4 version not supported by this utility\n");
    exit(100);
  }
  //
  if (Info.FLG & FLG_CSIZE) {
    fread(Info.aCSize, 1, 4, pFile);
    _XXHASH_Add(&Cksum, Info.aCSize, 8);
  }
  if (Info.FLG & FLG_DICTID) {
    fread(Info.aDictID, 1, 4, pFile);
    _XXHASH_Add(&Cksum, Info.aDictID, 4);
  }
  fread(&Info.HC, 1, 1, pFile);
  if (Info.HC != ((_XXHASH_Get(&Cksum) >> 8) & 0xFF)) {
    printf("Header checksum error\n");
    exit(100);
  }
  //
  Paras.WindowSize = 65536;
  Paras.MinLen     = 3;
  Paras.MaxLen     = 258;
  Paras.BlockLen   = 0;
  CX_DECODE_Init(&Context, &CX_LZ4_Decode, &_StaticAllocator, &Paras);
  //
  Info.CalcCSize = 0;
  //
  for (;;) {
    //
    // Process a block.
    //
    fread(Info.aBlockSize, 1, 4, pFile);
    Info.BlockSize = SEGGER_RdU32LE(Info.aBlockSize);
    if (Info.BlockSize == 0) {
      break;
    }
    _Decompress(&Context, pFile, &Info);
  }
  //
  aStatus[0] = 0;
  //  
  if (Info.FLG & FLG_CSIZE) {
    printf("%8llu %8llu  ", Info.CalcCSize, SEGGER_RdU64LE(Info.aCSize));
    if (Info.CalcCSize != SEGGER_RdU64LE(Info.aCSize)) {
      strcat(aStatus, ", content size mismatch");
    }
  } else {
    printf("%8llu %8s  ", Info.CalcCSize, "-");
  }
  //
  CX_DECODE_Exit(&Context);
  //
  CRC = _XXHASH_Get(&Info.Cksum);
  if (Info.FLG & FLG_CCHK) {
    fread(Info.aCRC, 1, 4, pFile);
    printf("%08X %08X  ", CRC, SEGGER_RdU32LE(Info.aCRC));
    if (CRC != SEGGER_RdU32LE(Info.aCRC)) {
      strcat(aStatus, ", CRC mismatch");
    }
  } else {
    printf("%08X %8s  ", CRC, "-");
  }
  if (fgetc(pFile) != EOF) {
    strcat(aStatus, ", junk at end of LZ4 file");
  }
  //
  if (aStatus[0] != 0) {
    printf("FAIL: %s\n", &aStatus[2]);
    return 100;
  } else {
    printf("OK (%s)\n", Info.FLG & FLG_BINDEP ? "Block independent" : "Block dependent");
    return 0;
  }
}

/*********************************************************************
*
*       Public code
*
**********************************************************************
*/

/*********************************************************************
*
*       main()
*
*  Function description
*    Application entry point: sets up the allocator, prints the
*    sign-on banner, opens the file named on the command line and
*    checks the LZ4 frame it contains.
*
*  Parameters
*    argc - Argument count; exactly one file name is expected.
*    argv - Argument vector.
*
*  Return value
*    Exit status: the result of _DecodeFrame(), or 100 when the
*    command line is wrong or the file cannot be opened.
*/
int main(int argc, char **argv) {
  FILE * pInput;
  int    Result;
  //
  SEGGER_MEM_SYSTEM_HEAP_Init(&_StaticAllocator);
  _PrintSignOn();
  //
  // Exactly one argument, the file to check, is required.
  //
  if (argc != 2) {
    printf("Syntax: %s <filename>\n", argv[0]);
    return 100;
  }
  //
  pInput = fopen(argv[1], "rb");
  if (pInput == NULL) {
    printf("%s: can't open '%s' for reading\n", argv[0], argv[1]);
    return 100;
  }
  //
  Result = _DecodeFrame(pInput);
  fclose(pInput);
  return Result;
}

/*************************** End of file ****************************/