HOWTO decompress an LZ4 archive

From SEGGER Wiki
Revision as of 23:47, 11 June 2019 by Paul (talk | contribs)
Jump to: navigation, search

emCompress-Flex provides the capability to decompress streams that are compressed in various formats, but it does not provide canned container support.

Recognizing that customers may want to deal with encapsulated streams, this code can be used with emCompress-Flex to check the integrity of an LZ4 file containing an LZ4 frame (which may contain multiple LZ4 blocks):

/*********************************************************************
*                   (c) SEGGER Microcontroller GmbH                  *
*                        The Embedded Experts                        *
*                           www.segger.com                           *
**********************************************************************

-------------------------- END-OF-HEADER -----------------------------

File        : CX_LZ4.c
Purpose     : Check the integrity of an LZ4-compressed file.

References  : LZ4 frame format - https://github.com/lz4/lz4/blob/master/doc/lz4_Frame_format.md
              LZ4 block format - https://github.com/lz4/lz4/blob/master/doc/lz4_Block_format.md
              xxHash Algorithm - https://github.com/Cyan4973/xxHash/blob/dev/doc/xxhash_spec.md

*/

/*********************************************************************
*
*       #include Section
*
**********************************************************************
*/

#include "CX_Int.h"
#include "SEGGER_MEM.h"
#include "SEGGER_UTIL.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*********************************************************************
*
*       Defines, fixed
*
**********************************************************************
*/

//
// Flag byte (FLG) of the LZ4 frame descriptor.
// Bit assignments follow the LZ4 frame format document listed under
// "References" in the file header.
//
#define FLG_VERSION 0xC0  // Mask of the version field; this utility accepts only the value 0x40.
#define FLG_BINDEP  0x20  // Block independence flag.
#define FLG_BCHK    0x10  // Block checksum flag.
#define FLG_CSIZE   0x08  // Content size field present in the frame header.
#define FLG_CCHK    0x04  // Content checksum present after the last block.
#define FLG_DICTID  0x01  // Dictionary ID field present in the frame header.

//
// Block descriptor (BD) byte.
//
#define BD_BSIZE    0x70  // Mask of the block maximum size field.

//
// xxHash
// Prime constants used by the 32-bit xxHash routines in this file.
//
#define PRIME32_1   2654435761u
#define PRIME32_2   2246822519u
#define PRIME32_3   3266489917u
#define PRIME32_4    668265263u
#define PRIME32_5    374761393u

//
// 32-bit rotate left of X by N bits.
//
// X must have a 32-bit unsigned type and N must be in the range 1..31;
// N == 0 or N == 32 would shift by the full width, which is undefined.
// Both arguments are fully parenthesized so that expressions such as
// ROL(x, a + b) expand correctly. Note that X and N are evaluated twice.
//
#define ROL(X, N)   (((X) << (N)) | ((X) >> (32 - (N))))

/*********************************************************************
*
*       Local types
*
**********************************************************************
*/

//
// Running state of a 32-bit xxHash computation that is fed one octet
// at a time.
//
typedef struct {
  U32      aAcc[4];      // Four accumulators, seeded by _XXHASH_Init() and updated for every complete 16-octet stripe.
  U8       aData[16];    // Buffer collecting input octets until a 16-octet stripe is complete.
  unsigned DataLen;      // Number of octets currently held in aData; reset to zero when a stripe is consumed.
  unsigned TotalLen;     // Total number of octets added since _XXHASH_Init().
} XXHASH_CONTEXT;

//
// Fields read from the LZ4 frame plus the values computed while
// decompressing it.
//
typedef struct {
  U8             FLG;               // Flag byte of the frame descriptor, see FLG_xxx masks.
  U8             BD;                // Block descriptor byte of the frame descriptor.
  U8             HC;                // Header checksum byte as stored in the file.
  U8             aMagic     [4];    // Magic number as stored in the file (decoded little-endian).
  U8             aCSize     [8];    // Stored content size (decoded as 64-bit little-endian), valid when FLG_CSIZE is set.
  U8             aCRC       [8];    // Stored content checksum; only the first four octets are decoded.
  U8             aDictID    [4];    // Stored dictionary ID, valid when FLG_DICTID is set.
  U8             aBlockSize [4];    // Size field of the block currently being processed, as stored.
  XXHASH_CONTEXT Cksum;             // xxHash over all decompressed octets.
  U32            BlockSize;         // Decoded (little-endian) value of aBlockSize.
  U64            CalcCSize;         // Count of decompressed octets produced so far.
} FRAME_INFO;

/*********************************************************************
*
*       Static data
*
**********************************************************************
*/

//
// Allocator context; initialized in main() by SEGGER_MEM_SYSTEM_HEAP_Init()
// and passed to CX_DECODE_Init() when a frame is decoded.
//
static SEGGER_MEM_CONTEXT _StaticAllocator;

/*********************************************************************
*
*       Static code
*
**********************************************************************
*/

/*********************************************************************
*
*       _XXHASH_Init()
*
*  Function description
*    Reset an xxHash context: seed the four accumulators and clear
*    the buffered-octet and total-octet counters.
*
*  Parameters
*    pSelf - Pointer to xxHash context to initialize.
*    Seed  - Seed value combined into each accumulator.
*/
static void _XXHASH_Init(XXHASH_CONTEXT *pSelf, U32 Seed) {
  U32 * pAcc;
  //
  pAcc    = pSelf->aAcc;
  pAcc[0] = Seed + PRIME32_1 + PRIME32_2;
  pAcc[1] = Seed + PRIME32_2;
  pAcc[2] = Seed;
  pAcc[3] = Seed - PRIME32_1;
  //
  // Nothing buffered, nothing hashed yet.
  //
  pSelf->TotalLen = 0;
  pSelf->DataLen  = 0;
}

/*********************************************************************
*
*       _XXHASH_AddByte()
*
*  Function description
*    Feed a single octet into the xxHash. The octet is buffered and,
*    once 16 octets have been collected, the four accumulators are
*    each updated from one little-endian 32-bit lane of the buffer.
*
*  Parameters
*    pSelf - Pointer to xxHash context.
*    Data  - Octet to add.
*/
static void _XXHASH_AddByte(XXHASH_CONTEXT *pSelf, U8 Data) {
  unsigned Lane;
  U32      Acc;
  //
  pSelf->aData[pSelf->DataLen] = Data;
  pSelf->DataLen  += 1;
  pSelf->TotalLen += 1;
  if (pSelf->DataLen != 16) {
    return;  // Stripe not complete yet, keep buffering.
  }
  //
  // A full 16-octet stripe is available: run one round per lane.
  //
  for (Lane = 0; Lane < 4; ++Lane) {
    Acc  = pSelf->aAcc[Lane];
    Acc += SEGGER_RdU32LE(&pSelf->aData[4*Lane]) * PRIME32_2;
    Acc  = ROL(Acc, 13);
    Acc *= PRIME32_1;
    pSelf->aAcc[Lane] = Acc;
  }
  pSelf->DataLen = 0;
}

/*********************************************************************
*
*       _XXHASH_Add()
*
*  Function description
*    Feed an octet string into the xxHash, one octet at a time and
*    in order.
*
*  Parameters
*    pSelf   - Pointer to xxHash context.
*    pData   - Pointer to first octet of the octet string.
*    DataLen - Number of octets to add; zero adds nothing.
*/
static void _XXHASH_Add(XXHASH_CONTEXT *pSelf, U8 *pData, unsigned DataLen) {
  unsigned Index;
  //
  for (Index = 0; Index < DataLen; ++Index) {
    _XXHASH_AddByte(pSelf, pData[Index]);
  }
}

/*********************************************************************
*
*       _XXHASH_Get()
*
*  Function description
*    Compute the final xxHash value from the current context state.
*    The context itself is only read, not modified.
*
*  Parameters
*    pSelf   - Pointer to xxHash context.
*
*  Return value
*    xxHash value over all octets added since initialization.
*/
static U32 _XXHASH_Get(XXHASH_CONTEXT *pSelf) {
  U32      Hash;
  unsigned Pos;
  //
  // Converge the accumulators. With fewer than 16 octets in total no
  // stripe was ever processed, so only the third accumulator is used.
  //
  if (pSelf->TotalLen >= 16) {
    Hash  = ROL(pSelf->aAcc[0], 1);
    Hash += ROL(pSelf->aAcc[1], 7);
    Hash += ROL(pSelf->aAcc[2], 12);
    Hash += ROL(pSelf->aAcc[3], 18);
  } else {
    Hash = pSelf->aAcc[2] + PRIME32_5;
  }
  Hash += pSelf->TotalLen;
  //
  // Consume the buffered remainder: 32-bit lanes first...
  //
  Pos = 0;
  while (Pos + 4 <= pSelf->DataLen) {
    Hash += SEGGER_RdU32LE(&pSelf->aData[Pos]) * PRIME32_3;
    Hash  = ROL(Hash, 17) * PRIME32_4;
    Pos  += 4;
  }
  //
  // ...then any trailing single octets.
  //
  while (Pos < pSelf->DataLen) {
    Hash += pSelf->aData[Pos] * PRIME32_5;
    Hash  = ROL(Hash, 11) * PRIME32_1;
    Pos  += 1;
  }
  //
  // Final mixing.
  //
  Hash  = (Hash ^ (Hash >> 15)) * PRIME32_2;
  Hash  = (Hash ^ (Hash >> 13)) * PRIME32_3;
  Hash ^= Hash >> 16;
  //
  return Hash;
}

/*********************************************************************
*
*       _PrintSignOn()
*
*  Function description
*    Writes the application's sign-on banner to stdout: product name
*    and version, build date and time, and the copyright text.
*/
static void _PrintSignOn(void) {
  putchar('\n');
  printf("emCompress-Flex LZ4 Checker V%s ", CX_GetVersionText());
  fputs("compiled " __DATE__ " " __TIME__ "\n", stdout);
  printf("%s    www.segger.com\n\n", CX_GetCopyrightText());
}

/*********************************************************************
*
*       _Decompress()
*
*  Function description
*    Decompress one LZ4 block. The compressed octets are read from
*    the file one at a time and passed to the decoder; every octet
*    the decoder produces is counted and added to the content hash.
*
*  Parameters
*    pContext - Pointer to initialized decoder context.
*    pFile    - Pointer to input file, positioned at the first octet
*               of the block data.
*    pInfo    - Pointer to frame information. BlockSize gives the
*               number of compressed octets to read; CalcCSize and
*               Cksum are updated with the decompressed output.
*
*  Additional information
*    The application is terminated with exit status 100 if the block
*    data cannot be read completely or if the decoder reports an
*    error.
*/
static void _Decompress(CX_DECODE_CONTEXT *pContext, FILE *pFile, FRAME_INFO *pInfo) {
  CX_STREAM         Stream;
  int               Status;
  U8                ByteIn;
  U8                ByteOut;
  unsigned          AvailIn;
  //
  Stream.AvailIn  = 0;
  Stream.AvailOut = 0;
  //
  AvailIn = pInfo->BlockSize;
  //
  for (;;) {
    //
    // Refill the single-octet input buffer once the decoder has
    // consumed it and the block still has unread octets.
    //
    if (Stream.AvailIn == 0) {
      if (AvailIn != 0) {
        //
        // A short read means the file ends inside the block. Stop
        // here rather than feeding a stale octet to the decoder.
        //
        if (fread(&ByteIn, 1, 1, pFile) != 1) {
          printf("Can't read input file: block data is truncated or unreadable\n");
          exit(100);
        }
        Stream.pIn = &ByteIn;
        Stream.AvailIn = 1;
        --AvailIn;
      }
    }
    //
    // Offer room for exactly one output octet per call.
    //
    Stream.pOut = &ByteOut;
    Stream.AvailOut = 1;
    //
    // Once the last octet of the block has been fetched, request a
    // flush; the flush mode depends on the block independence flag.
    //
    if (AvailIn == 0) {
      Status = CX_DECODE_Process(pContext, &Stream, pInfo->FLG & FLG_BINDEP ? CX_FLUSH_ALL : CX_FLUSH_SYNC);
    } else {
      Status = CX_DECODE_Process(pContext, &Stream, CX_FLUSH_NONE);
    }
    if (Status < 0) {
      printf("%s\n", CX_GetErrorText(Status));
      exit(100);
    }
    //
    // AvailOut dropping to zero means the decoder stored an octet.
    //
    if (Stream.AvailOut == 0) {
      pInfo->CalcCSize++;
      _XXHASH_AddByte(&pInfo->Cksum, ByteOut);
    }
    //
    // Status 1 ends processing of this block.
    //
    if (Status == 1) {
      break;
    }
  }
}

/*********************************************************************
*
*       _DecodeFrame()
*
*  Function description
*    Decode the content of an LZ4 frame, ensures compressed data
*    can be decompressed.
*
*  Parameters
*    pFile - Pointer to input file.
*
*  Return value
*    Application exit status, zero when no error.
*/
static int _DecodeFrame(FILE *pFile) {
  CX_DECODE_CONTEXT Context;
  CX_PARAS          Paras;
  FRAME_INFO        Info;
  XXHASH_CONTEXT    Cksum;
  U32               CRC;
  char              aStatus[128];
  //
  printf("Size               CRC\n");
  printf("Computed   Stored  Computed   Stored  Status\n");
  printf("-------- --------  -------- --------  -----------------------\n");
  //         
  fread(Info.aMagic, 1, sizeof(Info.aMagic), pFile);
  if (SEGGER_RdU32LE(Info.aMagic) != 0x184D2204) {
    printf("Incorrect ID bytes for an LZ4 file\n");
    exit(100);
  }
  //
  _XXHASH_Init(&Info.Cksum, 0);
  _XXHASH_Init(&Cksum, 0);
  fread(&Info.FLG, 1, 1, pFile);
  fread(&Info.BD, 1, 1, pFile);
  if (feof(pFile) || ferror(pFile)) {
    perror("Can't read input file");
    exit(100);
  }
  _XXHASH_Add(&Cksum, &Info.FLG, 1);
  _XXHASH_Add(&Cksum, &Info.BD, 1);
  //
  if ((Info.FLG & FLG_VERSION) != 0x40) {
    printf("LZ4 version not supported by this utility\n");
    exit(100);
  }
  //
  if (Info.FLG & FLG_CSIZE) {
    fread(Info.aCSize, 1, 4, pFile);
    _XXHASH_Add(&Cksum, Info.aCSize, 8);
  }
  if (Info.FLG & FLG_DICTID) {
    fread(Info.aDictID, 1, 4, pFile);
    _XXHASH_Add(&Cksum, Info.aDictID, 4);
  }
  fread(&Info.HC, 1, 1, pFile);
  if (Info.HC != ((_XXHASH_Get(&Cksum) >> 8) & 0xFF)) {
    printf("Header checksum error\n");
    exit(100);
  }
  //
  Paras.WindowSize = 65536;
  Paras.MinLen     = 3;
  Paras.MaxLen     = 258;
  Paras.BlockLen   = 0;
  CX_DECODE_Init(&Context, &CX_LZ4_Decode, &_StaticAllocator, &Paras);
  //
  Info.CalcCSize = 0;
  //
  for (;;) {
    //
    // Process a block.
    //
    fread(Info.aBlockSize, 1, 4, pFile);
    Info.BlockSize = SEGGER_RdU32LE(Info.aBlockSize);
    if (Info.BlockSize == 0) {
      break;
    }
    _Decompress(&Context, pFile, &Info);
  }
  //
  aStatus[0] = 0;
  //  
  if (Info.FLG & FLG_CSIZE) {
    printf("%8llu %8llu  ", Info.CalcCSize, SEGGER_RdU64LE(Info.aCSize));
    if (Info.CalcCSize != SEGGER_RdU64LE(Info.aCSize)) {
      strcat(aStatus, ", content size mismatch");
    }
  } else {
    printf("%8llu %8s  ", Info.CalcCSize, "-");
  }
  //
  CX_DECODE_Exit(&Context);
  //
  CRC = _XXHASH_Get(&Info.Cksum);
  if (Info.FLG & FLG_CCHK) {
    fread(Info.aCRC, 1, 4, pFile);
    printf("%08X %08X  ", CRC, SEGGER_RdU32LE(Info.aCRC));
    if (CRC != SEGGER_RdU32LE(Info.aCRC)) {
      strcat(aStatus, ", CRC mismatch");
    }
  } else {
    printf("%08X %8s  ", CRC, "-");
  }
  if (fgetc(pFile) != EOF) {
    strcat(aStatus, ", junk at end of LZ4 file");
  }
  //
  if (aStatus[0] != 0) {
    printf("FAIL: %s\n", &aStatus[2]);
    return 100;
  } else {
    printf("OK (%s)\n", Info.FLG & FLG_BINDEP ? "Block independent" : "Block dependent");
    return 0;
  }
}

/*********************************************************************
*
*       Public code
*
**********************************************************************
*/

/*********************************************************************
*
*       main()
*
*  Function description
*    Application entry point. Sets up the allocator, prints the
*    sign-on banner, opens the file named by the single command line
*    argument and checks the LZ4 frame it contains.
*
*  Parameters
*    argc - Argument count; must be 2.
*    argv - Argument vector; argv[1] names the file to check.
*
*  Return value
*    Exit status: the status of the frame check, or 100 on a usage
*    error or when the file cannot be opened.
*/
int main(int argc, char **argv) {
  FILE * pInput;
  int    ExitCode;
  //
  SEGGER_MEM_SYSTEM_HEAP_Init(&_StaticAllocator);
  _PrintSignOn();
  //
  // Exactly one argument, the file name, is expected.
  //
  if (argc != 2) {
    printf("Syntax: %s <filename>\n", argv[0]);
    return 100;
  }
  //
  pInput = fopen(argv[1], "rb");
  if (pInput == NULL) {
    printf("%s: can't open '%s' for reading\n", argv[0], argv[1]);
    return 100;
  }
  //
  ExitCode = _DecodeFrame(pInput);
  fclose(pInput);
  return ExitCode;
}

/*************************** End of file ****************************/