HOWTO decompress an LZ4 archive
emCompress-Flex provides the capability to decompress streams that are compressed in various formats, but it does not provide canned container support.
Recognizing that customers may want to deal with encapsulated streams, this code can be used with emCompress-Flex to check the integrity of an LZ4 file containing an LZ4 frame (which in turn contains multiple LZ4 blocks):
/*********************************************************************
* (c) SEGGER Microcontroller GmbH & Co. KG *
* The Embedded Experts *
* www.segger.com *
**********************************************************************
-------------------------- END-OF-HEADER -----------------------------
File : CX_LZ4.c
Purpose : Check the integrity of an LZ4-compressed file and list its size and checksum.
References : LZ4 frame format - https://github.com/lz4/lz4/blob/master/doc/lz4_Frame_format.md
LZ4 block format - https://github.com/lz4/lz4/blob/master/doc/lz4_Block_format.md
xxHash Algorithm - https://github.com/Cyan4973/xxHash/blob/dev/doc/xxhash_spec.md
*/
/*********************************************************************
*
* #include Section
*
**********************************************************************
*/
#include "CX_Int.h"
#include "SEGGER_MEM.h"
#include "SEGGER_UTIL.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*********************************************************************
*
* Defines, fixed
*
**********************************************************************
*/
//
// Frame descriptor flag byte (FLG): one mask per field, written
// as "field value << bit position".
//
#define FLG_VERSION   (3 << 6)    // Bits 7..6: format version
#define FLG_BINDEP    (1 << 5)    // Bit 5:     blocks are independent
#define FLG_BCHK      (1 << 4)    // Bit 4:     block checksum flag
#define FLG_CSIZE     (1 << 3)    // Bit 3:     content size field is present
#define FLG_CCHK      (1 << 2)    // Bit 2:     content checksum is present
#define FLG_DICTID    (1 << 0)    // Bit 0:     dictionary ID field is present
//
// Block descriptor byte (BD)
//
#define BD_BSIZE      (7 << 4)    // Bits 6..4: block size field
//
// xxHash: the five 32-bit multiplier constants, in hexadecimal.
//
#define PRIME32_1     0x9E3779B1u
#define PRIME32_2     0x85EBCA77u
#define PRIME32_3     0xC2B2AE3Du
#define PRIME32_4     0x27D4EB2Fu
#define PRIME32_5     0x165667B1u
//
// 32-bit rotate left.
//
// X must be an unsigned 32-bit value and N must be in the range 1..31
// (N == 0 or N == 32 would shift by the full width of the type).
// Both arguments are fully parenthesized so that expressions such as
// ROL(x, a + b) rotate by the intended amount; each argument is
// evaluated more than once, so it must be free of side effects.
//
#define ROL(X, N)     (((X) << (N)) | ((X) >> (32 - (N))))
/*********************************************************************
*
* Local types
*
**********************************************************************
*/
//
// Running state of a 32-bit xxHash calculation that is fed one
// octet at a time.
//
typedef struct {
  U32 aAcc[4];          // The four accumulators; each consumes one 32-bit little-endian lane of every 16-octet stripe.
  U8 aData[16];         // Octets received since the last complete stripe was consumed.
  unsigned DataLen;     // Number of valid octets in aData (0..15 between calls).
  unsigned TotalLen;    // Total number of octets added since initialization.
} XXHASH_CONTEXT;
//
// Fields read from the LZ4 frame together with the values computed
// while decoding it.
//
typedef struct {
  U8 FLG;                 // Frame descriptor flag byte, tested against the FLG_* masks.
  U8 BD;                  // Block descriptor byte; only fed into the header checksum.
  U8 HC;                  // Stored header checksum octet.
  U8 aMagic [4];          // Magic number as stored in the file (little endian).
  U8 aCSize [8];          // Stored content size, interpreted as 64-bit little endian.
  U8 aCRC [8];            // Stored content checksum; only the first four octets are read and compared.
  U8 aDictID [4];         // Stored dictionary ID; read and hashed, not otherwise used.
  U8 aBlockSize [4];      // Raw block size field of the block being processed.
  XXHASH_CONTEXT Cksum;   // Running xxHash over all decompressed octets.
  U32 BlockSize;          // Decoded value of aBlockSize: number of input octets that make up the block.
  U64 CalcCSize;          // Number of decompressed octets produced so far.
} FRAME_INFO;
/*********************************************************************
*
* Static data
*
**********************************************************************
*/
//
// Memory allocator context: set up by main() through
// SEGGER_MEM_SYSTEM_HEAP_Init() and handed to CX_DECODE_Init().
//
static SEGGER_MEM_CONTEXT _StaticAllocator;
/*********************************************************************
*
* Static code
*
**********************************************************************
*/
/*********************************************************************
*
*       _XXHASH_Init()
*
*  Function description
*    Prepare an xxHash context for a new calculation: the input
*    buffer and length counters are emptied and the four
*    accumulators are derived from the seed.
*
*  Parameters
*    pSelf - Pointer to xxHash context to initialize.
*    Seed  - Seed value the accumulators are derived from.
*/
static void _XXHASH_Init(XXHASH_CONTEXT *pSelf, U32 Seed) {
  //
  // No input seen yet.
  //
  pSelf->TotalLen = 0;
  pSelf->DataLen  = 0;
  //
  // Accumulators; arithmetic wraps modulo 2^32.
  //
  pSelf->aAcc[3]  = Seed - PRIME32_1;
  pSelf->aAcc[2]  = Seed;
  pSelf->aAcc[1]  = Seed + PRIME32_2;
  pSelf->aAcc[0]  = pSelf->aAcc[1] + PRIME32_1;
}
/*********************************************************************
*
*       _XXHASH_AddByte()
*
*  Function description
*    Feed a single octet into the xxHash calculation.
*
*  Parameters
*    pSelf - Pointer to xxHash context.
*    Data  - Octet to add.
*
*  Additional information
*    The octet is buffered; as soon as 16 octets have been collected
*    they are folded into the four accumulators (one 32-bit
*    little-endian lane per accumulator) and the buffer is emptied.
*/
static void _XXHASH_AddByte(XXHASH_CONTEXT *pSelf, U8 Data) {
  unsigned Lane;
  U32      Acc;
  //
  pSelf->aData[pSelf->DataLen] = Data;
  pSelf->DataLen  += 1;
  pSelf->TotalLen += 1;
  if (pSelf->DataLen != 16) {
    return;                         // Stripe not complete yet.
  }
  //
  // A full stripe is available: run one round per accumulator.
  //
  for (Lane = 0; Lane < 4; ++Lane) {
    Acc  = pSelf->aAcc[Lane];
    Acc += SEGGER_RdU32LE(&pSelf->aData[4*Lane]) * PRIME32_2;
    Acc  = ROL(Acc, 13);
    Acc *= PRIME32_1;
    pSelf->aAcc[Lane] = Acc;
  }
  pSelf->DataLen = 0;
}
/*********************************************************************
*
*       _XXHASH_Add()
*
*  Function description
*    Feed an octet string into the xxHash calculation, one octet
*    at a time and in order.
*
*  Parameters
*    pSelf   - Pointer to xxHash context.
*    pData   - Pointer to the first octet of the string.
*    DataLen - Number of octets to add; zero adds nothing.
*/
static void _XXHASH_Add(XXHASH_CONTEXT *pSelf, U8 *pData, unsigned DataLen) {
  unsigned Index;
  //
  for (Index = 0; Index < DataLen; ++Index) {
    _XXHASH_AddByte(pSelf, pData[Index]);
  }
}
/*********************************************************************
*
*       _XXHASH_Get()
*
*  Function description
*    Compute the xxHash value over all octets added so far.
*
*  Parameters
*    pSelf - Pointer to xxHash context.
*
*  Return value
*    Computed xxHash value over input data.
*
*  Additional information
*    The context is only read, never written, by this function.
*/
static U32 _XXHASH_Get(XXHASH_CONTEXT *pSelf) {
  U32      Hash;
  U8     * pTail;
  unsigned Left;
  //
  // Starting value: with at least one complete 16-octet stripe the
  // four accumulators are merged, otherwise only the third
  // accumulator (still holding the seed) is used.
  //
  if (pSelf->TotalLen >= 16) {
    Hash  = ROL(pSelf->aAcc[0], 1);
    Hash += ROL(pSelf->aAcc[1], 7);
    Hash += ROL(pSelf->aAcc[2], 12);
    Hash += ROL(pSelf->aAcc[3], 18);
  } else {
    Hash  = pSelf->aAcc[2] + PRIME32_5;
  }
  Hash += pSelf->TotalLen;
  //
  // Consume the buffered remainder: first in 32-bit little-endian
  // words, then whatever is left octet by octet.
  //
  pTail = pSelf->aData;
  for (Left = pSelf->DataLen; Left >= 4; Left -= 4) {
    Hash  += SEGGER_RdU32LE(pTail) * PRIME32_3;
    Hash   = ROL(Hash, 17) * PRIME32_4;
    pTail += 4;
  }
  for (; Left > 0; --Left) {
    Hash  += pTail[0] * PRIME32_5;
    Hash   = ROL(Hash, 11) * PRIME32_1;
    pTail += 1;
  }
  //
  // Final mix.
  //
  Hash  = (Hash ^ (Hash >> 15)) * PRIME32_2;
  Hash  = (Hash ^ (Hash >> 13)) * PRIME32_3;
  Hash ^= Hash >> 16;
  //
  return Hash;
}
/*********************************************************************
*
*       _PrintSignOn()
*
*  Function description
*    Print the application's sign-on banner (name, version, build
*    date and time, copyright) on stdout.
*/
static void _PrintSignOn(void) {
  putchar('\n');
  printf("emCompress-Flex LZ4 Checker V%s ", CX_GetVersionText());
  fputs("compiled " __DATE__ " " __TIME__ "\n", stdout);
  printf("%s www.segger.com\n\n", CX_GetCopyrightText());
}
/*********************************************************************
*
*       _Decompress()
*
*  Function description
*    Decompress one LZ4 block. The block's compressed octets are
*    read from the input file and every decompressed octet is
*    counted and added to the content checksum.
*
*  Parameters
*    pContext - Pointer to initialized decoder context.
*    pFile    - Pointer to input file, positioned at the first octet
*               of the block's data.
*    pInfo    - Pointer to frame information. BlockSize and FLG are
*               read; CalcCSize and Cksum are updated.
*
*  Additional information
*    Input and output are streamed through one-octet buffers, so no
*    block-sized memory is required.
*
*    The application is terminated with exit status 100 if the input
*    file ends (or cannot be read) before the complete block has been
*    read, or if the decoder reports an error.
*/
static void _Decompress(CX_DECODE_CONTEXT *pContext, FILE *pFile, FRAME_INFO *pInfo) {
  CX_STREAM Stream;
  int       Status;
  U8        ByteIn;
  U8        ByteOut;
  unsigned  AvailIn;
  //
  ByteIn          = 0;
  ByteOut         = 0;
  Stream.pIn      = &ByteIn;
  Stream.AvailIn  = 0;
  Stream.AvailOut = 0;
  //
  AvailIn = pInfo->BlockSize;     // Octets of this block still to be read from the file.
  //
  for (;;) {
    //
    // Refill the one-octet input buffer once the decoder has consumed it.
    //
    if (Stream.AvailIn == 0) {
      if (AvailIn != 0) {
        //
        // A short read must not go unnoticed: the decoder would
        // otherwise be fed a stale octet for the rest of the block.
        //
        if (fread(&ByteIn, 1, 1, pFile) != 1) {
          if (ferror(pFile)) {
            perror("Can't read input file");
          } else {
            printf("Can't read input file: unexpected end of file\n");
          }
          exit(100);
        }
        Stream.pIn     = &ByteIn;
        Stream.AvailIn = 1;
        --AvailIn;
      }
    }
    //
    // Always offer room for exactly one output octet.
    //
    Stream.pOut     = &ByteOut;
    Stream.AvailOut = 1;
    //
    // Once the last octet of the block has been fetched, ask the
    // decoder to flush. Block-independent frames use CX_FLUSH_ALL,
    // block-dependent frames CX_FLUSH_SYNC.
    // NOTE(review): presumably CX_FLUSH_SYNC keeps the history for
    // the next block - confirm against the emCompress-Flex manual.
    //
    if (AvailIn == 0) {
      Status = CX_DECODE_Process(pContext, &Stream, pInfo->FLG & FLG_BINDEP ? CX_FLUSH_ALL : CX_FLUSH_SYNC);
    } else {
      Status = CX_DECODE_Process(pContext, &Stream, CX_FLUSH_NONE);
    }
    if (Status < 0) {
      printf("%s\n", CX_GetErrorText(Status));
      exit(100);
    }
    //
    // The decoder filled the output buffer: account for the octet.
    //
    if (Stream.AvailOut == 0) {
      pInfo->CalcCSize++;
      _XXHASH_AddByte(&pInfo->Cksum, ByteOut);
    }
    //
    // NOTE(review): a status of 1 is taken to mean that the flush
    // has completed - confirm against the emCompress-Flex manual.
    //
    if (Status == 1) {
      break;
    }
  }
}
/*********************************************************************
*
* _DecodeFrame()
*
* Function description
* Decode the content of an LZ4 frame, ensures compressed data
* can be decompressed.
*
* Parameters
* pFile - Pointer to input file.
*
* Return value
* Application exit status, zero when no error.
*/
static int _DecodeFrame(FILE *pFile) {
CX_DECODE_CONTEXT Context;
CX_PARAS Paras;
FRAME_INFO Info;
XXHASH_CONTEXT Cksum;
U32 CRC;
char aStatus[128];
//
printf("Size CRC\n");
printf("Computed Stored Computed Stored Status\n");
printf("-------- -------- -------- -------- -----------------------\n");
//
fread(Info.aMagic, 1, sizeof(Info.aMagic), pFile);
if (SEGGER_RdU32LE(Info.aMagic) != 0x184D2204) {
printf("Incorrect ID bytes for an LZ4 file\n");
exit(100);
}
//
_XXHASH_Init(&Info.Cksum, 0);
_XXHASH_Init(&Cksum, 0);
fread(&Info.FLG, 1, 1, pFile);
fread(&Info.BD, 1, 1, pFile);
if (feof(pFile) || ferror(pFile)) {
perror("Can't read input file");
exit(100);
}
_XXHASH_Add(&Cksum, &Info.FLG, 1);
_XXHASH_Add(&Cksum, &Info.BD, 1);
//
if ((Info.FLG & FLG_VERSION) != 0x40) {
printf("LZ4 version not supported by this utility\n");
exit(100);
}
//
if (Info.FLG & FLG_CSIZE) {
fread(Info.aCSize, 1, 4, pFile);
_XXHASH_Add(&Cksum, Info.aCSize, 8);
}
if (Info.FLG & FLG_DICTID) {
fread(Info.aDictID, 1, 4, pFile);
_XXHASH_Add(&Cksum, Info.aDictID, 4);
}
fread(&Info.HC, 1, 1, pFile);
if (Info.HC != ((_XXHASH_Get(&Cksum) >> 8) & 0xFF)) {
printf("Header checksum error\n");
exit(100);
}
//
Paras.WindowSize = 65536;
Paras.MinLen = 3;
Paras.MaxLen = 258;
Paras.BlockLen = 0;
CX_DECODE_Init(&Context, &CX_LZ4_Decode, &_StaticAllocator, &Paras);
//
Info.CalcCSize = 0;
//
for (;;) {
//
// Process a block.
//
fread(Info.aBlockSize, 1, 4, pFile);
Info.BlockSize = SEGGER_RdU32LE(Info.aBlockSize);
if (Info.BlockSize == 0) {
break;
}
_Decompress(&Context, pFile, &Info);
}
//
aStatus[0] = 0;
//
if (Info.FLG & FLG_CSIZE) {
printf("%8llu %8llu ", Info.CalcCSize, SEGGER_RdU64LE(Info.aCSize));
if (Info.CalcCSize != SEGGER_RdU64LE(Info.aCSize)) {
strcat(aStatus, ", content size mismatch");
}
} else {
printf("%8llu %8s ", Info.CalcCSize, "-");
}
//
CX_DECODE_Exit(&Context);
//
CRC = _XXHASH_Get(&Info.Cksum);
if (Info.FLG & FLG_CCHK) {
fread(Info.aCRC, 1, 4, pFile);
printf("%08X %08X ", CRC, SEGGER_RdU32LE(Info.aCRC));
if (CRC != SEGGER_RdU32LE(Info.aCRC)) {
strcat(aStatus, ", CRC mismatch");
}
} else {
printf("%08X %8s ", CRC, "-");
}
if (fgetc(pFile) != EOF) {
strcat(aStatus, ", junk at end of LZ4 file");
}
//
if (aStatus[0] != 0) {
printf("FAIL: %s\n", &aStatus[2]);
return 100;
} else {
printf("OK (%s)\n", Info.FLG & FLG_BINDEP ? "Block independent" : "Block dependent");
return 0;
}
}
/*********************************************************************
*
* Public code
*
**********************************************************************
*/
/*********************************************************************
*
*       main()
*
*  Function description
*    Application entry point: prints the sign-on banner, opens the
*    file named on the command line and checks the LZ4 frame it
*    contains.
*
*  Parameters
*    argc - Argument count; exactly one argument (the file name)
*           is expected.
*    argv - Argument vector.
*
*  Return value
*    Exit status: the result of checking the frame, or 100 when the
*    command line is wrong or the file cannot be opened.
*/
int main(int argc, char **argv) {
  FILE * pInput;
  int    ExitCode;
  //
  SEGGER_MEM_SYSTEM_HEAP_Init(&_StaticAllocator);
  _PrintSignOn();
  //
  // Assume failure; only a checked frame can change the exit code.
  //
  ExitCode = 100;
  if (argc != 2) {
    printf("Syntax: %s <filename>\n", argv[0]);
  } else {
    pInput = fopen(argv[1], "rb");
    if (pInput != NULL) {
      ExitCode = _DecodeFrame(pInput);
      fclose(pInput);
    } else {
      printf("%s: can't open '%s' for reading\n", argv[0], argv[1]);
    }
  }
  exit(ExitCode);
}
/*************************** End of file ****************************/