Difference between revisions of "HOWTO decompress an LZ4 archive"
m |
|||
Line 1: | Line 1: | ||
− | emCompress- |
+ | emCompress-Pro provides the capability to decompress streams that are compressed in various formats, but it does not provide out-of-the-box container support. |
− | Recognizing that customers may possibly want to deal with encapsulated streams, this code can be used with emCompress- |
+ | Recognizing that customers may possibly want to deal with encapsulated streams, this code can be used with emCompress-Pro to check the integrity of an LZ4 file containing an LZ4 frame (containing multiple LZ4 blocks): |
<syntaxhighlight lang="c"> |
<syntaxhighlight lang="c"> |
Latest revision as of 11:06, 17 May 2023
emCompress-Pro provides the capability to decompress streams that are compressed in various formats, but it does not provide out-of-the-box container support.
Recognizing that customers may possibly want to deal with encapsulated streams, this code can be used with emCompress-Pro to check the integrity of an LZ4 file containing an LZ4 frame (containing multiple LZ4 blocks):
/*********************************************************************
* (c) SEGGER Microcontroller GmbH *
* The Embedded Experts *
* www.segger.com *
**********************************************************************
-------------------------- END-OF-HEADER -----------------------------
File : CX_LZ4.c
Purpose : Check the integrity of an LZ4-compressed file (content size and checksums).
References : LZ4 frame format - https://github.com/lz4/lz4/blob/master/doc/lz4_Frame_format.md
LZ4 block format - https://github.com/lz4/lz4/blob/master/doc/lz4_Block_format.md
xxHash Algorithm - https://github.com/Cyan4973/xxHash/blob/dev/doc/xxhash_spec.md
*/
/*********************************************************************
*
* #include Section
*
**********************************************************************
*/
#include "CX_Int.h"
#include "SEGGER_MEM.h"
#include "SEGGER_UTIL.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*********************************************************************
*
* Defines, fixed
*
**********************************************************************
*/
//
// Flag byte (FLG) of the LZ4 frame descriptor.
// Each mask is tested against FRAME_INFO.FLG.
//
#define FLG_VERSION 0xC0  // Version field mask; _DecodeFrame() accepts only the value 0x40.
#define FLG_BINDEP 0x20  // Blocks are independent; reported as "Block independent" and selects the flush mode in _Decompress().
#define FLG_BCHK 0x10  // Block checksum flag per the LZ4 frame format (a 4-byte checksum follows each block).
#define FLG_CSIZE 0x08  // Content size field is present in the frame header.
#define FLG_CCHK 0x04  // Content checksum is stored after the last block.
#define FLG_DICTID 0x01  // Dictionary ID field is present in the frame header.
//
// Block descriptor (BD) byte
//
#define BD_BSIZE 0x70  // NOTE(review): presumably the mask of the block-maximum-size field; not referenced in this file - confirm against the frame format.
//
// xxHash
//
// Multiplicative constants used by the _XXHASH_*() functions below.
//
#define PRIME32_1 2654435761u
#define PRIME32_2 2246822519u
#define PRIME32_3 3266489917u
#define PRIME32_4 668265263u
#define PRIME32_5 374761393u
//
// 32-bit rotate left
//
// X must be an unsigned 32-bit value and N must be in the range 1..31
// (a shift count of 0 or 32 would shift by the full width of the type).
// Both arguments are fully parenthesized so that expressions such as
// ROL(x, a - b) expand correctly; both arguments are evaluated twice.
//
#define ROL(X, N) (((X) << (N)) | ((X) >> (32 - (N))))
/*********************************************************************
*
* Local types
*
**********************************************************************
*/
//
// Running state of a 32-bit xxHash computation that is fed
// one octet at a time by _XXHASH_AddByte().
//
typedef struct {
U32 aAcc[4];         // Four accumulators; seeded by _XXHASH_Init(), updated each time 16 octets have been buffered.
U8 aData[16];        // Octets received but not yet folded into the accumulators.
unsigned DataLen;    // Number of valid octets in aData; reset to 0 after each full group of 16.
unsigned TotalLen;   // Total number of octets added since _XXHASH_Init().
} XXHASH_CONTEXT;
//
// Header fields and running totals for the LZ4 frame being checked.
// The aXxx members hold raw octets exactly as read from the file
// (little-endian); they are decoded with SEGGER_RdU32LE()/SEGGER_RdU64LE().
//
typedef struct {
U8 FLG;                  // Flag byte of the frame descriptor; tested against the FLG_* masks.
U8 BD;                   // Block descriptor byte; in this file it is only fed into the header checksum.
U8 HC;                   // Stored header checksum byte; compared with bits 8..15 of the xxHash over the descriptor.
U8 aMagic [4];           // Magic number; must decode to 0x184D2204.
U8 aCSize [8];           // Stored content size (only read when FLG_CSIZE is set).
U8 aCRC [8];             // Stored content checksum (only read when FLG_CCHK is set); only the first 4 octets are used.
U8 aDictID [4];          // Dictionary ID (only read when FLG_DICTID is set); hashed into the header checksum, not otherwise used.
U8 aBlockSize [4];       // Size field of the block currently being processed.
XXHASH_CONTEXT Cksum;    // xxHash over all decompressed output octets.
U32 BlockSize;           // Decoded value of aBlockSize; a value of 0 ends the block loop.
U64 CalcCSize;           // Number of decompressed octets produced so far.
} FRAME_INFO;
/*********************************************************************
*
* Static data
*
**********************************************************************
*/
//
// Memory allocator context; set up in main() with
// SEGGER_MEM_SYSTEM_HEAP_Init() and handed to CX_DECODE_Init().
//
static SEGGER_MEM_CONTEXT _StaticAllocator;
/*********************************************************************
*
* Static code
*
**********************************************************************
*/
/*********************************************************************
*
*       _XXHASH_Init()
*
*  Function description
*    Prepare an xxHash context for a new computation: clear the
*    octet counters and derive the four accumulators from the seed.
*
*  Parameters
*    pSelf - Pointer to xxHash context.
*    Seed  - Value of seed to use.
*/
static void _XXHASH_Init(XXHASH_CONTEXT *pSelf, U32 Seed) {
  U32 *pAcc;
  //
  // No octets buffered, none processed.
  //
  pSelf->TotalLen = 0;
  pSelf->DataLen  = 0;
  //
  // Accumulator 2 holds the plain seed; _XXHASH_Get() relies on
  // this when fewer than 16 octets were hashed.
  //
  pAcc    = pSelf->aAcc;
  pAcc[0] = Seed + PRIME32_1 + PRIME32_2;
  pAcc[1] = Seed + PRIME32_2;
  pAcc[2] = Seed;
  pAcc[3] = Seed - PRIME32_1;
}
/*********************************************************************
*
*       _XXHASH_AddByte()
*
*  Function description
*    Append a single octet to the hash. Octets are buffered; once
*    16 have been collected, each of the four accumulators absorbs
*    one little-endian 32-bit word of the buffer and the buffer is
*    emptied.
*
*  Parameters
*    pSelf - Pointer to xxHash context.
*    Data  - Octet to add.
*/
static void _XXHASH_AddByte(XXHASH_CONTEXT *pSelf, U8 Data) {
  unsigned Lane;
  U32      Acc;
  //
  pSelf->aData[pSelf->DataLen] = Data;
  pSelf->DataLen  += 1;
  pSelf->TotalLen += 1;
  if (pSelf->DataLen != 16) {
    return;                       // Buffer not yet full, nothing to fold.
  }
  //
  // Full 16-octet group: one round per accumulator.
  //
  for (Lane = 0; Lane < 4; ++Lane) {
    Acc  = pSelf->aAcc[Lane];
    Acc  = Acc + SEGGER_RdU32LE(&pSelf->aData[4*Lane]) * PRIME32_2;
    Acc  = ROL(Acc, 13);
    Acc  = Acc * PRIME32_1;
    pSelf->aAcc[Lane] = Acc;
  }
  pSelf->DataLen = 0;
}
/*********************************************************************
*
*       _XXHASH_Add()
*
*  Function description
*    Append an octet string to the hash by passing every octet, in
*    order, to _XXHASH_AddByte().
*
*  Parameters
*    pSelf   - Pointer to xxHash context.
*    pData   - Pointer to octet string.
*    DataLen - Octet length of the octet string.
*/
static void _XXHASH_Add(XXHASH_CONTEXT *pSelf, U8 *pData, unsigned DataLen) {
  unsigned Index;
  //
  for (Index = 0; Index < DataLen; ++Index) {
    _XXHASH_AddByte(pSelf, pData[Index]);
  }
}
/*********************************************************************
*
*       _XXHASH_Get()
*
*  Function description
*    Compute the final hash value from the current context state.
*    The context itself is only read, not modified.
*
*  Parameters
*    pSelf - Pointer to xxHash context.
*
*  Return value
*    Computed xxHash value over input data.
*/
static U32 _XXHASH_Get(XXHASH_CONTEXT *pSelf) {
  U32      Hash;
  unsigned Pos;
  unsigned Remaining;
  //
  // With at least 16 octets hashed the four accumulators are merged;
  // otherwise they were never updated and accumulator 2 still holds
  // the seed.
  //
  if (pSelf->TotalLen >= 16) {
    Hash = ROL(pSelf->aAcc[0], 1) +
           ROL(pSelf->aAcc[1], 7) +
           ROL(pSelf->aAcc[2], 12) +
           ROL(pSelf->aAcc[3], 18);
  } else {
    Hash = pSelf->aAcc[2] + PRIME32_5;
  }
  Hash += pSelf->TotalLen;
  //
  // Consume buffered octets: whole 32-bit words first...
  //
  Pos       = 0;
  Remaining = pSelf->DataLen;
  for (; Remaining >= 4; Remaining -= 4, Pos += 4) {
    Hash += SEGGER_RdU32LE(&pSelf->aData[Pos]) * PRIME32_3;
    Hash  = ROL(Hash, 17) * PRIME32_4;
  }
  //
  // ...then the remaining octets one at a time.
  //
  for (; Remaining > 0; --Remaining, ++Pos) {
    Hash += pSelf->aData[Pos] * PRIME32_5;
    Hash  = ROL(Hash, 11) * PRIME32_1;
  }
  //
  // Final mixing.
  //
  Hash ^= Hash >> 15;
  Hash *= PRIME32_2;
  Hash ^= Hash >> 13;
  Hash *= PRIME32_3;
  Hash ^= Hash >> 16;
  //
  return Hash;
}
/*********************************************************************
*
*       _PrintSignOn()
*
*  Function description
*    Prints the application's sign-on banner (product name, library
*    version, build date/time and copyright text) on stdout.
*/
static void _PrintSignOn(void) {
  fputs("\n", stdout);
  printf("emCompress-Flex LZ4 Checker V%s ", CX_GetVersionText());
  fputs("compiled " __DATE__ " " __TIME__ "\n", stdout);
  printf("%s www.segger.com\n\n", CX_GetCopyrightText());
}
/*********************************************************************
*
*       _Decompress()
*
*  Function description
*    Decompress one LZ4 block. pInfo->BlockSize octets are read from
*    the file and handed to the decoder one octet at a time; output
*    is collected one octet at a time. Every output octet is counted
*    in pInfo->CalcCSize and added to the checksum pInfo->Cksum.
*
*  Parameters
*    pContext - Pointer to initialized decoder context.
*    pFile    - Pointer to input file, positioned at the block data.
*    pInfo    - Pointer to frame information.
*
*  Additional information
*    Returns when CX_DECODE_Process() returns 1. The application is
*    terminated with exit status 100 if the decoder reports an error
*    (negative status) or if the file ends before the whole block
*    has been read.
*/
static void _Decompress(CX_DECODE_CONTEXT *pContext, FILE *pFile, FRAME_INFO *pInfo) {
  CX_STREAM Stream;
  int       Status;
  U8        ByteIn;
  U8        ByteOut;
  unsigned  AvailIn;
  //
  Stream.AvailIn  = 0;
  Stream.AvailOut = 0;
  //
  AvailIn = pInfo->BlockSize;
  //
  for (;;) {
    //
    // Refill the one-octet input buffer once the decoder has consumed it.
    //
    if (Stream.AvailIn == 0) {
      if (AvailIn != 0) {
        //
        // A short read means the file is truncated or unreadable; never
        // hand a stale or uninitialized octet to the decoder.
        //
        if (fread(&ByteIn, 1, 1, pFile) != 1) {
          printf("Unexpected end of input in LZ4 block\n");
          exit(100);
        }
        Stream.pIn     = &ByteIn;
        Stream.AvailIn = 1;
        --AvailIn;
      }
    }
    Stream.pOut     = &ByteOut;
    Stream.AvailOut = 1;
    //
    // Once the last octet of the block has been fetched, request a flush:
    // CX_FLUSH_ALL for block-independent frames, CX_FLUSH_SYNC otherwise.
    //
    if (AvailIn == 0) {
      Status = CX_DECODE_Process(pContext, &Stream, pInfo->FLG & FLG_BINDEP ? CX_FLUSH_ALL : CX_FLUSH_SYNC);
    } else {
      Status = CX_DECODE_Process(pContext, &Stream, CX_FLUSH_NONE);
    }
    if (Status < 0) {
      printf("%s\n", CX_GetErrorText(Status));
      exit(100);
    }
    //
    // AvailOut dropping to zero means the decoder produced one octet.
    //
    if (Stream.AvailOut == 0) {
      pInfo->CalcCSize++;
      _XXHASH_AddByte(&pInfo->Cksum, ByteOut);
    }
    if (Status == 1) {
      break;
    }
  }
}
/*********************************************************************
*
* _DecodeFrame()
*
* Function description
* Decode the content of an LZ4 frame, ensures compressed data
* can be decompressed.
*
* Parameters
* pFile - Pointer to input file.
*
* Return value
* Application exit status, zero when no error.
*/
static int _DecodeFrame(FILE *pFile) {
CX_DECODE_CONTEXT Context;
CX_PARAS Paras;
FRAME_INFO Info;
XXHASH_CONTEXT Cksum;
U32 CRC;
char aStatus[128];
//
printf("Size CRC\n");
printf("Computed Stored Computed Stored Status\n");
printf("-------- -------- -------- -------- -----------------------\n");
//
fread(Info.aMagic, 1, sizeof(Info.aMagic), pFile);
if (SEGGER_RdU32LE(Info.aMagic) != 0x184D2204) {
printf("Incorrect ID bytes for an LZ4 file\n");
exit(100);
}
//
_XXHASH_Init(&Info.Cksum, 0);
_XXHASH_Init(&Cksum, 0);
fread(&Info.FLG, 1, 1, pFile);
fread(&Info.BD, 1, 1, pFile);
if (feof(pFile) || ferror(pFile)) {
perror("Can't read input file");
exit(100);
}
_XXHASH_Add(&Cksum, &Info.FLG, 1);
_XXHASH_Add(&Cksum, &Info.BD, 1);
//
if ((Info.FLG & FLG_VERSION) != 0x40) {
printf("LZ4 version not supported by this utility\n");
exit(100);
}
//
if (Info.FLG & FLG_CSIZE) {
fread(Info.aCSize, 1, 4, pFile);
_XXHASH_Add(&Cksum, Info.aCSize, 8);
}
if (Info.FLG & FLG_DICTID) {
fread(Info.aDictID, 1, 4, pFile);
_XXHASH_Add(&Cksum, Info.aDictID, 4);
}
fread(&Info.HC, 1, 1, pFile);
if (Info.HC != ((_XXHASH_Get(&Cksum) >> 8) & 0xFF)) {
printf("Header checksum error\n");
exit(100);
}
//
Paras.WindowSize = 65536;
Paras.MinLen = 3;
Paras.MaxLen = 258;
Paras.BlockLen = 0;
CX_DECODE_Init(&Context, &CX_LZ4_Decode, &_StaticAllocator, &Paras);
//
Info.CalcCSize = 0;
//
for (;;) {
//
// Process a block.
//
fread(Info.aBlockSize, 1, 4, pFile);
Info.BlockSize = SEGGER_RdU32LE(Info.aBlockSize);
if (Info.BlockSize == 0) {
break;
}
_Decompress(&Context, pFile, &Info);
}
//
aStatus[0] = 0;
//
if (Info.FLG & FLG_CSIZE) {
printf("%8llu %8llu ", Info.CalcCSize, SEGGER_RdU64LE(Info.aCSize));
if (Info.CalcCSize != SEGGER_RdU64LE(Info.aCSize)) {
strcat(aStatus, ", content size mismatch");
}
} else {
printf("%8llu %8s ", Info.CalcCSize, "-");
}
//
CX_DECODE_Exit(&Context);
//
CRC = _XXHASH_Get(&Info.Cksum);
if (Info.FLG & FLG_CCHK) {
fread(Info.aCRC, 1, 4, pFile);
printf("%08X %08X ", CRC, SEGGER_RdU32LE(Info.aCRC));
if (CRC != SEGGER_RdU32LE(Info.aCRC)) {
strcat(aStatus, ", CRC mismatch");
}
} else {
printf("%08X %8s ", CRC, "-");
}
if (fgetc(pFile) != EOF) {
strcat(aStatus, ", junk at end of LZ4 file");
}
//
if (aStatus[0] != 0) {
printf("FAIL: %s\n", &aStatus[2]);
return 100;
} else {
printf("OK (%s)\n", Info.FLG & FLG_BINDEP ? "Block independent" : "Block dependent");
return 0;
}
}
/*********************************************************************
*
* Public code
*
**********************************************************************
*/
/*********************************************************************
*
*       main()
*
*  Function description
*    Application entry point. Sets up the allocator, prints the
*    sign-on banner, opens the file named by the single command
*    line argument and checks the LZ4 frame it contains.
*
*  Parameters
*    argc - Argument count.
*    argv - Argument vector.
*
*  Return value
*    Exit status: the result of _DecodeFrame(), or 100 when the
*    command line is wrong or the file cannot be opened.
*/
int main(int argc, char **argv) {
  FILE *pInput;
  int   ExitCode;
  //
  SEGGER_MEM_SYSTEM_HEAP_Init(&_StaticAllocator);
  //
  _PrintSignOn();
  //
  if (argc == 2) {
    pInput = fopen(argv[1], "rb");
    if (pInput != NULL) {
      ExitCode = _DecodeFrame(pInput);
      fclose(pInput);
    } else {
      printf("%s: can't open '%s' for reading\n", argv[0], argv[1]);
      ExitCode = 100;
    }
  } else {
    printf("Syntax: %s <filename>\n", argv[0]);
    ExitCode = 100;
  }
  exit(ExitCode);
}
/*************************** End of file ****************************/