HOWTO decompress a gzip archive

From SEGGER Wiki
Jump to: navigation, search

emCompress-Flex provides the capability to decompress streams that are compressed in various formats, but it does not provide canned container support.

Recognizing that customers may want to deal with encapsulated streams, this code can be used with emCompress-Flex to check the integrity of a gzip archive that contains files compressed with the DEFLATE algorithm:

/*********************************************************************
*                   (c) SEGGER Microcontroller GmbH                  *
*                        The Embedded Experts                        *
*                           www.segger.com                           *
**********************************************************************

-------------------------- END-OF-HEADER -----------------------------

File        : CX_GZip.c
Purpose     : List the content of a gzip-compressed file.

*/

/*********************************************************************
*
*       #include Section
*
**********************************************************************
*/

#include "CX_Int.h"
#include "SEGGER_MEM.h"
#include "SEGGER_UTIL.h"
#include "SEGGER_CRC.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*********************************************************************
*
*       Defines, fixed
*
**********************************************************************
*/

//
// Bit masks for the FLG byte of a gzip member header (header byte 3),
// see [RFC 1952].  Each set bit announces an optional header field.
//
// FTEXT is not referenced by the code in this file.
#define  FTEXT      0x01
// A CRC16 of the header follows the optional fields.
#define  FHCRC      0x02
// An XLEN-prefixed "extra field" follows the fixed header.
#define  FEXTRA     0x04
// A zero-terminated original file name is present.
#define  FNAME      0x08
// A zero-terminated file comment is present.
#define  FCOMMENT   0x10

/*********************************************************************
*
*       Local types
*
**********************************************************************
*/

//
// Check results for one gzip member: the values stored in the member
// trailer next to the values computed while decompressing it.
//
typedef struct {
  U32 ISize;      // ISIZE as read from the member trailer.
  U32 CalcISize;  // Count of bytes produced by the decoder.
  U32 CRC;        // CRC32 as read from the member trailer.
  U32 CalcCRC;    // CRC computed over the bytes produced by the decoder.
} MEMBER_INFO;

/*********************************************************************
*
*       Static data
*
**********************************************************************
*/

//
// Memory context handed to the decoder by _DecompressBlocks();
// set up in main() with SEGGER_MEM_SYSTEM_HEAP_Init().
//
static SEGGER_MEM_CONTEXT _StaticAllocator;

/*********************************************************************
*
*       Static code
*
**********************************************************************
*/

/*********************************************************************
*
*       _PrintSignOn()
*
*  Function description
*    Prints the sign-on banner: tool name, emCompress-Flex version,
*    build date and time, and the copyright notice.  Output is
*    written with printf().
*/
static void _PrintSignOn(void) {
  putchar('\n');
  //
  // Version and build stamp share one line.
  //
  printf("emCompress-Flex GZip Checker V%s "
         "compiled " __DATE__ " " __TIME__ "\n",
         CX_GetVersionText());
  printf("%s    www.segger.com\n\n", CX_GetCopyrightText());
}

/*********************************************************************
*
*       _DecompressBlocks()
*
*  Function description
*    Decompress the DEFLATE blocks of one gzip member.  The output is
*    not stored; only its length and CRC are accumulated.
*
*  Parameters
*    pFile - Pointer to input file, read from its current position.
*    pInfo - Pointer to member information.  All fields are reset;
*            CalcISize and CalcCRC receive the computed values.
*
*  Additional information
*    Input and output are passed to the decoder one byte at a time.
*    Before returning, the file position is moved back by the number
*    of bytes reported by CX_DECODE_GetBufferLen() plus any input byte
*    not yet taken by the decoder, so that bytes read ahead are
*    available to the caller again.
*
*    Terminates the application with exit status 100 if the decoder
*    reports an error or if the input file cannot be read.
*/
static void _DecompressBlocks(FILE *pFile, MEMBER_INFO *pInfo) {
  CX_DECODE_CONTEXT C;
  CX_PARAS          Paras;
  CX_STREAM         Stream;
  int               Status;
  U8                ByteIn;
  U8                ByteOut;
  int               Overread;
  //
  Paras.WindowSize = 32768;
  Paras.MinLen     = 3;
  Paras.MaxLen     = 258;
  Paras.BlockLen   = 0;
  //
  pInfo->ISize     = 0;
  pInfo->CalcISize = 0;
  pInfo->CRC       = 0;
  pInfo->CalcCRC   = 0xFFFFFFFF;  // CRC starts at all ones, inverted at the end.
  //
  Stream.AvailIn  = 0;
  Stream.AvailOut = 0;
  CX_DECODE_Init(&C, &CX_DEFLATE_Decode, &_StaticAllocator, &Paras);
  for (;;) {
    //
    // Refill the single-byte input buffer once the decoder has taken it.
    //
    if (Stream.AvailIn == 0) {
      if (fread(&ByteIn, 1, 1, pFile) > 0) {
        Stream.pIn     = &ByteIn;
        Stream.AvailIn = 1;
      } else if (ferror(pFile)) {
        //
        // A read error does not set the end-of-file indicator, so
        // without this check the loop would keep calling the decoder
        // with no input and never signal the end of the data.
        //
        perror("Can't read input file");
        exit(100);
      }
    }
    Stream.pOut     = &ByteOut;
    Stream.AvailOut = 1;
    //
    // Tell the decoder that no more input will come once EOF is reached.
    //
    Status = CX_DECODE_Process(&C, &Stream, feof(pFile) ? CX_FLUSH_END : CX_FLUSH_NONE);
    if (Status < 0) {
      printf("%s\n", CX_GetErrorText(Status));
      exit(100);
    }
    //
    // AvailOut dropping to zero means one output byte was produced.
    //
    if (Stream.AvailOut == 0) {
      pInfo->CalcISize++;
      pInfo->CalcCRC = SEGGER_CRC_Calc_EDB88320(&ByteOut, 1, pInfo->CalcCRC);
    }
    //
    // Status 1 ends the loop -- presumably "end of compressed stream";
    // confirm against the emCompress-Flex documentation.
    //
    if (Status == 1) {
      break;
    }
  }
  //
  pInfo->CalcCRC = ~pInfo->CalcCRC;
  //
  // Give back input that was read from the file but not used.
  //
  Overread = CX_DECODE_GetBufferLen(&C) + Stream.AvailIn;
  fseek(pFile, -Overread, SEEK_CUR);
  //
  CX_DECODE_Exit(&C);
}

/*********************************************************************
*
*       _PrintListing()
*
*  Function description
*    Displays the content of a file, ensures compressed data
*    can be decompressed.
*
*  Parameters
*    pFile - Pointer to input file.
*
*  Syntax
*    From [RFC 1952], each member has the following structure:
*
*    +---+---+---+---+---+---+---+---+---+---+
*    |ID1|ID2|CM |FLG|     MTIME     |XFL|OS | (more-->)
*    +---+---+---+---+---+---+---+---+---+---+
*
*    (if FLG.FEXTRA set)
*
*    +---+---+=================================+
*    | XLEN  |...XLEN bytes of "extra field"...| (more-->)
*    +---+---+=================================+
*
*    (if FLG.FNAME set)
*
*    +=========================================+
*    |...original file name, zero-terminated...| (more-->)
*    +=========================================+
*
*    (if FLG.FCOMMENT set)
*
*    +===================================+
*    |...file comment, zero-terminated...| (more-->)
*    +===================================+
*
*    (if FLG.FHCRC set)
*
*    +---+---+
*    | CRC16 |
*    +---+---+
*
*    +=======================+
*    |...compressed blocks...| (more-->)
*    +=======================+
*
*    0   1   2   3   4   5   6   7
*    +---+---+---+---+---+---+---+---+
*    |     CRC32     |     ISIZE     |
*    +---+---+---+---+---+---+---+---+
*/
static void _PrintListing(FILE *pFile) {
  MEMBER_INFO Info;
  U8          aHeader[10];
  U8          aCRC[4];
  U8          aISize[4];
  char        aName[256];
  char        c;
  unsigned    i;
  //
  printf("                     Size               CRC\n");
  printf("File name            Computed   Stored  Computed   Stored  Status\n");
  printf("-------------------  -------- --------  -------- --------  -----------\n");
  //         
  for (;;) {
    //
    fread(aHeader, 1, sizeof(aHeader), pFile);
    if (feof(pFile)) {
      return;
    }
    if (ferror(pFile)) {
      perror("Can't read input file");
      exit(100);
    }
    //
    if (aHeader[0] != 0x1F || aHeader[1]!= 0x8B) {
      printf("Incorrect ID bytes for a gzip file\n");
      exit(100);
    }
    if (aHeader[2] != 0x08) {
      printf("Compression method is not DEFLATE\n");
      exit(100);
    }
    //
    if (aHeader[3] & FEXTRA) {
      fseek(pFile, aHeader[8], SEEK_CUR);
    }
    if (aHeader[3] & FNAME) {
      i = 0;
      do {
        fread(&c, 1, 1, pFile);
        if (i < sizeof(aName)) {
          aName[i++] = c;
        }
      } while (c != 0);
      aName[sizeof(aName)-1] = '\0';
    } else {
      strcpy(aName, "<stdin>");
    }
    if (aHeader[3] & FCOMMENT) {
      do {
        fread(&c, 1, 1, pFile);
      } while (c != 0);
      aName[sizeof(aName)-1] = '\0';
    }
    if (aHeader[3] & FHCRC) {
      fread(&c, 1, 1, pFile);
      fread(&c, 1, 1, pFile);
    }
    //
    _DecompressBlocks(pFile, &Info);
    //
    fread(aCRC,   1, 4, pFile);
    fread(aISize, 1, 4, pFile);
    Info.CRC   = SEGGER_RdU32LE(aCRC);
    Info.ISize = SEGGER_RdU32LE(aISize);
    //
    printf("%-20s %8u %8u  %08X %08X  ",
           aName,
           Info.ISize,
           Info.CalcISize,
           Info.CRC,
           Info.CalcCRC);
    if (Info.ISize == Info.CalcISize && Info.CRC == Info.CalcCRC) {
      printf("OK\n");
    } else {
      printf("Mismatch\n");
      exit(100);
    }
  }
}

/*********************************************************************
*
*       Public code
*
**********************************************************************
*/

/*********************************************************************
*
*       main()
*
*  Function description
*    Application entry point.  Prints the sign-on banner, then checks
*    the gzip file named by the single command line argument.
*
*  Parameters
*    argc - Argument count.
*    argv - Argument vector.
*
*  Return value
*    Exit status: 0 when the listing completes, 100 on a usage error
*    or when the file cannot be opened.
*/
int main(int argc, char **argv) {
  FILE *pInput;
  //
  SEGGER_MEM_SYSTEM_HEAP_Init(&_StaticAllocator);
  _PrintSignOn();
  //
  if (argc == 2) {
    pInput = fopen(argv[1], "rb");
    if (pInput != NULL) {
      _PrintListing(pInput);
      fclose(pInput);
      exit(0);
    }
    printf("%s: can't open '%s' for reading\n", argv[0], argv[1]);
  } else {
    printf("Syntax: %s <filename>\n", argv[0]);
  }
  exit(100);
}

/*************************** End of file ****************************/