HOWTO decompress a gzip archive

From SEGGER Wiki
Jump to: navigation, search

emCompress-Pro provides the capability to decompress streams that are compressed in various formats, but it does not provide out-of-the-box container support.

Recognizing that customers may want to deal with encapsulated streams, this code can be used with emCompress-Pro to check the integrity of a gzip archive that contains files compressed with the DEFLATE algorithm:

/*********************************************************************
*                   (c) SEGGER Microcontroller GmbH                  *
*                        The Embedded Experts                        *
*                           www.segger.com                           *
**********************************************************************

-------------------------- END-OF-HEADER -----------------------------

File        : CX_GZip.c
Purpose     : List the content of a gzip-compressed file.

*/

/*********************************************************************
*
*       #include Section
*
**********************************************************************
*/

#include "CX_Int.h"
#include "SEGGER_MEM.h"
#include "SEGGER_UTIL.h"
#include "SEGGER_CRC.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*********************************************************************
*
*       Defines, fixed
*
**********************************************************************
*/

//
// Bit masks for the FLG byte of a gzip member header (header byte 3),
// named as in [RFC 1952].
//
#define  FTEXT      0x01   // FLG.FTEXT; not referenced in this file.
#define  FHCRC      0x02   // FLG.FHCRC: a two-byte header CRC16 is present.
#define  FEXTRA     0x04   // FLG.FEXTRA: an "extra field" is present.
#define  FNAME      0x08   // FLG.FNAME: a zero-terminated original file name is present.
#define  FCOMMENT   0x10   // FLG.FCOMMENT: a zero-terminated file comment is present.

/*********************************************************************
*
*       Local types
*
**********************************************************************
*/

//
// Stored and computed check values for a single gzip member.
//
typedef struct {
  U32 ISize;      // Uncompressed size as read from the member trailer.
  U32 CalcISize;  // Number of bytes actually produced by decompression.
  U32 CRC;        // CRC32 as read from the member trailer.
  U32 CalcCRC;    // CRC computed over the decompressed bytes.
} MEMBER_INFO;

/*********************************************************************
*
*       Static data
*
**********************************************************************
*/

//
// Memory allocator context; set up by SEGGER_MEM_SYSTEM_HEAP_Init() in
// main() and handed to CX_DECODE_Init() for each decompressed member.
//
static SEGGER_MEM_CONTEXT _StaticAllocator;

/*********************************************************************
*
*       Static code
*
**********************************************************************
*/

/*********************************************************************
*
*       _PrintSignOn()
*
*  Function description
*    Prints the application's sign-on banner to stdout: product name
*    and version, build date and time, and the copyright text.
*/
static void _PrintSignOn(void) {
  printf("\n");
  printf("emCompress-Flex GZip Checker V%s ", CX_GetVersionText());
  printf("compiled " __DATE__ " " __TIME__ "\n");
  printf("%s    www.segger.com\n\n", CX_GetCopyrightText());
}

/*********************************************************************
*
*       _DecompressBlocks()
*
*  Function description
*    Decompresses the DEFLATE blocks of one gzip member. The data is
*    fed to the decoder one byte at a time and the decoder output is
*    taken one byte at a time; the output is not stored, only counted
*    and run through the CRC.
*
*  Parameters
*    pFile - Pointer to input file, positioned at the start of the
*            compressed blocks.
*    pInfo - Pointer to member information. All four fields are
*            reset; CalcISize and CalcCRC receive the results.
*
*  Additional information
*    Terminates the application with exit status 100 if the decoder
*    reports an error, if the input file cannot be read, or if the
*    file position cannot be restored.
*/
static void _DecompressBlocks(FILE *pFile, MEMBER_INFO *pInfo) {
  CX_DECODE_CONTEXT C;
  CX_PARAS          Paras;
  CX_STREAM         Stream;
  int               Status;
  int               SeekStatus;
  U8                ByteIn;
  U8                ByteOut;
  long              Overread;
  //
  // Decoder parameters handed to CX_DECODE_Init().
  //
  Paras.WindowSize = 32768;
  Paras.MinLen     = 3;
  Paras.MaxLen     = 258;
  Paras.BlockLen   = 0;
  //
  pInfo->ISize     = 0;
  pInfo->CalcISize = 0;
  pInfo->CRC       = 0;
  pInfo->CalcCRC   = 0xFFFFFFFF;
  //
  Stream.AvailIn  = 0;
  Stream.AvailOut = 0;
  CX_DECODE_Init(&C, &CX_DEFLATE_Decode, &_StaticAllocator, &Paras);
  for (;;) {
    //
    // Refill the single-byte input buffer once the decoder has consumed it.
    // A failed read is only acceptable at end of file; a genuine I/O error
    // must stop processing, otherwise the loop would keep calling the
    // decoder with no input and no end-of-stream indication.
    //
    if (Stream.AvailIn == 0) {
      if (fread(&ByteIn, 1, 1, pFile) > 0) {
        Stream.pIn     = &ByteIn;
        Stream.AvailIn = 1;
      } else if (ferror(pFile)) {
        perror("Can't read input file");
        exit(100);
      }
    }
    Stream.pOut     = &ByteOut;
    Stream.AvailOut = 1;
    //
    // Once the file is exhausted, tell the decoder that no more input follows.
    //
    Status = CX_DECODE_Process(&C, &Stream, feof(pFile) ? CX_FLUSH_END : CX_FLUSH_NONE);
    if (Status < 0) {
      printf("%s\n", CX_GetErrorText(Status));
      exit(100);
    }
    //
    // AvailOut dropping to zero means the decoder wrote ByteOut.
    //
    if (Stream.AvailOut == 0) {
      pInfo->CalcISize++;
      pInfo->CalcCRC = SEGGER_CRC_Calc_EDB88320(&ByteOut, 1, pInfo->CalcCRC);
    }
    if (Status == 1) {
      break;
    }
  }
  //
  // Final inversion of the running CRC (it was seeded with 0xFFFFFFFF).
  //
  pInfo->CalcCRC = ~pInfo->CalcCRC;
  //
  // Step the file position back by the bytes that were read from the file
  // but not used: the length reported by CX_DECODE_GetBufferLen() plus any
  // input byte still pending in the stream. The caller reads the member
  // trailer from the resulting position.
  //
  Overread   = (long)CX_DECODE_GetBufferLen(&C) + (long)Stream.AvailIn;
  SeekStatus = fseek(pFile, -Overread, SEEK_CUR);
  //
  CX_DECODE_Exit(&C);
  //
  if (SeekStatus != 0) {
    perror("Can't reposition input file");
    exit(100);
  }
}

/*********************************************************************
*
*       _ReadBytes()
*
*  Function description
*    Reads exactly NumBytes bytes from the input file. Terminates the
*    application with exit status 100 if the file ends early or a
*    read error occurs.
*
*  Parameters
*    pFile    - Pointer to input file.
*    pData    - Pointer to object that receives the bytes.
*    NumBytes - Number of bytes to read.
*/
static void _ReadBytes(FILE *pFile, void *pData, size_t NumBytes) {
  if (fread(pData, 1, NumBytes, pFile) != NumBytes) {
    if (ferror(pFile)) {
      perror("Can't read input file");
    } else {
      printf("Unexpected end of file\n");
    }
    exit(100);
  }
}

/*********************************************************************
*
*       _ReadZString()
*
*  Function description
*    Consumes a zero-terminated string from the input file, keeping
*    as much of it as fits into the supplied buffer.
*
*  Parameters
*    pFile   - Pointer to input file.
*    pText   - Pointer to buffer that receives the string, or NULL
*              to discard the string.
*    TextLen - Capacity of the buffer in bytes, including terminator.
*/
static void _ReadZString(FILE *pFile, char *pText, size_t TextLen) {
  size_t i;
  char   c;
  //
  i = 0;
  do {
    _ReadBytes(pFile, &c, 1);
    if (pText != NULL && i < TextLen) {
      pText[i++] = c;
    }
  } while (c != 0);
  //
  // An over-long string fills the buffer without its terminator.
  //
  if (pText != NULL && TextLen > 0) {
    pText[TextLen - 1] = '\0';
  }
}

/*********************************************************************
*
*       _PrintListing()
*
*  Function description
*    Displays the content of a file, ensures compressed data
*    can be decompressed. For each member, one line is printed with
*    the file name, the computed and stored uncompressed sizes, the
*    computed and stored CRCs, and the check status.
*
*  Parameters
*    pFile - Pointer to input file.
*
*  Additional information
*    Returns when the end of the file is reached at a member
*    boundary. Terminates the application with exit status 100 on a
*    malformed or truncated member, on a read error, or when size or
*    CRC do not match.
*
*  Syntax
*    From [RFC 1952], each member has the following structure:
*
*    +---+---+---+---+---+---+---+---+---+---+
*    |ID1|ID2|CM |FLG|     MTIME     |XFL|OS | (more-->)
*    +---+---+---+---+---+---+---+---+---+---+
*
*    (if FLG.FEXTRA set)
*
*    +---+---+=================================+
*    | XLEN  |...XLEN bytes of "extra field"...| (more-->)
*    +---+---+=================================+
*
*    (if FLG.FNAME set)
*
*    +=========================================+
*    |...original file name, zero-terminated...| (more-->)
*    +=========================================+
*
*    (if FLG.FCOMMENT set)
*
*    +===================================+
*    |...file comment, zero-terminated...| (more-->)
*    +===================================+
*
*    (if FLG.FHCRC set)
*
*    +---+---+
*    | CRC16 |
*    +---+---+
*
*    +=======================+
*    |...compressed blocks...| (more-->)
*    +=======================+
*
*    0   1   2   3   4   5   6   7
*    +---+---+---+---+---+---+---+---+
*    |     CRC32     |     ISIZE     |
*    +---+---+---+---+---+---+---+---+
*/
static void _PrintListing(FILE *pFile) {
  MEMBER_INFO Info;
  U8          aHeader[10];
  U8          aXLen[2];
  U8          aHeaderCRC[2];
  U8          aCRC[4];
  U8          aISize[4];
  char        aName[256];
  size_t      NumRead;
  unsigned    XLen;
  //
  printf("                     Size               CRC\n");
  printf("File name            Computed   Stored  Computed   Stored  Status\n");
  printf("-------------------  -------- --------  -------- --------  -----------\n");
  //
  for (;;) {
    //
    // Read the fixed part of the member header. Reaching end of file
    // exactly at a member boundary is the normal way out.
    //
    NumRead = fread(aHeader, 1, sizeof(aHeader), pFile);
    if (ferror(pFile)) {
      perror("Can't read input file");
      exit(100);
    }
    if (NumRead == 0) {
      return;
    }
    if (NumRead != sizeof(aHeader)) {
      printf("Truncated gzip member header\n");
      exit(100);
    }
    //
    if (aHeader[0] != 0x1F || aHeader[1] != 0x8B) {
      printf("Incorrect ID bytes for a gzip file\n");
      exit(100);
    }
    if (aHeader[2] != 0x08) {
      printf("Compression method is not DEFLATE\n");
      exit(100);
    }
    //
    // Optional extra field: a two-byte little-endian XLEN follows the
    // fixed header, then XLEN bytes of data that are skipped.
    //
    if (aHeader[3] & FEXTRA) {
      _ReadBytes(pFile, aXLen, sizeof(aXLen));
      XLen = (unsigned)aXLen[0] | ((unsigned)aXLen[1] << 8);
      if (fseek(pFile, (long)XLen, SEEK_CUR) != 0) {
        perror("Can't skip extra field");
        exit(100);
      }
    }
    //
    // Optional original file name; a placeholder is shown if absent.
    //
    if (aHeader[3] & FNAME) {
      _ReadZString(pFile, aName, sizeof(aName));
    } else {
      strcpy(aName, "<stdin>");
    }
    //
    // Optional comment and header CRC are consumed but not evaluated.
    //
    if (aHeader[3] & FCOMMENT) {
      _ReadZString(pFile, NULL, 0);
    }
    if (aHeader[3] & FHCRC) {
      _ReadBytes(pFile, aHeaderCRC, sizeof(aHeaderCRC));
    }
    //
    _DecompressBlocks(pFile, &Info);
    //
    // Member trailer: stored CRC32 and ISIZE, both little-endian.
    //
    _ReadBytes(pFile, aCRC,   sizeof(aCRC));
    _ReadBytes(pFile, aISize, sizeof(aISize));
    Info.CRC   = SEGGER_RdU32LE(aCRC);
    Info.ISize = SEGGER_RdU32LE(aISize);
    //
    // Values are printed in the order of the column headings:
    // computed first, stored second.
    //
    printf("%-20s %8lu %8lu  %08lX %08lX  ",
           aName,
           (unsigned long)Info.CalcISize,
           (unsigned long)Info.ISize,
           (unsigned long)Info.CalcCRC,
           (unsigned long)Info.CRC);
    if (Info.ISize == Info.CalcISize && Info.CRC == Info.CalcCRC) {
      printf("OK\n");
    } else {
      printf("Mismatch\n");
      exit(100);
    }
  }
}

/*********************************************************************
*
*       Public code
*
**********************************************************************
*/

/*********************************************************************
*
*       main()
*
*  Function description
*    Application entry point. Sets up the allocator, prints the
*    sign-on banner and checks the gzip file named on the command
*    line.
*
*  Parameters
*    argc - Argument count; exactly one file name is expected.
*    argv - Argument vector; argv[1] names the file to check.
*
*  Return value
*    Exit status: 0 when the listing completed, 100 on a usage error
*    or when the file cannot be opened.
*/
int main(int argc, char **argv) {
  FILE *pInput;
  //
  SEGGER_MEM_SYSTEM_HEAP_Init(&_StaticAllocator);
  _PrintSignOn();
  //
  if (argc == 2) {
    pInput = fopen(argv[1], "rb");
    if (pInput != NULL) {
      _PrintListing(pInput);
      fclose(pInput);
      return 0;
    }
    printf("%s: can't open '%s' for reading\n", argv[0], argv[1]);
  } else {
    printf("Syntax: %s <filename>\n", argv[0]);
  }
  //
  // Returning from main() is equivalent to calling exit() with the same value.
  //
  return 100;
}

/*************************** End of file ****************************/