HOWTO decompress a Zip archive

From SEGGER Wiki
Jump to: navigation, search

emCompress-Pro provides the capability to decompress streams that are compressed in various formats, but it does not provide out-of-the-box container support.

Recognizing that customers may want to deal with encapsulated streams, this code can be used with emCompress-Pro to check the integrity of a Zip archive that contains files compressed with the DEFLATE algorithm or the LZMA algorithm:

/*********************************************************************
*                   (c) SEGGER Microcontroller GmbH                  *
*                        The Embedded Experts                        *
*                           www.segger.com                           *
**********************************************************************

-------------------------- END-OF-HEADER -----------------------------

File        : CX_Zip.c
Purpose     : Display content of a zip-compressed file.

*/

/*********************************************************************
*
*       #include Section
*
**********************************************************************
*/

#include "CX_Int.h"
#include "SEGGER_MEM.h"
#include "SEGGER_UTIL.h"
#include "SEGGER_CRC.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*********************************************************************
*
*       Local types
*
**********************************************************************
*/

//
// Per-member bookkeeping filled in by _PrintListing() and
// _DecompressBlocks() while one archive member is checked.
// "Stored" values come from the member's local file header,
// "calculated" values are derived from the data actually read.
//
typedef struct {
  U32  ISize;           // Uncompressed (plain) size stored in the local file header
  U32  CalcISize;       // Uncompressed size counted while decompressing the member
  U32  OSize;           // Compressed size stored in the local file header; counted down to zero by _DecompressBlocks() as input is consumed
  U32  CRC;             // CRC of the plain data as stored in the local file header
  U32  CalcCRC;         // CRC calculated over the decompressed data
  char aName[32];       // Zero-terminated member name from the local file header (restarted after each '/', truncated to 31 characters)
  char aStatus[32];     // Additional text appended to "OK" in the listing (compression method and parameters)
  U8   aProperties[13]; // LZMA properties read from the member data: [0] control byte, [1..4] window size (little endian); remaining bytes are zeroed
} MEMBER_INFO;

//
// Image of a Zip local file header exactly as it is read from the
// archive with a single fread() in _PrintListing(). Every field is
// a byte array that is decoded with the SEGGER_RdU16LE()/
// SEGGER_RdU32LE()/SEGGER_RdU32BE() helpers, so do not reorder or
// resize fields. The file name and the extra field follow the header
// in the archive and are read/skipped separately.
//
typedef struct {
  U8 aSignature        [4];  // Compared (big endian) against 0x504B0304 (local header) and 0x504B0102 (central directory)
  U8 aVersion          [2];  // Not evaluated by this file
  U8 aFlags            [2];  // Not evaluated by this file
  U8 aCompression      [2];  // Compression method: 0x00 = stored, 0x08 = DEFLATE, 0x0E = LZMA
  U8 aModTime          [2];  // Not evaluated by this file
  U8 aModDate          [2];  // Not evaluated by this file
  U8 aCRC              [4];  // Stored CRC of the plain data, little endian
  U8 aCompressedSize   [4];  // Number of compressed data bytes, little endian
  U8 aUncompressedSize [4];  // Number of plain data bytes, little endian
  U8 aFileNameLen      [2];  // Number of file name bytes following the header, little endian
  U8 aExtraFieldLen    [2];  // Number of extra field bytes following the file name, little endian
} ZIP_LOCAL_HEADER;

/*********************************************************************
*
*       Static const data
*
**********************************************************************
*/

/*********************************************************************
*
*       Static data
*
**********************************************************************
*/

//
// Memory allocator context handed to CX_DECODE_Init(); set up once
// in main() by SEGGER_MEM_SYSTEM_HEAP_Init().
//
static SEGGER_MEM_CONTEXT _StaticAllocator;

/*********************************************************************
*
*       Static code
*
**********************************************************************
*/

/*********************************************************************
*
*       _PrintSignOn()
*
*  Function description
*    Prints the application's sign-on banner using the standard
*    output functions: a blank line, the product name with the
*    library version text, the build date and time, and the
*    copyright text followed by the SEGGER web address.
*/
static void _PrintSignOn(void) {
  static const char _aBuildInfo[] = "compiled " __DATE__ " " __TIME__ "\n";
  //
  // Leading blank line, then product and version.
  //
  fputs("\n", stdout);
  printf("emCompress-Flex Zip Checker V%s ", CX_GetVersionText());
  //
  // Build stamp is a fixed string, no formatting required.
  //
  fputs(_aBuildInfo, stdout);
  //
  // Copyright line, followed by an empty line.
  //
  printf("%s    www.segger.com\n\n", CX_GetCopyrightText());
}

/*********************************************************************
*
*       _DecompressBlocks()
*
*  Function description
*    Decompress DEFLATE or LZMA blocks.
*
*  Parameters
*    pFile  - Pointer to input file, positioned at the first byte of
*             compressed data.
*    pAPI   - Pointer to decompression API.
*    pParas - Pointer to decompression parameters.
*    pInfo  - Pointer to member information. On entry OSize holds the
*             number of compressed bytes to consume; it is counted
*             down as bytes are read. On return CalcISize holds the
*             number of decompressed bytes and CalcCRC their CRC.
*
*  Additional information
*    Data is pushed through the decoder one byte in, one byte out.
*    The loop ends when CX_DECODE_Process() returns 1. Any decoder
*    error, read error, or premature end of the input file
*    terminates the application with exit status 100.
*/
static void _DecompressBlocks(      FILE          * pFile,
                              const CX_DECODE_API * pAPI,
                              const CX_PARAS      * pParas,
                                    MEMBER_INFO   * pInfo) {
  CX_DECODE_CONTEXT C;
  CX_STREAM         Stream;
  int               Status;
  U8                ByteIn;
  U8                ByteOut;
  //
  ByteIn  = 0;
  ByteOut = 0;
  //
  pInfo->CalcISize = 0;
  pInfo->CalcCRC   = 0xFFFFFFFF;
  //
  // Point the stream at the single-byte buffers right away so that
  // the decoder never sees an indeterminate pointer, even for a
  // member without any compressed data.
  //
  Stream.pIn      = &ByteIn;
  Stream.AvailIn  = 0;
  Stream.pOut     = &ByteOut;
  Stream.AvailOut = 0;
  Status = CX_DECODE_Init(&C, pAPI, &_StaticAllocator, pParas);
  if (Status < 0) {
    printf("Can't instantiate decompressor!\n");
    exit(100);
  }
  for (;;) {
    //
    // Refill the input byte once the decoder has consumed the
    // previous one and compressed data remains.
    //
    if (Stream.AvailIn == 0 && pInfo->OSize > 0) {
      if (fread(&ByteIn, 1, 1, pFile) != 1) {
        //
        // Never feed a byte that was not actually read.
        //
        if (ferror(pFile)) {
          perror("Can't read input file");
        } else {
          printf("Unexpected end of file in compressed data\n");
        }
        exit(100);
      }
      Stream.pIn     = &ByteIn;
      Stream.AvailIn = 1;
      pInfo->OSize--;
    }
    Stream.pOut     = &ByteOut;
    Stream.AvailOut = 1;
    //
    // Once the last compressed byte has been handed over, tell the
    // decoder that no more input will follow.
    //
    Status = CX_DECODE_Process(&C, &Stream, pInfo->OSize == 0 ? CX_FLUSH_END : CX_FLUSH_NONE);
    if (Status < 0) {
      printf("%s\n", CX_GetErrorText(Status));
      exit(100);
    }
    //
    // The output slot is full, i.e. one byte was produced:
    // account for it in size and CRC.
    //
    if (Stream.AvailOut == 0) {
      pInfo->CalcISize++;
      pInfo->CalcCRC = SEGGER_CRC_Calc_EDB88320(&ByteOut, 1, pInfo->CalcCRC);
    }
    if (Status == 1) {
      break;
    }
  }
  //
  // Final inversion of the running CRC (it was seeded with all ones).
  //
  pInfo->CalcCRC = ~pInfo->CalcCRC;
  //
  CX_DECODE_Exit(&C);
}

/*********************************************************************
*
*       _PrintListing()
*
*  Function description
*    Displays the content of a file, ensures compressed data
*    can be decompressed.
*
*  Parameters
*    pFile - Pointer to input file.
*/
static void _PrintListing(FILE *pFile) {
  ZIP_LOCAL_HEADER Header;
  MEMBER_INFO      Info;
  CX_PARAS         Paras;
  char             c;
  unsigned         i;
  unsigned         p;
  unsigned         Len;
  //
  printf("                     Size               CRC\n");
  printf("File name            Computed   Stored  Computed   Stored  Status\n");
  printf("-------------------  -------- --------  -------- --------  -----------\n");
  //
  for (;;) {
    //
    fread(&Header, 1, sizeof(Header), pFile);
    if (feof(pFile)) {
      return;
    }
    if (ferror(pFile)) {
      perror("Can't read input file");
      exit(100);
    }
    //
    if (SEGGER_RdU32BE(&Header.aSignature[0]) == 0x504B0304) {
      // Local header
    } else if (SEGGER_RdU32BE(&Header.aSignature[0]) == 0x504B0102) {
      // Central directory
      break;
    } else {
      printf("Incorrect ID bytes for a zip file\n");
      exit(100);
    }
    if (SEGGER_RdU16LE(&Header.aCompression[0]) != 0x08 &&
        SEGGER_RdU16LE(&Header.aCompression[0]) != 0x0E &&
        SEGGER_RdU16LE(&Header.aCompression[0]) != 0x00) {
      printf("Compression method is not DEFLATE, LZMA, or STORED\n");
      exit(100);
    }
    //
    Info.ISize = SEGGER_RdU32LE(&Header.aUncompressedSize[0]);
    Info.OSize = SEGGER_RdU32LE(&Header.aCompressedSize[0]);
    Info.CRC   = SEGGER_RdU32LE(&Header.aCRC[0]);
    //
    i   = 0;
    p   = 0;
    Len = SEGGER_RdU16LE(&Header.aFileNameLen[0]);
    for (i = 0; i < Len; ++i) {
      fread(&c, 1, 1, pFile);
      if (p < sizeof(Info.aName)-1) {
        Info.aName[p++] = c;
        Info.aName[p]   = 0;
      }
      if (c == '/') {
        p = 0;
      }
    }
    Info.aName[SEGGER_MIN(Len, sizeof(Info.aName)-1)] = '\0';
    Info.aStatus[0] = 0;
    //
    fseek(pFile, SEGGER_RdU16LE(&Header.aExtraFieldLen[0]),  SEEK_CUR);
    //
    if (SEGGER_RdU16LE(&Header.aCompression[0]) == 0x08) {
      //
      // DEFLATE algorithm.
      //
      memset(&Paras, 0, sizeof(Paras));
      Paras.WindowSize = 32768;
      Paras.MinLen     = 3;
      Paras.MaxLen     = 258;
      sprintf(Info.aStatus, " (DEFLATE)");
      _DecompressBlocks(pFile, &CX_DEFLATE_Decode, &Paras, &Info);
    } else if (SEGGER_RdU16LE(&Header.aCompression[0]) == 0x0E) {
      U8 aVersion[2];
      U8 aPropertiesSize[2];
      //
      // LZMA algorithm.  This uses a header scheme similar to
      // LZMA-alone, the header control byte is stored along
      // with the woindow size.
      //
      fread(aVersion,        1, 2, pFile);
      fread(aPropertiesSize, 1, 2, pFile);
      if (SEGGER_RdU16LE(aPropertiesSize) != 5) {
        printf("Unexpected property size for LZMA\n");
        exit(100);
      }
      memset(Info.aProperties, 0, sizeof(Info.aProperties));
      fread(Info.aProperties, 1, 5, pFile);
      //
      memset(&Paras, 0, sizeof(Paras));
      Paras.MinLen     = 2;
      Paras.MaxLen     = 273;
      Paras.WindowSize = SEGGER_RdU32LE(&Info.aProperties[1]);
      if (CX_LZMA_DECODE_UnpackControlByte(&Paras, Info.aProperties[0]) < 0) {
        printf("Bad LZMA properties byte\n");
        exit(100);
      }
      sprintf(Info.aStatus, " (LZMA with LC=%u, LP=%u, PB=%u)", Paras.P1, Paras.P2, Paras.P3);
      Info.OSize -= 9;  // Remove aVersion, aPropertiesSize, and aProperties from encoded size
      _DecompressBlocks(pFile, &CX_LZMA_Decode, &Paras, &Info);
    } else {
      Info.CalcCRC   = 0;
      Info.CalcISize = 0;
      fseek(pFile, SEGGER_RdU32LE(&Header.aCompressedSize[0]), SEEK_CUR);
    }
    //
    printf("%-20s %8u %8u  %08X %08X  ",
           Info.aName,
           Info.CalcISize,
           Info.ISize,
           Info.CalcCRC,
           Info.CRC);
    if (Info.ISize == Info.CalcISize && Info.CRC == Info.CalcCRC) {
      printf("OK%s\n", Info.aStatus);
    } else {
      printf("Mismatch\n");
      exit(100);
    }
  }
}

/*********************************************************************
*
*       Public code
*
**********************************************************************
*/

/*********************************************************************
*
*       main()
*
*  Function description
*    Application entry point. Initializes the allocator, prints the
*    sign-on banner, and checks the Zip archive named by the single
*    command line argument.
*
*  Parameters
*    argc - Argument count.
*    argv - Argument vector.
*
*  Return value
*    ==   0 - Archive was processed.
*    == 100 - Wrong number of arguments or file could not be opened.
*/
int main(int argc, char **argv) {
  FILE *pInput;
  //
  SEGGER_MEM_SYSTEM_HEAP_Init(&_StaticAllocator);
  _PrintSignOn();
  //
  // Exactly one argument, the archive's file name, is expected.
  //
  if (argc == 2) {
    pInput = fopen(argv[1], "rb");
    if (pInput != NULL) {
      _PrintListing(pInput);
      fclose(pInput);
      return 0;
    }
    printf("%s: can't open '%s' for reading\n", argv[0], argv[1]);
  } else {
    printf("Syntax: %s <filename>\n", argv[0]);
  }
  return 100;
}

/*************************** End of file ****************************/