HOWTO decompress a Zip archive

From SEGGER Wiki
Jump to: navigation, search

emCompress-Flex provides the capability to decompress streams that are compressed in various formats, but it does not provide canned container support.

Recognizing that customers may want to deal with encapsulated streams, this code can be used with emCompress-Flex to check the integrity of a Zip archive that contains files compressed with the DEFLATE algorithm or the LZMA algorithm:

/*********************************************************************
*                   (c) SEGGER Microcontroller GmbH                  *
*                        The Embedded Experts                        *
*                           www.segger.com                           *
**********************************************************************

-------------------------- END-OF-HEADER -----------------------------

File        : CX_Zip.c
Purpose     : Display content of a zip-compressed file.

*/

/*********************************************************************
*
*       #include Section
*
**********************************************************************
*/

#include "CX_Int.h"
#include "SEGGER_MEM.h"
#include "SEGGER_UTIL.h"
#include "SEGGER_CRC.h"
#include <stdio.h>
#include <stdlib.h>

/*********************************************************************
*
*       Local types
*
**********************************************************************
*/

//
// Per-member bookkeeping: the values stored in the zip local file
// header next to the values recomputed while processing the member
// data, so that _PrintListing() can print and compare them.
//
typedef struct {
  U32  ISize;           // Plain (uncompressed) file size stored in local file header
  U32  CalcISize;       // Plain file size counted from the decompressed data
  U32  OSize;           // Compressed size stored in local file header; counted down by _DecompressBlocks() as input bytes are consumed
  U32  CRC;             // Plain file CRC stored in local file header
  U32  CalcCRC;         // Plain file CRC calculated from the decompressed data
  char aName[32];       // Zero-terminated name from local file header; restarted after each '/' and limited to 31 characters
  char aStatus[32];     // Text printed after "OK": compression method and, for LZMA, its parameters
  U8   aProperties[13]; // LZMA properties read from the member data: [0] control byte, [1..4] window size (little-endian); remaining bytes are zeroed and unused here
} MEMBER_INFO;

//
// Byte image of a zip local file header, filled directly by fread().
// Every field is a U8 array, so multi-byte values are decoded
// explicitly with SEGGER_RdU16LE()/SEGGER_RdU32LE() (the signature is
// compared using SEGGER_RdU32BE()).
// NOTE(review): relies on the compiler inserting no padding, i.e.
// sizeof(ZIP_LOCAL_HEADER) == 30 -- confirm for the toolchain in use.
//
typedef struct {
  U8 aSignature        [4];  // 0x504B0304 = local header, 0x504B0102 = central directory (ends the listing)
  U8 aVersion          [2];  // Not examined by this file
  U8 aFlags            [2];  // Not examined by this file
  U8 aCompression      [2];  // 0x00 = STORED, 0x08 = DEFLATE, 0x0E = LZMA; anything else is rejected
  U8 aModTime          [2];  // Not examined by this file
  U8 aModDate          [2];  // Not examined by this file
  U8 aCRC              [4];  // CRC of the plain data
  U8 aCompressedSize   [4];  // Number of member data bytes following name and extra field
  U8 aUncompressedSize [4];  // Size of the plain data
  U8 aFileNameLen      [2];  // Number of file name bytes following this header
  U8 aExtraFieldLen    [2];  // Number of extra field bytes following the file name (skipped)
} ZIP_LOCAL_HEADER;

/*********************************************************************
*
*       Static const data
*
**********************************************************************
*/

/*********************************************************************
*
*       Static data
*
**********************************************************************
*/

// Allocator context passed to CX_DECODE_Init(); set up in main() by SEGGER_MEM_SYSTEM_HEAP_Init().
static SEGGER_MEM_CONTEXT _StaticAllocator;

/*********************************************************************
*
*       Static code
*
**********************************************************************
*/

/*********************************************************************
*
*       _PrintSignOn()
*
*  Function description
*    Prints the sign-on banner to stdout: product name, emCompress
*    version, build date and time, and the copyright notice.
*/
static void _PrintSignOn(void) {
  //
  // Blank line, then version and build stamp on a single line.
  //
  putchar('\n');
  printf("emCompress-Flex Zip Checker V%s compiled " __DATE__ " " __TIME__ "\n", CX_GetVersionText());
  //
  // Copyright line followed by a blank line.
  //
  printf("%s    www.segger.com\n\n", CX_GetCopyrightText());
}

/*********************************************************************
*
*       _DecompressBlocks()
*
*  Function description
*    Decompress DEFLATE or LZMA blocks, counting the decompressed
*    bytes and calculating their CRC.
*
*  Parameters
*    pFile  - Pointer to input file, positioned at the first byte
*             of the compressed data.
*    pAPI   - Pointer to decompression API.
*    pParas - Pointer to decompression parameters.
*    pInfo  - Pointer to member information.  On entry, OSize is
*             the number of compressed bytes to read; it is counted
*             down as bytes are read.  On return, CalcISize and
*             CalcCRC describe the decompressed data.
*
*  Additional information
*    Does not return on failure: if the decompressor can't be
*    instantiated, the input can't be read, or the decompressor
*    reports an error, a message is printed and the application
*    exits with status 100.
*/
static void _DecompressBlocks(      FILE          * pFile,
                              const CX_DECODE_API * pAPI,
                              const CX_PARAS      * pParas,
                                    MEMBER_INFO   * pInfo) {
  CX_DECODE_CONTEXT C;
  CX_STREAM         Stream;
  int               Status;
  U8                ByteIn;
  U8                ByteOut;
  //
  pInfo->CalcISize = 0;
  pInfo->CalcCRC   = 0xFFFFFFFF;
  //
  // Give the stream valid pointers from the start so that the
  // decoder never sees an indeterminate pIn when there is no
  // compressed data at all.
  //
  ByteIn          = 0;
  ByteOut         = 0;
  Stream.pIn      = &ByteIn;
  Stream.AvailIn  = 0;
  Stream.pOut     = &ByteOut;
  Stream.AvailOut = 0;
  Status = CX_DECODE_Init(&C, pAPI, &_StaticAllocator, pParas);
  if (Status < 0) {
    printf("Can't instantiate decompressor!\n");
    exit(100);
  }
  for (;;) {
    //
    // Refill the one-byte input window once the decoder has
    // consumed it and compressed data remains.
    //
    if (Stream.AvailIn == 0 && pInfo->OSize > 0) {
      if (fread(&ByteIn, 1, 1, pFile) != 1) {
        //
        // A short read means the archive is truncated or unreadable;
        // don't feed a stale byte to the decoder.
        //
        if (ferror(pFile)) {
          perror("Can't read input file");
        } else {
          printf("Unexpected end of input file\n");
        }
        exit(100);
      }
      Stream.pIn     = &ByteIn;
      Stream.AvailIn = 1;
      pInfo->OSize--;
    }
    //
    // Offer room for exactly one output byte per call.
    //
    Stream.pOut     = &ByteOut;
    Stream.AvailOut = 1;
    //
    // Signal end of input once the last compressed byte has been read.
    //
    Status = CX_DECODE_Process(&C, &Stream, pInfo->OSize == 0 ? CX_FLUSH_END : CX_FLUSH_NONE);
    if (Status < 0) {
      printf("%s\n", CX_GetErrorText(Status));
      exit(100);
    }
    if (Stream.AvailOut == 0) {
      //
      // The output slot was filled: account for the decoded byte.
      //
      pInfo->CalcISize++;
      pInfo->CalcCRC = SEGGER_CRC_Calc_EDB88320(&ByteOut, 1, pInfo->CalcCRC);
    }
    if (Status == 1) {
      break;
    }
  }
  //
  // Final inversion of the CRC that was seeded with all ones.
  //
  pInfo->CalcCRC = ~pInfo->CalcCRC;
  //
  CX_DECODE_Exit(&C);
}

/*********************************************************************
*
*       _PrintListing()
*
*  Function description
*    Displays the content of a file, ensures compressed data
*    can be decompressed.
*
*  Parameters
*    pFile - Pointer to input file.
*/
static void _PrintListing(FILE *pFile) {
  ZIP_LOCAL_HEADER Header;
  MEMBER_INFO      Info;
  CX_PARAS         Paras;
  char             c;
  unsigned         i;
  unsigned         p;
  unsigned         Len;
  //
  printf("                     Size               CRC\n");
  printf("File name            Computed   Stored  Computed   Stored  Status\n");
  printf("-------------------  -------- --------  -------- --------  -----------\n");
  //
  for (;;) {
    //
    fread(&Header, 1, sizeof(Header), pFile);
    if (feof(pFile)) {
      return;
    }
    if (ferror(pFile)) {
      perror("Can't read input file");
      exit(100);
    }
    //
    if (SEGGER_RdU32BE(&Header.aSignature[0]) == 0x504B0304) {
      // Local header
    } else if (SEGGER_RdU32BE(&Header.aSignature[0]) == 0x504B0102) {
      // Central directory
      break;
    } else {
      printf("Incorrect ID bytes for a zip file\n");
      exit(100);
    }
    if (SEGGER_RdU16LE(&Header.aCompression[0]) != 0x08 &&
        SEGGER_RdU16LE(&Header.aCompression[0]) != 0x0E &&
        SEGGER_RdU16LE(&Header.aCompression[0]) != 0x00) {
      printf("Compression method is not DEFLATE, LZMA, or STORED\n");
      exit(100);
    }
    //
    Info.ISize = SEGGER_RdU32LE(&Header.aUncompressedSize[0]);
    Info.OSize = SEGGER_RdU32LE(&Header.aCompressedSize[0]);
    Info.CRC   = SEGGER_RdU32LE(&Header.aCRC[0]);
    //
    i   = 0;
    p   = 0;
    Len = SEGGER_RdU16LE(&Header.aFileNameLen[0]);
    for (i = 0; i < Len; ++i) {
      fread(&c, 1, 1, pFile);
      if (p < sizeof(Info.aName)-1) {
        Info.aName[p++] = c;
        Info.aName[p]   = 0;
      }
      if (c == '/') {
        p = 0;
      }
    }
    Info.aName[SEGGER_MIN(Len, sizeof(Info.aName)-1)] = '\0';
    Info.aStatus[0] = 0;
    //
    fseek(pFile, SEGGER_RdU16LE(&Header.aExtraFieldLen[0]),  SEEK_CUR);
    //
    if (SEGGER_RdU16LE(&Header.aCompression[0]) == 0x08) {
      //
      // DEFLATE algorithm.
      //
      memset(&Paras, 0, sizeof(Paras));
      Paras.WindowSize = 32768;
      Paras.MinLen     = 3;
      Paras.MaxLen     = 258;
      sprintf(Info.aStatus, " (DEFLATE)");
      _DecompressBlocks(pFile, &CX_DEFLATE_Decode, &Paras, &Info);
    } else if (SEGGER_RdU16LE(&Header.aCompression[0]) == 0x0E) {
      U8 aVersion[2];
      U8 aPropertiesSize[2];
      //
      // LZMA algorithm.  This uses a header scheme similar to
      // LZMA-alone, the header control byte is stored along
      // with the woindow size.
      //
      fread(aVersion,        1, 2, pFile);
      fread(aPropertiesSize, 1, 2, pFile);
      if (SEGGER_RdU16LE(aPropertiesSize) != 5) {
        printf("Unexpected property size for LZMA\n");
        exit(100);
      }
      memset(Info.aProperties, 0, sizeof(Info.aProperties));
      fread(Info.aProperties, 1, 5, pFile);
      //
      memset(&Paras, 0, sizeof(Paras));
      Paras.MinLen     = 2;
      Paras.MaxLen     = 273;
      Paras.WindowSize = SEGGER_RdU32LE(&Info.aProperties[1]);
      if (CX_LZMA_DECODE_UnpackControlByte(&Paras, Info.aProperties[0]) < 0) {
        printf("Bad LZMA properties byte\n");
        exit(100);
      }
      sprintf(Info.aStatus, " (LZMA with LC=%u, LP=%u, PB=%u)", Paras.P1, Paras.P2, Paras.P3);
      Info.OSize -= 9;  // Remove aVersion, aPropertiesSize, and aProperties from encoded size
      _DecompressBlocks(pFile, &CX_LZMA_Decode, &Paras, &Info);
    } else {
      Info.CalcCRC   = 0;
      Info.CalcISize = 0;
      fseek(pFile, SEGGER_RdU32LE(&Header.aCompressedSize[0]), SEEK_CUR);
    }
    //
    printf("%-20s %8u %8u  %08X %08X  ",
           Info.aName,
           Info.CalcISize,
           Info.ISize,
           Info.CalcCRC,
           Info.CRC);
    if (Info.ISize == Info.CalcISize && Info.CRC == Info.CalcCRC) {
      printf("OK%s\n", Info.aStatus);
    } else {
      printf("Mismatch\n");
      exit(100);
    }
  }
}

/*********************************************************************
*
*       Public code
*
**********************************************************************
*/

/*********************************************************************
*
*       main()
*
*  Function description
*    Application entry point.  Sets up the allocator, prints the
*    sign-on banner, opens the zip file named on the command line
*    and lists/checks its content.
*
*  Parameters
*    argc - Argument count, must be 2.
*    argv - Argument vector, argv[1] is the name of the zip file.
*
*  Return value
*    Does not return; the application exits with status 0 when the
*    listing completes and with status 100 on a usage error or if
*    the file can't be opened.
*/
int main(int argc, char **argv) {
  FILE *pInput;
  //
  SEGGER_MEM_SYSTEM_HEAP_Init(&_StaticAllocator);
  _PrintSignOn();
  //
  if (argc == 2) {
    pInput = fopen(argv[1], "rb");
    if (pInput != NULL) {
      //
      // Normal path: list the archive and leave with success.
      //
      _PrintListing(pInput);
      fclose(pInput);
      exit(0);
    }
    printf("%s: can't open '%s' for reading\n", argv[0], argv[1]);
  } else {
    printf("Syntax: %s <filename>\n", argv[0]);
  }
  //
  // Both failure paths end up here.
  //
  exit(100);
}

/*************************** End of file ****************************/