Skip to content

Instantly share code, notes, and snippets.

@riicchhaarrd
Last active June 27, 2024 14:47
Show Gist options
  • Save riicchhaarrd/7d6c025eb259d8193df820765e041e94 to your computer and use it in GitHub Desktop.
Save riicchhaarrd/7d6c025eb259d8193df820765e041e94 to your computer and use it in GitHub Desktop.
Read filenames from a ZIP file in C
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h>
// https://en.wikipedia.org/wiki/ZIP_(file_format)
/**
 * Parse one central directory file header (CDFH) at the current position
 * of fp and extract the offset of the matching local file header.
 *
 * Only the fields needed here are decoded; the rest of the record is skipped.
 * Multi-byte fields are read directly into host integers, so a little-endian
 * host is assumed (the ZIP format stores them little-endian).
 *
 * @param fp          Stream positioned at the first byte of a CDFH record.
 * @param lfh_offset  Receives the "relative offset of local file header"
 *                    field. Only meaningful when the function returns true.
 * @return true when the signature matched and every field could be read; the
 *         stream is then positioned just past the record's file name, extra
 *         field and comment. false on signature mismatch, short read or
 *         seek failure.
 */
bool read_cdfh(FILE *fp, uint32_t *lfh_offset)
{
    uint32_t magic = 0;
    if(fread(&magic, sizeof(magic), 1, fp) != 1
       || magic != 0x02014b50) // Central directory file header signature mismatch
    {
        return false;
    }

    // Skip 24 bytes: versions (2+2), flags (2), compression method (2),
    // modification time/date (2+2), CRC-32 (4), compressed/uncompressed size (4+4).
    if(fseek(fp, 24, SEEK_CUR) != 0)
    {
        return false;
    }

    uint16_t n = 0; // File name length
    uint16_t m = 0; // Extra field length
    uint16_t k = 0; // File comment length
    if(fread(&n, sizeof(n), 1, fp) != 1
       || fread(&m, sizeof(m), 1, fp) != 1
       || fread(&k, sizeof(k), 1, fp) != 1)
    {
        return false;
    }

    // Skip 8 bytes: disk number start (2), internal attributes (2),
    // external attributes (4).
    if(fseek(fp, 8, SEEK_CUR) != 0)
    {
        return false;
    }

    *lfh_offset = 0;
    if(fread(lfh_offset, sizeof(*lfh_offset), 1, fp) != 1)
    {
        return false;
    }

    // Step over the variable-length tail so the stream ends up at the next
    // record. Widen to long first: the sum can reach 3 * 65535.
    return fseek(fp, (long)n + (long)m + (long)k, SEEK_CUR) == 0;
}
#pragma pack(push, 1)
/**
 * On-disk layout of the fixed 30-byte part of a ZIP local file header.
 *
 * Packing is forced to 1 so the struct has no padding and can be filled with
 * a single fread(). Fields are stored little-endian in the file, so reading
 * them this way assumes a little-endian host.
 */
typedef struct
{
    uint32_t magic;              // Signature, 0x04034b50
    uint16_t min_version;        // Minimum version needed to extract
    uint16_t gp_bitflag;         // General purpose bit flag
    uint16_t compression_method; // 0 = none (stored), 8 = DEFLATE
    uint16_t modification_time;
    uint16_t modification_date;
    uint32_t checksum;           // CRC-32
    uint32_t compressed_size;
    uint32_t uncompressed_size;
    uint16_t file_name_length;
    uint16_t extra_field_length;
    // Followed in the file by:
    //   char file_name[file_name_length];
    //   char extra_field[extra_field_length];
} ZIP_LocalFileHeader;
#pragma pack(pop)

/**
 * Read a local file header at the current position of fp.
 *
 * @param fp                   Stream positioned at the first byte of the header.
 * @param filename             Buffer receiving the entry name. It is always
 *                             NUL-terminated on success when
 *                             max_filename_length > 0; names that do not fit
 *                             are truncated.
 * @param max_filename_length  Capacity of filename in bytes, including the
 *                             terminator. 0 means "do not store the name".
 * @param uncompressed_size    Receives the header's uncompressed size field.
 * @param compressed_size      Receives the header's compressed size field.
 * @return true when the signature matched and the header and name were read;
 *         the stream is then positioned just past the extra field, whether
 *         or not the name was truncated. false on signature mismatch, short
 *         read or seek failure.
 */
bool read_lfh(FILE *fp,
              char *filename,
              size_t max_filename_length,
              uint32_t *uncompressed_size,
              uint32_t *compressed_size)
{
    ZIP_LocalFileHeader lfh = {0};
    if(fread(&lfh, sizeof(lfh), 1, fp) != 1
       || lfh.magic != 0x04034b50) // Local file header signature mismatch
    {
        return false;
    }
    *uncompressed_size = lfh.uncompressed_size;
    *compressed_size = lfh.compressed_size;

    // Number of name bytes that fit in the buffer with room for the NUL.
    // Guarding the zero-capacity case keeps "max - 1" from wrapping around.
    size_t stored_length = 0;
    if(max_filename_length > 0)
    {
        stored_length = lfh.file_name_length;
        if(stored_length >= max_filename_length)
        {
            stored_length = max_filename_length - 1;
        }
        if(stored_length > 0 && fread(filename, stored_length, 1, fp) != 1)
        {
            filename[0] = '\0';
            return false;
        }
        filename[stored_length] = '\0';
    }

    // Skip whatever part of the name was not stored plus the extra field, so
    // the final stream position does not depend on the caller's buffer size.
    long remaining = (long)(lfh.file_name_length - stored_length)
                     + (long)lfh.extra_field_length;
    return fseek(fp, remaining, SEEK_CUR) == 0;
}
/**
 * Locate the end of central directory (EOCD) record by scanning backwards
 * from the end of the file for its signature (0x06054b50).
 *
 * The EOCD record is 22 fixed bytes followed by a comment of at most 65535
 * bytes, so only offsets in the last 22 + 65535 bytes of the file can hold a
 * complete record; the scan is limited to that window. The signature is read
 * directly into a host integer, so a little-endian host is assumed.
 *
 * @param fp  Seekable stream opened in binary mode.
 * @return true if the signature was found; the stream is then positioned
 *         immediately after the 4-byte signature. false if the signature is
 *         absent, the file is too short, or a seek/read failed.
 */
bool find_eocd_signature(FILE *fp)
{
    const long eocd_fixed_size = 22;     // EOCD record without its comment
    const long max_comment_size = 0xFFFF; // comment length is a 16-bit field

    if(fseek(fp, 0, SEEK_END) != 0)
    {
        return false;
    }
    long file_size = ftell(fp);
    if(file_size < eocd_fixed_size) // also rejects ftell() failure (-1)
    {
        return false;
    }

    long newest = file_size - eocd_fixed_size;
    long oldest = newest > max_comment_size ? newest - max_comment_size : 0;
    for(long pos = newest; pos >= oldest; --pos)
    {
        uint32_t magic = 0;
        if(fseek(fp, pos, SEEK_SET) != 0
           || fread(&magic, sizeof(magic), 1, fp) != 1)
        {
            return false;
        }
        if(magic == 0x06054b50)
        {
            return true;
        }
    }
    return false;
}
/*
 * Walk the central directory of the ZIP archive open on fp and print the
 * name and uncompressed size of each entry, taken from its local file header.
 * Returns the process exit code: 0 on success, 1 on any parse or I/O error.
 */
static int list_zip_entries(FILE *fp)
{
    if(!find_eocd_signature(fp))
    {
        fprintf(stderr, "Not a valid ZIP file\n");
        return 1;
    }

    // find_eocd_signature() leaves the stream right after the EOCD signature.
    // Skip three 2-byte fields (number of this disk, disk where the central
    // directory starts, record count on this disk), read the total record
    // count, skip the 4-byte central directory size, then read its offset.
    // NOTE: ZIP64 archives store 0xffff / 0xffffffff placeholders in these
    // fields; they are not handled here.
    uint16_t total_central_directory_record_count = 0;
    uint32_t cdfh_offset = 0;
    if(fseek(fp, 6, SEEK_CUR) != 0
       || fread(&total_central_directory_record_count,
                sizeof(total_central_directory_record_count), 1, fp) != 1
       || fseek(fp, 4, SEEK_CUR) != 0
       || fread(&cdfh_offset, sizeof(cdfh_offset), 1, fp) != 1)
    {
        fprintf(stderr, "Failed to read end of central directory record\n");
        return 1;
    }
    printf("%d entries\n", total_central_directory_record_count);

    if(fseek(fp, (long)cdfh_offset, SEEK_SET) != 0)
    {
        fprintf(stderr, "Failed to seek to central directory\n");
        return 1;
    }
    for(size_t i = 0; i < total_central_directory_record_count; ++i)
    {
        uint32_t lfh_offset = 0;
        if(!read_cdfh(fp, &lfh_offset))
        {
            fprintf(stderr, "Failed to read cdfh\n");
            return 1;
        }

        // Remember where the next central directory record starts, jump to
        // the local file header for the name and sizes, then come back.
        long next_cdfh = ftell(fp);
        if(next_cdfh < 0 || fseek(fp, (long)lfh_offset, SEEK_SET) != 0)
        {
            fprintf(stderr, "Failed to seek to lfh\n");
            return 1;
        }
        char filename[256] = { 0 };
        uint32_t uncompressed_size = 0;
        uint32_t compressed_size = 0;
        if(!read_lfh(fp, filename, sizeof(filename), &uncompressed_size, &compressed_size))
        {
            fprintf(stderr, "Failed to read lfh\n");
            return 1;
        }
        if(fseek(fp, next_cdfh, SEEK_SET) != 0)
        {
            fprintf(stderr, "Failed to seek to central directory\n");
            return 1;
        }

        // Entries whose local header reports a zero size are not listed.
        // NOTE(review): presumably meant to hide directories; entries written
        // with a trailing data descriptor may also report 0 here - verify.
        if(uncompressed_size > 0)
        {
            printf("Filename: %s (%f MiB)\n", filename, (float)uncompressed_size / 1024.f / 1024.f);
        }
    }
    return 0;
}

/*
 * Usage: <program> <zipfile>
 * Lists the entries of the given ZIP archive on stdout. Exits with 0 on
 * success and 1 on a usage error, an unreadable file or a malformed archive.
 */
int main(int argc, char **argv)
{
    if(argc != 2)
    {
        fprintf(stderr, "Usage: %s <zipfile>\n", argv[0]);
        return 1;
    }
    FILE *fp = fopen(argv[1], "rb");
    if(!fp)
    {
        fprintf(stderr, "Failed to open file '%s'\n", argv[1]);
        return 1;
    }

    // Single exit path so the stream is closed on errors as well.
    int exit_code = list_zip_entries(fp);
    fclose(fp);
    return exit_code;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment