Last active
June 27, 2024 14:47
-
-
Save riicchhaarrd/7d6c025eb259d8193df820765e041e94 to your computer and use it in GitHub Desktop.
Read filenames from a ZIP file in C
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <stdbool.h> | |
#include <stdint.h> | |
// ZIP format reference: https://en.wikipedia.org/wiki/ZIP_(file_format)
/*
 * Read one central directory file header (CDFH) starting at the current
 * position of `fp`.
 *
 * fp         - stream positioned at the first byte of a CDFH record.
 * lfh_offset - receives the "relative offset of local file header" field.
 *
 * Returns true when the signature matched and every fixed-size field could
 * be read; the stream is then positioned after the variable-length file
 * name, extra field and file comment, i.e. at the next record.
 * Returns false on signature mismatch, short read or seek failure.
 *
 * Multi-byte fields are read in host byte order, so this only decodes
 * correctly on little-endian hosts (ZIP stores fields little-endian).
 */
bool read_cdfh(FILE *fp, uint32_t *lfh_offset)
{
    uint32_t magic = 0;
    if(fread(&magic, sizeof(magic), 1, fp) != 1 || magic != 0x02014b50)
    {
        // Short read or central directory file header signature mismatch
        return false;
    }

    // Skip the 24 bytes that sit between the signature and the
    // file name length field.
    if(fseek(fp, 24, SEEK_CUR) != 0)
    {
        return false;
    }

    uint16_t n = 0; // File name length
    uint16_t m = 0; // Extra field length
    uint16_t k = 0; // File comment length
    if(fread(&n, sizeof(n), 1, fp) != 1
       || fread(&m, sizeof(m), 1, fp) != 1
       || fread(&k, sizeof(k), 1, fp) != 1)
    {
        return false;
    }

    // Skip the 8 bytes between the comment length and the local header offset.
    if(fseek(fp, 8, SEEK_CUR) != 0)
    {
        return false;
    }

    *lfh_offset = 0;
    if(fread(lfh_offset, sizeof(*lfh_offset), 1, fp) != 1)
    {
        return false;
    }

    // Step over the variable-length tail so the caller can read the next record.
    return fseek(fp, (long)n + (long)m + (long)k, SEEK_CUR) == 0;
}
// On-disk layout of the fixed part of a ZIP local file header (30 bytes).
// Packed so the struct can be filled with a single fread(); fields are
// stored little-endian in the file and read here in host byte order.
#pragma pack(push, 1)
typedef struct
{
    uint32_t magic;              // 0x04034b50
    uint16_t min_version;
    uint16_t gp_bitflag;
    uint16_t compression_method; // 0 = stored (none), 8 = DEFLATE
    uint16_t modification_time;
    uint16_t modification_date;
    uint32_t checksum;           // CRC-32
    uint32_t compressed_size;
    uint32_t uncompressed_size;
    uint16_t file_name_length;
    uint16_t extra_field_length;
    // Followed in the file by:
    //   char file_name[file_name_length];
    //   char extra_field[extra_field_length];
} ZIP_LocalFileHeader;
#pragma pack(pop)

/*
 * Read a local file header (LFH) starting at the current position of `fp`.
 *
 * fp                  - stream positioned at the first byte of an LFH.
 * filename            - buffer receiving the file name, always NUL-terminated
 *                       when max_filename_length > 0. Names that do not fit
 *                       are truncated to max_filename_length - 1 bytes.
 * max_filename_length - capacity of `filename` in bytes; 0 means "do not
 *                       store the name" and `filename` is not touched.
 * uncompressed_size   - receives the header's uncompressed size field.
 * compressed_size     - receives the header's compressed size field.
 *
 * Returns true on success, leaving the stream just past the extra field
 * (i.e. at the start of the file data) even when the name was truncated.
 * Returns false on short read, signature mismatch or seek failure; the size
 * outputs are only written once the signature has been verified.
 */
bool read_lfh(FILE *fp,
              char *filename,
              size_t max_filename_length,
              uint32_t *uncompressed_size,
              uint32_t *compressed_size)
{
    ZIP_LocalFileHeader lfh = {0};
    if(fread(&lfh, sizeof(lfh), 1, fp) != 1 || lfh.magic != 0x04034b50)
    {
        // Short read or local file header signature mismatch
        return false;
    }

    *uncompressed_size = lfh.uncompressed_size;
    *compressed_size = lfh.compressed_size;

    const size_t stored_length = lfh.file_name_length;
    size_t copied_length = 0;

    if(max_filename_length > 0)
    {
        // Reserve one byte for the terminator; guarding on capacity > 0
        // keeps "capacity - 1" from wrapping around.
        copied_length = stored_length;
        if(copied_length > max_filename_length - 1)
        {
            copied_length = max_filename_length - 1;
        }

        size_t got = 0;
        if(copied_length > 0)
        {
            got = fread(filename, 1, copied_length, fp);
        }
        filename[got] = '\0';
        if(got != copied_length)
        {
            return false;
        }
    }

    // Skip whatever part of the name did not fit, plus the extra field.
    const long remaining = (long)(stored_length - copied_length)
                           + (long)lfh.extra_field_length;
    return fseek(fp, remaining, SEEK_CUR) == 0;
}
/*
 * Locate the end-of-central-directory (EOCD) record by searching backwards
 * from the end of the file for its signature bytes 50 4b 05 06.
 *
 * Only the last 22 + 65535 bytes are examined: the EOCD record is 22 bytes
 * plus a comment whose length is a 16-bit field, so it cannot start earlier.
 * Candidates with fewer than 22 bytes after them are ignored because they
 * cannot hold a complete record.
 *
 * Returns true when a signature is found, leaving the stream positioned on
 * the byte immediately after the 4-byte signature. Returns false when no
 * signature is found or on I/O / allocation failure.
 */
bool find_eocd_signature(FILE *fp)
{
    const long eocd_min_size = 22;          // fixed part of the EOCD record
    const long eocd_max_comment = 0xFFFF;   // largest possible trailing comment

    if(fseek(fp, 0, SEEK_END) != 0)
    {
        return false;
    }
    const long file_size = ftell(fp);
    if(file_size < eocd_min_size)
    {
        // ftell failure (-1) or a file too small to hold an EOCD record
        return false;
    }

    long window = eocd_min_size + eocd_max_comment;
    if(window > file_size)
    {
        window = file_size;
    }
    const long window_start = file_size - window;

    // Read the tail once instead of issuing a seek + read per byte.
    unsigned char *tail = malloc((size_t)window);
    if(!tail)
    {
        return false;
    }

    bool found = false;
    if(fseek(fp, window_start, SEEK_SET) == 0
       && fread(tail, 1, (size_t)window, fp) == (size_t)window)
    {
        // Scan backwards so the match closest to the end of the file wins.
        for(long i = window - eocd_min_size; i >= 0; --i)
        {
            if(tail[i] == 0x50 && tail[i + 1] == 0x4b
               && tail[i + 2] == 0x05 && tail[i + 3] == 0x06)
            {
                // Callers read the EOCD fields starting right after the signature.
                found = fseek(fp, window_start + i + 4, SEEK_SET) == 0;
                break;
            }
        }
    }

    free(tail);
    return found;
}
/*
 * List the file names stored in a ZIP archive.
 *
 * Usage: <program> <zipfile>
 *
 * Locates the end-of-central-directory (EOCD) record, walks the central
 * directory, and for each entry reads the matching local file header to
 * print its name and uncompressed size. Entries whose local header reports
 * an uncompressed size of 0 are not printed.
 *
 * Returns 0 on success and 1 on usage error, open failure or malformed input.
 */
int main(int argc, char **argv)
{
    if(argc != 2)
    {
        fprintf(stderr, "Usage: %s <zipfile>\n", argv[0]);
        return 1;
    }

    FILE *fp = fopen(argv[1], "rb");
    if(!fp)
    {
        fprintf(stderr, "Failed to open file '%s'\n", argv[1]);
        return 1;
    }

    int status = 1;
    uint16_t total_central_directory_record_count = 0;
    uint32_t cdfh_offset = 0;

    if(!find_eocd_signature(fp))
    {
        fprintf(stderr, "Not a valid ZIP file\n");
        goto done;
    }

    // The stream now sits just past the EOCD signature. Skip three 16-bit
    // fields (number of this disk, disk where the central directory starts,
    // record count on this disk), read the total record count, skip the
    // 32-bit central directory size, then read the central directory offset.
    // ZIP64 archives store 0xffff / 0xffffffff placeholders in these fields
    // and are not handled here.
    if(fseek(fp, 6, SEEK_CUR) != 0
       || fread(&total_central_directory_record_count,
                sizeof(total_central_directory_record_count), 1, fp) != 1
       || fseek(fp, 4, SEEK_CUR) != 0
       || fread(&cdfh_offset, sizeof(cdfh_offset), 1, fp) != 1)
    {
        fprintf(stderr, "Failed to read end of central directory record\n");
        goto done;
    }

    printf("%d entries\n", total_central_directory_record_count);

    if(fseek(fp, (long)cdfh_offset, SEEK_SET) != 0)
    {
        fprintf(stderr, "Failed to read cdfh\n");
        goto done;
    }

    for(size_t i = 0; i < total_central_directory_record_count; ++i)
    {
        uint32_t lfh_offset = 0;
        if(!read_cdfh(fp, &lfh_offset))
        {
            fprintf(stderr, "Failed to read cdfh\n");
            goto done;
        }

        // Remember where the next central directory record starts before
        // jumping to this entry's local file header.
        const long next_cdfh = ftell(fp);
        if(next_cdfh < 0 || fseek(fp, (long)lfh_offset, SEEK_SET) != 0)
        {
            fprintf(stderr, "Failed to read lfh\n");
            goto done;
        }

        char filename[256] = { 0 };
        uint32_t uncompressed_size = 0;
        uint32_t compressed_size = 0;
        if(!read_lfh(fp, filename, sizeof(filename), &uncompressed_size, &compressed_size))
        {
            fprintf(stderr, "Failed to read lfh\n");
            goto done;
        }

        if(fseek(fp, next_cdfh, SEEK_SET) != 0)
        {
            fprintf(stderr, "Failed to read cdfh\n");
            goto done;
        }

        // NOTE(review): entries written with a trailing data descriptor
        // presumably carry size 0 in the local header and would be hidden
        // by this filter along with directories - confirm against the spec.
        if(uncompressed_size > 0)
        {
            printf("Filename: %s (%f MiB)\n", filename, (float)uncompressed_size / 1024.f / 1024.f);
        }
    }

    status = 0;

done:
    // Single exit point so the stream is closed on every path after fopen.
    fclose(fp);
    return status;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment