Created
March 17, 2025 02:50
-
-
Save 7etsuo/5fd58211c3dafdda8f3136f2d2eb6ded to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** x.com/7etsuo
 *
 * compile with gcc -o pre_process hotdog_preprocessor.c -lgd -lm
 * usage: ./pre_process dataset/train/hotdog/ dataset/train/nothotdog/
 */
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <dirent.h> | |
#include <time.h> | |
#include <gd.h> | |
#define TARGET_WIDTH 224 | |
#define TARGET_HEIGHT 224 | |
#define CHANNELS 3 | |
// Structure to hold image data and label | |
#pragma pack(push, 1) | |
typedef struct | |
{ | |
float *data; | |
int label; // 1 for hotdog, 0 for not-hotdog | |
} ImageSample; | |
#pragma pack(pop) | |
int | |
preprocess_image (const char *input_path, float *output_buffer, | |
int target_width, int target_height) | |
{ | |
FILE *in; | |
gdImagePtr img = NULL, resized = NULL; | |
int x, y, color; | |
unsigned char r, g, b; | |
// Open the image file | |
in = fopen (input_path, "rb"); | |
if (!in) | |
{ | |
fprintf (stderr, "Could not open image file %s\n", input_path); | |
return -1; | |
} | |
// Detect image type and load accordingly | |
if (strstr (input_path, ".jpg") || strstr (input_path, ".jpeg")) | |
{ | |
img = gdImageCreateFromJpeg (in); | |
} | |
else if (strstr (input_path, ".png")) | |
{ | |
img = gdImageCreateFromPng (in); | |
} | |
else if (strstr (input_path, ".gif")) | |
{ | |
img = gdImageCreateFromGif (in); | |
} | |
else if (strstr (input_path, ".bmp")) | |
{ | |
img = gdImageCreateFromBmp (in); | |
} | |
else | |
{ | |
fprintf (stderr, "Unsupported image format for %s\n", input_path); | |
fclose (in); | |
return -1; | |
} | |
fclose (in); | |
if (!img) | |
{ | |
fprintf (stderr, "Failed to load image %s\n", input_path); | |
return -1; | |
} | |
// Create a new image with the target dimensions | |
resized = gdImageCreateTrueColor (target_width, target_height); | |
if (!resized) | |
{ | |
fprintf (stderr, "Failed to create resized image buffer\n"); | |
gdImageDestroy (img); | |
return -1; | |
} | |
// Resize the image | |
gdImageCopyResampled (resized, img, 0, 0, 0, 0, target_width, target_height, | |
gdImageSX (img), gdImageSY (img)); | |
// Normalize pixel values to [0,1] and store in output_buffer | |
for (y = 0; y < target_height; y++) | |
{ | |
for (x = 0; x < target_width; x++) | |
{ | |
color = gdImageGetPixel (resized, x, y); | |
// Extract RGB components | |
r = gdImageRed (resized, color); | |
g = gdImageGreen (resized, color); | |
b = gdImageBlue (resized, color); | |
// Store normalized RGB values in output_buffer | |
int idx = (y * target_width + x) * CHANNELS; | |
output_buffer[idx] = r / 255.0f; | |
output_buffer[idx + 1] = g / 255.0f; | |
output_buffer[idx + 2] = b / 255.0f; | |
} | |
} | |
// Clean up | |
gdImageDestroy (img); | |
gdImageDestroy (resized); | |
return 0; | |
} | |
// Process all images in a directory | |
int | |
process_directory (const char *dir_path, int label, ImageSample **samples, | |
int *count) | |
{ | |
DIR *dir; | |
struct dirent *entry; | |
char filepath[1024]; | |
int img_count = 0; | |
// Open directory | |
dir = opendir (dir_path); | |
if (!dir) | |
{ | |
fprintf (stderr, "Could not open directory %s\n", dir_path); | |
return -1; | |
} | |
// First count the number of image files | |
while ((entry = readdir (dir)) != NULL) | |
{ | |
// Skip . and .. directories | |
if (strcmp (entry->d_name, ".") == 0 || strcmp (entry->d_name, "..") == 0) | |
continue; | |
// Check for common image extensions | |
if (strstr (entry->d_name, ".jpg") || strstr (entry->d_name, ".jpeg") | |
|| strstr (entry->d_name, ".png") || strstr (entry->d_name, ".gif") | |
|| strstr (entry->d_name, ".bmp")) | |
{ | |
img_count++; | |
} | |
} | |
// Rewind directory | |
rewinddir (dir); | |
// Allocate memory for the samples | |
*samples = (ImageSample *) malloc (img_count * sizeof (ImageSample)); | |
if (!*samples) | |
{ | |
fprintf (stderr, "Memory allocation failed\n"); | |
closedir (dir); | |
return -1; | |
} | |
// Process each image file | |
int idx = 0; | |
while ((entry = readdir (dir)) != NULL) | |
{ | |
// Skip . and .. directories | |
if (strcmp (entry->d_name, ".") == 0 || strcmp (entry->d_name, "..") == 0) | |
continue; | |
// Check for common image extensions | |
if (strstr (entry->d_name, ".jpg") || strstr (entry->d_name, ".jpeg") | |
|| strstr (entry->d_name, ".png") || strstr (entry->d_name, ".gif") | |
|| strstr (entry->d_name, ".bmp")) | |
{ | |
// Construct full file path | |
snprintf (filepath, sizeof (filepath), "%s/%s", dir_path, entry->d_name); | |
// Allocate memory for image data | |
float *img_data = (float *) malloc (TARGET_WIDTH * TARGET_HEIGHT | |
* CHANNELS * sizeof (float)); | |
if (!img_data) | |
{ | |
fprintf (stderr, "Memory allocation failed for image data\n"); | |
continue; | |
} | |
// Process the image | |
if (preprocess_image (filepath, img_data, TARGET_WIDTH, TARGET_HEIGHT) | |
== 0) | |
{ | |
(*samples)[idx].data = img_data; | |
(*samples)[idx].label = label; | |
idx++; | |
printf ("Processed %s\n", filepath); | |
} | |
else | |
{ | |
free (img_data); | |
} | |
} | |
} | |
*count = idx; | |
closedir (dir); | |
return 0; | |
} | |
// Fisher-Yates shuffle algorithm to randomly shuffle the samples | |
void | |
shuffle_samples (ImageSample *samples, int count) | |
{ | |
srand (time (NULL)); | |
for (int i = count - 1; i > 0; i--) | |
{ | |
int j = rand () % (i + 1); | |
// Swap samples[i] and samples[j] | |
ImageSample temp = samples[i]; | |
samples[i] = samples[j]; | |
samples[j] = temp; | |
} | |
} | |
// Save processed dataset to a binary file | |
void | |
save_dataset (const char *filename, ImageSample *samples, int count) | |
{ | |
FILE *file = fopen (filename, "wb"); | |
if (!file) | |
{ | |
fprintf (stderr, "Could not open file %s for writing\n", filename); | |
return; | |
} | |
// Write number of samples | |
fwrite (&count, sizeof (int), 1, file); | |
// Write image dimensions | |
int dimensions[3] = {TARGET_WIDTH, TARGET_HEIGHT, CHANNELS}; | |
fwrite (dimensions, sizeof (int), 3, file); | |
// Write each sample | |
for (int i = 0; i < count; i++) | |
{ | |
// Write label | |
fwrite (&samples[i].label, sizeof (int), 1, file); | |
// Write image data | |
int data_size = TARGET_WIDTH * TARGET_HEIGHT * CHANNELS; | |
fwrite (samples[i].data, sizeof (float), data_size, file); | |
} | |
fclose (file); | |
printf ("Saved %d samples to %s\n", count, filename); | |
} | |
int | |
main (int argc, char *argv[]) | |
{ | |
if (argc != 3) | |
{ | |
fprintf (stderr, "Usage: %s <hotdog_folder> <nothotdog_folder>\n", argv[0]); | |
return 1; | |
} | |
const char *hotdog_dir = argv[1]; | |
const char *nothotdog_dir = argv[2]; | |
ImageSample *hotdog_samples = NULL; | |
ImageSample *nothotdog_samples = NULL; | |
int hotdog_count = 0; | |
int nothotdog_count = 0; | |
// Process hotdog directory (label 1) | |
if (process_directory (hotdog_dir, 1, &hotdog_samples, &hotdog_count) != 0) | |
{ | |
fprintf (stderr, "Failed to process hotdog directory\n"); | |
return 1; | |
} | |
// Process not-hotdog directory (label 0) | |
if (process_directory (nothotdog_dir, 0, ¬hotdog_samples, ¬hotdog_count) | |
!= 0) | |
{ | |
fprintf (stderr, "Failed to process not-hotdog directory\n"); | |
// Free hotdog samples | |
for (int i = 0; i < hotdog_count; i++) | |
{ | |
free (hotdog_samples[i].data); | |
} | |
free (hotdog_samples); | |
return 1; | |
} | |
// Combine both datasets | |
int total_count = hotdog_count + nothotdog_count; | |
ImageSample *all_samples | |
= (ImageSample *) malloc (total_count * sizeof (ImageSample)); | |
if (!all_samples) | |
{ | |
fprintf (stderr, "Memory allocation failed for combined dataset\n"); | |
return 1; | |
} | |
// Copy samples to combined array | |
memcpy (all_samples, hotdog_samples, hotdog_count * sizeof (ImageSample)); | |
memcpy (all_samples + hotdog_count, nothotdog_samples, | |
nothotdog_count * sizeof (ImageSample)); | |
// Shuffle the combined dataset | |
shuffle_samples (all_samples, total_count); | |
// Split into training (80%) and test (20%) sets | |
int train_count = (int) (total_count * 0.8); | |
int test_count = total_count - train_count; | |
printf ("Total images: %d (Hotdogs: %d, Not-hotdogs: %d)\n", total_count, | |
hotdog_count, nothotdog_count); | |
printf ("Training set: %d images\n", train_count); | |
printf ("Test set: %d images\n", test_count); | |
// Save training and test datasets | |
save_dataset ("train_data.bin", all_samples, train_count); | |
save_dataset ("test_data.bin", all_samples + train_count, test_count); | |
// Free memory | |
for (int i = 0; i < total_count; i++) | |
{ | |
free (all_samples[i].data); | |
} | |
free (all_samples); | |
free (hotdog_samples); | |
free (nothotdog_samples); | |
printf ("Preprocessing complete!\n"); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment