Skip to content

Instantly share code, notes, and snippets.

@saxbophone
Last active March 20, 2017 16:40
Show Gist options
  • Select an option

  • Save saxbophone/00de17a081c9f0cf66cee339a6590e0b to your computer and use it in GitHub Desktop.

Select an option

Save saxbophone/00de17a081c9f0cf66cee339a6590e0b to your computer and use it in GitHub Desktop.
Generate a CSV file of all 32-bit floating point (single-precision) numbers in decimal form
all_floats
all_floats.csv
#include <ctype.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Size of the scratch buffer used when formatting one float:
 * 511 integer digits + decimal point + 511 decimal digits + 1 null terminator.
 */
static const size_t MAX_STRING_LENGTH = (2U * 511U) + 1U + 1U;
/* 16,384 columns per row (the maximum that Microsoft Excel can support). */
static const size_t NUM_COLUMNS = (size_t)1 << 14;
/* Row count: 4,294,967,296 / NUM_COLUMNS = 262,144. */
static const size_t NUM_ROWS = (size_t)1 << 18;
// converts an unsigned 32-bit integer to a string of the float it represents
static char* unsigned_to_float_string(uint32_t raw);
/*
 * Writes the decimal form of every 32-bit pattern, interpreted as a
 * single-precision float, to "all_floats.csv" as a grid of
 * NUM_ROWS x NUM_COLUMNS cells, and prints progress to stdout.
 *
 * Returns 0 on success, 1 if the file cannot be opened, a value cannot be
 * converted, or writing/closing the file fails.
 */
int main(void) {
    // open file for writing to
    FILE* output_file = fopen("all_floats.csv", "w");
    if(output_file == NULL) {
        fprintf(stderr, "Couldn't open file for writinng\n");
        return 1;
    }
    /*
     * single-precision floating point numbers are 32-bit, so we can find them
     * all by iterating over them in a rectangular grid with 16,384 columns
     * (this is the maximum Microsoft Excel supports) and 262,144 rows
     * (16,384 * 262,144 = 4,294,967,296)
     */
    const size_t rows_per_percent = NUM_ROWS / 100;
    for(uint32_t y = 0; y < NUM_ROWS; y++) {
        for(uint32_t x = 0; x < NUM_COLUMNS; x++) {
            /*
             * row-major cell index: each (x, y) pair maps to a distinct
             * 32-bit value and the largest cell maps to 0xFFFFFFFF.
             * (shifting x left by 14 and adding y would overlap the bits of
             * the two indexes and skip most of the value space)
             */
            uint32_t raw = (uint32_t)(y * NUM_COLUMNS + x);
            // get float string (heap-allocated, we own it)
            char* str_float = unsigned_to_float_string(raw);
            if(str_float == NULL) {
                fprintf(stderr, "Couldn't convert value to a string\n");
                fclose(output_file);
                return 1;
            }
            // write out one float into a column
            fputs(str_float, output_file);
            free(str_float);
            // write out column separator if it's not the last column
            if(x < (NUM_COLUMNS - 1)) {
                fputc(',', output_file);
            }
        }
        // end row
        fputc('\n', output_file);
        // stop as soon as the stream reports a failed write
        if(ferror(output_file)) {
            fprintf(stderr, "Couldn't write to output file\n");
            fclose(output_file);
            return 1;
        }
        // print out a message if we achieved another percentage of completion
        if((y % rows_per_percent) == 0) {
            printf("%zu%%\n", (size_t)(y / rows_per_percent));
        }
    }
    // closing flushes buffered data, so a failure here means lost output
    if(fclose(output_file) != 0) {
        fprintf(stderr, "Couldn't close output file\n");
        return 1;
    }
    return 0;
}
// removes insignificant runs of chars from a string and returns the new version
char* remove_insignificant_chars(char* string);
/*
 * Converts a raw 32-bit pattern to the decimal string of the float it
 * represents, with insignificant characters removed.
 *
 * raw: the bit pattern to reinterpret as a float
 *      (assumes float is 32 bits wide on this platform -- TODO confirm)
 *
 * Returns a newly allocated string that the caller must free(), or NULL if
 * memory could not be allocated or the value could not be formatted.
 */
static char* unsigned_to_float_string(uint32_t raw) {
    // reinterpret the bits through a union rather than a pointer cast
    union raw_float {
        uint32_t raw;
        float real;
    };
    union raw_float converter;
    converter.raw = raw;
    // scratch buffer for the full-precision, fixed-point rendering
    char* str_float = calloc(MAX_STRING_LENGTH, sizeof(char));
    if(str_float == NULL) {
        return NULL;
    }
    char* output = NULL;
    // left-justified, minimum width 511, 511 digits after the decimal point
    int written = snprintf(
        str_float, MAX_STRING_LENGTH, "%-511.511f", converter.real
    );
    // only trim when formatting succeeded and was not truncated
    if(written >= 0 && (size_t)written < MAX_STRING_LENGTH) {
        output = remove_insignificant_chars(str_float);
    }
    // the scratch buffer is no longer needed whatever the outcome
    free(str_float);
    return output;
}
// returns true if the given char is NOT a decimal digit ('0'-'9'), false if it is
bool not_an_integer(char c);
/*
 * Returns a trimmed copy of a fixed-point number string (as produced by a
 * printf "%f"-style conversion, optionally padded with whitespace).
 *
 * - surrounding whitespace padding is dropped
 * - an optional leading sign is kept
 * - redundant leading zeros of the integer part are dropped ("007" -> "7")
 * - if there is a decimal point, trailing zeros after it are dropped, and the
 *   point itself is dropped when nothing follows it ("42.000" -> "42")
 * - text that does not start with a digit after the sign (e.g. "nan", "-inf")
 *   is copied as-is up to the first whitespace character
 *
 * Exponent notation is not supported.
 *
 * string: the null-terminated input; it is not modified
 *
 * Returns a newly allocated string that the caller must free(), or NULL if
 * string is NULL or memory could not be allocated.
 */
char* remove_insignificant_chars(char* string) {
    if(string == NULL) {
        return NULL;
    }
    // skip any padding in front of the value
    size_t start = 0;
    while(isspace((unsigned char)string[start])) {
        start++;
    }
    // an optional sign is copied separately from the body [first, last)
    size_t first = start;
    if(string[first] == '-' || string[first] == '+') {
        first++;
    }
    const size_t sign_length = first - start;
    // the value ends at the first whitespace character or the terminator
    size_t last = first;
    while(string[last] != '\0' && !isspace((unsigned char)string[last])) {
        last++;
    }
    // only real numbers are trimmed; nan/inf/empty text is kept verbatim
    if(isdigit((unsigned char)string[first])) {
        // drop a leading zero only while another digit follows it
        while(string[first] == '0' && isdigit((unsigned char)string[first + 1])) {
            first++;
        }
        // trailing zeros are only insignificant after a decimal point
        if(memchr(string + first, '.', last - first) != NULL) {
            // the '.' itself stops this loop, so it cannot run past first
            while(string[last - 1] == '0') {
                last--;
            }
            // nothing left after the point: the value is an integer
            if(string[last - 1] == '.') {
                last--;
            }
        }
    }
    const size_t body_length = last - first;
    char* output = malloc(sign_length + body_length + 1);
    if(output == NULL) {
        return NULL;
    }
    memcpy(output, string + start, sign_length);
    memcpy(output + sign_length, string + first, body_length);
    output[sign_length + body_length] = '\0';
    return output;
}
/*
 * Reports whether a character is something other than a decimal digit.
 *
 * c: the character to classify
 *
 * Returns false for '0' through '9' and true for every other character.
 */
bool not_an_integer(char c) {
    // the ten decimal digit characters have consecutive codes in C
    const bool is_decimal_digit = (c >= '0') && (c <= '9');
    return !is_decimal_digit;
}
#!/bin/bash
# Build all_floats.c into the 'all_floats' executable with -O3 optimisation.
gcc -O3 -o all_floats all_floats.c
#!/bin/bash
# Runs ./all_floats in the background and polls the size of its output file
# 'all_floats.csv' once per second. If the file grows beyond 200GiB the
# program is killed and this script exits with status 1. Otherwise the script
# exits with the program's own exit status once it has finished.

# run program in the background and remember its PID so that only this
# instance is watched and, if necessary, killed
./all_floats &
pid=$!

# keep polling only while the program is still running
while kill -0 "$pid" 2>/dev/null; do
    # if file size greater than 200GiB, kill the program
    if [[ -n $(find all_floats.csv -type f -size +200G 2>/dev/null) ]]; then
        kill "$pid"
        # reap the child before exiting
        wait "$pid" 2>/dev/null
        exit 1
    fi
    sleep 1
done

# the program ended by itself: propagate its exit status
wait "$pid"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment