Last active
March 20, 2017 16:40
-
-
Save saxbophone/00de17a081c9f0cf66cee339a6590e0b to your computer and use it in GitHub Desktop.
Generate a CSV file of all 32-bit floating point (single-precision) numbers in decimal form
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| all_floats | |
| all_floats.csv |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <ctype.h> | |
| #include <stdbool.h> | |
| #include <stddef.h> | |
| #include <stdint.h> | |
| #include <stdio.h> | |
| #include <stdlib.h> | |
| #include <string.h> | |
// Capacity of the formatting buffer: room for 511 integer digits, the decimal
// point, 511 fractional digits and the terminating null character.
static const size_t MAX_STRING_LENGTH = 511U + 1U + 511U + 1U;
// Number of columns per CSV row: 16,384, the maximum that Microsoft Excel
// can support.
static const size_t NUM_COLUMNS = 16384;
// Number of CSV rows: 4,294,967,296 / NUM_COLUMNS.
static const size_t NUM_ROWS = 262144;
| // converts an unsigned 32-bit integer to a string of the float it represents | |
| static char* unsigned_to_float_string(uint32_t raw); | |
| int main(void) { | |
| // open file for writing to | |
| FILE* output_file = fopen("all_floats.csv", "w"); | |
| if(output_file == NULL) { | |
| fprintf(stderr, "Couldn't open file for writinng\n"); | |
| return 1; | |
| } | |
| /* | |
| * single-precision floating point numbers are 32-bit, so we can find them | |
| * all by iterating over them in a rectangular grid with 16,384 columns | |
| * (this is the maximmum Microsoft Excel supports) and 262,144 rows | |
| * (16,384 * 262,144 = 4,294,967,296) | |
| */ | |
| for(uint32_t y = 0; y < NUM_ROWS; y++) { | |
| for(uint16_t x = 0; x < NUM_COLUMNS; x++) { | |
| // convert indexes to 32-bit uint | |
| uint32_t raw = ((uint32_t)x << 14) + y; | |
| // get float string | |
| char* str_float = unsigned_to_float_string(raw); | |
| // write out one float into a column | |
| fprintf(output_file, "%s", str_float); | |
| // write out column separator if it's not the last column | |
| if(x < (NUM_COLUMNS - 1)) { | |
| fprintf(output_file, ","); | |
| } | |
| // free the string (memory was allocated for it) | |
| free(str_float); | |
| } | |
| // end row | |
| fprintf(output_file, "\n"); | |
| // print out a message if we achieved another percentage of completion | |
| if((y % (NUM_ROWS / 100)) == 0) { | |
| printf("%zu%%\n", y / (NUM_ROWS / 100)); | |
| } | |
| } | |
| // close output file | |
| fclose(output_file); | |
| return 0; | |
| } | |
| // removes insignificant runs of chars from a string and returns the new version | |
| char* remove_insignificant_chars(char* string); | |
| static char* unsigned_to_float_string(uint32_t raw) { | |
| // here is a special raw-conversion union | |
| union raw_float { | |
| uint32_t raw; | |
| float real; | |
| }; | |
| union raw_float converter; | |
| // convert raw to float | |
| converter.raw = raw; | |
| // prepare output string | |
| char* str_float = calloc(MAX_STRING_LENGTH, sizeof(char)); | |
| // convert to float string and store this string | |
| snprintf(str_float, MAX_STRING_LENGTH, "%-511.511f", converter.real); | |
| // remove insignificant chars | |
| char* output = remove_insignificant_chars(str_float); | |
| // free our memory | |
| free(str_float); | |
| return output; | |
| } | |
| // returns true if the given char is NOT a decimal digit ('0'-'9'), false if it is | |
| bool not_an_integer(char c); | |
// Returns a trimmed copy of a fixed-notation number string such as those
// produced by printf's "%f" conversion.
//
// string: null-terminated text holding one value, optionally followed by
//         whitespace padding. It is not modified.
//
// Processing:
//  - everything from the first whitespace character onwards is dropped;
//  - if the text (after an optional '+' or '-') starts with a digit, redundant
//    leading zeros are removed, and, when the value has a decimal point
//    followed only by digits, trailing zeros of the fraction are removed,
//    together with the point itself if no fraction digits remain;
//  - anything else (e.g. "nan", "-inf") is copied without further changes.
//
// Returns a newly allocated string that the caller must free(), or NULL if
// string is NULL or memory could not be allocated.
char *remove_insignificant_chars(char *string) {
    if (string == NULL) {
        return NULL;
    }
    // the value ends at the first whitespace char (padding) or the terminator;
    // casts to unsigned char keep the <ctype.h> calls defined for any char
    size_t end = 0;
    while (string[end] != '\0' && !isspace((unsigned char)string[end])) {
        end++;
    }
    // an optional sign is always kept; classification looks at what follows
    // it, so the value's sign and digit count cannot affect the decision
    const size_t sign_len = (string[0] == '-' || string[0] == '+') ? 1 : 0;
    size_t start = sign_len;
    const bool numeric =
        (start < end) && isdigit((unsigned char)string[start]);
    if (numeric) {
        // skip leading zeros, but keep a zero that is the last integer digit
        while ((start + 1) < end && string[start] == '0' &&
               isdigit((unsigned char)string[start + 1])) {
            start++;
        }
        // trailing zeros are only insignificant inside a fraction
        const char *dot = memchr(string + start, '.', end - start);
        if (dot != NULL) {
            size_t frac_end = (size_t)(dot - string) + 1;
            while (frac_end < end && isdigit((unsigned char)string[frac_end])) {
                frac_end++;
            }
            // leave the text alone if something other than digits follows
            // the point (e.g. an exponent)
            if (frac_end == end) {
                while (string[end - 1] == '0') {
                    end--;
                }
                if (string[end - 1] == '.') {
                    end--;
                }
            }
        }
    }
    // assemble sign + remaining characters in an exactly-sized buffer
    const size_t body_len = end - start;
    char *output = malloc(sign_len + body_len + 1);
    if (output == NULL) {
        return NULL;
    }
    memcpy(output, string, sign_len);
    memcpy(output + sign_len, string + start, body_len);
    output[sign_len + body_len] = '\0';
    return output;
}
// Reports whether a character is something other than a decimal digit.
//
// c: the character to classify.
//
// Returns false for '0' through '9' and true for every other character.
bool not_an_integer(char c) {
    // C guarantees that '0'..'9' have consecutive, increasing values
    const bool is_decimal_digit = (c >= '0') && (c <= '9');
    return !is_decimal_digit;
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Compile all_floats.c with gcc at optimisation level -O3, producing the
# executable 'all_floats' in the current directory.
gcc -O3 -o all_floats all_floats.c
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Runs ./all_floats in the background and watches the size of its output file
# 'all_floats.csv'. If the file grows beyond 200GiB the program is killed and
# this script exits with status 1; otherwise the script exits with the
# program's own exit status once it has finished.

# run program in the background and remember its process ID
./all_floats &
generator_pid=$!

# keep checking only while the program is still running, so the script does
# not loop forever after the program has finished on its own
while kill -0 "$generator_pid" 2>/dev/null; do
    # if file size greater than 200GiB, kill the program
    if [[ $(find all_floats.csv -type f -size +200G 2>/dev/null) ]]; then
        # kill only the process we started, not every process of that name
        kill "$generator_pid"
        wait "$generator_pid" 2>/dev/null
        exit 1
    fi
    sleep 1
done

# collect and propagate the program's exit status
wait "$generator_pid"
exit $?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment