Blog 2020/5/28
<- previous | index | next ->
After writing a trivial sudoku solver in C yesterday, today I'll use it to benchmark the compilation speed of TCC vs GCC across a variety of processor speeds.
| model name : Geode(TM) Integrated Processor by National Semi | |
| tcc version 0.9.27 (prerelease) (i386 Linux) | |
| gcc (Debian 6.3.0-18+deb9u1) 6.3.0 20170516 | |
| time tcc sudoku.c 2>&1 && mv a.out a.out.tcc | |
| 0.11user 0.03system 0:00.15elapsed 95%CPU (0avgtext+0avgdata 2340maxresident)k | |
| 0inputs+16outputs (0major+236minor)pagefaults 0swaps | |
| time gcc -std=c89 -Wall -Werror -O0 sudoku.c -o a.out.gccO0 2>&1 | |
| 1.78user 0.27system 0:02.09elapsed 98%CPU (0avgtext+0avgdata 18944maxresident)k | |
| 0inputs+96outputs (0major+3319minor)pagefaults 0swaps | |
| time gcc -std=c89 -Wall -Werror -O1 sudoku.c -o a.out.gccO1 2>&1 | |
| 3.40user 0.32system 0:03.75elapsed 99%CPU (0avgtext+0avgdata 20044maxresident)k | |
| 0inputs+96outputs (0major+3550minor)pagefaults 0swaps | |
| time gcc -std=c89 -Wall -Werror -O2 sudoku.c -o a.out.gccO2 2>&1 | |
| 6.07user 0.33system 0:06.44elapsed 99%CPU (0avgtext+0avgdata 21920maxresident)k | |
| 0inputs+104outputs (0major+3804minor)pagefaults 0swaps | |
| time gcc -std=c89 -Wall -Werror -Os sudoku.c -o a.out.gccOs 2>&1 | |
| 4.18user 0.27system 0:04.54elapsed 97%CPU (0avgtext+0avgdata 21176maxresident)k | |
| 0inputs+88outputs (0major+3582minor)pagefaults 0swaps | |
| time gcc -std=c89 -Wall -Werror -O3 sudoku.c -o a.out.gccO3 2>&1 | |
| 23.83user 0.40system 0:24.31elapsed 99%CPU (0avgtext+0avgdata 28560maxresident)k | |
| 0inputs+200outputs (0major+5593minor)pagefaults 0swaps |
| model name : Intel(R) Core(TM) i3-2350M CPU @ 2.30GHz | |
| tcc version 0.9.27 (x86_64 Linux) | |
| gcc (Debian 8.3.0-6) 8.3.0 | |
| time tcc sudoku.c 2>&1 && mv a.out a.out.tcc | |
| 0.01user 0.00system 0:00.01elapsed 90%CPU (0avgtext+0avgdata 2836maxresident)k | |
| 0inputs+24outputs (0major+313minor)pagefaults 0swaps | |
| time gcc -std=c89 -Wall -Werror -O0 sudoku.c -o a.out.gccO0 2>&1 | |
| 0.06user 0.02system 0:00.09elapsed 100%CPU (0avgtext+0avgdata 21464maxresident)k | |
| 0inputs+96outputs (0major+4847minor)pagefaults 0swaps | |
| time gcc -std=c89 -Wall -Werror -O1 sudoku.c -o a.out.gccO1 2>&1 | |
| 0.11user 0.02system 0:00.13elapsed 99%CPU (0avgtext+0avgdata 24224maxresident)k | |
| 0inputs+88outputs (0major+5086minor)pagefaults 0swaps | |
| time gcc -std=c89 -Wall -Werror -O2 sudoku.c -o a.out.gccO2 2>&1 | |
| 0.19user 0.02system 0:00.21elapsed 100%CPU (0avgtext+0avgdata 26164maxresident)k | |
| 0inputs+88outputs (0major+5407minor)pagefaults 0swaps | |
| time gcc -std=c89 -Wall -Werror -Os sudoku.c -o a.out.gccOs 2>&1 | |
| 0.14user 0.02system 0:00.17elapsed 99%CPU (0avgtext+0avgdata 25632maxresident)k | |
| 0inputs+80outputs (0major+5163minor)pagefaults 0swaps | |
| time gcc -std=c89 -Wall -Werror -O3 sudoku.c -o a.out.gccO3 2>&1 | |
| 0.51user 0.04system 0:00.56elapsed 99%CPU (0avgtext+0avgdata 39968maxresident)k | |
| 0inputs+240outputs (0major+8583minor)pagefaults 0swaps |
| a.out: sudoku.c | |
| gcc -std=c89 -Wall -Werror -Os sudoku.c -o a.out | |
| ccbench: | |
| @cat /proc/cpuinfo | grep 'model name' | head -n1 | |
| @tcc -v || true | |
| @gcc --version | head -n1 | |
| @echo | |
| @tcc sudoku.c | |
| time tcc sudoku.c 2>&1 && mv a.out a.out.tcc | |
| @echo | |
| @gcc -std=c89 -Wall -Werror -O0 sudoku.c -o a.out.gccO0 | |
| time gcc -std=c89 -Wall -Werror -O0 sudoku.c -o a.out.gccO0 2>&1 | |
| @echo | |
| @gcc -std=c89 -Wall -Werror -O1 sudoku.c -o a.out.gccO1 | |
| time gcc -std=c89 -Wall -Werror -O1 sudoku.c -o a.out.gccO1 2>&1 | |
| @echo | |
| @gcc -std=c89 -Wall -Werror -O2 sudoku.c -o a.out.gccO2 | |
| time gcc -std=c89 -Wall -Werror -O2 sudoku.c -o a.out.gccO2 2>&1 | |
| @echo | |
| @gcc -std=c89 -Wall -Werror -Os sudoku.c -o a.out.gccOs | |
| time gcc -std=c89 -Wall -Werror -Os sudoku.c -o a.out.gccOs 2>&1 | |
| @echo | |
| @gcc -std=c89 -Wall -Werror -O3 sudoku.c -o a.out.gccO3 | |
| time gcc -std=c89 -Wall -Werror -O3 sudoku.c -o a.out.gccO3 2>&1 | |
| clean: | |
| rm -f a.out a.out.* | |
| .PHONY: ccbench clean |
| model name : Intel(R) Atom(TM) CPU N270 @ 1.60GHz | |
| tcc version 0.9.27 (i386 Linux) | |
| gcc (Debian 8.3.0-6) 8.3.0 | |
| time tcc sudoku.c 2>&1 && mv a.out a.out.tcc | |
| 0.01user 0.01system 0:00.02elapsed 91%CPU (0avgtext+0avgdata 2592maxresident)k | |
| 0inputs+16outputs (0major+257minor)pagefaults 0swaps | |
| time gcc -std=c89 -Wall -Werror -O0 sudoku.c -o a.out.gccO0 2>&1 | |
| 0.22user 0.08system 0:00.31elapsed 99%CPU (0avgtext+0avgdata 18864maxresident)k | |
| 0inputs+104outputs (0major+3475minor)pagefaults 0swaps | |
| time gcc -std=c89 -Wall -Werror -O1 sudoku.c -o a.out.gccO1 2>&1 | |
| 0.45user 0.06system 0:00.52elapsed 99%CPU (0avgtext+0avgdata 21172maxresident)k | |
| 0inputs+104outputs (0major+3698minor)pagefaults 0swaps | |
| time gcc -std=c89 -Wall -Werror -O2 sudoku.c -o a.out.gccO2 2>&1 | |
| 0.81user 0.06system 0:00.88elapsed 99%CPU (0avgtext+0avgdata 23384maxresident)k | |
| 0inputs+112outputs (0major+3975minor)pagefaults 0swaps | |
| time gcc -std=c89 -Wall -Werror -Os sudoku.c -o a.out.gccOs 2>&1 | |
| 0.57user 0.05system 0:00.64elapsed 99%CPU (0avgtext+0avgdata 22876maxresident)k | |
| 0inputs+96outputs (0major+3766minor)pagefaults 0swaps | |
| time gcc -std=c89 -Wall -Werror -O3 sudoku.c -o a.out.gccO3 2>&1 | |
| 2.22user 0.07system 0:02.30elapsed 99%CPU (0avgtext+0avgdata 29940maxresident)k | |
| 0inputs+192outputs (0major+5540minor)pagefaults 0swaps |
| model name : Geode(TM) Integrated Processor by AMD PCS | |
| tcc version 0.9.27 (prerelease) (i386 Linux) | |
| gcc (Debian 6.3.0-18+deb9u1) 6.3.0 20170516 | |
| time tcc sudoku.c 2>&1 && mv a.out a.out.tcc | |
| 0.06user 0.00system 0:00.06elapsed 89%CPU (0avgtext+0avgdata 1388maxresident)k | |
| 0inputs+0outputs (0major+403minor)pagefaults 0swaps | |
| time gcc -std=c89 -Wall -Werror -O0 sudoku.c -o a.out.gccO0 2>&1 | |
| 0.92user 0.07system 0:01.02elapsed 97%CPU (0avgtext+0avgdata 10932maxresident)k | |
| 0inputs+0outputs (0major+6296minor)pagefaults 0swaps | |
| time gcc -std=c89 -Wall -Werror -O1 sudoku.c -o a.out.gccO1 2>&1 | |
| 1.70user 0.06system 0:01.82elapsed 96%CPU (0avgtext+0avgdata 13036maxresident)k | |
| 0inputs+0outputs (0major+6886minor)pagefaults 0swaps | |
| time gcc -std=c89 -Wall -Werror -O2 sudoku.c -o a.out.gccO2 2>&1 | |
| 2.95user 0.04system 0:03.06elapsed 97%CPU (0avgtext+0avgdata 14768maxresident)k | |
| 0inputs+0outputs (0major+7333minor)pagefaults 0swaps | |
| time gcc -std=c89 -Wall -Werror -Os sudoku.c -o a.out.gccOs 2>&1 | |
| 2.00user 0.12system 0:02.17elapsed 97%CPU (0avgtext+0avgdata 14040maxresident)k | |
| 0inputs+0outputs (0major+7104minor)pagefaults 0swaps | |
| time gcc -std=c89 -Wall -Werror -O3 sudoku.c -o a.out.gccO3 2>&1 | |
| 11.03user 0.14system 0:11.23elapsed 99%CPU (0avgtext+0avgdata 21780maxresident)k | |
| 0inputs+0outputs (0major+9177minor)pagefaults 0swaps |
| model name : Intel(R) Core(TM) i5-4590 CPU @ 3.30GHz | |
| tcc version 0.9.27 (x86_64 Linux) | |
| gcc (Debian 8.3.0-6) 8.3.0 | |
| time tcc sudoku.c 2>&1 && mv a.out a.out.tcc | |
| 0.00user 0.00system 0:00.00elapsed 100%CPU (0avgtext+0avgdata 2604maxresident)k | |
| 0inputs+24outputs (0major+308minor)pagefaults 0swaps | |
| time gcc -std=c89 -Wall -Werror -O0 sudoku.c -o a.out.gccO0 2>&1 | |
| 0.03user 0.00system 0:00.04elapsed 100%CPU (0avgtext+0avgdata 21408maxresident)k | |
| 0inputs+96outputs (0major+4828minor)pagefaults 0swaps | |
| time gcc -std=c89 -Wall -Werror -O1 sudoku.c -o a.out.gccO1 2>&1 | |
| 0.06user 0.00system 0:00.06elapsed 98%CPU (0avgtext+0avgdata 24068maxresident)k | |
| 0inputs+88outputs (0major+5078minor)pagefaults 0swaps | |
| time gcc -std=c89 -Wall -Werror -O2 sudoku.c -o a.out.gccO2 2>&1 | |
| 0.09user 0.00system 0:00.10elapsed 100%CPU (0avgtext+0avgdata 26196maxresident)k | |
| 0inputs+88outputs (0major+5410minor)pagefaults 0swaps | |
| time gcc -std=c89 -Wall -Werror -Os sudoku.c -o a.out.gccOs 2>&1 | |
| 0.07user 0.00system 0:00.08elapsed 98%CPU (0avgtext+0avgdata 25612maxresident)k | |
| 0inputs+80outputs (0major+5158minor)pagefaults 0swaps | |
| time gcc -std=c89 -Wall -Werror -O3 sudoku.c -o a.out.gccO3 2>&1 | |
| 0.25user 0.02system 0:00.27elapsed 99%CPU (0avgtext+0avgdata 39944maxresident)k | |
| 0inputs+240outputs (0major+8574minor)pagefaults 0swaps |
| model name : Intel(R) Core(TM)2 CPU 6300 @ 1.86GHz | |
| tcc version 0.9.27 (x86_64 Linux) | |
| gcc (Debian 8.3.0-6) 8.3.0 | |
| time tcc sudoku.c 2>&1 && mv a.out a.out.tcc | |
| 0.00user 0.00system 0:00.00elapsed 88%CPU (0avgtext+0avgdata 2808maxresident)k | |
| 0inputs+24outputs (0major+314minor)pagefaults 0swaps | |
| time gcc -std=c89 -Wall -Werror -O0 sudoku.c -o a.out.gccO0 2>&1 | |
| 0.08user 0.02system 0:00.11elapsed 99%CPU (0avgtext+0avgdata 21568maxresident)k | |
| 0inputs+96outputs (0major+4851minor)pagefaults 0swaps | |
| time gcc -std=c89 -Wall -Werror -O1 sudoku.c -o a.out.gccO1 2>&1 | |
| 0.13user 0.04system 0:00.18elapsed 99%CPU (0avgtext+0avgdata 24296maxresident)k | |
| 0inputs+88outputs (0major+5112minor)pagefaults 0swaps | |
| time gcc -std=c89 -Wall -Werror -O2 sudoku.c -o a.out.gccO2 2>&1 | |
| 0.25user 0.02system 0:00.28elapsed 99%CPU (0avgtext+0avgdata 26128maxresident)k | |
| 0inputs+88outputs (0major+5430minor)pagefaults 0swaps | |
| time gcc -std=c89 -Wall -Werror -Os sudoku.c -o a.out.gccOs 2>&1 | |
| 0.19user 0.01system 0:00.21elapsed 100%CPU (0avgtext+0avgdata 25636maxresident)k | |
| 0inputs+80outputs (0major+5150minor)pagefaults 0swaps | |
| time gcc -std=c89 -Wall -Werror -O3 sudoku.c -o a.out.gccO3 2>&1 | |
| 0.75user 0.05system 0:00.81elapsed 99%CPU (0avgtext+0avgdata 40160maxresident)k | |
| 0inputs+240outputs (0major+8605minor)pagefaults 0swaps |
| #include <stdlib.h> | |
| #include <stdint.h> | |
| #include <stdio.h> | |
| #include <assert.h> | |
| #include <stdbool.h> | |
| /* A sudoku solver. */ | |
| /* | |
| Copyright (c) 2020 Jason Pepas | |
| Released under the terms of the MIT license. | |
| See https://opensource.org/licenses/MIT | |
| */ | |
| /* | |
| Board file format: plain text, 9 lines of 9 numbers (zero for empty box). | |
| Example: | |
| 530070000 | |
| 600195000 | |
| 098000060 | |
| 800060003 | |
| 400803001 | |
| 700020006 | |
| 060000280 | |
| 000419005 | |
| 000080079 | |
| */ | |
| /* | |
| Data structures: | |
| - A 'box' is a set of the remaining possible solutions for a spot in the grid. | |
| This set is represented as a uint16_t bitvector, with the sudoke numbers 1-9 | |
| represented as the first 9 bits in the bitvector. | |
| - A solved box has only one bit set. | |
| - A 'board' is a 2-dimensional array (9x9) of boxes (bitvectors). | |
| - A solved board is a 9x9 grid of bitvectors which each have only one item. | |
| */ | |
| /* | |
| The general approach is to iterate over the board and use logic to reduce | |
| the number of possible solutions in each box until the board is solved. | |
| Algorithm: | |
| - For each row, build a set of the solved numbers in that row, then | |
| remove those solutions from the remaining possibilities in each of the | |
| unsolved boxes in that row. | |
| - Do the same for each column. | |
| - Do the same for each 3x3 grid. | |
| - Repeat the above process until either the board is solved, or until | |
| deadlock is detected (i.e. the board state remains unchanged after an | |
| iteration). | |
| */ | |
| /* the board. */ | |
| uint16_t g_board[9][9]; | |
| /* set all 9 bits in this box. */ | |
| void board_set_all(int row, int col) { | |
| uint16_t all_nums = /* 0b0000000111111111 */ 0x1FF; | |
| g_board[row][col] = all_nums; | |
| } | |
| /* convert a number to a bitvector. */ | |
| uint16_t num_to_bitvec(int num) { | |
| return 1 << (num-1); | |
| } | |
| /* add num to the box's bitvector. */ | |
| void board_set_num(int row, int col, int num) { | |
| g_board[row][col] |= num_to_bitvec(num); | |
| } | |
| /* count the number of bits which are set in the bitvector. */ | |
| int count_set_bits(uint16_t box) { | |
| int set_bits_count = 0; | |
| int i = 0; | |
| while (i < 16) { | |
| if (box & /* 0b1 */ 0x1) { | |
| set_bits_count++; | |
| } | |
| box = (box >> 1); | |
| i++; | |
| continue; | |
| } | |
| return set_bits_count; | |
| } | |
| /* is the box solved? */ | |
| bool box_is_solved(int row, int col) { | |
| uint16_t box = g_board[row][col]; | |
| int bitcount = count_set_bits(box); | |
| return bitcount == 1; | |
| } | |
| /* is the board solved? */ | |
| bool board_is_solved() { | |
| int row = 0; | |
| int col = 0; | |
| while (row < 9) { | |
| while (col < 9) { | |
| if (!box_is_solved(row, col)) { | |
| return false; | |
| } | |
| col++; | |
| continue; | |
| } | |
| row++; | |
| continue; | |
| } | |
| return true; | |
| } | |
| /* return the character representation of the box. */ | |
| char board_get_ch(int row, int col) { | |
| if (!box_is_solved(row, col)) { | |
| return '0'; | |
| } | |
| uint16_t box = g_board[row][col]; | |
| uint8_t num = 1; | |
| while (num < 10) { | |
| if (box & /* 0b1 */ 0x1) { | |
| return (char)(num + '0'); | |
| } | |
| box = (box >> 1); | |
| num++; | |
| continue; | |
| } | |
| assert(false); | |
| exit(2); | |
| } | |
| /* print the board state. prints '0' for unsolved boxes. */ | |
| void print_board() { | |
| int row = 0; | |
| while (row < 9) { | |
| int col = 0; | |
| while (col < 9) { | |
| char ch = board_get_ch(row, col); | |
| putchar(ch); | |
| col++; | |
| continue; | |
| } | |
| putchar('\n'); | |
| row++; | |
| continue; | |
| } | |
| } | |
| /* return the total count of all remaining possibliities in every box. | |
| a solved board has a possibilities count of 81. */ | |
| int possibilities_count() { | |
| int pcount = 0; | |
| int row = 0; | |
| while (row < 9) { | |
| int col = 0; | |
| while (col < 9) { | |
| uint16_t box = g_board[row][col]; | |
| int bitcount = count_set_bits(box); | |
| pcount += bitcount; | |
| col++; | |
| continue; | |
| } | |
| row++; | |
| continue; | |
| } | |
| return pcount; | |
| } | |
| /* read the board in from a file. */ | |
| void load_board(char* fname) { | |
| FILE* f = fopen(fname, "r"); | |
| assert(f != NULL); | |
| const int bufsize = 110; /* 9+2 per line * 9 lines + 1 null */ | |
| char buf[bufsize]; | |
| size_t count = fread(buf, 1, bufsize, f); | |
| assert(count >= 90); | |
| fclose(f); | |
| int i = 0; | |
| int row = 0; | |
| while (row < 9) { | |
| int col = 0; | |
| while (col < 9) { | |
| assert (i < bufsize); | |
| char ch = buf[i]; | |
| if (ch == '0') { | |
| board_set_all(row, col); | |
| } else { | |
| int num = ch - '0'; | |
| board_set_num(row, col, num); | |
| } | |
| i++; | |
| col++; | |
| continue; | |
| } | |
| /* skip over the newline. */ | |
| char ch = buf[i]; | |
| while (ch == '\n' || ch == '\r') { | |
| i++; | |
| ch = buf[i]; | |
| continue; | |
| } | |
| row++; | |
| continue; | |
| } | |
| } | |
| /* return the combined bitmask of all solved boxes in the row. */ | |
| uint16_t get_row_solved_bitmask(int row) { | |
| uint16_t bitmask = 0; | |
| int col = 0; | |
| while (col < 9) { | |
| if (box_is_solved(row, col)) { | |
| bitmask |= g_board[row][col]; | |
| } | |
| col++; | |
| continue; | |
| } | |
| return bitmask; | |
| } | |
| /* iterate over the rows, reducing possibilities. */ | |
| void iterate_rows() { | |
| int row = 0; | |
| while (row < 9) { | |
| uint16_t solved = get_row_solved_bitmask(row); | |
| int col = 0; | |
| while (col < 9) { | |
| if (!box_is_solved(row, col)) { | |
| /* subtract the solved bits from this box. */ | |
| uint16_t box = g_board[row][col]; | |
| box &= ~(solved); | |
| g_board[row][col] = box; | |
| } | |
| col++; | |
| continue; | |
| } | |
| row++; | |
| continue; | |
| } | |
| } | |
| /* return the combined bitmask of all solved boxes in the column. */ | |
| uint16_t get_col_solved_bitmask(int col) { | |
| uint16_t bitmask = 0; | |
| int row = 0; | |
| while (row < 9) { | |
| if (box_is_solved(row, col)) { | |
| bitmask |= g_board[row][col]; | |
| } | |
| row++; | |
| continue; | |
| } | |
| return bitmask; | |
| } | |
| /* iterate over the columns, reducing possibilities. */ | |
| void iterate_columns() { | |
| int col = 0; | |
| while (col < 9) { | |
| uint16_t solved = get_col_solved_bitmask(col); | |
| int row = 0; | |
| while (row < 9) { | |
| if (!box_is_solved(row, col)) { | |
| /* subtract the solved bits from this box. */ | |
| uint16_t box = g_board[row][col]; | |
| box &= ~(solved); | |
| g_board[row][col] = box; | |
| } | |
| row++; | |
| continue; | |
| } | |
| col++; | |
| continue; | |
| } | |
| } | |
| /* return the combined bitmask of all solved boxes in the grid. */ | |
| uint16_t get_grid_solved_bitmask(int grid) { | |
| uint16_t bitmask = 0; | |
| int first_row = (grid / 3) * 3; | |
| int first_col = (grid % 3) * 3; | |
| int row = first_row; | |
| while (row < (first_row + 3)) { | |
| int col = first_col; | |
| while (col < (first_col + 3)) { | |
| if (box_is_solved(row, col)) { | |
| bitmask |= g_board[row][col]; | |
| } | |
| col++; | |
| continue; | |
| } | |
| row++; | |
| continue; | |
| } | |
| return bitmask; | |
| } | |
| /* iterate over the grids, reducing possibilities. */ | |
| void iterate_grids() { | |
| int grid = 0; | |
| while (grid < 9) { | |
| uint16_t solved = get_grid_solved_bitmask(grid); | |
| int first_row = (grid / 3) * 3; | |
| int first_col = (grid % 3) * 3; | |
| int row = first_row; | |
| while (row < (first_row + 3)) { | |
| int col = first_col; | |
| while (col < (first_col + 3)) { | |
| if (!box_is_solved(row, col)) { | |
| /* subtract the solved bits from this box. */ | |
| uint16_t box = g_board[row][col]; | |
| box &= ~(solved); | |
| g_board[row][col] = box; | |
| } | |
| col++; | |
| continue; | |
| } | |
| row++; | |
| continue; | |
| } | |
| grid++; | |
| continue; | |
| } | |
| } | |
| /* iterate over the board, reducing possibilities. */ | |
| void iterate_board() { | |
| iterate_rows(); | |
| iterate_columns(); | |
| iterate_grids(); | |
| } | |
| int main(int argc, char* argv[]) { | |
| load_board(argv[1]); | |
| int before_pcount = possibilities_count(); | |
| while (!board_is_solved()) { | |
| iterate_board(); | |
| int after_pcount = possibilities_count(); | |
| if (after_pcount == before_pcount) { | |
| fprintf(stderr, "Error: don't know how to solve this board.\n"); | |
| exit(3); | |
| } else { | |
| before_pcount = after_pcount; | |
| continue; | |
| } | |
| } | |
| print_board(); | |
| return EXIT_SUCCESS; | |
| } |