To the extent possible under law, Otesunki has waived all copyright and related
or neighboring rights to C Preprocessor Abuse. This work is published from:
Saudi Arabia.
However, CPP operates at a purely lexical [!!] level, transforming input character sequences [?!] directly to output character sequences when preprocessing C/C++ source code (a.k.a. macro expansion).
- Static Validation of C Preprocessor Macros
Preprocessing means a set of low-level textual [?!] conversions on the source; the C and C++ language specification […] it has no connection with the language syntax. These text-based, unstructured transformations are hard to follow.
- Opening Up The C/C++ Preprocessor Black Box
CPP is, in fact, not text-based: it does not act upon characters, it acts upon tokens. While this distinction may seem pedantic, it is one of the biggest sources of confusion when trying to understand complicated macros. For example, one may think that:
/* FOO is replaced by the single token +; BAR by the token + followed by FOO. */
#define FOO +
#define BAR +FOO
BAR
…expands to the singular token ++
, when in reality, it expands to the
two tokens +
and +
. While standard C uses a token-based
preprocessor, pre-standard C did use a text-based preprocessor,
which is why you may occasionally see macros like:
/* Token pasting: the ## operator when __STDC__ is defined, otherwise an */
/* empty comment placed between the two parameters. */
#ifdef __STDC__
# define PASTE2(x, y) x##y
#else
# define PASTE2(x, y) x/**/y
#endif
/* Stringification: the # operator when __STDC__ is defined, otherwise the */
/* parameter name written inside a string literal. */
#ifdef __STDC__
# define STRINGIZE(x) #x
#else
# define STRINGIZE(x) "x"
#endif
…because in pre-standard C, x/**/y
did actually expand to xy
, and,
sometimes, "x"
expanded to x
surrounded by quotes.
typeof
(and, similarly, _Generic
) are not macros, nor are they part of
the preprocessor at all. This is why code such as:
#define AUTO_TYPE(expr) CAT2(foo_, typeof(expr))
#define CAT2(x, y) PASTE2(x, y)
#define PASTE2(x, y) x##y
enum { foo_int = 9; }
int x;
printf("%d\n", MACRO(x));
…doesn’t print 9
, but causes a linker error claiming that foo_typeof
is
undefined — the code was expanded to foo_typeof(x)
.
Similarly,
/* Selects a printf conversion specifier string from the type of expr. */
/* The selection is a _Generic expression, not a string literal token. */
#define PRINTF_SPECIFIER(expr) \
_Generic((expr), \
int: "%d", \
float: "%g", \
default: "" \
)
int x;
printf("x: " PRINTF_SPECIFIER(x) "\n", x);
…does not expand to printf("x: %d\n", x);
as one might hope, but instead
causes a syntax error. Adjacent string literals are concatenated right after preprocessing, before
_Generic
is evaluated, so the string it selects can no longer take part in that concatenation.
For reference, the compiler processes code in this order:
- Character set normalization. – Preprocessor, ex. converting CRLF to just LF
- Trigraph substitution. – Preprocessor, ex.
??/
to \
- Logical line conversion. – Preprocessor, what
\
at the end of a line does
- Preprocessor tokenization. – Preprocessor. Specifically, the file is tokenized into comments, whitespace, and misc tokens.
- Comment elimination. – Preprocessor, converts each comment into a single space character (since C89), not an empty string.
- Preprocessor directive parsing. – Preprocessor, All
#include
s are substituted for the file’s contents, and critically: all #define
s are parsed and removed at this step.
- Macro expansion. – Preprocessor, macros can’t emit preprocessor directives because they were already parsed in the last step, and won’t be parsed ever again.
- Backslash escaping..
_Pragma
- Adjacent string literal concatenation.
- Parsing.
- Compiling. – This is where
typeof
and _Generic
are substituted.
- Linking.
Assert that cond
is true
.
/* Each entry below is an independent alternative, not one compilable unit. */
/* C78|C++98> UB if =cond= is false and not a compile-time constant (VLA of size -1) */
char error_message[cond ? 1 : -1];
/* C78|C++98> eliminates the UB above */
static char error_message[cond ? 1 : -1];
/* C89|C++98> expression version of the above */
(void) sizeof(struct { char error_message[cond ? 1 : -1]; });
/* C++11> */
static_assert(cond, "error message");
/* C11> */
_Static_assert(cond, "error message");
/* C11> */
#include <assert.h>
static_assert(cond, "error message");
/* C23> */
static_assert(cond, "error message");
/* CL/MSVC C78|C++98> forces CL.EXE to produce an error including the */
/* error message [https://stackoverflow.com/a/4815532] */
typedef struct { int error_message : !!(cond); } error_message;
/* C89> from glibc's hack for _Static_assert pre-C11 */
/* a negative bit-field width is a constraint violation when =cond= is false */
extern int (*__Static_assert_function (void))
[!!sizeof (struct { int error_message: (cond) ? 2 : -1; })];
/* C78> altered version of the above */
extern int (*__Static_assert_function ())
[!!sizeof (struct { int error_message: (cond) ? 2 : -1; })];
(Check if | Assert that) expr
is of type T
.
/* ASSERT C78|C++98> requires =expr= to be an lvalue */
/* may not result in a compiler error but a compiler warning */
/* the two arms of ?: are =T *= and the address of =expr=; the result is */
/* dereferenced again */
(*(0 ? (T *) NULL : &expr))
/* ASSERT C99> breaks if =expr= and =T= are identically sized */
/* integers that only differ in signedness */
/* the first arm is a compound literal of type =T= that is never selected */
(0 ? (T) {0} : expr)
/* CHECK GNU89> */
__builtin_types_compatible_p(typeof(expr), T);
/* CHECK C11> */
/* yields 1 when the =T= association is selected, 0 through =default= */
_Generic((expr), T: 1, default: 0)
(Check if | Assert that) expr
is a compile-time constant.
/* ASSERT C78|C++98> only for nonnegative integers */
/* =expr= is used as the array length of a struct member */
sizeof(sizeof((struct { char c[expr]; }) {0}.c))
/* CHECK GNU89> */
__builtin_constant_p(expr)
(Check if | Assert that) expr
is a literal.
Check if the call to IS_EMPTY
contains 0 arguments.
The idea of passing 0 arguments to a macro that takes a variable number of
arguments being UB is a misconception.
(including those arguments consisting of no preprocessing tokens) […] There shall be more arguments in the invocation than there are parameters in the macro definition (excluding the
...
). There shall exist a )
preprocessing token that terminates the invocation.
- ISO 9899:1999, section 6.10.3, paragraph 4.
In other words, given #define MACRO(...)
, MACRO()
is a perfectly valid
invocation, but given #define MACRO2(foo, ...)
, MACRO2(foo)
is not
valid, and should instead be MACRO2(foo, )
.
/* C99|C++11> */
/* [https://gustedt.wordpress.com/2010/06/08/detect-empty-macro-arguments] */

/* PP_NARG_1 appends the descending sequence 63..0 to its arguments and */
/* PP_ARG_N returns whatever ends up in the 64th position. */
#define PP_NARG_1(...) \
    PP_NARG_2(__VA_ARGS__,PP_RSEQ_N())
#define PP_NARG_2(...) \
    PP_ARG_N(__VA_ARGS__)
#define PP_ARG_N(_1, _2, _3, _4, _5, _6, _7, _8, _9,_10, \
    _11,_12,_13,_14,_15,_16,_17,_18,_19,_20, \
    _21,_22,_23,_24,_25,_26,_27,_28,_29,_30, \
    _31,_32,_33,_34,_35,_36,_37,_38,_39,_40, \
    _41,_42,_43,_44,_45,_46,_47,_48,_49,_50, \
    _51,_52,_53,_54,_55,_56,_57,_58,_59,_60, \
    _61,_62,_63,N,...) N
#define PP_RSEQ_N() \
    63,62,61,60, \
    59,58,57,56,55,54,53,52,51,50, \
    49,48,47,46,45,44,43,42,41,40, \
    39,38,37,36,35,34,33,32,31,30, \
    29,28,27,26,25,24,23,22,21,20, \
    19,18,17,16,15,14,13,12,11,10, \
    9,8,7,6,5,4,3,2,1,0

/* 62 ones followed by a zero: with it appended, the 64th position holds 0 */
/* for a single argument and 1 for two or more. */
#define HAS_COMMA_RSEQ_N() \
    1,1,1,1, \
    1,1,1,1,1,1,1,1,1,1, \
    1,1,1,1,1,1,1,1,1,1, \
    1,1,1,1,1,1,1,1,1,1, \
    1,1,1,1,1,1,1,1,1,1, \
    1,1,1,1,1,1,1,1,1,1, \
    1,1,1,1,1,1,1,1,0
/* 1 if the arguments contain a top-level comma, 0 otherwise. */
#define HAS_COMMA(...) PP_NARG_1(__VA_ARGS__, HAS_COMMA_RSEQ_N())
/* Expands to a comma only when it is followed by a parenthesised list. */
#define TRIGGER_PARENTHESIS(...) ,

/* Expands to 1 when invoked with no argument tokens, 0 otherwise. */
/* Four probes are taken; only the pattern 0001 (a comma appears solely in */
/* TRIGGER_PARENTHESIS __VA_ARGS__ ()) identifies an empty argument list. */
#define IS_EMPTY(...) \
    ISEMPTY_SWITCH( \
        1, \
        0, \
        HAS_COMMA(__VA_ARGS__), \
        HAS_COMMA(TRIGGER_PARENTHESIS __VA_ARGS__), \
        HAS_COMMA(__VA_ARGS__ (/**/)), \
        HAS_COMMA(TRIGGER_PARENTHESIS __VA_ARGS__ (/**/)))

/* Dispatch table indexed by the four probe results. */
#define IS_EMPTY_CASE_0000(YES, NO) NO
#define IS_EMPTY_CASE_0001(YES, NO) YES
#define IS_EMPTY_CASE_0010(YES, NO) NO
#define IS_EMPTY_CASE_0011(YES, NO) NO
#define IS_EMPTY_CASE_0100(YES, NO) NO
#define IS_EMPTY_CASE_0101(YES, NO) NO
#define IS_EMPTY_CASE_0110(YES, NO) NO
#define IS_EMPTY_CASE_0111(YES, NO) NO
#define IS_EMPTY_CASE_1000(YES, NO) NO
#define IS_EMPTY_CASE_1001(YES, NO) NO
#define IS_EMPTY_CASE_1010(YES, NO) NO
#define IS_EMPTY_CASE_1011(YES, NO) NO
#define IS_EMPTY_CASE_1100(YES, NO) NO
#define IS_EMPTY_CASE_1101(YES, NO) NO
#define IS_EMPTY_CASE_1110(YES, NO) NO
#define IS_EMPTY_CASE_1111(YES, NO) NO

#define PASTE5(_0, _1, _2, _3, _4) _0 ## _1 ## _2 ## _3 ## _4
/* The probe arguments are fully expanded before PASTE5 glues them onto the */
/* table prefix, which must match the IS_EMPTY_CASE_ names above. */
#define ISEMPTY_SWITCH(YES, NO, _0, _1, _2, _3) \
    PASTE5(IS_EMPTY_CASE_, _0, _1, _2, _3)(YES, NO)
Count the number of arguments passed to PP_NARG
.
/* C++11> compile time constant, not preprocessor time constant */
std::tuple_size<decltype(std::make_tuple(__VA_ARGS__))>::value
/* C99|C++11> breaks when passed no arguments */
/* [https://groups.google.com/g/comp.std.c/c/d-6Mj5Lko_s/m/5R6bMWTEbzQJ] */
/* PP_NARG appends the descending sequence 63..0 to its arguments; PP_ARG_N */
/* then returns whatever ends up in the 64th position. */
#define PP_NARG(...) \
PP_NARG_1(__VA_ARGS__,PP_RSEQ_N())
#define PP_NARG_1(...) \
PP_ARG_N(__VA_ARGS__)
#define PP_ARG_N(_1, _2, _3, _4, _5, _6, _7, _8, _9,_10, \
_11,_12,_13,_14,_15,_16,_17,_18,_19,_20, \
_21,_22,_23,_24,_25,_26,_27,_28,_29,_30, \
_31,_32,_33,_34,_35,_36,_37,_38,_39,_40, \
_41,_42,_43,_44,_45,_46,_47,_48,_49,_50, \
_51,_52,_53,_54,_55,_56,_57,_58,_59,_60, \
_61,_62,_63,N,...) N
#define PP_RSEQ_N() \
63,62,61,60, \
59,58,57,56,55,54,53,52,51,50, \
49,48,47,46,45,44,43,42,41,40, \
39,38,37,36,35,34,33,32,31,30, \
29,28,27,26,25,24,23,22,21,20, \
19,18,17,16,15,14,13,12,11,10, \
9,8,7,6,5,4,3,2,1,0
/* C99|C++11> fix for the above, breaks when passed a last argument that */
/* is a function-like macro accepting at least 2 arguments */
/* [https://gustedt.wordpress.com/2010/06/08/detect-empty-macro-arguments] */

/* PP_NARG_1 appends the descending sequence 63..0 to its arguments and */
/* PP_ARG_N returns whatever ends up in the 64th position. */
#define PP_NARG_1(...) \
    PP_NARG_2(__VA_ARGS__,PP_RSEQ_N())
#define PP_NARG_2(...) \
    PP_ARG_N(__VA_ARGS__)
#define PP_ARG_N(_1, _2, _3, _4, _5, _6, _7, _8, _9,_10, \
    _11,_12,_13,_14,_15,_16,_17,_18,_19,_20, \
    _21,_22,_23,_24,_25,_26,_27,_28,_29,_30, \
    _31,_32,_33,_34,_35,_36,_37,_38,_39,_40, \
    _41,_42,_43,_44,_45,_46,_47,_48,_49,_50, \
    _51,_52,_53,_54,_55,_56,_57,_58,_59,_60, \
    _61,_62,_63,N,...) N
#define PP_RSEQ_N() \
    63,62,61,60, \
    59,58,57,56,55,54,53,52,51,50, \
    49,48,47,46,45,44,43,42,41,40, \
    39,38,37,36,35,34,33,32,31,30, \
    29,28,27,26,25,24,23,22,21,20, \
    19,18,17,16,15,14,13,12,11,10, \
    9,8,7,6,5,4,3,2,1,0

/* 62 ones followed by a zero: with it appended, the 64th position holds 0 */
/* for a single argument and 1 for two or more. */
#define HAS_COMMA_RSEQ_N() \
    1,1,1,1, \
    1,1,1,1,1,1,1,1,1,1, \
    1,1,1,1,1,1,1,1,1,1, \
    1,1,1,1,1,1,1,1,1,1, \
    1,1,1,1,1,1,1,1,1,1, \
    1,1,1,1,1,1,1,1,1,1, \
    1,1,1,1,1,1,1,1,0
/* 1 if the arguments contain a top-level comma, 0 otherwise. */
#define HAS_COMMA(...) PP_NARG_1(__VA_ARGS__, HAS_COMMA_RSEQ_N())
/* Expands to a comma only when it is followed by a parenthesised list. */
#define TRIGGER_PARENTHESIS(...) ,

/* Number of arguments: 0 when the probe pattern is 0001 (empty argument */
/* list), otherwise the plain PP_NARG_1 count. */
#define PP_NARG(...) \
    ISEMPTY_SWITCH( \
        0, \
        PP_NARG_1(__VA_ARGS__), \
        HAS_COMMA(__VA_ARGS__), \
        HAS_COMMA(TRIGGER_PARENTHESIS __VA_ARGS__), \
        HAS_COMMA(__VA_ARGS__ (/**/)), \
        HAS_COMMA(TRIGGER_PARENTHESIS __VA_ARGS__ (/**/)))

/* Dispatch table indexed by the four probe results; all 16 patterns are */
/* listed so that no combination is left as an undefined macro name. */
#define IS_EMPTY_CASE_0000(YES, NO) NO
#define IS_EMPTY_CASE_0001(YES, NO) YES
#define IS_EMPTY_CASE_0010(YES, NO) NO
#define IS_EMPTY_CASE_0011(YES, NO) NO
#define IS_EMPTY_CASE_0100(YES, NO) NO
#define IS_EMPTY_CASE_0101(YES, NO) NO
#define IS_EMPTY_CASE_0110(YES, NO) NO
#define IS_EMPTY_CASE_0111(YES, NO) NO
#define IS_EMPTY_CASE_1000(YES, NO) NO
#define IS_EMPTY_CASE_1001(YES, NO) NO
#define IS_EMPTY_CASE_1010(YES, NO) NO
#define IS_EMPTY_CASE_1011(YES, NO) NO
#define IS_EMPTY_CASE_1100(YES, NO) NO
#define IS_EMPTY_CASE_1101(YES, NO) NO
#define IS_EMPTY_CASE_1110(YES, NO) NO
#define IS_EMPTY_CASE_1111(YES, NO) NO

#define PASTE5(_0, _1, _2, _3, _4) _0 ## _1 ## _2 ## _3 ## _4
/* The probe arguments are fully expanded before PASTE5 glues them onto the */
/* table prefix. */
#define ISEMPTY_SWITCH(YES, NO, _0, _1, _2, _3) \
    PASTE5(IS_EMPTY_CASE_, _0, _1, _2, _3)(YES, NO)
CHECK(PROBE(~))
should expand to 1
, and all other CHECK(...)
s should
expand to 0
. On the reason why ~
in particular is used:
The choice of
~
as a first argument is essentially arbitrary (since SECOND
will always cause it to disappear). However this particular character is a popular convention since if a bug in your macros results in one sneaking out into the final expansion it frequently results in a syntax error in the compiler alerting you to the problem.
/* C99|C++11> [https://github.com/pfultz2/Cloak/wiki/C-Preprocessor-tricks,-tips,-and-idioms#detection] */
/* CHECK_N yields its second argument. */
#define CHECK_N(x, n, ...) n
/* CHECK appends a fallback 0, so a single-argument call yields 0. */
#define CHECK(...) CHECK_N(__VA_ARGS__, 0,)
/* PROBE inserts a 1 in second position, which CHECK_N then picks up. */
#define PROBE(x) x, 1,
Perform multiple operations in one macro.
/* C99|C++98> breaks when used twice [multiple declarations] */
/* Swaps the bytes of two lvalues through a temporary buffer sized by the */
/* type of 0 ? (x) : (y); every memcpy copies that many bytes. */
/* NOTE: operands narrower than int are promoted by ?:, so the buffer may be */
/* larger than the objects themselves. */
#define swap(x, y) \
    char buffer[sizeof(0 ? (x) : (y))]; \
    void *x_ = &(x), *y_ = &(y); \
    memcpy(&buffer, x_, sizeof(buffer)); \
    memcpy(x_, y_, sizeof(buffer)); \
    memcpy(y_, &buffer, sizeof(buffer));
/* C78|C++98> breaks in =if (cond) swap(x, y); else { ... }= */
/* Same byte-wise swap, wrapped in a block so the temporaries get their own */
/* scope; every memcpy copies sizeof(buffer) bytes. */
#define swap(x, y) { \
    char buffer[sizeof(0 ? (x) : (y))]; \
    void *x_ = &(x), *y_ = &(y); \
    memcpy(&buffer, x_, sizeof(buffer)); \
    memcpy(x_, y_, sizeof(buffer)); \
    memcpy(y_, &buffer, sizeof(buffer)); \
}
/* C78|C++98> breaks in =return cond ? swap(x, y) : y;= */
/* Same byte-wise swap, wrapped in do { ... } while (0) so that it forms a */
/* single statement; every memcpy copies sizeof(buffer) bytes. */
#define swap(x, y) do { \
    char buffer[sizeof(0 ? (x) : (y))]; \
    void *x_ = &(x), *y_ = &(y); \
    memcpy(&buffer, x_, sizeof(buffer)); \
    memcpy(x_, y_, sizeof(buffer)); \
    memcpy(y_, &buffer, sizeof(buffer)); \
} while (0)
/* C99> more efficient, as it allocates on the stack */
/* swap passes both addresses, a zero-initialised char compound literal as */
/* scratch space, and the byte count; both sizes are sizeof(0 ? (x) : (y)). */
#define swap(x, y) \
_swapf(&(x), &(y), (char [sizeof(0 ? (x) : (y))]) { 0 }, \
sizeof(0 ? (x) : (y)))
/* Exchanges len bytes between left and right, staging left's bytes in tmp. */
static inline void _swapf(void *left, void *right, void *tmp, size_t len) {
memcpy(tmp, left, len);
memcpy(left, right, len);
memcpy(right, tmp, len);
}
/* C89> requires VLAs. */
/* swap passes both addresses and the byte count sizeof(0 ? (x) : (y)). */
#define swap(x, y) \
_swapf(&(x), &(y), sizeof(0 ? (x) : (y)))
/* Exchanges len bytes between left and right through a local array whose */
/* length is the run-time value len. */
static void _swapf(void *left, void *right, size_t len) {
char tmp[len];
memcpy(tmp, left, len);
memcpy(left, right, len);
memcpy(right, tmp, len);
}
/* C78> allocates on the heap. */
/* swap passes both addresses and the byte count sizeof(0 ? (x) : (y)); its */
/* value is the result of _swapf. */
#define swap(x, y) \
    _swapf(&(x), &(y), sizeof(0 ? (x) : (y)))
/* Exchanges len bytes between left and right through a heap buffer. */
/* Returns 0 on success, or -1 (leaving both objects untouched) when the */
/* buffer cannot be allocated. */
static int _swapf(void *left, void *right, size_t len) {
    void *tmp = calloc(1, len);
    if (tmp == NULL) {
        return -1;
    }
    memcpy(tmp, left, len);
    memcpy(left, right, len);
    memcpy(right, tmp, len);
    free(tmp);
    return 0;
}
/* GNU89> */
/* Statement expression: x_ points at x but is declared with y's type, and */
/* y_ points at y but is declared with x's type. The values are exchanged */
/* through tmp, and the expression's value is tmp (the old value of x). */
#define swap(x, y) ({ \
typeof(y) *x_ = &(x); \
typeof(x) *y_ = &(y); \
typeof(x) tmp = *x_; \
*x_ = *y_; *y_ = tmp; \
tmp; \
})
/* C78> does not function as expected, =break= is a no-op */
/* the =break= only leaves the macro's own do/while, not a loop around it */
#define macro(foo) do { \
if (condition) \
break; \
} while (0)
/* C78> breaks in =return cond ? macro(foo) : foo;=, may cause warnings */
/* the body is an =if (1)= block, so =break= is not captured by a loop of */
/* the macro's own; the trailing =else do ; while (0)= absorbs the caller's */
/* semicolon */
#define macro(foo) if (1) { \
if (condition) \
break; \
} else do ; while (0)
/* C89> breaks in =return cond ? macro(foo) : foo;= */
/* same =if (1)= shape; the trailing =else (void) 0= absorbs the caller's */
/* semicolon */
#define macro(foo) if (1) { \
if (condition) \
break; \
} else (void) 0
/* C78> breaks in =return cond > debug(("foo: %d\n", foo)) : foo;= */
#define debug(args) do { printf("debug: "); printf args; } while (0)
/* C78> */
#define debug(args) (printf("debug: "), printf args)