Here's a list of mildly interesting things about the C language that I learned mostly by consuming Clang's ASTs. Although surprises are getting sparser, I might continue to update this document over time.
There are many more mildly interesting features of C++, but the language is literally known for being weird, whereas C is usually considered smaller and simpler, so this is (almost) only about C.
1. Combined type and variable/field declaration, inside a struct scope [https://godbolt.org/g/Rh94Go]
// `struct bar` is declared together with the field `baz` that uses it.
// C has no struct-level scope for tags, so `struct bar` lands in the scope
// that encloses `struct foo` and stays usable after it.
struct foo {
struct bar {
int x;
} baz;
};
void frob() {
struct bar b; // <-- defined in body of `struct foo`
}
2. Compound literals are lvalues [https://godbolt.org/g/Zup5ZB]
struct foo {
int bar;
};
void baz() {
// compound literal:
// https://en.cppreference.com/w/c/language/compound_literal
// (the empty braces `{}` are a compiler extension before C23; `{0}` is
// the portable spelling)
(struct foo){};
// these are actually lvalues
((struct foo){}).bar = 4; // assigns to a member of the unnamed object
&(struct foo){}; // takes the address of the unnamed object
}
3. Switch cases anywhere [https://godbolt.org/g/fSeL18]
// With p == 0 the assignment is guarded by the two nested `if` tests.
// With p == 1 control jumps straight to the `case 1:` label inside them,
// so the assignment runs without either test being evaluated.
void foo(int p, char* complicated) {
switch (p) {
case 0:
if (complicated[0] == 'a') {
if (complicated[1] == 'b') {
case 1: // a case label may sit on any statement nested in the switch body
complicated[2] = 'c';
}
}
break;
}
}
(also see: Duff's Device)
4. Flexible array members [https://godbolt.org/g/HCjfzX]
struct flex {
int count;
int elems[]; // <-- flexible array member
};
// this lays out the object exactly as expected
// (statically initializing a flexible array member is an extension
// accepted by GCC and Clang; ISO C does not provide for it)
struct flex f = {
.count = 3,
.elems = {32, 31, 30}
};
// here the flexible array member adds nothing to the size of the struct
_Static_assert(sizeof(struct flex) == sizeof(int), "");
// sizeof(f) does not include the size of statically-declared elements
_Static_assert(sizeof(f) == sizeof(struct flex), "");
// this only builds because .elems is not initialized:
// (ISO C forbids arrays whose element type has a flexible array member;
// compilers accept this as an extension too)
struct flex g[2];
5. {0} as a universal initializer [https://godbolt.org/g/MPKkXv]
// NOTE: zero-length arrays and empty structs are GNU C extensions, and
// `ext_vector_type` is a Clang vector extension.
typedef int empty_array_t[0];
typedef struct {} empty_struct_t;
typedef int array_t[10];
typedef struct { int f; } struct_t;
typedef float vector_t __attribute__((ext_vector_type(4)));
// {} can initialize structs and arrays and vectors, but not scalars:
// (this describes pre-C23 compilers, where `{}` is itself an extension;
// C23 standardizes `{}` and accepts it for scalars as well)
empty_array_t ea = {};
empty_struct_t es = {};
array_t a = {};
struct_t s = {};
vector_t v = {};
void* p = {}; // <-- error
int i = {}; // <-- error
// {0} can initialize any data type, including empty arrays/structs.
empty_array_t eaa = {0};
empty_struct_t ess = {0};
array_t aa = {0};
struct_t bb = {0};
vector_t cc = {0};
void* dd = {0}; // <-- happy!
int ee = {0}; // <-- happy!
6. Function typedefs [https://godbolt.org/g/5ctrLv]
typedef void (*function_pointer_t)(int); // <-- this creates a function pointer type
typedef void function_t(int); // <-- this creates a function type
// function_pointer_t == function_t*
// A function-type typedef can declare a function but cannot define one:
// a definition has to spell out its own parameter list.
function_t my_func; // <-- this declares "void my_func(int)"
void bar() {
my_func(42);
}
7. Array pointers [https://godbolt.org/g/N85dvv]
typedef int array_t[10]; // array typedef
typedef array_t* array_ptr_t; // array pointer typedef
// same as:
// typedef int (*array_ptr_t)[10];
void foo(array_ptr_t array_ptr) {
// dereference the array pointer first, then index the resulting array
int x = (*array_ptr)[1];
}
void bar() {
int arr_10[10];
foo(&arr_10); // <-- yep
int arr_11[11];
// the length is part of the pointed-to array type, so `int (*)[11]`
// is not compatible with `int (*)[10]`
foo(&arr_11); // <-- nope
}
8. Modifiers to array sizes in parameter definitions [https://godbolt.org/z/FnwYUs]
void foo(int arr[static const restrict volatile 10]) {
// static: the array contains at least 10 elements
// const, volatile and restrict all apply to the pointer that the array
// parameter is adjusted to, i.e. `int *const restrict volatile arr`
// (not to the elements and not to an array type).
}
(corrected by Reddit user /u/romv1)
9. Flat initializer lists [https://godbolt.org/g/RmwnoG]
struct foo {
int x, y;
};
struct lots_of_inits {
struct foo z[2];
int w[3];
};
// this is probably more typical
struct lots_of_inits init = {
{{1, 2}, {3, 4}}, {5, 6, 7}
};
// but braces for inner elements are optional
// (brace elision: the values are consumed in declaration order to fill
// each nested aggregate, so this produces the same object as `init`)
struct lots_of_inits flat_init = {
1, 2, 3, 4, 5, 6, 7
};
10. What’s an lvalue, anyway [https://godbolt.org/g/5echfM]
struct bitfield {
unsigned x: 3;
};
// Expression statements probing which expressions are lvalues, which can
// be assigned to, and which can have their address taken. The trailing
// comments on the first group are the Clang AST nodes for each expression.
void foo() {
int a[2];
int i;
const int j; // never initialized; only its value category matters here
struct bitfield bf;
// these are all lvalues
a; // DeclRefExpr <col:5> 'int [2]' lvalue Var 0x556800650150 'a' 'int [2]'
i; // DeclRefExpr <col:5> 'int' lvalue Var 0x56289851bf20 'i' 'int'
j; // DeclRefExpr <col:5> 'const int' lvalue Var 0x555fc6694ff0 'j' 'const int'
bf.x; // MemberExpr <col:5, col:8> 'unsigned int' lvalue bitfield .x 0x55dab002de28
// this is not an lvalue
foo; // DeclRefExpr <col:6> 'void ()' Function 0x563cb79da098 'foo' 'void ()'
// ... but you can't assign to all of them
// (the commented-out lines below are the ones that do not compile)
// a = (int [2]){1, 2};
i = 4;
// j = 4;
bf.x = 4;
// ... and you can't take all of their addresses
&a;
&i;
&j;
// &bf.x;
&foo; // but you can take the address of a function, which is not an lvalue
// so, an lvalue is a value that:
// - can have its address taken...
// - unless it is a bitfield (still an lvalue)
// - unless it is a function (not an lvalue)
// - can be assigned to...
// - unless it is an array (still an lvalue)
// - unless it is a constant (still an lvalue)
// (the standard calls an lvalue that can be assigned to a "modifiable
// lvalue"; arrays and const-qualified objects are non-modifiable lvalues)
}
11. Void globals [https://godbolt.org/z/C52Wn2]
// You can declare extern globals to incomplete types,
// including `void`.
extern void foo;
12. Alignment implications of bitfields [https://godbolt.org/z/KmB4CB]
struct foo {
char a;
long b: 16;
char c;
};
// `struct foo` has the alignment of its most-aligned member:
// `long b` has an alignment of 8...
// (assuming a target where `long` is 8-byte aligned, e.g. x86-64 Linux;
// bit-field layout is implementation-defined, so other ABIs may differ)
int alignof_foo = _Alignof(struct foo);
// ...but `long b: 16` is a bitfield, and is aligned on a char
// boundary.
int offsetof_c = __builtin_offsetof(struct foo, c);
13. `static` variables are scope-local [https://godbolt.org/z/hdcLYW]
// Returns whether the two block-local statics share an address.
int foo() {
int* a;
int* b;
{
static int foo; // shadows the function's own name inside this block
a = &foo;
}
{
static int foo; // a second, separate object that merely reuses the name
b = &foo;
}
// this always returns false: two static variables with the same name
// but declared in different scope refer to different storage.
return a == b;
}
14. Typedef goes anywhere [https://godbolt.org/z/vZmgha]
// Declaration specifiers may appear in any order, so `typedef` does not
// have to come first. (The standard marks placing a storage-class
// specifier anywhere but first as an obsolescent feature.)
short typedef signed s16; // same as: typedef signed short s16;
unsigned int typedef u32; // same as: typedef unsigned int u32;
// The member declaration needs its terminating `;` -- without it the
// struct body is ill-formed (hard error in Clang, pedantic warning in GCC).
struct foo { int bar; } const typedef baz; // baz is `const struct foo`
s16 a;
u32 b;
baz c;
15. Indexing into an integer [https://godbolt.org/z/IBA5Gr]
// Reads the same element twice, once with each spelling of the subscript,
// and returns the sum (i.e. twice the element at `index`).
int foo(int* ptr, int index) {
// When indexing, the pointer and integer parts
// of the subscript expression are interchangeable.
return ptr[index] + index[ptr];
// It works this way, according to the standard (§6.5.2.1:2),
// because A[B] is the same as *(A + B), and addition
// is commutative.
}
16. The type of enums vs. the type of enumerators [https://godbolt.org/z/Mhsn1n7nd]
In C, enumerators (values declared in enums) have integer type rather than the type of their enclosing enum. For instance:
enum foo { bar, baz, frob };
`enum foo` is a valid type to use that can store the value of `bar`, `baz` and `frob`.
However, the type of `bar`, `baz` and `frob` is an implementation-defined integer type! On many implementations, `bar` has type `int` and `enum foo` has the underlying type `unsigned`. This means that a check as simple as this one:
enum foo f = bar;
f < baz;
involves a comparison of integers with different signedness.
Further, the type of each enumerator is not guaranteed to be the same. In this example:
enum foo { bar, baz = 0x80000000 };
The type of `bar` can be `int` and the type of `baz` can be `unsigned`.
Special mentions
1. The power of UB [https://godbolt.org/g/H6mBFT]
extern void this_is_not_directly_called_by_main();
// `static`: no other translation unit can name or assign this pointer.
static void (*side_effects)() = 0;
void bar() {
side_effects = this_is_not_directly_called_by_main; // the only store to side_effects
}
int main() {
// main never calls bar() itself, so as written this calls through a
// pointer that still holds its initial null value.
side_effects();
}
compiles to:
bar: # @bar
ret
main: # @main
push rax
xor eax, eax
call this_is_not_directly_called_by_main
xor eax, eax
pop rcx
ret
`main` directly calls `this_is_not_directly_called_by_main` in this implementation. This happens because:
- LLVM sees that `side_effects` has only two possible values: NULL (the initial value) or `this_is_not_directly_called_by_main` (if `bar` is called)
- LLVM sees that `side_effects` is called, and it is UB to call a null pointer
- UB is impossible, so LLVM assumes that `bar` will have executed by the time `main` runs rather than face the consequences
- Under this assumption, `side_effects` is always `this_is_not_directly_called_by_main`.
2. A constant-expression macro that tells you if an expression is an integer constant [https://godbolt.org/g/a41gmx]
// How it works: the condition `1` only selects which operand's *value* is
// used; the *type* of a `?:` expression is derived from both operands.
// - If x is an integer constant expression, `(void*)((x) * 0l)` is a null
//   pointer constant, so the result takes the other operand's type, `int*`,
//   and sizeof(*(int*)) == sizeof(int).
// - Otherwise it is an ordinary `void*`, the result has type `void*`, and
//   sizeof(*(void*)) is 1 under the GNU extension (assumed to differ from
//   sizeof(int)).
// Nothing is evaluated at run time: the whole expression is a sizeof operand.
#define ICE_P(x) (sizeof(int) == sizeof(*(1 ? ((void*)((x) * 0l)) : (int*)1)))
int is_a_constant = ICE_P(4); // 4 is an integer constant expression -> 1
int is_not_a_constant = ICE_P(is_a_constant); // a variable is not -> 0
From Martin Uecker, on the Linux kernel ML. `__builtin_constant_p` does the same thing on Clang and GCC.
3. Labels inside expression statements in really weird places [https://godbolt.org/g/k9wDRf]
You can make some pretty weird stuff in C, but for a real disaster, you need C++.
class foo {
int x;
public:
foo();
};
// `({ ... })` is a GNU statement expression; here one serves as the
// initializer of `x` and declares the label `a` inside itself.
foo::foo() : x(({ a: 4; })) {
goto a; // targets the label declared inside the member initializer above
}
Needless to say, statement expressions are not standard C++ (or standard C), but if your compiler has them, chances are that you can use them in really interesting ways.
1 ? ((void*)((x) * 0l)) : (int*)1
Can someone explain this ternary operator on special mention #2? It seems like it would always choose the first argument `((void*)((x) * 0l))` since `1` evaluates to true. This is confusing.