Last active
August 14, 2019 04:59
-
-
Save rui314/4b897109884f58c57b8fa5ebf9d03146 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Copyright (C) 2019 Rui Ueyama
// Licensed under the MIT license
//
// This command parses a C declaration. Here are a few examples:
//
//   $ ./cdecl 'int x'
//   x: int
//
//   $ ./cdecl 'int **const *x'
//   x: pointer to const pointer to pointer to int
//
//   $ ./cdecl 'int (*x)()'
//   x: pointer to function returning int
//
//   $ ./cdecl 'int long long signed signed typedef const x'
//   x: typedef const longlong
//
//   $ ./cdecl 'int ((*const x)[])()'
//   x: const pointer to array of function returning int
//
//
// To build this file, just run "gcc -o cdecl cdecl.c".
#define _GNU_SOURCE 1
#include <ctype.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdnoreturn.h>
#include <string.h>
// Report a fatal error and terminate the program.
//
// `fmt` and the arguments after it are interpreted as by printf. The
// formatted message is written to stderr, followed by a newline, and the
// process then exits with status 1. This function never returns.
noreturn void error(const char *fmt, ...) {
  va_list ap;
  va_start(ap, fmt);
  vfprintf(stderr, fmt, ap);
  // Every va_start must be paired with va_end before leaving the function.
  va_end(ap);
  fprintf(stderr, "\n");
  exit(1);
}
// The input string is split into tokens, which are stored here in order.
// The array has static storage, so every slot that is never written stays
// NULL; a NULL slot therefore marks the end of the input.
static char *tokens[100];

// Index into `tokens` of the token the parser will examine next.
static int pos = 0;
// Skip a token if the current token is the same as a given one. | |
static bool consume(char *tok) { | |
if (!tokens[pos] || strcmp(tokens[pos], tok) != 0) | |
return false; | |
pos++; | |
return true; | |
} | |
// Ensure that the current token is a given one. | |
static void expect(char *tok) { | |
if (!tokens[pos] || strcmp(tokens[pos], tok) != 0) | |
error("%s expected\n", tok); | |
pos++; | |
} | |
// The kind of a type node. Numbering starts at 1, so a zero-initialized
// Type does not match any of these kinds.
enum BaseType {
  TY_VOID = 1,
  TY_BOOL,
  TY_CHAR,
  TY_SHORT,
  TY_INT,
  TY_LONG,
  TY_LLONG,
  TY_FLOAT,
  TY_DOUBLE,
  TY_LDOUBLE,
  // Derived types: these wrap another type.
  TY_ARRAY,
  TY_PTR,
  TY_FUNCTION,
};
// Represents a C type. | |
typedef struct Type { | |
enum BaseType ty; | |
bool is_signed; | |
struct Type *ptr_of; | |
struct Type *array_of; | |
struct Type *return_type; | |
bool is_typedef; | |
bool is_extern; | |
bool is_static; | |
bool is_const; | |
} Type; | |
// Placeholder for parsing a struct specifier. Not implemented: it reads no
// tokens and always returns NULL.
static Type *read_struct(void) { return NULL; }
// Placeholder for parsing a union specifier. Not implemented: it reads no
// tokens and always returns NULL.
static Type *read_union(void) { return NULL; }
// Placeholder for parsing an enum specifier. Not implemented: it reads no
// tokens and always returns NULL.
static Type *read_enum(void) { return NULL; }
// Read a valid type specifier, e.g. | |
// | |
// int | |
// typedef const int | |
// long const int long typedef | |
static Type *read_type_specifier() { | |
Type *ty = calloc(1, sizeof(Type)); | |
enum { | |
SIGNED = 1, | |
UNSIGNED = 1 << 2, | |
VOID = 1 << 4, | |
BOOL = 1 << 6, | |
CHAR = 1 << 8, | |
SHORT = 1 << 10, | |
INT = 1 << 12, | |
LONG = 1 << 14, | |
FLOAT = 1 << 16, | |
DOUBLE = 1 << 18, | |
}; | |
int base_type = 0; | |
Type *user_type = NULL; | |
bool is_typedef = false; | |
bool is_extern = false; | |
bool is_static = false; | |
bool is_const = false; | |
for (;;) { | |
// Read one token at a time. | |
if (consume("typedef")) | |
is_typedef = true; | |
else if (consume("extern")) | |
is_extern = true; | |
else if (consume("static")) | |
is_static = true; | |
else if (consume("const")) | |
is_const = true; | |
else if (consume("signed")) | |
base_type |= SIGNED; | |
else if (consume("unsigned")) | |
base_type |= UNSIGNED; | |
else if (consume("void")) | |
base_type += VOID; | |
else if (consume("bool")) | |
base_type += BOOL; | |
else if (consume("char")) | |
base_type += CHAR; | |
else if (consume("short")) | |
base_type += SHORT; | |
else if (consume("int")) | |
base_type += INT; | |
else if (consume("long")) | |
base_type += LONG; | |
else if (consume("float")) | |
base_type += FLOAT; | |
else if (consume("double")) | |
base_type += DOUBLE; | |
else if (consume("struct")) | |
user_type = read_struct(); | |
else if (consume("union")) | |
user_type = read_union(); | |
else if (consume("enum")) | |
user_type = read_enum(); | |
else | |
break; | |
if (is_extern && is_static) | |
error("extern and static may not be used together"); | |
// Something like `int struct foo x` is an error. | |
if (base_type && user_type) | |
error("invalid type"); | |
// The valid combinations of type specifiers are irregular. For | |
// example, `long int` and `short int` are allowed and interpreted | |
// as `long` and `short`, but `char int` is not `char` but just | |
// invalid. We simply enumerate all valid combinations in this | |
// switch. | |
switch (base_type) { | |
case VOID: | |
*ty = (Type){TY_VOID}; | |
break; | |
case BOOL: | |
*ty = (Type){TY_BOOL}; | |
break; | |
case SIGNED + CHAR: | |
*ty = (Type){TY_CHAR, true}; | |
break; | |
case CHAR: | |
case UNSIGNED + CHAR: | |
*ty = (Type){TY_CHAR}; | |
break; | |
case SHORT: | |
case SHORT + INT: | |
case SIGNED + SHORT: | |
case SIGNED + SHORT + INT: | |
*ty = (Type){TY_SHORT, true}; | |
break; | |
case UNSIGNED + SHORT: | |
case UNSIGNED + SHORT + INT: | |
*ty = (Type){TY_SHORT}; | |
break; | |
case INT: | |
case SIGNED: | |
case SIGNED + INT: | |
*ty = (Type){TY_INT, true}; | |
break; | |
case UNSIGNED: | |
case UNSIGNED + INT: | |
*ty = (Type){TY_INT}; | |
break; | |
case LONG: | |
case LONG + INT: | |
case SIGNED + LONG: | |
case SIGNED + LONG + INT: | |
*ty = (Type){TY_LONG, true}; | |
break; | |
case UNSIGNED + LONG: | |
case UNSIGNED + LONG + INT: | |
*ty = (Type){TY_LONG}; | |
break; | |
case LONG + LONG: | |
case LONG + LONG + INT: | |
case SIGNED + LONG + LONG: | |
case SIGNED + LONG + LONG + INT: | |
*ty = (Type){TY_LLONG, true}; | |
break; | |
case UNSIGNED + LONG + LONG: | |
case UNSIGNED + LONG + LONG + INT: | |
*ty = (Type){TY_LLONG}; | |
break; | |
case FLOAT: | |
*ty = (Type){TY_FLOAT}; | |
break; | |
case DOUBLE: | |
*ty = (Type){TY_DOUBLE}; | |
break; | |
case LONG + DOUBLE: | |
*ty = (Type){TY_LDOUBLE}; | |
break; | |
case 0: | |
// If there's no type specifier, it becomes `int`. | |
// For example, x in `const x` is `int`. | |
*ty = user_type ? *user_type : (Type){TY_INT}; | |
break; | |
default: | |
error("invalid type"); | |
} | |
} | |
ty->is_typedef = is_typedef; | |
ty->is_extern = is_extern; | |
ty->is_static = is_static; | |
ty->is_const = is_const; | |
return ty; | |
} | |
static Type *read_declarator(Type *ty, char **name);

// Read any number of trailing `()` (function) or `[]` (array) suffixes and
// return `ty` wrapped accordingly.
//
// Only empty parentheses and brackets are accepted; parameter lists and
// array sizes are not parsed. The suffix nearest the identifier becomes
// the outermost type, so `x[]()` is "array of function returning ...".
// For that reason the remaining suffixes are read first and the current
// one is wrapped around their result.
static Type *read_direct_declarator_tail(Type *ty) {
  enum BaseType kind;
  if (consume("(")) {
    expect(")");
    kind = TY_FUNCTION;
  } else if (consume("[")) {
    expect("]");
    kind = TY_ARRAY;
  } else {
    return ty;
  }

  Type *inner = read_direct_declarator_tail(ty);

  Type *outer = calloc(1, sizeof(*outer));
  if (!outer)
    error("out of memory");
  outer->ty = kind;
  if (kind == TY_FUNCTION)
    outer->return_type = inner;
  else
    outer->array_of = inner;
  return outer;
}
// Returns true if `s` is one of the keywords that can be part of a type
// name and therefore must not be taken as the declared identifier.
// A NULL `s` (end of input) is not a keyword, so false is returned.
static bool is_reserved(const char *s) {
  static const char *const keywords[] = {
      "typedef", "extern", "static", "thread_local", "const",
      "noreturn", "signed", "unsigned", "void", "bool",
      "char", "short", "int", "long", "float",
      "double", "struct", "union", "enum",
  };

  if (!s)
    return false;
  for (size_t i = 0; i < sizeof(keywords) / sizeof(keywords[0]); i++)
    if (strcmp(keywords[i], s) == 0)
      return true;
  return false;
}
// Read an identifier (if one exists) and the following [] or (), e.g.
//
//   x[]     (x is an array of ...)
//   (*x)()  (x is a pointer to a function returning ...)
//
// `ty` is the type built so far. If an identifier is found it is stored in
// `*name`; otherwise `*name` is left unchanged. Returns the resulting type.
static Type *read_direct_declarator(Type *ty, char **name) {
  if (consume("(")) {
    // The type the parenthesized declarator applies to is not known until
    // the suffixes after the closing ")" have been read. So the inner
    // declarator is built on top of a placeholder node, which is
    // overwritten in place once the real type is available.
    Type *placeholder = calloc(1, sizeof(*placeholder));
    if (!placeholder)
      error("out of memory");
    Type *result = read_declarator(placeholder, name);
    expect(")");
    *placeholder = *read_direct_declarator_tail(ty);
    return result;
  }

  // Take the current token as the name only if it really is an identifier:
  // it must exist, start with a letter or underscore, and not be a keyword.
  // Punctuation such as ")" is thereby left for the caller.
  char *tok = tokens[pos];
  bool starts_ident = tok && (tok[0] == '_' || ('a' <= tok[0] && tok[0] <= 'z') ||
                              ('A' <= tok[0] && tok[0] <= 'Z'));
  if (starts_ident && !is_reserved(tok)) {
    *name = tok;
    pos++;
  }

  // Read following optional [] or ().
  return read_direct_declarator_tail(ty);
}
// Read optional '*' for pointers, each optionally followed by `const`, e.g.
//
//   *
//   * const *
//   ***
//
// and then the rest of the declarator. Every '*' wraps `ty` in a new
// pointer node; a `const` after a '*' marks that pointer itself as const.
// An identifier found further on is stored in `*name`.
static Type *read_declarator(Type *ty, char **name) {
  while (consume("*")) {
    Type *ptr = calloc(1, sizeof(*ptr));
    if (!ptr)
      error("out of memory");
    ptr->ty = TY_PTR;
    ptr->ptr_of = ty;
    while (consume("const"))
      ptr->is_const = true;
    ty = ptr;
  }
  return read_direct_declarator(ty, name);
}
// Print out a given type to stdout.
//
// Each node is written as space-prefixed words: first its storage-class
// and const flags, then its kind, and then, for derived types, the type it
// is built from (recursively). No trailing newline is written.
void print_type(Type *ty) {
  if (ty->is_typedef)
    printf(" typedef");
  if (ty->is_extern)
    printf(" extern");
  if (ty->is_static)
    printf(" static");
  if (ty->is_const)
    printf(" const");

  switch (ty->ty) {
  case TY_VOID:
    printf(" void");
    break;
  case TY_BOOL:
    printf(" bool");
    break;
  case TY_CHAR:
    printf(ty->is_signed ? " char" : " uchar");
    break;
  case TY_SHORT:
    printf(ty->is_signed ? " short" : " ushort");
    break;
  case TY_INT:
    printf(ty->is_signed ? " int" : " uint");
    break;
  case TY_LONG:
    printf(ty->is_signed ? " long" : " ulong");
    break;
  case TY_LLONG:
    printf(ty->is_signed ? " longlong" : " ulonglong");
    break;
  case TY_FLOAT:
    printf(" float");
    break;
  case TY_DOUBLE:
    printf(" double");
    break;
  case TY_LDOUBLE:
    // Spelled as one word, like "longlong".
    printf(" longdouble");
    break;
  case TY_ARRAY:
    printf(" array of");
    break;
  case TY_PTR:
    printf(" pointer to");
    break;
  case TY_FUNCTION:
    printf(" function returning");
    break;
  default:
    // A node without a valid kind prints no kind name.
    break;
  }

  if (ty->ptr_of)
    print_type(ty->ptr_of);
  if (ty->array_of)
    print_type(ty->array_of);
  if (ty->return_type)
    print_type(ty->return_type);
}
// Returns true if `c` can be part of an identifier or keyword token:
// an ASCII letter, an ASCII digit, or an underscore.
static bool is_alnum(char c) {
  if (c == '_')
    return true;
  if ('0' <= c && c <= '9')
    return true;
  if ('a' <= c && c <= 'z')
    return true;
  return 'A' <= c && c <= 'Z';
}
int main(int argc, char **argv) { | |
if (argc != 2) | |
error("Usage: %s declaration\nExample: %s 'int const (*x)[][]", argv[0]); | |
// Tokenize argv[1]. | |
int i = 0; | |
for (char *p = argv[1]; *p;) { | |
if (*p == ' ') { | |
p++; | |
continue; | |
} | |
if (is_alnum(*p)) { | |
char *q = p + 1; | |
while (is_alnum(*q)) | |
q++; | |
tokens[i++] = strndup(p, q - p); | |
p = q; | |
continue; | |
} | |
tokens[i++] = strndup(p, 1); | |
p++; | |
} | |
// Parse the input. | |
Type *ty = read_type_specifier(); | |
char *name = NULL; | |
ty = read_declarator(ty, &name); | |
// Print it out. | |
printf("%s:", name); | |
print_type(ty); | |
printf("\n"); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment