Created
September 24, 2023 17:06
-
-
Save alexd2580/c53f5243a2b03ea9d7c62fa603f95200 to your computer and use it in GitHub Desktop.
Polyglot in 18 Languages
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define X[<?php ob_end_clean();echo"PHP\n";__halt_compiler();#print(("ALGOL68",newline))COMMENT] | |
#if 0/*++++++++++[->++++++++++<]>[->+>+>+>+<<<<]+++++[->>+>++>+++<<<<]<++++++++[->++++++++<]*/ | |
### >++.>>>>-.<<<---.>.>.<---.>>+++.<<<++.>>---.>>++++++++++.>[ \ | |
q="""=;BEGIN{print'Perl';exit;}"+%^ set a 1;test "0$a" -eq 1 &&echo "Fish" &&exit;let a=4;test "$a[1]" = "4" &&echo "Zsh" ||echo "Bash";exit | |
#""";__import__("sys").stdout.write("Python2\n" if 1/2==0 else "Python3\n");"""^;puts"Ruby";exit;%^ | |
string cat : /*x | |
set b : | |
puts tcl; exit; | |
define b | |
console.log "Literate Coffeescript" | |
### | |
#*/ | |
console.log "Coffeescript" | |
`a=\` | |
#else | |
char c(){};/*;'*/typedef int program;typedef int end;const char* print(char*){}int x;main(){struct x{};__builtin_puts(sizeof(x)==4?"C":"C++");}char const*f(){ | |
program f; | |
c();return | |
print("(A)"),("Fortran"); | |
end program; | |
c();}/* | |
> main=putStrLn"Haskell" | |
endef #*/ | |
/*x: ; @echo "Makefile"; exit; #*/ | |
#endif"""#^#]\``#'#COMMENT |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Test driver for the polyglot: the first argument is the path of the polyglot
# source file, which each try() call below feeds to one interpreter/compiler.

# Refuse to start without a readable input file; otherwise every language
# check would fail with an unrelated-looking cp/compiler error.
if [[ $# -lt 1 || ! -r "$1" ]]; then
    printf 'Usage: %s <polyglot-file>\n' "$0" >&2
    exit 2
fi
file=$1
# Run one language check against the polyglot and report the result.
#
# Arguments:
#   $1 language   Label to print; also the exact text the run step must output.
#   $2 extension  If non-empty, "$file" is copied to "$file.<extension>" before
#                 the check and that copy is removed afterwards.
#   $3 compile    Optional build command (passed to eval).
#   $4 run        Command (passed to eval) whose combined stdout/stderr is
#                 compared against $1.
#   $5 cleanup    Optional command (passed to eval) executed after the run.
#
# Reads the global "$file". Returns 1 if the extension copy cannot be created,
# otherwise the status of the last reporting step.
function try {
    # Keep all working state local so a check cannot clobber script globals.
    local language=$1
    local extension=$2
    local compile=$3
    local run=$4
    local cleanup=$5
    # Declared separately from assignment so `local` does not mask exit codes.
    local cp_res compile_res run_res cleanup_res rm_res

    printf '\033[1;33m%s\033[0m\n' "$language"

    if [[ -n "$extension" ]] && ! cp_res="$(cp "$file" "$file.$extension" 2>&1)"; then
        printf ' \033[0;31mcp "%s" "%s"\033[0m\n' "$file" "$file.$extension"
        printf ' %s\n' "$cp_res"
        # Without a fresh copy the compile/run steps would either fail with
        # misleading errors or silently test a stale file from an earlier run.
        return 1
    fi

    # Captured output and command text are printed with %s so that backslash
    # sequences in them are shown verbatim instead of being re-interpreted.
    if [[ -n "$compile" ]] && ! compile_res="$(eval " $compile" 2>&1)"; then
        printf ' \033[0;31m%s:\033[0m\n' "$compile"
        printf ' %s\n' "$compile_res"
    elif run_res="$(eval " $run" 2>&1)"; then
        if [[ "$run_res" == "$language" ]]; then
            # Success marker: a green, otherwise empty line.
            # NOTE(review): a glyph may have been lost from this line - confirm.
            printf ' \033[0;32m\033[0m\n'
        else
            printf ' \033[0;31m%s\033[0m\n' "$run"
            printf ' Expected: %s\n' "$language"
            printf ' Got : %s\n' "$run_res"
        fi
    else
        printf ' \033[0;31m%s\033[0m\n' "$run"
        printf ' %s\n' "$run_res"
    fi

    if [[ -n "$cleanup" ]] && ! cleanup_res="$(eval " $cleanup" 2>&1)"; then
        printf ' \033[0;31m%s\033[0m\n' "$cleanup"
        printf ' %s\n' "$cleanup_res"
    fi

    if [[ -n "$extension" ]] && ! rm_res="$(rm "$file.$extension" 2>&1)"; then
        printf ' \033[0;31mrm "%s"\033[0m\n' "$file.$extension"
        printf ' %s\n' "$rm_res"
    fi
}
# Shell-escaped form of the input path. The command strings below are passed
# through eval inside try(), so the raw path must not be spliced in directly:
# whitespace or metacharacters in it would be word-split or re-interpreted.
file_q=$(printf '%q' "$file")

# Compiled languages: copy to a suitably named file, build, run, remove binary.
try "C" "c" "gcc $file_q.c -o c" "./c" "rm c"
try "C++" "cpp" "g++ $file_q.cpp -o cpp" "./cpp" "rm cpp"
# gcc-fortran
try "Fortran" "f" "gfortran -cpp $file_q.f -o f" "./f" "rm f"
# algol68g
try "ALGOL68" "" "" "a68g $file_q" "rm .Random.seed"
# coffeescript
try "Literate Coffeescript" "litcoffee" "" "coffee $file_q.litcoffee" ""
try "Coffeescript" "coffee" "" "coffee $file_q.coffee" ""
try "PHP" "" "" "php --define output_buffering=On $file_q" ""
try "Haskell" "lhs" "ghc -no-keep-hi-files -no-keep-o-files -cpp -optP -Wno-endif-labels -optL -q $file_q.lhs -o hs" "./hs" "rm hs"
try "Bash" "" "" "bash $file_q" ""
try "Zsh" "" "" "zsh $file_q" ""
try "Fish" "" "" "fish $file_q" ""
# tcsh
# try "csh" "" "" "csh $file" ""
# try "ksh" "" "" "ksh $file" ""
try "Perl" "" "" "perl $file_q" ""
# python2-bin
try "Python2" "" "" "python2 $file_q" ""
try "Python3" "" "" "python3 $file_q" ""
try "Ruby" "" "" "ruby $file_q" ""
# Brainfuck
try "Brainfuck" "" "bfc $file_q" "./a.out" "rm a.out"
try "tcl" "" "" "tclsh $file_q" ""
try "Makefile" "" "" "make -f $file_q" ""
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
About
This is version 2 of my venture into polyglot programming, v1 can be found here.
In this polyglot I count C and C++ as two different languages. It gets more interesting with Python 2 vs. Python 3, which are also counted as two different languages. CoffeeScript and Literate CoffeeScript use the same tool, `coffee`, but are parsed by greatly differing grammars.
Finally, Bash, Zsh and Fish are treated as different languages here because they can be distinguished from each other using cleverly arranged instructions, rather than by facilities that are actually intended for detecting which shell or language is running, e.g. `$shell`, the `__cplusplus` preprocessor macro, or the 0-th argument, which contains the executable name.
How to run
Requirements
Tricks used
Algol68
This language has lots of different comment styles. Block comments can be wrapped in `#` or, literally, `COMMENT`. This is great since the first line starts with a `#` mark anyway.
Python
Treats `#` as a comment line and `"""` ... `"""` as a string expression. Except for the first code line after the header, all foreign code is "string-ed" out, and the preprocessor instructions are comments.
Python 2 vs. 3 can be detected with `1/2`: Python 2 does integer division, Python 3 does floating-point division. To avoid a separate import line we use the `__import__` function.
Fortran (77)
Can use the C preprocessor to enter the same `#ifdef` scope. In fixed mode (77 mode) Fortran treats lines starting with a `c`, `C` or `*` as comments. Otherwise the first 6 columns are reserved for labels (numeric?). The `/**/` comments are removed by the preprocessor, and all that is left is C/C++ code on lines that start with a `c`, which Fortran ignores.
C/C++
This time without boring macros and trigraphs. The preprocessor is only used to separate all other languages from C, C++, Fortran and Haskell. We hide foreign code in `/**/`. To make the Fortran code, which cannot be "hidden", into valid C code, we use typedefs, the comma operator, redundant parentheses, semicolons and the return statement, creating a pointless, but valid function. The difference between C and C++ is detected by `sizeof(x)`, where `x` is either a global 4-byte int, or a local empty 1-byte struct definition, which does not shadow the global int in C, because you would have to write `struct x` instead of just `x` to address the struct.
Bash
Bash doesn't require the entire file to be valid Bash if we can exit early using `exit`. Lines starting with `#` are comments. Most languages come after Bash, so `exit` skips them. However, Bash and Fish have a lot of differences. To make Bash work with "fishy" restrictions, the Perl q-string at the beginning of line 4 is treated as a local env variable. To distinguish Bash from Fish, we use Fish's `set` variable command, which does something different in Bash.
Zsh
Zsh and Bash are similar in many things, but not in array access. What looks like an array access to Zsh looks to Bash like regular string interpolation with some randomly placed letters.
Fish
Fish is interesting in that it requires the entire file to be parseable (but not necessarily interpretable). This imposes some restrictions that surface in the C block. Luckily these can be circumvented using a multiline string literal (`'`) that is invisible to C.
Brainfuck
BF ignores most letters and symbols. The only relevant ones are `[ ] + - < > , .`. We put the BF code as early as possible to prevent pollution of the BF tape by unintended use of these symbols. We leave the BF code in a state where the register index points to an empty (0) tape cell. Then, by putting the rest of the code between `[` and `]`, we can jump to the end of the code. In between, all square brackets must come in pairs; the rest is irrelevant, because we don't execute it.
Literate Haskell
LHS ignores everything that is not marked with `>` at the start of the line. It also uses a variant of the C preprocessor, which for some reason does NOT remove `/**/` comments?! Anyway, with some additional options for disabling CPP errors and allowing joined doc/code blocks we end up with the above.
Coffeescript
Same here: Literate CoffeeScript only sees blocks that are indented. The rest is implicitly a comment.
However, normal CoffeeScript uses a triple `#` (`###`) as a multiline comment. It also allows inserting literal JS into the evaluated code using backticks. The scripting languages are "commented out" using the triple hash; the C block is commented out using a multiline JS string assignment inside a literal JS block.
Perl
q-strings: `q=` starts a string that ends at the next `=` sign. Luckily this looks exactly like an assignment in Bash/Ruby/Python. We use `BEGIN` to exit as soon as possible to prevent parsing the rest of the file, removing the need for a second q-string.
Ruby
%-strings: same as in Perl, just with `%` as the string marker. `q=%^;...` looks like an assignment of "%^" to `q` in Bash, but to Ruby it looks like an assignment of a long `%`-string to `q`. Strings can be concatenated with `+`. This string wraps most foreign code. Lines starting with `#` are comments, thus skipping the preprocessor header.
PHP
Doesn't care about anything other than the PHP markers. To remove unwanted output we put the PHP at the start of the code, and enable output buffering via command line, clearing it asap.
tcl
tcl interprets `\` at the end of a line as a line continuation, moving line 4 into the comment above it. tcl doesn't care about indentation, and strings can be written without quotes. Coincidentally, tcl has a `string cat` function that may be used as a Make multi-target.
Makefile
Uses `#` for comments, executes the first target it sees, and does not allow foreign syntax in the file. It doesn't interpret `/*` as a comment. The first target is `string cat : /*x`, which defines a multi-target rule dependent on `/*x`. Then comes another rule with deliberately chosen spacing, making a tcl instruction look like a make target definition. The code inside the second rule is tcl, which looks like valid Bash, i.e. valid Make. Then we use a `define` that ends at the very end of the file, just before a final rule, namely `/*x`, which prints "Makefile" while being invisible to C due to being wrapped in a block comment.