Created
March 5, 2022 07:25
-
-
Save soasme/bb2873ea4f6cd10ed8b9a9e4427006d5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
---- MODULE VirtualMachine ---- | |
VM_Version == "1.0.0" | |
LOCAL INSTANCE Sequences | |
LOCAL INSTANCE Integers | |
LOCAL INSTANCE TLC | |
CONSTANT PC | |
CONSTANT SUBJ | |
VARIABLES pc, \* index of PC | |
subj, \* index of SUBJ | |
err, \* err code, halt immediately | |
match, \* if not match, back track | |
refs, | |
callstack, | |
capstack, | |
precstack | |
Vars == <<pc, subj, match, err, refs, callstack, capstack, precstack>> | |
CallStack == INSTANCE Stack WITH arr <- callstack | |
NatStackEntry(n) == [type |-> "nat", pc |-> n, pos |-> 0, captures |-> <<>>, cut |-> FALSE] | |
BacktrackStackEntry(p, pos, captures, precs, cut) == [type |-> "backtrack", pc |-> p, pos |-> pos, captures |-> captures, precs |-> precs, cut |-> cut] | |
CapStack == INSTANCE Stack WITH arr <- capstack | |
LeftCapEntry(p, pos) == [type |-> "L", pc |-> p, pos |-> pos] | |
RightCapEntry(p, pos) == [type |-> "R", pc |-> p, pos |-> pos] | |
PrecStack == INSTANCE Stack WITH arr <- precstack | |
Inst == PC[pc] | |
Code == Inst[1] | |
Label == Inst[2] | |
LabelEx == Inst[3] | |
ErrInvalidInstCode == 1 | |
ErrNoMatch == 2 | |
ErrNothingToCommit == 3 | |
ErrEndOfText == 4 | |
ErrUnpairPrec == 5 | |
\* TLA Plus does not support very complex string processing so we have limited support | |
\* in this spec. The real implementation can compare the ascii code of the characters. | |
CompareChar(a, b) == LET chrs == {"\t", "\r", "\n", " ", "!", "\"", "#", "$", "'", "(", ")", "*", "+", ",", "-", ".", "/", | |
"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", ":", ";", "<", "=", ">", "?", "@", | |
"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", | |
"[", "]", "^", "_", | |
"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", | |
"{", "|", "}", "~"} | |
aIdx == CHOOSE idx \in 1..Len(chrs): chrs[idx] = a | |
bIdx == CHOOSE idx \in 1..Len(chrs): chrs[idx] = b | |
IN aIdx > bIdx | |
\* Find nth char in str. N is 1-based. | |
FindNth(str, idx) == IF idx <= Len(str) THEN SubSeq(str, idx, idx) ELSE "" | |
\* A matching pair of l & r in stack means l.type is L and r.type is R and all inners are matching. | |
RECURSIVE MatchCapture(_, _, _) | |
MatchCapture(stack, l, r) == /\ stack[l].type = "L" | |
/\ stack[r].type = "R" | |
/\ \/ l + 1 = r | |
\/ MatchCapture(stack, l + 1, r - 1) | |
FindLeftCapture(stack, idx) == CHOOSE idx_l \in 1..idx: MatchCapture(stack, idx_l, idx) | |
HandleReturn == IF Len(callstack) = 0 | |
THEN UNCHANGED Vars /\ PrintT(Vars) \* [][VM_Next]_Vars stutters | |
ELSE /\ pc' = CallStack!SPeek.pc | |
/\ CallStack!SPop | |
/\ UNCHANGED <<subj, match, err, refs, capstack, precstack>> | |
HandleChar == IF FindNth(SUBJ, subj) = Label | |
THEN /\ subj' = subj + 1 | |
/\ pc' = pc + 1 | |
/\ UNCHANGED <<match, err, refs, callstack, capstack, precstack>> | |
ELSE /\ match' = FALSE | |
/\ PrintT(<<"Char not match", "pc", pc, PC[pc]>>) | |
/\ UNCHANGED <<pc, subj, err, refs, callstack, capstack, precstack>> | |
HandleJump == /\ pc' = pc + Label | |
/\ UNCHANGED <<subj, match, err, refs, callstack, capstack, precstack>> | |
HandleCall == /\ pc' = pc + Label | |
/\ CallStack!SPush(NatStackEntry(pc + 1)) | |
/\ UNCHANGED <<subj, match, err, refs, capstack, precstack>> | |
HandleChoice == /\ pc' = pc + 1 | |
/\ CallStack!SPush(BacktrackStackEntry(pc+Label, subj, capstack, precstack, FALSE)) | |
/\ UNCHANGED <<subj, match, err, refs, capstack, precstack>> | |
HandleCommit == IF CallStack!SEmpty | |
THEN /\ err' = ErrNothingToCommit | |
/\ PrintT(<<"Nothing to commit", "pc", pc, PC[pc]>>) | |
/\ UNCHANGED <<pc, subj, match, refs, callstack, capstack, precstack>> | |
ELSE /\ pc' = pc + Label | |
/\ CallStack!SPop | |
/\ match' = TRUE | |
/\ UNCHANGED <<subj, err, refs, capstack, precstack>> | |
HandleBackCommit == /\ pc' = pc + Label | |
/\ subj' = CallStack!SPeek.pos | |
/\ capstack' = CallStack!SPeek.captures | |
/\ CallStack!SPop | |
/\ UNCHANGED <<match, err, refs, precstack>> | |
HandlePrecedencePush == IF Label = 0 \/ PrecStack!SEmpty | |
THEN /\ PrecStack!SPush(Label) | |
/\ pc' = pc + 1 | |
/\ UNCHANGED <<subj, match, err, refs, callstack, capstack>> | |
ELSE LET prec == Label | |
assoc == LabelEx | |
IN IF \/ PrecStack!SPeek < prec | |
\/ assoc = "R" /\ PrecStack!SPeek = prec | |
THEN /\ PrecStack!SPush(prec) | |
/\ pc' = pc + 1 | |
/\ UNCHANGED <<subj, match, err, refs, callstack, capstack>> | |
ELSE /\ match' = FALSE | |
/\ UNCHANGED <<pc, subj, err, refs, callstack, capstack, precstack>> | |
HandlePrecedencePop == IF PrecStack!SEmpty | |
THEN /\ err' = ErrUnpairPrec | |
/\ PrintT(<<"Unpair Precedence Pop", Code>>) | |
/\ UNCHANGED <<pc, subj, match, refs, callstack, capstack, precstack>> | |
ELSE /\ PrecStack!SPop | |
/\ pc' = pc + 1 | |
/\ UNCHANGED <<subj, match, err, refs, callstack, capstack>> | |
HandleLeftCapture == /\ pc' = pc + 1 | |
/\ CapStack!SPush(LeftCapEntry(pc, subj)) | |
/\ UNCHANGED <<subj, match, err, refs, callstack, precstack>> | |
HandleRightCapture == /\ pc' = pc + 1 | |
/\ CapStack!SPush(RightCapEntry(pc, subj)) | |
/\ UNCHANGED <<subj, match, err, refs, callstack, precstack>> | |
\* find capstack top R and its pairing L, associate two indices with an ID globally. | |
HandleDefineReference == LET r == Len(capstack) | |
l == FindLeftCapture(capstack, r) | |
IN /\ pc' = pc + 1 | |
/\ refs' = [n \in DOMAIN refs \union {Label} |-> IF n = Label THEN [l |-> l, r |-> r] ELSE refs[n]] | |
/\ UNCHANGED <<subj, match, err, callstack, capstack, precstack>> | |
\* check if a reference has been defined. consume nothing. | |
HandleHasReference == IF Label \in DOMAIN refs | |
THEN /\ pc' = pc + 1 | |
/\ UNCHANGED <<subj, match, err, refs,callstack, capstack, precstack>> | |
ELSE /\ match' = FALSE | |
/\ UNCHANGED <<pc, subj, err, refs, callstack, capstack, precstack>> | |
\* compare the substring indexed by ID. | |
HandleBackReference == LET slice == refs[Label] | |
len == slice.r - slice.l | |
patt == SubSeq(SUBJ, slice.l, slice.r - 1) | |
text == SubSeq(SUBJ, subj, subj + len - 1) | |
IN IF patt = text | |
THEN /\ pc' = pc + 1 | |
/\ subj' = subj + len | |
/\ UNCHANGED <<match, err, refs, callstack, capstack, precstack>> | |
ELSE /\ match' = FALSE | |
/\ PrintT(<<"backref not match", slice, len, patt, text>>) | |
/\ UNCHANGED <<pc, subj, err, refs, callstack, capstack, precstack>> | |
HandleDropCapture == LET r == Len(capstack) | |
l == FindLeftCapture(capstack, r) | |
IN /\ pc' = pc + 1 | |
/\ capstack' = IF l = 1 THEN <<>> ELSE SubSeq(capstack, 1, l-1) | |
/\ UNCHANGED <<subj, err, match, refs, callstack, precstack>> | |
HandleCut == /\ pc' = pc + 1 | |
/\ IF CallStack!SEmpty THEN UNCHANGED callstack ELSE CallStack!SUpdateTop(BacktrackStackEntry(CallStack!SPeek.pc, CallStack!SPeek.pos, CallStack!SPeek.captures, CallStack!SPeek.precs, TRUE)) | |
/\ UNCHANGED <<subj, err, match, refs, capstack, precstack>> | |
HandleLoopSet == IF FindNth(SUBJ, subj) \in Label | |
THEN /\ subj' = subj + 1 | |
/\ UNCHANGED <<pc, err, match, refs, callstack, capstack, precstack>> | |
ELSE /\ pc' = pc + 1 | |
/\ UNCHANGED <<subj, err, match, refs, callstack, capstack, precstack>> | |
HandleSet == IF FindNth(SUBJ, subj) \in Label | |
THEN /\ pc' = pc + 1 | |
/\ subj' = subj + 1 | |
/\ UNCHANGED <<err, match, refs, callstack, capstack, precstack>> | |
ELSE /\ match' = FALSE | |
/\ UNCHANGED <<pc, subj, err, refs, callstack, capstack, precstack>> | |
HandleNoOp == /\ pc' = pc + 1 | |
/\ UNCHANGED <<subj, match, err, refs, callstack, capstack, precstack>> | |
HandleAny == IF subj + Label <= Len(SUBJ) + 1 | |
THEN /\ subj' = subj + Label | |
/\ pc' = pc + 1 | |
/\ UNCHANGED <<match, err, refs, callstack, capstack, precstack>> | |
ELSE /\ err' = ErrEndOfText | |
/\ UNCHANGED <<pc, subj, match, refs, callstack, capstack, precstack>> | |
HandleLoop == /\ pc' = pc - Label | |
/\ CallStack!SUpdateTop(BacktrackStackEntry(CallStack!SPeek.pc, subj, capstack, precstack, FALSE)) | |
/\ UNCHANGED <<match, subj, err, refs, capstack, precstack>> | |
HandleFail == /\ match' = FALSE | |
/\ UNCHANGED <<pc, subj, err, refs, callstack, capstack, precstack>> | |
HandleInvalidInstCode == /\ err' = ErrInvalidInstCode | |
/\ PrintT(<<"Invalid Inst Code", Code>>) | |
/\ UNCHANGED <<pc, subj, match, refs, callstack, capstack, precstack>> | |
HandleEndOfText == /\ err' = ErrEndOfText | |
/\ PrintT(<<"End of text", Code>>) | |
/\ UNCHANGED <<pc, subj, match, refs, callstack, capstack, precstack>> | |
HandleExit == IF CallStack!SEmpty | |
THEN /\ err' = ErrNoMatch | |
/\ IF Len(callstack) = 0 THEN PrintT(<<"Can't back track", "pc", pc, PC[pc]>>) | |
ELSE PrintT(<<"No match", "pc", CallStack!SPeek.pc-1, PC[CallStack!SPeek.pc-1]>>) | |
/\ UNCHANGED <<pc, subj, match, refs, callstack, capstack, precstack>> | |
ELSE IF CallStack!SPeek.cut | |
THEN /\ err' = ErrNoMatch | |
/\ PrintT(<<"Cut", "pc", CallStack!SPeek.pc-1, PC[CallStack!SPeek.pc-1]>>) | |
/\ UNCHANGED <<pc, subj, match, refs, callstack, capstack, precstack>> | |
ELSE \/ /\ CallStack!SPeek.type = "nat" | |
/\ CallStack!SPop | |
/\ UNCHANGED <<pc, subj, err, refs, capstack, precstack>> | |
\/ /\ CallStack!SPeek.type = "backtrack" | |
/\ pc' = CallStack!SPeek.pc | |
/\ subj' = CallStack!SPeek.pos | |
/\ capstack' = CallStack!SPeek.captures | |
/\ precstack' = CallStack!SPeek.precs | |
/\ CallStack!SPop | |
/\ match' = TRUE | |
/\ UNCHANGED <<err, refs>> | |
VM_Invariant == err = 0 | |
VM_Init == /\ pc = 1 | |
/\ subj = 1 | |
/\ err = 0 | |
/\ match = TRUE | |
/\ refs = [n \in {} |-> TRUE] | |
/\ callstack = <<>> | |
/\ capstack = <<>> | |
/\ precstack = <<>> | |
VM_Next == IF match = FALSE THEN HandleExit | |
ELSE IF Code = "RET" THEN HandleReturn | |
ELSE IF Code = "CHAR" THEN HandleChar | |
ELSE IF Code = "JMP" THEN HandleJump | |
ELSE IF Code = "CALL" THEN HandleCall | |
ELSE IF Code = "FAIL" THEN HandleFail | |
ELSE IF Code = "CHOICE" THEN HandleChoice | |
ELSE IF Code = "CMT" THEN HandleCommit | |
ELSE IF Code = "NOP" THEN HandleNoOp | |
ELSE IF Code = "ANY" THEN HandleAny | |
ELSE IF Code = "LOOP" THEN HandleLoop | |
ELSE IF Code = "BCMT" THEN HandleBackCommit | |
ELSE IF Code = "PPSH" THEN HandlePrecedencePush | |
ELSE IF Code = "PPOP" THEN HandlePrecedencePop | |
ELSE IF Code = "LCAP" THEN HandleLeftCapture | |
ELSE IF Code = "RCAP" THEN HandleRightCapture | |
ELSE IF Code = "DREF" THEN HandleDefineReference | |
ELSE IF Code = "BREF" THEN HandleBackReference | |
ELSE IF Code = "HREF" THEN HandleHasReference | |
ELSE IF Code = "DCAP" THEN HandleDropCapture | |
ELSE IF Code = "CUT" THEN HandleCut | |
ELSE IF Code = "LSET" THEN HandleLoopSet | |
ELSE IF Code = "SET" THEN HandleSet | |
ELSE HandleInvalidInstCode | |
VM_Spec == VM_Init /\ [][VM_Next]_Vars | |
\* rule = "a" | |
VM_TestCase0_PC == << | |
<<"CHAR", "a">>, | |
<<"RET">> | |
>> | |
VM_TestCase0_SUBJ == "a" | |
\* rule = "a" | |
VM_TestCase1_PC == << | |
<<"JMP", 1>>, | |
<<"CHAR", "a">>, | |
<<"RET">> | |
>> | |
VM_TestCase1_SUBJ == "a" | |
\* rule = a | |
\* a = "a" | |
VM_TestCase2_PC == << | |
<<"CALL", 2>>, | |
<<"RET">>, | |
<<"CHAR", "a">>, | |
<<"RET">> | |
>> | |
VM_TestCase2_SUBJ == "a" | |
\* rule = "a" / "A" | |
VM_TestCase3_PC == << | |
<<"CHOICE", 3>>, | |
<<"CHAR", "a">>, | |
<<"CMT", 2>>, | |
<<"CHAR", "A">>, | |
<<"RET">> | |
>> | |
VM_TestCase3_SUBJ == "A" | |
\* rule = "a"? "b" | |
VM_TestCase4_PC == << | |
<<"CHOICE", 3>>, | |
<<"CHAR", "a">>, | |
<<"CMT", 2>>, | |
<<"NOP">>, | |
<<"CHAR", "b">>, | |
<<"RET">> | |
>> | |
VM_TestCase4_SUBJ == "b" | |
VM_TestCase4_1_SUBJ == "ab" | |
\* rule = . | |
VM_TestCase5_PC == << | |
<<"ANY", 1>>, | |
<<"RET">> | |
>> | |
VM_TestCase5_SUBJ == "a" | |
\* rule = "a"* | |
VM_TestCase6_PC == << | |
<<"CHOICE", 3>>, | |
<<"CHAR", "a">>, | |
<<"LOOP", 1>>, | |
<<"RET">> | |
>> | |
VM_TestCase6_SUBJ == "aaaaaa" | |
\* rule = &"a" . "b" | |
VM_TestCase7_PC == << | |
<<"CHOICE", 3>>, | |
<<"CHAR", "a">>, | |
<<"BCMT", 2>>, | |
<<"FAIL">>, | |
<<"ANY">>, | |
<<"CHAR", "b">>, | |
<<"RET">> | |
>> | |
VM_TestCase7_SUBJ == "ab" | |
VM_TestCase7_1_SUBJ == "bb" | |
\* rule = "+" "1" ^ 1 / "*" "1" ^ 2 / "^" "1" ^^ 3 | |
VM_TestCase8_PC == << | |
<<"CHAR", "1">>, | |
<<"CHOICE", 6>>, | |
<<"PPSH", 1, "L">>, | |
<<"CHAR", "+">>, | |
<<"CHAR", "1">>, | |
<<"PPOP">>, | |
<<"CMT", 14>>, | |
<<"CHOICE", 6>>, | |
<<"PPSH", 1, "L">>, | |
<<"CHAR", "*">>, | |
<<"CHAR", "1">>, | |
<<"PPOP">>, | |
<<"CMT", 8>>, | |
<<"CHOICE", 6>>, | |
<<"PPSH", 1, "R">>, | |
<<"CHAR", "^">>, | |
<<"CHAR", "1">>, | |
<<"PPOP">>, | |
<<"CMT", 2>>, | |
<<"FAIL">>, | |
<<"RET">> | |
>> | |
VM_TestCase8_SUBJ == "1+1" | |
VM_TestCase8_1_SUBJ == "1*1" | |
VM_TestCase8_2_SUBJ == "1^1" | |
\* rule = {"a"} | |
VM_TestCase9_PC == << | |
<<"LCAP">>, | |
<<"CHAR", "a">>, | |
<<"RCAP">>, | |
<<"RET">> | |
>> | |
VM_TestCase9_SUBJ == "a" | |
\* rule = <REF_1 {"a"}> <REF_1> {<REF_1>} | |
VM_TestCase10_PC == << | |
<<"LCAP">>, | |
<<"CHAR", "a">>, | |
<<"RCAP">>, | |
<<"DREF", 1>>, | |
<<"BREF", 1>>, | |
<<"LCAP">>, | |
<<"BREF", 1>>, | |
<<"RCAP">>, | |
<<"RET">> | |
>> | |
VM_TestCase10_SUBJ == "aaa" | |
VM_TestCase11_PC == << | |
<<"CHAR", "a">>, | |
<<"LCAP">>, | |
<<"CHAR", "b">>, | |
<<"RCAP">>, | |
<<"DCAP">>, \* https://docs.racket-lang.org/peg/index.html | pop the last capture | |
<<"RET">> | |
>> | |
VM_TestCase11_SUBJ == "ab" | |
\* rule = "a" ~ "b" / "a" "c" | |
VM_TestCase12_PC == << | |
<<"CHOICE", 5>>, | |
<<"CHAR", "a">>, | |
<<"CUT">>, | |
<<"CHAR", "b">>, | |
<<"CMT", 3>>, | |
<<"CHAR", "a">>, | |
<<"CHAR", "c">>, | |
<<"RET">> | |
>> | |
VM_TestCase12_SUBJ == "ac" | |
VM_TestCase13_PC == << | |
<<"LSET", {"0", "1"}>>, | |
<<"RET">> | |
>> | |
VM_TestCase13_SUBJ == "01010101" | |
VM_TestCase14_PC == << | |
<<"SET", {"0", "1"}>>, | |
<<"RET">> | |
>> | |
VM_TestCase14_SUBJ == "1" | |
\* rule = <REF_1 {"a"}> HREF_1 | |
VM_TestCase15_PC == << | |
<<"LCAP">>, | |
<<"CHAR", "a">>, | |
<<"RCAP">>, | |
<<"DREF", 1>>, | |
<<"HREF", 1>>, | |
<<"RET">> | |
>> | |
VM_TestCase15_SUBJ == "a" | |
================================ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment