Skip to content

Instantly share code, notes, and snippets.

@vkobel
Last active August 29, 2015 14:01
Show Gist options
  • Save vkobel/7a6b9e1edae6e5c4592f to your computer and use it in GitHub Desktop.
Save vkobel/7a6b9e1edae6e5c4592f to your computer and use it in GitHub Desktop.
Simple lexer for a minimalist asm-like language
module Lexer
open System
open System.IO
open System.Text.RegularExpressions
/// Registers that may appear as an operand.
/// The lexer produces these from the bare tokens "A", "B", "D", "X" and "Y".
type Register =
| A // 8 bits
| B // 8 bits
| D // 16 bits (A + B)
| X // 16 bits
| Y // 16 bits
/// Argument of an instruction. The string payloads carry the token text
/// with its marker prefix removed ("#" for Val, "#$" for Hex, ":" for Ptr).
type Operand =
| Val of string // Standard value
| Hex of string // Hexadecimal value
| Reg of Register // Register pointer
| Ptr of string // Label pointer
/// Instruction mnemonics recognised by the lexer.
/// END is also emitted by the lexer itself once the end of the input is reached.
type Operator =
| LDA // LOAD reg A with Operand
| LDX // LOAD reg X with Operand
| STA // STORE val of reg A in dest by Operand
| BGW // BEGINWITH pointer
| END // END of program (if not in code -> EOF)
/// A single token as returned by getNextToken.
type Operation =
// an instruction argument (value, hex value, register or label pointer)
| Operand of Operand
// an instruction mnemonic, or the END marker
| Operator of Operator
// comment text with the leading ';' removed
| Comment of string
// label name with the trailing ':' removed
| Label of string
// any token that matched none of the other categories
| Unknown of string
/// Reads the next token of `input`, starting the scan at `index`.
///
/// Returns the token together with the index at which the next call should
/// resume. Whitespace-only stretches (blank lines, separating spaces) are
/// skipped. Once the end of `input` is reached the result is `Operator END`.
///
/// Conventions of the source language, as encoded below:
///  - operators are indented (preceded by whitespace); "END" is not
///  - labels start in column 0 and end with ':'
///  - operands are prefixed with "#$" (hex), "#" (value) or ":" (label pointer),
///    or are one of the register names A, B, D, X, Y
///  - ';' starts a comment that runs to the end of the line
let rec getNextToken (input: string) (index: int) : (Operation * int) =
    (* Get the token as a string * int tuple, int is the index to resume from *)
    let rec getStrToken (data: string) (i: int) (token: string) =
        if i >= data.Length then
            // EOF: flush a pending token first (the file may not end with a
            // newline); the synthetic END token is produced by the next call.
            if token.Trim() <> "" then (token, i) else ("END", i)
        else
            match data.[i] with
            | '\r' ->
                // "\r\n" is one line break; a lone '\r' must not swallow
                // the character that follows it.
                let width = if i + 1 < data.Length && data.[i + 1] = '\n' then 2 else 1
                (token, i + width)
            | '\n' -> (token, i + 1)
            // A separator after some text ends the token but is NOT eaten, so
            // the next token keeps its leading whitespace (which is what
            // distinguishes an indented operator from a column-0 label).
            | ' ' | '\t' when token.Trim() <> "" -> (token, i)
            // A comment glued to a token ("LDA;note"): emit the token first,
            // the comment is picked up by the next call.
            | ';' when token.Trim() <> "" -> (token, i)
            | ';' ->
                // Comment: runs to the end of the line, or to the end of the
                // input when the last line has no trailing newline
                // (IndexOf returns -1 in that case).
                let newline = data.IndexOf('\n', i)
                let endIndex = if newline < 0 then data.Length else newline
                (data.Substring(i, endIndex - i).Trim(), min (endIndex + 1) data.Length)
            | c -> getStrToken data (i + 1) (token + string c) // any other char extends the token
    let token, next = getStrToken input index ""
    (* Convert string tokens into typed tokens *)
    match Regex.Replace(token, @"\s+", " ") with // collapse runs of spaces/tabs into one space
    | t when String.IsNullOrWhiteSpace t -> getNextToken input next // ignore whitespace only
    | " LDA" -> (Operator LDA, next)
    | " LDX" -> (Operator LDX, next)
    | " STA" -> (Operator STA, next)
    | " BGW" -> (Operator BGW, next)
    | "END" -> (Operator END, next)
    | t when t.[0] = ';' -> (Comment(t.TrimStart ';'), next)
    | t when not (Char.IsWhiteSpace t.[0]) && t.EndsWith(":", StringComparison.Ordinal) ->
        (Label(t.TrimEnd ':'), next) // Label
    | t ->
        // Operands are classified on the trimmed text so the prefix is removed
        // correctly whether or not the token carries a leading space.
        match t.Trim() with
        | o when o.StartsWith("#$", StringComparison.Ordinal) -> (Operand(Hex(o.Substring 2)), next) // Hex
        | o when o.StartsWith("#", StringComparison.Ordinal) -> (Operand(Val(o.Substring 1)), next) // Val
        | o when o.StartsWith(":", StringComparison.Ordinal) -> (Operand(Ptr(o.Substring 1)), next) // Ptr
        | "A" -> (Operand(Reg A), next)
        | "B" -> (Operand(Reg B), next)
        | "D" -> (Operand(Reg D), next)
        | "X" -> (Operand(Reg X), next)
        | "Y" -> (Operand(Reg Y), next)
        | o -> (Unknown o, next) // all other -> Unknown
module main
open System.IO
open Lexer
/// Drives `lexer` starting at `index`: prints every token it yields with "%A"
/// and stops, without printing, as soon as `Operator END` is produced.
let lexcall lexer index =
    let mutable current = lexer index
    while fst current <> Operator END do
        printfn "%A" (fst current)
        // resume lexing from the index returned alongside the token
        current <- lexer (snd current)
/// Program entry point: lexes an assembly source file and prints each token.
/// The file is the first command-line argument when one is given; otherwise
/// the default path C:\B32\Test.asm is used. Waits for Enter before exiting.
[<EntryPoint>]
let main (argv: string[]) =
    let path = if Array.isEmpty argv then @"C:\B32\Test.asm" else argv.[0]
    let input =
        // `use` disposes the reader (and its file handle) once the text is read
        use reader = File.OpenText(path)
        reader.ReadToEnd()
    let lexer = Lexer.getNextToken input
    lexcall lexer 0
    System.Console.ReadLine() |> ignore // keep the console window open
    0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment