Compare commits
5 commits
d2597d2de7
...
2df4d182f9
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2df4d182f9 | ||
|
|
099d774634 | ||
|
|
838c96f04f | ||
|
|
886525cf7e | ||
|
|
c609fe4ec6 |
178
lang/src/ast.asm
178
lang/src/ast.asm
|
|
@ -10,6 +10,8 @@ section .rdata
|
||||||
AST_NUMBER equ 4
|
AST_NUMBER equ 4
|
||||||
AST_BINARY_OP equ 5
|
AST_BINARY_OP equ 5
|
||||||
AST_RETURN_STATEMENT equ 6
|
AST_RETURN_STATEMENT equ 6
|
||||||
|
AST_VALUE_TO_PLACE equ 7
|
||||||
|
AST_PLACE_TO_VALUE equ 8
|
||||||
|
|
||||||
TYPE_VOID equ 1
|
TYPE_VOID equ 1
|
||||||
TYPE_BOOL equ 2
|
TYPE_BOOL equ 2
|
||||||
|
|
@ -18,6 +20,12 @@ section .rdata
|
||||||
TYPE_STR equ 5
|
TYPE_STR equ 5
|
||||||
;; end-consts
|
;; end-consts
|
||||||
|
|
||||||
|
PRECEDENCE_ADD equ 90
|
||||||
|
PRECEDENCE_SUB equ 90
|
||||||
|
PRECEDENCE_MUL equ 100
|
||||||
|
PRECEDENCE_DIV equ 100
|
||||||
|
PRECEDENCE_REM equ 100
|
||||||
|
|
||||||
section .text
|
section .text
|
||||||
extern vec_init_with
|
extern vec_init_with
|
||||||
extern vec_push
|
extern vec_push
|
||||||
|
|
@ -58,6 +66,7 @@ global parse_block
|
||||||
;; struct AstNode {
|
;; struct AstNode {
|
||||||
;; kind: u8,
|
;; kind: u8,
|
||||||
;; data: *const (),
|
;; data: *const (),
|
||||||
|
;; extra: usize,
|
||||||
;; }
|
;; }
|
||||||
;;
|
;;
|
||||||
;; struct Argument {
|
;; struct Argument {
|
||||||
|
|
@ -132,12 +141,13 @@ parse_func:
|
||||||
mov rdx, 48
|
mov rdx, 48
|
||||||
call memcpy
|
call memcpy
|
||||||
mov byte [rsp], AST_FUNCTION ; kind
|
mov byte [rsp], AST_FUNCTION ; kind
|
||||||
mov [rsp + 8], rdi ; data
|
mov [rsp + 8], rdi ; data
|
||||||
mov rdi, [rsp + 48] ; Ast
|
mov qword [rsp + 16], 0 ; extra
|
||||||
lea rsi, [rsp]
|
mov rdi, [rsp + 48] ; Ast
|
||||||
|
lea rsi, [rsp] ; &AstNode
|
||||||
call vec_push
|
call vec_push
|
||||||
mov rax, [rsp + 48] ; Ast
|
mov rax, [rsp + 48] ; Ast
|
||||||
mov rax, [rdi + 8] ; return Ast.nodes.len()
|
mov rax, [rdi + 8] ; return Ast.nodes.len()
|
||||||
dec rax
|
dec rax
|
||||||
add rsp, 48
|
add rsp, 48
|
||||||
pop rdi
|
pop rdi
|
||||||
|
|
@ -255,17 +265,21 @@ parse_number:
|
||||||
call panic
|
call panic
|
||||||
|
|
||||||
;; rdi: *mut Ast
|
;; rdi: *mut Ast
|
||||||
;; define-fn: fn parse_primary_expr(ast: *mut Ast) -> u64
|
;; define-fn: fn parse_primary_expr(ast: *mut Ast) -> (u64, bool)
|
||||||
parse_primary_expr:
|
parse_primary_expr:
|
||||||
push rbp
|
push rbp
|
||||||
mov rbp, rsp
|
mov rbp, rsp
|
||||||
sub rsp, 24
|
sub rsp, 32
|
||||||
mov [rsp], rdi ; Ast
|
mov [rsp], rdi ; Ast
|
||||||
|
|
||||||
mov dil, TOKEN_NUMBER
|
mov dil, TOKEN_NUMBER
|
||||||
call expect_token
|
call expect_token
|
||||||
test rax, rax
|
test rax, rax
|
||||||
jnz .number
|
jnz .number
|
||||||
|
mov dil, TOKEN_LPARENS
|
||||||
|
call expect_token
|
||||||
|
test rax, rax
|
||||||
|
jnz .paren_expr
|
||||||
jmp .panic
|
jmp .panic
|
||||||
.number:
|
.number:
|
||||||
mov rdi, rax ; lexeme ptr
|
mov rdi, rax ; lexeme ptr
|
||||||
|
|
@ -274,12 +288,23 @@ parse_primary_expr:
|
||||||
mov rdi, [rsp] ; Ast
|
mov rdi, [rsp] ; Ast
|
||||||
mov byte [rsp + 8], AST_NUMBER ; kind
|
mov byte [rsp + 8], AST_NUMBER ; kind
|
||||||
mov [rsp + 16], rax ; data
|
mov [rsp + 16], rax ; data
|
||||||
lea rsi, [rsp + 8] ; AstNode
|
mov qword [rsp + 24], 0 ; extra
|
||||||
|
lea rsi, [rsp + 8] ; &AstNode
|
||||||
call vec_push
|
call vec_push
|
||||||
mov rdi, [rsp] ; Ast
|
mov rdi, [rsp] ; Ast
|
||||||
mov rax, [rdi + 8] ; return Ast.nodes.len()
|
mov rax, [rdi + 8] ; return Ast.nodes.len()
|
||||||
dec rax
|
dec rax
|
||||||
add rsp, 24
|
mov rdx, 0 ; placeness = false
|
||||||
|
jmp .epilogue
|
||||||
|
.paren_expr:
|
||||||
|
mov rdi, [rsp] ; Ast
|
||||||
|
call parse_expr
|
||||||
|
mov [rsp + 8], rax ; expr
|
||||||
|
mov dil, TOKEN_RPARENS
|
||||||
|
call unwrap_token
|
||||||
|
mov rax, [rsp + 8] ; expr
|
||||||
|
.epilogue:
|
||||||
|
add rsp, 32
|
||||||
pop rbp
|
pop rbp
|
||||||
ret
|
ret
|
||||||
.panic:
|
.panic:
|
||||||
|
|
@ -288,7 +313,7 @@ parse_primary_expr:
|
||||||
|
|
||||||
;; rdi: *mut Ast
|
;; rdi: *mut Ast
|
||||||
;; sil: precedence
|
;; sil: precedence
|
||||||
;; define-fn: fn parse_binary_expr(ast: *mut Ast, precedence: u8) -> u64
|
;; define-fn: fn parse_binary_expr(ast: *mut Ast, precedence: u8) -> (u64, bool)
|
||||||
parse_binary_expr:
|
parse_binary_expr:
|
||||||
push rbp
|
push rbp
|
||||||
mov rbp, rsp
|
mov rbp, rsp
|
||||||
|
|
@ -305,7 +330,10 @@ parse_binary_expr:
|
||||||
sub rsp, 64
|
sub rsp, 64
|
||||||
; lexeme: Lexeme [32..56]
|
; lexeme: Lexeme [32..56]
|
||||||
; right: u64 [24..32]
|
; right: u64 [24..32]
|
||||||
; precedence: u8 [17..18]
|
; right_placeness: u8 [20..21]
|
||||||
|
; left_placeness: u8 [19..20]
|
||||||
|
; our_precedence: u8 [18..19]
|
||||||
|
; upper_precedence: u8 [17..18]
|
||||||
; operator: u8 [16..17]
|
; operator: u8 [16..17]
|
||||||
; left: u64 [8..16]
|
; left: u64 [8..16]
|
||||||
; rdi: *mut Ast [0..8]
|
; rdi: *mut Ast [0..8]
|
||||||
|
|
@ -316,29 +344,72 @@ parse_binary_expr:
|
||||||
|
|
||||||
call parse_primary_expr
|
call parse_primary_expr
|
||||||
mov [rsp + 8], rax ; left
|
mov [rsp + 8], rax ; left
|
||||||
|
mov [rsp + 19], dl ; left_placeness
|
||||||
|
|
||||||
.loop:
|
.loop:
|
||||||
lea rdi, [rsp + 32] ; lexeme
|
lea rdi, [rsp + 32] ; lexeme
|
||||||
call peek_lexeme
|
call peek_lexeme
|
||||||
mov rax, [rsp + 32]
|
mov rax, [rsp + 32]
|
||||||
mov dil, [rsp + 17]
|
|
||||||
cmp al, dil ; our_precedence <= upper_precedence
|
|
||||||
jle .done ; also covers some non-binary operator tokens
|
|
||||||
cmp al, TOKEN_PLUS
|
cmp al, TOKEN_PLUS
|
||||||
je .plus
|
je .add
|
||||||
|
cmp al, TOKEN_MINUS
|
||||||
|
je .sub
|
||||||
|
cmp al, TOKEN_STAR
|
||||||
|
je .mul
|
||||||
|
cmp al, TOKEN_SLASH
|
||||||
|
je .div
|
||||||
jmp .done
|
jmp .done
|
||||||
|
|
||||||
.plus:
|
.add:
|
||||||
mov dil, TOKEN_PLUS
|
mov dil, TOKEN_PLUS
|
||||||
call unwrap_token
|
call unwrap_token
|
||||||
mov byte [rsp + 16], TOKEN_PLUS
|
mov byte [rsp + 16], TOKEN_PLUS
|
||||||
|
mov byte [rsp + 18], PRECEDENCE_ADD
|
||||||
|
jmp .right
|
||||||
|
.sub:
|
||||||
|
mov dil, TOKEN_MINUS
|
||||||
|
call unwrap_token
|
||||||
|
mov byte [rsp + 16], TOKEN_MINUS
|
||||||
|
mov byte [rsp + 18], PRECEDENCE_SUB
|
||||||
|
jmp .right
|
||||||
|
.mul:
|
||||||
|
mov dil, TOKEN_STAR
|
||||||
|
call unwrap_token
|
||||||
|
mov byte [rsp + 16], TOKEN_STAR
|
||||||
|
mov byte [rsp + 18], PRECEDENCE_MUL
|
||||||
|
jmp .right
|
||||||
|
.div:
|
||||||
|
mov dil, TOKEN_SLASH
|
||||||
|
call unwrap_token
|
||||||
|
mov byte [rsp + 16], TOKEN_SLASH
|
||||||
|
mov byte [rsp + 18], PRECEDENCE_DIV
|
||||||
jmp .right
|
jmp .right
|
||||||
|
|
||||||
.right:
|
.right:
|
||||||
|
mov dil, [rsp + 17]
|
||||||
|
mov al, [rsp + 18] ; our_precedence
|
||||||
|
cmp al, dil ; our_precedence <= upper_precedence
|
||||||
|
jle .done
|
||||||
|
|
||||||
mov rdi, [rsp] ; Ast
|
mov rdi, [rsp] ; Ast
|
||||||
mov sil, [rsp + 16]
|
mov sil, [rsp + 18]
|
||||||
call parse_binary_expr
|
call parse_binary_expr
|
||||||
mov [rsp + 24], rax ; right
|
mov [rsp + 24], rax ; right
|
||||||
|
mov [rsp + 20], dl ; right_placeness
|
||||||
|
|
||||||
|
; convert left and right to values
|
||||||
|
|
||||||
|
mov rdi, [rsp] ; Ast
|
||||||
|
mov rsi, [rsp + 8] ; left
|
||||||
|
mov dl, [rsp + 19] ; left_placeness
|
||||||
|
call ast_place_to_value
|
||||||
|
mov [rsp + 8], rax ; left
|
||||||
|
|
||||||
|
mov rdi, [rsp] ; Ast
|
||||||
|
mov rsi, [rsp + 24] ; right
|
||||||
|
mov dl, [rsp + 20] ; right_placeness
|
||||||
|
call ast_place_to_value
|
||||||
|
mov [rsp + 24], rax ; right
|
||||||
|
|
||||||
mov rdi, 24
|
mov rdi, 24
|
||||||
mov rsi, 8
|
mov rsi, 8
|
||||||
|
|
@ -352,6 +423,7 @@ parse_binary_expr:
|
||||||
|
|
||||||
mov byte [rsp + 32], AST_BINARY_OP ; AstNode.kind
|
mov byte [rsp + 32], AST_BINARY_OP ; AstNode.kind
|
||||||
mov [rsp + 40], rax ; AstNode.data
|
mov [rsp + 40], rax ; AstNode.data
|
||||||
|
mov qword [rsp + 48], 0 ; AstNode.extra
|
||||||
mov rdi, [rsp] ; Ast
|
mov rdi, [rsp] ; Ast
|
||||||
lea rsi, [rsp + 32] ; &AstNode
|
lea rsi, [rsp + 32] ; &AstNode
|
||||||
call vec_push
|
call vec_push
|
||||||
|
|
@ -386,6 +458,9 @@ parse_expr:
|
||||||
parse_statement:
|
parse_statement:
|
||||||
push rbp
|
push rbp
|
||||||
mov rbp, rsp
|
mov rbp, rsp
|
||||||
|
|
||||||
|
; Ast [24..32]
|
||||||
|
; AstNode [0..24]
|
||||||
sub rsp, 32
|
sub rsp, 32
|
||||||
mov [rsp + 24], rdi ; Ast
|
mov [rsp + 24], rdi ; Ast
|
||||||
|
|
||||||
|
|
@ -400,8 +475,9 @@ parse_statement:
|
||||||
call parse_expr
|
call parse_expr
|
||||||
mov byte [rsp], AST_RETURN_STATEMENT ; kind
|
mov byte [rsp], AST_RETURN_STATEMENT ; kind
|
||||||
mov [rsp + 8], rax ; data
|
mov [rsp + 8], rax ; data
|
||||||
|
mov qword [rsp + 16], 0 ; extra
|
||||||
mov rdi, [rsp + 24] ; Ast
|
mov rdi, [rsp + 24] ; Ast
|
||||||
lea rsi, [rsp] ; AstNode
|
lea rsi, [rsp] ; &AstNode
|
||||||
call vec_push
|
call vec_push
|
||||||
mov rdi, [rsp + 24] ; Ast
|
mov rdi, [rsp + 24] ; Ast
|
||||||
mov rax, [rdi + 8] ; Ast.nodes.len()
|
mov rax, [rdi + 8] ; Ast.nodes.len()
|
||||||
|
|
@ -423,13 +499,6 @@ parse_block:
|
||||||
push rbp
|
push rbp
|
||||||
mov rbp, rsp
|
mov rbp, rsp
|
||||||
|
|
||||||
; start-structs
|
|
||||||
; struct Block {
|
|
||||||
; statements: *const u64,
|
|
||||||
; statements_len: usize,
|
|
||||||
; }
|
|
||||||
; end-structs
|
|
||||||
|
|
||||||
; Ast: *mut Ast [56..64]
|
; Ast: *mut Ast [56..64]
|
||||||
; statements: Vec<Statement> [8..56]
|
; statements: Vec<Statement> [8..56]
|
||||||
; statement: u64 [0..8]
|
; statement: u64 [0..8]
|
||||||
|
|
@ -457,14 +526,15 @@ parse_block:
|
||||||
jnz .done
|
jnz .done
|
||||||
mov rdi, [rsp + 56] ; Ast
|
mov rdi, [rsp + 56] ; Ast
|
||||||
call parse_statement
|
call parse_statement
|
||||||
lea rdi, [rsp + 8] ; vec
|
lea rdi, [rsp + 8] ; vec
|
||||||
mov [rsp], rax ; statement
|
mov [rsp], rax ; statement
|
||||||
lea rsi, [rsp]
|
lea rsi, [rsp]
|
||||||
call vec_push
|
call vec_push
|
||||||
jmp .loop
|
jmp .loop
|
||||||
.done:
|
.done:
|
||||||
mov rdi, [rsp + 56] ; Ast
|
mov rdi, [rsp + 56] ; Ast
|
||||||
lea rsi, [rsp + 8] ; statements vec-slice
|
mov qword [rsp], AST_BLOCK ; kind
|
||||||
|
lea rsi, [rsp] ; &AstNode
|
||||||
call vec_push
|
call vec_push
|
||||||
mov rdi, [rsp + 56] ; Ast
|
mov rdi, [rsp + 56] ; Ast
|
||||||
mov rax, [rdi + 8] ; Ast.nodes.len()
|
mov rax, [rdi + 8] ; Ast.nodes.len()
|
||||||
|
|
@ -512,3 +582,57 @@ parse_type:
|
||||||
ret
|
ret
|
||||||
.panic:
|
.panic:
|
||||||
call panic
|
call panic
|
||||||
|
|
||||||
|
;; Ensure the expression node `node_index` is usable as a place.
;;
;; If the node is already a place (is_placeness != 0) nothing is pushed and
;; the incoming index is returned unchanged. Otherwise a new
;; AST_VALUE_TO_PLACE node whose `data` is the wrapped node index (and whose
;; `extra` is 0) is pushed onto the Ast's node vector and the index of that
;; new node is returned.
;;
;; NOTE(review): the skip condition assumes `is_placeness == true` means
;; "the node is a place", per the parameter name -- confirm against callers.
;;
;; rdi: *mut Ast
;; rsi: index of node
;; rdx: is_placeness (only dl is inspected)
;; returns rax: index of the node to use from now on
;; fn ast_value_to_place(ast: *mut Ast, node_index: u64, is_placeness: bool) -> u64
ast_value_to_place:
    push rbp
    mov rbp, rsp

    ; Load the default result first. `mov` leaves the flags alone, whereas
    ; zeroing rax with `xor` between the compare and the branch would force
    ; ZF=1 and make the branch unconditional.
    mov rax, rsi                            ; default: return node_index as-is
    test dl, dl
    jnz .done                               ; already a place -> nothing to wrap

    ; create new AST node
    ;   [0..8]   saved *mut Ast
    ;   [8..32]  AstNode { kind, data, extra }
    sub rsp, 32
    mov [rsp], rdi                          ; Ast (kept for after the call)
    mov byte [rsp + 8], AST_VALUE_TO_PLACE  ; kind
    mov [rsp + 16], rsi                     ; data = wrapped node index
    mov qword [rsp + 24], 0                 ; extra
    lea rsi, [rsp + 8]                      ; &AstNode
    call vec_push                           ; rdi still holds the Ast pointer
    mov rdi, [rsp]                          ; Ast
    mov rax, [rdi + 8]                      ; Ast.nodes.len()
    dec rax                                 ; index of the node just pushed
    add rsp, 32
.done:
    pop rbp
    ret
|
||||||
|
|
||||||
|
;; Ensure the expression node `node_index` is usable as a value.
;;
;; If the node is already a value (is_placeness == 0) nothing is pushed and
;; the incoming index is returned unchanged. Otherwise a new
;; AST_PLACE_TO_VALUE node whose `data` is the wrapped node index (and whose
;; `extra` is 0) is pushed onto the Ast's node vector and the index of that
;; new node is returned.
;;
;; NOTE(review): the skip condition assumes `is_placeness == true` means
;; "the node is a place", per the parameter name -- confirm against callers.
;;
;; rdi: *mut Ast
;; rsi: index of node
;; rdx: is_placeness (only dl is inspected)
;; returns rax: index of the node to use from now on
;; fn ast_place_to_value(ast: *mut Ast, node_index: u64, is_placeness: bool) -> u64
ast_place_to_value:
    push rbp
    mov rbp, rsp

    ; Load the default result first. `mov` leaves the flags alone, whereas
    ; zeroing rax with `xor` between the compare and the branch would force
    ; ZF=1 and make the branch unconditional.
    mov rax, rsi                            ; default: return node_index as-is
    test dl, dl
    jz .done                                ; already a value -> nothing to wrap

    ; create new AST node
    ;   [0..8]   saved *mut Ast
    ;   [8..32]  AstNode { kind, data, extra }
    sub rsp, 32
    mov [rsp], rdi                          ; Ast (kept for after the call)
    mov byte [rsp + 8], AST_PLACE_TO_VALUE  ; kind
    mov [rsp + 16], rsi                     ; data = wrapped node index
    mov qword [rsp + 24], 0                 ; extra
    lea rsi, [rsp + 8]                      ; &AstNode
    call vec_push                           ; rdi still holds the Ast pointer
    mov rdi, [rsp]                          ; Ast
    mov rax, [rdi + 8]                      ; Ast.nodes.len()
    dec rax                                 ; index of the node just pushed
    add rsp, 32
.done:
    pop rbp
    ret
|
||||||
|
|
|
||||||
|
|
@ -32,6 +32,8 @@ LEXEMES:
|
||||||
dq LEX_LBRACKET
|
dq LEX_LBRACKET
|
||||||
dq LEX_RBRACKET
|
dq LEX_RBRACKET
|
||||||
dq LEX_VOID
|
dq LEX_VOID
|
||||||
|
dq LEX_SLASH
|
||||||
|
dq LEX_STAR
|
||||||
|
|
||||||
align 8
|
align 8
|
||||||
TOKENS:
|
TOKENS:
|
||||||
|
|
@ -66,6 +68,8 @@ TOKENS:
|
||||||
db TOKEN_LBRACKET ;; 28
|
db TOKEN_LBRACKET ;; 28
|
||||||
db TOKEN_RBRACKET ;; 29
|
db TOKEN_RBRACKET ;; 29
|
||||||
db TOKEN_VOID ;; 30
|
db TOKEN_VOID ;; 30
|
||||||
|
db TOKEN_SLASH ;; 31
|
||||||
|
db TOKEN_STAR ;; 32
|
||||||
|
|
||||||
align 8
|
align 8
|
||||||
LEXEME_LENS:
|
LEXEME_LENS:
|
||||||
|
|
@ -100,9 +104,11 @@ LEXEME_LENS:
|
||||||
dq LEX_LBRACKET_len
|
dq LEX_LBRACKET_len
|
||||||
dq LEX_RBRACKET_len
|
dq LEX_RBRACKET_len
|
||||||
dq LEX_VOID_len
|
dq LEX_VOID_len
|
||||||
|
dq LEX_SLASH_len
|
||||||
|
dq LEX_STAR_len
|
||||||
|
|
||||||
align 8
|
align 8
|
||||||
NUM_LEXEMES: dq 31
|
NUM_LEXEMES: dq 33
|
||||||
|
|
||||||
LEX_NOT_A_LEXEME db "<not a lexeme>", 0
|
LEX_NOT_A_LEXEME db "<not a lexeme>", 0
|
||||||
LEX_LET db "let"
|
LEX_LET db "let"
|
||||||
|
|
@ -165,6 +171,10 @@ NUM_LEXEMES: dq 31
|
||||||
LEX_RBRACKET_len equ $ - LEX_RBRACKET
|
LEX_RBRACKET_len equ $ - LEX_RBRACKET
|
||||||
LEX_VOID db "void"
|
LEX_VOID db "void"
|
||||||
LEX_VOID_len equ $ - LEX_VOID
|
LEX_VOID_len equ $ - LEX_VOID
|
||||||
|
LEX_SLASH db "/"
|
||||||
|
LEX_SLASH_len equ $ - LEX_SLASH
|
||||||
|
LEX_STAR db "*"
|
||||||
|
LEX_STAR_len equ $ - LEX_STAR
|
||||||
LEX_IDENT db "<identifier>"
|
LEX_IDENT db "<identifier>"
|
||||||
LEX_IDENT_len equ $ - LEX_IDENT
|
LEX_IDENT_len equ $ - LEX_IDENT
|
||||||
LEX_NUMBER db "<number>"
|
LEX_NUMBER db "<number>"
|
||||||
|
|
@ -175,39 +185,41 @@ NUM_LEXEMES: dq 31
|
||||||
LEX_COMMENT_len equ $ - LEX_COMMENT
|
LEX_COMMENT_len equ $ - LEX_COMMENT
|
||||||
|
|
||||||
;; start-consts
|
;; start-consts
|
||||||
TOKEN_EOF equ 0
|
TOKEN_EOF equ 0 ; :u8
|
||||||
TOKEN_LET equ 1
|
TOKEN_LET equ 1 ; :u8
|
||||||
TOKEN_IF equ 2
|
TOKEN_IF equ 2 ; :u8
|
||||||
TOKEN_ELSE equ 3
|
TOKEN_ELSE equ 3 ; :u8
|
||||||
TOKEN_FN equ 4
|
TOKEN_FN equ 4 ; :u8
|
||||||
TOKEN_RETURN equ 5
|
TOKEN_RETURN equ 5 ; :u8
|
||||||
TOKEN_LOOP equ 6
|
TOKEN_LOOP equ 6 ; :u8
|
||||||
TOKEN_BREAK equ 7
|
TOKEN_BREAK equ 7 ; :u8
|
||||||
TOKEN_CONTINUE equ 8
|
TOKEN_CONTINUE equ 8 ; :u8
|
||||||
TOKEN_TRUE equ 9
|
TOKEN_TRUE equ 9 ; :u8
|
||||||
TOKEN_FALSE equ 10
|
TOKEN_FALSE equ 10 ; :u8
|
||||||
TOKEN_BOOL equ 11
|
TOKEN_BOOL equ 11 ; :u8
|
||||||
TOKEN_ARROW equ 12
|
TOKEN_ARROW equ 12 ; :u8
|
||||||
TOKEN_I32 equ 13
|
TOKEN_I32 equ 13 ; :u8
|
||||||
TOKEN_U32 equ 14
|
TOKEN_U32 equ 14 ; :u8
|
||||||
TOKEN_EQUALS equ 15
|
TOKEN_EQUALS equ 15 ; :u8
|
||||||
TOKEN_PLUS equ 16
|
TOKEN_PLUS equ 16 ; :u8
|
||||||
TOKEN_MINUS equ 17
|
TOKEN_MINUS equ 17 ; :u8
|
||||||
TOKEN_RPARENS equ 18
|
TOKEN_RPARENS equ 18 ; :u8
|
||||||
TOKEN_LPARENS equ 19
|
TOKEN_LPARENS equ 19 ; :u8
|
||||||
TOKEN_RBRACE equ 20
|
TOKEN_RBRACE equ 20 ; :u8
|
||||||
TOKEN_LBRACE equ 21
|
TOKEN_LBRACE equ 21 ; :u8
|
||||||
TOKEN_COLON equ 22
|
TOKEN_COLON equ 22 ; :u8
|
||||||
TOKEN_SEMI equ 23
|
TOKEN_SEMI equ 23 ; :u8
|
||||||
TOKEN_COMMA equ 24
|
TOKEN_COMMA equ 24 ; :u8
|
||||||
TOKEN_PIPE equ 25
|
TOKEN_PIPE equ 25 ; :u8
|
||||||
TOKEN_AMP equ 26
|
TOKEN_AMP equ 26 ; :u8
|
||||||
TOKEN_EQEQ equ 27
|
TOKEN_EQEQ equ 27 ; :u8
|
||||||
TOKEN_LBRACKET equ 28
|
TOKEN_LBRACKET equ 28 ; :u8
|
||||||
TOKEN_RBRACKET equ 29
|
TOKEN_RBRACKET equ 29 ; :u8
|
||||||
TOKEN_VOID equ 30
|
TOKEN_VOID equ 30 ; :u8
|
||||||
TOKEN_IDENT equ 31
|
TOKEN_SLASH equ 31 ; :u8
|
||||||
TOKEN_NUMBER equ 32
|
TOKEN_STAR equ 32 ; :u8
|
||||||
TOKEN_STRING equ 33
|
TOKEN_IDENT equ 33 ; :u8
|
||||||
TOKEN_COMMENT equ 34
|
TOKEN_NUMBER equ 34 ; :u8
|
||||||
|
TOKEN_STRING equ 35 ; :u8
|
||||||
|
TOKEN_COMMENT equ 36 ; :u8
|
||||||
;; end-consts
|
;; end-consts
|
||||||
|
|
|
||||||
|
|
@ -44,6 +44,7 @@ DEFINE_FN_RE = re.compile(r'^\s*;+\s*define-fn:\s*(.+)$', re.IGNORECASE)
|
||||||
CONST_EQU_RE = re.compile(r'^\s*([A-Za-z_]\w*)\s+equ\s+(.+)$', re.IGNORECASE)
|
CONST_EQU_RE = re.compile(r'^\s*([A-Za-z_]\w*)\s+equ\s+(.+)$', re.IGNORECASE)
|
||||||
STRUCT_START_RE = re.compile(r'^\s*struct\s+([A-Za-z_]\w*)\s*\{') # after comment markers stripped
|
STRUCT_START_RE = re.compile(r'^\s*struct\s+([A-Za-z_]\w*)\s*\{') # after comment markers stripped
|
||||||
RUST_FENCE_RE = re.compile(r'^\s*```\s*(rust)?\s*$', re.IGNORECASE) # matches ``` or ```rust (after stripping leading comment)
|
RUST_FENCE_RE = re.compile(r'^\s*```\s*(rust)?\s*$', re.IGNORECASE) # matches ``` or ```rust (after stripping leading comment)
|
||||||
|
TYPE_ANNOT_RE = re.compile(r':\s*([A-Za-z0-9_\<\>\*\s\[\]\:&]+)') # matches :u8, : *const u8, Vec<T>, etc.
|
||||||
|
|
||||||
|
|
||||||
def strip_leading_semicolons(line: str) -> str:
|
def strip_leading_semicolons(line: str) -> str:
|
||||||
|
|
@ -110,12 +111,12 @@ def parse_file(path: Path) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Parse a single assembly file and return dict with keys: 'functions', 'consts', 'structs', 'rust_blocks'
|
Parse a single assembly file and return dict with keys: 'functions', 'consts', 'structs', 'rust_blocks'
|
||||||
- functions: list of signature strings (e.g. "parse_ast(data: *const u8) -> Ast")
|
- functions: list of signature strings (e.g. "parse_ast(data: *const u8) -> Ast")
|
||||||
- consts: list of (name, value)
|
- consts: list of (name, value, type)
|
||||||
- structs: list of (name, field_lines)
|
- structs: list of (name, field_lines)
|
||||||
- rust_blocks: list of rust code blocks; each block is list[str] of code lines (no fences, uncommented)
|
- rust_blocks: list of rust code blocks; each block is list[str] of code lines (no fences, uncommented)
|
||||||
"""
|
"""
|
||||||
functions: List[str] = []
|
functions: List[str] = []
|
||||||
consts: List[Tuple[str, str]] = []
|
consts: List[Tuple[str, str, str]] = []
|
||||||
structs: List[Tuple[str, List[str]]] = []
|
structs: List[Tuple[str, List[str]]] = []
|
||||||
rust_blocks: List[List[str]] = []
|
rust_blocks: List[List[str]] = []
|
||||||
|
|
||||||
|
|
@ -158,8 +159,20 @@ def parse_file(path: Path) -> Dict[str, Any]:
|
||||||
m = CONST_EQU_RE.match(s)
|
m = CONST_EQU_RE.match(s)
|
||||||
if m:
|
if m:
|
||||||
name = m.group(1)
|
name = m.group(1)
|
||||||
value = m.group(2).strip()
|
rest = m.group(2).strip()
|
||||||
consts.append((name, value))
|
# Defaults
|
||||||
|
value = rest
|
||||||
|
ctype = 'u32'
|
||||||
|
# If there's an inline comment (assembly comments start with ';'), split it off.
|
||||||
|
if ';' in rest:
|
||||||
|
val_part, comment_part = rest.split(';', 1)
|
||||||
|
value = val_part.strip()
|
||||||
|
# Strip any leading semicolons left in comment_part (e.g. ";; :u8")
|
||||||
|
comment = comment_part.lstrip(';').strip()
|
||||||
|
mtype = TYPE_ANNOT_RE.search(comment)
|
||||||
|
if mtype:
|
||||||
|
ctype = mtype.group(1).strip()
|
||||||
|
consts.append((name, value, ctype))
|
||||||
in_consts = False
|
in_consts = False
|
||||||
const_buffer = []
|
const_buffer = []
|
||||||
i += 1
|
i += 1
|
||||||
|
|
@ -215,7 +228,7 @@ def parse_file(path: Path) -> Dict[str, Any]:
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def render_rust(function_sigs: List[str], consts: List[Tuple[str, str]],
|
def render_rust(function_sigs: List[str], consts: List[Tuple[str, str, str]],
|
||||||
structs: List[Tuple[str, List[str]]], rust_blocks: List[List[str]]) -> str:
|
structs: List[Tuple[str, List[str]]], rust_blocks: List[List[str]]) -> str:
|
||||||
parts: List[str] = []
|
parts: List[str] = []
|
||||||
parts.append('#![allow(non_camel_case_types, dead_code, non_upper_case_globals, improper_ctypes)]')
|
parts.append('#![allow(non_camel_case_types, dead_code, non_upper_case_globals, improper_ctypes)]')
|
||||||
|
|
@ -230,8 +243,8 @@ def render_rust(function_sigs: List[str], consts: List[Tuple[str, str]],
|
||||||
parts.append('') # blank line
|
parts.append('') # blank line
|
||||||
|
|
||||||
# Consts
|
# Consts
|
||||||
for name, value in consts:
|
for name, value, ctype in consts:
|
||||||
parts.append(f'pub const {name}: u32 = {value};')
|
parts.append(f'pub const {name}: {ctype} = {value};')
|
||||||
if consts:
|
if consts:
|
||||||
parts.append('')
|
parts.append('')
|
||||||
|
|
||||||
|
|
@ -265,7 +278,7 @@ def main(argv=None):
|
||||||
args = parser.parse_args(argv)
|
args = parser.parse_args(argv)
|
||||||
|
|
||||||
combined_functions: List[str] = []
|
combined_functions: List[str] = []
|
||||||
combined_consts: List[Tuple[str, str]] = []
|
combined_consts: List[Tuple[str, str, str]] = []
|
||||||
combined_structs: List[Tuple[str, List[str]]] = []
|
combined_structs: List[Tuple[str, List[str]]] = []
|
||||||
combined_rust_blocks: List[List[str]] = []
|
combined_rust_blocks: List[List[str]] = []
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -17,30 +17,41 @@ fn main() {
|
||||||
|
|
||||||
let src = b"3 + 4";
|
let src = b"3 + 4";
|
||||||
|
|
||||||
unsafe {
|
fn print_ast(src: &[u8], parser: impl FnOnce(&mut Ast)) {
|
||||||
// tokeniser_init_buf(src.as_ptr(), src.len());
|
unsafe {
|
||||||
// let mut ast = Ast {
|
tokeniser_init_buf(src.as_ptr(), src.len());
|
||||||
// nodes: util::vec::Vec::new(),
|
let mut ast = Ast {
|
||||||
// };
|
nodes: util::vec::Vec::new(),
|
||||||
// let expr_id = parse_expr(&mut ast);
|
};
|
||||||
// println!("Parsed expression with ID: {}", expr_id);
|
let expr_id = parser(&mut ast);
|
||||||
// println!("{:#}", &ast);
|
println!("{:#}", &ast);
|
||||||
|
|
||||||
let src = b"fn main() -> void { return 1 + 2; }";
|
|
||||||
tokeniser_init_buf(src.as_ptr(), src.len());
|
|
||||||
let mut ast = Ast {
|
|
||||||
nodes: util::vec::Vec::new(),
|
|
||||||
};
|
};
|
||||||
let expr_id = parse_func(&mut ast);
|
|
||||||
println!("Parsed function with ID: {}", expr_id);
|
|
||||||
println!("{:#}", &ast);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
print_ast(b"3 + 4", |ast| unsafe {
|
||||||
|
parse_expr(ast);
|
||||||
|
});
|
||||||
|
print_ast(b"fn main() -> void { return 1 + 2; }", |ast| unsafe {
|
||||||
|
parse_func(ast);
|
||||||
|
});
|
||||||
|
print_ast(b"fn main() -> void { return (1 + (2)); }", |ast| unsafe {
|
||||||
|
parse_func(ast);
|
||||||
|
});
|
||||||
|
print_ast(
|
||||||
|
b"fn main() -> void { return (1 + (2 * 3)) / 4; }",
|
||||||
|
|ast| unsafe {
|
||||||
|
parse_func(ast);
|
||||||
|
},
|
||||||
|
);
|
||||||
|
print_ast(b"fn main() -> void { return 1 + 2 * 3; }", |ast| unsafe {
|
||||||
|
parse_func(ast);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
impl std::fmt::Display for AstNode {
|
impl std::fmt::Display for AstNode {
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
use util::defs::{
|
use util::defs::{
|
||||||
BinaryExpr, AST_BINARY_OP, AST_FUNCTION, AST_NUMBER, AST_RETURN_STATEMENT,
|
BinaryExpr, AST_BINARY_OP, AST_BLOCK, AST_FUNCTION, AST_NUMBER, AST_RETURN_STATEMENT,
|
||||||
};
|
};
|
||||||
match self.kind as u32 {
|
match self.kind as u32 {
|
||||||
AST_NUMBER => {
|
AST_NUMBER => {
|
||||||
|
|
@ -74,6 +85,11 @@ impl std::fmt::Display for AstNode {
|
||||||
func.body
|
func.body
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
AST_BLOCK => {
|
||||||
|
write!(f, "Block(statements: {:?})", unsafe {
|
||||||
|
std::slice::from_raw_parts(self.data.cast::<u64>(), self.extra as usize)
|
||||||
|
})
|
||||||
|
}
|
||||||
_ => write!(f, "UnknownNode"),
|
_ => write!(f, "UnknownNode"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -4,8 +4,8 @@
|
||||||
unsafe extern "C" {
|
unsafe extern "C" {
|
||||||
pub unsafe fn parse_func(ast: *mut Ast) -> u64;
|
pub unsafe fn parse_func(ast: *mut Ast) -> u64;
|
||||||
pub unsafe fn parse_args(ast: *mut Ast) -> (*const Argument, usize);
|
pub unsafe fn parse_args(ast: *mut Ast) -> (*const Argument, usize);
|
||||||
pub unsafe fn parse_primary_expr(ast: *mut Ast) -> u64;
|
pub unsafe fn parse_primary_expr(ast: *mut Ast) -> (u64, bool);
|
||||||
pub unsafe fn parse_binary_expr(ast: *mut Ast, precedence: u8) -> u64;
|
pub unsafe fn parse_binary_expr(ast: *mut Ast, precedence: u8) -> (u64, bool);
|
||||||
pub unsafe fn parse_expr(ast: *mut Ast) -> u64;
|
pub unsafe fn parse_expr(ast: *mut Ast) -> u64;
|
||||||
pub unsafe fn parse_statement(ast: *mut Ast) -> u64;
|
pub unsafe fn parse_statement(ast: *mut Ast) -> u64;
|
||||||
pub unsafe fn parse_block(ast: *mut Ast) -> u64;
|
pub unsafe fn parse_block(ast: *mut Ast) -> u64;
|
||||||
|
|
@ -18,46 +18,50 @@ pub const AST_VARIABLE: u32 = 3;
|
||||||
pub const AST_NUMBER: u32 = 4;
|
pub const AST_NUMBER: u32 = 4;
|
||||||
pub const AST_BINARY_OP: u32 = 5;
|
pub const AST_BINARY_OP: u32 = 5;
|
||||||
pub const AST_RETURN_STATEMENT: u32 = 6;
|
pub const AST_RETURN_STATEMENT: u32 = 6;
|
||||||
|
pub const AST_VALUE_TO_PLACE: u32 = 7;
|
||||||
|
pub const AST_PLACE_TO_VALUE: u32 = 8;
|
||||||
pub const TYPE_VOID: u32 = 1;
|
pub const TYPE_VOID: u32 = 1;
|
||||||
pub const TYPE_BOOL: u32 = 2;
|
pub const TYPE_BOOL: u32 = 2;
|
||||||
pub const TYPE_I32: u32 = 3;
|
pub const TYPE_I32: u32 = 3;
|
||||||
pub const TYPE_U32: u32 = 4;
|
pub const TYPE_U32: u32 = 4;
|
||||||
pub const TYPE_STR: u32 = 5;
|
pub const TYPE_STR: u32 = 5;
|
||||||
pub const TOKEN_EOF: u32 = 0;
|
pub const TOKEN_EOF: u8 = 0;
|
||||||
pub const TOKEN_LET: u32 = 1;
|
pub const TOKEN_LET: u8 = 1;
|
||||||
pub const TOKEN_IF: u32 = 2;
|
pub const TOKEN_IF: u8 = 2;
|
||||||
pub const TOKEN_ELSE: u32 = 3;
|
pub const TOKEN_ELSE: u8 = 3;
|
||||||
pub const TOKEN_FN: u32 = 4;
|
pub const TOKEN_FN: u8 = 4;
|
||||||
pub const TOKEN_RETURN: u32 = 5;
|
pub const TOKEN_RETURN: u8 = 5;
|
||||||
pub const TOKEN_LOOP: u32 = 6;
|
pub const TOKEN_LOOP: u8 = 6;
|
||||||
pub const TOKEN_BREAK: u32 = 7;
|
pub const TOKEN_BREAK: u8 = 7;
|
||||||
pub const TOKEN_CONTINUE: u32 = 8;
|
pub const TOKEN_CONTINUE: u8 = 8;
|
||||||
pub const TOKEN_TRUE: u32 = 9;
|
pub const TOKEN_TRUE: u8 = 9;
|
||||||
pub const TOKEN_FALSE: u32 = 10;
|
pub const TOKEN_FALSE: u8 = 10;
|
||||||
pub const TOKEN_BOOL: u32 = 11;
|
pub const TOKEN_BOOL: u8 = 11;
|
||||||
pub const TOKEN_ARROW: u32 = 12;
|
pub const TOKEN_ARROW: u8 = 12;
|
||||||
pub const TOKEN_I32: u32 = 13;
|
pub const TOKEN_I32: u8 = 13;
|
||||||
pub const TOKEN_U32: u32 = 14;
|
pub const TOKEN_U32: u8 = 14;
|
||||||
pub const TOKEN_EQUALS: u32 = 15;
|
pub const TOKEN_EQUALS: u8 = 15;
|
||||||
pub const TOKEN_PLUS: u32 = 16;
|
pub const TOKEN_PLUS: u8 = 16;
|
||||||
pub const TOKEN_MINUS: u32 = 17;
|
pub const TOKEN_MINUS: u8 = 17;
|
||||||
pub const TOKEN_RPARENS: u32 = 18;
|
pub const TOKEN_RPARENS: u8 = 18;
|
||||||
pub const TOKEN_LPARENS: u32 = 19;
|
pub const TOKEN_LPARENS: u8 = 19;
|
||||||
pub const TOKEN_RBRACE: u32 = 20;
|
pub const TOKEN_RBRACE: u8 = 20;
|
||||||
pub const TOKEN_LBRACE: u32 = 21;
|
pub const TOKEN_LBRACE: u8 = 21;
|
||||||
pub const TOKEN_COLON: u32 = 22;
|
pub const TOKEN_COLON: u8 = 22;
|
||||||
pub const TOKEN_SEMI: u32 = 23;
|
pub const TOKEN_SEMI: u8 = 23;
|
||||||
pub const TOKEN_COMMA: u32 = 24;
|
pub const TOKEN_COMMA: u8 = 24;
|
||||||
pub const TOKEN_PIPE: u32 = 25;
|
pub const TOKEN_PIPE: u8 = 25;
|
||||||
pub const TOKEN_AMP: u32 = 26;
|
pub const TOKEN_AMP: u8 = 26;
|
||||||
pub const TOKEN_EQEQ: u32 = 27;
|
pub const TOKEN_EQEQ: u8 = 27;
|
||||||
pub const TOKEN_LBRACKET: u32 = 28;
|
pub const TOKEN_LBRACKET: u8 = 28;
|
||||||
pub const TOKEN_RBRACKET: u32 = 29;
|
pub const TOKEN_RBRACKET: u8 = 29;
|
||||||
pub const TOKEN_VOID: u32 = 30;
|
pub const TOKEN_VOID: u8 = 30;
|
||||||
pub const TOKEN_IDENT: u32 = 31;
|
pub const TOKEN_SLASH: u8 = 31;
|
||||||
pub const TOKEN_NUMBER: u32 = 32;
|
pub const TOKEN_STAR: u8 = 32;
|
||||||
pub const TOKEN_STRING: u32 = 33;
|
pub const TOKEN_IDENT: u8 = 33;
|
||||||
pub const TOKEN_COMMENT: u32 = 34;
|
pub const TOKEN_NUMBER: u8 = 34;
|
||||||
|
pub const TOKEN_STRING: u8 = 35;
|
||||||
|
pub const TOKEN_COMMENT: u8 = 36;
|
||||||
|
|
||||||
#[repr(C)]
|
#[repr(C)]
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
|
|
@ -70,6 +74,7 @@ pub struct Ast {
|
||||||
pub struct AstNode {
|
pub struct AstNode {
|
||||||
pub kind: u8,
|
pub kind: u8,
|
||||||
pub data: *const (),
|
pub data: *const (),
|
||||||
|
pub extra: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[repr(C)]
|
#[repr(C)]
|
||||||
|
|
@ -105,11 +110,4 @@ pub struct BinaryExpr {
|
||||||
pub right: u64,
|
pub right: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[repr(C)]
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub struct Block {
|
|
||||||
pub statements: *const u64,
|
|
||||||
pub statements_len: usize,
|
|
||||||
}
|
|
||||||
|
|
||||||
use super::vec::Vec;
|
use super::vec::Vec;
|
||||||
|
|
|
||||||
|
|
@ -79,6 +79,8 @@ fn collect_tokens() -> Vec<Lexeme> {
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
unsafe {
|
unsafe {
|
||||||
|
use util::defs::*;
|
||||||
|
|
||||||
// assert initial state
|
// assert initial state
|
||||||
assert_eq!((&raw const input_file).read(), 0);
|
assert_eq!((&raw const input_file).read(), 0);
|
||||||
assert_eq!((&raw const buffer_len).read(), 0);
|
assert_eq!((&raw const buffer_len).read(), 0);
|
||||||
|
|
@ -134,16 +136,16 @@ fn main() {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
&collect_tokens()[..],
|
&collect_tokens()[..],
|
||||||
&[
|
&[
|
||||||
Lexeme(31, "this-is-an-ident"),
|
Lexeme(TOKEN_IDENT, "this-is-an-ident"),
|
||||||
Lexeme(31, "another_ident123"),
|
Lexeme(TOKEN_IDENT, "another_ident123"),
|
||||||
Lexeme(31, "_underscore_test"),
|
Lexeme(TOKEN_IDENT, "_underscore_test"),
|
||||||
Lexeme(31, "mixedCASEIdent"),
|
Lexeme(TOKEN_IDENT, "mixedCASEIdent"),
|
||||||
Lexeme(31, "number12345"),
|
Lexeme(TOKEN_IDENT, "number12345"),
|
||||||
Lexeme(31, "____"),
|
Lexeme(TOKEN_IDENT, "____"),
|
||||||
Lexeme(31, "_"),
|
Lexeme(TOKEN_IDENT, "_"),
|
||||||
Lexeme(17, ""),
|
Lexeme(17, ""),
|
||||||
Lexeme(31, "leading-minus"),
|
Lexeme(TOKEN_IDENT, "leading-minus"),
|
||||||
Lexeme(31, "trailing-minus-"),
|
Lexeme(TOKEN_IDENT, "trailing-minus-"),
|
||||||
]
|
]
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
@ -155,7 +157,7 @@ fn main() {
|
||||||
&collect_tokens()[..],
|
&collect_tokens()[..],
|
||||||
&[
|
&[
|
||||||
Lexeme(4, ""),
|
Lexeme(4, ""),
|
||||||
Lexeme(31, "my-function"),
|
Lexeme(TOKEN_IDENT, "my-function"),
|
||||||
Lexeme(19, ""),
|
Lexeme(19, ""),
|
||||||
Lexeme(18, ""),
|
Lexeme(18, ""),
|
||||||
Lexeme(12, ""),
|
Lexeme(12, ""),
|
||||||
|
|
@ -174,7 +176,7 @@ fn main() {
|
||||||
|
|
||||||
assert_eq!(expect_token(2).into_option(), None);
|
assert_eq!(expect_token(2).into_option(), None);
|
||||||
assert_eq!(expect_token(4).into_option().unwrap().as_str(), "fn");
|
assert_eq!(expect_token(4).into_option().unwrap().as_str(), "fn");
|
||||||
assert_eq!(unwrap_token(31).as_str(), "my-function");
|
assert_eq!(unwrap_token(TOKEN_IDENT).as_str(), "my-function");
|
||||||
|
|
||||||
eprint!("Initializing tokeniser.. ");
|
eprint!("Initializing tokeniser.. ");
|
||||||
tokeniser_init(c"tests/tokens/comment.l".as_ptr());
|
tokeniser_init(c"tests/tokens/comment.l".as_ptr());
|
||||||
|
|
@ -183,15 +185,15 @@ fn main() {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
&collect_tokens()[..],
|
&collect_tokens()[..],
|
||||||
&[
|
&[
|
||||||
Lexeme(34, ""),
|
Lexeme(TOKEN_COMMENT, ""),
|
||||||
Lexeme(4, ""),
|
Lexeme(4, ""),
|
||||||
Lexeme(31, "my-function"),
|
Lexeme(TOKEN_IDENT, "my-function"),
|
||||||
Lexeme(19, ""),
|
Lexeme(19, ""),
|
||||||
Lexeme(18, ""),
|
Lexeme(18, ""),
|
||||||
Lexeme(12, ""),
|
Lexeme(12, ""),
|
||||||
Lexeme(11, ""),
|
Lexeme(11, ""),
|
||||||
Lexeme(21, ""),
|
Lexeme(21, ""),
|
||||||
Lexeme(34, ""),
|
Lexeme(TOKEN_COMMENT, ""),
|
||||||
Lexeme(5, ""),
|
Lexeme(5, ""),
|
||||||
Lexeme(10, ""),
|
Lexeme(10, ""),
|
||||||
Lexeme(23, ""),
|
Lexeme(23, ""),
|
||||||
|
|
@ -206,11 +208,11 @@ fn main() {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
&collect_tokens()[..],
|
&collect_tokens()[..],
|
||||||
&[
|
&[
|
||||||
Lexeme(32, "1234"),
|
Lexeme(TOKEN_NUMBER, "1234"),
|
||||||
Lexeme(32, "123_345_"),
|
Lexeme(TOKEN_NUMBER, "123_345_"),
|
||||||
Lexeme(32, "1234____56"),
|
Lexeme(TOKEN_NUMBER, "1234____56"),
|
||||||
Lexeme(32, "1"),
|
Lexeme(TOKEN_NUMBER, "1"),
|
||||||
Lexeme(32, "0"),
|
Lexeme(TOKEN_NUMBER, "0"),
|
||||||
]
|
]
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
@ -221,11 +223,14 @@ fn main() {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
&collect_tokens()[..],
|
&collect_tokens()[..],
|
||||||
&[
|
&[
|
||||||
Lexeme(33, "\"this is a string\""),
|
Lexeme(TOKEN_STRING, "\"this is a string\""),
|
||||||
Lexeme(33, "\"another\nstring\nspanning multiple\n lines\""),
|
Lexeme(
|
||||||
Lexeme(33, "\"string with a \\\"quoted\\\" word\""),
|
TOKEN_STRING,
|
||||||
Lexeme(33, "\"a\""),
|
"\"another\nstring\nspanning multiple\n lines\""
|
||||||
Lexeme(33, "\"\"")
|
),
|
||||||
|
Lexeme(TOKEN_STRING, "\"string with a \\\"quoted\\\" word\""),
|
||||||
|
Lexeme(TOKEN_STRING, "\"a\""),
|
||||||
|
Lexeme(TOKEN_STRING, "\"\"")
|
||||||
],
|
],
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
@ -236,7 +241,11 @@ fn main() {
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
&collect_tokens()[..],
|
&collect_tokens()[..],
|
||||||
&[Lexeme(32, "3"), Lexeme(16, "+"), Lexeme(32, "4")],
|
&[
|
||||||
|
Lexeme(TOKEN_NUMBER, "3"),
|
||||||
|
Lexeme(16, "+"),
|
||||||
|
Lexeme(TOKEN_NUMBER, "4")
|
||||||
|
],
|
||||||
);
|
);
|
||||||
|
|
||||||
eprint!("Initializing tokeniser.. ");
|
eprint!("Initializing tokeniser.. ");
|
||||||
|
|
@ -248,21 +257,42 @@ fn main() {
|
||||||
&collect_tokens()[..],
|
&collect_tokens()[..],
|
||||||
&[
|
&[
|
||||||
Lexeme(4, "fn"),
|
Lexeme(4, "fn"),
|
||||||
Lexeme(31, "main"),
|
Lexeme(TOKEN_IDENT, "main"),
|
||||||
Lexeme(19, "("),
|
Lexeme(19, "("),
|
||||||
Lexeme(18, ")"),
|
Lexeme(18, ")"),
|
||||||
Lexeme(12, "->"),
|
Lexeme(12, "->"),
|
||||||
Lexeme(30, "void"),
|
Lexeme(30, "void"),
|
||||||
Lexeme(21, "{"),
|
Lexeme(21, "{"),
|
||||||
Lexeme(5, "return"),
|
Lexeme(5, "return"),
|
||||||
Lexeme(32, "1"),
|
Lexeme(TOKEN_NUMBER, "1"),
|
||||||
Lexeme(16, "+"),
|
Lexeme(16, "+"),
|
||||||
Lexeme(32, "2"),
|
Lexeme(TOKEN_NUMBER, "2"),
|
||||||
Lexeme(23, ";"),
|
Lexeme(23, ";"),
|
||||||
Lexeme(20, "}"),
|
Lexeme(20, "}"),
|
||||||
],
|
],
|
||||||
);
|
);
|
||||||
|
|
||||||
|
eprint!("Initializing tokeniser.. ");
|
||||||
|
let src = b"(b / d + c) * 42;";
|
||||||
|
tokeniser_init_buf(src.as_ptr(), src.len());
|
||||||
|
eprintln!("ok.");
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
&collect_tokens()[..],
|
||||||
|
&[
|
||||||
|
Lexeme(19, "("),
|
||||||
|
Lexeme(33, "b"),
|
||||||
|
Lexeme(31, "/"),
|
||||||
|
Lexeme(33, "d"),
|
||||||
|
Lexeme(16, "+"),
|
||||||
|
Lexeme(33, "c"),
|
||||||
|
Lexeme(18, ")"),
|
||||||
|
Lexeme(32, "*"),
|
||||||
|
Lexeme(34, "42"),
|
||||||
|
Lexeme(23, ";")
|
||||||
|
],
|
||||||
|
);
|
||||||
|
|
||||||
eprintln!("Finished tokenising.");
|
eprintln!("Finished tokenising.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue