Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit f267976

Browse files
committed
Simple lexer; Support for parsing identifiers and other simple stuff
1 parent e8649e3 commit f267976

3 files changed

Lines changed: 163 additions & 0 deletions

File tree

src/lexer/lexeme.rs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
/// Category of a single lexeme produced by the lexer.
///
/// `Eq` and `Hash` are derived in addition to `PartialEq` so that kinds can be
/// used as keys in hash maps and sets (e.g. for dispatch or lookup tables).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum LexemeKind {
    /// End of the input.
    Eof,
    /// A whitespace character (including a line break).
    Whitespace,
    /// A `//` line comment.
    Comment,
    /// A character the lexer does not recognise.
    Unknown,

    /// `(`
    LeftParen,
    /// `)`
    RightParen,
    /// `{`
    LeftBrace,
    /// `}`
    RightBrace,
    /// `[`
    LeftSquare,
    /// `]`
    RightSquare,

    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Asterisk,
    /// `/`
    Slash,

    /// An identifier.
    Identifier,
}
22+
23+
#[derive(Clone, Copy, Debug)]
24+
pub struct Lexeme<'a> {
25+
pub kind: LexemeKind,
26+
pub code_span: &'a str,
27+
pub location: (usize, usize),
28+
}

src/lexer/lexer.rs

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
use crate::lexer::{Lexeme, LexemeKind};
2+
3+
#[derive(Debug)]
4+
pub struct Lexer<'a> {
5+
input: &'a str,
6+
7+
position: usize,
8+
line_number: usize,
9+
column_number: usize,
10+
11+
has_hit_eof: bool,
12+
}
13+
14+
impl<'a> Iterator for Lexer<'a> {
15+
type Item = Lexeme<'a>;
16+
17+
fn next(&mut self) -> Option<Self::Item> {
18+
self.next_lexeme()
19+
}
20+
}
21+
22+
impl<'a> Lexer<'a> {
23+
fn lexeme(&mut self, kind: LexemeKind, length: usize) -> Option<Lexeme<'a>> {
24+
let lexeme = Lexeme {
25+
kind,
26+
code_span: &self.input[self.position..self.position + length],
27+
location: (self.line_number, self.column_number),
28+
};
29+
30+
self.position += length;
31+
self.column_number += length;
32+
33+
Some(lexeme)
34+
}
35+
36+
fn new_line(&mut self) -> Option<Lexeme<'a>> {
37+
let lexeme = Lexeme {
38+
kind: LexemeKind::Whitespace,
39+
code_span: &self.input[self.position..self.position + 1],
40+
location: (self.line_number, self.column_number),
41+
};
42+
43+
self.position += 1;
44+
self.line_number += 1;
45+
self.column_number = 0;
46+
47+
Some(lexeme)
48+
}
49+
50+
pub fn new(input: &'a str) -> Lexer<'a> {
51+
Lexer {
52+
input,
53+
position: 0,
54+
line_number: 0,
55+
column_number: 0,
56+
has_hit_eof: false,
57+
}
58+
}
59+
60+
pub fn next_lexeme(&mut self) -> Option<Lexeme<'a>> {
61+
let mut chars = self.input.chars();
62+
63+
if let Some(ch) = chars.nth(self.position) {
64+
match ch {
65+
'(' => self.lexeme(LexemeKind::LeftParen, 1),
66+
')' => self.lexeme(LexemeKind::RightParen, 1),
67+
'{' => self.lexeme(LexemeKind::LeftBrace, 1),
68+
'}' => self.lexeme(LexemeKind::RightBrace, 1),
69+
'[' => self.lexeme(LexemeKind::LeftSquare, 1),
70+
']' => self.lexeme(LexemeKind::RightSquare, 1),
71+
72+
'+' => self.lexeme(LexemeKind::Plus, 1),
73+
'-' => self.lexeme(LexemeKind::Minus, 1),
74+
'*' => self.lexeme(LexemeKind::Asterisk, 1),
75+
'/' => {
76+
let next_ch = chars.next()?;
77+
78+
if next_ch == '/' {
79+
let mut length = 2;
80+
81+
while let Some(comment_ch) = chars.next() {
82+
if comment_ch == '\n' {
83+
break;
84+
}
85+
86+
length += 1;
87+
}
88+
89+
self.lexeme(LexemeKind::Comment, length)
90+
}
91+
else {
92+
self.lexeme(LexemeKind::Slash, 1)
93+
}
94+
},
95+
96+
'\n' => self.new_line(),
97+
'\t' => self.lexeme(LexemeKind::Whitespace, 1),
98+
99+
ch @ _ if ch.is_whitespace() => self.lexeme(LexemeKind::Whitespace, 1),
100+
ch @ _ if ch.is_alphabetic() => {
101+
let mut length = 1;
102+
103+
while let Some(ident_ch) = chars.next() {
104+
if !ident_ch.is_alphanumeric() && ident_ch != '_' {
105+
break;
106+
}
107+
108+
length += 1;
109+
}
110+
111+
self.lexeme(LexemeKind::Identifier, length)
112+
},
113+
114+
_ => self.lexeme(LexemeKind::Unknown, 1),
115+
}
116+
} else {
117+
if self.has_hit_eof {
118+
None
119+
} else {
120+
self.has_hit_eof = true;
121+
122+
Some(Lexeme {
123+
kind: LexemeKind::Eof,
124+
code_span: &self.input[self.input.len()..],
125+
location: (self.line_number, self.column_number),
126+
})
127+
}
128+
}
129+
}
130+
}

src/lexer/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pub mod lexeme;
2+
pub mod lexer;
3+
4+
pub use lexeme::*;
5+
pub use lexer::*;

0 commit comments

Comments
 (0)