Updated regexp example to modern Rust as well
Getty Ritter
10 years ago
1 | 1 | use re::instruction::{Instr,IChar,IMatch,IJmp,ISplit}; |
2 | use std::vec::append; | |
3 | mod instruction; | |
2 | // use std::vec::Vec; | |
4 | 3 | |
5 | 4 | /* A regular expression parse tree */ |
5 | #[deriving(Show)] | |
6 | 6 | enum Regexp { |
7 | 7 | RChar(char), |
8 | RSeq(~Regexp, ~Regexp), | |
9 | RChc(~Regexp, ~Regexp), | |
10 | RRep(~Regexp), | |
8 | RSeq(Box<Regexp>, Box<Regexp>), | |
9 | RChc(Box<Regexp>, Box<Regexp>), | |
10 | RRep(Box<Regexp>), | |
11 | 11 | } |
12 | 12 | |
13 | 13 | /* We're assuming a prefix regexp here. That means that we have |
23 | 23 | match s.char_at(0) { |
24 | 24 | '.' => { let (s1, r1) = parse(s.slice_from(1)); |
25 | 25 | let (s2, r2) = parse(s1); |
26 | (s2, RSeq(~r1, ~r2)) }, | |
26 | (s2, RSeq(box r1, box r2)) }, | |
27 | 27 | '|' => { let (s1, r1) = parse(s.slice_from(1)); |
28 | 28 | let (s2, r2) = parse(s1); |
29 | (s2, RChc(~r1, ~r2)) }, | |
29 | (s2, RChc(box r1, box r2)) }, | |
30 | 30 | '*' => { let (s1, r1) = parse(s.slice_from(1)); |
31 | (s1, RRep(~r1)) }, | |
31 | (s1, RRep(box r1)) }, | |
32 | 32 | c => (s.slice_from(1), RChar(c)), |
33 | 33 | } |
34 | 34 | } |
35 | 35 | |
36 | 36 | /* Compiling an AST for regexps to the instructions */ |
37 | fn emit(r: &Regexp, i: uint) -> (uint, ~[Instr]) { | |
37 | fn emit(r: &Regexp, i: uint) -> (uint, Vec<Instr>) { | |
38 | 38 | match *r { |
39 | RChar(c) => { (i+1, ~[IChar(c)]) }, | |
40 | RSeq(ref a, ref b) => | |
41 | { let (ai, v1) = emit(*a, i); | |
42 | let (bi, v2) = emit(*b, ai); | |
43 | (bi, append(v1, v2)) }, | |
44 | RChc(ref a, ref b) => | |
45 | { let (ai, v1) = emit(*a, i + 1); | |
46 | let (bi, v2) = emit(*b, ai + 1); | |
47 | let spl = ~[ ISplit(i + 1, ai + 1) ]; | |
48 | let jmp = ~[ IJmp(ai) ]; | |
49 | (bi, append(spl, append(v1, append(jmp, v2)))) }, | |
50 | RRep(ref a) => | |
51 | { let (ai, v1) = emit(*a, i + 1); | |
52 | let spl = ~[ ISplit(i + 1, ai + 1) ]; | |
53 | let jmp = ~[ IJmp(i) ]; | |
54 | (ai + 1, append(spl, append(v1, jmp))) }, | |
39 | RChar(c) => { (i+1, vec![IChar(c)]) }, | |
40 | RSeq(box ref a, box ref b) => | |
41 | { let (ai, mut v1) = emit(a, i); | |
42 | let (bi, v2) = emit(b, ai); | |
43 | v1.push_all_move(v2); | |
44 | (bi, v1) }, | |
45 | RChc(box ref a, box ref b) => | |
46 | { let (ai, v1) = emit(a, i + 1); | |
47 | let (bi, v2) = emit(b, ai + 1); | |
48 | let mut spl = vec![ ISplit(i + 1, ai + 1) ]; | |
49 | let jmp = vec![ IJmp(ai) ]; | |
50 | spl.push_all_move(v1); | |
51 | spl.push_all_move(jmp); | |
52 | spl.push_all_move(v2); | |
53 | (bi, spl) }, | |
54 | RRep(box ref a) => | |
55 | { let (ai, v1) = emit(a, i + 1); | |
56 | let mut spl = vec![ ISplit(i + 1, ai + 1) ]; | |
57 | let jmp = vec![ IJmp(i) ]; | |
58 | spl.push_all_move(v1); | |
59 | spl.push_all_move(jmp); | |
60 | (ai + 1, spl) }, | |
55 | 61 | } |
56 | 62 | } |
57 | 63 | |
58 | 64 | /* A wrapper over these processes */ |
59 | pub fn compile(s: &str) -> ~[Instr] { | |
65 | pub fn compile(s: &str) -> Vec<Instr> { | |
60 | 66 | let (_, re) = parse(s); |
61 | println!("{:?}", re); | |
67 | println!("{}", re); | |
62 | 68 | let (_, ins) = emit(&re, 0); |
63 | println!("{:?}", ins); | |
64 | return append(ins, [IMatch]); | |
69 | println!("{}", ins); | |
70 | return ins.append([IMatch]); | |
65 | 71 | } |
1 | 1 | /* A single instruction as used in the VM-based matcher */ |
2 | #[deriving(Clone)] | |
2 | #[deriving(Clone,Show)] | |
3 | 3 | pub enum Instr { |
4 | 4 | IChar(char), /* match a character or fail */ |
5 | 5 | IMatch, /* match anything successfully */ |
1 | 1 | use re::instruction::{Instr,IChar,IMatch,IJmp,ISplit}; |
2 | mod instruction; | |
3 | 2 | |
4 | 3 | /* We wrap the real evaluation function, as we're always going to |
5 | 4 | * start executing instruction 0 with no string matched. */ |
1 | 1 | use re::instruction::{Instr,IChar,IMatch,IJmp,ISplit}; |
2 | mod instruction; | |
3 | 2 | |
4 | 3 | /* The state of a program can be unambiguously specified by |
5 | 4 | * a current instruction and a current position in the string. */ |
8 | 7 | /* An evaluator that maintains a manual, mutable stack for doing |
9 | 8 | * regular-expression matching. */ |
10 | 9 | pub fn eval(instrs: &[Instr], input: &str) -> bool { |
11 | let mut stack = ~[ EvalState {pc: 0, cc: 0} ]; | |
10 | let mut stack = vec![ EvalState {pc: 0, cc: 0} ]; | |
12 | 11 | |
13 | 12 | while stack.len() > 0 { |
14 | let st = stack.pop(); | |
13 | let st = stack.pop().unwrap(); | |
15 | 14 | match instrs[st.pc] { |
16 | 15 | IChar(_) if st.cc >= input.len() => |
17 | 16 | continue, |
14 | 14 | /* our sample regexp corresponds to /ab*c/ in |
15 | 15 | * the usual notation. */ |
16 | 16 | let re = compile("..a*bc"); |
17 | println("Recursive:"); | |
17 | println!("Recursive:"); | |
18 | 18 | println!(" match(re, \"abbbc\")\t== {}", |
19 | ::re::recursive::eval(re, "abbbc")); | |
19 | ::re::recursive::eval(re.as_slice(), "abbbc")); | |
20 | 20 | println!(" match(re, \"ac\")\t== {}", |
21 | ::re::recursive::eval(re, "ac")); | |
21 | ::re::recursive::eval(re.as_slice(), "ac")); | |
22 | 22 | println!(" match(re, \"abd\")\t== {}", |
23 | ::re::recursive::eval(re, "abd")); | |
24 | println("Manual Stack:"); | |
23 | ::re::recursive::eval(re.as_slice(), "abd")); | |
24 | println!("Manual Stack:"); | |
25 | 25 | println!(" match(re, \"abbbc\")\t== {}", |
26 | ::re::stack::eval(re, "abbbc")); | |
26 | ::re::stack::eval(re.as_slice(), "abbbc")); | |
27 | 27 | println!(" match(re, \"ac\")\t== {}", |
28 | ::re::stack::eval(re, "ac")); | |
28 | ::re::stack::eval(re.as_slice(), "ac")); | |
29 | 29 | println!(" match(re, \"abd\")\t== {}", |
30 | ::re::stack::eval(re, "abd")); | |
30 | ::re::stack::eval(re.as_slice(), "abd")); | |
31 | 31 | } |