Fixed bug and changed names
Getty Ritter
10 years ago
12 | 12 | |
13 | 13 | fn main() { |
14 | 14 | /* our sample regexp corresponds to /ab*c/ in |
15 | * the usual notation. */ | |
16 | let re = compile("..a*bc"); | |
15 | * the usual notation. | |
16 | * These two lines can be collapsed into one once | |
17 | * this RFC lands: https://github.com/rust-lang/rfcs/pull/66 | |
18 | */ | |
19 | let regexp = compile("..a*bc"); | |
20 | let instrs = regexp.as_slice(); | |
21 | ||
17 | 22 | println!("Recursive:"); |
18 | 23 | println!(" match(re, \"abbbc\")\t== {}", |
19 |
::re::recursive::eval( |
|
24 | ::re::recursive::eval(instrs, "abbbc")); | |
20 | 25 | println!(" match(re, \"ac\")\t== {}", |
21 |
::re::recursive::eval( |
|
26 | ::re::recursive::eval(instrs, "ac")); | |
22 | 27 | println!(" match(re, \"abd\")\t== {}", |
23 |
::re::recursive::eval( |
|
28 | ::re::recursive::eval(instrs, "abd")); | |
29 | ||
24 | 30 | println!("Manual Stack:"); |
25 | 31 | println!(" match(re, \"abbbc\")\t== {}", |
26 |
::re::stack::eval( |
|
32 | ::re::stack::eval(instrs, "abbbc")); | |
27 | 33 | println!(" match(re, \"ac\")\t== {}", |
28 |
::re::stack::eval( |
|
34 | ::re::stack::eval(instrs, "ac")); | |
29 | 35 | println!(" match(re, \"abd\")\t== {}", |
30 |
::re::stack::eval( |
|
36 | ::re::stack::eval(instrs, "abd")); | |
31 | 37 | } |
33 | 33 | } |
34 | 34 | } |
35 | 35 | |
36 | /* Compiling an AST for regexps to the instructions */ | |
37 | fn emit(r: &Regexp, i: uint) -> (uint, Vec<Instr>) { | |
38 | match *r { | |
39 | RChar(c) => { (i+1, vec![IChar(c)]) }, | |
40 | RSeq(box ref a, box ref b) => | |
41 | { let (ai, mut v1) = emit(a, i); | |
42 | let (bi, v2) = emit(b, ai); | |
36 | /* Compiling an AST for regexps to the instructions. | |
37 | * The return values correspond to the pc just past the | |
38 | * emitted code (so that subsequent instructions to be added | |
39 | * know what pc to use) and the vector of instructions. | |
40 | */ | |
41 | fn emit(regexp: &Regexp, pc: uint) -> (uint, Vec<Instr>) { | |
42 | match *regexp { | |
43 | /* For a single character, we produce this code: | |
44 | * ---- <- pc | |
45 | * | IChar(chr) | |
46 | * ---- <- pc + 1 | |
47 | */ | |
48 | RChar(chr) => { (pc+1, vec![IChar(chr)]) }, | |
49 | /* For a sequencing, we produce this code: | |
50 | * ---- <- pc | |
51 | * | [[ first ]] | |
52 | * ---- <- first_pc | |
53 | * | [[ second ]] | |
54 | * ---- <- second_pc | |
55 | */ | |
56 | RSeq(box ref first, box ref second) => | |
57 | { let (first_pc, mut v1) = emit(first, pc); | |
58 | let (second_pc, v2) = emit(second, first_pc); | |
43 | 59 | v1.push_all_move(v2); |
44 | (bi, v1) }, | |
45 | RChc(box ref a, box ref b) => | |
46 | { let (ai, v1) = emit(a, i + 1); | |
47 | let (bi, v2) = emit(b, ai + 1); | |
48 | let mut spl = vec![ ISplit(i + 1, ai + 1) ]; | |
49 | let jmp = vec![ IJmp(ai) ]; | |
60 | (second_pc, v1) | |
61 | }, | |
62 | /* For a choice, we produce this code: | |
63 | * ---- <- pc | |
64 | * | ISplit(pc+1, first_pc+1) | |
65 | * ---- <- pc + 1 | |
66 | * | [[ first ]] | |
67 | * ---- <- first_pc | |
68 | * | IJmp(second_pc) | |
69 | * ---- <- first_pc + 1 | |
70 | * | [[ second ]] | |
71 | * ---- <- second_pc | |
72 | */ | |
73 | RChc(box ref first, box ref second) => | |
74 | { let (first_pc, v1) = emit(first, pc + 1); | |
75 | let (second_pc, v2) = emit(second, first_pc + 1); | |
76 | let mut split_instr = vec![ ISplit(pc + 1, first_pc + 1) ]; | |
77 | let jmp_instr = vec![ IJmp(second_pc) ]; | |
78 | split_instr.push_all_move(v1); | |
79 | split_instr.push_all_move(jmp_instr); | |
80 | split_instr.push_all_move(v2); | |
81 | (second_pc, split_instr) | |
82 | }, | |
83 | /* For a repetition, we produce this code: | |
84 | * ---- <- pc | |
85 | * | ISplit(pc+1, expr_pc + 1) | |
86 | * ---- <- pc + 1 | |
87 | * | [[ expr ]] | |
88 | * ---- <- expr_pc | |
89 | * | IJmp(pc) | |
90 | * ---- <- expr_pc + 1 | |
91 | */ | |
92 | RRep(box ref expr) => | |
93 | { let (expr_pc, v1) = emit(expr, pc + 1); | |
94 | let mut spl = vec![ ISplit(pc + 1, expr_pc + 1) ]; | |
95 | let jmp = vec![ IJmp(pc) ]; | |
50 | 96 | spl.push_all_move(v1); |
51 | 97 | spl.push_all_move(jmp); |
52 | spl.push_all_move(v2); | |
53 | (bi, spl) }, | |
54 | RRep(box ref a) => | |
55 | { let (ai, v1) = emit(a, i + 1); | |
56 | let mut spl = vec![ ISplit(i + 1, ai + 1) ]; | |
57 | let jmp = vec![ IJmp(i) ]; | |
58 | spl.push_all_move(v1); | |
59 | spl.push_all_move(jmp); | |
60 |
( |
|
98 | (expr_pc + 1, spl) | |
99 | }, | |
61 | 100 | } |
62 | 101 | } |
63 | 102 | |
67 | 106 | println!("{}", re); |
68 | 107 | let (_, ins) = emit(&re, 0); |
69 | 108 | println!("{}", ins); |
109 | /* If we get to the end of a compiled regular expression, | |
110 | * that means it hasn't aborted and we can match. | |
111 | */ | |
70 | 112 | return ins.append([IMatch]); |
71 | 113 | } |
7 | 7 | } |
8 | 8 | |
9 | 9 | /* We use the Rust stack as our stack in this naive recursive |
10 |
* implementation. |
|
10 | * implementation. We have a vector slice of instructions, | |
11 | * a string we're matching over, the current program counter | |
12 | * in the instructions, and the current point to which we've | |
13 | * traversed the string. */ | |
11 | 14 | fn eval1(instrs: &[Instr], input: &str, pc: uint, cc: uint) -> bool { |
12 | 15 | match instrs[pc] { |
13 | 16 | IChar(_) if cc >= input.len() => return false, |
16 | 19 | IChar(_) => return false, |
17 | 20 | IMatch => return true, |
18 | 21 | IJmp(i) => eval1(instrs, input, i, cc), |
19 | ISplit(i, _) if eval1(instrs, input, i, cc) => true, | |
20 | ISplit(_, j) => eval1(instrs, input, j, cc), | |
22 | ISplit(i, j) => eval1(instrs, input, i, cc) || | |
23 | eval1(instrs, input, j, cc), | |
21 | 24 | } |
22 | 25 | } |
9 | 9 | pub fn eval(instrs: &[Instr], input: &str) -> bool { |
10 | 10 | let mut stack = vec![ EvalState {pc: 0, cc: 0} ]; |
11 | 11 | |
12 | /* Every time we find that a possibility is impossible, we | |
13 | * remove it from the stack. If we have completed a match, | |
14 | * we'll short-circuit out of this loop; otherwise, an empty | |
15 | * stack means we have failed every possible branch and can | |
16 | * return false. */ | |
12 | 17 | while stack.len() > 0 { |
18 | /* This call to .unwrap() is safe because we've already | |
19 | * manually checked the stack length. */ | |
13 | 20 | let st = stack.pop().unwrap(); |
14 | 21 | match instrs[st.pc] { |
15 | 22 | IChar(_) if st.cc >= input.len() => |