gdritter repos rust-examples / 2361cd0
Fixed bug and changed names Getty Ritter 10 years ago
4 changed file(s) with 91 addition(s) and 33 deletion(s). Collapse all Expand all
1212
1313 fn main() {
1414 /* our sample regexp corresponds to /ab*c/ in
15 * the usual notation. */
16 let re = compile("..a*bc");
15 * the usual notation.
16 * These two lines can be collapsed into one once
17 * this RFC lands: https://github.com/rust-lang/rfcs/pull/66
18 */
19 let regexp = compile("..a*bc");
20 let instrs = regexp.as_slice();
21
1722 println!("Recursive:");
1823 println!(" match(re, \"abbbc\")\t== {}",
19 ::re::recursive::eval(re.as_slice(), "abbbc"));
24 ::re::recursive::eval(instrs, "abbbc"));
2025 println!(" match(re, \"ac\")\t== {}",
21 ::re::recursive::eval(re.as_slice(), "ac"));
26 ::re::recursive::eval(instrs, "ac"));
2227 println!(" match(re, \"abd\")\t== {}",
23 ::re::recursive::eval(re.as_slice(), "abd"));
28 ::re::recursive::eval(instrs, "abd"));
29
2430 println!("Manual Stack:");
2531 println!(" match(re, \"abbbc\")\t== {}",
26 ::re::stack::eval(re.as_slice(), "abbbc"));
32 ::re::stack::eval(instrs, "abbbc"));
2733 println!(" match(re, \"ac\")\t== {}",
28 ::re::stack::eval(re.as_slice(), "ac"));
34 ::re::stack::eval(instrs, "ac"));
2935 println!(" match(re, \"abd\")\t== {}",
30 ::re::stack::eval(re.as_slice(), "abd"));
36 ::re::stack::eval(instrs, "abd"));
3137 }
3333 }
3434 }
3535
36 /* Compiling an AST for regexps to the instructions */
37 fn emit(r: &Regexp, i: uint) -> (uint, Vec<Instr>) {
38 match *r {
39 RChar(c) => { (i+1, vec![IChar(c)]) },
40 RSeq(box ref a, box ref b) =>
41 { let (ai, mut v1) = emit(a, i);
42 let (bi, v2) = emit(b, ai);
36 /* Compiling an AST for regexps to the instructions.
37 * The return values are the program counter just past
38 * the emitted code (so that subsequent instructions
39 * know what pc to use) and the vector of instructions.
40 */
41 fn emit(regexp: &Regexp, pc: uint) -> (uint, Vec<Instr>) {
42 match *regexp {
43 /* For a single-character match, we produce this code:
44 * ---- <- pc
45 * | IChar(chr)
46 * ---- <- pc + 1
47 */
48 RChar(chr) => { (pc+1, vec![IChar(chr)]) },
49 /* For a sequencing, we produce this code:
50 * ---- <- pc
51 * | [[ first ]]
52 * ---- <- first_pc
53 * | [[ second ]]
54 * ---- <- second_pc
55 */
56 RSeq(box ref first, box ref second) =>
57 { let (first_pc, mut v1) = emit(first, pc);
58 let (second_pc, v2) = emit(second, first_pc);
4359 v1.push_all_move(v2);
44 (bi, v1) },
45 RChc(box ref a, box ref b) =>
46 { let (ai, v1) = emit(a, i + 1);
47 let (bi, v2) = emit(b, ai + 1);
48 let mut spl = vec![ ISplit(i + 1, ai + 1) ];
49 let jmp = vec![ IJmp(ai) ];
60 (second_pc, v1)
61 },
62 /* For a choice, we produce this code:
63 * ---- <- pc
64 * | ISplit(pc+1, first_pc+1)
65 * ---- <- pc + 1
66 * | [[ first ]]
67 * ---- <- first_pc
68 * | IJmp(second_pc)
69 * ---- <- first_pc + 1
70 * | [[ second ]]
71 * ---- <- second_pc
72 */
73 RChc(box ref first, box ref second) =>
74 { let (first_pc, v1) = emit(first, pc + 1);
75 let (second_pc, v2) = emit(second, first_pc + 1);
76 let mut split_instr = vec![ ISplit(pc + 1, first_pc + 1) ];
77 let jmp_instr = vec![ IJmp(second_pc) ];
78 split_instr.push_all_move(v1);
79 split_instr.push_all_move(jmp_instr);
80 split_instr.push_all_move(v2);
81 (second_pc, split_instr)
82 },
83 /* For a repetition, we produce this code:
84 * ---- <- pc
85 * | ISplit(pc+1, expr_pc + 1)
86 * ---- <- pc + 1
87 * | [[ expr ]]
88 * ---- <- expr_pc
89 * | IJmp(pc)
90 * ---- <- expr_pc + 1
91 */
92 RRep(box ref expr) =>
93 { let (expr_pc, v1) = emit(expr, pc + 1);
94 let mut spl = vec![ ISplit(pc + 1, expr_pc + 1) ];
95 let jmp = vec![ IJmp(pc) ];
5096 spl.push_all_move(v1);
5197 spl.push_all_move(jmp);
52 spl.push_all_move(v2);
53 (bi, spl) },
54 RRep(box ref a) =>
55 { let (ai, v1) = emit(a, i + 1);
56 let mut spl = vec![ ISplit(i + 1, ai + 1) ];
57 let jmp = vec![ IJmp(i) ];
58 spl.push_all_move(v1);
59 spl.push_all_move(jmp);
60 (ai + 1, spl) },
98 (expr_pc + 1, spl)
99 },
61100 }
62101 }
63102
67106 println!("{}", re);
68107 let (_, ins) = emit(&re, 0);
69108 println!("{}", ins);
109 /* If we get to the end of a compiled regular expression,
110 * that means it hasn't aborted and we can match.
111 */
70112 return ins.append([IMatch]);
71113 }
77 }
88
99 /* We use the Rust stack as our stack in this naive recursive
10 * implementation. */
10 * implementation. We have a vector slice of instructions,
11 * a string we're matching over, the current program counter
12 * in the instructions, and the current point to which we've
13 * traversed the string. */
1114 fn eval1(instrs: &[Instr], input: &str, pc: uint, cc: uint) -> bool {
1215 match instrs[pc] {
1316 IChar(_) if cc >= input.len() => return false,
1619 IChar(_) => return false,
1720 IMatch => return true,
1821 IJmp(i) => eval1(instrs, input, i, cc),
19 ISplit(i, _) if eval1(instrs, input, i, cc) => true,
20 ISplit(_, j) => eval1(instrs, input, j, cc),
22 ISplit(i, j) => eval1(instrs, input, i, cc) ||
23 eval1(instrs, input, j, cc),
2124 }
2225 }
99 pub fn eval(instrs: &[Instr], input: &str) -> bool {
1010 let mut stack = vec![ EvalState {pc: 0, cc: 0} ];
1111
12 /* Every time we find that a branch cannot match, we
13 * remove it from the stack. If we have completed a match,
14 * we'll short-circuit out of this loop; otherwise, an empty
15 * stack means we have failed every possible branch and can
16 * return false. */
1217 while stack.len() > 0 {
18 /* This call to .unwrap() is safe because we've already
19 * manually checked the stack length. */
1320 let st = stack.pop().unwrap();
1421 match instrs[st.pc] {
1522 IChar(_) if st.cc >= input.len() =>