gdritter repos rust-examples / f756de9
Updated regexp example to modern Rust as well — Getty Ritter, 10 years ago
5 changed file(s) with 46 addition(s) and 42 deletion(s). Collapse all Expand all
11 use re::instruction::{Instr,IChar,IMatch,IJmp,ISplit};
2 use std::vec::append;
3 mod instruction;
2 // use std::vec::Vec;
43
54 /* A regular expression parse tree */
/* Parse tree produced by `parse` and consumed by `emit`. In this diff the
 * `~Regexp` lines are the pre-change form and the `Box<Regexp>` lines the
 * post-change form of the same three variants. */
5 #[deriving(Show)]
66 enum Regexp {
/* A single literal character; `parse` builds it for any character that
 * is not one of the operators '.', '|' or '*'. */
77 RChar(char),
8 RSeq(~Regexp, ~Regexp),
9 RChc(~Regexp, ~Regexp),
10 RRep(~Regexp),
/* Built by `parse` for the prefix operator '.': the two regexps that
 * follow it, emitted one after the other by `emit`. */
8 RSeq(Box<Regexp>, Box<Regexp>),
/* Built by `parse` for the prefix operator '|': the two alternatives
 * that follow it. */
9 RChc(Box<Regexp>, Box<Regexp>),
/* Built by `parse` for the prefix operator '*': the single regexp that
 * follows it, which `emit` wraps in a split/jump loop. */
10 RRep(Box<Regexp>),
1111 }
1212
1313 /* We're assuming a prefix regexp here. That means that we have
2323 match s.char_at(0) {
2424 '.' => { let (s1, r1) = parse(s.slice_from(1));
2525 let (s2, r2) = parse(s1);
26 (s2, RSeq(~r1, ~r2)) },
26 (s2, RSeq(box r1, box r2)) },
2727 '|' => { let (s1, r1) = parse(s.slice_from(1));
2828 let (s2, r2) = parse(s1);
29 (s2, RChc(~r1, ~r2)) },
29 (s2, RChc(box r1, box r2)) },
3030 '*' => { let (s1, r1) = parse(s.slice_from(1));
31 (s1, RRep(~r1)) },
31 (s1, RRep(box r1)) },
3232 c => (s.slice_from(1), RChar(c)),
3333 }
3434 }
3535
3636 /* Compiling an AST for regexps to the instructions */
/* Translates the parse tree `r` into a flat instruction vector.
 *
 * `i` is the index the first emitted instruction will occupy in the final
 * program. The result is `(next, instrs)`, where `next` is the first index
 * after the emitted instructions (for RChar: one instruction, `i+1`).
 *
 * The first copy of each arm below (using `~[...]` and `append`) is the
 * pre-change revision; the second copy (using `vec![...]` and
 * `push_all_move`) is the post-change revision of the same logic. */
37 fn emit(r: &Regexp, i: uint) -> (uint, ~[Instr]) {
37 fn emit(r: &Regexp, i: uint) -> (uint, Vec<Instr>) {
3838 match *r {
39 RChar(c) => { (i+1, ~[IChar(c)]) },
40 RSeq(ref a, ref b) =>
41 { let (ai, v1) = emit(*a, i);
42 let (bi, v2) = emit(*b, ai);
43 (bi, append(v1, v2)) },
44 RChc(ref a, ref b) =>
45 { let (ai, v1) = emit(*a, i + 1);
46 let (bi, v2) = emit(*b, ai + 1);
47 let spl = ~[ ISplit(i + 1, ai + 1) ];
48 let jmp = ~[ IJmp(ai) ];
49 (bi, append(spl, append(v1, append(jmp, v2)))) },
50 RRep(ref a) =>
51 { let (ai, v1) = emit(*a, i + 1);
52 let spl = ~[ ISplit(i + 1, ai + 1) ];
53 let jmp = ~[ IJmp(i) ];
54 (ai + 1, append(spl, append(v1, jmp))) },
39 RChar(c) => { (i+1, vec![IChar(c)]) },
/* Sequence: `b` starts at the index where `a` ended, so the two
 * instruction vectors are simply concatenated. */
40 RSeq(box ref a, box ref b) =>
41 { let (ai, mut v1) = emit(a, i);
42 let (bi, v2) = emit(b, ai);
43 v1.push_all_move(v2);
44 (bi, v1) },
/* Choice layout: split at `i`, `a` at i+1 .. ai-1, jump at `ai`,
 * `b` at ai+1 .. bi-1. The split's operands are the start indices of
 * the two alternatives.
 * NOTE(review): the jump is stored at index `ai` and its operand is
 * also `ai`. If IJmp takes an absolute instruction index (as the RRep
 * arm's `IJmp(i)` back to its split suggests), this jumps to itself
 * instead of past `b`; `bi` looks like the intended target. The
 * pre-change arm above has the same operand. TODO confirm against the
 * evaluators. */
45 RChc(box ref a, box ref b) =>
46 { let (ai, v1) = emit(a, i + 1);
47 let (bi, v2) = emit(b, ai + 1);
48 let mut spl = vec![ ISplit(i + 1, ai + 1) ];
49 let jmp = vec![ IJmp(ai) ];
50 spl.push_all_move(v1);
51 spl.push_all_move(jmp);
52 spl.push_all_move(v2);
53 (bi, spl) },
/* Repetition layout: split at `i`, `a` at i+1 .. ai-1, jump at `ai`
 * back to the split at `i`; the split's second operand `ai + 1` is
 * the first index after the loop, which is also the returned index. */
54 RRep(box ref a) =>
55 { let (ai, v1) = emit(a, i + 1);
56 let mut spl = vec![ ISplit(i + 1, ai + 1) ];
57 let jmp = vec![ IJmp(i) ];
58 spl.push_all_move(v1);
59 spl.push_all_move(jmp);
60 (ai + 1, spl) },
5561 }
5662 }
5763
5864 /* A wrapper over these processes */
/* Parses the prefix-notation regexp `s`, compiles the resulting tree with
 * `emit` starting at instruction index 0, and returns the program with a
 * final IMatch appended. The parse tree and the emitted instructions are
 * printed to stdout along the way. Each statement appears twice where the
 * diff shows its pre-change and post-change form. */
59 pub fn compile(s: &str) -> ~[Instr] {
65 pub fn compile(s: &str) -> Vec<Instr> {
/* The first tuple element is the input `parse` left unconsumed; it is
 * discarded here, so any trailing text after the first complete regexp
 * is ignored. */
6066 let (_, re) = parse(s);
61 println!("{:?}", re);
67 println!("{}", re);
/* The returned next-free index is not needed; IMatch is appended at the
 * end of the vector regardless. */
6268 let (_, ins) = emit(&re, 0);
63 println!("{:?}", ins);
64 return append(ins, [IMatch]);
69 println!("{}", ins);
70 return ins.append([IMatch]);
6571 }
11 /* A single instruction as used in the VM-based matcher */
2 #[deriving(Clone)]
2 #[deriving(Clone,Show)]
33 pub enum Instr {
44 IChar(char), /* match a character or fail */
55 IMatch, /* match anything successfully */
11 use re::instruction::{Instr,IChar,IMatch,IJmp,ISplit};
2 mod instruction;
32
43 /* We wrap the real evaluation function, as we're always going to
54 * start executing instruction 0 with no string matched. */
11 use re::instruction::{Instr,IChar,IMatch,IJmp,ISplit};
2 mod instruction;
32
43 /* The state of a program can be unambiguously specified by
54 * a current instruction and a current position in the string. */
87 /* An evaluator that maintains a manual, mutable stack for doing
98 * regular-expression matching. */
109 pub fn eval(instrs: &[Instr], input: &str) -> bool {
11 let mut stack = ~[ EvalState {pc: 0, cc: 0} ];
10 let mut stack = vec![ EvalState {pc: 0, cc: 0} ];
1211
1312 while stack.len() > 0 {
14 let st = stack.pop();
13 let st = stack.pop().unwrap();
1514 match instrs[st.pc] {
1615 IChar(_) if st.cc >= input.len() =>
1716 continue,
1414 /* our sample regexp corresponds to /ab*c/ in
1515 * the usual notation. */
1616 let re = compile("..a*bc");
17 println("Recursive:");
17 println!("Recursive:");
1818 println!(" match(re, \"abbbc\")\t== {}",
19 ::re::recursive::eval(re, "abbbc"));
19 ::re::recursive::eval(re.as_slice(), "abbbc"));
2020 println!(" match(re, \"ac\")\t== {}",
21 ::re::recursive::eval(re, "ac"));
21 ::re::recursive::eval(re.as_slice(), "ac"));
2222 println!(" match(re, \"abd\")\t== {}",
23 ::re::recursive::eval(re, "abd"));
24 println("Manual Stack:");
23 ::re::recursive::eval(re.as_slice(), "abd"));
24 println!("Manual Stack:");
2525 println!(" match(re, \"abbbc\")\t== {}",
26 ::re::stack::eval(re, "abbbc"));
26 ::re::stack::eval(re.as_slice(), "abbbc"));
2727 println!(" match(re, \"ac\")\t== {}",
28 ::re::stack::eval(re, "ac"));
28 ::re::stack::eval(re.as_slice(), "ac"));
2929 println!(" match(re, \"abd\")\t== {}",
30 ::re::stack::eval(re, "abd"));
30 ::re::stack::eval(re.as_slice(), "abd"));
3131 }