gdritter repos rust-examples / 938ddfe
Updated regexp example to latest nightly Getty Ritter 9 years ago
5 changed file(s) with 46 addition(s) and 45 deletion(s). Collapse all Expand all
1 use re::instruction::{Instr,IChar,IMatch,IJmp,ISplit};
1 use re::instruction::Instr;
22 // use std::vec::Vec;
33
44 /* A regular expression parse tree */
55 #[deriving(Show)]
66 enum Regexp {
7 RChar(char),
8 RSeq(Box<Regexp>, Box<Regexp>),
9 RChc(Box<Regexp>, Box<Regexp>),
10 RRep(Box<Regexp>),
7 Char(char),
8 Seq(Box<Regexp>, Box<Regexp>),
9 Chc(Box<Regexp>, Box<Regexp>),
10 Rep(Box<Regexp>),
1111 }
1212
1313 /* We're assuming a prefix regexp here. That means that we have
2323 match s.char_at(0) {
2424 '.' => { let (s1, r1) = parse(s.slice_from(1));
2525 let (s2, r2) = parse(s1);
26 (s2, RSeq(box r1, box r2)) },
26 (s2, Regexp::Seq(box r1, box r2)) },
2727 '|' => { let (s1, r1) = parse(s.slice_from(1));
2828 let (s2, r2) = parse(s1);
29 (s2, RChc(box r1, box r2)) },
29 (s2, Regexp::Chc(box r1, box r2)) },
3030 '*' => { let (s1, r1) = parse(s.slice_from(1));
31 (s1, RRep(box r1)) },
32 c => (s.slice_from(1), RChar(c)),
31 (s1, Regexp::Rep(box r1)) },
32 c => (s.slice_from(1), Regexp::Char(c)),
3333 }
3434 }
3535
4545 * | Instr::Char(chr)
4646 * ---- <- pc + 1
4747 */
48 RChar(chr) => { (pc+1, vec![IChar(chr)]) },
48 Regexp::Char(chr) => { (pc+1, vec![Instr::Char(chr)]) },
4949 /* For a sequencing, we produce this code:
5050 * ---- <- pc
5151 * | [[ first ]]
5353 * | [[ second ]]
5454 * ---- <- second_pc
5555 */
56 RSeq(box ref first, box ref second) =>
56 Regexp::Seq(box ref first, box ref second) =>
5757 { let (first_pc, mut v1) = emit(first, pc);
5858 let (second_pc, v2) = emit(second, first_pc);
59 v1.push_all_move(v2);
59 v1.push_all(v2.as_slice());
6060 (second_pc, v1)
6161 },
6262 /* For a choice, we produce this code:
7070 * | [[ second ]]
7171 * ---- <- second_pc
7272 */
73 RChc(box ref first, box ref second) =>
73 Regexp::Chc(box ref first, box ref second) =>
7474 { let (first_pc, v1) = emit(first, pc + 1);
7575 let (second_pc, v2) = emit(second, first_pc + 1);
76 let mut split_instr = vec![ ISplit(pc + 1, first_pc + 1) ];
77 let jmp_instr = vec![ IJmp(second_pc) ];
78 split_instr.push_all_move(v1);
79 split_instr.push_all_move(jmp_instr);
80 split_instr.push_all_move(v2);
76 let mut split_instr = vec![ Instr::Split(pc + 1, first_pc + 1) ];
77 let jmp_instr = vec![ Instr::Jmp(second_pc) ];
78 split_instr.push_all(v1.as_slice());
79 split_instr.push_all(jmp_instr.as_slice());
80 split_instr.push_all(v2.as_slice());
8181 (second_pc, split_instr)
8282 },
8383 /* For a repetition, we produce this code:
8989 * | Instr::Jmp(pc)
9090 * ---- <- expr_pc + 1
9191 */
92 RRep(box ref expr) =>
92 Regexp::Rep(box ref expr) =>
9393 { let (expr_pc, v1) = emit(expr, pc + 1);
94 let mut spl = vec![ ISplit(pc + 1, expr_pc + 1) ];
95 let jmp = vec![ IJmp(pc) ];
96 spl.push_all_move(v1);
97 spl.push_all_move(jmp);
94 let mut spl = vec![ Instr::Split(pc + 1, expr_pc + 1) ];
95 let jmp = vec![ Instr::Jmp(pc) ];
96 spl.push_all(v1.as_slice());
97 spl.push_all(jmp.as_slice());
9898 (expr_pc + 1, spl)
9999 },
100100 }
104104 pub fn compile(s: &str) -> Vec<Instr> {
105105 let (_, re) = parse(s);
106106 println!("{}", re);
107 let (_, ins) = emit(&re, 0);
107 let (_, mut ins) = emit(&re, 0);
108108 println!("{}", ins);
109109 /* If we get to the end of a compiled regular expression,
110110 * that means it hasn't aborted and we can match.
111111 */
112 return ins.append([IMatch]);
112 ins.push(Instr::Match);
113 return ins;
113114 }
11 /* A single instruction as used in the VM-based matcher */
22 #[deriving(Clone,Show)]
33 pub enum Instr {
4 IChar(char), /* match a character or fail */
5 IMatch, /* match anything successfully */
6 IJmp(uint) , /* jump to instr i */
7 ISplit(uint, uint), /* try both instrs i and j */
8 }
4 Char(char), /* match a character or fail */
5 Match, /* stop and report a successful match */
6 Jmp(uint) , /* jump to instr i */
7 Split(uint, uint), /* try both instrs i and j */
8 }
11 pub use re::compile::compile;
2 pub use re::instruction::{Instr,IChar,IMatch,IJmp,ISplit};
2 pub use re::instruction::Instr;
33 pub mod compile;
44 pub mod instruction;
55 pub mod recursive;
1 use re::instruction::{Instr,IChar,IMatch,IJmp,ISplit};
1 use re::instruction::Instr;
22
33 /* We wrap the real evaluation function, as we're always going to
44 * start executing instruction 0 with no string matched. */
1313 * traversed the string. */
1414 fn eval1(instrs: &[Instr], input: &str, pc: uint, cc: uint) -> bool {
1515 match instrs[pc] {
16 IChar(_) if cc >= input.len() => return false,
17 IChar(c) if c == input.char_at(cc) =>
16 Instr::Char(_) if cc >= input.len() => return false,
17 Instr::Char(c) if c == input.char_at(cc) =>
1818 eval1(instrs, input, pc + 1, cc + 1),
19 IChar(_) => return false,
20 IMatch => return true,
21 IJmp(i) => eval1(instrs, input, i, cc),
22 ISplit(i, j) => eval1(instrs, input, i, cc) ||
19 Instr::Char(_) => return false,
20 Instr::Match => return true,
21 Instr::Jmp(i) => eval1(instrs, input, i, cc),
22 Instr::Split(i, j) => eval1(instrs, input, i, cc) ||
2323 eval1(instrs, input, j, cc),
2424 }
2525 }
1 use re::instruction::{Instr,IChar,IMatch,IJmp,ISplit};
1 use re::instruction::Instr;
22
33 /* The state of a program can be unambiguously specified by
44 * a current instruction and a current position in the string. */
1919 * manually checked the stack length. */
2020 let st = stack.pop().unwrap();
2121 match instrs[st.pc] {
22 IChar(_) if st.cc >= input.len() =>
22 Instr::Char(_) if st.cc >= input.len() =>
2323 continue,
24 IChar(c) if c == input.char_at(st.cc) =>
24 Instr::Char(c) if c == input.char_at(st.cc) =>
2525 stack.push(EvalState { pc: st.pc + 1, cc: st.cc + 1 }),
26 IChar(_) =>
26 Instr::Char(_) =>
2727 continue,
28 IMatch =>
28 Instr::Match =>
2929 return true,
30 IJmp(i) =>
30 Instr::Jmp(i) =>
3131 stack.push(EvalState { pc: i, cc: st.cc }),
32 ISplit(i, j) => {
32 Instr::Split(i, j) => {
3333 stack.push(EvalState { pc: j, cc: st.cc });
3434 stack.push(EvalState { pc: i, cc: st.cc });
3535 },