gdritter repos rust-examples / a271cb1
Updated regexp to use Cargo, as well. Getty Ritter 9 years ago
16 changed file(s) with 177 addition(s) and 177 deletion(s). Collapse all Expand all
1 [package]
3 name = "regexp"
4 version = "0.0.2"
5 authors = [ "" ]
regexp/ less more
# Remove the file named `regexp` from the current directory if it exists.
1 if [ -e regexp ]; then rm regexp; fi
regexp/ less more
# Declare a dependency on the `regexp` target (presumably the default
# target of this redo build — confirm against the script's file name).
1 redo-ifchange regexp
regexp/re/ less more
1 use re::instruction::{Instr,IChar,IMatch,IJmp,ISplit};
2 // use std::vec::Vec;
4 /* A regular expression parse tree, as built by `parse` and consumed
 * by `emit`. */
5 #[deriving(Show)]
6 enum Regexp {
7 RChar(char), /* a single literal character */
8 RSeq(Box<Regexp>, Box<Regexp>), /* catenation: first operand, then second */
9 RChc(Box<Regexp>, Box<Regexp>), /* choice between the two operands */
10 RRep(Box<Regexp>), /* zero or more repetitions of the operand */
11 }
13 /* We're assuming a prefix regexp here. That means that we have
14 * the following operators:
15 * .ab => ab
16 * |ab => a|b
17 * *a => a*
18 * but these nest, so (ab|c)* would become
19 * *|.abc
20 * This is easier to parse. Deal with it.
 *
 * Parses exactly one expression from the front of `s` and returns the
 * unconsumed remainder of `s` together with the parsed tree. Any
 * character other than '.', '|' and '*' is taken as a literal, so the
 * three operator characters themselves cannot be matched literally.
 * NOTE(review): `s.char_at(0)` is called unconditionally, so an empty
 * or truncated expression (e.g. "" or ".a") presumably panics — confirm.
21 */
22 fn parse<'a>(s: &'a str) -> (&'a str, Regexp) {
23 match s.char_at(0) {
24 '.' => { let (s1, r1) = parse(s.slice_from(1)); /* first operand */
25 let (s2, r2) = parse(s1); /* second operand starts where the first ended */
26 (s2, RSeq(box r1, box r2)) },
27 '|' => { let (s1, r1) = parse(s.slice_from(1)); /* first alternative */
28 let (s2, r2) = parse(s1); /* second alternative */
29 (s2, RChc(box r1, box r2)) },
30 '*' => { let (s1, r1) = parse(s.slice_from(1)); /* the repeated operand */
31 (s1, RRep(box r1)) },
32 c => (s.slice_from(1), RChar(c)),
33 }
34 }
36 /* Compiling an AST for regexps to the instructions.
 *
 * `i` is the index that the first instruction emitted for `r` will
 * occupy in the finished program. Jump and split targets are stored as
 * absolute indices, so every sub-expression has to be told where it
 * starts. Returns (index just past the emitted code, the emitted code). */
37 fn emit(r: &Regexp, i: uint) -> (uint, Vec<Instr>) {
38 match *r {
 /* A literal is a single instruction. */
39 RChar(c) => { (i+1, vec![IChar(c)]) },
 /* Catenation: b's code starts where a's code ends. */
40 RSeq(box ref a, box ref b) =>
41 { let (ai, mut v1) = emit(a, i);
42 let (bi, v2) = emit(b, ai);
43 v1.push_all_move(v2);
44 (bi, v1) },
 /* Choice is laid out as
  *     i:    split i+1, ai+1
  *     i+1:  code for a        (ends just before ai)
  *     ai:   jmp bi
  *     ai+1: code for b        (ends just before bi)
  * The jmp has to land on bi, the first index after the whole choice.
  * Targeting ai would make the jmp jump to itself, so the matcher
  * would never terminate once the first alternative had matched. */
45 RChc(box ref a, box ref b) =>
46 { let (ai, v1) = emit(a, i + 1);
47 let (bi, v2) = emit(b, ai + 1);
48 let mut spl = vec![ ISplit(i + 1, ai + 1) ];
49 let jmp = vec![ IJmp(bi) ];
50 spl.push_all_move(v1);
51 spl.push_all_move(jmp);
52 spl.push_all_move(v2);
53 (bi, spl) },
 /* Repetition is laid out as
  *     i:    split i+1, ai+1
  *     i+1:  code for a        (ends just before ai)
  *     ai:   jmp i
  * so after each pass through a the split is tried again; ai+1 is
  * the first index after the loop. */
54 RRep(box ref a) =>
55 { let (ai, v1) = emit(a, i + 1);
56 let mut spl = vec![ ISplit(i + 1, ai + 1) ];
57 let jmp = vec![ IJmp(i) ];
58 spl.push_all_move(v1);
59 spl.push_all_move(jmp);
60 (ai + 1, spl) },
61 }
62 }
64 /* A wrapper over these processes: parse the prefix-notation regexp
 * `s`, compile the resulting tree with `emit` starting at instruction
 * index 0, and finish the program with IMatch.
 * Any input that `parse` leaves unconsumed is discarded.
 * The parse tree and the instruction list (as it is before IMatch is
 * added) are printed to stdout. */
65 pub fn compile(s: &str) -> Vec<Instr> {
66 let (_, re) = parse(s);
67 println!("{}", re);
68 let (_, ins) = emit(&re, 0);
69 println!("{}", ins);
70 return ins.append([IMatch]);
71 }
regexp/re/ less more
1 /* A single instruction as used in the VM-based matcher.
 * The uint operands are indices into the instruction slice that the
 * evaluators are given. */
2 #[deriving(Clone,Show)]
3 pub enum Instr {
4 IChar(char), /* match this character and advance, or fail */
5 IMatch, /* stop: report a successful match */
6 IJmp(uint) , /* jump to instr i */
7 ISplit(uint, uint), /* try instr i first; if that fails, try instr j */
8 }
regexp/re/ less more
1 pub use re::compile::compile;
2 pub use re::instruction::{Instr,IChar,IMatch,IJmp,ISplit};
3 pub use re::recursive::eval;
4 pub use re::stack::eval;
5 pub mod compile;
6 pub mod instruction;
7 pub mod recursive;
8 pub mod stack;
regexp/re/ less more
1 use re::instruction::{Instr,IChar,IMatch,IJmp,ISplit};
3 /* We wrap the real evaluation function, as we're always going to
4 * start executing instruction 0 with no string matched.
 * Returns true if running `instrs` over `input` reaches IMatch. */
5 pub fn eval(instrs: &[Instr], input: &str) -> bool {
6 eval1(instrs, input, 0, 0)
7 }
9 /* We use the Rust stack as our stack in this naive recursive
10 * implementation.
 * `pc` is the index of the instruction to run and `cc` is the current
 * position in `input`. IMatch succeeds wherever `cc` happens to be, so
 * any input left over after the match is ignored.
 * NOTE(review): `cc` is advanced by 1 per matched character while also
 * being passed to `char_at` and compared with `len()`; presumably those
 * are byte offsets, which would only line up for single-byte
 * characters — confirm before feeding non-ASCII input. */
11 fn eval1(instrs: &[Instr], input: &str, pc: uint, cc: uint) -> bool {
12 match instrs[pc] {
13 IChar(_) if cc >= input.len() => return false, /* ran out of input */
14 IChar(c) if c == input.char_at(cc) =>
15 eval1(instrs, input, pc + 1, cc + 1),
16 IChar(_) => return false, /* wrong character */
17 IMatch => return true,
18 IJmp(i) => eval1(instrs, input, i, cc),
19 ISplit(i, _) if eval1(instrs, input, i, cc) => true, /* first branch matched */
20 ISplit(_, j) => eval1(instrs, input, j, cc), /* otherwise try the second branch */
21 }
22 }
regexp/re/ less more
1 use re::instruction::{Instr,IChar,IMatch,IJmp,ISplit};
3 /* The state of a program can be unambiguously specified by
4 * a current instruction and a current position in the string.
 * `pc` is the index into the instruction slice; `cc` is the position
 * in the input string. */
5 struct EvalState { pc: uint, cc: uint }
7 /* An evaluator that maintains a manual, mutable stack for doing
8 * regular-expression matching. */
9 pub fn eval(instrs: &[Instr], input: &str) -> bool {
10 let mut stack = vec![ EvalState {pc: 0, cc: 0} ];
12 while stack.len() > 0 {
13 let st = stack.pop().unwrap();
14 match instrs[st.pc] {
15 IChar(_) if >= input.len() =>
16 continue,
17 IChar(c) if c == input.char_at( =>
18 stack.push(EvalState { pc: st.pc + 1, cc: + 1 }),
19 IChar(_) =>
20 continue,
21 IMatch =>
22 return true,
23 IJmp(i) =>
24 stack.push(EvalState { pc: i, cc: }),
25 ISplit(i, j) => {
26 stack.push(EvalState { pc: j, cc: });
27 stack.push(EvalState { pc: i, cc: });
28 },
29 }
30 }
31 return false;
32 }
regexp/ less more
# Declare the source files as dependencies, then compile with rustc,
# writing the output to the path given in $3.
# NOTE(review): the file names in DEPS and the source-file argument to
# rustc appear to have been lost from this listing (only the "re/"
# directory prefixes remain) — restore them before using this script.
1 DEPS=" re/ re/ re/ re/ re/"
2 redo-ifchange $DEPS
3 rustc -o $3
regexp/ less more
1 /* This is a basic implementation of a regular expression matcher,
2 * based on Henry Spencer's virtual-machine approach to regular
3 * expression matching outlined by Russ Cox here:
4 * https://swtch.com/~rsc/regexp/regexp2.html
5 *
6 * For ease of parsing, I'm using a highly non-standard Polish
7 * notation for regular expressions, in which . and | are
8 * prefix binary operators for catenation and choice, respectively,
9 * and * is a prefix unary operator for repetition. */
10 use re::compile;
11 mod re;
/* Compile one sample regexp and print the result of matching the same
 * three inputs with each of the two evaluators. */
13 fn main() {
14 /* our sample regexp corresponds to /ab*c/ in
15 * the usual notation. */
16 let re = compile("..a*bc");
 /* First the evaluator that recurses on the Rust stack. */
17 println!("Recursive:");
18 println!(" match(re, \"abbbc\")\t== {}",
19 ::re::recursive::eval(re.as_slice(), "abbbc"));
20 println!(" match(re, \"ac\")\t== {}",
21 ::re::recursive::eval(re.as_slice(), "ac"));
22 println!(" match(re, \"abd\")\t== {}",
23 ::re::recursive::eval(re.as_slice(), "abd"));
 /* Then the evaluator that keeps its own explicit stack. */
24 println!("Manual Stack:");
25 println!(" match(re, \"abbbc\")\t== {}",
26 ::re::stack::eval(re.as_slice(), "abbbc"));
27 println!(" match(re, \"ac\")\t== {}",
28 ::re::stack::eval(re.as_slice(), "ac"));
29 println!(" match(re, \"abd\")\t== {}",
30 ::re::stack::eval(re.as_slice(), "abd"));
31 }
1 /* This is a basic implementation of a regular expression matcher,
2 * based on Henry Spencer's virtual-machine approach to regular
3 * expression matching outlined by Russ Cox here:
4 * https://swtch.com/~rsc/regexp/regexp2.html
5 *
6 * For ease of parsing, I'm using a highly non-standard Polish
7 * notation for regular expressions, in which . and | are
8 * prefix binary operators for catenation and choice, respectively,
9 * and * is a prefix unary operator for repetition. */
10 use re::compile;
11 mod re;
/* Compile one sample regexp and print the result of matching the same
 * three inputs with each of the two evaluators. */
13 fn main() {
14 /* our sample regexp corresponds to /ab*c/ in
15 * the usual notation. */
16 let re = compile("..a*bc");
 /* First the evaluator that recurses on the Rust stack. */
17 println!("Recursive:");
18 println!(" match(re, \"abbbc\")\t== {}",
19 ::re::recursive::eval(re.as_slice(), "abbbc"));
20 println!(" match(re, \"ac\")\t== {}",
21 ::re::recursive::eval(re.as_slice(), "ac"));
22 println!(" match(re, \"abd\")\t== {}",
23 ::re::recursive::eval(re.as_slice(), "abd"));
 /* Then the evaluator that keeps its own explicit stack. */
24 println!("Manual Stack:");
25 println!(" match(re, \"abbbc\")\t== {}",
26 ::re::stack::eval(re.as_slice(), "abbbc"));
27 println!(" match(re, \"ac\")\t== {}",
28 ::re::stack::eval(re.as_slice(), "ac"));
29 println!(" match(re, \"abd\")\t== {}",
30 ::re::stack::eval(re.as_slice(), "abd"));
31 }
1 use re::instruction::{Instr,IChar,IMatch,IJmp,ISplit};
2 // use std::vec::Vec;
4 /* A regular expression parse tree, as built by `parse` and consumed
 * by `emit`. */
5 #[deriving(Show)]
6 enum Regexp {
7 RChar(char), /* a single literal character */
8 RSeq(Box<Regexp>, Box<Regexp>), /* catenation: first operand, then second */
9 RChc(Box<Regexp>, Box<Regexp>), /* choice between the two operands */
10 RRep(Box<Regexp>), /* zero or more repetitions of the operand */
11 }
13 /* We're assuming a prefix regexp here. That means that we have
14 * the following operators:
15 * .ab => ab
16 * |ab => a|b
17 * *a => a*
18 * but these nest, so (ab|c)* would become
19 * *|.abc
20 * This is easier to parse. Deal with it.
 *
 * Parses exactly one expression from the front of `s` and returns the
 * unconsumed remainder of `s` together with the parsed tree. Any
 * character other than '.', '|' and '*' is taken as a literal, so the
 * three operator characters themselves cannot be matched literally.
 * NOTE(review): `s.char_at(0)` is called unconditionally, so an empty
 * or truncated expression (e.g. "" or ".a") presumably panics — confirm.
21 */
22 fn parse<'a>(s: &'a str) -> (&'a str, Regexp) {
23 match s.char_at(0) {
24 '.' => { let (s1, r1) = parse(s.slice_from(1)); /* first operand */
25 let (s2, r2) = parse(s1); /* second operand starts where the first ended */
26 (s2, RSeq(box r1, box r2)) },
27 '|' => { let (s1, r1) = parse(s.slice_from(1)); /* first alternative */
28 let (s2, r2) = parse(s1); /* second alternative */
29 (s2, RChc(box r1, box r2)) },
30 '*' => { let (s1, r1) = parse(s.slice_from(1)); /* the repeated operand */
31 (s1, RRep(box r1)) },
32 c => (s.slice_from(1), RChar(c)),
33 }
34 }
36 /* Compiling an AST for regexps to the instructions.
 *
 * `i` is the index that the first instruction emitted for `r` will
 * occupy in the finished program. Jump and split targets are stored as
 * absolute indices, so every sub-expression has to be told where it
 * starts. Returns (index just past the emitted code, the emitted code). */
37 fn emit(r: &Regexp, i: uint) -> (uint, Vec<Instr>) {
38 match *r {
 /* A literal is a single instruction. */
39 RChar(c) => { (i+1, vec![IChar(c)]) },
 /* Catenation: b's code starts where a's code ends. */
40 RSeq(box ref a, box ref b) =>
41 { let (ai, mut v1) = emit(a, i);
42 let (bi, v2) = emit(b, ai);
43 v1.push_all_move(v2);
44 (bi, v1) },
 /* Choice is laid out as
  *     i:    split i+1, ai+1
  *     i+1:  code for a        (ends just before ai)
  *     ai:   jmp bi
  *     ai+1: code for b        (ends just before bi)
  * The jmp has to land on bi, the first index after the whole choice.
  * Targeting ai would make the jmp jump to itself, so the matcher
  * would never terminate once the first alternative had matched. */
45 RChc(box ref a, box ref b) =>
46 { let (ai, v1) = emit(a, i + 1);
47 let (bi, v2) = emit(b, ai + 1);
48 let mut spl = vec![ ISplit(i + 1, ai + 1) ];
49 let jmp = vec![ IJmp(bi) ];
50 spl.push_all_move(v1);
51 spl.push_all_move(jmp);
52 spl.push_all_move(v2);
53 (bi, spl) },
 /* Repetition is laid out as
  *     i:    split i+1, ai+1
  *     i+1:  code for a        (ends just before ai)
  *     ai:   jmp i
  * so after each pass through a the split is tried again; ai+1 is
  * the first index after the loop. */
54 RRep(box ref a) =>
55 { let (ai, v1) = emit(a, i + 1);
56 let mut spl = vec![ ISplit(i + 1, ai + 1) ];
57 let jmp = vec![ IJmp(i) ];
58 spl.push_all_move(v1);
59 spl.push_all_move(jmp);
60 (ai + 1, spl) },
61 }
62 }
64 /* A wrapper over these processes: parse the prefix-notation regexp
 * `s`, compile the resulting tree with `emit` starting at instruction
 * index 0, and finish the program with IMatch.
 * Any input that `parse` leaves unconsumed is discarded.
 * The parse tree and the instruction list (as it is before IMatch is
 * added) are printed to stdout. */
65 pub fn compile(s: &str) -> Vec<Instr> {
66 let (_, re) = parse(s);
67 println!("{}", re);
68 let (_, ins) = emit(&re, 0);
69 println!("{}", ins);
70 return ins.append([IMatch]);
71 }
1 /* A single instruction as used in the VM-based matcher.
 * The uint operands are indices into the instruction slice that the
 * evaluators are given. */
2 #[deriving(Clone,Show)]
3 pub enum Instr {
4 IChar(char), /* match this character and advance, or fail */
5 IMatch, /* stop: report a successful match */
6 IJmp(uint) , /* jump to instr i */
7 ISplit(uint, uint), /* try instr i first; if that fails, try instr j */
8 }
1 pub use re::compile::compile;
2 pub use re::instruction::{Instr,IChar,IMatch,IJmp,ISplit};
3 pub use re::recursive::eval;
4 pub use re::stack::eval;
5 pub mod compile;
6 pub mod instruction;
7 pub mod recursive;
8 pub mod stack;
1 use re::instruction::{Instr,IChar,IMatch,IJmp,ISplit};
3 /* We wrap the real evaluation function, as we're always going to
4 * start executing instruction 0 with no string matched.
 * Returns true if running `instrs` over `input` reaches IMatch. */
5 pub fn eval(instrs: &[Instr], input: &str) -> bool {
6 eval1(instrs, input, 0, 0)
7 }
9 /* We use the Rust stack as our stack in this naive recursive
10 * implementation.
 * `pc` is the index of the instruction to run and `cc` is the current
 * position in `input`. IMatch succeeds wherever `cc` happens to be, so
 * any input left over after the match is ignored.
 * NOTE(review): `cc` is advanced by 1 per matched character while also
 * being passed to `char_at` and compared with `len()`; presumably those
 * are byte offsets, which would only line up for single-byte
 * characters — confirm before feeding non-ASCII input. */
11 fn eval1(instrs: &[Instr], input: &str, pc: uint, cc: uint) -> bool {
12 match instrs[pc] {
13 IChar(_) if cc >= input.len() => return false, /* ran out of input */
14 IChar(c) if c == input.char_at(cc) =>
15 eval1(instrs, input, pc + 1, cc + 1),
16 IChar(_) => return false, /* wrong character */
17 IMatch => return true,
18 IJmp(i) => eval1(instrs, input, i, cc),
19 ISplit(i, _) if eval1(instrs, input, i, cc) => true, /* first branch matched */
20 ISplit(_, j) => eval1(instrs, input, j, cc), /* otherwise try the second branch */
21 }
22 }
1 use re::instruction::{Instr,IChar,IMatch,IJmp,ISplit};
3 /* The state of a program can be unambiguously specified by
4 * a current instruction and a current position in the string.
 * `pc` is the index into the instruction slice; `cc` is the position
 * in the input string. */
5 struct EvalState { pc: uint, cc: uint }
7 /* An evaluator that maintains a manual, mutable stack for doing
8 * regular-expression matching. */
9 pub fn eval(instrs: &[Instr], input: &str) -> bool {
10 let mut stack = vec![ EvalState {pc: 0, cc: 0} ];
12 while stack.len() > 0 {
13 let st = stack.pop().unwrap();
14 match instrs[st.pc] {
15 IChar(_) if >= input.len() =>
16 continue,
17 IChar(c) if c == input.char_at( =>
18 stack.push(EvalState { pc: st.pc + 1, cc: + 1 }),
19 IChar(_) =>
20 continue,
21 IMatch =>
22 return true,
23 IJmp(i) =>
24 stack.push(EvalState { pc: i, cc: }),
25 ISplit(i, j) => {
26 stack.push(EvalState { pc: j, cc: });
27 stack.push(EvalState { pc: i, cc: });
28 },
29 }
30 }
31 return false;
32 }