Commit baa8e8ea7613cfe0df6eee57bddaa9fe8b565525 - rust-examples

Updated regexp example to Rust stable — Getty Ritter, 9 years ago

5 changed file(s) with 73 addition(s) and 44 deletion(s). Collapse all Expand all

+11

-8

regexp/src/main.rs less more

10	10	use re::compile;
11	11	mod re;
12	12
	13	fn as_chars(s: &str) -> Vec<char> {
	14	s.chars().collect()
	15	}
	16
13	17	fn main() {
14	18	/* our sample regexp corresponds to /ab*c/ in
15	19	* the usual notation.
16	20	* These two lines can be collapsed into one once
17	21	* this RFC lands: https://github.com/rust-lang/rfcs/pull/66
18	22	*/
19		let regexp = compile("..a*bc");
20		let instrs = regexp.as_slice();
	23	let instrs = &compile("..a*bc");
21	24
22	25	println!("Recursive:");
23	26	println!(" match(re, \"abbbc\")\t== {}",
24		::re::recursive::eval(instrs, ~~"abbbc"~~));
	27	::re::recursive::eval(instrs, &as_chars("abbbc")));
25	28	println!(" match(re, \"ac\")\t== {}",
26		::re::recursive::eval(instrs, ~~"ac"~~));
	29	::re::recursive::eval(instrs, &as_chars("ac")));
27	30	println!(" match(re, \"abd\")\t== {}",
28		::re::recursive::eval(instrs, ~~"abd"~~));
	31	::re::recursive::eval(instrs, &as_chars("abd")));
29	32
30	33	println!("Manual Stack:");
31	34	println!(" match(re, \"abbbc\")\t== {}",
32		::re::stack::eval(instrs, ~~"abbbc"~~));
	35	::re::stack::eval(instrs, &as_chars("abbbc")));
33	36	println!(" match(re, \"ac\")\t== {}",
34		::re::stack::eval(instrs, ~~"ac"~~));
	37	::re::stack::eval(instrs, &as_chars("ac")));
35	38	println!(" match(re, \"abd\")\t== {}",
36		::re::stack::eval(instrs, ~~"abd"~~));
	39	::re::stack::eval(instrs, &as_chars("abd")));
37	40	}

+52

-26

regexp/src/re/compile.rs less more

1	1	use re::instruction::Instr;
2		~~// use std::vec::Vec~~;
	2	use std::str::Chars;
3	3
4	4	/* A regular expression parse tree */
5		#[deriv~~ing(Show~~)]
	5	#[derive(Debug)]
6	6	enum Regexp {
7	7	Char(char),
8	8	Seq(Box<Regexp>, Box<Regexp>),
9	9	Chc(Box<Regexp>, Box<Regexp>),
10	10	Rep(Box<Regexp>),
	11	}
	12
	13	fn chr(c: char) -> Box<Regexp> {
	14	Box::new(Regexp::Char(c))
	15	}
	16
	17	fn seq(l: Box<Regexp>, r: Box<Regexp>) -> Box<Regexp> {
	18	Box::new(Regexp::Seq(l, r))
	19	}
	20
	21	fn chc(l: Box<Regexp>, r: Box<Regexp>) -> Box<Regexp> {
	22	Box::new(Regexp::Chc(l, r))
	23	}
	24
	25	fn rep(x: Box<Regexp>) -> Box<Regexp> {
	26	Box::new(Regexp::Rep(x))
11	27	}
12	28
13	29	/* We're assuming a prefix regexp here. That means that we have

19	35	* *|c.ab
20	36	* This is easier to parse. Deal with it.
21	37	*/
22		fn parse<'a>(s: &'a str) -> (&'a str, Regexp) {
23		match s.char_at(0) {
24		'.' => { let (s1, r1) = parse(s.slice_from(1));
25		let (s2, r2) = parse(s1);
26		(s2, Regexp::Seq(box r1, box r2)) },
27		'|' => { let (s1, r1) = parse(s.slice_from(1));
28		let (s2, r2) = parse(s1);
29		(s2, Regexp::Chc(box r1, box r2)) },
30		'*' => { let (s1, r1) = parse(s.slice_from(1));
31		(s1, Regexp::Rep(box r1)) },
32		c => (s.slice_from(1), Regexp::Char(c)),
	38	fn parse<'a>(s: &'a mut Chars<'a>) -> (&'a mut Chars<'a>, Box<Regexp>) {
	39	match s.next() {
	40	Some('.') => { let (s1, r1) = parse(s);
	41	let (s2, r2) = parse(s1);
	42	(s2, seq(r1, r2)) },
	43	Some('|') => { let (s1, r1) = parse(s);
	44	let (s2, r2) = parse(s1);
	45	(s2, chc(r1, r2)) },
	46	Some('*') => { let (s1, r1) = parse(s);
	47	(s1, rep(r1)) },
	48	Some(c) => (s, chr(c)),
	49	None => panic!("Unexpected EOF"),
	50	}
	51	}
	52
	53	/* This should eventually be added to a stable API, but right now
	54	* isn't available in the stable stdlib. */
	55	fn push_all<'a, A, I>(target: &mut Vec<A>, source: I)
	56	where A: Clone + 'a, I: Iterator<Item=&'a A> {
	57	for x in source {
	58	target.push(x.clone());
33	59	}
34	60	}
35	61

38	64	* vector (so that subsequent instructions to be added
39	65	* know what pc to use) and the vector of instructions.
40	66	*/
41		fn emit(regexp: &Regexp, pc: u~~int) -> (uint~~, Vec<Instr>) {
	67	fn emit(regexp: &Regexp, pc: usize) -> (usize, Vec<Instr>) {
42	68	match *regexp {
43	69	/* For a match, we produce this code:
44	70	* ---- <- pc

53	79	* | [[ second ]]
54	80	* ---- <- second_pc
55	81	*/
56		Regexp::Seq(~~box ref first, box~~ ref second) =>
	82	Regexp::Seq(ref first, ref second) =>
57	83	{ let (first_pc, mut v1) = emit(first, pc);
58	84	let (second_pc, v2) = emit(second, first_pc);
59		~~v1.push_all(v2.as_slice~~());
	85	push_all(&mut v1, v2.iter());
60	86	(second_pc, v1)
61	87	},
62	88	/* For a choice, we produce this code:

70	96	* | [[ second ]]
71	97	* ---- <- second_pc
72	98	*/
73		Regexp::Chc(~~box ref first, box~~ ref second) =>
	99	Regexp::Chc(ref first, ref second) =>
74	100	{ let (first_pc, v1) = emit(first, pc + 1);
75	101	let (second_pc, v2) = emit(second, first_pc + 1);
76	102	let mut split_instr = vec![ Instr::Split(pc + 1, first_pc + 1) ];
77	103	let jmp_instr = vec![ Instr::Jmp(second_pc) ];
78		split_instr.push_all(v1.as_slice());
79		split_instr.push_all(jmp_instr.as_slice());
80		~~split_instr.push_all(v2.as_slice~~());
	104	push_all(&mut split_instr, v1.iter());
	105	push_all(&mut split_instr, jmp_instr.iter());
	106	push_all(&mut split_instr, v2.iter());
81	107	(second_pc, split_instr)
82	108	},
83	109	/* For a repetition, we produce this code:

89	115	* | IJmp(pc)
90	116	* ---- <- expr_pc + 1
91	117	*/
92		Regexp::Rep(~~box~~ ref expr) =>
	118	Regexp::Rep(ref expr) =>
93	119	{ let (expr_pc, v1) = emit(expr, pc + 1);
94	120	let mut spl = vec![ Instr::Split(pc + 1, expr_pc + 1) ];
95	121	let jmp = vec![ Instr::Jmp(pc) ];
96		spl.push_all(v1.as_slice());
97		spl.push_all(jmp.as_slice());
	122	push_all(&mut spl, v1.iter());
	123	push_all(&mut spl, jmp.iter());
98	124	(expr_pc + 1, spl)
99	125	},
100	126	}

102	128
103	129	/* A wrapper over these processes */
104	130	pub fn compile(s: &str) -> Vec<Instr> {
105		let (_, re) = parse(s);
106		println!("{}", re);
	131	let (_, re) = parse(&mut s.chars());
	132	println!("{:?}", re);
107	133	let (_, mut ins) = emit(&re, 0);
108		println!("{}", ins);
	134	println!("{:?}", ins);
109	135	/* If we get to the end of a compiled regular expression,
110	136	* that means it hasn't aborted and we can match.
111	137	*/

-4

regexp/src/re/instruction.rs less more

1	1	/* A single instruction as used in the VM-based matcher */
2		#[deriv~~ing(Clone,Show~~)]
	2	#[derive(Clone,Debug)]
3	3	pub enum Instr {
4	4	Char(char), /* match a character or fail */
5	5	Match, /* match anything successfully */
6		Jmp(uint) , /* jump to instr i */
7		Split(uint, uint), /* try both instrs i and j */
8		}⏎
	6	Jmp(usize), /* jump to instr i */
	7	Split(usize, usize), /* try both instrs i and j */
	8	}

-3

regexp/src/re/recursive.rs less more

2	2
3	3	/* We wrap the real evaluation function, as we're always going to
4	4	* start executing instruction 0 with no string matched. */
5		pub fn eval(instrs: &[Instr], input: &~~str~~) -> bool {
	5	pub fn eval(instrs: &[Instr], input: &[char]) -> bool {
6	6	eval1(instrs, input, 0, 0)
7	7	}
8	8

11	11	* a string we're matching over, the current program counter
12	12	* in the instructions, and the current point to which we've
13	13	* traversed the string. */
14		fn eval1(instrs: &[Instr], input: &~~str, pc: uint, cc: uint~~) -> bool {
	14	fn eval1(instrs: &[Instr], input: &[char], pc: usize, cc: usize) -> bool {
15	15	match instrs[pc] {
16	16	Instr::Char(_) if cc >= input.len() => return false,
17		Instr::Char(c) if c == input~~.char_at(cc)~~ =>
	17	Instr::Char(c) if c == input[cc] =>
18	18	eval1(instrs, input, pc + 1, cc + 1),
19	19	Instr::Char(_) => return false,
20	20	Instr::Match => return true,

-3

regexp/src/re/stack.rs less more

2	2
3	3	/* The state of a program can be unambiguously specified by
4	4	* a current instruction and a current position in the string. */
5		struct EvalState { pc: u~~int, cc: uint~~ }
	5	struct EvalState { pc: usize, cc: usize }
6	6
7	7	/* An evaluator that maintains a manual, mutable stack for doing
8	8	* regular-expression matching. */
9		pub fn eval(instrs: &[Instr], input: &~~str~~) -> bool {
	9	pub fn eval(instrs: &[Instr], input: &[char]) -> bool {
10	10	let mut stack = vec![ EvalState {pc: 0, cc: 0} ];
11	11
12	12	/* Every time we find that a possibility is impossible, we

21	21	match instrs[st.pc] {
22	22	Instr::Char(_) if st.cc >= input.len() =>
23	23	continue,
24		Instr::Char(c) if c == input~~.char_at(st.cc)~~ =>
	24	Instr::Char(c) if c == input[st.cc] =>
25	25	stack.push(EvalState { pc: st.pc + 1, cc: st.cc + 1 }),
26	26	Instr::Char(_) =>
27	27	continue,