Commit a271cb122c8e174207445e333754254d39f7cb47 - rust-examples

+5

-0

regexp/Cargo.toml less more

	1	[package]
	2
	3	name = "regexp"
	4	version = "0.0.2"
	5	authors = [ "gdritter@galois.com" ]

+0

-1

~~regexp/clean.do~~ less more

1

# Remove the built regexp binary, if one is present.
if [ -e regexp ]; then rm regexp; fi

+0

-1

~~regexp/default.do~~ less more

1

# Bring the regexp target up to date.
redo-ifchange regexp

+0

-71

~~regexp/re/compile.rs~~ less more

1		use re::instruction::{Instr,IChar,IMatch,IJmp,ISplit};
2		// use std::vec::Vec;
3
/* A regular expression parse tree, as built by `parse` from the
 * prefix notation and consumed by `emit`. */
#[deriving(Show)]
enum Regexp {
    RChar(char),                    /* a single literal character */
    RSeq(Box<Regexp>, Box<Regexp>), /* catenation: the first, then the second (prefix `.`) */
    RChc(Box<Regexp>, Box<Regexp>), /* choice between the two (prefix `|`) */
    RRep(Box<Regexp>),              /* repetition of the operand (prefix `*`) */
}
12
13		/* We're assuming a prefix regexp here. That means that we have
14		* the following operators:
15		* .ab => ab
16		* \|ab => a\|b
17		* a => a
18		* but these nest, so (ab\|c)* would become
19		* *\|c.ab
20		* This is easier to parse. Deal with it.
21		*/
22		fn parse<'a>(s: &'a str) -> (&'a str, Regexp) {
23		match s.char_at(0) {
24		'.' => { let (s1, r1) = parse(s.slice_from(1));
25		let (s2, r2) = parse(s1);
26		(s2, RSeq(box r1, box r2)) },
27		'\|' => { let (s1, r1) = parse(s.slice_from(1));
28		let (s2, r2) = parse(s1);
29		(s2, RChc(box r1, box r2)) },
30		'*' => { let (s1, r1) = parse(s.slice_from(1));
31		(s1, RRep(box r1)) },
32		c => (s.slice_from(1), RChar(c)),
33		}
34		}
35
36		/* Compiling an AST for regexps to the instructions */
37		fn emit(r: &Regexp, i: uint) -> (uint, Vec<Instr>) {
38		match *r {
39		RChar(c) => { (i+1, vec![IChar(c)]) },
40		RSeq(box ref a, box ref b) =>
41		{ let (ai, mut v1) = emit(a, i);
42		let (bi, v2) = emit(b, ai);
43		v1.push_all_move(v2);
44		(bi, v1) },
45		RChc(box ref a, box ref b) =>
46		{ let (ai, v1) = emit(a, i + 1);
47		let (bi, v2) = emit(b, ai + 1);
48		let mut spl = vec![ ISplit(i + 1, ai + 1) ];
49		let jmp = vec![ IJmp(ai) ];
50		spl.push_all_move(v1);
51		spl.push_all_move(jmp);
52		spl.push_all_move(v2);
53		(bi, spl) },
54		RRep(box ref a) =>
55		{ let (ai, v1) = emit(a, i + 1);
56		let mut spl = vec![ ISplit(i + 1, ai + 1) ];
57		let jmp = vec![ IJmp(i) ];
58		spl.push_all_move(v1);
59		spl.push_all_move(jmp);
60		(ai + 1, spl) },
61		}
62		}
63
64		/* A wrapper over these processes */
65		pub fn compile(s: &str) -> Vec<Instr> {
66		let (_, re) = parse(s);
67		println!("{}", re);
68		let (_, ins) = emit(&re, 0);
69		println!("{}", ins);
70		return ins.append([IMatch]);
71		}

+0

-8

~~regexp/re/instruction.rs~~ less more

/* A single instruction as used in the VM-based matcher. Jump and
 * split operands are indices into the instruction list. */
#[deriving(Clone,Show)]
pub enum Instr {
    IChar(char), /* consume the next input character if it equals this one, else fail */
    IMatch, /* stop and report success; remaining input is not examined */
    IJmp(uint) , /* continue at instr i */
    ISplit(uint, uint), /* try continuing at instr i; if that fails, try instr j */
}

+0

-8

~~regexp/re/mod.rs~~ less more

/* Public face of the `re` module: the compiler, the instruction set
 * and the two evaluators. */
pub use re::compile::compile;
pub use re::instruction::{Instr,IChar,IMatch,IJmp,ISplit};
/* NOTE(review): both evaluators are re-exported under the same name
 * `eval`; the driver program calls them through their full module
 * paths instead. Confirm which one `re::eval` is meant to name. */
pub use re::recursive::eval;
pub use re::stack::eval;
pub mod compile;
pub mod instruction;
pub mod recursive;
pub mod stack;

+0

-22

~~regexp/re/recursive.rs~~ less more

1		use re::instruction::{Instr,IChar,IMatch,IJmp,ISplit};
2
3		/* We wrap the real evaluation function, as we're always going to
4		* start executing instruction 0 with no string matched. */
5		pub fn eval(instrs: &[Instr], input: &str) -> bool {
6		eval1(instrs, input, 0, 0)
7		}
8
9		/* We use the Rust stack as our stack in this naive recursive
10		* implementation. */
11		fn eval1(instrs: &[Instr], input: &str, pc: uint, cc: uint) -> bool {
12		match instrs[pc] {
13		IChar(_) if cc >= input.len() => return false,
14		IChar(c) if c == input.char_at(cc) =>
15		eval1(instrs, input, pc + 1, cc + 1),
16		IChar(_) => return false,
17		IMatch => return true,
18		IJmp(i) => eval1(instrs, input, i, cc),
19		ISplit(i, _) if eval1(instrs, input, i, cc) => true,
20		ISplit(_, j) => eval1(instrs, input, j, cc),
21		}
22		}

+0

-32

~~regexp/re/stack.rs~~ less more

1		use re::instruction::{Instr,IChar,IMatch,IJmp,ISplit};
2
/* The state of a program can be unambiguously specified by
 * a current instruction and a current position in the string:
 * `pc` is the index of the instruction to run next and `cc` is the
 * byte offset in the input of the next character to read. */
struct EvalState { pc: uint, cc: uint }
6
7		/* An evaluator that maintains a manual, mutable stack for doing
8		* regular-expression matching. */
9		pub fn eval(instrs: &[Instr], input: &str) -> bool {
10		let mut stack = vec![ EvalState {pc: 0, cc: 0} ];
11
12		while stack.len() > 0 {
13		let st = stack.pop().unwrap();
14		match instrs[st.pc] {
15		IChar(_) if st.cc >= input.len() =>
16		continue,
17		IChar(c) if c == input.char_at(st.cc) =>
18		stack.push(EvalState { pc: st.pc + 1, cc: st.cc + 1 }),
19		IChar(_) =>
20		continue,
21		IMatch =>
22		return true,
23		IJmp(i) =>
24		stack.push(EvalState { pc: i, cc: st.cc }),
25		ISplit(i, j) => {
26		stack.push(EvalState { pc: j, cc: st.cc });
27		stack.push(EvalState { pc: i, cc: st.cc });
28		},
29		}
30		}
31		return false;
32		}

+0

-3

~~regexp/regexp.do~~ less more

# Rebuild the regexp binary whenever one of its sources changes.
DEPS="regexp.rs re/compile.rs re/instruction.rs re/mod.rs re/recursive.rs re/stack.rs"
# $DEPS is deliberately unquoted so it splits into one argument per file.
redo-ifchange $DEPS
# $3 is the output path handed to this script; quote it so a path
# containing spaces stays a single argument.
rustc regexp.rs -o "$3"

+0

-31

~~regexp/regexp.rs~~ less more

/* This is a basic implementation of a regular expression matcher,
 * based on Henry Spencer's virtual-machine approach to regular
 * expression matching outlined by Russ Cox here:
 * http://swtch.com/~rsc/regexp/regexp2.html
 *
 * For ease of parsing, I'm using a highly non-standard Polish
 * notation for regular expressions, in which . and | are
 * prefix binary operators for catenation and choice, respectively,
 * and * is a prefix unary operator for repetition. */
10		use re::compile;
11		mod re;
12
13		fn main() {
14		/* our sample regexp corresponds to /ab*c/ in
15		* the usual notation. */
16		let re = compile("..a*bc");
17		println!("Recursive:");
18		println!(" match(re, \"abbbc\")\t== {}",
19		::re::recursive::eval(re.as_slice(), "abbbc"));
20		println!(" match(re, \"ac\")\t== {}",
21		::re::recursive::eval(re.as_slice(), "ac"));
22		println!(" match(re, \"abd\")\t== {}",
23		::re::recursive::eval(re.as_slice(), "abd"));
24		println!("Manual Stack:");
25		println!(" match(re, \"abbbc\")\t== {}",
26		::re::stack::eval(re.as_slice(), "abbbc"));
27		println!(" match(re, \"ac\")\t== {}",
28		::re::stack::eval(re.as_slice(), "ac"));
29		println!(" match(re, \"abd\")\t== {}",
30		::re::stack::eval(re.as_slice(), "abd"));
31		}

+31

-0

regexp/src/main.rs less more

/* This is a basic implementation of a regular expression matcher,
 * based on Henry Spencer's virtual-machine approach to regular
 * expression matching outlined by Russ Cox here:
 * http://swtch.com/~rsc/regexp/regexp2.html
 *
 * For ease of parsing, I'm using a highly non-standard Polish
 * notation for regular expressions, in which . and | are
 * prefix binary operators for catenation and choice, respectively,
 * and * is a prefix unary operator for repetition. */
	10	use re::compile;
	11	mod re;
	12
	13	fn main() {
	14	/* our sample regexp corresponds to /ab*c/ in
	15	* the usual notation. */
	16	let re = compile("..a*bc");
	17	println!("Recursive:");
	18	println!(" match(re, \"abbbc\")\t== {}",
	19	::re::recursive::eval(re.as_slice(), "abbbc"));
	20	println!(" match(re, \"ac\")\t== {}",
	21	::re::recursive::eval(re.as_slice(), "ac"));
	22	println!(" match(re, \"abd\")\t== {}",
	23	::re::recursive::eval(re.as_slice(), "abd"));
	24	println!("Manual Stack:");
	25	println!(" match(re, \"abbbc\")\t== {}",
	26	::re::stack::eval(re.as_slice(), "abbbc"));
	27	println!(" match(re, \"ac\")\t== {}",
	28	::re::stack::eval(re.as_slice(), "ac"));
	29	println!(" match(re, \"abd\")\t== {}",
	30	::re::stack::eval(re.as_slice(), "abd"));
	31	}

+71

-0

regexp/src/re/compile.rs less more

	1	use re::instruction::{Instr,IChar,IMatch,IJmp,ISplit};
	2	// use std::vec::Vec;
	3
/* A regular expression parse tree, as built by `parse` from the
 * prefix notation and consumed by `emit`. */
#[deriving(Show)]
enum Regexp {
    RChar(char),                    /* a single literal character */
    RSeq(Box<Regexp>, Box<Regexp>), /* catenation: the first, then the second (prefix `.`) */
    RChc(Box<Regexp>, Box<Regexp>), /* choice between the two (prefix `|`) */
    RRep(Box<Regexp>),              /* repetition of the operand (prefix `*`) */
}
	12
	13	/* We're assuming a prefix regexp here. That means that we have
	14	* the following operators:
	15	* .ab => ab
	16	* \|ab => a\|b
	17	* a => a
	18	* but these nest, so (ab\|c)* would become
	19	* *\|c.ab
	20	* This is easier to parse. Deal with it.
	21	*/
	22	fn parse<'a>(s: &'a str) -> (&'a str, Regexp) {
	23	match s.char_at(0) {
	24	'.' => { let (s1, r1) = parse(s.slice_from(1));
	25	let (s2, r2) = parse(s1);
	26	(s2, RSeq(box r1, box r2)) },
	27	'\|' => { let (s1, r1) = parse(s.slice_from(1));
	28	let (s2, r2) = parse(s1);
	29	(s2, RChc(box r1, box r2)) },
	30	'*' => { let (s1, r1) = parse(s.slice_from(1));
	31	(s1, RRep(box r1)) },
	32	c => (s.slice_from(1), RChar(c)),
	33	}
	34	}
	35
	36	/* Compiling an AST for regexps to the instructions */
	37	fn emit(r: &Regexp, i: uint) -> (uint, Vec<Instr>) {
	38	match *r {
	39	RChar(c) => { (i+1, vec![IChar(c)]) },
	40	RSeq(box ref a, box ref b) =>
	41	{ let (ai, mut v1) = emit(a, i);
	42	let (bi, v2) = emit(b, ai);
	43	v1.push_all_move(v2);
	44	(bi, v1) },
	45	RChc(box ref a, box ref b) =>
	46	{ let (ai, v1) = emit(a, i + 1);
	47	let (bi, v2) = emit(b, ai + 1);
	48	let mut spl = vec![ ISplit(i + 1, ai + 1) ];
	49	let jmp = vec![ IJmp(ai) ];
	50	spl.push_all_move(v1);
	51	spl.push_all_move(jmp);
	52	spl.push_all_move(v2);
	53	(bi, spl) },
	54	RRep(box ref a) =>
	55	{ let (ai, v1) = emit(a, i + 1);
	56	let mut spl = vec![ ISplit(i + 1, ai + 1) ];
	57	let jmp = vec![ IJmp(i) ];
	58	spl.push_all_move(v1);
	59	spl.push_all_move(jmp);
	60	(ai + 1, spl) },
	61	}
	62	}
	63
	64	/* A wrapper over these processes */
	65	pub fn compile(s: &str) -> Vec<Instr> {
	66	let (_, re) = parse(s);
	67	println!("{}", re);
	68	let (_, ins) = emit(&re, 0);
	69	println!("{}", ins);
	70	return ins.append([IMatch]);
	71	}

+8

-0

regexp/src/re/instruction.rs less more

/* A single instruction as used in the VM-based matcher. Jump and
 * split operands are indices into the instruction list. */
#[deriving(Clone,Show)]
pub enum Instr {
    IChar(char), /* consume the next input character if it equals this one, else fail */
    IMatch, /* stop and report success; remaining input is not examined */
    IJmp(uint) , /* continue at instr i */
    ISplit(uint, uint), /* try continuing at instr i; if that fails, try instr j */
}

+8

-0

regexp/src/re/mod.rs less more

/* Public face of the `re` module: the compiler, the instruction set
 * and the two evaluators. */
pub use re::compile::compile;
pub use re::instruction::{Instr,IChar,IMatch,IJmp,ISplit};
/* NOTE(review): both evaluators are re-exported under the same name
 * `eval`; the driver program calls them through their full module
 * paths instead. Confirm which one `re::eval` is meant to name. */
pub use re::recursive::eval;
pub use re::stack::eval;
pub mod compile;
pub mod instruction;
pub mod recursive;
pub mod stack;

+22

-0

regexp/src/re/recursive.rs less more

	1	use re::instruction::{Instr,IChar,IMatch,IJmp,ISplit};
	2
	3	/* We wrap the real evaluation function, as we're always going to
	4	* start executing instruction 0 with no string matched. */
	5	pub fn eval(instrs: &[Instr], input: &str) -> bool {
	6	eval1(instrs, input, 0, 0)
	7	}
	8
	9	/* We use the Rust stack as our stack in this naive recursive
	10	* implementation. */
	11	fn eval1(instrs: &[Instr], input: &str, pc: uint, cc: uint) -> bool {
	12	match instrs[pc] {
	13	IChar(_) if cc >= input.len() => return false,
	14	IChar(c) if c == input.char_at(cc) =>
	15	eval1(instrs, input, pc + 1, cc + 1),
	16	IChar(_) => return false,
	17	IMatch => return true,
	18	IJmp(i) => eval1(instrs, input, i, cc),
	19	ISplit(i, _) if eval1(instrs, input, i, cc) => true,
	20	ISplit(_, j) => eval1(instrs, input, j, cc),
	21	}
	22	}

+32

-0

regexp/src/re/stack.rs less more

	1	use re::instruction::{Instr,IChar,IMatch,IJmp,ISplit};
	2
/* The state of a program can be unambiguously specified by
 * a current instruction and a current position in the string:
 * `pc` is the index of the instruction to run next and `cc` is the
 * byte offset in the input of the next character to read. */
struct EvalState { pc: uint, cc: uint }
	6
	7	/* An evaluator that maintains a manual, mutable stack for doing
	8	* regular-expression matching. */
	9	pub fn eval(instrs: &[Instr], input: &str) -> bool {
	10	let mut stack = vec![ EvalState {pc: 0, cc: 0} ];
	11
	12	while stack.len() > 0 {
	13	let st = stack.pop().unwrap();
	14	match instrs[st.pc] {
	15	IChar(_) if st.cc >= input.len() =>
	16	continue,
	17	IChar(c) if c == input.char_at(st.cc) =>
	18	stack.push(EvalState { pc: st.pc + 1, cc: st.cc + 1 }),
	19	IChar(_) =>
	20	continue,
	21	IMatch =>
	22	return true,
	23	IJmp(i) =>
	24	stack.push(EvalState { pc: i, cc: st.cc }),
	25	ISplit(i, j) => {
	26	stack.push(EvalState { pc: j, cc: st.cc });
	27	stack.push(EvalState { pc: i, cc: st.cc });
	28	},
	29	}
	30	}
	31	return false;
	32	}