1 | |
use re::instruction::{Instr,IChar,IMatch,IJmp,ISplit};
|
| 1 |
use re::instruction::Instr;
|
2 | 2 |
// use std::vec::Vec;
|
3 | 3 |
|
4 | 4 |
/* A regular expression parse tree */
|
5 | 5 |
#[deriving(Show)]
|
6 | 6 |
enum Regexp {
|
7 | |
RChar(char),
|
8 | |
RSeq(Box<Regexp>, Box<Regexp>),
|
9 | |
RChc(Box<Regexp>, Box<Regexp>),
|
10 | |
RRep(Box<Regexp>),
|
| 7 |
Char(char),
|
| 8 |
Seq(Box<Regexp>, Box<Regexp>),
|
| 9 |
Chc(Box<Regexp>, Box<Regexp>),
|
| 10 |
Rep(Box<Regexp>),
|
11 | 11 |
}
|
12 | 12 |
|
13 | 13 |
/* We're assuming a prefix regexp here. That means that we have
|
|
23 | 23 |
match s.char_at(0) {
|
24 | 24 |
'.' => { let (s1, r1) = parse(s.slice_from(1));
|
25 | 25 |
let (s2, r2) = parse(s1);
|
26 | |
(s2, RSeq(box r1, box r2)) },
|
| 26 |
(s2, Regexp::Seq(box r1, box r2)) },
|
27 | 27 |
'|' => { let (s1, r1) = parse(s.slice_from(1));
|
28 | 28 |
let (s2, r2) = parse(s1);
|
29 | |
(s2, RChc(box r1, box r2)) },
|
| 29 |
(s2, Regexp::Chc(box r1, box r2)) },
|
30 | 30 |
'*' => { let (s1, r1) = parse(s.slice_from(1));
|
31 | |
(s1, RRep(box r1)) },
|
32 | |
c => (s.slice_from(1), RChar(c)),
|
| 31 |
(s1, Regexp::Rep(box r1)) },
|
| 32 |
c => (s.slice_from(1), Regexp::Char(c)),
|
33 | 33 |
}
|
34 | 34 |
}
|
35 | 35 |
|
|
45 | 45 |
* | IChar(chr)
|
46 | 46 |
* ---- <- pc + 1
|
47 | 47 |
*/
|
48 | |
RChar(chr) => { (pc+1, vec![IChar(chr)]) },
|
| 48 |
Regexp::Char(chr) => { (pc+1, vec![Instr::Char(chr)]) },
|
49 | 49 |
/* For a sequencing, we produce this code:
|
50 | 50 |
* ---- <- pc
|
51 | 51 |
* | [[ first ]]
|
|
53 | 53 |
* | [[ second ]]
|
54 | 54 |
* ---- <- second_pc
|
55 | 55 |
*/
|
56 | |
RSeq(box ref first, box ref second) =>
|
| 56 |
Regexp::Seq(box ref first, box ref second) =>
|
57 | 57 |
{ let (first_pc, mut v1) = emit(first, pc);
|
58 | 58 |
let (second_pc, v2) = emit(second, first_pc);
|
59 | |
v1.push_all_move(v2);
|
| 59 |
v1.push_all(v2.as_slice());
|
60 | 60 |
(second_pc, v1)
|
61 | 61 |
},
|
62 | 62 |
/* For a choice, we produce this code:
|
|
70 | 70 |
* | [[ second ]]
|
71 | 71 |
* ---- <- second_pc
|
72 | 72 |
*/
|
73 | |
RChc(box ref first, box ref second) =>
|
| 73 |
Regexp::Chc(box ref first, box ref second) =>
|
74 | 74 |
{ let (first_pc, v1) = emit(first, pc + 1);
|
75 | 75 |
let (second_pc, v2) = emit(second, first_pc + 1);
|
76 | |
let mut split_instr = vec![ ISplit(pc + 1, first_pc + 1) ];
|
77 | |
let jmp_instr = vec![ IJmp(second_pc) ];
|
78 | |
split_instr.push_all_move(v1);
|
79 | |
split_instr.push_all_move(jmp_instr);
|
80 | |
split_instr.push_all_move(v2);
|
| 76 |
let mut split_instr = vec![ Instr::Split(pc + 1, first_pc + 1) ];
|
| 77 |
let jmp_instr = vec![ Instr::Jmp(second_pc) ];
|
| 78 |
split_instr.push_all(v1.as_slice());
|
| 79 |
split_instr.push_all(jmp_instr.as_slice());
|
| 80 |
split_instr.push_all(v2.as_slice());
|
81 | 81 |
(second_pc, split_instr)
|
82 | 82 |
},
|
83 | 83 |
/* For a repetition, we produce this code:
|
|
89 | 89 |
* | IJmp(pc)
|
90 | 90 |
* ---- <- expr_pc + 1
|
91 | 91 |
*/
|
92 | |
RRep(box ref expr) =>
|
| 92 |
Regexp::Rep(box ref expr) =>
|
93 | 93 |
{ let (expr_pc, v1) = emit(expr, pc + 1);
|
94 | |
let mut spl = vec![ ISplit(pc + 1, expr_pc + 1) ];
|
95 | |
let jmp = vec![ IJmp(pc) ];
|
96 | |
spl.push_all_move(v1);
|
97 | |
spl.push_all_move(jmp);
|
| 94 |
let mut spl = vec![ Instr::Split(pc + 1, expr_pc + 1) ];
|
| 95 |
let jmp = vec![ Instr::Jmp(pc) ];
|
| 96 |
spl.push_all(v1.as_slice());
|
| 97 |
spl.push_all(jmp.as_slice());
|
98 | 98 |
(expr_pc + 1, spl)
|
99 | 99 |
},
|
100 | 100 |
}
|
|
104 | 104 |
pub fn compile(s: &str) -> Vec<Instr> {
|
105 | 105 |
let (_, re) = parse(s);
|
106 | 106 |
println!("{}", re);
|
107 | |
let (_, ins) = emit(&re, 0);
|
| 107 |
let (_, mut ins) = emit(&re, 0);
|
108 | 108 |
println!("{}", ins);
|
109 | 109 |
/* If we get to the end of a compiled regular expression,
|
110 | 110 |
* that means it hasn't aborted and we can match.
|
111 | 111 |
*/
|
112 | |
return ins.append([IMatch]);
|
| 112 |
ins.push(Instr::Match);
|
| 113 |
return ins;
|
113 | 114 |
}
|