Updated regexp example to modern Rust as well
Getty Ritter
11 years ago
| 1 | 1 | use re::instruction::{Instr,IChar,IMatch,IJmp,ISplit}; |
| 2 | use std::vec::append; | |
| 3 | mod instruction; | |
| 2 | // use std::vec::Vec; | |
| 4 | 3 | |
| 5 | 4 | /* A regular expression parse tree */ |
| 5 | #[deriving(Show)] | |
| 6 | 6 | enum Regexp { |
| 7 | 7 | RChar(char), |
| 8 | RSeq(~Regexp, ~Regexp), | |
| 9 | RChc(~Regexp, ~Regexp), | |
| 10 | RRep(~Regexp), | |
| 8 | RSeq(Box<Regexp>, Box<Regexp>), | |
| 9 | RChc(Box<Regexp>, Box<Regexp>), | |
| 10 | RRep(Box<Regexp>), | |
| 11 | 11 | } |
| 12 | 12 | |
| 13 | 13 | /* We're assuming a prefix regexp here. That means that we have |
| 23 | 23 | match s.char_at(0) { |
| 24 | 24 | '.' => { let (s1, r1) = parse(s.slice_from(1)); |
| 25 | 25 | let (s2, r2) = parse(s1); |
| 26 | (s2, RSeq(~r1, ~r2)) }, | |
| 26 | (s2, RSeq(box r1, box r2)) }, | |
| 27 | 27 | '|' => { let (s1, r1) = parse(s.slice_from(1)); |
| 28 | 28 | let (s2, r2) = parse(s1); |
| 29 | (s2, RChc(~r1, ~r2)) }, | |
| 29 | (s2, RChc(box r1, box r2)) }, | |
| 30 | 30 | '*' => { let (s1, r1) = parse(s.slice_from(1)); |
| 31 | (s1, RRep(~r1)) }, | |
| 31 | (s1, RRep(box r1)) }, | |
| 32 | 32 | c => (s.slice_from(1), RChar(c)), |
| 33 | 33 | } |
| 34 | 34 | } |
| 35 | 35 | |
| 36 | 36 | /* Compiling an AST for regexps to the instructions */ |
| 37 | fn emit(r: &Regexp, i: uint) -> (uint, ~[Instr]) { | |
| 37 | fn emit(r: &Regexp, i: uint) -> (uint, Vec<Instr>) { | |
| 38 | 38 | match *r { |
| 39 | RChar(c) => { (i+1, ~[IChar(c)]) }, | |
| 40 | RSeq(ref a, ref b) => | |
| 41 | { let (ai, v1) = emit(*a, i); | |
| 42 | let (bi, v2) = emit(*b, ai); | |
| 43 | (bi, append(v1, v2)) }, | |
| 44 | RChc(ref a, ref b) => | |
| 45 | { let (ai, v1) = emit(*a, i + 1); | |
| 46 | let (bi, v2) = emit(*b, ai + 1); | |
| 47 | let spl = ~[ ISplit(i + 1, ai + 1) ]; | |
| 48 | let jmp = ~[ IJmp(ai) ]; | |
| 49 | (bi, append(spl, append(v1, append(jmp, v2)))) }, | |
| 50 | RRep(ref a) => | |
| 51 | { let (ai, v1) = emit(*a, i + 1); | |
| 52 | let spl = ~[ ISplit(i + 1, ai + 1) ]; | |
| 53 | let jmp = ~[ IJmp(i) ]; | |
| 54 | (ai + 1, append(spl, append(v1, jmp))) }, | |
| 39 | RChar(c) => { (i+1, vec![IChar(c)]) }, | |
| 40 | RSeq(box ref a, box ref b) => | |
| 41 | { let (ai, mut v1) = emit(a, i); | |
| 42 | let (bi, v2) = emit(b, ai); | |
| 43 | v1.push_all_move(v2); | |
| 44 | (bi, v1) }, | |
| 45 | RChc(box ref a, box ref b) => | |
| 46 | { let (ai, v1) = emit(a, i + 1); | |
| 47 | let (bi, v2) = emit(b, ai + 1); | |
| 48 | let mut spl = vec![ ISplit(i + 1, ai + 1) ]; | |
| 49 | let jmp = vec![ IJmp(ai) ]; | |
| 50 | spl.push_all_move(v1); | |
| 51 | spl.push_all_move(jmp); | |
| 52 | spl.push_all_move(v2); | |
| 53 | (bi, spl) }, | |
| 54 | RRep(box ref a) => | |
| 55 | { let (ai, v1) = emit(a, i + 1); | |
| 56 | let mut spl = vec![ ISplit(i + 1, ai + 1) ]; | |
| 57 | let jmp = vec![ IJmp(i) ]; | |
| 58 | spl.push_all_move(v1); | |
| 59 | spl.push_all_move(jmp); | |
| 60 | (ai + 1, spl) }, | |
| 55 | 61 | } |
| 56 | 62 | } |
| 57 | 63 | |
| 58 | 64 | /* A wrapper over these processes */ |
| 59 | pub fn compile(s: &str) -> ~[Instr] { | |
| 65 | pub fn compile(s: &str) -> Vec<Instr> { | |
| 60 | 66 | let (_, re) = parse(s); |
| 61 | println!("{:?}", re); | |
| 67 | println!("{}", re); | |
| 62 | 68 | let (_, ins) = emit(&re, 0); |
| 63 | println!("{:?}", ins); | |
| 64 | return append(ins, [IMatch]); | |
| 69 | println!("{}", ins); | |
| 70 | return ins.append([IMatch]); | |
| 65 | 71 | } |
| 1 | 1 | /* A single instruction as used in the VM-based matcher */ |
| 2 | #[deriving(Clone)] | |
| 2 | #[deriving(Clone,Show)] | |
| 3 | 3 | pub enum Instr { |
| 4 | 4 | IChar(char), /* match a character or fail */ |
| 5 | 5 | IMatch, /* match anything successfully */ |
| 1 | 1 | use re::instruction::{Instr,IChar,IMatch,IJmp,ISplit}; |
| 2 | mod instruction; | |
| 3 | 2 | |
| 4 | 3 | /* We wrap the real evaluation function, as we're always going to |
| 5 | 4 | * start executing instruction 0 with no string matched. */ |
| 1 | 1 | use re::instruction::{Instr,IChar,IMatch,IJmp,ISplit}; |
| 2 | mod instruction; | |
| 3 | 2 | |
| 4 | 3 | /* The state of a program can be unambiguously specified by |
| 5 | 4 | * a current instruction and a current position in the string. */ |
| 8 | 7 | /* An evaluator that maintains a manual, mutable stack for doing |
| 9 | 8 | * regular-expression matching. */ |
| 10 | 9 | pub fn eval(instrs: &[Instr], input: &str) -> bool { |
| 11 | let mut stack = ~[ EvalState {pc: 0, cc: 0} ]; | |
| 10 | let mut stack = vec![ EvalState {pc: 0, cc: 0} ]; | |
| 12 | 11 | |
| 13 | 12 | while stack.len() > 0 { |
| 14 | let st = stack.pop(); | |
| 13 | let st = stack.pop().unwrap(); | |
| 15 | 14 | match instrs[st.pc] { |
| 16 | 15 | IChar(_) if st.cc >= input.len() => |
| 17 | 16 | continue, |
| 14 | 14 | /* our sample regexp corresponds to /ab*c/ in |
| 15 | 15 | * the usual notation. */ |
| 16 | 16 | let re = compile("..a*bc"); |
| 17 | println("Recursive:"); | |
| 17 | println!("Recursive:"); | |
| 18 | 18 | println!(" match(re, \"abbbc\")\t== {}", |
| 19 | ::re::recursive::eval(re, "abbbc")); | |
| 19 | ::re::recursive::eval(re.as_slice(), "abbbc")); | |
| 20 | 20 | println!(" match(re, \"ac\")\t== {}", |
| 21 | ::re::recursive::eval(re, "ac")); | |
| 21 | ::re::recursive::eval(re.as_slice(), "ac")); | |
| 22 | 22 | println!(" match(re, \"abd\")\t== {}", |
| 23 | ::re::recursive::eval(re, "abd")); | |
| 24 | println("Manual Stack:"); | |
| 23 | ::re::recursive::eval(re.as_slice(), "abd")); | |
| 24 | println!("Manual Stack:"); | |
| 25 | 25 | println!(" match(re, \"abbbc\")\t== {}", |
| 26 | ::re::stack::eval(re, "abbbc")); | |
| 26 | ::re::stack::eval(re.as_slice(), "abbbc")); | |
| 27 | 27 | println!(" match(re, \"ac\")\t== {}", |
| 28 | ::re::stack::eval(re, "ac")); | |
| 28 | ::re::stack::eval(re.as_slice(), "ac")); | |
| 29 | 29 | println!(" match(re, \"abd\")\t== {}", |
| 30 | ::re::stack::eval(re, "abd")); | |
| 30 | ::re::stack::eval(re.as_slice(), "abd")); | |
| 31 | 31 | } |