Fixed bug and changed names
Getty Ritter
11 years ago
| 12 | 12 | |
| 13 | 13 | fn main() { |
| 14 | 14 | /* our sample regexp corresponds to /ab*c/ in |
| 15 | * the usual notation. */ | |
| 16 | let re = compile("..a*bc"); | |
| 15 | * the usual notation. | |
| 16 | * These two lines can be collapsed into one once | |
| 17 | * this RFC lands: https://github.com/rust-lang/rfcs/pull/66 | |
| 18 | */ | |
| 19 | let regexp = compile("..a*bc"); | |
| 20 | let instrs = regexp.as_slice(); | |
| 21 | ||
| 17 | 22 | println!("Recursive:"); |
| 18 | 23 | println!(" match(re, \"abbbc\")\t== {}", |
| 19 |
::re::recursive::eval( |
|
| 24 | ::re::recursive::eval(instrs, "abbbc")); | |
| 20 | 25 | println!(" match(re, \"ac\")\t== {}", |
| 21 |
::re::recursive::eval( |
|
| 26 | ::re::recursive::eval(instrs, "ac")); | |
| 22 | 27 | println!(" match(re, \"abd\")\t== {}", |
| 23 |
::re::recursive::eval( |
|
| 28 | ::re::recursive::eval(instrs, "abd")); | |
| 29 | ||
| 24 | 30 | println!("Manual Stack:"); |
| 25 | 31 | println!(" match(re, \"abbbc\")\t== {}", |
| 26 |
::re::stack::eval( |
|
| 32 | ::re::stack::eval(instrs, "abbbc")); | |
| 27 | 33 | println!(" match(re, \"ac\")\t== {}", |
| 28 |
::re::stack::eval( |
|
| 34 | ::re::stack::eval(instrs, "ac")); | |
| 29 | 35 | println!(" match(re, \"abd\")\t== {}", |
| 30 |
::re::stack::eval( |
|
| 36 | ::re::stack::eval(instrs, "abd")); | |
| 31 | 37 | } |
| 33 | 33 | } |
| 34 | 34 | } |
| 35 | 35 | |
| 36 | /* Compiling an AST for regexps to the instructions */ | |
| 37 | fn emit(r: &Regexp, i: uint) -> (uint, Vec<Instr>) { | |
| 38 | match *r { | |
| 39 | RChar(c) => { (i+1, vec![IChar(c)]) }, | |
| 40 | RSeq(box ref a, box ref b) => | |
| 41 | { let (ai, mut v1) = emit(a, i); | |
| 42 | let (bi, v2) = emit(b, ai); | |
| 36 | /* Compiling an AST for regexps to the instructions. | |
| 37 | * The return values correspond to the length of the | |
| 38 | * vector (so that subsequent instructions to be added | |
| 39 | * know what pc to use) and the vector of instructions. | |
| 40 | */ | |
| 41 | fn emit(regexp: &Regexp, pc: uint) -> (uint, Vec<Instr>) { | |
| 42 | match *regexp { | |
| 43 | /* For a match, we produce this code: | |
| 44 | * ---- <- pc | |
| 45 | * | IChar(chr) | |
| 46 | * ---- <- pc + 1 | |
| 47 | */ | |
| 48 | RChar(chr) => { (pc+1, vec![IChar(chr)]) }, | |
| 49 | /* For a sequencing, we produce this code: | |
| 50 | * ---- <- pc | |
| 51 | * | [[ first ]] | |
| 52 | * ---- <- first_pc | |
| 53 | * | [[ second ]] | |
| 54 | * ---- <- second_pc | |
| 55 | */ | |
| 56 | RSeq(box ref first, box ref second) => | |
| 57 | { let (first_pc, mut v1) = emit(first, pc); | |
| 58 | let (second_pc, v2) = emit(second, first_pc); | |
| 43 | 59 | v1.push_all_move(v2); |
| 44 | (bi, v1) }, | |
| 45 | RChc(box ref a, box ref b) => | |
| 46 | { let (ai, v1) = emit(a, i + 1); | |
| 47 | let (bi, v2) = emit(b, ai + 1); | |
| 48 | let mut spl = vec![ ISplit(i + 1, ai + 1) ]; | |
| 49 | let jmp = vec![ IJmp(ai) ]; | |
| 60 | (second_pc, v1) | |
| 61 | }, | |
| 62 | /* For a choice, we produce this code: | |
| 63 | * ---- <- pc | |
| 64 | * | ISplit(pc+1, first_pc+1) | |
| 65 | * ---- <- pc + 1 | |
| 66 | * | [[ first ]] | |
| 67 | * ---- <- first_pc | |
| 68 | * | IJmp(second_pc) | |
| 69 | * ---- <- first_pc + 1 | |
| 70 | * | [[ second ]] | |
| 71 | * ---- <- second_pc | |
| 72 | */ | |
| 73 | RChc(box ref first, box ref second) => | |
| 74 | { let (first_pc, v1) = emit(first, pc + 1); | |
| 75 | let (second_pc, v2) = emit(second, first_pc + 1); | |
| 76 | let mut split_instr = vec![ ISplit(pc + 1, first_pc + 1) ]; | |
| 77 | let jmp_instr = vec![ IJmp(second_pc) ]; | |
| 78 | split_instr.push_all_move(v1); | |
| 79 | split_instr.push_all_move(jmp_instr); | |
| 80 | split_instr.push_all_move(v2); | |
| 81 | (second_pc, split_instr) | |
| 82 | }, | |
| 83 | /* For a repetition, we produce this code: | |
| 84 | * ---- <- pc | |
| 85 | * | ISplit(pc+1, expr_pc + 1) | |
| 86 | * ---- <- pc + 1 | |
| 87 | * | [[ expr ]] | |
| 88 | * ---- <- expr_pc | |
| 89 | * | IJmp(pc) | |
| 90 | * ---- <- expr_pc + 1 | |
| 91 | */ | |
| 92 | RRep(box ref expr) => | |
| 93 | { let (expr_pc, v1) = emit(expr, pc + 1); | |
| 94 | let mut spl = vec![ ISplit(pc + 1, expr_pc + 1) ]; | |
| 95 | let jmp = vec![ IJmp(pc) ]; | |
| 50 | 96 | spl.push_all_move(v1); |
| 51 | 97 | spl.push_all_move(jmp); |
| 52 | spl.push_all_move(v2); | |
| 53 | (bi, spl) }, | |
| 54 | RRep(box ref a) => | |
| 55 | { let (ai, v1) = emit(a, i + 1); | |
| 56 | let mut spl = vec![ ISplit(i + 1, ai + 1) ]; | |
| 57 | let jmp = vec![ IJmp(i) ]; | |
| 58 | spl.push_all_move(v1); | |
| 59 | spl.push_all_move(jmp); | |
| 60 |
( |
|
| 98 | (expr_pc + 1, spl) | |
| 99 | }, | |
| 61 | 100 | } |
| 62 | 101 | } |
| 63 | 102 | |
| 67 | 106 | println!("{}", re); |
| 68 | 107 | let (_, ins) = emit(&re, 0); |
| 69 | 108 | println!("{}", ins); |
| 109 | /* If we get to the end of a compiled regular expression, | |
| 110 | * that means it hasn't aborted and we can match. | |
| 111 | */ | |
| 70 | 112 | return ins.append([IMatch]); |
| 71 | 113 | } |
| 7 | 7 | } |
| 8 | 8 | |
| 9 | 9 | /* We use the Rust stack as our stack in this naive recursive |
| 10 |
* implementation. |
|
| 10 | * implementation. We have a vector slice of instructions, | |
| 11 | * a string we're matching over, the current program counter | |
| 12 | * in the instructions, and the current point to which we've | |
| 13 | * traversed the string. */ | |
| 11 | 14 | fn eval1(instrs: &[Instr], input: &str, pc: uint, cc: uint) -> bool { |
| 12 | 15 | match instrs[pc] { |
| 13 | 16 | IChar(_) if cc >= input.len() => return false, |
| 16 | 19 | IChar(_) => return false, |
| 17 | 20 | IMatch => return true, |
| 18 | 21 | IJmp(i) => eval1(instrs, input, i, cc), |
| 19 | ISplit(i, _) if eval1(instrs, input, i, cc) => true, | |
| 20 | ISplit(_, j) => eval1(instrs, input, j, cc), | |
| 22 | ISplit(i, j) => eval1(instrs, input, i, cc) || | |
| 23 | eval1(instrs, input, j, cc), | |
| 21 | 24 | } |
| 22 | 25 | } |
| 9 | 9 | pub fn eval(instrs: &[Instr], input: &str) -> bool { |
| 10 | 10 | let mut stack = vec![ EvalState {pc: 0, cc: 0} ]; |
| 11 | 11 | |
| 12 | /* Every time we find that a possibility is impossible, we | |
| 13 | * remove it from the stack. If we have completed a match, | |
| 14 | * we'll short-circuit out of this loop; otherwise, an empty | |
| 15 | * stack means we have failed every possible branch and can | |
| 16 | * return false. */ | |
| 12 | 17 | while stack.len() > 0 { |
| 18 | /* This call to .unwrap() is safe because we've already | |
| 19 | * manually checked the stack length. */ | |
| 13 | 20 | let st = stack.pop().unwrap(); |
| 14 | 21 | match instrs[st.pc] { |
| 15 | 22 | IChar(_) if st.cc >= input.len() => |