Basic, hacky impl of limited parser
Getty Ritter
7 years ago
| 1 | [package] | |
| 2 | name = "rrecutils" | |
| 3 | version = "0.1.0" | |
| 4 | authors = ["Getty Ritter <gettylefou@gmail.com>"] | |
| 5 | ||
| 6 | [lib] | |
| 7 | name = "rrecutils" | |
| 8 | path = "src/lib.rs" | |
| 9 | ||
| 10 | [dependencies] | |
| 11 | regex = "0.2" | |
| 12 | serde = "*" | |
| 13 | serde_json = "*" | |
| 14 | clap = "2.27.1" | |
| 15 | ||
| 16 | [[bin]] | |
| 17 | name = "rr-pretty" | |
| 18 | path = "src/tools/pretty.rs" | |
| 19 | ||
| 20 | [[bin]] | |
| 21 | name = "rr-to-json" | |
| 22 | path = "src/tools/tojson.rs" |
| 1 | %rec: Article | |
| 2 | ||
| 3 | Id: 1 | |
| 4 | Title: Article 1 | |
| 5 | ||
| 6 | Id: 2 | |
| 7 | Title: Article 2 | |
| 8 | ||
| 9 | %rec: Stock | |
| 10 | ||
| 11 | Id: 1 | |
| 12 | Type: sell | |
| 13 | Date: 20 April 2011 | |
| 14 | ||
| 15 | Id: 2 | |
| 16 | Type: stock | |
| 17 | Date: 21 April 2011 |
| 1 | Id: 1 | |
| 2 | Title: Blah | |
| 3 | ||
| 4 | Id: 2 | |
| 5 | Title: Bleh | |
| 6 | ||
| 7 | %rec: Movement | |
| 8 | ||
| 9 | Date: 13-Aug-2012 | |
| 10 | Concept: 20 | |
| 11 | ||
| 12 | Date: 24-Sept-2012 | |
| 13 | Concept: 12 |
/// Parser state that has to survive from one record to the next.
struct ParsingContext {
    /// Value of the most recent `%rec` field seen so far, if any. It is
    /// copied into the `rec_type` of every record started afterwards.
    current_record_type: Option<String>,
}

/// A single record: an ordered list of fields plus the record type that was
/// in effect when the record was started.
#[derive(Eq, PartialEq, Debug)]
pub struct Record {
    /// Value of the last `%rec` field seen before this record began, or
    /// `None` if there was none. Note that the record which *contains* a
    /// `%rec` field still carries the previous type, not the one it declares.
    pub rec_type: Option<String>,
    /// `(name, value)` pairs in the order they appear in the input. Repeated
    /// names are kept as separate entries.
    pub fields: Vec<(String, String)>,
}

/// A parsed recfile: every non-empty record, in input order.
#[derive(Eq, PartialEq, Debug)]
pub struct Recfile {
    pub records: Vec<Record>,
}


impl Recfile {
    /// Parses a recfile from `i`, one line at a time.
    ///
    /// Leading spaces are stripped from every line before it is classified:
    ///
    /// * lines starting with `#` are comments and are skipped;
    /// * an empty line ends the current record (if it has any fields);
    /// * lines starting with `+` continue the value of the previous field:
    ///   a newline is appended, then the rest of the line with at most one
    ///   space after the `+` removed;
    /// * any other line must contain a `:`; the text before the first `:` is
    ///   the field name and the text after it, with leading whitespace
    ///   removed, is the value. A field named `%rec` also sets the record
    ///   type used for the records that follow.
    ///
    /// # Errors
    ///
    /// Returns `Err` with a human-readable message when reading from `i`
    /// fails, when a continuation line appears before any field of the
    /// current record, or when a line matches none of the forms above.
    pub fn parse<I>(i: I) -> Result<Recfile, String>
        where I: std::io::BufRead
    {
        let mut records = vec![];
        let mut current = Record {
            rec_type: None,
            fields: vec![],
        };
        let mut ctx = ParsingContext {
            current_record_type: None,
        };

        for ln in i.lines() {
            // A read failure must not be mistaken for end of input, otherwise
            // the caller silently gets a truncated file.
            let ln = ln.map_err(|err| format!("Error reading input: {}", err))?;
            let ln = ln.trim_start_matches(' ');

            if ln.starts_with('#') {
                // skip comment lines
            } else if ln.is_empty() {
                // Blank lines separate records; consecutive blank lines do
                // not produce empty records.
                if !current.fields.is_empty() {
                    records.push(current);
                    current = Record {
                        rec_type: ctx.current_record_type.clone(),
                        fields: vec![],
                    };
                }
            } else if ln.starts_with('+') {
                // `+` is a single byte, so slicing at 1 is always on a
                // character boundary.
                let rest = &ln[1..];
                let rest = if rest.starts_with(' ') { &rest[1..] } else { rest };
                match current.fields.last_mut() {
                    Some(field) => {
                        field.1.push('\n');
                        field.1.push_str(rest);
                    }
                    None => {
                        return Err(format!(
                            "Found continuation line in nonsensical place: {}",
                            ln));
                    }
                }
            } else if let Some(pos) = ln.find(':') {
                let key = &ln[..pos];
                let val = ln[pos + 1..].trim_start();
                if key == "%rec" {
                    ctx.current_record_type = Some(val.to_owned());
                }
                current.fields.push((key.to_owned(), val.to_owned()));
            } else {
                return Err(format!("Invalid line: {:?}", ln));
            }
        }

        // The input may end without a trailing blank line.
        if !current.fields.is_empty() {
            records.push(current);
        }

        Ok(Recfile { records: records })
    }
}
| 80 | ||
#[cfg(test)]
mod tests {
    use super::{Recfile, Record};

    /// Parses `input` and asserts that the result is exactly the records in
    /// `expected`, each given as a list of `(name, value)` pairs.
    ///
    /// Every expected record is built with `rec_type: None`, so this helper
    /// is only suitable for inputs that contain no `%rec` field.
    fn test_parse(input: &[u8], expected: Vec<Vec<(&str, &str)>>) {
        let file = Recfile {
            records: expected.iter().map( |v| {
                Record {
                    rec_type: None,
                    fields: v.iter().map( |&(k, v)| {
                        (k.to_owned(), v.to_owned())
                    }).collect(),
                }
            }).collect(),
        };
        assert_eq!(Recfile::parse(input), Ok(file));
    }

    #[test]
    fn empty_file() {
        test_parse(b"\n", vec![]);
    }

    #[test]
    fn only_comments() {
        test_parse(b"# an empty file\n", vec![]);
    }

    #[test]
    fn one_section() {
        test_parse(b"hello: yes\n", vec![ vec![ ("hello", "yes") ] ]);
    }

    #[test]
    fn two_sections() {
        test_parse(
            b"hello: yes\n\ngoodbye: no\n",
            vec![
                vec![ ("hello", "yes") ],
                vec![ ("goodbye", "no") ],
            ],
        );
    }

    #[test]
    fn continuation_with_space() {
        test_parse(
            b"hello: yes\n+ but also no\n",
            vec![
                vec![ ("hello", "yes\nbut also no") ],
            ],
        );
    }

    #[test]
    fn continuation_without_space() {
        test_parse(
            b"hello: yes\n+but also no\n",
            vec![
                vec![ ("hello", "yes\nbut also no") ],
            ],
        );
    }

    // Only the first space after `+` is stripped; the second one is part of
    // the value.
    #[test]
    fn continuation_with_two_spaces() {
        test_parse(
            b"hello: yes\n+  but also no\n",
            vec![
                vec![ ("hello", "yes\n but also no") ],
            ],
        );
    }

}
| 1 | extern crate clap; | |
| 2 | extern crate rrecutils; | |
| 3 | ||
| 4 | fn main() { | |
| 5 | let matches = clap::App::new("rr-pretty") | |
| 6 | .version("0.0") | |
| 7 | .author("Getty Ritter <rrecutils@infinitenegativeutility.com>") | |
| 8 | .about("Display the Rust AST for a Recutils file") | |
| 9 | .get_matches(); | |
| 10 | let source = std::io::stdin(); | |
| 11 | let records = rrecutils::Recfile::parse(source.lock()); | |
| 12 | println!("{:#?}", records); | |
| 13 | } |
| 1 | extern crate clap; | |
| 2 | extern crate rrecutils; | |
| 3 | extern crate serde_json; | |
| 4 | ||
| 5 | use std::{fmt,fs,io}; | |
| 6 | ||
| 7 | use serde_json::Value; | |
| 8 | use serde_json::map::Map; | |
| 9 | ||
| 10 | fn record_to_json(rec: &rrecutils::Record) -> Value { | |
| 11 | let mut m = Map::new(); | |
| 12 | for tup in rec.fields.iter() { | |
| 13 | let k = tup.0.clone(); | |
| 14 | let v = tup.1.clone(); | |
| 15 | m.insert(k, Value::String(v)); | |
| 16 | } | |
| 17 | Value::Object(m) | |
| 18 | } | |
| 19 | ||
/// Returns the `Ok` value of `value`, or reports the error and terminates the
/// process.
///
/// On `Err`, the error's `Debug` representation is written to standard error
/// (standard output may be carrying the JSON result) and the process exits
/// with status 99. Despite the name, this is unrelated to
/// `Result::unwrap_err`.
fn unwrap_err<L, R: fmt::Debug>(value: Result<L, R>) -> L {
    match value {
        Ok(v) => v,
        Err(err) => {
            eprintln!("{:?}", err);
            std::process::exit(99)
        }
    }
}
| 29 | ||
| 30 | fn main() { | |
| 31 | let matches = clap::App::new("rr-to-json") | |
| 32 | .version("0.0") | |
| 33 | .author("Getty Ritter <rrecutils@infinitenegativeutility.com>") | |
| 34 | .about("Display the Rust AST for a Recutils file") | |
| 35 | .arg(clap::Arg::with_name("pretty") | |
| 36 | .short("p") | |
| 37 | .long("pretty") | |
| 38 | .help("Pretty-print the resulting JSON")) | |
| 39 | .arg(clap::Arg::with_name("input") | |
| 40 | .short("i") | |
| 41 | .long("input") | |
| 42 | .value_name("FILE") | |
| 43 | .help("The input recfile (or - for stdin)")) | |
| 44 | .arg(clap::Arg::with_name("output") | |
| 45 | .short("o") | |
| 46 | .long("output") | |
| 47 | .value_name("FILE") | |
| 48 | .help("The desired output location (or - for stdout)")) | |
| 49 | .get_matches(); | |
| 50 | ||
| 51 | let stdin = io::stdin(); | |
| 52 | ||
| 53 | let input: Box<io::BufRead> = | |
| 54 | match matches.value_of("input").unwrap_or("-") { | |
| 55 | "-" => Box::new(stdin.lock()), | |
| 56 | path => | |
| 57 | Box::new(io::BufReader::new(unwrap_err(fs::File::open(path)))), | |
| 58 | }; | |
| 59 | ||
| 60 | let json = Value::Array(unwrap_err(rrecutils::Recfile::parse(input)) | |
| 61 | .records | |
| 62 | .iter() | |
| 63 | .map(|x| record_to_json(x)) | |
| 64 | .collect()); | |
| 65 | ||
| 66 | let mut output: Box<io::Write> = | |
| 67 | match matches.value_of("output").unwrap_or("-") { | |
| 68 | "-" => Box::new(io::stdout()), | |
| 69 | path => Box::new(unwrap_err(fs::File::open(path))), | |
| 70 | }; | |
| 71 | ||
| 72 | let serialized = if matches.is_present("pretty") { | |
| 73 | unwrap_err(serde_json::to_string_pretty(&json)) | |
| 74 | } else { | |
| 75 | json.to_string() | |
| 76 | }; | |
| 77 | ||
| 78 | unwrap_err(writeln!(output, "{}", serialized)); | |
| 79 | ||
| 80 | } |