Basic, hacky impl of limited parser
Getty Ritter
6 years ago
1 | [package] | |
2 | name = "rrecutils" | |
3 | version = "0.1.0" | |
4 | authors = ["Getty Ritter <gettylefou@gmail.com>"] | |
5 | ||
6 | [lib] | |
7 | name = "rrecutils" | |
8 | path = "src/lib.rs" | |
9 | ||
10 | [dependencies] | |
11 | regex = "0.2" | |
12 | serde = "*" | |
13 | serde_json = "*" | |
14 | clap = "2.27.1" | |
15 | ||
16 | [[bin]] | |
17 | name = "rr-pretty" | |
18 | path = "src/tools/pretty.rs" | |
19 | ||
20 | [[bin]] | |
21 | name = "rr-to-json" | |
22 | path = "src/tools/tojson.rs" |
1 | %rec: Article | |
2 | ||
3 | Id: 1 | |
4 | Title: Article 1 | |
5 | ||
6 | Id: 2 | |
7 | Title: Article 2 | |
8 | ||
9 | %rec: Stock | |
10 | ||
11 | Id: 1 | |
12 | Type: sell | |
13 | Date: 20 April 2011 | |
14 | ||
15 | Id: 2 | |
16 | Type: stock | |
17 | Date: 21 April 2011 |
1 | Id: 1 | |
2 | Title: Blah | |
3 | ||
4 | Id: 2 | |
5 | Title: Bleh | |
6 | ||
7 | %rec: Movement | |
8 | ||
9 | Date: 13-Aug-2012 | |
10 | Concept: 20 | |
11 | ||
12 | Date: 24-Sept-2012 | |
13 | Concept: 12 |
/// Parser state that outlives a single record while a recfile is being read.
struct ParsingContext {
    /// Value of the most recent `%rec` field seen so far, if any.
    current_record_type: Option<String>,
}

/// One record: an ordered list of `(name, value)` fields.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Record {
    /// The `%rec` value that was in effect when this record was started.
    pub rec_type: Option<String>,
    /// Fields in the order they appeared in the input.
    pub fields: Vec<(String, String)>,
}

/// Every record read from a recfile, in input order.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Recfile {
    pub records: Vec<Record>,
}

impl Recfile {
    /// Parses a recfile from a buffered reader.
    ///
    /// Each line has its leading spaces removed and is then handled as follows:
    ///
    /// * a line starting with `#` is a comment and is skipped;
    /// * an empty line finishes the record being built, if it has any fields;
    /// * a line starting with `+` appends a newline followed by the rest of
    ///   the line (minus one optional leading space) to the previous field;
    /// * any other line must contain a `:`; the text before the first `:` is
    ///   the field name, and the text after it, with leading whitespace
    ///   removed, is the value.
    ///
    /// A `%rec` field is stored like any other field. Its value becomes the
    /// `rec_type` of every record *started after* it; the record that holds
    /// the `%rec` field itself keeps the type it was started with.
    ///
    /// # Errors
    ///
    /// Returns a message when the reader fails (this includes input that is
    /// not valid UTF-8), when a continuation line has no field to extend, or
    /// when a line matches none of the forms above.
    pub fn parse<I>(i: I) -> Result<Recfile, String>
    where
        I: std::io::BufRead,
    {
        let mut ctx = ParsingContext {
            current_record_type: None,
        };
        let mut records = Vec::new();
        let mut current = Record {
            rec_type: None,
            fields: Vec::new(),
        };

        for line in i.lines() {
            // Report read failures instead of quietly returning a truncated file.
            let line = line.map_err(|err| format!("Unable to read input: {}", err))?;
            let ln = line.trim_start_matches(' ');

            if ln.starts_with('#') {
                // skip comment lines
            } else if ln.is_empty() {
                // A blank line only ends a record that actually has content.
                if !current.fields.is_empty() {
                    let next = Record {
                        rec_type: ctx.current_record_type.clone(),
                        fields: Vec::new(),
                    };
                    records.push(std::mem::replace(&mut current, next));
                }
            } else if let Some(rest) = ln.strip_prefix('+') {
                match current.fields.last_mut() {
                    Some(field) => {
                        field.1.push('\n');
                        // Only a single space after `+` is part of the syntax.
                        field.1.push_str(rest.strip_prefix(' ').unwrap_or(rest));
                    }
                    None => {
                        return Err(format!(
                            "Found continuation line in nonsensical place: {}",
                            ln
                        ));
                    }
                }
            } else if let Some(pos) = ln.find(':') {
                let key = &ln[..pos];
                let value = ln[pos + 1..].trim_start();
                if key == "%rec" {
                    ctx.current_record_type = Some(value.to_owned());
                }
                current.fields.push((key.to_owned(), value.to_owned()));
            } else {
                return Err(format!("Invalid line: {:?}", ln));
            }
        }

        // The input may end without a trailing blank line.
        if !current.fields.is_empty() {
            records.push(current);
        }

        Ok(Recfile { records: records })
    }
}
80 | ||
#[cfg(test)]
mod tests {
    use super::{Recfile, Record};

    /// Parses `input` and asserts that it yields exactly `expected`, where
    /// each inner vector lists one record's `(name, value)` fields. Every
    /// expected record is untyped (`rec_type` is `None`).
    fn test_parse(input: &[u8], expected: Vec<Vec<(&str, &str)>>) {
        let records = expected
            .iter()
            .map(|fields| Record {
                rec_type: None,
                fields: fields
                    .iter()
                    .map(|&(name, value)| (name.to_owned(), value.to_owned()))
                    .collect(),
            })
            .collect();
        assert_eq!(Recfile::parse(input), Ok(Recfile { records: records }));
    }

    #[test]
    fn empty_file() {
        test_parse(b"\n", vec![]);
    }

    #[test]
    fn only_comments() {
        test_parse(b"# an empty file\n", vec![]);
    }

    #[test]
    fn one_section() {
        test_parse(b"hello: yes\n", vec![vec![("hello", "yes")]]);
    }

    #[test]
    fn two_sections() {
        test_parse(
            b"hello: yes\n\ngoodbye: no\n",
            vec![
                vec![("hello", "yes")],
                vec![("goodbye", "no")],
            ],
        );
    }

    #[test]
    fn continuation_with_space() {
        test_parse(
            b"hello: yes\n+ but also no\n",
            vec![
                vec![("hello", "yes\nbut also no")],
            ],
        );
    }

    #[test]
    fn continuation_without_space() {
        test_parse(
            b"hello: yes\n+but also no\n",
            vec![
                vec![("hello", "yes\nbut also no")],
            ],
        );
    }

    #[test]
    fn continuation_with_two_spaces() {
        // Only the first space after `+` is syntax; the second one is data.
        test_parse(
            b"hello: yes\n+  but also no\n",
            vec![
                vec![("hello", "yes\n but also no")],
            ],
        );
    }
}
1 | extern crate clap; | |
2 | extern crate rrecutils; | |
3 | ||
4 | fn main() { | |
5 | let matches = clap::App::new("rr-pretty") | |
6 | .version("0.0") | |
7 | .author("Getty Ritter <rrecutils@infinitenegativeutility.com>") | |
8 | .about("Display the Rust AST for a Recutils file") | |
9 | .get_matches(); | |
10 | let source = std::io::stdin(); | |
11 | let records = rrecutils::Recfile::parse(source.lock()); | |
12 | println!("{:#?}", records); | |
13 | } |
1 | extern crate clap; | |
2 | extern crate rrecutils; | |
3 | extern crate serde_json; | |
4 | ||
5 | use std::{fmt,fs,io}; | |
6 | ||
7 | use serde_json::Value; | |
8 | use serde_json::map::Map; | |
9 | ||
10 | fn record_to_json(rec: &rrecutils::Record) -> Value { | |
11 | let mut m = Map::new(); | |
12 | for tup in rec.fields.iter() { | |
13 | let k = tup.0.clone(); | |
14 | let v = tup.1.clone(); | |
15 | m.insert(k, Value::String(v)); | |
16 | } | |
17 | Value::Object(m) | |
18 | } | |
19 | ||
/// Returns the `Ok` value, or reports the error and terminates the process.
///
/// On `Err`, the error's `Debug` representation is written to standard error
/// (so it cannot end up mixed into JSON written to standard output) and the
/// process exits with status 99.
fn unwrap_err<L, R: fmt::Debug>(value: Result<L, R>) -> L {
    match value {
        Ok(v) => v,
        Err(err) => {
            eprintln!("{:?}", err);
            std::process::exit(99)
        }
    }
}
29 | ||
30 | fn main() { | |
31 | let matches = clap::App::new("rr-to-json") | |
32 | .version("0.0") | |
33 | .author("Getty Ritter <rrecutils@infinitenegativeutility.com>") | |
34 | .about("Display the Rust AST for a Recutils file") | |
35 | .arg(clap::Arg::with_name("pretty") | |
36 | .short("p") | |
37 | .long("pretty") | |
38 | .help("Pretty-print the resulting JSON")) | |
39 | .arg(clap::Arg::with_name("input") | |
40 | .short("i") | |
41 | .long("input") | |
42 | .value_name("FILE") | |
43 | .help("The input recfile (or - for stdin)")) | |
44 | .arg(clap::Arg::with_name("output") | |
45 | .short("o") | |
46 | .long("output") | |
47 | .value_name("FILE") | |
48 | .help("The desired output location (or - for stdout)")) | |
49 | .get_matches(); | |
50 | ||
51 | let stdin = io::stdin(); | |
52 | ||
53 | let input: Box<io::BufRead> = | |
54 | match matches.value_of("input").unwrap_or("-") { | |
55 | "-" => Box::new(stdin.lock()), | |
56 | path => | |
57 | Box::new(io::BufReader::new(unwrap_err(fs::File::open(path)))), | |
58 | }; | |
59 | ||
60 | let json = Value::Array(unwrap_err(rrecutils::Recfile::parse(input)) | |
61 | .records | |
62 | .iter() | |
63 | .map(|x| record_to_json(x)) | |
64 | .collect()); | |
65 | ||
66 | let mut output: Box<io::Write> = | |
67 | match matches.value_of("output").unwrap_or("-") { | |
68 | "-" => Box::new(io::stdout()), | |
69 | path => Box::new(unwrap_err(fs::File::open(path))), | |
70 | }; | |
71 | ||
72 | let serialized = if matches.is_present("pretty") { | |
73 | unwrap_err(serde_json::to_string_pretty(&json)) | |
74 | } else { | |
75 | json.to_string() | |
76 | }; | |
77 | ||
78 | unwrap_err(writeln!(output, "{}", serialized)); | |
79 | ||
80 | } |