Better pretty-printing and new tag content examples
Getty Ritter
6 years ago
8 | 8 | self.attrs = {} |
9 | 9 | self.elems = {} |
10 | 10 | self.content = set() |
11 | self.observed = 1 | |
12 | ||
def note_observed(self):
    """Record one more sighting of this tag by bumping ``self.observed``."""
    self.observed = self.observed + 1
11 | 15 | |
12 | 16 | def add_attr(self, k, v): |
13 | 17 | (optional, samples) = self.attrs.get(k, (False, set())) |
17 | 21 | self.elems[elem.tag] = self.elems.get(elem.tag, False) |
18 | 22 | |
def add_content(self, stuff):
    """Remember *stuff* as one sample of this tag's textual content.

    ``self.content`` is a set, so a value seen before is stored only once.
    """
    self.content.add(stuff)
21 | 25 | |
22 | 26 | def make_attrs_optional(self, attrs): |
23 | 27 | missing = set(self.attrs) - set(attrs) |
48 | 52 | tag.add_elem(child) |
49 | 53 | self.add_element(child) |
50 | 54 | |
55 | if elem.text and elem.text.strip(): | |
56 | tag.add_content(elem.text.strip()) | |
57 | ||
def add_subsequent_element(self, elem):
    """Fold another occurrence of an already-cached tag into its record.

    The record is looked up in ``self.cache`` by ``elem.tag`` (a
    ``KeyError`` propagates if it is absent).  The occurrence is counted,
    each attribute and each child element is reported to the record, every
    child is passed on to ``self.add_element``, and non-blank text is
    stored as sample content.  Finally the record is given this element's
    attribute names and child tags via ``make_attrs_optional`` /
    ``make_elems_optional`` -- presumably so that anything absent here is
    downgraded to optional; confirm against those methods.
    """
    tag = self.cache[elem.tag]
    tag.note_observed()

    for name, value in elem.attrib.items():
        tag.add_attr(name, value)

    for child in elem:
        # Report the *child*, not the parent: passing ``elem`` here would
        # register the element's own tag as one of its children.
        tag.add_elem(child)
        self.add_element(child)

    # ``elem.text`` may be None; whitespace-only text is not content.
    text = elem.text.strip() if elem.text else ''
    if text:
        tag.add_content(text)

    tag.make_attrs_optional(elem.attrib.keys())
    tag.make_elems_optional([e.tag for e in elem])
68 | 79 | self.add_subsequent_element(elem) |
69 | 80 | |
70 | 81 | |
71 |
def |
|
def sample_seq(seq):
    """Return a short, comma-separated preview of the values in *seq*.

    At most the first five values (in iteration order) are shown, each
    rendered with ``repr``.  A rendering longer than 24 characters is cut
    to its first 20 characters followed by ``...``; when the full rendering
    ended in a quote character, that quote is appended again so a truncated
    string literal still looks closed.

    Returns an empty string for an empty *seq*.
    """
    previews = []
    for value in seq:
        # Stop early: only five samples are ever displayed, so there is no
        # point in rendering the rest of a large collection.
        if len(previews) == 5:
            break
        text = repr(value)
        if len(text) > 24:
            # Close with the rendering's own final quote (if any) rather
            # than its first character, which is wrong for e.g. b'...'.
            closing = text[-1] if text[-1] in '\'"' else ''
            text = text[:20] + '...' + closing
        previews.append(text)
    return ', '.join(previews)
91 | ||
92 | ||
def optional_text(is_optional):
    """Return ``'sometimes'`` when *is_optional* is truthy, else ``'always'``."""
    return 'sometimes' if is_optional else 'always'
98 | ||
99 | ||
def main(paths):
    """Survey the XML files in *paths* and print a summary for each tag.

    Every file is parsed with ``xml.parse`` and its root element is fed to
    one shared ``Traverse``.  For each entry in the traversal's cache the
    report lists how many times the tag was observed, each attribute with
    sample values, each child tag, and sample text content when any was
    collected.  A blank line separates the entries.
    """
    traversal = Traverse()
    for path in paths:
        root = xml.parse(path).getroot()
        traversal.add_element(root)

    for name, tag in traversal.cache.items():
        plural = '' if tag.observed <= 1 else 's'
        print('tag {0} (observed {1} sample{2})'.format(
            name, tag.observed, plural))

        # Looping over an empty mapping prints nothing, so the attribute
        # and child sections need no separate emptiness check.
        for attr, (optional, samples) in tag.attrs.items():
            print('| - attr {0} ({1})'.format(attr, optional_text(optional)))
            print('| sample values: {0}'.format(sample_seq(samples)))

        for child, optional in tag.elems.items():
            print('| - child {0} ({1})'.format(child, optional_text(optional)))

        if tag.content:
            print('| - has textual content')
            print('| sample content: {0}'.format(sample_seq(tag.content)))

        print()
86 | 122 | |
if __name__ == '__main__':
    # With no file arguments, print a usage line to stderr; otherwise
    # treat every argument after the script name as an XML file to survey.
    if len(sys.argv) < 2:
        sys.stderr.write(
            'usage: {0} [file.xml] ...\n'.format(sys.argv[0]))
    else:
        main(sys.argv[1:])