Better pretty-printing and new tag content examples
Getty Ritter
6 years ago
| 8 | 8 | self.attrs = {} |
| 9 | 9 | self.elems = {} |
| 10 | 10 | self.content = set() |
| 11 | self.observed = 1 | |
| 12 | ||
| 13 | def note_observed(self): | |
| 14 | self.observed += 1 | |
| 11 | 15 | |
| 12 | 16 | def add_attr(self, k, v): |
| 13 | 17 | (optional, samples) = self.attrs.get(k, (False, set())) |
| 17 | 21 | self.elems[elem.tag] = self.elems.get(elem.tag, False) |
| 18 | 22 | |
| 19 | 23 | def add_content(self, stuff): |
| 20 | self.content |= set(stuff) | |
| 24 | self.content |= set([stuff]) | |
| 21 | 25 | |
| 22 | 26 | def make_attrs_optional(self, attrs): |
| 23 | 27 | missing = set(self.attrs) - set(attrs) |
| 48 | 52 | tag.add_elem(child) |
| 49 | 53 | self.add_element(child) |
| 50 | 54 | |
| 55 | if elem.text and elem.text.strip(): | |
| 56 | tag.add_content(elem.text.strip()) | |
| 57 | ||
| 51 | 58 | def add_subsequent_element(self, elem): |
| 52 | 59 | tag = self.cache[elem.tag] |
| 60 | tag.note_observed() | |
| 53 | 61 | |
| 54 | 62 | for k, v in elem.attrib.items(): |
| 55 | 63 | tag.add_attr(k, v) |
| 57 | 65 | for child in elem: |
| 58 | 66 | tag.add_elem(elem) |
| 59 | 67 | self.add_element(child) |
| 68 | ||
| 69 | if elem.text and elem.text.strip(): | |
| 70 | tag.add_content(elem.text.strip()) | |
| 60 | 71 | |
| 61 | 72 | tag.make_attrs_optional(elem.attrib.keys()) |
| 62 | 73 | tag.make_elems_optional([e.tag for e in elem]) |
| 68 | 79 | self.add_subsequent_element(elem) |
| 69 | 80 | |
| 70 | 81 | |
| 71 | def main(path): | |
| 82 | def sample_seq(seq): | |
| 83 | strs = [] | |
| 84 | for s in seq: | |
| 85 | str = repr(s) | |
| 86 | if len(str) > 24: | |
| 87 | strs.append(str[:20] + '...' + str[0]) | |
| 88 | else: | |
| 89 | strs.append(str) | |
| 90 | return ', '.join(strs[:5]) | |
| 91 | ||
| 92 | ||
| 93 | def optional_text(is_optional): | |
| 94 | if is_optional: | |
| 95 | return 'sometimes' | |
| 96 | else: | |
| 97 | return 'always' | |
| 98 | ||
| 99 | ||
| 100 | def main(paths): | |
| 72 | 101 | t = Traverse() |
| 73 | t.add_element(xml.parse(path).getroot()) | |
| 102 | for p in paths: | |
| 103 | t.add_element(xml.parse(p).getroot()) | |
| 104 | ||
| 74 | 105 | for k, v in t.cache.items(): |
| 75 | print('tag {0}'.format(k)) | |
| 106 | print('tag {0} (observed {1} sample{2})'.format(k, v.observed, 's' if v.observed > 1 else '' )) | |
| 107 | ||
| 76 | 108 | if v.attrs: |
| 77 | 109 | for (attr, (optional, sample)) in v.attrs.items(): |
| 78 | print(' - attr {0} ({1})'.format( | |
| 79 | attr, 'optional' if optional else 'mandatory' | |
| 80 | )) | |
| 110 | print('| - attr {0} ({1})'.format(attr, optional_text(optional))) | |
| 111 | print('| sample values: {0}'.format(sample_seq(sample))) | |
| 112 | ||
| 81 | 113 | if v.elems: |
| 82 | 114 | for (elem, optional) in v.elems.items(): |
| 83 | print(' - child {0} ({1})'.format( | |
| 84 | elem, 'optional' if optional else 'mandatory' | |
| 85 | )) | |
| 115 | print('| - child {0} ({1})'.format(elem, optional_text(optional))) | |
| 116 | ||
| 117 | if v.content: | |
| 118 | print('| - has textual content') | |
| 119 | print('| sample content: {0}'.format(sample_seq(v.content))) | |
| 120 | ||
| 121 | print() | |
| 86 | 122 | |
| 87 | 123 | if __name__ == '__main__': |
| 88 | 124 | if sys.argv[1:]: |
| 89 | main(sys.argv[1]) | |
| 125 | main(sys.argv[1:]) | |
| 90 | 126 | else: |
| 91 | 127 | sys.stderr.write( |
| 92 | 'usage: {0} [file.xml]\n'.format(sys.argv[0])) | |
| 128 | 'usage: {0} [file.xml] ...\n'.format(sys.argv[0])) | |