gdritter repos infer-xml / 14c6743
Better pretty-printing and new tag content examples Getty Ritter 6 years ago
1 changed file(s) with 48 addition(s) and 12 deletion(s). Collapse all Expand all
88 self.attrs = {}
99 self.elems = {}
1010 self.content = set()
11 self.observed = 1
12
def note_observed(self):
    """Count one more occurrence of this tag."""
    self.observed = self.observed + 1
1115
1216 def add_attr(self, k, v):
1317 (optional, samples) = self.attrs.get(k, (False, set()))
1721 self.elems[elem.tag] = self.elems.get(elem.tag, False)
1822
def add_content(self, stuff):
    """Record one piece of text content as a single sample.

    ``stuff`` is stored whole in ``self.content``; repeated values are
    kept only once because ``self.content`` is a set.
    """
    # Add the value itself: building a set *from* a string would scatter
    # its individual characters into the samples instead.
    self.content.add(stuff)
2125
2226 def make_attrs_optional(self, attrs):
2327 missing = set(self.attrs) - set(attrs)
4852 tag.add_elem(child)
4953 self.add_element(child)
5054
55 if elem.text and elem.text.strip():
56 tag.add_content(elem.text.strip())
57
def add_subsequent_element(self, elem):
    """Merge another occurrence of an already-seen tag into its record.

    Looks up the record for ``elem.tag`` in ``self.cache``, bumps its
    observation count, and folds in this occurrence's attributes, child
    elements and stripped text.  Each child is also passed to
    ``self.add_element`` so it is processed in turn.  Finally the record
    is told which attributes and child tags this occurrence had, via
    ``make_attrs_optional`` / ``make_elems_optional``.
    """
    tag = self.cache[elem.tag]
    tag.note_observed()

    for k, v in elem.attrib.items():
        tag.add_attr(k, v)

    for child in elem:
        # Register the child, not the parent: passing ``elem`` here would
        # list the tag as a child of itself and miss the real children.
        tag.add_elem(child)
        self.add_element(child)

    # Whitespace-only text is treated as no content at all.
    if elem.text and elem.text.strip():
        tag.add_content(elem.text.strip())

    tag.make_attrs_optional(elem.attrib.keys())
    tag.make_elems_optional([e.tag for e in elem])
6879 self.add_subsequent_element(elem)
6980
7081
71 def main(path):
def sample_seq(seq):
    """Render up to five items of ``seq`` as a comma-separated preview.

    Each item is shown as its ``repr``.  A repr longer than 24 characters
    is cut to its first 20 characters followed by ``...``; when the repr
    ends in a quote character (as string reprs do) that quote is appended
    again so the shortened sample still reads as a quoted literal.

    Returns an empty string when ``seq`` is empty.
    """
    from itertools import islice

    max_items = 5
    max_width = 24
    kept_width = 20

    samples = []
    # Only the items that will actually be displayed are formatted.
    for item in islice(seq, max_items):
        text = repr(item)
        if len(text) > max_width:
            # Close the literal only if it was a quoted one; reprs of
            # numbers and other objects get no stray trailing character.
            closing = text[-1] if text[-1] in '\'"' else ''
            text = text[:kept_width] + '...' + closing
        samples.append(text)
    return ', '.join(samples)
91
92
def optional_text(is_optional):
    """Return the report wording for a flag.

    A truthy ``is_optional`` yields ``'sometimes'``; anything falsy
    yields ``'always'``.
    """
    return 'sometimes' if is_optional else 'always'
98
99
def main(paths):
    """Summarize the structure of one or more XML files on stdout.

    Every file in ``paths`` is parsed and its root element is fed into a
    single ``Traverse``, so the report covers all inputs combined.  For
    each tag in the traversal cache the report shows how many samples were
    observed, every attribute with some sample values, every child tag,
    and sample text content when any was recorded.  Attributes and
    children are labelled via ``optional_text``.
    """
    t = Traverse()
    for p in paths:
        t.add_element(xml.parse(p).getroot())

    for k, v in t.cache.items():
        plural = 's' if v.observed > 1 else ''
        print('tag {0} (observed {1} sample{2})'.format(
            k, v.observed, plural))

        # Each attribute maps to an (optional, sample values) pair.
        for attr, (optional, sample) in v.attrs.items():
            print('| - attr {0} ({1})'.format(attr, optional_text(optional)))
            print('| sample values: {0}'.format(sample_seq(sample)))

        for elem, optional in v.elems.items():
            print('| - child {0} ({1})'.format(elem, optional_text(optional)))

        if v.content:
            print('| - has textual content')
            print('| sample content: {0}'.format(sample_seq(v.content)))

        # Blank line between tags.
        print()
86122
if __name__ == '__main__':
    # Every command-line argument is an XML file; they are handed to main()
    # together as one list so that they are summarized in a single report.
    if sys.argv[1:]:
        main(sys.argv[1:])
    else:
        sys.stderr.write(
            'usage: {0} [file.xml] ...\n'.format(sys.argv[0]))