1
2
3 """
4 This module implements the XML POM -- the Python Object Model for XML. It is
5 something like DOM, but more Python-ic, and easier to use. These base classes
6 are used to build POM source files which are self-validating python-based XML
7 constructor objects. The major parts of the dtd2py command line tool are also
8 here.
9
10 """
11
12 import sys, os, re, string
13
# Compatibility shim: if the name True is not defined (NameError), bind
# True and False to the integers 1 and 0 so the rest of the module can
# use them.
try:
    True
except NameError:
    True = 1
    False = 0
19
21 """ValidationError
22 This exception is raised when an attempt is made to construct an XML POM
23 tree that would be invalid.
24
25 """
26 pass
27
28
29
30
31
32
33
34
35
36 -class IndentedText(str):
def __init__(self, data=""):
    """Set up the text node from *data*.

    *data* is converted with ``unicode`` and passed through ``unescape``
    before being stored in ``self.data``.  The node starts at level 0 and
    has no parent.
    """
    raw = unicode(data)
    self.data = unescape(raw)
    self._level = 0
    self._parent = None
def set_text(self, data):
    """Replace the stored text with the unescaped unicode form of *data*."""
    raw = unicode(data)
    self.data = unescape(raw)
def insert(self, data):
    """Prepend the unescaped unicode form of *data* to the stored text."""
    prefix = unescape(unicode(data))
    self.data = prefix + self.data
def add_text(self, data):
    """Append the unescaped unicode form of *data* to the stored text."""
    self.data += unescape(unicode(data))
# ``append`` is an alias for ``add_text``.
append = add_text
51 return "%s%s" % ("\t"*self._level, escape(self.data))
def __unicode__(self):
    """Return the escaped text prefixed with one tab per indentation level."""
    indent = "\t" * self._level
    body = escape(self.data)
    return u"%s%s" % (indent, body)
55 return "%s(%r)" % (self.__class__.__name__, escape(self.data))
56 - def set_level(self, level):
58
59
60
61
62
63
64
65
67 return len(escape(self.data))
69 self.data = None
70 self._parent = None
72 if self._parent:
73 return "%s = %r" % (self._parent.fullpath(), self.data)
74 else:
75 return `self.data`
76 - def matchpath(self, pe):
78 - def has_children(self):
80 - def has_attributes(self):
82 - def full_repr(self):
84
85
86 -class Text(IndentedText):
88 return escape(self.data)
def __unicode__(self):
    """Return the escaped text with no indentation prefix."""
    escaped = escape(self.data)
    return escaped
91
115
116
117
118
119
120
121
122
124 ATTLIST = None
125 CONTENTMODEL = None
126 _acquired = { "_indented":1, "_namespace":None }
128 self._attribs = {}
129 for key, value in attribs.items():
130 if self._validate_attribute(key):
131 self._attribs[key] = value
132 else:
133 raise ValidationError, "invalid attribute name for this element"
134 self._children = []
135 self._parent = None
136 self._level = 0
137 self._inline = 0
138
139
140
141
142
143 self._name = self.__class__.__name__
144
145
152
154 if not self.ATTLIST:
155 return None
156 for attr in self.ATTLIST:
157 aval = self._attribs.get(attr.name, None)
158 if aval is None:
159 if attr.a_decl == REQUIRED:
160 raise ValidationError, "required attribute not present: " + attr.name
161 else:
162 attr.verify(aval)
163
164
167
169 if self._parent:
170 i = self._parent.index(self)
171 del self._parent[i]
172 newparent.append(self)
173
175 self._parent = None
176 self._level = 0
177
179 """destroy() Remove this node and all child node references."""
180
181 if self._parent:
182 i = self._parent.index(self)
183 del self._parent[i]
184 self._parent = None
185 for n in self._children:
186 n.detach()
187 self._children = None
188
190 self._level = int(level)
191
193 self._inline = not not tf
194
196 self._indented = not not tf
197
199 "clears indentation flag so that it may be acquired from parent."
200 try:
201 del self.__dict__["_indented"]
202 except KeyError:
203 pass
204
207
208
210 self._name = self.__class__.__name__.lower()
211
213 self._name = self.__class__.__name__.upper()
214
216 self._name = self.__class__.__name__
217
219 objid = id(obj)
220 i = 0
221 for o in self._children:
222 if id(o) == objid:
223 return i
224 i += 1
225 raise ValueError, "ElementNode: Object not contained here."
226
231
235
236 - def insert(self, index, obj):
240
241 - def add(self, klass, **kwargs):
245
247 return self._children[:]
248
250 return iter(self._children)
251
def add_text(self, text):
    """Add *text* to this element.

    If the last child is an IndentedText instance the text is added to
    that child; otherwise a new Text node at level 0 is appended.
    """
    if self.has_children():
        tail = self._children[-1]
        if isinstance(tail, IndentedText):
            tail.add_text(text)
            return
    node = Text(text)
    node.set_level(0)
    self.append(node)
260
def replace_text(self, text):
    """Drop the last child, if there is one, then append a Text node for *text*."""
    # NOTE(review): the last child is removed whatever its type -- confirm
    # that callers only use this on elements whose last child is text.
    kids = self._children
    if kids:
        kids.pop()
    self.append(Text(text))
265
267 return len(self._children)
268
269
272
274 return len(self._attribs)
275 has_attributes = hasAttributes
276
278 if name in self._attribs.keys():
279 return 1
280 else:
281 return 0
282
285
287 return len(self._children)
288
290 """set_attribute(name, value)
291 This exists to set attributes that have names with illegal Python
292 identifier characters.
293
294 """
295 if self._validate_attribute(name):
296 self._attribs[name] = val
297
299 """get_attribute(name)
300 This exists to set attributes that have names with illegal Python
301 identifier characters.
302
303 """
304 return self._attribs[name]
305
311
312
313
315 try:
316 return self._attribs[name]
317 except KeyError:
318 pass
319 try:
320 return self._acquire(name)
321 except:
322 pass
323 raise AttributeError, "AttributeError: %s has no attribute '%s'" % (self._name, name)
324
326 if self._parent:
327 try:
328 return self._parent.__dict__[name]
329 except KeyError:
330 pass
331 return self._parent._acquire(name)
332 else:
333 try:
334 return self._acquired[name]
335 except KeyError:
336 pass
337 raise AttributeError
338
340 del self._attribs[name]
341
343 if type(index) is str:
344 for i in xrange(len(self._children)):
345 if self._children[i].matchpath(index):
346 return i
347 raise IndexError, "no elements match"
348 else:
349 return index
350
352 if type(index) is str:
353 el = self.get_element(index)
354 if el is None:
355 raise IndexError, "no item matches"
356 else:
357 return el
358 else:
359 return self._children[index]
360
361 - def get(self, index, default = None):
368
370 if isinstance(index, str):
371 return self.get_element(index) is not None
372 raise TypeError("Can only use has_key on a string")
373
374
380
385
387 attrs = map(lambda t: '%s=%r' % t, self._attribs.items())
388 return "%s(%s)" % (self.__class__, ", ".join(attrs))
389
396
403
405 s = ["n%d = %r" % ( self._level, self)]
406 s.append("n%d.set_level(%d)" % (self._level, self._level+1))
407 for c in self._children:
408 if not c.has_children():
409 s.append("n%d.append(%r)" % (self._level, c))
410 else:
411 s.append(c.full_repr())
412 s.append("n%d.append(n%d)" % (self._level, self._level+1))
413 s.append("del n%d" % (self._level+1))
414 return "\n".join(s)
415
417 return "\t"*(self._level*self._indented)
418
420 return IF(self._namespace, "%s:" % self._namespace, "")
421
430
433
435 attrs = map(lambda t: ' %s="%s"' % t, map(lambda t: (t[0], escape(str(t[1]))), filter(lambda t: t[1] is not None, self._attribs.items())))
436 return "".join(attrs)
437
446
449
451 attrs = map(lambda t: u' %s="%s"' % t, map(lambda t: (t[0], escape(unicode(t[1]))), filter(lambda t: t[1] is not None, self._attribs.items())))
452 return u"".join(attrs)
453
454
456 """pathname() returns the ElementNode as a string in xpath format."""
457 if self._attribs:
458 s = map(lambda i: "@%s='%s'" % (i[0],i[1]), self._attribs.items())
459 return "%s[%s]" % (self.__class__.__name__, " and ".join(s))
460 else:
461 return self.__class__.__name__
462
464 """fullpath() returns the ElementNode's full path as a string in xpath format."""
465 if self._parent:
466 base = self._parent.fullpath()
467 else:
468 base = ""
469 return "%s/%s" % (base, self.pathname() )
470
472 if "[" not in pathelement:
473 return pathelement == self._name
474 else:
475 xpath_re = re.compile(r'(\w*)(\[.*])')
476 mo = xpath_re.match(pathelement)
477 if mo:
478 name, match = mo.groups()
479 match = match.replace("@", "self.")
480 match = match.replace("=", "==")
481 return (name == self._name and eval(match[1:-1]))
482 else:
483 raise ValueError, "ivalid path element"
484
486 rv = []
487 for child in self._children:
488 if child.matchpath(pathelement):
489 rv.append(child)
490 return rv
491
493 for child in self._children:
494 if child.matchpath(pathelement):
495 return child
496 return None
497
499 if collect is None:
500 collection = []
501 else:
502 collection = collect
503 for el in self._children:
504 if el.has_children():
505 el._find_node(eltype, collection)
506 if isinstance(el, eltype):
507 collection.append(el)
508 return collection
509
def find(self, elclass, **attribs):
    """Return the first direct child that is an instance of *elclass* and
    for which ``self._attribs_match(child, attribs)`` is true.

    Returns None when no child qualifies.
    """
    for child in self._children:
        if not isinstance(child, elclass):
            continue
        if self._attribs_match(child, attribs):
            return child
    return None
516
def getall(self, elclass, depth=0, collect=None):
    """Collect children that are instances of *elclass*.

    Direct children are always examined.  While *depth* is greater than
    zero, each child's own ``getall`` is also called with ``depth - 1``.
    Matches are appended to *collect* when it is given, otherwise to a new
    list; that list is returned.
    """
    found = collect
    if found is None:
        found = []
    for child in self._children:
        if isinstance(child, elclass):
            found.append(child)
        if depth > 0:
            # Every child is asked, not only the matching ones.
            child.getall(elclass, depth - 1, found)
    return found
528
530 for tname, tval in attribdict.items():
531 try:
532 if getattr(obj, tname) != tval:
533 return 0
534 except AttributeError:
535 return 0
536 return 1
537
540
541
544
547
550
553
554
555
557 """Fragments is a special holder class to hold 'loose' markup fragments.
558 That is, bits of markup that don't have a common container. It is invisible."""
559
561 s = []
562 map(s.append, map(str, self._children))
563 if self._inline:
564 return "".join(s)
565 else:
566 return "\n".join(s)
567
569 s = []
570 map(s.append, map(str, self._children))
571 if self._inline:
572 return u"".join(s)
573 else:
574 return u"\n".join(s)
575
576
577
578
580 HEADER = '<?xml version="1.0" encoding="iso-8859-1"?>\n'
582 self.dtd = dtd
583 self.root = None
584 self.parser = None
585 self.dirty = 0
586
588 return self.HEADER + str(self.root) + "\n"
589
591 return self.HEADER + unicode(self.root) + "\n"
592
595
596 - def get_parser(self, handlerclass=None, module=None):
600
603
605 self.root = doc
606 self.dirty = 0
607
608 - def parse(self, url, handlerclass=None, module=None):
614
615 - def parseFile(self, fo, handlerclass=None, module=None):
621
623 filename = filename or self.filename
624 if filename:
625 fo = open(os.path.expanduser(filename), "w")
626 try:
627 fo.write(str(self))
628 finally:
629 fo.close()
630 self.dirty = 0
631 writefile = write_xmlfile
632
635
637 self.get_parser(module=dtdmodule)
638 self.parse(filename)
639 self.filename = filename
640
657
661
663 els = path.split("/")
664 path, endnode = "/".join(els[:-1]), els[-1]
665 node = self.getnode(path)
666 del node[endnode]
667
668 - def addnode(self, basepath, newnode):
671
def add_text(self, basepath, text):
    """Look up the node at *basepath* with ``getnode`` and add *text* to it."""
    self.getnode(basepath).add_text(text)
675
def _write_text(self, fo, node):
    """Write one line per text node found under *node* to *fo*.

    Each IndentedText child contributes its ``fullpath()`` followed by a
    newline; any other child is descended into recursively.
    """
    for child in node:
        if not isinstance(child, IndentedText):
            self._write_text(fo, child)
            continue
        fo.write(child.fullpath())
        fo.write("\n")
683
685 realfile = 0
686 if type(fo) is str:
687 fo = open(fo, "w")
688 realfile = 1
689 fo.write(self.root.full_repr())
690 if realfile:
691 fo.close()
692
def read_repr(self, filename, localdict=None):
    """Execute the Python file *filename* and use its ``n0`` as the root.

    The file is run with this module's globals and *localdict* as the
    local namespace.  A fresh dictionary is used only when *localdict* is
    None, so a caller-supplied dictionary (even an empty one) receives the
    names the file defines.

    Raises KeyError if the executed file does not bind ``n0``.
    """
    # An "or {}" test would replace a caller's empty dict with a private
    # one; test for None explicitly instead.
    if localdict is None:
        localdict = {}
    # NOTE(security): execfile runs arbitrary Python code from *filename*;
    # only use this on files from a trusted source.
    execfile(filename, globals(), localdict)
    self.root = localdict["n0"]
697
699 realfile = 0
700 if type(fileobject) is str:
701 fileobject = open(fileobject, "w")
702 realfile = 1
703 self._write_text(fileobject, self.root)
704 if realfile:
705 fileobject.close()
706
707
708
709
710
def __init__(self, callback, module=None):
    """Create a handler with an empty element stack.

    *callback* is stored for later use.  *module* may be None (no
    modules), a list of modules (copied), or a single module.
    """
    self.stack = []
    self.msg = None
    self.callback = callback
    # Always build a new list so the caller's list is never shared.
    if module is None:
        self.modules = []
    elif type(module) is list:
        self.modules = list(module)
    else:
        self.modules = [module]
723
725 self.modules.append(module)
726
728 klass = None
729 for mod in self.modules:
730 try:
731 klass = getattr(mod, name)
732 except AttributeError:
733 continue
734 if klass:
735 return klass
736 raise AttributeError
737
738
741
743 if self.stack:
744 raise ValidationError, "unbalanced document!"
745 self.callback(self.msg)
746 self.msg = None
747
749 "Handle an event for the beginning of an element."
750 try:
751 klass = self._get_class(name)
752 except AttributeError:
753 raise ValidationError, "Undefined element tag: "+name
754 attr = {}
755 def fixatts(t):
756 attr[str(t[0])] = unescape(str(t[1]))
757 map(fixatts, atts.items())
758 obj = klass( *(), **attr)
759 obj.set_level(len(self.stack))
760 self.stack.append(obj)
761
763 "Handle an event for the end of an element."
764 obj = self.stack.pop()
765 if self.stack:
766 self.stack[-1].append(obj)
767 else:
768 self.msg = obj
769
775
779 "Handle a processing instruction event."
780 print "unhandled processing instruction:", target, data
782 "Receive an object for locating the origin of SAX document events."
783 pass
784
785
788
def get_parser(handlerclass=None, callback=None, module=None):
    """Return an ``xml.sax`` parser wired to a content handler.

    Any argument that is false is replaced by a default:
    ObjectParserHandler for *handlerclass*, _default_parser_callback for
    *callback*, and this module for *module*.  The handler is built as
    ``handlerclass(callback, module)`` and installed on the parser.
    """
    from xml.sax import make_parser
    handler_factory = handlerclass or ObjectParserHandler
    on_document = callback or _default_parser_callback
    element_source = module or sys.modules[__name__]
    content_handler = handler_factory(on_document, element_source)
    sax_parser = make_parser()
    sax_parser.setContentHandler(content_handler)
    return sax_parser
798
799
809
810
811
812
813
814
816 """Represents and validates a content model.
817
818 """
def __init__(self, rawmodel=None):
    """Store *rawmodel* unchanged as ``self.model``."""
    self.model = rawmodel
821
def __repr__(self):
    """Return ``<class>(<repr of model>)`` for this instance's class."""
    parts = (self.__class__, self.model)
    return "%s(%r)" % parts
824
def is_empty(self):
    """Return true when ``self.model`` is false (for example None or empty)."""
    if self.model:
        return False
    return True
827
828
830 """_ContentModelGenerator(rawmodel)
831 The DTD parser generated and final content model are so different that a
832 different content model generator is used for this object.
833
834 """
def __init__(self, rawmodel=None):
    """Translate *rawmodel* into the value stored as ``self.model``.

    A tuple or None is stored as is.  The strings "EMPTY", "#PCDATA" and
    "ANY" are mapped to the module constants EMPTY, PCDATA and ANY.

    Raises ValidationError for any other string and RuntimeError for any
    other type.
    """
    kind = type(rawmodel)
    if kind is tuple or kind is type(None):
        self.model = rawmodel
        return
    if kind is not str:
        raise RuntimeError("unknown content model format")
    if rawmodel == "EMPTY":
        self.model = EMPTY
    elif rawmodel == "#PCDATA":
        self.model = PCDATA
    elif rawmodel == "ANY":
        self.model = ANY
    else:
        raise ValidationError("ContentModelGenerator: unknown special type")
852
def __repr__(self):
    """Return a representation that names ContentModel, not this class."""
    # The class named in the output is ContentModel rather than
    # self.__class__.
    parts = (ContentModel, self.model)
    return "%s(%r)" % parts
855
856
859
860
863 return "%s(%r)" % (self.__class__, self.data)
865 return " ".join(map(str, self.data))
867 return u" ".join(map(str, self.data))
868
871 return "%s('%s')" % (self.__class__.__name__, self)
872
876
881
882 -class CDATA(_AttributeType):
884 -class ID(_AttributeType):
886 -class IDREF(_AttributeType):
892
893
# Special content model values assigned by _ContentModelGenerator.
PCDATA = Text
ANY = True
EMPTY = None

# Integer codes for attribute types (1 through 8).
(AT_CDATA, AT_ID, AT_IDREF, AT_IDREFS,
 AT_ENTITY, AT_ENTITIES, AT_NMTOKEN, AT_NMTOKENS) = range(1, 9)

# Integer codes for attribute default declarations (11 through 14).
REQUIRED, IMPLIED, DEFAULT, FIXED = range(11, 15)

# Attribute type name -> integer code.
_ATTRTYPEMAP = {
    "CDATA": AT_CDATA,
    "ID": AT_ID,
    "IDREF": AT_IDREF,
    "IDREFS": AT_IDREFS,
    "ENTITY": AT_ENTITY,
    "ENTITIES": AT_ENTITIES,
    "NMTOKEN": AT_NMTOKEN,
    "NMTOKENS": AT_NMTOKENS,
}

# Integer code -> attribute type class.
_ATTRCLASSMAP = {
    AT_CDATA: CDATA,
    AT_ID: ID,
    AT_IDREF: IDREF,
    AT_IDREFS: IDREFS,
    AT_ENTITY: ENTITY,
    AT_ENTITIES: ENTITIES,
    AT_NMTOKEN: NMTOKEN,
    AT_NMTOKENS: NMTOKENS,
}

# Default declaration keyword -> integer code.
_DEFAULTMAP = {
    u'#REQUIRED': REQUIRED,
    u'#IMPLIED': IMPLIED,
    u'#DEFAULT': DEFAULT,
    u'#FIXED': FIXED,
}
941
def __init__(self, name, a_type, a_decl, default=None):
    """Record an attribute declaration.

    *name* is stored as a str.  *a_type* is resolved by its exact type:
    a unicode value is looked up in _ATTRTYPEMAP, an int in _ATTRCLASSMAP
    (either lookup falls back to the value itself), a list becomes an
    Enumeration of its items as strings, and anything else is kept as is.
    *a_decl* is looked up in _DEFAULTMAP with the same fallback, and
    *default* is stored unchanged.
    """
    self.name = str(name)
    kind = type(a_type)
    resolved = a_type
    if kind is unicode:
        resolved = _ATTRTYPEMAP.get(str(a_type), a_type)
    elif kind is int:
        resolved = _ATTRCLASSMAP.get(a_type, a_type)
    elif kind is list:
        resolved = Enumeration(map(str, a_type))
    self.a_type = resolved

    self.a_decl = _DEFAULTMAP.get(a_decl, a_decl)
    self.default = default
    # Remember the type of the resolved attribute type.
    self.a_type_type = type(self.a_type)
963
965 return "%s(%r, %r, %r, %r)" % (self.__class__, self.name, self.a_type, self.a_decl, self.default)
966
968 if type(self.a_type) is list:
969 if value not in self.a_type:
970 raise ValidationError, "Enumeration has wrong value. %s is not one of %r." % (value, self.a_type)
971
972
973
974
975
976
def __init__(self, generator, mixins=None, toupper=0):
    """Store the arguments and start with empty element and entity tables."""
    self.generator = generator
    self.mixins = mixins
    self.toupper = toupper
    # Tables that start empty.
    self.elements = {}
    self.parameter_entities = {}
    self.general_entities = {}
985
987 print "Starting to parse DTD...",
988 self.generator.add_comment("This file generated by a program. do not edit.")
989 self.generator.add_import(sys.modules[__name__])
990 if self.mixins:
991 self.generator.add_import(self.mixins)
992
994 print "done parsing. Writing file."
995 self.generator.write()
996
1009
1022
1026
1028 "Receives internal parameter entity declarations."
1029
1030 self.parameter_entities[name] = val
1031
1033 "Receives external parameter entity declarations."
1034
1035
1037 "Receives internal general entity declarations."
1038 self.general_entities[name] = val
1039
1040
1041
1042
1044 """Receives external general entity declarations. 'ndata' is the
1045 empty string if the entity is parsed."""
1046
1047 print "XXX external entity:"
1048 print ent_name, pub_id, sys_id, ndata
1049
1051 "Receives notation declarations."
1052
1053 print "XXX unhandled notation:",
1054 print name, pubid, sysid
1055
1057 "Receives the target and data of processing instructions."
1058
1059 print "XXX unhandled PI:",
1060 print target, data
1061
1062
1063
1064
1065
def IF(test, tv, fv=None):
    """Return *tv* when *test* is true, otherwise *fv* (None by default).

    Both *tv* and *fv* are evaluated by the caller before this function
    runs.
    """
    if not test:
        return fv
    return tv
1071
1073 """get_mod_file(sourcefilename)
1074 Converts a file name into a file name inside the dtds package. This file
1075 name is the destination for generated python files.
1076 """
1077 import DTDs as dtds
1078 from string import maketrans
1079 modname = os.path.splitext(os.path.split(sourcefilename)[1])[0]
1080 return os.path.join(dtds.__path__[0], modname.translate(maketrans("-.", "__"))+".py")
1081
1082
1084 for mod in modules:
1085 try:
1086 return getattr(mod, elname)
1087 except AttributeError:
1088 continue
1089 return None
1090
1092 if "[" not in name:
1093 nc = _find_element(name, modules)
1094 if nc is None:
1095 raise ValidationError, "no such element name in modules"
1096 return nc()
1097 else:
1098 xpath_re = re.compile(r'(\w*)(\[.*])')
1099 mo = xpath_re.match(name)
1100 if mo:
1101 attdict = {}
1102 ename, attribs = mo.groups()
1103 nc = _find_element(ename, modules)
1104 if nc is None:
1105 raise ValidationError, "no such element name in modules"
1106 attribs = attribs[1:-1].split("and")
1107 attribs = map(string.strip, attribs)
1108 for att in attribs:
1109 name, val = att.split("=")
1110 attdict[name[1:]] = val[1:-1]
1111 return nc( *(), **attdict)
1112
1113
1114
1116 """make_Node(path, modules, [value])
1117 Makes a node or an XML fragment given a path, element module list, and an
1118 optional value.
1119 """
1120 if type(modules) is not list:
1121 modules = [modules]
1122 pathelements = path.split("/")
1123 if not pathelements[0]:
1124 del pathelements[0]
1125 rootnode = current = _construct_node(pathelements[0], modules)
1126 for element in pathelements[1:]:
1127 new = _construct_node(element, modules)
1128 current.append(new)
1129 current = new
1130 current.set_inline()
1131 if value is not None:
1132 current.add_text(value)
1133 return rootnode
1134
1135
1137 if '&' not in s:
1138 return s
1139 s = s.replace("<", "<")
1140 s = s.replace(">", ">")
1141
1142 s = s.replace(""", '"')
1143 s = s.replace("&", "&")
1144 return s
1145
1147 s = s.replace("&", "&")
1148 s = s.replace("<", "<")
1149 s = s.replace(">", ">")
1150
1151 s = s.replace('"', """)
1152 return s
1153
1154
# Demo run when the module is executed as a script: compile a DTD found
# under $PAF_HOME, then print a few sample nodes.
if __name__ == "__main__":
    import os
    FILE = os.path.join(os.environ["PAF_HOME"], "etc", "dtd", "WCSinvalidation.dtd")
    outfilename = get_mod_file(FILE)
    argc = len(sys.argv)

    # Compile the DTD; the compiler is created with sys.stdout.
    dtdp = get_dtd_compiler(sys.stdout)
    dtdp.parse_resource(FILE)

    # Single-argument print(...) prints the same text as the statement form.
    print(Comment("some ------- comment-"))
    print("+++++++")
    import dtds.pvsystem as pvs
    n = make_node("/pvsystem[@major='2' and @dot='0' and @minor='0']/pvac/httpOwsPort", pvs, 8080)
    print(n)
    print("+++++++")
    print(make_node('/pvsystem[@major="2" and @minor="0" and @dot="0"]/globals/enableMonitor', pvs, "true"))
    print("+++++++")
    print(make_node('globals/enableMonitor', pvs, "true"))
    print("+++++++")
    print(make_node('enableMonitor', pvs, "true"))
    print("+++++++")
    print(make_node('enableMonitor', pvs))
1180