Package Bio :: Package EUtils :: Module POM
[hide private]
[frames | no frames]

Source Code for Module Bio.EUtils.POM

   1  #!/usr/bin/env python 
   2   
   3  """ 
   4  This module implements the XML POM -- the Python Object Model for XML. It is 
   5  something like DOM, but more Python-ic, and easier to use. These base classes 
   6  are used to build POM source files which are self-validating python-based XML 
   7  constructor objects. The major parts of the dtd2py command line tool are also 
   8  here. 
   9   
  10  """ 
  11   
  12  import sys, os, re, string 
  13   
# Compatibility shim: if the name ``True`` is not defined by the interpreter
# (the NameError path), define integer stand-ins for True and False.
try:
        True
except NameError:
        True = 1
        False = 0
  19   
class ValidationError(ValueError):
    """ValidationError

    Raised when an attempt is made to construct an XML POM tree that would
    be invalid.
    """
27 28 ######################################################### 29 # XML generating classes 30 # These classes are used to generate XML documents, similar to DOM. But, this 31 # interface is simpler and more Python-ic. 32 ######################################################### 33 34 # plain text data to be added to a GenericNode. 35 # this class needs to emulate much of the ElementNode interface.
class IndentedText(str):
    """Plain text data that can be added to an element node.

    The text is kept unescaped in ``self.data`` and is escaped again when the
    node is rendered.  The class emulates the parts of the ElementNode
    interface that tree code calls on children (``set_level``, ``detach``,
    ``destroy``, ``fullpath``, ``matchpath``, ``has_children``, ...).
    """

    def __init__(self, data=""):
        self.data = unescape(unicode(data))
        self._level = 0
        self._parent = None

    def set_text(self, data):
        """Replace the stored text with *data* (unescaped)."""
        self.data = unescape(unicode(data))

    def get_text(self):
        """Return the stored (unescaped) text."""
        return self.data

    def insert(self, data):
        """Prepend *data* (unescaped) to the stored text."""
        self.data = unescape(unicode(data)) + self.data

    def add_text(self, data):
        """Append *data* (unescaped) to the stored text."""
        self.data = self.data + unescape(unicode(data))
    append = add_text

    def __str__(self):
        return "%s%s" % ("\t" * self._level, escape(self.data))

    def __unicode__(self):
        return u"%s%s" % ("\t" * self._level, escape(self.data))

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, escape(self.data))

    def set_level(self, level):
        """Set the indentation level used when rendering."""
        self._level = level

    def get_escape_length(self):
        """Return the length of the text after XML escaping."""
        return len(escape(self.data))

    def detach(self):
        """Forget the parent node and reset the indentation level.

        Mirrors ElementNode.detach() so that a parent's destroy() can treat
        text children and element children alike.
        """
        self._parent = None
        self._level = 0

    def destroy(self):
        """Drop the text and the parent reference."""
        self.data = None
        self._parent = None

    def fullpath(self):
        """Return ``<parent path> = <repr of text>``, or just the repr when
        there is no parent."""
        if self._parent:
            return "%s = %r" % (self._parent.fullpath(), self.data)
        else:
            # repr() instead of backticks: same result, valid on Python 3 too.
            return repr(self.data)

    def matchpath(self, pe):
        """Text never matches a path element."""
        return 0

    def has_children(self):
        """Text has no children."""
        return 0

    def has_attributes(self):
        """Text has no attributes."""
        return 0

    def full_repr(self):
        return repr(self)
84 85
class Text(IndentedText):
    """Text node that renders as its escaped data, without indentation."""

    def __str__(self):
        rendered = escape(self.data)
        return rendered

    def __unicode__(self):
        rendered = escape(self.data)
        return rendered
91
class Comment(IndentedText):
    """XML comment node; renders as ``<!-- text -->`` at its indent level.

    Unlike IndentedText, the data is stored as given (no unescaping) and is
    not escaped on output; only double hyphens are rewritten.
    """

    def __init__(self, data=""):
        self._parent = None
        self._level = 0
        self.data = unicode(data)

    def __str__(self):
        indent = "\t" * self._level
        return "%s<!-- %s -->" % (indent, self._fix(self.data))

    def __unicode__(self):
        indent = "\t" * self._level
        return u"%s<!-- %s -->" % (indent, self._fix(self.data))

    def set_text(self, data):
        """Replace the comment text."""
        self.data = unicode(data)

    def get_text(self):
        """Return the comment text."""
        return self.data

    def insert(self, data):
        """Prepend *data* to the comment text."""
        self.data = unicode(data) + self.data

    def add_text(self, data):
        """Append *data* to the comment text."""
        self.data += unicode(data)
    append = add_text

    def _fix(self, data):
        """Return *data* with every ``--`` replaced by ``- ``."""
        if u"--" in data:
            return data.replace(u"--", u"- ")
        return data
# Abstract base class for generic XML node generation.
# Create an XML node by subclassing this and defining allowed attribute names
# in ATTLIST. CONTENTMODEL holds the content specification from the DTD.
# The name of the subclass should exactly match the name of the XML element.

123 -class ElementNode:
124 ATTLIST = None 125 CONTENTMODEL = None 126 _acquired = { "_indented":1, "_namespace":None } # default acquired values
127 - def __init__(self, **attribs):
128 self._attribs = {} 129 for key, value in attribs.items(): 130 if self._validate_attribute(key): 131 self._attribs[key] = value 132 else: 133 raise ValidationError, "invalid attribute name for this element" 134 self._children = [] 135 self._parent = None 136 self._level = 0 137 self._inline = 0 138 #self._indented = 1 # may be acquired. 139 #self._namespace = None # may be acquired. 140 # you can force element names to a particular case, regardless of 141 # subclass name. This is sometimes needed overcome clashes with Python 142 # keyword names. 143 self._name = self.__class__.__name__
144 145 # check if attribute name is defined for this element
146 - def _validate_attribute(self, name):
147 if self.ATTLIST: 148 for xmlattr in self.ATTLIST: 149 if name == xmlattr.name: 150 return True 151 return False
152
153 - def _verify_attributes(self):
154 if not self.ATTLIST: 155 return None 156 for attr in self.ATTLIST: 157 aval = self._attribs.get(attr.name, None) 158 if aval is None: 159 if attr.a_decl == REQUIRED: 160 raise ValidationError, "required attribute not present: " + attr.name 161 else: 162 attr.verify(aval)
163 164
165 - def get_parent(self):
166 return self._parent
167
168 - def reparent(self, newparent):
169 if self._parent: 170 i = self._parent.index(self) 171 del self._parent[i] 172 newparent.append(self)
173
174 - def detach(self):
175 self._parent = None 176 self._level = 0
177
178 - def destroy(self):
179 """destroy() Remove this node and all child node references.""" 180 # remove parent _children list reference 181 if self._parent: 182 i = self._parent.index(self) 183 del self._parent[i] 184 self._parent = None 185 for n in self._children: 186 n.detach() 187 self._children = None
188
189 - def set_level(self, level):
190 self._level = int(level)
191
192 - def set_inline(self, tf=1):
193 self._inline = not not tf # force to boolean
194
195 - def set_indented(self, tf=1):
196 self._indented = not not tf # force to boolean
197
198 - def inherit_indent(self):
199 "clears indentation flag so that it may be acquired from parent." 200 try: 201 del self.__dict__["_indented"] 202 except KeyError: 203 pass
204
205 - def set_namespace(self, ns):
206 self._namespace = ns
207 208 # some ugly stuff for case-insensitive XHTML
209 - def use_lowercase(self):
210 self._name = self.__class__.__name__.lower()
211
212 - def use_uppercase(self):
213 self._name = self.__class__.__name__.upper()
214
215 - def use_truecase(self):
216 self._name = self.__class__.__name__
217
218 - def index(self, obj):
219 objid = id(obj) 220 i = 0 221 for o in self._children: 222 if id(o) == objid: 223 return i 224 i += 1 225 raise ValueError, "ElementNode: Object not contained here."
226
227 - def append(self, obj):
228 obj.set_level(self._level+1) 229 obj._parent = self 230 self._children.append(obj)
231
232 - def extend(self, objlist):
233 for obj in objlist: 234 self.append(obj)
235
236 - def insert(self, index, obj):
237 obj.set_level(self._level+1) 238 obj._parent = self 239 self._children.insert(index, obj)
240
241 - def add(self, klass, **kwargs):
242 obj = klass( *(), **kwargs) 243 self.append(obj) 244 return obj
245
246 - def get_children(self):
247 return self._children[:]
248
249 - def __iter__(self):
250 return iter(self._children)
251
252 - def add_text(self, text):
253 "Adding text to elements is so common, there is a special method for it." 254 if self.has_children() and isinstance(self._children[-1], IndentedText): 255 self._children[-1].add_text(text) 256 else: 257 t = Text(text) 258 t.set_level(0) 259 self.append(t)
260
261 - def replace_text(self, text):
262 if self._children: 263 del self._children[-1] 264 self.append(Text(text))
265
266 - def __len__(self):
267 return len(self._children)
268 269 # The truth is, we exist.
270 - def __nonzero__(self):
271 return 1
272
273 - def hasAttributes(self):
274 return len(self._attribs)
275 has_attributes = hasAttributes 276
277 - def has_attribute(self, name):
278 if name in self._attribs.keys(): 279 return 1 280 else: 281 return 0
282
283 - def attributes(self):
284 return map(lambda o: o.name, self.ATTLIST)
285
286 - def has_children(self):
287 return len(self._children)
288
289 - def set_attribute(self, name, val):
290 """set_attribute(name, value) 291 This exists to set attributes that have names with illegal Python 292 identifier characters. 293 294 """ 295 if self._validate_attribute(name): 296 self._attribs[name] = val
297
298 - def get_attribute(self, name):
299 """get_attribute(name) 300 This exists to set attributes that have names with illegal Python 301 identifier characters. 302 303 """ 304 return self._attribs[name]
305
306 - def __setattr__(self, name, value):
307 if self._validate_attribute(name): 308 self._attribs[name] = value 309 else: 310 self.__dict__[name] = value
311 312 # this plus the _parent and _acquired attributes implement "acquisiton", 313 # or run-time inheritance.
314 - def __getattr__(self, name):
315 try: 316 return self._attribs[name] 317 except KeyError: 318 pass 319 try: 320 return self._acquire(name) 321 except: 322 pass 323 raise AttributeError, "AttributeError: %s has no attribute '%s'" % (self._name, name)
324
325 - def _acquire(self, name):
326 if self._parent: 327 try: 328 return self._parent.__dict__[name] 329 except KeyError: 330 pass 331 return self._parent._acquire(name) 332 else: 333 try: 334 return self._acquired[name] 335 except KeyError: 336 pass 337 raise AttributeError
338
339 - def __delattr__(self, name):
340 del self._attribs[name]
341
342 - def _find_index(self, index):
343 if type(index) is str: 344 for i in xrange(len(self._children)): 345 if self._children[i].matchpath(index): 346 return i 347 raise IndexError, "no elements match" 348 else: 349 return index
350
351 - def __getitem__(self, index):
352 if type(index) is str: 353 el = self.get_element(index) 354 if el is None: 355 raise IndexError, "no item matches" 356 else: 357 return el 358 else: 359 return self._children[index]
360
361 - def get(self, index, default = None):
362 if isinstance(index, str): 363 el = self.get_element(index) 364 if el is None: 365 return default 366 return el 367 return self._children[index]
368
369 - def has_key(self, index):
370 if isinstance(index, str): 371 return self.get_element(index) is not None 372 raise TypeError("Can only use has_key on a string")
373 374
375 - def __setitem__(self, index, obj):
376 index = self._find_index(index) 377 obj.set_level(self._level+1) 378 obj._parent = self 379 self._children[index] = obj
380
381 - def __delitem__(self, index):
382 index = self._find_index(index) 383 # self._children[index].destroy() 384 del self._children[index]
385
386 - def __repr__(self):
387 attrs = map(lambda t: '%s=%r' % t, self._attribs.items()) 388 return "%s(%s)" % (self.__class__, ", ".join(attrs))
389
390 - def __str__(self):
391 self._verify_attributes() 392 if not self.CONTENTMODEL or self.CONTENTMODEL.is_empty(): 393 return self._empty_str() 394 else: 395 return self._non_empty_str()
396
397 - def __unicode__(self):
398 self._verify_attributes() 399 if not self.CONTENTMODEL or self.CONTENTMODEL.is_empty(): 400 return self._empty_unistr() 401 else: 402 return self._non_empty_unistr()
403
404 - def full_repr(self):
405 s = ["n%d = %r" % ( self._level, self)] 406 s.append("n%d.set_level(%d)" % (self._level, self._level+1)) 407 for c in self._children: 408 if not c.has_children(): 409 s.append("n%d.append(%r)" % (self._level, c)) 410 else: 411 s.append(c.full_repr()) 412 s.append("n%d.append(n%d)" % (self._level, self._level+1)) 413 s.append("del n%d" % (self._level+1)) 414 return "\n".join(s)
415
416 - def _tabs(self):
417 return "\t"*(self._level*self._indented)
418
419 - def _get_ns(self):
420 return IF(self._namespace, "%s:" % self._namespace, "")
421
422 - def _non_empty_str(self):
423 s = ["%s<%s%s%s>" % (self._tabs(), self._get_ns(), self._name, self._attr_str())] 424 map(s.append, map(str, self._children)) 425 s.append("%s</%s%s>" % (IF(self._inline, "", self._tabs()), self._get_ns(), self._name)) 426 if self._inline: 427 return "".join(s) 428 else: 429 return "\n".join(s)
430
431 - def _empty_str(self):
432 return "%s<%s%s%s />" % (self._tabs(), self._get_ns(), self._name, self._attr_str())
433
434 - def _attr_str(self):
435 attrs = map(lambda t: ' %s="%s"' % t, map(lambda t: (t[0], escape(str(t[1]))), filter(lambda t: t[1] is not None, self._attribs.items()))) 436 return "".join(attrs)
437
438 - def _non_empty_unistr(self):
439 s = [u"%s<%s%s%s>" % (self._tabs(), self._get_ns(), self._name, self._attr_unistr())] 440 map(s.append, map(unicode, self._children)) 441 s.append(u"%s</%s%s>" % (IF(self._inline, "", self._tabs()), self._get_ns(), self._name)) 442 if self._inline: 443 return u"".join(s) 444 else: 445 return u"\n".join(s)
446
447 - def _empty_unistr(self):
448 return u"%s<%s%s%s />" % (self._tabs(), self._get_ns(), self._name, self._attr_unistr())
449
450 - def _attr_unistr(self):
451 attrs = map(lambda t: u' %s="%s"' % t, map(lambda t: (t[0], escape(unicode(t[1]))), filter(lambda t: t[1] is not None, self._attribs.items()))) 452 return u"".join(attrs)
453 454 # methods for node path manipulation
455 - def pathname(self):
456 """pathname() returns the ElementNode as a string in xpath format.""" 457 if self._attribs: 458 s = map(lambda i: "@%s='%s'" % (i[0],i[1]), self._attribs.items()) 459 return "%s[%s]" % (self.__class__.__name__, " and ".join(s)) 460 else: 461 return self.__class__.__name__
462
463 - def fullpath(self):
464 """fullpath() returns the ElementNode's full path as a string in xpath format.""" 465 if self._parent: 466 base = self._parent.fullpath() 467 else: 468 base = "" 469 return "%s/%s" % (base, self.pathname() )
470
471 - def matchpath(self, pathelement):
472 if "[" not in pathelement: 473 return pathelement == self._name 474 else: 475 xpath_re = re.compile(r'(\w*)(\[.*])') 476 mo = xpath_re.match(pathelement) 477 if mo: 478 name, match = mo.groups() 479 match = match.replace("@", "self.") 480 match = match.replace("=", "==") 481 return (name == self._name and eval(match[1:-1])) 482 else: 483 raise ValueError, "ivalid path element"
484
485 - def find_elements(self, pathelement):
486 rv = [] 487 for child in self._children: 488 if child.matchpath(pathelement): 489 rv.append(child) 490 return rv
491
492 - def get_element(self, pathelement):
493 for child in self._children: 494 if child.matchpath(pathelement): 495 return child 496 return None
497
498 - def _find_node(self, eltype, collect=None):
499 if collect is None: 500 collection = [] 501 else: 502 collection = collect # should be a list 503 for el in self._children: 504 if el.has_children(): 505 el._find_node(eltype, collection) 506 if isinstance(el, eltype): 507 collection.append(el) 508 return collection
509
510 - def find(self, elclass, **attribs):
511 for obj in self._children: 512 if isinstance(obj, elclass): 513 if self._attribs_match(obj, attribs): 514 return obj 515 return None
516
517 - def getall(self, elclass, depth=0, collect=None):
518 if collect is None: 519 rv = [] 520 else: 521 rv = collect # should be a list 522 for el in self._children: 523 if isinstance(el, elclass): 524 rv.append(el) 525 if depth > 0: 526 el.getall(elclass, depth-1, rv) 527 return rv
528
529 - def _attribs_match(self, obj, attribdict):
530 for tname, tval in attribdict.items(): 531 try: 532 if getattr(obj, tname) != tval: 533 return 0 534 except AttributeError: 535 return 0 536 return 1
537
538 - def tostring(self):
539 return "".join([x.get_text() for x in self.text()])
540 541 # XPath-like functions
542 - def comment(self):
543 return self._find_node(Comment)
544
545 - def text(self):
546 return self._find_node(IndentedText)
547
548 - def processing_instruction(self):
549 return self._find_node(ProcessingInstruction)
550
551 - def node(self):
552 return self._find_node(ElementNode)
553 554 555
class Fragments(ElementNode):
    """Fragments is a special holder class to hold 'loose' markup fragments.
    That is, bits of markup that don't have a common container. It is invisible."""

    def __str__(self):
        """Render only the children, joined by newlines unless inline."""
        parts = [str(child) for child in self._children]
        if self._inline:
            return "".join(parts)
        else:
            return "\n".join(parts)

    def __unicode__(self):
        """Unicode counterpart of __str__."""
        # Children are rendered with unicode(), not str(), so that non-ASCII
        # content is not forced through the default byte encoding.
        parts = [unicode(child) for child in self._children]
        if self._inline:
            return u"".join(parts)
        else:
            return u"\n".join(parts)
575 576 577 578 # base class for whole POM documents, including Header.
class POMDocument:
    """Base class for whole POM documents, including the XML header.

    ``root`` holds the top-level node, ``dtd`` the module used to resolve
    element classes when parsing, ``filename`` the file last loaded with
    get_document(), and ``dirty`` a flag callers may use to track changes.
    """
    HEADER = '<?xml version="1.0" encoding="iso-8859-1"?>\n'

    def __init__(self, dtd=None):
        self.dtd = dtd
        self.root = None
        self.parser = None
        self.dirty = 0
        # Initialised here so write_xmlfile() can be called before
        # get_document() without raising AttributeError.
        self.filename = None

    def __str__(self):
        return self.HEADER + str(self.root) + "\n"

    def __unicode__(self):
        return self.HEADER + unicode(self.root) + "\n"

    def set_dirty(self, val=1):
        """Set the dirty flag."""
        self.dirty = val

    def get_parser(self, handlerclass=None, module=None):
        """Create, store and return a parser that delivers the parsed tree
        to this document."""
        mod = module or self.dtd
        self.parser = get_parser(handlerclass, self._callback, mod)
        return self.parser

    def del_parser(self):
        """Drop the stored parser."""
        self.parser = None

    def _callback(self, doc):
        # Called by the parser handler with the finished tree.
        self.root = doc
        self.dirty = 0

    def parse(self, url, handlerclass=None, module=None):
        """Parse the document at *url* into self.root."""
        mod = module or self.dtd
        if not self.parser:
            self.get_parser(handlerclass, mod)
        try:
            self.parser.parse(url)
        finally:
            # Release the parser even when parsing fails.
            self.del_parser()

    def parseFile(self, fo, handlerclass=None, module=None):
        """Parse the open file object *fo* into self.root."""
        mod = module or self.dtd
        if not self.parser:
            self.get_parser(handlerclass, mod)
        try:
            self.parser.parseFile(fo)
        finally:
            self.del_parser()

    def write_xmlfile(self, filename=None):
        """Write the document to *filename* (default: self.filename) and
        clear the dirty flag.  Nothing is written when no file name is
        known."""
        filename = filename or self.filename
        if filename:
            fo = open(os.path.expanduser(filename), "w")
            try:
                fo.write(str(self))
            finally:
                fo.close()
        self.dirty = 0
    writefile = write_xmlfile

    def writefileobject(self, fo):
        """Write the document to the open file object *fo*."""
        fo.write(str(self))

    def get_document(self, filename, dtdmodule):
        """Parse *filename* using element classes from *dtdmodule* and
        remember the file name."""
        self.get_parser(module=dtdmodule)
        self.parse(filename)
        self.filename = filename

    def getnode(self, path):
        """getnode(path) Returns an ElementNode addressed by the path.

        Raises IndexError when the path is empty or an element is not found.
        """
        elements = path.split("/")
        while elements and not elements[0]:  # eat empty leading elements
            elements.pop(0)
        if not elements:
            raise IndexError("empty path")
        node = self.root
        pathelement = elements.pop(0)
        if not node.matchpath(pathelement):
            raise IndexError("first path element not found")
        while elements:
            pathelement = elements.pop(0)
            node = node.get_element(pathelement)
            if node is None:
                raise IndexError("path element not found")
        return node

    def setnode(self, path, text):
        """Replace the text of the node at *path*."""
        node = self.getnode(path)
        node.replace_text(text)

    def delnode(self, path):
        """Delete the node addressed by *path* from its parent."""
        els = path.split("/")
        path, endnode = "/".join(els[:-1]), els[-1]
        node = self.getnode(path)
        del node[endnode]

    def addnode(self, basepath, newnode):
        """Append *newnode* to the node at *basepath*."""
        node = self.getnode(basepath)
        node.append(newnode)

    def add_text(self, basepath, text):
        """Add *text* to the node at *basepath*."""
        node = self.getnode(basepath)
        node.add_text(text)

    def _write_text(self, fo, node):
        # Write one "path = text" line for every text node under *node*.
        for n in node:
            if isinstance(n, IndentedText):
                fo.write(n.fullpath())
                fo.write("\n")
            else:
                self._write_text(fo, n)

    def write_repr(self, fo):
        """Write the tree's full_repr() to *fo*, which may be a file object
        or a file name (opened and closed here)."""
        realfile = 0
        if type(fo) is str:
            fo = open(fo, "w")
            realfile = 1
        try:
            fo.write(self.root.full_repr())
        finally:
            if realfile:
                fo.close()

    def read_repr(self, filename, localdict=None):
        """Rebuild self.root by executing a file written by write_repr()."""
        if localdict is None:
            localdict = {}
        # SECURITY: this executes the file as Python code; only use it on
        # trusted files.
        execfile(filename, globals(), localdict)
        self.root = localdict["n0"]

    def write_paths(self, fileobject):
        """Write the path and text of every text node to *fileobject*, which
        may be a file object or a file name (opened and closed here)."""
        realfile = 0
        if type(fileobject) is str:
            fileobject = open(fileobject, "w")
            realfile = 1
        try:
            self._write_text(fileobject, self.root)
        finally:
            if realfile:
                fileobject.close()
# Parses XML files into a POM object model. A callback function is then called
# with this object model as a parameter.
class ObjectParserHandler:
    """SAX content handler that builds a POM tree.

    Element classes are looked up by tag name in the given module(s).  When
    the document ends, *callback* is called with the top-level node.
    """

    def __init__(self, callback, module=None):
        self.stack = []
        self.msg = None
        # gets called when message fully parsed. The argument is the
        # toplevel message object.
        self.callback = callback
        self.modules = []
        if module is not None:
            if isinstance(module, list):
                self.modules.extend(module)
            else:
                self.modules.append(module)

    def add_module(self, module):
        """Add another module to search for element classes."""
        self.modules.append(module)

    def _get_class(self, name):
        """Return the first true attribute called *name* found in the
        modules.  Raises AttributeError when none has it."""
        for mod in self.modules:
            klass = getattr(mod, name, None)
            if klass:
                return klass
        raise AttributeError(name)

    def startDocument(self):
        self.stack = []

    def endDocument(self):
        if self.stack:  # stack should be empty now
            raise ValidationError("unbalanced document!")
        self.callback(self.msg)
        self.msg = None

    def startElement(self, name, atts):
        "Handle an event for the beginning of an element."
        try:
            klass = self._get_class(name)
        except AttributeError:
            raise ValidationError("Undefined element tag: " + name)
        # atts is an instance with unicode keys.. must convert to str..
        attr = {}
        for key, value in atts.items():
            attr[str(key)] = unescape(str(value))
        obj = klass(**attr)
        obj.set_level(len(self.stack))
        self.stack.append(obj)

    def endElement(self, name):
        "Handle an event for the end of an element."
        obj = self.stack.pop()
        if self.stack:
            self.stack[-1].append(obj)
        else:
            # The outermost element is complete: keep it for endDocument().
            self.msg = obj

    def characters(self, text):
        # Whitespace-only character data is dropped.
        if self.stack:
            text = text.strip()
            if text:
                self.stack[-1].append(Text(text))

    def ignorableWhitespace(self, ch, start, length):
        pass

    def processingInstruction(self, target, data):
        "Handle a processing instruction event."
        print("unhandled processing instruction: %s %s" % (target, data))

    def setDocumentLocator(self, locator):
        "Receive an object for locating the origin of SAX document events."
        pass
784 785
786 -def _default_parser_callback(obj):
787 print obj
788
def get_parser(handlerclass=None, callback=None, module=None):
    """Return a SAX parser whose content handler builds POM objects.

    handlerclass -- handler class to instantiate (default ObjectParserHandler)
    callback     -- passed to the handler (default _default_parser_callback)
    module       -- passed to the handler (default: this module)
    """
    import xml.sax
    if not handlerclass:
        handlerclass = ObjectParserHandler
    if not callback:
        callback = _default_parser_callback
    if not module:
        module = sys.modules[__name__]
    content_handler = handlerclass(callback, module)
    sax_parser = xml.sax.make_parser()
    sax_parser.setContentHandler(content_handler)
    return sax_parser
798 799 #from xml.parsers.xmlproc.xmlapp import DTDConsumer
def get_dtd_compiler(fo, mixinmodule=None, toupper=0):
    """Return a DTD parser that generates Python source into *fo*.

    A source-file generator is obtained from sourcegen.get_sourcefile(fo) and
    handed, with *mixinmodule* and *toupper*, to a
    DTDConsumerForSourceGeneration that is installed as the parser's DTD
    consumer.
    """
    # sourcegen is imported here and bound as a module-level global.
    global sourcegen
    import sourcegen
    from xml.parsers.xmlproc.dtdparser import DTDParser
    consumer = DTDConsumerForSourceGeneration(
        sourcegen.get_sourcefile(fo), mixinmodule, toupper)
    dtd_parser = DTDParser()
    dtd_parser.set_dtd_consumer(consumer)
    return dtd_parser
# XML helper classes, used in both generation and operation.
# They are instantiated during compilation to generate themselves.
# Then, when imported by the user from the dtds package, they are used normally.
class ContentModel:
    """Represents and validates a content model.

    """

    def __init__(self, rawmodel=None):
        # The raw model is stored as given.
        self.model = rawmodel  # XXX

    def __repr__(self):
        return "%s(%r)" % (self.__class__, self.model)

    def is_empty(self):
        """Return True when the stored model is false (None, empty, ...)."""
        if self.model:
            return False
        return True
827 828
829 -class _ContentModelGenerator:
830 """_ContentModelGenerator(rawmodel) 831 The DTD parser generated and final content model are so different that a 832 different content model generator is used for this object. 833 834 """
835 - def __init__(self, rawmodel=None):
836 tm_type = type(rawmodel) 837 if tm_type is str: 838 if rawmodel == "EMPTY": 839 self.model = EMPTY 840 elif rawmodel == "#PCDATA": 841 self.model = PCDATA 842 elif rawmodel == "ANY": 843 self.model = ANY 844 else: 845 raise ValidationError, "ContentModelGenerator: unknown special type" 846 elif tm_type is tuple: 847 self.model = rawmodel # XXX 848 elif tm_type is type(None): 849 self.model = None 850 else: 851 raise RuntimeError, "unknown content model format"
852
853 - def __repr__(self):
854 return "%s(%r)" % (ContentModel, self.model)
855 856
class Enumeration(list):
    """List subclass used to hold the allowed values of an enumerated
    attribute type."""
859 # XXX 860
class AttributeList(list):
    """List of attribute declarations or values.

    The items live in the list itself; there is no separate ``data``
    attribute.
    """

    def __repr__(self):
        # list.__repr__ is called explicitly because %r on self would
        # recurse into this method.
        return "%s(%s)" % (self.__class__, list.__repr__(self))

    def __str__(self):
        """Return the items' string forms joined by single spaces."""
        return " ".join([str(item) for item in self])

    def __unicode__(self):
        """Unicode counterpart of __str__."""
        return u" ".join([unicode(item) for item in self])
868
869 -class _AttributeType(str):
870 - def __repr__(self):
871 return "%s('%s')" % (self.__class__.__name__, self)
872
class IDREFS(AttributeList):
    """AttributeList holding IDREF values."""

    def add_ref(self, value):
        """Append *value* wrapped as an IDREF."""
        # The list itself is the storage; there is no ``data`` attribute.
        self.append(IDREF(value))
876
class ENTITIES(AttributeList):
    """Attribute list type for ENTITIES attributes."""


class NMTOKENS(AttributeList):
    """Attribute list type for NMTOKENS attributes."""
881
class CDATA(_AttributeType):
    """Attribute type for CDATA attributes."""


class ID(_AttributeType):
    """Attribute type for ID attributes."""


class IDREF(_AttributeType):
    """Attribute type for IDREF attributes."""


class NMTOKEN(_AttributeType):
    """Attribute type for NMTOKEN attributes."""


class ENTITY(_AttributeType):
    """Attribute type for ENTITY attributes."""


# Values a content model can take for the special DTD content keywords.
PCDATA = Text
ANY = True
EMPTY = None

# enumerations: attribute type codes (1..8)
(AT_CDATA, AT_ID, AT_IDREF, AT_IDREFS,
 AT_ENTITY, AT_ENTITIES, AT_NMTOKEN, AT_NMTOKENS) = range(1, 9)

# attribute default-declaration codes (11..14)
REQUIRED, IMPLIED, DEFAULT, FIXED = range(11, 15)

# DTD attribute type keyword -> type code
_ATTRTYPEMAP = dict([
    ("CDATA", AT_CDATA),
    ("ID", AT_ID),
    ("IDREF", AT_IDREF),
    ("IDREFS", AT_IDREFS),
    ("ENTITY", AT_ENTITY),
    ("ENTITIES", AT_ENTITIES),
    ("NMTOKEN", AT_NMTOKEN),
    ("NMTOKENS", AT_NMTOKENS),
])

# type code -> class representing that attribute type
_ATTRCLASSMAP = dict([
    (AT_CDATA, CDATA),
    (AT_ID, ID),
    (AT_IDREF, IDREF),
    (AT_IDREFS, IDREFS),
    (AT_ENTITY, ENTITY),
    (AT_ENTITIES, ENTITIES),
    (AT_NMTOKEN, NMTOKEN),
    (AT_NMTOKENS, NMTOKENS),
])

# DTD default-declaration keyword -> declaration code
_DEFAULTMAP = dict([
    (u'#REQUIRED', REQUIRED),
    (u'#IMPLIED', IMPLIED),
    (u'#DEFAULT', DEFAULT),
    (u'#FIXED', FIXED),
])

class XMLAttribute:
    """Declaration of one XML attribute: name, type, declaration kind and
    default value."""

    def __init__(self, name, a_type, a_decl, default=None):
        """
        name    -- attribute name (converted to str)
        a_type  -- a unicode type keyword (from the parser), an int type code
                   (from a generated file), a list of allowed values, or any
                   other object, which is stored unchanged
        a_decl  -- a '#REQUIRED'-style keyword or an already converted code
        default -- default value, stored unchanged
        """
        self.name = str(name)
        a_type_type = type(a_type)
        if a_type_type is unicode:  # from the parser
            self.a_type = _ATTRTYPEMAP.get(str(a_type), a_type)
        elif a_type_type is int:  # from the generated file
            self.a_type = _ATTRCLASSMAP.get(a_type, a_type)
        elif a_type_type is list:
            # A list of allowed values becomes an Enumeration of strings.
            self.a_type = Enumeration([str(item) for item in a_type])
        else:
            self.a_type = a_type
        # declaration
        # convert string to int value when generating, just use the int when using.
        self.a_decl = _DEFAULTMAP.get(a_decl, a_decl)
        self.default = default
        # save the type to speed verify
        self.a_type_type = type(self.a_type)

    def __repr__(self):
        return "%s(%r, %r, %r, %r)" % (self.__class__, self.name, self.a_type,
                                       self.a_decl, self.default)

    def verify(self, value):
        """Raise ValidationError if this is an enumerated attribute and
        *value* is not one of the allowed values."""
        # isinstance, not "type(...) is list": enumerations are stored as
        # Enumeration (a list subclass), which the exact-type test never
        # matched, so the check was silently skipped.
        if isinstance(self.a_type, list):
            if value not in self.a_type:
                raise ValidationError(
                    "Enumeration has wrong value. %s is not one of %r." % (
                        value, self.a_type))
971 972 973 974 975 976 # this DTD parser consumer generates the Python source code from the DTD.
class DTDConsumerForSourceGeneration:
    """DTD parser consumer that generates Python source code from the DTD.

    Each element declaration becomes a class added to *generator*; attribute
    declarations are collected in that class's ATTLIST.
    """

    def __init__(self, generator, mixins=None, toupper=0):
        self.generator = generator
        self.elements = {}
        self.parameter_entities = {}
        self.general_entities = {}
        self.toupper = toupper  # should element names be converted to all caps?
        self.mixins = mixins  # should be a module object

    def dtd_start(self):
        # No newline here: dtd_end() normally finishes this line.
        sys.stdout.write("Starting to parse DTD... ")
        self.generator.add_comment("This file generated by a program. do not edit.")
        self.generator.add_import(sys.modules[__name__])
        if self.mixins:
            self.generator.add_import(self.mixins)

    def dtd_end(self):
        print("done parsing. Writing file.")
        self.generator.write()

    def new_element_type(self, elem_name, elem_cont):
        "Receives the declaration of an element type."
        if elem_name in self.elements:
            # Already declared: the first declaration is kept.
            return
        parents = [ElementNode]
        mixinname = "%sMixin" % (elem_name)
        if self.mixins and hasattr(self.mixins, mixinname):
            # A matching <element>Mixin class comes first in the bases.
            parents.insert(0, getattr(self.mixins, mixinname))
        ch = self.generator.add_class(
            IF(self.toupper, elem_name.upper(), elem_name), tuple(parents))
        ch.add_attribute("CONTENTMODEL", _ContentModelGenerator(elem_cont))
        self.elements[elem_name] = ch

    def new_attribute(self, elem, attr, a_type, a_decl, a_def):
        "Receives the declaration of a new attribute."
        try:
            element = self.elements[elem]
        except KeyError:
            raise ValidationError("attribute defined before element!")
        try:
            attlist = element.get_attribute("ATTLIST")
        except KeyError:
            element.add_attribute("ATTLIST", AttributeList())
            attlist = element.get_attribute("ATTLIST")
        attlist.append(XMLAttribute(attr, a_type, a_decl, a_def))

    def handle_comment(self, contents):
        "Receives the contents of a comment."
        self.generator.add_comment(contents)

    def new_parameter_entity(self, name, val):
        "Receives internal parameter entity declarations."
        # these are handled internally by the DTD parser. but.. save it anyway.
        self.parameter_entities[name] = val

    def new_external_pe(self, name, pubid, sysid):
        "Receives external parameter entity declarations."
        # these are handled internally by the DTD parser.

    def new_general_entity(self, name, val):
        "Receives internal general entity declarations."
        self.general_entities[name] = val
        # XXX do we need to handle this?

    def new_external_entity(self, ent_name, pub_id, sys_id, ndata):
        """Receives external general entity declarations. 'ndata' is the
        empty string if the entity is parsed."""
        # XXX do we need to handle this?
        print("XXX external entity:")
        print("%s %s %s %s" % (ent_name, pub_id, sys_id, ndata))

    def new_notation(self, name, pubid, sysid):
        "Receives notation declarations."
        # XXX do we need to handle this?
        print("XXX unhandled notation: %s %s %s" % (name, pubid, sysid))

    def handle_pi(self, target, data):
        "Receives the target and data of processing instructions."
        # XXX do we need to handle this?
        print("XXX unhandled PI: %s %s" % (target, data))
1061 1062 ######################################################### 1063 # Utility functions 1064 ######################################################### 1065
def IF(test, tv, fv=None):
    """Return *tv* when *test* is true, otherwise *fv* (default None).

    Both values are evaluated by the caller before the call.
    """
    chosen = fv
    if test:
        chosen = tv
    return chosen
1071
def get_mod_file(sourcefilename):
    """get_mod_file(sourcefilename)
    Converts a file name into a file name inside the dtds package. This file
    name is the destination for generated python files.
    """
    import DTDs as dtds
    from string import maketrans
    basename = os.path.split(sourcefilename)[1]
    modname = os.path.splitext(basename)[0]
    # '-' and '.' are not valid in module names; map both to '_'.
    pyname = modname.translate(maketrans("-.", "__")) + ".py"
    return os.path.join(dtds.__path__[0], pyname)
1081 1082
1083 -def _find_element(elname, modules):
1084 for mod in modules: 1085 try: 1086 return getattr(mod, elname) 1087 except AttributeError: 1088 continue 1089 return None
1090
1091 -def _construct_node(name, modules):
1092 if "[" not in name: 1093 nc = _find_element(name, modules) 1094 if nc is None: 1095 raise ValidationError, "no such element name in modules" 1096 return nc() # node 1097 else: 1098 xpath_re = re.compile(r'(\w*)(\[.*])') 1099 mo = xpath_re.match(name) 1100 if mo: 1101 attdict = {} 1102 ename, attribs = mo.groups() 1103 nc = _find_element(ename, modules) 1104 if nc is None: 1105 raise ValidationError, "no such element name in modules" 1106 attribs = attribs[1:-1].split("and") # chop brackets and split on 'and' 1107 attribs = map(string.strip, attribs) # strip whitespace 1108 for att in attribs: # dict elememnts are name and vaue 1109 name, val = att.split("=") 1110 attdict[name[1:]] = val[1:-1] 1111 return nc( *(), **attdict)
1112 1113 1114
def make_node(path, modules, value=None):
    """make_Node(path, modules, [value])
    Makes a node or an XML fragment given a path, element module list, and an
    optional value.
    """
    if type(modules) is not list:
        modules = [modules]
    names = path.split("/")
    if not names[0]:  # delete possible empty root node
        names = names[1:]
    rootnode = _construct_node(names[0], modules)
    innermost = rootnode
    # Chain each further path element below the previous one.
    for element in names[1:]:
        child = _construct_node(element, modules)
        innermost.append(child)
        innermost = child
    innermost.set_inline()
    if value is not None:
        innermost.add_text(value)
    return rootnode
1134 1135
def unescape(s):
    """Return *s* with the entities &lt; &gt; &quot; and &amp; replaced by
    the characters they stand for (&apos; is left alone)."""
    if '&' not in s:
        return s
    # "&amp;" must be last
    replacements = (
        ("&lt;", "<"),
        ("&gt;", ">"),
        ("&quot;", '"'),
        ("&amp;", "&"),
    )
    for entity, char in replacements:
        s = s.replace(entity, char)
    return s
1145
def escape(s):
    """Return *s* with & < > and the double quote replaced by XML entities
    (the apostrophe is left alone)."""
    # "&" must be first
    replacements = (
        ("&", "&amp;"),
        ("<", "&lt;"),
        (">", "&gt;"),
        ('"', "&quot;"),
    )
    for char, entity in replacements:
        s = s.replace(char, entity)
    return s


# self test
if __name__ == "__main__":
    import os
    FILE = os.path.join(os.environ["PAF_HOME"], "etc", "dtd", "WCSinvalidation.dtd")
    outfilename = get_mod_file(FILE)
    argc = len(sys.argv)
#       outfile = open(outfilename, "w")

    # note: running this script as __main__ will not generate valid source code.
    # Use the dtd2py script for that.
    dtdp = get_dtd_compiler(sys.stdout)
    dtdp.parse_resource(FILE)
#       outfile.close()
    print(Comment("some ------- comment-"))
    print("+++++++")
    import dtds.pvsystem as pvs
    n = make_node("/pvsystem[@major='2' and @dot='0' and @minor='0']/pvac/httpOwsPort", pvs, 8080)
    print(n)
    # The remaining samples are each printed after a separator line.
    samples = [
        ('/pvsystem[@major="2" and @minor="0" and @dot="0"]/globals/enableMonitor', "true"),
        ('globals/enableMonitor', "true"),
        ('enableMonitor', "true"),
        ('enableMonitor', None),
    ]
    for sample_path, sample_value in samples:
        print("+++++++")
        print(make_node(sample_path, pvs, sample_value))