# Copyright (c) 1999-2007 by Fredrik Lundh
# 2008 Bastian Blank <bblank@thinkmo.de>
#
# By obtaining, using, and/or copying this software and/or its
# associated documentation, you agree that you have read, understood,
# and will comply with the following terms and conditions:
#
# Permission to use, copy, modify, and distribute this software and
# its associated documentation for any purpose and without fee is
# hereby granted, provided that the above copyright notice appears in
# all copies, and that both that copyright notice and this permission
# notice appear in supporting documentation, and that the name of
# Secret Labs AB or the author not be used in advertising or publicity
# pertaining to distribution of the software without specific, written
# prior permission.
#
# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
# OF THIS SOFTWARE.

from __future__ import generators

# Names exported by "from <module> import *".
__all__ = [
    # public symbols
    "Comment",
    "dump",
    "Element",
    "ElementTree",
    "fromstring",
    "fromstringlist",
    "iterparse",
    "Node",
    "parse",
    "ParseError",
    "PI",
    "ProcessingInstruction",
    "QName",
    "SubElement",
    "tostring",
    "tostringlist",
    "TreeBuilder",
    "XML",
    "XMLParser",
    "XMLWriter",
    ]

##
# The <b>Element</b> type is a flexible container object, designed to
# store hierarchical data structures in memory. The type can be
# described as a cross between a list and a dictionary.
# <p>
# Each element has a number of properties associated with it:
# <ul>
# <li>a <i>tag</i>.
# This is a string identifying what kind of data
# this element represents (the element type, in other words).</li>
# <li>a number of <i>attributes</i>, stored in a Python dictionary.</li>
# <li>a <i>text</i> string.</li>
# <li>an optional <i>tail</i> string.</li>
# <li>a number of <i>child elements</i>, stored in a Python sequence</li>
# </ul>
#
# To create an element instance, use the {@link #Element} constructor
# or the {@link #SubElement} factory function.
# <p>
# The {@link #ElementTree} class can be used to wrap an element
# structure, and convert it from and to XML.
##

import ElementPath


##
# Exception raised when XML input cannot be parsed.

class ParseError(SyntaxError):
    pass

# --------------------------------------------------------------------


class Node(object):
    """
    Node class.

    Base class of every object that can be stored in a tree besides
    plain text: elements, comments and processing instructions.
    """

    ##
    # Serializes this node.
    #
    # @param write Callable that receives each output fragment.
    # @param encoding Optional output encoding, handed to the writer.
    # @param namespaces Optional mapping from namespace URI to prefix.
    # @param method "xml" (or nothing) selects XMLWriter, "html" selects
    #     HTMLWriter, any other value selects TextWriter.

    def write(self, write, encoding=None, namespaces={}, method=None):
        if not method or method == "xml":
            writer_class = XMLWriter
        elif method == "html":
            writer_class = HTMLWriter
        else:
            writer_class = TextWriter
        # Give the writer a private copy: the default dictionary above is
        # shared between all calls and must never be modified by a writer.
        writer_class(encoding, dict(namespaces)).write(write, self)


##
# Element class. This class defines the Element interface, and
# provides a reference implementation of this interface.
# <p>
# The element name, attribute names, and attribute values can be
# either 8-bit ASCII strings or Unicode strings.
#
# @param tag The element name.
# @param attrib An optional dictionary, containing element attributes.
# @param children An optional sequence of initial children (nodes or
#     strings).
# @param **extra Additional attributes, given as keyword arguments.
# @see Element
# @see SubElement
# @see Comment
# @see ProcessingInstruction

class Element(Node):
    # <tag attrib>text<child/>...</tag>tail

    ##
    # (Attribute) Element tag.

    tag = None

    ##
    # (Attribute) Element attribute dictionary. Where possible, use
    # {@link #Element.get},
    # {@link #Element.set},
    # {@link #Element.keys}, and
    # {@link #Element.items} to access
    # element attributes.

    attrib = None

    ##
    # (Attribute) Text before first subelement. This is either a
    # string or the value None, if there was no text. Text is stored
    # as an ordinary child, so this is simply the first child when that
    # child is a string.

    @property
    def text(self):
        if len(self) and isinstance(self[0], basestring):
            return self[0]
        return None

    ##
    # (Attribute) Text after this element's end tag, but before the
    # next sibling element's start tag. Not supported by this
    # implementation: reading it always raises RuntimeError.

    @property
    def tail(self):
        raise RuntimeError('The tail argument is not supported')

    def __init__(self, tag, attrib=None, children=(), **extra):
        if attrib:
            if not isinstance(attrib, dict):
                raise TypeError('attrib')
            # never share the caller's dictionary
            attrib = attrib.copy()
        else:
            attrib = {}
        attrib.update(extra)
        self.tag = tag
        self.attrib = attrib
        self._children = [self._check_node(child) for child in children]

    def __repr__(self):
        return "<Element %s at %x>" % (repr(self.tag), id(self))

    ##
    # Returns the number of children (subelements and text nodes).
    #
    # @return The number of children.

    def __len__(self):
        return len(self._children)

    # An element is always true, even when it has no children.
    def __nonzero__(self):
        return True

    ##
    # Returns the given child.
    #
    # @param index What child to return.
    # @return The given child.
    # @exception IndexError If the given child does not exist.

    def __getitem__(self, index):
        return self._children.__getitem__(index)

    ##
    # Replaces the given child (or slice of children).
    #
    # @param index What child to replace.
    # @param element The new value.
    # @exception IndexError If the given child does not exist.
    # @exception TypeError If element is not a valid object.

    def __setitem__(self, index, element):
        if isinstance(index, slice):
            element = [self._check_node(item) for item in element]
        else:
            element = self._check_node(element)
        self._children.__setitem__(index, element)

    ##
    # Deletes the given child.
    #
    # @param index What child to delete.
    # @exception IndexError If the given child does not exist.

    def __delitem__(self, index):
        self._children.__delitem__(index)

    ##
    # Validates a prospective child. Nodes and unicode strings are
    # accepted as they are, byte strings are converted to unicode.
    #
    # @param node The object to check.
    # @return The object to store.
    # @exception TypeError If the object is neither a node nor a string.

    @staticmethod
    def _check_node(node):
        if isinstance(node, (Node, unicode)):
            return node
        if isinstance(node, str):
            return unicode(node)
        raise TypeError

    ##
    # Adds a child to the end of this element.
    #
    # @param element The node or string to add.
    # @exception TypeError If the value is not a valid object.

    def append(self, element):
        self._children.append(self._check_node(element))

    ##
    # Appends children from a sequence.
    #
    # @param elements A sequence object with zero or more elements.
    # @exception TypeError If a member is not a valid object.
    # @since 1.3

    def extend(self, elements):
        # validate everything first so a bad member leaves us unchanged
        checked = [self._check_node(item) for item in elements]
        self._children.extend(checked)

    ##
    # Inserts a child at the given position in this element.
    #
    # @param index Where to insert the new child.
    # @exception TypeError If the value is not a valid object.

    def insert(self, index, element):
        self._children.insert(index, self._check_node(element))

    ##
    # Removes a matching child. Unlike the <b>find</b> methods,
    # this method does not look at tag value or contents; it removes
    # the first child that compares equal to the argument.
    #
    # @param element What child to remove.
    # @exception ValueError If a matching child could not be found.

    def remove(self, element):
        self._children.remove(element)

    ##
    # Finds the first matching subelement, by tag name or path.
    #
    # @param path What element to look for.
    # @return The first matching element, or None if no element was found.
    # @defreturn Element or None

    def find(self, path):
        return ElementPath.find(self, path)

    ##
    # Finds text for the first matching subelement, by tag name or path.
    #
    # @param path What element to look for.
    # @param default What to return if the element was not found.
    # @return The text content of the first matching element, or the
    #     default value if no element was found. Note that if the element
    #     is found, but has no text content, this method returns an
    #     empty string.
    # @defreturn string

    def findtext(self, path, default=None):
        return ElementPath.findtext(self, path, default)

    ##
    # Finds all matching subelements, by tag name or path.
    #
    # @param path What element to look for.
    # @return A list or iterator containing all matching elements,
    #    in document order.
    # @defreturn list of Element instances

    def findall(self, path):
        return ElementPath.findall(self, path)

    ##
    # Resets an element. This function removes all children and clears
    # all attributes.

    def clear(self):
        self.attrib.clear()
        self._children = []

    ##
    # Gets an element attribute.
    #
    # @param key What attribute to look for.
    # @param default What to return if the attribute was not found.
    # @return The attribute value, or the default value, if the
    #     attribute was not found.

    def get(self, key, default=None):
        return self.attrib.get(key, default)

    ##
    # Sets an element attribute.
    #
    # @param key What attribute to set.
    # @param value The attribute value.

    def set(self, key, value):
        self.attrib[key] = value

    ##
    # Gets a list of attribute names. The names are returned in an
    # arbitrary order (just like for an ordinary Python dictionary).
    #
    # @return A list of element attribute names.
    # @defreturn list of strings

    def keys(self):
        return self.attrib.keys()

    ##
    # Gets element attributes, as a sequence. The attributes are
    # returned in an arbitrary order.
    #
    # @return A list of (name, value) tuples for all attributes.
    # @defreturn list of (string, string) tuples

    def items(self):
        return self.attrib.items()

    def __iter__(self):
        """
        Creates a element iterator. The iterator loops over all children.
        """
        return self._children.__iter__()

    ##
    # Creates a tree iterator. The iterator loops over this element
    # and all subelements, in document order, and returns all elements
    # with a matching tag.
    # <p>
    # Children that are not elements (text, comments, processing
    # instructions) are yielded as well, whatever the tag filter is;
    # callers that only want elements have to filter on type.
    # <p>
    # If the tree structure is modified during iteration, new or removed
    # elements may or may not be included. To get a stable set, use the
    # list() function on the iterator, and loop over the resulting list.
    #
    # @param tag What tags to look for (default is to return all elements).
    # @return An iterator containing all the matching elements.
    # @defreturn iterator

    def iter(self, tag=None):
        if tag == "*":
            tag = None
        if tag is None or self.tag == tag:
            yield self
        for child in self._children:
            if isinstance(child, Element):
                for descendant in child.iter(tag):
                    yield descendant
            else:
                yield child

    ##
    # Creates a text iterator. The iterator loops over this element
    # and all subelements, in document order, and returns all inner
    # text.
    #
    # @return An iterator containing all inner text.
    # @defreturn iterator

    def itertext(self):
        for child in self:
            if isinstance(child, Element):
                for fragment in child.itertext():
                    yield fragment
            elif isinstance(child, basestring):
                yield child


##
# Subelement factory. This function creates an element instance, and
# appends it to an existing element.
# <p>
# The element name, attribute names, and attribute values can be
# either 8-bit ASCII strings or Unicode strings.
#
# @param parent The parent element.
# @param tag The subelement name.
# @param attrib An optional dictionary, containing element attributes.
# @param **extra Additional attributes, given as keyword arguments.
# @return An element instance.
# @defreturn Element

def SubElement(parent, tag, attrib=None, **extra):
    if attrib:
        attrib = attrib.copy()
    else:
        attrib = {}
    attrib.update(extra)
    # Element does not define makeelement(); honour it when a parent
    # class provides one and fall back to the plain Element constructor.
    factory = getattr(parent, "makeelement", None)
    if factory is None:
        factory = Element
    element = factory(tag, attrib)
    parent.append(element)
    return element


##
# Comment node. Instances are serialized as an XML comment by the
# standard serializer.
# <p>
# The comment string can be either an 8-bit ASCII string or a Unicode
# string.
#
# @param text A string containing the comment string.

class Comment(Node):
    def __init__(self, text=None):
        self.text = text


##
# PI element factory.
# This factory function creates a special element
# that will be serialized as an XML processing instruction by the standard
# serializer.
#
# @param target A string containing the PI target.
# @param text A string containing the PI contents, if any.
# @return An element instance, representing a PI.
# @defreturn Element

class ProcessingInstruction(Node):
    def __init__(self, target, text=None):
        self.target = target
        self.text = text


PI = ProcessingInstruction


class QName(unicode):
    """
    QName wrapper.  This can be used to wrap a QName attribute value, in
    order to get proper namespace handling on output.

    The string value of an instance is "{uri}name" when a URI is known
    and just "name" otherwise.  Instances are read-only.

    @ivar name: local part of the QName
    @type name: unicode
    @ivar uri: URI part of the QName
    @type uri: unicode
    """

    __slots__ = 'name', 'uri'

    def __new__(cls, name, uri=None):
        """
        Creates a QName from either "{uri}local" (uri must then be
        omitted) or from a local name plus an optional separate uri.

        Raises ValueError when both forms are mixed or the closing
        brace is missing.
        """
        text = name = unicode(name)

        if name[0] == '{':
            if uri is not None:
                raise ValueError
            end = name.find('}')
            if end == -1:
                raise ValueError
            uri = name[1:end]
            name = name[end + 1:]

        if uri is not None:
            uri = unicode(uri)
            text = '{' + uri + '}' + name

        self = unicode.__new__(cls, text)
        # go through the base class: our own __setattr__ refuses writes
        unicode.__setattr__(self, 'name', name)
        unicode.__setattr__(self, 'uri', uri)
        return self

    def __getnewargs__(self):
        # arguments __new__ is called with when unpickling
        return self.name, self.uri

    def __getstate__(self):
        # all state is rebuilt by __new__; nothing else to pickle
        pass

    def __repr__(self):
        return '%s(%r, %r)' % (self.__class__.__name__, self.name, self.uri)

    def __setattr__(self, key, value):
        raise AttributeError('read-only')

    def __delattr__(self, key):
        # Needs its own definition: aliasing __setattr__ would fail with
        # TypeError because deletion passes no value argument.
        raise AttributeError('read-only')

# --------------------------------------------------------------------

##
# ElementTree wrapper class. This class represents an entire element
# hierarchy, and adds some extra support for serialization to and from
# standard XML.
#
# @param element Optional root element.
# @keyparam file Optional file handle or file name.
# If given, the
#     tree is initialized with the contents of this XML file.

class ElementTree(object):

    def __init__(self, element=None, file=None):
        assert element is None or isinstance(element, Node)
        self._root = element # first node
        if file:
            self.parse(file)

    ##
    # Gets the root element for this tree.
    #
    # @return An element instance.
    # @defreturn Element

    def getroot(self):
        return self._root

    ##
    # Loads an external XML document into this element tree.
    #
    # @param source A file name or file object.
    # @keyparam parser An optional parser instance. If not given, the
    #     standard {@link XMLParser} parser is used.
    # @return The document root element.
    # @defreturn Element

    def parse(self, source, parser=None):
        # only a file we opened ourselves is ours to close
        opened = not hasattr(source, "read")
        if opened:
            source = open(source, "rb")
        try:
            if not parser:
                parser = XMLParser(target=TreeBuilder())
            while 1:
                data = source.read(32768)
                if not data:
                    break
                parser.feed(data)
            self._root = parser.close()
            return self._root
        finally:
            if opened:
                source.close()

    ##
    # Creates a tree iterator for the root element. The iterator loops
    # over all elements in this tree, in document order.
    #
    # @param tag What tags to look for (default is to return all elements)
    # @return An iterator.
    # @defreturn iterator

    def iter(self, tag=None):
        assert self._root is not None
        return self._root.iter(tag)

    getiterator = iter

    ##
    # Rewrites an absolute path ("/...") into the relative form the
    # root element understands, warning about the changed meaning.
    #
    # @param path The path given by the caller.
    # @return The path to hand to the root element.

    @staticmethod
    def _toplevel_path(path):
        if path[:1] == "/":
            path = "." + path
            import warnings
            # stacklevel=2 attributes the warning to the find* method
            warnings.warn(
                "This search is broken in 1.3 and earlier; if you rely "
                "on the current behaviour, change it to %r" % path,
                FutureWarning, 2
                )
        return path

    ##
    # Finds the first toplevel element with given tag.
    # Same as getroot().find(path).
    #
    # @param path What element to look for.
    # @return The first matching element, or None if no element was found.
    # @defreturn Element or None

    def find(self, path):
        assert self._root is not None
        return self._root.find(self._toplevel_path(path))

    ##
    # Finds the element text for the first toplevel element with given
    # tag. Same as getroot().findtext(path).
    #
    # @param path What toplevel element to look for.
    # @param default What to return if the element was not found.
    # @return The text content of the first matching element, or the
    #     default value if no element was found. Note that if the element
    #     is found, but has no text content, this method returns an
    #     empty string.
    # @defreturn string

    def findtext(self, path, default=None):
        assert self._root is not None
        return self._root.findtext(self._toplevel_path(path), default)

    ##
    # Finds all toplevel elements with the given tag.
    # Same as getroot().findall(path).
    #
    # @param path What element to look for.
    # @return A list or iterator containing all matching elements,
    #    in document order.
    # @defreturn list of Element instances

    def findall(self, path):
        assert self._root is not None
        return self._root.findall(self._toplevel_path(path))

    ##
    # Writes the element tree to a file, as XML.
    #
    # @param file A file name, or a file object opened for writing.
    # @keyparam encoding Optional output encoding (default is US-ASCII).
    # @keyparam method Optional output method ("xml" or "html"; default
    #     is "xml".
    # @keyparam xml_declaration Accepted for compatibility; this
    #     implementation does not look at it.
    # @keyparam default_namespace Optional namespace URI to serialize
    #     without a prefix.
    # @keyparam namespaces Optional mapping from namespace URI to prefix.

    def write(self, file,
              # keyword arguments
              encoding="us-ascii",
              xml_declaration=None,
              default_namespace=None,
              method=None,
              namespaces={}):
        assert self._root is not None
        # only a file we opened ourselves is ours to close
        opened = not hasattr(file, "write")
        if opened:
            file = open(file, "wb")
        try:
            if not encoding:
                encoding = "us-ascii"
            if default_namespace:
                # copy first: the caller's (or the shared default)
                # dictionary must stay untouched
                namespaces = namespaces.copy()
                namespaces[default_namespace] = ''
            self._root.write(file.write, encoding=encoding,
                             namespaces=namespaces, method=method)
        finally:
            if opened:
                file.close()

# --------------------------------------------------------------------
# serialization support
# --------------------------------------------------------------------


# Minimal file-like object: collects everything written to it in a list.
class _FragmentSink(object):
    def __init__(self):
        self.fragments = []
        self.write = self.fragments.append


##
# Generates a string representation of an XML element, including all
# subelements.
#
# @param element An Element instance.
# @param encoding Optional output encoding.
# @param method Optional output method, see ElementTree.write.
# @return An encoded string containing the XML data.
# @defreturn string

def tostring(element, encoding=None, method=None):
    sink = _FragmentSink()
    ElementTree(element).write(sink, encoding, method=method)
    return "".join(sink.fragments)


##
# Generates a string representation of an XML element, including all
# subelements. The string is returned as a sequence of string fragments.
#
# @param element An Element instance.
# @param encoding Optional output encoding.
# @return A sequence object containing the XML data.
# @defreturn sequence
# @since 1.3

def tostringlist(element, encoding=None):
    sink = _FragmentSink()
    ElementTree(element).write(sink, encoding)
    # FIXME: merge small fragments into larger parts
    return sink.fragments


##
# Writes an element tree or element structure to sys.stdout. This
# function should be used for debugging only.
# <p>
# The exact output format is implementation dependent.
# In this
# version, it's written as an ordinary XML file.
#
# @param elem An element tree or an individual element.

def dump(elem):
    # debugging
    import sys
    if not isinstance(elem, ElementTree):
        elem = ElementTree(elem)
    elem.write(sys.stdout)
    # Nodes in this implementation carry no tail text (Element.tail
    # raises RuntimeError, other nodes have no such attribute), so the
    # serialized output never ends in a newline: always add one.
    sys.stdout.write("\n")

# --------------------------------------------------------------------
# parsing

##
# Parses an XML document into an element tree.
#
# @param source A filename or file object containing XML data.
# @param parser An optional parser instance. If not given, the
#     standard {@link XMLParser} parser is used.
# @return An ElementTree instance

def parse(source, parser=None):
    tree = ElementTree()
    tree.parse(source, parser)
    return tree


##
# Parses an XML document into an element tree incrementally, and reports
# what's going on to the user.
#
# @param source A filename or file object containing XML data.
# @param events A list of events to report back. If omitted, only "end"
#     events are reported.
# @param parser An optional parser instance.
If not given, the# standard {@link XMLParser} parser is used.# @return A (event, elem) iterator.defiterparse(source,events=None,parser=None):ifnothasattr(source,"read"):source=open(source,"rb")ifnotparser:parser=XMLParser(target=TreeBuilder())return_IterParseIterator(source,events,parser)class_IterParseIterator(object):def__init__(self,source,events,parser):self._file=sourceself._events=[]self._index=0self.root=self._root=Noneself._parser=parser# wire up the parser for event reportingparser=self._parser._parserappend=self._events.appendifeventsisNone:events=["end"]foreventinevents:ifevent=="start":try:parser.ordered_attributes=1parser.specified_attributes=1defhandler(tag,attrib_in,event=event,append=append,start=self._parser._start_list):append((event,start(tag,attrib_in)))parser.StartElementHandler=handlerexceptAttributeError:defhandler(tag,attrib_in,event=event,append=append,start=self._parser._start):append((event,start(tag,attrib_in)))parser.StartElementHandler=handlerelifevent=="end":defhandler(tag,event=event,append=append,end=self._parser._end):append((event,end(tag)))parser.EndElementHandler=handlerelifevent=="start-ns":defhandler(prefix,uri,event=event,append=append):try:uri=uri.encode("ascii")exceptUnicodeError:passappend((event,(prefixor"",uri)))parser.StartNamespaceDeclHandler=handlerelifevent=="end-ns":defhandler(prefix,event=event,append=append):append((event,None))parser.EndNamespaceDeclHandler=handlerdefnext(self):while1:try:item=self._events[self._index]exceptIndexError:ifself._parserisNone:self.root=self._rootraiseStopIteration# load event bufferdelself._events[:]self._index=0data=self._file.read(16384)ifdata:self._parser.feed(data)else:self._root=self._parser.close()self._parser=Noneelse:self._index=self._index+1returnitemdef__iter__(self):returnself### Parses an XML document from a string constant. 
# This function can
# be used to embed "XML literals" in Python code.
#
# @param source A string containing XML data.
# @param parser An optional parser instance. If not given, the
#     standard {@link XMLParser} parser is used.
# @return An Element instance.
# @defreturn Element

def XML(text, parser=None):
    if not parser:
        parser = XMLParser(target=TreeBuilder())
    parser.feed(text)
    return parser.close()


##
# Parses an XML document from a string constant, and also returns
# a dictionary which maps from element id:s to elements.
#
# @param source A string containing XML data.
# @param parser An optional parser instance. If not given, the
#     standard {@link XMLParser} parser is used.
# @return A tuple containing an Element instance and a dictionary.
# @defreturn (Element, dictionary)

def XMLID(text, parser=None):
    if not parser:
        parser = XMLParser(target=TreeBuilder())
    parser.feed(text)
    tree = parser.close()
    ids = {}
    # Elements offer iter(), not getiterator(); the walk also yields
    # text, comment and PI nodes, which have no attributes to look at.
    for node in tree.iter():
        lookup = getattr(node, "get", None)
        if lookup is None:
            continue
        ident = lookup("id")
        if ident:
            ids[ident] = node
    return tree, ids


##
# Parses an XML document from a string constant. Same as {@link #XML}.
#
# @def fromstring(text)
# @param source A string containing XML data.
# @return An Element instance.
# @defreturn Element

fromstring = XML


##
# Parses an XML document from a sequence of string fragments.
#
# @param sequence A list or other sequence containing XML data fragments.
# @param parser An optional parser instance. If not given, the
#     standard {@link XMLParser} parser is used.
# @return An Element instance.
# @defreturn Element
# @since 1.3

def fromstringlist(sequence, parser=None):
    if not parser:
        parser = XMLParser(target=TreeBuilder())
    for text in sequence:
        parser.feed(text)
    return parser.close()

# --------------------------------------------------------------------

##
# Generic element structure builder.
# This builder converts a sequence
# of {@link #TreeBuilder.start}, {@link #TreeBuilder.data}, and {@link
# #TreeBuilder.end} method calls to a well-formed element structure.
# <p>
# You can use this class to build an element structure using a custom XML
# parser, or a parser for some other XML-like format.
#
# @param element_factory Optional element factory. This factory
#    is called to create new Element instances, as necessary.

class TreeBuilder(object):

    def __init__(self, element_factory=None):
        self._data = [] # data collector
        self._elem = [] # element stack
        self._last = None # last element
        if element_factory is None:
            element_factory = Element
        self._factory = element_factory

    ##
    # Flushes the builder buffers, and returns the toplevel document
    # element.
    #
    # @return An Element instance.
    # @defreturn Element

    def close(self):
        assert len(self._elem) == 0, "missing end tags"
        assert self._last is not None, "missing toplevel element"
        return self._last

    # Appends the collected character data, joined into a single
    # string, to the innermost open element.
    def _flush(self):
        if self._data:
            text = "".join(self._data)
            self._elem[-1].append(text)
            self._data = []

    ##
    # Adds text to the current element.
    #
    # @param data A string. This should be either an 8-bit string
    #    containing ASCII text, or a Unicode string.

    def data(self, data):
        self._data.append(data)

    ##
    # Opens a new element.
    #
    # @param tag The element name.
    # @param attrib A dictionary containing element attributes.
    # @return The opened element.
    # @defreturn Element

    def start(self, tag, attrs):
        self._flush()
        self._last = elem = self._factory(tag, attrs)
        if self._elem:
            self._elem[-1].append(elem)
        self._elem.append(elem)
        return elem

    ##
    # Closes the current element.
    #
    # @param tag The element name.
    # @return The closed element.
    # @defreturn Element

    def end(self, tag):
        self._flush()
        self._last = self._elem.pop()
        assert self._last.tag == tag,\
               "end tag mismatch (expected %s, got %s)" % (
                   self._last.tag, tag)
        return self._last


##
# Element structure builder for XML source data, based on the
# <b>expat</b> parser.
#
# @keyparam target Target object. If omitted, the builder uses an
#     instance of the standard {@link #TreeBuilder} class.
# @keyparam html Predefine HTML entities. This flag is not supported
#     by the current implementation.
# @keyparam encoding Optional encoding. If given, the value overrides
#     the encoding specified in the XML file.
# @see #ElementTree
# @see #TreeBuilder

class XMLParser(object):

    def __init__(self, html=0, target=None, encoding=None):
        try:
            from xml.parsers import expat
        except ImportError:
            raise ImportError(
                "No module named expat; use SimpleXMLTreeBuilder instead"
                )
        # "}" separates namespace URI and local name in reported names
        parser = expat.ParserCreate(encoding, "}")
        if target is None:
            target = TreeBuilder()
        # underscored names are provided for compatibility only
        self.parser = self._parser = parser
        self.target = self._target = target
        self._error = expat.error
        self._names = {} # name memo cache
        # callbacks
        parser.DefaultHandlerExpand = self._default
        parser.StartElementHandler = self._start
        parser.EndElementHandler = self._end
        parser.CharacterDataHandler = self._data
        # let expat do the buffering, if supported
        try:
            self._parser.buffer_text = 1
        except AttributeError:
            pass
        # use new-style attribute handling, if supported
        try:
            self._parser.ordered_attributes = 1
            self._parser.specified_attributes = 1
            parser.StartElementHandler = self._start_list
        except AttributeError:
            pass
        self._doctype = None
        self.entity = {}
        try:
            self.version = "Expat %d.%d.%d" % expat.version_info
        except AttributeError:
            pass # unknown

    # Re-raises an expat error as ParseError, keeping code and position.
    def _raiseerror(self, value):
        err = ParseError(value)
        err.code = value.code
        err.position = value.lineno, value.offset
        raise err

    # Turns an expat name ("uri}local" or "local") into a QName; results
    # are memoized per parser.
    def _fixname(self, key):
        try:
            return self._names[key]
        except KeyError:
            pass
        if '}' in key:
            uri, local = key.split('}', 1)
            name = QName(local, uri)
        else:
            name = QName(key)
        self._names[key] = name
        return name

    # Start handler for dictionary-style attributes.
    def _start(self, tag, attrib_in):
        fixname = self._fixname
        attrib = {}
        for key, value in attrib_in.items():
            attrib[fixname(key)] = value
        return self.target.start(fixname(tag), attrib)

    # Start handler for list-style attributes: [name, value, name, ...].
    def _start_list(self, tag, attrib_in):
        fixname = self._fixname
        attrib = {}
        if attrib_in:
            for i in range(0, len(attrib_in), 2):
                attrib[fixname(attrib_in[i])] = attrib_in[i + 1]
        return self.target.start(fixname(tag), attrib)

    def _data(self, text):
        return self.target.data(text)

    def _end(self, tag):
        return self.target.end(self._fixname(tag))

    # Default handler: resolves entities through self.entity and tracks
    # the pieces of a doctype declaration.
    def _default(self, text):
        prefix = text[:1]
        if prefix == "&":
            # deal with undefined entities
            try:
                self.target.data(self.entity[text[1:-1]])
            except KeyError:
                err = self._error(
                    "undefined entity %s: line %d, column %d" %
                    (text, self._parser.ErrorLineNumber,
                     self._parser.ErrorColumnNumber)
                    )
                err.code = 11 # XML_ERROR_UNDEFINED_ENTITY
                err.lineno = self._parser.ErrorLineNumber
                err.offset = self._parser.ErrorColumnNumber
                raise err
        elif prefix == "<" and text[:9] == "<!DOCTYPE":
            self._doctype = [] # inside a doctype declaration
        elif self._doctype is not None:
            # parse doctype contents
            if prefix == ">":
                self._doctype = None
                return
            text = text.strip()
            if not text:
                return
            self._doctype.append(text)
            n = len(self._doctype)
            if n > 2:
                kind = self._doctype[1]
                if kind == "PUBLIC" and n == 4:
                    name, kind, pubid, system = self._doctype
                elif kind == "SYSTEM" and n == 3:
                    name, kind, system = self._doctype
                    pubid = None
                else:
                    return
                if pubid:
                    pubid = pubid[1:-1] # strip the quotes
                if hasattr(self.target, "doctype"):
                    self.target.doctype(name, pubid, system[1:-1])
                self._doctype = None

    ##
    # Feeds data to the parser.
    #
    # @param data Encoded data.
    # @exception ParseError If the data is not well-formed.

    def feed(self, data):
        import sys
        try:
            self._parser.Parse(data, 0)
        except self._error:
            # sys.exc_info() is spelled the same on every Python version
            self._raiseerror(sys.exc_info()[1])

    ##
    # Finishes feeding data to the parser.
    #
    # @return An element structure.
    # @defreturn Element
    # @exception ParseError If the document is not well-formed.

    def close(self):
        import sys
        try:
            self._parser.Parse("", 1) # end of data
        except self._error:
            self._raiseerror(sys.exc_info()[1])
        tree = self.target.close()
        del self.target, self._parser # get rid of circular references
        return tree


##
# Common base of the serializers: escaping, namespace bookkeeping and
# the write() entry point. Subclasses provide serialize().
#
# @param encoding Optional output encoding; without one, text is
#     written unencoded.
# @param namespaces Optional mapping from namespace URI to the prefix
#     to use for it ('' selects the default namespace).

class BaseWriter(object):
    def __init__(self, encoding=None, namespaces={}):
        self.encoding = encoding
        self.namespaces = namespaces

    # Encodes text; unencodable characters become character references.
    def _encode(self, text):
        if self.encoding:
            return text.encode(self.encoding, "xmlcharrefreplace")
        return text

    # Replaces the markup characters & < > (no encoding).
    @staticmethod
    def _escape_markup(text):
        # it's worth avoiding do-nothing calls for strings that are
        # shorter than 500 character, or so.  assume that's, by far,
        # the most common case in most applications.
        if "&" in text:
            text = text.replace("&", "&amp;")
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        return text

    def _escape_cdata(self, text):
        # escape character data
        return self._encode(self._escape_markup(text))

    def _escape_attrib(self, text):
        # escape attribute value
        # "&" has to be handled first, otherwise the ampersands of the
        # references produced below would be escaped a second time
        text = self._escape_markup(text)
        if "\"" in text:
            text = text.replace("\"", "&quot;")
        if "\n" in text:
            text = text.replace("\n", "&#10;")
        return self._encode(text)

    ##
    # Identifies the namespaces used in a tree.
    #
    # @param elem The root of the tree that is about to be written.
    # @return A (qnames, namespaces) tuple: qnames maps every tag,
    #     attribute name and QName attribute value to its serialized
    #     "prefix:local" form, namespaces maps each used URI to its
    #     prefix.
    # @exception TypeError If a tag or attribute name cannot be serialized.

    def _namespaces(self, elem):
        # maps qnames to prefix:local names
        qnames = {None: None}

        # maps uri:s to prefixes: well-known prefixes first, then the
        # mapping given to this writer, which takes precedence
        candidate_namespaces = self._namespace_map.copy()
        candidate_namespaces.update(self.namespaces)
        used_namespaces = {}

        def add_qname(qname):
            if qname in qnames:
                return

            # calculate serialized qname representation
            try:
                if qname.uri is not None:
                    uri = qname.uri
                    prefix = used_namespaces.get(uri, None)
                    if prefix is None:
                        prefix = candidate_namespaces.get(uri, None)
                        if prefix is None:
                            prefix = "ns%d" % len(used_namespaces)
                        # the xml prefix is predefined and must not be
                        # declared
                        if prefix != "xml":
                            used_namespaces[uri] = prefix
                    if prefix:
                        qnames[qname] = "%s:%s" % (prefix, qname.name)
                    else:
                        qnames[qname] = qname.name
                else:
                    # XXX: What happens with undefined namespace?
                    qnames[qname] = qname.name
            except TypeError:
                self._raise_serialization_error(qname)

        def add_name(name):
            if isinstance(name, QName):
                add_qname(name)
            elif isinstance(name, basestring):
                add_qname(QName(name))
            elif name is not None:
                self._raise_serialization_error(name)

        # populate qname and namespaces table
        if isinstance(elem, Element):
            for node in elem.iter():
                # iter() also yields text, comments and PIs
                if not isinstance(node, Element):
                    continue
                add_name(node.tag)
                for key, value in node.items():
                    add_name(key)
                    # QName values are looked up in qnames on output
                    if isinstance(value, QName):
                        add_qname(value)

        return qnames, used_namespaces

    @staticmethod
    def _raise_serialization_error(text):
        raise TypeError(
            "cannot serialize %r (type %s)" % (text, type(text).__name__)
            )

    ##
    # Registers a namespace prefix. The registry is global, and any
    # existing mapping for either the given prefix or the namespace URI
    # will be removed.
    #
    # @param prefix Namespace prefix.
    # @param uri Namespace uri. Tags and attributes in this namespace
    #     will be serialized with the given prefix, if at all possible.
    # @raise ValueError If the prefix is reserved, or is otherwise
    #     invalid.

    @classmethod
    def register_namespace(cls, prefix, uri):
        import re
        if re.match(r"ns\d+$", prefix):
            raise ValueError("Prefix format reserved for internal use")
        # iterate over a snapshot, entries are deleted on the way
        for known_uri, known_prefix in list(cls._namespace_map.items()):
            if known_uri == uri or known_prefix == prefix:
                del cls._namespace_map[known_uri]
        cls._namespace_map[uri] = prefix

    _namespace_map = {
        # "well-known" namespace prefixes
        "http://www.w3.org/XML/1998/namespace": "xml",
        "http://www.w3.org/1999/xhtml": "html",
        "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
        "http://schemas.xmlsoap.org/wsdl/": "wsdl",
        # xml schema
        "http://www.w3.org/2001/XMLSchema": "xs",
        "http://www.w3.org/2001/XMLSchema-instance": "xsi",
        # dublin core
        "http://purl.org/dc/elements/1.1/": "dc",
    }

    # Writes the attributes of elem, sorted by name, followed by the
    # namespace declarations, sorted by prefix.
    def _write_attributes(self, write, elem, qnames, namespaces):
        if elem.attrib:
            for key, value in sorted(elem.attrib.items(),
                                     key=lambda item: item[0]):
                key = qnames[key]
                if isinstance(value, QName):
                    value = qnames[value]
                else:
                    value = self._escape_attrib(unicode(value))
                write(' ' + key + '="' + value + '"')
        if namespaces:
            for uri, prefix in sorted(namespaces.items(),
                                      key=lambda item: item[1]):
                if prefix:
                    prefix = ":" + prefix
                write(" xmlns%s=\"%s\"" % (
                    self._encode(prefix),
                    self._escape_attrib(uri)
                    ))

    # Writes a comment or processing instruction; returns a false value
    # for any other kind of node.
    def _write_special(self, write, node):
        if isinstance(node, Comment):
            write("<!--%s-->" % self._escape_cdata(node.text))
            return True
        if isinstance(node, ProcessingInstruction):
            text = self._escape_cdata(node.target)
            if node.text is not None:
                text += ' ' + self._escape_cdata(node.text)
            write("<?%s?>" % text)
            return True
        return False

    # Hook for output that precedes the document; nothing by default.
    def serialize_start(self, write):
        pass

    ##
    # Serializes a tree.
    #
    # @param write Callable that receives each output fragment.
    # @param element The root node of the tree.

    def write(self, write, element):
        qnames, namespaces = self._namespaces(element)
        self.serialize_start(write)
        self.serialize(write, element, qnames, namespaces)


# Writes only the text content of a tree.
class TextWriter(BaseWriter):
    def serialize(self, write, elem, qnames=None, namespaces=None):
        for part in elem.itertext():
            write(self._encode(part))


# Writes a tree as XML. Namespace declarations are only emitted for the
# node serialize() is called on with a namespaces mapping, i.e. the root.
class XMLWriter(BaseWriter):
    def serialize(self, write, elem, qnames, namespaces={}):
        if isinstance(elem, Element):
            tag = qnames[elem.tag]
            if tag is not None:
                write("<" + tag)
                self._write_attributes(write, elem, qnames, namespaces)
                if len(elem):
                    write(">")
                    for child in elem:
                        self.serialize(write, child, qnames)
                    write("</" + tag + ">")
                else:
                    write(" />")
            else:
                # an element without a tag only contributes its children
                for child in elem:
                    self.serialize(write, child, qnames)
        elif not self._write_special(write, elem):
            # plain text
            write(self._escape_cdata(unicode(elem)))

    # Emits an XML declaration for encodings other than UTF-8 / US-ASCII.
    def serialize_start(self, write):
        if self.encoding and self.encoding not in ("utf-8", "us-ascii"):
            write("<?xml version='1.0' encoding='%s'?>\n" % self.encoding)


# Writes a tree as HTML: the XHTML namespace is the default namespace,
# empty elements get no end tag, script and style content is not escaped.
class HTMLWriter(BaseWriter):
    empty_elements = frozenset((
        "area", "base", "basefont", "br", "col", "frame", "hr",
        "img", "input", "isindex", "link", "meta", "param",
        ))

    def __init__(self, encoding=None, namespaces={}):
        # work on a copy: neither the caller's dictionary nor the shared
        # default may be modified
        namespaces = dict(namespaces)
        namespaces["http://www.w3.org/1999/xhtml"] = ''
        super(HTMLWriter, self).__init__(encoding, namespaces)

    def serialize(self, write, elem, qnames, namespaces={}):
        if isinstance(elem, Element):
            tag = qnames[elem.tag]
            if tag is not None:
                write("<" + tag)
                # FIXME: handle boolean attributes
                self._write_attributes(write, elem, qnames, namespaces)
                write(">")
                if tag.lower() in ('script', 'style'):
                    write(self._encode(''.join(elem.itertext())))
                else:
                    for child in elem:
                        self.serialize(write, child, qnames)
                if tag not in self.empty_elements:
                    write("</" + tag + ">")
            else:
                # an element without a tag only contributes its children
                for child in elem:
                    self.serialize(write, child, qnames)
        elif not self._write_special(write, elem):
            # plain text
            write(self._escape_cdata(elem))