# This module contains abstractions for the input stream. You don't have to
# look any further; there is no pretty code here.
#
# Two classes are exported from this module:
#
#   ReaderError(name, position, character, encoding, reason)
# The exception raised when the input cannot be decoded or contains
# a character that is not allowed.
#
#   Reader(stream)
# Reader determines the encoding of the input and converts it to unicode.
# Reader provides the following methods and attributes:
#   reader.peek(index=0) - return the character `index` positions ahead.
#   reader.prefix(length=1) - return up to `length` upcoming characters.
#   reader.forward(length=1) - advance the current position by `length`
#       characters.
#   reader.get_mark() - return a Mark for the current position.
#   reader.index - the number of the current character.
#   reader.line, reader.column - the line and the column of the current
#       character.
#
# Mark is imported from the `error` module. It's just a record and its only
# use is producing nice error messages.

__all__ = ['Reader', 'ReaderError']

from error import YAMLError, Mark

import codecs
import re


class ReaderError(YAMLError):

    # Raised by Reader for undecodable bytes and for non-printable characters.
    # `character` is either the offending raw byte (a `str`) or the code point
    # of the offending character (an integer); `__str__` tells them apart.

    def __init__(self, name, position, character, encoding, reason):
        self.name = name
        self.character = character
        self.position = position
        self.encoding = encoding
        self.reason = reason

    def __str__(self):
        if not isinstance(self.character, str):
            # An integer code point: the character decoded fine but is
            # not acceptable.
            template = ("unacceptable character #x%04x: %s\n"
                        " in \"%s\", position %d")
            return template % (self.character, self.reason,
                               self.name, self.position)
        # A raw byte: the codec could not decode it.
        template = ("'%s' codec can't decode byte #x%02x: %s\n"
                    " in \"%s\", position %d")
        return template % (self.encoding, ord(self.character), self.reason,
                           self.name, self.position)


class Reader(object):
    # Reader:
    # - determines the data encoding and converts it to unicode,
    # - checks if characters are in allowed range,
    # - adds '\0' to the end.

    # Reader accepts
    #  - a `str` object,
    #  - a `unicode` object,
    #  - a file-like object with its `read` method returning `str`,
    #  - a file-like object with its `read` method returning `unicode`.

    # Yeah, it's ugly and slow.

    def __init__(self, stream):
        # Source description.
        self.name = None
        self.stream = None
        self.stream_pointer = 0
        self.eof = True

        # Decoded characters and the read position inside them.
        self.buffer = u''
        self.pointer = 0

        # Not-yet-decoded input and the way to decode it.
        self.raw_buffer = None
        self.raw_decode = None
        self.encoding = None

        # Position of the current character in the whole input.
        self.index = 0
        self.line = 0
        self.column = 0

        if isinstance(stream, unicode):
            # Already decoded: validate it once and terminate it with '\0'.
            self.name = "<unicode string>"
            self.check_printable(stream)
            self.buffer = stream + u'\0'
            return

        if isinstance(stream, str):
            self.name = "<string>"
            self.raw_buffer = stream
        else:
            # Anything else is treated as a file-like object.
            self.stream = stream
            self.name = getattr(stream, 'name', "<file>")
            self.eof = False
            self.raw_buffer = ''
        self.determine_encoding()

    def peek(self, index=0):
        # Return the character `index` positions after the current one,
        # reading more input when the buffer is too short.
        try:
            char = self.buffer[self.pointer + index]
        except IndexError:
            self.update(index + 1)
            # `update` resets `self.pointer`, so the offset is recomputed.
            char = self.buffer[self.pointer + index]
        return char

    def prefix(self, length=1):
        # Return the next `length` characters without consuming them.
        remaining = len(self.buffer) - self.pointer
        if length >= remaining:
            self.update(length)
        start = self.pointer
        return self.buffer[start:start + length]

    def forward(self, length=1):
        # Consume `length` characters, keeping index/line/column up to date.
        # One extra character is requested so that the look-ahead after
        # a '\r' below has something to read.
        wanted = length + 1
        if self.pointer + wanted >= len(self.buffer):
            self.update(wanted)
        while length:
            current = self.buffer[self.pointer]
            self.pointer += 1
            self.index += 1
            if current in u'\n\x85\u2028\u2029':
                ends_line = True
            elif current == u'\r':
                # For "\r\n" the line is counted at the '\n', not here.
                ends_line = self.buffer[self.pointer] != u'\n'
            else:
                ends_line = False
            if ends_line:
                self.line += 1
                self.column = 0
            elif current != u'\uFEFF':
                # U+FEFF does not advance the column.
                self.column += 1
            length -= 1

    def get_mark(self):
        # Build a Mark for the current position. The buffer and pointer are
        # passed along only when the input did not come from a stream.
        if self.stream is None:
            mark_buffer, mark_pointer = self.buffer, self.pointer
        else:
            mark_buffer = mark_pointer = None
        return Mark(self.name, self.index, self.line, self.column,
                    mark_buffer, mark_pointer)

    def determine_encoding(self):
        # Pick the decoder from the leading bytes of the raw input, then
        # decode the first chunk.
        while not self.eof and len(self.raw_buffer) < 2:
            self.update_raw()
        raw = self.raw_buffer
        if not isinstance(raw, unicode):
            # UTF-8 unless the data starts with a UTF-16 byte order mark.
            decoder, encoding = codecs.utf_8_decode, 'utf-8'
            if raw.startswith(codecs.BOM_UTF16_LE):
                decoder, encoding = codecs.utf_16_le_decode, 'utf-16-le'
            elif raw.startswith(codecs.BOM_UTF16_BE):
                decoder, encoding = codecs.utf_16_be_decode, 'utf-16-be'
            self.raw_decode = decoder
            self.encoding = encoding
        self.update(1)

    NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]')

    def check_printable(self, data):
        # Raise ReaderError if `data` contains a character matched by
        # NON_PRINTABLE. `data` is assumed to be about to be appended to
        # the buffer, which is how the reported position is computed.
        found = self.NON_PRINTABLE.search(data)
        if found is None:
            return
        unread = len(self.buffer) - self.pointer
        position = self.index + unread + found.start()
        raise ReaderError(self.name, position, ord(found.group()),
                          'unicode', "special characters are not allowed")

    def update(self, length):
        # Make sure at least `length` characters are buffered from the
        # current position on, or that the end of input has been reached.
        if self.raw_buffer is None:
            # Nothing left to decode.
            return
        # Drop the characters that have already been consumed.
        self.buffer = self.buffer[self.pointer:]
        self.pointer = 0
        while len(self.buffer) < length:
            if not self.eof:
                self.update_raw()
            if self.raw_decode is None:
                # The raw data is already unicode; take all of it.
                data = self.raw_buffer
                converted = len(data)
            else:
                try:
                    data, converted = self.raw_decode(self.raw_buffer,
                                                      'strict', self.eof)
                except UnicodeDecodeError as exc:
                    bad_byte = exc.object[exc.start]
                    position = exc.start
                    if self.stream is not None:
                        # Convert the offset within the raw buffer into
                        # an offset within the stream.
                        position += self.stream_pointer - len(self.raw_buffer)
                    raise ReaderError(self.name, position, bad_byte,
                                      exc.encoding, exc.reason)
            self.check_printable(data)
            self.buffer += data
            self.raw_buffer = self.raw_buffer[converted:]
            if self.eof:
                # Terminate the buffer and mark the raw input as exhausted.
                self.buffer += u'\0'
                self.raw_buffer = None
                break

    def update_raw(self, size=1024):
        # Read up to `size` more units from the stream into the raw buffer;
        # an empty read marks the end of the input.
        chunk = self.stream.read(size)
        if not chunk:
            self.eof = True
            return
        self.raw_buffer += chunk
        self.stream_pointer += len(chunk)

#try:
#    import psyco
#    psyco.bind(Reader)
#except ImportError:
#    pass