1# Copyright (C) 2009 by Eric Talevich (eric.talevich@gmail.com) 2# Based on Bio.Nexus, copyright 2005-2008 by Frank Kauff & Cymon J. Cox. 3# All rights reserved. 4# This code is part of the Biopython distribution and governed by its 5# license. Please see the LICENSE file that should have been included 6# as part of this package. 7 8"""I/O function wrappers for the Newick file format. 9 10See: http://evolution.genetics.washington.edu/phylip/newick_doc.html 11""" 12 13__docformat__="restructuredtext en" 14 15importre 16fromBio._py3kimportStringIO 17 18fromBio.PhyloimportNewick

64iftext.isdigit(): 65returnint(text) 66# NB: Could make this more consistent by treating as a percentage 67# return int(text) / 100. 68try: 69returnfloat(text) 70# NB: This should be in [0.0, 1.0], but who knows what people will do 71# assert 0 <= current_clade.confidence <= 1 72exceptValueError: 73returnNone

102"""Parse the text stream this object was initialized with."""103self.values_are_confidence=values_are_confidence104self.comments_are_confidence=comments_are_confidence105self.rooted=rooted106buf=''107unicodeChecked=False108unicodeLines=("\xef","\xff","\xfe","\x00")109forlineinself.handle:110ifnotunicodeChecked:111# check for unicode byte order marks on first line only,112# these lead to parsing errors (on Python 2)113ifline.startswith(unicodeLines):114raiseNewickError("The file or stream you attempted to parse includes "115"unicode byte order marks. You must convert it to "116"ASCII before it can be parsed.")117unicodeChecked=True118buf+=line.rstrip()119ifbuf.endswith(';'):120yieldself._parse_tree(buf)121buf=''122ifbuf:123# Last tree is missing a terminal ';' character -- that's OK124yieldself._parse_tree(buf)

def _parse_tree(self, text):
    """Parse the text representation of one tree into a Tree object.

    The text is split into tokens with the module-level ``tokenizer``
    pattern and consumed left to right, building clades with
    ``self.new_clade`` and closing them with ``self.process_clade``.

    :Parameters:
        text : str
            Newick text of a single tree; surrounding whitespace is
            stripped before tokenizing.

    :returns: ``Newick.Tree`` built around the root clade, with ``rooted``
        taken from ``self.rooted``.

    :raises NewickError: if a ``)`` has no matching parent clade, if the
        numbers of ``(`` and ``)`` differ, or if any token follows the
        terminating ``;``.
    """
    tokens = re.finditer(tokenizer, text.strip())

    new_clade = self.new_clade
    root_clade = new_clade()
    current_clade = root_clade

    open_count = 0
    close_count = 0
    for match in tokens:
        token = match.group()

        if token.startswith("'"):
            # quoted label; strip the surrounding quotes
            current_clade.name = token[1:-1]

        elif token.startswith('['):
            # comment; strip the surrounding brackets
            current_clade.comment = token[1:-1]
            if self.comments_are_confidence:
                # Try to use this comment as a numeric support value
                current_clade.confidence = _parse_confidence(
                    current_clade.comment)

        elif token == '(':
            # start a new clade, which is a child of the current clade
            current_clade = new_clade(current_clade)
            open_count += 1

        elif token == ',':
            # if the current clade is the root, then the external
            # parentheses are missing and a new root should be created
            if current_clade is root_clade:
                root_clade = new_clade()
                current_clade.parent = root_clade
            # start a new child clade at the same level as the current one;
            # the value returned by process_clade is used as its parent
            parent = self.process_clade(current_clade)
            current_clade = new_clade(parent)

        elif token == ')':
            # done adding children for this parent clade
            parent = self.process_clade(current_clade)
            if not parent:
                raise NewickError('Parenthesis mismatch.')
            current_clade = parent
            close_count += 1

        elif token == ';':
            # end of this tree; anything left in `tokens` is checked below
            break

        elif token.startswith(':'):
            # branch length or confidence; float() raises ValueError if the
            # text after the colon is not a valid number
            value = float(token[1:])
            if self.values_are_confidence:
                current_clade.confidence = value
            else:
                current_clade.branch_length = value

        elif token == '\n':
            # newlines carry no information; skip so they are not taken
            # for an unquoted label
            pass

        else:
            # unquoted node label
            current_clade.name = token

    if open_count != close_count:
        raise NewickError('Number of open/close parentheses do not match.')

    # if the ';' token broke out of the loop, there should be no remaining
    # tokens
    trailing = next(tokens, None)
    if trailing is not None:
        raise NewickError('Text after semicolon in Newick tree: %s'
                          % trailing.group())

    self.process_clade(current_clade)
    self.process_clade(root_clade)
    return Newick.Tree(root=root_clade, rooted=self.rooted)

258"""Return an iterable of PAUP-compatible tree lines."""259# If there's a conflict in the arguments, we override plain=True260ifconfidence_as_branch_lengthorbranch_length_only:261plain=False262make_info_string=self._info_factory(plain,263confidence_as_branch_length,branch_length_only,max_confidence,264format_confidence,format_branch_length)265266defnewickize(clade):267"""Convert a node tree to a Newick tree string, recursively."""268label=clade.nameor''269iflabel:270unquoted_label=re.match(token_dict['unquoted node label'],label)271if(notunquoted_label)or(unquoted_label.end()<len(label)):272label="'%s'"%label.replace(273'\\','\\\\').replace("'","\\'")274275ifclade.is_terminal():# terminal276return(label277+make_info_string(clade,terminal=True))278else:279subtrees=(newickize(sub)forsubinclade)280return'(%s)%s'%(','.join(subtrees),281label+make_info_string(clade))