| Trees | Indices | Help |
|---|
|
|
1 # Copyright (C) 2009 by Eric Talevich (eric.talevich@gmail.com)
2 # Based on Bio.Nexus, copyright 2005-2008 by Frank Kauff & Cymon J. Cox.
3 # All rights reserved.
4 # This code is part of the Biopython distribution and governed by its
5 # license. Please see the LICENSE file that should have been included
6 # as part of this package.
7
8 """I/O function wrappers for the Newick file format.
9
10 See: U{ http://evolution.genetics.washington.edu/phylip/newick_doc.html }
11 """
12 __docformat__ = "epytext en"
13
14 from cStringIO import StringIO
15
16 from Bio.Phylo import Newick
17
18 # Definitions retrieved from Bio.Nexus.Trees
19 NODECOMMENT_START = '[&'
20 NODECOMMENT_END = ']'
26
27
28 # ---------------------------------------------------------
29 # Public API
30
def parse(handle):
    """Iterate over the trees in a Newick file handle.

    Thin convenience wrapper: builds a L{Parser} around the handle and
    hands back whatever its C{parse()} method produces.

    @param handle: the file handle to read Newick text from; it is passed
        unchanged to the L{Parser} constructor.
    @return: a generator of Bio.Phylo.Newick.Tree objects.
    """
    parser = Parser(handle)
    return parser.parse()
37
39 """Write trees in Newick format to the given file handle.
40
41 @return: number of trees written.
42 """
43 return Writer(trees).write(handle, plain=plain, **kwargs)
44
45
46 # ---------------------------------------------------------
47 # Input
48
49 -class Parser(object):
50 """Parse a Newick tree given a file handle.
51
52 Based on the parser in Bio.Nexus.Trees.
53 """
54
57
58 @classmethod
62
64 """Parse the text stream this object was initialized with."""
65 self.values_are_support = values_are_support
66 self.rooted = rooted
67 buf = ''
68 for line in self.handle:
69 buf += line.rstrip()
70 if buf.endswith(';'):
71 yield self._parse_tree(buf)
72 buf = ''
73 if buf:
74 # Last tree is missing a terminal ';' character -- that's OK
75 yield self._parse_tree(buf)
76
78 """Parse the text representation into a Tree object."""
79 # XXX what global info do we have here? Any? Use **kwargs?
80 return Newick.Tree(root=self._parse_subtree(text))
81
83 """Parse (a,b,c...)[[[xx]:]yy] into subcomponents, recursively."""
84 text = text.strip().rstrip(';')
85 if text.count('(')!=text.count(')'):
86 raise NewickError("Parentheses do not match in (sub)tree: " + text)
87 # Text is now "(...)..." (balanced parens) or "..." (leaf node)
88 if text.count('(') == 0:
89 # Leaf/terminal node -- recursion stops here
90 return self._parse_tag(text)
91 # Handle one layer of the nested subtree
92 # XXX what if there's a paren in a comment or other string?
93 close_posn = text.rfind(')')
94 subtrees = []
95 # Locate subtrees by counting nesting levels of parens
96 plevel = 0
97 prev = 1
98 for posn in range(1, close_posn):
99 if text[posn] == '(':
100 plevel += 1
101 elif text[posn] == ')':
102 plevel -= 1
103 elif text[posn] == ',' and plevel == 0:
104 subtrees.append(text[prev:posn])
105 prev = posn + 1
106 subtrees.append(text[prev:close_posn])
107 # Construct a new clade from trailing text, then attach subclades
108 clade = self._parse_tag(text[close_posn+1:])
109 clade.clades = [self._parse_subtree(st) for st in subtrees]
110 return clade
111
113 """Extract the data for a node from text.
114
115 @return: Clade instance containing any available data
116 """
117 # Extract the comment
118 comment_start = text.find(NODECOMMENT_START)
119 if comment_start != -1:
120 comment_end = text.find(NODECOMMENT_END)
121 if comment_end == -1:
122 raise NewickError('Error in tree description: '
123 'Found %s without matching %s'
124 % (NODECOMMENT_START, NODECOMMENT_END))
125 comment = text[comment_start+len(NODECOMMENT_START):comment_end]
126 text = text[:comment_start] + text[comment_end+len(NODECOMMENT_END):]
127 else:
128 comment = None
129 clade = Newick.Clade(comment=comment)
130 # Extract name (taxon), and optionally support, branch length
131 # Float values are support and branch length, the string is name/taxon
132 values = []
133 for part in (t.strip() for t in text.split(':')):
134 if part:
135 try:
136 values.append(float(part))
137 except ValueError:
138 assert clade.name is None, "Two string taxonomies?"
139 clade.name = part
140 if len(values) == 1:
141 # Real branch length, or support as branch length
142 if self.values_are_support:
143 clade.confidence = values[0]
144 else:
145 clade.branch_length = values[0]
146 elif len(values) == 2:
147 # Two non-taxon values: support comes first. (Is that always so?)
148 clade.confidence, clade.branch_length = values
149 elif len(values) > 2:
150 raise NewickError("Too many colons in tag: " + text)
151 return clade
152
153
154 # ---------------------------------------------------------
155 # Output
156
157 -class Writer(object):
158 """Based on the writer in Bio.Nexus.Trees (str, to_string)."""
159
162
164 """Write this instance's trees to a file handle."""
165 count = 0
166 for treestr in self.to_strings(**kwargs):
167 handle.write(treestr + '\n')
168 count += 1
169 return count
170
171 - def to_strings(self, support_as_branchlengths=False,
172 branchlengths_only=False, plain=False,
173 plain_newick=True, ladderize=None,
174 max_support=1.0):
175 """Return an iterable of PAUP-compatible tree lines."""
176 # If there's a conflict in the arguments, we override plain=True
177 if support_as_branchlengths or branchlengths_only:
178 plain = False
179 make_info_string = self._info_factory(plain, support_as_branchlengths,
180 branchlengths_only, max_support)
181 def newickize(clade):
182 """Convert a node tree to a Newick tree string, recursively."""
183 if clade.is_terminal(): #terminal
184 return ((clade.name or '')
185 + make_info_string(clade, terminal=True))
186 else:
187 subtrees = (newickize(sub) for sub in clade)
188 return '(%s)%s' % (','.join(subtrees),
189 make_info_string(clade))
190
191 # Convert each tree to a string
192 for tree in self.trees:
193 if ladderize in ('left', 'LEFT', 'right', 'RIGHT'):
194 # Nexus compatibility shim, kind of
195 tree.ladderize(reverse=(ladderize in ('right', 'RIGHT')))
196 rawtree = newickize(tree.root) + ';'
197 if plain_newick:
198 yield rawtree
199 continue
200 # Nexus-style (?) notation before the raw Newick tree
201 treeline = ['tree', (tree.name or 'a_tree'), '=']
202 if tree.weight != 1:
203 treeline.append('[&W%s]' % round(float(tree.weight), 3))
204 if tree.rooted:
205 treeline.append('[&R]')
206 treeline.append(rawtree)
207 yield ' '.join(treeline)
208
209 - def _info_factory(self, plain, support_as_branchlengths,
210 branchlengths_only, max_support):
216
217 elif support_as_branchlengths:
218 # Support as branchlengths (eg. PAUP), ignore actual branchlengths
219 def make_info_string(clade, terminal=False):
220 if terminal:
221 # terminal branches have 100% support
222 return ':%1.2f' % max_support
223 else:
224 return ':%1.2f' % (clade.confidence)
225
226 elif branchlengths_only:
227 # write only branchlengths, ignore support
228 def make_info_string(clade, terminal=False):
229 return ':%1.5f' % (clade.branch_length)
230
231 else:
232 # write support and branchlengths (e.g. .con tree of mrbayes)
233 def make_info_string(clade, terminal=False):
234 if terminal:
235 return ':%1.5f' % (clade.branch_length or 1.0)
236 else:
237 if (clade.branch_length is not None and
238 hasattr(clade, 'confidence') and
239 clade.confidence is not None):
240 # we have blen and support
241 return '%1.2f:%1.5f' % (clade.confidence,
242 clade.branch_length)
243 elif clade.branch_length is not None:
244 # we have only blen
245 return '0.00000:%1.5f' % clade.branch_length
246 elif (hasattr(clade, 'confidence') and
247 clade.confidence is not None):
248 # we have only support
249 return '%1.2f:0.00000' % clade.confidence
250 else:
251 return '0.00:0.00000'
252
253 return make_info_string
254
| Trees | Indices | Help |
|---|
| Generated by Epydoc 3.0.1 on Fri Nov 26 16:20:10 2010 | http://epydoc.sourceforge.net |