| Trees | Indices | Help |
|---|
|
|
1 # Copyright (C) 2011 by Brandon Invergo (b.invergo@gmail.com)
2 # This code is part of the Biopython distribution and governed by its
3 # license. Please see the LICENSE file that should have been included
4 # as part of this package.
5
6 import re
7
8 line_floats_re = re.compile("-*\d+\.\d+")
9
11 """Parse the basics that should be present in most baseml results files.
12 """
13 version_re = re.compile("BASEML \(in paml version (\d+\.\d+[a-z]*).*")
14 np_re = re.compile("lnL\(ntime:\s+\d+\s+np:\s+(\d+)\)")
15 num_params = -1
16 for line in lines:
17 # Find all floating point numbers in this line
18 line_floats_res = line_floats_re.findall(line)
19 line_floats = [float(val) for val in line_floats_res]
20 # Find the version number
21 # Example match:
22 # "BASEML (in paml version 4.3, August 2009) alignment.phylip"
23 version_res = version_re.match(line)
24 if version_res is not None:
25 results["version"] = version_res.group(1)
26 # Find max lnL
27 # Example match:
28 # ln Lmax (unconstrained) = -316.049385
29 if "ln Lmax" in line and len(line_floats) == 1:
30 results["lnL max"] = line_floats[0]
31 # Find lnL values.
32 # Example match (lnL = -2021.348300):
33 # "lnL(ntime: 19 np: 22): -2021.348300 +0.000000"
34 elif "lnL(ntime:" in line and len(line_floats) > 0:
35 results["lnL"] = line_floats[0]
36 np_res = np_re.match(line)
37 if np_res is not None:
38 num_params = int(np_res.group(1))
39 # Find tree lengths.
40 # Example match: "tree length = 1.71931"
41 elif "tree length" in line and len(line_floats) == 1:
42 results["tree length"] = line_floats[0]
43 # Find the estimated tree, only taking the tree if it has
44 # branch lengths
45 elif re.match("\(+", line) is not None:
46 if ":" in line:
47 results["tree"] = line.strip()
48 return (results, num_params)
49
51 """Parse the various parameters from the file.
52 """
53 parameters = {}
54 parameters = parse_parameter_list(lines, parameters, num_params)
55 parameters = parse_kappas(lines, parameters)
56 parameters = parse_rates(lines, parameters)
57 parameters = parse_freqs(lines, parameters)
58 results["parameters"] = parameters
59 return results
60
62 """ Parse the parameters list, which is just an unlabeled list of numeric values.
63 """
64 for line_num in range(len(lines)):
65 line = lines[line_num]
66 # Find all floating point numbers in this line
67 line_floats_res = line_floats_re.findall(line)
68 line_floats = [float(val) for val in line_floats_res]
69 # Get parameter list. This can be useful for specifying starting
70 # parameters in another run by copying the list of parameters
71 # to a file called in.baseml. Since the parameters must be in
72 # a fixed order and format, copying and pasting to the file is
73 # best. For this reason, they are grabbed here just as a long
74 # string and not as individual numbers.
75 if len(line_floats) == num_params:
76 parameters["parameter list"] = line.strip()
77 # Find SEs. The same format as parameters above is maintained
78 # since there is a correspondance between the SE format and
79 # the parameter format.
80 # Example match:
81 # "SEs for parameters:
82 # -1.00000 -1.00000 -1.00000 801727.63247 730462.67590 -1.00000
83 if "SEs for parameters:" in lines[line_num + 1]:
84 SEs_line = lines[line_num + 2]
85 parameters["SEs"] = SEs_line.strip()
86 break
87 return parameters
88
90 """Parse out the kappa parameters.
91 """
92 kappa_found = False
93 for line in lines:
94 # Find all floating point numbers in this line
95 line_floats_res = line_floats_re.findall(line)
96 line_floats = [float(val) for val in line_floats_res]
97 # Find kappa parameter (F84, HKY85, T92 model)
98 # Example match:
99 # "Parameters (kappa) in the rate matrix (F84) (Yang 1994 J Mol Evol 39:105-111):
100 # 3.00749"
101 if "Parameters (kappa)" in line:
102 kappa_found = True
103 elif kappa_found and len(line_floats) > 0:
104 branch_res = re.match("\s(\d+\.\.\d+)", line)
105 if branch_res is None:
106 if len(line_floats) == 1:
107 parameters["kappa"] = line_floats[0]
108 else:
109 parameters["kappa"] = line_floats
110 kappa_found = False
111 else:
112 if parameters.get("branches") is None:
113 parameters["branches"] = {}
114 branch = branch_res.group(1)
115 if len(line_floats) > 0:
116 parameters["branches"][branch] = \
117 {"t":line_floats[0], "kappa":line_floats[1],
118 "TS":line_floats[2], "TV":line_floats[3]}
119 # Find kappa under REV
120 # Example match:
121 # kappa under REV: 999.00000 145.76453 0.00001 0.00001 0.00001
122 elif "kappa under" in line and len(line_floats) > 0:
123 if len(line_floats) == 1:
124 parameters["kappa"] = line_floats[0]
125 else:
126 parameters["kappa"] = line_floats
127 return parameters
128
130 """Parse the rate parameters.
131 """
132 Q_mat_found = False
133 for line in lines:
134 # Find all floating point numbers in this line
135 line_floats_res = line_floats_re.findall(line)
136 line_floats = [float(val) for val in line_floats_res]
137 # Find rate parameters
138 # Example match:
139 # "Rate parameters: 999.00000 145.59775 0.00001 0.00001 0.00001"
140 if "Rate parameters:" in line and len(line_floats) > 0:
141 parameters["rate parameters"] = line_floats
142 # Find rates
143 # Example match:
144 # "rate: 0.90121 0.96051 0.99831 1.03711 1.10287"
145 elif "rate: " in line and len(line_floats) > 0:
146 parameters["rates"] = line_floats
147 # Find Rate matrix Q & average kappa (REV model)
148 # Example match:
149 # Rate matrix Q, Average Ts/Tv = 3.0308
150 # -2.483179 1.865730 0.617449 0.000000
151 # 2.298662 -2.298662 0.000000 0.000000
152 # 0.335015 0.000000 -0.338059 0.003044
153 # 0.000000 0.000000 0.004241 -0.004241
154 elif "matrix Q" in line:
155 parameters["Q matrix"] = {"matrix":[]}
156 if len(line_floats) > 0:
157 parameters["Q matrix"]["average Ts/Tv"] = \
158 line_floats[0]
159 Q_mat_found = True
160 elif Q_mat_found and len(line_floats) > 0:
161 parameters["Q matrix"]["matrix"].append(line_floats)
162 if len(parameters["Q matrix"]["matrix"]) == 4:
163 Q_mat_found = False
164 # Find alpha (gamma shape parameter for variable rates)
165 # Example match: "alpha (gamma, K=5) = 192.47918"
166 elif "alpha" in line and len(line_floats) > 0:
167 parameters["alpha"] = line_floats[0]
168 return parameters
169
171 """Parse the basepair frequencies.
172 """
173 root_re = re.compile("Note: node (\d+) is root.")
174 branch_freqs_found = False
175 for line in lines:
176 # Find all floating point numbers in this line
177 line_floats_res = line_floats_re.findall(line)
178 line_floats = [float(val) for val in line_floats_res]
179 # Find base frequencies
180 # Example match:
181 # "Base frequencies: 0.20090 0.16306 0.37027 0.26577"
182 if "Base frequencies" in line and len(line_floats) > 0:
183 base_frequencies = {}
184 base_frequencies["T"] = line_floats[0]
185 base_frequencies["C"] = line_floats[1]
186 base_frequencies["A"] = line_floats[2]
187 base_frequencies["G"] = line_floats[3]
188 parameters["base frequencies"] = base_frequencies
189 # Find frequencies
190 # Example match:
191 # "freq: 0.90121 0.96051 0.99831 1.03711 1.10287"
192 elif "freq: " in line and len(line_floats) > 0:
193 parameters["rate frequencies"] = line_floats
194 # Find branch-specific frequency parameters
195 # Example match (note: I think it's possible to have 4 more
196 # values per line, enclosed in brackets, so I'll account for
197 # this):
198 # (frequency parameters for branches) [frequencies at nodes] (see Yang & Roberts 1995 fig 1)
199 #
200 # Node #1 ( 0.25824 0.24176 0.25824 0.24176 )
201 # Node #2 ( 0.00000 0.50000 0.00000 0.50000 )
202 elif "(frequency parameters for branches)" in line:
203 parameters["nodes"] = {}
204 branch_freqs_found = True
205 elif branch_freqs_found is True:
206 if len(line_floats) > 0:
207 node_res = re.match("Node \#(\d+)", line)
208 node_num = int(node_res.group(1))
209 node = {"root":False}
210 node["frequency parameters"] = line_floats[:4]
211 if len(line_floats) > 4:
212 node["base frequencies"] = {"T":line_floats[4],
213 "C":line_floats[5],
214 "A":line_floats[6],
215 "G":line_floats[7]}
216 parameters["nodes"][node_num] = node
217 else:
218 root_res = root_re.match(line)
219 if root_res is not None:
220 root_node = int(root_res.group(1))
221 parameters["nodes"][root_node]["root"] =\
222 True
223 branch_freqs_found = False
224 return parameters
225
| Trees | Indices | Help |
|---|
| Generated by Epydoc 3.0.1 on Thu Aug 18 18:22:41 2011 | http://epydoc.sourceforge.net |