| Trees | Indices | Help |
|---|
|
|
1 #!/usr/bin/env python
2 #
3 # Restriction Analysis Libraries.
4 # Copyright (C) 2004. Frederic Sohm.
5 #
6 # This code is part of the Biopython distribution and governed by its
7 # license. Please see the LICENSE file that should have been included
8 # as part of this package.
9 #
10
11 import re
12 import itertools
13 from Bio.Restriction import RanaConfig as RanaConf
14
15 """
16 Usage:
17
18 PrintFormat allows printing the results of a restriction analysis in 3
19 different formats:
20 list, number or map.
21
22 the easiest way to use it is:
23
24 >>> from Bio.Restriction.PrintFormat import PrintFormat
25 >>> from Bio.Restriction.Restriction import AllEnzymes
26 >>> from Bio import Entrez
27 >>> from Bio import SeqIO
28 >>> handle = Entrez.efetch(db="nucleotide", rettype="fasta", id="SYNPBR322")
29 >>> pBR322 = SeqIO.read(handle, "fasta")
30 >>> handle.close()
31 >>> dct = AllEnzymes.search(pBR322.seq)
32 >>> new = PrintFormat()
33 >>> new.print_that(dct, '\n my pBR322 analysis\n\n','\n no site :\n\n')
34
35 my pBR322 analysis
36
37 AasI : 2169, 2582.
38 AatII : 4289.
39 ...
40 More enzymes.
41 ...
42 ZraI : 4287.
43 ZrmI : 3847.
44
45 no site:
46
47 AarI AatI Acc65I AcsI AcvI AdeI AflII AgeI
48 ...
49 More enzymes.
50 ...
51 Vha464I XapI XbaI XcmI XhoI XmaCI XmaI XmaJI
52 Zsp2I
53
54 >>> new.sequence = pBR322.seq
55 >>> new.print_as("map")
56 >>> new.print_that(dct)
57 ...
58
59 Some of the methods of PrintFormat are meant to be overridden by derived
60 classes.
61 """
62
64 """PrintFormat allows the printing of results of restriction analysis."""
65
# Layout settings copied from the RanaConf configuration module; the methods
# of this class read them through self (self.NameWidth, self.Indent,
# self.MaxSize, self.linesize).
# NOTE(review): the enclosing class statement is not visible in this listing.
ConsoleWidth = RanaConf.ConsoleWidth
NameWidth = RanaConf.NameWidth
MaxSize = RanaConf.MaxSize
Indent = RanaConf.Indent
# Characters left over once the console is divided into NameWidth columns.
Cmodulo = ConsoleWidth % NameWidth
# Console width rounded down to a whole number of NameWidth columns.
PrefWidth = ConsoleWidth - Cmodulo
# Wrapping threshold: one name column narrower than PrefWidth.
linesize = PrefWidth - NameWidth
73
77
# NOTE(review): the def line is absent from this listing; the signature is
# rebuilt from the docstring -- confirm against the original module.
def print_as(self, what='list'):
    """PF.print_as([what='list']) -> select how the results are printed.

    Rebinds self.make_format to the _make_* method matching what:

    'map'    -> self._make_map, a map representation of the sequence
                with the sites.
    'number' -> self._make_number, ordered by number of sites in the
                sequence.
    any other value, including the default 'list'
             -> self._make_list, alphabetical order.

    If you want more flexibility, override the virtual method make_format.
    """
    if what == 'map':
        chosen = self._make_map
    elif what == 'number':
        chosen = self._make_number
    else:
        # Unknown names deliberately fall back to the list layout.
        chosen = self._make_list
    self.make_format = chosen
    return None
96
97
# NOTE(review): the def line is absent from this listing; the signature is
# rebuilt from the docstring ("dct, [title[, s1]]") -- the '' defaults are an
# assumption to confirm against the original module.
def print_that(self, dct, title='', s1=''):
    """PF.print_that(dct, [title[, s1]]) -> Print dct nicely formatted.

    dct is a dictionary as returned by a RestrictionBatch.search().
    When dct is empty, self.results is used instead (results is not
    defined in the visible part of this class; presumably it is set by a
    subclass or by the caller -- confirm).

    title is the title of the map.
    It must be a formatted string, i.e. you must include the line break.

    s1 is the title separating the list of enzymes that have sites from
    those without sites.
    s1 must be a formatted string as well.

    The entries are split into enzymes with at least one site and enzymes
    without any, then handed to self.make_format; the layout is therefore
    whatever print_as last selected (a list by default).
    Returns None; the formatted text is written to standard output.
    """
    if not dct:
        dct = self.results
    ls, nc = [], []
    # items() and print(...) with a single argument behave the same on
    # Python 2 and Python 3, unlike iteritems() and the print statement.
    for k, v in dct.items():
        if v:
            ls.append((k, v))
        else:
            nc.append(k)
    print(self.make_format(ls, title, nc, s1))
    return
121
# NOTE(review): the def line is absent from this listing; parameter names and
# order are taken from the body and from the call in print_that.  The empty
# defaults are an assumption -- confirm against the original module.
def make_format(self, cut=(), title='', nc=(), s1=''):
    """PF.make_format(cut, title, nc, s1) -> string

    Virtual method.
    Here to be pointed to one of the _make_* methods (see print_as).
    You can as well create a new method and point make_format to it.

    cut is a list of (enzyme, sites) pairs for the cutting enzymes.
    title is the title.
    nc is a list of non cutting enzymes.
    s1 is the sentence before the non cutting enzymes.

    Unless rebound, the list layout of _make_list is used.
    """
    return self._make_list(cut, title, nc, s1)
129
130 ###### _make_* methods to be used with the virtual method make_format
131
# NOTE(review): the def line is absent from this listing; the name comes from
# print_as (self._make_list) and the parameters from the docstring -- confirm.
def _make_list(self, ls, title, nc, s1):
    """PF._make_list(ls, title, nc, s1) -> string.

    return a string of form:

    title.

    enzyme1 : position1, position2.
    enzyme2 : position1, position2, position3.

    followed by the block of non cutting enzymes.

    ls is a list of cutting enzymes.
    title is the title.
    nc is a list of non cutting enzymes.
    s1 is the sentence before the non cutting enzymes."""
    return self._make_list_only(ls, title) + self._make_nocut_only(nc, s1)
147
# NOTE(review): the def line is absent from this listing; the name comes from
# print_as (self._make_map) and the parameters from the docstring -- confirm.
def _make_map(self, ls, title, nc, s1):
    """PF._make_map(ls, title, nc, s1) -> string.

    return a string of form:

    title.

        enzyme1, position
        |
    AAAAAAAAAAAAAAAAAAAAA...
    |||||||||||||||||||||
    TTTTTTTTTTTTTTTTTTTTT...

    followed by the block of non cutting enzymes.

    ls is a list of cutting enzymes.
    title is the title.
    nc is a list of non cutting enzymes.
    s1 is the sentence before the non cutting enzymes."""
    return self._make_map_only(ls, title) + self._make_nocut_only(nc, s1)
166
# NOTE(review): the def line is absent from this listing; the name comes from
# print_as (self._make_number) and the parameters from the docstring -- confirm.
def _make_number(self, ls, title, nc, s1):
    """PF._make_number(ls, title, nc, s1) -> string.

    Lay out the cutting enzymes grouped by how many times they cut,
    then append the block of non cutting enzymes:

    title.

    enzyme which cut 1 time:

    enzyme1 : position1.

    enzyme which cut 2 times:

    enzyme2 : position1, position2.
    ...

    ls is a list of cutting enzymes.
    title is the title.
    nc is a list of non cutting enzymes.
    s1 is the sentence before the non cutting enzymes."""
    cutters = self._make_number_only(ls, title)
    non_cutters = self._make_nocut_only(nc, s1)
    return cutters + non_cutters
186
# NOTE(review): the def line is absent from this listing; name and parameters
# are taken from the docstring -- confirm against the original module.
def _make_nocut(self, ls, title, nc, s1):
    """PF._make_nocut(ls, title, nc, s1) -> string.

    Return the title followed by the formatted block of non cutting
    enzymes.

    ls is a list of cutting enzymes -> it is not used.
    It is only here for compatibility with make_format.

    title is the title.
    nc is a list of non cutting enzymes.
    s1 is the sentence before the non cutting enzymes."""
    non_cutters = self._make_nocut_only(nc, s1)
    return title + non_cutters
199
# NOTE(review): the def line is absent from this listing; only (nc, s1) is
# grounded in the docstring.  The ignored optional ls/title parameters are an
# assumption kept for call compatibility -- confirm against the original.
# NOTE(review): runs of spaces inside string literals may have been collapsed
# when this listing was extracted; compare the default header with the
# original module.
def _make_nocut_only(self, nc, s1, ls=(), title=''):
    """PF._make_nocut_only(nc, s1) -> string.

    return a formatted string of the non cutting enzymes.

    nc is a list of non cutting enzymes; it is left unmodified.
    s1 is the sentence before the non cutting enzymes.  When s1 is empty
    a default sentence is used.
    ls and title are accepted and ignored.

    The names are sorted, padded to self.NameWidth and laid out in rows;
    a row is closed as soon as it is longer than self.linesize.
    When nc is empty, s1 is returned unchanged.
    """
    if not nc:
        return s1
    pieces = [s1 or '\n Enzymes which do not cut the sequence.\n\n']
    row = ''
    # sorted() rather than nc.sort(): do not reorder the caller's list.
    for key in sorted(nc):
        row += str(key).ljust(self.NameWidth)
        if len(row) > self.linesize:
            pieces.append(row + '\n')
            row = ''
    # The last (possibly empty) row is always terminated.
    pieces.append(row + '\n')
    return ''.join(pieces)
221
# NOTE(review): the def line is absent from this listing; only (ls, title) is
# grounded in the docstring.  The ignored optional nc/s1 parameters are an
# assumption kept for call compatibility -- confirm against the original.
def _make_list_only(self, ls, title, nc=(), s1=''):
    """PF._make_list_only(ls, title) -> string.

    Format the cutting enzymes as one line per enzyme below the title:

    title.

    enzyme1 : position1, position2.
    enzyme2 : position1, position2, position3.
    ...

    ls is a list of results.
    title is a string; it is returned as is when ls is empty.
    Non cutting enzymes are not included."""
    return self.__next_section(ls, title) if ls else title
239
# NOTE(review): the def line is absent from this listing; only (ls, title) is
# grounded in the docstring.  The ignored optional nc/s1 parameters are an
# assumption kept for call compatibility -- confirm against the original.
def _make_number_only(self, ls, title, nc=(), s1=''):
    """PF._make_number_only(ls, title) -> string.

    Group the cutting enzymes by their number of sites, in increasing
    order, and return a string of form:

    title.

    enzymes which cut 1 times :

    enzyme1 : position1.

    enzymes which cut 2 times :

    enzyme2 : position1, position2.
    ...

    A heading is only written for counts that actually occur.

    ls is a list of results; it is left unmodified.
    title is a string; it is returned as is when ls is empty.
    Non cutting enzymes are not included."""
    if not ls:
        return title

    def site_count(entry):
        return len(entry[1])

    # groupby needs its input ordered by the grouping key.  Starting from
    # the first real count (rather than assuming 1) avoids an empty
    # "cut 1 times" section when no enzyme cuts exactly once.
    by_count = sorted(ls, key=site_count)
    for count, group in itertools.groupby(by_count, key=site_count):
        title += "\n\nenzymes which cut %i times :\n\n" % count
        title = self.__next_section(list(group), title)
    return title
276
# NOTE(review): the def line and all indentation are absent from this listing.
# The nesting below is rebuilt from the control flow, and only (ls, title) is
# grounded in the docstring; the ignored optional nc/s1 parameters are an
# assumption -- confirm against the original module.
def _make_map_only(self, ls, title, nc=(), s1=''):
    """PF._make_map_only(ls, title) -> string.

    return a string of form:

    title.

        enzyme1, position
        |
    AAAAAAAAAAAAAAAAAAAAA...
    |||||||||||||||||||||
    TTTTTTTTTTTTTTTTTTTTT...

    The sequence is drawn in rows of 60 bases.  Each row shows the
    sequence, a line of bars, the complement and a ruler line with the
    first and last position of the row.  Above a row, every cut position
    falling in it gets one line naming the position and its enzymes and
    one line of markers.

    ls is a list of results.
    title is a string; it is returned as is when ls is empty.
    Non cutting enzymes are not included.

    Reads self.sequence, which must support len(), str() and
    complement().
    """
    if not ls:
        return title
    text = title or ''
    # cut position -> names of the enzymes cutting there.
    enzymemap = {}
    for enzyme, cut in ls:
        for c in cut:
            enzymemap.setdefault(c, []).append(str(enzyme))
    mapping = sorted(enzymemap)
    length = len(self.sequence)
    # cutloc[start] lists the positions drawn above the row that starts at
    # offset start.  Every full row gets an entry, even an empty one, so
    # the drawing loop below can always look its row up.
    cutloc = {}
    x = 0
    for x in range(60, length, 60):
        counter = x - 60
        in_row, later = [], []
        for key in mapping:
            if key <= x:
                in_row.append(key)
            else:
                later.append(key)
        cutloc[counter] = in_row
        mapping = later
    # Whatever is left belongs to the last (possibly partial) row.
    cutloc[x] = mapping
    # str() replaces Seq.tostring(), which newer Biopython releases dropped.
    sequence = str(self.sequence)
    revsequence = str(self.sequence.complement())
    a = '|'
    base = 0
    emptyline = ' ' * 60
    Join = ''.join
    for base in range(60, length, 60):
        counter = base - 60
        line = emptyline
        for key in cutloc[counter]:
            s = ''
            if key == base:
                # Site on the last column of the row: key % 60 would be 0,
                # so the marker is placed explicitly at column 59.
                for n in enzymemap[key]:
                    s = ' '.join((s, n))
                lead = line[0:59]
                lineo = Join((lead, str(key), s, '\n'))
                line2 = Join((lead, a, '\n'))
                text = Join((text, lineo, line2))
                break
            for n in enzymemap[key]:
                s = ' '.join((s, n))
            k = key % 60
            lineo = Join((line[0:(k - 1)], str(key), s, '\n'))
            # Keep earlier markers of this row visible on later lines.
            line = Join((line[0:(k - 1)], a, line[k:]))
            line2 = Join((line[0:(k - 1)], a, line[k:], '\n'))
            text = Join((text, lineo, line2))
        ruler = Join((str(counter + 1).ljust(15), ' ' * 30,
                      str(base).rjust(15), '\n\n'))
        mapunit = '\n'.join((sequence[counter:base], a * 60,
                             revsequence[counter:base], ruler))
        text = Join((text, mapunit))
    # Last row: the remaining length - base bases.
    line = ' ' * 60
    for key in cutloc[base]:
        s = ''
        if key == length:
            for n in enzymemap[key]:
                s = Join((s, ' ', n))
            # NOTE(review): this slices with the absolute length rather
            # than the column inside the row; kept as in the listing.
            lead = line[0:(length - 1)]
            lineo = Join((lead, str(key), s, '\n'))
            line2 = Join((lead, a, '\n'))
            text = Join((text, lineo, line2))
            break
        for n in enzymemap[key]:
            s = Join((s, ' ', n))
        k = key % 60
        lineo = Join((line[0:(k - 1)], str(key), s, '\n'))
        line = Join((line[0:(k - 1)], a, line[k:]))
        line2 = Join((line[0:(k - 1)], a, line[k:], '\n'))
        text = Join((text, lineo, line2))
    mapunit = Join((sequence[base:length], '\n',
                    a * (length - base), '\n',
                    revsequence[base:length], '\n',
                    str(base + 1).ljust(15),
                    ' ' * (length - base - 30),
                    str(length).rjust(15), '\n\n'))
    return Join((text, mapunit))
383
384 ###### private method to do lists:
385
# NOTE(review): the def line is absent from this listing; the name comes from
# the self.__next_section calls and the parameters from the docstring.
# NOTE(review): runs of spaces inside string literals may have been collapsed
# when this listing was extracted; compare ' : ' with the original module.
def __next_section(self, ls, into):
    """PF.__next_section(ls, into) -> string.

    ls is a list of tuple (string, [int, int]); it is left unmodified.
    into is a string to which the formatted ls will be added.

    Format ls, sorted, as a string of lines:
    The form is:

    enzyme1 : position1.
    enzyme2 : position2, position3.

    A position list longer than self.linesize - self.MaxSize is cut after
    a comma and continued on the next line, indented by
    self.NameWidth + self.Indent spaces.

    Then add the formatted ls to into and return the result."""
    indentation = '\n' + (self.NameWidth + self.Indent) * ' '
    linesize = self.linesize - self.MaxSize
    # Longest chunk of at most linesize characters that ends on a comma or
    # on the final full stop.  Raw string: \w and \s are regex escapes.
    pat = re.compile(r"([\w,\s()]){1,%i}[,\.]" % linesize)
    pieces = [into]
    # sorted() rather than ls.sort(): do not reorder the caller's list.
    for name, sites in sorted(ls):
        stringsite = ', '.join([str(site) for site in sites]) + '.'
        if len(stringsite) > linesize:
            # cut where appropriate and add the indentation
            chunks = [m.group() for m in pat.finditer(stringsite)]
            stringsite = indentation.join(chunks)
        pieces.extend((str(name).ljust(self.NameWidth), ' : ',
                       stringsite, '\n'))
    return ''.join(pieces)
419
| Trees | Indices | Help |
|---|
| Generated by Epydoc 3.0.1 on Wed Dec 16 11:27:53 2009 | http://epydoc.sourceforge.net |