| Class | Ronn::RoffFilter |
| In: |
lib/ronn/roff.rb
|
| Parent: | Object |
| HTML_ROFF_ENTITIES | = | { '&bull;' => '\(bu', '&lt;' => '<', '&gt;' => '>', '&nbsp;' => '\~', '&copy;' => '\(co', '&rdquo;' => '\(rs', '&mdash;' => '\(em', '&reg;' => '\(rg', '&sec;' => '\(sc', '&ge;' => '\(>=', '&le;' => '\(<=', '&ne;' => '\(!=', '&equiv;' => '\(==' } |
Convert Ronn HTML to roff.
# File lib/ronn/roff.rb, line 9
# Convert Ronn HTML to roff, accumulating the output in @buf.
#
# The title heading is written first; the HTML is then parsed, cleaned
# of extraneous nodes, whitespace-normalized and run through the block
# filter. A trailing newline is written last.
def initialize(html, name, section, tagline, manual=nil, version=nil, date=nil)
  @buf = []
  title_heading(name, section, tagline, manual, version, date)

  document = Hpricot(html)
  remove_extraneous_elements!(document)
  normalize_whitespace!(document)
  block_filter(document)

  write("\n")
end
# File lib/ronn/roff.rb, line 80
# Emit roff for block-level nodes, recursing through containers.
#
# node - an Array or Hpricot::Elements collection (each member is
#        filtered in turn), a document (its children are filtered), or a
#        single element. A text node at this level only produces a
#        warning; unknown element names produce a warning as well.
#
# Fails with "unexpected node" for anything that is not a collection,
# document, text node or element.
def block_filter(node)
  if node.kind_of?(Array) || node.kind_of?(Hpricot::Elements)
    node.each { |ch| block_filter(ch) }

  elsif node.doc?
    block_filter(node.children)

  elsif node.text?
    warn "unexpected text: %p", node

  elsif node.elem?
    case node.name
    when 'div'
      block_filter(node.children)
    when 'h1'
      # discard
    when 'h2'
      macro "SH", quote(escape(node.html))
    when 'h3'
      macro "SS", quote(escape(node.html))

    when 'p'
      # No paragraph macro is emitted for the first element of its parent
      # or directly after a heading.
      prev = previous(node)
      if prev && %w[dd li].include?(node.parent.name)
        macro "IP"
      elsif prev && !%w[h1 h2 h3].include?(prev.name)
        macro "P"
      end
      inline_filter(node.children)

    when 'pre'
      # Indent the no-fill block unless it directly follows a heading.
      prev = previous(node)
      indent = prev.nil? || !%w[h1 h2 h3].include?(prev.name)
      macro "IP", %w["" 4] if indent
      macro "nf"
      write "\n"
      inline_filter(node.children)
      macro "fi"
      macro "IP", %w["" 0] if indent

    when 'dl'
      macro "TP"
      block_filter(node.children)
    when 'dt'
      # The enclosing dl already emitted TP for the first term.
      prev = previous(node)
      macro "TP" unless prev.nil?
      inline_filter(node.children)
      write "\n"
    when 'dd'
      if node.at('p')
        block_filter(node.children)
      else
        inline_filter(node.children)
      end
      write "\n"

    when 'ol', 'ul'
      block_filter(node.children)
      macro "IP", %w["" 0]
    when 'li'
      case node.parent.name
      when 'ol'
        # %W (interpolating word array) so the item number is substituted
        # into the quoted tag.
        macro "IP", %W["#{node.position + 1}." 4]
      when 'ul'
        macro "IP", %w["\(bu" 4]
      end
      if node.at('p|ol|ul|dl|div')
        block_filter(node.children)
      else
        inline_filter(node.children)
      end
      write "\n"

    else
      warn "unrecognized block tag: %p", node.name
    end

  else
    fail "unexpected node: #{node.inspect}"
  end
end
# File lib/ronn/roff.rb, line 241
# Escape HTML text for inclusion in roff output.
#
# text - String of HTML text (may be nil or empty).
#
# Numeric character references are decoded to UTF-8 characters, literal
# backslashes become '\e', the characters ' . - are backslash-escaped,
# named entities found in HTML_ROFF_ENTITIES are replaced by their roff
# equivalents, and finally '&amp;' is turned back into '&'.
#
# Returns a new String; the argument is not modified. nil yields "".
def escape(text)
  return text.to_s if text.nil? || text.empty?
  ent = HTML_ROFF_ENTITIES
  text = text.dup
  # pack('U') rather than Integer#chr: chr raises RangeError above 255.
  text.gsub!(/&#x([0-9A-Fa-f]+);/) { [$1.to_i(16)].pack('U') } # hex entities
  text.gsub!(/&#(\d+);/) { [$1.to_i].pack('U') }               # dec entities
  text.gsub!('\\', '\e')                                       # backslash
  text.gsub!(/['.-]/) { |m| "\\#{m}" }                         # control chars
  # Named entities come after the escaping above so the backslashes in
  # their roff replacements are not escaped again.
  text.gsub!(/(&[A-Za-z]+;)/) { ent[$1] || $1 }                # named entities
  text.gsub!('&amp;', '&')                                     # amps
  text
end
# File lib/ronn/roff.rb, line 162
# Emit roff for inline content.
#
# node - nil (ignored), an Array or Hpricot::Elements collection (each
#        member is filtered in turn), a text node (escaped and written),
#        or an inline element. Unknown element names produce a warning.
#
# Fails with "unexpected node" for anything else.
def inline_filter(node)
  return unless node # is an empty node

  if node.kind_of?(Array) || node.kind_of?(Hpricot::Elements)
    node.each { |child| inline_filter(child) }

  elsif node.text?
    write escape(node.to_html.dup)

  elsif node.elem?
    # Write the children between a font escape and the '\fR' reset.
    in_font = lambda do |font|
      write font
      inline_filter(node.children)
      write '\fR'
    end

    case node.name
    when 'span'
      inline_filter(node.children)

    when 'code'
      # code inside pre is emitted without a font change
      if child_of?(node, 'pre')
        inline_filter(node.children)
      else
        in_font.call('\fB')
      end

    when 'b', 'strong', 'kbd', 'samp'
      in_font.call('\fB')

    when 'var', 'em', 'i', 'u'
      in_font.call('\fI')

    when 'br'
      macro 'br'

    when 'a'
      if node.classes.include?('man-ref')
        inline_filter(node.children)
      elsif node.has_attribute?('data-bare-link')
        in_font.call('\fI')
      else
        # link text followed by the escaped href in italics
        inline_filter(node.children)
        write ' '
        write '\fI'
        write escape(node.attributes['href'])
        write '\fR'
      end

    else
      warn "unrecognized inline tag: %p", node.name
    end

  else
    fail "unexpected node: #{node.inspect}"
  end
end
# File lib/ronn/roff.rb, line 221
# Write a roff request: a line holding a lone '.', then '.' followed by
# the macro name and, when given, its value separated by a space.
#
# name  - macro name, e.g. "SH".
# value - optional argument String or Array of arguments (joined with
#         spaces); omitted when nil.
def macro(name, value=nil)
  request = [name, value].compact.join(' ')
  writeln ".\n.#{request}"
end
# File lib/ronn/roff.rb, line 47
# Collapse runs of newlines and spaces in text nodes to a single space,
# trimming the side of a text node that touches a block element, a <br>,
# or the edge of its parent. Text nodes left empty are removed from
# their parent. The contents of <pre> elements are not visited.
#
# node - a collection, document, element or text node; other nodes only
#        produce a warning.
def normalize_whitespace!(node)
  if node.kind_of?(Array) || node.kind_of?(Hpricot::Elements)
    # Iterate over a copy; the text branch below may delete nodes from
    # their parent's children.
    node.to_a.dup.each { |child| normalize_whitespace!(child) }
  elsif node.text?
    before = node.previous
    after = node.next
    squeezed = node.content.gsub(/[\n ]+/m, ' ')

    if before.nil? || block_element?(before.name) || before.name == 'br'
      squeezed.lstrip!
    end
    if after.nil? || block_element?(after.name) || after.name == 'br'
      squeezed.rstrip!
    end

    if squeezed.empty?
      node.parent.children.delete(node)
    else
      node.content = squeezed
    end
  elsif node.elem?
    # Stop at <pre>; elements without children need no work.
    normalize_whitespace!(node.children) if node.name != 'pre' && node.children
  elsif node.doc?
    normalize_whitespace!(node.children)
  else
    warn "unexpected node during whitespace normalization: %p", node
  end
end
# File lib/ronn/roff.rb, line 24
# Find the closest preceding sibling that is an element.
#
# node - any object; objects that do not respond to #previous yield nil.
#
# Returns the first node reached by following #previous for which
# #elem? is true, or nil when there is none.
def previous(node)
  return nil unless node.respond_to?(:previous)

  candidate = node.previous
  while candidate && !candidate.elem?
    candidate = candidate.previous
  end
  candidate
end
# File lib/ronn/roff.rb, line 39
# Delete comments, processing instructions, doctype declarations and XML
# declarations from the document by removing each such node from its
# parent's children.
def remove_extraneous_elements!(doc)
  doc.traverse_all_element do |node|
    extraneous = node.comment? || node.procins? || node.doctype? || node.xmldecl?
    node.parent.children.delete(node) if extraneous
  end
end
# File lib/ronn/roff.rb, line 32
# Write the generator comments and, when a name is given, the TH title
# macro.
#
# name    - page name; upcased and escaped. No TH macro is written when nil.
# section - manual section, interpolated as-is.
# tagline - accepted but not used here.
# manual  - manual name, interpolated as-is.
# version - version string, interpolated as-is.
# date    - object responding to #strftime, or nil. A nil date produces
#           an empty date field instead of raising NoMethodError.
def title_heading(name, section, tagline, manual, version, date)
  comment "generated with Ronn/v#{Ronn.version}"
  comment "http://github.com/rtomayko/ronn/tree/#{Ronn.revision}"
  return if name.nil?

  date_text = date ? date.strftime('%B %Y') : ''
  macro "TH", %["#{escape(name.upcase)}" "#{section}" "#{date_text}" "#{version}" "#{manual}"]
end
# File lib/ronn/roff.rb, line 280
# Print a warning to standard error.
#
# text - format string; it is prefixed with "warn: " before formatting.
# args - values substituted into the format string.
def warn(text, *args)
  message = format("warn: #{text}", *args)
  $stderr.puts message
end
write text to output buffer
# File lib/ronn/roff.rb, line 259
# Append text to the output buffer; nil and empty text are ignored.
#
# Lines cannot start with a '.': when the text begins with an escaped
# dot ('\.') and the last buffer entry ends in a newline, a zero-width
# '\&' is pushed first.
def write(text)
  return if text.nil? || text.empty?

  tail = @buf.last
  at_line_start = tail && tail[-1] == ?\n
  @buf << '\&' if at_line_start && text[0, 2] == '\.'
  @buf << text
end