1 package DevHelp::Formatter;
6 our $VERSION = "1.003";
8 use constant DEBUG => 0;
13 return bless {}, $class;
21 my ($self, $imagename, $align) = @_;
27 my ($self, $url, $type) = @_;
36 my ($width, $height) = @_;
38 $tag .= qq! width="$width"! if length $width;
39 $tag .= qq! size="$height"! if length $height;
44 # produces a table, possibly with options for the <table> and <tr> tags
46 my ($options, $text) = @_;
50 if ($options =~ /=/) {
51 $tag .= " " . unescape_html($options);
53 elsif ($options =~ /\S/) {
55 my ($width, $bg, $pad, $fontsz, $fontface) = split /\|/, $options;
56 for ($width, $bg, $pad, $fontsz, $fontface) {
57 $_ = '' unless defined;
59 $tag .= qq! width="$width"! if length $width;
60 $tag .= qq! bgcolor="$bg"! if length $bg;
61 $tag .= qq! cellpadding="$pad"! if length $pad;
62 if (length $fontsz || length $fontface) {
63 $cellstart = qq!<font!;
64 $cellstart .= qq! size="$fontsz"! if length $fontsz;
65 $cellstart .= qq! face="$fontface"! if length $fontface;
71 my @rows = split '\n', $text;
74 my ($opts, @cols) = split /\|/, $row;
76 if (defined $opts && $opts =~ /=/) {
77 $tag .= " ".unescape_html($opts);
79 $tag .= "><td>$cellstart".join("$cellend</td><td>$cellstart", @cols)
80 ."$cellend</td></tr>";
90 $text =~ s/^\s+|\s+$//g;
91 my @points = split /(?:\r?\n)? *\*\*\s*/, $text;
92 shift @points if @points and $points[0] eq '';
93 return '' unless @points;
94 for my $point (@points) {
96 and $point = "<p>$point</p>";
98 return "<ul><li>".join("</li><li>", @points)."</li></ul>";
103 my ($text, $type, $code) = @_;
105 print STDERR "_format_ol(..., $type, $code)\n" if DEBUG;
106 print STDERR "text: ",unpack("H*", $text),"\n" if DEBUG;
107 $text =~ s/^\s+|\s+$//g;
109 my @points = split /(?:\r?\n)? *$code\s*/, $text;
110 shift @points if @points and $points[0] eq '';
111 return '' unless @points;
112 for my $point (@points) {
114 and $point = "<p>$point</p>";
117 $ol .= qq! type="$type"! if $type;
119 return "$ol<li>".join("</li><li>", @points)."</li></ol>";
127 while (length $text) {
128 if ($text =~ s(^((?: *\#\#[^\n]+(?:\n(?!\*\*|\#\#|\%\%)[^\n]+)*(?:\n *|$)\n?[^\S\n]*)+)\n?)()) {
129 $out .= _format_ol($1);
131 elsif ($text =~ s(^((?: *\*\*[^\n]+(?:\n(?!\*\*|\#\#|\%\%)[^\n]+)*(?:\n *|$)\n?[^\S\n]*)+)\n?)()) {
132 $out .= _format_bullets($1);
134 elsif ($text =~ s(^((?: *%%[^\n]+(?:\n(?!\*\*|\#\#|\%\%)[^\n]+)*(?:\n *|$)\n?[^\S\n]*)+)\n?)()) {
135 $out .= _format_ol($1, 'a', '%%');
146 # raw html - this has some limitations
147 # the input text has already been escaped, so we need to unescape it
148 # too bad if you want [] in your html (but you can use entities)
150 return unescape_html($_[0]);
154 my ($self, $start, $end, $text, $type) = @_;
157 my $class = $self->tag_class($type);
159 $start =~ s/>$/ class="$class">/;
163 $text =~ s!(\n(?:[ \r]*\n)+)!$end$1$start!g;
169 my ($self, $url, $text) = @_;
171 $self->_fix_spanned(qq/<a href="/ . $self->rewrite_url($url, $text, "link") . qq/">/, "</a>", $text, 'link')
175 my ($self, $rpart) = @_;
176 $$rpart =~ s#(acronym|abbr|dfn)\[(?:\r?\n)?([^|\]\[]+)\|([^\]\[]+)\|([^\]\[]+?)(?:\r?\n)?\]#
177 $self->_fix_spanned(qq/<$1 class="$3" title="$2">/, "</$1>", $4)#egi
179 $$rpart =~ s#(acronym|abbr|dfn)\[(?:\r?\n)?([^|\]\[]+)\|([^\]\[]+?)(?:\r?\n)?\]#
180 $self->_fix_spanned(qq/<$1 title="$2">/, "</$1>", $3)#egi
182 $$rpart =~ s#(acronym|abbr|dfn)\[(?:\r?\n)?\|([^\]\[]+?)(?:\r?\n)?\]#
183 $self->_fix_spanned("<$1>", "</$1>", $2)#egi
185 $$rpart =~ s#(acronym|abbr|dfn)\[(?:\r?\n)?([^\]\[]+?)(?:\r?\n)?\]#
186 $self->_fix_spanned("<$1>", "</$1>", $2)#egi
188 $$rpart =~ s#bdo\[(?:\r?\n)?([^|\]\[]+)\|([^\]\[]+?)(?:\r?\n)?\]#
189 $self->_fix_spanned(qq/<bdo dir="$1">/, "</bdo>", $2)#egi
191 $$rpart =~ s#(strong|em|samp|code|var|sub|sup|kbd|q|b|i|tt|span)\[(?:\r?\n)?([^|\]\[]+)\|([^\]\[]+?)(?:\r?\n)?\]#
192 $self->_fix_spanned(qq/<$1 class="$2">/, "</$1>", $3)#egi
194 $$rpart =~ s#(strong|em|samp|code|var|sub|sup|kbd|q|b|i|tt|span)\[(?:\r?\n)?\|([^\]\[]+?)(?:\r?\n)?\]#
195 $self->_fix_spanned("<$1>", "</$1>", $2)#egi
197 $$rpart =~ s#(strong|em|samp|code|var|sub|sup|kbd|q|b|i|tt|span)\[(?:\r?\n)?([^\]\[]+?)(?:\r?\n)?\]#
198 $self->_fix_spanned("<$1>", "</$1>", $2)#egi
200 $$rpart =~ s#poplink\[([^|\]\[]+)\|([^\]\[]+)\]#
201 $self->_fix_spanned(qq/<a href="/ . $self->rewrite_url($1, $2, "poplink") . qq/" target="_blank">/, "</a>", $2, 'poplink')#eig
203 $$rpart =~ s#poplink\[([^|\]\[]+)\]#
204 $self->_fix_spanned(qq/<a href="/ . $self->rewrite_url($1, $1, "poplink") . qq/" target="_blank">/, "</a>", $1, 'poplink')#ieg
206 $$rpart =~ s#link\[([^|\]\[]+)\|([^\]\[]+)\]#
207 $self->link($1, $2)#eig
209 $$rpart =~ s#link\[([^|\]\[]+)\]#
210 $self->link($1, $1)#ieg
212 $$rpart =~ s#font\[([^|\]\[]+)\|([^\]\[]+)\]#
213 $self->_fix_spanned(qq/<font size="$1">/, "</font>", $2)#egi
215 $$rpart =~ s#anchor\[([^|\]\[]*)\]#<a name="$1"></a>#ig
217 $$rpart =~ s#fontcolor\[([^|\]\[]+)\|([^\]\[]+)\|([^\]\[]+)\]#
218 $self->_fix_spanned(qq/<font size="$1" color="$2">/, "</font>", $3)#egi
220 $$rpart =~ s!(?<=\W)\[([^\]\[]+)\]![$1]!g
226 sub _tag_with_attrs {
227 my ($self, $tag, $extra) = @_;
232 if ($extra =~ s/^\#([\w-]+)(?:\s+|$)//) {
233 $out .= qq! id="$1"!;
235 elsif ($extra =~ s/^([a-z][\w-]*)(?:\s+|$)//i) {
238 elsif ($extra =~ s/^((?:[a-z][\w-]*: .*?;\s*)+)//) {
239 $out .= qq! style="$1"!;
242 print STDERR "** don't understand $extra from $tag **\n";
247 $out .= qq! class="@classes"!;
255 my ($self, $tag, $attrs, $text) = @_;
257 return $self->_fix_spanned
258 ("\n\n" . $self->_tag_with_attrs($tag, $attrs), "</$tag>\n\n", $text)
262 my ($self, $body) = @_;
264 print STDERR "format(...)\nbody: ",unpack("H*", $body),"\n" if DEBUG;
270 $body = $self->escape($body);
272 for my $part (split /((?:html\[(?:[^\[\]]*(?:(?:\[[^\[\]]*\])[^\[\]]*)*)\])
273 |embed\[(?:[^,\[\]]*)(?:,(?:[^,\[\]]*)){0,2}\]
274 |pre\[(?:[^\[\]]*(?:(?:\[[^\[\]]*\])[^\[\]]*)*)\])/ix, $body) {
275 #print STDERR "Part is $part\n";
276 if ($part =~ /^html\[([^\[\]]*(?:(?:\[[^\[\]]*\])[^\[\]]*)*)\]$/i) {
277 $out .= _make_html($1);
279 elsif ($part =~ /^embed\[([^,\[\]]*),([^,\[\]]*),([^,\[\]]*)\]$/i) {
280 $out .= $self->embed($1, $2, $3);
282 elsif ($part =~ /^embed\[([^,\[\]]*),([^,\[\]]*)\]$/i) {
283 $out .= $self->embed($1, $2);
285 elsif ($part =~ /^embed\[([^,\[\]]*)\]$/i) {
286 $out .= $self->embed($1)
288 elsif ($part =~ /^pre\[([^\[\]]*(?:(?:\[[^\[\]]*\])[^\[\]]*)*)\]$/i) {
290 1 while $self->replace_char(\$work);
291 $out .= "<pre>$work</pre>";
294 next unless $part =~ /\S/;
296 $self->replace(\$part)
298 $self->replace_char(\$part)
300 $part =~ s#pre\[([^\]\[]+)\]#<pre>$1</pre>#ig
302 $part =~ s#h([1-6])\[([^\[\]\|]+)\|([^\[\]]+)\](?:\r?\n)?#
303 $self->_blocktag("h$1", $2, $3)#ieg
305 $part =~ s#\n*h([1-6])\[\|([^\[\]]+)\]\n*#
306 $self->_blocktag("h$1", '', $2)#ieg
308 $part =~ s#\n*h([1-6])\[([^\[\]]+)\]\n*#
309 $self->_blocktag("h$1", '', $2)#ieg
311 $part =~ s#align\[([^|\]\[]+)\|([^\]\[]+)\]#\n\n<div align="$1">$2</div>\n\n#ig
313 $part =~ s#hr\[([^|\]\[]*)\|([^\]\[]*)\]#_make_hr($1, $2)#ieg
315 $part =~ s#hr\[([^|\]\[]*)\]#_make_hr($1, '')#ieg
317 $part =~ s#table\[([^\n\[\]]*)\n([^\[\]]+)\n\s*\]#_make_table($1, $2)#ieg
319 $part =~ s#table\[([^\]\[]+)\|([^\]\[|]+)\]#_make_table($1, "|$2")#ieg
321 #print STDERR "step: ",unpack("H*", $part),"\n$part\n";
322 $part =~ s((?:^|\n+|\G)
325 \ * # maybe some spaces
326 (?:\*\*|\#\#|\%\%) # marker
327 [^\n]+(?:\n(?!\*\*|\#\#|\%\%)[^\n]+)* # some non-newline text
328 (?:\n|$)\n? # with one or two line endings
329 [^\S\n]* # and any extra non-newline whitespace
331 + # one or more times
332 )(\n|$)?)("\n\n"._format_lists($1)."\n\n")egx
334 $part =~ s#indent\[([^\]\[]+)\]#<ul>$1</ul>#ig
336 $part =~ s#center\[([^\]\[]+)\]#<center>$1</center>#ig
338 $part =~ s#hrcolor\[([^|\]\[]+)\|([^\]\[]+)\|([^\]\[]+)\]#<table width="$1" height="$2" border="0" bgcolor="$3" cellpadding="0" cellspacing="0"><tr><td><img src="/images/trans_pixel.gif" width="1" height="1" alt="" /></td></tr></table>#ig
340 $part =~ s#image\[([^\]\[]+)\]# $self->image($1) #ige
342 $part =~ s#class\[([^\]\[\|]+)\|([^\]\[]+)\]#
343 $self->_fix_spanned(qq/<span class="$1">/, "</span>", $2)#eig
345 $part =~ s#style\[([^\]\[\|]+)\|([^\]\[]+)\]#
346 $self->_fix_spanned(qq/<span style="$1">/, "</span>", $2)#eig
348 $part =~ s#(div|address|blockquote)\[\n*([^\[\]\|]+)\|\n*([^\[\]]+?)\n*\]#"\n\n" . $self->_tag_with_attrs($1, $2) . "$3</$1>\n\n"#eig
350 $part =~ s#comment\[[^\[\]]*\]##ig
352 $part =~ s#(div|address|blockquote)\[\n*\|([^\[\]]+?)\n*]#\n\n<$1>$2</$1>\n\n#ig
354 $part =~ s#(div|address|blockquote)\[\n*([^\[\]]+?)\n*]#\n\n<$1>$2</$1>\n\n#ig
358 $part =~ s/^\s+|\s+\z//g; # avoid spurious leading/trailing <p>
359 $part =~ s!(\n([ \r]*\n)*)!$1 eq "\n" ? "<br />\n" : "</p>\n<p>"!eg;
360 $part = "<p>$part</p>";
361 1 while $part =~ s/<p>(<div(?: [^>]*)?>)/$1<p>/g;
362 1 while $part =~ s!</div></p>!</p></div>!g;
363 1 while $part =~ s/<p>(<blockquote(?: [^>]*)?>)/$1<p>/g;
364 1 while $part =~ s!</blockquote></p>!</p></blockquote>!g;
365 1 while $part =~ s/<p>(<address(?: [^>]*)?>)/$1<p>/g;
366 1 while $part =~ s!</address></p>!</p></address>!g;
367 $part =~ s!<p>(<hr[^>]*>)</p>!$1!g;
368 $part =~ s!<p>(<(?:table|ol|ul|center|h[1-6])[^>]*>)!$1!g;
369 $part =~ s!(</(?:table|ol|ul|center|h[1-6])>)</p>!$1!g;
370 # attempts to convert class[name|paragraph] into <p class="name">...
371 # tried to use a negative lookahead but it wouldn't work
372 $part =~ s#(<p><span class="([^"<>]+)">(.*?)</span></p>)#
373 my ($one, $two, $three)= ($1, $2, $3);
374 $3 =~ /<span/ ? $one : qq!<p class="$two">$three</p>!#ge;
375 $part =~ s#(<p><span style="([^"<>]+)">(.*?)</span></p>)#
376 my ($one, $two, $three)= ($1, $2, $3);
377 $3 =~ /<span/ ? $one : qq!<p style="$two">$three</p>!#ge;
378 if (my $p_class = $self->tag_class('p')) {
379 $part =~ s!(<p(?: style="[^"<>]+")?)>!$1 class="$p_class">!g;
381 #$part =~ s!\n!<br />!g;
390 my ($self, $body) = @_;
393 or confess "undef body supplied to remove_format";
395 if ($body =~ /^<html>/i) {
396 return _strip_html(substr($body, 6));
400 for my $part (split /((?:html\[(?:[^\[\]]*(?:(?:\[[^\[\]]*\])[^\[\]]*)*)\])
401 |embed\[(?:[^,\[\]]*)(?:,(?:[^,\[\]]*)){0,2}\]
402 |pre\[(?:[^\[\]]*(?:(?:\[[^\[\]]*\])[^\[\]]*)*)\])/ix, $body) {
403 #print STDERR "Part is $part\n";
404 if ($part =~ /^html\[([^\[\]]*(?:(?:\[[^\[\]]*\])[^\[\]]*)*)\]$/i) {
405 $out .= _strip_html($1);
407 elsif ($part =~ /^embed\[([^,\[\]]*),([^,\[\]]*)\]$/i) {
408 $out .= ""; # what would you do here?
410 elsif ($part =~ /^embed\[([^,\[\]]*)\]$/i) {
413 elsif ($part =~ /^pre\[([^\[\]]*(?:(?:\[[^\[\]]*\])[^\[\]]*)*)\]$/i) {
415 $out .= $self->remove_format($work);
419 $self->remove(\$part)
421 $part =~ s#(?:acronym|abbr|dfn)\[([^|\]\[]+)\|([^\]\[]+)\|([^\]\[]*)\]#$3#ig
423 $part =~ s#(?:acronym|abbr|dfn|bdo)\[([^|\]\[]+)\|([^\]\[]*)\]#$2#ig
425 $part =~ s#(?:acronym|abbr|dfn|bdo)\[\|([^|\]\[]*)\]#$1#ig
427 $part =~ s#(?:acronym|abbr|dfn)\[([^|\]\[]*)\]#$1#ig
429 $part =~ s#(?:strong|em|samp|code|var|sub|sup|kbd|q|address|blockquote|b|i|tt|span)\[([^|\]\[]+)\|([^\]\[]*)\]#$2#ig
431 $part =~ s#(?:strong|em|samp|code|var|sub|sup|kbd|q|address|blockquote|b|i|tt|span)\[\|([^\]\[]*)\]#$1#ig
433 $part =~ s#(?:strong|em|samp|code|var|sub|sup|kbd|q|address|blockquote|b|i|tt|span)\[([^\]\[]*)\]#$1#ig
435 $part =~ s#div\[([^\[\]\|]+)\|([^\[\]]*)\](?:\r?\n)?#$2#ig
437 $part =~ s#comment\[([^\[\]]*)\](?:\r?\n)?##ig
439 $part =~ s#h([1-6])\[([^\[\]\|]*)\|([^\[\]]*)\](?:\r?\n)?#$3#ig
441 $part =~ s#h([1-6])\[\|([^\[\]]*)\](?:\r?\n)?#$2#ig
443 $part =~ s#h([1-6])\[([^\[\]]*)\](?:\r?\n)?#$2#ig
445 $part =~ s#poplink\[([^|\]\[]*)\|([^\]\[]*)\]#$2#ig
447 $part =~ s#poplink\[([^|\]\[]*)\]#$1#ig
449 $part =~ s#link\[([^|\]\[]*)\|([^\]\[]*)\]#$2#ig
451 $part =~ s#link\[([^|\]\[]*)\]#$1#ig
453 $part =~ s#align\[([^|\]\[]+)\|([^\]\[]*)\]#$2#ig
455 $part =~ s#font\[([^|\]\[]+)\|([^\]\[]*)\]#$2#ig
457 $part =~ s#hr\[([^|\]\[]*)\|([^\]\[]*)\]##ig
459 $part =~ s#hr\[([^|\]\[]*)\]##ig
461 $part =~ s#anchor\[([^|\]\[]*)\]##ig
463 $part =~ s#table\[([^\n\[\]]*)\n([^\[\]]+)\n\s*\]#_cleanup_table($1, $2)#ieg
465 $part =~ s#table\[([^\]\[]+)\|([^\]\[|]+)\]#_cleanup_table($1, "|$2")#ieg
467 $part =~ s#\*\*([^\n]+)#$1#g
469 $part =~ s!##([^\n]+)!$1!g
471 $part =~ s!%%([^\n]+)!$1!g
473 $part =~ s#fontcolor\[([^|\]\[]+)\|([^\]\[]+)\|([^\]\[]+)\]#$3#ig
475 $part =~ s#(?:indent|center)\[([^\]\[]*)\]#$1#ig
477 $part =~ s#hrcolor\[([^|\]\[]+)\|([^\]\[]+)\|([^\]\[]+)\]##ig
479 $part =~ s#image\[([^\]\[]+)\] *##ig
481 $part =~ s#class\[([^\]\[\|]+)\|([^\]\[]*)\]#$2#ig
483 $part =~ s#style\[([^\]\[\|]+)\|([^\]\[]*)\]#$2#ig
485 $part =~ s!(?<=\W)\[([^\]\[]+)\]!\x01$1\x02!g
490 $part =~ tr/\x01\x02/[]/; # put back the bare []
502 # removes any html tags from the supplied text
507 require HTML::Parser;
509 # this may need to detect and skip <script></script> and stylesheets
510 my $ignore_text = 0; # non-zero in a <script></script> or <style></style>
513 ++$ignore_text if $_[0] eq 'script' or $_[0] eq 'style';
514 if ($_[0] eq 'img' && $_[1]{alt} && !$ignore_text) {
520 --$ignore_text if $_[0] eq 'script' or $_[0] eq 'style';
524 $out .= $_[0] unless $ignore_text
526 my $p = HTML::Parser->new( text_h => [ $text_h, "dtext" ],
527 start_h => [ $start_h, "tagname, attr" ],
528 end_h => [ $end_h, "tagname" ]);
537 # this takes the same inputs as _make_table(), but eliminates any
540 my ($opts, $data) = @_;
541 my @lines = split /\n/, $data;
546 return join(' ', @lines);
585 my ($self, $html) = @_;
587 if ($self->{conservative_escape}) {
588 return escape_html($html, '<>&"');
590 elsif ($self->{msentify}) {
591 $html =~ s{([<>&\"\x80-\x9F])}
592 { $ms_entities{ord $1} ? "&$ms_entities{ord $1};"
593 : "** unknown code ".ord($1). " **"; }ge;
598 return escape_html($html);
602 # for subclasses to override