| MKDoc-Text-Structured documentation | Contained in the MKDoc-Text-Structured distribution. |
MKDoc::Text::Structured::Inline - convert text to HTML without handling block-level tags
my $text = some_structured_text(); my $this = MKDoc::Text::Structured::Inline::process ($text); my $that = MKDoc::Text::Structured::Inline::process_entities_only ($text);
MKDoc::Text::Structured::Inline is used by MKDoc::Text::Structured to generate inline HTML elements such as hyperlinks, emphasis and entities.
This module is also useful directly when the full block-level rendering of MKDoc::Text::Structured is unwanted.
| MKDoc-Text-Structured documentation | Contained in the MKDoc-Text-Structured distribution. |
package MKDoc::Text::Structured::Inline; use URI::Find; use warnings; use strict; our $Text = ''; our $LongestWord = 78; our $NoFollow = 0;
sub process { local $Text; $Text = shift; $Text = " $Text "; $Text =~ s/\n/ /gsm; _make_entities(); $Text =~ s/>/ >/g; # automagically finds hyperlinks my $finder = URI::Find->new ( sub { my ($uri, $orig_uri) = @_; $orig_uri =~ s/^mailto://; # http://googleblog.blogspot.com/2005/01/preventing-comment-spam.html if ($NoFollow) { return qq|<a href="$uri" rel="nofollow">$orig_uri</a>|; } else { return qq|<a href="$uri">$orig_uri</a>|; } } ); $finder->find (\$Text); $Text =~ s/ >/>/g; # abbreviations while ($Text =~ s/([[:upper:]][[:upper:]]+)\s+(\(.*?\))/_make_abbr_implicit ($1, $2)/e) {}; # implicit while ($Text =~ s/([[:upper:]][[:upper:]]+)(\(.*?\))/_make_abbr_explicit ($1, $2)/e) {}; # explicit _make_simplequotes(); _make_doublequotes(); _make_strong(); _make_em(); _make_smilies(); _break_long_words(); $Text =~ s/^ //; $Text =~ s/ $//; return $Text; }
sub process_entities_only { local $Text; $Text = shift; $Text = " $Text "; $Text =~ s/\n/ /gsm; _make_entities(); _make_simplequotes(); _make_doublequotes(); _break_long_words(); $Text =~ s/^ //; $Text =~ s/ $//; return $Text; } sub _make_entities { $Text =~ s/&/&/g; $Text =~ s/</</g; $Text =~ s/>/>/g; $Text =~ s/"/"/g; $Text =~ s/(?<=(?:\s|\n))--(?=(?:\s|\n))/\—/g; # -- becomes em-dash $Text =~ s/(?<=(?:\s|\n))-(?=(?:\s|\n))/\–/g; # - becomes en-dash $Text =~ s/(?<!\.)\.\.\.(?!\.)/\…/g; # ... becomes ellipsis $Text =~ s/\(tm\)(?=(?:\s|\n|\p{IsPunct}))/\™/gi; # (tm) becomes trademark $Text =~ s/\(r\)(?=(?:\s|\n|\p{IsPunct}))/\®/gi; # (r) becomes registered $Text =~ s/\(c\)(?=(?:\s|\n|\p{IsPunct}))/\©/gi; # (c) becomes copyright $Text =~ s/(?<=(?:\s|\n))(\d+)\s*x\s*(\d+)(?=(?:\s|\n|\p{isPunct}))/$1\×$2/g; # x becomes dimension } sub _make_abbr_implicit { my $abbr = shift; my $brack = shift; my $title = $brack; $title =~ s/^\s*\(\s*//; $title =~ s/\s*\)\s*$//; return qq|<abbr title="$title">$abbr</abbr> ($title)|; } sub _make_abbr_explicit { my $abbr = shift; my $brack = shift; my $title = $brack; $title =~ s/^\s*\(\s*//; $title =~ s/\s*\)\s*$//; return qq|<abbr title="$title">$abbr</abbr>|; } sub _make_simplequotes { $Text = join '', map { my $stuff = $_; $stuff = " $stuff "; while ($stuff =~ s/ (?<=(?:\s|\n)) # must start with space or carriage return \' # simple quote ([^ \t\n\']|[^ \t\n\'].*?[^ \t\n\']) # stuff to capture and smart-quotize \' # simple quote (?=(?:<|\s|\n|\p{IsPunct}(?:\s|\n|<))) # must be followed by space, \n or (punctuation + space or \n) /_make_simplequotes_wrap ($1)/xes) {} $stuff =~ s/^ //; $stuff =~ s/ $//; $stuff; } _tokenize ($Text); } sub _make_simplequotes_wrap { my $stuff = shift; local $Text = $stuff; return "‘$Text’"; } sub _make_doublequotes { $Text = join '', map { my $stuff = $_; $stuff = " $stuff "; $stuff =~ s/"/<QUOT>/g; $stuff =~ s/"/"/g; while ($stuff =~ s/ (?<=(?:\s|\n)) # must start with space or carriage return \" # double quote ([^ \t\n\"]|[^ \t\n\"].*?[^ \t\n\"]) # stuff to capture and smart-quotize \" # double quote (?=(?:<|\s|\n|\p{IsPunct}(?:\s|\n|<))) # must be followed by space, \n or (punctuation + space or \n) /_make_doublequotes_wrap ($1)/xes) {} $stuff =~ s/^ //; $stuff =~ s/ $//; $stuff =~ s/"/"/g; $stuff =~ s/<QUOT>/"/g; $stuff; } _tokenize ($Text); } sub _make_doublequotes_wrap { my $stuff = shift; local $Text = $stuff; return "“$Text”"; } sub _make_strong { $Text = join '', map { my $stuff = $_; $stuff = " $stuff "; while ($stuff =~ s/ (?<=(?:\s|\n)) # must start with space or carriage return \* # star (\S|\S.*?\S) # stuff to capture and emphasize \* # star (?=(?:<|\s|\n|\p{IsPunct}(?:\s|\n|<))) # must be followed by space, \n or (punctuation + space or \n) /_make_strong_wrap ($1)/xes) {} $stuff =~ s/^ //; $stuff =~ s/ $//; $stuff; } _tokenize ($Text); } sub _make_strong_wrap { my $stuff = shift; local $Text = $stuff; _make_em ($Text); return "<strong>$Text</strong>"; } sub _make_em { $Text = join '', map { my $stuff = $_; $stuff = " $stuff "; while ($stuff =~ s/ (?<=(?:\s|\n)) # must start with space or carriage return _ # underscore (\S|\S.*?\S) # stuff to capture and emphasize _ # underscore (?=(?:<|\s|\n|\p{IsPunct}(?:\s|\n))) # must be followed by space, \n or (punctuation + space or \n) /_make_em_wrap ($1)/xes) {} $stuff =~ s/^ //; $stuff =~ s/ $//; $stuff; } _tokenize ($Text); } sub _make_em_wrap { my $stuff = shift; local $Text = $stuff; _make_strong ($Text); return "<em>$Text</em>"; } sub _make_smilies { $Text = join '', map { my $stuff = $_; $stuff =~ s/:-\)/<span class="smiley-happy">:-)<\/span>/g unless ($stuff =~ /^</); $stuff =~ s/:-\(/<span class="smiley-sad">:-(<\/span>/g unless ($stuff =~ /^</); # don't do ;-) think about what happens with &-) $stuff; } _tokenize ($Text); } sub _break_long_words { $Text = join '', map { my $stuff = $_; $stuff = _insert_spaces ($stuff, $LongestWord) unless ($stuff =~ /^</); $stuff; } _tokenize ($Text); } sub _insert_spaces { my $text = shift; my $length = shift || return $text; # we can break continuous non-space text after "/", ";" or "-" $text =~ s/(\S{$length}[\/;-])(?=\S)/$1 /g; # we can break continuous non-space text so long as it doesn't contain an ampersand $text =~ s/([^[:space:]&]{$length})(?=\S)/$1 /g; return $text; } sub _tokenize { my $text = shift; my @res = $text =~ /([^<]+)|(<.+?>)/g; return grep { defined $_ } @res; } 1; __END__