[MKDoc-commit] [MKDoc-Text-Structured] method to insert spaces into long words

bruno at mkdoc.demon.co.uk bruno at mkdoc.demon.co.uk
Thu Apr 21 17:07:01 BST 2005


Log Message:
-----------
[MKDoc-Text-Structured] method to insert spaces into long words

Modified Files:
--------------
    MKDoc-Text-Structured:
        Changes
    MKDoc-Text-Structured/lib/MKDoc/Text:
        Structured.pm
    MKDoc-Text-Structured/lib/MKDoc/Text/Structured:
        Inline.pm

Added Files:
-----------
    MKDoc-Text-Structured/t:
        019_insert_spaces.t

-------------- next part --------------
Index: Changes
===================================================================
RCS file: /var/spool/cvs/MKDoc-Text-Structured/Changes,v
retrieving revision 1.22
retrieving revision 1.23
diff -LChanges -LChanges -u -r1.22 -r1.23
--- Changes
+++ Changes
@@ -2,6 +2,7 @@
 
 0.83
     - fix for <pre> indenting bug when first line is more indented than second
+    - method to insert spaces into long words
 
 0.82 Thu Mar 31 13:45:00 2005
     - fixed failure to change " to &quot; bug
Index: Structured.pm
===================================================================
RCS file: /var/spool/cvs/MKDoc-Text-Structured/lib/MKDoc/Text/Structured.pm,v
retrieving revision 1.25
retrieving revision 1.26
diff -Llib/MKDoc/Text/Structured.pm -Llib/MKDoc/Text/Structured.pm -u -r1.25 -r1.26
--- lib/MKDoc/Text/Structured.pm
+++ lib/MKDoc/Text/Structured.pm
@@ -315,6 +315,17 @@
 Additionally, once the XHTML fragment is produced, you could use
 L<MKDoc::XML::Tagger> to hyperlink it against a glossary of hyperlinks.
 
+=head1 Long Words
+
+Long words are split up into fragments separated by spaces if the length
+exceeds a 78 character default.
+
+Change the default length using a package variable:
+
+  local $MKDoc::Text::Structured::Inline::LongestWord = 12;
+
+Disable this functionality by setting a value of 0.
+
 =head1 AUTHOR
 
 Copyright 2003 - MKDoc Holdings Ltd.
Index: Inline.pm
===================================================================
RCS file: /var/spool/cvs/MKDoc-Text-Structured/lib/MKDoc/Text/Structured/Inline.pm,v
retrieving revision 1.11
retrieving revision 1.12
diff -Llib/MKDoc/Text/Structured/Inline.pm -Llib/MKDoc/Text/Structured/Inline.pm -u -r1.11 -r1.12
--- lib/MKDoc/Text/Structured/Inline.pm
+++ lib/MKDoc/Text/Structured/Inline.pm
@@ -5,6 +5,7 @@
 
 our $Text    = '';
 
+our $LongestWord = 78;
 
 sub process
 {
@@ -33,6 +34,7 @@
     _make_doublequotes();
     _make_strong();
     _make_em();
+    _break_long_words();
 
     $Text =~ s/^ //;
     $Text =~ s/ $//;
@@ -64,6 +66,7 @@
     _make_entities();
     _make_simplequotes();
     _make_doublequotes();
+    _break_long_words();
     
     $Text =~ s/^ //;
     $Text =~ s/ $//;
@@ -231,6 +234,28 @@
 }
 
 
+sub _break_long_words    # Rewrites the package variable $Text in place, spacing out over-long words; takes no arguments.
+{
+    $Text = join '', map {
+	my $stuff = $_;
+        $stuff = _insert_spaces ($stuff, $LongestWord) unless ($stuff =~ /^</);    # tokens beginning with "<" (presumably markup tags) are passed through untouched
+	$stuff;
+    } _tokenize ($Text);    # the tokens are re-joined with no separator, so only _insert_spaces can alter the text
+}
+
+
+sub _insert_spaces    # ($text, $length): returns $text with spaces inserted into runs of non-space characters longer than $length.
+{
+    my $text = shift;
+    my $length = shift || return $text;    # a false $length (0 or undef) disables splitting: $text is returned unchanged
+    # First pass: where $length non-space characters are followed by "/", ";" or "-" and more non-space text, add a space after that punctuation.
+    $text =~ s/(\S{$length}[\/;-])(?=\S)/$1 /g;
+    # Second pass: add a space after any run of $length characters containing neither whitespace nor "&", when more non-space text follows.
+    $text =~ s/([^[:space:]&]{$length})(?=\S)/$1 /g;
+    return $text;
+}
+
+
 sub _tokenize
 {
     my $text = shift;
--- /dev/null
+++ t/019_insert_spaces.t
@@ -0,0 +1,70 @@
+use warnings;
+use strict;
+use Test::More 'no_plan';
+use lib ('lib', '../lib');
+use MKDoc::Text::Structured::Inline;
+
+my $text;
+my $l = 10;
+
+like ('& amp;', '/&[^;]* /');    # self-check of the "broken entity" detector: a space between "&" and ";" must match (this and the next six lines)
+like ('&a mp;', '/&[^;]* /');
+like ('&am p;', '/&[^;]* /');
+like ('&amp ;', '/&[^;]* /');
+like ('& #247;', '/&[^;]* /');
+like ('&# 247;', '/&[^;]* /');
+like ('&#2 47;', '/&[^;]* /');
+unlike ('a short series of short words', "/\\S\\S{$l}/");    # self-check of the "long word" detector: it fires on more than $l consecutive non-space characters
+unlike ('some abounding nine letter embodying words', "/\\S\\S{$l}/");
+unlike ('some abrogative words of ten abominable letters', "/\\S\\S{$l}/");    # words of exactly $l (10) characters must not trip the detector
+like ('some mothproofed words of more than ten agitational letters', "/\\S\\S{$l}/");    # 11-character words must trip it
+
+$text = "a long url: http://www.example.com/path/to/a/very/very/deep/placed/document/deep/in/the/heirarchy/cgi-bin/foo.cgi?a=b&amp;c=d\n";
+$text = MKDoc::Text::Structured::Inline::_insert_spaces ($text, $l);
+unlike ($text, '/&[^;]* /');    # no space may have been inserted between an "&" and its closing ";"
+$text =~ s/&[#[:alnum:]]+;/./g;    # collapse each entity to a single character before measuring run lengths
+unlike ($text, "/\\S\\S{$l}/");    # no run of more than $l non-space characters may remain
+
+$text = "some looo&gt;&#247;&#247;&#247;&#247;&#247;&#247;&#247;&#247;&#247;&#247;&#247;&#247;&#247;&#247;&#247;&#247;&#247;&#247;&#247;&#247;&#247;&#247;&#247;&#247;&gt;&gt;&gt;&gt;&gt;&gt;&gt;&gt;&gt;&gt;&gt;&gt;&gt;ooooooooooooooooooooooooooooooong words\n";
+$text = MKDoc::Text::Structured::Inline::_insert_spaces ($text, $l);
+unlike ($text, '/&[^;]* /');
+$text =~ s/&[#[:alnum:]]+;/./g;
+unlike ($text, "/\\S\\S{$l}/");
+
+$text = "some looo&gt;&gt;&gt;&gt;&gt;&gt;&gt;&gt;&gt;&gt;&gt;&gt;&gt;&gt;ooooooooooooooooooooooooooooooong words\n";
+$text = MKDoc::Text::Structured::Inline::_insert_spaces ($text, $l);
+unlike ($text, '/&[^;]* /');
+$text =~ s/&[#[:alnum:]]+;/./g;
+unlike ($text, "/\\S\\S{$l}/");
+
+$text = "some loooo&gt;o&gt;o&gt;o&gt;o&gt;o&gt;o&gt;o&gt;o&gt;o&gt;o&gt;o&gt;o&gt;o&gt;ooooooooooooooooong words\n";
+$text = MKDoc::Text::Structured::Inline::_insert_spaces ($text, $l);
+unlike ($text, '/&[^;]* /');
+$text =~ s/&[#[:alnum:]]+;/./g;
+unlike ($text, "/\\S\\S{$l}/");
+
+$text = "some looooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooong woooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooords\n";
+$text = MKDoc::Text::Structured::Inline::_insert_spaces ($text, $l);
+unlike ($text, '/&[^;]* /');
+$text =~ s/&[#[:alnum:]]+;/./g;
+unlike ($text, "/\\S\\S{$l}/");
+
+
+unlike ("<p>telnetters</p>", "/>[^<]*[^<[:space:]][^<[:space:]]{$l}/");    # self-check of the detector for text after a ">": a 10-character word must not match
+like ("<p>comtelnetters</p>", "/>[^<]*[^<[:space:]][^<[:space:]]{$l}/");    # a 13-character word must match
+
+$MKDoc::Text::Structured::Inline::Text = "<p class=\"supercalifragilisticexpialicocioussupercalifragilisticexpialicocioussupercalifragilisticexpialicocioussupercalifragilisticexpialicocioussupercalifragilisticexpialicocioussupercalifragilisticexpialicocious\">Some fantasticallysupercalifragilisticexpialicocioussupercalifragilisticexpialicocioussupercalifragilisticexpialicocioussupercalifragilisticexpialicocioussupercalifragilisticexpialicocious <a href=\"http://www.example.com/some/long/supercalifragilisticexpialicocioussupercalifragilisticexpialicocioussupercalifragilisticexpialicocioussupercalifragilisticexpialicocioussupercalifragilisticexpialicocious/apologetically/wordings/\">http://www.example.com/some/long/supercalifragilisticexpialicocioussupercalifragilisticexpialicocioussupercalifragilisticexpialicocioussupercalifragilisticexpialicocioussupercalifragilisticexpialicocious/apologetically/wordings/</a></p>\n";
+$MKDoc::Text::Structured::Inline::LongestWord = $l;    # lower the package-level limit to $l for this run
+MKDoc::Text::Structured::Inline::_break_long_words ();    # operates on $MKDoc::Text::Structured::Inline::Text in place
+unlike ($MKDoc::Text::Structured::Inline::Text, '/&[^;]* /');    # no space may appear between an "&" and its closing ";"
+$MKDoc::Text::Structured::Inline::Text =~ s/&[#[:alnum:]]+;/./g;    # collapse each entity to a single character before measuring run lengths
+unlike ($MKDoc::Text::Structured::Inline::Text, "/>[^<]*[^<[:space:]][^<[:space:]]{$l}/");    # text following a ">" must contain no run of more than $l non-space characters
+
+$MKDoc::Text::Structured::Inline::Text = "<p class=\"supercalifragilisticexpialicocioussupercalifragilisticexpialicocioussupercalifragilisticexpialicocioussupercalifragilisticexpialicocioussupercalifragilisticexpialicocioussupercalifragilisticexpialicocious\">Some fantasticallysupercalifragilisticexpialicocioussupercalifragilisticexpialicocioussupercalifragilisticexpialicocioussupercalifragilisticexpialicocioussupercalifragilisticexpialicocious <a href=\"http://www.example.com/some/long/supercalifragilisticexpialicocioussupercalifragilisticexpialicocioussupercalifragilisticexpialicocioussupercalifragilisticexpialicocioussupercalifragilisticexpialicocious/apologetically/wordings/\">http://www.example.com/some/long/supercalifragilisticexpialicocioussupercalifragilisticexpialicocioussupercalifragilisticexpialicocioussupercalifragilisticexpialicocioussupercalifragilisticexpialicocious/apologetically/wordings/</a></p>\n";
+$MKDoc::Text::Structured::Inline::LongestWord = 0;    # a limit of 0 makes _insert_spaces return each token unchanged
+MKDoc::Text::Structured::Inline::_break_long_words ();    # the following assertion expects the long words to survive intact
+like ($MKDoc::Text::Structured::Inline::Text, '/supercalifragilisticexpialicocioussupercalifragilisticexpialicocioussupercalifragilisticexpialicocioussupercalifragilisticexpialicocioussupercalifragilisticexpialicocioussupercalifragilisticexpialicocious\">Some fantasticallysupercalifragilisticexpialicocioussupercalifragilisticexpialicocioussupercalifragilisticexpialicocioussupercalifragilisticexpialicocioussupercalifragilisticexpialicocious/');
+
+1;
+
+__END__


More information about the MKDoc-commit mailing list