[MKDoc-commit] Patch from Sam to enable external links to open in new windows.

chris at mkdoc.demon.co.uk chris at mkdoc.demon.co.uk
Wed Jul 13 15:11:42 BST 2005


Log Message:
-----------
Patch from Sam to enable external links to open in new windows.

Tags:
----
mkdoc-1-6

Modified Files:
--------------
    mkd/flo:
        Editor.pm
    mkd/flo/editor:
        Link.pm
    mkd/templates/editor/link:
        en.html

Added Files:
-----------
    mkd/MKDoc/Util:
        LinkParser.pm
    mkd/tools:
        update_links.pl

-------------- next part --------------
Index: Editor.pm
===================================================================
RCS file: /var/spool/cvs/mkd/flo/Editor.pm,v
retrieving revision 1.12.2.41
retrieving revision 1.12.2.42
diff -Lflo/Editor.pm -Lflo/Editor.pm -u -r1.12.2.41 -r1.12.2.42
--- flo/Editor.pm
+++ flo/Editor.pm
@@ -31,6 +31,10 @@
 use 5.008_000;
 use utf8;
 
+# Set this to true and external links (as determined by
+# MKDoc::Util::LinkParser) will open in a new window.
+our $EXTERNAL_LINKS_OPEN_IN_NEW_WINDOW = 1;
+
 use constant BLOCK => 'edit_block';
 
 
@@ -149,9 +153,11 @@
 ##
 # $self->links;
 # -------------
-# This subroutine returns all the links of the current document
-# and caches them into $LINKS, these links can then be used by
-# MKDoc::XML::Tagger to linkify the whole document.
+# This subroutine returns all the links of the current document and
+# caches them into $LINKS, these links can then be used by
+# MKDoc::XML::Tagger to linkify the whole document.  The actual calls
+# to MKDoc::XML::Tagger occur in the components which may contain
+# links - flo::editor::Text and flo::editor::HTML, for example.
 #
 # It may look like a horrible hack (and it is), but since the
 # component system needs to be redesigned it does not bother
@@ -176,12 +182,26 @@
 	# retrieve the links for that document
 	foreach my $link_component (@links_components)
 	{
+            my $url = $link_component->render_url;
+
+            # setup class based on whether this is an internal or external link
+            my $class = "mkdoc-link-" . ( $link_component->is_internal ? 
+                                          "internal" : "external" );
+
+            # setup target to open a new window in needed
+            my @target;
+            if ($EXTERNAL_LINKS_OPEN_IN_NEW_WINDOW and 
+                not $link_component->is_internal) {
+                @target = (target => "_blank");
+            }
+            
 	    push @links, {
-               class => "mkdoc-link",
-		href => $link_component->url,
-		desc => $link_component->description,
-		expr => $link_component->title,
-	    } if ($link_component->url ne '' and $link_component->title ne '');
+                class => $class,
+		href  => $url,
+		desc  => $link_component->description,
+		expr  => $link_component->title,              
+                @target,
+	    } if ($url ne '' and $link_component->title ne '');
 	}
 	
 	# plus we want to hyperlink all the children
Index: Link.pm
===================================================================
RCS file: /var/spool/cvs/mkd/flo/editor/Link.pm,v
retrieving revision 1.7.2.20
retrieving revision 1.7.2.21
diff -Lflo/editor/Link.pm -Lflo/editor/Link.pm -u -r1.7.2.20 -r1.7.2.21
--- flo/editor/Link.pm
+++ flo/editor/Link.pm
@@ -26,12 +26,18 @@
 use flo::Standard;
 use MKDoc::CGI;
 use MKDoc::Util::Text2HTML;
+use MKDoc::Util::LinkParser;
 use strict;
 
 use base qw /flo::Component
 	     flo::editor::Mixin::compute_name
 	     flo::editor::Mixin::normalize_name/;
 
+# keep a global link parser around
+my $link_parser = MKDoc::Util::LinkParser->new();
+
+# placeholder for the URL entry field
+my $EMPTY_URL = 'http://';
 
 sub preferred_extension { 'link' }
 
@@ -47,11 +53,49 @@
     my $self = shift;
     my $args = $self->cgi_args()      || return;
 
-    $self->{url}         = $args->{'url'}         || 'http://';
     $self->{title}       = $args->{'title'}       || '';
     $self->{description} = $args->{'description'} || '';
+
+    if ($args->{url}) {
+        $self->parse_url($args->{url});
+    } else {
+        $self->{url} = $EMPTY_URL;
+    }
+}
+
+
+sub parse_url {
+    my ($self, $url) = @_;
+    $link_parser->parse($url);
+
+    if ($link_parser->is_internal and $link_parser->is_valid) {
+        # store relevent details for an internal link (invalid
+        # links are handled by validate_url)
+        $self->{internal_link} = $link_parser->freeze();
+
+        # reflect the canonical form in the UI
+        $self->{url}           = $link_parser->as_string;
+    } else {
+        # store just the url as entered for external links
+        delete $self->{internal_link};
+        $self->{url} = $url;
+    }
 }
 
+# template rendering for the URL
+sub render_url {
+    my $self = shift;
+    if ($self->{internal_link}) {
+        $link_parser->thaw($self->{internal_link});
+        return $link_parser->as_string;
+    } else {
+        return $self->{url};
+    }
+}
+
+sub is_internal {
+    return shift->{internal_link} ? 1 : 0;
+}
 
 sub validate
 {
@@ -70,10 +114,16 @@
 sub validate_url
 {
     my $self = shift;
-    $self->{url} and $self->{url} ne 'http://' or do {
+    $self->{url} and $self->{url} ne $EMPTY_URL or do {
 	new MKDoc::Ouch 'component/link/url_empty';
 	return 0;
     };
+
+    $link_parser->parse($self->{url});
+    if ($link_parser->is_internal and not $link_parser->is_valid) {
+        new MKDoc::Ouch 'component/link/invalid_internal_url';
+        return 0;
+    }
     
     return 1;
 }
Index: en.html
===================================================================
RCS file: /var/spool/cvs/mkd/templates/editor/link/Attic/en.html,v
retrieving revision 1.1.2.9
retrieving revision 1.1.2.10
diff -Ltemplates/editor/link/en.html -Ltemplates/editor/link/en.html -u -r1.1.2.9 -r1.1.2.10
--- templates/editor/link/en.html
+++ templates/editor/link/en.html
@@ -50,6 +50,10 @@
     >The 'URI' field is empty.</p>
     <p
       xml:lang="en" lang="en" dir="ltr" class="error"
+      petal:condition="error/is --component/link/invalid_internal_url"
+    >The 'URI' field points to a document which does not exist.</p>
+    <p
+      xml:lang="en" lang="en" dir="ltr" class="error"
       petal:condition="error/is --component/link/description_empty"
     >The 'Description' field is empty.</p>
   </div>
@@ -110,7 +114,7 @@
         value="http://example.com/"
         size="35"
         title="Enter the URI of the link here."
-        petal:attributes="name name_url; id name_url; value self/url"
+        petal:attributes="name name_url; id name_url; value self/render_url"
       />
     </p>
 
--- /dev/null
+++ MKDoc/Util/LinkParser.pm
@@ -0,0 +1,307 @@
+# ----------------------------------------------------------------------------
+# MKDoc::Util::LinkParser
+# ----------------------------------------------------------------------------
+# Author: Sam Tregar
+# Copyright: (c) MKDoc Holdings Ltd, 2005
+#
+# This file is part of MKDoc. 
+# 
+# MKDoc is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# MKDoc is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with MKDoc; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#
+# ---------------------------------------------------------------------------
+package MKDoc::Util::LinkParser;
+use strict;
+use warnings;
+
+=head1 NAME
+
+MKDoc::Util::LinkParser - parses links to differentiate internal and external links
+
+=head1 SYNOPSIS
+
+  use MKDoc::Util::LinkParser;
+
+  # setup a new parser
+  $parser = MKDoc::Util::LinkParser->new();
+  
+  # parse a link
+  $parser->parse('http://www.example.com/foo');
+
+  # deal with results
+  if ($parser->is_internal) {
+     if ($parser->is_valid) {
+        $document_id = $parser->document_id;
+        print "Found a link to document ID $document_id.\n";
+     } else {
+        die "Invalid internal link found!";
+     }
+  } else {
+     print "External link found.\n";
+  }
+
+=head1 DESCRIPTION
+
+This module parses links and determines whether they are internal
+(pointing to the site served by MKDoc) or external.  For internal
+links it determines if they are valid (pointing to an active document).
+
+=head1 CLASS METHODS
+
+=head2 new
+
+Creates a new parser.  Accepts no arguments.
+
+=head2 parse
+
+Parse a link.  Takes a single argument, the link to parse.
+
+=head2 thaw
+
+Loads data produced by store() just as though the link had been parsed
+with parse().
+
+=head1 INSTANCE METHODS
+
+=head2 is_internal
+
+Returns true if the last link parsed was internal.
+
+=head2 is_external
+
+Returns true if the last link parsed was external.
+
+=head2 is_valid
+
+Returns true if the last link was a valid internal link.  When this is
+true document_id will be available.
+
+=head2 document_id
+
+Returns the document_id for the last link parsed.
+
+=head2 uri
+
+Returns the URI object for the last link parsed.  See L<URI> for more
+details.
+
+=head2 as_string
+
+Produce a string containing a canonical version of the link.
+
+=head2 operation
+
+Returns the extra MKDoc-specific operation portion of the URI for
+internal links.  This is the part of the URI that points to an
+operation rather than a documemt.  For example, given this link:
+
+  http://example.com/test-doc/.admin.content
+
+operation() would return '.admin.content'.
+
+This part of the URI will not be present in the object returned from
+uri() and it will not affect is_valid() and document_id() matches.
+
+=head2 freeze
+
+Returns a hash-ref representiting all the relevent details of the
+link.  This is the data which may be passed later to thaw().
+
+This method may only be called on valid internal links.
+
+=head1 AUTHOR
+
+Sam Tregar <sam at tregar.com>
+
+=cut
+
+use URI;
+use Carp qw(croak);
+use lib::sql::DBH;
+
+sub new { 
+    my $self = bless {}, shift;
+
+    # make sure the environment is properly setup
+    for my $var qw(PUBLIC_DOMAIN USER_DOMAIN) {
+        croak("$var not set - needed by " . __PACKAGE__) unless $ENV{$var};
+    }
+
+    # setup default base URI
+    $self->{base} = URI->new($ENV{PUBLIC_DOMAIN})->canonical;
+
+    # setup list of internals base URLs
+    $self->{internal_bases} = [ $self->{base},
+                                URI->new($ENV{USER_DOMAIN})->canonical ];
+    
+    # add in foo.com for www.foo.com
+    my $pub = $ENV{PUBLIC_DOMAIN};
+    $pub =~ s!^(\w+://)\w+\.!$1!;
+    push @{$self->{internal_bases}}, URI->new($pub)->canonical;
+   
+    # produce a list of canonical internal prefixes minus scheme for
+    # fast internal matching
+    my @prefixes = map { $_->opaque } @{$self->{internal_bases}};
+
+    # compile a regex from the internal prefixes
+    my $re = join('|', map { qr/\Q$_\E/ } @prefixes);
+    $self->{internal_re} = qr/^$re/;
+
+    return $self;
+}
+
+sub parse {
+    my ($self, $link) = @_;
+    $self->_clear();
+
+    my $uri;
+    if ($link =~ m!^[^:/?#]+:!) {
+        # if it's a full URI parse it as-is
+        $uri = URI->new($link)->canonical;
+    } else {
+        # otherwise base it off the default base
+        $uri = URI->new_abs($link, $self->{base})->canonical;
+    }
+    $self->{uri} = $uri;
+
+    # pull off the operation part of internal paths
+    my $path = $uri->path;
+    if ($self->is_internal and $path =~ s!/(\.[a-z\.]+)$!!) {
+        $self->{operation} = $1;
+        $path = '/' unless length $path;
+        $self->{uri}->path($path);
+    }
+}
+
+# clear link state
+sub _clear {
+    my $self = shift;
+    delete $self->{$_} for qw(is_internal document_id operation uri);
+}
+
+sub as_string {
+    my $self = shift;
+    my $uri  = $self->{uri}->clone;
+
+    # the URI object has everything already unless this is an internal
+    # link or has an operation
+    return $uri->as_string 
+      unless $self->{document_id} or $self->{operation};
+
+    # setup path based on document_id if we've got one
+    if ($self->{document_id}) {
+        my $dbh    = lib::sql::DBH->get();
+        my ($path) = $dbh->selectrow_array('SELECT Full_Path FROM Document WHERE ID = ?', undef, $self->{document_id});
+        $uri->path($path);
+    }
+
+    # add in operation if there is one
+    if ($self->{operation}) {
+        my $path = $uri->path();
+        $path .= '/' unless $path =~ m!/$!;
+        $uri->path($path . $self->{operation});
+    }
+
+    return $uri->as_string;
+}
+
+sub operation { shift->{operation} }
+
+sub is_internal {
+    my $self = shift;
+    return $self->{is_internal} if defined $self->{is_internal};
+    my $uri  = $self->uri;
+
+    # only http and https URLs can be internals
+    my $protocol = $uri->scheme;
+    return $self->{is_internal} = 0 
+      unless  $protocol eq 'http' or $protocol eq 'https';
+
+    my $prefix = $self->{uri}->opaque;
+    if ($prefix =~ $self->{internal_re}) {
+        return $self->{is_internal} = 1;
+    } else {
+        return $self->{is_internal} = 0;
+    }
+}
+
+sub is_external {
+    my $self = shift;
+    return not $self->is_internal;
+}
+
+sub is_valid {
+    my $self = shift;
+    return 0 if defined $self->{document_id} and not $self->{document_id};
+    return 1 if $self->document_id;
+}
+
+sub document_id {
+    my $self = shift;
+    return $self->{document_id} if $self->{document_id};
+
+    my $uri  = $self->uri;
+    my $dbh  = lib::sql::DBH->get();
+    
+    # fix path to end with a /
+    my $path = $uri->path;
+    $path .= '/' unless $path =~ m!/$!;
+
+    my ($id) = $dbh->selectrow_array('SELECT ID
+                                      FROM Document 
+                                      WHERE Full_Path = ?', undef, $path);
+    return $self->{document_id} = $id ? $id : 0;
+}
+
+sub uri { shift->{uri} }
+
+sub freeze {
+    my $self = shift;
+    croak("Called freeze on an external or invalid link!")
+      unless $self->{is_internal} and $self->{document_id};
+    my %return;
+
+    # store relevent details for an internal link
+    my $uri = $self->{uri};
+    $return{is_internal} = $self->{is_internal};
+    $return{operation}   = $self->{operation};
+    $return{document_id} = $self->{document_id};
+
+    # save necessary URI parts
+    $return{scheme}      = $uri->scheme;
+    $return{query}       = $uri->query;
+    $return{fragment}    = $uri->fragment;
+    $return{authority}   = $uri->authority;
+
+    return \%return;
+}
+
+sub thaw {
+    my ($self, $data) = @_;
+    $self->_clear();
+
+    my $uri = URI->new();
+    $uri->scheme($data->{scheme});
+    $uri->query($data->{query});
+    $uri->fragment($data->{fragment});
+    $uri->authority($data->{authority});
+    $self->{uri} = $uri;
+
+    $self->{is_internal} = $data->{is_internal};
+    $self->{operation}   = $data->{operation};
+    $self->{document_id} = $data->{document_id};
+}
+
+1;
+
--- /dev/null
+++ tools/update_links.pl
@@ -0,0 +1,104 @@
+# ------------------------------------------------------------------
+# update_links.pl
+# ------------------------------------------------------------------
+# Author : Sam Tregar
+# Copyright : (c) MKDoc Holdings Ltd, 2005
+#
+# This file is part of MKDoc. 
+# 
+# MKDoc is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# MKDoc is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with MKDoc; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#
+# ------------------------------------------------------------------
+
+# This script updates links into the new format which differentiates
+# between internal and external links.  Internal links are matched to
+# the documents they point to and stored by ID.  The script will
+# output a line for each external, internal and invalid internal link
+# it finds.
+#
+# It would be a good idea to make a backup of your database before
+# running this script as it has not been extensively tested.
+ 
+#!/usr/bin/perl
+use strict;
+use warnings;
+use MKDoc;
+use MKDoc::Util::LinkParser;
+
+# make sure SITE_DIR is set since MKDoc->init needs it
+die "SITE_DIR isn't set.  Please source mksetenv.sh from an installed MKDoc site ".
+  "and try again.\n" 
+  unless $ENV{SITE_DIR};
+
+# initialize MKDoc, needed for database connection
+MKDoc->init;
+
+# setup global accessors
+my $dbh    = lib::sql::DBH->get();
+my $doc_t  = flo::Standard::table('Document');
+my $parser = MKDoc::Util::LinkParser->new();
+
+# step through documents one at a time
+my $document_ids = $dbh->selectcol_arrayref('SELECT ID FROM Document');
+foreach my $document_id (@$document_ids) {
+    my $doc   = $doc_t->get($document_id);
+
+    # setup an editor so we can make changes if necessary
+    my $editor = flo::Editor->_new();
+    $editor->parse_xml($doc->{Body}, $document_id);
+
+    my $dirty = 0;
+
+    # look at each link
+    my @links = grep { $_->isa('flo::editor::Link') } $editor->access();
+    foreach my $link (@links) {
+        # ignore already processed internal links
+        next if $link->{internal_link};
+
+        # evaluate plain links, ignoring broken ones
+        my $url = $link->{url};
+        next unless $url;
+
+        $parser->parse($url);
+
+        # external links stay as-is
+        if (not $parser->is_internal) {
+            print "=== External Link ($document_id): $url\n";
+            next;
+        }          
+
+        # flag invalid internal links
+        if (not $parser->is_valid) {
+            print "!!! Invalid Internal Link ($document_id): $url\n";
+            next;
+        }
+
+        # update valid internal links
+        $link->{internal_link} = $parser->freeze();
+        $link->{url}           = $parser->as_string;
+        $dirty++;
+        print "+++ Valid Internal Link ($document_id): $url\n"
+    }
+
+    # if changes were made they need to get updated in the DB.  (It's
+    # hard to believe there isn't a better way to do this, but I'm
+    # pretty sure there isn't.)
+    if ($dirty) {
+        print "*** Updating Document $document_id with $dirty new links.\n";
+        $doc->{Body} = $editor->generate_xml();
+        $doc->save();
+    }
+}
+


More information about the MKDoc-commit mailing list