[MKDoc-commit] Patch from Sam to enable external links to open in
new windows.
chris at mkdoc.demon.co.uk
chris at mkdoc.demon.co.uk
Wed Jul 13 15:11:42 BST 2005
Log Message:
-----------
Patch from Sam to enable external links to open in new windows.
Tags:
----
mkdoc-1-6
Modified Files:
--------------
mkd/flo:
Editor.pm
mkd/flo/editor:
Link.pm
mkd/templates/editor/link:
en.html
Added Files:
-----------
mkd/MKDoc/Util:
LinkParser.pm
mkd/tools:
update_links.pl
-------------- next part --------------
Index: Editor.pm
===================================================================
RCS file: /var/spool/cvs/mkd/flo/Editor.pm,v
retrieving revision 1.12.2.41
retrieving revision 1.12.2.42
diff -Lflo/Editor.pm -Lflo/Editor.pm -u -r1.12.2.41 -r1.12.2.42
--- flo/Editor.pm
+++ flo/Editor.pm
@@ -31,6 +31,10 @@
use 5.008_000;
use utf8;
+# Set this to true and external links (as determined by
+# MKDoc::Util::LinkParser) will open in a new window.
+our $EXTERNAL_LINKS_OPEN_IN_NEW_WINDOW = 1;
+
use constant BLOCK => 'edit_block';
@@ -149,9 +153,11 @@
##
# $self->links;
# -------------
-# This subroutine returns all the links of the current document
-# and caches them into $LINKS, these links can then be used by
-# MKDoc::XML::Tagger to linkify the whole document.
+# This subroutine returns all the links of the current document and
+# caches them into $LINKS, these links can then be used by
+# MKDoc::XML::Tagger to linkify the whole document. The actual calls
+# to MKDoc::XML::Tagger occur in the components which may contain
+# links - flo::editor::Text and flo::editor::HTML, for example.
#
# It may look like a horrible hack (and it is), but since the
# component system needs to be redesigned it does not bother
@@ -176,12 +182,26 @@
# retrieve the links for that document
foreach my $link_component (@links_components)
{
+ my $url = $link_component->render_url;
+
+ # setup class based on whether this is an internal or external link
+ my $class = "mkdoc-link-" . ( $link_component->is_internal ?
+ "internal" : "external" );
+
+ # setup target to open a new window in needed
+ my @target;
+ if ($EXTERNAL_LINKS_OPEN_IN_NEW_WINDOW and
+ not $link_component->is_internal) {
+ @target = (target => "_blank");
+ }
+
push @links, {
- class => "mkdoc-link",
- href => $link_component->url,
- desc => $link_component->description,
- expr => $link_component->title,
- } if ($link_component->url ne '' and $link_component->title ne '');
+ class => $class,
+ href => $url,
+ desc => $link_component->description,
+ expr => $link_component->title,
+ @target,
+ } if ($url ne '' and $link_component->title ne '');
}
# plus we want to hyperlink all the children
Index: Link.pm
===================================================================
RCS file: /var/spool/cvs/mkd/flo/editor/Link.pm,v
retrieving revision 1.7.2.20
retrieving revision 1.7.2.21
diff -Lflo/editor/Link.pm -Lflo/editor/Link.pm -u -r1.7.2.20 -r1.7.2.21
--- flo/editor/Link.pm
+++ flo/editor/Link.pm
@@ -26,12 +26,18 @@
use flo::Standard;
use MKDoc::CGI;
use MKDoc::Util::Text2HTML;
+use MKDoc::Util::LinkParser;
use strict;
use base qw /flo::Component
flo::editor::Mixin::compute_name
flo::editor::Mixin::normalize_name/;
+# keep a global link parser around
+my $link_parser = MKDoc::Util::LinkParser->new();
+
+# placeholder for the URL entry field
+my $EMPTY_URL = 'http://';
sub preferred_extension { 'link' }
@@ -47,11 +53,49 @@
my $self = shift;
my $args = $self->cgi_args() || return;
- $self->{url} = $args->{'url'} || 'http://';
$self->{title} = $args->{'title'} || '';
$self->{description} = $args->{'description'} || '';
+
+ if ($args->{url}) {
+ $self->parse_url($args->{url});
+ } else {
+ $self->{url} = $EMPTY_URL;
+ }
+}
+
+
+sub parse_url {
+ my ($self, $url) = @_;
+ $link_parser->parse($url);
+
+ if ($link_parser->is_internal and $link_parser->is_valid) {
+ # store relevent details for an internal link (invalid
+ # links are handled by validate_url)
+ $self->{internal_link} = $link_parser->freeze();
+
+ # reflect the canonical form in the UI
+ $self->{url} = $link_parser->as_string;
+ } else {
+ # store just the url as entered for external links
+ delete $self->{internal_link};
+ $self->{url} = $url;
+ }
}
+# template rendering for the URL
+sub render_url {
+ my $self = shift;
+ if ($self->{internal_link}) {
+ $link_parser->thaw($self->{internal_link});
+ return $link_parser->as_string;
+ } else {
+ return $self->{url};
+ }
+}
+
+sub is_internal {
+ return shift->{internal_link} ? 1 : 0;
+}
sub validate
{
@@ -70,10 +114,16 @@
sub validate_url
{
my $self = shift;
- $self->{url} and $self->{url} ne 'http://' or do {
+ $self->{url} and $self->{url} ne $EMPTY_URL or do {
new MKDoc::Ouch 'component/link/url_empty';
return 0;
};
+
+ $link_parser->parse($self->{url});
+ if ($link_parser->is_internal and not $link_parser->is_valid) {
+ new MKDoc::Ouch 'component/link/invalid_internal_url';
+ return 0;
+ }
return 1;
}
Index: en.html
===================================================================
RCS file: /var/spool/cvs/mkd/templates/editor/link/Attic/en.html,v
retrieving revision 1.1.2.9
retrieving revision 1.1.2.10
diff -Ltemplates/editor/link/en.html -Ltemplates/editor/link/en.html -u -r1.1.2.9 -r1.1.2.10
--- templates/editor/link/en.html
+++ templates/editor/link/en.html
@@ -50,6 +50,10 @@
>The 'URI' field is empty.</p>
<p
xml:lang="en" lang="en" dir="ltr" class="error"
+ petal:condition="error/is --component/link/invalid_internal_url"
+ >The 'URI' field points to a document which does not exist.</p>
+ <p
+ xml:lang="en" lang="en" dir="ltr" class="error"
petal:condition="error/is --component/link/description_empty"
>The 'Description' field is empty.</p>
</div>
@@ -110,7 +114,7 @@
value="http://example.com/"
size="35"
title="Enter the URI of the link here."
- petal:attributes="name name_url; id name_url; value self/url"
+ petal:attributes="name name_url; id name_url; value self/render_url"
/>
</p>
--- /dev/null
+++ MKDoc/Util/LinkParser.pm
@@ -0,0 +1,307 @@
+# ----------------------------------------------------------------------------
+# MKDoc::Util::LinkParser
+# ----------------------------------------------------------------------------
+# Author: Sam Tregar
+# Copyright: (c) MKDoc Holdings Ltd, 2005
+#
+# This file is part of MKDoc.
+#
+# MKDoc is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# MKDoc is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with MKDoc; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# ---------------------------------------------------------------------------
+package MKDoc::Util::LinkParser;
+use strict;
+use warnings;
+
+=head1 NAME
+
+MKDoc::Util::LinkParser - parses links to differentiate internal and external links
+
+=head1 SYNOPSIS
+
+ use MKDoc::Util::LinkParser;
+
+ # setup a new parser
+ $parser = MKDoc::Util::LinkParser->new();
+
+ # parse a link
+ $parser->parse('http://www.example.com/foo');
+
+ # deal with results
+ if ($parser->is_internal) {
+ if ($parser->is_valid) {
+ $document_id = $parser->document_id;
+ print "Found a link to document ID $document_id.\n";
+ } else {
+ die "Invalid internal link found!";
+ }
+ } else {
+ print "External link found.\n";
+ }
+
+=head1 DESCRIPTION
+
+This module parses links and determines whether they are internal
+(pointing to the site served by MKDoc) or external. For internal
+links it determines if they are valid (pointing to an active document).
+
+=head1 CLASS METHODS
+
+=head2 new
+
+Creates a new parser. Accepts no arguments.
+
+=head2 parse
+
+Parse a link. Takes a single argument, the link to parse.
+
+=head2 thaw
+
+Loads data produced by store() just as though the link had been parsed
+with parse().
+
+=head1 INSTANCE METHODS
+
+=head2 is_internal
+
+Returns true if the last link parsed was internal.
+
+=head2 is_external
+
+Returns true if the last link parsed was external.
+
+=head2 is_valid
+
+Returns true if the last link was a valid internal link. When this is
+true document_id will be available.
+
+=head2 document_id
+
+Returns the document_id for the last link parsed.
+
+=head2 uri
+
+Returns the URI object for the last link parsed. See L<URI> for more
+details.
+
+=head2 as_string
+
+Produce a string containing a canonical version of the link.
+
+=head2 operation
+
+Returns the extra MKDoc-specific operation portion of the URI for
+internal links. This is the part of the URI that points to an
+operation rather than a documemt. For example, given this link:
+
+ http://example.com/test-doc/.admin.content
+
+operation() would return '.admin.content'.
+
+This part of the URI will not be present in the object returned from
+uri() and it will not affect is_valid() and document_id() matches.
+
+=head2 freeze
+
+Returns a hash-ref representiting all the relevent details of the
+link. This is the data which may be passed later to thaw().
+
+This method may only be called on valid internal links.
+
+=head1 AUTHOR
+
+Sam Tregar <sam at tregar.com>
+
+=cut
+
+use URI;
+use Carp qw(croak);
+use lib::sql::DBH;
+
+sub new {
+ my $self = bless {}, shift;
+
+ # make sure the environment is properly setup
+ for my $var qw(PUBLIC_DOMAIN USER_DOMAIN) {
+ croak("$var not set - needed by " . __PACKAGE__) unless $ENV{$var};
+ }
+
+ # setup default base URI
+ $self->{base} = URI->new($ENV{PUBLIC_DOMAIN})->canonical;
+
+ # setup list of internals base URLs
+ $self->{internal_bases} = [ $self->{base},
+ URI->new($ENV{USER_DOMAIN})->canonical ];
+
+ # add in foo.com for www.foo.com
+ my $pub = $ENV{PUBLIC_DOMAIN};
+ $pub =~ s!^(\w+://)\w+\.!$1!;
+ push @{$self->{internal_bases}}, URI->new($pub)->canonical;
+
+ # produce a list of canonical internal prefixes minus scheme for
+ # fast internal matching
+ my @prefixes = map { $_->opaque } @{$self->{internal_bases}};
+
+ # compile a regex from the internal prefixes
+ my $re = join('|', map { qr/\Q$_\E/ } @prefixes);
+ $self->{internal_re} = qr/^$re/;
+
+ return $self;
+}
+
+sub parse {
+ my ($self, $link) = @_;
+ $self->_clear();
+
+ my $uri;
+ if ($link =~ m!^[^:/?#]+:!) {
+ # if it's a full URI parse it as-is
+ $uri = URI->new($link)->canonical;
+ } else {
+ # otherwise base it off the default base
+ $uri = URI->new_abs($link, $self->{base})->canonical;
+ }
+ $self->{uri} = $uri;
+
+ # pull off the operation part of internal paths
+ my $path = $uri->path;
+ if ($self->is_internal and $path =~ s!/(\.[a-z\.]+)$!!) {
+ $self->{operation} = $1;
+ $path = '/' unless length $path;
+ $self->{uri}->path($path);
+ }
+}
+
+# clear link state
+sub _clear {
+ my $self = shift;
+ delete $self->{$_} for qw(is_internal document_id operation uri);
+}
+
+sub as_string {
+ my $self = shift;
+ my $uri = $self->{uri}->clone;
+
+ # the URI object has everything already unless this is an internal
+ # link or has an operation
+ return $uri->as_string
+ unless $self->{document_id} or $self->{operation};
+
+ # setup path based on document_id if we've got one
+ if ($self->{document_id}) {
+ my $dbh = lib::sql::DBH->get();
+ my ($path) = $dbh->selectrow_array('SELECT Full_Path FROM Document WHERE ID = ?', undef, $self->{document_id});
+ $uri->path($path);
+ }
+
+ # add in operation if there is one
+ if ($self->{operation}) {
+ my $path = $uri->path();
+ $path .= '/' unless $path =~ m!/$!;
+ $uri->path($path . $self->{operation});
+ }
+
+ return $uri->as_string;
+}
+
+sub operation { shift->{operation} }
+
+sub is_internal {
+ my $self = shift;
+ return $self->{is_internal} if defined $self->{is_internal};
+ my $uri = $self->uri;
+
+ # only http and https URLs can be internals
+ my $protocol = $uri->scheme;
+ return $self->{is_internal} = 0
+ unless $protocol eq 'http' or $protocol eq 'https';
+
+ my $prefix = $self->{uri}->opaque;
+ if ($prefix =~ $self->{internal_re}) {
+ return $self->{is_internal} = 1;
+ } else {
+ return $self->{is_internal} = 0;
+ }
+}
+
+sub is_external {
+ my $self = shift;
+ return not $self->is_internal;
+}
+
+sub is_valid {
+ my $self = shift;
+ return 0 if defined $self->{document_id} and not $self->{document_id};
+ return 1 if $self->document_id;
+}
+
+sub document_id {
+ my $self = shift;
+ return $self->{document_id} if $self->{document_id};
+
+ my $uri = $self->uri;
+ my $dbh = lib::sql::DBH->get();
+
+ # fix path to end with a /
+ my $path = $uri->path;
+ $path .= '/' unless $path =~ m!/$!;
+
+ my ($id) = $dbh->selectrow_array('SELECT ID
+ FROM Document
+ WHERE Full_Path = ?', undef, $path);
+ return $self->{document_id} = $id ? $id : 0;
+}
+
+sub uri { shift->{uri} }
+
+sub freeze {
+ my $self = shift;
+ croak("Called freeze on an external or invalid link!")
+ unless $self->{is_internal} and $self->{document_id};
+ my %return;
+
+ # store relevent details for an internal link
+ my $uri = $self->{uri};
+ $return{is_internal} = $self->{is_internal};
+ $return{operation} = $self->{operation};
+ $return{document_id} = $self->{document_id};
+
+ # save necessary URI parts
+ $return{scheme} = $uri->scheme;
+ $return{query} = $uri->query;
+ $return{fragment} = $uri->fragment;
+ $return{authority} = $uri->authority;
+
+ return \%return;
+}
+
+sub thaw {
+ my ($self, $data) = @_;
+ $self->_clear();
+
+ my $uri = URI->new();
+ $uri->scheme($data->{scheme});
+ $uri->query($data->{query});
+ $uri->fragment($data->{fragment});
+ $uri->authority($data->{authority});
+ $self->{uri} = $uri;
+
+ $self->{is_internal} = $data->{is_internal};
+ $self->{operation} = $data->{operation};
+ $self->{document_id} = $data->{document_id};
+}
+
+1;
+
--- /dev/null
+++ tools/update_links.pl
@@ -0,0 +1,104 @@
+# ------------------------------------------------------------------
+# update_links.pl
+# ------------------------------------------------------------------
+# Author : Sam Tregar
+# Copyright : (c) MKDoc Holdings Ltd, 2005
+#
+# This file is part of MKDoc.
+#
+# MKDoc is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# MKDoc is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with MKDoc; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# ------------------------------------------------------------------
+
+# This script updates links into the new format which differentiates
+# between internal and external links. Internal links are matched to
+# the documents they point to and stored by ID. The script will
+# output a line for each external, internal and invalid internal link
+# it finds.
+#
+# It would be a good idea to make a backup of your database before
+# running this script as it has not been extensively tested.
+
+#!/usr/bin/perl
+use strict;
+use warnings;
+use MKDoc;
+use MKDoc::Util::LinkParser;
+
+# make sure SITE_DIR is set since MKDoc->init needs it
+die "SITE_DIR isn't set. Please source mksetenv.sh from an installed MKDoc site ".
+ "and try again.\n"
+ unless $ENV{SITE_DIR};
+
+# initialize MKDoc, needed for database connection
+MKDoc->init;
+
+# setup global accessors
+my $dbh = lib::sql::DBH->get();
+my $doc_t = flo::Standard::table('Document');
+my $parser = MKDoc::Util::LinkParser->new();
+
+# step through documents one at a time
+my $document_ids = $dbh->selectcol_arrayref('SELECT ID FROM Document');
+foreach my $document_id (@$document_ids) {
+ my $doc = $doc_t->get($document_id);
+
+ # setup an editor so we can make changes if necessary
+ my $editor = flo::Editor->_new();
+ $editor->parse_xml($doc->{Body}, $document_id);
+
+ my $dirty = 0;
+
+ # look at each link
+ my @links = grep { $_->isa('flo::editor::Link') } $editor->access();
+ foreach my $link (@links) {
+ # ignore already processed internal links
+ next if $link->{internal_link};
+
+ # evaluate plain links, ignoring broken ones
+ my $url = $link->{url};
+ next unless $url;
+
+ $parser->parse($url);
+
+ # external links stay as-is
+ if (not $parser->is_internal) {
+ print "=== External Link ($document_id): $url\n";
+ next;
+ }
+
+ # flag invalid internal links
+ if (not $parser->is_valid) {
+ print "!!! Invalid Internal Link ($document_id): $url\n";
+ next;
+ }
+
+ # update valid internal links
+ $link->{internal_link} = $parser->freeze();
+ $link->{url} = $parser->as_string;
+ $dirty++;
+ print "+++ Valid Internal Link ($document_id): $url\n"
+ }
+
+ # if changes were made they need to get updated in the DB. (It's
+ # hard to believe there isn't a better way to do this, but I'm
+ # pretty sure there isn't.)
+ if ($dirty) {
+ print "*** Updating Document $document_id with $dirty new links.\n";
+ $doc->{Body} = $editor->generate_xml();
+ $doc->save();
+ }
+}
+
More information about the MKDoc-commit
mailing list