[MKDoc-commit] Patch from Sam - Redirect fix
chris at mkdoc.demon.co.uk
chris at mkdoc.demon.co.uk
Mon Aug 8 10:44:14 BST 2005
Log Message:
-----------
Patch from Sam - Redirect fix
Tags:
----
mkdoc-1-6
Modified Files:
--------------
mkd/MKDoc/Util:
LinkParser.pm
-------------- next part --------------
Index: LinkParser.pm
===================================================================
RCS file: /var/spool/cvs/mkd/MKDoc/Util/Attic/LinkParser.pm,v
retrieving revision 1.1.2.5
retrieving revision 1.1.2.6
diff -LMKDoc/Util/LinkParser.pm -LMKDoc/Util/LinkParser.pm -u -r1.1.2.5 -r1.1.2.6
--- MKDoc/Util/LinkParser.pm
+++ MKDoc/Util/LinkParser.pm
@@ -273,33 +273,23 @@
my $path = $uri->path;
$path .= '/' unless $path =~ m!/$!;
- # do a look for this path, returning immediately if it was found
- my ($id, undef) = $self->lookup_path($path, $dbh);
- return $self->{document_id} = $id if $id;
-
- # walk down path components replacing them with redirects as
- # needed. This is necessary to deal with interstitial redirects.
- my @old = grep { defined and length } split('/', $path);
- my @new;
- while (@old) {
- # step down one level
- push @new, shift @old;
+ # limit redirection to 100 links to avoid infinite loops
+ my $redirect_limit = 100;
+
+ # find an ID, following redirects as needed
+ my $id;
+ while (1) {
+ # does this path exist in the DB?
+ last if $id = $self->lookup_path($path, $dbh);
+
+ # look for a redirect and loop if we found one
+ last unless $path = $self->find_redirect($path, $dbh);
- # lookup this path for redirects
- my $this_path = '/' . join('/', @new) . '/';
- ($id, $path) = $self->lookup_path($this_path, $dbh);
-
- # if we found something and it's not a direct link then base
- # the new path off that
- if ($path and $path ne $this_path) {
- # restart the search
- @new = ();
- @old = ((grep { defined and length } split('/', $path)), @old);
- }
- }
+ # don't keep looping forever
+ last unless --$redirect_limit;
+ }
- # return what we found, undef if no match was found in the search
- # or the ID if one was
+ # return what we found, 0 or a real ID
return $self->{document_id} = $id;
}
@@ -311,14 +301,34 @@
my ($id) = $dbh->selectrow_array('SELECT ID
FROM Document
WHERE Full_Path = ?', undef, $path);
- return $self->{document_id} = $id if $id;
+ return $id || 0;
+}
- # it wasn't in Document, but it might be in Redirect
- ($path) = $dbh->selectrow_array('SELECT New_Path
- FROM Redirect
- WHERE Old_Path = ?', undef, $path);
- return ($id, $path);
+sub find_redirect {
+ my ($self, $path, $dbh) = @_;
+
+ # pull all paths that might pertain to this path
+ my @parts = grep { defined and length } split('/', $path);
+ my @paths;
+ foreach my $x (0 .. $#parts) {
+ push(@paths, '/' . join('/', @parts[0 .. $x]) . '/');
+ }
+
+ # find the one which matches the longest part of this path, which
+ # will be the most specific
+ my ($from, $to) = $dbh->selectrow_array(
+ 'SELECT Old_Path, New_Path FROM Redirect WHERE Old_Path IN (' .
+ join(',', ('?') x @paths) . ') '.
+ 'ORDER BY LENGTH(Old_Path) DESC LIMIT 1', undef, @paths);
+ if ($from) {
+ # apply the redirect and return it
+ $path =~ s!^\Q$from\E!$to!;
+ return $path;
+ }
+
+ # no redirect found
+ return;
}
sub uri { shift->{uri} }
More information about the MKDoc-commit mailing list