[MKDoc-commit] Patch from Sam - Redirect fix

chris at mkdoc.demon.co.uk chris at mkdoc.demon.co.uk
Mon Aug 8 10:44:14 BST 2005


Log Message:
-----------
Patch from Sam - Redirect fix

Tags:
----
mkdoc-1-6

Modified Files:
--------------
    mkd/MKDoc/Util:
        LinkParser.pm

-------------- next part --------------
Index: LinkParser.pm
===================================================================
RCS file: /var/spool/cvs/mkd/MKDoc/Util/Attic/LinkParser.pm,v
retrieving revision 1.1.2.5
retrieving revision 1.1.2.6
diff -LMKDoc/Util/LinkParser.pm -LMKDoc/Util/LinkParser.pm -u -r1.1.2.5 -r1.1.2.6
--- MKDoc/Util/LinkParser.pm
+++ MKDoc/Util/LinkParser.pm
@@ -273,33 +273,23 @@
     my $path = $uri->path;
     $path .= '/' unless $path =~ m!/$!;
 
-    # do a look for this path, returning immediately if it was found
-    my ($id, undef) = $self->lookup_path($path, $dbh);
-    return $self->{document_id} = $id if $id;
-
-    # walk down path components replacing them with redirects as
-    # needed.  This is necessary to deal with interstitial redirects.
-    my @old = grep { defined and length } split('/', $path);
-    my @new;
-    while (@old) {
-        # step down one level
-        push @new, shift @old;
+    # limit redirection to 100 links to avoid infinite loops
+    my $redirect_limit = 100;
+
+    # find an ID, following redirects as needed
+    my $id;
+    while (1) {
+        # does this path exist in the DB?
+        last if $id = $self->lookup_path($path, $dbh);
+
+        # look for a redirect and loop if we found one
+        last unless $path = $self->find_redirect($path, $dbh);
         
-        # lookup this path for redirects
-        my $this_path = '/' . join('/', @new) . '/';
-        ($id, $path) = $self->lookup_path($this_path, $dbh);
-
-        # if we found something and it's not a direct link then base
-        # the new path off that
-        if ($path and $path ne $this_path) {
-            # restart the search
-            @new = ();
-            @old = ((grep { defined and length } split('/', $path)), @old);
-        }
-    }
+        # don't keep looping forever
+        last unless --$redirect_limit;
+    } 
 
-    # return what we found, undef if no match was found in the search
-    # or the ID if one was
+    # return what we found, 0 or a real ID
     return $self->{document_id} = $id;
 }
 
@@ -311,14 +301,34 @@
     my ($id) = $dbh->selectrow_array('SELECT ID
                                       FROM Document 
                                       WHERE Full_Path = ?', undef, $path);
-    return $self->{document_id} = $id if $id;
+    return $id || 0;
+}
 
-    # it wasn't in Document, but it might be in Redirect
-    ($path) = $dbh->selectrow_array('SELECT New_Path
-                                     FROM Redirect
-                                     WHERE Old_Path = ?', undef, $path);
 
-    return ($id, $path);
+sub find_redirect {
+    my ($self, $path, $dbh) = @_;
+
+    # pull all paths that might pertain to this path
+    my @parts = grep { defined and length } split('/', $path);
+    my @paths;
+    foreach my $x (0 .. $#parts) {
+        push(@paths, '/' . join('/', @parts[0 .. $x]) . '/');
+    }
+
+    # find the one which matches the longest part of this path, which
+    # will be the most specific
+    my ($from, $to) = $dbh->selectrow_array(
+        'SELECT Old_Path, New_Path FROM Redirect WHERE Old_Path IN (' . 
+        join(',', ('?') x @paths) . ') '.
+        'ORDER BY LENGTH(Old_Path) DESC LIMIT 1', undef, @paths);
+    if ($from) {
+        # apply the redirect and return it
+        $path =~ s!^\Q$from\E!$to!;
+        return $path;
+    }
+    
+    # no redirect found
+    return;
 }
 
 sub uri { shift->{uri} }


More information about the MKDoc-commit mailing list