[MKDoc-commit] [MKDoc-XML] added xhtmlclean command-line tool

bruno at mkdoc.demon.co.uk bruno at mkdoc.demon.co.uk
Thu Sep 8 17:31:49 BST 2005


Log Message:
-----------
[MKDoc-XML] added xhtmlclean command-line tool

Modified Files:
--------------
    MKDoc-XML:
        Changes
        MANIFEST
        Makefile.PL

Added Files:
-----------
    MKDoc-XML/bin:
        xhtmlclean

-------------- next part --------------
Index: MANIFEST
===================================================================
RCS file: /var/spool/cvs/MKDoc-XML/MANIFEST,v
retrieving revision 1.15
retrieving revision 1.16
diff -LMANIFEST -LMANIFEST -u -r1.15 -r1.16
--- MANIFEST
+++ MANIFEST
@@ -1,4 +1,5 @@
 .cvsignore
+bin/xhtmlclean
 Changes
 lib/MKDoc/XML.pm
 lib/MKDoc/XML/Decode.pm
Index: Changes
===================================================================
RCS file: /var/spool/cvs/MKDoc-XML/Changes,v
retrieving revision 1.44
retrieving revision 1.45
diff -LChanges -LChanges -u -r1.44 -r1.45
--- Changes
+++ Changes
@@ -2,6 +2,7 @@
 
 0.76
     - mkdoc16 and xmhtmlttansitional tag attribute stripping less enthusiastic
+    - xhtmlclean command-line tool added
 
 0.75 Thu Mar 10 15:12:00 2005
     - Added tests for tagger bugs when matching numbers and double escaping attribute contents
Index: Makefile.PL
===================================================================
RCS file: /var/spool/cvs/MKDoc-XML/Makefile.PL,v
retrieving revision 1.10
retrieving revision 1.11
diff -LMakefile.PL -LMakefile.PL -u -r1.10 -r1.11
--- Makefile.PL
+++ Makefile.PL
@@ -4,6 +4,7 @@
 WriteMakefile(
     'NAME'		=> 'MKDoc::XML',
     'VERSION_FROM'      => 'lib/MKDoc/XML.pm', # finds $VERSION
+    'EXE_FILES'         => [ 'bin/xhtmlclean' ],
     'PREREQ_PM'		=> {
         'Test::More'    => '0.47',
         'Scalar::Util'  => '1.07',
--- /dev/null
+++ bin/xhtmlclean
@@ -0,0 +1,130 @@
+#!/usr/bin/perl
+# xhtmlclean - read XML from STDIN, strip tags/attributes, print to STDOUT.
+use strict;
+use warnings;
+use lib 'lib';    # NOTE(review): relative path, resolved against the cwd
+
+use Getopt::Declare;
+use MKDoc::XML::Stripper;
+use MKDoc::XML;
+our $VERSION = $MKDoc::XML::VERSION;
+
+# Getopt::Declare specification; the four mode flags are mutually exclusive.
+my $usage = q(
+  [nocase] [strict]
+  [mutex: --mkdoc16 --strict --transitional --frameset]
+
+  XHTML Clean
+  Allowable elements/attributes
+
+  --mkdoc16		MKDoc 1.6 [required]
+  --strict		XHTML 1.0 Strict [required]
+  --transitional	XHTML 1.0 Transitional [required]
+  --frameset		XHTML 1.0 Frameset [required]
+);
+
+# Direct method calls: indirect-object "new Class" syntax can misparse.
+my $arg = Getopt::Declare->new($usage);
+die "$0: invalid command line.\n" if not defined $arg;
+
+# Load the tag/attribute definition that matches the selected mode.
+my $strip = MKDoc::XML::Stripper->new;
+$strip->load_def('mkdoc16') if $arg->{'--mkdoc16'};
+$strip->load_def('xhtml10strict') if $arg->{'--strict'};
+$strip->load_def('xhtml10transitional') if $arg->{'--transitional'};
+$strip->load_def('xhtml10frameset') if $arg->{'--frameset'};
+
+# Slurp all of STDIN; join() yields '' (never undef) on empty input.
+my $content = join('', <STDIN>);
+
+# Fail before processing, rather than after printing, when nothing was read.
+die "$0: No output.\n"
+  if ($content eq '');
+
+print $strip->process_data($content);
+
+1;
+
+__END__
+
+=head1 NAME
+
+xhtmlclean - Utility to strip unwanted tags and attributes from (well formed) XHTML
+
+=head1 Synopsis
+
+  xhtmlclean --transitional < temp.html > index.html
+  tidy wysiwyg.html | xhtmlclean --strict > index.html
+  echo '<p class="ok" ms:foo="AE56%">' | xhtmlclean --strict
+  echo '<foo>foo</foo> <em>emphasis</em>' | xhtmlclean --strict
+
+=head1 DESCRIPTION
+
+'xhtmlclean' is a command-line utility program for taking well-formed XML text
+and filtering out non-XHTML tags and attributes.  It uses the L<MKDoc::XML>
+library to tokenise the XML and has different modes for XHTML Strict, XHTML
+Transitional, etc.
+
+Warning: input must be well-formed XML; otherwise results may be strange.
+
+L<http://search.cpan.org/dist/MKDoc-XML>
+
+=head1 Calling syntax
+
+  xhtmlclean [--mkdoc16|--strict|--transitional|--frameset]
+
+=head2 Options
+
+=over
+
+=item -h
+
+The I<-h> option will display a short usage summary.
+
+=item -v
+
+This option displays the version number of L<MKDoc::XML>.
+
+=item --strict
+
+Use this option to allow only tags defined by the XHTML 1.0 Strict
+specification:
+
+L<http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd>
+
+=item --transitional
+
+Use this option to allow only tags defined by the looser XHTML 1.0 Transitional
+specification:
+
+L<http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd>
+
+=item --frameset
+
+Use this option to allow only tags and attributes defined by the XHTML 1.0
+Frameset specification (body tags are disallowed, use frameset instead):
+
+L<http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd>
+
+=item --mkdoc16
+
+Use this option to allow only tags permitted by the MKDoc CMS; this mode is
+useful for debugging only.
+
+=back
+
+=head1 License
+
+This program is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 See Also
+
+L<perl>, L<MKDoc::XML>
+
+=head1 Author
+
+Paul Arzul
+
+=cut
+


More information about the MKDoc-commit mailing list