[MKDoc-commit] [MKDoc-XML] added xhtmlclean command-line tool
bruno at mkdoc.demon.co.uk
bruno at mkdoc.demon.co.uk
Thu Sep 8 17:31:49 BST 2005
Log Message:
-----------
[MKDoc-XML] added xhtmlclean command-line tool
Modified Files:
--------------
MKDoc-XML:
Changes
MANIFEST
Makefile.PL
Added Files:
-----------
MKDoc-XML/bin:
xhtmlclean
-------------- next part --------------
Index: MANIFEST
===================================================================
RCS file: /var/spool/cvs/MKDoc-XML/MANIFEST,v
retrieving revision 1.15
retrieving revision 1.16
diff -LMANIFEST -LMANIFEST -u -r1.15 -r1.16
--- MANIFEST
+++ MANIFEST
@@ -1,4 +1,5 @@
.cvsignore
+bin/xhtmlclean
Changes
lib/MKDoc/XML.pm
lib/MKDoc/XML/Decode.pm
Index: Changes
===================================================================
RCS file: /var/spool/cvs/MKDoc-XML/Changes,v
retrieving revision 1.44
retrieving revision 1.45
diff -LChanges -LChanges -u -r1.44 -r1.45
--- Changes
+++ Changes
@@ -2,6 +2,7 @@
0.76
- mkdoc16 and xmhtmlttansitional tag attribute stripping less enthusiastic
+ - xhtmlclean command-line tool added
0.75 Thu Mar 10 15:12:00 2005
- Added tests for tagger bugs when matching numbers and double escaping attribute contents
Index: Makefile.PL
===================================================================
RCS file: /var/spool/cvs/MKDoc-XML/Makefile.PL,v
retrieving revision 1.10
retrieving revision 1.11
diff -LMakefile.PL -LMakefile.PL -u -r1.10 -r1.11
--- Makefile.PL
+++ Makefile.PL
@@ -4,6 +4,7 @@
WriteMakefile(
'NAME' => 'MKDoc::XML',
'VERSION_FROM' => 'lib/MKDoc/XML.pm', # finds $VERSION
+ 'EXE_FILES' => [ 'bin/xhtmlclean' ],
'PREREQ_PM' => {
'Test::More' => '0.47',
'Scalar::Util' => '1.07',
--- /dev/null
+++ bin/xhtmlclean
@@ -0,0 +1,130 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+use lib 'lib';
+
+use Getopt::Declare;
+use MKDoc::XML::Stripper;
+use MKDoc::XML;
+our $VERSION = $MKDoc::XML::VERSION;
+
+my $usage = q(
+ [nocase] [strict]
+ [mutex: --mkdoc16 --strict --transitional --frameset]
+
+ XHTML Clean
+ Allowable elements/attributes
+
+ --mkdoc16 MKDoc 1.6 [required]
+ --strict XHTML 1.0 Strict [required]
+ --transitional XHTML 1.0 Transitional [required]
+ --frameset XHTML 1.0 Frameset [required]
+);
+
+# Parse @ARGV against the spec above; bail out if the parser returns undef.
+my $arg = Getopt::Declare->new($usage);
+die "$0: invalid command-line arguments.\n" if not defined $arg;
+
+# Load the tag/attribute definition matching the selected mode.
+my $strip = MKDoc::XML::Stripper->new;
+$strip->load_def('mkdoc16') if $arg->{'--mkdoc16'};
+$strip->load_def('xhtml10strict') if $arg->{'--strict'};
+$strip->load_def('xhtml10transitional') if $arg->{'--transitional'};
+$strip->load_def('xhtml10frameset') if $arg->{'--frameset'};
+
+# Slurp STDIN in a single read instead of appending it line by line.
+my $content = do { local $/; <STDIN> };
+$content = '' if not defined $content;
+
+# Reject empty input before processing, so the stripper never sees ''.
+die "$0: No output.\n"
+    if $content eq '';
+
+print $strip->process_data($content)
+    or die "$0: cannot write to STDOUT: $!\n";
+
+1;
+
+__END__
+
+=head1 NAME
+
+xhtmlclean - Utility to strip unwanted tags and attributes from (well-formed) XHTML
+
+=head1 Synopsis
+
+ xhtmlclean --transitional < temp.html > index.html
+ tidy wysiwyg.html | xhtmlclean --strict > index.html
+ echo '<p class="ok" ms:foo="AE56%">' | xhtmlclean --strict
+ echo '<foo>foo</foo> <em>emphasis</em>' | xhtmlclean --strict
+
+=head1 DESCRIPTION
+
+'xhtmlclean' is a command-line utility program for taking well-formed XML text
+and filtering out non-XHTML tags and attributes. It uses the L<MKDoc::XML>
+library to tokenise the XML and has different modes for XHTML Strict, XHTML
+Transitional, etc.
+
+Warning: input must be well-formed XML; otherwise the results may be strange.
+
+L<http://search.cpan.org/dist/MKDoc-XML>
+
+=head1 Calling syntax
+
+ xhtmlclean [--mkdoc16|--strict|--transitional|--frameset]
+
+=head2 Options
+
+=over
+
+=item -h
+
+The I<-h> option will display a short usage summary.
+
+=item -v
+
+This option displays the version number of L<MKDoc::XML>.
+
+=item --strict
+
+Use this option to allow only tags defined by the XHTML 1.0 Strict
+specification:
+
+L<http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd>
+
+=item --transitional
+
+Use this option to allow only tags defined by the looser XHTML 1.0 Transitional
+specification:
+
+L<http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd>
+
+=item --frameset
+
+Use this option to allow only tags and attributes defined by the XHTML 1.0
+Frameset specification (body tags are disallowed, use frameset instead):
+
+L<http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd>
+
+=item --mkdoc16
+
+Use this option to allow only tags permitted with the MKDoc CMS, useful for
+debugging only.
+
+=back
+
+=head1 License
+
+This program is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 See Also
+
+L<perl>, L<MKDoc::XML>
+
+=head1 Author
+
+Paul Arzul
+
+=cut
+
More information about the MKDoc-commit
mailing list