加入收藏 | 设为首页 | 会员中心 | 我要投稿 李大同 (https://www.lidatong.com.cn/)- 科技、建站、经验、云计算、5G、大数据,站长网!
当前位置: 首页 > 大数据 > 正文

perl 将XML格式转换为html、txt、pod格式

发布时间:2020-12-15 21:01:12 所属栏目:大数据 来源:网络整理
导读:xmlfile: [root@dou xml]# cat example ?xml version="1.0" encoding="UTF-8"? README ? NAMETest README File/NAME ? SYNOPSIS ???? This is a summary of the file. ?????? It should appear in PRE tags ? /SYNOPSIS ? DESCRIPTION ???? TEXTThis is the


[root@dou xml]# cat example
<?xml version="1.0" encoding="UTF-8"?>
<README>
  <NAME>Test README File</NAME>
  <SYNOPSIS>
    This is a summary of the file.
      It should appear in PRE tags
  </SYNOPSIS>
  <DESCRIPTION>
    <TEXT>This is the full description of the file</TEXT>
    <SUBSECTION>
      <HEAD>Subsection Title</HEAD>
      <TEXT>Subsection text</TEXT>
    </SUBSECTION>
    <SUBSECTION>
      <HEAD>Another Subsection Title</HEAD>
      <TEXT>More Subsection text</TEXT>
      <LIST TYPE='bullet'>
        <ITEM>List item 1</ITEM>
        <ITEM>List item 2</ITEM>
      </LIST>
    </SUBSECTION>
  </DESCRIPTION>
  <AUTHOR>
    <ANAME>Dave Cross</ANAME>
    <EMAIL>dave@mag-sol.com</EMAIL>
  </AUTHOR>
  <SEE_ALSO>
    <LIST TYPE='bullet'>
      <ITEM>Something</ITEM>
      <ITEM>Something else</ITEM>
    </LIST>
  </SEE_ALSO>
</README>
[root@dou xml]#


#!/usr/bin/perl -w

use strict;

use XML::Parser;
use Getopt::Std;
use Text::Wrap;

# Output formats selectable with -f, keyed by the one-letter option value.
my %formats = (h => {name => 'html'},
               p => {name => 'pod'},
               t => {name => 'text'});

# Command line: -f <format> followed by the XML file to convert.
# getopts() must be given a REFERENCE to the hash it fills in; passing the
# hash itself flattens it into an (empty) list and no option is ever stored.
my %opts;
(getopts('f:', \%opts) && defined $opts{f} && @ARGV)
    || die "usage: format_xml.pl -f h|p|t xml_file\n";

die "Invalid format: $opts{f}\n" unless exists $formats{$opts{f}};

warn "Formatting file as $formats{$opts{f}}->{name}\n";

# Style => 'Tree' makes parsefile() return the nested [tag, [attrs, ...]]
# structure that process_node() and top() walk.
my $p = XML::Parser->new(Style => 'Tree');
my $tree = $p->parsefile(shift @ARGV);

# Shared formatting state used by the handler subs below.
my $level = 0;    # current nesting depth in the XML tree
my $ind = '';     # indentation string for text output, derived from $level
my $head = 1;     # current heading level (incremented inside SUBSECTION)

# Emit the document: header, then the converted body, then the footer.
top($tree);

process_node(@$tree);

bot();
# process_node($type, $content)
#
# Handles one (tag, content) pair from an XML::Parser 'Tree' structure:
# calls the output handler for the element, then recurses into its children.
#
#   $type    - element name, or 0 for a text node (text nodes are ignored
#              here; handlers read the text directly from their content).
#   $content - array ref [ \%attrs, tag1, content1, tag2, content2, ... ].
#              It is consumed (emptied) while processing.
#
# Updates the file-level $ind, $level and $head as it walks the tree.
sub process_node {
    my ($type, $content) = @_;

    $ind = ' ' x $level;

    # Text nodes arrive as (0, "string"); there is nothing to dispatch.
    return unless $type;

    # The first element of an element's content is always its attribute hash.
    my $attrs = shift @$content;

    if    ($type eq 'NAME')        { name($content) }
    elsif ($type eq 'SYNOPSIS')    { synopsis($content) }
    elsif ($type eq 'DESCRIPTION') { description() }
    elsif ($type eq 'TEXT')        { text($content) }
    elsif ($type eq 'CODE')        { code($content) }
    elsif ($type eq 'HEAD')        { head($content) }
    elsif ($type eq 'LIST') {
        # list() renders its ITEM children itself, so make sure nothing is
        # left for the generic recursion below.
        list($attrs, $content);
        @$content = ();
    }
    elsif ($type eq 'AUTHOR')      { author() }
    elsif ($type eq 'ANAME')       { aname($content) }
    elsif ($type eq 'EMAIL')       { email($content) }
    elsif ($type eq 'SEE_ALSO')    { see_also($content) }

    # Children are stored as consecutive (tag, content) pairs, so take them
    # two at a time from the FRONT of the list (offset 0, length 2).
    while (my @node = splice @$content, 0, 2) {
        ++$level;
        ++$head if $type eq 'SUBSECTION';
        process_node(@node);
        --$head if $type eq 'SUBSECTION';
        --$level;
    }

    return;
}

# top($tree)
#
# Prints the document header for the selected output format.
# The title is read from $tree->[1]->[4]->[2].
# NOTE(review): that index path assumes the first child element of the root
# is NAME and that it is preceded by a whitespace text node, as in the
# sample file - confirm for other inputs.
sub top {
    my ($doc) = @_;

    my $title = $doc->[1]->[4]->[2];

    if ($opts{f} eq 'h') {
        print "<html>\n";
        print "<head>\n";
        print "<title>$title</title>\n";
        print "</head>\n<body>\n";
    } elsif ($opts{f} eq 'p') {
        print "=pod\n\n";
    } elsif ($opts{f} eq 't') {
        # Title underlined with a row of dashes of the same length.
        print "\n", $title, "\n";
        print '-' x length($title), "\n\n";
    }

    return;
}

# bot()
#
# Prints the document footer for the selected output format.
# Plain text output needs no footer.
sub bot {
    if ($opts{f} eq 'h') {
        print "</body>\n</html>\n";
    } elsif ($opts{f} eq 'p') {
        print "=cut\n\n";
    } elsif ($opts{f} eq 't') {
        # do nothing
    }

    return;
}

# name($content)
#
# Prints the NAME section. $content is the NAME element's content list
# after its attribute hash has been removed, so $content->[1] is its text.
sub name {
    my $content = shift;

    if ($opts{f} eq 'h') {
        print "<h1>NAME</h1>\n";
        print "<p>$content->[1]</p>\n";
    } elsif ($opts{f} eq 'p') {
        print "=head1 NAME\n\n";
        print "$content->[1]\n\n";
    } elsif ($opts{f} eq 't') {
        print "NAME\n\n";
        print $ind, "$content->[1]\n\n";
    }

    return;
}

# synopsis($content)
#
# Prints the SYNOPSIS section. The text ($content->[1]) is emitted untrimmed
# so that its original line breaks and indentation survive (inside <pre>
# for HTML output).
sub synopsis {
    my $content = shift;

    if ($opts{f} eq 'h') {
        print "<h1>SYNOPSIS</h1>\n";
        print "<pre>$content->[1]</pre>\n";
    } elsif ($opts{f} eq 'p') {
        print "=head1 SYNOPSIS\n\n";
        print "$content->[1]\n";
    } elsif ($opts{f} eq 't') {
        print "SYNOPSIS\n";
        print "$content->[1]\n";
    }

    return;
}
# description()
#
# Prints the DESCRIPTION section heading. The section body is produced by
# the handlers for the element's children.
sub description {
    if ($opts{f} eq 'h') {
        print "<h1>DESCRIPTION</h1>\n";
    } elsif ($opts{f} eq 'p') {
        print "=head1 DESCRIPTION\n\n";
    } elsif ($opts{f} eq 't') {
        print "DESCRIPTION\n\n";
    }

    return;
}

# text($content)
#
# Prints one paragraph of body text ($content->[1]). POD and plain text
# output are whitespace-trimmed and re-wrapped with Text::Wrap; plain text
# is additionally indented by the current $ind.
sub text {
    my $content = shift;

    if ($opts{f} eq 'h') {
        print "<p>$content->[1]</p>\n";
    } elsif ($opts{f} eq 'p') {
        print wrap('', '', trim($content->[1])), "\n\n";
    } elsif ($opts{f} eq 't') {
        print wrap($ind, $ind, trim($content->[1])), "\n\n";
    }

    return;
}

# code($content)
#
# Prints a block of literal code ($content->[1]) without trimming or
# wrapping; HTML output places it inside <pre>.
sub code {
    my $content = shift;

    if ($opts{f} eq 'h') {
        print "<pre>$content->[1]</pre>\n";
    } elsif ($opts{f} eq 'p') {
        print "$content->[1]\n";
    } elsif ($opts{f} eq 't') {
        print "$content->[1]\n";
    }

    return;
}

# head($content)
#
# Prints a sub-heading ($content->[1], trimmed). The heading level comes
# from the file-level $head, which process_node() raises while inside a
# SUBSECTION element.
sub head {
    my $content = shift;

    my $title = trim($content->[1]);

    if ($opts{f} eq 'h') {
        print "<h$head>", $title, "</h$head>\n";
    } elsif ($opts{f} eq 'p') {
        print "=head$head ", $title, "\n\n";
    } elsif ($opts{f} eq 't') {
        print $title, "\n\n";
    }

    return;
}

# list($attrs, $content)
#
# Prints a LIST element and its ITEM children.
#
#   $attrs   - attribute hash of the LIST element; TYPE selects 'bullet'
#              or 'numbered' (only HTML output distinguishes the two).
#   $content - the LIST's remaining content: (tag, content) pairs. It is
#              consumed (emptied) by this sub.
sub list {
    my ($attrs, $content) = @_;

    my %list = (bullet => 'ul', numbered => 'ol');
    my $type = $attrs->{TYPE};

    # Fall back to a bullet list when TYPE is absent or unrecognised,
    # instead of emitting an empty "<>" tag.
    my $tag = (defined $type && exists $list{$type}) ? $list{$type} : 'ul';

    # Children are consecutive (tag, content) pairs; take them two at a time
    # from the front and keep the text of each ITEM ($body->[2], i.e. the
    # string after the attribute hash and the text-node marker 0).
    my @items;
    while (my ($child, $body) = splice @$content, 0, 2) {
        push @items, $body->[2] if $child eq 'ITEM';
    }

    if ($opts{f} eq 'h') {
        print "<$tag>\n";
        print "<li>$_</li>\n" for @items;
        print "</$tag>\n";
    } elsif ($opts{f} eq 'p') {
        # POD command paragraphs must be separated by blank lines, otherwise
        # "=item" is swallowed into the preceding "=over" paragraph.
        print "=over 4\n\n";
        print "=item *\n\n$_\n\n" for @items;
        print "=back\n\n";
    } elsif ($opts{f} eq 't') {
        print $ind, "* $_\n" for @items;
        print "\n";
    }

    return;
}

# author()
#
# Prints the AUTHOR section heading. The name and e-mail address are
# printed by aname() and email().
sub author {
    if ($opts{f} eq 'h') {
        print "<h1>AUTHOR</h1>\n";
    } elsif ($opts{f} eq 'p') {
        print "=head1 AUTHOR\n\n";
    } elsif ($opts{f} eq 't') {
        print "AUTHOR\n\n";
    }

    return;
}

# aname($content)
#
# Prints the author's name ($content->[1]). The line/paragraph is left
# open on purpose: email() prints the address and terminates it (for HTML
# it emits the closing </p>).
sub aname {
    my $content = shift;

    if ($opts{f} eq 'h') {
        print "<p>$content->[1]\n";
    } elsif ($opts{f} eq 'p') {
        print trim($content->[1]), ' ';
    } elsif ($opts{f} eq 't') {
        print $ind, trim($content->[1]), ' ';
    }

    return;
}

# email($content)
#
# Prints the author's e-mail address ($content->[1], trimmed) in angle
# brackets and finishes the line/paragraph started by aname(). HTML output
# uses &lt; / &gt; so the brackets are not taken for a tag.
sub email {
    my $content = shift;

    my $address = trim($content->[1]);

    if ($opts{f} eq 'h') {
        print '&lt;', $address, "&gt;</p>\n";
    } elsif ($opts{f} eq 'p') {
        print '<', $address, ">\n\n";
    } elsif ($opts{f} eq 't') {
        print '<', $address, ">\n\n";
    }

    return;
}

# see_also()
#
# Prints the SEE ALSO section heading. The section body is produced by the
# handlers for the element's children. Any arguments are ignored.
sub see_also {
    if ($opts{f} eq 'h') {
        print "<h1>SEE ALSO</h1>\n";
    } elsif ($opts{f} eq 'p') {
        print "=head1 SEE ALSO\n\n";
    } elsif ($opts{f} eq 't') {
        print "SEE ALSO\n\n";
    }

    return;
}

# trim($string)
#
# Returns a copy of $string with every newline replaced by a space and all
# leading and trailing whitespace removed. An undefined argument yields the
# empty string.
sub trim {
    my ($string) = @_;

    return '' unless defined $string;

    $string =~ s/\n/ /g;     # join multi-line text into a single line
    $string =~ s/^\s+//;     # strip leading whitespace
    $string =~ s/\s+$//;     # strip trailing whitespace

    return $string;
}



[root@dou xml]# perl ex1.pl -f h example
Formatting file as html
<title>Test README File</title>
<p>Test README File</p>
???? This is a summary of the file.
?????? It should appear in PRE tags
? </pre>
<p>This is the full description of the file</p>
<h2>Subsection Title</h2>
<p>Subsection text</p>
<h2>Another Subsection Title</h2>
<p>More Subsection text</p>
<li>List item 1</li>
<li>List item 2</li>
<p>Dave Cross
<h1>SEE ALSO</h1>
<li>Something else</li>
[root@dou xml]#
[root@dou xml]# perl ex1.pl -f t example
Formatting file as text

Test README File


?Test README File


???? This is a summary of the file.
?????? It should appear in PRE tags


? This is the full description of the file

Subsection Title

?? Subsection text

Another Subsection Title

?? More Subsection text

?? * List item 1
?? * List item 2


? Dave Cross <dave@mag-sol.com>


? * Something
? * Something else

[root@dou xml]# perl ex1.pl -f p example
Formatting file as pod

=head1 NAME

Test README File


???? This is a summary of the file.
?????? It should appear in PRE tags


This is the full description of the file

=head2 Subsection Title

Subsection text

=head2 Another Subsection Title

More Subsection text

=over 4
=item *
List item 1

=item *
List item 2


=head1 AUTHOR

Dave Cross <dave@mag-sol.com>

=head1 SEE ALSO

=over 4
=item *

=item *
Something else



[root@dou xml]# ?


