From: Grant M. <gr...@us...> - 2004-04-05 09:25:28
|
Update of /cvsroot/perl-xml/xml-simple/lib/XML In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv6360/lib/XML Modified Files: Simple.pm Log Message: - added NumericEscape option - added ValueAttr option (patch from Anton Berezin) - suppress 'wide character in print' warning (reported by Dawei Lin) Index: Simple.pm =================================================================== RCS file: /cvsroot/perl-xml/xml-simple/lib/XML/Simple.pm,v retrieving revision 1.19 retrieving revision 1.20 diff -u -d -r1.19 -r1.20 --- Simple.pm 31 Mar 2004 10:07:46 -0000 1.19 +++ Simple.pm 5 Apr 2004 09:12:47 -0000 1.20 @@ -70,7 +70,8 @@ my @KnownOptOut = qw(keyattr keeproot contentkey noattr rootname xmldecl outputfile noescape suppressempty - grouptags nsexpand handler noindent attrindent nosort); + grouptags nsexpand handler noindent attrindent nosort + valueattr numericescape); my @DefKeyAttr = qw(name key id); my $DefRootName = qq(opt); @@ -330,7 +331,7 @@ if($filename) { # $tree = $xp->parsefile($filename); # Changed due to prob w/mod_perl local(*XML_FILE); - open(XML_FILE, "<$filename") || croak qq($filename - $!); + open(XML_FILE, '<', $filename) || croak qq($filename - $!); $tree = $xp->parse(*XML_FILE); close(XML_FILE); } @@ -540,8 +541,9 @@ } else { local(*OUT); - open(OUT, ">$self->{opt}->{outputfile}") || + open(OUT, '>', "$self->{opt}->{outputfile}") || croak "open($self->{opt}->{outputfile}): $!"; + binmode(OUT, ':utf8') if($] >= 5.008); print OUT $xml || croak "print: $!"; close(OUT); } @@ -1251,7 +1253,6 @@ } - # Convert to XML if(ref($ref)) { @@ -1283,7 +1284,7 @@ $ref = $self->hash_to_array($name, $ref); } - + my @result = (); my($key, $value); @@ -1384,6 +1385,14 @@ $value = {}; } } + + if(!ref($value) + and $self->{opt}->{valueattr} + and $self->{opt}->{valueattr}->{$key} + ) { + $value = { $self->{opt}->{valueattr}->{$key} => $value }; + } + if(ref($value) or $self->{opt}->{noattr}) { push @nested, $self->value_to_xml($value, $key, "$indent "); @@ -1518,7 +1527,24 @@ $data =~ s/>/>/sg; $data =~ s/"/"/sg; - return($data); + my $level = $self->{opt}->{numericescape} or return $data; + + return $self->numeric_escape($data, $level); +} + +sub numeric_escape { + my($self, $data, $level) = @_; + + use utf8; # required for 5.6 + + if($self->{opt}->{numericescape} eq '2') { + $data =~ s/([^\x00-\x7F])/'&#' . ord($1) . ';'/gse; + } + else { + $data =~ s/([^\x00-\xFF])/'&#' . ord($1) . ';'/gse; + } + + return $data; } @@ -2362,12 +2388,29 @@ I<Note: You must have the XML::NamespaceSupport module installed if you want C<XMLout()> to translate URIs back to prefixes>. +=head2 NumericEscape => 0 | 1 | 2 I<# out - handy> + +Use this option to have 'high' (non-ASCII) characters in your Perl data +structure converted to numeric entities (eg: €) in the XML output. Three +levels are possible: + +0 - default: no numeric escaping (OK if you're writing out UTF8) + +1 - only characters above 0xFF are escaped (ie: characters in the 0x80-FF range are not escaped), possibly useful with ISO8859-1 output + +2 - all characters above 0x7F are escaped (good for plain ASCII output) + =head2 OutputFile => <file specifier> I<# out - handy> The default behaviour of C<XMLout()> is to return the XML as a string. If you wish to write the XML to a file, simply supply the filename using the -'OutputFile' option. Alternatively, you can supply an IO handle object instead -of a filename. +'OutputFile' option. + +This option also accepts an IO handle object - especially useful in Perl 5.8.0 +and later for writing out in an encoding other than UTF-8, eg: + + open my $fh, '>:encoding(iso-8859-1)', $path or die "open($path): $!"; + XMLout($ref, OutputFile => $fh); =head2 ParserOpts => [ XML::Parser Options ] I<# in - don't use this> @@ -2417,17 +2460,6 @@ empty elements (rather than empty attributes), it also suppresses the generation of warnings about undefined values. -=head2 Variables => { name => value } I<# in - handy> - -This option allows variables in the XML to be expanded when the file is read. -(there is no facility for putting the variable names back if you regenerate -XML using C<XMLout>). - -A 'variable' is any text of the form C<${name}> which occurs in an attribute -value or in the text content of an element. If 'name' matches a key in the -supplied hashref, C<${name}> will be replaced with the corresponding value from -the hashref. If no matching key is found, the variable will not be replaced. - =head2 ValueAttr => [ names ] I<# in - handy> Use this option to deal elements which always have a single attribute and no @@ -2462,6 +2494,20 @@ the element and the attribute names. This is not only safer, it also allows the original XML to be reconstructed by C<XMLout()>. +Note: You probably don't want to use this option and the NoAttr option at the +same time. + +=head2 Variables => { name => value } I<# in - handy> + +This option allows variables in the XML to be expanded when the file is read. +(there is no facility for putting the variable names back if you regenerate +XML using C<XMLout>). + +A 'variable' is any text of the form C<${name}> which occurs in an attribute +value or in the text content of an element. If 'name' matches a key in the +supplied hashref, C<${name}> will be replaced with the corresponding value from +the hashref. If no matching key is found, the variable will not be replaced. + =head2 VarAttr => 'attr_name' I<# in - handy> In addition to the variables defined using C<Variables>, this option allows |