From: Peter M. <pet...@ma...> - 2002-01-06 13:19:26
|
Hi, I have tried to use canonical_dn with UTF-8 and I think it handles strings with UTF-8 encoded values incorrectly. Characters with codes > 127 have UTF-8 encodings that consist of two or more bytes, all of which have codes > 127. Since these characters are legal in LDAPv3 DNs they should not get escaped. So line 310 of Net/LDAP/Util.pm should read $val =~ s/([\x00-\x1f])/sprintf("\\%02x",ord($1))/eg; instead of the current version: $val =~ s/([\x00-\x1f\x7f-\xff])/sprintf("\\%02x",ord($1))/eg; When changing canonical_dn() anyway, maybe splitting the implementation into three functions would be helpful. It would give users of Net::LDAP a standardized way of dealing with DNs and their parts (very helpful when moving entries, ...) without having to reinvent the wheel themselves. Here is my idea: ## split a DN string into its parts; code stolen from canonical_dn() ## # Synopsis: @rdns = splitDN($dn, %optionHash) # allowed options: # * lowercase: convert attribute names to lower case # * uppercase: convert attribute names to upper case # * sortRDN: sort RDN values # * splitRDN: split multi part RDNs into their parts sub splitDN($%) { my $dn = shift; my %opt = @_; my @dn; my @rdn; $dn = $dn->dn if ref($dn); while ($dn =~ /\G(?: \s* ([a-zA-Z][-a-zA-Z0-9]*|(?:[Oo][Ii][Dd]\.)?\d+(?:\.\d+)*) \s* = \s* ( (?:[^\\",=+<>\#;]*[^\\",=+<>\#;\s]|\\(?:[\\ ",=+<>#;]|[0-9a-fA-F]{2}))* | \#(?:[0-9a-fA-F]{2})+ | "(?:[^\\"]+|\\(?:[\\",=+<>#;]|[0-9a-fA-F]{2}))*" ) \s* (?:([;,+])\s*(?=\S)|$) )\s*/gcx) { my ($type,$val,$sep) = ($1,$2,$3); $type =~ s/^oid\.(\d+(\.\d+)*)$/$1/i; $type = lc($type) if ($opt{lowercase}); $type = uc($type) if ($opt{uppercase}); if ($val !~ /^#/) { $val =~ s/^"(.*)"$/$1/; $val =~ s/\\([\\ ",=+<>#;]|[0-9a-fA-F]{2}) /length($1)==1 ?
$1 : chr(hex($1)) /xeg; $val =~ s/([\\",=+<>#;])/\\$1/g; $val =~ s/([\x00-\x1F])/sprintf("\\%02x",ord($1))/eg; $val =~ s/(^\s+|\s+$)/"\\20" x length $1/ge; } push @rdn, "$type=$val"; unless (defined $sep and $sep eq '+') { @rdn = sort(@rdn) if ($opt{sortRDN}); push @dn, ($opt{splitRDN}) ? ((scalar(@rdn) > 1) ? [ @rdn ] : ($rdn[0] || '')) : join('+', @rdn); @rdn = (); } } return((length($dn) != (pos($dn) || 0)) ? () : @dn); } ## join RDNs and RDN parts into a DN string ## # Synopsis: $dn = joinDN(@dnpartref, %optionhash) sub joinDN(\@%) { my @dnparts = @i{+shift}; my %opt = @_; my $dn = ''; @dnparts = reverse(@dnparts) if ($opt{reversed}); foreach my $part (@dnparts) { $dn .= (($opt{reversed}) ? \000 : ',') if ($dn); if (ref($part)) # multi part RDN { my $partlist = ($opt{revered}) ? reverse(@$part) : @$part; my $rdn; foreach my $rdnpart (@partlist) { return if (!$rdnpart); $rdn .= (($opt{reversed}) ? \001 : '+') if ($rdn); $rdn .= $rdnpart; } $dn .= $rdn; } else # single part RDN { return if (!$part); $dn .= $part; } } return($dn); } These two basic functions now make it possible to implement canonical_dn() in only a few lines: sub canonical_dn($;$) { my ($dn, $rev) = @_; $dn = $dn->dn if ref($dn); my @dnparts = splitDN($dn, uppercase => 1, splitRDN => 1, sortRDN => 1); joinDN(@dnparts, reversed => ($rev||0)); } Yours Peter -- Peter Marschall | eMail: pet...@ma... Scheffelstraße 15 | pet...@is... 97072 Würzburg | Tel: 0931/14721 PGP: D7 FF 20 FE E6 6B 31 74 D1 10 88 E0 3C FE 28 35 |