[SimBot-commits] CVS: simbot simbot.pl,1.133,1.134
Status: Abandoned
Brought to you by:
kstange
|
From: Pete P. <fou...@us...> - 2005-08-07 18:25:43
|
Update of /cvsroot/simbot/simbot
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv25342

Modified Files:
    simbot.pl
Log Message:
htmlize now masks email addresses (or things that look like email addresses)

Index: simbot.pl
===================================================================
RCS file: /cvsroot/simbot/simbot/simbot.pl,v
retrieving revision 1.133
retrieving revision 1.134
diff -u -d -p -r1.133 -r1.134
--- simbot.pl 29 Jul 2005 06:44:40 -0000 1.133
+++ simbot.pl 7 Aug 2005 18:25:26 -0000 1.134
@@ -568,9 +568,52 @@ sub htmlize {
         $string .= $line . "</div>\n";
     } # end foreach lines
     $string =~ s%(http|ftp)://[^\s\n<>]+%<a href="$&">$&</a>%g;
+    while($string =~ m/\b(\S+@[a-z\-\.]+\.[a-z]+)/i) {
+        my $email = $&;
+        my $masked = &html_mask_email($email);
+        $string =~ s/$email/$masked/g;
+    }
     return $string;
 }
 
+# HTML_MASK_EMAIL: Returns the HTML for a masked email address.
+# Currently, we break the address apart into user and host,
+# turn each character into its HTML escaped ascii code,
+# and return a simple javascript with the address broken up and out of order
+# that, when run, outputs the address properly (and properly linked)
+# This doesn't make harvesting impossible, but it does make it more difficult.
+# Viewers without javascript see [email removed] instead.
+sub html_mask_email {
+    my ($user, $host) = @_[0] =~ m/^(\S+)@(\S+)$/;
+    my ($nuser, $nhost);
+    for(my $i; $i < length $user; $i++) {
+        $nuser .= '&#' . ord(substr($user, $i, 1)) . ';';
+    }
+    for(my $i; $i < length $host; $i++) {
+        $nhost .= '&#' . ord(substr($host, $i, 1)) . ';';
+    }
+
+    return <<EOT;
+<script type="text/javascript">
+var p='$nhost';
+var w='to:';
+var l='$nuser';
+var u='ma';
+var s='@';
+var d='il';
+document.write('<a href="');
+document.write(u+d);
+document.write(w+l);
+document.write(s+p);
+document.write('">');
+document.write(l);
+document.write(s+p);
+document.write('</a>');
+</script><noscript>[email removed]</noscript>
+EOT
+
+}
+
 # NUMBERIZE: Find all the word-based numbers in a string and replace them
 # with digit-based numbers.
 sub numberize {
|