From: Ed A. <ep...@us...> - 2004-05-23 16:23:26
|
Update of /cvsroot/xmltv/xmltv/grab/uk_rt
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv20385/uk_rt

Modified Files:
    tv_grab_uk_rt.in
Log Message:
Take a more sensible approach to cleaning up bad characters - if there are any left outside the legal ISO-8859-1 range give a warning.

Index: tv_grab_uk_rt.in
===================================================================
RCS file: /cvsroot/xmltv/xmltv/grab/uk_rt/tv_grab_uk_rt.in,v
retrieving revision 1.64
retrieving revision 1.65
diff -C2 -d -r1.64 -r1.65
*** tv_grab_uk_rt.in 13 May 2004 18:52:31 -0000 1.64
--- tv_grab_uk_rt.in 23 May 2004 16:23:15 -0000 1.65
***************
*** 676,679 ****
--- 676,680 ----
  # HTML-demoronizing.
  #
+ my $warned_bad_chars;
  sub get_url( $ ) {
      my $url = shift;
***************
*** 684,688 ****
      tr/\222\222\226/''-/;
      tr/\010//d;
!     # There could be other illegal chars but I haven't seen them.
      return $_;
  }
--- 685,694 ----
      tr/\222\222\226/''-/;
      tr/\010//d;
!     tr/\t/ /;
!     if (s/([^\012\015\040-\176\240-\377]+)//g) {
!         warn "removing bad characters: '$1'"
!           unless $warned_bad_chars++;
!     }
!
      return $_;
  }
|