[Htmlparser-user] wrong link encoding?

SourceForge Headquarters 1320 Columbia Street Suite 310 San Diego, CA 92101 +1 (858) 422-6466

Hello!
I'm trying to parse this page and extract all the links from it:
http://www.vu.lt/lt/naujienos/337/

For some reason, the link to the PDF file comes out as:
http://www.vu.lt/site_files/InfS/Naujienos/istorik??%20dienos.pdf

which is wrong. It looks as if the wrong charset was used somewhere.

Here is part of my code which does the parsing:

public LinkedList parseDocument(InputStream document, String encoding) {
    try {
	Lexer lexer = new Lexer(new Page(document, encoding));
	String href;
	try {
	    lexer.reset();
	    if (banner != null)
		validateBanner(lexer);
	    lexer.reset();
	    Parser parser = new Parser(lexer);
	    NodeList list = null;
	    try {
		list = parser
			.extractAllNodesThatMatch(new InterestedTagsFilter());
	    } catch (EncodingChangeException e) {
		log.warn(e);
		lexer.reset();
		lexer.getPage().setEncoding(parser.getEncoding());
		list = parser
			.extractAllNodesThatMatch(new InterestedTagsFilter());
	    }
	    for (SimpleNodeIterator it = list.elements(); it.hasMoreNodes();) {
		TagNode node = (TagNode) it.nextNode();
		href = null;
		if (LinkTag.class.equals(node.getClass())
			&& validateLink((LinkTag) node)) {
		    href = ((LinkTag) node).getLink();
		} else if (ImageTag.class.equals(node.getClass())
			|| FrameTag.class.equals(node.getClass())) {
		    href = node.getAttribute("src");
		} else if (TitleTag.class.equals(node.getClass())) {
		    title = ((TitleTag) node).getTitle();
		} else if (BaseHrefTag.class.equals(node.getClass())) {
		    try {
			baseTag = getBaseURL(new URI(((BaseHrefTag) node)
				.getBaseUrl(), false));
		    } catch (URIException e2) {
		    }
		} else if (MetaTag.class.equals(node.getClass())
			&& "refresh".equalsIgnoreCase(((MetaTag) node)
				.getHttpEquiv())) {
		    String URL = ((MetaTag) node).getMetaContent();
		    if (URL != null && URL.length() > 0) {
			String arr[] = URL.split("URL=");
			if (arr != null && arr.length == 2)
			    href = arr[1];
		    }
		}
		if (href != null && href.length() > 0) {
		    if (log.isDebugEnabled())
------->		log.debug(href);		<-----------
		    results.add(getURL(StringEscapeUtils
			    .unescapeHtml(getEscapedURL(href.trim()))));
		}
	    }
	    this.encoding = parser.getEncoding();
	    if (log.isDebugEnabled())
		log.debug(this.encoding);
	} catch (ParserException e1) {
	    log.error(e1, e1);
	}
    } catch (UnsupportedEncodingException e) {
	log.error(e, e);
    }
    return results;
}

On the marked line, the application logs:
/site_files/InfS/Naujienos/istorik??%20dienos.pdf

What could be wrong here?

-- 
Eugene N Dzhurinsky