[aKregator-devel] [Bug 126128] cannot find link tag in html
Brought to you by:
lippel
From: Frank O. <fra...@kd...> - 2006-08-20 17:13:45
|
------- You are receiving this mail because: -------
You are the assignee for the bug, or are watching the assignee.

http://bugs.kde.org/show_bug.cgi?id=126128

frank.osterfeld kdemail net changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|NEW                         |RESOLVED
         Resolution|                            |FIXED

------- Additional Comments From frank.osterfeld kdemail net 2006-08-20 19:13 -------
SVN commit 575039 by osterfeld:

use the more robust FeedDetector class from the konq plugin instead of the old and unmaintained "feed discovery" code in loader.cpp
BUG: 126128

 M  +1 -1      Makefile.am
 A             feeddetector.cpp   [License: GPL (v2+) (+Qt exception)]
 A             feeddetector.h   [License: GPL (v2+) (+Qt exception)]
 M  +25 -63    loader.cpp

--- branches/KDE/3.5/kdepim/akregator/src/librss/Makefile.am #575038:575039
@@ -9,7 +9,7 @@
 loader.h librss.h enclosure.h

 librsslocal_la_SOURCES = article.cpp document.cpp image.cpp textinput.cpp \
-    tools_p.cpp loader.cpp enclosure.cpp category.cpp
+    tools_p.cpp loader.cpp enclosure.cpp category.cpp feeddetector.cpp

 librsslocal_la_METASOURCES = AUTO

--- branches/KDE/3.5/kdepim/akregator/src/librss/loader.cpp #575038:575039
@@ -10,6 +10,7 @@
  */
 #include "loader.h"
 #include "document.h"
+#include "feeddetector.h"

 #include <kio/job.h>
 #include <kprocess.h>
@@ -377,74 +378,35 @@
 void Loader::discoverFeeds(const QByteArray &data)
 {
     QString str = QString(data).simplifyWhiteSpace();
-    QString s2;
-    //QTextStream ts( &str, IO_WriteOnly );
-    //ts << data.data();
-
-    // "<[\\s]link[^>]*rel[\\s]=[\\s]\\\"[\\s]alternate[\\s]\\\"[^>]*>"
-    // "type[\\s]=[\\s]\\\"application/rss+xml\\\""
-    // "href[\\s]=[\\s]\\\"application/rss+xml\\\""
-    QRegExp rx( "(?:REL)[^=]*=[^sAa]*(?:service.feed|ALTERNATE)[\\s]*[^s][^s](?:[^>]*)(?:HREF)[^=]*=[^A-Z0-9-_~,./$]*([^'\">\\s]*)", false);
-    if (rx.search(str)!=-1)
-        s2=rx.cap(1);
-    else{
-        // does not support Atom/RSS autodiscovery.. try finding feeds by brute force....
-        int pos=0;
-        QStringList feeds;
-        QString host=d->url.host();
-        rx.setPattern("(?:<A )[^H]*(?:HREF)[^=]*=[^A-Z0-9-_~,./]*([^'\">\\s]*)");
-        while ( pos >= 0 ) {
-            pos = rx.search( str, pos );
-            s2=rx.cap(1);
-            if (s2.endsWith(".rdf") || s2.endsWith(".rss") || s2.endsWith(".xml"))
-                feeds.append(s2);
-            if ( pos >= 0 ) {
-                pos += rx.matchedLength();
-            }
-        }
-
-        s2=feeds.first();
-        KURL testURL;
-        // loop through, prefer feeds on same host
-        QStringList::Iterator end( feeds.end() );
-        for ( QStringList::Iterator it = feeds.begin(); it != end; ++it ) {
-            testURL=*it;
-            if (testURL.host()==host)
-            {
-                s2=*it;
-                break;
-            }
-        }
-    }
-
-    if (s2.isNull()) {
-        //kdDebug() << "No feed found for a site" << endl;
-        return;
-    }
-
-    if (KURL::isRelativeURL(s2))
+
+    QStringList feeds;
+
+    FeedDetectorEntryList list = FeedDetector::extractFromLinkTags(str);
+
+    for (FeedDetectorEntryList::ConstIterator it = list.begin(); it != list.end(); ++it)
     {
-        if (s2.startsWith("//"))
+        feeds += (*it).url();
+    }
+
+    if (list.isEmpty())
+        feeds = FeedDetector::extractBruteForce(str);
+
+    QString feed = feeds.first();
+    QString host = d->url.host();
+    KURL testURL;
+    // loop through, prefer feeds on same host
+    QStringList::Iterator end( feeds.end() );
+    for ( QStringList::Iterator it = feeds.begin(); it != end; ++it)
+    {
+        testURL=*it;
+        if (testURL.host() == host)
         {
-            s2=s2.prepend(d->url.protocol()+":");
-            d->discoveredFeedURL=s2;
+            feed = *it;
+            break;
         }
-        else if (s2.startsWith("/"))
-        {
-            d->discoveredFeedURL=d->url;
-            d->discoveredFeedURL.setPath(s2);
-        }
-        else
-        {
-            d->discoveredFeedURL=d->url;
-            d->discoveredFeedURL.addPath(s2);
-        }
-        d->discoveredFeedURL.cleanPath();
     }
-    else
-        d->discoveredFeedURL=s2;
-    d->discoveredFeedURL.cleanPath();
+    d->discoveredFeedURL = feed.isNull() ? QString() : FeedDetector::fixRelativeURL(feed, d->url);
 }

 #include "loader.moc"
|