From: <my...@us...> - 2011-12-29 16:11:49
|
Revision: 2624 http://aperture.svn.sourceforge.net/aperture/?rev=2624&view=rev Author: mylka Date: 2011-12-29 16:11:42 +0000 (Thu, 29 Dec 2011) Log Message: ----------- [3458993] added proper mappings for three rss feed formats, updated X2R to version 0.0.4, it incorporates the latest fixes, necessary for those mappings to work Modified Paths: -------------- aperture/trunk/core/pom.xml aperture/trunk/core/src/main/resources/org/semanticdesktop/aperture/x2r/atom-mapping.ttl aperture/trunk/core/src/main/resources/org/semanticdesktop/aperture/x2r/rss-1.0-mapping.ttl aperture/trunk/core/src/main/resources/org/semanticdesktop/aperture/x2r/rss-2.0-mapping.ttl aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/x2r/RSSSubCrawlerTest.java Modified: aperture/trunk/core/pom.xml =================================================================== --- aperture/trunk/core/pom.xml 2011-12-23 16:59:03 UTC (rev 2623) +++ aperture/trunk/core/pom.xml 2011-12-29 16:11:42 UTC (rev 2624) @@ -599,7 +599,7 @@ <dependency> <groupId>pl.edu.agh.x2r</groupId> <artifactId>x2r-core</artifactId> - <version>0.0.3</version> + <version>0.0.4</version> <exclusions> <exclusion> <artifactId>jldap</artifactId> @@ -644,6 +644,16 @@ </exclusions> </dependency> <dependency> + <groupId>xom</groupId> + <artifactId>xom</artifactId> + <version>1.2.6</version> + </dependency> + <dependency> + <groupId>gov.lbl.acs.nux</groupId> + <artifactId>nux</artifactId> + <version>1.6-onelementxpath</version> + </dependency> + <dependency> <groupId>net.sourceforge.saxon</groupId> <artifactId>saxon</artifactId> <version>9.1.0.8</version> Modified: aperture/trunk/core/src/main/resources/org/semanticdesktop/aperture/x2r/atom-mapping.ttl =================================================================== --- aperture/trunk/core/src/main/resources/org/semanticdesktop/aperture/x2r/atom-mapping.ttl 2011-12-23 16:59:03 UTC (rev 2623) +++ aperture/trunk/core/src/main/resources/org/semanticdesktop/aperture/x2r/atom-mapping.ttl 2011-12-29 16:11:42 UTC (rev 2624) @@ -2,9 +2,12 @@ @prefix ax: <http://aperture.sourceforge.net/2011/07/x2rsubcrawler#> . @prefix dc: <http://purl.org/dc/elements/1.1/> . @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . -@prefix : <uri:test:dblp1#> . +@prefix nie: <http://www.semanticdesktop.org/ontologies/2007/01/19/nie#> . +@prefix nco: <http://www.semanticdesktop.org/ontologies/2007/03/22/nco#> . +@prefix nfo: <http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#> . +@prefix : <uri:test:atom#> . -:atomMapping a xml2r:Mapping ; +:rssMapping a xml2r:Mapping ; xml2r:namespaceDefinition [ xml2r:namespacePrefix "atom" ; xml2r:namespaceUri "http://www.w3.org/2005/Atom" @@ -12,3 +15,36 @@ ax:mimeType "application/atom+xml" ; ax:rootElementName "feed" ; ax:rootElementNameSpace "http://www.w3.org/2005/Atom" . + +:atomElementClassMap a xml2r:ClassMap ; + xml2r:belongsToMapping :rssMapping; + xml2r:nodeXPath "/atom:feed/atom:*" . + +:idMap a xml2r:ClassMap ; + xml2r:delegatedFrom :atomElementClassMap; + xml2r:onElementName "id" ; + xml2r:setVariable [ + xml2r:variableName "id" ; + xml2r:pattern "${text()||domain-from-url}" + ] . + +:entryMap a xml2r:ClassMap ; + xml2r:delegatedFrom :atomElementClassMap; + xml2r:onElementName "entry" ; + xml2r:uriPattern "&{id}${atom:link[1]/data(@href)}" ; + xml2r:class nfo:TextDocument . + +:descriptionBridge a xml2r:PropertyBridge ; + xml2r:belongsToClassMap :entryMap ; + xml2r:property nie:description ; + xml2r:pattern "${summary/text()}" . + +:contentBridge a xml2r:PropertyBridge ; + xml2r:belongsToClassMap :entryMap ; + xml2r:property nie:description ; + xml2r:pattern "${atom:content}" . + +:dateBridge a xml2r:PropertyBridge ; + xml2r:belongsToClassMap :entryMap ; + xml2r:property nie:contentLastModified ; + xml2r:pattern "${atom:updated/text()}" . \ No newline at end of file Modified: aperture/trunk/core/src/main/resources/org/semanticdesktop/aperture/x2r/rss-1.0-mapping.ttl =================================================================== --- aperture/trunk/core/src/main/resources/org/semanticdesktop/aperture/x2r/rss-1.0-mapping.ttl 2011-12-23 16:59:03 UTC (rev 2623) +++ aperture/trunk/core/src/main/resources/org/semanticdesktop/aperture/x2r/rss-1.0-mapping.ttl 2011-12-29 16:11:42 UTC (rev 2624) @@ -2,13 +2,55 @@ @prefix ax: <http://aperture.sourceforge.net/2011/07/x2rsubcrawler#> . @prefix dc: <http://purl.org/dc/elements/1.1/> . @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . -@prefix nie: <http://www.semanticdesktop.org/ontologies/2007/01/19/nie#> . -@prefix nfo: <http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#> . -@prefix : <uri:test:dblp1#> . +@prefix nie: <http://www.semanticdesktop.org/ontologies/2007/01/19/nie#> . +@prefix nfo: <http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#> . +@prefix nco: <http://www.semanticdesktop.org/ontologies/2007/03/22/nco#> . +@prefix : <uri:test:rss1_0#> . -:wikipediaMapping a xml2r:Mapping ; +:rssMapping a xml2r:Mapping ; xml2r:namespaceDefinition [ - xml2r:namespacePrefix "rss" ; - xml2r:namespaceUri "http://purl.org/rss/1.0/" + xml2r:namespacePrefix "rss" ; + xml2r:namespaceUri "http://purl.org/rss/1.0/" ] ; + xml2r:namespaceDefinition [ + xml2r:namespacePrefix "rdf" ; + xml2r:namespaceUri "http://www.w3.org/1999/02/22-rdf-syntax-ns#" + ] ; + xml2r:namespaceDefinition [ + xml2r:namespacePrefix "dc" ; + xml2r:namespaceUri "http://purl.org/dc/elements/1.1/" + ] ; ax:mimeType "application/x-rss-1.0" . + +:rssItemClassMap a xml2r:ClassMap ; + xml2r:belongsToMapping :rssMapping; + xml2r:nodeXPath "/rdf:RDF/rss:item" ; + xml2r:uriPattern "${data(@rdf:about)}" ; + xml2r:class nfo:TextDocument . + +:titleBridge a xml2r:PropertyBridge ; + xml2r:belongsToClassMap :rssItemClassMap ; + xml2r:property nie:title ; + xml2r:pattern "${rss:title/text()}" . + +:descriptionBridge a xml2r:PropertyBridge ; + xml2r:belongsToClassMap :rssItemClassMap ; + xml2r:property nie:plainTextContent ; + xml2r:pattern "${rss:description/text()}" . + +:dataBridge a xml2r:PropertyBridge ; + xml2r:belongsToClassMap :rssItemClassMap ; + xml2r:property nie:contentCreated ; + xml2r:pattern "${dc:date/text()}" . + +:rssItemAuthorMap a xml2r:ClassMap ; + xml2r:parentClassMap :rssItemClassMap ; + xml2r:relativeNodeXPath "dc:creator" ; + xml2r:fromParentProperty nco:creator ; + xml2r:uriPattern "uri:person:${text()||mwurlify}" ; + xml2r:class nco:Contact . + +:rssItemAuthorNameBridge a xml2r:PropertyBridge ; + xml2r:belongsToClassMap :rssItemAuthorMap ; + xml2r:property nco:fullname ; + xml2r:pattern "${text()}" . \ No newline at end of file Modified: aperture/trunk/core/src/main/resources/org/semanticdesktop/aperture/x2r/rss-2.0-mapping.ttl =================================================================== --- aperture/trunk/core/src/main/resources/org/semanticdesktop/aperture/x2r/rss-2.0-mapping.ttl 2011-12-23 16:59:03 UTC (rev 2623) +++ aperture/trunk/core/src/main/resources/org/semanticdesktop/aperture/x2r/rss-2.0-mapping.ttl 2011-12-29 16:11:42 UTC (rev 2624) @@ -3,13 +3,52 @@ @prefix dc: <http://purl.org/dc/elements/1.1/> . @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . @prefix nie: <http://www.semanticdesktop.org/ontologies/2007/01/19/nie#> . +@prefix nco: <http://www.semanticdesktop.org/ontologies/2007/03/22/nco#> . @prefix nfo: <http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#> . -@prefix : <uri:test:dblp1#> . +@prefix : <uri:test:rss2_0#> . -:wikipediaMapping a xml2r:Mapping ; +:rssMapping a xml2r:Mapping ; xml2r:namespaceDefinition [ xml2r:namespacePrefix "content" ; xml2r:namespaceUri "http://purl.org/rss/1.0/modules/content/" ] ; ax:mimeType "application/rss+xml" ; ax:rootElementName "rss" . + +:rssItemClassMap a xml2r:ClassMap ; + xml2r:belongsToMapping :rssMapping; + xml2r:nodeXPath "/rss/channel/item" ; + xml2r:uriPattern "${link/text()}" ; + xml2r:class nfo:TextDocument . + +:titleBridge a xml2r:PropertyBridge ; + xml2r:belongsToClassMap :rssItemClassMap ; + xml2r:property nie:title ; + xml2r:pattern "${title/text()}" . + +:descriptionBridge a xml2r:PropertyBridge ; + xml2r:belongsToClassMap :rssItemClassMap ; + xml2r:property nie:description ; + xml2r:pattern "${description/text()}" . + +:contentBridge a xml2r:PropertyBridge ; + xml2r:belongsToClassMap :rssItemClassMap ; + xml2r:property nie:description ; + xml2r:pattern "${content:encoded/text()}" . + +:dateBridge a xml2r:PropertyBridge ; + xml2r:belongsToClassMap :rssItemClassMap ; + xml2r:property nie:contentCreated ; + xml2r:pattern "${pubDate/text()}" . + +:rssItemAuthorMap a xml2r:ClassMap ; + xml2r:parentClassMap :rssItemClassMap ; + xml2r:relativeNodeXPath "author" ; + xml2r:fromParentProperty nco:creator ; + xml2r:uriPattern "uri:person:${text()||mwurlify}" ; + xml2r:class nco:Contact . + +:rssItemAuthorNameBridge a xml2r:PropertyBridge ; + xml2r:belongsToClassMap :rssItemAuthorMap ; + xml2r:property nco:fullname ; + xml2r:pattern "${text()}" . \ No newline at end of file Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/x2r/RSSSubCrawlerTest.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/x2r/RSSSubCrawlerTest.java 2011-12-23 16:59:03 UTC (rev 2623) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/x2r/RSSSubCrawlerTest.java 2011-12-29 16:11:42 UTC (rev 2624) @@ -1,16 +1,70 @@ package org.semanticdesktop.aperture.x2r; -import org.semanticdesktop.aperture.test.ApertureTestBase; +import java.io.InputStream; +import info.aduna.io.IOUtil; + +import org.ontoware.rdf2go.model.Model; +import org.ontoware.rdf2go.model.node.Resource; +import org.ontoware.rdf2go.model.node.URI; +import org.semanticdesktop.aperture.subcrawler.SubCrawler; +import org.semanticdesktop.aperture.test.subcrawler.SubCrawlerTestBase; +import org.semanticdesktop.aperture.test.subcrawler.TestBasicSubCrawlerHandler; +import org.semanticdesktop.aperture.vocabulary.NCO; +import org.semanticdesktop.aperture.vocabulary.NIE; + /** * Tests the subcrawlers for RSS formats. * @author Antoni * */ -public class RSSSubCrawlerTest extends ApertureTestBase { +public class RSSSubCrawlerTest extends SubCrawlerTestBase { public void testRSS1_0SubCrawler() throws Exception { - // do nothing for the time being + InputStream mappingStream = + getClass().getResourceAsStream("rss-1.0-mapping.ttl"); + String mappingString = IOUtil.readString(mappingStream); + SubCrawler subCrawler = new X2RSubCrawlerFactory( + "application/x-rss-1.0", + mappingString).get(); + TestBasicSubCrawlerHandler handler = new TestBasicSubCrawlerHandler(); + subCrawl("rss-1.0-feed.xml", subCrawler, handler); + assertNewModUnmod(handler, 16, 0, 0); + Model m = handler.getModel(); + + URI u = m.createURI( + "http://www.infoq.com/news/2011/12/relational-nosql-databases"); + + checkStatement(m, u, NIE.title, + "James Phillips on Moving from Relational to NoSQL Databases"); + checkStatement(m, u, NIE.plainTextContent, + "differences between a distributed document"); + Resource r = findSingleObjectResource(m, u, NCO.creator); + checkStatement(m, r, NCO.fullname, "Srini Penchikala"); } + public void testRSS2_0SubCrawler() throws Exception { + InputStream mappingStream = + getClass().getResourceAsStream("rss-2.0-mapping.ttl"); + String mappingString = IOUtil.readString(mappingStream); + SubCrawler subCrawler = new X2RSubCrawlerFactory( + "application/rss+xml", + mappingString).get(); + TestBasicSubCrawlerHandler handler = new TestBasicSubCrawlerHandler(); + subCrawl("rss-2.0-feed.xml", subCrawler, handler); + assertNewModUnmod(handler, 20, 0, 0); + } + + public void testAtomSubCrawler() throws Exception { + InputStream mappingStream = + getClass().getResourceAsStream("atom-mapping.ttl"); + String mappingString = IOUtil.readString(mappingStream); + SubCrawler subCrawler = new X2RSubCrawlerFactory( + "application/atom+xml", + mappingString).get(); + TestBasicSubCrawlerHandler handler = new TestBasicSubCrawlerHandler(); + subCrawl("rss-atom-1.0-feed.xml", subCrawler, handler); + assertNewModUnmod(handler, 20, 0, 0); + } + } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |