From: <dfl...@us...> - 2013-09-09 11:21:55
|
Revision: 4104 http://sourceforge.net/p/dl-learner/code/4104 Author: dfleischhacker Date: 2013-09-09 11:21:51 +0000 (Mon, 09 Sep 2013) Log Message: ----------- Add test for entity linking and make use of lemmatizing optional in SimpleSemanticIndex Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java 2013-09-09 11:03:04 UTC (rev 4103) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java 2013-09-09 11:21:51 UTC (rev 4104) @@ -23,20 +23,42 @@ /** * Initializes the semantic index to use {@code ontology} for finding all labels of an entity and + * {@code syntacticIndex} to query for documents containing these labels. This constructor initializes with + * full lemmatizing enabled. + * + * @param ontology ontology to retrieve entity labels from + * @param syntacticIndex index to query for documents containing the labels + */ + public SimpleSemanticIndex(OWLOntology ontology, SyntacticIndex syntacticIndex) { + this(ontology, syntacticIndex, true); + } + + /** + * Initializes the semantic index to use {@code ontology} for finding all labels of an entity and * {@code syntacticIndex} to query for documents containing these labels. 
* * @param ontology ontology to retrieve entity labels from * @param syntacticIndex index to query for documents containing the labels + * @param useWordNormalization whether word normalization should be used or not */ - public SimpleSemanticIndex(OWLOntology ontology, SyntacticIndex syntacticIndex) { + public SimpleSemanticIndex(OWLOntology ontology, SyntacticIndex syntacticIndex, boolean useWordNormalization) { super(ontology); - SimpleEntityCandidatesTrie trie = new SimpleEntityCandidatesTrie(new RDFSLabelEntityTextRetriever(ontology), - ontology, new SimpleEntityCandidatesTrie.LemmatizingWordNetNameGenerator(5)); + SimpleEntityCandidatesTrie trie; + if (useWordNormalization) { + trie = new SimpleEntityCandidatesTrie(new RDFSLabelEntityTextRetriever(ontology), + ontology, new SimpleEntityCandidatesTrie.LemmatizingWordNetNameGenerator(5)); + } + else { + trie = new SimpleEntityCandidatesTrie(new RDFSLabelEntityTextRetriever(ontology), + ontology, new SimpleEntityCandidatesTrie.DummyNameGenerator()); + } // trie.printTrie(); + TrieLinguisticAnnotator linguisticAnnotator = new TrieLinguisticAnnotator(trie); + linguisticAnnotator.setNormalizeWords(useWordNormalization); setSemanticAnnotator(new SemanticAnnotator( new SimpleWordSenseDisambiguation(ontology), new TrieEntityCandidateGenerator(ontology, trie), - new TrieLinguisticAnnotator(trie))); + linguisticAnnotator)); } Modified: trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java =================================================================== --- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java 2013-09-09 11:03:04 UTC (rev 4103) +++ trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java 2013-09-09 11:21:51 UTC (rev 4104) @@ -3,16 +3,11 @@ */ package org.dllearner.algorithms.isle; -import java.io.File; -import java.io.IOException; -import java.text.DecimalFormat; -import java.util.HashSet; -import java.util.Map; -import 
java.util.Set; - +import com.google.common.base.Charsets; +import com.google.common.base.Joiner; +import com.google.common.io.Files; import org.dllearner.algorithms.celoe.CELOE; -import org.dllearner.algorithms.isle.index.AnnotatedDocument; -import org.dllearner.algorithms.isle.index.TextDocument; +import org.dllearner.algorithms.isle.index.*; import org.dllearner.algorithms.isle.index.semantic.SemanticIndex; import org.dllearner.algorithms.isle.index.semantic.simple.SimpleSemanticIndex; import org.dllearner.algorithms.isle.index.syntactic.OWLOntologyLuceneSyntacticIndexCreator; @@ -36,12 +31,14 @@ import org.semanticweb.owlapi.model.OWLDataFactory; import org.semanticweb.owlapi.model.OWLOntology; import org.semanticweb.owlapi.model.OWLOntologyManager; - import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; -import com.google.common.base.Charsets; -import com.google.common.base.Joiner; -import com.google.common.io.Files; +import java.io.File; +import java.io.IOException; +import java.text.DecimalFormat; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; /** * Some tests for the ISLE algorithm. 
@@ -160,6 +157,39 @@ isle.start(); } + @Test + public void testEntityLinkingWithLemmatizing() throws Exception { + EntityCandidatesTrie ect = new SimpleEntityCandidatesTrie(new RDFSLabelEntityTextRetriever(ontology), ontology, + new SimpleEntityCandidatesTrie.LemmatizingWordNetNameGenerator(5)); + LinguisticAnnotator linguisticAnnotator = new TrieLinguisticAnnotator(ect); + WordSenseDisambiguation wsd = new SimpleWordSenseDisambiguation(ontology); + EntityCandidateGenerator ecg = new TrieEntityCandidateGenerator(ontology, ect); + SemanticAnnotator semanticAnnotator = new SemanticAnnotator(wsd, ecg, linguisticAnnotator); + + Set<TextDocument> docs = createDocuments(); + for (TextDocument doc : docs) { + AnnotatedDocument annotated = semanticAnnotator.processDocument(doc); + System.out.println(annotated); + } + } + + @Test + public void testEntityLinkingWithSimpleStringMatching() throws Exception { + EntityCandidatesTrie ect = new SimpleEntityCandidatesTrie(new RDFSLabelEntityTextRetriever(ontology), ontology, + new SimpleEntityCandidatesTrie.DummyNameGenerator()); + TrieLinguisticAnnotator linguisticAnnotator = new TrieLinguisticAnnotator(ect); + linguisticAnnotator.setNormalizeWords(false); + WordSenseDisambiguation wsd = new SimpleWordSenseDisambiguation(ontology); + EntityCandidateGenerator ecg = new TrieEntityCandidateGenerator(ontology, ect); + SemanticAnnotator semanticAnnotator = new SemanticAnnotator(wsd, ecg, linguisticAnnotator); + + Set<TextDocument> docs = createDocuments(); + for (TextDocument doc : docs) { + AnnotatedDocument annotated = semanticAnnotator.processDocument(doc); + System.out.println(annotated); + } + } + @Test public void compareISLE() throws Exception { KnowledgeSource ks = new OWLAPIOntology(ontology); @@ -170,7 +200,7 @@ lp.setClassToDescribe(cls); lp.init(); - semanticIndex = new SimpleSemanticIndex(ontology, syntacticIndex); + semanticIndex = new SimpleSemanticIndex(ontology, syntacticIndex, false); 
semanticIndex.buildIndex(createDocuments()); relevance = new PMIRelevanceMetric(semanticIndex); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |