dl-learner-svn Mailing List for DL-Learner (Page 7)

Status: Beta

Brought to you by: jenslehmann, patrickwestphal

dl-learner-svn — DL-Learner Subversion commits

You can subscribe to this list here.

2007	Jan	Feb	Mar	Apr	May	Jun	Jul	Aug (120)	Sep (36)	Oct (116)	Nov (17)	Dec (44)
2008	Jan (143)	Feb (192)	Mar (74)	Apr (84)	May (105)	Jun (64)	Jul (49)	Aug (120)	Sep (159)	Oct (156)	Nov (51)	Dec (28)
2009	Jan (17)	Feb (55)	Mar (33)	Apr (57)	May (54)	Jun (28)	Jul (6)	Aug (16)	Sep (38)	Oct (30)	Nov (26)	Dec (52)
2010	Jan (7)	Feb (91)	Mar (65)	Apr (2)	May (14)	Jun (25)	Jul (38)	Aug (48)	Sep (80)	Oct (70)	Nov (75)	Dec (77)
2011	Jan (68)	Feb (53)	Mar (51)	Apr (35)	May (65)	Jun (101)	Jul (29)	Aug (230)	Sep (95)	Oct (49)	Nov (110)	Dec (63)
2012	Jan (41)	Feb (42)	Mar (25)	Apr (46)	May (51)	Jun (44)	Jul (45)	Aug (29)	Sep (12)	Oct (9)	Nov (17)	Dec (2)
2013	Jan (12)	Feb (14)	Mar (7)	Apr (16)	May (54)	Jun (27)	Jul (11)	Aug (5)	Sep (85)	Oct (27)	Nov (37)	Dec (32)
2014	Jan (8)	Feb (29)	Mar (5)	Apr (3)	May (22)	Jun (3)	Jul (4)	Aug (3)	Sep	Oct	Nov	Dec

Flat | Threaded

<< < 1 .. 5 6 7 8 9 .. 171 > >> (Page 7 of 171)

[DL-Learner SVN] SF.net SVN: dl-learner:[4135] trunk/components-core/src/main/java/org/dllearner/algorithms/isle/StructuralEntityContext.java

From: <lor...@us...> - 2013-10-29 14:43:28

Revision: 4135
          http://sourceforge.net/p/dl-learner/code/4135
Author:   lorenz_b
Date:     2013-10-29 14:43:25 +0000 (Tue, 29 Oct 2013)
Log Message:
-----------
Add always entity itself to context.

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/StructuralEntityContext.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/StructuralEntityContext.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/StructuralEntityContext.java	2013-10-29 14:20:38 UTC (rev 4134)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/StructuralEntityContext.java	2013-10-29 14:43:25 UTC (rev 4135)
@@ -81,15 +81,20 @@
 	 */
 	public static Set<OWLEntity> getContext(OWLOntology ontology, OWLEntity entity){
 		
+		Set<OWLEntity> context;
 		if(entity.isOWLClass()){
-			return getContext(ontology, entity.asOWLClass());
+			context = getContext(ontology, entity.asOWLClass());
 		} else if(entity.isOWLObjectProperty()){
-			return getContext(ontology, entity.asOWLObjectProperty());
+			context = getContext(ontology, entity.asOWLObjectProperty());
 		} else if(entity.isOWLDataProperty()){
-			return getContext(ontology, entity.asOWLDataProperty());
+			context = getContext(ontology, entity.asOWLDataProperty());
+		} else {
+			throw new UnsupportedOperationException("Unsupported entity type: " + entity);
 		}
 		
-		throw new UnsupportedOperationException("Unsupported entity type: " + entity);
+		context.add(entity);
+		
+		return context;
 	}
 	
 	/**

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4134] trunk/components-core/src/main/java/org/dllearner/algorithms/isle

From: <lor...@us...> - 2013-10-29 14:20:42

Revision: 4134
          http://sourceforge.net/p/dl-learner/code/4134
Author:   lorenz_b
Date:     2013-10-29 14:20:38 +0000 (Tue, 29 Oct 2013)
Log Message:
-----------
Remove owl:Thing

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WindowBasedContextExtractor.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java	2013-10-29 14:11:07 UTC (rev 4133)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java	2013-10-29 14:20:38 UTC (rev 4134)
@@ -8,7 +8,6 @@
 import java.util.Map;
 import java.util.Set;
 
-import org.dllearner.algorithms.isle.index.LinguisticAnnotator;
 import org.dllearner.algorithms.isle.index.LinguisticUtil;
 import org.dllearner.core.owl.Entity;
 import org.dllearner.kb.OWLAPIOntology;
@@ -16,6 +15,7 @@
 import org.semanticweb.owlapi.model.IRI;
 import org.semanticweb.owlapi.model.OWLAnnotation;
 import org.semanticweb.owlapi.model.OWLAnnotationProperty;
+import org.semanticweb.owlapi.model.OWLClass;
 import org.semanticweb.owlapi.model.OWLEntity;
 import org.semanticweb.owlapi.model.OWLLiteral;
 import org.semanticweb.owlapi.model.OWLOntology;
@@ -23,6 +23,8 @@
 import org.semanticweb.owlapi.util.IRIShortFormProvider;
 import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider;
 
+import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl;
+
 import com.google.common.base.Joiner;
 
 
@@ -42,6 +44,8 @@
 	private IRIShortFormProvider sfp = new SimpleIRIShortFormProvider();
 	
 	private OWLAnnotationProperty[] properties;
+	
+	private static final OWLClass OWL_THING = new OWLDataFactoryImpl().getOWLThing();
 
 	public AnnotationEntityTextRetriever(OWLOntology ontology, OWLAnnotationProperty... properties) {
 		this.ontology = ontology;
@@ -111,6 +115,7 @@
 		schemaEntities.addAll(ontology.getClassesInSignature());
 		schemaEntities.addAll(ontology.getObjectPropertiesInSignature());
 		schemaEntities.addAll(ontology.getDataPropertiesInSignature());
+		schemaEntities.remove(OWL_THING);
 		
 		Map<String, Double> relevantText;
 		for (OWLEntity owlEntity : schemaEntities) {

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WindowBasedContextExtractor.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WindowBasedContextExtractor.java	2013-10-29 14:11:07 UTC (rev 4133)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WindowBasedContextExtractor.java	2013-10-29 14:20:38 UTC (rev 4134)
@@ -23,18 +23,32 @@
 public class WindowBasedContextExtractor implements ContextExtractor{
 	
 	private StanfordCoreNLP pipeline;
+	private int tokensLeft = 10;
+	private int tokensRight = 10;
 
-	/**
-	 * 
-	 */
-	public WindowBasedContextExtractor() {
+	public WindowBasedContextExtractor(int tokensLeft, int tokensRight) {
+		this.tokensLeft = tokensLeft;
+		this.tokensRight = tokensRight;
+		
+		Properties props = new Properties();
+		props.put("annotators", "tokenize, ssplit");
+		pipeline = new StanfordCoreNLP(props);
+	}
 	
+	public WindowBasedContextExtractor(int tokensLeftRight) {
+		tokensLeft = tokensLeftRight;
+		tokensRight = tokensLeftRight;
+		
 		Properties props = new Properties();
 		props.put("annotators", "tokenize, ssplit");
 		pipeline = new StanfordCoreNLP(props);
-
-		
 	}
+	
+	public WindowBasedContextExtractor() {
+		Properties props = new Properties();
+		props.put("annotators", "tokenize, ssplit");
+		pipeline = new StanfordCoreNLP(props);
+	}
 
 	/* (non-Javadoc)
 	 * @see org.dllearner.algorithms.isle.wsd.ContextExtractor#extractContext(java.lang.String, java.lang.String)

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4133] trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index

From: <dfl...@us...> - 2013-10-29 14:11:10

Revision: 4133
          http://sourceforge.net/p/dl-learner/code/4133
Author:   dfleischhacker
Date:     2013-10-29 14:11:07 +0000 (Tue, 29 Oct 2013)
Log Message:
-----------
Bug searching

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Annotation.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Annotation.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Annotation.java	2013-10-29 13:23:45 UTC (rev 4132)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Annotation.java	2013-10-29 14:11:07 UTC (rev 4133)
@@ -14,8 +14,17 @@
 	private Document referencedDocument;
 	private int offset;
 	private int length;
-	
-	public Annotation(Document referencedDocument, int offset, int length) {
+    private String matchedString;
+
+    public String getMatchedString() {
+        return matchedString;
+    }
+
+    public void setMatchedString(String matchedString) {
+        this.matchedString = matchedString;
+    }
+
+    public Annotation(Document referencedDocument, int offset, int length) {
 		this.referencedDocument = referencedDocument;
 		this.offset = offset;
 		this.length = length;

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java	2013-10-29 13:23:45 UTC (rev 4132)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java	2013-10-29 14:11:07 UTC (rev 4133)
@@ -1,20 +1,13 @@
 package org.dllearner.algorithms.isle.index;
 
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-import java.util.regex.Pattern;
-
+import com.google.common.collect.Sets;
 import org.dllearner.algorithms.isle.EntityCandidateGenerator;
 import org.dllearner.algorithms.isle.StopWordFilter;
 import org.dllearner.core.owl.Entity;
 import org.semanticweb.owlapi.model.OWLOntology;
 
-import com.google.common.collect.Sets;
+import java.util.*;
+import java.util.regex.Pattern;
 
 /**
  * Generates candidates using a entity candidates prefix trie
@@ -33,7 +26,7 @@
 	}
 	
 	public Set<Entity> getCandidates(Annotation annotation) {
-		return candidatesTrie.getCandidateEntities(annotation.getToken());
+		return candidatesTrie.getCandidateEntities(annotation.getMatchedString());
 	}
 
     /**
@@ -131,7 +124,7 @@
 		for (Annotation annotation: annotations) 
 			candidatesMap.put(annotation, getCandidates(annotation));
 		
-		postProcess(candidatesMap, window, stopWordFilter);
+		//postProcess(candidatesMap, window, stopWordFilter);
 		
 		return candidatesMap;
 	}

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java	2013-10-29 13:23:45 UTC (rev 4132)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java	2013-10-29 14:11:07 UTC (rev 4133)
@@ -36,6 +36,7 @@
             String match = candidatesTrie.getLongestMatchingText(unparsed);
             if (match != null && !match.isEmpty()) {
                 Annotation annotation = mapper.getOriginalAnnotationForPosition(i, match.length());
+                annotation.setMatchedString(match);
                 annotations.add(annotation);
                 i += match.length() - 1;
             }

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java	2013-10-29 13:23:45 UTC (rev 4132)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java	2013-10-29 14:11:07 UTC (rev 4133)
@@ -53,7 +53,7 @@
             trie = new SimpleEntityCandidatesTrie(new RDFSLabelEntityTextRetriever(ontology),
                     ontology, new SimpleEntityCandidatesTrie.DummyNameGenerator());
         }
-//        trie.printTrie();
+        trie.printTrie();
         TrieLinguisticAnnotator linguisticAnnotator = new TrieLinguisticAnnotator(trie);
         linguisticAnnotator.setNormalizeWords(useWordNormalization);
         setSemanticAnnotator(new SemanticAnnotator(

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4132] trunk/components-core/src

From: <dfl...@us...> - 2013-10-29 13:23:48

Revision: 4132
          http://sourceforge.net/p/dl-learner/code/4132
Author:   dfleischhacker
Date:     2013-10-29 13:23:45 +0000 (Tue, 29 Oct 2013)
Log Message:
-----------
Bible test case activated

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/OWLOntologyLuceneSyntacticIndexCreator.java
    trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETestCorpus.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java	2013-10-29 08:55:58 UTC (rev 4131)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java	2013-10-29 13:23:45 UTC (rev 4132)
@@ -55,7 +55,7 @@
                 addSubsequencesWordNet(entity, text);
                 
                 for (String alternativeText : nameGenerator.getAlternativeText(text)) {
-                    addEntry(alternativeText, entity, text);
+                    addEntry(alternativeText.toLowerCase(), entity, text);
                 }
             }
         }
@@ -101,36 +101,46 @@
             }
 
             // generate subsequences starting at the given start index of the given size
-            Set<String> allPossibleSubsequences = getAllPossibleSubsequences(wordnetTokens);
+            Set<String[]> allPossibleSubsequences = getAllPossibleSubsequences(tokens, wordnetTokens);
 
-            for (String s : allPossibleSubsequences) {
-                addEntry(s, entity);
+            for (String[] s : allPossibleSubsequences) {
+                addEntry(s[0], entity, s[1]);
             }
         }
     }
 
-    private static Set<String> getAllPossibleSubsequences(List<String>[] wordnetTokens) {
-        ArrayList<String> res = new ArrayList<String>();
+    private static Set<String[]> getAllPossibleSubsequences(String[] originalTokens, List<String>[] wordnetTokens) {
+        ArrayList<String[]> res = new ArrayList<String[]>();
 
         for (int size = 1; size < wordnetTokens.length + 1; size++) {
             for (int start = 0; start < wordnetTokens.length - size + 1; start++) {
-                getPossibleSubsequencesRec(res, new ArrayList<String>(), wordnetTokens, 0, size);
+                getPossibleSubsequencesRec(originalTokens, res, new ArrayList<String>(), new ArrayList<String>(),
+                        wordnetTokens, 0, size);
             }
         }
 
-        return new HashSet<String>(res);
+        return new HashSet<String[]>(res);
     }
 
-    private static void getPossibleSubsequencesRec(List<String> allSubsequences, List<String> currentSubsequence, List<String>[] wordnetTokens,
-                                            int curStart, int maxLength) {
+
+    private static void getPossibleSubsequencesRec(String[] originalTokens, List<String[]> allSubsequences,
+                                                   List<String> currentSubsequence,
+                                                   List<String> currentOriginalSubsequence,
+                                                   List<String>[] wordnetTokens,
+                                                   int curStart, int maxLength) {
+
         if (currentSubsequence.size() == maxLength) {
-            allSubsequences.add(StringUtils.join(currentSubsequence, " "));
+            allSubsequences.add(new String[]{StringUtils.join(currentSubsequence, " ").toLowerCase(), StringUtils
+                    .join(currentOriginalSubsequence, " ").toLowerCase()});
             return;
         }
         for (String w : wordnetTokens[curStart]) {
             ArrayList<String> tmpSequence = new ArrayList<String>(currentSubsequence);
+            ArrayList<String> tmpOriginalSequence = new ArrayList<String>(currentOriginalSubsequence);
             tmpSequence.add(w);
-            getPossibleSubsequencesRec(allSubsequences, tmpSequence, wordnetTokens, curStart + 1, maxLength);
+            tmpOriginalSequence.add(originalTokens[curStart]);
+            getPossibleSubsequencesRec(originalTokens, allSubsequences, tmpSequence, tmpOriginalSequence, wordnetTokens,
+                    curStart + 1, maxLength);
         }
     }
 
@@ -183,7 +193,7 @@
 		List<String> termsList = new ArrayList<String>(trieMap.keySet());
 		Collections.sort(termsList);
 		for (String key : termsList) {
-			output += key + ":\n";
+			output += key + " (" + trieMap.get(key).getFullToken() + ") :\n";
 			for (Entity candidate: trieMap.get(key).getEntitySet()) {
 				output += "\t"+candidate+"\n";
 			}
@@ -207,10 +217,10 @@
         }
 
         // generate subsequences starting at the given start index of the given size
-        Set<String> allPossibleSubsequences = getAllPossibleSubsequences(wordnetTokens);
+        Set<String[]> allPossibleSubsequences = getAllPossibleSubsequences(tokens, wordnetTokens);
 
-        for (String s : allPossibleSubsequences) {
-            System.out.println(s);
+        for (String[] s : allPossibleSubsequences) {
+            System.out.println(String.format("%s - %s", s[0], s[1]));
         }
     }
 

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/OWLOntologyLuceneSyntacticIndexCreator.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/OWLOntologyLuceneSyntacticIndexCreator.java	2013-10-29 08:55:58 UTC (rev 4131)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/OWLOntologyLuceneSyntacticIndexCreator.java	2013-10-29 13:23:45 UTC (rev 4132)
@@ -42,7 +42,7 @@
 		this.ontology = ontology;
 		this.annotationProperty = annotationProperty;
 		this.searchField = searchField;
-		
+
 		schemaEntities = new HashSet<OWLEntity>();
 		schemaEntities.addAll(ontology.getClassesInSignature());
 		schemaEntities.addAll(ontology.getObjectPropertiesInSignature());

Modified: trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETestCorpus.java
===================================================================
--- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETestCorpus.java	2013-10-29 08:55:58 UTC (rev 4131)
+++ trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETestCorpus.java	2013-10-29 13:23:45 UTC (rev 4132)
@@ -6,14 +6,12 @@
 import com.google.common.base.Charsets;
 import com.google.common.base.Joiner;
 import com.google.common.io.Files;
-import com.hp.hpl.jena.vocabulary.RDFS;
-
 import org.dllearner.algorithms.celoe.CELOE;
 import org.dllearner.algorithms.isle.index.*;
 import org.dllearner.algorithms.isle.index.semantic.SemanticIndex;
 import org.dllearner.algorithms.isle.index.semantic.simple.SimpleSemanticIndex;
-import org.dllearner.algorithms.isle.index.syntactic.OWLOntologyLuceneSyntacticIndexCreator;
 import org.dllearner.algorithms.isle.index.syntactic.SyntacticIndex;
+import org.dllearner.algorithms.isle.index.syntactic.TextDocumentSyntacticIndexCreator;
 import org.dllearner.algorithms.isle.metrics.PMIRelevanceMetric;
 import org.dllearner.algorithms.isle.metrics.RelevanceMetric;
 import org.dllearner.algorithms.isle.metrics.RelevanceUtils;
@@ -32,17 +30,12 @@
 import org.junit.Before;
 import org.junit.Test;
 import org.semanticweb.owlapi.apibinding.OWLManager;
-import org.semanticweb.owlapi.model.IRI;
-import org.semanticweb.owlapi.model.OWLDataFactory;
-import org.semanticweb.owlapi.model.OWLEntity;
-import org.semanticweb.owlapi.model.OWLOntology;
-import org.semanticweb.owlapi.model.OWLOntologyManager;
-import org.semanticweb.owlapi.vocab.OWLRDFVocabulary;
-
+import org.semanticweb.owlapi.model.*;
 import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl;
 
 import java.io.File;
 import java.io.IOException;
+import java.net.URL;
 import java.text.DecimalFormat;
 import java.util.HashSet;
 import java.util.Map;
@@ -80,10 +73,10 @@
 		manager = OWLManager.createOWLOntologyManager();
 		ontology = manager.loadOntologyFromOntologyDocument(new File(testFolder + "ontology.owl"));
 		textRetriever = new RDFSLabelEntityTextRetriever(ontology);
-		syntacticIndex = new OWLOntologyLuceneSyntacticIndexCreator(ontology, df.getRDFSLabel(), searchField).buildIndex();
-		
-		
-	}
+        RemoteDataProvider chapterIndexProvider = new RemoteDataProvider(
+                new URL("http://gold.linkeddata.org/data/bible/chapter_index.zip"));
+        syntacticIndex = TextDocumentSyntacticIndexCreator.loadIndex(chapterIndexProvider.getLocalDirectory());
+    }
 	
 	private Set<TextDocument> createDocuments(){
 		Set<TextDocument> documents = new HashSet<TextDocument>();
@@ -100,10 +93,27 @@
 		}
 		return documents;
 	}
-	
-	
 
-	/**
+    private Set<TextDocument> createBibleDocuments() throws IOException {
+        Set<TextDocument> documents = new HashSet<TextDocument>();
+        RemoteDataProvider bibleByChapter = new RemoteDataProvider(
+                new URL("http://gold.linkeddata.org/data/bible/split_by_chapter.zip"));
+        File folder = bibleByChapter.getLocalDirectory();
+        for (File file  : folder.listFiles()) {
+            if(!file.isDirectory() && !file.isHidden()){
+                try {
+                    String text = Files.toString(file, Charsets.UTF_8);
+                    documents.add(new TextDocument(text));
+                } catch (IOException e) {
+                    e.printStackTrace();
+                }
+            }
+        }
+        return documents;
+    }
+
+
+    /**
 	 * @throws java.lang.Exception
 	 */
 	@Before
@@ -153,7 +163,7 @@
 		lp.init();
 		
 		semanticIndex = new SimpleSemanticIndex(ontology, syntacticIndex);
-		semanticIndex.buildIndex(createDocuments());
+		semanticIndex.buildIndex(createBibleDocuments());
 		
 		relevance = new PMIRelevanceMetric(semanticIndex);
 		
@@ -209,10 +219,10 @@
 		ClassLearningProblem lp = new ClassLearningProblem(reasoner);
 		lp.setClassToDescribe(cls);
 		lp.init();
+
+        semanticIndex = new SimpleSemanticIndex(ontology, syntacticIndex, false);
+		semanticIndex.buildIndex(createBibleDocuments());
 		
-		semanticIndex = new SimpleSemanticIndex(ontology, syntacticIndex, false);
-		semanticIndex.buildIndex(createDocuments());
-		
 		relevance = new PMIRelevanceMetric(semanticIndex);
 		
 		Map<Entity, Double> entityRelevance = RelevanceUtils.getRelevantEntities(cls, ontology, relevance);

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4131] trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/RemoteDataProvider.java

From: <dfl...@us...> - 2013-10-29 08:56:00

Revision: 4131
          http://sourceforge.net/p/dl-learner/code/4131
Author:   dfleischhacker
Date:     2013-10-29 08:55:58 +0000 (Tue, 29 Oct 2013)
Log Message:
-----------
Add RemoteDataProvider for downloading zipped data for local access

Added Paths:
-----------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/RemoteDataProvider.java

Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/RemoteDataProvider.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/RemoteDataProvider.java	                        (rev 0)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/RemoteDataProvider.java	2013-10-29 08:55:58 UTC (rev 4131)
@@ -0,0 +1,203 @@
+package org.dllearner.algorithms.isle.index;
+
+import org.apache.commons.codec.digest.DigestUtils;
+import org.slf4j.Logger;
+import org.springframework.util.FileSystemUtils;
+
+import java.io.*;
+import java.net.URL;
+import java.net.URLConnection;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
+
+/**
+ * Provides methods to download zipped zipped files from remote locations and extracts and stores them locally.
+ * @author Daniel Fleischhacker
+ */
+public class RemoteDataProvider {
+    private final static Logger log = org.slf4j.LoggerFactory.getLogger(RemoteDataProvider.class);
+
+    public static String DATA_DIRECTORY = "tmp/";
+    private URL url;
+    private File localDirectory;
+
+    private File lastModifiedCache;
+
+    /**
+     * Initializes this downloader to fetch data from the given URL. The download process is started
+     * immediately.
+     * @param url URL to download data from
+     * @throws IOException on errors downloading or extracting the file
+     */
+    public RemoteDataProvider(URL url) throws IOException {
+        this.url = url;
+
+        log.debug("Initializing for URL '{}'", url);
+
+        log.debug("Data directory is '{}'", DATA_DIRECTORY);
+        File dataDir = new File(DATA_DIRECTORY);
+        if (!dataDir.exists()) {
+            log.debug("Data directory not yet existing, trying to create");
+            if (!dataDir.mkdirs()) {
+                throw new RuntimeException(
+                        "Unable to create temporary file directory: " + dataDir.getAbsoluteFile());
+            }
+        }
+
+        this.localDirectory = new File(DATA_DIRECTORY + DigestUtils.md5Hex(url.toString()));
+        log.debug("'{}' --> '{}'", url, localDirectory.getAbsolutePath());
+        this.lastModifiedCache = new File(DATA_DIRECTORY + DigestUtils.md5Hex(url.toString()) + ".last");
+
+        downloadData();
+    }
+
+    /**
+     * Downloads the file from the URL assigned to this RemoteDataProvider and extracts it into
+     * the tmp subdirectory of the current working directory. The actual path to access the data
+     * can be retrieved using {@link #getLocalDirectory()}.
+     *
+     * @throws IOException on errors downloading or extracting the file
+     */
+    private void downloadData() throws IOException {
+        String localModified = getLocalLastModified();
+
+        log.debug("Local last modified: {}", localModified);
+        boolean triggerDownload = false;
+
+        if (localModified == null) {
+            log.debug("No local last modified date found, triggering download");
+            triggerDownload = true;
+        }
+        else {
+            URLConnection conn = url.openConnection();
+            long lastModified = conn.getLastModified();
+            log.debug("Remote last modified: {}", lastModified);
+            if (!Long.valueOf(localModified).equals(lastModified)) {
+                log.debug("Last modified dates do not match, triggering download");
+                triggerDownload = true;
+            }
+        }
+
+        if (triggerDownload) {
+            deleteData();
+            if (!this.localDirectory.mkdir()) {
+                throw new RuntimeException(
+                        "Unable to create temporary file directory: " + localDirectory.getAbsoluteFile());
+            }
+            ZipInputStream zin = new ZipInputStream(this.url.openStream());
+
+            ZipEntry ze;
+            byte[] buffer = new byte[2048];
+            while ((ze = zin.getNextEntry()) != null) {
+                File outpath = new File(localDirectory.getAbsolutePath() + "/" + ze.getName());
+                if (!outpath.getParentFile().exists()) {
+                    outpath.getParentFile().mkdirs();
+                }
+                if (ze.isDirectory()) {
+                    outpath.mkdirs();
+                }
+                else {
+                    FileOutputStream output = null;
+                    try {
+                        output = new FileOutputStream(outpath);
+                        int len = 0;
+                        while ((len = zin.read(buffer)) > 0) {
+                            output.write(buffer, 0, len);
+                        }
+                    }
+                    finally {
+                        if (output != null) {
+                            output.close();
+                        }
+                    }
+                }
+            }
+            zin.close();
+
+            BufferedWriter writer = new BufferedWriter(new FileWriter(lastModifiedCache));
+            long lastModified = url.openConnection().getLastModified();
+            log.debug("Writing local last modified date: '{}'", lastModified);
+            writer.write(String.valueOf(lastModified));
+            writer.close();
+        }
+        else {
+            log.debug("Local data is up to date, skipping download");
+        }
+    }
+
+    /**
+     * Forces a redownload of the data. The data directory is first deleted and then recreated.
+     */
+    public void redownload() throws IOException {
+        deleteData();
+        downloadData();
+    }
+
+    /**
+     * Deletes the data downloaded.
+     */
+    public void deleteData() {
+        FileSystemUtils.deleteRecursively(localDirectory);
+        lastModifiedCache.delete();
+    }
+
+    /**
+     * Returns the folder to access the downloaded data. The returned File object points to the directory
+     * created for the downloaded data.
+     * @return file pointing to the downloaded data's directory
+     */
+    public File getLocalDirectory() {
+        return localDirectory;
+    }
+
+    /**
+     * Returns the URL assigned to this RemoteDataProvider
+     * @return the URL assigned to this downloader
+     */
+    public URL getUrl() {
+        return url;
+    }
+
+    /**
+     * Returns the content of the local last modified cache for this URL. If no such file exists, null is returned
+     * @return content of local last modified cache, if not existing null
+     */
+    private String getLocalLastModified() {
+        if (!lastModifiedCache.exists()) {
+            return null;
+        }
+        String res;
+        BufferedReader reader = null;
+        try {
+            reader = new BufferedReader(new FileReader(lastModifiedCache));
+            res = reader.readLine();
+            reader.close();
+            return res;
+        }
+        catch (FileNotFoundException e) {
+            return null;
+        }
+        catch (IOException e) {
+            return null;
+        }
+        finally {
+            if (reader != null) {
+                try {
+                    reader.close();
+                }
+                catch (IOException e) {
+                    log.error("Unable to close last modified cache property", e);
+                }
+            }
+        }
+    }
+
+    public static void main(String[] args) throws IOException {
+        RemoteDataProvider rid = new RemoteDataProvider(
+                new URL("http://gold.linkeddata.org/data/bible/verse_index.zip"));
+        System.out.println(rid.getLocalDirectory().getAbsolutePath());
+        RemoteDataProvider rid2 = new RemoteDataProvider(
+                new URL("http://gold.linkeddata.org/data/bible/chapter_index.zip"));
+        System.out.println(rid2.getLocalDirectory().getAbsolutePath());
+    }
+}

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4130] trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index

From: <dfl...@us...> - 2013-10-24 13:56:30

Revision: 4130
          http://sourceforge.net/p/dl-learner/code/4130
Author:   dfleischhacker
Date:     2013-10-24 13:56:26 +0000 (Thu, 24 Oct 2013)
Log Message:
-----------
Get the ISLE pipeline working

* Ability to resolve match in trie to the producing string (the pre-wordnet one)
* Add NormalizedTextMapper for mapping normalized words to their original documents
* Activate structure based WSD

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/EntityCandidatesTrie.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java

Added Paths:
-----------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/FullTokenEntitySetPair.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/NormalizedTextMapper.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/EntityCandidatesTrie.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/EntityCandidatesTrie.java	2013-10-24 13:47:58 UTC (rev 4129)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/EntityCandidatesTrie.java	2013-10-24 13:56:26 UTC (rev 4130)
@@ -1,11 +1,9 @@
 package org.dllearner.algorithms.isle.index;
 
-import java.util.Map.Entry;
+import org.dllearner.core.owl.Entity;
+
 import java.util.Set;
 
-import org.dllearner.core.owl.Entity;
-import org.dllearner.utilities.datastructures.PrefixTrie;
-
 public interface EntityCandidatesTrie {
 	
 	/**
@@ -22,14 +20,22 @@
 	 * @return
 	 */
 	public Set<Entity> getCandidateEntities(String s);
-	
-	
+
+
 	/**
-	 * Gets the longest matching string
-	 * @param s
-	 * @return
+	 * Returns the string on which this entry is based on. This is used e.g. for storing the original
+     * ontology string when the parameter string has been added to the trie after generation by using
+     * WordNet or other additional methods.
+     *
+	 * @param s the string to search in the trie
+	 * @return string generating the path of the longest match in the trie
 	 */
-	public String getLongestMatch(String s);
-	
-	
+	public String getGeneratingStringForLongestMatch(String s);
+
+    /**
+     * Gets the longest matching string
+     * @param s
+     * @return
+     */
+    public String getLongestMatchingText(String s);
 }

Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/FullTokenEntitySetPair.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/FullTokenEntitySetPair.java	                        (rev 0)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/FullTokenEntitySetPair.java	2013-10-24 13:56:26 UTC (rev 4130)
@@ -0,0 +1,31 @@
+package org.dllearner.algorithms.isle.index;
+
+import org.dllearner.core.owl.Entity;
+
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * A pair consisting of a full string token and the corresponding entities
+ */
+public class FullTokenEntitySetPair {
+    private String fullToken;
+    private Set<Entity> entitySet;
+
+    public FullTokenEntitySetPair(String fullToken) {
+        this.fullToken = fullToken;
+        this.entitySet = new HashSet<Entity>();
+    }
+
+    public String getFullToken() {
+        return fullToken;
+    }
+
+    public Set<Entity> getEntitySet() {
+        return entitySet;
+    }
+
+    public void addEntity(Entity entity) {
+        entitySet.add(entity);
+    }
+}

Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/NormalizedTextMapper.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/NormalizedTextMapper.java	                        (rev 0)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/NormalizedTextMapper.java	2013-10-24 13:56:26 UTC (rev 4130)
@@ -0,0 +1,140 @@
+package org.dllearner.algorithms.isle.index;
+
+import java.util.ArrayList;
+
+/**
+ * Provides text normalization and mapping of normalized ranges to the original ones.
+ */
+public class NormalizedTextMapper {
+    private Document originalDocument;
+    private String originalText;
+    private String normalizedText;
+
+    private ArrayList<OccurenceMappingPair> normalizedIndexToOriginalIndex;
+
+    public NormalizedTextMapper(Document original) {
+        this.originalDocument = original;
+        this.originalText = original.getContent();
+        this.normalizedIndexToOriginalIndex = new ArrayList<OccurenceMappingPair>();
+
+        StringBuilder sb = new StringBuilder();
+        int currentOriginalIndex = 0;
+        for (String originalWord : originalText.split(" ")) {
+            String normalizedWord = getNormalizedWord(originalWord);
+            normalizedIndexToOriginalIndex
+                    .add(new OccurenceMappingPair(currentOriginalIndex, originalWord.length(), sb.length(),
+                            normalizedWord.length()));
+            currentOriginalIndex += originalWord.length() + 1;
+            sb.append(normalizedWord);
+            sb.append(" ");
+        }
+        normalizedText = sb.toString();
+    }
+
+    public String getOriginalText() {
+        return originalText;
+    }
+
+    public String getNormalizedText() {
+        return normalizedText;
+    }
+
+    /**
+     * Returns the annotation for the original text matching the given position and length in the normalized
+     * text.
+     *
+     * @param position   position in the normalized text to get annotation for
+     * @param length length of the text to get annotation for
+     * @return
+     */
+    public Annotation getOriginalAnnotationForPosition(int position, int length) {
+        int curNormalizedLength = 0;
+        int originalStart = -1;
+        int curOriginalLength = 0;
+
+        for (OccurenceMappingPair p : normalizedIndexToOriginalIndex) {
+            if (p.getNormalizedIndex() == position) {
+                originalStart = p.getOriginalIndex();
+            }
+            if (originalStart != -1) {
+                curNormalizedLength += p.getNormalizedLength();
+                curOriginalLength += p.getOriginalLength();
+                if (curNormalizedLength >= length) {
+                    return new Annotation(originalDocument, originalStart, curOriginalLength);
+                }
+
+                // include space
+                curNormalizedLength += 1;
+                curOriginalLength += 1;
+            }
+        }
+
+        return null;
+    }
+
+    /**
+     * Returns the normalized form of the given word. Word must not contain any spaces or the like.
+     * @param word
+     * @return
+     */
+    private String getNormalizedWord(String word) {
+        return LinguisticUtil.getInstance().getNormalizedForm(word);
+    }
+
+    public static void main(String[] args) {
+        NormalizedTextMapper n = new NormalizedTextMapper(new TextDocument("This is a testing text using letters"));
+        System.out.println(n.getOriginalText());
+        System.out.println(n.getNormalizedText());
+        for (OccurenceMappingPair p : n.normalizedIndexToOriginalIndex) {
+            System.out.println(p);
+        }
+        System.out.println(n.getOriginalAnnotationForPosition(7,6));
+        System.out.println(n.getOriginalAnnotationForPosition(23,6));
+        System.out.println(n.getOriginalAnnotationForPosition(7,1));
+        System.out.println(n.getOriginalAnnotationForPosition(14,15));
+    }
+
+    /**
+     * Maps words identified by index and length in the normalized texts to the original word.
+     */
+    private class OccurenceMappingPair {
+        private int originalIndex;
+        private int originalLength;
+        private int normalizedIndex;
+        private int normalizedLength;
+
+        private OccurenceMappingPair(int originalIndex, int originalLength, int normalizedIndex, int normalizedLength) {
+
+            this.originalIndex = originalIndex;
+            this.originalLength = originalLength;
+            this.normalizedIndex = normalizedIndex;
+            this.normalizedLength = normalizedLength;
+        }
+
+        private int getNormalizedIndex() {
+            return normalizedIndex;
+        }
+
+        private int getNormalizedLength() {
+            return normalizedLength;
+        }
+
+        private int getOriginalLength() {
+            return originalLength;
+        }
+
+        private int getOriginalIndex() {
+            return originalIndex;
+        }
+
+        @Override
+        public String toString() {
+            return "OccurenceMappingPair{" +
+                    "originalIndex=" + originalIndex +
+                    ", originalLength=" + originalLength +
+                    ", normalizedIndex=" + normalizedIndex +
+                    ", normalizedLength=" + normalizedLength +
+                    '}';
+        }
+    }
+}

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java	2013-10-24 13:47:58 UTC (rev 4129)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java	2013-10-24 13:56:26 UTC (rev 4130)
@@ -10,7 +10,7 @@
 
 public class SimpleEntityCandidatesTrie implements EntityCandidatesTrie {
 
-	PrefixTrie<Set<Entity>> trie;
+	PrefixTrie<FullTokenEntitySetPair> trie;
 	EntityTextRetriever entityTextRetriever;
 
 //    /**
@@ -39,7 +39,7 @@
     }
 	
 	public void buildTrie(OWLOntology ontology, NameGenerator nameGenerator) {
-		this.trie = new PrefixTrie<Set<Entity>>();
+		this.trie = new PrefixTrie<FullTokenEntitySetPair>();
 		Map<Entity, Set<String>> relevantText = entityTextRetriever.getRelevantText(ontology);
 		
 		for (Entity entity : relevantText.keySet()) {
@@ -55,7 +55,7 @@
                 addSubsequencesWordNet(entity, text);
                 
                 for (String alternativeText : nameGenerator.getAlternativeText(text)) {
-                    addEntry(alternativeText, entity);
+                    addEntry(alternativeText, entity, text);
                 }
             }
         }
@@ -136,37 +136,55 @@
 
     @Override
 	public void addEntry(String s, Entity e) {
-		Set<Entity> candidates;
+		FullTokenEntitySetPair candidates;
 		if (trie.contains(s)) 
 			candidates = trie.get(s);
 		else
-			candidates = new HashSet<Entity>();
+			candidates = new FullTokenEntitySetPair(s);
 		
-		candidates.add(e);
+		candidates.addEntity(e);
 		
 		trie.put(s, candidates);
 	}
 
+    public void addEntry(String s, Entity e, String originalString) {
+        FullTokenEntitySetPair candidates;
+        if (trie.contains(s))
+            candidates = trie.get(s);
+        else
+            candidates = new FullTokenEntitySetPair(originalString);
+
+        candidates.addEntity(e);
+
+        trie.put(s, candidates);
+    }
+
 	@Override
 	public Set<Entity> getCandidateEntities(String s) {
-        Set<Entity> res = trie.get(s);
-		return res == null ? new HashSet<Entity>() : trie.get(s);
+        FullTokenEntitySetPair res = trie.get(s);
+		return res == null ? new HashSet<Entity>() : trie.get(s).getEntitySet();
 	}
 
 	@Override
-	public String getLongestMatch(String s) {
+	public String getGeneratingStringForLongestMatch(String s) {
 		CharSequence match = trie.getLongestMatch(s);
-		return (match!=null) ? match.toString() : null;
+		return (match!=null) ? trie.get(match).getFullToken() : null;
 	}
+
+    @Override
+    public String getLongestMatchingText(String s) {
+        CharSequence match = trie.getLongestMatch(s);
+        return (match!=null) ? match.toString() : null;
+    }
 	
 	public String toString() {
 		String output = "";
-		Map<String,Set<Entity>> trieMap = trie.toMap();
+		Map<String,FullTokenEntitySetPair> trieMap = trie.toMap();
 		List<String> termsList = new ArrayList<String>(trieMap.keySet());
 		Collections.sort(termsList);
 		for (String key : termsList) {
 			output += key + ":\n";
-			for (Entity candidate: trieMap.get(key)) {
+			for (Entity candidate: trieMap.get(key).getEntitySet()) {
 				output += "\t"+candidate+"\n";
 			}
 		}

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java	2013-10-24 13:47:58 UTC (rev 4129)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java	2013-10-24 13:56:26 UTC (rev 4130)
@@ -25,24 +25,23 @@
      */
     @Override
     public Set<Annotation> annotate(Document document) {
-        String content = document.getContent();
         Set<Annotation> annotations = new HashSet<Annotation>();
+        NormalizedTextMapper mapper = new NormalizedTextMapper(document);
+        String content = mapper.getNormalizedText();
         for (int i = 0; i < content.length(); i++) {
             if (Character.isWhitespace(content.charAt(i))) {
                 continue;
             }
             String unparsed = content.substring(i);
-            if (normalizeWords) {
-                unparsed = LinguisticUtil.getInstance().getNormalizedForm(unparsed);
-            }
-            String match = candidatesTrie.getLongestMatch(unparsed);
+            String match = candidatesTrie.getLongestMatchingText(unparsed);
             if (match != null && !match.isEmpty()) {
-
-                //TODO: here we are losing the original offset and index...
-                Annotation annotation = new Annotation(document, i, match.length());
+                Annotation annotation = mapper.getOriginalAnnotationForPosition(i, match.length());
                 annotations.add(annotation);
                 i += match.length() - 1;
             }
+            while (!Character.isWhitespace(content.charAt(i)) && i < content.length()) {
+                i++;
+            }
         }
         return annotations;
     }

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java	2013-10-24 13:47:58 UTC (rev 4129)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java	2013-10-24 13:56:26 UTC (rev 4130)
@@ -10,7 +10,8 @@
 import org.dllearner.algorithms.isle.index.semantic.SemanticIndex;
 import org.dllearner.algorithms.isle.index.syntactic.SyntacticIndex;
 import org.dllearner.algorithms.isle.textretrieval.RDFSLabelEntityTextRetriever;
-import org.dllearner.algorithms.isle.wsd.SimpleWordSenseDisambiguation;
+import org.dllearner.algorithms.isle.wsd.StructureBasedWordSenseDisambiguation;
+import org.dllearner.algorithms.isle.wsd.WindowBasedContextExtractor;
 import org.semanticweb.owlapi.model.OWLOntology;
 
 /**
@@ -56,7 +57,7 @@
         TrieLinguisticAnnotator linguisticAnnotator = new TrieLinguisticAnnotator(trie);
         linguisticAnnotator.setNormalizeWords(useWordNormalization);
         setSemanticAnnotator(new SemanticAnnotator(
-                new SimpleWordSenseDisambiguation(ontology),
+                new StructureBasedWordSenseDisambiguation(new WindowBasedContextExtractor(), ontology),
                 new TrieEntityCandidateGenerator(ontology, trie),
                 linguisticAnnotator));
 

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4129] trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java

From: <dfl...@us...> - 2013-10-24 13:48:02

Revision: 4129
          http://sourceforge.net/p/dl-learner/code/4129
Author:   dfleischhacker
Date:     2013-10-24 13:47:58 +0000 (Thu, 24 Oct 2013)
Log Message:
-----------
Fix wrong cleaning in TextDocument

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java	2013-10-24 13:47:14 UTC (rev 4128)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java	2013-10-24 13:47:58 UTC (rev 4129)
@@ -21,7 +21,7 @@
         this.content = content.toLowerCase();
         this.content = this.content.replaceAll("[^a-z ]", " ");
         this.content = this.content.replaceAll("\\s{2,}", " ");
-        this.content = content.trim();
+        this.content = this.content.trim();
     }
 
     @Override

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4128] trunk/components-core/src/main/java/org/dllearner/algorithms/isle/VSMCosineDocumentSimilarity.java

From: <dfl...@us...> - 2013-10-24 13:47:17

Revision: 4128
          http://sourceforge.net/p/dl-learner/code/4128
Author:   dfleischhacker
Date:     2013-10-24 13:47:14 +0000 (Thu, 24 Oct 2013)
Log Message:
-----------
Fix NPE in VSMCosineDocumentSimilarity by returning empty set if unable to produce a vector

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/VSMCosineDocumentSimilarity.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/VSMCosineDocumentSimilarity.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/VSMCosineDocumentSimilarity.java	2013-10-24 13:40:06 UTC (rev 4127)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/VSMCosineDocumentSimilarity.java	2013-10-24 13:47:14 UTC (rev 4128)
@@ -182,6 +182,10 @@
     private Map<String, Double> getTermWeights(IndexReader reader, int docId)
             throws IOException {
         Terms vector = reader.getTermVector(docId, CONTENT);
+        //TODO: not sure if this is reasonable but it prevents NPEs
+        if (vector == null) {
+            return new HashMap<String, Double>();
+        }
         TermsEnum termsEnum = vector.iterator(null);
         Map<String, Double> weights = new HashMap<String, Double>();
         BytesRef text = null;

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4127] trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd

From: <dfl...@us...> - 2013-10-24 13:40:09

Revision: 4127
          http://sourceforge.net/p/dl-learner/code/4127
Author:   dfleischhacker
Date:     2013-10-24 13:40:06 +0000 (Thu, 24 Oct 2013)
Log Message:
-----------
Fix ContextExtractors

* Use processed content instead of raw since annotations link to the former
* Fix bug occurring for tokens at index 0

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SentenceBasedContextExtractor.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WindowBasedContextExtractor.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SentenceBasedContextExtractor.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SentenceBasedContextExtractor.java	2013-10-22 14:08:14 UTC (rev 4126)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SentenceBasedContextExtractor.java	2013-10-24 13:40:06 UTC (rev 4127)
@@ -3,12 +3,6 @@
  */
 package org.dllearner.algorithms.isle.wsd;
 
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Properties;
-
-import org.dllearner.algorithms.isle.index.TextDocument;
-
 import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation;
 import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation;
 import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation;
@@ -16,7 +10,12 @@
 import edu.stanford.nlp.pipeline.Annotation;
 import edu.stanford.nlp.pipeline.StanfordCoreNLP;
 import edu.stanford.nlp.util.CoreMap;
+import org.dllearner.algorithms.isle.index.TextDocument;
 
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Properties;
+
 /**
  * @author Lorenz Buehmann
  *
@@ -37,14 +36,14 @@
 	@Override
 	public List<String> extractContext(org.dllearner.algorithms.isle.index.Annotation annotation) {
 		//split text into sentences
-		List<CoreMap> sentences = getSentences(annotation.getReferencedDocument().getRawContent());
-		
+		List<CoreMap> sentences = getSentences(annotation.getReferencedDocument().getContent());
+
 		//find the sentence containing the token of the annotation
 		int tokenStart = annotation.getOffset();
 		int index = 0;
 		for (CoreMap sentence : sentences) {
 			String s = sentence.toString();
-			if (index < tokenStart && s.length() > tokenStart) {
+			if (index <= tokenStart && s.length() > tokenStart) {
 				List<String> context = new ArrayList<String>();
 				for (CoreLabel label : sentence.get(TokensAnnotation.class)) {
 					// this is the text of the token

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WindowBasedContextExtractor.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WindowBasedContextExtractor.java	2013-10-22 14:08:14 UTC (rev 4126)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WindowBasedContextExtractor.java	2013-10-24 13:40:06 UTC (rev 4127)
@@ -3,12 +3,6 @@
  */
 package org.dllearner.algorithms.isle.wsd;
 
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Properties;
-
-import org.dllearner.algorithms.isle.index.TextDocument;
-
 import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation;
 import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation;
 import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation;
@@ -16,7 +10,12 @@
 import edu.stanford.nlp.pipeline.Annotation;
 import edu.stanford.nlp.pipeline.StanfordCoreNLP;
 import edu.stanford.nlp.util.CoreMap;
+import org.dllearner.algorithms.isle.index.TextDocument;
 
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Properties;
+
 /**
  * @author Lorenz Buehmann
  *
@@ -43,14 +42,14 @@
 	@Override
 	public List<String> extractContext(org.dllearner.algorithms.isle.index.Annotation annotation) {
 		// split text into sentences
-		List<CoreMap> sentences = getSentences(annotation.getReferencedDocument().getRawContent());
+		List<CoreMap> sentences = getSentences(annotation.getReferencedDocument().getContent());
 
 		// find the sentence containing the token of the annotation
 		int tokenStart = annotation.getOffset();
 		int index = 0;
 		for (CoreMap sentence : sentences) {
 			String s = sentence.toString();
-			if (index < tokenStart && s.length() > tokenStart) {
+			if (index <= tokenStart && s.length() > tokenStart) {
 				List<String> context = new ArrayList<String>();
 				for (CoreLabel label : sentence.get(TokensAnnotation.class)) {
 					// this is the text of the token
@@ -62,8 +61,8 @@
 			}
 			index += s.length();
 		}
-		throw new RuntimeException("Token " + annotation.getToken() + " not found in text "
-				+ annotation.getReferencedDocument().getRawContent());
+		throw new RuntimeException("Token " + annotation + " not found in text "
+				+ annotation.getReferencedDocument().getContent());
 
 	}
 	

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4126] trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/TextDocumentSyntacticIndexCreator.java

From: <dfl...@us...> - 2013-10-22 14:08:18

Revision: 4126
          http://sourceforge.net/p/dl-learner/code/4126
Author:   dfleischhacker
Date:     2013-10-22 14:08:14 +0000 (Tue, 22 Oct 2013)
Log Message:
-----------
Add TextDocumentSyntacticIndexCreator for creating Lucene indexes from text files

Added Paths:
-----------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/TextDocumentSyntacticIndexCreator.java

Copied: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/TextDocumentSyntacticIndexCreator.java (from rev 4123, trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/OWLOntologyLuceneSyntacticIndexCreator.java)
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/TextDocumentSyntacticIndexCreator.java	                        (rev 0)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/TextDocumentSyntacticIndexCreator.java	2013-10-22 14:08:14 UTC (rev 4126)
@@ -0,0 +1,93 @@
+/**
+ * 
+ */
+package org.dllearner.algorithms.isle.index.syntactic;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.SimpleFSDirectory;
+import org.apache.lucene.util.Version;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * Creates a syntactic index from text files stored on disk
+ *
+ */
+public class TextDocumentSyntacticIndexCreator {
+
+	private Directory indexDirectory;
+    private final File inputDirectory;
+    private final static String searchField = "text";
+
+    public TextDocumentSyntacticIndexCreator(File inputDirectory, File indexDirectory)
+            throws IOException {
+        this.indexDirectory = new SimpleFSDirectory(indexDirectory);
+        this.inputDirectory = inputDirectory;
+    }
+
+    public SyntacticIndex buildIndex() throws Exception{
+		Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43);
+		IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer);
+		IndexWriter writer = new IndexWriter(indexDirectory, indexWriterConfig);
+		System.out.println( "Creating index ..." );
+
+        Set<org.apache.lucene.document.Document> luceneDocuments = new HashSet<org.apache.lucene.document.Document>();
+        FieldType stringType = new FieldType(StringField.TYPE_STORED);
+        stringType.setStoreTermVectors(false);
+        FieldType textType = new FieldType(TextField.TYPE_STORED);
+        textType.setStoreTermVectors(false);
+		
+		for (File f : inputDirectory.listFiles()) {
+            if (!f.getName().endsWith(".txt")) {
+                continue;
+            }
+            org.apache.lucene.document.Document luceneDocument = new org.apache.lucene.document.Document();
+            luceneDocument.add(new Field("uri", f.toURI().toString(), stringType));
+
+            StringBuilder content = new StringBuilder();
+            BufferedReader reader = new BufferedReader(new FileReader(f));
+
+            String line;
+            while ((line = reader.readLine()) != null) {
+                content.append(line);
+                content.append("\n");
+            }
+            reader.close();
+
+            luceneDocument.add(new Field(searchField, content.toString(), textType));
+            luceneDocuments.add(luceneDocument);
+        }
+        writer.addDocuments(luceneDocuments);
+		
+		System.out.println("Done.");
+		writer.close();
+		
+		return new LuceneSyntacticIndex(indexDirectory, searchField);
+	}
+
+    public static SyntacticIndex loadIndex(File indexDirectory) throws Exception {
+        return new LuceneSyntacticIndex(new SimpleFSDirectory(indexDirectory), searchField);
+    }
+
+    public static void main(String[] args) throws Exception {
+        if (args.length != 2) {
+            System.err.println("Usage: <input director> <index directory>");
+            System.exit(1);
+            return;
+        }
+        new TextDocumentSyntacticIndexCreator(new File(args[0]), new File(args[1])).buildIndex();
+    }
+}

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4125] trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETestCorpus.java

From: <jen...@us...> - 2013-10-22 12:35:02

Revision: 4125
          http://sourceforge.net/p/dl-learner/code/4125
Author:   jenslehmann
Date:     2013-10-22 12:34:58 +0000 (Tue, 22 Oct 2013)
Log Message:
-----------
small ISLE test case changes

Modified Paths:
--------------
    trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETestCorpus.java

Modified: trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETestCorpus.java
===================================================================
--- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETestCorpus.java	2013-10-22 12:07:35 UTC (rev 4124)
+++ trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETestCorpus.java	2013-10-22 12:34:58 UTC (rev 4125)
@@ -68,8 +68,10 @@
 	// we assume that the ontology is named "ontology.owl" and that all text files
 	// are in a subdirectory called "corpus"
 	private String testFolder = "../test/isle/swore/";
-//	NamedClass cls = new NamedClass("http://example.com/father#father");
-	NamedClass cls = new NamedClass("http://ns.softwiki.de/req/CustomerRequirement");
+	private NamedClass cls = new NamedClass("http://ns.softwiki.de/req/CustomerRequirement");
+//	private String testFolder = "../test/isle/father/";
+//	private NamedClass cls = new NamedClass("http://example.com/father#father");
+
 	
 	/**
 	 * 

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4124] trunk

From: <jen...@us...> - 2013-10-22 12:07:38

Revision: 4124
          http://sourceforge.net/p/dl-learner/code/4124
Author:   jenslehmann
Date:     2013-10-22 12:07:35 +0000 (Tue, 22 Oct 2013)
Log Message:
-----------
unit test for ISLE without external corpus

Modified Paths:
--------------
    trunk/test/isle/swore/ontology.owl

Added Paths:
-----------
    trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETestNoCorpus.java

Added: trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETestNoCorpus.java
===================================================================
--- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETestNoCorpus.java	                        (rev 0)
+++ trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETestNoCorpus.java	2013-10-22 12:07:35 UTC (rev 4124)
@@ -0,0 +1,87 @@
+package org.dllearner.algorithms.isle;
+
+import java.io.File;
+import java.util.Map;
+
+import org.dllearner.algorithms.isle.index.semantic.SemanticIndex;
+import org.dllearner.algorithms.isle.index.semantic.simple.SimpleSemanticIndex;
+import org.dllearner.algorithms.isle.index.syntactic.OWLOntologyLuceneSyntacticIndexCreator;
+import org.dllearner.algorithms.isle.index.syntactic.SyntacticIndex;
+import org.dllearner.algorithms.isle.metrics.PMIRelevanceMetric;
+import org.dllearner.algorithms.isle.metrics.RelevanceMetric;
+import org.dllearner.algorithms.isle.metrics.RelevanceUtils;
+import org.dllearner.algorithms.isle.textretrieval.EntityTextRetriever;
+import org.dllearner.algorithms.isle.textretrieval.RDFSLabelEntityTextRetriever;
+import org.dllearner.core.AbstractReasonerComponent;
+import org.dllearner.core.KnowledgeSource;
+import org.dllearner.core.owl.Entity;
+import org.dllearner.core.owl.NamedClass;
+import org.dllearner.kb.OWLAPIOntology;
+import org.dllearner.learningproblems.ClassLearningProblem;
+import org.dllearner.reasoning.FastInstanceChecker;
+import org.junit.Test;
+import org.semanticweb.owlapi.apibinding.OWLManager;
+import org.semanticweb.owlapi.model.OWLDataFactory;
+import org.semanticweb.owlapi.model.OWLOntology;
+import org.semanticweb.owlapi.model.OWLOntologyManager;
+import org.semanticweb.owlapi.vocab.OWLRDFVocabulary;
+
+import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl;
+
+public class ISLETestNoCorpus {
+
+	private OWLOntologyManager manager;
+	private OWLOntology ontology;
+	private OWLDataFactory df = new OWLDataFactoryImpl();
+	private EntityTextRetriever textRetriever;
+	private RelevanceMetric relevance;
+	private String searchField = "label";
+	private SemanticIndex semanticIndex;
+	private SyntacticIndex syntacticIndex;
+	
+	// we assume that the ontology is named "ontology.owl" and that all text files
+	// are in a subdirectory called "corpus"
+	private String testFolder = "../test/isle/swore/";
+//	NamedClass cls = new NamedClass("http://example.com/father#father");
+	NamedClass cls = new NamedClass("http://ns.softwiki.de/req/CustomerRequirement");
+	
+	public ISLETestNoCorpus() throws Exception{
+		manager = OWLManager.createOWLOntologyManager();
+		ontology = manager.loadOntologyFromOntologyDocument(new File(testFolder + "ontology.owl"));
+		textRetriever = new RDFSLabelEntityTextRetriever(ontology);
+		syntacticIndex = new OWLOntologyLuceneSyntacticIndexCreator(ontology, df.getRDFSLabel(), searchField).buildIndex();
+		
+		
+	}	
+	
+	// uses the rdfs:label, rdfs:comment (or other properties) of the class directly instead of an external corpus
+	@Test
+	public void testISLENoCorpus() throws Exception {
+		KnowledgeSource ks = new OWLAPIOntology(ontology);
+		AbstractReasonerComponent reasoner = new FastInstanceChecker(ks);
+		reasoner.init();
+		
+		ClassLearningProblem lp = new ClassLearningProblem(reasoner);
+		lp.setClassToDescribe(cls);
+		lp.init();
+		
+		semanticIndex = new SimpleSemanticIndex(ontology, syntacticIndex);
+		semanticIndex.buildIndex(df.getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_COMMENT.getIRI()), "en");
+		
+//		semanticIndex = new SimpleSemanticIndex(ontology, syntacticIndex);
+//		semanticIndex.buildIndex(createDocuments());
+		
+		relevance = new PMIRelevanceMetric(semanticIndex);
+		
+		Map<Entity, Double> entityRelevance = RelevanceUtils.getRelevantEntities(cls, ontology, relevance);
+		NLPHeuristic heuristic = new NLPHeuristic(entityRelevance);
+		
+		ISLE isle = new ISLE(lp, reasoner);
+		isle.setHeuristic(heuristic);
+		isle.init();
+		
+		isle.start();
+	}
+
+	
+}

Modified: trunk/test/isle/swore/ontology.owl
===================================================================
--- trunk/test/isle/swore/ontology.owl	2013-10-15 14:46:35 UTC (rev 4123)
+++ trunk/test/isle/swore/ontology.owl	2013-10-22 12:07:35 UTC (rev 4124)
@@ -669,7 +669,7 @@
     <!-- http://ns.softwiki.de/req/CustomerRequirement -->
 
     <owl:Class rdf:about="&req;CustomerRequirement">
-        <rdfs:label>customer requirement</rdfs:label>
+        <rdfs:label>customer requirement (a requirement created by a customer)</rdfs:label>
         <owl:equivalentClass>
             <owl:Class>
                 <owl:intersectionOf rdf:parseType="Collection">

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4123] trunk

From: <jen...@us...> - 2013-10-15 14:46:40

Revision: 4123
          http://sourceforge.net/p/dl-learner/code/4123
Author:   jenslehmann
Date:     2013-10-15 14:46:35 +0000 (Tue, 15 Oct 2013)
Log Message:
-----------
added new unit test for the ISLE case without external corpus

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/celoe/OENode.java

Added Paths:
-----------
    trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETestCorpus.java
    trunk/test/isle/swore/ontology_with_comments.owl

Removed Paths:
-------------
    trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/celoe/OENode.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/celoe/OENode.java	2013-10-15 12:06:45 UTC (rev 4122)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/celoe/OENode.java	2013-10-15 14:46:35 UTC (rev 4123)
@@ -24,6 +24,7 @@
 import java.util.List;
 
 import org.dllearner.algorithms.SearchTreeNode;
+import org.dllearner.algorithms.isle.NLPHeuristic;
 import org.dllearner.core.owl.Description;
 
 /**
@@ -119,6 +120,7 @@
 	
 	public String getShortDescription(String baseURI) {
 		String ret = description.toString(baseURI,null) + " [";
+//		ret += "score" + NLPHeuristic.getNodeScore(this) + ",";
 		ret += "acc:" + dfPercent.format(accuracy) + ", ";
 		ret += "he:" + horizontalExpansion + ", ";
 		ret += "c:" + children.size() + ", ";

Deleted: trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java
===================================================================
--- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java	2013-10-15 12:06:45 UTC (rev 4122)
+++ trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java	2013-10-15 14:46:35 UTC (rev 4123)
@@ -1,264 +0,0 @@
-/**
- * 
- */
-package org.dllearner.algorithms.isle;
-
-import com.google.common.base.Charsets;
-import com.google.common.base.Joiner;
-import com.google.common.io.Files;
-
-import org.dllearner.algorithms.celoe.CELOE;
-import org.dllearner.algorithms.isle.index.*;
-import org.dllearner.algorithms.isle.index.semantic.SemanticIndex;
-import org.dllearner.algorithms.isle.index.semantic.simple.SimpleSemanticIndex;
-import org.dllearner.algorithms.isle.index.syntactic.OWLOntologyLuceneSyntacticIndexCreator;
-import org.dllearner.algorithms.isle.index.syntactic.SyntacticIndex;
-import org.dllearner.algorithms.isle.metrics.PMIRelevanceMetric;
-import org.dllearner.algorithms.isle.metrics.RelevanceMetric;
-import org.dllearner.algorithms.isle.metrics.RelevanceUtils;
-import org.dllearner.algorithms.isle.textretrieval.EntityTextRetriever;
-import org.dllearner.algorithms.isle.textretrieval.RDFSLabelEntityTextRetriever;
-import org.dllearner.algorithms.isle.wsd.SimpleWordSenseDisambiguation;
-import org.dllearner.algorithms.isle.wsd.WordSenseDisambiguation;
-import org.dllearner.core.AbstractReasonerComponent;
-import org.dllearner.core.KnowledgeSource;
-import org.dllearner.core.owl.Entity;
-import org.dllearner.core.owl.NamedClass;
-import org.dllearner.kb.OWLAPIOntology;
-import org.dllearner.learningproblems.ClassLearningProblem;
-import org.dllearner.reasoning.FastInstanceChecker;
-import org.dllearner.utilities.Helper;
-import org.junit.Before;
-import org.junit.Test;
-import org.semanticweb.owlapi.apibinding.OWLManager;
-import org.semanticweb.owlapi.model.IRI;
-import org.semanticweb.owlapi.model.OWLDataFactory;
-import org.semanticweb.owlapi.model.OWLEntity;
-import org.semanticweb.owlapi.model.OWLOntology;
-import org.semanticweb.owlapi.model.OWLOntologyManager;
-
-import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl;
-
-import java.io.File;
-import java.io.IOException;
-import java.text.DecimalFormat;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.Set;
-
-/**
- * Some tests for the ISLE algorithm.
- * 
- * @author Lorenz Buehmann
- * @author Jens Lehmann
- */
-public class ISLETest {
-	
-	private OWLOntologyManager manager;
-	private OWLOntology ontology;
-	private OWLDataFactory df = new OWLDataFactoryImpl();
-	private EntityTextRetriever textRetriever;
-	private RelevanceMetric relevance;
-	private String searchField = "label";
-	private SemanticIndex semanticIndex;
-	private SyntacticIndex syntacticIndex;
-	
-	// we assume that the ontology is named "ontology.owl" and that all text files
-	// are in a subdirectory called "corpus"
-	private String testFolder = "../test/isle/swore/";
-//	NamedClass cls = new NamedClass("http://example.com/father#father");
-	NamedClass cls = new NamedClass("http://ns.softwiki.de/req/CustomerRequirement");
-	
-	/**
-	 * 
-	 */
-	public ISLETest() throws Exception{
-		manager = OWLManager.createOWLOntologyManager();
-		ontology = manager.loadOntologyFromOntologyDocument(new File(testFolder + "ontology.owl"));
-		textRetriever = new RDFSLabelEntityTextRetriever(ontology);
-		syntacticIndex = new OWLOntologyLuceneSyntacticIndexCreator(ontology, df.getRDFSLabel(), searchField).buildIndex();
-		
-		
-	}
-	
-	private Set<TextDocument> createDocuments(){
-		Set<TextDocument> documents = new HashSet<TextDocument>();
-		File folder = new File(testFolder+"corpus/");
-		for (File file  : folder.listFiles()) {
-			if(!file.isDirectory() && !file.isHidden()){
-				try {
-					String text = Files.toString(file, Charsets.UTF_8);
-					documents.add(new TextDocument(text));
-				} catch (IOException e) {
-					e.printStackTrace();
-				}
-			}
-		}
-		return documents;
-	}
-	
-	
-
-	/**
-	 * @throws java.lang.Exception
-	 */
-	@Before
-	public void setUp() throws Exception{
-		
-	}
-
-//	@Test
-	public void testTextRetrieval() {
-		System.out.println("Text for entity " + cls + ":");
-		Map<String, Double> relevantText = textRetriever.getRelevantText(cls);
-		System.out.println(Joiner.on("\n").join(relevantText.entrySet()));
-	}
-	
-//	@Test
-	public void testEntityRelevance() throws Exception {
-		System.out.println("Relevant entities for entity " + cls + ":");
-		Map<Entity, Double> entityRelevance = RelevanceUtils.getRelevantEntities(cls, ontology, relevance);
-		System.out.println(Joiner.on("\n").join(entityRelevance.entrySet()));
-	}
-	
-	@Test
-	public void testSemanticIndexAnnotationProperty(){
-		semanticIndex = new SimpleSemanticIndex(ontology, syntacticIndex);
-		semanticIndex.buildIndex(df.getRDFSLabel(), null);		
-//		NamedClass nc = new NamedClass("http://example.com/father#father");
-		Set<AnnotatedDocument> documents = semanticIndex.getDocuments(cls);
-		System.out.println("Documents for " + cls + ":\n" + documents);
-	}
-	
-	@Test
-	public void testSemanticIndexCorpus(){
-		semanticIndex = new SimpleSemanticIndex(ontology, syntacticIndex);
-		semanticIndex.buildIndex(createDocuments());
-		Set<AnnotatedDocument> documents = semanticIndex.getDocuments(cls);
-		System.out.println(documents);
-	}
-	
-	@Test
-	public void testISLE() throws Exception {
-		KnowledgeSource ks = new OWLAPIOntology(ontology);
-		AbstractReasonerComponent reasoner = new FastInstanceChecker(ks);
-		reasoner.init();
-		
-		ClassLearningProblem lp = new ClassLearningProblem(reasoner);
-		lp.setClassToDescribe(cls);
-		lp.init();
-		
-		semanticIndex = new SimpleSemanticIndex(ontology, syntacticIndex);
-		semanticIndex.buildIndex(createDocuments());
-		
-		relevance = new PMIRelevanceMetric(semanticIndex);
-		
-		Map<Entity, Double> entityRelevance = RelevanceUtils.getRelevantEntities(cls, ontology, relevance);
-		NLPHeuristic heuristic = new NLPHeuristic(entityRelevance);
-		
-		ISLE isle = new ISLE(lp, reasoner);
-		isle.setHeuristic(heuristic);
-		isle.init();
-		
-		isle.start();
-	}
-
-    @Test
-    public void testEntityLinkingWithLemmatizing() throws Exception {
-        EntityCandidatesTrie ect = new SimpleEntityCandidatesTrie(new RDFSLabelEntityTextRetriever(ontology), ontology,
-                new SimpleEntityCandidatesTrie.LemmatizingWordNetNameGenerator(5));
-        LinguisticAnnotator linguisticAnnotator = new TrieLinguisticAnnotator(ect);
-        WordSenseDisambiguation wsd = new SimpleWordSenseDisambiguation(ontology);
-        EntityCandidateGenerator ecg = new TrieEntityCandidateGenerator(ontology, ect);
-        SemanticAnnotator semanticAnnotator = new SemanticAnnotator(wsd, ecg, linguisticAnnotator);
-
-        Set<TextDocument> docs = createDocuments();
-        for (TextDocument doc : docs) {
-            AnnotatedDocument annotated = semanticAnnotator.processDocument(doc);
-            System.out.println(annotated);
-        }
-    }
-
-    @Test
-    public void testEntityLinkingWithSimpleStringMatching() throws Exception {
-        EntityCandidatesTrie ect = new SimpleEntityCandidatesTrie(new RDFSLabelEntityTextRetriever(ontology), ontology,
-                new SimpleEntityCandidatesTrie.DummyNameGenerator());
-        TrieLinguisticAnnotator linguisticAnnotator = new TrieLinguisticAnnotator(ect);
-        linguisticAnnotator.setNormalizeWords(false);
-        WordSenseDisambiguation wsd = new SimpleWordSenseDisambiguation(ontology);
-        EntityCandidateGenerator ecg = new TrieEntityCandidateGenerator(ontology, ect);
-        SemanticAnnotator semanticAnnotator = new SemanticAnnotator(wsd, ecg, linguisticAnnotator);
-
-        Set<TextDocument> docs = createDocuments();
-        for (TextDocument doc : docs) {
-            AnnotatedDocument annotated = semanticAnnotator.processDocument(doc);
-            System.out.println(annotated);
-        }
-    }
-
-	@Test
-	public void compareISLE() throws Exception {
-		KnowledgeSource ks = new OWLAPIOntology(ontology);
-		AbstractReasonerComponent reasoner = new FastInstanceChecker(ks);
-		reasoner.init();
-		
-		ClassLearningProblem lp = new ClassLearningProblem(reasoner);
-		lp.setClassToDescribe(cls);
-		lp.init();
-		
-		semanticIndex = new SimpleSemanticIndex(ontology, syntacticIndex, false);
-		semanticIndex.buildIndex(createDocuments());
-		
-		relevance = new PMIRelevanceMetric(semanticIndex);
-		
-		Map<Entity, Double> entityRelevance = RelevanceUtils.getRelevantEntities(cls, ontology, relevance);
-		NLPHeuristic heuristic = new NLPHeuristic(entityRelevance);
-		
-		// run ISLE
-		ISLE isle = new ISLE(lp, reasoner);
-		isle.setHeuristic(heuristic);
-		isle.setSearchTreeFile(testFolder + "searchTreeISLE.txt");
-		isle.setWriteSearchTree(true);
-		isle.setReplaceSearchTree(true);
-		isle.setTerminateOnNoiseReached(true);
-		isle.init();
-		isle.start();
-		
-		// run standard CELOE as reference
-		CELOE celoe = new CELOE(lp, reasoner);
-//		celoe.setHeuristic(heuristic);
-		celoe.setSearchTreeFile(testFolder + "searchTreeCELOE.txt");
-		celoe.setWriteSearchTree(true);
-		celoe.setTerminateOnNoiseReached(true);
-		celoe.setReplaceSearchTree(true);
-		celoe.init();
-		celoe.start();
-		System.out.println();
-		
-		DecimalFormat df = new DecimalFormat("#00.00");
-		System.out.println("Summary ISLE vs. CELOE");
-		System.out.println("======================");
-		System.out.println("accuracy:           " + df.format(100*isle.getCurrentlyBestAccuracy())+"%  vs.  " + df.format(100*celoe.getCurrentlyBestAccuracy())+"%");
-		System.out.println("expressions tested: " + isle.getClassExpressionTests() + "  vs.  " + celoe.getClassExpressionTests());
-		System.out.println("search tree nodes:  " + isle.getNodes().size() + "  vs.  " + celoe.getNodes().size());
-		System.out.println("runtime:            " + Helper.prettyPrintNanoSeconds(isle.getTotalRuntimeNs()) + "  vs.  " + Helper.prettyPrintNanoSeconds(celoe.getTotalRuntimeNs()));
-	
-		// only ISLE
-//		System.out.println("accuracy:           " + df.format(100*isle.getCurrentlyBestAccuracy())+"%");
-//		System.out.println("expressions tested: " + isle.getClassExpressionTests());
-//		System.out.println("search tree nodes:  " + isle.getNodes().size());
-//		System.out.println("runtime:            " + Helper.prettyPrintNanoSeconds(isle.getTotalRuntimeNs()));
-		
-	}	
-	
-	@Test
-	public void testWordSenseDisambiguation() throws Exception {
-		Set<OWLEntity> context = StructuralEntityContext.getContext(ontology, df.getOWLClass(IRI.create(cls.getName())));
-		System.out.println(context);
-		
-		Set<String> contextNL = StructuralEntityContext.getContextInNaturalLanguage(ontology, df.getOWLClass(IRI.create(cls.getName())));
-		System.out.println(contextNL);
-	}
-	
-	
-}

Copied: trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETestCorpus.java (from rev 4122, trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java)
===================================================================
--- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETestCorpus.java	                        (rev 0)
+++ trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETestCorpus.java	2013-10-15 14:46:35 UTC (rev 4123)
@@ -0,0 +1,266 @@
+/**
+ * 
+ */
+package org.dllearner.algorithms.isle;
+
+import com.google.common.base.Charsets;
+import com.google.common.base.Joiner;
+import com.google.common.io.Files;
+import com.hp.hpl.jena.vocabulary.RDFS;
+
+import org.dllearner.algorithms.celoe.CELOE;
+import org.dllearner.algorithms.isle.index.*;
+import org.dllearner.algorithms.isle.index.semantic.SemanticIndex;
+import org.dllearner.algorithms.isle.index.semantic.simple.SimpleSemanticIndex;
+import org.dllearner.algorithms.isle.index.syntactic.OWLOntologyLuceneSyntacticIndexCreator;
+import org.dllearner.algorithms.isle.index.syntactic.SyntacticIndex;
+import org.dllearner.algorithms.isle.metrics.PMIRelevanceMetric;
+import org.dllearner.algorithms.isle.metrics.RelevanceMetric;
+import org.dllearner.algorithms.isle.metrics.RelevanceUtils;
+import org.dllearner.algorithms.isle.textretrieval.EntityTextRetriever;
+import org.dllearner.algorithms.isle.textretrieval.RDFSLabelEntityTextRetriever;
+import org.dllearner.algorithms.isle.wsd.SimpleWordSenseDisambiguation;
+import org.dllearner.algorithms.isle.wsd.WordSenseDisambiguation;
+import org.dllearner.core.AbstractReasonerComponent;
+import org.dllearner.core.KnowledgeSource;
+import org.dllearner.core.owl.Entity;
+import org.dllearner.core.owl.NamedClass;
+import org.dllearner.kb.OWLAPIOntology;
+import org.dllearner.learningproblems.ClassLearningProblem;
+import org.dllearner.reasoning.FastInstanceChecker;
+import org.dllearner.utilities.Helper;
+import org.junit.Before;
+import org.junit.Test;
+import org.semanticweb.owlapi.apibinding.OWLManager;
+import org.semanticweb.owlapi.model.IRI;
+import org.semanticweb.owlapi.model.OWLDataFactory;
+import org.semanticweb.owlapi.model.OWLEntity;
+import org.semanticweb.owlapi.model.OWLOntology;
+import org.semanticweb.owlapi.model.OWLOntologyManager;
+import org.semanticweb.owlapi.vocab.OWLRDFVocabulary;
+
+import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl;
+
+import java.io.File;
+import java.io.IOException;
+import java.text.DecimalFormat;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Some tests for the ISLE algorithm.
+ * 
+ * @author Lorenz Buehmann
+ * @author Jens Lehmann
+ */
+public class ISLETestCorpus {
+	
+	private OWLOntologyManager manager;
+	private OWLOntology ontology;
+	private OWLDataFactory df = new OWLDataFactoryImpl();
+	private EntityTextRetriever textRetriever;
+	private RelevanceMetric relevance;
+	private String searchField = "label";
+	private SemanticIndex semanticIndex;
+	private SyntacticIndex syntacticIndex;
+	
+	// we assume that the ontology is named "ontology.owl" and that all text files
+	// are in a subdirectory called "corpus"
+	private String testFolder = "../test/isle/swore/";
+//	NamedClass cls = new NamedClass("http://example.com/father#father");
+	NamedClass cls = new NamedClass("http://ns.softwiki.de/req/CustomerRequirement");
+	
+	/**
+	 * 
+	 */
+	public ISLETestCorpus() throws Exception{
+		manager = OWLManager.createOWLOntologyManager();
+		ontology = manager.loadOntologyFromOntologyDocument(new File(testFolder + "ontology.owl"));
+		textRetriever = new RDFSLabelEntityTextRetriever(ontology);
+		syntacticIndex = new OWLOntologyLuceneSyntacticIndexCreator(ontology, df.getRDFSLabel(), searchField).buildIndex();
+		
+		
+	}
+	
+	private Set<TextDocument> createDocuments(){
+		Set<TextDocument> documents = new HashSet<TextDocument>();
+		File folder = new File(testFolder+"corpus/");
+		for (File file  : folder.listFiles()) {
+			if(!file.isDirectory() && !file.isHidden()){
+				try {
+					String text = Files.toString(file, Charsets.UTF_8);
+					documents.add(new TextDocument(text));
+				} catch (IOException e) {
+					e.printStackTrace();
+				}
+			}
+		}
+		return documents;
+	}
+	
+	
+
+	/**
+	 * @throws java.lang.Exception
+	 */
+	@Before
+	public void setUp() throws Exception{
+		
+	}
+
+//	@Test
+	public void testTextRetrieval() {
+		System.out.println("Text for entity " + cls + ":");
+		Map<String, Double> relevantText = textRetriever.getRelevantText(cls);
+		System.out.println(Joiner.on("\n").join(relevantText.entrySet()));
+	}
+	
+//	@Test
+	public void testEntityRelevance() throws Exception {
+		System.out.println("Relevant entities for entity " + cls + ":");
+		Map<Entity, Double> entityRelevance = RelevanceUtils.getRelevantEntities(cls, ontology, relevance);
+		System.out.println(Joiner.on("\n").join(entityRelevance.entrySet()));
+	}
+	
+	@Test
+	public void testSemanticIndexAnnotationProperty(){
+		semanticIndex = new SimpleSemanticIndex(ontology, syntacticIndex);
+		semanticIndex.buildIndex(df.getRDFSLabel(), null);		
+//		NamedClass nc = new NamedClass("http://example.com/father#father");
+		Set<AnnotatedDocument> documents = semanticIndex.getDocuments(cls);
+		System.out.println("Documents for " + cls + ":\n" + documents);
+	}
+	
+	@Test
+	public void testSemanticIndexCorpus(){
+		semanticIndex = new SimpleSemanticIndex(ontology, syntacticIndex);
+		semanticIndex.buildIndex(createDocuments());
+		Set<AnnotatedDocument> documents = semanticIndex.getDocuments(cls);
+		System.out.println(documents);
+	}
+	
+	@Test
+	public void testISLE() throws Exception {
+		KnowledgeSource ks = new OWLAPIOntology(ontology);
+		AbstractReasonerComponent reasoner = new FastInstanceChecker(ks);
+		reasoner.init();
+		
+		ClassLearningProblem lp = new ClassLearningProblem(reasoner);
+		lp.setClassToDescribe(cls);
+		lp.init();
+		
+		semanticIndex = new SimpleSemanticIndex(ontology, syntacticIndex);
+		semanticIndex.buildIndex(createDocuments());
+		
+		relevance = new PMIRelevanceMetric(semanticIndex);
+		
+		Map<Entity, Double> entityRelevance = RelevanceUtils.getRelevantEntities(cls, ontology, relevance);
+		NLPHeuristic heuristic = new NLPHeuristic(entityRelevance);
+		
+		ISLE isle = new ISLE(lp, reasoner);
+		isle.setHeuristic(heuristic);
+		isle.init();
+		
+		isle.start();
+	}
+	
+    @Test
+    public void testEntityLinkingWithLemmatizing() throws Exception {
+        EntityCandidatesTrie ect = new SimpleEntityCandidatesTrie(new RDFSLabelEntityTextRetriever(ontology), ontology,
+                new SimpleEntityCandidatesTrie.LemmatizingWordNetNameGenerator(5));
+        LinguisticAnnotator linguisticAnnotator = new TrieLinguisticAnnotator(ect);
+        WordSenseDisambiguation wsd = new SimpleWordSenseDisambiguation(ontology);
+        EntityCandidateGenerator ecg = new TrieEntityCandidateGenerator(ontology, ect);
+        SemanticAnnotator semanticAnnotator = new SemanticAnnotator(wsd, ecg, linguisticAnnotator);
+
+        Set<TextDocument> docs = createDocuments();
+        for (TextDocument doc : docs) {
+            AnnotatedDocument annotated = semanticAnnotator.processDocument(doc);
+            System.out.println(annotated);
+        }
+    }
+
+    @Test
+    public void testEntityLinkingWithSimpleStringMatching() throws Exception {
+        EntityCandidatesTrie ect = new SimpleEntityCandidatesTrie(new RDFSLabelEntityTextRetriever(ontology), ontology,
+                new SimpleEntityCandidatesTrie.DummyNameGenerator());
+        TrieLinguisticAnnotator linguisticAnnotator = new TrieLinguisticAnnotator(ect);
+        linguisticAnnotator.setNormalizeWords(false);
+        WordSenseDisambiguation wsd = new SimpleWordSenseDisambiguation(ontology);
+        EntityCandidateGenerator ecg = new TrieEntityCandidateGenerator(ontology, ect);
+        SemanticAnnotator semanticAnnotator = new SemanticAnnotator(wsd, ecg, linguisticAnnotator);
+
+        Set<TextDocument> docs = createDocuments();
+        for (TextDocument doc : docs) {
+            AnnotatedDocument annotated = semanticAnnotator.processDocument(doc);
+            System.out.println(annotated);
+        }
+    }
+
+	@Test
+	public void compareISLE() throws Exception {
+		KnowledgeSource ks = new OWLAPIOntology(ontology);
+		AbstractReasonerComponent reasoner = new FastInstanceChecker(ks);
+		reasoner.init();
+		
+		ClassLearningProblem lp = new ClassLearningProblem(reasoner);
+		lp.setClassToDescribe(cls);
+		lp.init();
+		
+		semanticIndex = new SimpleSemanticIndex(ontology, syntacticIndex, false);
+		semanticIndex.buildIndex(createDocuments());
+		
+		relevance = new PMIRelevanceMetric(semanticIndex);
+		
+		Map<Entity, Double> entityRelevance = RelevanceUtils.getRelevantEntities(cls, ontology, relevance);
+		NLPHeuristic heuristic = new NLPHeuristic(entityRelevance);
+		
+		// run ISLE
+		ISLE isle = new ISLE(lp, reasoner);
+		isle.setHeuristic(heuristic);
+		isle.setSearchTreeFile(testFolder + "searchTreeISLE.txt");
+		isle.setWriteSearchTree(true);
+//		isle.setReplaceSearchTree(true);
+		isle.setTerminateOnNoiseReached(true);
+		isle.init();
+		isle.start();
+		
+		// run standard CELOE as reference
+		CELOE celoe = new CELOE(lp, reasoner);
+//		celoe.setHeuristic(heuristic);
+		celoe.setSearchTreeFile(testFolder + "searchTreeCELOE.txt");
+		celoe.setWriteSearchTree(true);
+		celoe.setTerminateOnNoiseReached(true);
+		celoe.setReplaceSearchTree(true);
+		celoe.init();
+		celoe.start();
+		System.out.println();
+		
+		DecimalFormat df = new DecimalFormat("#00.00");
+		System.out.println("Summary ISLE vs. CELOE");
+		System.out.println("======================");
+		System.out.println("accuracy:           " + df.format(100*isle.getCurrentlyBestAccuracy())+"%  vs.  " + df.format(100*celoe.getCurrentlyBestAccuracy())+"%");
+		System.out.println("expressions tested: " + isle.getClassExpressionTests() + "  vs.  " + celoe.getClassExpressionTests());
+		System.out.println("search tree nodes:  " + isle.getNodes().size() + "  vs.  " + celoe.getNodes().size());
+		System.out.println("runtime:            " + Helper.prettyPrintNanoSeconds(isle.getTotalRuntimeNs()) + "  vs.  " + Helper.prettyPrintNanoSeconds(celoe.getTotalRuntimeNs()));
+	
+		// only ISLE
+//		System.out.println("accuracy:           " + df.format(100*isle.getCurrentlyBestAccuracy())+"%");
+//		System.out.println("expressions tested: " + isle.getClassExpressionTests());
+//		System.out.println("search tree nodes:  " + isle.getNodes().size());
+//		System.out.println("runtime:            " + Helper.prettyPrintNanoSeconds(isle.getTotalRuntimeNs()));
+		
+	}	
+	
+	@Test
+	public void testWordSenseDisambiguation() throws Exception {
+		Set<OWLEntity> context = StructuralEntityContext.getContext(ontology, df.getOWLClass(IRI.create(cls.getName())));
+		System.out.println(context);
+		
+		Set<String> contextNL = StructuralEntityContext.getContextInNaturalLanguage(ontology, df.getOWLClass(IRI.create(cls.getName())));
+		System.out.println(contextNL);
+	}
+	
+	
+}

Added: trunk/test/isle/swore/ontology_with_comments.owl
===================================================================
--- trunk/test/isle/swore/ontology_with_comments.owl	                        (rev 0)
+++ trunk/test/isle/swore/ontology_with_comments.owl	2013-10-15 14:46:35 UTC (rev 4123)
@@ -0,0 +1,2344 @@
+<?xml version="1.0"?>
+
+
+<!DOCTYPE rdf:RDF [
+    <!ENTITY req "http://ns.softwiki.de/req/" >
+    <!ENTITY foaf2 "http://xmlns.com/foaf/0.1/" >
+    <!ENTITY dcmitype "http://purl.org/dc/dcmitype/" >
+    <!ENTITY owl "http://www.w3.org/2002/07/owl#" >
+    <!ENTITY dc "http://purl.org/dc/elements/1.1/" >
+    <!ENTITY xsd "http://www.w3.org/2001/XMLSchema#" >
+    <!ENTITY owl2xml "http://www.w3.org/2006/12/owl2-xml#" >
+    <!ENTITY foaf "http://www.holygoat.co.uk/foaf.rdf#" >
+    <!ENTITY skos "http://www.w3.org/2004/02/skos/core#" >
+    <!ENTITY rdfs "http://www.w3.org/2000/01/rdf-schema#" >
+    <!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#" >
+    <!ENTITY tags "http://www.holygoat.co.uk/owl/redwood/0.1/tags/" >
+]>
+
+
+<rdf:RDF xmlns="http://ns.softwiki.de/req/"
+     xml:base="http://ns.softwiki.de/req/"
+     xmlns:tags="http://www.holygoat.co.uk/owl/redwood/0.1/tags/"
+     xmlns:dc="http://purl.org/dc/elements/1.1/"
+     xmlns:foaf2="http://xmlns.com/foaf/0.1/"
+     xmlns:foaf="http://www.holygoat.co.uk/foaf.rdf#"
+     xmlns:dcmitype="http://purl.org/dc/dcmitype/"
+     xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
+     xmlns:owl2xml="http://www.w3.org/2006/12/owl2-xml#"
+     xmlns:owl="http://www.w3.org/2002/07/owl#"
+     xmlns:xsd="http://www.w3.org/2001/XMLSchema#"
+     xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+     xmlns:req="http://ns.softwiki.de/req/"
+     xmlns:skos="http://www.w3.org/2004/02/skos/core#">
+    <owl:Ontology rdf:about="http://ns.softwiki.de/req/">
+        <rdfs:label rdf:datatype="&xsd;string">SoftWiki Ontology for Requirements Engineering</rdfs:label>
+        <rdfs:comment rdf:datatype="&xsd;string">A requirements engineering ontology for the SoftWiki project.</rdfs:comment>
+        <dc:contributor rdf:datatype="&xsd;string">Jens Lehmann</dc:contributor>
+        <dc:contributor rdf:datatype="&xsd;string">Sebastian Dietzold</dc:contributor>
+        <owl:versionInfo rdf:datatype="&xsd;string">version 1.00 - Thomas Riechert, Steffen Lohmann, Kim Lauenroth, Philipp Heim - starting the next generation of SWORE on 8th of July 2008 in Duisburg
+version 0.8 - Sebastian Dietzold - skos, tags and dc alignment (title now functional)
+version 0.7 - Sebastian Dietzold - labels completed and namespace correction
+version 0.6 - name space changed to ns.softwiki.de/req
+version 0.5 - refined by Thomas according to ESWC Poster submission 
+version 0.4 - refined by Jens on the way home from Essen
+version 0.3 - refined by Jens during discussion with Kim and Steffen on 13 March 2007 in Essen
+version 0.2 - refined by Thomas and Jens in the evening of 12 March 2007 in Essen
+version 0.1 - simple initial version by Thomas and Jens before meeting in Essen</owl:versionInfo>
+    </owl:Ontology>
+    
+
+
+    <!-- 
+    ///////////////////////////////////////////////////////////////////////////////////////
+    //
+    // Annotation properties
+    //
+    ///////////////////////////////////////////////////////////////////////////////////////
+     -->
+
+    <owl:AnnotationProperty rdf:about="&owl;versionInfo"/>
+    <owl:AnnotationProperty rdf:about="&dc;contributor"/>
+    <owl:AnnotationProperty rdf:about="&rdfs;label"/>
+    <owl:AnnotationProperty rdf:about="&rdfs;comment"/>
+    
+
+
+    <!-- 
+    ///////////////////////////////////////////////////////////////////////////////////////
+    //
+    // Datatypes
+    //
+    ///////////////////////////////////////////////////////////////////////////////////////
+     -->
+
+    
+
+
+    <!-- http://www.w3.org/2001/XMLSchema#dateTime -->
+
+    <rdf:Description rdf:about="&xsd;dateTime">
+        <rdfs:label rdf:datatype="&xsd;string">dateTime</rdfs:label>
+    </rdf:Description>
+    
+
+
+    <!-- http://www.w3.org/2001/XMLSchema#string -->
+
+    <rdf:Description rdf:about="&xsd;string">
+        <rdfs:label rdf:datatype="&xsd;string">string</rdfs:label>
+    </rdf:Description>
+    
+
+
+    <!-- 
+    ///////////////////////////////////////////////////////////////////////////////////////
+    //
+    // Object Properties
+    //
+    ///////////////////////////////////////////////////////////////////////////////////////
+     -->
+
+    
+
+
+    <!-- http://ns.softwiki.de/req/broader -->
+
+    <owl:ObjectProperty rdf:about="&req;broader"/>
+    
+
+
+    <!-- http://ns.softwiki.de/req/comments -->
+
+    <owl:ObjectProperty rdf:about="&req;comments">
+        <rdf:type rdf:resource="&owl;InverseFunctionalProperty"/>
+        <rdfs:domain rdf:resource="&req;AbstractComment"/>
+        <rdfs:range rdf:resource="&req;AbstractRequirement"/>
+    </owl:ObjectProperty>
+    
+
+
+    <!-- http://ns.softwiki.de/req/conflicts -->
+
+    <owl:ObjectProperty rdf:about="&req;conflicts">
+        <rdf:type rdf:resource="&owl;SymmetricProperty"/>
+        <owl:inverseOf rdf:resource="&req;conflicts"/>
+        <rdfs:subPropertyOf rdf:resource="&req;undirectedrelation"/>
+    </owl:ObjectProperty>
+    
+
+
+    <!-- http://ns.softwiki.de/req/cui -->
+
+    <owl:ObjectProperty rdf:about="&req;cui"/>
+    
+
+
+    <!-- http://ns.softwiki.de/req/defines -->
+
+    <owl:ObjectProperty rdf:about="&req;defines">
+        <rdfs:label rdf:datatype="&xsd;string">defines</rdfs:label>
+        <rdfs:domain rdf:resource="&req;Author"/>
+        <rdfs:range>
+            <owl:Class>
+                <owl:unionOf rdf:parseType="Collection">
+                    <rdf:Description rdf:about="&req;AbstractComment"/>
+                    <rdf:Description rdf:about="&req;AbstractRequirement"/>
+                    <rdf:Description rdf:about="&req;Keyword"/>
+                </owl:unionOf>
+            </owl:Class>
+        </rdfs:range>
+    </owl:ObjectProperty>
+    
+
+
+    <!-- http://ns.softwiki.de/req/definition -->
+
+    <owl:ObjectProperty rdf:about="&req;definition">
+        <rdfs:range rdf:resource="&xsd;string"/>
+    </owl:ObjectProperty>
+    
+
+
+    <!-- http://ns.softwiki.de/req/depentsOn -->
+
+    <owl:ObjectProperty rdf:about="&req;depentsOn">
+        <rdfs:range rdf:resource="&req;AbstractRequirement"/>
+        <rdfs:domain rdf:resource="&req;AbstractRequirement"/>
+        <owl:inverseOf rdf:resource="&req;entails"/>
+    </owl:ObjectProperty>
+    
+
+
+    <!-- http://ns.softwiki.de/req/details -->
+
+    <owl:ObjectProperty rdf:about="&req;details">
+        <rdfs:label rdf:datatype="&xsd;string">details</rdfs:label>
+        <rdfs:range rdf:resource="&req;AbstractRequirement"/>
+        <rdfs:domain rdf:resource="&req;AbstractRequirement"/>
+    </owl:ObjectProperty>
+    
+
+
+    <!-- http://ns.softwiki.de/req/documentation -->
+
+    <owl:ObjectProperty rdf:about="&req;documentation"/>
+    
+
+
+    <!-- http://ns.softwiki.de/req/entails -->
+
+    <owl:ObjectProperty rdf:about="&req;entails">
+        <rdfs:domain rdf:resource="&req;AbstractRequirement"/>
+        <rdfs:range rdf:resource="&req;AbstractRequirement"/>
+    </owl:ObjectProperty>
+    
+
+
+    <!-- http://ns.softwiki.de/req/invalidates -->
+
+    <owl:ObjectProperty rdf:about="&req;invalidates">
+        <rdfs:domain rdf:resource="&req;AbstractRequirement"/>
+        <rdfs:range rdf:resource="&req;AbstractRequirement"/>
+    </owl:ObjectProperty>
+    
+
+
+    <!-- http://ns.softwiki.de/req/isCommentedBy -->
+
+    <owl:ObjectProperty rdf:about="&req;isCommentedBy">
+        <rdfs:range rdf:resource="&req;AbstractComment"/>
+        <rdfs:domain rdf:resource="&req;AbstractRequirement"/>
+        <owl:inverseOf rdf:resource="&req;comments"/>
+    </owl:ObjectProperty>
+    
+
+
+    <!-- http://ns.softwiki.de/req/isCreatedBy -->
+
+    <owl:ObjectProperty rdf:about="&req;isCreatedBy">
+        <rdf:type rdf:resource="&owl;FunctionalProperty"/>
+        <rdfs:label>is created by</rdfs:label>
+        <rdfs:comment>specifies the persons who created the requirement</rdfs:comment>
+        <rdfs:domain rdf:resource="&req;AbstractRequirement"/>
+    </owl:ObjectProperty>
+    
+
+
+    <!-- http://ns.softwiki.de/req/isDefinedBy -->
+
+    <owl:ObjectProperty rdf:about="&req;isDefinedBy">
+        <rdfs:label rdf:datatype="&xsd;string">defined by</rdfs:label>
+        <rdfs:range rdf:resource="&req;Author"/>
+        <owl:inverseOf rdf:resource="&req;defines"/>
+        <rdfs:domain>
+            <owl:Class>
+                <owl:unionOf rdf:parseType="Collection">
+                    <rdf:Description rdf:about="&req;AbstractComment"/>
+                    <rdf:Description rdf:about="&req;AbstractRequirement"/>
+                    <rdf:Description rdf:about="&req;Keyword"/>
+                </owl:unionOf>
+            </owl:Class>
+        </rdfs:domain>
+    </owl:ObjectProperty>
+    
+
+
+    <!-- http://ns.softwiki.de/req/isDetailedBy -->
+
+    <owl:ObjectProperty rdf:about="&req;isDetailedBy">
+        <rdfs:label rdf:datatype="&xsd;string">detailed by</rdfs:label>
+        <rdfs:domain rdf:resource="&req;AbstractRequirement"/>
+        <rdfs:range rdf:resource="&req;AbstractRequirement"/>
+        <owl:inverseOf rdf:resource="&req;details"/>
+    </owl:ObjectProperty>
+    
+
+
+    <!-- http://ns.softwiki.de/req/isInvalidFor -->
+
+    <owl:ObjectProperty rdf:about="&req;isInvalidFor">
+        <rdf:type rdf:resource="&owl;InverseFunctionalProperty"/>
+        <rdfs:range rdf:resource="&req;AbstractRequirement"/>
+        <rdfs:domain rdf:resource="&req;AbstractRequirement"/>
+        <owl:inverseOf rdf:resource="&req;invalidates"/>
+    </owl:ObjectProperty>
+    
+
+
+    <!-- http://ns.softwiki.de/req/isLeadingTo -->
+
+    <owl:ObjectProperty rdf:about="&req;isLeadingTo">
+        <rdfs:label rdf:datatype="&xsd;string">lead to</rdfs:label>
+        <rdfs:domain rdf:resource="&req;AbstractRequirement"/>
+        <rdfs:range rdf:resource="&req;AbstractSource"/>
+    </owl:ObjectProperty>
+    
+
+
+    <!-- http://ns.softwiki.de/req/isRedundant -->
+
+    <owl:ObjectProperty rdf:about="&req;isRedundant">
+        <rdf:type rdf:resource="&owl;SymmetricProperty"/>
+        <owl:inverseOf rdf:resource="&req;isRedundant"/>
+        <rdfs:subPropertyOf rdf:resource="&req;undirectedrelation"/>
+    </owl:ObjectProperty>
+    
+
+
+    <!-- http://ns.softwiki.de/req/isRelated -->
+
+    <owl:ObjectProperty rdf:about="&req;isRelated">
+        <rdf:type rdf:resource="&owl;SymmetricProperty"/>
+        <owl:inverseOf rdf:resource="&req;isRelated"/>
+        <rdfs:subPropertyOf rdf:resource="&req;undirectedrelation"/>
+    </owl:ObjectProperty>
+    
+
+
+    <!-- http://ns.softwiki.de/req/isReleatedTo -->
+
+    <owl:ObjectProperty rdf:about="&req;isReleatedTo">
+        <rdfs:range rdf:resource="&req;Customer"/>
+        <rdfs:domain rdf:resource="&req;CustomerRequirement"/>
+    </owl:ObjectProperty>
+    
+
+
+    <!-- http://ns.softwiki.de/req/isSimilarTo -->
+
+    <owl:ObjectProperty rdf:about="&req;isSimilarTo">
+        <rdf:type rdf:resource="&owl;SymmetricProperty"/>
+        <rdfs:domain rdf:resource="&req;AbstractRequirement"/>
+        <rdfs:range rdf:resource="&req;AbstractRequirement"/>
+        <owl:inverseOf rdf:resource="&req;isSimilarTo"/>
+        <rdfs:subPropertyOf rdf:resource="&req;undirectedrelation"/>
+    </owl:ObjectProperty>
+    
+
+
+    <!-- http://ns.softwiki.de/req/leadsTo -->
+
+    <owl:ObjectProperty rdf:about="&req;leadsTo">
+        <rdfs:label rdf:datatype="&xsd;string">leads to</rdfs:label>
+        <rdfs:range rdf:resource="&req;AbstractRequirement"/>
+        <rdfs:domain rdf:resource="&req;AbstractSource"/>
+        <owl:inverseOf rdf:resource="&req;isLeadingTo"/>
+    </owl:ObjectProperty>
+    
+
+
+    <!-- http://ns.softwiki.de/req/rates -->
+
+    <owl:ObjectProperty rdf:about="&req;rates">
+        <rdfs:range rdf:resource="&req;AbstractRequirement"/>
+        <rdfs:domain rdf:resource="&req;Rating"/>
+    </owl:ObjectProperty>
+    
+
+
+    <!-- http://ns.softwiki.de/req/rational -->
+
+    <owl:ObjectProperty rdf:about="&req;rational">
+        <rdfs:label rdf:datatype="&xsd;string">rational</rdfs:label>
+        <rdfs:range rdf:resource="&xsd;string"/>
+    </owl:ObjectProperty>
+    
+
+
+    <!-- http://ns.softwiki.de/req/refersTo -->
+
+    <owl:ObjectProperty rdf:about="&req;refersTo">
+        <rdfs:label rdf:datatype="&xsd;string">refers to</rdfs:label>
+        <rdfs:comment xml:lang="de">Relevanter Aspekt eines geplantes Systems (ähnlich zu Tagging).</rdfs:comment>
+        <rdfs:range rdf:resource="&req;AbstractReferencePoint"/>
+        <rdfs:domain rdf:resource="&req;AbstractRequirement"/>
+        <owl:inverseOf rdf:resource="&req;relevantRequirements"/>
+    </owl:ObjectProperty>
+    
+
+
+    <!-- http://ns.softwiki.de/req/relevantRequirements -->
+
+    <owl:ObjectProperty rdf:about="&req;relevantRequirements">
+        <rdfs:label rdf:datatype="&xsd;string">relevant requirements</rdfs:label>
+        <rdfs:domain rdf:resource="&req;AbstractReferencePoint"/>
+        <rdfs:range rdf:resource="&req;AbstractRequirement"/>
+    </owl:ObjectProperty>
+    
+
+
+    <!-- http://ns.softwiki.de/req/result -->
+
+    <owl:ObjectProperty rdf:about="&req;result">
+        <rdfs:label rdf:datatype="&xsd;string">result</rdfs:label>
+        <rdfs:comment xml:lang="de">z.B. Veränderung von priority und agreement</rdfs:comment>
+        <rdfs:range rdf:resource="&xsd;string"/>
+    </owl:ObjectProperty>
+    
+
+
+    <!-- http://ns.softwiki.de/req/scenarioStep -->
+
+    <owl:ObjectProperty rdf:about="&req;scenarioStep">
+        <rdfs:label rdf:datatype="&xsd;string">scenario step</rdfs:label>
+        <rdfs:comment rdf:datatype="&xsd;string"></rdfs:comment>
+        <owl:versionInfo rdf:datatype="&xsd;string">TODO: es muss eine konkrete Reihenfolge der Steps gegeben sein (Listenstruktur)</owl:versionInfo>
+        <rdfs:range rdf:resource="&xsd;string"/>
+    </owl:ObjectProperty>
+    
+
+
+    <!-- http://ns.softwiki.de/req/specifies -->
+
+    <owl:ObjectProperty rdf:about="&req;specifies">
+        <rdfs:range rdf:resource="&req;AbstractRequirement"/>
+        <rdfs:domain rdf:resource="&req;Topic"/>
+    </owl:ObjectProperty>
+    
+
+
+    <!-- http://ns.softwiki.de/req/undirectedrelation -->
+
+    <owl:ObjectProperty rdf:about="&req;undirectedrelation">
+        <rdf:type rdf:resource="&owl;FunctionalProperty"/>
+        <rdf:type rdf:resource="&owl;InverseFunctionalProperty"/>
+        <rdf:type rdf:resource="&owl;SymmetricProperty"/>
+        <rdfs:comment rdf:datatype="&xsd;string">Rule: only one relation between the same pair of two requirements allowed.</rdfs:comment>
+        <owl:inverseOf rdf:resource="&req;undirectedrelation"/>
+    </owl:ObjectProperty>
+    
+
+
+    <!-- http://ns.softwiki.de/req/votes -->
+
+    <owl:ObjectProperty rdf:about="&req;votes">
+        <rdfs:range rdf:resource="&req;AbstractRequirement"/>
+        <rdfs:domain rdf:resource="&req;Stakeholder"/>
+    </owl:ObjectProperty>
+    
+
+
+    <!-- http://ns.softwiki.de/req/willLeadTo -->
+
+    <owl:ObjectProperty rdf:about="&req;willLeadTo">
+        <rdfs:domain rdf:resource="&req;Requirement"/>
+        <rdfs:range rdf:resource="&req;SystemRequirement"/>
+    </owl:ObjectProperty>
+    
+
+
+    <!-- http://purl.org/dc/elements/1.1/description -->
+
+    <owl:ObjectProperty rdf:about="&dc;description">
+        <rdfs:label rdf:datatype="&xsd;string">description</rdfs:label>
+        <rdfs:label xml:lang="de">Beschreibung</rdfs:label>
+        <rdfs:range rdf:resource="&xsd;string"/>
+    </owl:ObjectProperty>
+    
+
+
+    <!-- http://www.holygoat.co.uk/owl/redwood/0.1/tags/taggedWithTag -->
+
+    <owl:ObjectProperty rdf:about="&tags;taggedWithTag">
+        <rdfs:label xml:lang="de">Tags</rdfs:label>
+    </owl:ObjectProperty>
+    
+
+
+    <!-- 
+    ///////////////////////////////////////////////////////////////////////////////////////
+    //
+    // Data properties
+    //
+    ///////////////////////////////////////////////////////////////////////////////////////
+     -->
+
+    
+
+
+    <!-- http://ns.softwiki.de/req/averagePriorityRate -->
+
+    <owl:DatatypeProperty rdf:about="&req;averagePriorityRate">
+        <rdfs:subPropertyOf rdf:resource="&req;averageRate"/>
+    </owl:DatatypeProperty>
+    
+
+
+    <!-- http://ns.softwiki.de/req/averageQualityRate -->
+
+    <owl:DatatypeProperty rdf:about="&req;averageQualityRate">
+        <rdfs:subPropertyOf rdf:resource="&req;averageRate"/>
+    </owl:DatatypeProperty>
+    
+
+
+    <!-- http://ns.softwiki.de/req/averageRate -->
+
+    <owl:DatatypeProperty rdf:about="&req;averageRate">
+        <rdfs:comment rdf:datatype="&xsd;string">Is calculated by given rates.</rdfs:comment>
+        <rdfs:domain rdf:resource="&req;AbstractRequirement"/>
+        <rdfs:range rdf:resource="&xsd;float"/>
+    </owl:DatatypeProperty>
+    
+
+
+    <!-- http://ns.softwiki.de/req/changeDate -->
+
+    <owl:DatatypeProperty rdf:about="&req;changeDate">
+        <rdfs:domain rdf:resource="&req;AbstractRequirement"/>
+        <rdfs:range rdf:resource="&xsd;dateTime"/>
+    </owl:DatatypeProperty>
+    
+
+
+    <!-- http://ns.softwiki.de/req/creationDate -->
+
+    <owl:DatatypeProperty rdf:about="&req;creationDate">
+        <rdfs:domain rdf:resource="&req;AbstractRequirement"/>
+        <rdfs:range rdf:resource="&xsd;dateTime"/>
+    </owl:DatatypeProperty>
+    
+
+
+    <!-- http://ns.softwiki.de/req/definition -->
+
+    <owl:DatatypeProperty rdf:about="&req;definition">
+        <rdfs:domain rdf:resource="&req;DefinedKeyword"/>
+    </owl:DatatypeProperty>
+    
+
+
+    <!-- http://ns.softwiki.de/req/rate -->
+
+    <owl:DatatypeProperty rdf:about="&req;rate">
+        <rdfs:domain rdf:resource="&req;Rating"/>
+        <rdfs:range rdf:resource="&xsd;float"/>
+    </owl:DatatypeProperty>
+    
+
+
+    <!-- http://ns.softwiki.de/req/rational -->
+
+    <owl:DatatypeProperty rdf:about="&req;rational">
+        <rdf:type rdf:resource="&owl;FunctionalProperty"/>
+        <rdfs:label rdf:datatype="&xsd;string">rational</rdfs:label>
+    </owl:DatatypeProperty>
+    
+
+
+    <!-- http://ns.softwiki.de/req/result -->
+
+    <owl:DatatypeProperty rdf:about="&req;result">
+        <rdf:type rdf:resource="&owl;FunctionalProperty"/>
+        <rdfs:label rdf:datatype="&xsd;string">result</rdfs:label>
+        <rdfs:comment xml:lang="de">z.B. Veränderung von priority und agreement</rdfs:comment>
+    </owl:DatatypeProperty>
+    
+
+
+    <!-- http://ns.softwiki.de/req/role -->
+
+    <owl:DatatypeProperty rdf:about="&req;role">
+        <rdfs:domain rdf:resource="&req;Author"/>
+    </owl:DatatypeProperty>
+    
+
+
+    <!-- http://ns.softwiki.de/req/scenarioStep -->
+
+    <owl:DatatypeProperty rdf:about="&req;scenarioStep">
+        <rdfs:label rdf:datatype="&xsd;string">scenario step</rdfs:label>
+        <rdfs:comment rdf:datatype="&xsd;string"></rdfs:comment>
+        <owl:versionInfo rdf:datatype="&xsd;string">TODO: es muss eine konkrete Reihenfolge der Steps gegeben sein (Listenstruktur)</owl:versionInfo>
+        <rdfs:domain rdf:resource="&req;TextualScenario"/>
+    </owl:DatatypeProperty>
+    
+
+
+    <!-- http://ns.softwiki.de/req/state -->
+
+    <owl:DatatypeProperty rdf:about="&req;state">
+        <rdfs:domain rdf:resource="&req;AbstractRequirement"/>
+        <rdfs:range>
+            <rdfs:Datatype>
+                <owl:oneOf>
+                    <rdf:Description>
+                        <rdf:type rdf:resource="&rdf;List"/>
+                        <rdf:first rdf:datatype="&xsd;string">isNegativDecided</rdf:first>
+                        <rdf:rest>
+                            <rdf:Description>
+                                <rdf:type rdf:resource="&rdf;List"/>
+                                <rdf:first rdf:datatype="&xsd;string">isPositvDecided</rdf:first>
+                                <rdf:rest rdf:resource="&rdf;nil"/>
+                            </rdf:Description>
+                        </rdf:rest>
+                    </rdf:Description>
+                </owl:oneOf>
+            </rdfs:Datatype>
+        </rdfs:range>
+    </owl:DatatypeProperty>
+    
+
+
+    <!-- http://ns.softwiki.de/req/voteTime -->
+
+    <owl:DatatypeProperty rdf:about="&req;voteTime">
+        <rdfs:label rdf:datatype="&xsd;string">vote time</rdfs:label>
+        <rdfs:domain rdf:resource="&req;Vote"/>
+        <rdfs:range rdf:resource="&xsd;dateTime"/>
+    </owl:DatatypeProperty>
+    
+
+
+    <!-- http://purl.org/dc/elements/1.1/description -->
+
+    <owl:DatatypeProperty rdf:about="&dc;description">
+        <rdf:type rdf:resource="&owl;FunctionalProperty"/>
+        <rdfs:label rdf:datatype="&xsd;string">description</rdfs:label>
+        <rdfs:label xml:lang="de">Beschreibung</rdfs:label>
+        <rdfs:domain>
+            <owl:Class>
+                <owl:unionOf rdf:parseType="Collection">
+                    <rdf:Description rdf:about="&req;Goal"/>
+                    <rdf:Description rdf:about="&req;Requirement"/>
+                </owl:unionOf>
+            </owl:Class>
+        </rdfs:domain>
+    </owl:DatatypeProperty>
+    
+
+
+    <!-- http://purl.org/dc/elements/1.1/title -->
+
+    <owl:DatatypeProperty rdf:about="&dc;title"/>
+    
+
+
+    <!-- 
+    ///////////////////////////////////////////////////////////////////////////////////////
+    //
+    // Classes
+    //
+    ///////////////////////////////////////////////////////////////////////////////////////
+     -->
+
+    
+
+
+    <!-- http://ns.softwiki.de/req/AbstractComment -->
+
+    <owl:Class rdf:about="&req;AbstractComment">
+        <rdfs:label>abstract comment</rdfs:label>
+    </owl:Class>
+    
+
+
+    <!-- http://ns.softwiki.de/req/AbstractReferencePoint -->
+
+    <owl:Class rdf:about="&req;AbstractReferencePoint">
+        <rdfs:label rdf:datatype="&xsd;string">reference point</rdfs:label>
+        <owl:disjointWith rdf:resource="&req;AbstractRequirement"/>
+        <owl:disjointWith rdf:resource="&req;AbstractSource"/>
+        <owl:disjointWith rdf:resource="&req;Author"/>
+        <owl:disjointWith rdf:resource="&req;Vote"/>
+    </owl:Class>
+    
+
+
+    <!-- http://ns.softwiki.de/req/AbstractRequirement -->
+
+    <owl:Class rdf:about="&req;AbstractRequirement">
+        <rdfs:label rdf:datatype="&xsd;string">abstract requirement</rdfs:label>
+        <rdfs:label xml:lang="de">abstraktes Requirement</rdfs:label>
+        <owl:disjointWith rdf:resource="&req;AbstractSource"/>
+        <owl:disjointWith rdf:resource="&req;Author"/>
+        <owl:disjointWith rdf:resource="&req;Vote"/>
+        <rdfs:comment rdf:datatype="&xsd;string">Es ist ungünstig, dass Requirement Subklasse von AbstractRequirement ist.</rdfs:comment>
+    </owl:Class>
+    
+
+
+    <!-- http://ns.softwiki.de/req/AbstractSource -->
+
+    <owl:Class rdf:about="&req;AbstractSource">
+        <rdfs:label rdf:datatype="&xsd;string">abstract source</rdfs:label>
+        <rdfs:subClassOf rdf:resource="&owl;Thing"/>
+        <owl:disjointWith rdf:resource="&req;Vote"/>
+        <rdfs:comment rdf:datatype="&xsd;string"></rdfs:comment>
+    </owl:Class>
+    
+
+
+    <!-- http://ns.softwiki.de/req/AllocatedRequirement -->
+
+    <owl:Class rdf:about="&req;AllocatedRequirement">
+        <rdfs:label>allocated requirement</rdfs:label>
+        <rdfs:subClassOf rdf:resource="&req;Requirement"/>
+    </owl:Class>
+    
+
+
+    <!-- http://ns.softwiki.de/req/ApplicationPointer -->
+
+    <owl:Class rdf:about="&req;ApplicationPointer">
+        <rdfs:label>application pointer</rdfs:label>
+        <rdfs:subClassOf rdf:resource="&req;AbstractReferencePoint"/>
+    </owl:Class>
+    
+
+
+    <!-- http://ns.softwiki.de/req/ApplicationState -->
+
+    <owl:Class rdf:about="&req;ApplicationState">
+        <rdfs:label>application state</rdfs:label>
+        <rdfs:subClassOf rdf:resource="&req;AbstractReferencePoint"/>
+    </owl:Class>
+    
+
+
+    <!-- http://ns.softwiki.de/req/Author -->
+
+    <owl:Class rdf:about="&req;Author">
+        <rdfs:label xml:lang="de">Autor</rdfs:label>
+        <rdfs:label xml:lang="en">author</rdfs:label>
+        <rdfs:subClassOf rdf:resource="&req;Stakeholder"/>
+        <owl:disjointWith rdf:resource="&req;Vote"/>
+    </owl:Class>
+    
+
+
+    <!-- http://ns.softwiki.de/req/Comment -->
+
+    <owl:Class rdf:about="&req;Comment">
+        <rdfs:label>comment</rdfs:label>
+        <rdfs:subClassOf rdf:resource="&req;AbstractComment"/>
+    </owl:Class>
+    
+
+
+    <!-- http://ns.softwiki.de/req/Creditor -->
+
+    <owl:Class rdf:about="&req;Creditor">
+        <rdfs:label>creditor</rdfs:label>
+        <rdfs:subClassOf rdf:resource="&req;Stakeholder"/>
+    </owl:Class>
+    
+
+
+    <!-- http://ns.softwiki.de/req/Customer -->
+
+    <owl:Class rdf:about="&req;Customer">
+        <rdfs:label>customer</rdfs:label>
+        <rdfs:subClassOf rdf:resource="&req;Stakeholder"/>
+        <owl:disjointWith rdf:resource="&req;Programmer"/>
+    </owl:Class>
+    
+
+
+    <!-- http://ns.softwiki.de/req/CustomerRequirement -->
+
+    <owl:Class rdf:about="&req;CustomerRequirement">
+        <rdfs:label>customer requirement</rdfs:label>
+        <owl:equivalentClass>
+            <owl:Class>
+                <owl:intersectionOf rdf:parseType="Collection">
+                    <rdf:Description rdf:about="&req;Requirement"/>
+                    <owl:Restriction>
+                        <owl:onProperty rdf:resource="&req;isCreatedBy"/>
+                        <owl:someValuesFrom rdf:resource="&req;Customer"/>
+                    </owl:Restriction>
+                </owl:intersectionOf>
+            </owl:Class>
+        </owl:equivalentClass>
+        <rdfs:subClassOf rdf:resource="&req;Requirement"/>
+        <rdfs:comment>A customer requirement is a requirement, which was created by a customer.</rdfs:comment>
+    </owl:Class>
+    
+
+
+    <!-- http://ns.softwiki.de/req/DefinedKeyword -->
+
+    <owl:Class rdf:about="&req;DefinedKeyword">
+        <rdfs:label>defined keyword</rdfs:label>
+        <rdfs:subClassOf rdf:resource="&req;Keyword"/>
+    </owl:Class>
+    
+
+
+    <!-- http://ns.softwiki.de/req/DerivedRequirement -->
+
+    <owl:Class rdf:about="&req;DerivedRequirement">
+        <rdfs:label>derived requirement</rdfs:label>
+        <rdfs:subClassOf rdf:resource="&req;Requirement"/>
+    </owl:Class>
+    
+
+
+    <!-- http://ns.softwiki.de/req/DesignRequirement -->
+
+    <owl:Class rdf:about="&req;DesignRequirement">
+        <rdfs:label>design requirement</rdfs:label>
+        <rdfs:subClassOf rdf:resource="&req;Requirement"/>
+    </owl:Class>
+    
+
+
+    <!-- http://ns.softwiki.de/req/Document -->
+
+    <owl:Class rdf:about="&req;Document">
+        <rdfs:label>document</rdfs:label>
+        <owl:equivalentClass>
+            <owl:Class>
+                <owl:intersectionOf rdf:parseType="Collection">
+                    <rdf:Description rdf:about="&req;AbstractSource"/>
+                    <owl:Restriction>
+                        <owl:onProperty rdf:resource="&req;leadsTo"/>
+                        <owl:someValuesFrom rdf:resource="&req;AbstractRequirement"/>
+                    </owl:Restriction>
+                </owl:intersectionOf>
+            </owl:Class>
+        </owl:equivalentClass>
+        <rdfs:subClassOf rdf:resource="&req;AbstractSource"/>
+    </owl:Class>
+    
+
+
+    <!-- http://ns.softwiki.de/req/FunctionalRequirement -->
+
+    <owl:Class rdf:about="&req;FunctionalRequirement">
+        <rdfs:label rdf:datatype="&xsd;string">functional requirement</rdfs:label>
+        <rdfs:subClassOf rdf:resource="&req;Requirement"/>
+        <owl:disjointWith rdf:resource="&req;QualityRequirement"/>
+        <rdfs:comment rdf:datatype="&xsd;string">refers to functional reference point, for instance components of the system</rdfs:comment>
+    </owl:Class>
+    
+
+
+    <!-- http://ns.softwiki.de/req/Goal -->
+
+    <owl:Class rdf:about="&req;Goal">
+        <rdfs:label rdf:datatype="&xsd;string">goal</rdfs:label>
+        <rdfs:label xml:lang="de">Ziel</rdfs:label>
+        <rdfs:subClassOf rdf:resource="&req;AbstractRequirement"/>
+        <rdfs:subClassOf>
+            <owl:Restriction>
+                <owl:onProperty rdf:resource="&dc;description"/>
+                <owl:cardinality rdf:datatype="&xsd;nonNegativeInteger">1</owl:cardinality>
+            </owl:Restriction>
+        </rdfs:subClassOf>
+        <owl:disjointWith rdf:resource="&req;Requirement"/>
+        <owl:disjointWith rdf:resource="&req;Scenario"/>
+    </owl:Class>
+    
+
+
+    <!-- http://ns.softwiki.de/req/Government -->
+
+    <owl:Class rdf:about="&req;Government">
+        <rdfs:label>government</rdfs:label>
+        <rdfs:subClassOf rdf:resource="&req;Stakeholder"/>
+    </owl:Class>
+    
+
+
+    <!-- http://ns.softwiki.de/req/Keyword -->
+
+    <owl:Class rdf:about="&req;Keyword">
+        <rdfs:label>keyword</rdfs:label>
+        <rdfs:subClassOf rdf:resource="&req;AbstractReferencePoint"/>
+    </owl:Class>
+    
+
+
+    <!-- http://ns.softwiki.de/req/PerformanceRequirement -->
+
+    <owl:Class rdf:about="&req;PerformanceRequirement">
+        <rdfs:label>performance requirement</rdfs:label>
+        <owl:equivalentClass>
+            <owl:Class>
+                <owl:intersectionOf rdf:parseType="Collection">
+                    <rdf:Description rdf:about="&req;Requirement"/>
+                    <owl:Restriction>
+                        <owl:onProperty rdf:resource="&req;willLeadTo"/>
+                        <owl:someValuesFrom rdf:resource="&req;SystemRequirement"/>
+                    </owl:Restriction>
+                </owl:intersectionOf>
+            </owl:Class>
+        </owl:equivalentClass>
+        <rdfs:subClassOf rdf:resource="&req;Requirement"/>
+        <owl:disjointWith rdf:resource="&req;SystemRequirement"/>
+    </owl:Class>
+    
+
+
+    <!-- http://ns.softwiki.de/req/PriorityRating -->
+
+    <owl:Class rdf:about="&req;PriorityRating">
+        <rdfs:label>priority rating</rdfs:label>
+        <rdfs:subClassOf rdf:resource="&req;Rating"/>
+        <rdfs:comment rdf:datatype="&xsd;string">Rule: Every Author only defines at most one rating about the priority for each requirement.</rdfs:comment>
+    </owl:Class>
+    
+
+
+    <!-- http://ns.softwiki.de/req/Programmer -->
+
+    <owl:Class rdf:about="&req;Programmer">
+        <rdfs:label>programmer</rdfs:label>
+        <rdfs:subClassOf rdf:resource="&req;Stakeholder"/>
+    </owl:Class>
+    
+
+
+    <!-- http://ns.softwiki.de/req/QualityRating -->
+
+    <owl:Class rdf:about="&req;QualityRating">
+        <rdfs:label>quality rating</rdfs:label>
+        <rdfs:subClassOf rdf:resource="&req;Rating"/>
+        <rdfs:comment rdf:datatype="&xsd;string">Rule: Every Author only defines at most one rating about the quality for each requirement.</rdfs:comment>
+    </owl:Class>
+    
+
+
+    <!-- http://ns.softwiki.de/req/QualityRequirement -->
+
+    <owl:Class rdf:about="&req;QualityRequirement">
+        <rdfs:label rdf:datatype="&xsd;string">quality requirement</rdfs:label>
+        <rdfs:subClassOf rdf:resource="&req;Requirement"/>
+        <rdfs:comment rdf:datatype="&xsd;string">refers to quality reference point, e.g. reliability, performance, usability</rdfs:comment>
+    </owl:Class>
+    
+
+
+    <!-- http://ns.softwiki.de/req/Rating -->
+
+    <owl:Class rdf:about="&req;Rating">
+        <rdfs:label>rating</rdfs:label>
+        <rdfs:subClassOf rdf:resource="&req;AbstractComment"/>
+    </owl:Class>
+    
+
+
+    <!-- http://ns.softwiki.de/req/Requirement -->
+
+    <owl:Class rdf:about="&req;Requirement">
+        <rdfs:label rdf:datatype="&xsd;string">requirement</rdfs:label>
+        <rdfs:label xml:lang="de">Anforderung(en)</rdfs:label>
+        <rdfs:subClassOf rdf:resource="&req;AbstractRequirement"/>
+        <rdfs:subClassOf>
+            <owl:Restriction>
+                <owl:onProperty rdf:resource="&dc;description"/>
+                <owl:cardinality rdf:datatype="&xsd;nonNegativeInteger">1</owl:cardinality>
+            </owl:Restriction>
+        </rdfs:subClassOf>
+        <owl:disjointWith rdf:resource="&req;Scenario"/>
+        <rdfs:comment rdf:datatype="&xsd;string"></rdfs:comment>
+        <owl:versionInfo rdf:datatype="&xsd;string">TODO: semantische Verfeinerung geplant, d.h. Anforderungen nicht nur als Textstring, sondern z.B. als RDF-Triple formulieren</owl:versionInfo>
+    </owl:Class>
+    
+
+
+    <!-- http://ns.softwiki.de/req/Scenario -->
+
+    <owl:Class rdf:about="&req;Scenario">
+        <rdfs:label rdf:datatype="&xsd;string">scenario</rdfs:label>
+        <rdfs:subClassOf rdf:resource="&req;AbstractRequirement"/>
+    </owl:Class>
+    
+
+
+    <!-- http://ns.softwiki.de/req/SeniorManagementStaff -->
+
+    <owl:Class rdf:about="&req;SeniorManagementStaff">
+        <rdfs:label>senior management staff</rdfs:label>
+        <rdfs:subClassOf rdf:resource="&req;Stakeholder"/>
+    </owl:Class>
+    
+
+
+    <!-- http://ns.softwiki.de/req/Stakeholder -->
+
+    <owl:Class rdf:about="&req;Stakeholder">
+        <rdfs:label>stakeholder</rdfs:label>
+        <rdfs:subClassOf rdf:resource="&req;AbstractSource"/>
+    </owl:Class>
+    
+
+
+    <!-- http://ns.softwiki.de/req/SystemRequirement -->
+
+    <owl:Class rdf:about="&req;SystemRequirement">
+        <rdfs:label>system requirement</rdfs:label>
+        <rdfs:subClassOf rdf:resource="&owl;Thing"/>
+    </owl:Class>
+    
+
+
+    <!-- http://ns.softwiki.de/req/TextualScenario -->
+
+    <owl:Class rdf:about="&req;TextualScenario">
+        <rdfs:label rdf:datatype="&xsd;string">textual scenario</rdfs:label>
+        <rdfs:subClassOf rdf:resource="&req;Scenario"/>
+        <rdfs:subClassOf>
+            <owl:Restriction>
+                <owl:onProperty rdf:resource="&req;scenarioStep"/>
+                <owl:minCardinality rdf:datatype="&xsd;nonNegativeInteger">1</owl:minCardinality>
+            </owl:Restriction>
+        </rdfs:subClassOf>
+        <rdfs:comment xml:lang="de">Szenario, welches aus mehreren textuell beschriebenen Szenarioschritten besteht.</rdfs:comment>
+    </owl:Class>
+    
+
+
+    <!-- http://ns.softwiki.de/req/Topic -->
+
+    <owl:Class rdf:about="&req;Topic">
+        <rdfs:label>topic</rdfs:label>
+        <rdfs:subClassOf rdf:resource="&req;DefinedKeyword"/>
+        <rdfs:comment rdf:datatype="&xsd;string">Rule: Every Requirement refers to exact one topic.</rdfs:comment>
+    </owl:Class>
+    
+
+
+    <!-- http://ns.softwiki.de/req/TradeUnion -->
+
+    <owl:Class rdf:about="&req;TradeUnion">
+        <rdfs:label>trade union</rdfs:label>
+        <rdfs:subClassOf rdf:resource="&req;Stakeholder"/>
+    </owl:Class>
+    
+
+
+    <!-- http://ns.softwiki.de/req/Vote -->
+
+    <owl:Class rdf:about="&req;Vote">
+        <rdfs:label rdf:datatype="&xsd;string">vote</rdfs:label>
+        <rdfs:subClassOf rdf:resource="&req;AbstractComment"/>
+        <rdfs:comment rdf:datatype="&xsd;string">Rule: Every Author only votes at most one requirement.</rdfs:comment>
+    </owl:Class>
+    
+
+
+    <!-- http://purl.org/dc/dcmitype/Image -->
+
+    <owl:Class rdf:about="&dcmitype;Image">
+        <rdfs:label>image</rdfs:label>
+    </owl:Class>
+    
+
+
+    <!-- http://www.w3.org/2000/01/rdf-schema#Resource -->
+
+    <owl:Class rdf:about="&rdfs;Resource">
+        <rdfs:label>resource</rdfs:label>
+    </owl:Class>
+    
+
+
+    <!-- http://www.w3.org/2001/XMLSchema#string -->
+
+    <owl:Class rdf:about="&xsd;string">
+        <rdfs:label rdf:datatype="&xsd;string">string</rdfs:label>
+    </owl:Class>
+    
+
+
+    <!-- http://www.w3.org/2002/07/owl#Datatype -->
+
+    <owl:Class rdf:about="&owl;Datatype"/>
+    
+
+
+    <!-- http://www.w3.org/2002/07/owl#Thing -->
+
+    <owl:Class rdf:about="&owl;Thing"/>
+    
+
+
+    <!-- http://www.w3.org/2004/02/skos/core#Concept -->
+
+    <owl:Class rdf:about="&skos;Concept">
+        <rdfs:label>concept</rdfs:label>
+        <rdfs:label xml:lang="de">Thema</rdfs:label>
+    </owl:Class>
+    
+
+
+    <!-- http://xmlns.com/foaf/0.1/Document -->
+
+    <owl:Class rdf:about="&foaf2;Document">
+        <rdfs:label>document</rdfs:label>
+    </owl:Class>
+    
+
+
+    <!-- 
+    ///////////////////////////////////////////////////////////////////////////////////////
+    //
+    // Individuals
+    //
+    ///////////////////////////////////////////////////////////////////////////////////////
+     -->
+
+    
+
+
+    <!-- http://ns.softwiki.de/req/1 -->
+
+    <owl:Thing rdf:about="&req;1">
+        <rdf:type rdf:resource="&req;QualityRating"/>
+        <rdf:type rdf:resource="&owl;NamedIndividual"/>
...
 
[truncated message content]

[DL-Learner SVN] SF.net SVN: dl-learner:[4122] trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd

From: <lor...@us...> - 2013-10-15 12:06:49

Revision: 4122
          http://sourceforge.net/p/dl-learner/code/4122
Author:   lorenz_b
Date:     2013-10-15 12:06:45 +0000 (Tue, 15 Oct 2013)
Log Message:
-----------
Added context extractor based on token sentence. Almost finished WSD.

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/ContextExtractor.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/StructureBasedWordSenseDisambiguation.java

Added Paths:
-----------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SentenceBasedContextExtractor.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WindowBasedContextExtractor.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/ContextExtractor.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/ContextExtractor.java	2013-10-11 21:29:34 UTC (rev 4121)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/ContextExtractor.java	2013-10-15 12:06:45 UTC (rev 4122)
@@ -3,13 +3,16 @@
  */
 package org.dllearner.algorithms.isle.wsd;
 
-import java.util.Set;
+import java.util.List;
 
+import org.dllearner.algorithms.isle.index.Annotation;
+
+
 /**
  * @author Lorenz Buehmann
  *
  */
 public interface ContextExtractor {
 
-	Set<String> extractContext(String token, String document);
+	List<String> extractContext(Annotation annotation);
 }

Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SentenceBasedContextExtractor.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SentenceBasedContextExtractor.java	                        (rev 0)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SentenceBasedContextExtractor.java	2013-10-15 12:06:45 UTC (rev 4122)
@@ -0,0 +1,89 @@
+/**
+ * 
+ */
+package org.dllearner.algorithms.isle.wsd;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Properties;
+
+import org.dllearner.algorithms.isle.index.TextDocument;
+
+import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation;
+import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation;
+import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation;
+import edu.stanford.nlp.ling.CoreLabel;
+import edu.stanford.nlp.pipeline.Annotation;
+import edu.stanford.nlp.pipeline.StanfordCoreNLP;
+import edu.stanford.nlp.util.CoreMap;
+
+/**
+ * @author Lorenz Buehmann
+ *
+ */
+public class SentenceBasedContextExtractor implements ContextExtractor{
+	
+	private StanfordCoreNLP pipeline;
+
+	public SentenceBasedContextExtractor() {
+		Properties props = new Properties();
+		props.put("annotators", "tokenize, ssplit");
+		pipeline = new StanfordCoreNLP(props);
+	}
+
+	/* (non-Javadoc)
+	 * @see org.dllearner.algorithms.isle.wsd.ContextExtractor#extractContext(java.lang.String, java.lang.String)
+	 */
+	@Override
+	public List<String> extractContext(org.dllearner.algorithms.isle.index.Annotation annotation) {
+		//split text into sentences
+		List<CoreMap> sentences = getSentences(annotation.getReferencedDocument().getRawContent());
+		
+		//find the sentence containing the token of the annotation
+		int tokenStart = annotation.getOffset();
+		int index = 0;
+		for (CoreMap sentence : sentences) {
+			String s = sentence.toString();
+			if (index < tokenStart && s.length() > tokenStart) {
+				List<String> context = new ArrayList<String>();
+				for (CoreLabel label : sentence.get(TokensAnnotation.class)) {
+					// this is the text of the token
+					String word = label.get(TextAnnotation.class);
+					
+					if(!word.isEmpty() && !word.matches("\\p{Punct}")){
+						context.add(word);
+					}
+				}
+				return context;
+			}
+			index += s.length();
+		}
+		throw new RuntimeException("Token " + annotation.getToken() + " not found in text " + annotation.getReferencedDocument().getRawContent());
+	}
+	
+	private List<CoreMap> getSentences(String document) {
+		// create an empty Annotation just with the given text
+		Annotation annotation = new Annotation(document);
+
+		// run all Annotators on this text
+		pipeline.annotate(annotation);
+
+		// these are all the sentences in this document
+		// a CoreMap is essentially a Map that uses class objects as keys and
+		// has values with custom types
+		List<CoreMap> sentences = annotation.get(SentencesAnnotation.class);
+
+		return sentences;
+	}
+	
+	public static void main(String[] args) throws Exception {
+		String s = "International Business Machines Corporation, or IBM, is an American multinational services technology and consulting corporation, with headquarters in Armonk, New York, United States. IBM manufactures and markets computer hardware and software,"
+				+ " and offers infrastructure, hosting and consulting services in areas ranging from mainframe computers to nanotechnology.";
+	
+		String token = "services";
+		SentenceBasedContextExtractor extractor = new SentenceBasedContextExtractor();
+		List<String> context = extractor.extractContext(new org.dllearner.algorithms.isle.index.Annotation(new TextDocument(s), s.indexOf(token), token.length()));
+		System.out.println(context);
+	}
+
+}

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/StructureBasedWordSenseDisambiguation.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/StructureBasedWordSenseDisambiguation.java	2013-10-11 21:29:34 UTC (rev 4121)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/StructureBasedWordSenseDisambiguation.java	2013-10-15 12:06:45 UTC (rev 4122)
@@ -3,15 +3,22 @@
  */
 package org.dllearner.algorithms.isle.wsd;
 
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
 import java.util.Set;
 
 import org.dllearner.algorithms.isle.StructuralEntityContext;
+import org.dllearner.algorithms.isle.VSMCosineDocumentSimilarity;
 import org.dllearner.algorithms.isle.index.Annotation;
 import org.dllearner.algorithms.isle.index.SemanticAnnotation;
 import org.dllearner.core.owl.Entity;
-import org.semanticweb.owlapi.model.OWLEntity;
 import org.semanticweb.owlapi.model.OWLOntology;
 
+import com.google.common.base.Joiner;
+import com.google.common.collect.Sets;
+
 /**
  * @author Lorenz Buehmann
  *
@@ -33,13 +40,52 @@
 	 */
 	@Override
 	public SemanticAnnotation disambiguate(Annotation annotation, Set<Entity> candidateEntities) {
-		//get the context of the annotated token
-		Set<String> tokenContext = contextExtractor.extractContext(annotation.getToken(), annotation.getReferencedDocument().getContent());
-		//compare this context with the context of each entity candidate
-		for (Entity entity : candidateEntities) {
-			Set<String> entityContext = StructuralEntityContext.getContextInNaturalLanguage(ontology, entity);
+		if(!candidateEntities.isEmpty()){
+			//get the context of the annotated token
+			List<String> tokenContext = contextExtractor.extractContext(annotation);
 			
+			//compare this context with the context of each entity candidate
+			double maxScore = Double.MIN_VALUE;
+			Entity bestEntity = null;
+			for (Entity entity : candidateEntities) {
+				//get the context of the entity by analyzing the structure of the ontology
+				Set<String> entityContext = StructuralEntityContext.getContextInNaturalLanguage(ontology, entity);
+				//compute the VSM Cosine Similarity
+				double score = computeScore(tokenContext, entityContext);
+				//set best entity
+				if(score > maxScore){
+					maxScore = score;
+					bestEntity = entity;
+				}
+			}
+			
+			return new SemanticAnnotation(annotation, bestEntity);
 		}
 		return null;
 	}
+	
+	/**
+	 * Compute the overlap between 2 set of words
+	 * @param words1
+	 * @param words2
+	 * @return
+	 */
+	private double computeScoreSimple(Collection<String> words1, Collection<String> words2){
+		return Sets.intersection(new HashSet<String>(words1), new HashSet<String>(words2)).size();
+	}
+	
+	/**
+	 * Compute the Cosine Similarity using as VSM.
+	 * @param words1
+	 * @param words2
+	 */
+	private double computeScore(Collection<String> words1, Collection<String> words2){
+		double score = 0d;
+		try {
+			score = VSMCosineDocumentSimilarity.getCosineSimilarity(Joiner.on(" ").join(words1), Joiner.on(" ").join(words2));
+		} catch (IOException e) {
+			e.printStackTrace();
+		}
+		return score;
+	}
 }

Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WindowBasedContextExtractor.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WindowBasedContextExtractor.java	                        (rev 0)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WindowBasedContextExtractor.java	2013-10-15 12:06:45 UTC (rev 4122)
@@ -0,0 +1,95 @@
+/**
+ * 
+ */
+package org.dllearner.algorithms.isle.wsd;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Properties;
+
+import org.dllearner.algorithms.isle.index.TextDocument;
+
+import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation;
+import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation;
+import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation;
+import edu.stanford.nlp.ling.CoreLabel;
+import edu.stanford.nlp.pipeline.Annotation;
+import edu.stanford.nlp.pipeline.StanfordCoreNLP;
+import edu.stanford.nlp.util.CoreMap;
+
+/**
+ * @author Lorenz Buehmann
+ *
+ */
+public class WindowBasedContextExtractor implements ContextExtractor{
+	
+	private StanfordCoreNLP pipeline;
+
+	/**
+	 * 
+	 */
+	public WindowBasedContextExtractor() {
+	
+		Properties props = new Properties();
+		props.put("annotators", "tokenize, ssplit");
+		pipeline = new StanfordCoreNLP(props);
+
+		
+	}
+
+	/* (non-Javadoc)
+	 * @see org.dllearner.algorithms.isle.wsd.ContextExtractor#extractContext(java.lang.String, java.lang.String)
+	 */
+	@Override
+	public List<String> extractContext(org.dllearner.algorithms.isle.index.Annotation annotation) {
+		// split text into sentences
+		List<CoreMap> sentences = getSentences(annotation.getReferencedDocument().getRawContent());
+
+		// find the sentence containing the token of the annotation
+		int tokenStart = annotation.getOffset();
+		int index = 0;
+		for (CoreMap sentence : sentences) {
+			String s = sentence.toString();
+			if (index < tokenStart && s.length() > tokenStart) {
+				List<String> context = new ArrayList<String>();
+				for (CoreLabel label : sentence.get(TokensAnnotation.class)) {
+					// this is the text of the token
+					String word = label.get(TextAnnotation.class);
+
+					context.add(word);
+				}
+				return context;
+			}
+			index += s.length();
+		}
+		throw new RuntimeException("Token " + annotation.getToken() + " not found in text "
+				+ annotation.getReferencedDocument().getRawContent());
+
+	}
+	
+	private List<CoreMap> getSentences(String document) {
+		// create an empty Annotation just with the given text
+		Annotation annotation = new Annotation(document);
+
+		// run all Annotators on this text
+		pipeline.annotate(annotation);
+
+		// these are all the sentences in this document
+		// a CoreMap is essentially a Map that uses class objects as keys and
+		// has values with custom types
+		List<CoreMap> sentences = annotation.get(SentencesAnnotation.class);
+
+		return sentences;
+	}
+	
+	public static void main(String[] args) throws Exception {
+		String s = "International Business Machines Corporation, or IBM, is an American multinational services technology and consulting corporation, with headquarters in Armonk, New York, United States. IBM manufactures and markets computer hardware and software,"
+				+ " and offers infrastructure, hosting and consulting services in areas ranging from mainframe computers to nanotechnology.";
+	
+		String token = "services";
+		WindowBasedContextExtractor extractor = new WindowBasedContextExtractor();
+		List<String> context = extractor.extractContext(new org.dllearner.algorithms.isle.index.Annotation(new TextDocument(s), s.indexOf(token), token.length()));
+		System.out.println(context);
+	}
+
+}

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4121] trunk

From: <lor...@us...> - 2013-10-11 21:29:38

Revision: 4121
          http://sourceforge.net/p/dl-learner/code/4121
Author:   lorenz_b
Date:     2013-10-11 21:29:34 +0000 (Fri, 11 Oct 2013)
Log Message:
-----------
Updated OWLAPI deps.

Modified Paths:
--------------
    trunk/components-core/pom.xml
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/StructureBasedWordSenseDisambiguation.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/NoiseSensitiveLGG.java
    trunk/components-core/src/main/java/org/dllearner/kb/SparqlEndpointKS.java
    trunk/pom.xml
    trunk/scripts/pom.xml

Added Paths:
-----------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/ContextExtractor.java

Modified: trunk/components-core/pom.xml
===================================================================
--- trunk/components-core/pom.xml	2013-10-07 09:15:20 UTC (rev 4120)
+++ trunk/components-core/pom.xml	2013-10-11 21:29:34 UTC (rev 4121)
@@ -102,7 +102,6 @@
 		<dependency>
 			<groupId>net.sourceforge.owlapi</groupId>
 			<artifactId>owlapi-distribution</artifactId>
-			<version>3.4.4</version>
 		</dependency>
 
 		<dependency>
@@ -291,11 +290,6 @@
 			<groupId>edu.stanford.nlp</groupId>
 			<artifactId>stanford-corenlp</artifactId>
 			<version>1.3.4</version>
-		</dependency>
-		<dependency>
-			<groupId>edu.stanford.nlp</groupId>
-			<artifactId>stanford-corenlp</artifactId>
-			<version>1.3.4</version>
 			<classifier>models</classifier>
 		</dependency>
 		<dependency>

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java	2013-10-07 09:15:20 UTC (rev 4120)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java	2013-10-11 21:29:34 UTC (rev 4121)
@@ -14,7 +14,7 @@
 import org.dllearner.core.owl.Entity;
 import org.semanticweb.owlapi.model.OWLOntology;
 
-import edu.stanford.nlp.util.Sets;
+import com.google.common.collect.Sets;
 
 /**
  * Generates candidates using a entity candidates prefix trie

Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/ContextExtractor.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/ContextExtractor.java	                        (rev 0)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/ContextExtractor.java	2013-10-11 21:29:34 UTC (rev 4121)
@@ -0,0 +1,15 @@
+/**
+ * 
+ */
+package org.dllearner.algorithms.isle.wsd;
+
+import java.util.Set;
+
+/**
+ * @author Lorenz Buehmann
+ *
+ */
+public interface ContextExtractor {
+
+	Set<String> extractContext(String token, String document);
+}

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/StructureBasedWordSenseDisambiguation.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/StructureBasedWordSenseDisambiguation.java	2013-10-07 09:15:20 UTC (rev 4120)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/StructureBasedWordSenseDisambiguation.java	2013-10-11 21:29:34 UTC (rev 4121)
@@ -18,11 +18,14 @@
  */
 public class StructureBasedWordSenseDisambiguation extends WordSenseDisambiguation{
 
+	private ContextExtractor contextExtractor;
+
 	/**
 	 * @param ontology
 	 */
-	public StructureBasedWordSenseDisambiguation(OWLOntology ontology) {
+	public StructureBasedWordSenseDisambiguation(ContextExtractor contextExtractor, OWLOntology ontology) {
 		super(ontology);
+		this.contextExtractor = contextExtractor;
 	}
 
 	/* (non-Javadoc)
@@ -30,12 +33,13 @@
 	 */
 	@Override
 	public SemanticAnnotation disambiguate(Annotation annotation, Set<Entity> candidateEntities) {
-		//TODO we should find the sentence in which the annotated token is contained in
-		String content = annotation.getReferencedDocument().getContent();
+		//get the context of the annotated token
+		Set<String> tokenContext = contextExtractor.extractContext(annotation.getToken(), annotation.getReferencedDocument().getContent());
+		//compare this context with the context of each entity candidate
 		for (Entity entity : candidateEntities) {
 			Set<String> entityContext = StructuralEntityContext.getContextInNaturalLanguage(ontology, entity);
+			
 		}
 		return null;
 	}
-
 }

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/NoiseSensitiveLGG.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/NoiseSensitiveLGG.java	2013-10-07 09:15:20 UTC (rev 4120)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/NoiseSensitiveLGG.java	2013-10-11 21:29:34 UTC (rev 4121)
@@ -16,8 +16,6 @@
 import com.jamonapi.Monitor;
 import com.jamonapi.MonitorFactory;
 
-import edu.stanford.nlp.util.Sets;
-
 public class NoiseSensitiveLGG<N> {
 	
 	

Modified: trunk/components-core/src/main/java/org/dllearner/kb/SparqlEndpointKS.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/kb/SparqlEndpointKS.java	2013-10-07 09:15:20 UTC (rev 4120)
+++ trunk/components-core/src/main/java/org/dllearner/kb/SparqlEndpointKS.java	2013-10-11 21:29:34 UTC (rev 4121)
@@ -20,10 +20,16 @@
 package org.dllearner.kb;
 
 import java.net.URL;
+import java.sql.SQLException;
 import java.util.LinkedList;
 import java.util.List;
+import java.util.concurrent.TimeUnit;
 
+import org.aksw.jena_sparql_api.cache.core.QueryExecutionFactoryCacheEx;
+import org.aksw.jena_sparql_api.cache.extra.CacheCoreEx;
+import org.aksw.jena_sparql_api.cache.extra.CacheCoreH2;
 import org.aksw.jena_sparql_api.cache.extra.CacheEx;
+import org.aksw.jena_sparql_api.cache.extra.CacheExImpl;
 import org.dllearner.core.ComponentAnn;
 import org.dllearner.core.ComponentInitException;
 import org.dllearner.core.KnowledgeSource;
@@ -66,7 +72,7 @@
 	}
 	
 	public SparqlEndpointKS(SparqlEndpoint endpoint) {
-		this(endpoint, null);
+		this(endpoint, (String)null);
 	}
 	
 	public SparqlEndpointKS(SparqlEndpoint endpoint, CacheEx cache) {
@@ -74,6 +80,21 @@
 		this.cache = cache;
 	}
 	
+	public SparqlEndpointKS(SparqlEndpoint endpoint, String cacheDirectory) {
+		this.endpoint = endpoint;
+		if(cacheDirectory != null){
+			try {
+				long timeToLive = TimeUnit.DAYS.toMillis(30);
+				CacheCoreEx cacheBackend = CacheCoreH2.create(cacheDirectory, timeToLive, true);
+				cache = new CacheExImpl(cacheBackend);
+			} catch (ClassNotFoundException e) {
+				e.printStackTrace();
+			} catch (SQLException e) {
+				e.printStackTrace();
+			}
+		}
+	}
+	
 	public CacheEx getCache() {
 		return cache;
 	}

Modified: trunk/pom.xml
===================================================================
--- trunk/pom.xml	2013-10-07 09:15:20 UTC (rev 4120)
+++ trunk/pom.xml	2013-10-11 21:29:34 UTC (rev 4121)
@@ -123,8 +123,7 @@
 			<dependency>
 			<groupId>net.sourceforge.owlapi</groupId>
 			<artifactId>owlapi-distribution</artifactId>
-			<version>3.4.4</version>
-			<type>pom</type>
+			<version>3.4.5</version>
 		</dependency>
 		<dependency>
 			<groupId>net.sourceforge.owlapi</groupId>

Modified: trunk/scripts/pom.xml
===================================================================
--- trunk/scripts/pom.xml	2013-10-07 09:15:20 UTC (rev 4120)
+++ trunk/scripts/pom.xml	2013-10-11 21:29:34 UTC (rev 4121)
@@ -116,7 +116,6 @@
 		<dependency>
 			<groupId>net.sourceforge.owlapi</groupId>
 			<artifactId>owlapi-distribution</artifactId>
-			<type>pom</type>
 		</dependency>
 		<dependency>
 			<groupId>net.sourceforge.owlapi</groupId>

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4120] trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index

From: <dfl...@us...> - 2013-10-07 09:15:23

Revision: 4120
          http://sourceforge.net/p/dl-learner/code/4120
Author:   dfleischhacker
Date:     2013-10-07 09:15:20 +0000 (Mon, 07 Oct 2013)
Log Message:
-----------
Fix bug leading to out of bounds exception

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java

Added Paths:
-----------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/StanfordLemmatizer.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java	2013-10-07 07:38:17 UTC (rev 4119)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java	2013-10-07 09:15:20 UTC (rev 4120)
@@ -137,7 +137,7 @@
                 else {
                     res.append(" ");
                 }
-                res.append(lemmatizeSingleWord(word));
+                res.append(lemmatizeSingleWord(w));
             }
             catch (Exception e) {
                throw new RuntimeException(e);

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java	2013-10-07 07:38:17 UTC (rev 4119)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java	2013-10-07 09:15:20 UTC (rev 4120)
@@ -149,7 +149,8 @@
 
 	@Override
 	public Set<Entity> getCandidateEntities(String s) {
-		return trie.get(s);
+        Set<Entity> res = trie.get(s);
+		return res == null ? new HashSet<Entity>() : trie.get(s);
 	}
 
 	@Override
@@ -263,4 +264,34 @@
             return res;
         }
     }
+
+    /**
+     * Pair of the actual word and the word after processing.
+     */
+    public static class ActualModifiedWordPair {
+        private String actualString;
+        private String modifiedString;
+
+        public String getActualString() {
+            return actualString;
+        }
+
+        public void setActualString(String actualString) {
+            this.actualString = actualString;
+        }
+
+        public String getModifiedString() {
+            return modifiedString;
+        }
+
+        public void setModifiedString(String modifiedString) {
+            this.modifiedString = modifiedString;
+        }
+
+        public ActualModifiedWordPair(String actualString, String modifiedString) {
+
+            this.actualString = actualString;
+            this.modifiedString = modifiedString;
+        }
+    }
 }

Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/StanfordLemmatizer.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/StanfordLemmatizer.java	                        (rev 0)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/StanfordLemmatizer.java	2013-10-07 09:15:20 UTC (rev 4120)
@@ -0,0 +1,54 @@
+package org.dllearner.algorithms.isle.index;
+
+import edu.stanford.nlp.ling.CoreAnnotations;
+import edu.stanford.nlp.ling.CoreLabel;
+import edu.stanford.nlp.pipeline.StanfordCoreNLP;
+import edu.stanford.nlp.util.CoreMap;
+
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Properties;
+
+/**
+ *
+ */
+class StanfordLemmatizer {
+
+    protected StanfordCoreNLP pipeline;
+
+    public StanfordLemmatizer() {
+        // Create StanfordCoreNLP object properties, with POS tagging
+        // (required for lemmatization), and lemmatization
+        Properties props;
+        props = new Properties();
+        props.put("annotators", "tokenize, ssplit, pos, lemma");
+
+        // StanfordCoreNLP loads a lot of models, so you probably
+        // only want to do this once per execution
+        this.pipeline = new StanfordCoreNLP(props);
+    }
+
+    public String lemmatize(String documentText)
+    {
+        List<String> lemmas = new LinkedList<String>();
+
+        // create an empty Annotation just with the given text
+        edu.stanford.nlp.pipeline.Annotation document = new edu.stanford.nlp.pipeline.Annotation(documentText);
+
+        // run all Annotators on this text
+        this.pipeline.annotate(document);
+
+        // Iterate over all of the sentences found
+        List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
+        for(CoreMap sentence: sentences) {
+            // Iterate over all tokens in a sentence
+            for (CoreLabel token: sentence.get(CoreAnnotations.TokensAnnotation.class)) {
+                // Retrieve and add the lemma for each word into the
+                // list of lemmas
+                lemmas.add(token.get(CoreAnnotations.LemmaAnnotation.class));
+            }
+        }
+
+        return lemmas.get(0);
+    }
+}

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java	2013-10-07 07:38:17 UTC (rev 4119)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java	2013-10-07 09:15:20 UTC (rev 4120)
@@ -37,6 +37,8 @@
             }
             String match = candidatesTrie.getLongestMatch(unparsed);
             if (match != null && !match.isEmpty()) {
+
+                //TODO: here we are losing the original offset and index...
                 Annotation annotation = new Annotation(document, i, match.length());
                 annotations.add(annotation);
                 i += match.length() - 1;

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4119] trunk

From: <lor...@us...> - 2013-10-07 07:38:22

Revision: 4119
          http://sourceforge.net/p/dl-learner/code/4119
Author:   lorenz_b
Date:     2013-10-07 07:38:17 +0000 (Mon, 07 Oct 2013)
Log Message:
-----------
Added methods to SPARQL reasoner for testing if a property has some defined characteristics, e.g. functionality.

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/core/AbstractAxiomLearningAlgorithm.java
    trunk/components-core/src/main/java/org/dllearner/kb/SparqlEndpointKS.java
    trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java
    trunk/scripts/src/main/java/org/dllearner/scripts/pattern/OWLAxiomPatternUsageEvaluation.java

Modified: trunk/components-core/src/main/java/org/dllearner/core/AbstractAxiomLearningAlgorithm.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/core/AbstractAxiomLearningAlgorithm.java	2013-10-05 09:10:00 UTC (rev 4118)
+++ trunk/components-core/src/main/java/org/dllearner/core/AbstractAxiomLearningAlgorithm.java	2013-10-07 07:38:17 UTC (rev 4119)
@@ -32,6 +32,10 @@
 import java.util.SortedSet;
 import java.util.TreeSet;
 
+import org.aksw.jena_sparql_api.cache.core.QueryExecutionFactoryCacheEx;
+import org.aksw.jena_sparql_api.core.QueryExecutionFactory;
+import org.aksw.jena_sparql_api.http.QueryExecutionFactoryHttp;
+import org.aksw.jena_sparql_api.model.QueryExecutionFactoryModel;
 import org.dllearner.core.config.BooleanEditor;
 import org.dllearner.core.config.ConfigOption;
 import org.dllearner.core.config.IntegerEditor;
@@ -60,14 +64,10 @@
 import com.hp.hpl.jena.query.ParameterizedSparqlString;
 import com.hp.hpl.jena.query.Query;
 import com.hp.hpl.jena.query.QueryExecution;
-import com.hp.hpl.jena.query.QueryExecutionFactory;
-import com.hp.hpl.jena.query.QueryFactory;
 import com.hp.hpl.jena.query.ResultSet;
-import com.hp.hpl.jena.query.Syntax;
 import com.hp.hpl.jena.rdf.model.Model;
 import com.hp.hpl.jena.rdf.model.ModelFactory;
 import com.hp.hpl.jena.rdf.model.RDFNode;
-import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP;
 import com.hp.hpl.jena.sparql.engine.http.QueryExceptionHTTP;
 import com.hp.hpl.jena.sparql.expr.E_Regex;
 import com.hp.hpl.jena.sparql.expr.E_Str;
@@ -98,6 +98,7 @@
 	
 	protected SparqlEndpointKS ks;
 	protected SPARQLReasoner reasoner;
+	private QueryExecutionFactory qef;
 	
 	protected List<EvaluatedAxiom> currentlyBestAxioms;
 	protected SortedSet<Axiom> existingAxioms;
@@ -191,6 +192,17 @@
 
 	@Override
 	public void init() throws ComponentInitException {
+		if(ks.isRemote()){
+			SparqlEndpoint endpoint = ks.getEndpoint();
+			qef = new QueryExecutionFactoryHttp(endpoint.getURL().toString(), endpoint.getDefaultGraphURIs());
+			if(ks.getCache() != null){
+				qef = new QueryExecutionFactoryCacheEx(qef, ks.getCache());
+			}
+//			qef = new QueryExecutionFactoryPaginated(qef, 10000);
+			
+		} else {
+			qef = new QueryExecutionFactoryModel(((LocalModelBasedSparqlEndpointKS)ks).getModel());
+		}
 		ks.init();
 		if(reasoner == null){
 			reasoner = new SPARQLReasoner((SparqlEndpointKS) ks);
@@ -279,93 +291,61 @@
 	
 	protected Model executeConstructQuery(String query) {
 		logger.trace("Sending query\n{} ...", query);
-		if(ks.isRemote()){
-			SparqlEndpoint endpoint = ((SparqlEndpointKS) ks).getEndpoint();
-			QueryEngineHTTP queryExecution = new QueryEngineHTTP(endpoint.getURL().toString(),
-					query);
-			queryExecution.setTimeout(getRemainingRuntimeInMilliSeconds());
-			queryExecution.setDefaultGraphURIs(endpoint.getDefaultGraphURIs());
-			queryExecution.setNamedGraphURIs(endpoint.getNamedGraphURIs());
-			try {
-				Model model = queryExecution.execConstruct();
-				fetchedRows += model.size();
-				timeout = false;
-				if(model.size() == 0){
-					fullDataLoaded = true;
-				}
-				logger.debug("Got " + model.size() + " triples.");
-				return model;
-			} catch (QueryExceptionHTTP e) {
-				if(e.getCause() instanceof SocketTimeoutException){
-					logger.warn("Got timeout");
-				} else {
-					logger.error("Exception executing query", e);
-				}
-				return ModelFactory.createDefaultModel();
-			}
-		} else {
-			QueryExecution queryExecution = QueryExecutionFactory.create(query, ((LocalModelBasedSparqlEndpointKS)ks).getModel());
-			Model model = queryExecution.execConstruct();
+		QueryExecution qe = qef.createQueryExecution(query);
+		try {
+			Model model = qe.execConstruct();
 			fetchedRows += model.size();
+			timeout = false;
 			if(model.size() == 0){
 				fullDataLoaded = true;
 			}
+			logger.debug("Got " + model.size() + " triples.");
 			return model;
+		} catch (QueryExceptionHTTP e) {
+			if(e.getCause() instanceof SocketTimeoutException){
+				logger.warn("Got timeout");
+			} else {
+				logger.error("Exception executing query", e);
+			}
+			return ModelFactory.createDefaultModel();
 		}
 	}
 	
 	protected ResultSet executeSelectQuery(String query) {
 		logger.trace("Sending query\n{} ...", query);
-		if(ks.isRemote()){
-			SparqlEndpoint endpoint = ((SparqlEndpointKS) ks).getEndpoint();
-			QueryEngineHTTP queryExecution = new QueryEngineHTTP(endpoint.getURL().toString(),
-					query);
-			queryExecution.setTimeout(getRemainingRuntimeInMilliSeconds());
-			queryExecution.setDefaultGraphURIs(endpoint.getDefaultGraphURIs());
-			queryExecution.setNamedGraphURIs(endpoint.getNamedGraphURIs());
-			try {
-				ResultSet rs = queryExecution.execSelect();
-				timeout = false;
-				return rs;
-			} catch (QueryExceptionHTTP e) {
-				if(e.getCause() instanceof SocketTimeoutException){
-					if(timeout){
-						logger.warn("Got timeout");
-						throw e;
-					} else {
-						logger.trace("Got local timeout");
-					}
-					
+		
+		QueryExecution qe = qef.createQueryExecution(query);
+		try {
+			ResultSet rs = qe.execSelect();
+			timeout = false;
+			return rs;
+		} catch (QueryExceptionHTTP e) {
+			if(e.getCause() instanceof SocketTimeoutException){
+				if(timeout){
+					logger.warn("Got timeout");
+					throw e;
 				} else {
-					logger.error("Exception executing query", e);
+					logger.trace("Got local timeout");
 				}
-				return new ResultSetMem();
+				
+			} else {
+				logger.error("Exception executing query", e);
 			}
-		} else {
-			return executeSelectQuery(query, ((LocalModelBasedSparqlEndpointKS)ks).getModel());
+			return new ResultSetMem();
 		}
 	}
 	
 	protected ResultSet executeSelectQuery(String query, Model model) {
 		logger.trace("Sending query on local model\n{} ...", query);
-		QueryExecution qexec = QueryExecutionFactory.create(QueryFactory.create(query, Syntax.syntaxARQ), model);
-		ResultSet rs = qexec.execSelect();;
-
+		QueryExecutionFactory qef = new QueryExecutionFactoryModel(model);
+		QueryExecution qexec = qef.createQueryExecution(query);
+		ResultSet rs = qexec.execSelect();
 		return rs;
 	}
 	
 	protected boolean executeAskQuery(String query){
 		logger.trace("Sending query\n{} ...", query);
-		if(ks.isRemote()){
-			SparqlEndpoint endpoint = ((SparqlEndpointKS) ks).getEndpoint();
-			QueryEngineHTTP queryExecution = new QueryEngineHTTP(endpoint.getURL().toString(), query);
-			queryExecution.setDefaultGraphURIs(endpoint.getDefaultGraphURIs());
-			queryExecution.setNamedGraphURIs(endpoint.getNamedGraphURIs());
-			return queryExecution.execAsk();
-		} else {
-			QueryExecution queryExecution = QueryExecutionFactory.create(query, ((LocalModelBasedSparqlEndpointKS)ks).getModel());
-			return queryExecution.execAsk();
-		}
+		return qef.createQueryExecution(query).execAsk();
 	}
 	
 	protected <K, V extends Comparable<V>> List<Entry<K, V>> sortByValues(Map<K, V> map){

Modified: trunk/components-core/src/main/java/org/dllearner/kb/SparqlEndpointKS.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/kb/SparqlEndpointKS.java	2013-10-05 09:10:00 UTC (rev 4118)
+++ trunk/components-core/src/main/java/org/dllearner/kb/SparqlEndpointKS.java	2013-10-07 07:38:17 UTC (rev 4119)
@@ -23,12 +23,12 @@
 import java.util.LinkedList;
 import java.util.List;
 
+import org.aksw.jena_sparql_api.cache.extra.CacheEx;
 import org.dllearner.core.ComponentAnn;
 import org.dllearner.core.ComponentInitException;
 import org.dllearner.core.KnowledgeSource;
 import org.dllearner.core.config.ConfigOption;
 import org.dllearner.core.config.ListStringEditor;
-import org.dllearner.kb.sparql.ExtractionDBCache;
 import org.dllearner.kb.sparql.SPARQLTasks;
 import org.dllearner.kb.sparql.SparqlEndpoint;
 import org.springframework.beans.propertyeditors.URLEditor;
@@ -45,7 +45,7 @@
 public class SparqlEndpointKS implements KnowledgeSource {
 
 	private SparqlEndpoint endpoint;
-	private ExtractionDBCache cache;
+	private CacheEx cache;
 	private boolean supportsSPARQL_1_1 = false;
 	private boolean isRemote = true;
 	private boolean initialized = false;
@@ -69,15 +69,22 @@
 		this(endpoint, null);
 	}
 	
-	public SparqlEndpointKS(SparqlEndpoint endpoint, ExtractionDBCache cache) {
+	public SparqlEndpointKS(SparqlEndpoint endpoint, CacheEx cache) {
 		this.endpoint = endpoint;
 		this.cache = cache;
 	}
 	
-	public ExtractionDBCache getCache() {
+	public CacheEx getCache() {
 		return cache;
 	}
 	
+	/**
+	 * @param cache the cache to set
+	 */
+	public void setCache(CacheEx cache) {
+		this.cache = cache;
+	}
+	
 	@Override
 	public void init() throws ComponentInitException {
 		if(!initialized){

Modified: trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java	2013-10-05 09:10:00 UTC (rev 4118)
+++ trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java	2013-10-07 07:38:17 UTC (rev 4119)
@@ -387,6 +387,26 @@
 		}
 		return hierarchy;
 	}
+	
+	public boolean isFunctional(ObjectProperty property){
+		String query = "ASK {<" + property + "> a " + OWL.FunctionalProperty.getURI() + "}";
+		return qef.createQueryExecution(query).execAsk();
+	}
+	
+	public boolean isInverseFunctional(ObjectProperty property){
+		String query = "ASK {<" + property + "> a " + OWL.InverseFunctionalProperty.getURI() + "}";
+		return qef.createQueryExecution(query).execAsk();
+	}
+	
+	public boolean isAsymmetric(ObjectProperty property){
+		String query = "ASK {<" + property + "> a " + OWL2.AsymmetricProperty.getURI() + "}";
+		return qef.createQueryExecution(query).execAsk();
+	}
+	
+	public boolean isIrreflexive(ObjectProperty property){
+		String query = "ASK {<" + property + "> a " + OWL2.IrreflexiveProperty.getURI() + "}";
+		return qef.createQueryExecution(query).execAsk();
+	}
 
 	public final ClassHierarchy prepareSubsumptionHierarchyFast() {
 		logger.info("Preparing subsumption hierarchy ...");

Modified: trunk/scripts/src/main/java/org/dllearner/scripts/pattern/OWLAxiomPatternUsageEvaluation.java
===================================================================
--- trunk/scripts/src/main/java/org/dllearner/scripts/pattern/OWLAxiomPatternUsageEvaluation.java	2013-10-05 09:10:00 UTC (rev 4118)
+++ trunk/scripts/src/main/java/org/dllearner/scripts/pattern/OWLAxiomPatternUsageEvaluation.java	2013-10-07 07:38:17 UTC (rev 4119)
@@ -9,7 +9,6 @@
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.StringWriter;
-import java.io.UnsupportedEncodingException;
 import java.net.MalformedURLException;
 import java.net.SocketTimeoutException;
 import java.net.URI;
@@ -41,6 +40,14 @@
 import joptsimple.OptionParser;
 import joptsimple.OptionSet;
 
+import org.aksw.jena_sparql_api.cache.core.QueryExecutionFactoryCacheEx;
+import org.aksw.jena_sparql_api.cache.extra.CacheCoreEx;
+import org.aksw.jena_sparql_api.cache.extra.CacheCoreH2;
+import org.aksw.jena_sparql_api.cache.extra.CacheEx;
+import org.aksw.jena_sparql_api.cache.extra.CacheExImpl;
+import org.aksw.jena_sparql_api.core.QueryExecutionFactory;
+import org.aksw.jena_sparql_api.http.QueryExecutionFactoryHttp;
+import org.aksw.jena_sparql_api.model.QueryExecutionFactoryModel;
 import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
 import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
 import org.apache.log4j.Logger;
@@ -51,10 +58,7 @@
 import org.dllearner.core.owl.NamedClass;
 import org.dllearner.kb.LocalModelBasedSparqlEndpointKS;
 import org.dllearner.kb.SparqlEndpointKS;
-import org.dllearner.kb.sparql.ExtractionDBCache;
-import org.dllearner.kb.sparql.QueryEngineHTTP;
 import org.dllearner.kb.sparql.SparqlEndpoint;
-import org.dllearner.kb.sparql.SparqlQuery;
 import org.dllearner.learningproblems.AxiomScore;
 import org.dllearner.learningproblems.Heuristics;
 import org.dllearner.reasoning.SPARQLReasoner;
@@ -109,7 +113,6 @@
 import com.hp.hpl.jena.query.ParameterizedSparqlString;
 import com.hp.hpl.jena.query.Query;
 import com.hp.hpl.jena.query.QueryExecution;
-import com.hp.hpl.jena.query.QueryExecutionFactory;
 import com.hp.hpl.jena.query.QueryFactory;
 import com.hp.hpl.jena.query.QuerySolution;
 import com.hp.hpl.jena.query.ResultSet;
@@ -132,10 +135,12 @@
 	private OWLObjectRenderer axiomRenderer = new ManchesterOWLSyntaxOWLObjectRendererImpl();
 	private OWLDataFactory df = new OWLDataFactoryImpl();
 	
-	private ExtractionDBCache cache = new ExtractionDBCache("pattern-cache/db");
+	private String cacheDirectory = "pattern-cache/db";
+	private CacheEx cache;
 	private SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpedia();
+	private QueryExecutionFactory qef;
 	
-	private SparqlEndpointKS ks = new SparqlEndpointKS(endpoint, cache);//new LocalModelBasedSparqlEndpointKS(model);
+	private SparqlEndpointKS ks = new SparqlEndpointKS(endpoint);//new LocalModelBasedSparqlEndpointKS(model);
 	private String ns = "http://dbpedia.org/ontology/";
 	
 	private DecimalFormat format = new DecimalFormat("00.0%");
@@ -185,6 +190,25 @@
 			e.printStackTrace();
 		}
 		
+		if(ks.isRemote()){
+			qef = new QueryExecutionFactoryHttp(endpoint.getURL().toString(), endpoint.getDefaultGraphURIs());
+			if(cacheDirectory != null){
+				try {
+					long timeToLive = TimeUnit.DAYS.toMillis(30);
+					CacheCoreEx cacheBackend = CacheCoreH2.create(cacheDirectory, timeToLive, true);
+					cache = new CacheExImpl(cacheBackend);
+					qef = new QueryExecutionFactoryCacheEx(qef, cache);
+					ks.setCache(cache);
+				} catch (ClassNotFoundException e) {
+					e.printStackTrace();
+				} catch (SQLException e) {
+					e.printStackTrace();
+				}
+			}
+		} else {
+			qef = new QueryExecutionFactoryModel(((LocalModelBasedSparqlEndpointKS)ks).getModel());
+		}
+		
 		initDBConnection();
 		
 		samplesDir = new File("pattern-instantiations-samples");
@@ -730,7 +754,8 @@
 		}
 		filterModel(fragment);
 		logger.info("...got " + fragment.size() + " triples ");
-		ResultSet rs = QueryExecutionFactory.create("SELECT (COUNT(DISTINCT ?s) AS ?cnt) WHERE {?s a <" + cls.getName() + ">. }", fragment).execSelect();
+		QueryExecutionFactory qef = new QueryExecutionFactoryModel(fragment);
+		ResultSet rs = qef.createQueryExecution("SELECT (COUNT(DISTINCT ?s) AS ?cnt) WHERE {?s a <" + cls.getName() + ">. }").execSelect();
 		int nrOfInstances = rs.next().getLiteral("cnt").getInt();
 		logger.info("with " + nrOfInstances + " instances of class " + cls.getName());
 		fragmentStatistics.addValue(nrOfInstances);
@@ -928,14 +953,15 @@
 		
 		//2. execute SPARQL query on local model 
 		query = QueryFactory.create("SELECT (COUNT(DISTINCT ?x) AS ?cnt) WHERE {" + converter.convert("?x", patternSubClass) + "}",Syntax.syntaxARQ);
-		int subClassCnt = QueryExecutionFactory.create(query, fragment).execSelect().next().getLiteral("cnt").getInt();
+		QueryExecutionFactory qef = new QueryExecutionFactoryModel(fragment);
+		int subClassCnt = qef.createQueryExecution(query).execSelect().next().getLiteral("cnt").getInt();
 		System.out.println(subClassCnt);
 		
 		Set<OWLEntity> signature = patternSuperClass.getSignature();
 		signature.remove(patternSubClass);
 		query = converter.asQuery("?x", df.getOWLObjectIntersectionOf(patternSubClass, patternSuperClass), signature, true);
 		Map<OWLEntity, String> variablesMapping = converter.getVariablesMapping();
-		com.hp.hpl.jena.query.ResultSet rs = QueryExecutionFactory.create(query, fragment).execSelect();
+		com.hp.hpl.jena.query.ResultSet rs = qef.createQueryExecution(query).execSelect();
 		QuerySolution qs;
 		while(rs.hasNext()){
 			qs = rs.next();
@@ -1032,7 +1058,8 @@
 		Query query = converter.asQuery("?x", df.getOWLObjectIntersectionOf(cls, patternSuperClass), signature);
 		logger.info("Running query\n" + query);
 		Map<OWLEntity, String> variablesMapping = converter.getVariablesMapping();
-		com.hp.hpl.jena.query.ResultSet rs = QueryExecutionFactory.create(query, fragment).execSelect();
+		QueryExecutionFactory qef = new QueryExecutionFactoryModel(fragment);
+		com.hp.hpl.jena.query.ResultSet rs = qef.createQueryExecution(query).execSelect();
 		QuerySolution qs;
 		Set<String> resources = new HashSet<String>();
 		Multiset<OWLAxiom> instantiations = HashMultiset.create();
@@ -1106,14 +1133,15 @@
 		Query query = QueryFactory.create(
 				"SELECT (COUNT(DISTINCT ?x) AS ?cnt) WHERE {" + converter.convert("?x", patternSubClass) + "}",
 				Syntax.syntaxARQ);
-		int subClassCnt = QueryExecutionFactory.create(query, fragment).execSelect().next().getLiteral("cnt").getInt();
+		QueryExecutionFactory qef = new QueryExecutionFactoryModel(fragment);
+		int subClassCnt = qef.createQueryExecution(query).execSelect().next().getLiteral("cnt").getInt();
 
 		Set<OWLEntity> signature = patternSuperClass.getSignature();
 		signature.remove(patternSubClass);
 		query = converter.asQuery("?x", df.getOWLObjectIntersectionOf(patternSubClass, patternSuperClass), signature, true);
 		logger.info("Running query\n" + query);
 		Map<OWLEntity, String> variablesMapping = converter.getVariablesMapping();
-		com.hp.hpl.jena.query.ResultSet rs = QueryExecutionFactory.create(query, fragment).execSelect();
+		com.hp.hpl.jena.query.ResultSet rs = qef.createQueryExecution(query).execSelect();
 		QuerySolution qs;
 		while (rs.hasNext()) {
 			qs = rs.next();
@@ -1233,146 +1261,41 @@
 	}
 	
 	protected com.hp.hpl.jena.query.ResultSet executeSelectQuery(Query query) {
-		com.hp.hpl.jena.query.ResultSet rs = null;
-		if(ks.isRemote()){
-			SparqlEndpoint endpoint = ((SparqlEndpointKS) ks).getEndpoint();
-			ExtractionDBCache cache = ks.getCache();
-			if(cache != null){
-				rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query.toString()));
-			} else {
-				QueryEngineHTTP queryExecution = new QueryEngineHTTP(endpoint.getURL().toString(),
-						query);
-				queryExecution.setDefaultGraphURIs(endpoint.getDefaultGraphURIs());
-				queryExecution.setNamedGraphURIs(endpoint.getNamedGraphURIs());
-				try {
-					rs = queryExecution.execSelect();
-					return rs;
-				} catch (QueryExceptionHTTP e) {
-					if(e.getCause() instanceof SocketTimeoutException){
-						logger.warn("Got timeout");
-					} else {
-						logger.error("Exception executing query", e);
-					}
-				}
-			}
-			try {
-				Thread.sleep(100);
-			} catch (InterruptedException e) {
-				e.printStackTrace();
-			}
-			
-		} else {
-			QueryExecution queryExecution = QueryExecutionFactory.create(query, ((LocalModelBasedSparqlEndpointKS)ks).getModel());
-			rs = queryExecution.execSelect();
-		}
+		com.hp.hpl.jena.query.ResultSet rs = qef.createQueryExecution(query).execSelect();
 		return rs;
 	}
 	
 	protected com.hp.hpl.jena.query.ResultSet executeSelectQuery(Query query, boolean cached) {
-		com.hp.hpl.jena.query.ResultSet rs = null;
-		if(ks.isRemote()){
-			SparqlEndpoint endpoint = ((SparqlEndpointKS) ks).getEndpoint();
-			ExtractionDBCache cache = ks.getCache();
-			if(cache != null && cached){
-				rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query.toString()));
-			} else {
-				QueryEngineHTTP queryExecution = new QueryEngineHTTP(endpoint.getURL().toString(),
-						query);
-				queryExecution.setDefaultGraphURIs(endpoint.getDefaultGraphURIs());
-				queryExecution.setNamedGraphURIs(endpoint.getNamedGraphURIs());
-				try {
-					rs = queryExecution.execSelect();
-					return rs;
-				} catch (QueryExceptionHTTP e) {
-					if(e.getCause() instanceof SocketTimeoutException){
-						logger.warn("Got timeout");
-					} else {
-						logger.error("Exception executing query", e);
-					}
-				}
-			}
-			
-		} else {
-			QueryExecution queryExecution = QueryExecutionFactory.create(query, ((LocalModelBasedSparqlEndpointKS)ks).getModel());
-			rs = queryExecution.execSelect();
-		}
+		com.hp.hpl.jena.query.ResultSet rs = qef.createQueryExecution(query).execSelect();
 		return rs;
 	}
 	
 	protected Model executeConstructQuery(Query query, long timeout) {
-		if(ks.isRemote()){
-			SparqlEndpoint endpoint = ((SparqlEndpointKS) ks).getEndpoint();
-			ExtractionDBCache cache = ks.getCache();
-			Model model = null;
-			try {
-//				if(cache != null){
-//					try {
-//						model = cache.executeConstructQuery(endpoint, query.toString());
-//					} catch (UnsupportedEncodingException e) {
-//						e.printStackTrace();
-//					} catch (SQLException e) {
-//						e.printStackTrace();
-//					}
-//				} else {
-					QueryEngineHTTP queryExecution = new QueryEngineHTTP(endpoint.getURL().toString(),
-							query);
-					queryExecution.setDefaultGraphURIs(endpoint.getDefaultGraphURIs());
-					queryExecution.setNamedGraphURIs(endpoint.getNamedGraphURIs());
-					queryExecution.setTimeout(timeout, timeout);
-					model = queryExecution.execConstruct();
-//				}
-				logger.debug("Got " + model.size() + " triples.");
-				return model;
-			} catch (QueryExceptionHTTP e) {
-				if(e.getCause() instanceof SocketTimeoutException){
-					logger.warn("Got timeout");
-				} else {
-					logger.error("Exception executing query", e);
-				}
-				return ModelFactory.createDefaultModel();
+		QueryExecution qe = qef.createQueryExecution(query);
+		qe.setTimeout(timeout);
+		try {
+			return qe.execConstruct();
+		} catch (QueryExceptionHTTP e) {
+			if(e.getCause() instanceof SocketTimeoutException){
+				logger.warn("Got timeout");
+			} else {
+				logger.error("Exception executing query", e);
 			}
-		} else {
-			QueryExecution queryExecution = QueryExecutionFactory.create(query, ((LocalModelBasedSparqlEndpointKS)ks).getModel());
-			Model model = queryExecution.execConstruct();
-			return model;
+			return ModelFactory.createDefaultModel();
 		}
 	}
 	
 	protected Model executeConstructQuery(Query query) {
-		if(ks.isRemote()){
-			SparqlEndpoint endpoint = ((SparqlEndpointKS) ks).getEndpoint();
-			ExtractionDBCache cache = ks.getCache();
-			Model model = null;
-			try {
-				if(cache != null){
-					try {
-						model = cache.executeConstructQuery(endpoint, query.toString());
-					} catch (UnsupportedEncodingException e) {
-						e.printStackTrace();
-					} catch (SQLException e) {
-						e.printStackTrace();
-					}
-				} else {
-					QueryEngineHTTP queryExecution = new QueryEngineHTTP(endpoint.getURL().toString(),
-							query);
-					queryExecution.setDefaultGraphURIs(endpoint.getDefaultGraphURIs());
-					queryExecution.setNamedGraphURIs(endpoint.getNamedGraphURIs());
-					model = queryExecution.execConstruct();
-				}
-				logger.debug("Got " + model.size() + " triples.");
-				return model;
-			} catch (QueryExceptionHTTP e) {
-				if(e.getCause() instanceof SocketTimeoutException){
-					logger.warn("Got timeout");
-				} else {
-					logger.error("Exception executing query", e);
-				}
-				return ModelFactory.createDefaultModel();
+		QueryExecution qe = qef.createQueryExecution(query);
+		try {
+			return qe.execConstruct();
+		} catch (QueryExceptionHTTP e) {
+			if(e.getCause() instanceof SocketTimeoutException){
+				logger.warn("Got timeout");
+			} else {
+				logger.error("Exception executing query", e);
 			}
-		} else {
-			QueryExecution queryExecution = QueryExecutionFactory.create(query, ((LocalModelBasedSparqlEndpointKS)ks).getModel());
-			Model model = queryExecution.execConstruct();
-			return model;
+			return ModelFactory.createDefaultModel();
 		}
 	}
 	

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4118] trunk/components-core/src/main/java/org/ dllearner/algorithms/DisjointClassesLearner.java

From: <lor...@us...> - 2013-10-05 09:10:04

Revision: 4118
          http://sourceforge.net/p/dl-learner/code/4118
Author:   lorenz_b
Date:     2013-10-05 09:10:00 +0000 (Sat, 05 Oct 2013)
Log Message:
-----------
Refined disjointness learner.

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/DisjointClassesLearner.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/DisjointClassesLearner.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/DisjointClassesLearner.java	2013-10-04 21:00:12 UTC (rev 4117)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/DisjointClassesLearner.java	2013-10-05 09:10:00 UTC (rev 4118)
@@ -568,9 +568,7 @@
 		
 		for (NamedClass clsA : classes) {
 			for (NamedClass clsB : classes) {
-				if(!clsA.equals(clsB)){
-					axioms.add(computeDisjointess(clsA, clsB));
-				}
+				axioms.add(computeDisjointess(clsA, clsB));
 			}
 		}
 		

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4117] trunk/components-core/src/main/java/org/ dllearner

From: <lor...@us...> - 2013-10-04 21:00:15

Revision: 4117
          http://sourceforge.net/p/dl-learner/code/4117
Author:   lorenz_b
Date:     2013-10-04 21:00:12 +0000 (Fri, 04 Oct 2013)
Log Message:
-----------
Adapted log levels.

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/DisjointClassesLearner.java
    trunk/components-core/src/main/java/org/dllearner/core/AbstractAxiomLearningAlgorithm.java
    trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/DisjointClassesLearner.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/DisjointClassesLearner.java	2013-10-03 10:30:55 UTC (rev 4116)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/DisjointClassesLearner.java	2013-10-04 21:00:12 UTC (rev 4117)
@@ -505,9 +505,19 @@
 		return evaluatedDescriptions;
 	}
 	
-	private EvaluatedAxiom computeDisjointess(NamedClass clsA, NamedClass clsB){
+	public EvaluatedAxiom computeDisjointess(NamedClass clsA, NamedClass clsB){
 		logger.debug("Computing disjointness between " + clsA + " and " + clsB + "...");
 		
+		//if clsA = clsB
+		if(clsA.equals(clsB)){
+			return new EvaluatedAxiom(new DisjointClassesAxiom(clsA, clsB), new AxiomScore(0d, 1d));
+		};
+		
+		//if the classes are connected via subsumption we assume that they are not disjoint 
+		if(reasoner.isSuperClassOf(clsA, clsB) || reasoner.isSuperClassOf(clsB, clsA)){
+			return new EvaluatedAxiom(new DisjointClassesAxiom(clsA, clsB), new AxiomScore(0d, 1d));
+		};
+		
 		double scoreValue = 0;
 		
 		//get number of instances of A

Modified: trunk/components-core/src/main/java/org/dllearner/core/AbstractAxiomLearningAlgorithm.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/core/AbstractAxiomLearningAlgorithm.java	2013-10-03 10:30:55 UTC (rev 4116)
+++ trunk/components-core/src/main/java/org/dllearner/core/AbstractAxiomLearningAlgorithm.java	2013-10-04 21:00:12 UTC (rev 4117)
@@ -278,7 +278,7 @@
 	}
 	
 	protected Model executeConstructQuery(String query) {
-		logger.debug("Sending query\n{} ...", query);
+		logger.trace("Sending query\n{} ...", query);
 		if(ks.isRemote()){
 			SparqlEndpoint endpoint = ((SparqlEndpointKS) ks).getEndpoint();
 			QueryEngineHTTP queryExecution = new QueryEngineHTTP(endpoint.getURL().toString(),
@@ -315,7 +315,7 @@
 	}
 	
 	protected ResultSet executeSelectQuery(String query) {
-		logger.info("Sending query\n{} ...", query);
+		logger.trace("Sending query\n{} ...", query);
 		if(ks.isRemote()){
 			SparqlEndpoint endpoint = ((SparqlEndpointKS) ks).getEndpoint();
 			QueryEngineHTTP queryExecution = new QueryEngineHTTP(endpoint.getURL().toString(),
@@ -347,7 +347,7 @@
 	}
 	
 	protected ResultSet executeSelectQuery(String query, Model model) {
-		logger.debug("Sending query on local model\n{} ...", query);
+		logger.trace("Sending query on local model\n{} ...", query);
 		QueryExecution qexec = QueryExecutionFactory.create(QueryFactory.create(query, Syntax.syntaxARQ), model);
 		ResultSet rs = qexec.execSelect();;
 
@@ -355,7 +355,7 @@
 	}
 	
 	protected boolean executeAskQuery(String query){
-		logger.debug("Sending query\n{} ...", query);
+		logger.trace("Sending query\n{} ...", query);
 		if(ks.isRemote()){
 			SparqlEndpoint endpoint = ((SparqlEndpointKS) ks).getEndpoint();
 			QueryEngineHTTP queryExecution = new QueryEngineHTTP(endpoint.getURL().toString(), query);

Modified: trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java	2013-10-03 10:30:55 UTC (rev 4116)
+++ trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java	2013-10-04 21:00:12 UTC (rev 4117)
@@ -592,6 +592,22 @@
 		}
 		return types;
 	}
+	
+	public Set<NamedClass> getTypes(Individual individual, String namespace) {
+		Set<NamedClass> types = new HashSet<NamedClass>();
+		String query = "SELECT DISTINCT ?class WHERE {<" + individual.getName() + "> a ?class.";
+		if(namespace != null){
+			query += "FILTER(REGEX(STR(?class),'^" + namespace + "'))";
+		}
+		query += "}";
+		ResultSet rs = executeSelectQuery(query);
+		QuerySolution qs;
+		while(rs.hasNext()){
+			qs = rs.next();
+			types.add(new NamedClass(qs.getResource("class").getURI()));
+		}
+		return types;
+	}
 
 	public Set<NamedClass> getTypes() {
 		return getTypes((String)null);
@@ -1638,14 +1654,14 @@
 	}
 
 	private ResultSet executeSelectQuery(String query){
-		logger.debug("Sending query \n {}", query);
+		logger.trace("Sending query \n {}", query);
 		QueryExecution qe = qef.createQueryExecution(query);
 		ResultSet rs = qe.execSelect();
 		return rs;
 	}
 
 	private ResultSet executeSelectQuery(String query, long timeout){
-		logger.debug("Sending query \n {}", query);
+		logger.trace("Sending query \n {}", query);
 		QueryExecution qe = qef.createQueryExecution(query);
 		qe.setTimeout(timeout);
 		ResultSet rs = qe.execSelect();
@@ -1653,7 +1669,7 @@
 	}
 	
 	private boolean executeAskQuery(String query){
-		logger.debug("Sending query \n {}", query);
+		logger.trace("Sending query \n {}", query);
 		QueryExecution qe = qef.createQueryExecution(query);
 		boolean ret = qe.execAsk();
 		return ret;

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4116] trunk/components-core/src/main/java/org/dllearner

From: <lor...@us...> - 2013-10-03 10:30:58

Revision: 4116
          http://sourceforge.net/p/dl-learner/code/4116
Author:   lorenz_b
Date:     2013-10-03 10:30:55 +0000 (Thu, 03 Oct 2013)
Log Message:
-----------
Added equals + hashcode to disjointness axiom.

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/DisjointClassesLearner.java
    trunk/components-core/src/main/java/org/dllearner/core/owl/DisjointClassesAxiom.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/DisjointClassesLearner.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/DisjointClassesLearner.java	2013-10-03 09:14:35 UTC (rev 4115)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/DisjointClassesLearner.java	2013-10-03 10:30:55 UTC (rev 4116)
@@ -132,8 +132,6 @@
 			return;
 		}
 		
-		computeAllDisjointClassAxiomsOptimized();
-		
 		//at first get all existing classes in knowledge base
 		allClasses = getAllClasses();
 		allClasses.remove(classToDescribe);

Modified: trunk/components-core/src/main/java/org/dllearner/core/owl/DisjointClassesAxiom.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/core/owl/DisjointClassesAxiom.java	2013-10-03 09:14:35 UTC (rev 4115)
+++ trunk/components-core/src/main/java/org/dllearner/core/owl/DisjointClassesAxiom.java	2013-10-03 10:30:55 UTC (rev 4116)
@@ -119,6 +119,39 @@
 		}
 		sb.append(")");
 		return sb.toString();
+	}
+
+	/* (non-Javadoc)
+	 * @see java.lang.Object#hashCode()
+	 */
+	@Override
+	public int hashCode() {
+		final int prime = 31;
+		int result = 1;
+		result = prime * result + ((descriptions == null) ? 0 : descriptions.hashCode());
+		return result;
+	}
+
+	/* (non-Javadoc)
+	 * @see java.lang.Object#equals(java.lang.Object)
+	 */
+	@Override
+	public boolean equals(Object obj) {
+		if (this == obj)
+			return true;
+		if (obj == null)
+			return false;
+		if (getClass() != obj.getClass())
+			return false;
+		DisjointClassesAxiom other = (DisjointClassesAxiom) obj;
+		if (descriptions == null) {
+			if (other.descriptions != null)
+				return false;
+		} else if (!descriptions.equals(other.descriptions))
+			return false;
+		return true;
 	}	
 	
+	
+	
 }

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4115] trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java

From: <lor...@us...> - 2013-10-03 09:14:39

Revision: 4115
          http://sourceforge.net/p/dl-learner/code/4115
Author:   lorenz_b
Date:     2013-10-03 09:14:35 +0000 (Thu, 03 Oct 2013)
Log Message:
-----------
Added method which returns a set of domains and ranges instead of the intersection.

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java

Modified: trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java	2013-10-03 08:53:06 UTC (rev 4114)
+++ trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java	2013-10-03 09:14:35 UTC (rev 4115)
@@ -570,7 +570,7 @@
 		return schema;
 	}
 
-	private Model loadIncrementally(String query){System.err.println(query);
+	private Model loadIncrementally(String query){
 		QueryExecutionFactory old = qef;
 		qef = new QueryExecutionFactoryPaginated(qef, 10000);
 		QueryExecution qe = qef.createQueryExecution(query);
@@ -1200,7 +1200,24 @@
 		} 
 		return null;
 	}
+	
+	public SortedSet<NamedClass> getDomains(ObjectProperty objectProperty) {
+		String query = String.format("SELECT ?domain WHERE {" +
+				"<%s> <%s> ?domain. FILTER(isIRI(?domain))" +
+				"}", 
+				objectProperty.getName(), RDFS.domain.getURI());
 
+		ResultSet rs = executeSelectQuery(query);
+		QuerySolution qs;
+		SortedSet<NamedClass> domains = new TreeSet<NamedClass>();
+		while(rs.hasNext()){
+			qs = rs.next();
+			domains.add(new NamedClass(qs.getResource("domain").getURI()));
+
+		}
+		return domains;
+	}
+
 	@Override
 	public Description getDomain(DatatypeProperty datatypeProperty) {
 		String query = String.format("SELECT ?domain WHERE {" +
@@ -1237,7 +1254,6 @@
 		while(rs.hasNext()){
 			qs = rs.next();
 			ranges.add(new NamedClass(qs.getResource("range").getURI()));
-
 		}
 		if(ranges.size() == 1){
 			return ranges.get(0);
@@ -1246,7 +1262,23 @@
 		} 
 		return null;
 	}
+	
+	public SortedSet<NamedClass> getRanges(ObjectProperty objectProperty) {
+		String query = String.format("SELECT ?range WHERE {" +
+				"<%s> <%s> ?range. FILTER(isIRI(?range))" +
+				"}", 
+				objectProperty.getName(), RDFS.range.getURI());
 
+		ResultSet rs = executeSelectQuery(query);
+		QuerySolution qs;
+		SortedSet<NamedClass> ranges = new TreeSet<NamedClass>();
+		while(rs.hasNext()){
+			qs = rs.next();
+			ranges.add(new NamedClass(qs.getResource("range").getURI()));
+		}
+		return ranges;
+	}
+
 	public boolean isObjectProperty(String propertyURI){
 		String query = String.format("ASK {<%s> a <%s>}", propertyURI, OWL.ObjectProperty.getURI());
 		boolean isObjectProperty = executeAskQuery(query);

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4114] trunk/components-core/src/main

From: <lor...@us...> - 2013-10-03 08:53:10

Revision: 4114
          http://sourceforge.net/p/dl-learner/code/4114
Author:   lorenz_b
Date:     2013-10-03 08:53:06 +0000 (Thu, 03 Oct 2013)
Log Message:
-----------
Moved WSD into separate package.

Added Paths:
-----------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/StructuralEntityContext.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/RandomWordSenseDisambiguation.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SimpleWordSenseDisambiguation.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/StructureBasedWordSenseDisambiguation.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WordSenseDisambiguation.java
    trunk/components-core/src/main/resources/log4j.properties

Removed Paths:
-------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RandomWordSenseDisambiguation.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/SimpleWordSenseDisambiguation.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordSenseDisambiguation.java

Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RandomWordSenseDisambiguation.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RandomWordSenseDisambiguation.java	2013-10-03 08:50:57 UTC (rev 4113)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RandomWordSenseDisambiguation.java	2013-10-03 08:53:06 UTC (rev 4114)
@@ -1,59 +0,0 @@
-/**
- * Copyright (C) 2007-2013, Jens Lehmann
- *
- * This file is part of DL-Learner.
- *
- * DL-Learner is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3 of the License, or
- * (at your option) any later version.
- *
- * DL-Learner is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-package org.dllearner.algorithms.isle;
-
-import java.util.Random;
-import java.util.Set;
-
-import org.dllearner.algorithms.isle.index.Annotation;
-import org.dllearner.algorithms.isle.index.SemanticAnnotation;
-import org.dllearner.core.owl.Entity;
-import org.semanticweb.owlapi.model.OWLOntology;
-
-/**
- * Disambiguation by randomly selecting one of the candidates (baseline method).
- * 
- * @author Jens Lehmann
- *
- */
-public class RandomWordSenseDisambiguation extends WordSenseDisambiguation {
-
-	private Random random;
-	
-	public RandomWordSenseDisambiguation(OWLOntology ontology) {
-		super(ontology);
-		random = new Random();
-	}
-
-	@Override
-	public SemanticAnnotation disambiguate(Annotation annotation,
-			Set<Entity> candidateEntities) {
-		int pos = random.nextInt(candidateEntities.size());
-		int i = 0;
-		for(Entity e : candidateEntities)
-		{
-		    if (i == pos) {
-		    	return new SemanticAnnotation(annotation, e);
-		    }
-		    i++;
-		}
-		return null;
-	}
-
-}

Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/SimpleWordSenseDisambiguation.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/SimpleWordSenseDisambiguation.java	2013-10-03 08:50:57 UTC (rev 4113)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/SimpleWordSenseDisambiguation.java	2013-10-03 08:53:06 UTC (rev 4114)
@@ -1,107 +0,0 @@
-/**
- * 
- */
-package org.dllearner.algorithms.isle;
-
-import java.util.HashSet;
-import java.util.Set;
-
-import org.apache.log4j.Logger;
-import org.dllearner.algorithms.isle.index.Annotation;
-import org.dllearner.algorithms.isle.index.SemanticAnnotation;
-import org.dllearner.core.owl.Entity;
-import org.dllearner.utilities.owl.OWLAPIConverter;
-import org.semanticweb.owlapi.model.IRI;
-import org.semanticweb.owlapi.model.OWLAnnotationAssertionAxiom;
-import org.semanticweb.owlapi.model.OWLAnnotationProperty;
-import org.semanticweb.owlapi.model.OWLDataFactory;
-import org.semanticweb.owlapi.model.OWLEntity;
-import org.semanticweb.owlapi.model.OWLLiteral;
-import org.semanticweb.owlapi.model.OWLOntology;
-import org.semanticweb.owlapi.util.IRIShortFormProvider;
-import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider;
-
-import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl;
-
-/**
- * @author Lorenz Buehmann
- *
- */
-public class SimpleWordSenseDisambiguation extends WordSenseDisambiguation{
-	
-	
-	private static final Logger logger = Logger.getLogger(SimpleWordSenseDisambiguation.class.getName());
-	
-	private IRIShortFormProvider sfp = new SimpleIRIShortFormProvider();
-	private OWLDataFactory df = new OWLDataFactoryImpl();
-	private OWLAnnotationProperty annotationProperty = df.getRDFSLabel();
-
-	/**
-	 * @param ontology
-	 */
-	public SimpleWordSenseDisambiguation(OWLOntology ontology) {
-		super(ontology);
-	}
-
-	/* (non-Javadoc)
-	 * @see org.dllearner.algorithms.isle.WordSenseDisambiguation#disambiguate(org.dllearner.algorithms.isle.index.Annotation, java.util.Set)
-	 */
-	@Override
-	public SemanticAnnotation disambiguate(Annotation annotation, Set<Entity> candidateEntities) {
-		logger.debug("Linguistic annotations:\n" + annotation);
-		logger.debug("Candidate entities:" + candidateEntities);
-		String token = annotation.getToken().trim();
-		//check if annotation token matches label of entity or the part behind #(resp. /)
-		for (Entity entity : candidateEntities) {
-			Set<String> labels = getLabels(entity);
-			for (String label : labels) {
-				if(label.equals(token)){
-					logger.debug("Disambiguated entity: " + entity);
-					return new SemanticAnnotation(annotation, entity);
-				}
-			}
-			String shortForm = sfp.getShortForm(IRI.create(entity.getURI()));
-			if(annotation.equals(shortForm)){
-				logger.debug("Disambiguated entity: " + entity);
-				return new SemanticAnnotation(annotation, entity);
-			}
-		}
-		return null;
-	}
-	
-	private Set<String> getLabels(Entity entity){
-		Set<String> labels = new HashSet<String>();
-		OWLEntity owlEntity = OWLAPIConverter.getOWLAPIEntity(entity);
-		Set<OWLAnnotationAssertionAxiom> axioms = ontology.getAnnotationAssertionAxioms(owlEntity.getIRI());
-		for (OWLAnnotationAssertionAxiom annotation : axioms) {
-			if(annotation.getProperty().equals(annotationProperty)){
-				if (annotation.getValue() instanceof OWLLiteral) {
-                    OWLLiteral val = (OWLLiteral) annotation.getValue();
-                    labels.add(val.getLiteral());
-                }
-			}
-		}
-		return labels;
-	}
-	
-	private Set<String> getRelatedWordPhrases(Entity entity){
-		//add the labels if exist
-		Set<String> relatedWordPhrases = new HashSet<String>();
-		OWLEntity owlEntity = OWLAPIConverter.getOWLAPIEntity(entity);
-		Set<OWLAnnotationAssertionAxiom> axioms = ontology.getAnnotationAssertionAxioms(owlEntity.getIRI());
-		for (OWLAnnotationAssertionAxiom annotation : axioms) {
-			if(annotation.getProperty().equals(annotationProperty)){
-				if (annotation.getValue() instanceof OWLLiteral) {
-                    OWLLiteral val = (OWLLiteral) annotation.getValue();
-                    relatedWordPhrases.add(val.getLiteral());
-                }
-			}
-		}
-		//add the short form of the URI if no labels are available
-		if(relatedWordPhrases.isEmpty()){
-			relatedWordPhrases.add(sfp.getShortForm(IRI.create(entity.getURI())));
-		}
-		return relatedWordPhrases;
-	}
-
-}

Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/StructuralEntityContext.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/StructuralEntityContext.java	                        (rev 0)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/StructuralEntityContext.java	2013-10-03 08:53:06 UTC (rev 4114)
@@ -0,0 +1,207 @@
+/**
+ * 
+ */
+package org.dllearner.algorithms.isle;
+
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Set;
+
+import org.dllearner.core.owl.Entity;
+import org.dllearner.utilities.owl.OWLAPIConverter;
+import org.semanticweb.owlapi.model.AxiomType;
+import org.semanticweb.owlapi.model.OWLAnnotationAssertionAxiom;
+import org.semanticweb.owlapi.model.OWLAnnotationProperty;
+import org.semanticweb.owlapi.model.OWLAxiom;
+import org.semanticweb.owlapi.model.OWLClass;
+import org.semanticweb.owlapi.model.OWLDataFactory;
+import org.semanticweb.owlapi.model.OWLDataProperty;
+import org.semanticweb.owlapi.model.OWLEntity;
+import org.semanticweb.owlapi.model.OWLLiteral;
+import org.semanticweb.owlapi.model.OWLObjectProperty;
+import org.semanticweb.owlapi.model.OWLOntology;
+
+import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl;
+
+import com.google.common.collect.Sets;
+
+/**
+ * @author Lorenz Buehmann
+ *
+ */
+public class StructuralEntityContext {
+	
+	private static OWLDataFactory df = new OWLDataFactoryImpl();
+	private static Set<OWLAnnotationProperty> annotationProperties = Sets.newHashSet(
+			df.getRDFSLabel(),
+			df.getRDFSComment());
+	private static Set<String> languages = Sets.newHashSet("en");
+	
+	/**
+	 * Returns a set of words that describe entities related to the given entity.
+	 * @param ontology
+	 * @param entity
+	 * @return
+	 */
+	public static Set<String> getContextInNaturalLanguage(OWLOntology ontology, OWLEntity entity){
+		Set<String> context = new HashSet<String>();
+		
+		Set<OWLEntity> contextEntities = getContext(ontology, entity);
+		//add annotations for each entity
+		for (OWLEntity contextEntity : contextEntities) {
+			context.addAll(getAnnotations(ontology, contextEntity));
+		}
+		
+		return context;
+	}
+	
+	/**
+	 * Returns a set of words that describe entities related to the given entity.
+	 * @param ontology
+	 * @param entity
+	 * @return
+	 */
+	public static Set<String> getContextInNaturalLanguage(OWLOntology ontology, Entity entity){
+		Set<String> context = new HashSet<String>();
+		
+		Set<OWLEntity> contextEntities = getContext(ontology, entity);
+		//add annotations for each entity
+		for (OWLEntity contextEntity : contextEntities) {
+			context.addAll(getAnnotations(ontology, contextEntity));
+		}
+		
+		return context;
+	}
+	
+	/**
+	 * Returns a set of entities that are structural related to the given entity.
+	 * @param ontology
+	 * @param entity
+	 * @return
+	 */
+	public static Set<OWLEntity> getContext(OWLOntology ontology, OWLEntity entity){
+		
+		if(entity.isOWLClass()){
+			return getContext(ontology, entity.asOWLClass());
+		} else if(entity.isOWLObjectProperty()){
+			return getContext(ontology, entity.asOWLObjectProperty());
+		} else if(entity.isOWLDataProperty()){
+			return getContext(ontology, entity.asOWLDataProperty());
+		}
+		
+		throw new UnsupportedOperationException("Unsupported entity type: " + entity);
+	}
+	
+	/**
+	 * Returns a set of entities that are structural related to the given entity.
+	 * @param ontology
+	 * @param entity
+	 * @return
+	 */
+	public static Set<OWLEntity> getContext(OWLOntology ontology, Entity entity){
+		
+		OWLEntity owlEntity = OWLAPIConverter.getOWLAPIEntity(entity);
+		if(owlEntity.isOWLClass()){
+			return getContext(ontology, owlEntity.asOWLClass());
+		} else if(owlEntity.isOWLObjectProperty()){
+			return getContext(ontology, owlEntity.asOWLObjectProperty());
+		} else if(owlEntity.isOWLDataProperty()){
+			return getContext(ontology, owlEntity.asOWLDataProperty());
+		}
+		
+		throw new UnsupportedOperationException("Unsupported entity type: " + entity);
+	}
+	
+	public static Set<OWLEntity> getContext(OWLOntology ontology, OWLObjectProperty property){
+		Set<OWLEntity> context = new HashSet<OWLEntity>();
+		
+		Set<OWLAxiom> relatedAxioms = new HashSet<OWLAxiom>();
+		relatedAxioms.addAll(ontology.getObjectSubPropertyAxiomsForSubProperty(property));
+		relatedAxioms.addAll(ontology.getEquivalentObjectPropertiesAxioms(property));
+		relatedAxioms.addAll(ontology.getObjectPropertyDomainAxioms(property));
+		relatedAxioms.addAll(ontology.getObjectPropertyRangeAxioms(property));
+				
+		for (OWLAxiom axiom : relatedAxioms) {
+			context.addAll(axiom.getSignature());
+		}
+		
+		return context;
+	}
+	
+	public static Set<OWLEntity> getContext(OWLOntology ontology, OWLDataProperty property){
+		Set<OWLEntity> context = new HashSet<OWLEntity>();
+		
+		Set<OWLAxiom> relatedAxioms = new HashSet<OWLAxiom>();
+		relatedAxioms.addAll(ontology.getDataSubPropertyAxiomsForSubProperty(property));
+		relatedAxioms.addAll(ontology.getEquivalentDataPropertiesAxioms(property));
+		relatedAxioms.addAll(ontology.getDataPropertyDomainAxioms(property));
+		
+		for (OWLAxiom axiom : relatedAxioms) {
+			context.addAll(axiom.getSignature());
+		}
+		
+		return context;
+	}
+	
+	public static Set<OWLEntity> getContext(OWLOntology ontology, OWLClass cls){
+		Set<OWLEntity> context = new HashSet<OWLEntity>();
+		
+		Set<OWLAxiom> relatedAxioms = new HashSet<OWLAxiom>();
+		relatedAxioms.addAll(ontology.getSubClassAxiomsForSubClass(cls));
+		relatedAxioms.addAll(ontology.getEquivalentClassesAxioms(cls));
+		
+		//axioms where cls is domain of a property
+		Set<OWLAxiom> domainAxioms = new HashSet<OWLAxiom>();
+		domainAxioms.addAll(ontology.getAxioms(AxiomType.OBJECT_PROPERTY_DOMAIN));
+		domainAxioms.addAll(ontology.getAxioms(AxiomType.DATA_PROPERTY_DOMAIN));
+		for (Iterator<OWLAxiom> iterator = domainAxioms.iterator(); iterator.hasNext();) {
+			OWLAxiom axiom = iterator.next();
+			if(!axiom.getSignature().contains(cls)){
+				iterator.remove();
+			}
+		}
+		relatedAxioms.addAll(domainAxioms);
+		
+		//axioms where cls is range of a object property
+		Set<OWLAxiom> rangeAxioms = new HashSet<OWLAxiom>();
+		rangeAxioms.addAll(ontology.getAxioms(AxiomType.OBJECT_PROPERTY_RANGE));
+		for (Iterator<OWLAxiom> iterator = rangeAxioms.iterator(); iterator.hasNext();) {
+			OWLAxiom axiom = iterator.next();
+			if(!axiom.getSignature().contains(cls)){
+				iterator.remove();
+			}
+		}
+		relatedAxioms.addAll(rangeAxioms);
+		
+		for (OWLAxiom axiom : relatedAxioms) {
+			context.addAll(axiom.getSignature());
+		}
+		
+		return context;
+	}
+	
+	private static Set<String> getAnnotations(OWLOntology ontology, OWLEntity entity){
+		Set<String> annotations = new HashSet<String>();
+		Set<OWLAnnotationAssertionAxiom> axioms = ontology.getAnnotationAssertionAxioms(entity.getIRI());
+		for (OWLAnnotationAssertionAxiom annotation : axioms) {
+			if(annotationProperties.contains(annotation.getProperty())){
+				if (annotation.getValue() instanceof OWLLiteral) {
+                    OWLLiteral val = (OWLLiteral) annotation.getValue();
+                    if(val.getLang() != null && !val.getLang().isEmpty()){
+                    	if(languages.contains(val.getLang())){
+                    		if(!val.getLiteral().isEmpty()){
+                    			annotations.add(val.getLiteral());
+                    		}
+                    	}
+                    } else {
+                    	if(!val.getLiteral().isEmpty()){
+                			annotations.add(val.getLiteral());
+                		}
+                    }
+                }
+			}
+		}
+		return annotations;
+	}
+
+}

Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordSenseDisambiguation.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordSenseDisambiguation.java	2013-10-03 08:50:57 UTC (rev 4113)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordSenseDisambiguation.java	2013-10-03 08:53:06 UTC (rev 4114)
@@ -1,36 +0,0 @@
-package org.dllearner.algorithms.isle;
-
-import org.dllearner.algorithms.isle.index.Annotation;
-import org.dllearner.algorithms.isle.index.Document;
-import org.dllearner.algorithms.isle.index.SemanticAnnotation;
-import org.dllearner.core.owl.Entity;
-import org.semanticweb.owlapi.model.OWLOntology;
-
-import java.util.Set;
-
-/**
- * Abstract class for the word sense disambiguation component.
- *
- * @author Daniel Fleischhacker
- */
-public abstract class WordSenseDisambiguation {
-    OWLOntology ontology;
-
-    /**
-     * Initializes the word sense disambiguation to use the given ontology.
-     *
-     * @param ontology the ontology to disambiguate on
-     */
-    public WordSenseDisambiguation(OWLOntology ontology) {
-        this.ontology = ontology;
-    }
-
-    /**
-     * Chooses the correct entity for the given annotation from a set of candidate entities.
-     *
-     * @param annotation        the annotation to find entity for
-     * @param candidateEntities the set of candidate entities
-     * @return semantic annotation containing the given annotation and the chosen entity
-     */
-    public abstract SemanticAnnotation disambiguate(Annotation annotation, Set<Entity> candidateEntities);
-}

Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/RandomWordSenseDisambiguation.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/RandomWordSenseDisambiguation.java	                        (rev 0)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/RandomWordSenseDisambiguation.java	2013-10-03 08:53:06 UTC (rev 4114)
@@ -0,0 +1,59 @@
+/**
+ * Copyright (C) 2007-2013, Jens Lehmann
+ *
+ * This file is part of DL-Learner.
+ *
+ * DL-Learner is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * DL-Learner is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+package org.dllearner.algorithms.isle.wsd;
+
+import java.util.Random;
+import java.util.Set;
+
+import org.dllearner.algorithms.isle.index.Annotation;
+import org.dllearner.algorithms.isle.index.SemanticAnnotation;
+import org.dllearner.core.owl.Entity;
+import org.semanticweb.owlapi.model.OWLOntology;
+
+/**
+ * Disambiguation by randomly selecting one of the candidates (baseline method).
+ * 
+ * @author Jens Lehmann
+ *
+ */
+public class RandomWordSenseDisambiguation extends WordSenseDisambiguation {
+
+	private Random random;
+	
+	public RandomWordSenseDisambiguation(OWLOntology ontology) {
+		super(ontology);
+		random = new Random();
+	}
+
+	@Override
+	public SemanticAnnotation disambiguate(Annotation annotation,
+			Set<Entity> candidateEntities) {
+		int pos = random.nextInt(candidateEntities.size());
+		int i = 0;
+		for(Entity e : candidateEntities)
+		{
+		    if (i == pos) {
+		    	return new SemanticAnnotation(annotation, e);
+		    }
+		    i++;
+		}
+		return null;
+	}
+
+}

Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SimpleWordSenseDisambiguation.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SimpleWordSenseDisambiguation.java	                        (rev 0)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SimpleWordSenseDisambiguation.java	2013-10-03 08:53:06 UTC (rev 4114)
@@ -0,0 +1,107 @@
+/**
+ * 
+ */
+package org.dllearner.algorithms.isle.wsd;
+
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.log4j.Logger;
+import org.dllearner.algorithms.isle.index.Annotation;
+import org.dllearner.algorithms.isle.index.SemanticAnnotation;
+import org.dllearner.core.owl.Entity;
+import org.dllearner.utilities.owl.OWLAPIConverter;
+import org.semanticweb.owlapi.model.IRI;
+import org.semanticweb.owlapi.model.OWLAnnotationAssertionAxiom;
+import org.semanticweb.owlapi.model.OWLAnnotationProperty;
+import org.semanticweb.owlapi.model.OWLDataFactory;
+import org.semanticweb.owlapi.model.OWLEntity;
+import org.semanticweb.owlapi.model.OWLLiteral;
+import org.semanticweb.owlapi.model.OWLOntology;
+import org.semanticweb.owlapi.util.IRIShortFormProvider;
+import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider;
+
+import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl;
+
+/**
+ * @author Lorenz Buehmann
+ *
+ */
+public class SimpleWordSenseDisambiguation extends WordSenseDisambiguation{
+	
+	
+	private static final Logger logger = Logger.getLogger(SimpleWordSenseDisambiguation.class.getName());
+	
+	private IRIShortFormProvider sfp = new SimpleIRIShortFormProvider();
+	private OWLDataFactory df = new OWLDataFactoryImpl();
+	private OWLAnnotationProperty annotationProperty = df.getRDFSLabel();
+
+	/**
+	 * @param ontology
+	 */
+	public SimpleWordSenseDisambiguation(OWLOntology ontology) {
+		super(ontology);
+	}
+
+	/* (non-Javadoc)
+	 * @see org.dllearner.algorithms.isle.WordSenseDisambiguation#disambiguate(org.dllearner.algorithms.isle.index.Annotation, java.util.Set)
+	 */
+	@Override
+	public SemanticAnnotation disambiguate(Annotation annotation, Set<Entity> candidateEntities) {
+		logger.debug("Linguistic annotations:\n" + annotation);
+		logger.debug("Candidate entities:" + candidateEntities);
+		String token = annotation.getToken().trim();
+		//check if annotation token matches label of entity or the part behind #(resp. /)
+		for (Entity entity : candidateEntities) {
+			Set<String> labels = getLabels(entity);
+			for (String label : labels) {
+				if(label.equals(token)){
+					logger.debug("Disambiguated entity: " + entity);
+					return new SemanticAnnotation(annotation, entity);
+				}
+			}
+			String shortForm = sfp.getShortForm(IRI.create(entity.getURI()));
+			if(annotation.equals(shortForm)){
+				logger.debug("Disambiguated entity: " + entity);
+				return new SemanticAnnotation(annotation, entity);
+			}
+		}
+		return null;
+	}
+	
+	private Set<String> getLabels(Entity entity){
+		Set<String> labels = new HashSet<String>();
+		OWLEntity owlEntity = OWLAPIConverter.getOWLAPIEntity(entity);
+		Set<OWLAnnotationAssertionAxiom> axioms = ontology.getAnnotationAssertionAxioms(owlEntity.getIRI());
+		for (OWLAnnotationAssertionAxiom annotation : axioms) {
+			if(annotation.getProperty().equals(annotationProperty)){
+				if (annotation.getValue() instanceof OWLLiteral) {
+                    OWLLiteral val = (OWLLiteral) annotation.getValue();
+                    labels.add(val.getLiteral());
+                }
+			}
+		}
+		return labels;
+	}
+	
+	private Set<String> getRelatedWordPhrases(Entity entity){
+		//add the labels if exist
+		Set<String> relatedWordPhrases = new HashSet<String>();
+		OWLEntity owlEntity = OWLAPIConverter.getOWLAPIEntity(entity);
+		Set<OWLAnnotationAssertionAxiom> axioms = ontology.getAnnotationAssertionAxioms(owlEntity.getIRI());
+		for (OWLAnnotationAssertionAxiom annotation : axioms) {
+			if(annotation.getProperty().equals(annotationProperty)){
+				if (annotation.getValue() instanceof OWLLiteral) {
+                    OWLLiteral val = (OWLLiteral) annotation.getValue();
+                    relatedWordPhrases.add(val.getLiteral());
+                }
+			}
+		}
+		//add the short form of the URI if no labels are available
+		if(relatedWordPhrases.isEmpty()){
+			relatedWordPhrases.add(sfp.getShortForm(IRI.create(entity.getURI())));
+		}
+		return relatedWordPhrases;
+	}
+
+}

Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/StructureBasedWordSenseDisambiguation.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/StructureBasedWordSenseDisambiguation.java	                        (rev 0)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/StructureBasedWordSenseDisambiguation.java	2013-10-03 08:53:06 UTC (rev 4114)
@@ -0,0 +1,41 @@
+/**
+ * 
+ */
+package org.dllearner.algorithms.isle.wsd;
+
+import java.util.Set;
+
+import org.dllearner.algorithms.isle.StructuralEntityContext;
+import org.dllearner.algorithms.isle.index.Annotation;
+import org.dllearner.algorithms.isle.index.SemanticAnnotation;
+import org.dllearner.core.owl.Entity;
+import org.semanticweb.owlapi.model.OWLEntity;
+import org.semanticweb.owlapi.model.OWLOntology;
+
+/**
+ * @author Lorenz Buehmann
+ *
+ */
+public class StructureBasedWordSenseDisambiguation extends WordSenseDisambiguation{
+
+	/**
+	 * @param ontology
+	 */
+	public StructureBasedWordSenseDisambiguation(OWLOntology ontology) {
+		super(ontology);
+	}
+
+	/* (non-Javadoc)
+	 * @see org.dllearner.algorithms.isle.wsd.WordSenseDisambiguation#disambiguate(org.dllearner.algorithms.isle.index.Annotation, java.util.Set)
+	 */
+	@Override
+	public SemanticAnnotation disambiguate(Annotation annotation, Set<Entity> candidateEntities) {
+		//TODO we should find the sentence in which the annotated token is contained in
+		String content = annotation.getReferencedDocument().getContent();
+		for (Entity entity : candidateEntities) {
+			Set<String> entityContext = StructuralEntityContext.getContextInNaturalLanguage(ontology, entity);
+		}
+		return null;
+	}
+
+}

Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WordSenseDisambiguation.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WordSenseDisambiguation.java	                        (rev 0)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WordSenseDisambiguation.java	2013-10-03 08:53:06 UTC (rev 4114)
@@ -0,0 +1,35 @@
+package org.dllearner.algorithms.isle.wsd;
+
+import java.util.Set;
+
+import org.dllearner.algorithms.isle.index.Annotation;
+import org.dllearner.algorithms.isle.index.SemanticAnnotation;
+import org.dllearner.core.owl.Entity;
+import org.semanticweb.owlapi.model.OWLOntology;
+
+/**
+ * Abstract class for the word sense disambiguation component.
+ *
+ * @author Daniel Fleischhacker
+ */
+public abstract class WordSenseDisambiguation {
+    OWLOntology ontology;
+
+    /**
+     * Initializes the word sense disambiguation to use the given ontology.
+     *
+     * @param ontology the ontology to disambiguate on
+     */
+    public WordSenseDisambiguation(OWLOntology ontology) {
+        this.ontology = ontology;
+    }
+
+    /**
+     * Chooses the correct entity for the given annotation from a set of candidate entities.
+     *
+     * @param annotation        the annotation to find entity for
+     * @param candidateEntities the set of candidate entities
+     * @return semantic annotation containing the given annotation and the chosen entity
+     */
+    public abstract SemanticAnnotation disambiguate(Annotation annotation, Set<Entity> candidateEntities);
+}

Added: trunk/components-core/src/main/resources/log4j.properties
===================================================================
--- trunk/components-core/src/main/resources/log4j.properties	                        (rev 0)
+++ trunk/components-core/src/main/resources/log4j.properties	2013-10-03 08:53:06 UTC (rev 4114)
@@ -0,0 +1,17 @@
+# Direct log messages to stdout
+# Root logger option
+log4j.rootLogger=INFO,stdout
+
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+#log4j.appender.stdout.layout.ConversionPattern=%d{ABSOLUTE} %p [%c] %L - %m%n
+log4j.appender.stdout.layout.ConversionPattern=%d{ABSOLUTE} [%c] - %m%n
+
+#File Appender
+log4j.appender.FA=org.apache.log4j.FileAppender
+log4j.appender.FA.File=REX.log
+log4j.appender.FA.layout=org.apache.log4j.PatternLayout
+log4j.appender.FA.layout.ConversionPattern=%d{ABSOLUTE} %p [%c] %L - %m%n
+
+
+log4j.category.org.dllearner.algorithms=DEBUG


Property changes on: trunk/components-core/src/main/resources/log4j.properties
___________________________________________________________________
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4113] trunk/components-core

From: <lor...@us...> - 2013-10-03 08:51:00

Revision: 4113
          http://sourceforge.net/p/dl-learner/code/4113
Author:   lorenz_b
Date:     2013-10-03 08:50:57 +0000 (Thu, 03 Oct 2013)
Log Message:
-----------
Minor bug fix in SPARQL reasoner.

Modified Paths:
--------------
    trunk/components-core/pom.xml
    trunk/components-core/src/main/java/org/dllearner/algorithms/DisjointClassesLearner.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java
    trunk/components-core/src/main/java/org/dllearner/core/owl/Description.java
    trunk/components-core/src/main/java/org/dllearner/parser/KBParser.java
    trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java
    trunk/components-core/src/main/java/org/dllearner/utilities/GreedyCohaerencyExtractor.java
    trunk/components-core/src/main/java/org/dllearner/utilities/owl/OWLClassExpressionToSPARQLConverter.java
    trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java

Modified: trunk/components-core/pom.xml
===================================================================
--- trunk/components-core/pom.xml	2013-09-23 12:22:30 UTC (rev 4112)
+++ trunk/components-core/pom.xml	2013-10-03 08:50:57 UTC (rev 4113)
@@ -308,6 +308,23 @@
 			<artifactId>commons-math3</artifactId>
 			<version>3.1.1</version>
 		</dependency>
+		<dependency>
+				<groupId>org.aksw.commons</groupId>
+				<artifactId>collections</artifactId>
+			</dependency>
+			<dependency>
+				<groupId>org.aksw.commons</groupId>
+				<artifactId>util</artifactId>
+			</dependency>
+			<dependency>
+				<groupId>org.apache.commons</groupId>
+				<artifactId>commons-compress</artifactId>
+				<version>1.4.1</version>
+			</dependency>
+			<dependency>
+				<groupId>com.h2database</groupId>
+				<artifactId>h2</artifactId>
+			</dependency>
 	</dependencies>
 	<dependencyManagement>
 		<dependencies>

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/DisjointClassesLearner.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/DisjointClassesLearner.java	2013-09-23 12:22:30 UTC (rev 4112)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/DisjointClassesLearner.java	2013-10-03 08:50:57 UTC (rev 4113)
@@ -42,11 +42,15 @@
 import org.dllearner.core.owl.ClassHierarchy;
 import org.dllearner.core.owl.Description;
 import org.dllearner.core.owl.DisjointClassesAxiom;
+import org.dllearner.core.owl.Intersection;
 import org.dllearner.core.owl.NamedClass;
 import org.dllearner.kb.LocalModelBasedSparqlEndpointKS;
 import org.dllearner.kb.SparqlEndpointKS;
 import org.dllearner.kb.sparql.SparqlEndpoint;
 import org.dllearner.learningproblems.AxiomScore;
+import org.dllearner.reasoning.SPARQLReasoner;
+import org.dllearner.utilities.owl.OWLClassExpressionToSPARQLConverter;
+import org.semanticweb.owlapi.model.OWLClassExpression;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -128,6 +132,8 @@
 			return;
 		}
 		
+		computeAllDisjointClassAxiomsOptimized();
+		
 		//at first get all existing classes in knowledge base
 		allClasses = getAllClasses();
 		allClasses.remove(classToDescribe);
@@ -177,7 +183,7 @@
 			int cnt = qs.getLiteral("cnt").getInt();
 			class2Overlap.put(cls, cnt);
 		}
-		//for each property in knowledge base
+		//for each class in knowledge base
 		for(NamedClass cls : allClasses){if(!cls.toString().equals("http://dbpedia.org/ontology/MotorcycleRider"))continue;
 			//get the popularity
 			int otherPopularity = reasoner.getPopularity(cls);
@@ -353,7 +359,7 @@
 		
 		
 		EvaluatedDescription evalDesc;
-		//firstly, create disjoint classexpressions which not occur and give score of 1
+		//firstly, create disjoint classexpressions which do not occur and give score of 1
 		for(NamedClass cls : completeDisjointclasses){
 			if(useClassPopularity){
 				int overlap = 0;
@@ -413,6 +419,7 @@
 			} else {
 				evalDesc = new EvaluatedDescription(cls, new AxiomScore(1));
 			}
+			evalDescs.add(evalDesc);
 		}
 		
 		class2Count.put(classToDescribe, total);
@@ -450,16 +457,142 @@
 		}
 	}
 	
+	private void computeAllDisjointClassAxiomsOptimized(){
+		//get number of instances of A
+		int instanceCountA = reasoner.getPopularity(classToDescribe);
+		
+		//firstly, we compute the disjointness to all sibling classes
+		Set<EvaluatedDescription> disjointessOfSiblings = computeDisjointessOfSiblings(classToDescribe);
+		System.out.println(disjointessOfSiblings);
+		
+		//we go the hierarchy up
+		SortedSet<Description> superClasses = reasoner.getSuperClasses(classToDescribe);
+		for (Description sup : superClasses) {
+			Set<EvaluatedDescription> disjointessOfSuperClass = computeDisjointessOfSiblings(sup.asNamedClass());
+			System.out.println(disjointessOfSuperClass);
+		}
+	}
+	
+	private Set<EvaluatedDescription> computeDisjointessOfSiblings(NamedClass cls){
+		Set<EvaluatedDescription> evaluatedDescriptions = new HashSet<EvaluatedDescription>();
+		
+		//get number of instances of A
+		int instanceCountA = reasoner.getPopularity(cls);
+		
+		if(instanceCountA > 0){
+			//we compute the disjointness to all sibling classes
+			Set<NamedClass> siblingClasses = reasoner.getSiblingClasses(cls);
+			
+			for (NamedClass sib : siblingClasses) {
+				//get number of instances of B
+				int instanceCountB = reasoner.getPopularity(sib);
+				
+				if(instanceCountB > 0){
+					//get number of instances of (A and B)
+					int instanceCountAB = reasoner.getPopularity(new Intersection(cls, sib));
+
+					//we compute the estimated precision
+					double precision = accuracy(instanceCountB, instanceCountAB);
+					//we compute the estimated recall
+					double recall = accuracy(instanceCountA, instanceCountAB);
+					//compute the overall score
+					double score = 1 - fMEasure(precision, recall);
+					
+					EvaluatedDescription evalDesc = new EvaluatedDescription(sib, new AxiomScore(score));
+					evaluatedDescriptions.add(evalDesc);
+				}
+			}
+		}
+		
+		return evaluatedDescriptions;
+	}
+	
+	private EvaluatedAxiom computeDisjointess(NamedClass clsA, NamedClass clsB){
+		logger.debug("Computing disjointness between " + clsA + " and " + clsB + "...");
+		
+		double scoreValue = 0;
+		
+		//get number of instances of A
+		int instanceCountA = reasoner.getPopularity(clsA);
+		
+		//get number of instances of B
+		int instanceCountB = reasoner.getPopularity(clsB);
+		
+		if(instanceCountA > 0 && instanceCountB > 0){
+			//get number of instances of (A and B)
+			int instanceCountAB = reasoner.getPopularity(new Intersection(clsA, clsB));
+			
+			//we compute the estimated precision
+			double precision = accuracy(instanceCountB, instanceCountAB);
+			
+			//we compute the estimated recall
+			double recall = accuracy(instanceCountA, instanceCountAB);
+			
+			//compute the overall score
+			scoreValue = 1 - fMEasure(precision, recall);
+			
+		}
+		
+		AxiomScore score = new AxiomScore(scoreValue);
+		
+		return new EvaluatedAxiom(new DisjointClassesAxiom(clsA, clsB), score);
+	}
+	
+	public Set<EvaluatedAxiom> computeSchemaDisjointness(){
+		Set<EvaluatedAxiom> axioms = new HashSet<EvaluatedAxiom>();
+		
+		Set<NamedClass> classes = reasoner.getOWLClasses("http://dbpedia.org/ontology/");
+		computeDisjointness(classes);
+		
+		//start from the top level classes, i.e. the classes whose direct super class is owl:Thing
+		SortedSet<Description> topLevelClasses = reasoner.getMostGeneralClasses();
+		axioms.addAll(computeDisjointness(asNamedClasses(topLevelClasses)));
+		
+		for (Description cls : topLevelClasses) {
+			
+		}
+		
+		return axioms;
+	}
+	
+	public Set<EvaluatedAxiom> computeDisjointness(Set<NamedClass> classes){
+		Set<EvaluatedAxiom> axioms = new HashSet<EvaluatedAxiom>();
+		
+		for (NamedClass clsA : classes) {
+			for (NamedClass clsB : classes) {
+				if(!clsA.equals(clsB)){
+					axioms.add(computeDisjointess(clsA, clsB));
+				}
+			}
+		}
+		
+		return axioms;
+	}
+	
+	public static Set<NamedClass> asNamedClasses(Set<Description> descriptions){
+		Set<NamedClass> classes = new TreeSet<NamedClass>();
+		for (Description description : descriptions) {
+			if(description.isNamedClass()){
+				classes.add(description.asNamedClass());
+			}
+		}
+		return classes;
+	}
+	
 	public static void main(String[] args) throws Exception{
-		SparqlEndpointKS ks = new SparqlEndpointKS(new SparqlEndpoint(new URL("http://dbpedia.aksw.org:8902/sparql"), Collections.singletonList("http://dbpedia.org"), Collections.<String>emptyList()));
+		SparqlEndpointKS ks = new SparqlEndpointKS(SparqlEndpoint.getEndpointDBpedia());
 		ks = new LocalModelBasedSparqlEndpointKS(new URL("http://dl-learner.svn.sourceforge.net/viewvc/dl-learner/trunk/examples/swore/swore.rdf?revision=2217"));
-		ks = new SparqlEndpointKS(SparqlEndpoint.getEndpointDBpediaLiveAKSW());
+		ks = new SparqlEndpointKS(SparqlEndpoint.getEndpointDBpedia());
 		DisjointClassesLearner l = new DisjointClassesLearner(ks);
-		l.setClassToDescribe(new NamedClass("http://dbpedia.org/ontology/Agent"));
+		SPARQLReasoner sparqlReasoner = new SPARQLReasoner(ks, "cache");
+		sparqlReasoner.prepareSubsumptionHierarchy();
+		sparqlReasoner.precomputeClassPopularity();
+		l.setReasoner(sparqlReasoner);
+		l.setClassToDescribe(new NamedClass("http://dbpedia.org/ontology/Actor"));
 		l.setMaxExecutionTimeInSeconds(60);
 		l.init();
-		l.getReasoner().prepareSubsumptionHierarchy();
-		l.getReasoner().precomputeClassPopularity();
+		l.computeSchemaDisjointness();
+		
 //		System.out.println(l.getReasoner().getClassHierarchy().getSubClasses(new NamedClass("http://dbpedia.org/ontology/Athlete"), false));System.exit(0);
 		l.start();
 		

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java	2013-09-23 12:22:30 UTC (rev 4112)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java	2013-10-03 08:50:57 UTC (rev 4113)
@@ -5,7 +5,7 @@
 import java.util.Set;
 
 import org.dllearner.algorithms.isle.EntityCandidateGenerator;
-import org.dllearner.algorithms.isle.WordSenseDisambiguation;
+import org.dllearner.algorithms.isle.wsd.WordSenseDisambiguation;
 import org.dllearner.core.owl.Entity;
 
 /**

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java	2013-09-23 12:22:30 UTC (rev 4112)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java	2013-10-03 08:50:57 UTC (rev 4113)
@@ -7,12 +7,12 @@
 
 import org.apache.log4j.Logger;
 import org.dllearner.algorithms.isle.EntityCandidateGenerator;
-import org.dllearner.algorithms.isle.WordSenseDisambiguation;
 import org.dllearner.algorithms.isle.index.AnnotatedDocument;
 import org.dllearner.algorithms.isle.index.LinguisticAnnotator;
 import org.dllearner.algorithms.isle.index.SemanticAnnotator;
 import org.dllearner.algorithms.isle.index.TextDocument;
 import org.dllearner.algorithms.isle.index.syntactic.SyntacticIndex;
+import org.dllearner.algorithms.isle.wsd.WordSenseDisambiguation;
 import org.dllearner.core.owl.Entity;
 import org.semanticweb.owlapi.model.OWLAnnotation;
 import org.semanticweb.owlapi.model.OWLAnnotationProperty;

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java	2013-09-23 12:22:30 UTC (rev 4112)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java	2013-10-03 08:50:57 UTC (rev 4113)
@@ -3,7 +3,6 @@
  */
 package org.dllearner.algorithms.isle.index.semantic.simple;
 
-import org.dllearner.algorithms.isle.SimpleWordSenseDisambiguation;
 import org.dllearner.algorithms.isle.index.SemanticAnnotator;
 import org.dllearner.algorithms.isle.index.SimpleEntityCandidatesTrie;
 import org.dllearner.algorithms.isle.index.TrieEntityCandidateGenerator;
@@ -11,6 +10,7 @@
 import org.dllearner.algorithms.isle.index.semantic.SemanticIndex;
 import org.dllearner.algorithms.isle.index.syntactic.SyntacticIndex;
 import org.dllearner.algorithms.isle.textretrieval.RDFSLabelEntityTextRetriever;
+import org.dllearner.algorithms.isle.wsd.SimpleWordSenseDisambiguation;
 import org.semanticweb.owlapi.model.OWLOntology;
 
 /**

Modified: trunk/components-core/src/main/java/org/dllearner/core/owl/Description.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/core/owl/Description.java	2013-09-23 12:22:30 UTC (rev 4112)
+++ trunk/components-core/src/main/java/org/dllearner/core/owl/Description.java	2013-10-03 08:50:57 UTC (rev 4113)
@@ -212,6 +212,14 @@
 		return toKBSyntaxString(null, null);
 	}
 	
+	public boolean isNamedClass(){
+		return this instanceof NamedClass;
+	}
+	
+	public NamedClass asNamedClass(){
+		return (NamedClass)this;
+	}
+	
 	/**
 	 * Returns all named entities. 
 	 * @return

Modified: trunk/components-core/src/main/java/org/dllearner/parser/KBParser.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/parser/KBParser.java	2013-09-23 12:22:30 UTC (rev 4112)
+++ trunk/components-core/src/main/java/org/dllearner/parser/KBParser.java	2013-10-03 08:50:57 UTC (rev 4113)
@@ -36,7 +36,7 @@
                         return name;
                 else
                         return internalNamespace + name;
-        }
+        }
 
         public static Description parseConcept(String string) throws ParseException {
                 // when just parsing the string as concept, we have no guarantee

Modified: trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java	2013-09-23 12:22:30 UTC (rev 4112)
+++ trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java	2013-10-03 08:50:57 UTC (rev 4113)
@@ -42,6 +42,7 @@
 import org.aksw.jena_sparql_api.core.QueryExecutionFactory;
 import org.aksw.jena_sparql_api.http.QueryExecutionFactoryHttp;
 import org.aksw.jena_sparql_api.model.QueryExecutionFactoryModel;
+import org.aksw.jena_sparql_api.pagination.core.QueryExecutionFactoryPaginated;
 import org.dllearner.core.ComponentAnn;
 import org.dllearner.core.IndividualReasoner;
 import org.dllearner.core.SchemaReasoner;
@@ -69,6 +70,7 @@
 import org.dllearner.kb.sparql.SparqlEndpoint;
 import org.dllearner.utilities.datastructures.SortedSetTuple;
 import org.dllearner.utilities.owl.ConceptComparator;
+import org.dllearner.utilities.owl.OWLClassExpressionToSPARQLConverter;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -110,6 +112,7 @@
 	private boolean prepared = false;
 	
 	private ConceptComparator conceptComparator = new ConceptComparator();
+	private OWLClassExpressionToSPARQLConverter converter = new OWLClassExpressionToSPARQLConverter();
 
 
 	public SPARQLReasoner(SparqlEndpointKS ks) {
@@ -145,6 +148,10 @@
 	}
 	
 	public SPARQLReasoner(SparqlEndpointKS ks, CacheCoreEx cacheBackend) {
+		this(ks, new CacheExImpl(cacheBackend));
+	}
+	
+	public SPARQLReasoner(SparqlEndpointKS ks, CacheEx cache) {
 		this.ks = ks;
 
 		classPopularityMap = new HashMap<NamedClass, Integer>();
@@ -153,8 +160,7 @@
 		if(ks.isRemote()){
 			SparqlEndpoint endpoint = ks.getEndpoint();
 			qef = new QueryExecutionFactoryHttp(endpoint.getURL().toString(), endpoint.getDefaultGraphURIs());
-			CacheEx cacheFrontend = new CacheExImpl(cacheBackend);
-			qef = new QueryExecutionFactoryCacheEx(qef, cacheFrontend);
+			qef = new QueryExecutionFactoryCacheEx(qef, cache);
 //			qef = new QueryExecutionFactoryPaginated(qef, 10000);
 		} else {
 			qef = new QueryExecutionFactoryModel(((LocalModelBasedSparqlEndpointKS)ks).getModel());
@@ -287,6 +293,17 @@
 		}
 
 	}
+	
+	public int getPopularity(Description description){
+		if(classPopularityMap != null && classPopularityMap.containsKey(description)){
+			return classPopularityMap.get(description);
+		} else {
+			String query = converter.asCountQuery(description).toString();
+			ResultSet rs = executeSelectQuery(query);
+			int cnt = rs.next().getLiteral("cnt").getInt();
+			return cnt;
+		}
+	}
 
 	public int getPopularity(ObjectProperty op){
 		if(objectPropertyPopularityMap != null && objectPropertyPopularityMap.containsKey(op)){
@@ -503,22 +520,25 @@
 	 */
 	public Model loadOWLSchema(){
 		Model schema = ModelFactory.createDefaultModel();
+		String prefixes = 
+				"PREFIX owl:<http://www.w3.org/2002/07/owl#> "
+				+ "PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#> ";
 		//axioms according to owl:Class entities
-		String query = 
+		String query = prefixes +
 				"CONSTRUCT {" +
 				"?s a owl:Class." +
 				"?s rdfs:subClassOf ?sup." +
 				"?s owl:equivalentClass ?equiv." +
-				"?s owl:djsointWith ?disj." +
+				"?s owl:disjointWith ?disj." +
 				"} WHERE {" +
-				"?s a owl:Class." +
-				"OPTIONAL{?s rdfs:subClassOf ?sup.}" +
-				"OPTIONAL{?s owl:equivalentClass ?equiv.}" +
-				"OPTIONAL{?s owl:djsointWith ?disj.}" +
+				"?s a owl:Class. " +
+				"OPTIONAL{?s rdfs:subClassOf ?sup.} " +
+				"OPTIONAL{?s owl:equivalentClass ?equiv.} " +
+				"OPTIONAL{?s owl:disjointWith ?disj.}" +
 				"}";
 		schema.add(loadIncrementally(query));
 		//axioms according to owl:ObjectProperty entities
-		query = 
+		query = prefixes +
 				"CONSTRUCT {" +
 				"?s a owl:ObjectProperty." +
 				"?s a ?type." +
@@ -526,14 +546,14 @@
 				"?s rdfs:range ?range." +
 				"} WHERE {" +
 				"?s a owl:ObjectProperty." +
-				"?s a ?type." +
-				"OPTIONAL{?s rdfs:domain ?domain.}" +
+				"?s a ?type. " +
+				"OPTIONAL{?s rdfs:domain ?domain.} " +
 				"OPTIONAL{?s rdfs:range ?range.}" +
 				"}";
 		schema.add(loadIncrementally(query));
 
 		//axioms according to owl:ObjectProperty entities
-		query = 
+		query = prefixes +
 				"CONSTRUCT {" +
 				"?s a owl:DatatypeProperty." +
 				"?s a ?type." +
@@ -541,8 +561,8 @@
 				"?s rdfs:range ?range." +
 				"} WHERE {" +
 				"?s a owl:DatatypeProperty." +
-				"?s a ?type." +
-				"OPTIONAL{?s rdfs:domain ?domain.}" +
+				"?s a ?type. " +
+				"OPTIONAL{?s rdfs:domain ?domain.} " +
 				"OPTIONAL{?s rdfs:range ?range.}" +
 				"}";		
 		schema.add(loadIncrementally(query));
@@ -550,11 +570,13 @@
 		return schema;
 	}
 
-	private Model loadIncrementally(String query){
-		System.out.println(query);
+	private Model loadIncrementally(String query){System.err.println(query);
+		QueryExecutionFactory old = qef;
+		qef = new QueryExecutionFactoryPaginated(qef, 10000);
 		QueryExecution qe = qef.createQueryExecution(query);
 		Model model = qe.execConstruct();
 		qe.close();
+		qef = old;
 		return model;
 	}
 
@@ -693,9 +715,13 @@
 		return types;
 	}
 	
-	public Set<NamedClass> getOWLClasses(String namespace) {
-		Set<NamedClass> types = new HashSet<NamedClass>();
-		String query = String.format("SELECT DISTINCT ?class WHERE {?class a <%s>. FILTER(REGEX(?class,'%s'))}",OWL.Class.getURI(), namespace);
+	public SortedSet<NamedClass> getOWLClasses(String namespace) {
+		SortedSet<NamedClass> types = new TreeSet<NamedClass>();
+		String query = "SELECT DISTINCT ?class WHERE {?class a <" + OWL.Class.getURI() + ">.";
+		if(namespace != null){
+			query += "FILTER(REGEX(STR(?class),'" + namespace + "'))";
+		}
+		query += "}";
 		ResultSet rs = executeSelectQuery(query);
 		QuerySolution qs;
 		while(rs.hasNext()){
@@ -716,7 +742,7 @@
 		Set<NamedClass> siblings = new TreeSet<NamedClass>();
 		String query = "SELECT ?sub WHERE { <" + cls.getName() + "> <http://www.w3.org/2000/01/rdf-schema#subClassOf> ?super .";
 		query += "?sub <http://www.w3.org/2000/01/rdf-schema#subClassOf> ?super .";
-		query += "FILTER( !SAMETERM(?sub, <" + cls.getName() + ">)) . }";System.out.println(query);
+		query += "FILTER( !SAMETERM(?sub, <" + cls.getName() + ">)) . }";
 		ResultSet rs = executeSelectQuery(query);
 		QuerySolution qs;
 		while(rs.hasNext()){
@@ -1360,6 +1386,10 @@
 	public ClassHierarchy getClassHierarchy() {
 		return hierarchy;
 	}
+	
+	public SortedSet<Description> getMostGeneralClasses() {
+		return hierarchy.getMostGeneralClasses();
+	}
 
 	@Override
 	public SortedSet<Description> getSuperClasses(Description description) {

Modified: trunk/components-core/src/main/java/org/dllearner/utilities/GreedyCohaerencyExtractor.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/utilities/GreedyCohaerencyExtractor.java	2013-09-23 12:22:30 UTC (rev 4112)
+++ trunk/components-core/src/main/java/org/dllearner/utilities/GreedyCohaerencyExtractor.java	2013-10-03 08:50:57 UTC (rev 4113)
@@ -12,10 +12,10 @@
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
 import java.util.SortedSet;
 import java.util.TreeSet;
-import java.util.Map.Entry;
-import java.util.Set;
 
 import org.apache.commons.collections15.BidiMap;
 import org.apache.commons.collections15.bidimap.DualHashBidiMap;

Modified: trunk/components-core/src/main/java/org/dllearner/utilities/owl/OWLClassExpressionToSPARQLConverter.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/utilities/owl/OWLClassExpressionToSPARQLConverter.java	2013-09-23 12:22:30 UTC (rev 4112)
+++ trunk/components-core/src/main/java/org/dllearner/utilities/owl/OWLClassExpressionToSPARQLConverter.java	2013-10-03 08:50:57 UTC (rev 4113)
@@ -11,7 +11,7 @@
 import java.util.Stack;
 import java.util.TreeSet;
 
-import org.aksw.commons.collections.diff.ModelDiff;
+import org.dllearner.core.owl.Description;
 import org.semanticweb.owlapi.apibinding.OWLManager;
 import org.semanticweb.owlapi.io.ToStringRenderer;
 import org.semanticweb.owlapi.model.OWLClass;
@@ -63,7 +63,6 @@
 
 import com.google.common.collect.HashMultimap;
 import com.google.common.collect.Multimap;
-import com.google.common.collect.Multimaps;
 import com.google.common.collect.Sets;
 import com.hp.hpl.jena.query.Query;
 import com.hp.hpl.jena.query.QueryFactory;
@@ -96,6 +95,10 @@
 	public VariablesMapping getVariablesMapping() {
 		return mapping;
 	}
+	
+	public String convert(String rootVariable, Description description){
+		return convert(rootVariable, OWLAPIConverter.getOWLAPIDescription(description));
+	}
 
 	public String convert(String rootVariable, OWLClassExpression expr){
 		this.expr = expr;
@@ -129,6 +132,23 @@
 		return QueryFactory.create(queryString, Syntax.syntaxARQ);
 	}
 	
+	public Query asCountQuery(OWLClassExpression expr){
+		String rootVariable = "?s";
+		String queryString = "SELECT (COUNT(DISTINCT " + rootVariable + ") AS ?cnt) WHERE {";
+		String triplePattern = convert(rootVariable, expr);
+		queryString += triplePattern;
+		queryString += "}";
+		return QueryFactory.create(queryString, Syntax.syntaxARQ);
+	}
+	
+	public Query asCountQuery(Description description){
+		return asCountQuery(OWLAPIConverter.getOWLAPIDescription(description));
+	}
+	
+	public Query asQuery(String rootVariable, Description desc, boolean countQuery){
+		return asQuery(rootVariable, OWLAPIConverter.getOWLAPIDescription(desc), countQuery);
+	}
+	
 	public Query asQuery(String rootVariable, OWLClassExpression expr, Set<? extends OWLEntity> variableEntities){
 		return asQuery(rootVariable, expr, variableEntities, false);
 	}

Modified: trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java
===================================================================
--- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java	2013-09-23 12:22:30 UTC (rev 4112)
+++ trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java	2013-10-03 08:50:57 UTC (rev 4113)
@@ -6,6 +6,7 @@
 import com.google.common.base.Charsets;
 import com.google.common.base.Joiner;
 import com.google.common.io.Files;
+
 import org.dllearner.algorithms.celoe.CELOE;
 import org.dllearner.algorithms.isle.index.*;
 import org.dllearner.algorithms.isle.index.semantic.SemanticIndex;
@@ -17,6 +18,8 @@
 import org.dllearner.algorithms.isle.metrics.RelevanceUtils;
 import org.dllearner.algorithms.isle.textretrieval.EntityTextRetriever;
 import org.dllearner.algorithms.isle.textretrieval.RDFSLabelEntityTextRetriever;
+import org.dllearner.algorithms.isle.wsd.SimpleWordSenseDisambiguation;
+import org.dllearner.algorithms.isle.wsd.WordSenseDisambiguation;
 import org.dllearner.core.AbstractReasonerComponent;
 import org.dllearner.core.KnowledgeSource;
 import org.dllearner.core.owl.Entity;
@@ -28,9 +31,12 @@
 import org.junit.Before;
 import org.junit.Test;
 import org.semanticweb.owlapi.apibinding.OWLManager;
+import org.semanticweb.owlapi.model.IRI;
 import org.semanticweb.owlapi.model.OWLDataFactory;
+import org.semanticweb.owlapi.model.OWLEntity;
 import org.semanticweb.owlapi.model.OWLOntology;
 import org.semanticweb.owlapi.model.OWLOntologyManager;
+
 import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl;
 
 import java.io.File;
@@ -245,4 +251,14 @@
 		
 	}	
 	
+	@Test
+	public void testWordSenseDisambiguation() throws Exception {
+		Set<OWLEntity> context = StructuralEntityContext.getContext(ontology, df.getOWLClass(IRI.create(cls.getName())));
+		System.out.println(context);
+		
+		Set<String> contextNL = StructuralEntityContext.getContextInNaturalLanguage(ontology, df.getOWLClass(IRI.create(cls.getName())));
+		System.out.println(contextNL);
+	}
+	
+	
 }

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4112] trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java

From: <dfl...@us...> - 2013-09-23 12:22:33

Revision: 4112
          http://sourceforge.net/p/dl-learner/code/4112
Author:   dfleischhacker
Date:     2013-09-23 12:22:30 +0000 (Mon, 23 Sep 2013)
Log Message:
-----------
Use WordNet expansion and lemmatizing also for subsequences

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java	2013-09-20 09:58:45 UTC (rev 4111)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java	2013-09-23 12:22:30 UTC (rev 4112)
@@ -52,13 +52,11 @@
                 }
                 
                 addEntry(text, entity);
-                addSubsequences(entity, text);
+                addSubsequencesWordNet(entity, text);
                 
                 for (String alternativeText : nameGenerator.getAlternativeText(text)) {
                     addEntry(alternativeText, entity);
                 }
-                
-                
             }
         }
 	}
@@ -86,8 +84,57 @@
         	}
         }
 	}
-	
-	@Override
+
+    private void addSubsequencesWordNet(Entity entity, String text) {
+        if (text.contains(" ")) {
+            String[] tokens = text.split(" ");
+
+            List<String>[] wordnetTokens = (ArrayList<String>[]) new ArrayList[tokens.length];
+
+            // generate list of lemmatized wordnet synonyms for each token
+            for (int i = 0; i < tokens.length; i++) {
+                wordnetTokens[i] = new ArrayList<String>();
+                wordnetTokens[i].add(LinguisticUtil.getInstance().getNormalizedForm(tokens[i].toLowerCase()));
+                for (String w : LinguisticUtil.getInstance().getTopSynonymsForWord(tokens[i], 5)) {
+                    wordnetTokens[i].add(LinguisticUtil.getInstance().getNormalizedForm(w).toLowerCase());
+                }
+            }
+
+            // generate subsequences starting at the given start index of the given size
+            Set<String> allPossibleSubsequences = getAllPossibleSubsequences(wordnetTokens);
+
+            for (String s : allPossibleSubsequences) {
+                addEntry(s, entity);
+            }
+        }
+    }
+
+    private static Set<String> getAllPossibleSubsequences(List<String>[] wordnetTokens) {
+        ArrayList<String> res = new ArrayList<String>();
+
+        for (int size = 1; size < wordnetTokens.length + 1; size++) {
+            for (int start = 0; start < wordnetTokens.length - size + 1; start++) {
+                getPossibleSubsequencesRec(res, new ArrayList<String>(), wordnetTokens, 0, size);
+            }
+        }
+
+        return new HashSet<String>(res);
+    }
+
+    private static void getPossibleSubsequencesRec(List<String> allSubsequences, List<String> currentSubsequence, List<String>[] wordnetTokens,
+                                            int curStart, int maxLength) {
+        if (currentSubsequence.size() == maxLength) {
+            allSubsequences.add(StringUtils.join(currentSubsequence, " "));
+            return;
+        }
+        for (String w : wordnetTokens[curStart]) {
+            ArrayList<String> tmpSequence = new ArrayList<String>(currentSubsequence);
+            tmpSequence.add(w);
+            getPossibleSubsequencesRec(allSubsequences, tmpSequence, wordnetTokens, curStart + 1, maxLength);
+        }
+    }
+
+    @Override
 	public void addEntry(String s, Entity e) {
 		Set<Entity> candidates;
 		if (trie.contains(s)) 
@@ -124,8 +171,31 @@
 		}
 		return output;
 	}
-	
-	public void printTrie() {
+
+    public static void main(String[] args) {
+        String[] tokens = "this is a long and very complex text".split(" ");
+
+        List<String>[] wordnetTokens = (ArrayList<String>[]) new ArrayList[tokens.length];
+
+        // generate list of lemmatized wordnet synonyms for each token
+        for (int i = 0; i < tokens.length; i++) {
+            wordnetTokens[i] = new ArrayList<String>();
+            wordnetTokens[i].add(LinguisticUtil.getInstance().getNormalizedForm(tokens[i]));
+            for (String w : LinguisticUtil.getInstance().getTopSynonymsForWord(tokens[i], 5)) {
+                System.out.println("Adding: " + LinguisticUtil.getInstance().getNormalizedForm(w));
+                wordnetTokens[i].add(LinguisticUtil.getInstance().getNormalizedForm(w).replaceAll("_", " "));
+            }
+        }
+
+        // generate subsequences starting at the given start index of the given size
+        Set<String> allPossibleSubsequences = getAllPossibleSubsequences(wordnetTokens);
+
+        for (String s : allPossibleSubsequences) {
+            System.out.println(s);
+        }
+    }
+
+    public void printTrie() {
 		System.out.println(this.toString());
 		
 	}
@@ -186,9 +256,8 @@
             ArrayList<String> res = new ArrayList<String>();
             res.add(LinguisticUtil.getInstance().getNormalizedForm(word));
 
-            for (String w : LinguisticUtil.getInstance().getTopSynonymsForWord(
-                    LinguisticUtil.getInstance().getNormalizedForm(word), maxNumberOfSenses)) {
-                res.add(w.replaceAll("_", " "));
+            for (String w : LinguisticUtil.getInstance().getTopSynonymsForWord(word, maxNumberOfSenses)) {
+                res.add(LinguisticUtil.getInstance().getNormalizedForm(w.replaceAll("_", " ")));
             }
 
             return res;

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4111] trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java

From: <and...@us...> - 2013-09-20 09:58:49

Revision: 4111
          http://sourceforge.net/p/dl-learner/code/4111
Author:   andremelo
Date:     2013-09-20 09:58:45 +0000 (Fri, 20 Sep 2013)
Log Message:
-----------
Changing buildTrie to add all subsequences of an entity's text, instead of only composing tokens

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java	2013-09-16 00:14:34 UTC (rev 4110)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java	2013-09-20 09:58:45 UTC (rev 4111)
@@ -50,24 +50,43 @@
                 if (text.trim().isEmpty()) {
                     continue;
                 }
+                
                 addEntry(text, entity);
+                addSubsequences(entity, text);
+                
                 for (String alternativeText : nameGenerator.getAlternativeText(text)) {
                     addEntry(alternativeText, entity);
                 }
-                // Adds also composing words, e.g. for "has child", "has" and "child" are also added
-                if (text.contains(" ")) {
-                    for (String subtext : text.split(" ")) {
-                        addEntry(subtext, entity);
-                        for (String alternativeText : nameGenerator.getAlternativeText(subtext)) {
-                            addEntry(alternativeText, entity);
-                        }
-                        //System.out.println("trie.add("+subtext+","++")");
-                    }
-                }
+                
+                
             }
         }
 	}
 	
+	/**
+	 * Adds the subsequences of a text
+	 * @param entity
+	 * @param text
+	 */
+	private void addSubsequences(Entity entity, String text) {
+        if (text.contains(" ")) {
+        	String[] tokens = text.split(" ");
+        	for (int size=1; size<tokens.length; size++) {
+        		
+        		for (int start=0; start<tokens.length-size+1; start++) {
+        			String subsequence = "";
+        			for (int i=0; i<size; i++) {
+        				subsequence += tokens[start+i] + " ";
+        			}
+        			subsequence = subsequence.trim();
+        			
+            		addEntry(subsequence, entity);
+        		}
+        		
+        	}
+        }
+	}
+	
 	@Override
 	public void addEntry(String s, Entity e) {
 		Set<Entity> candidates;

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

3 messages have been excluded from this view by a project administrator.

Flat | Threaded

<< < 1 .. 5 6 7 8 9 .. 171 > >> (Page 7 of 171)