dl-learner-svn Mailing List for DL-Learner (Page 5)

Status: Beta

Brought to you by: jenslehmann, patrickwestphal

dl-learner-svn — DL-Learner Subversion commits

You can subscribe to this list here.

2007	Jan	Feb	Mar	Apr	May	Jun	Jul	Aug (120)	Sep (36)	Oct (116)	Nov (17)	Dec (44)
2008	Jan (143)	Feb (192)	Mar (74)	Apr (84)	May (105)	Jun (64)	Jul (49)	Aug (120)	Sep (159)	Oct (156)	Nov (51)	Dec (28)
2009	Jan (17)	Feb (55)	Mar (33)	Apr (57)	May (54)	Jun (28)	Jul (6)	Aug (16)	Sep (38)	Oct (30)	Nov (26)	Dec (52)
2010	Jan (7)	Feb (91)	Mar (65)	Apr (2)	May (14)	Jun (25)	Jul (38)	Aug (48)	Sep (80)	Oct (70)	Nov (75)	Dec (77)
2011	Jan (68)	Feb (53)	Mar (51)	Apr (35)	May (65)	Jun (101)	Jul (29)	Aug (230)	Sep (95)	Oct (49)	Nov (110)	Dec (63)
2012	Jan (41)	Feb (42)	Mar (25)	Apr (46)	May (51)	Jun (44)	Jul (45)	Aug (29)	Sep (12)	Oct (9)	Nov (17)	Dec (2)
2013	Jan (12)	Feb (14)	Mar (7)	Apr (16)	May (54)	Jun (27)	Jul (11)	Aug (5)	Sep (85)	Oct (27)	Nov (37)	Dec (32)
2014	Jan (8)	Feb (29)	Mar (5)	Apr (3)	May (22)	Jun (3)	Jul (4)	Aug (3)	Sep	Oct	Nov	Dec

Flat | Threaded

<< < 1 .. 3 4 5 6 7 .. 171 > >> (Page 5 of 171)

[DL-Learner SVN] SF.net SVN: dl-learner:[4185] trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java

From: <dfl...@us...> - 2013-12-02 15:19:09

Revision: 4185
          http://sourceforge.net/p/dl-learner/code/4185
Author:   dfleischhacker
Date:     2013-12-02 15:19:06 +0000 (Mon, 02 Dec 2013)
Log Message:
-----------
Add getTokensStartingAtToken without numberOfTokens parameter

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java	2013-12-02 14:59:36 UTC (rev 4184)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java	2013-12-02 15:19:06 UTC (rev 4185)
@@ -99,6 +99,31 @@
         return tokens;
     }
 
+    /**
+     * Returns a list containing all successive tokens from this document starting at the given start
+     * token. If {@code ignorePunctuation} is set, tokens which represent punctuation are added to the result but not
+     * counted for the number of tokens.
+     *
+     * @param start             token to start collecting tokens from the document
+     * @param ignorePunctuation if true, punctuation are not counted towards the number of tokens to return
+     * @return list containing all relevant tokens, depending in the value of ignorePunctuation, the
+     *          list might contain additional non-relevant (punctuation) tokens
+     */
+    public List<Token> getTokensStartingAtToken(Token start, boolean ignorePunctuation) {
+        ArrayList<Token> tokens = new ArrayList<Token>();
+
+        boolean found = false;
+
+        for (int i = 0; i < this.size(); i++) {
+            Token t = this.get(i);
+            if (t == start) {
+                return this.subList(i, this.size());
+            }
+        }
+
+        return tokens;
+    }
+
     private String getStringForLevel(Token t, SurfaceFormLevel l) {
         switch (l) {
             case RAW:

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4184] trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index

From: <dfl...@us...> - 2013-12-02 14:59:39

Revision: 4184
          http://sourceforge.net/p/dl-learner/code/4184
Author:   dfleischhacker
Date:     2013-12-02 14:59:36 +0000 (Mon, 02 Dec 2013)
Log Message:
-----------
Adapt to new Token implementation

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/EntityCandidatesTrie.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/EntityCandidatesTrie.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/EntityCandidatesTrie.java	2013-12-02 14:52:33 UTC (rev 4183)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/EntityCandidatesTrie.java	2013-12-02 14:59:36 UTC (rev 4184)
@@ -2,6 +2,7 @@
 
 import org.dllearner.core.owl.Entity;
 
+import java.util.List;
 import java.util.Set;
 
 public interface EntityCandidatesTrie {
@@ -11,7 +12,7 @@
 	 * @param s
 	 * @param e
 	 */
-	public void addEntry(String s, Entity e);
+	public void addEntry(List<Token> s, Entity e);
 	
 	
 	/**

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java	2013-12-02 14:52:33 UTC (rev 4183)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java	2013-12-02 14:59:36 UTC (rev 4184)
@@ -3,7 +3,6 @@
 import org.apache.commons.lang.StringUtils;
 import org.dllearner.algorithms.isle.textretrieval.EntityTextRetriever;
 import org.dllearner.core.owl.Entity;
-import org.dllearner.utilities.MapUtils;
 import org.dllearner.utilities.datastructures.PrefixTrie;
 import org.semanticweb.owlapi.model.OWLOntology;
 
@@ -11,7 +10,7 @@
 import java.util.Map.Entry;
 
 public class SimpleEntityCandidatesTrie implements EntityCandidatesTrie {
-
+    TokenTree tree;
 	PrefixTrie<FullTokenEntitySetPair> trie;
 	EntityTextRetriever entityTextRetriever;
 
@@ -41,7 +40,7 @@
     }
 	
 	public void buildTrie(OWLOntology ontology, NameGenerator nameGenerator) {
-		this.trie = new PrefixTrie<FullTokenEntitySetPair>();
+		this.tree = new TokenTree();
 		Map<Entity, Set<List<Token>>> entity2TokenSet = entityTextRetriever.getRelevantText(ontology);
 		
 		
@@ -62,51 +61,45 @@
 	/**
 	 * Adds the subsequences of a test
 	 * @param entity
-	 * @param text
+     * @param tokens
 	 */
-	private void addSubsequences(Entity entity, String text) {
-        if (text.contains(" ")) {
-        	String[] tokens = text.split(" ");
-        	for (int size=1; size<tokens.length; size++) {
-        		
-        		for (int start=0; start<tokens.length-size+1; start++) {
-        			String subsequence = "";
-        			for (int i=0; i<size; i++) {
-        				subsequence += tokens[start+i] + " ";
-        			}
-        			subsequence = subsequence.trim();
-        			
-            		addEntry(subsequence, entity);
-        		}
-        		
-        	}
-        }
-	}
-
-    private void addSubsequencesWordNet(Entity entity, String text) {
-        if (text.contains(" ")) {
-            String[] tokens = text.split(" ");
-
-            List<String>[] wordnetTokens = (ArrayList<String>[]) new ArrayList[tokens.length];
-
-            // generate list of lemmatized wordnet synonyms for each token
-            for (int i = 0; i < tokens.length; i++) {
-                wordnetTokens[i] = new ArrayList<String>();
-                wordnetTokens[i].add(LinguisticUtil.getInstance().getNormalizedForm(tokens[i].toLowerCase()));
-                for (String w : LinguisticUtil.getInstance().getTopSynonymsForWord(tokens[i], 5)) {
-                    wordnetTokens[i].add(LinguisticUtil.getInstance().getNormalizedForm(w).toLowerCase());
+    private void addSubsequences(Entity entity, List<Token> tokens) {
+        tree.add(tokens, entity);
+        for (int size = 1; size < tokens.size(); size++) {
+            for (int start = 0; start < tokens.size() - size + 1; start++) {
+                ArrayList<Token> subsequence = new ArrayList<>();
+                for (int i = 0; i < size; i++) {
+                    subsequence.add(tokens.get(start + i));
                 }
+                addEntry(subsequence, entity);
             }
-
-            // generate subsequences starting at the given start index of the given size
-            Set<String[]> allPossibleSubsequences = getAllPossibleSubsequences(tokens, wordnetTokens);
-
-            for (String[] s : allPossibleSubsequences) {
-                addEntry(s[0], entity, s[1]);
-            }
         }
     }
 
+//    private void addSubsequencesWordNet(Entity entity, String text) {
+//        if (text.contains(" ")) {
+//            String[] tokens = text.split(" ");
+//
+//            List<String>[] wordnetTokens = (ArrayList<String>[]) new ArrayList[tokens.length];
+//
+//            // generate list of lemmatized wordnet synonyms for each token
+//            for (int i = 0; i < tokens.length; i++) {
+//                wordnetTokens[i] = new ArrayList<String>();
+//                wordnetTokens[i].add(LinguisticUtil.getInstance().getNormalizedForm(tokens[i].toLowerCase()));
+//                for (String w : LinguisticUtil.getInstance().getTopSynonymsForWord(tokens[i], 5)) {
+//                    wordnetTokens[i].add(LinguisticUtil.getInstance().getNormalizedForm(w).toLowerCase());
+//                }
+//            }
+//
+//            // generate subsequences starting at the given start index of the given size
+//            Set<String[]> allPossibleSubsequences = getAllPossibleSubsequences(tokens, wordnetTokens);
+//
+//            for (String[] s : allPossibleSubsequences) {
+//                addEntry(s[0], entity, s[1]);
+//            }
+//        }
+//    }
+
     private static Set<String[]> getAllPossibleSubsequences(String[] originalTokens, List<String>[] wordnetTokens) {
         ArrayList<String[]> res = new ArrayList<String[]>();
 
@@ -143,30 +136,12 @@
     }
 
     @Override
-	public void addEntry(String s, Entity e) {
-    	s = s.trim();
-		FullTokenEntitySetPair candidates;
-		if (trie.contains(s)) 
-			candidates = trie.get(s);
-		else
-			candidates = new FullTokenEntitySetPair(s);
-		
-		candidates.addEntity(e);
-		
-		trie.put(s, candidates);
+	public void addEntry(List<Token> s, Entity e) {
+        tree.add(s, e);
 	}
 
-    public void addEntry(String s, Entity e, String originalString) {
-    	s = s.trim();
-        FullTokenEntitySetPair candidates;
-        if (trie.contains(s))
-            candidates = trie.get(s);
-        else
-            candidates = new FullTokenEntitySetPair(originalString);
-
-        candidates.addEntity(e);
-
-        trie.put(s, candidates);
+    public void addEntry(List<Token> s, Entity e, List<Token> originalTokens) {
+        tree.add(s, e, originalTokens);
     }
 
 	@Override

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java	2013-12-02 14:52:33 UTC (rev 4183)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java	2013-12-02 14:59:36 UTC (rev 4184)
@@ -15,10 +15,12 @@
 public class TokenTree {
     private HashMap<Token, TokenTree> children;
     private Set<Entity> entities;
+    private List<Token> originalTokens;
 
     public TokenTree() {
         this.children = new HashMap<>();
         this.entities = new HashSet<>();
+        this.originalTokens = new ArrayList<>();
     }
 
     /**
@@ -27,7 +29,7 @@
      * @param tokens   tokens to locate insertion point for entities
      * @param entities entities to add
      */
-    public void add(List<Token> tokens, Set<Entity> entities) {
+    public void add(List<Token> tokens, Set<Entity> entities, List<Token> originalTokens) {
         TokenTree curNode = this;
         for (Token t : tokens) {
             TokenTree nextNode = curNode.children.get(t);
@@ -38,8 +40,13 @@
             curNode = nextNode;
         }
         curNode.entities.addAll(entities);
+        curNode.originalTokens = new ArrayList<>(originalTokens);
     }
 
+    public void add(List<Token> tokens, Set<Entity> entities) {
+        add(tokens, entities, tokens);
+    }
+
     /**
      * Adds the given entity to the tree.
      *
@@ -50,6 +57,10 @@
         add(tokens, Collections.singleton(entity));
     }
 
+    public void add(List<Token> tokens, Entity entity, List<Token> originalTokens) {
+        add(tokens, Collections.singleton(entity), originalTokens);
+    }
+
     /**
      * Returns the set of entities located by the given list of tokens.
      *
@@ -112,6 +123,27 @@
         return fallback == null ? Collections.<Entity>emptySet() : fallback.entities;
     }
 
+    /**
+     * Returns the original token for the longest match
+     */
+    public List<Token> getOriginalTokensForLongestMatch(List<Token> tokens) {
+        TokenTree fallback = this.entities.isEmpty() ? null : this;
+        TokenTree curNode = this;
+
+        for (Token t : tokens) {
+            TokenTree nextNode = curNode.children.get(t);
+            if (nextNode == null) {
+                return fallback == null ? null : fallback.originalTokens;
+            }
+            curNode = nextNode;
+            if (!curNode.entities.isEmpty()) {
+                fallback = curNode;
+            }
+        }
+
+        return fallback == null ? Collections.<Token>emptyList() : fallback.originalTokens;
+    }
+
     public static void main(String[] args) throws Exception {
     	List<Token> tokens1 = Lists.newLinkedList();
     	for (String s : Splitter.on(" ").split("this is a token tree")) {

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4183] trunk/components-core/src/main/java/org/dllearner/algorithms/isle

From: <lor...@us...> - 2013-12-02 14:52:35

Revision: 4183
          http://sourceforge.net/p/dl-learner/code/4183
Author:   lorenz_b
Date:     2013-12-02 14:52:33 +0000 (Mon, 02 Dec 2013)
Log Message:
-----------
Refactoring.

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/EntityTextRetriever.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/RDFSLabelEntityTextRetriever.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java	2013-12-02 14:41:21 UTC (rev 4182)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java	2013-12-02 14:52:33 UTC (rev 4183)
@@ -42,26 +42,21 @@
 	
 	public void buildTrie(OWLOntology ontology, NameGenerator nameGenerator) {
 		this.trie = new PrefixTrie<FullTokenEntitySetPair>();
-		Map<Entity, Set<String>> relevantText = entityTextRetriever.getRelevantText(ontology);
+		Map<Entity, Set<List<Token>>> entity2TokenSet = entityTextRetriever.getRelevantText(ontology);
 		
-		for (Entity entity : relevantText.keySet()) {
-
-			for (String text : relevantText.get(entity)) {
-                text = StringUtils.join(LinguisticUtil.getInstance().getWordsFromCamelCase(text), " ");
-                text = StringUtils.join(LinguisticUtil.getInstance().getWordsFromUnderscored(text), " ");
-                if (text.trim().isEmpty()) {
-                    continue;
-                }
-                
-                addEntry(text, entity);
-                addSubsequencesWordNet(entity, text);
-                
-                for (String alternativeText : nameGenerator.getAlternativeText(text)) {
-                    addEntry(alternativeText.toLowerCase(), entity, text);
-                }
-            }
-        }
 		
+		for (Entry<Entity, Set<List<Token>>> entry : entity2TokenSet.entrySet()) {
+			Entity entity = entry.getKey();
+			Set<List<Token>> tokenSet = entry.getValue();
+			for (List<Token> tokens : tokenSet) {
+				addEntry(tokens, entity);
+                addSubsequences(entity, tokens);
+//                addSubsequencesWordNet(entity, text);
+//                for (String alternativeText : nameGenerator.getAlternativeText(text)) {
+//                    addEntry(alternativeText.toLowerCase(), entity, text);
+//                }
+			}
+		}
 	}
 	
 	/**

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java	2013-12-02 14:41:21 UTC (rev 4182)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java	2013-12-02 14:52:33 UTC (rev 4183)
@@ -127,6 +127,15 @@
 		tree.add(tokens1, new NamedClass("TokenTree"));
 		tree.add(tokens2, new NamedClass("TokenizedTree"));
         System.out.println(tree);
+        
+        System.out.println(tree.getEntitiesForLongestMatch(tokens1));
+        System.out.println(tree.getLongestMatch(tokens1));
+        
+        List<Token> tokens3 = Lists.newLinkedList();
+    	for (String s : Splitter.on(" ").split("this is a very nice tokenized tree")) {
+			tokens3.add(new Token(s, s, s, false, false));
+		};
+        System.out.println(tree.getLongestMatch(tokens3));
     }
 
     

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java	2013-12-02 14:41:21 UTC (rev 4182)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java	2013-12-02 14:52:33 UTC (rev 4183)
@@ -5,10 +5,13 @@
 
 import java.util.HashMap;
 import java.util.HashSet;
+import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
+import org.dllearner.algorithms.isle.TextDocumentGenerator;
 import org.dllearner.algorithms.isle.index.LinguisticUtil;
+import org.dllearner.algorithms.isle.index.Token;
 import org.dllearner.core.owl.Entity;
 import org.dllearner.kb.OWLAPIOntology;
 import org.dllearner.utilities.owl.OWLAPIConverter;
@@ -75,8 +78,8 @@
 	 * @see org.dllearner.algorithms.isle.EntityTextRetriever#getRelevantText(org.dllearner.core.owl.Entity)
 	 */
 	@Override
-	public Map<String, Double> getRelevantText(Entity entity) {
-		Map<String, Double> textWithWeight = new HashMap<String, Double>();
+	public Map<List<Token>, Double> getRelevantText(Entity entity) {
+		Map<List<Token>, Double> textWithWeight = new HashMap<List<Token>, Double>();
 		
 		OWLEntity e = OWLAPIConverter.getOWLAPIEntity(entity);
 		
@@ -87,7 +90,7 @@
 		            OWLLiteral val = (OWLLiteral) annotation.getValue();
 		            if (val.hasLang(language)) {
 		            	String label = val.getLiteral().trim();
-		            	textWithWeight.put(label, weight);
+		            	textWithWeight.put(TextDocumentGenerator.getInstance().generateDocument(label), weight);
 		            }
 		        }
 			}
@@ -97,7 +100,7 @@
 			String shortForm = sfp.getShortForm(IRI.create(entity.getURI()));
 			shortForm = Joiner.on(" ").join(LinguisticUtil.getInstance().getWordsFromCamelCase(shortForm));
 			shortForm = Joiner.on(" ").join(LinguisticUtil.getInstance().getWordsFromUnderscored(shortForm)).trim();
-			textWithWeight.put(shortForm, weight);
+			textWithWeight.put(TextDocumentGenerator.getInstance().generateDocument(shortForm), weight);
 		}
 		
 		return textWithWeight;
@@ -108,8 +111,8 @@
 	 * @return
 	 */
 	@Override
-	public Map<Entity, Set<String>> getRelevantText(OWLOntology ontology) {
-		Map<Entity, Set<String>> entity2RelevantText = new HashMap<Entity, Set<String>>();
+	public Map<Entity, Set<List<Token>>> getRelevantText(OWLOntology ontology) {
+		Map<Entity, Set<List<Token>>> entity2RelevantText = new HashMap<>();
 		
 		Set<OWLEntity> schemaEntities = new HashSet<OWLEntity>();
 		schemaEntities.addAll(ontology.getClassesInSignature());
@@ -117,7 +120,7 @@
 		schemaEntities.addAll(ontology.getDataPropertiesInSignature());
 		schemaEntities.remove(OWL_THING);
 		
-		Map<String, Double> relevantText;
+		Map<List<Token>, Double> relevantText;
 		for (OWLEntity owlEntity : schemaEntities) {
 			Entity entity = OWLAPIConverter.getEntity(owlEntity);
 			relevantText = getRelevantText(entity);

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/EntityTextRetriever.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/EntityTextRetriever.java	2013-12-02 14:41:21 UTC (rev 4182)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/EntityTextRetriever.java	2013-12-02 14:52:33 UTC (rev 4183)
@@ -19,9 +19,11 @@
 
 package org.dllearner.algorithms.isle.textretrieval;
 
+import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
+import org.dllearner.algorithms.isle.index.Token;
 import org.dllearner.core.owl.Entity;
 import org.semanticweb.owlapi.model.OWLOntology;
 
@@ -45,8 +47,8 @@
 	 * @param entity The entity to handle.
 	 * @return A weighted set of strings. For a value x, we need to have 0 <= x <= 1.
 	 */
-	public Map<String, Double> getRelevantText(Entity entity);
+	public Map<List<Token>, Double> getRelevantText(Entity entity);
 	
-	public Map<Entity, Set<String>> getRelevantText(OWLOntology ontology);
+	public Map<Entity, Set<List<Token>>> getRelevantText(OWLOntology ontology);
 	
 }

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/RDFSLabelEntityTextRetriever.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/RDFSLabelEntityTextRetriever.java	2013-12-02 14:41:21 UTC (rev 4182)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/RDFSLabelEntityTextRetriever.java	2013-12-02 14:52:33 UTC (rev 4183)
@@ -4,12 +4,14 @@
 package org.dllearner.algorithms.isle.textretrieval;
 
 import java.io.File;
+import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Set;
 import java.util.SortedMap;
 import java.util.TreeMap;
 
+import org.dllearner.algorithms.isle.index.Token;
 import org.dllearner.core.owl.Entity;
 import org.dllearner.kb.OWLAPIOntology;
 import org.semanticweb.owlapi.apibinding.OWLManager;
@@ -43,13 +45,13 @@
 		OWLOntology ontology = man.loadOntology(IRI.create("http://www.semanticbible.com/2006/11/NTNames.owl"));
 		
 		RDFSLabelEntityTextRetriever labelRetriever = new RDFSLabelEntityTextRetriever(ontology);
-		Map<Entity, Set<String>> relevantText = labelRetriever.getRelevantText(ontology);
+		Map<Entity, Set<List<Token>>> relevantText = labelRetriever.getRelevantText(ontology);
 		SortedMap<String, String> uri2Labels = new TreeMap<String, String>();
 		
-		for (Entry<Entity, Set<String>> entry : relevantText.entrySet()) {
+		for (Entry<Entity, Set<List<Token>>> entry : relevantText.entrySet()) {
 			Entity key = entry.getKey();
-			Set<String> value = entry.getValue();
-			uri2Labels.put(key.getName(), value.iterator().next());
+			Set<List<Token>> value = entry.getValue();
+			uri2Labels.put(key.getName(), value.iterator().next().get(0).getRawForm());
 		}
 		
 		StringBuilder csv = new StringBuilder();

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4182] trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java

From: <dfl...@us...> - 2013-12-02 14:41:24

Revision: 4182
          http://sourceforge.net/p/dl-learner/code/4182
Author:   dfleischhacker
Date:     2013-12-02 14:41:21 +0000 (Mon, 02 Dec 2013)
Log Message:
-----------
Prevent possible NPE

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java	2013-12-02 14:34:42 UTC (rev 4181)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java	2013-12-02 14:41:21 UTC (rev 4182)
@@ -109,7 +109,7 @@
             }
         }
 
-        return fallback.entities;
+        return fallback == null ? Collections.<Entity>emptySet() : fallback.entities;
     }
 
     public static void main(String[] args) throws Exception {

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4181] trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java

From: <dfl...@us...> - 2013-12-02 14:34:45

Revision: 4181
          http://sourceforge.net/p/dl-learner/code/4181
Author:   dfleischhacker
Date:     2013-12-02 14:34:42 +0000 (Mon, 02 Dec 2013)
Log Message:
-----------
Add toString to TokenTree

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java	2013-12-02 14:30:18 UTC (rev 4180)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java	2013-12-02 14:34:42 UTC (rev 4181)
@@ -1,11 +1,10 @@
 package org.dllearner.algorithms.isle.index;
 
+import com.google.common.base.Splitter;
+import com.google.common.collect.Lists;
 import org.dllearner.core.owl.Entity;
 import org.dllearner.core.owl.NamedClass;
 
-import com.google.common.base.Splitter;
-import com.google.common.collect.Lists;
-
 import java.util.*;
 
 /**
@@ -112,7 +111,7 @@
 
         return fallback.entities;
     }
-    
+
     public static void main(String[] args) throws Exception {
     	List<Token> tokens1 = Lists.newLinkedList();
     	for (String s : Splitter.on(" ").split("this is a token tree")) {
@@ -127,5 +126,28 @@
 		TokenTree tree = new TokenTree();
 		tree.add(tokens1, new NamedClass("TokenTree"));
 		tree.add(tokens2, new NamedClass("TokenizedTree"));
-	}
+        System.out.println(tree);
+    }
+
+    
+    public String toString() {
+        return "TokenTree\n"+ toString(0);
+    }
+
+    public String toString(int indent) {
+        StringBuilder indentStringBuilder = new StringBuilder();
+        for (int i = 0; i < indent; i++) {
+            indentStringBuilder.append(" ");
+        }
+        String indentString = indentStringBuilder.toString();
+        StringBuilder sb = new StringBuilder();
+        for (Map.Entry<Token, TokenTree> e : children.entrySet()) {
+            sb.append(indentString).append(e.getKey().toString());
+            sb.append("\n");
+            sb.append(e.getValue().toString(indent + 1));
+        }
+        return sb.toString();
+    }
+
+    
 }

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4180] trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Token.java

From: <lor...@us...> - 2013-12-02 14:30:21

Revision: 4180
          http://sourceforge.net/p/dl-learner/code/4180
Author:   lorenz_b
Date:     2013-12-02 14:30:18 +0000 (Mon, 02 Dec 2013)
Log Message:
-----------
Updated toString.

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Token.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Token.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Token.java	2013-12-02 14:27:30 UTC (rev 4179)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Token.java	2013-12-02 14:30:18 UTC (rev 4180)
@@ -95,9 +95,7 @@
 	 */
 	@Override
 	public String toString() {
-		return "\n[Word: " + rawForm + "\n" 
-				+ "Stemmed word: " + stemmedForm + "\n"
-				+ "POS tag: " + posTag + "]";
+		return "[Word: " + rawForm + " | Stemmed word: " + stemmedForm + " | POS tag: " + posTag + "]";
 	}
 
     @Override

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4179] trunk/components-core/src

From: <lor...@us...> - 2013-12-02 14:27:33

Revision: 4179
          http://sourceforge.net/p/dl-learner/code/4179
Author:   lorenz_b
Date:     2013-12-02 14:27:30 +0000 (Mon, 02 Dec 2013)
Log Message:
-----------
Added main method to TokenTree.

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java
    trunk/components-core/src/test/java/org/dllearner/algorithms/isle/SemanticBibleExperiment.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java	2013-12-02 13:36:13 UTC (rev 4178)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java	2013-12-02 14:27:30 UTC (rev 4179)
@@ -1,7 +1,11 @@
 package org.dllearner.algorithms.isle.index;
 
 import org.dllearner.core.owl.Entity;
+import org.dllearner.core.owl.NamedClass;
 
+import com.google.common.base.Splitter;
+import com.google.common.collect.Lists;
+
 import java.util.*;
 
 /**
@@ -108,4 +112,20 @@
 
         return fallback.entities;
     }
+    
+    public static void main(String[] args) throws Exception {
+    	List<Token> tokens1 = Lists.newLinkedList();
+    	for (String s : Splitter.on(" ").split("this is a token tree")) {
+			tokens1.add(new Token(s, s, s, false, false));
+		};
+		
+		List<Token> tokens2 = Lists.newLinkedList();
+    	for (String s : Splitter.on(" ").split("this is a tokenized tree")) {
+			tokens2.add(new Token(s, s, s, false, false));
+		};
+		
+		TokenTree tree = new TokenTree();
+		tree.add(tokens1, new NamedClass("TokenTree"));
+		tree.add(tokens2, new NamedClass("TokenizedTree"));
+	}
 }

Modified: trunk/components-core/src/test/java/org/dllearner/algorithms/isle/SemanticBibleExperiment.java
===================================================================
--- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/SemanticBibleExperiment.java	2013-12-02 13:36:13 UTC (rev 4178)
+++ trunk/components-core/src/test/java/org/dllearner/algorithms/isle/SemanticBibleExperiment.java	2013-12-02 14:27:30 UTC (rev 4179)
@@ -87,7 +87,7 @@
 			            String text = Files.toString(file, Charsets.UTF_8);
 //			            String posTagged = getPOSTaggedText(text);
 //			            Files.write(posTagged, new File(taggedFolder, file.getName() + ".tagged"), Charsets.UTF_8);
-			            documents.add(TextDocumentGenerator.getInstance().generateDocument(text));
+//			            documents.add(TextDocumentGenerator.getInstance().generateDocument(text));
 			        } catch (IOException e) {
 			            e.printStackTrace();
 			        }

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4178] trunk/components-core/src/main/java/org/ dllearner/algorithms/isle/index/Token.java

From: <dfl...@us...> - 2013-12-02 13:36:19

Revision: 4178
          http://sourceforge.net/p/dl-learner/code/4178
Author:   dfleischhacker
Date:     2013-12-02 13:36:13 +0000 (Mon, 02 Dec 2013)
Log Message:
-----------
Add equals and hashCode to Token class

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Token.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Token.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Token.java	2013-12-02 12:51:30 UTC (rev 4177)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Token.java	2013-12-02 13:36:13 UTC (rev 4178)
@@ -99,4 +99,32 @@
 				+ "Stemmed word: " + stemmedForm + "\n"
 				+ "POS tag: " + posTag + "]";
 	}
+
+    /**
+     * Two tokens are equal when they have the same runtime class, the same
+     * POS tag and the same stemmed form; no other field is compared.
+     *
+     * @param o object to compare with
+     * @return {@code true} if {@code o} is an equal token
+     */
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+        if (o == null || getClass() != o.getClass()) {
+            return false;
+        }
+
+        Token other = (Token) o;
+        // POS tag is checked first, then the stemmed form
+        return posTag.equals(other.posTag) && stemmedForm.equals(other.stemmedForm);
+    }
+
+    /** Hash over the same two fields that equals compares: stemmed form and POS tag. */
+    @Override
+    public int hashCode() {
+        // stemmed form's hash scaled by 31, plus the POS tag's hash
+        return 31 * stemmedForm.hashCode() + posTag.hashCode();
+    }
 }

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4177] trunk/components-core/src/main/java/org/ dllearner/algorithms/isle/index/TokenTree.java

From: <dfl...@us...> - 2013-12-02 12:51:33

Revision: 4177
          http://sourceforge.net/p/dl-learner/code/4177
Author:   dfleischhacker
Date:     2013-12-02 12:51:30 +0000 (Mon, 02 Dec 2013)
Log Message:
-----------
Add TokenTree class

Added Paths:
-----------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java

Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java	                        (rev 0)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java	2013-12-02 12:51:30 UTC (rev 4177)
@@ -0,0 +1,111 @@
+package org.dllearner.algorithms.isle.index;
+
+import org.dllearner.core.owl.Entity;
+
+import java.util.*;
+
+/**
+ * Tree for finding longest matching Token sequence; each node holds the entities of the token path leading to it.
+ *
+ * @author Daniel Fleischhacker
+ */
+public class TokenTree {
+    private final Map<Token, TokenTree> children;
+    private final Set<Entity> entities;
+
+    public TokenTree() {
+        this.children = new HashMap<>();
+        this.entities = new HashSet<>();
+    }
+
+    /**
+     * Adds all given entities to the end of the path resulting from the given tokens.
+     *
+     * @param tokens   tokens to locate insertion point for entities
+     * @param entities entities to add
+     */
+    public void add(List<Token> tokens, Set<Entity> entities) {
+        TokenTree curNode = this;
+        for (Token t : tokens) {
+            TokenTree nextNode = curNode.children.get(t);
+            if (nextNode == null) {
+                nextNode = new TokenTree();
+                curNode.children.put(t, nextNode);
+            }
+            curNode = nextNode;
+        }
+        curNode.entities.addAll(entities);
+    }
+
+    /**
+     * Adds the given entity to the tree.
+     * @param tokens tokens to locate insertion point for entities
+     * @param entity entity to add
+     */
+    public void add(List<Token> tokens, Entity entity) {
+        add(tokens, Collections.singleton(entity));
+    }
+
+    /**
+     * Returns the set of entities located by the given list of tokens.
+     * @param tokens tokens to locate the information to get
+     * @return located set of entities (the node's own, possibly empty, set) or null if token sequence not contained in tree
+     */
+    public Set<Entity> get(List<Token> tokens) {
+        TokenTree curNode = this;
+        for (Token t : tokens) {
+            TokenTree nextNode = curNode.children.get(t);
+            if (nextNode == null) {
+                return null;
+            }
+            curNode = nextNode;
+        }
+        return curNode.entities;
+    }
+
+    /**
+     * Returns the longest leading part of the given tokens that is a path in this tree.
+     * The node reached is not checked for assigned entities.
+     * @param tokens list of tokens to check for longest match
+     * @return new list with the matched leading tokens of {@code tokens}, empty if not even the first token matches
+     */
+    public List<Token> getLongestMatch(List<Token> tokens) {
+        List<Token> fallbackTokenList = new ArrayList<>();
+        TokenTree curNode = this;
+
+        for (Token t : tokens) {
+            TokenTree nextNode = curNode.children.get(t);
+            if (nextNode == null) {
+                return fallbackTokenList;
+            }
+            curNode = nextNode;
+            fallbackTokenList.add(t);
+        }
+        return fallbackTokenList;
+    }
+
+    /**
+     * Returns the set of entities assigned to the longest matching token subsequence of the given token sequence.
+     * @param tokens    token sequence to search for longest match
+     * @return entities of the longest matching leading token sequence that has entities assigned, or null if there is none
+     */
+    public Set<Entity> getEntitiesForLongestMatch(List<Token> tokens) {
+        TokenTree fallback = this.entities.isEmpty() ? null : this;
+        TokenTree curNode = this;
+        for (Token t : tokens) {
+            curNode = curNode.children.get(t);
+            if (curNode == null) {
+                break;
+            }
+            if (!curNode.entities.isEmpty()) {
+                fallback = curNode;
+            }
+        }
+        // no node on the matched path carries entities: report null instead of dereferencing null
+        if (fallback == null) {
+            return null;
+        }
+
+        return fallback.entities;
+    }
+}

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4176] trunk

From: <ki...@us...> - 2013-11-27 14:41:08

Revision: 4176
          http://sourceforge.net/p/dl-learner/code/4176
Author:   kirdie
Date:     2013-11-27 14:41:05 +0000 (Wed, 27 Nov 2013)
Log Message:
-----------
Changed Index from an interface to an abstract class, thus removing much redundant code.

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java
    trunk/components-ext/src/main/java/org/dllearner/common/index/HierarchicalIndex.java
    trunk/components-ext/src/main/java/org/dllearner/common/index/Index.java
    trunk/components-ext/src/main/java/org/dllearner/common/index/SOLRIndex.java
    trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java

Modified: trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java	2013-11-25 14:20:13 UTC (rev 4175)
+++ trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java	2013-11-27 14:41:05 UTC (rev 4176)
@@ -331,8 +331,8 @@
 			return dataPropertyPopularityMap.get(dp);
 		} else {
 			String queryTemplate = "SELECT (COUNT(*) AS ?cnt) WHERE {?s <%s> ?o}";
-
-			ResultSet rs = executeSelectQuery(String.format(queryTemplate, dp.getName()));
+String query = String.format(queryTemplate, dp.getName());
+			ResultSet rs = executeSelectQuery(query);
 			int cnt = rs.next().getLiteral("cnt").getInt();
 			dataPropertyPopularityMap.put(dp, cnt);
 			return cnt;

Modified: trunk/components-ext/src/main/java/org/dllearner/common/index/HierarchicalIndex.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/common/index/HierarchicalIndex.java	2013-11-25 14:20:13 UTC (rev 4175)
+++ trunk/components-ext/src/main/java/org/dllearner/common/index/HierarchicalIndex.java	2013-11-27 14:41:05 UTC (rev 4176)
@@ -3,11 +3,9 @@
 import java.util.ArrayList;
 import java.util.List;
 
-public class HierarchicalIndex implements Index{
+public class HierarchicalIndex extends Index
+{
 	
-	private static final int DEFAULT_LIMIT = 10;
-	private static final int DEFAULT_OFFSET = 0;
-	
 	private Index primaryIndex;
 	private Index secondaryIndex;
 	
@@ -23,18 +21,8 @@
 	public Index getSecondaryIndex() {
 		return secondaryIndex;
 	}
-
-	@Override
-	public List<String> getResources(String queryString) {
-		return getResources(queryString, DEFAULT_LIMIT);
-	}
 	
 	@Override
-	public List<String> getResources(String queryString, int limit) {
-		return getResources(queryString, limit, DEFAULT_OFFSET);
-	}
-	
-	@Override
 	public List<String> getResources(String queryString, int limit, int offset) {
 		List<String> resources = new ArrayList<String>();
 		resources = primaryIndex.getResources(queryString, limit, offset);
@@ -50,11 +38,6 @@
 	}
 
 	@Override
-	public IndexResultSet getResourcesWithScores(String queryString, int limit) {
-		return getResourcesWithScores(queryString, limit, DEFAULT_OFFSET);
-	}
-
-	@Override
 	public IndexResultSet getResourcesWithScores(String queryString, int limit, int offset) {
 		IndexResultSet rs = primaryIndex.getResourcesWithScores(queryString, limit, offset);
 		if(rs.getItems().size() < limit){
@@ -63,4 +46,4 @@
 		return rs;
 	}
 
-}
+}
\ No newline at end of file

Modified: trunk/components-ext/src/main/java/org/dllearner/common/index/Index.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/common/index/Index.java	2013-11-25 14:20:13 UTC (rev 4175)
+++ trunk/components-ext/src/main/java/org/dllearner/common/index/Index.java	2013-11-27 14:41:05 UTC (rev 4176)
@@ -1,13 +1,16 @@
 package org.dllearner.common.index;
 
 import java.util.List;
-import java.util.Map;
 
-public interface Index {
-	List<String> getResources(String queryString);
-	List<String> getResources(String queryString, int limit);
-	List<String> getResources(String queryString, int limit, int offset);
-	IndexResultSet getResourcesWithScores(String queryString);
-	IndexResultSet getResourcesWithScores(String queryString, int limit);
-	IndexResultSet getResourcesWithScores(String queryString, int limit, int offset);
-}
+/** Base class for indexes: subclasses implement the (queryString, limit, offset) lookups, the shorter overloads delegate to them. */
+public abstract class Index {
+	/** Limit used by the overloads that take only a query string. */
+	static final int	DEFAULT_LIMIT	= 10;
+	// Overloads without an offset use offset 0; the caller's limit is forwarded rather than replaced by DEFAULT_LIMIT.
+	public List<String> getResources(String queryString) {return getResources(queryString,DEFAULT_LIMIT);}
+	public List<String> getResources(String queryString, int limit) {return getResources(queryString,limit,0);}
+	abstract public List<String> getResources(String queryString, int limit, int offset);
+	public IndexResultSet getResourcesWithScores(String queryString) {return getResourcesWithScores(queryString,DEFAULT_LIMIT);}
+	public IndexResultSet getResourcesWithScores(String queryString, int limit) {return getResourcesWithScores(queryString,limit,0);}
+	abstract public IndexResultSet getResourcesWithScores(String queryString, int limit, int offset);
+}
\ No newline at end of file

Modified: trunk/components-ext/src/main/java/org/dllearner/common/index/SOLRIndex.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/common/index/SOLRIndex.java	2013-11-25 14:20:13 UTC (rev 4175)
+++ trunk/components-ext/src/main/java/org/dllearner/common/index/SOLRIndex.java	2013-11-27 14:41:05 UTC (rev 4176)
@@ -13,13 +13,10 @@
 import org.apache.solr.common.SolrDocumentList;
 import org.apache.solr.common.params.ModifiableSolrParams;
 
-public class SOLRIndex implements Index{
+public class SOLRIndex extends Index{
 	
 private HttpSolrServer server;
 	
-	private static final int DEFAULT_LIMIT = 10;
-	private static final int DEFAULT_OFFSET = 0;
-	
 	private String primarySearchField;
 	private String secondarySearchField;
 	
@@ -52,16 +49,6 @@
 	}
 	
 	@Override
-	public List<String> getResources(String queryString) {
-		return getResources(queryString, DEFAULT_LIMIT);
-	}
-
-	@Override
-	public List<String> getResources(String queryString, int limit) {
-		return getResources(queryString, limit, DEFAULT_OFFSET);
-	}
-
-	@Override
 	public List<String> getResources(String queryString, int limit, int offset) {
 		List<String> resources = new ArrayList<String>();
 		QueryResponse response;
@@ -82,16 +69,6 @@
 	}
 
 	@Override
-	public IndexResultSet getResourcesWithScores(String queryString) {
-		return getResourcesWithScores(queryString, DEFAULT_LIMIT);
-	}
-
-	@Override
-	public IndexResultSet getResourcesWithScores(String queryString, int limit) {
-		return getResourcesWithScores(queryString, limit, DEFAULT_OFFSET);
-	}
-
-	@Override
 	public IndexResultSet getResourcesWithScores(String queryString, int limit, int offset) {
 		IndexResultSet rs = new IndexResultSet();
 		
@@ -148,4 +125,4 @@
 		this.sortField = sortField;
 	}
 
-}
+}
\ No newline at end of file

Modified: trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java	2013-11-25 14:20:13 UTC (rev 4175)
+++ trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java	2013-11-27 14:41:05 UTC (rev 4176)
@@ -16,11 +16,8 @@
 import com.hp.hpl.jena.rdf.model.RDFNode;
 import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP;
 
-public class SPARQLIndex implements Index{
+public class SPARQLIndex extends Index{
 	
-	private static final int DEFAULT_LIMIT = 10;
-	private static final int DEFAULT_OFFSET = 0;
-	
 	private SparqlEndpoint endpoint;
 	private ExtractionDBCache cache;
 
@@ -65,16 +62,6 @@
 	}
 	
 	@Override
-	public List<String> getResources(String searchTerm) {
-		return getResources(searchTerm, DEFAULT_LIMIT);
-	}
-
-	@Override
-	public List<String> getResources(String searchTerm, int limit) {
-		return getResources(searchTerm, limit, DEFAULT_OFFSET);
-	}
-	
-	@Override
 	public List<String> getResources(String searchTerm, int limit, int offset) {
 		List<String> resources = new ArrayList<String>();
 		
@@ -92,18 +79,8 @@
 		}
 		return resources;
 	}
-	
-	@Override
-	public IndexResultSet getResourcesWithScores(String searchTerm) {
-		return getResourcesWithScores(searchTerm, DEFAULT_LIMIT);
-	}
 
 	@Override
-	public IndexResultSet getResourcesWithScores(String searchTerm, int limit) {
-		return getResourcesWithScores(searchTerm, limit, DEFAULT_OFFSET);
-	}
-
-	@Override
 	public IndexResultSet getResourcesWithScores(String searchTerm, int limit, int offset) {
 		IndexResultSet irs = new IndexResultSet();
 		
@@ -151,4 +128,4 @@
 		return model;
 	}
 	
-}
+}
\ No newline at end of file

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4175] trunk/components-core/src/main/java/org/ dllearner/reasoning/SPARQLReasoner.java

From: <lor...@us...> - 2013-11-25 14:20:16

Revision: 4175
          http://sourceforge.net/p/dl-learner/code/4175
Author:   lorenz_b
Date:     2013-11-25 14:20:13 +0000 (Mon, 25 Nov 2013)
Log Message:
-----------
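Fixed bug: dataPropertyPopularityMap and individualPopularityMap are now also initialized alongside classPopularityMap and objectPropertyPopularityMap (see the hunk at line 125 of SPARQLReasoner.java below).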

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java

Modified: trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java	2013-11-25 09:47:35 UTC (rev 4174)
+++ trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java	2013-11-25 14:20:13 UTC (rev 4175)
@@ -125,6 +125,8 @@
 
 		classPopularityMap = new HashMap<NamedClass, Integer>();
 		objectPropertyPopularityMap = new HashMap<ObjectProperty, Integer>();
+		dataPropertyPopularityMap = new HashMap<DatatypeProperty, Integer>();
+		individualPopularityMap = new HashMap<Individual, Integer>();
 		
 		if(ks.isRemote()){
 			SparqlEndpoint endpoint = ks.getEndpoint();

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4174] trunk/components-core/src/main/java/org/ dllearner/algorithms/isle/index

From: <lor...@us...> - 2013-11-25 09:47:39

Revision: 4174
          http://sourceforge.net/p/dl-learner/code/4174
Author:   lorenz_b
Date:     2013-11-25 09:47:35 +0000 (Mon, 25 Nov 2013)
Log Message:
-----------
ISLE.

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/NormalizedTextMapper.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/NormalizedTextMapper.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/NormalizedTextMapper.java	2013-11-25 09:42:56 UTC (rev 4173)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/NormalizedTextMapper.java	2013-11-25 09:47:35 UTC (rev 4174)
@@ -60,6 +60,7 @@
                 curNormalizedLength += p.getNormalizedLength();
                 curOriginalLength += p.getOriginalLength();
                 if (curNormalizedLength >= length) {
+                	//TODO refactoring
 //                    return new Annotation(originalDocument, originalStart, curOriginalLength);
                 }
 

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java	2013-11-25 09:42:56 UTC (rev 4173)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java	2013-11-25 09:47:35 UTC (rev 4174)
@@ -1,6 +1,8 @@
 package org.dllearner.algorithms.isle.index;
 
+import com.google.common.collect.Lists;
 import com.google.common.collect.Sets;
+
 import org.dllearner.algorithms.isle.EntityCandidateGenerator;
 import org.dllearner.algorithms.isle.StopWordFilter;
 import org.dllearner.core.owl.Entity;
@@ -39,6 +41,7 @@
     public void postProcess(HashMap<Annotation,Set<Entity>> candidatesMap, int window, StopWordFilter stopWordFilter) {
     	Set<Annotation> annotations = candidatesMap.keySet();
     	List<Annotation> sortedAnnotations = new ArrayList<Annotation>(annotations);
+    	//TODO refactoring
     	/**
     	  
     	
@@ -108,17 +111,10 @@
     }
 
 	private Annotation mergeAnnotations(Annotation annotation_i, Annotation annotation_j) {
-		return null;
-//		int offset;
-//		int length;
-//		if (annotation_i.getOffset() < annotation_j.getOffset()) {
-//			offset = annotation_i.getOffset();
-//			length = annotation_j.getOffset() - offset + annotation_j.getLength(); 
-//		} else {
-//			offset = annotation_j.getOffset();
-//			length = annotation_i.getOffset() - offset + annotation_i.getLength();
-//		}
-//		return new Annotation(annotation_i.getReferencedDocument(), offset, length);
+		List<Token> tokens = Lists.newArrayList();
+		tokens.addAll(annotation_i.getTokens());
+		tokens.addAll(annotation_j.getTokens());
+		return new Annotation(annotation_i.getReferencedDocument(), tokens);
 	}
 
 	@Override

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4173] trunk/components-core/src

From: <lor...@us...> - 2013-11-25 09:43:00

Revision: 4173
          http://sourceforge.net/p/dl-learner/code/4173
Author:   lorenz_b
Date:     2013-11-25 09:42:56 +0000 (Mon, 25 Nov 2013)
Log Message:
-----------
ISLE.

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/NormalizedTextMapper.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java
    trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java
    trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETestCorpus.java
    trunk/components-core/src/test/java/org/dllearner/algorithms/isle/SemanticBibleExperiment.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/NormalizedTextMapper.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/NormalizedTextMapper.java	2013-11-22 12:44:10 UTC (rev 4172)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/NormalizedTextMapper.java	2013-11-25 09:42:56 UTC (rev 4173)
@@ -60,7 +60,7 @@
                 curNormalizedLength += p.getNormalizedLength();
                 curOriginalLength += p.getOriginalLength();
                 if (curNormalizedLength >= length) {
-                    return new Annotation(originalDocument, originalStart, curOriginalLength);
+//                    return new Annotation(originalDocument, originalStart, curOriginalLength);
                 }
 
                 // include space
@@ -82,16 +82,16 @@
     }
 
     public static void main(String[] args) {
-        NormalizedTextMapper n = new NormalizedTextMapper(new TextDocument("This is a testing text using letters"));
-        System.out.println(n.getOriginalText());
-        System.out.println(n.getNormalizedText());
-        for (OccurenceMappingPair p : n.normalizedIndexToOriginalIndex) {
-            System.out.println(p);
-        }
-        System.out.println(n.getOriginalAnnotationForPosition(7,6));
-        System.out.println(n.getOriginalAnnotationForPosition(23,6));
-        System.out.println(n.getOriginalAnnotationForPosition(7,1));
-        System.out.println(n.getOriginalAnnotationForPosition(14,15));
+//        NormalizedTextMapper n = new NormalizedTextMapper(new TextDocument("This is a testing text using letters"));
+//        System.out.println(n.getOriginalText());
+//        System.out.println(n.getNormalizedText());
+//        for (OccurenceMappingPair p : n.normalizedIndexToOriginalIndex) {
+//            System.out.println(p);
+//        }
+//        System.out.println(n.getOriginalAnnotationForPosition(7,6));
+//        System.out.println(n.getOriginalAnnotationForPosition(23,6));
+//        System.out.println(n.getOriginalAnnotationForPosition(7,1));
+//        System.out.println(n.getOriginalAnnotationForPosition(14,15));
     }
 
     /**

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java	2013-11-22 12:44:10 UTC (rev 4172)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java	2013-11-25 09:42:56 UTC (rev 4173)
@@ -39,6 +39,8 @@
     public void postProcess(HashMap<Annotation,Set<Entity>> candidatesMap, int window, StopWordFilter stopWordFilter) {
     	Set<Annotation> annotations = candidatesMap.keySet();
     	List<Annotation> sortedAnnotations = new ArrayList<Annotation>(annotations);
+    	/**
+    	  
     	
     	// Sort annotations by offset in ascending order
     	Collections.sort(sortedAnnotations, new Comparator<Annotation>(){
@@ -102,20 +104,21 @@
     	}
     	
     	
-    	
+    	 */
     }
 
 	private Annotation mergeAnnotations(Annotation annotation_i, Annotation annotation_j) {
-		int offset;
-		int length;
-		if (annotation_i.getOffset() < annotation_j.getOffset()) {
-			offset = annotation_i.getOffset();
-			length = annotation_j.getOffset() - offset + annotation_j.getLength(); 
-		} else {
-			offset = annotation_j.getOffset();
-			length = annotation_i.getOffset() - offset + annotation_i.getLength();
-		}
-		return new Annotation(annotation_i.getReferencedDocument(), offset, length);
+		return null;
+//		int offset;
+//		int length;
+//		if (annotation_i.getOffset() < annotation_j.getOffset()) {
+//			offset = annotation_i.getOffset();
+//			length = annotation_j.getOffset() - offset + annotation_j.getLength(); 
+//		} else {
+//			offset = annotation_j.getOffset();
+//			length = annotation_i.getOffset() - offset + annotation_i.getLength();
+//		}
+//		return new Annotation(annotation_i.getReferencedDocument(), offset, length);
 	}
 
 	@Override

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java	2013-11-22 12:44:10 UTC (rev 4172)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java	2013-11-25 09:42:56 UTC (rev 4173)
@@ -7,10 +7,12 @@
 
 import org.apache.log4j.Logger;
 import org.dllearner.algorithms.isle.EntityCandidateGenerator;
+import org.dllearner.algorithms.isle.TextDocumentGenerator;
 import org.dllearner.algorithms.isle.index.AnnotatedDocument;
 import org.dllearner.algorithms.isle.index.LinguisticAnnotator;
 import org.dllearner.algorithms.isle.index.SemanticAnnotator;
 import org.dllearner.algorithms.isle.index.TextDocument;
+import org.dllearner.algorithms.isle.index.Token;
 import org.dllearner.algorithms.isle.index.syntactic.SyntacticIndex;
 import org.dllearner.algorithms.isle.wsd.WordSenseDisambiguation;
 import org.dllearner.core.owl.Entity;
@@ -108,7 +110,7 @@
                 }
             }
             if (label != null) {
-                documents.add(new TextDocument(label));
+                documents.add(TextDocumentGenerator.getInstance().generateDocument(label));
             }
         }
         buildIndex(documents);

Modified: trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java	2013-11-22 12:44:10 UTC (rev 4172)
+++ trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java	2013-11-25 09:42:56 UTC (rev 4173)
@@ -108,6 +108,7 @@
 	private Map<NamedClass, Integer> classPopularityMap;
 	private Map<ObjectProperty, Integer> objectPropertyPopularityMap;
 	private Map<DatatypeProperty, Integer> dataPropertyPopularityMap;
+	private Map<Individual, Integer> individualPopularityMap;
 	
 	private boolean prepared = false;
 	
@@ -156,6 +157,8 @@
 
 		classPopularityMap = new HashMap<NamedClass, Integer>();
 		objectPropertyPopularityMap = new HashMap<ObjectProperty, Integer>();
+		dataPropertyPopularityMap = new HashMap<DatatypeProperty, Integer>();
+		individualPopularityMap = new HashMap<Individual, Integer>();
 		
 		if(ks.isRemote()){
 			SparqlEndpoint endpoint = ks.getEndpoint();
@@ -176,6 +179,8 @@
 
 		classPopularityMap = new HashMap<NamedClass, Integer>();
 		objectPropertyPopularityMap = new HashMap<ObjectProperty, Integer>();
+		dataPropertyPopularityMap = new HashMap<DatatypeProperty, Integer>();
+		individualPopularityMap = new HashMap<Individual, Integer>();
 	}
 
 	public void precomputePopularity(){
@@ -330,7 +335,19 @@
 			dataPropertyPopularityMap.put(dp, cnt);
 			return cnt;
 		}
+	}
+	
+	public int getPopularity(Individual ind){
+		if(individualPopularityMap != null && individualPopularityMap.containsKey(ind)){
+			return individualPopularityMap.get(ind);
+		} else {
+			String queryTemplate = "SELECT (COUNT(*) AS ?cnt) WHERE {<%s> ?p ?o}";
 
+			ResultSet rs = executeSelectQuery(String.format(queryTemplate, ind.getName()));
+			int cnt = rs.next().getLiteral("cnt").getInt();
+			individualPopularityMap.put(ind, cnt);
+			return cnt;
+		}
 	}
 
 	public final ClassHierarchy prepareSubsumptionHierarchy() {

Modified: trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETestCorpus.java
===================================================================
--- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETestCorpus.java	2013-11-22 12:44:10 UTC (rev 4172)
+++ trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETestCorpus.java	2013-11-25 09:42:56 UTC (rev 4173)
@@ -85,7 +85,7 @@
 			if(!file.isDirectory() && !file.isHidden()){
 				try {
 					String text = Files.toString(file, Charsets.UTF_8);
-					documents.add(new TextDocument(text));
+					documents.add(TextDocumentGenerator.getInstance().generateDocument(text));
 				} catch (IOException e) {
 					e.printStackTrace();
 				}
@@ -103,7 +103,7 @@
             if(!file.isDirectory() && !file.isHidden()){
                 try {
                     String text = Files.toString(file, Charsets.UTF_8);
-                    documents.add(new TextDocument(text));
+                    documents.add(TextDocumentGenerator.getInstance().generateDocument(text));
                 } catch (IOException e) {
                     e.printStackTrace();
                 }

Modified: trunk/components-core/src/test/java/org/dllearner/algorithms/isle/SemanticBibleExperiment.java
===================================================================
--- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/SemanticBibleExperiment.java	2013-11-22 12:44:10 UTC (rev 4172)
+++ trunk/components-core/src/test/java/org/dllearner/algorithms/isle/SemanticBibleExperiment.java	2013-11-25 09:42:56 UTC (rev 4173)
@@ -87,7 +87,7 @@
 			            String text = Files.toString(file, Charsets.UTF_8);
 //			            String posTagged = getPOSTaggedText(text);
 //			            Files.write(posTagged, new File(taggedFolder, file.getName() + ".tagged"), Charsets.UTF_8);
-			            documents.add(new TextDocument(text));
+			            documents.add(TextDocumentGenerator.getInstance().generateDocument(text));
 			        } catch (IOException e) {
 			            e.printStackTrace();
 			        }
@@ -98,9 +98,9 @@
 		} catch (IOException e) {
 			e.printStackTrace();
 		}
-        
-        documents = Sets.newHashSet(new TextDocument("and in that day seven women shall take hold of one man saying we will eat our own bread and wear our own apparel only let us be called by thy name to take away our reproach in that day shall the branch of the lord be beautiful and glorious and the fruit of the earth excellent and comely for them that are escaped of israel and it shall come to pass left in zion and remaineth in jerusalem shall be called holy every one that is written among the living in jerusalem when the lord shall have washed away the filth of the daughters of zion and shall have purged the blood of jerusalem from the midst thereof by the spirit of judgment and by the spirit of burning and the lord will create upon every dwelling place of mount zion and upon her assemblies a cloud and smoke by day and the shining of a flaming fire by night for upon all the glory a defence and there shall be a tabernacle for a shadow in the daytime from the heat and for a place of refuge and for a covert from storm and from rain"));
-        
+        documents.clear();
+        TextDocument doc = TextDocumentGenerator.getInstance().generateDocument("and in that day seven women shall take hold of one man saying we will eat our own bread and wear our own apparel only let us be called by thy name to take away our reproach in that day shall the branch of the lord be beautiful and glorious and the fruit of the earth excellent and comely for them that are escaped of israel and it shall come to pass left in zion and remaineth in jerusalem shall be called holy every one that is written among the living in jerusalem when the lord shall have washed away the filth of the daughters of zion and shall have purged the blood of jerusalem from the midst thereof by the spirit of judgment and by the spirit of burning and the lord will create upon every dwelling place of mount zion and upon her assemblies a cloud and smoke by day and the shining of a flaming fire by night for upon all the glory a defence and there shall be a tabernacle for a shadow in the daytime from the heat and for a place of refuge and for a covert from storm and from rain");
+        documents.add(doc);
         return documents;
 	}
 	

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4172] trunk/components-ext/src/main/java/org/ dllearner/common/index/SOLRIndex.java

From: <lor...@us...> - 2013-11-22 12:44:13

Revision: 4172
          http://sourceforge.net/p/dl-learner/code/4172
Author:   lorenz_b
Date:     2013-11-22 12:44:10 +0000 (Fri, 22 Nov 2013)
Log Message:
-----------
Added constructor to set search field.

Modified Paths:
--------------
    trunk/components-ext/src/main/java/org/dllearner/common/index/SOLRIndex.java

Modified: trunk/components-ext/src/main/java/org/dllearner/common/index/SOLRIndex.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/common/index/SOLRIndex.java	2013-11-21 20:44:41 UTC (rev 4171)
+++ trunk/components-ext/src/main/java/org/dllearner/common/index/SOLRIndex.java	2013-11-22 12:44:10 UTC (rev 4172)
@@ -32,6 +32,12 @@
 		server.setRequestWriter(new BinaryRequestWriter());
 	}
 	
+	public SOLRIndex(String solrServerURL, String primarySearchField){
+		server = new HttpSolrServer(solrServerURL);
+		server.setRequestWriter(new BinaryRequestWriter());
+		this.primarySearchField = primarySearchField;
+	}
+	
 	public void setSearchFields(String primarySearchField, String secondarySearchField){
 		this.primarySearchField = primarySearchField;
 		this.secondarySearchField = secondarySearchField;

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4171] trunk/test/fuzzydll/CONFIG

From: <lor...@us...> - 2013-11-21 20:44:43

Revision: 4171
          http://sourceforge.net/p/dl-learner/code/4171
Author:   lorenz_b
Date:     2013-11-21 20:44:41 +0000 (Thu, 21 Nov 2013)
Log Message:
-----------
Updated CONFIG.

Modified Paths:
--------------
    trunk/test/fuzzydll/CONFIG

Modified: trunk/test/fuzzydll/CONFIG
===================================================================
--- trunk/test/fuzzydll/CONFIG	2013-11-21 15:24:32 UTC (rev 4170)
+++ trunk/test/fuzzydll/CONFIG	2013-11-21 20:44:41 UTC (rev 4171)
@@ -1,6 +1,4 @@
-MILP_SOLVER = /Users/josue/Documents/PhD/AKSW/fuzzySemanticTools/FuzzyDLMacOSX/FuzzyDL/fuzzyDLcbc
-EPSILON = 0.01
-solver = z
-max_individuals = -1
 debugPrint = false
-CBC = 1
+epsilon = 0.001
+maxIndividuals = -1
+showVersion = true
\ No newline at end of file

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4170] trunk/components-core/src/main/java/org/ dllearner/algorithms/isle/index

From: <dfl...@us...> - 2013-11-21 15:24:35

Revision: 4170
          http://sourceforge.net/p/dl-learner/code/4170
Author:   dfleischhacker
Date:     2013-11-21 15:24:32 +0000 (Thu, 21 Nov 2013)
Log Message:
-----------
Remove unused offset-based methods

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/AnnotatedDocument.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/AnnotatedTextDocument.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/AnnotatedDocument.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/AnnotatedDocument.java	2013-11-21 13:53:20 UTC (rev 4169)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/AnnotatedDocument.java	2013-11-21 15:24:32 UTC (rev 4170)
@@ -3,10 +3,10 @@
  */
 package org.dllearner.algorithms.isle.index;
 
+import org.dllearner.core.owl.Entity;
+
 import java.util.Set;
 
-import org.dllearner.core.owl.Entity;
-
 /**
  * @author Lorenz Buehmann
  *
@@ -25,14 +25,6 @@
 	 */
 	Set<SemanticAnnotation> getAnnotations();
 	
-	/**
-	 * Returns the annotation at the given position(offset) of given length.
-	 * @param offset
-	 * @param length
-	 * @return
-	 */
-	SemanticAnnotation getAnnotation(int offset, int length);
-
     /**
      * Returns the number of occurrences of the given entity in this document.
      *

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/AnnotatedTextDocument.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/AnnotatedTextDocument.java	2013-11-21 13:53:20 UTC (rev 4169)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/AnnotatedTextDocument.java	2013-11-21 15:24:32 UTC (rev 4170)
@@ -3,11 +3,11 @@
  */
 package org.dllearner.algorithms.isle.index;
 
+import org.dllearner.core.owl.Entity;
+
 import java.util.HashSet;
 import java.util.Set;
 
-import org.dllearner.core.owl.Entity;
-
 /**
  * @author Lorenz Buehmann
  *
@@ -70,19 +70,6 @@
 	}
 
 	/* (non-Javadoc)
-	 * @see org.dllearner.algorithms.isle.index.AnnotatedDocument#getAnnotation(int, int)
-	 */
-	@Override
-	public SemanticAnnotation getAnnotation(int offset, int length) {
-		for (SemanticAnnotation annotation : annotations) {
-			if(annotation.getOffset() == offset && annotation.getLength() == length){
-				return annotation;
-			}
-		}
-		return null;
-	}
-
-	/* (non-Javadoc)
 	 * @see org.dllearner.algorithms.isle.index.AnnotatedDocument#getEntityFrequency(org.dllearner.core.owl.Entity)
 	 */
 	@Override

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4169] trunk/components-core/src/main/java/org/ dllearner/algorithms/isle/index/SemanticAnnotation.java

From: <lor...@us...> - 2013-11-21 13:53:23

Revision: 4169
          http://sourceforge.net/p/dl-learner/code/4169
Author:   lorenz_b
Date:     2013-11-21 13:53:20 +0000 (Thu, 21 Nov 2013)
Log Message:
-----------
Refactored annotation class.

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotation.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotation.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotation.java	2013-11-21 13:53:13 UTC (rev 4168)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotation.java	2013-11-21 13:53:20 UTC (rev 4169)
@@ -14,14 +14,9 @@
 	private Entity entity;
 	
 	public SemanticAnnotation(Annotation annotation, Entity entity) {
-		super(annotation.getReferencedDocument(), annotation.getOffset(), annotation.getLength());
+		super(annotation.getReferencedDocument(), annotation.getTokens());
 		this.entity = entity;
 	}
-	
-	public SemanticAnnotation(Document getReferencedDocument, Entity entity, int offset, int length) {
-		super(getReferencedDocument, offset, length);
-		this.entity = entity;
-	}
 
 	public Entity getEntity() {
 		return entity;

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4168] trunk/components-core/src/main/java/org/ dllearner/algorithms/isle/index/NGramGeneratingAnnotator.java

From: <dfl...@us...> - 2013-11-21 13:53:16

Revision: 4168
          http://sourceforge.net/p/dl-learner/code/4168
Author:   dfleischhacker
Date:     2013-11-21 13:53:13 +0000 (Thu, 21 Nov 2013)
Log Message:
-----------
Remove unused NGramGeneratingAnnotator

Removed Paths:
-------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/NGramGeneratingAnnotator.java

Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/NGramGeneratingAnnotator.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/NGramGeneratingAnnotator.java	2013-11-21 13:52:45 UTC (rev 4167)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/NGramGeneratingAnnotator.java	2013-11-21 13:53:13 UTC (rev 4168)
@@ -1,76 +0,0 @@
-package org.dllearner.algorithms.isle.index;
-
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.Set;
-import java.util.regex.Pattern;
-
-/**
- * Generates word n-grams
- * @author Daniel Fleischhacker
- */
-public class NGramGeneratingAnnotator implements LinguisticAnnotator {
-    private int length;
-
-    /**
-     * Initializes the annotator to generate word n-grams of the given length ({@code length} words per n-gram)
-     * @param length length of the single n-grams
-     */
-    public NGramGeneratingAnnotator(int length) {
-        this.length = length;
-    }
-
-    @Override
-    public Set<Annotation> annotate(Document document) {
-        String text = document.getContent();
-
-        Pattern legalChars = Pattern.compile("[A-Za-z]");
-
-        // clean up all texts
-        int curWordStartPosition = 0;
-        StringBuilder curWord = new StringBuilder();
-        ArrayList<String> wordsInText = new ArrayList<String>();
-        ArrayList<Integer> wordStart = new ArrayList<Integer>();
-        ArrayList<Integer> wordEnd = new ArrayList<Integer>();
-
-        int i = 0;
-        while (i < text.length()) {
-            Character curChar = text.charAt(i);
-            if (!legalChars.matcher(curChar.toString()).matches()) {
-                if (curWord.length() == 0) {
-                    curWordStartPosition = i + 1;
-                    i++;
-                    continue;
-                }
-                // current word finished
-                wordsInText.add(curWord.toString());
-                wordStart.add(curWordStartPosition);
-                wordEnd.add(i);
-                curWord = new StringBuilder();
-                curWordStartPosition = i + 1;
-            }
-            else {
-                curWord.append(curChar);
-            }
-            i++;
-        }
-
-        HashSet<Annotation> annotations = new HashSet<Annotation>();
-
-        i = 0;
-        while (i < wordsInText.size() - (length-1)) {
-            StringBuilder sb = new StringBuilder();
-            int curStart = wordStart.get(i);
-            int lastEnd = wordEnd.get(i);
-            for (int j = 1; j < length; j++) {
-                sb.append(wordsInText.get(i + j));
-                lastEnd = wordEnd.get(i + j);
-            }
-            String nGram = sb.toString().trim();
-            annotations.add(new Annotation(document, curStart, lastEnd - curStart));
-            i++;
-        }
-
-        return annotations;
-    }
-}

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4167] trunk/components-core/src/main/java/org/ dllearner/algorithms/isle/index/SimpleLinguisticAnnotator.java

From: <dfl...@us...> - 2013-11-21 13:52:48

Revision: 4167
          http://sourceforge.net/p/dl-learner/code/4167
Author:   dfleischhacker
Date:     2013-11-21 13:52:45 +0000 (Thu, 21 Nov 2013)
Log Message:
-----------
Remove SimpleLinguisticAnnotator

Removed Paths:
-------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleLinguisticAnnotator.java

Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleLinguisticAnnotator.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleLinguisticAnnotator.java	2013-11-21 13:40:31 UTC (rev 4166)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleLinguisticAnnotator.java	2013-11-21 13:52:45 UTC (rev 4167)
@@ -1,62 +0,0 @@
-package org.dllearner.algorithms.isle.index;
-
-import java.io.IOException;
-import java.io.StringReader;
-import java.util.HashSet;
-import java.util.Set;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.en.PorterStemFilter;
-import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.util.Version;
-import org.dllearner.algorithms.isle.StopWordFilter;
-
-/**
- * 
- * @author Jens Lehmann
- * 
- */
-public class SimpleLinguisticAnnotator implements LinguisticAnnotator {
-	
-	private StopWordFilter stopWordFilter = new StopWordFilter();
-    NGramGeneratingAnnotator nGramAnnotator = new NGramGeneratingAnnotator(2);
-
-	@Override
-	public Set<Annotation> annotate(Document document) {
-		String s = document.getContent().trim();
-		System.out.println("Document:" + s);
-//		s = stopWordFilter.removeStopWords(s);
-		Set<Annotation> annotations = new HashSet<Annotation>();
-		Pattern pattern = Pattern.compile("(\\u0020)+");
-		Matcher matcher = pattern.matcher(s);
-		// Check all occurrences
-		int start = 0;
-		while (matcher.find()) {
-			int end = matcher.start();
-			annotations.add(new Annotation(document, start, end - start));
-			start = matcher.end();
-		}
-		if(start < s.length()-1){
-			annotations.add(new Annotation(document, start, s.length() - start));
-		}
-        annotations.addAll(nGramAnnotator.annotate(document));
-//		stopWordFilter.removeStopWordAnnotations(annotations);
-		return annotations;
-	}
-	
-	public static void main(String[] args) throws Exception {
-		String s = "male person    least 1 child";
-		Pattern pattern = Pattern.compile("(\\u0020)+");
-		Matcher matcher = pattern.matcher(s);
-		int start = 0;
-		while (matcher.find()) {
-			int end = matcher.start();
-			System.out.println(s.substring(start, end));
-			start = matcher.end();
-		}
-	}
-
-}

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4166] trunk/components-core/src/main/java/org/ dllearner/algorithms/isle/index/TextDocument.java

From: <dfl...@us...> - 2013-11-21 13:40:34

Revision: 4166
          http://sourceforge.net/p/dl-learner/code/4166
Author:   dfleischhacker
Date:     2013-11-21 13:40:31 +0000 (Thu, 21 Nov 2013)
Log Message:
-----------
Add method for getting a number of tokens starting at a given token

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java	2013-11-21 13:39:34 UTC (rev 4165)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java	2013-11-21 13:40:31 UTC (rev 4166)
@@ -1,6 +1,8 @@
 package org.dllearner.algorithms.isle.index;
 
+import java.util.ArrayList;
 import java.util.LinkedList;
+import java.util.List;
 
 /**
  * A simple text document without further formatting or markup.
@@ -8,6 +10,16 @@
  * @author Daniel Fleischhacker
  */
 public class TextDocument extends LinkedList<Token> implements Document {
+    public static void main(String[] args) {
+        TextDocument t = new TextDocument();
+        String s = "This is a very long, nice text for testing our new implementation of TextDocument.";
+        for (String e : s.split(" ")) {
+            t.add(new Token(e));
+        }
+
+        System.out.println(t.getRawContent());
+    }
+
     @Override
     public String getContent() {
         return getContentStartingAtToken(this.getFirst(), SurfaceFormLevel.STEMMED);
@@ -28,7 +40,7 @@
      * surface forms according to {@code level} are used to build the string.
      *
      * @param start token to start building the string at, i.e., the first token in the returned string
-     * @param l level of surface forms to use
+     * @param l     level of surface forms to use
      * @return built string
      */
     public String getContentStartingAtToken(Token start, SurfaceFormLevel l) {
@@ -51,6 +63,42 @@
         return sb.toString();
     }
 
+    /**
+     * Returns a list containing {@code numberOfTokens} successive tokens from this document starting at the given start
+     * token. If {@code ignorePunctuation} is set, tokens which represent punctuation are added to the result but not
+     * counted for the number of tokens.
+     *
+     * @param start             token to start collecting tokens from the document
+     * @param numberOfTokens    number of tokens to collect from the document
+     * @param ignorePunctuation if true, punctuation tokens are not counted towards the number of tokens to return
+     * @return list containing the given number of relevant tokens; depending on the value of ignorePunctuation, the
+     *          list might contain additional non-relevant (punctuation) tokens
+     */
+    public List<Token> getTokensStartingAtToken(Token start, int numberOfTokens, boolean ignorePunctuation) {
+        ArrayList<Token> tokens = new ArrayList<Token>();
+
+        int relevantTokens = 0;
+        boolean found = false;
+
+        for (Token t : this) {
+            if (found) {
+                tokens.add(t);
+                if (!ignorePunctuation || !t.isPunctuation()) {
+                    relevantTokens++;
+                }
+            }
+            else if (t == start) {
+                found = true;
+                tokens.add(t);
+            }
+            if (relevantTokens == numberOfTokens) {
+                break;
+            }
+        }
+
+        return tokens;
+    }
+
     private String getStringForLevel(Token t, SurfaceFormLevel l) {
         switch (l) {
             case RAW:
@@ -63,14 +111,4 @@
 
         return null;
     }
-
-    public static void main(String[] args) {
-        TextDocument t = new TextDocument();
-        String s = "This is a very long, nice text for testing our new implementation of TextDocument.";
-        for (String e : s.split(" ")) {
-            t.add(new Token(e));
-        }
-
-        System.out.println(t.getRawContent());
-    }
 }

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4165] trunk/components-core/src/main/java/org/ dllearner/algorithms/isle/StopWordFilter.java

From: <lor...@us...> - 2013-11-21 13:39:37

Revision: 4165
          http://sourceforge.net/p/dl-learner/code/4165
Author:   lorenz_b
Date:     2013-11-21 13:39:34 +0000 (Thu, 21 Nov 2013)
Log Message:
-----------
Refactored context extractors.

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/StopWordFilter.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/StopWordFilter.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/StopWordFilter.java	2013-11-21 13:38:03 UTC (rev 4164)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/StopWordFilter.java	2013-11-21 13:39:34 UTC (rev 4165)
@@ -45,8 +45,7 @@
 	public void removeStopWordAnnotations(Set<Annotation> annotations) {
 		for (Iterator<Annotation> iter = annotations.iterator(); iter.hasNext();) {
 			Annotation annotation = iter.next();
-			String content = annotation.getReferencedDocument().getContent();
-			String token = content.substring(annotation.getOffset(), annotation.getOffset()+annotation.getLength());
+			String token = annotation.getTokens().get(0).getRawForm();
 			if(stopWords.contains(token)){
 				iter.remove();
 			}

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4164] trunk/components-core/src/main/java/org/ dllearner/algorithms/isle

From: <lor...@us...> - 2013-11-21 13:38:25

Revision: 4164
          http://sourceforge.net/p/dl-learner/code/4164
Author:   lorenz_b
Date:     2013-11-21 13:38:03 +0000 (Thu, 21 Nov 2013)
Log Message:
-----------
Refactored context extractors.

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/TextDocumentGenerator.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Annotation.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/LuceneSyntacticIndex.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SentenceBasedContextExtractor.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WindowBasedContextExtractor.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/TextDocumentGenerator.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/TextDocumentGenerator.java	2013-11-21 13:16:13 UTC (rev 4163)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/TextDocumentGenerator.java	2013-11-21 13:38:03 UTC (rev 4164)
@@ -37,7 +37,7 @@
 		return instance;
 	}
 
-	public TextDocument tag(String text) {
+	public TextDocument generateDocument(String text) {
 		TextDocument document = new TextDocument();
 	    // create an empty Annotation just with the given text
 	    Annotation annotatedDocument = new Annotation(text);
@@ -72,7 +72,7 @@
 	}
 	
 	public static void main(String[] args) throws Exception {
-		TextDocument document = TextDocumentGenerator.getInstance().tag("And he said, Amos, what seest thou? And I said, A basket of summer fruit. Then said the LORD unto me, The end is come upon my people of Israel; I will not again pass by them any more. ");
+		TextDocument document = TextDocumentGenerator.getInstance().generateDocument("And he said, Amos, what seest thou? And I said, A basket of summer fruit. Then said the LORD unto me, The end is come upon my people of Israel; I will not again pass by them any more. ");
 		System.out.println(document);
 	}
 }

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Annotation.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Annotation.java	2013-11-21 13:16:13 UTC (rev 4163)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Annotation.java	2013-11-21 13:38:03 UTC (rev 4164)
@@ -34,6 +34,13 @@
 	public Document getReferencedDocument() {
 		return referencedDocument;
 	}
+	
+	/**
+	 * @return the tokens
+	 */
+	public ArrayList<Token> getTokens() {
+		return tokens;
+	}
 
 	public String getString(){
         StringBuilder sb = new StringBuilder();

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/LuceneSyntacticIndex.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/LuceneSyntacticIndex.java	2013-11-21 13:16:13 UTC (rev 4163)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/LuceneSyntacticIndex.java	2013-11-21 13:38:03 UTC (rev 4164)
@@ -16,6 +16,7 @@
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.util.Version;
+import org.dllearner.algorithms.isle.TextDocumentGenerator;
 import org.dllearner.algorithms.isle.index.TextDocument;
 
 import java.io.File;
@@ -61,7 +62,7 @@
 			ScoreDoc[] result = searcher.search(query, getSize()).scoreDocs;
 			for (int i = 0; i < result.length; i++) {
 				Document doc = searcher.doc(result[i].doc);
-				documents.add(new TextDocument(doc.get(searchField)));
+				documents.add(TextDocumentGenerator.getInstance().generateDocument(doc.get(searchField)));
 			}
 		} catch (ParseException e) {
 			e.printStackTrace();
@@ -85,7 +86,7 @@
 			try {
 				Document doc = indexReader.document(i);
 				String content = doc.get(searchField);
-				documents.add(new TextDocument(content));
+				documents.add(TextDocumentGenerator.getInstance().generateDocument(content));
 			} catch (IOException e) {
 				e.printStackTrace();
 			}

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SentenceBasedContextExtractor.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SentenceBasedContextExtractor.java	2013-11-21 13:16:13 UTC (rev 4163)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SentenceBasedContextExtractor.java	2013-11-21 13:38:03 UTC (rev 4164)
@@ -3,6 +3,14 @@
  */
 package org.dllearner.algorithms.isle.wsd;
 
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Properties;
+
+import org.dllearner.algorithms.isle.TextDocumentGenerator;
+import org.dllearner.algorithms.isle.index.Token;
+
 import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation;
 import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation;
 import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation;
@@ -10,12 +18,7 @@
 import edu.stanford.nlp.pipeline.Annotation;
 import edu.stanford.nlp.pipeline.StanfordCoreNLP;
 import edu.stanford.nlp.util.CoreMap;
-import org.dllearner.algorithms.isle.index.TextDocument;
 
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Properties;
-
 /**
  * @author Lorenz Buehmann
  *
@@ -36,26 +39,29 @@
 	@Override
 	public List<String> extractContext(org.dllearner.algorithms.isle.index.Annotation annotation) {
 		//split text into sentences
-		List<CoreMap> sentences = getSentences(annotation.getReferencedDocument().getContent());
+		List<CoreMap> sentences = getSentences(annotation.getReferencedDocument().getRawContent());
 
 		//find the sentence containing the token of the annotation
-		int tokenStart = annotation.getOffset();
-		int index = 0;
+		Token firstToken = annotation.getTokens().get(0);
 		for (CoreMap sentence : sentences) {
-			String s = sentence.toString();
-			if (index <= tokenStart && s.length() > tokenStart) {
+			boolean found = false;
+			for (CoreLabel label : sentence.get(TokensAnnotation.class)) {
+				// this is the text of the token
+				String word = label.get(TextAnnotation.class);
+				if(word.equals(firstToken.getRawForm())){
+					found = true;
+					break;
+				}
+			}
+			if(found){
 				List<String> context = new ArrayList<String>();
 				for (CoreLabel label : sentence.get(TokensAnnotation.class)) {
 					// this is the text of the token
 					String word = label.get(TextAnnotation.class);
-					
-					if(!word.isEmpty() && !word.matches("\\p{Punct}")){
-						context.add(word);
-					}
+					context.add(word);
 				}
 				return context;
 			}
-			index += s.length();
 		}
 		throw new RuntimeException("Token " + annotation.getString() + " not found in text " + annotation.getReferencedDocument().getRawContent());
 	}
@@ -79,9 +85,8 @@
 		String s = "International Business Machines Corporation, or IBM, is an American multinational services technology and consulting corporation, with headquarters in Armonk, New York, United States. IBM manufactures and markets computer hardware and software,"
 				+ " and offers infrastructure, hosting and consulting services in areas ranging from mainframe computers to nanotechnology.";
 	
-		String token = "services";
 		SentenceBasedContextExtractor extractor = new SentenceBasedContextExtractor();
-		List<String> context = extractor.extractContext(new org.dllearner.algorithms.isle.index.Annotation(new TextDocument(s), s.indexOf(token), token.length()));
+		List<String> context = extractor.extractContext(new org.dllearner.algorithms.isle.index.Annotation(TextDocumentGenerator.getInstance().generateDocument(s), Arrays.asList(new Token("American"))));
 		System.out.println(context);
 	}
 

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WindowBasedContextExtractor.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WindowBasedContextExtractor.java	2013-11-21 13:16:13 UTC (rev 4163)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WindowBasedContextExtractor.java	2013-11-21 13:38:03 UTC (rev 4164)
@@ -3,6 +3,14 @@
  */
 package org.dllearner.algorithms.isle.wsd;
 
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Properties;
+
+import org.dllearner.algorithms.isle.TextDocumentGenerator;
+import org.dllearner.algorithms.isle.index.Token;
+
 import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation;
 import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation;
 import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation;
@@ -10,18 +18,13 @@
 import edu.stanford.nlp.pipeline.Annotation;
 import edu.stanford.nlp.pipeline.StanfordCoreNLP;
 import edu.stanford.nlp.util.CoreMap;
-import org.dllearner.algorithms.isle.index.TextDocument;
 
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Properties;
-
 /**
  * @author Lorenz Buehmann
- *
+ * 
  */
-public class WindowBasedContextExtractor implements ContextExtractor{
-	
+public class WindowBasedContextExtractor implements ContextExtractor {
+
 	private StanfordCoreNLP pipeline;
 	private int tokensLeft = 10;
 	private int tokensRight = 10;
@@ -29,57 +32,66 @@
 	public WindowBasedContextExtractor(int tokensLeft, int tokensRight) {
 		this.tokensLeft = tokensLeft;
 		this.tokensRight = tokensRight;
-		
+
 		Properties props = new Properties();
 		props.put("annotators", "tokenize, ssplit");
 		pipeline = new StanfordCoreNLP(props);
 	}
-	
+
 	public WindowBasedContextExtractor(int tokensLeftRight) {
 		tokensLeft = tokensLeftRight;
 		tokensRight = tokensLeftRight;
-		
+
 		Properties props = new Properties();
 		props.put("annotators", "tokenize, ssplit");
 		pipeline = new StanfordCoreNLP(props);
 	}
-	
+
 	public WindowBasedContextExtractor() {
 		Properties props = new Properties();
 		props.put("annotators", "tokenize, ssplit");
 		pipeline = new StanfordCoreNLP(props);
 	}
 
-	/* (non-Javadoc)
-	 * @see org.dllearner.algorithms.isle.wsd.ContextExtractor#extractContext(java.lang.String, java.lang.String)
+	/*
+	 * (non-Javadoc)
+	 * 
+	 * @see
+	 * org.dllearner.algorithms.isle.wsd.ContextExtractor#extractContext(java
+	 * .lang.String, java.lang.String)
 	 */
 	@Override
 	public List<String> extractContext(org.dllearner.algorithms.isle.index.Annotation annotation) {
 		// split text into sentences
-		List<CoreMap> sentences = getSentences(annotation.getReferencedDocument().getContent());
+		List<CoreMap> sentences = getSentences(annotation.getReferencedDocument().getRawContent());
 
 		// find the sentence containing the token of the annotation
-		int tokenStart = annotation.getOffset();
-		int index = 0;
+		Token firstToken = annotation.getTokens().get(0);
 		for (CoreMap sentence : sentences) {
-			String s = sentence.toString();
-			if (index <= tokenStart && s.length() > tokenStart) {
+			boolean found = false;
+			for (CoreLabel label : sentence.get(TokensAnnotation.class)) {
+				// this is the text of the token
+				String word = label.get(TextAnnotation.class);
+				if (word.equals(firstToken.getRawForm())) {
+					found = true;
+					break;
+				}
+			}
+			if (found) {
 				List<String> context = new ArrayList<String>();
 				for (CoreLabel label : sentence.get(TokensAnnotation.class)) {
 					// this is the text of the token
 					String word = label.get(TextAnnotation.class);
-
 					context.add(word);
 				}
 				return context;
 			}
-			index += s.length();
 		}
-		throw new RuntimeException("Token " + annotation + " not found in text "
-				+ annotation.getReferencedDocument().getContent());
+		throw new RuntimeException("Token " + annotation.getString() + " not found in text "
+				+ annotation.getReferencedDocument().getRawContent());
 
 	}
-	
+
 	private List<CoreMap> getSentences(String document) {
 		// create an empty Annotation just with the given text
 		Annotation annotation = new Annotation(document);
@@ -94,14 +106,14 @@
 
 		return sentences;
 	}
-	
+
 	public static void main(String[] args) throws Exception {
 		String s = "International Business Machines Corporation, or IBM, is an American multinational services technology and consulting corporation, with headquarters in Armonk, New York, United States. IBM manufactures and markets computer hardware and software,"
 				+ " and offers infrastructure, hosting and consulting services in areas ranging from mainframe computers to nanotechnology.";
-	
+
 		String token = "services";
 		WindowBasedContextExtractor extractor = new WindowBasedContextExtractor();
-		List<String> context = extractor.extractContext(new org.dllearner.algorithms.isle.index.Annotation(new TextDocument(s), s.indexOf(token), token.length()));
+		List<String> context = extractor.extractContext(new org.dllearner.algorithms.isle.index.Annotation(TextDocumentGenerator.getInstance().generateDocument(s), Arrays.asList(new Token("American"))));
 		System.out.println(context);
 	}
 

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4163] trunk/components-core/src/main/java/org/dllearner/algorithms/isle

From: <dfl...@us...> - 2013-11-21 13:16:16

Revision: 4163
          http://sourceforge.net/p/dl-learner/code/4163
Author:   dfleischhacker
Date:     2013-11-21 13:16:13 +0000 (Thu, 21 Nov 2013)
Log Message:
-----------
Annotation refactoring

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Annotation.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SentenceBasedContextExtractor.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SimpleWordSenseDisambiguation.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Annotation.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Annotation.java	2013-11-21 13:00:33 UTC (rev 4162)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Annotation.java	2013-11-21 13:16:13 UTC (rev 4163)
@@ -4,6 +4,9 @@
 package org.dllearner.algorithms.isle.index;
 
 
+import java.util.ArrayList;
+import java.util.List;
+
 /**
  * A (non-semantic) annotation which represents an entity in a document by its offset and length.
  * @author Lorenz Buehmann
@@ -12,8 +15,7 @@
 public class Annotation {
 	
 	private Document referencedDocument;
-	private int offset;
-	private int length;
+    private ArrayList<Token> tokens;
     private String matchedString;
 
     public String getMatchedString() {
@@ -24,64 +26,64 @@
         this.matchedString = matchedString;
     }
 
-    public Annotation(Document referencedDocument, int offset, int length) {
+    public Annotation(Document referencedDocument, List<Token> tokens) {
 		this.referencedDocument = referencedDocument;
-		this.offset = offset;
-		this.length = length;
-	}
+        this.tokens = new ArrayList<Token>(tokens);
+    }
 
 	public Document getReferencedDocument() {
 		return referencedDocument;
 	}
 
-	public int getOffset() {
-		return offset;
-	}
+	public String getString(){
+        StringBuilder sb = new StringBuilder();
+        for (Token t : tokens) {
+            if (sb.length() > 0) {
+                sb.append(" ");
+            }
+            sb.append(t.getStemmedForm());
+        }
+        return sb.toString();
+    }
 
-	public int getLength() {
-		return length;
-	}
-	
-	public String getToken(){
-		return referencedDocument.getContent().substring(offset, offset + length);
-	}
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+        if (o == null || getClass() != o.getClass()) {
+            return false;
+        }
 
-	@Override
-	public int hashCode() {
-		final int prime = 31;
-		int result = 1;
-		result = prime * result + ((referencedDocument == null) ? 0 : referencedDocument.hashCode());
-		result = prime * result + length;
-		result = prime * result + offset;
-		return result;
-	}
+        Annotation that = (Annotation) o;
 
+        if (matchedString != null ? !matchedString.equals(that.matchedString) : that.matchedString != null) {
+            return false;
+        }
+        if (referencedDocument != null ? !referencedDocument.equals(that.referencedDocument) :
+                that.referencedDocument != null) {
+            return false;
+        }
+        if (tokens != null ? !tokens.equals(that.tokens) : that.tokens != null) {
+            return false;
+        }
+
+        return true;
+    }
+
+    @Override
+    public int hashCode() {
+        int result = referencedDocument != null ? referencedDocument.hashCode() : 0;
+        result = 31 * result + (tokens != null ? tokens.hashCode() : 0);
+        result = 31 * result + (matchedString != null ? matchedString.hashCode() : 0);
+        return result;
+    }
+
+    /* (non-Javadoc)
+         * @see java.lang.Object#toString()
+         */
 	@Override
-	public boolean equals(Object obj) {
-		if (this == obj)
-			return true;
-		if (obj == null)
-			return false;
-		if (getClass() != obj.getClass())
-			return false;
-		Annotation other = (Annotation) obj;
-		if (referencedDocument == null) {
-			if (other.referencedDocument != null)
-				return false;
-		} else if (!referencedDocument.equals(other.referencedDocument))
-			return false;
-		if (length != other.length)
-			return false;
-		if (offset != other.offset)
-			return false;
-		return true;
-	}
-	
-	/* (non-Javadoc)
-	 * @see java.lang.Object#toString()
-	 */
-	@Override
 	public String toString() {
-		return "\"" + referencedDocument.getContent().substring(offset, offset+length) + "\" at position " + offset;
-	}
+        return getString();
+    }
 }

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java	2013-11-21 13:00:33 UTC (rev 4162)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java	2013-11-21 13:16:13 UTC (rev 4163)
@@ -54,7 +54,7 @@
     		Annotation annotation_i = sortedAnnotations.get(i);
     		int begin_i = annotation_i.getOffset();
     		int end_i = begin_i + annotation_i.getLength()-1;
-    		String token_i = annotation_i.getToken();
+    		String token_i = annotation_i.getString();
     		Set<Entity> candidates_i = getCandidates(annotation_i);
     		Set<Entity> newCandidates_i = new HashSet<Entity>();
     		
@@ -68,7 +68,7 @@
     		for (int j=windowStart; j<sortedAnnotations.size() && j<windowEnd; j++) {
     			if (j!=i) {
 	    			Annotation annotation_j = sortedAnnotations.get(j);
-	    			String token_j = annotation_j.getToken();
+	    			String token_j = annotation_j.getString();
 	    			Set<Entity> candidates_j = getCandidates(annotation_j);
 	    			Set<Entity> intersection = Sets.intersection(candidates_i, candidates_j);
 	    			Set<Entity> newCandidates_ij = new HashSet<Entity>();
@@ -83,7 +83,7 @@
 	    			if (!newCandidates_ij.isEmpty()) {
 	    				Annotation mergedAnnotation = mergeAnnotations(annotation_i,annotation_j);
 	    				// If there's no punctuation in the merged annotation
-	    				if (!Pattern.matches("\\p{Punct}", mergedAnnotation.getToken())) {
+	    				if (!Pattern.matches("\\p{Punct}", mergedAnnotation.getString())) {
 		    				candidatesMap.put(mergedAnnotation, newCandidates_ij);
 		    				candidatesMap.remove(annotation_i);
 		    				candidatesMap.remove(annotation_j);

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java	2013-11-21 13:00:33 UTC (rev 4162)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java	2013-11-21 13:16:13 UTC (rev 4163)
@@ -48,6 +48,7 @@
 
     public SemanticIndex(OWLOntology ontology) {
         this.ontology = ontology;
+
     }
 
     /**

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SentenceBasedContextExtractor.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SentenceBasedContextExtractor.java	2013-11-21 13:00:33 UTC (rev 4162)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SentenceBasedContextExtractor.java	2013-11-21 13:16:13 UTC (rev 4163)
@@ -57,7 +57,7 @@
 			}
 			index += s.length();
 		}
-		throw new RuntimeException("Token " + annotation.getToken() + " not found in text " + annotation.getReferencedDocument().getRawContent());
+		throw new RuntimeException("Token " + annotation.getString() + " not found in text " + annotation.getReferencedDocument().getRawContent());
 	}
 	
 	private List<CoreMap> getSentences(String document) {

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SimpleWordSenseDisambiguation.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SimpleWordSenseDisambiguation.java	2013-11-21 13:00:33 UTC (rev 4162)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SimpleWordSenseDisambiguation.java	2013-11-21 13:16:13 UTC (rev 4163)
@@ -50,7 +50,7 @@
 	public SemanticAnnotation disambiguate(Annotation annotation, Set<Entity> candidateEntities) {
 		logger.debug("Linguistic annotations:\n" + annotation);
 		logger.debug("Candidate entities:" + candidateEntities);
-		String token = annotation.getToken().trim();
+		String token = annotation.getString().trim();
 		//check if annotation token matches label of entity or the part behind #(resp. /)
 		for (Entity entity : candidateEntities) {
 			Set<String> labels = getLabels(entity);

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4162] trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java

From: <dfl...@us...> - 2013-11-21 13:00:36

Revision: 4162
          http://sourceforge.net/p/dl-learner/code/4162
Author:   dfleischhacker
Date:     2013-11-21 13:00:33 +0000 (Thu, 21 Nov 2013)
Log Message:
-----------
Ignore punctuation in stemmed text

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java	2013-11-21 12:57:10 UTC (rev 4161)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java	2013-11-21 13:00:33 UTC (rev 4162)
@@ -37,7 +37,10 @@
         for (Token t : this) {
             if (found) {
                 sb.append(" ");
-                sb.append(getStringForLevel(t, l));
+                String surfaceForm = getStringForLevel(t, l);
+                if (surfaceForm != null) {
+                    sb.append(surfaceForm);
+                }
             }
             else if (t == start) {
                 found = true;
@@ -55,9 +58,19 @@
             case POS_TAGGED:
                 return t.getPOSTag();
             case STEMMED:
-                return t.getStemmedForm();
+                return t.isPunctuation() ? null : t.getStemmedForm();
         }
 
         return null;
     }
+
+    public static void main(String[] args) {
+        TextDocument t = new TextDocument();
+        String s = "This is a very long, nice text for testing our new implementation of TextDocument.";
+        for (String e : s.split(" ")) {
+            t.add(new Token(e));
+        }
+
+        System.out.println(t.getRawContent());
+    }
 }

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[4161] trunk/components-core/src/main/java/org/dllearner/algorithms/isle

From: <lor...@us...> - 2013-11-21 12:57:14

Revision: 4161
          http://sourceforge.net/p/dl-learner/code/4161
Author:   lorenz_b
Date:     2013-11-21 12:57:10 +0000 (Thu, 21 Nov 2013)
Log Message:
-----------
Cont. text document generator.

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/TextDocumentGenerator.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Token.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/TextDocumentGenerator.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/TextDocumentGenerator.java	2013-11-21 12:51:05 UTC (rev 4160)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/TextDocumentGenerator.java	2013-11-21 12:57:10 UTC (rev 4161)
@@ -19,7 +19,10 @@
 public class TextDocumentGenerator {
 
 	private static TextDocumentGenerator instance;
+	
 	private StanfordCoreNLP pipeline;
+	private final String punctuationPattern = "\\p{Punct}";
+	private final StopWordFilter stopWordFilter = new StopWordFilter();
 	
 	private TextDocumentGenerator(){
 		Properties props = new Properties();
@@ -54,14 +57,22 @@
 	            String pos = label.get(PartOfSpeechAnnotation.class);
 	            //this is the POS tag of the token
 	            String lemma = label.get(LemmaAnnotation.class);
+	            //check if token is punctuation
+	            boolean isPunctuation = word.matches(punctuationPattern);
+	            //check if it is a stop word
+	            boolean isStopWord = stopWordFilter.isStopWord(word);
 	           
-	            Token token = new Token(word);
-	            token.setPOSTag(pos);
-	            token.setStemmedForm(lemma);
+	            Token token = new Token(word, lemma, pos, isPunctuation, isStopWord);
+	           
 	            document.add(token);
 	          }
 	    }
 		
 		return document;
 	}
+	
+	public static void main(String[] args) throws Exception {
+		TextDocument document = TextDocumentGenerator.getInstance().tag("And he said, Amos, what seest thou? And I said, A basket of summer fruit. Then said the LORD unto me, The end is come upon my people of Israel; I will not again pass by them any more. ");
+		System.out.println(document);
+	}
 }

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Token.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Token.java	2013-11-21 12:51:05 UTC (rev 4160)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Token.java	2013-11-21 12:57:10 UTC (rev 4161)
@@ -12,11 +12,21 @@
 	private String rawForm;
 	private String stemmedForm;
 	private String posTag;
+	private boolean isPunctuation;
+	private boolean isStopWord;
 	
 	public Token(String rawForm) {
-		posTag = rawForm;
+		this.rawForm = rawForm;
 	}
 	
+	public Token(String rawForm, String stemmedForm, String posTag, boolean isPunctuation, boolean isStopWord) {
+		this.rawForm = rawForm;
+		this.stemmedForm = stemmedForm;
+		this.posTag = posTag;
+		this.isPunctuation = isPunctuation;
+		this.isStopWord = isStopWord;
+	}
+
 	/**
 	 * @return the rawForm
 	 */
@@ -39,6 +49,20 @@
 	}
 	
 	/**
+	 * @return the isPunctuation
+	 */
+	public boolean isPunctuation() {
+		return isPunctuation;
+	}
+	
+	/**
+	 * @return the isStopWord
+	 */
+	public boolean isStopWord() {
+		return isStopWord;
+	}
+	
+	/**
 	 * @param stemmedForm the stemmedForm to set
 	 */
 	public void setStemmedForm(String stemmedForm) {
@@ -51,14 +75,28 @@
 	public void setPOSTag(String posTag) {
 		this.posTag = posTag;
 	}
+	
+	/**
+	 * @param isPunctuation the isPunctuation to set
+	 */
+	public void setIsPunctuation(boolean isPunctuation) {
+		this.isPunctuation = isPunctuation;
+	}
+	
+	/**
+	 * @param isStopWord the isStopWord to set
+	 */
+	public void setIsStopWord(boolean isStopWord) {
+		this.isStopWord = isStopWord;
+	}
 
 	/* (non-Javadoc)
 	 * @see java.lang.Object#toString()
 	 */
 	@Override
 	public String toString() {
-		return "Word: " + rawForm + "\n" 
+		return "\n[Word: " + rawForm + "\n" 
 				+ "Stemmed word: " + stemmedForm + "\n"
-				+ "POS tag: " + posTag;
+				+ "POS tag: " + posTag + "]";
 	}
 }

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

3 messages have been excluded from this view by a project administrator.

Flat | Threaded

<< < 1 .. 3 4 5 6 7 .. 171 > >> (Page 5 of 171)