From: <lor...@us...> - 2013-12-02 14:52:35
|
Revision: 4183 http://sourceforge.net/p/dl-learner/code/4183 Author: lorenz_b Date: 2013-12-02 14:52:33 +0000 (Mon, 02 Dec 2013) Log Message: ----------- Refactoring. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/EntityTextRetriever.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/RDFSLabelEntityTextRetriever.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-12-02 14:41:21 UTC (rev 4182) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-12-02 14:52:33 UTC (rev 4183) @@ -42,26 +42,21 @@ public void buildTrie(OWLOntology ontology, NameGenerator nameGenerator) { this.trie = new PrefixTrie<FullTokenEntitySetPair>(); - Map<Entity, Set<String>> relevantText = entityTextRetriever.getRelevantText(ontology); + Map<Entity, Set<List<Token>>> entity2TokenSet = entityTextRetriever.getRelevantText(ontology); - for (Entity entity : relevantText.keySet()) { - - for (String text : relevantText.get(entity)) { - text = StringUtils.join(LinguisticUtil.getInstance().getWordsFromCamelCase(text), " "); - text = StringUtils.join(LinguisticUtil.getInstance().getWordsFromUnderscored(text), " "); - if (text.trim().isEmpty()) { - continue; - } - - addEntry(text, entity); - addSubsequencesWordNet(entity, text); - - for (String alternativeText : nameGenerator.getAlternativeText(text)) { - addEntry(alternativeText.toLowerCase(), entity, text); - } - } - } + for (Entry<Entity, Set<List<Token>>> entry : entity2TokenSet.entrySet()) { + Entity entity = entry.getKey(); + Set<List<Token>> tokenSet = entry.getValue(); + for (List<Token> tokens : tokenSet) { + addEntry(tokens, entity); + addSubsequences(entity, tokens); +// addSubsequencesWordNet(entity, text); +// for (String alternativeText : nameGenerator.getAlternativeText(text)) { +// addEntry(alternativeText.toLowerCase(), entity, text); +// } + } + } } /** Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java 2013-12-02 14:41:21 UTC (rev 4182) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java 2013-12-02 14:52:33 UTC (rev 4183) @@ -127,6 +127,15 @@ tree.add(tokens1, new NamedClass("TokenTree")); tree.add(tokens2, new NamedClass("TokenizedTree")); System.out.println(tree); + + System.out.println(tree.getEntitiesForLongestMatch(tokens1)); + System.out.println(tree.getLongestMatch(tokens1)); + + List<Token> tokens3 = Lists.newLinkedList(); + for (String s : Splitter.on(" ").split("this is a very nice tokenized tree")) { + tokens3.add(new Token(s, s, s, false, false)); + }; + System.out.println(tree.getLongestMatch(tokens3)); } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java 2013-12-02 14:41:21 UTC (rev 4182) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java 2013-12-02 14:52:33 UTC (rev 4183) @@ -5,10 +5,13 @@ import java.util.HashMap; import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Set; +import org.dllearner.algorithms.isle.TextDocumentGenerator; import org.dllearner.algorithms.isle.index.LinguisticUtil; +import org.dllearner.algorithms.isle.index.Token; import org.dllearner.core.owl.Entity; import org.dllearner.kb.OWLAPIOntology; import org.dllearner.utilities.owl.OWLAPIConverter; @@ -75,8 +78,8 @@ * @see org.dllearner.algorithms.isle.EntityTextRetriever#getRelevantText(org.dllearner.core.owl.Entity) */ @Override - public Map<String, Double> getRelevantText(Entity entity) { - Map<String, Double> textWithWeight = new HashMap<String, Double>(); + public Map<List<Token>, Double> getRelevantText(Entity entity) { + Map<List<Token>, Double> textWithWeight = new HashMap<List<Token>, Double>(); OWLEntity e = OWLAPIConverter.getOWLAPIEntity(entity); @@ -87,7 +90,7 @@ OWLLiteral val = (OWLLiteral) annotation.getValue(); if (val.hasLang(language)) { String label = val.getLiteral().trim(); - textWithWeight.put(label, weight); + textWithWeight.put(TextDocumentGenerator.getInstance().generateDocument(label), weight); } } } @@ -97,7 +100,7 @@ String shortForm = sfp.getShortForm(IRI.create(entity.getURI())); shortForm = Joiner.on(" ").join(LinguisticUtil.getInstance().getWordsFromCamelCase(shortForm)); shortForm = Joiner.on(" ").join(LinguisticUtil.getInstance().getWordsFromUnderscored(shortForm)).trim(); - textWithWeight.put(shortForm, weight); + textWithWeight.put(TextDocumentGenerator.getInstance().generateDocument(shortForm), weight); } return textWithWeight; @@ -108,8 +111,8 @@ * @return */ @Override - public Map<Entity, Set<String>> getRelevantText(OWLOntology ontology) { - Map<Entity, Set<String>> entity2RelevantText = new HashMap<Entity, Set<String>>(); + public Map<Entity, Set<List<Token>>> getRelevantText(OWLOntology ontology) { + Map<Entity, Set<List<Token>>> entity2RelevantText = new HashMap<>(); Set<OWLEntity> schemaEntities = new HashSet<OWLEntity>(); schemaEntities.addAll(ontology.getClassesInSignature()); @@ -117,7 +120,7 @@ schemaEntities.addAll(ontology.getDataPropertiesInSignature()); schemaEntities.remove(OWL_THING); - Map<String, Double> relevantText; + Map<List<Token>, Double> relevantText; for (OWLEntity owlEntity : schemaEntities) { Entity entity = OWLAPIConverter.getEntity(owlEntity); relevantText = getRelevantText(entity); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/EntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/EntityTextRetriever.java 2013-12-02 14:41:21 UTC (rev 4182) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/EntityTextRetriever.java 2013-12-02 14:52:33 UTC (rev 4183) @@ -19,9 +19,11 @@ package org.dllearner.algorithms.isle.textretrieval; +import java.util.List; import java.util.Map; import java.util.Set; +import org.dllearner.algorithms.isle.index.Token; import org.dllearner.core.owl.Entity; import org.semanticweb.owlapi.model.OWLOntology; @@ -45,8 +47,8 @@ * @param entity The entity to handle. * @return A weighted set of strings. For a value x, we need to have 0 <= x <= 1. */ - public Map<String, Double> getRelevantText(Entity entity); + public Map<List<Token>, Double> getRelevantText(Entity entity); - public Map<Entity, Set<String>> getRelevantText(OWLOntology ontology); + public Map<Entity, Set<List<Token>>> getRelevantText(OWLOntology ontology); } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/RDFSLabelEntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/RDFSLabelEntityTextRetriever.java 2013-12-02 14:41:21 UTC (rev 4182) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/RDFSLabelEntityTextRetriever.java 2013-12-02 14:52:33 UTC (rev 4183) @@ -4,12 +4,14 @@ package org.dllearner.algorithms.isle.textretrieval; import java.io.File; +import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; +import org.dllearner.algorithms.isle.index.Token; import org.dllearner.core.owl.Entity; import org.dllearner.kb.OWLAPIOntology; import org.semanticweb.owlapi.apibinding.OWLManager; @@ -43,13 +45,13 @@ OWLOntology ontology = man.loadOntology(IRI.create("http://www.semanticbible.com/2006/11/NTNames.owl")); RDFSLabelEntityTextRetriever labelRetriever = new RDFSLabelEntityTextRetriever(ontology); - Map<Entity, Set<String>> relevantText = labelRetriever.getRelevantText(ontology); + Map<Entity, Set<List<Token>>> relevantText = labelRetriever.getRelevantText(ontology); SortedMap<String, String> uri2Labels = new TreeMap<String, String>(); - for (Entry<Entity, Set<String>> entry : relevantText.entrySet()) { + for (Entry<Entity, Set<List<Token>>> entry : relevantText.entrySet()) { Entity key = entry.getKey(); - Set<String> value = entry.getValue(); - uri2Labels.put(key.getName(), value.iterator().next()); + Set<List<Token>> value = entry.getValue(); + uri2Labels.put(key.getName(), value.iterator().next().get(0).getRawForm()); } StringBuilder csv = new StringBuilder(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |