You can subscribe to this list here.
2007 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
(120) |
Sep
(36) |
Oct
(116) |
Nov
(17) |
Dec
(44) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2008 |
Jan
(143) |
Feb
(192) |
Mar
(74) |
Apr
(84) |
May
(105) |
Jun
(64) |
Jul
(49) |
Aug
(120) |
Sep
(159) |
Oct
(156) |
Nov
(51) |
Dec
(28) |
2009 |
Jan
(17) |
Feb
(55) |
Mar
(33) |
Apr
(57) |
May
(54) |
Jun
(28) |
Jul
(6) |
Aug
(16) |
Sep
(38) |
Oct
(30) |
Nov
(26) |
Dec
(52) |
2010 |
Jan
(7) |
Feb
(91) |
Mar
(65) |
Apr
(2) |
May
(14) |
Jun
(25) |
Jul
(38) |
Aug
(48) |
Sep
(80) |
Oct
(70) |
Nov
(75) |
Dec
(77) |
2011 |
Jan
(68) |
Feb
(53) |
Mar
(51) |
Apr
(35) |
May
(65) |
Jun
(101) |
Jul
(29) |
Aug
(230) |
Sep
(95) |
Oct
(49) |
Nov
(110) |
Dec
(63) |
2012 |
Jan
(41) |
Feb
(42) |
Mar
(25) |
Apr
(46) |
May
(51) |
Jun
(44) |
Jul
(45) |
Aug
(29) |
Sep
(12) |
Oct
(9) |
Nov
(17) |
Dec
(2) |
2013 |
Jan
(12) |
Feb
(14) |
Mar
(7) |
Apr
(16) |
May
(54) |
Jun
(27) |
Jul
(11) |
Aug
(5) |
Sep
(85) |
Oct
(27) |
Nov
(37) |
Dec
(32) |
2014 |
Jan
(8) |
Feb
(29) |
Mar
(5) |
Apr
(3) |
May
(22) |
Jun
(3) |
Jul
(4) |
Aug
(3) |
Sep
|
Oct
|
Nov
|
Dec
|
From: <and...@us...> - 2013-09-05 08:43:01
|
Revision: 4085 http://sourceforge.net/p/dl-learner/code/4085 Author: andremelo Date: 2013-09-05 08:42:59 +0000 (Thu, 05 Sep 2013) Log Message: ----------- Adding printTrie Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-09-05 08:32:57 UTC (rev 4084) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-09-05 08:42:59 UTC (rev 4085) @@ -1,6 +1,10 @@ package org.dllearner.algorithms.isle.index; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Set; @@ -58,5 +62,18 @@ CharSequence match = trie.getLongestMatch(s); return (match!=null) ? trie.getLongestMatch(s).toString() : null; } + + public void printTrie() { + System.out.println("Printing tree content:"); + Map<String,Set<Entity>> trieMap = trie.toMap(); + List<String> termsList = new ArrayList(trieMap.keySet()); + Collections.sort(termsList); + for (String key : termsList) { + System.out.println(key); + for (Entity candidate: trieMap.get(key)) { + System.out.println("\t"+candidate); + } + } + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <and...@us...> - 2013-09-05 08:33:00
|
Revision: 4084 http://sourceforge.net/p/dl-learner/code/4084 Author: andremelo Date: 2013-09-05 08:32:57 +0000 (Thu, 05 Sep 2013) Log Message: ----------- Left shifting the output of getLongestMatch Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/utilities/datastructures/PrefixTrie.java Modified: trunk/components-core/src/main/java/org/dllearner/utilities/datastructures/PrefixTrie.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/utilities/datastructures/PrefixTrie.java 2013-09-05 08:20:31 UTC (rev 4083) +++ trunk/components-core/src/main/java/org/dllearner/utilities/datastructures/PrefixTrie.java 2013-09-05 08:32:57 UTC (rev 4084) @@ -116,10 +116,10 @@ break; } } - if (i<=0) + if (i<=1) return null; else - return s.subSequence(0, i-1); + return s.subSequence(1, i); } /** This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <jen...@us...> - 2013-09-05 08:20:33
|
Revision: 4083 http://sourceforge.net/p/dl-learner/code/4083 Author: jenslehmann Date: 2013-09-05 08:20:31 +0000 (Thu, 05 Sep 2013) Log Message: ----------- updated SWORE test case Modified Paths: -------------- trunk/test/isle/swore/corpus/requirements_management.txt Added Paths: ----------- trunk/test/isle/swore/corpus/customer_requirement.txt trunk/test/isle/swore/corpus/requirement.txt trunk/test/isle/swore/corpus/requirements_analysis.txt Property Changed: ---------------- trunk/test/isle/swore/ Index: trunk/test/isle/swore =================================================================== --- trunk/test/isle/swore 2013-09-05 08:03:11 UTC (rev 4082) +++ trunk/test/isle/swore 2013-09-05 08:20:31 UTC (rev 4083) Property changes on: trunk/test/isle/swore ___________________________________________________________________ Added: svn:ignore ## -0,0 +1 ## +searchTree*.txt Added: trunk/test/isle/swore/corpus/customer_requirement.txt =================================================================== --- trunk/test/isle/swore/corpus/customer_requirement.txt (rev 0) +++ trunk/test/isle/swore/corpus/customer_requirement.txt 2013-09-05 08:20:31 UTC (rev 4083) @@ -0,0 +1 @@ +A customer requirement is usually desired by at least one customer and usually specificed or captured via requirements engineering systems for later inspection by software developers and maintainers. The goal of involving customer in the requirements elicitation process is to improve the quality of the software (see also: requirements elicitation, open source software, E-Government). 
Added: trunk/test/isle/swore/corpus/requirement.txt =================================================================== --- trunk/test/isle/swore/corpus/requirement.txt (rev 0) +++ trunk/test/isle/swore/corpus/requirement.txt 2013-09-05 08:20:31 UTC (rev 4083) @@ -0,0 +1 @@ +In product development and process optimization, a requirement is a singular documented physical and functional need that a particular design, product or process must be able to perform. It is most commonly used in a formal sense in systems engineering, software engineering, or enterprise engineering. It is a statement that identifies a necessary attribute, capability, characteristic, or quality of a system for it to have value and utility to a customer, organisation, internal user, or other stakeholder. A specification (often abbreviated as spec) may refer to an explicit set of requirements to be satisfied by a material, design, product, or service. In the classical engineering approach, sets of requirements are used as inputs into the design stages of product development. Requirements are also an important input into the verification process, since tests should trace back to specific requirements. Requirements show what elements and functions are necessary for the particular project. This is reflected in the waterfall model of the software life-cycle. However, when iterative methods of software development or agile methods are used, the system requirements are incrementally developed in parallel with design and implementation. 
Added: trunk/test/isle/swore/corpus/requirements_analysis.txt =================================================================== --- trunk/test/isle/swore/corpus/requirements_analysis.txt (rev 0) +++ trunk/test/isle/swore/corpus/requirements_analysis.txt 2013-09-05 08:20:31 UTC (rev 4083) @@ -0,0 +1 @@ +Requirements analysis in systems engineering and software engineering, encompasses those tasks that go into determining the needs or conditions to meet for a new or altered product, taking account of the possibly conflicting requirements of the various stakeholders, analysing, documenting, validating and managing software or system requirements. Requirements analysis is critical to the success of a systems or software project. The requirements should be documented, actionable, measurable, testable, traceable, related to identified business needs or opportunities, and defined to a level of detail sufficient for system design. Modified: trunk/test/isle/swore/corpus/requirements_management.txt =================================================================== --- trunk/test/isle/swore/corpus/requirements_management.txt 2013-09-05 08:03:11 UTC (rev 4082) +++ trunk/test/isle/swore/corpus/requirements_management.txt 2013-09-05 08:20:31 UTC (rev 4083) @@ -0,0 +1 @@ +Requirements management is the process of documenting, analyzing, tracing, prioritizing and agreeing on requirements and then controlling change and communicating to relevant stakeholders. It is a continuous process throughout a project. A requirement is a capability to which a project outcome (product or service) should conform. This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <dfl...@us...> - 2013-09-05 08:03:13
|
Revision: 4082 http://sourceforge.net/p/dl-learner/code/4082 Author: dfleischhacker Date: 2013-09-05 08:03:11 +0000 (Thu, 05 Sep 2013) Log Message: ----------- Check that semanticAnnotator is not null Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java 2013-09-05 07:46:34 UTC (rev 4081) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java 2013-09-05 08:03:11 UTC (rev 4082) @@ -59,6 +59,9 @@ * Precompute the whole index, i.e. iterate over all entities and compute all annotated documents. */ public void buildIndex(Set<TextDocument> documents) { + if (semanticAnnotator == null) { + throw new RuntimeException("No semantic annotator defined, must be set using the setSemanticAnnotator method"); + } logger.info("Creating semantic index..."); index = new HashMap<Entity, Set<AnnotatedDocument>>(); for (TextDocument document : documents) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <dfl...@us...> - 2013-09-05 07:46:37
|
Revision: 4081 http://sourceforge.net/p/dl-learner/code/4081 Author: dfleischhacker Date: 2013-09-05 07:46:34 +0000 (Thu, 05 Sep 2013) Log Message: ----------- Pass ontology into SimpleSemanticIndex Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java 2013-09-05 07:22:38 UTC (rev 4080) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java 2013-09-05 07:46:34 UTC (rev 4081) @@ -28,103 +28,101 @@ * @author Daniel Fleischhacker */ public abstract class SemanticIndex { - - - private static final Logger logger = Logger.getLogger(SemanticIndex.class.getName()); - - private SemanticAnnotator semanticAnnotator; - private SyntacticIndex syntacticIndex; - private Map<Entity, Set<AnnotatedDocument>> index; - private OWLOntology ontology; - - public SemanticIndex(OWLOntology ontology, SyntacticIndex syntacticIndex, WordSenseDisambiguation wordSenseDisambiguation, - EntityCandidateGenerator entityCandidateGenerator, LinguisticAnnotator linguisticAnnotator) { - this.ontology = ontology; - this.syntacticIndex = syntacticIndex; - semanticAnnotator = new SemanticAnnotator(wordSenseDisambiguation, entityCandidateGenerator, linguisticAnnotator); - } - - public SemanticIndex(OWLOntology ontology, SyntacticIndex syntacticIndex, SemanticAnnotator semanticAnnotator) { - this.semanticAnnotator = semanticAnnotator; - } - - public SemanticIndex() { -} - - /** - * @param semanticAnnotator the semanticAnnotator to set - */ - public void setSemanticAnnotator(SemanticAnnotator 
semanticAnnotator) { - this.semanticAnnotator = semanticAnnotator; - } - - /** - * Precompute the whole index, i.e. iterate over all entities and compute all annotated documents. - */ - public void buildIndex(Set<TextDocument> documents){ - logger.info("Creating semantic index..."); - index = new HashMap<Entity, Set<AnnotatedDocument>>(); - for (TextDocument document : documents) { - logger.info("Processing document:\n" + document); - AnnotatedDocument annotatedDocument = semanticAnnotator.processDocument(document); - for (Entity entity : annotatedDocument.getContainedEntities()) { - Set<AnnotatedDocument> existingAnnotatedDocuments = index.get(entity); - if(existingAnnotatedDocuments == null){ - existingAnnotatedDocuments = new HashSet<AnnotatedDocument>(); - index.put(entity, existingAnnotatedDocuments); - } - existingAnnotatedDocuments.add(annotatedDocument); - } - } - logger.info("...done."); - } - - public void buildIndex(OWLAnnotationProperty annotationProperty, String language){ - Set<OWLEntity> schemaEntities = new HashSet<OWLEntity>(); - schemaEntities.addAll(ontology.getClassesInSignature()); - schemaEntities.addAll(ontology.getObjectPropertiesInSignature()); - schemaEntities.addAll(ontology.getDataPropertiesInSignature()); - Set<TextDocument> documents = new HashSet<TextDocument>(); - for (OWLEntity entity : schemaEntities) { - String label = null; - Set<OWLAnnotation> annotations = entity.getAnnotations(ontology, annotationProperty); - for (OWLAnnotation annotation : annotations) { - if (annotation.getValue() instanceof OWLLiteral) { - OWLLiteral val = (OWLLiteral) annotation.getValue(); - if (language != null) { - if(val.hasLang(language)){ - label = val.getLiteral(); - } - - } else { - label = val.getLiteral(); - } - } - } - if(label != null){ - documents.add(new TextDocument(label)); - } - } - buildIndex(documents); - } - + + + private static final Logger logger = Logger.getLogger(SemanticIndex.class.getName()); + + private SemanticAnnotator 
semanticAnnotator; + private SyntacticIndex syntacticIndex; + private Map<Entity, Set<AnnotatedDocument>> index; + private OWLOntology ontology; + + public SemanticIndex(OWLOntology ontology, SyntacticIndex syntacticIndex, WordSenseDisambiguation wordSenseDisambiguation, + EntityCandidateGenerator entityCandidateGenerator, LinguisticAnnotator linguisticAnnotator) { + this.ontology = ontology; + this.syntacticIndex = syntacticIndex; + semanticAnnotator = new SemanticAnnotator(wordSenseDisambiguation, entityCandidateGenerator, linguisticAnnotator); + } + + public SemanticIndex(OWLOntology ontology) { + this.ontology = ontology; + } + /** + * @param semanticAnnotator the semanticAnnotator to set + */ + public void setSemanticAnnotator(SemanticAnnotator semanticAnnotator) { + this.semanticAnnotator = semanticAnnotator; + } + + /** + * Precompute the whole index, i.e. iterate over all entities and compute all annotated documents. + */ + public void buildIndex(Set<TextDocument> documents) { + logger.info("Creating semantic index..."); + index = new HashMap<Entity, Set<AnnotatedDocument>>(); + for (TextDocument document : documents) { + logger.info("Processing document:\n" + document); + AnnotatedDocument annotatedDocument = semanticAnnotator.processDocument(document); + for (Entity entity : annotatedDocument.getContainedEntities()) { + Set<AnnotatedDocument> existingAnnotatedDocuments = index.get(entity); + if (existingAnnotatedDocuments == null) { + existingAnnotatedDocuments = new HashSet<AnnotatedDocument>(); + index.put(entity, existingAnnotatedDocuments); + } + existingAnnotatedDocuments.add(annotatedDocument); + } + } + logger.info("...done."); + } + + public void buildIndex(OWLAnnotationProperty annotationProperty, String language) { + Set<OWLEntity> schemaEntities = new HashSet<OWLEntity>(); + schemaEntities.addAll(ontology.getClassesInSignature()); + schemaEntities.addAll(ontology.getObjectPropertiesInSignature()); + 
schemaEntities.addAll(ontology.getDataPropertiesInSignature()); + Set<TextDocument> documents = new HashSet<TextDocument>(); + for (OWLEntity entity : schemaEntities) { + String label = null; + Set<OWLAnnotation> annotations = entity.getAnnotations(ontology, annotationProperty); + for (OWLAnnotation annotation : annotations) { + if (annotation.getValue() instanceof OWLLiteral) { + OWLLiteral val = (OWLLiteral) annotation.getValue(); + if (language != null) { + if (val.hasLang(language)) { + label = val.getLiteral(); + } + + } + else { + label = val.getLiteral(); + } + } + } + if (label != null) { + documents.add(new TextDocument(label)); + } + } + buildIndex(documents); + } + + /** * Returns the set of annotated documents which reference the given entity using one of its surface forms. * * @param entity entity to retrieve documents * @return documents referencing given entity */ - public Set<AnnotatedDocument> getDocuments(Entity entity){ - if(index == null){ - System.err.println("You have to prebuild the index before you can use this method."); - System.exit(1); - } - - Set<AnnotatedDocument> annotatedDocuments = index.get(entity); - if(annotatedDocuments == null) { - annotatedDocuments = new HashSet<AnnotatedDocument>(); - } - return annotatedDocuments; + public Set<AnnotatedDocument> getDocuments(Entity entity) { + if (index == null) { + System.err.println("You have to prebuild the index before you can use this method."); + System.exit(1); + } + + Set<AnnotatedDocument> annotatedDocuments = index.get(entity); + if (annotatedDocuments == null) { + annotatedDocuments = new HashSet<AnnotatedDocument>(); + } + return annotatedDocuments; } /** @@ -133,8 +131,8 @@ * @param entity entity to return number of referencing documents for * @return number of documents for the given entity in this index */ - public int count(Entity entity){ - return index.get(entity).size(); + public int count(Entity entity) { + return index.get(entity).size(); } /** @@ -142,7 +140,7 @@ * * 
@return the total number of documents contained in the index */ - public int getSize(){ - return index.size(); + public int getSize() { + return index.size(); } } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java 2013-09-05 07:22:38 UTC (rev 4080) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java 2013-09-05 07:46:34 UTC (rev 4081) @@ -29,8 +29,9 @@ * @param syntacticIndex index to query for documents containing the labels */ public SimpleSemanticIndex(OWLOntology ontology, SyntacticIndex syntacticIndex) { - SimpleEntityCandidatesTrie trie = new SimpleEntityCandidatesTrie(new RDFSLabelEntityTextRetriever(ontology), ontology); - setSemanticAnnotator(new SemanticAnnotator( + super(ontology); + SimpleEntityCandidatesTrie trie = new SimpleEntityCandidatesTrie(new RDFSLabelEntityTextRetriever(ontology), ontology); + setSemanticAnnotator(new SemanticAnnotator( new SimpleWordSenseDisambiguation(ontology), new TrieEntityCandidateGenerator(ontology, trie), new TrieLinguisticAnnotator(trie))); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <and...@us...> - 2013-09-05 07:22:41
|
Revision: 4080 http://sourceforge.net/p/dl-learner/code/4080 Author: andremelo Date: 2013-09-05 07:22:38 +0000 (Thu, 05 Sep 2013) Log Message: ----------- Fixing PrefixTrie IndexOutOfBounds and NullPointerException Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java trunk/components-core/src/main/java/org/dllearner/utilities/datastructures/PrefixTrie.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-09-04 16:44:09 UTC (rev 4079) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-09-05 07:22:38 UTC (rev 4080) @@ -55,7 +55,8 @@ @Override public String getLongestMatch(String s) { - return trie.getLongestMatch(s).toString(); + CharSequence match = trie.getLongestMatch(s); + return (match!=null) ? trie.getLongestMatch(s).toString() : null; } } Modified: trunk/components-core/src/main/java/org/dllearner/utilities/datastructures/PrefixTrie.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/utilities/datastructures/PrefixTrie.java 2013-09-04 16:44:09 UTC (rev 4079) +++ trunk/components-core/src/main/java/org/dllearner/utilities/datastructures/PrefixTrie.java 2013-09-05 07:22:38 UTC (rev 4080) @@ -116,7 +116,10 @@ break; } } - return s.subSequence(0, i-1); + if (i<=0) + return null; + else + return s.subSequence(0, i-1); } /** This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2013-09-04 16:44:12
|
Revision: 4079 http://sourceforge.net/p/dl-learner/code/4079 Author: lorenz_b Date: 2013-09-04 16:44:09 +0000 (Wed, 04 Sep 2013) Log Message: ----------- Removed dir loading. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java 2013-09-04 16:33:44 UTC (rev 4078) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java 2013-09-04 16:44:09 UTC (rev 4079) @@ -48,14 +48,24 @@ this.semanticAnnotator = semanticAnnotator; } + public SemanticIndex() { +} + /** + * @param semanticAnnotator the semanticAnnotator to set + */ + public void setSemanticAnnotator(SemanticAnnotator semanticAnnotator) { + this.semanticAnnotator = semanticAnnotator; + } + + /** * Precompute the whole index, i.e. iterate over all entities and compute all annotated documents. 
*/ public void buildIndex(Set<TextDocument> documents){ logger.info("Creating semantic index..."); index = new HashMap<Entity, Set<AnnotatedDocument>>(); for (TextDocument document : documents) { - logger.debug("Processing document:\n" + document); + logger.info("Processing document:\n" + document); AnnotatedDocument annotatedDocument = semanticAnnotator.processDocument(document); for (Entity entity : annotatedDocument.getContainedEntities()) { Set<AnnotatedDocument> existingAnnotatedDocuments = index.get(entity); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java 2013-09-04 16:33:44 UTC (rev 4078) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java 2013-09-04 16:44:09 UTC (rev 4079) @@ -4,13 +4,12 @@ package org.dllearner.algorithms.isle.index.semantic.simple; import org.dllearner.algorithms.isle.SimpleWordSenseDisambiguation; -import org.dllearner.algorithms.isle.index.SimpleEntityCandidateGenerator; +import org.dllearner.algorithms.isle.index.SemanticAnnotator; import org.dllearner.algorithms.isle.index.SimpleEntityCandidatesTrie; -import org.dllearner.algorithms.isle.index.SimpleLinguisticAnnotator; import org.dllearner.algorithms.isle.index.TrieEntityCandidateGenerator; +import org.dllearner.algorithms.isle.index.TrieLinguisticAnnotator; import org.dllearner.algorithms.isle.index.semantic.SemanticIndex; import org.dllearner.algorithms.isle.index.syntactic.SyntacticIndex; -import org.dllearner.algorithms.isle.textretrieval.AnnotationEntityTextRetriever; import org.dllearner.algorithms.isle.textretrieval.RDFSLabelEntityTextRetriever; import org.semanticweb.owlapi.model.OWLOntology; @@ -30,11 +29,11 @@ * @param syntacticIndex index to query for 
documents containing the labels */ public SimpleSemanticIndex(OWLOntology ontology, SyntacticIndex syntacticIndex) { - super(ontology, - syntacticIndex, + SimpleEntityCandidatesTrie trie = new SimpleEntityCandidatesTrie(new RDFSLabelEntityTextRetriever(ontology), ontology); + setSemanticAnnotator(new SemanticAnnotator( new SimpleWordSenseDisambiguation(ontology), - new TrieEntityCandidateGenerator(ontology, new SimpleEntityCandidatesTrie(new RDFSLabelEntityTextRetriever(ontology), ontology)), - new SimpleLinguisticAnnotator()); + new TrieEntityCandidateGenerator(ontology, trie), + new TrieLinguisticAnnotator(trie))); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2013-09-04 16:33:47
|
Revision: 4078 http://sourceforge.net/p/dl-learner/code/4078 Author: lorenz_b Date: 2013-09-04 16:33:44 +0000 (Wed, 04 Sep 2013) Log Message: ----------- Removed dir loading. Modified Paths: -------------- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java Modified: trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java =================================================================== --- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java 2013-09-04 16:30:27 UTC (rev 4077) +++ trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java 2013-09-04 16:33:44 UTC (rev 4078) @@ -80,11 +80,13 @@ Set<TextDocument> documents = new HashSet<TextDocument>(); File folder = new File(testFolder+"corpus/"); for (File file : folder.listFiles()) { - try { - String text = Files.toString(file, Charsets.UTF_8); - documents.add(new TextDocument(text)); - } catch (IOException e) { - e.printStackTrace(); + if(!file.isDirectory() && !file.isHidden()){ + try { + String text = Files.toString(file, Charsets.UTF_8); + documents.add(new TextDocument(text)); + } catch (IOException e) { + e.printStackTrace(); + } } } return documents; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <dfl...@us...> - 2013-09-04 16:30:31
|
Revision: 4077 http://sourceforge.net/p/dl-learner/code/4077 Author: dfleischhacker Date: 2013-09-04 16:30:27 +0000 (Wed, 04 Sep 2013) Log Message: ----------- Fix usage of raw contents from documents Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleLinguisticAnnotator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleLinguisticAnnotator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleLinguisticAnnotator.java 2013-09-04 16:26:24 UTC (rev 4076) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleLinguisticAnnotator.java 2013-09-04 16:30:27 UTC (rev 4077) @@ -26,7 +26,7 @@ @Override public Set<Annotation> annotate(Document document) { - String s = document.getRawContent().trim(); + String s = document.getContent().trim(); System.out.println("Document:" + s); // s = stopWordFilter.removeStopWords(s); Set<Annotation> annotations = new HashSet<Annotation>(); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java 2013-09-04 16:26:24 UTC (rev 4076) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java 2013-09-04 16:30:27 UTC (rev 4077) @@ -19,12 +19,11 @@ /** * Generates annotation based on trie's longest matching strings * @param document - * @param candidatesTrie * @return */ @Override public Set<Annotation> annotate(Document document) { - String 
content = document.getRawContent(); + String content = document.getContent(); Set<Annotation> annotations = new HashSet<Annotation>(); for (int i=0; i<content.length(); i++) { String unparsed = content.substring(i); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java 2013-09-04 16:26:24 UTC (rev 4076) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java 2013-09-04 16:30:27 UTC (rev 4077) @@ -33,7 +33,7 @@ super(ontology, syntacticIndex, new SimpleWordSenseDisambiguation(ontology), - new TrieEntityCandidateGenerator(ontology, new SimpleEntityCandidatesTrie(new RDFSLabelEntityTextRetriever(ontology))), + new TrieEntityCandidateGenerator(ontology, new SimpleEntityCandidatesTrie(new RDFSLabelEntityTextRetriever(ontology), ontology)), new SimpleLinguisticAnnotator()); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <and...@us...> - 2013-09-04 16:26:28
|
Revision: 4076 http://sourceforge.net/p/dl-learner/code/4076 Author: andremelo Date: 2013-09-04 16:26:24 +0000 (Wed, 04 Sep 2013) Log Message: ----------- Remove constructor without ontology Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-09-04 16:25:24 UTC (rev 4075) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-09-04 16:26:24 UTC (rev 4076) @@ -2,14 +2,11 @@ import java.util.HashSet; import java.util.Map; -import java.util.Map.Entry; import java.util.Set; -import org.dllearner.algorithms.isle.textretrieval.AnnotationEntityTextRetriever; import org.dllearner.algorithms.isle.textretrieval.EntityTextRetriever; import org.dllearner.core.owl.Entity; import org.dllearner.utilities.datastructures.PrefixTrie; -import org.semanticweb.owlapi.model.OWLEntity; import org.semanticweb.owlapi.model.OWLOntology; public class SimpleEntityCandidatesTrie implements EntityCandidatesTrie { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <and...@us...> - 2013-09-04 16:25:27
|
Revision: 4075 http://sourceforge.net/p/dl-learner/code/4075 Author: andremelo Date: 2013-09-04 16:25:24 +0000 (Wed, 04 Sep 2013) Log Message: ----------- Remove constructor without ontology Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-09-04 16:22:34 UTC (rev 4074) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-09-04 16:25:24 UTC (rev 4075) @@ -17,17 +17,13 @@ PrefixTrie<Set<Entity>> trie; EntityTextRetriever entityTextRetriever; - public SimpleEntityCandidatesTrie(EntityTextRetriever entityTextRetriever) { + public SimpleEntityCandidatesTrie(EntityTextRetriever entityTextRetriever, OWLOntology ontology) { this.entityTextRetriever = entityTextRetriever; - this.trie = new PrefixTrie<Set<Entity>>(); - } - - public SimpleEntityCandidatesTrie(EntityTextRetriever entityTextRetriever, OWLOntology ontology) { - this(entityTextRetriever); buildTrie(ontology); } - public void buildTrie(OWLOntology ontology) { + public void buildTrie(OWLOntology ontology) { + this.trie = new PrefixTrie<Set<Entity>>(); Map<Entity, Set<String>> relevantText = entityTextRetriever.getRelevantText(ontology); for (Entity entity : relevantText.keySet()) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <and...@us...> - 2013-09-04 16:22:37
|
Revision: 4074 http://sourceforge.net/p/dl-learner/code/4074 Author: andremelo Date: 2013-09-04 16:22:34 +0000 (Wed, 04 Sep 2013) Log Message: ----------- Add constructor with buildTrie Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-09-04 16:14:38 UTC (rev 4073) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-09-04 16:22:34 UTC (rev 4074) @@ -22,6 +22,11 @@ this.trie = new PrefixTrie<Set<Entity>>(); } + public SimpleEntityCandidatesTrie(EntityTextRetriever entityTextRetriever, OWLOntology ontology) { + this(entityTextRetriever); + buildTrie(ontology); + } + public void buildTrie(OWLOntology ontology) { Map<Entity, Set<String>> relevantText = entityTextRetriever.getRelevantText(ontology); @@ -32,6 +37,7 @@ if (text.contains(" ")) { for (String subtext : text.split(" ")) { addEntry(subtext, entity); + //System.out.println("trie.add("+subtext+","++")"); } } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2013-09-04 16:14:42
|
Revision: 4073 http://sourceforge.net/p/dl-learner/code/4073 Author: lorenz_b Date: 2013-09-04 16:14:38 +0000 (Wed, 04 Sep 2013) Log Message: ----------- Added text normalization. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleLinguisticAnnotator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleLinguisticAnnotator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleLinguisticAnnotator.java 2013-09-04 16:10:03 UTC (rev 4072) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleLinguisticAnnotator.java 2013-09-04 16:14:38 UTC (rev 4073) @@ -43,7 +43,7 @@ annotations.add(new Annotation(document, start, s.length() - start)); } annotations.addAll(nGramAnnotator.annotate(document)); - stopWordFilter.removeStopWordAnnotations(annotations); +// stopWordFilter.removeStopWordAnnotations(annotations); return annotations; } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java 2013-09-04 16:10:03 UTC (rev 4072) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java 2013-09-04 16:14:38 UTC (rev 4073) @@ -3,7 +3,6 @@ */ package org.dllearner.algorithms.isle.index.semantic.simple; -import org.dllearner.algorithms.isle.RandomWordSenseDisambiguation; import org.dllearner.algorithms.isle.SimpleWordSenseDisambiguation; import org.dllearner.algorithms.isle.index.SimpleEntityCandidateGenerator; import 
org.dllearner.algorithms.isle.index.SimpleEntityCandidatesTrie; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <dfl...@us...> - 2013-09-04 16:10:06
|
Revision: 4072 http://sourceforge.net/p/dl-learner/code/4072 Author: dfleischhacker Date: 2013-09-04 16:10:03 +0000 (Wed, 04 Sep 2013) Log Message: ----------- Use trie Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java 2013-09-04 16:04:20 UTC (rev 4071) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java 2013-09-04 16:10:03 UTC (rev 4072) @@ -2,15 +2,10 @@ import edu.northwestern.at.utils.corpuslinguistics.lemmatizer.DefaultLemmatizer; import edu.northwestern.at.utils.corpuslinguistics.lemmatizer.Lemmatizer; -import edu.stanford.nlp.ling.CoreAnnotations; -import edu.stanford.nlp.ling.CoreLabel; -import edu.stanford.nlp.pipeline.*; -import edu.stanford.nlp.util.CoreMap; import net.didion.jwnl.data.POS; import org.dllearner.algorithms.isle.WordNet; import java.util.ArrayList; -import java.util.Properties; /** * Provides shortcuts to commonly used linguistic operations Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java 2013-09-04 16:04:20 UTC (rev 4071) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java 2013-09-04 16:10:03 UTC (rev 4072) @@ -6,9 +6,13 @@ import org.dllearner.algorithms.isle.RandomWordSenseDisambiguation; import 
org.dllearner.algorithms.isle.SimpleWordSenseDisambiguation; import org.dllearner.algorithms.isle.index.SimpleEntityCandidateGenerator; +import org.dllearner.algorithms.isle.index.SimpleEntityCandidatesTrie; import org.dllearner.algorithms.isle.index.SimpleLinguisticAnnotator; +import org.dllearner.algorithms.isle.index.TrieEntityCandidateGenerator; import org.dllearner.algorithms.isle.index.semantic.SemanticIndex; import org.dllearner.algorithms.isle.index.syntactic.SyntacticIndex; +import org.dllearner.algorithms.isle.textretrieval.AnnotationEntityTextRetriever; +import org.dllearner.algorithms.isle.textretrieval.RDFSLabelEntityTextRetriever; import org.semanticweb.owlapi.model.OWLOntology; /** @@ -27,11 +31,12 @@ * @param syntacticIndex index to query for documents containing the labels */ public SimpleSemanticIndex(OWLOntology ontology, SyntacticIndex syntacticIndex) { - super(ontology, - syntacticIndex, - new SimpleWordSenseDisambiguation(ontology), - new SimpleEntityCandidateGenerator(ontology), - new SimpleLinguisticAnnotator()); + super(ontology, + syntacticIndex, + new SimpleWordSenseDisambiguation(ontology), + new TrieEntityCandidateGenerator(ontology, new SimpleEntityCandidatesTrie(new RDFSLabelEntityTextRetriever(ontology))), + new SimpleLinguisticAnnotator()); + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2013-09-04 16:04:23
|
Revision: 4071 http://sourceforge.net/p/dl-learner/code/4071 Author: lorenz_b Date: 2013-09-04 16:04:20 +0000 (Wed, 04 Sep 2013) Log Message: ----------- Added text normalization. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java 2013-09-04 15:59:46 UTC (rev 4070) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java 2013-09-04 16:04:20 UTC (rev 4071) @@ -8,6 +8,8 @@ import java.util.Map; import java.util.Set; +import org.dllearner.algorithms.isle.index.LinguisticAnnotator; +import org.dllearner.algorithms.isle.index.LinguisticUtil; import org.dllearner.core.owl.Entity; import org.dllearner.kb.OWLAPIOntology; import org.dllearner.utilities.owl.OWLAPIConverter; @@ -21,7 +23,9 @@ import org.semanticweb.owlapi.util.IRIShortFormProvider; import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider; +import com.google.common.base.Joiner; + /** * @author Lorenz Buehmann * @@ -86,7 +90,10 @@ } if(textWithWeight.isEmpty() && useShortFormFallback){ - textWithWeight.put(sfp.getShortForm(IRI.create(entity.getURI())), weight); + String shortForm = sfp.getShortForm(IRI.create(entity.getURI())); + shortForm = Joiner.on(" ").join(LinguisticUtil.getWordsFromCamelCase(shortForm)); + shortForm = Joiner.on(" ").join(LinguisticUtil.getWordsFromUnderscored(shortForm)); + textWithWeight.put(shortForm, weight); } return textWithWeight; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <and...@us...> - 2013-09-04 15:59:49
|
Revision: 4070 http://sourceforge.net/p/dl-learner/code/4070 Author: andremelo Date: 2013-09-04 15:59:46 +0000 (Wed, 04 Sep 2013) Log Message: ----------- Adding OWLOntology parameter to getRelevantText at AnnotationEntityTextRetriever and updating interface to include it Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/EntityTextRetriever.java trunk/protege/pom.xml Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java 2013-09-04 15:58:20 UTC (rev 4069) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java 2013-09-04 15:59:46 UTC (rev 4070) @@ -96,7 +96,8 @@ * Returns for each entity in the ontology all relevant text, i.e. eitherthe annotations or the short form of the IRI as fallback. 
* @return */ - public Map<Entity, Set<String>> getRelevantText() { + @Override + public Map<Entity, Set<String>> getRelevantText(OWLOntology ontology) { Map<Entity, Set<String>> entity2RelevantText = new HashMap<Entity, Set<String>>(); Set<OWLEntity> schemaEntities = new HashSet<OWLEntity>(); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/EntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/EntityTextRetriever.java 2013-09-04 15:58:20 UTC (rev 4069) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/EntityTextRetriever.java 2013-09-04 15:59:46 UTC (rev 4070) @@ -20,8 +20,10 @@ package org.dllearner.algorithms.isle.textretrieval; import java.util.Map; +import java.util.Set; import org.dllearner.core.owl.Entity; +import org.semanticweb.owlapi.model.OWLOntology; /** * Interface for methods, which retrieve relevant texts given an entity @@ -45,4 +47,6 @@ */ public Map<String, Double> getRelevantText(Entity entity); + public Map<Entity, Set<String>> getRelevantText(OWLOntology ontology); + } Modified: trunk/protege/pom.xml =================================================================== --- trunk/protege/pom.xml 2013-09-04 15:58:20 UTC (rev 4069) +++ trunk/protege/pom.xml 2013-09-04 15:59:46 UTC (rev 4070) @@ -164,5 +164,42 @@ </executions> </plugin> </plugins> + <pluginManagement> + <plugins> + <!--This plugin's configuration is used to store Eclipse m2e settings only. 
It has no influence on the Maven build itself.--> + <plugin> + <groupId>org.eclipse.m2e</groupId> + <artifactId>lifecycle-mapping</artifactId> + <version>1.0.0</version> + <configuration> + <lifecycleMappingMetadata> + <pluginExecutions> + <pluginExecution> + <pluginExecutionFilter> + <groupId> + org.apache.maven.plugins + </groupId> + <artifactId> + maven-dependency-plugin + </artifactId> + <versionRange> + [2.4,) + </versionRange> + <goals> + <goal> + copy-dependencies + </goal> + </goals> + </pluginExecutionFilter> + <action> + <ignore></ignore> + </action> + </pluginExecution> + </pluginExecutions> + </lifecycleMappingMetadata> + </configuration> + </plugin> + </plugins> + </pluginManagement> </build> </project> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <and...@us...> - 2013-09-04 15:58:25
|
Revision: 4069 http://sourceforge.net/p/dl-learner/code/4069 Author: andremelo Date: 2013-09-04 15:58:20 +0000 (Wed, 04 Sep 2013) Log Message: ----------- Adding the buildTrie method Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-09-04 15:39:08 UTC (rev 4068) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-09-04 15:58:20 UTC (rev 4069) @@ -1,23 +1,43 @@ package org.dllearner.algorithms.isle.index; import java.util.HashSet; +import java.util.Map; import java.util.Map.Entry; import java.util.Set; +import org.dllearner.algorithms.isle.textretrieval.AnnotationEntityTextRetriever; +import org.dllearner.algorithms.isle.textretrieval.EntityTextRetriever; import org.dllearner.core.owl.Entity; import org.dllearner.utilities.datastructures.PrefixTrie; +import org.semanticweb.owlapi.model.OWLEntity; import org.semanticweb.owlapi.model.OWLOntology; public class SimpleEntityCandidatesTrie implements EntityCandidatesTrie { PrefixTrie<Set<Entity>> trie; - OWLOntology ontology; + EntityTextRetriever entityTextRetriever; - public SimpleEntityCandidatesTrie(OWLOntology ontology) { - this.ontology = ontology; + public SimpleEntityCandidatesTrie(EntityTextRetriever entityTextRetriever) { + this.entityTextRetriever = entityTextRetriever; this.trie = new PrefixTrie<Set<Entity>>(); } + public void buildTrie(OWLOntology ontology) { + Map<Entity, Set<String>> relevantText = entityTextRetriever.getRelevantText(ontology); + + for (Entity entity : relevantText.keySet()) { + for (String text : relevantText.get(entity)) { + addEntry(text, entity); + // 
Adds also composing words, e.g. for "has child", "has" and "child" are also added + if (text.contains(" ")) { + for (String subtext : text.split(" ")) { + addEntry(subtext, entity); + } + } + } + } + } + @Override public void addEntry(String s, Entity e) { Set<Entity> candidates = trie.get(s); @@ -25,12 +45,13 @@ candidates = new HashSet<Entity>(); candidates.add(e); + + trie.put(s, candidates); } @Override public Set<Entity> getCandidateEntities(String s) { - // TODO Auto-generated method stub - return null; + return trie.get(s); } @Override @@ -38,12 +59,4 @@ return trie.getLongestMatch(s).toString(); } - /** - * @param args - */ - public static void main(String[] args) { - // TODO Auto-generated method stub - - } - } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2013-09-04 15:39:12
|
Revision: 4068 http://sourceforge.net/p/dl-learner/code/4068 Author: lorenz_b Date: 2013-09-04 15:39:08 +0000 (Wed, 04 Sep 2013) Log Message: ----------- Added method to get relvant text for all entities. Modified Paths: -------------- trunk/components-core/pom.xml trunk/components-core/src/main/java/org/dllearner/algorithms/isle/ISLE.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/SimpleWordSenseDisambiguation.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceUtils.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java trunk/components-core/src/main/java/org/dllearner/core/owl/Description.java trunk/components-core/src/main/java/org/dllearner/core/owl/Nothing.java trunk/examples/isle/father_labeled.owl trunk/pom.xml trunk/scripts/pom.xml trunk/scripts/src/main/java/org/dllearner/scripts/pattern/OWLAxiomPatternDetectionEvaluation.java trunk/scripts/src/main/java/org/dllearner/scripts/pattern/OWLAxiomPatternUsageEvaluation.java Modified: trunk/components-core/pom.xml =================================================================== --- trunk/components-core/pom.xml 2013-09-04 15:23:29 UTC (rev 4067) +++ trunk/components-core/pom.xml 2013-09-04 15:39:08 UTC (rev 4068) @@ -314,11 +314,6 @@ <artifactId>jwnl</artifactId> <version>1.4.1.RC2</version> </dependency> - <dependency> - <groupId>com.google.collections</groupId> - <artifactId>google-collections</artifactId> - <version>1.0</version> - </dependency> </dependencies> <dependencyManagement> <dependencies> Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/ISLE.java =================================================================== --- 
trunk/components-core/src/main/java/org/dllearner/algorithms/isle/ISLE.java 2013-09-04 15:23:29 UTC (rev 4067) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/ISLE.java 2013-09-04 15:39:08 UTC (rev 4068) @@ -34,6 +34,7 @@ import org.dllearner.algorithms.celoe.CELOE; import org.dllearner.algorithms.celoe.OENode; import org.dllearner.core.AbstractCELA; +import org.dllearner.core.AbstractKnowledgeSource; import org.dllearner.core.AbstractLearningProblem; import org.dllearner.core.AbstractReasonerComponent; import org.dllearner.core.ComponentAnn; @@ -47,10 +48,12 @@ import org.dllearner.core.owl.NamedClass; import org.dllearner.core.owl.Restriction; import org.dllearner.core.owl.Thing; +import org.dllearner.kb.OWLFile; import org.dllearner.learningproblems.ClassLearningProblem; import org.dllearner.learningproblems.PosNegLP; import org.dllearner.learningproblems.PosNegLPStandard; import org.dllearner.learningproblems.PosOnlyLP; +import org.dllearner.reasoning.FastInstanceChecker; import org.dllearner.refinementoperators.CustomHierarchyRefinementOperator; import org.dllearner.refinementoperators.CustomStartRefinementOperator; import org.dllearner.refinementoperators.LengthLimitedRefinementOperator; @@ -94,8 +97,7 @@ // all nodes in the search tree (used for selecting most promising node) private TreeSet<OENode> nodes; -// private OEHeuristicRuntime heuristic; // = new OEHeuristicRuntime(); - private NLPHeuristic heuristic = new NLPHeuristic(); + private NLPHeuristic heuristic; // = new OEHeuristicRuntime(); // root of search tree private OENode startNode; // the class with which we start the refinement process @@ -146,10 +148,11 @@ private int expressionTests = 0; private int minHorizExp = 0; private int maxHorizExp = 0; - private long totalRuntimeNs; + private long totalRuntimeNs = 0; // TODO: turn those into config options + // important: do not initialise those with empty sets // null = no settings for allowance / ignorance // empty set = allow 
/ ignore nothing (it is often not desired to allow no class!) @@ -896,6 +899,10 @@ } } + public TreeSet<OENode> getNodes() { + return nodes; + } + public int getMaximumHorizontalExpansion() { return maxHorizExp; } @@ -1099,14 +1106,30 @@ public void setStopOnFirstDefinition(boolean stopOnFirstDefinition) { this.stopOnFirstDefinition = stopOnFirstDefinition; - } - + } + public long getTotalRuntimeNs() { return totalRuntimeNs; } - - public TreeSet<OENode> getNodes() { - return nodes; + + public static void main(String[] args) throws Exception{ + AbstractKnowledgeSource ks = new OWLFile("../examples/family/father_oe.owl"); + ks.init(); + + AbstractReasonerComponent rc = new FastInstanceChecker(ks); + rc.init(); + + ClassLearningProblem lp = new ClassLearningProblem(rc); + lp.setClassToDescribe(new NamedClass("http://example.com/father#father")); + lp.init(); + + CELOE alg = new CELOE(lp, rc); + alg.setMaxExecutionTimeInSeconds(10); + alg.init(); + + alg.start(); + } + } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java 2013-09-04 15:23:29 UTC (rev 4067) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java 2013-09-04 15:39:08 UTC (rev 4068) @@ -108,17 +108,18 @@ //the NLP based scoring - Description expression = node.getExpression(); - OWLClassExpression owlapiDescription = OWLAPIConverter.getOWLAPIDescription(expression); - Set<Entity> entities = OWLAPIConverter.getEntities(owlapiDescription.getSignature()); - double sum = 0; - for (Entity entity : entities) { - double relevance = entityRelevance.containsKey(entity) ? 
entityRelevance.get(entity) : 0; - if(!Double.isInfinite(relevance)){ - sum += relevance; - } - } - score += nlpBonusFactor * sum; +// Description expression = node.getExpression(); +//// OWLClassExpression owlapiDescription = OWLAPIConverter.getOWLAPIDescription(expression); +//// Set<Entity> entities = OWLAPIConverter.getEntities(owlapiDescription.getSignature()); +// Set<Entity> entities = expression.getSignature(); +// double sum = 0; +// for (Entity entity : entities) { +// double relevance = entityRelevance.containsKey(entity) ? entityRelevance.get(entity) : 0; +// if(!Double.isInfinite(relevance)){ +// sum += relevance; +// } +// } +// score += nlpBonusFactor * sum; return score; } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/SimpleWordSenseDisambiguation.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/SimpleWordSenseDisambiguation.java 2013-09-04 15:23:29 UTC (rev 4067) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/SimpleWordSenseDisambiguation.java 2013-09-04 15:39:08 UTC (rev 4068) @@ -75,5 +75,25 @@ } return labels; } + + private Set<String> getRelatedWordPhrases(Entity entity){ + //add the labels if exist + Set<String> relatedWordPhrases = new HashSet<String>(); + OWLEntity owlEntity = OWLAPIConverter.getOWLAPIEntity(entity); + Set<OWLAnnotationAssertionAxiom> axioms = ontology.getAnnotationAssertionAxioms(owlEntity.getIRI()); + for (OWLAnnotationAssertionAxiom annotation : axioms) { + if(annotation.getProperty().equals(annotationProperty)){ + if (annotation.getValue() instanceof OWLLiteral) { + OWLLiteral val = (OWLLiteral) annotation.getValue(); + relatedWordPhrases.add(val.getLiteral()); + } + } + } + //add the short form of the URI if no labels are available + if(relatedWordPhrases.isEmpty()){ + relatedWordPhrases.add(sfp.getShortForm(IRI.create(entity.getURI()))); + } + return relatedWordPhrases; + } 
} Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java 2013-09-04 15:23:29 UTC (rev 4067) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java 2013-09-04 15:39:08 UTC (rev 4068) @@ -28,10 +28,12 @@ Set<AnnotatedDocument> documentsAB = Sets.intersection(documentsA, documentsB); int nrOfDocuments = index.getSize(); - double dPClass = nrOfDocuments == 0 ? 0 : ((double) documentsA.size() / (double) nrOfDocuments); - double dPClassEntity = documentsB.size() == 0 ? 0 : (double) documentsAB.size() / (double) documentsB.size(); - double pmi = Math.log(dPClassEntity / dPClass); + double pA = nrOfDocuments == 0 ? 0 : ((double) documentsA.size() / (double) nrOfDocuments); + double pB = nrOfDocuments == 0 ? 0 : ((double) documentsB.size() / (double) nrOfDocuments); + double pAB = nrOfDocuments == 0 ? 0 : ((double) documentsAB.size() / (double) nrOfDocuments); + double pmi = Math.log(pAB / pA * pB); + return pmi; } @@ -42,11 +44,15 @@ Set<AnnotatedDocument> documentsAB = Sets.intersection(documentsA, documentsB); int nrOfDocuments = index.getSize(); - double dPClass = nrOfDocuments == 0 ? 0 : ((double) documentsA.size() / (double) nrOfDocuments); - double dPClassEntity = documentsB.size() == 0 ? 0 : (double) documentsAB.size() / (double) documentsB.size(); - double pmi = Math.log(dPClassEntity / dPClass); + double pA = nrOfDocuments == 0 ? 0 : ((double) documentsA.size() / (double) nrOfDocuments); + double pB = nrOfDocuments == 0 ? 0 : ((double) documentsB.size() / (double) nrOfDocuments); + double pAB = nrOfDocuments == 0 ? 
0 : ((double) documentsAB.size() / (double) nrOfDocuments); - double pAB = (double) documentsAB.size() / (double) nrOfDocuments; + if(pA * pB == 0){ + return 0; + } + double pmi = Math.log(pAB / pA * pB); + double normalizedPMI = (pmi/-Math.log(pAB) + 1)/2; return normalizedPMI; Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceUtils.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceUtils.java 2013-09-04 15:23:29 UTC (rev 4067) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceUtils.java 2013-09-04 15:39:08 UTC (rev 4068) @@ -40,7 +40,7 @@ Set<Entity> otherEntities = OWLAPIConverter.getEntities(owlEntities); for (Entity otherEntity : otherEntities) { - double relevance = metric.getRelevance(entity, otherEntity); + double relevance = metric.getNormalizedRelevance(entity, otherEntity); relevantEntities.put(otherEntity, relevance); } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java 2013-09-04 15:23:29 UTC (rev 4067) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java 2013-09-04 15:39:08 UTC (rev 4068) @@ -4,6 +4,7 @@ package org.dllearner.algorithms.isle.textretrieval; import java.util.HashMap; +import java.util.HashSet; import java.util.Map; import java.util.Set; @@ -90,4 +91,26 @@ return textWithWeight; } + + /** + * Returns for each entity in the ontology all relevant text, i.e. eitherthe annotations or the short form of the IRI as fallback. 
+ * @return + */ + public Map<Entity, Set<String>> getRelevantText() { + Map<Entity, Set<String>> entity2RelevantText = new HashMap<Entity, Set<String>>(); + + Set<OWLEntity> schemaEntities = new HashSet<OWLEntity>(); + schemaEntities.addAll(ontology.getClassesInSignature()); + schemaEntities.addAll(ontology.getObjectPropertiesInSignature()); + schemaEntities.addAll(ontology.getDataPropertiesInSignature()); + + Map<String, Double> relevantText; + for (OWLEntity owlEntity : schemaEntities) { + Entity entity = OWLAPIConverter.getEntity(owlEntity); + relevantText = getRelevantText(entity); + entity2RelevantText.put(entity, relevantText.keySet()); + } + + return entity2RelevantText; + } } Modified: trunk/components-core/src/main/java/org/dllearner/core/owl/Description.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/core/owl/Description.java 2013-09-04 15:23:29 UTC (rev 4067) +++ trunk/components-core/src/main/java/org/dllearner/core/owl/Description.java 2013-09-04 15:39:08 UTC (rev 4068) @@ -19,9 +19,11 @@ package org.dllearner.core.owl; +import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Set; /** * A class description is sometimes also called "complex class" or "concept". @@ -211,6 +213,36 @@ } /** + * Returns all named entities. 
+ * @return + */ + public Set<Entity> getSignature(){ + Set<Entity> entities = new HashSet<Entity>(); + if(this instanceof NamedClass){ + entities.add((NamedClass)this); + } else if(this instanceof Thing){ + entities.add(new NamedClass(Thing.uri)); + } else if(this instanceof Nothing){ + entities.add(new NamedClass(Nothing.uri)); + } else if(this instanceof Restriction){ + PropertyExpression propertyExpression = ((Restriction)this).getRestrictedPropertyExpression(); + if(propertyExpression instanceof ObjectProperty){ + entities.add((ObjectProperty)propertyExpression); + } else if(propertyExpression instanceof DatatypeProperty){ + entities.add((DatatypeProperty)propertyExpression); + } + entities.addAll(getChild(0).getSignature()); + + } else { + for (Description child : children) { + entities.addAll(child.getSignature()); + } + } + + return entities; + } + + /** * Returns a manchester syntax string of this description. For a * reference, see * <a href="http://www.co-ode.org/resources/reference/manchester_syntax">here</a> Modified: trunk/components-core/src/main/java/org/dllearner/core/owl/Nothing.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/core/owl/Nothing.java 2013-09-04 15:23:29 UTC (rev 4067) +++ trunk/components-core/src/main/java/org/dllearner/core/owl/Nothing.java 2013-09-04 15:39:08 UTC (rev 4068) @@ -19,6 +19,7 @@ package org.dllearner.core.owl; +import java.net.URI; import java.util.Map; /** @@ -35,6 +36,8 @@ private static final long serialVersionUID = -3053885252153066318L; public static final Nothing instance = new Nothing(); + public static final URI uri = URI.create("http://www.w3.org/2002/07/owl#Thing"); + public String toString(String baseURI, Map<String,String> prefixes) { return "BOTTOM"; } @@ -52,7 +55,14 @@ // in Protege 4.0 only Nothing //return "owl:Nothing"; return "Nothing"; - } + } + + /** + * @return the uri + */ + public static URI getURI() { + return uri; 
+ } public int getLength() { return 1; Modified: trunk/examples/isle/father_labeled.owl =================================================================== --- trunk/examples/isle/father_labeled.owl 2013-09-04 15:23:29 UTC (rev 4067) +++ trunk/examples/isle/father_labeled.owl 2013-09-04 15:39:08 UTC (rev 4068) @@ -58,7 +58,7 @@ <!-- http://example.com/father#father --> <owl:Class rdf:about="&father;father"> - <rdfs:label xml:lang="en">person which has at least 1 child</rdfs:label> + <rdfs:label xml:lang="en">male person which has at least 1 child</rdfs:label> <rdfs:subClassOf rdf:resource="&father;male"/> </owl:Class> Modified: trunk/pom.xml =================================================================== --- trunk/pom.xml 2013-09-04 15:23:29 UTC (rev 4067) +++ trunk/pom.xml 2013-09-04 15:39:08 UTC (rev 4068) @@ -164,7 +164,7 @@ <dependency> <groupId>org.semanticweb.hermit</groupId> <artifactId>hermit</artifactId> - <version>1.3.3</version> + <version>1.3.8</version> </dependency> <!-- SOLR Dependency --> Modified: trunk/scripts/pom.xml =================================================================== --- trunk/scripts/pom.xml 2013-09-04 15:23:29 UTC (rev 4067) +++ trunk/scripts/pom.xml 2013-09-04 15:39:08 UTC (rev 4068) @@ -139,6 +139,13 @@ </exclusions> </dependency> + + <dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-math3</artifactId> + <version>3.0</version> +</dependency> + </dependencies> <build> Modified: trunk/scripts/src/main/java/org/dllearner/scripts/pattern/OWLAxiomPatternDetectionEvaluation.java =================================================================== --- trunk/scripts/src/main/java/org/dllearner/scripts/pattern/OWLAxiomPatternDetectionEvaluation.java 2013-09-04 15:23:29 UTC (rev 4067) +++ trunk/scripts/src/main/java/org/dllearner/scripts/pattern/OWLAxiomPatternDetectionEvaluation.java 2013-09-04 15:39:08 UTC (rev 4068) @@ -13,6 +13,8 @@ import java.util.ArrayList; import java.util.Arrays; import 
java.util.Collection; +import java.util.Collections; +import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -39,6 +41,8 @@ import org.semanticweb.owlapi.model.OWLOntologyCreationException; import org.semanticweb.owlapi.model.UnloadableImportException; +import com.google.common.math.IntMath; + import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; import uk.ac.manchester.cs.owl.owlapi.mansyntaxrenderer.ManchesterOWLSyntaxOWLObjectRendererImpl; @@ -57,6 +61,11 @@ private boolean formatNumbers = true; private int numberOfRowsPerTable = 25; + private int minOntologies = 5; + + private Map<OWLAxiom, Integer> winsorizedFrequencies = new HashMap<OWLAxiom, Integer>(); + private int percentileInPercent = 95; + public OWLAxiomPatternDetectionEvaluation() { initDBConnection(); @@ -311,12 +320,13 @@ String latexTable = "\\begin{table}\n"; latexTable += "\\begin{tabular}{lrrr}\n"; latexTable += "\\toprule\n"; - latexTable += "Pattern & Frequency & \\#Ontologies\\\\\\midrule\n"; + latexTable += "Pattern & Frequency & Winsorised Frequency & \\#Ontologies\\\\\\midrule\n"; for (Entry<OWLAxiom, Pair<Integer, Integer>> entry : topN.entrySet()) { OWLAxiom axiom = entry.getKey(); Integer frequency = entry.getValue().getKey(); Integer df = entry.getValue().getValue(); + Integer winsorizedFrequency = winsorizedFrequencies.get(axiom); if(axiom != null){ String axiomColumn = axiomRenderer.render(axiom); @@ -330,7 +340,7 @@ } if(formatNumbers){ - latexTable += axiomColumn + " & " + "\\num{" + frequency + "} & " + df + "\\\\\n"; + latexTable += axiomColumn + " & \\num{" + frequency + "} & \\num{" + winsorizedFrequency + "} & "+ df + "\\\\\n"; } else { latexTable += axiomColumn + " & " + frequency + " & " + df + "\\\\\n"; } @@ -347,13 +357,13 @@ LatexWriter w = new LatexWriter(sw); LatexObjectVisitor renderer = new LatexObjectVisitor(w, df); String latexTable = "\\begin{table}\n"; - latexTable += "\\begin{tabular}{rlrr"; + latexTable += 
"\\begin{tabular}{rlrrr"; for (int i = 0; i < repositories.size(); i++) { latexTable += "r"; } latexTable += "}\n"; latexTable += "\\toprule\n"; - latexTable += " & Pattern & Frequency & \\#Ontologies"; + latexTable += " & Pattern & Frequency & Winsorized Frequency & \\#Ontologies"; for (OntologyRepository repository : repositories) { latexTable += " & " + repository.getName(); } @@ -366,6 +376,7 @@ OWLAxiom axiom = entry.getValue().keySet().iterator().next(); Integer frequency = entry.getValue().values().iterator().next().getKey(); Integer df = entry.getValue().values().iterator().next().getValue(); + int winsorizedFrequency = winsorizedFrequencies.get(axiom); if(axiom != null){ String axiomColumn = axiomRenderer.render(axiom); @@ -378,7 +389,7 @@ } if(formatNumbers){ - latexTable += i + ". & " + axiomColumn + " & " + "\\num{" + frequency + "} & " + df; + latexTable += i + ". & " + axiomColumn + " & \\num{" + frequency + "} & \\num{" + winsorizedFrequency + "} & "+ df; for (OntologyRepository repository : repositories) { int rank = 0; boolean contained = false; @@ -433,21 +444,62 @@ ps = conn.prepareStatement("SELECT P.id, pattern,SUM(occurrences),COUNT(ontology_id) FROM " + "Ontology_Pattern OP, Pattern P, Ontology O WHERE " + "(P.id=OP.pattern_id AND O.id=OP.ontology_id AND P.axiom_type=?) " + - "GROUP BY P.id ORDER BY SUM(`OP`.`occurrences`) DESC LIMIT ?"); + "GROUP BY P.id HAVING COUNT(ontology_id)>=? 
ORDER BY SUM(`OP`.`occurrences`) DESC LIMIT ?"); ps.setString(1, axiomType.name()); - ps.setInt(2, n); + ps.setInt(2, minOntologies); + ps.setInt(3, n); rs = ps.executeQuery(); while(rs.next()){ + int patternID = rs.getInt(1); + OWLAxiom axiom = asOWLAxiom(rs.getString(2)); Map<OWLAxiom, Pair<Integer, Integer>> m = new LinkedHashMap<OWLAxiom, Pair<Integer,Integer>>(); - m.put(asOWLAxiom(rs.getString(2)), new Pair<Integer, Integer>(rs.getInt(3), rs.getInt(4))); - topN.put(rs.getInt(1), m); + m.put(axiom, new Pair<Integer, Integer>(rs.getInt(3), rs.getInt(4))); + topN.put(patternID, m); + + //get winsorized frequency + ps = conn.prepareStatement("SELECT occurrences FROM " + + "Ontology_Pattern WHERE " + + "(pattern_id=?) "); + ps.setInt(1, patternID); + ResultSet rs2 = ps.executeQuery(); + System.out.println("Pattern ID:" + patternID); + System.out.println(axiom); + + List<Integer> values = new ArrayList<Integer>(); + while(rs2.next()){ + values.add(rs2.getInt(1)); + } + winsorize(values); + int sum = 0; + for (Integer val : values) { + sum += val; + } + winsorizedFrequencies.put(axiom, sum); } + + + } catch(SQLException e){ e.printStackTrace(); } return topN; } + private void winsorize(List<Integer> values){ + //compute 95th percentile + int percentile = (int) Math.round(percentileInPercent/100d * values.size() + 1/2d); + //sort values + Collections.sort(values);System.out.println(values); + //get the value at percentile rank + int max = values.get(percentile-1); + //set all values after to max + for (int i = percentile; i < values.size(); i++) { + values.set(i, max); + } + System.out.println(percentile); + System.out.println(values); + } + private Map<Integer, Map<OWLAxiom, Pair<Integer, Integer>>> getTopNAxiomPatternsWithId(OntologyRepository repository, AxiomTypeCategory axiomType, int n){ Map<Integer, Map<OWLAxiom, Pair<Integer, Integer>>> topN = new LinkedHashMap<Integer, Map<OWLAxiom, Pair<Integer, Integer>>>(); PreparedStatement ps; @@ -569,6 +621,5 @@ new 
OWLAxiomPatternDetectionEvaluation().run(analyzeRepositories, Arrays.asList( new TONESRepository(), new BioPortalRepository(), new OxfordRepository())); } - } Modified: trunk/scripts/src/main/java/org/dllearner/scripts/pattern/OWLAxiomPatternUsageEvaluation.java =================================================================== --- trunk/scripts/src/main/java/org/dllearner/scripts/pattern/OWLAxiomPatternUsageEvaluation.java 2013-09-04 15:23:29 UTC (rev 4067) +++ trunk/scripts/src/main/java/org/dllearner/scripts/pattern/OWLAxiomPatternUsageEvaluation.java 2013-09-04 15:39:08 UTC (rev 4068) @@ -42,6 +42,7 @@ import joptsimple.OptionSet; import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; +import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics; import org.apache.log4j.Logger; import org.coode.owlapi.turtle.TurtleOntologyFormat; import org.dllearner.core.EvaluatedAxiom; @@ -51,6 +52,7 @@ import org.dllearner.kb.LocalModelBasedSparqlEndpointKS; import org.dllearner.kb.SparqlEndpointKS; import org.dllearner.kb.sparql.ExtractionDBCache; +import org.dllearner.kb.sparql.QueryEngineHTTP; import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.kb.sparql.SparqlQuery; import org.dllearner.learningproblems.AxiomScore; @@ -92,7 +94,6 @@ import com.clarkparsia.pellet.owlapiv3.PelletReasonerFactory; import com.google.common.base.Charsets; import com.google.common.base.Joiner; -import com.google.common.cache.Cache; import com.google.common.cache.CacheBuilder; import com.google.common.cache.CacheLoader; import com.google.common.cache.LoadingCache; @@ -117,7 +118,6 @@ import com.hp.hpl.jena.rdf.model.ModelFactory; import com.hp.hpl.jena.rdf.model.Resource; import com.hp.hpl.jena.rdf.model.Statement; -import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; import com.hp.hpl.jena.sparql.engine.http.QueryExceptionHTTP; import com.hp.hpl.jena.vocabulary.RDF; import com.jamonapi.Monitor; @@ -165,7 +165,12 @@ private File 
samplesDir; private File instantiationsDir; + private DescriptiveStatistics fragmentStatistics = new DescriptiveStatistics(100); + + + private int nrOfEarlyTerminations = 0; + public OWLAxiomPatternUsageEvaluation() { try { BZip2CompressorInputStream is = new BZip2CompressorInputStream(new URL(ontologyURL).openStream()); @@ -374,7 +379,9 @@ e.printStackTrace(); } } -// System.exit(0); + logger.info("Early terminations: " + nrOfEarlyTerminations ); + logger.info(fragmentStatistics.getMin() + "--" + fragmentStatistics.getMax() + "--" + fragmentStatistics.getMean()); + System.exit(0); Monitor patternTimeMon = MonitorFactory.getTimeMonitor("pattern-runtime"); //for each pattern @@ -694,28 +701,39 @@ long startTime = System.currentTimeMillis(); int offset = 0; boolean hasMoreResults = true; - while(hasMoreResults && (System.currentTimeMillis() - startTime)<= maxFragmentExtractionTime){ + long remainingTime = maxFragmentExtractionTime - (System.currentTimeMillis() - startTime); + while(hasMoreResults && remainingTime > 0){ query.setOffset(offset); logger.info(query); - Model m = executeConstructQuery(query); + Model m = executeConstructQuery(query, remainingTime); fragment.add(m); + remainingTime = maxFragmentExtractionTime - (System.currentTimeMillis() - startTime); if(m.size() == 0){ hasMoreResults = false; + if(remainingTime > 0){ + logger.info("No more triples left. 
Early termination..."); + nrOfEarlyTerminations++; + } + } offset += queryLimit; - try { - Thread.sleep(500); - } catch (InterruptedException e) { - e.printStackTrace(); - } +// try { +// Thread.sleep(500); +// } catch (InterruptedException e) { +// e.printStackTrace(); +// } } - logger.info("...got " + fragment.size() + " triples."); try { fragment.write(new FileOutputStream(file), "TURTLE"); } catch (FileNotFoundException e) { e.printStackTrace(); } filterModel(fragment); + logger.info("...got " + fragment.size() + " triples "); + ResultSet rs = QueryExecutionFactory.create("SELECT (COUNT(DISTINCT ?s) AS ?cnt) WHERE {?s a <" + cls.getName() + ">. }", fragment).execSelect(); + int nrOfInstances = rs.next().getLiteral("cnt").getInt(); + logger.info("with " + nrOfInstances + " instances of class " + cls.getName()); + fragmentStatistics.addValue(nrOfInstances); return fragment; } @@ -1281,6 +1299,45 @@ return rs; } + protected Model executeConstructQuery(Query query, long timeout) { + if(ks.isRemote()){ + SparqlEndpoint endpoint = ((SparqlEndpointKS) ks).getEndpoint(); + ExtractionDBCache cache = ks.getCache(); + Model model = null; + try { +// if(cache != null){ +// try { +// model = cache.executeConstructQuery(endpoint, query.toString()); +// } catch (UnsupportedEncodingException e) { +// e.printStackTrace(); +// } catch (SQLException e) { +// e.printStackTrace(); +// } +// } else { + QueryEngineHTTP queryExecution = new QueryEngineHTTP(endpoint.getURL().toString(), + query); + queryExecution.setDefaultGraphURIs(endpoint.getDefaultGraphURIs()); + queryExecution.setNamedGraphURIs(endpoint.getNamedGraphURIs()); + queryExecution.setTimeout(timeout, timeout); + model = queryExecution.execConstruct(); +// } + logger.debug("Got " + model.size() + " triples."); + return model; + } catch (QueryExceptionHTTP e) { + if(e.getCause() instanceof SocketTimeoutException){ + logger.warn("Got timeout"); + } else { + logger.error("Exception executing query", e); + } + return 
ModelFactory.createDefaultModel(); + } + } else { + QueryExecution queryExecution = QueryExecutionFactory.create(query, ((LocalModelBasedSparqlEndpointKS)ks).getModel()); + Model model = queryExecution.execConstruct(); + return model; + } + } + protected Model executeConstructQuery(Query query) { if(ks.isRemote()){ SparqlEndpoint endpoint = ((SparqlEndpointKS) ks).getEndpoint(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <and...@us...> - 2013-09-04 15:23:34
|
Revision: 4067 http://sourceforge.net/p/dl-learner/code/4067 Author: andremelo Date: 2013-09-04 15:23:29 +0000 (Wed, 04 Sep 2013) Log Message: ----------- Modifying PrefixTrie to include "String getLongestMatch(String s)" Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/utilities/datastructures/PrefixTrie.java Modified: trunk/components-core/src/main/java/org/dllearner/utilities/datastructures/PrefixTrie.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/utilities/datastructures/PrefixTrie.java 2013-09-04 15:22:06 UTC (rev 4066) +++ trunk/components-core/src/main/java/org/dllearner/utilities/datastructures/PrefixTrie.java 2013-09-04 15:23:29 UTC (rev 4067) @@ -102,6 +102,22 @@ } return deepestWithValue.value; } + + public CharSequence getLongestMatch(CharSequence s) { + Node<T> current = root; + int i; + for (i = 0; i < s.length(); i++) { + int nodeIndex = s.charAt(i) - rangeOffset; + if (nodeIndex < 0 || rangeSize <= nodeIndex) { + return null; + } + current = current.next[nodeIndex]; + if (current == null) { + break; + } + } + return s.subSequence(0, i-1); + } /** * Returns a Map containing the same data as this structure. This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <and...@us...> - 2013-09-04 15:22:11
|
Revision: 4066 http://sourceforge.net/p/dl-learner/code/4066 Author: andremelo Date: 2013-09-04 15:22:06 +0000 (Wed, 04 Sep 2013) Log Message: ----------- Adding SimpleEntityCandidates and updating EntityCandidateTrie Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/EntityCandidatesTrie.java Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/EntityCandidatesTrie.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/EntityCandidatesTrie.java 2013-09-04 15:04:37 UTC (rev 4065) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/EntityCandidatesTrie.java 2013-09-04 15:22:06 UTC (rev 4066) @@ -4,11 +4,12 @@ import java.util.Set; import org.dllearner.core.owl.Entity; +import org.dllearner.utilities.datastructures.PrefixTrie; public interface EntityCandidatesTrie { - + /** - * Adds an entry to the trie. 
If string already existent, adds to entity to its set of candidates + * Adds an entity to the set of candidates of a string * @param s * @param e */ @@ -24,13 +25,6 @@ /** - * Gets longest matching string and its candidate entities - * @param s - * @return - */ - public Entry<String,Set<Entity>> getLongestMatchWithCandidates(String s); - - /** * Gets the longest matching string * @param s * @return Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-09-04 15:22:06 UTC (rev 4066) @@ -0,0 +1,49 @@ +package org.dllearner.algorithms.isle.index; + +import java.util.HashSet; +import java.util.Map.Entry; +import java.util.Set; + +import org.dllearner.core.owl.Entity; +import org.dllearner.utilities.datastructures.PrefixTrie; +import org.semanticweb.owlapi.model.OWLOntology; + +public class SimpleEntityCandidatesTrie implements EntityCandidatesTrie { + + PrefixTrie<Set<Entity>> trie; + OWLOntology ontology; + + public SimpleEntityCandidatesTrie(OWLOntology ontology) { + this.ontology = ontology; + this.trie = new PrefixTrie<Set<Entity>>(); + } + + @Override + public void addEntry(String s, Entity e) { + Set<Entity> candidates = trie.get(s); + if (candidates==null) + candidates = new HashSet<Entity>(); + + candidates.add(e); + } + + @Override + public Set<Entity> getCandidateEntities(String s) { + // TODO Auto-generated method stub + return null; + } + + @Override + public String getLongestMatch(String s) { + return trie.getLongestMatch(s).toString(); + } + + /** + * @param args + */ + public static void main(String[] args) { + // TODO Auto-generated method stub + + } + +} This was sent by the SourceForge.net collaborative 
development platform, the world's largest Open Source development site. |
From: <dfl...@us...> - 2013-09-04 15:04:42
|
Revision: 4065 http://sourceforge.net/p/dl-learner/code/4065 Author: dfleischhacker Date: 2013-09-04 15:04:37 +0000 (Wed, 04 Sep 2013) Log Message: ----------- Add lemmatizing to linguistic utils Modified Paths: -------------- trunk/components-core/pom.xml trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java Modified: trunk/components-core/pom.xml =================================================================== --- trunk/components-core/pom.xml 2013-09-04 14:39:59 UTC (rev 4064) +++ trunk/components-core/pom.xml 2013-09-04 15:04:37 UTC (rev 4065) @@ -195,7 +195,13 @@ <version>1.0</version> </dependency> + <dependency> + <groupId>edu.northwestern.at</groupId> + <artifactId>morphadorner</artifactId> + <version>2009-04-30</version> + </dependency> + <!-- This module is a library module, so it needs only to have the slf api dependency to enable logging --> <dependency> <groupId>org.slf4j</groupId> Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java 2013-09-04 14:39:59 UTC (rev 4064) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java 2013-09-04 15:04:37 UTC (rev 4065) @@ -1,9 +1,16 @@ package org.dllearner.algorithms.isle.index; +import edu.northwestern.at.utils.corpuslinguistics.lemmatizer.DefaultLemmatizer; +import edu.northwestern.at.utils.corpuslinguistics.lemmatizer.Lemmatizer; +import edu.stanford.nlp.ling.CoreAnnotations; +import edu.stanford.nlp.ling.CoreLabel; +import edu.stanford.nlp.pipeline.*; +import edu.stanford.nlp.util.CoreMap; import net.didion.jwnl.data.POS; import org.dllearner.algorithms.isle.WordNet; import java.util.ArrayList; +import java.util.Properties; /** * Provides shortcuts to commonly used linguistic operations @@ -12,7 +19,17 @@ public class 
LinguisticUtil { private static final WordNet wn = new WordNet(); private static POS[] RELEVANT_POS = new POS[]{POS.NOUN, POS.VERB}; + private static Lemmatizer lemmatizer; + static { + try { + lemmatizer = new DefaultLemmatizer(); + } + catch (Exception e) { + e.printStackTrace(); + } + } + /** * Processes the given string and puts camelCased words into single words. * @param camelCase the word containing camelcase to split @@ -54,7 +71,13 @@ return underScored.split("_"); } - // get synonyms + /** + * Returns an array of all synonyms for the given word. Only synonyms for the POS in {@link #RELEVANT_POS} are + * returned. + * + * @param word the word to retrieve synonyms for + * @return synonyms for the given word + */ public static String[] getSynonymsForWord(String word) { ArrayList<String> synonyms = new ArrayList<String>(); @@ -64,7 +87,28 @@ return synonyms.toArray(new String[synonyms.size()]); } + /** + * Returns the normalized form of the given word. This method is only able to work with single words! If there is an + * error normalizing the given word, the word itself is returned. + * + * @param word the word to get normalized form for + * @return normalized form of the word or the word itself on an error + */ + public static String getNormalizedForm(String word) { + try { + if (lemmatizer == null) { + return word; + } + return lemmatizer.lemmatize(word); + } + catch (Exception e) { + e.printStackTrace(); + } + return word; + } + public static void main(String[] args) { + System.out.println(getNormalizedForm("going")); for (String s : getWordsFromCamelCase("thisIsAClassWith1Name123")) { System.out.println(s); for (String w : getSynonymsForWord(s)) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <and...@us...> - 2013-09-04 14:40:03
|
Revision: 4064 http://sourceforge.net/p/dl-learner/code/4064 Author: andremelo Date: 2013-09-04 14:39:59 +0000 (Wed, 04 Sep 2013) Log Message: ----------- Adding EntityCandidatesTries and the implementations of EntityCandidateGenerator and LinguisticAnnotator based on it Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/EntityCandidatesTrie.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/EntityCandidatesTrie.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/EntityCandidatesTrie.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/EntityCandidatesTrie.java 2013-09-04 14:39:59 UTC (rev 4064) @@ -0,0 +1,41 @@ +package org.dllearner.algorithms.isle.index; + +import java.util.Map.Entry; +import java.util.Set; + +import org.dllearner.core.owl.Entity; + +public interface EntityCandidatesTrie { + + /** + * Adds an entry to the trie. 
If string already existent, adds to entity to its set of candidates + * @param s + * @param e + */ + public void addEntry(String s, Entity e); + + + /** + * Gets set of candidate entities for an exact given String + * @param s + * @return + */ + public Set<Entity> getCandidateEntities(String s); + + + /** + * Gets longest matching string and its candidate entities + * @param s + * @return + */ + public Entry<String,Set<Entity>> getLongestMatchWithCandidates(String s); + + /** + * Gets the longest matching string + * @param s + * @return + */ + public String getLongestMatch(String s); + + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java 2013-09-04 14:39:59 UTC (rev 4064) @@ -0,0 +1,27 @@ +package org.dllearner.algorithms.isle.index; + +import java.util.Set; + +import org.dllearner.algorithms.isle.EntityCandidateGenerator; +import org.dllearner.core.owl.Entity; +import org.semanticweb.owlapi.model.OWLOntology; + +/** + * Generates candidates using a entity candidates prefix trie + * @author Andre Melo + * + */ +public class TrieEntityCandidateGenerator extends EntityCandidateGenerator{ + + EntityCandidatesTrie candidatesTrie; + + public TrieEntityCandidateGenerator(OWLOntology ontology, EntityCandidatesTrie candidatesTrie) { + super(ontology); + this.candidatesTrie = candidatesTrie; + } + + public Set<Entity> getCandidates(Annotation annotation) { + return candidatesTrie.getCandidateEntities(annotation.getToken()); + } + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java =================================================================== --- 
trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java 2013-09-04 14:39:59 UTC (rev 4064) @@ -0,0 +1,41 @@ +package org.dllearner.algorithms.isle.index; + +import java.util.HashSet; +import java.util.Set; + +/** + * Annotates a document using a prefix trie + * @author Andre Melo + * + */ +public class TrieLinguisticAnnotator implements LinguisticAnnotator { + + EntityCandidatesTrie candidatesTrie; + + public TrieLinguisticAnnotator(EntityCandidatesTrie candidatesTrie) { + this.candidatesTrie = candidatesTrie; + } + + /** + * Generates annotation based on trie's longest matching strings + * @param document + * @param candidatesTrie + * @return + */ + @Override + public Set<Annotation> annotate(Document document) { + String content = document.getRawContent(); + Set<Annotation> annotations = new HashSet<Annotation>(); + for (int i=0; i<content.length(); i++) { + String unparsed = content.substring(i); + String match = candidatesTrie.getLongestMatch(unparsed); + if (match!=null && !match.isEmpty()) { + Annotation annotation = new Annotation(document, i, match.length()); + annotations.add(annotation); + i += match.length()-1; + } + } + return annotations; + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <dfl...@us...> - 2013-09-04 14:31:56
|
Revision: 4063 http://sourceforge.net/p/dl-learner/code/4063 Author: dfleischhacker Date: 2013-09-04 14:31:52 +0000 (Wed, 04 Sep 2013) Log Message: ----------- Update LinguisticUtil Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java 2013-09-04 14:29:43 UTC (rev 4062) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java 2013-09-04 14:31:52 UTC (rev 4063) @@ -6,7 +6,7 @@ import java.util.ArrayList; /** - * Provides shortcuts to + * Provides shortcuts to commonly used linguistic operations * @author Daniel Fleischhacker */ public class LinguisticUtil { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <dfl...@us...> - 2013-09-04 14:29:47
|
Revision: 4062 http://sourceforge.net/p/dl-learner/code/4062 Author: dfleischhacker Date: 2013-09-04 14:29:43 +0000 (Wed, 04 Sep 2013) Log Message: ----------- Cleanup LinguisticUtil Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java 2013-09-04 14:28:12 UTC (rev 4061) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java 2013-09-04 14:29:43 UTC (rev 4062) @@ -4,9 +4,6 @@ import org.dllearner.algorithms.isle.WordNet; import java.util.ArrayList; -import java.util.List; -import java.util.regex.Matcher; -import java.util.regex.Pattern; /** * Provides shortcuts to This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <dfl...@us...> - 2013-09-04 14:28:16
|
Revision: 4061 http://sourceforge.net/p/dl-learner/code/4061 Author: dfleischhacker Date: 2013-09-04 14:28:12 +0000 (Wed, 04 Sep 2013) Log Message: ----------- Add class providing wrapper for common linguistic operations Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordNet.java Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordNet.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordNet.java 2013-09-04 14:26:47 UTC (rev 4060) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordNet.java 2013-09-04 14:28:12 UTC (rev 4061) @@ -4,6 +4,7 @@ import java.util.ArrayList; import java.util.Iterator; import java.util.List; +import java.util.Set; import net.didion.jwnl.JWNL; import net.didion.jwnl.JWNLException; @@ -71,8 +72,31 @@ } return synonyms; } - - public List<String> getSisterTerms(POS pos, String s){ + + public List<String> getAllSynonyms(POS pos, String s) { + List<String> synonyms = new ArrayList<String>(); + try { + IndexWord iw = dict.getIndexWord(pos, s); + if (iw != null) { + Synset[] synsets = iw.getSenses(); + for (Synset synset : synsets) { + for (Word w : synset.getWords()) { + String lemma = w.getLemma(); + if (!lemma.equals(s) && !lemma.contains(" ")) { + synonyms.add(lemma); + } + } + } + } + } + catch (JWNLException e) { + e.printStackTrace(); + } + + return synonyms; + } + + public List<String> getSisterTerms(POS pos, String s){ List<String> sisterTerms = new ArrayList<String>(); try { Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java 
(rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java 2013-09-04 14:28:12 UTC (rev 4061) @@ -0,0 +1,78 @@ +package org.dllearner.algorithms.isle.index; + +import net.didion.jwnl.data.POS; +import org.dllearner.algorithms.isle.WordNet; + +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Provides shortcuts to + * @author Daniel Fleischhacker + */ +public class LinguisticUtil { + private static final WordNet wn = new WordNet(); + private static POS[] RELEVANT_POS = new POS[]{POS.NOUN, POS.VERB}; + + /** + * Processes the given string and puts camelCased words into single words. + * @param camelCase the word containing camelcase to split + * @return all words as camelcase contained in the given word + */ + public static String[] getWordsFromCamelCase(String camelCase) { + ArrayList<String> resultingWords = new ArrayList<String>(); + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < camelCase.length(); i++) { + // we just ignore characters not matching the defined pattern + char curChar = camelCase.charAt(i); + if (!Character.isLetter(curChar)) { + continue; + } + if (Character.isUpperCase(curChar)) { // found a new upper case letter + resultingWords.add(sb.toString()); + sb = new StringBuilder(); + sb.append(Character.toLowerCase(curChar)); + } + else { // lower case letter + sb.append(curChar); + } + } + + if (sb.length() > 0) { + resultingWords.add(sb.toString()); + } + + return resultingWords.toArray(new String[resultingWords.size()]); + } + + /** + * Split word into words it contains divided by underscores. 
+ * + * @param underScored word to split at underscores + * @return words contained in given word + */ + public static String[] getWordsFromUnderscored(String underScored) { + return underScored.split("_"); + } + + // get synonyms + public static String[] getSynonymsForWord(String word) { + ArrayList<String> synonyms = new ArrayList<String>(); + + for (POS pos : RELEVANT_POS) { + synonyms.addAll(wn.getAllSynonyms(pos, word)); + } + return synonyms.toArray(new String[synonyms.size()]); + } + + public static void main(String[] args) { + for (String s : getWordsFromCamelCase("thisIsAClassWith1Name123")) { + System.out.println(s); + for (String w : getSynonymsForWord(s)) { + System.out.println(" --> " + w); + } + } + } +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |