From: <lor...@us...> - 2011-11-22 14:39:25
|
Revision: 3426 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3426&view=rev Author: lorenz_b Date: 2011-11-22 14:39:14 +0000 (Tue, 22 Nov 2011) Log Message: ----------- Started WordNet similarity measure. Added Wald95 method to disjoint classes algorithm. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/DisjointClassesLearner.java trunk/components-core/src/main/java/org/dllearner/algorithms/SimpleSubclassLearner.java Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/utilities/WordnetSimilarity.java trunk/components-core/src/main/resources/wordnet_properties.xml Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/DisjointClassesLearner.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/DisjointClassesLearner.java 2011-11-21 18:38:32 UTC (rev 3425) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/DisjointClassesLearner.java 2011-11-22 14:39:14 UTC (rev 3426) @@ -45,6 +45,7 @@ import org.dllearner.kb.sparql.SPARQLTasks; import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.learningproblems.AxiomScore; +import org.dllearner.learningproblems.Heuristics; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -126,7 +127,10 @@ oldCnt = result.get(cls); if(oldCnt == null){ oldCnt = Integer.valueOf(newCnt); + } else { + oldCnt += newCnt; } + result.put(cls, oldCnt); qs.getLiteral("count").getInt(); repeat = true; @@ -215,8 +219,15 @@ //secondly, create disjoint classexpressions with score 1 - (#occurence/#all) for(Entry<NamedClass, Integer> entry : sortByValues(class2Count)){ +// evalDesc = new EvaluatedDescription(entry.getKey(), +// new AxiomScore(1 - (entry.getValue() / (double)all))); + System.out.println(entry.getKey()); + System.out.println(all); + System.out.println(entry.getValue()); + double[] confidenceInterval = Heuristics.getConfidenceInterval95Wald(all, entry.getValue()); + double accuracy = (confidenceInterval[0] + confidenceInterval[1]) / 2; evalDesc = new EvaluatedDescription(entry.getKey(), - new AxiomScore(1 - (entry.getValue() / (double)all))); + new AxiomScore(1 - accuracy)); evalDescs.add(evalDesc); } @@ -227,7 +238,7 @@ public static void main(String[] args) throws Exception{ DisjointClassesLearner l = new DisjointClassesLearner(new SparqlEndpointKS(new SparqlEndpoint(new URL("http://dbpedia.aksw.org:8902/sparql"), Collections.singletonList("http://dbpedia.org"), Collections.<String>emptyList()))); - l.setClassToDescribe(new NamedClass("http://dbpedia.org/ontology/Person")); + l.setClassToDescribe(new NamedClass("http://dbpedia.org/ontology/AdministrativeRegion")); l.init(); l.getReasoner().prepareSubsumptionHierarchy(); // System.out.println(l.getReasoner().getClassHierarchy().getSubClasses(new NamedClass("http://dbpedia.org/ontology/Athlete"), false));System.exit(0); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/SimpleSubclassLearner.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/SimpleSubclassLearner.java 2011-11-21 18:38:32 UTC (rev 3425) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/SimpleSubclassLearner.java 2011-11-22 14:39:14 UTC (rev 3426) @@ -19,7 +19,9 @@ package org.dllearner.algorithms; +import java.net.URL; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -214,7 +216,8 @@ } public static void main(String[] args) throws Exception{ - SparqlEndpointKS ks = new SparqlEndpointKS(SparqlEndpoint.getEndpointDBpediaLiveOpenLink()); + SparqlEndpointKS ks = new SparqlEndpointKS(new SparqlEndpoint(new URL("http://dbpedia.aksw.org:8902/sparql"), + Collections.singletonList("http://dbpedia.org"), Collections.<String>emptyList())); SPARQLReasoner reasoner = new SPARQLReasoner(ks); reasoner.prepareSubsumptionHierarchy(); @@ -223,11 +226,13 @@ l.setReasoner(reasoner); ConfigHelper.configure(l, "maxExecutionTimeInSeconds", 10); - l.setClassToDescribe(new NamedClass("http://dbpedia.org/ontology/Bridge")); + l.setClassToDescribe(new NamedClass("http://dbpedia.org/ontology/AdministrativeRegion")); l.init(); l.start(); - System.out.println(l.getCurrentlyBestEvaluatedDescriptions(5)); + for(EvaluatedAxiom e : l.getCurrentlyBestEvaluatedAxioms(Integer.MAX_VALUE, 0.75)){ + System.out.println(e); + } } } Added: trunk/components-core/src/main/java/org/dllearner/utilities/WordnetSimilarity.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/utilities/WordnetSimilarity.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/utilities/WordnetSimilarity.java 2011-11-22 14:39:14 UTC (rev 3426) @@ -0,0 +1,68 @@ +package org.dllearner.utilities; + +import java.util.ArrayList; +import java.util.List; + +import net.didion.jwnl.JWNL; +import net.didion.jwnl.JWNLException; +import net.didion.jwnl.data.IndexWord; +import net.didion.jwnl.data.POS; +import net.didion.jwnl.data.PointerTarget; +import net.didion.jwnl.data.PointerType; +import net.didion.jwnl.data.Synset; +import net.didion.jwnl.data.Word; +import net.didion.jwnl.dictionary.Dictionary; + +public class WordnetSimilarity { + + public Dictionary dict; + + public WordnetSimilarity(){ + try { + JWNL.initialize(this.getClass().getClassLoader().getResourceAsStream("wordnet_properties.xml")); + dict = Dictionary.getInstance(); + } catch (JWNLException e) { + e.printStackTrace(); + } + } + + public double computeSimilarity(String s1, String s2, POS pos){ + List<String> synonyms = new ArrayList<String>(); + + try { + IndexWord iw = dict.getIndexWord(pos, s1);//dict.getMorphologicalProcessor().lookupBaseForm(pos, s) +// IndexWord iw = dict.getMorphologicalProcessor().lookupBaseForm(pos, s); + if(iw != null){ + Synset[] synsets = iw.getSenses(); + for(Synset s : synsets){ + System.out.println(s); + PointerTarget[] targets = s.getTargets(PointerType.HYPERNYM); + for (PointerTarget target : targets) + { + Word[] words = ((Synset) target).getWords(); + for (Word word : words) + { + System.out.println(word); + } + } + } +// Word[] words = synsets[0].getWords(); +// for(Word w : words){ +// String c = w.getLemma(); +// System.out.println(c); +// } + } + + } catch (JWNLException e) { + e.printStackTrace(); + } + + + return -1; + } + + public static void main(String[] args) { + System.out.println(new WordnetSimilarity().computeSimilarity("writer", "teacher", POS.NOUN)); + } + +} Added: trunk/components-core/src/main/resources/wordnet_properties.xml =================================================================== --- trunk/components-core/src/main/resources/wordnet_properties.xml (rev 0) +++ trunk/components-core/src/main/resources/wordnet_properties.xml 2011-11-22 14:39:14 UTC (rev 3426) @@ -0,0 +1,45 @@ +<?xml version="1.0" encoding="UTF-8"?> +<jwnl_properties language="en"> + <version publisher="Princeton" number="3.0" language="en"/> + <dictionary class="net.didion.jwnl.dictionary.FileBackedDictionary"> + <param name="morphological_processor" value="net.didion.jwnl.dictionary.morph.DefaultMorphologicalProcessor"> + <param name="operations"> + <param value="net.didion.jwnl.dictionary.morph.LookupExceptionsOperation"/> + <param value="net.didion.jwnl.dictionary.morph.DetachSuffixesOperation"> + <param name="noun" value="|s=|ses=s|xes=x|zes=z|ches=ch|shes=sh|men=man|ies=y|"/> + <param name="verb" value="|s=|ies=y|es=e|es=|ed=e|ed=|ing=e|ing=|"/> + <param name="adjective" value="|er=|est=|er=e|est=e|"/> + <param name="operations"> + <param value="net.didion.jwnl.dictionary.morph.LookupIndexWordOperation"/> + <param value="net.didion.jwnl.dictionary.morph.LookupExceptionsOperation"/> + </param> + </param> + <param value="net.didion.jwnl.dictionary.morph.TokenizerOperation"> + <param name="delimiters"> + <param value=" "/> + <param value="-"/> + </param> + <param name="token_operations"> + <param value="net.didion.jwnl.dictionary.morph.LookupIndexWordOperation"/> + <param value="net.didion.jwnl.dictionary.morph.LookupExceptionsOperation"/> + <param value="net.didion.jwnl.dictionary.morph.DetachSuffixesOperation"> + <param name="noun" value="|s=|ses=s|xes=x|zes=z|ches=ch|shes=sh|men=man|ies=y|"/> + <param name="verb" value="|s=|ies=y|es=e|es=|ed=e|ed=|ing=e|ing=|"/> + <param name="adjective" value="|er=|est=|er=e|est=e|"/> + <param name="operations"> + <param value="net.didion.jwnl.dictionary.morph.LookupIndexWordOperation"/> + <param value="net.didion.jwnl.dictionary.morph.LookupExceptionsOperation"/> + </param> + </param> + </param> + </param> + </param> + </param> + <param name="dictionary_element_factory" value="net.didion.jwnl.princeton.data.PrincetonWN17FileDictionaryElementFactory"/> + <param name="file_manager" value="net.didion.jwnl.dictionary.file_manager.FileManagerImpl"> + <param name="file_type" value="net.didion.jwnl.princeton.file.PrincetonRandomAccessDictionaryFile"/> + <param name="dictionary_path" value="/opt/wordnet/dict"/> + </param> + </dictionary> + <resource class="PrincetonResource"/> +</jwnl_properties> \ No newline at end of file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2013-09-04 09:56:48
|
Revision: 4054 http://sourceforge.net/p/dl-learner/code/4054 Author: lorenz_b Date: 2013-09-04 09:56:41 +0000 (Wed, 04 Sep 2013) Log Message: ----------- Added simple stop word filtering list. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/LGGGeneratorImpl.java trunk/components-core/src/main/java/org/dllearner/kb/sparql/SparqlEndpoint.java Added Paths: ----------- trunk/components-core/src/main/resources/stopwords.txt Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/LGGGeneratorImpl.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/LGGGeneratorImpl.java 2013-09-04 09:55:35 UTC (rev 4053) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/LGGGeneratorImpl.java 2013-09-04 09:56:41 UTC (rev 4054) @@ -26,10 +26,16 @@ import java.util.TreeSet; import org.apache.log4j.Logger; +import org.dllearner.algorithms.qtl.cache.QueryTreeCache; import org.dllearner.algorithms.qtl.datastructures.QueryTree; import org.dllearner.algorithms.qtl.datastructures.impl.QueryTreeImpl; +import org.dllearner.kb.sparql.ConciseBoundedDescriptionGenerator; +import org.dllearner.kb.sparql.ConciseBoundedDescriptionGeneratorImpl; +import org.dllearner.kb.sparql.SparqlEndpoint; +import com.google.common.collect.Lists; import com.hp.hpl.jena.datatypes.RDFDatatype; +import com.hp.hpl.jena.rdf.model.Model; import com.jamonapi.Monitor; import com.jamonapi.MonitorFactory; @@ -112,7 +118,7 @@ return lgg; } - private QueryTree<N> computeLGG(QueryTree<N> tree1, QueryTree<N> tree2, boolean learnFilters){ + private QueryTree<N> computeLGG(QueryTree<N> tree1, QueryTree<N> tree2, boolean learnFilters){System.out.println("call"); if(logger.isDebugEnabled()){ logger.debug("Computing LGG for"); logger.debug(tree1.getStringRepresentation()); @@ -222,5 +228,30 @@ addNumbering(child); } } + + public static void main(String[] args) throws Exception { + LGGGenerator<String> lggGen = new LGGGeneratorImpl<String>(); + + List<QueryTree<String>> trees = new ArrayList<QueryTree<String>>(); + QueryTree<String> tree; + Model model; + ConciseBoundedDescriptionGenerator cbdGenerator = new ConciseBoundedDescriptionGeneratorImpl(SparqlEndpoint.getEndpointDBpedia(), "cache"); + cbdGenerator.setRecursionDepth(1); + QueryTreeCache treeCache = new QueryTreeCache(); + List<String> resources = Lists.newArrayList("http://dbpedia.org/resource/Leipzig");//, "http://dbpedia.org/resource/Dresden"); + for(String resource : resources){ + try { + System.out.println(resource); + model = cbdGenerator.getConciseBoundedDescription(resource); + tree = treeCache.getQueryTree(resource, model); + System.out.println(tree.getStringRepresentation()); + trees.add(tree); + trees.add(tree); + } catch (Exception e) { + e.printStackTrace(); + } + } + lggGen.getLGG(trees); + } } Modified: trunk/components-core/src/main/java/org/dllearner/kb/sparql/SparqlEndpoint.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/kb/sparql/SparqlEndpoint.java 2013-09-04 09:55:35 UTC (rev 4053) +++ trunk/components-core/src/main/java/org/dllearner/kb/sparql/SparqlEndpoint.java 2013-09-04 09:56:41 UTC (rev 4054) @@ -227,6 +227,18 @@ return new SparqlEndpoint(u, defaultGraphURIs, new LinkedList<String>()); } + public static SparqlEndpoint getEndpointDBpediaLOD2Cloud() { + URL u = null; + try { + u = new URL("http://lod.openlinksw.com/sparql/"); + } catch (Exception e) { + e.printStackTrace(); + } + LinkedList<String> defaultGraphURIs=new LinkedList<String>(); + defaultGraphURIs.add("http://dbpedia.org"); + return new SparqlEndpoint(u, defaultGraphURIs, new LinkedList<String>()); + } + public static SparqlEndpoint getEndpointLinkedGeoData() { URL u = null; try { Added: trunk/components-core/src/main/resources/stopwords.txt =================================================================== --- trunk/components-core/src/main/resources/stopwords.txt (rev 0) +++ trunk/components-core/src/main/resources/stopwords.txt 2013-09-04 09:56:41 UTC (rev 4054) @@ -0,0 +1,174 @@ +a +about +above +after +again +against +all +am +an +and +any +are +aren't +as +at +be +because +been +before +being +below +between +both +but +by +can't +cannot +could +couldn't +did +didn't +do +does +doesn't +doing +don't +down +during +each +few +for +from +further +had +hadn't +has +hasn't +have +haven't +having +he +he'd +he'll +he's +her +here +here's +hers +herself +him +himself +his +how +how's +i +i'd +i'll +i'm +i've +if +in +into +is +isn't +it +it's +its +itself +let's +me +more +most +mustn't +my +myself +no +nor +not +of +off +on +once +only +or +other +ought +our +ours +ourselves +out +over +own +same +shan't +she +she'd +she'll +she's +should +shouldn't +so +some +such +than +that +that's +the +their +theirs +them +themselves +then +there +there's +these +they +they'd +they'll +they're +they've +this +those +through +to +too +under +until +up +very +was +wasn't +we +we'd +we'll +we're +we've +were +weren't +what +what's +when +when's +where +where's +which +while +who +who's +whom +why +why's +with +won't +would +wouldn't +you +you'd +you'll +you're +you've +your +yours +yourself +yourselves \ No newline at end of file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2013-10-03 08:53:10
|
Revision: 4114 http://sourceforge.net/p/dl-learner/code/4114 Author: lorenz_b Date: 2013-10-03 08:53:06 +0000 (Thu, 03 Oct 2013) Log Message: ----------- Moved WSD into separate package. Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/StructuralEntityContext.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/RandomWordSenseDisambiguation.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SimpleWordSenseDisambiguation.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/StructureBasedWordSenseDisambiguation.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WordSenseDisambiguation.java trunk/components-core/src/main/resources/log4j.properties Removed Paths: ------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RandomWordSenseDisambiguation.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/SimpleWordSenseDisambiguation.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordSenseDisambiguation.java Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RandomWordSenseDisambiguation.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RandomWordSenseDisambiguation.java 2013-10-03 08:50:57 UTC (rev 4113) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RandomWordSenseDisambiguation.java 2013-10-03 08:53:06 UTC (rev 4114) @@ -1,59 +0,0 @@ -/** - * Copyright (C) 2007-2013, Jens Lehmann - * - * This file is part of DL-Learner. - * - * DL-Learner is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * DL-Learner is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ -package org.dllearner.algorithms.isle; - -import java.util.Random; -import java.util.Set; - -import org.dllearner.algorithms.isle.index.Annotation; -import org.dllearner.algorithms.isle.index.SemanticAnnotation; -import org.dllearner.core.owl.Entity; -import org.semanticweb.owlapi.model.OWLOntology; - -/** - * Disambiguation by randomly selecting one of the candidates (baseline method). - * - * @author Jens Lehmann - * - */ -public class RandomWordSenseDisambiguation extends WordSenseDisambiguation { - - private Random random; - - public RandomWordSenseDisambiguation(OWLOntology ontology) { - super(ontology); - random = new Random(); - } - - @Override - public SemanticAnnotation disambiguate(Annotation annotation, - Set<Entity> candidateEntities) { - int pos = random.nextInt(candidateEntities.size()); - int i = 0; - for(Entity e : candidateEntities) - { - if (i == pos) { - return new SemanticAnnotation(annotation, e); - } - i++; - } - return null; - } - -} Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/SimpleWordSenseDisambiguation.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/SimpleWordSenseDisambiguation.java 2013-10-03 08:50:57 UTC (rev 4113) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/SimpleWordSenseDisambiguation.java 2013-10-03 08:53:06 UTC (rev 4114) @@ -1,107 +0,0 @@ -/** - * - */ -package org.dllearner.algorithms.isle; - -import java.util.HashSet; -import java.util.Set; - -import org.apache.log4j.Logger; -import org.dllearner.algorithms.isle.index.Annotation; -import org.dllearner.algorithms.isle.index.SemanticAnnotation; -import org.dllearner.core.owl.Entity; -import org.dllearner.utilities.owl.OWLAPIConverter; -import org.semanticweb.owlapi.model.IRI; -import org.semanticweb.owlapi.model.OWLAnnotationAssertionAxiom; -import org.semanticweb.owlapi.model.OWLAnnotationProperty; -import org.semanticweb.owlapi.model.OWLDataFactory; -import org.semanticweb.owlapi.model.OWLEntity; -import org.semanticweb.owlapi.model.OWLLiteral; -import org.semanticweb.owlapi.model.OWLOntology; -import org.semanticweb.owlapi.util.IRIShortFormProvider; -import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider; - -import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; - -/** - * @author Lorenz Buehmann - * - */ -public class SimpleWordSenseDisambiguation extends WordSenseDisambiguation{ - - - private static final Logger logger = Logger.getLogger(SimpleWordSenseDisambiguation.class.getName()); - - private IRIShortFormProvider sfp = new SimpleIRIShortFormProvider(); - private OWLDataFactory df = new OWLDataFactoryImpl(); - private OWLAnnotationProperty annotationProperty = df.getRDFSLabel(); - - /** - * @param ontology - */ - public SimpleWordSenseDisambiguation(OWLOntology ontology) { - super(ontology); - } - - /* (non-Javadoc) - * @see org.dllearner.algorithms.isle.WordSenseDisambiguation#disambiguate(org.dllearner.algorithms.isle.index.Annotation, java.util.Set) - */ - @Override - public SemanticAnnotation disambiguate(Annotation annotation, Set<Entity> candidateEntities) { - logger.debug("Linguistic annotations:\n" + annotation); - logger.debug("Candidate entities:" + candidateEntities); - String token = annotation.getToken().trim(); - //check if annotation token matches label of entity or the part behind #(resp. /) - for (Entity entity : candidateEntities) { - Set<String> labels = getLabels(entity); - for (String label : labels) { - if(label.equals(token)){ - logger.debug("Disambiguated entity: " + entity); - return new SemanticAnnotation(annotation, entity); - } - } - String shortForm = sfp.getShortForm(IRI.create(entity.getURI())); - if(annotation.equals(shortForm)){ - logger.debug("Disambiguated entity: " + entity); - return new SemanticAnnotation(annotation, entity); - } - } - return null; - } - - private Set<String> getLabels(Entity entity){ - Set<String> labels = new HashSet<String>(); - OWLEntity owlEntity = OWLAPIConverter.getOWLAPIEntity(entity); - Set<OWLAnnotationAssertionAxiom> axioms = ontology.getAnnotationAssertionAxioms(owlEntity.getIRI()); - for (OWLAnnotationAssertionAxiom annotation : axioms) { - if(annotation.getProperty().equals(annotationProperty)){ - if (annotation.getValue() instanceof OWLLiteral) { - OWLLiteral val = (OWLLiteral) annotation.getValue(); - labels.add(val.getLiteral()); - } - } - } - return labels; - } - - private Set<String> getRelatedWordPhrases(Entity entity){ - //add the labels if exist - Set<String> relatedWordPhrases = new HashSet<String>(); - OWLEntity owlEntity = OWLAPIConverter.getOWLAPIEntity(entity); - Set<OWLAnnotationAssertionAxiom> axioms = ontology.getAnnotationAssertionAxioms(owlEntity.getIRI()); - for (OWLAnnotationAssertionAxiom annotation : axioms) { - if(annotation.getProperty().equals(annotationProperty)){ - if (annotation.getValue() instanceof OWLLiteral) { - OWLLiteral val = (OWLLiteral) annotation.getValue(); - relatedWordPhrases.add(val.getLiteral()); - } - } - } - //add the short form of the URI if no labels are available - if(relatedWordPhrases.isEmpty()){ - relatedWordPhrases.add(sfp.getShortForm(IRI.create(entity.getURI()))); - } - return relatedWordPhrases; - } - -} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/StructuralEntityContext.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/StructuralEntityContext.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/StructuralEntityContext.java 2013-10-03 08:53:06 UTC (rev 4114) @@ -0,0 +1,207 @@ +/** + * + */ +package org.dllearner.algorithms.isle; + +import java.util.HashSet; +import java.util.Iterator; +import java.util.Set; + +import org.dllearner.core.owl.Entity; +import org.dllearner.utilities.owl.OWLAPIConverter; +import org.semanticweb.owlapi.model.AxiomType; +import org.semanticweb.owlapi.model.OWLAnnotationAssertionAxiom; +import org.semanticweb.owlapi.model.OWLAnnotationProperty; +import org.semanticweb.owlapi.model.OWLAxiom; +import org.semanticweb.owlapi.model.OWLClass; +import org.semanticweb.owlapi.model.OWLDataFactory; +import org.semanticweb.owlapi.model.OWLDataProperty; +import org.semanticweb.owlapi.model.OWLEntity; +import org.semanticweb.owlapi.model.OWLLiteral; +import org.semanticweb.owlapi.model.OWLObjectProperty; +import org.semanticweb.owlapi.model.OWLOntology; + +import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; + +import com.google.common.collect.Sets; + +/** + * @author Lorenz Buehmann + * + */ +public class StructuralEntityContext { + + private static OWLDataFactory df = new OWLDataFactoryImpl(); + private static Set<OWLAnnotationProperty> annotationProperties = Sets.newHashSet( + df.getRDFSLabel(), + df.getRDFSComment()); + private static Set<String> languages = Sets.newHashSet("en"); + + /** + * Returns a set of words that describe entities related to the given entity. + * @param ontology + * @param entity + * @return + */ + public static Set<String> getContextInNaturalLanguage(OWLOntology ontology, OWLEntity entity){ + Set<String> context = new HashSet<String>(); + + Set<OWLEntity> contextEntities = getContext(ontology, entity); + //add annotations for each entity + for (OWLEntity contextEntity : contextEntities) { + context.addAll(getAnnotations(ontology, contextEntity)); + } + + return context; + } + + /** + * Returns a set of words that describe entities related to the given entity. + * @param ontology + * @param entity + * @return + */ + public static Set<String> getContextInNaturalLanguage(OWLOntology ontology, Entity entity){ + Set<String> context = new HashSet<String>(); + + Set<OWLEntity> contextEntities = getContext(ontology, entity); + //add annotations for each entity + for (OWLEntity contextEntity : contextEntities) { + context.addAll(getAnnotations(ontology, contextEntity)); + } + + return context; + } + + /** + * Returns a set of entities that are structural related to the given entity. + * @param ontology + * @param entity + * @return + */ + public static Set<OWLEntity> getContext(OWLOntology ontology, OWLEntity entity){ + + if(entity.isOWLClass()){ + return getContext(ontology, entity.asOWLClass()); + } else if(entity.isOWLObjectProperty()){ + return getContext(ontology, entity.asOWLObjectProperty()); + } else if(entity.isOWLDataProperty()){ + return getContext(ontology, entity.asOWLDataProperty()); + } + + throw new UnsupportedOperationException("Unsupported entity type: " + entity); + } + + /** + * Returns a set of entities that are structural related to the given entity. + * @param ontology + * @param entity + * @return + */ + public static Set<OWLEntity> getContext(OWLOntology ontology, Entity entity){ + + OWLEntity owlEntity = OWLAPIConverter.getOWLAPIEntity(entity); + if(owlEntity.isOWLClass()){ + return getContext(ontology, owlEntity.asOWLClass()); + } else if(owlEntity.isOWLObjectProperty()){ + return getContext(ontology, owlEntity.asOWLObjectProperty()); + } else if(owlEntity.isOWLDataProperty()){ + return getContext(ontology, owlEntity.asOWLDataProperty()); + } + + throw new UnsupportedOperationException("Unsupported entity type: " + entity); + } + + public static Set<OWLEntity> getContext(OWLOntology ontology, OWLObjectProperty property){ + Set<OWLEntity> context = new HashSet<OWLEntity>(); + + Set<OWLAxiom> relatedAxioms = new HashSet<OWLAxiom>(); + relatedAxioms.addAll(ontology.getObjectSubPropertyAxiomsForSubProperty(property)); + relatedAxioms.addAll(ontology.getEquivalentObjectPropertiesAxioms(property)); + relatedAxioms.addAll(ontology.getObjectPropertyDomainAxioms(property)); + relatedAxioms.addAll(ontology.getObjectPropertyRangeAxioms(property)); + + for (OWLAxiom axiom : relatedAxioms) { + context.addAll(axiom.getSignature()); + } + + return context; + } + + public static Set<OWLEntity> getContext(OWLOntology ontology, OWLDataProperty property){ + Set<OWLEntity> context = new HashSet<OWLEntity>(); + + Set<OWLAxiom> relatedAxioms = new HashSet<OWLAxiom>(); + relatedAxioms.addAll(ontology.getDataSubPropertyAxiomsForSubProperty(property)); + relatedAxioms.addAll(ontology.getEquivalentDataPropertiesAxioms(property)); + relatedAxioms.addAll(ontology.getDataPropertyDomainAxioms(property)); + + for (OWLAxiom axiom : relatedAxioms) { + context.addAll(axiom.getSignature()); + } + + return context; + } + + public static Set<OWLEntity> getContext(OWLOntology ontology, OWLClass cls){ + Set<OWLEntity> context = new HashSet<OWLEntity>(); + + Set<OWLAxiom> relatedAxioms = new HashSet<OWLAxiom>(); + relatedAxioms.addAll(ontology.getSubClassAxiomsForSubClass(cls)); + relatedAxioms.addAll(ontology.getEquivalentClassesAxioms(cls)); + + //axioms where cls is domain of a property + Set<OWLAxiom> domainAxioms = new HashSet<OWLAxiom>(); + domainAxioms.addAll(ontology.getAxioms(AxiomType.OBJECT_PROPERTY_DOMAIN)); + domainAxioms.addAll(ontology.getAxioms(AxiomType.DATA_PROPERTY_DOMAIN)); + for (Iterator<OWLAxiom> iterator = domainAxioms.iterator(); iterator.hasNext();) { + OWLAxiom axiom = iterator.next(); + if(!axiom.getSignature().contains(cls)){ + iterator.remove(); + } + } + relatedAxioms.addAll(domainAxioms); + + //axioms where cls is range of a object property + Set<OWLAxiom> rangeAxioms = new HashSet<OWLAxiom>(); + rangeAxioms.addAll(ontology.getAxioms(AxiomType.OBJECT_PROPERTY_RANGE)); + for (Iterator<OWLAxiom> iterator = rangeAxioms.iterator(); iterator.hasNext();) { + OWLAxiom axiom = iterator.next(); + if(!axiom.getSignature().contains(cls)){ + iterator.remove(); + } + } + relatedAxioms.addAll(rangeAxioms); + + for (OWLAxiom axiom : relatedAxioms) { + context.addAll(axiom.getSignature()); + } + + return context; + } + + private static Set<String> getAnnotations(OWLOntology ontology, OWLEntity entity){ + Set<String> annotations = new HashSet<String>(); + Set<OWLAnnotationAssertionAxiom> axioms = ontology.getAnnotationAssertionAxioms(entity.getIRI()); + for (OWLAnnotationAssertionAxiom annotation : axioms) { + if(annotationProperties.contains(annotation.getProperty())){ + if (annotation.getValue() instanceof OWLLiteral) { + OWLLiteral val = (OWLLiteral) annotation.getValue(); + if(val.getLang() != null && !val.getLang().isEmpty()){ + if(languages.contains(val.getLang())){ + if(!val.getLiteral().isEmpty()){ + annotations.add(val.getLiteral()); + } + } + } else { + if(!val.getLiteral().isEmpty()){ + annotations.add(val.getLiteral()); + } + } + } + } + } + return annotations; + } + +} Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordSenseDisambiguation.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordSenseDisambiguation.java 2013-10-03 08:50:57 UTC (rev 4113) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordSenseDisambiguation.java 2013-10-03 08:53:06 UTC (rev 4114) @@ -1,36 +0,0 @@ -package org.dllearner.algorithms.isle; - -import org.dllearner.algorithms.isle.index.Annotation; -import org.dllearner.algorithms.isle.index.Document; -import org.dllearner.algorithms.isle.index.SemanticAnnotation; -import org.dllearner.core.owl.Entity; -import org.semanticweb.owlapi.model.OWLOntology; - -import java.util.Set; - -/** - * Abstract class for the word sense disambiguation component. - * - * @author Daniel Fleischhacker - */ -public abstract class WordSenseDisambiguation { - OWLOntology ontology; - - /** - * Initializes the word sense disambiguation to use the given ontology. - * - * @param ontology the ontology to disambiguate on - */ - public WordSenseDisambiguation(OWLOntology ontology) { - this.ontology = ontology; - } - - /** - * Chooses the correct entity for the given annotation from a set of candidate entities. - * - * @param annotation the annotation to find entity for - * @param candidateEntities the set of candidate entities - * @return semantic annotation containing the given annotation and the chosen entity - */ - public abstract SemanticAnnotation disambiguate(Annotation annotation, Set<Entity> candidateEntities); -} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/RandomWordSenseDisambiguation.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/RandomWordSenseDisambiguation.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/RandomWordSenseDisambiguation.java 2013-10-03 08:53:06 UTC (rev 4114) @@ -0,0 +1,59 @@ +/** + * Copyright (C) 2007-2013, Jens Lehmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +package org.dllearner.algorithms.isle.wsd; + +import java.util.Random; +import java.util.Set; + +import org.dllearner.algorithms.isle.index.Annotation; +import org.dllearner.algorithms.isle.index.SemanticAnnotation; +import org.dllearner.core.owl.Entity; +import org.semanticweb.owlapi.model.OWLOntology; + +/** + * Disambiguation by randomly selecting one of the candidates (baseline method). + * + * @author Jens Lehmann + * + */ +public class RandomWordSenseDisambiguation extends WordSenseDisambiguation { + + private Random random; + + public RandomWordSenseDisambiguation(OWLOntology ontology) { + super(ontology); + random = new Random(); + } + + @Override + public SemanticAnnotation disambiguate(Annotation annotation, + Set<Entity> candidateEntities) { + int pos = random.nextInt(candidateEntities.size()); + int i = 0; + for(Entity e : candidateEntities) + { + if (i == pos) { + return new SemanticAnnotation(annotation, e); + } + i++; + } + return null; + } + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SimpleWordSenseDisambiguation.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SimpleWordSenseDisambiguation.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SimpleWordSenseDisambiguation.java 2013-10-03 08:53:06 UTC (rev 4114) @@ -0,0 +1,107 @@ +/** + * + */ +package org.dllearner.algorithms.isle.wsd; + +import java.util.HashSet; +import java.util.Set; + +import org.apache.log4j.Logger; +import org.dllearner.algorithms.isle.index.Annotation; +import org.dllearner.algorithms.isle.index.SemanticAnnotation; +import org.dllearner.core.owl.Entity; +import org.dllearner.utilities.owl.OWLAPIConverter; +import org.semanticweb.owlapi.model.IRI; +import org.semanticweb.owlapi.model.OWLAnnotationAssertionAxiom; +import org.semanticweb.owlapi.model.OWLAnnotationProperty; +import org.semanticweb.owlapi.model.OWLDataFactory; +import org.semanticweb.owlapi.model.OWLEntity; +import org.semanticweb.owlapi.model.OWLLiteral; +import org.semanticweb.owlapi.model.OWLOntology; +import org.semanticweb.owlapi.util.IRIShortFormProvider; +import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider; + +import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; + +/** + * @author Lorenz Buehmann + * + */ +public class SimpleWordSenseDisambiguation extends WordSenseDisambiguation{ + + + private static final Logger logger = Logger.getLogger(SimpleWordSenseDisambiguation.class.getName()); + + private IRIShortFormProvider sfp = new SimpleIRIShortFormProvider(); + private OWLDataFactory df = new OWLDataFactoryImpl(); + private OWLAnnotationProperty annotationProperty = df.getRDFSLabel(); + + /** + * @param ontology + */ + public SimpleWordSenseDisambiguation(OWLOntology ontology) { + super(ontology); + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.WordSenseDisambiguation#disambiguate(org.dllearner.algorithms.isle.index.Annotation, java.util.Set) + */ + @Override + public SemanticAnnotation disambiguate(Annotation annotation, Set<Entity> candidateEntities) { + logger.debug("Linguistic annotations:\n" + annotation); + logger.debug("Candidate entities:" + candidateEntities); + String token = annotation.getToken().trim(); + //check if annotation token matches label of entity or the part behind #(resp. /) + for (Entity entity : candidateEntities) { + Set<String> labels = getLabels(entity); + for (String label : labels) { + if(label.equals(token)){ + logger.debug("Disambiguated entity: " + entity); + return new SemanticAnnotation(annotation, entity); + } + } + String shortForm = sfp.getShortForm(IRI.create(entity.getURI())); + if(annotation.equals(shortForm)){ + logger.debug("Disambiguated entity: " + entity); + return new SemanticAnnotation(annotation, entity); + } + } + return null; + } + + private Set<String> getLabels(Entity entity){ + Set<String> labels = new HashSet<String>(); + OWLEntity owlEntity = OWLAPIConverter.getOWLAPIEntity(entity); + Set<OWLAnnotationAssertionAxiom> axioms = ontology.getAnnotationAssertionAxioms(owlEntity.getIRI()); + for (OWLAnnotationAssertionAxiom annotation : axioms) { + if(annotation.getProperty().equals(annotationProperty)){ + if (annotation.getValue() instanceof OWLLiteral) { + OWLLiteral val = (OWLLiteral) annotation.getValue(); + labels.add(val.getLiteral()); + } + } + } + return labels; + } + + private Set<String> getRelatedWordPhrases(Entity entity){ + //add the labels if exist + Set<String> relatedWordPhrases = new HashSet<String>(); + OWLEntity owlEntity = OWLAPIConverter.getOWLAPIEntity(entity); + Set<OWLAnnotationAssertionAxiom> axioms = ontology.getAnnotationAssertionAxioms(owlEntity.getIRI()); + for (OWLAnnotationAssertionAxiom annotation : axioms) { + if(annotation.getProperty().equals(annotationProperty)){ + if (annotation.getValue() instanceof OWLLiteral) { + OWLLiteral val = (OWLLiteral) annotation.getValue(); + relatedWordPhrases.add(val.getLiteral()); + } + } + } + //add the short form of the URI if no labels are available + if(relatedWordPhrases.isEmpty()){ + relatedWordPhrases.add(sfp.getShortForm(IRI.create(entity.getURI()))); + } + return relatedWordPhrases; + } + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/StructureBasedWordSenseDisambiguation.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/StructureBasedWordSenseDisambiguation.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/StructureBasedWordSenseDisambiguation.java 2013-10-03 08:53:06 UTC (rev 4114) @@ -0,0 +1,41 @@ +/** + * + */ +package org.dllearner.algorithms.isle.wsd; + +import java.util.Set; + +import org.dllearner.algorithms.isle.StructuralEntityContext; +import org.dllearner.algorithms.isle.index.Annotation; +import org.dllearner.algorithms.isle.index.SemanticAnnotation; +import org.dllearner.core.owl.Entity; +import org.semanticweb.owlapi.model.OWLEntity; +import org.semanticweb.owlapi.model.OWLOntology; + +/** + * @author Lorenz Buehmann + * + */ +public class StructureBasedWordSenseDisambiguation extends WordSenseDisambiguation{ + + /** + * @param ontology + */ + public StructureBasedWordSenseDisambiguation(OWLOntology ontology) { + super(ontology); + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.wsd.WordSenseDisambiguation#disambiguate(org.dllearner.algorithms.isle.index.Annotation, java.util.Set) + */ + @Override + public SemanticAnnotation disambiguate(Annotation annotation, Set<Entity> candidateEntities) { + //TODO we should find the sentence in which the annotated token is contained in + String content = annotation.getReferencedDocument().getContent(); + for (Entity entity : candidateEntities) { + Set<String> entityContext = StructuralEntityContext.getContextInNaturalLanguage(ontology, entity); + } + return null; + } + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WordSenseDisambiguation.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WordSenseDisambiguation.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WordSenseDisambiguation.java 2013-10-03 08:53:06 UTC (rev 4114) @@ -0,0 +1,35 @@ +package org.dllearner.algorithms.isle.wsd; + +import java.util.Set; + +import org.dllearner.algorithms.isle.index.Annotation; +import org.dllearner.algorithms.isle.index.SemanticAnnotation; +import org.dllearner.core.owl.Entity; +import org.semanticweb.owlapi.model.OWLOntology; + +/** + * Abstract class for the word sense disambiguation component. + * + * @author Daniel Fleischhacker + */ +public abstract class WordSenseDisambiguation { + OWLOntology ontology; + + /** + * Initializes the word sense disambiguation to use the given ontology. + * + * @param ontology the ontology to disambiguate on + */ + public WordSenseDisambiguation(OWLOntology ontology) { + this.ontology = ontology; + } + + /** + * Chooses the correct entity for the given annotation from a set of candidate entities. + * + * @param annotation the annotation to find entity for + * @param candidateEntities the set of candidate entities + * @return semantic annotation containing the given annotation and the chosen entity + */ + public abstract SemanticAnnotation disambiguate(Annotation annotation, Set<Entity> candidateEntities); +} Added: trunk/components-core/src/main/resources/log4j.properties =================================================================== --- trunk/components-core/src/main/resources/log4j.properties (rev 0) +++ trunk/components-core/src/main/resources/log4j.properties 2013-10-03 08:53:06 UTC (rev 4114) @@ -0,0 +1,17 @@ +# Direct log messages to stdout +# Root logger option +log4j.rootLogger=INFO,stdout + +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +#log4j.appender.stdout.layout.ConversionPattern=%d{ABSOLUTE} %p [%c] %L - %m%n +log4j.appender.stdout.layout.ConversionPattern=%d{ABSOLUTE} [%c] - %m%n + +#File Appender +log4j.appender.FA=org.apache.log4j.FileAppender +log4j.appender.FA.File=REX.log +log4j.appender.FA.layout=org.apache.log4j.PatternLayout +log4j.appender.FA.layout.ConversionPattern=%d{ABSOLUTE} %p [%c] %L - %m%n + + +log4j.category.org.dllearner.algorithms=DEBUG Property changes on: trunk/components-core/src/main/resources/log4j.properties ___________________________________________________________________ Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |