From: <lor...@us...> - 2011-11-22 14:39:25
|
Revision: 3426 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3426&view=rev Author: lorenz_b Date: 2011-11-22 14:39:14 +0000 (Tue, 22 Nov 2011) Log Message: ----------- Started WordNet similarity measure. Added Wald95 method to disjoint classes algorithm. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/DisjointClassesLearner.java trunk/components-core/src/main/java/org/dllearner/algorithms/SimpleSubclassLearner.java Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/utilities/WordnetSimilarity.java trunk/components-core/src/main/resources/wordnet_properties.xml Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/DisjointClassesLearner.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/DisjointClassesLearner.java 2011-11-21 18:38:32 UTC (rev 3425) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/DisjointClassesLearner.java 2011-11-22 14:39:14 UTC (rev 3426) @@ -45,6 +45,7 @@ import org.dllearner.kb.sparql.SPARQLTasks; import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.learningproblems.AxiomScore; +import org.dllearner.learningproblems.Heuristics; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -126,7 +127,10 @@ oldCnt = result.get(cls); if(oldCnt == null){ oldCnt = Integer.valueOf(newCnt); + } else { + oldCnt += newCnt; } + result.put(cls, oldCnt); qs.getLiteral("count").getInt(); repeat = true; @@ -215,8 +219,15 @@ //secondly, create disjoint classexpressions with score 1 - (#occurence/#all) for(Entry<NamedClass, Integer> entry : sortByValues(class2Count)){ +// evalDesc = new EvaluatedDescription(entry.getKey(), +// new AxiomScore(1 - (entry.getValue() / (double)all))); + System.out.println(entry.getKey()); + System.out.println(all); + System.out.println(entry.getValue()); + double[] confidenceInterval = Heuristics.getConfidenceInterval95Wald(all, entry.getValue()); + double accuracy = (confidenceInterval[0] + confidenceInterval[1]) / 2; evalDesc = new EvaluatedDescription(entry.getKey(), - new AxiomScore(1 - (entry.getValue() / (double)all))); + new AxiomScore(1 - accuracy)); evalDescs.add(evalDesc); } @@ -227,7 +238,7 @@ public static void main(String[] args) throws Exception{ DisjointClassesLearner l = new DisjointClassesLearner(new SparqlEndpointKS(new SparqlEndpoint(new URL("http://dbpedia.aksw.org:8902/sparql"), Collections.singletonList("http://dbpedia.org"), Collections.<String>emptyList()))); - l.setClassToDescribe(new NamedClass("http://dbpedia.org/ontology/Person")); + l.setClassToDescribe(new NamedClass("http://dbpedia.org/ontology/AdministrativeRegion")); l.init(); l.getReasoner().prepareSubsumptionHierarchy(); // System.out.println(l.getReasoner().getClassHierarchy().getSubClasses(new NamedClass("http://dbpedia.org/ontology/Athlete"), false));System.exit(0); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/SimpleSubclassLearner.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/SimpleSubclassLearner.java 2011-11-21 18:38:32 UTC (rev 3425) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/SimpleSubclassLearner.java 2011-11-22 14:39:14 UTC (rev 3426) @@ -19,7 +19,9 @@ package org.dllearner.algorithms; +import java.net.URL; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -214,7 +216,8 @@ } public static void main(String[] args) throws Exception{ - SparqlEndpointKS ks = new SparqlEndpointKS(SparqlEndpoint.getEndpointDBpediaLiveOpenLink()); + SparqlEndpointKS ks = new SparqlEndpointKS(new SparqlEndpoint(new URL("http://dbpedia.aksw.org:8902/sparql"), + Collections.singletonList("http://dbpedia.org"), Collections.<String>emptyList())); SPARQLReasoner reasoner = new SPARQLReasoner(ks); reasoner.prepareSubsumptionHierarchy(); @@ -223,11 +226,13 @@ l.setReasoner(reasoner); ConfigHelper.configure(l, "maxExecutionTimeInSeconds", 10); - l.setClassToDescribe(new NamedClass("http://dbpedia.org/ontology/Bridge")); + l.setClassToDescribe(new NamedClass("http://dbpedia.org/ontology/AdministrativeRegion")); l.init(); l.start(); - System.out.println(l.getCurrentlyBestEvaluatedDescriptions(5)); + for(EvaluatedAxiom e : l.getCurrentlyBestEvaluatedAxioms(Integer.MAX_VALUE, 0.75)){ + System.out.println(e); + } } } Added: trunk/components-core/src/main/java/org/dllearner/utilities/WordnetSimilarity.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/utilities/WordnetSimilarity.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/utilities/WordnetSimilarity.java 2011-11-22 14:39:14 UTC (rev 3426) @@ -0,0 +1,68 @@ +package org.dllearner.utilities; + +import java.util.ArrayList; +import java.util.List; + +import net.didion.jwnl.JWNL; +import net.didion.jwnl.JWNLException; +import net.didion.jwnl.data.IndexWord; +import net.didion.jwnl.data.POS; +import net.didion.jwnl.data.PointerTarget; +import net.didion.jwnl.data.PointerType; +import net.didion.jwnl.data.Synset; +import net.didion.jwnl.data.Word; +import net.didion.jwnl.dictionary.Dictionary; + +public class WordnetSimilarity { + + public Dictionary dict; + + public WordnetSimilarity(){ + try { + JWNL.initialize(this.getClass().getClassLoader().getResourceAsStream("wordnet_properties.xml")); + dict = Dictionary.getInstance(); + } catch (JWNLException e) { + e.printStackTrace(); + } + } + + public double computeSimilarity(String s1, String s2, POS pos){ + List<String> synonyms = new ArrayList<String>(); + + try { + IndexWord iw = dict.getIndexWord(pos, s1);//dict.getMorphologicalProcessor().lookupBaseForm(pos, s) +// IndexWord iw = dict.getMorphologicalProcessor().lookupBaseForm(pos, s); + if(iw != null){ + Synset[] synsets = iw.getSenses(); + for(Synset s : synsets){ + System.out.println(s); + PointerTarget[] targets = s.getTargets(PointerType.HYPERNYM); + for (PointerTarget target : targets) + { + Word[] words = ((Synset) target).getWords(); + for (Word word : words) + { + System.out.println(word); + } + } + } +// Word[] words = synsets[0].getWords(); +// for(Word w : words){ +// String c = w.getLemma(); +// System.out.println(c); +// } + } + + } catch (JWNLException e) { + e.printStackTrace(); + } + + + return -1; + } + + public static void main(String[] args) { + System.out.println(new WordnetSimilarity().computeSimilarity("writer", "teacher", POS.NOUN)); + } + +} Added: trunk/components-core/src/main/resources/wordnet_properties.xml =================================================================== --- trunk/components-core/src/main/resources/wordnet_properties.xml (rev 0) +++ trunk/components-core/src/main/resources/wordnet_properties.xml 2011-11-22 14:39:14 UTC (rev 3426) @@ -0,0 +1,45 @@ +<?xml version="1.0" encoding="UTF-8"?> +<jwnl_properties language="en"> + <version publisher="Princeton" number="3.0" language="en"/> + <dictionary class="net.didion.jwnl.dictionary.FileBackedDictionary"> + <param name="morphological_processor" value="net.didion.jwnl.dictionary.morph.DefaultMorphologicalProcessor"> + <param name="operations"> + <param value="net.didion.jwnl.dictionary.morph.LookupExceptionsOperation"/> + <param value="net.didion.jwnl.dictionary.morph.DetachSuffixesOperation"> + <param name="noun" value="|s=|ses=s|xes=x|zes=z|ches=ch|shes=sh|men=man|ies=y|"/> + <param name="verb" value="|s=|ies=y|es=e|es=|ed=e|ed=|ing=e|ing=|"/> + <param name="adjective" value="|er=|est=|er=e|est=e|"/> + <param name="operations"> + <param value="net.didion.jwnl.dictionary.morph.LookupIndexWordOperation"/> + <param value="net.didion.jwnl.dictionary.morph.LookupExceptionsOperation"/> + </param> + </param> + <param value="net.didion.jwnl.dictionary.morph.TokenizerOperation"> + <param name="delimiters"> + <param value=" "/> + <param value="-"/> + </param> + <param name="token_operations"> + <param value="net.didion.jwnl.dictionary.morph.LookupIndexWordOperation"/> + <param value="net.didion.jwnl.dictionary.morph.LookupExceptionsOperation"/> + <param value="net.didion.jwnl.dictionary.morph.DetachSuffixesOperation"> + <param name="noun" value="|s=|ses=s|xes=x|zes=z|ches=ch|shes=sh|men=man|ies=y|"/> + <param name="verb" value="|s=|ies=y|es=e|es=|ed=e|ed=|ing=e|ing=|"/> + <param name="adjective" value="|er=|est=|er=e|est=e|"/> + <param name="operations"> + <param value="net.didion.jwnl.dictionary.morph.LookupIndexWordOperation"/> + <param value="net.didion.jwnl.dictionary.morph.LookupExceptionsOperation"/> + </param> + </param> + </param> + </param> + </param> + </param> + <param name="dictionary_element_factory" value="net.didion.jwnl.princeton.data.PrincetonWN17FileDictionaryElementFactory"/> + <param name="file_manager" value="net.didion.jwnl.dictionary.file_manager.FileManagerImpl"> + <param name="file_type" value="net.didion.jwnl.princeton.file.PrincetonRandomAccessDictionaryFile"/> + <param name="dictionary_path" value="/opt/wordnet/dict"/> + </param> + </dictionary> + <resource class="PrincetonResource"/> +</jwnl_properties> \ No newline at end of file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |