From: <and...@us...> - 2014-02-11 16:48:05
|
Revision: 4221 http://sourceforge.net/p/dl-learner/code/4221 Author: andremelo Date: 2014-02-11 16:48:02 +0000 (Tue, 11 Feb 2014) Log Message: ----------- Adding relevance measures from http://www.cse.iitb.ac.in/~damani/papers/conll13.pdf Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/ChiSquareRelevanceMetric.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/DiceRelevanceMetric.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/JaccardRelevanceMetric.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/LLRRelevanceMetric.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/SCIRelevanceMetric.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/SignificantPMIRelevanceMetric.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/TTestRelevanceMetric.java trunk/components-core/src/test/java/org/dllearner/algorithms/isle/metrics/RelevanceMetricsTest.java Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/ChiSquareRelevanceMetric.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/ChiSquareRelevanceMetric.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/ChiSquareRelevanceMetric.java 2014-02-11 16:48:02 UTC (rev 4221) @@ -0,0 +1,52 @@ +/** + * + */ +package org.dllearner.algorithms.isle.metrics; + +import org.dllearner.algorithms.isle.index.Index; +import org.dllearner.core.owl.Entity; + +/** + * Chi Squared + * @author Andre Melo + * + */ +public class ChiSquareRelevanceMetric extends AbstractRelevanceMetric { + + public ChiSquareRelevanceMetric(Index index) { + super(index); + } + + private double chiSquareIteration(double fXY, double e_fXY) { + return Math.pow(fXY - e_fXY,2)/e_fXY; + } + + 
@Override + public synchronized double getRelevance(Entity entityA, Entity entityB){ + double fA = index.getNumberOfDocumentsFor(entityA); + double fB = index.getNumberOfDocumentsFor(entityB); + double fAB = index.getNumberOfDocumentsFor(entityA, entityB); + double N = index.getTotalNumberOfDocuments(); + + double e_fAB = fA*fB/N; // Expected frequency of A and B assuming independence + + double chi2 = 0; + + // X=A and Y=B + chi2 += chiSquareIteration(fAB, e_fAB); + // X=A and Y=not B + chi2 += chiSquareIteration(fA-fAB, fA-e_fAB); + // X=not A and Y=B + chi2 += chiSquareIteration(fB-fAB, fB-e_fAB); + // X=not A and Y=not B + chi2 += chiSquareIteration(N-fA-fB+fAB, N-fA-fB+e_fAB); + + return chi2; + } + + @Override + public synchronized double getNormalizedRelevance(Entity entityA, Entity entityB){ + return Double.NaN; + } + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/DiceRelevanceMetric.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/DiceRelevanceMetric.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/DiceRelevanceMetric.java 2014-02-11 16:48:02 UTC (rev 4221) @@ -0,0 +1,33 @@ +package org.dllearner.algorithms.isle.metrics; + +import org.dllearner.algorithms.isle.index.Index; +import org.dllearner.core.owl.Entity; + +/** + * @author Andre Melo + * + */ +public class DiceRelevanceMetric extends AbstractRelevanceMetric{ + + public DiceRelevanceMetric(Index index) { + super(index); + } + + @Override + public double getRelevance(Entity entityA, Entity entityB) { + double nrOfDocumentsA = index.getNumberOfDocumentsFor(entityA); + double nrOfDocumentsB = index.getNumberOfDocumentsFor(entityB); + double nrOfDocumentsAandB = index.getNumberOfDocumentsFor(entityA, entityB); + + double dice = 2 * nrOfDocumentsAandB / (nrOfDocumentsA + nrOfDocumentsB); + + return dice; + } + + @Override + 
public double getNormalizedRelevance(Entity entity1, Entity entity2) { + return getRelevance(entity1, entity2); + } + + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/JaccardRelevanceMetric.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/JaccardRelevanceMetric.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/JaccardRelevanceMetric.java 2014-02-11 16:48:02 UTC (rev 4221) @@ -0,0 +1,37 @@ +package org.dllearner.algorithms.isle.metrics; + +import org.dllearner.algorithms.isle.index.Index; +import org.dllearner.core.owl.Entity; + +/** + * @author Andre Melo + * + */ +public class JaccardRelevanceMetric extends AbstractRelevanceMetric{ + + public JaccardRelevanceMetric(Index index) { + super(index); + } + + @Override + public double getRelevance(Entity entityA, Entity entityB) { + long nrOfDocumentsA = index.getNumberOfDocumentsFor(entityA); + long nrOfDocumentsB = index.getNumberOfDocumentsFor(entityB); + double nrOfDocumentsAandB = index.getNumberOfDocumentsFor(entityA, entityB); + double nrOfDocumentsAorB = nrOfDocumentsA + nrOfDocumentsB - nrOfDocumentsAandB; + + if (nrOfDocumentsAorB==0) + return 1.0; + + double jaccard = nrOfDocumentsAandB / nrOfDocumentsAorB; + + return jaccard; + } + + @Override + public double getNormalizedRelevance(Entity entity1, Entity entity2) { + return getRelevance(entity1, entity2); + } + + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/LLRRelevanceMetric.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/LLRRelevanceMetric.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/LLRRelevanceMetric.java 2014-02-11 16:48:02 UTC (rev 4221) @@ -0,0 +1,54 @@ +/** + * + */ +package 
org.dllearner.algorithms.isle.metrics; + +import org.dllearner.algorithms.isle.index.Index; +import org.dllearner.core.owl.Entity; + +/** + * Log Likelihood Ratio + * @author Andre Melo + * + */ +public class LLRRelevanceMetric extends AbstractRelevanceMetric { + + public LLRRelevanceMetric(Index index) { + super(index); + } + + private double llrIteration(double pXY, double pX, double pY) { + return pXY * Math.log(pXY/(pX*pY)); + } + + @Override + public synchronized double getRelevance(Entity entityA, Entity entityB){ + double fA = index.getNumberOfDocumentsFor(entityA); + double fB = index.getNumberOfDocumentsFor(entityB); + double fAB = index.getNumberOfDocumentsFor(entityA, entityB); + double N = index.getTotalNumberOfDocuments(); + + double pA = fA/N; + double pB = fB/N; + double pAB = fAB/N; + + double llr = 0; + + // X=A and Y=B + llr += llrIteration( pAB, pA, pB ); + // X=A and Y=not B + llr += llrIteration( pA-pAB, pA, 1-pB ); + // X=not A and Y=B + llr += llrIteration( pB-pAB, 1-pA, pB ); + // X=not A and Y=not B + llr += llrIteration( 1-pA-pB+pAB,1-pA, 1-pB ); + + return llr; + } + + @Override + public synchronized double getNormalizedRelevance(Entity entityA, Entity entityB){ + return Double.NaN; + } + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/SCIRelevanceMetric.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/SCIRelevanceMetric.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/SCIRelevanceMetric.java 2014-02-11 16:48:02 UTC (rev 4221) @@ -0,0 +1,42 @@ +/** + * + */ +package org.dllearner.algorithms.isle.metrics; + +import org.dllearner.algorithms.isle.index.Index; +import org.dllearner.core.owl.Entity; + +/** + * @author Andre Melo + * + */ +public class SCIRelevanceMetric extends AbstractRelevanceMetric { + + public SCIRelevanceMetric(Index index) { + super(index); + } 
+ + @Override + public synchronized double getRelevance(Entity entityA, Entity entityB){ + long nrOfDocumentsA = index.getNumberOfDocumentsFor(entityA); + long nrOfDocumentsB = index.getNumberOfDocumentsFor(entityB); + long nrOfDocumentsAB = index.getNumberOfDocumentsFor(entityA, entityB); + + long nrOfDocuments = index.getTotalNumberOfDocuments(); + + double pA = nrOfDocuments == 0 ? 0 : ((double) nrOfDocumentsA / (double) nrOfDocuments); + double pB = nrOfDocuments == 0 ? 0 : ((double) nrOfDocumentsB / (double) nrOfDocuments); + double pAB = nrOfDocuments == 0 ? 0 : ((double) nrOfDocumentsAB / (double) nrOfDocuments); + + double sci = pAB / (pA * Math.sqrt(pB)); + + return sci; + } + + @Override + public synchronized double getNormalizedRelevance(Entity entityA, Entity entityB){ + //TODO + return Double.NaN; + } + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/SignificantPMIRelevanceMetric.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/SignificantPMIRelevanceMetric.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/SignificantPMIRelevanceMetric.java 2014-02-11 16:48:02 UTC (rev 4221) @@ -0,0 +1,48 @@ +/** + * + */ +package org.dllearner.algorithms.isle.metrics; + +import org.dllearner.algorithms.isle.index.Index; +import org.dllearner.core.owl.Entity; + +/** + * @author Lorenz Buehmann + * + */ +public class SignificantPMIRelevanceMetric extends AbstractRelevanceMetric { + + protected final double delta; + + /** + * + * @param index: semantic index + * @param delta: parameter varying from 0 to 1 + */ + public SignificantPMIRelevanceMetric(Index index, double delta) { + super(index); + if (delta<0 ||delta>1) + throw new IllegalArgumentException("Delta parameter should be in [0,1]"); + this.delta = delta; + } + + @Override + public synchronized double getRelevance(Entity entityA, Entity 
entityB){ + double fA = index.getNumberOfDocumentsFor(entityA); + double fB = index.getNumberOfDocumentsFor(entityB); + double fAB = index.getNumberOfDocumentsFor(entityA, entityB); + + double N = index.getTotalNumberOfDocuments(); + + double pmi = Math.log(fAB / (fA*fB/N + Math.sqrt(fA)*Math.sqrt(Math.log(delta)/-2))); + + return pmi; + } + + @Override + public synchronized double getNormalizedRelevance(Entity entityA, Entity entityB){ + //TODO + return Double.NaN; + } + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/TTestRelevanceMetric.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/TTestRelevanceMetric.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/TTestRelevanceMetric.java 2014-02-11 16:48:02 UTC (rev 4221) @@ -0,0 +1,39 @@ +/** + * + */ +package org.dllearner.algorithms.isle.metrics; + +import org.dllearner.algorithms.isle.index.Index; +import org.dllearner.core.owl.Entity; + +/** + * @author Andre Melo + * + */ +public class TTestRelevanceMetric extends AbstractRelevanceMetric { + + public TTestRelevanceMetric(Index index) { + super(index); + } + + @Override + public synchronized double getRelevance(Entity entityA, Entity entityB){ + double nrOfDocumentsA = index.getNumberOfDocumentsFor(entityA); + double nrOfDocumentsB = index.getNumberOfDocumentsFor(entityB); + double nrOfDocumentsAB = index.getNumberOfDocumentsFor(entityA, entityB); + + double nrOfDocuments = index.getTotalNumberOfDocuments(); + + double ttest = (nrOfDocumentsAB - (nrOfDocumentsA*nrOfDocumentsB)/nrOfDocuments) / + Math.sqrt(nrOfDocumentsAB*(1-nrOfDocumentsAB/nrOfDocuments)); + + return ttest; + } + + @Override + public synchronized double getNormalizedRelevance(Entity entityA, Entity entityB){ + //TODO + return Double.NaN; + } + +} Added: 
trunk/components-core/src/test/java/org/dllearner/algorithms/isle/metrics/RelevanceMetricsTest.java =================================================================== --- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/metrics/RelevanceMetricsTest.java (rev 0) +++ trunk/components-core/src/test/java/org/dllearner/algorithms/isle/metrics/RelevanceMetricsTest.java 2014-02-11 16:48:02 UTC (rev 4221) @@ -0,0 +1,140 @@ +/** + * + */ +package org.dllearner.algorithms.isle.metrics; + +import static org.junit.Assert.fail; + +import java.io.BufferedInputStream; +import java.io.InputStream; +import java.net.URL; + +import org.apache.commons.compress.compressors.CompressorInputStream; +import org.apache.commons.compress.compressors.CompressorStreamFactory; +import org.dllearner.algorithms.isle.index.Index; +import org.dllearner.algorithms.isle.index.syntactic.SolrSyntacticIndex; +import org.dllearner.core.owl.Entity; +import org.dllearner.core.owl.NamedClass; +import org.dllearner.core.owl.ObjectProperty; +import org.junit.Test; +import org.semanticweb.owlapi.apibinding.OWLManager; +import org.semanticweb.owlapi.model.OWLOntology; + +/** + * @author Lorenz Buehmann + * + */ +public class RelevanceMetricsTest { + + AbstractRelevanceMetric metric; + Index index; + static final String solrServerURL = "http://solr.aksw.org/en_dbpedia_resources/"; + static final String searchField = "comment"; + static final String DBPEDIA_NS = "http://dbpedia.org/ontology/"; + + /** + * + */ + public RelevanceMetricsTest() { + OWLOntology ontology = null; + try { + URL url = new URL("http://downloads.dbpedia.org/3.9/dbpedia_3.9.owl.bz2"); + InputStream is = new BufferedInputStream(url.openStream()); + CompressorInputStream in = new CompressorStreamFactory().createCompressorInputStream("bzip2", is); + ontology = OWLManager.createOWLOntologyManager().loadOntologyFromOntologyDocument(in); + } catch (Exception e){ + e.printStackTrace(); + } + index = new 
SolrSyntacticIndex(ontology, solrServerURL, searchField); + + } + + private void computeRelevanceScores(AbstractRelevanceMetric metric) { + //dbo:Person and dbo:Film + Entity entity1 = new NamedClass(DBPEDIA_NS + "Person"); + Entity entity2 = new NamedClass(DBPEDIA_NS + "Film"); + double relevance = metric.getRelevance(entity1, entity2); + System.out.println(relevance); + + //dbo:Person and dbo:Animal + entity1 = new NamedClass(DBPEDIA_NS + "Person"); + entity2 = new NamedClass(DBPEDIA_NS + "Animal"); + relevance = metric.getRelevance(entity1, entity2); + System.out.println(relevance); + + // dbo:Person and dbo:birthPlace + entity1 = new NamedClass(DBPEDIA_NS + "Person"); + entity2 = new ObjectProperty(DBPEDIA_NS + "birthPlace"); + relevance = metric.getRelevance(entity1, entity2); + System.out.println(relevance); + } + + /** + * Test method for {@link org.dllearner.algorithms.isle.metrics.PMIRelevanceMetric#getRelevance(org.dllearner.core.owl.Entity, org.dllearner.core.owl.Entity)}. + */ + @Test + public void testGetRelevanceJaccard() { + System.out.println("JACCARD: "); + metric = new JaccardRelevanceMetric(index); + computeRelevanceScores(metric); + } + + @Test + public void testGetRelevancePMI() { + System.out.println("PMI: "); + metric = new PMIRelevanceMetric(index); + computeRelevanceScores(metric); + } + + @Test + public void testGetRelevanceSignificantPMI() { + System.out.println("SignificantPMI: "); + double delta = 0.5; + metric = new SignificantPMIRelevanceMetric(index,delta); + computeRelevanceScores(metric); + } + + @Test + public void testGetRelevanceDice() { + System.out.println("DICE: "); + metric = new DiceRelevanceMetric(index); + computeRelevanceScores(metric); + } + + @Test + public void testGetRelevanceSCI() { + System.out.println("SCI: "); + metric = new SCIRelevanceMetric(index); + computeRelevanceScores(metric); + } + + @Test + public void testGetRelevanceTTest() { + System.out.println("T-TEST: "); + metric = new TTestRelevanceMetric(index); + 
computeRelevanceScores(metric); + } + + @Test + public void testGetRelevanceChiSquared() { + System.out.println("CHI^2: "); + metric = new ChiSquareRelevanceMetric(index); + computeRelevanceScores(metric); + } + + @Test + public void testGetRelevanceLLR() { + System.out.println("LLR: "); + metric = new LLRRelevanceMetric(index); + computeRelevanceScores(metric); + } + + /** + * Test method for {@link org.dllearner.algorithms.isle.metrics.PMIRelevanceMetric#getNormalizedRelevance(org.dllearner.core.owl.Entity, org.dllearner.core.owl.Entity)}. + */ + @Test + public void testGetNormalizedRelevance() { + fail("Not yet implemented"); + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |