From: <and...@us...> - 2013-09-04 09:38:10
|
Revision: 4048 http://sourceforge.net/p/dl-learner/code/4048 Author: andremelo Date: 2013-09-04 09:38:06 +0000 (Wed, 04 Sep 2013) Log Message: ----------- Adding getNormalizedRelevance, which returns a value in [0,1] Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceMetric.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java 2013-09-04 09:18:50 UTC (rev 4047) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java 2013-09-04 09:38:06 UTC (rev 4048) @@ -30,9 +30,25 @@ double dPClass = nrOfDocuments == 0 ? 0 : ((double) documentsA.size() / (double) nrOfDocuments); double dPClassEntity = documentsB.size() == 0 ? 0 : (double) documentsAB.size() / (double) documentsB.size(); - double pmi = Math.log(dPClassEntity / dPClass); + double pmi = Math.log(dPClassEntity / dPClass); return pmi; } + + @Override + public double getNormalizedRelevance(Entity entityA, Entity entityB){ + Set<AnnotatedDocument> documentsA = index.getDocuments(entityA); + Set<AnnotatedDocument> documentsB = index.getDocuments(entityB); + Set<AnnotatedDocument> documentsAB = Sets.intersection(documentsA, documentsB); + int nrOfDocuments = index.getSize(); + + double dPClass = nrOfDocuments == 0 ? 0 : ((double) documentsA.size() / (double) nrOfDocuments); + double dPClassEntity = documentsB.size() == 0 ? 
0 : (double) documentsAB.size() / (double) documentsB.size(); + double pmi = Math.log(dPClassEntity / dPClass); + + double normalizedPMI = (pmi/-Math.log(dPClassEntity) + 1)/2; + + return normalizedPMI; + } } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceMetric.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceMetric.java 2013-09-04 09:18:50 UTC (rev 4047) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceMetric.java 2013-09-04 09:38:06 UTC (rev 4048) @@ -30,4 +30,12 @@ * @return */ double getRelevance(Entity entity1, Entity entity2); + + /** + * Get normalized relevance value in [0,1] * + * @param entity1 + * @param entity2 + * @return + */ + double getNormalizedRelevance(Entity entity1, Entity entity2); } \ No newline at end of file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <and...@us...> - 2014-02-13 11:47:58
|
Revision: 4223 http://sourceforge.net/p/dl-learner/code/4223 Author: andremelo Date: 2014-02-13 11:47:52 +0000 (Thu, 13 Feb 2014) Log Message: ----------- Preventing NaN values for the relevance measures Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/ChiSquareRelevanceMetric.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/DiceRelevanceMetric.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/JaccardRelevanceMetric.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/LLRRelevanceMetric.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/SCIRelevanceMetric.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/TTestRelevanceMetric.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/ChiSquareRelevanceMetric.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/ChiSquareRelevanceMetric.java 2014-02-12 13:58:05 UTC (rev 4222) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/ChiSquareRelevanceMetric.java 2014-02-13 11:47:52 UTC (rev 4223) @@ -18,16 +18,20 @@ } private double chiSquareIteration(double fXY, double e_fXY) { - return Math.pow(fXY - e_fXY,2)/e_fXY; + return Math.pow(fXY - e_fXY, 2)/e_fXY; } @Override public synchronized double getRelevance(Entity entityA, Entity entityB){ double fA = index.getNumberOfDocumentsFor(entityA); double fB = index.getNumberOfDocumentsFor(entityB); - double fAB = index.getNumberOfDocumentsFor(entityA, entityB); double N = index.getTotalNumberOfDocuments(); + if (N==0 || fA==0 || fB==0) + return 0; + + double fAB = index.getNumberOfDocumentsFor(entityA, entityB); + double e_fAB = fA*fB/N; // Expected frequency of A and B assuming independence double chi2 = 0; Modified: 
trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/DiceRelevanceMetric.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/DiceRelevanceMetric.java 2014-02-12 13:58:05 UTC (rev 4222) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/DiceRelevanceMetric.java 2014-02-13 11:47:52 UTC (rev 4223) @@ -17,6 +17,10 @@ public double getRelevance(Entity entityA, Entity entityB) { double nrOfDocumentsA = index.getNumberOfDocumentsFor(entityA); double nrOfDocumentsB = index.getNumberOfDocumentsFor(entityB); + + if (nrOfDocumentsA==0 || nrOfDocumentsB==0) + return 0; + double nrOfDocumentsAandB = index.getNumberOfDocumentsFor(entityA, entityB); double dice = 2 * nrOfDocumentsAandB / (nrOfDocumentsA + nrOfDocumentsB); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/JaccardRelevanceMetric.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/JaccardRelevanceMetric.java 2014-02-12 13:58:05 UTC (rev 4222) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/JaccardRelevanceMetric.java 2014-02-13 11:47:52 UTC (rev 4223) @@ -17,12 +17,13 @@ public double getRelevance(Entity entityA, Entity entityB) { long nrOfDocumentsA = index.getNumberOfDocumentsFor(entityA); long nrOfDocumentsB = index.getNumberOfDocumentsFor(entityB); + + if (nrOfDocumentsA==0 || nrOfDocumentsB==0) + return 0; + double nrOfDocumentsAandB = index.getNumberOfDocumentsFor(entityA, entityB); double nrOfDocumentsAorB = nrOfDocumentsA + nrOfDocumentsB - nrOfDocumentsAandB; - if (nrOfDocumentsAorB==0) - return 1.0; - double jaccard = nrOfDocumentsAandB / nrOfDocumentsAorB; return jaccard; Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/LLRRelevanceMetric.java 
=================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/LLRRelevanceMetric.java 2014-02-12 13:58:05 UTC (rev 4222) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/LLRRelevanceMetric.java 2014-02-13 11:47:52 UTC (rev 4223) @@ -18,6 +18,8 @@ } private double llrIteration(double pXY, double pX, double pY) { + if (pXY==0 || pX==0 || pY==0) + return 0; return pXY * Math.log(pXY/(pX*pY)); } @@ -25,9 +27,14 @@ public synchronized double getRelevance(Entity entityA, Entity entityB){ double fA = index.getNumberOfDocumentsFor(entityA); double fB = index.getNumberOfDocumentsFor(entityB); + double N = index.getTotalNumberOfDocuments(); double fAB = index.getNumberOfDocumentsFor(entityA, entityB); - double N = index.getTotalNumberOfDocuments(); + if (N==0 || fA==0 || fB==0) + return 0; + + + double pA = fA/N; double pB = fB/N; double pAB = fAB/N; Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/SCIRelevanceMetric.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/SCIRelevanceMetric.java 2014-02-12 13:58:05 UTC (rev 4222) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/SCIRelevanceMetric.java 2014-02-13 11:47:52 UTC (rev 4223) @@ -18,16 +18,19 @@ @Override public synchronized double getRelevance(Entity entityA, Entity entityB){ - long nrOfDocumentsA = index.getNumberOfDocumentsFor(entityA); - long nrOfDocumentsB = index.getNumberOfDocumentsFor(entityB); - long nrOfDocumentsAB = index.getNumberOfDocumentsFor(entityA, entityB); + double fA = index.getNumberOfDocumentsFor(entityA); + double fB = index.getNumberOfDocumentsFor(entityB); + double fAB = index.getNumberOfDocumentsFor(entityA, entityB); + double N = index.getTotalNumberOfDocuments(); - long nrOfDocuments = index.getTotalNumberOfDocuments(); 
+ if (fA==0 || fB==0 || fAB==0) + return 0; - double pA = nrOfDocuments == 0 ? 0 : ((double) nrOfDocumentsA / (double) nrOfDocuments); - double pB = nrOfDocuments == 0 ? 0 : ((double) nrOfDocumentsB / (double) nrOfDocuments); - double pAB = nrOfDocuments == 0 ? 0 : ((double) nrOfDocumentsAB / (double) nrOfDocuments); + double pA = fA / N; + double pB = fB / N; + double pAB = fAB / N; + double sci = pAB / (pA * Math.sqrt(pB)); return sci; Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/TTestRelevanceMetric.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/TTestRelevanceMetric.java 2014-02-12 13:58:05 UTC (rev 4222) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/TTestRelevanceMetric.java 2014-02-13 11:47:52 UTC (rev 4223) @@ -23,7 +23,10 @@ double nrOfDocumentsAB = index.getNumberOfDocumentsFor(entityA, entityB); double nrOfDocuments = index.getTotalNumberOfDocuments(); - + + if (nrOfDocumentsA==0 || nrOfDocumentsB==0 || nrOfDocumentsAB==0) + return 0; + double ttest = (nrOfDocumentsAB - (nrOfDocumentsA*nrOfDocumentsB)/nrOfDocuments) / Math.sqrt(nrOfDocumentsAB*(1-nrOfDocumentsAB/nrOfDocuments)); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |