From: <lor...@us...> - 2013-07-17 11:44:44
|
Revision: 4021 http://sourceforge.net/p/dl-learner/code/4021 Author: lorenz_b Date: 2013-07-17 11:44:41 +0000 (Wed, 17 Jul 2013) Log Message: ----------- Refactored ISLE components. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityExtraction.java Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LuceneSyntacticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/OWLOntologyLuceneSyntacticIndexCreator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndexCreator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleSemanticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SyntacticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/AbstractRelevanceMetric.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceMetric.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceUtils.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/EntityTextRetriever.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/RDFSCommentEntityTextRetriever.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/RDFSLabelEntityTextRetriever.java Removed Paths: ------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/AnnotationEntityTextRetriever.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityTextRetriever.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneBasedRelevance.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneDocument.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneIndexer.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneSearcher.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/OWLOntologyLuceneIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMILuceneBasedRelevance.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RDFSCommentEntityTextRetriever.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RDFSLabelEntityTextRetriever.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/Relevance.java Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/AnnotationEntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/AnnotationEntityTextRetriever.java 2013-07-16 05:25:41 UTC (rev 4020) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/AnnotationEntityTextRetriever.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -1,93 +0,0 @@ -/** - * - */ -package org.dllearner.algorithms.isle; - -import java.util.HashMap; -import java.util.Map; -import java.util.Set; - -import org.dllearner.core.owl.Entity; -import org.dllearner.kb.OWLAPIOntology; -import org.dllearner.utilities.owl.OWLAPIConverter; -import org.semanticweb.owlapi.model.IRI; -import org.semanticweb.owlapi.model.OWLAnnotation; -import org.semanticweb.owlapi.model.OWLAnnotationProperty; -import org.semanticweb.owlapi.model.OWLEntity; -import org.semanticweb.owlapi.model.OWLLiteral; -import org.semanticweb.owlapi.model.OWLOntology; -import org.semanticweb.owlapi.model.OWLOntologyManager; -import org.semanticweb.owlapi.util.IRIShortFormProvider; -import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider; - - -/** - * @author Lorenz Buehmann - * - */ -public class AnnotationEntityTextRetriever implements EntityTextRetriever{ - - private OWLOntology ontology; - private OWLOntologyManager manager; - - private String language = "en"; - private double weight = 1d; - - private boolean useShortFormFallback = true; - private IRIShortFormProvider sfp = new SimpleIRIShortFormProvider(); - - private OWLAnnotationProperty[] properties; - - public AnnotationEntityTextRetriever(OWLOntology ontology, OWLAnnotationProperty... properties) { - this.ontology = ontology; - this.properties = properties; - } - - public AnnotationEntityTextRetriever(OWLAPIOntology ontology, OWLAnnotationProperty... properties) { - this.ontology = ontology.createOWLOntology(manager); - } - - /** - * @param language the language to set - */ - public void setLanguage(String language) { - this.language = language; - } - - /** - * Whether to use the short form of the IRI as fallback, if no label is given. - * @param useShortFormFallback the useShortFormFallback to set - */ - public void setUseShortFormFallback(boolean useShortFormFallback) { - this.useShortFormFallback = useShortFormFallback; - } - - /* (non-Javadoc) - * @see org.dllearner.algorithms.isle.EntityTextRetriever#getRelevantText(org.dllearner.core.owl.Entity) - */ - @Override - public Map<String, Double> getRelevantText(Entity entity) { - Map<String, Double> textWithWeight = new HashMap<String, Double>(); - - OWLEntity e = OWLAPIConverter.getOWLAPIEntity(entity); - - for (OWLAnnotationProperty property : properties) { - Set<OWLAnnotation> annotations = e.getAnnotations(ontology, property); - for (OWLAnnotation annotation : annotations) { - if (annotation.getValue() instanceof OWLLiteral) { - OWLLiteral val = (OWLLiteral) annotation.getValue(); - if (val.hasLang(language)) { - String label = val.getLiteral(); - textWithWeight.put(label, weight); - } - } - } - } - - if(textWithWeight.isEmpty() && useShortFormFallback){ - textWithWeight.put(sfp.getShortForm(IRI.create(entity.getURI())), weight); - } - - return textWithWeight; - } -} Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityExtraction.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityExtraction.java 2013-07-16 05:25:41 UTC (rev 4020) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityExtraction.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -4,6 +4,7 @@ package org.dllearner.algorithms.isle; import java.util.Map; +import java.util.Set; import org.dllearner.core.owl.Entity; @@ -17,7 +18,7 @@ * Extracts all entities contained in the working text with some confidence value. * @return */ - Map<Entity, Double> extractEntities(); + Map<Entity, Set<String>> extractEntities(); /** * Extracts all entities of the given <code>type</code> contained in the working text with some confidence value. Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityTextRetriever.java 2013-07-16 05:25:41 UTC (rev 4020) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityTextRetriever.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -1,48 +0,0 @@ -/** - * Copyright (C) 2007-2011, Jens Lehmann - * - * This file is part of DL-Learner. - * - * DL-Learner is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * DL-Learner is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -package org.dllearner.algorithms.isle; - -import java.util.Map; - -import org.dllearner.core.owl.Entity; - -/** - * Interface for methods, which retrieve relevant texts given an entity - * in an ontology. An entity text retriever can do simple operations such - * as converting the URI into text or retrieving an rdfs:label, but could - * also search web pages for textual explanations of an entity. - * - * @author Jens Lehmann - * - */ -public interface EntityTextRetriever { - - /** - * The method retrieves a string or a set of strings, which is weighted by - * importance with respect to the entity. For instance, an rdfs:label of - * an entity can be given more weight than an rdfs:comment, which in turn - * can be more important than a description retrieved from a web page. - * - * @param entity The entity to handle. - * @return A weighted set of strings. For a value x, we need to have 0 <= x <= 1. - */ - public Map<String, Double> getRelevantText(Entity entity); - -} Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneBasedRelevance.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneBasedRelevance.java 2013-07-16 05:25:41 UTC (rev 4020) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneBasedRelevance.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -1,145 +0,0 @@ -/** - * Copyright (C) 2007-2011, Jens Lehmann - * - * This file is part of DL-Learner. - * - * DL-Learner is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * DL-Learner is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - - -package org.dllearner.algorithms.isle; - -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Set; - -import org.dllearner.core.owl.Entity; -import org.dllearner.utilities.owl.OWLAPIConverter; -import org.semanticweb.owlapi.model.OWLEntity; -import org.semanticweb.owlapi.model.OWLOntology; - - -public abstract class LuceneBasedRelevance implements Relevance{ - - private EntityTextRetriever textRetriever; - private LuceneSearcher searcher; - private OWLOntology ontology; - private Set<OWLEntity> entities; - -// public void printScores() throws Exception { -// for( OWLClass c: m_classes ) -// { -// Map<OWLEntity,Double> hmEntity2Score = getEntityRelevance(c); -// // normalization per class? -// hmEntity2Score = normalize( hmEntity2Score ); -// for( OWLEntity e : hmEntity2Score.keySet() ) -// { -// double dScore = hmEntity2Score.get(e); -// System.out.println( "P( "+ getLabel(c) +", "+ getLabel(e) +" ) = "+ dScore ); -// } -// } -// m_searcher.close(); -// } - - public LuceneBasedRelevance(OWLOntology ontology, LuceneSearcher searcher, EntityTextRetriever textRetriever) { - this.searcher = searcher; - this.ontology = ontology; - this.textRetriever = textRetriever; - - entities = new HashSet<OWLEntity>(); - entities.addAll(ontology.getClassesInSignature()); - entities.addAll(ontology.getObjectPropertiesInSignature()); - entities.addAll(ontology.getDataPropertiesInSignature()); - } - - public Map<OWLEntity,Double> normalizeMinMax( Map<OWLEntity,Double> hmEntity2Score ){ - Map<OWLEntity,Double> hmEntity2Norm = new HashMap<OWLEntity,Double>(); - double dMin = Double.MAX_VALUE; - Double dMax = Double.MIN_VALUE; - for( OWLEntity e : hmEntity2Score.keySet() ) - { - double dValue = hmEntity2Score.get(e); - if( dValue < dMin ){ - dMin = dValue; - } - else if( dValue > dMax ){ - dMax = dValue; - } - } - // System.out.println( "min="+ dMin +" max="+ dMax ); - for( OWLEntity e : hmEntity2Score.keySet() ) - { - double dValue = hmEntity2Score.get(e); - double dNorm = 0; - if( dMin == dMax ){ - dNorm = dValue; - } - else { - dNorm = ( dValue - dMin ) / ( dMax - dMin ); - } - hmEntity2Norm.put( e, dNorm ); - } - return hmEntity2Norm; - } - - @Override - public Map<Entity,Double> getEntityRelevance(Entity entity) throws Exception { - // computes relevance of entity for this class - // conditional probability: P(C,E)=f(C,E)/f(E) - // PMI(C,E)=log( P(C,E) / P(C) ) - Map<Entity, Double> hmEntity2Score = new HashMap<Entity, Double>(); - Map<String, Double> relevantText = textRetriever.getRelevantText(entity); - - for (Entry<String, Double> entry : relevantText.entrySet()) { - String text = entry.getKey(); - Double value = entry.getValue(); - - String sClass = text; - int nrOfDocumentsA = searcher.count(sClass); - int nrOfDocuments = searcher.indexSize(); - - for (OWLEntity otherEntity : entities) { - - Map<String, Double> otherRelevantText = textRetriever.getRelevantText(OWLAPIConverter - .getEntity(otherEntity)); - - for (Entry<String, Double> entry2 : otherRelevantText.entrySet()) { - String otherText = entry2.getKey(); - Double otherValue = entry2.getValue(); - - String sEntity = otherText; - int nrOfDocumentsB = searcher.count(sEntity); - int nrOfDocumentsAB = searcher.count(sClass + " AND " + sEntity); - // double dPEntity = (double)iEntity / (double)iAll; - - double score = computeScore(nrOfDocuments, nrOfDocumentsA, nrOfDocumentsB, nrOfDocumentsAB); - if (!Double.isNaN(score)){// && !Double.isInfinite(score)) { - hmEntity2Score.put(OWLAPIConverter.getEntity(otherEntity), score); - } - } - } - } - - return hmEntity2Score; - } - - /** - * Computes the score which is returned in {@link org.dllearner.algorithms.isle.LuceneBasedRelevance#getEntityRelevance} - * @return - */ - public abstract double computeScore(int nrOfDocuments, int nrOfDocumentsA, int nrOfDocumentsB, int nrOfDocumentsAB); - -} \ No newline at end of file Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneDocument.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneDocument.java 2013-07-16 05:25:41 UTC (rev 4020) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneDocument.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -1,43 +0,0 @@ -/** - * Copyright (C) 2007-2011, Jens Lehmann - * - * This file is part of DL-Learner. - * - * DL-Learner is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * DL-Learner is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - - -package org.dllearner.algorithms.isle; - -import java.io.File; -import java.io.FileReader; - -import org.apache.lucene.document.DateTools; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; - - -public class LuceneDocument { - - public static Document Document( File f ) throws java.io.FileNotFoundException { - Document doc = new Document(); - doc.add( new Field( "path", f.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED ) ); - doc.add( new Field( "modified", - DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE), - Field.Store.YES, Field.Index.NOT_ANALYZED)); - doc.add( new Field( "contents", new FileReader(f) ) ); - return doc; - } -} - Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneIndexer.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneIndexer.java 2013-07-16 05:25:41 UTC (rev 4020) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneIndexer.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -1,100 +0,0 @@ -/** - * Copyright (C) 2007-2011, Jens Lehmann - * - * This file is part of DL-Learner. - * - * DL-Learner is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * DL-Learner is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - - -package org.dllearner.algorithms.isle; - -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.util.Date; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.store.FSDirectory; -import org.apache.lucene.util.Version; - - -public class LuceneIndexer { - - static final File INDEX = new File( "index" ); - - public static void main( String[] args ) { - if( INDEX.exists() ) - { - System.out.println("<delete index!>"); - System.exit(1); - } -// final File docDir = new File( args[0] ); -// LuceneIndexer indexer = new LuceneIndexer( docDir ); - } - - @SuppressWarnings("deprecation") - public LuceneIndexer( File docDir ){ - System.out.println( "LuceneIndex: "+ docDir ); - Date start = new Date(); - try { - - Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43); - IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer); - IndexWriter writer = new IndexWriter( FSDirectory.open( INDEX ), indexWriterConfig); - System.out.println( "Creating index ..." ); - index( writer, docDir ); - System.out.println( "Optimizing index ..." ); - writer.close(); - Date end = new Date(); - System.out.println( end.getTime() - start.getTime() + " total milliseconds" ); - } - catch (IOException e) { - e.printStackTrace(); - } - } - - private void index( IndexWriter writer, File file ) throws IOException { - // System.out.println( "LuceneIndexer.index: "+ file ); - if( file.canRead() ) - { - if( file.isDirectory() ) - { - String[] files = file.list(); - if( files != null ) - { - for( int i = 0; i < files.length; i++ ) { - index( writer, new File( file, files[i] ) ); - } - } - } - else { - // System.out.println( "Indexer.index: adding " + file ); - try { - writer.addDocument( LuceneDocument.Document( file ) ); - } - catch (FileNotFoundException fnfe) { - fnfe.printStackTrace(); - } - } - } - else { - System.out.println( "<cannot read file!>" ); - } - } - -} Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneSearcher.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneSearcher.java 2013-07-16 05:25:41 UTC (rev 4020) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneSearcher.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -1,176 +0,0 @@ -/** - * Copyright (C) 2007-2011, Jens Lehmann - * - * This file is part of DL-Learner. - * - * DL-Learner is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * DL-Learner is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - - -package org.dllearner.algorithms.isle; - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.queryparser.classic.QueryParser; -import org.apache.lucene.search.Collector; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.FSDirectory; -import org.apache.lucene.util.Version; - -public class LuceneSearcher { - - private String INDEX = "/home/me/DBpedia-Lucene-Index"; - private String FIELD = "short-abstract"; - - private IndexReader m_reader = null; - private IndexSearcher m_searcher = null; - private Analyzer m_analyzer = null; - private QueryParser m_parser = null; - - private Map<Document,Float> m_results = null; - - - public static void main( String[] args ) throws Exception { - String sQuery = args[0]; - LuceneSearcher searcher = new LuceneSearcher(); - List<Document> docs = searcher.search( sQuery ); - System.out.println( "\nquery='"+ sQuery +"' all="+ searcher.indexSize() +" hits="+ docs.size() ); -// for( Document doc : docs ) -// { -//// String sDoc = doc.toString(); -// float score = searcher.getScore( doc ); -// System.out.println( "score="+ score +" doc="+ doc ); -// } - } - - @SuppressWarnings("deprecation") - public LuceneSearcher() throws Exception { - m_reader = DirectoryReader.open( FSDirectory.open( new File( INDEX ) )); - m_searcher = new IndexSearcher( m_reader ); - m_analyzer = new StandardAnalyzer( Version.LUCENE_43); - m_parser = new QueryParser( Version.LUCENE_43, FIELD, m_analyzer ); - } - - public LuceneSearcher(IndexReader indexReader) throws Exception { - m_reader = indexReader; - m_searcher = new IndexSearcher( m_reader ); - m_analyzer = new StandardAnalyzer( Version.LUCENE_43); - m_parser = new QueryParser( Version.LUCENE_43, FIELD, m_analyzer ); - } - - public LuceneSearcher(Directory directory, String seachField) throws Exception { - this.FIELD = seachField; - m_reader = DirectoryReader.open(directory); - m_searcher = new IndexSearcher( m_reader ); - m_analyzer = new StandardAnalyzer( Version.LUCENE_43); - m_parser = new QueryParser( Version.LUCENE_43, FIELD, m_analyzer ); - } - - public LuceneSearcher(String indexDirectory) throws Exception { - m_reader = DirectoryReader.open(FSDirectory.open(new File(indexDirectory))); - m_searcher = new IndexSearcher( m_reader ); - m_analyzer = new StandardAnalyzer( Version.LUCENE_43); - m_parser = new QueryParser( Version.LUCENE_43, FIELD, m_analyzer ); - } - - public void close() throws Exception { - m_reader.close(); - } - - public int indexSize(){ - return m_reader.numDocs(); - } - - public List<Document> search( String sQuery ) throws Exception { - m_results = new HashMap<Document,Float>(); - Query query = m_parser.parse( sQuery ); - search( query ); - // m_reader.close(); - return getDocuments(); - } - - public int count( String sQuery ) throws Exception { - return search( sQuery ).size(); - } - - public List<Document> getDocuments(){ - List<Document> docs = new ArrayList<Document>(); - for( Document doc: m_results.keySet() ){ - docs.add( doc ); - } - Collections.sort( docs, new Comparator<Document>(){ - public int compare( Document d1, Document d2 ){ - float s1 = getScore( d1 ); - float s2 = getScore( d2 ); - if( s1 > s2 ) return -1; - else if( s1 < s2 ) return 1; - return 0; - } - @Override - public boolean equals( Object obj ){ - return false; - } - } ); - return docs; - } - - public float getScore( Document doc ){ - return m_results.get( doc ); - } - - private void search( Query query ) throws IOException { - @SuppressWarnings("unused") - Collector collector = new Collector() - { - private Scorer scorer; - private int docBase; - private Map<Document,Float> results = new HashMap<Document,Float>(); - - @Override - public void collect(int doc) throws IOException { - // System.out.println("doc=" + doc + docBase + " score=" + scorer.score()); - m_results.put( m_searcher.doc( doc ), scorer.score() ); - } - @Override - public boolean acceptsDocsOutOfOrder() { - return true; - } - @Override - public void setScorer(Scorer scorer) throws IOException { - this.scorer = scorer; - } - @Override - public void setNextReader(AtomicReaderContext context) throws IOException { - this.docBase = context.docBase; - } - }; - m_searcher.search( query, collector ); - } -} Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/OWLOntologyLuceneIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/OWLOntologyLuceneIndex.java 2013-07-16 05:25:41 UTC (rev 4020) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/OWLOntologyLuceneIndex.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -1,142 +0,0 @@ -/** - * - */ -package org.dllearner.algorithms.isle; - -import java.io.IOException; -import java.util.HashSet; -import java.util.Set; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.FieldType; -import org.apache.lucene.document.StringField; -import org.apache.lucene.document.TextField; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.FSDirectory; -import org.apache.lucene.store.RAMDirectory; -import org.apache.lucene.util.Version; -import org.semanticweb.owlapi.model.IRI; -import org.semanticweb.owlapi.model.OWLAnnotation; -import org.semanticweb.owlapi.model.OWLAnnotationProperty; -import org.semanticweb.owlapi.model.OWLDataFactory; -import org.semanticweb.owlapi.model.OWLEntity; -import org.semanticweb.owlapi.model.OWLLiteral; -import org.semanticweb.owlapi.model.OWLOntology; -import org.semanticweb.owlapi.vocab.OWLRDFVocabulary; - -import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; - -/** - * Creates a Lucene Index for the labels if classes and properties. - * @author Lorenz Buehmann - * - */ -public class OWLOntologyLuceneIndex { - - private Directory directory = new RAMDirectory(); - private OWLOntology ontology; - private Set<OWLEntity> schemaEntities; - - private OWLDataFactory df = new OWLDataFactoryImpl(); - private OWLAnnotationProperty annotationProperty = df.getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_LABEL.getIRI()); - private String language = "en"; - private String searchField; - - public OWLOntologyLuceneIndex(OWLOntology ontology, String searchField) throws IOException { - this.ontology = ontology; - this.searchField = searchField; - - schemaEntities = new HashSet<OWLEntity>(); - schemaEntities.addAll(ontology.getClassesInSignature()); - schemaEntities.addAll(ontology.getObjectPropertiesInSignature()); - schemaEntities.addAll(ontology.getDataPropertiesInSignature()); - - buildIndex(); - } - - public OWLOntologyLuceneIndex(OWLOntology ontology, OWLAnnotationProperty annotationProperty) throws IOException { - this.ontology = ontology; - this.annotationProperty = annotationProperty; - - schemaEntities = new HashSet<OWLEntity>(); - schemaEntities.addAll(ontology.getClassesInSignature()); - schemaEntities.addAll(ontology.getObjectPropertiesInSignature()); - schemaEntities.addAll(ontology.getDataPropertiesInSignature()); - - buildIndex(); - } - - /** - * @return the ontology - */ - public OWLOntology getOntology() { - return ontology; - } - - /** - * @return the directory - */ - public Directory getDirectory() { - return directory; - } - - /** - * @param annotationProperty the annotationProperty to set - */ - public void setAnnotationProperty(OWLAnnotationProperty annotationProperty) { - this.annotationProperty = annotationProperty; - } - - /** - * @param annotationProperty the annotationProperty to set - */ - public void setAnnotationProperty(String annotationPropertyIRI) { - this.annotationProperty = df.getOWLAnnotationProperty(IRI.create(annotationPropertyIRI)); - } - - public void buildIndex() throws IOException{ - Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43); - IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer); - IndexWriter writer = new IndexWriter(directory, indexWriterConfig); - System.out.println( "Creating index ..." ); - - Set<Document> luceneDocuments = new HashSet<Document>(); - FieldType stringType = new FieldType(StringField.TYPE_STORED); - stringType.setStoreTermVectors(false); - FieldType textType = new FieldType(TextField.TYPE_STORED); - textType.setStoreTermVectors(false); - - for (OWLEntity entity : schemaEntities) { - String label = null; - Set<OWLAnnotation> annotations = entity.getAnnotations(ontology, annotationProperty); - for (OWLAnnotation annotation : annotations) { - if (annotation.getValue() instanceof OWLLiteral) { - OWLLiteral val = (OWLLiteral) annotation.getValue(); - if (val.hasLang(language)) { - label = val.getLiteral(); - } - } - } - - if(label != null){ - Document luceneDocument = new Document(); - luceneDocument.add(new Field("uri", entity.toStringID(), stringType)); - luceneDocument.add(new Field(searchField, label, textType)); - luceneDocuments.add(luceneDocument); - } - - } - writer.addDocuments(luceneDocuments); - - System.out.println("Done."); - writer.close(); - } - - - -} Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMILuceneBasedRelevance.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMILuceneBasedRelevance.java 2013-07-16 05:25:41 UTC (rev 4020) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMILuceneBasedRelevance.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -1,48 +0,0 @@ -/** - * Copyright (C) 2007-2011, Jens Lehmann - * - * This file is part of DL-Learner. - * - * DL-Learner is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * DL-Learner is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - - -package org.dllearner.algorithms.isle; - -import org.semanticweb.owlapi.model.OWLOntology; - - -public class PMILuceneBasedRelevance extends LuceneBasedRelevance{ - - /** - * @param ontology - * @param searcher - * @param textRetriever - */ - public PMILuceneBasedRelevance(OWLOntology ontology, LuceneSearcher searcher, EntityTextRetriever textRetriever) { - super(ontology, searcher, textRetriever); - - } - - /* (non-Javadoc) - * @see org.dllearner.algorithms.isle.LuceneBasedRelevance#computeScore(int, int, int, int) - */ - @Override - public double computeScore(int nrOfDocuments, int nrOfDocumentsA, int nrOfDocumentsB, int nrOfDocumentsAB) { - double dPClass = nrOfDocuments == 0 ? 0 : ((double) nrOfDocumentsA / (double) nrOfDocuments); - double dPClassEntity = nrOfDocumentsB == 0 ? 0 : (double) nrOfDocumentsAB / (double) nrOfDocumentsB; - double pmi = Math.log(dPClassEntity / dPClass); - return pmi; - } -} \ No newline at end of file Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RDFSCommentEntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RDFSCommentEntityTextRetriever.java 2013-07-16 05:25:41 UTC (rev 4020) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RDFSCommentEntityTextRetriever.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -1,26 +0,0 @@ -/** - * - */ -package org.dllearner.algorithms.isle; - -import org.dllearner.kb.OWLAPIOntology; -import org.semanticweb.owlapi.model.OWLOntology; -import org.semanticweb.owlapi.vocab.OWLRDFVocabulary; - -import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; - - -/** - * @author Lorenz Buehmann - * - */ -public class RDFSCommentEntityTextRetriever extends AnnotationEntityTextRetriever{ - - public RDFSCommentEntityTextRetriever(OWLOntology ontology) { - super(ontology, new OWLDataFactoryImpl().getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_COMMENT.getIRI())); - } - - public RDFSCommentEntityTextRetriever(OWLAPIOntology ontology) { - super(ontology, new OWLDataFactoryImpl().getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_COMMENT.getIRI())); - } -} Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RDFSLabelEntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RDFSLabelEntityTextRetriever.java 2013-07-16 05:25:41 UTC (rev 4020) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RDFSLabelEntityTextRetriever.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -1,26 +0,0 @@ -/** - * - */ -package org.dllearner.algorithms.isle; - -import org.dllearner.kb.OWLAPIOntology; -import org.semanticweb.owlapi.model.OWLOntology; -import org.semanticweb.owlapi.vocab.OWLRDFVocabulary; - -import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; - - -/** - * @author Lorenz Buehmann - * - */ -public class RDFSLabelEntityTextRetriever extends AnnotationEntityTextRetriever{ - - public RDFSLabelEntityTextRetriever(OWLOntology ontology) { - super(ontology, new OWLDataFactoryImpl().getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_LABEL.getIRI())); - } - - public RDFSLabelEntityTextRetriever(OWLAPIOntology ontology) { - super(ontology, new OWLDataFactoryImpl().getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_LABEL.getIRI())); - } -} Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/Relevance.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/Relevance.java 2013-07-16 05:25:41 UTC (rev 4020) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/Relevance.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -1,31 +0,0 @@ -/** - * Copyright (C) 2007-2011, Jens Lehmann - * - * This file is part of DL-Learner. - * - * DL-Learner is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * DL-Learner is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - - -package org.dllearner.algorithms.isle; - -import java.util.Map; - -import org.dllearner.core.owl.Entity; - - -public interface Relevance { - - public Map<Entity,Double> getEntityRelevance(Entity entity) throws Exception; -} \ No newline at end of file Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LuceneSyntacticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LuceneSyntacticIndex.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LuceneSyntacticIndex.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -0,0 +1,99 @@ +/** + * + */ +package org.dllearner.algorithms.isle.index; + +import java.io.File; +import java.io.IOException; +import java.util.HashSet; +import java.util.Set; + +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.queryparser.classic.ParseException; +import org.apache.lucene.queryparser.classic.QueryParser; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TotalHitCountCollector; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.util.Version; + +/** + * @author Lorenz Buehmann + * + */ +public class LuceneSyntacticIndex implements SyntacticIndex { + + private IndexSearcher searcher; + private QueryParser parser; + private IndexReader indexReader; + private String searchField; + + public LuceneSyntacticIndex(IndexReader indexReader, String searchField) throws Exception { + this.indexReader = indexReader; + this.searchField = searchField; + searcher = new IndexSearcher(indexReader); + StandardAnalyzer analyzer = new StandardAnalyzer( Version.LUCENE_43); + parser = new QueryParser( Version.LUCENE_43, searchField, analyzer ); + } + + public LuceneSyntacticIndex(Directory directory, String seachField) throws Exception { + this(DirectoryReader.open(directory), seachField); + } + + public LuceneSyntacticIndex(String indexDirectory, String seachField) throws Exception { + this(DirectoryReader.open(FSDirectory.open(new File(indexDirectory))), seachField); + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.SyntacticIndex#getDocuments(java.lang.String) + */ + @Override + public Set<String> getDocuments(String searchString) { + Set<String> documents = new HashSet<String>(); + try { + Query query = parser.parse(searchString); + ScoreDoc[] result = searcher.search(query, getSize()).scoreDocs; + for (int i = 0; i < result.length; i++) { + Document doc = searcher.doc(result[i].doc); + documents.add(doc.get(searchField)); + } + } catch (ParseException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } + return null; + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.SyntacticIndex#getSize() + */ + @Override + public int getSize() { + return indexReader.numDocs(); + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.SyntacticIndex#count(java.lang.String) + */ + @Override + public int count(String searchString) { + try { + Query query = parser.parse(searchString); + TotalHitCountCollector results = new TotalHitCountCollector(); + searcher.search(query, results); + return results.getTotalHits(); + } catch (ParseException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } + return -1; + } + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/OWLOntologyLuceneSyntacticIndexCreator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/OWLOntologyLuceneSyntacticIndexCreator.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/OWLOntologyLuceneSyntacticIndexCreator.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -0,0 +1,101 @@ +/** + * + */ +package org.dllearner.algorithms.isle.index; + +import java.io.IOException; +import java.util.HashSet; +import java.util.Set; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.Version; +import org.semanticweb.owlapi.model.OWLAnnotation; +import org.semanticweb.owlapi.model.OWLAnnotationProperty; +import org.semanticweb.owlapi.model.OWLDataFactory; +import org.semanticweb.owlapi.model.OWLEntity; +import org.semanticweb.owlapi.model.OWLLiteral; +import org.semanticweb.owlapi.model.OWLOntology; +import org.semanticweb.owlapi.vocab.OWLRDFVocabulary; + +import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; + +/** + * Creates a Lucene Index for the labels if classes and properties. + * @author Lorenz Buehmann + * + */ +public class OWLOntologyLuceneSyntacticIndexCreator { + + private Directory directory = new RAMDirectory(); + private OWLOntology ontology; + private Set<OWLEntity> schemaEntities; + + private OWLDataFactory df = new OWLDataFactoryImpl(); + private OWLAnnotationProperty annotationProperty = df.getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_LABEL.getIRI()); + private String language = "en"; + private String searchField; + + public OWLOntologyLuceneSyntacticIndexCreator(OWLOntology ontology, OWLAnnotationProperty annotationProperty, String searchField) throws IOException { + this.ontology = ontology; + this.annotationProperty = annotationProperty; + this.searchField = searchField; + + schemaEntities = new HashSet<OWLEntity>(); + schemaEntities.addAll(ontology.getClassesInSignature()); + schemaEntities.addAll(ontology.getObjectPropertiesInSignature()); + schemaEntities.addAll(ontology.getDataPropertiesInSignature()); + } + + public SyntacticIndex buildIndex() throws Exception{ + Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43); + IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer); + IndexWriter writer = new IndexWriter(directory, indexWriterConfig); + System.out.println( "Creating index ..." ); + + Set<Document> luceneDocuments = new HashSet<Document>(); + FieldType stringType = new FieldType(StringField.TYPE_STORED); + stringType.setStoreTermVectors(false); + FieldType textType = new FieldType(TextField.TYPE_STORED); + textType.setStoreTermVectors(false); + + for (OWLEntity entity : schemaEntities) { + String label = null; + Set<OWLAnnotation> annotations = entity.getAnnotations(ontology, annotationProperty); + for (OWLAnnotation annotation : annotations) { + if (annotation.getValue() instanceof OWLLiteral) { + OWLLiteral val = (OWLLiteral) annotation.getValue(); + if (val.hasLang(language)) { + label = val.getLiteral(); + } + } + } + + if(label != null){ + Document luceneDocument = new Document(); + luceneDocument.add(new Field("uri", entity.toStringID(), stringType)); + luceneDocument.add(new Field(searchField, label, textType)); + luceneDocuments.add(luceneDocument); + } + + } + writer.addDocuments(luceneDocuments); + + System.out.println("Done."); + writer.close(); + + return new LuceneSyntacticIndex(directory, searchField); + } + + + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndex.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndex.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -0,0 +1,35 @@ +/** + * + */ +package org.dllearner.algorithms.isle.index; + +import java.util.Set; + +import org.dllearner.core.owl.Entity; + +/** + * This class + * @author Lorenz Buehmann + * + */ +public interface SemanticIndex { + + /** + * This method returns a set of documents for the given entity. + * @param entity + * @return + */ + Set<String> getDocuments(Entity entity); + /** + * This method returns the number of documents for the given entity. + * @param entity + * @return + */ + int count(Entity entity); + /** + * This methods returns the total number of documents contained in the index. + * @return the total number of documents contained in the index + */ + int getSize(); + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndexCreator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndexCreator.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndexCreator.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -0,0 +1,22 @@ +/** + * + */ +package org.dllearner.algorithms.isle.index; + +/** + * This gets a syntactic index and returns a semantic index by applying WSD etc. + * @author Lorenz Buehmann + * + */ +public class SemanticIndexCreator { + + private SyntacticIndex syntacticIndex; + + public SemanticIndexCreator(SyntacticIndex syntacticIndex) { + this.syntacticIndex = syntacticIndex; + } + + public SemanticIndex createSemanticIndex(){ + return null; + } +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleSemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleSemanticIndex.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleSemanticIndex.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -0,0 +1,43 @@ +/** + * + */ +package org.dllearner.algorithms.isle.index; + +import java.util.Set; + +import org.dllearner.core.owl.Entity; + +/** + * @author Lorenz Buehmann + * + */ +public class SimpleSemanticIndex implements SemanticIndex{ + + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.SemanticIndex#getDocuments(org.dllearner.core.owl.Entity) + */ + @Override + public Set<String> getDocuments(Entity entity) { + return null; + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.SemanticIndex#count(java.lang.String) + */ + @Override + public int count(Entity entity) { + return 0; + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.SemanticIndex#getSize() + */ + @Override + public int getSize() { + return 0; + } + + + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SyntacticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SyntacticIndex.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SyntacticIndex.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -0,0 +1,32 @@ +/** + * + */ +package org.dllearner.algorithms.isle.index; + +import java.util.Set; + +/** + * @author Lorenz Buehmann + * + */ +public interface SyntacticIndex { + + /** + * This method returns a set of documents based on how the underlying index is processing the given search string. + * @param searchString + * @return + */ + Set<String> getDocuments(String searchString); + /** + * This method returns the number of documents based on how the underlying index is processing the given search string. + * @param searchString + * @return + */ + int count(String searchString); + /** + * This methods returns the total number of documents contained in the index. + * @return the total number of documents contained in the index + */ + int getSize(); + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/AbstractRelevanceMetric.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/AbstractRelevanceMetric.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/AbstractRelevanceMetric.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -0,0 +1,54 @@ +/** + * + */ +package org.dllearner.algorithms.isle.metrics; + +import java.util.HashMap; +import java.util.Map; + +import org.dllearner.algorithms.isle.index.SemanticIndex; +import org.semanticweb.owlapi.model.OWLEntity; + +/** + * @author Lorenz Buehmann + * + */ +public abstract class AbstractRelevanceMetric implements RelevanceMetric { + + protected SemanticIndex index; + + public AbstractRelevanceMetric(SemanticIndex index) { + this.index = index; + } + + public Map<OWLEntity,Double> normalizeMinMax( Map<OWLEntity,Double> hmEntity2Score ){ + Map<OWLEntity,Double> hmEntity2Norm = new HashMap<OWLEntity,Double>(); + double dMin = Double.MAX_VALUE; + Double dMax = Double.MIN_VALUE; + for( OWLEntity e : hmEntity2Score.keySet() ) + { + double dValue = hmEntity2Score.get(e); + if( dValue < dMin ){ + dMin = dValue; + } + else if( dValue > dMax ){ + dMax = dValue; + } + } + // System.out.println( "min="+ dMin +" max="+ dMax ); + for( OWLEntity e : hmEntity2Score.keySet() ) + { + double dValue = hmEntity2Score.get(e); + double dNorm = 0; + if( dMin == dMax ){ + dNorm = dValue; + } + else { + dNorm = ( dValue - dMin ) / ( dMax - dMin ); + } + hmEntity2Norm.put( e, dNorm ); + } + return hmEntity2Norm; + } + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -0,0 +1,37 @@ +/** + * + */ +package org.dllearner.algorithms.isle.metrics; + +import java.util.Set; + +import org.dllearner.algorithms.isle.index.SemanticIndex; +import org.dllearner.core.owl.Entity; + +import com.google.common.collect.Sets; + +/** + * @author Lorenz Buehmann + * + */ +public class PMIRelevanceMetric extends AbstractRelevanceMetric { + + public PMIRelevanceMetric(SemanticIndex index) { + super(index); + } + + @Override + public double getRelevance(Entity entityA, Entity entityB){ + Set<String> documentsA = index.getDocuments(entityA); + Set<String> documentsB = index.getDocuments(entityB); + Set<String> documentsAB = Sets.intersection(documentsA, documentsB); + int nrOfDocuments = index.getSize(); + + double dPClass = nrOfDocuments == 0 ? 0 : ((double) documentsA.size() / (double) nrOfDocuments); + double dPClassEntity = documentsB.size() == 0 ? 0 : (double) documentsAB.size() / (double) documentsB.size(); + double pmi = Math.log(dPClassEntity / dPClass); + + return pmi; + } + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceMetric.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceMetric.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceMetric.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -0,0 +1,33 @@ +/** + * Copyright (C) 2007-2011, Jens Lehmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + + +package org.dllearner.algorithms.isle.metrics; + +import org.dllearner.core.owl.Entity; + + +public interface RelevanceMetric { + /** + * @param entity1 + * @param entity2 + * @return + */ + double getRelevance(Entity entity1, Entity entity2); +} \ No newline at end of file Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceUtils.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceUtils.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceUtils.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -0,0 +1,50 @@ +/** + * + */ +package org.dllearner.algorithms.isle.metrics; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +import org.dllearner.core.owl.Entity; +import org.dllearner.utilities.owl.OWLAPIConverter; +import org.semanticweb.owlapi.model.OWLEntity; +import org.semanticweb.owlapi.model.OWLOntology; + +/** + * @author Lorenz Buehmann + * + */ +public class RelevanceUtils { + + public static Map<Entity, Double> getRelevantEntities(Entity entity, Set<Entity> otherEntities, RelevanceMetric metric){ + Map<Entity, Double> relevantEntities = new HashMap<Entity, Double>(); + + for (Entity otherEntity : otherEntities) { + double relevance = metric.getRelevance(entity, otherEntity); + relevantEntities.put(otherEntity, relevance); + } + + return relevantEntities; + } + + public static Map<Entity, Double> getRelevantEntities(Entity entity, OWLOntology ontology, RelevanceMetric metric){ + Map<Entity, Double> relevantEntities = new HashMap<Entity, Double>(); + + Set<OWLEntity> owlEntities = new HashSet<OWLEntity>(); + owlEntities.addAll(ontology.getClassesInSignature()); + owlEntities.addAll(ontology.getDataPropertiesInSignature()); + owlEntities.addAll(ontology.getObjectPropertiesInSignature()); + Set<Entity> otherEntities = OWLAPIConverter.getEntities(owlEntities); + + for (Entity otherEntity : otherEntities) { + double relevance = metric.getRelevance(entity, otherEntity); + relevantEntities.put(otherEntity, relevance); + } + + return relevantEntities; + } + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -0,0 +1,93 @@ +/** + * + */ +package org.dllearner.algorithms.isle.textretrieval; + +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +import org.dllearner.core.owl.Entity; +import org.dllearner.kb.OWLAPIOntology; +import org.dllearner.utilities.owl.OWLAPIConverter; +import org.semanticweb.owlapi.model.IRI; +import org.semanticweb.owlapi.model.OWLAnnotation; +import org.semanticweb.owlapi.model.OWLAnnotationProperty; +import org.semanticweb.owlapi.model.OWLEntity; +import org.semanticweb.owlapi.model.OWLLiteral; +import org.semanticweb.owlapi.model.OWLOntology; +import org.semanticweb.owlapi.model.OWLOntologyManager; +import org.semanticweb.owlapi.util.IRIShortFormProvider; +import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider; + + +/** + * @author Lorenz Buehmann + * + */ +public class AnnotationEntityTextRetriever implements EntityTextRetriever{ + + private OWLOntology ontology; + private OWLOntologyManager manager; + + private String language = "en"; + private double weight = 1d; + + private boolean useShortFormFallback = true; + private IRIShortFormProvider sfp = new SimpleIRIShortFormProvider(); + + private OWLAnnotationProperty[] properties; + + public AnnotationEntityTextRetriever(OWLOntology ontology, OWLAnnotationProperty... properties) { + this.ontology = ontology; + this.properties = properties; + } + + public AnnotationEntityTextRetriever(OWLAPIOntology ontology, OWLAnnotationProperty... properties) { + this.ontology = ontology.createOWLOntology(manager); + } + + /** + * @param language the language to set + */ + public void setLanguage(String language) { + this.language = language; + } + + /** + * Whether to use the short form of the IRI as fallback, if no label is given. + * @param useShortFormFallback the useShortFormFallback to set + */ + public void setUseShortFormFallback(boolean useShortFormFallback) { + this.useShortFormFallback = useShortFormFallback; + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.EntityTextRetriever#getRelevantText(org.dllearner.core.owl.Entity) + */ + @Override + public Map<String, Double> getRelevantText(Entity entity) { + Map<String, Double> textWithWeight = new HashMap<String, Double>(); + + OWLEntity e = OWLAPIConverter.getOWLAPIEntity(entity); + + for (OWLAnnotationProperty property : properties) { + Set<OWLAnnotation> annotations = e.getAnnotations(ontology, property); + for (OWLAnnotation annotation : annotations) { + if (annotation.getValue() instanceof OWLLiteral) { + OWLLiteral val = (OWLLiteral) annotation.getValue(); + if (val.hasLang(language)) { + String label = val.getLiteral(); + textWithWeight.put(label, weight); + } + } + } + } + + if(textWithWeight.isEmpty() && useShortFormFallback){ + textWithWeight.put(sfp.getShortForm(IRI.create(entity.getURI())), weight); + } + + return textWithWeight; + } +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/EntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/EntityTextRetriever.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/EntityTe... [truncated message content] |