From: <lor...@us...> - 2013-07-17 11:44:44
Revision: 4021
          http://sourceforge.net/p/dl-learner/code/4021
Author:   lorenz_b
Date:     2013-07-17 11:44:41 +0000 (Wed, 17 Jul 2013)

Log Message:
-----------
Refactored ISLE components.

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityExtraction.java

Added Paths:
-----------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LuceneSyntacticIndex.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/OWLOntologyLuceneSyntacticIndexCreator.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndex.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndexCreator.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleSemanticIndex.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SyntacticIndex.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/AbstractRelevanceMetric.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceMetric.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceUtils.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/EntityTextRetriever.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/RDFSCommentEntityTextRetriever.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/RDFSLabelEntityTextRetriever.java

Removed Paths:
-------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/AnnotationEntityTextRetriever.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityTextRetriever.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneBasedRelevance.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneDocument.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneIndexer.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneSearcher.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/OWLOntologyLuceneIndex.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMILuceneBasedRelevance.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RDFSCommentEntityTextRetriever.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RDFSLabelEntityTextRetriever.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/Relevance.java

Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/AnnotationEntityTextRetriever.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/AnnotationEntityTextRetriever.java	2013-07-16 05:25:41 UTC (rev 4020)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/AnnotationEntityTextRetriever.java	2013-07-17 11:44:41 UTC (rev 4021)
@@ -1,93 +0,0 @@
-/** - * - */ -package
org.dllearner.algorithms.isle; - -import java.util.HashMap; -import java.util.Map; -import java.util.Set; - -import org.dllearner.core.owl.Entity; -import org.dllearner.kb.OWLAPIOntology; -import org.dllearner.utilities.owl.OWLAPIConverter; -import org.semanticweb.owlapi.model.IRI; -import org.semanticweb.owlapi.model.OWLAnnotation; -import org.semanticweb.owlapi.model.OWLAnnotationProperty; -import org.semanticweb.owlapi.model.OWLEntity; -import org.semanticweb.owlapi.model.OWLLiteral; -import org.semanticweb.owlapi.model.OWLOntology; -import org.semanticweb.owlapi.model.OWLOntologyManager; -import org.semanticweb.owlapi.util.IRIShortFormProvider; -import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider; - - -/** - * @author Lorenz Buehmann - * - */ -public class AnnotationEntityTextRetriever implements EntityTextRetriever{ - - private OWLOntology ontology; - private OWLOntologyManager manager; - - private String language = "en"; - private double weight = 1d; - - private boolean useShortFormFallback = true; - private IRIShortFormProvider sfp = new SimpleIRIShortFormProvider(); - - private OWLAnnotationProperty[] properties; - - public AnnotationEntityTextRetriever(OWLOntology ontology, OWLAnnotationProperty... properties) { - this.ontology = ontology; - this.properties = properties; - } - - public AnnotationEntityTextRetriever(OWLAPIOntology ontology, OWLAnnotationProperty... properties) { - this.ontology = ontology.createOWLOntology(manager); - } - - /** - * @param language the language to set - */ - public void setLanguage(String language) { - this.language = language; - } - - /** - * Whether to use the short form of the IRI as fallback, if no label is given. - * @param useShortFormFallback the useShortFormFallback to set - */ - public void setUseShortFormFallback(boolean useShortFormFallback) { - this.useShortFormFallback = useShortFormFallback; - } - - /* (non-Javadoc) - * @see org.dllearner.algorithms.isle.EntityTextRetriever#getRelevantText(org.dllearner.core.owl.Entity) - */ - @Override - public Map<String, Double> getRelevantText(Entity entity) { - Map<String, Double> textWithWeight = new HashMap<String, Double>(); - - OWLEntity e = OWLAPIConverter.getOWLAPIEntity(entity); - - for (OWLAnnotationProperty property : properties) { - Set<OWLAnnotation> annotations = e.getAnnotations(ontology, property); - for (OWLAnnotation annotation : annotations) { - if (annotation.getValue() instanceof OWLLiteral) { - OWLLiteral val = (OWLLiteral) annotation.getValue(); - if (val.hasLang(language)) { - String label = val.getLiteral(); - textWithWeight.put(label, weight); - } - } - } - } - - if(textWithWeight.isEmpty() && useShortFormFallback){ - textWithWeight.put(sfp.getShortForm(IRI.create(entity.getURI())), weight); - } - - return textWithWeight; - } -} Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityExtraction.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityExtraction.java 2013-07-16 05:25:41 UTC (rev 4020) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityExtraction.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -4,6 +4,7 @@ package org.dllearner.algorithms.isle; import java.util.Map; +import java.util.Set; import org.dllearner.core.owl.Entity; @@ -17,7 +18,7 @@ * Extracts all entities contained in the working text with some confidence value. 
* @return */ - Map<Entity, Double> extractEntities(); + Map<Entity, Set<String>> extractEntities(); /** * Extracts all entities of the given <code>type</code> contained in the working text with some confidence value. Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityTextRetriever.java 2013-07-16 05:25:41 UTC (rev 4020) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityTextRetriever.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -1,48 +0,0 @@ -/** - * Copyright (C) 2007-2011, Jens Lehmann - * - * This file is part of DL-Learner. - * - * DL-Learner is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * DL-Learner is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -package org.dllearner.algorithms.isle; - -import java.util.Map; - -import org.dllearner.core.owl.Entity; - -/** - * Interface for methods, which retrieve relevant texts given an entity - * in an ontology. An entity text retriever can do simple operations such - * as converting the URI into text or retrieving an rdfs:label, but could - * also search web pages for textual explanations of an entity. - * - * @author Jens Lehmann - * - */ -public interface EntityTextRetriever { - - /** - * The method retrieves a string or a set of strings, which is weighted by - * importance with respect to the entity. For instance, an rdfs:label of - * an entity can be given more weight than an rdfs:comment, which in turn - * can be more important than a description retrieved from a web page. - * - * @param entity The entity to handle. - * @return A weighted set of strings. For a value x, we need to have 0 <= x <= 1. - */ - public Map<String, Double> getRelevantText(Entity entity); - -} Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneBasedRelevance.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneBasedRelevance.java 2013-07-16 05:25:41 UTC (rev 4020) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneBasedRelevance.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -1,145 +0,0 @@ -/** - * Copyright (C) 2007-2011, Jens Lehmann - * - * This file is part of DL-Learner. - * - * DL-Learner is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * DL-Learner is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. 
If not, see <http://www.gnu.org/licenses/>. - */ - - -package org.dllearner.algorithms.isle; - -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Set; - -import org.dllearner.core.owl.Entity; -import org.dllearner.utilities.owl.OWLAPIConverter; -import org.semanticweb.owlapi.model.OWLEntity; -import org.semanticweb.owlapi.model.OWLOntology; - - -public abstract class LuceneBasedRelevance implements Relevance{ - - private EntityTextRetriever textRetriever; - private LuceneSearcher searcher; - private OWLOntology ontology; - private Set<OWLEntity> entities; - -// public void printScores() throws Exception { -// for( OWLClass c: m_classes ) -// { -// Map<OWLEntity,Double> hmEntity2Score = getEntityRelevance(c); -// // normalization per class? -// hmEntity2Score = normalize( hmEntity2Score ); -// for( OWLEntity e : hmEntity2Score.keySet() ) -// { -// double dScore = hmEntity2Score.get(e); -// System.out.println( "P( "+ getLabel(c) +", "+ getLabel(e) +" ) = "+ dScore ); -// } -// } -// m_searcher.close(); -// } - - public LuceneBasedRelevance(OWLOntology ontology, LuceneSearcher searcher, EntityTextRetriever textRetriever) { - this.searcher = searcher; - this.ontology = ontology; - this.textRetriever = textRetriever; - - entities = new HashSet<OWLEntity>(); - entities.addAll(ontology.getClassesInSignature()); - entities.addAll(ontology.getObjectPropertiesInSignature()); - entities.addAll(ontology.getDataPropertiesInSignature()); - } - - public Map<OWLEntity,Double> normalizeMinMax( Map<OWLEntity,Double> hmEntity2Score ){ - Map<OWLEntity,Double> hmEntity2Norm = new HashMap<OWLEntity,Double>(); - double dMin = Double.MAX_VALUE; - Double dMax = Double.MIN_VALUE; - for( OWLEntity e : hmEntity2Score.keySet() ) - { - double dValue = hmEntity2Score.get(e); - if( dValue < dMin ){ - dMin = dValue; - } - else if( dValue > dMax ){ - dMax = dValue; - } - } - // System.out.println( "min="+ dMin +" max="+ dMax ); - for( OWLEntity e : hmEntity2Score.keySet() ) - { - double dValue = hmEntity2Score.get(e); - double dNorm = 0; - if( dMin == dMax ){ - dNorm = dValue; - } - else { - dNorm = ( dValue - dMin ) / ( dMax - dMin ); - } - hmEntity2Norm.put( e, dNorm ); - } - return hmEntity2Norm; - } - - @Override - public Map<Entity,Double> getEntityRelevance(Entity entity) throws Exception { - // computes relevance of entity for this class - // conditional probability: P(C,E)=f(C,E)/f(E) - // PMI(C,E)=log( P(C,E) / P(C) ) - Map<Entity, Double> hmEntity2Score = new HashMap<Entity, Double>(); - Map<String, Double> relevantText = textRetriever.getRelevantText(entity); - - for (Entry<String, Double> entry : relevantText.entrySet()) { - String text = entry.getKey(); - Double value = entry.getValue(); - - String sClass = text; - int nrOfDocumentsA = searcher.count(sClass); - int nrOfDocuments = searcher.indexSize(); - - for (OWLEntity otherEntity : entities) { - - Map<String, Double> otherRelevantText = textRetriever.getRelevantText(OWLAPIConverter - .getEntity(otherEntity)); - - for (Entry<String, Double> entry2 : otherRelevantText.entrySet()) { - String otherText = entry2.getKey(); - Double otherValue = entry2.getValue(); - - String sEntity = otherText; - int nrOfDocumentsB = searcher.count(sEntity); - int nrOfDocumentsAB = searcher.count(sClass + " AND " + sEntity); - // double dPEntity = (double)iEntity / (double)iAll; - - double score = computeScore(nrOfDocuments, nrOfDocumentsA, nrOfDocumentsB, nrOfDocumentsAB); - if (!Double.isNaN(score)){// && 
!Double.isInfinite(score)) { - hmEntity2Score.put(OWLAPIConverter.getEntity(otherEntity), score); - } - } - } - } - - return hmEntity2Score; - } - - /** - * Computes the score which is returned in {@link org.dllearner.algorithms.isle.LuceneBasedRelevance#getEntityRelevance} - * @return - */ - public abstract double computeScore(int nrOfDocuments, int nrOfDocumentsA, int nrOfDocumentsB, int nrOfDocumentsAB); - -} \ No newline at end of file Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneDocument.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneDocument.java 2013-07-16 05:25:41 UTC (rev 4020) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneDocument.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -1,43 +0,0 @@ -/** - * Copyright (C) 2007-2011, Jens Lehmann - * - * This file is part of DL-Learner. - * - * DL-Learner is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * DL-Learner is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - - -package org.dllearner.algorithms.isle; - -import java.io.File; -import java.io.FileReader; - -import org.apache.lucene.document.DateTools; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; - - -public class LuceneDocument { - - public static Document Document( File f ) throws java.io.FileNotFoundException { - Document doc = new Document(); - doc.add( new Field( "path", f.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED ) ); - doc.add( new Field( "modified", - DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE), - Field.Store.YES, Field.Index.NOT_ANALYZED)); - doc.add( new Field( "contents", new FileReader(f) ) ); - return doc; - } -} - Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneIndexer.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneIndexer.java 2013-07-16 05:25:41 UTC (rev 4020) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneIndexer.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -1,100 +0,0 @@ -/** - * Copyright (C) 2007-2011, Jens Lehmann - * - * This file is part of DL-Learner. - * - * DL-Learner is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * DL-Learner is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
- */ - - -package org.dllearner.algorithms.isle; - -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.util.Date; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.store.FSDirectory; -import org.apache.lucene.util.Version; - - -public class LuceneIndexer { - - static final File INDEX = new File( "index" ); - - public static void main( String[] args ) { - if( INDEX.exists() ) - { - System.out.println("<delete index!>"); - System.exit(1); - } -// final File docDir = new File( args[0] ); -// LuceneIndexer indexer = new LuceneIndexer( docDir ); - } - - @SuppressWarnings("deprecation") - public LuceneIndexer( File docDir ){ - System.out.println( "LuceneIndex: "+ docDir ); - Date start = new Date(); - try { - - Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43); - IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer); - IndexWriter writer = new IndexWriter( FSDirectory.open( INDEX ), indexWriterConfig); - System.out.println( "Creating index ..." ); - index( writer, docDir ); - System.out.println( "Optimizing index ..." ); - writer.close(); - Date end = new Date(); - System.out.println( end.getTime() - start.getTime() + " total milliseconds" ); - } - catch (IOException e) { - e.printStackTrace(); - } - } - - private void index( IndexWriter writer, File file ) throws IOException { - // System.out.println( "LuceneIndexer.index: "+ file ); - if( file.canRead() ) - { - if( file.isDirectory() ) - { - String[] files = file.list(); - if( files != null ) - { - for( int i = 0; i < files.length; i++ ) { - index( writer, new File( file, files[i] ) ); - } - } - } - else { - // System.out.println( "Indexer.index: adding " + file ); - try { - writer.addDocument( LuceneDocument.Document( file ) ); - } - catch (FileNotFoundException fnfe) { - fnfe.printStackTrace(); - } - } - } - else { - System.out.println( "<cannot read file!>" ); - } - } - -} Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneSearcher.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneSearcher.java 2013-07-16 05:25:41 UTC (rev 4020) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneSearcher.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -1,176 +0,0 @@ -/** - * Copyright (C) 2007-2011, Jens Lehmann - * - * This file is part of DL-Learner. - * - * DL-Learner is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * DL-Learner is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
- */ - - -package org.dllearner.algorithms.isle; - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.queryparser.classic.QueryParser; -import org.apache.lucene.search.Collector; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.FSDirectory; -import org.apache.lucene.util.Version; - -public class LuceneSearcher { - - private String INDEX = "/home/me/DBpedia-Lucene-Index"; - private String FIELD = "short-abstract"; - - private IndexReader m_reader = null; - private IndexSearcher m_searcher = null; - private Analyzer m_analyzer = null; - private QueryParser m_parser = null; - - private Map<Document,Float> m_results = null; - - - public static void main( String[] args ) throws Exception { - String sQuery = args[0]; - LuceneSearcher searcher = new LuceneSearcher(); - List<Document> docs = searcher.search( sQuery ); - System.out.println( "\nquery='"+ sQuery +"' all="+ searcher.indexSize() +" hits="+ docs.size() ); -// for( Document doc : docs ) -// { -//// String sDoc = doc.toString(); -// float score = searcher.getScore( doc ); -// System.out.println( "score="+ score +" doc="+ doc ); -// } - } - - @SuppressWarnings("deprecation") - public LuceneSearcher() throws Exception { - m_reader = DirectoryReader.open( FSDirectory.open( new File( INDEX ) )); - m_searcher = new IndexSearcher( m_reader ); - m_analyzer = new StandardAnalyzer( Version.LUCENE_43); - m_parser = new QueryParser( Version.LUCENE_43, FIELD, m_analyzer ); - } - - public LuceneSearcher(IndexReader indexReader) throws Exception { - m_reader = indexReader; - m_searcher = new IndexSearcher( m_reader ); - m_analyzer = new StandardAnalyzer( Version.LUCENE_43); - m_parser = new QueryParser( Version.LUCENE_43, FIELD, m_analyzer ); - } - - public LuceneSearcher(Directory directory, String seachField) throws Exception { - this.FIELD = seachField; - m_reader = DirectoryReader.open(directory); - m_searcher = new IndexSearcher( m_reader ); - m_analyzer = new StandardAnalyzer( Version.LUCENE_43); - m_parser = new QueryParser( Version.LUCENE_43, FIELD, m_analyzer ); - } - - public LuceneSearcher(String indexDirectory) throws Exception { - m_reader = DirectoryReader.open(FSDirectory.open(new File(indexDirectory))); - m_searcher = new IndexSearcher( m_reader ); - m_analyzer = new StandardAnalyzer( Version.LUCENE_43); - m_parser = new QueryParser( Version.LUCENE_43, FIELD, m_analyzer ); - } - - public void close() throws Exception { - m_reader.close(); - } - - public int indexSize(){ - return m_reader.numDocs(); - } - - public List<Document> search( String sQuery ) throws Exception { - m_results = new HashMap<Document,Float>(); - Query query = m_parser.parse( sQuery ); - search( query ); - // m_reader.close(); - return getDocuments(); - } - - public int count( String sQuery ) throws Exception { - return search( sQuery ).size(); - } - - public List<Document> getDocuments(){ - List<Document> docs = new 
ArrayList<Document>(); - for( Document doc: m_results.keySet() ){ - docs.add( doc ); - } - Collections.sort( docs, new Comparator<Document>(){ - public int compare( Document d1, Document d2 ){ - float s1 = getScore( d1 ); - float s2 = getScore( d2 ); - if( s1 > s2 ) return -1; - else if( s1 < s2 ) return 1; - return 0; - } - @Override - public boolean equals( Object obj ){ - return false; - } - } ); - return docs; - } - - public float getScore( Document doc ){ - return m_results.get( doc ); - } - - private void search( Query query ) throws IOException { - @SuppressWarnings("unused") - Collector collector = new Collector() - { - private Scorer scorer; - private int docBase; - private Map<Document,Float> results = new HashMap<Document,Float>(); - - @Override - public void collect(int doc) throws IOException { - // System.out.println("doc=" + doc + docBase + " score=" + scorer.score()); - m_results.put( m_searcher.doc( doc ), scorer.score() ); - } - @Override - public boolean acceptsDocsOutOfOrder() { - return true; - } - @Override - public void setScorer(Scorer scorer) throws IOException { - this.scorer = scorer; - } - @Override - public void setNextReader(AtomicReaderContext context) throws IOException { - this.docBase = context.docBase; - } - }; - m_searcher.search( query, collector ); - } -} Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/OWLOntologyLuceneIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/OWLOntologyLuceneIndex.java 2013-07-16 05:25:41 UTC (rev 4020) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/OWLOntologyLuceneIndex.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -1,142 +0,0 @@ -/** - * - */ -package org.dllearner.algorithms.isle; - -import java.io.IOException; -import java.util.HashSet; -import java.util.Set; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.FieldType; -import org.apache.lucene.document.StringField; -import org.apache.lucene.document.TextField; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.FSDirectory; -import org.apache.lucene.store.RAMDirectory; -import org.apache.lucene.util.Version; -import org.semanticweb.owlapi.model.IRI; -import org.semanticweb.owlapi.model.OWLAnnotation; -import org.semanticweb.owlapi.model.OWLAnnotationProperty; -import org.semanticweb.owlapi.model.OWLDataFactory; -import org.semanticweb.owlapi.model.OWLEntity; -import org.semanticweb.owlapi.model.OWLLiteral; -import org.semanticweb.owlapi.model.OWLOntology; -import org.semanticweb.owlapi.vocab.OWLRDFVocabulary; - -import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; - -/** - * Creates a Lucene Index for the labels if classes and properties. 
- * @author Lorenz Buehmann - * - */ -public class OWLOntologyLuceneIndex { - - private Directory directory = new RAMDirectory(); - private OWLOntology ontology; - private Set<OWLEntity> schemaEntities; - - private OWLDataFactory df = new OWLDataFactoryImpl(); - private OWLAnnotationProperty annotationProperty = df.getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_LABEL.getIRI()); - private String language = "en"; - private String searchField; - - public OWLOntologyLuceneIndex(OWLOntology ontology, String searchField) throws IOException { - this.ontology = ontology; - this.searchField = searchField; - - schemaEntities = new HashSet<OWLEntity>(); - schemaEntities.addAll(ontology.getClassesInSignature()); - schemaEntities.addAll(ontology.getObjectPropertiesInSignature()); - schemaEntities.addAll(ontology.getDataPropertiesInSignature()); - - buildIndex(); - } - - public OWLOntologyLuceneIndex(OWLOntology ontology, OWLAnnotationProperty annotationProperty) throws IOException { - this.ontology = ontology; - this.annotationProperty = annotationProperty; - - schemaEntities = new HashSet<OWLEntity>(); - schemaEntities.addAll(ontology.getClassesInSignature()); - schemaEntities.addAll(ontology.getObjectPropertiesInSignature()); - schemaEntities.addAll(ontology.getDataPropertiesInSignature()); - - buildIndex(); - } - - /** - * @return the ontology - */ - public OWLOntology getOntology() { - return ontology; - } - - /** - * @return the directory - */ - public Directory getDirectory() { - return directory; - } - - /** - * @param annotationProperty the annotationProperty to set - */ - public void setAnnotationProperty(OWLAnnotationProperty annotationProperty) { - this.annotationProperty = annotationProperty; - } - - /** - * @param annotationProperty the annotationProperty to set - */ - public void setAnnotationProperty(String annotationPropertyIRI) { - this.annotationProperty = df.getOWLAnnotationProperty(IRI.create(annotationPropertyIRI)); - } - - public void buildIndex() throws IOException{ - Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43); - IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer); - IndexWriter writer = new IndexWriter(directory, indexWriterConfig); - System.out.println( "Creating index ..." 
); - - Set<Document> luceneDocuments = new HashSet<Document>(); - FieldType stringType = new FieldType(StringField.TYPE_STORED); - stringType.setStoreTermVectors(false); - FieldType textType = new FieldType(TextField.TYPE_STORED); - textType.setStoreTermVectors(false); - - for (OWLEntity entity : schemaEntities) { - String label = null; - Set<OWLAnnotation> annotations = entity.getAnnotations(ontology, annotationProperty); - for (OWLAnnotation annotation : annotations) { - if (annotation.getValue() instanceof OWLLiteral) { - OWLLiteral val = (OWLLiteral) annotation.getValue(); - if (val.hasLang(language)) { - label = val.getLiteral(); - } - } - } - - if(label != null){ - Document luceneDocument = new Document(); - luceneDocument.add(new Field("uri", entity.toStringID(), stringType)); - luceneDocument.add(new Field(searchField, label, textType)); - luceneDocuments.add(luceneDocument); - } - - } - writer.addDocuments(luceneDocuments); - - System.out.println("Done."); - writer.close(); - } - - - -} Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMILuceneBasedRelevance.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMILuceneBasedRelevance.java 2013-07-16 05:25:41 UTC (rev 4020) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMILuceneBasedRelevance.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -1,48 +0,0 @@ -/** - * Copyright (C) 2007-2011, Jens Lehmann - * - * This file is part of DL-Learner. - * - * DL-Learner is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * DL-Learner is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - - -package org.dllearner.algorithms.isle; - -import org.semanticweb.owlapi.model.OWLOntology; - - -public class PMILuceneBasedRelevance extends LuceneBasedRelevance{ - - /** - * @param ontology - * @param searcher - * @param textRetriever - */ - public PMILuceneBasedRelevance(OWLOntology ontology, LuceneSearcher searcher, EntityTextRetriever textRetriever) { - super(ontology, searcher, textRetriever); - - } - - /* (non-Javadoc) - * @see org.dllearner.algorithms.isle.LuceneBasedRelevance#computeScore(int, int, int, int) - */ - @Override - public double computeScore(int nrOfDocuments, int nrOfDocumentsA, int nrOfDocumentsB, int nrOfDocumentsAB) { - double dPClass = nrOfDocuments == 0 ? 0 : ((double) nrOfDocumentsA / (double) nrOfDocuments); - double dPClassEntity = nrOfDocumentsB == 0 ? 
0 : (double) nrOfDocumentsAB / (double) nrOfDocumentsB; - double pmi = Math.log(dPClassEntity / dPClass); - return pmi; - } -} \ No newline at end of file Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RDFSCommentEntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RDFSCommentEntityTextRetriever.java 2013-07-16 05:25:41 UTC (rev 4020) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RDFSCommentEntityTextRetriever.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -1,26 +0,0 @@ -/** - * - */ -package org.dllearner.algorithms.isle; - -import org.dllearner.kb.OWLAPIOntology; -import org.semanticweb.owlapi.model.OWLOntology; -import org.semanticweb.owlapi.vocab.OWLRDFVocabulary; - -import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; - - -/** - * @author Lorenz Buehmann - * - */ -public class RDFSCommentEntityTextRetriever extends AnnotationEntityTextRetriever{ - - public RDFSCommentEntityTextRetriever(OWLOntology ontology) { - super(ontology, new OWLDataFactoryImpl().getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_COMMENT.getIRI())); - } - - public RDFSCommentEntityTextRetriever(OWLAPIOntology ontology) { - super(ontology, new OWLDataFactoryImpl().getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_COMMENT.getIRI())); - } -} Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RDFSLabelEntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RDFSLabelEntityTextRetriever.java 2013-07-16 05:25:41 UTC (rev 4020) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RDFSLabelEntityTextRetriever.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -1,26 +0,0 @@ -/** - * - */ -package org.dllearner.algorithms.isle; - -import org.dllearner.kb.OWLAPIOntology; -import org.semanticweb.owlapi.model.OWLOntology; -import org.semanticweb.owlapi.vocab.OWLRDFVocabulary; - -import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; - - -/** - * @author Lorenz Buehmann - * - */ -public class RDFSLabelEntityTextRetriever extends AnnotationEntityTextRetriever{ - - public RDFSLabelEntityTextRetriever(OWLOntology ontology) { - super(ontology, new OWLDataFactoryImpl().getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_LABEL.getIRI())); - } - - public RDFSLabelEntityTextRetriever(OWLAPIOntology ontology) { - super(ontology, new OWLDataFactoryImpl().getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_LABEL.getIRI())); - } -} Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/Relevance.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/Relevance.java 2013-07-16 05:25:41 UTC (rev 4020) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/Relevance.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -1,31 +0,0 @@ -/** - * Copyright (C) 2007-2011, Jens Lehmann - * - * This file is part of DL-Learner. - * - * DL-Learner is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. 
- * - * DL-Learner is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - - -package org.dllearner.algorithms.isle; - -import java.util.Map; - -import org.dllearner.core.owl.Entity; - - -public interface Relevance { - - public Map<Entity,Double> getEntityRelevance(Entity entity) throws Exception; -} \ No newline at end of file Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LuceneSyntacticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LuceneSyntacticIndex.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LuceneSyntacticIndex.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -0,0 +1,99 @@ +/** + * + */ +package org.dllearner.algorithms.isle.index; + +import java.io.File; +import java.io.IOException; +import java.util.HashSet; +import java.util.Set; + +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.queryparser.classic.ParseException; +import org.apache.lucene.queryparser.classic.QueryParser; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TotalHitCountCollector; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.util.Version; + +/** + * @author Lorenz Buehmann + * + */ +public class LuceneSyntacticIndex implements SyntacticIndex { + + private IndexSearcher searcher; + private QueryParser parser; + private IndexReader indexReader; + private String searchField; + + public LuceneSyntacticIndex(IndexReader indexReader, String searchField) throws Exception { + this.indexReader = indexReader; + this.searchField = searchField; + searcher = new IndexSearcher(indexReader); + StandardAnalyzer analyzer = new StandardAnalyzer( Version.LUCENE_43); + parser = new QueryParser( Version.LUCENE_43, searchField, analyzer ); + } + + public LuceneSyntacticIndex(Directory directory, String seachField) throws Exception { + this(DirectoryReader.open(directory), seachField); + } + + public LuceneSyntacticIndex(String indexDirectory, String seachField) throws Exception { + this(DirectoryReader.open(FSDirectory.open(new File(indexDirectory))), seachField); + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.SyntacticIndex#getDocuments(java.lang.String) + */ + @Override + public Set<String> getDocuments(String searchString) { + Set<String> documents = new HashSet<String>(); + try { + Query query = parser.parse(searchString); + ScoreDoc[] result = searcher.search(query, getSize()).scoreDocs; + for (int i = 0; i < result.length; i++) { + Document doc = searcher.doc(result[i].doc); + documents.add(doc.get(searchField)); + } + } catch (ParseException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } + return null; + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.SyntacticIndex#getSize() + */ + @Override + public int 
getSize() { + return indexReader.numDocs(); + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.SyntacticIndex#count(java.lang.String) + */ + @Override + public int count(String searchString) { + try { + Query query = parser.parse(searchString); + TotalHitCountCollector results = new TotalHitCountCollector(); + searcher.search(query, results); + return results.getTotalHits(); + } catch (ParseException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } + return -1; + } + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/OWLOntologyLuceneSyntacticIndexCreator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/OWLOntologyLuceneSyntacticIndexCreator.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/OWLOntologyLuceneSyntacticIndexCreator.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -0,0 +1,101 @@ +/** + * + */ +package org.dllearner.algorithms.isle.index; + +import java.io.IOException; +import java.util.HashSet; +import java.util.Set; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.Version; +import org.semanticweb.owlapi.model.OWLAnnotation; +import org.semanticweb.owlapi.model.OWLAnnotationProperty; +import org.semanticweb.owlapi.model.OWLDataFactory; +import org.semanticweb.owlapi.model.OWLEntity; +import org.semanticweb.owlapi.model.OWLLiteral; +import org.semanticweb.owlapi.model.OWLOntology; +import org.semanticweb.owlapi.vocab.OWLRDFVocabulary; + +import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; + +/** + * Creates a Lucene Index for the labels if classes and properties. + * @author Lorenz Buehmann + * + */ +public class OWLOntologyLuceneSyntacticIndexCreator { + + private Directory directory = new RAMDirectory(); + private OWLOntology ontology; + private Set<OWLEntity> schemaEntities; + + private OWLDataFactory df = new OWLDataFactoryImpl(); + private OWLAnnotationProperty annotationProperty = df.getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_LABEL.getIRI()); + private String language = "en"; + private String searchField; + + public OWLOntologyLuceneSyntacticIndexCreator(OWLOntology ontology, OWLAnnotationProperty annotationProperty, String searchField) throws IOException { + this.ontology = ontology; + this.annotationProperty = annotationProperty; + this.searchField = searchField; + + schemaEntities = new HashSet<OWLEntity>(); + schemaEntities.addAll(ontology.getClassesInSignature()); + schemaEntities.addAll(ontology.getObjectPropertiesInSignature()); + schemaEntities.addAll(ontology.getDataPropertiesInSignature()); + } + + public SyntacticIndex buildIndex() throws Exception{ + Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43); + IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer); + IndexWriter writer = new IndexWriter(directory, indexWriterConfig); + System.out.println( "Creating index ..." 
); + + Set<Document> luceneDocuments = new HashSet<Document>(); + FieldType stringType = new FieldType(StringField.TYPE_STORED); + stringType.setStoreTermVectors(false); + FieldType textType = new FieldType(TextField.TYPE_STORED); + textType.setStoreTermVectors(false); + + for (OWLEntity entity : schemaEntities) { + String label = null; + Set<OWLAnnotation> annotations = entity.getAnnotations(ontology, annotationProperty); + for (OWLAnnotation annotation : annotations) { + if (annotation.getValue() instanceof OWLLiteral) { + OWLLiteral val = (OWLLiteral) annotation.getValue(); + if (val.hasLang(language)) { + label = val.getLiteral(); + } + } + } + + if(label != null){ + Document luceneDocument = new Document(); + luceneDocument.add(new Field("uri", entity.toStringID(), stringType)); + luceneDocument.add(new Field(searchField, label, textType)); + luceneDocuments.add(luceneDocument); + } + + } + writer.addDocuments(luceneDocuments); + + System.out.println("Done."); + writer.close(); + + return new LuceneSyntacticIndex(directory, searchField); + } + + + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndex.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndex.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -0,0 +1,35 @@ +/** + * + */ +package org.dllearner.algorithms.isle.index; + +import java.util.Set; + +import org.dllearner.core.owl.Entity; + +/** + * This class + * @author Lorenz Buehmann + * + */ +public interface SemanticIndex { + + /** + * This method returns a set of documents for the given entity. + * @param entity + * @return + */ + Set<String> getDocuments(Entity entity); + /** + * This method returns the number of documents for the given entity. + * @param entity + * @return + */ + int count(Entity entity); + /** + * This methods returns the total number of documents contained in the index. + * @return the total number of documents contained in the index + */ + int getSize(); + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndexCreator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndexCreator.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndexCreator.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -0,0 +1,22 @@ +/** + * + */ +package org.dllearner.algorithms.isle.index; + +/** + * This gets a syntactic index and returns a semantic index by applying WSD etc. 
+ * @author Lorenz Buehmann + * + */ +public class SemanticIndexCreator { + + private SyntacticIndex syntacticIndex; + + public SemanticIndexCreator(SyntacticIndex syntacticIndex) { + this.syntacticIndex = syntacticIndex; + } + + public SemanticIndex createSemanticIndex(){ + return null; + } +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleSemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleSemanticIndex.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleSemanticIndex.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -0,0 +1,43 @@ +/** + * + */ +package org.dllearner.algorithms.isle.index; + +import java.util.Set; + +import org.dllearner.core.owl.Entity; + +/** + * @author Lorenz Buehmann + * + */ +public class SimpleSemanticIndex implements SemanticIndex{ + + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.SemanticIndex#getDocuments(org.dllearner.core.owl.Entity) + */ + @Override + public Set<String> getDocuments(Entity entity) { + return null; + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.SemanticIndex#count(java.lang.String) + */ + @Override + public int count(Entity entity) { + return 0; + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.SemanticIndex#getSize() + */ + @Override + public int getSize() { + return 0; + } + + + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SyntacticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SyntacticIndex.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SyntacticIndex.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -0,0 +1,32 @@ +/** + * + */ +package org.dllearner.algorithms.isle.index; + +import java.util.Set; + +/** + * @author Lorenz Buehmann + * + */ +public interface SyntacticIndex { + + /** + * This method returns a set of documents based on how the underlying index is processing the given search string. + * @param searchString + * @return + */ + Set<String> getDocuments(String searchString); + /** + * This method returns the number of documents based on how the underlying index is processing the given search string. + * @param searchString + * @return + */ + int count(String searchString); + /** + * This methods returns the total number of documents contained in the index. 
+ * @return the total number of documents contained in the index + */ + int getSize(); + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/AbstractRelevanceMetric.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/AbstractRelevanceMetric.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/AbstractRelevanceMetric.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -0,0 +1,54 @@ +/** + * + */ +package org.dllearner.algorithms.isle.metrics; + +import java.util.HashMap; +import java.util.Map; + +import org.dllearner.algorithms.isle.index.SemanticIndex; +import org.semanticweb.owlapi.model.OWLEntity; + +/** + * @author Lorenz Buehmann + * + */ +public abstract class AbstractRelevanceMetric implements RelevanceMetric { + + protected SemanticIndex index; + + public AbstractRelevanceMetric(SemanticIndex index) { + this.index = index; + } + + public Map<OWLEntity,Double> normalizeMinMax( Map<OWLEntity,Double> hmEntity2Score ){ + Map<OWLEntity,Double> hmEntity2Norm = new HashMap<OWLEntity,Double>(); + double dMin = Double.MAX_VALUE; + Double dMax = Double.MIN_VALUE; + for( OWLEntity e : hmEntity2Score.keySet() ) + { + double dValue = hmEntity2Score.get(e); + if( dValue < dMin ){ + dMin = dValue; + } + else if( dValue > dMax ){ + dMax = dValue; + } + } + // System.out.println( "min="+ dMin +" max="+ dMax ); + for( OWLEntity e : hmEntity2Score.keySet() ) + { + double dValue = hmEntity2Score.get(e); + double dNorm = 0; + if( dMin == dMax ){ + dNorm = dValue; + } + else { + dNorm = ( dValue - dMin ) / ( dMax - dMin ); + } + hmEntity2Norm.put( e, dNorm ); + } + return hmEntity2Norm; + } + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -0,0 +1,37 @@ +/** + * + */ +package org.dllearner.algorithms.isle.metrics; + +import java.util.Set; + +import org.dllearner.algorithms.isle.index.SemanticIndex; +import org.dllearner.core.owl.Entity; + +import com.google.common.collect.Sets; + +/** + * @author Lorenz Buehmann + * + */ +public class PMIRelevanceMetric extends AbstractRelevanceMetric { + + public PMIRelevanceMetric(SemanticIndex index) { + super(index); + } + + @Override + public double getRelevance(Entity entityA, Entity entityB){ + Set<String> documentsA = index.getDocuments(entityA); + Set<String> documentsB = index.getDocuments(entityB); + Set<String> documentsAB = Sets.intersection(documentsA, documentsB); + int nrOfDocuments = index.getSize(); + + double dPClass = nrOfDocuments == 0 ? 0 : ((double) documentsA.size() / (double) nrOfDocuments); + double dPClassEntity = documentsB.size() == 0 ? 
0 : (double) documentsAB.size() / (double) documentsB.size(); + double pmi = Math.log(dPClassEntity / dPClass); + + return pmi; + } + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceMetric.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceMetric.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceMetric.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -0,0 +1,33 @@ +/** + * Copyright (C) 2007-2011, Jens Lehmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + + +package org.dllearner.algorithms.isle.metrics; + +import org.dllearner.core.owl.Entity; + + +public interface RelevanceMetric { + /** + * @param entity1 + * @param entity2 + * @return + */ + double getRelevance(Entity entity1, Entity entity2); +} \ No newline at end of file Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceUtils.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceUtils.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceUtils.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -0,0 +1,50 @@ +/** + * + */ +package org.dllearner.algorithms.isle.metrics; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +import org.dllearner.core.owl.Entity; +import org.dllearner.utilities.owl.OWLAPIConverter; +import org.semanticweb.owlapi.model.OWLEntity; +import org.semanticweb.owlapi.model.OWLOntology; + +/** + * @author Lorenz Buehmann + * + */ +public class RelevanceUtils { + + public static Map<Entity, Double> getRelevantEntities(Entity entity, Set<Entity> otherEntities, RelevanceMetric metric){ + Map<Entity, Double> relevantEntities = new HashMap<Entity, Double>(); + + for (Entity otherEntity : otherEntities) { + double relevance = metric.getRelevance(entity, otherEntity); + relevantEntities.put(otherEntity, relevance); + } + + return relevantEntities; + } + + public static Map<Entity, Double> getRelevantEntities(Entity entity, OWLOntology ontology, RelevanceMetric metric){ + Map<Entity, Double> relevantEntities = new HashMap<Entity, Double>(); + + Set<OWLEntity> owlEntities = new HashSet<OWLEntity>(); + owlEntities.addAll(ontology.getClassesInSignature()); + owlEntities.addAll(ontology.getDataPropertiesInSignature()); + owlEntities.addAll(ontology.getObjectPropertiesInSignature()); + Set<Entity> otherEntities = OWLAPIConverter.getEntities(owlEntities); + + for (Entity otherEntity : otherEntities) { + double relevance = metric.getRelevance(entity, otherEntity); + relevantEntities.put(otherEntity, relevance); + } + + return relevantEntities; + } + +} 
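[Editorial sketch] For orientation, a minimal example of how the metrics classes added above are meant to be combined. It assumes the caller supplies a working SemanticIndex implementation (SimpleSemanticIndex is still a stub at this revision, so its scores are not yet meaningful); the class name RelevanceScoring and its method are illustrative, not part of the commit:

    import java.util.Map;

    import org.dllearner.algorithms.isle.index.SemanticIndex;
    import org.dllearner.algorithms.isle.metrics.PMIRelevanceMetric;
    import org.dllearner.algorithms.isle.metrics.RelevanceMetric;
    import org.dllearner.algorithms.isle.metrics.RelevanceUtils;
    import org.dllearner.core.owl.Entity;
    import org.semanticweb.owlapi.model.OWLOntology;

    // Hypothetical driver class, not part of the commit.
    public class RelevanceScoring {

        // Scores every class and property in the ontology against the given entity
        // using the PMI metric over a semantic index.
        public static Map<Entity, Double> pmiScores(SemanticIndex index,
                OWLOntology ontology, Entity entity) {
            RelevanceMetric metric = new PMIRelevanceMetric(index);
            return RelevanceUtils.getRelevantEntities(entity, ontology, metric);
        }
    }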
Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -0,0 +1,93 @@ +/** + * + */ +package org.dllearner.algorithms.isle.textretrieval; + +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +import org.dllearner.core.owl.Entity; +import org.dllearner.kb.OWLAPIOntology; +import org.dllearner.utilities.owl.OWLAPIConverter; +import org.semanticweb.owlapi.model.IRI; +import org.semanticweb.owlapi.model.OWLAnnotation; +import org.semanticweb.owlapi.model.OWLAnnotationProperty; +import org.semanticweb.owlapi.model.OWLEntity; +import org.semanticweb.owlapi.model.OWLLiteral; +import org.semanticweb.owlapi.model.OWLOntology; +import org.semanticweb.owlapi.model.OWLOntologyManager; +import org.semanticweb.owlapi.util.IRIShortFormProvider; +import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider; + + +/** + * @author Lorenz Buehmann + * + */ +public class AnnotationEntityTextRetriever implements EntityTextRetriever{ + + private OWLOntology ontology; + private OWLOntologyManager manager; + + private String language = "en"; + private double weight = 1d; + + private boolean useShortFormFallback = true; + private IRIShortFormProvider sfp = new SimpleIRIShortFormProvider(); + + private OWLAnnotationProperty[] properties; + + public AnnotationEntityTextRetriever(OWLOntology ontology, OWLAnnotationProperty... properties) { + this.ontology = ontology; + this.properties = properties; + } + + public AnnotationEntityTextRetriever(OWLAPIOntology ontology, OWLAnnotationProperty... properties) { + this.ontology = ontology.createOWLOntology(manager); + } + + /** + * @param language the language to set + */ + public void setLanguage(String language) { + this.language = language; + } + + /** + * Whether to use the short form of the IRI as fallback, if no label is given. 
+ * @param useShortFormFallback the useShortFormFallback to set + */ + public void setUseShortFormFallback(boolean useShortFormFallback) { + this.useShortFormFallback = useShortFormFallback; + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.EntityTextRetriever#getRelevantText(org.dllearner.core.owl.Entity) + */ + @Override + public Map<String, Double> getRelevantText(Entity entity) { + Map<String, Double> textWithWeight = new HashMap<String, Double>(); + + OWLEntity e = OWLAPIConverter.getOWLAPIEntity(entity); + + for (OWLAnnotationProperty property : properties) { + Set<OWLAnnotation> annotations = e.getAnnotations(ontology, property); + for (OWLAnnotation annotation : annotations) { + if (annotation.getValue() instanceof OWLLiteral) { + OWLLiteral val = (OWLLiteral) annotation.getValue(); + if (val.hasLang(language)) { + String label = val.getLiteral(); + textWithWeight.put(label, weight); + } + } + } + } + + if(textWithWeight.isEmpty() && useShortFormFallback){ + textWithWeight.put(sfp.getShortForm(IRI.create(entity.getURI())), weight); + } + + return textWithWeight; + } +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/EntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/EntityTextRetriever.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/EntityTe... [truncated message content] |
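To make the retrieval contract concrete, here is a minimal sketch, assuming the RDFSLabelEntityTextRetriever added above simply instantiates AnnotationEntityTextRetriever with rdfs:label (its body is truncated in this message). getRelevantText returns each matching label with weight 1.0 and falls back to the IRI short form when no label in the configured language exists. Note also that of the two constructors shown, only the OWLOntology-based one assigns the annotation properties array.

    import java.util.Map;

    import org.dllearner.algorithms.isle.textretrieval.RDFSLabelEntityTextRetriever;
    import org.dllearner.core.owl.Entity;
    import org.semanticweb.owlapi.model.OWLOntology;

    public class TextRetrievalExample {
        public static void printSurfaceForms(OWLOntology ontology, Entity entity) {
            RDFSLabelEntityTextRetriever retriever = new RDFSLabelEntityTextRetriever(ontology);
            Map<String, Double> textWithWeight = retriever.getRelevantText(entity);
            for (Map.Entry<String, Double> e : textWithWeight.entrySet()) {
                System.out.println(e.getKey() + " -> " + e.getValue()); // label -> 1.0
            }
        }
    }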
From: <lor...@us...> - 2013-09-03 15:44:29
|
Revision: 4036 http://sourceforge.net/p/dl-learner/code/4036 Author: lorenz_b Date: 2013-09-03 15:44:26 +0000 (Tue, 03 Sep 2013) Log Message: ----------- Added linguistic annotator. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityCandidateGenerator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticAnnotator.java Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityCandidateGenerator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityCandidateGenerator.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityCandidateGenerator.java 2013-09-03 15:44:26 UTC (rev 4036) @@ -0,0 +1,25 @@ +/** + * + */ +package org.dllearner.algorithms.isle; + +import java.util.Set; + +import org.dllearner.algorithms.isle.index.Annotation; +import org.dllearner.core.owl.Entity; +import org.semanticweb.owlapi.model.OWLOntology; + +/** + * @author Lorenz Buehmann + * + */ +public abstract class EntityCandidateGenerator { + + private OWLOntology ontology; + + public EntityCandidateGenerator(OWLOntology ontology) { + this.ontology = ontology; + } + + public abstract Set<Entity> getCandidates(Annotation annotation); +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticAnnotator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticAnnotator.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticAnnotator.java 2013-09-03 15:44:26 UTC (rev 4036) @@ -0,0 +1,16 @@ +/** + * + */ +package org.dllearner.algorithms.isle.index; + +import java.util.Set; + +/** + * @author Lorenz Buehmann + * + */ +public interface LinguisticAnnotator { + + Set<Annotation> annotate(Document document); + +} Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java 2013-09-03 15:26:18 UTC (rev 4035) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java 2013-09-03 15:44:26 UTC (rev 4036) @@ -1,5 +1,11 @@ package org.dllearner.algorithms.isle.index; +import java.util.HashSet; +import java.util.Set; + +import org.dllearner.algorithms.isle.EntityCandidateGenerator; +import org.dllearner.algorithms.isle.WordSenseDisambiguation; +import org.dllearner.core.owl.Entity; import org.semanticweb.owlapi.model.OWLOntology; /** @@ -9,15 +15,23 @@ */ public class SemanticAnnotator { - OWLOntology ontology; + private OWLOntology ontology; + private WordSenseDisambiguation wordSenseDisambiguation; + private EntityCandidateGenerator entityCandidateGenerator; + private LinguisticAnnotator linguisticAnnotator; + /** * Initialize this semantic annotator to use the entities from the provided ontology. 
* * @param ontology the ontology to use entities from */ - public SemanticAnnotator(OWLOntology ontology) { + public SemanticAnnotator(OWLOntology ontology, WordSenseDisambiguation wordSenseDisambiguation, + EntityCandidateGenerator entityCandidateGenerator, LinguisticAnnotator linguisticAnnotator) { this.ontology = ontology; + this.wordSenseDisambiguation = wordSenseDisambiguation; + this.entityCandidateGenerator = entityCandidateGenerator; + this.linguisticAnnotator = linguisticAnnotator; } /** @@ -26,7 +40,16 @@ * @param document the document to annotate * @return the given document extended with annotations */ - public AnnotatedDocument processDocument(Document document){ - return null; + public AnnotatedDocument processDocument(TextDocument document){ + Set<Annotation> annotations = linguisticAnnotator.annotate(document); + Set<SemanticAnnotation> semanticAnnotations = new HashSet<SemanticAnnotation>(); + for (Annotation annotation : annotations) { + Set<Entity> candidateEntities = entityCandidateGenerator.getCandidates(annotation); + SemanticAnnotation semanticAnnotation = wordSenseDisambiguation.disambiguate(annotation, candidateEntities); + semanticAnnotations.add(semanticAnnotation); + + } + AnnotatedDocument annotatedDocument = new AnnotatedTextDocument(document, semanticAnnotations); + return annotatedDocument; } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
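The pipeline introduced here is: a LinguisticAnnotator proposes annotation spans, an EntityCandidateGenerator maps each span to candidate entities, and a WordSenseDisambiguation picks one entity per span. A wiring sketch, borrowing the concrete implementations that appear in later revisions of this series (RandomWordSenseDisambiguation from r4039, SimpleEntityCandidateGenerator from r4040, SimpleLinguisticAnnotator from r4050) purely to illustrate the contract:

    import org.dllearner.algorithms.isle.RandomWordSenseDisambiguation;
    import org.dllearner.algorithms.isle.index.AnnotatedDocument;
    import org.dllearner.algorithms.isle.index.SemanticAnnotator;
    import org.dllearner.algorithms.isle.index.SimpleEntityCandidateGenerator;
    import org.dllearner.algorithms.isle.index.SimpleLinguisticAnnotator;
    import org.dllearner.algorithms.isle.index.TextDocument;
    import org.semanticweb.owlapi.model.OWLOntology;

    public class AnnotationPipelineExample {
        // annotate -> getCandidates per annotation -> disambiguate,
        // exactly the order processDocument() uses above.
        public static AnnotatedDocument annotate(OWLOntology ontology, TextDocument document) {
            SemanticAnnotator annotator = new SemanticAnnotator(
                    ontology,                                     // parameter removed again in r4040
                    new RandomWordSenseDisambiguation(ontology),  // baseline WSD (r4039)
                    new SimpleEntityCandidateGenerator(ontology), // candidate generator (r4040, body not shown here)
                    new SimpleLinguisticAnnotator());             // whitespace-based spans (r4050)
            return annotator.processDocument(document);
        }
    }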
From: <jen...@us...> - 2013-09-03 16:09:31
|
Revision: 4039 http://sourceforge.net/p/dl-learner/code/4039 Author: jenslehmann Date: 2013-09-03 16:09:27 +0000 (Tue, 03 Sep 2013) Log Message: ----------- implemented random WSD as baseline method Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotation.java Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RandomWordSenseDisambiguation.java Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RandomWordSenseDisambiguation.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RandomWordSenseDisambiguation.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RandomWordSenseDisambiguation.java 2013-09-03 16:09:27 UTC (rev 4039) @@ -0,0 +1,59 @@ +/** + * Copyright (C) 2007-2013, Jens Lehmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +package org.dllearner.algorithms.isle; + +import java.util.Random; +import java.util.Set; + +import org.dllearner.algorithms.isle.index.Annotation; +import org.dllearner.algorithms.isle.index.SemanticAnnotation; +import org.dllearner.core.owl.Entity; +import org.semanticweb.owlapi.model.OWLOntology; + +/** + * Disambiguation by randomly selecting one of the candidates (baseline method). 
+ * + * @author Jens Lehmann + * + */ +public class RandomWordSenseDisambiguation extends WordSenseDisambiguation { + + private Random random; + + public RandomWordSenseDisambiguation(OWLOntology ontology) { + super(ontology); + random = new Random(); + } + + @Override + public SemanticAnnotation disambiguate(Annotation annotation, + Set<Entity> candidateEntities) { + int pos = random.nextInt(candidateEntities.size()); + int i = 0; + for(Entity e : candidateEntities) + { + if (i == pos) { + return new SemanticAnnotation(annotation, e); + } + i++; + } + return null; + } + +} Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotation.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotation.java 2013-09-03 15:55:04 UTC (rev 4038) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotation.java 2013-09-03 16:09:27 UTC (rev 4039) @@ -13,6 +13,11 @@ private Entity entity; + public SemanticAnnotation(Annotation annotation, Entity entity) { + super(annotation.getGetReferencedDocument(), annotation.getOffset(), annotation.getLength()); + this.entity = entity; + } + public SemanticAnnotation(Document getReferencedDocument, Entity entity, int offset, int length) { super(getReferencedDocument, offset, length); this.entity = entity; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
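One caveat with this baseline: Random.nextInt(0) throws IllegalArgumentException, so disambiguate must not be called with an empty candidate set. A small guarded wrapper (a sketch, not part of the commit):

    import java.util.Set;

    import org.dllearner.algorithms.isle.RandomWordSenseDisambiguation;
    import org.dllearner.algorithms.isle.index.Annotation;
    import org.dllearner.algorithms.isle.index.SemanticAnnotation;
    import org.dllearner.core.owl.Entity;

    public class SafeDisambiguation {
        // Returns null for empty candidate sets instead of letting
        // random.nextInt(candidateEntities.size()) throw.
        public static SemanticAnnotation disambiguateOrNull(RandomWordSenseDisambiguation wsd,
                Annotation annotation, Set<Entity> candidates) {
            return candidates.isEmpty() ? null : wsd.disambiguate(annotation, candidates);
        }
    }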
From: <lor...@us...> - 2013-09-03 16:48:19
|
Revision: 4040 http://sourceforge.net/p/dl-learner/code/4040 Author: lorenz_b Date: 2013-09-03 16:48:16 +0000 (Tue, 03 Sep 2013) Log Message: ----------- Added first implementation of semantic index. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java Removed Paths: ------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndexFactory.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java 2013-09-03 16:09:27 UTC (rev 4039) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java 2013-09-03 16:48:16 UTC (rev 4040) @@ -6,7 +6,6 @@ import org.dllearner.algorithms.isle.EntityCandidateGenerator; import org.dllearner.algorithms.isle.WordSenseDisambiguation; import org.dllearner.core.owl.Entity; -import org.semanticweb.owlapi.model.OWLOntology; /** * Provides methods to annotate documents. @@ -15,7 +14,6 @@ */ public class SemanticAnnotator { - private OWLOntology ontology; private WordSenseDisambiguation wordSenseDisambiguation; private EntityCandidateGenerator entityCandidateGenerator; private LinguisticAnnotator linguisticAnnotator; @@ -26,9 +24,8 @@ * * @param ontology the ontology to use entities from */ - public SemanticAnnotator(OWLOntology ontology, WordSenseDisambiguation wordSenseDisambiguation, + public SemanticAnnotator(WordSenseDisambiguation wordSenseDisambiguation, EntityCandidateGenerator entityCandidateGenerator, LinguisticAnnotator linguisticAnnotator) { - this.ontology = ontology; this.wordSenseDisambiguation = wordSenseDisambiguation; this.entityCandidateGenerator = entityCandidateGenerator; this.linguisticAnnotator = linguisticAnnotator; Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java 2013-09-03 16:09:27 UTC (rev 4039) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java 2013-09-03 16:48:16 UTC (rev 4040) @@ -1,11 +1,19 @@ package org.dllearner.algorithms.isle.index.semantic; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +import org.dllearner.algorithms.isle.EntityCandidateGenerator; +import org.dllearner.algorithms.isle.WordSenseDisambiguation; import org.dllearner.algorithms.isle.index.AnnotatedDocument; -import org.dllearner.algorithms.isle.index.Document; +import org.dllearner.algorithms.isle.index.LinguisticAnnotator; +import org.dllearner.algorithms.isle.index.SemanticAnnotator; +import org.dllearner.algorithms.isle.index.TextDocument; +import org.dllearner.algorithms.isle.index.syntactic.SyntacticIndex; import org.dllearner.core.owl.Entity; +import org.semanticweb.owlapi.model.OWLOntology; -import java.util.Set; - /** * Interface for an index which is able to resolve 
a given entity's URI to the set of documents containing * this entity, i.e., documents which contain words disambiguated to the given entity. @@ -13,14 +21,51 @@ * @author Lorenz Buehmann * @author Daniel Fleischhacker */ -public interface SemanticIndex { +public abstract class SemanticIndex { + + private SemanticAnnotator semanticAnnotator; + private SyntacticIndex syntacticIndex; + private Map<Entity, Set<AnnotatedDocument>> index; + private OWLOntology ontology; + + public SemanticIndex(OWLOntology ontology, SyntacticIndex syntacticIndex, WordSenseDisambiguation wordSenseDisambiguation, + EntityCandidateGenerator entityCandidateGenerator, LinguisticAnnotator linguisticAnnotator) { + this.ontology = ontology; + this.syntacticIndex = syntacticIndex; + semanticAnnotator = new SemanticAnnotator(wordSenseDisambiguation, entityCandidateGenerator, linguisticAnnotator); + } + + public SemanticIndex(OWLOntology ontology, SyntacticIndex syntacticIndex, SemanticAnnotator semanticAnnotator) { + this.semanticAnnotator = semanticAnnotator; + } + + /** + * Precompute the whole index, i.e. iterate over all entities and compute all annotated documents. + */ + public void buildIndex(Set<TextDocument> documents){ + for (TextDocument document : documents) { + AnnotatedDocument annotatedDocument = semanticAnnotator.processDocument(document); + for (Entity entity : annotatedDocument.getContainedEntities()) { + Set<AnnotatedDocument> existingAnnotatedDocuments = index.get(entity); + if(existingAnnotatedDocuments == null){ + existingAnnotatedDocuments = new HashSet<AnnotatedDocument>(); + index.put(entity, existingAnnotatedDocuments); + } + existingAnnotatedDocuments.add(annotatedDocument); + } + } + } + /** * Returns the set of annotated documents which reference the given entity using one of its surface forms. * * @param entity entity to retrieve documents * @return documents referencing given entity */ - public Set<AnnotatedDocument> getDocuments(Entity entity); + public Set<AnnotatedDocument> getDocuments(Entity entity){ + Set<AnnotatedDocument> annotatedDocuments = index.get(entity); + return annotatedDocuments; + } /** * Returns the number of documents for the given entity. @@ -28,12 +73,16 @@ * @param entity entity to return number of referencing documents for * @return number of documents for the given entity in this index */ - public int count(Entity entity); + public int count(Entity entity){ + return index.get(entity).size(); + } /** * Returns the total number of documents contained in the index. 
* * @return the total number of documents contained in the index */ - public int getSize(); + public int getSize(){ + return index.size(); + } } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java 2013-09-03 16:09:27 UTC (rev 4039) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java 2013-09-03 16:48:16 UTC (rev 4040) @@ -3,28 +3,20 @@ */ package org.dllearner.algorithms.isle.index.semantic.simple; -import org.dllearner.algorithms.isle.index.AnnotatedDocument; -import org.dllearner.algorithms.isle.index.Document; +import org.dllearner.algorithms.isle.RandomWordSenseDisambiguation; +import org.dllearner.algorithms.isle.index.SimpleEntityCandidateGenerator; +import org.dllearner.algorithms.isle.index.SimpleLinguisticAnnotator; +import org.dllearner.algorithms.isle.index.semantic.SemanticIndex; import org.dllearner.algorithms.isle.index.syntactic.SyntacticIndex; -import org.dllearner.algorithms.isle.index.semantic.SemanticIndex; -import org.dllearner.algorithms.isle.textretrieval.RDFSLabelEntityTextRetriever; -import org.dllearner.core.owl.Entity; import org.semanticweb.owlapi.model.OWLOntology; -import java.util.HashSet; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Set; - /** * A semantic index which returns all documents which contain at least one of the labels assigned to a specific * entity in a provided ontology. * * @author Lorenz Buehmann */ -public class SimpleSemanticIndex implements SemanticIndex { - private SyntacticIndex syntacticIndex; - private RDFSLabelEntityTextRetriever labelRetriever; +public class SimpleSemanticIndex extends SemanticIndex { /** * Initializes the semantic index to use {@code ontology} for finding all labels of an entity and @@ -34,41 +26,11 @@ * @param syntacticIndex index to query for documents containing the labels */ public SimpleSemanticIndex(OWLOntology ontology, SyntacticIndex syntacticIndex) { - this.syntacticIndex = syntacticIndex; - labelRetriever = new RDFSLabelEntityTextRetriever(ontology); + super(ontology, + syntacticIndex, + new RandomWordSenseDisambiguation(ontology), + new SimpleEntityCandidateGenerator(ontology), + new SimpleLinguisticAnnotator()); } - /* (non-Javadoc) - * @see org.dllearner.algorithms.isle.SemanticIndex#getDocuments(org.dllearner.core.owl.Entity) - */ - @Override - public Set<AnnotatedDocument> getDocuments(Entity entity) { - Set<AnnotatedDocument> documents = new HashSet<AnnotatedDocument>(); - Map<String, Double> relevantText = labelRetriever.getRelevantText(entity); - - for (Entry<String, Double> entry : relevantText.entrySet()) { - String label = entry.getKey(); - documents.addAll(syntacticIndex.getDocuments(label)); - } - - return documents; - } - - /* (non-Javadoc) - * @see org.dllearner.algorithms.isle.SemanticIndex#count(java.lang.String) - */ - @Override - public int count(Entity entity) { - return getDocuments(entity).size(); - } - - /* (non-Javadoc) - * @see org.dllearner.algorithms.isle.SemanticIndex#getSize() - */ - @Override - public int getSize() { - return syntacticIndex.getSize(); - } - - } Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndexFactory.java 
=================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndexFactory.java 2013-09-03 16:09:27 UTC (rev 4039) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndexFactory.java 2013-09-03 16:48:16 UTC (rev 4040) @@ -1,40 +0,0 @@ -/** - * - */ -package org.dllearner.algorithms.isle.index.semantic.simple; - -import org.dllearner.algorithms.isle.index.syntactic.SyntacticIndex; -import org.dllearner.algorithms.isle.index.semantic.SemanticIndex; -import org.dllearner.algorithms.isle.index.semantic.SemanticIndexFactory; -import org.semanticweb.owlapi.model.OWLOntology; - -import java.io.File; - -/** - * This gets a syntactic index and returns a semantic index by applying WSD etc. - * - * @author Lorenz Buehmann - * @author Daniel Fleischhacker - */ -public class SimpleSemanticIndexFactory implements SemanticIndexFactory { - private OWLOntology ontology; - private SyntacticIndex syntacticIndex; - - /** - * Initializes a semantic index factory for creating simple semantic indexes. Simple semantic indexes use - * the labels assigned to an entity in {@code ontology} as its surface forms and return the all documents - * from the given syntactic index which contain at least one of these surface forms. - * - * @param syntacticIndex the syntactic index in which occurrences of the labels are searched - * @param ontology the ontology retrieve the entities' labels from - */ - public SimpleSemanticIndexFactory(SyntacticIndex syntacticIndex, OWLOntology ontology) { - this.syntacticIndex = syntacticIndex; - this.ontology = ontology; - } - - @Override - public SemanticIndex createIndex(File inputDirectory) { - return new SimpleSemanticIndex(ontology, syntacticIndex); - } -} Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java 2013-09-03 16:09:27 UTC (rev 4039) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java 2013-09-03 16:48:16 UTC (rev 4040) @@ -3,12 +3,13 @@ */ package org.dllearner.algorithms.isle.metrics; -import com.google.common.collect.Sets; -import org.dllearner.algorithms.isle.index.Document; +import java.util.Set; + +import org.dllearner.algorithms.isle.index.AnnotatedDocument; import org.dllearner.algorithms.isle.index.semantic.SemanticIndex; import org.dllearner.core.owl.Entity; -import java.util.Set; +import com.google.common.collect.Sets; /** * @author Lorenz Buehmann @@ -22,9 +23,9 @@ @Override public double getRelevance(Entity entityA, Entity entityB){ - Set<Document> documentsA = index.getDocuments(entityA); - Set<Document> documentsB = index.getDocuments(entityB); - Set<Document> documentsAB = Sets.intersection(documentsA, documentsB); + Set<AnnotatedDocument> documentsA = index.getDocuments(entityA); + Set<AnnotatedDocument> documentsB = index.getDocuments(entityB); + Set<AnnotatedDocument> documentsAB = Sets.intersection(documentsA, documentsB); int nrOfDocuments = index.getSize(); double dPClass = nrOfDocuments == 0 ? 0 : ((double) documentsA.size() / (double) nrOfDocuments); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
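End-to-end, the refactored index is now used as follows: build it once over a corpus, then hand it to a relevance metric. A sketch, with two assumptions flagged: the PMIRelevanceMetric(SemanticIndex) constructor is not shown in this diff, and the index map in SemanticIndex is declared but never initialized in this revision, so buildIndex() relies on an initialization (e.g. a HashMap) being added.

    import java.util.Set;

    import org.dllearner.algorithms.isle.index.TextDocument;
    import org.dllearner.algorithms.isle.index.semantic.SemanticIndex;
    import org.dllearner.algorithms.isle.index.semantic.simple.SimpleSemanticIndex;
    import org.dllearner.algorithms.isle.index.syntactic.SyntacticIndex;
    import org.dllearner.algorithms.isle.metrics.PMIRelevanceMetric;
    import org.dllearner.core.owl.Entity;
    import org.semanticweb.owlapi.model.OWLOntology;

    public class SemanticIndexExample {
        public static double pmi(OWLOntology ontology, SyntacticIndex syntacticIndex,
                Set<TextDocument> corpus, Entity a, Entity b) {
            SemanticIndex index = new SimpleSemanticIndex(ontology, syntacticIndex);
            index.buildIndex(corpus); // annotates the corpus and groups documents by entity
            return new PMIRelevanceMetric(index).getRelevance(a, b);
        }
    }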
From: <lor...@us...> - 2013-09-04 09:41:46
|
Revision: 4050 http://sourceforge.net/p/dl-learner/code/4050 Author: lorenz_b Date: 2013-09-04 09:41:41 +0000 (Wed, 04 Sep 2013) Log Message: ----------- Added simple stop word filtering. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleLinguisticAnnotator.java Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/StopWordFilter.java Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/StopWordFilter.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/StopWordFilter.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/StopWordFilter.java 2013-09-04 09:41:41 UTC (rev 4050) @@ -0,0 +1,56 @@ +/** + * + */ +package org.dllearner.algorithms.isle; + +import java.io.File; +import java.io.IOException; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Set; + +import org.dllearner.algorithms.isle.index.Annotation; + +import com.google.common.base.Charsets; +import com.google.common.io.Files; + +/** + * @author Lorenz Buehmann + * + */ +public class StopWordFilter { + + private Set<String> stopWords; + private static final String stopWordfile = "src/main/resources/stopwords.txt"; + + public StopWordFilter() { + try { + stopWords = new HashSet<String>(Files.readLines(new File(stopWordfile), Charsets.UTF_8)); + } catch (IOException e) { + e.printStackTrace(); + } + } + + public String removeStopWords(String input) { + for (String s : stopWords) { + input = input.replaceAll("\\b" + s + "\\b", ""); + } + return input; + } + + public void removeStopWords(Set<String> words) { + words.removeAll(stopWords); + } + + public void removeStopWordAnnotations(Set<Annotation> annotations) { + for (Iterator<Annotation> iter = annotations.iterator(); iter.hasNext();) { + Annotation annotation = iter.next(); + String content = annotation.getGetReferencedDocument().getContent(); + String token = content.substring(annotation.getOffset(), annotation.getOffset()+annotation.getLength()); + if(stopWords.contains(token)){ + iter.remove(); + } + } + } + +} Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleLinguisticAnnotator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleLinguisticAnnotator.java 2013-09-04 09:41:35 UTC (rev 4049) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleLinguisticAnnotator.java 2013-09-04 09:41:41 UTC (rev 4050) @@ -1,22 +1,35 @@ package org.dllearner.algorithms.isle.index; +import java.io.IOException; +import java.io.StringReader; import java.util.HashSet; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.core.StopFilter; +import org.apache.lucene.analysis.en.PorterStemFilter; +import org.apache.lucene.analysis.standard.StandardTokenizer; +import org.apache.lucene.util.Version; +import org.dllearner.algorithms.isle.StopWordFilter; + /** * * @author Jens Lehmann * */ public class SimpleLinguisticAnnotator implements LinguisticAnnotator { + + private StopWordFilter stopWordFilter = new StopWordFilter(); @Override public Set<Annotation> annotate(Document document) { String s = document.getRawContent().trim(); + System.out.println("Document:" + s); +// s = 
stopWordFilter.removeStopWords(s); Set<Annotation> annotations = new HashSet<Annotation>(); - Pattern pattern = Pattern.compile("\\u0020+"); + Pattern pattern = Pattern.compile("(\\u0020)+"); Matcher matcher = pattern.matcher(s); // Check all occurrences int start = 0; @@ -28,7 +41,20 @@ if(start < s.length()-1){ annotations.add(new Annotation(document, start, s.length() - start)); } + stopWordFilter.removeStopWordAnnotations(annotations); return annotations; } + + public static void main(String[] args) throws Exception { + String s = "male person least 1 child"; + Pattern pattern = Pattern.compile("(\\u0020)+"); + Matcher matcher = pattern.matcher(s); + int start = 0; + while (matcher.find()) { + int end = matcher.start(); + System.out.println(s.substring(start, end)); + start = matcher.end(); + } + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
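Since removeStopWords(String) splices each stop word directly into a regular expression, a word containing a regex metacharacter would corrupt the pattern. A hardened sketch (a hypothetical helper, not part of the commit) quotes each word first:

    import java.util.Set;
    import java.util.regex.Pattern;

    public class StopWordUtil {
        // Same whole-word deletion as StopWordFilter.removeStopWords(String),
        // but Pattern.quote() keeps metacharacters from breaking the regex.
        public static String removeStopWords(String input, Set<String> stopWords) {
            for (String s : stopWords) {
                input = input.replaceAll("\\b" + Pattern.quote(s) + "\\b", "");
            }
            return input;
        }
    }

A similar caution applies to loading src/main/resources/stopwords.txt through a relative File path, which only resolves when the JVM is started from the module root; reading the file from the classpath would be more robust.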
From: <dfl...@us...> - 2013-09-04 14:28:16
|
Revision: 4061 http://sourceforge.net/p/dl-learner/code/4061 Author: dfleischhacker Date: 2013-09-04 14:28:12 +0000 (Wed, 04 Sep 2013) Log Message: ----------- Add class providing wrapper for common linguistic operations Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordNet.java Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordNet.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordNet.java 2013-09-04 14:26:47 UTC (rev 4060) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordNet.java 2013-09-04 14:28:12 UTC (rev 4061) @@ -4,6 +4,7 @@ import java.util.ArrayList; import java.util.Iterator; import java.util.List; +import java.util.Set; import net.didion.jwnl.JWNL; import net.didion.jwnl.JWNLException; @@ -71,8 +72,31 @@ } return synonyms; } - - public List<String> getSisterTerms(POS pos, String s){ + + public List<String> getAllSynonyms(POS pos, String s) { + List<String> synonyms = new ArrayList<String>(); + try { + IndexWord iw = dict.getIndexWord(pos, s); + if (iw != null) { + Synset[] synsets = iw.getSenses(); + for (Synset synset : synsets) { + for (Word w : synset.getWords()) { + String lemma = w.getLemma(); + if (!lemma.equals(s) && !lemma.contains(" ")) { + synonyms.add(lemma); + } + } + } + } + } + catch (JWNLException e) { + e.printStackTrace(); + } + + return synonyms; + } + + public List<String> getSisterTerms(POS pos, String s){ List<String> sisterTerms = new ArrayList<String>(); try { Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java 2013-09-04 14:28:12 UTC (rev 4061) @@ -0,0 +1,78 @@ +package org.dllearner.algorithms.isle.index; + +import net.didion.jwnl.data.POS; +import org.dllearner.algorithms.isle.WordNet; + +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Provides shortcuts to + * @author Daniel Fleischhacker + */ +public class LinguisticUtil { + private static final WordNet wn = new WordNet(); + private static POS[] RELEVANT_POS = new POS[]{POS.NOUN, POS.VERB}; + + /** + * Processes the given string and puts camelCased words into single words. 
+ * @param camelCase the word containing camelcase to split + * @return all words as camelcase contained in the given word + */ + public static String[] getWordsFromCamelCase(String camelCase) { + ArrayList<String> resultingWords = new ArrayList<String>(); + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < camelCase.length(); i++) { + // we just ignore characters not matching the defined pattern + char curChar = camelCase.charAt(i); + if (!Character.isLetter(curChar)) { + continue; + } + if (Character.isUpperCase(curChar)) { // found a new upper case letter + resultingWords.add(sb.toString()); + sb = new StringBuilder(); + sb.append(Character.toLowerCase(curChar)); + } + else { // lower case letter + sb.append(curChar); + } + } + + if (sb.length() > 0) { + resultingWords.add(sb.toString()); + } + + return resultingWords.toArray(new String[resultingWords.size()]); + } + + /** + * Split word into words it contains divided by underscores. + * + * @param underScored word to split at underscores + * @return words contained in given word + */ + public static String[] getWordsFromUnderscored(String underScored) { + return underScored.split("_"); + } + + // get synonyms + public static String[] getSynonymsForWord(String word) { + ArrayList<String> synonyms = new ArrayList<String>(); + + for (POS pos : RELEVANT_POS) { + synonyms.addAll(wn.getAllSynonyms(pos, word)); + } + return synonyms.toArray(new String[synonyms.size()]); + } + + public static void main(String[] args) { + for (String s : getWordsFromCamelCase("thisIsAClassWith1Name123")) { + System.out.println(s); + for (String w : getSynonymsForWord(s)) { + System.out.println(" --> " + w); + } + } + } +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
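The camel-case splitter walks the string and flushes its buffer at every upper-case letter, dropping non-letter characters entirely; note that a leading upper-case letter therefore flushes a still-empty buffer and produces an empty first token. Expected behaviour, matching the main() method above:

    import org.dllearner.algorithms.isle.index.LinguisticUtil;

    public class SplitExample {
        public static void main(String[] args) {
            // prints: this, is, a, class, with, name (digits are dropped)
            for (String w : LinguisticUtil.getWordsFromCamelCase("thisIsAClassWith1Name123")) {
                System.out.println(w);
            }
            // underscore splitting is a plain String.split("_"): has, child
            for (String w : LinguisticUtil.getWordsFromUnderscored("has_child")) {
                System.out.println(w);
            }
        }
    }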
From: <lor...@us...> - 2013-09-05 09:10:36
|
Revision: 4087 http://sourceforge.net/p/dl-learner/code/4087 Author: lorenz_b Date: 2013-09-05 09:10:33 +0000 (Thu, 05 Sep 2013) Log Message: ----------- Added printing. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/ISLE.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/ISLE.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/ISLE.java 2013-09-05 08:52:49 UTC (rev 4086) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/ISLE.java 2013-09-05 09:10:33 UTC (rev 4087) @@ -81,7 +81,7 @@ @ComponentAnn(name="ISLE", shortName="isle", version=0.5, description="CELOE is an adapted and extended version of the OCEL algorithm applied for the ontology engineering use case. See http://jens-lehmann.org/files/2011/celoe.pdf for reference.") public class ISLE extends AbstractCELA { - private static Logger logger = Logger.getLogger(CELOE.class); + private static Logger logger = Logger.getLogger(ISLE.class); // private CELOEConfigurator configurator; private boolean isRunning = false; Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java 2013-09-05 08:52:49 UTC (rev 4086) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java 2013-09-05 09:10:33 UTC (rev 4087) @@ -107,19 +107,19 @@ score -= node.getRefinementCount() * nodeRefinementPenalty; - //the NLP based scoring -// Description expression = node.getExpression(); -//// OWLClassExpression owlapiDescription = OWLAPIConverter.getOWLAPIDescription(expression); -//// Set<Entity> entities = OWLAPIConverter.getEntities(owlapiDescription.getSignature()); -// Set<Entity> entities = expression.getSignature(); -// double sum = 0; -// for (Entity entity : entities) { -// double relevance = entityRelevance.containsKey(entity) ? entityRelevance.get(entity) : 0; -// if(!Double.isInfinite(relevance)){ -// sum += relevance; -// } -// } -// score += nlpBonusFactor * sum; +// the NLP based scoring + Description expression = node.getExpression();System.out.println(expression); +// OWLClassExpression owlapiDescription = OWLAPIConverter.getOWLAPIDescription(expression); +// Set<Entity> entities = OWLAPIConverter.getEntities(owlapiDescription.getSignature()); + Set<Entity> entities = expression.getSignature(); + double sum = 0; + for (Entity entity : entities) { + double relevance = entityRelevance.containsKey(entity) ? 
entityRelevance.get(entity) : 0;System.out.println(entity + ":" + relevance); + if(!Double.isInfinite(relevance)){ + sum += relevance; + } + } + score += nlpBonusFactor * sum; return score; } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java 2013-09-05 08:52:49 UTC (rev 4086) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java 2013-09-05 09:10:33 UTC (rev 4087) @@ -75,6 +75,7 @@ } existingAnnotatedDocuments.add(annotatedDocument); } + logger.info("Annotated document:" + annotatedDocument); } logger.info("...done."); } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java 2013-09-05 08:52:49 UTC (rev 4086) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java 2013-09-05 09:10:33 UTC (rev 4087) @@ -31,6 +31,7 @@ public SimpleSemanticIndex(OWLOntology ontology, SyntacticIndex syntacticIndex) { super(ontology); SimpleEntityCandidatesTrie trie = new SimpleEntityCandidatesTrie(new RDFSLabelEntityTextRetriever(ontology), ontology); + trie.printTrie(); setSemanticAnnotator(new SemanticAnnotator( new SimpleWordSenseDisambiguation(ontology), new TrieEntityCandidateGenerator(ontology, trie), Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java 2013-09-05 08:52:49 UTC (rev 4086) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java 2013-09-05 09:10:33 UTC (rev 4087) @@ -82,7 +82,7 @@ if (annotation.getValue() instanceof OWLLiteral) { OWLLiteral val = (OWLLiteral) annotation.getValue(); if (val.hasLang(language)) { - String label = val.getLiteral(); + String label = val.getLiteral().trim(); textWithWeight.put(label, weight); } } @@ -92,7 +92,7 @@ if(textWithWeight.isEmpty() && useShortFormFallback){ String shortForm = sfp.getShortForm(IRI.create(entity.getURI())); shortForm = Joiner.on(" ").join(LinguisticUtil.getWordsFromCamelCase(shortForm)); - shortForm = Joiner.on(" ").join(LinguisticUtil.getWordsFromUnderscored(shortForm)); + shortForm = Joiner.on(" ").join(LinguisticUtil.getWordsFromUnderscored(shortForm)).trim(); textWithWeight.put(shortForm, weight); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
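For the NLP bonus re-enabled above, the contribution per search-tree node is nlpBonusFactor times the sum of the finite relevance values of the entities in the node's signature; entities without a recorded relevance count as 0. A tiny worked example with hypothetical entity names and relevance values:

    import java.util.HashMap;
    import java.util.Map;

    public class NlpBonusExample {
        public static void main(String[] args) {
            Map<String, Double> entityRelevance = new HashMap<String, Double>();
            entityRelevance.put("Person", 0.9);   // hypothetical PMI-based relevances
            entityRelevance.put("hasChild", 0.4);
            double nlpBonusFactor = 0.1;
            double sum = 0;
            for (double relevance : entityRelevance.values()) {
                if (!Double.isInfinite(relevance)) { // same guard as NLPHeuristic
                    sum += relevance;
                }
            }
            System.out.println(nlpBonusFactor * sum); // ~0.13 added to the node score
        }
    }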
From: <dfl...@us...> - 2013-09-06 10:01:56
|
Revision: 4092 http://sourceforge.net/p/dl-learner/code/4092 Author: dfleischhacker Date: 2013-09-06 10:01:53 +0000 (Fri, 06 Sep 2013) Log Message: ----------- Add methods to get top n synonyms for words Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordNet.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordNet.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordNet.java 2013-09-05 13:59:47 UTC (rev 4091) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordNet.java 2013-09-06 10:01:53 UTC (rev 4092) @@ -1,78 +1,115 @@ package org.dllearner.algorithms.isle; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.Set; - import net.didion.jwnl.JWNL; import net.didion.jwnl.JWNLException; -import net.didion.jwnl.data.IndexWord; -import net.didion.jwnl.data.POS; -import net.didion.jwnl.data.PointerTarget; -import net.didion.jwnl.data.PointerUtils; -import net.didion.jwnl.data.Synset; -import net.didion.jwnl.data.Word; +import net.didion.jwnl.data.*; import net.didion.jwnl.data.list.PointerTargetNode; import net.didion.jwnl.data.list.PointerTargetNodeList; import net.didion.jwnl.dictionary.Dictionary; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + public class WordNet { - - public Dictionary dict; - - public WordNet() { - try { - JWNL.initialize(this.getClass().getClassLoader().getResourceAsStream("wordnet_properties.xml")); - dict = Dictionary.getInstance(); - } catch (JWNLException e) { - e.printStackTrace(); - } - } - - public WordNet(String configPath) { - try { - JWNL.initialize(this.getClass().getClassLoader().getResourceAsStream(configPath)); - dict = Dictionary.getInstance(); - } catch (JWNLException e) { - e.printStackTrace(); - } - } - - public WordNet(InputStream propertiesStream) { - try { - JWNL.initialize(propertiesStream); - dict = Dictionary.getInstance(); - } catch (JWNLException e) { - e.printStackTrace(); - } - } - - public List<String> getBestSynonyms(POS pos, String s) { - - List<String> synonyms = new ArrayList<String>(); - - try { - IndexWord iw = dict.getIndexWord(pos, s);//dict.getMorphologicalProcessor().lookupBaseForm(pos, s) + + public Dictionary dict; + + public WordNet() { + try { + JWNL.initialize(this.getClass().getClassLoader().getResourceAsStream("wordnet_properties.xml")); + dict = Dictionary.getInstance(); + } + catch (JWNLException e) { + e.printStackTrace(); + } + } + + public WordNet(String configPath) { + try { + JWNL.initialize(this.getClass().getClassLoader().getResourceAsStream(configPath)); + dict = Dictionary.getInstance(); + } + catch (JWNLException e) { + e.printStackTrace(); + } + } + + public WordNet(InputStream propertiesStream) { + try { + JWNL.initialize(propertiesStream); + dict = Dictionary.getInstance(); + } + catch (JWNLException e) { + e.printStackTrace(); + } + } + + public static void main(String[] args) { + System.out.println(new WordNet().getBestSynonyms(POS.VERB, "learn")); + System.out.println(new WordNet().getSisterTerms(POS.NOUN, "actress")); + } + + public List<String> getBestSynonyms(POS pos, String s) { + + List<String> synonyms = new ArrayList<String>(); + + try { + IndexWord iw = dict.getIndexWord(pos, 
s);//dict.getMorphologicalProcessor().lookupBaseForm(pos, s) // IndexWord iw = dict.getMorphologicalProcessor().lookupBaseForm(pos, s); - if(iw != null){ - Synset[] synsets = iw.getSenses(); - Word[] words = synsets[0].getWords(); - for(Word w : words){ - String c = w.getLemma(); - if (!c.equals(s) && !c.contains(" ") && synonyms.size() < 4) { - synonyms.add(c); - } - } - } - - } catch (JWNLException e) { - e.printStackTrace(); - } - return synonyms; - } + if (iw != null) { + Synset[] synsets = iw.getSenses(); + Word[] words = synsets[0].getWords(); + for (Word w : words) { + String c = w.getLemma(); + if (!c.equals(s) && !c.contains(" ") && synonyms.size() < 4) { + synonyms.add(c); + } + } + } + } + catch (JWNLException e) { + e.printStackTrace(); + } + return synonyms; + } + + /** + * Returns the lemmas for the top {@code n} synsets of the given POS for the string {@code s}. + * + * @param pos the part of speech to retrieve synonyms for + * @param s the string to retrieve synonyms for + * @param n the number of synonyms to retrieve + * @return list of the lemmas of the top n synonyms of s + */ + public List<String> getTopSynonyms(POS pos, String s, int n) { + + List<String> synonyms = new ArrayList<String>(); + + try { + IndexWord iw = dict.getIndexWord(pos, s);//dict.getMorphologicalProcessor().lookupBaseForm(pos, s) +// IndexWord iw = dict.getMorphologicalProcessor().lookupBaseForm(pos, s); + if (iw != null) { + Synset[] synsets = iw.getSenses(); + for (int i = 0; i < n; i++) { + for (Word word : synsets[i].getWords()) { + String c = word.getLemma(); + if (!c.equals(s) && !c.contains(" ")) { + synonyms.add(c); + } + } + } + } + + } + catch (JWNLException e) { + e.printStackTrace(); + } + return synonyms; + } + public List<String> getAllSynonyms(POS pos, String s) { List<String> synonyms = new ArrayList<String>(); try { @@ -96,120 +133,124 @@ return synonyms; } - public List<String> getSisterTerms(POS pos, String s){ - List<String> sisterTerms = new ArrayList<String>(); - - try { - IndexWord iw = dict.getIndexWord(pos, s);//dict.getMorphologicalProcessor().lookupBaseForm(pos, s) + public List<String> getSisterTerms(POS pos, String s) { + List<String> sisterTerms = new ArrayList<String>(); + + try { + IndexWord iw = dict.getIndexWord(pos, s);//dict.getMorphologicalProcessor().lookupBaseForm(pos, s) // IndexWord iw = dict.getMorphologicalProcessor().lookupBaseForm(pos, s); - if(iw != null){ - Synset[] synsets = iw.getSenses(); - //System.out.println(synsets[0]); - PointerTarget[] pointerArr = synsets[0].getTargets(); - } - - } catch (JWNLException e) { - e.printStackTrace(); - } - return sisterTerms; - } - - public List<String> getAttributes(String s) { - - List<String> result = new ArrayList<String>(); - - try { - IndexWord iw = dict.getIndexWord(POS.ADJECTIVE, s); - if(iw != null){ - Synset[] synsets = iw.getSenses(); - Word[] words = synsets[0].getWords(); - for(Word w : words){ - String c = w.getLemma(); - if (!c.equals(s) && !c.contains(" ") && result.size() < 4) { - result.add(c); - } - } - } - - } catch (JWNLException e) { - e.printStackTrace(); - } - - return result; - } - - public static void main(String[] args) { - System.out.println(new WordNet().getBestSynonyms(POS.VERB, "learn")); - System.out.println(new WordNet().getSisterTerms(POS.NOUN, "actress")); - } - - /** - * Funktion returns a List of Hypo and Hypernyms of a given string - * @param s Word for which you want to get Hypo and Hypersyms - * @return List of Hypo and Hypernyms - * @throws JWNLException - */ - public 
List<String> getRelatedNouns(String s) { - List<String> result = new ArrayList<String>(); - IndexWord word = null; - Synset sense=null; - try{ - word=dict.getIndexWord(POS.NOUN,s); - if(word!=null){ - sense = word.getSense(1); - //Synset sense = word.getSense(1); - - PointerTargetNodeList relatedListHypernyms = null; - PointerTargetNodeList relatedListHyponyms = null; - try { - relatedListHypernyms = PointerUtils.getInstance().getDirectHypernyms(sense); - } catch (JWNLException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - try { - relatedListHyponyms = PointerUtils.getInstance().getDirectHyponyms(sense); - } catch (JWNLException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - - Iterator i = relatedListHypernyms.iterator(); - while (i.hasNext()) { - PointerTargetNode related = (PointerTargetNode) i.next(); - Synset s1 = related.getSynset(); - String tmp=(s1.toString()).replace(s1.getGloss(), ""); - tmp=tmp.replace(" -- ()]",""); - tmp=tmp.replaceAll("[0-9]",""); - tmp=tmp.replace("[Synset: [Offset: ",""); - tmp=tmp.replace("] [POS: noun] Words: ",""); - //its possible, that there is more than one word in a line from wordnet - String[] array_tmp=tmp.split(","); - for(String z : array_tmp) result.add(z.replace(" ", "")); - } - - Iterator j = relatedListHyponyms.iterator(); - while (j.hasNext()) { - PointerTargetNode related = (PointerTargetNode) j.next(); - Synset s1 = related.getSynset(); - String tmp=(s1.toString()).replace(s1.getGloss(), ""); - tmp=tmp.replace(" -- ()]",""); - tmp=tmp.replaceAll("[0-9]",""); - tmp=tmp.replace("[Synset: [Offset: ",""); - tmp=tmp.replace("] [POS: noun] Words: ",""); - //its possible, that there is more than one word in a line from wordnet - String[] array_tmp=tmp.split(","); - for(String z : array_tmp) result.add(z.replace(" ", "")); - } - } - }catch (JWNLException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - - - - return result; - } - + if (iw != null) { + Synset[] synsets = iw.getSenses(); + //System.out.println(synsets[0]); + PointerTarget[] pointerArr = synsets[0].getTargets(); + } + + } + catch (JWNLException e) { + e.printStackTrace(); + } + return sisterTerms; + } + + public List<String> getAttributes(String s) { + + List<String> result = new ArrayList<String>(); + + try { + IndexWord iw = dict.getIndexWord(POS.ADJECTIVE, s); + if (iw != null) { + Synset[] synsets = iw.getSenses(); + Word[] words = synsets[0].getWords(); + for (Word w : words) { + String c = w.getLemma(); + if (!c.equals(s) && !c.contains(" ") && result.size() < 4) { + result.add(c); + } + } + } + + } + catch (JWNLException e) { + e.printStackTrace(); + } + + return result; + } + + /** + * Funktion returns a List of Hypo and Hypernyms of a given string + * + * @param s Word for which you want to get Hypo and Hypersyms + * @return List of Hypo and Hypernyms + * @throws JWNLException + */ + public List<String> getRelatedNouns(String s) { + List<String> result = new ArrayList<String>(); + IndexWord word = null; + Synset sense = null; + try { + word = dict.getIndexWord(POS.NOUN, s); + if (word != null) { + sense = word.getSense(1); + //Synset sense = word.getSense(1); + + PointerTargetNodeList relatedListHypernyms = null; + PointerTargetNodeList relatedListHyponyms = null; + try { + relatedListHypernyms = PointerUtils.getInstance().getDirectHypernyms(sense); + } + catch (JWNLException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + try { + relatedListHyponyms = 
PointerUtils.getInstance().getDirectHyponyms(sense); + } + catch (JWNLException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + Iterator i = relatedListHypernyms.iterator(); + while (i.hasNext()) { + PointerTargetNode related = (PointerTargetNode) i.next(); + Synset s1 = related.getSynset(); + String tmp = (s1.toString()).replace(s1.getGloss(), ""); + tmp = tmp.replace(" -- ()]", ""); + tmp = tmp.replaceAll("[0-9]", ""); + tmp = tmp.replace("[Synset: [Offset: ", ""); + tmp = tmp.replace("] [POS: noun] Words: ", ""); + //its possible, that there is more than one word in a line from wordnet + String[] array_tmp = tmp.split(","); + for (String z : array_tmp) { + result.add(z.replace(" ", "")); + } + } + + Iterator j = relatedListHyponyms.iterator(); + while (j.hasNext()) { + PointerTargetNode related = (PointerTargetNode) j.next(); + Synset s1 = related.getSynset(); + String tmp = (s1.toString()).replace(s1.getGloss(), ""); + tmp = tmp.replace(" -- ()]", ""); + tmp = tmp.replaceAll("[0-9]", ""); + tmp = tmp.replace("[Synset: [Offset: ", ""); + tmp = tmp.replace("] [POS: noun] Words: ", ""); + //its possible, that there is more than one word in a line from wordnet + String[] array_tmp = tmp.split(","); + for (String z : array_tmp) { + result.add(z.replace(" ", "")); + } + } + } + } + catch (JWNLException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + + return result; + } + } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java 2013-09-05 13:59:47 UTC (rev 4091) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java 2013-09-06 10:01:53 UTC (rev 4092) @@ -83,6 +83,23 @@ } /** + * Returns an array of the lemmas of the top {@code n} synonyms for the given word. Only synonyms for the POS in + * {@link #RELEVANT_POS} are returned. + * + * @param word the word to retrieve synonyms for + * @param n the number of senses to get lemmas for + * @return synonyms for the given word + */ + public static String[] getTopSynonymsForWord(String word, int n) { + ArrayList<String> synonyms = new ArrayList<String>(); + + for (POS pos : RELEVANT_POS) { + synonyms.addAll(wn.getTopSynonyms(pos, word, n)); + } + return synonyms.toArray(new String[synonyms.size()]); + } + + /** * Returns the normalized form of the given word. This method is only able to work with single words! If there is an * error normalizing the given word, the word itself is returned. * This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
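A quick sketch of the new top-n lookup (actual output depends on the local WordNet installation configured via wordnet_properties.xml). Note that the follow-up revision 4093 below guards against words that have fewer than n senses:

    import org.dllearner.algorithms.isle.index.LinguisticUtil;

    public class SynonymExample {
        public static void main(String[] args) {
            // Lemmas from the top 3 senses for each relevant POS (noun, verb);
            // multi-word lemmas and the query word itself are filtered out.
            for (String synonym : LinguisticUtil.getTopSynonymsForWord("learn", 3)) {
                System.out.println(synonym);
            }
        }
    }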
From: <dfl...@us...> - 2013-09-06 11:36:39
|
Revision: 4093 http://sourceforge.net/p/dl-learner/code/4093 Author: dfleischhacker Date: 2013-09-06 11:36:33 +0000 (Fri, 06 Sep 2013) Log Message: ----------- Extend ontology words by synonyms Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordNet.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordNet.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordNet.java 2013-09-06 10:01:53 UTC (rev 4092) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordNet.java 2013-09-06 11:36:33 UTC (rev 4093) @@ -93,7 +93,7 @@ // IndexWord iw = dict.getMorphologicalProcessor().lookupBaseForm(pos, s); if (iw != null) { Synset[] synsets = iw.getSenses(); - for (int i = 0; i < n; i++) { + for (int i = 0; i < Math.min(n, synsets.length); i++) { for (Word word : synsets[i].getWords()) { String c = word.getLemma(); if (!c.equals(s) && !c.contains(" ")) { Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java 2013-09-06 10:01:53 UTC (rev 4092) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java 2013-09-06 11:36:33 UTC (rev 4093) @@ -36,9 +36,13 @@ for (int i = 0; i < camelCase.length(); i++) { // we just ignore characters not matching the defined pattern char curChar = camelCase.charAt(i); - if (!Character.isLetter(curChar)) { + if (Character.isWhitespace(curChar)) { + sb.append(" "); continue; } + else if (!Character.isLetter(curChar)) { + continue; + } if (Character.isUpperCase(curChar)) { // found a new upper case letter resultingWords.add(sb.toString()); sb = new StringBuilder(); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-09-06 10:01:53 UTC (rev 4092) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-09-06 11:36:33 UTC (rev 4093) @@ -1,5 +1,6 @@ package org.dllearner.algorithms.isle.index; +import org.apache.commons.lang.StringUtils; import org.dllearner.algorithms.isle.textretrieval.EntityTextRetriever; import org.dllearner.core.owl.Entity; import org.dllearner.utilities.datastructures.PrefixTrie; @@ -11,28 +12,62 @@ PrefixTrie<Set<Entity>> trie; EntityTextRetriever entityTextRetriever; - + + /** + * Initialize the trie with strings from the provided ontology using a no-op name generator, i.e., only the + * actual ontology strings are added and no expansion is done. 
+ * + * @param entityTextRetriever the text retriever to use + * @param ontology the ontology to get strings from + */ public SimpleEntityCandidatesTrie(EntityTextRetriever entityTextRetriever, OWLOntology ontology) { - this.entityTextRetriever = entityTextRetriever; - buildTrie(ontology); + this(entityTextRetriever, ontology, new DummyNameGenerator()); } + + /** + * Initialize the trie with strings from the provided ontology and use the given entity name generator + * for generating alternative words. + * + * @param entityTextRetriever the text retriever to use + * @param ontology the ontology to get strings from + * @param nameGenerator the name generator to use for generating alternative words + */ + public SimpleEntityCandidatesTrie(EntityTextRetriever entityTextRetriever, OWLOntology ontology, + NameGenerator nameGenerator) { + this.entityTextRetriever = entityTextRetriever; + buildTrie(ontology, nameGenerator); + } - public void buildTrie(OWLOntology ontology) { + public void buildTrie(OWLOntology ontology, NameGenerator nameGenerator) { this.trie = new PrefixTrie<Set<Entity>>(); Map<Entity, Set<String>> relevantText = entityTextRetriever.getRelevantText(ontology); for (Entity entity : relevantText.keySet()) { + for (String text : relevantText.get(entity)) { - addEntry(text, entity); - // Adds also composing words, e.g. for "has child", "has" and "child" are also added - if (text.contains(" ")) { - for (String subtext : text.split(" ")) { - addEntry(subtext, entity); - //System.out.println("trie.add("+subtext+","++")"); - } - } - } - } + text = StringUtils.join(LinguisticUtil.getWordsFromCamelCase(text), " "); + text = StringUtils.join(LinguisticUtil.getWordsFromUnderscored(text), " "); + if (text.trim().isEmpty()) { + continue; + } + addEntry(text, entity); + for (String alternativeText : nameGenerator.getAlternativeText(text)) { +// System.out.println("New alternative text for " + text + " --> " + alternativeText); + addEntry(alternativeText, entity); + } + // Adds also composing words, e.g. for "has child", "has" and "child" are also added + if (text.contains(" ")) { + for (String subtext : text.split(" ")) { + addEntry(subtext, entity); + for (String alternativeText : nameGenerator.getAlternativeText(subtext)) { +// System.out.println("New alternative text for " + subtext + " --> " + alternativeText); + addEntry(alternativeText, entity); + } + //System.out.println("trie.add("+subtext+","++")"); + } + } + } + } } @Override @@ -62,7 +97,7 @@ public String toString() { String output = ""; Map<String,Set<Entity>> trieMap = trie.toMap(); - List<String> termsList = new ArrayList(trieMap.keySet()); + List<String> termsList = new ArrayList<String>(trieMap.keySet()); Collections.sort(termsList); for (String key : termsList) { output += key + ":\n"; @@ -78,4 +113,68 @@ } + public static interface NameGenerator { + /** + * Returns a list of possible alternative words for the given word + * + * @param text the text to return alternative words for + * @return alternative words for given word + */ + List<String> getAlternativeText(String text); + } + + public static class DummyNameGenerator implements NameGenerator { + @Override + public List<String> getAlternativeText(String word) { + return Collections.singletonList(word); + } + } + + /** + * Generates alternative texts by using WordNet synonyms. 
+ */ + public static class WordNetNameGenerator implements NameGenerator { + private int maxNumberOfSenses = 5; + + /** + * Sets up the generator for returning the lemmas of the top {@code maxNumberOfSenses} senses. + * @param maxNumberOfSenses the maximum number of senses to aggregate word lemmas from + */ + public WordNetNameGenerator(int maxNumberOfSenses) { + this.maxNumberOfSenses = maxNumberOfSenses; + } + + @Override + public List<String> getAlternativeText(String word) { + return Arrays.asList(LinguisticUtil.getTopSynonymsForWord(word, maxNumberOfSenses)); + } + } + + /** + * Generates alternative texts by using WordNet synonym and lemmatizing of the original words + */ + public static class LemmatizingWordNetNameGenerator implements NameGenerator { + private int maxNumberOfSenses = 5; + + /** + * Sets up the generator for returning the lemmas of the top {@code maxNumberOfSenses} senses. + * @param maxNumberOfSenses the maximum number of senses to aggregate word lemmas from + */ + public LemmatizingWordNetNameGenerator(int maxNumberOfSenses) { + this.maxNumberOfSenses = maxNumberOfSenses; + } + + @Override + public List<String> getAlternativeText(String word) { + ArrayList<String> res = new ArrayList<String>(); + res.add(LinguisticUtil.getNormalizedForm(word)); + + for (String w : LinguisticUtil + .getTopSynonymsForWord(LinguisticUtil.getNormalizedForm(word), maxNumberOfSenses)) { + res.add(w.replaceAll("_", " ")); + } + + return res; + } + } } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java 2013-09-06 10:01:53 UTC (rev 4092) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java 2013-09-06 11:36:33 UTC (rev 4093) @@ -5,36 +5,36 @@ /** * Annotates a document using a prefix trie + * * @author Andre Melo - * */ public class TrieLinguisticAnnotator implements LinguisticAnnotator { - - EntityCandidatesTrie candidatesTrie; - - public TrieLinguisticAnnotator(EntityCandidatesTrie candidatesTrie) { - this.candidatesTrie = candidatesTrie; - } - - /** - * Generates annotation based on trie's longest matching strings - * @param document - * @return - */ - @Override - public Set<Annotation> annotate(Document document) { - String content = document.getContent(); - Set<Annotation> annotations = new HashSet<Annotation>(); - for (int i=0; i<content.length(); i++) { - String unparsed = content.substring(i); - String match = candidatesTrie.getLongestMatch(unparsed); - if (match!=null && !match.isEmpty()) { - Annotation annotation = new Annotation(document, i, match.length()); - annotations.add(annotation); - i += match.length()-1; - } - } - return annotations; - } + EntityCandidatesTrie candidatesTrie; + public TrieLinguisticAnnotator(EntityCandidatesTrie candidatesTrie) { + this.candidatesTrie = candidatesTrie; + } + + /** + * Generates annotation based on trie's longest matching strings + * + * @param document the document to get annotations for + * @return the set of annotation for the given document + */ + @Override + public Set<Annotation> annotate(Document document) { + String content = document.getContent(); + Set<Annotation> annotations = new HashSet<Annotation>(); + for (int i = 0; i < content.length(); i++) { + String unparsed = content.substring(i); + String match = candidatesTrie.getLongestMatch(unparsed); 
+ if (match != null && !match.isEmpty()) { + Annotation annotation = new Annotation(document, i, match.length()); + annotations.add(annotation); + i += match.length() - 1; + } + } + return annotations; + } + } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java 2013-09-06 10:01:53 UTC (rev 4092) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java 2013-09-06 11:36:33 UTC (rev 4093) @@ -30,7 +30,8 @@ */ public SimpleSemanticIndex(OWLOntology ontology, SyntacticIndex syntacticIndex) { super(ontology); - SimpleEntityCandidatesTrie trie = new SimpleEntityCandidatesTrie(new RDFSLabelEntityTextRetriever(ontology), ontology); + SimpleEntityCandidatesTrie trie = new SimpleEntityCandidatesTrie(new RDFSLabelEntityTextRetriever(ontology), + ontology, new SimpleEntityCandidatesTrie.LemmatizingWordNetNameGenerator(5)); // trie.printTrie(); setSemanticAnnotator(new SemanticAnnotator( new SimpleWordSenseDisambiguation(ontology), This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
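The Math.min bound added to WordNet.java above is the core fix of this revision: IndexWord.getSenses() can return fewer than the requested n synsets, and the old loop then read past the end of the array. A minimal standalone sketch of the bounded lookup, assuming the JWNL types (IndexWord, Synset, Word) that WordNet.java already uses; lookup errors are simply propagated:

import java.util.ArrayList;
import java.util.List;

import net.didion.jwnl.data.IndexWord;
import net.didion.jwnl.data.Synset;
import net.didion.jwnl.data.Word;

public static List<String> getTopSynonyms(IndexWord iw, String s, int n) throws Exception {
    List<String> synonyms = new ArrayList<String>();
    if (iw != null) {
        Synset[] synsets = iw.getSenses();
        // iterate over at most n senses, but never past the end of the array
        for (int i = 0; i < Math.min(n, synsets.length); i++) {
            for (Word word : synsets[i].getWords()) {
                String lemma = word.getLemma();
                // skip the input word itself and multi-word lemmas
                if (!lemma.equals(s) && !lemma.contains(" ")) {
                    synonyms.add(lemma);
                }
            }
        }
    }
    return synonyms;
}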
From: <dfl...@us...> - 2013-09-06 13:31:47
Revision: 4095 http://sourceforge.net/p/dl-learner/code/4095 Author: dfleischhacker Date: 2013-09-06 13:31:43 +0000 (Fri, 06 Sep 2013) Log Message: ----------- Make LinguisticUtil singleton Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java 2013-09-06 12:48:08 UTC (rev 4094) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java 2013-09-06 13:31:43 UTC (rev 4095) @@ -6,23 +6,25 @@ import org.dllearner.algorithms.isle.WordNet; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; /** * Provides shortcuts to commonly used linguistic operations * @author Daniel Fleischhacker */ public class LinguisticUtil { + private static LinguisticUtil instance; + private static final WordNet wn = new WordNet(); private static POS[] RELEVANT_POS = new POS[]{POS.NOUN, POS.VERB}; private static Lemmatizer lemmatizer; - static { - try { - lemmatizer = new DefaultLemmatizer(); + public static LinguisticUtil getInstance() { + if (instance == null) { + instance = new LinguisticUtil(); } - catch (Exception e) { - e.printStackTrace(); - } + return instance; } /** @@ -30,7 +32,7 @@ * @param camelCase the word containing camelcase to split * @return all words as camelcase contained in the given word */ - public static String[] getWordsFromCamelCase(String camelCase) { + public String[] getWordsFromCamelCase(String camelCase) { ArrayList<String> resultingWords = new ArrayList<String>(); StringBuilder sb = new StringBuilder(); for (int i = 0; i < camelCase.length(); i++) { @@ -66,7 +68,7 @@ * @param underScored word to split at underscores * @return words contained in given word */ - public static String[] getWordsFromUnderscored(String underScored) { + public String[] getWordsFromUnderscored(String underScored) { return underScored.split("_"); } @@ -77,7 +79,7 @@ * @param word the word to retrieve synonyms for * @return synonyms for the given word */ - public static String[] getSynonymsForWord(String word) { + public String[] getSynonymsForWord(String word) { ArrayList<String> synonyms = new ArrayList<String>(); for (POS pos : RELEVANT_POS) { @@ -94,7 +96,7 @@ * @param n the number of senses to get lemmas for * @return synonyms for the given word */ - public static String[] getTopSynonymsForWord(String word, int n) { + public String[] getTopSynonymsForWord(String word, int n) { ArrayList<String> synonyms = new ArrayList<String>(); for (POS pos : RELEVANT_POS) { @@ -104,30 +106,48 @@ } /** - * Returns the normalized form of the given word. This method is only able to work with single words! If there is an - * error normalizing the given word, the word itself is returned. + * Returns the normalized form of the given word. 
If the word contains spaces, each part separated by spaces is + * normalized independently and joined afterwards. If there is an error normalizing the given word, the word itself + * is returned. * * @param word the word to get normalized form for * @return normalized form of the word or the word itself on an error */ - public static String getNormalizedForm(String word) { - try { - if (lemmatizer == null) { - return word; + public String getNormalizedForm(String word) { + StringBuilder res = new StringBuilder(); + + boolean first = true; + + ArrayList<String> singleWords = new ArrayList<String>(); + Collections.addAll(singleWords, word.split(" ")); + + for (String w : singleWords) { + try { + if (first) { + first = false; + } + else { + res.append(" "); + } + if (lemmatizer == null) { + res.append(w); + } + else { + res.append(lemmatizer.lemmatize(w)); + } } - return lemmatizer.lemmatize(word); + catch (Exception e) { + e.printStackTrace(); + } } - catch (Exception e) { - e.printStackTrace(); - } - return word; + return res.toString(); } public static void main(String[] args) { - System.out.println(getNormalizedForm("going")); - for (String s : getWordsFromCamelCase("thisIsAClassWith1Name123")) { + System.out.println(LinguisticUtil.getInstance().getNormalizedForm("going")); + for (String s : LinguisticUtil.getInstance().getWordsFromCamelCase("thisIsAClassWith1Name123")) { System.out.println(s); - for (String w : getSynonymsForWord(s)) { + for (String w : LinguisticUtil.getInstance().getSynonymsForWord(s)) { System.out.println(" --> " + w); } } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java 2013-09-06 12:48:08 UTC (rev 4094) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java 2013-09-06 13:31:43 UTC (rev 4095) @@ -42,7 +42,10 @@ Set<SemanticAnnotation> semanticAnnotations = new HashSet<SemanticAnnotation>(); for (Annotation annotation : annotations) { Set<Entity> candidateEntities = entityCandidateGenerator.getCandidates(annotation); - SemanticAnnotation semanticAnnotation = wordSenseDisambiguation.disambiguate(annotation, candidateEntities); + if (candidateEntities == null || candidateEntities.size() == 0) { + continue; + } + SemanticAnnotation semanticAnnotation = wordSenseDisambiguation.disambiguate(annotation, candidateEntities); if(semanticAnnotation != null){ semanticAnnotations.add(semanticAnnotation); } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-09-06 12:48:08 UTC (rev 4094) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-09-06 13:31:43 UTC (rev 4095) @@ -45,14 +45,14 @@ for (Entity entity : relevantText.keySet()) { for (String text : relevantText.get(entity)) { - text = StringUtils.join(LinguisticUtil.getWordsFromCamelCase(text), " "); - text = StringUtils.join(LinguisticUtil.getWordsFromUnderscored(text), " "); + text = StringUtils.join(LinguisticUtil.getInstance().getWordsFromCamelCase(text), " "); + text = StringUtils.join(LinguisticUtil.getInstance().getWordsFromUnderscored(text), " "); if 
(text.trim().isEmpty()) { continue; } addEntry(text, entity); for (String alternativeText : nameGenerator.getAlternativeText(text)) { -// System.out.println("New alternative text for " + text + " --> " + alternativeText); + System.out.println("New alternative text for " + text + " --> " + alternativeText); addEntry(alternativeText, entity); } // Adds also composing words, e.g. for "has child", "has" and "child" are also added @@ -60,7 +60,7 @@ for (String subtext : text.split(" ")) { addEntry(subtext, entity); for (String alternativeText : nameGenerator.getAlternativeText(subtext)) { -// System.out.println("New alternative text for " + subtext + " --> " + alternativeText); + System.out.println("New alternative text for " + subtext + " --> " + alternativeText); addEntry(alternativeText, entity); } //System.out.println("trie.add("+subtext+","++")"); @@ -146,7 +146,7 @@ @Override public List<String> getAlternativeText(String word) { - return Arrays.asList(LinguisticUtil.getTopSynonymsForWord(word, maxNumberOfSenses)); + return Arrays.asList(LinguisticUtil.getInstance().getTopSynonymsForWord(word, maxNumberOfSenses)); } } @@ -167,10 +167,10 @@ @Override public List<String> getAlternativeText(String word) { ArrayList<String> res = new ArrayList<String>(); - res.add(LinguisticUtil.getNormalizedForm(word)); + res.add(LinguisticUtil.getInstance().getNormalizedForm(word)); - for (String w : LinguisticUtil - .getTopSynonymsForWord(LinguisticUtil.getNormalizedForm(word), maxNumberOfSenses)) { + for (String w : LinguisticUtil.getInstance() + .getTopSynonymsForWord(LinguisticUtil.getInstance().getNormalizedForm(word), maxNumberOfSenses)) { res.add(w.replaceAll("_", " ")); } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java 2013-09-06 12:48:08 UTC (rev 4094) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java 2013-09-06 13:31:43 UTC (rev 4095) @@ -27,7 +27,7 @@ Set<Annotation> annotations = new HashSet<Annotation>(); for (int i = 0; i < content.length(); i++) { String unparsed = content.substring(i); - String match = candidatesTrie.getLongestMatch(unparsed); + String match = candidatesTrie.getLongestMatch(LinguisticUtil.getInstance().getNormalizedForm(unparsed)); if (match != null && !match.isEmpty()) { Annotation annotation = new Annotation(document, i, match.length()); annotations.add(annotation); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java 2013-09-06 12:48:08 UTC (rev 4094) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java 2013-09-06 13:31:43 UTC (rev 4095) @@ -91,8 +91,8 @@ if(textWithWeight.isEmpty() && useShortFormFallback){ String shortForm = sfp.getShortForm(IRI.create(entity.getURI())); - shortForm = Joiner.on(" ").join(LinguisticUtil.getWordsFromCamelCase(shortForm)); - shortForm = Joiner.on(" ").join(LinguisticUtil.getWordsFromUnderscored(shortForm)).trim(); + shortForm = Joiner.on(" ").join(LinguisticUtil.getInstance().getWordsFromCamelCase(shortForm)); + shortForm = Joiner.on(" 
").join(LinguisticUtil.getInstance().getWordsFromUnderscored(shortForm)).trim(); textWithWeight.put(shortForm, weight); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2013-09-09 10:43:49
Revision: 4102 http://sourceforge.net/p/dl-learner/code/4102 Author: lorenz_b Date: 2013-09-09 10:43:46 +0000 (Mon, 09 Sep 2013) Log Message: ----------- Added class to compute the cosine similarity for 2 documents using the Lucene API. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/VSMCosineDocumentSimilarity.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java 2013-09-09 10:18:57 UTC (rev 4101) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java 2013-09-09 10:43:46 UTC (rev 4102) @@ -57,7 +57,7 @@ @ConfigOption(name = "startNodeBonus", defaultValue="0.1") private double startNodeBonus = 0.1; - private double nlpBonusFactor = 0.0001; + private double nlpBonusFactor = 1; private Map<Entity, Double> entityRelevance; Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/VSMCosineDocumentSimilarity.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/VSMCosineDocumentSimilarity.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/VSMCosineDocumentSimilarity.java 2013-09-09 10:43:46 UTC (rev 4102) @@ -0,0 +1,238 @@ +/** + * + */ +package org.dllearner.algorithms.isle; + +import java.io.IOException; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +import org.apache.commons.math3.linear.ArrayRealVector; +import org.apache.commons.math3.linear.RealVector; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.core.SimpleAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.Version; + +/** + * Imagine an N-dimensional space where N is the number of unique words in a pair of texts. Each of the two texts + * can be treated like a vector in this N-dimensional space. The distance between the two vectors is an indication + * of the similarity of the two texts. The cosine of the angle between the two vectors is the most common distance measure. 
+ * @author Lorenz Buehmann + * + */ +public class VSMCosineDocumentSimilarity { + + enum TermWeighting { + TF, TF_IDF + } + + public static final String CONTENT = "Content"; + public static final FieldType TYPE_STORED = new FieldType(); + + private final Set<String> terms = new HashSet<String>(); + private final RealVector v1; + private final RealVector v2; + + static { + TYPE_STORED.setIndexed(true); + TYPE_STORED.setTokenized(true); + TYPE_STORED.setStored(true); + TYPE_STORED.setStoreTermVectors(true); + TYPE_STORED.setStoreTermVectorPositions(true); + TYPE_STORED.freeze(); + } + + public VSMCosineDocumentSimilarity(String s1, String s2, TermWeighting termWeighting) throws IOException { + //create the index + Directory directory = createIndex(s1, s2); + IndexReader reader = DirectoryReader.open(directory); + //generate the document vectors + if(termWeighting == TermWeighting.TF){//based on term frequency only + //compute the term frequencies for document 1 + Map<String, Integer> f1 = getTermFrequencies(reader, 0); + //compute the term frequencies for document 2 + Map<String, Integer> f2 = getTermFrequencies(reader, 1); + reader.close(); + //map both documents to vector objects + v1 = getTermVectorInteger(f1); + v2 = getTermVectorInteger(f2); + } else if(termWeighting == TermWeighting.TF_IDF){//based on tf*idf weighting + //compute the term frequencies for document 1 + Map<String, Double> f1 = getTermWeights(reader, 0); + //compute the term frequencies for document 2 + Map<String, Double> f2 = getTermWeights(reader, 1); + reader.close(); + //map both documents to vector objects + v1 = getTermVectorDouble(f1); + v2 = getTermVectorDouble(f2); + } else { + v1 = null; + v2 = null; + } + } + + public VSMCosineDocumentSimilarity(String s1, String s2) throws IOException { + this(s1, s2, TermWeighting.TF_IDF); + } + + /** + * Returns the cosine document similarity between document {@code doc1} and {@code doc2} using TF-IDF as weighting for each term. + * The resulting similarity ranges from -1 meaning exactly opposite, to 1 meaning exactly the same, + * with 0 usually indicating independence, and in-between values indicating intermediate similarity or dissimilarity. + * @param s1 + * @param s2 + * @return + * @throws IOException + */ + public static double getCosineSimilarity(String doc1, String doc2) + throws IOException { + return new VSMCosineDocumentSimilarity(doc1, doc2).getCosineSimilarity(); + } + + /** + * Returns the cosine document similarity between document {@code doc1} and {@code doc2} based on {@code termWeighting} to compute the weight + * for each term in the documents. + * The resulting similarity ranges from -1 meaning exactly opposite, to 1 meaning exactly the same, + * with 0 usually indicating independence, and in-between values indicating intermediate similarity or dissimilarity. + * @param s1 + * @param s2 + * @return + * @throws IOException + */ + public static double getCosineSimilarity(String doc1, String doc2, TermWeighting termWeighting) + throws IOException { + return new VSMCosineDocumentSimilarity(doc1, doc2, termWeighting).getCosineSimilarity(); + } + + /** + * Create a in-memory Lucene index for both documents. 
+ * @param s1 + * @param s2 + * @return + * @throws IOException + */ + private Directory createIndex(String s1, String s2) throws IOException { + Directory directory = new RAMDirectory(); + Analyzer analyzer = new SimpleAnalyzer(Version.LUCENE_43); + IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_43, analyzer); + IndexWriter writer = new IndexWriter(directory, iwc); + addDocument(writer, s1); + addDocument(writer, s2); + writer.close(); + return directory; + } + + /** + * Add the document to the Lucene index. + * @param writer + * @param content + * @throws IOException + */ + private void addDocument(IndexWriter writer, String content) throws IOException { + Document doc = new Document(); + Field field = new Field(CONTENT, content, TYPE_STORED); + doc.add(field); + writer.addDocument(doc); + } + + /** + * Get the frequency of each term contained in the document. + * @param reader + * @param docId + * @return + * @throws IOException + */ + private Map<String, Integer> getTermFrequencies(IndexReader reader, int docId) + throws IOException { + Terms vector = reader.getTermVector(docId, CONTENT); + TermsEnum termsEnum = vector.iterator(null); + Map<String, Integer> frequencies = new HashMap<String, Integer>(); + BytesRef text = null; + while ((text = termsEnum.next()) != null) { + String term = text.utf8ToString(); + int freq = (int) termsEnum.totalTermFreq(); + frequencies.put(term, freq); + terms.add(term); + } + return frequencies; + } + + /** + * Get the weight(tf*idf) of each term contained in the document. + * @param reader + * @param docId + * @return + * @throws IOException + */ + private Map<String, Double> getTermWeights(IndexReader reader, int docId) + throws IOException { + Terms vector = reader.getTermVector(docId, CONTENT); + TermsEnum termsEnum = vector.iterator(null); + Map<String, Double> weights = new HashMap<String, Double>(); + BytesRef text = null; + while ((text = termsEnum.next()) != null) { + String term = text.utf8ToString(); + //get the term frequency + int tf = (int) termsEnum.totalTermFreq(); + //get the document frequency + int df = reader.docFreq(new Term(CONTENT, text)); + //compute the inverse document frequency + double idf = getIDF(reader.numDocs(), df); + //compute tf*idf + double weight = tf * idf; + + weights.put(term, weight); + terms.add(term); + } + return weights; + } + + private double getIDF(int totalNumberOfDocuments, int documentFrequency){ + return 1 + Math.log(totalNumberOfDocuments/documentFrequency); + } + + private double getCosineSimilarity() { + return (v1.dotProduct(v2)) / (v1.getNorm() * v2.getNorm()); + } + + private RealVector getTermVectorInteger(Map<String, Integer> map) { + RealVector vector = new ArrayRealVector(terms.size()); + int i = 0; + for (String term : terms) { + int value = map.containsKey(term) ? map.get(term) : 0; + vector.setEntry(i++, value); + } + return vector.mapDivide(vector.getL1Norm()); + } + + private RealVector getTermVectorDouble(Map<String, Double> map) { + RealVector vector = new ArrayRealVector(terms.size()); + int i = 0; + for (String term : terms) { + double value = map.containsKey(term) ? 
map.get(term) : 0d; + vector.setEntry(i++, value); + } + return vector.mapDivide(vector.getL1Norm()); + } + + public static void main(String[] args) throws Exception { + double cosineSimilarity = VSMCosineDocumentSimilarity.getCosineSimilarity("The king is here", "The salad is cold"); + System.out.println(cosineSimilarity); + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
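In formulas, the class computes the standard vector-space measure: with both documents mapped to L1-normalized term vectors,

\[
\mathrm{sim}(d_1, d_2) \;=\; \cos\theta \;=\; \frac{\vec{v}_1 \cdot \vec{v}_2}{\lVert \vec{v}_1 \rVert \, \lVert \vec{v}_2 \rVert},
\qquad
w_{t,d} \;=\; \mathrm{tf}_{t,d} \cdot \Bigl(1 + \log \frac{N}{\mathrm{df}_t}\Bigr)
\]

where the second equation is the per-term weight in TF_IDF mode, matching getIDF above; N is the number of indexed documents, which is always 2 here. One caveat: getIDF divides two ints, so N/df truncates before the logarithm. With N = 2 and df in {1, 2} the truncated and exact quotients coincide, but a version indexing more documents should cast to double first.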
From: <and...@us...> - 2013-09-10 15:52:52
Revision: 4107 http://sourceforge.net/p/dl-learner/code/4107 Author: andremelo Date: 2013-09-10 15:52:48 +0000 (Tue, 10 Sep 2013) Log Message: ----------- - Adding the method to EntitityCandidateGenerator interface: HashMap<Annotation,Set<Entity>> getCandidatesMap(Set<Annotation> annotations) - Adding first version of the postprocessing from the trie implementation Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityCandidateGenerator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidateGenerator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityCandidateGenerator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityCandidateGenerator.java 2013-09-10 15:49:18 UTC (rev 4106) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityCandidateGenerator.java 2013-09-10 15:52:48 UTC (rev 4107) @@ -3,6 +3,7 @@ */ package org.dllearner.algorithms.isle; +import java.util.HashMap; import java.util.Set; import org.dllearner.algorithms.isle.index.Annotation; @@ -22,4 +23,7 @@ } public abstract Set<Entity> getCandidates(Annotation annotation); + + + public abstract HashMap<Annotation,Set<Entity>> getCandidatesMap(Set<Annotation> annotations); } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java 2013-09-10 15:49:18 UTC (rev 4106) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java 2013-09-10 15:52:48 UTC (rev 4107) @@ -1,5 +1,6 @@ package org.dllearner.algorithms.isle.index; +import java.util.HashMap; import java.util.HashSet; import java.util.Set; @@ -40,8 +41,9 @@ public AnnotatedDocument processDocument(TextDocument document){ Set<Annotation> annotations = linguisticAnnotator.annotate(document); Set<SemanticAnnotation> semanticAnnotations = new HashSet<SemanticAnnotation>(); - for (Annotation annotation : annotations) { - Set<Entity> candidateEntities = entityCandidateGenerator.getCandidates(annotation); + HashMap<Annotation,Set<Entity>> candidatesMap = entityCandidateGenerator.getCandidatesMap(annotations); + for (Annotation annotation : candidatesMap.keySet()) { + Set<Entity> candidateEntities = candidatesMap.get(annotation); if (candidateEntities == null || candidateEntities.size() == 0) { continue; } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidateGenerator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidateGenerator.java 2013-09-10 15:49:18 UTC (rev 4106) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidateGenerator.java 2013-09-10 15:52:48 UTC (rev 4107) @@ -3,6 +3,7 @@ */ package org.dllearner.algorithms.isle.index; +import java.util.HashMap; import java.util.HashSet; import java.util.Set; @@ -39,4 +40,13 @@ return allEntities; } + @Override + public HashMap<Annotation, 
Set<Entity>> getCandidatesMap(Set<Annotation> annotations) { + HashMap<Annotation, Set<Entity>> result = new HashMap<Annotation, Set<Entity>>(); + for (Annotation annotation: annotations) + result.put(annotation, getCandidates(annotation)); + + return result; + } + } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java 2013-09-10 15:49:18 UTC (rev 4106) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java 2013-09-10 15:52:48 UTC (rev 4107) @@ -1,11 +1,24 @@ package org.dllearner.algorithms.isle.index; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; import java.util.Set; +import java.util.regex.Pattern; import org.dllearner.algorithms.isle.EntityCandidateGenerator; +import org.dllearner.algorithms.isle.StopWordFilter; import org.dllearner.core.owl.Entity; import org.semanticweb.owlapi.model.OWLOntology; +import cern.colt.Arrays; +import cern.colt.list.AbstractCollection; + +import edu.stanford.nlp.util.Sets; + /** * Generates candidates using a entity candidates prefix trie * @author Andre Melo @@ -13,7 +26,9 @@ */ public class TrieEntityCandidateGenerator extends EntityCandidateGenerator{ - EntityCandidatesTrie candidatesTrie; + final EntityCandidatesTrie candidatesTrie; + final StopWordFilter stopWordFilter = new StopWordFilter(); + int window = 10; public TrieEntityCandidateGenerator(OWLOntology ontology, EntityCandidatesTrie candidatesTrie) { super(ontology); @@ -24,4 +39,103 @@ return candidatesTrie.getCandidateEntities(annotation.getToken()); } + /** + * Postprocess the annotations generated by annotate + * The objective is to merge annotations which are likely to belong to the same entity + * @param annotations : set of annotations + * @param window : maximum distance between the annotations + * @return + */ + public void postProcess(HashMap<Annotation,Set<Entity>> candidatesMap, int window, StopWordFilter stopWordFilter) { + Set<Annotation> annotations = candidatesMap.keySet(); + List<Annotation> sortedAnnotations = new ArrayList<Annotation>(annotations); + + // Sort annotations by offset in ascending order + Collections.sort(sortedAnnotations, new Comparator<Annotation>(){ + public int compare(Annotation a1,Annotation a2){ + return Integer.compare(a1.getOffset(), a2.getOffset()); + } + }); + + int windowStart = 0; + int windowEnd = 0; + for (int i=0; i<sortedAnnotations.size(); i++) { + + Annotation annotation_i = sortedAnnotations.get(i); + int begin_i = annotation_i.getOffset(); + int end_i = begin_i + annotation_i.getLength()-1; + String token_i = annotation_i.getToken(); + Set<Entity> candidates_i = getCandidates(annotation_i); + Set<Entity> newCandidates_i = new HashSet<Entity>(); + + // Determine the annotations contained in the window + while ((sortedAnnotations.get(windowStart).getOffset()+sortedAnnotations.get(windowStart).getLength()-1)<(begin_i-window)) + windowStart++; + while (windowEnd<sortedAnnotations.size() && sortedAnnotations.get(windowEnd).getOffset()<(end_i+window)) + windowEnd++; + + // For every annotation in the window (defined by the number of characters between offsets) + for (int j=windowStart; j<sortedAnnotations.size() && j<windowEnd; j++) { + 
if (j!=i) { + Annotation annotation_j = sortedAnnotations.get(j); + String token_j = annotation_j.getToken(); + Set<Entity> candidates_j = getCandidates(annotation_j); + Set<Entity> intersection = Sets.intersection(candidates_i, candidates_j); + Set<Entity> newCandidates_ij = new HashSet<Entity>(); + for (Entity commonEntity: intersection) { + if (!(stopWordFilter.isStopWord(token_i) && stopWordFilter.isStopWord(token_j))) { + if (!token_i.contains(token_j) && !token_j.contains(token_i)) { + newCandidates_ij.add(commonEntity); + //System.out.println("common("+token_i+","+token_j+")="+commonEntity); + } + } + } + if (!newCandidates_ij.isEmpty()) { + Annotation mergedAnnotation = mergeAnnotations(annotation_i,annotation_j); + // If there's no punctuation in the merged annotation + if (!Pattern.matches("\\p{Punct}", mergedAnnotation.getToken())) { + candidatesMap.put(mergedAnnotation, newCandidates_ij); + candidatesMap.remove(annotation_i); + candidatesMap.remove(annotation_j); + } + + newCandidates_i.addAll(newCandidates_ij); + } + } + } + + // Deletes annotation if it's a stop word and doesn't have any matching annotation in the window + if (stopWordFilter.isStopWord(token_i)) { + if (newCandidates_i.isEmpty()) + candidatesMap.remove(annotation_i); + } + } + + + + } + + private Annotation mergeAnnotations(Annotation annotation_i, Annotation annotation_j) { + int offset; + int length; + if (annotation_i.getOffset() < annotation_j.getOffset()) { + offset = annotation_i.getOffset(); + length = annotation_j.getOffset() - offset + annotation_j.getLength(); + } else { + offset = annotation_j.getOffset(); + length = annotation_i.getOffset() - offset + annotation_i.getLength(); + } + return new Annotation(annotation_i.getReferencedDocument(), offset, length); + } + + @Override + public HashMap<Annotation, Set<Entity>> getCandidatesMap(Set<Annotation> annotations) { + HashMap<Annotation, Set<Entity>> candidatesMap = new HashMap<Annotation, Set<Entity>>(); + for (Annotation annotation: annotations) + candidatesMap.put(annotation, getCandidates(annotation)); + + postProcess(candidatesMap, window, stopWordFilter); + + return candidatesMap; + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
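A worked example of mergeAnnotations may help; the offsets, lengths, and document variable are hypothetical, using the offset/length Annotation constructor this revision still relies on:

// "has" starts at offset 10 with length 3, "child" at offset 14 with length 5.
// Since 10 < 14, the merged span starts at 10 with length 14 - 10 + 5 = 9,
// i.e. it covers "has child" including the separating space.
Annotation hasAnn   = new Annotation(document, 10, 3);
Annotation childAnn = new Annotation(document, 14, 5);
Annotation merged   = mergeAnnotations(hasAnn, childAnn);
// merged.getOffset() == 10, merged.getLength() == 9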
From: <lor...@us...> - 2013-10-29 14:20:42
Revision: 4134 http://sourceforge.net/p/dl-learner/code/4134 Author: lorenz_b Date: 2013-10-29 14:20:38 +0000 (Tue, 29 Oct 2013) Log Message: ----------- Remove owl:Thing Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WindowBasedContextExtractor.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java 2013-10-29 14:11:07 UTC (rev 4133) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java 2013-10-29 14:20:38 UTC (rev 4134) @@ -8,7 +8,6 @@ import java.util.Map; import java.util.Set; -import org.dllearner.algorithms.isle.index.LinguisticAnnotator; import org.dllearner.algorithms.isle.index.LinguisticUtil; import org.dllearner.core.owl.Entity; import org.dllearner.kb.OWLAPIOntology; @@ -16,6 +15,7 @@ import org.semanticweb.owlapi.model.IRI; import org.semanticweb.owlapi.model.OWLAnnotation; import org.semanticweb.owlapi.model.OWLAnnotationProperty; +import org.semanticweb.owlapi.model.OWLClass; import org.semanticweb.owlapi.model.OWLEntity; import org.semanticweb.owlapi.model.OWLLiteral; import org.semanticweb.owlapi.model.OWLOntology; @@ -23,6 +23,8 @@ import org.semanticweb.owlapi.util.IRIShortFormProvider; import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider; +import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; + import com.google.common.base.Joiner; @@ -42,6 +44,8 @@ private IRIShortFormProvider sfp = new SimpleIRIShortFormProvider(); private OWLAnnotationProperty[] properties; + + private static final OWLClass OWL_THING = new OWLDataFactoryImpl().getOWLThing(); public AnnotationEntityTextRetriever(OWLOntology ontology, OWLAnnotationProperty... 
properties) { this.ontology = ontology; @@ -111,6 +115,7 @@ schemaEntities.addAll(ontology.getClassesInSignature()); schemaEntities.addAll(ontology.getObjectPropertiesInSignature()); schemaEntities.addAll(ontology.getDataPropertiesInSignature()); + schemaEntities.remove(OWL_THING); Map<String, Double> relevantText; for (OWLEntity owlEntity : schemaEntities) { Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WindowBasedContextExtractor.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WindowBasedContextExtractor.java 2013-10-29 14:11:07 UTC (rev 4133) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WindowBasedContextExtractor.java 2013-10-29 14:20:38 UTC (rev 4134) @@ -23,18 +23,32 @@ public class WindowBasedContextExtractor implements ContextExtractor{ private StanfordCoreNLP pipeline; + private int tokensLeft = 10; + private int tokensRight = 10; - /** - * - */ - public WindowBasedContextExtractor() { + public WindowBasedContextExtractor(int tokensLeft, int tokensRight) { + this.tokensLeft = tokensLeft; + this.tokensRight = tokensRight; + + Properties props = new Properties(); + props.put("annotators", "tokenize, ssplit"); + pipeline = new StanfordCoreNLP(props); + } + public WindowBasedContextExtractor(int tokensLeftRight) { + tokensLeft = tokensLeftRight; + tokensRight = tokensLeftRight; + Properties props = new Properties(); props.put("annotators", "tokenize, ssplit"); pipeline = new StanfordCoreNLP(props); - - } + + public WindowBasedContextExtractor() { + Properties props = new Properties(); + props.put("annotators", "tokenize, ssplit"); + pipeline = new StanfordCoreNLP(props); + } /* (non-Javadoc) * @see org.dllearner.algorithms.isle.wsd.ContextExtractor#extractContext(java.lang.String, java.lang.String) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
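The three WindowBasedContextExtractor constructors above each rebuild the same two-annotator pipeline. Chaining them through this(...) would remove the duplication without changing behavior; a sketch, assuming the default of 10 tokens on each side stays as in the fields above:

public WindowBasedContextExtractor() {
    this(10, 10);
}

public WindowBasedContextExtractor(int tokensLeftRight) {
    this(tokensLeftRight, tokensLeftRight);
}

public WindowBasedContextExtractor(int tokensLeft, int tokensRight) {
    this.tokensLeft = tokensLeft;
    this.tokensRight = tokensRight;
    // the pipeline is now set up in exactly one place
    Properties props = new Properties();
    props.put("annotators", "tokenize, ssplit");
    pipeline = new StanfordCoreNLP(props);
}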
From: <lor...@us...> - 2013-10-29 14:59:43
Revision: 4137 http://sourceforge.net/p/dl-learner/code/4137 Author: lorenz_b Date: 2013-10-29 14:59:40 +0000 (Tue, 29 Oct 2013) Log Message: ----------- Fixed bug ins WSD. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/StructuralEntityContext.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/StructuralEntityContext.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/StructuralEntityContext.java 2013-10-29 14:51:29 UTC (rev 4136) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/StructuralEntityContext.java 2013-10-29 14:59:40 UTC (rev 4137) @@ -104,17 +104,7 @@ * @return */ public static Set<OWLEntity> getContext(OWLOntology ontology, Entity entity){ - - OWLEntity owlEntity = OWLAPIConverter.getOWLAPIEntity(entity); - if(owlEntity.isOWLClass()){ - return getContext(ontology, owlEntity.asOWLClass()); - } else if(owlEntity.isOWLObjectProperty()){ - return getContext(ontology, owlEntity.asOWLObjectProperty()); - } else if(owlEntity.isOWLDataProperty()){ - return getContext(ontology, owlEntity.asOWLDataProperty()); - } - - throw new UnsupportedOperationException("Unsupported entity type: " + entity); + return getContext(ontology, OWLAPIConverter.getOWLAPIEntity(entity)); } public static Set<OWLEntity> getContext(OWLOntology ontology, OWLObjectProperty property){ Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java 2013-10-29 14:51:29 UTC (rev 4136) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java 2013-10-29 14:59:40 UTC (rev 4137) @@ -25,7 +25,7 @@ this.candidatesTrie = candidatesTrie; } - public Set<Entity> getCandidates(Annotation annotation) { + public Set<Entity> getCandidates(Annotation annotation) {System.out.println(annotation); return candidatesTrie.getCandidateEntities(annotation.getMatchedString()); } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java 2013-10-29 14:51:29 UTC (rev 4136) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java 2013-10-29 14:59:40 UTC (rev 4137) @@ -104,7 +104,7 @@ } /** - * Returns for each entity in the ontology all relevant text, i.e. eitherthe annotations or the short form of the IRI as fallback. + * Returns for each entity in the ontology all relevant text, i.e. either the annotations or the short form of the IRI as fallback. * @return */ @Override This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
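One wrinkle in this commit: getCandidates in TrieEntityCandidateGenerator now carries a System.out.println(annotation) on the same line as its opening brace, which looks like debugging output that slipped into the commit. The method without it:

public Set<Entity> getCandidates(Annotation annotation) {
    return candidatesTrie.getCandidateEntities(annotation.getMatchedString());
}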
From: <lor...@us...> - 2013-11-21 11:54:26
Revision: 4155 http://sourceforge.net/p/dl-learner/code/4155 Author: lorenz_b Date: 2013-11-21 11:54:23 +0000 (Thu, 21 Nov 2013) Log Message: ----------- Added POS tags to text documents in constructor. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/StanfordPartOfSpeechTagger.java Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/StanfordPartOfSpeechTagger.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/StanfordPartOfSpeechTagger.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/StanfordPartOfSpeechTagger.java 2013-11-21 11:54:23 UTC (rev 4155) @@ -0,0 +1,59 @@ +package org.dllearner.algorithms.isle; + +import java.util.List; +import java.util.Properties; + +import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation; +import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation; +import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation; +import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation; +import edu.stanford.nlp.ling.CoreLabel; +import edu.stanford.nlp.pipeline.Annotation; +import edu.stanford.nlp.pipeline.StanfordCoreNLP; +import edu.stanford.nlp.util.CoreMap; + +public class StanfordPartOfSpeechTagger { + + private static StanfordPartOfSpeechTagger instance; + private StanfordCoreNLP pipeline; + + private StanfordPartOfSpeechTagger(){ + Properties props = new Properties(); + props.put("annotators", "tokenize, ssplit, pos"); + pipeline = new StanfordCoreNLP(props); + } + + public static synchronized StanfordPartOfSpeechTagger getInstance(){ + if(instance == null){ + instance = new StanfordPartOfSpeechTagger(); + } + return instance; + } + + public String tag(String text) { + String out = ""; + + // create an empty Annotation just with the given text + Annotation document = new Annotation(text); + + // run all Annotators on this text + pipeline.annotate(document); + + // these are all the sentences in this document + // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types + List<CoreMap> sentences = document.get(SentencesAnnotation.class); + + for(CoreMap sentence: sentences) { + for (CoreLabel token: sentence.get(TokensAnnotation.class)) { + // this is the text of the token + String word = token.get(TextAnnotation.class); + // this is the POS tag of the token + String pos = token.get(PartOfSpeechAnnotation.class); + + out += " " + word + "/" + pos; + } + } + + return out.trim(); + } +} Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java 2013-11-21 11:39:19 UTC (rev 4154) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java 2013-11-21 11:54:23 UTC (rev 4155) @@ -1,5 +1,7 @@ package org.dllearner.algorithms.isle.index; +import org.dllearner.algorithms.isle.StanfordPartOfSpeechTagger; + /** * A simple text document without further formatting or markup. * @@ -10,7 +12,6 @@ private String rawContent; private String posTaggedContent; - /** * Initializes a text document with the given raw content. 
Internally, the content is cleaned up so that it only * contains letters adhering to the regular expression pattern [A-Za-z]. @@ -19,26 +20,24 @@ */ public TextDocument(String content) { this.rawContent = content; - this.content = content.toLowerCase(); - this.content = this.content.replaceAll("[^a-z ]", " "); - this.content = this.content.replaceAll("\\s{2,}", " "); - this.content = this.content.trim(); + + //build cleaned content + buildCleanedContent(); + + //build POS tagged content + buildPOSTaggedContent(); } - /** - * Initializes a text document with the given raw content. Internally, the content is cleaned up so that it only - * contains letters adhering to the regular expression pattern [A-Za-z]. - * - * @param content the raw content of this text document - */ - public TextDocument(String content, String posTaggedContent) { - this.rawContent = content; - this.posTaggedContent = posTaggedContent; - this.content = content.toLowerCase(); + private void buildCleanedContent(){ + this.content = content.toLowerCase(); this.content = this.content.replaceAll("[^a-z ]", " "); this.content = this.content.replaceAll("\\s{2,}", " "); this.content = this.content.trim(); } + + private void buildPOSTaggedContent(){ + this.posTaggedContent = StanfordPartOfSpeechTagger.getInstance().tag(rawContent); + } @Override public String getContent() { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2013-11-21 12:57:14
Revision: 4161 http://sourceforge.net/p/dl-learner/code/4161 Author: lorenz_b Date: 2013-11-21 12:57:10 +0000 (Thu, 21 Nov 2013) Log Message: ----------- Cont. text document generator. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/TextDocumentGenerator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Token.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/TextDocumentGenerator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/TextDocumentGenerator.java 2013-11-21 12:51:05 UTC (rev 4160) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/TextDocumentGenerator.java 2013-11-21 12:57:10 UTC (rev 4161) @@ -19,7 +19,10 @@ public class TextDocumentGenerator { private static TextDocumentGenerator instance; + private StanfordCoreNLP pipeline; + private final String punctuationPattern = "\\p{Punct}"; + private final StopWordFilter stopWordFilter = new StopWordFilter(); private TextDocumentGenerator(){ Properties props = new Properties(); @@ -54,14 +57,22 @@ String pos = label.get(PartOfSpeechAnnotation.class); //this is the POS tag of the token String lemma = label.get(LemmaAnnotation.class); + //check if token is punctuation + boolean isPunctuation = word.matches(punctuationPattern); + //check if it is a stop word + boolean isStopWord = stopWordFilter.isStopWord(word); - Token token = new Token(word); - token.setPOSTag(pos); - token.setStemmedForm(lemma); + Token token = new Token(word, lemma, pos, isPunctuation, isStopWord); + document.add(token); } } return document; } + + public static void main(String[] args) throws Exception { + TextDocument document = TextDocumentGenerator.getInstance().tag("And he said, Amos, what seest thou? And I said, A basket of summer fruit. Then said the LORD unto me, The end is come upon my people of Israel; I will not again pass by them any more. 
"); + System.out.println(document); + } } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Token.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Token.java 2013-11-21 12:51:05 UTC (rev 4160) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Token.java 2013-11-21 12:57:10 UTC (rev 4161) @@ -12,11 +12,21 @@ private String rawForm; private String stemmedForm; private String posTag; + private boolean isPunctuation; + private boolean isStopWord; public Token(String rawForm) { - posTag = rawForm; + this.rawForm = rawForm; } + public Token(String rawForm, String stemmedForm, String posTag, boolean isPunctuation, boolean isStopWord) { + this.rawForm = rawForm; + this.stemmedForm = stemmedForm; + this.posTag = posTag; + this.isPunctuation = isPunctuation; + this.isStopWord = isStopWord; + } + /** * @return the rawForm */ @@ -39,6 +49,20 @@ } /** + * @return the isPunctuation + */ + public boolean isPunctuation() { + return isPunctuation; + } + + /** + * @return the isStopWord + */ + public boolean isStopWord() { + return isStopWord; + } + + /** * @param stemmedForm the stemmedForm to set */ public void setStemmedForm(String stemmedForm) { @@ -51,14 +75,28 @@ public void setPOSTag(String posTag) { this.posTag = posTag; } + + /** + * @param isPunctuation the isPunctuation to set + */ + public void setIsPunctuation(boolean isPunctuation) { + this.isPunctuation = isPunctuation; + } + + /** + * @param isStopWord the isStopWord to set + */ + public void setIsStopWord(boolean isStopWord) { + this.isStopWord = isStopWord; + } /* (non-Javadoc) * @see java.lang.Object#toString() */ @Override public String toString() { - return "Word: " + rawForm + "\n" + return "\n[Word: " + rawForm + "\n" + "Stemmed word: " + stemmedForm + "\n" - + "POS tag: " + posTag; + + "POS tag: " + posTag + "]"; } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <dfl...@us...> - 2013-11-21 13:16:16
Revision: 4163 http://sourceforge.net/p/dl-learner/code/4163 Author: dfleischhacker Date: 2013-11-21 13:16:13 +0000 (Thu, 21 Nov 2013) Log Message: ----------- Annotation refactoring Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Annotation.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SentenceBasedContextExtractor.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SimpleWordSenseDisambiguation.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Annotation.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Annotation.java 2013-11-21 13:00:33 UTC (rev 4162) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Annotation.java 2013-11-21 13:16:13 UTC (rev 4163) @@ -4,6 +4,9 @@ package org.dllearner.algorithms.isle.index; +import java.util.ArrayList; +import java.util.List; + /** * A (non-semantic) annotation which represents an entity in a document by its offset and length. * @author Lorenz Buehmann @@ -12,8 +15,7 @@ public class Annotation { private Document referencedDocument; - private int offset; - private int length; + private ArrayList<Token> tokens; private String matchedString; public String getMatchedString() { @@ -24,64 +26,64 @@ this.matchedString = matchedString; } - public Annotation(Document referencedDocument, int offset, int length) { + public Annotation(Document referencedDocument, List<Token> tokens) { this.referencedDocument = referencedDocument; - this.offset = offset; - this.length = length; - } + this.tokens = new ArrayList<Token>(tokens); + } public Document getReferencedDocument() { return referencedDocument; } - public int getOffset() { - return offset; - } + public String getString(){ + StringBuilder sb = new StringBuilder(); + for (Token t : tokens) { + if (sb.length() > 0) { + sb.append(" "); + } + sb.append(t.getStemmedForm()); + } + return sb.toString(); + } - public int getLength() { - return length; - } - - public String getToken(){ - return referencedDocument.getContent().substring(offset, offset + length); - } + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } - @Override - public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result + ((referencedDocument == null) ? 0 : referencedDocument.hashCode()); - result = prime * result + length; - result = prime * result + offset; - return result; - } + Annotation that = (Annotation) o; + if (matchedString != null ? !matchedString.equals(that.matchedString) : that.matchedString != null) { + return false; + } + if (referencedDocument != null ? !referencedDocument.equals(that.referencedDocument) : + that.referencedDocument != null) { + return false; + } + if (tokens != null ? !tokens.equals(that.tokens) : that.tokens != null) { + return false; + } + + return true; + } + + @Override + public int hashCode() { + int result = referencedDocument != null ? referencedDocument.hashCode() : 0; + result = 31 * result + (tokens != null ? tokens.hashCode() : 0); + result = 31 * result + (matchedString != null ? 
matchedString.hashCode() : 0); + return result; + } + + /* (non-Javadoc) + * @see java.lang.Object#toString() + */ @Override - public boolean equals(Object obj) { - if (this == obj) - return true; - if (obj == null) - return false; - if (getClass() != obj.getClass()) - return false; - Annotation other = (Annotation) obj; - if (referencedDocument == null) { - if (other.referencedDocument != null) - return false; - } else if (!referencedDocument.equals(other.referencedDocument)) - return false; - if (length != other.length) - return false; - if (offset != other.offset) - return false; - return true; - } - - /* (non-Javadoc) - * @see java.lang.Object#toString() - */ - @Override public String toString() { - return "\"" + referencedDocument.getContent().substring(offset, offset+length) + "\" at position " + offset; - } + return getString(); + } } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java 2013-11-21 13:00:33 UTC (rev 4162) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java 2013-11-21 13:16:13 UTC (rev 4163) @@ -54,7 +54,7 @@ Annotation annotation_i = sortedAnnotations.get(i); int begin_i = annotation_i.getOffset(); int end_i = begin_i + annotation_i.getLength()-1; - String token_i = annotation_i.getToken(); + String token_i = annotation_i.getString(); Set<Entity> candidates_i = getCandidates(annotation_i); Set<Entity> newCandidates_i = new HashSet<Entity>(); @@ -68,7 +68,7 @@ for (int j=windowStart; j<sortedAnnotations.size() && j<windowEnd; j++) { if (j!=i) { Annotation annotation_j = sortedAnnotations.get(j); - String token_j = annotation_j.getToken(); + String token_j = annotation_j.getString(); Set<Entity> candidates_j = getCandidates(annotation_j); Set<Entity> intersection = Sets.intersection(candidates_i, candidates_j); Set<Entity> newCandidates_ij = new HashSet<Entity>(); @@ -83,7 +83,7 @@ if (!newCandidates_ij.isEmpty()) { Annotation mergedAnnotation = mergeAnnotations(annotation_i,annotation_j); // If there's no punctuation in the merged annotation - if (!Pattern.matches("\\p{Punct}", mergedAnnotation.getToken())) { + if (!Pattern.matches("\\p{Punct}", mergedAnnotation.getString())) { candidatesMap.put(mergedAnnotation, newCandidates_ij); candidatesMap.remove(annotation_i); candidatesMap.remove(annotation_j); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java 2013-11-21 13:00:33 UTC (rev 4162) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java 2013-11-21 13:16:13 UTC (rev 4163) @@ -48,6 +48,7 @@ public SemanticIndex(OWLOntology ontology) { this.ontology = ontology; + } /** Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SentenceBasedContextExtractor.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SentenceBasedContextExtractor.java 2013-11-21 13:00:33 UTC (rev 4162) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SentenceBasedContextExtractor.java 
2013-11-21 13:16:13 UTC (rev 4163) @@ -57,7 +57,7 @@ } index += s.length(); } - throw new RuntimeException("Token " + annotation.getToken() + " not found in text " + annotation.getReferencedDocument().getRawContent()); + throw new RuntimeException("Token " + annotation.getString() + " not found in text " + annotation.getReferencedDocument().getRawContent()); } private List<CoreMap> getSentences(String document) { Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SimpleWordSenseDisambiguation.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SimpleWordSenseDisambiguation.java 2013-11-21 13:00:33 UTC (rev 4162) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SimpleWordSenseDisambiguation.java 2013-11-21 13:16:13 UTC (rev 4163) @@ -50,7 +50,7 @@ public SemanticAnnotation disambiguate(Annotation annotation, Set<Entity> candidateEntities) { logger.debug("Linguistic annotations:\n" + annotation); logger.debug("Candidate entities:" + candidateEntities); - String token = annotation.getToken().trim(); + String token = annotation.getString().trim(); //check if annotation token matches label of entity or the part behind #(resp. /) for (Entity entity : candidateEntities) { Set<String> labels = getLabels(entity); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2013-11-21 13:38:25
|
Revision: 4164 http://sourceforge.net/p/dl-learner/code/4164 Author: lorenz_b Date: 2013-11-21 13:38:03 +0000 (Thu, 21 Nov 2013) Log Message: ----------- Refactored context extractors. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/TextDocumentGenerator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Annotation.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/LuceneSyntacticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SentenceBasedContextExtractor.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WindowBasedContextExtractor.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/TextDocumentGenerator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/TextDocumentGenerator.java 2013-11-21 13:16:13 UTC (rev 4163) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/TextDocumentGenerator.java 2013-11-21 13:38:03 UTC (rev 4164) @@ -37,7 +37,7 @@ return instance; } - public TextDocument tag(String text) { + public TextDocument generateDocument(String text) { TextDocument document = new TextDocument(); // create an empty Annotation just with the given text Annotation annotatedDocument = new Annotation(text); @@ -72,7 +72,7 @@ } public static void main(String[] args) throws Exception { - TextDocument document = TextDocumentGenerator.getInstance().tag("And he said, Amos, what seest thou? And I said, A basket of summer fruit. Then said the LORD unto me, The end is come upon my people of Israel; I will not again pass by them any more. "); + TextDocument document = TextDocumentGenerator.getInstance().generateDocument("And he said, Amos, what seest thou? And I said, A basket of summer fruit. Then said the LORD unto me, The end is come upon my people of Israel; I will not again pass by them any more. 
"); System.out.println(document); } } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Annotation.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Annotation.java 2013-11-21 13:16:13 UTC (rev 4163) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Annotation.java 2013-11-21 13:38:03 UTC (rev 4164) @@ -34,6 +34,13 @@ public Document getReferencedDocument() { return referencedDocument; } + + /** + * @return the tokens + */ + public ArrayList<Token> getTokens() { + return tokens; + } public String getString(){ StringBuilder sb = new StringBuilder(); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/LuceneSyntacticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/LuceneSyntacticIndex.java 2013-11-21 13:16:13 UTC (rev 4163) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/LuceneSyntacticIndex.java 2013-11-21 13:38:03 UTC (rev 4164) @@ -16,6 +16,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; +import org.dllearner.algorithms.isle.TextDocumentGenerator; import org.dllearner.algorithms.isle.index.TextDocument; import java.io.File; @@ -61,7 +62,7 @@ ScoreDoc[] result = searcher.search(query, getSize()).scoreDocs; for (int i = 0; i < result.length; i++) { Document doc = searcher.doc(result[i].doc); - documents.add(new TextDocument(doc.get(searchField))); + documents.add(TextDocumentGenerator.getInstance().generateDocument(doc.get(searchField))); } } catch (ParseException e) { e.printStackTrace(); @@ -85,7 +86,7 @@ try { Document doc = indexReader.document(i); String content = doc.get(searchField); - documents.add(new TextDocument(content)); + documents.add(TextDocumentGenerator.getInstance().generateDocument(content)); } catch (IOException e) { e.printStackTrace(); } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SentenceBasedContextExtractor.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SentenceBasedContextExtractor.java 2013-11-21 13:16:13 UTC (rev 4163) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SentenceBasedContextExtractor.java 2013-11-21 13:38:03 UTC (rev 4164) @@ -3,6 +3,14 @@ */ package org.dllearner.algorithms.isle.wsd; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Properties; + +import org.dllearner.algorithms.isle.TextDocumentGenerator; +import org.dllearner.algorithms.isle.index.Token; + import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation; import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation; import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation; @@ -10,12 +18,7 @@ import edu.stanford.nlp.pipeline.Annotation; import edu.stanford.nlp.pipeline.StanfordCoreNLP; import edu.stanford.nlp.util.CoreMap; -import org.dllearner.algorithms.isle.index.TextDocument; -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - /** * @author Lorenz Buehmann * @@ -36,26 +39,29 @@ @Override public List<String> extractContext(org.dllearner.algorithms.isle.index.Annotation annotation) { //split text into sentences - 
List<CoreMap> sentences = getSentences(annotation.getReferencedDocument().getContent()); + List<CoreMap> sentences = getSentences(annotation.getReferencedDocument().getRawContent()); //find the sentence containing the token of the annotation - int tokenStart = annotation.getOffset(); - int index = 0; + Token firstToken = annotation.getTokens().get(0); for (CoreMap sentence : sentences) { - String s = sentence.toString(); - if (index <= tokenStart && s.length() > tokenStart) { + boolean found = false; + for (CoreLabel label : sentence.get(TokensAnnotation.class)) { + // this is the text of the token + String word = label.get(TextAnnotation.class); + if(word.equals(firstToken.getRawForm())){ + found = true; + break; + } + } + if(found){ List<String> context = new ArrayList<String>(); for (CoreLabel label : sentence.get(TokensAnnotation.class)) { // this is the text of the token String word = label.get(TextAnnotation.class); - - if(!word.isEmpty() && !word.matches("\\p{Punct}")){ - context.add(word); - } + context.add(word); } return context; } - index += s.length(); } throw new RuntimeException("Token " + annotation.getString() + " not found in text " + annotation.getReferencedDocument().getRawContent()); } @@ -79,9 +85,8 @@ String s = "International Business Machines Corporation, or IBM, is an American multinational services technology and consulting corporation, with headquarters in Armonk, New York, United States. IBM manufactures and markets computer hardware and software," + " and offers infrastructure, hosting and consulting services in areas ranging from mainframe computers to nanotechnology."; - String token = "services"; SentenceBasedContextExtractor extractor = new SentenceBasedContextExtractor(); - List<String> context = extractor.extractContext(new org.dllearner.algorithms.isle.index.Annotation(new TextDocument(s), s.indexOf(token), token.length())); + List<String> context = extractor.extractContext(new org.dllearner.algorithms.isle.index.Annotation(TextDocumentGenerator.getInstance().generateDocument(s), Arrays.asList(new Token("American")))); System.out.println(context); } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WindowBasedContextExtractor.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WindowBasedContextExtractor.java 2013-11-21 13:16:13 UTC (rev 4163) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WindowBasedContextExtractor.java 2013-11-21 13:38:03 UTC (rev 4164) @@ -3,6 +3,14 @@ */ package org.dllearner.algorithms.isle.wsd; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Properties; + +import org.dllearner.algorithms.isle.TextDocumentGenerator; +import org.dllearner.algorithms.isle.index.Token; + import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation; import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation; import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation; @@ -10,18 +18,13 @@ import edu.stanford.nlp.pipeline.Annotation; import edu.stanford.nlp.pipeline.StanfordCoreNLP; import edu.stanford.nlp.util.CoreMap; -import org.dllearner.algorithms.isle.index.TextDocument; -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - /** * @author Lorenz Buehmann - * + * */ -public class WindowBasedContextExtractor implements ContextExtractor{ - +public class WindowBasedContextExtractor implements ContextExtractor { + 
private StanfordCoreNLP pipeline; private int tokensLeft = 10; private int tokensRight = 10; @@ -29,57 +32,66 @@ public WindowBasedContextExtractor(int tokensLeft, int tokensRight) { this.tokensLeft = tokensLeft; this.tokensRight = tokensRight; - + Properties props = new Properties(); props.put("annotators", "tokenize, ssplit"); pipeline = new StanfordCoreNLP(props); } - + public WindowBasedContextExtractor(int tokensLeftRight) { tokensLeft = tokensLeftRight; tokensRight = tokensLeftRight; - + Properties props = new Properties(); props.put("annotators", "tokenize, ssplit"); pipeline = new StanfordCoreNLP(props); } - + public WindowBasedContextExtractor() { Properties props = new Properties(); props.put("annotators", "tokenize, ssplit"); pipeline = new StanfordCoreNLP(props); } - /* (non-Javadoc) - * @see org.dllearner.algorithms.isle.wsd.ContextExtractor#extractContext(java.lang.String, java.lang.String) + /* + * (non-Javadoc) + * + * @see + * org.dllearner.algorithms.isle.wsd.ContextExtractor#extractContext(java + * .lang.String, java.lang.String) */ @Override public List<String> extractContext(org.dllearner.algorithms.isle.index.Annotation annotation) { // split text into sentences - List<CoreMap> sentences = getSentences(annotation.getReferencedDocument().getContent()); + List<CoreMap> sentences = getSentences(annotation.getReferencedDocument().getRawContent()); // find the sentence containing the token of the annotation - int tokenStart = annotation.getOffset(); - int index = 0; + Token firstToken = annotation.getTokens().get(0); for (CoreMap sentence : sentences) { - String s = sentence.toString(); - if (index <= tokenStart && s.length() > tokenStart) { + boolean found = false; + for (CoreLabel label : sentence.get(TokensAnnotation.class)) { + // this is the text of the token + String word = label.get(TextAnnotation.class); + if (word.equals(firstToken.getRawForm())) { + found = true; + break; + } + } + if (found) { List<String> context = new ArrayList<String>(); for (CoreLabel label : sentence.get(TokensAnnotation.class)) { // this is the text of the token String word = label.get(TextAnnotation.class); - context.add(word); } return context; } - index += s.length(); } - throw new RuntimeException("Token " + annotation + " not found in text " - + annotation.getReferencedDocument().getContent()); + throw new RuntimeException("Token " + annotation.getString() + " not found in text " + + annotation.getReferencedDocument().getRawContent()); } - + private List<CoreMap> getSentences(String document) { // create an empty Annotation just with the given text Annotation annotation = new Annotation(document); @@ -94,14 +106,14 @@ return sentences; } - + public static void main(String[] args) throws Exception { String s = "International Business Machines Corporation, or IBM, is an American multinational services technology and consulting corporation, with headquarters in Armonk, New York, United States. 
IBM manufactures and markets computer hardware and software," + " and offers infrastructure, hosting and consulting services in areas ranging from mainframe computers to nanotechnology."; - + String token = "services"; WindowBasedContextExtractor extractor = new WindowBasedContextExtractor(); - List<String> context = extractor.extractContext(new org.dllearner.algorithms.isle.index.Annotation(new TextDocument(s), s.indexOf(token), token.length())); + List<String> context = extractor.extractContext(new org.dllearner.algorithms.isle.index.Annotation(TextDocumentGenerator.getInstance().generateDocument(s), Arrays.asList(new Token("American")))); System.out.println(context); }
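The renamed entry point and the token-based annotations combine as in the following sketch, which closely follows the main methods in the diff above (the single-argument Token constructor is assumed to set the raw form that the extractors match on):

    import java.util.Arrays;
    import java.util.List;

    import org.dllearner.algorithms.isle.TextDocumentGenerator;
    import org.dllearner.algorithms.isle.index.TextDocument;
    import org.dllearner.algorithms.isle.index.Token;
    import org.dllearner.algorithms.isle.wsd.SentenceBasedContextExtractor;

    public class ContextExtractionSketch {
        public static void main(String[] args) {
            String s = "IBM is an American multinational corporation. It manufactures hardware and software.";
            // runs the Stanford pipeline and yields a token-level document
            TextDocument doc = TextDocumentGenerator.getInstance().generateDocument(s);
            // the extractor now locates the enclosing sentence by comparing raw token
            // forms, not character offsets
            List<String> context = new SentenceBasedContextExtractor().extractContext(
                    new org.dllearner.algorithms.isle.index.Annotation(doc, Arrays.asList(new Token("American"))));
            System.out.println(context); // the tokens of the first sentence
        }
    }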
From: <lor...@us...> - 2013-12-02 14:52:35
|
Revision: 4183 http://sourceforge.net/p/dl-learner/code/4183 Author: lorenz_b Date: 2013-12-02 14:52:33 +0000 (Mon, 02 Dec 2013) Log Message: ----------- Refactoring. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/EntityTextRetriever.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/RDFSLabelEntityTextRetriever.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-12-02 14:41:21 UTC (rev 4182) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-12-02 14:52:33 UTC (rev 4183) @@ -42,26 +42,21 @@ public void buildTrie(OWLOntology ontology, NameGenerator nameGenerator) { this.trie = new PrefixTrie<FullTokenEntitySetPair>(); - Map<Entity, Set<String>> relevantText = entityTextRetriever.getRelevantText(ontology); + Map<Entity, Set<List<Token>>> entity2TokenSet = entityTextRetriever.getRelevantText(ontology); - for (Entity entity : relevantText.keySet()) { - - for (String text : relevantText.get(entity)) { - text = StringUtils.join(LinguisticUtil.getInstance().getWordsFromCamelCase(text), " "); - text = StringUtils.join(LinguisticUtil.getInstance().getWordsFromUnderscored(text), " "); - if (text.trim().isEmpty()) { - continue; - } - - addEntry(text, entity); - addSubsequencesWordNet(entity, text); - - for (String alternativeText : nameGenerator.getAlternativeText(text)) { - addEntry(alternativeText.toLowerCase(), entity, text); - } - } - } + for (Entry<Entity, Set<List<Token>>> entry : entity2TokenSet.entrySet()) { + Entity entity = entry.getKey(); + Set<List<Token>> tokenSet = entry.getValue(); + for (List<Token> tokens : tokenSet) { + addEntry(tokens, entity); + addSubsequences(entity, tokens); +// addSubsequencesWordNet(entity, text); +// for (String alternativeText : nameGenerator.getAlternativeText(text)) { +// addEntry(alternativeText.toLowerCase(), entity, text); +// } + } + } } /** Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java 2013-12-02 14:41:21 UTC (rev 4182) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java 2013-12-02 14:52:33 UTC (rev 4183) @@ -127,6 +127,15 @@ tree.add(tokens1, new NamedClass("TokenTree")); tree.add(tokens2, new NamedClass("TokenizedTree")); System.out.println(tree); + + System.out.println(tree.getEntitiesForLongestMatch(tokens1)); + System.out.println(tree.getLongestMatch(tokens1)); + + List<Token> tokens3 = Lists.newLinkedList(); + for (String s : Splitter.on(" ").split("this is a very nice tokenized tree")) { + tokens3.add(new Token(s, s, s, false, false)); + }; + System.out.println(tree.getLongestMatch(tokens3)); } Modified: 
trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java 2013-12-02 14:41:21 UTC (rev 4182) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java 2013-12-02 14:52:33 UTC (rev 4183) @@ -5,10 +5,13 @@ import java.util.HashMap; import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Set; +import org.dllearner.algorithms.isle.TextDocumentGenerator; import org.dllearner.algorithms.isle.index.LinguisticUtil; +import org.dllearner.algorithms.isle.index.Token; import org.dllearner.core.owl.Entity; import org.dllearner.kb.OWLAPIOntology; import org.dllearner.utilities.owl.OWLAPIConverter; @@ -75,8 +78,8 @@ * @see org.dllearner.algorithms.isle.EntityTextRetriever#getRelevantText(org.dllearner.core.owl.Entity) */ @Override - public Map<String, Double> getRelevantText(Entity entity) { - Map<String, Double> textWithWeight = new HashMap<String, Double>(); + public Map<List<Token>, Double> getRelevantText(Entity entity) { + Map<List<Token>, Double> textWithWeight = new HashMap<List<Token>, Double>(); OWLEntity e = OWLAPIConverter.getOWLAPIEntity(entity); @@ -87,7 +90,7 @@ OWLLiteral val = (OWLLiteral) annotation.getValue(); if (val.hasLang(language)) { String label = val.getLiteral().trim(); - textWithWeight.put(label, weight); + textWithWeight.put(TextDocumentGenerator.getInstance().generateDocument(label), weight); } } } @@ -97,7 +100,7 @@ String shortForm = sfp.getShortForm(IRI.create(entity.getURI())); shortForm = Joiner.on(" ").join(LinguisticUtil.getInstance().getWordsFromCamelCase(shortForm)); shortForm = Joiner.on(" ").join(LinguisticUtil.getInstance().getWordsFromUnderscored(shortForm)).trim(); - textWithWeight.put(shortForm, weight); + textWithWeight.put(TextDocumentGenerator.getInstance().generateDocument(shortForm), weight); } return textWithWeight; @@ -108,8 +111,8 @@ * @return */ @Override - public Map<Entity, Set<String>> getRelevantText(OWLOntology ontology) { - Map<Entity, Set<String>> entity2RelevantText = new HashMap<Entity, Set<String>>(); + public Map<Entity, Set<List<Token>>> getRelevantText(OWLOntology ontology) { + Map<Entity, Set<List<Token>>> entity2RelevantText = new HashMap<>(); Set<OWLEntity> schemaEntities = new HashSet<OWLEntity>(); schemaEntities.addAll(ontology.getClassesInSignature()); @@ -117,7 +120,7 @@ schemaEntities.addAll(ontology.getDataPropertiesInSignature()); schemaEntities.remove(OWL_THING); - Map<String, Double> relevantText; + Map<List<Token>, Double> relevantText; for (OWLEntity owlEntity : schemaEntities) { Entity entity = OWLAPIConverter.getEntity(owlEntity); relevantText = getRelevantText(entity); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/EntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/EntityTextRetriever.java 2013-12-02 14:41:21 UTC (rev 4182) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/EntityTextRetriever.java 2013-12-02 14:52:33 UTC (rev 4183) @@ -19,9 +19,11 @@ package org.dllearner.algorithms.isle.textretrieval; +import java.util.List; import java.util.Map; import java.util.Set; +import 
org.dllearner.algorithms.isle.index.Token; import org.dllearner.core.owl.Entity; import org.semanticweb.owlapi.model.OWLOntology; @@ -45,8 +47,8 @@ * @param entity The entity to handle. * @return A weighted set of strings. For a value x, we need to have 0 <= x <= 1. */ - public Map<String, Double> getRelevantText(Entity entity); + public Map<List<Token>, Double> getRelevantText(Entity entity); - public Map<Entity, Set<String>> getRelevantText(OWLOntology ontology); + public Map<Entity, Set<List<Token>>> getRelevantText(OWLOntology ontology); } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/RDFSLabelEntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/RDFSLabelEntityTextRetriever.java 2013-12-02 14:41:21 UTC (rev 4182) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/RDFSLabelEntityTextRetriever.java 2013-12-02 14:52:33 UTC (rev 4183) @@ -4,12 +4,14 @@ package org.dllearner.algorithms.isle.textretrieval; import java.io.File; +import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; +import org.dllearner.algorithms.isle.index.Token; import org.dllearner.core.owl.Entity; import org.dllearner.kb.OWLAPIOntology; import org.semanticweb.owlapi.apibinding.OWLManager; @@ -43,13 +45,13 @@ OWLOntology ontology = man.loadOntology(IRI.create("http://www.semanticbible.com/2006/11/NTNames.owl")); RDFSLabelEntityTextRetriever labelRetriever = new RDFSLabelEntityTextRetriever(ontology); - Map<Entity, Set<String>> relevantText = labelRetriever.getRelevantText(ontology); + Map<Entity, Set<List<Token>>> relevantText = labelRetriever.getRelevantText(ontology); SortedMap<String, String> uri2Labels = new TreeMap<String, String>(); - for (Entry<Entity, Set<String>> entry : relevantText.entrySet()) { + for (Entry<Entity, Set<List<Token>>> entry : relevantText.entrySet()) { Entity key = entry.getKey(); - Set<String> value = entry.getValue(); - uri2Labels.put(key.getName(), value.iterator().next()); + Set<List<Token>> value = entry.getValue(); + uri2Labels.put(key.getName(), value.iterator().next().get(0).getRawForm()); } StringBuilder csv = new StringBuilder();
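After this change, callers of the text retrievers receive pre-tokenized label texts rather than raw strings. A consumer sketch along the lines of the main method above, reusing the ontology URL already used in the diff:

    import java.util.List;
    import java.util.Map;
    import java.util.Map.Entry;
    import java.util.Set;

    import org.dllearner.algorithms.isle.index.Token;
    import org.dllearner.algorithms.isle.textretrieval.RDFSLabelEntityTextRetriever;
    import org.dllearner.core.owl.Entity;
    import org.semanticweb.owlapi.apibinding.OWLManager;
    import org.semanticweb.owlapi.model.IRI;
    import org.semanticweb.owlapi.model.OWLOntology;

    public class TokenizedLabelSketch {
        public static void main(String[] args) throws Exception {
            OWLOntology ontology = OWLManager.createOWLOntologyManager()
                    .loadOntology(IRI.create("http://www.semanticbible.com/2006/11/NTNames.owl"));
            RDFSLabelEntityTextRetriever retriever = new RDFSLabelEntityTextRetriever(ontology);
            // each entity now maps to a set of token sequences, one per label text
            Map<Entity, Set<List<Token>>> relevantText = retriever.getRelevantText(ontology);
            for (Entry<Entity, Set<List<Token>>> entry : relevantText.entrySet()) {
                for (List<Token> label : entry.getValue()) {
                    System.out.println(entry.getKey().getName() + " -> " + label);
                }
            }
        }
    }

This is also why SimpleEntityCandidatesTrie above can add token lists to the trie directly and derive subsequences from them without re-tokenizing.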
From: <lor...@us...> - 2013-12-09 14:22:22
|
Revision: 4193 http://sourceforge.net/p/dl-learner/code/4193 Author: lorenz_b Date: 2013-12-09 14:22:20 +0000 (Mon, 09 Dec 2013) Log Message: ----------- Added generator class for semantic index. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/TextDocumentGenerator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndexGenerator.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/TextDocumentGenerator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/TextDocumentGenerator.java 2013-12-03 12:41:34 UTC (rev 4192) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/TextDocumentGenerator.java 2013-12-09 14:22:20 UTC (rev 4193) @@ -14,6 +14,9 @@ import edu.stanford.nlp.ling.CoreLabel; import edu.stanford.nlp.pipeline.Annotation; import edu.stanford.nlp.pipeline.StanfordCoreNLP; +import edu.stanford.nlp.trees.CollinsHeadFinder; +import edu.stanford.nlp.trees.Tree; +import edu.stanford.nlp.trees.TreeCoreAnnotations.TreeAnnotation; import edu.stanford.nlp.util.CoreMap; public class TextDocumentGenerator { @@ -26,7 +29,7 @@ private TextDocumentGenerator(){ Properties props = new Properties(); - props.put("annotators", "tokenize, ssplit, pos, lemma"); + props.put("annotators", "tokenize, ssplit, pos, lemma, parse"); pipeline = new StanfordCoreNLP(props); } @@ -58,12 +61,21 @@ //this is the POS tag of the token String lemma = label.get(LemmaAnnotation.class); //check if token is punctuation - boolean isPunctuation = word.matches(punctuationPattern); + boolean isPunctuation = word.matches(punctuationPattern) + || pos.equalsIgnoreCase("-lrb-") + || pos.equalsIgnoreCase("-rrb-") + || word.startsWith("'") + ; //check if it is a stop word - boolean isStopWord = stopWordFilter.isStopWord(word); + boolean isStopWord = stopWordFilter.isStopWord(word.toLowerCase()); Token token = new Token(word, lemma, pos, isPunctuation, isStopWord); - + + //determine the head noun + Tree tree = sentence.get(TreeAnnotation.class); + CollinsHeadFinder headFinder = new CollinsHeadFinder(); + Tree head = headFinder.determineHead(tree); + document.add(token); } } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java 2013-12-03 12:41:34 UTC (rev 4192) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java 2013-12-09 14:22:20 UTC (rev 4193) @@ -4,6 +4,8 @@ import java.util.LinkedList; import java.util.List; +import org.dllearner.algorithms.isle.TextDocumentGenerator; + /** * A simple text document without further formatting or markup. 
* @@ -11,13 +13,10 @@ */ public class TextDocument extends LinkedList<Token> implements Document { public static void main(String[] args) { - TextDocument t = new TextDocument(); String s = "This is a very long, nice text for testing our new implementation of TextDocument."; - for (String e : s.split(" ")) { - t.add(new Token(e)); - } + TextDocument doc = TextDocumentGenerator.getInstance().generateDocument(s); - System.out.println(t.getRawContent()); + System.out.println(doc.getRawContent()); } @Override Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java 2013-12-03 12:41:34 UTC (rev 4192) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java 2013-12-09 14:22:20 UTC (rev 4193) @@ -12,6 +12,8 @@ public class TrieLinguisticAnnotator implements LinguisticAnnotator { EntityCandidatesTrie candidatesTrie; private boolean normalizeWords = true; + + private boolean ignoreStopWords = true; public TrieLinguisticAnnotator(EntityCandidatesTrie candidatesTrie) { this.candidatesTrie = candidatesTrie; @@ -30,11 +32,13 @@ List<Token> matchedTokens; for (Token token : document) { - matchedTokens = candidatesTrie.getLongestMatchingText(document.getTokensStartingAtToken(token, true)); - if(matchedTokens != null && !matchedTokens.isEmpty()){ - Annotation annotation = new Annotation(document, matchedTokens); - annotations.add(annotation); - } + if(!(token.isPunctuation() ||token.isStopWord())){ + matchedTokens = candidatesTrie.getLongestMatchingText(document.getTokensStartingAtToken(token, true)); + if(matchedTokens != null && !matchedTokens.isEmpty()){ + Annotation annotation = new Annotation(document, matchedTokens); + annotations.add(annotation); + } + } } return annotations; } Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndexGenerator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndexGenerator.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndexGenerator.java 2013-12-09 14:22:20 UTC (rev 4193) @@ -0,0 +1,163 @@ +package org.dllearner.algorithms.isle.index.semantic; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.util.HashSet; +import java.util.Set; + +import org.apache.log4j.Logger; +import org.dllearner.algorithms.isle.EntityCandidateGenerator; +import org.dllearner.algorithms.isle.TextDocumentGenerator; +import org.dllearner.algorithms.isle.index.AnnotatedDocument; +import org.dllearner.algorithms.isle.index.LinguisticAnnotator; +import org.dllearner.algorithms.isle.index.SemanticAnnotator; +import org.dllearner.algorithms.isle.index.SimpleEntityCandidatesTrie; +import org.dllearner.algorithms.isle.index.TextDocument; +import org.dllearner.algorithms.isle.index.TrieEntityCandidateGenerator; +import org.dllearner.algorithms.isle.index.TrieLinguisticAnnotator; +import org.dllearner.algorithms.isle.textretrieval.RDFSLabelEntityTextRetriever; +import org.dllearner.algorithms.isle.wsd.StructureBasedWordSenseDisambiguation; +import 
org.dllearner.algorithms.isle.wsd.WindowBasedContextExtractor; +import org.dllearner.algorithms.isle.wsd.WordSenseDisambiguation; +import org.dllearner.core.owl.Entity; +import org.semanticweb.owlapi.model.OWLAnnotation; +import org.semanticweb.owlapi.model.OWLAnnotationProperty; +import org.semanticweb.owlapi.model.OWLEntity; +import org.semanticweb.owlapi.model.OWLLiteral; +import org.semanticweb.owlapi.model.OWLOntology; + +import com.google.common.hash.HashCode; +import com.google.common.hash.HashFunction; +import com.google.common.hash.Hashing; + +/** + * Interface for an index which is able to resolve a given entity's URI to the set of documents containing + * this entity, i.e., documents which contain words disambiguated to the given entity. + * + * @author Lorenz Buehmann + * @author Daniel Fleischhacker + */ +public abstract class SemanticIndexGenerator { + + static HashFunction hf = Hashing.md5(); + private static final Logger logger = Logger.getLogger(SemanticIndexGenerator.class.getName()); + private static boolean useCache = false; + + public static SemanticIndex generateIndex(Set<String> documents, OWLOntology ontology, WordSenseDisambiguation wordSenseDisambiguation, + EntityCandidateGenerator entityCandidateGenerator, LinguisticAnnotator linguisticAnnotator){ + SemanticAnnotator semanticAnnotator = new SemanticAnnotator(wordSenseDisambiguation, entityCandidateGenerator, linguisticAnnotator); + return generateIndex(documents, ontology, semanticAnnotator); + } + + public static SemanticIndex generateIndex(Set<String> documents, OWLOntology ontology, SemanticAnnotator semanticAnnotator){ + SemanticIndex semanticIndex; + //try to load serialized version + HashCode hc = hf.newHasher().putInt(documents.hashCode()).putInt(ontology.hashCode()).hash(); + File file = new File(hc.toString() + ".ser"); + if(useCache && file.exists()){ + try { + logger.info("Loading semantic index from disk..."); + ObjectInputStream ois = new ObjectInputStream(new FileInputStream(file)); + semanticIndex = (SemanticIndex) ois.readObject(); + ois.close(); + logger.info("...done."); + } catch (Exception e) { + e.printStackTrace(); + semanticIndex = buildIndex(semanticAnnotator, documents); + } + } else { + logger.info("Building semantic index..."); + semanticIndex = buildIndex(semanticAnnotator, documents); + try { + ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(file)); + oos.writeObject(semanticIndex); + oos.close(); + } catch (IOException e1) { + e1.printStackTrace(); + } + logger.info("...done."); + } + return semanticIndex; + } + + public static SemanticIndex generateIndex(Set<String> documents, OWLOntology ontology, boolean useWordNormalization){ + SimpleEntityCandidatesTrie trie; + if (useWordNormalization) { + trie = new SimpleEntityCandidatesTrie(new RDFSLabelEntityTextRetriever(ontology), + ontology, new SimpleEntityCandidatesTrie.LemmatizingWordNetNameGenerator(5)); + } + else { + trie = new SimpleEntityCandidatesTrie(new RDFSLabelEntityTextRetriever(ontology), + ontology, new SimpleEntityCandidatesTrie.DummyNameGenerator()); + } + trie.printTrie(); + + TrieLinguisticAnnotator linguisticAnnotator = new TrieLinguisticAnnotator(trie); + linguisticAnnotator.setNormalizeWords(useWordNormalization); + + SemanticAnnotator semanticAnnotator = new SemanticAnnotator( + new StructureBasedWordSenseDisambiguation(new WindowBasedContextExtractor(), ontology), + new TrieEntityCandidateGenerator(ontology, trie), + linguisticAnnotator); + return generateIndex(documents, ontology, 
semanticAnnotator); + } + + public static SemanticIndex generateIndex(OWLOntology ontology, OWLAnnotationProperty annotationProperty, String language, boolean useWordNormalization){ + Set<OWLEntity> schemaEntities = new HashSet<OWLEntity>(); + schemaEntities.addAll(ontology.getClassesInSignature()); + schemaEntities.addAll(ontology.getObjectPropertiesInSignature()); + schemaEntities.addAll(ontology.getDataPropertiesInSignature()); + Set<String> documents = new HashSet<String>(); + for (OWLEntity entity : schemaEntities) { + String label = null; + Set<OWLAnnotation> annotations = entity.getAnnotations(ontology, annotationProperty); + for (OWLAnnotation annotation : annotations) { + if (annotation.getValue() instanceof OWLLiteral) { + OWLLiteral val = (OWLLiteral) annotation.getValue(); + if (language != null) { + if (val.hasLang(language)) { + label = val.getLiteral(); + } + } + else { + label = val.getLiteral(); + } + } + } + if (label != null) { + documents.add(label); + } + } + return generateIndex(documents, ontology, useWordNormalization); + } + + /** + * Precompute the whole index, i.e. iterate over all entities and compute all annotated documents. + */ + private static SemanticIndex buildIndex(SemanticAnnotator semanticAnnotator, Set<String> documents) { + logger.info("Creating semantic index..."); + SemanticIndex index = new SemanticIndex(); + for (String document : documents) { + TextDocument textDocument = TextDocumentGenerator.getInstance().generateDocument(document); + logger.debug("Processing document:" + textDocument); + AnnotatedDocument annotatedDocument = semanticAnnotator.processDocument(textDocument); + for (Entity entity : annotatedDocument.getContainedEntities()) { + Set<AnnotatedDocument> existingAnnotatedDocuments = index.get(entity); + if (existingAnnotatedDocuments == null) { + existingAnnotatedDocuments = new HashSet<AnnotatedDocument>(); + index.put(entity, existingAnnotatedDocuments); + } + existingAnnotatedDocuments.add(annotatedDocument); + } + logger.debug("Annotated document:" + annotatedDocument); + } + int size = documents.size(); + index.setTotalNrOfDocuments(size); + logger.info("...done."); + return index; + } +} Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java 2013-12-03 12:41:34 UTC (rev 4192) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java 2013-12-09 14:22:20 UTC (rev 4193) @@ -90,10 +90,13 @@ if (annotation.getValue() instanceof OWLLiteral) { OWLLiteral val = (OWLLiteral) annotation.getValue(); if (val.hasLang(language)) { + //trim String label = val.getLiteral().trim(); if(entity instanceof NamedClass){ label = label.toLowerCase(); } + //remove content in brackets like (...) + label = label.replaceAll("\\s?\\((.*?)\\)", ""); textWithWeight.put(TextDocumentGenerator.getInstance().generateDocument(label), weight); } }
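The new generator bundles the whole pipeline (trie construction, linguistic annotation, word sense disambiguation) behind static factory methods. A usage sketch of the label-driven overload added here; the language tag and the reuse of the NTNames ontology are illustrative choices only:

    import org.dllearner.algorithms.isle.index.semantic.SemanticIndex;
    import org.dllearner.algorithms.isle.index.semantic.SemanticIndexGenerator;
    import org.semanticweb.owlapi.apibinding.OWLManager;
    import org.semanticweb.owlapi.model.IRI;
    import org.semanticweb.owlapi.model.OWLAnnotationProperty;
    import org.semanticweb.owlapi.model.OWLOntology;
    import org.semanticweb.owlapi.vocab.OWLRDFVocabulary;

    import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl;

    public class SemanticIndexSketch {
        public static void main(String[] args) throws Exception {
            OWLOntology ontology = OWLManager.createOWLOntologyManager()
                    .loadOntology(IRI.create("http://www.semanticbible.com/2006/11/NTNames.owl"));
            OWLAnnotationProperty rdfsLabel = new OWLDataFactoryImpl()
                    .getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_LABEL.getIRI());
            // collects the English rdfs:label literals as a document corpus, annotates
            // them, and returns the entity -> annotated-documents index; with useCache
            // enabled the result is also serialized to disk under an MD5 hash of the input
            SemanticIndex index = SemanticIndexGenerator.generateIndex(ontology, rdfsLabel, "en", true);
        }
    }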
From: <lor...@us...> - 2013-12-09 15:34:19
|
Revision: 4196 http://sourceforge.net/p/dl-learner/code/4196 Author: lorenz_b Date: 2013-12-09 15:34:15 +0000 (Mon, 09 Dec 2013) Log Message: ----------- Added syntactic index. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/TextDocumentGenerator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Token.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/LuceneSyntacticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/OWLOntologyLuceneSyntacticIndexCreator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/TextDocumentSyntacticIndexCreator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/RDFSLabelEntityTextRetriever.java Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Index.java Removed Paths: ------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/SyntacticIndex.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/TextDocumentGenerator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/TextDocumentGenerator.java 2013-12-09 14:40:04 UTC (rev 4195) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/TextDocumentGenerator.java 2013-12-09 15:34:15 UTC (rev 4196) @@ -17,6 +17,8 @@ import edu.stanford.nlp.trees.CollinsHeadFinder; import edu.stanford.nlp.trees.Tree; import edu.stanford.nlp.trees.TreeCoreAnnotations.TreeAnnotation; +import edu.stanford.nlp.trees.tregex.TregexMatcher; +import edu.stanford.nlp.trees.tregex.TregexPattern; import edu.stanford.nlp.util.CoreMap; public class TextDocumentGenerator { @@ -41,6 +43,10 @@ } public TextDocument generateDocument(String text) { + return generateDocument(text, false); + } + + public TextDocument generateDocument(String text, boolean determineHead) { TextDocument document = new TextDocument(); // create an empty Annotation just with the given text Annotation annotatedDocument = new Annotation(text); @@ -53,6 +59,33 @@ List<CoreMap> sentences = annotatedDocument.get(SentencesAnnotation.class); for(CoreMap sentence: sentences) { + + //determine the head noun + String head = null; + if(determineHead){ + //if phrase only contains one single token, the task is trivial + if(sentence.get(TokensAnnotation.class).size() == 1){ + head = sentence.get(TokensAnnotation.class).get(0).get(TextAnnotation.class); + } else { + Tree tree = sentence.get(TreeAnnotation.class); + CollinsHeadFinder headFinder = new CollinsHeadFinder(); +// Tree head = headFinder.determineHead(tree); +// System.out.println(sentence); +// System.out.println(tree.headTerminal(headFinder)); + head = tree.headTerminal(headFinder).toString(); + + // Create a reusable pattern object + TregexPattern patternMW = TregexPattern.compile("__ >># NP"); + // Run the pattern on one particular tree + TregexMatcher matcher = patternMW.matcher(tree); + // Iterate over all of the subtrees that matched + while (matcher.findNextMatchingNode()) { + Tree match = matcher.getMatch(); + // do what we want to with the subtree + } + } + } + for (CoreLabel label: sentence.get(TokensAnnotation.class)) { // this is the text of the token String word = 
label.get(TextAnnotation.class); @@ -71,10 +104,9 @@ Token token = new Token(word, lemma, pos, isPunctuation, isStopWord); - //determine the head noun - Tree tree = sentence.get(TreeAnnotation.class); - CollinsHeadFinder headFinder = new CollinsHeadFinder(); - Tree head = headFinder.determineHead(tree); + if(determineHead && word.equals(head)){ + token.setIsHead(true); + } document.add(token); } Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Index.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Index.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Index.java 2013-12-09 15:34:15 UTC (rev 4196) @@ -0,0 +1,31 @@ +/** + * + */ +package org.dllearner.algorithms.isle.index; + +import java.util.Set; + +import org.dllearner.core.owl.Entity; + +/** + * @author Lorenz Buehmann + * + */ +public interface Index { + + /** + * Returns a set of documents based on how the underlying index is processing the given + * search string. + * + * @param searchString query specifying the documents to retrieve + * @return set of documents retrieved based on the given query string + */ + Set<AnnotatedDocument> getDocuments(Entity entity); + + /** + * Returns the total number of documents contained in the index. + * + * @return the total number of documents contained in the index + */ + int getTotalNumberOfDocuments(); +} Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Token.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Token.java 2013-12-09 14:40:04 UTC (rev 4195) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Token.java 2013-12-09 15:34:15 UTC (rev 4196) @@ -21,6 +21,7 @@ private String posTag; private boolean isPunctuation; private boolean isStopWord; + private boolean isHead; /// for storing alternative forms of this token, e.g., generated by WordNet synonyms private HashSet<String> alternativeForms; @@ -36,7 +37,7 @@ this.isStopWord = isStopWord; this.alternativeForms = new HashSet<>(); } - + /** * @return the rawForm */ @@ -117,6 +118,20 @@ public void setIsStopWord(boolean isStopWord) { this.isStopWord = isStopWord; } + + /** + * @param wheteher the token is the head of the containg sequence of tokens + */ + public void setIsHead(boolean isHead) { + this.isHead = isHead; + } + + /** + * @return the isHead + */ + public boolean isHead() { + return isHead; + } /* (non-Javadoc) * @see java.lang.Object#toString() Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/LuceneSyntacticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/LuceneSyntacticIndex.java 2013-12-09 14:40:04 UTC (rev 4195) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/LuceneSyntacticIndex.java 2013-12-09 15:34:15 UTC (rev 4196) @@ -3,6 +3,15 @@ */ package org.dllearner.algorithms.isle.index.syntactic; +import java.io.File; +import java.io.IOException; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; + import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import 
org.apache.lucene.index.DirectoryReader; @@ -12,71 +21,88 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.TotalHitCountCollector; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; import org.dllearner.algorithms.isle.TextDocumentGenerator; +import org.dllearner.algorithms.isle.index.AnnotatedDocument; +import org.dllearner.algorithms.isle.index.AnnotatedTextDocument; +import org.dllearner.algorithms.isle.index.Index; import org.dllearner.algorithms.isle.index.TextDocument; +import org.dllearner.algorithms.isle.index.Token; +import org.dllearner.algorithms.isle.textretrieval.AnnotationEntityTextRetriever; +import org.dllearner.algorithms.isle.textretrieval.RDFSLabelEntityTextRetriever; +import org.dllearner.core.owl.Entity; +import org.semanticweb.owlapi.model.OWLOntology; -import java.io.File; -import java.io.IOException; -import java.util.HashSet; -import java.util.Set; - /** * @author Lorenz Buehmann * */ -public class LuceneSyntacticIndex implements SyntacticIndex { +public class LuceneSyntacticIndex implements Index { private IndexSearcher searcher; private QueryParser parser; private IndexReader indexReader; private String searchField; + + AnnotationEntityTextRetriever textRetriever; - public LuceneSyntacticIndex(IndexReader indexReader, String searchField) throws Exception { + public LuceneSyntacticIndex(OWLOntology ontology, IndexReader indexReader, String searchField) throws Exception { this.indexReader = indexReader; this.searchField = searchField; searcher = new IndexSearcher(indexReader); StandardAnalyzer analyzer = new StandardAnalyzer( Version.LUCENE_43); parser = new QueryParser( Version.LUCENE_43, searchField, analyzer ); + + textRetriever = new RDFSLabelEntityTextRetriever(ontology); } - public LuceneSyntacticIndex(Directory directory, String searchField) throws Exception { - this(DirectoryReader.open(directory), searchField); + public LuceneSyntacticIndex(OWLOntology ontology, Directory directory, String searchField) throws Exception { + this(ontology, DirectoryReader.open(directory), searchField); } - public LuceneSyntacticIndex(String indexDirectory, String searchField) throws Exception { - this(DirectoryReader.open(FSDirectory.open(new File(indexDirectory))), searchField); + public LuceneSyntacticIndex(OWLOntology ontology, String indexDirectory, String searchField) throws Exception { + this(ontology, DirectoryReader.open(FSDirectory.open(new File(indexDirectory))), searchField); } /* (non-Javadoc) * @see org.dllearner.algorithms.isle.SyntacticIndex#getDocuments(java.lang.String) */ @Override - public Set<org.dllearner.algorithms.isle.index.Document> getDocuments(String searchString) { - Set<org.dllearner.algorithms.isle.index.Document> documents = new HashSet<org.dllearner.algorithms.isle.index.Document>(); - try { - Query query = parser.parse(searchString); - ScoreDoc[] result = searcher.search(query, getSize()).scoreDocs; - for (int i = 0; i < result.length; i++) { - Document doc = searcher.doc(result[i].doc); - documents.add(TextDocumentGenerator.getInstance().generateDocument(doc.get(searchField))); + public Set<AnnotatedDocument> getDocuments(Entity entity) { + Set<AnnotatedDocument> documents = new HashSet<AnnotatedDocument>(); + + Map<List<Token>, Double> relevantText = textRetriever.getRelevantText(entity); + + for (Entry<List<Token>, Double> entry : 
relevantText.entrySet()) { + List<Token> tokens = entry.getKey(); + for (Token token : tokens) { + try { + Query query = parser.parse(token.getRawForm()); + ScoreDoc[] result = searcher.search(query, getTotalNumberOfDocuments()).scoreDocs; + for (int i = 0; i < result.length; i++) { + Document doc = searcher.doc(result[i].doc); + documents.add(new AnnotatedTextDocument( + TextDocumentGenerator.getInstance().generateDocument(doc.get(searchField)), + Collections.EMPTY_SET)); + } + } catch (ParseException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } } - } catch (ParseException e) { - e.printStackTrace(); - } catch (IOException e) { - e.printStackTrace(); } + return documents; } /* (non-Javadoc) - * @see org.dllearner.algorithms.isle.SyntacticIndex#getSize() + * @see org.dllearner.algorithms.isle.index.Index#getTotalNumberOfDocuments() */ @Override - public int getSize() { + public int getTotalNumberOfDocuments() { return indexReader.numDocs(); } @@ -94,22 +120,5 @@ return documents; } - /* (non-Javadoc) - * @see org.dllearner.algorithms.isle.SyntacticIndex#count(java.lang.String) - */ - @Override - public int count(String searchString) { - try { - Query query = parser.parse(searchString); - TotalHitCountCollector results = new TotalHitCountCollector(); - searcher.search(query, results); - return results.getTotalHits(); - } catch (ParseException e) { - e.printStackTrace(); - } catch (IOException e) { - e.printStackTrace(); - } - return -1; - } } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/OWLOntologyLuceneSyntacticIndexCreator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/OWLOntologyLuceneSyntacticIndexCreator.java 2013-12-09 14:40:04 UTC (rev 4195) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/OWLOntologyLuceneSyntacticIndexCreator.java 2013-12-09 15:34:15 UTC (rev 4196) @@ -3,6 +3,10 @@ */ package org.dllearner.algorithms.isle.index.syntactic; +import java.io.IOException; +import java.util.HashSet; +import java.util.Set; + import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Field; @@ -14,14 +18,17 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.Version; -import org.semanticweb.owlapi.model.*; +import org.dllearner.algorithms.isle.index.Index; +import org.semanticweb.owlapi.model.OWLAnnotation; +import org.semanticweb.owlapi.model.OWLAnnotationProperty; +import org.semanticweb.owlapi.model.OWLDataFactory; +import org.semanticweb.owlapi.model.OWLEntity; +import org.semanticweb.owlapi.model.OWLLiteral; +import org.semanticweb.owlapi.model.OWLOntology; import org.semanticweb.owlapi.vocab.OWLRDFVocabulary; + import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; -import java.io.IOException; -import java.util.HashSet; -import java.util.Set; - /** * Creates a Lucene Index for the labels if classes and properties. 
* @author Lorenz Buehmann @@ -49,7 +56,7 @@ schemaEntities.addAll(ontology.getDataPropertiesInSignature()); } - public SyntacticIndex buildIndex() throws Exception{ + public Index buildIndex() throws Exception{ Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer); IndexWriter writer = new IndexWriter(directory, indexWriterConfig); Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/SyntacticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/SyntacticIndex.java 2013-12-09 14:40:04 UTC (rev 4195) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/SyntacticIndex.java 2013-12-09 15:34:15 UTC (rev 4196) @@ -1,43 +0,0 @@ -/** - * - */ -package org.dllearner.algorithms.isle.index.syntactic; - -import org.dllearner.algorithms.isle.index.Document; - -import java.util.Set; - -/** - * Interface for a syntactic index, e.g., a basic string-based inverted index. - * - * @author Lorenz Buehmann - * @author Daniel Fleischhacker - */ -public interface SyntacticIndex { - - /** - * Returns a set of documents based on how the underlying index is processing the given - * search string. - * - * @param searchString query specifying the documents to retrieve - * @return set of documents retrieved based on the given query string - */ - Set<Document> getDocuments(String searchString); - - /** - * Returns the number of documents based on how the underlying index is processing the - * given search string. - * - * @param searchString query specifying the documents to include in the number of documents - * @return number of documents retrieved based on the given query string - */ - int count(String searchString); - - /** - * Returns the total number of documents contained in the index. 
- * - * @return the total number of documents contained in the index - */ - int getSize(); - -} Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/TextDocumentSyntacticIndexCreator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/TextDocumentSyntacticIndexCreator.java 2013-12-09 14:40:04 UTC (rev 4195) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/TextDocumentSyntacticIndexCreator.java 2013-12-09 15:34:15 UTC (rev 4196) @@ -3,6 +3,13 @@ */ package org.dllearner.algorithms.isle.index.syntactic; +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.HashSet; +import java.util.Set; + import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Field; @@ -14,15 +21,9 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.SimpleFSDirectory; import org.apache.lucene.util.Version; +import org.dllearner.algorithms.isle.index.Index; import org.dllearner.algorithms.isle.index.TextDocument; -import java.io.BufferedReader; -import java.io.File; -import java.io.FileReader; -import java.io.IOException; -import java.util.HashSet; -import java.util.Set; - /** * Creates a syntactic index from text files stored on disk * @@ -39,7 +40,7 @@ this.inputDirectory = inputDirectory; } - public SyntacticIndex buildIndex() throws Exception{ + public Index buildIndex() throws Exception{ Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer); IndexWriter writer = new IndexWriter(indexDirectory, indexWriterConfig); @@ -79,7 +80,7 @@ return new LuceneSyntacticIndex(indexDirectory, searchField); } - public SyntacticIndex buildIndex(Set<TextDocument> documents) throws Exception{ + public Index buildIndex(Set<TextDocument> documents) throws Exception{ Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer); IndexWriter writer = new IndexWriter(indexDirectory, indexWriterConfig); @@ -106,7 +107,7 @@ return new LuceneSyntacticIndex(indexDirectory, searchField); } - public static SyntacticIndex loadIndex(File indexDirectory) throws Exception { + public static Index loadIndex(File indexDirectory) throws Exception { return new LuceneSyntacticIndex(new SimpleFSDirectory(indexDirectory), searchField); } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java 2013-12-09 14:40:04 UTC (rev 4195) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java 2013-12-09 15:34:15 UTC (rev 4196) @@ -46,6 +46,7 @@ private boolean useShortFormFallback = true; private IRIShortFormProvider sfp = new SimpleIRIShortFormProvider(); + protected boolean determineHeadNoun = false; private OWLAnnotationProperty[] properties; @@ -97,7 +98,7 @@ } //remove content in brackets like (...) 
label = label.replaceAll("\\s?\\((.*?)\\)", ""); - textWithWeight.put(TextDocumentGenerator.getInstance().generateDocument(label), weight); + textWithWeight.put(TextDocumentGenerator.getInstance().generateDocument(label, determineHeadNoun), weight); } } } @@ -107,7 +108,7 @@ String shortForm = sfp.getShortForm(IRI.create(entity.getURI())); shortForm = Joiner.on(" ").join(LinguisticUtil.getInstance().getWordsFromCamelCase(shortForm)); shortForm = Joiner.on(" ").join(LinguisticUtil.getInstance().getWordsFromUnderscored(shortForm)).trim(); - textWithWeight.put(TextDocumentGenerator.getInstance().generateDocument(shortForm), weight); + textWithWeight.put(TextDocumentGenerator.getInstance().generateDocument(shortForm, determineHeadNoun), weight); } return textWithWeight; Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/RDFSLabelEntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/RDFSLabelEntityTextRetriever.java 2013-12-09 14:40:04 UTC (rev 4195) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/RDFSLabelEntityTextRetriever.java 2013-12-09 15:34:15 UTC (rev 4196) @@ -34,10 +34,12 @@ public RDFSLabelEntityTextRetriever(OWLOntology ontology) { super(ontology, new OWLDataFactoryImpl().getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_LABEL.getIRI())); + determineHeadNoun = true; } public RDFSLabelEntityTextRetriever(OWLAPIOntology ontology) { super(ontology, new OWLDataFactoryImpl().getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_LABEL.getIRI())); + determineHeadNoun = true; } public static void main(String[] args) throws Exception { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2013-12-10 13:16:50
Revision: 4203 http://sourceforge.net/p/dl-learner/code/4203 Author: lorenz_b Date: 2013-12-10 13:16:47 +0000 (Tue, 10 Dec 2013) Log Message: ----------- Added SOLR based syntactic index. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/TextDocumentGenerator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Index.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/LuceneSyntacticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/AbstractRelevanceMetric.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/NTriplesFileLuceneSyntacticIndexCreator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/SolrSyntacticIndex.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java 2013-12-10 12:52:52 UTC (rev 4202) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java 2013-12-10 13:16:47 UTC (rev 4203) @@ -112,14 +112,14 @@ // OWLClassExpression owlapiDescription = OWLAPIConverter.getOWLAPIDescription(expression); // Set<Entity> entities = OWLAPIConverter.getEntities(owlapiDescription.getSignature()); Set<Entity> entities = expression.getSignature(); - double sum = 0; - for (Entity entity : entities) { - double relevance = entityRelevance.containsKey(entity) ? entityRelevance.get(entity) : 0;//System.out.println(entity + ":" + relevance); - if(!Double.isInfinite(relevance)){ - sum += relevance; - } - } - score += nlpBonusFactor * sum; +// double sum = 0; +// for (Entity entity : entities) { +// double relevance = entityRelevance.containsKey(entity) ?
entityRelevance.get(entity) : 0;//System.out.println(entity + ":" + relevance); +// if(!Double.isInfinite(relevance)){ +// sum += relevance; +// } +// } +// score += nlpBonusFactor * sum; return score; } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/TextDocumentGenerator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/TextDocumentGenerator.java 2013-12-10 12:52:52 UTC (rev 4202) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/TextDocumentGenerator.java 2013-12-10 13:16:47 UTC (rev 4203) @@ -74,15 +74,15 @@ // System.out.println(tree.headTerminal(headFinder)); head = tree.headTerminal(headFinder).toString(); - // Create a reusable pattern object - TregexPattern patternMW = TregexPattern.compile("__ >># NP"); - // Run the pattern on one particular tree - TregexMatcher matcher = patternMW.matcher(tree); - // Iterate over all of the subtrees that matched - while (matcher.findNextMatchingNode()) { - Tree match = matcher.getMatch(); - // do what we want to with the subtree - } +// // Create a reusable pattern object +// TregexPattern patternMW = TregexPattern.compile("__ >># NP"); +// // Run the pattern on one particular tree +// TregexMatcher matcher = patternMW.matcher(tree); +// // Iterate over all of the subtrees that matched +// while (matcher.findNextMatchingNode()) { +// Tree match = matcher.getMatch(); +// // do what we want to with the subtree +// } } } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Index.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Index.java 2013-12-10 12:52:52 UTC (rev 4202) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Index.java 2013-12-10 13:16:47 UTC (rev 4203) @@ -21,11 +21,29 @@ * @return set of documents retrieved based on the given query string */ Set<AnnotatedDocument> getDocuments(Entity entity); + + /** + * Returns the number of documents in the index that contain + * a mention of the given entity. + * + * @param entity the entity to count documents for + * @return number of documents mentioning the given entity + */ + long getNumberOfDocumentsFor(Entity entity); + + /** + * Returns the number of documents in the index that contain + * mentions of all given entities. + * + * @param entities the entities to count joint documents for + * @return number of documents mentioning all given entities + */ + long getNumberOfDocumentsFor(Entity... entities); /** * Returns the total number of documents contained in the index.
* * @return the total number of documents contained in the index */ - int getTotalNumberOfDocuments(); + long getTotalNumberOfDocuments(); } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java 2013-12-10 12:52:52 UTC (rev 4202) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java 2013-12-10 13:16:47 UTC (rev 4203) @@ -5,6 +5,7 @@ import java.util.Set; import org.dllearner.algorithms.isle.index.AnnotatedDocument; +import org.dllearner.algorithms.isle.index.Index; import org.dllearner.core.owl.Entity; /** @@ -14,7 +15,7 @@ * @author Lorenz Buehmann * @author Daniel Fleischhacker */ -public class SemanticIndex extends HashMap<Entity, Set<AnnotatedDocument>>{ +public class SemanticIndex extends HashMap<Entity, Set<AnnotatedDocument>> implements Index{ private int nrOfDocuments; @@ -49,11 +50,33 @@ this.nrOfDocuments = nrOfDocuments; } - /** - * @return the nrOfDocuments + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.index.Index#getTotalNumberOfDocuments() */ - public int getTotalNrOfDocuments() { + @Override + public long getTotalNumberOfDocuments() { return nrOfDocuments; } + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.index.Index#getNumberOfDocumentsFor(org.dllearner.core.owl.Entity) + */ + @Override + public long getNumberOfDocumentsFor(Entity entity) { + return getDocuments(entity).size(); + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.index.Index#getNumberOfDocumentsFor(org.dllearner.core.owl.Entity[]) + */ + @Override + public long getNumberOfDocumentsFor(Entity... 
entities) { + + Set<AnnotatedDocument> documents = getDocuments(entities[0]); + for (int i = 1; i < entities.length; i++) { + documents.retainAll(getDocuments(entities[i])); + } + return documents.size(); + } + } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/LuceneSyntacticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/LuceneSyntacticIndex.java 2013-12-10 12:52:52 UTC (rev 4202) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/LuceneSyntacticIndex.java 2013-12-10 13:16:47 UTC (rev 4203) @@ -80,7 +80,7 @@ for (Token token : tokens) { try { Query query = parser.parse(token.getRawForm()); - ScoreDoc[] result = searcher.search(query, getTotalNumberOfDocuments()).scoreDocs; + ScoreDoc[] result = searcher.search(query, indexReader.numDocs()).scoreDocs; for (int i = 0; i < result.length; i++) { Document doc = searcher.doc(result[i].doc); documents.add(new AnnotatedTextDocument( @@ -102,7 +102,7 @@ * @see org.dllearner.algorithms.isle.index.Index#getTotalNumberOfDocuments() */ @Override - public int getTotalNumberOfDocuments() { + public long getTotalNumberOfDocuments() { return indexReader.numDocs(); } @@ -120,5 +120,21 @@ return documents; } + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.index.Index#getNumberOfDocumentsFor(org.dllearner.core.owl.Entity) + */ + @Override + public long getNumberOfDocumentsFor(Entity entity) { + return 0; + } + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.index.Index#getNumberOfDocumentsFor(org.dllearner.core.owl.Entity[]) + */ + @Override + public long getNumberOfDocumentsFor(Entity... entities) { + return 0; + } + + } Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/NTriplesFileLuceneSyntacticIndexCreator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/NTriplesFileLuceneSyntacticIndexCreator.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/NTriplesFileLuceneSyntacticIndexCreator.java 2013-12-10 13:16:47 UTC (rev 4203) @@ -0,0 +1,122 @@ +/** + * + */ +package org.dllearner.algorithms.isle.index.syntactic; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Set; + +import org.apache.jena.riot.Lang; +import org.apache.jena.riot.RiotReader; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.queryparser.classic.QueryParser; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.util.Version; + +import com.hp.hpl.jena.graph.Triple; + +/** + * Creates a Lucene Index for the labels if classes and properties. + * @author Lorenz Buehmann + * + */ +public class NTriplesFileLuceneSyntacticIndexCreator { + + public NTriplesFileLuceneSyntacticIndexCreator(InputStream nTriplesStream, String indexPath, String searchField) throws IOException { + //setup the index + Directory directory = FSDirectory.open(new File(indexPath)); + + //setup the index analyzer + Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43); + IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer); + indexWriterConfig.setRAMBufferSizeMB(1024.0); + indexWriterConfig.setOpenMode(OpenMode.CREATE); + IndexWriter writer = new IndexWriter(directory, indexWriterConfig); + + System.out.println( "Creating index ..." ); + + // setup the index fields, here two fields, for URI and text + FieldType stringType = new FieldType(StringField.TYPE_STORED); + stringType.setStoreTermVectors(false); + FieldType textType = new FieldType(TextField.TYPE_STORED); + textType.setStoreTermVectors(false); + + Set<Document> documents = new HashSet<Document>(); + + Iterator<Triple> iterator = RiotReader.createIteratorTriples(nTriplesStream, Lang.NTRIPLES, null); + + Triple triple; + String text; + String uri; + Document doc; + int i = 0; + while(iterator.hasNext()){ + triple = iterator.next(); + + uri = triple.getSubject().getURI(); + text = triple.getObject().getLiteralLexicalForm(); + + doc = new Document(); + doc.add(new Field("uri", uri, stringType)); + doc.add(new Field(searchField, text, textType)); + + writer.addDocument(doc); + if(i++ % 10000 == 0){ +// writer.commit(); + System.out.println(i); + } + + } + + writer.commit(); + writer.close(); + } + + public static void main(String[] args) throws Exception { + String indexFile = "/home/me/Documents/short_abstracts_en.nt"; +// indexFile = "/tmp/test.nt"; + String indexPath = "/home/me/Documents/dbpedia/short_abstracts_index"; +// indexPath = "/tmp/index"; + String field = "text"; + new NTriplesFileLuceneSyntacticIndexCreator(new FileInputStream(indexFile), indexPath, field); + + IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath))); + IndexSearcher searcher = new IndexSearcher(reader); + Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43); + + QueryParser parser = new QueryParser(Version.LUCENE_43, field, analyzer); + Query query = parser.parse("film AND direction"); + + TopDocs docs = searcher.search(query, 10); + ScoreDoc[] scoreDocs = docs.scoreDocs; + + for (int i = 0; i < scoreDocs.length; i++) { + Document doc = searcher.doc(scoreDocs[i].doc); + System.out.println(doc.get(field)); + + } + } + + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/SolrSyntacticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/SolrSyntacticIndex.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/SolrSyntacticIndex.java 2013-12-10 13:16:47 UTC (rev 4203) @@ -0,0 +1,176 @@ +/** + * + */ +package org.dllearner.algorithms.isle.index.syntactic; + +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; + +import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.SolrServer; 
+import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.impl.HttpSolrServer; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrDocumentList; +import org.dllearner.algorithms.isle.TextDocumentGenerator; +import org.dllearner.algorithms.isle.index.AnnotatedDocument; +import org.dllearner.algorithms.isle.index.AnnotatedTextDocument; +import org.dllearner.algorithms.isle.index.Index; +import org.dllearner.algorithms.isle.index.Token; +import org.dllearner.algorithms.isle.textretrieval.AnnotationEntityTextRetriever; +import org.dllearner.algorithms.isle.textretrieval.RDFSLabelEntityTextRetriever; +import org.dllearner.core.owl.Entity; +import org.semanticweb.owlapi.model.OWLOntology; + +import com.google.common.base.Joiner; + +/** + * @author Lorenz Buehmann + * + */ +public class SolrSyntacticIndex implements Index{ + + private SolrServer solr; + private AnnotationEntityTextRetriever textRetriever; + private String searchField; + + long totalNumberOfDocuments = -1; + + public SolrSyntacticIndex(OWLOntology ontology, String solrServerURL, String searchField) { + this.searchField = searchField; + solr = new HttpSolrServer(solrServerURL); + textRetriever = new RDFSLabelEntityTextRetriever(ontology); + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.index.Index#getDocuments(org.dllearner.core.owl.Entity) + */ + @Override + public Set<AnnotatedDocument> getDocuments(Entity entity) { + Set<AnnotatedDocument> documents = new HashSet<AnnotatedDocument>(); + + Map<List<Token>, Double> relevantText = textRetriever.getRelevantText(entity); + + for (Entry<List<Token>, Double> entry : relevantText.entrySet()) { + List<Token> tokens = entry.getKey(); + for (Token token : tokens) { + SolrQuery query = new SolrQuery(searchField + ":" + token.getRawForm()); + query.setRows(Integer.MAX_VALUE);//can be very slow + try { + QueryResponse response = solr.query(query); + SolrDocumentList list = response.getResults(); + System.out.println(list.getNumFound()); + for (SolrDocument doc : list) { + String uri = (String) doc.getFieldValue("uri"); + String comment = (String) doc.getFieldValue(searchField); + + documents.add(new AnnotatedTextDocument( + TextDocumentGenerator.getInstance().generateDocument((String) doc.getFieldValue(searchField)), + Collections.EMPTY_SET)); + } + } catch (SolrServerException e) { + e.printStackTrace(); + } + } + } + return documents; + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.index.Index#getTotalNumberOfDocuments() + */ + @Override + public long getTotalNumberOfDocuments() { + if(totalNumberOfDocuments == -1){ + SolrQuery q = new SolrQuery("*:*"); + q.setRows(0); // don't actually request any data + try { + totalNumberOfDocuments = solr.query(q).getResults().getNumFound(); + } catch (SolrServerException e) { + e.printStackTrace(); + } + } + return totalNumberOfDocuments; + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.index.Index#getNumberOfDocumentsFor(org.dllearner.core.owl.Entity) + */ + @Override + public long getNumberOfDocumentsFor(Entity entity) { + Map<List<Token>, Double> relevantText = textRetriever.getRelevantText(entity); + + String queryString = "("; + Set<String> terms = new HashSet<>(); + for (Entry<List<Token>, Double> entry : relevantText.entrySet()) { + List<Token> tokens = entry.getKey(); + String phrase = ""; + for (Token token : tokens) { +// terms.add(token.getRawForm()); + phrase += 
token.getRawForm() + " "; + } + phrase = phrase.trim(); + terms.add(phrase); + } + queryString += Joiner.on(" OR ").join(terms); + queryString += ")"; + + SolrQuery query = new SolrQuery(searchField + ":" + queryString);System.out.println(query); + try { + QueryResponse response = solr.query(query); + SolrDocumentList list = response.getResults(); + return list.getNumFound(); + } catch (SolrServerException e) { + e.printStackTrace(); + } + return -1; + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.index.Index#getNumberOfDocumentsFor(org.dllearner.core.owl.Entity[]) + */ + @Override + public long getNumberOfDocumentsFor(Entity... entities) { + + Set<String> queryStringParts = new HashSet<>(); + + for (Entity entity : entities) { + Map<List<Token>, Double> relevantText = textRetriever.getRelevantText(entity); + + String queryString = "("; + Set<String> terms = new HashSet<>(); + for (Entry<List<Token>, Double> entry : relevantText.entrySet()) { + List<Token> tokens = entry.getKey(); + String phrase = ""; + for (Token token : tokens) { +// terms.add(token.getRawForm()); + phrase += token.getRawForm() + " "; + } + phrase = phrase.trim(); + terms.add(phrase); + } + queryString += Joiner.on(" OR ").join(terms); + queryString += ")"; + queryStringParts.add(queryString); + } + + String queryStringConjunction = "(" + Joiner.on(" AND ").join(queryStringParts) + ")"; + + + SolrQuery query = new SolrQuery(searchField + ":" + queryStringConjunction);System.out.println(query); + try { + QueryResponse response = solr.query(query); + SolrDocumentList list = response.getResults(); + return list.getNumFound(); + } catch (SolrServerException e) { + e.printStackTrace(); + } + return -1; + } + +} Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/AbstractRelevanceMetric.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/AbstractRelevanceMetric.java 2013-12-10 12:52:52 UTC (rev 4202) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/AbstractRelevanceMetric.java 2013-12-10 13:16:47 UTC (rev 4203) @@ -6,7 +6,7 @@ import java.util.HashMap; import java.util.Map; -import org.dllearner.algorithms.isle.index.semantic.SemanticIndex; +import org.dllearner.algorithms.isle.index.Index; import org.semanticweb.owlapi.model.OWLEntity; /** @@ -15,9 +15,9 @@ */ public abstract class AbstractRelevanceMetric implements RelevanceMetric { - protected SemanticIndex index; + protected Index index; - public AbstractRelevanceMetric(SemanticIndex index) { + public AbstractRelevanceMetric(Index index) { this.index = index; } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java 2013-12-10 12:52:52 UTC (rev 4202) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java 2013-12-10 13:16:47 UTC (rev 4203) @@ -6,7 +6,7 @@ import java.util.Set; import org.dllearner.algorithms.isle.index.AnnotatedDocument; -import org.dllearner.algorithms.isle.index.semantic.SemanticIndex; +import org.dllearner.algorithms.isle.index.Index; import org.dllearner.core.owl.Entity; import com.google.common.collect.Sets; @@ -17,21 +17,22 @@ */ public class PMIRelevanceMetric extends AbstractRelevanceMetric { - public PMIRelevanceMetric(SemanticIndex
index) { + public PMIRelevanceMetric(Index index) { super(index); } @Override public double getRelevance(Entity entityA, Entity entityB){ - Set<AnnotatedDocument> documentsA = index.getDocuments(entityA); - Set<AnnotatedDocument> documentsB = index.getDocuments(entityB); - Set<AnnotatedDocument> documentsAB = Sets.intersection(documentsA, documentsB); - int nrOfDocuments = index.getTotalNrOfDocuments(); + long nrOfDocumentsA = index.getNumberOfDocumentsFor(entityA); + long nrOfDocumentsB = index.getNumberOfDocumentsFor(entityB); + long nrOfDocumentsAB = index.getNumberOfDocumentsFor(entityA, entityB); - double pA = nrOfDocuments == 0 ? 0 : ((double) documentsA.size() / (double) nrOfDocuments); - double pB = nrOfDocuments == 0 ? 0 : ((double) documentsB.size() / (double) nrOfDocuments); - double pAB = nrOfDocuments == 0 ? 0 : ((double) documentsAB.size() / (double) nrOfDocuments); + long nrOfDocuments = index.getTotalNumberOfDocuments(); + double pA = nrOfDocuments == 0 ? 0 : ((double) nrOfDocumentsA / (double) nrOfDocuments); + double pB = nrOfDocuments == 0 ? 0 : ((double) nrOfDocumentsB / (double) nrOfDocuments); + double pAB = nrOfDocuments == 0 ? 0 : ((double) nrOfDocumentsAB / (double) nrOfDocuments); + double pmi = Math.log(pAB / (pA * pB)); return pmi; @@ -42,7 +43,7 @@ Set<AnnotatedDocument> documentsA = index.getDocuments(entityA); Set<AnnotatedDocument> documentsB = index.getDocuments(entityB); Set<AnnotatedDocument> documentsAB = Sets.intersection(documentsA, documentsB); - int nrOfDocuments = index.getTotalNrOfDocuments(); + long nrOfDocuments = index.getTotalNumberOfDocuments(); // System.out.println("A:" + documentsA.size()); // System.out.println("B:" + documentsB.size()); // System.out.println("AB:" + documentsAB.size()); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
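The counting methods introduced in this revision exist to feed the pointwise mutual information above: with pA = nA/N, pB = nB/N and pAB = nAB/N over N indexed documents, PMI(A,B) = log(pAB / (pA * pB)). The explicit parentheses are essential, since Java evaluates pAB / pA * pB left to right as (pAB / pA) * pB. A small worked sketch with invented counts:

public class PmiSketch {

    /**
     * Pointwise mutual information from document counts:
     * PMI(A,B) = log( pAB / (pA * pB) ) with pX = nX / N.
     * In the metric above, the three counts come from Index#getNumberOfDocumentsFor
     * and N from Index#getTotalNumberOfDocuments.
     */
    static double pmi(long docsA, long docsB, long docsAB, long total) {
        if (total == 0 || docsA == 0 || docsB == 0) {
            return Double.NaN; // marginal probabilities undefined
        }
        double pA = (double) docsA / total;
        double pB = (double) docsB / total;
        double pAB = (double) docsAB / total;
        // the parentheses matter: pAB / pA * pB would be (pAB / pA) * pB
        return Math.log(pAB / (pA * pB));
    }

    public static void main(String[] args) {
        // invented counts: 10000 documents, A in 200, B in 400, A and B together in 50
        // pA = 0.02, pB = 0.04, pAB = 0.005, so PMI = ln(6.25), roughly 1.83
        System.out.println(pmi(200, 400, 50, 10000));
    }
}

For the Solr-backed index these counts come from the numFound of a query against the search field; for the in-memory SemanticIndex they are the sizes of the per-entity document sets and of their intersection.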
From: <dfl...@us...> - 2013-12-10 15:25:17
Revision: 4207 http://sourceforge.net/p/dl-learner/code/4207 Author: dfleischhacker Date: 2013-12-10 15:25:13 +0000 (Tue, 10 Dec 2013) Log Message: ----------- Add scoring for hyponyms and token tree Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordNet.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Token.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/EntityScorePair.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordNet.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordNet.java 2013-12-10 14:35:02 UTC (rev 4206) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordNet.java 2013-12-10 15:25:13 UTC (rev 4207) @@ -13,6 +13,8 @@ public class WordNet { + private static final double SYNONYM_FACTOR = 0.8; + private static final double HYPONYM_FACTOR = 0.4; public Dictionary dict; public WordNet() { @@ -280,6 +282,42 @@ } } + public List<LemmaScorePair> getHyponymsScored(POS pos, String s) { + ArrayList<LemmaScorePair> result = new ArrayList<>(); + try { + IndexWord word = dict.getIndexWord(pos, s); + if (word == null) { + System.err.println("Unable to find index word for " + s); + return result; + } + Synset sense = word.getSense(1); + getHyponymsScoredRecursive(result, sense, 3, SYNONYM_FACTOR); + } + catch (JWNLException e) { + e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. + } + return result; + } + + public void getHyponymsScoredRecursive(List<LemmaScorePair> lemmas, Synset sense, int depthToGo, double score) { + for (Word w : sense.getWords()) { + lemmas.add(new LemmaScorePair(w.getLemma(), score)); + } + if (depthToGo == 0) { + return; + } + try { + PointerTargetNodeList directHyponyms = PointerUtils.getInstance().getDirectHyponyms(sense); + for (Object directHyponym : directHyponyms) { + getHyponymsScoredRecursive(lemmas, ((PointerTargetNode) directHyponym).getSynset(), depthToGo - 1, + score * HYPONYM_FACTOR); + } + } + catch (JWNLException e) { + e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. + } + } + /** * Funktion returns a List of Hypo and Hypernyms of a given string * @@ -356,4 +394,71 @@ return result; } + public static class LemmaScorePair implements Comparable<LemmaScorePair> { + private String lemma; + private Double score; + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + LemmaScorePair that = (LemmaScorePair) o; + + if (lemma != null ? !lemma.equals(that.lemma) : that.lemma != null) { + return false; + } + if (score != null ? !score.equals(that.score) : that.score != null) { + return false; + } + + return true; + } + + @Override + public int hashCode() { + int result = lemma != null ? lemma.hashCode() : 0; + result = 31 * result + (score != null ? 
score.hashCode() : 0); + return result; + } + + public String getLemma() { + + return lemma; + } + + public void setLemma(String lemma) { + this.lemma = lemma; + } + + public Double getScore() { + return score; + } + + public void setScore(Double score) { + this.score = score; + } + + public LemmaScorePair(String lemma, Double score) { + + this.lemma = lemma; + this.score = score; + } + + @Override + public int compareTo(LemmaScorePair o) { + int val = score.compareTo(o.score); + + if (val == 0) { + val = lemma.compareTo(o.getLemma()); + } + + return val; + } + } + } Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/EntityScorePair.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/EntityScorePair.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/EntityScorePair.java 2013-12-10 15:25:13 UTC (rev 4207) @@ -0,0 +1,77 @@ +package org.dllearner.algorithms.isle.index; + +import org.dllearner.core.owl.Entity; + +/** + * Represents a scored entity. The score is produced from the path used to retrieve it from the candidates tree. + * @author Daniel Fleischhacker + */ +public class EntityScorePair implements Comparable<EntityScorePair> { + @Override + public String toString() { + return entity + " : " + score; + } + + private Entity entity; + private Double score; + + @Override + public int compareTo(EntityScorePair o) { + int val = score.compareTo(o.score); + + if (val == 0) { + val = entity.getURI().toString().compareTo(o.entity.getURI().toString()); + } + + return val; + } + + public EntityScorePair(Entity entity, Double score) { + this.entity = entity; + this.score = score; + } + + public Entity getEntity() { + return entity; + } + + public void setEntity(Entity entity) { + this.entity = entity; + } + + public Double getScore() { + return score; + } + + public void setScore(Double score) { + this.score = score; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + EntityScorePair that = (EntityScorePair) o; + + if (entity != null ? !entity.equals(that.entity) : that.entity != null) { + return false; + } + if (score != null ? !score.equals(that.score) : that.score != null) { + return false; + } + + return true; + } + + @Override + public int hashCode() { + int result = entity != null ? entity.hashCode() : 0; + result = 31 * result + (score != null ? 
score.hashCode() : 0); + return result; + } +} Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java 2013-12-10 14:35:02 UTC (rev 4206) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java 2013-12-10 15:25:13 UTC (rev 4207) @@ -5,8 +5,7 @@ import net.didion.jwnl.data.POS; import org.dllearner.algorithms.isle.WordNet; -import java.util.ArrayList; -import java.util.Collections; +import java.util.*; /** * Provides shortcuts to commonly used linguistic operations @@ -35,6 +34,26 @@ } } + public Set<WordNet.LemmaScorePair> getScoredHyponyms(String word, POS pos) { + List<WordNet.LemmaScorePair> pairs = wn.getHyponymsScored(pos, word); + HashMap<String, Double> lemmaScores = new HashMap<>(); + for (WordNet.LemmaScorePair p : pairs) { + if (!lemmaScores.containsKey(p.getLemma())) { + lemmaScores.put(p.getLemma(), p.getScore()); + } + else { + lemmaScores.put(p.getLemma(), Math.max(p.getScore(), lemmaScores.get(p.getLemma()))); + } + } + + TreeSet<WordNet.LemmaScorePair> scoredPairs = new TreeSet<>(); + for (Map.Entry<String, Double> e : lemmaScores.entrySet()) { + scoredPairs.add(new WordNet.LemmaScorePair(e.getKey(), e.getValue())); + } + + return scoredPairs; + } + /** * Processes the given string and puts camelCased words into single words. * @param camelCase the word containing camelcase to split Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-12-10 14:35:02 UTC (rev 4206) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-12-10 15:25:13 UTC (rev 4207) @@ -1,6 +1,7 @@ package org.dllearner.algorithms.isle.index; import net.didion.jwnl.data.POS; +import org.dllearner.algorithms.isle.WordNet; import org.dllearner.algorithms.isle.textretrieval.EntityTextRetriever; import org.dllearner.core.owl.Entity; import org.semanticweb.owlapi.model.OWLOntology; @@ -89,15 +90,16 @@ continue; } //String[] synonyms = LinguisticUtil.getInstance().getSynonymsForWord(t.getRawForm(), wordnetPos); - String[] synonyms = LinguisticUtil.getInstance().getAllHyponymsForWord(t.getRawForm(), wordnetPos); + Set<WordNet.LemmaScorePair> alternativeFormPairs = LinguisticUtil.getInstance() + .getScoredHyponyms(t.getRawForm(), wordnetPos); - for (String synonym : synonyms) { + for (WordNet.LemmaScorePair synonym : alternativeFormPairs) { // ignore all multi word synonyms - if (synonym.contains("_")) { + if (synonym.getLemma().contains("_")) { continue; } //t.addAlternativeForm(LinguisticUtil.getInstance().getNormalizedForm(synonym)); - t.addAlternativeForm(synonym); + t.addAlternativeForm(synonym.getLemma(), synonym.getScore()); } } } @@ -113,9 +115,14 @@ @Override public Set<Entity> getCandidateEntities(List<Token> tokens) { - return tree.getAllEntities(tokens); - } + Set<Entity> res = tree.getAllEntities(tokens); + System.out.println("Unscored: " + res); + Set<EntityScorePair> scored = tree.getAllEntitiesScored(tokens); + System.out.println("Scored: " + scored); + return res; + } + @Override public List<Token> getGeneratingStringForLongestMatch(List<Token> tokens) { return 
tree.getOriginalTokensForLongestMatch(tokens); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Token.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Token.java 2013-12-10 14:35:02 UTC (rev 4206) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Token.java 2013-12-10 15:25:13 UTC (rev 4207) @@ -7,7 +7,8 @@ import java.io.Serializable; import java.util.Collections; -import java.util.HashSet; +import java.util.HashMap; +import java.util.Map; import java.util.Set; /** @@ -23,7 +24,8 @@ private boolean isStopWord; private boolean isHead; /// for storing alternative forms of this token, e.g., generated by WordNet synonyms - private HashSet<String> alternativeForms; + private HashMap<String, Double> alternativeForms; + public Token(String rawForm) { this.rawForm = rawForm; @@ -35,7 +37,7 @@ this.posTag = posTag; this.isPunctuation = isPunctuation; this.isStopWord = isStopWord; - this.alternativeForms = new HashSet<>(); + this.alternativeForms = new HashMap<>(); } /** @@ -66,15 +68,22 @@ * @return unmodifiable set of alternative surface forms for this token */ public Set<String> getAlternativeForms() { - return Collections.unmodifiableSet(alternativeForms); + return Collections.unmodifiableSet(alternativeForms.keySet()); } /** + * Returns the map storing the scored alternative forms of this token. + */ + public Map<String, Double> getScoredAlternativeForms() { + return Collections.unmodifiableMap(alternativeForms); + } + + /** * Adds a new surface form to the alternative forms of this token. Alternative forms are included in comparison of * two tokens when using the {@link #equalsWithAlternativeForms}. */ - public void addAlternativeForm(String alternativeForm) { - this.alternativeForms.add(alternativeForm); + public void addAlternativeForm(String alternativeForm, Double score) { + this.alternativeForms.put(alternativeForm, score); } /** @@ -120,7 +129,7 @@ } /** - * @param wheteher the token is the head of the containg sequence of tokens + * @param isHead the token is the head of the containg sequence of tokens */ public void setIsHead(boolean isHead) { this.isHead = isHead; @@ -158,8 +167,8 @@ return false; } - if (other.stemmedForm.equals(stemmedForm) || other.alternativeForms.contains(stemmedForm) || - alternativeForms.contains(other.stemmedForm)) { + if (other.stemmedForm.equals(stemmedForm) || other.alternativeForms.containsKey(stemmedForm) || + alternativeForms.containsKey(other.stemmedForm)) { return true; } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java 2013-12-10 14:35:02 UTC (rev 4206) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java 2013-12-10 15:25:13 UTC (rev 4207) @@ -13,6 +13,9 @@ * @author Daniel Fleischhacker */ public class TokenTree { + public static final double WORDNET_FACTOR = 0.3d; + public static final double ORIGINAL_FACTOR = 1.0d; + private LinkedHashMap<Token, TokenTree> children; private Set<Entity> entities; private List<Token> originalTokens; @@ -23,14 +26,15 @@ this.entities = new HashSet<>(); this.originalTokens = new ArrayList<>(); } - + /** * If set to TRUE, stopwords like 'of, on' are ignored during creation and retrieval operations. 
- * @param ignoreStopWords the ignoreStopWords to set - */ - public void setIgnoreStopWords(boolean ignoreStopWords) { - this.ignoreStopWords = ignoreStopWords; - } + * + * @param ignoreStopWords the ignoreStopWords to set + */ + public void setIgnoreStopWords(boolean ignoreStopWords) { + this.ignoreStopWords = ignoreStopWords; + } /** * Adds all given entities to the end of the path resulting from the given tokens. @@ -41,14 +45,14 @@ public void add(List<Token> tokens, Set<Entity> entities, List<Token> originalTokens) { TokenTree curNode = this; for (Token t : tokens) { - if(!ignoreStopWords || (ignoreStopWords && !t.isStopWord())){ - TokenTree nextNode = curNode.children.get(t); + if (!ignoreStopWords || (ignoreStopWords && !t.isStopWord())) { + TokenTree nextNode = curNode.children.get(t); if (nextNode == null) { nextNode = new TokenTree(); curNode.children.put(t, nextNode); } curNode = nextNode; - } + } } curNode.entities.addAll(entities); curNode.originalTokens = new ArrayList<>(originalTokens); @@ -90,6 +94,75 @@ return curNode.entities; } + public Set<EntityScorePair> getAllEntitiesScored(List<Token> tokens) { + HashSet<EntityScorePair> resEntities = new HashSet<>(); + getAllEntitiesScoredRec(tokens, 0, this, resEntities, 1.0); + + // only keep highest confidence for each entity + HashMap<Entity, Double> entityScores = new HashMap<>(); + + for (EntityScorePair p : resEntities) { + if (!entityScores.containsKey(p.getEntity())) { + entityScores.put(p.getEntity(), p.getScore()); + } + else { + entityScores.put(p.getEntity(), Math.max(p.getScore(), entityScores.get(p.getEntity()))); + } + } + + TreeSet<EntityScorePair> result = new TreeSet<>(); + for (Map.Entry<Entity, Double> e : entityScores.entrySet()) { + result.add(new EntityScorePair(e.getKey(), e.getValue())); + } + + return result; + } + + public void getAllEntitiesScoredRec(List<Token> tokens, int curPosition, TokenTree curTree, + HashSet<EntityScorePair> resEntities, Double curScore) { + + if (curPosition == tokens.size()) { + for (Entity e : curTree.entities) { + resEntities.add(new EntityScorePair(e, curScore)); + } + return; + } + Token currentTextToken = tokens.get(curPosition); + for (Map.Entry<Token, TokenTree> treeTokenEntry : curTree.children.entrySet()) { + if (currentTextToken.equals(treeTokenEntry.getKey())) { + getAllEntitiesScoredRec(tokens, curPosition + 1, treeTokenEntry.getValue(), resEntities, + curScore * ORIGINAL_FACTOR); + } + else { + for (Map.Entry<String, Double> treeAlternativeForm : treeTokenEntry.getKey().getScoredAlternativeForms() + .entrySet()) { + if (currentTextToken.getStemmedForm().equals(treeAlternativeForm.getKey())) { + getAllEntitiesScoredRec(tokens, curPosition + 1, treeTokenEntry.getValue(), resEntities, + curScore * ORIGINAL_FACTOR * treeAlternativeForm.getValue()); + } + } + for (Map.Entry<String, Double> textAlternativeForm : currentTextToken.getScoredAlternativeForms() + .entrySet()) { + if (treeTokenEntry.getKey().getStemmedForm().equals(textAlternativeForm.getKey())) { + getAllEntitiesScoredRec(tokens, curPosition + 1, treeTokenEntry.getValue(), resEntities, + curScore * ORIGINAL_FACTOR * textAlternativeForm.getValue()); + } + } + + for (Map.Entry<String, Double> treeAlternativeForm : treeTokenEntry.getKey().getScoredAlternativeForms() + .entrySet()) { + for (Map.Entry<String, Double> textAlternativeForm : currentTextToken.getScoredAlternativeForms() + .entrySet()) { + if (treeAlternativeForm.getKey().equals(textAlternativeForm.getKey())) { + getAllEntitiesScoredRec(tokens, 
curPosition + 1, treeTokenEntry.getValue(), resEntities, + curScore * treeAlternativeForm.getValue() * textAlternativeForm.getValue()); + } + } + } + } + } + } + public Set<Entity> getAllEntities(List<Token> tokens) { HashSet<Entity> resEntities = new HashSet<>(); getAllEntitiesRec(tokens, 0, this, resEntities); @@ -145,7 +218,8 @@ /** * Returns the set of entities assigned to the longest matching token subsequence of the given token sequence. - * @param tokens token sequence to search for longest match + * + * @param tokens token sequence to search for longest match * @return set of entities assigned to the longest matching token subsequence of the given token sequence */ public Set<Entity> getEntitiesForLongestMatch(List<Token> tokens) { @@ -188,34 +262,37 @@ } public static void main(String[] args) throws Exception { - List<Token> tokens1 = Lists.newLinkedList(); - for (String s : Splitter.on(" ").split("this is a token tree")) { - tokens1.add(new Token(s, s, s, false, false)); - }; - - List<Token> tokens2 = Lists.newLinkedList(); - for (String s : Splitter.on(" ").split("this is a tokenized tree")) { - tokens2.add(new Token(s, s, s, false, false)); - }; - - TokenTree tree = new TokenTree(); - tree.add(tokens1, new NamedClass("TokenTree")); - tree.add(tokens2, new NamedClass("TokenizedTree")); + List<Token> tokens1 = Lists.newLinkedList(); + for (String s : Splitter.on(" ").split("this is a token tree")) { + tokens1.add(new Token(s, s, s, false, false)); + } + ; + + List<Token> tokens2 = Lists.newLinkedList(); + for (String s : Splitter.on(" ").split("this is a tokenized tree")) { + tokens2.add(new Token(s, s, s, false, false)); + } + ; + + TokenTree tree = new TokenTree(); + tree.add(tokens1, new NamedClass("TokenTree")); + tree.add(tokens2, new NamedClass("TokenizedTree")); System.out.println(tree); - + System.out.println(tree.getEntitiesForLongestMatch(tokens1)); System.out.println(tree.getLongestMatch(tokens1)); - + List<Token> tokens3 = Lists.newLinkedList(); - for (String s : Splitter.on(" ").split("this is a very nice tokenized tree")) { - tokens3.add(new Token(s, s, s, false, false)); - }; + for (String s : Splitter.on(" ").split("this is a very nice tokenized tree")) { + tokens3.add(new Token(s, s, s, false, false)); + } + ; System.out.println(tree.getLongestMatch(tokens3)); } - + public String toString() { - return "TokenTree\n"+ toString(0); + return "TokenTree\n" + toString(0); } public String toString(int indent) { @@ -233,5 +310,5 @@ return sb.toString(); } - + } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
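Two conventions drive the scoring added in this revision: a candidate's score is the product of the factors along the path used to reach it (SYNONYM_FACTOR 0.8 for a synset's own lemmas, one HYPONYM_FACTOR 0.4 per hyponym level, ORIGINAL_FACTOR 1.0 for exact token matches), and when several paths reach the same lemma or entity, only the maximum is kept, both in LinguisticUtil#getScoredHyponyms and in TokenTree#getAllEntitiesScored. A toy illustration of that max-merge (the lemmas and paths are invented):

import java.util.HashMap;
import java.util.Map;

public class ScoreMergeSketch {

    // factors as declared in WordNet and TokenTree above
    static final double SYNONYM_FACTOR = 0.8;
    static final double HYPONYM_FACTOR = 0.4;

    /** Keep only the highest score seen so far for a lemma, as getScoredHyponyms does. */
    static void record(Map<String, Double> best, String lemma, double score) {
        Double old = best.get(lemma);
        best.put(lemma, old == null ? score : Math.max(old, score));
    }

    public static void main(String[] args) {
        Map<String, Double> best = new HashMap<String, Double>();
        // a lemma of the sense itself scores SYNONYM_FACTOR ...
        record(best, "car", SYNONYM_FACTOR);
        // ... a direct hyponym one HYPONYM_FACTOR less ...
        record(best, "cab", SYNONYM_FACTOR * HYPONYM_FACTOR);
        // ... and a second, deeper path to the same lemma loses against the first
        record(best, "cab", SYNONYM_FACTOR * HYPONYM_FACTOR * HYPONYM_FACTOR);

        System.out.println(best.get("car")); // 0.8
        System.out.println(best.get("cab")); // roughly 0.32, the max of 0.32 and 0.128
    }
}

In TokenTree#getAllEntitiesScored the same merge runs over entities instead of lemmas, with the stored alternative-form scores entering the product whenever a match goes through a WordNet form rather than the original token.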
From: <dfl...@us...> - 2013-12-10 15:41:39
Revision: 4208 http://sourceforge.net/p/dl-learner/code/4208 Author: dfleischhacker Date: 2013-12-10 15:41:36 +0000 (Tue, 10 Dec 2013) Log Message: ----------- Adapt WSD interfaces to scored candidates Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityCandidateGenerator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/EntityCandidatesTrie.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidateGenerator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/RandomWordSenseDisambiguation.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SimpleWordSenseDisambiguation.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/StructureBasedWordSenseDisambiguation.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WordSenseDisambiguation.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityCandidateGenerator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityCandidateGenerator.java 2013-12-10 15:25:13 UTC (rev 4207) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityCandidateGenerator.java 2013-12-10 15:41:36 UTC (rev 4208) @@ -3,13 +3,14 @@ */ package org.dllearner.algorithms.isle; -import java.util.HashMap; -import java.util.Set; - import org.dllearner.algorithms.isle.index.Annotation; +import org.dllearner.algorithms.isle.index.EntityScorePair; import org.dllearner.core.owl.Entity; import org.semanticweb.owlapi.model.OWLOntology; +import java.util.HashMap; +import java.util.Set; + /** * @author Lorenz Buehmann * @@ -22,8 +23,8 @@ this.ontology = ontology; } - public abstract Set<Entity> getCandidates(Annotation annotation); + public abstract Set<EntityScorePair> getCandidates(Annotation annotation); - public abstract HashMap<Annotation,Set<Entity>> getCandidatesMap(Set<Annotation> annotations); + public abstract HashMap<Annotation,Set<EntityScorePair>> getCandidatesMap(Set<Annotation> annotations); } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/EntityCandidatesTrie.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/EntityCandidatesTrie.java 2013-12-10 15:25:13 UTC (rev 4207) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/EntityCandidatesTrie.java 2013-12-10 15:41:36 UTC (rev 4208) @@ -17,10 +17,9 @@ /** * Gets set of candidate entities for a list of tokens - * @param s * @return */ - public Set<Entity> getCandidateEntities(List<Token> tokens); + public Set<EntityScorePair> getCandidateEntities(List<Token> tokens); /** @@ -28,14 +27,12 @@ * ontology string when the parameter string has been added to the trie after generation by using * WordNet or other additional methods. 
* - * @param s the string to search in the trie * @return string generating the path of the longest match in the trie */ public List<Token> getGeneratingStringForLongestMatch(List<Token> tokens); /** * Gets the longest matching string - * @param s * @return */ public List<Token> getLongestMatchingText(List<Token> tokens); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java 2013-12-10 15:25:13 UTC (rev 4207) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java 2013-12-10 15:41:36 UTC (rev 4208) @@ -1,13 +1,12 @@ package org.dllearner.algorithms.isle.index; +import org.dllearner.algorithms.isle.EntityCandidateGenerator; +import org.dllearner.algorithms.isle.wsd.WordSenseDisambiguation; + import java.util.HashMap; import java.util.HashSet; import java.util.Set; -import org.dllearner.algorithms.isle.EntityCandidateGenerator; -import org.dllearner.algorithms.isle.wsd.WordSenseDisambiguation; -import org.dllearner.core.owl.Entity; - /** * Provides methods to annotate documents. * @@ -23,7 +22,6 @@ /** * Initialize this semantic annotator to use the entities from the provided ontology. * - * @param ontology the ontology to use entities from */ public SemanticAnnotator(WordSenseDisambiguation wordSenseDisambiguation, EntityCandidateGenerator entityCandidateGenerator, LinguisticAnnotator linguisticAnnotator) { @@ -41,9 +39,9 @@ public AnnotatedDocument processDocument(TextDocument document){ Set<Annotation> annotations = linguisticAnnotator.annotate(document); Set<SemanticAnnotation> semanticAnnotations = new HashSet<SemanticAnnotation>(); - HashMap<Annotation,Set<Entity>> candidatesMap = entityCandidateGenerator.getCandidatesMap(annotations); + HashMap<Annotation, Set<EntityScorePair>> candidatesMap = entityCandidateGenerator.getCandidatesMap(annotations); for (Annotation annotation : candidatesMap.keySet()) { - Set<Entity> candidateEntities = candidatesMap.get(annotation); + Set<EntityScorePair> candidateEntities = candidatesMap.get(annotation); if (candidateEntities == null || candidateEntities.size() == 0) { continue; } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidateGenerator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidateGenerator.java 2013-12-10 15:25:13 UTC (rev 4207) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidateGenerator.java 2013-12-10 15:41:36 UTC (rev 4208) @@ -3,16 +3,16 @@ */ package org.dllearner.algorithms.isle.index; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Set; - import org.dllearner.algorithms.isle.EntityCandidateGenerator; import org.dllearner.core.owl.Entity; import org.dllearner.utilities.owl.OWLAPIConverter; import org.semanticweb.owlapi.model.OWLEntity; import org.semanticweb.owlapi.model.OWLOntology; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Set; + /** * @author Lorenz Buehmann * @@ -36,13 +36,17 @@ * @see org.dllearner.algorithms.isle.EntityCandidateGenerator#getCandidates(org.dllearner.algorithms.isle.index.Annotation) */ @Override - public Set<Entity> getCandidates(Annotation annotation) { - return allEntities; - } + public 
Set<EntityScorePair> getCandidates(Annotation annotation) { + HashSet<EntityScorePair> result = new HashSet<>(); + for (Entity e : allEntities) { + result.add(new EntityScorePair(e, 1.0)); + } + return result; + } @Override - public HashMap<Annotation, Set<Entity>> getCandidatesMap(Set<Annotation> annotations) { - HashMap<Annotation, Set<Entity>> result = new HashMap<Annotation, Set<Entity>>(); + public HashMap<Annotation, Set<EntityScorePair>> getCandidatesMap(Set<Annotation> annotations) { + HashMap<Annotation, Set<EntityScorePair>> result = new HashMap<Annotation, Set<EntityScorePair>>(); for (Annotation annotation: annotations) result.put(annotation, getCandidates(annotation)); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-12-10 15:25:13 UTC (rev 4207) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-12-10 15:41:36 UTC (rev 4208) @@ -1,160 +1,156 @@ -package org.dllearner.algorithms.isle.index; - -import net.didion.jwnl.data.POS; -import org.dllearner.algorithms.isle.WordNet; -import org.dllearner.algorithms.isle.textretrieval.EntityTextRetriever; -import org.dllearner.core.owl.Entity; -import org.semanticweb.owlapi.model.OWLOntology; - -import java.util.*; -import java.util.Map.Entry; - -public class SimpleEntityCandidatesTrie implements EntityCandidatesTrie { - TokenTree tree; - EntityTextRetriever entityTextRetriever; - -// /** -// * Initialize the trie with strings from the provided ontology using a no-op name generator, i.e., only the -// * actual ontology strings are added and no expansion is done. -// * -// * @param entityTextRetriever the text retriever to use -// * @param ontology the ontology to get strings from -// */ -// public SimpleEntityCandidatesTrie(EntityTextRetriever entityTextRetriever, OWLOntology ontology) { -// this(entityTextRetriever, ontology, new DummyNameGenerator()); -// } - - /** - * Initialize the trie with strings from the provided ontology and use the given entity name generator - * for generating alternative words. 
- * - * @param entityTextRetriever the text retriever to use - * @param ontology the ontology to get strings from - */ - public SimpleEntityCandidatesTrie(EntityTextRetriever entityTextRetriever, OWLOntology ontology) { - this.entityTextRetriever = entityTextRetriever; - buildTrie(ontology); - } - - public void buildTrie(OWLOntology ontology) { - this.tree = new TokenTree(); - Map<Entity, Set<List<Token>>> entity2TokenSet = entityTextRetriever.getRelevantText(ontology); - - - for (Entry<Entity, Set<List<Token>>> entry : entity2TokenSet.entrySet()) { - Entity entity = entry.getKey(); - Set<List<Token>> tokenSet = entry.getValue(); - for (List<Token> tokens : tokenSet) { - addAlternativeFormsFromWordNet(tokens); - addEntry(tokens, entity); - addSubsequences(entity, tokens); - } - } - } - - /** - * Adds the subsequences of a test - * @param entity - * @param tokens - */ - private void addSubsequences(Entity entity, List<Token> tokens) { - tree.add(tokens, entity); - for (int size = 1; size < tokens.size(); size++) { - for (int start = 0; start < tokens.size() - size + 1; start++) { - ArrayList<Token> subsequence = new ArrayList<>(); - for (int i = 0; i < size; i++) { - subsequence.add(tokens.get(start + i)); - } - addEntry(subsequence, entity); - } - } - } - - private void addAlternativeFormsFromWordNet(List<Token> tokens) { - for (Token t : tokens) { - POS wordnetPos = null; - String posTag = t.getPOSTag(); - if (posTag.startsWith("N")) {//nouns - wordnetPos = POS.NOUN; - } - else if (posTag.startsWith("V")) {//verbs - wordnetPos = POS.VERB; - } - else if (posTag.startsWith("J")) {//adjectives - wordnetPos = POS.ADJECTIVE; - } - else if (posTag.startsWith("R")) {//adverbs - wordnetPos = POS.ADVERB; - } - if (wordnetPos == null) { - continue; - } - //String[] synonyms = LinguisticUtil.getInstance().getSynonymsForWord(t.getRawForm(), wordnetPos); - Set<WordNet.LemmaScorePair> alternativeFormPairs = LinguisticUtil.getInstance() - .getScoredHyponyms(t.getRawForm(), wordnetPos); - - for (WordNet.LemmaScorePair synonym : alternativeFormPairs) { - // ignore all multi word synonyms - if (synonym.getLemma().contains("_")) { - continue; - } - //t.addAlternativeForm(LinguisticUtil.getInstance().getNormalizedForm(synonym)); - t.addAlternativeForm(synonym.getLemma(), synonym.getScore()); - } - } - } - - @Override - public void addEntry(List<Token> s, Entity e) { - tree.add(s, e); - } - - public void addEntry(List<Token> s, Entity e, List<Token> originalTokens) { - tree.add(s, e, originalTokens); - } - - @Override - public Set<Entity> getCandidateEntities(List<Token> tokens) { - Set<Entity> res = tree.getAllEntities(tokens); - System.out.println("Unscored: " + res); - Set<EntityScorePair> scored = tree.getAllEntitiesScored(tokens); - System.out.println("Scored: " + scored); - - return res; - } - - @Override - public List<Token> getGeneratingStringForLongestMatch(List<Token> tokens) { - return tree.getOriginalTokensForLongestMatch(tokens); - } - - @Override - public List<Token> getLongestMatchingText(List<Token> tokens) { - return tree.getLongestMatch(tokens); - } - - public String toString() { - return tree.toString(); - } - - public static void main(String[] args) { - String[] tokens = "this is a long and very complex text".split(" "); - - List<String>[] wordnetTokens = (ArrayList<String>[]) new ArrayList[tokens.length]; - - // generate list of lemmatized wordnet synonyms for each token - for (int i = 0; i < tokens.length; i++) { - wordnetTokens[i] = new ArrayList<String>(); - 
wordnetTokens[i].add(LinguisticUtil.getInstance().getNormalizedForm(tokens[i])); - for (String w : LinguisticUtil.getInstance().getTopSynonymsForWord(tokens[i], 5)) { - System.out.println("Adding: " + LinguisticUtil.getInstance().getNormalizedForm(w)); - wordnetTokens[i].add(LinguisticUtil.getInstance().getNormalizedForm(w).replaceAll("_", " ")); - } - } - } - - public void printTrie() { - System.out.println(this.toString()); - - } -} +package org.dllearner.algorithms.isle.index; + +import net.didion.jwnl.data.POS; +import org.dllearner.algorithms.isle.WordNet; +import org.dllearner.algorithms.isle.textretrieval.EntityTextRetriever; +import org.dllearner.core.owl.Entity; +import org.semanticweb.owlapi.model.OWLOntology; + +import java.util.*; +import java.util.Map.Entry; + +public class SimpleEntityCandidatesTrie implements EntityCandidatesTrie { + TokenTree tree; + EntityTextRetriever entityTextRetriever; + +// /** +// * Initialize the trie with strings from the provided ontology using a no-op name generator, i.e., only the +// * actual ontology strings are added and no expansion is done. +// * +// * @param entityTextRetriever the text retriever to use +// * @param ontology the ontology to get strings from +// */ +// public SimpleEntityCandidatesTrie(EntityTextRetriever entityTextRetriever, OWLOntology ontology) { +// this(entityTextRetriever, ontology, new DummyNameGenerator()); +// } + + /** + * Initialize the trie with strings from the provided ontology and use the given entity name generator + * for generating alternative words. + * + * @param entityTextRetriever the text retriever to use + * @param ontology the ontology to get strings from + */ + public SimpleEntityCandidatesTrie(EntityTextRetriever entityTextRetriever, OWLOntology ontology) { + this.entityTextRetriever = entityTextRetriever; + buildTrie(ontology); + } + + public void buildTrie(OWLOntology ontology) { + this.tree = new TokenTree(); + Map<Entity, Set<List<Token>>> entity2TokenSet = entityTextRetriever.getRelevantText(ontology); + + + for (Entry<Entity, Set<List<Token>>> entry : entity2TokenSet.entrySet()) { + Entity entity = entry.getKey(); + Set<List<Token>> tokenSet = entry.getValue(); + for (List<Token> tokens : tokenSet) { + addAlternativeFormsFromWordNet(tokens); + addEntry(tokens, entity); + addSubsequences(entity, tokens); + } + } + } + + /** + * Adds the subsequences of a test + * @param entity + * @param tokens + */ + private void addSubsequences(Entity entity, List<Token> tokens) { + tree.add(tokens, entity); + for (int size = 1; size < tokens.size(); size++) { + for (int start = 0; start < tokens.size() - size + 1; start++) { + ArrayList<Token> subsequence = new ArrayList<>(); + for (int i = 0; i < size; i++) { + subsequence.add(tokens.get(start + i)); + } + addEntry(subsequence, entity); + } + } + } + + private void addAlternativeFormsFromWordNet(List<Token> tokens) { + for (Token t : tokens) { + POS wordnetPos = null; + String posTag = t.getPOSTag(); + if (posTag.startsWith("N")) {//nouns + wordnetPos = POS.NOUN; + } + else if (posTag.startsWith("V")) {//verbs + wordnetPos = POS.VERB; + } + else if (posTag.startsWith("J")) {//adjectives + wordnetPos = POS.ADJECTIVE; + } + else if (posTag.startsWith("R")) {//adverbs + wordnetPos = POS.ADVERB; + } + if (wordnetPos == null) { + continue; + } + //String[] synonyms = LinguisticUtil.getInstance().getSynonymsForWord(t.getRawForm(), wordnetPos); + Set<WordNet.LemmaScorePair> alternativeFormPairs = LinguisticUtil.getInstance() + .getScoredHyponyms(t.getRawForm(), 
Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java	2013-12-10 15:25:13 UTC (rev 4207)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java	2013-12-10 15:41:36 UTC (rev 4208)
@@ -3,7 +3,6 @@
 import com.google.common.collect.Lists;
 import org.dllearner.algorithms.isle.EntityCandidateGenerator;
 import org.dllearner.algorithms.isle.StopWordFilter;
-import org.dllearner.core.owl.Entity;
 import org.semanticweb.owlapi.model.OWLOntology;
 
 import java.util.ArrayList;
@@ -27,8 +26,8 @@
         this.candidatesTrie = candidatesTrie;
     }
 
-    public Set<Entity> getCandidates(Annotation annotation) {
-        Set<Entity> candidateEntities = candidatesTrie.getCandidateEntities(annotation.getTokens());
+    public Set<EntityScorePair> getCandidates(Annotation annotation) {
+        Set<EntityScorePair> candidateEntities = candidatesTrie.getCandidateEntities(annotation.getTokens());
         System.out.println(annotation + " --> " + candidateEntities);
         return candidateEntities;
     }
@@ -39,7 +38,7 @@
      * @param window : maximum distance between the annotations
      * @return
      */
-    public void postProcess(HashMap<Annotation,Set<Entity>> candidatesMap, int window, StopWordFilter stopWordFilter) {
+    public void postProcess(HashMap<Annotation,Set<EntityScorePair>> candidatesMap, int window, StopWordFilter stopWordFilter) {
         Set<Annotation> annotations = candidatesMap.keySet();
         List<Annotation> sortedAnnotations = new ArrayList<Annotation>(annotations);
         //TODO refactoring
@@ -119,8 +118,8 @@
     }
 
     @Override
-    public HashMap<Annotation, Set<Entity>> getCandidatesMap(Set<Annotation> annotations) {
-        HashMap<Annotation, Set<Entity>> candidatesMap = new HashMap<Annotation, Set<Entity>>();
+    public HashMap<Annotation, Set<EntityScorePair>> getCandidatesMap(Set<Annotation> annotations) {
+        HashMap<Annotation, Set<EntityScorePair>> candidatesMap = new HashMap<>();
         for (Annotation annotation: annotations)
             candidatesMap.put(annotation, getCandidates(annotation));
 
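A wiring sketch for the generator, not part of the commit: the two-argument constructor is inferred from the field assignment in the hunk above and is an assumption, as is the window size of 3; getCandidatesMap and postProcess use the signatures shown in this diff.

    package org.dllearner.algorithms.isle.index;

    import org.dllearner.algorithms.isle.StopWordFilter;
    import org.semanticweb.owlapi.model.OWLOntology;

    import java.util.HashMap;
    import java.util.Set;

    public class CandidateGenerationDemo {
        public static HashMap<Annotation, Set<EntityScorePair>> run(OWLOntology ontology,
                SimpleEntityCandidatesTrie trie, Set<Annotation> annotations, StopWordFilter stopWordFilter) {
            // assumption: the constructor takes the ontology and the candidates trie,
            // matching the field assignment shown in the hunk above
            TrieEntityCandidateGenerator generator = new TrieEntityCandidateGenerator(ontology, trie);
            HashMap<Annotation, Set<EntityScorePair>> candidatesMap = generator.getCandidatesMap(annotations);
            // post-process candidates of annotations at most 3 tokens apart;
            // the window size is an arbitrary illustration
            generator.postProcess(candidatesMap, 3, stopWordFilter);
            return candidatesMap;
        }
    }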
Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/RandomWordSenseDisambiguation.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/RandomWordSenseDisambiguation.java	2013-12-10 15:25:13 UTC (rev 4207)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/RandomWordSenseDisambiguation.java	2013-12-10 15:41:36 UTC (rev 4208)
@@ -18,14 +18,15 @@
  */
 package org.dllearner.algorithms.isle.wsd;
 
-import java.util.Random;
-import java.util.Set;
-
 import org.dllearner.algorithms.isle.index.Annotation;
+import org.dllearner.algorithms.isle.index.EntityScorePair;
 import org.dllearner.algorithms.isle.index.SemanticAnnotation;
 import org.dllearner.core.owl.Entity;
 import org.semanticweb.owlapi.model.OWLOntology;
 
+import java.util.Random;
+import java.util.Set;
+
 /**
  * Disambiguation by randomly selecting one of the candidates (baseline method).
  *
@@ -43,17 +44,17 @@
     @Override
     public SemanticAnnotation disambiguate(Annotation annotation,
-            Set<Entity> candidateEntities) {
+            Set<EntityScorePair> candidateEntities) {
         int pos = random.nextInt(candidateEntities.size());
         int i = 0;
-        for(Entity e : candidateEntities)
-        {
-            if (i == pos) {
-                return new SemanticAnnotation(annotation, e);
-            }
-            i++;
-        }
-        return null;
+        for(EntityScorePair esp : candidateEntities) {
+            Entity e = esp.getEntity();
+            if (i == pos) {
+                return new SemanticAnnotation(annotation, e);
+            }
+            i++;
+        }
+        return null;
     }
 }
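A caller-side note, not part of the commit: random.nextInt(candidateEntities.size()) throws IllegalArgumentException when the candidate set is empty, so this baseline cannot return null for empty input the way the other implementations do. A small guard, sketched with a hypothetical helper name:

    package org.dllearner.algorithms.isle.wsd;

    import org.dllearner.algorithms.isle.index.Annotation;
    import org.dllearner.algorithms.isle.index.EntityScorePair;
    import org.dllearner.algorithms.isle.index.SemanticAnnotation;

    import java.util.Set;

    final class WsdGuard {
        // hypothetical helper: fall back to null for empty candidate sets,
        // mirroring what the other disambiguation implementations return
        static SemanticAnnotation disambiguateOrNull(WordSenseDisambiguation wsd,
                Annotation annotation, Set<EntityScorePair> candidates) {
            return candidates.isEmpty() ? null : wsd.disambiguate(annotation, candidates);
        }
    }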
Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SimpleWordSenseDisambiguation.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SimpleWordSenseDisambiguation.java	2013-12-10 15:25:13 UTC (rev 4207)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SimpleWordSenseDisambiguation.java	2013-12-10 15:41:36 UTC (rev 4208)
@@ -3,26 +3,20 @@
  */
 package org.dllearner.algorithms.isle.wsd;
 
-import java.util.HashSet;
-import java.util.Set;
-
 import org.apache.log4j.Logger;
 import org.dllearner.algorithms.isle.index.Annotation;
+import org.dllearner.algorithms.isle.index.EntityScorePair;
 import org.dllearner.algorithms.isle.index.SemanticAnnotation;
 import org.dllearner.core.owl.Entity;
 import org.dllearner.utilities.owl.OWLAPIConverter;
-import org.semanticweb.owlapi.model.IRI;
-import org.semanticweb.owlapi.model.OWLAnnotationAssertionAxiom;
-import org.semanticweb.owlapi.model.OWLAnnotationProperty;
-import org.semanticweb.owlapi.model.OWLDataFactory;
-import org.semanticweb.owlapi.model.OWLEntity;
-import org.semanticweb.owlapi.model.OWLLiteral;
-import org.semanticweb.owlapi.model.OWLOntology;
+import org.semanticweb.owlapi.model.*;
 import org.semanticweb.owlapi.util.IRIShortFormProvider;
 import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider;
-
 import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl;
+import java.util.HashSet;
+import java.util.Set;
+
 
 /**
  * @author Lorenz Buehmann
 *
@@ -47,26 +41,27 @@
     * @see org.dllearner.algorithms.isle.WordSenseDisambiguation#disambiguate(org.dllearner.algorithms.isle.index.Annotation, java.util.Set)
     */
    @Override
-    public SemanticAnnotation disambiguate(Annotation annotation, Set<Entity> candidateEntities) {
+    public SemanticAnnotation disambiguate(Annotation annotation, Set<EntityScorePair> candidateEntities) {
        logger.debug("Linguistic annotations:\n" + annotation);
        logger.debug("Candidate entities:" + candidateEntities);
        String token = annotation.getString().trim();
        //check if annotation token matches label of entity or the part behind #(resp. /)
-        for (Entity entity : candidateEntities) {
-            Set<String> labels = getLabels(entity);
-            for (String label : labels) {
-                if(label.equals(token)){
-                    logger.debug("Disambiguated entity: " + entity);
-                    return new SemanticAnnotation(annotation, entity);
-                }
-            }
-            String shortForm = sfp.getShortForm(IRI.create(entity.getURI()));
-            if(annotation.equals(shortForm)){
-                logger.debug("Disambiguated entity: " + entity);
-                return new SemanticAnnotation(annotation, entity);
-            }
-        }
-        return null;
+        for (EntityScorePair entityScorePair : candidateEntities) {
+            Entity entity = entityScorePair.getEntity();
+            Set<String> labels = getLabels(entity);
+            for (String label : labels) {
+                if (label.equals(token)) {
+                    logger.debug("Disambiguated entity: " + entity);
+                    return new SemanticAnnotation(annotation, entity);
+                }
+            }
+            String shortForm = sfp.getShortForm(IRI.create(entity.getURI()));
+            // compare the token string (not the Annotation object) with the IRI short form
+            if (token.equals(shortForm)) {
+                logger.debug("Disambiguated entity: " + entity);
+                return new SemanticAnnotation(annotation, entity);
+            }
+        }
+        return null;
    }
 
    private Set<String> getLabels(Entity entity){

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/StructureBasedWordSenseDisambiguation.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/StructureBasedWordSenseDisambiguation.java	2013-12-10 15:25:13 UTC (rev 4207)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/StructureBasedWordSenseDisambiguation.java	2013-12-10 15:41:36 UTC (rev 4208)
@@ -3,21 +3,21 @@
  */
 package org.dllearner.algorithms.isle.wsd;
 
-import java.io.IOException;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-
+import com.google.common.base.Joiner;
+import com.google.common.collect.Sets;
 import org.dllearner.algorithms.isle.StructuralEntityContext;
 import org.dllearner.algorithms.isle.VSMCosineDocumentSimilarity;
 import org.dllearner.algorithms.isle.index.Annotation;
+import org.dllearner.algorithms.isle.index.EntityScorePair;
 import org.dllearner.algorithms.isle.index.SemanticAnnotation;
 import org.dllearner.core.owl.Entity;
 import org.semanticweb.owlapi.model.OWLOntology;
 
-import com.google.common.base.Joiner;
-import com.google.common.collect.Sets;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
 
 /**
  * @author Lorenz Buehmann
@@ -39,7 +39,7 @@
     * @see org.dllearner.algorithms.isle.wsd.WordSenseDisambiguation#disambiguate(org.dllearner.algorithms.isle.index.Annotation, java.util.Set)
     */
    @Override
-    public SemanticAnnotation disambiguate(Annotation annotation, Set<Entity> candidateEntities) {
+    public SemanticAnnotation disambiguate(Annotation annotation, Set<EntityScorePair> candidateEntities) {
        if(!candidateEntities.isEmpty()){
            //get the context of the annotated token
            List<String> tokenContext = contextExtractor.extractContext(annotation);
@@ -47,19 +47,20 @@
            //compare this context with the context of each entity candidate
            double maxScore = Double.NEGATIVE_INFINITY;
            Entity bestEntity = null;
-            for (Entity entity : candidateEntities) {
-                //get the context of the entity by analyzing the structure of the ontology
-                Set<String> entityContext = StructuralEntityContext.getContextInNaturalLanguage(ontology, entity);
-                //compute the VSM Cosine Similarity
-                double score = computeScore(tokenContext, entityContext);
-                //set best entity
-                if(score > maxScore){
-                    maxScore = score;
-                    bestEntity = entity;
-                }
-            }
-
-            return new SemanticAnnotation(annotation, bestEntity);
+            for (EntityScorePair entityScorePair : candidateEntities) {
+                Entity entity = entityScorePair.getEntity();
+                //get the context of the entity by analyzing the structure of the ontology
+                Set<String> entityContext = StructuralEntityContext.getContextInNaturalLanguage(ontology, entity);
+                //compute the VSM Cosine Similarity
+                double score = computeScore(tokenContext, entityContext);
+                //set best entity
+                if (score > maxScore) {
+                    maxScore = score;
+                    bestEntity = entity;
+                }
+            }
+
+            return new SemanticAnnotation(annotation, bestEntity);
        }
        return null;
    }
Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WordSenseDisambiguation.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WordSenseDisambiguation.java	2013-12-10 15:25:13 UTC (rev 4207)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WordSenseDisambiguation.java	2013-12-10 15:41:36 UTC (rev 4208)
@@ -1,12 +1,12 @@
 package org.dllearner.algorithms.isle.wsd;
 
-import java.util.Set;
-
 import org.dllearner.algorithms.isle.index.Annotation;
+import org.dllearner.algorithms.isle.index.EntityScorePair;
 import org.dllearner.algorithms.isle.index.SemanticAnnotation;
-import org.dllearner.core.owl.Entity;
 import org.semanticweb.owlapi.model.OWLOntology;
 
+import java.util.Set;
+
 /**
  * Abstract class for the word sense disambiguation component.
  *
@@ -27,9 +27,10 @@
    /**
     * Chooses the correct entity for the given annotation from a set of candidate entities.
     *
+    *
     * @param annotation the annotation to find entity for
     * @param candidateEntities the set of candidate entities
     * @return semantic annotation containing the given annotation and the chosen entity
     */
-    public abstract SemanticAnnotation disambiguate(Annotation annotation, Set<Entity> candidateEntities);
+    public abstract SemanticAnnotation disambiguate(Annotation annotation, Set<EntityScorePair> candidateEntities);
 }
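Since every implementation now receives scored candidates, a score-aware strategy fits the same contract. A sketch, not part of the commit: it assumes EntityScorePair exposes getScore() alongside the getEntity() accessor used above, that higher scores mean better matches, and that the abstract base class keeps a constructor taking the OWLOntology, as the subclasses here suggest.

    package org.dllearner.algorithms.isle.wsd;

    import org.dllearner.algorithms.isle.index.Annotation;
    import org.dllearner.algorithms.isle.index.EntityScorePair;
    import org.dllearner.algorithms.isle.index.SemanticAnnotation;
    import org.dllearner.core.owl.Entity;
    import org.semanticweb.owlapi.model.OWLOntology;

    import java.util.Set;

    public class TopScoreWordSenseDisambiguation extends WordSenseDisambiguation {

        public TopScoreWordSenseDisambiguation(OWLOntology ontology) {
            super(ontology); // assumption: the abstract base stores the ontology
        }

        @Override
        public SemanticAnnotation disambiguate(Annotation annotation, Set<EntityScorePair> candidateEntities) {
            Entity bestEntity = null;
            double maxScore = Double.NEGATIVE_INFINITY;
            for (EntityScorePair esp : candidateEntities) {
                double score = esp.getScore(); // assumption: accessor next to getEntity()
                if (score > maxScore) {
                    maxScore = score;
                    bestEntity = esp.getEntity();
                }
            }
            // empty candidate set -> null, like the other implementations
            return bestEntity == null ? null : new SemanticAnnotation(annotation, bestEntity);
        }
    }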