From: <lor...@us...> - 2014-05-07 11:25:58
|
Revision: 4261 http://sourceforge.net/p/dl-learner/code/4261
Author: lorenz_b
Date: 2014-05-07 11:25:54 +0000 (Wed, 07 May 2014)
Log Message:
-----------
Added eprocurement eval script.

Added Paths:
-----------
trunk/scripts/src/main/java/org/dllearner/scripts/OWLAxiomsHTMLWriter.java
trunk/scripts/src/main/java/org/dllearner/scripts/RAChallenge.java
trunk/scripts/src/main/java/org/dllearner/scripts/evaluation/EProcurementUseCase.java
trunk/scripts/src/main/java/org/dllearner/scripts/evaluation/QTLEvaluation.java
trunk/scripts/src/main/java/org/dllearner/scripts/pattern/FixPointDetection.java
trunk/scripts/src/main/java/org/dllearner/scripts/pattern/UserEvaluation.java
trunk/scripts/src/main/resources/db_settings.ini

Added: trunk/scripts/src/main/java/org/dllearner/scripts/OWLAxiomsHTMLWriter.java
===================================================================
--- trunk/scripts/src/main/java/org/dllearner/scripts/OWLAxiomsHTMLWriter.java (rev 0)
+++ trunk/scripts/src/main/java/org/dllearner/scripts/OWLAxiomsHTMLWriter.java 2014-05-07 11:25:54 UTC (rev 4261)
@@ -0,0 +1,91 @@
+package org.dllearner.scripts;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.text.DecimalFormat;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import org.dllearner.utilities.MapUtils;
+import org.semanticweb.owlapi.apibinding.OWLManager;
+import org.semanticweb.owlapi.io.ToStringRenderer;
+import org.semanticweb.owlapi.model.AxiomType;
+import org.semanticweb.owlapi.model.IRI;
+import org.semanticweb.owlapi.model.OWLAnnotationProperty;
+import org.semanticweb.owlapi.model.OWLClass;
+import org.semanticweb.owlapi.model.OWLClassExpression;
+import org.semanticweb.owlapi.model.OWLDataFactory;
+import org.semanticweb.owlapi.model.OWLEquivalentClassesAxiom;
+import org.semanticweb.owlapi.model.OWLLiteral;
+import org.semanticweb.owlapi.model.OWLOntology;
+import org.semanticweb.owlapi.model.OWLOntologyManager;
+
+import uk.ac.manchester.cs.owl.owlapi.mansyntaxrenderer.ManchesterOWLSyntaxOWLObjectRendererImpl;
+
+/**
+ * Command line script that writes the equivalent-class axioms of an ontology
+ * into an HTML table with the columns source class, equivalent class
+ * expression and accuracy.
+ */
+public class OWLAxiomsHTMLWriter {
+
+    /**
+     * Loads the ontology, groups its equivalent-class axioms by the DBpedia
+     * class on the left-hand side and writes one table section per class.
+     *
+     * @param args {@code args[0]} is the ontology file, {@code args[1]} the
+     *             HTML file to write
+     * @throws Exception if the ontology cannot be loaded or the target file
+     *             cannot be written
+     */
+    public static void main(String[] args) throws Exception{
+        if(args.length != 2){
+            System.out.println("Usage: OWLAxiomsHTMLWriter <ontology> <targetFile>");
+            // stop here, otherwise args[0]/args[1] below fail with an ArrayIndexOutOfBoundsException
+            return;
+        }
+        ToStringRenderer.getInstance().setRenderer(new ManchesterOWLSyntaxOWLObjectRendererImpl());
+
+        String ontologyURL = args[0];
+        OWLOntologyManager man = OWLManager.createOWLOntologyManager();
+        OWLDataFactory dataFactory = man.getOWLDataFactory();
+        OWLOntology ontology = man.loadOntologyFromOntologyDocument(new File(ontologyURL));
+        OWLAnnotationProperty anProp = dataFactory.getOWLAnnotationProperty(IRI.create("http://www.dl-learner.org/ontologies/enrichment.owl#confidence"));
+
+        StringBuilder sb = new StringBuilder();
+        DecimalFormat dfPercent = new DecimalFormat("0.00%");
+        sb.append("<html>\n");
+        sb.append("<table border=\"3\">\n");
+        sb.append("<thead><tr><th>Source Class</th><th>Equivalent Class Expression</th><th>Accuracy</th></tr></thead>\n");
+        sb.append("<tbody>\n");
+
+        // source class -> (equivalent class expression -> value of the confidence annotation)
+        SortedMap<OWLClass, Map<OWLClassExpression, Double>> map = new TreeMap<OWLClass, Map<OWLClassExpression,Double>>();
+        for (OWLEquivalentClassesAxiom axiom : ontology.getAxioms(AxiomType.EQUIVALENT_CLASSES)) {
+            List<OWLClassExpression> classExpressionsAsList = axiom.getClassExpressionsAsList();
+            OWLClassExpression first = classExpressionsAsList.get(0);
+            if(first.isAnonymous()){
+                // asOWLClass() is only defined for named classes
+                continue;
+            }
+            OWLClass left = first.asOWLClass();
+            if(!left.toStringID().startsWith("http://dbpedia.org/ontology/")){
+                continue;//skip not DBpedia
+            }
+            if(axiom.getAnnotations(anProp).isEmpty()){
+                // no confidence annotation, so there is no accuracy to report
+                continue;
+            }
+            OWLClassExpression right = classExpressionsAsList.get(1);
+            OWLLiteral lit = (OWLLiteral) axiom.getAnnotations(anProp).iterator().next().getValue();
+            double accuracy = lit.parseDouble();
+            Map<OWLClassExpression, Double> equivalentClasses = map.get(left);
+            if(equivalentClasses == null){
+                equivalentClasses = new HashMap<OWLClassExpression, Double>();
+                map.put(left, equivalentClasses);
+            }
+            equivalentClasses.put(right, accuracy);
+        }
+
+        for (Entry<OWLClass, Map<OWLClassExpression, Double>> entry : map.entrySet()) {
+            OWLClass cls = entry.getKey();
+            Map<OWLClassExpression, Double> equivalentClasses = entry.getValue();
+            List<Entry<OWLClassExpression, Double>> sorted = MapUtils.sortByValues(equivalentClasses);
+            // the class cell spans its own row plus one row per expression;
+            // its row is closed before the expression rows start
+            sb.append("<tr><th rowspan=\"").append(sorted.size() + 1).append("\">")
+                .append(cls.toString()).append("</th></tr>\n");
+            for (Entry<OWLClassExpression, Double> expr : sorted) {
+                OWLClassExpression classExpression = expr.getKey();
+                Double value = expr.getValue();
+                sb.append("<tr>");
+                sb.append("<td>").append(classExpression.toString()).append("</td>");
+                sb.append("<td>").append(dfPercent.format(value.doubleValue())).append("</td>");
+                sb.append("</tr>\n");
+            }
+        }
+
+        sb.append("</tbody>\n");
+        sb.append("</table>\n");
+        sb.append("</html>\n");
+
+        // try-with-resources closes the stream even if write() fails; the
+        // charset is fixed so the output does not depend on the platform default
+        try (FileOutputStream fos = new FileOutputStream(new File(args[1]))) {
+            fos.write(sb.toString().getBytes("UTF-8"));
+        }
+    }
+
+}

Added: trunk/scripts/src/main/java/org/dllearner/scripts/RAChallenge.java
===================================================================
--- trunk/scripts/src/main/java/org/dllearner/scripts/RAChallenge.java (rev 0)
+++ trunk/scripts/src/main/java/org/dllearner/scripts/RAChallenge.java 2014-05-07 11:25:54 UTC (rev 4261)
@@ -0,0 +1,299 @@
+/**
+ *
+ */
+package org.dllearner.scripts;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FilenameFilter;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+import org.dllearner.algorithms.celoe.CELOE;
+import org.dllearner.algorithms.celoe.OEHeuristicRuntime;
+import
org.dllearner.algorithms.elcopy.ELLearningAlgorithm;
+import org.dllearner.algorithms.qtl.QueryTreeFactory;
+import org.dllearner.algorithms.qtl.datastructures.QueryTree;
+import org.dllearner.algorithms.qtl.datastructures.impl.QueryTreeImpl;
+import org.dllearner.algorithms.qtl.impl.QueryTreeFactoryImpl;
+import org.dllearner.algorithms.qtl.operations.lgg.LGGGenerator;
+import org.dllearner.algorithms.qtl.operations.lgg.LGGGeneratorImpl;
+import org.dllearner.core.AbstractCELA;
+import org.dllearner.core.KnowledgeSource;
+import org.dllearner.core.owl.Individual;
+import org.dllearner.core.owl.NamedClass;
+import org.dllearner.kb.OWLAPIOntology;
+import org.dllearner.kb.sparql.ConciseBoundedDescriptionGenerator;
+import org.dllearner.kb.sparql.ConciseBoundedDescriptionGeneratorImpl;
+import org.dllearner.kb.sparql.SparqlEndpoint;
+import org.dllearner.learningproblems.PosNegLPStandard;
+import org.dllearner.reasoning.FastInstanceChecker;
+import org.dllearner.reasoning.OWLAPIReasoner;
+import org.dllearner.refinementoperators.RhoDRDown;
+import org.semanticweb.owlapi.apibinding.OWLManager;
+import org.semanticweb.owlapi.io.ToStringRenderer;
+import org.semanticweb.owlapi.model.OWLClassExpression;
+import org.semanticweb.owlapi.model.OWLOntology;
+import org.semanticweb.owlapi.model.OWLOntologyManager;
+import org.semanticweb.owlapi.util.SimpleShortFormProvider;
+
+import uk.ac.manchester.cs.owl.owlapi.mansyntaxrenderer.ManchesterOWLSyntaxOWLObjectRendererImpl;
+
+import com.hp.hpl.jena.query.QueryExecution;
+import com.hp.hpl.jena.query.QueryExecutionFactory;
+import com.hp.hpl.jena.query.QuerySolution;
+import com.hp.hpl.jena.query.ResultSet;
+import com.hp.hpl.jena.query.ResultSetFormatter;
+import com.hp.hpl.jena.rdf.model.Model;
+import com.hp.hpl.jena.rdf.model.ModelFactory;
+import com.hp.hpl.jena.rdf.model.RDFNode;
+import com.hp.hpl.jena.rdf.model.Statement;
+import com.hp.hpl.jena.rdf.model.StmtIterator;
+import com.hp.hpl.jena.vocabulary.OWL;
+import com.hp.hpl.jena.vocabulary.RDF;
+
+/**
+ * Script for the RA challenge data: loads the RDF files of a directory,
+ * takes the subjects flagged as non-responder "true"/"false" as positive and
+ * negative examples, adds DrugBank descriptions of the referenced drugs and
+ * runs a class expression learning algorithm (CELOE or the EL algorithm) on
+ * the result.
+ *
+ * @author Lorenz Buehmann
+ *
+ */
+public class RAChallenge {
+
+    /** Selects the EL learning algorithm instead of CELOE. */
+    private static boolean useEL = false;
+
+    /** Prefix -> namespace; passed to the reasoner and used to shorten printed URIs. */
+    static Map<String, String> prefixes = new HashMap<>();
+
+    private static String baseURI = "http://bio2rdf.org/ra.challenge:";
+
+    static {
+//      prefixes.put("ra", "http://bio2rdf.org/ra.challenge:");
+        prefixes.put("ra-voc", "http://bio2rdf.org/ra.challenge_vocabulary:");
+        prefixes.put("dbsnp", "http://bio2rdf.org/dbsnp:");
+        prefixes.put("xsd", "http://www.w3.org/2001/XMLSchema#");
+        prefixes.put("foaf", "http://xmlns.com/foaf/0.1/");
+        prefixes.put("rdfs", "http://www.w3.org/2000/01/rdf-schema#");
+        prefixes.put("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
+        prefixes.put("drug-voc", "http://bio2rdf.org/drugbank_vocabulary:");
+    }
+
+    /**
+     * Runs the whole pipeline.
+     *
+     * @param args {@code args[0]} is the directory containing the RDF data files
+     * @throws Exception if loading, enrichment or initialization of a component fails
+     */
+    public static void main(String[] args) throws Exception{
+        if(args.length < 1){
+            System.out.println("Usage: RAChallenge <dataDirectory>");
+            return;
+        }
+        //load the data
+        File dataDir = new File(args[0]);
+        File[] files = dataDir.listFiles(new FilenameFilter() {
+            @Override
+            public boolean accept(File dir, String name) {
+                return name.endsWith(".nt") || name.endsWith(".ttl") || name.endsWith(".rdf") || name.endsWith(".owl");
+            }
+        });
+        if(files == null){
+            // listFiles() returns null if the path is not a directory or cannot be read
+            throw new IllegalArgumentException("Not a readable directory: " + dataDir);
+        }
+        System.out.println("loading data...");
+        Model model = ModelFactory.createDefaultModel();
+        for (File file : files) {
+            try (FileInputStream in = new FileInputStream(file)) {
+                model.read(in, null, rdfSyntaxFor(file.getName()));
+            }
+        }
+
+        analyzeData(model);
+
+        //get the positive and negative examples via SPARQL
+        //<http://bio2rdf.org/ra.challenge:1877000> <http://bio2rdf.org/ra.challenge_vocabulary:non-responder> "true"^^<http://www.w3.org/2001/XMLSchema#boolean> .
+        System.out.println("extracting pos/neg examples...");
+        SortedSet<Individual> posExamples = selectSubjects(model,
+            "SELECT ?s WHERE {?s <http://bio2rdf.org/ra.challenge_vocabulary:non-responder> \"true\"^^<http://www.w3.org/2001/XMLSchema#boolean>} limit 400");
+        SortedSet<Individual> negExamples = selectSubjects(model,
+            "SELECT ?s WHERE {?s <http://bio2rdf.org/ra.challenge_vocabulary:non-responder> \"false\"^^<http://www.w3.org/2001/XMLSchema#boolean>} limit 400");
+        System.out.println("#pos examples: " + posExamples.size());
+        System.out.println("#neg examples: " + negExamples.size());
+
+        //remove triples with property non-responder
+        model.remove(model.listStatements(null, model.createProperty("http://bio2rdf.org/ra.challenge_vocabulary:non-responder"), (RDFNode)null));
+
+        //enrich with additional data
+        enrich(model);
+
+        //check the LGG
+//      computeLGG(model, posExamples);
+
+        //convert JENA model to OWL API ontology
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        model.write(baos , "N-TRIPLES");
+        OWLOntologyManager man = OWLManager.createOWLOntologyManager();
+        OWLOntology ontology = man.loadOntologyFromOntologyDocument(new ByteArrayInputStream(baos.toByteArray()));
+
+        //init knowledge source
+        KnowledgeSource ks = new OWLAPIOntology(ontology);
+
+        //init reasoner
+        System.out.println("initializing reasoner...");
+        OWLAPIReasoner baseReasoner = new OWLAPIReasoner(ks);
+//      baseReasoner.setReasonerTypeString("elk");
+        baseReasoner.init();
+        FastInstanceChecker rc = new FastInstanceChecker(ks);
+        rc.setReasonerComponent(baseReasoner);
+        rc.setBaseURI(baseURI);
+        rc.setPrefixes(prefixes);
+        rc.init();
+
+        //init learning problem
+        System.out.println("initializing learning problem...");
+        PosNegLPStandard lp = new PosNegLPStandard(rc, posExamples, negExamples);
+        lp.setUseApproximations(true);
+        lp.init();
+
+        //init learning algorithm
+        System.out.println("initializing learning algorithm...");
+        AbstractCELA la;
+        if(useEL){
+            ELLearningAlgorithm el = new ELLearningAlgorithm(lp, rc);
+            el.setNoisePercentage(30);
+            el.setMaxNrOfResults(50);
+            el.setTreeSearchTimeSeconds(10);
+            la = el;
+        } else {
+            OEHeuristicRuntime heuristic = new OEHeuristicRuntime();
+            heuristic.setExpansionPenaltyFactor(0.1);
+            CELOE celoe = new CELOE(lp, rc);
+            celoe.setHeuristic(heuristic);
+            celoe.setMaxExecutionTimeInSeconds(100);
+            celoe.setNoisePercentage(50);
+            celoe.setMaxNrOfResults(50);
+            celoe.setWriteSearchTree(true);
+            celoe.setReplaceSearchTree(true);
+            celoe.setStartClass(new NamedClass("http://xmlns.com/foaf/0.1/Person"));
+            // NOTE(review): the operator is configured and initialized but not handed
+            // to CELOE because setOperator is disabled below - confirm this is intended
+            RhoDRDown op = new RhoDRDown();
+            op.setUseHasValueConstructor(true);
+            op.setUseObjectValueNegation(true);
+            op.setReasoner(rc);
+            op.init();
+//          celoe.setOperator(op);
+            la = celoe;
+        }
+        la.init();
+
+        la.start();
+    }
+
+    /**
+     * Chooses the Jena syntax name for a data file by its extension.
+     * ".rdf" and ".owl" are read as RDF/XML (assumed to be the serialization
+     * of such files - confirm for the actual data set); ".nt" and ".ttl" are
+     * read with the Turtle parser as before.
+     */
+    private static String rdfSyntaxFor(String fileName){
+        if(fileName.endsWith(".rdf") || fileName.endsWith(".owl")){
+            return "RDF/XML";
+        }
+        return "TURTLE";
+    }
+
+    /**
+     * Executes a SELECT query on the model and returns the URIs bound to
+     * {@code ?s} as individuals. The query execution is always closed.
+     */
+    private static SortedSet<Individual> selectSubjects(Model model, String query){
+        SortedSet<Individual> subjects = new TreeSet<Individual>();
+        QueryExecution qe = QueryExecutionFactory.create(query, model);
+        try {
+            ResultSet rs = qe.execSelect();
+            while(rs.hasNext()){
+                QuerySolution qs = rs.next();
+                subjects.add(new Individual(qs.getResource("s").getURI()));
+            }
+        } finally {
+            qe.close();
+        }
+        return subjects;
+    }
+
+    /**
+     * Executes a SELECT query on the model and prints the result table to
+     * standard out. The query execution is always closed.
+     */
+    private static void printSelectResult(Model model, String query){
+        QueryExecution qe = QueryExecutionFactory.create(query, model);
+        try {
+            System.out.println(ResultSetFormatter.asText(qe.execSelect()));
+        } finally {
+            qe.close();
+        }
+    }
+
+    /**
+     * Do some statistical queries.
+     * Prints counts over the has-two property, overall and split by the
+     * non-responder flag.
+     * @param model the loaded data, still containing the non-responder triples
+     */
+    private static void analyzeData(Model model){
+        printSelectResult(model,
+            "SELECT (COUNT(DISTINCT ?s)AS ?cnt) WHERE {?s <http://bio2rdf.org/ra.challenge_vocabulary:has-two> ?o.} ");
+
+        printSelectResult(model,
+            "SELECT ?o (COUNT(?s) AS ?cnt) WHERE {?s <http://bio2rdf.org/ra.challenge_vocabulary:has-two> ?o."
+            + "}"
+            + " GROUP BY ?o ORDER BY DESC(?cnt)");
+
+        printSelectResult(model,
+            "SELECT ?o (COUNT(?s) AS ?cnt) WHERE {"
+            + "?s <http://bio2rdf.org/ra.challenge_vocabulary:has-two> ?o."
+            + "?s <http://bio2rdf.org/ra.challenge_vocabulary:non-responder> \"true\"^^<http://www.w3.org/2001/XMLSchema#boolean>."
+            + "}"
+            + " GROUP BY ?o ORDER BY DESC(?cnt)");
+
+        printSelectResult(model,
+            "SELECT ?o (COUNT(?s) AS ?total) (Min(?cnt_res) as ?responder) (Min(?cnt_non_res) as ?non_responder) WHERE {"
+            + "?s <http://bio2rdf.org/ra.challenge_vocabulary:has-two> ?o."
+            + "{SELECT ?o (COUNT(?s) AS ?cnt_res) WHERE {"
+            + "?s <http://bio2rdf.org/ra.challenge_vocabulary:has-two> ?o."
+            + "?s <http://bio2rdf.org/ra.challenge_vocabulary:non-responder> \"true\"^^<http://www.w3.org/2001/XMLSchema#boolean>."
+            + "} GROUP BY ?o}"
+            + "{SELECT ?o (COUNT(?s) AS ?cnt_non_res) WHERE {"
+            + "?s <http://bio2rdf.org/ra.challenge_vocabulary:has-two> ?o."
+            + "?s <http://bio2rdf.org/ra.challenge_vocabulary:non-responder> \"false\"^^<http://www.w3.org/2001/XMLSchema#boolean>."
+            + "}"
+            + " GROUP BY ?o}} GROUP BY ?o ORDER BY DESC(?total)");
+    }
+
+    /**
+     * Builds a query tree per positive example, computes their least general
+     * generalization and prints it, first as a tree with shortened URIs and
+     * then as an OWL class expression. Currently only referenced from a
+     * disabled call in main.
+     */
+    private static void computeLGG(Model model, SortedSet<Individual> posExamples){
+        QueryTreeFactory<String> queryTreeFactory = new QueryTreeFactoryImpl();
+
+        List<QueryTree<String>> posExampleTrees = new ArrayList<QueryTree<String>>();
+        for (Individual ex : posExamples) {
+            QueryTreeImpl<String> tree = queryTreeFactory.getQueryTree(ex.getName(), model);
+            posExampleTrees.add(tree);
+        }
+
+        LGGGenerator<String> lggGenerator = new LGGGeneratorImpl<String>();
+        QueryTree<String> lgg = lggGenerator.getLGG(posExampleTrees);
+        String lggString = lgg.getStringRepresentation(true);
+        lggString = lggString.replace(baseURI, "");
+        for (Entry<String, String> entry : prefixes.entrySet()) {
+            lggString = lggString.replace(entry.getValue(), entry.getKey() + ":");
+        }
+        System.out.println(lggString);
+
+        OWLClassExpression classExpression = lgg.asOWLClassExpression();
+        ToStringRenderer.getInstance().setRenderer(new ManchesterOWLSyntaxOWLObjectRendererImpl());
+        ToStringRenderer.getInstance().setShortFormProvider(new SimpleShortFormProvider());
+        System.out.println(classExpression);
+    }
+
+    /**
+     * Fetches a description of every drug referenced in the model from the
+     * DrugBank SPARQL endpoint, writes the fetched triples to "drugbank.ttl"
+     * and adds them to the model.
+     *
+     * @param model the model to enrich; modified in place
+     * @throws java.io.IOException if the endpoint URL is malformed or the
+     *             output file cannot be written
+     */
+    private static void enrich(Model model) throws java.io.IOException{
+        System.out.println("enriching data...");
+        SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://cu.drugbank.bio2rdf.org/sparql"));
+
+        ConciseBoundedDescriptionGenerator cbdGen = new ConciseBoundedDescriptionGeneratorImpl(endpoint, "cache/drugbank");
+        List<RDFNode> drugs = model.listObjectsOfProperty(model.getProperty("http://bio2rdf.org/ra.challenge_vocabulary:drug")).toList();
+        Model drugbankData = ModelFactory.createDefaultModel();
+        for (RDFNode drug : drugs) {
+            if(!drug.isURIResource()){
+                // literals and blank nodes have no URI that could be looked up
+                continue;
+            }
+            Model cbd = cbdGen.getConciseBoundedDescription(drug.asResource().getURI(), 0, true);
+            drugbankData.add(cbd);
+        }
+        drugbankData.setNsPrefix("drug-voc", "http://bio2rdf.org/drugbank_vocabulary:");
+        drugbankData.setNsPrefix("drug-res", "http://bio2rdf.org/drugbank_resource:");
+        drugbankData.setNsPrefix("drug", "http://bio2rdf.org/drugbank:");
+
+        // close the file even if serialization fails
+        try (FileOutputStream out = new FileOutputStream("drugbank.ttl")) {
+            drugbankData.write(out, "TURTLE", null);
+        }
+        model.add(drugbankData);
+    }
+}

Added: trunk/scripts/src/main/java/org/dllearner/scripts/evaluation/EProcurementUseCase.java
===================================================================
--- trunk/scripts/src/main/java/org/dllearner/scripts/evaluation/EProcurementUseCase.java (rev 0)
+++ trunk/scripts/src/main/java/org/dllearner/scripts/evaluation/EProcurementUseCase.java 2014-05-07 11:25:54 UTC (rev 4261)
@@ -0,0 +1,290 @@
+/**
+ *
+ */
+package org.dllearner.scripts.evaluation;
+
+import static org.dllearner.utilities.examples.AutomaticNegativeExampleFinderSPARQL2.Strategy.SIBLING;
+import static org.dllearner.utilities.examples.AutomaticNegativeExampleFinderSPARQL2.Strategy.SUPERCLASS;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+import java.util.SortedSet;
+
+import org.apache.log4j.Logger;
+import org.dllearner.algorithms.celoe.CELOE;
+import org.dllearner.algorithms.elcopy.ELLearningAlgorithm;
+import
org.dllearner.algorithms.qtl.QueryTreeFactory;
+import org.dllearner.algorithms.qtl.datastructures.QueryTree;
+import org.dllearner.algorithms.qtl.datastructures.impl.QueryTreeImpl;
+import org.dllearner.algorithms.qtl.impl.QueryTreeFactoryImpl;
+import org.dllearner.algorithms.qtl.operations.lgg.LGGGenerator;
+import org.dllearner.algorithms.qtl.operations.lgg.LGGGeneratorImpl;
+import org.dllearner.core.AbstractCELA;
+import org.dllearner.core.AbstractKnowledgeSource;
+import org.dllearner.core.AbstractLearningProblem;
+import org.dllearner.core.AbstractReasonerComponent;
+import org.dllearner.core.owl.Individual;
+import org.dllearner.core.owl.NamedClass;
+import org.dllearner.kb.LocalModelBasedSparqlEndpointKS;
+import org.dllearner.kb.OWLAPIOntology;
+import org.dllearner.kb.sparql.ConciseBoundedDescriptionGenerator;
+import org.dllearner.kb.sparql.ConciseBoundedDescriptionGeneratorImpl;
+import org.dllearner.learningproblems.PosNegLPStandard;
+import org.dllearner.reasoning.FastInstanceChecker;
+import org.dllearner.reasoning.SPARQLReasoner;
+import org.dllearner.refinementoperators.RhoDRDown;
+import org.dllearner.utilities.examples.AutomaticNegativeExampleFinderSPARQL2;
+import org.dllearner.utilities.owl.OWLEntityTypeAdder;
+import org.semanticweb.owlapi.apibinding.OWLManager;
+import org.semanticweb.owlapi.model.OWLOntology;
+
+import com.google.common.collect.Sets;
+import com.hp.hpl.jena.rdf.model.Model;
+import com.hp.hpl.jena.rdf.model.ModelFactory;
+import com.hp.hpl.jena.vocabulary.OWL;
+import com.hp.hpl.jena.vocabulary.RDFS;
+
+/**
+ * Evaluation script for the e-procurement use case: learns a class expression
+ * that separates instances of pc:SuccessfulTender (positive examples) from
+ * instances of pc:UnsuccessfulTender (negative examples) on a sample of the
+ * public-contracts data.
+ *
+ * @author Lorenz Buehmann
+ *
+ */
+public class EProcurementUseCase {
+
+    private static final Logger logger = Logger.getLogger(EProcurementUseCase.class.getName());
+    // NOTE(review): getExamples applies this limit to the negative class as well,
+    // because main obtains both example sets through getExamples
+    static final int maxNrOfPositiveExamples = 100;
+    // only used by getNegativeExamples, which main does not call at the moment
+    static final int maxNrOfNegativeExamples = 200;
+    static boolean posOnly = false;
+    static int maxCBDDepth = 2;
+    static int maxNrOfResults = 100;
+    static int maxExecutionTimeInSeconds = 200;
+    static double noiseInPercentage = 50;
+    static boolean useNegation = false;
+    static boolean useAllConstructor = false;
+    static String testFolder = "logs/eprocurement";
+
+    static boolean useEL = false;
+    private static int maxClassExpressionDepth = 2;
+
+    /** Prefix -> namespace; used to shorten URIs in printed query trees. */
+    static Map<String, String> prefixes = new HashMap<String, String>();
+    static {
+        prefixes.put("pc", "http://purl.org/procurement/public-contracts#");
+        prefixes.put("rdfs", "http://www.w3.org/2000/01/rdf-schema#");
+        prefixes.put("skos", "http://www.w3.org/2004/02/skos/core#");
+        prefixes.put("dcterms", "http://purl.org/dc/terms/");
+        prefixes.put("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
+        prefixes.put("activities", "http://purl.org/procurement/public-contracts-activities#");
+        prefixes.put("gr", "http://purl.org/goodrelations/v1#");
+        prefixes.put("schema", "http://schema.org/");
+    }
+
+    /**
+     * Sets up knowledge base, reasoner, learning problem and learning
+     * algorithm and starts the algorithm.
+     *
+     * @param args not used
+     * @throws Exception if a data file cannot be read or a component fails to initialize
+     */
+    public static void main(String[] args) throws Exception{
+        NamedClass posClass = new NamedClass("http://purl.org/procurement/public-contracts#SuccessfulTender");
+        NamedClass negClass = new NamedClass("http://purl.org/procurement/public-contracts#UnsuccessfulTender");
+
+        //1. setup the knowledge base
+        Model model = ModelFactory.createDefaultModel();
+        //the data
+        try (FileInputStream in = new FileInputStream("../test/eprocurement/dl-learner-sample-with-classes-pco.rdf")) {
+            model.read(in, null);
+        }
+        //the schema
+        Model schema = ModelFactory.createDefaultModel();
+        try (FileInputStream in = new FileInputStream("../test/eprocurement/pco.rdf")) {
+            schema.read(in, null);
+        }
+        schema.add(schema.getResource("http://purl.org/procurement/public-contracts#SuccessfulTender"),
+                OWL.disjointWith,
+                schema.getResource("http://purl.org/procurement/public-contracts#UnsuccessfulTender"));
+        schema.add(schema.getResource("http://purl.org/procurement/public-contracts#SuccessfulTender"),
+                RDFS.subClassOf,
+                schema.getResource("http://purl.org/procurement/public-contracts#Tender"));
+        schema.add(schema.getResource("http://purl.org/procurement/public-contracts#UnsuccessfulTender"),
+                RDFS.subClassOf,
+                schema.getResource("http://purl.org/procurement/public-contracts#Tender"));
+//      schema.read(new URL("http://opendata.cz/pco/public-contracts.ttl").openStream(), null, "TURTLE");
+        model.add(schema);
+        // get positive examples
+        SortedSet<Individual> positiveExamples = getExamples(model, posClass);
+        // get negative examples
+//      SortedSet<Individual> negativeExamples = getNegativeExamples(model, cls, positiveExamples);
+        SortedSet<Individual> negativeExamples = getExamples(model, negClass);
+        //get the lgg of the pos. examples
+//      showLGG(model, positiveExamples);
+        // build a sample of the kb
+        model = getSample(model, Sets.union(positiveExamples, negativeExamples));
+        //add inferred entity types
+        OWLEntityTypeAdder.addEntityTypes(model);
+        //the ontology (added again because the sample replaced the full model)
+        model.add(schema);
+        //convert all into DL-Learner kb object
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        model.write(baos, "TURTLE");
+        OWLOntology ontology = OWLManager.createOWLOntologyManager().loadOntologyFromOntologyDocument(new ByteArrayInputStream(baos.toByteArray()));
+        AbstractKnowledgeSource ks = new OWLAPIOntology(ontology);
+        ks.init();
+
+        //2. setup the reasoner
+        AbstractReasonerComponent rc = new FastInstanceChecker(ks);
+        rc.init();
+
+        //3. setup the learning problem
+        AbstractLearningProblem lp = new PosNegLPStandard(rc, positiveExamples, negativeExamples);
+        lp.init();
+
+        //4. setup the learning algorithm
+        AbstractCELA la;
+        if(useEL){
+            ELLearningAlgorithm elTmp = new ELLearningAlgorithm(lp, rc);
+            elTmp.setNoisePercentage(noiseInPercentage);
+            elTmp.setIgnoredConcepts(Sets.newHashSet(posClass));
+            elTmp.setClassToDescribe(posClass);
+            elTmp.setTreeSearchTimeSeconds(maxExecutionTimeInSeconds);
+            elTmp.setMaxNrOfResults(maxNrOfResults);
+            elTmp.setMaxClassExpressionDepth(maxClassExpressionDepth);
+            la = elTmp;
+        } else {
+            //set up the refinement operator and the allowed OWL constructs
+            RhoDRDown rop = new RhoDRDown();
+            rop.setReasoner(rc);
+            rop.setUseNegation(useNegation);
+            rop.setUseAllConstructor(useAllConstructor);
+            rop.init();
+            //build CELOE la
+            CELOE laTmp = new CELOE(lp, rc);
+            laTmp.setMaxNrOfResults(maxNrOfResults);
+            laTmp.setOperator(rop);
+            laTmp.setMaxExecutionTimeInSeconds(maxExecutionTimeInSeconds);
+            laTmp.setNoisePercentage(noiseInPercentage);
+            // resolve the file against the folder; plain concatenation produced
+            // "logs/eprocurementsearchTree.txt" next to the folder created here
+            File searchTreeDir = new File(testFolder);
+            searchTreeDir.mkdirs();
+            laTmp.setSearchTreeFile(new File(searchTreeDir, "searchTree.txt").getPath());
+            laTmp.setWriteSearchTree(true);
+            laTmp.setIgnoredConcepts(Collections.singleton(posClass));
+            laTmp.setReplaceSearchTree(true);
+            laTmp.setExpandAccuracy100Nodes(true);
+            la = laTmp;
+        }
+        la.init();
+
+        //5. run
+        la.start();
+    }
+
+    /**
+     * Prints the least general generalization of the query trees of the given
+     * examples with shortened URIs.
+     */
+    private static void showLGG(Model model, SortedSet<Individual> positiveExamples){
+        LGGGenerator<String> lggGen = new LGGGeneratorImpl<String>();
+        QueryTree<String> lgg = lggGen.getLGG(buildTrees(model, positiveExamples));
+        System.out.println(abbreviate(lgg.getStringRepresentation()));
+        ((QueryTreeImpl<String>) lgg).asGraph();
+    }
+
+    /** Replaces every namespace registered in {@link #prefixes} by "prefix:". */
+    private static String abbreviate(String s){
+        for (Entry<String, String> entry : prefixes.entrySet()) {
+            s = s.replace(entry.getValue(), entry.getKey() + ":");
+        }
+        return s;
+    }
+
+    /**
+     * Asks the model for up to 1000 instances of the class and keeps at most
+     * {@link #maxNrOfPositiveExamples} of them, in the order of the sorted set.
+     */
+    private static SortedSet<Individual> firstInstances(Model model, NamedClass cls){
+        SortedSet<Individual> individuals = new SPARQLReasoner(new LocalModelBasedSparqlEndpointKS(model)).getIndividuals(cls, 1000);
+        List<Individual> individualsList = new ArrayList<>(individuals);
+//      Collections.shuffle(individualsList, new Random(1234));
+        individuals.clear();
+        individuals.addAll(individualsList.subList(0, Math.min(maxNrOfPositiveExamples, individualsList.size())));
+        logger.info("Done. Got " + individuals.size() + ": " + individuals);
+        return individuals;
+    }
+
+    /**
+     * Returns instances of the given class to be used as examples.
+     *
+     * @param model the data to query
+     * @param cls the class whose instances are requested
+     * @return at most {@link #maxNrOfPositiveExamples} instances of the class
+     */
+    private static SortedSet<Individual> getExamples(Model model, NamedClass cls){
+        logger.info("Generating examples...");
+        return firstInstances(model, cls);
+    }
+
+    /**
+     * Same as {@link #getExamples(Model, NamedClass)}, logged as the
+     * generation of positive examples.
+     */
+    private static SortedSet<Individual> getPositiveExamples(Model model, NamedClass cls){
+        logger.info("Generating positive examples...");
+        return firstInstances(model, cls);
+    }
+
+    /**
+     * Lets the automatic negative example finder propose up to
+     * {@link #maxNrOfNegativeExamples} negative examples using the SIBLING
+     * and SUPERCLASS strategies.
+     */
+    private static SortedSet<Individual> getNegativeExamples(Model model, NamedClass classToDescribe, Set<Individual> positiveExamples){
+        logger.info("Generating negative examples...");
+        SortedSet<Individual> individuals = new AutomaticNegativeExampleFinderSPARQL2(new SPARQLReasoner(new LocalModelBasedSparqlEndpointKS(model))).getNegativeExamples(classToDescribe, positiveExamples, Arrays.asList(SIBLING, SUPERCLASS), maxNrOfNegativeExamples);
+        logger.info("Done. Got " + individuals.size() + ": " + individuals);
+        return individuals;
+    }
+
+    /**
+     * Returns the concise bounded description of a single individual with
+     * depth {@link #maxCBDDepth}.
+     */
+    private static Model getSample(Model model, Individual individual){
+        logger.info("Generating sample...");
+        ConciseBoundedDescriptionGenerator cbdGen = new ConciseBoundedDescriptionGeneratorImpl(model, maxCBDDepth);
+        Model sample = cbdGen.getConciseBoundedDescription(individual.getName(), maxCBDDepth, true);
+        logger.info("Done. Got " + sample.size() + " triples.");
+        return sample;
+    }
+
+    /**
+     * Returns the union of the concise bounded descriptions of all given
+     * individuals. An individual whose description cannot be generated is
+     * logged and left out, so the sample stays usable.
+     */
+    private static Model getSample(Model model, Set<Individual> individuals){
+        logger.info("Generating sample...");
+        ConciseBoundedDescriptionGenerator cbdGen = new ConciseBoundedDescriptionGeneratorImpl(model, maxCBDDepth);
+        Model sample = ModelFactory.createDefaultModel();
+        for (Individual individual : individuals) {
+            try {
+                Model cbd = cbdGen.getConciseBoundedDescription(individual.getName(), maxCBDDepth, true);
+                sample.add(cbd);
+            } catch (Exception e) {
+                logger.error("Could not generate the description of " + individual, e);
+            }
+        }
+        logger.info("Done. Got " + sample.size() + " triples.");
+        return sample;
+    }
+
+    /** Builds the query tree of the individual from the given model. */
+    private static QueryTree<String> buildTree(Individual ind, Model model){
+        QueryTreeFactory<String> qf = new QueryTreeFactoryImpl();
+        QueryTreeImpl<String> queryTree = qf.getQueryTree(ind.getName(), model);
+        return queryTree;
+    }
+
+    /** Builds one query tree per individual, each from the individual's own sample. */
+    private static List<QueryTree<String>> buildTrees(Model model, Collection<Individual> individuals){
+        List<QueryTree<String>> trees = new ArrayList<QueryTree<String>>();
+        for (Individual individual : individuals) {
+            trees.add(buildTree(individual, getSample(model, individual)));
+        }
+        return trees;
+    }
+
+    /** Prints the query tree of the individual with shortened URIs. */
+    private static void showTree(Individual ind, Model model){
+        QueryTree<String> tree = buildTree(ind, model);
+        System.out.println(abbreviate(tree.getStringRepresentation()));
+    }
+
+}

Added: trunk/scripts/src/main/java/org/dllearner/scripts/evaluation/QTLEvaluation.java
===================================================================
--- trunk/scripts/src/main/java/org/dllearner/scripts/evaluation/QTLEvaluation.java (rev 0)
+++ trunk/scripts/src/main/java/org/dllearner/scripts/evaluation/QTLEvaluation.java 2014-05-07 11:25:54 UTC (rev 4261)
@@ -0,0 +1,516 @@
+/**
+ *
+ */
+package org.dllearner.scripts.evaluation;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Random;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+import org.dllearner.algorithms.qtl.QTL2;
+import org.dllearner.algorithms.qtl.QTL2Disjunctive;
+import org.dllearner.algorithms.qtl.QueryTreeFactory;
+import org.dllearner.algorithms.qtl.datastructures.QueryTree;
+import org.dllearner.algorithms.qtl.datastructures.impl.QueryTreeImpl;
+import
org.dllearner.algorithms.qtl.impl.QueryTreeFactoryImpl; +import org.dllearner.cli.CrossValidation; +import org.dllearner.cli.SPARQLCrossValidation; +import org.dllearner.core.ComponentInitException; +import org.dllearner.core.LearningProblemUnsupportedException; +import org.dllearner.core.owl.Individual; +import org.dllearner.kb.LocalModelBasedSparqlEndpointKS; +import org.dllearner.kb.OWLAPIOntology; +import org.dllearner.learningproblems.PosNegLP; +import org.dllearner.learningproblems.PosNegLPStandard; +import org.dllearner.reasoning.FastInstanceChecker; +import org.dllearner.reasoning.SPARQLReasoner; +import org.semanticweb.owlapi.apibinding.OWLManager; +import org.semanticweb.owlapi.model.IRI; +import org.semanticweb.owlapi.model.OWLDataFactory; +import org.semanticweb.owlapi.model.OWLOntology; +import org.semanticweb.owlapi.model.OWLOntologyChange; +import org.semanticweb.owlapi.model.OWLOntologyCreationException; +import org.semanticweb.owlapi.model.OWLOntologyManager; +import org.semanticweb.owlapi.reasoner.OWLReasoner; +import org.semanticweb.owlapi.reasoner.OWLReasonerFactory; + +import com.clarkparsia.pellet.owlapiv3.PelletReasonerFactory; +import com.google.common.collect.Lists; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.ModelFactory; + +/** + * @author Lorenz Buehmann + * + */ +public class QTLEvaluation { + + int nrOfFolds = 10; + private int nrOfPosExamples = 100; + private int nrOfNegExamples = 100; + + List<String> posExamples = Lists.newArrayList( + "http://dl-learner.org/carcinogenesis#d1", + "http://dl-learner.org/carcinogenesis#d10", + "http://dl-learner.org/carcinogenesis#d101", + "http://dl-learner.org/carcinogenesis#d102", + "http://dl-learner.org/carcinogenesis#d103", + "http://dl-learner.org/carcinogenesis#d106", + "http://dl-learner.org/carcinogenesis#d107", + "http://dl-learner.org/carcinogenesis#d108", + "http://dl-learner.org/carcinogenesis#d11", + "http://dl-learner.org/carcinogenesis#d12", + 
"http://dl-learner.org/carcinogenesis#d13", + "http://dl-learner.org/carcinogenesis#d134", + "http://dl-learner.org/carcinogenesis#d135", + "http://dl-learner.org/carcinogenesis#d136", + "http://dl-learner.org/carcinogenesis#d138", + "http://dl-learner.org/carcinogenesis#d140", + "http://dl-learner.org/carcinogenesis#d141", + "http://dl-learner.org/carcinogenesis#d144", + "http://dl-learner.org/carcinogenesis#d145", + "http://dl-learner.org/carcinogenesis#d146", + "http://dl-learner.org/carcinogenesis#d147", + "http://dl-learner.org/carcinogenesis#d15", + "http://dl-learner.org/carcinogenesis#d17", + "http://dl-learner.org/carcinogenesis#d19", + "http://dl-learner.org/carcinogenesis#d192", + "http://dl-learner.org/carcinogenesis#d193", + "http://dl-learner.org/carcinogenesis#d195", + "http://dl-learner.org/carcinogenesis#d196", + "http://dl-learner.org/carcinogenesis#d197", + "http://dl-learner.org/carcinogenesis#d198", + "http://dl-learner.org/carcinogenesis#d199", + "http://dl-learner.org/carcinogenesis#d2", + "http://dl-learner.org/carcinogenesis#d20", + "http://dl-learner.org/carcinogenesis#d200", + "http://dl-learner.org/carcinogenesis#d201", + "http://dl-learner.org/carcinogenesis#d202", + "http://dl-learner.org/carcinogenesis#d203", + "http://dl-learner.org/carcinogenesis#d204", + "http://dl-learner.org/carcinogenesis#d205", + "http://dl-learner.org/carcinogenesis#d21", + "http://dl-learner.org/carcinogenesis#d22", + "http://dl-learner.org/carcinogenesis#d226", + "http://dl-learner.org/carcinogenesis#d227", + "http://dl-learner.org/carcinogenesis#d228", + "http://dl-learner.org/carcinogenesis#d229", + "http://dl-learner.org/carcinogenesis#d231", + "http://dl-learner.org/carcinogenesis#d232", + "http://dl-learner.org/carcinogenesis#d234", + "http://dl-learner.org/carcinogenesis#d236", + "http://dl-learner.org/carcinogenesis#d239", + "http://dl-learner.org/carcinogenesis#d23_2", + "http://dl-learner.org/carcinogenesis#d242", + 
"http://dl-learner.org/carcinogenesis#d245", + "http://dl-learner.org/carcinogenesis#d247", + "http://dl-learner.org/carcinogenesis#d249", + "http://dl-learner.org/carcinogenesis#d25", + "http://dl-learner.org/carcinogenesis#d252", + "http://dl-learner.org/carcinogenesis#d253", + "http://dl-learner.org/carcinogenesis#d254", + "http://dl-learner.org/carcinogenesis#d255", + "http://dl-learner.org/carcinogenesis#d26", + "http://dl-learner.org/carcinogenesis#d272", + "http://dl-learner.org/carcinogenesis#d275", + "http://dl-learner.org/carcinogenesis#d277", + "http://dl-learner.org/carcinogenesis#d279", + "http://dl-learner.org/carcinogenesis#d28", + "http://dl-learner.org/carcinogenesis#d281", + "http://dl-learner.org/carcinogenesis#d283", + "http://dl-learner.org/carcinogenesis#d284", + "http://dl-learner.org/carcinogenesis#d288", + "http://dl-learner.org/carcinogenesis#d29", + "http://dl-learner.org/carcinogenesis#d290", + "http://dl-learner.org/carcinogenesis#d291", + "http://dl-learner.org/carcinogenesis#d292", + "http://dl-learner.org/carcinogenesis#d30", + "http://dl-learner.org/carcinogenesis#d31", + "http://dl-learner.org/carcinogenesis#d32", + "http://dl-learner.org/carcinogenesis#d33", + "http://dl-learner.org/carcinogenesis#d34", + "http://dl-learner.org/carcinogenesis#d35", + "http://dl-learner.org/carcinogenesis#d36", + "http://dl-learner.org/carcinogenesis#d37", + "http://dl-learner.org/carcinogenesis#d38", + "http://dl-learner.org/carcinogenesis#d42", + "http://dl-learner.org/carcinogenesis#d43", + "http://dl-learner.org/carcinogenesis#d44", + "http://dl-learner.org/carcinogenesis#d45", + "http://dl-learner.org/carcinogenesis#d46", + "http://dl-learner.org/carcinogenesis#d47", + "http://dl-learner.org/carcinogenesis#d48", + "http://dl-learner.org/carcinogenesis#d49", + "http://dl-learner.org/carcinogenesis#d5", + "http://dl-learner.org/carcinogenesis#d51", + "http://dl-learner.org/carcinogenesis#d52", + "http://dl-learner.org/carcinogenesis#d53", + 
"http://dl-learner.org/carcinogenesis#d55", + "http://dl-learner.org/carcinogenesis#d58", + "http://dl-learner.org/carcinogenesis#d6", + "http://dl-learner.org/carcinogenesis#d7", + "http://dl-learner.org/carcinogenesis#d84", + "http://dl-learner.org/carcinogenesis#d85_2", + "http://dl-learner.org/carcinogenesis#d86", + "http://dl-learner.org/carcinogenesis#d87", + "http://dl-learner.org/carcinogenesis#d88", + "http://dl-learner.org/carcinogenesis#d89", + "http://dl-learner.org/carcinogenesis#d9", + "http://dl-learner.org/carcinogenesis#d91", + "http://dl-learner.org/carcinogenesis#d92", + "http://dl-learner.org/carcinogenesis#d93", + "http://dl-learner.org/carcinogenesis#d95", + "http://dl-learner.org/carcinogenesis#d96", + "http://dl-learner.org/carcinogenesis#d98", + "http://dl-learner.org/carcinogenesis#d99", + "http://dl-learner.org/carcinogenesis#d100", + "http://dl-learner.org/carcinogenesis#d104", + "http://dl-learner.org/carcinogenesis#d105", + "http://dl-learner.org/carcinogenesis#d109", + "http://dl-learner.org/carcinogenesis#d137", + "http://dl-learner.org/carcinogenesis#d139", + "http://dl-learner.org/carcinogenesis#d14", + "http://dl-learner.org/carcinogenesis#d142", + "http://dl-learner.org/carcinogenesis#d143", + "http://dl-learner.org/carcinogenesis#d148", + "http://dl-learner.org/carcinogenesis#d16", + "http://dl-learner.org/carcinogenesis#d18", + "http://dl-learner.org/carcinogenesis#d191", + "http://dl-learner.org/carcinogenesis#d206", + "http://dl-learner.org/carcinogenesis#d230", + "http://dl-learner.org/carcinogenesis#d233", + "http://dl-learner.org/carcinogenesis#d235", + "http://dl-learner.org/carcinogenesis#d237", + "http://dl-learner.org/carcinogenesis#d238", + "http://dl-learner.org/carcinogenesis#d23_1", + "http://dl-learner.org/carcinogenesis#d24", + "http://dl-learner.org/carcinogenesis#d240", + "http://dl-learner.org/carcinogenesis#d241", + "http://dl-learner.org/carcinogenesis#d243", + "http://dl-learner.org/carcinogenesis#d244", + 
"http://dl-learner.org/carcinogenesis#d246", + "http://dl-learner.org/carcinogenesis#d248", + "http://dl-learner.org/carcinogenesis#d250", + "http://dl-learner.org/carcinogenesis#d251", + "http://dl-learner.org/carcinogenesis#d27", + "http://dl-learner.org/carcinogenesis#d273", + "http://dl-learner.org/carcinogenesis#d274", + "http://dl-learner.org/carcinogenesis#d278", + "http://dl-learner.org/carcinogenesis#d286", + "http://dl-learner.org/carcinogenesis#d289", + "http://dl-learner.org/carcinogenesis#d3", + "http://dl-learner.org/carcinogenesis#d39", + "http://dl-learner.org/carcinogenesis#d4", + "http://dl-learner.org/carcinogenesis#d40", + "http://dl-learner.org/carcinogenesis#d41", + "http://dl-learner.org/carcinogenesis#d50", + "http://dl-learner.org/carcinogenesis#d54", + "http://dl-learner.org/carcinogenesis#d56", + "http://dl-learner.org/carcinogenesis#d57", + "http://dl-learner.org/carcinogenesis#d8", + "http://dl-learner.org/carcinogenesis#d85_1", + "http://dl-learner.org/carcinogenesis#d90", + "http://dl-learner.org/carcinogenesis#d94", + "http://dl-learner.org/carcinogenesis#d97", + "http://dl-learner.org/carcinogenesis#d296", + "http://dl-learner.org/carcinogenesis#d305", + "http://dl-learner.org/carcinogenesis#d306", + "http://dl-learner.org/carcinogenesis#d307", + "http://dl-learner.org/carcinogenesis#d308", + "http://dl-learner.org/carcinogenesis#d311", + "http://dl-learner.org/carcinogenesis#d314", + "http://dl-learner.org/carcinogenesis#d315", + "http://dl-learner.org/carcinogenesis#d316", + "http://dl-learner.org/carcinogenesis#d320", + "http://dl-learner.org/carcinogenesis#d322", + "http://dl-learner.org/carcinogenesis#d323", + "http://dl-learner.org/carcinogenesis#d325", + "http://dl-learner.org/carcinogenesis#d329", + "http://dl-learner.org/carcinogenesis#d330", + "http://dl-learner.org/carcinogenesis#d331", + "http://dl-learner.org/carcinogenesis#d332", + "http://dl-learner.org/carcinogenesis#d333", + 
"http://dl-learner.org/carcinogenesis#d336", + "http://dl-learner.org/carcinogenesis#d337" + ); + + List<String> negExamples = Lists.newArrayList( + "http://dl-learner.org/carcinogenesis#d110", + "http://dl-learner.org/carcinogenesis#d111", + "http://dl-learner.org/carcinogenesis#d114", + "http://dl-learner.org/carcinogenesis#d116", + "http://dl-learner.org/carcinogenesis#d117", + "http://dl-learner.org/carcinogenesis#d119", + "http://dl-learner.org/carcinogenesis#d121", + "http://dl-learner.org/carcinogenesis#d123", + "http://dl-learner.org/carcinogenesis#d124", + "http://dl-learner.org/carcinogenesis#d125", + "http://dl-learner.org/carcinogenesis#d127", + "http://dl-learner.org/carcinogenesis#d128", + "http://dl-learner.org/carcinogenesis#d130", + "http://dl-learner.org/carcinogenesis#d133", + "http://dl-learner.org/carcinogenesis#d150", + "http://dl-learner.org/carcinogenesis#d151", + "http://dl-learner.org/carcinogenesis#d154", + "http://dl-learner.org/carcinogenesis#d155", + "http://dl-learner.org/carcinogenesis#d156", + "http://dl-learner.org/carcinogenesis#d159", + "http://dl-learner.org/carcinogenesis#d160", + "http://dl-learner.org/carcinogenesis#d161", + "http://dl-learner.org/carcinogenesis#d162", + "http://dl-learner.org/carcinogenesis#d163", + "http://dl-learner.org/carcinogenesis#d164", + "http://dl-learner.org/carcinogenesis#d165", + "http://dl-learner.org/carcinogenesis#d166", + "http://dl-learner.org/carcinogenesis#d169", + "http://dl-learner.org/carcinogenesis#d170", + "http://dl-learner.org/carcinogenesis#d171", + "http://dl-learner.org/carcinogenesis#d172", + "http://dl-learner.org/carcinogenesis#d173", + "http://dl-learner.org/carcinogenesis#d174", + "http://dl-learner.org/carcinogenesis#d178", + "http://dl-learner.org/carcinogenesis#d179", + "http://dl-learner.org/carcinogenesis#d180", + "http://dl-learner.org/carcinogenesis#d181", + "http://dl-learner.org/carcinogenesis#d183", + "http://dl-learner.org/carcinogenesis#d184", + 
"http://dl-learner.org/carcinogenesis#d185", + "http://dl-learner.org/carcinogenesis#d186", + "http://dl-learner.org/carcinogenesis#d188", + "http://dl-learner.org/carcinogenesis#d190", + "http://dl-learner.org/carcinogenesis#d194", + "http://dl-learner.org/carcinogenesis#d207", + "http://dl-learner.org/carcinogenesis#d208_1", + "http://dl-learner.org/carcinogenesis#d209", + "http://dl-learner.org/carcinogenesis#d210", + "http://dl-learner.org/carcinogenesis#d211", + "http://dl-learner.org/carcinogenesis#d212", + "http://dl-learner.org/carcinogenesis#d213", + "http://dl-learner.org/carcinogenesis#d214", + "http://dl-learner.org/carcinogenesis#d215", + "http://dl-learner.org/carcinogenesis#d217", + "http://dl-learner.org/carcinogenesis#d218", + "http://dl-learner.org/carcinogenesis#d219", + "http://dl-learner.org/carcinogenesis#d220", + "http://dl-learner.org/carcinogenesis#d224", + "http://dl-learner.org/carcinogenesis#d256", + "http://dl-learner.org/carcinogenesis#d257", + "http://dl-learner.org/carcinogenesis#d258", + "http://dl-learner.org/carcinogenesis#d261", + "http://dl-learner.org/carcinogenesis#d262", + "http://dl-learner.org/carcinogenesis#d263", + "http://dl-learner.org/carcinogenesis#d264", + "http://dl-learner.org/carcinogenesis#d265", + "http://dl-learner.org/carcinogenesis#d266", + "http://dl-learner.org/carcinogenesis#d267", + "http://dl-learner.org/carcinogenesis#d269", + "http://dl-learner.org/carcinogenesis#d271", + "http://dl-learner.org/carcinogenesis#d276", + "http://dl-learner.org/carcinogenesis#d280", + "http://dl-learner.org/carcinogenesis#d285", + "http://dl-learner.org/carcinogenesis#d287", + "http://dl-learner.org/carcinogenesis#d293", + "http://dl-learner.org/carcinogenesis#d294", + "http://dl-learner.org/carcinogenesis#d59", + "http://dl-learner.org/carcinogenesis#d60", + "http://dl-learner.org/carcinogenesis#d61", + "http://dl-learner.org/carcinogenesis#d63", + "http://dl-learner.org/carcinogenesis#d64", + 
"http://dl-learner.org/carcinogenesis#d65", + "http://dl-learner.org/carcinogenesis#d69", + "http://dl-learner.org/carcinogenesis#d70", + "http://dl-learner.org/carcinogenesis#d71", + "http://dl-learner.org/carcinogenesis#d72", + "http://dl-learner.org/carcinogenesis#d73", + "http://dl-learner.org/carcinogenesis#d74", + "http://dl-learner.org/carcinogenesis#d75", + "http://dl-learner.org/carcinogenesis#d76", + "http://dl-learner.org/carcinogenesis#d77", + "http://dl-learner.org/carcinogenesis#d78", + "http://dl-learner.org/carcinogenesis#d79", + "http://dl-learner.org/carcinogenesis#d80", + "http://dl-learner.org/carcinogenesis#d81", + "http://dl-learner.org/carcinogenesis#d82", + "http://dl-learner.org/carcinogenesis#d112", + "http://dl-learner.org/carcinogenesis#d113", + "http://dl-learner.org/carcinogenesis#d115", + "http://dl-learner.org/carcinogenesis#d118", + "http://dl-learner.org/carcinogenesis#d120", + "http://dl-learner.org/carcinogenesis#d122", + "http://dl-learner.org/carcinogenesis#d126", + "http://dl-learner.org/carcinogenesis#d129", + "http://dl-learner.org/carcinogenesis#d131", + "http://dl-learner.org/carcinogenesis#d132", + "http://dl-learner.org/carcinogenesis#d149", + "http://dl-learner.org/carcinogenesis#d152", + "http://dl-learner.org/carcinogenesis#d153", + "http://dl-learner.org/carcinogenesis#d157", + "http://dl-learner.org/carcinogenesis#d158", + "http://dl-learner.org/carcinogenesis#d167", + "http://dl-learner.org/carcinogenesis#d168", + "http://dl-learner.org/carcinogenesis#d175", + "http://dl-learner.org/carcinogenesis#d176", + "http://dl-learner.org/carcinogenesis#d177", + "http://dl-learner.org/carcinogenesis#d182", + "http://dl-learner.org/carcinogenesis#d187", + "http://dl-learner.org/carcinogenesis#d189", + "http://dl-learner.org/carcinogenesis#d208_2", + "http://dl-learner.org/carcinogenesis#d216", + "http://dl-learner.org/carcinogenesis#d221", + "http://dl-learner.org/carcinogenesis#d222", + 
"http://dl-learner.org/carcinogenesis#d223", + "http://dl-learner.org/carcinogenesis#d225", + "http://dl-learner.org/carcinogenesis#d259", + "http://dl-learner.org/carcinogenesis#d260", + "http://dl-learner.org/carcinogenesis#d268", + "http://dl-learner.org/carcinogenesis#d270", + "http://dl-learner.org/carcinogenesis#d282", + "http://dl-learner.org/carcinogenesis#d295", + "http://dl-learner.org/carcinogenesis#d62", + "http://dl-learner.org/carcinogenesis#d66", + "http://dl-learner.org/carcinogenesis#d67", + "http://dl-learner.org/carcinogenesis#d68", + "http://dl-learner.org/carcinogenesis#d83", + "http://dl-learner.org/carcinogenesis#d297", + "http://dl-learner.org/carcinogenesis#d298", + "http://dl-learner.org/carcinogenesis#d299", + "http://dl-learner.org/carcinogenesis#d300", + "http://dl-learner.org/carcinogenesis#d302", + "http://dl-learner.org/carcinogenesis#d303", + "http://dl-learner.org/carcinogenesis#d304", + "http://dl-learner.org/carcinogenesis#d309", + "http://dl-learner.org/carcinogenesis#d312", + "http://dl-learner.org/carcinogenesis#d313", + "http://dl-learner.org/carcinogenesis#d317", + "http://dl-learner.org/carcinogenesis#d318", + "http://dl-learner.org/carcinogenesis#d319", + "http://dl-learner.org/carcinogenesis#d324", + "http://dl-learner.org/carcinogenesis#d326", + "http://dl-learner.org/carcinogenesis#d327", + "http://dl-learner.org/carcinogenesis#d328", + "http://dl-learner.org/carcinogenesis#d334", + "http://dl-learner.org/carcinogenesis#d335" + ); + + private Model model; + private OWLOntology ontology; + private QueryTreeFactory<String> queryTreeFactory; + private List<QueryTree<String>> posExampleTrees; + private List<QueryTree<String>> negExampleTrees; + private PosNegLP lp; + + + + public QTLEvaluation() throws ComponentInitException { + queryTreeFactory = new QueryTreeFactoryImpl(); + queryTreeFactory.setMaxDepth(3); + + loadDataset(); + + loadExamples(); + } + + private void loadDataset(){ + File file = new 
File("../examples/carcinogenesis/carcinogenesis.owl"); + model = ModelFactory.createDefaultModel(); + try { + model.read(new FileInputStream(file), null, "RDF/XML"); + } catch (FileNotFoundException e) { + e.printStackTrace(); + } + + OWLOntologyManager man = OWLManager.createOWLOntologyManager(); + try { + ontology = man.loadOntologyFromOntologyDocument(file); + } catch (OWLOntologyCreationException e) { + e.printStackTrace(); + } + } + + private void loadExamples() throws ComponentInitException{ + + Collections.shuffle(posExamples, new Random(1)); + Collections.shuffle(negExamples, new Random(2)); + posExamples = posExamples.subList(0, Math.min(posExamples.size(), nrOfPosExamples)); + negExamples = negExamples.subList(0, Math.min(negExamples.size(), nrOfNegExamples)); + +// posExamples.clear(); +// String string = "http://dl-learner.org/carcinogenesis#d101, http://dl-learner.org/carcinogenesis#d103, http://dl-learner.org/carcinogenesis#d107, http://dl-learner.org/carcinogenesis#d108, http://dl-learner.org/carcinogenesis#d135, http://dl-learner.org/carcinogenesis#d139, http://dl-learner.org/carcinogenesis#d14, http://dl-learner.org/carcinogenesis#d141, http://dl-learner.org/carcinogenesis#d143, http://dl-learner.org/carcinogenesis#d147, http://dl-learner.org/carcinogenesis#d17, http://dl-learner.org/carcinogenesis#d19, http://dl-learner.org/carcinogenesis#d193, http://dl-learner.org/carcinogenesis#d198, http://dl-learner.org/carcinogenesis#d228, http://dl-learner.org/carcinogenesis#d236, http://dl-learner.org/carcinogenesis#d242, http://dl-learner.org/carcinogenesis#d244, http://dl-learner.org/carcinogenesis#d273, http://dl-learner.org/carcinogenesis#d275, http://dl-learner.org/carcinogenesis#d28, http://dl-learner.org/carcinogenesis#d283, http://dl-learner.org/carcinogenesis#d286, http://dl-learner.org/carcinogenesis#d291, http://dl-learner.org/carcinogenesis#d292, http://dl-learner.org/carcinogenesis#d307, http://dl-learner.org/carcinogenesis#d31, 
http://dl-learner.org/carcinogenesis#d325, http://dl-learner.org/carcinogenesis#d33, http://dl-learner.org/carcinogenesis#d333, http://dl-learner.org/carcinogenesis#d34, http://dl-learner.org/carcinogenesis#d36, http://dl-learner.org/carcinogenesis#d38, http://dl-learner.org/carcinogenesis#d4, http://dl-learner.org/carcinogenesis#d40, http://dl-learner.org/carcinogenesis#d44, http://dl-learner.org/carcinogenesis#d51, http://dl-learner.org/carcinogenesis#d85_2, http://dl-learner.org/carcinogenesis#d98, http://dl-learner.org/carcinogenesis#d99"; +// String[] split = string.split(","); +// for (String s : split) { +// posExamples.add(s.trim()); +// } +// negExamples.clear(); +// string = "http://dl-learner.org/carcinogenesis#d112, http://dl-learner.org/carcinogenesis#d116, http://dl-learner.org/carcinogenesis#d117, http://dl-learner.org/carcinogenesis#d119, http://dl-learner.org/carcinogenesis#d157, http://dl-learner.org/carcinogenesis#d160, http://dl-learner.org/carcinogenesis#d161, http://dl-learner.org/carcinogenesis#d162, http://dl-learner.org/carcinogenesis#d163, http://dl-learner.org/carcinogenesis#d167, http://dl-learner.org/carcinogenesis#d169, http://dl-learner.org/carcinogenesis#d175, http://dl-learner.org/carcinogenesis#d177, http://dl-learner.org/carcinogenesis#d184, http://dl-learner.org/carcinogenesis#d194, http://dl-learner.org/carcinogenesis#d208_2, http://dl-learner.org/carcinogenesis#d209, http://dl-learner.org/carcinogenesis#d217, http://dl-learner.org/carcinogenesis#d256, http://dl-learner.org/carcinogenesis#d257, http://dl-learner.org/carcinogenesis#d260, http://dl-learner.org/carcinogenesis#d271, http://dl-learner.org/carcinogenesis#d276, http://dl-learner.org/carcinogenesis#d282, http://dl-learner.org/carcinogenesis#d287, http://dl-learner.org/carcinogenesis#d294, http://dl-learner.org/carcinogenesis#d298, http://dl-learner.org/carcinogenesis#d300, http://dl-learner.org/carcinogenesis#d309, http://dl-learner.org/carcinogenesis#d319, 
http://dl-learner.org/carcinogenesis#d326, http://dl-learner.org/carcinogenesis#d328, http://dl-learner.org/carcinogenesis#d334, http://dl-learner.org/carcinogenesis#d60, http://dl-learner.org/carcinogenesis#d61, http://dl-learner.org/carcinogenesis#d66, http://dl-learner.org/carcinogenesis#d75, http://dl-learner.org/carcinogenesis#d79, http://dl-learner.org/carcinogenesis#d80, http://dl-learner.org/carcinogenesis#d83"; +// split = string.split(","); +// for (String s : split) { +// negExamples.add(s.trim()); +// } + + posExampleTrees = new ArrayList<QueryTree<String>>(); + for (String ex : posExamples) { + QueryTreeImpl<String> tree = queryTreeFactory.getQueryTree(ex, model); + posExampleTrees.add(tree); + } + + negExampleTrees = new ArrayList<QueryTree<String>>(); + for (String ex : negExamples) { + QueryTreeImpl<String> tree = queryTreeFactory.getQueryTree(ex, model); + negExampleTrees.add(tree); + } + + int cnt = 1; + for(QueryTree<String> tree : posExampleTrees){ +// System.out.println("TREE " + cnt); +// tree.dump(); +// +// System.out.println("-----------------------------"); + cnt++; +// System.out.println(((QueryTreeImpl<String>)tree).toQuery()); + } + + SortedSet<Individual> pos = new TreeSet<Individual>(); + for (String ex : posExamples) { + pos.add(new Individual(ex)); + } + SortedSet<Individual> neg = new TreeSet<Individual>(); + for (String ex : negExamples) { + neg.add(new Individual(ex)); + } + lp = new PosNegLPStandard(); + lp.setPositiveExamples(pos); + lp.setNegativeExamples(neg); + } + + public void run(boolean multiThreaded) throws ComponentInitException, LearningProblemUnsupportedException{ + long startTime = System.currentTimeMillis(); + FastInstanceChecker reasoner = new FastInstanceChecker(new OWLAPIOntology(ontology)); + reasoner.init(); + lp.setReasoner(reasoner); + lp.init(); + QTL2Disjunctive la = new QTL2Disjunctive(lp, reasoner); + la.init(); + la.start(); + +// CrossValidation.outputFile = new File("log/qtl-cv.log"); +// 
CrossValidation.writeToFile = true; +// CrossValidation.multiThreaded = multiThreaded; +// CrossValidation cv = new CrossValidation(la, lp, reasoner, nrOfFolds, false); + long endTime = System.currentTimeMillis(); + System.err.println((endTime - startTime) + "ms"); + } + + + public static void main(String[] args) throws Exception { + boolean multiThreaded = Boolean.valueOf(args[0]); + new QTLEvaluation().run(multiThreaded); + } + +} Added: trunk/scripts/src/main/java/org/dllearner/scripts/pattern/FixPointDetection.java =================================================================== --- trunk/scripts/src/main/java/org/dllearner/scripts/pattern/FixPointDetection.java (rev 0) +++ trunk/scripts/src/main/java/org/dllearner/scripts/pattern/FixPointDetection.java 2014-05-07 11:25:54 UTC (rev 4261) @@ -0,0 +1,281 @@ +/** + * + */ +package org.dllearner.scripts.pattern; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.io.StringWriter; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Random; +import java.util.Set; +import java.util.TreeMap; +import java.util.prefs.Preferences; + +import org.ini4j.IniPreferences; +import org.ini4j.InvalidFileFormatException; +import org.semanticweb.owlapi.apibinding.OWLManager; +import org.semanticweb.owlapi.io.OWLObjectRenderer; +import org.semanticweb.owlapi.model.OWLAxiom; +import org.semanticweb.owlapi.model.OWLOntology; +import org.semanticweb.owlapi.model.OWLOntologyCreationException; +import org.semanticweb.owlapi.model.OWLOntologyManager; + +import 
uk.ac.manchester.cs.owl.owlapi.mansyntaxrenderer.ManchesterOWLSyntaxOWLObjectRendererImpl; + +import com.google.common.base.Charsets; +import com.google.common.base.Joiner; +import com.google.common.collect.LinkedListMultimap; +import com.google.common.collect.Multimap; +import com.google.common.io.Files; + +/** + * @author Lorenz Buehmann + * + */ +public class FixPointDetection { + + private OWLObjectRenderer axiomRenderer = new ManchesterOWLSyntaxOWLObjectRendererImpl(); + + public FixPointDetection() { + initDBConnection(); + + File dir = new File("pattern-fixpoint"); + dir.mkdir(); + } + + private Connection conn; + private PreparedStatement ps; + + private void initDBConnection() { + try { + InputStream is = this.getClass().getClassLoader().getResourceAsStream("db_settings.ini"); + Preferences prefs = new IniPreferences(is); + String dbServer = prefs.node("database").get("server", null); + String dbName = prefs.node("database").get("name", null); + String dbUser = prefs.node("database").get("user", null); + String dbPass = prefs.node("database").get("pass", null); + + Class.forName("com.mysql.jdbc.Driver"); + String url = "jdbc:mysql://" + dbServer + "/" + dbName; + conn = DriverManager.getConnection(url, dbUser, dbPass); + } catch (ClassNotFoundException e) { + e.printStackTrace(); + } catch (SQLException e) { + e.printStackTrace(); + } catch (InvalidFileFormatException e) { + e.printStackTrace(); + } catch (FileNotFoundException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } + + try { + ps = conn.prepareStatement("SELECT occurrences FROM Ontology_Pattern WHERE pattern_id=? AND ontology_id=?"); + } catch (SQLException e) { + e.printStackTrace(); + } + } + + + private List<Integer> getProcessedOntologies() throws SQLException{ + List<Integer> ids = new ArrayList<Integer>(); + ResultSet rs = conn.createStatement().executeQuery("SELECT DISTINCT id FROM Ontology"); + while(rs.next()){ + int id = rs.getInt(1); + ... 
[truncated message content] |