From: <jen...@us...> - 2011-12-09 09:23:00
Revision: 3493 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3493&view=rev Author: jenslehmann Date: 2011-12-09 09:22:49 +0000 (Fri, 09 Dec 2011) Log Message: ----------- basic LOD enrichment script done Modified Paths: -------------- trunk/interfaces/src/main/java/org/dllearner/cli/Enrichment.java trunk/interfaces/src/main/java/org/dllearner/cli/GlobalEnrichment.java Modified: trunk/interfaces/src/main/java/org/dllearner/cli/Enrichment.java =================================================================== --- trunk/interfaces/src/main/java/org/dllearner/cli/Enrichment.java 2011-12-08 16:59:38 UTC (rev 3492) +++ trunk/interfaces/src/main/java/org/dllearner/cli/Enrichment.java 2011-12-09 09:22:49 UTC (rev 3493) @@ -155,7 +155,7 @@ public class Enrichment { // data structure for holding the result of an algorithm run - private class AlgorithmRun { + protected class AlgorithmRun { // we only store the algorithm class and not the learning algorithm object, // since otherwise we run into memory problems for full enrichment @@ -521,7 +521,7 @@ /* * Generates list of OWL axioms. */ - private List<OWLAxiom> toRDF(List<EvaluatedAxiom> evalAxioms, Class<? extends LearningAlgorithm> algorithm, Map<ConfigOption,Object> parameters, SparqlEndpointKS ks){ + List<OWLAxiom> toRDF(List<EvaluatedAxiom> evalAxioms, Class<? extends LearningAlgorithm> algorithm, Map<ConfigOption,Object> parameters, SparqlEndpointKS ks){ return toRDF(evalAxioms, algorithm, parameters, ks, null); } @@ -653,7 +653,7 @@ // return model; // } - private Model getModel(List<OWLAxiom> axioms) { + Model getModel(List<OWLAxiom> axioms) { Model model = ModelFactory.createDefaultModel(); try { OWLOntology ontology = OWLManager.createOWLOntologyManager().createOntology(new HashSet<OWLAxiom>(axioms)); Modified: trunk/interfaces/src/main/java/org/dllearner/cli/GlobalEnrichment.java =================================================================== --- trunk/interfaces/src/main/java/org/dllearner/cli/GlobalEnrichment.java 2011-12-08 16:59:38 UTC (rev 3492) +++ trunk/interfaces/src/main/java/org/dllearner/cli/GlobalEnrichment.java 2011-12-09 09:22:49 UTC (rev 3493) @@ -19,16 +19,33 @@ */ package org.dllearner.cli; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.lang.reflect.InvocationTargetException; import java.net.MalformedURLException; import java.net.URL; +import java.util.HashMap; import java.util.LinkedList; import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import org.apache.log4j.ConsoleAppender; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.log4j.SimpleLayout; +import org.dllearner.cli.Enrichment.AlgorithmRun; +import org.dllearner.core.ComponentInitException; +import org.dllearner.core.LearningProblemUnsupportedException; +import org.dllearner.kb.SparqlEndpointKS; import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.kb.sparql.SparqlQuery; +import org.semanticweb.owlapi.model.OWLAxiom; import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.rdf.model.Model; /** * Enriches all of the LOD cloud. 
@@ -38,36 +55,89 @@ */ public class GlobalEnrichment { + // parameters + private static double threshold = 0.8; + private static int nrOfAxiomsToLearn = 10; + private static boolean useInference = true; + + // directory for generated schemata + private static String baseDir = "log/lod-enriched/"; + /** * @param args * @throws MalformedURLException + * @throws LearningProblemUnsupportedException + * @throws NoSuchMethodException + * @throws InvocationTargetException + * @throws IllegalAccessException + * @throws InstantiationException + * @throws ComponentInitException + * @throws SecurityException + * @throws IllegalArgumentException + * @throws FileNotFoundException */ - public static void main(String[] args) throws MalformedURLException { - // get all SPARQL endpoints and their graphs - List<SparqlEndpoint> endpoints = new LinkedList<SparqlEndpoint>(); + public static void main(String[] args) throws MalformedURLException, IllegalArgumentException, SecurityException, ComponentInitException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException, LearningProblemUnsupportedException, FileNotFoundException { + SimpleLayout layout = new SimpleLayout(); + ConsoleAppender consoleAppender = new ConsoleAppender(layout); + Logger.getRootLogger().setLevel(Level.WARN); + Logger.getLogger("org.dllearner").setLevel(Level.WARN); // seems to be needed for some reason (?) + Logger.getRootLogger().removeAllAppenders(); + Logger.getRootLogger().addAppender(consoleAppender); + + // get all SPARQL endpoints and their graphs - the key is a name-identifier + Map<String,SparqlEndpoint> endpoints = new HashMap<String,SparqlEndpoint>(); + String query = ""; - query += "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "; - query += "PREFIX void: <http://rdfs.org/ns/void#> "; - query += "PREFIX dcterms: <http://purl.org/dc/terms/> "; - query += "SELECT ?endpoint "; - query += "WHERE { "; - query += "?item rdf:type void:Dataset . "; - query += "?item dcterms:isPartOf <http://ckan.net/group/lodcloud> . "; - query += "?item void:sparqlEndpoint ?endpoint . "; - query += "}"; + query += "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> \n"; + query += "PREFIX void: <http://rdfs.org/ns/void#> \n"; + query += "PREFIX dcterms: <http://purl.org/dc/terms/> \n"; + query += "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> \n"; + query += "PREFIX ov: <http://open.vocab.org/terms/> \n"; + query += "SELECT * \n"; + query += "WHERE { \n"; + query += " ?item rdf:type void:Dataset . \n"; + query += " ?item dcterms:isPartOf <http://ckan.net/group/lodcloud> . \n"; + query += " ?item void:sparqlEndpoint ?endpoint . \n"; +// query += " ?item dcterms:subject ?subject . \n"; +// query += " ?item rdfs:label ?label . \n"; + query += " ?item ov:shortName ?shortName . 
\n"; + query += "}"; // query += "LIMIT 20"; + System.out.println("Getting list of SPARQL endpoints from LATC DSI:"); + System.out.println(query); - // LATC DSI/MDS + // contact LATC DSI/MDS SparqlEndpoint dsi = new SparqlEndpoint(new URL("http://api.talis.com/stores/latc-mds/services/sparql")); SparqlQuery sq = new SparqlQuery(query, dsi); ResultSet rs = sq.send(); while(rs.hasNext()) { QuerySolution qs = rs.next(); String endpoint = qs.get("endpoint").toString(); -// String graph = qs.getLiteral("graph").getString(); - System.out.println(endpoint); + String shortName = qs.get("shortName").toString(); + endpoints.put(shortName, new SparqlEndpoint(new URL(endpoint))); } + System.out.println(endpoints.size() + " endpoints detected."); + + // perform enrichment on endpoints + for(Entry<String,SparqlEndpoint> endpoint : endpoints.entrySet()) { + // run enrichment + SparqlEndpoint se = endpoint.getValue(); + String name = endpoint.getKey(); + System.out.println("Enriching " + name + " using " + se); + Enrichment e = new Enrichment(se, null, threshold, nrOfAxiomsToLearn, useInference, false); + e.start(); + // save results to a file + SparqlEndpointKS ks = new SparqlEndpointKS(se); + List<AlgorithmRun> runs = e.getAlgorithmRuns(); + List<OWLAxiom> axioms = new LinkedList<OWLAxiom>(); + for(AlgorithmRun run : runs) { + axioms.addAll(e.toRDF(run.getAxioms(), run.getAlgorithm(), run.getParameters(), ks)); + } + Model model = e.getModel(axioms); + File f = new File(baseDir + name + ".ttl"); + model.write(new FileOutputStream(f), "TURTLE"); + } } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
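Stripped of the DL-Learner plumbing, the discovery step in this revision is a single SELECT query against the LATC metadata store, keyed by ov:shortName. The sketch below shows just that step using plain Jena ARQ in place of the SparqlQuery wrapper; the class name and the standalone main are illustrative, not part of the commit.

import java.net.URL;
import java.util.HashMap;
import java.util.Map;

import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet;

public class EndpointDiscoverySketch {
    public static void main(String[] args) throws Exception {
        String query =
              "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n"
            + "PREFIX void: <http://rdfs.org/ns/void#>\n"
            + "PREFIX dcterms: <http://purl.org/dc/terms/>\n"
            + "PREFIX ov: <http://open.vocab.org/terms/>\n"
            + "SELECT * WHERE {\n"
            + "  ?item rdf:type void:Dataset .\n"
            + "  ?item dcterms:isPartOf <http://ckan.net/group/lodcloud> .\n"
            + "  ?item void:sparqlEndpoint ?endpoint .\n"
            + "  ?item ov:shortName ?shortName .\n"
            + "}";

        // key: dataset short name, value: its SPARQL endpoint URL
        Map<String, URL> endpoints = new HashMap<String, URL>();
        QueryExecution qe = QueryExecutionFactory.sparqlService(
                "http://api.talis.com/stores/latc-mds/services/sparql", query);
        try {
            ResultSet rs = qe.execSelect();
            while (rs.hasNext()) {
                QuerySolution qs = rs.next();
                endpoints.put(qs.get("shortName").toString(),
                              new URL(qs.get("endpoint").toString()));
            }
        } finally {
            qe.close();
        }
        System.out.println(endpoints.size() + " endpoints detected.");
    }
}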
From: <lor...@us...> - 2011-12-13 11:58:47
Revision: 3502 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3502&view=rev Author: lorenz_b Date: 2011-12-13 11:58:38 +0000 (Tue, 13 Dec 2011) Log Message: ----------- Made global enrichment script multi-threaded. Modified Paths: -------------- trunk/interfaces/src/main/java/org/dllearner/cli/Enrichment.java trunk/interfaces/src/main/java/org/dllearner/cli/GlobalEnrichment.java Modified: trunk/interfaces/src/main/java/org/dllearner/cli/Enrichment.java =================================================================== --- trunk/interfaces/src/main/java/org/dllearner/cli/Enrichment.java 2011-12-13 11:57:29 UTC (rev 3501) +++ trunk/interfaces/src/main/java/org/dllearner/cli/Enrichment.java 2011-12-13 11:58:38 UTC (rev 3502) @@ -278,8 +278,8 @@ SPARQLTasks st = new SPARQLTasks(se); //check if endpoint supports SPARQL 1.1 - boolean supportsSPARQL_1_1 = st.supportsSPARQL_1_1(); - ks.setSupportsSPARQL_1_1(supportsSPARQL_1_1); +// boolean supportsSPARQL_1_1 = st.supportsSPARQL_1_1(); +// ks.setSupportsSPARQL_1_1(supportsSPARQL_1_1); if(useInference){ reasoner = new SPARQLReasoner(ks); Modified: trunk/interfaces/src/main/java/org/dllearner/cli/GlobalEnrichment.java =================================================================== --- trunk/interfaces/src/main/java/org/dllearner/cli/GlobalEnrichment.java 2011-12-13 11:57:29 UTC (rev 3501) +++ trunk/interfaces/src/main/java/org/dllearner/cli/GlobalEnrichment.java 2011-12-13 11:58:38 UTC (rev 3502) @@ -32,9 +32,10 @@ import java.util.Map; import java.util.Map.Entry; import java.util.TreeSet; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; -import javax.xml.ws.http.HTTPException; - import org.apache.log4j.ConsoleAppender; import org.apache.log4j.Level; import org.apache.log4j.Logger; @@ -70,6 +71,15 @@ // directory for generated schemata private static String baseDir = "log/lod-enriched/"; + + //parameters for thread pool + //Parallel running Threads(Executor) on System + private static int corePoolSize = 10; + //Maximum Threads allowed in Pool + private static int maximumPoolSize = 20; + //Keep alive time for waiting threads for jobs(Runnable) + private static long keepAliveTime = 10; + /** * @param args * @throws MalformedURLException @@ -129,58 +139,91 @@ TreeSet<String> blacklist = new TreeSet<String>(); blacklist.add("rkb-explorer-crime"); // computation never completes + ArrayBlockingQueue<Runnable> workQueue = new ArrayBlockingQueue<Runnable>(endpoints.size()); + ThreadPoolExecutor threadPool = new ThreadPoolExecutor(corePoolSize, maximumPoolSize, keepAliveTime, TimeUnit.SECONDS, workQueue); + + // perform enrichment on endpoints - for(Entry<String,SparqlEndpoint> endpoint : endpoints.entrySet()) { - // run enrichment - SparqlEndpoint se = endpoint.getValue(); - String name = endpoint.getKey(); + for(final Entry<String,SparqlEndpoint> endpoint : endpoints.entrySet()) { - File f = new File(baseDir + name + ".ttl"); - File log = new File(baseDir + name + ".log"); + threadPool.execute(new Runnable() { + + @Override + public void run() { + // run enrichment + SparqlEndpoint se = endpoint.getValue(); + String name = endpoint.getKey(); + + File f = new File(baseDir + name + ".ttl"); + File log = new File(baseDir + name + ".log"); + + System.out.println("Enriching " + name + " using " + se.getURL()); + Enrichment e = new Enrichment(se, null, threshold, nrOfAxiomsToLearn, useInference, false); + + e.maxEntitiesPerType = 3; // hack for faster testing of 
endpoints + +// if(blacklist.contains(name)) { +// continue; +// } + + boolean success = false; + // run enrichment script - we make a case distinguish to see which kind of problems we get + // (could be interesting for statistics later on) + try { + e.start(); + success = true; + } catch(StackOverflowError error) { + try { + error.printStackTrace(new PrintStream(log)); + } catch (FileNotFoundException e1) { + // TODO Auto-generated catch block + e1.printStackTrace(); + } + Files.appendToFile(log, "stack overflows could be caused by cycles in class hierarchies"); + error.printStackTrace(); + } catch(ResultSetException ex) { + try { + ex.printStackTrace(new PrintStream(log)); + } catch (FileNotFoundException e1) { + // TODO Auto-generated catch block + e1.printStackTrace(); + } + Files.appendToFile(log, ex.getMessage()); + ex.printStackTrace(); + } catch(QueryExceptionHTTP ex) { + try { + ex.printStackTrace(new PrintStream(log)); + } catch (FileNotFoundException e1) { + // TODO Auto-generated catch block + e1.printStackTrace(); + } + Files.appendToFile(log, ex.getMessage()); + ex.printStackTrace(); + } + catch(Exception ex) { + System.out.println("class of exception: " + ex.getClass()); + } + + // save results to a file (TODO: check if enrichment format + if(success) { + SparqlEndpointKS ks = new SparqlEndpointKS(se); + List<AlgorithmRun> runs = e.getAlgorithmRuns(); + List<OWLAxiom> axioms = new LinkedList<OWLAxiom>(); + for(AlgorithmRun run : runs) { + axioms.addAll(e.toRDF(run.getAxioms(), run.getAlgorithm(), run.getParameters(), ks)); + } + Model model = e.getModel(axioms); + try { + model.write(new FileOutputStream(f), "TURTLE"); + } catch (FileNotFoundException e1) { + // TODO Auto-generated catch block + e1.printStackTrace(); + } + } + + } + }); - System.out.println("Enriching " + name + " using " + se); - Enrichment e = new Enrichment(se, null, threshold, nrOfAxiomsToLearn, useInference, false); - - e.maxEntitiesPerType = 3; // hack for faster testing of endpoints - - if(blacklist.contains(name)) { - continue; - } - - boolean success = false; - // run enrichment script - we make a case distinguish to see which kind of problems we get - // (could be interesting for statistics later on) - try { - e.start(); - success = true; - } catch(StackOverflowError error) { - error.printStackTrace(new PrintStream(log)); - Files.appendToFile(log, "stack overflows could be caused by cycles in class hierarchies"); - error.printStackTrace(); - } catch(ResultSetException ex) { - ex.printStackTrace(new PrintStream(log)); - Files.appendToFile(log, ex.getMessage()); - ex.printStackTrace(); - } catch(QueryExceptionHTTP ex) { - ex.printStackTrace(new PrintStream(log)); - Files.appendToFile(log, ex.getMessage()); - ex.printStackTrace(); - } -// catch(Exception ex) { -// System.out.println("class of exception: " + ex.getClass()); -// } - - // save results to a file (TODO: check if enrichment format - if(success) { - SparqlEndpointKS ks = new SparqlEndpointKS(se); - List<AlgorithmRun> runs = e.getAlgorithmRuns(); - List<OWLAxiom> axioms = new LinkedList<OWLAxiom>(); - for(AlgorithmRun run : runs) { - axioms.addAll(e.toRDF(run.getAxioms(), run.getAlgorithm(), run.getParameters(), ks)); - } - Model model = e.getModel(axioms); - model.write(new FileOutputStream(f), "TURTLE"); - } } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
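The pool wiring in this revision is the standard ThreadPoolExecutor recipe, with one subtlety: because the ArrayBlockingQueue is sized to hold every task up front, the queue never fills, so the executor never grows past corePoolSize — the maximumPoolSize of 20 would only be reached with a smaller queue. A minimal sketch of the same pattern follows; the class name, the dummy task body, and the explicit shutdown (which the revision itself omits) are assumptions.

import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

public class PoolSketch {
    public static void main(String[] args) throws InterruptedException {
        int corePoolSize = 10;    // threads kept alive while the pool exists
        int maximumPoolSize = 20; // only reached once the work queue is full
        long keepAliveTime = 10;  // seconds an idle non-core thread survives
        int nrOfTasks = 100;      // stands in for endpoints.size()

        ArrayBlockingQueue<Runnable> workQueue =
                new ArrayBlockingQueue<Runnable>(nrOfTasks);
        ThreadPoolExecutor threadPool = new ThreadPoolExecutor(
                corePoolSize, maximumPoolSize, keepAliveTime,
                TimeUnit.SECONDS, workQueue);

        for (int i = 0; i < nrOfTasks; i++) {
            final int taskId = i;
            threadPool.execute(new Runnable() {
                @Override
                public void run() {
                    // one enrichment run per endpoint would go here
                    System.out.println("task " + taskId + " on "
                            + Thread.currentThread().getName());
                }
            });
        }

        threadPool.shutdown();                          // accept no new tasks
        threadPool.awaitTermination(1, TimeUnit.HOURS); // wait for completion
    }
}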
From: <lor...@us...> - 2012-02-05 18:25:58
Revision: 3564 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3564&view=rev Author: lorenz_b Date: 2012-02-05 18:25:51 +0000 (Sun, 05 Feb 2012) Log Message: ----------- Added to CLI options to set query chunk size and max execution time in seconds. Modified Paths: -------------- trunk/interfaces/src/main/java/org/dllearner/cli/Enrichment.java trunk/interfaces/src/main/java/org/dllearner/cli/GlobalEnrichment.java Modified: trunk/interfaces/src/main/java/org/dllearner/cli/Enrichment.java =================================================================== --- trunk/interfaces/src/main/java/org/dllearner/cli/Enrichment.java 2012-02-02 13:17:12 UTC (rev 3563) +++ trunk/interfaces/src/main/java/org/dllearner/cli/Enrichment.java 2012-02-05 18:25:51 UTC (rev 3564) @@ -208,6 +208,7 @@ // some learners) private int nrOfAxiomsToLearn = 10; private double threshold = 0.7; + private int chunksize = 1000; private boolean useInference; private SPARQLReasoner reasoner; @@ -231,13 +232,15 @@ private Set<OWLAxiom> learnedOWLAxioms; private Set<EvaluatedAxiom> learnedEvaluatedAxioms; - public Enrichment(SparqlEndpoint se, Entity resource, double threshold, int nrOfAxiomsToLearn, boolean useInference, boolean verbose) { + public Enrichment(SparqlEndpoint se, Entity resource, double threshold, int nrOfAxiomsToLearn, boolean useInference, boolean verbose, int chunksize, int maxExecutionTimeInSeconds) { this.se = se; this.resource = resource; this.verbose = verbose; this.threshold = threshold; this.nrOfAxiomsToLearn = nrOfAxiomsToLearn; this.useInference = useInference; + this.chunksize = chunksize; + this.maxExecutionTimeInSeconds = maxExecutionTimeInSeconds; try { cacheDir = "cache" + File.separator + URLEncoder.encode(se.getURL().toString(), "UTF-8"); @@ -487,6 +490,7 @@ } ConfigHelper.configure(learner, "maxExecutionTimeInSeconds", maxExecutionTimeInSeconds); + ((AbstractAxiomLearningAlgorithm)learner).setLimit(chunksize); learner.init(); if(reasoner != null){ ((AbstractAxiomLearningAlgorithm)learner).setReasoner(reasoner); @@ -801,6 +805,11 @@ .withRequiredArg().ofType(File.class); parser.acceptsAll(asList("a", "annotations"), "Specifies whether to save scores as annotations.").withOptionalArg().ofType(Boolean.class).defaultsTo(true); + parser.acceptsAll(asList("chunksize"), + "Specifies the chunk size for the query result as the approach is incrementally.").withRequiredArg().ofType(Integer.class).defaultsTo(1000); + parser.acceptsAll(asList("maxExecutionTimeInSeconds"), + "Specifies the max execution time for each algorithm run and each entity.").withRequiredArg().ofType(Integer.class).defaultsTo(10); + // parse options and display a message for the user in case of problems OptionSet options = null; try { @@ -886,6 +895,9 @@ maxNrOfResults = Integer.MAX_VALUE; } + int chunksize = (Integer) options.valueOf("chunksize"); + int runtime = (Integer) options.valueOf("runtime"); + // TODO: some handling for inaccessible files or overwriting existing files File f = (File) options.valueOf("o"); @@ -895,7 +907,7 @@ System.setOut(printStream); } - Enrichment e = new Enrichment(se, resource, threshold, maxNrOfResults, useInference, false); + Enrichment e = new Enrichment(se, resource, threshold, maxNrOfResults, useInference, false, chunksize, runtime); e.start(); SparqlEndpointKS ks = new SparqlEndpointKS(se); Modified: trunk/interfaces/src/main/java/org/dllearner/cli/GlobalEnrichment.java =================================================================== --- 
trunk/interfaces/src/main/java/org/dllearner/cli/GlobalEnrichment.java 2012-02-02 13:17:12 UTC (rev 3563) +++ trunk/interfaces/src/main/java/org/dllearner/cli/GlobalEnrichment.java 2012-02-05 18:25:51 UTC (rev 3564) @@ -78,6 +78,8 @@ // parameters private static double threshold = 0.8; private static int nrOfAxiomsToLearn = 10; + private static int queryChunkSize = 1000; + private static int maxExecutionTimeInSeconds = 10; private static boolean useInference = true; // directory for generated schemata @@ -187,7 +189,7 @@ File log = new File(baseDir + File.separator + "failed" + File.separator + name + ".log"); System.out.println("Enriching " + name + " using " + se.getURL()); - Enrichment e = new Enrichment(se, null, threshold, nrOfAxiomsToLearn, useInference, false); + Enrichment e = new Enrichment(se, null, threshold, nrOfAxiomsToLearn, useInference, false, queryChunkSize, maxExecutionTimeInSeconds); e.maxEntitiesPerType = 3; // hack for faster testing of endpoints This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
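Two notes on the option handling in this revision. First, the parsing is plain jopt-simple; the sketch below isolates the two new options (the wrapper class and main are illustrative). Second, the revision declares the option as "maxExecutionTimeInSeconds" but then reads options.valueOf("runtime"), a mismatch that rev 3570 below brings back in line with the declared name.

import static java.util.Arrays.asList;

import joptsimple.OptionParser;
import joptsimple.OptionSet;

public class ChunkOptionSketch {
    public static void main(String[] args) {
        OptionParser parser = new OptionParser();
        parser.acceptsAll(asList("chunksize"),
                "Chunk size used to fetch query results incrementally.")
                .withRequiredArg().ofType(Integer.class).defaultsTo(1000);
        parser.acceptsAll(asList("maxExecutionTimeInSeconds"),
                "Max execution time per algorithm run and entity.")
                .withRequiredArg().ofType(Integer.class).defaultsTo(10);

        OptionSet options = parser.parse(args);
        int chunksize = (Integer) options.valueOf("chunksize");
        int maxExecutionTimeInSeconds =
                (Integer) options.valueOf("maxExecutionTimeInSeconds");
        System.out.println("chunksize=" + chunksize
                + ", maxExecutionTimeInSeconds=" + maxExecutionTimeInSeconds);
    }
}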
From: <lor...@us...> - 2012-02-06 14:15:23
Revision: 3570 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3570&view=rev Author: lorenz_b Date: 2012-02-06 14:15:12 +0000 (Mon, 06 Feb 2012) Log Message: ----------- Added CLI option to return only axioms which not already exist in KB. Modified Paths: -------------- trunk/interfaces/src/main/java/org/dllearner/cli/Enrichment.java trunk/interfaces/src/main/java/org/dllearner/cli/GlobalEnrichment.java Modified: trunk/interfaces/src/main/java/org/dllearner/cli/Enrichment.java =================================================================== --- trunk/interfaces/src/main/java/org/dllearner/cli/Enrichment.java 2012-02-06 13:48:50 UTC (rev 3569) +++ trunk/interfaces/src/main/java/org/dllearner/cli/Enrichment.java 2012-02-06 14:15:12 UTC (rev 3570) @@ -209,6 +209,7 @@ private int nrOfAxiomsToLearn = 10; private double threshold = 0.7; private int chunksize = 1000; + private boolean omitExistingAxioms; private boolean useInference; private SPARQLReasoner reasoner; @@ -232,7 +233,7 @@ private Set<OWLAxiom> learnedOWLAxioms; private Set<EvaluatedAxiom> learnedEvaluatedAxioms; - public Enrichment(SparqlEndpoint se, Entity resource, double threshold, int nrOfAxiomsToLearn, boolean useInference, boolean verbose, int chunksize, int maxExecutionTimeInSeconds) { + public Enrichment(SparqlEndpoint se, Entity resource, double threshold, int nrOfAxiomsToLearn, boolean useInference, boolean verbose, int chunksize, int maxExecutionTimeInSeconds, boolean omitExistingAxioms) { this.se = se; this.resource = resource; this.verbose = verbose; @@ -241,6 +242,7 @@ this.useInference = useInference; this.chunksize = chunksize; this.maxExecutionTimeInSeconds = maxExecutionTimeInSeconds; + this.omitExistingAxioms = omitExistingAxioms; try { cacheDir = "cache" + File.separator + URLEncoder.encode(se.getURL().toString(), "UTF-8"); @@ -426,6 +428,7 @@ ks2.setCacheDir(cacheDir); ks2.setRecursionDepth(2); ks2.setCloseAfterRecursion(true); + ks2.setDissolveBlankNodes(false); ks2.setSaveExtractedFragment(true); startTime = System.currentTimeMillis(); System.out.print("getting knowledge base fragment ... 
"); @@ -491,6 +494,7 @@ ConfigHelper.configure(learner, "maxExecutionTimeInSeconds", maxExecutionTimeInSeconds); ((AbstractAxiomLearningAlgorithm)learner).setLimit(chunksize); + ((AbstractAxiomLearningAlgorithm)learner).setReturnOnlyNewAxioms(omitExistingAxioms); learner.init(); if(reasoner != null){ ((AbstractAxiomLearningAlgorithm)learner).setReasoner(reasoner); @@ -809,6 +813,8 @@ "Specifies the chunk size for the query result as the approach is incrementally.").withRequiredArg().ofType(Integer.class).defaultsTo(1000); parser.acceptsAll(asList("maxExecutionTimeInSeconds"), "Specifies the max execution time for each algorithm run and each entity.").withRequiredArg().ofType(Integer.class).defaultsTo(10); + parser.acceptsAll(asList("omitExistingAxioms"), + "Specifies whether return only axioms which not already exist in the knowlegde base.").withOptionalArg().ofType(Boolean.class).defaultsTo(false); // parse options and display a message for the user in case of problems OptionSet options = null; @@ -896,7 +902,8 @@ } int chunksize = (Integer) options.valueOf("chunksize"); - int runtime = (Integer) options.valueOf("runtime"); + int maxExecutionTimeInSeconds = (Integer) options.valueOf("maxExecutionTimeInSeconds"); + boolean omitExistingAxioms = (Boolean) options.valueOf("omitExistingAxioms"); // TODO: some handling for inaccessible files or overwriting existing files File f = (File) options.valueOf("o"); @@ -907,7 +914,7 @@ System.setOut(printStream); } - Enrichment e = new Enrichment(se, resource, threshold, maxNrOfResults, useInference, false, chunksize, runtime); + Enrichment e = new Enrichment(se, resource, threshold, maxNrOfResults, useInference, false, chunksize, maxExecutionTimeInSeconds, omitExistingAxioms); e.start(); SparqlEndpointKS ks = new SparqlEndpointKS(se); Modified: trunk/interfaces/src/main/java/org/dllearner/cli/GlobalEnrichment.java =================================================================== --- trunk/interfaces/src/main/java/org/dllearner/cli/GlobalEnrichment.java 2012-02-06 13:48:50 UTC (rev 3569) +++ trunk/interfaces/src/main/java/org/dllearner/cli/GlobalEnrichment.java 2012-02-06 14:15:12 UTC (rev 3570) @@ -81,6 +81,7 @@ private static int queryChunkSize = 1000; private static int maxExecutionTimeInSeconds = 10; private static boolean useInference = true; + private static boolean omitExistingAxioms = false; // directory for generated schemata private static String baseDir = "log/lod-enriched/"; @@ -88,7 +89,7 @@ //parameters for thread pool //Parallel running Threads(Executor) on System - private static int corePoolSize = 5; + private static int corePoolSize = 1; //Maximum Threads allowed in Pool private static int maximumPoolSize = 20; //Keep alive time for waiting threads for jobs(Runnable) @@ -189,7 +190,8 @@ File log = new File(baseDir + File.separator + "failed" + File.separator + name + ".log"); System.out.println("Enriching " + name + " using " + se.getURL()); - Enrichment e = new Enrichment(se, null, threshold, nrOfAxiomsToLearn, useInference, false, queryChunkSize, maxExecutionTimeInSeconds); + Enrichment e = new Enrichment(se, null, threshold, nrOfAxiomsToLearn, useInference, + false, queryChunkSize, maxExecutionTimeInSeconds, omitExistingAxioms); e.maxEntitiesPerType = 3; // hack for faster testing of endpoints This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tc...@us...> - 2012-05-14 04:04:50
Revision: 3709 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3709&view=rev Author: tcanvn Date: 2012-05-14 04:04:43 +0000 (Mon, 14 May 2012) Log Message: ----------- Rename the PADCEL cross validation class (PDLLCrossValidation.java --> PADCELCrossValidation.java) Modified Paths: -------------- trunk/interfaces/src/main/java/org/dllearner/cli/CLI.java Added Paths: ----------- trunk/interfaces/src/main/java/org/dllearner/cli/PADCELCrossValidation.java Removed Paths: ------------- trunk/interfaces/src/main/java/org/dllearner/cli/PDLLCrossValidation.java Modified: trunk/interfaces/src/main/java/org/dllearner/cli/CLI.java =================================================================== --- trunk/interfaces/src/main/java/org/dllearner/cli/CLI.java 2012-05-14 04:00:47 UTC (rev 3708) +++ trunk/interfaces/src/main/java/org/dllearner/cli/CLI.java 2012-05-14 04:04:43 UTC (rev 3709) @@ -120,7 +120,7 @@ //this test is added for PDLL algorithm since it does not use the PosNegLP try { PADCELPosNegLP lp = context.getBean(PADCELPosNegLP.class); - new PDLLCrossValidation(la, lp, rs, nrOfFolds, false); + new PADCELCrossValidation(la, lp, rs, nrOfFolds, false); } catch (BeansException be) { PosNegLP lp = context.getBean(PosNegLP.class); Copied: trunk/interfaces/src/main/java/org/dllearner/cli/PADCELCrossValidation.java (from rev 3708, trunk/interfaces/src/main/java/org/dllearner/cli/PDLLCrossValidation.java) =================================================================== --- trunk/interfaces/src/main/java/org/dllearner/cli/PADCELCrossValidation.java (rev 0) +++ trunk/interfaces/src/main/java/org/dllearner/cli/PADCELCrossValidation.java 2012-05-14 04:04:43 UTC (rev 3709) @@ -0,0 +1,320 @@ +package org.dllearner.cli; + +import java.text.DecimalFormat; +import java.util.Collections; +import java.util.LinkedList; +import java.util.List; +import java.util.Random; +import java.util.Set; + + +import org.apache.log4j.Logger; +import org.dllearner.algorithms.PADCEL.PADCELAbstract; +import org.dllearner.algorithms.PADCEL.PADCELPosNegLP; +import org.dllearner.algorithms.PADCELEx.PADCELExAbstract; +import org.dllearner.core.AbstractCELA; +import org.dllearner.core.AbstractReasonerComponent; +import org.dllearner.core.ComponentInitException; +import org.dllearner.core.owl.Description; +import org.dllearner.core.owl.Individual; +import org.dllearner.learningproblems.Heuristics; +import org.dllearner.learningproblems.PosNegLP; +import org.dllearner.utilities.Files; +import org.dllearner.utilities.Helper; +import org.dllearner.utilities.statistics.Stat; + +/** + * Add PDLL cross validation support to Jens Lehmann work ( + * {@link org.dllearner.cli.CrossValidation}). In this cross validation, + * some more addition dimensions will be investigated such as: + * number partial definitions, partial definition length, etc. 
+ * + * + * @author actran + * + */ + +public class PADCELCrossValidation extends CrossValidation { + + protected Stat noOfPartialDef = new Stat(); + protected Stat partialDefinitionLength = new Stat(); + + Logger logger = Logger.getLogger(this.getClass()); + + protected boolean interupted = false; + + /** + * Default constructor + */ + + public PADCELCrossValidation(AbstractCELA la, PosNegLP lp, AbstractReasonerComponent rs, + int folds, boolean leaveOneOut) { + super(la, lp, rs, folds, leaveOneOut); + } + + /** + * This is for PDLL cross validation + * + * @param la + * @param lp + * @param rs + * @param folds + * @param leaveOneOut + */ + public PADCELCrossValidation(AbstractCELA la, PADCELPosNegLP lp, AbstractReasonerComponent rs, + int folds, boolean leaveOneOut) { + + super(); // do nothing + + DecimalFormat df = new DecimalFormat(); + + // the training and test sets used later on + List<Set<Individual>> trainingSetsPos = new LinkedList<Set<Individual>>(); + List<Set<Individual>> trainingSetsNeg = new LinkedList<Set<Individual>>(); + List<Set<Individual>> testSetsPos = new LinkedList<Set<Individual>>(); + List<Set<Individual>> testSetsNeg = new LinkedList<Set<Individual>>(); + + // get examples and shuffle them too + Set<Individual> posExamples = lp.getPositiveExamples(); + List<Individual> posExamplesList = new LinkedList<Individual>(posExamples); + Collections.shuffle(posExamplesList, new Random(1)); + Set<Individual> negExamples = lp.getNegativeExamples(); + List<Individual> negExamplesList = new LinkedList<Individual>(negExamples); + Collections.shuffle(negExamplesList, new Random(2)); + + // sanity check whether nr. of folds makes sense for this benchmark + if (!leaveOneOut && (posExamples.size() < folds && negExamples.size() < folds)) { + System.out.println("The number of folds is higher than the number of " + + "positive/negative examples. This can result in empty test sets. Exiting."); + System.exit(0); + } + + if (leaveOneOut) { + // note that leave-one-out is not identical to k-fold with + // k = nr. of examples in the current implementation, because + // with n folds and n examples there is no guarantee that a fold + // is never empty (this is an implementation issue) + int nrOfExamples = posExamples.size() + negExamples.size(); + for (int i = 0; i < nrOfExamples; i++) { + // ... + } + System.out.println("Leave-one-out not supported yet."); + System.exit(1); + } else { + // calculating where to split the sets, ; note that we split + // positive and negative examples separately such that the + // distribution of positive and negative examples remains similar + // (note that there are better but more complex ways to implement + // this, + // which guarantee that the sum of the elements of a fold for pos + // and neg differs by at most 1 - it can differ by 2 in our + // implementation, + // e.g. with 3 folds, 4 pos. examples, 4 neg. 
examples) + int[] splitsPos = calculateSplits(posExamples.size(), folds); + int[] splitsNeg = calculateSplits(negExamples.size(), folds); + + // System.out.println(splitsPos[0]); + // System.out.println(splitsNeg[0]); + + // calculating training and test sets + for (int i = 0; i < folds; i++) { + Set<Individual> testPos = getTestingSet(posExamplesList, splitsPos, i); + Set<Individual> testNeg = getTestingSet(negExamplesList, splitsNeg, i); + testSetsPos.add(i, testPos); + testSetsNeg.add(i, testNeg); + trainingSetsPos.add(i, getTrainingSet(posExamples, testPos)); + trainingSetsNeg.add(i, getTrainingSet(negExamples, testNeg)); + } + + } + + // run the algorithm + int terminatedBypartialDefinition = 0, terminatedByCounterPartialDefinitions = 0; + + for (int currFold = 0; (currFold < folds); currFold++) { + + if (this.interupted) { + outputWriter("Cross validation has been interupted"); + return; + } + + // Set<String> pos = + // Datastructures.individualSetToStringSet(trainingSetsPos.get(currFold)); + // Set<String> neg = + // Datastructures.individualSetToStringSet(trainingSetsNeg.get(currFold)); + lp.setPositiveExamples(trainingSetsPos.get(currFold)); + lp.setNegativeExamples(trainingSetsNeg.get(currFold)); + + try { + lp.init(); + la.init(); + } catch (ComponentInitException e) { + e.printStackTrace(); + } + + long algorithmStartTime = System.nanoTime(); + try { + la.start(); + } catch (OutOfMemoryError e) { + System.out.println("out of memory at " + + (System.currentTimeMillis() - algorithmStartTime) / 1000 + "s"); + } + + long algorithmDuration = System.nanoTime() - algorithmStartTime; + runtime.addNumber(algorithmDuration / (double) 1000000000); + + Description concept = ((PADCELAbstract) la).getUnionCurrenlyBestDescription(); + + Set<Individual> tmp = rs.hasType(concept, trainingSetsPos.get(currFold)); + Set<Individual> tmp2 = Helper.difference(trainingSetsPos.get(currFold), tmp); + Set<Individual> tmp3 = rs.hasType(concept, trainingSetsNeg.get(currFold)); + + outputWriter("training set errors pos (" + tmp2.size() + "): " + tmp2); + outputWriter("training set errors neg (" + tmp3.size() + "): " + tmp3); + + tmp = rs.hasType(concept, testSetsPos.get(currFold)); + tmp2 = Helper.difference(testSetsPos.get(currFold), tmp); + tmp3 = rs.hasType(concept, testSetsNeg.get(currFold)); + + outputWriter("test set errors pos: " + tmp2); + outputWriter("test set errors neg: " + tmp3); + + // calculate training accuracies + int trainingCorrectPosClassified = getCorrectPosClassified(rs, concept, + trainingSetsPos.get(currFold)); + int trainingCorrectNegClassified = getCorrectNegClassified(rs, concept, + trainingSetsNeg.get(currFold)); + int trainingCorrectExamples = trainingCorrectPosClassified + + trainingCorrectNegClassified; + double trainingAccuracy = 100 * ((double) trainingCorrectExamples / (trainingSetsPos + .get(currFold).size() + trainingSetsNeg.get(currFold).size())); + + double trainingCompleteness = 100 * (double) trainingCorrectPosClassified + / trainingSetsPos.get(currFold).size(); + double trainingCorrectness = 100 * (double) trainingCorrectNegClassified + / trainingSetsNeg.get(currFold).size(); + + accuracyTraining.addNumber(trainingAccuracy); + trainingCompletenessStat.addNumber(trainingCompleteness); + trainingCorrectnessStat.addNumber(trainingCorrectness); + + // calculate test accuracies + int correctPosClassified = getCorrectPosClassified(rs, concept, + testSetsPos.get(currFold)); + int correctNegClassified = getCorrectNegClassified(rs, concept, + testSetsNeg.get(currFold)); + 
int correctExamples = correctPosClassified + correctNegClassified; + double currAccuracy = 100 * ((double) correctExamples / (testSetsPos.get(currFold) + .size() + testSetsNeg.get(currFold).size())); + + double testingCompleteness = 100 * (double) correctPosClassified + / testSetsPos.get(currFold).size(); + double testingCorrectness = 100 * (double) correctNegClassified + / testSetsNeg.get(currFold).size(); + + accuracy.addNumber(currAccuracy); + testingCompletenessStat.addNumber(testingCompleteness); + testingCorrectnessStat.addNumber(testingCorrectness); + + // calculate training F-Score + int negAsPosTraining = rs.hasType(concept, trainingSetsNeg.get(currFold)).size(); + double precisionTraining = trainingCorrectPosClassified + negAsPosTraining == 0 ? 0 + : trainingCorrectPosClassified + / (double) (trainingCorrectPosClassified + negAsPosTraining); + double recallTraining = trainingCorrectPosClassified + / (double) trainingSetsPos.get(currFold).size(); + fMeasureTraining.addNumber(100 * Heuristics + .getFScore(recallTraining, precisionTraining)); + // calculate test F-Score + int negAsPos = rs.hasType(concept, testSetsNeg.get(currFold)).size(); + double precision = correctPosClassified + negAsPos == 0 ? 0 : correctPosClassified + / (double) (correctPosClassified + negAsPos); + double recall = correctPosClassified / (double) testSetsPos.get(currFold).size(); + // System.out.println(precision);System.out.println(recall); + fMeasure.addNumber(100 * Heuristics.getFScore(recall, precision)); + + length.addNumber(concept.getLength()); + + outputWriter("fold " + currFold + ":"); + outputWriter(" training: " + trainingCorrectPosClassified + "/" + + trainingSetsPos.get(currFold).size() + " positive and " + + trainingCorrectNegClassified + "/" + trainingSetsNeg.get(currFold).size() + + " negative examples"); + outputWriter(" testing: " + correctPosClassified + "/" + + testSetsPos.get(currFold).size() + " correct positives, " + + correctNegClassified + "/" + testSetsNeg.get(currFold).size() + + " correct negatives"); + outputWriter(" concept: " + concept); + outputWriter(" accuracy: " + df.format(currAccuracy) + "% (correctness: " + + df.format(testingCorrectness) + "%; completeness: " + + df.format(testingCompleteness) + "%) --- training set: " + + df.format(trainingAccuracy) + "% (correctness: " + + df.format(trainingCorrectness) + "%; completeness: " + + df.format(trainingCompleteness) + "%)"); + outputWriter(" length: " + df.format(concept.getLength())); + outputWriter(" runtime: " + df.format(algorithmDuration / (double) 1000000000) + "s"); + + if (la instanceof PADCELAbstract) { + int pn = ((PADCELAbstract) la).getNoOfCompactedPartialDefinition(); + this.noOfPartialDef.addNumber(pn); + outputWriter(" number of partial definitions: " + pn + " (total: " + + ((PADCELAbstract) la).getNumberOfPartialDefinitions() + ")"); + + double pl = concept.getLength() / (double) pn; + this.partialDefinitionLength.addNumber(pl); + outputWriter(" avarage partial definition length: " + df.format(pl)); + + // show more information on counter partial definitions + if (la instanceof PADCELExAbstract) { + PADCELExAbstract pdllexla = (PADCELExAbstract) la; + outputWriter(" number of partial definitions for each type: 1:" + + pdllexla.getNumberOfPartialDefinitions(1) + "; 2:" + + pdllexla.getNumberOfPartialDefinitions(2) + "; 3:" + + pdllexla.getNumberOfPartialDefinitions(3) + "; 4:" + + pdllexla.getNumberOfPartialDefinitions(4)); + outputWriter(" number of counter partial definition used: " + + 
(concept.toString().split("NOT ").length - 1) + "/" + + pdllexla.getNumberOfCounterPartialDefinitionUsed()); + if (pdllexla.terminatedByCounterDefinitions()) { + outputWriter(" terminated by counter partial definitions"); + terminatedByCounterPartialDefinitions++; + } else if (pdllexla.terminatedByPartialDefinitions()) { + outputWriter(" terminated by partial definitions"); + terminatedBypartialDefinition++; + } else + outputWriter(" neither terminated by partial definition nor counter partial definition"); + } + } + + } + + outputWriter(""); + outputWriter("Finished " + folds + "-folds cross-validation."); + outputWriter("runtime: " + statOutput(df, runtime, "s")); + outputWriter("#partial definitions: " + statOutput(df, noOfPartialDef, "")); + outputWriter("avg. partial definition length: " + + statOutput(df, partialDefinitionLength, "")); + outputWriter("length: " + statOutput(df, length, "")); + outputWriter("F-Measure on training set: " + statOutput(df, fMeasureTraining, "%")); + outputWriter("F-Measure: " + statOutput(df, fMeasure, "%")); + outputWriter("predictive accuracy on training set: " + + statOutput(df, accuracyTraining, "%") + " --- correctness: " + + statOutput(df, trainingCorrectnessStat, "%") + "; completeness: " + + statOutput(df, trainingCompletenessStat, "%")); + outputWriter("predictive accuracy: " + statOutput(df, accuracy, "%") + " --- correctness: " + + statOutput(df, testingCorrectnessStat, "%") + "; completeness: " + + statOutput(df, testingCompletenessStat, "%")); + if (la instanceof PADCELExAbstract) + outputWriter("terminated by: partial def.: " + terminatedBypartialDefinition + + "; counter partial def.: " + terminatedByCounterPartialDefinitions); + } + + @Override + protected void outputWriter(String output) { + logger.info(output); + + if (writeToFile) + Files.appendToFile(outputFile, output + "\n"); + } + +} Property changes on: trunk/interfaces/src/main/java/org/dllearner/cli/PADCELCrossValidation.java ___________________________________________________________________ Added: svn:mime-type + text/plain Deleted: trunk/interfaces/src/main/java/org/dllearner/cli/PDLLCrossValidation.java =================================================================== --- trunk/interfaces/src/main/java/org/dllearner/cli/PDLLCrossValidation.java 2012-05-14 04:00:47 UTC (rev 3708) +++ trunk/interfaces/src/main/java/org/dllearner/cli/PDLLCrossValidation.java 2012-05-14 04:04:43 UTC (rev 3709) @@ -1,320 +0,0 @@ -package org.dllearner.cli; - -import java.text.DecimalFormat; -import java.util.Collections; -import java.util.LinkedList; -import java.util.List; -import java.util.Random; -import java.util.Set; - - -import org.apache.log4j.Logger; -import org.dllearner.algorithms.PADCEL.PADCELAbstract; -import org.dllearner.algorithms.PADCEL.PADCELPosNegLP; -import org.dllearner.algorithms.PADCELEx.PADCELExAbstract; -import org.dllearner.core.AbstractCELA; -import org.dllearner.core.AbstractReasonerComponent; -import org.dllearner.core.ComponentInitException; -import org.dllearner.core.owl.Description; -import org.dllearner.core.owl.Individual; -import org.dllearner.learningproblems.Heuristics; -import org.dllearner.learningproblems.PosNegLP; -import org.dllearner.utilities.Files; -import org.dllearner.utilities.Helper; -import org.dllearner.utilities.statistics.Stat; - -/** - * Add PDLL cross validation support to Jens Lehmann work ( - * {@link org.dllearner.cli.CrossValidation}). 
In this cross validation, - * some more addition dimensions will be investigated such as: - * number partial definitions, partial definition length, etc. - * - * - * @author actran - * - */ - -public class PDLLCrossValidation extends CrossValidation { - - protected Stat noOfPartialDef = new Stat(); - protected Stat partialDefinitionLength = new Stat(); - - Logger logger = Logger.getLogger(this.getClass()); - - protected boolean interupted = false; - - /** - * Default constructor - */ - - public PDLLCrossValidation(AbstractCELA la, PosNegLP lp, AbstractReasonerComponent rs, - int folds, boolean leaveOneOut) { - super(la, lp, rs, folds, leaveOneOut); - } - - /** - * This is for PDLL cross validation - * - * @param la - * @param lp - * @param rs - * @param folds - * @param leaveOneOut - */ - public PDLLCrossValidation(AbstractCELA la, PADCELPosNegLP lp, AbstractReasonerComponent rs, - int folds, boolean leaveOneOut) { - - super(); // do nothing - - DecimalFormat df = new DecimalFormat(); - - // the training and test sets used later on - List<Set<Individual>> trainingSetsPos = new LinkedList<Set<Individual>>(); - List<Set<Individual>> trainingSetsNeg = new LinkedList<Set<Individual>>(); - List<Set<Individual>> testSetsPos = new LinkedList<Set<Individual>>(); - List<Set<Individual>> testSetsNeg = new LinkedList<Set<Individual>>(); - - // get examples and shuffle them too - Set<Individual> posExamples = lp.getPositiveExamples(); - List<Individual> posExamplesList = new LinkedList<Individual>(posExamples); - Collections.shuffle(posExamplesList, new Random(1)); - Set<Individual> negExamples = lp.getNegativeExamples(); - List<Individual> negExamplesList = new LinkedList<Individual>(negExamples); - Collections.shuffle(negExamplesList, new Random(2)); - - // sanity check whether nr. of folds makes sense for this benchmark - if (!leaveOneOut && (posExamples.size() < folds && negExamples.size() < folds)) { - System.out.println("The number of folds is higher than the number of " - + "positive/negative examples. This can result in empty test sets. Exiting."); - System.exit(0); - } - - if (leaveOneOut) { - // note that leave-one-out is not identical to k-fold with - // k = nr. of examples in the current implementation, because - // with n folds and n examples there is no guarantee that a fold - // is never empty (this is an implementation issue) - int nrOfExamples = posExamples.size() + negExamples.size(); - for (int i = 0; i < nrOfExamples; i++) { - // ... - } - System.out.println("Leave-one-out not supported yet."); - System.exit(1); - } else { - // calculating where to split the sets, ; note that we split - // positive and negative examples separately such that the - // distribution of positive and negative examples remains similar - // (note that there are better but more complex ways to implement - // this, - // which guarantee that the sum of the elements of a fold for pos - // and neg differs by at most 1 - it can differ by 2 in our - // implementation, - // e.g. with 3 folds, 4 pos. examples, 4 neg. 
examples) - int[] splitsPos = calculateSplits(posExamples.size(), folds); - int[] splitsNeg = calculateSplits(negExamples.size(), folds); - - // System.out.println(splitsPos[0]); - // System.out.println(splitsNeg[0]); - - // calculating training and test sets - for (int i = 0; i < folds; i++) { - Set<Individual> testPos = getTestingSet(posExamplesList, splitsPos, i); - Set<Individual> testNeg = getTestingSet(negExamplesList, splitsNeg, i); - testSetsPos.add(i, testPos); - testSetsNeg.add(i, testNeg); - trainingSetsPos.add(i, getTrainingSet(posExamples, testPos)); - trainingSetsNeg.add(i, getTrainingSet(negExamples, testNeg)); - } - - } - - // run the algorithm - int terminatedBypartialDefinition = 0, terminatedByCounterPartialDefinitions = 0; - - for (int currFold = 0; (currFold < folds); currFold++) { - - if (this.interupted) { - outputWriter("Cross validation has been interupted"); - return; - } - - // Set<String> pos = - // Datastructures.individualSetToStringSet(trainingSetsPos.get(currFold)); - // Set<String> neg = - // Datastructures.individualSetToStringSet(trainingSetsNeg.get(currFold)); - lp.setPositiveExamples(trainingSetsPos.get(currFold)); - lp.setNegativeExamples(trainingSetsNeg.get(currFold)); - - try { - lp.init(); - la.init(); - } catch (ComponentInitException e) { - e.printStackTrace(); - } - - long algorithmStartTime = System.nanoTime(); - try { - la.start(); - } catch (OutOfMemoryError e) { - System.out.println("out of memory at " - + (System.currentTimeMillis() - algorithmStartTime) / 1000 + "s"); - } - - long algorithmDuration = System.nanoTime() - algorithmStartTime; - runtime.addNumber(algorithmDuration / (double) 1000000000); - - Description concept = ((PADCELAbstract) la).getUnionCurrenlyBestDescription(); - - Set<Individual> tmp = rs.hasType(concept, trainingSetsPos.get(currFold)); - Set<Individual> tmp2 = Helper.difference(trainingSetsPos.get(currFold), tmp); - Set<Individual> tmp3 = rs.hasType(concept, trainingSetsNeg.get(currFold)); - - outputWriter("training set errors pos (" + tmp2.size() + "): " + tmp2); - outputWriter("training set errors neg (" + tmp3.size() + "): " + tmp3); - - tmp = rs.hasType(concept, testSetsPos.get(currFold)); - tmp2 = Helper.difference(testSetsPos.get(currFold), tmp); - tmp3 = rs.hasType(concept, testSetsNeg.get(currFold)); - - outputWriter("test set errors pos: " + tmp2); - outputWriter("test set errors neg: " + tmp3); - - // calculate training accuracies - int trainingCorrectPosClassified = getCorrectPosClassified(rs, concept, - trainingSetsPos.get(currFold)); - int trainingCorrectNegClassified = getCorrectNegClassified(rs, concept, - trainingSetsNeg.get(currFold)); - int trainingCorrectExamples = trainingCorrectPosClassified - + trainingCorrectNegClassified; - double trainingAccuracy = 100 * ((double) trainingCorrectExamples / (trainingSetsPos - .get(currFold).size() + trainingSetsNeg.get(currFold).size())); - - double trainingCompleteness = 100 * (double) trainingCorrectPosClassified - / trainingSetsPos.get(currFold).size(); - double trainingCorrectness = 100 * (double) trainingCorrectNegClassified - / trainingSetsNeg.get(currFold).size(); - - accuracyTraining.addNumber(trainingAccuracy); - trainingCompletenessStat.addNumber(trainingCompleteness); - trainingCorrectnessStat.addNumber(trainingCorrectness); - - // calculate test accuracies - int correctPosClassified = getCorrectPosClassified(rs, concept, - testSetsPos.get(currFold)); - int correctNegClassified = getCorrectNegClassified(rs, concept, - testSetsNeg.get(currFold)); - 
int correctExamples = correctPosClassified + correctNegClassified; - double currAccuracy = 100 * ((double) correctExamples / (testSetsPos.get(currFold) - .size() + testSetsNeg.get(currFold).size())); - - double testingCompleteness = 100 * (double) correctPosClassified - / testSetsPos.get(currFold).size(); - double testingCorrectness = 100 * (double) correctNegClassified - / testSetsNeg.get(currFold).size(); - - accuracy.addNumber(currAccuracy); - testingCompletenessStat.addNumber(testingCompleteness); - testingCorrectnessStat.addNumber(testingCorrectness); - - // calculate training F-Score - int negAsPosTraining = rs.hasType(concept, trainingSetsNeg.get(currFold)).size(); - double precisionTraining = trainingCorrectPosClassified + negAsPosTraining == 0 ? 0 - : trainingCorrectPosClassified - / (double) (trainingCorrectPosClassified + negAsPosTraining); - double recallTraining = trainingCorrectPosClassified - / (double) trainingSetsPos.get(currFold).size(); - fMeasureTraining.addNumber(100 * Heuristics - .getFScore(recallTraining, precisionTraining)); - // calculate test F-Score - int negAsPos = rs.hasType(concept, testSetsNeg.get(currFold)).size(); - double precision = correctPosClassified + negAsPos == 0 ? 0 : correctPosClassified - / (double) (correctPosClassified + negAsPos); - double recall = correctPosClassified / (double) testSetsPos.get(currFold).size(); - // System.out.println(precision);System.out.println(recall); - fMeasure.addNumber(100 * Heuristics.getFScore(recall, precision)); - - length.addNumber(concept.getLength()); - - outputWriter("fold " + currFold + ":"); - outputWriter(" training: " + trainingCorrectPosClassified + "/" - + trainingSetsPos.get(currFold).size() + " positive and " - + trainingCorrectNegClassified + "/" + trainingSetsNeg.get(currFold).size() - + " negative examples"); - outputWriter(" testing: " + correctPosClassified + "/" - + testSetsPos.get(currFold).size() + " correct positives, " - + correctNegClassified + "/" + testSetsNeg.get(currFold).size() - + " correct negatives"); - outputWriter(" concept: " + concept); - outputWriter(" accuracy: " + df.format(currAccuracy) + "% (correctness: " - + df.format(testingCorrectness) + "%; completeness: " - + df.format(testingCompleteness) + "%) --- training set: " - + df.format(trainingAccuracy) + "% (correctness: " - + df.format(trainingCorrectness) + "%; completeness: " - + df.format(trainingCompleteness) + "%)"); - outputWriter(" length: " + df.format(concept.getLength())); - outputWriter(" runtime: " + df.format(algorithmDuration / (double) 1000000000) + "s"); - - if (la instanceof PADCELAbstract) { - int pn = ((PADCELAbstract) la).getNoOfCompactedPartialDefinition(); - this.noOfPartialDef.addNumber(pn); - outputWriter(" number of partial definitions: " + pn + " (total: " - + ((PADCELAbstract) la).getNumberOfPartialDefinitions() + ")"); - - double pl = concept.getLength() / (double) pn; - this.partialDefinitionLength.addNumber(pl); - outputWriter(" avarage partial definition length: " + df.format(pl)); - - // show more information on counter partial definitions - if (la instanceof PADCELExAbstract) { - PADCELExAbstract pdllexla = (PADCELExAbstract) la; - outputWriter(" number of partial definitions for each type: 1:" - + pdllexla.getNumberOfPartialDefinitions(1) + "; 2:" - + pdllexla.getNumberOfPartialDefinitions(2) + "; 3:" - + pdllexla.getNumberOfPartialDefinitions(3) + "; 4:" - + pdllexla.getNumberOfPartialDefinitions(4)); - outputWriter(" number of counter partial definition used: " - + 
(concept.toString().split("NOT ").length - 1) + "/" - + pdllexla.getNumberOfCounterPartialDefinitionUsed()); - if (pdllexla.terminatedByCounterDefinitions()) { - outputWriter(" terminated by counter partial definitions"); - terminatedByCounterPartialDefinitions++; - } else if (pdllexla.terminatedByPartialDefinitions()) { - outputWriter(" terminated by partial definitions"); - terminatedBypartialDefinition++; - } else - outputWriter(" neither terminated by partial definition nor counter partial definition"); - } - } - - } - - outputWriter(""); - outputWriter("Finished " + folds + "-folds cross-validation."); - outputWriter("runtime: " + statOutput(df, runtime, "s")); - outputWriter("#partial definitions: " + statOutput(df, noOfPartialDef, "")); - outputWriter("avg. partial definition length: " - + statOutput(df, partialDefinitionLength, "")); - outputWriter("length: " + statOutput(df, length, "")); - outputWriter("F-Measure on training set: " + statOutput(df, fMeasureTraining, "%")); - outputWriter("F-Measure: " + statOutput(df, fMeasure, "%")); - outputWriter("predictive accuracy on training set: " - + statOutput(df, accuracyTraining, "%") + " --- correctness: " - + statOutput(df, trainingCorrectnessStat, "%") + "; completeness: " - + statOutput(df, trainingCompletenessStat, "%")); - outputWriter("predictive accuracy: " + statOutput(df, accuracy, "%") + " --- correctness: " - + statOutput(df, testingCorrectnessStat, "%") + "; completeness: " - + statOutput(df, testingCompletenessStat, "%")); - if (la instanceof PADCELExAbstract) - outputWriter("terminated by: partial def.: " + terminatedBypartialDefinition - + "; counter partial def.: " + terminatedByCounterPartialDefinitions); - } - - @Override - protected void outputWriter(String output) { - logger.info(output); - - if (writeToFile) - Files.appendToFile(outputFile, output + "\n"); - } - -} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
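The cross-validation above leans on two small numeric helpers inherited from CrossValidation: calculateSplits, which returns cumulative fold boundaries such that fold sizes differ by at most one, and Heuristics.getFScore, the harmonic mean of precision and recall. Their bodies are not part of this revision, so the sketch below reconstructs them from how they are used; treat it as an illustration under those assumptions, not the canonical DL-Learner code.

import java.util.Arrays;

public class CrossValidationMathSketch {

    // boundary indices: fold i covers [splits[i-1], splits[i]) of the shuffled list
    static int[] calculateSplits(int nrOfExamples, int folds) {
        int[] splits = new int[folds];
        for (int i = 1; i <= folds; i++) {
            // upper boundary of fold i, rounded up so fold sizes differ by at most 1
            splits[i - 1] = (int) Math.ceil(i * nrOfExamples / (double) folds);
        }
        return splits;
    }

    // F1 score: harmonic mean of precision and recall, 0 when both are 0
    static double getFScore(double recall, double precision) {
        return (precision + recall == 0) ? 0
                : 2 * precision * recall / (precision + recall);
    }

    public static void main(String[] args) {
        System.out.println(Arrays.toString(calculateSplits(10, 3))); // [4, 7, 10]
        System.out.println(getFScore(0.8, 0.5)); // ~0.615
    }
}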
From: <lor...@us...> - 2014-05-02 19:17:08
Revision: 4254 http://sourceforge.net/p/dl-learner/code/4254 Author: lorenz_b Date: 2014-05-02 19:17:05 +0000 (Fri, 02 May 2014) Log Message: ----------- Added QTL cross validation. Modified Paths: -------------- trunk/interfaces/src/main/java/org/dllearner/cli/CLI.java Added Paths: ----------- trunk/interfaces/src/main/java/org/dllearner/cli/SPARQLCrossValidation.java Modified: trunk/interfaces/src/main/java/org/dllearner/cli/CLI.java =================================================================== --- trunk/interfaces/src/main/java/org/dllearner/cli/CLI.java 2014-04-30 11:45:53 UTC (rev 4253) +++ trunk/interfaces/src/main/java/org/dllearner/cli/CLI.java 2014-05-02 19:17:05 UTC (rev 4254) @@ -27,18 +27,23 @@ import java.util.List; import java.util.Map.Entry; - import org.apache.commons.lang.exception.ExceptionUtils; import org.apache.log4j.Level; import org.apache.xmlbeans.XmlObject; import org.dllearner.algorithms.ParCEL.ParCELPosNegLP; +import org.dllearner.algorithms.qtl.QTL2; import org.dllearner.configuration.IConfiguration; import org.dllearner.configuration.spring.ApplicationContextBuilder; import org.dllearner.configuration.spring.DefaultApplicationContextBuilder; import org.dllearner.configuration.util.SpringConfigurationXMLBeanConverter; import org.dllearner.confparser3.ConfParserConfiguration; import org.dllearner.confparser3.ParseException; -import org.dllearner.core.*; +import org.dllearner.core.AbstractCELA; +import org.dllearner.core.AbstractReasonerComponent; +import org.dllearner.core.ComponentInitException; +import org.dllearner.core.KnowledgeSource; +import org.dllearner.core.LearningAlgorithm; +import org.dllearner.core.ReasoningMethodUnsupportedException; import org.dllearner.learningproblems.PosNegLP; import org.dllearner.utilities.Files; import org.slf4j.Logger; @@ -136,7 +141,11 @@ } catch (BeansException be) { PosNegLP lp = context.getBean(PosNegLP.class); - new CrossValidation(la,lp,rs,nrOfFolds,false); + if(la instanceof QTL2){ + new SPARQLCrossValidation((QTL2) la,lp,nrOfFolds,false); + } else { + new CrossValidation(la,lp,rs,nrOfFolds,false); + } } } else { Added: trunk/interfaces/src/main/java/org/dllearner/cli/SPARQLCrossValidation.java =================================================================== --- trunk/interfaces/src/main/java/org/dllearner/cli/SPARQLCrossValidation.java (rev 0) +++ trunk/interfaces/src/main/java/org/dllearner/cli/SPARQLCrossValidation.java 2014-05-02 19:17:05 UTC (rev 4254) @@ -0,0 +1,373 @@ +/** + * Copyright (C) 2007-2008, Jens Lehmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + */ +package org.dllearner.cli; + +import java.io.File; +import java.text.DecimalFormat; +import java.util.Collections; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Random; +import java.util.Set; +import java.util.TreeSet; + +import org.dllearner.algorithms.qtl.QTL2; +import org.dllearner.algorithms.qtl.datastructures.QueryTree; +import org.dllearner.algorithms.qtl.datastructures.impl.QueryTreeImpl.LiteralNodeSubsumptionStrategy; +import org.dllearner.core.AbstractLearningProblem; +import org.dllearner.core.ComponentInitException; +import org.dllearner.core.owl.Description; +import org.dllearner.core.owl.Individual; +import org.dllearner.learningproblems.Heuristics; +import org.dllearner.learningproblems.PosNegLP; +import org.dllearner.learningproblems.PosOnlyLP; +import org.dllearner.reasoning.SPARQLReasoner; +import org.dllearner.utilities.Files; +import org.dllearner.utilities.Helper; +import org.dllearner.utilities.datastructures.Datastructures; +import org.dllearner.utilities.statistics.Stat; + +/** + * Performs cross validation for the given problem. Supports + * k-fold cross-validation and leave-one-out cross-validation. + * + * @author Jens Lehmann + * + */ +public class SPARQLCrossValidation { + + // statistical values + protected Stat runtime = new Stat(); + protected Stat accuracy = new Stat(); + protected Stat length = new Stat(); + protected Stat accuracyTraining = new Stat(); + protected Stat fMeasure = new Stat(); + protected Stat fMeasureTraining = new Stat(); + protected static boolean writeToFile = false; + protected static File outputFile; + + + protected Stat trainingCompletenessStat = new Stat(); + protected Stat trainingCorrectnessStat = new Stat(); + + protected Stat testingCompletenessStat = new Stat(); + protected Stat testingCorrectnessStat = new Stat(); + + LiteralNodeSubsumptionStrategy literalNodeSubsumptionStrategy = LiteralNodeSubsumptionStrategy.INTERVAL; + + public SPARQLCrossValidation() { + + } + + public SPARQLCrossValidation(QTL2 la, AbstractLearningProblem lp, int folds, boolean leaveOneOut) { + + DecimalFormat df = new DecimalFormat(); + + // the training and test sets used later on + List<Set<Individual>> trainingSetsPos = new LinkedList<Set<Individual>>(); + List<Set<Individual>> trainingSetsNeg = new LinkedList<Set<Individual>>(); + List<Set<Individual>> testSetsPos = new LinkedList<Set<Individual>>(); + List<Set<Individual>> testSetsNeg = new LinkedList<Set<Individual>>(); + + // get examples and shuffle them too + Set<Individual> posExamples; + Set<Individual> negExamples; + if(lp instanceof PosNegLP){ + posExamples = ((PosNegLP)lp).getPositiveExamples(); + negExamples = ((PosNegLP)lp).getNegativeExamples(); + } else if(lp instanceof PosOnlyLP){ + posExamples = ((PosNegLP)lp).getPositiveExamples(); + negExamples = new HashSet<Individual>(); + } else { + throw new IllegalArgumentException("Only PosNeg and PosOnly learning problems are supported"); + } + List<Individual> posExamplesList = new LinkedList<Individual>(posExamples); + List<Individual> negExamplesList = new LinkedList<Individual>(negExamples); + Collections.shuffle(posExamplesList, new Random(1)); + Collections.shuffle(negExamplesList, new Random(2)); + + // sanity check whether nr. of folds makes sense for this benchmark + if(!leaveOneOut && (posExamples.size()<folds && negExamples.size()<folds)) { + System.out.println("The number of folds is higher than the number of " + + "positive/negative examples. 
This can result in empty test sets. Exiting."); 
+ System.exit(0); 
+ } 
+ 
+ if(leaveOneOut) { 
+ // note that leave-one-out is not identical to k-fold with 
+ // k = nr. of examples in the current implementation, because 
+ // with n folds and n examples there is no guarantee that a fold 
+ // is never empty (this is an implementation issue) 
+ int nrOfExamples = posExamples.size() + negExamples.size(); 
+ for(int i = 0; i < nrOfExamples; i++) { 
+ // ... 
+ } 
+ System.out.println("Leave-one-out not supported yet."); 
+ System.exit(1); 
+ } else { 
+ // calculating where to split the sets; note that we split 
+ // positive and negative examples separately such that the 
+ // distribution of positive and negative examples remains similar 
+ // (note that there are better but more complex ways to implement this, 
+ // which guarantee that the sum of the elements of a fold for pos 
+ // and neg differs by at most 1 - it can differ by 2 in our implementation, 
+ // e.g. with 3 folds, 4 pos. examples, 4 neg. examples) 
+ int[] splitsPos = calculateSplits(posExamples.size(),folds); 
+ int[] splitsNeg = calculateSplits(negExamples.size(),folds); 
+ 
+// System.out.println(splitsPos[0]); 
+// System.out.println(splitsNeg[0]); 
+ 
+ // calculating training and test sets 
+ for(int i=0; i<folds; i++) { 
+ Set<Individual> testPos = getTestingSet(posExamplesList, splitsPos, i); 
+ Set<Individual> testNeg = getTestingSet(negExamplesList, splitsNeg, i); 
+ testSetsPos.add(i, testPos); 
+ testSetsNeg.add(i, testNeg); 
+ trainingSetsPos.add(i, getTrainingSet(posExamples, testPos)); 
+ trainingSetsNeg.add(i, getTrainingSet(negExamples, testNeg)); 
+ } 
+ 
+ } 
+ 
+ // run the algorithm 
+ for(int currFold=0; currFold<folds; currFold++) { 
+ 
+ Set<String> pos = Datastructures.individualSetToStringSet(trainingSetsPos.get(currFold)); 
+ Set<String> neg = Datastructures.individualSetToStringSet(trainingSetsNeg.get(currFold)); 
+ if(lp instanceof PosNegLP){ 
+ ((PosNegLP)lp).setPositiveExamples(trainingSetsPos.get(currFold)); 
+ ((PosNegLP)lp).setNegativeExamples(trainingSetsNeg.get(currFold)); 
+ } else if(lp instanceof PosOnlyLP){ 
+ ((PosOnlyLP)lp).setPositiveExamples(new TreeSet<Individual>(trainingSetsPos.get(currFold))); 
+ } 
+ 
+ 
+ try { 
+ lp.init(); 
+ la.init(); 
+ } catch (ComponentInitException e) { 
+ // re-initialisation of the learning problem or algorithm failed for this fold 
+ e.printStackTrace(); 
+ } 
+ 
+ long algorithmStartTime = System.nanoTime(); 
+ la.start(); 
+ long algorithmDuration = System.nanoTime() - algorithmStartTime; 
+ runtime.addNumber(algorithmDuration/(double)1000000000); 
+ 
+ Description concept = la.getCurrentlyBestDescription(); 
+ System.out.println(concept); 
+// Set<Individual> tmp = rs.hasType(concept, testSetsPos.get(currFold)); 
+ Set<Individual> tmp = hasType(testSetsPos.get(currFold), la); 
+ Set<Individual> tmp2 = Helper.difference(testSetsPos.get(currFold), tmp); 
+// Set<Individual> tmp3 = rs.hasType(concept, testSetsNeg.get(currFold)); 
+ Set<Individual> tmp3 = hasType(testSetsNeg.get(currFold), la); 
+ 
+ outputWriter("test set errors pos: " + tmp2); 
+ outputWriter("test set errors neg: " + tmp3); 
+ 
+ // calculate training accuracies 
+// int trainingCorrectPosClassified = getCorrectPosClassified(rs, concept, trainingSetsPos.get(currFold)); 
+ int trainingCorrectPosClassified = getCorrectPosClassified(trainingSetsPos.get(currFold), la); 
+// int trainingCorrectNegClassified = getCorrectNegClassified(rs, concept, trainingSetsNeg.get(currFold)); 
+ int trainingCorrectNegClassified = getCorrectNegClassified(trainingSetsNeg.get(currFold), la); 
+ int trainingCorrectExamples 
= trainingCorrectPosClassified + trainingCorrectNegClassified; + double trainingAccuracy = 100*((double)trainingCorrectExamples/(trainingSetsPos.get(currFold).size()+ + trainingSetsNeg.get(currFold).size())); + accuracyTraining.addNumber(trainingAccuracy); + // calculate test accuracies +// int correctPosClassified = getCorrectPosClassified(rs, concept, testSetsPos.get(currFold)); + int correctPosClassified = getCorrectPosClassified(testSetsPos.get(currFold), la); +// int correctNegClassified = getCorrectNegClassified(rs, concept, testSetsNeg.get(currFold)); + int correctNegClassified = getCorrectNegClassified(testSetsNeg.get(currFold), la); + int correctExamples = correctPosClassified + correctNegClassified; + double currAccuracy = 100*((double)correctExamples/(testSetsPos.get(currFold).size()+ + testSetsNeg.get(currFold).size())); + accuracy.addNumber(currAccuracy); + // calculate training F-Score +// int negAsPosTraining = rs.hasType(concept, trainingSetsNeg.get(currFold)).size(); + int negAsPosTraining = trainingSetsNeg.get(currFold).size() - trainingCorrectNegClassified; + double precisionTraining = trainingCorrectPosClassified + negAsPosTraining == 0 ? 0 : trainingCorrectPosClassified / (double) (trainingCorrectPosClassified + negAsPosTraining); + double recallTraining = trainingCorrectPosClassified / (double) trainingSetsPos.get(currFold).size(); + fMeasureTraining.addNumber(100*Heuristics.getFScore(recallTraining, precisionTraining)); + // calculate test F-Score +// int negAsPos = rs.hasType(concept, testSetsNeg.get(currFold)).size(); + int negAsPos = testSetsNeg.get(currFold).size() - correctNegClassified; + double precision = correctPosClassified + negAsPos == 0 ? 0 : correctPosClassified / (double) (correctPosClassified + negAsPos); + double recall = correctPosClassified / (double) testSetsPos.get(currFold).size(); +// System.out.println(precision);System.out.println(recall); + fMeasure.addNumber(100*Heuristics.getFScore(recall, precision)); + + length.addNumber(concept.getLength()); + + outputWriter("fold " + currFold + ":"); + outputWriter(" training: " + pos.size() + " positive and " + neg.size() + " negative examples"); + outputWriter(" testing: " + correctPosClassified + "/" + testSetsPos.get(currFold).size() + " correct positives, " + + correctNegClassified + "/" + testSetsNeg.get(currFold).size() + " correct negatives"); + outputWriter(" concept: " + concept); + outputWriter(" accuracy: " + df.format(currAccuracy) + "% (" + df.format(trainingAccuracy) + "% on training set)"); + outputWriter(" length: " + df.format(concept.getLength())); + outputWriter(" runtime: " + df.format(algorithmDuration/(double)1000000000) + "s"); + + } + + outputWriter(""); + outputWriter("Finished " + folds + "-folds cross-validation."); + outputWriter("runtime: " + statOutput(df, runtime, "s")); + outputWriter("length: " + statOutput(df, length, "")); + outputWriter("F-Measure on training set: " + statOutput(df, fMeasureTraining, "%")); + outputWriter("F-Measure: " + statOutput(df, fMeasure, "%")); + outputWriter("predictive accuracy on training set: " + statOutput(df, accuracyTraining, "%")); + outputWriter("predictive accuracy: " + statOutput(df, accuracy, "%")); + + } + + protected int getCorrectPosClassified(SPARQLReasoner rs, Description concept, Set<Individual> testSetPos) { + return rs.hasType(concept, testSetPos).size(); + } + + protected Set<Individual> hasType(Set<Individual> individuals, QTL2 qtl) { + Set<Individual> coveredIndividuals = new HashSet<Individual>(); + QueryTree<String> 
solutionTree = qtl.getBestSolution().getTree(); 
+ QueryTree<String> tree; 
+ for (Individual ind : individuals) { 
+ tree = qtl.getTreeCache().getQueryTree(ind.getName()); 
+ if(tree.isSubsumedBy(solutionTree, literalNodeSubsumptionStrategy)){ 
+ coveredIndividuals.add(ind); 
+ } else { 
+// System.out.println("NOT COVERED"); 
+// System.out.println(tree.isSubsumedBy(solutionTree, literalNodeSubsumptionStrategy)); 
+// System.out.println(tree.isSubsumedBy(solutionTree)); 
+// tree.isSubsumedBy(solutionTree, literalNodeSubsumptionStrategy); 
+// tree.dump(); 
+ } 
+ } 
+ return coveredIndividuals; 
+ } 
+ 
+ protected int getCorrectPosClassified(Set<Individual> testSetPos, QTL2 qtl) { 
+ QueryTree<String> tree = qtl.getBestSolution().getTree(); 
+ QueryTree<String> posTree; 
+ int i = 0; 
+ for (Individual posInd : testSetPos) { 
+ posTree = qtl.getTreeCache().getQueryTree(posInd.getName()); 
+ if(posTree.isSubsumedBy(tree, literalNodeSubsumptionStrategy)){ 
+ i++; 
+ } 
+ else { 
+ System.out.println("POS NOT COVERED"); 
+ posTree.dump(); 
+ } 
+ } 
+ return i; 
+ } 
+ 
+ protected int getCorrectNegClassified(SPARQLReasoner rs, Description concept, Set<Individual> testSetNeg) { 
+ return testSetNeg.size() - rs.hasType(concept, testSetNeg).size(); 
+ } 
+ 
+ protected int getCorrectNegClassified(Set<Individual> testSetNeg, QTL2 qtl) { 
+ QueryTree<String> tree = qtl.getBestSolution().getTree(); 
+ QueryTree<String> negTree; 
+ int i = testSetNeg.size(); 
+ for (Individual negInd : testSetNeg) { 
+ negTree = qtl.getTreeCache().getQueryTree(negInd.getName()); 
+ if(negTree.isSubsumedBy(tree, literalNodeSubsumptionStrategy)){ 
+ i--; 
+ } 
+ } 
+ return i; 
+ } 
+ 
+ public static Set<Individual> getTestingSet(List<Individual> examples, int[] splits, int fold) { 
+ int fromIndex; 
+ // we either start from 0 or after the last fold ended 
+ if(fold == 0) 
+ fromIndex = 0; 
+ else 
+ fromIndex = splits[fold-1]; 
+ // the split corresponds to the ends of the folds 
+ int toIndex = splits[fold]; 
+ 
+// System.out.println("from " + fromIndex + " to " + toIndex); 
+ 
+ Set<Individual> testingSet = new HashSet<Individual>(); 
+ // the 2nd argument of subList is exclusive, so the fold ends right before toIndex 
+ testingSet.addAll(examples.subList(fromIndex, toIndex)); 
+ return testingSet; 
+ } 
+ 
+ public static Set<Individual> getTrainingSet(Set<Individual> examples, Set<Individual> testingSet) { 
+ return Helper.difference(examples, testingSet); 
+ } 
+ 
+ // takes the nr. of examples and the nr. of folds for these examples; 
+ // returns an array which says where each fold ends, i.e. 
+ // splits[i] is the index after the last element of fold i in the examples 
+ public static int[] calculateSplits(int nrOfExamples, int folds) { 
+ int[] splits = new int[folds]; 
+ for(int i=1; i<=folds; i++) { 
+ // we always round up to the next integer 
+ splits[i-1] = (int)Math.ceil(i*nrOfExamples/(double)folds); 
+ } 
+ return splits; 
+ } 
+ 
+ public static String statOutput(DecimalFormat df, Stat stat, String unit) { 
+ String str = "av. 
" + df.format(stat.getMean()) + unit; + str += " (deviation " + df.format(stat.getStandardDeviation()) + unit + "; "; + str += "min " + df.format(stat.getMin()) + unit + "; "; + str += "max " + df.format(stat.getMax()) + unit + ")"; + return str; + } + + public Stat getAccuracy() { + return accuracy; + } + + public Stat getLength() { + return length; + } + + public Stat getRuntime() { + return runtime; + } + + protected void outputWriter(String output) { + if(writeToFile) { + Files.appendToFile(outputFile, output +"\n"); + System.out.println(output); + } else { + System.out.println(output); + } + + } + + public Stat getfMeasure() { + return fMeasure; + } + + public Stat getfMeasureTraining() { + return fMeasureTraining; + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |