From: <ku...@us...> - 2008-08-31 15:04:23
Revision: 1156
          http://dl-learner.svn.sourceforge.net/dl-learner/?rev=1156&view=rev
Author:   kurzum
Date:     2008-08-31 15:04:13 +0000 (Sun, 31 Aug 2008)

Log Message:
-----------

Modified Paths:
--------------
    trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlKnowledgeSource.java
    trunk/src/dl-learner/org/dllearner/scripts/SemanticBible2.java

Added Paths:
-----------
    trunk/src/dl-learner/org/dllearner/utilities/StringFormatter.java

Modified: trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlKnowledgeSource.java
===================================================================
--- trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlKnowledgeSource.java	2008-08-30 16:43:11 UTC (rev 1155)
+++ trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlKnowledgeSource.java	2008-08-31 15:04:13 UTC (rev 1156)
@@ -83,8 +83,8 @@
 	static final boolean debug = false;
 	static final boolean debugUseImprovedTupleAquisitor = debug && false; //switches tupleaquisitor
 	static final boolean debugExitAfterExtraction = debug && false; //switches sysex und rdf generation
-	static final boolean debugAdditionallyGenerateRDF = debug && true;
+	
 	private boolean useCache=true;
 	
 	// ConfigOptions
 	public URL url;
@@ -100,6 +100,7 @@
 	// private Set<String> classList;
 	private String format = "N-TRIPLES";
 	private boolean dumpToFile = true;
+	private boolean convertNT2RDF = true ;
 	private boolean useLits = false;
 	private boolean getAllSuperClasses = true;
 	private boolean closeAfterRecursion = true;
@@ -172,6 +173,11 @@
 						"dumpToFile",
 						"Specifies whether the extracted ontology is written to a file or not.",
 						true));
+		options
+				.add(new BooleanConfigOption(
+						"convertNT2RDF",
+						"Specifies whether the extracted NTriples are converted to RDF and deleted.",
+						true));
 		options.add(new BooleanConfigOption("useLits",
 				"use Literals in SPARQL query"));
 		options
@@ -250,6 +256,8 @@
 			format = (String) entry.getValue();
 		} else if (option.equals("dumpToFile")) {
 			dumpToFile = (Boolean) entry.getValue();
+		} else if (option.equals("convertNT2RDF")) {
+			convertNT2RDF = (Boolean) entry.getValue();
 		} else if (option.equals("useLits")) {
 			useLits = (Boolean) entry.getValue();
 		} else if (option.equals("useCache")) {
@@ -329,16 +337,21 @@
 				new File(basedir).mkdir();
 			}
 			
+			File dump = new File(basedir + filename);
+			
 			FileWriter fw = new FileWriter(
-					new File(basedir + filename), true);
+					dump , true);
 			fw.write(ont);
 			fw.flush();
 			fw.close();
 			
-			dumpFile = (new File(basedir + filename)).toURI().toURL();
-			if(debugAdditionallyGenerateRDF){
-				NT2RDF.convertNT2RDF(basedir + filename);
-				//System.exit(0);
+			
+			dumpFile = (dump).toURI().toURL();
+			
+			
+			if(convertNT2RDF){
+				NT2RDF.convertNT2RDF(dump.getAbsolutePath());
+			
 			}
 		} catch (Exception e) {
 			e.printStackTrace();

Modified: trunk/src/dl-learner/org/dllearner/scripts/SemanticBible2.java
===================================================================
--- trunk/src/dl-learner/org/dllearner/scripts/SemanticBible2.java	2008-08-30 16:43:11 UTC (rev 1155)
+++ trunk/src/dl-learner/org/dllearner/scripts/SemanticBible2.java	2008-08-31 15:04:13 UTC (rev 1156)
@@ -20,35 +20,34 @@
 package org.dllearner.scripts;
 
 import java.io.File;
-import java.util.ArrayList;
 import java.util.List;
+import java.util.SortedSet;
+import java.util.StringTokenizer;
+import java.util.TreeSet;
 
 import org.apache.log4j.ConsoleAppender;
 import org.apache.log4j.FileAppender;
 import org.apache.log4j.Level;
 import org.apache.log4j.Logger;
 import org.apache.log4j.SimpleLayout;
-import org.dllearner.algorithms.refexamples.ExampleBasedROLComponent;
 import org.dllearner.cli.Start;
 import org.dllearner.core.Component;
 import org.dllearner.core.ComponentManager;
-import org.dllearner.core.KnowledgeSource;
+import org.dllearner.core.EvaluatedDescription;
 import org.dllearner.core.LearningAlgorithm;
 import org.dllearner.core.ReasoningService;
-import org.dllearner.kb.extraction.ExtractionAlgorithm;
-import org.dllearner.kb.extraction.Manager;
-import org.dllearner.kb.sparql.Cache;
-import org.dllearner.kb.sparql.SparqlKnowledgeSource;
-import org.dllearner.kb.sparql.SparqlQuery;
-import org.dllearner.reasoning.FastInstanceChecker;
+import org.dllearner.core.owl.Description;
+import org.dllearner.core.owl.Individual;
 import org.dllearner.utilities.Files;
-import org.dllearner.utilities.examples.AutomaticNegativeExampleFinderSPARQL;
-import org.dllearner.utilities.examples.AutomaticPositiveExampleFinderOWL;
-import org.dllearner.utilities.examples.AutomaticPositiveExampleFinderSPARQL;
+import org.dllearner.utilities.JamonMonitorLogger;
+import org.dllearner.utilities.StringFormatter;
+import org.dllearner.utilities.datastructures.SetManipulation;
 import org.dllearner.utilities.owl.ReasoningServiceFactory;
 import org.dllearner.utilities.owl.ReasoningServiceFactory.AvailableReasoners;
 import org.dllearner.utilities.statistics.SimpleClock;
 
+import com.jamonapi.Monitor;
+
 public class SemanticBible2 {
 
 	private static ReasoningService reasoningService;
@@ -62,7 +61,9 @@
 	public static String normaldir = dir+"normal/";
 	public static String tmpFilename = dir + "tmp.conf";
 	
+	static File log = new File(dir+"results.txt");
+	
 	//private static Class usedReasoner = FastInstanceChecker.class;
 	
 	private static boolean useSPARQL = true;
 	
@@ -71,90 +72,191 @@
 	 * @param args
 	 */
 	public static void main(String[] args) {
-		SimpleClock sc = new SimpleClock();
+		
 		initLogger();
 		logger.info("Start");
 		File tmpFile = new File(tmpFilename);
+		String del="\t";
+		String cr="\n";
+		int max = 100;
+		SortedSet<String> confs = getFilesContaining(useSPARQL,"ten","all", "99+");
+		analyzeFiles(confs);
+		Files.createFile(log, "accOnFragment"+del+"accOnOnt"+del+"timeFragme"+del+"timeWhole"+cr);
-		List<File> confs = getFilesContaining(useSPARQL,"ten","all", "99+");
-		System.out.println(confs);
-		//reasoningService = ReasoningServiceFactory.getReasoningService(ontologyPath, AvailableReasoners.OWLAPIREASONERPELLET);
+		reasoningService = ReasoningServiceFactory.getReasoningService(ontologyPath, AvailableReasoners.OWLAPIREASONERPELLET);
+		ComponentManager cm =ComponentManager.getInstance();
 		try{
-		for (File file : confs) {
-			System.out.println(file.getAbsolutePath());
-			StringBuffer sbuf = new StringBuffer(Files.readFile( file));
+		int i=0;
+		for (String fileContent : confs) {
+			if(i>= max) {break;}i++;
+			String logLine ="";
+			SortedSet<Individual> posEx = SetManipulation.stringToInd(getIndividuals(fileContent, true));
+			SortedSet<Individual> negEx = SetManipulation.stringToInd(getIndividuals(fileContent, false));
+			
+			
+			StringBuffer sbuf = new StringBuffer(fileContent);
 			sbuf.insert(0, (useSPARQL)?sparqlOptions():normalOptions());
 			Files.createFile(tmpFile, sbuf.toString());
 			//System.out.println(tmpFile.getCanonicalPath());
+			Monitor m = JamonMonitorLogger.getTimeMonitor(SemanticBible2.class, "learn on fragment").start();
 			Start.main(new String[] { tmpFilename });
-			ComponentManager cm =ComponentManager.getInstance();
-			List<Component> comp = cm.getLiveComponents();
-			for (Component component : comp) {
-				System.out.println(component.getClass().getCanonicalName());
-				if(component instanceof LearningAlgorithm){
-					System.out.println("yyyy");
-					System.exit(0);
-				}
-				
-			}
+			m.stop();
+			LearningAlgorithm la = getLearningAlgorithm();
-			
-			Cache.getDefaultCache().clearCache();
+			EvaluatedDescription onFragment =(la.getCurrentlyBestEvaluatedDescription());
+			logLine += StringFormatter.doubleToPercent(onFragment.getAccuracy())+del;
+			SortedSet<Individual> retrieved = reasoningService.retrieval(onFragment.getDescription());
+			EvaluatedDescription onOnto = reEvaluateDescription(
+					onFragment.getDescription(), retrieved, posEx, negEx);
+			logLine += StringFormatter.doubleToPercent(onOnto.getAccuracy())+del;
+			logLine += m.getTotal()+del+"missing instead size of retrieve: "+retrieved.size()+cr;
+			Files.appendFile(log, logLine);
+			//Cache.getDefaultCache().clearCache();
 			cm.freeAllComponents();
 			System.exit(0);
 		}//end for
 		}catch (Exception e) {
 			e.printStackTrace();
 		}
-		logger.info("finished");
+		logger.info("Finished");
 	}
 	
-	public static List<File> getFilesContaining(boolean sparql, String numExamples, String allOrEx, String acc) {
-		List<File> ret = new ArrayList<File>();
-		try{
+	public static EvaluatedDescription reEvaluateDescription(Description d, SortedSet<Individual> retrieved ,SortedSet<Individual> posEx ,SortedSet<Individual> negEx ){
+		SortedSet<Individual> PosAsPos = new TreeSet<Individual>();
+		SortedSet<Individual> PosAsNeg = new TreeSet<Individual>();
+		SortedSet<Individual> NegAsPos = new TreeSet<Individual>();
+		SortedSet<Individual> NegAsNeg = new TreeSet<Individual>();
+		
+		// PosAsPos
+		PosAsPos.addAll(posEx);
+		PosAsPos.retainAll(retrieved);
+		
+		// PosAsNeg
+		PosAsNeg.addAll(posEx);
+		PosAsNeg.removeAll(retrieved);
+		
+		// NegAsPos
+		NegAsPos.addAll(negEx);
+		NegAsPos.retainAll(retrieved);
+		
+		// PosAsNeg
+		NegAsNeg.addAll(negEx);
+		NegAsNeg.removeAll(retrieved);
+		
+		return new EvaluatedDescription(d, PosAsPos, PosAsNeg, NegAsPos,NegAsNeg);
+		
+	}
+	
+	public static LearningAlgorithm getLearningAlgorithm(){
+		ComponentManager cm =ComponentManager.getInstance();
+		
+		List<Component> comp = cm.getLiveComponents();
+		for (Component component : comp) {
+			if(component instanceof LearningAlgorithm){
+				return (LearningAlgorithm) component;
+			}
+			
+		}
+		return null;
+	}
+	
+	public static SortedSet<String> getFilesContaining(boolean sparql, String numExamples, String allOrEx, String acc) {
+		//List<File> ret = new ArrayList<File>();
+		SortedSet<String> ret = new TreeSet<String>();
+		
 		String actualDir = (sparql)?sparqldir:normaldir;
-		System.out.println(actualDir);
+		logger.info(actualDir);
 		File f = new File(actualDir);
 		String[] files = f.list();
-		
+		int consistent = 0;
+		try{
 		for (int i = 0; i < files.length; i++) {
-			System.out.println(files[i]);
-			if(
-					files[i].contains(numExamples)
+			
+			if( files[i].contains(numExamples)
 					&& files[i].contains(allOrEx)
 					&& files[i].contains(acc)
 					){
-				ret.add(new File(actualDir+files[i]));
+				consistent++;
+				ret.add(Files.readFile(new File(actualDir+files[i])));
+				if(ret.size() != consistent){
+					logger.info("double file: "+files[i]);
+				}
 			}
 		}
 		}catch (Exception e) {
-			logger.warn("deleting cache failed");
+			e.printStackTrace();
 		}
+		if(consistent != ret.size()){
+			logger.info("double files"+consistent+"::"+ret.size());
+			System.exit(0);
+		}else{
+			logger.info("all files different");
+		}
 		return ret;
 	}
+	public static void analyzeFiles(SortedSet<String> l){
+		SortedSet<String> differentIndividuals = new TreeSet<String>();
+		for (String content : l) {
+			differentIndividuals.addAll(getIndividuals(content, true));
+			differentIndividuals.addAll(getIndividuals(content, false));
+			
+		}
+		System.out.println("found diff inds "+differentIndividuals.size());
+		
+	}
+	public static SortedSet<String> getIndividuals(String target, boolean posOrNeg){
+		if(posOrNeg){
+			return getAllStringsBetween(target, "+\"", "\"");
+		}else{
+			return getAllStringsBetween(target, "-\"", "\"");
+		}
+		
+	}
+	
+	public static SortedSet<String> getAllStringsBetween(String target, String start, String end){
+		SortedSet<String> ret = new TreeSet<String>();
+		StringTokenizer st = new StringTokenizer(target,"\n");
+		while(st.hasMoreElements()){
+			String line = st.nextToken();
+			if(line.contains(start)){
+				line = line.substring(line.indexOf(start)+start.length());
+				String current = line.substring(0,line.indexOf(end));
+				ret.add(current);
+			}
+		}
+		
+		return ret;
+	}
+	
+	public static String getCombinedOptions(){
+		String s="\n"+
+		"algorithm = refexamples;\n"+
+		"refexamples.useAllConstructor = true;\n"+
+		"refexamples.useNegation = true;\n"+
+		"refexamples.useCardinalityRestrictions = true;\n"+
+		"refexamples.guaranteeXgoodDescriptions = 1;\n"+
+		"refexamples.maxExecutionTimeInSeconds = 100;\n"+
+		"\n"+
+		"reasoner = owlAPI;\n"+
+		//"reasoner = fastInstanceChecker;\n"+
+		"owlAPIReasoner.reasonerType = pellet;\n\n"+
+		"";
+		return s;
+	}
+	
 	public static String sparqlOptions (){
 		String s="// SPARQL options\n"+
 		"sparql.recursionDepth = 3;\n"+
 		"sparql.useLits = true;\n"+
 		"sparql.predefinedEndpoint = \"LOCALJOSEKIBIBLE\";\n"+
 		"import(\"lalala\",\"SPARQL\");\n"+
-		
-		"algorithm = refexamples;\n"+
-		"refexamples.useAllConstructor = true;\n"+
-		"refexamples.useNegation = true;\n"+
-		"refexamples.useCardinalityRestrictions = true;\n"+
-		"refexamples.guaranteeXgoodDescriptions = 1;\n"+
-		"refexamples.maxExecutionTimeInSeconds = 1;\n"+
-		"\n"+
-		"reasoner = owlAPI;\n"+
-		//"reasoner = fastInstanceChecker;\n"+
-		//"owlAPIReasoner.reasonerType = pellet;\n\n";
+		getCombinedOptions()+
 		"";
 		return s;
 	}
 
@@ -162,15 +264,7 @@
 	public static String normalOptions (){
 		String s="\n"+
 		"import(\"NTNcombined.owl\");\n"+
-		"algorithm = refexamples;\n"+
-		"refexamples.useAllConstructor = true;\n"+
-		"refexamples.useNegation = true;\n"+
-		"refexamples.useCardinalityRestrictions = true;\n"+
-		"refexamples.guaranteeXgoodDescriptions = 1;\n"+
-		"\n"+
-		"reasoner = owlAPI;\n"+
-		//"reasoner = fastInstanceChecker;\n"+
-		//"owlAPIReasoner.reasonerType = pellet;\n\n";
+		getCombinedOptions()+
 		"";
 		return s;
 	}
@@ -191,9 +285,9 @@
 		ConsoleAppender consoleAppender = new ConsoleAppender(layout);
 		
 		logger.removeAllAppenders();
-		logger.addAppender(consoleAppender);
+		//logger.addAppender(consoleAppender);
 		logger.addAppender(fileAppender);
 		
-		logger.setLevel(Level.DEBUG);
+		logger.setLevel(Level.INFO);
 		
 	}

Added: trunk/src/dl-learner/org/dllearner/utilities/StringFormatter.java
===================================================================
--- trunk/src/dl-learner/org/dllearner/utilities/StringFormatter.java	                        (rev 0)
+++ trunk/src/dl-learner/org/dllearner/utilities/StringFormatter.java	2008-08-31 15:04:13 UTC (rev 1156)
@@ -0,0 +1,26 @@
+package org.dllearner.utilities;
+
+
+public class StringFormatter {
+	
+	
+	/**
+	 * formats a double value between 0 and 100 to a percentage
+	 * ex: 0.7854684 will be return 78.5%
+	 * @param d
+	 * @return
+	 */
+	public static String doubleToPercent(double d){
+		if(d>1.0 || d<0.0)return "bad format: "+d;
+		else if(d == 1.0){
+			return "100.0%";
+		}else if(d == 0.0 ){
+			return "0.0%";
+		}else {
+			String acc = (d*100)+"";
+			acc = acc.substring(0,"55.5".length());
+			return acc+"%";
+		}
+		
+	}
+}
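A quick usage sketch of the new StringFormatter.doubleToPercent helper (the demo class below is only illustrative and is not part of this revision; in SemanticBible2 the argument is an accuracy between 0.0 and 1.0 taken from an EvaluatedDescription):

    import org.dllearner.utilities.StringFormatter;

    public class StringFormatterDemo {
        public static void main(String[] args) {
            // accuracies are passed as fractions between 0.0 and 1.0
            System.out.println(StringFormatter.doubleToPercent(0.7854684)); // prints 78.5%
            System.out.println(StringFormatter.doubleToPercent(1.0));       // prints 100.0%
            System.out.println(StringFormatter.doubleToPercent(1.5));       // prints "bad format: 1.5"
        }
    }

The convertNT2RDF switch added to SparqlKnowledgeSource is registered as a BooleanConfigOption with default true, so it should be controllable from a conf file like the other SPARQL options, presumably as sparql.convertNT2RDF = false; following the sparql.* naming used in sparqlOptions().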
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.