From: <ku...@us...> - 2010-02-13 13:32:00
|
Revision: 2027 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2027&view=rev Author: kurzum Date: 2010-02-13 13:02:53 +0000 (Sat, 13 Feb 2010) Log Message: ----------- added a script for iterative learning over tiger data Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQueryDescriptionConvertVisitor.java trunk/src/dl-learner/org/dllearner/utilities/examples/ExMakerFixedSize.java Added Paths: ----------- trunk/src/dl-learner/org/dllearner/scripts/tiger/ trunk/src/dl-learner/org/dllearner/scripts/tiger/ExperimentConfig.java trunk/src/dl-learner/org/dllearner/scripts/tiger/LogHelper.java trunk/src/dl-learner/org/dllearner/scripts/tiger/PrefixMap.java trunk/src/dl-learner/org/dllearner/scripts/tiger/TestIterativeLearning.java Modified: trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQueryDescriptionConvertVisitor.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQueryDescriptionConvertVisitor.java 2010-02-13 13:00:12 UTC (rev 2026) +++ trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQueryDescriptionConvertVisitor.java 2010-02-13 13:02:53 UTC (rev 2027) @@ -402,6 +402,14 @@ * .DatatypeValueRestriction) */ public void visit(DatatypeValueRestriction description) { + String current = stack.peek(); + String property = description.getRestrictedPropertyExpression().toString(); + String value = description.getValue().toString(); + System.out.println("here"); + System.out.println(stack.peek()); + System.out.println(current); + System.out.println(property); + System.out.println(value); logger.trace("DatatypeValueRestriction"); } Added: trunk/src/dl-learner/org/dllearner/scripts/tiger/ExperimentConfig.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/tiger/ExperimentConfig.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/scripts/tiger/ExperimentConfig.java 2010-02-13 13:02:53 UTC (rev 2027) @@ -0,0 +1,19 @@ +package org.dllearner.scripts.tiger; + +import org.dllearner.utilities.examples.Examples; + +public class ExperimentConfig { + + + public int resultLimit = -1; + public int splits = 5; + public int initialsplits = 30; + + public int iteration = 1; + public int maxExecutionTime = 3; + + public boolean stopCondition(int iteration, Examples learn){ + return (iteration<this.iteration); + } + +} Added: trunk/src/dl-learner/org/dllearner/scripts/tiger/LogHelper.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/tiger/LogHelper.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/scripts/tiger/LogHelper.java 2010-02-13 13:02:53 UTC (rev 2027) @@ -0,0 +1,56 @@ +package org.dllearner.scripts.tiger; + +import java.io.IOException; + +import org.apache.log4j.ConsoleAppender; +import org.apache.log4j.FileAppender; +import org.apache.log4j.Layout; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.log4j.PatternLayout; +import org.apache.log4j.PropertyConfigurator; + +public class LogHelper { + + public static String log4jConfigFile = "log4j.properties"; + private static Logger rootLogger = Logger.getRootLogger(); + + public static Logger initLoggers() { + initHere(); + return Logger.getRootLogger(); + // initFile(log4jConfigFile); + } + + @SuppressWarnings("unused") + private static void initHere() { + Layout layout = new PatternLayout(); + layout = new PatternLayout("%-5p [%C{1}]: %m%n"); + ConsoleAppender consoleAppender = new ConsoleAppender(layout); +// consoleAppender.setThreshold(Level.DEBUG); + + Layout layout2 = null; + FileAppender fileAppenderNormal = null; + String fileName; + layout2 = new PatternLayout("%-5p [%C{1}]: %m%n"); + fileName = "log/log.txt"; + try { + fileAppenderNormal = new FileAppender(layout2, fileName, false); + } catch (IOException e) { + e.printStackTrace(); + } + + // add both loggers + rootLogger.removeAllAppenders(); + rootLogger.addAppender(consoleAppender); + rootLogger.addAppender(fileAppenderNormal); + rootLogger.setLevel(Level.DEBUG); + } + + @SuppressWarnings("unused") + private static void initFile(String log4jConfigFile) { + + System.out.println("Loading log config from file: '" + log4jConfigFile + "'"); + PropertyConfigurator.configure(log4jConfigFile); + + } +} Added: trunk/src/dl-learner/org/dllearner/scripts/tiger/PrefixMap.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/tiger/PrefixMap.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/scripts/tiger/PrefixMap.java 2010-02-13 13:02:53 UTC (rev 2027) @@ -0,0 +1,39 @@ +package org.dllearner.scripts.tiger; + +import java.util.HashMap; +import java.util.Map; + +import org.dllearner.core.EvaluatedDescription; +import org.dllearner.core.owl.Description; + + +public class PrefixMap { + + static String prefix = "http://nlp2rdf.org/ontology/"; + static Map<String,String> m = getPrefixMap(); + + private static Map<String,String> getPrefixMap(){ + Map<String,String> m = new HashMap<String, String>(); + m.put("stts", "http://nachhalt.sfb632.uni-potsdam.de/owl/stts.owl#"); + m.put("tiger", "http://nachhalt.sfb632.uni-potsdam.de/owl/tiger-syntax.owl#"); + return m; + } + + public static String toKBSyntaxString(EvaluatedDescription d){ + return toKBSyntaxString(d.getDescription()); + } + + public static String toKBSyntaxString(Description d){ + return d.toKBSyntaxString(prefix, m); + } + + public static String toManchesterSyntaxString(Description d){ + return d.toManchesterSyntaxString(prefix, m); + } + + public static String toManchesterSyntaxString(EvaluatedDescription d){ + return toManchesterSyntaxString(d.getDescription()); + } + + +} Added: trunk/src/dl-learner/org/dllearner/scripts/tiger/TestIterativeLearning.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/tiger/TestIterativeLearning.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/scripts/tiger/TestIterativeLearning.java 2010-02-13 13:02:53 UTC (rev 2027) @@ -0,0 +1,380 @@ +package org.dllearner.scripts.tiger; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.net.URL; +import java.text.DecimalFormat; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; + +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.dllearner.algorithms.refinement2.ROLComponent2; +import org.dllearner.algorithms.refinement2.ROLearner2; +import org.dllearner.core.ComponentPool; +import org.dllearner.core.EvaluatedDescription; +import org.dllearner.core.KnowledgeSource; +import org.dllearner.core.LearningAlgorithm; +import org.dllearner.core.LearningProblem; +import org.dllearner.core.ReasonerComponent; +import org.dllearner.core.configurators.ComponentFactory; +import org.dllearner.kb.OWLFile; +import org.dllearner.kb.sparql.Cache; +import org.dllearner.kb.sparql.SPARQLTasks; +import org.dllearner.kb.sparql.SparqlEndpoint; +import org.dllearner.kb.sparql.SparqlKnowledgeSource; +import org.dllearner.kb.sparql.SparqlQuery; +import org.dllearner.kb.sparql.SparqlQueryDescriptionConvertVisitor; +import org.dllearner.learningproblems.PosNegLPStandard; +import org.dllearner.parser.ParseException; +import org.dllearner.reasoning.FastInstanceChecker; +import org.dllearner.utilities.Helper; +import org.dllearner.utilities.JamonMonitorLogger; +import org.dllearner.utilities.examples.ExMakerFixedSize; +import org.dllearner.utilities.examples.ExampleDataCollector; +import org.dllearner.utilities.examples.Examples; + +import com.jamonapi.Monitor; + +public class TestIterativeLearning { + private static final Logger logger = Logger.getLogger(TestIterativeLearning.class); + + static String backgroundXML = "files/tiger.noSchema.noImports.rdf"; + static String propertiesXML = "files/propertiesOnly.rdf"; + static String sentenceXMLFolder = "files/tiger/"; + static String sentenceprefix = "http://nlp2rdf.org/ontology/s"; + static String prefix = "http://nlp2rdf.org/ontology/"; + + static String active = "files/active_all_sentenceNumbers.txt"; + static String passiveNoZU = "files/passive_noZuInf_sentenceNumbers.txt"; + static String passiveWithZu = "files/passive_zuInf_sentenceNumbers.txt"; + static String test_has_pos = "files/test_has_pos.txt"; + static String test_has_neg = "files/test_has_neg.txt"; + + static SparqlEndpoint sparqlEndpoint; + static SPARQLTasks sparqlTasks; + + static String sparqlEndpointURL = "http://db0.aksw.org:8893/sparql"; + static String graph = "http://nlp2rdf.org/tiger"; + static String rulegraph = "http://nlp2rdf.org/schema/rules1"; + + + + final static boolean debug = true; + //no randomization in examples + final static boolean randomizedebug = !debug; + + public static void main(String[] args) { + LogHelper.initLoggers(); + Logger.getLogger(Cache.class).setLevel(Level.INFO); + Logger.getLogger(ComponentPool.class).setLevel(Level.INFO); + Logger.getLogger(ROLearner2.class).setLevel(Level.TRACE); + Logger.getLogger(SparqlQuery.class).setLevel(Level.INFO); + + + + try { + sparqlEndpoint = new SparqlEndpoint(new URL(sparqlEndpointURL), new ArrayList<String>(Arrays + .asList(new String[] { graph })), new ArrayList<String>()); + sparqlTasks = new SPARQLTasks(Cache.getDefaultCache(), sparqlEndpoint); + } catch (Exception e) { + e.printStackTrace(); + } + + Examples allExamples = new Examples(); + SortedSet<String> positives; + SortedSet<String> negatives; + + if(debug) { + positives = read(test_has_pos);; + negatives = read(test_has_neg);; + }else{ +// positives = read(passiveWithZu); + positives = read(passiveNoZU); + negatives = read(active); + } + positives = read(passiveNoZU); + negatives = read(active); + //removing overlap + positives.removeAll(negatives); + negatives.removeAll(positives); + +// System.out.println(Helper.intersection(passiveZuInfSentences, activeSentences)); +// System.out.println(Helper.intersection(passiveZuInfSentences, passiveNoZuSentences)); +// System.out.println(Helper.intersection(activeSentences, passiveNoZuSentences)); + allExamples.addPosTrain(positives); + allExamples.addNegTrain(negatives); + + logger.debug("All examples \n"+allExamples); + + ExperimentConfig config = new ExperimentConfig(); + firstContact( allExamples, config); + //retrieved wird neues Example, als schnittmenge mit all + //und den bisher gewaehlten + //dann splits auswählen und + //pos und neg wieder hinzufuegen + + } + + public static void firstContact(Examples allExamples, ExperimentConfig config){ + ExMakerFixedSize fs = new ExMakerFixedSize(allExamples, randomizedebug); + Examples learn = fs.select(config.initialsplits, config.initialsplits); + logger.debug("Intial training set \n"+learn); +// System.out.println(learn.getPosTrain()); +// System.out.println(learn.getNegTrain()); +// if (true) { +// System.exit(0); +// } +// int size = 0; + for(int i = 0 ; config.stopCondition(i, learn) ;i++ ) { + /*LEARNING*/ + EvaluatedDescription ed = learn(learn, config); + + /*RETRIEVING*/ + SortedSet<String> retrieved = getSentences(ed, config.resultLimit); + logger.debug("Retrieved "+retrieved.size()+" sentences"); + + + /*MASHING*/ + //Menge aller positiven geschn. mit den gefundenen + SortedSet<String> posAsPos = Helper.intersection(retrieved, allExamples.getPosTrain()); + logger.debug("Number of retrieved positives: "+posAsPos.size()); + logger.debug("Number of total positives: "+allExamples.getPosTrain().size()); + precision( posAsPos.size(), retrieved.size()); + + //Menge aller positiven geschn. mit den gefundenen + recall( posAsPos.size(),allExamples.getPosTrain().size()); + + SortedSet<String> negAsPos = Helper.intersection(retrieved, allExamples.getNegTrain()); + logger.debug("Number of retrieved negatives: "+negAsPos.size()); + logger.debug("Total: "+posAsPos.size()+" + "+negAsPos.size() +" = "+retrieved.size()); + +// if(retrieved.size()!=(posAsPos.size()+negAsPos.size())){ +// logger.warn("sets are wrong"); +// System.exit(0); +// } + + Examples newlyFound = new Examples(); + newlyFound.addPosTrain(Helper.intersection(retrieved, learn.getPosTest())); + newlyFound.addNegTrain(Helper.intersection(retrieved, learn.getNegTest())); + fs = new ExMakerFixedSize(newlyFound, randomizedebug); + newlyFound = fs.select(config.splits, config.splits); + + learn.addPosTrain(newlyFound.getPosTrain()); + learn.addNegTrain(newlyFound.getNegTrain()); + logger.debug("Next training set \n"+learn); +// size = learn.getPosTrain().size() + learn.getNegTrain().size(); + + } + + + + + + } + + public static void precision( int posAsPos, int retrieved){ + logger.info("Precision: "+DecimalFormat.getPercentInstance().format(((double)posAsPos)/(double)retrieved)+"%"); + } + public static void recall( int posAsPos, int allPositives){ + logger.info("Recall: "+DecimalFormat.getPercentInstance().format(((double)posAsPos)/(double)allPositives)+"%"); + + } + + private static Set<KnowledgeSource> _getOWL(Examples ex) throws Exception{ + Set<KnowledgeSource> tmp = new HashSet<KnowledgeSource>(); + List<URL> urls = new ArrayList<URL>(); + urls.addAll(ExampleDataCollector.convert(sentenceXMLFolder, ex.getPosTrain())); + urls.addAll(ExampleDataCollector.convert(sentenceXMLFolder, ex.getNegTrain())); + urls.add(new File(backgroundXML).toURI().toURL()); + + for (URL u : urls) { + OWLFile ks = ComponentFactory.getOWLFile(u); + tmp.add(ks); + } + return tmp; + } + @SuppressWarnings("unused") + private static Set<KnowledgeSource> _getSPARQL(Examples ex) throws Exception{ + Set<KnowledgeSource> tmp = new HashSet<KnowledgeSource>(); + + Set<String> examples = new TreeSet<String>(); + examples.addAll(ex.getPosTrain()); + examples.addAll(ex.getNegTrain()); + SparqlKnowledgeSource ks = ComponentFactory.getSparqlKnowledgeSource(new URL(sparqlEndpointURL), examples); + ks.getConfigurator().setUrl(new URL(sparqlEndpointURL)); + ks.getConfigurator().setDefaultGraphURIs(new HashSet<String>(Arrays.asList(new String[]{graph}))); + ks.getConfigurator().setInstances(examples); + ks.getConfigurator().setDissolveBlankNodes(false); + ks.getConfigurator().setRecursionDepth(2); + ks.getConfigurator().setDissolveBlankNodes(false); + ks.getConfigurator().setCloseAfterRecursion(true); + ks.getConfigurator().setGetAllSuperClasses(true); + ks.getConfigurator().setGetPropertyInformation(false); + ks.getConfigurator().setUseLits(true); +// ks.getConfigurator(). + OWLFile ks2 = ComponentFactory.getOWLFile(new File(propertiesXML).toURI().toURL()); + tmp.add(ks); + tmp.add(ks2); + + return tmp; + } + + public static EvaluatedDescription learn(Examples ex, ExperimentConfig config) { + Monitor init = JamonMonitorLogger.getTimeMonitor(TestIterativeLearning.class, "init").start(); + + EvaluatedDescription result = null; + + try { + Set<KnowledgeSource> tmp = _getOWL(ex); +// Set<KnowledgeSource> tmp = _getSPARQL(ex); + + + FastInstanceChecker rc = ComponentFactory.getFastInstanceChecker(tmp); + PosNegLPStandard lp = ComponentFactory + .getPosNegLPStandard(rc, ex.getPosTrain(), ex.getNegTrain()); + LearningAlgorithm la = _getROLLearner(lp, rc, config ); + + for (KnowledgeSource ks : tmp) { + ks.init(); + } + rc.init(); + lp.init(); + la.init(); + init.stop(); + Monitor learning = JamonMonitorLogger.getTimeMonitor(TestIterativeLearning.class, "learning") + .start(); + la.start(); + learning.stop(); + + result = la.getCurrentlyBestEvaluatedDescription(); + logger.debug(PrefixMap.toKBSyntaxString(result.getDescription())); + logger.debug(PrefixMap.toManchesterSyntaxString(result.getDescription())); + + } catch (Exception e) { + e.printStackTrace(); + System.exit(0); + } + return result; + } + + public static SortedSet<String> getSentences(EvaluatedDescription ed, int resultLimit) { + SortedSet<String> result = new TreeSet<String>(); + SparqlQueryDescriptionConvertVisitor visit = new SparqlQueryDescriptionConvertVisitor(); + visit.setDistinct(true); + visit.setLabels(false); + visit.setLimit(resultLimit); + String sparqlQuery = ""; + try { + logger.debug(PrefixMap.toKBSyntaxString(ed.getDescription())); +// sparqlQuery = visit.getSparqlQuery(ed.getDescription().toKBSyntaxString()); +// logger.debug(sparqlQuery); + sparqlQuery = visit.getSparqlQuery(ed.getDescription()); + logger.debug(sparqlQuery); + if (true) { + System.exit(0); + } + } catch (Exception e1) { + e1.printStackTrace(); + } + + logger.debug(PrefixMap.toKBSyntaxString(ed.getDescription())); + + sparqlQuery = " \n define input:inference \"" + rulegraph + "\" \n" + "" + sparqlQuery; + logger.debug(sparqlQuery); + + Monitor m = JamonMonitorLogger.getTimeMonitor(TestIterativeLearning.class, "sparqlquery").start(); + result.addAll(sparqlTasks.queryAsSet(sparqlQuery, "subject")); + logger.debug("query avg: " + ((double)m.getAvg() / (double)1000)+ " seconds (last: "+((double)m.getLastValue() / (double)1000)+")"); + if(debug && result.isEmpty()){ + logger.error("sparql query returned no results "); + System.exit(0); + } + return result; + } + + private static LearningAlgorithm _getROLLearner(LearningProblem lp, ReasonerComponent rc, ExperimentConfig config) + throws Exception { + ROLComponent2 la = ComponentFactory.getROLComponent2(lp, rc); + la.getConfigurator().setUseExistsConstructor(true); + + // la.getConfigurator().setUseAllConstructor(true); + // la.getConfigurator().setUseCardinalityRestrictions(true); + // la.getConfigurator().setUseNegation(true); + // la.getConfigurator().setUseHasValueConstructor(true); + // la.getConfigurator().setValueFrequencyThreshold(10); + + la.getConfigurator().setUseAllConstructor(false); + la.getConfigurator().setUseCardinalityRestrictions(false); + la.getConfigurator().setUseNegation(false); + la.getConfigurator().setUseHasValueConstructor(false); + la.getConfigurator().setUseDataHasValueConstructor(true); +// la.getConfigurator().setValueFrequencyThreshold(1); + + la.getConfigurator().setIgnoredConcepts(new HashSet<String>(Arrays.asList(new String[]{ + "http://nlp2rdf.org/ontology/sentencefinalpunctuation_tag", + "http://nlp2rdf.org/ontology/comma_tag", + "http://nachhalt.sfb632.uni-potsdam.de/owl/stts.owl#SentenceFinalPunctuation" + }))); + + + la.getConfigurator().setNoisePercentage(0); + la.getConfigurator().setTerminateOnNoiseReached(true); +// la.getConfigurator().setStartClass(Config.getConfig().prefix + "Sentence"); + la.getConfigurator().setMaxExecutionTimeInSeconds(config.maxExecutionTime); +// la.getConfigurator().setMinExecutionTimeInSeconds(20); + + // la.getConfigurator().setMinExecutionTimeInSeconds(100); + la.getConfigurator().setWriteSearchTree(false); + la.getConfigurator().setSearchTreeFile("log/searchTree.txt"); + la.getConfigurator().setReplaceSearchTree(false); + return la; + } + + public static SortedSet<String> read(String f) { + SortedSet<String> result = new TreeSet<String>(); + BufferedReader in = null; + try { + in = new BufferedReader(new InputStreamReader(new FileInputStream(f))); + + String line; + while ((line = in.readLine()) != null) { + try { + line = line.trim(); + Integer.parseInt(line); + if (!result.add(sentenceprefix + line)) { + logger.error("reading failed"); + System.exit(0); + } + } catch (Exception e) { + e.printStackTrace(); + System.exit(0); + } + } + + } catch (Exception e) { + e.printStackTrace(); + logger.error("Could not read examples from: " + f); + System.exit(0); + + } finally { + try { + in.close(); + } catch (IOException e) { + e.printStackTrace(); + } + } + logger.info("read " + result.size() + " lines from " + f); + + return result; + } + +} Modified: trunk/src/dl-learner/org/dllearner/utilities/examples/ExMakerFixedSize.java =================================================================== --- trunk/src/dl-learner/org/dllearner/utilities/examples/ExMakerFixedSize.java 2010-02-13 13:00:12 UTC (rev 2026) +++ trunk/src/dl-learner/org/dllearner/utilities/examples/ExMakerFixedSize.java 2010-02-13 13:02:53 UTC (rev 2027) @@ -35,9 +35,15 @@ private static Logger logger = Logger.getLogger(ExMakerFixedSize.class); private final Examples examples; + private final boolean randomize; public ExMakerFixedSize(Examples examples ){ + this(examples, true) ; + } + + public ExMakerFixedSize(Examples examples, boolean randomize ){ this.examples = examples; + this.randomize = randomize; } public static void main(String[] args) { @@ -54,6 +60,24 @@ } + /** + * same as select(int,int) + * uses both times the same number + * @param both + * @return + */ + public Examples select(int both){ + return select( both, both); + } + + /** + * returns a new example object based on all Examples in the old set + * picks a fixed number of examples, puts them into + * training sets rest to test set + * @param nrOfPos + * @param nrOfNeg + * @return + */ public Examples select(int nrOfPos, int nrOfNeg){ SortedSet<String> posTrain = new TreeSet<String>(); @@ -68,14 +92,24 @@ negOld.addAll(examples.getNegativeExamples()); while (!posOld.isEmpty() && posTrain.size()< nrOfPos) { - String one = pickOneRandomly(posOld.toArray(new String[] {})); + String one; + if(randomize){ + one = pickOneRandomly(posOld.toArray(new String[] {})); + }else{ + one = posOld.first(); + } posOld.remove(one); posTrain.add(one); } posTest.addAll(posOld); while (!negOld.isEmpty() && negTrain.size()< nrOfNeg) { - String one = pickOneRandomly(negOld.toArray(new String[] {})); + String one; + if(randomize){ + one = pickOneRandomly(negOld.toArray(new String[] {})); + }else{ + one = negOld.first(); + } negOld.remove(one); negTrain.add(one); } @@ -84,6 +118,7 @@ return new Examples(posTrain, negTrain, posTest, negTest); } + public static String pickOneRandomly(String[] from){ Random r = new Random(); int index = Math.round((float)(from.length*r.nextFloat())); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |