From: <dc...@us...> - 2013-03-12 09:29:07
|
Revision: 3911 http://sourceforge.net/p/dl-learner/code/3911 Author: dcherix Date: 2013-03-12 09:29:04 +0000 (Tue, 12 Mar 2013) Log Message: ----------- statics paths removed Modified Paths: -------------- trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/DBpediaClassLearnerCELOE.java trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/NewSparqlCompDBpediaClassLearnerCELOE.java Modified: trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/DBpediaClassLearnerCELOE.java =================================================================== --- trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/DBpediaClassLearnerCELOE.java 2013-03-04 15:00:36 UTC (rev 3910) +++ trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/DBpediaClassLearnerCELOE.java 2013-03-12 09:29:04 UTC (rev 3911) @@ -88,369 +88,387 @@ * @author Sebastian Hellmann */ public class DBpediaClassLearnerCELOE { - - public static String endpointurl = "http://live.dbpedia.org/sparql"; - public static int examplesize = 30; - - private static org.apache.log4j.Logger logger = org.apache.log4j.Logger - .getLogger(DBpediaClassLearnerCELOE.class); - - SparqlEndpoint sparqlEndpoint = null; - private Cache cache; - - public DBpediaClassLearnerCELOE() { - // OPTIONAL: if you want to do some case distinctions in the learnClass - // method, you could add - // parameters to the constructure e.g. YAGO_ - try { - sparqlEndpoint = new SparqlEndpoint(new URL(endpointurl)); - } catch (MalformedURLException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - cache = new Cache("basCache"); - } - - public static void main(String args[]) - throws LearningProblemUnsupportedException, IOException, Exception { - for (int i = 0; i < 4; i++) { - DBpediaClassLearnerCELOE dcl = new DBpediaClassLearnerCELOE(); - Set<String> classesToLearn = dcl.getClasses(); - Monitor mon = MonitorFactory.start("Learn DBpedia"); - KB kb = dcl.learnAllClasses(classesToLearn); - mon.stop(); - kb.export(new File("/home/dcherix/dllearner/old/result"+i+".owl"), - OntologyFormat.RDF_XML); - // Set<String> pos = - // dcl.getPosEx("http://dbpedia.org/ontology/Person"); - // dcl.getNegEx("http://dbpedia.org/ontology/Person", pos); - logger.info("Test" + i + ":\n" - + JamonMonitorLogger.getStringForAllSortedByLabel()); - System.gc(); - } - } - - public KB learnAllClasses(Set<String> classesToLearn) { - KB kb = new KB(); - for (String classToLearn : classesToLearn) { - logger.info("Leanring class: " + classToLearn); - try { - Description d = learnClass(classToLearn); - if (d == null - || d.toKBSyntaxString().equals( - new Thing().toKBSyntaxString())) { - logger.error("Description was " + d + ", continueing"); - continue; - } - kb.addAxiom(new EquivalentClassesAxiom(new NamedClass( - classToLearn), d)); - kb.export(new File( - "/home/dcherix/dllearner/old/result_partial.owl"), - OntologyFormat.RDF_XML); - - } catch (Exception e) { - logger.warn("", e); - } - this.dropCache(); - } - - return kb; - } - - public Description learnClass(String classToLearn) throws Exception { - // TODO: use aksw-commons-sparql instead of sparql-scala - SortedSet<String> posEx = new TreeSet<String>(getPosEx(classToLearn)); - logger.info("Found " + posEx.size() + " positive examples"); - if (posEx.isEmpty()) { - return null; - } - SortedSet<String> negEx = new TreeSet<String>(getNegEx(classToLearn, - posEx)); - - posEx = SetManipulation.fuzzyShrink(posEx, examplesize); - negEx = SetManipulation.fuzzyShrink(negEx, examplesize); - - SortedSet<Individual> posExamples = Helper.getIndividualSet(posEx); - SortedSet<Individual> negExamples = Helper.getIndividualSet(negEx); - SortedSetTuple<Individual> examples = new SortedSetTuple<Individual>( - posExamples, negExamples); - - ComponentManager cm = ComponentManager.getInstance(); - - SparqlKnowledgeSource ks = cm - .knowledgeSource(SparqlKnowledgeSource.class); - ks.setInstances(Datastructures.individualSetToStringSet(examples - .getCompleteSet())); - // ks.getConfigurator().setPredefinedEndpoint("DBPEDIA"); // TODO: - // probably the official endpoint is too slow? - ks.setUrl(new URL(endpointurl)); - ks.setUseLits(false); - ks.setUseCacheDatabase(true); - ks.setUseCache(true); - ks.setRecursionDepth(1); - ks.setCloseAfterRecursion(true); - ks.setSaveExtractedFragment(true); - ks.setPredList(new HashSet<String>(Arrays.asList(new String[] { - "http://dbpedia.org/property/wikiPageUsesTemplate", - "http://dbpedia.org/ontology/wikiPageExternalLink", - "http://dbpedia.org/property/wordnet_type", - "http://www.w3.org/2002/07/owl#sameAs" }))); - - ks.setObjList(new HashSet<String>(Arrays.asList(new String[] { - "http://dbpedia.org/class/yago/", - "http://dbpedia.org/resource/Category:" }))); - - ks.init(); - - AbstractReasonerComponent rc = cm.reasoner(FastInstanceChecker.class, - ks); - rc.init(); - - PosNegLPStandard lp = cm.learningProblem(PosNegLPStandard.class, rc); - lp.setPositiveExamples(posExamples); - lp.setNegativeExamples(negExamples); - lp.setAccuracyMethod("fmeasure"); - lp.setUseApproximations(false); - lp.init(); - - CELOE la = cm.learningAlgorithm(CELOE.class, lp, rc); - // CELOEConfigurator cc = la.getConfigurator(); - la.setMaxExecutionTimeInSeconds(100); - la.init(); - RhoDRDown op = (RhoDRDown) la.getOperator(); - - op.setUseNegation(false); - op.setUseAllConstructor(false); - op.setUseCardinalityRestrictions(false); - op.setUseHasValueConstructor(true); - la.setNoisePercentage(20); - la.setIgnoredConcepts(new HashSet<NamedClass>(Arrays - .asList(new NamedClass[] { new NamedClass(classToLearn) }))); - la.init(); - - // to write the above configuration in a conf file (optional) - Config cf = new Config(cm, ks, rc, lp, la); - new ConfigSave(cf).saveFile(new File("/dev/null")); - - la.start(); - - cm.freeAllComponents(); - return la.getCurrentlyBestDescription(); - } - - public Set<String> getClasses() throws Exception { - OntModel model = ModelFactory.createOntologyModel(); - model.read(new FileInputStream( - "/home/dcherix/Downloads/dbpedia_3.6.owl"), null); - Set<OntClass> classes = model.listClasses().toSet(); - Set<String> results = new HashSet<String>(); - for (OntClass ontClass : classes) { - results.add(ontClass.getURI()); - } - return results; - } - - // gets all DBpedia Classes - // public Set<String> getClasses() throws Exception { - // SparqlTemplate st = SparqlTemplate.getInstance("allClasses.vm"); - // st.setLimit(0); - // st.addFilter(sparqlEndpoint.like("classes", new - // HashSet<String>(Arrays.asList(new - // String[]{"http://dbpedia.org/ontology/"})))); - // VelocityContext vc = st.putSgetVelocityContext(); - // String query = st.getQuery(); - // return new - // HashSet<String>(ResultSetRenderer.asStringSet(sparqlEndpoint.executeSelect(query))); - // } - // - public Set<String> getPosEx(String clazz) throws Exception { -// SparqlTemplate st = SparqlTemplate.getInstance("instancesOfClass.vm"); -// st.setLimit(0); -// VelocityContext vc = st.getVelocityContext(); -// vc.put("class", clazz); -// String queryString = st.getQuery(); - StringBuilder queryString = new StringBuilder(); - queryString.append("PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>"); - queryString.append(" SELECT ?instances WHERE { ?instances rdf:type <"); - queryString.append(clazz); - queryString.append("> }"); - System.out.println(queryString); - return this.executeResourceQuery(queryString.toString()); - } - - /** - * gets all direct classes of all instances and has a look, what the most - * common is - * - * @param clazz - * @param posEx - * @return - * @throws Exception - */ - public String selectClass(String clazz, Set<String> posEx) throws Exception { - Map<String, Integer> m = new HashMap<String, Integer>(); - // TODO: use aksw-commons-sparql instead of sparql-scala - /* - * for (String pos : posEx) { SparqlTemplate st = - * SparqlTemplate.getInstance("directClassesOfInstance.vm"); - * st.setLimit(0); st.addFilter(sparqlEndpoint.like("direct", new - * HashSet<String>(Arrays.asList(new - * String[]{"http://dbpedia.org/ontology/"})))); VelocityContext vc = - * st.getVelocityContext(); vc.put("instance", pos); String query = - * st.getQuery(); Set<String> classes = new - * HashSet<String>(ResultSetRenderer - * .asStringSet(sparqlEndpoint.executeSelect(query))); - * classes.remove(clazz); for (String s : classes) { if (m.get(s) == - * null) { m.put(s, 0); } m.put(s, m.get(s).intValue() + 1); } } - */ - - int max = 0; - String maxClass = ""; - for (String key : m.keySet()) { - if (m.get(key).intValue() > max) { - maxClass = key; - } - } - - return maxClass; - } - - /** - * gets instances of a class or random instances - * - * @param clazz - * @param posEx - * @return - * @throws Exception - */ - - public Set<String> getNegEx(String clazz, Set<String> posEx) - throws Exception { - Set<String> negEx = new HashSet<String>(); - // TODO: use aksw-commons-sparql instead of sparql-scala - /* - * String targetClass = getParallelClass(clazz); - * logger.info("using class for negatives: " + targetClass); if - * (targetClass != null) { - * - * SparqlTemplate st = - * SparqlTemplate.getInstance("instancesOfClass.vm"); st.setLimit(0); - * VelocityContext vc = st.getVelocityContext(); vc.put("class", - * targetClass); // st.addFilter(sparqlEndpoint.like("class", new - * HashSet<String>(Arrays.asList(new - * String[]{"http://dbpedia.org/ontology/"})))); String query = - * st.getQuery(); // negEx.addAll(new - * HashSet<String>(ResultSetRenderer.asStringSet - * (sparqlEndpoint.executeSelect(query)))); } else { - * - * SparqlTemplate st = SparqlTemplate.getInstance("someInstances.vm"); - * st.setLimit(posEx.size() + 100); VelocityContext vc = - * st.getVelocityContext(); String query = st.getQuery(); // - * negEx.addAll(new - * HashSet<String>(ResultSetRenderer.asStringSet(sparqlEndpoint - * .executeSelect(query)))); } negEx.removeAll(posEx); - */ - - String targetClass = getParallelClass(clazz); - logger.info("using class for negatives: " + targetClass); - if (targetClass != null) { - SparqlTemplate st = SparqlTemplate - .getInstance("instancesOfClass2.vm"); - st.setLimit(0); - VelocityContext vc = st.getVelocityContext(); - vc.put("class", targetClass); - st.addFilter("FILTER ( ?class LIKE (<http://dbpedia.org/ontology/%>"); - - String query = st.getQuery(); - negEx.addAll(this.executeResourceQuery(query)); - } else { - SparqlTemplate st = SparqlTemplate.getInstance("someInstances.vm"); - st.setLimit(posEx.size() + 100); - VelocityContext vc = st.getVelocityContext(); - String query = st.getQuery(); - negEx.addAll(this.executeResourceQuery(query)); - } - negEx.removeAll(posEx); - return negEx; - - } - - public String getParallelClass(String clazz) throws Exception { - // TODO: use aksw-commons-sparql instead of sparql-scala - // SparqlTemplate st = SparqlTemplate.getInstance("parallelClass.vm"); - // st.setLimit(0); - // VelocityContext vc = st.getVelocityContext(); - // vc.put("class", clazz); - // String query = st.getQuery(); - // Set<String> parClasses = new - // HashSet<String>(ResultSetRenderer.asStringSet(sparqlEndpoint.executeSelect(query))); - // for (String s : parClasses) { - // return s; - // } - SparqlTemplate st = SparqlTemplate.getInstance("parallelClass.vm"); - st.setLimit(0); - VelocityContext vc = st.getVelocityContext(); - vc.put("class", clazz); - String query = st.getQuery(); - Set<String> parClasses = this.executeClassQuery(query); - for (String s : parClasses) { - if (s.startsWith("http://dbpedia.org/ontology")) { - if (!s.endsWith("Unknown")) { - return s; - } - } - } - return null; - } - - public Set<String> executeResourceQuery(String queryString) { -// Query query = QueryFactory.create(queryString); -// QueryExecution qexec = QueryExecutionFactory.sparqlService(endpointurl, -// query); -// ResultSet resultSet = qexec.execSelect(); - ResultSetRewindable resultSet = SparqlQuery.convertJSONtoResultSet(cache.executeSparqlQuery(new SparqlQuery(queryString,sparqlEndpoint))); - QuerySolution solution; - Set<String> results = new HashSet<String>(); - while (resultSet.hasNext()) { - solution = resultSet.next(); - results.add(solution.getResource("instances").getURI()); - } - return results; - } - - public Set<String> executeClassQuery(String queryString) { -// Query query = QueryFactory.create(queryString); -// QueryExecution qexec = QueryExecutionFactory.sparqlService(endpointurl, -// query); -// ResultSet resultSet = qexec.execSelect(); - ResultSetRewindable resultSet = SparqlQuery.convertJSONtoResultSet(cache.executeSparqlQuery(new SparqlQuery(queryString,sparqlEndpoint))); - QuerySolution solution; - Set<String> results = new HashSet<String>(); - while (resultSet.hasNext()) { - solution = resultSet.next(); - results.add(solution.getResource("sub").getURI()); - } - return results; - } - - private void dropCache(){ - try { - Class.forName("org.h2.Driver"); - String databaseName="extraction"; - String databaseDirectory="cache"; - Connection conn = DriverManager.getConnection("jdbc:h2:"+databaseDirectory+"/"+databaseName, "sa", ""); - Statement st = conn.createStatement(); - st.execute("DELETE FROM QUERY_CACHE"); - st.close(); - conn.close(); - System.gc(); - } catch (ClassNotFoundException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } catch (SQLException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - } - + + public static String endpointurl = "http://live.dbpedia.org/sparql"; + public static int examplesize = 30; + + private static org.apache.log4j.Logger logger = org.apache.log4j.Logger + .getLogger(DBpediaClassLearnerCELOE.class); + private static String output; + private static String input; + + SparqlEndpoint sparqlEndpoint = null; + private Cache cache; + + public DBpediaClassLearnerCELOE() { + // OPTIONAL: if you want to do some case distinctions in the learnClass + // method, you could add + // parameters to the constructure e.g. YAGO_ + try { + sparqlEndpoint = new SparqlEndpoint(new URL(endpointurl)); + } catch (MalformedURLException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + cache = new Cache("basCache"); + } + + public static void main(String args[]) throws LearningProblemUnsupportedException, IOException, + Exception { + if (args.length < 3) { + usage(); + return; + } + int iter; + try { + output = args[1]; + input = args[0]; + iter = Integer.parseInt(args[2]); + } catch (Exception e) { + usage(); + return; + } + for (int i = 0; i < iter; i++) { + DBpediaClassLearnerCELOE dcl = new DBpediaClassLearnerCELOE(); + Set<String> classesToLearn = dcl.getClasses(); + Monitor mon = MonitorFactory.start("Learn DBpedia"); + KB kb = dcl.learnAllClasses(classesToLearn); + mon.stop(); + kb.export(new File(output + "/result" + i + ".owl"), OntologyFormat.RDF_XML); + // Set<String> pos = + // dcl.getPosEx("http://dbpedia.org/ontology/Person"); + // dcl.getNegEx("http://dbpedia.org/ontology/Person", pos); + logger.info("Test" + i + ":\n" + JamonMonitorLogger.getStringForAllSortedByLabel()); + System.gc(); + } + } + + /** + * Show the required parameters for usage + */ + private static void usage() { + System.out.println("***************************************************************"); + System.out.println("* Usage: java DBpediaClassLearnerCELOE input output iteration *"); + System.out.println("* As input is the dbpedia schema as owl necessary *"); + System.out.println("* As output is a directory for the owl results file expected *"); + System.out.println("***************************************************************"); + } + + public KB learnAllClasses(Set<String> classesToLearn) { + KB kb = new KB(); + for (String classToLearn : classesToLearn) { + logger.info("Learning class: " + classToLearn); + try { + Description d = learnClass(classToLearn); + if (d == null || d.toKBSyntaxString().equals(new Thing().toKBSyntaxString())) { + logger.error("Description was " + d + ", continueing"); + continue; + } + kb.addAxiom(new EquivalentClassesAxiom(new NamedClass(classToLearn), d)); + kb.export(new File(output+"/result_partial.owl"), + OntologyFormat.RDF_XML); + + } catch (Exception e) { + logger.warn("", e); + } + this.dropCache(); + } + + return kb; + } + + public Description learnClass(String classToLearn) throws Exception { + // TODO: use aksw-commons-sparql instead of sparql-scala + SortedSet<String> posEx = new TreeSet<String>(getPosEx(classToLearn)); + logger.info("Found " + posEx.size() + " positive examples"); + if (posEx.isEmpty()) { + return null; + } + SortedSet<String> negEx = new TreeSet<String>(getNegEx(classToLearn, posEx)); + + posEx = SetManipulation.fuzzyShrink(posEx, examplesize); + negEx = SetManipulation.fuzzyShrink(negEx, examplesize); + + SortedSet<Individual> posExamples = Helper.getIndividualSet(posEx); + SortedSet<Individual> negExamples = Helper.getIndividualSet(negEx); + SortedSetTuple<Individual> examples = new SortedSetTuple<Individual>(posExamples, + negExamples); + + ComponentManager cm = ComponentManager.getInstance(); + + SparqlKnowledgeSource ks = cm.knowledgeSource(SparqlKnowledgeSource.class); + ks.setInstances(Datastructures.individualSetToStringSet(examples.getCompleteSet())); + // ks.getConfigurator().setPredefinedEndpoint("DBPEDIA"); // TODO: + // probably the official endpoint is too slow? + ks.setUrl(new URL(endpointurl)); + ks.setUseLits(false); + ks.setUseCacheDatabase(true); + ks.setUseCache(true); + ks.setRecursionDepth(1); + ks.setCloseAfterRecursion(true); + ks.setSaveExtractedFragment(true); + ks.setPredList(new HashSet<String>(Arrays + .asList(new String[] { "http://dbpedia.org/property/wikiPageUsesTemplate", + "http://dbpedia.org/ontology/wikiPageExternalLink", + "http://dbpedia.org/property/wordnet_type", + "http://www.w3.org/2002/07/owl#sameAs" }))); + + ks.setObjList(new HashSet<String>(Arrays.asList(new String[] { + "http://dbpedia.org/class/yago/", "http://dbpedia.org/resource/Category:" }))); + + ks.init(); + + AbstractReasonerComponent rc = cm.reasoner(FastInstanceChecker.class, ks); + rc.init(); + + PosNegLPStandard lp = cm.learningProblem(PosNegLPStandard.class, rc); + lp.setPositiveExamples(posExamples); + lp.setNegativeExamples(negExamples); + lp.setAccuracyMethod("fmeasure"); + lp.setUseApproximations(false); + lp.init(); + + CELOE la = cm.learningAlgorithm(CELOE.class, lp, rc); + // CELOEConfigurator cc = la.getConfigurator(); + la.setMaxExecutionTimeInSeconds(100); + la.init(); + RhoDRDown op = (RhoDRDown) la.getOperator(); + + op.setUseNegation(false); + op.setUseAllConstructor(false); + op.setUseCardinalityRestrictions(false); + op.setUseHasValueConstructor(true); + la.setNoisePercentage(20); + la.setIgnoredConcepts(new HashSet<NamedClass>(Arrays + .asList(new NamedClass[] { new NamedClass(classToLearn) }))); + la.init(); + + // to write the above configuration in a conf file (optional) + Config cf = new Config(cm, ks, rc, lp, la); + new ConfigSave(cf).saveFile(new File("/dev/null")); + + la.start(); + + cm.freeAllComponents(); + return la.getCurrentlyBestDescription(); + } + + public Set<String> getClasses() throws Exception { + OntModel model = ModelFactory.createOntologyModel(); + model.read(new FileInputStream(input), null); + Set<OntClass> classes = model.listClasses().toSet(); + Set<String> results = new HashSet<String>(); + for (OntClass ontClass : classes) { + results.add(ontClass.getURI()); + } + return results; + } + + // gets all DBpedia Classes + // public Set<String> getClasses() throws Exception { + // SparqlTemplate st = SparqlTemplate.getInstance("allClasses.vm"); + // st.setLimit(0); + // st.addFilter(sparqlEndpoint.like("classes", new + // HashSet<String>(Arrays.asList(new + // String[]{"http://dbpedia.org/ontology/"})))); + // VelocityContext vc = st.putSgetVelocityContext(); + // String query = st.getQuery(); + // return new + // HashSet<String>(ResultSetRenderer.asStringSet(sparqlEndpoint.executeSelect(query))); + // } + // + public Set<String> getPosEx(String clazz) throws Exception { + // SparqlTemplate st = + // SparqlTemplate.getInstance("instancesOfClass.vm"); + // st.setLimit(0); + // VelocityContext vc = st.getVelocityContext(); + // vc.put("class", clazz); + // String queryString = st.getQuery(); + StringBuilder queryString = new StringBuilder(); + queryString.append("PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>"); + queryString.append(" SELECT ?instances WHERE { ?instances rdf:type <"); + queryString.append(clazz); + queryString.append("> }"); + System.out.println(queryString); + return this.executeResourceQuery(queryString.toString()); + } + + /** + * gets all direct classes of all instances and has a look, what the most + * common is + * + * @param clazz + * @param posEx + * @return + * @throws Exception + */ + public String selectClass(String clazz, Set<String> posEx) throws Exception { + Map<String, Integer> m = new HashMap<String, Integer>(); + // TODO: use aksw-commons-sparql instead of sparql-scala + /* + * for (String pos : posEx) { SparqlTemplate st = + * SparqlTemplate.getInstance("directClassesOfInstance.vm"); + * st.setLimit(0); st.addFilter(sparqlEndpoint.like("direct", new + * HashSet<String>(Arrays.asList(new + * String[]{"http://dbpedia.org/ontology/"})))); VelocityContext vc = + * st.getVelocityContext(); vc.put("instance", pos); String query = + * st.getQuery(); Set<String> classes = new + * HashSet<String>(ResultSetRenderer + * .asStringSet(sparqlEndpoint.executeSelect(query))); + * classes.remove(clazz); for (String s : classes) { if (m.get(s) == + * null) { m.put(s, 0); } m.put(s, m.get(s).intValue() + 1); } } + */ + + int max = 0; + String maxClass = ""; + for (String key : m.keySet()) { + if (m.get(key).intValue() > max) { + maxClass = key; + } + } + + return maxClass; + } + + /** + * gets instances of a class or random instances + * + * @param clazz + * @param posEx + * @return + * @throws Exception + */ + + public Set<String> getNegEx(String clazz, Set<String> posEx) throws Exception { + Set<String> negEx = new HashSet<String>(); + // TODO: use aksw-commons-sparql instead of sparql-scala + /* + * String targetClass = getParallelClass(clazz); + * logger.info("using class for negatives: " + targetClass); if + * (targetClass != null) { + * + * SparqlTemplate st = + * SparqlTemplate.getInstance("instancesOfClass.vm"); st.setLimit(0); + * VelocityContext vc = st.getVelocityContext(); vc.put("class", + * targetClass); // st.addFilter(sparqlEndpoint.like("class", new + * HashSet<String>(Arrays.asList(new + * String[]{"http://dbpedia.org/ontology/"})))); String query = + * st.getQuery(); // negEx.addAll(new + * HashSet<String>(ResultSetRenderer.asStringSet + * (sparqlEndpoint.executeSelect(query)))); } else { + * + * SparqlTemplate st = SparqlTemplate.getInstance("someInstances.vm"); + * st.setLimit(posEx.size() + 100); VelocityContext vc = + * st.getVelocityContext(); String query = st.getQuery(); // + * negEx.addAll(new + * HashSet<String>(ResultSetRenderer.asStringSet(sparqlEndpoint + * .executeSelect(query)))); } negEx.removeAll(posEx); + */ + + String targetClass = getParallelClass(clazz); + logger.info("using class for negatives: " + targetClass); + if (targetClass != null) { + SparqlTemplate st = SparqlTemplate.getInstance("instancesOfClass2.vm"); + st.setLimit(0); + VelocityContext vc = st.getVelocityContext(); + vc.put("class", targetClass); + st.addFilter("FILTER ( ?class LIKE (<http://dbpedia.org/ontology/%>"); + + String query = st.getQuery(); + negEx.addAll(this.executeResourceQuery(query)); + } else { + SparqlTemplate st = SparqlTemplate.getInstance("someInstances.vm"); + st.setLimit(posEx.size() + 100); + VelocityContext vc = st.getVelocityContext(); + String query = st.getQuery(); + negEx.addAll(this.executeResourceQuery(query)); + } + negEx.removeAll(posEx); + return negEx; + + } + + public String getParallelClass(String clazz) throws Exception { + // TODO: use aksw-commons-sparql instead of sparql-scala + // SparqlTemplate st = SparqlTemplate.getInstance("parallelClass.vm"); + // st.setLimit(0); + // VelocityContext vc = st.getVelocityContext(); + // vc.put("class", clazz); + // String query = st.getQuery(); + // Set<String> parClasses = new + // HashSet<String>(ResultSetRenderer.asStringSet(sparqlEndpoint.executeSelect(query))); + // for (String s : parClasses) { + // return s; + // } + SparqlTemplate st = SparqlTemplate.getInstance("parallelClass.vm"); + st.setLimit(0); + VelocityContext vc = st.getVelocityContext(); + vc.put("class", clazz); + String query = st.getQuery(); + Set<String> parClasses = this.executeClassQuery(query); + for (String s : parClasses) { + if (s.startsWith("http://dbpedia.org/ontology")) { + if (!s.endsWith("Unknown")) { + return s; + } + } + } + return null; + } + + public Set<String> executeResourceQuery(String queryString) { + // Query query = QueryFactory.create(queryString); + // QueryExecution qexec = + // QueryExecutionFactory.sparqlService(endpointurl, + // query); + // ResultSet resultSet = qexec.execSelect(); + ResultSetRewindable resultSet = SparqlQuery.convertJSONtoResultSet(cache + .executeSparqlQuery(new SparqlQuery(queryString, sparqlEndpoint))); + QuerySolution solution; + Set<String> results = new HashSet<String>(); + while (resultSet.hasNext()) { + solution = resultSet.next(); + results.add(solution.getResource("instances").getURI()); + } + return results; + } + + public Set<String> executeClassQuery(String queryString) { + // Query query = QueryFactory.create(queryString); + // QueryExecution qexec = + // QueryExecutionFactory.sparqlService(endpointurl, + // query); + // ResultSet resultSet = qexec.execSelect(); + ResultSetRewindable resultSet = SparqlQuery.convertJSONtoResultSet(cache + .executeSparqlQuery(new SparqlQuery(queryString, sparqlEndpoint))); + QuerySolution solution; + Set<String> results = new HashSet<String>(); + while (resultSet.hasNext()) { + solution = resultSet.next(); + results.add(solution.getResource("sub").getURI()); + } + return results; + } + + private void dropCache() { + try { + Class.forName("org.h2.Driver"); + String databaseName = "extraction"; + String databaseDirectory = "cache"; + Connection conn = DriverManager.getConnection("jdbc:h2:" + databaseDirectory + "/" + + databaseName, "sa", ""); + Statement st = conn.createStatement(); + st.execute("DELETE FROM QUERY_CACHE"); + st.close(); + conn.close(); + System.gc(); + } catch (ClassNotFoundException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (SQLException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + } \ No newline at end of file Modified: trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/NewSparqlCompDBpediaClassLearnerCELOE.java =================================================================== --- trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/NewSparqlCompDBpediaClassLearnerCELOE.java 2013-03-04 15:00:36 UTC (rev 3910) +++ trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/NewSparqlCompDBpediaClassLearnerCELOE.java 2013-03-12 09:29:04 UTC (rev 3911) @@ -90,6 +90,8 @@ private static org.apache.log4j.Logger logger = org.apache.log4j.Logger .getLogger(NewSparqlCompDBpediaClassLearnerCELOE.class); + private static String output; + private static String input; SparqlEndpoint sparqlEndpoint = null; @@ -107,6 +109,19 @@ public static void main(String args[]) throws LearningProblemUnsupportedException, IOException, Exception { + if (args.length < 3) { + usage(); + return; + } + int iter; + try { + output = args[1]; + input = args[0]; + iter = Integer.parseInt(args[2]); + } catch (Exception e) { + usage(); + return; + } for (int i = 0; i < 4; i++) { NewSparqlCompDBpediaClassLearnerCELOE dcl = new NewSparqlCompDBpediaClassLearnerCELOE(); Set<String> classesToLearn = dcl.getClasses(); @@ -114,7 +129,7 @@ Monitor mon = MonitorFactory.start("Learn DBpedia"); KB kb = dcl.learnAllClasses(classesToLearn); mon.stop(); - kb.export(new File("/home/dcherix/dllearner/simple/result" + i + kb.export(new File(output+"/result" + i + ".owl"), OntologyFormat.RDF_XML); // Set<String> pos = // dcl.getPosEx("http://dbpedia.org/ontology/Person"); @@ -125,6 +140,17 @@ .getStringForAllSortedByLabel()); } } + + /** + * Show the required parameters for usage + */ + private static void usage() { + System.out.println("***************************************************************"); + System.out.println("* Usage: java DBpediaClassLearnerCELOE input output iteration *"); + System.out.println("* As input is the dbpedia schema as owl necessary *"); + System.out.println("* As output is a directory for the owl results file expected *"); + System.out.println("***************************************************************"); + } public KB learnAllClasses(Set<String> classesToLearn) { KB kb = new KB(); @@ -141,7 +167,7 @@ kb.addAxiom(new EquivalentClassesAxiom(new NamedClass( classToLearn), d)); kb.export(new File( - "/home/dcherix/dllearner/simple/result_partial.owl"), + output+"/result_partial.owl"), OntologyFormat.RDF_XML); } catch (Exception e) { @@ -184,7 +210,7 @@ // ks.setUseCacheDatabase(true); ks.setRecursionDepth(1); ArrayList<String> ontologyUrls = new ArrayList<String>(); - ontologyUrls.add("http://downloads.dbpedia.org/3.6/dbpedia_3.6.owl"); + ontologyUrls.add(new File(input).toURI().toURL().toString()); ks.setOntologySchemaUrls(ontologyUrls); ks.setAboxfilter("FILTER (!regex(str(?p), '^http://dbpedia.org/property/wikiPageUsesTemplate') && " + "!regex(str(?p), '^http://dbpedia.org/ontology/wikiPageExternalLink') && " @@ -247,7 +273,7 @@ public Set<String> getClasses() throws Exception { OntModel model = ModelFactory.createOntologyModel(); model.read(new FileInputStream( - "/home/dcherix/Downloads/dbpedia_3.6.owl"), null); + input), null); Set<OntClass> classes = model.listClasses().toSet(); Set<String> results = new HashSet<String>(); int i = 0; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |